From ace9429bb58fd418f0c81d4c2835699bddf6bde6 Mon Sep 17 00:00:00 2001
From: Daniel Baumann <daniel.baumann@progress-linux.org>
Date: Thu, 11 Apr 2024 10:27:49 +0200
Subject: Adding upstream version 6.6.15.

Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
---
 arch/s390/Kbuild                             |   13 +
 arch/s390/Kconfig                            |  935 ++++
 arch/s390/Kconfig.debug                      |   22 +
 arch/s390/Makefile                           |  177 +
 arch/s390/appldata/Makefile                  |    9 +
 arch/s390/appldata/appldata.h                |   48 +
 arch/s390/appldata/appldata_base.c           |  428 ++
 arch/s390/appldata/appldata_mem.c            |  161 +
 arch/s390/appldata/appldata_net_sum.c        |  163 +
 arch/s390/appldata/appldata_os.c             |  211 +
 arch/s390/boot/.gitignore                    |    7 +
 arch/s390/boot/Makefile                      |  133 +
 arch/s390/boot/als.c                         |  114 +
 arch/s390/boot/boot.h                        |  102 +
 arch/s390/boot/clz_ctz.c                     |    2 +
 arch/s390/boot/cmdline.c                     |    2 +
 arch/s390/boot/ctype.c                       |    2 +
 arch/s390/boot/decompressor.c                |   86 +
 arch/s390/boot/decompressor.h                |   12 +
 arch/s390/boot/ebcdic.c                      |    2 +
 arch/s390/boot/head.S                        |  320 ++
 arch/s390/boot/head_kdump.S                  |  101 +
 arch/s390/boot/install.sh                    |   24 +
 arch/s390/boot/ipl_data.c                    |   84 +
 arch/s390/boot/ipl_parm.c                    |  306 ++
 arch/s390/boot/ipl_report.c                  |  165 +
 arch/s390/boot/ipl_vmparm.c                  |    2 +
 arch/s390/boot/kaslr.c                       |  198 +
 arch/s390/boot/machine_kexec_reloc.c         |    2 +
 arch/s390/boot/mem.S                         |    2 +
 arch/s390/boot/pgm_check_info.c              |  179 +
 arch/s390/boot/physmem_info.c                |  328 ++
 arch/s390/boot/sclp_early_core.c             |   11 +
 arch/s390/boot/startup.c                     |  378 ++
 arch/s390/boot/string.c                      |  140 +
 arch/s390/boot/uv.c                          |   95 +
 arch/s390/boot/uv.h                          |   22 +
 arch/s390/boot/version.c                     |    8 +
 arch/s390/boot/vmem.c                        |  458 ++
 arch/s390/boot/vmlinux.lds.S                 |  128 +
 arch/s390/configs/btf.config                 |    2 +
 arch/s390/configs/debug_defconfig            |  887 ++++
 arch/s390/configs/defconfig                  |  816 ++++
 arch/s390/configs/kasan.config               |    4 +
 arch/s390/configs/zfcpdump_defconfig         |   82 +
 arch/s390/crypto/Kconfig                     |  135 +
 arch/s390/crypto/Makefile                    |   21 +
 arch/s390/crypto/aes_s390.c                  | 1057 +++++
 arch/s390/crypto/arch_random.c               |   19 +
 arch/s390/crypto/chacha-glue.c               |  130 +
 arch/s390/crypto/chacha-s390.S               |  908 ++++
 arch/s390/crypto/chacha-s390.h               |   14 +
 arch/s390/crypto/crc32-vx.c                  |  310 ++
 arch/s390/crypto/crc32be-vx.S                |  213 +
 arch/s390/crypto/crc32le-vx.S                |  275 ++
 arch/s390/crypto/des_s390.c                  |  502 +++
 arch/s390/crypto/ghash_s390.c                |  154 +
 arch/s390/crypto/paes_s390.c                 |  809 ++++
 arch/s390/crypto/prng.c                      |  911 ++++
 arch/s390/crypto/sha.h                       |   35 +
 arch/s390/crypto/sha1_s390.c                 |  103 +
 arch/s390/crypto/sha256_s390.c               |  143 +
 arch/s390/crypto/sha3_256_s390.c             |  146 +
 arch/s390/crypto/sha3_512_s390.c             |  154 +
 arch/s390/crypto/sha512_s390.c               |  149 +
 arch/s390/crypto/sha_common.c                |  124 +
 arch/s390/hypfs/Makefile                     |   14 +
 arch/s390/hypfs/hypfs.h                      |   85 +
 arch/s390/hypfs/hypfs_dbfs.c                 |  122 +
 arch/s390/hypfs/hypfs_diag.c                 |  219 +
 arch/s390/hypfs/hypfs_diag.h                 |   35 +
 arch/s390/hypfs/hypfs_diag0c.c               |  125 +
 arch/s390/hypfs/hypfs_diag_fs.c              |  393 ++
 arch/s390/hypfs/hypfs_sprp.c                 |  151 +
 arch/s390/hypfs/hypfs_vm.c                   |  141 +
 arch/s390/hypfs/hypfs_vm.h                   |   50 +
 arch/s390/hypfs/hypfs_vm_fs.c                |  139 +
 arch/s390/hypfs/inode.c                      |  477 +++
 arch/s390/include/asm/Kbuild                 |    9 +
 arch/s390/include/asm/abs_lowcore.h          |   27 +
 arch/s390/include/asm/airq.h                 |  110 +
 arch/s390/include/asm/alternative-asm.h      |   56 +
 arch/s390/include/asm/alternative.h          |  120 +
 arch/s390/include/asm/ap.h                   |  555 +++
 arch/s390/include/asm/appldata.h             |   68 +
 arch/s390/include/asm/archrandom.h           |   38 +
 arch/s390/include/asm/asm-const.h            |   12 +
 arch/s390/include/asm/asm-extable.h          |   92 +
 arch/s390/include/asm/asm-prototypes.h       |   13 +
 arch/s390/include/asm/atomic.h               |  150 +
 arch/s390/include/asm/atomic_ops.h           |  201 +
 arch/s390/include/asm/barrier.h              |   82 +
 arch/s390/include/asm/bitops.h               |  378 ++
 arch/s390/include/asm/boot_data.h            |   18 +
 arch/s390/include/asm/bug.h                  |   71 +
 arch/s390/include/asm/cache.h                |   19 +
 arch/s390/include/asm/ccwdev.h               |  237 ++
 arch/s390/include/asm/ccwgroup.h             |   75 +
 arch/s390/include/asm/checksum.h             |  133 +
 arch/s390/include/asm/chpid.h                |   51 +
 arch/s390/include/asm/chsc.h                 |   69 +
 arch/s390/include/asm/cio.h                  |  378 ++
 arch/s390/include/asm/clocksource.h          |    7 +
 arch/s390/include/asm/clp.h                  |   59 +
 arch/s390/include/asm/cmb.h                  |   14 +
 arch/s390/include/asm/cmpxchg.h              |  207 +
 arch/s390/include/asm/compat.h               |  140 +
 arch/s390/include/asm/cpacf.h                |  551 +++
 arch/s390/include/asm/cpcmd.h                |   32 +
 arch/s390/include/asm/cpu.h                  |   28 +
 arch/s390/include/asm/cpu_mf-insn.h          |   22 +
 arch/s390/include/asm/cpu_mf.h               |  277 ++
 arch/s390/include/asm/cpufeature.h           |   23 +
 arch/s390/include/asm/cputime.h              |   21 +
 arch/s390/include/asm/crw.h                  |   55 +
 arch/s390/include/asm/css_chars.h            |   47 +
 arch/s390/include/asm/ctl_reg.h              |  146 +
 arch/s390/include/asm/current.h              |   19 +
 arch/s390/include/asm/debug.h                |  490 +++
 arch/s390/include/asm/delay.h                |   24 +
 arch/s390/include/asm/diag.h                 |  351 ++
 arch/s390/include/asm/dis.h                  |   30 +
 arch/s390/include/asm/dma.h                  |   14 +
 arch/s390/include/asm/dwarf.h                |   37 +
 arch/s390/include/asm/eadm.h                 |  120 +
 arch/s390/include/asm/ebcdic.h               |   47 +
 arch/s390/include/asm/elf.h                  |  302 ++
 arch/s390/include/asm/entry-common.h         |   63 +
 arch/s390/include/asm/exec.h                 |   13 +
 arch/s390/include/asm/extable.h              |   72 +
 arch/s390/include/asm/extmem.h               |   32 +
 arch/s390/include/asm/facility.h             |  114 +
 arch/s390/include/asm/fcx.h                  |  312 ++
 arch/s390/include/asm/fpu/api.h              |  119 +
 arch/s390/include/asm/fpu/internal.h         |   62 +
 arch/s390/include/asm/fpu/types.h            |   38 +
 arch/s390/include/asm/ftrace.h               |  169 +
 arch/s390/include/asm/ftrace.lds.h           |   21 +
 arch/s390/include/asm/futex.h                |   81 +
 arch/s390/include/asm/gmap.h                 |  188 +
 arch/s390/include/asm/hardirq.h              |   28 +
 arch/s390/include/asm/hugetlb.h              |  147 +
 arch/s390/include/asm/hw_irq.h               |   11 +
 arch/s390/include/asm/idals.h                |  244 ++
 arch/s390/include/asm/idle.h                 |   27 +
 arch/s390/include/asm/io.h                   |   80 +
 arch/s390/include/asm/ipl.h                  |  170 +
 arch/s390/include/asm/irq.h                  |  123 +
 arch/s390/include/asm/irq_work.h             |   12 +
 arch/s390/include/asm/irqflags.h             |   78 +
 arch/s390/include/asm/isc.h                  |   31 +
 arch/s390/include/asm/itcw.h                 |   31 +
 arch/s390/include/asm/jump_label.h           |   55 +
 arch/s390/include/asm/kasan.h                |   18 +
 arch/s390/include/asm/kdebug.h               |   28 +
 arch/s390/include/asm/kexec.h                |  110 +
 arch/s390/include/asm/kfence.h               |   42 +
 arch/s390/include/asm/kprobes.h              |   81 +
 arch/s390/include/asm/kvm_host.h             | 1060 +++++
 arch/s390/include/asm/kvm_para.h             |  123 +
 arch/s390/include/asm/linkage.h              |   10 +
 arch/s390/include/asm/lowcore.h              |  232 +
 arch/s390/include/asm/maccess.h              |   20 +
 arch/s390/include/asm/mem_encrypt.h          |   12 +
 arch/s390/include/asm/mmu.h                  |   45 +
 arch/s390/include/asm/mmu_context.h          |  128 +
 arch/s390/include/asm/mmzone.h               |   17 +
 arch/s390/include/asm/module.h               |   41 +
 arch/s390/include/asm/msi.h                  |   17 +
 arch/s390/include/asm/nmi.h                  |  108 +
 arch/s390/include/asm/nospec-branch.h        |   22 +
 arch/s390/include/asm/nospec-insn.h          |  132 +
 arch/s390/include/asm/numa.h                 |   25 +
 arch/s390/include/asm/os_info.h              |   54 +
 arch/s390/include/asm/page-states.h          |   21 +
 arch/s390/include/asm/page.h                 |  218 +
 arch/s390/include/asm/pai.h                  |   84 +
 arch/s390/include/asm/pci.h                  |  327 ++
 arch/s390/include/asm/pci_clp.h              |  217 +
 arch/s390/include/asm/pci_debug.h            |   30 +
 arch/s390/include/asm/pci_dma.h              |  198 +
 arch/s390/include/asm/pci_insn.h             |  159 +
 arch/s390/include/asm/pci_io.h               |  206 +
 arch/s390/include/asm/percpu.h               |  185 +
 arch/s390/include/asm/perf_event.h           |   81 +
 arch/s390/include/asm/pfault.h               |   26 +
 arch/s390/include/asm/pgalloc.h              |  157 +
 arch/s390/include/asm/pgtable.h              | 1872 ++++++++
 arch/s390/include/asm/physmem_info.h         |  172 +
 arch/s390/include/asm/pkey.h                 |   28 +
 arch/s390/include/asm/pnet.h                 |   15 +
 arch/s390/include/asm/preempt.h              |  139 +
 arch/s390/include/asm/processor.h            |  380 ++
 arch/s390/include/asm/ptdump.h               |   14 +
 arch/s390/include/asm/ptrace.h               |  263 ++
 arch/s390/include/asm/purgatory.h            |   17 +
 arch/s390/include/asm/qdio.h                 |  364 ++
 arch/s390/include/asm/runtime_instr.h        |   28 +
 arch/s390/include/asm/rwonce.h               |   31 +
 arch/s390/include/asm/schid.h                |   22 +
 arch/s390/include/asm/sclp.h                 |  164 +
 arch/s390/include/asm/scsw.h                 | 1050 +++++
 arch/s390/include/asm/seccomp.h              |   28 +
 arch/s390/include/asm/sections.h             |   29 +
 arch/s390/include/asm/set_memory.h           |   66 +
 arch/s390/include/asm/setup.h                |  161 +
 arch/s390/include/asm/signal.h               |   26 +
 arch/s390/include/asm/sigp.h                 |   72 +
 arch/s390/include/asm/smp.h                  |   70 +
 arch/s390/include/asm/softirq_stack.h        |   14 +
 arch/s390/include/asm/sparsemem.h            |    8 +
 arch/s390/include/asm/spinlock.h             |  148 +
 arch/s390/include/asm/spinlock_types.h       |   22 +
 arch/s390/include/asm/stacktrace.h           |  241 ++
 arch/s390/include/asm/stp.h                  |   98 +
 arch/s390/include/asm/string.h               |  206 +
 arch/s390/include/asm/switch_to.h            |   49 +
 arch/s390/include/asm/syscall.h              |  154 +
 arch/s390/include/asm/syscall_wrapper.h      |  140 +
 arch/s390/include/asm/sysinfo.h              |  201 +
 arch/s390/include/asm/text-patching.h        |   16 +
 arch/s390/include/asm/thread_info.h          |  106 +
 arch/s390/include/asm/timex.h                |  281 ++
 arch/s390/include/asm/tlb.h                  |  137 +
 arch/s390/include/asm/tlbflush.h             |  129 +
 arch/s390/include/asm/topology.h             |  101 +
 arch/s390/include/asm/tpi.h                  |   37 +
 arch/s390/include/asm/trace/diag.h           |   44 +
 arch/s390/include/asm/trace/zcrypt.h         |  123 +
 arch/s390/include/asm/types.h                |   19 +
 arch/s390/include/asm/uaccess.h              |  601 +++
 arch/s390/include/asm/unistd.h               |   40 +
 arch/s390/include/asm/unwind.h               |   98 +
 arch/s390/include/asm/uprobes.h              |   33 +
 arch/s390/include/asm/user.h                 |   71 +
 arch/s390/include/asm/uv.h                   |  517 +++
 arch/s390/include/asm/vdso.h                 |   34 +
 arch/s390/include/asm/vdso/clocksource.h     |    8 +
 arch/s390/include/asm/vdso/data.h            |   13 +
 arch/s390/include/asm/vdso/gettimeofday.h    |   63 +
 arch/s390/include/asm/vdso/processor.h       |    7 +
 arch/s390/include/asm/vdso/vsyscall.h        |   26 +
 arch/s390/include/asm/vmalloc.h              |    4 +
 arch/s390/include/asm/vmlinux.lds.h          |   33 +
 arch/s390/include/asm/vtime.h                |   21 +
 arch/s390/include/asm/vtimer.h               |   30 +
 arch/s390/include/asm/vx-insn-asm.h          |  681 +++
 arch/s390/include/asm/vx-insn.h              |   19 +
 arch/s390/include/asm/xor.h                  |   21 +
 arch/s390/include/uapi/asm/Kbuild            |    4 +
 arch/s390/include/uapi/asm/auxvec.h          |    9 +
 arch/s390/include/uapi/asm/bitsperlong.h     |   14 +
 arch/s390/include/uapi/asm/bpf_perf_event.h  |    9 +
 arch/s390/include/uapi/asm/byteorder.h       |    7 +
 arch/s390/include/uapi/asm/chpid.h           |   23 +
 arch/s390/include/uapi/asm/chsc.h            |  144 +
 arch/s390/include/uapi/asm/clp.h             |   29 +
 arch/s390/include/uapi/asm/cmb.h             |   54 +
 arch/s390/include/uapi/asm/dasd.h            |  354 ++
 arch/s390/include/uapi/asm/fs3270.h          |   25 +
 arch/s390/include/uapi/asm/guarded_storage.h |   78 +
 arch/s390/include/uapi/asm/hwctrset.h        |   51 +
 arch/s390/include/uapi/asm/hypfs.h           |   55 +
 arch/s390/include/uapi/asm/ioctls.h          |    9 +
 arch/s390/include/uapi/asm/ipcbuf.h          |   34 +
 arch/s390/include/uapi/asm/ipl.h             |  208 +
 arch/s390/include/uapi/asm/kvm.h             |  308 ++
 arch/s390/include/uapi/asm/kvm_para.h        |    8 +
 arch/s390/include/uapi/asm/kvm_perf.h        |   22 +
 arch/s390/include/uapi/asm/monwriter.h       |   32 +
 arch/s390/include/uapi/asm/perf_regs.h       |   44 +
 arch/s390/include/uapi/asm/pkey.h            |  452 ++
 arch/s390/include/uapi/asm/posix_types.h     |   58 +
 arch/s390/include/uapi/asm/ptrace.h          |  455 ++
 arch/s390/include/uapi/asm/qeth.h            |  116 +
 arch/s390/include/uapi/asm/raw3270.h         |   75 +
 arch/s390/include/uapi/asm/runtime_instr.h   |   74 +
 arch/s390/include/uapi/asm/schid.h           |   20 +
 arch/s390/include/uapi/asm/sclp_ctl.h        |   25 +
 arch/s390/include/uapi/asm/setup.h           |    1 +
 arch/s390/include/uapi/asm/sie.h             |  252 ++
 arch/s390/include/uapi/asm/sigcontext.h      |   85 +
 arch/s390/include/uapi/asm/signal.h          |  115 +
 arch/s390/include/uapi/asm/stat.h            |  104 +
 arch/s390/include/uapi/asm/statfs.h          |   51 +
 arch/s390/include/uapi/asm/sthyi.h           |    7 +
 arch/s390/include/uapi/asm/tape390.h         |  103 +
 arch/s390/include/uapi/asm/types.h           |   30 +
 arch/s390/include/uapi/asm/ucontext.h        |   41 +
 arch/s390/include/uapi/asm/unistd.h          |   17 +
 arch/s390/include/uapi/asm/uvdevice.h        |  102 +
 arch/s390/include/uapi/asm/virtio-ccw.h      |   18 +
 arch/s390/include/uapi/asm/vmcp.h            |   25 +
 arch/s390/include/uapi/asm/vtoc.h            |  214 +
 arch/s390/include/uapi/asm/zcrypt.h          |  373 ++
 arch/s390/kernel/.gitignore                  |    2 +
 arch/s390/kernel/Makefile                    |   85 +
 arch/s390/kernel/abs_lowcore.c               |   46 +
 arch/s390/kernel/alternative.c               |   75 +
 arch/s390/kernel/asm-offsets.c               |  190 +
 arch/s390/kernel/audit.c                     |   81 +
 arch/s390/kernel/audit.h                     |   16 +
 arch/s390/kernel/cache.c                     |  170 +
 arch/s390/kernel/cert_store.c                |  812 ++++
 arch/s390/kernel/compat_audit.c              |   48 +
 arch/s390/kernel/compat_linux.c              |  289 ++
 arch/s390/kernel/compat_linux.h              |  101 +
 arch/s390/kernel/compat_ptrace.h             |   64 +
 arch/s390/kernel/compat_signal.c             |  422 ++
 arch/s390/kernel/cpcmd.c                     |  114 +
 arch/s390/kernel/cpufeature.c                |   46 +
 arch/s390/kernel/crash_dump.c                |  652 +++
 arch/s390/kernel/debug.c                     | 1574 +++++++
 arch/s390/kernel/diag.c                      |  269 ++
 arch/s390/kernel/dis.c                       |  590 +++
 arch/s390/kernel/dumpstack.c                 |  224 +
 arch/s390/kernel/early.c                     |  320 ++
 arch/s390/kernel/early_printk.c              |   36 +
 arch/s390/kernel/earlypgm.S                  |   23 +
 arch/s390/kernel/ebcdic.c                    |  401 ++
 arch/s390/kernel/entry.S                     |  669 +++
 arch/s390/kernel/entry.h                     |   75 +
 arch/s390/kernel/fpu.c                       |  264 ++
 arch/s390/kernel/ftrace.c                    |  340 ++
 arch/s390/kernel/ftrace.h                    |   24 +
 arch/s390/kernel/guarded_storage.c           |  128 +
 arch/s390/kernel/head64.S                    |   40 +
 arch/s390/kernel/idle.c                      |   94 +
 arch/s390/kernel/ima_arch.c                  |   14 +
 arch/s390/kernel/ipl.c                       | 2522 +++++++++++
 arch/s390/kernel/ipl_vmparm.c                |   38 +
 arch/s390/kernel/irq.c                       |  402 ++
 arch/s390/kernel/jump_label.c                |   82 +
 arch/s390/kernel/kdebugfs.c                  |   14 +
 arch/s390/kernel/kexec_elf.c                 |  136 +
 arch/s390/kernel/kexec_image.c               |   65 +
 arch/s390/kernel/kprobes.c                   |  525 +++
 arch/s390/kernel/kprobes.h                   |    9 +
 arch/s390/kernel/kprobes_insn_page.S         |   22 +
 arch/s390/kernel/lgr.c                       |  187 +
 arch/s390/kernel/machine_kexec.c             |  286 ++
 arch/s390/kernel/machine_kexec_file.c        |  379 ++
 arch/s390/kernel/machine_kexec_reloc.c       |   56 +
 arch/s390/kernel/mcount.S                    |  195 +
 arch/s390/kernel/module.c                    |  576 +++
 arch/s390/kernel/nmi.c                       |  513 +++
 arch/s390/kernel/nospec-branch.c             |  156 +
 arch/s390/kernel/nospec-sysfs.c              |   23 +
 arch/s390/kernel/numa.c                      |   35 +
 arch/s390/kernel/os_info.c                   |  173 +
 arch/s390/kernel/perf_cpum_cf.c              | 1950 +++++++++
 arch/s390/kernel/perf_cpum_cf_events.c       |  909 ++++
 arch/s390/kernel/perf_cpum_sf.c              | 2280 ++++++++++
 arch/s390/kernel/perf_event.c                |  223 +
 arch/s390/kernel/perf_pai_crypto.c           |  698 +++
 arch/s390/kernel/perf_pai_ext.c              |  667 +++
 arch/s390/kernel/perf_regs.c                 |   68 +
 arch/s390/kernel/process.c                   |  240 ++
 arch/s390/kernel/processor.c                 |  366 ++
 arch/s390/kernel/ptrace.c                    | 1608 +++++++
 arch/s390/kernel/reipl.S                     |   81 +
 arch/s390/kernel/relocate_kernel.S           |   76 +
 arch/s390/kernel/rethook.c                   |   34 +
 arch/s390/kernel/rethook.h                   |    7 +
 arch/s390/kernel/runtime_instr.c             |  102 +
 arch/s390/kernel/setup.c                     | 1012 +++++
 arch/s390/kernel/signal.c                    |  534 +++
 arch/s390/kernel/smp.c                       | 1317 ++++++
 arch/s390/kernel/stacktrace.c                |   60 +
 arch/s390/kernel/sthyi.c                     |  517 +++
 arch/s390/kernel/syscall.c                   |  170 +
 arch/s390/kernel/syscalls/Makefile           |   45 +
 arch/s390/kernel/syscalls/syscall.tbl        |  457 ++
 arch/s390/kernel/syscalls/syscalltbl         |  232 +
 arch/s390/kernel/sysinfo.c                   |  570 +++
 arch/s390/kernel/text_amode31.S              |  159 +
 arch/s390/kernel/time.c                      |  944 +++++
 arch/s390/kernel/topology.c                  |  661 +++
 arch/s390/kernel/trace.c                     |   33 +
 arch/s390/kernel/traps.c                     |  403 ++
 arch/s390/kernel/unwind_bc.c                 |  178 +
 arch/s390/kernel/uprobes.c                   |  388 ++
 arch/s390/kernel/uv.c                        |  717 ++++
 arch/s390/kernel/vdso.c                      |  258 ++
 arch/s390/kernel/vdso32/.gitignore           |    2 +
 arch/s390/kernel/vdso32/Makefile             |   80 +
 arch/s390/kernel/vdso32/gen_vdso_offsets.sh  |   15 +
 arch/s390/kernel/vdso32/note.S               |   13 +
 arch/s390/kernel/vdso32/vdso32.lds.S         |  142 +
 arch/s390/kernel/vdso32/vdso32_wrapper.S     |   15 +
 arch/s390/kernel/vdso32/vdso_user_wrapper.S  |   22 +
 arch/s390/kernel/vdso64/.gitignore           |    2 +
 arch/s390/kernel/vdso64/Makefile             |   89 +
 arch/s390/kernel/vdso64/gen_vdso_offsets.sh  |   15 +
 arch/s390/kernel/vdso64/getcpu.c             |   21 +
 arch/s390/kernel/vdso64/note.S               |   13 +
 arch/s390/kernel/vdso64/vdso.h               |   14 +
 arch/s390/kernel/vdso64/vdso64.lds.S         |  146 +
 arch/s390/kernel/vdso64/vdso64_generic.c     |   19 +
 arch/s390/kernel/vdso64/vdso64_wrapper.S     |   15 +
 arch/s390/kernel/vdso64/vdso_user_wrapper.S  |   57 +
 arch/s390/kernel/vmlinux.lds.S               |  243 ++
 arch/s390/kernel/vtime.c                     |  454 ++
 arch/s390/kvm/Kconfig                        |   59 +
 arch/s390/kvm/Makefile                       |   14 +
 arch/s390/kvm/diag.c                         |  307 ++
 arch/s390/kvm/gaccess.c                      | 1624 +++++++
 arch/s390/kvm/gaccess.h                      |  458 ++
 arch/s390/kvm/guestdbg.c                     |  626 +++
 arch/s390/kvm/intercept.c                    |  667 +++
 arch/s390/kvm/interrupt.c                    | 3481 +++++++++++++++
 arch/s390/kvm/kvm-s390.c                     | 5895 ++++++++++++++++++++++++++
 arch/s390/kvm/kvm-s390.h                     |  524 +++
 arch/s390/kvm/pci.c                          |  704 +++
 arch/s390/kvm/pci.h                          |   87 +
 arch/s390/kvm/priv.c                         | 1573 +++++++
 arch/s390/kvm/pv.c                           |  894 ++++
 arch/s390/kvm/sigp.c                         |  495 +++
 arch/s390/kvm/trace-s390.h                   |  340 ++
 arch/s390/kvm/trace.h                        |  462 ++
 arch/s390/kvm/vsie.c                         | 1488 +++++++
 arch/s390/lib/Makefile                       |   25 +
 arch/s390/lib/delay.c                        |   45 +
 arch/s390/lib/error-inject.c                 |   14 +
 arch/s390/lib/expoline/Makefile              |    3 +
 arch/s390/lib/expoline/expoline.S            |   12 +
 arch/s390/lib/find.c                         |   76 +
 arch/s390/lib/mem.S                          |  197 +
 arch/s390/lib/probes.c                       |  161 +
 arch/s390/lib/spinlock.c                     |  324 ++
 arch/s390/lib/string.c                       |  348 ++
 arch/s390/lib/test_kprobes.c                 |   75 +
 arch/s390/lib/test_kprobes.h                 |   10 +
 arch/s390/lib/test_kprobes_asm.S             |   45 +
 arch/s390/lib/test_modules.c                 |   32 +
 arch/s390/lib/test_modules.h                 |   53 +
 arch/s390/lib/test_modules_helpers.c         |   13 +
 arch/s390/lib/test_unwind.c                  |  522 +++
 arch/s390/lib/tishift.S                      |   63 +
 arch/s390/lib/uaccess.c                      |  185 +
 arch/s390/lib/xor.c                          |  140 +
 arch/s390/mm/Makefile                        |   13 +
 arch/s390/mm/cmm.c                           |  431 ++
 arch/s390/mm/dump_pagetables.c               |  315 ++
 arch/s390/mm/extable.c                       |   86 +
 arch/s390/mm/extmem.c                        |  660 +++
 arch/s390/mm/fault.c                         |  708 ++++
 arch/s390/mm/gmap.c                          | 2894 +++++++++++++
 arch/s390/mm/hugetlbpage.c                   |  342 ++
 arch/s390/mm/init.c                          |  313 ++
 arch/s390/mm/maccess.c                       |  193 +
 arch/s390/mm/mmap.c                          |  210 +
 arch/s390/mm/page-states.c                   |  226 +
 arch/s390/mm/pageattr.c                      |  452 ++
 arch/s390/mm/pfault.c                        |  248 ++
 arch/s390/mm/pgalloc.c                       |  757 ++++
 arch/s390/mm/pgtable.c                       | 1207 ++++++
 arch/s390/mm/vmem.c                          |  669 +++
 arch/s390/net/Makefile                       |    6 +
 arch/s390/net/bpf_jit.h                      |   55 +
 arch/s390/net/bpf_jit_comp.c                 | 2537 +++++++++++
 arch/s390/net/pnet.c                         |   91 +
 arch/s390/pci/Makefile                       |    9 +
 arch/s390/pci/pci.c                          | 1135 +++++
 arch/s390/pci/pci_bus.c                      |  379 ++
 arch/s390/pci/pci_bus.h                      |   42 +
 arch/s390/pci/pci_clp.c                      |  669 +++
 arch/s390/pci/pci_debug.c                    |  210 +
 arch/s390/pci/pci_dma.c                      |  746 ++++
 arch/s390/pci/pci_event.c                    |  389 ++
 arch/s390/pci/pci_insn.c                     |  443 ++
 arch/s390/pci/pci_iov.c                      |   99 +
 arch/s390/pci/pci_iov.h                      |   30 +
 arch/s390/pci/pci_irq.c                      |  530 +++
 arch/s390/pci/pci_kvm_hook.c                 |   11 +
 arch/s390/pci/pci_mmio.c                     |  335 ++
 arch/s390/pci/pci_sysfs.c                    |  239 ++
 arch/s390/purgatory/.gitignore               |    5 +
 arch/s390/purgatory/Makefile                 |   54 +
 arch/s390/purgatory/head.S                   |  265 ++
 arch/s390/purgatory/kexec-purgatory.S        |   12 +
 arch/s390/purgatory/purgatory.c              |   33 +
 arch/s390/purgatory/purgatory.lds.S          |   54 +
 arch/s390/purgatory/string.c                 |    3 +
 arch/s390/tools/.gitignore                   |    3 +
 arch/s390/tools/Makefile                     |   27 +
 arch/s390/tools/gcc-thunk-extern.sh          |   24 +
 arch/s390/tools/gen_facilities.c             |  168 +
 arch/s390/tools/gen_opcode_table.c           |  337 ++
 arch/s390/tools/opcodes.txt                  | 1250 ++++++
 490 files changed, 118141 insertions(+)
 create mode 100644 arch/s390/Kbuild
 create mode 100644 arch/s390/Kconfig
 create mode 100644 arch/s390/Kconfig.debug
 create mode 100644 arch/s390/Makefile
 create mode 100644 arch/s390/appldata/Makefile
 create mode 100644 arch/s390/appldata/appldata.h
 create mode 100644 arch/s390/appldata/appldata_base.c
 create mode 100644 arch/s390/appldata/appldata_mem.c
 create mode 100644 arch/s390/appldata/appldata_net_sum.c
 create mode 100644 arch/s390/appldata/appldata_os.c
 create mode 100644 arch/s390/boot/.gitignore
 create mode 100644 arch/s390/boot/Makefile
 create mode 100644 arch/s390/boot/als.c
 create mode 100644 arch/s390/boot/boot.h
 create mode 100644 arch/s390/boot/clz_ctz.c
 create mode 100644 arch/s390/boot/cmdline.c
 create mode 100644 arch/s390/boot/ctype.c
 create mode 100644 arch/s390/boot/decompressor.c
 create mode 100644 arch/s390/boot/decompressor.h
 create mode 100644 arch/s390/boot/ebcdic.c
 create mode 100644 arch/s390/boot/head.S
 create mode 100644 arch/s390/boot/head_kdump.S
 create mode 100755 arch/s390/boot/install.sh
 create mode 100644 arch/s390/boot/ipl_data.c
 create mode 100644 arch/s390/boot/ipl_parm.c
 create mode 100644 arch/s390/boot/ipl_report.c
 create mode 100644 arch/s390/boot/ipl_vmparm.c
 create mode 100644 arch/s390/boot/kaslr.c
 create mode 100644 arch/s390/boot/machine_kexec_reloc.c
 create mode 100644 arch/s390/boot/mem.S
 create mode 100644 arch/s390/boot/pgm_check_info.c
 create mode 100644 arch/s390/boot/physmem_info.c
 create mode 100644 arch/s390/boot/sclp_early_core.c
 create mode 100644 arch/s390/boot/startup.c
 create mode 100644 arch/s390/boot/string.c
 create mode 100644 arch/s390/boot/uv.c
 create mode 100644 arch/s390/boot/uv.h
 create mode 100644 arch/s390/boot/version.c
 create mode 100644 arch/s390/boot/vmem.c
 create mode 100644 arch/s390/boot/vmlinux.lds.S
 create mode 100644 arch/s390/configs/btf.config
 create mode 100644 arch/s390/configs/debug_defconfig
 create mode 100644 arch/s390/configs/defconfig
 create mode 100644 arch/s390/configs/kasan.config
 create mode 100644 arch/s390/configs/zfcpdump_defconfig
 create mode 100644 arch/s390/crypto/Kconfig
 create mode 100644 arch/s390/crypto/Makefile
 create mode 100644 arch/s390/crypto/aes_s390.c
 create mode 100644 arch/s390/crypto/arch_random.c
 create mode 100644 arch/s390/crypto/chacha-glue.c
 create mode 100644 arch/s390/crypto/chacha-s390.S
 create mode 100644 arch/s390/crypto/chacha-s390.h
 create mode 100644 arch/s390/crypto/crc32-vx.c
 create mode 100644 arch/s390/crypto/crc32be-vx.S
 create mode 100644 arch/s390/crypto/crc32le-vx.S
 create mode 100644 arch/s390/crypto/des_s390.c
 create mode 100644 arch/s390/crypto/ghash_s390.c
 create mode 100644 arch/s390/crypto/paes_s390.c
 create mode 100644 arch/s390/crypto/prng.c
 create mode 100644 arch/s390/crypto/sha.h
 create mode 100644 arch/s390/crypto/sha1_s390.c
 create mode 100644 arch/s390/crypto/sha256_s390.c
 create mode 100644 arch/s390/crypto/sha3_256_s390.c
 create mode 100644 arch/s390/crypto/sha3_512_s390.c
 create mode 100644 arch/s390/crypto/sha512_s390.c
 create mode 100644 arch/s390/crypto/sha_common.c
 create mode 100644 arch/s390/hypfs/Makefile
 create mode 100644 arch/s390/hypfs/hypfs.h
 create mode 100644 arch/s390/hypfs/hypfs_dbfs.c
 create mode 100644 arch/s390/hypfs/hypfs_diag.c
 create mode 100644 arch/s390/hypfs/hypfs_diag.h
 create mode 100644 arch/s390/hypfs/hypfs_diag0c.c
 create mode 100644 arch/s390/hypfs/hypfs_diag_fs.c
 create mode 100644 arch/s390/hypfs/hypfs_sprp.c
 create mode 100644 arch/s390/hypfs/hypfs_vm.c
 create mode 100644 arch/s390/hypfs/hypfs_vm.h
 create mode 100644 arch/s390/hypfs/hypfs_vm_fs.c
 create mode 100644 arch/s390/hypfs/inode.c
 create mode 100644 arch/s390/include/asm/Kbuild
 create mode 100644 arch/s390/include/asm/abs_lowcore.h
 create mode 100644 arch/s390/include/asm/airq.h
 create mode 100644 arch/s390/include/asm/alternative-asm.h
 create mode 100644 arch/s390/include/asm/alternative.h
 create mode 100644 arch/s390/include/asm/ap.h
 create mode 100644 arch/s390/include/asm/appldata.h
 create mode 100644 arch/s390/include/asm/archrandom.h
 create mode 100644 arch/s390/include/asm/asm-const.h
 create mode 100644 arch/s390/include/asm/asm-extable.h
 create mode 100644 arch/s390/include/asm/asm-prototypes.h
 create mode 100644 arch/s390/include/asm/atomic.h
 create mode 100644 arch/s390/include/asm/atomic_ops.h
 create mode 100644 arch/s390/include/asm/barrier.h
 create mode 100644 arch/s390/include/asm/bitops.h
 create mode 100644 arch/s390/include/asm/boot_data.h
 create mode 100644 arch/s390/include/asm/bug.h
 create mode 100644 arch/s390/include/asm/cache.h
 create mode 100644 arch/s390/include/asm/ccwdev.h
 create mode 100644 arch/s390/include/asm/ccwgroup.h
 create mode 100644 arch/s390/include/asm/checksum.h
 create mode 100644 arch/s390/include/asm/chpid.h
 create mode 100644 arch/s390/include/asm/chsc.h
 create mode 100644 arch/s390/include/asm/cio.h
 create mode 100644 arch/s390/include/asm/clocksource.h
 create mode 100644 arch/s390/include/asm/clp.h
 create mode 100644 arch/s390/include/asm/cmb.h
 create mode 100644 arch/s390/include/asm/cmpxchg.h
 create mode 100644 arch/s390/include/asm/compat.h
 create mode 100644 arch/s390/include/asm/cpacf.h
 create mode 100644 arch/s390/include/asm/cpcmd.h
 create mode 100644 arch/s390/include/asm/cpu.h
 create mode 100644 arch/s390/include/asm/cpu_mf-insn.h
 create mode 100644 arch/s390/include/asm/cpu_mf.h
 create mode 100644 arch/s390/include/asm/cpufeature.h
 create mode 100644 arch/s390/include/asm/cputime.h
 create mode 100644 arch/s390/include/asm/crw.h
 create mode 100644 arch/s390/include/asm/css_chars.h
 create mode 100644 arch/s390/include/asm/ctl_reg.h
 create mode 100644 arch/s390/include/asm/current.h
 create mode 100644 arch/s390/include/asm/debug.h
 create mode 100644 arch/s390/include/asm/delay.h
 create mode 100644 arch/s390/include/asm/diag.h
 create mode 100644 arch/s390/include/asm/dis.h
 create mode 100644 arch/s390/include/asm/dma.h
 create mode 100644 arch/s390/include/asm/dwarf.h
 create mode 100644 arch/s390/include/asm/eadm.h
 create mode 100644 arch/s390/include/asm/ebcdic.h
 create mode 100644 arch/s390/include/asm/elf.h
 create mode 100644 arch/s390/include/asm/entry-common.h
 create mode 100644 arch/s390/include/asm/exec.h
 create mode 100644 arch/s390/include/asm/extable.h
 create mode 100644 arch/s390/include/asm/extmem.h
 create mode 100644 arch/s390/include/asm/facility.h
 create mode 100644 arch/s390/include/asm/fcx.h
 create mode 100644 arch/s390/include/asm/fpu/api.h
 create mode 100644 arch/s390/include/asm/fpu/internal.h
 create mode 100644 arch/s390/include/asm/fpu/types.h
 create mode 100644 arch/s390/include/asm/ftrace.h
 create mode 100644 arch/s390/include/asm/ftrace.lds.h
 create mode 100644 arch/s390/include/asm/futex.h
 create mode 100644 arch/s390/include/asm/gmap.h
 create mode 100644 arch/s390/include/asm/hardirq.h
 create mode 100644 arch/s390/include/asm/hugetlb.h
 create mode 100644 arch/s390/include/asm/hw_irq.h
 create mode 100644 arch/s390/include/asm/idals.h
 create mode 100644 arch/s390/include/asm/idle.h
 create mode 100644 arch/s390/include/asm/io.h
 create mode 100644 arch/s390/include/asm/ipl.h
 create mode 100644 arch/s390/include/asm/irq.h
 create mode 100644 arch/s390/include/asm/irq_work.h
 create mode 100644 arch/s390/include/asm/irqflags.h
 create mode 100644 arch/s390/include/asm/isc.h
 create mode 100644 arch/s390/include/asm/itcw.h
 create mode 100644 arch/s390/include/asm/jump_label.h
 create mode 100644 arch/s390/include/asm/kasan.h
 create mode 100644 arch/s390/include/asm/kdebug.h
 create mode 100644 arch/s390/include/asm/kexec.h
 create mode 100644 arch/s390/include/asm/kfence.h
 create mode 100644 arch/s390/include/asm/kprobes.h
 create mode 100644 arch/s390/include/asm/kvm_host.h
 create mode 100644 arch/s390/include/asm/kvm_para.h
 create mode 100644 arch/s390/include/asm/linkage.h
 create mode 100644 arch/s390/include/asm/lowcore.h
 create mode 100644 arch/s390/include/asm/maccess.h
 create mode 100644 arch/s390/include/asm/mem_encrypt.h
 create mode 100644 arch/s390/include/asm/mmu.h
 create mode 100644 arch/s390/include/asm/mmu_context.h
 create mode 100644 arch/s390/include/asm/mmzone.h
 create mode 100644 arch/s390/include/asm/module.h
 create mode 100644 arch/s390/include/asm/msi.h
 create mode 100644 arch/s390/include/asm/nmi.h
 create mode 100644 arch/s390/include/asm/nospec-branch.h
 create mode 100644 arch/s390/include/asm/nospec-insn.h
 create mode 100644 arch/s390/include/asm/numa.h
 create mode 100644 arch/s390/include/asm/os_info.h
 create mode 100644 arch/s390/include/asm/page-states.h
 create mode 100644 arch/s390/include/asm/page.h
 create mode 100644 arch/s390/include/asm/pai.h
 create mode 100644 arch/s390/include/asm/pci.h
 create mode 100644 arch/s390/include/asm/pci_clp.h
 create mode 100644 arch/s390/include/asm/pci_debug.h
 create mode 100644 arch/s390/include/asm/pci_dma.h
 create mode 100644 arch/s390/include/asm/pci_insn.h
 create mode 100644 arch/s390/include/asm/pci_io.h
 create mode 100644 arch/s390/include/asm/percpu.h
 create mode 100644 arch/s390/include/asm/perf_event.h
 create mode 100644 arch/s390/include/asm/pfault.h
 create mode 100644 arch/s390/include/asm/pgalloc.h
 create mode 100644 arch/s390/include/asm/pgtable.h
 create mode 100644 arch/s390/include/asm/physmem_info.h
 create mode 100644 arch/s390/include/asm/pkey.h
 create mode 100644 arch/s390/include/asm/pnet.h
 create mode 100644 arch/s390/include/asm/preempt.h
 create mode 100644 arch/s390/include/asm/processor.h
 create mode 100644 arch/s390/include/asm/ptdump.h
 create mode 100644 arch/s390/include/asm/ptrace.h
 create mode 100644 arch/s390/include/asm/purgatory.h
 create mode 100644 arch/s390/include/asm/qdio.h
 create mode 100644 arch/s390/include/asm/runtime_instr.h
 create mode 100644 arch/s390/include/asm/rwonce.h
 create mode 100644 arch/s390/include/asm/schid.h
 create mode 100644 arch/s390/include/asm/sclp.h
 create mode 100644 arch/s390/include/asm/scsw.h
 create mode 100644 arch/s390/include/asm/seccomp.h
 create mode 100644 arch/s390/include/asm/sections.h
 create mode 100644 arch/s390/include/asm/set_memory.h
 create mode 100644 arch/s390/include/asm/setup.h
 create mode 100644 arch/s390/include/asm/signal.h
 create mode 100644 arch/s390/include/asm/sigp.h
 create mode 100644 arch/s390/include/asm/smp.h
 create mode 100644 arch/s390/include/asm/softirq_stack.h
 create mode 100644 arch/s390/include/asm/sparsemem.h
 create mode 100644 arch/s390/include/asm/spinlock.h
 create mode 100644 arch/s390/include/asm/spinlock_types.h
 create mode 100644 arch/s390/include/asm/stacktrace.h
 create mode 100644 arch/s390/include/asm/stp.h
 create mode 100644 arch/s390/include/asm/string.h
 create mode 100644 arch/s390/include/asm/switch_to.h
 create mode 100644 arch/s390/include/asm/syscall.h
 create mode 100644 arch/s390/include/asm/syscall_wrapper.h
 create mode 100644 arch/s390/include/asm/sysinfo.h
 create mode 100644 arch/s390/include/asm/text-patching.h
 create mode 100644 arch/s390/include/asm/thread_info.h
 create mode 100644 arch/s390/include/asm/timex.h
 create mode 100644 arch/s390/include/asm/tlb.h
 create mode 100644 arch/s390/include/asm/tlbflush.h
 create mode 100644 arch/s390/include/asm/topology.h
 create mode 100644 arch/s390/include/asm/tpi.h
 create mode 100644 arch/s390/include/asm/trace/diag.h
 create mode 100644 arch/s390/include/asm/trace/zcrypt.h
 create mode 100644 arch/s390/include/asm/types.h
 create mode 100644 arch/s390/include/asm/uaccess.h
 create mode 100644 arch/s390/include/asm/unistd.h
 create mode 100644 arch/s390/include/asm/unwind.h
 create mode 100644 arch/s390/include/asm/uprobes.h
 create mode 100644 arch/s390/include/asm/user.h
 create mode 100644 arch/s390/include/asm/uv.h
 create mode 100644 arch/s390/include/asm/vdso.h
 create mode 100644 arch/s390/include/asm/vdso/clocksource.h
 create mode 100644 arch/s390/include/asm/vdso/data.h
 create mode 100644 arch/s390/include/asm/vdso/gettimeofday.h
 create mode 100644 arch/s390/include/asm/vdso/processor.h
 create mode 100644 arch/s390/include/asm/vdso/vsyscall.h
 create mode 100644 arch/s390/include/asm/vmalloc.h
 create mode 100644 arch/s390/include/asm/vmlinux.lds.h
 create mode 100644 arch/s390/include/asm/vtime.h
 create mode 100644 arch/s390/include/asm/vtimer.h
 create mode 100644 arch/s390/include/asm/vx-insn-asm.h
 create mode 100644 arch/s390/include/asm/vx-insn.h
 create mode 100644 arch/s390/include/asm/xor.h
 create mode 100644 arch/s390/include/uapi/asm/Kbuild
 create mode 100644 arch/s390/include/uapi/asm/auxvec.h
 create mode 100644 arch/s390/include/uapi/asm/bitsperlong.h
 create mode 100644 arch/s390/include/uapi/asm/bpf_perf_event.h
 create mode 100644 arch/s390/include/uapi/asm/byteorder.h
 create mode 100644 arch/s390/include/uapi/asm/chpid.h
 create mode 100644 arch/s390/include/uapi/asm/chsc.h
 create mode 100644 arch/s390/include/uapi/asm/clp.h
 create mode 100644 arch/s390/include/uapi/asm/cmb.h
 create mode 100644 arch/s390/include/uapi/asm/dasd.h
 create mode 100644 arch/s390/include/uapi/asm/fs3270.h
 create mode 100644 arch/s390/include/uapi/asm/guarded_storage.h
 create mode 100644 arch/s390/include/uapi/asm/hwctrset.h
 create mode 100644 arch/s390/include/uapi/asm/hypfs.h
 create mode 100644 arch/s390/include/uapi/asm/ioctls.h
 create mode 100644 arch/s390/include/uapi/asm/ipcbuf.h
 create mode 100644 arch/s390/include/uapi/asm/ipl.h
 create mode 100644 arch/s390/include/uapi/asm/kvm.h
 create mode 100644 arch/s390/include/uapi/asm/kvm_para.h
 create mode 100644 arch/s390/include/uapi/asm/kvm_perf.h
 create mode 100644 arch/s390/include/uapi/asm/monwriter.h
 create mode 100644 arch/s390/include/uapi/asm/perf_regs.h
 create mode 100644 arch/s390/include/uapi/asm/pkey.h
 create mode 100644 arch/s390/include/uapi/asm/posix_types.h
 create mode 100644 arch/s390/include/uapi/asm/ptrace.h
 create mode 100644 arch/s390/include/uapi/asm/qeth.h
 create mode 100644 arch/s390/include/uapi/asm/raw3270.h
 create mode 100644 arch/s390/include/uapi/asm/runtime_instr.h
 create mode 100644 arch/s390/include/uapi/asm/schid.h
 create mode 100644 arch/s390/include/uapi/asm/sclp_ctl.h
 create mode 100644 arch/s390/include/uapi/asm/setup.h
 create mode 100644 arch/s390/include/uapi/asm/sie.h
 create mode 100644 arch/s390/include/uapi/asm/sigcontext.h
 create mode 100644 arch/s390/include/uapi/asm/signal.h
 create mode 100644 arch/s390/include/uapi/asm/stat.h
 create mode 100644 arch/s390/include/uapi/asm/statfs.h
 create mode 100644 arch/s390/include/uapi/asm/sthyi.h
 create mode 100644 arch/s390/include/uapi/asm/tape390.h
 create mode 100644 arch/s390/include/uapi/asm/types.h
 create mode 100644 arch/s390/include/uapi/asm/ucontext.h
 create mode 100644 arch/s390/include/uapi/asm/unistd.h
 create mode 100644 arch/s390/include/uapi/asm/uvdevice.h
 create mode 100644 arch/s390/include/uapi/asm/virtio-ccw.h
 create mode 100644 arch/s390/include/uapi/asm/vmcp.h
 create mode 100644 arch/s390/include/uapi/asm/vtoc.h
 create mode 100644 arch/s390/include/uapi/asm/zcrypt.h
 create mode 100644 arch/s390/kernel/.gitignore
 create mode 100644 arch/s390/kernel/Makefile
 create mode 100644 arch/s390/kernel/abs_lowcore.c
 create mode 100644 arch/s390/kernel/alternative.c
 create mode 100644 arch/s390/kernel/asm-offsets.c
 create mode 100644 arch/s390/kernel/audit.c
 create mode 100644 arch/s390/kernel/audit.h
 create mode 100644 arch/s390/kernel/cache.c
 create mode 100644 arch/s390/kernel/cert_store.c
 create mode 100644 arch/s390/kernel/compat_audit.c
 create mode 100644 arch/s390/kernel/compat_linux.c
 create mode 100644 arch/s390/kernel/compat_linux.h
 create mode 100644 arch/s390/kernel/compat_ptrace.h
 create mode 100644 arch/s390/kernel/compat_signal.c
 create mode 100644 arch/s390/kernel/cpcmd.c
 create mode 100644 arch/s390/kernel/cpufeature.c
 create mode 100644 arch/s390/kernel/crash_dump.c
 create mode 100644 arch/s390/kernel/debug.c
 create mode 100644 arch/s390/kernel/diag.c
 create mode 100644 arch/s390/kernel/dis.c
 create mode 100644 arch/s390/kernel/dumpstack.c
 create mode 100644 arch/s390/kernel/early.c
 create mode 100644 arch/s390/kernel/early_printk.c
 create mode 100644 arch/s390/kernel/earlypgm.S
 create mode 100644 arch/s390/kernel/ebcdic.c
 create mode 100644 arch/s390/kernel/entry.S
 create mode 100644 arch/s390/kernel/entry.h
 create mode 100644 arch/s390/kernel/fpu.c
 create mode 100644 arch/s390/kernel/ftrace.c
 create mode 100644 arch/s390/kernel/ftrace.h
 create mode 100644 arch/s390/kernel/guarded_storage.c
 create mode 100644 arch/s390/kernel/head64.S
 create mode 100644 arch/s390/kernel/idle.c
 create mode 100644 arch/s390/kernel/ima_arch.c
 create mode 100644 arch/s390/kernel/ipl.c
 create mode 100644 arch/s390/kernel/ipl_vmparm.c
 create mode 100644 arch/s390/kernel/irq.c
 create mode 100644 arch/s390/kernel/jump_label.c
 create mode 100644 arch/s390/kernel/kdebugfs.c
 create mode 100644 arch/s390/kernel/kexec_elf.c
 create mode 100644 arch/s390/kernel/kexec_image.c
 create mode 100644 arch/s390/kernel/kprobes.c
 create mode 100644 arch/s390/kernel/kprobes.h
 create mode 100644 arch/s390/kernel/kprobes_insn_page.S
 create mode 100644 arch/s390/kernel/lgr.c
 create mode 100644 arch/s390/kernel/machine_kexec.c
 create mode 100644 arch/s390/kernel/machine_kexec_file.c
 create mode 100644 arch/s390/kernel/machine_kexec_reloc.c
 create mode 100644 arch/s390/kernel/mcount.S
 create mode 100644 arch/s390/kernel/module.c
 create mode 100644 arch/s390/kernel/nmi.c
 create mode 100644 arch/s390/kernel/nospec-branch.c
 create mode 100644 arch/s390/kernel/nospec-sysfs.c
 create mode 100644 arch/s390/kernel/numa.c
 create mode 100644 arch/s390/kernel/os_info.c
 create mode 100644 arch/s390/kernel/perf_cpum_cf.c
 create mode 100644 arch/s390/kernel/perf_cpum_cf_events.c
 create mode 100644 arch/s390/kernel/perf_cpum_sf.c
 create mode 100644 arch/s390/kernel/perf_event.c
 create mode 100644 arch/s390/kernel/perf_pai_crypto.c
 create mode 100644 arch/s390/kernel/perf_pai_ext.c
 create mode 100644 arch/s390/kernel/perf_regs.c
 create mode 100644 arch/s390/kernel/process.c
 create mode 100644 arch/s390/kernel/processor.c
 create mode 100644 arch/s390/kernel/ptrace.c
 create mode 100644 arch/s390/kernel/reipl.S
 create mode 100644 arch/s390/kernel/relocate_kernel.S
 create mode 100644 arch/s390/kernel/rethook.c
 create mode 100644 arch/s390/kernel/rethook.h
 create mode 100644 arch/s390/kernel/runtime_instr.c
 create mode 100644 arch/s390/kernel/setup.c
 create mode 100644 arch/s390/kernel/signal.c
 create mode 100644 arch/s390/kernel/smp.c
 create mode 100644 arch/s390/kernel/stacktrace.c
 create mode 100644 arch/s390/kernel/sthyi.c
 create mode 100644 arch/s390/kernel/syscall.c
 create mode 100644 arch/s390/kernel/syscalls/Makefile
 create mode 100644 arch/s390/kernel/syscalls/syscall.tbl
 create mode 100755 arch/s390/kernel/syscalls/syscalltbl
 create mode 100644 arch/s390/kernel/sysinfo.c
 create mode 100644 arch/s390/kernel/text_amode31.S
 create mode 100644 arch/s390/kernel/time.c
 create mode 100644 arch/s390/kernel/topology.c
 create mode 100644 arch/s390/kernel/trace.c
 create mode 100644 arch/s390/kernel/traps.c
 create mode 100644 arch/s390/kernel/unwind_bc.c
 create mode 100644 arch/s390/kernel/uprobes.c
 create mode 100644 arch/s390/kernel/uv.c
 create mode 100644 arch/s390/kernel/vdso.c
 create mode 100644 arch/s390/kernel/vdso32/.gitignore
 create mode 100644 arch/s390/kernel/vdso32/Makefile
 create mode 100755 arch/s390/kernel/vdso32/gen_vdso_offsets.sh
 create mode 100644 arch/s390/kernel/vdso32/note.S
 create mode 100644 arch/s390/kernel/vdso32/vdso32.lds.S
 create mode 100644 arch/s390/kernel/vdso32/vdso32_wrapper.S
 create mode 100644 arch/s390/kernel/vdso32/vdso_user_wrapper.S
 create mode 100644 arch/s390/kernel/vdso64/.gitignore
 create mode 100644 arch/s390/kernel/vdso64/Makefile
 create mode 100755 arch/s390/kernel/vdso64/gen_vdso_offsets.sh
 create mode 100644 arch/s390/kernel/vdso64/getcpu.c
 create mode 100644 arch/s390/kernel/vdso64/note.S
 create mode 100644 arch/s390/kernel/vdso64/vdso.h
 create mode 100644 arch/s390/kernel/vdso64/vdso64.lds.S
 create mode 100644 arch/s390/kernel/vdso64/vdso64_generic.c
 create mode 100644 arch/s390/kernel/vdso64/vdso64_wrapper.S
 create mode 100644 arch/s390/kernel/vdso64/vdso_user_wrapper.S
 create mode 100644 arch/s390/kernel/vmlinux.lds.S
 create mode 100644 arch/s390/kernel/vtime.c
 create mode 100644 arch/s390/kvm/Kconfig
 create mode 100644 arch/s390/kvm/Makefile
 create mode 100644 arch/s390/kvm/diag.c
 create mode 100644 arch/s390/kvm/gaccess.c
 create mode 100644 arch/s390/kvm/gaccess.h
 create mode 100644 arch/s390/kvm/guestdbg.c
 create mode 100644 arch/s390/kvm/intercept.c
 create mode 100644 arch/s390/kvm/interrupt.c
 create mode 100644 arch/s390/kvm/kvm-s390.c
 create mode 100644 arch/s390/kvm/kvm-s390.h
 create mode 100644 arch/s390/kvm/pci.c
 create mode 100644 arch/s390/kvm/pci.h
 create mode 100644 arch/s390/kvm/priv.c
 create mode 100644 arch/s390/kvm/pv.c
 create mode 100644 arch/s390/kvm/sigp.c
 create mode 100644 arch/s390/kvm/trace-s390.h
 create mode 100644 arch/s390/kvm/trace.h
 create mode 100644 arch/s390/kvm/vsie.c
 create mode 100644 arch/s390/lib/Makefile
 create mode 100644 arch/s390/lib/delay.c
 create mode 100644 arch/s390/lib/error-inject.c
 create mode 100644 arch/s390/lib/expoline/Makefile
 create mode 100644 arch/s390/lib/expoline/expoline.S
 create mode 100644 arch/s390/lib/find.c
 create mode 100644 arch/s390/lib/mem.S
 create mode 100644 arch/s390/lib/probes.c
 create mode 100644 arch/s390/lib/spinlock.c
 create mode 100644 arch/s390/lib/string.c
 create mode 100644 arch/s390/lib/test_kprobes.c
 create mode 100644 arch/s390/lib/test_kprobes.h
 create mode 100644 arch/s390/lib/test_kprobes_asm.S
 create mode 100644 arch/s390/lib/test_modules.c
 create mode 100644 arch/s390/lib/test_modules.h
 create mode 100644 arch/s390/lib/test_modules_helpers.c
 create mode 100644 arch/s390/lib/test_unwind.c
 create mode 100644 arch/s390/lib/tishift.S
 create mode 100644 arch/s390/lib/uaccess.c
 create mode 100644 arch/s390/lib/xor.c
 create mode 100644 arch/s390/mm/Makefile
 create mode 100644 arch/s390/mm/cmm.c
 create mode 100644 arch/s390/mm/dump_pagetables.c
 create mode 100644 arch/s390/mm/extable.c
 create mode 100644 arch/s390/mm/extmem.c
 create mode 100644 arch/s390/mm/fault.c
 create mode 100644 arch/s390/mm/gmap.c
 create mode 100644 arch/s390/mm/hugetlbpage.c
 create mode 100644 arch/s390/mm/init.c
 create mode 100644 arch/s390/mm/maccess.c
 create mode 100644 arch/s390/mm/mmap.c
 create mode 100644 arch/s390/mm/page-states.c
 create mode 100644 arch/s390/mm/pageattr.c
 create mode 100644 arch/s390/mm/pfault.c
 create mode 100644 arch/s390/mm/pgalloc.c
 create mode 100644 arch/s390/mm/pgtable.c
 create mode 100644 arch/s390/mm/vmem.c
 create mode 100644 arch/s390/net/Makefile
 create mode 100644 arch/s390/net/bpf_jit.h
 create mode 100644 arch/s390/net/bpf_jit_comp.c
 create mode 100644 arch/s390/net/pnet.c
 create mode 100644 arch/s390/pci/Makefile
 create mode 100644 arch/s390/pci/pci.c
 create mode 100644 arch/s390/pci/pci_bus.c
 create mode 100644 arch/s390/pci/pci_bus.h
 create mode 100644 arch/s390/pci/pci_clp.c
 create mode 100644 arch/s390/pci/pci_debug.c
 create mode 100644 arch/s390/pci/pci_dma.c
 create mode 100644 arch/s390/pci/pci_event.c
 create mode 100644 arch/s390/pci/pci_insn.c
 create mode 100644 arch/s390/pci/pci_iov.c
 create mode 100644 arch/s390/pci/pci_iov.h
 create mode 100644 arch/s390/pci/pci_irq.c
 create mode 100644 arch/s390/pci/pci_kvm_hook.c
 create mode 100644 arch/s390/pci/pci_mmio.c
 create mode 100644 arch/s390/pci/pci_sysfs.c
 create mode 100644 arch/s390/purgatory/.gitignore
 create mode 100644 arch/s390/purgatory/Makefile
 create mode 100644 arch/s390/purgatory/head.S
 create mode 100644 arch/s390/purgatory/kexec-purgatory.S
 create mode 100644 arch/s390/purgatory/purgatory.c
 create mode 100644 arch/s390/purgatory/purgatory.lds.S
 create mode 100644 arch/s390/purgatory/string.c
 create mode 100644 arch/s390/tools/.gitignore
 create mode 100644 arch/s390/tools/Makefile
 create mode 100755 arch/s390/tools/gcc-thunk-extern.sh
 create mode 100644 arch/s390/tools/gen_facilities.c
 create mode 100644 arch/s390/tools/gen_opcode_table.c
 create mode 100644 arch/s390/tools/opcodes.txt

(limited to 'arch/s390')

diff --git a/arch/s390/Kbuild b/arch/s390/Kbuild
new file mode 100644
index 0000000000..a5d3503b35
--- /dev/null
+++ b/arch/s390/Kbuild
@@ -0,0 +1,13 @@
+# SPDX-License-Identifier: GPL-2.0
+obj-y				+= kernel/
+obj-y				+= mm/
+obj-$(CONFIG_KVM)		+= kvm/
+obj-y				+= crypto/
+obj-$(CONFIG_S390_HYPFS)	+= hypfs/
+obj-$(CONFIG_APPLDATA_BASE)	+= appldata/
+obj-y				+= net/
+obj-$(CONFIG_PCI)		+= pci/
+obj-$(CONFIG_ARCH_SUPPORTS_KEXEC_PURGATORY) += purgatory/
+
+# for cleaning
+subdir- += boot tools
diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig
new file mode 100644
index 0000000000..bd4782f23f
--- /dev/null
+++ b/arch/s390/Kconfig
@@ -0,0 +1,935 @@
+# SPDX-License-Identifier: GPL-2.0
+config MMU
+	def_bool y
+
+config CPU_BIG_ENDIAN
+	def_bool y
+
+config LOCKDEP_SUPPORT
+	def_bool y
+
+config STACKTRACE_SUPPORT
+	def_bool y
+
+config ARCH_HAS_ILOG2_U32
+	def_bool n
+
+config ARCH_HAS_ILOG2_U64
+	def_bool n
+
+config GENERIC_HWEIGHT
+	def_bool y
+
+config GENERIC_BUG
+	def_bool y if BUG
+
+config GENERIC_BUG_RELATIVE_POINTERS
+	def_bool y
+
+config GENERIC_LOCKBREAK
+	def_bool y if PREEMPTION
+
+config PGSTE
+	def_bool y if KVM
+
+config AUDIT_ARCH
+	def_bool y
+
+config NO_IOPORT_MAP
+	def_bool y
+
+config PCI_QUIRKS
+	def_bool n
+
+config ARCH_SUPPORTS_UPROBES
+	def_bool y
+
+config KASAN_SHADOW_OFFSET
+	hex
+	depends on KASAN
+	default 0x1C000000000000
+
+config S390
+	def_bool y
+	#
+	# Note: keep this list sorted alphabetically
+	#
+	imply IMA_SECURE_AND_OR_TRUSTED_BOOT
+	select ALTERNATE_USER_ADDRESS_SPACE
+	select ARCH_32BIT_USTAT_F_TINODE
+	select ARCH_BINFMT_ELF_STATE
+	select ARCH_CORRECT_STACKTRACE_ON_KRETPROBE
+	select ARCH_ENABLE_MEMORY_HOTPLUG if SPARSEMEM
+	select ARCH_ENABLE_MEMORY_HOTREMOVE
+	select ARCH_ENABLE_SPLIT_PMD_PTLOCK if PGTABLE_LEVELS > 2
+	select ARCH_HAS_CURRENT_STACK_POINTER
+	select ARCH_HAS_DEBUG_VM_PGTABLE
+	select ARCH_HAS_DEBUG_WX
+	select ARCH_HAS_DEVMEM_IS_ALLOWED
+	select ARCH_HAS_ELF_RANDOMIZE
+	select ARCH_HAS_FORCE_DMA_UNENCRYPTED
+	select ARCH_HAS_FORTIFY_SOURCE
+	select ARCH_HAS_GCOV_PROFILE_ALL
+	select ARCH_HAS_GIGANTIC_PAGE
+	select ARCH_HAS_KCOV
+	select ARCH_HAS_MEMBARRIER_SYNC_CORE
+	select ARCH_HAS_MEM_ENCRYPT
+	select ARCH_HAS_NMI_SAFE_THIS_CPU_OPS
+	select ARCH_HAS_PTE_SPECIAL
+	select ARCH_HAS_SCALED_CPUTIME
+	select ARCH_HAS_SET_DIRECT_MAP
+	select ARCH_HAS_SET_MEMORY
+	select ARCH_HAS_STRICT_KERNEL_RWX
+	select ARCH_HAS_STRICT_MODULE_RWX
+	select ARCH_HAS_SYSCALL_WRAPPER
+	select ARCH_HAS_UBSAN_SANITIZE_ALL
+	select ARCH_HAS_VDSO_DATA
+	select ARCH_HAVE_NMI_SAFE_CMPXCHG
+	select ARCH_INLINE_READ_LOCK
+	select ARCH_INLINE_READ_LOCK_BH
+	select ARCH_INLINE_READ_LOCK_IRQ
+	select ARCH_INLINE_READ_LOCK_IRQSAVE
+	select ARCH_INLINE_READ_TRYLOCK
+	select ARCH_INLINE_READ_UNLOCK
+	select ARCH_INLINE_READ_UNLOCK_BH
+	select ARCH_INLINE_READ_UNLOCK_IRQ
+	select ARCH_INLINE_READ_UNLOCK_IRQRESTORE
+	select ARCH_INLINE_SPIN_LOCK
+	select ARCH_INLINE_SPIN_LOCK_BH
+	select ARCH_INLINE_SPIN_LOCK_IRQ
+	select ARCH_INLINE_SPIN_LOCK_IRQSAVE
+	select ARCH_INLINE_SPIN_TRYLOCK
+	select ARCH_INLINE_SPIN_TRYLOCK_BH
+	select ARCH_INLINE_SPIN_UNLOCK
+	select ARCH_INLINE_SPIN_UNLOCK_BH
+	select ARCH_INLINE_SPIN_UNLOCK_IRQ
+	select ARCH_INLINE_SPIN_UNLOCK_IRQRESTORE
+	select ARCH_INLINE_WRITE_LOCK
+	select ARCH_INLINE_WRITE_LOCK_BH
+	select ARCH_INLINE_WRITE_LOCK_IRQ
+	select ARCH_INLINE_WRITE_LOCK_IRQSAVE
+	select ARCH_INLINE_WRITE_TRYLOCK
+	select ARCH_INLINE_WRITE_UNLOCK
+	select ARCH_INLINE_WRITE_UNLOCK_BH
+	select ARCH_INLINE_WRITE_UNLOCK_IRQ
+	select ARCH_INLINE_WRITE_UNLOCK_IRQRESTORE
+	select ARCH_STACKWALK
+	select ARCH_SUPPORTS_ATOMIC_RMW
+	select ARCH_SUPPORTS_DEBUG_PAGEALLOC
+	select ARCH_SUPPORTS_HUGETLBFS
+	select ARCH_SUPPORTS_INT128 if CC_HAS_INT128 && CC_IS_CLANG
+	select ARCH_SUPPORTS_NUMA_BALANCING
+	select ARCH_SUPPORTS_PER_VMA_LOCK
+	select ARCH_USE_BUILTIN_BSWAP
+	select ARCH_USE_CMPXCHG_LOCKREF
+	select ARCH_USE_SYM_ANNOTATIONS
+	select ARCH_WANTS_DYNAMIC_TASK_STRUCT
+	select ARCH_WANTS_NO_INSTR
+	select ARCH_WANT_DEFAULT_BPF_JIT
+	select ARCH_WANT_IPC_PARSE_VERSION
+	select ARCH_WANT_KERNEL_PMD_MKWRITE
+	select ARCH_WANT_OPTIMIZE_HUGETLB_VMEMMAP
+	select BUILDTIME_TABLE_SORT
+	select CLONE_BACKWARDS2
+	select DMA_OPS if PCI
+	select DYNAMIC_FTRACE if FUNCTION_TRACER
+	select FUNCTION_ALIGNMENT_8B if CC_IS_GCC
+	select FUNCTION_ALIGNMENT_16B if !CC_IS_GCC
+	select GENERIC_ALLOCATOR
+	select GENERIC_CPU_AUTOPROBE
+	select GENERIC_CPU_VULNERABILITIES
+	select GENERIC_ENTRY
+	select GENERIC_GETTIMEOFDAY
+	select GENERIC_PTDUMP
+	select GENERIC_SMP_IDLE_THREAD
+	select GENERIC_TIME_VSYSCALL
+	select GENERIC_VDSO_TIME_NS
+	select GENERIC_IOREMAP if PCI
+	select HAVE_ALIGNED_STRUCT_PAGE if SLUB
+	select HAVE_ARCH_AUDITSYSCALL
+	select HAVE_ARCH_JUMP_LABEL
+	select HAVE_ARCH_JUMP_LABEL_RELATIVE
+	select HAVE_ARCH_KASAN
+	select HAVE_ARCH_KASAN_VMALLOC
+	select HAVE_ARCH_KCSAN
+	select HAVE_ARCH_KFENCE
+	select HAVE_ARCH_RANDOMIZE_KSTACK_OFFSET
+	select HAVE_ARCH_SECCOMP_FILTER
+	select HAVE_ARCH_SOFT_DIRTY
+	select HAVE_ARCH_STACKLEAK
+	select HAVE_ARCH_TRACEHOOK
+	select HAVE_ARCH_TRANSPARENT_HUGEPAGE
+	select HAVE_ARCH_VMAP_STACK
+	select HAVE_ASM_MODVERSIONS
+	select HAVE_CMPXCHG_DOUBLE
+	select HAVE_CMPXCHG_LOCAL
+	select HAVE_DEBUG_KMEMLEAK
+	select HAVE_DMA_CONTIGUOUS
+	select HAVE_DYNAMIC_FTRACE
+	select HAVE_DYNAMIC_FTRACE_WITH_ARGS
+	select HAVE_DYNAMIC_FTRACE_WITH_DIRECT_CALLS
+	select HAVE_DYNAMIC_FTRACE_WITH_REGS
+	select HAVE_EBPF_JIT if HAVE_MARCH_Z196_FEATURES
+	select HAVE_EFFICIENT_UNALIGNED_ACCESS
+	select HAVE_FAST_GUP
+	select HAVE_FENTRY
+	select HAVE_FTRACE_MCOUNT_RECORD
+	select HAVE_FUNCTION_ARG_ACCESS_API
+	select HAVE_FUNCTION_ERROR_INJECTION
+	select HAVE_FUNCTION_GRAPH_RETVAL
+	select HAVE_FUNCTION_GRAPH_TRACER
+	select HAVE_FUNCTION_TRACER
+	select HAVE_GCC_PLUGINS
+	select HAVE_GENERIC_VDSO
+	select HAVE_IOREMAP_PROT if PCI
+	select HAVE_KERNEL_BZIP2
+	select HAVE_KERNEL_GZIP
+	select HAVE_KERNEL_LZ4
+	select HAVE_KERNEL_LZMA
+	select HAVE_KERNEL_LZO
+	select HAVE_KERNEL_UNCOMPRESSED
+	select HAVE_KERNEL_XZ
+	select HAVE_KERNEL_ZSTD
+	select HAVE_KPROBES
+	select HAVE_KPROBES_ON_FTRACE
+	select HAVE_KRETPROBES
+	select HAVE_KVM
+	select HAVE_LIVEPATCH
+	select HAVE_MEMBLOCK_PHYS_MAP
+	select HAVE_MOD_ARCH_SPECIFIC
+	select HAVE_NMI
+	select HAVE_NOP_MCOUNT
+	select HAVE_PCI
+	select HAVE_PERF_EVENTS
+	select HAVE_PERF_REGS
+	select HAVE_PERF_USER_STACK_DUMP
+	select HAVE_REGS_AND_STACK_ACCESS_API
+	select HAVE_RELIABLE_STACKTRACE
+	select HAVE_RETHOOK
+	select HAVE_RSEQ
+	select HAVE_SAMPLE_FTRACE_DIRECT
+	select HAVE_SAMPLE_FTRACE_DIRECT_MULTI
+	select HAVE_SETUP_PER_CPU_AREA
+	select HAVE_SOFTIRQ_ON_OWN_STACK
+	select HAVE_SYSCALL_TRACEPOINTS
+	select HAVE_VIRT_CPU_ACCOUNTING
+	select HAVE_VIRT_CPU_ACCOUNTING_IDLE
+	select IOMMU_HELPER		if PCI
+	select IOMMU_SUPPORT		if PCI
+	select KEXEC
+	select MMU_GATHER_MERGE_VMAS
+	select MMU_GATHER_NO_GATHER
+	select MMU_GATHER_RCU_TABLE_FREE
+	select MODULES_USE_ELF_RELA
+	select NEED_DMA_MAP_STATE	if PCI
+	select NEED_PER_CPU_EMBED_FIRST_CHUNK
+	select NEED_SG_DMA_LENGTH	if PCI
+	select OLD_SIGACTION
+	select OLD_SIGSUSPEND3
+	select PCI_DOMAINS		if PCI
+	select PCI_MSI			if PCI
+	select PCI_MSI_ARCH_FALLBACKS	if PCI_MSI
+	select SPARSE_IRQ
+	select SWIOTLB
+	select SYSCTL_EXCEPTION_TRACE
+	select THREAD_INFO_IN_TASK
+	select TRACE_IRQFLAGS_SUPPORT
+	select TTY
+	select VIRT_CPU_ACCOUNTING
+	select ZONE_DMA
+	# Note: keep the above list sorted alphabetically
+
+config SCHED_OMIT_FRAME_POINTER
+	def_bool y
+
+config PGTABLE_LEVELS
+	int
+	default 5
+
+source "kernel/livepatch/Kconfig"
+
+config ARCH_SUPPORTS_KEXEC
+	def_bool y
+
+config ARCH_SUPPORTS_KEXEC_FILE
+	def_bool y
+
+config ARCH_SUPPORTS_KEXEC_SIG
+	def_bool MODULE_SIG_FORMAT
+
+config ARCH_SUPPORTS_KEXEC_PURGATORY
+	def_bool y
+
+config ARCH_SUPPORTS_CRASH_DUMP
+	def_bool y
+	help
+	  Refer to <file:Documentation/arch/s390/zfcpdump.rst> for more details on this.
+	  This option also enables s390 zfcpdump.
+	  See also <file:Documentation/arch/s390/zfcpdump.rst>
+
+menu "Processor type and features"
+
+config HAVE_MARCH_Z10_FEATURES
+	def_bool n
+
+config HAVE_MARCH_Z196_FEATURES
+	def_bool n
+	select HAVE_MARCH_Z10_FEATURES
+
+config HAVE_MARCH_ZEC12_FEATURES
+	def_bool n
+	select HAVE_MARCH_Z196_FEATURES
+
+config HAVE_MARCH_Z13_FEATURES
+	def_bool n
+	select HAVE_MARCH_ZEC12_FEATURES
+
+config HAVE_MARCH_Z14_FEATURES
+	def_bool n
+	select HAVE_MARCH_Z13_FEATURES
+
+config HAVE_MARCH_Z15_FEATURES
+	def_bool n
+	select HAVE_MARCH_Z14_FEATURES
+
+config HAVE_MARCH_Z16_FEATURES
+	def_bool n
+	select HAVE_MARCH_Z15_FEATURES
+
+choice
+	prompt "Processor type"
+	default MARCH_Z196
+
+config MARCH_Z10
+	bool "IBM System z10"
+	select HAVE_MARCH_Z10_FEATURES
+	depends on $(cc-option,-march=z10)
+	help
+	  Select this to enable optimizations for IBM System z10 (2097 and 2098
+	  series). This is the oldest machine generation currently supported.
+
+config MARCH_Z196
+	bool "IBM zEnterprise 114 and 196"
+	select HAVE_MARCH_Z196_FEATURES
+	depends on $(cc-option,-march=z196)
+	help
+	  Select this to enable optimizations for IBM zEnterprise 114 and 196
+	  (2818 and 2817 series). The kernel will be slightly faster but will
+	  not work on older machines.
+
+config MARCH_ZEC12
+	bool "IBM zBC12 and zEC12"
+	select HAVE_MARCH_ZEC12_FEATURES
+	depends on $(cc-option,-march=zEC12)
+	help
+	  Select this to enable optimizations for IBM zBC12 and zEC12 (2828 and
+	  2827 series). The kernel will be slightly faster but will not work on
+	  older machines.
+
+config MARCH_Z13
+	bool "IBM z13s and z13"
+	select HAVE_MARCH_Z13_FEATURES
+	depends on $(cc-option,-march=z13)
+	help
+	  Select this to enable optimizations for IBM z13s and z13 (2965 and
+	  2964 series). The kernel will be slightly faster but will not work on
+	  older machines.
+
+config MARCH_Z14
+	bool "IBM z14 ZR1 and z14"
+	select HAVE_MARCH_Z14_FEATURES
+	depends on $(cc-option,-march=z14)
+	help
+	  Select this to enable optimizations for IBM z14 ZR1 and z14 (3907
+	  and 3906 series). The kernel will be slightly faster but will not
+	  work on older machines.
+
+config MARCH_Z15
+	bool "IBM z15"
+	select HAVE_MARCH_Z15_FEATURES
+	depends on $(cc-option,-march=z15)
+	help
+	  Select this to enable optimizations for IBM z15 (8562
+	  and 8561 series). The kernel will be slightly faster but will not
+	  work on older machines.
+
+config MARCH_Z16
+	bool "IBM z16"
+	select HAVE_MARCH_Z16_FEATURES
+	depends on $(cc-option,-march=z16)
+	help
+	  Select this to enable optimizations for IBM z16 (3931 and
+	  3932 series).
+
+endchoice
+
+config MARCH_Z10_TUNE
+	def_bool TUNE_Z10 || MARCH_Z10 && TUNE_DEFAULT
+
+config MARCH_Z196_TUNE
+	def_bool TUNE_Z196 || MARCH_Z196 && TUNE_DEFAULT
+
+config MARCH_ZEC12_TUNE
+	def_bool TUNE_ZEC12 || MARCH_ZEC12 && TUNE_DEFAULT
+
+config MARCH_Z13_TUNE
+	def_bool TUNE_Z13 || MARCH_Z13 && TUNE_DEFAULT
+
+config MARCH_Z14_TUNE
+	def_bool TUNE_Z14 || MARCH_Z14 && TUNE_DEFAULT
+
+config MARCH_Z15_TUNE
+	def_bool TUNE_Z15 || MARCH_Z15 && TUNE_DEFAULT
+
+config MARCH_Z16_TUNE
+	def_bool TUNE_Z16 || MARCH_Z16 && TUNE_DEFAULT
+
+choice
+	prompt "Tune code generation"
+	default TUNE_DEFAULT
+	help
+	  Cause the compiler to tune (-mtune) the generated code for a machine.
+	  This will make the code run faster on the selected machine but
+	  somewhat slower on other machines.
+	  This option only changes how the compiler emits instructions, not the
+	  selection of instructions itself, so the resulting kernel will run on
+	  all other machines.
+
+config TUNE_DEFAULT
+	bool "Default"
+	help
+	  Tune the generated code for the target processor for which the kernel
+	  will be compiled.
+
+config TUNE_Z10
+	bool "IBM System z10"
+
+config TUNE_Z196
+	bool "IBM zEnterprise 114 and 196"
+	depends on $(cc-option,-mtune=z196)
+
+config TUNE_ZEC12
+	bool "IBM zBC12 and zEC12"
+	depends on $(cc-option,-mtune=zEC12)
+
+config TUNE_Z13
+	bool "IBM z13s and z13"
+	depends on $(cc-option,-mtune=z13)
+
+config TUNE_Z14
+	bool "IBM z14 ZR1 and z14"
+	depends on $(cc-option,-mtune=z14)
+
+config TUNE_Z15
+	bool "IBM z15"
+	depends on $(cc-option,-mtune=z15)
+
+config TUNE_Z16
+	bool "IBM z16"
+	depends on $(cc-option,-mtune=z16)
+
+endchoice
+
+config 64BIT
+	def_bool y
+
+config COMMAND_LINE_SIZE
+	int "Maximum size of kernel command line"
+	default 4096
+	range 896 1048576
+	help
+	  This allows you to specify the maximum length of the kernel command
+	  line.
+
+config COMPAT
+	def_bool y
+	prompt "Kernel support for 31 bit emulation"
+	select ARCH_WANT_OLD_COMPAT_IPC
+	select COMPAT_OLD_SIGACTION
+	select HAVE_UID16
+	depends on MULTIUSER
+	depends on !CC_IS_CLANG
+	help
+	  Select this option if you want to enable your system kernel to
+	  handle system-calls from ELF binaries for 31 bit ESA.  This option
+	  (and some other stuff like libraries and such) is needed for
+	  executing 31 bit applications.  It is safe to say "Y".
+
+config SMP
+	def_bool y
+
+config NR_CPUS
+	int "Maximum number of CPUs (2-512)"
+	range 2 512
+	default "64"
+	help
+	  This allows you to specify the maximum number of CPUs which this
+	  kernel will support. The maximum supported value is 512 and the
+	  minimum value which makes sense is 2.
+
+	  This is purely to save memory - each supported CPU adds
+	  approximately sixteen kilobytes to the kernel image.
+
+config HOTPLUG_CPU
+	def_bool y
+
+config NUMA
+	bool "NUMA support"
+	depends on SCHED_TOPOLOGY
+	default n
+	help
+	  Enable NUMA support
+
+	  This option adds NUMA support to the kernel.
+
+config NODES_SHIFT
+	int
+	depends on NUMA
+	default "1"
+
+config SCHED_SMT
+	def_bool n
+
+config SCHED_MC
+	def_bool n
+
+config SCHED_TOPOLOGY
+	def_bool y
+	prompt "Topology scheduler support"
+	select SCHED_SMT
+	select SCHED_MC
+	help
+	  Topology scheduler support improves the CPU scheduler's decision
+	  making when dealing with machines that have multi-threading,
+	  multiple cores or multiple books.
+
+source "kernel/Kconfig.hz"
+
+config CERT_STORE
+	bool "Get user certificates via DIAG320"
+	depends on KEYS
+	select CRYPTO_LIB_SHA256
+	help
+	  Enable this option if you want to access user-provided secure boot
+	  certificates via DIAG 0x320.
+
+	  These certificates will be made available via the keyring named
+	  'cert_store'.
+
+config KERNEL_NOBP
+	def_bool n
+	prompt "Enable modified branch prediction for the kernel by default"
+	help
+	  If this option is selected the kernel will switch to a modified
+	  branch prediction mode if the firmware interface is available.
+	  The modified branch prediction mode improves the behaviour in
+	  regard to speculative execution.
+
+	  With the option enabled the kernel parameter "nobp=0" or "nospec"
+	  can be used to run the kernel in the normal branch prediction mode.
+
+	  With the option disabled the modified branch prediction mode is
+	  enabled with the "nobp=1" kernel parameter.
+
+	  If unsure, say N.
+
+config EXPOLINE
+	def_bool n
+	depends on $(cc-option,-mindirect-branch=thunk)
+	prompt "Avoid speculative indirect branches in the kernel"
+	help
+	  Compile the kernel with the expoline compiler options to guard
+	  against kernel-to-user data leaks by avoiding speculative indirect
+	  branches.
+	  Requires a compiler with -mindirect-branch=thunk support for full
+	  protection. The kernel may run slower.
+
+	  If unsure, say N.
+
+config EXPOLINE_EXTERN
+	def_bool n
+	depends on EXPOLINE
+	depends on CC_IS_GCC && GCC_VERSION >= 110200
+	depends on $(success,$(srctree)/arch/s390/tools/gcc-thunk-extern.sh $(CC))
+	prompt "Generate expolines as extern functions."
+	help
+	  This option is required for some tooling like kpatch. The kernel is
+	  compiled with -mindirect-branch=thunk-extern and requires a newer
+	  compiler.
+
+	  If unsure, say N.
+
+choice
+	prompt "Expoline default"
+	depends on EXPOLINE
+	default EXPOLINE_FULL
+
+config EXPOLINE_OFF
+	bool "spectre_v2=off"
+
+config EXPOLINE_AUTO
+	bool "spectre_v2=auto"
+
+config EXPOLINE_FULL
+	bool "spectre_v2=on"
+
+endchoice
+
+config RELOCATABLE
+	def_bool y
+	help
+	  This builds a kernel image that retains relocation information
+	  so it can be loaded at an arbitrary address.
+	  The kernel is linked as a position-independent executable (PIE)
+	  and contains dynamic relocations which are processed early in the
+	  bootup process.
+	  The relocations make the kernel image about 15% larger (compressed
+	  10%), but are discarded at runtime.
+	  Note: this option exists only for documentation purposes, please do
+	  not remove it.
+
+config RANDOMIZE_BASE
+	bool "Randomize the address of the kernel image (KASLR)"
+	default y
+	help
+	  In support of Kernel Address Space Layout Randomization (KASLR),
+	  this randomizes the address at which the kernel image is loaded,
+	  as a security feature that deters exploit attempts relying on
+	  knowledge of the location of kernel internals.
+
+endmenu
+
+menu "Memory setup"
+
+config ARCH_SPARSEMEM_ENABLE
+	def_bool y
+	select SPARSEMEM_VMEMMAP_ENABLE
+	select SPARSEMEM_VMEMMAP
+
+config ARCH_SPARSEMEM_DEFAULT
+	def_bool y
+
+config MAX_PHYSMEM_BITS
+	int "Maximum size of supported physical memory in bits (42-53)"
+	range 42 53
+	default "46"
+	help
+	  This option specifies the maximum supported size of physical memory
+	  in bits. Supported is any size between 2^42 (4TB) and 2^53 (8PB).
+	  Increasing the number of bits also increases the kernel image size.
+	  By default 46 bits (64TB) are supported.
+
+config CHECK_STACK
+	def_bool y
+	depends on !VMAP_STACK
+	prompt "Detect kernel stack overflow"
+	help
+	  This option enables the compiler option -mstack-guard and
+	  -mstack-size if they are available. If the compiler supports them
+	  it will emit additional code to each function prolog to trigger
+	  an illegal operation if the kernel stack is about to overflow.
+
+	  Say N if you are unsure.
+
+config STACK_GUARD
+	int "Size of the guard area (128-1024)"
+	range 128 1024
+	depends on CHECK_STACK
+	default "256"
+	help
+	  This allows you to specify the size of the guard area at the lower
+	  end of the kernel stack. If the kernel stack points into the guard
+	  area on function entry an illegal operation is triggered. The size
+	  needs to be a power of 2. Please keep in mind that the size of an
+	  interrupt frame is 184 bytes for 31 bit and 328 bytes on 64 bit.
+	  The minimum size for the stack guard should be 256 for 31 bit and
+	  512 for 64 bit.
+
+endmenu
+
+menu "I/O subsystem"
+
+config QDIO
+	def_tristate y
+	prompt "QDIO support"
+	help
+	  This driver provides the Queued Direct I/O base support for
+	  IBM System z.
+
+	  To compile this driver as a module, choose M here: the
+	  module will be called qdio.
+
+	  If unsure, say Y.
+
+if PCI
+
+config PCI_NR_FUNCTIONS
+	int "Maximum number of PCI functions (1-4096)"
+	range 1 4096
+	default "512"
+	help
+	  This allows you to specify the maximum number of PCI functions which
+	  this kernel will support.
+
+endif # PCI
+
+config HAS_IOMEM
+	def_bool PCI
+
+config CHSC_SCH
+	def_tristate m
+	prompt "Support for CHSC subchannels"
+	help
+	  This driver allows usage of CHSC subchannels. A CHSC subchannel
+	  is usually present on LPAR only.
+	  The driver creates a device /dev/chsc, which may be used to
+	  obtain I/O configuration information about the machine and
+	  to issue asynchronous chsc commands (DANGEROUS).
+	  You will usually only want to use this interface on a special
+	  LPAR designated for system management.
+
+	  To compile this driver as a module, choose M here: the
+	  module will be called chsc_sch.
+
+	  If unsure, say N.
+
+config SCM_BUS
+	def_bool y
+	prompt "SCM bus driver"
+	help
+	  Bus driver for Storage Class Memory.
+
+config EADM_SCH
+	def_tristate m
+	prompt "Support for EADM subchannels"
+	depends on SCM_BUS
+	help
+	  This driver allows usage of EADM subchannels. EADM subchannels act
+	  as a communication vehicle for SCM increments.
+
+	  To compile this driver as a module, choose M here: the
+	  module will be called eadm_sch.
+
+config VFIO_CCW
+	def_tristate n
+	prompt "Support for VFIO-CCW subchannels"
+	depends on VFIO
+	select VFIO_MDEV
+	help
+	  This driver allows usage of I/O subchannels via VFIO-CCW.
+
+	  To compile this driver as a module, choose M here: the
+	  module will be called vfio_ccw.
+
+config VFIO_AP
+	def_tristate n
+	prompt "VFIO support for AP devices"
+	depends on KVM
+	depends on VFIO
+	depends on ZCRYPT
+	select VFIO_MDEV
+	help
+	  This driver grants access to Adjunct Processor (AP) devices
+	  via the VFIO mediated device interface.
+
+	  To compile this driver as a module, choose M here: the module
+	  will be called vfio_ap.
+
+endmenu
+
+config CCW
+	def_bool y
+
+config HAVE_PNETID
+	tristate
+	default (SMC || CCWGROUP)
+
+menu "Virtualization"
+
+config PROTECTED_VIRTUALIZATION_GUEST
+	def_bool n
+	prompt "Protected virtualization guest support"
+	help
+	  Select this option, if you want to be able to run this
+	  kernel as a protected virtualization KVM guest.
+	  Protected virtualization capable machines have a mini hypervisor
+	  located at machine level (an ultravisor). With help of the
+	  Ultravisor, KVM will be able to run "protected" VMs, special
+	  VMs whose memory and management data are unavailable to KVM.
+
+config PFAULT
+	def_bool y
+	prompt "Pseudo page fault support"
+	help
+	  Select this option, if you want to use PFAULT pseudo page fault
+	  handling under VM. If running native or in LPAR, this option
+	  has no effect. If your VM does not support PFAULT, PAGEEX
+	  pseudo page fault handling will be used.
+	  Note that VM 4.2 supports PFAULT but has a bug in its
+	  implementation that causes some problems.
+	  Everybody who wants to run Linux under VM != VM4.2 should select
+	  this option.
+
+config CMM
+	def_tristate n
+	prompt "Cooperative memory management"
+	help
+	  Select this option, if you want to enable the kernel interface
+	  to reduce the memory size of the system. This is accomplished
+	  by allocating pages of memory and put them "on hold". This only
+	  makes sense for a system running under VM where the unused pages
+	  will be reused by VM for other guest systems. The interface
+	  allows an external monitor to balance memory of many systems.
+	  Everybody who wants to run Linux under VM should select this
+	  option.
+
+config CMM_IUCV
+	def_bool y
+	prompt "IUCV special message interface to cooperative memory management"
+	depends on CMM && (SMSGIUCV=y || CMM=SMSGIUCV)
+	help
+	  Select this option to enable the special message interface to
+	  the cooperative memory management.
+
+config APPLDATA_BASE
+	def_bool n
+	prompt "Linux - VM Monitor Stream, base infrastructure"
+	depends on PROC_SYSCTL
+	help
+	  This provides a kernel interface for creating and updating z/VM APPLDATA
+	  monitor records. The monitor records are updated at certain time
+	  intervals, once the timer is started.
+	  Writing 1 or 0 to /proc/appldata/timer starts(1) or stops(0) the timer,
+	  i.e. enables or disables monitoring on the Linux side.
+	  A custom interval value (in seconds) can be written to
+	  /proc/appldata/interval.
+
+	  Defaults are 60 seconds interval and timer off.
+	  The /proc entries can also be read from, showing the current settings.
+
+config APPLDATA_MEM
+	def_tristate m
+	prompt "Monitor memory management statistics"
+	depends on APPLDATA_BASE && VM_EVENT_COUNTERS
+	help
+	  This provides memory management related data to the Linux - VM Monitor
+	  Stream, like paging/swapping rate, memory utilisation, etc.
+	  Writing 1 or 0 to /proc/appldata/memory creates(1) or removes(0) a z/VM
+	  APPLDATA monitor record, i.e. enables or disables monitoring this record
+	  on the z/VM side.
+
+	  Default is disabled.
+	  The /proc entry can also be read from, showing the current settings.
+
+	  This can also be compiled as a module, which will be called
+	  appldata_mem.o.
+
+config APPLDATA_OS
+	def_tristate m
+	prompt "Monitor OS statistics"
+	depends on APPLDATA_BASE
+	help
+	  This provides OS related data to the Linux - VM Monitor Stream, like
+	  CPU utilisation, etc.
+	  Writing 1 or 0 to /proc/appldata/os creates(1) or removes(0) a z/VM
+	  APPLDATA monitor record, i.e. enables or disables monitoring this record
+	  on the z/VM side.
+
+	  Default is disabled.
+	  This can also be compiled as a module, which will be called
+	  appldata_os.o.
+
+config APPLDATA_NET_SUM
+	def_tristate m
+	prompt "Monitor overall network statistics"
+	depends on APPLDATA_BASE && NET
+	help
+	  This provides network related data to the Linux - VM Monitor Stream,
+	  currently there is only a total sum of network I/O statistics, no
+	  per-interface data.
+	  Writing 1 or 0 to /proc/appldata/net_sum creates(1) or removes(0) a z/VM
+	  APPLDATA monitor record, i.e. enables or disables monitoring this record
+	  on the z/VM side.
+
+	  Default is disabled.
+	  This can also be compiled as a module, which will be called
+	  appldata_net_sum.o.
+
+config S390_HYPFS
+	def_bool y
+	prompt "s390 hypervisor information"
+	help
+	  This provides several binary files at (debugfs)/s390_hypfs/ to
+	  provide accounting information in an s390 hypervisor environment.
+
+config S390_HYPFS_FS
+	def_bool n
+	prompt "s390 hypervisor file system support"
+	select SYS_HYPERVISOR
+	depends on S390_HYPFS
+	help
+	  This is a virtual file system intended to provide accounting
+	  information in an s390 hypervisor environment. This file system
+	  is deprecated and should not be used.
+
+	  Say N if you are unsure.
+
+source "arch/s390/kvm/Kconfig"
+
+config S390_GUEST
+	def_bool y
+	prompt "s390 support for virtio devices"
+	select TTY
+	select VIRTUALIZATION
+	select VIRTIO
+	help
+	  Enabling this option adds support for virtio based paravirtual device
+	  drivers on s390.
+
+	  Select this option if you want to run the kernel as a guest under
+	  the KVM hypervisor.
+
+endmenu
+
+config S390_MODULES_SANITY_TEST_HELPERS
+	def_bool n
+
+menu "Selftests"
+
+config S390_UNWIND_SELFTEST
+	def_tristate n
+	depends on KUNIT
+	default KUNIT_ALL_TESTS
+	prompt "Test unwind functions"
+	help
+	  This option enables s390 specific stack unwinder testing kernel
+	  module. This option is not useful for distributions or general
+	  kernels, but only for kernel developers working on architecture code.
+
+	  Say N if you are unsure.
+
+config S390_KPROBES_SANITY_TEST
+	def_tristate n
+	prompt "Enable s390 specific kprobes tests"
+	depends on KPROBES
+	depends on KUNIT
+	help
+	  This option enables an s390 specific kprobes test module. This option
+	  is not useful for distributions or general kernels, but only for kernel
+	  developers working on architecture code.
+
+	  Say N if you are unsure.
+
+config S390_MODULES_SANITY_TEST
+	def_tristate n
+	depends on KUNIT
+	default KUNIT_ALL_TESTS
+	prompt "Enable s390 specific modules tests"
+	select S390_MODULES_SANITY_TEST_HELPERS
+	help
+	  This option enables an s390 specific modules test. This option is
+	  not useful for distributions or general kernels, but only for
+	  kernel developers working on architecture code.
+
+	  Say N if you are unsure.
+endmenu
diff --git a/arch/s390/Kconfig.debug b/arch/s390/Kconfig.debug
new file mode 100644
index 0000000000..c4300ea4ab
--- /dev/null
+++ b/arch/s390/Kconfig.debug
@@ -0,0 +1,22 @@
+# SPDX-License-Identifier: GPL-2.0
+
+config EARLY_PRINTK
+	def_bool y
+
+config DEBUG_ENTRY
+	bool "Debug low-level entry code"
+	depends on DEBUG_KERNEL
+	help
+	  This option enables sanity checks in s390 low-level entry code.
+	  Some of these sanity checks may slow down kernel entries and
+	  exits or otherwise impact performance.
+
+	  If unsure, say N.
+
+config CIO_INJECT
+	bool "CIO Inject interfaces"
+	depends on DEBUG_KERNEL && DEBUG_FS
+	help
+	  This option provides a debugging facility to inject certain artificial events
+	  and instruction responses to the CIO layer of Linux kernel. The newly created
+	  debugfs user-interfaces will be at /sys/kernel/debug/s390/cio/*
diff --git a/arch/s390/Makefile b/arch/s390/Makefile
new file mode 100644
index 0000000000..a53a36ee07
--- /dev/null
+++ b/arch/s390/Makefile
@@ -0,0 +1,177 @@
+# SPDX-License-Identifier: GPL-2.0
+#
+# s390/Makefile
+#
+# This file is included by the global makefile so that you can add your own
+# architecture-specific flags and dependencies.
+#
+# Copyright (C) 1994 by Linus Torvalds
+#
+
+LD_BFD		:= elf64-s390
+KBUILD_LDFLAGS	:= -m elf64_s390
+KBUILD_AFLAGS_MODULE += -fPIC
+KBUILD_CFLAGS_MODULE += -fPIC
+KBUILD_AFLAGS	+= -m64
+KBUILD_CFLAGS	+= -m64
+KBUILD_CFLAGS	+= -fPIE
+LDFLAGS_vmlinux	:= -pie
+aflags_dwarf	:= -Wa,-gdwarf-2
+KBUILD_AFLAGS_DECOMPRESSOR := $(CLANG_FLAGS) -m64 -D__ASSEMBLY__
+ifndef CONFIG_AS_IS_LLVM
+KBUILD_AFLAGS_DECOMPRESSOR += $(if $(CONFIG_DEBUG_INFO),$(aflags_dwarf))
+endif
+KBUILD_CFLAGS_DECOMPRESSOR := $(CLANG_FLAGS) -m64 -O2 -mpacked-stack
+KBUILD_CFLAGS_DECOMPRESSOR += -DDISABLE_BRANCH_PROFILING -D__NO_FORTIFY
+KBUILD_CFLAGS_DECOMPRESSOR += -fno-delete-null-pointer-checks -msoft-float -mbackchain
+KBUILD_CFLAGS_DECOMPRESSOR += -fno-asynchronous-unwind-tables
+KBUILD_CFLAGS_DECOMPRESSOR += -ffreestanding
+KBUILD_CFLAGS_DECOMPRESSOR += -fno-stack-protector
+KBUILD_CFLAGS_DECOMPRESSOR += -fPIE
+KBUILD_CFLAGS_DECOMPRESSOR += $(call cc-disable-warning, address-of-packed-member)
+KBUILD_CFLAGS_DECOMPRESSOR += $(if $(CONFIG_DEBUG_INFO),-g)
+KBUILD_CFLAGS_DECOMPRESSOR += $(if $(CONFIG_DEBUG_INFO_DWARF4), $(call cc-option, -gdwarf-4,))
+KBUILD_CFLAGS_DECOMPRESSOR += $(if $(CONFIG_CC_NO_ARRAY_BOUNDS),-Wno-array-bounds)
+
+UTS_MACHINE	:= s390x
+STACK_SIZE	:= $(if $(CONFIG_KASAN),65536,16384)
+CHECKFLAGS	+= -D__s390__ -D__s390x__
+
+export LD_BFD
+
+mflags-$(CONFIG_MARCH_Z10)    := -march=z10
+mflags-$(CONFIG_MARCH_Z196)   := -march=z196
+mflags-$(CONFIG_MARCH_ZEC12)  := -march=zEC12
+mflags-$(CONFIG_MARCH_Z13)    := -march=z13
+mflags-$(CONFIG_MARCH_Z14)    := -march=z14
+mflags-$(CONFIG_MARCH_Z15)    := -march=z15
+mflags-$(CONFIG_MARCH_Z16)    := -march=z16
+
+export CC_FLAGS_MARCH := $(mflags-y)
+
+aflags-y += $(mflags-y)
+cflags-y += $(mflags-y)
+
+cflags-$(CONFIG_MARCH_Z10_TUNE)		+= -mtune=z10
+cflags-$(CONFIG_MARCH_Z196_TUNE)	+= -mtune=z196
+cflags-$(CONFIG_MARCH_ZEC12_TUNE)	+= -mtune=zEC12
+cflags-$(CONFIG_MARCH_Z13_TUNE)		+= -mtune=z13
+cflags-$(CONFIG_MARCH_Z14_TUNE)		+= -mtune=z14
+cflags-$(CONFIG_MARCH_Z15_TUNE)		+= -mtune=z15
+cflags-$(CONFIG_MARCH_Z16_TUNE)		+= -mtune=z16
+
+cflags-y += -Wa,-I$(srctree)/arch/$(ARCH)/include
+
+#
+# Prevent tail-call optimizations, to get clearer backtraces:
+#
+cflags-$(CONFIG_FRAME_POINTER) += -fno-optimize-sibling-calls
+
+KBUILD_AFLAGS_DECOMPRESSOR += $(aflags-y)
+KBUILD_CFLAGS_DECOMPRESSOR += $(cflags-y)
+
+ifneq ($(call cc-option,-mstack-size=8192 -mstack-guard=128),)
+  CC_FLAGS_CHECK_STACK := -mstack-size=$(STACK_SIZE)
+  ifeq ($(call cc-option,-mstack-size=8192),)
+    CC_FLAGS_CHECK_STACK += -mstack-guard=$(CONFIG_STACK_GUARD)
+  endif
+  export CC_FLAGS_CHECK_STACK
+  cflags-$(CONFIG_CHECK_STACK) += $(CC_FLAGS_CHECK_STACK)
+endif
+
+ifdef CONFIG_EXPOLINE
+  ifdef CONFIG_EXPOLINE_EXTERN
+    KBUILD_LDFLAGS_MODULE += arch/s390/lib/expoline/expoline.o
+    CC_FLAGS_EXPOLINE := -mindirect-branch=thunk-extern
+    CC_FLAGS_EXPOLINE += -mfunction-return=thunk-extern
+  else
+    CC_FLAGS_EXPOLINE := -mindirect-branch=thunk
+    CC_FLAGS_EXPOLINE += -mfunction-return=thunk
+  endif
+  CC_FLAGS_EXPOLINE += -mindirect-branch-table
+  export CC_FLAGS_EXPOLINE
+  cflags-y += $(CC_FLAGS_EXPOLINE) -DCC_USING_EXPOLINE
+  aflags-y += -DCC_USING_EXPOLINE
+endif
+
+ifdef CONFIG_FUNCTION_TRACER
+  ifeq ($(call cc-option,-mfentry -mnop-mcount),)
+    # make use of hotpatch feature if the compiler supports it
+    cc_hotpatch	:= -mhotpatch=0,3
+    ifneq ($(call cc-option,$(cc_hotpatch)),)
+      CC_FLAGS_FTRACE := $(cc_hotpatch)
+      KBUILD_AFLAGS	+= -DCC_USING_HOTPATCH
+      KBUILD_CFLAGS	+= -DCC_USING_HOTPATCH
+    endif
+  endif
+endif
+
+# Test CFI features of binutils
+cfi := $(call as-instr,.cfi_startproc\n.cfi_val_offset 15$(comma)-160\n.cfi_endproc,-DCONFIG_AS_CFI_VAL_OFFSET=1)
+
+KBUILD_CFLAGS	+= -mpacked-stack -mbackchain -msoft-float $(cflags-y)
+KBUILD_CFLAGS	+= -pipe -Wno-sign-compare
+KBUILD_CFLAGS	+= -fno-asynchronous-unwind-tables $(cfi)
+KBUILD_AFLAGS	+= $(aflags-y) $(cfi)
+export KBUILD_AFLAGS_DECOMPRESSOR
+export KBUILD_CFLAGS_DECOMPRESSOR
+
+OBJCOPYFLAGS	:= -O binary
+
+libs-y		+= arch/s390/lib/
+
+boot		:= arch/s390/boot
+syscalls	:= arch/s390/kernel/syscalls
+tools		:= arch/s390/tools
+
+all: bzImage
+
+#KBUILD_IMAGE is necessary for packaging targets like rpm-pkg, deb-pkg...
+KBUILD_IMAGE	:= $(boot)/bzImage
+
+install:
+	$(call cmd,install)
+
+bzImage: vmlinux
+	$(Q)$(MAKE) $(build)=$(boot) $(boot)/$@
+
+zfcpdump:
+	$(Q)$(MAKE) $(build)=$(boot) $(boot)/$@
+
+vdso_install:
+	$(Q)$(MAKE) $(build)=arch/$(ARCH)/kernel/vdso64 $@
+
+archheaders:
+	$(Q)$(MAKE) $(build)=$(syscalls) uapi
+
+archprepare:
+	$(Q)$(MAKE) $(build)=$(syscalls) kapi
+	$(Q)$(MAKE) $(build)=$(tools) kapi
+ifeq ($(KBUILD_EXTMOD),)
+# We need to generate vdso-offsets.h before compiling certain files in kernel/.
+# In order to do that, we should use the archprepare target, but we can't since
+# asm-offsets.h is included in some files used to generate vdso-offsets.h, and
+# asm-offsets.h is built in prepare0, for which archprepare is a dependency.
+# Therefore we need to generate the header after prepare0 has been made, hence
+# this hack.
+prepare: vdso_prepare
+vdso_prepare: prepare0
+	$(Q)$(MAKE) $(build)=arch/s390/kernel/vdso64 include/generated/vdso64-offsets.h
+	$(if $(CONFIG_COMPAT),$(Q)$(MAKE) \
+		$(build)=arch/s390/kernel/vdso32 include/generated/vdso32-offsets.h)
+
+ifdef CONFIG_EXPOLINE_EXTERN
+modules_prepare: expoline_prepare
+expoline_prepare: scripts
+	$(Q)$(MAKE) $(build)=arch/s390/lib/expoline arch/s390/lib/expoline/expoline.o
+endif
+endif
+
+# Don't use tabs in echo arguments
+define archhelp
+  echo	'* bzImage         - Kernel image for IPL ($(boot)/bzImage)'
+  echo	'  install         - Install kernel using'
+  echo	'                    (your) ~/bin/$(INSTALLKERNEL) or'
+  echo	'                    (distribution) /sbin/$(INSTALLKERNEL) or'
+  echo	'                    install to $$(INSTALL_PATH)'
+endef
diff --git a/arch/s390/appldata/Makefile b/arch/s390/appldata/Makefile
new file mode 100644
index 0000000000..b06def4a4f
--- /dev/null
+++ b/arch/s390/appldata/Makefile
@@ -0,0 +1,9 @@
+# SPDX-License-Identifier: GPL-2.0
+#
+# Makefile for the Linux - z/VM Monitor Stream.
+#
+
+obj-$(CONFIG_APPLDATA_BASE) += appldata_base.o
+obj-$(CONFIG_APPLDATA_MEM) += appldata_mem.o
+obj-$(CONFIG_APPLDATA_OS) += appldata_os.o
+obj-$(CONFIG_APPLDATA_NET_SUM) += appldata_net_sum.o
diff --git a/arch/s390/appldata/appldata.h b/arch/s390/appldata/appldata.h
new file mode 100644
index 0000000000..10346d2f3d
--- /dev/null
+++ b/arch/s390/appldata/appldata.h
@@ -0,0 +1,48 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Definitions and interface for Linux - z/VM Monitor Stream.
+ *
+ * Copyright IBM Corp. 2003, 2008
+ *
+ * Author: Gerald Schaefer <gerald.schaefer@de.ibm.com>
+ */
+
+#define APPLDATA_MAX_REC_SIZE	  4024	/* Maximum size of the */
+					/* data buffer */
+#define APPLDATA_MAX_PROCS 100
+
+#define APPLDATA_PROC_NAME_LENGTH 16	/* Max. length of /proc name */
+
+#define APPLDATA_RECORD_MEM_ID		0x01	/* IDs to identify the */
+#define APPLDATA_RECORD_OS_ID		0x02	/* individual records, */
+#define APPLDATA_RECORD_NET_SUM_ID	0x03	/* must be < 256 !     */
+#define APPLDATA_RECORD_PROC_ID		0x04
+
+#define CTL_APPLDATA_TIMER 	2121	/* sysctl IDs, must be unique */
+#define CTL_APPLDATA_INTERVAL 	2122
+#define CTL_APPLDATA_MEM	2123
+#define CTL_APPLDATA_OS		2124
+#define CTL_APPLDATA_NET_SUM	2125
+#define CTL_APPLDATA_PROC	2126
+
+struct appldata_ops {
+	struct list_head list;
+	struct ctl_table_header *sysctl_header;
+	struct ctl_table *ctl_table;
+	int    active;				/* monitoring status */
+
+	/* fill in from here */
+	char name[APPLDATA_PROC_NAME_LENGTH];	/* name of /proc fs node */
+	unsigned char record_nr;		/* Record Nr. for Product ID */
+	void (*callback)(void *data);		/* callback function */
+	void *data;				/* record data */
+	unsigned int size;			/* size of record */
+	struct module *owner;			/* THIS_MODULE */
+	char mod_lvl[2];			/* modification level, EBCDIC */
+};
+
+extern int appldata_register_ops(struct appldata_ops *ops);
+extern void appldata_unregister_ops(struct appldata_ops *ops);
+extern int appldata_diag(char record_nr, u16 function, unsigned long buffer,
+			 u16 length, char *mod_lvl);
+
diff --git a/arch/s390/appldata/appldata_base.c b/arch/s390/appldata/appldata_base.c
new file mode 100644
index 0000000000..3b09946256
--- /dev/null
+++ b/arch/s390/appldata/appldata_base.c
@@ -0,0 +1,428 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Base infrastructure for Linux-z/VM Monitor Stream, Stage 1.
+ * Exports appldata_register_ops() and appldata_unregister_ops() for the
+ * data gathering modules.
+ *
+ * Copyright IBM Corp. 2003, 2009
+ *
+ * Author: Gerald Schaefer <gerald.schaefer@de.ibm.com>
+ */
+
+#define KMSG_COMPONENT	"appldata"
+#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
+
+#include <linux/module.h>
+#include <linux/sched/stat.h>
+#include <linux/init.h>
+#include <linux/slab.h>
+#include <linux/errno.h>
+#include <linux/interrupt.h>
+#include <linux/proc_fs.h>
+#include <linux/mm.h>
+#include <linux/swap.h>
+#include <linux/pagemap.h>
+#include <linux/sysctl.h>
+#include <linux/notifier.h>
+#include <linux/cpu.h>
+#include <linux/workqueue.h>
+#include <linux/uaccess.h>
+#include <linux/io.h>
+#include <asm/appldata.h>
+#include <asm/vtimer.h>
+#include <asm/smp.h>
+
+#include "appldata.h"
+
+
+#define APPLDATA_CPU_INTERVAL	10000		/* default (CPU) time for
+						   sampling interval in
+						   milliseconds */
+
+#define TOD_MICRO	0x01000			/* nr. of TOD clock units
+						   for 1 microsecond */
+
+/*
+ * /proc entries (sysctl)
+ */
+static const char appldata_proc_name[APPLDATA_PROC_NAME_LENGTH] = "appldata";
+static int appldata_timer_handler(struct ctl_table *ctl, int write,
+				  void *buffer, size_t *lenp, loff_t *ppos);
+static int appldata_interval_handler(struct ctl_table *ctl, int write,
+				     void *buffer, size_t *lenp, loff_t *ppos);
+
+static struct ctl_table_header *appldata_sysctl_header;
+static struct ctl_table appldata_table[] = {
+	{
+		.procname	= "timer",
+		.mode		= S_IRUGO | S_IWUSR,
+		.proc_handler	= appldata_timer_handler,
+	},
+	{
+		.procname	= "interval",
+		.mode		= S_IRUGO | S_IWUSR,
+		.proc_handler	= appldata_interval_handler,
+	},
+	{ },
+};
+
+/*
+ * Timer
+ */
+static struct vtimer_list appldata_timer;
+
+static DEFINE_SPINLOCK(appldata_timer_lock);
+static int appldata_interval = APPLDATA_CPU_INTERVAL;
+static int appldata_timer_active;
+
+/*
+ * Work queue
+ */
+static struct workqueue_struct *appldata_wq;
+static void appldata_work_fn(struct work_struct *work);
+static DECLARE_WORK(appldata_work, appldata_work_fn);
+
+
+/*
+ * Ops list
+ */
+static DEFINE_MUTEX(appldata_ops_mutex);
+static LIST_HEAD(appldata_ops_list);
+
+
+/*************************** timer, work, DIAG *******************************/
+/*
+ * appldata_timer_function()
+ *
+ * schedule work and reschedule timer
+ */
+static void appldata_timer_function(unsigned long data)
+{
+	queue_work(appldata_wq, (struct work_struct *) data);
+}
+
+/*
+ * appldata_work_fn()
+ *
+ * call data gathering function for each (active) module
+ */
+static void appldata_work_fn(struct work_struct *work)
+{
+	struct list_head *lh;
+	struct appldata_ops *ops;
+
+	mutex_lock(&appldata_ops_mutex);
+	list_for_each(lh, &appldata_ops_list) {
+		ops = list_entry(lh, struct appldata_ops, list);
+		if (ops->active == 1) {
+			ops->callback(ops->data);
+		}
+	}
+	mutex_unlock(&appldata_ops_mutex);
+}
+
+static struct appldata_product_id appldata_id = {
+	.prod_nr    = {0xD3, 0xC9, 0xD5, 0xE4,
+		       0xE7, 0xD2, 0xD9},	/* "LINUXKR" */
+	.prod_fn    = 0xD5D3,			/* "NL" */
+	.version_nr = 0xF2F6,			/* "26" */
+	.release_nr = 0xF0F1,			/* "01" */
+};
+
+/*
+ * appldata_diag()
+ *
+ * prepare parameter list, issue DIAG 0xDC
+ */
+int appldata_diag(char record_nr, u16 function, unsigned long buffer,
+			u16 length, char *mod_lvl)
+{
+	struct appldata_parameter_list *parm_list;
+	struct appldata_product_id *id;
+	int rc;
+
+	parm_list = kmalloc(sizeof(*parm_list), GFP_KERNEL);
+	id = kmemdup(&appldata_id, sizeof(appldata_id), GFP_KERNEL);
+	rc = -ENOMEM;
+	if (parm_list && id) {
+		id->record_nr = record_nr;
+		id->mod_lvl = (mod_lvl[0]) << 8 | mod_lvl[1];
+		rc = appldata_asm(parm_list, id, function,
+				  (void *) buffer, length);
+	}
+	kfree(id);
+	kfree(parm_list);
+	return rc;
+}
+/************************ timer, work, DIAG <END> ****************************/
+
+
+/****************************** /proc stuff **********************************/
+
+#define APPLDATA_ADD_TIMER	0
+#define APPLDATA_DEL_TIMER	1
+#define APPLDATA_MOD_TIMER	2
+
+/*
+ * __appldata_vtimer_setup()
+ *
+ * Add, delete or modify virtual timers on all online cpus.
+ * The caller needs to get the appldata_timer_lock spinlock.
+ */
+static void __appldata_vtimer_setup(int cmd)
+{
+	u64 timer_interval = (u64) appldata_interval * 1000 * TOD_MICRO;
+
+	switch (cmd) {
+	case APPLDATA_ADD_TIMER:
+		if (appldata_timer_active)
+			break;
+		appldata_timer.expires = timer_interval;
+		add_virt_timer_periodic(&appldata_timer);
+		appldata_timer_active = 1;
+		break;
+	case APPLDATA_DEL_TIMER:
+		del_virt_timer(&appldata_timer);
+		if (!appldata_timer_active)
+			break;
+		appldata_timer_active = 0;
+		break;
+	case APPLDATA_MOD_TIMER:
+		if (!appldata_timer_active)
+			break;
+		mod_virt_timer_periodic(&appldata_timer, timer_interval);
+	}
+}
+
+/*
+ * appldata_timer_handler()
+ *
+ * Start/Stop timer, show status of timer (0 = not active, 1 = active)
+ */
+static int
+appldata_timer_handler(struct ctl_table *ctl, int write,
+			   void *buffer, size_t *lenp, loff_t *ppos)
+{
+	int timer_active = appldata_timer_active;
+	int rc;
+	struct ctl_table ctl_entry = {
+		.procname	= ctl->procname,
+		.data		= &timer_active,
+		.maxlen		= sizeof(int),
+		.extra1		= SYSCTL_ZERO,
+		.extra2		= SYSCTL_ONE,
+	};
+
+	rc = proc_douintvec_minmax(&ctl_entry, write, buffer, lenp, ppos);
+	if (rc < 0 || !write)
+		return rc;
+
+	spin_lock(&appldata_timer_lock);
+	if (timer_active)
+		__appldata_vtimer_setup(APPLDATA_ADD_TIMER);
+	else
+		__appldata_vtimer_setup(APPLDATA_DEL_TIMER);
+	spin_unlock(&appldata_timer_lock);
+	return 0;
+}
+
+/*
+ * appldata_interval_handler()
+ *
+ * Set (CPU) timer interval for collection of data (in milliseconds), show
+ * current timer interval.
+ */
+static int
+appldata_interval_handler(struct ctl_table *ctl, int write,
+			   void *buffer, size_t *lenp, loff_t *ppos)
+{
+	int interval = appldata_interval;
+	int rc;
+	struct ctl_table ctl_entry = {
+		.procname	= ctl->procname,
+		.data		= &interval,
+		.maxlen		= sizeof(int),
+		.extra1		= SYSCTL_ONE,
+	};
+
+	rc = proc_dointvec_minmax(&ctl_entry, write, buffer, lenp, ppos);
+	if (rc < 0 || !write)
+		return rc;
+
+	spin_lock(&appldata_timer_lock);
+	appldata_interval = interval;
+	__appldata_vtimer_setup(APPLDATA_MOD_TIMER);
+	spin_unlock(&appldata_timer_lock);
+	return 0;
+}
+
+/*
+ * appldata_generic_handler()
+ *
+ * Generic start/stop monitoring and DIAG, show status of
+ * monitoring (0 = not in process, 1 = in process)
+ */
+static int
+appldata_generic_handler(struct ctl_table *ctl, int write,
+			   void *buffer, size_t *lenp, loff_t *ppos)
+{
+	struct appldata_ops *ops = NULL, *tmp_ops;
+	struct list_head *lh;
+	int rc, found;
+	int active;
+	struct ctl_table ctl_entry = {
+		.data		= &active,
+		.maxlen		= sizeof(int),
+		.extra1		= SYSCTL_ZERO,
+		.extra2		= SYSCTL_ONE,
+	};
+
+	found = 0;
+	mutex_lock(&appldata_ops_mutex);
+	list_for_each(lh, &appldata_ops_list) {
+		tmp_ops = list_entry(lh, struct appldata_ops, list);
+		if (&tmp_ops->ctl_table[0] == ctl) {
+			found = 1;
+		}
+	}
+	if (!found) {
+		mutex_unlock(&appldata_ops_mutex);
+		return -ENODEV;
+	}
+	ops = ctl->data;
+	if (!try_module_get(ops->owner)) {	// protect this function
+		mutex_unlock(&appldata_ops_mutex);
+		return -ENODEV;
+	}
+	mutex_unlock(&appldata_ops_mutex);
+
+	active = ops->active;
+	rc = proc_douintvec_minmax(&ctl_entry, write, buffer, lenp, ppos);
+	if (rc < 0 || !write) {
+		module_put(ops->owner);
+		return rc;
+	}
+
+	mutex_lock(&appldata_ops_mutex);
+	if (active && (ops->active == 0)) {
+		// protect work queue callback
+		if (!try_module_get(ops->owner)) {
+			mutex_unlock(&appldata_ops_mutex);
+			module_put(ops->owner);
+			return -ENODEV;
+		}
+		ops->callback(ops->data);	// init record
+		rc = appldata_diag(ops->record_nr,
+					APPLDATA_START_INTERVAL_REC,
+					(unsigned long) ops->data, ops->size,
+					ops->mod_lvl);
+		if (rc != 0) {
+			pr_err("Starting the data collection for %s "
+			       "failed with rc=%d\n", ops->name, rc);
+			module_put(ops->owner);
+		} else
+			ops->active = 1;
+	} else if (!active && (ops->active == 1)) {
+		ops->active = 0;
+		rc = appldata_diag(ops->record_nr, APPLDATA_STOP_REC,
+				(unsigned long) ops->data, ops->size,
+				ops->mod_lvl);
+		if (rc != 0)
+			pr_err("Stopping the data collection for %s "
+			       "failed with rc=%d\n", ops->name, rc);
+		module_put(ops->owner);
+	}
+	mutex_unlock(&appldata_ops_mutex);
+	module_put(ops->owner);
+	return 0;
+}
+
+/*************************** /proc stuff <END> *******************************/
+
+
+/************************* module-ops management *****************************/
+/*
+ * appldata_register_ops()
+ *
+ * update ops list, register /proc/sys entries
+ */
+int appldata_register_ops(struct appldata_ops *ops)
+{
+	if (ops->size > APPLDATA_MAX_REC_SIZE)
+		return -EINVAL;
+
+	/* The last entry must be an empty one */
+	ops->ctl_table = kcalloc(2, sizeof(struct ctl_table), GFP_KERNEL);
+	if (!ops->ctl_table)
+		return -ENOMEM;
+
+	mutex_lock(&appldata_ops_mutex);
+	list_add(&ops->list, &appldata_ops_list);
+	mutex_unlock(&appldata_ops_mutex);
+
+	ops->ctl_table[0].procname = ops->name;
+	ops->ctl_table[0].mode = S_IRUGO | S_IWUSR;
+	ops->ctl_table[0].proc_handler = appldata_generic_handler;
+	ops->ctl_table[0].data = ops;
+
+	ops->sysctl_header = register_sysctl_sz(appldata_proc_name, ops->ctl_table, 1);
+	if (!ops->sysctl_header)
+		goto out;
+	return 0;
+out:
+	mutex_lock(&appldata_ops_mutex);
+	list_del(&ops->list);
+	mutex_unlock(&appldata_ops_mutex);
+	kfree(ops->ctl_table);
+	return -ENOMEM;
+}
+
+/*
+ * appldata_unregister_ops()
+ *
+ * update ops list, unregister /proc entries, stop DIAG if necessary
+ */
+void appldata_unregister_ops(struct appldata_ops *ops)
+{
+	mutex_lock(&appldata_ops_mutex);
+	list_del(&ops->list);
+	mutex_unlock(&appldata_ops_mutex);
+	unregister_sysctl_table(ops->sysctl_header);
+	kfree(ops->ctl_table);
+}
+/********************** module-ops management <END> **************************/
+
+
+/******************************* init / exit *********************************/
+
+/*
+ * appldata_init()
+ *
+ * init timer, register /proc entries
+ */
+static int __init appldata_init(void)
+{
+	init_virt_timer(&appldata_timer);
+	appldata_timer.function = appldata_timer_function;
+	appldata_timer.data = (unsigned long) &appldata_work;
+	appldata_wq = alloc_ordered_workqueue("appldata", 0);
+	if (!appldata_wq)
+		return -ENOMEM;
+	appldata_sysctl_header = register_sysctl(appldata_proc_name, appldata_table);
+	return 0;
+}
+
+__initcall(appldata_init);
+
+/**************************** init / exit <END> ******************************/
+
+EXPORT_SYMBOL_GPL(appldata_register_ops);
+EXPORT_SYMBOL_GPL(appldata_unregister_ops);
+EXPORT_SYMBOL_GPL(appldata_diag);
+
+#ifdef CONFIG_SWAP
+EXPORT_SYMBOL_GPL(si_swapinfo);
+#endif
+EXPORT_SYMBOL_GPL(nr_threads);
+EXPORT_SYMBOL_GPL(nr_running);
+EXPORT_SYMBOL_GPL(nr_iowait);
diff --git a/arch/s390/appldata/appldata_mem.c b/arch/s390/appldata/appldata_mem.c
new file mode 100644
index 0000000000..fc608f9b79
--- /dev/null
+++ b/arch/s390/appldata/appldata_mem.c
@@ -0,0 +1,161 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Data gathering module for Linux-VM Monitor Stream, Stage 1.
+ * Collects data related to memory management.
+ *
+ * Copyright IBM Corp. 2003, 2006
+ *
+ * Author: Gerald Schaefer <gerald.schaefer@de.ibm.com>
+ */
+
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/errno.h>
+#include <linux/kernel_stat.h>
+#include <linux/pagemap.h>
+#include <linux/swap.h>
+#include <linux/slab.h>
+#include <linux/io.h>
+
+#include "appldata.h"
+
+
+#define P2K(x) ((x) << (PAGE_SHIFT - 10))	/* Converts #Pages to KB */
+
+/*
+ * Memory data
+ *
+ * This is accessed as binary data by z/VM. If changes to it can't be avoided,
+ * the structure version (product ID, see appldata_base.c) needs to be changed
+ * as well and all documentation and z/VM applications using it must be
+ * updated.
+ */
+struct appldata_mem_data {
+	u64 timestamp;
+	u32 sync_count_1;       /* after VM collected the record data, */
+	u32 sync_count_2;	/* sync_count_1 and sync_count_2 should be the
+				   same. If not, the record has been updated on
+				   the Linux side while VM was collecting the
+				   (possibly corrupt) data */
+
+	u64 pgpgin;		/* data read from disk  */
+	u64 pgpgout;		/* data written to disk */
+	u64 pswpin;		/* pages swapped in  */
+	u64 pswpout;		/* pages swapped out */
+
+	u64 sharedram;		/* sharedram is currently set to 0 */
+
+	u64 totalram;		/* total main memory size */
+	u64 freeram;		/* free main memory size  */
+	u64 totalhigh;		/* total high memory size */
+	u64 freehigh;		/* free high memory size  */
+
+	u64 bufferram;		/* memory reserved for buffers, free cache */
+	u64 cached;		/* size of (used) cache, w/o buffers */
+	u64 totalswap;		/* total swap space size */
+	u64 freeswap;		/* free swap space */
+
+// New in 2.6 -->
+	u64 pgalloc;		/* page allocations */
+	u64 pgfault;		/* page faults (major+minor) */
+	u64 pgmajfault;		/* page faults (major only) */
+// <-- New in 2.6
+
+} __packed;
+
+
+/*
+ * appldata_get_mem_data()
+ *
+ * gather memory data
+ */
+static void appldata_get_mem_data(void *data)
+{
+	/*
+	 * don't put large structures on the stack, we are
+	 * serialized through the appldata_ops_mutex and can use static
+	 */
+	static struct sysinfo val;
+	unsigned long ev[NR_VM_EVENT_ITEMS];
+	struct appldata_mem_data *mem_data;
+
+	mem_data = data;
+	mem_data->sync_count_1++;
+
+	all_vm_events(ev);
+	mem_data->pgpgin     = ev[PGPGIN] >> 1;
+	mem_data->pgpgout    = ev[PGPGOUT] >> 1;
+	mem_data->pswpin     = ev[PSWPIN];
+	mem_data->pswpout    = ev[PSWPOUT];
+	mem_data->pgalloc    = ev[PGALLOC_NORMAL];
+	mem_data->pgalloc    += ev[PGALLOC_DMA];
+	mem_data->pgfault    = ev[PGFAULT];
+	mem_data->pgmajfault = ev[PGMAJFAULT];
+
+	si_meminfo(&val);
+	mem_data->sharedram = val.sharedram;
+	mem_data->totalram  = P2K(val.totalram);
+	mem_data->freeram   = P2K(val.freeram);
+	mem_data->totalhigh = P2K(val.totalhigh);
+	mem_data->freehigh  = P2K(val.freehigh);
+	mem_data->bufferram = P2K(val.bufferram);
+	mem_data->cached    = P2K(global_node_page_state(NR_FILE_PAGES)
+				- val.bufferram);
+
+	si_swapinfo(&val);
+	mem_data->totalswap = P2K(val.totalswap);
+	mem_data->freeswap  = P2K(val.freeswap);
+
+	mem_data->timestamp = get_tod_clock();
+	mem_data->sync_count_2++;
+}
+
+
+static struct appldata_ops ops = {
+	.name      = "mem",
+	.record_nr = APPLDATA_RECORD_MEM_ID,
+	.size	   = sizeof(struct appldata_mem_data),
+	.callback  = &appldata_get_mem_data,
+	.owner     = THIS_MODULE,
+	.mod_lvl   = {0xF0, 0xF0},		/* EBCDIC "00" */
+};
+
+
+/*
+ * appldata_mem_init()
+ *
+ * init_data, register ops
+ */
+static int __init appldata_mem_init(void)
+{
+	int ret;
+
+	ops.data = kzalloc(sizeof(struct appldata_mem_data), GFP_KERNEL);
+	if (!ops.data)
+		return -ENOMEM;
+
+	ret = appldata_register_ops(&ops);
+	if (ret)
+		kfree(ops.data);
+
+	return ret;
+}
+
+/*
+ * appldata_mem_exit()
+ *
+ * unregister ops
+ */
+static void __exit appldata_mem_exit(void)
+{
+	appldata_unregister_ops(&ops);
+	kfree(ops.data);
+}
+
+
+module_init(appldata_mem_init);
+module_exit(appldata_mem_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Gerald Schaefer");
+MODULE_DESCRIPTION("Linux-VM Monitor Stream, MEMORY statistics");
diff --git a/arch/s390/appldata/appldata_net_sum.c b/arch/s390/appldata/appldata_net_sum.c
new file mode 100644
index 0000000000..59c282ca00
--- /dev/null
+++ b/arch/s390/appldata/appldata_net_sum.c
@@ -0,0 +1,163 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Data gathering module for Linux-VM Monitor Stream, Stage 1.
+ * Collects accumulated network statistics (Packets received/transmitted,
+ * dropped, errors, ...).
+ *
+ * Copyright IBM Corp. 2003, 2006
+ *
+ * Author: Gerald Schaefer <gerald.schaefer@de.ibm.com>
+ */
+
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/errno.h>
+#include <linux/kernel_stat.h>
+#include <linux/netdevice.h>
+#include <net/net_namespace.h>
+
+#include "appldata.h"
+
+
+/*
+ * Network data
+ *
+ * This is accessed as binary data by z/VM. If changes to it can't be avoided,
+ * the structure version (product ID, see appldata_base.c) needs to be changed
+ * as well and all documentation and z/VM applications using it must be updated.
+ */
+struct appldata_net_sum_data {
+	u64 timestamp;
+	u32 sync_count_1;	/* after VM collected the record data, */
+	u32 sync_count_2;	/* sync_count_1 and sync_count_2 should be the
+				   same. If not, the record has been updated on
+				   the Linux side while VM was collecting the
+				   (possibly corrupt) data */
+
+	u32 nr_interfaces;	/* nr. of network interfaces being monitored */
+
+	u32 padding;		/* next value is 64-bit aligned, so these */
+				/* 4 byte would be padded out by compiler */
+
+	u64 rx_packets;		/* total packets received        */
+	u64 tx_packets;		/* total packets transmitted     */
+	u64 rx_bytes;		/* total bytes received          */
+	u64 tx_bytes;		/* total bytes transmitted       */
+	u64 rx_errors;		/* bad packets received          */
+	u64 tx_errors;		/* packet transmit problems      */
+	u64 rx_dropped;		/* no space in linux buffers     */
+	u64 tx_dropped;		/* no space available in linux   */
+	u64 collisions;		/* collisions while transmitting */
+} __packed;
+
+
+/*
+ * appldata_get_net_sum_data()
+ *
+ * gather accumulated network statistics
+ */
+static void appldata_get_net_sum_data(void *data)
+{
+	int i;
+	struct appldata_net_sum_data *net_data;
+	struct net_device *dev;
+	unsigned long rx_packets, tx_packets, rx_bytes, tx_bytes, rx_errors,
+			tx_errors, rx_dropped, tx_dropped, collisions;
+
+	net_data = data;
+	net_data->sync_count_1++;
+
+	i = 0;
+	rx_packets = 0;
+	tx_packets = 0;
+	rx_bytes   = 0;
+	tx_bytes   = 0;
+	rx_errors  = 0;
+	tx_errors  = 0;
+	rx_dropped = 0;
+	tx_dropped = 0;
+	collisions = 0;
+
+	rcu_read_lock();
+	for_each_netdev_rcu(&init_net, dev) {
+		const struct rtnl_link_stats64 *stats;
+		struct rtnl_link_stats64 temp;
+
+		stats = dev_get_stats(dev, &temp);
+		rx_packets += stats->rx_packets;
+		tx_packets += stats->tx_packets;
+		rx_bytes   += stats->rx_bytes;
+		tx_bytes   += stats->tx_bytes;
+		rx_errors  += stats->rx_errors;
+		tx_errors  += stats->tx_errors;
+		rx_dropped += stats->rx_dropped;
+		tx_dropped += stats->tx_dropped;
+		collisions += stats->collisions;
+		i++;
+	}
+	rcu_read_unlock();
+
+	net_data->nr_interfaces = i;
+	net_data->rx_packets = rx_packets;
+	net_data->tx_packets = tx_packets;
+	net_data->rx_bytes   = rx_bytes;
+	net_data->tx_bytes   = tx_bytes;
+	net_data->rx_errors  = rx_errors;
+	net_data->tx_errors  = tx_errors;
+	net_data->rx_dropped = rx_dropped;
+	net_data->tx_dropped = tx_dropped;
+	net_data->collisions = collisions;
+
+	net_data->timestamp = get_tod_clock();
+	net_data->sync_count_2++;
+}
+
+
+static struct appldata_ops ops = {
+	.name	   = "net_sum",
+	.record_nr = APPLDATA_RECORD_NET_SUM_ID,
+	.size	   = sizeof(struct appldata_net_sum_data),
+	.callback  = &appldata_get_net_sum_data,
+	.owner     = THIS_MODULE,
+	.mod_lvl   = {0xF0, 0xF0},		/* EBCDIC "00" */
+};
+
+
+/*
+ * appldata_net_init()
+ *
+ * init data, register ops
+ */
+static int __init appldata_net_init(void)
+{
+	int ret;
+
+	ops.data = kzalloc(sizeof(struct appldata_net_sum_data), GFP_KERNEL);
+	if (!ops.data)
+		return -ENOMEM;
+
+	ret = appldata_register_ops(&ops);
+	if (ret)
+		kfree(ops.data);
+
+	return ret;
+}
+
+/*
+ * appldata_net_exit()
+ *
+ * unregister ops
+ */
+static void __exit appldata_net_exit(void)
+{
+	appldata_unregister_ops(&ops);
+	kfree(ops.data);
+}
+
+
+module_init(appldata_net_init);
+module_exit(appldata_net_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Gerald Schaefer");
+MODULE_DESCRIPTION("Linux-VM Monitor Stream, accumulated network statistics");
diff --git a/arch/s390/appldata/appldata_os.c b/arch/s390/appldata/appldata_os.c
new file mode 100644
index 0000000000..a363d30ce7
--- /dev/null
+++ b/arch/s390/appldata/appldata_os.c
@@ -0,0 +1,211 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Data gathering module for Linux-VM Monitor Stream, Stage 1.
+ * Collects misc. OS related data (CPU utilization, running processes).
+ *
+ * Copyright IBM Corp. 2003, 2006
+ *
+ * Author: Gerald Schaefer <gerald.schaefer@de.ibm.com>
+ */
+
+#define KMSG_COMPONENT	"appldata"
+#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
+
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/slab.h>
+#include <linux/errno.h>
+#include <linux/kernel_stat.h>
+#include <linux/netdevice.h>
+#include <linux/sched.h>
+#include <linux/sched/loadavg.h>
+#include <linux/sched/stat.h>
+#include <asm/appldata.h>
+#include <asm/smp.h>
+
+#include "appldata.h"
+
+/*
+ * OS data
+ *
+ * This is accessed as binary data by z/VM. If changes to it can't be avoided,
+ * the structure version (product ID, see appldata_base.c) needs to be changed
+ * as well and all documentation and z/VM applications using it must be
+ * updated.
+ */
+struct appldata_os_per_cpu {
+	u32 per_cpu_user;	/* timer ticks spent in user mode   */
+	u32 per_cpu_nice;	/* ... spent with modified priority */
+	u32 per_cpu_system;	/* ... spent in kernel mode         */
+	u32 per_cpu_idle;	/* ... spent in idle mode           */
+
+	/* New in 2.6 */
+	u32 per_cpu_irq;	/* ... spent in interrupts          */
+	u32 per_cpu_softirq;	/* ... spent in softirqs            */
+	u32 per_cpu_iowait;	/* ... spent while waiting for I/O  */
+
+	/* New in modification level 01 */
+	u32 per_cpu_steal;	/* ... stolen by hypervisor	    */
+	u32 cpu_id;		/* number of this CPU		    */
+} __attribute__((packed));
+
+struct appldata_os_data {
+	u64 timestamp;
+	u32 sync_count_1;	/* after VM collected the record data, */
+	u32 sync_count_2;	/* sync_count_1 and sync_count_2 should be the
+				   same. If not, the record has been updated on
+				   the Linux side while VM was collecting the
+				   (possibly corrupt) data */
+
+	u32 nr_cpus;		/* number of (virtual) CPUs        */
+	u32 per_cpu_size;	/* size of the per-cpu data struct */
+	u32 cpu_offset;		/* offset of the first per-cpu data struct */
+
+	u32 nr_running;		/* number of runnable threads      */
+	u32 nr_threads;		/* number of threads               */
+	u32 avenrun[3];		/* average nr. of running processes during */
+				/* the last 1, 5 and 15 minutes */
+
+	/* New in 2.6 */
+	u32 nr_iowait;		/* number of blocked threads
+				   (waiting for I/O)               */
+
+	/* per cpu data */
+	struct appldata_os_per_cpu os_cpu[];
+} __attribute__((packed));
+
+static struct appldata_os_data *appldata_os_data;
+
+static struct appldata_ops ops = {
+	.name	   = "os",
+	.record_nr = APPLDATA_RECORD_OS_ID,
+	.owner	   = THIS_MODULE,
+	.mod_lvl   = {0xF0, 0xF1},		/* EBCDIC "01" */
+};
+
+
+/*
+ * appldata_get_os_data()
+ *
+ * gather OS data
+ */
+static void appldata_get_os_data(void *data)
+{
+	int i, j, rc;
+	struct appldata_os_data *os_data;
+	unsigned int new_size;
+
+	os_data = data;
+	os_data->sync_count_1++;
+
+	os_data->nr_threads = nr_threads;
+	os_data->nr_running = nr_running();
+	os_data->nr_iowait  = nr_iowait();
+	os_data->avenrun[0] = avenrun[0] + (FIXED_1/200);
+	os_data->avenrun[1] = avenrun[1] + (FIXED_1/200);
+	os_data->avenrun[2] = avenrun[2] + (FIXED_1/200);
+
+	j = 0;
+	for_each_online_cpu(i) {
+		os_data->os_cpu[j].per_cpu_user =
+			nsecs_to_jiffies(kcpustat_cpu(i).cpustat[CPUTIME_USER]);
+		os_data->os_cpu[j].per_cpu_nice =
+			nsecs_to_jiffies(kcpustat_cpu(i).cpustat[CPUTIME_NICE]);
+		os_data->os_cpu[j].per_cpu_system =
+			nsecs_to_jiffies(kcpustat_cpu(i).cpustat[CPUTIME_SYSTEM]);
+		os_data->os_cpu[j].per_cpu_idle =
+			nsecs_to_jiffies(kcpustat_cpu(i).cpustat[CPUTIME_IDLE]);
+		os_data->os_cpu[j].per_cpu_irq =
+			nsecs_to_jiffies(kcpustat_cpu(i).cpustat[CPUTIME_IRQ]);
+		os_data->os_cpu[j].per_cpu_softirq =
+			nsecs_to_jiffies(kcpustat_cpu(i).cpustat[CPUTIME_SOFTIRQ]);
+		os_data->os_cpu[j].per_cpu_iowait =
+			nsecs_to_jiffies(kcpustat_cpu(i).cpustat[CPUTIME_IOWAIT]);
+		os_data->os_cpu[j].per_cpu_steal =
+			nsecs_to_jiffies(kcpustat_cpu(i).cpustat[CPUTIME_STEAL]);
+		os_data->os_cpu[j].cpu_id = i;
+		j++;
+	}
+
+	os_data->nr_cpus = j;
+
+	new_size = struct_size(os_data, os_cpu, os_data->nr_cpus);
+	if (ops.size != new_size) {
+		if (ops.active) {
+			rc = appldata_diag(APPLDATA_RECORD_OS_ID,
+					   APPLDATA_START_INTERVAL_REC,
+					   (unsigned long) ops.data, new_size,
+					   ops.mod_lvl);
+			if (rc != 0)
+				pr_err("Starting a new OS data collection "
+				       "failed with rc=%d\n", rc);
+
+			rc = appldata_diag(APPLDATA_RECORD_OS_ID,
+					   APPLDATA_STOP_REC,
+					   (unsigned long) ops.data, ops.size,
+					   ops.mod_lvl);
+			if (rc != 0)
+				pr_err("Stopping a faulty OS data "
+				       "collection failed with rc=%d\n", rc);
+		}
+		ops.size = new_size;
+	}
+	os_data->timestamp = get_tod_clock();
+	os_data->sync_count_2++;
+}
+
+
+/*
+ * appldata_os_init()
+ *
+ * init data, register ops
+ */
+static int __init appldata_os_init(void)
+{
+	int rc, max_size;
+
+	max_size = struct_size(appldata_os_data, os_cpu, num_possible_cpus());
+	if (max_size > APPLDATA_MAX_REC_SIZE) {
+		pr_err("Maximum OS record size %i exceeds the maximum "
+		       "record size %i\n", max_size, APPLDATA_MAX_REC_SIZE);
+		rc = -ENOMEM;
+		goto out;
+	}
+
+	appldata_os_data = kzalloc(max_size, GFP_KERNEL | GFP_DMA);
+	if (appldata_os_data == NULL) {
+		rc = -ENOMEM;
+		goto out;
+	}
+
+	appldata_os_data->per_cpu_size = sizeof(struct appldata_os_per_cpu);
+	appldata_os_data->cpu_offset   = offsetof(struct appldata_os_data,
+							os_cpu);
+
+	ops.data = appldata_os_data;
+	ops.callback  = &appldata_get_os_data;
+	rc = appldata_register_ops(&ops);
+	if (rc != 0)
+		kfree(appldata_os_data);
+out:
+	return rc;
+}
+
+/*
+ * appldata_os_exit()
+ *
+ * unregister ops
+ */
+static void __exit appldata_os_exit(void)
+{
+	appldata_unregister_ops(&ops);
+	kfree(appldata_os_data);
+}
+
+
+module_init(appldata_os_init);
+module_exit(appldata_os_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Gerald Schaefer");
+MODULE_DESCRIPTION("Linux-VM Monitor Stream, OS statistics");
diff --git a/arch/s390/boot/.gitignore b/arch/s390/boot/.gitignore
new file mode 100644
index 0000000000..f56591bc08
--- /dev/null
+++ b/arch/s390/boot/.gitignore
@@ -0,0 +1,7 @@
+# SPDX-License-Identifier: GPL-2.0-only
+image
+bzImage
+section_cmp.*
+vmlinux
+vmlinux.lds
+vmlinux.syms
diff --git a/arch/s390/boot/Makefile b/arch/s390/boot/Makefile
new file mode 100644
index 0000000000..c7c81e5f92
--- /dev/null
+++ b/arch/s390/boot/Makefile
@@ -0,0 +1,133 @@
+# SPDX-License-Identifier: GPL-2.0
+#
+# Makefile for the linux s390-specific parts of the memory manager.
+#
+
+KCOV_INSTRUMENT := n
+GCOV_PROFILE := n
+UBSAN_SANITIZE := n
+KASAN_SANITIZE := n
+KCSAN_SANITIZE := n
+
+KBUILD_AFLAGS := $(KBUILD_AFLAGS_DECOMPRESSOR)
+KBUILD_CFLAGS := $(KBUILD_CFLAGS_DECOMPRESSOR)
+
+#
+# Use minimum architecture for als.c to be able to print an error
+# message if the kernel is started on a machine which is too old
+#
+ifndef CONFIG_CC_IS_CLANG
+CC_FLAGS_MARCH_MINIMUM := -march=z900
+else
+CC_FLAGS_MARCH_MINIMUM := -march=z10
+endif
+
+ifneq ($(CC_FLAGS_MARCH),$(CC_FLAGS_MARCH_MINIMUM))
+AFLAGS_REMOVE_head.o		+= $(CC_FLAGS_MARCH)
+AFLAGS_head.o			+= $(CC_FLAGS_MARCH_MINIMUM)
+AFLAGS_REMOVE_mem.o		+= $(CC_FLAGS_MARCH)
+AFLAGS_mem.o			+= $(CC_FLAGS_MARCH_MINIMUM)
+CFLAGS_REMOVE_als.o		+= $(CC_FLAGS_MARCH)
+CFLAGS_als.o			+= $(CC_FLAGS_MARCH_MINIMUM)
+CFLAGS_REMOVE_sclp_early_core.o	+= $(CC_FLAGS_MARCH)
+CFLAGS_sclp_early_core.o	+= $(CC_FLAGS_MARCH_MINIMUM)
+endif
+
+CFLAGS_sclp_early_core.o += -I$(srctree)/drivers/s390/char
+
+obj-y	:= head.o als.o startup.o physmem_info.o ipl_parm.o ipl_report.o vmem.o
+obj-y	+= string.o ebcdic.o sclp_early_core.o mem.o ipl_vmparm.o cmdline.o
+obj-y	+= version.o pgm_check_info.o ctype.o ipl_data.o machine_kexec_reloc.o
+obj-$(findstring y, $(CONFIG_PROTECTED_VIRTUALIZATION_GUEST) $(CONFIG_PGSTE))	+= uv.o
+obj-$(CONFIG_RANDOMIZE_BASE)	+= kaslr.o
+obj-y	+= $(if $(CONFIG_KERNEL_UNCOMPRESSED),,decompressor.o) info.o
+obj-$(CONFIG_KERNEL_ZSTD) += clz_ctz.o
+obj-all := $(obj-y) piggy.o syms.o
+
+targets	:= bzImage section_cmp.boot.data section_cmp.boot.preserved.data $(obj-y)
+targets	+= vmlinux.lds vmlinux vmlinux.bin vmlinux.bin.gz vmlinux.bin.bz2
+targets += vmlinux.bin.xz vmlinux.bin.lzma vmlinux.bin.lzo vmlinux.bin.lz4
+targets += vmlinux.bin.zst info.bin syms.bin vmlinux.syms $(obj-all)
+
+OBJECTS := $(addprefix $(obj)/,$(obj-y))
+OBJECTS_ALL := $(addprefix $(obj)/,$(obj-all))
+
+clean-files += vmlinux.map
+
+quiet_cmd_section_cmp = SECTCMP $*
+define cmd_section_cmp
+	s1=`$(OBJDUMP) -t -j "$*" "$<" | sort | \
+		sed -n "/0000000000000000/! s/.*\s$*\s\+//p" | sha256sum`; \
+	s2=`$(OBJDUMP) -t -j "$*" "$(word 2,$^)" | sort | \
+		sed -n "/0000000000000000/! s/.*\s$*\s\+//p" | sha256sum`; \
+	if [ "$$s1" != "$$s2" ]; then \
+		echo "error: section $* differs between $< and $(word 2,$^)" >&2; \
+		exit 1; \
+	fi; \
+	touch $@
+endef
+
+$(obj)/bzImage: $(obj)/vmlinux $(obj)/section_cmp.boot.data $(obj)/section_cmp.boot.preserved.data FORCE
+	$(call if_changed,objcopy)
+
+$(obj)/section_cmp%: vmlinux $(obj)/vmlinux FORCE
+	$(call if_changed,section_cmp)
+
+LDFLAGS_vmlinux := --oformat $(LD_BFD) -e startup $(if $(CONFIG_VMLINUX_MAP),-Map=$(obj)/vmlinux.map) --build-id=sha1 -T
+$(obj)/vmlinux: $(obj)/vmlinux.lds $(OBJECTS_ALL) FORCE
+	$(call if_changed,ld)
+
+LDFLAGS_vmlinux.syms := --oformat $(LD_BFD) -e startup -T
+$(obj)/vmlinux.syms: $(obj)/vmlinux.lds $(OBJECTS) FORCE
+	$(call if_changed,ld)
+
+quiet_cmd_dumpsyms = DUMPSYMS $<
+define cmd_dumpsyms
+	$(NM) -n -S --format=bsd "$<" | sed -nE 's/^0*([0-9a-fA-F]+) 0*([0-9a-fA-F]+) [tT] ([^ ]*)$$/\1 \2 \3/p' | tr '\n' '\0' > "$@"
+endef
+
+$(obj)/syms.bin: $(obj)/vmlinux.syms FORCE
+	$(call if_changed,dumpsyms)
+
+OBJCOPYFLAGS_syms.o := -I binary -O elf64-s390 -B s390:64-bit --rename-section .data=.decompressor.syms
+$(obj)/syms.o: $(obj)/syms.bin FORCE
+	$(call if_changed,objcopy)
+
+OBJCOPYFLAGS_info.bin := -O binary --only-section=.vmlinux.info --set-section-flags .vmlinux.info=load
+$(obj)/info.bin: vmlinux FORCE
+	$(call if_changed,objcopy)
+
+OBJCOPYFLAGS_info.o := -I binary -O elf64-s390 -B s390:64-bit --rename-section .data=.vmlinux.info
+$(obj)/info.o: $(obj)/info.bin FORCE
+	$(call if_changed,objcopy)
+
+OBJCOPYFLAGS_vmlinux.bin := -O binary --remove-section=.comment --remove-section=.vmlinux.info -S
+$(obj)/vmlinux.bin: vmlinux FORCE
+	$(call if_changed,objcopy)
+
+suffix-$(CONFIG_KERNEL_GZIP)  := .gz
+suffix-$(CONFIG_KERNEL_BZIP2) := .bz2
+suffix-$(CONFIG_KERNEL_LZ4)  := .lz4
+suffix-$(CONFIG_KERNEL_LZMA)  := .lzma
+suffix-$(CONFIG_KERNEL_LZO)  := .lzo
+suffix-$(CONFIG_KERNEL_XZ)  := .xz
+suffix-$(CONFIG_KERNEL_ZSTD)  := .zst
+
+$(obj)/vmlinux.bin.gz: $(obj)/vmlinux.bin FORCE
+	$(call if_changed,gzip)
+$(obj)/vmlinux.bin.bz2: $(obj)/vmlinux.bin FORCE
+	$(call if_changed,bzip2_with_size)
+$(obj)/vmlinux.bin.lz4: $(obj)/vmlinux.bin FORCE
+	$(call if_changed,lz4_with_size)
+$(obj)/vmlinux.bin.lzma: $(obj)/vmlinux.bin FORCE
+	$(call if_changed,lzma_with_size)
+$(obj)/vmlinux.bin.lzo: $(obj)/vmlinux.bin FORCE
+	$(call if_changed,lzo_with_size)
+$(obj)/vmlinux.bin.xz: $(obj)/vmlinux.bin FORCE
+	$(call if_changed,xzkern_with_size)
+$(obj)/vmlinux.bin.zst: $(obj)/vmlinux.bin FORCE
+	$(call if_changed,zstd22_with_size)
+
+OBJCOPYFLAGS_piggy.o := -I binary -O elf64-s390 -B s390:64-bit --rename-section .data=.vmlinux.bin.compressed
+$(obj)/piggy.o: $(obj)/vmlinux.bin$(suffix-y) FORCE
+	$(call if_changed,objcopy)
diff --git a/arch/s390/boot/als.c b/arch/s390/boot/als.c
new file mode 100644
index 0000000000..47c48fbfb5
--- /dev/null
+++ b/arch/s390/boot/als.c
@@ -0,0 +1,114 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ *    Copyright IBM Corp. 2016
+ */
+#include <linux/kernel.h>
+#include <asm/processor.h>
+#include <asm/facility.h>
+#include <asm/lowcore.h>
+#include <asm/sclp.h>
+#include "boot.h"
+
+/*
+ * The code within this file will be called very early. It may _not_
+ * access anything within the bss section, since that is not cleared
+ * yet and may contain data (e.g. initrd) that must be saved by other
+ * code.
+ * For temporary objects the stack (16k) should be used.
+ */
+
+static unsigned long als[] = { FACILITIES_ALS };
+
+static void u16_to_hex(char *str, u16 val)
+{
+	int i, num;
+
+	for (i = 1; i <= 4; i++) {
+		num = (val >> (16 - 4 * i)) & 0xf;
+		if (num >= 10)
+			num += 7;
+		*str++ = '0' + num;
+	}
+	*str = '\0';
+}
+
+static void print_machine_type(void)
+{
+	static char mach_str[80] = "Detected machine-type number: ";
+	char type_str[5];
+	struct cpuid id;
+
+	get_cpu_id(&id);
+	u16_to_hex(type_str, id.machine);
+	strcat(mach_str, type_str);
+	strcat(mach_str, "\n");
+	sclp_early_printk(mach_str);
+}
+
+static void u16_to_decimal(char *str, u16 val)
+{
+	int div = 1;
+
+	while (div * 10 <= val)
+		div *= 10;
+	while (div) {
+		*str++ = '0' + val / div;
+		val %= div;
+		div /= 10;
+	}
+	*str = '\0';
+}
+
+void print_missing_facilities(void)
+{
+	static char als_str[80] = "Missing facilities: ";
+	unsigned long val;
+	char val_str[6];
+	int i, j, first;
+
+	first = 1;
+	for (i = 0; i < ARRAY_SIZE(als); i++) {
+		val = ~stfle_fac_list[i] & als[i];
+		for (j = 0; j < BITS_PER_LONG; j++) {
+			if (!(val & (1UL << (BITS_PER_LONG - 1 - j))))
+				continue;
+			if (!first)
+				strcat(als_str, ",");
+			/*
+			 * Make sure we stay within one line. Consider that
+			 * each facility bit adds up to five characters and
+			 * z/VM adds a four character prefix.
+			 */
+			if (strlen(als_str) > 70) {
+				strcat(als_str, "\n");
+				sclp_early_printk(als_str);
+				*als_str = '\0';
+			}
+			u16_to_decimal(val_str, i * BITS_PER_LONG + j);
+			strcat(als_str, val_str);
+			first = 0;
+		}
+	}
+	strcat(als_str, "\n");
+	sclp_early_printk(als_str);
+}
+
+static void facility_mismatch(void)
+{
+	sclp_early_printk("The Linux kernel requires more recent processor hardware\n");
+	print_machine_type();
+	print_missing_facilities();
+	sclp_early_printk("See Principles of Operations for facility bits\n");
+	disabled_wait();
+}
+
+void verify_facilities(void)
+{
+	int i;
+
+	__stfle(stfle_fac_list, ARRAY_SIZE(stfle_fac_list));
+	for (i = 0; i < ARRAY_SIZE(als); i++) {
+		if ((stfle_fac_list[i] & als[i]) != als[i])
+			facility_mismatch();
+	}
+}
diff --git a/arch/s390/boot/boot.h b/arch/s390/boot/boot.h
new file mode 100644
index 0000000000..222c6886ac
--- /dev/null
+++ b/arch/s390/boot/boot.h
@@ -0,0 +1,102 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef BOOT_BOOT_H
+#define BOOT_BOOT_H
+
+#include <linux/types.h>
+
+#define IPL_START	0x200
+
+#ifndef __ASSEMBLY__
+
+#include <asm/physmem_info.h>
+
+struct machine_info {
+	unsigned char has_edat1 : 1;
+	unsigned char has_edat2 : 1;
+	unsigned char has_nx : 1;
+};
+
+struct vmlinux_info {
+	unsigned long default_lma;
+	unsigned long entry;
+	unsigned long image_size;	/* does not include .bss */
+	unsigned long bss_size;		/* uncompressed image .bss size */
+	unsigned long bootdata_off;
+	unsigned long bootdata_size;
+	unsigned long bootdata_preserved_off;
+	unsigned long bootdata_preserved_size;
+	unsigned long dynsym_start;
+	unsigned long rela_dyn_start;
+	unsigned long rela_dyn_end;
+	unsigned long amode31_size;
+	unsigned long init_mm_off;
+	unsigned long swapper_pg_dir_off;
+	unsigned long invalid_pg_dir_off;
+#ifdef CONFIG_KASAN
+	unsigned long kasan_early_shadow_page_off;
+	unsigned long kasan_early_shadow_pte_off;
+	unsigned long kasan_early_shadow_pmd_off;
+	unsigned long kasan_early_shadow_pud_off;
+	unsigned long kasan_early_shadow_p4d_off;
+#endif
+};
+
+void startup_kernel(void);
+unsigned long detect_max_physmem_end(void);
+void detect_physmem_online_ranges(unsigned long max_physmem_end);
+void physmem_set_usable_limit(unsigned long limit);
+void physmem_reserve(enum reserved_range_type type, unsigned long addr, unsigned long size);
+void physmem_free(enum reserved_range_type type);
+/* for continuous/multiple allocations per type */
+unsigned long physmem_alloc_top_down(enum reserved_range_type type, unsigned long size,
+				     unsigned long align);
+/* for single allocations, 1 per type */
+unsigned long physmem_alloc_range(enum reserved_range_type type, unsigned long size,
+				  unsigned long align, unsigned long min, unsigned long max,
+				  bool die_on_oom);
+unsigned long get_physmem_alloc_pos(void);
+bool ipl_report_certs_intersects(unsigned long addr, unsigned long size,
+				 unsigned long *intersection_start);
+bool is_ipl_block_dump(void);
+void store_ipl_parmblock(void);
+int read_ipl_report(void);
+void save_ipl_cert_comp_list(void);
+void setup_boot_command_line(void);
+void parse_boot_command_line(void);
+void verify_facilities(void);
+void print_missing_facilities(void);
+void sclp_early_setup_buffer(void);
+void print_pgm_check_info(void);
+unsigned long randomize_within_range(unsigned long size, unsigned long align,
+				     unsigned long min, unsigned long max);
+void setup_vmem(unsigned long asce_limit);
+void __printf(1, 2) decompressor_printk(const char *fmt, ...);
+void print_stacktrace(unsigned long sp);
+void error(char *m);
+
+extern struct machine_info machine;
+
+/* Symbols defined by linker scripts */
+extern const char kernel_version[];
+extern unsigned long memory_limit;
+extern unsigned long vmalloc_size;
+extern int vmalloc_size_set;
+extern char __boot_data_start[], __boot_data_end[];
+extern char __boot_data_preserved_start[], __boot_data_preserved_end[];
+extern char _decompressor_syms_start[], _decompressor_syms_end[];
+extern char _stack_start[], _stack_end[];
+extern char _end[], _decompressor_end[];
+extern unsigned char _compressed_start[];
+extern unsigned char _compressed_end[];
+extern struct vmlinux_info _vmlinux_info;
+#define vmlinux _vmlinux_info
+
+#define __abs_lowcore_pa(x)	(((unsigned long)(x) - __abs_lowcore) % sizeof(struct lowcore))
+
+static inline bool intersects(unsigned long addr0, unsigned long size0,
+			      unsigned long addr1, unsigned long size1)
+{
+	return addr0 + size0 > addr1 && addr1 + size1 > addr0;
+}
+#endif /* __ASSEMBLY__ */
+#endif /* BOOT_BOOT_H */
diff --git a/arch/s390/boot/clz_ctz.c b/arch/s390/boot/clz_ctz.c
new file mode 100644
index 0000000000..c3ebf24859
--- /dev/null
+++ b/arch/s390/boot/clz_ctz.c
@@ -0,0 +1,2 @@
+// SPDX-License-Identifier: GPL-2.0
+#include "../../../../lib/clz_ctz.c"
diff --git a/arch/s390/boot/cmdline.c b/arch/s390/boot/cmdline.c
new file mode 100644
index 0000000000..73d826cdbd
--- /dev/null
+++ b/arch/s390/boot/cmdline.c
@@ -0,0 +1,2 @@
+// SPDX-License-Identifier: GPL-2.0
+#include "../../../lib/cmdline.c"
diff --git a/arch/s390/boot/ctype.c b/arch/s390/boot/ctype.c
new file mode 100644
index 0000000000..2495810b47
--- /dev/null
+++ b/arch/s390/boot/ctype.c
@@ -0,0 +1,2 @@
+// SPDX-License-Identifier: GPL-2.0
+#include "../../../lib/ctype.c"
diff --git a/arch/s390/boot/decompressor.c b/arch/s390/boot/decompressor.c
new file mode 100644
index 0000000000..d762733a07
--- /dev/null
+++ b/arch/s390/boot/decompressor.c
@@ -0,0 +1,86 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Definitions and wrapper functions for kernel decompressor
+ *
+ * Copyright IBM Corp. 2010
+ *
+ * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>
+ */
+
+#include <linux/kernel.h>
+#include <linux/string.h>
+#include <asm/page.h>
+#include "decompressor.h"
+#include "boot.h"
+
+/*
+ * gzip declarations
+ */
+#define STATIC static
+
+#undef memset
+#undef memcpy
+#undef memmove
+#define memmove memmove
+#define memzero(s, n) memset((s), 0, (n))
+
+#if defined(CONFIG_KERNEL_BZIP2)
+#define BOOT_HEAP_SIZE	0x400000
+#elif defined(CONFIG_KERNEL_ZSTD)
+#define BOOT_HEAP_SIZE	0x30000
+#else
+#define BOOT_HEAP_SIZE	0x10000
+#endif
+
+static unsigned long free_mem_ptr = (unsigned long) _end;
+static unsigned long free_mem_end_ptr = (unsigned long) _end + BOOT_HEAP_SIZE;
+
+#ifdef CONFIG_KERNEL_GZIP
+#include "../../../../lib/decompress_inflate.c"
+#endif
+
+#ifdef CONFIG_KERNEL_BZIP2
+#include "../../../../lib/decompress_bunzip2.c"
+#endif
+
+#ifdef CONFIG_KERNEL_LZ4
+#include "../../../../lib/decompress_unlz4.c"
+#endif
+
+#ifdef CONFIG_KERNEL_LZMA
+#include "../../../../lib/decompress_unlzma.c"
+#endif
+
+#ifdef CONFIG_KERNEL_LZO
+#include "../../../../lib/decompress_unlzo.c"
+#endif
+
+#ifdef CONFIG_KERNEL_XZ
+#include "../../../../lib/decompress_unxz.c"
+#endif
+
+#ifdef CONFIG_KERNEL_ZSTD
+#include "../../../../lib/decompress_unzstd.c"
+#endif
+
+#define decompress_offset ALIGN((unsigned long)_end + BOOT_HEAP_SIZE, PAGE_SIZE)
+
+unsigned long mem_safe_offset(void)
+{
+	/*
+	 * due to 4MB HEAD_SIZE for bzip2
+	 * 'decompress_offset + vmlinux.image_size' could be larger than
+	 * kernel at final position + its .bss, so take the larger of two
+	 */
+	return max(decompress_offset + vmlinux.image_size,
+		   vmlinux.default_lma + vmlinux.image_size + vmlinux.bss_size);
+}
+
+void *decompress_kernel(void)
+{
+	void *output = (void *)decompress_offset;
+
+	__decompress(_compressed_start, _compressed_end - _compressed_start,
+		     NULL, NULL, output, vmlinux.image_size, NULL, error);
+	return output;
+}
diff --git a/arch/s390/boot/decompressor.h b/arch/s390/boot/decompressor.h
new file mode 100644
index 0000000000..92b81d2ea3
--- /dev/null
+++ b/arch/s390/boot/decompressor.h
@@ -0,0 +1,12 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef BOOT_COMPRESSED_DECOMPRESSOR_H
+#define BOOT_COMPRESSED_DECOMPRESSOR_H
+
+#ifdef CONFIG_KERNEL_UNCOMPRESSED
+static inline void *decompress_kernel(void) { return NULL; }
+#else
+void *decompress_kernel(void);
+#endif
+unsigned long mem_safe_offset(void);
+
+#endif /* BOOT_COMPRESSED_DECOMPRESSOR_H */
diff --git a/arch/s390/boot/ebcdic.c b/arch/s390/boot/ebcdic.c
new file mode 100644
index 0000000000..7391e7d360
--- /dev/null
+++ b/arch/s390/boot/ebcdic.c
@@ -0,0 +1,2 @@
+// SPDX-License-Identifier: GPL-2.0
+#include "../kernel/ebcdic.c"
diff --git a/arch/s390/boot/head.S b/arch/s390/boot/head.S
new file mode 100644
index 0000000000..637c29c3f6
--- /dev/null
+++ b/arch/s390/boot/head.S
@@ -0,0 +1,320 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright IBM Corp. 1999, 2010
+ *
+ *    Author(s): Hartmut Penner <hp@de.ibm.com>
+ *		 Martin Schwidefsky <schwidefsky@de.ibm.com>
+ *		 Rob van der Heij <rvdhei@iae.nl>
+ *
+ * There are 5 different IPL methods
+ *  1) load the image directly into ram at address 0 and do an PSW restart
+ *  2) linload will load the image from address 0x10000 to memory 0x10000
+ *     and start the code thru LPSW 0x0008000080010000 (VM only, deprecated)
+ *  3) generate the tape ipl header, store the generated image on a tape
+ *     and ipl from it
+ *     In case of SL tape you need to IPL 5 times to get past VOL1 etc
+ *  4) generate the vm reader ipl header, move the generated image to the
+ *     VM reader (use option NOH!) and do a ipl from reader (VM only)
+ *  5) direct call of start by the SALIPL loader
+ *  We use the cpuid to distinguish between VM and native ipl
+ *  params for kernel are pushed to 0x10400 (see setup.h)
+ *
+ */
+
+#include <linux/init.h>
+#include <linux/linkage.h>
+#include <asm/asm-offsets.h>
+#include <asm/page.h>
+#include <asm/ptrace.h>
+#include <asm/sclp.h>
+#include "boot.h"
+
+#define EP_OFFSET	0x10008
+#define EP_STRING	"S390EP"
+#define IPL_BS		0x730
+
+__HEAD
+ipl_start:
+	mvi	__LC_AR_MODE_ID,1	# set esame flag
+	slr	%r0,%r0			# set cpuid to zero
+	lhi	%r1,2			# mode 2 = esame (dump)
+	sigp	%r1,%r0,0x12		# switch to esame mode
+	sam64				# switch to 64 bit addressing mode
+	lgh	%r1,__LC_SUBCHANNEL_ID	# test if subchannel number
+	brctg	%r1,.Lnoload		#  is valid
+	llgf	%r1,__LC_SUBCHANNEL_ID	# load ipl subchannel number
+	lghi	%r2,IPL_BS		# load start address
+	bras	%r14,.Lloader		# load rest of ipl image
+	larl	%r12,parmarea		# pointer to parameter area
+	stg	%r1,IPL_DEVICE-PARMAREA(%r12) # save ipl device number
+#
+# load parameter file from ipl device
+#
+.Lagain1:
+	larl	%r2,_end		# ramdisk loc. is temp
+	bras	%r14,.Lloader		# load parameter file
+	ltgr	%r2,%r2			# got anything ?
+	jz	.Lnopf
+	lg	%r3,MAX_COMMAND_LINE_SIZE-PARMAREA(%r12)
+	aghi	%r3,-1
+	clgr	%r2,%r3
+	jl	.Lnotrunc
+	lgr	%r2,%r3
+.Lnotrunc:
+	larl	%r4,_end
+	larl	%r13,.L_hdr
+	clc	0(3,%r4),0(%r13)	# if it is HDRx
+	jz	.Lagain1		# skip dataset header
+	larl	%r13,.L_eof
+	clc	0(3,%r4),0(%r13)	# if it is EOFx
+	jz	.Lagain1		# skip data set trailer
+	lgr	%r5,%r2
+	la	%r6,COMMAND_LINE-PARMAREA(%r12)
+	lgr	%r7,%r2
+	aghi	%r7,1
+	mvcl	%r6,%r4
+.Lnopf:
+#
+# load ramdisk from ipl device
+#
+.Lagain2:
+	larl	%r2,_end		# addr of ramdisk
+	stg	%r2,INITRD_START-PARMAREA(%r12)
+	bras	%r14,.Lloader		# load ramdisk
+	stg	%r2,INITRD_SIZE-PARMAREA(%r12) # store size of rd
+	ltgr	%r2,%r2
+	jnz	.Lrdcont
+	stg	%r2,INITRD_START-PARMAREA(%r12) # no ramdisk found
+.Lrdcont:
+	larl	%r2,_end
+	larl	%r13,.L_hdr		# skip HDRx and EOFx
+	clc	0(3,%r2),0(%r13)
+	jz	.Lagain2
+	larl	%r13,.L_eof
+	clc	0(3,%r2),0(%r13)
+	jz	.Lagain2
+#
+# reset files in VM reader
+#
+	larl	%r13,.Lcpuid
+	stidp	0(%r13)			# store cpuid
+	tm	0(%r13),0xff		# running VM ?
+	jno	.Lnoreset
+	larl	%r2,.Lreset
+	lghi	%r3,26
+	diag	%r2,%r3,8
+	larl	%r5,.Lirb
+	stsch	0(%r5)			# check if irq is pending
+	tm	30(%r5),0x0f		# by verifying if any of the
+	jnz	.Lwaitforirq		# activity or status control
+	tm	31(%r5),0xff		# bits is set in the schib
+	jz	.Lnoreset
+.Lwaitforirq:
+	bras	%r14,.Lirqwait		# wait for IO interrupt
+	c	%r1,__LC_SUBCHANNEL_ID	# compare subchannel number
+	jne	.Lwaitforirq
+	larl	%r5,.Lirb
+	tsch	0(%r5)
+.Lnoreset:
+	j	.Lnoload
+#
+# everything loaded, go for it
+#
+.Lnoload:
+	jg	startup
+#
+# subroutine to wait for end I/O
+#
+.Lirqwait:
+	larl	%r13,.Lnewpswmask	# set up IO interrupt psw
+	mvc	__LC_IO_NEW_PSW(8),0(%r13)
+	stg	%r14,__LC_IO_NEW_PSW+8
+	larl	%r13,.Lwaitpsw
+	lpswe	0(%r13)
+.Lioint:
+#
+# subroutine for loading cards from the reader
+#
+.Lloader:
+	lgr	%r4,%r14
+	larl	%r3,.Lorb		# r2 = address of orb into r2
+	larl	%r5,.Lirb		# r4 = address of irb
+	larl	%r6,.Lccws
+	lghi	%r7,20
+.Linit:
+	st	%r2,4(%r6)		# initialize CCW data addresses
+	la	%r2,0x50(%r2)
+	la	%r6,8(%r6)
+	brctg	%r7,.Linit
+	larl	%r13,.Lcr6
+	lctlg	%c6,%c6,0(%r13)
+	xgr	%r2,%r2
+.Lldlp:
+	ssch	0(%r3)			# load chunk of 1600 bytes
+	jnz	.Llderr
+.Lwait4irq:
+	bras	%r14,.Lirqwait
+	c	%r1,__LC_SUBCHANNEL_ID	# compare subchannel number
+	jne	.Lwait4irq
+	tsch	0(%r5)
+	xgr	%r0,%r0
+	ic	%r0,8(%r5)		# get device status
+	cghi	%r0,8			# channel end ?
+	je	.Lcont
+	cghi	%r0,12			# channel end + device end ?
+	je	.Lcont
+	llgf	%r0,4(%r5)
+	sgf	%r0,8(%r3)		# r0/8 = number of ccws executed
+	mghi	%r0,10			# *10 = number of bytes in ccws
+	llgh	%r3,10(%r5)		# get residual count
+	sgr	%r0,%r3			# #ccws*80-residual=#bytes read
+	agr	%r2,%r0
+	br	%r4			# r2 contains the total size
+.Lcont:
+	aghi	%r2,0x640		# add 0x640 to total size
+	larl	%r6,.Lccws
+	lghi	%r7,20
+.Lincr:
+	l	%r0,4(%r6)		# update CCW data addresses
+	aghi	%r0,0x640
+	st	%r0,4(%r6)
+	aghi	%r6,8
+	brctg	%r7,.Lincr
+	j	.Lldlp
+.Llderr:
+	larl	%r13,.Lcrash
+	lpsw	0(%r13)
+
+	.balign	8
+.Lwaitpsw:
+	.quad	0x0202000180000000,.Lioint
+.Lnewpswmask:
+	.quad	0x0000000180000000
+	.balign	8
+.Lorb:	.long	0x00000000,0x0080ff00,.Lccws
+.Lirb:	.long	0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
+	.balign	8
+.Lcr6:	.quad	0x00000000ff000000
+	.balign	8
+.Lcrash:.long	0x000a0000,0x00000000
+	.balign	8
+.Lccws: .rept	19
+	.long	0x02600050,0x00000000
+	.endr
+	.long	0x02200050,0x00000000
+.Lreset:.byte	0xc3,0xc8,0xc1,0xd5,0xc7,0xc5,0x40,0xd9,0xc4,0xd9,0x40
+	.byte	0xc1,0xd3,0xd3,0x40,0xd2,0xc5,0xc5,0xd7,0x40,0xd5,0xd6
+	.byte	0xc8,0xd6,0xd3,0xc4	# "change rdr all keep nohold"
+.L_eof: .long	0xc5d6c600	 /* C'EOF' */
+.L_hdr: .long	0xc8c4d900	 /* C'HDR' */
+	.balign	8
+.Lcpuid:.fill	8,1,0
+
+#
+# normal startup-code, running in absolute addressing mode
+# this is called either by the ipl loader or directly by PSW restart
+# or linload or SALIPL
+#
+	.org	STARTUP_NORMAL_OFFSET - IPL_START
+SYM_CODE_START(startup)
+	j	startup_normal
+	.org	EP_OFFSET - IPL_START
+#
+# This is a list of s390 kernel entry points. At address 0x1000f the number of
+# valid entry points is stored.
+#
+# IMPORTANT: Do not change this table, it is s390 kernel ABI!
+#
+	.ascii	EP_STRING
+	.byte	0x00,0x01
+#
+# kdump startup-code, running in 64 bit absolute addressing mode
+#
+	.org	STARTUP_KDUMP_OFFSET - IPL_START
+	j	startup_kdump
+SYM_CODE_END(startup)
+SYM_CODE_START_LOCAL(startup_normal)
+	mvi	__LC_AR_MODE_ID,1	# set esame flag
+	slr	%r0,%r0 		# set cpuid to zero
+	lhi	%r1,2			# mode 2 = esame (dump)
+	sigp	%r1,%r0,0x12		# switch to esame mode
+	bras	%r13,0f
+	.fill	16,4,0x0
+0:	lmh	%r0,%r15,0(%r13)	# clear high-order half of gprs
+	sam64				# switch to 64 bit addressing mode
+	larl	%r13,.Lext_new_psw
+	mvc	__LC_EXT_NEW_PSW(16),0(%r13)
+	larl	%r13,.Lpgm_new_psw
+	mvc	__LC_PGM_NEW_PSW(16),0(%r13)
+	larl	%r13,.Lio_new_psw
+	mvc	__LC_IO_NEW_PSW(16),0(%r13)
+	xc	0x200(256),0x200	# partially clear lowcore
+	xc	0x300(256),0x300
+	xc	0xe00(256),0xe00
+	xc	0xf00(256),0xf00
+	larl	%r13,.Lctl
+	lctlg	%c0,%c15,0(%r13)	# load control registers
+	stcke	__LC_BOOT_CLOCK
+	mvc	__LC_LAST_UPDATE_CLOCK(8),__LC_BOOT_CLOCK+1
+	larl	%r13,6f
+	spt	0(%r13)
+	mvc	__LC_LAST_UPDATE_TIMER(8),0(%r13)
+	larl	%r15,_stack_end-STACK_FRAME_OVERHEAD
+	brasl	%r14,sclp_early_setup_buffer
+	brasl	%r14,verify_facilities
+	brasl	%r14,startup_kernel
+SYM_CODE_END(startup_normal)
+
+	.balign	8
+6:	.long	0x7fffffff,0xffffffff
+.Lext_new_psw:
+	.quad	0x0002000180000000,0x1b0	# disabled wait
+.Lpgm_new_psw:
+	.quad	0x0000000180000000,startup_pgm_check_handler
+.Lio_new_psw:
+	.quad	0x0002000180000000,0x1f0	# disabled wait
+.Lctl:	.quad	0x04040000		# cr0: AFP registers & secondary space
+	.quad	0			# cr1: primary space segment table
+	.quad	0			# cr2: dispatchable unit control table
+	.quad	0			# cr3: instruction authorization
+	.quad	0xffff			# cr4: instruction authorization
+	.quad	0			# cr5: primary-aste origin
+	.quad	0			# cr6:	I/O interrupts
+	.quad	0			# cr7:	secondary space segment table
+	.quad	0x0000000000008000	# cr8:	access registers translation
+	.quad	0			# cr9:	tracing off
+	.quad	0			# cr10: tracing off
+	.quad	0			# cr11: tracing off
+	.quad	0			# cr12: tracing off
+	.quad	0			# cr13: home space segment table
+	.quad	0xc0000000		# cr14: machine check handling off
+	.quad	0			# cr15: linkage stack operations
+
+#include "head_kdump.S"
+
+#
+# This program check is active immediately after kernel start
+# and until early_pgm_check_handler is set in kernel/early.c
+# It simply saves general/control registers and psw in
+# the save area and does disabled wait with a faulty address.
+#
+SYM_CODE_START_LOCAL(startup_pgm_check_handler)
+	stmg	%r8,%r15,__LC_SAVE_AREA_SYNC
+	la	%r8,4095
+	stctg	%c0,%c15,__LC_CREGS_SAVE_AREA-4095(%r8)
+	stmg	%r0,%r7,__LC_GPREGS_SAVE_AREA-4095(%r8)
+	mvc	__LC_GPREGS_SAVE_AREA-4095+64(64,%r8),__LC_SAVE_AREA_SYNC
+	mvc	__LC_PSW_SAVE_AREA-4095(16,%r8),__LC_PGM_OLD_PSW
+	mvc	__LC_RETURN_PSW(16),__LC_PGM_OLD_PSW
+	ni	__LC_RETURN_PSW,0xfc	# remove IO and EX bits
+	ni	__LC_RETURN_PSW+1,0xfb	# remove MCHK bit
+	oi	__LC_RETURN_PSW+1,0x2	# set wait state bit
+	larl	%r9,.Lold_psw_disabled_wait
+	stg	%r9,__LC_PGM_NEW_PSW+8
+	larl	%r15,_dump_info_stack_end-STACK_FRAME_OVERHEAD
+	brasl	%r14,print_pgm_check_info
+.Lold_psw_disabled_wait:
+	la	%r8,4095
+	lmg	%r0,%r15,__LC_GPREGS_SAVE_AREA-4095(%r8)
+	lpswe	__LC_RETURN_PSW		# disabled wait
+SYM_CODE_END(startup_pgm_check_handler)
diff --git a/arch/s390/boot/head_kdump.S b/arch/s390/boot/head_kdump.S
new file mode 100644
index 0000000000..f7107c7625
--- /dev/null
+++ b/arch/s390/boot/head_kdump.S
@@ -0,0 +1,101 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * S390 kdump lowlevel functions (new kernel)
+ *
+ * Copyright IBM Corp. 2011
+ * Author(s): Michael Holzheu <holzheu@linux.vnet.ibm.com>
+ */
+
+#include <asm/sigp.h>
+
+#define DATAMOVER_ADDR	0x4000
+#define COPY_PAGE_ADDR	0x6000
+
+#ifdef CONFIG_CRASH_DUMP
+
+#
+# kdump entry (new kernel - not yet relocated)
+#
+# Note: This code has to be position independent
+#
+
+SYM_CODE_START_LOCAL(startup_kdump)
+	lhi	%r1,2				# mode 2 = esame (dump)
+	sigp	%r1,%r0,SIGP_SET_ARCHITECTURE	# Switch to esame mode
+	sam64					# Switch to 64 bit addressing
+	basr	%r13,0
+.Lbase:
+	larl	%r2,.Lbase_addr			# Check, if we have been
+	lg	%r2,0(%r2)			# already relocated:
+	clgr	%r2,%r13			#
+	jne	.Lrelocate			# No : Start data mover
+	lghi	%r2,0				# Yes: Start kdump kernel
+	brasl	%r14,startup_kdump_relocated
+
+.Lrelocate:
+	larl	%r4,startup
+	lg	%r2,0x418(%r4)			# Get kdump base
+	lg	%r3,0x420(%r4)			# Get kdump size
+
+	larl	%r10,.Lcopy_start		# Source of data mover
+	lghi	%r8,DATAMOVER_ADDR		# Target of data mover
+	mvc	0(256,%r8),0(%r10)		# Copy data mover code
+
+	agr	%r8,%r2				# Copy data mover to
+	mvc	0(256,%r8),0(%r10)		# reserved mem
+
+	lghi	%r14,DATAMOVER_ADDR		# Jump to copied data mover
+	basr	%r14,%r14
+.Lbase_addr:
+	.quad	.Lbase
+
+#
+# kdump data mover code (runs at address DATAMOVER_ADDR)
+#
+# r2: kdump base address
+# r3: kdump size
+#
+.Lcopy_start:
+	basr	%r13,0				# Base
+0:
+	lgr	%r11,%r2			# Save kdump base address
+	lgr	%r12,%r2
+	agr	%r12,%r3			# Compute kdump end address
+
+	lghi	%r5,0
+	lghi	%r10,COPY_PAGE_ADDR		# Load copy page address
+1:
+	mvc	0(256,%r10),0(%r5)		# Copy old kernel to tmp
+	mvc	0(256,%r5),0(%r11)		# Copy new kernel to old
+	mvc	0(256,%r11),0(%r10)		# Copy tmp to new
+	aghi	%r11,256
+	aghi	%r5,256
+	clgr	%r11,%r12
+	jl	1b
+
+	lg	%r14,.Lstartup_kdump-0b(%r13)
+	basr	%r14,%r14			# Start relocated kernel
+.Lstartup_kdump:
+	.long	0x00000000,0x00000000 + startup_kdump_relocated
+.Lcopy_end:
+
+#
+# Startup of kdump (relocated new kernel)
+#
+	.balign	2
+startup_kdump_relocated:
+	basr	%r13,0
+0:	lpswe	.Lrestart_psw-0b(%r13)		# Start new kernel...
+SYM_CODE_END(startup_kdump)
+	.balign	8
+.Lrestart_psw:
+	.quad	0x0000000080000000,0x0000000000000000 + startup
+#else
+SYM_CODE_START_LOCAL(startup_kdump)
+	larl	%r13,startup_kdump_crash
+	lpswe	0(%r13)
+SYM_CODE_END(startup_kdump)
+	.balign	8
+startup_kdump_crash:
+	.quad	0x0002000080000000,0x0000000000000000 + startup_kdump_crash
+#endif /* CONFIG_CRASH_DUMP */
diff --git a/arch/s390/boot/install.sh b/arch/s390/boot/install.sh
new file mode 100755
index 0000000000..a13dd2f2aa
--- /dev/null
+++ b/arch/s390/boot/install.sh
@@ -0,0 +1,24 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+#
+# arch/s390x/boot/install.sh
+#
+# Copyright (C) 1995 by Linus Torvalds
+#
+# Adapted from code in arch/i386/boot/Makefile by H. Peter Anvin
+#
+# "make install" script for s390 architecture
+#
+# Arguments:
+#   $1 - kernel version
+#   $2 - kernel image file
+#   $3 - kernel map file
+#   $4 - default install path (blank if root directory)
+
+echo "Warning: '${INSTALLKERNEL}' command not available - additional " \
+     "bootloader config required" >&2
+if [ -f "$4/vmlinuz-$1" ]; then mv -- "$4/vmlinuz-$1" "$4/vmlinuz-$1.old"; fi
+if [ -f "$4/System.map-$1" ]; then mv -- "$4/System.map-$1" "$4/System.map-$1.old"; fi
+
+cat -- "$2" > "$4/vmlinuz-$1"
+cp -- "$3" "$4/System.map-$1"
diff --git a/arch/s390/boot/ipl_data.c b/arch/s390/boot/ipl_data.c
new file mode 100644
index 0000000000..0846e2b249
--- /dev/null
+++ b/arch/s390/boot/ipl_data.c
@@ -0,0 +1,84 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/compat.h>
+#include <linux/ptrace.h>
+#include <asm/cio.h>
+#include <asm/asm-offsets.h>
+#include "boot.h"
+
+#define CCW0(cmd, addr, cnt, flg) \
+	{ .cmd_code = cmd, .cda = addr, .count = cnt, .flags = flg, }
+
+#define PSW_MASK_DISABLED (PSW_MASK_WAIT | PSW_MASK_EA | PSW_MASK_BA)
+
+struct ipl_lowcore {
+	psw_t32		ipl_psw;			/* 0x0000 */
+	struct ccw0	ccwpgm[2];			/* 0x0008 */
+	u8		fill[56];			/* 0x0018 */
+	struct ccw0	ccwpgmcc[20];			/* 0x0050 */
+	u8		pad_0xf0[0x01a0-0x00f0];	/* 0x00f0 */
+	psw_t		restart_psw;			/* 0x01a0 */
+	psw_t		external_new_psw;		/* 0x01b0 */
+	psw_t		svc_new_psw;			/* 0x01c0 */
+	psw_t		program_new_psw;		/* 0x01d0 */
+	psw_t		mcck_new_psw;			/* 0x01e0 */
+	psw_t		io_new_psw;			/* 0x01f0 */
+};
+
+/*
+ * Initial lowcore for IPL: the first 24 bytes are loaded by IPL to
+ * addresses 0-23 (a PSW and two CCWs). Bytes 24-79 are discarded.
+ * The next 160 bytes are loaded to addresses 0x18-0xb7. They form
+ * the continuation of the CCW program started by IPL and load the
+ * range 0x0f0-0x730 from the image to the range 0x0f0-0x730 in
+ * memory. At the end of the channel program the PSW at location 0 is
+ * loaded.
+ * Initial processing starts at 0x200 = iplstart.
+ *
+ * The restart psw points to iplstart which allows to load a kernel
+ * image into memory and starting it by a psw restart on any cpu. All
+ * other default psw new locations contain a disabled wait psw where
+ * the address indicates which psw was loaded.
+ *
+ * Note that the 'file' utility can detect s390 kernel images. For
+ * that to succeed the two initial CCWs, and the 0x40 fill bytes must
+ * be present.
+ */
+static struct ipl_lowcore ipl_lowcore __used __section(".ipldata") = {
+	.ipl_psw = { .mask = PSW32_MASK_BASE, .addr = PSW32_ADDR_AMODE | IPL_START },
+	.ccwpgm = {
+		[ 0] = CCW0(CCW_CMD_READ_IPL, 0x018, 0x50, CCW_FLAG_SLI | CCW_FLAG_CC),
+		[ 1] = CCW0(CCW_CMD_READ_IPL, 0x068, 0x50, CCW_FLAG_SLI | CCW_FLAG_CC),
+	},
+	.fill = {
+		[ 0 ... 55] = 0x40,
+	},
+	.ccwpgmcc = {
+		[ 0] = CCW0(CCW_CMD_READ_IPL, 0x0f0, 0x50, CCW_FLAG_SLI | CCW_FLAG_CC),
+		[ 1] = CCW0(CCW_CMD_READ_IPL, 0x140, 0x50, CCW_FLAG_SLI | CCW_FLAG_CC),
+		[ 2] = CCW0(CCW_CMD_READ_IPL, 0x190, 0x50, CCW_FLAG_SLI | CCW_FLAG_CC),
+		[ 3] = CCW0(CCW_CMD_READ_IPL, 0x1e0, 0x50, CCW_FLAG_SLI | CCW_FLAG_CC),
+		[ 4] = CCW0(CCW_CMD_READ_IPL, 0x230, 0x50, CCW_FLAG_SLI | CCW_FLAG_CC),
+		[ 5] = CCW0(CCW_CMD_READ_IPL, 0x280, 0x50, CCW_FLAG_SLI | CCW_FLAG_CC),
+		[ 6] = CCW0(CCW_CMD_READ_IPL, 0x2d0, 0x50, CCW_FLAG_SLI | CCW_FLAG_CC),
+		[ 7] = CCW0(CCW_CMD_READ_IPL, 0x320, 0x50, CCW_FLAG_SLI | CCW_FLAG_CC),
+		[ 8] = CCW0(CCW_CMD_READ_IPL, 0x370, 0x50, CCW_FLAG_SLI | CCW_FLAG_CC),
+		[ 9] = CCW0(CCW_CMD_READ_IPL, 0x3c0, 0x50, CCW_FLAG_SLI | CCW_FLAG_CC),
+		[10] = CCW0(CCW_CMD_READ_IPL, 0x410, 0x50, CCW_FLAG_SLI | CCW_FLAG_CC),
+		[11] = CCW0(CCW_CMD_READ_IPL, 0x460, 0x50, CCW_FLAG_SLI | CCW_FLAG_CC),
+		[12] = CCW0(CCW_CMD_READ_IPL, 0x4b0, 0x50, CCW_FLAG_SLI | CCW_FLAG_CC),
+		[13] = CCW0(CCW_CMD_READ_IPL, 0x500, 0x50, CCW_FLAG_SLI | CCW_FLAG_CC),
+		[14] = CCW0(CCW_CMD_READ_IPL, 0x550, 0x50, CCW_FLAG_SLI | CCW_FLAG_CC),
+		[15] = CCW0(CCW_CMD_READ_IPL, 0x5a0, 0x50, CCW_FLAG_SLI | CCW_FLAG_CC),
+		[16] = CCW0(CCW_CMD_READ_IPL, 0x5f0, 0x50, CCW_FLAG_SLI | CCW_FLAG_CC),
+		[17] = CCW0(CCW_CMD_READ_IPL, 0x640, 0x50, CCW_FLAG_SLI | CCW_FLAG_CC),
+		[18] = CCW0(CCW_CMD_READ_IPL, 0x690, 0x50, CCW_FLAG_SLI | CCW_FLAG_CC),
+		[19] = CCW0(CCW_CMD_READ_IPL, 0x6e0, 0x50, CCW_FLAG_SLI),
+	},
+	.restart_psw	  = { .mask = 0, .addr = IPL_START, },
+	.external_new_psw = { .mask = PSW_MASK_DISABLED, .addr = __LC_EXT_NEW_PSW, },
+	.svc_new_psw	  = { .mask = PSW_MASK_DISABLED, .addr = __LC_SVC_NEW_PSW, },
+	.program_new_psw  = { .mask = PSW_MASK_DISABLED, .addr = __LC_PGM_NEW_PSW, },
+	.mcck_new_psw	  = { .mask = PSW_MASK_DISABLED, .addr = __LC_MCK_NEW_PSW, },
+	.io_new_psw	  = { .mask = PSW_MASK_DISABLED, .addr = __LC_IO_NEW_PSW, },
+};
diff --git a/arch/s390/boot/ipl_parm.c b/arch/s390/boot/ipl_parm.c
new file mode 100644
index 0000000000..7b75217626
--- /dev/null
+++ b/arch/s390/boot/ipl_parm.c
@@ -0,0 +1,306 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/ctype.h>
+#include <linux/pgtable.h>
+#include <asm/ebcdic.h>
+#include <asm/sclp.h>
+#include <asm/sections.h>
+#include <asm/boot_data.h>
+#include <asm/facility.h>
+#include <asm/setup.h>
+#include <asm/uv.h>
+#include "boot.h"
+
+struct parmarea parmarea __section(".parmarea") = {
+	.kernel_version		= (unsigned long)kernel_version,
+	.max_command_line_size	= COMMAND_LINE_SIZE,
+	.command_line		= "root=/dev/ram0 ro",
+};
+
+char __bootdata(early_command_line)[COMMAND_LINE_SIZE];
+
+unsigned int __bootdata_preserved(zlib_dfltcc_support) = ZLIB_DFLTCC_FULL;
+struct ipl_parameter_block __bootdata_preserved(ipl_block);
+int __bootdata_preserved(ipl_block_valid);
+int __bootdata_preserved(__kaslr_enabled);
+
+unsigned long vmalloc_size = VMALLOC_DEFAULT_SIZE;
+unsigned long memory_limit;
+int vmalloc_size_set;
+
+static inline int __diag308(unsigned long subcode, void *addr)
+{
+	unsigned long reg1, reg2;
+	union register_pair r1;
+	psw_t old;
+
+	r1.even = (unsigned long) addr;
+	r1.odd	= 0;
+	asm volatile(
+		"	mvc	0(16,%[psw_old]),0(%[psw_pgm])\n"
+		"	epsw	%[reg1],%[reg2]\n"
+		"	st	%[reg1],0(%[psw_pgm])\n"
+		"	st	%[reg2],4(%[psw_pgm])\n"
+		"	larl	%[reg1],1f\n"
+		"	stg	%[reg1],8(%[psw_pgm])\n"
+		"	diag	%[r1],%[subcode],0x308\n"
+		"1:	mvc	0(16,%[psw_pgm]),0(%[psw_old])\n"
+		: [r1] "+&d" (r1.pair),
+		  [reg1] "=&d" (reg1),
+		  [reg2] "=&a" (reg2),
+		  "+Q" (S390_lowcore.program_new_psw),
+		  "=Q" (old)
+		: [subcode] "d" (subcode),
+		  [psw_old] "a" (&old),
+		  [psw_pgm] "a" (&S390_lowcore.program_new_psw)
+		: "cc", "memory");
+	return r1.odd;
+}
+
+void store_ipl_parmblock(void)
+{
+	int rc;
+
+	rc = __diag308(DIAG308_STORE, &ipl_block);
+	if (rc == DIAG308_RC_OK &&
+	    ipl_block.hdr.version <= IPL_MAX_SUPPORTED_VERSION)
+		ipl_block_valid = 1;
+}
+
+bool is_ipl_block_dump(void)
+{
+	if (ipl_block.pb0_hdr.pbt == IPL_PBT_FCP &&
+	    ipl_block.fcp.opt == IPL_PB0_FCP_OPT_DUMP)
+		return true;
+	if (ipl_block.pb0_hdr.pbt == IPL_PBT_NVME &&
+	    ipl_block.nvme.opt == IPL_PB0_NVME_OPT_DUMP)
+		return true;
+	if (ipl_block.pb0_hdr.pbt == IPL_PBT_ECKD &&
+	    ipl_block.eckd.opt == IPL_PB0_ECKD_OPT_DUMP)
+		return true;
+	return false;
+}
+
+static size_t scpdata_length(const u8 *buf, size_t count)
+{
+	while (count) {
+		if (buf[count - 1] != '\0' && buf[count - 1] != ' ')
+			break;
+		count--;
+	}
+	return count;
+}
+
+static size_t ipl_block_get_ascii_scpdata(char *dest, size_t size,
+					  const struct ipl_parameter_block *ipb)
+{
+	const __u8 *scp_data;
+	__u32 scp_data_len;
+	int has_lowercase;
+	size_t count = 0;
+	size_t i;
+
+	switch (ipb->pb0_hdr.pbt) {
+	case IPL_PBT_FCP:
+		scp_data_len = ipb->fcp.scp_data_len;
+		scp_data = ipb->fcp.scp_data;
+		break;
+	case IPL_PBT_NVME:
+		scp_data_len = ipb->nvme.scp_data_len;
+		scp_data = ipb->nvme.scp_data;
+		break;
+	case IPL_PBT_ECKD:
+		scp_data_len = ipb->eckd.scp_data_len;
+		scp_data = ipb->eckd.scp_data;
+		break;
+
+	default:
+		goto out;
+	}
+
+	count = min(size - 1, scpdata_length(scp_data, scp_data_len));
+	if (!count)
+		goto out;
+
+	has_lowercase = 0;
+	for (i = 0; i < count; i++) {
+		if (!isascii(scp_data[i])) {
+			count = 0;
+			goto out;
+		}
+		if (!has_lowercase && islower(scp_data[i]))
+			has_lowercase = 1;
+	}
+
+	if (has_lowercase)
+		memcpy(dest, scp_data, count);
+	else
+		for (i = 0; i < count; i++)
+			dest[i] = tolower(scp_data[i]);
+out:
+	dest[count] = '\0';
+	return count;
+}
+
+static void append_ipl_block_parm(void)
+{
+	char *parm, *delim;
+	size_t len, rc = 0;
+
+	len = strlen(early_command_line);
+
+	delim = early_command_line + len;    /* '\0' character position */
+	parm = early_command_line + len + 1; /* append right after '\0' */
+
+	switch (ipl_block.pb0_hdr.pbt) {
+	case IPL_PBT_CCW:
+		rc = ipl_block_get_ascii_vmparm(
+			parm, COMMAND_LINE_SIZE - len - 1, &ipl_block);
+		break;
+	case IPL_PBT_FCP:
+	case IPL_PBT_NVME:
+	case IPL_PBT_ECKD:
+		rc = ipl_block_get_ascii_scpdata(
+			parm, COMMAND_LINE_SIZE - len - 1, &ipl_block);
+		break;
+	}
+	if (rc) {
+		if (*parm == '=')
+			memmove(early_command_line, parm + 1, rc);
+		else
+			*delim = ' '; /* replace '\0' with space */
+	}
+}
+
+static inline int has_ebcdic_char(const char *str)
+{
+	int i;
+
+	for (i = 0; str[i]; i++)
+		if (str[i] & 0x80)
+			return 1;
+	return 0;
+}
+
+void setup_boot_command_line(void)
+{
+	parmarea.command_line[COMMAND_LINE_SIZE - 1] = 0;
+	/* convert arch command line to ascii if necessary */
+	if (has_ebcdic_char(parmarea.command_line))
+		EBCASC(parmarea.command_line, COMMAND_LINE_SIZE);
+	/* copy arch command line */
+	strcpy(early_command_line, strim(parmarea.command_line));
+
+	/* append IPL PARM data to the boot command line */
+	if (!is_prot_virt_guest() && ipl_block_valid)
+		append_ipl_block_parm();
+}
+
+static void modify_facility(unsigned long nr, bool clear)
+{
+	if (clear)
+		__clear_facility(nr, stfle_fac_list);
+	else
+		__set_facility(nr, stfle_fac_list);
+}
+
+static void check_cleared_facilities(void)
+{
+	unsigned long als[] = { FACILITIES_ALS };
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(als); i++) {
+		if ((stfle_fac_list[i] & als[i]) != als[i]) {
+			sclp_early_printk("Warning: The Linux kernel requires facilities cleared via command line option\n");
+			print_missing_facilities();
+			break;
+		}
+	}
+}
+
+static void modify_fac_list(char *str)
+{
+	unsigned long val, endval;
+	char *endp;
+	bool clear;
+
+	while (*str) {
+		clear = false;
+		if (*str == '!') {
+			clear = true;
+			str++;
+		}
+		val = simple_strtoull(str, &endp, 0);
+		if (str == endp)
+			break;
+		str = endp;
+		if (*str == '-') {
+			str++;
+			endval = simple_strtoull(str, &endp, 0);
+			if (str == endp)
+				break;
+			str = endp;
+			while (val <= endval) {
+				modify_facility(val, clear);
+				val++;
+			}
+		} else {
+			modify_facility(val, clear);
+		}
+		if (*str != ',')
+			break;
+		str++;
+	}
+	check_cleared_facilities();
+}
+
+static char command_line_buf[COMMAND_LINE_SIZE];
+void parse_boot_command_line(void)
+{
+	char *param, *val;
+	bool enabled;
+	char *args;
+	int rc;
+
+	__kaslr_enabled = IS_ENABLED(CONFIG_RANDOMIZE_BASE);
+	args = strcpy(command_line_buf, early_command_line);
+	while (*args) {
+		args = next_arg(args, &param, &val);
+
+		if (!strcmp(param, "mem") && val)
+			memory_limit = round_down(memparse(val, NULL), PAGE_SIZE);
+
+		if (!strcmp(param, "vmalloc") && val) {
+			vmalloc_size = round_up(memparse(val, NULL), PAGE_SIZE);
+			vmalloc_size_set = 1;
+		}
+
+		if (!strcmp(param, "dfltcc") && val) {
+			if (!strcmp(val, "off"))
+				zlib_dfltcc_support = ZLIB_DFLTCC_DISABLED;
+			else if (!strcmp(val, "on"))
+				zlib_dfltcc_support = ZLIB_DFLTCC_FULL;
+			else if (!strcmp(val, "def_only"))
+				zlib_dfltcc_support = ZLIB_DFLTCC_DEFLATE_ONLY;
+			else if (!strcmp(val, "inf_only"))
+				zlib_dfltcc_support = ZLIB_DFLTCC_INFLATE_ONLY;
+			else if (!strcmp(val, "always"))
+				zlib_dfltcc_support = ZLIB_DFLTCC_FULL_DEBUG;
+		}
+
+		if (!strcmp(param, "facilities") && val)
+			modify_fac_list(val);
+
+		if (!strcmp(param, "nokaslr"))
+			__kaslr_enabled = 0;
+
+#if IS_ENABLED(CONFIG_KVM)
+		if (!strcmp(param, "prot_virt")) {
+			rc = kstrtobool(val, &enabled);
+			if (!rc && enabled)
+				prot_virt_host = 1;
+		}
+#endif
+	}
+}
diff --git a/arch/s390/boot/ipl_report.c b/arch/s390/boot/ipl_report.c
new file mode 100644
index 0000000000..1803035e68
--- /dev/null
+++ b/arch/s390/boot/ipl_report.c
@@ -0,0 +1,165 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/init.h>
+#include <linux/ctype.h>
+#include <asm/ebcdic.h>
+#include <asm/sclp.h>
+#include <asm/sections.h>
+#include <asm/boot_data.h>
+#include <asm/physmem_info.h>
+#include <uapi/asm/ipl.h>
+#include "boot.h"
+
+int __bootdata_preserved(ipl_secure_flag);
+
+unsigned long __bootdata_preserved(ipl_cert_list_addr);
+unsigned long __bootdata_preserved(ipl_cert_list_size);
+
+unsigned long __bootdata(early_ipl_comp_list_addr);
+unsigned long __bootdata(early_ipl_comp_list_size);
+
+static struct ipl_rb_certificates *certs;
+static struct ipl_rb_components *comps;
+static bool ipl_report_needs_saving;
+
+#define for_each_rb_entry(entry, rb) \
+	for (entry = rb->entries; \
+	     (void *) entry + sizeof(*entry) <= (void *) rb + rb->len; \
+	     entry++)
+
+static unsigned long get_cert_comp_list_size(void)
+{
+	struct ipl_rb_certificate_entry *cert;
+	struct ipl_rb_component_entry *comp;
+	size_t size;
+
+	/*
+	 * Find the length for the IPL report boot data
+	 */
+	early_ipl_comp_list_size = 0;
+	for_each_rb_entry(comp, comps)
+		early_ipl_comp_list_size += sizeof(*comp);
+	ipl_cert_list_size = 0;
+	for_each_rb_entry(cert, certs)
+		ipl_cert_list_size += sizeof(unsigned int) + cert->len;
+	return ipl_cert_list_size + early_ipl_comp_list_size;
+}
+
+bool ipl_report_certs_intersects(unsigned long addr, unsigned long size,
+				 unsigned long *intersection_start)
+{
+	struct ipl_rb_certificate_entry *cert;
+
+	if (!ipl_report_needs_saving)
+		return false;
+
+	for_each_rb_entry(cert, certs) {
+		if (intersects(addr, size, cert->addr, cert->len)) {
+			*intersection_start = cert->addr;
+			return true;
+		}
+	}
+	return false;
+}
+
+static void copy_components_bootdata(void)
+{
+	struct ipl_rb_component_entry *comp, *ptr;
+
+	ptr = (struct ipl_rb_component_entry *) early_ipl_comp_list_addr;
+	for_each_rb_entry(comp, comps)
+		memcpy(ptr++, comp, sizeof(*ptr));
+}
+
+static void copy_certificates_bootdata(void)
+{
+	struct ipl_rb_certificate_entry *cert;
+	void *ptr;
+
+	ptr = (void *) ipl_cert_list_addr;
+	for_each_rb_entry(cert, certs) {
+		*(unsigned int *) ptr = cert->len;
+		ptr += sizeof(unsigned int);
+		memcpy(ptr, (void *) cert->addr, cert->len);
+		ptr += cert->len;
+	}
+}
+
+int read_ipl_report(void)
+{
+	struct ipl_pl_hdr *pl_hdr;
+	struct ipl_rl_hdr *rl_hdr;
+	struct ipl_rb_hdr *rb_hdr;
+	unsigned long tmp;
+	void *rl_end;
+
+	/*
+	 * Check if there is a IPL report by looking at the copy
+	 * of the IPL parameter information block.
+	 */
+	if (!ipl_block_valid ||
+	    !(ipl_block.hdr.flags & IPL_PL_FLAG_IPLSR))
+		return -1;
+	ipl_secure_flag = !!(ipl_block.hdr.flags & IPL_PL_FLAG_SIPL);
+	/*
+	 * There is an IPL report, to find it load the pointer to the
+	 * IPL parameter information block from lowcore and skip past
+	 * the IPL parameter list, then align the address to a double
+	 * word boundary.
+	 */
+	tmp = (unsigned long) S390_lowcore.ipl_parmblock_ptr;
+	pl_hdr = (struct ipl_pl_hdr *) tmp;
+	tmp = (tmp + pl_hdr->len + 7) & -8UL;
+	rl_hdr = (struct ipl_rl_hdr *) tmp;
+	/* Walk through the IPL report blocks in the IPL Report list */
+	certs = NULL;
+	comps = NULL;
+	rl_end = (void *) rl_hdr + rl_hdr->len;
+	rb_hdr = (void *) rl_hdr + sizeof(*rl_hdr);
+	while ((void *) rb_hdr + sizeof(*rb_hdr) < rl_end &&
+	       (void *) rb_hdr + rb_hdr->len <= rl_end) {
+
+		switch (rb_hdr->rbt) {
+		case IPL_RBT_CERTIFICATES:
+			certs = (struct ipl_rb_certificates *) rb_hdr;
+			break;
+		case IPL_RBT_COMPONENTS:
+			comps = (struct ipl_rb_components *) rb_hdr;
+			break;
+		default:
+			break;
+		}
+
+		rb_hdr = (void *) rb_hdr + rb_hdr->len;
+	}
+
+	/*
+	 * With either the component list or the certificate list
+	 * missing the kernel will stay ignorant of secure IPL.
+	 */
+	if (!comps || !certs) {
+		certs = NULL;
+		return -1;
+	}
+
+	ipl_report_needs_saving = true;
+	physmem_reserve(RR_IPLREPORT, (unsigned long)pl_hdr,
+			(unsigned long)rl_end - (unsigned long)pl_hdr);
+	return 0;
+}
+
+void save_ipl_cert_comp_list(void)
+{
+	unsigned long size;
+
+	if (!ipl_report_needs_saving)
+		return;
+
+	size = get_cert_comp_list_size();
+	early_ipl_comp_list_addr = physmem_alloc_top_down(RR_CERT_COMP_LIST, size, sizeof(int));
+	ipl_cert_list_addr = early_ipl_comp_list_addr + early_ipl_comp_list_size;
+
+	copy_components_bootdata();
+	copy_certificates_bootdata();
+	physmem_free(RR_IPLREPORT);
+	ipl_report_needs_saving = false;
+}
diff --git a/arch/s390/boot/ipl_vmparm.c b/arch/s390/boot/ipl_vmparm.c
new file mode 100644
index 0000000000..8dacd5fadf
--- /dev/null
+++ b/arch/s390/boot/ipl_vmparm.c
@@ -0,0 +1,2 @@
+// SPDX-License-Identifier: GPL-2.0
+#include "../kernel/ipl_vmparm.c"
diff --git a/arch/s390/boot/kaslr.c b/arch/s390/boot/kaslr.c
new file mode 100644
index 0000000000..90602101e2
--- /dev/null
+++ b/arch/s390/boot/kaslr.c
@@ -0,0 +1,198 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright IBM Corp. 2019
+ */
+#include <linux/pgtable.h>
+#include <asm/physmem_info.h>
+#include <asm/cpacf.h>
+#include <asm/timex.h>
+#include <asm/sclp.h>
+#include <asm/kasan.h>
+#include "decompressor.h"
+#include "boot.h"
+
+#define PRNG_MODE_TDES	 1
+#define PRNG_MODE_SHA512 2
+#define PRNG_MODE_TRNG	 3
+
+struct prno_parm {
+	u32 res;
+	u32 reseed_counter;
+	u64 stream_bytes;
+	u8  V[112];
+	u8  C[112];
+};
+
+struct prng_parm {
+	u8  parm_block[32];
+	u32 reseed_counter;
+	u64 byte_counter;
+};
+
+static int check_prng(void)
+{
+	if (!cpacf_query_func(CPACF_KMC, CPACF_KMC_PRNG)) {
+		sclp_early_printk("KASLR disabled: CPU has no PRNG\n");
+		return 0;
+	}
+	if (cpacf_query_func(CPACF_PRNO, CPACF_PRNO_TRNG))
+		return PRNG_MODE_TRNG;
+	if (cpacf_query_func(CPACF_PRNO, CPACF_PRNO_SHA512_DRNG_GEN))
+		return PRNG_MODE_SHA512;
+	else
+		return PRNG_MODE_TDES;
+}
+
+static int get_random(unsigned long limit, unsigned long *value)
+{
+	struct prng_parm prng = {
+		/* initial parameter block for tdes mode, copied from libica */
+		.parm_block = {
+			0x0F, 0x2B, 0x8E, 0x63, 0x8C, 0x8E, 0xD2, 0x52,
+			0x64, 0xB7, 0xA0, 0x7B, 0x75, 0x28, 0xB8, 0xF4,
+			0x75, 0x5F, 0xD2, 0xA6, 0x8D, 0x97, 0x11, 0xFF,
+			0x49, 0xD8, 0x23, 0xF3, 0x7E, 0x21, 0xEC, 0xA0
+		},
+	};
+	unsigned long seed, random;
+	struct prno_parm prno;
+	__u64 entropy[4];
+	int mode, i;
+
+	mode = check_prng();
+	seed = get_tod_clock_fast();
+	switch (mode) {
+	case PRNG_MODE_TRNG:
+		cpacf_trng(NULL, 0, (u8 *) &random, sizeof(random));
+		break;
+	case PRNG_MODE_SHA512:
+		cpacf_prno(CPACF_PRNO_SHA512_DRNG_SEED, &prno, NULL, 0,
+			   (u8 *) &seed, sizeof(seed));
+		cpacf_prno(CPACF_PRNO_SHA512_DRNG_GEN, &prno, (u8 *) &random,
+			   sizeof(random), NULL, 0);
+		break;
+	case PRNG_MODE_TDES:
+		/* add entropy */
+		*(unsigned long *) prng.parm_block ^= seed;
+		for (i = 0; i < 16; i++) {
+			cpacf_kmc(CPACF_KMC_PRNG, prng.parm_block,
+				  (u8 *) entropy, (u8 *) entropy,
+				  sizeof(entropy));
+			memcpy(prng.parm_block, entropy, sizeof(entropy));
+		}
+		random = seed;
+		cpacf_kmc(CPACF_KMC_PRNG, prng.parm_block, (u8 *) &random,
+			  (u8 *) &random, sizeof(random));
+		break;
+	default:
+		return -1;
+	}
+	*value = random % limit;
+	return 0;
+}
+
+static void sort_reserved_ranges(struct reserved_range *res, unsigned long size)
+{
+	struct reserved_range tmp;
+	int i, j;
+
+	for (i = 1; i < size; i++) {
+		tmp = res[i];
+		for (j = i - 1; j >= 0 && res[j].start > tmp.start; j--)
+			res[j + 1] = res[j];
+		res[j + 1] = tmp;
+	}
+}
+
+static unsigned long iterate_valid_positions(unsigned long size, unsigned long align,
+					     unsigned long _min, unsigned long _max,
+					     struct reserved_range *res, size_t res_count,
+					     bool pos_count, unsigned long find_pos)
+{
+	unsigned long start, end, tmp_end, range_pos, pos = 0;
+	struct reserved_range *res_end = res + res_count;
+	struct reserved_range *skip_res;
+	int i;
+
+	align = max(align, 8UL);
+	_min = round_up(_min, align);
+	for_each_physmem_usable_range(i, &start, &end) {
+		if (_min >= end)
+			continue;
+		start = round_up(start, align);
+		if (start >= _max)
+			break;
+		start = max(_min, start);
+		end = min(_max, end);
+
+		while (start + size <= end) {
+			/* skip reserved ranges below the start */
+			while (res && res->end <= start) {
+				res++;
+				if (res >= res_end)
+					res = NULL;
+			}
+			skip_res = NULL;
+			tmp_end = end;
+			/* has intersecting reserved range */
+			if (res && res->start < end) {
+				skip_res = res;
+				tmp_end = res->start;
+			}
+			if (start + size <= tmp_end) {
+				range_pos = (tmp_end - start - size) / align + 1;
+				if (pos_count) {
+					pos += range_pos;
+				} else {
+					if (range_pos >= find_pos)
+						return start + (find_pos - 1) * align;
+					find_pos -= range_pos;
+				}
+			}
+			if (!skip_res)
+				break;
+			start = round_up(skip_res->end, align);
+		}
+	}
+
+	return pos_count ? pos : 0;
+}
+
+/*
+ * Two types of decompressor memory allocations/reserves are considered
+ * differently.
+ *
+ * "Static" or "single" allocations are done via physmem_alloc_range() and
+ * physmem_reserve(), and they are listed in physmem_info.reserved[]. Each
+ * type of "static" allocation can only have one allocation per type and
+ * cannot have chains.
+ *
+ * On the other hand, "dynamic" or "repetitive" allocations are done via
+ * physmem_alloc_top_down(). These allocations are tightly packed together
+ * top down from the end of online memory. physmem_alloc_pos represents
+ * current position where those allocations start.
+ *
+ * Functions randomize_within_range() and iterate_valid_positions()
+ * only consider "dynamic" allocations by never looking above
+ * physmem_alloc_pos. "Static" allocations, however, are explicitly
+ * considered by checking the "res" (reserves) array. The first
+ * reserved_range of a "dynamic" allocation may also be checked along the
+ * way, but it will always be above the maximum value anyway.
+ */
+unsigned long randomize_within_range(unsigned long size, unsigned long align,
+				     unsigned long min, unsigned long max)
+{
+	struct reserved_range res[RR_MAX];
+	unsigned long max_pos, pos;
+
+	memcpy(res, physmem_info.reserved, sizeof(res));
+	sort_reserved_ranges(res, ARRAY_SIZE(res));
+	max = min(max, get_physmem_alloc_pos());
+
+	max_pos = iterate_valid_positions(size, align, min, max, res, ARRAY_SIZE(res), true, 0);
+	if (!max_pos)
+		return 0;
+	if (get_random(max_pos, &pos))
+		return 0;
+	return iterate_valid_positions(size, align, min, max, res, ARRAY_SIZE(res), false, pos + 1);
+}
diff --git a/arch/s390/boot/machine_kexec_reloc.c b/arch/s390/boot/machine_kexec_reloc.c
new file mode 100644
index 0000000000..b7a5d0f720
--- /dev/null
+++ b/arch/s390/boot/machine_kexec_reloc.c
@@ -0,0 +1,2 @@
+// SPDX-License-Identifier: GPL-2.0
+#include "../kernel/machine_kexec_reloc.c"
diff --git a/arch/s390/boot/mem.S b/arch/s390/boot/mem.S
new file mode 100644
index 0000000000..b33463633f
--- /dev/null
+++ b/arch/s390/boot/mem.S
@@ -0,0 +1,2 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#include "../lib/mem.S"
diff --git a/arch/s390/boot/pgm_check_info.c b/arch/s390/boot/pgm_check_info.c
new file mode 100644
index 0000000000..97244cd7a2
--- /dev/null
+++ b/arch/s390/boot/pgm_check_info.c
@@ -0,0 +1,179 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/kernel.h>
+#include <linux/stdarg.h>
+#include <linux/string.h>
+#include <linux/ctype.h>
+#include <asm/stacktrace.h>
+#include <asm/boot_data.h>
+#include <asm/lowcore.h>
+#include <asm/setup.h>
+#include <asm/sclp.h>
+#include <asm/uv.h>
+#include "boot.h"
+
+const char hex_asc[] = "0123456789abcdef";
+
+static char *as_hex(char *dst, unsigned long val, int pad)
+{
+	char *p, *end = p = dst + max(pad, (int)__fls(val | 1) / 4 + 1);
+
+	for (*p-- = 0; p >= dst; val >>= 4)
+		*p-- = hex_asc[val & 0x0f];
+	return end;
+}
+
+static char *symstart(char *p)
+{
+	while (*p)
+		p--;
+	return p + 1;
+}
+
+static noinline char *findsym(unsigned long ip, unsigned short *off, unsigned short *len)
+{
+	/* symbol entries are in a form "10000 c4 startup\0" */
+	char *a = _decompressor_syms_start;
+	char *b = _decompressor_syms_end;
+	unsigned long start;
+	unsigned long size;
+	char *pivot;
+	char *endp;
+
+	while (a < b) {
+		pivot = symstart(a + (b - a) / 2);
+		start = simple_strtoull(pivot, &endp, 16);
+		size = simple_strtoull(endp + 1, &endp, 16);
+		if (ip < start) {
+			b = pivot;
+			continue;
+		}
+		if (ip > start + size) {
+			a = pivot + strlen(pivot) + 1;
+			continue;
+		}
+		*off = ip - start;
+		*len = size;
+		return endp + 1;
+	}
+	return NULL;
+}
+
+static noinline char *strsym(void *ip)
+{
+	static char buf[64];
+	unsigned short off;
+	unsigned short len;
+	char *p;
+
+	p = findsym((unsigned long)ip, &off, &len);
+	if (p) {
+		strncpy(buf, p, sizeof(buf));
+		/* reserve 15 bytes for offset/len in symbol+0x1234/0x1234 */
+		p = buf + strnlen(buf, sizeof(buf) - 15);
+		strcpy(p, "+0x");
+		p = as_hex(p + 3, off, 0);
+		strcpy(p, "/0x");
+		as_hex(p + 3, len, 0);
+	} else {
+		as_hex(buf, (unsigned long)ip, 16);
+	}
+	return buf;
+}
+
+void decompressor_printk(const char *fmt, ...)
+{
+	char buf[1024] = { 0 };
+	char *end = buf + sizeof(buf) - 1; /* make sure buf is 0 terminated */
+	unsigned long pad;
+	char *p = buf;
+	va_list args;
+
+	va_start(args, fmt);
+	for (; p < end && *fmt; fmt++) {
+		if (*fmt != '%') {
+			*p++ = *fmt;
+			continue;
+		}
+		pad = isdigit(*++fmt) ? simple_strtol(fmt, (char **)&fmt, 10) : 0;
+		switch (*fmt) {
+		case 's':
+			p = buf + strlcat(buf, va_arg(args, char *), sizeof(buf));
+			break;
+		case 'p':
+			if (*++fmt != 'S')
+				goto out;
+			p = buf + strlcat(buf, strsym(va_arg(args, void *)), sizeof(buf));
+			break;
+		case 'l':
+			if (*++fmt != 'x' || end - p <= max(sizeof(long) * 2, pad))
+				goto out;
+			p = as_hex(p, va_arg(args, unsigned long), pad);
+			break;
+		case 'x':
+			if (end - p <= max(sizeof(int) * 2, pad))
+				goto out;
+			p = as_hex(p, va_arg(args, unsigned int), pad);
+			break;
+		default:
+			goto out;
+		}
+	}
+out:
+	va_end(args);
+	sclp_early_printk(buf);
+}
+
+void print_stacktrace(unsigned long sp)
+{
+	struct stack_info boot_stack = { STACK_TYPE_TASK, (unsigned long)_stack_start,
+					 (unsigned long)_stack_end };
+	bool first = true;
+
+	decompressor_printk("Call Trace:\n");
+	while (!(sp & 0x7) && on_stack(&boot_stack, sp, sizeof(struct stack_frame))) {
+		struct stack_frame *sf = (struct stack_frame *)sp;
+
+		decompressor_printk(first ? "(sp:%016lx [<%016lx>] %pS)\n" :
+					    " sp:%016lx [<%016lx>] %pS\n",
+				    sp, sf->gprs[8], (void *)sf->gprs[8]);
+		if (sf->back_chain <= sp)
+			break;
+		sp = sf->back_chain;
+		first = false;
+	}
+}
+
+void print_pgm_check_info(void)
+{
+	unsigned long *gpregs = (unsigned long *)S390_lowcore.gpregs_save_area;
+	struct psw_bits *psw = &psw_bits(S390_lowcore.psw_save_area);
+
+	decompressor_printk("Linux version %s\n", kernel_version);
+	if (!is_prot_virt_guest() && early_command_line[0])
+		decompressor_printk("Kernel command line: %s\n", early_command_line);
+	decompressor_printk("Kernel fault: interruption code %04x ilc:%x\n",
+			    S390_lowcore.pgm_code, S390_lowcore.pgm_ilc >> 1);
+	if (kaslr_enabled())
+		decompressor_printk("Kernel random base: %lx\n", __kaslr_offset);
+	decompressor_printk("PSW : %016lx %016lx (%pS)\n",
+			    S390_lowcore.psw_save_area.mask,
+			    S390_lowcore.psw_save_area.addr,
+			    (void *)S390_lowcore.psw_save_area.addr);
+	decompressor_printk(
+		"      R:%x T:%x IO:%x EX:%x Key:%x M:%x W:%x P:%x AS:%x CC:%x PM:%x RI:%x EA:%x\n",
+		psw->per, psw->dat, psw->io, psw->ext, psw->key, psw->mcheck,
+		psw->wait, psw->pstate, psw->as, psw->cc, psw->pm, psw->ri,
+		psw->eaba);
+	decompressor_printk("GPRS: %016lx %016lx %016lx %016lx\n",
+			    gpregs[0], gpregs[1], gpregs[2], gpregs[3]);
+	decompressor_printk("      %016lx %016lx %016lx %016lx\n",
+			    gpregs[4], gpregs[5], gpregs[6], gpregs[7]);
+	decompressor_printk("      %016lx %016lx %016lx %016lx\n",
+			    gpregs[8], gpregs[9], gpregs[10], gpregs[11]);
+	decompressor_printk("      %016lx %016lx %016lx %016lx\n",
+			    gpregs[12], gpregs[13], gpregs[14], gpregs[15]);
+	print_stacktrace(S390_lowcore.gpregs_save_area[15]);
+	decompressor_printk("Last Breaking-Event-Address:\n");
+	decompressor_printk(" [<%016lx>] %pS\n", (unsigned long)S390_lowcore.pgm_last_break,
+			    (void *)S390_lowcore.pgm_last_break);
+}
diff --git a/arch/s390/boot/physmem_info.c b/arch/s390/boot/physmem_info.c
new file mode 100644
index 0000000000..0cf79826ee
--- /dev/null
+++ b/arch/s390/boot/physmem_info.c
@@ -0,0 +1,328 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/processor.h>
+#include <linux/errno.h>
+#include <linux/init.h>
+#include <asm/physmem_info.h>
+#include <asm/stacktrace.h>
+#include <asm/boot_data.h>
+#include <asm/sparsemem.h>
+#include <asm/sections.h>
+#include <asm/setup.h>
+#include <asm/sclp.h>
+#include <asm/uv.h>
+#include "decompressor.h"
+#include "boot.h"
+
+struct physmem_info __bootdata(physmem_info);
+static unsigned int physmem_alloc_ranges;
+static unsigned long physmem_alloc_pos;
+
+/* up to 256 storage elements, 1020 subincrements each */
+#define ENTRIES_EXTENDED_MAX						       \
+	(256 * (1020 / 2) * sizeof(struct physmem_range))
+
+static struct physmem_range *__get_physmem_range_ptr(u32 n)
+{
+	if (n < MEM_INLINED_ENTRIES)
+		return &physmem_info.online[n];
+	if (unlikely(!physmem_info.online_extended)) {
+		physmem_info.online_extended = (struct physmem_range *)physmem_alloc_range(
+			RR_MEM_DETECT_EXTENDED, ENTRIES_EXTENDED_MAX, sizeof(long), 0,
+			physmem_alloc_pos, true);
+	}
+	return &physmem_info.online_extended[n - MEM_INLINED_ENTRIES];
+}
+
+/*
+ * sequential calls to add_physmem_online_range with adjacent memory ranges
+ * are merged together into single memory range.
+ */
+void add_physmem_online_range(u64 start, u64 end)
+{
+	struct physmem_range *range;
+
+	if (physmem_info.range_count) {
+		range = __get_physmem_range_ptr(physmem_info.range_count - 1);
+		if (range->end == start) {
+			range->end = end;
+			return;
+		}
+	}
+
+	range = __get_physmem_range_ptr(physmem_info.range_count);
+	range->start = start;
+	range->end = end;
+	physmem_info.range_count++;
+}
+
+static int __diag260(unsigned long rx1, unsigned long rx2)
+{
+	unsigned long reg1, reg2, ry;
+	union register_pair rx;
+	psw_t old;
+	int rc;
+
+	rx.even = rx1;
+	rx.odd	= rx2;
+	ry = 0x10; /* storage configuration */
+	rc = -1;   /* fail */
+	asm volatile(
+		"	mvc	0(16,%[psw_old]),0(%[psw_pgm])\n"
+		"	epsw	%[reg1],%[reg2]\n"
+		"	st	%[reg1],0(%[psw_pgm])\n"
+		"	st	%[reg2],4(%[psw_pgm])\n"
+		"	larl	%[reg1],1f\n"
+		"	stg	%[reg1],8(%[psw_pgm])\n"
+		"	diag	%[rx],%[ry],0x260\n"
+		"	ipm	%[rc]\n"
+		"	srl	%[rc],28\n"
+		"1:	mvc	0(16,%[psw_pgm]),0(%[psw_old])\n"
+		: [reg1] "=&d" (reg1),
+		  [reg2] "=&a" (reg2),
+		  [rc] "+&d" (rc),
+		  [ry] "+&d" (ry),
+		  "+Q" (S390_lowcore.program_new_psw),
+		  "=Q" (old)
+		: [rx] "d" (rx.pair),
+		  [psw_old] "a" (&old),
+		  [psw_pgm] "a" (&S390_lowcore.program_new_psw)
+		: "cc", "memory");
+	return rc == 0 ? ry : -1;
+}
+
+static int diag260(void)
+{
+	int rc, i;
+
+	struct {
+		unsigned long start;
+		unsigned long end;
+	} storage_extents[8] __aligned(16); /* VM supports up to 8 extends */
+
+	memset(storage_extents, 0, sizeof(storage_extents));
+	rc = __diag260((unsigned long)storage_extents, sizeof(storage_extents));
+	if (rc == -1)
+		return -1;
+
+	for (i = 0; i < min_t(int, rc, ARRAY_SIZE(storage_extents)); i++)
+		add_physmem_online_range(storage_extents[i].start, storage_extents[i].end + 1);
+	return 0;
+}
+
+static int tprot(unsigned long addr)
+{
+	unsigned long reg1, reg2;
+	int rc = -EFAULT;
+	psw_t old;
+
+	asm volatile(
+		"	mvc	0(16,%[psw_old]),0(%[psw_pgm])\n"
+		"	epsw	%[reg1],%[reg2]\n"
+		"	st	%[reg1],0(%[psw_pgm])\n"
+		"	st	%[reg2],4(%[psw_pgm])\n"
+		"	larl	%[reg1],1f\n"
+		"	stg	%[reg1],8(%[psw_pgm])\n"
+		"	tprot	0(%[addr]),0\n"
+		"	ipm	%[rc]\n"
+		"	srl	%[rc],28\n"
+		"1:	mvc	0(16,%[psw_pgm]),0(%[psw_old])\n"
+		: [reg1] "=&d" (reg1),
+		  [reg2] "=&a" (reg2),
+		  [rc] "+&d" (rc),
+		  "=Q" (S390_lowcore.program_new_psw.addr),
+		  "=Q" (old)
+		: [psw_old] "a" (&old),
+		  [psw_pgm] "a" (&S390_lowcore.program_new_psw),
+		  [addr] "a" (addr)
+		: "cc", "memory");
+	return rc;
+}
+
+static unsigned long search_mem_end(void)
+{
+	unsigned long range = 1 << (MAX_PHYSMEM_BITS - 20); /* in 1MB blocks */
+	unsigned long offset = 0;
+	unsigned long pivot;
+
+	while (range > 1) {
+		range >>= 1;
+		pivot = offset + range;
+		if (!tprot(pivot << 20))
+			offset = pivot;
+	}
+	return (offset + 1) << 20;
+}
+
+unsigned long detect_max_physmem_end(void)
+{
+	unsigned long max_physmem_end = 0;
+
+	if (!sclp_early_get_memsize(&max_physmem_end)) {
+		physmem_info.info_source = MEM_DETECT_SCLP_READ_INFO;
+	} else {
+		max_physmem_end = search_mem_end();
+		physmem_info.info_source = MEM_DETECT_BIN_SEARCH;
+	}
+	return max_physmem_end;
+}
+
+void detect_physmem_online_ranges(unsigned long max_physmem_end)
+{
+	if (!sclp_early_read_storage_info()) {
+		physmem_info.info_source = MEM_DETECT_SCLP_STOR_INFO;
+	} else if (!diag260()) {
+		physmem_info.info_source = MEM_DETECT_DIAG260;
+	} else if (max_physmem_end) {
+		add_physmem_online_range(0, max_physmem_end);
+	}
+}
+
+void physmem_set_usable_limit(unsigned long limit)
+{
+	physmem_info.usable = limit;
+	physmem_alloc_pos = limit;
+}
+
+static void die_oom(unsigned long size, unsigned long align, unsigned long min, unsigned long max)
+{
+	unsigned long start, end, total_mem = 0, total_reserved_mem = 0;
+	struct reserved_range *range;
+	enum reserved_range_type t;
+	int i;
+
+	decompressor_printk("Linux version %s\n", kernel_version);
+	if (!is_prot_virt_guest() && early_command_line[0])
+		decompressor_printk("Kernel command line: %s\n", early_command_line);
+	decompressor_printk("Out of memory allocating %lx bytes %lx aligned in range %lx:%lx\n",
+			    size, align, min, max);
+	decompressor_printk("Reserved memory ranges:\n");
+	for_each_physmem_reserved_range(t, range, &start, &end) {
+		decompressor_printk("%016lx %016lx %s\n", start, end, get_rr_type_name(t));
+		total_reserved_mem += end - start;
+	}
+	decompressor_printk("Usable online memory ranges (info source: %s [%x]):\n",
+			    get_physmem_info_source(), physmem_info.info_source);
+	for_each_physmem_usable_range(i, &start, &end) {
+		decompressor_printk("%016lx %016lx\n", start, end);
+		total_mem += end - start;
+	}
+	decompressor_printk("Usable online memory total: %lx Reserved: %lx Free: %lx\n",
+			    total_mem, total_reserved_mem,
+			    total_mem > total_reserved_mem ? total_mem - total_reserved_mem : 0);
+	print_stacktrace(current_frame_address());
+	sclp_early_printk("\n\n -- System halted\n");
+	disabled_wait();
+}
+
+void physmem_reserve(enum reserved_range_type type, unsigned long addr, unsigned long size)
+{
+	physmem_info.reserved[type].start = addr;
+	physmem_info.reserved[type].end = addr + size;
+}
+
+void physmem_free(enum reserved_range_type type)
+{
+	physmem_info.reserved[type].start = 0;
+	physmem_info.reserved[type].end = 0;
+}
+
+static bool __physmem_alloc_intersects(unsigned long addr, unsigned long size,
+				       unsigned long *intersection_start)
+{
+	unsigned long res_addr, res_size;
+	int t;
+
+	for (t = 0; t < RR_MAX; t++) {
+		if (!get_physmem_reserved(t, &res_addr, &res_size))
+			continue;
+		if (intersects(addr, size, res_addr, res_size)) {
+			*intersection_start = res_addr;
+			return true;
+		}
+	}
+	return ipl_report_certs_intersects(addr, size, intersection_start);
+}
+
+static unsigned long __physmem_alloc_range(unsigned long size, unsigned long align,
+					   unsigned long min, unsigned long max,
+					   unsigned int from_ranges, unsigned int *ranges_left,
+					   bool die_on_oom)
+{
+	unsigned int nranges = from_ranges ?: physmem_info.range_count;
+	unsigned long range_start, range_end;
+	unsigned long intersection_start;
+	unsigned long addr, pos = max;
+
+	align = max(align, 8UL);
+	while (nranges) {
+		__get_physmem_range(nranges - 1, &range_start, &range_end, false);
+		pos = min(range_end, pos);
+
+		if (round_up(min, align) + size > pos)
+			break;
+		addr = round_down(pos - size, align);
+		if (range_start > addr) {
+			nranges--;
+			continue;
+		}
+		if (__physmem_alloc_intersects(addr, size, &intersection_start)) {
+			pos = intersection_start;
+			continue;
+		}
+
+		if (ranges_left)
+			*ranges_left = nranges;
+		return addr;
+	}
+	if (die_on_oom)
+		die_oom(size, align, min, max);
+	return 0;
+}
+
+unsigned long physmem_alloc_range(enum reserved_range_type type, unsigned long size,
+				  unsigned long align, unsigned long min, unsigned long max,
+				  bool die_on_oom)
+{
+	unsigned long addr;
+
+	max = min(max, physmem_alloc_pos);
+	addr = __physmem_alloc_range(size, align, min, max, 0, NULL, die_on_oom);
+	if (addr)
+		physmem_reserve(type, addr, size);
+	return addr;
+}
+
+unsigned long physmem_alloc_top_down(enum reserved_range_type type, unsigned long size,
+				     unsigned long align)
+{
+	struct reserved_range *range = &physmem_info.reserved[type];
+	struct reserved_range *new_range;
+	unsigned int ranges_left;
+	unsigned long addr;
+
+	addr = __physmem_alloc_range(size, align, 0, physmem_alloc_pos, physmem_alloc_ranges,
+				     &ranges_left, true);
+	/* if not a consecutive allocation of the same type or first allocation */
+	if (range->start != addr + size) {
+		if (range->end) {
+			physmem_alloc_pos = __physmem_alloc_range(
+				sizeof(struct reserved_range), 0, 0, physmem_alloc_pos,
+				physmem_alloc_ranges, &ranges_left, true);
+			new_range = (struct reserved_range *)physmem_alloc_pos;
+			*new_range = *range;
+			range->chain = new_range;
+			addr = __physmem_alloc_range(size, align, 0, physmem_alloc_pos,
+						     ranges_left, &ranges_left, true);
+		}
+		range->end = addr + size;
+	}
+	range->start = addr;
+	physmem_alloc_pos = addr;
+	physmem_alloc_ranges = ranges_left;
+	return addr;
+}
+
+unsigned long get_physmem_alloc_pos(void)
+{
+	return physmem_alloc_pos;
+}
diff --git a/arch/s390/boot/sclp_early_core.c b/arch/s390/boot/sclp_early_core.c
new file mode 100644
index 0000000000..6f30646afb
--- /dev/null
+++ b/arch/s390/boot/sclp_early_core.c
@@ -0,0 +1,11 @@
+// SPDX-License-Identifier: GPL-2.0
+#include "boot.h"
+#include "../../../drivers/s390/char/sclp_early_core.c"
+
+/* SCLP early buffer must stay page-aligned and below 2GB */
+static char __sclp_early_sccb[EXT_SCCB_READ_SCP] __aligned(PAGE_SIZE);
+
+void sclp_early_setup_buffer(void)
+{
+	sclp_early_set_buffer(&__sclp_early_sccb);
+}
diff --git a/arch/s390/boot/startup.c b/arch/s390/boot/startup.c
new file mode 100644
index 0000000000..d3e48bd9c3
--- /dev/null
+++ b/arch/s390/boot/startup.c
@@ -0,0 +1,378 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/string.h>
+#include <linux/elf.h>
+#include <asm/boot_data.h>
+#include <asm/sections.h>
+#include <asm/maccess.h>
+#include <asm/cpu_mf.h>
+#include <asm/setup.h>
+#include <asm/kasan.h>
+#include <asm/kexec.h>
+#include <asm/sclp.h>
+#include <asm/diag.h>
+#include <asm/uv.h>
+#include <asm/abs_lowcore.h>
+#include <asm/physmem_info.h>
+#include "decompressor.h"
+#include "boot.h"
+#include "uv.h"
+
+unsigned long __bootdata_preserved(__kaslr_offset);
+unsigned long __bootdata_preserved(__abs_lowcore);
+unsigned long __bootdata_preserved(__memcpy_real_area);
+pte_t *__bootdata_preserved(memcpy_real_ptep);
+unsigned long __bootdata_preserved(VMALLOC_START);
+unsigned long __bootdata_preserved(VMALLOC_END);
+struct page *__bootdata_preserved(vmemmap);
+unsigned long __bootdata_preserved(vmemmap_size);
+unsigned long __bootdata_preserved(MODULES_VADDR);
+unsigned long __bootdata_preserved(MODULES_END);
+unsigned long __bootdata_preserved(max_mappable);
+unsigned long __bootdata(ident_map_size);
+
+u64 __bootdata_preserved(stfle_fac_list[16]);
+u64 __bootdata_preserved(alt_stfle_fac_list[16]);
+struct oldmem_data __bootdata_preserved(oldmem_data);
+
+struct machine_info machine;
+
+void error(char *x)
+{
+	sclp_early_printk("\n\n");
+	sclp_early_printk(x);
+	sclp_early_printk("\n\n -- System halted");
+
+	disabled_wait();
+}
+
+static void detect_facilities(void)
+{
+	if (test_facility(8)) {
+		machine.has_edat1 = 1;
+		__ctl_set_bit(0, 23);
+	}
+	if (test_facility(78))
+		machine.has_edat2 = 1;
+	if (test_facility(130))
+		machine.has_nx = 1;
+}
+
+static void setup_lpp(void)
+{
+	S390_lowcore.current_pid = 0;
+	S390_lowcore.lpp = LPP_MAGIC;
+	if (test_facility(40))
+		lpp(&S390_lowcore.lpp);
+}
+
+#ifdef CONFIG_KERNEL_UNCOMPRESSED
+unsigned long mem_safe_offset(void)
+{
+	return vmlinux.default_lma + vmlinux.image_size + vmlinux.bss_size;
+}
+#endif
+
+static void rescue_initrd(unsigned long min, unsigned long max)
+{
+	unsigned long old_addr, addr, size;
+
+	if (!IS_ENABLED(CONFIG_BLK_DEV_INITRD))
+		return;
+	if (!get_physmem_reserved(RR_INITRD, &addr, &size))
+		return;
+	if (addr >= min && addr + size <= max)
+		return;
+	old_addr = addr;
+	physmem_free(RR_INITRD);
+	addr = physmem_alloc_top_down(RR_INITRD, size, 0);
+	memmove((void *)addr, (void *)old_addr, size);
+}
+
+static void copy_bootdata(void)
+{
+	if (__boot_data_end - __boot_data_start != vmlinux.bootdata_size)
+		error(".boot.data section size mismatch");
+	memcpy((void *)vmlinux.bootdata_off, __boot_data_start, vmlinux.bootdata_size);
+	if (__boot_data_preserved_end - __boot_data_preserved_start != vmlinux.bootdata_preserved_size)
+		error(".boot.preserved.data section size mismatch");
+	memcpy((void *)vmlinux.bootdata_preserved_off, __boot_data_preserved_start, vmlinux.bootdata_preserved_size);
+}
+
+static void handle_relocs(unsigned long offset)
+{
+	Elf64_Rela *rela_start, *rela_end, *rela;
+	int r_type, r_sym, rc;
+	Elf64_Addr loc, val;
+	Elf64_Sym *dynsym;
+
+	rela_start = (Elf64_Rela *) vmlinux.rela_dyn_start;
+	rela_end = (Elf64_Rela *) vmlinux.rela_dyn_end;
+	dynsym = (Elf64_Sym *) vmlinux.dynsym_start;
+	for (rela = rela_start; rela < rela_end; rela++) {
+		loc = rela->r_offset + offset;
+		val = rela->r_addend;
+		r_sym = ELF64_R_SYM(rela->r_info);
+		if (r_sym) {
+			if (dynsym[r_sym].st_shndx != SHN_UNDEF)
+				val += dynsym[r_sym].st_value + offset;
+		} else {
+			/*
+			 * 0 == undefined symbol table index (STN_UNDEF),
+			 * used for R_390_RELATIVE, only add KASLR offset
+			 */
+			val += offset;
+		}
+		r_type = ELF64_R_TYPE(rela->r_info);
+		rc = arch_kexec_do_relocs(r_type, (void *) loc, val, 0);
+		if (rc)
+			error("Unknown relocation type");
+	}
+}
+
+/*
+ * Merge information from several sources into a single ident_map_size value.
+ * "ident_map_size" represents the upper limit of physical memory we may ever
+ * reach. It might not be all online memory, but also include standby (offline)
+ * memory. "ident_map_size" could be lower then actual standby or even online
+ * memory present, due to limiting factors. We should never go above this limit.
+ * It is the size of our identity mapping.
+ *
+ * Consider the following factors:
+ * 1. max_physmem_end - end of physical memory online or standby.
+ *    Always >= end of the last online memory range (get_physmem_online_end()).
+ * 2. CONFIG_MAX_PHYSMEM_BITS - the maximum size of physical memory the
+ *    kernel is able to support.
+ * 3. "mem=" kernel command line option which limits physical memory usage.
+ * 4. OLDMEM_BASE which is a kdump memory limit when the kernel is executed as
+ *    crash kernel.
+ * 5. "hsa" size which is a memory limit when the kernel is executed during
+ *    zfcp/nvme dump.
+ */
+static void setup_ident_map_size(unsigned long max_physmem_end)
+{
+	unsigned long hsa_size;
+
+	ident_map_size = max_physmem_end;
+	if (memory_limit)
+		ident_map_size = min(ident_map_size, memory_limit);
+	ident_map_size = min(ident_map_size, 1UL << MAX_PHYSMEM_BITS);
+
+#ifdef CONFIG_CRASH_DUMP
+	if (oldmem_data.start) {
+		__kaslr_enabled = 0;
+		ident_map_size = min(ident_map_size, oldmem_data.size);
+	} else if (ipl_block_valid && is_ipl_block_dump()) {
+		__kaslr_enabled = 0;
+		if (!sclp_early_get_hsa_size(&hsa_size) && hsa_size)
+			ident_map_size = min(ident_map_size, hsa_size);
+	}
+#endif
+}
+
+static unsigned long setup_kernel_memory_layout(void)
+{
+	unsigned long vmemmap_start;
+	unsigned long asce_limit;
+	unsigned long rte_size;
+	unsigned long pages;
+	unsigned long vsize;
+	unsigned long vmax;
+
+	pages = ident_map_size / PAGE_SIZE;
+	/* vmemmap contains a multiple of PAGES_PER_SECTION struct pages */
+	vmemmap_size = SECTION_ALIGN_UP(pages) * sizeof(struct page);
+
+	/* choose kernel address space layout: 4 or 3 levels. */
+	vsize = round_up(ident_map_size, _REGION3_SIZE) + vmemmap_size +
+		MODULES_LEN + MEMCPY_REAL_SIZE + ABS_LOWCORE_MAP_SIZE;
+	vsize = size_add(vsize, vmalloc_size);
+	if (IS_ENABLED(CONFIG_KASAN) || (vsize > _REGION2_SIZE)) {
+		asce_limit = _REGION1_SIZE;
+		rte_size = _REGION2_SIZE;
+	} else {
+		asce_limit = _REGION2_SIZE;
+		rte_size = _REGION3_SIZE;
+	}
+
+	/*
+	 * Forcing modules and vmalloc area under the ultravisor
+	 * secure storage limit, so that any vmalloc allocation
+	 * we do could be used to back secure guest storage.
+	 */
+	vmax = adjust_to_uv_max(asce_limit);
+#ifdef CONFIG_KASAN
+	/* force vmalloc and modules below kasan shadow */
+	vmax = min(vmax, KASAN_SHADOW_START);
+#endif
+	__memcpy_real_area = round_down(vmax - MEMCPY_REAL_SIZE, PAGE_SIZE);
+	__abs_lowcore = round_down(__memcpy_real_area - ABS_LOWCORE_MAP_SIZE,
+				   sizeof(struct lowcore));
+	MODULES_END = round_down(__abs_lowcore, _SEGMENT_SIZE);
+	MODULES_VADDR = MODULES_END - MODULES_LEN;
+	VMALLOC_END = MODULES_VADDR;
+
+	/* allow vmalloc area to occupy up to about 1/2 of the rest virtual space left */
+	vmalloc_size = min(vmalloc_size, round_down(VMALLOC_END / 2, _REGION3_SIZE));
+	VMALLOC_START = VMALLOC_END - vmalloc_size;
+
+	/* split remaining virtual space between 1:1 mapping & vmemmap array */
+	pages = VMALLOC_START / (PAGE_SIZE + sizeof(struct page));
+	pages = SECTION_ALIGN_UP(pages);
+	/* keep vmemmap_start aligned to a top level region table entry */
+	vmemmap_start = round_down(VMALLOC_START - pages * sizeof(struct page), rte_size);
+	vmemmap_start = min(vmemmap_start, 1UL << MAX_PHYSMEM_BITS);
+	/* maximum mappable address as seen by arch_get_mappable_range() */
+	max_mappable = vmemmap_start;
+	/* make sure identity map doesn't overlay with vmemmap */
+	ident_map_size = min(ident_map_size, vmemmap_start);
+	vmemmap_size = SECTION_ALIGN_UP(ident_map_size / PAGE_SIZE) * sizeof(struct page);
+	/* make sure vmemmap doesn't overlay with vmalloc area */
+	VMALLOC_START = max(vmemmap_start + vmemmap_size, VMALLOC_START);
+	vmemmap = (struct page *)vmemmap_start;
+
+	return asce_limit;
+}
+
+/*
+ * This function clears the BSS section of the decompressed Linux kernel and NOT the decompressor's.
+ */
+static void clear_bss_section(unsigned long vmlinux_lma)
+{
+	memset((void *)vmlinux_lma + vmlinux.image_size, 0, vmlinux.bss_size);
+}
+
+/*
+ * Set vmalloc area size to an 8th of (potential) physical memory
+ * size, unless size has been set by kernel command line parameter.
+ */
+static void setup_vmalloc_size(void)
+{
+	unsigned long size;
+
+	if (vmalloc_size_set)
+		return;
+	size = round_up(ident_map_size / 8, _SEGMENT_SIZE);
+	vmalloc_size = max(size, vmalloc_size);
+}
+
+static void offset_vmlinux_info(unsigned long offset)
+{
+	*(unsigned long *)(&vmlinux.entry) += offset;
+	vmlinux.bootdata_off += offset;
+	vmlinux.bootdata_preserved_off += offset;
+	vmlinux.rela_dyn_start += offset;
+	vmlinux.rela_dyn_end += offset;
+	vmlinux.dynsym_start += offset;
+	vmlinux.init_mm_off += offset;
+	vmlinux.swapper_pg_dir_off += offset;
+	vmlinux.invalid_pg_dir_off += offset;
+#ifdef CONFIG_KASAN
+	vmlinux.kasan_early_shadow_page_off += offset;
+	vmlinux.kasan_early_shadow_pte_off += offset;
+	vmlinux.kasan_early_shadow_pmd_off += offset;
+	vmlinux.kasan_early_shadow_pud_off += offset;
+	vmlinux.kasan_early_shadow_p4d_off += offset;
+#endif
+}
+
+void startup_kernel(void)
+{
+	unsigned long max_physmem_end;
+	unsigned long vmlinux_lma = 0;
+	unsigned long amode31_lma = 0;
+	unsigned long asce_limit;
+	unsigned long safe_addr;
+	void *img;
+	psw_t psw;
+
+	setup_lpp();
+	safe_addr = mem_safe_offset();
+
+	/*
+	 * Reserve decompressor memory together with decompression heap, buffer and
+	 * memory which might be occupied by uncompressed kernel at default 1Mb
+	 * position (if KASLR is off or failed).
+	 */
+	physmem_reserve(RR_DECOMPRESSOR, 0, safe_addr);
+	if (IS_ENABLED(CONFIG_BLK_DEV_INITRD) && parmarea.initrd_size)
+		physmem_reserve(RR_INITRD, parmarea.initrd_start, parmarea.initrd_size);
+	oldmem_data.start = parmarea.oldmem_base;
+	oldmem_data.size = parmarea.oldmem_size;
+
+	store_ipl_parmblock();
+	read_ipl_report();
+	uv_query_info();
+	sclp_early_read_info();
+	setup_boot_command_line();
+	parse_boot_command_line();
+	detect_facilities();
+	sanitize_prot_virt_host();
+	max_physmem_end = detect_max_physmem_end();
+	setup_ident_map_size(max_physmem_end);
+	setup_vmalloc_size();
+	asce_limit = setup_kernel_memory_layout();
+	/* got final ident_map_size, physmem allocations could be performed now */
+	physmem_set_usable_limit(ident_map_size);
+	detect_physmem_online_ranges(max_physmem_end);
+	save_ipl_cert_comp_list();
+	rescue_initrd(safe_addr, ident_map_size);
+
+	if (kaslr_enabled()) {
+		vmlinux_lma = randomize_within_range(vmlinux.image_size + vmlinux.bss_size,
+						     THREAD_SIZE, vmlinux.default_lma,
+						     ident_map_size);
+		if (vmlinux_lma) {
+			__kaslr_offset = vmlinux_lma - vmlinux.default_lma;
+			offset_vmlinux_info(__kaslr_offset);
+		}
+	}
+	vmlinux_lma = vmlinux_lma ?: vmlinux.default_lma;
+	physmem_reserve(RR_VMLINUX, vmlinux_lma, vmlinux.image_size + vmlinux.bss_size);
+
+	if (!IS_ENABLED(CONFIG_KERNEL_UNCOMPRESSED)) {
+		img = decompress_kernel();
+		memmove((void *)vmlinux_lma, img, vmlinux.image_size);
+	} else if (__kaslr_offset) {
+		img = (void *)vmlinux.default_lma;
+		memmove((void *)vmlinux_lma, img, vmlinux.image_size);
+		memset(img, 0, vmlinux.image_size);
+	}
+
+	/* vmlinux decompression is done, shrink reserved low memory */
+	physmem_reserve(RR_DECOMPRESSOR, 0, (unsigned long)_decompressor_end);
+	if (kaslr_enabled())
+		amode31_lma = randomize_within_range(vmlinux.amode31_size, PAGE_SIZE, 0, SZ_2G);
+	amode31_lma = amode31_lma ?: vmlinux.default_lma - vmlinux.amode31_size;
+	physmem_reserve(RR_AMODE31, amode31_lma, vmlinux.amode31_size);
+
+	/*
+	 * The order of the following operations is important:
+	 *
+	 * - handle_relocs() must follow clear_bss_section() to establish static
+	 *   memory references to data in .bss to be used by setup_vmem()
+	 *   (i.e init_mm.pgd)
+	 *
+	 * - setup_vmem() must follow handle_relocs() to be able using
+	 *   static memory references to data in .bss (i.e init_mm.pgd)
+	 *
+	 * - copy_bootdata() must follow setup_vmem() to propagate changes to
+	 *   bootdata made by setup_vmem()
+	 */
+	clear_bss_section(vmlinux_lma);
+	handle_relocs(__kaslr_offset);
+	setup_vmem(asce_limit);
+	copy_bootdata();
+
+	/*
+	 * Save KASLR offset for early dumps, before vmcore_info is set.
+	 * Mark as uneven to distinguish from real vmcore_info pointer.
+	 */
+	S390_lowcore.vmcore_info = __kaslr_offset ? __kaslr_offset | 0x1UL : 0;
+
+	/*
+	 * Jump to the decompressed kernel entry point and switch DAT mode on.
+	 */
+	psw.addr = vmlinux.entry;
+	psw.mask = PSW_KERNEL_BITS;
+	__load_psw(psw);
+}
diff --git a/arch/s390/boot/string.c b/arch/s390/boot/string.c
new file mode 100644
index 0000000000..faccb33b46
--- /dev/null
+++ b/arch/s390/boot/string.c
@@ -0,0 +1,140 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/ctype.h>
+#include <linux/kernel.h>
+#include <linux/errno.h>
+#undef CONFIG_KASAN
+#undef CONFIG_KASAN_GENERIC
+#include "../lib/string.c"
+
+int strncmp(const char *cs, const char *ct, size_t count)
+{
+	unsigned char c1, c2;
+
+	while (count) {
+		c1 = *cs++;
+		c2 = *ct++;
+		if (c1 != c2)
+			return c1 < c2 ? -1 : 1;
+		if (!c1)
+			break;
+		count--;
+	}
+	return 0;
+}
+
+char *skip_spaces(const char *str)
+{
+	while (isspace(*str))
+		++str;
+	return (char *)str;
+}
+
+char *strim(char *s)
+{
+	size_t size;
+	char *end;
+
+	size = strlen(s);
+	if (!size)
+		return s;
+
+	end = s + size - 1;
+	while (end >= s && isspace(*end))
+		end--;
+	*(end + 1) = '\0';
+
+	return skip_spaces(s);
+}
+
+/* Works only for digits and letters, but small and fast */
+#define TOLOWER(x) ((x) | 0x20)
+
+static unsigned int simple_guess_base(const char *cp)
+{
+	if (cp[0] == '0') {
+		if (TOLOWER(cp[1]) == 'x' && isxdigit(cp[2]))
+			return 16;
+		else
+			return 8;
+	} else {
+		return 10;
+	}
+}
+
+/**
+ * simple_strtoull - convert a string to an unsigned long long
+ * @cp: The start of the string
+ * @endp: A pointer to the end of the parsed string will be placed here
+ * @base: The number base to use
+ */
+
+unsigned long long simple_strtoull(const char *cp, char **endp,
+				   unsigned int base)
+{
+	unsigned long long result = 0;
+
+	if (!base)
+		base = simple_guess_base(cp);
+
+	if (base == 16 && cp[0] == '0' && TOLOWER(cp[1]) == 'x')
+		cp += 2;
+
+	while (isxdigit(*cp)) {
+		unsigned int value;
+
+		value = isdigit(*cp) ? *cp - '0' : TOLOWER(*cp) - 'a' + 10;
+		if (value >= base)
+			break;
+		result = result * base + value;
+		cp++;
+	}
+	if (endp)
+		*endp = (char *)cp;
+
+	return result;
+}
+
+long simple_strtol(const char *cp, char **endp, unsigned int base)
+{
+	if (*cp == '-')
+		return -simple_strtoull(cp + 1, endp, base);
+
+	return simple_strtoull(cp, endp, base);
+}
+
+int kstrtobool(const char *s, bool *res)
+{
+	if (!s)
+		return -EINVAL;
+
+	switch (s[0]) {
+	case 'y':
+	case 'Y':
+	case '1':
+		*res = true;
+		return 0;
+	case 'n':
+	case 'N':
+	case '0':
+		*res = false;
+		return 0;
+	case 'o':
+	case 'O':
+		switch (s[1]) {
+		case 'n':
+		case 'N':
+			*res = true;
+			return 0;
+		case 'f':
+		case 'F':
+			*res = false;
+			return 0;
+		default:
+			break;
+		}
+	default:
+		break;
+	}
+
+	return -EINVAL;
+}
diff --git a/arch/s390/boot/uv.c b/arch/s390/boot/uv.c
new file mode 100644
index 0000000000..1e66d2cbb0
--- /dev/null
+++ b/arch/s390/boot/uv.c
@@ -0,0 +1,95 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <asm/uv.h>
+#include <asm/boot_data.h>
+#include <asm/facility.h>
+#include <asm/sections.h>
+
+#include "boot.h"
+#include "uv.h"
+
+/* will be used in arch/s390/kernel/uv.c */
+#ifdef CONFIG_PROTECTED_VIRTUALIZATION_GUEST
+int __bootdata_preserved(prot_virt_guest);
+#endif
+#if IS_ENABLED(CONFIG_KVM)
+int __bootdata_preserved(prot_virt_host);
+#endif
+struct uv_info __bootdata_preserved(uv_info);
+
+void uv_query_info(void)
+{
+	struct uv_cb_qui uvcb = {
+		.header.cmd = UVC_CMD_QUI,
+		.header.len = sizeof(uvcb)
+	};
+
+	if (!test_facility(158))
+		return;
+
+	/* rc==0x100 means that there is additional data we do not process */
+	if (uv_call(0, (uint64_t)&uvcb) && uvcb.header.rc != 0x100)
+		return;
+
+	if (IS_ENABLED(CONFIG_KVM)) {
+		memcpy(uv_info.inst_calls_list, uvcb.inst_calls_list, sizeof(uv_info.inst_calls_list));
+		uv_info.uv_base_stor_len = uvcb.uv_base_stor_len;
+		uv_info.guest_base_stor_len = uvcb.conf_base_phys_stor_len;
+		uv_info.guest_virt_base_stor_len = uvcb.conf_base_virt_stor_len;
+		uv_info.guest_virt_var_stor_len = uvcb.conf_virt_var_stor_len;
+		uv_info.guest_cpu_stor_len = uvcb.cpu_stor_len;
+		uv_info.max_sec_stor_addr = ALIGN(uvcb.max_guest_stor_addr, PAGE_SIZE);
+		uv_info.max_num_sec_conf = uvcb.max_num_sec_conf;
+		uv_info.max_guest_cpu_id = uvcb.max_guest_cpu_id;
+		uv_info.uv_feature_indications = uvcb.uv_feature_indications;
+		uv_info.supp_se_hdr_ver = uvcb.supp_se_hdr_versions;
+		uv_info.supp_se_hdr_pcf = uvcb.supp_se_hdr_pcf;
+		uv_info.conf_dump_storage_state_len = uvcb.conf_dump_storage_state_len;
+		uv_info.conf_dump_finalize_len = uvcb.conf_dump_finalize_len;
+		uv_info.supp_att_req_hdr_ver = uvcb.supp_att_req_hdr_ver;
+		uv_info.supp_att_pflags = uvcb.supp_att_pflags;
+		uv_info.supp_add_secret_req_ver = uvcb.supp_add_secret_req_ver;
+		uv_info.supp_add_secret_pcf = uvcb.supp_add_secret_pcf;
+		uv_info.supp_secret_types = uvcb.supp_secret_types;
+		uv_info.max_secrets = uvcb.max_secrets;
+	}
+
+#ifdef CONFIG_PROTECTED_VIRTUALIZATION_GUEST
+	if (test_bit_inv(BIT_UVC_CMD_SET_SHARED_ACCESS, (unsigned long *)uvcb.inst_calls_list) &&
+	    test_bit_inv(BIT_UVC_CMD_REMOVE_SHARED_ACCESS, (unsigned long *)uvcb.inst_calls_list))
+		prot_virt_guest = 1;
+#endif
+}
+
+#if IS_ENABLED(CONFIG_KVM)
+unsigned long adjust_to_uv_max(unsigned long limit)
+{
+	if (is_prot_virt_host() && uv_info.max_sec_stor_addr)
+		limit = min_t(unsigned long, limit, uv_info.max_sec_stor_addr);
+	return limit;
+}
+
+static int is_prot_virt_host_capable(void)
+{
+	/* disable if no prot_virt=1 given on command-line */
+	if (!is_prot_virt_host())
+		return 0;
+	/* disable if protected guest virtualization is enabled */
+	if (is_prot_virt_guest())
+		return 0;
+	/* disable if no hardware support */
+	if (!test_facility(158))
+		return 0;
+	/* disable if kdump */
+	if (oldmem_data.start)
+		return 0;
+	/* disable if stand-alone dump */
+	if (ipl_block_valid && is_ipl_block_dump())
+		return 0;
+	return 1;
+}
+
+void sanitize_prot_virt_host(void)
+{
+	prot_virt_host = is_prot_virt_host_capable();
+}
+#endif
diff --git a/arch/s390/boot/uv.h b/arch/s390/boot/uv.h
new file mode 100644
index 0000000000..0f3070856f
--- /dev/null
+++ b/arch/s390/boot/uv.h
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef BOOT_UV_H
+#define BOOT_UV_H
+
+#if IS_ENABLED(CONFIG_KVM)
+unsigned long adjust_to_uv_max(unsigned long limit);
+void sanitize_prot_virt_host(void);
+#else
+static inline unsigned long adjust_to_uv_max(unsigned long limit)
+{
+	return limit;
+}
+static inline void sanitize_prot_virt_host(void) {}
+#endif
+
+#if defined(CONFIG_PROTECTED_VIRTUALIZATION_GUEST) || IS_ENABLED(CONFIG_KVM)
+void uv_query_info(void);
+#else
+static inline void uv_query_info(void) {}
+#endif
+
+#endif /* BOOT_UV_H */
diff --git a/arch/s390/boot/version.c b/arch/s390/boot/version.c
new file mode 100644
index 0000000000..fd32f03877
--- /dev/null
+++ b/arch/s390/boot/version.c
@@ -0,0 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <generated/utsversion.h>
+#include <generated/utsrelease.h>
+#include <generated/compile.h>
+#include "boot.h"
+
+const char kernel_version[] = UTS_RELEASE
+	" (" LINUX_COMPILE_BY "@" LINUX_COMPILE_HOST ") " UTS_VERSION;
diff --git a/arch/s390/boot/vmem.c b/arch/s390/boot/vmem.c
new file mode 100644
index 0000000000..442a74f113
--- /dev/null
+++ b/arch/s390/boot/vmem.c
@@ -0,0 +1,458 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/sched/task.h>
+#include <linux/pgtable.h>
+#include <linux/kasan.h>
+#include <asm/pgalloc.h>
+#include <asm/facility.h>
+#include <asm/sections.h>
+#include <asm/physmem_info.h>
+#include <asm/maccess.h>
+#include <asm/abs_lowcore.h>
+#include "decompressor.h"
+#include "boot.h"
+
+unsigned long __bootdata_preserved(s390_invalid_asce);
+
+#ifdef CONFIG_PROC_FS
+atomic_long_t __bootdata_preserved(direct_pages_count[PG_DIRECT_MAP_MAX]);
+#endif
+
+#define init_mm			(*(struct mm_struct *)vmlinux.init_mm_off)
+#define swapper_pg_dir		vmlinux.swapper_pg_dir_off
+#define invalid_pg_dir		vmlinux.invalid_pg_dir_off
+
+enum populate_mode {
+	POPULATE_NONE,
+	POPULATE_DIRECT,
+	POPULATE_ABS_LOWCORE,
+#ifdef CONFIG_KASAN
+	POPULATE_KASAN_MAP_SHADOW,
+	POPULATE_KASAN_ZERO_SHADOW,
+	POPULATE_KASAN_SHALLOW
+#endif
+};
+
+static void pgtable_populate(unsigned long addr, unsigned long end, enum populate_mode mode);
+
+#ifdef CONFIG_KASAN
+
+#define kasan_early_shadow_page	vmlinux.kasan_early_shadow_page_off
+#define kasan_early_shadow_pte	((pte_t *)vmlinux.kasan_early_shadow_pte_off)
+#define kasan_early_shadow_pmd	((pmd_t *)vmlinux.kasan_early_shadow_pmd_off)
+#define kasan_early_shadow_pud	((pud_t *)vmlinux.kasan_early_shadow_pud_off)
+#define kasan_early_shadow_p4d	((p4d_t *)vmlinux.kasan_early_shadow_p4d_off)
+#define __sha(x)		((unsigned long)kasan_mem_to_shadow((void *)x))
+
+static pte_t pte_z;
+
+static inline void kasan_populate(unsigned long start, unsigned long end, enum populate_mode mode)
+{
+	start = PAGE_ALIGN_DOWN(__sha(start));
+	end = PAGE_ALIGN(__sha(end));
+	pgtable_populate(start, end, mode);
+}
+
+static void kasan_populate_shadow(void)
+{
+	pmd_t pmd_z = __pmd(__pa(kasan_early_shadow_pte) | _SEGMENT_ENTRY);
+	pud_t pud_z = __pud(__pa(kasan_early_shadow_pmd) | _REGION3_ENTRY);
+	p4d_t p4d_z = __p4d(__pa(kasan_early_shadow_pud) | _REGION2_ENTRY);
+	unsigned long memgap_start = 0;
+	unsigned long untracked_end;
+	unsigned long start, end;
+	int i;
+
+	pte_z = __pte(__pa(kasan_early_shadow_page) | pgprot_val(PAGE_KERNEL_RO));
+	if (!machine.has_nx)
+		pte_z = clear_pte_bit(pte_z, __pgprot(_PAGE_NOEXEC));
+	crst_table_init((unsigned long *)kasan_early_shadow_p4d, p4d_val(p4d_z));
+	crst_table_init((unsigned long *)kasan_early_shadow_pud, pud_val(pud_z));
+	crst_table_init((unsigned long *)kasan_early_shadow_pmd, pmd_val(pmd_z));
+	memset64((u64 *)kasan_early_shadow_pte, pte_val(pte_z), PTRS_PER_PTE);
+
+	/*
+	 * Current memory layout:
+	 * +- 0 -------------+	       +- shadow start -+
+	 * |1:1 ident mapping|	      /|1/8 of ident map|
+	 * |		     |	     / |		|
+	 * +-end of ident map+	    /  +----------------+
+	 * | ... gap ...     |	   /   |    kasan	|
+	 * |		     |	  /    |  zero page	|
+	 * +- vmalloc area  -+	 /     |   mapping	|
+	 * | vmalloc_size    |	/      | (untracked)	|
+	 * +- modules vaddr -+ /       +----------------+
+	 * | 2Gb	     |/        |    unmapped	| allocated per module
+	 * +- shadow start  -+	       +----------------+
+	 * | 1/8 addr space  |	       | zero pg mapping| (untracked)
+	 * +- shadow end ----+---------+- shadow end ---+
+	 *
+	 * Current memory layout (KASAN_VMALLOC):
+	 * +- 0 -------------+	       +- shadow start -+
+	 * |1:1 ident mapping|	      /|1/8 of ident map|
+	 * |		     |	     / |		|
+	 * +-end of ident map+	    /  +----------------+
+	 * | ... gap ...     |	   /   | kasan zero page| (untracked)
+	 * |		     |	  /    | mapping	|
+	 * +- vmalloc area  -+	 /     +----------------+
+	 * | vmalloc_size    |	/      |shallow populate|
+	 * +- modules vaddr -+ /       +----------------+
+	 * | 2Gb	     |/        |shallow populate|
+	 * +- shadow start  -+	       +----------------+
+	 * | 1/8 addr space  |	       | zero pg mapping| (untracked)
+	 * +- shadow end ----+---------+- shadow end ---+
+	 */
+
+	for_each_physmem_usable_range(i, &start, &end) {
+		kasan_populate(start, end, POPULATE_KASAN_MAP_SHADOW);
+		if (memgap_start && physmem_info.info_source == MEM_DETECT_DIAG260)
+			kasan_populate(memgap_start, start, POPULATE_KASAN_ZERO_SHADOW);
+		memgap_start = end;
+	}
+	if (IS_ENABLED(CONFIG_KASAN_VMALLOC)) {
+		untracked_end = VMALLOC_START;
+		/* shallowly populate kasan shadow for vmalloc and modules */
+		kasan_populate(VMALLOC_START, MODULES_END, POPULATE_KASAN_SHALLOW);
+	} else {
+		untracked_end = MODULES_VADDR;
+	}
+	/* populate kasan shadow for untracked memory */
+	kasan_populate(ident_map_size, untracked_end, POPULATE_KASAN_ZERO_SHADOW);
+	kasan_populate(MODULES_END, _REGION1_SIZE, POPULATE_KASAN_ZERO_SHADOW);
+}
+
+static bool kasan_pgd_populate_zero_shadow(pgd_t *pgd, unsigned long addr,
+					   unsigned long end, enum populate_mode mode)
+{
+	if (mode == POPULATE_KASAN_ZERO_SHADOW &&
+	    IS_ALIGNED(addr, PGDIR_SIZE) && end - addr >= PGDIR_SIZE) {
+		pgd_populate(&init_mm, pgd, kasan_early_shadow_p4d);
+		return true;
+	}
+	return false;
+}
+
+static bool kasan_p4d_populate_zero_shadow(p4d_t *p4d, unsigned long addr,
+					   unsigned long end, enum populate_mode mode)
+{
+	if (mode == POPULATE_KASAN_ZERO_SHADOW &&
+	    IS_ALIGNED(addr, P4D_SIZE) && end - addr >= P4D_SIZE) {
+		p4d_populate(&init_mm, p4d, kasan_early_shadow_pud);
+		return true;
+	}
+	return false;
+}
+
+static bool kasan_pud_populate_zero_shadow(pud_t *pud, unsigned long addr,
+					   unsigned long end, enum populate_mode mode)
+{
+	if (mode == POPULATE_KASAN_ZERO_SHADOW &&
+	    IS_ALIGNED(addr, PUD_SIZE) && end - addr >= PUD_SIZE) {
+		pud_populate(&init_mm, pud, kasan_early_shadow_pmd);
+		return true;
+	}
+	return false;
+}
+
+static bool kasan_pmd_populate_zero_shadow(pmd_t *pmd, unsigned long addr,
+					   unsigned long end, enum populate_mode mode)
+{
+	if (mode == POPULATE_KASAN_ZERO_SHADOW &&
+	    IS_ALIGNED(addr, PMD_SIZE) && end - addr >= PMD_SIZE) {
+		pmd_populate(&init_mm, pmd, kasan_early_shadow_pte);
+		return true;
+	}
+	return false;
+}
+
+static bool kasan_pte_populate_zero_shadow(pte_t *pte, enum populate_mode mode)
+{
+	pte_t entry;
+
+	if (mode == POPULATE_KASAN_ZERO_SHADOW) {
+		set_pte(pte, pte_z);
+		return true;
+	}
+	return false;
+}
+#else
+
+static inline void kasan_populate_shadow(void) {}
+
+static inline bool kasan_pgd_populate_zero_shadow(pgd_t *pgd, unsigned long addr,
+						  unsigned long end, enum populate_mode mode)
+{
+	return false;
+}
+
+static inline bool kasan_p4d_populate_zero_shadow(p4d_t *p4d, unsigned long addr,
+						  unsigned long end, enum populate_mode mode)
+{
+	return false;
+}
+
+static inline bool kasan_pud_populate_zero_shadow(pud_t *pud, unsigned long addr,
+						  unsigned long end, enum populate_mode mode)
+{
+	return false;
+}
+
+static inline bool kasan_pmd_populate_zero_shadow(pmd_t *pmd, unsigned long addr,
+						  unsigned long end, enum populate_mode mode)
+{
+	return false;
+}
+
+static bool kasan_pte_populate_zero_shadow(pte_t *pte, enum populate_mode mode)
+{
+	return false;
+}
+
+#endif
+
+/*
+ * Mimic virt_to_kpte() in lack of init_mm symbol. Skip pmd NULL check though.
+ */
+static inline pte_t *__virt_to_kpte(unsigned long va)
+{
+	return pte_offset_kernel(pmd_offset(pud_offset(p4d_offset(pgd_offset_k(va), va), va), va), va);
+}
+
+static void *boot_crst_alloc(unsigned long val)
+{
+	unsigned long size = PAGE_SIZE << CRST_ALLOC_ORDER;
+	unsigned long *table;
+
+	table = (unsigned long *)physmem_alloc_top_down(RR_VMEM, size, size);
+	crst_table_init(table, val);
+	return table;
+}
+
+static pte_t *boot_pte_alloc(void)
+{
+	static void *pte_leftover;
+	pte_t *pte;
+
+	/*
+	 * handling pte_leftovers this way helps to avoid memory fragmentation
+	 * during POPULATE_KASAN_MAP_SHADOW when EDAT is off
+	 */
+	if (!pte_leftover) {
+		pte_leftover = (void *)physmem_alloc_top_down(RR_VMEM, PAGE_SIZE, PAGE_SIZE);
+		pte = pte_leftover + _PAGE_TABLE_SIZE;
+	} else {
+		pte = pte_leftover;
+		pte_leftover = NULL;
+	}
+
+	memset64((u64 *)pte, _PAGE_INVALID, PTRS_PER_PTE);
+	return pte;
+}
+
+static unsigned long _pa(unsigned long addr, unsigned long size, enum populate_mode mode)
+{
+	switch (mode) {
+	case POPULATE_NONE:
+		return -1;
+	case POPULATE_DIRECT:
+		return addr;
+	case POPULATE_ABS_LOWCORE:
+		return __abs_lowcore_pa(addr);
+#ifdef CONFIG_KASAN
+	case POPULATE_KASAN_MAP_SHADOW:
+		addr = physmem_alloc_top_down(RR_VMEM, size, size);
+		memset((void *)addr, 0, size);
+		return addr;
+#endif
+	default:
+		return -1;
+	}
+}
+
+static bool can_large_pud(pud_t *pu_dir, unsigned long addr, unsigned long end)
+{
+	return machine.has_edat2 &&
+	       IS_ALIGNED(addr, PUD_SIZE) && (end - addr) >= PUD_SIZE;
+}
+
+static bool can_large_pmd(pmd_t *pm_dir, unsigned long addr, unsigned long end)
+{
+	return machine.has_edat1 &&
+	       IS_ALIGNED(addr, PMD_SIZE) && (end - addr) >= PMD_SIZE;
+}
+
+static void pgtable_pte_populate(pmd_t *pmd, unsigned long addr, unsigned long end,
+				 enum populate_mode mode)
+{
+	unsigned long pages = 0;
+	pte_t *pte, entry;
+
+	pte = pte_offset_kernel(pmd, addr);
+	for (; addr < end; addr += PAGE_SIZE, pte++) {
+		if (pte_none(*pte)) {
+			if (kasan_pte_populate_zero_shadow(pte, mode))
+				continue;
+			entry = __pte(_pa(addr, PAGE_SIZE, mode));
+			entry = set_pte_bit(entry, PAGE_KERNEL);
+			if (!machine.has_nx)
+				entry = clear_pte_bit(entry, __pgprot(_PAGE_NOEXEC));
+			set_pte(pte, entry);
+			pages++;
+		}
+	}
+	if (mode == POPULATE_DIRECT)
+		update_page_count(PG_DIRECT_MAP_4K, pages);
+}
+
+static void pgtable_pmd_populate(pud_t *pud, unsigned long addr, unsigned long end,
+				 enum populate_mode mode)
+{
+	unsigned long next, pages = 0;
+	pmd_t *pmd, entry;
+	pte_t *pte;
+
+	pmd = pmd_offset(pud, addr);
+	for (; addr < end; addr = next, pmd++) {
+		next = pmd_addr_end(addr, end);
+		if (pmd_none(*pmd)) {
+			if (kasan_pmd_populate_zero_shadow(pmd, addr, next, mode))
+				continue;
+			if (can_large_pmd(pmd, addr, next)) {
+				entry = __pmd(_pa(addr, _SEGMENT_SIZE, mode));
+				entry = set_pmd_bit(entry, SEGMENT_KERNEL);
+				if (!machine.has_nx)
+					entry = clear_pmd_bit(entry, __pgprot(_SEGMENT_ENTRY_NOEXEC));
+				set_pmd(pmd, entry);
+				pages++;
+				continue;
+			}
+			pte = boot_pte_alloc();
+			pmd_populate(&init_mm, pmd, pte);
+		} else if (pmd_large(*pmd)) {
+			continue;
+		}
+		pgtable_pte_populate(pmd, addr, next, mode);
+	}
+	if (mode == POPULATE_DIRECT)
+		update_page_count(PG_DIRECT_MAP_1M, pages);
+}
+
+static void pgtable_pud_populate(p4d_t *p4d, unsigned long addr, unsigned long end,
+				 enum populate_mode mode)
+{
+	unsigned long next, pages = 0;
+	pud_t *pud, entry;
+	pmd_t *pmd;
+
+	pud = pud_offset(p4d, addr);
+	for (; addr < end; addr = next, pud++) {
+		next = pud_addr_end(addr, end);
+		if (pud_none(*pud)) {
+			if (kasan_pud_populate_zero_shadow(pud, addr, next, mode))
+				continue;
+			if (can_large_pud(pud, addr, next)) {
+				entry = __pud(_pa(addr, _REGION3_SIZE, mode));
+				entry = set_pud_bit(entry, REGION3_KERNEL);
+				if (!machine.has_nx)
+					entry = clear_pud_bit(entry, __pgprot(_REGION_ENTRY_NOEXEC));
+				set_pud(pud, entry);
+				pages++;
+				continue;
+			}
+			pmd = boot_crst_alloc(_SEGMENT_ENTRY_EMPTY);
+			pud_populate(&init_mm, pud, pmd);
+		} else if (pud_large(*pud)) {
+			continue;
+		}
+		pgtable_pmd_populate(pud, addr, next, mode);
+	}
+	if (mode == POPULATE_DIRECT)
+		update_page_count(PG_DIRECT_MAP_2G, pages);
+}
+
+static void pgtable_p4d_populate(pgd_t *pgd, unsigned long addr, unsigned long end,
+				 enum populate_mode mode)
+{
+	unsigned long next;
+	p4d_t *p4d;
+	pud_t *pud;
+
+	p4d = p4d_offset(pgd, addr);
+	for (; addr < end; addr = next, p4d++) {
+		next = p4d_addr_end(addr, end);
+		if (p4d_none(*p4d)) {
+			if (kasan_p4d_populate_zero_shadow(p4d, addr, next, mode))
+				continue;
+			pud = boot_crst_alloc(_REGION3_ENTRY_EMPTY);
+			p4d_populate(&init_mm, p4d, pud);
+		}
+		pgtable_pud_populate(p4d, addr, next, mode);
+	}
+}
+
+static void pgtable_populate(unsigned long addr, unsigned long end, enum populate_mode mode)
+{
+	unsigned long next;
+	pgd_t *pgd;
+	p4d_t *p4d;
+
+	pgd = pgd_offset(&init_mm, addr);
+	for (; addr < end; addr = next, pgd++) {
+		next = pgd_addr_end(addr, end);
+		if (pgd_none(*pgd)) {
+			if (kasan_pgd_populate_zero_shadow(pgd, addr, next, mode))
+				continue;
+			p4d = boot_crst_alloc(_REGION2_ENTRY_EMPTY);
+			pgd_populate(&init_mm, pgd, p4d);
+		}
+#ifdef CONFIG_KASAN
+		if (mode == POPULATE_KASAN_SHALLOW)
+			continue;
+#endif
+		pgtable_p4d_populate(pgd, addr, next, mode);
+	}
+}
+
+void setup_vmem(unsigned long asce_limit)
+{
+	unsigned long start, end;
+	unsigned long asce_type;
+	unsigned long asce_bits;
+	int i;
+
+	if (asce_limit == _REGION1_SIZE) {
+		asce_type = _REGION2_ENTRY_EMPTY;
+		asce_bits = _ASCE_TYPE_REGION2 | _ASCE_TABLE_LENGTH;
+	} else {
+		asce_type = _REGION3_ENTRY_EMPTY;
+		asce_bits = _ASCE_TYPE_REGION3 | _ASCE_TABLE_LENGTH;
+	}
+	s390_invalid_asce = invalid_pg_dir | _ASCE_TYPE_REGION3 | _ASCE_TABLE_LENGTH;
+
+	crst_table_init((unsigned long *)swapper_pg_dir, asce_type);
+	crst_table_init((unsigned long *)invalid_pg_dir, _REGION3_ENTRY_EMPTY);
+
+	/*
+	 * To allow prefixing the lowcore must be mapped with 4KB pages.
+	 * To prevent creation of a large page at address 0 first map
+	 * the lowcore and create the identity mapping only afterwards.
+	 */
+	pgtable_populate(0, sizeof(struct lowcore), POPULATE_DIRECT);
+	for_each_physmem_usable_range(i, &start, &end)
+		pgtable_populate(start, end, POPULATE_DIRECT);
+	pgtable_populate(__abs_lowcore, __abs_lowcore + sizeof(struct lowcore),
+			 POPULATE_ABS_LOWCORE);
+	pgtable_populate(__memcpy_real_area, __memcpy_real_area + PAGE_SIZE,
+			 POPULATE_NONE);
+	memcpy_real_ptep = __virt_to_kpte(__memcpy_real_area);
+
+	kasan_populate_shadow();
+
+	S390_lowcore.kernel_asce = swapper_pg_dir | asce_bits;
+	S390_lowcore.user_asce = s390_invalid_asce;
+
+	__ctl_load(S390_lowcore.kernel_asce, 1, 1);
+	__ctl_load(S390_lowcore.user_asce, 7, 7);
+	__ctl_load(S390_lowcore.kernel_asce, 13, 13);
+
+	init_mm.context.asce = S390_lowcore.kernel_asce;
+}
diff --git a/arch/s390/boot/vmlinux.lds.S b/arch/s390/boot/vmlinux.lds.S
new file mode 100644
index 0000000000..389df0e0d9
--- /dev/null
+++ b/arch/s390/boot/vmlinux.lds.S
@@ -0,0 +1,128 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#include <asm-generic/vmlinux.lds.h>
+#include <asm/vmlinux.lds.h>
+#include <asm/thread_info.h>
+#include <asm/page.h>
+#include <asm/sclp.h>
+#include "boot.h"
+
+OUTPUT_FORMAT("elf64-s390", "elf64-s390", "elf64-s390")
+OUTPUT_ARCH(s390:64-bit)
+
+ENTRY(startup)
+
+SECTIONS
+{
+	. = 0;
+	.ipldata : {
+		*(.ipldata)
+	}
+	. = IPL_START;
+	.head.text : {
+		_head = . ;
+		HEAD_TEXT
+		_ehead = . ;
+	}
+	. = PARMAREA;
+	.parmarea : {
+		*(.parmarea)
+	}
+	.text :	{
+		_text = .;	/* Text */
+		*(.text)
+		*(.text.*)
+		_etext = . ;
+	}
+	.rodata : {
+		_rodata = . ;
+		*(.rodata)	 /* read-only data */
+		*(.rodata.*)
+		_erodata = . ;
+	}
+	NOTES
+	.data :	{
+		_data = . ;
+		*(.data)
+		*(.data.*)
+		_edata = . ;
+	}
+
+	BOOT_DATA
+	BOOT_DATA_PRESERVED
+
+	/*
+	 * This is the BSS section of the decompressor and not of the decompressed Linux kernel.
+	 * It will consume place in the decompressor's image.
+	 */
+	. = ALIGN(8);
+	.bss : {
+		_bss = . ;
+		*(.bss)
+		*(.bss.*)
+		*(COMMON)
+		/*
+		 * Stacks for the decompressor
+		 */
+		. = ALIGN(PAGE_SIZE);
+		_dump_info_stack_start = .;
+		. += PAGE_SIZE;
+		_dump_info_stack_end = .;
+		. = ALIGN(PAGE_SIZE);
+		_stack_start = .;
+		. += BOOT_STACK_SIZE;
+		_stack_end = .;
+		_ebss = .;
+	}
+
+	/*
+	 * uncompressed image info used by the decompressor it should match
+	 * struct vmlinux_info. It comes from .vmlinux.info section of
+	 * uncompressed vmlinux in a form of info.o
+	 */
+	. = ALIGN(8);
+	.vmlinux.info : {
+		_vmlinux_info = .;
+		*(.vmlinux.info)
+	}
+
+	.decompressor.syms : {
+		. += 1; /* make sure we have \0 before the first entry */
+		. = ALIGN(2);
+		_decompressor_syms_start = .;
+		*(.decompressor.syms)
+		_decompressor_syms_end = .;
+	}
+
+	_decompressor_end = .;
+
+#ifdef CONFIG_KERNEL_UNCOMPRESSED
+	. = 0x100000;
+#else
+	. = ALIGN(8);
+#endif
+	.rodata.compressed : {
+		_compressed_start = .;
+		*(.vmlinux.bin.compressed)
+		_compressed_end = .;
+	}
+
+#define SB_TRAILER_SIZE 32
+	/* Trailer needed for Secure Boot */
+	. += SB_TRAILER_SIZE; /* make sure .sb.trailer does not overwrite the previous section */
+	. = ALIGN(4096) - SB_TRAILER_SIZE;
+	.sb.trailer : {
+		QUAD(0)
+		QUAD(0)
+		QUAD(0)
+		QUAD(0x000000207a49504c)
+	}
+	_end = .;
+
+	/* Sections to be discarded */
+	/DISCARD/ : {
+		*(.eh_frame)
+		*(__ex_table)
+		*(*__ksymtab*)
+		*(___kcrctab*)
+	}
+}
diff --git a/arch/s390/configs/btf.config b/arch/s390/configs/btf.config
new file mode 100644
index 0000000000..eb7f84f592
--- /dev/null
+++ b/arch/s390/configs/btf.config
@@ -0,0 +1,2 @@
+# Help: Enable BTF debug info
+CONFIG_DEBUG_INFO_BTF=y
diff --git a/arch/s390/configs/debug_defconfig b/arch/s390/configs/debug_defconfig
new file mode 100644
index 0000000000..dd06086293
--- /dev/null
+++ b/arch/s390/configs/debug_defconfig
@@ -0,0 +1,887 @@
+CONFIG_UAPI_HEADER_TEST=y
+CONFIG_SYSVIPC=y
+CONFIG_POSIX_MQUEUE=y
+CONFIG_WATCH_QUEUE=y
+CONFIG_AUDIT=y
+CONFIG_NO_HZ_IDLE=y
+CONFIG_HIGH_RES_TIMERS=y
+CONFIG_BPF_SYSCALL=y
+CONFIG_BPF_JIT=y
+CONFIG_BPF_JIT_ALWAYS_ON=y
+CONFIG_BPF_LSM=y
+CONFIG_PREEMPT=y
+CONFIG_SCHED_CORE=y
+CONFIG_BSD_PROCESS_ACCT=y
+CONFIG_BSD_PROCESS_ACCT_V3=y
+CONFIG_TASKSTATS=y
+CONFIG_TASK_DELAY_ACCT=y
+CONFIG_TASK_XACCT=y
+CONFIG_TASK_IO_ACCOUNTING=y
+CONFIG_IKCONFIG=y
+CONFIG_IKCONFIG_PROC=y
+CONFIG_NUMA_BALANCING=y
+CONFIG_MEMCG=y
+CONFIG_BLK_CGROUP=y
+CONFIG_CFS_BANDWIDTH=y
+CONFIG_CGROUP_PIDS=y
+CONFIG_CGROUP_RDMA=y
+CONFIG_CGROUP_FREEZER=y
+CONFIG_CGROUP_HUGETLB=y
+CONFIG_CPUSETS=y
+CONFIG_CGROUP_DEVICE=y
+CONFIG_CGROUP_CPUACCT=y
+CONFIG_CGROUP_PERF=y
+CONFIG_CGROUP_BPF=y
+CONFIG_CGROUP_MISC=y
+CONFIG_NAMESPACES=y
+CONFIG_USER_NS=y
+CONFIG_CHECKPOINT_RESTORE=y
+CONFIG_SCHED_AUTOGROUP=y
+CONFIG_EXPERT=y
+# CONFIG_SYSFS_SYSCALL is not set
+CONFIG_PROFILING=y
+CONFIG_KEXEC_FILE=y
+CONFIG_KEXEC_SIG=y
+CONFIG_CRASH_DUMP=y
+CONFIG_LIVEPATCH=y
+CONFIG_MARCH_ZEC12=y
+CONFIG_TUNE_ZEC12=y
+CONFIG_NR_CPUS=512
+CONFIG_NUMA=y
+CONFIG_HZ_100=y
+CONFIG_CERT_STORE=y
+CONFIG_EXPOLINE=y
+CONFIG_EXPOLINE_AUTO=y
+CONFIG_CHSC_SCH=y
+CONFIG_VFIO_CCW=m
+CONFIG_VFIO_AP=m
+CONFIG_PROTECTED_VIRTUALIZATION_GUEST=y
+CONFIG_CMM=m
+CONFIG_APPLDATA_BASE=y
+CONFIG_S390_HYPFS_FS=y
+CONFIG_KVM=m
+CONFIG_S390_UNWIND_SELFTEST=m
+CONFIG_S390_KPROBES_SANITY_TEST=m
+CONFIG_S390_MODULES_SANITY_TEST=m
+CONFIG_KPROBES=y
+CONFIG_JUMP_LABEL=y
+CONFIG_STATIC_KEYS_SELFTEST=y
+CONFIG_SECCOMP_CACHE_DEBUG=y
+CONFIG_LOCK_EVENT_COUNTS=y
+# CONFIG_GCC_PLUGINS is not set
+CONFIG_MODULES=y
+CONFIG_MODULE_FORCE_LOAD=y
+CONFIG_MODULE_UNLOAD=y
+CONFIG_MODULE_FORCE_UNLOAD=y
+CONFIG_MODULE_UNLOAD_TAINT_TRACKING=y
+CONFIG_MODVERSIONS=y
+CONFIG_MODULE_SRCVERSION_ALL=y
+CONFIG_MODULE_SIG_SHA256=y
+CONFIG_BLK_DEV_THROTTLING=y
+CONFIG_BLK_WBT=y
+CONFIG_BLK_CGROUP_IOLATENCY=y
+CONFIG_BLK_CGROUP_IOCOST=y
+CONFIG_BLK_CGROUP_IOPRIO=y
+CONFIG_BLK_INLINE_ENCRYPTION=y
+CONFIG_BLK_INLINE_ENCRYPTION_FALLBACK=y
+CONFIG_PARTITION_ADVANCED=y
+CONFIG_IBM_PARTITION=y
+CONFIG_BSD_DISKLABEL=y
+CONFIG_MINIX_SUBPARTITION=y
+CONFIG_SOLARIS_X86_PARTITION=y
+CONFIG_UNIXWARE_DISKLABEL=y
+CONFIG_IOSCHED_BFQ=y
+CONFIG_BINFMT_MISC=m
+CONFIG_ZSWAP=y
+CONFIG_ZSMALLOC_STAT=y
+CONFIG_SLUB_STATS=y
+# CONFIG_COMPAT_BRK is not set
+CONFIG_MEMORY_HOTPLUG=y
+CONFIG_MEMORY_HOTREMOVE=y
+CONFIG_KSM=y
+CONFIG_TRANSPARENT_HUGEPAGE=y
+CONFIG_CMA_DEBUG=y
+CONFIG_CMA_DEBUGFS=y
+CONFIG_CMA_SYSFS=y
+CONFIG_CMA_AREAS=7
+CONFIG_MEM_SOFT_DIRTY=y
+CONFIG_DEFERRED_STRUCT_PAGE_INIT=y
+CONFIG_IDLE_PAGE_TRACKING=y
+CONFIG_PERCPU_STATS=y
+CONFIG_GUP_TEST=y
+CONFIG_ANON_VMA_NAME=y
+CONFIG_USERFAULTFD=y
+CONFIG_NET=y
+CONFIG_PACKET=y
+CONFIG_PACKET_DIAG=m
+CONFIG_UNIX=y
+CONFIG_UNIX_DIAG=m
+CONFIG_XFRM_USER=m
+CONFIG_NET_KEY=m
+CONFIG_SMC=m
+CONFIG_SMC_DIAG=m
+CONFIG_INET=y
+CONFIG_IP_MULTICAST=y
+CONFIG_IP_ADVANCED_ROUTER=y
+CONFIG_IP_MULTIPLE_TABLES=y
+CONFIG_IP_ROUTE_MULTIPATH=y
+CONFIG_IP_ROUTE_VERBOSE=y
+CONFIG_NET_IPIP=m
+CONFIG_NET_IPGRE_DEMUX=m
+CONFIG_NET_IPGRE=m
+CONFIG_NET_IPGRE_BROADCAST=y
+CONFIG_IP_MROUTE=y
+CONFIG_IP_MROUTE_MULTIPLE_TABLES=y
+CONFIG_IP_PIMSM_V1=y
+CONFIG_IP_PIMSM_V2=y
+CONFIG_SYN_COOKIES=y
+CONFIG_NET_IPVTI=m
+CONFIG_INET_AH=m
+CONFIG_INET_ESP=m
+CONFIG_INET_ESPINTCP=y
+CONFIG_INET_IPCOMP=m
+CONFIG_INET_DIAG=m
+CONFIG_INET_UDP_DIAG=m
+CONFIG_TCP_CONG_ADVANCED=y
+CONFIG_TCP_CONG_HSTCP=m
+CONFIG_TCP_CONG_HYBLA=m
+CONFIG_TCP_CONG_SCALABLE=m
+CONFIG_TCP_CONG_LP=m
+CONFIG_TCP_CONG_VENO=m
+CONFIG_TCP_CONG_YEAH=m
+CONFIG_TCP_CONG_ILLINOIS=m
+CONFIG_IPV6_ROUTER_PREF=y
+CONFIG_INET6_AH=m
+CONFIG_INET6_ESP=m
+CONFIG_INET6_ESPINTCP=y
+CONFIG_INET6_IPCOMP=m
+CONFIG_IPV6_MIP6=m
+CONFIG_IPV6_VTI=m
+CONFIG_IPV6_SIT=m
+CONFIG_IPV6_GRE=m
+CONFIG_IPV6_MULTIPLE_TABLES=y
+CONFIG_IPV6_SUBTREES=y
+CONFIG_IPV6_RPL_LWTUNNEL=y
+CONFIG_MPTCP=y
+CONFIG_NETFILTER=y
+CONFIG_BRIDGE_NETFILTER=m
+CONFIG_NETFILTER_NETLINK_HOOK=m
+CONFIG_NF_CONNTRACK=m
+CONFIG_NF_CONNTRACK_SECMARK=y
+CONFIG_NF_CONNTRACK_PROCFS=y
+CONFIG_NF_CONNTRACK_EVENTS=y
+CONFIG_NF_CONNTRACK_TIMEOUT=y
+CONFIG_NF_CONNTRACK_TIMESTAMP=y
+CONFIG_NF_CONNTRACK_AMANDA=m
+CONFIG_NF_CONNTRACK_FTP=m
+CONFIG_NF_CONNTRACK_H323=m
+CONFIG_NF_CONNTRACK_IRC=m
+CONFIG_NF_CONNTRACK_NETBIOS_NS=m
+CONFIG_NF_CONNTRACK_SNMP=m
+CONFIG_NF_CONNTRACK_PPTP=m
+CONFIG_NF_CONNTRACK_SANE=m
+CONFIG_NF_CONNTRACK_SIP=m
+CONFIG_NF_CONNTRACK_TFTP=m
+CONFIG_NF_CT_NETLINK=m
+CONFIG_NF_CT_NETLINK_TIMEOUT=m
+CONFIG_NF_TABLES=m
+CONFIG_NF_TABLES_INET=y
+CONFIG_NFT_CT=m
+CONFIG_NFT_LOG=m
+CONFIG_NFT_LIMIT=m
+CONFIG_NFT_NAT=m
+CONFIG_NFT_REJECT=m
+CONFIG_NFT_COMPAT=m
+CONFIG_NFT_HASH=m
+CONFIG_NFT_FIB_INET=m
+CONFIG_NETFILTER_XTABLES_COMPAT=y
+CONFIG_NETFILTER_XT_SET=m
+CONFIG_NETFILTER_XT_TARGET_AUDIT=m
+CONFIG_NETFILTER_XT_TARGET_CHECKSUM=m
+CONFIG_NETFILTER_XT_TARGET_CLASSIFY=m
+CONFIG_NETFILTER_XT_TARGET_CONNMARK=m
+CONFIG_NETFILTER_XT_TARGET_CONNSECMARK=m
+CONFIG_NETFILTER_XT_TARGET_CT=m
+CONFIG_NETFILTER_XT_TARGET_DSCP=m
+CONFIG_NETFILTER_XT_TARGET_HMARK=m
+CONFIG_NETFILTER_XT_TARGET_IDLETIMER=m
+CONFIG_NETFILTER_XT_TARGET_LOG=m
+CONFIG_NETFILTER_XT_TARGET_MARK=m
+CONFIG_NETFILTER_XT_TARGET_NFLOG=m
+CONFIG_NETFILTER_XT_TARGET_NFQUEUE=m
+CONFIG_NETFILTER_XT_TARGET_TEE=m
+CONFIG_NETFILTER_XT_TARGET_TPROXY=m
+CONFIG_NETFILTER_XT_TARGET_TRACE=m
+CONFIG_NETFILTER_XT_TARGET_SECMARK=m
+CONFIG_NETFILTER_XT_TARGET_TCPMSS=m
+CONFIG_NETFILTER_XT_TARGET_TCPOPTSTRIP=m
+CONFIG_NETFILTER_XT_MATCH_ADDRTYPE=m
+CONFIG_NETFILTER_XT_MATCH_BPF=m
+CONFIG_NETFILTER_XT_MATCH_CLUSTER=m
+CONFIG_NETFILTER_XT_MATCH_COMMENT=m
+CONFIG_NETFILTER_XT_MATCH_CONNBYTES=m
+CONFIG_NETFILTER_XT_MATCH_CONNLABEL=m
+CONFIG_NETFILTER_XT_MATCH_CONNLIMIT=m
+CONFIG_NETFILTER_XT_MATCH_CONNMARK=m
+CONFIG_NETFILTER_XT_MATCH_CONNTRACK=m
+CONFIG_NETFILTER_XT_MATCH_CPU=m
+CONFIG_NETFILTER_XT_MATCH_DCCP=m
+CONFIG_NETFILTER_XT_MATCH_DEVGROUP=m
+CONFIG_NETFILTER_XT_MATCH_DSCP=m
+CONFIG_NETFILTER_XT_MATCH_ESP=m
+CONFIG_NETFILTER_XT_MATCH_HASHLIMIT=m
+CONFIG_NETFILTER_XT_MATCH_HELPER=m
+CONFIG_NETFILTER_XT_MATCH_IPRANGE=m
+CONFIG_NETFILTER_XT_MATCH_IPVS=m
+CONFIG_NETFILTER_XT_MATCH_LENGTH=m
+CONFIG_NETFILTER_XT_MATCH_LIMIT=m
+CONFIG_NETFILTER_XT_MATCH_MAC=m
+CONFIG_NETFILTER_XT_MATCH_MARK=m
+CONFIG_NETFILTER_XT_MATCH_MULTIPORT=m
+CONFIG_NETFILTER_XT_MATCH_NFACCT=m
+CONFIG_NETFILTER_XT_MATCH_OSF=m
+CONFIG_NETFILTER_XT_MATCH_OWNER=m
+CONFIG_NETFILTER_XT_MATCH_POLICY=m
+CONFIG_NETFILTER_XT_MATCH_PHYSDEV=m
+CONFIG_NETFILTER_XT_MATCH_PKTTYPE=m
+CONFIG_NETFILTER_XT_MATCH_QUOTA=m
+CONFIG_NETFILTER_XT_MATCH_RATEEST=m
+CONFIG_NETFILTER_XT_MATCH_REALM=m
+CONFIG_NETFILTER_XT_MATCH_RECENT=m
+CONFIG_NETFILTER_XT_MATCH_STATE=m
+CONFIG_NETFILTER_XT_MATCH_STATISTIC=m
+CONFIG_NETFILTER_XT_MATCH_STRING=m
+CONFIG_NETFILTER_XT_MATCH_TCPMSS=m
+CONFIG_NETFILTER_XT_MATCH_TIME=m
+CONFIG_NETFILTER_XT_MATCH_U32=m
+CONFIG_IP_SET=m
+CONFIG_IP_SET_BITMAP_IP=m
+CONFIG_IP_SET_BITMAP_IPMAC=m
+CONFIG_IP_SET_BITMAP_PORT=m
+CONFIG_IP_SET_HASH_IP=m
+CONFIG_IP_SET_HASH_IPPORT=m
+CONFIG_IP_SET_HASH_IPPORTIP=m
+CONFIG_IP_SET_HASH_IPPORTNET=m
+CONFIG_IP_SET_HASH_NETPORTNET=m
+CONFIG_IP_SET_HASH_NET=m
+CONFIG_IP_SET_HASH_NETNET=m
+CONFIG_IP_SET_HASH_NETPORT=m
+CONFIG_IP_SET_HASH_NETIFACE=m
+CONFIG_IP_SET_LIST_SET=m
+CONFIG_IP_VS=m
+CONFIG_IP_VS_PROTO_TCP=y
+CONFIG_IP_VS_PROTO_UDP=y
+CONFIG_IP_VS_PROTO_ESP=y
+CONFIG_IP_VS_PROTO_AH=y
+CONFIG_IP_VS_RR=m
+CONFIG_IP_VS_WRR=m
+CONFIG_IP_VS_LC=m
+CONFIG_IP_VS_WLC=m
+CONFIG_IP_VS_LBLC=m
+CONFIG_IP_VS_LBLCR=m
+CONFIG_IP_VS_DH=m
+CONFIG_IP_VS_SH=m
+CONFIG_IP_VS_SED=m
+CONFIG_IP_VS_NQ=m
+CONFIG_IP_VS_TWOS=m
+CONFIG_IP_VS_FTP=m
+CONFIG_IP_VS_PE_SIP=m
+CONFIG_NFT_FIB_IPV4=m
+CONFIG_NF_TABLES_ARP=y
+CONFIG_NF_LOG_IPV4=m
+CONFIG_IP_NF_IPTABLES=m
+CONFIG_IP_NF_MATCH_AH=m
+CONFIG_IP_NF_MATCH_ECN=m
+CONFIG_IP_NF_MATCH_RPFILTER=m
+CONFIG_IP_NF_MATCH_TTL=m
+CONFIG_IP_NF_FILTER=m
+CONFIG_IP_NF_TARGET_REJECT=m
+CONFIG_IP_NF_NAT=m
+CONFIG_IP_NF_TARGET_MASQUERADE=m
+CONFIG_IP_NF_MANGLE=m
+CONFIG_IP_NF_TARGET_ECN=m
+CONFIG_IP_NF_TARGET_TTL=m
+CONFIG_IP_NF_RAW=m
+CONFIG_IP_NF_SECURITY=m
+CONFIG_IP_NF_ARPTABLES=m
+CONFIG_IP_NF_ARPFILTER=m
+CONFIG_IP_NF_ARP_MANGLE=m
+CONFIG_NFT_FIB_IPV6=m
+CONFIG_IP6_NF_IPTABLES=m
+CONFIG_IP6_NF_MATCH_AH=m
+CONFIG_IP6_NF_MATCH_EUI64=m
+CONFIG_IP6_NF_MATCH_FRAG=m
+CONFIG_IP6_NF_MATCH_OPTS=m
+CONFIG_IP6_NF_MATCH_HL=m
+CONFIG_IP6_NF_MATCH_IPV6HEADER=m
+CONFIG_IP6_NF_MATCH_MH=m
+CONFIG_IP6_NF_MATCH_RPFILTER=m
+CONFIG_IP6_NF_MATCH_RT=m
+CONFIG_IP6_NF_TARGET_HL=m
+CONFIG_IP6_NF_FILTER=m
+CONFIG_IP6_NF_TARGET_REJECT=m
+CONFIG_IP6_NF_MANGLE=m
+CONFIG_IP6_NF_RAW=m
+CONFIG_IP6_NF_SECURITY=m
+CONFIG_IP6_NF_NAT=m
+CONFIG_IP6_NF_TARGET_MASQUERADE=m
+CONFIG_NF_TABLES_BRIDGE=m
+CONFIG_RDS=m
+CONFIG_RDS_RDMA=m
+CONFIG_RDS_TCP=m
+CONFIG_RDS_DEBUG=y
+CONFIG_L2TP=m
+CONFIG_L2TP_DEBUGFS=m
+CONFIG_L2TP_V3=y
+CONFIG_L2TP_IP=m
+CONFIG_L2TP_ETH=m
+CONFIG_BRIDGE=y
+CONFIG_BRIDGE_MRP=y
+CONFIG_VLAN_8021Q=m
+CONFIG_VLAN_8021Q_GVRP=y
+CONFIG_NET_SCHED=y
+CONFIG_NET_SCH_HTB=m
+CONFIG_NET_SCH_HFSC=m
+CONFIG_NET_SCH_PRIO=m
+CONFIG_NET_SCH_MULTIQ=m
+CONFIG_NET_SCH_RED=m
+CONFIG_NET_SCH_SFB=m
+CONFIG_NET_SCH_SFQ=m
+CONFIG_NET_SCH_TEQL=m
+CONFIG_NET_SCH_TBF=m
+CONFIG_NET_SCH_GRED=m
+CONFIG_NET_SCH_NETEM=m
+CONFIG_NET_SCH_DRR=m
+CONFIG_NET_SCH_MQPRIO=m
+CONFIG_NET_SCH_CHOKE=m
+CONFIG_NET_SCH_QFQ=m
+CONFIG_NET_SCH_CODEL=m
+CONFIG_NET_SCH_FQ_CODEL=m
+CONFIG_NET_SCH_INGRESS=m
+CONFIG_NET_SCH_PLUG=m
+CONFIG_NET_SCH_ETS=m
+CONFIG_NET_CLS_BASIC=m
+CONFIG_NET_CLS_ROUTE4=m
+CONFIG_NET_CLS_FW=m
+CONFIG_NET_CLS_U32=m
+CONFIG_CLS_U32_PERF=y
+CONFIG_CLS_U32_MARK=y
+CONFIG_NET_CLS_FLOW=m
+CONFIG_NET_CLS_CGROUP=y
+CONFIG_NET_CLS_BPF=m
+CONFIG_NET_CLS_ACT=y
+CONFIG_NET_ACT_POLICE=m
+CONFIG_NET_ACT_GACT=m
+CONFIG_GACT_PROB=y
+CONFIG_NET_ACT_MIRRED=m
+CONFIG_NET_ACT_IPT=m
+CONFIG_NET_ACT_NAT=m
+CONFIG_NET_ACT_PEDIT=m
+CONFIG_NET_ACT_SIMP=m
+CONFIG_NET_ACT_SKBEDIT=m
+CONFIG_NET_ACT_CSUM=m
+CONFIG_NET_ACT_GATE=m
+CONFIG_NET_TC_SKB_EXT=y
+CONFIG_DNS_RESOLVER=y
+CONFIG_OPENVSWITCH=m
+CONFIG_VSOCKETS=m
+CONFIG_VIRTIO_VSOCKETS=m
+CONFIG_NETLINK_DIAG=m
+CONFIG_NET_SWITCHDEV=y
+CONFIG_CGROUP_NET_PRIO=y
+CONFIG_NET_PKTGEN=m
+CONFIG_PCI=y
+# CONFIG_PCIEASPM is not set
+CONFIG_PCI_DEBUG=y
+CONFIG_PCI_IOV=y
+CONFIG_HOTPLUG_PCI=y
+CONFIG_HOTPLUG_PCI_S390=y
+CONFIG_DEVTMPFS=y
+CONFIG_DEVTMPFS_SAFE=y
+# CONFIG_FW_LOADER is not set
+CONFIG_CONNECTOR=y
+CONFIG_ZRAM=y
+CONFIG_BLK_DEV_LOOP=m
+CONFIG_BLK_DEV_DRBD=m
+CONFIG_BLK_DEV_NBD=m
+CONFIG_BLK_DEV_RAM=y
+CONFIG_BLK_DEV_RAM_SIZE=32768
+CONFIG_VIRTIO_BLK=y
+CONFIG_BLK_DEV_RBD=m
+CONFIG_BLK_DEV_NVME=m
+CONFIG_ENCLOSURE_SERVICES=m
+CONFIG_GENWQE=m
+CONFIG_RAID_ATTRS=m
+CONFIG_SCSI=y
+CONFIG_BLK_DEV_SD=y
+CONFIG_CHR_DEV_ST=m
+CONFIG_BLK_DEV_SR=m
+CONFIG_CHR_DEV_SG=y
+CONFIG_CHR_DEV_SCH=m
+CONFIG_SCSI_ENCLOSURE=m
+CONFIG_SCSI_CONSTANTS=y
+CONFIG_SCSI_LOGGING=y
+CONFIG_SCSI_SPI_ATTRS=m
+CONFIG_SCSI_FC_ATTRS=m
+CONFIG_SCSI_SAS_LIBSAS=m
+CONFIG_SCSI_SRP_ATTRS=m
+CONFIG_ISCSI_TCP=m
+CONFIG_SCSI_DEBUG=m
+CONFIG_ZFCP=m
+CONFIG_SCSI_VIRTIO=m
+CONFIG_SCSI_DH=y
+CONFIG_SCSI_DH_RDAC=m
+CONFIG_SCSI_DH_HP_SW=m
+CONFIG_SCSI_DH_EMC=m
+CONFIG_SCSI_DH_ALUA=m
+CONFIG_MD=y
+CONFIG_BLK_DEV_MD=y
+# CONFIG_MD_BITMAP_FILE is not set
+CONFIG_MD_LINEAR=m
+CONFIG_MD_MULTIPATH=m
+CONFIG_MD_FAULTY=m
+CONFIG_MD_CLUSTER=m
+CONFIG_BCACHE=m
+CONFIG_BLK_DEV_DM=y
+CONFIG_DM_UNSTRIPED=m
+CONFIG_DM_CRYPT=m
+CONFIG_DM_SNAPSHOT=m
+CONFIG_DM_THIN_PROVISIONING=m
+CONFIG_DM_WRITECACHE=m
+CONFIG_DM_CLONE=m
+CONFIG_DM_MIRROR=m
+CONFIG_DM_LOG_USERSPACE=m
+CONFIG_DM_RAID=m
+CONFIG_DM_ZERO=m
+CONFIG_DM_MULTIPATH=m
+CONFIG_DM_MULTIPATH_QL=m
+CONFIG_DM_MULTIPATH_ST=m
+CONFIG_DM_MULTIPATH_HST=m
+CONFIG_DM_MULTIPATH_IOA=m
+CONFIG_DM_DELAY=m
+CONFIG_DM_INIT=y
+CONFIG_DM_UEVENT=y
+CONFIG_DM_FLAKEY=m
+CONFIG_DM_VERITY=m
+CONFIG_DM_VERITY_VERIFY_ROOTHASH_SIG=y
+CONFIG_DM_SWITCH=m
+CONFIG_DM_INTEGRITY=m
+CONFIG_NETDEVICES=y
+CONFIG_BONDING=m
+CONFIG_DUMMY=m
+CONFIG_EQUALIZER=m
+CONFIG_IFB=m
+CONFIG_MACVLAN=m
+CONFIG_MACVTAP=m
+CONFIG_VXLAN=m
+CONFIG_BAREUDP=m
+CONFIG_AMT=m
+CONFIG_TUN=m
+CONFIG_VETH=m
+CONFIG_VIRTIO_NET=m
+CONFIG_NLMON=m
+# CONFIG_NET_VENDOR_3COM is not set
+# CONFIG_NET_VENDOR_ADAPTEC is not set
+# CONFIG_NET_VENDOR_AGERE is not set
+# CONFIG_NET_VENDOR_ALACRITECH is not set
+# CONFIG_NET_VENDOR_ALTEON is not set
+# CONFIG_NET_VENDOR_AMAZON is not set
+# CONFIG_NET_VENDOR_AMD is not set
+# CONFIG_NET_VENDOR_AQUANTIA is not set
+# CONFIG_NET_VENDOR_ARC is not set
+# CONFIG_NET_VENDOR_ASIX is not set
+# CONFIG_NET_VENDOR_ATHEROS is not set
+# CONFIG_NET_VENDOR_BROADCOM is not set
+# CONFIG_NET_VENDOR_CADENCE is not set
+# CONFIG_NET_VENDOR_CAVIUM is not set
+# CONFIG_NET_VENDOR_CHELSIO is not set
+# CONFIG_NET_VENDOR_CISCO is not set
+# CONFIG_NET_VENDOR_CORTINA is not set
+# CONFIG_NET_VENDOR_DAVICOM is not set
+# CONFIG_NET_VENDOR_DEC is not set
+# CONFIG_NET_VENDOR_DLINK is not set
+# CONFIG_NET_VENDOR_EMULEX is not set
+# CONFIG_NET_VENDOR_ENGLEDER is not set
+# CONFIG_NET_VENDOR_EZCHIP is not set
+# CONFIG_NET_VENDOR_FUNGIBLE is not set
+# CONFIG_NET_VENDOR_GOOGLE is not set
+# CONFIG_NET_VENDOR_HUAWEI is not set
+# CONFIG_NET_VENDOR_INTEL is not set
+# CONFIG_NET_VENDOR_LITEX is not set
+# CONFIG_NET_VENDOR_MARVELL is not set
+CONFIG_MLX4_EN=m
+CONFIG_MLX5_CORE=m
+CONFIG_MLX5_CORE_EN=y
+# CONFIG_NET_VENDOR_MICREL is not set
+# CONFIG_NET_VENDOR_MICROCHIP is not set
+# CONFIG_NET_VENDOR_MICROSEMI is not set
+# CONFIG_NET_VENDOR_MICROSOFT is not set
+# CONFIG_NET_VENDOR_MYRI is not set
+# CONFIG_NET_VENDOR_NI is not set
+# CONFIG_NET_VENDOR_NATSEMI is not set
+# CONFIG_NET_VENDOR_NETERION is not set
+# CONFIG_NET_VENDOR_NETRONOME is not set
+# CONFIG_NET_VENDOR_NVIDIA is not set
+# CONFIG_NET_VENDOR_OKI is not set
+# CONFIG_NET_VENDOR_PACKET_ENGINES is not set
+# CONFIG_NET_VENDOR_PENSANDO is not set
+# CONFIG_NET_VENDOR_QLOGIC is not set
+# CONFIG_NET_VENDOR_BROCADE is not set
+# CONFIG_NET_VENDOR_QUALCOMM is not set
+# CONFIG_NET_VENDOR_RDC is not set
+# CONFIG_NET_VENDOR_REALTEK is not set
+# CONFIG_NET_VENDOR_RENESAS is not set
+# CONFIG_NET_VENDOR_ROCKER is not set
+# CONFIG_NET_VENDOR_SAMSUNG is not set
+# CONFIG_NET_VENDOR_SEEQ is not set
+# CONFIG_NET_VENDOR_SILAN is not set
+# CONFIG_NET_VENDOR_SIS is not set
+# CONFIG_NET_VENDOR_SOLARFLARE is not set
+# CONFIG_NET_VENDOR_SMSC is not set
+# CONFIG_NET_VENDOR_SOCIONEXT is not set
+# CONFIG_NET_VENDOR_STMICRO is not set
+# CONFIG_NET_VENDOR_SUN is not set
+# CONFIG_NET_VENDOR_SYNOPSYS is not set
+# CONFIG_NET_VENDOR_TEHUTI is not set
+# CONFIG_NET_VENDOR_TI is not set
+# CONFIG_NET_VENDOR_VERTEXCOM is not set
+# CONFIG_NET_VENDOR_VIA is not set
+# CONFIG_NET_VENDOR_WANGXUN is not set
+# CONFIG_NET_VENDOR_WIZNET is not set
+# CONFIG_NET_VENDOR_XILINX is not set
+CONFIG_PPP=m
+CONFIG_PPP_BSDCOMP=m
+CONFIG_PPP_DEFLATE=m
+CONFIG_PPP_FILTER=y
+CONFIG_PPP_MPPE=m
+CONFIG_PPP_MULTILINK=y
+CONFIG_PPPOE=m
+CONFIG_PPTP=m
+CONFIG_PPPOL2TP=m
+CONFIG_PPP_ASYNC=m
+CONFIG_PPP_SYNC_TTY=m
+CONFIG_ISM=m
+CONFIG_INPUT_EVDEV=y
+# CONFIG_INPUT_KEYBOARD is not set
+# CONFIG_INPUT_MOUSE is not set
+# CONFIG_SERIO is not set
+CONFIG_LEGACY_PTY_COUNT=0
+# CONFIG_LEGACY_TIOCSTI is not set
+CONFIG_VIRTIO_CONSOLE=m
+CONFIG_HW_RANDOM_VIRTIO=m
+CONFIG_HANGCHECK_TIMER=m
+CONFIG_TN3270_FS=y
+CONFIG_PPS=m
+# CONFIG_PTP_1588_CLOCK is not set
+# CONFIG_HWMON is not set
+CONFIG_WATCHDOG=y
+CONFIG_WATCHDOG_NOWAYOUT=y
+CONFIG_SOFT_WATCHDOG=m
+CONFIG_DIAG288_WATCHDOG=m
+# CONFIG_DRM_DEBUG_MODESET_LOCK is not set
+CONFIG_FB=y
+# CONFIG_FB_DEVICE is not set
+CONFIG_FRAMEBUFFER_CONSOLE=y
+CONFIG_FRAMEBUFFER_CONSOLE_DETECT_PRIMARY=y
+# CONFIG_HID_SUPPORT is not set
+# CONFIG_USB_SUPPORT is not set
+CONFIG_INFINIBAND=m
+CONFIG_INFINIBAND_USER_ACCESS=m
+CONFIG_MLX4_INFINIBAND=m
+CONFIG_MLX5_INFINIBAND=m
+CONFIG_SYNC_FILE=y
+CONFIG_VFIO=m
+CONFIG_VFIO_PCI=m
+CONFIG_MLX5_VFIO_PCI=m
+CONFIG_VIRTIO_PCI=m
+CONFIG_VIRTIO_BALLOON=m
+CONFIG_VIRTIO_INPUT=y
+CONFIG_VHOST_NET=m
+CONFIG_VHOST_VSOCK=m
+CONFIG_EXT4_FS=y
+CONFIG_EXT4_FS_POSIX_ACL=y
+CONFIG_EXT4_FS_SECURITY=y
+CONFIG_JBD2_DEBUG=y
+CONFIG_JFS_FS=m
+CONFIG_JFS_POSIX_ACL=y
+CONFIG_JFS_SECURITY=y
+CONFIG_JFS_STATISTICS=y
+CONFIG_XFS_FS=y
+CONFIG_XFS_QUOTA=y
+CONFIG_XFS_POSIX_ACL=y
+CONFIG_XFS_RT=y
+CONFIG_XFS_DEBUG=y
+CONFIG_GFS2_FS=m
+CONFIG_GFS2_FS_LOCKING_DLM=y
+CONFIG_OCFS2_FS=m
+CONFIG_BTRFS_FS=y
+CONFIG_BTRFS_FS_POSIX_ACL=y
+CONFIG_BTRFS_DEBUG=y
+CONFIG_BTRFS_ASSERT=y
+CONFIG_NILFS2_FS=m
+CONFIG_FS_DAX=y
+CONFIG_EXPORTFS_BLOCK_OPS=y
+CONFIG_FS_ENCRYPTION=y
+CONFIG_FS_VERITY=y
+CONFIG_FS_VERITY_BUILTIN_SIGNATURES=y
+CONFIG_FANOTIFY=y
+CONFIG_FANOTIFY_ACCESS_PERMISSIONS=y
+CONFIG_QUOTA_NETLINK_INTERFACE=y
+CONFIG_QUOTA_DEBUG=y
+CONFIG_QFMT_V1=m
+CONFIG_QFMT_V2=m
+CONFIG_AUTOFS_FS=m
+CONFIG_FUSE_FS=y
+CONFIG_CUSE=m
+CONFIG_VIRTIO_FS=m
+CONFIG_OVERLAY_FS=m
+CONFIG_NETFS_STATS=y
+CONFIG_FSCACHE=m
+CONFIG_CACHEFILES=m
+CONFIG_ISO9660_FS=y
+CONFIG_JOLIET=y
+CONFIG_ZISOFS=y
+CONFIG_UDF_FS=m
+CONFIG_MSDOS_FS=m
+CONFIG_VFAT_FS=m
+CONFIG_EXFAT_FS=m
+CONFIG_NTFS_FS=m
+CONFIG_NTFS_RW=y
+CONFIG_PROC_KCORE=y
+CONFIG_TMPFS=y
+CONFIG_TMPFS_POSIX_ACL=y
+CONFIG_TMPFS_INODE64=y
+CONFIG_TMPFS_QUOTA=y
+CONFIG_HUGETLBFS=y
+CONFIG_ECRYPT_FS=m
+CONFIG_CRAMFS=m
+CONFIG_SQUASHFS=m
+CONFIG_SQUASHFS_CHOICE_DECOMP_BY_MOUNT=y
+CONFIG_SQUASHFS_XATTR=y
+CONFIG_SQUASHFS_LZ4=y
+CONFIG_SQUASHFS_LZO=y
+CONFIG_SQUASHFS_XZ=y
+CONFIG_SQUASHFS_ZSTD=y
+CONFIG_ROMFS_FS=m
+CONFIG_NFS_FS=m
+CONFIG_NFS_V3_ACL=y
+CONFIG_NFS_V4=m
+CONFIG_NFS_SWAP=y
+CONFIG_NFSD=m
+CONFIG_NFSD_V3_ACL=y
+CONFIG_NFSD_V4=y
+CONFIG_NFSD_V4_SECURITY_LABEL=y
+CONFIG_CIFS=m
+CONFIG_CIFS_UPCALL=y
+CONFIG_CIFS_XATTR=y
+CONFIG_CIFS_POSIX=y
+# CONFIG_CIFS_DEBUG is not set
+CONFIG_CIFS_DFS_UPCALL=y
+CONFIG_CIFS_SWN_UPCALL=y
+CONFIG_NLS_DEFAULT="utf8"
+CONFIG_NLS_CODEPAGE_437=m
+CONFIG_NLS_CODEPAGE_850=m
+CONFIG_NLS_ASCII=m
+CONFIG_NLS_ISO8859_1=m
+CONFIG_NLS_ISO8859_15=m
+CONFIG_NLS_UTF8=m
+CONFIG_DLM=m
+CONFIG_UNICODE=y
+CONFIG_PERSISTENT_KEYRINGS=y
+CONFIG_ENCRYPTED_KEYS=m
+CONFIG_KEY_NOTIFICATIONS=y
+CONFIG_SECURITY=y
+CONFIG_SECURITY_NETWORK=y
+CONFIG_HARDENED_USERCOPY=y
+CONFIG_FORTIFY_SOURCE=y
+CONFIG_SECURITY_SELINUX=y
+CONFIG_SECURITY_SELINUX_BOOTPARAM=y
+CONFIG_SECURITY_LOCKDOWN_LSM=y
+CONFIG_SECURITY_LOCKDOWN_LSM_EARLY=y
+CONFIG_SECURITY_LANDLOCK=y
+CONFIG_INTEGRITY_SIGNATURE=y
+CONFIG_INTEGRITY_ASYMMETRIC_KEYS=y
+CONFIG_INTEGRITY_PLATFORM_KEYRING=y
+CONFIG_IMA=y
+CONFIG_IMA_DEFAULT_HASH_SHA256=y
+CONFIG_IMA_WRITE_POLICY=y
+CONFIG_IMA_APPRAISE=y
+CONFIG_LSM="yama,loadpin,safesetid,integrity,selinux,smack,tomoyo,apparmor"
+CONFIG_INIT_STACK_NONE=y
+CONFIG_BUG_ON_DATA_CORRUPTION=y
+CONFIG_CRYPTO_USER=m
+# CONFIG_CRYPTO_MANAGER_DISABLE_TESTS is not set
+CONFIG_CRYPTO_PCRYPT=m
+CONFIG_CRYPTO_CRYPTD=m
+CONFIG_CRYPTO_TEST=m
+CONFIG_CRYPTO_DH=m
+CONFIG_CRYPTO_ECDH=m
+CONFIG_CRYPTO_ECDSA=m
+CONFIG_CRYPTO_ECRDSA=m
+CONFIG_CRYPTO_SM2=m
+CONFIG_CRYPTO_CURVE25519=m
+CONFIG_CRYPTO_AES_TI=m
+CONFIG_CRYPTO_ANUBIS=m
+CONFIG_CRYPTO_ARIA=m
+CONFIG_CRYPTO_BLOWFISH=m
+CONFIG_CRYPTO_CAMELLIA=m
+CONFIG_CRYPTO_CAST5=m
+CONFIG_CRYPTO_CAST6=m
+CONFIG_CRYPTO_DES=m
+CONFIG_CRYPTO_FCRYPT=m
+CONFIG_CRYPTO_KHAZAD=m
+CONFIG_CRYPTO_SEED=m
+CONFIG_CRYPTO_SERPENT=m
+CONFIG_CRYPTO_SM4_GENERIC=m
+CONFIG_CRYPTO_TEA=m
+CONFIG_CRYPTO_TWOFISH=m
+CONFIG_CRYPTO_ADIANTUM=m
+CONFIG_CRYPTO_ARC4=m
+CONFIG_CRYPTO_CFB=m
+CONFIG_CRYPTO_HCTR2=m
+CONFIG_CRYPTO_KEYWRAP=m
+CONFIG_CRYPTO_LRW=m
+CONFIG_CRYPTO_PCBC=m
+CONFIG_CRYPTO_AEGIS128=m
+CONFIG_CRYPTO_CHACHA20POLY1305=m
+CONFIG_CRYPTO_GCM=y
+CONFIG_CRYPTO_SEQIV=y
+CONFIG_CRYPTO_MD4=m
+CONFIG_CRYPTO_MD5=y
+CONFIG_CRYPTO_MICHAEL_MIC=m
+CONFIG_CRYPTO_RMD160=m
+CONFIG_CRYPTO_SM3_GENERIC=m
+CONFIG_CRYPTO_VMAC=m
+CONFIG_CRYPTO_WP512=m
+CONFIG_CRYPTO_XCBC=m
+CONFIG_CRYPTO_CRC32=m
+CONFIG_CRYPTO_842=m
+CONFIG_CRYPTO_LZ4=m
+CONFIG_CRYPTO_LZ4HC=m
+CONFIG_CRYPTO_ZSTD=m
+CONFIG_CRYPTO_ANSI_CPRNG=m
+CONFIG_CRYPTO_USER_API_HASH=m
+CONFIG_CRYPTO_USER_API_SKCIPHER=m
+CONFIG_CRYPTO_USER_API_RNG=m
+CONFIG_CRYPTO_USER_API_AEAD=m
+CONFIG_CRYPTO_STATS=y
+CONFIG_CRYPTO_CRC32_S390=y
+CONFIG_CRYPTO_SHA512_S390=m
+CONFIG_CRYPTO_SHA1_S390=m
+CONFIG_CRYPTO_SHA256_S390=m
+CONFIG_CRYPTO_SHA3_256_S390=m
+CONFIG_CRYPTO_SHA3_512_S390=m
+CONFIG_CRYPTO_GHASH_S390=m
+CONFIG_CRYPTO_AES_S390=m
+CONFIG_CRYPTO_DES_S390=m
+CONFIG_CRYPTO_CHACHA_S390=m
+CONFIG_ZCRYPT=m
+CONFIG_PKEY=m
+CONFIG_CRYPTO_PAES_S390=m
+CONFIG_CRYPTO_DEV_VIRTIO=m
+CONFIG_SYSTEM_BLACKLIST_KEYRING=y
+CONFIG_CORDIC=m
+CONFIG_CRYPTO_LIB_CURVE25519=m
+CONFIG_CRYPTO_LIB_CHACHA20POLY1305=m
+CONFIG_CRC32_SELFTEST=y
+CONFIG_CRC4=m
+CONFIG_CRC7=m
+CONFIG_CRC8=m
+CONFIG_RANDOM32_SELFTEST=y
+CONFIG_XZ_DEC_MICROLZMA=y
+CONFIG_DMA_CMA=y
+CONFIG_CMA_SIZE_MBYTES=0
+CONFIG_PRINTK_TIME=y
+CONFIG_DYNAMIC_DEBUG=y
+CONFIG_DEBUG_INFO_DWARF4=y
+CONFIG_GDB_SCRIPTS=y
+CONFIG_HEADERS_INSTALL=y
+CONFIG_DEBUG_SECTION_MISMATCH=y
+CONFIG_MAGIC_SYSRQ=y
+CONFIG_DEBUG_PAGEALLOC=y
+CONFIG_SLUB_DEBUG_ON=y
+CONFIG_PAGE_OWNER=y
+CONFIG_DEBUG_RODATA_TEST=y
+CONFIG_DEBUG_WX=y
+CONFIG_PTDUMP_DEBUGFS=y
+CONFIG_DEBUG_OBJECTS=y
+CONFIG_DEBUG_OBJECTS_SELFTEST=y
+CONFIG_DEBUG_OBJECTS_FREE=y
+CONFIG_DEBUG_OBJECTS_TIMERS=y
+CONFIG_DEBUG_OBJECTS_WORK=y
+CONFIG_DEBUG_OBJECTS_RCU_HEAD=y
+CONFIG_DEBUG_OBJECTS_PERCPU_COUNTER=y
+CONFIG_DEBUG_STACK_USAGE=y
+CONFIG_DEBUG_VM=y
+CONFIG_DEBUG_VM_PGFLAGS=y
+CONFIG_DEBUG_MEMORY_INIT=y
+CONFIG_MEMORY_NOTIFIER_ERROR_INJECT=m
+CONFIG_DEBUG_PER_CPU_MAPS=y
+CONFIG_KFENCE=y
+CONFIG_KFENCE_DEFERRABLE=y
+CONFIG_KFENCE_STATIC_KEYS=y
+CONFIG_DEBUG_SHIRQ=y
+CONFIG_PANIC_ON_OOPS=y
+CONFIG_DETECT_HUNG_TASK=y
+CONFIG_WQ_WATCHDOG=y
+CONFIG_TEST_LOCKUP=m
+CONFIG_DEBUG_PREEMPT=y
+CONFIG_PROVE_LOCKING=y
+CONFIG_LOCK_STAT=y
+CONFIG_DEBUG_ATOMIC_SLEEP=y
+CONFIG_DEBUG_LOCKING_API_SELFTESTS=y
+CONFIG_DEBUG_IRQFLAGS=y
+CONFIG_DEBUG_LIST=y
+CONFIG_DEBUG_SG=y
+CONFIG_DEBUG_NOTIFIERS=y
+CONFIG_RCU_TORTURE_TEST=m
+CONFIG_RCU_REF_SCALE_TEST=m
+CONFIG_RCU_CPU_STALL_TIMEOUT=300
+# CONFIG_RCU_TRACE is not set
+CONFIG_LATENCYTOP=y
+CONFIG_BOOTTIME_TRACING=y
+CONFIG_FUNCTION_GRAPH_RETVAL=y
+CONFIG_FPROBE=y
+CONFIG_FUNCTION_PROFILER=y
+CONFIG_STACK_TRACER=y
+CONFIG_IRQSOFF_TRACER=y
+CONFIG_PREEMPT_TRACER=y
+CONFIG_SCHED_TRACER=y
+CONFIG_FTRACE_SYSCALLS=y
+CONFIG_BLK_DEV_IO_TRACE=y
+CONFIG_USER_EVENTS=y
+CONFIG_HIST_TRIGGERS=y
+CONFIG_FTRACE_STARTUP_TEST=y
+# CONFIG_EVENT_TRACE_STARTUP_TEST is not set
+CONFIG_SAMPLES=y
+CONFIG_SAMPLE_TRACE_PRINTK=m
+CONFIG_SAMPLE_FTRACE_DIRECT=m
+CONFIG_SAMPLE_FTRACE_DIRECT_MULTI=m
+CONFIG_SAMPLE_FTRACE_OPS=m
+CONFIG_DEBUG_ENTRY=y
+CONFIG_CIO_INJECT=y
+CONFIG_KUNIT=m
+CONFIG_KUNIT_DEBUGFS=y
+CONFIG_NOTIFIER_ERROR_INJECTION=m
+CONFIG_NETDEV_NOTIFIER_ERROR_INJECT=m
+CONFIG_FAULT_INJECTION=y
+CONFIG_FAILSLAB=y
+CONFIG_FAIL_PAGE_ALLOC=y
+CONFIG_FAULT_INJECTION_USERCOPY=y
+CONFIG_FAIL_MAKE_REQUEST=y
+CONFIG_FAIL_IO_TIMEOUT=y
+CONFIG_FAIL_FUTEX=y
+CONFIG_FAULT_INJECTION_DEBUG_FS=y
+CONFIG_FAULT_INJECTION_CONFIGFS=y
+CONFIG_FAULT_INJECTION_STACKTRACE_FILTER=y
+CONFIG_LKDTM=m
+CONFIG_TEST_MIN_HEAP=y
+CONFIG_KPROBES_SANITY_TEST=m
+CONFIG_RBTREE_TEST=y
+CONFIG_INTERVAL_TREE_TEST=m
+CONFIG_PERCPU_TEST=m
+CONFIG_ATOMIC64_SELFTEST=y
+CONFIG_STRING_SELFTEST=y
+CONFIG_TEST_BITOPS=m
+CONFIG_TEST_BPF=m
+CONFIG_TEST_LIVEPATCH=m
diff --git a/arch/s390/configs/defconfig b/arch/s390/configs/defconfig
new file mode 100644
index 0000000000..1b8150e50f
--- /dev/null
+++ b/arch/s390/configs/defconfig
@@ -0,0 +1,816 @@
+CONFIG_SYSVIPC=y
+CONFIG_POSIX_MQUEUE=y
+CONFIG_WATCH_QUEUE=y
+CONFIG_AUDIT=y
+CONFIG_NO_HZ_IDLE=y
+CONFIG_HIGH_RES_TIMERS=y
+CONFIG_BPF_SYSCALL=y
+CONFIG_BPF_JIT=y
+CONFIG_BPF_JIT_ALWAYS_ON=y
+CONFIG_BPF_LSM=y
+CONFIG_SCHED_CORE=y
+CONFIG_BSD_PROCESS_ACCT=y
+CONFIG_BSD_PROCESS_ACCT_V3=y
+CONFIG_TASKSTATS=y
+CONFIG_TASK_DELAY_ACCT=y
+CONFIG_TASK_XACCT=y
+CONFIG_TASK_IO_ACCOUNTING=y
+CONFIG_IKCONFIG=y
+CONFIG_IKCONFIG_PROC=y
+CONFIG_NUMA_BALANCING=y
+CONFIG_MEMCG=y
+CONFIG_BLK_CGROUP=y
+CONFIG_CFS_BANDWIDTH=y
+CONFIG_CGROUP_PIDS=y
+CONFIG_CGROUP_RDMA=y
+CONFIG_CGROUP_FREEZER=y
+CONFIG_CGROUP_HUGETLB=y
+CONFIG_CPUSETS=y
+CONFIG_CGROUP_DEVICE=y
+CONFIG_CGROUP_CPUACCT=y
+CONFIG_CGROUP_PERF=y
+CONFIG_CGROUP_BPF=y
+CONFIG_CGROUP_MISC=y
+CONFIG_NAMESPACES=y
+CONFIG_USER_NS=y
+CONFIG_CHECKPOINT_RESTORE=y
+CONFIG_SCHED_AUTOGROUP=y
+CONFIG_EXPERT=y
+# CONFIG_SYSFS_SYSCALL is not set
+CONFIG_PROFILING=y
+CONFIG_KEXEC_FILE=y
+CONFIG_KEXEC_SIG=y
+CONFIG_CRASH_DUMP=y
+CONFIG_LIVEPATCH=y
+CONFIG_MARCH_ZEC12=y
+CONFIG_TUNE_ZEC12=y
+CONFIG_NR_CPUS=512
+CONFIG_NUMA=y
+CONFIG_HZ_100=y
+CONFIG_CERT_STORE=y
+CONFIG_EXPOLINE=y
+CONFIG_EXPOLINE_AUTO=y
+CONFIG_CHSC_SCH=y
+CONFIG_VFIO_CCW=m
+CONFIG_VFIO_AP=m
+CONFIG_PROTECTED_VIRTUALIZATION_GUEST=y
+CONFIG_CMM=m
+CONFIG_APPLDATA_BASE=y
+CONFIG_S390_HYPFS_FS=y
+CONFIG_KVM=m
+CONFIG_S390_UNWIND_SELFTEST=m
+CONFIG_S390_KPROBES_SANITY_TEST=m
+CONFIG_S390_MODULES_SANITY_TEST=m
+CONFIG_KPROBES=y
+CONFIG_JUMP_LABEL=y
+# CONFIG_GCC_PLUGINS is not set
+CONFIG_MODULES=y
+CONFIG_MODULE_FORCE_LOAD=y
+CONFIG_MODULE_UNLOAD=y
+CONFIG_MODULE_FORCE_UNLOAD=y
+CONFIG_MODULE_UNLOAD_TAINT_TRACKING=y
+CONFIG_MODVERSIONS=y
+CONFIG_MODULE_SRCVERSION_ALL=y
+CONFIG_MODULE_SIG_SHA256=y
+CONFIG_BLK_DEV_THROTTLING=y
+CONFIG_BLK_WBT=y
+CONFIG_BLK_CGROUP_IOLATENCY=y
+CONFIG_BLK_CGROUP_IOCOST=y
+CONFIG_BLK_CGROUP_IOPRIO=y
+CONFIG_BLK_INLINE_ENCRYPTION=y
+CONFIG_BLK_INLINE_ENCRYPTION_FALLBACK=y
+CONFIG_PARTITION_ADVANCED=y
+CONFIG_IBM_PARTITION=y
+CONFIG_BSD_DISKLABEL=y
+CONFIG_MINIX_SUBPARTITION=y
+CONFIG_SOLARIS_X86_PARTITION=y
+CONFIG_UNIXWARE_DISKLABEL=y
+CONFIG_IOSCHED_BFQ=y
+CONFIG_BINFMT_MISC=m
+CONFIG_ZSWAP=y
+CONFIG_ZSMALLOC_STAT=y
+# CONFIG_COMPAT_BRK is not set
+CONFIG_MEMORY_HOTPLUG=y
+CONFIG_MEMORY_HOTREMOVE=y
+CONFIG_KSM=y
+CONFIG_TRANSPARENT_HUGEPAGE=y
+CONFIG_CMA_SYSFS=y
+CONFIG_CMA_AREAS=7
+CONFIG_MEM_SOFT_DIRTY=y
+CONFIG_DEFERRED_STRUCT_PAGE_INIT=y
+CONFIG_IDLE_PAGE_TRACKING=y
+CONFIG_PERCPU_STATS=y
+CONFIG_ANON_VMA_NAME=y
+CONFIG_USERFAULTFD=y
+CONFIG_NET=y
+CONFIG_PACKET=y
+CONFIG_PACKET_DIAG=m
+CONFIG_UNIX=y
+CONFIG_UNIX_DIAG=m
+CONFIG_XFRM_USER=m
+CONFIG_NET_KEY=m
+CONFIG_SMC=m
+CONFIG_SMC_DIAG=m
+CONFIG_INET=y
+CONFIG_IP_MULTICAST=y
+CONFIG_IP_ADVANCED_ROUTER=y
+CONFIG_IP_MULTIPLE_TABLES=y
+CONFIG_IP_ROUTE_MULTIPATH=y
+CONFIG_IP_ROUTE_VERBOSE=y
+CONFIG_NET_IPIP=m
+CONFIG_NET_IPGRE_DEMUX=m
+CONFIG_NET_IPGRE=m
+CONFIG_NET_IPGRE_BROADCAST=y
+CONFIG_IP_MROUTE=y
+CONFIG_IP_MROUTE_MULTIPLE_TABLES=y
+CONFIG_IP_PIMSM_V1=y
+CONFIG_IP_PIMSM_V2=y
+CONFIG_SYN_COOKIES=y
+CONFIG_NET_IPVTI=m
+CONFIG_INET_AH=m
+CONFIG_INET_ESP=m
+CONFIG_INET_ESPINTCP=y
+CONFIG_INET_IPCOMP=m
+CONFIG_INET_DIAG=m
+CONFIG_INET_UDP_DIAG=m
+CONFIG_TCP_CONG_ADVANCED=y
+CONFIG_TCP_CONG_HSTCP=m
+CONFIG_TCP_CONG_HYBLA=m
+CONFIG_TCP_CONG_SCALABLE=m
+CONFIG_TCP_CONG_LP=m
+CONFIG_TCP_CONG_VENO=m
+CONFIG_TCP_CONG_YEAH=m
+CONFIG_TCP_CONG_ILLINOIS=m
+CONFIG_IPV6_ROUTER_PREF=y
+CONFIG_INET6_AH=m
+CONFIG_INET6_ESP=m
+CONFIG_INET6_ESPINTCP=y
+CONFIG_INET6_IPCOMP=m
+CONFIG_IPV6_MIP6=m
+CONFIG_IPV6_VTI=m
+CONFIG_IPV6_SIT=m
+CONFIG_IPV6_GRE=m
+CONFIG_IPV6_MULTIPLE_TABLES=y
+CONFIG_IPV6_SUBTREES=y
+CONFIG_IPV6_RPL_LWTUNNEL=y
+CONFIG_MPTCP=y
+CONFIG_NETFILTER=y
+CONFIG_BRIDGE_NETFILTER=m
+CONFIG_NETFILTER_NETLINK_HOOK=m
+CONFIG_NF_CONNTRACK=m
+CONFIG_NF_CONNTRACK_SECMARK=y
+CONFIG_NF_CONNTRACK_PROCFS=y
+CONFIG_NF_CONNTRACK_EVENTS=y
+CONFIG_NF_CONNTRACK_TIMEOUT=y
+CONFIG_NF_CONNTRACK_TIMESTAMP=y
+CONFIG_NF_CONNTRACK_AMANDA=m
+CONFIG_NF_CONNTRACK_FTP=m
+CONFIG_NF_CONNTRACK_H323=m
+CONFIG_NF_CONNTRACK_IRC=m
+CONFIG_NF_CONNTRACK_NETBIOS_NS=m
+CONFIG_NF_CONNTRACK_SNMP=m
+CONFIG_NF_CONNTRACK_PPTP=m
+CONFIG_NF_CONNTRACK_SANE=m
+CONFIG_NF_CONNTRACK_SIP=m
+CONFIG_NF_CONNTRACK_TFTP=m
+CONFIG_NF_CT_NETLINK=m
+CONFIG_NF_CT_NETLINK_TIMEOUT=m
+CONFIG_NF_TABLES=m
+CONFIG_NF_TABLES_INET=y
+CONFIG_NFT_CT=m
+CONFIG_NFT_LOG=m
+CONFIG_NFT_LIMIT=m
+CONFIG_NFT_NAT=m
+CONFIG_NFT_REJECT=m
+CONFIG_NFT_COMPAT=m
+CONFIG_NFT_HASH=m
+CONFIG_NFT_FIB_INET=m
+CONFIG_NETFILTER_XTABLES_COMPAT=y
+CONFIG_NETFILTER_XT_SET=m
+CONFIG_NETFILTER_XT_TARGET_AUDIT=m
+CONFIG_NETFILTER_XT_TARGET_CHECKSUM=m
+CONFIG_NETFILTER_XT_TARGET_CLASSIFY=m
+CONFIG_NETFILTER_XT_TARGET_CONNMARK=m
+CONFIG_NETFILTER_XT_TARGET_CONNSECMARK=m
+CONFIG_NETFILTER_XT_TARGET_CT=m
+CONFIG_NETFILTER_XT_TARGET_DSCP=m
+CONFIG_NETFILTER_XT_TARGET_HMARK=m
+CONFIG_NETFILTER_XT_TARGET_IDLETIMER=m
+CONFIG_NETFILTER_XT_TARGET_LOG=m
+CONFIG_NETFILTER_XT_TARGET_MARK=m
+CONFIG_NETFILTER_XT_TARGET_NFLOG=m
+CONFIG_NETFILTER_XT_TARGET_NFQUEUE=m
+CONFIG_NETFILTER_XT_TARGET_TEE=m
+CONFIG_NETFILTER_XT_TARGET_TPROXY=m
+CONFIG_NETFILTER_XT_TARGET_TRACE=m
+CONFIG_NETFILTER_XT_TARGET_SECMARK=m
+CONFIG_NETFILTER_XT_TARGET_TCPMSS=m
+CONFIG_NETFILTER_XT_TARGET_TCPOPTSTRIP=m
+CONFIG_NETFILTER_XT_MATCH_ADDRTYPE=m
+CONFIG_NETFILTER_XT_MATCH_BPF=m
+CONFIG_NETFILTER_XT_MATCH_CLUSTER=m
+CONFIG_NETFILTER_XT_MATCH_COMMENT=m
+CONFIG_NETFILTER_XT_MATCH_CONNBYTES=m
+CONFIG_NETFILTER_XT_MATCH_CONNLABEL=m
+CONFIG_NETFILTER_XT_MATCH_CONNLIMIT=m
+CONFIG_NETFILTER_XT_MATCH_CONNMARK=m
+CONFIG_NETFILTER_XT_MATCH_CONNTRACK=m
+CONFIG_NETFILTER_XT_MATCH_CPU=m
+CONFIG_NETFILTER_XT_MATCH_DCCP=m
+CONFIG_NETFILTER_XT_MATCH_DEVGROUP=m
+CONFIG_NETFILTER_XT_MATCH_DSCP=m
+CONFIG_NETFILTER_XT_MATCH_ESP=m
+CONFIG_NETFILTER_XT_MATCH_HASHLIMIT=m
+CONFIG_NETFILTER_XT_MATCH_HELPER=m
+CONFIG_NETFILTER_XT_MATCH_IPRANGE=m
+CONFIG_NETFILTER_XT_MATCH_IPVS=m
+CONFIG_NETFILTER_XT_MATCH_LENGTH=m
+CONFIG_NETFILTER_XT_MATCH_LIMIT=m
+CONFIG_NETFILTER_XT_MATCH_MAC=m
+CONFIG_NETFILTER_XT_MATCH_MARK=m
+CONFIG_NETFILTER_XT_MATCH_MULTIPORT=m
+CONFIG_NETFILTER_XT_MATCH_NFACCT=m
+CONFIG_NETFILTER_XT_MATCH_OSF=m
+CONFIG_NETFILTER_XT_MATCH_OWNER=m
+CONFIG_NETFILTER_XT_MATCH_POLICY=m
+CONFIG_NETFILTER_XT_MATCH_PHYSDEV=m
+CONFIG_NETFILTER_XT_MATCH_PKTTYPE=m
+CONFIG_NETFILTER_XT_MATCH_QUOTA=m
+CONFIG_NETFILTER_XT_MATCH_RATEEST=m
+CONFIG_NETFILTER_XT_MATCH_REALM=m
+CONFIG_NETFILTER_XT_MATCH_RECENT=m
+CONFIG_NETFILTER_XT_MATCH_STATE=m
+CONFIG_NETFILTER_XT_MATCH_STATISTIC=m
+CONFIG_NETFILTER_XT_MATCH_STRING=m
+CONFIG_NETFILTER_XT_MATCH_TCPMSS=m
+CONFIG_NETFILTER_XT_MATCH_TIME=m
+CONFIG_NETFILTER_XT_MATCH_U32=m
+CONFIG_IP_SET=m
+CONFIG_IP_SET_BITMAP_IP=m
+CONFIG_IP_SET_BITMAP_IPMAC=m
+CONFIG_IP_SET_BITMAP_PORT=m
+CONFIG_IP_SET_HASH_IP=m
+CONFIG_IP_SET_HASH_IPPORT=m
+CONFIG_IP_SET_HASH_IPPORTIP=m
+CONFIG_IP_SET_HASH_IPPORTNET=m
+CONFIG_IP_SET_HASH_NETPORTNET=m
+CONFIG_IP_SET_HASH_NET=m
+CONFIG_IP_SET_HASH_NETNET=m
+CONFIG_IP_SET_HASH_NETPORT=m
+CONFIG_IP_SET_HASH_NETIFACE=m
+CONFIG_IP_SET_LIST_SET=m
+CONFIG_IP_VS=m
+CONFIG_IP_VS_PROTO_TCP=y
+CONFIG_IP_VS_PROTO_UDP=y
+CONFIG_IP_VS_PROTO_ESP=y
+CONFIG_IP_VS_PROTO_AH=y
+CONFIG_IP_VS_RR=m
+CONFIG_IP_VS_WRR=m
+CONFIG_IP_VS_LC=m
+CONFIG_IP_VS_WLC=m
+CONFIG_IP_VS_LBLC=m
+CONFIG_IP_VS_LBLCR=m
+CONFIG_IP_VS_DH=m
+CONFIG_IP_VS_SH=m
+CONFIG_IP_VS_SED=m
+CONFIG_IP_VS_NQ=m
+CONFIG_IP_VS_TWOS=m
+CONFIG_IP_VS_FTP=m
+CONFIG_IP_VS_PE_SIP=m
+CONFIG_NFT_FIB_IPV4=m
+CONFIG_NF_TABLES_ARP=y
+CONFIG_NF_LOG_IPV4=m
+CONFIG_IP_NF_IPTABLES=m
+CONFIG_IP_NF_MATCH_AH=m
+CONFIG_IP_NF_MATCH_ECN=m
+CONFIG_IP_NF_MATCH_RPFILTER=m
+CONFIG_IP_NF_MATCH_TTL=m
+CONFIG_IP_NF_FILTER=m
+CONFIG_IP_NF_TARGET_REJECT=m
+CONFIG_IP_NF_NAT=m
+CONFIG_IP_NF_TARGET_MASQUERADE=m
+CONFIG_IP_NF_MANGLE=m
+CONFIG_IP_NF_TARGET_ECN=m
+CONFIG_IP_NF_TARGET_TTL=m
+CONFIG_IP_NF_RAW=m
+CONFIG_IP_NF_SECURITY=m
+CONFIG_IP_NF_ARPTABLES=m
+CONFIG_IP_NF_ARPFILTER=m
+CONFIG_IP_NF_ARP_MANGLE=m
+CONFIG_NFT_FIB_IPV6=m
+CONFIG_IP6_NF_IPTABLES=m
+CONFIG_IP6_NF_MATCH_AH=m
+CONFIG_IP6_NF_MATCH_EUI64=m
+CONFIG_IP6_NF_MATCH_FRAG=m
+CONFIG_IP6_NF_MATCH_OPTS=m
+CONFIG_IP6_NF_MATCH_HL=m
+CONFIG_IP6_NF_MATCH_IPV6HEADER=m
+CONFIG_IP6_NF_MATCH_MH=m
+CONFIG_IP6_NF_MATCH_RPFILTER=m
+CONFIG_IP6_NF_MATCH_RT=m
+CONFIG_IP6_NF_TARGET_HL=m
+CONFIG_IP6_NF_FILTER=m
+CONFIG_IP6_NF_TARGET_REJECT=m
+CONFIG_IP6_NF_MANGLE=m
+CONFIG_IP6_NF_RAW=m
+CONFIG_IP6_NF_SECURITY=m
+CONFIG_IP6_NF_NAT=m
+CONFIG_IP6_NF_TARGET_MASQUERADE=m
+CONFIG_NF_TABLES_BRIDGE=m
+CONFIG_RDS=m
+CONFIG_RDS_RDMA=m
+CONFIG_RDS_TCP=m
+CONFIG_L2TP=m
+CONFIG_L2TP_DEBUGFS=m
+CONFIG_L2TP_V3=y
+CONFIG_L2TP_IP=m
+CONFIG_L2TP_ETH=m
+CONFIG_BRIDGE=y
+CONFIG_BRIDGE_MRP=y
+CONFIG_VLAN_8021Q=m
+CONFIG_VLAN_8021Q_GVRP=y
+CONFIG_NET_SCHED=y
+CONFIG_NET_SCH_HTB=m
+CONFIG_NET_SCH_HFSC=m
+CONFIG_NET_SCH_PRIO=m
+CONFIG_NET_SCH_MULTIQ=m
+CONFIG_NET_SCH_RED=m
+CONFIG_NET_SCH_SFB=m
+CONFIG_NET_SCH_SFQ=m
+CONFIG_NET_SCH_TEQL=m
+CONFIG_NET_SCH_TBF=m
+CONFIG_NET_SCH_GRED=m
+CONFIG_NET_SCH_NETEM=m
+CONFIG_NET_SCH_DRR=m
+CONFIG_NET_SCH_MQPRIO=m
+CONFIG_NET_SCH_CHOKE=m
+CONFIG_NET_SCH_QFQ=m
+CONFIG_NET_SCH_CODEL=m
+CONFIG_NET_SCH_FQ_CODEL=m
+CONFIG_NET_SCH_INGRESS=m
+CONFIG_NET_SCH_PLUG=m
+CONFIG_NET_SCH_ETS=m
+CONFIG_NET_CLS_BASIC=m
+CONFIG_NET_CLS_ROUTE4=m
+CONFIG_NET_CLS_FW=m
+CONFIG_NET_CLS_U32=m
+CONFIG_CLS_U32_PERF=y
+CONFIG_CLS_U32_MARK=y
+CONFIG_NET_CLS_FLOW=m
+CONFIG_NET_CLS_CGROUP=y
+CONFIG_NET_CLS_BPF=m
+CONFIG_NET_CLS_ACT=y
+CONFIG_NET_ACT_POLICE=m
+CONFIG_NET_ACT_GACT=m
+CONFIG_GACT_PROB=y
+CONFIG_NET_ACT_MIRRED=m
+CONFIG_NET_ACT_IPT=m
+CONFIG_NET_ACT_NAT=m
+CONFIG_NET_ACT_PEDIT=m
+CONFIG_NET_ACT_SIMP=m
+CONFIG_NET_ACT_SKBEDIT=m
+CONFIG_NET_ACT_CSUM=m
+CONFIG_NET_ACT_GATE=m
+CONFIG_NET_TC_SKB_EXT=y
+CONFIG_DNS_RESOLVER=y
+CONFIG_OPENVSWITCH=m
+CONFIG_VSOCKETS=m
+CONFIG_VIRTIO_VSOCKETS=m
+CONFIG_NETLINK_DIAG=m
+CONFIG_NET_SWITCHDEV=y
+CONFIG_CGROUP_NET_PRIO=y
+CONFIG_NET_PKTGEN=m
+CONFIG_PCI=y
+# CONFIG_PCIEASPM is not set
+CONFIG_PCI_IOV=y
+CONFIG_HOTPLUG_PCI=y
+CONFIG_HOTPLUG_PCI_S390=y
+CONFIG_UEVENT_HELPER=y
+CONFIG_DEVTMPFS=y
+CONFIG_DEVTMPFS_SAFE=y
+# CONFIG_FW_LOADER is not set
+CONFIG_CONNECTOR=y
+CONFIG_ZRAM=y
+CONFIG_BLK_DEV_LOOP=m
+CONFIG_BLK_DEV_DRBD=m
+CONFIG_BLK_DEV_NBD=m
+CONFIG_BLK_DEV_RAM=y
+CONFIG_BLK_DEV_RAM_SIZE=32768
+CONFIG_VIRTIO_BLK=y
+CONFIG_BLK_DEV_RBD=m
+CONFIG_BLK_DEV_NVME=m
+CONFIG_ENCLOSURE_SERVICES=m
+CONFIG_GENWQE=m
+CONFIG_RAID_ATTRS=m
+CONFIG_SCSI=y
+CONFIG_BLK_DEV_SD=y
+CONFIG_CHR_DEV_ST=m
+CONFIG_BLK_DEV_SR=m
+CONFIG_CHR_DEV_SG=y
+CONFIG_CHR_DEV_SCH=m
+CONFIG_SCSI_ENCLOSURE=m
+CONFIG_SCSI_CONSTANTS=y
+CONFIG_SCSI_LOGGING=y
+CONFIG_SCSI_SPI_ATTRS=m
+CONFIG_SCSI_FC_ATTRS=m
+CONFIG_SCSI_SAS_LIBSAS=m
+CONFIG_SCSI_SRP_ATTRS=m
+CONFIG_ISCSI_TCP=m
+CONFIG_SCSI_DEBUG=m
+CONFIG_ZFCP=m
+CONFIG_SCSI_VIRTIO=m
+CONFIG_SCSI_DH=y
+CONFIG_SCSI_DH_RDAC=m
+CONFIG_SCSI_DH_HP_SW=m
+CONFIG_SCSI_DH_EMC=m
+CONFIG_SCSI_DH_ALUA=m
+CONFIG_MD=y
+CONFIG_BLK_DEV_MD=y
+# CONFIG_MD_BITMAP_FILE is not set
+CONFIG_MD_LINEAR=m
+CONFIG_MD_MULTIPATH=m
+CONFIG_MD_FAULTY=m
+CONFIG_MD_CLUSTER=m
+CONFIG_BCACHE=m
+CONFIG_BLK_DEV_DM=y
+CONFIG_DM_UNSTRIPED=m
+CONFIG_DM_CRYPT=m
+CONFIG_DM_SNAPSHOT=m
+CONFIG_DM_THIN_PROVISIONING=m
+CONFIG_DM_WRITECACHE=m
+CONFIG_DM_CLONE=m
+CONFIG_DM_MIRROR=m
+CONFIG_DM_LOG_USERSPACE=m
+CONFIG_DM_RAID=m
+CONFIG_DM_ZERO=m
+CONFIG_DM_MULTIPATH=m
+CONFIG_DM_MULTIPATH_QL=m
+CONFIG_DM_MULTIPATH_ST=m
+CONFIG_DM_MULTIPATH_HST=m
+CONFIG_DM_MULTIPATH_IOA=m
+CONFIG_DM_DELAY=m
+CONFIG_DM_INIT=y
+CONFIG_DM_UEVENT=y
+CONFIG_DM_FLAKEY=m
+CONFIG_DM_VERITY=m
+CONFIG_DM_VERITY_VERIFY_ROOTHASH_SIG=y
+CONFIG_DM_SWITCH=m
+CONFIG_DM_INTEGRITY=m
+CONFIG_NETDEVICES=y
+CONFIG_BONDING=m
+CONFIG_DUMMY=m
+CONFIG_EQUALIZER=m
+CONFIG_IFB=m
+CONFIG_MACVLAN=m
+CONFIG_MACVTAP=m
+CONFIG_VXLAN=m
+CONFIG_BAREUDP=m
+CONFIG_AMT=m
+CONFIG_TUN=m
+CONFIG_VETH=m
+CONFIG_VIRTIO_NET=m
+CONFIG_NLMON=m
+# CONFIG_NET_VENDOR_3COM is not set
+# CONFIG_NET_VENDOR_ADAPTEC is not set
+# CONFIG_NET_VENDOR_AGERE is not set
+# CONFIG_NET_VENDOR_ALACRITECH is not set
+# CONFIG_NET_VENDOR_ALTEON is not set
+# CONFIG_NET_VENDOR_AMAZON is not set
+# CONFIG_NET_VENDOR_AMD is not set
+# CONFIG_NET_VENDOR_AQUANTIA is not set
+# CONFIG_NET_VENDOR_ARC is not set
+# CONFIG_NET_VENDOR_ASIX is not set
+# CONFIG_NET_VENDOR_ATHEROS is not set
+# CONFIG_NET_VENDOR_BROADCOM is not set
+# CONFIG_NET_VENDOR_CADENCE is not set
+# CONFIG_NET_VENDOR_CAVIUM is not set
+# CONFIG_NET_VENDOR_CHELSIO is not set
+# CONFIG_NET_VENDOR_CISCO is not set
+# CONFIG_NET_VENDOR_CORTINA is not set
+# CONFIG_NET_VENDOR_DAVICOM is not set
+# CONFIG_NET_VENDOR_DEC is not set
+# CONFIG_NET_VENDOR_DLINK is not set
+# CONFIG_NET_VENDOR_EMULEX is not set
+# CONFIG_NET_VENDOR_ENGLEDER is not set
+# CONFIG_NET_VENDOR_EZCHIP is not set
+# CONFIG_NET_VENDOR_FUNGIBLE is not set
+# CONFIG_NET_VENDOR_GOOGLE is not set
+# CONFIG_NET_VENDOR_HUAWEI is not set
+# CONFIG_NET_VENDOR_INTEL is not set
+# CONFIG_NET_VENDOR_LITEX is not set
+# CONFIG_NET_VENDOR_MARVELL is not set
+CONFIG_MLX4_EN=m
+CONFIG_MLX5_CORE=m
+CONFIG_MLX5_CORE_EN=y
+# CONFIG_NET_VENDOR_MICREL is not set
+# CONFIG_NET_VENDOR_MICROCHIP is not set
+# CONFIG_NET_VENDOR_MICROSEMI is not set
+# CONFIG_NET_VENDOR_MICROSOFT is not set
+# CONFIG_NET_VENDOR_MYRI is not set
+# CONFIG_NET_VENDOR_NI is not set
+# CONFIG_NET_VENDOR_NATSEMI is not set
+# CONFIG_NET_VENDOR_NETERION is not set
+# CONFIG_NET_VENDOR_NETRONOME is not set
+# CONFIG_NET_VENDOR_NVIDIA is not set
+# CONFIG_NET_VENDOR_OKI is not set
+# CONFIG_NET_VENDOR_PACKET_ENGINES is not set
+# CONFIG_NET_VENDOR_PENSANDO is not set
+# CONFIG_NET_VENDOR_QLOGIC is not set
+# CONFIG_NET_VENDOR_BROCADE is not set
+# CONFIG_NET_VENDOR_QUALCOMM is not set
+# CONFIG_NET_VENDOR_RDC is not set
+# CONFIG_NET_VENDOR_REALTEK is not set
+# CONFIG_NET_VENDOR_RENESAS is not set
+# CONFIG_NET_VENDOR_ROCKER is not set
+# CONFIG_NET_VENDOR_SAMSUNG is not set
+# CONFIG_NET_VENDOR_SEEQ is not set
+# CONFIG_NET_VENDOR_SILAN is not set
+# CONFIG_NET_VENDOR_SIS is not set
+# CONFIG_NET_VENDOR_SOLARFLARE is not set
+# CONFIG_NET_VENDOR_SMSC is not set
+# CONFIG_NET_VENDOR_SOCIONEXT is not set
+# CONFIG_NET_VENDOR_STMICRO is not set
+# CONFIG_NET_VENDOR_SUN is not set
+# CONFIG_NET_VENDOR_SYNOPSYS is not set
+# CONFIG_NET_VENDOR_TEHUTI is not set
+# CONFIG_NET_VENDOR_TI is not set
+# CONFIG_NET_VENDOR_VERTEXCOM is not set
+# CONFIG_NET_VENDOR_VIA is not set
+# CONFIG_NET_VENDOR_WANGXUN is not set
+# CONFIG_NET_VENDOR_WIZNET is not set
+# CONFIG_NET_VENDOR_XILINX is not set
+CONFIG_PPP=m
+CONFIG_PPP_BSDCOMP=m
+CONFIG_PPP_DEFLATE=m
+CONFIG_PPP_FILTER=y
+CONFIG_PPP_MPPE=m
+CONFIG_PPP_MULTILINK=y
+CONFIG_PPPOE=m
+CONFIG_PPTP=m
+CONFIG_PPPOL2TP=m
+CONFIG_PPP_ASYNC=m
+CONFIG_PPP_SYNC_TTY=m
+CONFIG_ISM=m
+CONFIG_INPUT_EVDEV=y
+# CONFIG_INPUT_KEYBOARD is not set
+# CONFIG_INPUT_MOUSE is not set
+# CONFIG_SERIO is not set
+CONFIG_LEGACY_PTY_COUNT=0
+# CONFIG_LEGACY_TIOCSTI is not set
+CONFIG_VIRTIO_CONSOLE=m
+CONFIG_HW_RANDOM_VIRTIO=m
+CONFIG_HANGCHECK_TIMER=m
+CONFIG_TN3270_FS=y
+# CONFIG_PTP_1588_CLOCK is not set
+# CONFIG_HWMON is not set
+CONFIG_WATCHDOG=y
+CONFIG_WATCHDOG_CORE=y
+CONFIG_WATCHDOG_NOWAYOUT=y
+CONFIG_SOFT_WATCHDOG=m
+CONFIG_DIAG288_WATCHDOG=m
+CONFIG_FB=y
+# CONFIG_FB_DEVICE is not set
+CONFIG_FRAMEBUFFER_CONSOLE=y
+CONFIG_FRAMEBUFFER_CONSOLE_DETECT_PRIMARY=y
+# CONFIG_HID_SUPPORT is not set
+# CONFIG_USB_SUPPORT is not set
+CONFIG_INFINIBAND=m
+CONFIG_INFINIBAND_USER_ACCESS=m
+CONFIG_MLX4_INFINIBAND=m
+CONFIG_MLX5_INFINIBAND=m
+CONFIG_SYNC_FILE=y
+CONFIG_VFIO=m
+CONFIG_VFIO_PCI=m
+CONFIG_MLX5_VFIO_PCI=m
+CONFIG_VIRTIO_PCI=m
+CONFIG_VIRTIO_BALLOON=m
+CONFIG_VIRTIO_INPUT=y
+CONFIG_VHOST_NET=m
+CONFIG_VHOST_VSOCK=m
+CONFIG_EXT4_FS=y
+CONFIG_EXT4_FS_POSIX_ACL=y
+CONFIG_EXT4_FS_SECURITY=y
+CONFIG_JBD2_DEBUG=y
+CONFIG_JFS_FS=m
+CONFIG_JFS_POSIX_ACL=y
+CONFIG_JFS_SECURITY=y
+CONFIG_JFS_STATISTICS=y
+CONFIG_XFS_FS=y
+CONFIG_XFS_QUOTA=y
+CONFIG_XFS_POSIX_ACL=y
+CONFIG_XFS_RT=y
+CONFIG_GFS2_FS=m
+CONFIG_GFS2_FS_LOCKING_DLM=y
+CONFIG_OCFS2_FS=m
+CONFIG_BTRFS_FS=y
+CONFIG_BTRFS_FS_POSIX_ACL=y
+CONFIG_NILFS2_FS=m
+CONFIG_FS_DAX=y
+CONFIG_EXPORTFS_BLOCK_OPS=y
+CONFIG_FS_ENCRYPTION=y
+CONFIG_FS_VERITY=y
+CONFIG_FS_VERITY_BUILTIN_SIGNATURES=y
+CONFIG_FANOTIFY=y
+CONFIG_FANOTIFY_ACCESS_PERMISSIONS=y
+CONFIG_QUOTA_NETLINK_INTERFACE=y
+CONFIG_QFMT_V1=m
+CONFIG_QFMT_V2=m
+CONFIG_AUTOFS_FS=m
+CONFIG_FUSE_FS=y
+CONFIG_CUSE=m
+CONFIG_VIRTIO_FS=m
+CONFIG_OVERLAY_FS=m
+CONFIG_NETFS_STATS=y
+CONFIG_FSCACHE=m
+CONFIG_CACHEFILES=m
+CONFIG_ISO9660_FS=y
+CONFIG_JOLIET=y
+CONFIG_ZISOFS=y
+CONFIG_UDF_FS=m
+CONFIG_MSDOS_FS=m
+CONFIG_VFAT_FS=m
+CONFIG_EXFAT_FS=m
+CONFIG_NTFS_FS=m
+CONFIG_NTFS_RW=y
+CONFIG_PROC_KCORE=y
+CONFIG_TMPFS=y
+CONFIG_TMPFS_POSIX_ACL=y
+CONFIG_TMPFS_INODE64=y
+CONFIG_TMPFS_QUOTA=y
+CONFIG_HUGETLBFS=y
+CONFIG_CONFIGFS_FS=m
+CONFIG_ECRYPT_FS=m
+CONFIG_CRAMFS=m
+CONFIG_SQUASHFS=m
+CONFIG_SQUASHFS_CHOICE_DECOMP_BY_MOUNT=y
+CONFIG_SQUASHFS_XATTR=y
+CONFIG_SQUASHFS_LZ4=y
+CONFIG_SQUASHFS_LZO=y
+CONFIG_SQUASHFS_XZ=y
+CONFIG_SQUASHFS_ZSTD=y
+CONFIG_ROMFS_FS=m
+CONFIG_NFS_FS=m
+CONFIG_NFS_V3_ACL=y
+CONFIG_NFS_V4=m
+CONFIG_NFS_SWAP=y
+CONFIG_NFSD=m
+CONFIG_NFSD_V3_ACL=y
+CONFIG_NFSD_V4=y
+CONFIG_NFSD_V4_SECURITY_LABEL=y
+CONFIG_CIFS=m
+CONFIG_CIFS_UPCALL=y
+CONFIG_CIFS_XATTR=y
+CONFIG_CIFS_POSIX=y
+# CONFIG_CIFS_DEBUG is not set
+CONFIG_CIFS_DFS_UPCALL=y
+CONFIG_CIFS_SWN_UPCALL=y
+CONFIG_NLS_DEFAULT="utf8"
+CONFIG_NLS_CODEPAGE_437=m
+CONFIG_NLS_CODEPAGE_850=m
+CONFIG_NLS_ASCII=m
+CONFIG_NLS_ISO8859_1=m
+CONFIG_NLS_ISO8859_15=m
+CONFIG_NLS_UTF8=m
+CONFIG_DLM=m
+CONFIG_UNICODE=y
+CONFIG_PERSISTENT_KEYRINGS=y
+CONFIG_ENCRYPTED_KEYS=m
+CONFIG_KEY_NOTIFICATIONS=y
+CONFIG_SECURITY=y
+CONFIG_SECURITY_NETWORK=y
+CONFIG_SECURITY_SELINUX=y
+CONFIG_SECURITY_SELINUX_BOOTPARAM=y
+CONFIG_SECURITY_LOCKDOWN_LSM=y
+CONFIG_SECURITY_LOCKDOWN_LSM_EARLY=y
+CONFIG_SECURITY_LANDLOCK=y
+CONFIG_INTEGRITY_SIGNATURE=y
+CONFIG_INTEGRITY_ASYMMETRIC_KEYS=y
+CONFIG_INTEGRITY_PLATFORM_KEYRING=y
+CONFIG_IMA=y
+CONFIG_IMA_DEFAULT_HASH_SHA256=y
+CONFIG_IMA_WRITE_POLICY=y
+CONFIG_IMA_APPRAISE=y
+CONFIG_LSM="yama,loadpin,safesetid,integrity,selinux,smack,tomoyo,apparmor"
+CONFIG_INIT_STACK_NONE=y
+CONFIG_BUG_ON_DATA_CORRUPTION=y
+CONFIG_CRYPTO_FIPS=y
+CONFIG_CRYPTO_USER=m
+# CONFIG_CRYPTO_MANAGER_DISABLE_TESTS is not set
+CONFIG_CRYPTO_PCRYPT=m
+CONFIG_CRYPTO_CRYPTD=m
+CONFIG_CRYPTO_TEST=m
+CONFIG_CRYPTO_DH=m
+CONFIG_CRYPTO_ECDH=m
+CONFIG_CRYPTO_ECDSA=m
+CONFIG_CRYPTO_ECRDSA=m
+CONFIG_CRYPTO_SM2=m
+CONFIG_CRYPTO_CURVE25519=m
+CONFIG_CRYPTO_AES_TI=m
+CONFIG_CRYPTO_ANUBIS=m
+CONFIG_CRYPTO_ARIA=m
+CONFIG_CRYPTO_BLOWFISH=m
+CONFIG_CRYPTO_CAMELLIA=m
+CONFIG_CRYPTO_CAST5=m
+CONFIG_CRYPTO_CAST6=m
+CONFIG_CRYPTO_DES=m
+CONFIG_CRYPTO_FCRYPT=m
+CONFIG_CRYPTO_KHAZAD=m
+CONFIG_CRYPTO_SEED=m
+CONFIG_CRYPTO_SERPENT=m
+CONFIG_CRYPTO_SM4_GENERIC=m
+CONFIG_CRYPTO_TEA=m
+CONFIG_CRYPTO_TWOFISH=m
+CONFIG_CRYPTO_ADIANTUM=m
+CONFIG_CRYPTO_ARC4=m
+CONFIG_CRYPTO_CFB=m
+CONFIG_CRYPTO_HCTR2=m
+CONFIG_CRYPTO_KEYWRAP=m
+CONFIG_CRYPTO_LRW=m
+CONFIG_CRYPTO_OFB=m
+CONFIG_CRYPTO_PCBC=m
+CONFIG_CRYPTO_AEGIS128=m
+CONFIG_CRYPTO_CHACHA20POLY1305=m
+CONFIG_CRYPTO_GCM=y
+CONFIG_CRYPTO_SEQIV=y
+CONFIG_CRYPTO_MD4=m
+CONFIG_CRYPTO_MD5=y
+CONFIG_CRYPTO_MICHAEL_MIC=m
+CONFIG_CRYPTO_RMD160=m
+CONFIG_CRYPTO_SM3_GENERIC=m
+CONFIG_CRYPTO_VMAC=m
+CONFIG_CRYPTO_WP512=m
+CONFIG_CRYPTO_XCBC=m
+CONFIG_CRYPTO_CRC32=m
+CONFIG_CRYPTO_842=m
+CONFIG_CRYPTO_LZ4=m
+CONFIG_CRYPTO_LZ4HC=m
+CONFIG_CRYPTO_ZSTD=m
+CONFIG_CRYPTO_ANSI_CPRNG=m
+CONFIG_CRYPTO_USER_API_HASH=m
+CONFIG_CRYPTO_USER_API_SKCIPHER=m
+CONFIG_CRYPTO_USER_API_RNG=m
+CONFIG_CRYPTO_USER_API_AEAD=m
+CONFIG_CRYPTO_STATS=y
+CONFIG_CRYPTO_CRC32_S390=y
+CONFIG_CRYPTO_SHA512_S390=m
+CONFIG_CRYPTO_SHA1_S390=m
+CONFIG_CRYPTO_SHA256_S390=m
+CONFIG_CRYPTO_SHA3_256_S390=m
+CONFIG_CRYPTO_SHA3_512_S390=m
+CONFIG_CRYPTO_GHASH_S390=m
+CONFIG_CRYPTO_AES_S390=m
+CONFIG_CRYPTO_DES_S390=m
+CONFIG_CRYPTO_CHACHA_S390=m
+CONFIG_ZCRYPT=m
+CONFIG_PKEY=m
+CONFIG_CRYPTO_PAES_S390=m
+CONFIG_CRYPTO_DEV_VIRTIO=m
+CONFIG_SYSTEM_BLACKLIST_KEYRING=y
+CONFIG_CORDIC=m
+CONFIG_PRIME_NUMBERS=m
+CONFIG_CRYPTO_LIB_CURVE25519=m
+CONFIG_CRYPTO_LIB_CHACHA20POLY1305=m
+CONFIG_CRC4=m
+CONFIG_CRC7=m
+CONFIG_CRC8=m
+CONFIG_XZ_DEC_MICROLZMA=y
+CONFIG_DMA_CMA=y
+CONFIG_CMA_SIZE_MBYTES=0
+CONFIG_PRINTK_TIME=y
+CONFIG_DYNAMIC_DEBUG=y
+CONFIG_DEBUG_INFO_DWARF4=y
+CONFIG_GDB_SCRIPTS=y
+CONFIG_DEBUG_SECTION_MISMATCH=y
+CONFIG_MAGIC_SYSRQ=y
+CONFIG_DEBUG_WX=y
+CONFIG_PTDUMP_DEBUGFS=y
+CONFIG_DEBUG_MEMORY_INIT=y
+CONFIG_PANIC_ON_OOPS=y
+CONFIG_TEST_LOCKUP=m
+CONFIG_RCU_TORTURE_TEST=m
+CONFIG_RCU_REF_SCALE_TEST=m
+CONFIG_RCU_CPU_STALL_TIMEOUT=60
+CONFIG_LATENCYTOP=y
+CONFIG_BOOTTIME_TRACING=y
+CONFIG_FUNCTION_GRAPH_RETVAL=y
+CONFIG_FPROBE=y
+CONFIG_FUNCTION_PROFILER=y
+CONFIG_STACK_TRACER=y
+CONFIG_SCHED_TRACER=y
+CONFIG_FTRACE_SYSCALLS=y
+CONFIG_BLK_DEV_IO_TRACE=y
+CONFIG_USER_EVENTS=y
+CONFIG_HIST_TRIGGERS=y
+CONFIG_SAMPLES=y
+CONFIG_SAMPLE_TRACE_PRINTK=m
+CONFIG_SAMPLE_FTRACE_DIRECT=m
+CONFIG_SAMPLE_FTRACE_DIRECT_MULTI=m
+CONFIG_SAMPLE_FTRACE_OPS=m
+CONFIG_KUNIT=m
+CONFIG_KUNIT_DEBUGFS=y
+CONFIG_LKDTM=m
+CONFIG_KPROBES_SANITY_TEST=m
+CONFIG_PERCPU_TEST=m
+CONFIG_ATOMIC64_SELFTEST=y
+CONFIG_TEST_BPF=m
+CONFIG_TEST_LIVEPATCH=m
diff --git a/arch/s390/configs/kasan.config b/arch/s390/configs/kasan.config
new file mode 100644
index 0000000000..84c2b551e9
--- /dev/null
+++ b/arch/s390/configs/kasan.config
@@ -0,0 +1,4 @@
+# Help: Enable KASan for debugging
+CONFIG_KASAN=y
+CONFIG_KASAN_INLINE=y
+CONFIG_KASAN_VMALLOC=y
diff --git a/arch/s390/configs/zfcpdump_defconfig b/arch/s390/configs/zfcpdump_defconfig
new file mode 100644
index 0000000000..b831083b4e
--- /dev/null
+++ b/arch/s390/configs/zfcpdump_defconfig
@@ -0,0 +1,82 @@
+CONFIG_NO_HZ_IDLE=y
+CONFIG_HIGH_RES_TIMERS=y
+CONFIG_BPF_SYSCALL=y
+# CONFIG_CPU_ISOLATION is not set
+# CONFIG_UTS_NS is not set
+# CONFIG_TIME_NS is not set
+# CONFIG_PID_NS is not set
+# CONFIG_NET_NS is not set
+CONFIG_BLK_DEV_INITRD=y
+CONFIG_CC_OPTIMIZE_FOR_SIZE=y
+CONFIG_CRASH_DUMP=y
+CONFIG_MARCH_ZEC12=y
+CONFIG_TUNE_ZEC12=y
+# CONFIG_COMPAT is not set
+CONFIG_NR_CPUS=2
+CONFIG_HZ_100=y
+# CONFIG_CHSC_SCH is not set
+# CONFIG_SCM_BUS is not set
+# CONFIG_PFAULT is not set
+# CONFIG_S390_HYPFS is not set
+# CONFIG_VIRTUALIZATION is not set
+# CONFIG_S390_GUEST is not set
+# CONFIG_SECCOMP is not set
+# CONFIG_GCC_PLUGINS is not set
+# CONFIG_BLOCK_LEGACY_AUTOLOAD is not set
+CONFIG_PARTITION_ADVANCED=y
+# CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set
+# CONFIG_SWAP is not set
+# CONFIG_COMPAT_BRK is not set
+# CONFIG_COMPACTION is not set
+# CONFIG_MIGRATION is not set
+CONFIG_NET=y
+# CONFIG_IUCV is not set
+# CONFIG_PCPU_DEV_REFCNT is not set
+# CONFIG_ETHTOOL_NETLINK is not set
+CONFIG_DEVTMPFS=y
+CONFIG_DEVTMPFS_SAFE=y
+CONFIG_BLK_DEV_RAM=y
+# CONFIG_DCSSBLK is not set
+# CONFIG_DASD is not set
+CONFIG_ENCLOSURE_SERVICES=y
+CONFIG_SCSI=y
+CONFIG_BLK_DEV_SD=y
+CONFIG_SCSI_ENCLOSURE=y
+CONFIG_SCSI_CONSTANTS=y
+CONFIG_SCSI_LOGGING=y
+CONFIG_SCSI_FC_ATTRS=y
+CONFIG_ZFCP=y
+# CONFIG_INPUT_KEYBOARD is not set
+# CONFIG_INPUT_MOUSE is not set
+# CONFIG_SERIO is not set
+# CONFIG_LEGACY_TIOCSTI is not set
+# CONFIG_HVC_IUCV is not set
+# CONFIG_HW_RANDOM_S390 is not set
+# CONFIG_HMC_DRV is not set
+# CONFIG_S390_TAPE is not set
+# CONFIG_VMCP is not set
+# CONFIG_MONWRITER is not set
+# CONFIG_S390_VMUR is not set
+# CONFIG_HID_SUPPORT is not set
+# CONFIG_VIRTIO_MENU is not set
+# CONFIG_VHOST_MENU is not set
+# CONFIG_IOMMU_SUPPORT is not set
+# CONFIG_DNOTIFY is not set
+# CONFIG_INOTIFY_USER is not set
+# CONFIG_MISC_FILESYSTEMS is not set
+# CONFIG_NETWORK_FILESYSTEMS is not set
+CONFIG_LSM="yama,loadpin,safesetid,integrity"
+CONFIG_INIT_STACK_NONE=y
+# CONFIG_ZLIB_DFLTCC is not set
+CONFIG_XZ_DEC_MICROLZMA=y
+CONFIG_PRINTK_TIME=y
+# CONFIG_SYMBOLIC_ERRNAME is not set
+CONFIG_DEBUG_KERNEL=y
+CONFIG_DEBUG_INFO_DWARF4=y
+CONFIG_DEBUG_FS=y
+CONFIG_PANIC_ON_OOPS=y
+# CONFIG_SCHED_DEBUG is not set
+CONFIG_RCU_CPU_STALL_TIMEOUT=60
+# CONFIG_RCU_TRACE is not set
+# CONFIG_FTRACE is not set
+# CONFIG_RUNTIME_TESTING_MENU is not set
diff --git a/arch/s390/crypto/Kconfig b/arch/s390/crypto/Kconfig
new file mode 100644
index 0000000000..06ee706b0d
--- /dev/null
+++ b/arch/s390/crypto/Kconfig
@@ -0,0 +1,135 @@
+# SPDX-License-Identifier: GPL-2.0
+
+menu "Accelerated Cryptographic Algorithms for CPU (s390)"
+
+config CRYPTO_CRC32_S390
+	tristate "CRC32c and CRC32"
+	depends on S390
+	select CRYPTO_HASH
+	select CRC32
+	help
+	  CRC32c and CRC32 CRC algorithms
+
+	  Architecture: s390
+
+	  It is available with IBM z13 or later.
+
+config CRYPTO_SHA512_S390
+	tristate "Hash functions: SHA-384 and SHA-512"
+	depends on S390
+	select CRYPTO_HASH
+	help
+	  SHA-384 and SHA-512 secure hash algorithms (FIPS 180)
+
+	  Architecture: s390
+
+	  It is available as of z10.
+
+config CRYPTO_SHA1_S390
+	tristate "Hash functions: SHA-1"
+	depends on S390
+	select CRYPTO_HASH
+	help
+	  SHA-1 secure hash algorithm (FIPS 180)
+
+	  Architecture: s390
+
+	  It is available as of z990.
+
+config CRYPTO_SHA256_S390
+	tristate "Hash functions: SHA-224 and SHA-256"
+	depends on S390
+	select CRYPTO_HASH
+	help
+	  SHA-224 and SHA-256 secure hash algorithms (FIPS 180)
+
+	  Architecture: s390
+
+	  It is available as of z9.
+
+config CRYPTO_SHA3_256_S390
+	tristate "Hash functions: SHA3-224 and SHA3-256"
+	depends on S390
+	select CRYPTO_HASH
+	help
+	  SHA3-224 and SHA3-256 secure hash algorithms (FIPS 202)
+
+	  Architecture: s390
+
+	  It is available as of z14.
+
+config CRYPTO_SHA3_512_S390
+	tristate "Hash functions: SHA3-384 and SHA3-512"
+	depends on S390
+	select CRYPTO_HASH
+	help
+	  SHA3-384 and SHA3-512 secure hash algorithms (FIPS 202)
+
+	  Architecture: s390
+
+	  It is available as of z14.
+
+config CRYPTO_GHASH_S390
+	tristate "Hash functions: GHASH"
+	depends on S390
+	select CRYPTO_HASH
+	help
+	  GCM GHASH hash function (NIST SP800-38D)
+
+	  Architecture: s390
+
+	  It is available as of z196.
+
+config CRYPTO_AES_S390
+	tristate "Ciphers: AES, modes: ECB, CBC, CTR, XTS, GCM"
+	depends on S390
+	select CRYPTO_ALGAPI
+	select CRYPTO_SKCIPHER
+	help
+	  Block cipher: AES cipher algorithms (FIPS 197)
+	  AEAD cipher: AES with GCM
+	  Length-preserving ciphers: AES with ECB, CBC, XTS, and CTR modes
+
+	  Architecture: s390
+
+	  As of z9 the ECB and CBC modes are hardware accelerated
+	  for 128 bit keys.
+
+	  As of z10 the ECB and CBC modes are hardware accelerated
+	  for all AES key sizes.
+
+	  As of z196 the CTR mode is hardware accelerated for all AES
+	  key sizes and XTS mode is hardware accelerated for 256 and
+	  512 bit keys.
+
+config CRYPTO_DES_S390
+	tristate "Ciphers: DES and Triple DES EDE, modes: ECB, CBC, CTR"
+	depends on S390
+	select CRYPTO_ALGAPI
+	select CRYPTO_SKCIPHER
+	select CRYPTO_LIB_DES
+	help
+	  Block ciphers: DES (FIPS 46-2) cipher algorithm
+	  Block ciphers: Triple DES EDE (FIPS 46-3) cipher algorithm
+	  Length-preserving ciphers: DES with ECB, CBC, and CTR modes
+	  Length-preserving ciphers: Triple DES EDED with ECB, CBC, and CTR modes
+
+	  Architecture: s390
+
+	  As of z990 the ECB and CBC mode are hardware accelerated.
+	  As of z196 the CTR mode is hardware accelerated.
+
+config CRYPTO_CHACHA_S390
+	tristate "Ciphers: ChaCha20"
+	depends on S390
+	select CRYPTO_SKCIPHER
+	select CRYPTO_LIB_CHACHA_GENERIC
+	select CRYPTO_ARCH_HAVE_LIB_CHACHA
+	help
+	  Length-preserving cipher: ChaCha20 stream cipher (RFC 7539)
+
+	  Architecture: s390
+
+	  It is available as of z13.
+
+endmenu
diff --git a/arch/s390/crypto/Makefile b/arch/s390/crypto/Makefile
new file mode 100644
index 0000000000..1b1cc478fa
--- /dev/null
+++ b/arch/s390/crypto/Makefile
@@ -0,0 +1,21 @@
+# SPDX-License-Identifier: GPL-2.0
+#
+# Cryptographic API
+#
+
+obj-$(CONFIG_CRYPTO_SHA1_S390) += sha1_s390.o sha_common.o
+obj-$(CONFIG_CRYPTO_SHA256_S390) += sha256_s390.o sha_common.o
+obj-$(CONFIG_CRYPTO_SHA512_S390) += sha512_s390.o sha_common.o
+obj-$(CONFIG_CRYPTO_SHA3_256_S390) += sha3_256_s390.o sha_common.o
+obj-$(CONFIG_CRYPTO_SHA3_512_S390) += sha3_512_s390.o sha_common.o
+obj-$(CONFIG_CRYPTO_DES_S390) += des_s390.o
+obj-$(CONFIG_CRYPTO_AES_S390) += aes_s390.o
+obj-$(CONFIG_CRYPTO_PAES_S390) += paes_s390.o
+obj-$(CONFIG_CRYPTO_CHACHA_S390) += chacha_s390.o
+obj-$(CONFIG_S390_PRNG) += prng.o
+obj-$(CONFIG_CRYPTO_GHASH_S390) += ghash_s390.o
+obj-$(CONFIG_CRYPTO_CRC32_S390) += crc32-vx_s390.o
+obj-y += arch_random.o
+
+crc32-vx_s390-y := crc32-vx.o crc32le-vx.o crc32be-vx.o
+chacha_s390-y := chacha-glue.o chacha-s390.o
diff --git a/arch/s390/crypto/aes_s390.c b/arch/s390/crypto/aes_s390.c
new file mode 100644
index 0000000000..c6fe5405de
--- /dev/null
+++ b/arch/s390/crypto/aes_s390.c
@@ -0,0 +1,1057 @@
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * Cryptographic API.
+ *
+ * s390 implementation of the AES Cipher Algorithm.
+ *
+ * s390 Version:
+ *   Copyright IBM Corp. 2005, 2017
+ *   Author(s): Jan Glauber (jang@de.ibm.com)
+ *		Sebastian Siewior (sebastian@breakpoint.cc> SW-Fallback
+ *		Patrick Steuer <patrick.steuer@de.ibm.com>
+ *		Harald Freudenberger <freude@de.ibm.com>
+ *
+ * Derived from "crypto/aes_generic.c"
+ */
+
+#define KMSG_COMPONENT "aes_s390"
+#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
+
+#include <crypto/aes.h>
+#include <crypto/algapi.h>
+#include <crypto/ghash.h>
+#include <crypto/internal/aead.h>
+#include <crypto/internal/cipher.h>
+#include <crypto/internal/skcipher.h>
+#include <crypto/scatterwalk.h>
+#include <linux/err.h>
+#include <linux/module.h>
+#include <linux/cpufeature.h>
+#include <linux/init.h>
+#include <linux/mutex.h>
+#include <linux/fips.h>
+#include <linux/string.h>
+#include <crypto/xts.h>
+#include <asm/cpacf.h>
+
+static u8 *ctrblk;
+static DEFINE_MUTEX(ctrblk_lock);
+
+static cpacf_mask_t km_functions, kmc_functions, kmctr_functions,
+		    kma_functions;
+
+struct s390_aes_ctx {
+	u8 key[AES_MAX_KEY_SIZE];
+	int key_len;
+	unsigned long fc;
+	union {
+		struct crypto_skcipher *skcipher;
+		struct crypto_cipher *cip;
+	} fallback;
+};
+
+struct s390_xts_ctx {
+	u8 key[32];
+	u8 pcc_key[32];
+	int key_len;
+	unsigned long fc;
+	struct crypto_skcipher *fallback;
+};
+
+struct gcm_sg_walk {
+	struct scatter_walk walk;
+	unsigned int walk_bytes;
+	u8 *walk_ptr;
+	unsigned int walk_bytes_remain;
+	u8 buf[AES_BLOCK_SIZE];
+	unsigned int buf_bytes;
+	u8 *ptr;
+	unsigned int nbytes;
+};
+
+static int setkey_fallback_cip(struct crypto_tfm *tfm, const u8 *in_key,
+		unsigned int key_len)
+{
+	struct s390_aes_ctx *sctx = crypto_tfm_ctx(tfm);
+
+	sctx->fallback.cip->base.crt_flags &= ~CRYPTO_TFM_REQ_MASK;
+	sctx->fallback.cip->base.crt_flags |= (tfm->crt_flags &
+			CRYPTO_TFM_REQ_MASK);
+
+	return crypto_cipher_setkey(sctx->fallback.cip, in_key, key_len);
+}
+
+static int aes_set_key(struct crypto_tfm *tfm, const u8 *in_key,
+		       unsigned int key_len)
+{
+	struct s390_aes_ctx *sctx = crypto_tfm_ctx(tfm);
+	unsigned long fc;
+
+	/* Pick the correct function code based on the key length */
+	fc = (key_len == 16) ? CPACF_KM_AES_128 :
+	     (key_len == 24) ? CPACF_KM_AES_192 :
+	     (key_len == 32) ? CPACF_KM_AES_256 : 0;
+
+	/* Check if the function code is available */
+	sctx->fc = (fc && cpacf_test_func(&km_functions, fc)) ? fc : 0;
+	if (!sctx->fc)
+		return setkey_fallback_cip(tfm, in_key, key_len);
+
+	sctx->key_len = key_len;
+	memcpy(sctx->key, in_key, key_len);
+	return 0;
+}
+
+static void crypto_aes_encrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in)
+{
+	struct s390_aes_ctx *sctx = crypto_tfm_ctx(tfm);
+
+	if (unlikely(!sctx->fc)) {
+		crypto_cipher_encrypt_one(sctx->fallback.cip, out, in);
+		return;
+	}
+	cpacf_km(sctx->fc, &sctx->key, out, in, AES_BLOCK_SIZE);
+}
+
+static void crypto_aes_decrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in)
+{
+	struct s390_aes_ctx *sctx = crypto_tfm_ctx(tfm);
+
+	if (unlikely(!sctx->fc)) {
+		crypto_cipher_decrypt_one(sctx->fallback.cip, out, in);
+		return;
+	}
+	cpacf_km(sctx->fc | CPACF_DECRYPT,
+		 &sctx->key, out, in, AES_BLOCK_SIZE);
+}
+
+static int fallback_init_cip(struct crypto_tfm *tfm)
+{
+	const char *name = tfm->__crt_alg->cra_name;
+	struct s390_aes_ctx *sctx = crypto_tfm_ctx(tfm);
+
+	sctx->fallback.cip = crypto_alloc_cipher(name, 0,
+						 CRYPTO_ALG_NEED_FALLBACK);
+
+	if (IS_ERR(sctx->fallback.cip)) {
+		pr_err("Allocating AES fallback algorithm %s failed\n",
+		       name);
+		return PTR_ERR(sctx->fallback.cip);
+	}
+
+	return 0;
+}
+
+static void fallback_exit_cip(struct crypto_tfm *tfm)
+{
+	struct s390_aes_ctx *sctx = crypto_tfm_ctx(tfm);
+
+	crypto_free_cipher(sctx->fallback.cip);
+	sctx->fallback.cip = NULL;
+}
+
+static struct crypto_alg aes_alg = {
+	.cra_name		=	"aes",
+	.cra_driver_name	=	"aes-s390",
+	.cra_priority		=	300,
+	.cra_flags		=	CRYPTO_ALG_TYPE_CIPHER |
+					CRYPTO_ALG_NEED_FALLBACK,
+	.cra_blocksize		=	AES_BLOCK_SIZE,
+	.cra_ctxsize		=	sizeof(struct s390_aes_ctx),
+	.cra_module		=	THIS_MODULE,
+	.cra_init               =       fallback_init_cip,
+	.cra_exit               =       fallback_exit_cip,
+	.cra_u			=	{
+		.cipher = {
+			.cia_min_keysize	=	AES_MIN_KEY_SIZE,
+			.cia_max_keysize	=	AES_MAX_KEY_SIZE,
+			.cia_setkey		=	aes_set_key,
+			.cia_encrypt		=	crypto_aes_encrypt,
+			.cia_decrypt		=	crypto_aes_decrypt,
+		}
+	}
+};
+
+static int setkey_fallback_skcipher(struct crypto_skcipher *tfm, const u8 *key,
+				    unsigned int len)
+{
+	struct s390_aes_ctx *sctx = crypto_skcipher_ctx(tfm);
+
+	crypto_skcipher_clear_flags(sctx->fallback.skcipher,
+				    CRYPTO_TFM_REQ_MASK);
+	crypto_skcipher_set_flags(sctx->fallback.skcipher,
+				  crypto_skcipher_get_flags(tfm) &
+				  CRYPTO_TFM_REQ_MASK);
+	return crypto_skcipher_setkey(sctx->fallback.skcipher, key, len);
+}
+
+static int fallback_skcipher_crypt(struct s390_aes_ctx *sctx,
+				   struct skcipher_request *req,
+				   unsigned long modifier)
+{
+	struct skcipher_request *subreq = skcipher_request_ctx(req);
+
+	*subreq = *req;
+	skcipher_request_set_tfm(subreq, sctx->fallback.skcipher);
+	return (modifier & CPACF_DECRYPT) ?
+		crypto_skcipher_decrypt(subreq) :
+		crypto_skcipher_encrypt(subreq);
+}
+
+static int ecb_aes_set_key(struct crypto_skcipher *tfm, const u8 *in_key,
+			   unsigned int key_len)
+{
+	struct s390_aes_ctx *sctx = crypto_skcipher_ctx(tfm);
+	unsigned long fc;
+
+	/* Pick the correct function code based on the key length */
+	fc = (key_len == 16) ? CPACF_KM_AES_128 :
+	     (key_len == 24) ? CPACF_KM_AES_192 :
+	     (key_len == 32) ? CPACF_KM_AES_256 : 0;
+
+	/* Check if the function code is available */
+	sctx->fc = (fc && cpacf_test_func(&km_functions, fc)) ? fc : 0;
+	if (!sctx->fc)
+		return setkey_fallback_skcipher(tfm, in_key, key_len);
+
+	sctx->key_len = key_len;
+	memcpy(sctx->key, in_key, key_len);
+	return 0;
+}
+
+static int ecb_aes_crypt(struct skcipher_request *req, unsigned long modifier)
+{
+	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+	struct s390_aes_ctx *sctx = crypto_skcipher_ctx(tfm);
+	struct skcipher_walk walk;
+	unsigned int nbytes, n;
+	int ret;
+
+	if (unlikely(!sctx->fc))
+		return fallback_skcipher_crypt(sctx, req, modifier);
+
+	ret = skcipher_walk_virt(&walk, req, false);
+	while ((nbytes = walk.nbytes) != 0) {
+		/* only use complete blocks */
+		n = nbytes & ~(AES_BLOCK_SIZE - 1);
+		cpacf_km(sctx->fc | modifier, sctx->key,
+			 walk.dst.virt.addr, walk.src.virt.addr, n);
+		ret = skcipher_walk_done(&walk, nbytes - n);
+	}
+	return ret;
+}
+
+static int ecb_aes_encrypt(struct skcipher_request *req)
+{
+	return ecb_aes_crypt(req, 0);
+}
+
+static int ecb_aes_decrypt(struct skcipher_request *req)
+{
+	return ecb_aes_crypt(req, CPACF_DECRYPT);
+}
+
+static int fallback_init_skcipher(struct crypto_skcipher *tfm)
+{
+	const char *name = crypto_tfm_alg_name(&tfm->base);
+	struct s390_aes_ctx *sctx = crypto_skcipher_ctx(tfm);
+
+	sctx->fallback.skcipher = crypto_alloc_skcipher(name, 0,
+				CRYPTO_ALG_NEED_FALLBACK | CRYPTO_ALG_ASYNC);
+
+	if (IS_ERR(sctx->fallback.skcipher)) {
+		pr_err("Allocating AES fallback algorithm %s failed\n",
+		       name);
+		return PTR_ERR(sctx->fallback.skcipher);
+	}
+
+	crypto_skcipher_set_reqsize(tfm, sizeof(struct skcipher_request) +
+				    crypto_skcipher_reqsize(sctx->fallback.skcipher));
+	return 0;
+}
+
+static void fallback_exit_skcipher(struct crypto_skcipher *tfm)
+{
+	struct s390_aes_ctx *sctx = crypto_skcipher_ctx(tfm);
+
+	crypto_free_skcipher(sctx->fallback.skcipher);
+}
+
+static struct skcipher_alg ecb_aes_alg = {
+	.base.cra_name		=	"ecb(aes)",
+	.base.cra_driver_name	=	"ecb-aes-s390",
+	.base.cra_priority	=	401,	/* combo: aes + ecb + 1 */
+	.base.cra_flags		=	CRYPTO_ALG_NEED_FALLBACK,
+	.base.cra_blocksize	=	AES_BLOCK_SIZE,
+	.base.cra_ctxsize	=	sizeof(struct s390_aes_ctx),
+	.base.cra_module	=	THIS_MODULE,
+	.init			=	fallback_init_skcipher,
+	.exit			=	fallback_exit_skcipher,
+	.min_keysize		=	AES_MIN_KEY_SIZE,
+	.max_keysize		=	AES_MAX_KEY_SIZE,
+	.setkey			=	ecb_aes_set_key,
+	.encrypt		=	ecb_aes_encrypt,
+	.decrypt		=	ecb_aes_decrypt,
+};
+
+static int cbc_aes_set_key(struct crypto_skcipher *tfm, const u8 *in_key,
+			   unsigned int key_len)
+{
+	struct s390_aes_ctx *sctx = crypto_skcipher_ctx(tfm);
+	unsigned long fc;
+
+	/* Pick the correct function code based on the key length */
+	fc = (key_len == 16) ? CPACF_KMC_AES_128 :
+	     (key_len == 24) ? CPACF_KMC_AES_192 :
+	     (key_len == 32) ? CPACF_KMC_AES_256 : 0;
+
+	/* Check if the function code is available */
+	sctx->fc = (fc && cpacf_test_func(&kmc_functions, fc)) ? fc : 0;
+	if (!sctx->fc)
+		return setkey_fallback_skcipher(tfm, in_key, key_len);
+
+	sctx->key_len = key_len;
+	memcpy(sctx->key, in_key, key_len);
+	return 0;
+}
+
+static int cbc_aes_crypt(struct skcipher_request *req, unsigned long modifier)
+{
+	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+	struct s390_aes_ctx *sctx = crypto_skcipher_ctx(tfm);
+	struct skcipher_walk walk;
+	unsigned int nbytes, n;
+	int ret;
+	struct {
+		u8 iv[AES_BLOCK_SIZE];
+		u8 key[AES_MAX_KEY_SIZE];
+	} param;
+
+	if (unlikely(!sctx->fc))
+		return fallback_skcipher_crypt(sctx, req, modifier);
+
+	ret = skcipher_walk_virt(&walk, req, false);
+	if (ret)
+		return ret;
+	memcpy(param.iv, walk.iv, AES_BLOCK_SIZE);
+	memcpy(param.key, sctx->key, sctx->key_len);
+	while ((nbytes = walk.nbytes) != 0) {
+		/* only use complete blocks */
+		n = nbytes & ~(AES_BLOCK_SIZE - 1);
+		cpacf_kmc(sctx->fc | modifier, &param,
+			  walk.dst.virt.addr, walk.src.virt.addr, n);
+		memcpy(walk.iv, param.iv, AES_BLOCK_SIZE);
+		ret = skcipher_walk_done(&walk, nbytes - n);
+	}
+	memzero_explicit(&param, sizeof(param));
+	return ret;
+}
+
+static int cbc_aes_encrypt(struct skcipher_request *req)
+{
+	return cbc_aes_crypt(req, 0);
+}
+
+static int cbc_aes_decrypt(struct skcipher_request *req)
+{
+	return cbc_aes_crypt(req, CPACF_DECRYPT);
+}
+
+static struct skcipher_alg cbc_aes_alg = {
+	.base.cra_name		=	"cbc(aes)",
+	.base.cra_driver_name	=	"cbc-aes-s390",
+	.base.cra_priority	=	402,	/* ecb-aes-s390 + 1 */
+	.base.cra_flags		=	CRYPTO_ALG_NEED_FALLBACK,
+	.base.cra_blocksize	=	AES_BLOCK_SIZE,
+	.base.cra_ctxsize	=	sizeof(struct s390_aes_ctx),
+	.base.cra_module	=	THIS_MODULE,
+	.init			=	fallback_init_skcipher,
+	.exit			=	fallback_exit_skcipher,
+	.min_keysize		=	AES_MIN_KEY_SIZE,
+	.max_keysize		=	AES_MAX_KEY_SIZE,
+	.ivsize			=	AES_BLOCK_SIZE,
+	.setkey			=	cbc_aes_set_key,
+	.encrypt		=	cbc_aes_encrypt,
+	.decrypt		=	cbc_aes_decrypt,
+};
+
+static int xts_fallback_setkey(struct crypto_skcipher *tfm, const u8 *key,
+			       unsigned int len)
+{
+	struct s390_xts_ctx *xts_ctx = crypto_skcipher_ctx(tfm);
+
+	crypto_skcipher_clear_flags(xts_ctx->fallback, CRYPTO_TFM_REQ_MASK);
+	crypto_skcipher_set_flags(xts_ctx->fallback,
+				  crypto_skcipher_get_flags(tfm) &
+				  CRYPTO_TFM_REQ_MASK);
+	return crypto_skcipher_setkey(xts_ctx->fallback, key, len);
+}
+
+static int xts_aes_set_key(struct crypto_skcipher *tfm, const u8 *in_key,
+			   unsigned int key_len)
+{
+	struct s390_xts_ctx *xts_ctx = crypto_skcipher_ctx(tfm);
+	unsigned long fc;
+	int err;
+
+	err = xts_fallback_setkey(tfm, in_key, key_len);
+	if (err)
+		return err;
+
+	/* Pick the correct function code based on the key length */
+	fc = (key_len == 32) ? CPACF_KM_XTS_128 :
+	     (key_len == 64) ? CPACF_KM_XTS_256 : 0;
+
+	/* Check if the function code is available */
+	xts_ctx->fc = (fc && cpacf_test_func(&km_functions, fc)) ? fc : 0;
+	if (!xts_ctx->fc)
+		return 0;
+
+	/* Split the XTS key into the two subkeys */
+	key_len = key_len / 2;
+	xts_ctx->key_len = key_len;
+	memcpy(xts_ctx->key, in_key, key_len);
+	memcpy(xts_ctx->pcc_key, in_key + key_len, key_len);
+	return 0;
+}
+
+static int xts_aes_crypt(struct skcipher_request *req, unsigned long modifier)
+{
+	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+	struct s390_xts_ctx *xts_ctx = crypto_skcipher_ctx(tfm);
+	struct skcipher_walk walk;
+	unsigned int offset, nbytes, n;
+	int ret;
+	struct {
+		u8 key[32];
+		u8 tweak[16];
+		u8 block[16];
+		u8 bit[16];
+		u8 xts[16];
+	} pcc_param;
+	struct {
+		u8 key[32];
+		u8 init[16];
+	} xts_param;
+
+	if (req->cryptlen < AES_BLOCK_SIZE)
+		return -EINVAL;
+
+	if (unlikely(!xts_ctx->fc || (req->cryptlen % AES_BLOCK_SIZE) != 0)) {
+		struct skcipher_request *subreq = skcipher_request_ctx(req);
+
+		*subreq = *req;
+		skcipher_request_set_tfm(subreq, xts_ctx->fallback);
+		return (modifier & CPACF_DECRYPT) ?
+			crypto_skcipher_decrypt(subreq) :
+			crypto_skcipher_encrypt(subreq);
+	}
+
+	ret = skcipher_walk_virt(&walk, req, false);
+	if (ret)
+		return ret;
+	offset = xts_ctx->key_len & 0x10;
+	memset(pcc_param.block, 0, sizeof(pcc_param.block));
+	memset(pcc_param.bit, 0, sizeof(pcc_param.bit));
+	memset(pcc_param.xts, 0, sizeof(pcc_param.xts));
+	memcpy(pcc_param.tweak, walk.iv, sizeof(pcc_param.tweak));
+	memcpy(pcc_param.key + offset, xts_ctx->pcc_key, xts_ctx->key_len);
+	cpacf_pcc(xts_ctx->fc, pcc_param.key + offset);
+
+	memcpy(xts_param.key + offset, xts_ctx->key, xts_ctx->key_len);
+	memcpy(xts_param.init, pcc_param.xts, 16);
+
+	while ((nbytes = walk.nbytes) != 0) {
+		/* only use complete blocks */
+		n = nbytes & ~(AES_BLOCK_SIZE - 1);
+		cpacf_km(xts_ctx->fc | modifier, xts_param.key + offset,
+			 walk.dst.virt.addr, walk.src.virt.addr, n);
+		ret = skcipher_walk_done(&walk, nbytes - n);
+	}
+	memzero_explicit(&pcc_param, sizeof(pcc_param));
+	memzero_explicit(&xts_param, sizeof(xts_param));
+	return ret;
+}
+
+static int xts_aes_encrypt(struct skcipher_request *req)
+{
+	return xts_aes_crypt(req, 0);
+}
+
+static int xts_aes_decrypt(struct skcipher_request *req)
+{
+	return xts_aes_crypt(req, CPACF_DECRYPT);
+}
+
+static int xts_fallback_init(struct crypto_skcipher *tfm)
+{
+	const char *name = crypto_tfm_alg_name(&tfm->base);
+	struct s390_xts_ctx *xts_ctx = crypto_skcipher_ctx(tfm);
+
+	xts_ctx->fallback = crypto_alloc_skcipher(name, 0,
+				CRYPTO_ALG_NEED_FALLBACK | CRYPTO_ALG_ASYNC);
+
+	if (IS_ERR(xts_ctx->fallback)) {
+		pr_err("Allocating XTS fallback algorithm %s failed\n",
+		       name);
+		return PTR_ERR(xts_ctx->fallback);
+	}
+	crypto_skcipher_set_reqsize(tfm, sizeof(struct skcipher_request) +
+				    crypto_skcipher_reqsize(xts_ctx->fallback));
+	return 0;
+}
+
+static void xts_fallback_exit(struct crypto_skcipher *tfm)
+{
+	struct s390_xts_ctx *xts_ctx = crypto_skcipher_ctx(tfm);
+
+	crypto_free_skcipher(xts_ctx->fallback);
+}
+
+static struct skcipher_alg xts_aes_alg = {
+	.base.cra_name		=	"xts(aes)",
+	.base.cra_driver_name	=	"xts-aes-s390",
+	.base.cra_priority	=	402,	/* ecb-aes-s390 + 1 */
+	.base.cra_flags		=	CRYPTO_ALG_NEED_FALLBACK,
+	.base.cra_blocksize	=	AES_BLOCK_SIZE,
+	.base.cra_ctxsize	=	sizeof(struct s390_xts_ctx),
+	.base.cra_module	=	THIS_MODULE,
+	.init			=	xts_fallback_init,
+	.exit			=	xts_fallback_exit,
+	.min_keysize		=	2 * AES_MIN_KEY_SIZE,
+	.max_keysize		=	2 * AES_MAX_KEY_SIZE,
+	.ivsize			=	AES_BLOCK_SIZE,
+	.setkey			=	xts_aes_set_key,
+	.encrypt		=	xts_aes_encrypt,
+	.decrypt		=	xts_aes_decrypt,
+};
+
+static int ctr_aes_set_key(struct crypto_skcipher *tfm, const u8 *in_key,
+			   unsigned int key_len)
+{
+	struct s390_aes_ctx *sctx = crypto_skcipher_ctx(tfm);
+	unsigned long fc;
+
+	/* Pick the correct function code based on the key length */
+	fc = (key_len == 16) ? CPACF_KMCTR_AES_128 :
+	     (key_len == 24) ? CPACF_KMCTR_AES_192 :
+	     (key_len == 32) ? CPACF_KMCTR_AES_256 : 0;
+
+	/* Check if the function code is available */
+	sctx->fc = (fc && cpacf_test_func(&kmctr_functions, fc)) ? fc : 0;
+	if (!sctx->fc)
+		return setkey_fallback_skcipher(tfm, in_key, key_len);
+
+	sctx->key_len = key_len;
+	memcpy(sctx->key, in_key, key_len);
+	return 0;
+}
+
+static unsigned int __ctrblk_init(u8 *ctrptr, u8 *iv, unsigned int nbytes)
+{
+	unsigned int i, n;
+
+	/* only use complete blocks, max. PAGE_SIZE */
+	memcpy(ctrptr, iv, AES_BLOCK_SIZE);
+	n = (nbytes > PAGE_SIZE) ? PAGE_SIZE : nbytes & ~(AES_BLOCK_SIZE - 1);
+	for (i = (n / AES_BLOCK_SIZE) - 1; i > 0; i--) {
+		memcpy(ctrptr + AES_BLOCK_SIZE, ctrptr, AES_BLOCK_SIZE);
+		crypto_inc(ctrptr + AES_BLOCK_SIZE, AES_BLOCK_SIZE);
+		ctrptr += AES_BLOCK_SIZE;
+	}
+	return n;
+}
+
+static int ctr_aes_crypt(struct skcipher_request *req)
+{
+	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+	struct s390_aes_ctx *sctx = crypto_skcipher_ctx(tfm);
+	u8 buf[AES_BLOCK_SIZE], *ctrptr;
+	struct skcipher_walk walk;
+	unsigned int n, nbytes;
+	int ret, locked;
+
+	if (unlikely(!sctx->fc))
+		return fallback_skcipher_crypt(sctx, req, 0);
+
+	locked = mutex_trylock(&ctrblk_lock);
+
+	ret = skcipher_walk_virt(&walk, req, false);
+	while ((nbytes = walk.nbytes) >= AES_BLOCK_SIZE) {
+		n = AES_BLOCK_SIZE;
+
+		if (nbytes >= 2*AES_BLOCK_SIZE && locked)
+			n = __ctrblk_init(ctrblk, walk.iv, nbytes);
+		ctrptr = (n > AES_BLOCK_SIZE) ? ctrblk : walk.iv;
+		cpacf_kmctr(sctx->fc, sctx->key, walk.dst.virt.addr,
+			    walk.src.virt.addr, n, ctrptr);
+		if (ctrptr == ctrblk)
+			memcpy(walk.iv, ctrptr + n - AES_BLOCK_SIZE,
+			       AES_BLOCK_SIZE);
+		crypto_inc(walk.iv, AES_BLOCK_SIZE);
+		ret = skcipher_walk_done(&walk, nbytes - n);
+	}
+	if (locked)
+		mutex_unlock(&ctrblk_lock);
+	/*
+	 * final block may be < AES_BLOCK_SIZE, copy only nbytes
+	 */
+	if (nbytes) {
+		memset(buf, 0, AES_BLOCK_SIZE);
+		memcpy(buf, walk.src.virt.addr, nbytes);
+		cpacf_kmctr(sctx->fc, sctx->key, buf, buf,
+			    AES_BLOCK_SIZE, walk.iv);
+		memcpy(walk.dst.virt.addr, buf, nbytes);
+		crypto_inc(walk.iv, AES_BLOCK_SIZE);
+		ret = skcipher_walk_done(&walk, 0);
+	}
+
+	return ret;
+}
+
+static struct skcipher_alg ctr_aes_alg = {
+	.base.cra_name		=	"ctr(aes)",
+	.base.cra_driver_name	=	"ctr-aes-s390",
+	.base.cra_priority	=	402,	/* ecb-aes-s390 + 1 */
+	.base.cra_flags		=	CRYPTO_ALG_NEED_FALLBACK,
+	.base.cra_blocksize	=	1,
+	.base.cra_ctxsize	=	sizeof(struct s390_aes_ctx),
+	.base.cra_module	=	THIS_MODULE,
+	.init			=	fallback_init_skcipher,
+	.exit			=	fallback_exit_skcipher,
+	.min_keysize		=	AES_MIN_KEY_SIZE,
+	.max_keysize		=	AES_MAX_KEY_SIZE,
+	.ivsize			=	AES_BLOCK_SIZE,
+	.setkey			=	ctr_aes_set_key,
+	.encrypt		=	ctr_aes_crypt,
+	.decrypt		=	ctr_aes_crypt,
+	.chunksize		=	AES_BLOCK_SIZE,
+};
+
+static int gcm_aes_setkey(struct crypto_aead *tfm, const u8 *key,
+			  unsigned int keylen)
+{
+	struct s390_aes_ctx *ctx = crypto_aead_ctx(tfm);
+
+	switch (keylen) {
+	case AES_KEYSIZE_128:
+		ctx->fc = CPACF_KMA_GCM_AES_128;
+		break;
+	case AES_KEYSIZE_192:
+		ctx->fc = CPACF_KMA_GCM_AES_192;
+		break;
+	case AES_KEYSIZE_256:
+		ctx->fc = CPACF_KMA_GCM_AES_256;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	memcpy(ctx->key, key, keylen);
+	ctx->key_len = keylen;
+	return 0;
+}
+
+static int gcm_aes_setauthsize(struct crypto_aead *tfm, unsigned int authsize)
+{
+	switch (authsize) {
+	case 4:
+	case 8:
+	case 12:
+	case 13:
+	case 14:
+	case 15:
+	case 16:
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static void gcm_walk_start(struct gcm_sg_walk *gw, struct scatterlist *sg,
+			   unsigned int len)
+{
+	memset(gw, 0, sizeof(*gw));
+	gw->walk_bytes_remain = len;
+	scatterwalk_start(&gw->walk, sg);
+}
+
+static inline unsigned int _gcm_sg_clamp_and_map(struct gcm_sg_walk *gw)
+{
+	struct scatterlist *nextsg;
+
+	gw->walk_bytes = scatterwalk_clamp(&gw->walk, gw->walk_bytes_remain);
+	while (!gw->walk_bytes) {
+		nextsg = sg_next(gw->walk.sg);
+		if (!nextsg)
+			return 0;
+		scatterwalk_start(&gw->walk, nextsg);
+		gw->walk_bytes = scatterwalk_clamp(&gw->walk,
+						   gw->walk_bytes_remain);
+	}
+	gw->walk_ptr = scatterwalk_map(&gw->walk);
+	return gw->walk_bytes;
+}
+
+static inline void _gcm_sg_unmap_and_advance(struct gcm_sg_walk *gw,
+					     unsigned int nbytes)
+{
+	gw->walk_bytes_remain -= nbytes;
+	scatterwalk_unmap(gw->walk_ptr);
+	scatterwalk_advance(&gw->walk, nbytes);
+	scatterwalk_done(&gw->walk, 0, gw->walk_bytes_remain);
+	gw->walk_ptr = NULL;
+}
+
+static int gcm_in_walk_go(struct gcm_sg_walk *gw, unsigned int minbytesneeded)
+{
+	int n;
+
+	if (gw->buf_bytes && gw->buf_bytes >= minbytesneeded) {
+		gw->ptr = gw->buf;
+		gw->nbytes = gw->buf_bytes;
+		goto out;
+	}
+
+	if (gw->walk_bytes_remain == 0) {
+		gw->ptr = NULL;
+		gw->nbytes = 0;
+		goto out;
+	}
+
+	if (!_gcm_sg_clamp_and_map(gw)) {
+		gw->ptr = NULL;
+		gw->nbytes = 0;
+		goto out;
+	}
+
+	if (!gw->buf_bytes && gw->walk_bytes >= minbytesneeded) {
+		gw->ptr = gw->walk_ptr;
+		gw->nbytes = gw->walk_bytes;
+		goto out;
+	}
+
+	while (1) {
+		n = min(gw->walk_bytes, AES_BLOCK_SIZE - gw->buf_bytes);
+		memcpy(gw->buf + gw->buf_bytes, gw->walk_ptr, n);
+		gw->buf_bytes += n;
+		_gcm_sg_unmap_and_advance(gw, n);
+		if (gw->buf_bytes >= minbytesneeded) {
+			gw->ptr = gw->buf;
+			gw->nbytes = gw->buf_bytes;
+			goto out;
+		}
+		if (!_gcm_sg_clamp_and_map(gw)) {
+			gw->ptr = NULL;
+			gw->nbytes = 0;
+			goto out;
+		}
+	}
+
+out:
+	return gw->nbytes;
+}
+
+static int gcm_out_walk_go(struct gcm_sg_walk *gw, unsigned int minbytesneeded)
+{
+	if (gw->walk_bytes_remain == 0) {
+		gw->ptr = NULL;
+		gw->nbytes = 0;
+		goto out;
+	}
+
+	if (!_gcm_sg_clamp_and_map(gw)) {
+		gw->ptr = NULL;
+		gw->nbytes = 0;
+		goto out;
+	}
+
+	if (gw->walk_bytes >= minbytesneeded) {
+		gw->ptr = gw->walk_ptr;
+		gw->nbytes = gw->walk_bytes;
+		goto out;
+	}
+
+	scatterwalk_unmap(gw->walk_ptr);
+	gw->walk_ptr = NULL;
+
+	gw->ptr = gw->buf;
+	gw->nbytes = sizeof(gw->buf);
+
+out:
+	return gw->nbytes;
+}
+
+static int gcm_in_walk_done(struct gcm_sg_walk *gw, unsigned int bytesdone)
+{
+	if (gw->ptr == NULL)
+		return 0;
+
+	if (gw->ptr == gw->buf) {
+		int n = gw->buf_bytes - bytesdone;
+		if (n > 0) {
+			memmove(gw->buf, gw->buf + bytesdone, n);
+			gw->buf_bytes = n;
+		} else
+			gw->buf_bytes = 0;
+	} else
+		_gcm_sg_unmap_and_advance(gw, bytesdone);
+
+	return bytesdone;
+}
+
+static int gcm_out_walk_done(struct gcm_sg_walk *gw, unsigned int bytesdone)
+{
+	int i, n;
+
+	if (gw->ptr == NULL)
+		return 0;
+
+	if (gw->ptr == gw->buf) {
+		for (i = 0; i < bytesdone; i += n) {
+			if (!_gcm_sg_clamp_and_map(gw))
+				return i;
+			n = min(gw->walk_bytes, bytesdone - i);
+			memcpy(gw->walk_ptr, gw->buf + i, n);
+			_gcm_sg_unmap_and_advance(gw, n);
+		}
+	} else
+		_gcm_sg_unmap_and_advance(gw, bytesdone);
+
+	return bytesdone;
+}
+
+static int gcm_aes_crypt(struct aead_request *req, unsigned int flags)
+{
+	struct crypto_aead *tfm = crypto_aead_reqtfm(req);
+	struct s390_aes_ctx *ctx = crypto_aead_ctx(tfm);
+	unsigned int ivsize = crypto_aead_ivsize(tfm);
+	unsigned int taglen = crypto_aead_authsize(tfm);
+	unsigned int aadlen = req->assoclen;
+	unsigned int pclen = req->cryptlen;
+	int ret = 0;
+
+	unsigned int n, len, in_bytes, out_bytes,
+		     min_bytes, bytes, aad_bytes, pc_bytes;
+	struct gcm_sg_walk gw_in, gw_out;
+	u8 tag[GHASH_DIGEST_SIZE];
+
+	struct {
+		u32 _[3];		/* reserved */
+		u32 cv;			/* Counter Value */
+		u8 t[GHASH_DIGEST_SIZE];/* Tag */
+		u8 h[AES_BLOCK_SIZE];	/* Hash-subkey */
+		u64 taadl;		/* Total AAD Length */
+		u64 tpcl;		/* Total Plain-/Cipher-text Length */
+		u8 j0[GHASH_BLOCK_SIZE];/* initial counter value */
+		u8 k[AES_MAX_KEY_SIZE];	/* Key */
+	} param;
+
+	/*
+	 * encrypt
+	 *   req->src: aad||plaintext
+	 *   req->dst: aad||ciphertext||tag
+	 * decrypt
+	 *   req->src: aad||ciphertext||tag
+	 *   req->dst: aad||plaintext, return 0 or -EBADMSG
+	 * aad, plaintext and ciphertext may be empty.
+	 */
+	if (flags & CPACF_DECRYPT)
+		pclen -= taglen;
+	len = aadlen + pclen;
+
+	memset(&param, 0, sizeof(param));
+	param.cv = 1;
+	param.taadl = aadlen * 8;
+	param.tpcl = pclen * 8;
+	memcpy(param.j0, req->iv, ivsize);
+	*(u32 *)(param.j0 + ivsize) = 1;
+	memcpy(param.k, ctx->key, ctx->key_len);
+
+	gcm_walk_start(&gw_in, req->src, len);
+	gcm_walk_start(&gw_out, req->dst, len);
+
+	do {
+		min_bytes = min_t(unsigned int,
+				  aadlen > 0 ? aadlen : pclen, AES_BLOCK_SIZE);
+		in_bytes = gcm_in_walk_go(&gw_in, min_bytes);
+		out_bytes = gcm_out_walk_go(&gw_out, min_bytes);
+		bytes = min(in_bytes, out_bytes);
+
+		if (aadlen + pclen <= bytes) {
+			aad_bytes = aadlen;
+			pc_bytes = pclen;
+			flags |= CPACF_KMA_LAAD | CPACF_KMA_LPC;
+		} else {
+			if (aadlen <= bytes) {
+				aad_bytes = aadlen;
+				pc_bytes = (bytes - aadlen) &
+					   ~(AES_BLOCK_SIZE - 1);
+				flags |= CPACF_KMA_LAAD;
+			} else {
+				aad_bytes = bytes & ~(AES_BLOCK_SIZE - 1);
+				pc_bytes = 0;
+			}
+		}
+
+		if (aad_bytes > 0)
+			memcpy(gw_out.ptr, gw_in.ptr, aad_bytes);
+
+		cpacf_kma(ctx->fc | flags, &param,
+			  gw_out.ptr + aad_bytes,
+			  gw_in.ptr + aad_bytes, pc_bytes,
+			  gw_in.ptr, aad_bytes);
+
+		n = aad_bytes + pc_bytes;
+		if (gcm_in_walk_done(&gw_in, n) != n)
+			return -ENOMEM;
+		if (gcm_out_walk_done(&gw_out, n) != n)
+			return -ENOMEM;
+		aadlen -= aad_bytes;
+		pclen -= pc_bytes;
+	} while (aadlen + pclen > 0);
+
+	if (flags & CPACF_DECRYPT) {
+		scatterwalk_map_and_copy(tag, req->src, len, taglen, 0);
+		if (crypto_memneq(tag, param.t, taglen))
+			ret = -EBADMSG;
+	} else
+		scatterwalk_map_and_copy(param.t, req->dst, len, taglen, 1);
+
+	memzero_explicit(&param, sizeof(param));
+	return ret;
+}
+
+static int gcm_aes_encrypt(struct aead_request *req)
+{
+	return gcm_aes_crypt(req, CPACF_ENCRYPT);
+}
+
+static int gcm_aes_decrypt(struct aead_request *req)
+{
+	return gcm_aes_crypt(req, CPACF_DECRYPT);
+}
+
+static struct aead_alg gcm_aes_aead = {
+	.setkey			= gcm_aes_setkey,
+	.setauthsize		= gcm_aes_setauthsize,
+	.encrypt		= gcm_aes_encrypt,
+	.decrypt		= gcm_aes_decrypt,
+
+	.ivsize			= GHASH_BLOCK_SIZE - sizeof(u32),
+	.maxauthsize		= GHASH_DIGEST_SIZE,
+	.chunksize		= AES_BLOCK_SIZE,
+
+	.base			= {
+		.cra_blocksize		= 1,
+		.cra_ctxsize		= sizeof(struct s390_aes_ctx),
+		.cra_priority		= 900,
+		.cra_name		= "gcm(aes)",
+		.cra_driver_name	= "gcm-aes-s390",
+		.cra_module		= THIS_MODULE,
+	},
+};
+
+static struct crypto_alg *aes_s390_alg;
+static struct skcipher_alg *aes_s390_skcipher_algs[4];
+static int aes_s390_skciphers_num;
+static struct aead_alg *aes_s390_aead_alg;
+
+static int aes_s390_register_skcipher(struct skcipher_alg *alg)
+{
+	int ret;
+
+	ret = crypto_register_skcipher(alg);
+	if (!ret)
+		aes_s390_skcipher_algs[aes_s390_skciphers_num++] = alg;
+	return ret;
+}
+
+static void aes_s390_fini(void)
+{
+	if (aes_s390_alg)
+		crypto_unregister_alg(aes_s390_alg);
+	while (aes_s390_skciphers_num--)
+		crypto_unregister_skcipher(aes_s390_skcipher_algs[aes_s390_skciphers_num]);
+	if (ctrblk)
+		free_page((unsigned long) ctrblk);
+
+	if (aes_s390_aead_alg)
+		crypto_unregister_aead(aes_s390_aead_alg);
+}
+
+static int __init aes_s390_init(void)
+{
+	int ret;
+
+	/* Query available functions for KM, KMC, KMCTR and KMA */
+	cpacf_query(CPACF_KM, &km_functions);
+	cpacf_query(CPACF_KMC, &kmc_functions);
+	cpacf_query(CPACF_KMCTR, &kmctr_functions);
+	cpacf_query(CPACF_KMA, &kma_functions);
+
+	if (cpacf_test_func(&km_functions, CPACF_KM_AES_128) ||
+	    cpacf_test_func(&km_functions, CPACF_KM_AES_192) ||
+	    cpacf_test_func(&km_functions, CPACF_KM_AES_256)) {
+		ret = crypto_register_alg(&aes_alg);
+		if (ret)
+			goto out_err;
+		aes_s390_alg = &aes_alg;
+		ret = aes_s390_register_skcipher(&ecb_aes_alg);
+		if (ret)
+			goto out_err;
+	}
+
+	if (cpacf_test_func(&kmc_functions, CPACF_KMC_AES_128) ||
+	    cpacf_test_func(&kmc_functions, CPACF_KMC_AES_192) ||
+	    cpacf_test_func(&kmc_functions, CPACF_KMC_AES_256)) {
+		ret = aes_s390_register_skcipher(&cbc_aes_alg);
+		if (ret)
+			goto out_err;
+	}
+
+	if (cpacf_test_func(&km_functions, CPACF_KM_XTS_128) ||
+	    cpacf_test_func(&km_functions, CPACF_KM_XTS_256)) {
+		ret = aes_s390_register_skcipher(&xts_aes_alg);
+		if (ret)
+			goto out_err;
+	}
+
+	if (cpacf_test_func(&kmctr_functions, CPACF_KMCTR_AES_128) ||
+	    cpacf_test_func(&kmctr_functions, CPACF_KMCTR_AES_192) ||
+	    cpacf_test_func(&kmctr_functions, CPACF_KMCTR_AES_256)) {
+		ctrblk = (u8 *) __get_free_page(GFP_KERNEL);
+		if (!ctrblk) {
+			ret = -ENOMEM;
+			goto out_err;
+		}
+		ret = aes_s390_register_skcipher(&ctr_aes_alg);
+		if (ret)
+			goto out_err;
+	}
+
+	if (cpacf_test_func(&kma_functions, CPACF_KMA_GCM_AES_128) ||
+	    cpacf_test_func(&kma_functions, CPACF_KMA_GCM_AES_192) ||
+	    cpacf_test_func(&kma_functions, CPACF_KMA_GCM_AES_256)) {
+		ret = crypto_register_aead(&gcm_aes_aead);
+		if (ret)
+			goto out_err;
+		aes_s390_aead_alg = &gcm_aes_aead;
+	}
+
+	return 0;
+out_err:
+	aes_s390_fini();
+	return ret;
+}
+
+module_cpu_feature_match(S390_CPU_FEATURE_MSA, aes_s390_init);
+module_exit(aes_s390_fini);
+
+MODULE_ALIAS_CRYPTO("aes-all");
+
+MODULE_DESCRIPTION("Rijndael (AES) Cipher Algorithm");
+MODULE_LICENSE("GPL");
+MODULE_IMPORT_NS(CRYPTO_INTERNAL);
diff --git a/arch/s390/crypto/arch_random.c b/arch/s390/crypto/arch_random.c
new file mode 100644
index 0000000000..a8a2407381
--- /dev/null
+++ b/arch/s390/crypto/arch_random.c
@@ -0,0 +1,19 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * s390 arch random implementation.
+ *
+ * Copyright IBM Corp. 2017, 2020
+ * Author(s): Harald Freudenberger
+ */
+
+#include <linux/kernel.h>
+#include <linux/atomic.h>
+#include <linux/random.h>
+#include <linux/static_key.h>
+#include <asm/archrandom.h>
+#include <asm/cpacf.h>
+
+DEFINE_STATIC_KEY_FALSE(s390_arch_random_available);
+
+atomic64_t s390_arch_random_counter = ATOMIC64_INIT(0);
+EXPORT_SYMBOL(s390_arch_random_counter);
diff --git a/arch/s390/crypto/chacha-glue.c b/arch/s390/crypto/chacha-glue.c
new file mode 100644
index 0000000000..5fae187f94
--- /dev/null
+++ b/arch/s390/crypto/chacha-glue.c
@@ -0,0 +1,130 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * s390 ChaCha stream cipher.
+ *
+ * Copyright IBM Corp. 2021
+ */
+
+#define KMSG_COMPONENT "chacha_s390"
+#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
+
+#include <crypto/internal/chacha.h>
+#include <crypto/internal/skcipher.h>
+#include <crypto/algapi.h>
+#include <linux/cpufeature.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/sizes.h>
+#include <asm/fpu/api.h>
+#include "chacha-s390.h"
+
+static void chacha20_crypt_s390(u32 *state, u8 *dst, const u8 *src,
+				unsigned int nbytes, const u32 *key,
+				u32 *counter)
+{
+	struct kernel_fpu vxstate;
+
+	kernel_fpu_begin(&vxstate, KERNEL_VXR);
+	chacha20_vx(dst, src, nbytes, key, counter);
+	kernel_fpu_end(&vxstate, KERNEL_VXR);
+
+	*counter += round_up(nbytes, CHACHA_BLOCK_SIZE) / CHACHA_BLOCK_SIZE;
+}
+
+static int chacha20_s390(struct skcipher_request *req)
+{
+	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+	struct chacha_ctx *ctx = crypto_skcipher_ctx(tfm);
+	u32 state[CHACHA_STATE_WORDS] __aligned(16);
+	struct skcipher_walk walk;
+	unsigned int nbytes;
+	int rc;
+
+	rc = skcipher_walk_virt(&walk, req, false);
+	chacha_init_generic(state, ctx->key, req->iv);
+
+	while (walk.nbytes > 0) {
+		nbytes = walk.nbytes;
+		if (nbytes < walk.total)
+			nbytes = round_down(nbytes, walk.stride);
+
+		if (nbytes <= CHACHA_BLOCK_SIZE) {
+			chacha_crypt_generic(state, walk.dst.virt.addr,
+					     walk.src.virt.addr, nbytes,
+					     ctx->nrounds);
+		} else {
+			chacha20_crypt_s390(state, walk.dst.virt.addr,
+					    walk.src.virt.addr, nbytes,
+					    &state[4], &state[12]);
+		}
+		rc = skcipher_walk_done(&walk, walk.nbytes - nbytes);
+	}
+	return rc;
+}
+
+void hchacha_block_arch(const u32 *state, u32 *stream, int nrounds)
+{
+	/* TODO: implement hchacha_block_arch() in assembly */
+	hchacha_block_generic(state, stream, nrounds);
+}
+EXPORT_SYMBOL(hchacha_block_arch);
+
+void chacha_init_arch(u32 *state, const u32 *key, const u8 *iv)
+{
+	chacha_init_generic(state, key, iv);
+}
+EXPORT_SYMBOL(chacha_init_arch);
+
+void chacha_crypt_arch(u32 *state, u8 *dst, const u8 *src,
+		       unsigned int bytes, int nrounds)
+{
+	/* s390 chacha20 implementation has 20 rounds hard-coded,
+	 * it cannot handle a block of data or less, but otherwise
+	 * it can handle data of arbitrary size
+	 */
+	if (bytes <= CHACHA_BLOCK_SIZE || nrounds != 20 || !MACHINE_HAS_VX)
+		chacha_crypt_generic(state, dst, src, bytes, nrounds);
+	else
+		chacha20_crypt_s390(state, dst, src, bytes,
+				    &state[4], &state[12]);
+}
+EXPORT_SYMBOL(chacha_crypt_arch);
+
+static struct skcipher_alg chacha_algs[] = {
+	{
+		.base.cra_name		= "chacha20",
+		.base.cra_driver_name	= "chacha20-s390",
+		.base.cra_priority	= 900,
+		.base.cra_blocksize	= 1,
+		.base.cra_ctxsize	= sizeof(struct chacha_ctx),
+		.base.cra_module	= THIS_MODULE,
+
+		.min_keysize		= CHACHA_KEY_SIZE,
+		.max_keysize		= CHACHA_KEY_SIZE,
+		.ivsize			= CHACHA_IV_SIZE,
+		.chunksize		= CHACHA_BLOCK_SIZE,
+		.setkey			= chacha20_setkey,
+		.encrypt		= chacha20_s390,
+		.decrypt		= chacha20_s390,
+	}
+};
+
+static int __init chacha_mod_init(void)
+{
+	return IS_REACHABLE(CONFIG_CRYPTO_SKCIPHER) ?
+		crypto_register_skciphers(chacha_algs, ARRAY_SIZE(chacha_algs)) : 0;
+}
+
+static void __exit chacha_mod_fini(void)
+{
+	if (IS_REACHABLE(CONFIG_CRYPTO_SKCIPHER))
+		crypto_unregister_skciphers(chacha_algs, ARRAY_SIZE(chacha_algs));
+}
+
+module_cpu_feature_match(S390_CPU_FEATURE_VXRS, chacha_mod_init);
+module_exit(chacha_mod_fini);
+
+MODULE_DESCRIPTION("ChaCha20 stream cipher");
+MODULE_LICENSE("GPL v2");
+
+MODULE_ALIAS_CRYPTO("chacha20");
diff --git a/arch/s390/crypto/chacha-s390.S b/arch/s390/crypto/chacha-s390.S
new file mode 100644
index 0000000000..37cb63f25b
--- /dev/null
+++ b/arch/s390/crypto/chacha-s390.S
@@ -0,0 +1,908 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Original implementation written by Andy Polyakov, @dot-asm.
+ * This is an adaptation of the original code for kernel use.
+ *
+ * Copyright (C) 2006-2019 CRYPTOGAMS by <appro@openssl.org>. All Rights Reserved.
+ */
+
+#include <linux/linkage.h>
+#include <asm/nospec-insn.h>
+#include <asm/vx-insn.h>
+
+#define SP	%r15
+#define FRAME	(16 * 8 + 4 * 8)
+
+	.data
+	.balign	32
+
+SYM_DATA_START_LOCAL(sigma)
+	.long	0x61707865,0x3320646e,0x79622d32,0x6b206574	# endian-neutral
+	.long	1,0,0,0
+	.long	2,0,0,0
+	.long	3,0,0,0
+	.long	0x03020100,0x07060504,0x0b0a0908,0x0f0e0d0c	# byte swap
+
+	.long	0,1,2,3
+	.long	0x61707865,0x61707865,0x61707865,0x61707865	# smashed sigma
+	.long	0x3320646e,0x3320646e,0x3320646e,0x3320646e
+	.long	0x79622d32,0x79622d32,0x79622d32,0x79622d32
+	.long	0x6b206574,0x6b206574,0x6b206574,0x6b206574
+SYM_DATA_END(sigma)
+
+	.previous
+
+	GEN_BR_THUNK %r14
+
+	.text
+
+#############################################################################
+# void chacha20_vx_4x(u8 *out, counst u8 *inp, size_t len,
+#		      counst u32 *key, const u32 *counter)
+
+#define	OUT		%r2
+#define	INP		%r3
+#define	LEN		%r4
+#define	KEY		%r5
+#define	COUNTER		%r6
+
+#define BEPERM		%v31
+#define CTR		%v26
+
+#define K0		%v16
+#define K1		%v17
+#define K2		%v18
+#define K3		%v19
+
+#define XA0		%v0
+#define XA1		%v1
+#define XA2		%v2
+#define XA3		%v3
+
+#define XB0		%v4
+#define XB1		%v5
+#define XB2		%v6
+#define XB3		%v7
+
+#define XC0		%v8
+#define XC1		%v9
+#define XC2		%v10
+#define XC3		%v11
+
+#define XD0		%v12
+#define XD1		%v13
+#define XD2		%v14
+#define XD3		%v15
+
+#define XT0		%v27
+#define XT1		%v28
+#define XT2		%v29
+#define XT3		%v30
+
+SYM_FUNC_START(chacha20_vx_4x)
+	stmg	%r6,%r7,6*8(SP)
+
+	larl	%r7,sigma
+	lhi	%r0,10
+	lhi	%r1,0
+
+	VL	K0,0,,%r7		# load sigma
+	VL	K1,0,,KEY		# load key
+	VL	K2,16,,KEY
+	VL	K3,0,,COUNTER		# load counter
+
+	VL	BEPERM,0x40,,%r7
+	VL	CTR,0x50,,%r7
+
+	VLM	XA0,XA3,0x60,%r7,4	# load [smashed] sigma
+
+	VREPF	XB0,K1,0		# smash the key
+	VREPF	XB1,K1,1
+	VREPF	XB2,K1,2
+	VREPF	XB3,K1,3
+
+	VREPF	XD0,K3,0
+	VREPF	XD1,K3,1
+	VREPF	XD2,K3,2
+	VREPF	XD3,K3,3
+	VAF	XD0,XD0,CTR
+
+	VREPF	XC0,K2,0
+	VREPF	XC1,K2,1
+	VREPF	XC2,K2,2
+	VREPF	XC3,K2,3
+
+.Loop_4x:
+	VAF	XA0,XA0,XB0
+	VX	XD0,XD0,XA0
+	VERLLF	XD0,XD0,16
+
+	VAF	XA1,XA1,XB1
+	VX	XD1,XD1,XA1
+	VERLLF	XD1,XD1,16
+
+	VAF	XA2,XA2,XB2
+	VX	XD2,XD2,XA2
+	VERLLF	XD2,XD2,16
+
+	VAF	XA3,XA3,XB3
+	VX	XD3,XD3,XA3
+	VERLLF	XD3,XD3,16
+
+	VAF	XC0,XC0,XD0
+	VX	XB0,XB0,XC0
+	VERLLF	XB0,XB0,12
+
+	VAF	XC1,XC1,XD1
+	VX	XB1,XB1,XC1
+	VERLLF	XB1,XB1,12
+
+	VAF	XC2,XC2,XD2
+	VX	XB2,XB2,XC2
+	VERLLF	XB2,XB2,12
+
+	VAF	XC3,XC3,XD3
+	VX	XB3,XB3,XC3
+	VERLLF	XB3,XB3,12
+
+	VAF	XA0,XA0,XB0
+	VX	XD0,XD0,XA0
+	VERLLF	XD0,XD0,8
+
+	VAF	XA1,XA1,XB1
+	VX	XD1,XD1,XA1
+	VERLLF	XD1,XD1,8
+
+	VAF	XA2,XA2,XB2
+	VX	XD2,XD2,XA2
+	VERLLF	XD2,XD2,8
+
+	VAF	XA3,XA3,XB3
+	VX	XD3,XD3,XA3
+	VERLLF	XD3,XD3,8
+
+	VAF	XC0,XC0,XD0
+	VX	XB0,XB0,XC0
+	VERLLF	XB0,XB0,7
+
+	VAF	XC1,XC1,XD1
+	VX	XB1,XB1,XC1
+	VERLLF	XB1,XB1,7
+
+	VAF	XC2,XC2,XD2
+	VX	XB2,XB2,XC2
+	VERLLF	XB2,XB2,7
+
+	VAF	XC3,XC3,XD3
+	VX	XB3,XB3,XC3
+	VERLLF	XB3,XB3,7
+
+	VAF	XA0,XA0,XB1
+	VX	XD3,XD3,XA0
+	VERLLF	XD3,XD3,16
+
+	VAF	XA1,XA1,XB2
+	VX	XD0,XD0,XA1
+	VERLLF	XD0,XD0,16
+
+	VAF	XA2,XA2,XB3
+	VX	XD1,XD1,XA2
+	VERLLF	XD1,XD1,16
+
+	VAF	XA3,XA3,XB0
+	VX	XD2,XD2,XA3
+	VERLLF	XD2,XD2,16
+
+	VAF	XC2,XC2,XD3
+	VX	XB1,XB1,XC2
+	VERLLF	XB1,XB1,12
+
+	VAF	XC3,XC3,XD0
+	VX	XB2,XB2,XC3
+	VERLLF	XB2,XB2,12
+
+	VAF	XC0,XC0,XD1
+	VX	XB3,XB3,XC0
+	VERLLF	XB3,XB3,12
+
+	VAF	XC1,XC1,XD2
+	VX	XB0,XB0,XC1
+	VERLLF	XB0,XB0,12
+
+	VAF	XA0,XA0,XB1
+	VX	XD3,XD3,XA0
+	VERLLF	XD3,XD3,8
+
+	VAF	XA1,XA1,XB2
+	VX	XD0,XD0,XA1
+	VERLLF	XD0,XD0,8
+
+	VAF	XA2,XA2,XB3
+	VX	XD1,XD1,XA2
+	VERLLF	XD1,XD1,8
+
+	VAF	XA3,XA3,XB0
+	VX	XD2,XD2,XA3
+	VERLLF	XD2,XD2,8
+
+	VAF	XC2,XC2,XD3
+	VX	XB1,XB1,XC2
+	VERLLF	XB1,XB1,7
+
+	VAF	XC3,XC3,XD0
+	VX	XB2,XB2,XC3
+	VERLLF	XB2,XB2,7
+
+	VAF	XC0,XC0,XD1
+	VX	XB3,XB3,XC0
+	VERLLF	XB3,XB3,7
+
+	VAF	XC1,XC1,XD2
+	VX	XB0,XB0,XC1
+	VERLLF	XB0,XB0,7
+	brct	%r0,.Loop_4x
+
+	VAF	XD0,XD0,CTR
+
+	VMRHF	XT0,XA0,XA1		# transpose data
+	VMRHF	XT1,XA2,XA3
+	VMRLF	XT2,XA0,XA1
+	VMRLF	XT3,XA2,XA3
+	VPDI	XA0,XT0,XT1,0b0000
+	VPDI	XA1,XT0,XT1,0b0101
+	VPDI	XA2,XT2,XT3,0b0000
+	VPDI	XA3,XT2,XT3,0b0101
+
+	VMRHF	XT0,XB0,XB1
+	VMRHF	XT1,XB2,XB3
+	VMRLF	XT2,XB0,XB1
+	VMRLF	XT3,XB2,XB3
+	VPDI	XB0,XT0,XT1,0b0000
+	VPDI	XB1,XT0,XT1,0b0101
+	VPDI	XB2,XT2,XT3,0b0000
+	VPDI	XB3,XT2,XT3,0b0101
+
+	VMRHF	XT0,XC0,XC1
+	VMRHF	XT1,XC2,XC3
+	VMRLF	XT2,XC0,XC1
+	VMRLF	XT3,XC2,XC3
+	VPDI	XC0,XT0,XT1,0b0000
+	VPDI	XC1,XT0,XT1,0b0101
+	VPDI	XC2,XT2,XT3,0b0000
+	VPDI	XC3,XT2,XT3,0b0101
+
+	VMRHF	XT0,XD0,XD1
+	VMRHF	XT1,XD2,XD3
+	VMRLF	XT2,XD0,XD1
+	VMRLF	XT3,XD2,XD3
+	VPDI	XD0,XT0,XT1,0b0000
+	VPDI	XD1,XT0,XT1,0b0101
+	VPDI	XD2,XT2,XT3,0b0000
+	VPDI	XD3,XT2,XT3,0b0101
+
+	VAF	XA0,XA0,K0
+	VAF	XB0,XB0,K1
+	VAF	XC0,XC0,K2
+	VAF	XD0,XD0,K3
+
+	VPERM	XA0,XA0,XA0,BEPERM
+	VPERM	XB0,XB0,XB0,BEPERM
+	VPERM	XC0,XC0,XC0,BEPERM
+	VPERM	XD0,XD0,XD0,BEPERM
+
+	VLM	XT0,XT3,0,INP,0
+
+	VX	XT0,XT0,XA0
+	VX	XT1,XT1,XB0
+	VX	XT2,XT2,XC0
+	VX	XT3,XT3,XD0
+
+	VSTM	XT0,XT3,0,OUT,0
+
+	la	INP,0x40(INP)
+	la	OUT,0x40(OUT)
+	aghi	LEN,-0x40
+
+	VAF	XA0,XA1,K0
+	VAF	XB0,XB1,K1
+	VAF	XC0,XC1,K2
+	VAF	XD0,XD1,K3
+
+	VPERM	XA0,XA0,XA0,BEPERM
+	VPERM	XB0,XB0,XB0,BEPERM
+	VPERM	XC0,XC0,XC0,BEPERM
+	VPERM	XD0,XD0,XD0,BEPERM
+
+	clgfi	LEN,0x40
+	jl	.Ltail_4x
+
+	VLM	XT0,XT3,0,INP,0
+
+	VX	XT0,XT0,XA0
+	VX	XT1,XT1,XB0
+	VX	XT2,XT2,XC0
+	VX	XT3,XT3,XD0
+
+	VSTM	XT0,XT3,0,OUT,0
+
+	la	INP,0x40(INP)
+	la	OUT,0x40(OUT)
+	aghi	LEN,-0x40
+	je	.Ldone_4x
+
+	VAF	XA0,XA2,K0
+	VAF	XB0,XB2,K1
+	VAF	XC0,XC2,K2
+	VAF	XD0,XD2,K3
+
+	VPERM	XA0,XA0,XA0,BEPERM
+	VPERM	XB0,XB0,XB0,BEPERM
+	VPERM	XC0,XC0,XC0,BEPERM
+	VPERM	XD0,XD0,XD0,BEPERM
+
+	clgfi	LEN,0x40
+	jl	.Ltail_4x
+
+	VLM	XT0,XT3,0,INP,0
+
+	VX	XT0,XT0,XA0
+	VX	XT1,XT1,XB0
+	VX	XT2,XT2,XC0
+	VX	XT3,XT3,XD0
+
+	VSTM	XT0,XT3,0,OUT,0
+
+	la	INP,0x40(INP)
+	la	OUT,0x40(OUT)
+	aghi	LEN,-0x40
+	je	.Ldone_4x
+
+	VAF	XA0,XA3,K0
+	VAF	XB0,XB3,K1
+	VAF	XC0,XC3,K2
+	VAF	XD0,XD3,K3
+
+	VPERM	XA0,XA0,XA0,BEPERM
+	VPERM	XB0,XB0,XB0,BEPERM
+	VPERM	XC0,XC0,XC0,BEPERM
+	VPERM	XD0,XD0,XD0,BEPERM
+
+	clgfi	LEN,0x40
+	jl	.Ltail_4x
+
+	VLM	XT0,XT3,0,INP,0
+
+	VX	XT0,XT0,XA0
+	VX	XT1,XT1,XB0
+	VX	XT2,XT2,XC0
+	VX	XT3,XT3,XD0
+
+	VSTM	XT0,XT3,0,OUT,0
+
+.Ldone_4x:
+	lmg	%r6,%r7,6*8(SP)
+	BR_EX	%r14
+
+.Ltail_4x:
+	VLR	XT0,XC0
+	VLR	XT1,XD0
+
+	VST	XA0,8*8+0x00,,SP
+	VST	XB0,8*8+0x10,,SP
+	VST	XT0,8*8+0x20,,SP
+	VST	XT1,8*8+0x30,,SP
+
+	lghi	%r1,0
+
+.Loop_tail_4x:
+	llgc	%r5,0(%r1,INP)
+	llgc	%r6,8*8(%r1,SP)
+	xr	%r6,%r5
+	stc	%r6,0(%r1,OUT)
+	la	%r1,1(%r1)
+	brct	LEN,.Loop_tail_4x
+
+	lmg	%r6,%r7,6*8(SP)
+	BR_EX	%r14
+SYM_FUNC_END(chacha20_vx_4x)
+
+#undef	OUT
+#undef	INP
+#undef	LEN
+#undef	KEY
+#undef	COUNTER
+
+#undef BEPERM
+
+#undef K0
+#undef K1
+#undef K2
+#undef K3
+
+
+#############################################################################
+# void chacha20_vx(u8 *out, counst u8 *inp, size_t len,
+#		   counst u32 *key, const u32 *counter)
+
+#define	OUT		%r2
+#define	INP		%r3
+#define	LEN		%r4
+#define	KEY		%r5
+#define	COUNTER		%r6
+
+#define BEPERM		%v31
+
+#define K0		%v27
+#define K1		%v24
+#define K2		%v25
+#define K3		%v26
+
+#define A0		%v0
+#define B0		%v1
+#define C0		%v2
+#define D0		%v3
+
+#define A1		%v4
+#define B1		%v5
+#define C1		%v6
+#define D1		%v7
+
+#define A2		%v8
+#define B2		%v9
+#define C2		%v10
+#define D2		%v11
+
+#define A3		%v12
+#define B3		%v13
+#define C3		%v14
+#define D3		%v15
+
+#define A4		%v16
+#define B4		%v17
+#define C4		%v18
+#define D4		%v19
+
+#define A5		%v20
+#define B5		%v21
+#define C5		%v22
+#define D5		%v23
+
+#define T0		%v27
+#define T1		%v28
+#define T2		%v29
+#define T3		%v30
+
+SYM_FUNC_START(chacha20_vx)
+	clgfi	LEN,256
+	jle	chacha20_vx_4x
+	stmg	%r6,%r7,6*8(SP)
+
+	lghi	%r1,-FRAME
+	lgr	%r0,SP
+	la	SP,0(%r1,SP)
+	stg	%r0,0(SP)		# back-chain
+
+	larl	%r7,sigma
+	lhi	%r0,10
+
+	VLM	K1,K2,0,KEY,0		# load key
+	VL	K3,0,,COUNTER		# load counter
+
+	VLM	K0,BEPERM,0,%r7,4	# load sigma, increments, ...
+
+.Loop_outer_vx:
+	VLR	A0,K0
+	VLR	B0,K1
+	VLR	A1,K0
+	VLR	B1,K1
+	VLR	A2,K0
+	VLR	B2,K1
+	VLR	A3,K0
+	VLR	B3,K1
+	VLR	A4,K0
+	VLR	B4,K1
+	VLR	A5,K0
+	VLR	B5,K1
+
+	VLR	D0,K3
+	VAF	D1,K3,T1		# K[3]+1
+	VAF	D2,K3,T2		# K[3]+2
+	VAF	D3,K3,T3		# K[3]+3
+	VAF	D4,D2,T2		# K[3]+4
+	VAF	D5,D2,T3		# K[3]+5
+
+	VLR	C0,K2
+	VLR	C1,K2
+	VLR	C2,K2
+	VLR	C3,K2
+	VLR	C4,K2
+	VLR	C5,K2
+
+	VLR	T1,D1
+	VLR	T2,D2
+	VLR	T3,D3
+
+.Loop_vx:
+	VAF	A0,A0,B0
+	VAF	A1,A1,B1
+	VAF	A2,A2,B2
+	VAF	A3,A3,B3
+	VAF	A4,A4,B4
+	VAF	A5,A5,B5
+	VX	D0,D0,A0
+	VX	D1,D1,A1
+	VX	D2,D2,A2
+	VX	D3,D3,A3
+	VX	D4,D4,A4
+	VX	D5,D5,A5
+	VERLLF	D0,D0,16
+	VERLLF	D1,D1,16
+	VERLLF	D2,D2,16
+	VERLLF	D3,D3,16
+	VERLLF	D4,D4,16
+	VERLLF	D5,D5,16
+
+	VAF	C0,C0,D0
+	VAF	C1,C1,D1
+	VAF	C2,C2,D2
+	VAF	C3,C3,D3
+	VAF	C4,C4,D4
+	VAF	C5,C5,D5
+	VX	B0,B0,C0
+	VX	B1,B1,C1
+	VX	B2,B2,C2
+	VX	B3,B3,C3
+	VX	B4,B4,C4
+	VX	B5,B5,C5
+	VERLLF	B0,B0,12
+	VERLLF	B1,B1,12
+	VERLLF	B2,B2,12
+	VERLLF	B3,B3,12
+	VERLLF	B4,B4,12
+	VERLLF	B5,B5,12
+
+	VAF	A0,A0,B0
+	VAF	A1,A1,B1
+	VAF	A2,A2,B2
+	VAF	A3,A3,B3
+	VAF	A4,A4,B4
+	VAF	A5,A5,B5
+	VX	D0,D0,A0
+	VX	D1,D1,A1
+	VX	D2,D2,A2
+	VX	D3,D3,A3
+	VX	D4,D4,A4
+	VX	D5,D5,A5
+	VERLLF	D0,D0,8
+	VERLLF	D1,D1,8
+	VERLLF	D2,D2,8
+	VERLLF	D3,D3,8
+	VERLLF	D4,D4,8
+	VERLLF	D5,D5,8
+
+	VAF	C0,C0,D0
+	VAF	C1,C1,D1
+	VAF	C2,C2,D2
+	VAF	C3,C3,D3
+	VAF	C4,C4,D4
+	VAF	C5,C5,D5
+	VX	B0,B0,C0
+	VX	B1,B1,C1
+	VX	B2,B2,C2
+	VX	B3,B3,C3
+	VX	B4,B4,C4
+	VX	B5,B5,C5
+	VERLLF	B0,B0,7
+	VERLLF	B1,B1,7
+	VERLLF	B2,B2,7
+	VERLLF	B3,B3,7
+	VERLLF	B4,B4,7
+	VERLLF	B5,B5,7
+
+	VSLDB	C0,C0,C0,8
+	VSLDB	C1,C1,C1,8
+	VSLDB	C2,C2,C2,8
+	VSLDB	C3,C3,C3,8
+	VSLDB	C4,C4,C4,8
+	VSLDB	C5,C5,C5,8
+	VSLDB	B0,B0,B0,4
+	VSLDB	B1,B1,B1,4
+	VSLDB	B2,B2,B2,4
+	VSLDB	B3,B3,B3,4
+	VSLDB	B4,B4,B4,4
+	VSLDB	B5,B5,B5,4
+	VSLDB	D0,D0,D0,12
+	VSLDB	D1,D1,D1,12
+	VSLDB	D2,D2,D2,12
+	VSLDB	D3,D3,D3,12
+	VSLDB	D4,D4,D4,12
+	VSLDB	D5,D5,D5,12
+
+	VAF	A0,A0,B0
+	VAF	A1,A1,B1
+	VAF	A2,A2,B2
+	VAF	A3,A3,B3
+	VAF	A4,A4,B4
+	VAF	A5,A5,B5
+	VX	D0,D0,A0
+	VX	D1,D1,A1
+	VX	D2,D2,A2
+	VX	D3,D3,A3
+	VX	D4,D4,A4
+	VX	D5,D5,A5
+	VERLLF	D0,D0,16
+	VERLLF	D1,D1,16
+	VERLLF	D2,D2,16
+	VERLLF	D3,D3,16
+	VERLLF	D4,D4,16
+	VERLLF	D5,D5,16
+
+	VAF	C0,C0,D0
+	VAF	C1,C1,D1
+	VAF	C2,C2,D2
+	VAF	C3,C3,D3
+	VAF	C4,C4,D4
+	VAF	C5,C5,D5
+	VX	B0,B0,C0
+	VX	B1,B1,C1
+	VX	B2,B2,C2
+	VX	B3,B3,C3
+	VX	B4,B4,C4
+	VX	B5,B5,C5
+	VERLLF	B0,B0,12
+	VERLLF	B1,B1,12
+	VERLLF	B2,B2,12
+	VERLLF	B3,B3,12
+	VERLLF	B4,B4,12
+	VERLLF	B5,B5,12
+
+	VAF	A0,A0,B0
+	VAF	A1,A1,B1
+	VAF	A2,A2,B2
+	VAF	A3,A3,B3
+	VAF	A4,A4,B4
+	VAF	A5,A5,B5
+	VX	D0,D0,A0
+	VX	D1,D1,A1
+	VX	D2,D2,A2
+	VX	D3,D3,A3
+	VX	D4,D4,A4
+	VX	D5,D5,A5
+	VERLLF	D0,D0,8
+	VERLLF	D1,D1,8
+	VERLLF	D2,D2,8
+	VERLLF	D3,D3,8
+	VERLLF	D4,D4,8
+	VERLLF	D5,D5,8
+
+	VAF	C0,C0,D0
+	VAF	C1,C1,D1
+	VAF	C2,C2,D2
+	VAF	C3,C3,D3
+	VAF	C4,C4,D4
+	VAF	C5,C5,D5
+	VX	B0,B0,C0
+	VX	B1,B1,C1
+	VX	B2,B2,C2
+	VX	B3,B3,C3
+	VX	B4,B4,C4
+	VX	B5,B5,C5
+	VERLLF	B0,B0,7
+	VERLLF	B1,B1,7
+	VERLLF	B2,B2,7
+	VERLLF	B3,B3,7
+	VERLLF	B4,B4,7
+	VERLLF	B5,B5,7
+
+	VSLDB	C0,C0,C0,8
+	VSLDB	C1,C1,C1,8
+	VSLDB	C2,C2,C2,8
+	VSLDB	C3,C3,C3,8
+	VSLDB	C4,C4,C4,8
+	VSLDB	C5,C5,C5,8
+	VSLDB	B0,B0,B0,12
+	VSLDB	B1,B1,B1,12
+	VSLDB	B2,B2,B2,12
+	VSLDB	B3,B3,B3,12
+	VSLDB	B4,B4,B4,12
+	VSLDB	B5,B5,B5,12
+	VSLDB	D0,D0,D0,4
+	VSLDB	D1,D1,D1,4
+	VSLDB	D2,D2,D2,4
+	VSLDB	D3,D3,D3,4
+	VSLDB	D4,D4,D4,4
+	VSLDB	D5,D5,D5,4
+	brct	%r0,.Loop_vx
+
+	VAF	A0,A0,K0
+	VAF	B0,B0,K1
+	VAF	C0,C0,K2
+	VAF	D0,D0,K3
+	VAF	A1,A1,K0
+	VAF	D1,D1,T1		# +K[3]+1
+
+	VPERM	A0,A0,A0,BEPERM
+	VPERM	B0,B0,B0,BEPERM
+	VPERM	C0,C0,C0,BEPERM
+	VPERM	D0,D0,D0,BEPERM
+
+	clgfi	LEN,0x40
+	jl	.Ltail_vx
+
+	VAF	D2,D2,T2		# +K[3]+2
+	VAF	D3,D3,T3		# +K[3]+3
+	VLM	T0,T3,0,INP,0
+
+	VX	A0,A0,T0
+	VX	B0,B0,T1
+	VX	C0,C0,T2
+	VX	D0,D0,T3
+
+	VLM	K0,T3,0,%r7,4		# re-load sigma and increments
+
+	VSTM	A0,D0,0,OUT,0
+
+	la	INP,0x40(INP)
+	la	OUT,0x40(OUT)
+	aghi	LEN,-0x40
+	je	.Ldone_vx
+
+	VAF	B1,B1,K1
+	VAF	C1,C1,K2
+
+	VPERM	A0,A1,A1,BEPERM
+	VPERM	B0,B1,B1,BEPERM
+	VPERM	C0,C1,C1,BEPERM
+	VPERM	D0,D1,D1,BEPERM
+
+	clgfi	LEN,0x40
+	jl	.Ltail_vx
+
+	VLM	A1,D1,0,INP,0
+
+	VX	A0,A0,A1
+	VX	B0,B0,B1
+	VX	C0,C0,C1
+	VX	D0,D0,D1
+
+	VSTM	A0,D0,0,OUT,0
+
+	la	INP,0x40(INP)
+	la	OUT,0x40(OUT)
+	aghi	LEN,-0x40
+	je	.Ldone_vx
+
+	VAF	A2,A2,K0
+	VAF	B2,B2,K1
+	VAF	C2,C2,K2
+
+	VPERM	A0,A2,A2,BEPERM
+	VPERM	B0,B2,B2,BEPERM
+	VPERM	C0,C2,C2,BEPERM
+	VPERM	D0,D2,D2,BEPERM
+
+	clgfi	LEN,0x40
+	jl	.Ltail_vx
+
+	VLM	A1,D1,0,INP,0
+
+	VX	A0,A0,A1
+	VX	B0,B0,B1
+	VX	C0,C0,C1
+	VX	D0,D0,D1
+
+	VSTM	A0,D0,0,OUT,0
+
+	la	INP,0x40(INP)
+	la	OUT,0x40(OUT)
+	aghi	LEN,-0x40
+	je	.Ldone_vx
+
+	VAF	A3,A3,K0
+	VAF	B3,B3,K1
+	VAF	C3,C3,K2
+	VAF	D2,K3,T3		# K[3]+3
+
+	VPERM	A0,A3,A3,BEPERM
+	VPERM	B0,B3,B3,BEPERM
+	VPERM	C0,C3,C3,BEPERM
+	VPERM	D0,D3,D3,BEPERM
+
+	clgfi	LEN,0x40
+	jl	.Ltail_vx
+
+	VAF	D3,D2,T1		# K[3]+4
+	VLM	A1,D1,0,INP,0
+
+	VX	A0,A0,A1
+	VX	B0,B0,B1
+	VX	C0,C0,C1
+	VX	D0,D0,D1
+
+	VSTM	A0,D0,0,OUT,0
+
+	la	INP,0x40(INP)
+	la	OUT,0x40(OUT)
+	aghi	LEN,-0x40
+	je	.Ldone_vx
+
+	VAF	A4,A4,K0
+	VAF	B4,B4,K1
+	VAF	C4,C4,K2
+	VAF	D4,D4,D3		# +K[3]+4
+	VAF	D3,D3,T1		# K[3]+5
+	VAF	K3,D2,T3		# K[3]+=6
+
+	VPERM	A0,A4,A4,BEPERM
+	VPERM	B0,B4,B4,BEPERM
+	VPERM	C0,C4,C4,BEPERM
+	VPERM	D0,D4,D4,BEPERM
+
+	clgfi	LEN,0x40
+	jl	.Ltail_vx
+
+	VLM	A1,D1,0,INP,0
+
+	VX	A0,A0,A1
+	VX	B0,B0,B1
+	VX	C0,C0,C1
+	VX	D0,D0,D1
+
+	VSTM	A0,D0,0,OUT,0
+
+	la	INP,0x40(INP)
+	la	OUT,0x40(OUT)
+	aghi	LEN,-0x40
+	je	.Ldone_vx
+
+	VAF	A5,A5,K0
+	VAF	B5,B5,K1
+	VAF	C5,C5,K2
+	VAF	D5,D5,D3		# +K[3]+5
+
+	VPERM	A0,A5,A5,BEPERM
+	VPERM	B0,B5,B5,BEPERM
+	VPERM	C0,C5,C5,BEPERM
+	VPERM	D0,D5,D5,BEPERM
+
+	clgfi	LEN,0x40
+	jl	.Ltail_vx
+
+	VLM	A1,D1,0,INP,0
+
+	VX	A0,A0,A1
+	VX	B0,B0,B1
+	VX	C0,C0,C1
+	VX	D0,D0,D1
+
+	VSTM	A0,D0,0,OUT,0
+
+	la	INP,0x40(INP)
+	la	OUT,0x40(OUT)
+	lhi	%r0,10
+	aghi	LEN,-0x40
+	jne	.Loop_outer_vx
+
+.Ldone_vx:
+	lmg	%r6,%r7,FRAME+6*8(SP)
+	la	SP,FRAME(SP)
+	BR_EX	%r14
+
+.Ltail_vx:
+	VSTM	A0,D0,8*8,SP,3
+	lghi	%r1,0
+
+.Loop_tail_vx:
+	llgc	%r5,0(%r1,INP)
+	llgc	%r6,8*8(%r1,SP)
+	xr	%r6,%r5
+	stc	%r6,0(%r1,OUT)
+	la	%r1,1(%r1)
+	brct	LEN,.Loop_tail_vx
+
+	lmg	%r6,%r7,FRAME+6*8(SP)
+	la	SP,FRAME(SP)
+	BR_EX	%r14
+SYM_FUNC_END(chacha20_vx)
+
+.previous
diff --git a/arch/s390/crypto/chacha-s390.h b/arch/s390/crypto/chacha-s390.h
new file mode 100644
index 0000000000..733744ce30
--- /dev/null
+++ b/arch/s390/crypto/chacha-s390.h
@@ -0,0 +1,14 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * s390 ChaCha stream cipher.
+ *
+ * Copyright IBM Corp. 2021
+ */
+
+#ifndef _CHACHA_S390_H
+#define _CHACHA_S390_H
+
+void chacha20_vx(u8 *out, const u8 *inp, size_t len, const u32 *key,
+		 const u32 *counter);
+
+#endif /* _CHACHA_S390_H */
diff --git a/arch/s390/crypto/crc32-vx.c b/arch/s390/crypto/crc32-vx.c
new file mode 100644
index 0000000000..017143e9ce
--- /dev/null
+++ b/arch/s390/crypto/crc32-vx.c
@@ -0,0 +1,310 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Crypto-API module for CRC-32 algorithms implemented with the
+ * z/Architecture Vector Extension Facility.
+ *
+ * Copyright IBM Corp. 2015
+ * Author(s): Hendrik Brueckner <brueckner@linux.vnet.ibm.com>
+ */
+#define KMSG_COMPONENT	"crc32-vx"
+#define pr_fmt(fmt)	KMSG_COMPONENT ": " fmt
+
+#include <linux/module.h>
+#include <linux/cpufeature.h>
+#include <linux/crc32.h>
+#include <crypto/internal/hash.h>
+#include <asm/fpu/api.h>
+
+
+#define CRC32_BLOCK_SIZE	1
+#define CRC32_DIGEST_SIZE	4
+
+#define VX_MIN_LEN		64
+#define VX_ALIGNMENT		16L
+#define VX_ALIGN_MASK		(VX_ALIGNMENT - 1)
+
+struct crc_ctx {
+	u32 key;
+};
+
+struct crc_desc_ctx {
+	u32 crc;
+};
+
+/* Prototypes for functions in assembly files */
+u32 crc32_le_vgfm_16(u32 crc, unsigned char const *buf, size_t size);
+u32 crc32_be_vgfm_16(u32 crc, unsigned char const *buf, size_t size);
+u32 crc32c_le_vgfm_16(u32 crc, unsigned char const *buf, size_t size);
+
+/*
+ * DEFINE_CRC32_VX() - Define a CRC-32 function using the vector extension
+ *
+ * Creates a function to perform a particular CRC-32 computation. Depending
+ * on the message buffer, the hardware-accelerated or software implementation
+ * is used.   Note that the message buffer is aligned to improve fetch
+ * operations of VECTOR LOAD MULTIPLE instructions.
+ *
+ */
+#define DEFINE_CRC32_VX(___fname, ___crc32_vx, ___crc32_sw)		    \
+	static u32 __pure ___fname(u32 crc,				    \
+				unsigned char const *data, size_t datalen)  \
+	{								    \
+		struct kernel_fpu vxstate;				    \
+		unsigned long prealign, aligned, remaining;		    \
+									    \
+		if (datalen < VX_MIN_LEN + VX_ALIGN_MASK)		    \
+			return ___crc32_sw(crc, data, datalen);		    \
+									    \
+		if ((unsigned long)data & VX_ALIGN_MASK) {		    \
+			prealign = VX_ALIGNMENT -			    \
+				  ((unsigned long)data & VX_ALIGN_MASK);    \
+			datalen -= prealign;				    \
+			crc = ___crc32_sw(crc, data, prealign);		    \
+			data = (void *)((unsigned long)data + prealign);    \
+		}							    \
+									    \
+		aligned = datalen & ~VX_ALIGN_MASK;			    \
+		remaining = datalen & VX_ALIGN_MASK;			    \
+									    \
+		kernel_fpu_begin(&vxstate, KERNEL_VXR_LOW);		    \
+		crc = ___crc32_vx(crc, data, aligned);			    \
+		kernel_fpu_end(&vxstate, KERNEL_VXR_LOW);		    \
+									    \
+		if (remaining)						    \
+			crc = ___crc32_sw(crc, data + aligned, remaining);  \
+									    \
+		return crc;						    \
+	}
+
+DEFINE_CRC32_VX(crc32_le_vx, crc32_le_vgfm_16, crc32_le)
+DEFINE_CRC32_VX(crc32_be_vx, crc32_be_vgfm_16, crc32_be)
+DEFINE_CRC32_VX(crc32c_le_vx, crc32c_le_vgfm_16, __crc32c_le)
+
+
+static int crc32_vx_cra_init_zero(struct crypto_tfm *tfm)
+{
+	struct crc_ctx *mctx = crypto_tfm_ctx(tfm);
+
+	mctx->key = 0;
+	return 0;
+}
+
+static int crc32_vx_cra_init_invert(struct crypto_tfm *tfm)
+{
+	struct crc_ctx *mctx = crypto_tfm_ctx(tfm);
+
+	mctx->key = ~0;
+	return 0;
+}
+
+static int crc32_vx_init(struct shash_desc *desc)
+{
+	struct crc_ctx *mctx = crypto_shash_ctx(desc->tfm);
+	struct crc_desc_ctx *ctx = shash_desc_ctx(desc);
+
+	ctx->crc = mctx->key;
+	return 0;
+}
+
+static int crc32_vx_setkey(struct crypto_shash *tfm, const u8 *newkey,
+			   unsigned int newkeylen)
+{
+	struct crc_ctx *mctx = crypto_shash_ctx(tfm);
+
+	if (newkeylen != sizeof(mctx->key))
+		return -EINVAL;
+	mctx->key = le32_to_cpu(*(__le32 *)newkey);
+	return 0;
+}
+
+static int crc32be_vx_setkey(struct crypto_shash *tfm, const u8 *newkey,
+			     unsigned int newkeylen)
+{
+	struct crc_ctx *mctx = crypto_shash_ctx(tfm);
+
+	if (newkeylen != sizeof(mctx->key))
+		return -EINVAL;
+	mctx->key = be32_to_cpu(*(__be32 *)newkey);
+	return 0;
+}
+
+static int crc32le_vx_final(struct shash_desc *desc, u8 *out)
+{
+	struct crc_desc_ctx *ctx = shash_desc_ctx(desc);
+
+	*(__le32 *)out = cpu_to_le32p(&ctx->crc);
+	return 0;
+}
+
+static int crc32be_vx_final(struct shash_desc *desc, u8 *out)
+{
+	struct crc_desc_ctx *ctx = shash_desc_ctx(desc);
+
+	*(__be32 *)out = cpu_to_be32p(&ctx->crc);
+	return 0;
+}
+
+static int crc32c_vx_final(struct shash_desc *desc, u8 *out)
+{
+	struct crc_desc_ctx *ctx = shash_desc_ctx(desc);
+
+	/*
+	 * Perform a final XOR with 0xFFFFFFFF to be in sync
+	 * with the generic crc32c shash implementation.
+	 */
+	*(__le32 *)out = ~cpu_to_le32p(&ctx->crc);
+	return 0;
+}
+
+static int __crc32le_vx_finup(u32 *crc, const u8 *data, unsigned int len,
+			      u8 *out)
+{
+	*(__le32 *)out = cpu_to_le32(crc32_le_vx(*crc, data, len));
+	return 0;
+}
+
+static int __crc32be_vx_finup(u32 *crc, const u8 *data, unsigned int len,
+			      u8 *out)
+{
+	*(__be32 *)out = cpu_to_be32(crc32_be_vx(*crc, data, len));
+	return 0;
+}
+
+static int __crc32c_vx_finup(u32 *crc, const u8 *data, unsigned int len,
+			     u8 *out)
+{
+	/*
+	 * Perform a final XOR with 0xFFFFFFFF to be in sync
+	 * with the generic crc32c shash implementation.
+	 */
+	*(__le32 *)out = ~cpu_to_le32(crc32c_le_vx(*crc, data, len));
+	return 0;
+}
+
+
+#define CRC32_VX_FINUP(alg, func)					      \
+	static int alg ## _vx_finup(struct shash_desc *desc, const u8 *data,  \
+				   unsigned int datalen, u8 *out)	      \
+	{								      \
+		return __ ## alg ## _vx_finup(shash_desc_ctx(desc),	      \
+					      data, datalen, out);	      \
+	}
+
+CRC32_VX_FINUP(crc32le, crc32_le_vx)
+CRC32_VX_FINUP(crc32be, crc32_be_vx)
+CRC32_VX_FINUP(crc32c, crc32c_le_vx)
+
+#define CRC32_VX_DIGEST(alg, func)					      \
+	static int alg ## _vx_digest(struct shash_desc *desc, const u8 *data, \
+				     unsigned int len, u8 *out)		      \
+	{								      \
+		return __ ## alg ## _vx_finup(crypto_shash_ctx(desc->tfm),    \
+					      data, len, out);		      \
+	}
+
+CRC32_VX_DIGEST(crc32le, crc32_le_vx)
+CRC32_VX_DIGEST(crc32be, crc32_be_vx)
+CRC32_VX_DIGEST(crc32c, crc32c_le_vx)
+
+#define CRC32_VX_UPDATE(alg, func)					      \
+	static int alg ## _vx_update(struct shash_desc *desc, const u8 *data, \
+				     unsigned int datalen)		      \
+	{								      \
+		struct crc_desc_ctx *ctx = shash_desc_ctx(desc);	      \
+		ctx->crc = func(ctx->crc, data, datalen);		      \
+		return 0;						      \
+	}
+
+CRC32_VX_UPDATE(crc32le, crc32_le_vx)
+CRC32_VX_UPDATE(crc32be, crc32_be_vx)
+CRC32_VX_UPDATE(crc32c, crc32c_le_vx)
+
+
+static struct shash_alg crc32_vx_algs[] = {
+	/* CRC-32 LE */
+	{
+		.init		=	crc32_vx_init,
+		.setkey		=	crc32_vx_setkey,
+		.update		=	crc32le_vx_update,
+		.final		=	crc32le_vx_final,
+		.finup		=	crc32le_vx_finup,
+		.digest		=	crc32le_vx_digest,
+		.descsize	=	sizeof(struct crc_desc_ctx),
+		.digestsize	=	CRC32_DIGEST_SIZE,
+		.base		=	{
+			.cra_name	 = "crc32",
+			.cra_driver_name = "crc32-vx",
+			.cra_priority	 = 200,
+			.cra_flags	 = CRYPTO_ALG_OPTIONAL_KEY,
+			.cra_blocksize	 = CRC32_BLOCK_SIZE,
+			.cra_ctxsize	 = sizeof(struct crc_ctx),
+			.cra_module	 = THIS_MODULE,
+			.cra_init	 = crc32_vx_cra_init_zero,
+		},
+	},
+	/* CRC-32 BE */
+	{
+		.init		=	crc32_vx_init,
+		.setkey		=	crc32be_vx_setkey,
+		.update		=	crc32be_vx_update,
+		.final		=	crc32be_vx_final,
+		.finup		=	crc32be_vx_finup,
+		.digest		=	crc32be_vx_digest,
+		.descsize	=	sizeof(struct crc_desc_ctx),
+		.digestsize	=	CRC32_DIGEST_SIZE,
+		.base		=	{
+			.cra_name	 = "crc32be",
+			.cra_driver_name = "crc32be-vx",
+			.cra_priority	 = 200,
+			.cra_flags	 = CRYPTO_ALG_OPTIONAL_KEY,
+			.cra_blocksize	 = CRC32_BLOCK_SIZE,
+			.cra_ctxsize	 = sizeof(struct crc_ctx),
+			.cra_module	 = THIS_MODULE,
+			.cra_init	 = crc32_vx_cra_init_zero,
+		},
+	},
+	/* CRC-32C LE */
+	{
+		.init		=	crc32_vx_init,
+		.setkey		=	crc32_vx_setkey,
+		.update		=	crc32c_vx_update,
+		.final		=	crc32c_vx_final,
+		.finup		=	crc32c_vx_finup,
+		.digest		=	crc32c_vx_digest,
+		.descsize	=	sizeof(struct crc_desc_ctx),
+		.digestsize	=	CRC32_DIGEST_SIZE,
+		.base		=	{
+			.cra_name	 = "crc32c",
+			.cra_driver_name = "crc32c-vx",
+			.cra_priority	 = 200,
+			.cra_flags	 = CRYPTO_ALG_OPTIONAL_KEY,
+			.cra_blocksize	 = CRC32_BLOCK_SIZE,
+			.cra_ctxsize	 = sizeof(struct crc_ctx),
+			.cra_module	 = THIS_MODULE,
+			.cra_init	 = crc32_vx_cra_init_invert,
+		},
+	},
+};
+
+
+static int __init crc_vx_mod_init(void)
+{
+	return crypto_register_shashes(crc32_vx_algs,
+				       ARRAY_SIZE(crc32_vx_algs));
+}
+
+static void __exit crc_vx_mod_exit(void)
+{
+	crypto_unregister_shashes(crc32_vx_algs, ARRAY_SIZE(crc32_vx_algs));
+}
+
+module_cpu_feature_match(S390_CPU_FEATURE_VXRS, crc_vx_mod_init);
+module_exit(crc_vx_mod_exit);
+
+MODULE_AUTHOR("Hendrik Brueckner <brueckner@linux.vnet.ibm.com>");
+MODULE_LICENSE("GPL");
+
+MODULE_ALIAS_CRYPTO("crc32");
+MODULE_ALIAS_CRYPTO("crc32-vx");
+MODULE_ALIAS_CRYPTO("crc32c");
+MODULE_ALIAS_CRYPTO("crc32c-vx");
diff --git a/arch/s390/crypto/crc32be-vx.S b/arch/s390/crypto/crc32be-vx.S
new file mode 100644
index 0000000000..34ee479268
--- /dev/null
+++ b/arch/s390/crypto/crc32be-vx.S
@@ -0,0 +1,213 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Hardware-accelerated CRC-32 variants for Linux on z Systems
+ *
+ * Use the z/Architecture Vector Extension Facility to accelerate the
+ * computing of CRC-32 checksums.
+ *
+ * This CRC-32 implementation algorithm processes the most-significant
+ * bit first (BE).
+ *
+ * Copyright IBM Corp. 2015
+ * Author(s): Hendrik Brueckner <brueckner@linux.vnet.ibm.com>
+ */
+
+#include <linux/linkage.h>
+#include <asm/nospec-insn.h>
+#include <asm/vx-insn.h>
+
+/* Vector register range containing CRC-32 constants */
+#define CONST_R1R2		%v9
+#define CONST_R3R4		%v10
+#define CONST_R5		%v11
+#define CONST_R6		%v12
+#define CONST_RU_POLY		%v13
+#define CONST_CRC_POLY		%v14
+
+	.data
+	.balign	8
+
+/*
+ * The CRC-32 constant block contains reduction constants to fold and
+ * process particular chunks of the input data stream in parallel.
+ *
+ * For the CRC-32 variants, the constants are precomputed according to
+ * these definitions:
+ *
+ *	R1 = x4*128+64 mod P(x)
+ *	R2 = x4*128    mod P(x)
+ *	R3 = x128+64   mod P(x)
+ *	R4 = x128      mod P(x)
+ *	R5 = x96       mod P(x)
+ *	R6 = x64       mod P(x)
+ *
+ *	Barret reduction constant, u, is defined as floor(x**64 / P(x)).
+ *
+ *	where P(x) is the polynomial in the normal domain and the P'(x) is the
+ *	polynomial in the reversed (bitreflected) domain.
+ *
+ * Note that the constant definitions below are extended in order to compute
+ * intermediate results with a single VECTOR GALOIS FIELD MULTIPLY instruction.
+ * The rightmost doubleword can be 0 to prevent contribution to the result or
+ * can be multiplied by 1 to perform an XOR without the need for a separate
+ * VECTOR EXCLUSIVE OR instruction.
+ *
+ * CRC-32 (IEEE 802.3 Ethernet, ...) polynomials:
+ *
+ *	P(x)  = 0x04C11DB7
+ *	P'(x) = 0xEDB88320
+ */
+
+SYM_DATA_START_LOCAL(constants_CRC_32_BE)
+	.quad		0x08833794c, 0x0e6228b11	# R1, R2
+	.quad		0x0c5b9cd4c, 0x0e8a45605	# R3, R4
+	.quad		0x0f200aa66, 1 << 32		# R5, x32
+	.quad		0x0490d678d, 1			# R6, 1
+	.quad		0x104d101df, 0			# u
+	.quad		0x104C11DB7, 0			# P(x)
+SYM_DATA_END(constants_CRC_32_BE)
+
+	.previous
+
+	GEN_BR_THUNK %r14
+
+	.text
+/*
+ * The CRC-32 function(s) use these calling conventions:
+ *
+ * Parameters:
+ *
+ *	%r2:	Initial CRC value, typically ~0; and final CRC (return) value.
+ *	%r3:	Input buffer pointer, performance might be improved if the
+ *		buffer is on a doubleword boundary.
+ *	%r4:	Length of the buffer, must be 64 bytes or greater.
+ *
+ * Register usage:
+ *
+ *	%r5:	CRC-32 constant pool base pointer.
+ *	V0:	Initial CRC value and intermediate constants and results.
+ *	V1..V4:	Data for CRC computation.
+ *	V5..V8:	Next data chunks that are fetched from the input buffer.
+ *
+ *	V9..V14: CRC-32 constants.
+ */
+SYM_FUNC_START(crc32_be_vgfm_16)
+	/* Load CRC-32 constants */
+	larl	%r5,constants_CRC_32_BE
+	VLM	CONST_R1R2,CONST_CRC_POLY,0,%r5
+
+	/* Load the initial CRC value into the leftmost word of V0. */
+	VZERO	%v0
+	VLVGF	%v0,%r2,0
+
+	/* Load a 64-byte data chunk and XOR with CRC */
+	VLM	%v1,%v4,0,%r3		/* 64-bytes into V1..V4 */
+	VX	%v1,%v0,%v1		/* V1 ^= CRC */
+	aghi	%r3,64			/* BUF = BUF + 64 */
+	aghi	%r4,-64			/* LEN = LEN - 64 */
+
+	/* Check remaining buffer size and jump to proper folding method */
+	cghi	%r4,64
+	jl	.Lless_than_64bytes
+
+.Lfold_64bytes_loop:
+	/* Load the next 64-byte data chunk into V5 to V8 */
+	VLM	%v5,%v8,0,%r3
+
+	/*
+	 * Perform a GF(2) multiplication of the doublewords in V1 with
+	 * the reduction constants in V0.  The intermediate result is
+	 * then folded (accumulated) with the next data chunk in V5 and
+	 * stored in V1.  Repeat this step for the register contents
+	 * in V2, V3, and V4 respectively.
+	 */
+	VGFMAG	%v1,CONST_R1R2,%v1,%v5
+	VGFMAG	%v2,CONST_R1R2,%v2,%v6
+	VGFMAG	%v3,CONST_R1R2,%v3,%v7
+	VGFMAG	%v4,CONST_R1R2,%v4,%v8
+
+	/* Adjust buffer pointer and length for next loop */
+	aghi	%r3,64			/* BUF = BUF + 64 */
+	aghi	%r4,-64			/* LEN = LEN - 64 */
+
+	cghi	%r4,64
+	jnl	.Lfold_64bytes_loop
+
+.Lless_than_64bytes:
+	/* Fold V1 to V4 into a single 128-bit value in V1 */
+	VGFMAG	%v1,CONST_R3R4,%v1,%v2
+	VGFMAG	%v1,CONST_R3R4,%v1,%v3
+	VGFMAG	%v1,CONST_R3R4,%v1,%v4
+
+	/* Check whether to continue with 64-bit folding */
+	cghi	%r4,16
+	jl	.Lfinal_fold
+
+.Lfold_16bytes_loop:
+
+	VL	%v2,0,,%r3		/* Load next data chunk */
+	VGFMAG	%v1,CONST_R3R4,%v1,%v2	/* Fold next data chunk */
+
+	/* Adjust buffer pointer and size for folding next data chunk */
+	aghi	%r3,16
+	aghi	%r4,-16
+
+	/* Process remaining data chunks */
+	cghi	%r4,16
+	jnl	.Lfold_16bytes_loop
+
+.Lfinal_fold:
+	/*
+	 * The R5 constant is used to fold a 128-bit value into an 96-bit value
+	 * that is XORed with the next 96-bit input data chunk.  To use a single
+	 * VGFMG instruction, multiply the rightmost 64-bit with x^32 (1<<32) to
+	 * form an intermediate 96-bit value (with appended zeros) which is then
+	 * XORed with the intermediate reduction result.
+	 */
+	VGFMG	%v1,CONST_R5,%v1
+
+	/*
+	 * Further reduce the remaining 96-bit value to a 64-bit value using a
+	 * single VGFMG, the rightmost doubleword is multiplied with 0x1. The
+	 * intermediate result is then XORed with the product of the leftmost
+	 * doubleword with R6.	The result is a 64-bit value and is subject to
+	 * the Barret reduction.
+	 */
+	VGFMG	%v1,CONST_R6,%v1
+
+	/*
+	 * The input values to the Barret reduction are the degree-63 polynomial
+	 * in V1 (R(x)), degree-32 generator polynomial, and the reduction
+	 * constant u.	The Barret reduction result is the CRC value of R(x) mod
+	 * P(x).
+	 *
+	 * The Barret reduction algorithm is defined as:
+	 *
+	 *    1. T1(x) = floor( R(x) / x^32 ) GF2MUL u
+	 *    2. T2(x) = floor( T1(x) / x^32 ) GF2MUL P(x)
+	 *    3. C(x)  = R(x) XOR T2(x) mod x^32
+	 *
+	 * Note: To compensate the division by x^32, use the vector unpack
+	 * instruction to move the leftmost word into the leftmost doubleword
+	 * of the vector register.  The rightmost doubleword is multiplied
+	 * with zero to not contribute to the intermediate results.
+	 */
+
+	/* T1(x) = floor( R(x) / x^32 ) GF2MUL u */
+	VUPLLF	%v2,%v1
+	VGFMG	%v2,CONST_RU_POLY,%v2
+
+	/*
+	 * Compute the GF(2) product of the CRC polynomial in VO with T1(x) in
+	 * V2 and XOR the intermediate result, T2(x),  with the value in V1.
+	 * The final result is in the rightmost word of V2.
+	 */
+	VUPLLF	%v2,%v2
+	VGFMAG	%v2,CONST_CRC_POLY,%v2,%v1
+
+.Ldone:
+	VLGVF	%r2,%v2,3
+	BR_EX	%r14
+SYM_FUNC_END(crc32_be_vgfm_16)
+
+.previous
diff --git a/arch/s390/crypto/crc32le-vx.S b/arch/s390/crypto/crc32le-vx.S
new file mode 100644
index 0000000000..5a819ae09a
--- /dev/null
+++ b/arch/s390/crypto/crc32le-vx.S
@@ -0,0 +1,275 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Hardware-accelerated CRC-32 variants for Linux on z Systems
+ *
+ * Use the z/Architecture Vector Extension Facility to accelerate the
+ * computing of bitreflected CRC-32 checksums for IEEE 802.3 Ethernet
+ * and Castagnoli.
+ *
+ * This CRC-32 implementation algorithm is bitreflected and processes
+ * the least-significant bit first (Little-Endian).
+ *
+ * Copyright IBM Corp. 2015
+ * Author(s): Hendrik Brueckner <brueckner@linux.vnet.ibm.com>
+ */
+
+#include <linux/linkage.h>
+#include <asm/nospec-insn.h>
+#include <asm/vx-insn.h>
+
+/* Vector register range containing CRC-32 constants */
+#define CONST_PERM_LE2BE	%v9
+#define CONST_R2R1		%v10
+#define CONST_R4R3		%v11
+#define CONST_R5		%v12
+#define CONST_RU_POLY		%v13
+#define CONST_CRC_POLY		%v14
+
+	.data
+	.balign	8
+
+/*
+ * The CRC-32 constant block contains reduction constants to fold and
+ * process particular chunks of the input data stream in parallel.
+ *
+ * For the CRC-32 variants, the constants are precomputed according to
+ * these definitions:
+ *
+ *	R1 = [(x4*128+32 mod P'(x) << 32)]' << 1
+ *	R2 = [(x4*128-32 mod P'(x) << 32)]' << 1
+ *	R3 = [(x128+32 mod P'(x) << 32)]'   << 1
+ *	R4 = [(x128-32 mod P'(x) << 32)]'   << 1
+ *	R5 = [(x64 mod P'(x) << 32)]'	    << 1
+ *	R6 = [(x32 mod P'(x) << 32)]'	    << 1
+ *
+ *	The bitreflected Barret reduction constant, u', is defined as
+ *	the bit reversal of floor(x**64 / P(x)).
+ *
+ *	where P(x) is the polynomial in the normal domain and the P'(x) is the
+ *	polynomial in the reversed (bitreflected) domain.
+ *
+ * CRC-32 (IEEE 802.3 Ethernet, ...) polynomials:
+ *
+ *	P(x)  = 0x04C11DB7
+ *	P'(x) = 0xEDB88320
+ *
+ * CRC-32C (Castagnoli) polynomials:
+ *
+ *	P(x)  = 0x1EDC6F41
+ *	P'(x) = 0x82F63B78
+ */
+
+SYM_DATA_START_LOCAL(constants_CRC_32_LE)
+	.octa		0x0F0E0D0C0B0A09080706050403020100	# BE->LE mask
+	.quad		0x1c6e41596, 0x154442bd4		# R2, R1
+	.quad		0x0ccaa009e, 0x1751997d0		# R4, R3
+	.octa		0x163cd6124				# R5
+	.octa		0x1F7011641				# u'
+	.octa		0x1DB710641				# P'(x) << 1
+SYM_DATA_END(constants_CRC_32_LE)
+
+SYM_DATA_START_LOCAL(constants_CRC_32C_LE)
+	.octa		0x0F0E0D0C0B0A09080706050403020100	# BE->LE mask
+	.quad		0x09e4addf8, 0x740eef02			# R2, R1
+	.quad		0x14cd00bd6, 0xf20c0dfe			# R4, R3
+	.octa		0x0dd45aab8				# R5
+	.octa		0x0dea713f1				# u'
+	.octa		0x105ec76f0				# P'(x) << 1
+SYM_DATA_END(constants_CRC_32C_LE)
+
+	.previous
+
+	GEN_BR_THUNK %r14
+
+	.text
+
+/*
+ * The CRC-32 functions use these calling conventions:
+ *
+ * Parameters:
+ *
+ *	%r2:	Initial CRC value, typically ~0; and final CRC (return) value.
+ *	%r3:	Input buffer pointer, performance might be improved if the
+ *		buffer is on a doubleword boundary.
+ *	%r4:	Length of the buffer, must be 64 bytes or greater.
+ *
+ * Register usage:
+ *
+ *	%r5:	CRC-32 constant pool base pointer.
+ *	V0:	Initial CRC value and intermediate constants and results.
+ *	V1..V4:	Data for CRC computation.
+ *	V5..V8:	Next data chunks that are fetched from the input buffer.
+ *	V9:	Constant for BE->LE conversion and shift operations
+ *
+ *	V10..V14: CRC-32 constants.
+ */
+
+SYM_FUNC_START(crc32_le_vgfm_16)
+	larl	%r5,constants_CRC_32_LE
+	j	crc32_le_vgfm_generic
+SYM_FUNC_END(crc32_le_vgfm_16)
+
+SYM_FUNC_START(crc32c_le_vgfm_16)
+	larl	%r5,constants_CRC_32C_LE
+	j	crc32_le_vgfm_generic
+SYM_FUNC_END(crc32c_le_vgfm_16)
+
+SYM_FUNC_START(crc32_le_vgfm_generic)
+	/* Load CRC-32 constants */
+	VLM	CONST_PERM_LE2BE,CONST_CRC_POLY,0,%r5
+
+	/*
+	 * Load the initial CRC value.
+	 *
+	 * The CRC value is loaded into the rightmost word of the
+	 * vector register and is later XORed with the LSB portion
+	 * of the loaded input data.
+	 */
+	VZERO	%v0			/* Clear V0 */
+	VLVGF	%v0,%r2,3		/* Load CRC into rightmost word */
+
+	/* Load a 64-byte data chunk and XOR with CRC */
+	VLM	%v1,%v4,0,%r3		/* 64-bytes into V1..V4 */
+	VPERM	%v1,%v1,%v1,CONST_PERM_LE2BE
+	VPERM	%v2,%v2,%v2,CONST_PERM_LE2BE
+	VPERM	%v3,%v3,%v3,CONST_PERM_LE2BE
+	VPERM	%v4,%v4,%v4,CONST_PERM_LE2BE
+
+	VX	%v1,%v0,%v1		/* V1 ^= CRC */
+	aghi	%r3,64			/* BUF = BUF + 64 */
+	aghi	%r4,-64			/* LEN = LEN - 64 */
+
+	cghi	%r4,64
+	jl	.Lless_than_64bytes
+
+.Lfold_64bytes_loop:
+	/* Load the next 64-byte data chunk into V5 to V8 */
+	VLM	%v5,%v8,0,%r3
+	VPERM	%v5,%v5,%v5,CONST_PERM_LE2BE
+	VPERM	%v6,%v6,%v6,CONST_PERM_LE2BE
+	VPERM	%v7,%v7,%v7,CONST_PERM_LE2BE
+	VPERM	%v8,%v8,%v8,CONST_PERM_LE2BE
+
+	/*
+	 * Perform a GF(2) multiplication of the doublewords in V1 with
+	 * the R1 and R2 reduction constants in V0.  The intermediate result
+	 * is then folded (accumulated) with the next data chunk in V5 and
+	 * stored in V1. Repeat this step for the register contents
+	 * in V2, V3, and V4 respectively.
+	 */
+	VGFMAG	%v1,CONST_R2R1,%v1,%v5
+	VGFMAG	%v2,CONST_R2R1,%v2,%v6
+	VGFMAG	%v3,CONST_R2R1,%v3,%v7
+	VGFMAG	%v4,CONST_R2R1,%v4,%v8
+
+	aghi	%r3,64			/* BUF = BUF + 64 */
+	aghi	%r4,-64			/* LEN = LEN - 64 */
+
+	cghi	%r4,64
+	jnl	.Lfold_64bytes_loop
+
+.Lless_than_64bytes:
+	/*
+	 * Fold V1 to V4 into a single 128-bit value in V1.  Multiply V1 with R3
+	 * and R4 and accumulating the next 128-bit chunk until a single 128-bit
+	 * value remains.
+	 */
+	VGFMAG	%v1,CONST_R4R3,%v1,%v2
+	VGFMAG	%v1,CONST_R4R3,%v1,%v3
+	VGFMAG	%v1,CONST_R4R3,%v1,%v4
+
+	cghi	%r4,16
+	jl	.Lfinal_fold
+
+.Lfold_16bytes_loop:
+
+	VL	%v2,0,,%r3		/* Load next data chunk */
+	VPERM	%v2,%v2,%v2,CONST_PERM_LE2BE
+	VGFMAG	%v1,CONST_R4R3,%v1,%v2	/* Fold next data chunk */
+
+	aghi	%r3,16
+	aghi	%r4,-16
+
+	cghi	%r4,16
+	jnl	.Lfold_16bytes_loop
+
+.Lfinal_fold:
+	/*
+	 * Set up a vector register for byte shifts.  The shift value must
+	 * be loaded in bits 1-4 in byte element 7 of a vector register.
+	 * Shift by 8 bytes: 0x40
+	 * Shift by 4 bytes: 0x20
+	 */
+	VLEIB	%v9,0x40,7
+
+	/*
+	 * Prepare V0 for the next GF(2) multiplication: shift V0 by 8 bytes
+	 * to move R4 into the rightmost doubleword and set the leftmost
+	 * doubleword to 0x1.
+	 */
+	VSRLB	%v0,CONST_R4R3,%v9
+	VLEIG	%v0,1,0
+
+	/*
+	 * Compute GF(2) product of V1 and V0.	The rightmost doubleword
+	 * of V1 is multiplied with R4.  The leftmost doubleword of V1 is
+	 * multiplied by 0x1 and is then XORed with rightmost product.
+	 * Implicitly, the intermediate leftmost product becomes padded
+	 */
+	VGFMG	%v1,%v0,%v1
+
+	/*
+	 * Now do the final 32-bit fold by multiplying the rightmost word
+	 * in V1 with R5 and XOR the result with the remaining bits in V1.
+	 *
+	 * To achieve this by a single VGFMAG, right shift V1 by a word
+	 * and store the result in V2 which is then accumulated.  Use the
+	 * vector unpack instruction to load the rightmost half of the
+	 * doubleword into the rightmost doubleword element of V1; the other
+	 * half is loaded in the leftmost doubleword.
+	 * The vector register with CONST_R5 contains the R5 constant in the
+	 * rightmost doubleword and the leftmost doubleword is zero to ignore
+	 * the leftmost product of V1.
+	 */
+	VLEIB	%v9,0x20,7		  /* Shift by words */
+	VSRLB	%v2,%v1,%v9		  /* Store remaining bits in V2 */
+	VUPLLF	%v1,%v1			  /* Split rightmost doubleword */
+	VGFMAG	%v1,CONST_R5,%v1,%v2	  /* V1 = (V1 * R5) XOR V2 */
+
+	/*
+	 * Apply a Barret reduction to compute the final 32-bit CRC value.
+	 *
+	 * The input values to the Barret reduction are the degree-63 polynomial
+	 * in V1 (R(x)), degree-32 generator polynomial, and the reduction
+	 * constant u.	The Barret reduction result is the CRC value of R(x) mod
+	 * P(x).
+	 *
+	 * The Barret reduction algorithm is defined as:
+	 *
+	 *    1. T1(x) = floor( R(x) / x^32 ) GF2MUL u
+	 *    2. T2(x) = floor( T1(x) / x^32 ) GF2MUL P(x)
+	 *    3. C(x)  = R(x) XOR T2(x) mod x^32
+	 *
+	 *  Note: The leftmost doubleword of vector register containing
+	 *  CONST_RU_POLY is zero and, thus, the intermediate GF(2) product
+	 *  is zero and does not contribute to the final result.
+	 */
+
+	/* T1(x) = floor( R(x) / x^32 ) GF2MUL u */
+	VUPLLF	%v2,%v1
+	VGFMG	%v2,CONST_RU_POLY,%v2
+
+	/*
+	 * Compute the GF(2) product of the CRC polynomial with T1(x) in
+	 * V2 and XOR the intermediate result, T2(x), with the value in V1.
+	 * The final result is stored in word element 2 of V2.
+	 */
+	VUPLLF	%v2,%v2
+	VGFMAG	%v2,CONST_CRC_POLY,%v2,%v1
+
+.Ldone:
+	VLGVF	%r2,%v2,2
+	BR_EX	%r14
+SYM_FUNC_END(crc32_le_vgfm_generic)
+
+.previous
diff --git a/arch/s390/crypto/des_s390.c b/arch/s390/crypto/des_s390.c
new file mode 100644
index 0000000000..8e75b83a5d
--- /dev/null
+++ b/arch/s390/crypto/des_s390.c
@@ -0,0 +1,502 @@
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * Cryptographic API.
+ *
+ * s390 implementation of the DES Cipher Algorithm.
+ *
+ * Copyright IBM Corp. 2003, 2011
+ * Author(s): Thomas Spatzier
+ *	      Jan Glauber (jan.glauber@de.ibm.com)
+ */
+
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/cpufeature.h>
+#include <linux/crypto.h>
+#include <linux/fips.h>
+#include <linux/mutex.h>
+#include <crypto/algapi.h>
+#include <crypto/internal/des.h>
+#include <crypto/internal/skcipher.h>
+#include <asm/cpacf.h>
+
+#define DES3_KEY_SIZE	(3 * DES_KEY_SIZE)
+
+static u8 *ctrblk;
+static DEFINE_MUTEX(ctrblk_lock);
+
+static cpacf_mask_t km_functions, kmc_functions, kmctr_functions;
+
+struct s390_des_ctx {
+	u8 iv[DES_BLOCK_SIZE];
+	u8 key[DES3_KEY_SIZE];
+};
+
+static int des_setkey(struct crypto_tfm *tfm, const u8 *key,
+		      unsigned int key_len)
+{
+	struct s390_des_ctx *ctx = crypto_tfm_ctx(tfm);
+	int err;
+
+	err = crypto_des_verify_key(tfm, key);
+	if (err)
+		return err;
+
+	memcpy(ctx->key, key, key_len);
+	return 0;
+}
+
+static int des_setkey_skcipher(struct crypto_skcipher *tfm, const u8 *key,
+			       unsigned int key_len)
+{
+	return des_setkey(crypto_skcipher_tfm(tfm), key, key_len);
+}
+
+static void s390_des_encrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in)
+{
+	struct s390_des_ctx *ctx = crypto_tfm_ctx(tfm);
+
+	cpacf_km(CPACF_KM_DEA, ctx->key, out, in, DES_BLOCK_SIZE);
+}
+
+static void s390_des_decrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in)
+{
+	struct s390_des_ctx *ctx = crypto_tfm_ctx(tfm);
+
+	cpacf_km(CPACF_KM_DEA | CPACF_DECRYPT,
+		 ctx->key, out, in, DES_BLOCK_SIZE);
+}
+
+static struct crypto_alg des_alg = {
+	.cra_name		=	"des",
+	.cra_driver_name	=	"des-s390",
+	.cra_priority		=	300,
+	.cra_flags		=	CRYPTO_ALG_TYPE_CIPHER,
+	.cra_blocksize		=	DES_BLOCK_SIZE,
+	.cra_ctxsize		=	sizeof(struct s390_des_ctx),
+	.cra_module		=	THIS_MODULE,
+	.cra_u			=	{
+		.cipher = {
+			.cia_min_keysize	=	DES_KEY_SIZE,
+			.cia_max_keysize	=	DES_KEY_SIZE,
+			.cia_setkey		=	des_setkey,
+			.cia_encrypt		=	s390_des_encrypt,
+			.cia_decrypt		=	s390_des_decrypt,
+		}
+	}
+};
+
+static int ecb_desall_crypt(struct skcipher_request *req, unsigned long fc)
+{
+	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+	struct s390_des_ctx *ctx = crypto_skcipher_ctx(tfm);
+	struct skcipher_walk walk;
+	unsigned int nbytes, n;
+	int ret;
+
+	ret = skcipher_walk_virt(&walk, req, false);
+	while ((nbytes = walk.nbytes) != 0) {
+		/* only use complete blocks */
+		n = nbytes & ~(DES_BLOCK_SIZE - 1);
+		cpacf_km(fc, ctx->key, walk.dst.virt.addr,
+			 walk.src.virt.addr, n);
+		ret = skcipher_walk_done(&walk, nbytes - n);
+	}
+	return ret;
+}
+
+static int cbc_desall_crypt(struct skcipher_request *req, unsigned long fc)
+{
+	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+	struct s390_des_ctx *ctx = crypto_skcipher_ctx(tfm);
+	struct skcipher_walk walk;
+	unsigned int nbytes, n;
+	int ret;
+	struct {
+		u8 iv[DES_BLOCK_SIZE];
+		u8 key[DES3_KEY_SIZE];
+	} param;
+
+	ret = skcipher_walk_virt(&walk, req, false);
+	if (ret)
+		return ret;
+	memcpy(param.iv, walk.iv, DES_BLOCK_SIZE);
+	memcpy(param.key, ctx->key, DES3_KEY_SIZE);
+	while ((nbytes = walk.nbytes) != 0) {
+		/* only use complete blocks */
+		n = nbytes & ~(DES_BLOCK_SIZE - 1);
+		cpacf_kmc(fc, &param, walk.dst.virt.addr,
+			  walk.src.virt.addr, n);
+		memcpy(walk.iv, param.iv, DES_BLOCK_SIZE);
+		ret = skcipher_walk_done(&walk, nbytes - n);
+	}
+	return ret;
+}
+
+static int ecb_des_encrypt(struct skcipher_request *req)
+{
+	return ecb_desall_crypt(req, CPACF_KM_DEA);
+}
+
+static int ecb_des_decrypt(struct skcipher_request *req)
+{
+	return ecb_desall_crypt(req, CPACF_KM_DEA | CPACF_DECRYPT);
+}
+
+static struct skcipher_alg ecb_des_alg = {
+	.base.cra_name		=	"ecb(des)",
+	.base.cra_driver_name	=	"ecb-des-s390",
+	.base.cra_priority	=	400,	/* combo: des + ecb */
+	.base.cra_blocksize	=	DES_BLOCK_SIZE,
+	.base.cra_ctxsize	=	sizeof(struct s390_des_ctx),
+	.base.cra_module	=	THIS_MODULE,
+	.min_keysize		=	DES_KEY_SIZE,
+	.max_keysize		=	DES_KEY_SIZE,
+	.setkey			=	des_setkey_skcipher,
+	.encrypt		=	ecb_des_encrypt,
+	.decrypt		=	ecb_des_decrypt,
+};
+
+static int cbc_des_encrypt(struct skcipher_request *req)
+{
+	return cbc_desall_crypt(req, CPACF_KMC_DEA);
+}
+
+static int cbc_des_decrypt(struct skcipher_request *req)
+{
+	return cbc_desall_crypt(req, CPACF_KMC_DEA | CPACF_DECRYPT);
+}
+
+static struct skcipher_alg cbc_des_alg = {
+	.base.cra_name		=	"cbc(des)",
+	.base.cra_driver_name	=	"cbc-des-s390",
+	.base.cra_priority	=	400,	/* combo: des + cbc */
+	.base.cra_blocksize	=	DES_BLOCK_SIZE,
+	.base.cra_ctxsize	=	sizeof(struct s390_des_ctx),
+	.base.cra_module	=	THIS_MODULE,
+	.min_keysize		=	DES_KEY_SIZE,
+	.max_keysize		=	DES_KEY_SIZE,
+	.ivsize			=	DES_BLOCK_SIZE,
+	.setkey			=	des_setkey_skcipher,
+	.encrypt		=	cbc_des_encrypt,
+	.decrypt		=	cbc_des_decrypt,
+};
+
+/*
+ * RFC2451:
+ *
+ *   For DES-EDE3, there is no known need to reject weak or
+ *   complementation keys.  Any weakness is obviated by the use of
+ *   multiple keys.
+ *
+ *   However, if the first two or last two independent 64-bit keys are
+ *   equal (k1 == k2 or k2 == k3), then the DES3 operation is simply the
+ *   same as DES.  Implementers MUST reject keys that exhibit this
+ *   property.
+ *
+ *   In fips mode additionally check for all 3 keys are unique.
+ *
+ */
+static int des3_setkey(struct crypto_tfm *tfm, const u8 *key,
+		       unsigned int key_len)
+{
+	struct s390_des_ctx *ctx = crypto_tfm_ctx(tfm);
+	int err;
+
+	err = crypto_des3_ede_verify_key(tfm, key);
+	if (err)
+		return err;
+
+	memcpy(ctx->key, key, key_len);
+	return 0;
+}
+
+static int des3_setkey_skcipher(struct crypto_skcipher *tfm, const u8 *key,
+				unsigned int key_len)
+{
+	return des3_setkey(crypto_skcipher_tfm(tfm), key, key_len);
+}
+
+static void des3_encrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
+{
+	struct s390_des_ctx *ctx = crypto_tfm_ctx(tfm);
+
+	cpacf_km(CPACF_KM_TDEA_192, ctx->key, dst, src, DES_BLOCK_SIZE);
+}
+
+static void des3_decrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
+{
+	struct s390_des_ctx *ctx = crypto_tfm_ctx(tfm);
+
+	cpacf_km(CPACF_KM_TDEA_192 | CPACF_DECRYPT,
+		 ctx->key, dst, src, DES_BLOCK_SIZE);
+}
+
+static struct crypto_alg des3_alg = {
+	.cra_name		=	"des3_ede",
+	.cra_driver_name	=	"des3_ede-s390",
+	.cra_priority		=	300,
+	.cra_flags		=	CRYPTO_ALG_TYPE_CIPHER,
+	.cra_blocksize		=	DES_BLOCK_SIZE,
+	.cra_ctxsize		=	sizeof(struct s390_des_ctx),
+	.cra_module		=	THIS_MODULE,
+	.cra_u			=	{
+		.cipher = {
+			.cia_min_keysize	=	DES3_KEY_SIZE,
+			.cia_max_keysize	=	DES3_KEY_SIZE,
+			.cia_setkey		=	des3_setkey,
+			.cia_encrypt		=	des3_encrypt,
+			.cia_decrypt		=	des3_decrypt,
+		}
+	}
+};
+
+static int ecb_des3_encrypt(struct skcipher_request *req)
+{
+	return ecb_desall_crypt(req, CPACF_KM_TDEA_192);
+}
+
+static int ecb_des3_decrypt(struct skcipher_request *req)
+{
+	return ecb_desall_crypt(req, CPACF_KM_TDEA_192 | CPACF_DECRYPT);
+}
+
+static struct skcipher_alg ecb_des3_alg = {
+	.base.cra_name		=	"ecb(des3_ede)",
+	.base.cra_driver_name	=	"ecb-des3_ede-s390",
+	.base.cra_priority	=	400,	/* combo: des3 + ecb */
+	.base.cra_blocksize	=	DES_BLOCK_SIZE,
+	.base.cra_ctxsize	=	sizeof(struct s390_des_ctx),
+	.base.cra_module	=	THIS_MODULE,
+	.min_keysize		=	DES3_KEY_SIZE,
+	.max_keysize		=	DES3_KEY_SIZE,
+	.setkey			=	des3_setkey_skcipher,
+	.encrypt		=	ecb_des3_encrypt,
+	.decrypt		=	ecb_des3_decrypt,
+};
+
+static int cbc_des3_encrypt(struct skcipher_request *req)
+{
+	return cbc_desall_crypt(req, CPACF_KMC_TDEA_192);
+}
+
+static int cbc_des3_decrypt(struct skcipher_request *req)
+{
+	return cbc_desall_crypt(req, CPACF_KMC_TDEA_192 | CPACF_DECRYPT);
+}
+
+static struct skcipher_alg cbc_des3_alg = {
+	.base.cra_name		=	"cbc(des3_ede)",
+	.base.cra_driver_name	=	"cbc-des3_ede-s390",
+	.base.cra_priority	=	400,	/* combo: des3 + cbc */
+	.base.cra_blocksize	=	DES_BLOCK_SIZE,
+	.base.cra_ctxsize	=	sizeof(struct s390_des_ctx),
+	.base.cra_module	=	THIS_MODULE,
+	.min_keysize		=	DES3_KEY_SIZE,
+	.max_keysize		=	DES3_KEY_SIZE,
+	.ivsize			=	DES_BLOCK_SIZE,
+	.setkey			=	des3_setkey_skcipher,
+	.encrypt		=	cbc_des3_encrypt,
+	.decrypt		=	cbc_des3_decrypt,
+};
+
+static unsigned int __ctrblk_init(u8 *ctrptr, u8 *iv, unsigned int nbytes)
+{
+	unsigned int i, n;
+
+	/* align to block size, max. PAGE_SIZE */
+	n = (nbytes > PAGE_SIZE) ? PAGE_SIZE : nbytes & ~(DES_BLOCK_SIZE - 1);
+	memcpy(ctrptr, iv, DES_BLOCK_SIZE);
+	for (i = (n / DES_BLOCK_SIZE) - 1; i > 0; i--) {
+		memcpy(ctrptr + DES_BLOCK_SIZE, ctrptr, DES_BLOCK_SIZE);
+		crypto_inc(ctrptr + DES_BLOCK_SIZE, DES_BLOCK_SIZE);
+		ctrptr += DES_BLOCK_SIZE;
+	}
+	return n;
+}
+
+static int ctr_desall_crypt(struct skcipher_request *req, unsigned long fc)
+{
+	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+	struct s390_des_ctx *ctx = crypto_skcipher_ctx(tfm);
+	u8 buf[DES_BLOCK_SIZE], *ctrptr;
+	struct skcipher_walk walk;
+	unsigned int n, nbytes;
+	int ret, locked;
+
+	locked = mutex_trylock(&ctrblk_lock);
+
+	ret = skcipher_walk_virt(&walk, req, false);
+	while ((nbytes = walk.nbytes) >= DES_BLOCK_SIZE) {
+		n = DES_BLOCK_SIZE;
+		if (nbytes >= 2*DES_BLOCK_SIZE && locked)
+			n = __ctrblk_init(ctrblk, walk.iv, nbytes);
+		ctrptr = (n > DES_BLOCK_SIZE) ? ctrblk : walk.iv;
+		cpacf_kmctr(fc, ctx->key, walk.dst.virt.addr,
+			    walk.src.virt.addr, n, ctrptr);
+		if (ctrptr == ctrblk)
+			memcpy(walk.iv, ctrptr + n - DES_BLOCK_SIZE,
+				DES_BLOCK_SIZE);
+		crypto_inc(walk.iv, DES_BLOCK_SIZE);
+		ret = skcipher_walk_done(&walk, nbytes - n);
+	}
+	if (locked)
+		mutex_unlock(&ctrblk_lock);
+	/* final block may be < DES_BLOCK_SIZE, copy only nbytes */
+	if (nbytes) {
+		cpacf_kmctr(fc, ctx->key, buf, walk.src.virt.addr,
+			    DES_BLOCK_SIZE, walk.iv);
+		memcpy(walk.dst.virt.addr, buf, nbytes);
+		crypto_inc(walk.iv, DES_BLOCK_SIZE);
+		ret = skcipher_walk_done(&walk, 0);
+	}
+	return ret;
+}
+
+static int ctr_des_crypt(struct skcipher_request *req)
+{
+	return ctr_desall_crypt(req, CPACF_KMCTR_DEA);
+}
+
+static struct skcipher_alg ctr_des_alg = {
+	.base.cra_name		=	"ctr(des)",
+	.base.cra_driver_name	=	"ctr-des-s390",
+	.base.cra_priority	=	400,	/* combo: des + ctr */
+	.base.cra_blocksize	=	1,
+	.base.cra_ctxsize	=	sizeof(struct s390_des_ctx),
+	.base.cra_module	=	THIS_MODULE,
+	.min_keysize		=	DES_KEY_SIZE,
+	.max_keysize		=	DES_KEY_SIZE,
+	.ivsize			=	DES_BLOCK_SIZE,
+	.setkey			=	des_setkey_skcipher,
+	.encrypt		=	ctr_des_crypt,
+	.decrypt		=	ctr_des_crypt,
+	.chunksize		=	DES_BLOCK_SIZE,
+};
+
+static int ctr_des3_crypt(struct skcipher_request *req)
+{
+	return ctr_desall_crypt(req, CPACF_KMCTR_TDEA_192);
+}
+
+static struct skcipher_alg ctr_des3_alg = {
+	.base.cra_name		=	"ctr(des3_ede)",
+	.base.cra_driver_name	=	"ctr-des3_ede-s390",
+	.base.cra_priority	=	400,	/* combo: des3 + ede */
+	.base.cra_blocksize	=	1,
+	.base.cra_ctxsize	=	sizeof(struct s390_des_ctx),
+	.base.cra_module	=	THIS_MODULE,
+	.min_keysize		=	DES3_KEY_SIZE,
+	.max_keysize		=	DES3_KEY_SIZE,
+	.ivsize			=	DES_BLOCK_SIZE,
+	.setkey			=	des3_setkey_skcipher,
+	.encrypt		=	ctr_des3_crypt,
+	.decrypt		=	ctr_des3_crypt,
+	.chunksize		=	DES_BLOCK_SIZE,
+};
+
+static struct crypto_alg *des_s390_algs_ptr[2];
+static int des_s390_algs_num;
+static struct skcipher_alg *des_s390_skciphers_ptr[6];
+static int des_s390_skciphers_num;
+
+static int des_s390_register_alg(struct crypto_alg *alg)
+{
+	int ret;
+
+	ret = crypto_register_alg(alg);
+	if (!ret)
+		des_s390_algs_ptr[des_s390_algs_num++] = alg;
+	return ret;
+}
+
+static int des_s390_register_skcipher(struct skcipher_alg *alg)
+{
+	int ret;
+
+	ret = crypto_register_skcipher(alg);
+	if (!ret)
+		des_s390_skciphers_ptr[des_s390_skciphers_num++] = alg;
+	return ret;
+}
+
+static void des_s390_exit(void)
+{
+	while (des_s390_algs_num--)
+		crypto_unregister_alg(des_s390_algs_ptr[des_s390_algs_num]);
+	while (des_s390_skciphers_num--)
+		crypto_unregister_skcipher(des_s390_skciphers_ptr[des_s390_skciphers_num]);
+	if (ctrblk)
+		free_page((unsigned long) ctrblk);
+}
+
+static int __init des_s390_init(void)
+{
+	int ret;
+
+	/* Query available functions for KM, KMC and KMCTR */
+	cpacf_query(CPACF_KM, &km_functions);
+	cpacf_query(CPACF_KMC, &kmc_functions);
+	cpacf_query(CPACF_KMCTR, &kmctr_functions);
+
+	if (cpacf_test_func(&km_functions, CPACF_KM_DEA)) {
+		ret = des_s390_register_alg(&des_alg);
+		if (ret)
+			goto out_err;
+		ret = des_s390_register_skcipher(&ecb_des_alg);
+		if (ret)
+			goto out_err;
+	}
+	if (cpacf_test_func(&kmc_functions, CPACF_KMC_DEA)) {
+		ret = des_s390_register_skcipher(&cbc_des_alg);
+		if (ret)
+			goto out_err;
+	}
+	if (cpacf_test_func(&km_functions, CPACF_KM_TDEA_192)) {
+		ret = des_s390_register_alg(&des3_alg);
+		if (ret)
+			goto out_err;
+		ret = des_s390_register_skcipher(&ecb_des3_alg);
+		if (ret)
+			goto out_err;
+	}
+	if (cpacf_test_func(&kmc_functions, CPACF_KMC_TDEA_192)) {
+		ret = des_s390_register_skcipher(&cbc_des3_alg);
+		if (ret)
+			goto out_err;
+	}
+
+	if (cpacf_test_func(&kmctr_functions, CPACF_KMCTR_DEA) ||
+	    cpacf_test_func(&kmctr_functions, CPACF_KMCTR_TDEA_192)) {
+		ctrblk = (u8 *) __get_free_page(GFP_KERNEL);
+		if (!ctrblk) {
+			ret = -ENOMEM;
+			goto out_err;
+		}
+	}
+
+	if (cpacf_test_func(&kmctr_functions, CPACF_KMCTR_DEA)) {
+		ret = des_s390_register_skcipher(&ctr_des_alg);
+		if (ret)
+			goto out_err;
+	}
+	if (cpacf_test_func(&kmctr_functions, CPACF_KMCTR_TDEA_192)) {
+		ret = des_s390_register_skcipher(&ctr_des3_alg);
+		if (ret)
+			goto out_err;
+	}
+
+	return 0;
+out_err:
+	des_s390_exit();
+	return ret;
+}
+
+module_cpu_feature_match(S390_CPU_FEATURE_MSA, des_s390_init);
+module_exit(des_s390_exit);
+
+MODULE_ALIAS_CRYPTO("des");
+MODULE_ALIAS_CRYPTO("des3_ede");
+
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("DES & Triple DES EDE Cipher Algorithms");
diff --git a/arch/s390/crypto/ghash_s390.c b/arch/s390/crypto/ghash_s390.c
new file mode 100644
index 0000000000..0800a2a579
--- /dev/null
+++ b/arch/s390/crypto/ghash_s390.c
@@ -0,0 +1,154 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Cryptographic API.
+ *
+ * s390 implementation of the GHASH algorithm for GCM (Galois/Counter Mode).
+ *
+ * Copyright IBM Corp. 2011
+ * Author(s): Gerald Schaefer <gerald.schaefer@de.ibm.com>
+ */
+
+#include <crypto/internal/hash.h>
+#include <linux/module.h>
+#include <linux/cpufeature.h>
+#include <asm/cpacf.h>
+
+#define GHASH_BLOCK_SIZE	16
+#define GHASH_DIGEST_SIZE	16
+
+struct ghash_ctx {
+	u8 key[GHASH_BLOCK_SIZE];
+};
+
+struct ghash_desc_ctx {
+	u8 icv[GHASH_BLOCK_SIZE];
+	u8 key[GHASH_BLOCK_SIZE];
+	u8 buffer[GHASH_BLOCK_SIZE];
+	u32 bytes;
+};
+
+static int ghash_init(struct shash_desc *desc)
+{
+	struct ghash_desc_ctx *dctx = shash_desc_ctx(desc);
+	struct ghash_ctx *ctx = crypto_shash_ctx(desc->tfm);
+
+	memset(dctx, 0, sizeof(*dctx));
+	memcpy(dctx->key, ctx->key, GHASH_BLOCK_SIZE);
+
+	return 0;
+}
+
+static int ghash_setkey(struct crypto_shash *tfm,
+			const u8 *key, unsigned int keylen)
+{
+	struct ghash_ctx *ctx = crypto_shash_ctx(tfm);
+
+	if (keylen != GHASH_BLOCK_SIZE)
+		return -EINVAL;
+
+	memcpy(ctx->key, key, GHASH_BLOCK_SIZE);
+
+	return 0;
+}
+
+static int ghash_update(struct shash_desc *desc,
+			 const u8 *src, unsigned int srclen)
+{
+	struct ghash_desc_ctx *dctx = shash_desc_ctx(desc);
+	unsigned int n;
+	u8 *buf = dctx->buffer;
+
+	if (dctx->bytes) {
+		u8 *pos = buf + (GHASH_BLOCK_SIZE - dctx->bytes);
+
+		n = min(srclen, dctx->bytes);
+		dctx->bytes -= n;
+		srclen -= n;
+
+		memcpy(pos, src, n);
+		src += n;
+
+		if (!dctx->bytes) {
+			cpacf_kimd(CPACF_KIMD_GHASH, dctx, buf,
+				   GHASH_BLOCK_SIZE);
+		}
+	}
+
+	n = srclen & ~(GHASH_BLOCK_SIZE - 1);
+	if (n) {
+		cpacf_kimd(CPACF_KIMD_GHASH, dctx, src, n);
+		src += n;
+		srclen -= n;
+	}
+
+	if (srclen) {
+		dctx->bytes = GHASH_BLOCK_SIZE - srclen;
+		memcpy(buf, src, srclen);
+	}
+
+	return 0;
+}
+
+static int ghash_flush(struct ghash_desc_ctx *dctx)
+{
+	u8 *buf = dctx->buffer;
+
+	if (dctx->bytes) {
+		u8 *pos = buf + (GHASH_BLOCK_SIZE - dctx->bytes);
+
+		memset(pos, 0, dctx->bytes);
+		cpacf_kimd(CPACF_KIMD_GHASH, dctx, buf, GHASH_BLOCK_SIZE);
+		dctx->bytes = 0;
+	}
+
+	return 0;
+}
+
+static int ghash_final(struct shash_desc *desc, u8 *dst)
+{
+	struct ghash_desc_ctx *dctx = shash_desc_ctx(desc);
+	int ret;
+
+	ret = ghash_flush(dctx);
+	if (!ret)
+		memcpy(dst, dctx->icv, GHASH_BLOCK_SIZE);
+	return ret;
+}
+
+static struct shash_alg ghash_alg = {
+	.digestsize	= GHASH_DIGEST_SIZE,
+	.init		= ghash_init,
+	.update		= ghash_update,
+	.final		= ghash_final,
+	.setkey		= ghash_setkey,
+	.descsize	= sizeof(struct ghash_desc_ctx),
+	.base		= {
+		.cra_name		= "ghash",
+		.cra_driver_name	= "ghash-s390",
+		.cra_priority		= 300,
+		.cra_blocksize		= GHASH_BLOCK_SIZE,
+		.cra_ctxsize		= sizeof(struct ghash_ctx),
+		.cra_module		= THIS_MODULE,
+	},
+};
+
+static int __init ghash_mod_init(void)
+{
+	if (!cpacf_query_func(CPACF_KIMD, CPACF_KIMD_GHASH))
+		return -ENODEV;
+
+	return crypto_register_shash(&ghash_alg);
+}
+
+static void __exit ghash_mod_exit(void)
+{
+	crypto_unregister_shash(&ghash_alg);
+}
+
+module_cpu_feature_match(S390_CPU_FEATURE_MSA, ghash_mod_init);
+module_exit(ghash_mod_exit);
+
+MODULE_ALIAS_CRYPTO("ghash");
+
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("GHASH hash function, s390 implementation");
diff --git a/arch/s390/crypto/paes_s390.c b/arch/s390/crypto/paes_s390.c
new file mode 100644
index 0000000000..55ee5567a5
--- /dev/null
+++ b/arch/s390/crypto/paes_s390.c
@@ -0,0 +1,809 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Cryptographic API.
+ *
+ * s390 implementation of the AES Cipher Algorithm with protected keys.
+ *
+ * s390 Version:
+ *   Copyright IBM Corp. 2017, 2023
+ *   Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>
+ *		Harald Freudenberger <freude@de.ibm.com>
+ */
+
+#define KMSG_COMPONENT "paes_s390"
+#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
+
+#include <crypto/aes.h>
+#include <crypto/algapi.h>
+#include <linux/bug.h>
+#include <linux/err.h>
+#include <linux/module.h>
+#include <linux/cpufeature.h>
+#include <linux/init.h>
+#include <linux/mutex.h>
+#include <linux/spinlock.h>
+#include <linux/delay.h>
+#include <crypto/internal/skcipher.h>
+#include <crypto/xts.h>
+#include <asm/cpacf.h>
+#include <asm/pkey.h>
+
+/*
+ * Key blobs smaller/bigger than these defines are rejected
+ * by the common code even before the individual setkey function
+ * is called. As paes can handle different kinds of key blobs
+ * and padding is also possible, the limits need to be generous.
+ */
+#define PAES_MIN_KEYSIZE 16
+#define PAES_MAX_KEYSIZE MAXEP11AESKEYBLOBSIZE
+
+static u8 *ctrblk;
+static DEFINE_MUTEX(ctrblk_lock);
+
+static cpacf_mask_t km_functions, kmc_functions, kmctr_functions;
+
+struct key_blob {
+	/*
+	 * Small keys will be stored in the keybuf. Larger keys are
+	 * stored in extra allocated memory. In both cases does
+	 * key point to the memory where the key is stored.
+	 * The code distinguishes by checking keylen against
+	 * sizeof(keybuf). See the two following helper functions.
+	 */
+	u8 *key;
+	u8 keybuf[128];
+	unsigned int keylen;
+};
+
+static inline int _key_to_kb(struct key_blob *kb,
+			     const u8 *key,
+			     unsigned int keylen)
+{
+	struct clearkey_header {
+		u8  type;
+		u8  res0[3];
+		u8  version;
+		u8  res1[3];
+		u32 keytype;
+		u32 len;
+	} __packed * h;
+
+	switch (keylen) {
+	case 16:
+	case 24:
+	case 32:
+		/* clear key value, prepare pkey clear key token in keybuf */
+		memset(kb->keybuf, 0, sizeof(kb->keybuf));
+		h = (struct clearkey_header *) kb->keybuf;
+		h->version = 0x02; /* TOKVER_CLEAR_KEY */
+		h->keytype = (keylen - 8) >> 3;
+		h->len = keylen;
+		memcpy(kb->keybuf + sizeof(*h), key, keylen);
+		kb->keylen = sizeof(*h) + keylen;
+		kb->key = kb->keybuf;
+		break;
+	default:
+		/* other key material, let pkey handle this */
+		if (keylen <= sizeof(kb->keybuf))
+			kb->key = kb->keybuf;
+		else {
+			kb->key = kmalloc(keylen, GFP_KERNEL);
+			if (!kb->key)
+				return -ENOMEM;
+		}
+		memcpy(kb->key, key, keylen);
+		kb->keylen = keylen;
+		break;
+	}
+
+	return 0;
+}
+
+static inline void _free_kb_keybuf(struct key_blob *kb)
+{
+	if (kb->key && kb->key != kb->keybuf
+	    && kb->keylen > sizeof(kb->keybuf)) {
+		kfree_sensitive(kb->key);
+		kb->key = NULL;
+	}
+}
+
+struct s390_paes_ctx {
+	struct key_blob kb;
+	struct pkey_protkey pk;
+	spinlock_t pk_lock;
+	unsigned long fc;
+};
+
+struct s390_pxts_ctx {
+	struct key_blob kb[2];
+	struct pkey_protkey pk[2];
+	spinlock_t pk_lock;
+	unsigned long fc;
+};
+
+static inline int __paes_keyblob2pkey(struct key_blob *kb,
+				     struct pkey_protkey *pk)
+{
+	int i, ret;
+
+	/* try three times in case of failure */
+	for (i = 0; i < 3; i++) {
+		if (i > 0 && ret == -EAGAIN && in_task())
+			if (msleep_interruptible(1000))
+				return -EINTR;
+		ret = pkey_keyblob2pkey(kb->key, kb->keylen,
+					pk->protkey, &pk->len, &pk->type);
+		if (ret == 0)
+			break;
+	}
+
+	return ret;
+}
+
+static inline int __paes_convert_key(struct s390_paes_ctx *ctx)
+{
+	int ret;
+	struct pkey_protkey pkey;
+
+	pkey.len = sizeof(pkey.protkey);
+	ret = __paes_keyblob2pkey(&ctx->kb, &pkey);
+	if (ret)
+		return ret;
+
+	spin_lock_bh(&ctx->pk_lock);
+	memcpy(&ctx->pk, &pkey, sizeof(pkey));
+	spin_unlock_bh(&ctx->pk_lock);
+
+	return 0;
+}
+
+static int ecb_paes_init(struct crypto_skcipher *tfm)
+{
+	struct s390_paes_ctx *ctx = crypto_skcipher_ctx(tfm);
+
+	ctx->kb.key = NULL;
+	spin_lock_init(&ctx->pk_lock);
+
+	return 0;
+}
+
+static void ecb_paes_exit(struct crypto_skcipher *tfm)
+{
+	struct s390_paes_ctx *ctx = crypto_skcipher_ctx(tfm);
+
+	_free_kb_keybuf(&ctx->kb);
+}
+
+static inline int __ecb_paes_set_key(struct s390_paes_ctx *ctx)
+{
+	int rc;
+	unsigned long fc;
+
+	rc = __paes_convert_key(ctx);
+	if (rc)
+		return rc;
+
+	/* Pick the correct function code based on the protected key type */
+	fc = (ctx->pk.type == PKEY_KEYTYPE_AES_128) ? CPACF_KM_PAES_128 :
+		(ctx->pk.type == PKEY_KEYTYPE_AES_192) ? CPACF_KM_PAES_192 :
+		(ctx->pk.type == PKEY_KEYTYPE_AES_256) ? CPACF_KM_PAES_256 : 0;
+
+	/* Check if the function code is available */
+	ctx->fc = (fc && cpacf_test_func(&km_functions, fc)) ? fc : 0;
+
+	return ctx->fc ? 0 : -EINVAL;
+}
+
+static int ecb_paes_set_key(struct crypto_skcipher *tfm, const u8 *in_key,
+			    unsigned int key_len)
+{
+	int rc;
+	struct s390_paes_ctx *ctx = crypto_skcipher_ctx(tfm);
+
+	_free_kb_keybuf(&ctx->kb);
+	rc = _key_to_kb(&ctx->kb, in_key, key_len);
+	if (rc)
+		return rc;
+
+	return __ecb_paes_set_key(ctx);
+}
+
+static int ecb_paes_crypt(struct skcipher_request *req, unsigned long modifier)
+{
+	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+	struct s390_paes_ctx *ctx = crypto_skcipher_ctx(tfm);
+	struct skcipher_walk walk;
+	unsigned int nbytes, n, k;
+	int ret;
+	struct {
+		u8 key[MAXPROTKEYSIZE];
+	} param;
+
+	ret = skcipher_walk_virt(&walk, req, false);
+	if (ret)
+		return ret;
+
+	spin_lock_bh(&ctx->pk_lock);
+	memcpy(param.key, ctx->pk.protkey, MAXPROTKEYSIZE);
+	spin_unlock_bh(&ctx->pk_lock);
+
+	while ((nbytes = walk.nbytes) != 0) {
+		/* only use complete blocks */
+		n = nbytes & ~(AES_BLOCK_SIZE - 1);
+		k = cpacf_km(ctx->fc | modifier, &param,
+			     walk.dst.virt.addr, walk.src.virt.addr, n);
+		if (k)
+			ret = skcipher_walk_done(&walk, nbytes - k);
+		if (k < n) {
+			if (__paes_convert_key(ctx))
+				return skcipher_walk_done(&walk, -EIO);
+			spin_lock_bh(&ctx->pk_lock);
+			memcpy(param.key, ctx->pk.protkey, MAXPROTKEYSIZE);
+			spin_unlock_bh(&ctx->pk_lock);
+		}
+	}
+	return ret;
+}
+
+static int ecb_paes_encrypt(struct skcipher_request *req)
+{
+	return ecb_paes_crypt(req, 0);
+}
+
+static int ecb_paes_decrypt(struct skcipher_request *req)
+{
+	return ecb_paes_crypt(req, CPACF_DECRYPT);
+}
+
+static struct skcipher_alg ecb_paes_alg = {
+	.base.cra_name		=	"ecb(paes)",
+	.base.cra_driver_name	=	"ecb-paes-s390",
+	.base.cra_priority	=	401,	/* combo: aes + ecb + 1 */
+	.base.cra_blocksize	=	AES_BLOCK_SIZE,
+	.base.cra_ctxsize	=	sizeof(struct s390_paes_ctx),
+	.base.cra_module	=	THIS_MODULE,
+	.base.cra_list		=	LIST_HEAD_INIT(ecb_paes_alg.base.cra_list),
+	.init			=	ecb_paes_init,
+	.exit			=	ecb_paes_exit,
+	.min_keysize		=	PAES_MIN_KEYSIZE,
+	.max_keysize		=	PAES_MAX_KEYSIZE,
+	.setkey			=	ecb_paes_set_key,
+	.encrypt		=	ecb_paes_encrypt,
+	.decrypt		=	ecb_paes_decrypt,
+};
+
+static int cbc_paes_init(struct crypto_skcipher *tfm)
+{
+	struct s390_paes_ctx *ctx = crypto_skcipher_ctx(tfm);
+
+	ctx->kb.key = NULL;
+	spin_lock_init(&ctx->pk_lock);
+
+	return 0;
+}
+
+static void cbc_paes_exit(struct crypto_skcipher *tfm)
+{
+	struct s390_paes_ctx *ctx = crypto_skcipher_ctx(tfm);
+
+	_free_kb_keybuf(&ctx->kb);
+}
+
+static inline int __cbc_paes_set_key(struct s390_paes_ctx *ctx)
+{
+	int rc;
+	unsigned long fc;
+
+	rc = __paes_convert_key(ctx);
+	if (rc)
+		return rc;
+
+	/* Pick the correct function code based on the protected key type */
+	fc = (ctx->pk.type == PKEY_KEYTYPE_AES_128) ? CPACF_KMC_PAES_128 :
+		(ctx->pk.type == PKEY_KEYTYPE_AES_192) ? CPACF_KMC_PAES_192 :
+		(ctx->pk.type == PKEY_KEYTYPE_AES_256) ? CPACF_KMC_PAES_256 : 0;
+
+	/* Check if the function code is available */
+	ctx->fc = (fc && cpacf_test_func(&kmc_functions, fc)) ? fc : 0;
+
+	return ctx->fc ? 0 : -EINVAL;
+}
+
+static int cbc_paes_set_key(struct crypto_skcipher *tfm, const u8 *in_key,
+			    unsigned int key_len)
+{
+	int rc;
+	struct s390_paes_ctx *ctx = crypto_skcipher_ctx(tfm);
+
+	_free_kb_keybuf(&ctx->kb);
+	rc = _key_to_kb(&ctx->kb, in_key, key_len);
+	if (rc)
+		return rc;
+
+	return __cbc_paes_set_key(ctx);
+}
+
+static int cbc_paes_crypt(struct skcipher_request *req, unsigned long modifier)
+{
+	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+	struct s390_paes_ctx *ctx = crypto_skcipher_ctx(tfm);
+	struct skcipher_walk walk;
+	unsigned int nbytes, n, k;
+	int ret;
+	struct {
+		u8 iv[AES_BLOCK_SIZE];
+		u8 key[MAXPROTKEYSIZE];
+	} param;
+
+	ret = skcipher_walk_virt(&walk, req, false);
+	if (ret)
+		return ret;
+
+	memcpy(param.iv, walk.iv, AES_BLOCK_SIZE);
+	spin_lock_bh(&ctx->pk_lock);
+	memcpy(param.key, ctx->pk.protkey, MAXPROTKEYSIZE);
+	spin_unlock_bh(&ctx->pk_lock);
+
+	while ((nbytes = walk.nbytes) != 0) {
+		/* only use complete blocks */
+		n = nbytes & ~(AES_BLOCK_SIZE - 1);
+		k = cpacf_kmc(ctx->fc | modifier, &param,
+			      walk.dst.virt.addr, walk.src.virt.addr, n);
+		if (k) {
+			memcpy(walk.iv, param.iv, AES_BLOCK_SIZE);
+			ret = skcipher_walk_done(&walk, nbytes - k);
+		}
+		if (k < n) {
+			if (__paes_convert_key(ctx))
+				return skcipher_walk_done(&walk, -EIO);
+			spin_lock_bh(&ctx->pk_lock);
+			memcpy(param.key, ctx->pk.protkey, MAXPROTKEYSIZE);
+			spin_unlock_bh(&ctx->pk_lock);
+		}
+	}
+	return ret;
+}
+
+static int cbc_paes_encrypt(struct skcipher_request *req)
+{
+	return cbc_paes_crypt(req, 0);
+}
+
+static int cbc_paes_decrypt(struct skcipher_request *req)
+{
+	return cbc_paes_crypt(req, CPACF_DECRYPT);
+}
+
+static struct skcipher_alg cbc_paes_alg = {
+	.base.cra_name		=	"cbc(paes)",
+	.base.cra_driver_name	=	"cbc-paes-s390",
+	.base.cra_priority	=	402,	/* ecb-paes-s390 + 1 */
+	.base.cra_blocksize	=	AES_BLOCK_SIZE,
+	.base.cra_ctxsize	=	sizeof(struct s390_paes_ctx),
+	.base.cra_module	=	THIS_MODULE,
+	.base.cra_list		=	LIST_HEAD_INIT(cbc_paes_alg.base.cra_list),
+	.init			=	cbc_paes_init,
+	.exit			=	cbc_paes_exit,
+	.min_keysize		=	PAES_MIN_KEYSIZE,
+	.max_keysize		=	PAES_MAX_KEYSIZE,
+	.ivsize			=	AES_BLOCK_SIZE,
+	.setkey			=	cbc_paes_set_key,
+	.encrypt		=	cbc_paes_encrypt,
+	.decrypt		=	cbc_paes_decrypt,
+};
+
+static int xts_paes_init(struct crypto_skcipher *tfm)
+{
+	struct s390_pxts_ctx *ctx = crypto_skcipher_ctx(tfm);
+
+	ctx->kb[0].key = NULL;
+	ctx->kb[1].key = NULL;
+	spin_lock_init(&ctx->pk_lock);
+
+	return 0;
+}
+
+static void xts_paes_exit(struct crypto_skcipher *tfm)
+{
+	struct s390_pxts_ctx *ctx = crypto_skcipher_ctx(tfm);
+
+	_free_kb_keybuf(&ctx->kb[0]);
+	_free_kb_keybuf(&ctx->kb[1]);
+}
+
+static inline int __xts_paes_convert_key(struct s390_pxts_ctx *ctx)
+{
+	struct pkey_protkey pkey0, pkey1;
+
+	pkey0.len = sizeof(pkey0.protkey);
+	pkey1.len = sizeof(pkey1.protkey);
+
+	if (__paes_keyblob2pkey(&ctx->kb[0], &pkey0) ||
+	    __paes_keyblob2pkey(&ctx->kb[1], &pkey1))
+		return -EINVAL;
+
+	spin_lock_bh(&ctx->pk_lock);
+	memcpy(&ctx->pk[0], &pkey0, sizeof(pkey0));
+	memcpy(&ctx->pk[1], &pkey1, sizeof(pkey1));
+	spin_unlock_bh(&ctx->pk_lock);
+
+	return 0;
+}
+
+static inline int __xts_paes_set_key(struct s390_pxts_ctx *ctx)
+{
+	unsigned long fc;
+
+	if (__xts_paes_convert_key(ctx))
+		return -EINVAL;
+
+	if (ctx->pk[0].type != ctx->pk[1].type)
+		return -EINVAL;
+
+	/* Pick the correct function code based on the protected key type */
+	fc = (ctx->pk[0].type == PKEY_KEYTYPE_AES_128) ? CPACF_KM_PXTS_128 :
+		(ctx->pk[0].type == PKEY_KEYTYPE_AES_256) ?
+		CPACF_KM_PXTS_256 : 0;
+
+	/* Check if the function code is available */
+	ctx->fc = (fc && cpacf_test_func(&km_functions, fc)) ? fc : 0;
+
+	return ctx->fc ? 0 : -EINVAL;
+}
+
+static int xts_paes_set_key(struct crypto_skcipher *tfm, const u8 *in_key,
+			    unsigned int xts_key_len)
+{
+	int rc;
+	struct s390_pxts_ctx *ctx = crypto_skcipher_ctx(tfm);
+	u8 ckey[2 * AES_MAX_KEY_SIZE];
+	unsigned int ckey_len, key_len;
+
+	if (xts_key_len % 2)
+		return -EINVAL;
+
+	key_len = xts_key_len / 2;
+
+	_free_kb_keybuf(&ctx->kb[0]);
+	_free_kb_keybuf(&ctx->kb[1]);
+	rc = _key_to_kb(&ctx->kb[0], in_key, key_len);
+	if (rc)
+		return rc;
+	rc = _key_to_kb(&ctx->kb[1], in_key + key_len, key_len);
+	if (rc)
+		return rc;
+
+	rc = __xts_paes_set_key(ctx);
+	if (rc)
+		return rc;
+
+	/*
+	 * xts_verify_key verifies the key length is not odd and makes
+	 * sure that the two keys are not the same. This can be done
+	 * on the two protected keys as well
+	 */
+	ckey_len = (ctx->pk[0].type == PKEY_KEYTYPE_AES_128) ?
+		AES_KEYSIZE_128 : AES_KEYSIZE_256;
+	memcpy(ckey, ctx->pk[0].protkey, ckey_len);
+	memcpy(ckey + ckey_len, ctx->pk[1].protkey, ckey_len);
+	return xts_verify_key(tfm, ckey, 2*ckey_len);
+}
+
+static int xts_paes_crypt(struct skcipher_request *req, unsigned long modifier)
+{
+	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+	struct s390_pxts_ctx *ctx = crypto_skcipher_ctx(tfm);
+	struct skcipher_walk walk;
+	unsigned int keylen, offset, nbytes, n, k;
+	int ret;
+	struct {
+		u8 key[MAXPROTKEYSIZE];	/* key + verification pattern */
+		u8 tweak[16];
+		u8 block[16];
+		u8 bit[16];
+		u8 xts[16];
+	} pcc_param;
+	struct {
+		u8 key[MAXPROTKEYSIZE];	/* key + verification pattern */
+		u8 init[16];
+	} xts_param;
+
+	ret = skcipher_walk_virt(&walk, req, false);
+	if (ret)
+		return ret;
+
+	keylen = (ctx->pk[0].type == PKEY_KEYTYPE_AES_128) ? 48 : 64;
+	offset = (ctx->pk[0].type == PKEY_KEYTYPE_AES_128) ? 16 : 0;
+
+	memset(&pcc_param, 0, sizeof(pcc_param));
+	memcpy(pcc_param.tweak, walk.iv, sizeof(pcc_param.tweak));
+	spin_lock_bh(&ctx->pk_lock);
+	memcpy(pcc_param.key + offset, ctx->pk[1].protkey, keylen);
+	memcpy(xts_param.key + offset, ctx->pk[0].protkey, keylen);
+	spin_unlock_bh(&ctx->pk_lock);
+	cpacf_pcc(ctx->fc, pcc_param.key + offset);
+	memcpy(xts_param.init, pcc_param.xts, 16);
+
+	while ((nbytes = walk.nbytes) != 0) {
+		/* only use complete blocks */
+		n = nbytes & ~(AES_BLOCK_SIZE - 1);
+		k = cpacf_km(ctx->fc | modifier, xts_param.key + offset,
+			     walk.dst.virt.addr, walk.src.virt.addr, n);
+		if (k)
+			ret = skcipher_walk_done(&walk, nbytes - k);
+		if (k < n) {
+			if (__xts_paes_convert_key(ctx))
+				return skcipher_walk_done(&walk, -EIO);
+			spin_lock_bh(&ctx->pk_lock);
+			memcpy(xts_param.key + offset,
+			       ctx->pk[0].protkey, keylen);
+			spin_unlock_bh(&ctx->pk_lock);
+		}
+	}
+
+	return ret;
+}
+
+static int xts_paes_encrypt(struct skcipher_request *req)
+{
+	return xts_paes_crypt(req, 0);
+}
+
+static int xts_paes_decrypt(struct skcipher_request *req)
+{
+	return xts_paes_crypt(req, CPACF_DECRYPT);
+}
+
+static struct skcipher_alg xts_paes_alg = {
+	.base.cra_name		=	"xts(paes)",
+	.base.cra_driver_name	=	"xts-paes-s390",
+	.base.cra_priority	=	402,	/* ecb-paes-s390 + 1 */
+	.base.cra_blocksize	=	AES_BLOCK_SIZE,
+	.base.cra_ctxsize	=	sizeof(struct s390_pxts_ctx),
+	.base.cra_module	=	THIS_MODULE,
+	.base.cra_list		=	LIST_HEAD_INIT(xts_paes_alg.base.cra_list),
+	.init			=	xts_paes_init,
+	.exit			=	xts_paes_exit,
+	.min_keysize		=	2 * PAES_MIN_KEYSIZE,
+	.max_keysize		=	2 * PAES_MAX_KEYSIZE,
+	.ivsize			=	AES_BLOCK_SIZE,
+	.setkey			=	xts_paes_set_key,
+	.encrypt		=	xts_paes_encrypt,
+	.decrypt		=	xts_paes_decrypt,
+};
+
+static int ctr_paes_init(struct crypto_skcipher *tfm)
+{
+	struct s390_paes_ctx *ctx = crypto_skcipher_ctx(tfm);
+
+	ctx->kb.key = NULL;
+	spin_lock_init(&ctx->pk_lock);
+
+	return 0;
+}
+
+static void ctr_paes_exit(struct crypto_skcipher *tfm)
+{
+	struct s390_paes_ctx *ctx = crypto_skcipher_ctx(tfm);
+
+	_free_kb_keybuf(&ctx->kb);
+}
+
+static inline int __ctr_paes_set_key(struct s390_paes_ctx *ctx)
+{
+	int rc;
+	unsigned long fc;
+
+	rc = __paes_convert_key(ctx);
+	if (rc)
+		return rc;
+
+	/* Pick the correct function code based on the protected key type */
+	fc = (ctx->pk.type == PKEY_KEYTYPE_AES_128) ? CPACF_KMCTR_PAES_128 :
+		(ctx->pk.type == PKEY_KEYTYPE_AES_192) ? CPACF_KMCTR_PAES_192 :
+		(ctx->pk.type == PKEY_KEYTYPE_AES_256) ?
+		CPACF_KMCTR_PAES_256 : 0;
+
+	/* Check if the function code is available */
+	ctx->fc = (fc && cpacf_test_func(&kmctr_functions, fc)) ? fc : 0;
+
+	return ctx->fc ? 0 : -EINVAL;
+}
+
+static int ctr_paes_set_key(struct crypto_skcipher *tfm, const u8 *in_key,
+			    unsigned int key_len)
+{
+	int rc;
+	struct s390_paes_ctx *ctx = crypto_skcipher_ctx(tfm);
+
+	_free_kb_keybuf(&ctx->kb);
+	rc = _key_to_kb(&ctx->kb, in_key, key_len);
+	if (rc)
+		return rc;
+
+	return __ctr_paes_set_key(ctx);
+}
+
+static unsigned int __ctrblk_init(u8 *ctrptr, u8 *iv, unsigned int nbytes)
+{
+	unsigned int i, n;
+
+	/* only use complete blocks, max. PAGE_SIZE */
+	memcpy(ctrptr, iv, AES_BLOCK_SIZE);
+	n = (nbytes > PAGE_SIZE) ? PAGE_SIZE : nbytes & ~(AES_BLOCK_SIZE - 1);
+	for (i = (n / AES_BLOCK_SIZE) - 1; i > 0; i--) {
+		memcpy(ctrptr + AES_BLOCK_SIZE, ctrptr, AES_BLOCK_SIZE);
+		crypto_inc(ctrptr + AES_BLOCK_SIZE, AES_BLOCK_SIZE);
+		ctrptr += AES_BLOCK_SIZE;
+	}
+	return n;
+}
+
+static int ctr_paes_crypt(struct skcipher_request *req)
+{
+	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+	struct s390_paes_ctx *ctx = crypto_skcipher_ctx(tfm);
+	u8 buf[AES_BLOCK_SIZE], *ctrptr;
+	struct skcipher_walk walk;
+	unsigned int nbytes, n, k;
+	int ret, locked;
+	struct {
+		u8 key[MAXPROTKEYSIZE];
+	} param;
+
+	ret = skcipher_walk_virt(&walk, req, false);
+	if (ret)
+		return ret;
+
+	spin_lock_bh(&ctx->pk_lock);
+	memcpy(param.key, ctx->pk.protkey, MAXPROTKEYSIZE);
+	spin_unlock_bh(&ctx->pk_lock);
+
+	locked = mutex_trylock(&ctrblk_lock);
+
+	while ((nbytes = walk.nbytes) >= AES_BLOCK_SIZE) {
+		n = AES_BLOCK_SIZE;
+		if (nbytes >= 2*AES_BLOCK_SIZE && locked)
+			n = __ctrblk_init(ctrblk, walk.iv, nbytes);
+		ctrptr = (n > AES_BLOCK_SIZE) ? ctrblk : walk.iv;
+		k = cpacf_kmctr(ctx->fc, &param, walk.dst.virt.addr,
+				walk.src.virt.addr, n, ctrptr);
+		if (k) {
+			if (ctrptr == ctrblk)
+				memcpy(walk.iv, ctrptr + k - AES_BLOCK_SIZE,
+				       AES_BLOCK_SIZE);
+			crypto_inc(walk.iv, AES_BLOCK_SIZE);
+			ret = skcipher_walk_done(&walk, nbytes - k);
+		}
+		if (k < n) {
+			if (__paes_convert_key(ctx)) {
+				if (locked)
+					mutex_unlock(&ctrblk_lock);
+				return skcipher_walk_done(&walk, -EIO);
+			}
+			spin_lock_bh(&ctx->pk_lock);
+			memcpy(param.key, ctx->pk.protkey, MAXPROTKEYSIZE);
+			spin_unlock_bh(&ctx->pk_lock);
+		}
+	}
+	if (locked)
+		mutex_unlock(&ctrblk_lock);
+	/*
+	 * final block may be < AES_BLOCK_SIZE, copy only nbytes
+	 */
+	if (nbytes) {
+		memset(buf, 0, AES_BLOCK_SIZE);
+		memcpy(buf, walk.src.virt.addr, nbytes);
+		while (1) {
+			if (cpacf_kmctr(ctx->fc, &param, buf,
+					buf, AES_BLOCK_SIZE,
+					walk.iv) == AES_BLOCK_SIZE)
+				break;
+			if (__paes_convert_key(ctx))
+				return skcipher_walk_done(&walk, -EIO);
+			spin_lock_bh(&ctx->pk_lock);
+			memcpy(param.key, ctx->pk.protkey, MAXPROTKEYSIZE);
+			spin_unlock_bh(&ctx->pk_lock);
+		}
+		memcpy(walk.dst.virt.addr, buf, nbytes);
+		crypto_inc(walk.iv, AES_BLOCK_SIZE);
+		ret = skcipher_walk_done(&walk, nbytes);
+	}
+
+	return ret;
+}
+
+static struct skcipher_alg ctr_paes_alg = {
+	.base.cra_name		=	"ctr(paes)",
+	.base.cra_driver_name	=	"ctr-paes-s390",
+	.base.cra_priority	=	402,	/* ecb-paes-s390 + 1 */
+	.base.cra_blocksize	=	1,
+	.base.cra_ctxsize	=	sizeof(struct s390_paes_ctx),
+	.base.cra_module	=	THIS_MODULE,
+	.base.cra_list		=	LIST_HEAD_INIT(ctr_paes_alg.base.cra_list),
+	.init			=	ctr_paes_init,
+	.exit			=	ctr_paes_exit,
+	.min_keysize		=	PAES_MIN_KEYSIZE,
+	.max_keysize		=	PAES_MAX_KEYSIZE,
+	.ivsize			=	AES_BLOCK_SIZE,
+	.setkey			=	ctr_paes_set_key,
+	.encrypt		=	ctr_paes_crypt,
+	.decrypt		=	ctr_paes_crypt,
+	.chunksize		=	AES_BLOCK_SIZE,
+};
+
+static inline void __crypto_unregister_skcipher(struct skcipher_alg *alg)
+{
+	if (!list_empty(&alg->base.cra_list))
+		crypto_unregister_skcipher(alg);
+}
+
+static void paes_s390_fini(void)
+{
+	__crypto_unregister_skcipher(&ctr_paes_alg);
+	__crypto_unregister_skcipher(&xts_paes_alg);
+	__crypto_unregister_skcipher(&cbc_paes_alg);
+	__crypto_unregister_skcipher(&ecb_paes_alg);
+	if (ctrblk)
+		free_page((unsigned long) ctrblk);
+}
+
+static int __init paes_s390_init(void)
+{
+	int ret;
+
+	/* Query available functions for KM, KMC and KMCTR */
+	cpacf_query(CPACF_KM, &km_functions);
+	cpacf_query(CPACF_KMC, &kmc_functions);
+	cpacf_query(CPACF_KMCTR, &kmctr_functions);
+
+	if (cpacf_test_func(&km_functions, CPACF_KM_PAES_128) ||
+	    cpacf_test_func(&km_functions, CPACF_KM_PAES_192) ||
+	    cpacf_test_func(&km_functions, CPACF_KM_PAES_256)) {
+		ret = crypto_register_skcipher(&ecb_paes_alg);
+		if (ret)
+			goto out_err;
+	}
+
+	if (cpacf_test_func(&kmc_functions, CPACF_KMC_PAES_128) ||
+	    cpacf_test_func(&kmc_functions, CPACF_KMC_PAES_192) ||
+	    cpacf_test_func(&kmc_functions, CPACF_KMC_PAES_256)) {
+		ret = crypto_register_skcipher(&cbc_paes_alg);
+		if (ret)
+			goto out_err;
+	}
+
+	if (cpacf_test_func(&km_functions, CPACF_KM_PXTS_128) ||
+	    cpacf_test_func(&km_functions, CPACF_KM_PXTS_256)) {
+		ret = crypto_register_skcipher(&xts_paes_alg);
+		if (ret)
+			goto out_err;
+	}
+
+	if (cpacf_test_func(&kmctr_functions, CPACF_KMCTR_PAES_128) ||
+	    cpacf_test_func(&kmctr_functions, CPACF_KMCTR_PAES_192) ||
+	    cpacf_test_func(&kmctr_functions, CPACF_KMCTR_PAES_256)) {
+		ctrblk = (u8 *) __get_free_page(GFP_KERNEL);
+		if (!ctrblk) {
+			ret = -ENOMEM;
+			goto out_err;
+		}
+		ret = crypto_register_skcipher(&ctr_paes_alg);
+		if (ret)
+			goto out_err;
+	}
+
+	return 0;
+out_err:
+	paes_s390_fini();
+	return ret;
+}
+
+module_init(paes_s390_init);
+module_exit(paes_s390_fini);
+
+MODULE_ALIAS_CRYPTO("paes");
+
+MODULE_DESCRIPTION("Rijndael (AES) Cipher Algorithm with protected keys");
+MODULE_LICENSE("GPL");
diff --git a/arch/s390/crypto/prng.c b/arch/s390/crypto/prng.c
new file mode 100644
index 0000000000..a077087bc6
--- /dev/null
+++ b/arch/s390/crypto/prng.c
@@ -0,0 +1,911 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright IBM Corp. 2006, 2015
+ * Author(s): Jan Glauber <jan.glauber@de.ibm.com>
+ *	      Harald Freudenberger <freude@de.ibm.com>
+ * Driver for the s390 pseudo random number generator
+ */
+
+#define KMSG_COMPONENT "prng"
+#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
+
+#include <linux/fs.h>
+#include <linux/fips.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/device.h>
+#include <linux/miscdevice.h>
+#include <linux/module.h>
+#include <linux/moduleparam.h>
+#include <linux/mutex.h>
+#include <linux/cpufeature.h>
+#include <linux/random.h>
+#include <linux/slab.h>
+#include <linux/sched/signal.h>
+
+#include <asm/debug.h>
+#include <linux/uaccess.h>
+#include <asm/timex.h>
+#include <asm/cpacf.h>
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("IBM Corporation");
+MODULE_DESCRIPTION("s390 PRNG interface");
+
+
+#define PRNG_MODE_AUTO	  0
+#define PRNG_MODE_TDES	  1
+#define PRNG_MODE_SHA512  2
+
+static unsigned int prng_mode = PRNG_MODE_AUTO;
+module_param_named(mode, prng_mode, int, 0);
+MODULE_PARM_DESC(prng_mode, "PRNG mode: 0 - auto, 1 - TDES, 2 - SHA512");
+
+
+#define PRNG_CHUNKSIZE_TDES_MIN   8
+#define PRNG_CHUNKSIZE_TDES_MAX   (64*1024)
+#define PRNG_CHUNKSIZE_SHA512_MIN 64
+#define PRNG_CHUNKSIZE_SHA512_MAX (64*1024)
+
+static unsigned int prng_chunk_size = 256;
+module_param_named(chunksize, prng_chunk_size, int, 0);
+MODULE_PARM_DESC(prng_chunk_size, "PRNG read chunk size in bytes");
+
+
+#define PRNG_RESEED_LIMIT_TDES		 4096
+#define PRNG_RESEED_LIMIT_TDES_LOWER	 4096
+#define PRNG_RESEED_LIMIT_SHA512       100000
+#define PRNG_RESEED_LIMIT_SHA512_LOWER	10000
+
+static unsigned int prng_reseed_limit;
+module_param_named(reseed_limit, prng_reseed_limit, int, 0);
+MODULE_PARM_DESC(prng_reseed_limit, "PRNG reseed limit");
+
+static bool trng_available;
+
+/*
+ * Any one who considers arithmetical methods of producing random digits is,
+ * of course, in a state of sin. -- John von Neumann
+ */
+
+static int prng_errorflag;
+
+#define PRNG_GEN_ENTROPY_FAILED  1
+#define PRNG_SELFTEST_FAILED	 2
+#define PRNG_INSTANTIATE_FAILED  3
+#define PRNG_SEED_FAILED	 4
+#define PRNG_RESEED_FAILED	 5
+#define PRNG_GEN_FAILED		 6
+
+struct prng_ws_s {
+	u8  parm_block[32];
+	u32 reseed_counter;
+	u64 byte_counter;
+};
+
+struct prno_ws_s {
+	u32 res;
+	u32 reseed_counter;
+	u64 stream_bytes;
+	u8  V[112];
+	u8  C[112];
+};
+
+struct prng_data_s {
+	struct mutex mutex;
+	union {
+		struct prng_ws_s prngws;
+		struct prno_ws_s prnows;
+	};
+	u8 *buf;
+	u32 rest;
+	u8 *prev;
+};
+
+static struct prng_data_s *prng_data;
+
+/* initial parameter block for tdes mode, copied from libica */
+static const u8 initial_parm_block[32] __initconst = {
+	0x0F, 0x2B, 0x8E, 0x63, 0x8C, 0x8E, 0xD2, 0x52,
+	0x64, 0xB7, 0xA0, 0x7B, 0x75, 0x28, 0xB8, 0xF4,
+	0x75, 0x5F, 0xD2, 0xA6, 0x8D, 0x97, 0x11, 0xFF,
+	0x49, 0xD8, 0x23, 0xF3, 0x7E, 0x21, 0xEC, 0xA0 };
+
+
+/*** helper functions ***/
+
+/*
+ * generate_entropy:
+ * This function fills a given buffer with random bytes. The entropy within
+ * the random bytes given back is assumed to have at least 50% - meaning
+ * a 64 bytes buffer has at least 64 * 8 / 2 = 256 bits of entropy.
+ * Within the function the entropy generation is done in junks of 64 bytes.
+ * So the caller should also ask for buffer fill in multiples of 64 bytes.
+ * The generation of the entropy is based on the assumption that every stckf()
+ * invocation produces 0.5 bits of entropy. To accumulate 256 bits of entropy
+ * at least 512 stckf() values are needed. The entropy relevant part of the
+ * stckf value is bit 51 (counting starts at the left with bit nr 0) so
+ * here we use the lower 4 bytes and exor the values into 2k of bufferspace.
+ * To be on the save side, if there is ever a problem with stckf() the
+ * other half of the page buffer is filled with bytes from urandom via
+ * get_random_bytes(), so this function consumes 2k of urandom for each
+ * requested 64 bytes output data. Finally the buffer page is condensed into
+ * a 64 byte value by hashing with a SHA512 hash.
+ */
+static int generate_entropy(u8 *ebuf, size_t nbytes)
+{
+	int n, ret = 0;
+	u8 *pg, pblock[80] = {
+		/* 8 x 64 bit init values */
+		0x6A, 0x09, 0xE6, 0x67, 0xF3, 0xBC, 0xC9, 0x08,
+		0xBB, 0x67, 0xAE, 0x85, 0x84, 0xCA, 0xA7, 0x3B,
+		0x3C, 0x6E, 0xF3, 0x72, 0xFE, 0x94, 0xF8, 0x2B,
+		0xA5, 0x4F, 0xF5, 0x3A, 0x5F, 0x1D, 0x36, 0xF1,
+		0x51, 0x0E, 0x52, 0x7F, 0xAD, 0xE6, 0x82, 0xD1,
+		0x9B, 0x05, 0x68, 0x8C, 0x2B, 0x3E, 0x6C, 0x1F,
+		0x1F, 0x83, 0xD9, 0xAB, 0xFB, 0x41, 0xBD, 0x6B,
+		0x5B, 0xE0, 0xCD, 0x19, 0x13, 0x7E, 0x21, 0x79,
+		/* 128 bit counter total message bit length */
+		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80, 0x00 };
+
+	/* allocate one page stckf buffer */
+	pg = (u8 *) __get_free_page(GFP_KERNEL);
+	if (!pg) {
+		prng_errorflag = PRNG_GEN_ENTROPY_FAILED;
+		return -ENOMEM;
+	}
+
+	/* fill the ebuf in chunks of 64 byte each */
+	while (nbytes) {
+		/* fill lower 2k with urandom bytes */
+		get_random_bytes(pg, PAGE_SIZE / 2);
+		/* exor upper 2k with 512 stckf values, offset 4 bytes each */
+		for (n = 0; n < 512; n++) {
+			int offset = (PAGE_SIZE / 2) + (n * 4) - 4;
+			u64 *p = (u64 *)(pg + offset);
+			*p ^= get_tod_clock_fast();
+		}
+		/* hash over the filled page */
+		cpacf_klmd(CPACF_KLMD_SHA_512, pblock, pg, PAGE_SIZE);
+		n = (nbytes < 64) ? nbytes : 64;
+		memcpy(ebuf, pblock, n);
+		ret += n;
+		ebuf += n;
+		nbytes -= n;
+	}
+
+	memzero_explicit(pblock, sizeof(pblock));
+	memzero_explicit(pg, PAGE_SIZE);
+	free_page((unsigned long)pg);
+	return ret;
+}
+
+
+/*** tdes functions ***/
+
+static void prng_tdes_add_entropy(void)
+{
+	__u64 entropy[4];
+	unsigned int i;
+
+	for (i = 0; i < 16; i++) {
+		cpacf_kmc(CPACF_KMC_PRNG, prng_data->prngws.parm_block,
+			  (char *) entropy, (char *) entropy,
+			  sizeof(entropy));
+		memcpy(prng_data->prngws.parm_block, entropy, sizeof(entropy));
+	}
+}
+
+
+static void prng_tdes_seed(int nbytes)
+{
+	char buf[16];
+	int i = 0;
+
+	BUG_ON(nbytes > sizeof(buf));
+
+	get_random_bytes(buf, nbytes);
+
+	/* Add the entropy */
+	while (nbytes >= 8) {
+		*((__u64 *)prng_data->prngws.parm_block) ^= *((__u64 *)(buf+i));
+		prng_tdes_add_entropy();
+		i += 8;
+		nbytes -= 8;
+	}
+	prng_tdes_add_entropy();
+	prng_data->prngws.reseed_counter = 0;
+}
+
+
+static int __init prng_tdes_instantiate(void)
+{
+	int datalen;
+
+	pr_debug("prng runs in TDES mode with "
+		 "chunksize=%d and reseed_limit=%u\n",
+		 prng_chunk_size, prng_reseed_limit);
+
+	/* memory allocation, prng_data struct init, mutex init */
+	datalen = sizeof(struct prng_data_s) + prng_chunk_size;
+	prng_data = kzalloc(datalen, GFP_KERNEL);
+	if (!prng_data) {
+		prng_errorflag = PRNG_INSTANTIATE_FAILED;
+		return -ENOMEM;
+	}
+	mutex_init(&prng_data->mutex);
+	prng_data->buf = ((u8 *)prng_data) + sizeof(struct prng_data_s);
+	memcpy(prng_data->prngws.parm_block, initial_parm_block, 32);
+
+	/* initialize the PRNG, add 128 bits of entropy */
+	prng_tdes_seed(16);
+
+	return 0;
+}
+
+
+static void prng_tdes_deinstantiate(void)
+{
+	pr_debug("The prng module stopped "
+		 "after running in triple DES mode\n");
+	kfree_sensitive(prng_data);
+}
+
+
+/*** sha512 functions ***/
+
+static int __init prng_sha512_selftest(void)
+{
+	/* NIST DRBG testvector for Hash Drbg, Sha-512, Count #0 */
+	static const u8 seed[] __initconst = {
+		0x6b, 0x50, 0xa7, 0xd8, 0xf8, 0xa5, 0x5d, 0x7a,
+		0x3d, 0xf8, 0xbb, 0x40, 0xbc, 0xc3, 0xb7, 0x22,
+		0xd8, 0x70, 0x8d, 0xe6, 0x7f, 0xda, 0x01, 0x0b,
+		0x03, 0xc4, 0xc8, 0x4d, 0x72, 0x09, 0x6f, 0x8c,
+		0x3e, 0xc6, 0x49, 0xcc, 0x62, 0x56, 0xd9, 0xfa,
+		0x31, 0xdb, 0x7a, 0x29, 0x04, 0xaa, 0xf0, 0x25 };
+	static const u8 V0[] __initconst = {
+		0x00, 0xad, 0xe3, 0x6f, 0x9a, 0x01, 0xc7, 0x76,
+		0x61, 0x34, 0x35, 0xf5, 0x4e, 0x24, 0x74, 0x22,
+		0x21, 0x9a, 0x29, 0x89, 0xc7, 0x93, 0x2e, 0x60,
+		0x1e, 0xe8, 0x14, 0x24, 0x8d, 0xd5, 0x03, 0xf1,
+		0x65, 0x5d, 0x08, 0x22, 0x72, 0xd5, 0xad, 0x95,
+		0xe1, 0x23, 0x1e, 0x8a, 0xa7, 0x13, 0xd9, 0x2b,
+		0x5e, 0xbc, 0xbb, 0x80, 0xab, 0x8d, 0xe5, 0x79,
+		0xab, 0x5b, 0x47, 0x4e, 0xdd, 0xee, 0x6b, 0x03,
+		0x8f, 0x0f, 0x5c, 0x5e, 0xa9, 0x1a, 0x83, 0xdd,
+		0xd3, 0x88, 0xb2, 0x75, 0x4b, 0xce, 0x83, 0x36,
+		0x57, 0x4b, 0xf1, 0x5c, 0xca, 0x7e, 0x09, 0xc0,
+		0xd3, 0x89, 0xc6, 0xe0, 0xda, 0xc4, 0x81, 0x7e,
+		0x5b, 0xf9, 0xe1, 0x01, 0xc1, 0x92, 0x05, 0xea,
+		0xf5, 0x2f, 0xc6, 0xc6, 0xc7, 0x8f, 0xbc, 0xf4 };
+	static const u8 C0[] __initconst = {
+		0x00, 0xf4, 0xa3, 0xe5, 0xa0, 0x72, 0x63, 0x95,
+		0xc6, 0x4f, 0x48, 0xd0, 0x8b, 0x5b, 0x5f, 0x8e,
+		0x6b, 0x96, 0x1f, 0x16, 0xed, 0xbc, 0x66, 0x94,
+		0x45, 0x31, 0xd7, 0x47, 0x73, 0x22, 0xa5, 0x86,
+		0xce, 0xc0, 0x4c, 0xac, 0x63, 0xb8, 0x39, 0x50,
+		0xbf, 0xe6, 0x59, 0x6c, 0x38, 0x58, 0x99, 0x1f,
+		0x27, 0xa7, 0x9d, 0x71, 0x2a, 0xb3, 0x7b, 0xf9,
+		0xfb, 0x17, 0x86, 0xaa, 0x99, 0x81, 0xaa, 0x43,
+		0xe4, 0x37, 0xd3, 0x1e, 0x6e, 0xe5, 0xe6, 0xee,
+		0xc2, 0xed, 0x95, 0x4f, 0x53, 0x0e, 0x46, 0x8a,
+		0xcc, 0x45, 0xa5, 0xdb, 0x69, 0x0d, 0x81, 0xc9,
+		0x32, 0x92, 0xbc, 0x8f, 0x33, 0xe6, 0xf6, 0x09,
+		0x7c, 0x8e, 0x05, 0x19, 0x0d, 0xf1, 0xb6, 0xcc,
+		0xf3, 0x02, 0x21, 0x90, 0x25, 0xec, 0xed, 0x0e };
+	static const u8 random[] __initconst = {
+		0x95, 0xb7, 0xf1, 0x7e, 0x98, 0x02, 0xd3, 0x57,
+		0x73, 0x92, 0xc6, 0xa9, 0xc0, 0x80, 0x83, 0xb6,
+		0x7d, 0xd1, 0x29, 0x22, 0x65, 0xb5, 0xf4, 0x2d,
+		0x23, 0x7f, 0x1c, 0x55, 0xbb, 0x9b, 0x10, 0xbf,
+		0xcf, 0xd8, 0x2c, 0x77, 0xa3, 0x78, 0xb8, 0x26,
+		0x6a, 0x00, 0x99, 0x14, 0x3b, 0x3c, 0x2d, 0x64,
+		0x61, 0x1e, 0xee, 0xb6, 0x9a, 0xcd, 0xc0, 0x55,
+		0x95, 0x7c, 0x13, 0x9e, 0x8b, 0x19, 0x0c, 0x7a,
+		0x06, 0x95, 0x5f, 0x2c, 0x79, 0x7c, 0x27, 0x78,
+		0xde, 0x94, 0x03, 0x96, 0xa5, 0x01, 0xf4, 0x0e,
+		0x91, 0x39, 0x6a, 0xcf, 0x8d, 0x7e, 0x45, 0xeb,
+		0xdb, 0xb5, 0x3b, 0xbf, 0x8c, 0x97, 0x52, 0x30,
+		0xd2, 0xf0, 0xff, 0x91, 0x06, 0xc7, 0x61, 0x19,
+		0xae, 0x49, 0x8e, 0x7f, 0xbc, 0x03, 0xd9, 0x0f,
+		0x8e, 0x4c, 0x51, 0x62, 0x7a, 0xed, 0x5c, 0x8d,
+		0x42, 0x63, 0xd5, 0xd2, 0xb9, 0x78, 0x87, 0x3a,
+		0x0d, 0xe5, 0x96, 0xee, 0x6d, 0xc7, 0xf7, 0xc2,
+		0x9e, 0x37, 0xee, 0xe8, 0xb3, 0x4c, 0x90, 0xdd,
+		0x1c, 0xf6, 0xa9, 0xdd, 0xb2, 0x2b, 0x4c, 0xbd,
+		0x08, 0x6b, 0x14, 0xb3, 0x5d, 0xe9, 0x3d, 0xa2,
+		0xd5, 0xcb, 0x18, 0x06, 0x69, 0x8c, 0xbd, 0x7b,
+		0xbb, 0x67, 0xbf, 0xe3, 0xd3, 0x1f, 0xd2, 0xd1,
+		0xdb, 0xd2, 0xa1, 0xe0, 0x58, 0xa3, 0xeb, 0x99,
+		0xd7, 0xe5, 0x1f, 0x1a, 0x93, 0x8e, 0xed, 0x5e,
+		0x1c, 0x1d, 0xe2, 0x3a, 0x6b, 0x43, 0x45, 0xd3,
+		0x19, 0x14, 0x09, 0xf9, 0x2f, 0x39, 0xb3, 0x67,
+		0x0d, 0x8d, 0xbf, 0xb6, 0x35, 0xd8, 0xe6, 0xa3,
+		0x69, 0x32, 0xd8, 0x10, 0x33, 0xd1, 0x44, 0x8d,
+		0x63, 0xb4, 0x03, 0xdd, 0xf8, 0x8e, 0x12, 0x1b,
+		0x6e, 0x81, 0x9a, 0xc3, 0x81, 0x22, 0x6c, 0x13,
+		0x21, 0xe4, 0xb0, 0x86, 0x44, 0xf6, 0x72, 0x7c,
+		0x36, 0x8c, 0x5a, 0x9f, 0x7a, 0x4b, 0x3e, 0xe2 };
+
+	u8 buf[sizeof(random)];
+	struct prno_ws_s ws;
+
+	memset(&ws, 0, sizeof(ws));
+
+	/* initial seed */
+	cpacf_prno(CPACF_PRNO_SHA512_DRNG_SEED,
+		   &ws, NULL, 0, seed, sizeof(seed));
+
+	/* check working states V and C */
+	if (memcmp(ws.V, V0, sizeof(V0)) != 0
+	    || memcmp(ws.C, C0, sizeof(C0)) != 0) {
+		pr_err("The prng self test state test "
+		       "for the SHA-512 mode failed\n");
+		prng_errorflag = PRNG_SELFTEST_FAILED;
+		return -EIO;
+	}
+
+	/* generate random bytes */
+	cpacf_prno(CPACF_PRNO_SHA512_DRNG_GEN,
+		   &ws, buf, sizeof(buf), NULL, 0);
+	cpacf_prno(CPACF_PRNO_SHA512_DRNG_GEN,
+		   &ws, buf, sizeof(buf), NULL, 0);
+
+	/* check against expected data */
+	if (memcmp(buf, random, sizeof(random)) != 0) {
+		pr_err("The prng self test data test "
+		       "for the SHA-512 mode failed\n");
+		prng_errorflag = PRNG_SELFTEST_FAILED;
+		return -EIO;
+	}
+
+	return 0;
+}
+
+
+static int __init prng_sha512_instantiate(void)
+{
+	int ret, datalen, seedlen;
+	u8 seed[128 + 16];
+
+	pr_debug("prng runs in SHA-512 mode "
+		 "with chunksize=%d and reseed_limit=%u\n",
+		 prng_chunk_size, prng_reseed_limit);
+
+	/* memory allocation, prng_data struct init, mutex init */
+	datalen = sizeof(struct prng_data_s) + prng_chunk_size;
+	if (fips_enabled)
+		datalen += prng_chunk_size;
+	prng_data = kzalloc(datalen, GFP_KERNEL);
+	if (!prng_data) {
+		prng_errorflag = PRNG_INSTANTIATE_FAILED;
+		return -ENOMEM;
+	}
+	mutex_init(&prng_data->mutex);
+	prng_data->buf = ((u8 *)prng_data) + sizeof(struct prng_data_s);
+
+	/* selftest */
+	ret = prng_sha512_selftest();
+	if (ret)
+		goto outfree;
+
+	/* generate initial seed, we need at least  256 + 128 bits entropy. */
+	if (trng_available) {
+		/*
+		 * Trng available, so use it. The trng works in chunks of
+		 * 32 bytes and produces 100% entropy. So we pull 64 bytes
+		 * which gives us 512 bits entropy.
+		 */
+		seedlen = 2 * 32;
+		cpacf_trng(NULL, 0, seed, seedlen);
+	} else {
+		/*
+		 * No trng available, so use the generate_entropy() function.
+		 * This function works in 64 byte junks and produces
+		 * 50% entropy. So we pull 2*64 bytes which gives us 512 bits
+		 * of entropy.
+		 */
+		seedlen = 2 * 64;
+		ret = generate_entropy(seed, seedlen);
+		if (ret != seedlen)
+			goto outfree;
+	}
+
+	/* append the seed by 16 bytes of unique nonce */
+	store_tod_clock_ext((union tod_clock *)(seed + seedlen));
+	seedlen += 16;
+
+	/* now initial seed of the prno drng */
+	cpacf_prno(CPACF_PRNO_SHA512_DRNG_SEED,
+		   &prng_data->prnows, NULL, 0, seed, seedlen);
+	memzero_explicit(seed, sizeof(seed));
+
+	/* if fips mode is enabled, generate a first block of random
+	   bytes for the FIPS 140-2 Conditional Self Test */
+	if (fips_enabled) {
+		prng_data->prev = prng_data->buf + prng_chunk_size;
+		cpacf_prno(CPACF_PRNO_SHA512_DRNG_GEN,
+			   &prng_data->prnows,
+			   prng_data->prev, prng_chunk_size, NULL, 0);
+	}
+
+	return 0;
+
+outfree:
+	kfree(prng_data);
+	return ret;
+}
+
+
+static void prng_sha512_deinstantiate(void)
+{
+	pr_debug("The prng module stopped after running in SHA-512 mode\n");
+	kfree_sensitive(prng_data);
+}
+
+
+static int prng_sha512_reseed(void)
+{
+	int ret, seedlen;
+	u8 seed[64];
+
+	/* We need at least 256 bits of fresh entropy for reseeding */
+	if (trng_available) {
+		/* trng produces 256 bits entropy in 32 bytes */
+		seedlen = 32;
+		cpacf_trng(NULL, 0, seed, seedlen);
+	} else {
+		/* generate_entropy() produces 256 bits entropy in 64 bytes */
+		seedlen = 64;
+		ret = generate_entropy(seed, seedlen);
+		if (ret != sizeof(seed))
+			return ret;
+	}
+
+	/* do a reseed of the prno drng with this bytestring */
+	cpacf_prno(CPACF_PRNO_SHA512_DRNG_SEED,
+		   &prng_data->prnows, NULL, 0, seed, seedlen);
+	memzero_explicit(seed, sizeof(seed));
+
+	return 0;
+}
+
+
+static int prng_sha512_generate(u8 *buf, size_t nbytes)
+{
+	int ret;
+
+	/* reseed needed ? */
+	if (prng_data->prnows.reseed_counter > prng_reseed_limit) {
+		ret = prng_sha512_reseed();
+		if (ret)
+			return ret;
+	}
+
+	/* PRNO generate */
+	cpacf_prno(CPACF_PRNO_SHA512_DRNG_GEN,
+		   &prng_data->prnows, buf, nbytes, NULL, 0);
+
+	/* FIPS 140-2 Conditional Self Test */
+	if (fips_enabled) {
+		if (!memcmp(prng_data->prev, buf, nbytes)) {
+			prng_errorflag = PRNG_GEN_FAILED;
+			return -EILSEQ;
+		}
+		memcpy(prng_data->prev, buf, nbytes);
+	}
+
+	return nbytes;
+}
+
+
+/*** file io functions ***/
+
+static int prng_open(struct inode *inode, struct file *file)
+{
+	return nonseekable_open(inode, file);
+}
+
+
+static ssize_t prng_tdes_read(struct file *file, char __user *ubuf,
+			      size_t nbytes, loff_t *ppos)
+{
+	int chunk, n, ret = 0;
+
+	/* lock prng_data struct */
+	if (mutex_lock_interruptible(&prng_data->mutex))
+		return -ERESTARTSYS;
+
+	while (nbytes) {
+		if (need_resched()) {
+			if (signal_pending(current)) {
+				if (ret == 0)
+					ret = -ERESTARTSYS;
+				break;
+			}
+			/* give mutex free before calling schedule() */
+			mutex_unlock(&prng_data->mutex);
+			schedule();
+			/* occupy mutex again */
+			if (mutex_lock_interruptible(&prng_data->mutex)) {
+				if (ret == 0)
+					ret = -ERESTARTSYS;
+				return ret;
+			}
+		}
+
+		/*
+		 * we lose some random bytes if an attacker issues
+		 * reads < 8 bytes, but we don't care
+		 */
+		chunk = min_t(int, nbytes, prng_chunk_size);
+
+		/* PRNG only likes multiples of 8 bytes */
+		n = (chunk + 7) & -8;
+
+		if (prng_data->prngws.reseed_counter > prng_reseed_limit)
+			prng_tdes_seed(8);
+
+		/* if the CPU supports PRNG stckf is present too */
+		*((unsigned long long *)prng_data->buf) = get_tod_clock_fast();
+
+		/*
+		 * Beside the STCKF the input for the TDES-EDE is the output
+		 * of the last operation. We differ here from X9.17 since we
+		 * only store one timestamp into the buffer. Padding the whole
+		 * buffer with timestamps does not improve security, since
+		 * successive stckf have nearly constant offsets.
+		 * If an attacker knows the first timestamp it would be
+		 * trivial to guess the additional values. One timestamp
+		 * is therefore enough and still guarantees unique input values.
+		 *
+		 * Note: you can still get strict X9.17 conformity by setting
+		 * prng_chunk_size to 8 bytes.
+		 */
+		cpacf_kmc(CPACF_KMC_PRNG, prng_data->prngws.parm_block,
+			  prng_data->buf, prng_data->buf, n);
+
+		prng_data->prngws.byte_counter += n;
+		prng_data->prngws.reseed_counter += n;
+
+		if (copy_to_user(ubuf, prng_data->buf, chunk)) {
+			ret = -EFAULT;
+			break;
+		}
+
+		nbytes -= chunk;
+		ret += chunk;
+		ubuf += chunk;
+	}
+
+	/* unlock prng_data struct */
+	mutex_unlock(&prng_data->mutex);
+
+	return ret;
+}
+
+
+static ssize_t prng_sha512_read(struct file *file, char __user *ubuf,
+				size_t nbytes, loff_t *ppos)
+{
+	int n, ret = 0;
+	u8 *p;
+
+	/* if errorflag is set do nothing and return 'broken pipe' */
+	if (prng_errorflag)
+		return -EPIPE;
+
+	/* lock prng_data struct */
+	if (mutex_lock_interruptible(&prng_data->mutex))
+		return -ERESTARTSYS;
+
+	while (nbytes) {
+		if (need_resched()) {
+			if (signal_pending(current)) {
+				if (ret == 0)
+					ret = -ERESTARTSYS;
+				break;
+			}
+			/* give mutex free before calling schedule() */
+			mutex_unlock(&prng_data->mutex);
+			schedule();
+			/* occopy mutex again */
+			if (mutex_lock_interruptible(&prng_data->mutex)) {
+				if (ret == 0)
+					ret = -ERESTARTSYS;
+				return ret;
+			}
+		}
+		if (prng_data->rest) {
+			/* push left over random bytes from the previous read */
+			p = prng_data->buf + prng_chunk_size - prng_data->rest;
+			n = (nbytes < prng_data->rest) ?
+				nbytes : prng_data->rest;
+			prng_data->rest -= n;
+		} else {
+			/* generate one chunk of random bytes into read buf */
+			p = prng_data->buf;
+			n = prng_sha512_generate(p, prng_chunk_size);
+			if (n < 0) {
+				ret = n;
+				break;
+			}
+			if (nbytes < prng_chunk_size) {
+				n = nbytes;
+				prng_data->rest = prng_chunk_size - n;
+			} else {
+				n = prng_chunk_size;
+				prng_data->rest = 0;
+			}
+		}
+		if (copy_to_user(ubuf, p, n)) {
+			ret = -EFAULT;
+			break;
+		}
+		memzero_explicit(p, n);
+		ubuf += n;
+		nbytes -= n;
+		ret += n;
+	}
+
+	/* unlock prng_data struct */
+	mutex_unlock(&prng_data->mutex);
+
+	return ret;
+}
+
+
+/*** sysfs stuff ***/
+
+static const struct file_operations prng_sha512_fops = {
+	.owner		= THIS_MODULE,
+	.open		= &prng_open,
+	.release	= NULL,
+	.read		= &prng_sha512_read,
+	.llseek		= noop_llseek,
+};
+static const struct file_operations prng_tdes_fops = {
+	.owner		= THIS_MODULE,
+	.open		= &prng_open,
+	.release	= NULL,
+	.read		= &prng_tdes_read,
+	.llseek		= noop_llseek,
+};
+
+/* chunksize attribute (ro) */
+static ssize_t prng_chunksize_show(struct device *dev,
+				   struct device_attribute *attr,
+				   char *buf)
+{
+	return scnprintf(buf, PAGE_SIZE, "%u\n", prng_chunk_size);
+}
+static DEVICE_ATTR(chunksize, 0444, prng_chunksize_show, NULL);
+
+/* counter attribute (ro) */
+static ssize_t prng_counter_show(struct device *dev,
+				 struct device_attribute *attr,
+				 char *buf)
+{
+	u64 counter;
+
+	if (mutex_lock_interruptible(&prng_data->mutex))
+		return -ERESTARTSYS;
+	if (prng_mode == PRNG_MODE_SHA512)
+		counter = prng_data->prnows.stream_bytes;
+	else
+		counter = prng_data->prngws.byte_counter;
+	mutex_unlock(&prng_data->mutex);
+
+	return scnprintf(buf, PAGE_SIZE, "%llu\n", counter);
+}
+static DEVICE_ATTR(byte_counter, 0444, prng_counter_show, NULL);
+
+/* errorflag attribute (ro) */
+static ssize_t prng_errorflag_show(struct device *dev,
+				   struct device_attribute *attr,
+				   char *buf)
+{
+	return scnprintf(buf, PAGE_SIZE, "%d\n", prng_errorflag);
+}
+static DEVICE_ATTR(errorflag, 0444, prng_errorflag_show, NULL);
+
+/* mode attribute (ro) */
+static ssize_t prng_mode_show(struct device *dev,
+			      struct device_attribute *attr,
+			      char *buf)
+{
+	if (prng_mode == PRNG_MODE_TDES)
+		return scnprintf(buf, PAGE_SIZE, "TDES\n");
+	else
+		return scnprintf(buf, PAGE_SIZE, "SHA512\n");
+}
+static DEVICE_ATTR(mode, 0444, prng_mode_show, NULL);
+
+/* reseed attribute (w) */
+static ssize_t prng_reseed_store(struct device *dev,
+				 struct device_attribute *attr,
+				 const char *buf, size_t count)
+{
+	if (mutex_lock_interruptible(&prng_data->mutex))
+		return -ERESTARTSYS;
+	prng_sha512_reseed();
+	mutex_unlock(&prng_data->mutex);
+
+	return count;
+}
+static DEVICE_ATTR(reseed, 0200, NULL, prng_reseed_store);
+
+/* reseed limit attribute (rw) */
+static ssize_t prng_reseed_limit_show(struct device *dev,
+				      struct device_attribute *attr,
+				      char *buf)
+{
+	return scnprintf(buf, PAGE_SIZE, "%u\n", prng_reseed_limit);
+}
+static ssize_t prng_reseed_limit_store(struct device *dev,
+				       struct device_attribute *attr,
+				       const char *buf, size_t count)
+{
+	unsigned limit;
+
+	if (sscanf(buf, "%u\n", &limit) != 1)
+		return -EINVAL;
+
+	if (prng_mode == PRNG_MODE_SHA512) {
+		if (limit < PRNG_RESEED_LIMIT_SHA512_LOWER)
+			return -EINVAL;
+	} else {
+		if (limit < PRNG_RESEED_LIMIT_TDES_LOWER)
+			return -EINVAL;
+	}
+
+	prng_reseed_limit = limit;
+
+	return count;
+}
+static DEVICE_ATTR(reseed_limit, 0644,
+		   prng_reseed_limit_show, prng_reseed_limit_store);
+
+/* strength attribute (ro) */
+static ssize_t prng_strength_show(struct device *dev,
+				  struct device_attribute *attr,
+				  char *buf)
+{
+	return scnprintf(buf, PAGE_SIZE, "256\n");
+}
+static DEVICE_ATTR(strength, 0444, prng_strength_show, NULL);
+
+static struct attribute *prng_sha512_dev_attrs[] = {
+	&dev_attr_errorflag.attr,
+	&dev_attr_chunksize.attr,
+	&dev_attr_byte_counter.attr,
+	&dev_attr_mode.attr,
+	&dev_attr_reseed.attr,
+	&dev_attr_reseed_limit.attr,
+	&dev_attr_strength.attr,
+	NULL
+};
+ATTRIBUTE_GROUPS(prng_sha512_dev);
+
+static struct attribute *prng_tdes_dev_attrs[] = {
+	&dev_attr_chunksize.attr,
+	&dev_attr_byte_counter.attr,
+	&dev_attr_mode.attr,
+	NULL
+};
+ATTRIBUTE_GROUPS(prng_tdes_dev);
+
+static struct miscdevice prng_sha512_dev = {
+	.name	= "prandom",
+	.minor	= MISC_DYNAMIC_MINOR,
+	.mode	= 0644,
+	.fops	= &prng_sha512_fops,
+	.groups = prng_sha512_dev_groups,
+};
+
+static struct miscdevice prng_tdes_dev = {
+	.name	= "prandom",
+	.minor	= MISC_DYNAMIC_MINOR,
+	.mode	= 0644,
+	.fops	= &prng_tdes_fops,
+	.groups = prng_tdes_dev_groups,
+};
+
+
+/*** module init and exit ***/
+
+static int __init prng_init(void)
+{
+	int ret;
+
+	/* check if the CPU has a PRNG */
+	if (!cpacf_query_func(CPACF_KMC, CPACF_KMC_PRNG))
+		return -ENODEV;
+
+	/* check if TRNG subfunction is available */
+	if (cpacf_query_func(CPACF_PRNO, CPACF_PRNO_TRNG))
+		trng_available = true;
+
+	/* choose prng mode */
+	if (prng_mode != PRNG_MODE_TDES) {
+		/* check for MSA5 support for PRNO operations */
+		if (!cpacf_query_func(CPACF_PRNO, CPACF_PRNO_SHA512_DRNG_GEN)) {
+			if (prng_mode == PRNG_MODE_SHA512) {
+				pr_err("The prng module cannot "
+				       "start in SHA-512 mode\n");
+				return -ENODEV;
+			}
+			prng_mode = PRNG_MODE_TDES;
+		} else
+			prng_mode = PRNG_MODE_SHA512;
+	}
+
+	if (prng_mode == PRNG_MODE_SHA512) {
+
+		/* SHA512 mode */
+
+		if (prng_chunk_size < PRNG_CHUNKSIZE_SHA512_MIN
+		    || prng_chunk_size > PRNG_CHUNKSIZE_SHA512_MAX)
+			return -EINVAL;
+		prng_chunk_size = (prng_chunk_size + 0x3f) & ~0x3f;
+
+		if (prng_reseed_limit == 0)
+			prng_reseed_limit = PRNG_RESEED_LIMIT_SHA512;
+		else if (prng_reseed_limit < PRNG_RESEED_LIMIT_SHA512_LOWER)
+			return -EINVAL;
+
+		ret = prng_sha512_instantiate();
+		if (ret)
+			goto out;
+
+		ret = misc_register(&prng_sha512_dev);
+		if (ret) {
+			prng_sha512_deinstantiate();
+			goto out;
+		}
+
+	} else {
+
+		/* TDES mode */
+
+		if (prng_chunk_size < PRNG_CHUNKSIZE_TDES_MIN
+		    || prng_chunk_size > PRNG_CHUNKSIZE_TDES_MAX)
+			return -EINVAL;
+		prng_chunk_size = (prng_chunk_size + 0x07) & ~0x07;
+
+		if (prng_reseed_limit == 0)
+			prng_reseed_limit = PRNG_RESEED_LIMIT_TDES;
+		else if (prng_reseed_limit < PRNG_RESEED_LIMIT_TDES_LOWER)
+			return -EINVAL;
+
+		ret = prng_tdes_instantiate();
+		if (ret)
+			goto out;
+
+		ret = misc_register(&prng_tdes_dev);
+		if (ret) {
+			prng_tdes_deinstantiate();
+			goto out;
+		}
+	}
+
+out:
+	return ret;
+}
+
+
+static void __exit prng_exit(void)
+{
+	if (prng_mode == PRNG_MODE_SHA512) {
+		misc_deregister(&prng_sha512_dev);
+		prng_sha512_deinstantiate();
+	} else {
+		misc_deregister(&prng_tdes_dev);
+		prng_tdes_deinstantiate();
+	}
+}
+
+module_cpu_feature_match(S390_CPU_FEATURE_MSA, prng_init);
+module_exit(prng_exit);
diff --git a/arch/s390/crypto/sha.h b/arch/s390/crypto/sha.h
new file mode 100644
index 0000000000..65ea12fc87
--- /dev/null
+++ b/arch/s390/crypto/sha.h
@@ -0,0 +1,35 @@
+/* SPDX-License-Identifier: GPL-2.0+ */
+/*
+ * Cryptographic API.
+ *
+ * s390 generic implementation of the SHA Secure Hash Algorithms.
+ *
+ * Copyright IBM Corp. 2007
+ * Author(s): Jan Glauber (jang@de.ibm.com)
+ */
+#ifndef _CRYPTO_ARCH_S390_SHA_H
+#define _CRYPTO_ARCH_S390_SHA_H
+
+#include <linux/crypto.h>
+#include <crypto/sha1.h>
+#include <crypto/sha2.h>
+#include <crypto/sha3.h>
+
+/* must be big enough for the largest SHA variant */
+#define SHA3_STATE_SIZE			200
+#define CPACF_MAX_PARMBLOCK_SIZE	SHA3_STATE_SIZE
+#define SHA_MAX_BLOCK_SIZE		SHA3_224_BLOCK_SIZE
+
+struct s390_sha_ctx {
+	u64 count;		/* message length in bytes */
+	u32 state[CPACF_MAX_PARMBLOCK_SIZE / sizeof(u32)];
+	u8 buf[SHA_MAX_BLOCK_SIZE];
+	int func;		/* KIMD function to use */
+};
+
+struct shash_desc;
+
+int s390_sha_update(struct shash_desc *desc, const u8 *data, unsigned int len);
+int s390_sha_final(struct shash_desc *desc, u8 *out);
+
+#endif
diff --git a/arch/s390/crypto/sha1_s390.c b/arch/s390/crypto/sha1_s390.c
new file mode 100644
index 0000000000..bc3a22704e
--- /dev/null
+++ b/arch/s390/crypto/sha1_s390.c
@@ -0,0 +1,103 @@
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * Cryptographic API.
+ *
+ * s390 implementation of the SHA1 Secure Hash Algorithm.
+ *
+ * Derived from cryptoapi implementation, adapted for in-place
+ * scatterlist interface.  Originally based on the public domain
+ * implementation written by Steve Reid.
+ *
+ * s390 Version:
+ *   Copyright IBM Corp. 2003, 2007
+ *   Author(s): Thomas Spatzier
+ *		Jan Glauber (jan.glauber@de.ibm.com)
+ *
+ * Derived from "crypto/sha1_generic.c"
+ *   Copyright (c) Alan Smithee.
+ *   Copyright (c) Andrew McDonald <andrew@mcdonald.org.uk>
+ *   Copyright (c) Jean-Francois Dive <jef@linuxbe.org>
+ */
+#include <crypto/internal/hash.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/cpufeature.h>
+#include <crypto/sha1.h>
+#include <asm/cpacf.h>
+
+#include "sha.h"
+
+static int s390_sha1_init(struct shash_desc *desc)
+{
+	struct s390_sha_ctx *sctx = shash_desc_ctx(desc);
+
+	sctx->state[0] = SHA1_H0;
+	sctx->state[1] = SHA1_H1;
+	sctx->state[2] = SHA1_H2;
+	sctx->state[3] = SHA1_H3;
+	sctx->state[4] = SHA1_H4;
+	sctx->count = 0;
+	sctx->func = CPACF_KIMD_SHA_1;
+
+	return 0;
+}
+
+static int s390_sha1_export(struct shash_desc *desc, void *out)
+{
+	struct s390_sha_ctx *sctx = shash_desc_ctx(desc);
+	struct sha1_state *octx = out;
+
+	octx->count = sctx->count;
+	memcpy(octx->state, sctx->state, sizeof(octx->state));
+	memcpy(octx->buffer, sctx->buf, sizeof(octx->buffer));
+	return 0;
+}
+
+static int s390_sha1_import(struct shash_desc *desc, const void *in)
+{
+	struct s390_sha_ctx *sctx = shash_desc_ctx(desc);
+	const struct sha1_state *ictx = in;
+
+	sctx->count = ictx->count;
+	memcpy(sctx->state, ictx->state, sizeof(ictx->state));
+	memcpy(sctx->buf, ictx->buffer, sizeof(ictx->buffer));
+	sctx->func = CPACF_KIMD_SHA_1;
+	return 0;
+}
+
+static struct shash_alg alg = {
+	.digestsize	=	SHA1_DIGEST_SIZE,
+	.init		=	s390_sha1_init,
+	.update		=	s390_sha_update,
+	.final		=	s390_sha_final,
+	.export		=	s390_sha1_export,
+	.import		=	s390_sha1_import,
+	.descsize	=	sizeof(struct s390_sha_ctx),
+	.statesize	=	sizeof(struct sha1_state),
+	.base		=	{
+		.cra_name	=	"sha1",
+		.cra_driver_name=	"sha1-s390",
+		.cra_priority	=	300,
+		.cra_blocksize	=	SHA1_BLOCK_SIZE,
+		.cra_module	=	THIS_MODULE,
+	}
+};
+
+static int __init sha1_s390_init(void)
+{
+	if (!cpacf_query_func(CPACF_KIMD, CPACF_KIMD_SHA_1))
+		return -ENODEV;
+	return crypto_register_shash(&alg);
+}
+
+static void __exit sha1_s390_fini(void)
+{
+	crypto_unregister_shash(&alg);
+}
+
+module_cpu_feature_match(S390_CPU_FEATURE_MSA, sha1_s390_init);
+module_exit(sha1_s390_fini);
+
+MODULE_ALIAS_CRYPTO("sha1");
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("SHA1 Secure Hash Algorithm");
diff --git a/arch/s390/crypto/sha256_s390.c b/arch/s390/crypto/sha256_s390.c
new file mode 100644
index 0000000000..6f1ccdf93d
--- /dev/null
+++ b/arch/s390/crypto/sha256_s390.c
@@ -0,0 +1,143 @@
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * Cryptographic API.
+ *
+ * s390 implementation of the SHA256 and SHA224 Secure Hash Algorithm.
+ *
+ * s390 Version:
+ *   Copyright IBM Corp. 2005, 2011
+ *   Author(s): Jan Glauber (jang@de.ibm.com)
+ */
+#include <crypto/internal/hash.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/cpufeature.h>
+#include <crypto/sha2.h>
+#include <asm/cpacf.h>
+
+#include "sha.h"
+
+static int s390_sha256_init(struct shash_desc *desc)
+{
+	struct s390_sha_ctx *sctx = shash_desc_ctx(desc);
+
+	sctx->state[0] = SHA256_H0;
+	sctx->state[1] = SHA256_H1;
+	sctx->state[2] = SHA256_H2;
+	sctx->state[3] = SHA256_H3;
+	sctx->state[4] = SHA256_H4;
+	sctx->state[5] = SHA256_H5;
+	sctx->state[6] = SHA256_H6;
+	sctx->state[7] = SHA256_H7;
+	sctx->count = 0;
+	sctx->func = CPACF_KIMD_SHA_256;
+
+	return 0;
+}
+
+static int sha256_export(struct shash_desc *desc, void *out)
+{
+	struct s390_sha_ctx *sctx = shash_desc_ctx(desc);
+	struct sha256_state *octx = out;
+
+	octx->count = sctx->count;
+	memcpy(octx->state, sctx->state, sizeof(octx->state));
+	memcpy(octx->buf, sctx->buf, sizeof(octx->buf));
+	return 0;
+}
+
+static int sha256_import(struct shash_desc *desc, const void *in)
+{
+	struct s390_sha_ctx *sctx = shash_desc_ctx(desc);
+	const struct sha256_state *ictx = in;
+
+	sctx->count = ictx->count;
+	memcpy(sctx->state, ictx->state, sizeof(ictx->state));
+	memcpy(sctx->buf, ictx->buf, sizeof(ictx->buf));
+	sctx->func = CPACF_KIMD_SHA_256;
+	return 0;
+}
+
+static struct shash_alg sha256_alg = {
+	.digestsize	=	SHA256_DIGEST_SIZE,
+	.init		=	s390_sha256_init,
+	.update		=	s390_sha_update,
+	.final		=	s390_sha_final,
+	.export		=	sha256_export,
+	.import		=	sha256_import,
+	.descsize	=	sizeof(struct s390_sha_ctx),
+	.statesize	=	sizeof(struct sha256_state),
+	.base		=	{
+		.cra_name	=	"sha256",
+		.cra_driver_name=	"sha256-s390",
+		.cra_priority	=	300,
+		.cra_blocksize	=	SHA256_BLOCK_SIZE,
+		.cra_module	=	THIS_MODULE,
+	}
+};
+
+static int s390_sha224_init(struct shash_desc *desc)
+{
+	struct s390_sha_ctx *sctx = shash_desc_ctx(desc);
+
+	sctx->state[0] = SHA224_H0;
+	sctx->state[1] = SHA224_H1;
+	sctx->state[2] = SHA224_H2;
+	sctx->state[3] = SHA224_H3;
+	sctx->state[4] = SHA224_H4;
+	sctx->state[5] = SHA224_H5;
+	sctx->state[6] = SHA224_H6;
+	sctx->state[7] = SHA224_H7;
+	sctx->count = 0;
+	sctx->func = CPACF_KIMD_SHA_256;
+
+	return 0;
+}
+
+static struct shash_alg sha224_alg = {
+	.digestsize	=	SHA224_DIGEST_SIZE,
+	.init		=	s390_sha224_init,
+	.update		=	s390_sha_update,
+	.final		=	s390_sha_final,
+	.export		=	sha256_export,
+	.import		=	sha256_import,
+	.descsize	=	sizeof(struct s390_sha_ctx),
+	.statesize	=	sizeof(struct sha256_state),
+	.base		=	{
+		.cra_name	=	"sha224",
+		.cra_driver_name=	"sha224-s390",
+		.cra_priority	=	300,
+		.cra_blocksize	=	SHA224_BLOCK_SIZE,
+		.cra_module	=	THIS_MODULE,
+	}
+};
+
+static int __init sha256_s390_init(void)
+{
+	int ret;
+
+	if (!cpacf_query_func(CPACF_KIMD, CPACF_KIMD_SHA_256))
+		return -ENODEV;
+	ret = crypto_register_shash(&sha256_alg);
+	if (ret < 0)
+		goto out;
+	ret = crypto_register_shash(&sha224_alg);
+	if (ret < 0)
+		crypto_unregister_shash(&sha256_alg);
+out:
+	return ret;
+}
+
+static void __exit sha256_s390_fini(void)
+{
+	crypto_unregister_shash(&sha224_alg);
+	crypto_unregister_shash(&sha256_alg);
+}
+
+module_cpu_feature_match(S390_CPU_FEATURE_MSA, sha256_s390_init);
+module_exit(sha256_s390_fini);
+
+MODULE_ALIAS_CRYPTO("sha256");
+MODULE_ALIAS_CRYPTO("sha224");
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("SHA256 and SHA224 Secure Hash Algorithm");
diff --git a/arch/s390/crypto/sha3_256_s390.c b/arch/s390/crypto/sha3_256_s390.c
new file mode 100644
index 0000000000..e1350e033a
--- /dev/null
+++ b/arch/s390/crypto/sha3_256_s390.c
@@ -0,0 +1,146 @@
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * Cryptographic API.
+ *
+ * s390 implementation of the SHA256 and SHA224 Secure Hash Algorithm.
+ *
+ * s390 Version:
+ *   Copyright IBM Corp. 2019
+ *   Author(s): Joerg Schmidbauer (jschmidb@de.ibm.com)
+ */
+#include <crypto/internal/hash.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/cpufeature.h>
+#include <crypto/sha3.h>
+#include <asm/cpacf.h>
+
+#include "sha.h"
+
+static int sha3_256_init(struct shash_desc *desc)
+{
+	struct s390_sha_ctx *sctx = shash_desc_ctx(desc);
+
+	memset(sctx->state, 0, sizeof(sctx->state));
+	sctx->count = 0;
+	sctx->func = CPACF_KIMD_SHA3_256;
+
+	return 0;
+}
+
+static int sha3_256_export(struct shash_desc *desc, void *out)
+{
+	struct s390_sha_ctx *sctx = shash_desc_ctx(desc);
+	struct sha3_state *octx = out;
+
+	octx->rsiz = sctx->count;
+	memcpy(octx->st, sctx->state, sizeof(octx->st));
+	memcpy(octx->buf, sctx->buf, sizeof(octx->buf));
+
+	return 0;
+}
+
+static int sha3_256_import(struct shash_desc *desc, const void *in)
+{
+	struct s390_sha_ctx *sctx = shash_desc_ctx(desc);
+	const struct sha3_state *ictx = in;
+
+	sctx->count = ictx->rsiz;
+	memcpy(sctx->state, ictx->st, sizeof(ictx->st));
+	memcpy(sctx->buf, ictx->buf, sizeof(ictx->buf));
+	sctx->func = CPACF_KIMD_SHA3_256;
+
+	return 0;
+}
+
+static int sha3_224_import(struct shash_desc *desc, const void *in)
+{
+	struct s390_sha_ctx *sctx = shash_desc_ctx(desc);
+	const struct sha3_state *ictx = in;
+
+	sctx->count = ictx->rsiz;
+	memcpy(sctx->state, ictx->st, sizeof(ictx->st));
+	memcpy(sctx->buf, ictx->buf, sizeof(ictx->buf));
+	sctx->func = CPACF_KIMD_SHA3_224;
+
+	return 0;
+}
+
+static struct shash_alg sha3_256_alg = {
+	.digestsize	=	SHA3_256_DIGEST_SIZE,	   /* = 32 */
+	.init		=	sha3_256_init,
+	.update		=	s390_sha_update,
+	.final		=	s390_sha_final,
+	.export		=	sha3_256_export,
+	.import		=	sha3_256_import,
+	.descsize	=	sizeof(struct s390_sha_ctx),
+	.statesize	=	sizeof(struct sha3_state),
+	.base		=	{
+		.cra_name	 =	"sha3-256",
+		.cra_driver_name =	"sha3-256-s390",
+		.cra_priority	 =	300,
+		.cra_blocksize	 =	SHA3_256_BLOCK_SIZE,
+		.cra_module	 =	THIS_MODULE,
+	}
+};
+
+static int sha3_224_init(struct shash_desc *desc)
+{
+	struct s390_sha_ctx *sctx = shash_desc_ctx(desc);
+
+	memset(sctx->state, 0, sizeof(sctx->state));
+	sctx->count = 0;
+	sctx->func = CPACF_KIMD_SHA3_224;
+
+	return 0;
+}
+
+static struct shash_alg sha3_224_alg = {
+	.digestsize	=	SHA3_224_DIGEST_SIZE,
+	.init		=	sha3_224_init,
+	.update		=	s390_sha_update,
+	.final		=	s390_sha_final,
+	.export		=	sha3_256_export, /* same as for 256 */
+	.import		=	sha3_224_import, /* function code different! */
+	.descsize	=	sizeof(struct s390_sha_ctx),
+	.statesize	=	sizeof(struct sha3_state),
+	.base		=	{
+		.cra_name	 =	"sha3-224",
+		.cra_driver_name =	"sha3-224-s390",
+		.cra_priority	 =	300,
+		.cra_blocksize	 =	SHA3_224_BLOCK_SIZE,
+		.cra_module	 =	THIS_MODULE,
+	}
+};
+
+static int __init sha3_256_s390_init(void)
+{
+	int ret;
+
+	if (!cpacf_query_func(CPACF_KIMD, CPACF_KIMD_SHA3_256))
+		return -ENODEV;
+
+	ret = crypto_register_shash(&sha3_256_alg);
+	if (ret < 0)
+		goto out;
+
+	ret = crypto_register_shash(&sha3_224_alg);
+	if (ret < 0)
+		crypto_unregister_shash(&sha3_256_alg);
+out:
+	return ret;
+}
+
+static void __exit sha3_256_s390_fini(void)
+{
+	crypto_unregister_shash(&sha3_224_alg);
+	crypto_unregister_shash(&sha3_256_alg);
+}
+
+module_cpu_feature_match(S390_CPU_FEATURE_MSA, sha3_256_s390_init);
+module_exit(sha3_256_s390_fini);
+
+MODULE_ALIAS_CRYPTO("sha3-256");
+MODULE_ALIAS_CRYPTO("sha3-224");
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("SHA3-256 and SHA3-224 Secure Hash Algorithm");
diff --git a/arch/s390/crypto/sha3_512_s390.c b/arch/s390/crypto/sha3_512_s390.c
new file mode 100644
index 0000000000..06c142ed9b
--- /dev/null
+++ b/arch/s390/crypto/sha3_512_s390.c
@@ -0,0 +1,154 @@
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * Cryptographic API.
+ *
+ * s390 implementation of the SHA512 and SHA384 Secure Hash Algorithm.
+ *
+ * Copyright IBM Corp. 2019
+ * Author(s): Joerg Schmidbauer (jschmidb@de.ibm.com)
+ */
+#include <crypto/internal/hash.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/cpufeature.h>
+#include <crypto/sha3.h>
+#include <asm/cpacf.h>
+
+#include "sha.h"
+
+static int sha3_512_init(struct shash_desc *desc)
+{
+	struct s390_sha_ctx *sctx = shash_desc_ctx(desc);
+
+	memset(sctx->state, 0, sizeof(sctx->state));
+	sctx->count = 0;
+	sctx->func = CPACF_KIMD_SHA3_512;
+
+	return 0;
+}
+
+static int sha3_512_export(struct shash_desc *desc, void *out)
+{
+	struct s390_sha_ctx *sctx = shash_desc_ctx(desc);
+	struct sha3_state *octx = out;
+
+	octx->rsiz = sctx->count;
+	octx->rsizw = sctx->count >> 32;
+
+	memcpy(octx->st, sctx->state, sizeof(octx->st));
+	memcpy(octx->buf, sctx->buf, sizeof(octx->buf));
+
+	return 0;
+}
+
+static int sha3_512_import(struct shash_desc *desc, const void *in)
+{
+	struct s390_sha_ctx *sctx = shash_desc_ctx(desc);
+	const struct sha3_state *ictx = in;
+
+	if (unlikely(ictx->rsizw))
+		return -ERANGE;
+	sctx->count = ictx->rsiz;
+
+	memcpy(sctx->state, ictx->st, sizeof(ictx->st));
+	memcpy(sctx->buf, ictx->buf, sizeof(ictx->buf));
+	sctx->func = CPACF_KIMD_SHA3_512;
+
+	return 0;
+}
+
+static int sha3_384_import(struct shash_desc *desc, const void *in)
+{
+	struct s390_sha_ctx *sctx = shash_desc_ctx(desc);
+	const struct sha3_state *ictx = in;
+
+	if (unlikely(ictx->rsizw))
+		return -ERANGE;
+	sctx->count = ictx->rsiz;
+
+	memcpy(sctx->state, ictx->st, sizeof(ictx->st));
+	memcpy(sctx->buf, ictx->buf, sizeof(ictx->buf));
+	sctx->func = CPACF_KIMD_SHA3_384;
+
+	return 0;
+}
+
+static struct shash_alg sha3_512_alg = {
+	.digestsize	=	SHA3_512_DIGEST_SIZE,
+	.init		=	sha3_512_init,
+	.update		=	s390_sha_update,
+	.final		=	s390_sha_final,
+	.export		=	sha3_512_export,
+	.import		=	sha3_512_import,
+	.descsize	=	sizeof(struct s390_sha_ctx),
+	.statesize	=	sizeof(struct sha3_state),
+	.base		=	{
+		.cra_name	 =	"sha3-512",
+		.cra_driver_name =	"sha3-512-s390",
+		.cra_priority	 =	300,
+		.cra_blocksize	 =	SHA3_512_BLOCK_SIZE,
+		.cra_module	 =	THIS_MODULE,
+	}
+};
+
+MODULE_ALIAS_CRYPTO("sha3-512");
+
+static int sha3_384_init(struct shash_desc *desc)
+{
+	struct s390_sha_ctx *sctx = shash_desc_ctx(desc);
+
+	memset(sctx->state, 0, sizeof(sctx->state));
+	sctx->count = 0;
+	sctx->func = CPACF_KIMD_SHA3_384;
+
+	return 0;
+}
+
+static struct shash_alg sha3_384_alg = {
+	.digestsize	=	SHA3_384_DIGEST_SIZE,
+	.init		=	sha3_384_init,
+	.update		=	s390_sha_update,
+	.final		=	s390_sha_final,
+	.export		=	sha3_512_export, /* same as for 512 */
+	.import		=	sha3_384_import, /* function code different! */
+	.descsize	=	sizeof(struct s390_sha_ctx),
+	.statesize	=	sizeof(struct sha3_state),
+	.base		=	{
+		.cra_name	 =	"sha3-384",
+		.cra_driver_name =	"sha3-384-s390",
+		.cra_priority	 =	300,
+		.cra_blocksize	 =	SHA3_384_BLOCK_SIZE,
+		.cra_ctxsize	 =	sizeof(struct s390_sha_ctx),
+		.cra_module	 =	THIS_MODULE,
+	}
+};
+
+MODULE_ALIAS_CRYPTO("sha3-384");
+
+static int __init init(void)
+{
+	int ret;
+
+	if (!cpacf_query_func(CPACF_KIMD, CPACF_KIMD_SHA3_512))
+		return -ENODEV;
+	ret = crypto_register_shash(&sha3_512_alg);
+	if (ret < 0)
+		goto out;
+	ret = crypto_register_shash(&sha3_384_alg);
+	if (ret < 0)
+		crypto_unregister_shash(&sha3_512_alg);
+out:
+	return ret;
+}
+
+static void __exit fini(void)
+{
+	crypto_unregister_shash(&sha3_512_alg);
+	crypto_unregister_shash(&sha3_384_alg);
+}
+
+module_cpu_feature_match(S390_CPU_FEATURE_MSA, init);
+module_exit(fini);
+
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("SHA3-512 and SHA3-384 Secure Hash Algorithm");
diff --git a/arch/s390/crypto/sha512_s390.c b/arch/s390/crypto/sha512_s390.c
new file mode 100644
index 0000000000..04f11c4077
--- /dev/null
+++ b/arch/s390/crypto/sha512_s390.c
@@ -0,0 +1,149 @@
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * Cryptographic API.
+ *
+ * s390 implementation of the SHA512 and SHA38 Secure Hash Algorithm.
+ *
+ * Copyright IBM Corp. 2007
+ * Author(s): Jan Glauber (jang@de.ibm.com)
+ */
+#include <crypto/internal/hash.h>
+#include <crypto/sha2.h>
+#include <linux/errno.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/cpufeature.h>
+#include <asm/cpacf.h>
+
+#include "sha.h"
+
+static int sha512_init(struct shash_desc *desc)
+{
+	struct s390_sha_ctx *ctx = shash_desc_ctx(desc);
+
+	*(__u64 *)&ctx->state[0] = SHA512_H0;
+	*(__u64 *)&ctx->state[2] = SHA512_H1;
+	*(__u64 *)&ctx->state[4] = SHA512_H2;
+	*(__u64 *)&ctx->state[6] = SHA512_H3;
+	*(__u64 *)&ctx->state[8] = SHA512_H4;
+	*(__u64 *)&ctx->state[10] = SHA512_H5;
+	*(__u64 *)&ctx->state[12] = SHA512_H6;
+	*(__u64 *)&ctx->state[14] = SHA512_H7;
+	ctx->count = 0;
+	ctx->func = CPACF_KIMD_SHA_512;
+
+	return 0;
+}
+
+static int sha512_export(struct shash_desc *desc, void *out)
+{
+	struct s390_sha_ctx *sctx = shash_desc_ctx(desc);
+	struct sha512_state *octx = out;
+
+	octx->count[0] = sctx->count;
+	octx->count[1] = 0;
+	memcpy(octx->state, sctx->state, sizeof(octx->state));
+	memcpy(octx->buf, sctx->buf, sizeof(octx->buf));
+	return 0;
+}
+
+static int sha512_import(struct shash_desc *desc, const void *in)
+{
+	struct s390_sha_ctx *sctx = shash_desc_ctx(desc);
+	const struct sha512_state *ictx = in;
+
+	if (unlikely(ictx->count[1]))
+		return -ERANGE;
+	sctx->count = ictx->count[0];
+
+	memcpy(sctx->state, ictx->state, sizeof(ictx->state));
+	memcpy(sctx->buf, ictx->buf, sizeof(ictx->buf));
+	sctx->func = CPACF_KIMD_SHA_512;
+	return 0;
+}
+
+static struct shash_alg sha512_alg = {
+	.digestsize	=	SHA512_DIGEST_SIZE,
+	.init		=	sha512_init,
+	.update		=	s390_sha_update,
+	.final		=	s390_sha_final,
+	.export		=	sha512_export,
+	.import		=	sha512_import,
+	.descsize	=	sizeof(struct s390_sha_ctx),
+	.statesize	=	sizeof(struct sha512_state),
+	.base		=	{
+		.cra_name	=	"sha512",
+		.cra_driver_name=	"sha512-s390",
+		.cra_priority	=	300,
+		.cra_blocksize	=	SHA512_BLOCK_SIZE,
+		.cra_module	=	THIS_MODULE,
+	}
+};
+
+MODULE_ALIAS_CRYPTO("sha512");
+
+static int sha384_init(struct shash_desc *desc)
+{
+	struct s390_sha_ctx *ctx = shash_desc_ctx(desc);
+
+	*(__u64 *)&ctx->state[0] = SHA384_H0;
+	*(__u64 *)&ctx->state[2] = SHA384_H1;
+	*(__u64 *)&ctx->state[4] = SHA384_H2;
+	*(__u64 *)&ctx->state[6] = SHA384_H3;
+	*(__u64 *)&ctx->state[8] = SHA384_H4;
+	*(__u64 *)&ctx->state[10] = SHA384_H5;
+	*(__u64 *)&ctx->state[12] = SHA384_H6;
+	*(__u64 *)&ctx->state[14] = SHA384_H7;
+	ctx->count = 0;
+	ctx->func = CPACF_KIMD_SHA_512;
+
+	return 0;
+}
+
+static struct shash_alg sha384_alg = {
+	.digestsize	=	SHA384_DIGEST_SIZE,
+	.init		=	sha384_init,
+	.update		=	s390_sha_update,
+	.final		=	s390_sha_final,
+	.export		=	sha512_export,
+	.import		=	sha512_import,
+	.descsize	=	sizeof(struct s390_sha_ctx),
+	.statesize	=	sizeof(struct sha512_state),
+	.base		=	{
+		.cra_name	=	"sha384",
+		.cra_driver_name=	"sha384-s390",
+		.cra_priority	=	300,
+		.cra_blocksize	=	SHA384_BLOCK_SIZE,
+		.cra_ctxsize	=	sizeof(struct s390_sha_ctx),
+		.cra_module	=	THIS_MODULE,
+	}
+};
+
+MODULE_ALIAS_CRYPTO("sha384");
+
+static int __init init(void)
+{
+	int ret;
+
+	if (!cpacf_query_func(CPACF_KIMD, CPACF_KIMD_SHA_512))
+		return -ENODEV;
+	if ((ret = crypto_register_shash(&sha512_alg)) < 0)
+		goto out;
+	if ((ret = crypto_register_shash(&sha384_alg)) < 0)
+		crypto_unregister_shash(&sha512_alg);
+out:
+	return ret;
+}
+
+static void __exit fini(void)
+{
+	crypto_unregister_shash(&sha512_alg);
+	crypto_unregister_shash(&sha384_alg);
+}
+
+module_cpu_feature_match(S390_CPU_FEATURE_MSA, init);
+module_exit(fini);
+
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("SHA512 and SHA-384 Secure Hash Algorithm");
diff --git a/arch/s390/crypto/sha_common.c b/arch/s390/crypto/sha_common.c
new file mode 100644
index 0000000000..686fe7aa19
--- /dev/null
+++ b/arch/s390/crypto/sha_common.c
@@ -0,0 +1,124 @@
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * Cryptographic API.
+ *
+ * s390 generic implementation of the SHA Secure Hash Algorithms.
+ *
+ * Copyright IBM Corp. 2007
+ * Author(s): Jan Glauber (jang@de.ibm.com)
+ */
+
+#include <crypto/internal/hash.h>
+#include <linux/module.h>
+#include <asm/cpacf.h>
+#include "sha.h"
+
+int s390_sha_update(struct shash_desc *desc, const u8 *data, unsigned int len)
+{
+	struct s390_sha_ctx *ctx = shash_desc_ctx(desc);
+	unsigned int bsize = crypto_shash_blocksize(desc->tfm);
+	unsigned int index, n;
+
+	/* how much is already in the buffer? */
+	index = ctx->count % bsize;
+	ctx->count += len;
+
+	if ((index + len) < bsize)
+		goto store;
+
+	/* process one stored block */
+	if (index) {
+		memcpy(ctx->buf + index, data, bsize - index);
+		cpacf_kimd(ctx->func, ctx->state, ctx->buf, bsize);
+		data += bsize - index;
+		len -= bsize - index;
+		index = 0;
+	}
+
+	/* process as many blocks as possible */
+	if (len >= bsize) {
+		n = (len / bsize) * bsize;
+		cpacf_kimd(ctx->func, ctx->state, data, n);
+		data += n;
+		len -= n;
+	}
+store:
+	if (len)
+		memcpy(ctx->buf + index , data, len);
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(s390_sha_update);
+
+static int s390_crypto_shash_parmsize(int func)
+{
+	switch (func) {
+	case CPACF_KLMD_SHA_1:
+		return 20;
+	case CPACF_KLMD_SHA_256:
+		return 32;
+	case CPACF_KLMD_SHA_512:
+		return 64;
+	case CPACF_KLMD_SHA3_224:
+	case CPACF_KLMD_SHA3_256:
+	case CPACF_KLMD_SHA3_384:
+	case CPACF_KLMD_SHA3_512:
+		return 200;
+	default:
+		return -EINVAL;
+	}
+}
+
+int s390_sha_final(struct shash_desc *desc, u8 *out)
+{
+	struct s390_sha_ctx *ctx = shash_desc_ctx(desc);
+	unsigned int bsize = crypto_shash_blocksize(desc->tfm);
+	u64 bits;
+	unsigned int n;
+	int mbl_offset;
+
+	n = ctx->count % bsize;
+	bits = ctx->count * 8;
+	mbl_offset = s390_crypto_shash_parmsize(ctx->func);
+	if (mbl_offset < 0)
+		return -EINVAL;
+
+	mbl_offset = mbl_offset / sizeof(u32);
+
+	/* set total msg bit length (mbl) in CPACF parmblock */
+	switch (ctx->func) {
+	case CPACF_KLMD_SHA_1:
+	case CPACF_KLMD_SHA_256:
+		memcpy(ctx->state + mbl_offset, &bits, sizeof(bits));
+		break;
+	case CPACF_KLMD_SHA_512:
+		/*
+		 * the SHA512 parmblock has a 128-bit mbl field, clear
+		 * high-order u64 field, copy bits to low-order u64 field
+		 */
+		memset(ctx->state + mbl_offset, 0x00, sizeof(bits));
+		mbl_offset += sizeof(u64) / sizeof(u32);
+		memcpy(ctx->state + mbl_offset, &bits, sizeof(bits));
+		break;
+	case CPACF_KLMD_SHA3_224:
+	case CPACF_KLMD_SHA3_256:
+	case CPACF_KLMD_SHA3_384:
+	case CPACF_KLMD_SHA3_512:
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	cpacf_klmd(ctx->func, ctx->state, ctx->buf, n);
+
+	/* copy digest to out */
+	memcpy(out, ctx->state, crypto_shash_digestsize(desc->tfm));
+	/* wipe context */
+	memset(ctx, 0, sizeof *ctx);
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(s390_sha_final);
+
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("s390 SHA cipher common functions");
diff --git a/arch/s390/hypfs/Makefile b/arch/s390/hypfs/Makefile
new file mode 100644
index 0000000000..c34854d298
--- /dev/null
+++ b/arch/s390/hypfs/Makefile
@@ -0,0 +1,14 @@
+# SPDX-License-Identifier: GPL-2.0
+#
+# Makefile for the linux hypfs filesystem routines.
+#
+
+obj-$(CONFIG_S390_HYPFS)	+= hypfs_dbfs.o
+obj-$(CONFIG_S390_HYPFS)	+= hypfs_diag.o
+obj-$(CONFIG_S390_HYPFS)	+= hypfs_diag0c.o
+obj-$(CONFIG_S390_HYPFS)	+= hypfs_sprp.o
+obj-$(CONFIG_S390_HYPFS)	+= hypfs_vm.o
+
+obj-$(CONFIG_S390_HYPFS_FS)	+= hypfs_diag_fs.o
+obj-$(CONFIG_S390_HYPFS_FS)	+= hypfs_vm_fs.o
+obj-$(CONFIG_S390_HYPFS_FS)	+= inode.o
diff --git a/arch/s390/hypfs/hypfs.h b/arch/s390/hypfs/hypfs.h
new file mode 100644
index 0000000000..65f4036fd5
--- /dev/null
+++ b/arch/s390/hypfs/hypfs.h
@@ -0,0 +1,85 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ *    Hypervisor filesystem for Linux on s390.
+ *
+ *    Copyright IBM Corp. 2006
+ *    Author(s): Michael Holzheu <holzheu@de.ibm.com>
+ */
+
+#ifndef _HYPFS_H_
+#define _HYPFS_H_
+
+#include <linux/fs.h>
+#include <linux/types.h>
+#include <linux/debugfs.h>
+#include <linux/workqueue.h>
+#include <linux/kref.h>
+#include <asm/hypfs.h>
+
+#define REG_FILE_MODE    0440
+#define UPDATE_FILE_MODE 0220
+#define DIR_MODE         0550
+
+extern struct dentry *hypfs_mkdir(struct dentry *parent, const char *name);
+
+extern struct dentry *hypfs_create_u64(struct dentry *dir, const char *name,
+				       __u64 value);
+
+extern struct dentry *hypfs_create_str(struct dentry *dir, const char *name,
+				       char *string);
+
+/* LPAR Hypervisor */
+extern int hypfs_diag_init(void);
+extern void hypfs_diag_exit(void);
+extern int hypfs_diag_create_files(struct dentry *root);
+
+/* VM Hypervisor */
+extern int hypfs_vm_init(void);
+extern void hypfs_vm_exit(void);
+extern int hypfs_vm_create_files(struct dentry *root);
+
+/* VM diagnose 0c */
+int hypfs_diag0c_init(void);
+void hypfs_diag0c_exit(void);
+
+/* Set Partition-Resource Parameter */
+void hypfs_sprp_init(void);
+void hypfs_sprp_exit(void);
+
+int __hypfs_fs_init(void);
+
+static inline int hypfs_fs_init(void)
+{
+	if (IS_ENABLED(CONFIG_S390_HYPFS_FS))
+		return __hypfs_fs_init();
+	return 0;
+}
+
+/* debugfs interface */
+struct hypfs_dbfs_file;
+
+struct hypfs_dbfs_data {
+	void			*buf;
+	void			*buf_free_ptr;
+	size_t			size;
+	struct hypfs_dbfs_file	*dbfs_file;
+};
+
+struct hypfs_dbfs_file {
+	const char	*name;
+	int		(*data_create)(void **data, void **data_free_ptr,
+				       size_t *size);
+	void		(*data_free)(const void *buf_free_ptr);
+	long		(*unlocked_ioctl) (struct file *, unsigned int,
+					   unsigned long);
+
+	/* Private data for hypfs_dbfs.c */
+	struct mutex		lock;
+	struct dentry		*dentry;
+};
+
+extern void hypfs_dbfs_exit(void);
+extern void hypfs_dbfs_create_file(struct hypfs_dbfs_file *df);
+extern void hypfs_dbfs_remove_file(struct hypfs_dbfs_file *df);
+
+#endif /* _HYPFS_H_ */
diff --git a/arch/s390/hypfs/hypfs_dbfs.c b/arch/s390/hypfs/hypfs_dbfs.c
new file mode 100644
index 0000000000..4024599eb4
--- /dev/null
+++ b/arch/s390/hypfs/hypfs_dbfs.c
@@ -0,0 +1,122 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Hypervisor filesystem for Linux on s390 - debugfs interface
+ *
+ * Copyright IBM Corp. 2010
+ * Author(s): Michael Holzheu <holzheu@linux.vnet.ibm.com>
+ */
+
+#include <linux/slab.h>
+#include "hypfs.h"
+
+static struct dentry *dbfs_dir;
+
+static struct hypfs_dbfs_data *hypfs_dbfs_data_alloc(struct hypfs_dbfs_file *f)
+{
+	struct hypfs_dbfs_data *data;
+
+	data = kmalloc(sizeof(*data), GFP_KERNEL);
+	if (!data)
+		return NULL;
+	data->dbfs_file = f;
+	return data;
+}
+
+static void hypfs_dbfs_data_free(struct hypfs_dbfs_data *data)
+{
+	data->dbfs_file->data_free(data->buf_free_ptr);
+	kfree(data);
+}
+
+static ssize_t dbfs_read(struct file *file, char __user *buf,
+			 size_t size, loff_t *ppos)
+{
+	struct hypfs_dbfs_data *data;
+	struct hypfs_dbfs_file *df;
+	ssize_t rc;
+
+	if (*ppos != 0)
+		return 0;
+
+	df = file_inode(file)->i_private;
+	mutex_lock(&df->lock);
+	data = hypfs_dbfs_data_alloc(df);
+	if (!data) {
+		mutex_unlock(&df->lock);
+		return -ENOMEM;
+	}
+	rc = df->data_create(&data->buf, &data->buf_free_ptr, &data->size);
+	if (rc) {
+		mutex_unlock(&df->lock);
+		kfree(data);
+		return rc;
+	}
+	mutex_unlock(&df->lock);
+
+	rc = simple_read_from_buffer(buf, size, ppos, data->buf, data->size);
+	hypfs_dbfs_data_free(data);
+	return rc;
+}
+
+static long dbfs_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
+{
+	struct hypfs_dbfs_file *df = file_inode(file)->i_private;
+	long rc;
+
+	mutex_lock(&df->lock);
+	if (df->unlocked_ioctl)
+		rc = df->unlocked_ioctl(file, cmd, arg);
+	else
+		rc = -ENOTTY;
+	mutex_unlock(&df->lock);
+	return rc;
+}
+
+static const struct file_operations dbfs_ops = {
+	.read		= dbfs_read,
+	.llseek		= no_llseek,
+	.unlocked_ioctl = dbfs_ioctl,
+};
+
+void hypfs_dbfs_create_file(struct hypfs_dbfs_file *df)
+{
+	df->dentry = debugfs_create_file(df->name, 0400, dbfs_dir, df,
+					 &dbfs_ops);
+	mutex_init(&df->lock);
+}
+
+void hypfs_dbfs_remove_file(struct hypfs_dbfs_file *df)
+{
+	debugfs_remove(df->dentry);
+}
+
+static int __init hypfs_dbfs_init(void)
+{
+	int rc = -ENODATA;
+
+	dbfs_dir = debugfs_create_dir("s390_hypfs", NULL);
+	if (hypfs_diag_init())
+		goto fail_dbfs_exit;
+	if (hypfs_vm_init())
+		goto fail_hypfs_diag_exit;
+	hypfs_sprp_init();
+	if (hypfs_diag0c_init())
+		goto fail_hypfs_sprp_exit;
+	rc = hypfs_fs_init();
+	if (rc)
+		goto fail_hypfs_diag0c_exit;
+	return 0;
+
+fail_hypfs_diag0c_exit:
+	hypfs_diag0c_exit();
+fail_hypfs_sprp_exit:
+	hypfs_sprp_exit();
+	hypfs_vm_exit();
+fail_hypfs_diag_exit:
+	hypfs_diag_exit();
+	pr_err("Initialization of hypfs failed with rc=%i\n", rc);
+fail_dbfs_exit:
+	debugfs_remove(dbfs_dir);
+	return rc;
+}
+device_initcall(hypfs_dbfs_init)
diff --git a/arch/s390/hypfs/hypfs_diag.c b/arch/s390/hypfs/hypfs_diag.c
new file mode 100644
index 0000000000..279b7bba4d
--- /dev/null
+++ b/arch/s390/hypfs/hypfs_diag.c
@@ -0,0 +1,219 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ *    Hypervisor filesystem for Linux on s390. Diag 204 and 224
+ *    implementation.
+ *
+ *    Copyright IBM Corp. 2006, 2008
+ *    Author(s): Michael Holzheu <holzheu@de.ibm.com>
+ */
+
+#define KMSG_COMPONENT "hypfs"
+#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
+
+#include <linux/types.h>
+#include <linux/errno.h>
+#include <linux/slab.h>
+#include <linux/string.h>
+#include <linux/vmalloc.h>
+#include <linux/mm.h>
+#include <asm/diag.h>
+#include <asm/ebcdic.h>
+#include "hypfs_diag.h"
+#include "hypfs.h"
+
+#define DBFS_D204_HDR_VERSION	0
+
+static enum diag204_sc diag204_store_sc;	/* used subcode for store */
+static enum diag204_format diag204_info_type;	/* used diag 204 data format */
+
+static void *diag204_buf;		/* 4K aligned buffer for diag204 data */
+static int diag204_buf_pages;		/* number of pages for diag204 data */
+
+static struct dentry *dbfs_d204_file;
+
+enum diag204_format diag204_get_info_type(void)
+{
+	return diag204_info_type;
+}
+
+static void diag204_set_info_type(enum diag204_format type)
+{
+	diag204_info_type = type;
+}
+
+/* Diagnose 204 functions */
+/*
+ * For the old diag subcode 4 with simple data format we have to use real
+ * memory. If we use subcode 6 or 7 with extended data format, we can (and
+ * should) use vmalloc, since we need a lot of memory in that case. Currently
+ * up to 93 pages!
+ */
+
+static void diag204_free_buffer(void)
+{
+	vfree(diag204_buf);
+	diag204_buf = NULL;
+}
+
+void *diag204_get_buffer(enum diag204_format fmt, int *pages)
+{
+	if (diag204_buf) {
+		*pages = diag204_buf_pages;
+		return diag204_buf;
+	}
+	if (fmt == DIAG204_INFO_SIMPLE) {
+		*pages = 1;
+	} else {/* DIAG204_INFO_EXT */
+		*pages = diag204((unsigned long)DIAG204_SUBC_RSI |
+				 (unsigned long)DIAG204_INFO_EXT, 0, NULL);
+		if (*pages <= 0)
+			return ERR_PTR(-EOPNOTSUPP);
+	}
+	diag204_buf = __vmalloc_node(array_size(*pages, PAGE_SIZE),
+				     PAGE_SIZE, GFP_KERNEL, NUMA_NO_NODE,
+				     __builtin_return_address(0));
+	if (!diag204_buf)
+		return ERR_PTR(-ENOMEM);
+	diag204_buf_pages = *pages;
+	return diag204_buf;
+}
+
+/*
+ * diag204_probe() has to find out, which type of diagnose 204 implementation
+ * we have on our machine. Currently there are three possible scanarios:
+ *   - subcode 4   + simple data format (only one page)
+ *   - subcode 4-6 + extended data format
+ *   - subcode 4-7 + extended data format
+ *
+ * Subcode 5 is used to retrieve the size of the data, provided by subcodes
+ * 6 and 7. Subcode 7 basically has the same function as subcode 6. In addition
+ * to subcode 6 it provides also information about secondary cpus.
+ * In order to get as much information as possible, we first try
+ * subcode 7, then 6 and if both fail, we use subcode 4.
+ */
+
+static int diag204_probe(void)
+{
+	void *buf;
+	int pages, rc;
+
+	buf = diag204_get_buffer(DIAG204_INFO_EXT, &pages);
+	if (!IS_ERR(buf)) {
+		if (diag204((unsigned long)DIAG204_SUBC_STIB7 |
+			    (unsigned long)DIAG204_INFO_EXT, pages, buf) >= 0) {
+			diag204_store_sc = DIAG204_SUBC_STIB7;
+			diag204_set_info_type(DIAG204_INFO_EXT);
+			goto out;
+		}
+		if (diag204((unsigned long)DIAG204_SUBC_STIB6 |
+			    (unsigned long)DIAG204_INFO_EXT, pages, buf) >= 0) {
+			diag204_store_sc = DIAG204_SUBC_STIB6;
+			diag204_set_info_type(DIAG204_INFO_EXT);
+			goto out;
+		}
+		diag204_free_buffer();
+	}
+
+	/* subcodes 6 and 7 failed, now try subcode 4 */
+
+	buf = diag204_get_buffer(DIAG204_INFO_SIMPLE, &pages);
+	if (IS_ERR(buf)) {
+		rc = PTR_ERR(buf);
+		goto fail_alloc;
+	}
+	if (diag204((unsigned long)DIAG204_SUBC_STIB4 |
+		    (unsigned long)DIAG204_INFO_SIMPLE, pages, buf) >= 0) {
+		diag204_store_sc = DIAG204_SUBC_STIB4;
+		diag204_set_info_type(DIAG204_INFO_SIMPLE);
+		goto out;
+	} else {
+		rc = -EOPNOTSUPP;
+		goto fail_store;
+	}
+out:
+	rc = 0;
+fail_store:
+	diag204_free_buffer();
+fail_alloc:
+	return rc;
+}
+
+int diag204_store(void *buf, int pages)
+{
+	int rc;
+
+	rc = diag204((unsigned long)diag204_store_sc |
+		     (unsigned long)diag204_get_info_type(), pages, buf);
+	return rc < 0 ? -EOPNOTSUPP : 0;
+}
+
+struct dbfs_d204_hdr {
+	u64	len;		/* Length of d204 buffer without header */
+	u16	version;	/* Version of header */
+	u8	sc;		/* Used subcode */
+	char	reserved[53];
+} __attribute__ ((packed));
+
+struct dbfs_d204 {
+	struct dbfs_d204_hdr	hdr;	/* 64 byte header */
+	char			buf[];	/* d204 buffer */
+} __attribute__ ((packed));
+
+static int dbfs_d204_create(void **data, void **data_free_ptr, size_t *size)
+{
+	struct dbfs_d204 *d204;
+	int rc, buf_size;
+	void *base;
+
+	buf_size = PAGE_SIZE * (diag204_buf_pages + 1) + sizeof(d204->hdr);
+	base = vzalloc(buf_size);
+	if (!base)
+		return -ENOMEM;
+	d204 = PTR_ALIGN(base + sizeof(d204->hdr), PAGE_SIZE) - sizeof(d204->hdr);
+	rc = diag204_store(d204->buf, diag204_buf_pages);
+	if (rc) {
+		vfree(base);
+		return rc;
+	}
+	d204->hdr.version = DBFS_D204_HDR_VERSION;
+	d204->hdr.len = PAGE_SIZE * diag204_buf_pages;
+	d204->hdr.sc = diag204_store_sc;
+	*data = d204;
+	*data_free_ptr = base;
+	*size = d204->hdr.len + sizeof(struct dbfs_d204_hdr);
+	return 0;
+}
+
+static struct hypfs_dbfs_file dbfs_file_d204 = {
+	.name		= "diag_204",
+	.data_create	= dbfs_d204_create,
+	.data_free	= vfree,
+};
+
+__init int hypfs_diag_init(void)
+{
+	int rc;
+
+	if (diag204_probe()) {
+		pr_info("The hardware system does not support hypfs\n");
+		return -ENODATA;
+	}
+
+	if (diag204_get_info_type() == DIAG204_INFO_EXT)
+		hypfs_dbfs_create_file(&dbfs_file_d204);
+
+	rc = hypfs_diag_fs_init();
+	if (rc) {
+		pr_err("The hardware system does not provide all functions required by hypfs\n");
+		debugfs_remove(dbfs_d204_file);
+	}
+	return rc;
+}
+
+void hypfs_diag_exit(void)
+{
+	debugfs_remove(dbfs_d204_file);
+	hypfs_diag_fs_exit();
+	diag204_free_buffer();
+	hypfs_dbfs_remove_file(&dbfs_file_d204);
+}
diff --git a/arch/s390/hypfs/hypfs_diag.h b/arch/s390/hypfs/hypfs_diag.h
new file mode 100644
index 0000000000..7090eff27f
--- /dev/null
+++ b/arch/s390/hypfs/hypfs_diag.h
@@ -0,0 +1,35 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ *    Hypervisor filesystem for Linux on s390. Diag 204 and 224
+ *    implementation.
+ *
+ *    Copyright IBM Corp. 2006, 2008
+ *    Author(s): Michael Holzheu <holzheu@de.ibm.com>
+ */
+
+#ifndef _S390_HYPFS_DIAG_H_
+#define _S390_HYPFS_DIAG_H_
+
+#include <asm/diag.h>
+
+enum diag204_format diag204_get_info_type(void);
+void *diag204_get_buffer(enum diag204_format fmt, int *pages);
+int diag204_store(void *buf, int pages);
+
+int __hypfs_diag_fs_init(void);
+void __hypfs_diag_fs_exit(void);
+
+static inline int hypfs_diag_fs_init(void)
+{
+	if (IS_ENABLED(CONFIG_S390_HYPFS_FS))
+		return __hypfs_diag_fs_init();
+	return 0;
+}
+
+static inline void hypfs_diag_fs_exit(void)
+{
+	if (IS_ENABLED(CONFIG_S390_HYPFS_FS))
+		__hypfs_diag_fs_exit();
+}
+
+#endif /* _S390_HYPFS_DIAG_H_ */
diff --git a/arch/s390/hypfs/hypfs_diag0c.c b/arch/s390/hypfs/hypfs_diag0c.c
new file mode 100644
index 0000000000..9a2786079e
--- /dev/null
+++ b/arch/s390/hypfs/hypfs_diag0c.c
@@ -0,0 +1,125 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Hypervisor filesystem for Linux on s390
+ *
+ * Diag 0C implementation
+ *
+ * Copyright IBM Corp. 2014
+ */
+
+#include <linux/slab.h>
+#include <linux/cpu.h>
+#include <asm/diag.h>
+#include <asm/hypfs.h>
+#include "hypfs.h"
+
+#define DBFS_D0C_HDR_VERSION 0
+
+/*
+ * Get hypfs_diag0c_entry from CPU vector and store diag0c data
+ */
+static void diag0c_fn(void *data)
+{
+	diag_stat_inc(DIAG_STAT_X00C);
+	diag_amode31_ops.diag0c(((void **)data)[smp_processor_id()]);
+}
+
+/*
+ * Allocate buffer and store diag 0c data
+ */
+static void *diag0c_store(unsigned int *count)
+{
+	struct hypfs_diag0c_data *diag0c_data;
+	unsigned int cpu_count, cpu, i;
+	void **cpu_vec;
+
+	cpus_read_lock();
+	cpu_count = num_online_cpus();
+	cpu_vec = kmalloc_array(num_possible_cpus(), sizeof(*cpu_vec),
+				GFP_KERNEL);
+	if (!cpu_vec)
+		goto fail_unlock_cpus;
+	/* Note: Diag 0c needs 8 byte alignment and real storage */
+	diag0c_data = kzalloc(struct_size(diag0c_data, entry, cpu_count),
+			      GFP_KERNEL | GFP_DMA);
+	if (!diag0c_data)
+		goto fail_kfree_cpu_vec;
+	i = 0;
+	/* Fill CPU vector for each online CPU */
+	for_each_online_cpu(cpu) {
+		diag0c_data->entry[i].cpu = cpu;
+		cpu_vec[cpu] = &diag0c_data->entry[i++];
+	}
+	/* Collect data all CPUs */
+	on_each_cpu(diag0c_fn, cpu_vec, 1);
+	*count = cpu_count;
+	kfree(cpu_vec);
+	cpus_read_unlock();
+	return diag0c_data;
+
+fail_kfree_cpu_vec:
+	kfree(cpu_vec);
+fail_unlock_cpus:
+	cpus_read_unlock();
+	return ERR_PTR(-ENOMEM);
+}
+
+/*
+ * Hypfs DBFS callback: Free diag 0c data
+ */
+static void dbfs_diag0c_free(const void *data)
+{
+	kfree(data);
+}
+
+/*
+ * Hypfs DBFS callback: Create diag 0c data
+ */
+static int dbfs_diag0c_create(void **data, void **data_free_ptr, size_t *size)
+{
+	struct hypfs_diag0c_data *diag0c_data;
+	unsigned int count;
+
+	diag0c_data = diag0c_store(&count);
+	if (IS_ERR(diag0c_data))
+		return PTR_ERR(diag0c_data);
+	memset(&diag0c_data->hdr, 0, sizeof(diag0c_data->hdr));
+	store_tod_clock_ext((union tod_clock *)diag0c_data->hdr.tod_ext);
+	diag0c_data->hdr.len = count * sizeof(struct hypfs_diag0c_entry);
+	diag0c_data->hdr.version = DBFS_D0C_HDR_VERSION;
+	diag0c_data->hdr.count = count;
+	*data = diag0c_data;
+	*data_free_ptr = diag0c_data;
+	*size = diag0c_data->hdr.len + sizeof(struct hypfs_diag0c_hdr);
+	return 0;
+}
+
+/*
+ * Hypfs DBFS file structure
+ */
+static struct hypfs_dbfs_file dbfs_file_0c = {
+	.name		= "diag_0c",
+	.data_create	= dbfs_diag0c_create,
+	.data_free	= dbfs_diag0c_free,
+};
+
+/*
+ * Initialize diag 0c interface for z/VM
+ */
+int __init hypfs_diag0c_init(void)
+{
+	if (!MACHINE_IS_VM)
+		return 0;
+	hypfs_dbfs_create_file(&dbfs_file_0c);
+	return 0;
+}
+
+/*
+ * Shutdown diag 0c interface for z/VM
+ */
+void hypfs_diag0c_exit(void)
+{
+	if (!MACHINE_IS_VM)
+		return;
+	hypfs_dbfs_remove_file(&dbfs_file_0c);
+}
diff --git a/arch/s390/hypfs/hypfs_diag_fs.c b/arch/s390/hypfs/hypfs_diag_fs.c
new file mode 100644
index 0000000000..00a6d370a2
--- /dev/null
+++ b/arch/s390/hypfs/hypfs_diag_fs.c
@@ -0,0 +1,393 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ *    Hypervisor filesystem for Linux on s390. Diag 204 and 224
+ *    implementation.
+ *
+ *    Copyright IBM Corp. 2006, 2008
+ *    Author(s): Michael Holzheu <holzheu@de.ibm.com>
+ */
+
+#define KMSG_COMPONENT "hypfs"
+#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
+
+#include <linux/types.h>
+#include <linux/errno.h>
+#include <linux/slab.h>
+#include <linux/string.h>
+#include <linux/vmalloc.h>
+#include <linux/mm.h>
+#include <asm/diag.h>
+#include <asm/ebcdic.h>
+#include "hypfs_diag.h"
+#include "hypfs.h"
+
+#define TMP_SIZE 64		/* size of temporary buffers */
+
+static char *diag224_cpu_names;			/* diag 224 name table */
+static int diag224_idx2name(int index, char *name);
+
+/*
+ * DIAG 204 member access functions.
+ *
+ * Since we have two different diag 204 data formats for old and new s390
+ * machines, we do not access the structs directly, but use getter functions for
+ * each struct member instead. This should make the code more readable.
+ */
+
+/* Time information block */
+
+static inline int info_blk_hdr__size(enum diag204_format type)
+{
+	if (type == DIAG204_INFO_SIMPLE)
+		return sizeof(struct diag204_info_blk_hdr);
+	else /* DIAG204_INFO_EXT */
+		return sizeof(struct diag204_x_info_blk_hdr);
+}
+
+static inline __u8 info_blk_hdr__npar(enum diag204_format type, void *hdr)
+{
+	if (type == DIAG204_INFO_SIMPLE)
+		return ((struct diag204_info_blk_hdr *)hdr)->npar;
+	else /* DIAG204_INFO_EXT */
+		return ((struct diag204_x_info_blk_hdr *)hdr)->npar;
+}
+
+static inline __u8 info_blk_hdr__flags(enum diag204_format type, void *hdr)
+{
+	if (type == DIAG204_INFO_SIMPLE)
+		return ((struct diag204_info_blk_hdr *)hdr)->flags;
+	else /* DIAG204_INFO_EXT */
+		return ((struct diag204_x_info_blk_hdr *)hdr)->flags;
+}
+
+/* Partition header */
+
+static inline int part_hdr__size(enum diag204_format type)
+{
+	if (type == DIAG204_INFO_SIMPLE)
+		return sizeof(struct diag204_part_hdr);
+	else /* DIAG204_INFO_EXT */
+		return sizeof(struct diag204_x_part_hdr);
+}
+
+static inline __u8 part_hdr__rcpus(enum diag204_format type, void *hdr)
+{
+	if (type == DIAG204_INFO_SIMPLE)
+		return ((struct diag204_part_hdr *)hdr)->cpus;
+	else /* DIAG204_INFO_EXT */
+		return ((struct diag204_x_part_hdr *)hdr)->rcpus;
+}
+
+static inline void part_hdr__part_name(enum diag204_format type, void *hdr,
+				       char *name)
+{
+	if (type == DIAG204_INFO_SIMPLE)
+		memcpy(name, ((struct diag204_part_hdr *)hdr)->part_name,
+		       DIAG204_LPAR_NAME_LEN);
+	else /* DIAG204_INFO_EXT */
+		memcpy(name, ((struct diag204_x_part_hdr *)hdr)->part_name,
+		       DIAG204_LPAR_NAME_LEN);
+	EBCASC(name, DIAG204_LPAR_NAME_LEN);
+	name[DIAG204_LPAR_NAME_LEN] = 0;
+	strim(name);
+}
+
+/* CPU info block */
+
+static inline int cpu_info__size(enum diag204_format type)
+{
+	if (type == DIAG204_INFO_SIMPLE)
+		return sizeof(struct diag204_cpu_info);
+	else /* DIAG204_INFO_EXT */
+		return sizeof(struct diag204_x_cpu_info);
+}
+
+static inline __u8 cpu_info__ctidx(enum diag204_format type, void *hdr)
+{
+	if (type == DIAG204_INFO_SIMPLE)
+		return ((struct diag204_cpu_info *)hdr)->ctidx;
+	else /* DIAG204_INFO_EXT */
+		return ((struct diag204_x_cpu_info *)hdr)->ctidx;
+}
+
+static inline __u16 cpu_info__cpu_addr(enum diag204_format type, void *hdr)
+{
+	if (type == DIAG204_INFO_SIMPLE)
+		return ((struct diag204_cpu_info *)hdr)->cpu_addr;
+	else /* DIAG204_INFO_EXT */
+		return ((struct diag204_x_cpu_info *)hdr)->cpu_addr;
+}
+
+static inline __u64 cpu_info__acc_time(enum diag204_format type, void *hdr)
+{
+	if (type == DIAG204_INFO_SIMPLE)
+		return ((struct diag204_cpu_info *)hdr)->acc_time;
+	else /* DIAG204_INFO_EXT */
+		return ((struct diag204_x_cpu_info *)hdr)->acc_time;
+}
+
+static inline __u64 cpu_info__lp_time(enum diag204_format type, void *hdr)
+{
+	if (type == DIAG204_INFO_SIMPLE)
+		return ((struct diag204_cpu_info *)hdr)->lp_time;
+	else /* DIAG204_INFO_EXT */
+		return ((struct diag204_x_cpu_info *)hdr)->lp_time;
+}
+
+static inline __u64 cpu_info__online_time(enum diag204_format type, void *hdr)
+{
+	if (type == DIAG204_INFO_SIMPLE)
+		return 0;	/* online_time not available in simple info */
+	else /* DIAG204_INFO_EXT */
+		return ((struct diag204_x_cpu_info *)hdr)->online_time;
+}
+
+/* Physical header */
+
+static inline int phys_hdr__size(enum diag204_format type)
+{
+	if (type == DIAG204_INFO_SIMPLE)
+		return sizeof(struct diag204_phys_hdr);
+	else /* DIAG204_INFO_EXT */
+		return sizeof(struct diag204_x_phys_hdr);
+}
+
+static inline __u8 phys_hdr__cpus(enum diag204_format type, void *hdr)
+{
+	if (type == DIAG204_INFO_SIMPLE)
+		return ((struct diag204_phys_hdr *)hdr)->cpus;
+	else /* DIAG204_INFO_EXT */
+		return ((struct diag204_x_phys_hdr *)hdr)->cpus;
+}
+
+/* Physical CPU info block */
+
+static inline int phys_cpu__size(enum diag204_format type)
+{
+	if (type == DIAG204_INFO_SIMPLE)
+		return sizeof(struct diag204_phys_cpu);
+	else /* DIAG204_INFO_EXT */
+		return sizeof(struct diag204_x_phys_cpu);
+}
+
+static inline __u16 phys_cpu__cpu_addr(enum diag204_format type, void *hdr)
+{
+	if (type == DIAG204_INFO_SIMPLE)
+		return ((struct diag204_phys_cpu *)hdr)->cpu_addr;
+	else /* DIAG204_INFO_EXT */
+		return ((struct diag204_x_phys_cpu *)hdr)->cpu_addr;
+}
+
+static inline __u64 phys_cpu__mgm_time(enum diag204_format type, void *hdr)
+{
+	if (type == DIAG204_INFO_SIMPLE)
+		return ((struct diag204_phys_cpu *)hdr)->mgm_time;
+	else /* DIAG204_INFO_EXT */
+		return ((struct diag204_x_phys_cpu *)hdr)->mgm_time;
+}
+
+static inline __u64 phys_cpu__ctidx(enum diag204_format type, void *hdr)
+{
+	if (type == DIAG204_INFO_SIMPLE)
+		return ((struct diag204_phys_cpu *)hdr)->ctidx;
+	else /* DIAG204_INFO_EXT */
+		return ((struct diag204_x_phys_cpu *)hdr)->ctidx;
+}
+
+/*
+ * Functions to create the directory structure
+ * *******************************************
+ */
+
+static int hypfs_create_cpu_files(struct dentry *cpus_dir, void *cpu_info)
+{
+	struct dentry *cpu_dir;
+	char buffer[TMP_SIZE];
+	void *rc;
+
+	snprintf(buffer, TMP_SIZE, "%d", cpu_info__cpu_addr(diag204_get_info_type(),
+							    cpu_info));
+	cpu_dir = hypfs_mkdir(cpus_dir, buffer);
+	rc = hypfs_create_u64(cpu_dir, "mgmtime",
+			      cpu_info__acc_time(diag204_get_info_type(), cpu_info) -
+			      cpu_info__lp_time(diag204_get_info_type(), cpu_info));
+	if (IS_ERR(rc))
+		return PTR_ERR(rc);
+	rc = hypfs_create_u64(cpu_dir, "cputime",
+			      cpu_info__lp_time(diag204_get_info_type(), cpu_info));
+	if (IS_ERR(rc))
+		return PTR_ERR(rc);
+	if (diag204_get_info_type() == DIAG204_INFO_EXT) {
+		rc = hypfs_create_u64(cpu_dir, "onlinetime",
+				      cpu_info__online_time(diag204_get_info_type(),
+							    cpu_info));
+		if (IS_ERR(rc))
+			return PTR_ERR(rc);
+	}
+	diag224_idx2name(cpu_info__ctidx(diag204_get_info_type(), cpu_info), buffer);
+	rc = hypfs_create_str(cpu_dir, "type", buffer);
+	return PTR_ERR_OR_ZERO(rc);
+}
+
+static void *hypfs_create_lpar_files(struct dentry *systems_dir, void *part_hdr)
+{
+	struct dentry *cpus_dir;
+	struct dentry *lpar_dir;
+	char lpar_name[DIAG204_LPAR_NAME_LEN + 1];
+	void *cpu_info;
+	int i;
+
+	part_hdr__part_name(diag204_get_info_type(), part_hdr, lpar_name);
+	lpar_name[DIAG204_LPAR_NAME_LEN] = 0;
+	lpar_dir = hypfs_mkdir(systems_dir, lpar_name);
+	if (IS_ERR(lpar_dir))
+		return lpar_dir;
+	cpus_dir = hypfs_mkdir(lpar_dir, "cpus");
+	if (IS_ERR(cpus_dir))
+		return cpus_dir;
+	cpu_info = part_hdr + part_hdr__size(diag204_get_info_type());
+	for (i = 0; i < part_hdr__rcpus(diag204_get_info_type(), part_hdr); i++) {
+		int rc;
+
+		rc = hypfs_create_cpu_files(cpus_dir, cpu_info);
+		if (rc)
+			return ERR_PTR(rc);
+		cpu_info += cpu_info__size(diag204_get_info_type());
+	}
+	return cpu_info;
+}
+
+static int hypfs_create_phys_cpu_files(struct dentry *cpus_dir, void *cpu_info)
+{
+	struct dentry *cpu_dir;
+	char buffer[TMP_SIZE];
+	void *rc;
+
+	snprintf(buffer, TMP_SIZE, "%i", phys_cpu__cpu_addr(diag204_get_info_type(),
+							    cpu_info));
+	cpu_dir = hypfs_mkdir(cpus_dir, buffer);
+	if (IS_ERR(cpu_dir))
+		return PTR_ERR(cpu_dir);
+	rc = hypfs_create_u64(cpu_dir, "mgmtime",
+			      phys_cpu__mgm_time(diag204_get_info_type(), cpu_info));
+	if (IS_ERR(rc))
+		return PTR_ERR(rc);
+	diag224_idx2name(phys_cpu__ctidx(diag204_get_info_type(), cpu_info), buffer);
+	rc = hypfs_create_str(cpu_dir, "type", buffer);
+	return PTR_ERR_OR_ZERO(rc);
+}
+
+static void *hypfs_create_phys_files(struct dentry *parent_dir, void *phys_hdr)
+{
+	int i;
+	void *cpu_info;
+	struct dentry *cpus_dir;
+
+	cpus_dir = hypfs_mkdir(parent_dir, "cpus");
+	if (IS_ERR(cpus_dir))
+		return cpus_dir;
+	cpu_info = phys_hdr + phys_hdr__size(diag204_get_info_type());
+	for (i = 0; i < phys_hdr__cpus(diag204_get_info_type(), phys_hdr); i++) {
+		int rc;
+
+		rc = hypfs_create_phys_cpu_files(cpus_dir, cpu_info);
+		if (rc)
+			return ERR_PTR(rc);
+		cpu_info += phys_cpu__size(diag204_get_info_type());
+	}
+	return cpu_info;
+}
+
+int hypfs_diag_create_files(struct dentry *root)
+{
+	struct dentry *systems_dir, *hyp_dir;
+	void *time_hdr, *part_hdr;
+	void *buffer, *ptr;
+	int i, rc, pages;
+
+	buffer = diag204_get_buffer(diag204_get_info_type(), &pages);
+	if (IS_ERR(buffer))
+		return PTR_ERR(buffer);
+	rc = diag204_store(buffer, pages);
+	if (rc)
+		return rc;
+
+	systems_dir = hypfs_mkdir(root, "systems");
+	if (IS_ERR(systems_dir)) {
+		rc = PTR_ERR(systems_dir);
+		goto err_out;
+	}
+	time_hdr = (struct x_info_blk_hdr *)buffer;
+	part_hdr = time_hdr + info_blk_hdr__size(diag204_get_info_type());
+	for (i = 0; i < info_blk_hdr__npar(diag204_get_info_type(), time_hdr); i++) {
+		part_hdr = hypfs_create_lpar_files(systems_dir, part_hdr);
+		if (IS_ERR(part_hdr)) {
+			rc = PTR_ERR(part_hdr);
+			goto err_out;
+		}
+	}
+	if (info_blk_hdr__flags(diag204_get_info_type(), time_hdr) &
+	    DIAG204_LPAR_PHYS_FLG) {
+		ptr = hypfs_create_phys_files(root, part_hdr);
+		if (IS_ERR(ptr)) {
+			rc = PTR_ERR(ptr);
+			goto err_out;
+		}
+	}
+	hyp_dir = hypfs_mkdir(root, "hyp");
+	if (IS_ERR(hyp_dir)) {
+		rc = PTR_ERR(hyp_dir);
+		goto err_out;
+	}
+	ptr = hypfs_create_str(hyp_dir, "type", "LPAR Hypervisor");
+	if (IS_ERR(ptr)) {
+		rc = PTR_ERR(ptr);
+		goto err_out;
+	}
+	rc = 0;
+
+err_out:
+	return rc;
+}
+
+/* Diagnose 224 functions */
+
+static int diag224_idx2name(int index, char *name)
+{
+	memcpy(name, diag224_cpu_names + ((index + 1) * DIAG204_CPU_NAME_LEN),
+	       DIAG204_CPU_NAME_LEN);
+	name[DIAG204_CPU_NAME_LEN] = 0;
+	strim(name);
+	return 0;
+}
+
+static int diag224_get_name_table(void)
+{
+	/* memory must be below 2GB */
+	diag224_cpu_names = (char *)__get_free_page(GFP_KERNEL | GFP_DMA);
+	if (!diag224_cpu_names)
+		return -ENOMEM;
+	if (diag224(diag224_cpu_names)) {
+		free_page((unsigned long)diag224_cpu_names);
+		return -EOPNOTSUPP;
+	}
+	EBCASC(diag224_cpu_names + 16, (*diag224_cpu_names + 1) * 16);
+	return 0;
+}
+
+static void diag224_delete_name_table(void)
+{
+	free_page((unsigned long)diag224_cpu_names);
+}
+
+int __init __hypfs_diag_fs_init(void)
+{
+	if (MACHINE_IS_LPAR)
+		return diag224_get_name_table();
+	return 0;
+}
+
+void __hypfs_diag_fs_exit(void)
+{
+	diag224_delete_name_table();
+}
diff --git a/arch/s390/hypfs/hypfs_sprp.c b/arch/s390/hypfs/hypfs_sprp.c
new file mode 100644
index 0000000000..f5f7e78ddc
--- /dev/null
+++ b/arch/s390/hypfs/hypfs_sprp.c
@@ -0,0 +1,151 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ *    Hypervisor filesystem for Linux on s390.
+ *    Set Partition-Resource Parameter interface.
+ *
+ *    Copyright IBM Corp. 2013
+ *    Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>
+ */
+
+#include <linux/compat.h>
+#include <linux/errno.h>
+#include <linux/gfp.h>
+#include <linux/string.h>
+#include <linux/types.h>
+#include <linux/uaccess.h>
+#include <asm/diag.h>
+#include <asm/sclp.h>
+#include "hypfs.h"
+
+#define DIAG304_SET_WEIGHTS	0
+#define DIAG304_QUERY_PRP	1
+#define DIAG304_SET_CAPPING	2
+
+#define DIAG304_CMD_MAX		2
+
+static inline unsigned long __hypfs_sprp_diag304(void *data, unsigned long cmd)
+{
+	union register_pair r1 = { .even = (unsigned long)data, };
+
+	asm volatile("diag %[r1],%[r3],0x304\n"
+		     : [r1] "+&d" (r1.pair)
+		     : [r3] "d" (cmd)
+		     : "memory");
+	return r1.odd;
+}
+
+static unsigned long hypfs_sprp_diag304(void *data, unsigned long cmd)
+{
+	diag_stat_inc(DIAG_STAT_X304);
+	return __hypfs_sprp_diag304(data, cmd);
+}
+
+static void hypfs_sprp_free(const void *data)
+{
+	free_page((unsigned long) data);
+}
+
+static int hypfs_sprp_create(void **data_ptr, void **free_ptr, size_t *size)
+{
+	unsigned long rc;
+	void *data;
+
+	data = (void *) get_zeroed_page(GFP_KERNEL);
+	if (!data)
+		return -ENOMEM;
+	rc = hypfs_sprp_diag304(data, DIAG304_QUERY_PRP);
+	if (rc != 1) {
+		*data_ptr = *free_ptr = NULL;
+		*size = 0;
+		free_page((unsigned long) data);
+		return -EIO;
+	}
+	*data_ptr = *free_ptr = data;
+	*size = PAGE_SIZE;
+	return 0;
+}
+
+static int __hypfs_sprp_ioctl(void __user *user_area)
+{
+	struct hypfs_diag304 *diag304;
+	unsigned long cmd;
+	void __user *udata;
+	void *data;
+	int rc;
+
+	rc = -ENOMEM;
+	data = (void *) get_zeroed_page(GFP_KERNEL | GFP_DMA);
+	diag304 = kzalloc(sizeof(*diag304), GFP_KERNEL);
+	if (!data || !diag304)
+		goto out;
+
+	rc = -EFAULT;
+	if (copy_from_user(diag304, user_area, sizeof(*diag304)))
+		goto out;
+	rc = -EINVAL;
+	if ((diag304->args[0] >> 8) != 0 || diag304->args[1] > DIAG304_CMD_MAX)
+		goto out;
+
+	rc = -EFAULT;
+	udata = (void __user *)(unsigned long) diag304->data;
+	if (diag304->args[1] == DIAG304_SET_WEIGHTS ||
+	    diag304->args[1] == DIAG304_SET_CAPPING)
+		if (copy_from_user(data, udata, PAGE_SIZE))
+			goto out;
+
+	cmd = *(unsigned long *) &diag304->args[0];
+	diag304->rc = hypfs_sprp_diag304(data, cmd);
+
+	if (diag304->args[1] == DIAG304_QUERY_PRP)
+		if (copy_to_user(udata, data, PAGE_SIZE)) {
+			rc = -EFAULT;
+			goto out;
+		}
+
+	rc = copy_to_user(user_area, diag304, sizeof(*diag304)) ? -EFAULT : 0;
+out:
+	kfree(diag304);
+	free_page((unsigned long) data);
+	return rc;
+}
+
+static long hypfs_sprp_ioctl(struct file *file, unsigned int cmd,
+			       unsigned long arg)
+{
+	void __user *argp;
+
+	if (!capable(CAP_SYS_ADMIN))
+		return -EACCES;
+	if (is_compat_task())
+		argp = compat_ptr(arg);
+	else
+		argp = (void __user *) arg;
+	switch (cmd) {
+	case HYPFS_DIAG304:
+		return __hypfs_sprp_ioctl(argp);
+	default: /* unknown ioctl number */
+		return -ENOTTY;
+	}
+	return 0;
+}
+
+static struct hypfs_dbfs_file hypfs_sprp_file = {
+	.name		= "diag_304",
+	.data_create	= hypfs_sprp_create,
+	.data_free	= hypfs_sprp_free,
+	.unlocked_ioctl = hypfs_sprp_ioctl,
+};
+
+void hypfs_sprp_init(void)
+{
+	if (!sclp.has_sprp)
+		return;
+	hypfs_dbfs_create_file(&hypfs_sprp_file);
+}
+
+void hypfs_sprp_exit(void)
+{
+	if (!sclp.has_sprp)
+		return;
+	hypfs_dbfs_remove_file(&hypfs_sprp_file);
+}
diff --git a/arch/s390/hypfs/hypfs_vm.c b/arch/s390/hypfs/hypfs_vm.c
new file mode 100644
index 0000000000..3db40ad853
--- /dev/null
+++ b/arch/s390/hypfs/hypfs_vm.c
@@ -0,0 +1,141 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ *    Hypervisor filesystem for Linux on s390. z/VM implementation.
+ *
+ *    Copyright IBM Corp. 2006
+ *    Author(s): Michael Holzheu <holzheu@de.ibm.com>
+ */
+
+#include <linux/types.h>
+#include <linux/errno.h>
+#include <linux/string.h>
+#include <linux/vmalloc.h>
+#include <asm/extable.h>
+#include <asm/diag.h>
+#include <asm/ebcdic.h>
+#include <asm/timex.h>
+#include "hypfs_vm.h"
+#include "hypfs.h"
+
+#define DBFS_D2FC_HDR_VERSION 0
+
+static char local_guest[] = "        ";
+static char all_guests[] = "*       ";
+static char *all_groups = all_guests;
+char *diag2fc_guest_query;
+
+static int diag2fc(int size, char* query, void *addr)
+{
+	unsigned long residual_cnt;
+	unsigned long rc;
+	struct diag2fc_parm_list parm_list;
+
+	memcpy(parm_list.userid, query, DIAG2FC_NAME_LEN);
+	ASCEBC(parm_list.userid, DIAG2FC_NAME_LEN);
+	memcpy(parm_list.aci_grp, all_groups, DIAG2FC_NAME_LEN);
+	ASCEBC(parm_list.aci_grp, DIAG2FC_NAME_LEN);
+	parm_list.addr = (unsigned long)addr;
+	parm_list.size = size;
+	parm_list.fmt = 0x02;
+	rc = -1;
+
+	diag_stat_inc(DIAG_STAT_X2FC);
+	asm volatile(
+		"	diag    %0,%1,0x2fc\n"
+		"0:	nopr	%%r7\n"
+		EX_TABLE(0b,0b)
+		: "=d" (residual_cnt), "+d" (rc) : "0" (&parm_list) : "memory");
+
+	if ((rc != 0 ) && (rc != -2))
+		return rc;
+	else
+		return -residual_cnt;
+}
+
+/*
+ * Allocate buffer for "query" and store diag 2fc at "offset"
+ */
+void *diag2fc_store(char *query, unsigned int *count, int offset)
+{
+	void *data;
+	int size;
+
+	do {
+		size = diag2fc(0, query, NULL);
+		if (size < 0)
+			return ERR_PTR(-EACCES);
+		data = vmalloc(size + offset);
+		if (!data)
+			return ERR_PTR(-ENOMEM);
+		if (diag2fc(size, query, data + offset) == 0)
+			break;
+		vfree(data);
+	} while (1);
+	*count = (size / sizeof(struct diag2fc_data));
+
+	return data;
+}
+
+void diag2fc_free(const void *data)
+{
+	vfree(data);
+}
+
+struct dbfs_d2fc_hdr {
+	u64	len;		/* Length of d2fc buffer without header */
+	u16	version;	/* Version of header */
+	union tod_clock tod_ext; /* TOD clock for d2fc */
+	u64	count;		/* Number of VM guests in d2fc buffer */
+	char	reserved[30];
+} __attribute__ ((packed));
+
+struct dbfs_d2fc {
+	struct dbfs_d2fc_hdr	hdr;	/* 64 byte header */
+	char			buf[];	/* d2fc buffer */
+} __attribute__ ((packed));
+
+static int dbfs_diag2fc_create(void **data, void **data_free_ptr, size_t *size)
+{
+	struct dbfs_d2fc *d2fc;
+	unsigned int count;
+
+	d2fc = diag2fc_store(diag2fc_guest_query, &count, sizeof(d2fc->hdr));
+	if (IS_ERR(d2fc))
+		return PTR_ERR(d2fc);
+	store_tod_clock_ext(&d2fc->hdr.tod_ext);
+	d2fc->hdr.len = count * sizeof(struct diag2fc_data);
+	d2fc->hdr.version = DBFS_D2FC_HDR_VERSION;
+	d2fc->hdr.count = count;
+	memset(&d2fc->hdr.reserved, 0, sizeof(d2fc->hdr.reserved));
+	*data = d2fc;
+	*data_free_ptr = d2fc;
+	*size = d2fc->hdr.len + sizeof(struct dbfs_d2fc_hdr);
+	return 0;
+}
+
+static struct hypfs_dbfs_file dbfs_file_2fc = {
+	.name		= "diag_2fc",
+	.data_create	= dbfs_diag2fc_create,
+	.data_free	= diag2fc_free,
+};
+
+int hypfs_vm_init(void)
+{
+	if (!MACHINE_IS_VM)
+		return 0;
+	if (diag2fc(0, all_guests, NULL) > 0)
+		diag2fc_guest_query = all_guests;
+	else if (diag2fc(0, local_guest, NULL) > 0)
+		diag2fc_guest_query = local_guest;
+	else
+		return -EACCES;
+	hypfs_dbfs_create_file(&dbfs_file_2fc);
+	return 0;
+}
+
+void hypfs_vm_exit(void)
+{
+	if (!MACHINE_IS_VM)
+		return;
+	hypfs_dbfs_remove_file(&dbfs_file_2fc);
+}
diff --git a/arch/s390/hypfs/hypfs_vm.h b/arch/s390/hypfs/hypfs_vm.h
new file mode 100644
index 0000000000..fe2e5851ad
--- /dev/null
+++ b/arch/s390/hypfs/hypfs_vm.h
@@ -0,0 +1,50 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ *    Hypervisor filesystem for Linux on s390. z/VM implementation.
+ *
+ *    Copyright IBM Corp. 2006
+ *    Author(s): Michael Holzheu <holzheu@de.ibm.com>
+ */
+
+#ifndef _S390_HYPFS_VM_H_
+#define _S390_HYPFS_VM_H_
+
+#define DIAG2FC_NAME_LEN 8
+
+struct diag2fc_data {
+	__u32 version;
+	__u32 flags;
+	__u64 used_cpu;
+	__u64 el_time;
+	__u64 mem_min_kb;
+	__u64 mem_max_kb;
+	__u64 mem_share_kb;
+	__u64 mem_used_kb;
+	__u32 pcpus;
+	__u32 lcpus;
+	__u32 vcpus;
+	__u32 ocpus;
+	__u32 cpu_max;
+	__u32 cpu_shares;
+	__u32 cpu_use_samp;
+	__u32 cpu_delay_samp;
+	__u32 page_wait_samp;
+	__u32 idle_samp;
+	__u32 other_samp;
+	__u32 total_samp;
+	char  guest_name[DIAG2FC_NAME_LEN];
+};
+
+struct diag2fc_parm_list {
+	char userid[DIAG2FC_NAME_LEN];
+	char aci_grp[DIAG2FC_NAME_LEN];
+	__u64 addr;
+	__u32 size;
+	__u32 fmt;
+};
+
+void *diag2fc_store(char *query, unsigned int *count, int offset);
+void diag2fc_free(const void *data);
+extern char *diag2fc_guest_query;
+
+#endif /* _S390_HYPFS_VM_H_ */
diff --git a/arch/s390/hypfs/hypfs_vm_fs.c b/arch/s390/hypfs/hypfs_vm_fs.c
new file mode 100644
index 0000000000..6011289afa
--- /dev/null
+++ b/arch/s390/hypfs/hypfs_vm_fs.c
@@ -0,0 +1,139 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ *    Hypervisor filesystem for Linux on s390. z/VM implementation.
+ *
+ *    Copyright IBM Corp. 2006
+ *    Author(s): Michael Holzheu <holzheu@de.ibm.com>
+ */
+
+#include <linux/types.h>
+#include <linux/errno.h>
+#include <linux/string.h>
+#include <linux/vmalloc.h>
+#include <asm/extable.h>
+#include <asm/diag.h>
+#include <asm/ebcdic.h>
+#include <asm/timex.h>
+#include "hypfs_vm.h"
+#include "hypfs.h"
+
+#define ATTRIBUTE(dir, name, member) \
+do { \
+	void *rc; \
+	rc = hypfs_create_u64(dir, name, member); \
+	if (IS_ERR(rc)) \
+		return PTR_ERR(rc); \
+} while (0)
+
+static int hypfs_vm_create_guest(struct dentry *systems_dir,
+				 struct diag2fc_data *data)
+{
+	char guest_name[DIAG2FC_NAME_LEN + 1] = {};
+	struct dentry *guest_dir, *cpus_dir, *samples_dir, *mem_dir;
+	int dedicated_flag, capped_value;
+
+	capped_value = (data->flags & 0x00000006) >> 1;
+	dedicated_flag = (data->flags & 0x00000008) >> 3;
+
+	/* guest dir */
+	memcpy(guest_name, data->guest_name, DIAG2FC_NAME_LEN);
+	EBCASC(guest_name, DIAG2FC_NAME_LEN);
+	strim(guest_name);
+	guest_dir = hypfs_mkdir(systems_dir, guest_name);
+	if (IS_ERR(guest_dir))
+		return PTR_ERR(guest_dir);
+	ATTRIBUTE(guest_dir, "onlinetime_us", data->el_time);
+
+	/* logical cpu information */
+	cpus_dir = hypfs_mkdir(guest_dir, "cpus");
+	if (IS_ERR(cpus_dir))
+		return PTR_ERR(cpus_dir);
+	ATTRIBUTE(cpus_dir, "cputime_us", data->used_cpu);
+	ATTRIBUTE(cpus_dir, "capped", capped_value);
+	ATTRIBUTE(cpus_dir, "dedicated", dedicated_flag);
+	ATTRIBUTE(cpus_dir, "count", data->vcpus);
+	/*
+	 * Note: The "weight_min" attribute got the wrong name.
+	 * The value represents the number of non-stopped (operating)
+	 * CPUS.
+	 */
+	ATTRIBUTE(cpus_dir, "weight_min", data->ocpus);
+	ATTRIBUTE(cpus_dir, "weight_max", data->cpu_max);
+	ATTRIBUTE(cpus_dir, "weight_cur", data->cpu_shares);
+
+	/* memory information */
+	mem_dir = hypfs_mkdir(guest_dir, "mem");
+	if (IS_ERR(mem_dir))
+		return PTR_ERR(mem_dir);
+	ATTRIBUTE(mem_dir, "min_KiB", data->mem_min_kb);
+	ATTRIBUTE(mem_dir, "max_KiB", data->mem_max_kb);
+	ATTRIBUTE(mem_dir, "used_KiB", data->mem_used_kb);
+	ATTRIBUTE(mem_dir, "share_KiB", data->mem_share_kb);
+
+	/* samples */
+	samples_dir = hypfs_mkdir(guest_dir, "samples");
+	if (IS_ERR(samples_dir))
+		return PTR_ERR(samples_dir);
+	ATTRIBUTE(samples_dir, "cpu_using", data->cpu_use_samp);
+	ATTRIBUTE(samples_dir, "cpu_delay", data->cpu_delay_samp);
+	ATTRIBUTE(samples_dir, "mem_delay", data->page_wait_samp);
+	ATTRIBUTE(samples_dir, "idle", data->idle_samp);
+	ATTRIBUTE(samples_dir, "other", data->other_samp);
+	ATTRIBUTE(samples_dir, "total", data->total_samp);
+	return 0;
+}
+
+int hypfs_vm_create_files(struct dentry *root)
+{
+	struct dentry *dir, *file;
+	struct diag2fc_data *data;
+	unsigned int count = 0;
+	int rc, i;
+
+	data = diag2fc_store(diag2fc_guest_query, &count, 0);
+	if (IS_ERR(data))
+		return PTR_ERR(data);
+
+	/* Hypervisor Info */
+	dir = hypfs_mkdir(root, "hyp");
+	if (IS_ERR(dir)) {
+		rc = PTR_ERR(dir);
+		goto failed;
+	}
+	file = hypfs_create_str(dir, "type", "z/VM Hypervisor");
+	if (IS_ERR(file)) {
+		rc = PTR_ERR(file);
+		goto failed;
+	}
+
+	/* physical cpus */
+	dir = hypfs_mkdir(root, "cpus");
+	if (IS_ERR(dir)) {
+		rc = PTR_ERR(dir);
+		goto failed;
+	}
+	file = hypfs_create_u64(dir, "count", data->lcpus);
+	if (IS_ERR(file)) {
+		rc = PTR_ERR(file);
+		goto failed;
+	}
+
+	/* guests */
+	dir = hypfs_mkdir(root, "systems");
+	if (IS_ERR(dir)) {
+		rc = PTR_ERR(dir);
+		goto failed;
+	}
+
+	for (i = 0; i < count; i++) {
+		rc = hypfs_vm_create_guest(dir, &data[i]);
+		if (rc)
+			goto failed;
+	}
+	diag2fc_free(data);
+	return 0;
+
+failed:
+	diag2fc_free(data);
+	return rc;
+}
diff --git a/arch/s390/hypfs/inode.c b/arch/s390/hypfs/inode.c
new file mode 100644
index 0000000000..ada8314993
--- /dev/null
+++ b/arch/s390/hypfs/inode.c
@@ -0,0 +1,477 @@
+// SPDX-License-Identifier: GPL-1.0+
+/*
+ *    Hypervisor filesystem for Linux on s390.
+ *
+ *    Copyright IBM Corp. 2006, 2008
+ *    Author(s): Michael Holzheu <holzheu@de.ibm.com>
+ */
+
+#define KMSG_COMPONENT "hypfs"
+#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
+
+#include <linux/types.h>
+#include <linux/errno.h>
+#include <linux/fs.h>
+#include <linux/fs_context.h>
+#include <linux/fs_parser.h>
+#include <linux/namei.h>
+#include <linux/vfs.h>
+#include <linux/slab.h>
+#include <linux/pagemap.h>
+#include <linux/time.h>
+#include <linux/sysfs.h>
+#include <linux/init.h>
+#include <linux/kobject.h>
+#include <linux/seq_file.h>
+#include <linux/uio.h>
+#include <asm/ebcdic.h>
+#include "hypfs.h"
+
+#define HYPFS_MAGIC 0x687970	/* ASCII 'hyp' */
+#define TMP_SIZE 64		/* size of temporary buffers */
+
+static struct dentry *hypfs_create_update_file(struct dentry *dir);
+
+struct hypfs_sb_info {
+	kuid_t uid;			/* uid used for files and dirs */
+	kgid_t gid;			/* gid used for files and dirs */
+	struct dentry *update_file;	/* file to trigger update */
+	time64_t last_update;		/* last update, CLOCK_MONOTONIC time */
+	struct mutex lock;		/* lock to protect update process */
+};
+
+static const struct file_operations hypfs_file_ops;
+static struct file_system_type hypfs_type;
+static const struct super_operations hypfs_s_ops;
+
+/* start of list of all dentries, which have to be deleted on update */
+static struct dentry *hypfs_last_dentry;
+
+static void hypfs_update_update(struct super_block *sb)
+{
+	struct hypfs_sb_info *sb_info = sb->s_fs_info;
+	struct inode *inode = d_inode(sb_info->update_file);
+
+	sb_info->last_update = ktime_get_seconds();
+	inode->i_atime = inode->i_mtime = inode_set_ctime_current(inode);
+}
+
+/* directory tree removal functions */
+
+static void hypfs_add_dentry(struct dentry *dentry)
+{
+	dentry->d_fsdata = hypfs_last_dentry;
+	hypfs_last_dentry = dentry;
+}
+
+static void hypfs_remove(struct dentry *dentry)
+{
+	struct dentry *parent;
+
+	parent = dentry->d_parent;
+	inode_lock(d_inode(parent));
+	if (simple_positive(dentry)) {
+		if (d_is_dir(dentry))
+			simple_rmdir(d_inode(parent), dentry);
+		else
+			simple_unlink(d_inode(parent), dentry);
+	}
+	d_drop(dentry);
+	dput(dentry);
+	inode_unlock(d_inode(parent));
+}
+
+static void hypfs_delete_tree(struct dentry *root)
+{
+	while (hypfs_last_dentry) {
+		struct dentry *next_dentry;
+		next_dentry = hypfs_last_dentry->d_fsdata;
+		hypfs_remove(hypfs_last_dentry);
+		hypfs_last_dentry = next_dentry;
+	}
+}
+
+static struct inode *hypfs_make_inode(struct super_block *sb, umode_t mode)
+{
+	struct inode *ret = new_inode(sb);
+
+	if (ret) {
+		struct hypfs_sb_info *hypfs_info = sb->s_fs_info;
+		ret->i_ino = get_next_ino();
+		ret->i_mode = mode;
+		ret->i_uid = hypfs_info->uid;
+		ret->i_gid = hypfs_info->gid;
+		ret->i_atime = ret->i_mtime = inode_set_ctime_current(ret);
+		if (S_ISDIR(mode))
+			set_nlink(ret, 2);
+	}
+	return ret;
+}
+
+static void hypfs_evict_inode(struct inode *inode)
+{
+	clear_inode(inode);
+	kfree(inode->i_private);
+}
+
+static int hypfs_open(struct inode *inode, struct file *filp)
+{
+	char *data = file_inode(filp)->i_private;
+	struct hypfs_sb_info *fs_info;
+
+	if (filp->f_mode & FMODE_WRITE) {
+		if (!(inode->i_mode & S_IWUGO))
+			return -EACCES;
+	}
+	if (filp->f_mode & FMODE_READ) {
+		if (!(inode->i_mode & S_IRUGO))
+			return -EACCES;
+	}
+
+	fs_info = inode->i_sb->s_fs_info;
+	if(data) {
+		mutex_lock(&fs_info->lock);
+		filp->private_data = kstrdup(data, GFP_KERNEL);
+		if (!filp->private_data) {
+			mutex_unlock(&fs_info->lock);
+			return -ENOMEM;
+		}
+		mutex_unlock(&fs_info->lock);
+	}
+	return nonseekable_open(inode, filp);
+}
+
+static ssize_t hypfs_read_iter(struct kiocb *iocb, struct iov_iter *to)
+{
+	struct file *file = iocb->ki_filp;
+	char *data = file->private_data;
+	size_t available = strlen(data);
+	loff_t pos = iocb->ki_pos;
+	size_t count;
+
+	if (pos < 0)
+		return -EINVAL;
+	if (pos >= available || !iov_iter_count(to))
+		return 0;
+	count = copy_to_iter(data + pos, available - pos, to);
+	if (!count)
+		return -EFAULT;
+	iocb->ki_pos = pos + count;
+	file_accessed(file);
+	return count;
+}
+
+static ssize_t hypfs_write_iter(struct kiocb *iocb, struct iov_iter *from)
+{
+	int rc;
+	struct super_block *sb = file_inode(iocb->ki_filp)->i_sb;
+	struct hypfs_sb_info *fs_info = sb->s_fs_info;
+	size_t count = iov_iter_count(from);
+
+	/*
+	 * Currently we only allow one update per second for two reasons:
+	 * 1. diag 204 is VERY expensive
+	 * 2. If several processes do updates in parallel and then read the
+	 *    hypfs data, the likelihood of collisions is reduced, if we restrict
+	 *    the minimum update interval. A collision occurs, if during the
+	 *    data gathering of one process another process triggers an update
+	 *    If the first process wants to ensure consistent data, it has
+	 *    to restart data collection in this case.
+	 */
+	mutex_lock(&fs_info->lock);
+	if (fs_info->last_update == ktime_get_seconds()) {
+		rc = -EBUSY;
+		goto out;
+	}
+	hypfs_delete_tree(sb->s_root);
+	if (MACHINE_IS_VM)
+		rc = hypfs_vm_create_files(sb->s_root);
+	else
+		rc = hypfs_diag_create_files(sb->s_root);
+	if (rc) {
+		pr_err("Updating the hypfs tree failed\n");
+		hypfs_delete_tree(sb->s_root);
+		goto out;
+	}
+	hypfs_update_update(sb);
+	rc = count;
+	iov_iter_advance(from, count);
+out:
+	mutex_unlock(&fs_info->lock);
+	return rc;
+}
+
+static int hypfs_release(struct inode *inode, struct file *filp)
+{
+	kfree(filp->private_data);
+	return 0;
+}
+
+enum { Opt_uid, Opt_gid, };
+
+static const struct fs_parameter_spec hypfs_fs_parameters[] = {
+	fsparam_u32("gid", Opt_gid),
+	fsparam_u32("uid", Opt_uid),
+	{}
+};
+
+static int hypfs_parse_param(struct fs_context *fc, struct fs_parameter *param)
+{
+	struct hypfs_sb_info *hypfs_info = fc->s_fs_info;
+	struct fs_parse_result result;
+	kuid_t uid;
+	kgid_t gid;
+	int opt;
+
+	opt = fs_parse(fc, hypfs_fs_parameters, param, &result);
+	if (opt < 0)
+		return opt;
+
+	switch (opt) {
+	case Opt_uid:
+		uid = make_kuid(current_user_ns(), result.uint_32);
+		if (!uid_valid(uid))
+			return invalf(fc, "Unknown uid");
+		hypfs_info->uid = uid;
+		break;
+	case Opt_gid:
+		gid = make_kgid(current_user_ns(), result.uint_32);
+		if (!gid_valid(gid))
+			return invalf(fc, "Unknown gid");
+		hypfs_info->gid = gid;
+		break;
+	}
+	return 0;
+}
+
+static int hypfs_show_options(struct seq_file *s, struct dentry *root)
+{
+	struct hypfs_sb_info *hypfs_info = root->d_sb->s_fs_info;
+
+	seq_printf(s, ",uid=%u", from_kuid_munged(&init_user_ns, hypfs_info->uid));
+	seq_printf(s, ",gid=%u", from_kgid_munged(&init_user_ns, hypfs_info->gid));
+	return 0;
+}
+
+static int hypfs_fill_super(struct super_block *sb, struct fs_context *fc)
+{
+	struct hypfs_sb_info *sbi = sb->s_fs_info;
+	struct inode *root_inode;
+	struct dentry *root_dentry, *update_file;
+	int rc;
+
+	sb->s_blocksize = PAGE_SIZE;
+	sb->s_blocksize_bits = PAGE_SHIFT;
+	sb->s_magic = HYPFS_MAGIC;
+	sb->s_op = &hypfs_s_ops;
+
+	root_inode = hypfs_make_inode(sb, S_IFDIR | 0755);
+	if (!root_inode)
+		return -ENOMEM;
+	root_inode->i_op = &simple_dir_inode_operations;
+	root_inode->i_fop = &simple_dir_operations;
+	sb->s_root = root_dentry = d_make_root(root_inode);
+	if (!root_dentry)
+		return -ENOMEM;
+	if (MACHINE_IS_VM)
+		rc = hypfs_vm_create_files(root_dentry);
+	else
+		rc = hypfs_diag_create_files(root_dentry);
+	if (rc)
+		return rc;
+	update_file = hypfs_create_update_file(root_dentry);
+	if (IS_ERR(update_file))
+		return PTR_ERR(update_file);
+	sbi->update_file = update_file;
+	hypfs_update_update(sb);
+	pr_info("Hypervisor filesystem mounted\n");
+	return 0;
+}
+
+static int hypfs_get_tree(struct fs_context *fc)
+{
+	return get_tree_single(fc, hypfs_fill_super);
+}
+
+static void hypfs_free_fc(struct fs_context *fc)
+{
+	kfree(fc->s_fs_info);
+}
+
+static const struct fs_context_operations hypfs_context_ops = {
+	.free		= hypfs_free_fc,
+	.parse_param	= hypfs_parse_param,
+	.get_tree	= hypfs_get_tree,
+};
+
+static int hypfs_init_fs_context(struct fs_context *fc)
+{
+	struct hypfs_sb_info *sbi;
+
+	sbi = kzalloc(sizeof(struct hypfs_sb_info), GFP_KERNEL);
+	if (!sbi)
+		return -ENOMEM;
+
+	mutex_init(&sbi->lock);
+	sbi->uid = current_uid();
+	sbi->gid = current_gid();
+
+	fc->s_fs_info = sbi;
+	fc->ops = &hypfs_context_ops;
+	return 0;
+}
+
+static void hypfs_kill_super(struct super_block *sb)
+{
+	struct hypfs_sb_info *sb_info = sb->s_fs_info;
+
+	if (sb->s_root)
+		hypfs_delete_tree(sb->s_root);
+	if (sb_info && sb_info->update_file)
+		hypfs_remove(sb_info->update_file);
+	kfree(sb->s_fs_info);
+	sb->s_fs_info = NULL;
+	kill_litter_super(sb);
+}
+
+static struct dentry *hypfs_create_file(struct dentry *parent, const char *name,
+					char *data, umode_t mode)
+{
+	struct dentry *dentry;
+	struct inode *inode;
+
+	inode_lock(d_inode(parent));
+	dentry = lookup_one_len(name, parent, strlen(name));
+	if (IS_ERR(dentry)) {
+		dentry = ERR_PTR(-ENOMEM);
+		goto fail;
+	}
+	inode = hypfs_make_inode(parent->d_sb, mode);
+	if (!inode) {
+		dput(dentry);
+		dentry = ERR_PTR(-ENOMEM);
+		goto fail;
+	}
+	if (S_ISREG(mode)) {
+		inode->i_fop = &hypfs_file_ops;
+		if (data)
+			inode->i_size = strlen(data);
+		else
+			inode->i_size = 0;
+	} else if (S_ISDIR(mode)) {
+		inode->i_op = &simple_dir_inode_operations;
+		inode->i_fop = &simple_dir_operations;
+		inc_nlink(d_inode(parent));
+	} else
+		BUG();
+	inode->i_private = data;
+	d_instantiate(dentry, inode);
+	dget(dentry);
+fail:
+	inode_unlock(d_inode(parent));
+	return dentry;
+}
+
+struct dentry *hypfs_mkdir(struct dentry *parent, const char *name)
+{
+	struct dentry *dentry;
+
+	dentry = hypfs_create_file(parent, name, NULL, S_IFDIR | DIR_MODE);
+	if (IS_ERR(dentry))
+		return dentry;
+	hypfs_add_dentry(dentry);
+	return dentry;
+}
+
+static struct dentry *hypfs_create_update_file(struct dentry *dir)
+{
+	struct dentry *dentry;
+
+	dentry = hypfs_create_file(dir, "update", NULL,
+				   S_IFREG | UPDATE_FILE_MODE);
+	/*
+	 * We do not put the update file on the 'delete' list with
+	 * hypfs_add_dentry(), since it should not be removed when the tree
+	 * is updated.
+	 */
+	return dentry;
+}
+
+struct dentry *hypfs_create_u64(struct dentry *dir,
+				const char *name, __u64 value)
+{
+	char *buffer;
+	char tmp[TMP_SIZE];
+	struct dentry *dentry;
+
+	snprintf(tmp, TMP_SIZE, "%llu\n", (unsigned long long int)value);
+	buffer = kstrdup(tmp, GFP_KERNEL);
+	if (!buffer)
+		return ERR_PTR(-ENOMEM);
+	dentry =
+	    hypfs_create_file(dir, name, buffer, S_IFREG | REG_FILE_MODE);
+	if (IS_ERR(dentry)) {
+		kfree(buffer);
+		return ERR_PTR(-ENOMEM);
+	}
+	hypfs_add_dentry(dentry);
+	return dentry;
+}
+
+struct dentry *hypfs_create_str(struct dentry *dir,
+				const char *name, char *string)
+{
+	char *buffer;
+	struct dentry *dentry;
+
+	buffer = kmalloc(strlen(string) + 2, GFP_KERNEL);
+	if (!buffer)
+		return ERR_PTR(-ENOMEM);
+	sprintf(buffer, "%s\n", string);
+	dentry =
+	    hypfs_create_file(dir, name, buffer, S_IFREG | REG_FILE_MODE);
+	if (IS_ERR(dentry)) {
+		kfree(buffer);
+		return ERR_PTR(-ENOMEM);
+	}
+	hypfs_add_dentry(dentry);
+	return dentry;
+}
+
+static const struct file_operations hypfs_file_ops = {
+	.open		= hypfs_open,
+	.release	= hypfs_release,
+	.read_iter	= hypfs_read_iter,
+	.write_iter	= hypfs_write_iter,
+	.llseek		= no_llseek,
+};
+
+static struct file_system_type hypfs_type = {
+	.owner		= THIS_MODULE,
+	.name		= "s390_hypfs",
+	.init_fs_context = hypfs_init_fs_context,
+	.parameters	= hypfs_fs_parameters,
+	.kill_sb	= hypfs_kill_super
+};
+
+static const struct super_operations hypfs_s_ops = {
+	.statfs		= simple_statfs,
+	.evict_inode	= hypfs_evict_inode,
+	.show_options	= hypfs_show_options,
+};
+
+int __init __hypfs_fs_init(void)
+{
+	int rc;
+
+	rc = sysfs_create_mount_point(hypervisor_kobj, "s390");
+	if (rc)
+		return rc;
+	rc = register_filesystem(&hypfs_type);
+	if (rc)
+		goto fail;
+	return 0;
+fail:
+	sysfs_remove_mount_point(hypervisor_kobj, "s390");
+	return rc;
+}
diff --git a/arch/s390/include/asm/Kbuild b/arch/s390/include/asm/Kbuild
new file mode 100644
index 0000000000..4b904110d2
--- /dev/null
+++ b/arch/s390/include/asm/Kbuild
@@ -0,0 +1,9 @@
+# SPDX-License-Identifier: GPL-2.0
+generated-y += dis-defs.h
+generated-y += facility-defs.h
+generated-y += syscall_table.h
+generated-y += unistd_nr.h
+
+generic-y += asm-offsets.h
+generic-y += kvm_types.h
+generic-y += mcs_spinlock.h
diff --git a/arch/s390/include/asm/abs_lowcore.h b/arch/s390/include/asm/abs_lowcore.h
new file mode 100644
index 0000000000..6f264b79e3
--- /dev/null
+++ b/arch/s390/include/asm/abs_lowcore.h
@@ -0,0 +1,27 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_S390_ABS_LOWCORE_H
+#define _ASM_S390_ABS_LOWCORE_H
+
+#include <asm/lowcore.h>
+
+#define ABS_LOWCORE_MAP_SIZE	(NR_CPUS * sizeof(struct lowcore))
+
+extern unsigned long __abs_lowcore;
+
+int abs_lowcore_map(int cpu, struct lowcore *lc, bool alloc);
+void abs_lowcore_unmap(int cpu);
+
+static inline struct lowcore *get_abs_lowcore(void)
+{
+	int cpu;
+
+	cpu = get_cpu();
+	return ((struct lowcore *)__abs_lowcore) + cpu;
+}
+
+static inline void put_abs_lowcore(struct lowcore *lc)
+{
+	put_cpu();
+}
+
+#endif /* _ASM_S390_ABS_LOWCORE_H */
diff --git a/arch/s390/include/asm/airq.h b/arch/s390/include/asm/airq.h
new file mode 100644
index 0000000000..c4c28c2609
--- /dev/null
+++ b/arch/s390/include/asm/airq.h
@@ -0,0 +1,110 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ *    Copyright IBM Corp. 2002, 2007
+ *    Author(s): Ingo Adlung <adlung@de.ibm.com>
+ *		 Cornelia Huck <cornelia.huck@de.ibm.com>
+ *		 Arnd Bergmann <arndb@de.ibm.com>
+ *		 Peter Oberparleiter <peter.oberparleiter@de.ibm.com>
+ */
+
+#ifndef _ASM_S390_AIRQ_H
+#define _ASM_S390_AIRQ_H
+
+#include <linux/bit_spinlock.h>
+#include <linux/dma-mapping.h>
+#include <asm/tpi.h>
+
+struct airq_struct {
+	struct hlist_node list;		/* Handler queueing. */
+	void (*handler)(struct airq_struct *airq, struct tpi_info *tpi_info);
+	u8 *lsi_ptr;			/* Local-Summary-Indicator pointer */
+	u8 isc;				/* Interrupt-subclass */
+	u8 flags;
+};
+
+#define AIRQ_PTR_ALLOCATED	0x01
+
+int register_adapter_interrupt(struct airq_struct *airq);
+void unregister_adapter_interrupt(struct airq_struct *airq);
+
+/* Adapter interrupt bit vector */
+struct airq_iv {
+	unsigned long *vector;	/* Adapter interrupt bit vector */
+	dma_addr_t vector_dma; /* Adapter interrupt bit vector dma */
+	unsigned long *avail;	/* Allocation bit mask for the bit vector */
+	unsigned long *bitlock;	/* Lock bit mask for the bit vector */
+	unsigned long *ptr;	/* Pointer associated with each bit */
+	unsigned int *data;	/* 32 bit value associated with each bit */
+	unsigned long bits;	/* Number of bits in the vector */
+	unsigned long end;	/* Number of highest allocated bit + 1 */
+	unsigned long flags;	/* Allocation flags */
+	spinlock_t lock;	/* Lock to protect alloc & free */
+};
+
+#define AIRQ_IV_ALLOC		1	/* Use an allocation bit mask */
+#define AIRQ_IV_BITLOCK		2	/* Allocate the lock bit mask */
+#define AIRQ_IV_PTR		4	/* Allocate the ptr array */
+#define AIRQ_IV_DATA		8	/* Allocate the data array */
+#define AIRQ_IV_CACHELINE	16	/* Cacheline alignment for the vector */
+#define AIRQ_IV_GUESTVEC	32	/* Vector is a pinned guest page */
+
+struct airq_iv *airq_iv_create(unsigned long bits, unsigned long flags,
+			       unsigned long *vec);
+void airq_iv_release(struct airq_iv *iv);
+unsigned long airq_iv_alloc(struct airq_iv *iv, unsigned long num);
+void airq_iv_free(struct airq_iv *iv, unsigned long bit, unsigned long num);
+unsigned long airq_iv_scan(struct airq_iv *iv, unsigned long start,
+			   unsigned long end);
+
+static inline unsigned long airq_iv_alloc_bit(struct airq_iv *iv)
+{
+	return airq_iv_alloc(iv, 1);
+}
+
+static inline void airq_iv_free_bit(struct airq_iv *iv, unsigned long bit)
+{
+	airq_iv_free(iv, bit, 1);
+}
+
+static inline unsigned long airq_iv_end(struct airq_iv *iv)
+{
+	return iv->end;
+}
+
+static inline void airq_iv_lock(struct airq_iv *iv, unsigned long bit)
+{
+	const unsigned long be_to_le = BITS_PER_LONG - 1;
+	bit_spin_lock(bit ^ be_to_le, iv->bitlock);
+}
+
+static inline void airq_iv_unlock(struct airq_iv *iv, unsigned long bit)
+{
+	const unsigned long be_to_le = BITS_PER_LONG - 1;
+	bit_spin_unlock(bit ^ be_to_le, iv->bitlock);
+}
+
+static inline void airq_iv_set_data(struct airq_iv *iv, unsigned long bit,
+				    unsigned int data)
+{
+	iv->data[bit] = data;
+}
+
+static inline unsigned int airq_iv_get_data(struct airq_iv *iv,
+					    unsigned long bit)
+{
+	return iv->data[bit];
+}
+
+static inline void airq_iv_set_ptr(struct airq_iv *iv, unsigned long bit,
+				   unsigned long ptr)
+{
+	iv->ptr[bit] = ptr;
+}
+
+static inline unsigned long airq_iv_get_ptr(struct airq_iv *iv,
+					    unsigned long bit)
+{
+	return iv->ptr[bit];
+}
+
+#endif /* _ASM_S390_AIRQ_H */
diff --git a/arch/s390/include/asm/alternative-asm.h b/arch/s390/include/asm/alternative-asm.h
new file mode 100644
index 0000000000..7db046596b
--- /dev/null
+++ b/arch/s390/include/asm/alternative-asm.h
@@ -0,0 +1,56 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_S390_ALTERNATIVE_ASM_H
+#define _ASM_S390_ALTERNATIVE_ASM_H
+
+#ifdef __ASSEMBLY__
+
+/*
+ * Issue one struct alt_instr descriptor entry (need to put it into
+ * the section .altinstructions, see below). This entry contains
+ * enough information for the alternatives patching code to patch an
+ * instruction. See apply_alternatives().
+ */
+.macro alt_entry orig_start, orig_end, alt_start, alt_end, feature
+	.long	\orig_start - .
+	.long	\alt_start - .
+	.word	\feature
+	.byte	\orig_end - \orig_start
+	.org	. - ( \orig_end - \orig_start ) + ( \alt_end - \alt_start )
+	.org	. - ( \alt_end - \alt_start ) + ( \orig_end - \orig_start )
+.endm
+
+/*
+ * Define an alternative between two instructions. If @feature is
+ * present, early code in apply_alternatives() replaces @oldinstr with
+ * @newinstr.
+ */
+.macro ALTERNATIVE oldinstr, newinstr, feature
+	.pushsection .altinstr_replacement,"ax"
+770:	\newinstr
+771:	.popsection
+772:	\oldinstr
+773:	.pushsection .altinstructions,"a"
+	alt_entry 772b, 773b, 770b, 771b, \feature
+	.popsection
+.endm
+
+/*
+ * Define an alternative between two instructions. If @feature is
+ * present, early code in apply_alternatives() replaces @oldinstr with
+ * @newinstr.
+ */
+.macro ALTERNATIVE_2 oldinstr, newinstr1, feature1, newinstr2, feature2
+	.pushsection .altinstr_replacement,"ax"
+770:	\newinstr1
+771:	\newinstr2
+772:	.popsection
+773:	\oldinstr
+774:	.pushsection .altinstructions,"a"
+	alt_entry 773b, 774b, 770b, 771b,\feature1
+	alt_entry 773b, 774b, 771b, 772b,\feature2
+	.popsection
+.endm
+
+#endif	/*  __ASSEMBLY__  */
+
+#endif /* _ASM_S390_ALTERNATIVE_ASM_H */
diff --git a/arch/s390/include/asm/alternative.h b/arch/s390/include/asm/alternative.h
new file mode 100644
index 0000000000..904dd049f9
--- /dev/null
+++ b/arch/s390/include/asm/alternative.h
@@ -0,0 +1,120 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_S390_ALTERNATIVE_H
+#define _ASM_S390_ALTERNATIVE_H
+
+#ifndef __ASSEMBLY__
+
+#include <linux/types.h>
+#include <linux/stddef.h>
+#include <linux/stringify.h>
+
+struct alt_instr {
+	s32 instr_offset;	/* original instruction */
+	s32 repl_offset;	/* offset to replacement instruction */
+	u16 facility;		/* facility bit set for replacement */
+	u8  instrlen;		/* length of original instruction */
+} __packed;
+
+void apply_alternative_instructions(void);
+void apply_alternatives(struct alt_instr *start, struct alt_instr *end);
+
+/*
+ * +---------------------------------+
+ * |661:			     |662:
+ * | oldinstr			     |
+ * +---------------------------------+
+ *
+ * .altinstr_replacement section
+ * +---------------------------------+
+ * |6641:			     |6651:
+ * | alternative instr 1	     |
+ * +---------------------------------+
+ * |6642:			     |6652:
+ * | alternative instr 2	     |
+ * +---------------------------------+
+ *
+ * .altinstructions section
+ * +---------------------------------+
+ * | alt_instr entries for each      |
+ * | alternative instr		     |
+ * +---------------------------------+
+ */
+
+#define b_altinstr(num)		"664"#num
+#define e_altinstr(num)		"665"#num
+#define oldinstr_len		"662b-661b"
+#define altinstr_len(num)	e_altinstr(num)"b-"b_altinstr(num)"b"
+
+#define OLDINSTR(oldinstr) \
+	"661:\n\t" oldinstr "\n662:\n"
+
+#define ALTINSTR_ENTRY(facility, num)					\
+	"\t.long 661b - .\n"			/* old instruction */	\
+	"\t.long " b_altinstr(num)"b - .\n"	/* alt instruction */	\
+	"\t.word " __stringify(facility) "\n"	/* facility bit    */	\
+	"\t.byte " oldinstr_len "\n"		/* instruction len */	\
+	"\t.org . - (" oldinstr_len ") + (" altinstr_len(num) ")\n"	\
+	"\t.org . - (" altinstr_len(num) ") + (" oldinstr_len ")\n"
+
+#define ALTINSTR_REPLACEMENT(altinstr, num)	/* replacement */	\
+	b_altinstr(num)":\n\t" altinstr "\n" e_altinstr(num) ":\n"
+
+/* alternative assembly primitive: */
+#define ALTERNATIVE(oldinstr, altinstr, facility) \
+	".pushsection .altinstr_replacement, \"ax\"\n"			\
+	ALTINSTR_REPLACEMENT(altinstr, 1)				\
+	".popsection\n"							\
+	OLDINSTR(oldinstr)						\
+	".pushsection .altinstructions,\"a\"\n"				\
+	ALTINSTR_ENTRY(facility, 1)					\
+	".popsection\n"
+
+#define ALTERNATIVE_2(oldinstr, altinstr1, facility1, altinstr2, facility2)\
+	".pushsection .altinstr_replacement, \"ax\"\n"			\
+	ALTINSTR_REPLACEMENT(altinstr1, 1)				\
+	ALTINSTR_REPLACEMENT(altinstr2, 2)				\
+	".popsection\n"							\
+	OLDINSTR(oldinstr)						\
+	".pushsection .altinstructions,\"a\"\n"				\
+	ALTINSTR_ENTRY(facility1, 1)					\
+	ALTINSTR_ENTRY(facility2, 2)					\
+	".popsection\n"
+
+/*
+ * Alternative instructions for different CPU types or capabilities.
+ *
+ * This allows to use optimized instructions even on generic binary
+ * kernels.
+ *
+ * oldinstr is padded with jump and nops at compile time if altinstr is
+ * longer. altinstr is padded with jump and nops at run-time during patching.
+ *
+ * For non barrier like inlines please define new variants
+ * without volatile and memory clobber.
+ */
+#define alternative(oldinstr, altinstr, facility)			\
+	asm_inline volatile(ALTERNATIVE(oldinstr, altinstr, facility) : : : "memory")
+
+#define alternative_2(oldinstr, altinstr1, facility1, altinstr2, facility2) \
+	asm_inline volatile(ALTERNATIVE_2(oldinstr, altinstr1, facility1,   \
+				   altinstr2, facility2) ::: "memory")
+
+/* Alternative inline assembly with input. */
+#define alternative_input(oldinstr, newinstr, feature, input...)	\
+	asm_inline volatile (ALTERNATIVE(oldinstr, newinstr, feature)	\
+		: : input)
+
+/* Like alternative_input, but with a single output argument */
+#define alternative_io(oldinstr, altinstr, facility, output, input...)	\
+	asm_inline volatile(ALTERNATIVE(oldinstr, altinstr, facility)	\
+		: output : input)
+
+/* Use this macro if more than one output parameter is needed. */
+#define ASM_OUTPUT2(a...) a
+
+/* Use this macro if clobbers are needed without inputs. */
+#define ASM_NO_INPUT_CLOBBER(clobber...) : clobber
+
+#endif /* __ASSEMBLY__ */
+
+#endif /* _ASM_S390_ALTERNATIVE_H */
diff --git a/arch/s390/include/asm/ap.h b/arch/s390/include/asm/ap.h
new file mode 100644
index 0000000000..40c2b82f08
--- /dev/null
+++ b/arch/s390/include/asm/ap.h
@@ -0,0 +1,555 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Adjunct processor (AP) interfaces
+ *
+ * Copyright IBM Corp. 2017
+ *
+ * Author(s): Tony Krowiak <akrowia@linux.vnet.ibm.com>
+ *	      Martin Schwidefsky <schwidefsky@de.ibm.com>
+ *	      Harald Freudenberger <freude@de.ibm.com>
+ */
+
+#ifndef _ASM_S390_AP_H_
+#define _ASM_S390_AP_H_
+
+#include <linux/io.h>
+#include <asm/asm-extable.h>
+
+/**
+ * The ap_qid_t identifier of an ap queue.
+ * If the AP facilities test (APFT) facility is available,
+ * card and queue index are 8 bit values, otherwise
+ * card index is 6 bit and queue index a 4 bit value.
+ */
+typedef unsigned int ap_qid_t;
+
+#define AP_MKQID(_card, _queue) (((_card) & 0xff) << 8 | ((_queue) & 0xff))
+#define AP_QID_CARD(_qid) (((_qid) >> 8) & 0xff)
+#define AP_QID_QUEUE(_qid) ((_qid) & 0xff)
+
+/**
+ * struct ap_queue_status - Holds the AP queue status.
+ * @queue_empty: Shows if queue is empty
+ * @replies_waiting: Waiting replies
+ * @queue_full: Is 1 if the queue is full
+ * @irq_enabled: Shows if interrupts are enabled for the AP
+ * @response_code: Holds the 8 bit response code
+ *
+ * The ap queue status word is returned by all three AP functions
+ * (PQAP, NQAP and DQAP).  There's a set of flags in the first
+ * byte, followed by a 1 byte response code.
+ */
+struct ap_queue_status {
+	unsigned int queue_empty	: 1;
+	unsigned int replies_waiting	: 1;
+	unsigned int queue_full		: 1;
+	unsigned int			: 3;
+	unsigned int async		: 1;
+	unsigned int irq_enabled	: 1;
+	unsigned int response_code	: 8;
+	unsigned int			: 16;
+};
+
+/*
+ * AP queue status reg union to access the reg1
+ * register with the lower 32 bits comprising the
+ * ap queue status.
+ */
+union ap_queue_status_reg {
+	unsigned long value;
+	struct {
+		u32 _pad;
+		struct ap_queue_status status;
+	};
+};
+
+/**
+ * ap_intructions_available() - Test if AP instructions are available.
+ *
+ * Returns true if the AP instructions are installed, otherwise false.
+ */
+static inline bool ap_instructions_available(void)
+{
+	unsigned long reg0 = AP_MKQID(0, 0);
+	unsigned long reg1 = 0;
+
+	asm volatile(
+		"	lgr	0,%[reg0]\n"		/* qid into gr0 */
+		"	lghi	1,0\n"			/* 0 into gr1 */
+		"	lghi	2,0\n"			/* 0 into gr2 */
+		"	.insn	rre,0xb2af0000,0,0\n"	/* PQAP(TAPQ) */
+		"0:	la	%[reg1],1\n"		/* 1 into reg1 */
+		"1:\n"
+		EX_TABLE(0b, 1b)
+		: [reg1] "+&d" (reg1)
+		: [reg0] "d" (reg0)
+		: "cc", "0", "1", "2");
+	return reg1 != 0;
+}
+
+/* TAPQ register GR2 response struct */
+struct ap_tapq_gr2 {
+	union {
+		unsigned long value;
+		struct {
+			unsigned int fac    : 32; /* facility bits */
+			unsigned int apinfo : 32; /* ap type, ... */
+		};
+		struct {
+			unsigned int s	   :  1; /* APSC */
+			unsigned int m	   :  1; /* AP4KM */
+			unsigned int c	   :  1; /* AP4KC */
+			unsigned int mode  :  3;
+			unsigned int n	   :  1; /* APXA */
+			unsigned int	   :  1;
+			unsigned int class :  8;
+			unsigned int bs	   :  2; /* SE bind/assoc */
+			unsigned int	   : 14;
+			unsigned int at	   :  8; /* ap type */
+			unsigned int nd	   :  8; /* nr of domains */
+			unsigned int	   :  4;
+			unsigned int ml	   :  4; /* apxl ml */
+			unsigned int	   :  4;
+			unsigned int qd	   :  4; /* queue depth */
+		};
+	};
+};
+
+/*
+ * Convenience defines to be used with the bs field from struct ap_tapq_gr2
+ */
+#define AP_BS_Q_USABLE		      0
+#define AP_BS_Q_USABLE_NO_SECURE_KEY  1
+#define AP_BS_Q_AVAIL_FOR_BINDING     2
+#define AP_BS_Q_UNUSABLE	      3
+
+/**
+ * ap_tapq(): Test adjunct processor queue.
+ * @qid: The AP queue number
+ * @info: Pointer to queue descriptor
+ *
+ * Returns AP queue status structure.
+ */
+static inline struct ap_queue_status ap_tapq(ap_qid_t qid, struct ap_tapq_gr2 *info)
+{
+	union ap_queue_status_reg reg1;
+	unsigned long reg2;
+
+	asm volatile(
+		"	lgr	0,%[qid]\n"		/* qid into gr0 */
+		"	lghi	2,0\n"			/* 0 into gr2 */
+		"	.insn	rre,0xb2af0000,0,0\n"	/* PQAP(TAPQ) */
+		"	lgr	%[reg1],1\n"		/* gr1 (status) into reg1 */
+		"	lgr	%[reg2],2\n"		/* gr2 into reg2 */
+		: [reg1] "=&d" (reg1.value), [reg2] "=&d" (reg2)
+		: [qid] "d" (qid)
+		: "cc", "0", "1", "2");
+	if (info)
+		info->value = reg2;
+	return reg1.status;
+}
+
+/**
+ * ap_test_queue(): Test adjunct processor queue.
+ * @qid: The AP queue number
+ * @tbit: Test facilities bit
+ * @info: Ptr to tapq gr2 struct
+ *
+ * Returns AP queue status structure.
+ */
+static inline struct ap_queue_status ap_test_queue(ap_qid_t qid, int tbit,
+						   struct ap_tapq_gr2 *info)
+{
+	if (tbit)
+		qid |= 1UL << 23; /* set T bit*/
+	return ap_tapq(qid, info);
+}
+
+/**
+ * ap_pqap_rapq(): Reset adjunct processor queue.
+ * @qid: The AP queue number
+ * @fbit: if != 0 set F bit
+ *
+ * Returns AP queue status structure.
+ */
+static inline struct ap_queue_status ap_rapq(ap_qid_t qid, int fbit)
+{
+	unsigned long reg0 = qid | (1UL << 24);  /* fc 1UL is RAPQ */
+	union ap_queue_status_reg reg1;
+
+	if (fbit)
+		reg0 |= 1UL << 22;
+
+	asm volatile(
+		"	lgr	0,%[reg0]\n"		/* qid arg into gr0 */
+		"	.insn	rre,0xb2af0000,0,0\n"	/* PQAP(RAPQ) */
+		"	lgr	%[reg1],1\n"		/* gr1 (status) into reg1 */
+		: [reg1] "=&d" (reg1.value)
+		: [reg0] "d" (reg0)
+		: "cc", "0", "1");
+	return reg1.status;
+}
+
+/**
+ * ap_pqap_zapq(): Reset and zeroize adjunct processor queue.
+ * @qid: The AP queue number
+ * @fbit: if != 0 set F bit
+ *
+ * Returns AP queue status structure.
+ */
+static inline struct ap_queue_status ap_zapq(ap_qid_t qid, int fbit)
+{
+	unsigned long reg0 = qid | (2UL << 24);  /* fc 2UL is ZAPQ */
+	union ap_queue_status_reg reg1;
+
+	if (fbit)
+		reg0 |= 1UL << 22;
+
+	asm volatile(
+		"	lgr	0,%[reg0]\n"		/* qid arg into gr0 */
+		"	.insn	rre,0xb2af0000,0,0\n"	/* PQAP(ZAPQ) */
+		"	lgr	%[reg1],1\n"		/* gr1 (status) into reg1 */
+		: [reg1] "=&d" (reg1.value)
+		: [reg0] "d" (reg0)
+		: "cc", "0", "1");
+	return reg1.status;
+}
+
+/**
+ * struct ap_config_info - convenience struct for AP crypto
+ * config info as returned by the ap_qci() function.
+ */
+struct ap_config_info {
+	unsigned int apsc	 : 1;	/* S bit */
+	unsigned int apxa	 : 1;	/* N bit */
+	unsigned int qact	 : 1;	/* C bit */
+	unsigned int rc8a	 : 1;	/* R bit */
+	unsigned int		 : 4;
+	unsigned int apsb	 : 1;	/* B bit */
+	unsigned int		 : 23;
+	unsigned char na;		/* max # of APs - 1 */
+	unsigned char nd;		/* max # of Domains - 1 */
+	unsigned char _reserved0[10];
+	unsigned int apm[8];		/* AP ID mask */
+	unsigned int aqm[8];		/* AP (usage) queue mask */
+	unsigned int adm[8];		/* AP (control) domain mask */
+	unsigned char _reserved1[16];
+} __aligned(8);
+
+/**
+ * ap_qci(): Get AP configuration data
+ *
+ * Returns 0 on success, or -EOPNOTSUPP.
+ */
+static inline int ap_qci(struct ap_config_info *config)
+{
+	unsigned long reg0 = 4UL << 24;  /* fc 4UL is QCI */
+	unsigned long reg1 = -EOPNOTSUPP;
+	struct ap_config_info *reg2 = config;
+
+	asm volatile(
+		"	lgr	0,%[reg0]\n"		/* QCI fc into gr0 */
+		"	lgr	2,%[reg2]\n"		/* ptr to config into gr2 */
+		"	.insn	rre,0xb2af0000,0,0\n"	/* PQAP(QCI) */
+		"0:	la	%[reg1],0\n"		/* good case, QCI fc available */
+		"1:\n"
+		EX_TABLE(0b, 1b)
+		: [reg1] "+&d" (reg1)
+		: [reg0] "d" (reg0), [reg2] "d" (reg2)
+		: "cc", "memory", "0", "2");
+
+	return reg1;
+}
+
+/*
+ * struct ap_qirq_ctrl - convenient struct for easy invocation
+ * of the ap_aqic() function. This struct is passed as GR1
+ * parameter to the PQAP(AQIC) instruction. For details please
+ * see the AR documentation.
+ */
+union ap_qirq_ctrl {
+	unsigned long value;
+	struct {
+		unsigned int	   : 8;
+		unsigned int zone  : 8;	/* zone info */
+		unsigned int ir	   : 1;	/* ir flag: enable (1) or disable (0) irq */
+		unsigned int	   : 4;
+		unsigned int gisc  : 3;	/* guest isc field */
+		unsigned int	   : 6;
+		unsigned int gf	   : 2;	/* gisa format */
+		unsigned int	   : 1;
+		unsigned int gisa  : 27;	/* gisa origin */
+		unsigned int	   : 1;
+		unsigned int isc   : 3;	/* irq sub class */
+	};
+};
+
+/**
+ * ap_aqic(): Control interruption for a specific AP.
+ * @qid: The AP queue number
+ * @qirqctrl: struct ap_qirq_ctrl (64 bit value)
+ * @pa_ind: Physical address of the notification indicator byte
+ *
+ * Returns AP queue status.
+ */
+static inline struct ap_queue_status ap_aqic(ap_qid_t qid,
+					     union ap_qirq_ctrl qirqctrl,
+					     phys_addr_t pa_ind)
+{
+	unsigned long reg0 = qid | (3UL << 24);  /* fc 3UL is AQIC */
+	union ap_queue_status_reg reg1;
+	unsigned long reg2 = pa_ind;
+
+	reg1.value = qirqctrl.value;
+
+	asm volatile(
+		"	lgr	0,%[reg0]\n"		/* qid param into gr0 */
+		"	lgr	1,%[reg1]\n"		/* irq ctrl into gr1 */
+		"	lgr	2,%[reg2]\n"		/* ni addr into gr2 */
+		"	.insn	rre,0xb2af0000,0,0\n"	/* PQAP(AQIC) */
+		"	lgr	%[reg1],1\n"		/* gr1 (status) into reg1 */
+		: [reg1] "+&d" (reg1.value)
+		: [reg0] "d" (reg0), [reg2] "d" (reg2)
+		: "cc", "memory", "0", "1", "2");
+
+	return reg1.status;
+}
+
+/*
+ * union ap_qact_ap_info - used together with the
+ * ap_aqic() function to provide a convenient way
+ * to handle the ap info needed by the qact function.
+ */
+union ap_qact_ap_info {
+	unsigned long val;
+	struct {
+		unsigned int	  : 3;
+		unsigned int mode : 3;
+		unsigned int	  : 26;
+		unsigned int cat  : 8;
+		unsigned int	  : 8;
+		unsigned char ver[2];
+	};
+};
+
+/**
+ * ap_qact(): Query AP compatibility type.
+ * @qid: The AP queue number
+ * @apinfo: On input the info about the AP queue. On output the
+ *	    alternate AP queue info provided by the qact function
+ *	    in GR2 is stored in.
+ *
+ * Returns AP queue status. Check response_code field for failures.
+ */
+static inline struct ap_queue_status ap_qact(ap_qid_t qid, int ifbit,
+					     union ap_qact_ap_info *apinfo)
+{
+	unsigned long reg0 = qid | (5UL << 24) | ((ifbit & 0x01) << 22);
+	union ap_queue_status_reg reg1;
+	unsigned long reg2;
+
+	reg1.value = apinfo->val;
+
+	asm volatile(
+		"	lgr	0,%[reg0]\n"		/* qid param into gr0 */
+		"	lgr	1,%[reg1]\n"		/* qact in info into gr1 */
+		"	.insn	rre,0xb2af0000,0,0\n"	/* PQAP(QACT) */
+		"	lgr	%[reg1],1\n"		/* gr1 (status) into reg1 */
+		"	lgr	%[reg2],2\n"		/* qact out info into reg2 */
+		: [reg1] "+&d" (reg1.value), [reg2] "=&d" (reg2)
+		: [reg0] "d" (reg0)
+		: "cc", "0", "1", "2");
+	apinfo->val = reg2;
+	return reg1.status;
+}
+
+/*
+ * ap_bapq(): SE bind AP queue.
+ * @qid: The AP queue number
+ *
+ * Returns AP queue status structure.
+ *
+ * Invoking this function in a non-SE environment
+ * may case a specification exception.
+ */
+static inline struct ap_queue_status ap_bapq(ap_qid_t qid)
+{
+	unsigned long reg0 = qid | (7UL << 24);  /* fc 7 is BAPQ */
+	union ap_queue_status_reg reg1;
+
+	asm volatile(
+		"	lgr	0,%[reg0]\n"		/* qid arg into gr0 */
+		"	.insn	rre,0xb2af0000,0,0\n"	/* PQAP(BAPQ) */
+		"	lgr	%[reg1],1\n"		/* gr1 (status) into reg1 */
+		: [reg1] "=&d" (reg1.value)
+		: [reg0] "d" (reg0)
+		: "cc", "0", "1");
+
+	return reg1.status;
+}
+
+/*
+ * ap_aapq(): SE associate AP queue.
+ * @qid: The AP queue number
+ * @sec_idx: The secret index
+ *
+ * Returns AP queue status structure.
+ *
+ * Invoking this function in a non-SE environment
+ * may case a specification exception.
+ */
+static inline struct ap_queue_status ap_aapq(ap_qid_t qid, unsigned int sec_idx)
+{
+	unsigned long reg0 = qid | (8UL << 24);  /* fc 8 is AAPQ */
+	unsigned long reg2 = sec_idx;
+	union ap_queue_status_reg reg1;
+
+	asm volatile(
+		"	lgr	0,%[reg0]\n"		/* qid arg into gr0 */
+		"	lgr	2,%[reg2]\n"		/* secret index into gr2 */
+		"	.insn	rre,0xb2af0000,0,0\n"	/* PQAP(AAPQ) */
+		"	lgr	%[reg1],1\n"		/* gr1 (status) into reg1 */
+		: [reg1] "=&d" (reg1.value)
+		: [reg0] "d" (reg0), [reg2] "d" (reg2)
+		: "cc", "0", "1", "2");
+
+	return reg1.status;
+}
+
+/**
+ * ap_nqap(): Send message to adjunct processor queue.
+ * @qid: The AP queue number
+ * @psmid: The program supplied message identifier
+ * @msg: The message text
+ * @length: The message length
+ *
+ * Returns AP queue status structure.
+ * Condition code 1 on NQAP can't happen because the L bit is 1.
+ * Condition code 2 on NQAP also means the send is incomplete,
+ * because a segment boundary was reached. The NQAP is repeated.
+ */
+static inline struct ap_queue_status ap_nqap(ap_qid_t qid,
+					     unsigned long long psmid,
+					     void *msg, size_t length)
+{
+	unsigned long reg0 = qid | 0x40000000UL;  /* 0x4... is last msg part */
+	union register_pair nqap_r1, nqap_r2;
+	union ap_queue_status_reg reg1;
+
+	nqap_r1.even = (unsigned int)(psmid >> 32);
+	nqap_r1.odd  = psmid & 0xffffffff;
+	nqap_r2.even = (unsigned long)msg;
+	nqap_r2.odd  = (unsigned long)length;
+
+	asm volatile (
+		"	lgr	0,%[reg0]\n"  /* qid param in gr0 */
+		"0:	.insn	rre,0xb2ad0000,%[nqap_r1],%[nqap_r2]\n"
+		"	brc	2,0b\n"       /* handle partial completion */
+		"	lgr	%[reg1],1\n"  /* gr1 (status) into reg1 */
+		: [reg0] "+&d" (reg0), [reg1] "=&d" (reg1.value),
+		  [nqap_r2] "+&d" (nqap_r2.pair)
+		: [nqap_r1] "d" (nqap_r1.pair)
+		: "cc", "memory", "0", "1");
+	return reg1.status;
+}
+
+/**
+ * ap_dqap(): Receive message from adjunct processor queue.
+ * @qid: The AP queue number
+ * @psmid: Pointer to program supplied message identifier
+ * @msg: Pointer to message buffer
+ * @msglen: Message buffer size
+ * @length: Pointer to length of actually written bytes
+ * @reslength: Residual length on return
+ * @resgr0: input: gr0 value (only used if != 0), output: residual gr0 content
+ *
+ * Returns AP queue status structure.
+ * Condition code 1 on DQAP means the receive has taken place
+ * but only partially.	The response is incomplete, hence the
+ * DQAP is repeated.
+ * Condition code 2 on DQAP also means the receive is incomplete,
+ * this time because a segment boundary was reached. Again, the
+ * DQAP is repeated.
+ * Note that gpr2 is used by the DQAP instruction to keep track of
+ * any 'residual' length, in case the instruction gets interrupted.
+ * Hence it gets zeroed before the instruction.
+ * If the message does not fit into the buffer, this function will
+ * return with a truncated message and the reply in the firmware queue
+ * is not removed. This is indicated to the caller with an
+ * ap_queue_status response_code value of all bits on (0xFF) and (if
+ * the reslength ptr is given) the remaining length is stored in
+ * *reslength and (if the resgr0 ptr is given) the updated gr0 value
+ * for further processing of this msg entry is stored in *resgr0. The
+ * caller needs to detect this situation and should invoke ap_dqap
+ * with a valid resgr0 ptr and a value in there != 0 to indicate that
+ * *resgr0 is to be used instead of qid to further process this entry.
+ */
+static inline struct ap_queue_status ap_dqap(ap_qid_t qid,
+					     unsigned long *psmid,
+					     void *msg, size_t msglen,
+					     size_t *length,
+					     size_t *reslength,
+					     unsigned long *resgr0)
+{
+	unsigned long reg0 = resgr0 && *resgr0 ? *resgr0 : qid | 0x80000000UL;
+	union ap_queue_status_reg reg1;
+	unsigned long reg2;
+	union register_pair rp1, rp2;
+
+	rp1.even = 0UL;
+	rp1.odd  = 0UL;
+	rp2.even = (unsigned long)msg;
+	rp2.odd  = (unsigned long)msglen;
+
+	asm volatile(
+		"	lgr	0,%[reg0]\n"   /* qid param into gr0 */
+		"	lghi	2,0\n"	       /* 0 into gr2 (res length) */
+		"0:	ltgr	%N[rp2],%N[rp2]\n" /* check buf len */
+		"	jz	2f\n"	       /* go out if buf len is 0 */
+		"1:	.insn	rre,0xb2ae0000,%[rp1],%[rp2]\n"
+		"	brc	6,0b\n"        /* handle partial complete */
+		"2:	lgr	%[reg0],0\n"   /* gr0 (qid + info) into reg0 */
+		"	lgr	%[reg1],1\n"   /* gr1 (status) into reg1 */
+		"	lgr	%[reg2],2\n"   /* gr2 (res length) into reg2 */
+		: [reg0] "+&d" (reg0), [reg1] "=&d" (reg1.value),
+		  [reg2] "=&d" (reg2), [rp1] "+&d" (rp1.pair),
+		  [rp2] "+&d" (rp2.pair)
+		:
+		: "cc", "memory", "0", "1", "2");
+
+	if (reslength)
+		*reslength = reg2;
+	if (reg2 != 0 && rp2.odd == 0) {
+		/*
+		 * Partially complete, status in gr1 is not set.
+		 * Signal the caller that this dqap is only partially received
+		 * with a special status response code 0xFF and *resgr0 updated
+		 */
+		reg1.status.response_code = 0xFF;
+		if (resgr0)
+			*resgr0 = reg0;
+	} else {
+		*psmid = (rp1.even << 32) + rp1.odd;
+		if (resgr0)
+			*resgr0 = 0;
+	}
+
+	/* update *length with the nr of bytes stored into the msg buffer */
+	if (length)
+		*length = msglen - rp2.odd;
+
+	return reg1.status;
+}
+
+/*
+ * Interface to tell the AP bus code that a configuration
+ * change has happened. The bus code should at least do
+ * an ap bus resource rescan.
+ */
+#if IS_ENABLED(CONFIG_ZCRYPT)
+void ap_bus_cfg_chg(void);
+#else
+static inline void ap_bus_cfg_chg(void){}
+#endif
+
+#endif /* _ASM_S390_AP_H_ */
diff --git a/arch/s390/include/asm/appldata.h b/arch/s390/include/asm/appldata.h
new file mode 100644
index 0000000000..f2240392c7
--- /dev/null
+++ b/arch/s390/include/asm/appldata.h
@@ -0,0 +1,68 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright IBM Corp. 2006
+ *
+ * Author(s): Melissa Howland <melissah@us.ibm.com>
+ */
+
+#ifndef _ASM_S390_APPLDATA_H
+#define _ASM_S390_APPLDATA_H
+
+#include <linux/io.h>
+#include <asm/diag.h>
+
+#define APPLDATA_START_INTERVAL_REC	0x80
+#define APPLDATA_STOP_REC		0x81
+#define APPLDATA_GEN_EVENT_REC		0x82
+#define APPLDATA_START_CONFIG_REC	0x83
+
+/*
+ * Parameter list for DIAGNOSE X'DC'
+ */
+struct appldata_parameter_list {
+	u16 diag;
+	u8  function;
+	u8  parlist_length;
+	u32 unused01;
+	u16 reserved;
+	u16 buffer_length;
+	u32 unused02;
+	u64 product_id_addr;
+	u64 buffer_addr;
+} __attribute__ ((packed));
+
+struct appldata_product_id {
+	char prod_nr[7];	/* product number */
+	u16  prod_fn;		/* product function */
+	u8   record_nr; 	/* record number */
+	u16  version_nr;	/* version */
+	u16  release_nr;	/* release */
+	u16  mod_lvl;		/* modification level */
+} __attribute__ ((packed));
+
+
+static inline int appldata_asm(struct appldata_parameter_list *parm_list,
+			       struct appldata_product_id *id,
+			       unsigned short fn, void *buffer,
+			       unsigned short length)
+{
+	int ry;
+
+	if (!MACHINE_IS_VM)
+		return -EOPNOTSUPP;
+	parm_list->diag = 0xdc;
+	parm_list->function = fn;
+	parm_list->parlist_length = sizeof(*parm_list);
+	parm_list->buffer_length = length;
+	parm_list->product_id_addr = (unsigned long) id;
+	parm_list->buffer_addr = virt_to_phys(buffer);
+	diag_stat_inc(DIAG_STAT_X0DC);
+	asm volatile(
+		"	diag	%1,%0,0xdc"
+		: "=d" (ry)
+		: "d" (parm_list), "m" (*parm_list), "m" (*id)
+		: "cc");
+	return ry;
+}
+
+#endif /* _ASM_S390_APPLDATA_H */
diff --git a/arch/s390/include/asm/archrandom.h b/arch/s390/include/asm/archrandom.h
new file mode 100644
index 0000000000..1594049893
--- /dev/null
+++ b/arch/s390/include/asm/archrandom.h
@@ -0,0 +1,38 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Kernel interface for the s390 arch_random_* functions
+ *
+ * Copyright IBM Corp. 2017, 2022
+ *
+ * Author: Harald Freudenberger <freude@de.ibm.com>
+ *
+ */
+
+#ifndef _ASM_S390_ARCHRANDOM_H
+#define _ASM_S390_ARCHRANDOM_H
+
+#include <linux/static_key.h>
+#include <linux/preempt.h>
+#include <linux/atomic.h>
+#include <asm/cpacf.h>
+
+DECLARE_STATIC_KEY_FALSE(s390_arch_random_available);
+extern atomic64_t s390_arch_random_counter;
+
+static inline size_t __must_check arch_get_random_longs(unsigned long *v, size_t max_longs)
+{
+	return 0;
+}
+
+static inline size_t __must_check arch_get_random_seed_longs(unsigned long *v, size_t max_longs)
+{
+	if (static_branch_likely(&s390_arch_random_available) &&
+	    in_task()) {
+		cpacf_trng(NULL, 0, (u8 *)v, max_longs * sizeof(*v));
+		atomic64_add(max_longs * sizeof(*v), &s390_arch_random_counter);
+		return max_longs;
+	}
+	return 0;
+}
+
+#endif /* _ASM_S390_ARCHRANDOM_H */
diff --git a/arch/s390/include/asm/asm-const.h b/arch/s390/include/asm/asm-const.h
new file mode 100644
index 0000000000..11f615eb00
--- /dev/null
+++ b/arch/s390/include/asm/asm-const.h
@@ -0,0 +1,12 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_S390_ASM_CONST_H
+#define _ASM_S390_ASM_CONST_H
+
+#ifdef __ASSEMBLY__
+#  define stringify_in_c(...)	__VA_ARGS__
+#else
+/* This version of stringify will deal with commas... */
+#  define __stringify_in_c(...)	#__VA_ARGS__
+#  define stringify_in_c(...)	__stringify_in_c(__VA_ARGS__) " "
+#endif
+#endif /* _ASM_S390_ASM_CONST_H */
diff --git a/arch/s390/include/asm/asm-extable.h b/arch/s390/include/asm/asm-extable.h
new file mode 100644
index 0000000000..e6532477f1
--- /dev/null
+++ b/arch/s390/include/asm/asm-extable.h
@@ -0,0 +1,92 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __ASM_EXTABLE_H
+#define __ASM_EXTABLE_H
+
+#include <linux/stringify.h>
+#include <linux/bits.h>
+#include <asm/asm-const.h>
+
+#define EX_TYPE_NONE		0
+#define EX_TYPE_FIXUP		1
+#define EX_TYPE_BPF		2
+#define EX_TYPE_UA_STORE	3
+#define EX_TYPE_UA_LOAD_MEM	4
+#define EX_TYPE_UA_LOAD_REG	5
+#define EX_TYPE_UA_LOAD_REGPAIR	6
+
+#define EX_DATA_REG_ERR_SHIFT	0
+#define EX_DATA_REG_ERR		GENMASK(3, 0)
+
+#define EX_DATA_REG_ADDR_SHIFT	4
+#define EX_DATA_REG_ADDR	GENMASK(7, 4)
+
+#define EX_DATA_LEN_SHIFT	8
+#define EX_DATA_LEN		GENMASK(11, 8)
+
+#define __EX_TABLE(_section, _fault, _target, _type)			\
+	stringify_in_c(.section	_section,"a";)				\
+	stringify_in_c(.balign	4;)					\
+	stringify_in_c(.long	(_fault) - .;)				\
+	stringify_in_c(.long	(_target) - .;)				\
+	stringify_in_c(.short	(_type);)				\
+	stringify_in_c(.short	0;)					\
+	stringify_in_c(.previous)
+
+#define __EX_TABLE_UA(_section, _fault, _target, _type, _regerr, _regaddr, _len)\
+	stringify_in_c(.section _section,"a";)					\
+	stringify_in_c(.balign	4;)						\
+	stringify_in_c(.long	(_fault) - .;)					\
+	stringify_in_c(.long	(_target) - .;)					\
+	stringify_in_c(.short	(_type);)					\
+	stringify_in_c(.macro	extable_reg regerr, regaddr;)			\
+	stringify_in_c(.set	.Lfound, 0;)					\
+	stringify_in_c(.set	.Lcurr, 0;)					\
+	stringify_in_c(.irp	rs,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15;)	\
+	stringify_in_c(		.ifc	"\regerr", "%%r\rs";)			\
+	stringify_in_c(			.set	.Lfound, 1;)			\
+	stringify_in_c(			.set	.Lregerr, .Lcurr;)		\
+	stringify_in_c(		.endif;)					\
+	stringify_in_c(		.set	.Lcurr, .Lcurr+1;)			\
+	stringify_in_c(.endr;)							\
+	stringify_in_c(.ifne	(.Lfound != 1);)				\
+	stringify_in_c(		.error	"extable_reg: bad register argument1";)	\
+	stringify_in_c(.endif;)							\
+	stringify_in_c(.set	.Lfound, 0;)					\
+	stringify_in_c(.set	.Lcurr, 0;)					\
+	stringify_in_c(.irp	rs,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15;)	\
+	stringify_in_c(		.ifc	"\regaddr", "%%r\rs";)			\
+	stringify_in_c(			.set	.Lfound, 1;)			\
+	stringify_in_c(			.set	.Lregaddr, .Lcurr;)		\
+	stringify_in_c(		.endif;)					\
+	stringify_in_c(		.set	.Lcurr, .Lcurr+1;)			\
+	stringify_in_c(.endr;)							\
+	stringify_in_c(.ifne	(.Lfound != 1);)				\
+	stringify_in_c(		.error	"extable_reg: bad register argument2";)	\
+	stringify_in_c(.endif;)							\
+	stringify_in_c(.short	.Lregerr << EX_DATA_REG_ERR_SHIFT |		\
+				.Lregaddr << EX_DATA_REG_ADDR_SHIFT |		\
+				_len << EX_DATA_LEN_SHIFT;)			\
+	stringify_in_c(.endm;)							\
+	stringify_in_c(extable_reg _regerr,_regaddr;)				\
+	stringify_in_c(.purgem	extable_reg;)					\
+	stringify_in_c(.previous)
+
+#define EX_TABLE(_fault, _target)					\
+	__EX_TABLE(__ex_table, _fault, _target, EX_TYPE_FIXUP)
+
+#define EX_TABLE_AMODE31(_fault, _target)				\
+	__EX_TABLE(.amode31.ex_table, _fault, _target, EX_TYPE_FIXUP)
+
+#define EX_TABLE_UA_STORE(_fault, _target, _regerr)			\
+	__EX_TABLE_UA(__ex_table, _fault, _target, EX_TYPE_UA_STORE, _regerr, _regerr, 0)
+
+#define EX_TABLE_UA_LOAD_MEM(_fault, _target, _regerr, _regmem, _len)	\
+	__EX_TABLE_UA(__ex_table, _fault, _target, EX_TYPE_UA_LOAD_MEM, _regerr, _regmem, _len)
+
+#define EX_TABLE_UA_LOAD_REG(_fault, _target, _regerr, _regzero)	\
+	__EX_TABLE_UA(__ex_table, _fault, _target, EX_TYPE_UA_LOAD_REG, _regerr, _regzero, 0)
+
+#define EX_TABLE_UA_LOAD_REGPAIR(_fault, _target, _regerr, _regzero)	\
+	__EX_TABLE_UA(__ex_table, _fault, _target, EX_TYPE_UA_LOAD_REGPAIR, _regerr, _regzero, 0)
+
+#endif /* __ASM_EXTABLE_H */
diff --git a/arch/s390/include/asm/asm-prototypes.h b/arch/s390/include/asm/asm-prototypes.h
new file mode 100644
index 0000000000..a873e873e1
--- /dev/null
+++ b/arch/s390/include/asm/asm-prototypes.h
@@ -0,0 +1,13 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_S390_PROTOTYPES_H
+
+#include <linux/kvm_host.h>
+#include <linux/ftrace.h>
+#include <asm/fpu/api.h>
+#include <asm-generic/asm-prototypes.h>
+
+__int128_t __ashlti3(__int128_t a, int b);
+__int128_t __ashrti3(__int128_t a, int b);
+__int128_t __lshrti3(__int128_t a, int b);
+
+#endif /* _ASM_S390_PROTOTYPES_H */
diff --git a/arch/s390/include/asm/atomic.h b/arch/s390/include/asm/atomic.h
new file mode 100644
index 0000000000..7138d189cc
--- /dev/null
+++ b/arch/s390/include/asm/atomic.h
@@ -0,0 +1,150 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright IBM Corp. 1999, 2016
+ * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>,
+ *	      Denis Joseph Barrow,
+ *	      Arnd Bergmann,
+ */
+
+#ifndef __ARCH_S390_ATOMIC__
+#define __ARCH_S390_ATOMIC__
+
+#include <linux/compiler.h>
+#include <linux/types.h>
+#include <asm/atomic_ops.h>
+#include <asm/barrier.h>
+#include <asm/cmpxchg.h>
+
+static inline int arch_atomic_read(const atomic_t *v)
+{
+	return __atomic_read(v);
+}
+#define arch_atomic_read arch_atomic_read
+
+static inline void arch_atomic_set(atomic_t *v, int i)
+{
+	__atomic_set(v, i);
+}
+#define arch_atomic_set arch_atomic_set
+
+static inline int arch_atomic_add_return(int i, atomic_t *v)
+{
+	return __atomic_add_barrier(i, &v->counter) + i;
+}
+#define arch_atomic_add_return arch_atomic_add_return
+
+static inline int arch_atomic_fetch_add(int i, atomic_t *v)
+{
+	return __atomic_add_barrier(i, &v->counter);
+}
+#define arch_atomic_fetch_add arch_atomic_fetch_add
+
+static inline void arch_atomic_add(int i, atomic_t *v)
+{
+	__atomic_add(i, &v->counter);
+}
+#define arch_atomic_add arch_atomic_add
+
+#define arch_atomic_sub(_i, _v)		arch_atomic_add(-(int)(_i), _v)
+#define arch_atomic_sub_return(_i, _v)	arch_atomic_add_return(-(int)(_i), _v)
+#define arch_atomic_fetch_sub(_i, _v)	arch_atomic_fetch_add(-(int)(_i), _v)
+
+#define ATOMIC_OPS(op)							\
+static inline void arch_atomic_##op(int i, atomic_t *v)			\
+{									\
+	__atomic_##op(i, &v->counter);					\
+}									\
+static inline int arch_atomic_fetch_##op(int i, atomic_t *v)		\
+{									\
+	return __atomic_##op##_barrier(i, &v->counter);			\
+}
+
+ATOMIC_OPS(and)
+ATOMIC_OPS(or)
+ATOMIC_OPS(xor)
+
+#undef ATOMIC_OPS
+
+#define arch_atomic_and			arch_atomic_and
+#define arch_atomic_or			arch_atomic_or
+#define arch_atomic_xor			arch_atomic_xor
+#define arch_atomic_fetch_and		arch_atomic_fetch_and
+#define arch_atomic_fetch_or		arch_atomic_fetch_or
+#define arch_atomic_fetch_xor		arch_atomic_fetch_xor
+
+#define arch_atomic_xchg(v, new)	(arch_xchg(&((v)->counter), new))
+
+static inline int arch_atomic_cmpxchg(atomic_t *v, int old, int new)
+{
+	return __atomic_cmpxchg(&v->counter, old, new);
+}
+#define arch_atomic_cmpxchg arch_atomic_cmpxchg
+
+#define ATOMIC64_INIT(i)  { (i) }
+
+static inline s64 arch_atomic64_read(const atomic64_t *v)
+{
+	return __atomic64_read(v);
+}
+#define arch_atomic64_read arch_atomic64_read
+
+static inline void arch_atomic64_set(atomic64_t *v, s64 i)
+{
+	__atomic64_set(v, i);
+}
+#define arch_atomic64_set arch_atomic64_set
+
+static inline s64 arch_atomic64_add_return(s64 i, atomic64_t *v)
+{
+	return __atomic64_add_barrier(i, (long *)&v->counter) + i;
+}
+#define arch_atomic64_add_return arch_atomic64_add_return
+
+static inline s64 arch_atomic64_fetch_add(s64 i, atomic64_t *v)
+{
+	return __atomic64_add_barrier(i, (long *)&v->counter);
+}
+#define arch_atomic64_fetch_add arch_atomic64_fetch_add
+
+static inline void arch_atomic64_add(s64 i, atomic64_t *v)
+{
+	__atomic64_add(i, (long *)&v->counter);
+}
+#define arch_atomic64_add arch_atomic64_add
+
+#define arch_atomic64_xchg(v, new)	(arch_xchg(&((v)->counter), new))
+
+static inline s64 arch_atomic64_cmpxchg(atomic64_t *v, s64 old, s64 new)
+{
+	return __atomic64_cmpxchg((long *)&v->counter, old, new);
+}
+#define arch_atomic64_cmpxchg arch_atomic64_cmpxchg
+
+#define ATOMIC64_OPS(op)						\
+static inline void arch_atomic64_##op(s64 i, atomic64_t *v)		\
+{									\
+	__atomic64_##op(i, (long *)&v->counter);			\
+}									\
+static inline long arch_atomic64_fetch_##op(s64 i, atomic64_t *v)	\
+{									\
+	return __atomic64_##op##_barrier(i, (long *)&v->counter);	\
+}
+
+ATOMIC64_OPS(and)
+ATOMIC64_OPS(or)
+ATOMIC64_OPS(xor)
+
+#undef ATOMIC64_OPS
+
+#define arch_atomic64_and		arch_atomic64_and
+#define arch_atomic64_or		arch_atomic64_or
+#define arch_atomic64_xor		arch_atomic64_xor
+#define arch_atomic64_fetch_and		arch_atomic64_fetch_and
+#define arch_atomic64_fetch_or		arch_atomic64_fetch_or
+#define arch_atomic64_fetch_xor		arch_atomic64_fetch_xor
+
+#define arch_atomic64_sub_return(_i, _v) arch_atomic64_add_return(-(s64)(_i), _v)
+#define arch_atomic64_fetch_sub(_i, _v)  arch_atomic64_fetch_add(-(s64)(_i), _v)
+#define arch_atomic64_sub(_i, _v)	 arch_atomic64_add(-(s64)(_i), _v)
+
+#endif /* __ARCH_S390_ATOMIC__  */
diff --git a/arch/s390/include/asm/atomic_ops.h b/arch/s390/include/asm/atomic_ops.h
new file mode 100644
index 0000000000..50510e08b8
--- /dev/null
+++ b/arch/s390/include/asm/atomic_ops.h
@@ -0,0 +1,201 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Low level function for atomic operations
+ *
+ * Copyright IBM Corp. 1999, 2016
+ */
+
+#ifndef __ARCH_S390_ATOMIC_OPS__
+#define __ARCH_S390_ATOMIC_OPS__
+
+static inline int __atomic_read(const atomic_t *v)
+{
+	int c;
+
+	asm volatile(
+		"	l	%0,%1\n"
+		: "=d" (c) : "R" (v->counter));
+	return c;
+}
+
+static inline void __atomic_set(atomic_t *v, int i)
+{
+	asm volatile(
+		"	st	%1,%0\n"
+		: "=R" (v->counter) : "d" (i));
+}
+
+static inline s64 __atomic64_read(const atomic64_t *v)
+{
+	s64 c;
+
+	asm volatile(
+		"	lg	%0,%1\n"
+		: "=d" (c) : "RT" (v->counter));
+	return c;
+}
+
+static inline void __atomic64_set(atomic64_t *v, s64 i)
+{
+	asm volatile(
+		"	stg	%1,%0\n"
+		: "=RT" (v->counter) : "d" (i));
+}
+
+#ifdef CONFIG_HAVE_MARCH_Z196_FEATURES
+
+#define __ATOMIC_OP(op_name, op_type, op_string, op_barrier)		\
+static inline op_type op_name(op_type val, op_type *ptr)		\
+{									\
+	op_type old;							\
+									\
+	asm volatile(							\
+		op_string "	%[old],%[val],%[ptr]\n"			\
+		op_barrier						\
+		: [old] "=d" (old), [ptr] "+QS" (*ptr)			\
+		: [val] "d" (val) : "cc", "memory");			\
+	return old;							\
+}									\
+
+#define __ATOMIC_OPS(op_name, op_type, op_string)			\
+	__ATOMIC_OP(op_name, op_type, op_string, "\n")			\
+	__ATOMIC_OP(op_name##_barrier, op_type, op_string, "bcr 14,0\n")
+
+__ATOMIC_OPS(__atomic_add, int, "laa")
+__ATOMIC_OPS(__atomic_and, int, "lan")
+__ATOMIC_OPS(__atomic_or,  int, "lao")
+__ATOMIC_OPS(__atomic_xor, int, "lax")
+
+__ATOMIC_OPS(__atomic64_add, long, "laag")
+__ATOMIC_OPS(__atomic64_and, long, "lang")
+__ATOMIC_OPS(__atomic64_or,  long, "laog")
+__ATOMIC_OPS(__atomic64_xor, long, "laxg")
+
+#undef __ATOMIC_OPS
+#undef __ATOMIC_OP
+
+#define __ATOMIC_CONST_OP(op_name, op_type, op_string, op_barrier)	\
+static __always_inline void op_name(op_type val, op_type *ptr)		\
+{									\
+	asm volatile(							\
+		op_string "	%[ptr],%[val]\n"			\
+		op_barrier						\
+		: [ptr] "+QS" (*ptr) : [val] "i" (val) : "cc", "memory");\
+}
+
+#define __ATOMIC_CONST_OPS(op_name, op_type, op_string)			\
+	__ATOMIC_CONST_OP(op_name, op_type, op_string, "\n")		\
+	__ATOMIC_CONST_OP(op_name##_barrier, op_type, op_string, "bcr 14,0\n")
+
+__ATOMIC_CONST_OPS(__atomic_add_const, int, "asi")
+__ATOMIC_CONST_OPS(__atomic64_add_const, long, "agsi")
+
+#undef __ATOMIC_CONST_OPS
+#undef __ATOMIC_CONST_OP
+
+#else /* CONFIG_HAVE_MARCH_Z196_FEATURES */
+
+#define __ATOMIC_OP(op_name, op_string)					\
+static inline int op_name(int val, int *ptr)				\
+{									\
+	int old, new;							\
+									\
+	asm volatile(							\
+		"0:	lr	%[new],%[old]\n"			\
+		op_string "	%[new],%[val]\n"			\
+		"	cs	%[old],%[new],%[ptr]\n"			\
+		"	jl	0b"					\
+		: [old] "=d" (old), [new] "=&d" (new), [ptr] "+Q" (*ptr)\
+		: [val] "d" (val), "0" (*ptr) : "cc", "memory");	\
+	return old;							\
+}
+
+#define __ATOMIC_OPS(op_name, op_string)				\
+	__ATOMIC_OP(op_name, op_string)					\
+	__ATOMIC_OP(op_name##_barrier, op_string)
+
+__ATOMIC_OPS(__atomic_add, "ar")
+__ATOMIC_OPS(__atomic_and, "nr")
+__ATOMIC_OPS(__atomic_or,  "or")
+__ATOMIC_OPS(__atomic_xor, "xr")
+
+#undef __ATOMIC_OPS
+
+#define __ATOMIC64_OP(op_name, op_string)				\
+static inline long op_name(long val, long *ptr)				\
+{									\
+	long old, new;							\
+									\
+	asm volatile(							\
+		"0:	lgr	%[new],%[old]\n"			\
+		op_string "	%[new],%[val]\n"			\
+		"	csg	%[old],%[new],%[ptr]\n"			\
+		"	jl	0b"					\
+		: [old] "=d" (old), [new] "=&d" (new), [ptr] "+QS" (*ptr)\
+		: [val] "d" (val), "0" (*ptr) : "cc", "memory");	\
+	return old;							\
+}
+
+#define __ATOMIC64_OPS(op_name, op_string)				\
+	__ATOMIC64_OP(op_name, op_string)				\
+	__ATOMIC64_OP(op_name##_barrier, op_string)
+
+__ATOMIC64_OPS(__atomic64_add, "agr")
+__ATOMIC64_OPS(__atomic64_and, "ngr")
+__ATOMIC64_OPS(__atomic64_or,  "ogr")
+__ATOMIC64_OPS(__atomic64_xor, "xgr")
+
+#undef __ATOMIC64_OPS
+
+#define __atomic_add_const(val, ptr)		__atomic_add(val, ptr)
+#define __atomic_add_const_barrier(val, ptr)	__atomic_add(val, ptr)
+#define __atomic64_add_const(val, ptr)		__atomic64_add(val, ptr)
+#define __atomic64_add_const_barrier(val, ptr)	__atomic64_add(val, ptr)
+
+#endif /* CONFIG_HAVE_MARCH_Z196_FEATURES */
+
+static inline int __atomic_cmpxchg(int *ptr, int old, int new)
+{
+	asm volatile(
+		"	cs	%[old],%[new],%[ptr]"
+		: [old] "+d" (old), [ptr] "+Q" (*ptr)
+		: [new] "d" (new)
+		: "cc", "memory");
+	return old;
+}
+
+static inline bool __atomic_cmpxchg_bool(int *ptr, int old, int new)
+{
+	int old_expected = old;
+
+	asm volatile(
+		"	cs	%[old],%[new],%[ptr]"
+		: [old] "+d" (old), [ptr] "+Q" (*ptr)
+		: [new] "d" (new)
+		: "cc", "memory");
+	return old == old_expected;
+}
+
+static inline long __atomic64_cmpxchg(long *ptr, long old, long new)
+{
+	asm volatile(
+		"	csg	%[old],%[new],%[ptr]"
+		: [old] "+d" (old), [ptr] "+QS" (*ptr)
+		: [new] "d" (new)
+		: "cc", "memory");
+	return old;
+}
+
+static inline bool __atomic64_cmpxchg_bool(long *ptr, long old, long new)
+{
+	long old_expected = old;
+
+	asm volatile(
+		"	csg	%[old],%[new],%[ptr]"
+		: [old] "+d" (old), [ptr] "+QS" (*ptr)
+		: [new] "d" (new)
+		: "cc", "memory");
+	return old == old_expected;
+}
+
+#endif /* __ARCH_S390_ATOMIC_OPS__  */
diff --git a/arch/s390/include/asm/barrier.h b/arch/s390/include/asm/barrier.h
new file mode 100644
index 0000000000..82de2a7c41
--- /dev/null
+++ b/arch/s390/include/asm/barrier.h
@@ -0,0 +1,82 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright IBM Corp. 1999, 2009
+ *
+ * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>
+ */
+
+#ifndef __ASM_BARRIER_H
+#define __ASM_BARRIER_H
+
+/*
+ * Force strict CPU ordering.
+ * And yes, this is required on UP too when we're talking
+ * to devices.
+ */
+
+#ifdef CONFIG_HAVE_MARCH_Z196_FEATURES
+/* Fast-BCR without checkpoint synchronization */
+#define __ASM_BCR_SERIALIZE "bcr 14,0\n"
+#else
+#define __ASM_BCR_SERIALIZE "bcr 15,0\n"
+#endif
+
+static __always_inline void bcr_serialize(void)
+{
+	asm volatile(__ASM_BCR_SERIALIZE : : : "memory");
+}
+
+#define __mb()		bcr_serialize()
+#define __rmb()		barrier()
+#define __wmb()		barrier()
+#define __dma_rmb()	__mb()
+#define __dma_wmb()	__mb()
+#define __smp_mb()	__mb()
+#define __smp_rmb()	__rmb()
+#define __smp_wmb()	__wmb()
+
+#define __smp_store_release(p, v)					\
+do {									\
+	compiletime_assert_atomic_type(*p);				\
+	barrier();							\
+	WRITE_ONCE(*p, v);						\
+} while (0)
+
+#define __smp_load_acquire(p)						\
+({									\
+	typeof(*p) ___p1 = READ_ONCE(*p);				\
+	compiletime_assert_atomic_type(*p);				\
+	barrier();							\
+	___p1;								\
+})
+
+#define __smp_mb__before_atomic()	barrier()
+#define __smp_mb__after_atomic()	barrier()
+
+/**
+ * array_index_mask_nospec - generate a mask for array_idx() that is
+ * ~0UL when the bounds check succeeds and 0 otherwise
+ * @index: array element index
+ * @size: number of elements in array
+ */
+#define array_index_mask_nospec array_index_mask_nospec
+static inline unsigned long array_index_mask_nospec(unsigned long index,
+						    unsigned long size)
+{
+	unsigned long mask;
+
+	if (__builtin_constant_p(size) && size > 0) {
+		asm("	clgr	%2,%1\n"
+		    "	slbgr	%0,%0\n"
+		    :"=d" (mask) : "d" (size-1), "d" (index) :"cc");
+		return mask;
+	}
+	asm("	clgr	%1,%2\n"
+	    "	slbgr	%0,%0\n"
+	    :"=d" (mask) : "d" (size), "d" (index) :"cc");
+	return ~mask;
+}
+
+#include <asm-generic/barrier.h>
+
+#endif /* __ASM_BARRIER_H */
diff --git a/arch/s390/include/asm/bitops.h b/arch/s390/include/asm/bitops.h
new file mode 100644
index 0000000000..2de74fcd05
--- /dev/null
+++ b/arch/s390/include/asm/bitops.h
@@ -0,0 +1,378 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ *    Copyright IBM Corp. 1999,2013
+ *
+ *    Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>,
+ *
+ * The description below was taken in large parts from the powerpc
+ * bitops header file:
+ * Within a word, bits are numbered LSB first.  Lot's of places make
+ * this assumption by directly testing bits with (val & (1<<nr)).
+ * This can cause confusion for large (> 1 word) bitmaps on a
+ * big-endian system because, unlike little endian, the number of each
+ * bit depends on the word size.
+ *
+ * The bitop functions are defined to work on unsigned longs, so the bits
+ * end up numbered:
+ *   |63..............0|127............64|191...........128|255...........192|
+ *
+ * We also have special functions which work with an MSB0 encoding.
+ * The bits are numbered:
+ *   |0..............63|64............127|128...........191|192...........255|
+ *
+ * The main difference is that bit 0-63 in the bit number field needs to be
+ * reversed compared to the LSB0 encoded bit fields. This can be achieved by
+ * XOR with 0x3f.
+ *
+ */
+
+#ifndef _S390_BITOPS_H
+#define _S390_BITOPS_H
+
+#ifndef _LINUX_BITOPS_H
+#error only <linux/bitops.h> can be included directly
+#endif
+
+#include <linux/typecheck.h>
+#include <linux/compiler.h>
+#include <linux/types.h>
+#include <asm/atomic_ops.h>
+#include <asm/barrier.h>
+
+#define __BITOPS_WORDS(bits) (((bits) + BITS_PER_LONG - 1) / BITS_PER_LONG)
+
+static inline unsigned long *
+__bitops_word(unsigned long nr, const volatile unsigned long *ptr)
+{
+	unsigned long addr;
+
+	addr = (unsigned long)ptr + ((nr ^ (nr & (BITS_PER_LONG - 1))) >> 3);
+	return (unsigned long *)addr;
+}
+
+static inline unsigned long __bitops_mask(unsigned long nr)
+{
+	return 1UL << (nr & (BITS_PER_LONG - 1));
+}
+
+static __always_inline void arch_set_bit(unsigned long nr, volatile unsigned long *ptr)
+{
+	unsigned long *addr = __bitops_word(nr, ptr);
+	unsigned long mask = __bitops_mask(nr);
+
+	__atomic64_or(mask, (long *)addr);
+}
+
+static __always_inline void arch_clear_bit(unsigned long nr, volatile unsigned long *ptr)
+{
+	unsigned long *addr = __bitops_word(nr, ptr);
+	unsigned long mask = __bitops_mask(nr);
+
+	__atomic64_and(~mask, (long *)addr);
+}
+
+static __always_inline void arch_change_bit(unsigned long nr,
+					    volatile unsigned long *ptr)
+{
+	unsigned long *addr = __bitops_word(nr, ptr);
+	unsigned long mask = __bitops_mask(nr);
+
+	__atomic64_xor(mask, (long *)addr);
+}
+
+static inline bool arch_test_and_set_bit(unsigned long nr,
+					 volatile unsigned long *ptr)
+{
+	unsigned long *addr = __bitops_word(nr, ptr);
+	unsigned long mask = __bitops_mask(nr);
+	unsigned long old;
+
+	old = __atomic64_or_barrier(mask, (long *)addr);
+	return old & mask;
+}
+
+static inline bool arch_test_and_clear_bit(unsigned long nr,
+					   volatile unsigned long *ptr)
+{
+	unsigned long *addr = __bitops_word(nr, ptr);
+	unsigned long mask = __bitops_mask(nr);
+	unsigned long old;
+
+	old = __atomic64_and_barrier(~mask, (long *)addr);
+	return old & mask;
+}
+
+static inline bool arch_test_and_change_bit(unsigned long nr,
+					    volatile unsigned long *ptr)
+{
+	unsigned long *addr = __bitops_word(nr, ptr);
+	unsigned long mask = __bitops_mask(nr);
+	unsigned long old;
+
+	old = __atomic64_xor_barrier(mask, (long *)addr);
+	return old & mask;
+}
+
+static __always_inline void
+arch___set_bit(unsigned long nr, volatile unsigned long *addr)
+{
+	unsigned long *p = __bitops_word(nr, addr);
+	unsigned long mask = __bitops_mask(nr);
+
+	*p |= mask;
+}
+
+static __always_inline void
+arch___clear_bit(unsigned long nr, volatile unsigned long *addr)
+{
+	unsigned long *p = __bitops_word(nr, addr);
+	unsigned long mask = __bitops_mask(nr);
+
+	*p &= ~mask;
+}
+
+static __always_inline void
+arch___change_bit(unsigned long nr, volatile unsigned long *addr)
+{
+	unsigned long *p = __bitops_word(nr, addr);
+	unsigned long mask = __bitops_mask(nr);
+
+	*p ^= mask;
+}
+
+static __always_inline bool
+arch___test_and_set_bit(unsigned long nr, volatile unsigned long *addr)
+{
+	unsigned long *p = __bitops_word(nr, addr);
+	unsigned long mask = __bitops_mask(nr);
+	unsigned long old;
+
+	old = *p;
+	*p |= mask;
+	return old & mask;
+}
+
+static __always_inline bool
+arch___test_and_clear_bit(unsigned long nr, volatile unsigned long *addr)
+{
+	unsigned long *p = __bitops_word(nr, addr);
+	unsigned long mask = __bitops_mask(nr);
+	unsigned long old;
+
+	old = *p;
+	*p &= ~mask;
+	return old & mask;
+}
+
+static __always_inline bool
+arch___test_and_change_bit(unsigned long nr, volatile unsigned long *addr)
+{
+	unsigned long *p = __bitops_word(nr, addr);
+	unsigned long mask = __bitops_mask(nr);
+	unsigned long old;
+
+	old = *p;
+	*p ^= mask;
+	return old & mask;
+}
+
+#define arch_test_bit generic_test_bit
+#define arch_test_bit_acquire generic_test_bit_acquire
+
+static inline bool arch_test_and_set_bit_lock(unsigned long nr,
+					      volatile unsigned long *ptr)
+{
+	if (arch_test_bit(nr, ptr))
+		return true;
+	return arch_test_and_set_bit(nr, ptr);
+}
+
+static inline void arch_clear_bit_unlock(unsigned long nr,
+					 volatile unsigned long *ptr)
+{
+	smp_mb__before_atomic();
+	arch_clear_bit(nr, ptr);
+}
+
+static inline void arch___clear_bit_unlock(unsigned long nr,
+					   volatile unsigned long *ptr)
+{
+	smp_mb();
+	arch___clear_bit(nr, ptr);
+}
+
+#include <asm-generic/bitops/instrumented-atomic.h>
+#include <asm-generic/bitops/instrumented-non-atomic.h>
+#include <asm-generic/bitops/instrumented-lock.h>
+
+/*
+ * Functions which use MSB0 bit numbering.
+ * The bits are numbered:
+ *   |0..............63|64............127|128...........191|192...........255|
+ */
+unsigned long find_first_bit_inv(const unsigned long *addr, unsigned long size);
+unsigned long find_next_bit_inv(const unsigned long *addr, unsigned long size,
+				unsigned long offset);
+
+#define for_each_set_bit_inv(bit, addr, size)				\
+	for ((bit) = find_first_bit_inv((addr), (size));		\
+	     (bit) < (size);						\
+	     (bit) = find_next_bit_inv((addr), (size), (bit) + 1))
+
+static inline void set_bit_inv(unsigned long nr, volatile unsigned long *ptr)
+{
+	return set_bit(nr ^ (BITS_PER_LONG - 1), ptr);
+}
+
+static inline void clear_bit_inv(unsigned long nr, volatile unsigned long *ptr)
+{
+	return clear_bit(nr ^ (BITS_PER_LONG - 1), ptr);
+}
+
+static inline bool test_and_clear_bit_inv(unsigned long nr,
+					  volatile unsigned long *ptr)
+{
+	return test_and_clear_bit(nr ^ (BITS_PER_LONG - 1), ptr);
+}
+
+static inline void __set_bit_inv(unsigned long nr, volatile unsigned long *ptr)
+{
+	return __set_bit(nr ^ (BITS_PER_LONG - 1), ptr);
+}
+
+static inline void __clear_bit_inv(unsigned long nr, volatile unsigned long *ptr)
+{
+	return __clear_bit(nr ^ (BITS_PER_LONG - 1), ptr);
+}
+
+static inline bool test_bit_inv(unsigned long nr,
+				const volatile unsigned long *ptr)
+{
+	return test_bit(nr ^ (BITS_PER_LONG - 1), ptr);
+}
+
+/**
+ * __flogr - find leftmost one
+ * @word - The word to search
+ *
+ * Returns the bit number of the most significant bit set,
+ * where the most significant bit has bit number 0.
+ * If no bit is set this function returns 64.
+ */
+static inline unsigned char __flogr(unsigned long word)
+{
+	if (__builtin_constant_p(word)) {
+		unsigned long bit = 0;
+
+		if (!word)
+			return 64;
+		if (!(word & 0xffffffff00000000UL)) {
+			word <<= 32;
+			bit += 32;
+		}
+		if (!(word & 0xffff000000000000UL)) {
+			word <<= 16;
+			bit += 16;
+		}
+		if (!(word & 0xff00000000000000UL)) {
+			word <<= 8;
+			bit += 8;
+		}
+		if (!(word & 0xf000000000000000UL)) {
+			word <<= 4;
+			bit += 4;
+		}
+		if (!(word & 0xc000000000000000UL)) {
+			word <<= 2;
+			bit += 2;
+		}
+		if (!(word & 0x8000000000000000UL)) {
+			word <<= 1;
+			bit += 1;
+		}
+		return bit;
+	} else {
+		union register_pair rp;
+
+		rp.even = word;
+		asm volatile(
+			"       flogr   %[rp],%[rp]\n"
+			: [rp] "+d" (rp.pair) : : "cc");
+		return rp.even;
+	}
+}
+
+/**
+ * __ffs - find first bit in word.
+ * @word: The word to search
+ *
+ * Undefined if no bit exists, so code should check against 0 first.
+ */
+static inline unsigned long __ffs(unsigned long word)
+{
+	return __flogr(-word & word) ^ (BITS_PER_LONG - 1);
+}
+
+/**
+ * ffs - find first bit set
+ * @word: the word to search
+ *
+ * This is defined the same way as the libc and
+ * compiler builtin ffs routines (man ffs).
+ */
+static inline int ffs(int word)
+{
+	unsigned long mask = 2 * BITS_PER_LONG - 1;
+	unsigned int val = (unsigned int)word;
+
+	return (1 + (__flogr(-val & val) ^ (BITS_PER_LONG - 1))) & mask;
+}
+
+/**
+ * __fls - find last (most-significant) set bit in a long word
+ * @word: the word to search
+ *
+ * Undefined if no set bit exists, so code should check against 0 first.
+ */
+static inline unsigned long __fls(unsigned long word)
+{
+	return __flogr(word) ^ (BITS_PER_LONG - 1);
+}
+
+/**
+ * fls64 - find last set bit in a 64-bit word
+ * @word: the word to search
+ *
+ * This is defined in a similar way as the libc and compiler builtin
+ * ffsll, but returns the position of the most significant set bit.
+ *
+ * fls64(value) returns 0 if value is 0 or the position of the last
+ * set bit if value is nonzero. The last (most significant) bit is
+ * at position 64.
+ */
+static inline int fls64(unsigned long word)
+{
+	unsigned long mask = 2 * BITS_PER_LONG - 1;
+
+	return (1 + (__flogr(word) ^ (BITS_PER_LONG - 1))) & mask;
+}
+
+/**
+ * fls - find last (most-significant) bit set
+ * @word: the word to search
+ *
+ * This is defined the same way as ffs.
+ * Note fls(0) = 0, fls(1) = 1, fls(0x80000000) = 32.
+ */
+static inline int fls(unsigned int word)
+{
+	return fls64(word);
+}
+
+#include <asm-generic/bitops/ffz.h>
+#include <asm-generic/bitops/hweight.h>
+#include <asm-generic/bitops/sched.h>
+#include <asm-generic/bitops/le.h>
+#include <asm-generic/bitops/ext2-atomic-setbit.h>
+
+#endif /* _S390_BITOPS_H */
diff --git a/arch/s390/include/asm/boot_data.h b/arch/s390/include/asm/boot_data.h
new file mode 100644
index 0000000000..f7eed27b32
--- /dev/null
+++ b/arch/s390/include/asm/boot_data.h
@@ -0,0 +1,18 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_S390_BOOT_DATA_H
+
+#include <asm/setup.h>
+#include <asm/ipl.h>
+
+extern char early_command_line[COMMAND_LINE_SIZE];
+extern struct ipl_parameter_block ipl_block;
+extern int ipl_block_valid;
+extern int ipl_secure_flag;
+
+extern unsigned long ipl_cert_list_addr;
+extern unsigned long ipl_cert_list_size;
+
+extern unsigned long early_ipl_comp_list_addr;
+extern unsigned long early_ipl_comp_list_size;
+
+#endif /* _ASM_S390_BOOT_DATA_H */
diff --git a/arch/s390/include/asm/bug.h b/arch/s390/include/asm/bug.h
new file mode 100644
index 0000000000..aebe1e22c7
--- /dev/null
+++ b/arch/s390/include/asm/bug.h
@@ -0,0 +1,71 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_S390_BUG_H
+#define _ASM_S390_BUG_H
+
+#include <linux/compiler.h>
+
+#ifdef CONFIG_BUG
+
+#ifdef CONFIG_DEBUG_BUGVERBOSE
+
+#define __EMIT_BUG(x) do {					\
+	asm_inline volatile(					\
+		"0:	mc	0,0\n"				\
+		".section .rodata.str,\"aMS\",@progbits,1\n"	\
+		"1:	.asciz	\""__FILE__"\"\n"		\
+		".previous\n"					\
+		".section __bug_table,\"awM\",@progbits,%2\n"	\
+		"2:	.long	0b-.\n"				\
+		"	.long	1b-.\n"				\
+		"	.short	%0,%1\n"			\
+		"	.org	2b+%2\n"			\
+		".previous\n"					\
+		: : "i" (__LINE__),				\
+		    "i" (x),					\
+		    "i" (sizeof(struct bug_entry)));		\
+} while (0)
+
+#else /* CONFIG_DEBUG_BUGVERBOSE */
+
+#define __EMIT_BUG(x) do {					\
+	asm_inline volatile(					\
+		"0:	mc	0,0\n"				\
+		".section __bug_table,\"awM\",@progbits,%1\n"	\
+		"1:	.long	0b-.\n"				\
+		"	.short	%0\n"				\
+		"	.org	1b+%1\n"			\
+		".previous\n"					\
+		: : "i" (x),					\
+		    "i" (sizeof(struct bug_entry)));		\
+} while (0)
+
+#endif /* CONFIG_DEBUG_BUGVERBOSE */
+
+#define BUG() do {					\
+	__EMIT_BUG(0);					\
+	unreachable();					\
+} while (0)
+
+#define __WARN_FLAGS(flags) do {			\
+	__EMIT_BUG(BUGFLAG_WARNING|(flags));		\
+} while (0)
+
+#define WARN_ON(x) ({					\
+	int __ret_warn_on = !!(x);			\
+	if (__builtin_constant_p(__ret_warn_on)) {	\
+		if (__ret_warn_on)			\
+			__WARN();			\
+	} else {					\
+		if (unlikely(__ret_warn_on))		\
+			__WARN();			\
+	}						\
+	unlikely(__ret_warn_on);			\
+})
+
+#define HAVE_ARCH_BUG
+#define HAVE_ARCH_WARN_ON
+#endif /* CONFIG_BUG */
+
+#include <asm-generic/bug.h>
+
+#endif /* _ASM_S390_BUG_H */
diff --git a/arch/s390/include/asm/cache.h b/arch/s390/include/asm/cache.h
new file mode 100644
index 0000000000..00128174c0
--- /dev/null
+++ b/arch/s390/include/asm/cache.h
@@ -0,0 +1,19 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ *  S390 version
+ *    Copyright IBM Corp. 1999
+ *
+ *  Derived from "include/asm-i386/cache.h"
+ *    Copyright (C) 1992, Linus Torvalds
+ */
+
+#ifndef __ARCH_S390_CACHE_H
+#define __ARCH_S390_CACHE_H
+
+#define L1_CACHE_BYTES     256
+#define L1_CACHE_SHIFT     8
+#define NET_SKB_PAD	   32
+
+#define __read_mostly __section(".data..read_mostly")
+
+#endif
diff --git a/arch/s390/include/asm/ccwdev.h b/arch/s390/include/asm/ccwdev.h
new file mode 100644
index 0000000000..91d261751d
--- /dev/null
+++ b/arch/s390/include/asm/ccwdev.h
@@ -0,0 +1,237 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright IBM Corp. 2002, 2009
+ *
+ * Author(s): Arnd Bergmann <arndb@de.ibm.com>
+ *
+ * Interface for CCW device drivers
+ */
+#ifndef _S390_CCWDEV_H_
+#define _S390_CCWDEV_H_
+
+#include <linux/device.h>
+#include <linux/mod_devicetable.h>
+#include <asm/chsc.h>
+#include <asm/fcx.h>
+#include <asm/irq.h>
+#include <asm/schid.h>
+#include <linux/mutex.h>
+
+/* structs from asm/cio.h */
+struct irb;
+struct ccw1;
+struct ccw_dev_id;
+
+/* simplified initializers for struct ccw_device:
+ * CCW_DEVICE and CCW_DEVICE_DEVTYPE initialize one
+ * entry in your MODULE_DEVICE_TABLE and set the match_flag correctly */
+#define CCW_DEVICE(cu, cum) 						\
+	.cu_type=(cu), .cu_model=(cum),					\
+	.match_flags=(CCW_DEVICE_ID_MATCH_CU_TYPE			\
+		   | (cum ? CCW_DEVICE_ID_MATCH_CU_MODEL : 0))
+
+#define CCW_DEVICE_DEVTYPE(cu, cum, dev, devm)				\
+	.cu_type=(cu), .cu_model=(cum), .dev_type=(dev), .dev_model=(devm),\
+	.match_flags=CCW_DEVICE_ID_MATCH_CU_TYPE			\
+		   | ((cum) ? CCW_DEVICE_ID_MATCH_CU_MODEL : 0) 	\
+		   | CCW_DEVICE_ID_MATCH_DEVICE_TYPE			\
+		   | ((devm) ? CCW_DEVICE_ID_MATCH_DEVICE_MODEL : 0)
+
+/* scan through an array of device ids and return the first
+ * entry that matches the device.
+ *
+ * the array must end with an entry containing zero match_flags
+ */
+static inline const struct ccw_device_id *
+ccw_device_id_match(const struct ccw_device_id *array,
+			const struct ccw_device_id *match)
+{
+	const struct ccw_device_id *id = array;
+
+	for (id = array; id->match_flags; id++) {
+		if ((id->match_flags & CCW_DEVICE_ID_MATCH_CU_TYPE)
+		    && (id->cu_type != match->cu_type))
+			continue;
+
+		if ((id->match_flags & CCW_DEVICE_ID_MATCH_CU_MODEL)
+		    && (id->cu_model != match->cu_model))
+			continue;
+
+		if ((id->match_flags & CCW_DEVICE_ID_MATCH_DEVICE_TYPE)
+		    && (id->dev_type != match->dev_type))
+			continue;
+
+		if ((id->match_flags & CCW_DEVICE_ID_MATCH_DEVICE_MODEL)
+		    && (id->dev_model != match->dev_model))
+			continue;
+
+		return id;
+	}
+
+	return NULL;
+}
+
+/**
+ * struct ccw_device - channel attached device
+ * @ccwlock: pointer to device lock
+ * @id: id of this device
+ * @drv: ccw driver for this device
+ * @dev: embedded device structure
+ * @online: online status of device
+ * @handler: interrupt handler
+ *
+ * @handler is a member of the device rather than the driver since a driver
+ * can have different interrupt handlers for different ccw devices
+ * (multi-subchannel drivers).
+ */
+struct ccw_device {
+	spinlock_t *ccwlock;
+/* private: */
+	struct ccw_device_private *private;	/* cio private information */
+	struct mutex reg_mutex;
+/* public: */
+	struct ccw_device_id id;
+	struct ccw_driver *drv;
+	struct device dev;
+	int online;
+	void (*handler) (struct ccw_device *, unsigned long, struct irb *);
+};
+
+/*
+ * Possible events used by the path_event notifier.
+ */
+#define PE_NONE				0x0
+#define PE_PATH_GONE			0x1 /* A path is no longer available. */
+#define PE_PATH_AVAILABLE		0x2 /* A path has become available and
+					       was successfully verified. */
+#define PE_PATHGROUP_ESTABLISHED	0x4 /* A pathgroup was reset and had
+					       to be established again. */
+#define PE_PATH_FCES_EVENT		0x8 /* The FCES Status of a path has
+					     * changed. */
+
+/*
+ * Possible CIO actions triggered by the unit check handler.
+ */
+enum uc_todo {
+	UC_TODO_RETRY,
+	UC_TODO_RETRY_ON_NEW_PATH,
+	UC_TODO_STOP
+};
+
+/**
+ * struct ccw_driver - device driver for channel attached devices
+ * @ids: ids supported by this driver
+ * @probe: function called on probe
+ * @remove: function called on remove
+ * @set_online: called when setting device online
+ * @set_offline: called when setting device offline
+ * @notify: notify driver of device state changes
+ * @path_event: notify driver of channel path events
+ * @shutdown: called at device shutdown
+ * @uc_handler: callback for unit check handler
+ * @driver: embedded device driver structure
+ * @int_class: interruption class to use for accounting interrupts
+ */
+struct ccw_driver {
+	struct ccw_device_id *ids;
+	int (*probe) (struct ccw_device *);
+	void (*remove) (struct ccw_device *);
+	int (*set_online) (struct ccw_device *);
+	int (*set_offline) (struct ccw_device *);
+	int (*notify) (struct ccw_device *, int);
+	void (*path_event) (struct ccw_device *, int *);
+	void (*shutdown) (struct ccw_device *);
+	enum uc_todo (*uc_handler) (struct ccw_device *, struct irb *);
+	struct device_driver driver;
+	enum interruption_class int_class;
+};
+
+extern struct ccw_device *get_ccwdev_by_busid(struct ccw_driver *cdrv,
+					      const char *bus_id);
+
+/* devices drivers call these during module load and unload.
+ * When a driver is registered, its probe method is called
+ * when new devices for its type pop up */
+extern int  ccw_driver_register   (struct ccw_driver *driver);
+extern void ccw_driver_unregister (struct ccw_driver *driver);
+extern int ccw_device_set_options_mask(struct ccw_device *, unsigned long);
+extern int ccw_device_set_options(struct ccw_device *, unsigned long);
+extern void ccw_device_clear_options(struct ccw_device *, unsigned long);
+int ccw_device_is_pathgroup(struct ccw_device *cdev);
+int ccw_device_is_multipath(struct ccw_device *cdev);
+
+/* Allow for i/o completion notification after primary interrupt status. */
+#define CCWDEV_EARLY_NOTIFICATION	0x0001
+/* Report all interrupt conditions. */
+#define CCWDEV_REPORT_ALL	 	0x0002
+/* Try to perform path grouping. */
+#define CCWDEV_DO_PATHGROUP             0x0004
+/* Allow forced onlining of boxed devices. */
+#define CCWDEV_ALLOW_FORCE              0x0008
+/* Try to use multipath mode. */
+#define CCWDEV_DO_MULTIPATH		0x0010
+
+extern int ccw_device_start(struct ccw_device *, struct ccw1 *,
+			    unsigned long, __u8, unsigned long);
+extern int ccw_device_start_timeout(struct ccw_device *, struct ccw1 *,
+				    unsigned long, __u8, unsigned long, int);
+extern int ccw_device_start_key(struct ccw_device *, struct ccw1 *,
+				unsigned long, __u8, __u8, unsigned long);
+extern int ccw_device_start_timeout_key(struct ccw_device *, struct ccw1 *,
+					unsigned long, __u8, __u8,
+					unsigned long, int);
+
+
+extern int ccw_device_resume(struct ccw_device *);
+extern int ccw_device_halt(struct ccw_device *, unsigned long);
+extern int ccw_device_clear(struct ccw_device *, unsigned long);
+int ccw_device_tm_start_key(struct ccw_device *cdev, struct tcw *tcw,
+			    unsigned long intparm, u8 lpm, u8 key);
+int ccw_device_tm_start_key(struct ccw_device *, struct tcw *,
+			    unsigned long, u8, u8);
+int ccw_device_tm_start_timeout_key(struct ccw_device *, struct tcw *,
+			    unsigned long, u8, u8, int);
+int ccw_device_tm_start(struct ccw_device *, struct tcw *,
+			    unsigned long, u8);
+int ccw_device_tm_start_timeout(struct ccw_device *, struct tcw *,
+			    unsigned long, u8, int);
+int ccw_device_tm_intrg(struct ccw_device *cdev);
+
+int ccw_device_get_mdc(struct ccw_device *cdev, u8 mask);
+
+extern int ccw_device_set_online(struct ccw_device *cdev);
+extern int ccw_device_set_offline(struct ccw_device *cdev);
+
+
+extern struct ciw *ccw_device_get_ciw(struct ccw_device *, __u32 cmd);
+extern __u8 ccw_device_get_path_mask(struct ccw_device *);
+extern void ccw_device_get_id(struct ccw_device *, struct ccw_dev_id *);
+
+#define get_ccwdev_lock(x) (x)->ccwlock
+
+#define to_ccwdev(n) container_of(n, struct ccw_device, dev)
+#define to_ccwdrv(n) container_of(n, struct ccw_driver, driver)
+
+extern struct ccw_device *ccw_device_create_console(struct ccw_driver *);
+extern void ccw_device_destroy_console(struct ccw_device *);
+extern int ccw_device_enable_console(struct ccw_device *);
+extern void ccw_device_wait_idle(struct ccw_device *);
+
+extern void *ccw_device_dma_zalloc(struct ccw_device *cdev, size_t size);
+extern void ccw_device_dma_free(struct ccw_device *cdev,
+				void *cpu_addr, size_t size);
+
+int ccw_device_siosl(struct ccw_device *);
+
+extern void ccw_device_get_schid(struct ccw_device *, struct subchannel_id *);
+
+struct channel_path_desc_fmt0 *ccw_device_get_chp_desc(struct ccw_device *, int);
+u8 *ccw_device_get_util_str(struct ccw_device *cdev, int chp_idx);
+int ccw_device_pnso(struct ccw_device *cdev,
+		    struct chsc_pnso_area *pnso_area, u8 oc,
+		    struct chsc_pnso_resume_token resume_token, int cnc);
+int ccw_device_get_cssid(struct ccw_device *cdev, u8 *cssid);
+int ccw_device_get_iid(struct ccw_device *cdev, u8 *iid);
+int ccw_device_get_chpid(struct ccw_device *cdev, int chp_idx, u8 *chpid);
+int ccw_device_get_chid(struct ccw_device *cdev, int chp_idx, u16 *chid);
+#endif /* _S390_CCWDEV_H_ */
diff --git a/arch/s390/include/asm/ccwgroup.h b/arch/s390/include/asm/ccwgroup.h
new file mode 100644
index 0000000000..11d2fb3de4
--- /dev/null
+++ b/arch/s390/include/asm/ccwgroup.h
@@ -0,0 +1,75 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef S390_CCWGROUP_H
+#define S390_CCWGROUP_H
+
+struct ccw_device;
+struct ccw_driver;
+
+/**
+ * struct ccwgroup_device - ccw group device
+ * @state: online/offline state
+ * @count: number of attached slave devices
+ * @dev: embedded device structure
+ * @cdev: variable number of slave devices, allocated as needed
+ * @ungroup_work: used to ungroup the ccwgroup device
+ */
+struct ccwgroup_device {
+	enum {
+		CCWGROUP_OFFLINE,
+		CCWGROUP_ONLINE,
+	} state;
+/* private: */
+	atomic_t onoff;
+	struct mutex reg_mutex;
+/* public: */
+	unsigned int count;
+	struct device	dev;
+	struct work_struct ungroup_work;
+	struct ccw_device *cdev[];
+};
+
+/**
+ * struct ccwgroup_driver - driver for ccw group devices
+ * @setup: function called during device creation to setup the device
+ * @remove: function called on remove
+ * @set_online: function called when device is set online
+ * @set_offline: function called when device is set offline
+ * @shutdown: function called when device is shut down
+ * @driver: embedded driver structure
+ * @ccw_driver: supported ccw_driver (optional)
+ */
+struct ccwgroup_driver {
+	int (*setup) (struct ccwgroup_device *);
+	void (*remove) (struct ccwgroup_device *);
+	int (*set_online) (struct ccwgroup_device *);
+	int (*set_offline) (struct ccwgroup_device *);
+	void (*shutdown)(struct ccwgroup_device *);
+
+	struct device_driver driver;
+	struct ccw_driver *ccw_driver;
+};
+
+extern int  ccwgroup_driver_register   (struct ccwgroup_driver *cdriver);
+extern void ccwgroup_driver_unregister (struct ccwgroup_driver *cdriver);
+int ccwgroup_create_dev(struct device *root, struct ccwgroup_driver *gdrv,
+			int num_devices, const char *buf);
+
+extern int ccwgroup_set_online(struct ccwgroup_device *gdev);
+int ccwgroup_set_offline(struct ccwgroup_device *gdev, bool call_gdrv);
+
+extern int ccwgroup_probe_ccwdev(struct ccw_device *cdev);
+extern void ccwgroup_remove_ccwdev(struct ccw_device *cdev);
+
+#define to_ccwgroupdev(x) container_of((x), struct ccwgroup_device, dev)
+#define to_ccwgroupdrv(x) container_of((x), struct ccwgroup_driver, driver)
+
+#if IS_ENABLED(CONFIG_CCWGROUP)
+bool dev_is_ccwgroup(struct device *dev);
+#else /* CONFIG_CCWGROUP */
+static inline bool dev_is_ccwgroup(struct device *dev)
+{
+	return false;
+}
+#endif /* CONFIG_CCWGROUP */
+
+#endif
diff --git a/arch/s390/include/asm/checksum.h b/arch/s390/include/asm/checksum.h
new file mode 100644
index 0000000000..69837eec2f
--- /dev/null
+++ b/arch/s390/include/asm/checksum.h
@@ -0,0 +1,133 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ *    S390 fast network checksum routines
+ *
+ *  S390 version
+ *    Copyright IBM Corp. 1999
+ *    Author(s): Ulrich Hild        (first version)
+ *               Martin Schwidefsky (heavily optimized CKSM version)
+ *               D.J. Barrow        (third attempt) 
+ */
+
+#ifndef _S390_CHECKSUM_H
+#define _S390_CHECKSUM_H
+
+#include <linux/kasan-checks.h>
+#include <linux/in6.h>
+
+/*
+ * Computes the checksum of a memory block at buff, length len,
+ * and adds in "sum" (32-bit).
+ *
+ * Returns a 32-bit number suitable for feeding into itself
+ * or csum_tcpudp_magic.
+ *
+ * This function must be called with even lengths, except
+ * for the last fragment, which may be odd.
+ *
+ * It's best to have buff aligned on a 32-bit boundary.
+ */
+static inline __wsum csum_partial(const void *buff, int len, __wsum sum)
+{
+	union register_pair rp = {
+		.even = (unsigned long) buff,
+		.odd = (unsigned long) len,
+	};
+
+	kasan_check_read(buff, len);
+	asm volatile(
+		"0:	cksm	%[sum],%[rp]\n"
+		"	jo	0b\n"
+		: [sum] "+&d" (sum), [rp] "+&d" (rp.pair) : : "cc", "memory");
+	return sum;
+}
+
+/*
+ * Fold a partial checksum without adding pseudo headers.
+ */
+static inline __sum16 csum_fold(__wsum sum)
+{
+	u32 csum = (__force u32) sum;
+
+	csum += (csum >> 16) | (csum << 16);
+	csum >>= 16;
+	return (__force __sum16) ~csum;
+}
+
+/*
+ * This is a version of ip_compute_csum() optimized for IP headers,
+ * which always checksums on 4 octet boundaries.
+ */
+static inline __sum16 ip_fast_csum(const void *iph, unsigned int ihl)
+{
+	__u64 csum = 0;
+	__u32 *ptr = (u32 *)iph;
+
+	csum += *ptr++;
+	csum += *ptr++;
+	csum += *ptr++;
+	csum += *ptr++;
+	ihl -= 4;
+	while (ihl--)
+		csum += *ptr++;
+	csum += (csum >> 32) | (csum << 32);
+	return csum_fold((__force __wsum)(csum >> 32));
+}
+
+/*
+ * Computes the checksum of the TCP/UDP pseudo-header.
+ * Returns a 32-bit checksum.
+ */
+static inline __wsum csum_tcpudp_nofold(__be32 saddr, __be32 daddr, __u32 len,
+					__u8 proto, __wsum sum)
+{
+	__u64 csum = (__force __u64)sum;
+
+	csum += (__force __u32)saddr;
+	csum += (__force __u32)daddr;
+	csum += len;
+	csum += proto;
+	csum += (csum >> 32) | (csum << 32);
+	return (__force __wsum)(csum >> 32);
+}
+
+/*
+ * Computes the checksum of the TCP/UDP pseudo-header.
+ * Returns a 16-bit checksum, already complemented.
+ */
+static inline __sum16 csum_tcpudp_magic(__be32 saddr, __be32 daddr, __u32 len,
+					__u8 proto, __wsum sum)
+{
+	return csum_fold(csum_tcpudp_nofold(saddr, daddr, len, proto, sum));
+}
+
+/*
+ * Used for miscellaneous IP-like checksums, mainly icmp.
+ */
+static inline __sum16 ip_compute_csum(const void *buff, int len)
+{
+	return csum_fold(csum_partial(buff, len, 0));
+}
+
+#define _HAVE_ARCH_IPV6_CSUM
+static inline __sum16 csum_ipv6_magic(const struct in6_addr *saddr,
+				      const struct in6_addr *daddr,
+				      __u32 len, __u8 proto, __wsum csum)
+{
+	__u64 sum = (__force __u64)csum;
+
+	sum += (__force __u32)saddr->s6_addr32[0];
+	sum += (__force __u32)saddr->s6_addr32[1];
+	sum += (__force __u32)saddr->s6_addr32[2];
+	sum += (__force __u32)saddr->s6_addr32[3];
+	sum += (__force __u32)daddr->s6_addr32[0];
+	sum += (__force __u32)daddr->s6_addr32[1];
+	sum += (__force __u32)daddr->s6_addr32[2];
+	sum += (__force __u32)daddr->s6_addr32[3];
+	sum += len;
+	sum += proto;
+	sum += (sum >> 32) | (sum << 32);
+	return csum_fold((__force __wsum)(sum >> 32));
+}
+
+#endif /* _S390_CHECKSUM_H */
diff --git a/arch/s390/include/asm/chpid.h b/arch/s390/include/asm/chpid.h
new file mode 100644
index 0000000000..20e0d22f29
--- /dev/null
+++ b/arch/s390/include/asm/chpid.h
@@ -0,0 +1,51 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ *    Copyright IBM Corp. 2007, 2012
+ *    Author(s): Peter Oberparleiter <peter.oberparleiter@de.ibm.com>
+ */
+#ifndef _ASM_S390_CHPID_H
+#define _ASM_S390_CHPID_H
+
+#include <uapi/asm/chpid.h>
+#include <asm/cio.h>
+
+struct channel_path_desc_fmt0 {
+	u8 flags;
+	u8 lsn;
+	u8 desc;
+	u8 chpid;
+	u8 swla;
+	u8 zeroes;
+	u8 chla;
+	u8 chpp;
+} __packed;
+
+static inline void chp_id_init(struct chp_id *chpid)
+{
+	memset(chpid, 0, sizeof(struct chp_id));
+}
+
+static inline int chp_id_is_equal(struct chp_id *a, struct chp_id *b)
+{
+	return (a->id == b->id) && (a->cssid == b->cssid);
+}
+
+static inline void chp_id_next(struct chp_id *chpid)
+{
+	if (chpid->id < __MAX_CHPID)
+		chpid->id++;
+	else {
+		chpid->id = 0;
+		chpid->cssid++;
+	}
+}
+
+static inline int chp_id_is_valid(struct chp_id *chpid)
+{
+	return (chpid->cssid <= __MAX_CSSID);
+}
+
+
+#define chp_id_for_each(c) \
+	for (chp_id_init(c); chp_id_is_valid(c); chp_id_next(c))
+#endif /* _ASM_S390_CHPID_H */
diff --git a/arch/s390/include/asm/chsc.h b/arch/s390/include/asm/chsc.h
new file mode 100644
index 0000000000..bb48ea380c
--- /dev/null
+++ b/arch/s390/include/asm/chsc.h
@@ -0,0 +1,69 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright IBM Corp. 2020
+ *
+ * Author(s): Alexandra Winter <wintera@linux.ibm.com>
+ *
+ * Interface for Channel Subsystem Call
+ */
+#ifndef _ASM_S390_CHSC_H
+#define _ASM_S390_CHSC_H
+
+#include <uapi/asm/chsc.h>
+
+/**
+ * Operation codes for CHSC PNSO:
+ *    PNSO_OC_NET_BRIDGE_INFO - only addresses that are visible to a bridgeport
+ *    PNSO_OC_NET_ADDR_INFO   - all addresses
+ */
+#define PNSO_OC_NET_BRIDGE_INFO		0
+#define PNSO_OC_NET_ADDR_INFO		3
+/**
+ * struct chsc_pnso_naid_l2 - network address information descriptor
+ * @nit:  Network interface token
+ * @addr_lnid: network address and logical network id (VLAN ID)
+ */
+struct chsc_pnso_naid_l2 {
+	u64 nit;
+	struct { u8 mac[6]; u16 lnid; } addr_lnid;
+} __packed;
+
+struct chsc_pnso_resume_token {
+	u64 t1;
+	u64 t2;
+} __packed;
+
+struct chsc_pnso_naihdr {
+	struct chsc_pnso_resume_token resume_token;
+	u32:32;
+	u32 instance;
+	u32:24;
+	u8 naids;
+	u32 reserved[3];
+} __packed;
+
+struct chsc_pnso_area {
+	struct chsc_header request;
+	u8:2;
+	u8 m:1;
+	u8:5;
+	u8:2;
+	u8 ssid:2;
+	u8 fmt:4;
+	u16 sch;
+	u8:8;
+	u8 cssid;
+	u16:16;
+	u8 oc;
+	u32:24;
+	struct chsc_pnso_resume_token resume_token;
+	u32 n:1;
+	u32:31;
+	u32 reserved[3];
+	struct chsc_header response;
+	u32:32;
+	struct chsc_pnso_naihdr naihdr;
+	struct chsc_pnso_naid_l2 entries[];
+} __packed __aligned(PAGE_SIZE);
+
+#endif /* _ASM_S390_CHSC_H */
diff --git a/arch/s390/include/asm/cio.h b/arch/s390/include/asm/cio.h
new file mode 100644
index 0000000000..1c4f585dd3
--- /dev/null
+++ b/arch/s390/include/asm/cio.h
@@ -0,0 +1,378 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Common interface for I/O on S/390
+ */
+#ifndef _ASM_S390_CIO_H_
+#define _ASM_S390_CIO_H_
+
+#include <linux/bitops.h>
+#include <linux/genalloc.h>
+#include <asm/types.h>
+#include <asm/tpi.h>
+
+#define LPM_ANYPATH 0xff
+#define __MAX_CSSID 0
+#define __MAX_SUBCHANNEL 65535
+#define __MAX_SSID 3
+
+#include <asm/scsw.h>
+
+/**
+ * struct ccw1 - channel command word
+ * @cmd_code: command code
+ * @flags: flags, like IDA addressing, etc.
+ * @count: byte count
+ * @cda: data address
+ *
+ * The ccw is the basic structure to build channel programs that perform
+ * operations with the device or the control unit. Only Format-1 channel
+ * command words are supported.
+ */
+struct ccw1 {
+	__u8  cmd_code;
+	__u8  flags;
+	__u16 count;
+	__u32 cda;
+} __attribute__ ((packed,aligned(8)));
+
+/**
+ * struct ccw0 - channel command word
+ * @cmd_code: command code
+ * @cda: data address
+ * @flags: flags, like IDA addressing, etc.
+ * @reserved: will be ignored
+ * @count: byte count
+ *
+ * The format-0 ccw structure.
+ */
+struct ccw0 {
+	__u8 cmd_code;
+	__u32 cda : 24;
+	__u8  flags;
+	__u8  reserved;
+	__u16 count;
+} __packed __aligned(8);
+
+#define CCW_FLAG_DC		0x80
+#define CCW_FLAG_CC		0x40
+#define CCW_FLAG_SLI		0x20
+#define CCW_FLAG_SKIP		0x10
+#define CCW_FLAG_PCI		0x08
+#define CCW_FLAG_IDA		0x04
+#define CCW_FLAG_SUSPEND	0x02
+
+#define CCW_CMD_READ_IPL	0x02
+#define CCW_CMD_NOOP		0x03
+#define CCW_CMD_BASIC_SENSE	0x04
+#define CCW_CMD_TIC		0x08
+#define CCW_CMD_STLCK           0x14
+#define CCW_CMD_SENSE_PGID	0x34
+#define CCW_CMD_SUSPEND_RECONN	0x5B
+#define CCW_CMD_RDC		0x64
+#define CCW_CMD_RELEASE		0x94
+#define CCW_CMD_SET_PGID	0xAF
+#define CCW_CMD_SENSE_ID	0xE4
+#define CCW_CMD_DCTL		0xF3
+
+#define SENSE_MAX_COUNT		0x20
+
+/**
+ * struct erw - extended report word
+ * @res0: reserved
+ * @auth: authorization check
+ * @pvrf: path-verification-required flag
+ * @cpt: channel-path timeout
+ * @fsavf: failing storage address validity flag
+ * @cons: concurrent sense
+ * @scavf: secondary ccw address validity flag
+ * @fsaf: failing storage address format
+ * @scnt: sense count, if @cons == %1
+ * @res16: reserved
+ */
+struct erw {
+	__u32 res0  : 3;
+	__u32 auth  : 1;
+	__u32 pvrf  : 1;
+	__u32 cpt   : 1;
+	__u32 fsavf : 1;
+	__u32 cons  : 1;
+	__u32 scavf : 1;
+	__u32 fsaf  : 1;
+	__u32 scnt  : 6;
+	__u32 res16 : 16;
+} __attribute__ ((packed));
+
+/**
+ * struct erw_eadm - EADM Subchannel extended report word
+ * @b: aob error
+ * @r: arsb error
+ */
+struct erw_eadm {
+	__u32 : 16;
+	__u32 b : 1;
+	__u32 r : 1;
+	__u32  : 14;
+} __packed;
+
+/**
+ * struct sublog - subchannel logout area
+ * @res0: reserved
+ * @esf: extended status flags
+ * @lpum: last path used mask
+ * @arep: ancillary report
+ * @fvf: field-validity flags
+ * @sacc: storage access code
+ * @termc: termination code
+ * @devsc: device-status check
+ * @serr: secondary error
+ * @ioerr: i/o-error alert
+ * @seqc: sequence code
+ */
+struct sublog {
+	__u32 res0  : 1;
+	__u32 esf   : 7;
+	__u32 lpum  : 8;
+	__u32 arep  : 1;
+	__u32 fvf   : 5;
+	__u32 sacc  : 2;
+	__u32 termc : 2;
+	__u32 devsc : 1;
+	__u32 serr  : 1;
+	__u32 ioerr : 1;
+	__u32 seqc  : 3;
+} __attribute__ ((packed));
+
+/**
+ * struct esw0 - Format 0 Extended Status Word (ESW)
+ * @sublog: subchannel logout
+ * @erw: extended report word
+ * @faddr: failing storage address
+ * @saddr: secondary ccw address
+ */
+struct esw0 {
+	struct sublog sublog;
+	struct erw erw;
+	__u32  faddr[2];
+	__u32  saddr;
+} __attribute__ ((packed));
+
+/**
+ * struct esw1 - Format 1 Extended Status Word (ESW)
+ * @zero0: reserved zeros
+ * @lpum: last path used mask
+ * @zero16: reserved zeros
+ * @erw: extended report word
+ * @zeros: three fullwords of zeros
+ */
+struct esw1 {
+	__u8  zero0;
+	__u8  lpum;
+	__u16 zero16;
+	struct erw erw;
+	__u32 zeros[3];
+} __attribute__ ((packed));
+
+/**
+ * struct esw2 - Format 2 Extended Status Word (ESW)
+ * @zero0: reserved zeros
+ * @lpum: last path used mask
+ * @dcti: device-connect-time interval
+ * @erw: extended report word
+ * @zeros: three fullwords of zeros
+ */
+struct esw2 {
+	__u8  zero0;
+	__u8  lpum;
+	__u16 dcti;
+	struct erw erw;
+	__u32 zeros[3];
+} __attribute__ ((packed));
+
+/**
+ * struct esw3 - Format 3 Extended Status Word (ESW)
+ * @zero0: reserved zeros
+ * @lpum: last path used mask
+ * @res: reserved
+ * @erw: extended report word
+ * @zeros: three fullwords of zeros
+ */
+struct esw3 {
+	__u8  zero0;
+	__u8  lpum;
+	__u16 res;
+	struct erw erw;
+	__u32 zeros[3];
+} __attribute__ ((packed));
+
+/**
+ * struct esw_eadm - EADM Subchannel Extended Status Word (ESW)
+ * @sublog: subchannel logout
+ * @erw: extended report word
+ */
+struct esw_eadm {
+	__u32 sublog;
+	struct erw_eadm erw;
+	__u32 : 32;
+	__u32 : 32;
+	__u32 : 32;
+} __packed;
+
+/**
+ * struct irb - interruption response block
+ * @scsw: subchannel status word
+ * @esw: extended status word
+ * @ecw: extended control word
+ *
+ * The irb that is handed to the device driver when an interrupt occurs. For
+ * solicited interrupts, the common I/O layer already performs checks whether
+ * a field is valid; a field not being valid is always passed as %0.
+ * If a unit check occurred, @ecw may contain sense data; this is retrieved
+ * by the common I/O layer itself if the device doesn't support concurrent
+ * sense (so that the device driver never needs to perform basic sense itself).
+ * For unsolicited interrupts, the irb is passed as-is (expect for sense data,
+ * if applicable).
+ */
+struct irb {
+	union scsw scsw;
+	union {
+		struct esw0 esw0;
+		struct esw1 esw1;
+		struct esw2 esw2;
+		struct esw3 esw3;
+		struct esw_eadm eadm;
+	} esw;
+	__u8   ecw[32];
+} __attribute__ ((packed,aligned(4)));
+
+/**
+ * struct ciw - command information word  (CIW) layout
+ * @et: entry type
+ * @reserved: reserved bits
+ * @ct: command type
+ * @cmd: command code
+ * @count: command count
+ */
+struct ciw {
+	__u32 et       :  2;
+	__u32 reserved :  2;
+	__u32 ct       :  4;
+	__u32 cmd      :  8;
+	__u32 count    : 16;
+} __attribute__ ((packed));
+
+#define CIW_TYPE_RCD	0x0    	/* read configuration data */
+#define CIW_TYPE_SII	0x1    	/* set interface identifier */
+#define CIW_TYPE_RNI	0x2    	/* read node identifier */
+
+/*
+ * Node Descriptor as defined in SA22-7204, "Common I/O-Device Commands"
+ */
+
+#define ND_VALIDITY_VALID	0
+#define ND_VALIDITY_OUTDATED	1
+#define ND_VALIDITY_INVALID	2
+
+struct node_descriptor {
+	/* Flags. */
+	union {
+		struct {
+			u32 validity:3;
+			u32 reserved:5;
+		} __packed;
+		u8 byte0;
+	} __packed;
+
+	/* Node parameters. */
+	u32 params:24;
+
+	/* Node ID. */
+	char type[6];
+	char model[3];
+	char manufacturer[3];
+	char plant[2];
+	char seq[12];
+	u16 tag;
+} __packed;
+
+/*
+ * Flags used as input parameters for do_IO()
+ */
+#define DOIO_ALLOW_SUSPEND	 0x0001 /* allow for channel prog. suspend */
+#define DOIO_DENY_PREFETCH	 0x0002 /* don't allow for CCW prefetch */
+#define DOIO_SUPPRESS_INTER	 0x0004 /* suppress intermediate inter. */
+					/* ... for suspended CCWs */
+/* Device or subchannel gone. */
+#define CIO_GONE       0x0001
+/* No path to device. */
+#define CIO_NO_PATH    0x0002
+/* Device has appeared. */
+#define CIO_OPER       0x0004
+/* Sick revalidation of device. */
+#define CIO_REVALIDATE 0x0008
+/* Device did not respond in time. */
+#define CIO_BOXED      0x0010
+
+/**
+ * struct ccw_dev_id - unique identifier for ccw devices
+ * @ssid: subchannel set id
+ * @devno: device number
+ *
+ * This structure is not directly based on any hardware structure. The
+ * hardware identifies a device by its device number and its subchannel,
+ * which is in turn identified by its id. In order to get a unique identifier
+ * for ccw devices across subchannel sets, @struct ccw_dev_id has been
+ * introduced.
+ */
+struct ccw_dev_id {
+	u8 ssid;
+	u16 devno;
+};
+
+/**
+ * ccw_dev_id_is_equal() - compare two ccw_dev_ids
+ * @dev_id1: a ccw_dev_id
+ * @dev_id2: another ccw_dev_id
+ * Returns:
+ *  %1 if the two structures are equal field-by-field,
+ *  %0 if not.
+ * Context:
+ *  any
+ */
+static inline int ccw_dev_id_is_equal(struct ccw_dev_id *dev_id1,
+				      struct ccw_dev_id *dev_id2)
+{
+	if ((dev_id1->ssid == dev_id2->ssid) &&
+	    (dev_id1->devno == dev_id2->devno))
+		return 1;
+	return 0;
+}
+
+/**
+ * pathmask_to_pos() - find the position of the left-most bit in a pathmask
+ * @mask: pathmask with at least one bit set
+ */
+static inline u8 pathmask_to_pos(u8 mask)
+{
+	return 8 - ffs(mask);
+}
+
+extern void css_schedule_reprobe(void);
+
+extern void *cio_dma_zalloc(size_t size);
+extern void cio_dma_free(void *cpu_addr, size_t size);
+extern struct device *cio_get_dma_css_dev(void);
+
+void *cio_gp_dma_zalloc(struct gen_pool *gp_dma, struct device *dma_dev,
+			size_t size);
+void cio_gp_dma_free(struct gen_pool *gp_dma, void *cpu_addr, size_t size);
+void cio_gp_dma_destroy(struct gen_pool *gp_dma, struct device *dma_dev);
+struct gen_pool *cio_gp_dma_create(struct device *dma_dev, int nr_pages);
+
+/* Function from drivers/s390/cio/chsc.c */
+int chsc_sstpc(void *page, unsigned int op, u16 ctrl, long *clock_delta);
+int chsc_sstpi(void *page, void *result, size_t size);
+int chsc_stzi(void *page, void *result, size_t size);
+int chsc_sgib(u32 origin);
+int chsc_scud(u16 cu, u64 *esm, u8 *esm_valid);
+
+#endif
diff --git a/arch/s390/include/asm/clocksource.h b/arch/s390/include/asm/clocksource.h
new file mode 100644
index 0000000000..03434369fc
--- /dev/null
+++ b/arch/s390/include/asm/clocksource.h
@@ -0,0 +1,7 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* s390-specific clocksource additions */
+
+#ifndef _ASM_S390_CLOCKSOURCE_H
+#define _ASM_S390_CLOCKSOURCE_H
+
+#endif /* _ASM_S390_CLOCKSOURCE_H */
diff --git a/arch/s390/include/asm/clp.h b/arch/s390/include/asm/clp.h
new file mode 100644
index 0000000000..10919eeb75
--- /dev/null
+++ b/arch/s390/include/asm/clp.h
@@ -0,0 +1,59 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_S390_CLP_H
+#define _ASM_S390_CLP_H
+
+/* CLP common request & response block size */
+#define CLP_BLK_SIZE			PAGE_SIZE
+
+/* Call Logical Processor - Command Code */
+#define CLP_SLPC		0x0001
+
+#define CLP_LPS_BASE	0
+#define CLP_LPS_PCI	2
+
+struct clp_req_hdr {
+	u16 len;
+	u16 cmd;
+	u32 fmt		: 4;
+	u32 reserved1	: 28;
+	u64 reserved2;
+} __packed;
+
+struct clp_rsp_hdr {
+	u16 len;
+	u16 rsp;
+	u32 fmt		: 4;
+	u32 reserved1	: 28;
+	u64 reserved2;
+} __packed;
+
+/* CLP Response Codes */
+#define CLP_RC_OK			0x0010	/* Command request successfully */
+#define CLP_RC_CMD			0x0020	/* Command code not recognized */
+#define CLP_RC_PERM			0x0030	/* Command not authorized */
+#define CLP_RC_FMT			0x0040	/* Invalid command request format */
+#define CLP_RC_LEN			0x0050	/* Invalid command request length */
+#define CLP_RC_8K			0x0060	/* Command requires 8K LPCB */
+#define CLP_RC_RESNOT0			0x0070	/* Reserved field not zero */
+#define CLP_RC_NODATA			0x0080	/* No data available */
+#define CLP_RC_FC_UNKNOWN		0x0100	/* Function code not recognized */
+
+/* Store logical-processor characteristics request */
+struct clp_req_slpc {
+	struct clp_req_hdr hdr;
+} __packed;
+
+struct clp_rsp_slpc {
+	struct clp_rsp_hdr hdr;
+	u32 reserved2[4];
+	u32 lpif[8];
+	u32 reserved3[8];
+	u32 lpic[8];
+} __packed;
+
+struct clp_req_rsp_slpc {
+	struct clp_req_slpc request;
+	struct clp_rsp_slpc response;
+} __packed;
+
+#endif
diff --git a/arch/s390/include/asm/cmb.h b/arch/s390/include/asm/cmb.h
new file mode 100644
index 0000000000..599594c372
--- /dev/null
+++ b/arch/s390/include/asm/cmb.h
@@ -0,0 +1,14 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef S390_CMB_H
+#define S390_CMB_H
+
+#include <uapi/asm/cmb.h>
+
+struct ccw_device;
+extern int enable_cmf(struct ccw_device *cdev);
+extern int disable_cmf(struct ccw_device *cdev);
+extern int __disable_cmf(struct ccw_device *cdev);
+extern u64 cmf_read(struct ccw_device *cdev, int index);
+extern int cmf_readall(struct ccw_device *cdev, struct cmbdata *data);
+
+#endif /* S390_CMB_H */
diff --git a/arch/s390/include/asm/cmpxchg.h b/arch/s390/include/asm/cmpxchg.h
new file mode 100644
index 0000000000..aae0315374
--- /dev/null
+++ b/arch/s390/include/asm/cmpxchg.h
@@ -0,0 +1,207 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright IBM Corp. 1999, 2011
+ *
+ * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>,
+ */
+
+#ifndef __ASM_CMPXCHG_H
+#define __ASM_CMPXCHG_H
+
+#include <linux/mmdebug.h>
+#include <linux/types.h>
+#include <linux/bug.h>
+
+void __xchg_called_with_bad_pointer(void);
+
+static __always_inline unsigned long
+__arch_xchg(unsigned long x, unsigned long address, int size)
+{
+	unsigned long old;
+	int shift;
+
+	switch (size) {
+	case 1:
+		shift = (3 ^ (address & 3)) << 3;
+		address ^= address & 3;
+		asm volatile(
+			"       l       %0,%1\n"
+			"0:     lr      0,%0\n"
+			"       nr      0,%3\n"
+			"       or      0,%2\n"
+			"       cs      %0,0,%1\n"
+			"       jl      0b\n"
+			: "=&d" (old), "+Q" (*(int *) address)
+			: "d" ((x & 0xff) << shift), "d" (~(0xff << shift))
+			: "memory", "cc", "0");
+		return old >> shift;
+	case 2:
+		shift = (2 ^ (address & 2)) << 3;
+		address ^= address & 2;
+		asm volatile(
+			"       l       %0,%1\n"
+			"0:     lr      0,%0\n"
+			"       nr      0,%3\n"
+			"       or      0,%2\n"
+			"       cs      %0,0,%1\n"
+			"       jl      0b\n"
+			: "=&d" (old), "+Q" (*(int *) address)
+			: "d" ((x & 0xffff) << shift), "d" (~(0xffff << shift))
+			: "memory", "cc", "0");
+		return old >> shift;
+	case 4:
+		asm volatile(
+			"       l       %0,%1\n"
+			"0:     cs      %0,%2,%1\n"
+			"       jl      0b\n"
+			: "=&d" (old), "+Q" (*(int *) address)
+			: "d" (x)
+			: "memory", "cc");
+		return old;
+	case 8:
+		asm volatile(
+			"       lg      %0,%1\n"
+			"0:     csg     %0,%2,%1\n"
+			"       jl      0b\n"
+			: "=&d" (old), "+QS" (*(long *) address)
+			: "d" (x)
+			: "memory", "cc");
+		return old;
+	}
+	__xchg_called_with_bad_pointer();
+	return x;
+}
+
+#define arch_xchg(ptr, x)						\
+({									\
+	__typeof__(*(ptr)) __ret;					\
+									\
+	__ret = (__typeof__(*(ptr)))					\
+		__arch_xchg((unsigned long)(x), (unsigned long)(ptr),	\
+			    sizeof(*(ptr)));				\
+	__ret;								\
+})
+
+void __cmpxchg_called_with_bad_pointer(void);
+
+static __always_inline unsigned long __cmpxchg(unsigned long address,
+					       unsigned long old,
+					       unsigned long new, int size)
+{
+	switch (size) {
+	case 1: {
+		unsigned int prev, shift, mask;
+
+		shift = (3 ^ (address & 3)) << 3;
+		address ^= address & 3;
+		old = (old & 0xff) << shift;
+		new = (new & 0xff) << shift;
+		mask = ~(0xff << shift);
+		asm volatile(
+			"	l	%[prev],%[address]\n"
+			"	nr	%[prev],%[mask]\n"
+			"	xilf	%[mask],0xffffffff\n"
+			"	or	%[new],%[prev]\n"
+			"	or	%[prev],%[tmp]\n"
+			"0:	lr	%[tmp],%[prev]\n"
+			"	cs	%[prev],%[new],%[address]\n"
+			"	jnl	1f\n"
+			"	xr	%[tmp],%[prev]\n"
+			"	xr	%[new],%[tmp]\n"
+			"	nr	%[tmp],%[mask]\n"
+			"	jz	0b\n"
+			"1:"
+			: [prev] "=&d" (prev),
+			  [address] "+Q" (*(int *)address),
+			  [tmp] "+&d" (old),
+			  [new] "+&d" (new),
+			  [mask] "+&d" (mask)
+			:: "memory", "cc");
+		return prev >> shift;
+	}
+	case 2: {
+		unsigned int prev, shift, mask;
+
+		shift = (2 ^ (address & 2)) << 3;
+		address ^= address & 2;
+		old = (old & 0xffff) << shift;
+		new = (new & 0xffff) << shift;
+		mask = ~(0xffff << shift);
+		asm volatile(
+			"	l	%[prev],%[address]\n"
+			"	nr	%[prev],%[mask]\n"
+			"	xilf	%[mask],0xffffffff\n"
+			"	or	%[new],%[prev]\n"
+			"	or	%[prev],%[tmp]\n"
+			"0:	lr	%[tmp],%[prev]\n"
+			"	cs	%[prev],%[new],%[address]\n"
+			"	jnl	1f\n"
+			"	xr	%[tmp],%[prev]\n"
+			"	xr	%[new],%[tmp]\n"
+			"	nr	%[tmp],%[mask]\n"
+			"	jz	0b\n"
+			"1:"
+			: [prev] "=&d" (prev),
+			  [address] "+Q" (*(int *)address),
+			  [tmp] "+&d" (old),
+			  [new] "+&d" (new),
+			  [mask] "+&d" (mask)
+			:: "memory", "cc");
+		return prev >> shift;
+	}
+	case 4: {
+		unsigned int prev = old;
+
+		asm volatile(
+			"	cs	%[prev],%[new],%[address]\n"
+			: [prev] "+&d" (prev),
+			  [address] "+Q" (*(int *)address)
+			: [new] "d" (new)
+			: "memory", "cc");
+		return prev;
+	}
+	case 8: {
+		unsigned long prev = old;
+
+		asm volatile(
+			"	csg	%[prev],%[new],%[address]\n"
+			: [prev] "+&d" (prev),
+			  [address] "+QS" (*(long *)address)
+			: [new] "d" (new)
+			: "memory", "cc");
+		return prev;
+	}
+	}
+	__cmpxchg_called_with_bad_pointer();
+	return old;
+}
+
+#define arch_cmpxchg(ptr, o, n)						\
+({									\
+	__typeof__(*(ptr)) __ret;					\
+									\
+	__ret = (__typeof__(*(ptr)))					\
+		__cmpxchg((unsigned long)(ptr), (unsigned long)(o),	\
+			  (unsigned long)(n), sizeof(*(ptr)));		\
+	__ret;								\
+})
+
+#define arch_cmpxchg64		arch_cmpxchg
+#define arch_cmpxchg_local	arch_cmpxchg
+#define arch_cmpxchg64_local	arch_cmpxchg
+
+#define system_has_cmpxchg128()		1
+
+static __always_inline u128 arch_cmpxchg128(volatile u128 *ptr, u128 old, u128 new)
+{
+	asm volatile(
+		"	cdsg	%[old],%[new],%[ptr]\n"
+		: [old] "+d" (old), [ptr] "+QS" (*ptr)
+		: [new] "d" (new)
+		: "memory", "cc");
+	return old;
+}
+
+#define arch_cmpxchg128		arch_cmpxchg128
+
+#endif /* __ASM_CMPXCHG_H */
diff --git a/arch/s390/include/asm/compat.h b/arch/s390/include/asm/compat.h
new file mode 100644
index 0000000000..3cb9d813f0
--- /dev/null
+++ b/arch/s390/include/asm/compat.h
@@ -0,0 +1,140 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_S390X_COMPAT_H
+#define _ASM_S390X_COMPAT_H
+/*
+ * Architecture specific compatibility types
+ */
+#include <linux/types.h>
+#include <linux/sched.h>
+#include <linux/sched/task_stack.h>
+#include <linux/thread_info.h>
+#include <asm/ptrace.h>
+
+#define compat_mode_t	compat_mode_t
+typedef u16		compat_mode_t;
+
+#define __compat_uid_t	__compat_uid_t
+typedef u16		__compat_uid_t;
+typedef u16		__compat_gid_t;
+
+#define compat_dev_t	compat_dev_t
+typedef u16		compat_dev_t;
+
+#define compat_ipc_pid_t compat_ipc_pid_t
+typedef u16		 compat_ipc_pid_t;
+
+#define compat_statfs	compat_statfs
+
+#include <asm-generic/compat.h>
+
+#define __TYPE_IS_PTR(t) (!__builtin_types_compatible_p( \
+				typeof(0?(__force t)0:0ULL), u64))
+
+#define __SC_DELOUSE(t,v) ({ \
+	BUILD_BUG_ON(sizeof(t) > 4 && !__TYPE_IS_PTR(t)); \
+	(__force t)(__TYPE_IS_PTR(t) ? ((v) & 0x7fffffff) : (v)); \
+})
+
+#define PSW32_MASK_USER		0x0000FF00UL
+
+#define PSW32_USER_BITS (PSW32_MASK_DAT | PSW32_MASK_IO | PSW32_MASK_EXT | \
+			 PSW32_DEFAULT_KEY | PSW32_MASK_BASE | \
+			 PSW32_MASK_MCHECK | PSW32_MASK_PSTATE | \
+			 PSW32_ASC_PRIMARY)
+
+#define COMPAT_UTS_MACHINE	"s390\0\0\0\0"
+
+typedef u16		compat_nlink_t;
+
+typedef struct {
+	u32 mask;
+	u32 addr;
+} __aligned(8) psw_compat_t;
+
+typedef struct {
+	psw_compat_t psw;
+	u32 gprs[NUM_GPRS];
+	u32 acrs[NUM_ACRS];
+	u32 orig_gpr2;
+} s390_compat_regs;
+
+typedef struct {
+	u32 gprs_high[NUM_GPRS];
+} s390_compat_regs_high;
+
+struct compat_stat {
+	compat_dev_t	st_dev;
+	u16		__pad1;
+	compat_ino_t	st_ino;
+	compat_mode_t	st_mode;
+	compat_nlink_t	st_nlink;
+	__compat_uid_t	st_uid;
+	__compat_gid_t	st_gid;
+	compat_dev_t	st_rdev;
+	u16		__pad2;
+	u32		st_size;
+	u32		st_blksize;
+	u32		st_blocks;
+	u32		st_atime;
+	u32		st_atime_nsec;
+	u32		st_mtime;
+	u32		st_mtime_nsec;
+	u32		st_ctime;
+	u32		st_ctime_nsec;
+	u32		__unused4;
+	u32		__unused5;
+};
+
+struct compat_statfs {
+	u32		f_type;
+	u32		f_bsize;
+	u32		f_blocks;
+	u32		f_bfree;
+	u32		f_bavail;
+	u32		f_files;
+	u32		f_ffree;
+	compat_fsid_t	f_fsid;
+	u32		f_namelen;
+	u32		f_frsize;
+	u32		f_flags;
+	u32		f_spare[4];
+};
+
+struct compat_statfs64 {
+	u32		f_type;
+	u32		f_bsize;
+	u64		f_blocks;
+	u64		f_bfree;
+	u64		f_bavail;
+	u64		f_files;
+	u64		f_ffree;
+	compat_fsid_t	f_fsid;
+	u32		f_namelen;
+	u32		f_frsize;
+	u32		f_flags;
+	u32		f_spare[5];
+};
+
+/*
+ * A pointer passed in from user mode. This should not
+ * be used for syscall parameters, just declare them
+ * as pointers because the syscall entry code will have
+ * appropriately converted them already.
+ */
+
+static inline void __user *compat_ptr(compat_uptr_t uptr)
+{
+	return (void __user *)(unsigned long)(uptr & 0x7fffffffUL);
+}
+#define compat_ptr(uptr) compat_ptr(uptr)
+
+#ifdef CONFIG_COMPAT
+
+static inline int is_compat_task(void)
+{
+	return test_thread_flag(TIF_31BIT);
+}
+
+#endif
+
+#endif /* _ASM_S390X_COMPAT_H */
diff --git a/arch/s390/include/asm/cpacf.h b/arch/s390/include/asm/cpacf.h
new file mode 100644
index 0000000000..b378e2b57a
--- /dev/null
+++ b/arch/s390/include/asm/cpacf.h
@@ -0,0 +1,551 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * CP Assist for Cryptographic Functions (CPACF)
+ *
+ * Copyright IBM Corp. 2003, 2023
+ * Author(s): Thomas Spatzier
+ *	      Jan Glauber
+ *	      Harald Freudenberger (freude@de.ibm.com)
+ *	      Martin Schwidefsky <schwidefsky@de.ibm.com>
+ */
+#ifndef _ASM_S390_CPACF_H
+#define _ASM_S390_CPACF_H
+
+#include <asm/facility.h>
+
+/*
+ * Instruction opcodes for the CPACF instructions
+ */
+#define CPACF_KMAC		0xb91e		/* MSA	*/
+#define CPACF_KM		0xb92e		/* MSA	*/
+#define CPACF_KMC		0xb92f		/* MSA	*/
+#define CPACF_KIMD		0xb93e		/* MSA	*/
+#define CPACF_KLMD		0xb93f		/* MSA	*/
+#define CPACF_PCKMO		0xb928		/* MSA3 */
+#define CPACF_KMF		0xb92a		/* MSA4 */
+#define CPACF_KMO		0xb92b		/* MSA4 */
+#define CPACF_PCC		0xb92c		/* MSA4 */
+#define CPACF_KMCTR		0xb92d		/* MSA4 */
+#define CPACF_PRNO		0xb93c		/* MSA5 */
+#define CPACF_KMA		0xb929		/* MSA8 */
+#define CPACF_KDSA		0xb93a		/* MSA9 */
+
+/*
+ * En/decryption modifier bits
+ */
+#define CPACF_ENCRYPT		0x00
+#define CPACF_DECRYPT		0x80
+
+/*
+ * Function codes for the KM (CIPHER MESSAGE) instruction
+ */
+#define CPACF_KM_QUERY		0x00
+#define CPACF_KM_DEA		0x01
+#define CPACF_KM_TDEA_128	0x02
+#define CPACF_KM_TDEA_192	0x03
+#define CPACF_KM_AES_128	0x12
+#define CPACF_KM_AES_192	0x13
+#define CPACF_KM_AES_256	0x14
+#define CPACF_KM_PAES_128	0x1a
+#define CPACF_KM_PAES_192	0x1b
+#define CPACF_KM_PAES_256	0x1c
+#define CPACF_KM_XTS_128	0x32
+#define CPACF_KM_XTS_256	0x34
+#define CPACF_KM_PXTS_128	0x3a
+#define CPACF_KM_PXTS_256	0x3c
+
+/*
+ * Function codes for the KMC (CIPHER MESSAGE WITH CHAINING)
+ * instruction
+ */
+#define CPACF_KMC_QUERY		0x00
+#define CPACF_KMC_DEA		0x01
+#define CPACF_KMC_TDEA_128	0x02
+#define CPACF_KMC_TDEA_192	0x03
+#define CPACF_KMC_AES_128	0x12
+#define CPACF_KMC_AES_192	0x13
+#define CPACF_KMC_AES_256	0x14
+#define CPACF_KMC_PAES_128	0x1a
+#define CPACF_KMC_PAES_192	0x1b
+#define CPACF_KMC_PAES_256	0x1c
+#define CPACF_KMC_PRNG		0x43
+
+/*
+ * Function codes for the KMCTR (CIPHER MESSAGE WITH COUNTER)
+ * instruction
+ */
+#define CPACF_KMCTR_QUERY	0x00
+#define CPACF_KMCTR_DEA		0x01
+#define CPACF_KMCTR_TDEA_128	0x02
+#define CPACF_KMCTR_TDEA_192	0x03
+#define CPACF_KMCTR_AES_128	0x12
+#define CPACF_KMCTR_AES_192	0x13
+#define CPACF_KMCTR_AES_256	0x14
+#define CPACF_KMCTR_PAES_128	0x1a
+#define CPACF_KMCTR_PAES_192	0x1b
+#define CPACF_KMCTR_PAES_256	0x1c
+
+/*
+ * Function codes for the KIMD (COMPUTE INTERMEDIATE MESSAGE DIGEST)
+ * instruction
+ */
+#define CPACF_KIMD_QUERY	0x00
+#define CPACF_KIMD_SHA_1	0x01
+#define CPACF_KIMD_SHA_256	0x02
+#define CPACF_KIMD_SHA_512	0x03
+#define CPACF_KIMD_SHA3_224	0x20
+#define CPACF_KIMD_SHA3_256	0x21
+#define CPACF_KIMD_SHA3_384	0x22
+#define CPACF_KIMD_SHA3_512	0x23
+#define CPACF_KIMD_GHASH	0x41
+
+/*
+ * Function codes for the KLMD (COMPUTE LAST MESSAGE DIGEST)
+ * instruction
+ */
+#define CPACF_KLMD_QUERY	0x00
+#define CPACF_KLMD_SHA_1	0x01
+#define CPACF_KLMD_SHA_256	0x02
+#define CPACF_KLMD_SHA_512	0x03
+#define CPACF_KLMD_SHA3_224	0x20
+#define CPACF_KLMD_SHA3_256	0x21
+#define CPACF_KLMD_SHA3_384	0x22
+#define CPACF_KLMD_SHA3_512	0x23
+
+/*
+ * function codes for the KMAC (COMPUTE MESSAGE AUTHENTICATION CODE)
+ * instruction
+ */
+#define CPACF_KMAC_QUERY	0x00
+#define CPACF_KMAC_DEA		0x01
+#define CPACF_KMAC_TDEA_128	0x02
+#define CPACF_KMAC_TDEA_192	0x03
+
+/*
+ * Function codes for the PCKMO (PERFORM CRYPTOGRAPHIC KEY MANAGEMENT)
+ * instruction
+ */
+#define CPACF_PCKMO_QUERY		0x00
+#define CPACF_PCKMO_ENC_DES_KEY		0x01
+#define CPACF_PCKMO_ENC_TDES_128_KEY	0x02
+#define CPACF_PCKMO_ENC_TDES_192_KEY	0x03
+#define CPACF_PCKMO_ENC_AES_128_KEY	0x12
+#define CPACF_PCKMO_ENC_AES_192_KEY	0x13
+#define CPACF_PCKMO_ENC_AES_256_KEY	0x14
+#define CPACF_PCKMO_ENC_ECC_P256_KEY	0x20
+#define CPACF_PCKMO_ENC_ECC_P384_KEY	0x21
+#define CPACF_PCKMO_ENC_ECC_P521_KEY	0x22
+#define CPACF_PCKMO_ENC_ECC_ED25519_KEY	0x28
+#define CPACF_PCKMO_ENC_ECC_ED448_KEY	0x29
+
+/*
+ * Function codes for the PRNO (PERFORM RANDOM NUMBER OPERATION)
+ * instruction
+ */
+#define CPACF_PRNO_QUERY		0x00
+#define CPACF_PRNO_SHA512_DRNG_GEN	0x03
+#define CPACF_PRNO_SHA512_DRNG_SEED	0x83
+#define CPACF_PRNO_TRNG_Q_R2C_RATIO	0x70
+#define CPACF_PRNO_TRNG			0x72
+
+/*
+ * Function codes for the KMA (CIPHER MESSAGE WITH AUTHENTICATION)
+ * instruction
+ */
+#define CPACF_KMA_QUERY		0x00
+#define CPACF_KMA_GCM_AES_128	0x12
+#define CPACF_KMA_GCM_AES_192	0x13
+#define CPACF_KMA_GCM_AES_256	0x14
+
+/*
+ * Flags for the KMA (CIPHER MESSAGE WITH AUTHENTICATION) instruction
+ */
+#define CPACF_KMA_LPC	0x100	/* Last-Plaintext/Ciphertext */
+#define CPACF_KMA_LAAD	0x200	/* Last-AAD */
+#define CPACF_KMA_HS	0x400	/* Hash-subkey Supplied */
+
+typedef struct { unsigned char bytes[16]; } cpacf_mask_t;
+
+/**
+ * cpacf_query() - check if a specific CPACF function is available
+ * @opcode: the opcode of the crypto instruction
+ * @func: the function code to test for
+ *
+ * Executes the query function for the given crypto instruction @opcode
+ * and checks if @func is available
+ *
+ * Returns 1 if @func is available for @opcode, 0 otherwise
+ */
+static __always_inline void __cpacf_query(unsigned int opcode, cpacf_mask_t *mask)
+{
+	asm volatile(
+		"	lghi	0,0\n" /* query function */
+		"	lgr	1,%[mask]\n"
+		"	spm	0\n" /* pckmo doesn't change the cc */
+		/* Parameter regs are ignored, but must be nonzero and unique */
+		"0:	.insn	rrf,%[opc] << 16,2,4,6,0\n"
+		"	brc	1,0b\n"	/* handle partial completion */
+		: "=m" (*mask)
+		: [mask] "d" ((unsigned long)mask), [opc] "i" (opcode)
+		: "cc", "0", "1");
+}
+
+static __always_inline int __cpacf_check_opcode(unsigned int opcode)
+{
+	switch (opcode) {
+	case CPACF_KMAC:
+	case CPACF_KM:
+	case CPACF_KMC:
+	case CPACF_KIMD:
+	case CPACF_KLMD:
+		return test_facility(17);	/* check for MSA */
+	case CPACF_PCKMO:
+		return test_facility(76);	/* check for MSA3 */
+	case CPACF_KMF:
+	case CPACF_KMO:
+	case CPACF_PCC:
+	case CPACF_KMCTR:
+		return test_facility(77);	/* check for MSA4 */
+	case CPACF_PRNO:
+		return test_facility(57);	/* check for MSA5 */
+	case CPACF_KMA:
+		return test_facility(146);	/* check for MSA8 */
+	default:
+		BUG();
+	}
+}
+
+static __always_inline int cpacf_query(unsigned int opcode, cpacf_mask_t *mask)
+{
+	if (__cpacf_check_opcode(opcode)) {
+		__cpacf_query(opcode, mask);
+		return 1;
+	}
+	memset(mask, 0, sizeof(*mask));
+	return 0;
+}
+
+static inline int cpacf_test_func(cpacf_mask_t *mask, unsigned int func)
+{
+	return (mask->bytes[func >> 3] & (0x80 >> (func & 7))) != 0;
+}
+
+static __always_inline int cpacf_query_func(unsigned int opcode, unsigned int func)
+{
+	cpacf_mask_t mask;
+
+	if (cpacf_query(opcode, &mask))
+		return cpacf_test_func(&mask, func);
+	return 0;
+}
+
+/**
+ * cpacf_km() - executes the KM (CIPHER MESSAGE) instruction
+ * @func: the function code passed to KM; see CPACF_KM_xxx defines
+ * @param: address of parameter block; see POP for details on each func
+ * @dest: address of destination memory area
+ * @src: address of source memory area
+ * @src_len: length of src operand in bytes
+ *
+ * Returns 0 for the query func, number of processed bytes for
+ * encryption/decryption funcs
+ */
+static inline int cpacf_km(unsigned long func, void *param,
+			   u8 *dest, const u8 *src, long src_len)
+{
+	union register_pair d, s;
+
+	d.even = (unsigned long)dest;
+	s.even = (unsigned long)src;
+	s.odd  = (unsigned long)src_len;
+	asm volatile(
+		"	lgr	0,%[fc]\n"
+		"	lgr	1,%[pba]\n"
+		"0:	.insn	rre,%[opc] << 16,%[dst],%[src]\n"
+		"	brc	1,0b\n" /* handle partial completion */
+		: [src] "+&d" (s.pair), [dst] "+&d" (d.pair)
+		: [fc] "d" (func), [pba] "d" ((unsigned long)param),
+		  [opc] "i" (CPACF_KM)
+		: "cc", "memory", "0", "1");
+
+	return src_len - s.odd;
+}
+
+/**
+ * cpacf_kmc() - executes the KMC (CIPHER MESSAGE WITH CHAINING) instruction
+ * @func: the function code passed to KM; see CPACF_KMC_xxx defines
+ * @param: address of parameter block; see POP for details on each func
+ * @dest: address of destination memory area
+ * @src: address of source memory area
+ * @src_len: length of src operand in bytes
+ *
+ * Returns 0 for the query func, number of processed bytes for
+ * encryption/decryption funcs
+ */
+static inline int cpacf_kmc(unsigned long func, void *param,
+			    u8 *dest, const u8 *src, long src_len)
+{
+	union register_pair d, s;
+
+	d.even = (unsigned long)dest;
+	s.even = (unsigned long)src;
+	s.odd  = (unsigned long)src_len;
+	asm volatile(
+		"	lgr	0,%[fc]\n"
+		"	lgr	1,%[pba]\n"
+		"0:	.insn	rre,%[opc] << 16,%[dst],%[src]\n"
+		"	brc	1,0b\n" /* handle partial completion */
+		: [src] "+&d" (s.pair), [dst] "+&d" (d.pair)
+		: [fc] "d" (func), [pba] "d" ((unsigned long)param),
+		  [opc] "i" (CPACF_KMC)
+		: "cc", "memory", "0", "1");
+
+	return src_len - s.odd;
+}
+
+/**
+ * cpacf_kimd() - executes the KIMD (COMPUTE INTERMEDIATE MESSAGE DIGEST)
+ *		  instruction
+ * @func: the function code passed to KM; see CPACF_KIMD_xxx defines
+ * @param: address of parameter block; see POP for details on each func
+ * @src: address of source memory area
+ * @src_len: length of src operand in bytes
+ */
+static inline void cpacf_kimd(unsigned long func, void *param,
+			      const u8 *src, long src_len)
+{
+	union register_pair s;
+
+	s.even = (unsigned long)src;
+	s.odd  = (unsigned long)src_len;
+	asm volatile(
+		"	lgr	0,%[fc]\n"
+		"	lgr	1,%[pba]\n"
+		"0:	.insn	rre,%[opc] << 16,0,%[src]\n"
+		"	brc	1,0b\n" /* handle partial completion */
+		: [src] "+&d" (s.pair)
+		: [fc] "d" (func), [pba] "d" ((unsigned long)(param)),
+		  [opc] "i" (CPACF_KIMD)
+		: "cc", "memory", "0", "1");
+}
+
+/**
+ * cpacf_klmd() - executes the KLMD (COMPUTE LAST MESSAGE DIGEST) instruction
+ * @func: the function code passed to KM; see CPACF_KLMD_xxx defines
+ * @param: address of parameter block; see POP for details on each func
+ * @src: address of source memory area
+ * @src_len: length of src operand in bytes
+ */
+static inline void cpacf_klmd(unsigned long func, void *param,
+			      const u8 *src, long src_len)
+{
+	union register_pair s;
+
+	s.even = (unsigned long)src;
+	s.odd  = (unsigned long)src_len;
+	asm volatile(
+		"	lgr	0,%[fc]\n"
+		"	lgr	1,%[pba]\n"
+		"0:	.insn	rre,%[opc] << 16,0,%[src]\n"
+		"	brc	1,0b\n" /* handle partial completion */
+		: [src] "+&d" (s.pair)
+		: [fc] "d" (func), [pba] "d" ((unsigned long)param),
+		  [opc] "i" (CPACF_KLMD)
+		: "cc", "memory", "0", "1");
+}
+
+/**
+ * cpacf_kmac() - executes the KMAC (COMPUTE MESSAGE AUTHENTICATION CODE)
+ *		  instruction
+ * @func: the function code passed to KM; see CPACF_KMAC_xxx defines
+ * @param: address of parameter block; see POP for details on each func
+ * @src: address of source memory area
+ * @src_len: length of src operand in bytes
+ *
+ * Returns 0 for the query func, number of processed bytes for digest funcs
+ */
+static inline int cpacf_kmac(unsigned long func, void *param,
+			     const u8 *src, long src_len)
+{
+	union register_pair s;
+
+	s.even = (unsigned long)src;
+	s.odd  = (unsigned long)src_len;
+	asm volatile(
+		"	lgr	0,%[fc]\n"
+		"	lgr	1,%[pba]\n"
+		"0:	.insn	rre,%[opc] << 16,0,%[src]\n"
+		"	brc	1,0b\n" /* handle partial completion */
+		: [src] "+&d" (s.pair)
+		: [fc] "d" (func), [pba] "d" ((unsigned long)param),
+		  [opc] "i" (CPACF_KMAC)
+		: "cc", "memory", "0", "1");
+
+	return src_len - s.odd;
+}
+
+/**
+ * cpacf_kmctr() - executes the KMCTR (CIPHER MESSAGE WITH COUNTER) instruction
+ * @func: the function code passed to KMCTR; see CPACF_KMCTR_xxx defines
+ * @param: address of parameter block; see POP for details on each func
+ * @dest: address of destination memory area
+ * @src: address of source memory area
+ * @src_len: length of src operand in bytes
+ * @counter: address of counter value
+ *
+ * Returns 0 for the query func, number of processed bytes for
+ * encryption/decryption funcs
+ */
+static inline int cpacf_kmctr(unsigned long func, void *param, u8 *dest,
+			      const u8 *src, long src_len, u8 *counter)
+{
+	union register_pair d, s, c;
+
+	d.even = (unsigned long)dest;
+	s.even = (unsigned long)src;
+	s.odd  = (unsigned long)src_len;
+	c.even = (unsigned long)counter;
+	asm volatile(
+		"	lgr	0,%[fc]\n"
+		"	lgr	1,%[pba]\n"
+		"0:	.insn	rrf,%[opc] << 16,%[dst],%[src],%[ctr],0\n"
+		"	brc	1,0b\n" /* handle partial completion */
+		: [src] "+&d" (s.pair), [dst] "+&d" (d.pair),
+		  [ctr] "+&d" (c.pair)
+		: [fc] "d" (func), [pba] "d" ((unsigned long)param),
+		  [opc] "i" (CPACF_KMCTR)
+		: "cc", "memory", "0", "1");
+
+	return src_len - s.odd;
+}
+
+/**
+ * cpacf_prno() - executes the PRNO (PERFORM RANDOM NUMBER OPERATION)
+ *		  instruction
+ * @func: the function code passed to PRNO; see CPACF_PRNO_xxx defines
+ * @param: address of parameter block; see POP for details on each func
+ * @dest: address of destination memory area
+ * @dest_len: size of destination memory area in bytes
+ * @seed: address of seed data
+ * @seed_len: size of seed data in bytes
+ */
+static inline void cpacf_prno(unsigned long func, void *param,
+			      u8 *dest, unsigned long dest_len,
+			      const u8 *seed, unsigned long seed_len)
+{
+	union register_pair d, s;
+
+	d.even = (unsigned long)dest;
+	d.odd  = (unsigned long)dest_len;
+	s.even = (unsigned long)seed;
+	s.odd  = (unsigned long)seed_len;
+	asm volatile (
+		"	lgr	0,%[fc]\n"
+		"	lgr	1,%[pba]\n"
+		"0:	.insn	rre,%[opc] << 16,%[dst],%[seed]\n"
+		"	brc	1,0b\n"	  /* handle partial completion */
+		: [dst] "+&d" (d.pair)
+		: [fc] "d" (func), [pba] "d" ((unsigned long)param),
+		  [seed] "d" (s.pair), [opc] "i" (CPACF_PRNO)
+		: "cc", "memory", "0", "1");
+}
+
+/**
+ * cpacf_trng() - executes the TRNG subfunction of the PRNO instruction
+ * @ucbuf: buffer for unconditioned data
+ * @ucbuf_len: amount of unconditioned data to fetch in bytes
+ * @cbuf: buffer for conditioned data
+ * @cbuf_len: amount of conditioned data to fetch in bytes
+ */
+static inline void cpacf_trng(u8 *ucbuf, unsigned long ucbuf_len,
+			      u8 *cbuf, unsigned long cbuf_len)
+{
+	union register_pair u, c;
+
+	u.even = (unsigned long)ucbuf;
+	u.odd  = (unsigned long)ucbuf_len;
+	c.even = (unsigned long)cbuf;
+	c.odd  = (unsigned long)cbuf_len;
+	asm volatile (
+		"	lghi	0,%[fc]\n"
+		"0:	.insn	rre,%[opc] << 16,%[ucbuf],%[cbuf]\n"
+		"	brc	1,0b\n"	  /* handle partial completion */
+		: [ucbuf] "+&d" (u.pair), [cbuf] "+&d" (c.pair)
+		: [fc] "K" (CPACF_PRNO_TRNG), [opc] "i" (CPACF_PRNO)
+		: "cc", "memory", "0");
+}
+
+/**
+ * cpacf_pcc() - executes the PCC (PERFORM CRYPTOGRAPHIC COMPUTATION)
+ *		 instruction
+ * @func: the function code passed to PCC; see CPACF_KM_xxx defines
+ * @param: address of parameter block; see POP for details on each func
+ */
+static inline void cpacf_pcc(unsigned long func, void *param)
+{
+	asm volatile(
+		"	lgr	0,%[fc]\n"
+		"	lgr	1,%[pba]\n"
+		"0:	.insn	rre,%[opc] << 16,0,0\n" /* PCC opcode */
+		"	brc	1,0b\n" /* handle partial completion */
+		:
+		: [fc] "d" (func), [pba] "d" ((unsigned long)param),
+		  [opc] "i" (CPACF_PCC)
+		: "cc", "memory", "0", "1");
+}
+
+/**
+ * cpacf_pckmo() - executes the PCKMO (PERFORM CRYPTOGRAPHIC KEY
+ *		  MANAGEMENT) instruction
+ * @func: the function code passed to PCKMO; see CPACF_PCKMO_xxx defines
+ * @param: address of parameter block; see POP for details on each func
+ *
+ * Returns 0.
+ */
+static inline void cpacf_pckmo(long func, void *param)
+{
+	asm volatile(
+		"	lgr	0,%[fc]\n"
+		"	lgr	1,%[pba]\n"
+		"       .insn   rre,%[opc] << 16,0,0\n" /* PCKMO opcode */
+		:
+		: [fc] "d" (func), [pba] "d" ((unsigned long)param),
+		  [opc] "i" (CPACF_PCKMO)
+		: "cc", "memory", "0", "1");
+}
+
+/**
+ * cpacf_kma() - executes the KMA (CIPHER MESSAGE WITH AUTHENTICATION)
+ *		 instruction
+ * @func: the function code passed to KMA; see CPACF_KMA_xxx defines
+ * @param: address of parameter block; see POP for details on each func
+ * @dest: address of destination memory area
+ * @src: address of source memory area
+ * @src_len: length of src operand in bytes
+ * @aad: address of additional authenticated data memory area
+ * @aad_len: length of aad operand in bytes
+ */
+static inline void cpacf_kma(unsigned long func, void *param, u8 *dest,
+			     const u8 *src, unsigned long src_len,
+			     const u8 *aad, unsigned long aad_len)
+{
+	union register_pair d, s, a;
+
+	d.even = (unsigned long)dest;
+	s.even = (unsigned long)src;
+	s.odd  = (unsigned long)src_len;
+	a.even = (unsigned long)aad;
+	a.odd  = (unsigned long)aad_len;
+	asm volatile(
+		"	lgr	0,%[fc]\n"
+		"	lgr	1,%[pba]\n"
+		"0:	.insn	rrf,%[opc] << 16,%[dst],%[src],%[aad],0\n"
+		"	brc	1,0b\n"	/* handle partial completion */
+		: [dst] "+&d" (d.pair), [src] "+&d" (s.pair),
+		  [aad] "+&d" (a.pair)
+		: [fc] "d" (func), [pba] "d" ((unsigned long)param),
+		  [opc] "i" (CPACF_KMA)
+		: "cc", "memory", "0", "1");
+}
+
+#endif	/* _ASM_S390_CPACF_H */
diff --git a/arch/s390/include/asm/cpcmd.h b/arch/s390/include/asm/cpcmd.h
new file mode 100644
index 0000000000..c3c993abe9
--- /dev/null
+++ b/arch/s390/include/asm/cpcmd.h
@@ -0,0 +1,32 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ *  S390 version
+ *    Copyright IBM Corp. 1999
+ *    Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com),
+ *               Christian Borntraeger (cborntra@de.ibm.com),
+ */
+
+#ifndef _ASM_S390_CPCMD_H
+#define _ASM_S390_CPCMD_H
+
+/*
+ * the lowlevel function for cpcmd
+ */
+int __cpcmd(const char *cmd, char *response, int rlen, int *response_code);
+
+/*
+ * cpcmd is the in-kernel interface for issuing CP commands
+ *
+ * cmd:		null-terminated command string, max 240 characters
+ * response:	response buffer for VM's textual response
+ * rlen:	size of the response buffer, cpcmd will not exceed this size
+ *		but will cap the output, if its too large. Everything that
+ *		did not fit into the buffer will be silently dropped
+ * response_code: return pointer for VM's error code
+ * return value: the size of the response. The caller can check if the buffer
+ *		was large enough by comparing the return value and rlen
+ * NOTE: If the response buffer is not in real storage, cpcmd can sleep
+ */
+int cpcmd(const char *cmd, char *response, int rlen, int *response_code);
+
+#endif /* _ASM_S390_CPCMD_H */
diff --git a/arch/s390/include/asm/cpu.h b/arch/s390/include/asm/cpu.h
new file mode 100644
index 0000000000..26c710cd34
--- /dev/null
+++ b/arch/s390/include/asm/cpu.h
@@ -0,0 +1,28 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ *    Copyright IBM Corp. 2000, 2009
+ *    Author(s): Hartmut Penner <hp@de.ibm.com>,
+ *		 Martin Schwidefsky <schwidefsky@de.ibm.com>,
+ *		 Christian Ehrhardt <ehrhardt@de.ibm.com>,
+ */
+
+#ifndef _ASM_S390_CPU_H
+#define _ASM_S390_CPU_H
+
+#ifndef __ASSEMBLY__
+
+#include <linux/types.h>
+#include <linux/jump_label.h>
+
+struct cpuid
+{
+	unsigned int version :	8;
+	unsigned int ident   : 24;
+	unsigned int machine : 16;
+	unsigned int unused  : 16;
+} __attribute__ ((packed, aligned(8)));
+
+DECLARE_STATIC_KEY_FALSE(cpu_has_bear);
+
+#endif /* __ASSEMBLY__ */
+#endif /* _ASM_S390_CPU_H */
diff --git a/arch/s390/include/asm/cpu_mf-insn.h b/arch/s390/include/asm/cpu_mf-insn.h
new file mode 100644
index 0000000000..a68b362e09
--- /dev/null
+++ b/arch/s390/include/asm/cpu_mf-insn.h
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Support for CPU-MF instructions
+ *
+ * Copyright IBM Corp. 2019
+ * Author(s): Hendrik Brueckner <brueckner@linux.vnet.ibm.com>
+ */
+#ifndef _ASM_S390_CPU_MF_INSN_H
+#define _ASM_S390_CPU_MF_INSN_H
+
+#ifdef __ASSEMBLY__
+
+/* Macro to generate the STCCTM instruction with a customized
+ * M3 field designating the counter set.
+ */
+.macro	STCCTM	r1 m3 db2
+	.insn	rsy,0xeb0000000017,\r1,\m3 & 0xf,\db2
+.endm
+
+#endif /* __ASSEMBLY__ */
+
+#endif
diff --git a/arch/s390/include/asm/cpu_mf.h b/arch/s390/include/asm/cpu_mf.h
new file mode 100644
index 0000000000..a0de5b9b02
--- /dev/null
+++ b/arch/s390/include/asm/cpu_mf.h
@@ -0,0 +1,277 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * CPU-measurement facilities
+ *
+ *  Copyright IBM Corp. 2012, 2018
+ *  Author(s): Hendrik Brueckner <brueckner@linux.vnet.ibm.com>
+ *	       Jan Glauber <jang@linux.vnet.ibm.com>
+ */
+#ifndef _ASM_S390_CPU_MF_H
+#define _ASM_S390_CPU_MF_H
+
+#include <linux/errno.h>
+#include <asm/asm-extable.h>
+#include <asm/facility.h>
+
+asm(".include \"asm/cpu_mf-insn.h\"\n");
+
+#define CPU_MF_INT_SF_IAE	(1 << 31)	/* invalid entry address */
+#define CPU_MF_INT_SF_ISE	(1 << 30)	/* incorrect SDBT entry */
+#define CPU_MF_INT_SF_PRA	(1 << 29)	/* program request alert */
+#define CPU_MF_INT_SF_SACA	(1 << 23)	/* sampler auth. change alert */
+#define CPU_MF_INT_SF_LSDA	(1 << 22)	/* loss of sample data alert */
+#define CPU_MF_INT_CF_MTDA	(1 << 15)	/* loss of MT ctr. data alert */
+#define CPU_MF_INT_CF_CACA	(1 <<  7)	/* counter auth. change alert */
+#define CPU_MF_INT_CF_LCDA	(1 <<  6)	/* loss of counter data alert */
+#define CPU_MF_INT_CF_MASK	(CPU_MF_INT_CF_MTDA|CPU_MF_INT_CF_CACA| \
+				 CPU_MF_INT_CF_LCDA)
+#define CPU_MF_INT_SF_MASK	(CPU_MF_INT_SF_IAE|CPU_MF_INT_SF_ISE|	\
+				 CPU_MF_INT_SF_PRA|CPU_MF_INT_SF_SACA|	\
+				 CPU_MF_INT_SF_LSDA)
+
+#define CPU_MF_SF_RIBM_NOTAV	0x1		/* Sampling unavailable */
+
+/* CPU measurement facility support */
+static inline int cpum_cf_avail(void)
+{
+	return test_facility(40) && test_facility(67);
+}
+
+static inline int cpum_sf_avail(void)
+{
+	return test_facility(40) && test_facility(68);
+}
+
+struct cpumf_ctr_info {
+	u16   cfvn;
+	u16   auth_ctl;
+	u16   enable_ctl;
+	u16   act_ctl;
+	u16   max_cpu;
+	u16   csvn;
+	u16   max_cg;
+	u16   reserved1;
+	u32   reserved2[12];
+} __packed;
+
+/* QUERY SAMPLING INFORMATION block */
+struct hws_qsi_info_block {	    /* Bit(s) */
+	unsigned int b0_13:14;	    /* 0-13: zeros			 */
+	unsigned int as:1;	    /* 14: basic-sampling authorization	 */
+	unsigned int ad:1;	    /* 15: diag-sampling authorization	 */
+	unsigned int b16_21:6;	    /* 16-21: zeros			 */
+	unsigned int es:1;	    /* 22: basic-sampling enable control */
+	unsigned int ed:1;	    /* 23: diag-sampling enable control	 */
+	unsigned int b24_29:6;	    /* 24-29: zeros			 */
+	unsigned int cs:1;	    /* 30: basic-sampling activation control */
+	unsigned int cd:1;	    /* 31: diag-sampling activation control */
+	unsigned int bsdes:16;	    /* 4-5: size of basic sampling entry */
+	unsigned int dsdes:16;	    /* 6-7: size of diagnostic sampling entry */
+	unsigned long min_sampl_rate; /* 8-15: minimum sampling interval */
+	unsigned long max_sampl_rate; /* 16-23: maximum sampling interval*/
+	unsigned long tear;	    /* 24-31: TEAR contents		 */
+	unsigned long dear;	    /* 32-39: DEAR contents		 */
+	unsigned int rsvrd0:24;	    /* 40-42: reserved			 */
+	unsigned int ribm:8;	    /* 43: Reserved by IBM		 */
+	unsigned int cpu_speed;     /* 44-47: CPU speed			 */
+	unsigned long long rsvrd1;  /* 48-55: reserved			 */
+	unsigned long long rsvrd2;  /* 56-63: reserved			 */
+} __packed;
+
+/* SET SAMPLING CONTROLS request block */
+struct hws_lsctl_request_block {
+	unsigned int s:1;	    /* 0: maximum buffer indicator	 */
+	unsigned int h:1;	    /* 1: part. level reserved for VM use*/
+	unsigned long long b2_53:52;/* 2-53: zeros			 */
+	unsigned int es:1;	    /* 54: basic-sampling enable control */
+	unsigned int ed:1;	    /* 55: diag-sampling enable control	 */
+	unsigned int b56_61:6;	    /* 56-61: - zeros			 */
+	unsigned int cs:1;	    /* 62: basic-sampling activation control */
+	unsigned int cd:1;	    /* 63: diag-sampling activation control  */
+	unsigned long interval;     /* 8-15: sampling interval		 */
+	unsigned long tear;	    /* 16-23: TEAR contents		 */
+	unsigned long dear;	    /* 24-31: DEAR contents		 */
+	/* 32-63:							 */
+	unsigned long rsvrd1;	    /* reserved				 */
+	unsigned long rsvrd2;	    /* reserved				 */
+	unsigned long rsvrd3;	    /* reserved				 */
+	unsigned long rsvrd4;	    /* reserved				 */
+} __packed;
+
+struct hws_basic_entry {
+	unsigned int def:16;	    /* 0-15  Data Entry Format		 */
+	unsigned int R:4;	    /* 16-19 reserved			 */
+	unsigned int U:4;	    /* 20-23 Number of unique instruct.  */
+	unsigned int z:2;	    /* zeros				 */
+	unsigned int T:1;	    /* 26 PSW DAT mode			 */
+	unsigned int W:1;	    /* 27 PSW wait state		 */
+	unsigned int P:1;	    /* 28 PSW Problem state		 */
+	unsigned int AS:2;	    /* 29-30 PSW address-space control	 */
+	unsigned int I:1;	    /* 31 entry valid or invalid	 */
+	unsigned int CL:2;	    /* 32-33 Configuration Level	 */
+	unsigned int H:1;	    /* 34 Host Indicator		 */
+	unsigned int LS:1;	    /* 35 Limited Sampling		 */
+	unsigned int:12;
+	unsigned int prim_asn:16;   /* primary ASN			 */
+	unsigned long long ia;	    /* Instruction Address		 */
+	unsigned long long gpp;     /* Guest Program Parameter		 */
+	unsigned long long hpp;     /* Host Program Parameter		 */
+} __packed;
+
+struct hws_diag_entry {
+	unsigned int def:16;	    /* 0-15  Data Entry Format		 */
+	unsigned int R:15;	    /* 16-19 and 20-30 reserved		 */
+	unsigned int I:1;	    /* 31 entry valid or invalid	 */
+	u8	     data[];	    /* Machine-dependent sample data	 */
+} __packed;
+
+struct hws_combined_entry {
+	struct hws_basic_entry	basic;	/* Basic-sampling data entry */
+	struct hws_diag_entry	diag;	/* Diagnostic-sampling data entry */
+} __packed;
+
+union hws_trailer_header {
+	struct {
+		unsigned int f:1;	/* 0 - Block Full Indicator   */
+		unsigned int a:1;	/* 1 - Alert request control  */
+		unsigned int t:1;	/* 2 - Timestamp format	      */
+		unsigned int :29;	/* 3 - 31: Reserved	      */
+		unsigned int bsdes:16;	/* 32-47: size of basic SDE   */
+		unsigned int dsdes:16;	/* 48-63: size of diagnostic SDE */
+		unsigned long long overflow; /* 64 - Overflow Count   */
+	};
+	u128 val;
+};
+
+struct hws_trailer_entry {
+	union hws_trailer_header header; /* 0 - 15 Flags + Overflow Count     */
+	unsigned char timestamp[16];	 /* 16 - 31 timestamp		      */
+	unsigned long long reserved1;	 /* 32 -Reserved		      */
+	unsigned long long reserved2;	 /*				      */
+	union {				 /* 48 - reserved for programming use */
+		struct {
+			unsigned int clock_base:1; /* in progusage2 */
+			unsigned long long progusage1:63;
+			unsigned long long progusage2;
+		};
+		unsigned long long progusage[2];
+	};
+} __packed;
+
+/* Load program parameter */
+static inline void lpp(void *pp)
+{
+	asm volatile("lpp 0(%0)\n" :: "a" (pp) : "memory");
+}
+
+/* Query counter information */
+static inline int qctri(struct cpumf_ctr_info *info)
+{
+	int rc = -EINVAL;
+
+	asm volatile (
+		"0:	qctri	%1\n"
+		"1:	lhi	%0,0\n"
+		"2:\n"
+		EX_TABLE(1b, 2b)
+		: "+d" (rc), "=Q" (*info));
+	return rc;
+}
+
+/* Load CPU-counter-set controls */
+static inline int lcctl(u64 ctl)
+{
+	int cc;
+
+	asm volatile (
+		"	lcctl	%1\n"
+		"	ipm	%0\n"
+		"	srl	%0,28\n"
+		: "=d" (cc) : "Q" (ctl) : "cc");
+	return cc;
+}
+
+/* Extract CPU counter */
+static inline int __ecctr(u64 ctr, u64 *content)
+{
+	u64 _content;
+	int cc;
+
+	asm volatile (
+		"	ecctr	%0,%2\n"
+		"	ipm	%1\n"
+		"	srl	%1,28\n"
+		: "=d" (_content), "=d" (cc) : "d" (ctr) : "cc");
+	*content = _content;
+	return cc;
+}
+
+/* Extract CPU counter */
+static inline int ecctr(u64 ctr, u64 *val)
+{
+	u64 content;
+	int cc;
+
+	cc = __ecctr(ctr, &content);
+	if (!cc)
+		*val = content;
+	return cc;
+}
+
+/* Store CPU counter multiple for a particular counter set */
+enum stcctm_ctr_set {
+	EXTENDED = 0,
+	BASIC = 1,
+	PROBLEM_STATE = 2,
+	CRYPTO_ACTIVITY = 3,
+	MT_DIAG = 5,
+	MT_DIAG_CLEARING = 9,	/* clears loss-of-MT-ctr-data alert */
+};
+
+static __always_inline int stcctm(enum stcctm_ctr_set set, u64 range, u64 *dest)
+{
+	int cc;
+
+	asm volatile (
+		"	STCCTM	%2,%3,%1\n"
+		"	ipm	%0\n"
+		"	srl	%0,28\n"
+		: "=d" (cc)
+		: "Q" (*dest), "d" (range), "i" (set)
+		: "cc", "memory");
+	return cc;
+}
+
+/* Query sampling information */
+static inline int qsi(struct hws_qsi_info_block *info)
+{
+	int cc = 1;
+
+	asm volatile(
+		"0:	qsi	%1\n"
+		"1:	lhi	%0,0\n"
+		"2:\n"
+		EX_TABLE(0b, 2b) EX_TABLE(1b, 2b)
+		: "+d" (cc), "+Q" (*info));
+	return cc ? -EINVAL : 0;
+}
+
+/* Load sampling controls */
+static inline int lsctl(struct hws_lsctl_request_block *req)
+{
+	int cc;
+
+	cc = 1;
+	asm volatile(
+		"0:	lsctl	0(%1)\n"
+		"1:	ipm	%0\n"
+		"	srl	%0,28\n"
+		"2:\n"
+		EX_TABLE(0b, 2b) EX_TABLE(1b, 2b)
+		: "+d" (cc), "+a" (req)
+		: "m" (*req)
+		: "cc", "memory");
+
+	return cc ? -EINVAL : 0;
+}
+#endif /* _ASM_S390_CPU_MF_H */
diff --git a/arch/s390/include/asm/cpufeature.h b/arch/s390/include/asm/cpufeature.h
new file mode 100644
index 0000000000..9312046137
--- /dev/null
+++ b/arch/s390/include/asm/cpufeature.h
@@ -0,0 +1,23 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Module interface for CPU features
+ *
+ * Copyright IBM Corp. 2015, 2022
+ * Author(s): Hendrik Brueckner <brueckner@linux.vnet.ibm.com>
+ */
+
+#ifndef __ASM_S390_CPUFEATURE_H
+#define __ASM_S390_CPUFEATURE_H
+
+enum {
+	S390_CPU_FEATURE_MSA,
+	S390_CPU_FEATURE_VXRS,
+	S390_CPU_FEATURE_UV,
+	MAX_CPU_FEATURES
+};
+
+#define cpu_feature(feature)	(feature)
+
+int cpu_have_feature(unsigned int nr);
+
+#endif /* __ASM_S390_CPUFEATURE_H */
diff --git a/arch/s390/include/asm/cputime.h b/arch/s390/include/asm/cputime.h
new file mode 100644
index 0000000000..30bb3ec4e5
--- /dev/null
+++ b/arch/s390/include/asm/cputime.h
@@ -0,0 +1,21 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ *  Copyright IBM Corp. 2004
+ *
+ *  Author: Martin Schwidefsky <schwidefsky@de.ibm.com>
+ */
+
+#ifndef _S390_CPUTIME_H
+#define _S390_CPUTIME_H
+
+#include <linux/types.h>
+#include <asm/timex.h>
+
+/*
+ * Convert cputime to nanoseconds.
+ */
+#define cputime_to_nsecs(cputime) tod_to_ns(cputime)
+
+void account_idle_time_irq(void);
+
+#endif /* _S390_CPUTIME_H */
diff --git a/arch/s390/include/asm/crw.h b/arch/s390/include/asm/crw.h
new file mode 100644
index 0000000000..97456d98fe
--- /dev/null
+++ b/arch/s390/include/asm/crw.h
@@ -0,0 +1,55 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ *   Data definitions for channel report processing
+ *    Copyright IBM Corp. 2000, 2009
+ *    Author(s): Ingo Adlung <adlung@de.ibm.com>,
+ *		 Martin Schwidefsky <schwidefsky@de.ibm.com>,
+ *		 Cornelia Huck <cornelia.huck@de.ibm.com>,
+ */
+
+#ifndef _ASM_S390_CRW_H
+#define _ASM_S390_CRW_H
+
+#include <linux/types.h>
+
+/*
+ * Channel Report Word
+ */
+struct crw {
+	__u32 res1 :  1;   /* reserved zero */
+	__u32 slct :  1;   /* solicited */
+	__u32 oflw :  1;   /* overflow */
+	__u32 chn  :  1;   /* chained */
+	__u32 rsc  :  4;   /* reporting source code */
+	__u32 anc  :  1;   /* ancillary report */
+	__u32 res2 :  1;   /* reserved zero */
+	__u32 erc  :  6;   /* error-recovery code */
+	__u32 rsid : 16;   /* reporting-source ID */
+} __attribute__ ((packed));
+
+typedef void (*crw_handler_t)(struct crw *, struct crw *, int);
+
+extern int crw_register_handler(int rsc, crw_handler_t handler);
+extern void crw_unregister_handler(int rsc);
+extern void crw_handle_channel_report(void);
+void crw_wait_for_channel_report(void);
+
+#define NR_RSCS 16
+
+#define CRW_RSC_MONITOR  0x2  /* monitoring facility */
+#define CRW_RSC_SCH	 0x3  /* subchannel */
+#define CRW_RSC_CPATH	 0x4  /* channel path */
+#define CRW_RSC_CONFIG	 0x9  /* configuration-alert facility */
+#define CRW_RSC_CSS	 0xB  /* channel subsystem */
+
+#define CRW_ERC_EVENT	 0x00 /* event information pending */
+#define CRW_ERC_AVAIL	 0x01 /* available */
+#define CRW_ERC_INIT	 0x02 /* initialized */
+#define CRW_ERC_TERROR	 0x03 /* temporary error */
+#define CRW_ERC_IPARM	 0x04 /* installed parm initialized */
+#define CRW_ERC_TERM	 0x05 /* terminal */
+#define CRW_ERC_PERRN	 0x06 /* perm. error, fac. not init */
+#define CRW_ERC_PERRI	 0x07 /* perm. error, facility init */
+#define CRW_ERC_PMOD	 0x08 /* installed parameters modified */
+
+#endif /* _ASM_S390_CRW_H */
diff --git a/arch/s390/include/asm/css_chars.h b/arch/s390/include/asm/css_chars.h
new file mode 100644
index 0000000000..638137d46c
--- /dev/null
+++ b/arch/s390/include/asm/css_chars.h
@@ -0,0 +1,47 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_CSS_CHARS_H
+#define _ASM_CSS_CHARS_H
+
+#include <linux/types.h>
+
+struct css_general_char {
+	u64 : 12;
+	u64 dynio : 1;	 /* bit 12 */
+	u64 : 4;
+	u64 eadm : 1;	 /* bit 17 */
+	u64 : 23;
+	u64 aif : 1;	 /* bit 41 */
+	u64 : 3;
+	u64 mcss : 1;	 /* bit 45 */
+	u64 fcs : 1;	 /* bit 46 */
+	u64 : 1;
+	u64 ext_mb : 1;  /* bit 48 */
+	u64 : 7;
+	u64 aif_tdd : 1; /* bit 56 */
+	u64 : 1;
+	u64 qebsm : 1;	 /* bit 58 */
+	u64 : 2;
+	u64 aiv : 1;	 /* bit 61 */
+	u64 : 2;
+
+	u64 : 3;
+	u64 aif_osa : 1; /* bit 67 */
+	u64 : 12;
+	u64 eadm_rf : 1; /* bit 80 */
+	u64 : 1;
+	u64 cib : 1;	 /* bit 82 */
+	u64 : 5;
+	u64 fcx : 1;	 /* bit 88 */
+	u64 : 19;
+	u64 alt_ssi : 1; /* bit 108 */
+	u64 : 1;
+	u64 narf : 1;	 /* bit 110 */
+	u64 : 5;
+	u64 enarf: 1;	 /* bit 116 */
+	u64 : 6;
+	u64 util_str : 1;/* bit 123 */
+} __packed;
+
+extern struct css_general_char css_general_characteristics;
+
+#endif
diff --git a/arch/s390/include/asm/ctl_reg.h b/arch/s390/include/asm/ctl_reg.h
new file mode 100644
index 0000000000..adf7d8cdac
--- /dev/null
+++ b/arch/s390/include/asm/ctl_reg.h
@@ -0,0 +1,146 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright IBM Corp. 1999, 2009
+ *
+ * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>
+ */
+
+#ifndef __ASM_CTL_REG_H
+#define __ASM_CTL_REG_H
+
+#include <linux/bits.h>
+
+#define CR0_CLOCK_COMPARATOR_SIGN	BIT(63 - 10)
+#define CR0_LOW_ADDRESS_PROTECTION	BIT(63 - 35)
+#define CR0_FETCH_PROTECTION_OVERRIDE	BIT(63 - 38)
+#define CR0_STORAGE_PROTECTION_OVERRIDE	BIT(63 - 39)
+#define CR0_EMERGENCY_SIGNAL_SUBMASK	BIT(63 - 49)
+#define CR0_EXTERNAL_CALL_SUBMASK	BIT(63 - 50)
+#define CR0_CLOCK_COMPARATOR_SUBMASK	BIT(63 - 52)
+#define CR0_CPU_TIMER_SUBMASK		BIT(63 - 53)
+#define CR0_SERVICE_SIGNAL_SUBMASK	BIT(63 - 54)
+#define CR0_UNUSED_56			BIT(63 - 56)
+#define CR0_INTERRUPT_KEY_SUBMASK	BIT(63 - 57)
+#define CR0_MEASUREMENT_ALERT_SUBMASK	BIT(63 - 58)
+
+#define CR14_UNUSED_32			BIT(63 - 32)
+#define CR14_UNUSED_33			BIT(63 - 33)
+#define CR14_CHANNEL_REPORT_SUBMASK	BIT(63 - 35)
+#define CR14_RECOVERY_SUBMASK		BIT(63 - 36)
+#define CR14_DEGRADATION_SUBMASK	BIT(63 - 37)
+#define CR14_EXTERNAL_DAMAGE_SUBMASK	BIT(63 - 38)
+#define CR14_WARNING_SUBMASK		BIT(63 - 39)
+
+#ifndef __ASSEMBLY__
+
+#include <linux/bug.h>
+
+#define __ctl_load(array, low, high) do {				\
+	typedef struct { char _[sizeof(array)]; } addrtype;		\
+									\
+	BUILD_BUG_ON(sizeof(addrtype) != (high - low + 1) * sizeof(long));\
+	asm volatile(							\
+		"	lctlg	%1,%2,%0\n"				\
+		:							\
+		: "Q" (*(addrtype *)(&array)), "i" (low), "i" (high)	\
+		: "memory");						\
+} while (0)
+
+#define __ctl_store(array, low, high) do {				\
+	typedef struct { char _[sizeof(array)]; } addrtype;		\
+									\
+	BUILD_BUG_ON(sizeof(addrtype) != (high - low + 1) * sizeof(long));\
+	asm volatile(							\
+		"	stctg	%1,%2,%0\n"				\
+		: "=Q" (*(addrtype *)(&array))				\
+		: "i" (low), "i" (high));				\
+} while (0)
+
+static __always_inline void __ctl_set_bit(unsigned int cr, unsigned int bit)
+{
+	unsigned long reg;
+
+	__ctl_store(reg, cr, cr);
+	reg |= 1UL << bit;
+	__ctl_load(reg, cr, cr);
+}
+
+static __always_inline void __ctl_clear_bit(unsigned int cr, unsigned int bit)
+{
+	unsigned long reg;
+
+	__ctl_store(reg, cr, cr);
+	reg &= ~(1UL << bit);
+	__ctl_load(reg, cr, cr);
+}
+
+void smp_ctl_set_clear_bit(int cr, int bit, bool set);
+
+static inline void ctl_set_bit(int cr, int bit)
+{
+	smp_ctl_set_clear_bit(cr, bit, true);
+}
+
+static inline void ctl_clear_bit(int cr, int bit)
+{
+	smp_ctl_set_clear_bit(cr, bit, false);
+}
+
+union ctlreg0 {
+	unsigned long val;
+	struct {
+		unsigned long	   : 8;
+		unsigned long tcx  : 1;	/* Transactional-Execution control */
+		unsigned long pifo : 1;	/* Transactional-Execution Program-
+					   Interruption-Filtering Override */
+		unsigned long	   : 3;
+		unsigned long ccc  : 1; /* Cryptography counter control */
+		unsigned long pec  : 1; /* PAI extension control */
+		unsigned long	   : 17;
+		unsigned long	   : 3;
+		unsigned long lap  : 1; /* Low-address-protection control */
+		unsigned long	   : 4;
+		unsigned long edat : 1; /* Enhanced-DAT-enablement control */
+		unsigned long	   : 2;
+		unsigned long iep  : 1; /* Instruction-Execution-Protection */
+		unsigned long	   : 1;
+		unsigned long afp  : 1; /* AFP-register control */
+		unsigned long vx   : 1; /* Vector enablement control */
+		unsigned long	   : 7;
+		unsigned long sssm : 1; /* Service signal subclass mask */
+		unsigned long	   : 9;
+	};
+};
+
+union ctlreg2 {
+	unsigned long val;
+	struct {
+		unsigned long	    : 33;
+		unsigned long ducto : 25;
+		unsigned long	    : 1;
+		unsigned long gse   : 1;
+		unsigned long	    : 1;
+		unsigned long tds   : 1;
+		unsigned long tdc   : 2;
+	};
+};
+
+union ctlreg5 {
+	unsigned long val;
+	struct {
+		unsigned long	    : 33;
+		unsigned long pasteo: 25;
+		unsigned long	    : 6;
+	};
+};
+
+union ctlreg15 {
+	unsigned long val;
+	struct {
+		unsigned long lsea  : 61;
+		unsigned long	    : 3;
+	};
+};
+
+#endif /* __ASSEMBLY__ */
+#endif /* __ASM_CTL_REG_H */
diff --git a/arch/s390/include/asm/current.h b/arch/s390/include/asm/current.h
new file mode 100644
index 0000000000..68f8431527
--- /dev/null
+++ b/arch/s390/include/asm/current.h
@@ -0,0 +1,19 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ *  S390 version
+ *    Copyright IBM Corp. 1999
+ *    Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com)
+ *
+ *  Derived from "include/asm-i386/current.h"
+ */
+
+#ifndef _S390_CURRENT_H
+#define _S390_CURRENT_H
+
+#include <asm/lowcore.h>
+
+struct task_struct;
+
+#define current ((struct task_struct *const)S390_lowcore.current_task)
+
+#endif /* !(_S390_CURRENT_H) */
diff --git a/arch/s390/include/asm/debug.h b/arch/s390/include/asm/debug.h
new file mode 100644
index 0000000000..ccd4e148b5
--- /dev/null
+++ b/arch/s390/include/asm/debug.h
@@ -0,0 +1,490 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ *   S/390 debug facility
+ *
+ *    Copyright IBM Corp. 1999, 2020
+ */
+#ifndef _ASM_S390_DEBUG_H
+#define _ASM_S390_DEBUG_H
+
+#include <linux/string.h>
+#include <linux/spinlock.h>
+#include <linux/kernel.h>
+#include <linux/time.h>
+#include <linux/refcount.h>
+#include <linux/fs.h>
+#include <linux/init.h>
+
+#define DEBUG_MAX_LEVEL		   6  /* debug levels range from 0 to 6 */
+#define DEBUG_OFF_LEVEL		   -1 /* level where debug is switched off */
+#define DEBUG_FLUSH_ALL		   -1 /* parameter to flush all areas */
+#define DEBUG_MAX_VIEWS		   10 /* max number of views in proc fs */
+#define DEBUG_MAX_NAME_LEN	   64 /* max length for a debugfs file name */
+#define DEBUG_DEFAULT_LEVEL	   3  /* initial debug level */
+
+#define DEBUG_DIR_ROOT "s390dbf" /* name of debug root directory in proc fs */
+
+#define DEBUG_DATA(entry) (char *)(entry + 1) /* data is stored behind */
+					      /* the entry information */
+
+#define __DEBUG_FEATURE_VERSION	   3  /* version of debug feature */
+
+struct __debug_entry {
+	unsigned long clock	: 60;
+	unsigned long exception	:  1;
+	unsigned long level	:  3;
+	void *caller;
+	unsigned short cpu;
+} __packed;
+
+typedef struct __debug_entry debug_entry_t;
+
+struct debug_view;
+
+typedef struct debug_info {
+	struct debug_info *next;
+	struct debug_info *prev;
+	refcount_t ref_count;
+	spinlock_t lock;
+	int level;
+	int nr_areas;
+	int pages_per_area;
+	int buf_size;
+	int entry_size;
+	debug_entry_t ***areas;
+	int active_area;
+	int *active_pages;
+	int *active_entries;
+	struct dentry *debugfs_root_entry;
+	struct dentry *debugfs_entries[DEBUG_MAX_VIEWS];
+	struct debug_view *views[DEBUG_MAX_VIEWS];
+	char name[DEBUG_MAX_NAME_LEN];
+	umode_t mode;
+} debug_info_t;
+
+typedef int (debug_header_proc_t) (debug_info_t *id,
+				   struct debug_view *view,
+				   int area,
+				   debug_entry_t *entry,
+				   char *out_buf);
+
+typedef int (debug_format_proc_t) (debug_info_t *id,
+				   struct debug_view *view, char *out_buf,
+				   const char *in_buf);
+typedef int (debug_prolog_proc_t) (debug_info_t *id,
+				   struct debug_view *view,
+				   char *out_buf);
+typedef int (debug_input_proc_t) (debug_info_t *id,
+				  struct debug_view *view,
+				  struct file *file,
+				  const char __user *user_buf,
+				  size_t in_buf_size, loff_t *offset);
+
+int debug_dflt_header_fn(debug_info_t *id, struct debug_view *view,
+			 int area, debug_entry_t *entry, char *out_buf);
+
+struct debug_view {
+	char name[DEBUG_MAX_NAME_LEN];
+	debug_prolog_proc_t *prolog_proc;
+	debug_header_proc_t *header_proc;
+	debug_format_proc_t *format_proc;
+	debug_input_proc_t  *input_proc;
+	void		    *private_data;
+};
+
+extern struct debug_view debug_hex_ascii_view;
+extern struct debug_view debug_sprintf_view;
+
+/* do NOT use the _common functions */
+
+debug_entry_t *debug_event_common(debug_info_t *id, int level,
+				  const void *data, int length);
+
+debug_entry_t *debug_exception_common(debug_info_t *id, int level,
+				      const void *data, int length);
+
+/* Debug Feature API: */
+
+debug_info_t *debug_register(const char *name, int pages, int nr_areas,
+			     int buf_size);
+
+debug_info_t *debug_register_mode(const char *name, int pages, int nr_areas,
+				  int buf_size, umode_t mode, uid_t uid,
+				  gid_t gid);
+
+void debug_unregister(debug_info_t *id);
+
+void debug_set_level(debug_info_t *id, int new_level);
+
+void debug_set_critical(void);
+
+void debug_stop_all(void);
+
+/**
+ * debug_level_enabled() - Returns true if debug events for the specified
+ *			   level would be logged. Otherwise returns false.
+ *
+ * @id:		handle for debug log
+ * @level:	debug level
+ *
+ * Return:
+ * - %true if level is less or equal to the current debug level.
+ */
+static inline bool debug_level_enabled(debug_info_t *id, int level)
+{
+	return level <= id->level;
+}
+
+/**
+ * debug_event() - writes binary debug entry to active debug area
+ *		   (if level <= actual debug level)
+ *
+ * @id:		handle for debug log
+ * @level:	debug level
+ * @data:	pointer to data for debug entry
+ * @length:	length of data in bytes
+ *
+ * Return:
+ * - Address of written debug entry
+ * - %NULL if error
+ */
+static inline debug_entry_t *debug_event(debug_info_t *id, int level,
+					 void *data, int length)
+{
+	if ((!id) || (level > id->level) || (id->pages_per_area == 0))
+		return NULL;
+	return debug_event_common(id, level, data, length);
+}
+
+/**
+ * debug_int_event() - writes unsigned integer debug entry to active debug area
+ *		       (if level <= actual debug level)
+ *
+ * @id:		handle for debug log
+ * @level:	debug level
+ * @tag:	integer value for debug entry
+ *
+ * Return:
+ * - Address of written debug entry
+ * - %NULL if error
+ */
+static inline debug_entry_t *debug_int_event(debug_info_t *id, int level,
+					     unsigned int tag)
+{
+	unsigned int t = tag;
+
+	if ((!id) || (level > id->level) || (id->pages_per_area == 0))
+		return NULL;
+	return debug_event_common(id, level, &t, sizeof(unsigned int));
+}
+
+/**
+ * debug_long_event() - writes unsigned long debug entry to active debug area
+ *		       (if level <= actual debug level)
+ *
+ * @id:		handle for debug log
+ * @level:	debug level
+ * @tag:	long integer value for debug entry
+ *
+ * Return:
+ * - Address of written debug entry
+ * - %NULL if error
+ */
+static inline debug_entry_t *debug_long_event(debug_info_t *id, int level,
+					      unsigned long tag)
+{
+	unsigned long t = tag;
+
+	if ((!id) || (level > id->level) || (id->pages_per_area == 0))
+		return NULL;
+	return debug_event_common(id, level, &t, sizeof(unsigned long));
+}
+
+/**
+ * debug_text_event() - writes string debug entry in ascii format to active
+ *			debug area (if level <= actual debug level)
+ *
+ * @id:		handle for debug log
+ * @level:	debug level
+ * @txt:	string for debug entry
+ *
+ * Return:
+ * - Address of written debug entry
+ * - %NULL if error
+ */
+static inline debug_entry_t *debug_text_event(debug_info_t *id, int level,
+					      const char *txt)
+{
+	if ((!id) || (level > id->level) || (id->pages_per_area == 0))
+		return NULL;
+	return debug_event_common(id, level, txt, strlen(txt));
+}
+
+/*
+ * IMPORTANT: Use "%s" in sprintf format strings with care! Only pointers are
+ * stored in the s390dbf. See Documentation/arch/s390/s390dbf.rst for more details!
+ */
+extern debug_entry_t *
+__debug_sprintf_event(debug_info_t *id, int level, char *string, ...)
+	__attribute__ ((format(printf, 3, 4)));
+
+/**
+ * debug_sprintf_event() - writes debug entry with format string
+ *			   and varargs (longs) to active debug area
+ *			   (if level $<=$ actual debug level).
+ *
+ * @_id:	handle for debug log
+ * @_level:	debug level
+ * @_fmt:	format string for debug entry
+ * @...:	varargs used as in sprintf()
+ *
+ * Return:
+ * - Address of written debug entry
+ * - %NULL if error
+ *
+ * floats and long long datatypes cannot be used as varargs.
+ */
+#define debug_sprintf_event(_id, _level, _fmt, ...)			\
+({									\
+	debug_entry_t *__ret;						\
+	debug_info_t *__id = _id;					\
+	int __level = _level;						\
+									\
+	if ((!__id) || (__level > __id->level))				\
+		__ret = NULL;						\
+	else								\
+		__ret = __debug_sprintf_event(__id, __level,		\
+					      _fmt, ## __VA_ARGS__);	\
+	__ret;								\
+})
+
+/**
+ * debug_exception() - writes binary debug entry to active debug area
+ *		       (if level <= actual debug level)
+ *		       and switches to next debug area
+ *
+ * @id:		handle for debug log
+ * @level:	debug level
+ * @data:	pointer to data for debug entry
+ * @length:	length of data in bytes
+ *
+ * Return:
+ * - Address of written debug entry
+ * - %NULL if error
+ */
+static inline debug_entry_t *debug_exception(debug_info_t *id, int level,
+					     void *data, int length)
+{
+	if ((!id) || (level > id->level) || (id->pages_per_area == 0))
+		return NULL;
+	return debug_exception_common(id, level, data, length);
+}
+
+/**
+ * debug_int_exception() - writes unsigned int debug entry to active debug area
+ *			   (if level <= actual debug level)
+ *			   and switches to next debug area
+ *
+ * @id:		handle for debug log
+ * @level:	debug level
+ * @tag:	integer value for debug entry
+ *
+ * Return:
+ * - Address of written debug entry
+ * - %NULL if error
+ */
+static inline debug_entry_t *debug_int_exception(debug_info_t *id, int level,
+						 unsigned int tag)
+{
+	unsigned int t = tag;
+
+	if ((!id) || (level > id->level) || (id->pages_per_area == 0))
+		return NULL;
+	return debug_exception_common(id, level, &t, sizeof(unsigned int));
+}
+
+/**
+ * debug_long_exception() - writes long debug entry to active debug area
+ *			   (if level <= actual debug level)
+ *			   and switches to next debug area
+ *
+ * @id:		handle for debug log
+ * @level:	debug level
+ * @tag:	long integer value for debug entry
+ *
+ * Return:
+ * - Address of written debug entry
+ * - %NULL if error
+ */
+static inline debug_entry_t *debug_long_exception (debug_info_t *id, int level,
+						   unsigned long tag)
+{
+	unsigned long t = tag;
+
+	if ((!id) || (level > id->level) || (id->pages_per_area == 0))
+		return NULL;
+	return debug_exception_common(id, level, &t, sizeof(unsigned long));
+}
+
+/**
+ * debug_text_exception() - writes string debug entry in ascii format to active
+ *			    debug area (if level <= actual debug level)
+ *			    and switches to next debug area
+ * area
+ *
+ * @id:	handle for debug log
+ * @level:	debug level
+ * @txt:	string for debug entry
+ *
+ * Return:
+ * - Address of written debug entry
+ * - %NULL if error
+ */
+static inline debug_entry_t *debug_text_exception(debug_info_t *id, int level,
+						  const char *txt)
+{
+	if ((!id) || (level > id->level) || (id->pages_per_area == 0))
+		return NULL;
+	return debug_exception_common(id, level, txt, strlen(txt));
+}
+
+/*
+ * IMPORTANT: Use "%s" in sprintf format strings with care! Only pointers are
+ * stored in the s390dbf. See Documentation/arch/s390/s390dbf.rst for more details!
+ */
+extern debug_entry_t *
+__debug_sprintf_exception(debug_info_t *id, int level, char *string, ...)
+	__attribute__ ((format(printf, 3, 4)));
+
+
+/**
+ * debug_sprintf_exception() - writes debug entry with format string and
+ *			       varargs (longs) to active debug area
+ *			       (if level <= actual debug level)
+ *			       and switches to next debug area.
+ *
+ * @_id:	handle for debug log
+ * @_level:	debug level
+ * @_fmt:	format string for debug entry
+ * @...:	varargs used as in sprintf()
+ *
+ * Return:
+ * - Address of written debug entry
+ * - %NULL if error
+ *
+ * floats and long long datatypes cannot be used as varargs.
+ */
+#define debug_sprintf_exception(_id, _level, _fmt, ...)			\
+({									\
+	debug_entry_t *__ret;						\
+	debug_info_t *__id = _id;					\
+	int __level = _level;						\
+									\
+	if ((!__id) || (__level > __id->level))				\
+		__ret = NULL;						\
+	else								\
+		__ret = __debug_sprintf_exception(__id, __level,	\
+						  _fmt, ## __VA_ARGS__);\
+	__ret;								\
+})
+
+int debug_register_view(debug_info_t *id, struct debug_view *view);
+
+int debug_unregister_view(debug_info_t *id, struct debug_view *view);
+
+#ifndef MODULE
+
+/*
+ * Note: Initial page and area numbers must be fixed to allow static
+ * initialization. This enables very early tracing. Changes to these values
+ * must be reflected in __DEFINE_STATIC_AREA.
+ */
+#define EARLY_PAGES		8
+#define EARLY_AREAS		1
+
+#define VNAME(var, suffix)	__##var##_##suffix
+
+/*
+ * Define static areas for early trace data. During boot debug_register_static()
+ * will replace these with dynamically allocated areas to allow custom page and
+ * area sizes, and dynamic resizing.
+ */
+#define __DEFINE_STATIC_AREA(var)					\
+static char VNAME(var, data)[EARLY_PAGES][PAGE_SIZE] __initdata;	\
+static debug_entry_t *VNAME(var, pages)[EARLY_PAGES] __initdata = {	\
+	(debug_entry_t *)VNAME(var, data)[0],				\
+	(debug_entry_t *)VNAME(var, data)[1],				\
+	(debug_entry_t *)VNAME(var, data)[2],				\
+	(debug_entry_t *)VNAME(var, data)[3],				\
+	(debug_entry_t *)VNAME(var, data)[4],				\
+	(debug_entry_t *)VNAME(var, data)[5],				\
+	(debug_entry_t *)VNAME(var, data)[6],				\
+	(debug_entry_t *)VNAME(var, data)[7],				\
+};									\
+static debug_entry_t **VNAME(var, areas)[EARLY_AREAS] __initdata = {	\
+	(debug_entry_t **)VNAME(var, pages),				\
+};									\
+static int VNAME(var, active_pages)[EARLY_AREAS] __initdata;		\
+static int VNAME(var, active_entries)[EARLY_AREAS] __initdata
+
+#define __DEBUG_INFO_INIT(var, _name, _buf_size) {			\
+	.next = NULL,							\
+	.prev = NULL,							\
+	.ref_count = REFCOUNT_INIT(1),					\
+	.lock = __SPIN_LOCK_UNLOCKED(var.lock),				\
+	.level = DEBUG_DEFAULT_LEVEL,					\
+	.nr_areas = EARLY_AREAS,					\
+	.pages_per_area = EARLY_PAGES,					\
+	.buf_size = (_buf_size),					\
+	.entry_size = sizeof(debug_entry_t) + (_buf_size),		\
+	.areas = VNAME(var, areas),					\
+	.active_area = 0,						\
+	.active_pages = VNAME(var, active_pages),			\
+	.active_entries = VNAME(var, active_entries),			\
+	.debugfs_root_entry = NULL,					\
+	.debugfs_entries = { NULL },					\
+	.views = { NULL },						\
+	.name = (_name),						\
+	.mode = 0600,							\
+}
+
+#define __REGISTER_STATIC_DEBUG_INFO(var, name, pages, areas, view)	\
+static int __init VNAME(var, reg)(void)					\
+{									\
+	debug_register_static(&var, (pages), (areas));			\
+	debug_register_view(&var, (view));				\
+	return 0;							\
+}									\
+arch_initcall(VNAME(var, reg))
+
+/**
+ * DEFINE_STATIC_DEBUG_INFO - Define static debug_info_t
+ *
+ * @var: Name of debug_info_t variable
+ * @name: Name of debug log (e.g. used for debugfs entry)
+ * @pages: Number of pages per area
+ * @nr_areas: Number of debug areas
+ * @buf_size: Size of data area in each debug entry
+ * @view: Pointer to debug view struct
+ *
+ * Define a static debug_info_t for early tracing. The associated debugfs log
+ * is automatically registered with the specified debug view.
+ *
+ * Important: Users of this macro must not call any of the
+ * debug_register/_unregister() functions for this debug_info_t!
+ *
+ * Note: Tracing will start with a fixed number of initial pages and areas.
+ * The debug area will be changed to use the specified numbers during
+ * arch_initcall.
+ */
+#define DEFINE_STATIC_DEBUG_INFO(var, name, pages, nr_areas, buf_size, view) \
+__DEFINE_STATIC_AREA(var);						\
+static debug_info_t __refdata var =					\
+	__DEBUG_INFO_INIT(var, (name), (buf_size));			\
+__REGISTER_STATIC_DEBUG_INFO(var, name, pages, nr_areas, view)
+
+void debug_register_static(debug_info_t *id, int pages_per_area, int nr_areas);
+
+#endif /* MODULE */
+
+#endif /* _ASM_S390_DEBUG_H */
diff --git a/arch/s390/include/asm/delay.h b/arch/s390/include/asm/delay.h
new file mode 100644
index 0000000000..21a8fe18fe
--- /dev/null
+++ b/arch/s390/include/asm/delay.h
@@ -0,0 +1,24 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ *  S390 version
+ *    Copyright IBM Corp. 1999
+ *    Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com)
+ *
+ *  Derived from "include/asm-i386/delay.h"
+ *    Copyright (C) 1993 Linus Torvalds
+ *
+ *  Delay routines calling functions in arch/s390/lib/delay.c
+ */
+ 
+#ifndef _S390_DELAY_H
+#define _S390_DELAY_H
+
+void __ndelay(unsigned long nsecs);
+void __udelay(unsigned long usecs);
+void __delay(unsigned long loops);
+
+#define ndelay(n) __ndelay((unsigned long)(n))
+#define udelay(n) __udelay((unsigned long)(n))
+#define mdelay(n) __udelay((unsigned long)(n) * 1000)
+
+#endif /* defined(_S390_DELAY_H) */
diff --git a/arch/s390/include/asm/diag.h b/arch/s390/include/asm/diag.h
new file mode 100644
index 0000000000..bed8041375
--- /dev/null
+++ b/arch/s390/include/asm/diag.h
@@ -0,0 +1,351 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * s390 diagnose functions
+ *
+ * Copyright IBM Corp. 2007
+ * Author(s): Michael Holzheu <holzheu@de.ibm.com>
+ */
+
+#ifndef _ASM_S390_DIAG_H
+#define _ASM_S390_DIAG_H
+
+#include <linux/if_ether.h>
+#include <linux/percpu.h>
+#include <asm/asm-extable.h>
+#include <asm/cio.h>
+
+enum diag_stat_enum {
+	DIAG_STAT_X008,
+	DIAG_STAT_X00C,
+	DIAG_STAT_X010,
+	DIAG_STAT_X014,
+	DIAG_STAT_X044,
+	DIAG_STAT_X064,
+	DIAG_STAT_X08C,
+	DIAG_STAT_X09C,
+	DIAG_STAT_X0DC,
+	DIAG_STAT_X204,
+	DIAG_STAT_X210,
+	DIAG_STAT_X224,
+	DIAG_STAT_X250,
+	DIAG_STAT_X258,
+	DIAG_STAT_X26C,
+	DIAG_STAT_X288,
+	DIAG_STAT_X2C4,
+	DIAG_STAT_X2FC,
+	DIAG_STAT_X304,
+	DIAG_STAT_X308,
+	DIAG_STAT_X318,
+	DIAG_STAT_X320,
+	DIAG_STAT_X500,
+	NR_DIAG_STAT
+};
+
+void diag_stat_inc(enum diag_stat_enum nr);
+void diag_stat_inc_norecursion(enum diag_stat_enum nr);
+
+/*
+ * Diagnose 10: Release page range
+ */
+static inline void diag10_range(unsigned long start_pfn, unsigned long num_pfn)
+{
+	unsigned long start_addr, end_addr;
+
+	start_addr = pfn_to_phys(start_pfn);
+	end_addr = pfn_to_phys(start_pfn + num_pfn - 1);
+
+	diag_stat_inc(DIAG_STAT_X010);
+	asm volatile(
+		"0:	diag	%0,%1,0x10\n"
+		"1:	nopr	%%r7\n"
+		EX_TABLE(0b, 1b)
+		EX_TABLE(1b, 1b)
+		: : "a" (start_addr), "a" (end_addr));
+}
+
+/*
+ * Diagnose 14: Input spool file manipulation
+ */
+extern int diag14(unsigned long rx, unsigned long ry1, unsigned long subcode);
+
+/*
+ * Diagnose 210: Get information about a virtual device
+ */
+struct diag210 {
+	u16 vrdcdvno;	/* device number (input) */
+	u16 vrdclen;	/* data block length (input) */
+	u8 vrdcvcla;	/* virtual device class (output) */
+	u8 vrdcvtyp;	/* virtual device type (output) */
+	u8 vrdcvsta;	/* virtual device status (output) */
+	u8 vrdcvfla;	/* virtual device flags (output) */
+	u8 vrdcrccl;	/* real device class (output) */
+	u8 vrdccrty;	/* real device type (output) */
+	u8 vrdccrmd;	/* real device model (output) */
+	u8 vrdccrft;	/* real device feature (output) */
+} __packed __aligned(4);
+
+extern int diag210(struct diag210 *addr);
+
+struct diag8c {
+	u8 flags;
+	u8 num_partitions;
+	u16 width;
+	u16 height;
+	u8 data[];
+} __packed __aligned(4);
+
+extern int diag8c(struct diag8c *out, struct ccw_dev_id *devno);
+
+/* bit is set in flags, when physical cpu info is included in diag 204 data */
+#define DIAG204_LPAR_PHYS_FLG 0x80
+#define DIAG204_LPAR_NAME_LEN 8		/* lpar name len in diag 204 data */
+#define DIAG204_CPU_NAME_LEN 16		/* type name len of cpus in diag224 name table */
+
+/* diag 204 subcodes */
+enum diag204_sc {
+	DIAG204_SUBC_STIB4 = 4,
+	DIAG204_SUBC_RSI = 5,
+	DIAG204_SUBC_STIB6 = 6,
+	DIAG204_SUBC_STIB7 = 7
+};
+
+#define DIAG204_SUBCODE_MASK 0xffff
+
+/* The two available diag 204 data formats */
+enum diag204_format {
+	DIAG204_INFO_SIMPLE = 0,
+	DIAG204_INFO_EXT = 0x00010000
+};
+
+enum diag204_cpu_flags {
+	DIAG204_CPU_ONLINE = 0x20,
+	DIAG204_CPU_CAPPED = 0x40,
+};
+
+struct diag204_info_blk_hdr {
+	__u8  npar;
+	__u8  flags;
+	__u16 tslice;
+	__u16 phys_cpus;
+	__u16 this_part;
+	__u64 curtod;
+} __packed;
+
+struct diag204_x_info_blk_hdr {
+	__u8  npar;
+	__u8  flags;
+	__u16 tslice;
+	__u16 phys_cpus;
+	__u16 this_part;
+	__u64 curtod1;
+	__u64 curtod2;
+	char reserved[40];
+} __packed;
+
+struct diag204_part_hdr {
+	__u8 pn;
+	__u8 cpus;
+	char reserved[6];
+	char part_name[DIAG204_LPAR_NAME_LEN];
+} __packed;
+
+struct diag204_x_part_hdr {
+	__u8  pn;
+	__u8  cpus;
+	__u8  rcpus;
+	__u8  pflag;
+	__u32 mlu;
+	char  part_name[DIAG204_LPAR_NAME_LEN];
+	char  lpc_name[8];
+	char  os_name[8];
+	__u64 online_cs;
+	__u64 online_es;
+	__u8  upid;
+	__u8  reserved:3;
+	__u8  mtid:5;
+	char  reserved1[2];
+	__u32 group_mlu;
+	char  group_name[8];
+	char  hardware_group_name[8];
+	char  reserved2[24];
+} __packed;
+
+struct diag204_cpu_info {
+	__u16 cpu_addr;
+	char  reserved1[2];
+	__u8  ctidx;
+	__u8  cflag;
+	__u16 weight;
+	__u64 acc_time;
+	__u64 lp_time;
+} __packed;
+
+struct diag204_x_cpu_info {
+	__u16 cpu_addr;
+	char  reserved1[2];
+	__u8  ctidx;
+	__u8  cflag;
+	__u16 weight;
+	__u64 acc_time;
+	__u64 lp_time;
+	__u16 min_weight;
+	__u16 cur_weight;
+	__u16 max_weight;
+	char  reseved2[2];
+	__u64 online_time;
+	__u64 wait_time;
+	__u32 pma_weight;
+	__u32 polar_weight;
+	__u32 cpu_type_cap;
+	__u32 group_cpu_type_cap;
+	char  reserved3[32];
+} __packed;
+
+struct diag204_phys_hdr {
+	char reserved1[1];
+	__u8 cpus;
+	char reserved2[6];
+	char mgm_name[8];
+} __packed;
+
+struct diag204_x_phys_hdr {
+	char reserved1[1];
+	__u8 cpus;
+	char reserved2[6];
+	char mgm_name[8];
+	char reserved3[80];
+} __packed;
+
+struct diag204_phys_cpu {
+	__u16 cpu_addr;
+	char  reserved1[2];
+	__u8  ctidx;
+	char  reserved2[3];
+	__u64 mgm_time;
+	char  reserved3[8];
+} __packed;
+
+struct diag204_x_phys_cpu {
+	__u16 cpu_addr;
+	char  reserved1[2];
+	__u8  ctidx;
+	char  reserved2[1];
+	__u16 weight;
+	__u64 mgm_time;
+	char  reserved3[80];
+} __packed;
+
+struct diag204_x_part_block {
+	struct diag204_x_part_hdr hdr;
+	struct diag204_x_cpu_info cpus[];
+} __packed;
+
+struct diag204_x_phys_block {
+	struct diag204_x_phys_hdr hdr;
+	struct diag204_x_phys_cpu cpus[];
+} __packed;
+
+enum diag26c_sc {
+	DIAG26C_PORT_VNIC    = 0x00000024,
+	DIAG26C_MAC_SERVICES = 0x00000030
+};
+
+enum diag26c_version {
+	DIAG26C_VERSION2	 = 0x00000002,	/* z/VM 5.4.0 */
+	DIAG26C_VERSION6_VM65918 = 0x00020006	/* z/VM 6.4.0 + VM65918 */
+};
+
+#define DIAG26C_VNIC_INFO	0x0002
+struct diag26c_vnic_req {
+	u32	resp_buf_len;
+	u32	resp_version;
+	u16	req_format;
+	u16	vlan_id;
+	u64	sys_name;
+	u8	res[2];
+	u16	devno;
+} __packed __aligned(8);
+
+#define VNIC_INFO_PROT_L3	1
+#define VNIC_INFO_PROT_L2	2
+/* Note: this is the bare minimum, use it for uninitialized VNICs only. */
+struct diag26c_vnic_resp {
+	u32	version;
+	u32	entry_cnt;
+	/* VNIC info: */
+	u32	next_entry;
+	u64	owner;
+	u16	devno;
+	u8	status;
+	u8	type;
+	u64	lan_owner;
+	u64	lan_name;
+	u64	port_name;
+	u8	port_type;
+	u8	ext_status:6;
+	u8	protocol:2;
+	u16	base_devno;
+	u32	port_num;
+	u32	ifindex;
+	u32	maxinfo;
+	u32	dev_count;
+	/* 3x device info: */
+	u8	dev_info1[28];
+	u8	dev_info2[28];
+	u8	dev_info3[28];
+} __packed __aligned(8);
+
+#define DIAG26C_GET_MAC	0x0000
+struct diag26c_mac_req {
+	u32	resp_buf_len;
+	u32	resp_version;
+	u16	op_code;
+	u16	devno;
+	u8	res[4];
+};
+
+struct diag26c_mac_resp {
+	u32	version;
+	u8	mac[ETH_ALEN];
+	u8	res[2];
+} __aligned(8);
+
+#define CPNC_LINUX		0x4
+union diag318_info {
+	unsigned long val;
+	struct {
+		unsigned long cpnc : 8;
+		unsigned long cpvc : 56;
+	};
+};
+
+int diag204(unsigned long subcode, unsigned long size, void *addr);
+int diag224(void *ptr);
+int diag26c(void *req, void *resp, enum diag26c_sc subcode);
+
+struct hypfs_diag0c_entry;
+
+/*
+ * This structure must contain only pointers/references into
+ * the AMODE31 text section.
+ */
+struct diag_ops {
+	int (*diag210)(struct diag210 *addr);
+	int (*diag26c)(void *req, void *resp, enum diag26c_sc subcode);
+	int (*diag14)(unsigned long rx, unsigned long ry1, unsigned long subcode);
+	int (*diag8c)(struct diag8c *addr, struct ccw_dev_id *devno, size_t len);
+	void (*diag0c)(struct hypfs_diag0c_entry *entry);
+	void (*diag308_reset)(void);
+};
+
+extern struct diag_ops diag_amode31_ops;
+extern struct diag210 *__diag210_tmp_amode31;
+
+int _diag210_amode31(struct diag210 *addr);
+int _diag26c_amode31(void *req, void *resp, enum diag26c_sc subcode);
+int _diag14_amode31(unsigned long rx, unsigned long ry1, unsigned long subcode);
+void _diag0c_amode31(struct hypfs_diag0c_entry *entry);
+void _diag308_reset_amode31(void);
+int _diag8c_amode31(struct diag8c *addr, struct ccw_dev_id *devno, size_t len);
+
+#endif /* _ASM_S390_DIAG_H */
diff --git a/arch/s390/include/asm/dis.h b/arch/s390/include/asm/dis.h
new file mode 100644
index 0000000000..c18ed60919
--- /dev/null
+++ b/arch/s390/include/asm/dis.h
@@ -0,0 +1,30 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Disassemble s390 instructions.
+ *
+ * Copyright IBM Corp. 2007
+ * Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com),
+ */
+
+#ifndef __ASM_S390_DIS_H__
+#define __ASM_S390_DIS_H__
+
+#include <asm/dis-defs.h>
+
+static inline int insn_length(unsigned char code)
+{
+	return ((((int) code + 64) >> 7) + 1) << 1;
+}
+
+struct pt_regs;
+
+void show_code(struct pt_regs *regs);
+void print_fn_code(unsigned char *code, unsigned long len);
+struct s390_insn *find_insn(unsigned char *code);
+
+static inline int is_known_insn(unsigned char *code)
+{
+	return !!find_insn(code);
+}
+
+#endif /* __ASM_S390_DIS_H__ */
diff --git a/arch/s390/include/asm/dma.h b/arch/s390/include/asm/dma.h
new file mode 100644
index 0000000000..7fe3e31956
--- /dev/null
+++ b/arch/s390/include/asm/dma.h
@@ -0,0 +1,14 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_S390_DMA_H
+#define _ASM_S390_DMA_H
+
+#include <linux/io.h>
+
+/*
+ * MAX_DMA_ADDRESS is ambiguous because on s390 its completely unrelated
+ * to DMA. It _is_ used for the s390 memory zone split at 2GB caused
+ * by the 31 bit heritage.
+ */
+#define MAX_DMA_ADDRESS		__va(0x80000000)
+
+#endif /* _ASM_S390_DMA_H */
diff --git a/arch/s390/include/asm/dwarf.h b/arch/s390/include/asm/dwarf.h
new file mode 100644
index 0000000000..4f21ae561e
--- /dev/null
+++ b/arch/s390/include/asm/dwarf.h
@@ -0,0 +1,37 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_S390_DWARF_H
+#define _ASM_S390_DWARF_H
+
+#ifdef __ASSEMBLY__
+
+#define CFI_STARTPROC		.cfi_startproc
+#define CFI_ENDPROC		.cfi_endproc
+#define CFI_DEF_CFA_OFFSET	.cfi_def_cfa_offset
+#define CFI_ADJUST_CFA_OFFSET	.cfi_adjust_cfa_offset
+#define CFI_RESTORE		.cfi_restore
+
+#ifdef CONFIG_AS_CFI_VAL_OFFSET
+#define CFI_VAL_OFFSET		.cfi_val_offset
+#else
+#define CFI_VAL_OFFSET		#
+#endif
+
+#ifndef BUILD_VDSO
+	/*
+	 * Emit CFI data in .debug_frame sections and not in .eh_frame
+	 * sections.  The .eh_frame CFI is used for runtime unwind
+	 * information that is not being used.  Hence, vmlinux.lds.S
+	 * can discard the .eh_frame sections.
+	 */
+	.cfi_sections .debug_frame
+#else
+	/*
+	 * For vDSO, emit CFI data in both, .eh_frame and .debug_frame
+	 * sections.
+	 */
+	.cfi_sections .eh_frame, .debug_frame
+#endif
+
+#endif	/* __ASSEMBLY__ */
+
+#endif	/* _ASM_S390_DWARF_H */
diff --git a/arch/s390/include/asm/eadm.h b/arch/s390/include/asm/eadm.h
new file mode 100644
index 0000000000..06f795855a
--- /dev/null
+++ b/arch/s390/include/asm/eadm.h
@@ -0,0 +1,120 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_S390_EADM_H
+#define _ASM_S390_EADM_H
+
+#include <linux/types.h>
+#include <linux/device.h>
+#include <linux/blk_types.h>
+
+struct arqb {
+	u64 data;
+	u16 fmt:4;
+	u16:12;
+	u16 cmd_code;
+	u16:16;
+	u16 msb_count;
+	u32 reserved[12];
+} __packed;
+
+#define ARQB_CMD_MOVE	1
+
+struct arsb {
+	u16 fmt:4;
+	u32:28;
+	u8 ef;
+	u8:8;
+	u8 ecbi;
+	u8:8;
+	u8 fvf;
+	u16:16;
+	u8 eqc;
+	u32:32;
+	u64 fail_msb;
+	u64 fail_aidaw;
+	u64 fail_ms;
+	u64 fail_scm;
+	u32 reserved[4];
+} __packed;
+
+#define EQC_WR_PROHIBIT 22
+
+struct msb {
+	u8 fmt:4;
+	u8 oc:4;
+	u8 flags;
+	u16:12;
+	u16 bs:4;
+	u32 blk_count;
+	u64 data_addr;
+	u64 scm_addr;
+	u64:64;
+} __packed;
+
+struct aidaw {
+	u8 flags;
+	u32 :24;
+	u32 :32;
+	u64 data_addr;
+} __packed;
+
+#define MSB_OC_CLEAR	0
+#define MSB_OC_READ	1
+#define MSB_OC_WRITE	2
+#define MSB_OC_RELEASE	3
+
+#define MSB_FLAG_BNM	0x80
+#define MSB_FLAG_IDA	0x40
+
+#define MSB_BS_4K	0
+#define MSB_BS_1M	1
+
+#define AOB_NR_MSB	124
+
+struct aob {
+	struct arqb request;
+	struct arsb response;
+	struct msb msb[AOB_NR_MSB];
+} __packed __aligned(PAGE_SIZE);
+
+struct aob_rq_header {
+	struct scm_device *scmdev;
+	char data[];
+};
+
+struct scm_device {
+	u64 address;
+	u64 size;
+	unsigned int nr_max_block;
+	struct device dev;
+	struct {
+		unsigned int persistence:4;
+		unsigned int oper_state:4;
+		unsigned int data_state:4;
+		unsigned int rank:4;
+		unsigned int release:1;
+		unsigned int res_id:8;
+	} __packed attrs;
+};
+
+#define OP_STATE_GOOD		1
+#define OP_STATE_TEMP_ERR	2
+#define OP_STATE_PERM_ERR	3
+
+enum scm_event {SCM_CHANGE, SCM_AVAIL};
+
+struct scm_driver {
+	struct device_driver drv;
+	int (*probe) (struct scm_device *scmdev);
+	void (*remove) (struct scm_device *scmdev);
+	void (*notify) (struct scm_device *scmdev, enum scm_event event);
+	void (*handler) (struct scm_device *scmdev, void *data,
+			blk_status_t error);
+};
+
+int scm_driver_register(struct scm_driver *scmdrv);
+void scm_driver_unregister(struct scm_driver *scmdrv);
+
+int eadm_start_aob(struct aob *aob);
+void scm_irq_handler(struct aob *aob, blk_status_t error);
+
+#endif /* _ASM_S390_EADM_H */
diff --git a/arch/s390/include/asm/ebcdic.h b/arch/s390/include/asm/ebcdic.h
new file mode 100644
index 0000000000..efb50fc686
--- /dev/null
+++ b/arch/s390/include/asm/ebcdic.h
@@ -0,0 +1,47 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ *    EBCDIC -> ASCII, ASCII -> EBCDIC conversion routines.
+ *
+ *  S390 version
+ *    Copyright IBM Corp. 1999
+ *    Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>
+ */
+
+#ifndef _EBCDIC_H
+#define _EBCDIC_H
+
+#include <linux/types.h>
+
+extern __u8 _ascebc_500[256];   /* ASCII -> EBCDIC 500 conversion table */
+extern __u8 _ebcasc_500[256];   /* EBCDIC 500 -> ASCII conversion table */
+extern __u8 _ascebc[256];   /* ASCII -> EBCDIC conversion table */
+extern __u8 _ebcasc[256];   /* EBCDIC -> ASCII conversion table */
+extern __u8 _ebc_tolower[256]; /* EBCDIC -> lowercase */
+extern __u8 _ebc_toupper[256]; /* EBCDIC -> uppercase */
+
+static inline void
+codepage_convert(const __u8 *codepage, volatile char *addr, unsigned long nr)
+{
+	if (nr-- <= 0)
+		return;
+	asm volatile(
+		"	bras	1,1f\n"
+		"	tr	0(1,%0),0(%2)\n"
+		"0:	tr	0(256,%0),0(%2)\n"
+		"	la	%0,256(%0)\n"
+		"1:	ahi	%1,-256\n"
+		"	jnm	0b\n"
+		"	ex	%1,0(1)"
+		: "+&a" (addr), "+&a" (nr)
+		: "a" (codepage) : "cc", "memory", "1");
+}
+
+#define ASCEBC(addr,nr) codepage_convert(_ascebc, addr, nr)
+#define EBCASC(addr,nr) codepage_convert(_ebcasc, addr, nr)
+#define ASCEBC_500(addr,nr) codepage_convert(_ascebc_500, addr, nr)
+#define EBCASC_500(addr,nr) codepage_convert(_ebcasc_500, addr, nr)
+#define EBC_TOLOWER(addr,nr) codepage_convert(_ebc_tolower, addr, nr)
+#define EBC_TOUPPER(addr,nr) codepage_convert(_ebc_toupper, addr, nr)
+
+#endif
+
diff --git a/arch/s390/include/asm/elf.h b/arch/s390/include/asm/elf.h
new file mode 100644
index 0000000000..70a30ae258
--- /dev/null
+++ b/arch/s390/include/asm/elf.h
@@ -0,0 +1,302 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ *  S390 version
+ *
+ *  Derived from "include/asm-i386/elf.h"
+ */
+
+#ifndef __ASMS390_ELF_H
+#define __ASMS390_ELF_H
+
+/* s390 relocations defined by the ABIs */
+#define R_390_NONE		0	/* No reloc.  */
+#define R_390_8			1	/* Direct 8 bit.  */
+#define R_390_12		2	/* Direct 12 bit.  */
+#define R_390_16		3	/* Direct 16 bit.  */
+#define R_390_32		4	/* Direct 32 bit.  */
+#define R_390_PC32		5	/* PC relative 32 bit.	*/
+#define R_390_GOT12		6	/* 12 bit GOT offset.  */
+#define R_390_GOT32		7	/* 32 bit GOT offset.  */
+#define R_390_PLT32		8	/* 32 bit PC relative PLT address.  */
+#define R_390_COPY		9	/* Copy symbol at runtime.  */
+#define R_390_GLOB_DAT		10	/* Create GOT entry.  */
+#define R_390_JMP_SLOT		11	/* Create PLT entry.  */
+#define R_390_RELATIVE		12	/* Adjust by program base.  */
+#define R_390_GOTOFF32		13	/* 32 bit offset to GOT.	 */
+#define R_390_GOTPC		14	/* 32 bit PC rel. offset to GOT.  */
+#define R_390_GOT16		15	/* 16 bit GOT offset.  */
+#define R_390_PC16		16	/* PC relative 16 bit.	*/
+#define R_390_PC16DBL		17	/* PC relative 16 bit shifted by 1.  */
+#define R_390_PLT16DBL		18	/* 16 bit PC rel. PLT shifted by 1.  */
+#define R_390_PC32DBL		19	/* PC relative 32 bit shifted by 1.  */
+#define R_390_PLT32DBL		20	/* 32 bit PC rel. PLT shifted by 1.  */
+#define R_390_GOTPCDBL		21	/* 32 bit PC rel. GOT shifted by 1.  */
+#define R_390_64		22	/* Direct 64 bit.  */
+#define R_390_PC64		23	/* PC relative 64 bit.	*/
+#define R_390_GOT64		24	/* 64 bit GOT offset.  */
+#define R_390_PLT64		25	/* 64 bit PC relative PLT address.  */
+#define R_390_GOTENT		26	/* 32 bit PC rel. to GOT entry >> 1. */
+#define R_390_GOTOFF16		27	/* 16 bit offset to GOT. */
+#define R_390_GOTOFF64		28	/* 64 bit offset to GOT. */
+#define R_390_GOTPLT12		29	/* 12 bit offset to jump slot.	*/
+#define R_390_GOTPLT16		30	/* 16 bit offset to jump slot.	*/
+#define R_390_GOTPLT32		31	/* 32 bit offset to jump slot.	*/
+#define R_390_GOTPLT64		32	/* 64 bit offset to jump slot.	*/
+#define R_390_GOTPLTENT		33	/* 32 bit rel. offset to jump slot.  */
+#define R_390_PLTOFF16		34	/* 16 bit offset from GOT to PLT. */
+#define R_390_PLTOFF32		35	/* 32 bit offset from GOT to PLT. */
+#define R_390_PLTOFF64		36	/* 16 bit offset from GOT to PLT. */
+#define R_390_TLS_LOAD		37	/* Tag for load insn in TLS code. */
+#define R_390_TLS_GDCALL	38	/* Tag for function call in general
+                                           dynamic TLS code.  */
+#define R_390_TLS_LDCALL	39	/* Tag for function call in local
+                                           dynamic TLS code.  */
+#define R_390_TLS_GD32		40	/* Direct 32 bit for general dynamic
+                                           thread local data.  */
+#define R_390_TLS_GD64		41	/* Direct 64 bit for general dynamic
+                                           thread local data.  */
+#define R_390_TLS_GOTIE12	42	/* 12 bit GOT offset for static TLS
+                                           block offset.  */
+#define R_390_TLS_GOTIE32	43	/* 32 bit GOT offset for static TLS
+                                           block offset.  */
+#define R_390_TLS_GOTIE64	44	/* 64 bit GOT offset for static TLS
+                                           block offset.  */
+#define R_390_TLS_LDM32		45	/* Direct 32 bit for local dynamic
+                                           thread local data in LD code.  */
+#define R_390_TLS_LDM64		46	/* Direct 64 bit for local dynamic
+                                           thread local data in LD code.  */
+#define R_390_TLS_IE32		47	/* 32 bit address of GOT entry for
+                                           negated static TLS block offset.  */
+#define R_390_TLS_IE64		48	/* 64 bit address of GOT entry for
+                                           negated static TLS block offset.  */
+#define R_390_TLS_IEENT		49	/* 32 bit rel. offset to GOT entry for
+                                           negated static TLS block offset.  */
+#define R_390_TLS_LE32		50	/* 32 bit negated offset relative to
+                                           static TLS block.  */
+#define R_390_TLS_LE64		51	/* 64 bit negated offset relative to
+                                           static TLS block.  */
+#define R_390_TLS_LDO32		52	/* 32 bit offset relative to TLS
+                                           block.  */
+#define R_390_TLS_LDO64		53	/* 64 bit offset relative to TLS
+                                           block.  */
+#define R_390_TLS_DTPMOD	54	/* ID of module containing symbol.  */
+#define R_390_TLS_DTPOFF	55	/* Offset in TLS block.  */
+#define R_390_TLS_TPOFF		56	/* Negate offset in static TLS
+                                           block.  */
+#define R_390_20		57	/* Direct 20 bit.  */
+#define R_390_GOT20		58	/* 20 bit GOT offset.  */
+#define R_390_GOTPLT20		59	/* 20 bit offset to jump slot.  */
+#define R_390_TLS_GOTIE20	60	/* 20 bit GOT offset for static TLS
+					   block offset.  */
+/* Keep this the last entry.  */
+#define R_390_NUM	61
+
+enum {
+	HWCAP_NR_ESAN3		= 0,
+	HWCAP_NR_ZARCH		= 1,
+	HWCAP_NR_STFLE		= 2,
+	HWCAP_NR_MSA		= 3,
+	HWCAP_NR_LDISP		= 4,
+	HWCAP_NR_EIMM		= 5,
+	HWCAP_NR_DFP		= 6,
+	HWCAP_NR_HPAGE		= 7,
+	HWCAP_NR_ETF3EH		= 8,
+	HWCAP_NR_HIGH_GPRS	= 9,
+	HWCAP_NR_TE		= 10,
+	HWCAP_NR_VXRS		= 11,
+	HWCAP_NR_VXRS_BCD	= 12,
+	HWCAP_NR_VXRS_EXT	= 13,
+	HWCAP_NR_GS		= 14,
+	HWCAP_NR_VXRS_EXT2	= 15,
+	HWCAP_NR_VXRS_PDE	= 16,
+	HWCAP_NR_SORT		= 17,
+	HWCAP_NR_DFLT		= 18,
+	HWCAP_NR_VXRS_PDE2	= 19,
+	HWCAP_NR_NNPA		= 20,
+	HWCAP_NR_PCI_MIO	= 21,
+	HWCAP_NR_SIE		= 22,
+	HWCAP_NR_MAX
+};
+
+/* Bits present in AT_HWCAP. */
+#define HWCAP_ESAN3		BIT(HWCAP_NR_ESAN3)
+#define HWCAP_ZARCH		BIT(HWCAP_NR_ZARCH)
+#define HWCAP_STFLE		BIT(HWCAP_NR_STFLE)
+#define HWCAP_MSA		BIT(HWCAP_NR_MSA)
+#define HWCAP_LDISP		BIT(HWCAP_NR_LDISP)
+#define HWCAP_EIMM		BIT(HWCAP_NR_EIMM)
+#define HWCAP_DFP		BIT(HWCAP_NR_DFP)
+#define HWCAP_HPAGE		BIT(HWCAP_NR_HPAGE)
+#define HWCAP_ETF3EH		BIT(HWCAP_NR_ETF3EH)
+#define HWCAP_HIGH_GPRS		BIT(HWCAP_NR_HIGH_GPRS)
+#define HWCAP_TE		BIT(HWCAP_NR_TE)
+#define HWCAP_VXRS		BIT(HWCAP_NR_VXRS)
+#define HWCAP_VXRS_BCD		BIT(HWCAP_NR_VXRS_BCD)
+#define HWCAP_VXRS_EXT		BIT(HWCAP_NR_VXRS_EXT)
+#define HWCAP_GS		BIT(HWCAP_NR_GS)
+#define HWCAP_VXRS_EXT2		BIT(HWCAP_NR_VXRS_EXT2)
+#define HWCAP_VXRS_PDE		BIT(HWCAP_NR_VXRS_PDE)
+#define HWCAP_SORT		BIT(HWCAP_NR_SORT)
+#define HWCAP_DFLT		BIT(HWCAP_NR_DFLT)
+#define HWCAP_VXRS_PDE2		BIT(HWCAP_NR_VXRS_PDE2)
+#define HWCAP_NNPA		BIT(HWCAP_NR_NNPA)
+#define HWCAP_PCI_MIO		BIT(HWCAP_NR_PCI_MIO)
+#define HWCAP_SIE		BIT(HWCAP_NR_SIE)
+
+/*
+ * These are used to set parameters in the core dumps.
+ */
+#define ELF_CLASS	ELFCLASS64
+#define ELF_DATA	ELFDATA2MSB
+#define ELF_ARCH	EM_S390
+
+/* s390 specific phdr types */
+#define PT_S390_PGSTE	0x70000000
+
+/*
+ * ELF register definitions..
+ */
+
+#include <linux/compat.h>
+
+#include <asm/ptrace.h>
+#include <asm/syscall.h>
+#include <asm/user.h>
+
+typedef s390_fp_regs elf_fpregset_t;
+typedef s390_regs elf_gregset_t;
+
+typedef s390_fp_regs compat_elf_fpregset_t;
+typedef s390_compat_regs compat_elf_gregset_t;
+
+#include <linux/sched/mm.h>	/* for task_struct */
+#include <asm/mmu_context.h>
+
+/*
+ * This is used to ensure we don't load something for the wrong architecture.
+ */
+#define elf_check_arch(x) \
+	(((x)->e_machine == EM_S390 || (x)->e_machine == EM_S390_OLD) \
+         && (x)->e_ident[EI_CLASS] == ELF_CLASS) 
+#define compat_elf_check_arch(x) \
+	(((x)->e_machine == EM_S390 || (x)->e_machine == EM_S390_OLD) \
+	 && (x)->e_ident[EI_CLASS] == ELF_CLASS)
+#define compat_start_thread	start_thread31
+
+struct arch_elf_state {
+	int rc;
+};
+
+#define INIT_ARCH_ELF_STATE { .rc = 0 }
+
+#define arch_check_elf(ehdr, interp, interp_ehdr, state) (0)
+#ifdef CONFIG_PGSTE
+#define arch_elf_pt_proc(ehdr, phdr, elf, interp, state)	\
+({								\
+	struct arch_elf_state *_state = state;			\
+	if ((phdr)->p_type == PT_S390_PGSTE &&			\
+	    !page_table_allocate_pgste &&			\
+	    !test_thread_flag(TIF_PGSTE) &&			\
+	    !current->mm->context.alloc_pgste) {		\
+		set_thread_flag(TIF_PGSTE);			\
+		set_pt_regs_flag(task_pt_regs(current),		\
+				 PIF_EXECVE_PGSTE_RESTART);	\
+		_state->rc = -EAGAIN;				\
+	}							\
+	_state->rc;						\
+})
+#else
+#define arch_elf_pt_proc(ehdr, phdr, elf, interp, state)	\
+({								\
+	(state)->rc;						\
+})
+#endif
+
+/* For SVR4/S390 the function pointer to be registered with `atexit` is
+   passed in R14. */
+#define ELF_PLAT_INIT(_r, load_addr) \
+	do { \
+		_r->gprs[14] = 0; \
+	} while (0)
+
+#define CORE_DUMP_USE_REGSET
+#define ELF_EXEC_PAGESIZE	PAGE_SIZE
+
+/* This is the location that an ET_DYN program is loaded if exec'ed.  Typical
+   use of this is to invoke "./ld.so someprog" to test out a new version of
+   the loader.  We need to make sure that it is out of the way of the program
+   that it will "exec", and that there is sufficient room for the brk. 64-bit
+   tasks are aligned to 4GB. */
+#define ELF_ET_DYN_BASE (is_compat_task() ? \
+				(STACK_TOP / 3 * 2) : \
+				(STACK_TOP / 3 * 2) & ~((1UL << 32) - 1))
+
+/* This yields a mask that user programs can use to figure out what
+   instruction set this CPU supports. */
+
+extern unsigned long elf_hwcap;
+#define ELF_HWCAP (elf_hwcap)
+
+/* This yields a string that ld.so will use to load implementation
+   specific libraries for optimization.  This is more specific in
+   intent than poking at uname or /proc/cpuinfo.
+
+   For the moment, we have only optimizations for the Intel generations,
+   but that could change... */
+
+#define ELF_PLATFORM_SIZE 8
+extern char elf_platform[];
+#define ELF_PLATFORM (elf_platform)
+
+#ifndef CONFIG_COMPAT
+#define SET_PERSONALITY(ex) \
+do {								\
+	set_personality(PER_LINUX |				\
+		(current->personality & (~PER_MASK)));		\
+	current->thread.sys_call_table = sys_call_table;	\
+} while (0)
+#else /* CONFIG_COMPAT */
+#define SET_PERSONALITY(ex)					\
+do {								\
+	if (personality(current->personality) != PER_LINUX32)	\
+		set_personality(PER_LINUX |			\
+			(current->personality & ~PER_MASK));	\
+	if ((ex).e_ident[EI_CLASS] == ELFCLASS32) {		\
+		set_thread_flag(TIF_31BIT);			\
+		current->thread.sys_call_table =		\
+			sys_call_table_emu;			\
+	} else {						\
+		clear_thread_flag(TIF_31BIT);			\
+		current->thread.sys_call_table =		\
+			sys_call_table;				\
+	}							\
+} while (0)
+#endif /* CONFIG_COMPAT */
+
+/*
+ * Cache aliasing on the latest machines calls for a mapping granularity
+ * of 512KB for the anonymous mapping base. For 64-bit processes use a
+ * 512KB alignment and a randomization of up to 1GB. For 31-bit processes
+ * the virtual address space is limited, use no alignment and limit the
+ * randomization to 8MB.
+ * For the additional randomization of the program break use 32MB for
+ * 64-bit and 8MB for 31-bit.
+ */
+#define BRK_RND_MASK	(is_compat_task() ? 0x7ffUL : 0x1fffUL)
+#define MMAP_RND_MASK	(is_compat_task() ? 0x7ffUL : 0x3ff80UL)
+#define MMAP_ALIGN_MASK	(is_compat_task() ? 0 : 0x7fUL)
+#define STACK_RND_MASK	MMAP_RND_MASK
+
+/* update AT_VECTOR_SIZE_ARCH if the number of NEW_AUX_ENT entries changes */
+#define ARCH_DLINFO							\
+do {									\
+	NEW_AUX_ENT(AT_SYSINFO_EHDR,					\
+		    (unsigned long)current->mm->context.vdso_base);	\
+} while (0)
+
+struct linux_binprm;
+
+#define ARCH_HAS_SETUP_ADDITIONAL_PAGES 1
+int arch_setup_additional_pages(struct linux_binprm *, int);
+
+#endif
diff --git a/arch/s390/include/asm/entry-common.h b/arch/s390/include/asm/entry-common.h
new file mode 100644
index 0000000000..fdd319a622
--- /dev/null
+++ b/arch/s390/include/asm/entry-common.h
@@ -0,0 +1,63 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef ARCH_S390_ENTRY_COMMON_H
+#define ARCH_S390_ENTRY_COMMON_H
+
+#include <linux/sched.h>
+#include <linux/audit.h>
+#include <linux/randomize_kstack.h>
+#include <linux/processor.h>
+#include <linux/uaccess.h>
+#include <asm/timex.h>
+#include <asm/fpu/api.h>
+#include <asm/pai.h>
+
+#define ARCH_EXIT_TO_USER_MODE_WORK (_TIF_GUARDED_STORAGE | _TIF_PER_TRAP)
+
+void do_per_trap(struct pt_regs *regs);
+
+static __always_inline void arch_enter_from_user_mode(struct pt_regs *regs)
+{
+	if (IS_ENABLED(CONFIG_DEBUG_ENTRY))
+		debug_user_asce(0);
+
+	pai_kernel_enter(regs);
+}
+
+#define arch_enter_from_user_mode arch_enter_from_user_mode
+
+static __always_inline void arch_exit_to_user_mode_work(struct pt_regs *regs,
+							unsigned long ti_work)
+{
+	if (ti_work & _TIF_PER_TRAP) {
+		clear_thread_flag(TIF_PER_TRAP);
+		do_per_trap(regs);
+	}
+
+	if (ti_work & _TIF_GUARDED_STORAGE)
+		gs_load_bc_cb(regs);
+}
+
+#define arch_exit_to_user_mode_work arch_exit_to_user_mode_work
+
+static __always_inline void arch_exit_to_user_mode(void)
+{
+	if (test_cpu_flag(CIF_FPU))
+		__load_fpu_regs();
+
+	if (IS_ENABLED(CONFIG_DEBUG_ENTRY))
+		debug_user_asce(1);
+
+	pai_kernel_exit(current_pt_regs());
+}
+
+#define arch_exit_to_user_mode arch_exit_to_user_mode
+
+static inline void arch_exit_to_user_mode_prepare(struct pt_regs *regs,
+						  unsigned long ti_work)
+{
+	choose_random_kstack_offset(get_tod_clock_fast() & 0xff);
+}
+
+#define arch_exit_to_user_mode_prepare arch_exit_to_user_mode_prepare
+
+#endif
diff --git a/arch/s390/include/asm/exec.h b/arch/s390/include/asm/exec.h
new file mode 100644
index 0000000000..641bfbec9d
--- /dev/null
+++ b/arch/s390/include/asm/exec.h
@@ -0,0 +1,13 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright IBM Corp. 1999, 2009
+ *
+ * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>
+ */
+
+#ifndef __ASM_EXEC_H
+#define __ASM_EXEC_H
+
+extern unsigned long arch_align_stack(unsigned long sp);
+
+#endif /* __ASM_EXEC_H */
diff --git a/arch/s390/include/asm/extable.h b/arch/s390/include/asm/extable.h
new file mode 100644
index 0000000000..af6ba52743
--- /dev/null
+++ b/arch/s390/include/asm/extable.h
@@ -0,0 +1,72 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __S390_EXTABLE_H
+#define __S390_EXTABLE_H
+
+#include <asm/ptrace.h>
+#include <linux/compiler.h>
+
+/*
+ * The exception table consists of three addresses:
+ *
+ * - Address of an instruction that is allowed to fault.
+ * - Address at which the program should continue.
+ * - Optional address of handler that takes pt_regs * argument and runs in
+ *   interrupt context.
+ *
+ * No registers are modified, so it is entirely up to the continuation code
+ * to figure out what to do.
+ *
+ * All the routines below use bits of fixup code that are out of line
+ * with the main instruction path.  This means when everything is well,
+ * we don't even have to jump over them.  Further, they do not intrude
+ * on our cache or tlb entries.
+ */
+
+struct exception_table_entry
+{
+	int insn, fixup;
+	short type, data;
+};
+
+extern struct exception_table_entry *__start_amode31_ex_table;
+extern struct exception_table_entry *__stop_amode31_ex_table;
+
+const struct exception_table_entry *s390_search_extables(unsigned long addr);
+
+static inline unsigned long extable_fixup(const struct exception_table_entry *x)
+{
+	return (unsigned long)&x->fixup + x->fixup;
+}
+
+#define ARCH_HAS_RELATIVE_EXTABLE
+
+static inline void swap_ex_entry_fixup(struct exception_table_entry *a,
+				       struct exception_table_entry *b,
+				       struct exception_table_entry tmp,
+				       int delta)
+{
+	a->fixup = b->fixup + delta;
+	b->fixup = tmp.fixup - delta;
+	a->type = b->type;
+	b->type = tmp.type;
+	a->data = b->data;
+	b->data = tmp.data;
+}
+#define swap_ex_entry_fixup swap_ex_entry_fixup
+
+#ifdef CONFIG_BPF_JIT
+
+bool ex_handler_bpf(const struct exception_table_entry *ex, struct pt_regs *regs);
+
+#else /* !CONFIG_BPF_JIT */
+
+static inline bool ex_handler_bpf(const struct exception_table_entry *ex, struct pt_regs *regs)
+{
+	return false;
+}
+
+#endif /* CONFIG_BPF_JIT */
+
+bool fixup_exception(struct pt_regs *regs);
+
+#endif
diff --git a/arch/s390/include/asm/extmem.h b/arch/s390/include/asm/extmem.h
new file mode 100644
index 0000000000..568fd81bb7
--- /dev/null
+++ b/arch/s390/include/asm/extmem.h
@@ -0,0 +1,32 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ *  definitions for external memory segment support
+ *  Copyright IBM Corp. 2003
+ */
+
+#ifndef _ASM_S390X_DCSS_H
+#define _ASM_S390X_DCSS_H
+#ifndef __ASSEMBLY__
+
+/* possible values for segment type as returned by segment_info */
+#define SEG_TYPE_SW 0
+#define SEG_TYPE_EW 1
+#define SEG_TYPE_SR 2
+#define SEG_TYPE_ER 3
+#define SEG_TYPE_SN 4
+#define SEG_TYPE_EN 5
+#define SEG_TYPE_SC 6
+#define SEG_TYPE_EWEN 7
+
+#define SEGMENT_SHARED 0
+#define SEGMENT_EXCLUSIVE 1
+
+int segment_load (char *name, int segtype, unsigned long *addr, unsigned long *length);
+void segment_unload(char *name);
+void segment_save(char *name);
+int segment_type (char* name);
+int segment_modify_shared (char *name, int do_nonshared);
+void segment_warning(int rc, char *seg_name);
+
+#endif
+#endif
diff --git a/arch/s390/include/asm/facility.h b/arch/s390/include/asm/facility.h
new file mode 100644
index 0000000000..94b6919026
--- /dev/null
+++ b/arch/s390/include/asm/facility.h
@@ -0,0 +1,114 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright IBM Corp. 1999, 2009
+ *
+ * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>
+ */
+
+#ifndef __ASM_FACILITY_H
+#define __ASM_FACILITY_H
+
+#include <asm/facility-defs.h>
+
+#include <linux/minmax.h>
+#include <linux/string.h>
+#include <linux/types.h>
+#include <linux/preempt.h>
+
+#include <asm/lowcore.h>
+
+#define MAX_FACILITY_BIT (sizeof(stfle_fac_list) * 8)
+
+extern u64 stfle_fac_list[16];
+extern u64 alt_stfle_fac_list[16];
+
+static inline void __set_facility(unsigned long nr, void *facilities)
+{
+	unsigned char *ptr = (unsigned char *) facilities;
+
+	if (nr >= MAX_FACILITY_BIT)
+		return;
+	ptr[nr >> 3] |= 0x80 >> (nr & 7);
+}
+
+static inline void __clear_facility(unsigned long nr, void *facilities)
+{
+	unsigned char *ptr = (unsigned char *) facilities;
+
+	if (nr >= MAX_FACILITY_BIT)
+		return;
+	ptr[nr >> 3] &= ~(0x80 >> (nr & 7));
+}
+
+static inline int __test_facility(unsigned long nr, void *facilities)
+{
+	unsigned char *ptr;
+
+	if (nr >= MAX_FACILITY_BIT)
+		return 0;
+	ptr = (unsigned char *) facilities + (nr >> 3);
+	return (*ptr & (0x80 >> (nr & 7))) != 0;
+}
+
+/*
+ * The test_facility function uses the bit ordering where the MSB is bit 0.
+ * That makes it easier to query facility bits with the bit number as
+ * documented in the Principles of Operation.
+ */
+static inline int test_facility(unsigned long nr)
+{
+	unsigned long facilities_als[] = { FACILITIES_ALS };
+
+	if (__builtin_constant_p(nr) && nr < sizeof(facilities_als) * 8) {
+		if (__test_facility(nr, &facilities_als))
+			return 1;
+	}
+	return __test_facility(nr, &stfle_fac_list);
+}
+
+static inline unsigned long __stfle_asm(u64 *stfle_fac_list, int size)
+{
+	unsigned long reg0 = size - 1;
+
+	asm volatile(
+		"	lgr	0,%[reg0]\n"
+		"	.insn	s,0xb2b00000,%[list]\n" /* stfle */
+		"	lgr	%[reg0],0\n"
+		: [reg0] "+&d" (reg0), [list] "+Q" (*stfle_fac_list)
+		:
+		: "memory", "cc", "0");
+	return reg0;
+}
+
+/**
+ * stfle - Store facility list extended
+ * @stfle_fac_list: array where facility list can be stored
+ * @size: size of passed in array in double words
+ */
+static inline void __stfle(u64 *stfle_fac_list, int size)
+{
+	unsigned long nr;
+	u32 stfl_fac_list;
+
+	asm volatile(
+		"	stfl	0(0)\n"
+		: "=m" (S390_lowcore.stfl_fac_list));
+	stfl_fac_list = S390_lowcore.stfl_fac_list;
+	memcpy(stfle_fac_list, &stfl_fac_list, 4);
+	nr = 4; /* bytes stored by stfl */
+	if (stfl_fac_list & 0x01000000) {
+		/* More facility bits available with stfle */
+		nr = __stfle_asm(stfle_fac_list, size);
+		nr = min_t(unsigned long, (nr + 1) * 8, size * 8);
+	}
+	memset((char *) stfle_fac_list + nr, 0, size * 8 - nr);
+}
+
+static inline void stfle(u64 *stfle_fac_list, int size)
+{
+	preempt_disable();
+	__stfle(stfle_fac_list, size);
+	preempt_enable();
+}
+
+#endif /* __ASM_FACILITY_H */
diff --git a/arch/s390/include/asm/fcx.h b/arch/s390/include/asm/fcx.h
new file mode 100644
index 0000000000..29784b4b44
--- /dev/null
+++ b/arch/s390/include/asm/fcx.h
@@ -0,0 +1,312 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ *  Functions for assembling fcx enabled I/O control blocks.
+ *
+ *    Copyright IBM Corp. 2008
+ *    Author(s): Peter Oberparleiter <peter.oberparleiter@de.ibm.com>
+ */
+
+#ifndef _ASM_S390_FCX_H
+#define _ASM_S390_FCX_H
+
+#include <linux/types.h>
+
+#define TCW_FORMAT_DEFAULT		0
+#define TCW_TIDAW_FORMAT_DEFAULT	0
+#define TCW_FLAGS_INPUT_TIDA		(1 << (23 - 5))
+#define TCW_FLAGS_TCCB_TIDA		(1 << (23 - 6))
+#define TCW_FLAGS_OUTPUT_TIDA		(1 << (23 - 7))
+#define TCW_FLAGS_TIDAW_FORMAT(x)	((x) & 3) << (23 - 9)
+#define TCW_FLAGS_GET_TIDAW_FORMAT(x)	(((x) >> (23 - 9)) & 3)
+
+/**
+ * struct tcw - Transport Control Word (TCW)
+ * @format: TCW format
+ * @flags: TCW flags
+ * @tccbl: Transport-Command-Control-Block Length
+ * @r: Read Operations
+ * @w: Write Operations
+ * @output: Output-Data Address
+ * @input: Input-Data Address
+ * @tsb: Transport-Status-Block Address
+ * @tccb: Transport-Command-Control-Block Address
+ * @output_count: Output Count
+ * @input_count: Input Count
+ * @intrg: Interrogate TCW Address
+ */
+struct tcw {
+	u32 format:2;
+	u32 :6;
+	u32 flags:24;
+	u32 :8;
+	u32 tccbl:6;
+	u32 r:1;
+	u32 w:1;
+	u32 :16;
+	u64 output;
+	u64 input;
+	u64 tsb;
+	u64 tccb;
+	u32 output_count;
+	u32 input_count;
+	u32 :32;
+	u32 :32;
+	u32 :32;
+	u32 intrg;
+} __attribute__ ((packed, aligned(64)));
+
+#define TIDAW_FLAGS_LAST		(1 << (7 - 0))
+#define TIDAW_FLAGS_SKIP		(1 << (7 - 1))
+#define TIDAW_FLAGS_DATA_INT		(1 << (7 - 2))
+#define TIDAW_FLAGS_TTIC		(1 << (7 - 3))
+#define TIDAW_FLAGS_INSERT_CBC		(1 << (7 - 4))
+
+/**
+ * struct tidaw - Transport-Indirect-Addressing Word (TIDAW)
+ * @flags: TIDAW flags. Can be an arithmetic OR of the following constants:
+ * %TIDAW_FLAGS_LAST, %TIDAW_FLAGS_SKIP, %TIDAW_FLAGS_DATA_INT,
+ * %TIDAW_FLAGS_TTIC, %TIDAW_FLAGS_INSERT_CBC
+ * @count: Count
+ * @addr: Address
+ */
+struct tidaw {
+	u32 flags:8;
+	u32 :24;
+	u32 count;
+	u64 addr;
+} __attribute__ ((packed, aligned(16)));
+
+/**
+ * struct tsa_iostat - I/O-Status Transport-Status Area (IO-Stat TSA)
+ * @dev_time: Device Time
+ * @def_time: Defer Time
+ * @queue_time: Queue Time
+ * @dev_busy_time: Device-Busy Time
+ * @dev_act_time: Device-Active-Only Time
+ * @sense: Sense Data (if present)
+ */
+struct tsa_iostat {
+	u32 dev_time;
+	u32 def_time;
+	u32 queue_time;
+	u32 dev_busy_time;
+	u32 dev_act_time;
+	u8 sense[32];
+} __attribute__ ((packed));
+
+/**
+ * struct tsa_ddpcs - Device-Detected-Program-Check Transport-Status Area (DDPC TSA)
+ * @rc: Reason Code
+ * @rcq: Reason Code Qualifier
+ * @sense: Sense Data (if present)
+ */
+struct tsa_ddpc {
+	u32 :24;
+	u32 rc:8;
+	u8 rcq[16];
+	u8 sense[32];
+} __attribute__ ((packed));
+
+#define TSA_INTRG_FLAGS_CU_STATE_VALID		(1 << (7 - 0))
+#define TSA_INTRG_FLAGS_DEV_STATE_VALID		(1 << (7 - 1))
+#define TSA_INTRG_FLAGS_OP_STATE_VALID		(1 << (7 - 2))
+
+/**
+ * struct tsa_intrg - Interrogate Transport-Status Area (Intrg. TSA)
+ * @format: Format
+ * @flags: Flags. Can be an arithmetic OR of the following constants:
+ * %TSA_INTRG_FLAGS_CU_STATE_VALID, %TSA_INTRG_FLAGS_DEV_STATE_VALID,
+ * %TSA_INTRG_FLAGS_OP_STATE_VALID
+ * @cu_state: Controle-Unit State
+ * @dev_state: Device State
+ * @op_state: Operation State
+ * @sd_info: State-Dependent Information
+ * @dl_id: Device-Level Identifier
+ * @dd_data: Device-Dependent Data
+ */
+struct tsa_intrg {
+	u32 format:8;
+	u32 flags:8;
+	u32 cu_state:8;
+	u32 dev_state:8;
+	u32 op_state:8;
+	u32 :24;
+	u8 sd_info[12];
+	u32 dl_id;
+	u8 dd_data[28];
+} __attribute__ ((packed));
+
+#define TSB_FORMAT_NONE		0
+#define TSB_FORMAT_IOSTAT	1
+#define TSB_FORMAT_DDPC		2
+#define TSB_FORMAT_INTRG	3
+
+#define TSB_FLAGS_DCW_OFFSET_VALID	(1 << (7 - 0))
+#define TSB_FLAGS_COUNT_VALID		(1 << (7 - 1))
+#define TSB_FLAGS_CACHE_MISS		(1 << (7 - 2))
+#define TSB_FLAGS_TIME_VALID		(1 << (7 - 3))
+#define TSB_FLAGS_FORMAT(x)		((x) & 7)
+#define TSB_FORMAT(t)			((t)->flags & 7)
+
+/**
+ * struct tsb - Transport-Status Block (TSB)
+ * @length: Length
+ * @flags: Flags. Can be an arithmetic OR of the following constants:
+ * %TSB_FLAGS_DCW_OFFSET_VALID, %TSB_FLAGS_COUNT_VALID, %TSB_FLAGS_CACHE_MISS,
+ * %TSB_FLAGS_TIME_VALID
+ * @dcw_offset: DCW Offset
+ * @count: Count
+ * @tsa: Transport-Status-Area
+ */
+struct tsb {
+	u32 length:8;
+	u32 flags:8;
+	u32 dcw_offset:16;
+	u32 count;
+	u32 :32;
+	union {
+		struct tsa_iostat iostat;
+		struct tsa_ddpc ddpc;
+		struct tsa_intrg intrg;
+	} __attribute__ ((packed)) tsa;
+} __attribute__ ((packed, aligned(8)));
+
+#define DCW_INTRG_FORMAT_DEFAULT	0
+
+#define DCW_INTRG_RC_UNSPECIFIED	0
+#define DCW_INTRG_RC_TIMEOUT		1
+
+#define DCW_INTRG_RCQ_UNSPECIFIED	0
+#define DCW_INTRG_RCQ_PRIMARY		1
+#define DCW_INTRG_RCQ_SECONDARY		2
+
+#define DCW_INTRG_FLAGS_MPM		(1 << (7 - 0))
+#define DCW_INTRG_FLAGS_PPR		(1 << (7 - 1))
+#define DCW_INTRG_FLAGS_CRIT		(1 << (7 - 2))
+
+/**
+ * struct dcw_intrg_data - Interrogate DCW data
+ * @format: Format. Should be %DCW_INTRG_FORMAT_DEFAULT
+ * @rc: Reason Code. Can be one of %DCW_INTRG_RC_UNSPECIFIED,
+ * %DCW_INTRG_RC_TIMEOUT
+ * @rcq: Reason Code Qualifier: Can be one of %DCW_INTRG_RCQ_UNSPECIFIED,
+ * %DCW_INTRG_RCQ_PRIMARY, %DCW_INTRG_RCQ_SECONDARY
+ * @lpm: Logical-Path Mask
+ * @pam: Path-Available Mask
+ * @pim: Path-Installed Mask
+ * @timeout: Timeout
+ * @flags: Flags. Can be an arithmetic OR of %DCW_INTRG_FLAGS_MPM,
+ * %DCW_INTRG_FLAGS_PPR, %DCW_INTRG_FLAGS_CRIT
+ * @time: Time
+ * @prog_id: Program Identifier
+ * @prog_data: Program-Dependent Data
+ */
+struct dcw_intrg_data {
+	u32 format:8;
+	u32 rc:8;
+	u32 rcq:8;
+	u32 lpm:8;
+	u32 pam:8;
+	u32 pim:8;
+	u32 timeout:16;
+	u32 flags:8;
+	u32 :24;
+	u32 :32;
+	u64 time;
+	u64 prog_id;
+	u8  prog_data[];
+} __attribute__ ((packed));
+
+#define DCW_FLAGS_CC		(1 << (7 - 1))
+
+#define DCW_CMD_WRITE		0x01
+#define DCW_CMD_READ		0x02
+#define DCW_CMD_CONTROL		0x03
+#define DCW_CMD_SENSE		0x04
+#define DCW_CMD_SENSE_ID	0xe4
+#define DCW_CMD_INTRG		0x40
+
+/**
+ * struct dcw - Device-Command Word (DCW)
+ * @cmd: Command Code. Can be one of %DCW_CMD_WRITE, %DCW_CMD_READ,
+ * %DCW_CMD_CONTROL, %DCW_CMD_SENSE, %DCW_CMD_SENSE_ID, %DCW_CMD_INTRG
+ * @flags: Flags. Can be an arithmetic OR of %DCW_FLAGS_CC
+ * @cd_count: Control-Data Count
+ * @count: Count
+ * @cd: Control Data
+ */
+struct dcw {
+	u32 cmd:8;
+	u32 flags:8;
+	u32 :8;
+	u32 cd_count:8;
+	u32 count;
+	u8 cd[];
+} __attribute__ ((packed));
+
+#define TCCB_FORMAT_DEFAULT	0x7f
+#define TCCB_MAX_DCW		30
+#define TCCB_MAX_SIZE		(sizeof(struct tccb_tcah) + \
+				 TCCB_MAX_DCW * sizeof(struct dcw) + \
+				 sizeof(struct tccb_tcat))
+#define TCCB_SAC_DEFAULT	0x1ffe
+#define TCCB_SAC_INTRG		0x1fff
+
+/**
+ * struct tccb_tcah - Transport-Command-Area Header (TCAH)
+ * @format: Format. Should be %TCCB_FORMAT_DEFAULT
+ * @tcal: Transport-Command-Area Length
+ * @sac: Service-Action Code. Can be one of %TCCB_SAC_DEFAULT, %TCCB_SAC_INTRG
+ * @prio: Priority
+ */
+struct tccb_tcah {
+	u32 format:8;
+	u32 :24;
+	u32 :24;
+	u32 tcal:8;
+	u32 sac:16;
+	u32 :8;
+	u32 prio:8;
+	u32 :32;
+} __attribute__ ((packed));
+
+/**
+ * struct tccb_tcat - Transport-Command-Area Trailer (TCAT)
+ * @count: Transport Count
+ */
+struct tccb_tcat {
+	u32 :32;
+	u32 count;
+} __attribute__ ((packed));
+
+/**
+ * struct tccb - (partial) Transport-Command-Control Block (TCCB)
+ * @tcah: TCAH
+ * @tca: Transport-Command Area
+ */
+struct tccb {
+	struct tccb_tcah tcah;
+	u8 tca[];
+} __attribute__ ((packed, aligned(8)));
+
+struct tcw *tcw_get_intrg(struct tcw *tcw);
+void *tcw_get_data(struct tcw *tcw);
+struct tccb *tcw_get_tccb(struct tcw *tcw);
+struct tsb *tcw_get_tsb(struct tcw *tcw);
+
+void tcw_init(struct tcw *tcw, int r, int w);
+void tcw_finalize(struct tcw *tcw, int num_tidaws);
+
+void tcw_set_intrg(struct tcw *tcw, struct tcw *intrg_tcw);
+void tcw_set_data(struct tcw *tcw, void *data, int use_tidal);
+void tcw_set_tccb(struct tcw *tcw, struct tccb *tccb);
+void tcw_set_tsb(struct tcw *tcw, struct tsb *tsb);
+
+void tccb_init(struct tccb *tccb, size_t tccb_size, u32 sac);
+void tsb_init(struct tsb *tsb);
+struct dcw *tccb_add_dcw(struct tccb *tccb, size_t tccb_size, u8 cmd, u8 flags,
+			 void *cd, u8 cd_count, u32 count);
+struct tidaw *tcw_add_tidaw(struct tcw *tcw, int num_tidaws, u8 flags,
+			    void *addr, u32 count);
+
+#endif /* _ASM_S390_FCX_H */
diff --git a/arch/s390/include/asm/fpu/api.h b/arch/s390/include/asm/fpu/api.h
new file mode 100644
index 0000000000..9acf48e53a
--- /dev/null
+++ b/arch/s390/include/asm/fpu/api.h
@@ -0,0 +1,119 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * In-kernel FPU support functions
+ *
+ *
+ * Consider these guidelines before using in-kernel FPU functions:
+ *
+ *  1. Use kernel_fpu_begin() and kernel_fpu_end() to enclose all in-kernel
+ *     use of floating-point or vector registers and instructions.
+ *
+ *  2. For kernel_fpu_begin(), specify the vector register range you want to
+ *     use with the KERNEL_VXR_* constants. Consider these usage guidelines:
+ *
+ *     a) If your function typically runs in process-context, use the lower
+ *	  half of the vector registers, for example, specify KERNEL_VXR_LOW.
+ *     b) If your function typically runs in soft-irq or hard-irq context,
+ *	  prefer using the upper half of the vector registers, for example,
+ *	  specify KERNEL_VXR_HIGH.
+ *
+ *     If you adhere to these guidelines, an interrupted process context
+ *     does not require to save and restore vector registers because of
+ *     disjoint register ranges.
+ *
+ *     Also note that the __kernel_fpu_begin()/__kernel_fpu_end() functions
+ *     includes logic to save and restore up to 16 vector registers at once.
+ *
+ *  3. You can nest kernel_fpu_begin()/kernel_fpu_end() by using different
+ *     struct kernel_fpu states.  Vector registers that are in use by outer
+ *     levels are saved and restored.  You can minimize the save and restore
+ *     effort by choosing disjoint vector register ranges.
+ *
+ *  5. To use vector floating-point instructions, specify the KERNEL_FPC
+ *     flag to save and restore floating-point controls in addition to any
+ *     vector register range.
+ *
+ *  6. To use floating-point registers and instructions only, specify the
+ *     KERNEL_FPR flag.  This flag triggers a save and restore of vector
+ *     registers V0 to V15 and floating-point controls.
+ *
+ * Copyright IBM Corp. 2015
+ * Author(s): Hendrik Brueckner <brueckner@linux.vnet.ibm.com>
+ */
+
+#ifndef _ASM_S390_FPU_API_H
+#define _ASM_S390_FPU_API_H
+
+#include <linux/preempt.h>
+#include <asm/asm-extable.h>
+
+void save_fpu_regs(void);
+void load_fpu_regs(void);
+void __load_fpu_regs(void);
+
+static inline int test_fp_ctl(u32 fpc)
+{
+	u32 orig_fpc;
+	int rc;
+
+	asm volatile(
+		"	efpc    %1\n"
+		"	sfpc	%2\n"
+		"0:	sfpc	%1\n"
+		"	la	%0,0\n"
+		"1:\n"
+		EX_TABLE(0b,1b)
+		: "=d" (rc), "=&d" (orig_fpc)
+		: "d" (fpc), "0" (-EINVAL));
+	return rc;
+}
+
+#define KERNEL_FPC		1
+#define KERNEL_VXR_V0V7		2
+#define KERNEL_VXR_V8V15	4
+#define KERNEL_VXR_V16V23	8
+#define KERNEL_VXR_V24V31	16
+
+#define KERNEL_VXR_LOW		(KERNEL_VXR_V0V7|KERNEL_VXR_V8V15)
+#define KERNEL_VXR_MID		(KERNEL_VXR_V8V15|KERNEL_VXR_V16V23)
+#define KERNEL_VXR_HIGH		(KERNEL_VXR_V16V23|KERNEL_VXR_V24V31)
+
+#define KERNEL_VXR		(KERNEL_VXR_LOW|KERNEL_VXR_HIGH)
+#define KERNEL_FPR		(KERNEL_FPC|KERNEL_VXR_LOW)
+
+struct kernel_fpu;
+
+/*
+ * Note the functions below must be called with preemption disabled.
+ * Do not enable preemption before calling __kernel_fpu_end() to prevent
+ * an corruption of an existing kernel FPU state.
+ *
+ * Prefer using the kernel_fpu_begin()/kernel_fpu_end() pair of functions.
+ */
+void __kernel_fpu_begin(struct kernel_fpu *state, u32 flags);
+void __kernel_fpu_end(struct kernel_fpu *state, u32 flags);
+
+
+static inline void kernel_fpu_begin(struct kernel_fpu *state, u32 flags)
+{
+	preempt_disable();
+	state->mask = S390_lowcore.fpu_flags;
+	if (!test_cpu_flag(CIF_FPU))
+		/* Save user space FPU state and register contents */
+		save_fpu_regs();
+	else if (state->mask & flags)
+		/* Save FPU/vector register in-use by the kernel */
+		__kernel_fpu_begin(state, flags);
+	S390_lowcore.fpu_flags |= flags;
+}
+
+static inline void kernel_fpu_end(struct kernel_fpu *state, u32 flags)
+{
+	S390_lowcore.fpu_flags = state->mask;
+	if (state->mask & flags)
+		/* Restore FPU/vector register in-use by the kernel */
+		__kernel_fpu_end(state, flags);
+	preempt_enable();
+}
+
+#endif /* _ASM_S390_FPU_API_H */
diff --git a/arch/s390/include/asm/fpu/internal.h b/arch/s390/include/asm/fpu/internal.h
new file mode 100644
index 0000000000..bbdadb1c9e
--- /dev/null
+++ b/arch/s390/include/asm/fpu/internal.h
@@ -0,0 +1,62 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * FPU state and register content conversion primitives
+ *
+ * Copyright IBM Corp. 2015
+ * Author(s): Hendrik Brueckner <brueckner@linux.vnet.ibm.com>
+ */
+
+#ifndef _ASM_S390_FPU_INTERNAL_H
+#define _ASM_S390_FPU_INTERNAL_H
+
+#include <linux/string.h>
+#include <asm/ctl_reg.h>
+#include <asm/fpu/types.h>
+
+static inline void save_vx_regs(__vector128 *vxrs)
+{
+	asm volatile(
+		"	la	1,%0\n"
+		"	.word	0xe70f,0x1000,0x003e\n"	/* vstm 0,15,0(1) */
+		"	.word	0xe70f,0x1100,0x0c3e\n"	/* vstm 16,31,256(1) */
+		: "=Q" (*(struct vx_array *) vxrs) : : "1");
+}
+
+static inline void convert_vx_to_fp(freg_t *fprs, __vector128 *vxrs)
+{
+	int i;
+
+	for (i = 0; i < __NUM_FPRS; i++)
+		fprs[i].ui = vxrs[i].high;
+}
+
+static inline void convert_fp_to_vx(__vector128 *vxrs, freg_t *fprs)
+{
+	int i;
+
+	for (i = 0; i < __NUM_FPRS; i++)
+		vxrs[i].high = fprs[i].ui;
+}
+
+static inline void fpregs_store(_s390_fp_regs *fpregs, struct fpu *fpu)
+{
+	fpregs->pad = 0;
+	fpregs->fpc = fpu->fpc;
+	if (MACHINE_HAS_VX)
+		convert_vx_to_fp((freg_t *)&fpregs->fprs, fpu->vxrs);
+	else
+		memcpy((freg_t *)&fpregs->fprs, fpu->fprs,
+		       sizeof(fpregs->fprs));
+}
+
+static inline void fpregs_load(_s390_fp_regs *fpregs, struct fpu *fpu)
+{
+	fpu->fpc = fpregs->fpc;
+	if (MACHINE_HAS_VX)
+		convert_fp_to_vx(fpu->vxrs, (freg_t *)&fpregs->fprs);
+	else
+		memcpy(fpu->fprs, (freg_t *)&fpregs->fprs,
+		       sizeof(fpregs->fprs));
+}
+
+#endif /* _ASM_S390_FPU_INTERNAL_H */
diff --git a/arch/s390/include/asm/fpu/types.h b/arch/s390/include/asm/fpu/types.h
new file mode 100644
index 0000000000..d889e94368
--- /dev/null
+++ b/arch/s390/include/asm/fpu/types.h
@@ -0,0 +1,38 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * FPU data structures
+ *
+ * Copyright IBM Corp. 2015
+ * Author(s): Hendrik Brueckner <brueckner@linux.vnet.ibm.com>
+ */
+
+#ifndef _ASM_S390_FPU_TYPES_H
+#define _ASM_S390_FPU_TYPES_H
+
+#include <asm/sigcontext.h>
+
+struct fpu {
+	__u32 fpc;		/* Floating-point control */
+	void *regs;		/* Pointer to the current save area */
+	union {
+		/* Floating-point register save area */
+		freg_t fprs[__NUM_FPRS];
+		/* Vector register save area */
+		__vector128 vxrs[__NUM_VXRS];
+	};
+};
+
+/* VX array structure for address operand constraints in inline assemblies */
+struct vx_array { __vector128 _[__NUM_VXRS]; };
+
+/* In-kernel FPU state structure */
+struct kernel_fpu {
+	u32	    mask;
+	u32	    fpc;
+	union {
+		freg_t fprs[__NUM_FPRS];
+		__vector128 vxrs[__NUM_VXRS];
+	};
+};
+
+#endif /* _ASM_S390_FPU_TYPES_H */
diff --git a/arch/s390/include/asm/ftrace.h b/arch/s390/include/asm/ftrace.h
new file mode 100644
index 0000000000..5a82b08f03
--- /dev/null
+++ b/arch/s390/include/asm/ftrace.h
@@ -0,0 +1,169 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_S390_FTRACE_H
+#define _ASM_S390_FTRACE_H
+
+#define HAVE_FUNCTION_GRAPH_RET_ADDR_PTR
+#define ARCH_SUPPORTS_FTRACE_OPS 1
+#define MCOUNT_INSN_SIZE	6
+
+#ifndef __ASSEMBLY__
+
+#ifdef CONFIG_CC_IS_CLANG
+/* https://bugs.llvm.org/show_bug.cgi?id=41424 */
+#define ftrace_return_address(n) 0UL
+#else
+#define ftrace_return_address(n) __builtin_return_address(n)
+#endif
+
+void ftrace_caller(void);
+
+extern void *ftrace_func;
+
+struct dyn_arch_ftrace { };
+
+#define MCOUNT_ADDR 0
+#define FTRACE_ADDR ((unsigned long)ftrace_caller)
+
+#define KPROBE_ON_FTRACE_NOP	0
+#define KPROBE_ON_FTRACE_CALL	1
+
+struct module;
+struct dyn_ftrace;
+
+bool ftrace_need_init_nop(void);
+#define ftrace_need_init_nop ftrace_need_init_nop
+
+int ftrace_init_nop(struct module *mod, struct dyn_ftrace *rec);
+#define ftrace_init_nop ftrace_init_nop
+
+static inline unsigned long ftrace_call_adjust(unsigned long addr)
+{
+	return addr;
+}
+
+struct ftrace_regs {
+	struct pt_regs regs;
+};
+
+static __always_inline struct pt_regs *arch_ftrace_get_regs(struct ftrace_regs *fregs)
+{
+	struct pt_regs *regs = &fregs->regs;
+
+	if (test_pt_regs_flag(regs, PIF_FTRACE_FULL_REGS))
+		return regs;
+	return NULL;
+}
+
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+struct fgraph_ret_regs {
+	unsigned long gpr2;
+	unsigned long fp;
+};
+
+static __always_inline unsigned long fgraph_ret_regs_return_value(struct fgraph_ret_regs *ret_regs)
+{
+	return ret_regs->gpr2;
+}
+
+static __always_inline unsigned long fgraph_ret_regs_frame_pointer(struct fgraph_ret_regs *ret_regs)
+{
+	return ret_regs->fp;
+}
+#endif /* CONFIG_FUNCTION_GRAPH_TRACER */
+
+static __always_inline unsigned long
+ftrace_regs_get_instruction_pointer(const struct ftrace_regs *fregs)
+{
+	return fregs->regs.psw.addr;
+}
+
+static __always_inline void
+ftrace_regs_set_instruction_pointer(struct ftrace_regs *fregs,
+				    unsigned long ip)
+{
+	fregs->regs.psw.addr = ip;
+}
+
+#define ftrace_regs_get_argument(fregs, n) \
+	regs_get_kernel_argument(&(fregs)->regs, n)
+#define ftrace_regs_get_stack_pointer(fregs) \
+	kernel_stack_pointer(&(fregs)->regs)
+#define ftrace_regs_return_value(fregs) \
+	regs_return_value(&(fregs)->regs)
+#define ftrace_regs_set_return_value(fregs, ret) \
+	regs_set_return_value(&(fregs)->regs, ret)
+#define ftrace_override_function_with_return(fregs) \
+	override_function_with_return(&(fregs)->regs)
+#define ftrace_regs_query_register_offset(name) \
+	regs_query_register_offset(name)
+
+#ifdef CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS
+/*
+ * When an ftrace registered caller is tracing a function that is
+ * also set by a register_ftrace_direct() call, it needs to be
+ * differentiated in the ftrace_caller trampoline. To do this,
+ * place the direct caller in the ORIG_GPR2 part of pt_regs. This
+ * tells the ftrace_caller that there's a direct caller.
+ */
+static inline void arch_ftrace_set_direct_caller(struct ftrace_regs *fregs, unsigned long addr)
+{
+	struct pt_regs *regs = &fregs->regs;
+	regs->orig_gpr2 = addr;
+}
+#endif /* CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS */
+
+/*
+ * Even though the system call numbers are identical for s390/s390x a
+ * different system call table is used for compat tasks. This may lead
+ * to e.g. incorrect or missing trace event sysfs files.
+ * Therefore simply do not trace compat system calls at all.
+ * See kernel/trace/trace_syscalls.c.
+ */
+#define ARCH_TRACE_IGNORE_COMPAT_SYSCALLS
+static inline bool arch_trace_is_compat_syscall(struct pt_regs *regs)
+{
+	return is_compat_task();
+}
+
+#define ARCH_HAS_SYSCALL_MATCH_SYM_NAME
+static inline bool arch_syscall_match_sym_name(const char *sym,
+					       const char *name)
+{
+	/*
+	 * Skip __s390_ and __s390x_ prefix - due to compat wrappers
+	 * and aliasing some symbols of 64 bit system call functions
+	 * may get the __s390_ prefix instead of the __s390x_ prefix.
+	 */
+	return !strcmp(sym + 7, name) || !strcmp(sym + 8, name);
+}
+
+#endif /* __ASSEMBLY__ */
+
+#ifdef CONFIG_FUNCTION_TRACER
+
+#define FTRACE_NOP_INSN .word 0xc004, 0x0000, 0x0000 /* brcl 0,0 */
+
+#ifndef CC_USING_HOTPATCH
+
+#define FTRACE_GEN_MCOUNT_RECORD(name)		\
+	.section __mcount_loc, "a", @progbits;	\
+	.quad name;				\
+	.previous;
+
+#else /* !CC_USING_HOTPATCH */
+
+#define FTRACE_GEN_MCOUNT_RECORD(name)
+
+#endif /* !CC_USING_HOTPATCH */
+
+#define FTRACE_GEN_NOP_ASM(name)		\
+	FTRACE_GEN_MCOUNT_RECORD(name)		\
+	FTRACE_NOP_INSN
+
+#else /* CONFIG_FUNCTION_TRACER */
+
+#define FTRACE_GEN_NOP_ASM(name)
+
+#endif /* CONFIG_FUNCTION_TRACER */
+
+#endif /* _ASM_S390_FTRACE_H */
diff --git a/arch/s390/include/asm/ftrace.lds.h b/arch/s390/include/asm/ftrace.lds.h
new file mode 100644
index 0000000000..968adfd412
--- /dev/null
+++ b/arch/s390/include/asm/ftrace.lds.h
@@ -0,0 +1,21 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef DIV_ROUND_UP
+#define DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d))
+#endif
+
+#define SIZEOF_MCOUNT_LOC_ENTRY 8
+#define SIZEOF_FTRACE_HOTPATCH_TRAMPOLINE 24
+#define FTRACE_HOTPATCH_TRAMPOLINES_SIZE(n)				       \
+	DIV_ROUND_UP(SIZEOF_FTRACE_HOTPATCH_TRAMPOLINE * (n),		       \
+		     SIZEOF_MCOUNT_LOC_ENTRY)
+
+#ifdef CONFIG_FUNCTION_TRACER
+#define FTRACE_HOTPATCH_TRAMPOLINES_TEXT				       \
+	. = ALIGN(8);							       \
+	__ftrace_hotpatch_trampolines_start = .;			       \
+	. = . + FTRACE_HOTPATCH_TRAMPOLINES_SIZE(__stop_mcount_loc -	       \
+						 __start_mcount_loc);	       \
+	__ftrace_hotpatch_trampolines_end = .;
+#else
+#define FTRACE_HOTPATCH_TRAMPOLINES_TEXT
+#endif
diff --git a/arch/s390/include/asm/futex.h b/arch/s390/include/asm/futex.h
new file mode 100644
index 0000000000..eaeaeb3ff0
--- /dev/null
+++ b/arch/s390/include/asm/futex.h
@@ -0,0 +1,81 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_S390_FUTEX_H
+#define _ASM_S390_FUTEX_H
+
+#include <linux/uaccess.h>
+#include <linux/futex.h>
+#include <asm/asm-extable.h>
+#include <asm/mmu_context.h>
+#include <asm/errno.h>
+
+#define __futex_atomic_op(insn, ret, oldval, newval, uaddr, oparg)	\
+	asm volatile(							\
+		"   sacf  256\n"					\
+		"0: l     %1,0(%6)\n"					\
+		"1:"insn						\
+		"2: cs    %1,%2,0(%6)\n"				\
+		"3: jl    1b\n"						\
+		"   lhi   %0,0\n"					\
+		"4: sacf  768\n"					\
+		EX_TABLE(0b,4b) EX_TABLE(1b,4b)				\
+		EX_TABLE(2b,4b) EX_TABLE(3b,4b)				\
+		: "=d" (ret), "=&d" (oldval), "=&d" (newval),		\
+		  "=m" (*uaddr)						\
+		: "0" (-EFAULT), "d" (oparg), "a" (uaddr),		\
+		  "m" (*uaddr) : "cc");
+
+static inline int arch_futex_atomic_op_inuser(int op, int oparg, int *oval,
+		u32 __user *uaddr)
+{
+	int oldval = 0, newval, ret;
+
+	switch (op) {
+	case FUTEX_OP_SET:
+		__futex_atomic_op("lr %2,%5\n",
+				  ret, oldval, newval, uaddr, oparg);
+		break;
+	case FUTEX_OP_ADD:
+		__futex_atomic_op("lr %2,%1\nar %2,%5\n",
+				  ret, oldval, newval, uaddr, oparg);
+		break;
+	case FUTEX_OP_OR:
+		__futex_atomic_op("lr %2,%1\nor %2,%5\n",
+				  ret, oldval, newval, uaddr, oparg);
+		break;
+	case FUTEX_OP_ANDN:
+		__futex_atomic_op("lr %2,%1\nnr %2,%5\n",
+				  ret, oldval, newval, uaddr, oparg);
+		break;
+	case FUTEX_OP_XOR:
+		__futex_atomic_op("lr %2,%1\nxr %2,%5\n",
+				  ret, oldval, newval, uaddr, oparg);
+		break;
+	default:
+		ret = -ENOSYS;
+	}
+
+	if (!ret)
+		*oval = oldval;
+
+	return ret;
+}
+
+static inline int futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr,
+						u32 oldval, u32 newval)
+{
+	int ret;
+
+	asm volatile(
+		"   sacf 256\n"
+		"0: cs   %1,%4,0(%5)\n"
+		"1: la   %0,0\n"
+		"2: sacf 768\n"
+		EX_TABLE(0b,2b) EX_TABLE(1b,2b)
+		: "=d" (ret), "+d" (oldval), "=m" (*uaddr)
+		: "0" (-EFAULT), "d" (newval), "a" (uaddr), "m" (*uaddr)
+		: "cc", "memory");
+	*uval = oldval;
+	return ret;
+}
+
+#endif /* _ASM_S390_FUTEX_H */
diff --git a/arch/s390/include/asm/gmap.h b/arch/s390/include/asm/gmap.h
new file mode 100644
index 0000000000..5cc46e0dde
--- /dev/null
+++ b/arch/s390/include/asm/gmap.h
@@ -0,0 +1,188 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ *  KVM guest address space mapping code
+ *
+ *    Copyright IBM Corp. 2007, 2016
+ *    Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>
+ */
+
+#ifndef _ASM_S390_GMAP_H
+#define _ASM_S390_GMAP_H
+
+#include <linux/radix-tree.h>
+#include <linux/refcount.h>
+
+/* Generic bits for GMAP notification on DAT table entry changes. */
+#define GMAP_NOTIFY_SHADOW	0x2
+#define GMAP_NOTIFY_MPROT	0x1
+
+/* Status bits only for huge segment entries */
+#define _SEGMENT_ENTRY_GMAP_IN		0x8000	/* invalidation notify bit */
+#define _SEGMENT_ENTRY_GMAP_UC		0x4000	/* dirty (migration) */
+
+/**
+ * struct gmap_struct - guest address space
+ * @list: list head for the mm->context gmap list
+ * @crst_list: list of all crst tables used in the guest address space
+ * @mm: pointer to the parent mm_struct
+ * @guest_to_host: radix tree with guest to host address translation
+ * @host_to_guest: radix tree with pointer to segment table entries
+ * @guest_table_lock: spinlock to protect all entries in the guest page table
+ * @ref_count: reference counter for the gmap structure
+ * @table: pointer to the page directory
+ * @asce: address space control element for gmap page table
+ * @pfault_enabled: defines if pfaults are applicable for the guest
+ * @guest_handle: protected virtual machine handle for the ultravisor
+ * @host_to_rmap: radix tree with gmap_rmap lists
+ * @children: list of shadow gmap structures
+ * @pt_list: list of all page tables used in the shadow guest address space
+ * @shadow_lock: spinlock to protect the shadow gmap list
+ * @parent: pointer to the parent gmap for shadow guest address spaces
+ * @orig_asce: ASCE for which the shadow page table has been created
+ * @edat_level: edat level to be used for the shadow translation
+ * @removed: flag to indicate if a shadow guest address space has been removed
+ * @initialized: flag to indicate if a shadow guest address space can be used
+ */
+struct gmap {
+	struct list_head list;
+	struct list_head crst_list;
+	struct mm_struct *mm;
+	struct radix_tree_root guest_to_host;
+	struct radix_tree_root host_to_guest;
+	spinlock_t guest_table_lock;
+	refcount_t ref_count;
+	unsigned long *table;
+	unsigned long asce;
+	unsigned long asce_end;
+	void *private;
+	bool pfault_enabled;
+	/* only set for protected virtual machines */
+	unsigned long guest_handle;
+	/* Additional data for shadow guest address spaces */
+	struct radix_tree_root host_to_rmap;
+	struct list_head children;
+	struct list_head pt_list;
+	spinlock_t shadow_lock;
+	struct gmap *parent;
+	unsigned long orig_asce;
+	int edat_level;
+	bool removed;
+	bool initialized;
+};
+
+/**
+ * struct gmap_rmap - reverse mapping for shadow page table entries
+ * @next: pointer to next rmap in the list
+ * @raddr: virtual rmap address in the shadow guest address space
+ */
+struct gmap_rmap {
+	struct gmap_rmap *next;
+	unsigned long raddr;
+};
+
+#define gmap_for_each_rmap(pos, head) \
+	for (pos = (head); pos; pos = pos->next)
+
+#define gmap_for_each_rmap_safe(pos, n, head) \
+	for (pos = (head); n = pos ? pos->next : NULL, pos; pos = n)
+
+/**
+ * struct gmap_notifier - notify function block for page invalidation
+ * @notifier_call: address of callback function
+ */
+struct gmap_notifier {
+	struct list_head list;
+	struct rcu_head rcu;
+	void (*notifier_call)(struct gmap *gmap, unsigned long start,
+			      unsigned long end);
+};
+
+static inline int gmap_is_shadow(struct gmap *gmap)
+{
+	return !!gmap->parent;
+}
+
+struct gmap *gmap_create(struct mm_struct *mm, unsigned long limit);
+void gmap_remove(struct gmap *gmap);
+struct gmap *gmap_get(struct gmap *gmap);
+void gmap_put(struct gmap *gmap);
+
+void gmap_enable(struct gmap *gmap);
+void gmap_disable(struct gmap *gmap);
+struct gmap *gmap_get_enabled(void);
+int gmap_map_segment(struct gmap *gmap, unsigned long from,
+		     unsigned long to, unsigned long len);
+int gmap_unmap_segment(struct gmap *gmap, unsigned long to, unsigned long len);
+unsigned long __gmap_translate(struct gmap *, unsigned long gaddr);
+unsigned long gmap_translate(struct gmap *, unsigned long gaddr);
+int __gmap_link(struct gmap *gmap, unsigned long gaddr, unsigned long vmaddr);
+int gmap_fault(struct gmap *, unsigned long gaddr, unsigned int fault_flags);
+void gmap_discard(struct gmap *, unsigned long from, unsigned long to);
+void __gmap_zap(struct gmap *, unsigned long gaddr);
+void gmap_unlink(struct mm_struct *, unsigned long *table, unsigned long vmaddr);
+
+int gmap_read_table(struct gmap *gmap, unsigned long gaddr, unsigned long *val);
+
+struct gmap *gmap_shadow(struct gmap *parent, unsigned long asce,
+			 int edat_level);
+int gmap_shadow_valid(struct gmap *sg, unsigned long asce, int edat_level);
+int gmap_shadow_r2t(struct gmap *sg, unsigned long saddr, unsigned long r2t,
+		    int fake);
+int gmap_shadow_r3t(struct gmap *sg, unsigned long saddr, unsigned long r3t,
+		    int fake);
+int gmap_shadow_sgt(struct gmap *sg, unsigned long saddr, unsigned long sgt,
+		    int fake);
+int gmap_shadow_pgt(struct gmap *sg, unsigned long saddr, unsigned long pgt,
+		    int fake);
+int gmap_shadow_pgt_lookup(struct gmap *sg, unsigned long saddr,
+			   unsigned long *pgt, int *dat_protection, int *fake);
+int gmap_shadow_page(struct gmap *sg, unsigned long saddr, pte_t pte);
+
+void gmap_register_pte_notifier(struct gmap_notifier *);
+void gmap_unregister_pte_notifier(struct gmap_notifier *);
+
+int gmap_mprotect_notify(struct gmap *, unsigned long start,
+			 unsigned long len, int prot);
+
+void gmap_sync_dirty_log_pmd(struct gmap *gmap, unsigned long dirty_bitmap[4],
+			     unsigned long gaddr, unsigned long vmaddr);
+int gmap_mark_unmergeable(void);
+void s390_unlist_old_asce(struct gmap *gmap);
+int s390_replace_asce(struct gmap *gmap);
+void s390_uv_destroy_pfns(unsigned long count, unsigned long *pfns);
+int __s390_uv_destroy_range(struct mm_struct *mm, unsigned long start,
+			    unsigned long end, bool interruptible);
+
+/**
+ * s390_uv_destroy_range - Destroy a range of pages in the given mm.
+ * @mm: the mm on which to operate on
+ * @start: the start of the range
+ * @end: the end of the range
+ *
+ * This function will call cond_sched, so it should not generate stalls, but
+ * it will otherwise only return when it completed.
+ */
+static inline void s390_uv_destroy_range(struct mm_struct *mm, unsigned long start,
+					 unsigned long end)
+{
+	(void)__s390_uv_destroy_range(mm, start, end, false);
+}
+
+/**
+ * s390_uv_destroy_range_interruptible - Destroy a range of pages in the
+ * given mm, but stop when a fatal signal is received.
+ * @mm: the mm on which to operate on
+ * @start: the start of the range
+ * @end: the end of the range
+ *
+ * This function will call cond_sched, so it should not generate stalls. If
+ * a fatal signal is received, it will return with -EINTR immediately,
+ * without finishing destroying the whole range. Upon successful
+ * completion, 0 is returned.
+ */
+static inline int s390_uv_destroy_range_interruptible(struct mm_struct *mm, unsigned long start,
+						      unsigned long end)
+{
+	return __s390_uv_destroy_range(mm, start, end, true);
+}
+#endif /* _ASM_S390_GMAP_H */
diff --git a/arch/s390/include/asm/hardirq.h b/arch/s390/include/asm/hardirq.h
new file mode 100644
index 0000000000..58668ffb54
--- /dev/null
+++ b/arch/s390/include/asm/hardirq.h
@@ -0,0 +1,28 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ *  S390 version
+ *    Copyright IBM Corp. 1999, 2000
+ *    Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com),
+ *               Denis Joseph Barrow (djbarrow@de.ibm.com,barrow_dj@yahoo.com)
+ *
+ *  Derived from "include/asm-i386/hardirq.h"
+ */
+
+#ifndef __ASM_HARDIRQ_H
+#define __ASM_HARDIRQ_H
+
+#include <asm/lowcore.h>
+
+#define local_softirq_pending() (S390_lowcore.softirq_pending)
+#define set_softirq_pending(x) (S390_lowcore.softirq_pending = (x))
+#define or_softirq_pending(x)  (S390_lowcore.softirq_pending |= (x))
+
+#define __ARCH_IRQ_STAT
+#define __ARCH_IRQ_EXIT_IRQS_DISABLED
+
+static inline void ack_bad_irq(unsigned int irq)
+{
+	printk(KERN_CRIT "unexpected IRQ trap at vector %02x\n", irq);
+}
+
+#endif /* __ASM_HARDIRQ_H */
diff --git a/arch/s390/include/asm/hugetlb.h b/arch/s390/include/asm/hugetlb.h
new file mode 100644
index 0000000000..deb198a610
--- /dev/null
+++ b/arch/s390/include/asm/hugetlb.h
@@ -0,0 +1,147 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ *  IBM System z Huge TLB Page Support for Kernel.
+ *
+ *    Copyright IBM Corp. 2008
+ *    Author(s): Gerald Schaefer <gerald.schaefer@de.ibm.com>
+ */
+
+#ifndef _ASM_S390_HUGETLB_H
+#define _ASM_S390_HUGETLB_H
+
+#include <linux/pgtable.h>
+#include <asm/page.h>
+
+#define hugetlb_free_pgd_range			free_pgd_range
+#define hugepages_supported()			(MACHINE_HAS_EDAT1)
+
+void set_huge_pte_at(struct mm_struct *mm, unsigned long addr,
+		     pte_t *ptep, pte_t pte, unsigned long sz);
+void __set_huge_pte_at(struct mm_struct *mm, unsigned long addr,
+		     pte_t *ptep, pte_t pte);
+pte_t huge_ptep_get(pte_t *ptep);
+pte_t huge_ptep_get_and_clear(struct mm_struct *mm,
+			      unsigned long addr, pte_t *ptep);
+
+/*
+ * If the arch doesn't supply something else, assume that hugepage
+ * size aligned regions are ok without further preparation.
+ */
+static inline int prepare_hugepage_range(struct file *file,
+			unsigned long addr, unsigned long len)
+{
+	struct hstate *h = hstate_file(file);
+
+	if (len & ~huge_page_mask(h))
+		return -EINVAL;
+	if (addr & ~huge_page_mask(h))
+		return -EINVAL;
+	return 0;
+}
+
+static inline void arch_clear_hugepage_flags(struct page *page)
+{
+	clear_bit(PG_arch_1, &page->flags);
+}
+#define arch_clear_hugepage_flags arch_clear_hugepage_flags
+
+static inline void huge_pte_clear(struct mm_struct *mm, unsigned long addr,
+				  pte_t *ptep, unsigned long sz)
+{
+	if ((pte_val(*ptep) & _REGION_ENTRY_TYPE_MASK) == _REGION_ENTRY_TYPE_R3)
+		set_pte(ptep, __pte(_REGION3_ENTRY_EMPTY));
+	else
+		set_pte(ptep, __pte(_SEGMENT_ENTRY_EMPTY));
+}
+
+static inline pte_t huge_ptep_clear_flush(struct vm_area_struct *vma,
+					  unsigned long address, pte_t *ptep)
+{
+	return huge_ptep_get_and_clear(vma->vm_mm, address, ptep);
+}
+
+static inline int huge_ptep_set_access_flags(struct vm_area_struct *vma,
+					     unsigned long addr, pte_t *ptep,
+					     pte_t pte, int dirty)
+{
+	int changed = !pte_same(huge_ptep_get(ptep), pte);
+	if (changed) {
+		huge_ptep_get_and_clear(vma->vm_mm, addr, ptep);
+		__set_huge_pte_at(vma->vm_mm, addr, ptep, pte);
+	}
+	return changed;
+}
+
+static inline void huge_ptep_set_wrprotect(struct mm_struct *mm,
+					   unsigned long addr, pte_t *ptep)
+{
+	pte_t pte = huge_ptep_get_and_clear(mm, addr, ptep);
+	__set_huge_pte_at(mm, addr, ptep, pte_wrprotect(pte));
+}
+
+static inline pte_t mk_huge_pte(struct page *page, pgprot_t pgprot)
+{
+	return mk_pte(page, pgprot);
+}
+
+static inline int huge_pte_none(pte_t pte)
+{
+	return pte_none(pte);
+}
+
+static inline int huge_pte_none_mostly(pte_t pte)
+{
+	return huge_pte_none(pte);
+}
+
+static inline int huge_pte_write(pte_t pte)
+{
+	return pte_write(pte);
+}
+
+static inline int huge_pte_dirty(pte_t pte)
+{
+	return pte_dirty(pte);
+}
+
+static inline pte_t huge_pte_mkwrite(pte_t pte)
+{
+	return pte_mkwrite_novma(pte);
+}
+
+static inline pte_t huge_pte_mkdirty(pte_t pte)
+{
+	return pte_mkdirty(pte);
+}
+
+static inline pte_t huge_pte_wrprotect(pte_t pte)
+{
+	return pte_wrprotect(pte);
+}
+
+static inline pte_t huge_pte_modify(pte_t pte, pgprot_t newprot)
+{
+	return pte_modify(pte, newprot);
+}
+
+static inline pte_t huge_pte_mkuffd_wp(pte_t pte)
+{
+	return pte;
+}
+
+static inline pte_t huge_pte_clear_uffd_wp(pte_t pte)
+{
+	return pte;
+}
+
+static inline int huge_pte_uffd_wp(pte_t pte)
+{
+	return 0;
+}
+
+static inline bool gigantic_page_runtime_supported(void)
+{
+	return true;
+}
+
+#endif /* _ASM_S390_HUGETLB_H */
diff --git a/arch/s390/include/asm/hw_irq.h b/arch/s390/include/asm/hw_irq.h
new file mode 100644
index 0000000000..9078b5b6b8
--- /dev/null
+++ b/arch/s390/include/asm/hw_irq.h
@@ -0,0 +1,11 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _HW_IRQ_H
+#define _HW_IRQ_H
+
+#include <linux/msi.h>
+#include <linux/pci.h>
+
+void __init init_airq_interrupts(void);
+void __init init_cio_interrupts(void);
+
+#endif
diff --git a/arch/s390/include/asm/idals.h b/arch/s390/include/asm/idals.h
new file mode 100644
index 0000000000..59fcc3c72e
--- /dev/null
+++ b/arch/s390/include/asm/idals.h
@@ -0,0 +1,244 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* 
+ * Author(s)......: Holger Smolinski <Holger.Smolinski@de.ibm.com>
+ *		    Martin Schwidefsky <schwidefsky@de.ibm.com>
+ * Bugreports.to..: <Linux390@de.ibm.com>
+ * Copyright IBM Corp. 2000
+ *
+ * History of changes
+ * 07/24/00 new file
+ * 05/04/02 code restructuring.
+ */
+
+#ifndef _S390_IDALS_H
+#define _S390_IDALS_H
+
+#include <linux/errno.h>
+#include <linux/err.h>
+#include <linux/types.h>
+#include <linux/slab.h>
+#include <asm/cio.h>
+#include <linux/uaccess.h>
+
+#define IDA_SIZE_LOG 12 /* 11 for 2k , 12 for 4k */
+#define IDA_BLOCK_SIZE (1L<<IDA_SIZE_LOG)
+
+#define IDA_2K_SIZE_LOG 11
+#define IDA_2K_BLOCK_SIZE (1L << IDA_2K_SIZE_LOG)
+
+/*
+ * Test if an address/length pair needs an idal list.
+ */
+static inline int
+idal_is_needed(void *vaddr, unsigned int length)
+{
+	return ((__pa(vaddr) + length - 1) >> 31) != 0;
+}
+
+
+/*
+ * Return the number of idal words needed for an address/length pair.
+ */
+static inline unsigned int idal_nr_words(void *vaddr, unsigned int length)
+{
+	return ((__pa(vaddr) & (IDA_BLOCK_SIZE-1)) + length +
+		(IDA_BLOCK_SIZE-1)) >> IDA_SIZE_LOG;
+}
+
+/*
+ * Return the number of 2K IDA words needed for an address/length pair.
+ */
+static inline unsigned int idal_2k_nr_words(void *vaddr, unsigned int length)
+{
+	return ((__pa(vaddr) & (IDA_2K_BLOCK_SIZE - 1)) + length +
+		(IDA_2K_BLOCK_SIZE - 1)) >> IDA_2K_SIZE_LOG;
+}
+
+/*
+ * Create the list of idal words for an address/length pair.
+ */
+static inline unsigned long *idal_create_words(unsigned long *idaws,
+					       void *vaddr, unsigned int length)
+{
+	unsigned long paddr;
+	unsigned int cidaw;
+
+	paddr = __pa(vaddr);
+	cidaw = ((paddr & (IDA_BLOCK_SIZE-1)) + length + 
+		 (IDA_BLOCK_SIZE-1)) >> IDA_SIZE_LOG;
+	*idaws++ = paddr;
+	paddr &= -IDA_BLOCK_SIZE;
+	while (--cidaw > 0) {
+		paddr += IDA_BLOCK_SIZE;
+		*idaws++ = paddr;
+	}
+	return idaws;
+}
+
+/*
+ * Sets the address of the data in CCW.
+ * If necessary it allocates an IDAL and sets the appropriate flags.
+ */
+static inline int
+set_normalized_cda(struct ccw1 * ccw, void *vaddr)
+{
+	unsigned int nridaws;
+	unsigned long *idal;
+
+	if (ccw->flags & CCW_FLAG_IDA)
+		return -EINVAL;
+	nridaws = idal_nr_words(vaddr, ccw->count);
+	if (nridaws > 0) {
+		idal = kmalloc(nridaws * sizeof(unsigned long),
+			       GFP_ATOMIC | GFP_DMA );
+		if (idal == NULL)
+			return -ENOMEM;
+		idal_create_words(idal, vaddr, ccw->count);
+		ccw->flags |= CCW_FLAG_IDA;
+		vaddr = idal;
+	}
+	ccw->cda = (__u32)(unsigned long) vaddr;
+	return 0;
+}
+
+/*
+ * Releases any allocated IDAL related to the CCW.
+ */
+static inline void
+clear_normalized_cda(struct ccw1 * ccw)
+{
+	if (ccw->flags & CCW_FLAG_IDA) {
+		kfree((void *)(unsigned long) ccw->cda);
+		ccw->flags &= ~CCW_FLAG_IDA;
+	}
+	ccw->cda = 0;
+}
+
+/*
+ * Idal buffer extension
+ */
+struct idal_buffer {
+	size_t size;
+	size_t page_order;
+	void *data[];
+};
+
+/*
+ * Allocate an idal buffer
+ */
+static inline struct idal_buffer *
+idal_buffer_alloc(size_t size, int page_order)
+{
+	struct idal_buffer *ib;
+	int nr_chunks, nr_ptrs, i;
+
+	nr_ptrs = (size + IDA_BLOCK_SIZE - 1) >> IDA_SIZE_LOG;
+	nr_chunks = (4096 << page_order) >> IDA_SIZE_LOG;
+	ib = kmalloc(struct_size(ib, data, nr_ptrs), GFP_DMA | GFP_KERNEL);
+	if (ib == NULL)
+		return ERR_PTR(-ENOMEM);
+	ib->size = size;
+	ib->page_order = page_order;
+	for (i = 0; i < nr_ptrs; i++) {
+		if ((i & (nr_chunks - 1)) != 0) {
+			ib->data[i] = ib->data[i-1] + IDA_BLOCK_SIZE;
+			continue;
+		}
+		ib->data[i] = (void *)
+			__get_free_pages(GFP_KERNEL, page_order);
+		if (ib->data[i] != NULL)
+			continue;
+		// Not enough memory
+		while (i >= nr_chunks) {
+			i -= nr_chunks;
+			free_pages((unsigned long) ib->data[i],
+				   ib->page_order);
+		}
+		kfree(ib);
+		return ERR_PTR(-ENOMEM);
+	}
+	return ib;
+}
+
+/*
+ * Free an idal buffer.
+ */
+static inline void
+idal_buffer_free(struct idal_buffer *ib)
+{
+	int nr_chunks, nr_ptrs, i;
+
+	nr_ptrs = (ib->size + IDA_BLOCK_SIZE - 1) >> IDA_SIZE_LOG;
+	nr_chunks = (4096 << ib->page_order) >> IDA_SIZE_LOG;
+	for (i = 0; i < nr_ptrs; i += nr_chunks)
+		free_pages((unsigned long) ib->data[i], ib->page_order);
+	kfree(ib);
+}
+
+/*
+ * Test if a idal list is really needed.
+ */
+static inline int
+__idal_buffer_is_needed(struct idal_buffer *ib)
+{
+	return ib->size > (4096ul << ib->page_order) ||
+		idal_is_needed(ib->data[0], ib->size);
+}
+
+/*
+ * Set channel data address to idal buffer.
+ */
+static inline void
+idal_buffer_set_cda(struct idal_buffer *ib, struct ccw1 *ccw)
+{
+	if (__idal_buffer_is_needed(ib)) {
+		// setup idals;
+		ccw->cda = (u32)(addr_t) ib->data;
+		ccw->flags |= CCW_FLAG_IDA;
+	} else
+		// we do not need idals - use direct addressing
+		ccw->cda = (u32)(addr_t) ib->data[0];
+	ccw->count = ib->size;
+}
+
+/*
+ * Copy count bytes from an idal buffer to user memory
+ */
+static inline size_t
+idal_buffer_to_user(struct idal_buffer *ib, void __user *to, size_t count)
+{
+	size_t left;
+	int i;
+
+	BUG_ON(count > ib->size);
+	for (i = 0; count > IDA_BLOCK_SIZE; i++) {
+		left = copy_to_user(to, ib->data[i], IDA_BLOCK_SIZE);
+		if (left)
+			return left + count - IDA_BLOCK_SIZE;
+		to = (void __user *) to + IDA_BLOCK_SIZE;
+		count -= IDA_BLOCK_SIZE;
+	}
+	return copy_to_user(to, ib->data[i], count);
+}
+
+/*
+ * Copy count bytes from user memory to an idal buffer
+ */
+static inline size_t
+idal_buffer_from_user(struct idal_buffer *ib, const void __user *from, size_t count)
+{
+	size_t left;
+	int i;
+
+	BUG_ON(count > ib->size);
+	for (i = 0; count > IDA_BLOCK_SIZE; i++) {
+		left = copy_from_user(ib->data[i], from, IDA_BLOCK_SIZE);
+		if (left)
+			return left + count - IDA_BLOCK_SIZE;
+		from = (void __user *) from + IDA_BLOCK_SIZE;
+		count -= IDA_BLOCK_SIZE;
+	}
+	return copy_from_user(ib->data[i], from, count);
+}
+
+#endif
diff --git a/arch/s390/include/asm/idle.h b/arch/s390/include/asm/idle.h
new file mode 100644
index 0000000000..09f763b9eb
--- /dev/null
+++ b/arch/s390/include/asm/idle.h
@@ -0,0 +1,27 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ *  Copyright IBM Corp. 2014
+ *
+ *  Author: Martin Schwidefsky <schwidefsky@de.ibm.com>
+ */
+
+#ifndef _S390_IDLE_H
+#define _S390_IDLE_H
+
+#include <linux/types.h>
+#include <linux/device.h>
+
+struct s390_idle_data {
+	unsigned long idle_count;
+	unsigned long idle_time;
+	unsigned long clock_idle_enter;
+	unsigned long timer_idle_enter;
+	unsigned long mt_cycles_enter[8];
+};
+
+extern struct device_attribute dev_attr_idle_count;
+extern struct device_attribute dev_attr_idle_time_us;
+
+void psw_idle(struct s390_idle_data *data, unsigned long psw_mask);
+
+#endif /* _S390_IDLE_H */
diff --git a/arch/s390/include/asm/io.h b/arch/s390/include/asm/io.h
new file mode 100644
index 0000000000..4453ad7c11
--- /dev/null
+++ b/arch/s390/include/asm/io.h
@@ -0,0 +1,80 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ *  S390 version
+ *    Copyright IBM Corp. 1999
+ *    Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com)
+ *
+ *  Derived from "include/asm-i386/io.h"
+ */
+
+#ifndef _S390_IO_H
+#define _S390_IO_H
+
+#include <linux/kernel.h>
+#include <asm/page.h>
+#include <asm/pgtable.h>
+#include <asm/pci_io.h>
+
+#define xlate_dev_mem_ptr xlate_dev_mem_ptr
+void *xlate_dev_mem_ptr(phys_addr_t phys);
+#define unxlate_dev_mem_ptr unxlate_dev_mem_ptr
+void unxlate_dev_mem_ptr(phys_addr_t phys, void *addr);
+
+#define IO_SPACE_LIMIT 0
+
+/*
+ * I/O memory mapping functions.
+ */
+#define ioremap_prot ioremap_prot
+#define iounmap iounmap
+
+#define _PAGE_IOREMAP pgprot_val(PAGE_KERNEL)
+
+#define ioremap_wc(addr, size)  \
+	ioremap_prot((addr), (size), pgprot_val(pgprot_writecombine(PAGE_KERNEL)))
+#define ioremap_wt(addr, size)  \
+	ioremap_prot((addr), (size), pgprot_val(pgprot_writethrough(PAGE_KERNEL)))
+
+static inline void __iomem *ioport_map(unsigned long port, unsigned int nr)
+{
+	return NULL;
+}
+
+static inline void ioport_unmap(void __iomem *p)
+{
+}
+
+#ifdef CONFIG_PCI
+
+/*
+ * s390 needs a private implementation of pci_iomap since ioremap with its
+ * offset parameter isn't sufficient. That's because BAR spaces are not
+ * disjunctive on s390 so we need the bar parameter of pci_iomap to find
+ * the corresponding device and create the mapping cookie.
+ */
+#define pci_iomap pci_iomap
+#define pci_iomap_range pci_iomap_range
+#define pci_iounmap pci_iounmap
+#define pci_iomap_wc pci_iomap_wc
+#define pci_iomap_wc_range pci_iomap_wc_range
+
+#define memcpy_fromio(dst, src, count)	zpci_memcpy_fromio(dst, src, count)
+#define memcpy_toio(dst, src, count)	zpci_memcpy_toio(dst, src, count)
+#define memset_io(dst, val, count)	zpci_memset_io(dst, val, count)
+
+#define mmiowb()	zpci_barrier()
+
+#define __raw_readb	zpci_read_u8
+#define __raw_readw	zpci_read_u16
+#define __raw_readl	zpci_read_u32
+#define __raw_readq	zpci_read_u64
+#define __raw_writeb	zpci_write_u8
+#define __raw_writew	zpci_write_u16
+#define __raw_writel	zpci_write_u32
+#define __raw_writeq	zpci_write_u64
+
+#endif /* CONFIG_PCI */
+
+#include <asm-generic/io.h>
+
+#endif
diff --git a/arch/s390/include/asm/ipl.h b/arch/s390/include/asm/ipl.h
new file mode 100644
index 0000000000..b0d0003247
--- /dev/null
+++ b/arch/s390/include/asm/ipl.h
@@ -0,0 +1,170 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * s390 (re)ipl support
+ *
+ * Copyright IBM Corp. 2007
+ */
+
+#ifndef _ASM_S390_IPL_H
+#define _ASM_S390_IPL_H
+
+#include <asm/lowcore.h>
+#include <asm/types.h>
+#include <asm/cio.h>
+#include <asm/setup.h>
+#include <asm/page.h>
+#include <uapi/asm/ipl.h>
+
+struct ipl_parameter_block {
+	struct ipl_pl_hdr hdr;
+	union {
+		struct ipl_pb_hdr pb0_hdr;
+		struct ipl_pb0_common common;
+		struct ipl_pb0_fcp fcp;
+		struct ipl_pb0_ccw ccw;
+		struct ipl_pb0_eckd eckd;
+		struct ipl_pb0_nvme nvme;
+		char raw[PAGE_SIZE - sizeof(struct ipl_pl_hdr)];
+	};
+} __packed __aligned(PAGE_SIZE);
+
+#define NSS_NAME_SIZE 8
+
+#define IPL_BP_FCP_LEN (sizeof(struct ipl_pl_hdr) + \
+			      sizeof(struct ipl_pb0_fcp))
+#define IPL_BP0_FCP_LEN (sizeof(struct ipl_pb0_fcp))
+
+#define IPL_BP_NVME_LEN (sizeof(struct ipl_pl_hdr) + \
+			      sizeof(struct ipl_pb0_nvme))
+#define IPL_BP0_NVME_LEN (sizeof(struct ipl_pb0_nvme))
+
+#define IPL_BP_CCW_LEN (sizeof(struct ipl_pl_hdr) + \
+			      sizeof(struct ipl_pb0_ccw))
+#define IPL_BP0_CCW_LEN (sizeof(struct ipl_pb0_ccw))
+
+#define IPL_BP_ECKD_LEN (sizeof(struct ipl_pl_hdr) + \
+			      sizeof(struct ipl_pb0_eckd))
+#define IPL_BP0_ECKD_LEN (sizeof(struct ipl_pb0_eckd))
+
+#define IPL_MAX_SUPPORTED_VERSION (0)
+
+#define IPL_RB_CERT_UNKNOWN ((unsigned short)-1)
+
+#define DIAG308_VMPARM_SIZE (64)
+#define DIAG308_SCPDATA_OFFSET offsetof(struct ipl_parameter_block, \
+					fcp.scp_data)
+#define DIAG308_SCPDATA_SIZE (PAGE_SIZE - DIAG308_SCPDATA_OFFSET)
+
+struct save_area;
+struct save_area * __init save_area_alloc(bool is_boot_cpu);
+struct save_area * __init save_area_boot_cpu(void);
+void __init save_area_add_regs(struct save_area *, void *regs);
+void __init save_area_add_vxrs(struct save_area *, __vector128 *vxrs);
+
+extern void s390_reset_system(void);
+extern size_t ipl_block_get_ascii_vmparm(char *dest, size_t size,
+					 const struct ipl_parameter_block *ipb);
+
+enum ipl_type {
+	IPL_TYPE_UNKNOWN	= 1,
+	IPL_TYPE_CCW		= 2,
+	IPL_TYPE_FCP		= 4,
+	IPL_TYPE_FCP_DUMP	= 8,
+	IPL_TYPE_NSS		= 16,
+	IPL_TYPE_NVME		= 32,
+	IPL_TYPE_NVME_DUMP	= 64,
+	IPL_TYPE_ECKD		= 128,
+	IPL_TYPE_ECKD_DUMP	= 256,
+};
+
+struct ipl_info
+{
+	enum ipl_type type;
+	union {
+		struct {
+			struct ccw_dev_id dev_id;
+		} ccw;
+		struct {
+			struct ccw_dev_id dev_id;
+		} eckd;
+		struct {
+			struct ccw_dev_id dev_id;
+			u64 wwpn;
+			u64 lun;
+		} fcp;
+		struct {
+			u32 fid;
+			u32 nsid;
+		} nvme;
+		struct {
+			char name[NSS_NAME_SIZE + 1];
+		} nss;
+	} data;
+};
+
+extern struct ipl_info ipl_info;
+extern void setup_ipl(void);
+extern void set_os_info_reipl_block(void);
+
+static inline bool is_ipl_type_dump(void)
+{
+	return (ipl_info.type == IPL_TYPE_FCP_DUMP) ||
+		(ipl_info.type == IPL_TYPE_ECKD_DUMP) ||
+		(ipl_info.type == IPL_TYPE_NVME_DUMP);
+}
+
+struct ipl_report {
+	struct ipl_parameter_block *ipib;
+	struct list_head components;
+	struct list_head certificates;
+	size_t size;
+};
+
+struct ipl_report_component {
+	struct list_head list;
+	struct ipl_rb_component_entry entry;
+};
+
+struct ipl_report_certificate {
+	struct list_head list;
+	struct ipl_rb_certificate_entry entry;
+	void *key;
+};
+
+struct kexec_buf;
+struct ipl_report *ipl_report_init(struct ipl_parameter_block *ipib);
+void *ipl_report_finish(struct ipl_report *report);
+int ipl_report_free(struct ipl_report *report);
+int ipl_report_add_component(struct ipl_report *report, struct kexec_buf *kbuf,
+			     unsigned char flags, unsigned short cert);
+int ipl_report_add_certificate(struct ipl_report *report, void *key,
+			       unsigned long addr, unsigned long len);
+
+/*
+ * DIAG 308 support
+ */
+enum diag308_subcode  {
+	DIAG308_CLEAR_RESET = 0,
+	DIAG308_LOAD_NORMAL_RESET = 1,
+	DIAG308_REL_HSA = 2,
+	DIAG308_LOAD_CLEAR = 3,
+	DIAG308_LOAD_NORMAL_DUMP = 4,
+	DIAG308_SET = 5,
+	DIAG308_STORE = 6,
+	DIAG308_LOAD_NORMAL = 7,
+};
+
+enum diag308_subcode_flags {
+	DIAG308_FLAG_EI = 1UL << 16,
+};
+
+enum diag308_rc {
+	DIAG308_RC_OK		= 0x0001,
+	DIAG308_RC_NOCONFIG	= 0x0102,
+};
+
+extern int diag308(unsigned long subcode, void *addr);
+extern void store_status(void (*fn)(void *), void *data);
+extern void lgr_info_log(void);
+
+#endif /* _ASM_S390_IPL_H */
diff --git a/arch/s390/include/asm/irq.h b/arch/s390/include/asm/irq.h
new file mode 100644
index 0000000000..89902f7547
--- /dev/null
+++ b/arch/s390/include/asm/irq.h
@@ -0,0 +1,123 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_IRQ_H
+#define _ASM_IRQ_H
+
+#define EXT_INTERRUPT	0
+#define IO_INTERRUPT	1
+#define THIN_INTERRUPT	2
+
+#define NR_IRQS_BASE	3
+
+#define NR_IRQS	NR_IRQS_BASE
+#define NR_IRQS_LEGACY NR_IRQS_BASE
+
+/* External interruption codes */
+#define EXT_IRQ_INTERRUPT_KEY	0x0040
+#define EXT_IRQ_CLK_COMP	0x1004
+#define EXT_IRQ_CPU_TIMER	0x1005
+#define EXT_IRQ_WARNING_TRACK	0x1007
+#define EXT_IRQ_MALFUNC_ALERT	0x1200
+#define EXT_IRQ_EMERGENCY_SIG	0x1201
+#define EXT_IRQ_EXTERNAL_CALL	0x1202
+#define EXT_IRQ_TIMING_ALERT	0x1406
+#define EXT_IRQ_MEASURE_ALERT	0x1407
+#define EXT_IRQ_SERVICE_SIG	0x2401
+#define EXT_IRQ_CP_SERVICE	0x2603
+#define EXT_IRQ_IUCV		0x4000
+
+#ifndef __ASSEMBLY__
+
+#include <linux/hardirq.h>
+#include <linux/percpu.h>
+#include <linux/cache.h>
+#include <linux/types.h>
+
+enum interruption_class {
+	IRQEXT_CLK,
+	IRQEXT_EXC,
+	IRQEXT_EMS,
+	IRQEXT_TMR,
+	IRQEXT_TLA,
+	IRQEXT_PFL,
+	IRQEXT_DSD,
+	IRQEXT_VRT,
+	IRQEXT_SCP,
+	IRQEXT_IUC,
+	IRQEXT_CMS,
+	IRQEXT_CMC,
+	IRQEXT_FTP,
+	IRQIO_CIO,
+	IRQIO_DAS,
+	IRQIO_C15,
+	IRQIO_C70,
+	IRQIO_TAP,
+	IRQIO_VMR,
+	IRQIO_LCS,
+	IRQIO_CTC,
+	IRQIO_ADM,
+	IRQIO_CSC,
+	IRQIO_VIR,
+	IRQIO_QAI,
+	IRQIO_APB,
+	IRQIO_PCF,
+	IRQIO_PCD,
+	IRQIO_MSI,
+	IRQIO_VAI,
+	IRQIO_GAL,
+	NMI_NMI,
+	CPU_RST,
+	NR_ARCH_IRQS
+};
+
+struct irq_stat {
+	unsigned int irqs[NR_ARCH_IRQS];
+};
+
+DECLARE_PER_CPU_SHARED_ALIGNED(struct irq_stat, irq_stat);
+
+static __always_inline void inc_irq_stat(enum interruption_class irq)
+{
+	__this_cpu_inc(irq_stat.irqs[irq]);
+}
+
+struct ext_code {
+	union {
+		struct {
+			unsigned short subcode;
+			unsigned short code;
+		};
+		unsigned int int_code;
+	};
+};
+
+typedef void (*ext_int_handler_t)(struct ext_code, unsigned int, unsigned long);
+
+int register_external_irq(u16 code, ext_int_handler_t handler);
+int unregister_external_irq(u16 code, ext_int_handler_t handler);
+
+enum irq_subclass {
+	IRQ_SUBCLASS_MEASUREMENT_ALERT = 5,
+	IRQ_SUBCLASS_SERVICE_SIGNAL = 9,
+};
+
+#define CR0_IRQ_SUBCLASS_MASK					  \
+	((1UL << (63 - 30))  /* Warning Track */		| \
+	 (1UL << (63 - 48))  /* Malfunction Alert */		| \
+	 (1UL << (63 - 49))  /* Emergency Signal */		| \
+	 (1UL << (63 - 50))  /* External Call */		| \
+	 (1UL << (63 - 52))  /* Clock Comparator */		| \
+	 (1UL << (63 - 53))  /* CPU Timer */			| \
+	 (1UL << (63 - 54))  /* Service Signal */		| \
+	 (1UL << (63 - 57))  /* Interrupt Key */		| \
+	 (1UL << (63 - 58))  /* Measurement Alert */		| \
+	 (1UL << (63 - 59))  /* Timing Alert */			| \
+	 (1UL << (63 - 62))) /* IUCV */
+
+void irq_subclass_register(enum irq_subclass subclass);
+void irq_subclass_unregister(enum irq_subclass subclass);
+
+#define irq_canonicalize(irq)  (irq)
+
+#endif /* __ASSEMBLY__ */
+
+#endif /* _ASM_IRQ_H */
diff --git a/arch/s390/include/asm/irq_work.h b/arch/s390/include/asm/irq_work.h
new file mode 100644
index 0000000000..603783766d
--- /dev/null
+++ b/arch/s390/include/asm/irq_work.h
@@ -0,0 +1,12 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_S390_IRQ_WORK_H
+#define _ASM_S390_IRQ_WORK_H
+
+static inline bool arch_irq_work_has_interrupt(void)
+{
+	return true;
+}
+
+void arch_irq_work_raise(void);
+
+#endif /* _ASM_S390_IRQ_WORK_H */
diff --git a/arch/s390/include/asm/irqflags.h b/arch/s390/include/asm/irqflags.h
new file mode 100644
index 0000000000..02427b205c
--- /dev/null
+++ b/arch/s390/include/asm/irqflags.h
@@ -0,0 +1,78 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ *    Copyright IBM Corp. 2006, 2010
+ *    Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>
+ */
+
+#ifndef __ASM_IRQFLAGS_H
+#define __ASM_IRQFLAGS_H
+
+#include <linux/types.h>
+
+#define ARCH_IRQ_ENABLED	(3UL << (BITS_PER_LONG - 8))
+
+/* store then OR system mask. */
+#define __arch_local_irq_stosm(__or)					\
+({									\
+	unsigned long __mask;						\
+	asm volatile(							\
+		"	stosm	%0,%1"					\
+		: "=Q" (__mask) : "i" (__or) : "memory");		\
+	__mask;								\
+})
+
+/* store then AND system mask. */
+#define __arch_local_irq_stnsm(__and)					\
+({									\
+	unsigned long __mask;						\
+	asm volatile(							\
+		"	stnsm	%0,%1"					\
+		: "=Q" (__mask) : "i" (__and) : "memory");		\
+	__mask;								\
+})
+
+/* set system mask. */
+static __always_inline void __arch_local_irq_ssm(unsigned long flags)
+{
+	asm volatile("ssm   %0" : : "Q" (flags) : "memory");
+}
+
+static __always_inline unsigned long arch_local_save_flags(void)
+{
+	return __arch_local_irq_stnsm(0xff);
+}
+
+static __always_inline unsigned long arch_local_irq_save(void)
+{
+	return __arch_local_irq_stnsm(0xfc);
+}
+
+static __always_inline void arch_local_irq_disable(void)
+{
+	arch_local_irq_save();
+}
+
+static __always_inline void arch_local_irq_enable(void)
+{
+	__arch_local_irq_stosm(0x03);
+}
+
+/* This only restores external and I/O interrupt state */
+static __always_inline void arch_local_irq_restore(unsigned long flags)
+{
+	/* only disabled->disabled and disabled->enabled is valid */
+	if (flags & ARCH_IRQ_ENABLED)
+		arch_local_irq_enable();
+}
+
+static __always_inline bool arch_irqs_disabled_flags(unsigned long flags)
+{
+	return !(flags & ARCH_IRQ_ENABLED);
+}
+
+static __always_inline bool arch_irqs_disabled(void)
+{
+	return arch_irqs_disabled_flags(arch_local_save_flags());
+}
+
+#endif /* __ASM_IRQFLAGS_H */
diff --git a/arch/s390/include/asm/isc.h b/arch/s390/include/asm/isc.h
new file mode 100644
index 0000000000..b2cc1ec78d
--- /dev/null
+++ b/arch/s390/include/asm/isc.h
@@ -0,0 +1,31 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_S390_ISC_H
+#define _ASM_S390_ISC_H
+
+#include <linux/types.h>
+
+/*
+ * I/O interruption subclasses used by drivers.
+ * Please add all used iscs here so that it is possible to distribute
+ * isc usage between drivers.
+ * Reminder: 0 is highest priority, 7 lowest.
+ */
+#define MAX_ISC 7
+
+/* Regular I/O interrupts. */
+#define IO_SCH_ISC 3			/* regular I/O subchannels */
+#define CONSOLE_ISC 1			/* console I/O subchannel */
+#define EADM_SCH_ISC 4			/* EADM subchannels */
+#define CHSC_SCH_ISC 7			/* CHSC subchannels */
+#define VFIO_CCW_ISC IO_SCH_ISC		/* VFIO-CCW I/O subchannels */
+/* Adapter interrupts. */
+#define QDIO_AIRQ_ISC IO_SCH_ISC	/* I/O subchannel in qdio mode */
+#define PCI_ISC 2			/* PCI I/O subchannels */
+#define GAL_ISC 5			/* GIB alert */
+#define AP_ISC 6			/* adjunct processor (crypto) devices */
+
+/* Functions for registration of I/O interruption subclasses */
+void isc_register(unsigned int isc);
+void isc_unregister(unsigned int isc);
+
+#endif /* _ASM_S390_ISC_H */
diff --git a/arch/s390/include/asm/itcw.h b/arch/s390/include/asm/itcw.h
new file mode 100644
index 0000000000..59b7396132
--- /dev/null
+++ b/arch/s390/include/asm/itcw.h
@@ -0,0 +1,31 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ *  Functions for incremental construction of fcx enabled I/O control blocks.
+ *
+ *    Copyright IBM Corp. 2008
+ *    Author(s): Peter Oberparleiter <peter.oberparleiter@de.ibm.com>
+ */
+
+#ifndef _ASM_S390_ITCW_H
+#define _ASM_S390_ITCW_H
+
+#include <linux/types.h>
+#include <asm/fcx.h>
+
+#define ITCW_OP_READ	0
+#define ITCW_OP_WRITE	1
+
+struct itcw;
+
+struct tcw *itcw_get_tcw(struct itcw *itcw);
+size_t itcw_calc_size(int intrg, int max_tidaws, int intrg_max_tidaws);
+struct itcw *itcw_init(void *buffer, size_t size, int op, int intrg,
+		       int max_tidaws, int intrg_max_tidaws);
+struct dcw *itcw_add_dcw(struct itcw *itcw, u8 cmd, u8 flags, void *cd,
+			 u8 cd_count, u32 count);
+struct tidaw *itcw_add_tidaw(struct itcw *itcw, u8 flags, void *addr,
+			     u32 count);
+void itcw_set_data(struct itcw *itcw, void *addr, int use_tidal);
+void itcw_finalize(struct itcw *itcw);
+
+#endif /* _ASM_S390_ITCW_H */
diff --git a/arch/s390/include/asm/jump_label.h b/arch/s390/include/asm/jump_label.h
new file mode 100644
index 0000000000..895f774bbc
--- /dev/null
+++ b/arch/s390/include/asm/jump_label.h
@@ -0,0 +1,55 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_S390_JUMP_LABEL_H
+#define _ASM_S390_JUMP_LABEL_H
+
+#define HAVE_JUMP_LABEL_BATCH
+
+#ifndef __ASSEMBLY__
+
+#include <linux/types.h>
+#include <linux/stringify.h>
+
+#define JUMP_LABEL_NOP_SIZE 6
+
+#ifdef CONFIG_CC_IS_CLANG
+#define JUMP_LABEL_STATIC_KEY_CONSTRAINT "i"
+#elif __GNUC__ < 9
+#define JUMP_LABEL_STATIC_KEY_CONSTRAINT "X"
+#else
+#define JUMP_LABEL_STATIC_KEY_CONSTRAINT "jdd"
+#endif
+
+/*
+ * We use a brcl 0,<offset> instruction for jump labels so it
+ * can be easily distinguished from a hotpatch generated instruction.
+ */
+static __always_inline bool arch_static_branch(struct static_key *key, bool branch)
+{
+	asm_volatile_goto("0:	brcl 0,%l[label]\n"
+			  ".pushsection __jump_table,\"aw\"\n"
+			  ".balign	8\n"
+			  ".long	0b-.,%l[label]-.\n"
+			  ".quad	%0+%1-.\n"
+			  ".popsection\n"
+			  : : JUMP_LABEL_STATIC_KEY_CONSTRAINT (key), "i" (branch) : : label);
+	return false;
+label:
+	return true;
+}
+
+static __always_inline bool arch_static_branch_jump(struct static_key *key, bool branch)
+{
+	asm_volatile_goto("0:	brcl 15,%l[label]\n"
+			  ".pushsection __jump_table,\"aw\"\n"
+			  ".balign	8\n"
+			  ".long	0b-.,%l[label]-.\n"
+			  ".quad	%0+%1-.\n"
+			  ".popsection\n"
+			  : : JUMP_LABEL_STATIC_KEY_CONSTRAINT (key), "i" (branch) : : label);
+	return false;
+label:
+	return true;
+}
+
+#endif  /* __ASSEMBLY__ */
+#endif
diff --git a/arch/s390/include/asm/kasan.h b/arch/s390/include/asm/kasan.h
new file mode 100644
index 0000000000..0cffead0f2
--- /dev/null
+++ b/arch/s390/include/asm/kasan.h
@@ -0,0 +1,18 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __ASM_KASAN_H
+#define __ASM_KASAN_H
+
+#include <linux/const.h>
+
+#ifdef CONFIG_KASAN
+
+#define KASAN_SHADOW_SCALE_SHIFT 3
+#define KASAN_SHADOW_SIZE						       \
+	(_AC(1, UL) << (_REGION1_SHIFT - KASAN_SHADOW_SCALE_SHIFT))
+#define KASAN_SHADOW_OFFSET	_AC(CONFIG_KASAN_SHADOW_OFFSET, UL)
+#define KASAN_SHADOW_START	KASAN_SHADOW_OFFSET
+#define KASAN_SHADOW_END	(KASAN_SHADOW_START + KASAN_SHADOW_SIZE)
+
+#endif
+
+#endif
diff --git a/arch/s390/include/asm/kdebug.h b/arch/s390/include/asm/kdebug.h
new file mode 100644
index 0000000000..4377238e47
--- /dev/null
+++ b/arch/s390/include/asm/kdebug.h
@@ -0,0 +1,28 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _S390_KDEBUG_H
+#define _S390_KDEBUG_H
+
+/*
+ * Feb 2006 Ported to s390 <grundym@us.ibm.com>
+ */
+
+struct pt_regs;
+
+enum die_val {
+	DIE_OOPS = 1,
+	DIE_BPT,
+	DIE_SSTEP,
+	DIE_PANIC,
+	DIE_NMI,
+	DIE_DIE,
+	DIE_NMIWATCHDOG,
+	DIE_KERNELDEBUG,
+	DIE_TRAP,
+	DIE_GPF,
+	DIE_CALL,
+	DIE_NMI_IPI,
+};
+
+extern void __noreturn die(struct pt_regs *, const char *);
+
+#endif
diff --git a/arch/s390/include/asm/kexec.h b/arch/s390/include/asm/kexec.h
new file mode 100644
index 0000000000..1bd08eb56d
--- /dev/null
+++ b/arch/s390/include/asm/kexec.h
@@ -0,0 +1,110 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright IBM Corp. 2005
+ *
+ * Author(s): Rolf Adelsberger <adelsberger@de.ibm.com>
+ *
+ */
+
+#ifndef _S390_KEXEC_H
+#define _S390_KEXEC_H
+
+#include <linux/module.h>
+
+#include <asm/processor.h>
+#include <asm/page.h>
+#include <asm/setup.h>
+/*
+ * KEXEC_SOURCE_MEMORY_LIMIT maximum page get_free_page can return.
+ * I.e. Maximum page that is mapped directly into kernel memory,
+ * and kmap is not required.
+ */
+
+/* Maximum physical address we can use pages from */
+#define KEXEC_SOURCE_MEMORY_LIMIT (-1UL)
+
+/* Maximum address we can reach in physical address mode */
+#define KEXEC_DESTINATION_MEMORY_LIMIT (-1UL)
+
+/* Maximum address we can use for the control pages */
+/* Not more than 2GB */
+#define KEXEC_CONTROL_MEMORY_LIMIT (1UL<<31)
+
+/* Allocate control page with GFP_DMA */
+#define KEXEC_CONTROL_MEMORY_GFP (GFP_DMA | __GFP_NORETRY)
+
+/* Maximum address we can use for the crash control pages */
+#define KEXEC_CRASH_CONTROL_MEMORY_LIMIT (-1UL)
+
+/* Allocate one page for the pdp and the second for the code */
+#define KEXEC_CONTROL_PAGE_SIZE 4096
+
+/* Alignment of crashkernel memory */
+#define KEXEC_CRASH_MEM_ALIGN HPAGE_SIZE
+
+/* The native architecture */
+#define KEXEC_ARCH KEXEC_ARCH_S390
+
+/* Allow kexec_file to load a segment to 0 */
+#define KEXEC_BUF_MEM_UNKNOWN -1
+
+/* Provide a dummy definition to avoid build failures. */
+static inline void crash_setup_regs(struct pt_regs *newregs,
+					struct pt_regs *oldregs) { }
+
+struct kimage;
+struct s390_load_data {
+	/* Pointer to the kernel buffer. Used to register cmdline etc.. */
+	void *kernel_buf;
+
+	/* Load address of the kernel_buf. */
+	unsigned long kernel_mem;
+
+	/* Parmarea in the kernel buffer. */
+	struct parmarea *parm;
+
+	/* Total size of loaded segments in memory. Used as an offset. */
+	size_t memsz;
+
+	struct ipl_report *report;
+};
+
+int s390_verify_sig(const char *kernel, unsigned long kernel_len);
+void *kexec_file_add_components(struct kimage *image,
+				int (*add_kernel)(struct kimage *image,
+						  struct s390_load_data *data));
+int arch_kexec_do_relocs(int r_type, void *loc, unsigned long val,
+			 unsigned long addr);
+
+#define ARCH_HAS_KIMAGE_ARCH
+
+struct kimage_arch {
+	void *ipl_buf;
+};
+
+extern const struct kexec_file_ops s390_kexec_image_ops;
+extern const struct kexec_file_ops s390_kexec_elf_ops;
+
+#ifdef CONFIG_CRASH_DUMP
+void crash_free_reserved_phys_range(unsigned long begin, unsigned long end);
+#define crash_free_reserved_phys_range crash_free_reserved_phys_range
+
+void arch_kexec_protect_crashkres(void);
+#define arch_kexec_protect_crashkres arch_kexec_protect_crashkres
+
+void arch_kexec_unprotect_crashkres(void);
+#define arch_kexec_unprotect_crashkres arch_kexec_unprotect_crashkres
+#endif
+
+#ifdef CONFIG_KEXEC_FILE
+struct purgatory_info;
+int arch_kexec_apply_relocations_add(struct purgatory_info *pi,
+				     Elf_Shdr *section,
+				     const Elf_Shdr *relsec,
+				     const Elf_Shdr *symtab);
+#define arch_kexec_apply_relocations_add arch_kexec_apply_relocations_add
+
+int arch_kimage_file_post_load_cleanup(struct kimage *image);
+#define arch_kimage_file_post_load_cleanup arch_kimage_file_post_load_cleanup
+#endif
+#endif /*_S390_KEXEC_H */
diff --git a/arch/s390/include/asm/kfence.h b/arch/s390/include/asm/kfence.h
new file mode 100644
index 0000000000..e47fd8cbe7
--- /dev/null
+++ b/arch/s390/include/asm/kfence.h
@@ -0,0 +1,42 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#ifndef _ASM_S390_KFENCE_H
+#define _ASM_S390_KFENCE_H
+
+#include <linux/mm.h>
+#include <linux/kfence.h>
+#include <asm/set_memory.h>
+#include <asm/page.h>
+
+void __kernel_map_pages(struct page *page, int numpages, int enable);
+
+static __always_inline bool arch_kfence_init_pool(void)
+{
+	return true;
+}
+
+#define arch_kfence_test_address(addr) ((addr) & PAGE_MASK)
+
+/*
+ * Do not split kfence pool to 4k mapping with arch_kfence_init_pool(),
+ * but earlier where page table allocations still happen with memblock.
+ * Reason is that arch_kfence_init_pool() gets called when the system
+ * is still in a limbo state - disabling and enabling bottom halves is
+ * not yet allowed, but that is what our page_table_alloc() would do.
+ */
+static __always_inline void kfence_split_mapping(void)
+{
+#ifdef CONFIG_KFENCE
+	unsigned long pool_pages = KFENCE_POOL_SIZE >> PAGE_SHIFT;
+
+	set_memory_4k((unsigned long)__kfence_pool, pool_pages);
+#endif
+}
+
+static inline bool kfence_protect_page(unsigned long addr, bool protect)
+{
+	__kernel_map_pages(virt_to_page((void *)addr), 1, !protect);
+	return true;
+}
+
+#endif /* _ASM_S390_KFENCE_H */
diff --git a/arch/s390/include/asm/kprobes.h b/arch/s390/include/asm/kprobes.h
new file mode 100644
index 0000000000..83f732ca3a
--- /dev/null
+++ b/arch/s390/include/asm/kprobes.h
@@ -0,0 +1,81 @@
+/* SPDX-License-Identifier: GPL-2.0+ */
+#ifndef _ASM_S390_KPROBES_H
+#define _ASM_S390_KPROBES_H
+/*
+ *  Kernel Probes (KProbes)
+ *
+ * Copyright IBM Corp. 2002, 2006
+ *
+ * 2002-Oct	Created by Vamsi Krishna S <vamsi_krishna@in.ibm.com> Kernel
+ *		Probes initial implementation ( includes suggestions from
+ *		Rusty Russell).
+ * 2004-Nov	Modified for PPC64 by Ananth N Mavinakayanahalli
+ *		<ananth@in.ibm.com>
+ * 2005-Dec	Used as a template for s390 by Mike Grundy
+ *		<grundym@us.ibm.com>
+ */
+#include <linux/types.h>
+#include <asm-generic/kprobes.h>
+
+#define BREAKPOINT_INSTRUCTION	0x0002
+
+#define FIXUP_PSW_NORMAL	0x08
+#define FIXUP_BRANCH_NOT_TAKEN	0x04
+#define FIXUP_RETURN_REGISTER	0x02
+#define FIXUP_NOT_REQUIRED	0x01
+
+int probe_is_prohibited_opcode(u16 *insn);
+int probe_get_fixup_type(u16 *insn);
+int probe_is_insn_relative_long(u16 *insn);
+
+#ifdef CONFIG_KPROBES
+#include <linux/ptrace.h>
+#include <linux/percpu.h>
+#include <linux/sched/task_stack.h>
+
+#define __ARCH_WANT_KPROBES_INSN_SLOT
+
+struct pt_regs;
+struct kprobe;
+
+typedef u16 kprobe_opcode_t;
+
+/* Maximum instruction size is 3 (16bit) halfwords: */
+#define MAX_INSN_SIZE		0x0003
+#define MAX_STACK_SIZE		64
+#define MIN_STACK_SIZE(ADDR) (((MAX_STACK_SIZE) < \
+	(((unsigned long)task_stack_page(current)) + THREAD_SIZE - (ADDR))) \
+	? (MAX_STACK_SIZE) \
+	: (((unsigned long)task_stack_page(current)) + THREAD_SIZE - (ADDR)))
+
+#define kretprobe_blacklist_size 0
+
+/* Architecture specific copy of original instruction */
+struct arch_specific_insn {
+	/* copy of original instruction */
+	kprobe_opcode_t *insn;
+};
+
+struct prev_kprobe {
+	struct kprobe *kp;
+	unsigned long status;
+};
+
+/* per-cpu kprobe control block */
+struct kprobe_ctlblk {
+	unsigned long kprobe_status;
+	unsigned long kprobe_saved_imask;
+	unsigned long kprobe_saved_ctl[3];
+	struct prev_kprobe prev_kprobe;
+};
+
+void arch_remove_kprobe(struct kprobe *p);
+
+int kprobe_fault_handler(struct pt_regs *regs, int trapnr);
+int kprobe_exceptions_notify(struct notifier_block *self,
+	unsigned long val, void *data);
+
+#define flush_insn_slot(p)	do { } while (0)
+
+#endif /* CONFIG_KPROBES */
+#endif	/* _ASM_S390_KPROBES_H */
diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h
new file mode 100644
index 0000000000..427f9528a7
--- /dev/null
+++ b/arch/s390/include/asm/kvm_host.h
@@ -0,0 +1,1060 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * definition for kernel virtual machines on s390
+ *
+ * Copyright IBM Corp. 2008, 2018
+ *
+ *    Author(s): Carsten Otte <cotte@de.ibm.com>
+ */
+
+
+#ifndef ASM_KVM_HOST_H
+#define ASM_KVM_HOST_H
+
+#include <linux/types.h>
+#include <linux/hrtimer.h>
+#include <linux/interrupt.h>
+#include <linux/kvm_types.h>
+#include <linux/kvm_host.h>
+#include <linux/kvm.h>
+#include <linux/seqlock.h>
+#include <linux/module.h>
+#include <linux/pci.h>
+#include <linux/mmu_notifier.h>
+#include <asm/debug.h>
+#include <asm/cpu.h>
+#include <asm/fpu/api.h>
+#include <asm/isc.h>
+#include <asm/guarded_storage.h>
+
+#define KVM_S390_BSCA_CPU_SLOTS 64
+#define KVM_S390_ESCA_CPU_SLOTS 248
+#define KVM_MAX_VCPUS 255
+
+/*
+ * These seem to be used for allocating ->chip in the routing table, which we
+ * don't use. 1 is as small as we can get to reduce the needed memory. If we
+ * need to look at ->chip later on, we'll need to revisit this.
+ */
+#define KVM_NR_IRQCHIPS 1
+#define KVM_IRQCHIP_NUM_PINS 1
+#define KVM_HALT_POLL_NS_DEFAULT 50000
+
+/* s390-specific vcpu->requests bit members */
+#define KVM_REQ_ENABLE_IBS	KVM_ARCH_REQ(0)
+#define KVM_REQ_DISABLE_IBS	KVM_ARCH_REQ(1)
+#define KVM_REQ_ICPT_OPEREXC	KVM_ARCH_REQ(2)
+#define KVM_REQ_START_MIGRATION KVM_ARCH_REQ(3)
+#define KVM_REQ_STOP_MIGRATION  KVM_ARCH_REQ(4)
+#define KVM_REQ_VSIE_RESTART	KVM_ARCH_REQ(5)
+#define KVM_REQ_REFRESH_GUEST_PREFIX	\
+	KVM_ARCH_REQ_FLAGS(6, KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP)
+
+#define SIGP_CTRL_C		0x80
+#define SIGP_CTRL_SCN_MASK	0x3f
+
+union bsca_sigp_ctrl {
+	__u8 value;
+	struct {
+		__u8 c : 1;
+		__u8 r : 1;
+		__u8 scn : 6;
+	};
+};
+
+union esca_sigp_ctrl {
+	__u16 value;
+	struct {
+		__u8 c : 1;
+		__u8 reserved: 7;
+		__u8 scn;
+	};
+};
+
+struct esca_entry {
+	union esca_sigp_ctrl sigp_ctrl;
+	__u16   reserved1[3];
+	__u64   sda;
+	__u64   reserved2[6];
+};
+
+struct bsca_entry {
+	__u8	reserved0;
+	union bsca_sigp_ctrl	sigp_ctrl;
+	__u16	reserved[3];
+	__u64	sda;
+	__u64	reserved2[2];
+};
+
+union ipte_control {
+	unsigned long val;
+	struct {
+		unsigned long k  : 1;
+		unsigned long kh : 31;
+		unsigned long kg : 32;
+	};
+};
+
+union sca_utility {
+	__u16 val;
+	struct {
+		__u16 mtcr : 1;
+		__u16 reserved : 15;
+	};
+};
+
+struct bsca_block {
+	union ipte_control ipte_control;
+	__u64	reserved[5];
+	__u64	mcn;
+	union sca_utility utility;
+	__u8	reserved2[6];
+	struct bsca_entry cpu[KVM_S390_BSCA_CPU_SLOTS];
+};
+
+struct esca_block {
+	union ipte_control ipte_control;
+	__u64   reserved1[6];
+	union sca_utility utility;
+	__u8	reserved2[6];
+	__u64   mcn[4];
+	__u64   reserved3[20];
+	struct esca_entry cpu[KVM_S390_ESCA_CPU_SLOTS];
+};
+
+/*
+ * This struct is used to store some machine check info from lowcore
+ * for machine checks that happen while the guest is running.
+ * This info in host's lowcore might be overwritten by a second machine
+ * check from host when host is in the machine check's high-level handling.
+ * The size is 24 bytes.
+ */
+struct mcck_volatile_info {
+	__u64 mcic;
+	__u64 failing_storage_address;
+	__u32 ext_damage_code;
+	__u32 reserved;
+};
+
+#define CR0_INITIAL_MASK (CR0_UNUSED_56 | CR0_INTERRUPT_KEY_SUBMASK | \
+			  CR0_MEASUREMENT_ALERT_SUBMASK)
+#define CR14_INITIAL_MASK (CR14_UNUSED_32 | CR14_UNUSED_33 | \
+			   CR14_EXTERNAL_DAMAGE_SUBMASK)
+
+#define SIDAD_SIZE_MASK		0xff
+#define sida_addr(sie_block) phys_to_virt((sie_block)->sidad & PAGE_MASK)
+#define sida_size(sie_block) \
+	((((sie_block)->sidad & SIDAD_SIZE_MASK) + 1) * PAGE_SIZE)
+
+#define CPUSTAT_STOPPED    0x80000000
+#define CPUSTAT_WAIT       0x10000000
+#define CPUSTAT_ECALL_PEND 0x08000000
+#define CPUSTAT_STOP_INT   0x04000000
+#define CPUSTAT_IO_INT     0x02000000
+#define CPUSTAT_EXT_INT    0x01000000
+#define CPUSTAT_RUNNING    0x00800000
+#define CPUSTAT_RETAINED   0x00400000
+#define CPUSTAT_TIMING_SUB 0x00020000
+#define CPUSTAT_SIE_SUB    0x00010000
+#define CPUSTAT_RRF        0x00008000
+#define CPUSTAT_SLSV       0x00004000
+#define CPUSTAT_SLSR       0x00002000
+#define CPUSTAT_ZARCH      0x00000800
+#define CPUSTAT_MCDS       0x00000100
+#define CPUSTAT_KSS        0x00000200
+#define CPUSTAT_SM         0x00000080
+#define CPUSTAT_IBS        0x00000040
+#define CPUSTAT_GED2       0x00000010
+#define CPUSTAT_G          0x00000008
+#define CPUSTAT_GED        0x00000004
+#define CPUSTAT_J          0x00000002
+#define CPUSTAT_P          0x00000001
+
+struct kvm_s390_sie_block {
+	atomic_t cpuflags;		/* 0x0000 */
+	__u32 : 1;			/* 0x0004 */
+	__u32 prefix : 18;
+	__u32 : 1;
+	__u32 ibc : 12;
+	__u8	reserved08[4];		/* 0x0008 */
+#define PROG_IN_SIE (1<<0)
+	__u32	prog0c;			/* 0x000c */
+	union {
+		__u8	reserved10[16];		/* 0x0010 */
+		struct {
+			__u64	pv_handle_cpu;
+			__u64	pv_handle_config;
+		};
+	};
+#define PROG_BLOCK_SIE	(1<<0)
+#define PROG_REQUEST	(1<<1)
+	atomic_t prog20;		/* 0x0020 */
+	__u8	reserved24[4];		/* 0x0024 */
+	__u64	cputm;			/* 0x0028 */
+	__u64	ckc;			/* 0x0030 */
+	__u64	epoch;			/* 0x0038 */
+	__u32	svcc;			/* 0x0040 */
+#define LCTL_CR0	0x8000
+#define LCTL_CR6	0x0200
+#define LCTL_CR9	0x0040
+#define LCTL_CR10	0x0020
+#define LCTL_CR11	0x0010
+#define LCTL_CR14	0x0002
+	__u16   lctl;			/* 0x0044 */
+	__s16	icpua;			/* 0x0046 */
+#define ICTL_OPEREXC	0x80000000
+#define ICTL_PINT	0x20000000
+#define ICTL_LPSW	0x00400000
+#define ICTL_STCTL	0x00040000
+#define ICTL_ISKE	0x00004000
+#define ICTL_SSKE	0x00002000
+#define ICTL_RRBE	0x00001000
+#define ICTL_TPROT	0x00000200
+	__u32	ictl;			/* 0x0048 */
+#define ECA_CEI		0x80000000
+#define ECA_IB		0x40000000
+#define ECA_SIGPI	0x10000000
+#define ECA_MVPGI	0x01000000
+#define ECA_AIV		0x00200000
+#define ECA_VX		0x00020000
+#define ECA_PROTEXCI	0x00002000
+#define ECA_APIE	0x00000008
+#define ECA_SII		0x00000001
+	__u32	eca;			/* 0x004c */
+#define ICPT_INST	0x04
+#define ICPT_PROGI	0x08
+#define ICPT_INSTPROGI	0x0C
+#define ICPT_EXTREQ	0x10
+#define ICPT_EXTINT	0x14
+#define ICPT_IOREQ	0x18
+#define ICPT_WAIT	0x1c
+#define ICPT_VALIDITY	0x20
+#define ICPT_STOP	0x28
+#define ICPT_OPEREXC	0x2C
+#define ICPT_PARTEXEC	0x38
+#define ICPT_IOINST	0x40
+#define ICPT_KSS	0x5c
+#define ICPT_MCHKREQ	0x60
+#define ICPT_INT_ENABLE	0x64
+#define ICPT_PV_INSTR	0x68
+#define ICPT_PV_NOTIFY	0x6c
+#define ICPT_PV_PREF	0x70
+	__u8	icptcode;		/* 0x0050 */
+	__u8	icptstatus;		/* 0x0051 */
+	__u16	ihcpu;			/* 0x0052 */
+	__u8	reserved54;		/* 0x0054 */
+#define IICTL_CODE_NONE		 0x00
+#define IICTL_CODE_MCHK		 0x01
+#define IICTL_CODE_EXT		 0x02
+#define IICTL_CODE_IO		 0x03
+#define IICTL_CODE_RESTART	 0x04
+#define IICTL_CODE_SPECIFICATION 0x10
+#define IICTL_CODE_OPERAND	 0x11
+	__u8	iictl;			/* 0x0055 */
+	__u16	ipa;			/* 0x0056 */
+	__u32	ipb;			/* 0x0058 */
+	__u32	scaoh;			/* 0x005c */
+#define FPF_BPBC 	0x20
+	__u8	fpf;			/* 0x0060 */
+#define ECB_GS		0x40
+#define ECB_TE		0x10
+#define ECB_SPECI	0x08
+#define ECB_SRSI	0x04
+#define ECB_HOSTPROTINT	0x02
+#define ECB_PTF		0x01
+	__u8	ecb;			/* 0x0061 */
+#define ECB2_CMMA	0x80
+#define ECB2_IEP	0x20
+#define ECB2_PFMFI	0x08
+#define ECB2_ESCA	0x04
+#define ECB2_ZPCI_LSI	0x02
+	__u8    ecb2;                   /* 0x0062 */
+#define ECB3_AISI	0x20
+#define ECB3_AISII	0x10
+#define ECB3_DEA 0x08
+#define ECB3_AES 0x04
+#define ECB3_RI  0x01
+	__u8    ecb3;			/* 0x0063 */
+#define ESCA_SCAOL_MASK ~0x3fU
+	__u32	scaol;			/* 0x0064 */
+	__u8	sdf;			/* 0x0068 */
+	__u8    epdx;			/* 0x0069 */
+	__u8	cpnc;			/* 0x006a */
+	__u8	reserved6b;		/* 0x006b */
+	__u32	todpr;			/* 0x006c */
+#define GISA_FORMAT1 0x00000001
+	__u32	gd;			/* 0x0070 */
+	__u8	reserved74[12];		/* 0x0074 */
+	__u64	mso;			/* 0x0080 */
+	__u64	msl;			/* 0x0088 */
+	psw_t	gpsw;			/* 0x0090 */
+	__u64	gg14;			/* 0x00a0 */
+	__u64	gg15;			/* 0x00a8 */
+	__u8	reservedb0[8];		/* 0x00b0 */
+#define HPID_KVM	0x4
+#define HPID_VSIE	0x5
+	__u8	hpid;			/* 0x00b8 */
+	__u8	reservedb9[7];		/* 0x00b9 */
+	union {
+		struct {
+			__u32	eiparams;	/* 0x00c0 */
+			__u16	extcpuaddr;	/* 0x00c4 */
+			__u16	eic;		/* 0x00c6 */
+		};
+		__u64	mcic;			/* 0x00c0 */
+	} __packed;
+	__u32	reservedc8;		/* 0x00c8 */
+	union {
+		struct {
+			__u16	pgmilc;		/* 0x00cc */
+			__u16	iprcc;		/* 0x00ce */
+		};
+		__u32	edc;			/* 0x00cc */
+	} __packed;
+	union {
+		struct {
+			__u32	dxc;		/* 0x00d0 */
+			__u16	mcn;		/* 0x00d4 */
+			__u8	perc;		/* 0x00d6 */
+			__u8	peratmid;	/* 0x00d7 */
+		};
+		__u64	faddr;			/* 0x00d0 */
+	} __packed;
+	__u64	peraddr;		/* 0x00d8 */
+	__u8	eai;			/* 0x00e0 */
+	__u8	peraid;			/* 0x00e1 */
+	__u8	oai;			/* 0x00e2 */
+	__u8	armid;			/* 0x00e3 */
+	__u8	reservede4[4];		/* 0x00e4 */
+	union {
+		__u64	tecmc;		/* 0x00e8 */
+		struct {
+			__u16	subchannel_id;	/* 0x00e8 */
+			__u16	subchannel_nr;	/* 0x00ea */
+			__u32	io_int_parm;	/* 0x00ec */
+			__u32	io_int_word;	/* 0x00f0 */
+		};
+	} __packed;
+	__u8	reservedf4[8];		/* 0x00f4 */
+#define CRYCB_FORMAT_MASK 0x00000003
+#define CRYCB_FORMAT0 0x00000000
+#define CRYCB_FORMAT1 0x00000001
+#define CRYCB_FORMAT2 0x00000003
+	__u32	crycbd;			/* 0x00fc */
+	__u64	gcr[16];		/* 0x0100 */
+	union {
+		__u64	gbea;		/* 0x0180 */
+		__u64	sidad;
+	};
+	__u8    reserved188[8];		/* 0x0188 */
+	__u64   sdnxo;			/* 0x0190 */
+	__u8    reserved198[8];		/* 0x0198 */
+	__u32	fac;			/* 0x01a0 */
+	__u8	reserved1a4[20];	/* 0x01a4 */
+	__u64	cbrlo;			/* 0x01b8 */
+	__u8	reserved1c0[8];		/* 0x01c0 */
+#define ECD_HOSTREGMGMT	0x20000000
+#define ECD_MEF		0x08000000
+#define ECD_ETOKENF	0x02000000
+#define ECD_ECC		0x00200000
+	__u32	ecd;			/* 0x01c8 */
+	__u8	reserved1cc[18];	/* 0x01cc */
+	__u64	pp;			/* 0x01de */
+	__u8	reserved1e6[2];		/* 0x01e6 */
+	__u64	itdba;			/* 0x01e8 */
+	__u64   riccbd;			/* 0x01f0 */
+	__u64	gvrd;			/* 0x01f8 */
+} __packed __aligned(512);
+
+struct kvm_s390_itdb {
+	__u8	data[256];
+};
+
+struct sie_page {
+	struct kvm_s390_sie_block sie_block;
+	struct mcck_volatile_info mcck_info;	/* 0x0200 */
+	__u8 reserved218[360];		/* 0x0218 */
+	__u64 pv_grregs[16];		/* 0x0380 */
+	__u8 reserved400[512];		/* 0x0400 */
+	struct kvm_s390_itdb itdb;	/* 0x0600 */
+	__u8 reserved700[2304];		/* 0x0700 */
+};
+
+struct kvm_vcpu_stat {
+	struct kvm_vcpu_stat_generic generic;
+	u64 exit_userspace;
+	u64 exit_null;
+	u64 exit_external_request;
+	u64 exit_io_request;
+	u64 exit_external_interrupt;
+	u64 exit_stop_request;
+	u64 exit_validity;
+	u64 exit_instruction;
+	u64 exit_pei;
+	u64 halt_no_poll_steal;
+	u64 instruction_lctl;
+	u64 instruction_lctlg;
+	u64 instruction_stctl;
+	u64 instruction_stctg;
+	u64 exit_program_interruption;
+	u64 exit_instr_and_program;
+	u64 exit_operation_exception;
+	u64 deliver_ckc;
+	u64 deliver_cputm;
+	u64 deliver_external_call;
+	u64 deliver_emergency_signal;
+	u64 deliver_service_signal;
+	u64 deliver_virtio;
+	u64 deliver_stop_signal;
+	u64 deliver_prefix_signal;
+	u64 deliver_restart_signal;
+	u64 deliver_program;
+	u64 deliver_io;
+	u64 deliver_machine_check;
+	u64 exit_wait_state;
+	u64 inject_ckc;
+	u64 inject_cputm;
+	u64 inject_external_call;
+	u64 inject_emergency_signal;
+	u64 inject_mchk;
+	u64 inject_pfault_init;
+	u64 inject_program;
+	u64 inject_restart;
+	u64 inject_set_prefix;
+	u64 inject_stop_signal;
+	u64 instruction_epsw;
+	u64 instruction_gs;
+	u64 instruction_io_other;
+	u64 instruction_lpsw;
+	u64 instruction_lpswe;
+	u64 instruction_pfmf;
+	u64 instruction_ptff;
+	u64 instruction_sck;
+	u64 instruction_sckpf;
+	u64 instruction_stidp;
+	u64 instruction_spx;
+	u64 instruction_stpx;
+	u64 instruction_stap;
+	u64 instruction_iske;
+	u64 instruction_ri;
+	u64 instruction_rrbe;
+	u64 instruction_sske;
+	u64 instruction_ipte_interlock;
+	u64 instruction_stsi;
+	u64 instruction_stfl;
+	u64 instruction_tb;
+	u64 instruction_tpi;
+	u64 instruction_tprot;
+	u64 instruction_tsch;
+	u64 instruction_sie;
+	u64 instruction_essa;
+	u64 instruction_sthyi;
+	u64 instruction_sigp_sense;
+	u64 instruction_sigp_sense_running;
+	u64 instruction_sigp_external_call;
+	u64 instruction_sigp_emergency;
+	u64 instruction_sigp_cond_emergency;
+	u64 instruction_sigp_start;
+	u64 instruction_sigp_stop;
+	u64 instruction_sigp_stop_store_status;
+	u64 instruction_sigp_store_status;
+	u64 instruction_sigp_store_adtl_status;
+	u64 instruction_sigp_arch;
+	u64 instruction_sigp_prefix;
+	u64 instruction_sigp_restart;
+	u64 instruction_sigp_init_cpu_reset;
+	u64 instruction_sigp_cpu_reset;
+	u64 instruction_sigp_unknown;
+	u64 instruction_diagnose_10;
+	u64 instruction_diagnose_44;
+	u64 instruction_diagnose_9c;
+	u64 diag_9c_ignored;
+	u64 diag_9c_forward;
+	u64 instruction_diagnose_258;
+	u64 instruction_diagnose_308;
+	u64 instruction_diagnose_500;
+	u64 instruction_diagnose_other;
+	u64 pfault_sync;
+};
+
+#define PGM_OPERATION			0x01
+#define PGM_PRIVILEGED_OP		0x02
+#define PGM_EXECUTE			0x03
+#define PGM_PROTECTION			0x04
+#define PGM_ADDRESSING			0x05
+#define PGM_SPECIFICATION		0x06
+#define PGM_DATA			0x07
+#define PGM_FIXED_POINT_OVERFLOW	0x08
+#define PGM_FIXED_POINT_DIVIDE		0x09
+#define PGM_DECIMAL_OVERFLOW		0x0a
+#define PGM_DECIMAL_DIVIDE		0x0b
+#define PGM_HFP_EXPONENT_OVERFLOW	0x0c
+#define PGM_HFP_EXPONENT_UNDERFLOW	0x0d
+#define PGM_HFP_SIGNIFICANCE		0x0e
+#define PGM_HFP_DIVIDE			0x0f
+#define PGM_SEGMENT_TRANSLATION		0x10
+#define PGM_PAGE_TRANSLATION		0x11
+#define PGM_TRANSLATION_SPEC		0x12
+#define PGM_SPECIAL_OPERATION		0x13
+#define PGM_OPERAND			0x15
+#define PGM_TRACE_TABEL			0x16
+#define PGM_VECTOR_PROCESSING		0x1b
+#define PGM_SPACE_SWITCH		0x1c
+#define PGM_HFP_SQUARE_ROOT		0x1d
+#define PGM_PC_TRANSLATION_SPEC		0x1f
+#define PGM_AFX_TRANSLATION		0x20
+#define PGM_ASX_TRANSLATION		0x21
+#define PGM_LX_TRANSLATION		0x22
+#define PGM_EX_TRANSLATION		0x23
+#define PGM_PRIMARY_AUTHORITY		0x24
+#define PGM_SECONDARY_AUTHORITY		0x25
+#define PGM_LFX_TRANSLATION		0x26
+#define PGM_LSX_TRANSLATION		0x27
+#define PGM_ALET_SPECIFICATION		0x28
+#define PGM_ALEN_TRANSLATION		0x29
+#define PGM_ALE_SEQUENCE		0x2a
+#define PGM_ASTE_VALIDITY		0x2b
+#define PGM_ASTE_SEQUENCE		0x2c
+#define PGM_EXTENDED_AUTHORITY		0x2d
+#define PGM_LSTE_SEQUENCE		0x2e
+#define PGM_ASTE_INSTANCE		0x2f
+#define PGM_STACK_FULL			0x30
+#define PGM_STACK_EMPTY			0x31
+#define PGM_STACK_SPECIFICATION		0x32
+#define PGM_STACK_TYPE			0x33
+#define PGM_STACK_OPERATION		0x34
+#define PGM_ASCE_TYPE			0x38
+#define PGM_REGION_FIRST_TRANS		0x39
+#define PGM_REGION_SECOND_TRANS		0x3a
+#define PGM_REGION_THIRD_TRANS		0x3b
+#define PGM_MONITOR			0x40
+#define PGM_PER				0x80
+#define PGM_CRYPTO_OPERATION		0x119
+
+/* irq types in ascend order of priorities */
+enum irq_types {
+	IRQ_PEND_SET_PREFIX = 0,
+	IRQ_PEND_RESTART,
+	IRQ_PEND_SIGP_STOP,
+	IRQ_PEND_IO_ISC_7,
+	IRQ_PEND_IO_ISC_6,
+	IRQ_PEND_IO_ISC_5,
+	IRQ_PEND_IO_ISC_4,
+	IRQ_PEND_IO_ISC_3,
+	IRQ_PEND_IO_ISC_2,
+	IRQ_PEND_IO_ISC_1,
+	IRQ_PEND_IO_ISC_0,
+	IRQ_PEND_VIRTIO,
+	IRQ_PEND_PFAULT_DONE,
+	IRQ_PEND_PFAULT_INIT,
+	IRQ_PEND_EXT_HOST,
+	IRQ_PEND_EXT_SERVICE,
+	IRQ_PEND_EXT_SERVICE_EV,
+	IRQ_PEND_EXT_TIMING,
+	IRQ_PEND_EXT_CPU_TIMER,
+	IRQ_PEND_EXT_CLOCK_COMP,
+	IRQ_PEND_EXT_EXTERNAL,
+	IRQ_PEND_EXT_EMERGENCY,
+	IRQ_PEND_EXT_MALFUNC,
+	IRQ_PEND_EXT_IRQ_KEY,
+	IRQ_PEND_MCHK_REP,
+	IRQ_PEND_PROG,
+	IRQ_PEND_SVC,
+	IRQ_PEND_MCHK_EX,
+	IRQ_PEND_COUNT
+};
+
+/* We have 2M for virtio device descriptor pages. Smallest amount of
+ * memory per page is 24 bytes (1 queue), so (2048*1024) / 24 = 87381
+ */
+#define KVM_S390_MAX_VIRTIO_IRQS 87381
+
+/*
+ * Repressible (non-floating) machine check interrupts
+ * subclass bits in MCIC
+ */
+#define MCHK_EXTD_BIT 58
+#define MCHK_DEGR_BIT 56
+#define MCHK_WARN_BIT 55
+#define MCHK_REP_MASK ((1UL << MCHK_DEGR_BIT) | \
+		       (1UL << MCHK_EXTD_BIT) | \
+		       (1UL << MCHK_WARN_BIT))
+
+/* Exigent machine check interrupts subclass bits in MCIC */
+#define MCHK_SD_BIT 63
+#define MCHK_PD_BIT 62
+#define MCHK_EX_MASK ((1UL << MCHK_SD_BIT) | (1UL << MCHK_PD_BIT))
+
+#define IRQ_PEND_EXT_MASK ((1UL << IRQ_PEND_EXT_IRQ_KEY)    | \
+			   (1UL << IRQ_PEND_EXT_CLOCK_COMP) | \
+			   (1UL << IRQ_PEND_EXT_CPU_TIMER)  | \
+			   (1UL << IRQ_PEND_EXT_MALFUNC)    | \
+			   (1UL << IRQ_PEND_EXT_EMERGENCY)  | \
+			   (1UL << IRQ_PEND_EXT_EXTERNAL)   | \
+			   (1UL << IRQ_PEND_EXT_TIMING)     | \
+			   (1UL << IRQ_PEND_EXT_HOST)       | \
+			   (1UL << IRQ_PEND_EXT_SERVICE)    | \
+			   (1UL << IRQ_PEND_EXT_SERVICE_EV) | \
+			   (1UL << IRQ_PEND_VIRTIO)         | \
+			   (1UL << IRQ_PEND_PFAULT_INIT)    | \
+			   (1UL << IRQ_PEND_PFAULT_DONE))
+
+#define IRQ_PEND_IO_MASK ((1UL << IRQ_PEND_IO_ISC_0) | \
+			  (1UL << IRQ_PEND_IO_ISC_1) | \
+			  (1UL << IRQ_PEND_IO_ISC_2) | \
+			  (1UL << IRQ_PEND_IO_ISC_3) | \
+			  (1UL << IRQ_PEND_IO_ISC_4) | \
+			  (1UL << IRQ_PEND_IO_ISC_5) | \
+			  (1UL << IRQ_PEND_IO_ISC_6) | \
+			  (1UL << IRQ_PEND_IO_ISC_7))
+
+#define IRQ_PEND_MCHK_MASK ((1UL << IRQ_PEND_MCHK_REP) | \
+			    (1UL << IRQ_PEND_MCHK_EX))
+
+#define IRQ_PEND_EXT_II_MASK ((1UL << IRQ_PEND_EXT_CPU_TIMER)  | \
+			      (1UL << IRQ_PEND_EXT_CLOCK_COMP) | \
+			      (1UL << IRQ_PEND_EXT_EMERGENCY)  | \
+			      (1UL << IRQ_PEND_EXT_EXTERNAL)   | \
+			      (1UL << IRQ_PEND_EXT_SERVICE)    | \
+			      (1UL << IRQ_PEND_EXT_SERVICE_EV))
+
+struct kvm_s390_interrupt_info {
+	struct list_head list;
+	u64	type;
+	union {
+		struct kvm_s390_io_info io;
+		struct kvm_s390_ext_info ext;
+		struct kvm_s390_pgm_info pgm;
+		struct kvm_s390_emerg_info emerg;
+		struct kvm_s390_extcall_info extcall;
+		struct kvm_s390_prefix_info prefix;
+		struct kvm_s390_stop_info stop;
+		struct kvm_s390_mchk_info mchk;
+	};
+};
+
+struct kvm_s390_irq_payload {
+	struct kvm_s390_io_info io;
+	struct kvm_s390_ext_info ext;
+	struct kvm_s390_pgm_info pgm;
+	struct kvm_s390_emerg_info emerg;
+	struct kvm_s390_extcall_info extcall;
+	struct kvm_s390_prefix_info prefix;
+	struct kvm_s390_stop_info stop;
+	struct kvm_s390_mchk_info mchk;
+};
+
+struct kvm_s390_local_interrupt {
+	spinlock_t lock;
+	DECLARE_BITMAP(sigp_emerg_pending, KVM_MAX_VCPUS);
+	struct kvm_s390_irq_payload irq;
+	unsigned long pending_irqs;
+};
+
+#define FIRQ_LIST_IO_ISC_0 0
+#define FIRQ_LIST_IO_ISC_1 1
+#define FIRQ_LIST_IO_ISC_2 2
+#define FIRQ_LIST_IO_ISC_3 3
+#define FIRQ_LIST_IO_ISC_4 4
+#define FIRQ_LIST_IO_ISC_5 5
+#define FIRQ_LIST_IO_ISC_6 6
+#define FIRQ_LIST_IO_ISC_7 7
+#define FIRQ_LIST_PFAULT   8
+#define FIRQ_LIST_VIRTIO   9
+#define FIRQ_LIST_COUNT   10
+#define FIRQ_CNTR_IO       0
+#define FIRQ_CNTR_SERVICE  1
+#define FIRQ_CNTR_VIRTIO   2
+#define FIRQ_CNTR_PFAULT   3
+#define FIRQ_MAX_COUNT     4
+
+/* mask the AIS mode for a given ISC */
+#define AIS_MODE_MASK(isc) (0x80 >> isc)
+
+#define KVM_S390_AIS_MODE_ALL    0
+#define KVM_S390_AIS_MODE_SINGLE 1
+
+struct kvm_s390_float_interrupt {
+	unsigned long pending_irqs;
+	unsigned long masked_irqs;
+	spinlock_t lock;
+	struct list_head lists[FIRQ_LIST_COUNT];
+	int counters[FIRQ_MAX_COUNT];
+	struct kvm_s390_mchk_info mchk;
+	struct kvm_s390_ext_info srv_signal;
+	int next_rr_cpu;
+	struct mutex ais_lock;
+	u8 simm;
+	u8 nimm;
+};
+
+struct kvm_hw_wp_info_arch {
+	unsigned long addr;
+	unsigned long phys_addr;
+	int len;
+	char *old_data;
+};
+
+struct kvm_hw_bp_info_arch {
+	unsigned long addr;
+	int len;
+};
+
+/*
+ * Only the upper 16 bits of kvm_guest_debug->control are arch specific.
+ * Further KVM_GUESTDBG flags which an be used from userspace can be found in
+ * arch/s390/include/uapi/asm/kvm.h
+ */
+#define KVM_GUESTDBG_EXIT_PENDING 0x10000000
+
+#define guestdbg_enabled(vcpu) \
+		(vcpu->guest_debug & KVM_GUESTDBG_ENABLE)
+#define guestdbg_sstep_enabled(vcpu) \
+		(vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP)
+#define guestdbg_hw_bp_enabled(vcpu) \
+		(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP)
+#define guestdbg_exit_pending(vcpu) (guestdbg_enabled(vcpu) && \
+		(vcpu->guest_debug & KVM_GUESTDBG_EXIT_PENDING))
+
+#define KVM_GUESTDBG_VALID_MASK \
+		(KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_SINGLESTEP |\
+		KVM_GUESTDBG_USE_HW_BP | KVM_GUESTDBG_EXIT_PENDING)
+
+struct kvm_guestdbg_info_arch {
+	unsigned long cr0;
+	unsigned long cr9;
+	unsigned long cr10;
+	unsigned long cr11;
+	struct kvm_hw_bp_info_arch *hw_bp_info;
+	struct kvm_hw_wp_info_arch *hw_wp_info;
+	int nr_hw_bp;
+	int nr_hw_wp;
+	unsigned long last_bp;
+};
+
+struct kvm_s390_pv_vcpu {
+	u64 handle;
+	unsigned long stor_base;
+};
+
+struct kvm_vcpu_arch {
+	struct kvm_s390_sie_block *sie_block;
+	/* if vsie is active, currently executed shadow sie control block */
+	struct kvm_s390_sie_block *vsie_block;
+	unsigned int      host_acrs[NUM_ACRS];
+	struct gs_cb      *host_gscb;
+	struct fpu	  host_fpregs;
+	struct kvm_s390_local_interrupt local_int;
+	struct hrtimer    ckc_timer;
+	struct kvm_s390_pgm_info pgm;
+	struct gmap *gmap;
+	/* backup location for the currently enabled gmap when scheduled out */
+	struct gmap *enabled_gmap;
+	struct kvm_guestdbg_info_arch guestdbg;
+	unsigned long pfault_token;
+	unsigned long pfault_select;
+	unsigned long pfault_compare;
+	bool cputm_enabled;
+	/*
+	 * The seqcount protects updates to cputm_start and sie_block.cputm,
+	 * this way we can have non-blocking reads with consistent values.
+	 * Only the owning VCPU thread (vcpu->cpu) is allowed to change these
+	 * values and to start/stop/enable/disable cpu timer accounting.
+	 */
+	seqcount_t cputm_seqcount;
+	__u64 cputm_start;
+	bool gs_enabled;
+	bool skey_enabled;
+	struct kvm_s390_pv_vcpu pv;
+	union diag318_info diag318_info;
+};
+
+struct kvm_vm_stat {
+	struct kvm_vm_stat_generic generic;
+	u64 inject_io;
+	u64 inject_float_mchk;
+	u64 inject_pfault_done;
+	u64 inject_service_signal;
+	u64 inject_virtio;
+	u64 aen_forward;
+};
+
+struct kvm_arch_memory_slot {
+};
+
+struct s390_map_info {
+	struct list_head list;
+	__u64 guest_addr;
+	__u64 addr;
+	struct page *page;
+};
+
+struct s390_io_adapter {
+	unsigned int id;
+	int isc;
+	bool maskable;
+	bool masked;
+	bool swap;
+	bool suppressible;
+};
+
+#define MAX_S390_IO_ADAPTERS ((MAX_ISC + 1) * 8)
+#define MAX_S390_ADAPTER_MAPS 256
+
+/* maximum size of facilities and facility mask is 2k bytes */
+#define S390_ARCH_FAC_LIST_SIZE_BYTE (1<<11)
+#define S390_ARCH_FAC_LIST_SIZE_U64 \
+	(S390_ARCH_FAC_LIST_SIZE_BYTE / sizeof(u64))
+#define S390_ARCH_FAC_MASK_SIZE_BYTE S390_ARCH_FAC_LIST_SIZE_BYTE
+#define S390_ARCH_FAC_MASK_SIZE_U64 \
+	(S390_ARCH_FAC_MASK_SIZE_BYTE / sizeof(u64))
+
+struct kvm_s390_cpu_model {
+	/* facility mask supported by kvm & hosting machine */
+	__u64 fac_mask[S390_ARCH_FAC_LIST_SIZE_U64];
+	struct kvm_s390_vm_cpu_subfunc subfuncs;
+	/* facility list requested by guest (in dma page) */
+	__u64 *fac_list;
+	u64 cpuid;
+	unsigned short ibc;
+	/* subset of available UV-features for pv-guests enabled by user space */
+	struct kvm_s390_vm_cpu_uv_feat uv_feat_guest;
+};
+
+typedef int (*crypto_hook)(struct kvm_vcpu *vcpu);
+
+struct kvm_s390_crypto {
+	struct kvm_s390_crypto_cb *crycb;
+	struct rw_semaphore pqap_hook_rwsem;
+	crypto_hook *pqap_hook;
+	__u32 crycbd;
+	__u8 aes_kw;
+	__u8 dea_kw;
+	__u8 apie;
+};
+
+#define APCB0_MASK_SIZE 1
+struct kvm_s390_apcb0 {
+	__u64 apm[APCB0_MASK_SIZE];		/* 0x0000 */
+	__u64 aqm[APCB0_MASK_SIZE];		/* 0x0008 */
+	__u64 adm[APCB0_MASK_SIZE];		/* 0x0010 */
+	__u64 reserved18;			/* 0x0018 */
+};
+
+#define APCB1_MASK_SIZE 4
+struct kvm_s390_apcb1 {
+	__u64 apm[APCB1_MASK_SIZE];		/* 0x0000 */
+	__u64 aqm[APCB1_MASK_SIZE];		/* 0x0020 */
+	__u64 adm[APCB1_MASK_SIZE];		/* 0x0040 */
+	__u64 reserved60[4];			/* 0x0060 */
+};
+
+struct kvm_s390_crypto_cb {
+	struct kvm_s390_apcb0 apcb0;		/* 0x0000 */
+	__u8   reserved20[0x0048 - 0x0020];	/* 0x0020 */
+	__u8   dea_wrapping_key_mask[24];	/* 0x0048 */
+	__u8   aes_wrapping_key_mask[32];	/* 0x0060 */
+	struct kvm_s390_apcb1 apcb1;		/* 0x0080 */
+};
+
+struct kvm_s390_gisa {
+	union {
+		struct { /* common to all formats */
+			u32 next_alert;
+			u8  ipm;
+			u8  reserved01[2];
+			u8  iam;
+		};
+		struct { /* format 0 */
+			u32 next_alert;
+			u8  ipm;
+			u8  reserved01;
+			u8  : 6;
+			u8  g : 1;
+			u8  c : 1;
+			u8  iam;
+			u8  reserved02[4];
+			u32 airq_count;
+		} g0;
+		struct { /* format 1 */
+			u32 next_alert;
+			u8  ipm;
+			u8  simm;
+			u8  nimm;
+			u8  iam;
+			u8  aism[8];
+			u8  : 6;
+			u8  g : 1;
+			u8  c : 1;
+			u8  reserved03[11];
+			u32 airq_count;
+		} g1;
+		struct {
+			u64 word[4];
+		} u64;
+	};
+};
+
+struct kvm_s390_gib {
+	u32 alert_list_origin;
+	u32 reserved01;
+	u8:5;
+	u8  nisc:3;
+	u8  reserved03[3];
+	u32 reserved04[5];
+};
+
+/*
+ * sie_page2 has to be allocated as DMA because fac_list, crycb and
+ * gisa need 31bit addresses in the sie control block.
+ */
+struct sie_page2 {
+	__u64 fac_list[S390_ARCH_FAC_LIST_SIZE_U64];	/* 0x0000 */
+	struct kvm_s390_crypto_cb crycb;		/* 0x0800 */
+	struct kvm_s390_gisa gisa;			/* 0x0900 */
+	struct kvm *kvm;				/* 0x0920 */
+	u8 reserved928[0x1000 - 0x928];			/* 0x0928 */
+};
+
+struct kvm_s390_vsie {
+	struct mutex mutex;
+	struct radix_tree_root addr_to_page;
+	int page_count;
+	int next;
+	struct page *pages[KVM_MAX_VCPUS];
+};
+
+struct kvm_s390_gisa_iam {
+	u8 mask;
+	spinlock_t ref_lock;
+	u32 ref_count[MAX_ISC + 1];
+};
+
+struct kvm_s390_gisa_interrupt {
+	struct kvm_s390_gisa *origin;
+	struct kvm_s390_gisa_iam alert;
+	struct hrtimer timer;
+	u64 expires;
+	DECLARE_BITMAP(kicked_mask, KVM_MAX_VCPUS);
+};
+
+struct kvm_s390_pv {
+	u64 handle;
+	u64 guest_len;
+	unsigned long stor_base;
+	void *stor_var;
+	bool dumping;
+	void *set_aside;
+	struct list_head need_cleanup;
+	struct mmu_notifier mmu_notifier;
+};
+
+struct kvm_arch{
+	void *sca;
+	int use_esca;
+	rwlock_t sca_lock;
+	debug_info_t *dbf;
+	struct kvm_s390_float_interrupt float_int;
+	struct kvm_device *flic;
+	struct gmap *gmap;
+	unsigned long mem_limit;
+	int css_support;
+	int use_irqchip;
+	int use_cmma;
+	int use_pfmfi;
+	int use_skf;
+	int use_zpci_interp;
+	int user_cpu_state_ctrl;
+	int user_sigp;
+	int user_stsi;
+	int user_instr0;
+	struct s390_io_adapter *adapters[MAX_S390_IO_ADAPTERS];
+	wait_queue_head_t ipte_wq;
+	int ipte_lock_count;
+	struct mutex ipte_mutex;
+	spinlock_t start_stop_lock;
+	struct sie_page2 *sie_page2;
+	struct kvm_s390_cpu_model model;
+	struct kvm_s390_crypto crypto;
+	struct kvm_s390_vsie vsie;
+	u8 epdx;
+	u64 epoch;
+	int migration_mode;
+	atomic64_t cmma_dirty_pages;
+	/* subset of available cpu features enabled by user space */
+	DECLARE_BITMAP(cpu_feat, KVM_S390_VM_CPU_FEAT_NR_BITS);
+	/* indexed by vcpu_idx */
+	DECLARE_BITMAP(idle_mask, KVM_MAX_VCPUS);
+	struct kvm_s390_gisa_interrupt gisa_int;
+	struct kvm_s390_pv pv;
+	struct list_head kzdev_list;
+	spinlock_t kzdev_list_lock;
+};
+
+#define KVM_HVA_ERR_BAD		(-1UL)
+#define KVM_HVA_ERR_RO_BAD	(-2UL)
+
+static inline bool kvm_is_error_hva(unsigned long addr)
+{
+	return IS_ERR_VALUE(addr);
+}
+
+#define ASYNC_PF_PER_VCPU	64
+struct kvm_arch_async_pf {
+	unsigned long pfault_token;
+};
+
+bool kvm_arch_can_dequeue_async_page_present(struct kvm_vcpu *vcpu);
+
+void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu,
+			       struct kvm_async_pf *work);
+
+bool kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu,
+				     struct kvm_async_pf *work);
+
+void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
+				 struct kvm_async_pf *work);
+
+static inline void kvm_arch_async_page_present_queued(struct kvm_vcpu *vcpu) {}
+
+void kvm_arch_crypto_clear_masks(struct kvm *kvm);
+void kvm_arch_crypto_set_masks(struct kvm *kvm, unsigned long *apm,
+			       unsigned long *aqm, unsigned long *adm);
+
+int __sie64a(phys_addr_t sie_block_phys, struct kvm_s390_sie_block *sie_block, u64 *rsa);
+
+static inline int sie64a(struct kvm_s390_sie_block *sie_block, u64 *rsa)
+{
+	return __sie64a(virt_to_phys(sie_block), sie_block, rsa);
+}
+
+extern char sie_exit;
+
+bool kvm_s390_pv_is_protected(struct kvm *kvm);
+bool kvm_s390_pv_cpu_is_protected(struct kvm_vcpu *vcpu);
+
+extern int kvm_s390_gisc_register(struct kvm *kvm, u32 gisc);
+extern int kvm_s390_gisc_unregister(struct kvm *kvm, u32 gisc);
+
+static inline void kvm_arch_sync_events(struct kvm *kvm) {}
+static inline void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu) {}
+static inline void kvm_arch_free_memslot(struct kvm *kvm,
+					 struct kvm_memory_slot *slot) {}
+static inline void kvm_arch_memslots_updated(struct kvm *kvm, u64 gen) {}
+static inline void kvm_arch_flush_shadow_all(struct kvm *kvm) {}
+static inline void kvm_arch_flush_shadow_memslot(struct kvm *kvm,
+		struct kvm_memory_slot *slot) {}
+static inline void kvm_arch_vcpu_blocking(struct kvm_vcpu *vcpu) {}
+static inline void kvm_arch_vcpu_unblocking(struct kvm_vcpu *vcpu) {}
+
+#define __KVM_HAVE_ARCH_VM_FREE
+void kvm_arch_free_vm(struct kvm *kvm);
+
+struct zpci_kvm_hook {
+	int (*kvm_register)(void *opaque, struct kvm *kvm);
+	void (*kvm_unregister)(void *opaque);
+};
+
+extern struct zpci_kvm_hook zpci_kvm_hook;
+
+#endif
diff --git a/arch/s390/include/asm/kvm_para.h b/arch/s390/include/asm/kvm_para.h
new file mode 100644
index 0000000000..df73a05276
--- /dev/null
+++ b/arch/s390/include/asm/kvm_para.h
@@ -0,0 +1,123 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * definition for paravirtual devices on s390
+ *
+ * Copyright IBM Corp. 2008
+ *
+ *    Author(s): Christian Borntraeger <borntraeger@de.ibm.com>
+ */
+/*
+ * Hypercalls for KVM on s390. The calling convention is similar to the
+ * s390 ABI, so we use R2-R6 for parameters 1-5. In addition we use R1
+ * as hypercall number and R7 as parameter 6. The return value is
+ * written to R2. We use the diagnose instruction as hypercall. To avoid
+ * conflicts with existing diagnoses for LPAR and z/VM, we do not use
+ * the instruction encoded number, but specify the number in R1 and
+ * use 0x500 as KVM hypercall
+ *
+ * Copyright IBM Corp. 2007,2008
+ * Author(s): Christian Borntraeger <borntraeger@de.ibm.com>
+ */
+#ifndef __S390_KVM_PARA_H
+#define __S390_KVM_PARA_H
+
+#include <uapi/asm/kvm_para.h>
+#include <asm/diag.h>
+
+#define HYPERCALL_FMT_0
+#define HYPERCALL_FMT_1 , "0" (r2)
+#define HYPERCALL_FMT_2 , "d" (r3) HYPERCALL_FMT_1
+#define HYPERCALL_FMT_3 , "d" (r4) HYPERCALL_FMT_2
+#define HYPERCALL_FMT_4 , "d" (r5) HYPERCALL_FMT_3
+#define HYPERCALL_FMT_5 , "d" (r6) HYPERCALL_FMT_4
+#define HYPERCALL_FMT_6 , "d" (r7) HYPERCALL_FMT_5
+
+#define HYPERCALL_PARM_0
+#define HYPERCALL_PARM_1 , unsigned long arg1
+#define HYPERCALL_PARM_2 HYPERCALL_PARM_1, unsigned long arg2
+#define HYPERCALL_PARM_3 HYPERCALL_PARM_2, unsigned long arg3
+#define HYPERCALL_PARM_4 HYPERCALL_PARM_3, unsigned long arg4
+#define HYPERCALL_PARM_5 HYPERCALL_PARM_4, unsigned long arg5
+#define HYPERCALL_PARM_6 HYPERCALL_PARM_5, unsigned long arg6
+
+#define HYPERCALL_REGS_0
+#define HYPERCALL_REGS_1						\
+	register unsigned long r2 asm("2") = arg1
+#define HYPERCALL_REGS_2						\
+	HYPERCALL_REGS_1;						\
+	register unsigned long r3 asm("3") = arg2
+#define HYPERCALL_REGS_3						\
+	HYPERCALL_REGS_2;						\
+	register unsigned long r4 asm("4") = arg3
+#define HYPERCALL_REGS_4						\
+	HYPERCALL_REGS_3;						\
+	register unsigned long r5 asm("5") = arg4
+#define HYPERCALL_REGS_5						\
+	HYPERCALL_REGS_4;						\
+	register unsigned long r6 asm("6") = arg5
+#define HYPERCALL_REGS_6						\
+	HYPERCALL_REGS_5;						\
+	register unsigned long r7 asm("7") = arg6
+
+#define HYPERCALL_ARGS_0
+#define HYPERCALL_ARGS_1 , arg1
+#define HYPERCALL_ARGS_2 HYPERCALL_ARGS_1, arg2
+#define HYPERCALL_ARGS_3 HYPERCALL_ARGS_2, arg3
+#define HYPERCALL_ARGS_4 HYPERCALL_ARGS_3, arg4
+#define HYPERCALL_ARGS_5 HYPERCALL_ARGS_4, arg5
+#define HYPERCALL_ARGS_6 HYPERCALL_ARGS_5, arg6
+
+#define GENERATE_KVM_HYPERCALL_FUNC(args)				\
+static inline								\
+long __kvm_hypercall##args(unsigned long nr HYPERCALL_PARM_##args)	\
+{									\
+	register unsigned long __nr asm("1") = nr;			\
+	register long __rc asm("2");					\
+	HYPERCALL_REGS_##args;						\
+									\
+	asm volatile (							\
+		"	diag	2,4,0x500\n"				\
+		: "=d" (__rc)						\
+		: "d" (__nr) HYPERCALL_FMT_##args			\
+		: "memory", "cc");					\
+	return __rc;							\
+}									\
+									\
+static inline								\
+long kvm_hypercall##args(unsigned long nr HYPERCALL_PARM_##args)	\
+{									\
+	diag_stat_inc(DIAG_STAT_X500);					\
+	return __kvm_hypercall##args(nr HYPERCALL_ARGS_##args);		\
+}
+
+GENERATE_KVM_HYPERCALL_FUNC(0)
+GENERATE_KVM_HYPERCALL_FUNC(1)
+GENERATE_KVM_HYPERCALL_FUNC(2)
+GENERATE_KVM_HYPERCALL_FUNC(3)
+GENERATE_KVM_HYPERCALL_FUNC(4)
+GENERATE_KVM_HYPERCALL_FUNC(5)
+GENERATE_KVM_HYPERCALL_FUNC(6)
+
+/* kvm on s390 is always paravirtualization enabled */
+static inline int kvm_para_available(void)
+{
+	return 1;
+}
+
+/* No feature bits are currently assigned for kvm on s390 */
+static inline unsigned int kvm_arch_para_features(void)
+{
+	return 0;
+}
+
+static inline unsigned int kvm_arch_para_hints(void)
+{
+	return 0;
+}
+
+static inline bool kvm_check_and_clear_guest_paused(void)
+{
+	return false;
+}
+
+#endif /* __S390_KVM_PARA_H */
diff --git a/arch/s390/include/asm/linkage.h b/arch/s390/include/asm/linkage.h
new file mode 100644
index 0000000000..df3fb7d822
--- /dev/null
+++ b/arch/s390/include/asm/linkage.h
@@ -0,0 +1,10 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __ASM_LINKAGE_H
+#define __ASM_LINKAGE_H
+
+#include <linux/stringify.h>
+
+#define __ALIGN .balign CONFIG_FUNCTION_ALIGNMENT, 0x07
+#define __ALIGN_STR __stringify(__ALIGN)
+
+#endif
diff --git a/arch/s390/include/asm/lowcore.h b/arch/s390/include/asm/lowcore.h
new file mode 100644
index 0000000000..69ccc464a4
--- /dev/null
+++ b/arch/s390/include/asm/lowcore.h
@@ -0,0 +1,232 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ *    Copyright IBM Corp. 1999, 2012
+ *    Author(s): Hartmut Penner <hp@de.ibm.com>,
+ *		 Martin Schwidefsky <schwidefsky@de.ibm.com>,
+ *		 Denis Joseph Barrow,
+ */
+
+#ifndef _ASM_S390_LOWCORE_H
+#define _ASM_S390_LOWCORE_H
+
+#include <linux/types.h>
+#include <asm/ptrace.h>
+#include <asm/cpu.h>
+#include <asm/types.h>
+
+#define LC_ORDER 1
+#define LC_PAGES 2
+
+struct pgm_tdb {
+	u64 data[32];
+};
+
+struct lowcore {
+	__u8	pad_0x0000[0x0014-0x0000];	/* 0x0000 */
+	__u32	ipl_parmblock_ptr;		/* 0x0014 */
+	__u8	pad_0x0018[0x0080-0x0018];	/* 0x0018 */
+	__u32	ext_params;			/* 0x0080 */
+	union {
+		struct {
+			__u16 ext_cpu_addr;	/* 0x0084 */
+			__u16 ext_int_code;	/* 0x0086 */
+		};
+		__u32 ext_int_code_addr;
+	};
+	__u32	svc_int_code;			/* 0x0088 */
+	union {
+		struct {
+			__u16	pgm_ilc;	/* 0x008c */
+			__u16	pgm_code;	/* 0x008e */
+		};
+		__u32 pgm_int_code;
+	};
+	__u32	data_exc_code;			/* 0x0090 */
+	__u16	mon_class_num;			/* 0x0094 */
+	union {
+		struct {
+			__u8	per_code;	/* 0x0096 */
+			__u8	per_atmid;	/* 0x0097 */
+		};
+		__u16 per_code_combined;
+	};
+	__u64	per_address;			/* 0x0098 */
+	__u8	exc_access_id;			/* 0x00a0 */
+	__u8	per_access_id;			/* 0x00a1 */
+	__u8	op_access_id;			/* 0x00a2 */
+	__u8	ar_mode_id;			/* 0x00a3 */
+	__u8	pad_0x00a4[0x00a8-0x00a4];	/* 0x00a4 */
+	__u64	trans_exc_code;			/* 0x00a8 */
+	__u64	monitor_code;			/* 0x00b0 */
+	union {
+		struct {
+			__u16	subchannel_id;	/* 0x00b8 */
+			__u16	subchannel_nr;	/* 0x00ba */
+			__u32	io_int_parm;	/* 0x00bc */
+			__u32	io_int_word;	/* 0x00c0 */
+		};
+		struct tpi_info	tpi_info;	/* 0x00b8 */
+	};
+	__u8	pad_0x00c4[0x00c8-0x00c4];	/* 0x00c4 */
+	__u32	stfl_fac_list;			/* 0x00c8 */
+	__u8	pad_0x00cc[0x00e8-0x00cc];	/* 0x00cc */
+	__u64	mcck_interruption_code;		/* 0x00e8 */
+	__u8	pad_0x00f0[0x00f4-0x00f0];	/* 0x00f0 */
+	__u32	external_damage_code;		/* 0x00f4 */
+	__u64	failing_storage_address;	/* 0x00f8 */
+	__u8	pad_0x0100[0x0110-0x0100];	/* 0x0100 */
+	__u64	pgm_last_break;			/* 0x0110 */
+	__u8	pad_0x0118[0x0120-0x0118];	/* 0x0118 */
+	psw_t	restart_old_psw;		/* 0x0120 */
+	psw_t	external_old_psw;		/* 0x0130 */
+	psw_t	svc_old_psw;			/* 0x0140 */
+	psw_t	program_old_psw;		/* 0x0150 */
+	psw_t	mcck_old_psw;			/* 0x0160 */
+	psw_t	io_old_psw;			/* 0x0170 */
+	__u8	pad_0x0180[0x01a0-0x0180];	/* 0x0180 */
+	psw_t	restart_psw;			/* 0x01a0 */
+	psw_t	external_new_psw;		/* 0x01b0 */
+	psw_t	svc_new_psw;			/* 0x01c0 */
+	psw_t	program_new_psw;		/* 0x01d0 */
+	psw_t	mcck_new_psw;			/* 0x01e0 */
+	psw_t	io_new_psw;			/* 0x01f0 */
+
+	/* Save areas. */
+	__u64	save_area_sync[8];		/* 0x0200 */
+	__u64	save_area_async[8];		/* 0x0240 */
+	__u64	save_area_restart[1];		/* 0x0280 */
+
+	/* CPU flags. */
+	__u64	cpu_flags;			/* 0x0288 */
+
+	/* Return psws. */
+	psw_t	return_psw;			/* 0x0290 */
+	psw_t	return_mcck_psw;		/* 0x02a0 */
+
+	__u64	last_break;			/* 0x02b0 */
+
+	/* CPU accounting and timing values. */
+	__u64	sys_enter_timer;		/* 0x02b8 */
+	__u64	mcck_enter_timer;		/* 0x02c0 */
+	__u64	exit_timer;			/* 0x02c8 */
+	__u64	user_timer;			/* 0x02d0 */
+	__u64	guest_timer;			/* 0x02d8 */
+	__u64	system_timer;			/* 0x02e0 */
+	__u64	hardirq_timer;			/* 0x02e8 */
+	__u64	softirq_timer;			/* 0x02f0 */
+	__u64	steal_timer;			/* 0x02f8 */
+	__u64	avg_steal_timer;		/* 0x0300 */
+	__u64	last_update_timer;		/* 0x0308 */
+	__u64	last_update_clock;		/* 0x0310 */
+	__u64	int_clock;			/* 0x0318 */
+	__u8	pad_0x0320[0x0328-0x0320];	/* 0x0320 */
+	__u64	clock_comparator;		/* 0x0328 */
+	__u64	boot_clock[2];			/* 0x0330 */
+
+	/* Current process. */
+	__u64	current_task;			/* 0x0340 */
+	__u64	kernel_stack;			/* 0x0348 */
+
+	/* Interrupt, DAT-off and restartstack. */
+	__u64	async_stack;			/* 0x0350 */
+	__u64	nodat_stack;			/* 0x0358 */
+	__u64	restart_stack;			/* 0x0360 */
+	__u64	mcck_stack;			/* 0x0368 */
+	/* Restart function and parameter. */
+	__u64	restart_fn;			/* 0x0370 */
+	__u64	restart_data;			/* 0x0378 */
+	__u32	restart_source;			/* 0x0380 */
+	__u32	restart_flags;			/* 0x0384 */
+
+	/* Address space pointer. */
+	__u64	kernel_asce;			/* 0x0388 */
+	__u64	user_asce;			/* 0x0390 */
+
+	/*
+	 * The lpp and current_pid fields form a
+	 * 64-bit value that is set as program
+	 * parameter with the LPP instruction.
+	 */
+	__u32	lpp;				/* 0x0398 */
+	__u32	current_pid;			/* 0x039c */
+
+	/* SMP info area */
+	__u32	cpu_nr;				/* 0x03a0 */
+	__u32	softirq_pending;		/* 0x03a4 */
+	__s32	preempt_count;			/* 0x03a8 */
+	__u32	spinlock_lockval;		/* 0x03ac */
+	__u32	spinlock_index;			/* 0x03b0 */
+	__u32	fpu_flags;			/* 0x03b4 */
+	__u64	percpu_offset;			/* 0x03b8 */
+	__u8	pad_0x03c0[0x03c8-0x03c0];	/* 0x03c0 */
+	__u64	machine_flags;			/* 0x03c8 */
+	__u64	gmap;				/* 0x03d0 */
+	__u8	pad_0x03d8[0x0400-0x03d8];	/* 0x03d8 */
+
+	__u32	return_lpswe;			/* 0x0400 */
+	__u32	return_mcck_lpswe;		/* 0x0404 */
+	__u8	pad_0x040a[0x0e00-0x0408];	/* 0x0408 */
+
+	/*
+	 * 0xe00 contains the address of the IPL Parameter Information
+	 * block. Dump tools need IPIB for IPL after dump.
+	 * Note: do not change the position of any fields in 0x0e00-0x0f00
+	 */
+	__u64	ipib;				/* 0x0e00 */
+	__u32	ipib_checksum;			/* 0x0e08 */
+	__u64	vmcore_info;			/* 0x0e0c */
+	__u8	pad_0x0e14[0x0e18-0x0e14];	/* 0x0e14 */
+	__u64	os_info;			/* 0x0e18 */
+	__u8	pad_0x0e20[0x11b0-0x0e20];	/* 0x0e20 */
+
+	/* Pointer to the machine check extended save area */
+	__u64	mcesad;				/* 0x11b0 */
+
+	/* 64 bit extparam used for pfault/diag 250: defined by architecture */
+	__u64	ext_params2;			/* 0x11B8 */
+	__u8	pad_0x11c0[0x1200-0x11C0];	/* 0x11C0 */
+
+	/* CPU register save area: defined by architecture */
+	__u64	floating_pt_save_area[16];	/* 0x1200 */
+	__u64	gpregs_save_area[16];		/* 0x1280 */
+	psw_t	psw_save_area;			/* 0x1300 */
+	__u8	pad_0x1310[0x1318-0x1310];	/* 0x1310 */
+	__u32	prefixreg_save_area;		/* 0x1318 */
+	__u32	fpt_creg_save_area;		/* 0x131c */
+	__u8	pad_0x1320[0x1324-0x1320];	/* 0x1320 */
+	__u32	tod_progreg_save_area;		/* 0x1324 */
+	__u32	cpu_timer_save_area[2];		/* 0x1328 */
+	__u32	clock_comp_save_area[2];	/* 0x1330 */
+	__u64	last_break_save_area;		/* 0x1338 */
+	__u32	access_regs_save_area[16];	/* 0x1340 */
+	__u64	cregs_save_area[16];		/* 0x1380 */
+	__u8	pad_0x1400[0x1500-0x1400];	/* 0x1400 */
+	/* Cryptography-counter designation */
+	__u64	ccd;				/* 0x1500 */
+	/* AI-extension counter designation */
+	__u64	aicd;				/* 0x1508 */
+	__u8	pad_0x1510[0x1800-0x1510];	/* 0x1510 */
+
+	/* Transaction abort diagnostic block */
+	struct pgm_tdb pgm_tdb;			/* 0x1800 */
+	__u8	pad_0x1900[0x2000-0x1900];	/* 0x1900 */
+} __packed __aligned(8192);
+
+#define S390_lowcore (*((struct lowcore *) 0))
+
+extern struct lowcore *lowcore_ptr[];
+
+static inline void set_prefix(__u32 address)
+{
+	asm volatile("spx %0" : : "Q" (address) : "memory");
+}
+
+static inline __u32 store_prefix(void)
+{
+	__u32 address;
+
+	asm volatile("stpx %0" : "=Q" (address));
+	return address;
+}
+
+#endif /* _ASM_S390_LOWCORE_H */
diff --git a/arch/s390/include/asm/maccess.h b/arch/s390/include/asm/maccess.h
new file mode 100644
index 0000000000..50225940d9
--- /dev/null
+++ b/arch/s390/include/asm/maccess.h
@@ -0,0 +1,20 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __ASM_S390_MACCESS_H
+#define __ASM_S390_MACCESS_H
+
+#include <linux/types.h>
+
+#define MEMCPY_REAL_SIZE	PAGE_SIZE
+#define MEMCPY_REAL_MASK	PAGE_MASK
+
+struct iov_iter;
+
+extern unsigned long __memcpy_real_area;
+extern pte_t *memcpy_real_ptep;
+size_t memcpy_real_iter(struct iov_iter *iter, unsigned long src, size_t count);
+int memcpy_real(void *dest, unsigned long src, size_t count);
+#ifdef CONFIG_CRASH_DUMP
+int copy_oldmem_kernel(void *dst, unsigned long src, size_t count);
+#endif
+
+#endif /* __ASM_S390_MACCESS_H */
diff --git a/arch/s390/include/asm/mem_encrypt.h b/arch/s390/include/asm/mem_encrypt.h
new file mode 100644
index 0000000000..b85e13505a
--- /dev/null
+++ b/arch/s390/include/asm/mem_encrypt.h
@@ -0,0 +1,12 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef S390_MEM_ENCRYPT_H__
+#define S390_MEM_ENCRYPT_H__
+
+#ifndef __ASSEMBLY__
+
+int set_memory_encrypted(unsigned long vaddr, int numpages);
+int set_memory_decrypted(unsigned long vaddr, int numpages);
+
+#endif	/* __ASSEMBLY__ */
+
+#endif	/* S390_MEM_ENCRYPT_H__ */
diff --git a/arch/s390/include/asm/mmu.h b/arch/s390/include/asm/mmu.h
new file mode 100644
index 0000000000..829d68e2c6
--- /dev/null
+++ b/arch/s390/include/asm/mmu.h
@@ -0,0 +1,45 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __MMU_H
+#define __MMU_H
+
+#include <linux/cpumask.h>
+#include <linux/errno.h>
+#include <asm/asm-extable.h>
+
+typedef struct {
+	spinlock_t lock;
+	cpumask_t cpu_attach_mask;
+	atomic_t flush_count;
+	unsigned int flush_mm;
+	struct list_head pgtable_list;
+	struct list_head gmap_list;
+	unsigned long gmap_asce;
+	unsigned long asce;
+	unsigned long asce_limit;
+	unsigned long vdso_base;
+	/* The mmu context belongs to a secure guest. */
+	atomic_t protected_count;
+	/*
+	 * The following bitfields need a down_write on the mm
+	 * semaphore when they are written to. As they are only
+	 * written once, they can be read without a lock.
+	 *
+	 * The mmu context allocates 4K page tables.
+	 */
+	unsigned int alloc_pgste:1;
+	/* The mmu context uses extended page tables. */
+	unsigned int has_pgste:1;
+	/* The mmu context uses storage keys. */
+	unsigned int uses_skeys:1;
+	/* The mmu context uses CMM. */
+	unsigned int uses_cmm:1;
+	/* The gmaps associated with this context are allowed to use huge pages. */
+	unsigned int allow_gmap_hpage_1m:1;
+} mm_context_t;
+
+#define INIT_MM_CONTEXT(name)						   \
+	.context.lock =	__SPIN_LOCK_UNLOCKED(name.context.lock),	   \
+	.context.pgtable_list = LIST_HEAD_INIT(name.context.pgtable_list), \
+	.context.gmap_list = LIST_HEAD_INIT(name.context.gmap_list),
+
+#endif
diff --git a/arch/s390/include/asm/mmu_context.h b/arch/s390/include/asm/mmu_context.h
new file mode 100644
index 0000000000..2a38af5a00
--- /dev/null
+++ b/arch/s390/include/asm/mmu_context.h
@@ -0,0 +1,128 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ *  S390 version
+ *
+ *  Derived from "include/asm-i386/mmu_context.h"
+ */
+
+#ifndef __S390_MMU_CONTEXT_H
+#define __S390_MMU_CONTEXT_H
+
+#include <asm/pgalloc.h>
+#include <linux/uaccess.h>
+#include <linux/mm_types.h>
+#include <asm/tlbflush.h>
+#include <asm/ctl_reg.h>
+#include <asm-generic/mm_hooks.h>
+
+#define init_new_context init_new_context
+static inline int init_new_context(struct task_struct *tsk,
+				   struct mm_struct *mm)
+{
+	unsigned long asce_type, init_entry;
+
+	spin_lock_init(&mm->context.lock);
+	INIT_LIST_HEAD(&mm->context.pgtable_list);
+	INIT_LIST_HEAD(&mm->context.gmap_list);
+	cpumask_clear(&mm->context.cpu_attach_mask);
+	atomic_set(&mm->context.flush_count, 0);
+	atomic_set(&mm->context.protected_count, 0);
+	mm->context.gmap_asce = 0;
+	mm->context.flush_mm = 0;
+#ifdef CONFIG_PGSTE
+	mm->context.alloc_pgste = page_table_allocate_pgste ||
+		test_thread_flag(TIF_PGSTE) ||
+		(current->mm && current->mm->context.alloc_pgste);
+	mm->context.has_pgste = 0;
+	mm->context.uses_skeys = 0;
+	mm->context.uses_cmm = 0;
+	mm->context.allow_gmap_hpage_1m = 0;
+#endif
+	switch (mm->context.asce_limit) {
+	default:
+		/*
+		 * context created by exec, the value of asce_limit can
+		 * only be zero in this case
+		 */
+		VM_BUG_ON(mm->context.asce_limit);
+		/* continue as 3-level task */
+		mm->context.asce_limit = _REGION2_SIZE;
+		fallthrough;
+	case _REGION2_SIZE:
+		/* forked 3-level task */
+		init_entry = _REGION3_ENTRY_EMPTY;
+		asce_type = _ASCE_TYPE_REGION3;
+		break;
+	case TASK_SIZE_MAX:
+		/* forked 5-level task */
+		init_entry = _REGION1_ENTRY_EMPTY;
+		asce_type = _ASCE_TYPE_REGION1;
+		break;
+	case _REGION1_SIZE:
+		/* forked 4-level task */
+		init_entry = _REGION2_ENTRY_EMPTY;
+		asce_type = _ASCE_TYPE_REGION2;
+		break;
+	}
+	mm->context.asce = __pa(mm->pgd) | _ASCE_TABLE_LENGTH |
+			   _ASCE_USER_BITS | asce_type;
+	crst_table_init((unsigned long *) mm->pgd, init_entry);
+	return 0;
+}
+
+static inline void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
+				      struct task_struct *tsk)
+{
+	int cpu = smp_processor_id();
+
+	if (next == &init_mm)
+		S390_lowcore.user_asce = s390_invalid_asce;
+	else
+		S390_lowcore.user_asce = next->context.asce;
+	cpumask_set_cpu(cpu, &next->context.cpu_attach_mask);
+	/* Clear previous user-ASCE from CR7 */
+	__ctl_load(s390_invalid_asce, 7, 7);
+	if (prev != next)
+		cpumask_clear_cpu(cpu, &prev->context.cpu_attach_mask);
+}
+#define switch_mm_irqs_off switch_mm_irqs_off
+
+static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next,
+			     struct task_struct *tsk)
+{
+	unsigned long flags;
+
+	local_irq_save(flags);
+	switch_mm_irqs_off(prev, next, tsk);
+	local_irq_restore(flags);
+}
+
+#define finish_arch_post_lock_switch finish_arch_post_lock_switch
+static inline void finish_arch_post_lock_switch(void)
+{
+	struct task_struct *tsk = current;
+	struct mm_struct *mm = tsk->mm;
+
+	if (mm) {
+		preempt_disable();
+		while (atomic_read(&mm->context.flush_count))
+			cpu_relax();
+		cpumask_set_cpu(smp_processor_id(), mm_cpumask(mm));
+		__tlb_flush_mm_lazy(mm);
+		preempt_enable();
+	}
+	__ctl_load(S390_lowcore.user_asce, 7, 7);
+}
+
+#define activate_mm activate_mm
+static inline void activate_mm(struct mm_struct *prev,
+                               struct mm_struct *next)
+{
+	switch_mm(prev, next, current);
+	cpumask_set_cpu(smp_processor_id(), mm_cpumask(next));
+	__ctl_load(S390_lowcore.user_asce, 7, 7);
+}
+
+#include <asm-generic/mmu_context.h>
+
+#endif /* __S390_MMU_CONTEXT_H */
diff --git a/arch/s390/include/asm/mmzone.h b/arch/s390/include/asm/mmzone.h
new file mode 100644
index 0000000000..73e3e7c697
--- /dev/null
+++ b/arch/s390/include/asm/mmzone.h
@@ -0,0 +1,17 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * NUMA support for s390
+ *
+ * Copyright IBM Corp. 2015
+ */
+
+#ifndef _ASM_S390_MMZONE_H
+#define _ASM_S390_MMZONE_H
+
+#ifdef CONFIG_NUMA
+
+extern struct pglist_data *node_data[];
+#define NODE_DATA(nid) (node_data[nid])
+
+#endif /* CONFIG_NUMA */
+#endif /* _ASM_S390_MMZONE_H */
diff --git a/arch/s390/include/asm/module.h b/arch/s390/include/asm/module.h
new file mode 100644
index 0000000000..9f1eea1587
--- /dev/null
+++ b/arch/s390/include/asm/module.h
@@ -0,0 +1,41 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_S390_MODULE_H
+#define _ASM_S390_MODULE_H
+
+#include <asm-generic/module.h>
+
+/*
+ * This file contains the s390 architecture specific module code.
+ */
+
+struct mod_arch_syminfo {
+	unsigned long got_offset;
+	unsigned long plt_offset;
+	int got_initialized;
+	int plt_initialized;
+};
+
+struct mod_arch_specific {
+	/* Starting offset of got in the module core memory. */
+	unsigned long got_offset;
+	/* Starting offset of plt in the module core memory. */
+	unsigned long plt_offset;
+	/* Size of the got. */
+	unsigned long got_size;
+	/* Size of the plt. */
+	unsigned long plt_size;
+	/* Number of symbols in syminfo. */
+	int nsyms;
+	/* Additional symbol information (got and plt offsets). */
+	struct mod_arch_syminfo *syminfo;
+#ifdef CONFIG_FUNCTION_TRACER
+	/* Start of memory reserved for ftrace hotpatch trampolines. */
+	struct ftrace_hotpatch_trampoline *trampolines_start;
+	/* End of memory reserved for ftrace hotpatch trampolines. */
+	struct ftrace_hotpatch_trampoline *trampolines_end;
+	/* Next unused ftrace hotpatch trampoline slot. */
+	struct ftrace_hotpatch_trampoline *next_trampoline;
+#endif /* CONFIG_FUNCTION_TRACER */
+};
+
+#endif /* _ASM_S390_MODULE_H */
diff --git a/arch/s390/include/asm/msi.h b/arch/s390/include/asm/msi.h
new file mode 100644
index 0000000000..399343ed9f
--- /dev/null
+++ b/arch/s390/include/asm/msi.h
@@ -0,0 +1,17 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_S390_MSI_H
+#define _ASM_S390_MSI_H
+#include <asm-generic/msi.h>
+
+/*
+ * Work around S390 not using irq_domain at all so we can't set
+ * IRQ_DOMAIN_FLAG_ISOLATED_MSI. See for an explanation how it works:
+ *
+ * https://lore.kernel.org/r/31af8174-35e9-ebeb-b9ef-74c90d4bfd93@linux.ibm.com/
+ *
+ * Note this is less isolated than the ARM/x86 versions as userspace can trigger
+ * MSI belonging to kernel devices within the same gisa.
+ */
+#define arch_is_isolated_msi() true
+
+#endif
diff --git a/arch/s390/include/asm/nmi.h b/arch/s390/include/asm/nmi.h
new file mode 100644
index 0000000000..227466ce9e
--- /dev/null
+++ b/arch/s390/include/asm/nmi.h
@@ -0,0 +1,108 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ *   Machine check handler definitions
+ *
+ *    Copyright IBM Corp. 2000, 2009
+ *    Author(s): Ingo Adlung <adlung@de.ibm.com>,
+ *		 Martin Schwidefsky <schwidefsky@de.ibm.com>,
+ *		 Cornelia Huck <cornelia.huck@de.ibm.com>,
+ */
+
+#ifndef _ASM_S390_NMI_H
+#define _ASM_S390_NMI_H
+
+#include <linux/bits.h>
+#include <linux/types.h>
+
+#define MCIC_SUBCLASS_MASK	(1ULL<<63 | 1ULL<<62 | 1ULL<<61 | \
+				1ULL<<59 | 1ULL<<58 | 1ULL<<56 | \
+				1ULL<<55 | 1ULL<<54 | 1ULL<<53 | \
+				1ULL<<52 | 1ULL<<47 | 1ULL<<46 | \
+				1ULL<<45 | 1ULL<<44)
+#define MCCK_CODE_SYSTEM_DAMAGE		BIT(63)
+#define MCCK_CODE_EXT_DAMAGE		BIT(63 - 5)
+#define MCCK_CODE_CP			BIT(63 - 9)
+#define MCCK_CODE_STG_ERROR		BIT(63 - 16)
+#define MCCK_CODE_STG_KEY_ERROR		BIT(63 - 18)
+#define MCCK_CODE_STG_DEGRAD		BIT(63 - 19)
+#define MCCK_CODE_PSW_MWP_VALID		BIT(63 - 20)
+#define MCCK_CODE_PSW_IA_VALID		BIT(63 - 23)
+#define MCCK_CODE_STG_FAIL_ADDR		BIT(63 - 24)
+#define MCCK_CODE_CR_VALID		BIT(63 - 29)
+#define MCCK_CODE_GS_VALID		BIT(63 - 36)
+#define MCCK_CODE_FC_VALID		BIT(63 - 43)
+#define MCCK_CODE_CPU_TIMER_VALID	BIT(63 - 46)
+
+#ifndef __ASSEMBLY__
+
+union mci {
+	unsigned long val;
+	struct {
+		u64 sd :  1; /* 00 system damage */
+		u64 pd :  1; /* 01 instruction-processing damage */
+		u64 sr :  1; /* 02 system recovery */
+		u64    :  1; /* 03 */
+		u64 cd :  1; /* 04 timing-facility damage */
+		u64 ed :  1; /* 05 external damage */
+		u64    :  1; /* 06 */
+		u64 dg :  1; /* 07 degradation */
+		u64 w  :  1; /* 08 warning pending */
+		u64 cp :  1; /* 09 channel-report pending */
+		u64 sp :  1; /* 10 service-processor damage */
+		u64 ck :  1; /* 11 channel-subsystem damage */
+		u64    :  2; /* 12-13 */
+		u64 b  :  1; /* 14 backed up */
+		u64    :  1; /* 15 */
+		u64 se :  1; /* 16 storage error uncorrected */
+		u64 sc :  1; /* 17 storage error corrected */
+		u64 ke :  1; /* 18 storage-key error uncorrected */
+		u64 ds :  1; /* 19 storage degradation */
+		u64 wp :  1; /* 20 psw mwp validity */
+		u64 ms :  1; /* 21 psw mask and key validity */
+		u64 pm :  1; /* 22 psw program mask and cc validity */
+		u64 ia :  1; /* 23 psw instruction address validity */
+		u64 fa :  1; /* 24 failing storage address validity */
+		u64 vr :  1; /* 25 vector register validity */
+		u64 ec :  1; /* 26 external damage code validity */
+		u64 fp :  1; /* 27 floating point register validity */
+		u64 gr :  1; /* 28 general register validity */
+		u64 cr :  1; /* 29 control register validity */
+		u64    :  1; /* 30 */
+		u64 st :  1; /* 31 storage logical validity */
+		u64 ie :  1; /* 32 indirect storage error */
+		u64 ar :  1; /* 33 access register validity */
+		u64 da :  1; /* 34 delayed access exception */
+		u64    :  1; /* 35 */
+		u64 gs :  1; /* 36 guarded storage registers validity */
+		u64    :  5; /* 37-41 */
+		u64 pr :  1; /* 42 tod programmable register validity */
+		u64 fc :  1; /* 43 fp control register validity */
+		u64 ap :  1; /* 44 ancillary report */
+		u64    :  1; /* 45 */
+		u64 ct :  1; /* 46 cpu timer validity */
+		u64 cc :  1; /* 47 clock comparator validity */
+		u64    : 16; /* 47-63 */
+	};
+};
+
+#define MCESA_ORIGIN_MASK	(~0x3ffUL)
+#define MCESA_LC_MASK		(0xfUL)
+#define MCESA_MIN_SIZE		(1024)
+#define MCESA_MAX_SIZE		(2048)
+
+struct mcesa {
+	u8 vector_save_area[1024];
+	u8 guarded_storage_save_area[32];
+};
+
+struct pt_regs;
+
+void nmi_alloc_mcesa_early(u64 *mcesad);
+int nmi_alloc_mcesa(u64 *mcesad);
+void nmi_free_mcesa(u64 *mcesad);
+
+void s390_handle_mcck(void);
+void s390_do_machine_check(struct pt_regs *regs);
+
+#endif /* __ASSEMBLY__ */
+#endif /* _ASM_S390_NMI_H */
diff --git a/arch/s390/include/asm/nospec-branch.h b/arch/s390/include/asm/nospec-branch.h
new file mode 100644
index 0000000000..82725cf783
--- /dev/null
+++ b/arch/s390/include/asm/nospec-branch.h
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_S390_EXPOLINE_H
+#define _ASM_S390_EXPOLINE_H
+
+#ifndef __ASSEMBLY__
+
+#include <linux/types.h>
+
+extern int nospec_disable;
+
+void nospec_init_branches(void);
+void nospec_auto_detect(void);
+void nospec_revert(s32 *start, s32 *end);
+
+static inline bool nospec_uses_trampoline(void)
+{
+	return __is_defined(CC_USING_EXPOLINE) && !nospec_disable;
+}
+
+#endif /* __ASSEMBLY__ */
+
+#endif /* _ASM_S390_EXPOLINE_H */
diff --git a/arch/s390/include/asm/nospec-insn.h b/arch/s390/include/asm/nospec-insn.h
new file mode 100644
index 0000000000..7a946c42ad
--- /dev/null
+++ b/arch/s390/include/asm/nospec-insn.h
@@ -0,0 +1,132 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_S390_NOSPEC_ASM_H
+#define _ASM_S390_NOSPEC_ASM_H
+
+#include <linux/linkage.h>
+#include <asm/dwarf.h>
+
+#ifdef __ASSEMBLY__
+
+#ifdef CC_USING_EXPOLINE
+
+/*
+ * The expoline macros are used to create thunks in the same format
+ * as gcc generates them. The 'comdat' section flag makes sure that
+ * the various thunks are merged into a single copy.
+ */
+	.macro __THUNK_PROLOG_NAME name
+#ifdef CONFIG_EXPOLINE_EXTERN
+	.pushsection .text,"ax",@progbits
+	__ALIGN
+#else
+	.pushsection .text.\name,"axG",@progbits,\name,comdat
+#endif
+	.globl \name
+	.hidden \name
+	.type \name,@function
+\name:
+	CFI_STARTPROC
+	.endm
+
+	.macro __THUNK_EPILOG_NAME name
+	CFI_ENDPROC
+#ifdef CONFIG_EXPOLINE_EXTERN
+	.size \name, .-\name
+#endif
+	.popsection
+	.endm
+
+	.macro __THUNK_PROLOG_BR r1
+	__THUNK_PROLOG_NAME __s390_indirect_jump_r\r1
+	.endm
+
+	.macro __THUNK_EPILOG_BR r1
+	__THUNK_EPILOG_NAME __s390_indirect_jump_r\r1
+	.endm
+
+	.macro __THUNK_BR r1
+	jg	__s390_indirect_jump_r\r1
+	.endm
+
+	.macro __THUNK_BRASL r1,r2
+	brasl	\r1,__s390_indirect_jump_r\r2
+	.endm
+
+	.macro	__DECODE_R expand,reg
+	.set .L__decode_fail,1
+	.irp r1,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15
+	.ifc \reg,%r\r1
+	\expand \r1
+	.set .L__decode_fail,0
+	.endif
+	.endr
+	.if .L__decode_fail == 1
+	.error "__DECODE_R failed"
+	.endif
+	.endm
+
+	.macro	__DECODE_RR expand,rsave,rtarget
+	.set .L__decode_fail,1
+	.irp r1,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15
+	.ifc \rsave,%r\r1
+	.irp r2,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15
+	.ifc \rtarget,%r\r2
+	\expand \r1,\r2
+	.set .L__decode_fail,0
+	.endif
+	.endr
+	.endif
+	.endr
+	.if .L__decode_fail == 1
+	.error "__DECODE_RR failed"
+	.endif
+	.endm
+
+	.macro __THUNK_EX_BR reg
+	exrl	0,555f
+	j	.
+555:	br	\reg
+	.endm
+
+#ifdef CONFIG_EXPOLINE_EXTERN
+	.macro GEN_BR_THUNK reg
+	.endm
+	.macro GEN_BR_THUNK_EXTERN reg
+#else
+	.macro GEN_BR_THUNK reg
+#endif
+	__DECODE_R __THUNK_PROLOG_BR,\reg
+	__THUNK_EX_BR \reg
+	__DECODE_R __THUNK_EPILOG_BR,\reg
+	.endm
+
+	.macro BR_EX reg
+557:	__DECODE_R __THUNK_BR,\reg
+	.pushsection .s390_indirect_branches,"a",@progbits
+	.long	557b-.
+	.popsection
+	.endm
+
+	.macro BASR_EX rsave,rtarget
+559:	__DECODE_RR __THUNK_BRASL,\rsave,\rtarget
+	.pushsection .s390_indirect_branches,"a",@progbits
+	.long	559b-.
+	.popsection
+	.endm
+
+#else
+	.macro GEN_BR_THUNK reg
+	.endm
+
+	 .macro BR_EX reg
+	br	\reg
+	.endm
+
+	.macro BASR_EX rsave,rtarget
+	basr	\rsave,\rtarget
+	.endm
+#endif /* CC_USING_EXPOLINE */
+
+#endif /* __ASSEMBLY__ */
+
+#endif /* _ASM_S390_NOSPEC_ASM_H */
diff --git a/arch/s390/include/asm/numa.h b/arch/s390/include/asm/numa.h
new file mode 100644
index 0000000000..23cd5d1b73
--- /dev/null
+++ b/arch/s390/include/asm/numa.h
@@ -0,0 +1,25 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * NUMA support for s390
+ *
+ * Declare the NUMA core code structures and functions.
+ *
+ * Copyright IBM Corp. 2015
+ */
+
+#ifndef _ASM_S390_NUMA_H
+#define _ASM_S390_NUMA_H
+
+#ifdef CONFIG_NUMA
+
+#include <linux/numa.h>
+
+void numa_setup(void);
+
+#else
+
+static inline void numa_setup(void) { }
+
+#endif /* CONFIG_NUMA */
+
+#endif /* _ASM_S390_NUMA_H */
diff --git a/arch/s390/include/asm/os_info.h b/arch/s390/include/asm/os_info.h
new file mode 100644
index 0000000000..a4d2e103f1
--- /dev/null
+++ b/arch/s390/include/asm/os_info.h
@@ -0,0 +1,54 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * OS info memory interface
+ *
+ * Copyright IBM Corp. 2012
+ * Author(s): Michael Holzheu <holzheu@linux.vnet.ibm.com>
+ */
+#ifndef _ASM_S390_OS_INFO_H
+#define _ASM_S390_OS_INFO_H
+
+#include <linux/uio.h>
+
+#define OS_INFO_VERSION_MAJOR	1
+#define OS_INFO_VERSION_MINOR	1
+#define OS_INFO_MAGIC		0x4f53494e464f535aULL /* OSINFOSZ */
+
+#define OS_INFO_VMCOREINFO	0
+#define OS_INFO_REIPL_BLOCK	1
+#define OS_INFO_FLAGS_ENTRY	2
+
+#define OS_INFO_FLAG_REIPL_CLEAR	(1UL << 0)
+
+struct os_info_entry {
+	u64	addr;
+	u64	size;
+	u32	csum;
+} __packed;
+
+struct os_info {
+	u64	magic;
+	u32	csum;
+	u16	version_major;
+	u16	version_minor;
+	u64	crashkernel_addr;
+	u64	crashkernel_size;
+	struct os_info_entry entry[3];
+	u8	reserved[4004];
+} __packed;
+
+void os_info_init(void);
+void os_info_entry_add(int nr, void *ptr, u64 len);
+void os_info_crashkernel_add(unsigned long base, unsigned long size);
+u32 os_info_csum(struct os_info *os_info);
+
+#ifdef CONFIG_CRASH_DUMP
+void *os_info_old_entry(int nr, unsigned long *size);
+#else
+static inline void *os_info_old_entry(int nr, unsigned long *size)
+{
+	return NULL;
+}
+#endif
+
+#endif /* _ASM_S390_OS_INFO_H */
diff --git a/arch/s390/include/asm/page-states.h b/arch/s390/include/asm/page-states.h
new file mode 100644
index 0000000000..c33c4deb54
--- /dev/null
+++ b/arch/s390/include/asm/page-states.h
@@ -0,0 +1,21 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ *    Copyright IBM Corp. 2017
+ *    Author(s): Claudio Imbrenda <imbrenda@linux.vnet.ibm.com>
+ */
+
+#ifndef PAGE_STATES_H
+#define PAGE_STATES_H
+
+#define ESSA_GET_STATE			0
+#define ESSA_SET_STABLE			1
+#define ESSA_SET_UNUSED			2
+#define ESSA_SET_VOLATILE		3
+#define ESSA_SET_POT_VOLATILE		4
+#define ESSA_SET_STABLE_RESIDENT	5
+#define ESSA_SET_STABLE_IF_RESIDENT	6
+#define ESSA_SET_STABLE_NODAT		7
+
+#define ESSA_MAX	ESSA_SET_STABLE_NODAT
+
+#endif
diff --git a/arch/s390/include/asm/page.h b/arch/s390/include/asm/page.h
new file mode 100644
index 0000000000..cfec074331
--- /dev/null
+++ b/arch/s390/include/asm/page.h
@@ -0,0 +1,218 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ *  S390 version
+ *    Copyright IBM Corp. 1999, 2000
+ *    Author(s): Hartmut Penner (hp@de.ibm.com)
+ */
+
+#ifndef _S390_PAGE_H
+#define _S390_PAGE_H
+
+#include <linux/const.h>
+#include <asm/types.h>
+
+#define _PAGE_SHIFT	12
+#define _PAGE_SIZE	(_AC(1, UL) << _PAGE_SHIFT)
+#define _PAGE_MASK	(~(_PAGE_SIZE - 1))
+
+/* PAGE_SHIFT determines the page size */
+#define PAGE_SHIFT	_PAGE_SHIFT
+#define PAGE_SIZE	_PAGE_SIZE
+#define PAGE_MASK	_PAGE_MASK
+#define PAGE_DEFAULT_ACC	_AC(0, UL)
+/* storage-protection override */
+#define PAGE_SPO_ACC		9
+#define PAGE_DEFAULT_KEY	(PAGE_DEFAULT_ACC << 4)
+
+#define HPAGE_SHIFT	20
+#define HPAGE_SIZE	(1UL << HPAGE_SHIFT)
+#define HPAGE_MASK	(~(HPAGE_SIZE - 1))
+#define HUGETLB_PAGE_ORDER	(HPAGE_SHIFT - PAGE_SHIFT)
+#define HUGE_MAX_HSTATE		2
+
+#define ARCH_HAS_SETCLEAR_HUGE_PTE
+#define ARCH_HAS_HUGE_PTE_TYPE
+#define ARCH_HAS_PREPARE_HUGEPAGE
+#define ARCH_HAS_HUGEPAGE_CLEAR_FLUSH
+
+#define HAVE_ARCH_HUGETLB_UNMAPPED_AREA
+
+#include <asm/setup.h>
+#ifndef __ASSEMBLY__
+
+void __storage_key_init_range(unsigned long start, unsigned long end);
+
+static inline void storage_key_init_range(unsigned long start, unsigned long end)
+{
+	if (PAGE_DEFAULT_KEY != 0)
+		__storage_key_init_range(start, end);
+}
+
+#define clear_page(page)	memset((page), 0, PAGE_SIZE)
+
+/*
+ * copy_page uses the mvcl instruction with 0xb0 padding byte in order to
+ * bypass caches when copying a page. Especially when copying huge pages
+ * this keeps L1 and L2 data caches alive.
+ */
+static inline void copy_page(void *to, void *from)
+{
+	union register_pair dst, src;
+
+	dst.even = (unsigned long) to;
+	dst.odd  = 0x1000;
+	src.even = (unsigned long) from;
+	src.odd  = 0xb0001000;
+
+	asm volatile(
+		"	mvcl	%[dst],%[src]"
+		: [dst] "+&d" (dst.pair), [src] "+&d" (src.pair)
+		: : "memory", "cc");
+}
+
+#define clear_user_page(page, vaddr, pg)	clear_page(page)
+#define copy_user_page(to, from, vaddr, pg)	copy_page(to, from)
+
+#define vma_alloc_zeroed_movable_folio(vma, vaddr) \
+	vma_alloc_folio(GFP_HIGHUSER_MOVABLE | __GFP_ZERO, 0, vma, vaddr, false)
+
+/*
+ * These are used to make use of C type-checking..
+ */
+
+typedef struct { unsigned long pgprot; } pgprot_t;
+typedef struct { unsigned long pgste; } pgste_t;
+typedef struct { unsigned long pte; } pte_t;
+typedef struct { unsigned long pmd; } pmd_t;
+typedef struct { unsigned long pud; } pud_t;
+typedef struct { unsigned long p4d; } p4d_t;
+typedef struct { unsigned long pgd; } pgd_t;
+typedef pte_t *pgtable_t;
+
+#define pgprot_val(x)	((x).pgprot)
+#define pgste_val(x)	((x).pgste)
+
+static inline unsigned long pte_val(pte_t pte)
+{
+	return pte.pte;
+}
+
+static inline unsigned long pmd_val(pmd_t pmd)
+{
+	return pmd.pmd;
+}
+
+static inline unsigned long pud_val(pud_t pud)
+{
+	return pud.pud;
+}
+
+static inline unsigned long p4d_val(p4d_t p4d)
+{
+	return p4d.p4d;
+}
+
+static inline unsigned long pgd_val(pgd_t pgd)
+{
+	return pgd.pgd;
+}
+
+#define __pgste(x)	((pgste_t) { (x) } )
+#define __pte(x)        ((pte_t) { (x) } )
+#define __pmd(x)        ((pmd_t) { (x) } )
+#define __pud(x)	((pud_t) { (x) } )
+#define __p4d(x)	((p4d_t) { (x) } )
+#define __pgd(x)        ((pgd_t) { (x) } )
+#define __pgprot(x)     ((pgprot_t) { (x) } )
+
+static inline void page_set_storage_key(unsigned long addr,
+					unsigned char skey, int mapped)
+{
+	if (!mapped)
+		asm volatile(".insn rrf,0xb22b0000,%0,%1,8,0"
+			     : : "d" (skey), "a" (addr));
+	else
+		asm volatile("sske %0,%1" : : "d" (skey), "a" (addr));
+}
+
+static inline unsigned char page_get_storage_key(unsigned long addr)
+{
+	unsigned char skey;
+
+	asm volatile("iske %0,%1" : "=d" (skey) : "a" (addr));
+	return skey;
+}
+
+static inline int page_reset_referenced(unsigned long addr)
+{
+	int cc;
+
+	asm volatile(
+		"	rrbe	0,%1\n"
+		"	ipm	%0\n"
+		"	srl	%0,28\n"
+		: "=d" (cc) : "a" (addr) : "cc");
+	return cc;
+}
+
+/* Bits int the storage key */
+#define _PAGE_CHANGED		0x02	/* HW changed bit		*/
+#define _PAGE_REFERENCED	0x04	/* HW referenced bit		*/
+#define _PAGE_FP_BIT		0x08	/* HW fetch protection bit	*/
+#define _PAGE_ACC_BITS		0xf0	/* HW access control bits	*/
+
+struct page;
+void arch_free_page(struct page *page, int order);
+void arch_alloc_page(struct page *page, int order);
+void arch_set_page_dat(struct page *page, int order);
+
+static inline int devmem_is_allowed(unsigned long pfn)
+{
+	return 0;
+}
+
+#define HAVE_ARCH_FREE_PAGE
+#define HAVE_ARCH_ALLOC_PAGE
+
+#if IS_ENABLED(CONFIG_PGSTE)
+int arch_make_page_accessible(struct page *page);
+#define HAVE_ARCH_MAKE_PAGE_ACCESSIBLE
+#endif
+
+#define __PAGE_OFFSET		0x0UL
+#define PAGE_OFFSET		0x0UL
+
+#define __pa(x)			((unsigned long)(x))
+#define __va(x)			((void *)(unsigned long)(x))
+
+#define phys_to_pfn(phys)	((phys) >> PAGE_SHIFT)
+#define pfn_to_phys(pfn)	((pfn) << PAGE_SHIFT)
+
+#define phys_to_page(phys)	pfn_to_page(phys_to_pfn(phys))
+#define page_to_phys(page)	pfn_to_phys(page_to_pfn(page))
+
+static inline void *pfn_to_virt(unsigned long pfn)
+{
+	return __va(pfn_to_phys(pfn));
+}
+
+static inline unsigned long virt_to_pfn(const void *kaddr)
+{
+	return phys_to_pfn(__pa(kaddr));
+}
+
+#define pfn_to_kaddr(pfn)	pfn_to_virt(pfn)
+
+#define virt_to_page(kaddr)	pfn_to_page(virt_to_pfn(kaddr))
+#define page_to_virt(page)	pfn_to_virt(page_to_pfn(page))
+
+#define virt_addr_valid(kaddr)	pfn_valid(virt_to_pfn(kaddr))
+
+#define VM_DATA_DEFAULT_FLAGS	VM_DATA_FLAGS_NON_EXEC
+
+#endif /* !__ASSEMBLY__ */
+
+#include <asm-generic/memory_model.h>
+#include <asm-generic/getorder.h>
+
+#endif /* _S390_PAGE_H */
diff --git a/arch/s390/include/asm/pai.h b/arch/s390/include/asm/pai.h
new file mode 100644
index 0000000000..7d1888e3de
--- /dev/null
+++ b/arch/s390/include/asm/pai.h
@@ -0,0 +1,84 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Processor Activity Instrumentation support for cryptography counters
+ *
+ *  Copyright IBM Corp. 2022
+ *  Author(s): Thomas Richter <tmricht@linux.ibm.com>
+ */
+#ifndef _ASM_S390_PAI_H
+#define _ASM_S390_PAI_H
+
+#include <linux/jump_label.h>
+#include <asm/lowcore.h>
+#include <asm/ptrace.h>
+
+struct qpaci_info_block {
+	u64 header;
+	struct {
+		u64 : 8;
+		u64 num_cc : 8;	/* # of supported crypto counters */
+		u64 : 9;
+		u64 num_nnpa : 7;	/* # of supported NNPA counters */
+		u64 : 32;
+	};
+};
+
+static inline int qpaci(struct qpaci_info_block *info)
+{
+	/* Size of info (in double words minus one) */
+	size_t size = sizeof(*info) / sizeof(u64) - 1;
+	int cc;
+
+	asm volatile(
+		"	lgr	0,%[size]\n"
+		"	.insn	s,0xb28f0000,%[info]\n"
+		"	lgr	%[size],0\n"
+		"	ipm	%[cc]\n"
+		"	srl	%[cc],28\n"
+		: [cc] "=d" (cc), [info] "=Q" (*info), [size] "+&d" (size)
+		:
+		: "0", "cc", "memory");
+	return cc ? (size + 1) * sizeof(u64) : 0;
+}
+
+#define PAI_CRYPTO_BASE			0x1000	/* First event number */
+#define PAI_CRYPTO_MAXCTR		256	/* Max # of event counters */
+#define PAI_CRYPTO_KERNEL_OFFSET	2048
+#define PAI_NNPA_BASE			0x1800	/* First event number */
+#define PAI_NNPA_MAXCTR			128	/* Max # of event counters */
+
+DECLARE_STATIC_KEY_FALSE(pai_key);
+
+static __always_inline void pai_kernel_enter(struct pt_regs *regs)
+{
+	if (!IS_ENABLED(CONFIG_PERF_EVENTS))
+		return;
+	if (!static_branch_unlikely(&pai_key))
+		return;
+	if (!S390_lowcore.ccd)
+		return;
+	if (!user_mode(regs))
+		return;
+	WRITE_ONCE(S390_lowcore.ccd, S390_lowcore.ccd | PAI_CRYPTO_KERNEL_OFFSET);
+}
+
+static __always_inline void pai_kernel_exit(struct pt_regs *regs)
+{
+	if (!IS_ENABLED(CONFIG_PERF_EVENTS))
+		return;
+	if (!static_branch_unlikely(&pai_key))
+		return;
+	if (!S390_lowcore.ccd)
+		return;
+	if (!user_mode(regs))
+		return;
+	WRITE_ONCE(S390_lowcore.ccd, S390_lowcore.ccd & ~PAI_CRYPTO_KERNEL_OFFSET);
+}
+
+enum paievt_mode {
+	PAI_MODE_NONE,
+	PAI_MODE_SAMPLING,
+	PAI_MODE_COUNTING,
+};
+
+#endif
diff --git a/arch/s390/include/asm/pci.h b/arch/s390/include/asm/pci.h
new file mode 100644
index 0000000000..b248694e00
--- /dev/null
+++ b/arch/s390/include/asm/pci.h
@@ -0,0 +1,327 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __ASM_S390_PCI_H
+#define __ASM_S390_PCI_H
+
+#include <linux/pci.h>
+#include <linux/mutex.h>
+#include <linux/iommu.h>
+#include <linux/pci_hotplug.h>
+#include <asm/pci_clp.h>
+#include <asm/pci_debug.h>
+#include <asm/pci_insn.h>
+#include <asm/sclp.h>
+
+#define PCIBIOS_MIN_IO		0x1000
+#define PCIBIOS_MIN_MEM		0x10000000
+
+#define pcibios_assign_all_busses()	(0)
+
+void __iomem *pci_iomap(struct pci_dev *, int, unsigned long);
+void pci_iounmap(struct pci_dev *, void __iomem *);
+int pci_domain_nr(struct pci_bus *);
+int pci_proc_domain(struct pci_bus *);
+
+#define ZPCI_BUS_NR			0	/* default bus number */
+
+#define ZPCI_NR_DMA_SPACES		1
+#define ZPCI_NR_DEVICES			CONFIG_PCI_NR_FUNCTIONS
+#define ZPCI_DOMAIN_BITMAP_SIZE		(1 << 16)
+
+#ifdef PCI
+#if (ZPCI_NR_DEVICES > ZPCI_DOMAIN_BITMAP_SIZE)
+# error ZPCI_NR_DEVICES can not be bigger than ZPCI_DOMAIN_BITMAP_SIZE
+#endif
+#endif /* PCI */
+
+/* PCI Function Controls */
+#define ZPCI_FC_FN_ENABLED		0x80
+#define ZPCI_FC_ERROR			0x40
+#define ZPCI_FC_BLOCKED			0x20
+#define ZPCI_FC_DMA_ENABLED		0x10
+
+#define ZPCI_FMB_DMA_COUNTER_VALID	(1 << 23)
+
+struct zpci_fmb_fmt0 {
+	u64 dma_rbytes;
+	u64 dma_wbytes;
+};
+
+struct zpci_fmb_fmt1 {
+	u64 rx_bytes;
+	u64 rx_packets;
+	u64 tx_bytes;
+	u64 tx_packets;
+};
+
+struct zpci_fmb_fmt2 {
+	u64 consumed_work_units;
+	u64 max_work_units;
+};
+
+struct zpci_fmb_fmt3 {
+	u64 tx_bytes;
+};
+
+struct zpci_fmb {
+	u32 format	: 8;
+	u32 fmt_ind	: 24;
+	u32 samples;
+	u64 last_update;
+	/* common counters */
+	u64 ld_ops;
+	u64 st_ops;
+	u64 stb_ops;
+	u64 rpcit_ops;
+	/* format specific counters */
+	union {
+		struct zpci_fmb_fmt0 fmt0;
+		struct zpci_fmb_fmt1 fmt1;
+		struct zpci_fmb_fmt2 fmt2;
+		struct zpci_fmb_fmt3 fmt3;
+	};
+} __packed __aligned(128);
+
+enum zpci_state {
+	ZPCI_FN_STATE_STANDBY = 0,
+	ZPCI_FN_STATE_CONFIGURED = 1,
+	ZPCI_FN_STATE_RESERVED = 2,
+};
+
+struct zpci_bar_struct {
+	struct resource *res;		/* bus resource */
+	void __iomem	*mio_wb;
+	void __iomem	*mio_wt;
+	u32		val;		/* bar start & 3 flag bits */
+	u16		map_idx;	/* index into bar mapping array */
+	u8		size;		/* order 2 exponent */
+};
+
+struct s390_domain;
+struct kvm_zdev;
+
+#define ZPCI_FUNCTIONS_PER_BUS 256
+struct zpci_bus {
+	struct kref		kref;
+	struct pci_bus		*bus;
+	struct zpci_dev		*function[ZPCI_FUNCTIONS_PER_BUS];
+	struct list_head	resources;
+	struct list_head	bus_next;
+	struct resource		bus_resource;
+	int			pchid;
+	int			domain_nr;
+	bool			multifunction;
+	enum pci_bus_speed	max_bus_speed;
+};
+
+/* Private data per function */
+struct zpci_dev {
+	struct zpci_bus *zbus;
+	struct list_head entry;		/* list of all zpci_devices, needed for hotplug, etc. */
+	struct list_head iommu_list;
+	struct kref kref;
+	struct rcu_head rcu;
+	struct hotplug_slot hotplug_slot;
+
+	enum zpci_state state;
+	u32		fid;		/* function ID, used by sclp */
+	u32		fh;		/* function handle, used by insn's */
+	u32		gisa;		/* GISA designation for passthrough */
+	u16		vfn;		/* virtual function number */
+	u16		pchid;		/* physical channel ID */
+	u16		maxstbl;	/* Maximum store block size */
+	u8		pfgid;		/* function group ID */
+	u8		pft;		/* pci function type */
+	u8		port;
+	u8		dtsm;		/* Supported DT mask */
+	u8		rid_available	: 1;
+	u8		has_hp_slot	: 1;
+	u8		has_resources	: 1;
+	u8		is_physfn	: 1;
+	u8		util_str_avail	: 1;
+	u8		irqs_registered	: 1;
+	u8		reserved	: 2;
+	unsigned int	devfn;		/* DEVFN part of the RID*/
+
+	struct mutex lock;
+	u8 pfip[CLP_PFIP_NR_SEGMENTS];	/* pci function internal path */
+	u32 uid;			/* user defined id */
+	u8 util_str[CLP_UTIL_STR_LEN];	/* utility string */
+
+	/* IRQ stuff */
+	u64		msi_addr;	/* MSI address */
+	unsigned int	max_msi;	/* maximum number of MSI's */
+	unsigned int	msi_first_bit;
+	unsigned int	msi_nr_irqs;
+	struct airq_iv *aibv;		/* adapter interrupt bit vector */
+	unsigned long	aisb;		/* number of the summary bit */
+
+	/* DMA stuff */
+	unsigned long	*dma_table;
+	int		tlb_refresh;
+
+	spinlock_t	iommu_bitmap_lock;
+	unsigned long	*iommu_bitmap;
+	unsigned long	*lazy_bitmap;
+	unsigned long	iommu_size;
+	unsigned long	iommu_pages;
+	unsigned int	next_bit;
+
+	struct iommu_device iommu_dev;  /* IOMMU core handle */
+
+	char res_name[16];
+	bool mio_capable;
+	struct zpci_bar_struct bars[PCI_STD_NUM_BARS];
+
+	u64		start_dma;	/* Start of available DMA addresses */
+	u64		end_dma;	/* End of available DMA addresses */
+	u64		dma_mask;	/* DMA address space mask */
+
+	/* Function measurement block */
+	struct zpci_fmb *fmb;
+	u16		fmb_update;	/* update interval */
+	u16		fmb_length;
+	/* software counters */
+	atomic64_t allocated_pages;
+	atomic64_t mapped_pages;
+	atomic64_t unmapped_pages;
+
+	u8		version;
+	enum pci_bus_speed max_bus_speed;
+
+	struct dentry	*debugfs_dev;
+
+	/* IOMMU and passthrough */
+	struct s390_domain *s390_domain; /* s390 IOMMU domain data */
+	struct kvm_zdev *kzdev;
+	struct mutex kzdev_lock;
+};
+
+static inline bool zdev_enabled(struct zpci_dev *zdev)
+{
+	return (zdev->fh & (1UL << 31)) ? true : false;
+}
+
+extern const struct attribute_group *zpci_attr_groups[];
+extern unsigned int s390_pci_force_floating __initdata;
+extern unsigned int s390_pci_no_rid;
+
+extern union zpci_sic_iib *zpci_aipb;
+extern struct airq_iv *zpci_aif_sbv;
+
+/* -----------------------------------------------------------------------------
+  Prototypes
+----------------------------------------------------------------------------- */
+/* Base stuff */
+struct zpci_dev *zpci_create_device(u32 fid, u32 fh, enum zpci_state state);
+int zpci_enable_device(struct zpci_dev *);
+int zpci_disable_device(struct zpci_dev *);
+int zpci_scan_configured_device(struct zpci_dev *zdev, u32 fh);
+int zpci_deconfigure_device(struct zpci_dev *zdev);
+void zpci_device_reserved(struct zpci_dev *zdev);
+bool zpci_is_device_configured(struct zpci_dev *zdev);
+
+int zpci_hot_reset_device(struct zpci_dev *zdev);
+int zpci_register_ioat(struct zpci_dev *, u8, u64, u64, u64, u8 *);
+int zpci_unregister_ioat(struct zpci_dev *, u8);
+void zpci_remove_reserved_devices(void);
+void zpci_update_fh(struct zpci_dev *zdev, u32 fh);
+
+/* CLP */
+int clp_setup_writeback_mio(void);
+int clp_scan_pci_devices(void);
+int clp_query_pci_fn(struct zpci_dev *zdev);
+int clp_enable_fh(struct zpci_dev *zdev, u32 *fh, u8 nr_dma_as);
+int clp_disable_fh(struct zpci_dev *zdev, u32 *fh);
+int clp_get_state(u32 fid, enum zpci_state *state);
+int clp_refresh_fh(u32 fid, u32 *fh);
+
+/* UID */
+void update_uid_checking(bool new);
+
+/* IOMMU Interface */
+int zpci_init_iommu(struct zpci_dev *zdev);
+void zpci_destroy_iommu(struct zpci_dev *zdev);
+
+#ifdef CONFIG_PCI
+static inline bool zpci_use_mio(struct zpci_dev *zdev)
+{
+	return static_branch_likely(&have_mio) && zdev->mio_capable;
+}
+
+/* Error handling and recovery */
+void zpci_event_error(void *);
+void zpci_event_availability(void *);
+bool zpci_is_enabled(void);
+#else /* CONFIG_PCI */
+static inline void zpci_event_error(void *e) {}
+static inline void zpci_event_availability(void *e) {}
+#endif /* CONFIG_PCI */
+
+#ifdef CONFIG_HOTPLUG_PCI_S390
+int zpci_init_slot(struct zpci_dev *);
+void zpci_exit_slot(struct zpci_dev *);
+#else /* CONFIG_HOTPLUG_PCI_S390 */
+static inline int zpci_init_slot(struct zpci_dev *zdev)
+{
+	return 0;
+}
+static inline void zpci_exit_slot(struct zpci_dev *zdev) {}
+#endif /* CONFIG_HOTPLUG_PCI_S390 */
+
+/* Helpers */
+static inline struct zpci_dev *to_zpci(struct pci_dev *pdev)
+{
+	struct zpci_bus *zbus = pdev->sysdata;
+
+	return zbus->function[pdev->devfn];
+}
+
+static inline struct zpci_dev *to_zpci_dev(struct device *dev)
+{
+	return to_zpci(to_pci_dev(dev));
+}
+
+struct zpci_dev *get_zdev_by_fid(u32);
+
+/* DMA */
+int zpci_dma_init(void);
+void zpci_dma_exit(void);
+int zpci_dma_init_device(struct zpci_dev *zdev);
+int zpci_dma_exit_device(struct zpci_dev *zdev);
+
+/* IRQ */
+int __init zpci_irq_init(void);
+void __init zpci_irq_exit(void);
+
+/* FMB */
+int zpci_fmb_enable_device(struct zpci_dev *);
+int zpci_fmb_disable_device(struct zpci_dev *);
+
+/* Debug */
+int zpci_debug_init(void);
+void zpci_debug_exit(void);
+void zpci_debug_init_device(struct zpci_dev *, const char *);
+void zpci_debug_exit_device(struct zpci_dev *);
+
+/* Error handling */
+int zpci_report_error(struct pci_dev *, struct zpci_report_error_header *);
+int zpci_clear_error_state(struct zpci_dev *zdev);
+int zpci_reset_load_store_blocked(struct zpci_dev *zdev);
+
+#ifdef CONFIG_NUMA
+
+/* Returns the node based on PCI bus */
+static inline int __pcibus_to_node(const struct pci_bus *bus)
+{
+	return NUMA_NO_NODE;
+}
+
+static inline const struct cpumask *
+cpumask_of_pcibus(const struct pci_bus *bus)
+{
+	return cpu_online_mask;
+}
+
+#endif /* CONFIG_NUMA */
+
+#endif
diff --git a/arch/s390/include/asm/pci_clp.h b/arch/s390/include/asm/pci_clp.h
new file mode 100644
index 0000000000..d6189ed14f
--- /dev/null
+++ b/arch/s390/include/asm/pci_clp.h
@@ -0,0 +1,217 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_S390_PCI_CLP_H
+#define _ASM_S390_PCI_CLP_H
+
+#include <asm/clp.h>
+
+/*
+ * Call Logical Processor - Command Codes
+ */
+#define CLP_SLPC		0x0001
+#define CLP_LIST_PCI		0x0002
+#define CLP_QUERY_PCI_FN	0x0003
+#define CLP_QUERY_PCI_FNGRP	0x0004
+#define CLP_SET_PCI_FN		0x0005
+
+/* PCI function handle list entry */
+struct clp_fh_list_entry {
+	u16 device_id;
+	u16 vendor_id;
+	u32 config_state :  1;
+	u32		 : 31;
+	u32 fid;		/* PCI function id */
+	u32 fh;			/* PCI function handle */
+} __packed;
+
+#define CLP_RC_SETPCIFN_FH	0x0101	/* Invalid PCI fn handle */
+#define CLP_RC_SETPCIFN_FHOP	0x0102	/* Fn handle not valid for op */
+#define CLP_RC_SETPCIFN_DMAAS	0x0103	/* Invalid DMA addr space */
+#define CLP_RC_SETPCIFN_RES	0x0104	/* Insufficient resources */
+#define CLP_RC_SETPCIFN_ALRDY	0x0105	/* Fn already in requested state */
+#define CLP_RC_SETPCIFN_ERR	0x0106	/* Fn in permanent error state */
+#define CLP_RC_SETPCIFN_RECPND	0x0107	/* Error recovery pending */
+#define CLP_RC_SETPCIFN_BUSY	0x0108	/* Fn busy */
+#define CLP_RC_LISTPCI_BADRT	0x010a	/* Resume token not recognized */
+#define CLP_RC_QUERYPCIFG_PFGID	0x010b	/* Unrecognized PFGID */
+
+/* request or response block header length */
+#define LIST_PCI_HDR_LEN	32
+
+/* Number of function handles fitting in response block */
+#define CLP_FH_LIST_NR_ENTRIES				\
+	((CLP_BLK_SIZE - 2 * LIST_PCI_HDR_LEN)		\
+		/ sizeof(struct clp_fh_list_entry))
+
+#define CLP_SET_ENABLE_PCI_FN	0	/* Yes, 0 enables it */
+#define CLP_SET_DISABLE_PCI_FN	1	/* Yes, 1 disables it */
+#define CLP_SET_ENABLE_MIO	2
+#define CLP_SET_DISABLE_MIO	3
+
+#define CLP_UTIL_STR_LEN	64
+#define CLP_PFIP_NR_SEGMENTS	4
+
+extern bool zpci_unique_uid;
+
+struct clp_rsp_slpc_pci {
+	struct clp_rsp_hdr hdr;
+	u32 reserved2[4];
+	u32 lpif[8];
+	u32 reserved3[4];
+	u32 vwb		:  1;
+	u32		:  1;
+	u32 mio_wb	:  6;
+	u32		: 24;
+	u32 reserved5[3];
+	u32 lpic[8];
+} __packed;
+
+/* List PCI functions request */
+struct clp_req_list_pci {
+	struct clp_req_hdr hdr;
+	u64 resume_token;
+	u64 reserved2;
+} __packed;
+
+/* List PCI functions response */
+struct clp_rsp_list_pci {
+	struct clp_rsp_hdr hdr;
+	u64 resume_token;
+	u32 reserved2;
+	u16 max_fn;
+	u8			: 7;
+	u8 uid_checking		: 1;
+	u8 entry_size;
+	struct clp_fh_list_entry fh_list[CLP_FH_LIST_NR_ENTRIES];
+} __packed;
+
+struct mio_info {
+	u32 valid : 6;
+	u32 : 26;
+	u32 : 32;
+	struct {
+		u64 wb;
+		u64 wt;
+	} addr[PCI_STD_NUM_BARS];
+	u32 reserved[6];
+} __packed;
+
+/* Query PCI function request */
+struct clp_req_query_pci {
+	struct clp_req_hdr hdr;
+	u32 fh;				/* function handle */
+	u32 reserved2;
+	u64 reserved3;
+} __packed;
+
+/* Query PCI function response */
+struct clp_rsp_query_pci {
+	struct clp_rsp_hdr hdr;
+	u16 vfn;			/* virtual fn number */
+	u16			:  3;
+	u16 rid_avail		:  1;
+	u16 is_physfn		:  1;
+	u16 reserved1		:  1;
+	u16 mio_addr_avail	:  1;
+	u16 util_str_avail	:  1;	/* utility string available? */
+	u16 pfgid		:  8;	/* pci function group id */
+	u32 fid;			/* pci function id */
+	u8 bar_size[PCI_STD_NUM_BARS];
+	u16 pchid;
+	__le32 bar[PCI_STD_NUM_BARS];
+	u8 pfip[CLP_PFIP_NR_SEGMENTS];	/* pci function internal path */
+	u16			: 12;
+	u16 port		:  4;
+	u8 fmb_len;
+	u8 pft;				/* pci function type */
+	u64 sdma;			/* start dma as */
+	u64 edma;			/* end dma as */
+#define ZPCI_RID_MASK_DEVFN 0x00ff
+	u16 rid;			/* BUS/DEVFN PCI address */
+	u16 reserved0;
+	u32 reserved[10];
+	u32 uid;			/* user defined id */
+	u8 util_str[CLP_UTIL_STR_LEN];	/* utility string */
+	u32 reserved2[16];
+	struct mio_info mio;
+} __packed;
+
+/* Query PCI function group request */
+struct clp_req_query_pci_grp {
+	struct clp_req_hdr hdr;
+	u32 reserved2		: 24;
+	u32 pfgid		:  8;	/* function group id */
+	u32 reserved3;
+	u64 reserved4;
+} __packed;
+
+/* Query PCI function group response */
+struct clp_rsp_query_pci_grp {
+	struct clp_rsp_hdr hdr;
+	u16			:  4;
+	u16 noi			: 12;	/* number of interrupts */
+	u8 version;
+	u8			:  6;
+	u8 frame		:  1;
+	u8 refresh		:  1;	/* TLB refresh mode */
+	u16			:  3;
+	u16 maxstbl		: 13;	/* Maximum store block size */
+	u16 mui;
+	u8 dtsm;			/* Supported DT mask */
+	u8 reserved3;
+	u16 maxfaal;
+	u16			:  4;
+	u16 dnoi		: 12;
+	u16 maxcpu;
+	u64 dasm;			/* dma address space mask */
+	u64 msia;			/* MSI address */
+	u64 reserved4;
+	u64 reserved5;
+} __packed;
+
+/* Set PCI function request */
+struct clp_req_set_pci {
+	struct clp_req_hdr hdr;
+	u32 fh;				/* function handle */
+	u16 reserved2;
+	u8 oc;				/* operation controls */
+	u8 ndas;			/* number of dma spaces */
+	u32 reserved3;
+	u32 gisa;			/* GISA designation */
+} __packed;
+
+/* Set PCI function response */
+struct clp_rsp_set_pci {
+	struct clp_rsp_hdr hdr;
+	u32 fh;				/* function handle */
+	u32 reserved1;
+	u64 reserved2;
+	struct mio_info mio;
+} __packed;
+
+/* Combined request/response block structures used by clp insn */
+struct clp_req_rsp_slpc_pci {
+	struct clp_req_slpc request;
+	struct clp_rsp_slpc_pci response;
+} __packed;
+
+struct clp_req_rsp_list_pci {
+	struct clp_req_list_pci request;
+	struct clp_rsp_list_pci response;
+} __packed;
+
+struct clp_req_rsp_set_pci {
+	struct clp_req_set_pci request;
+	struct clp_rsp_set_pci response;
+} __packed;
+
+struct clp_req_rsp_query_pci {
+	struct clp_req_query_pci request;
+	struct clp_rsp_query_pci response;
+} __packed;
+
+struct clp_req_rsp_query_pci_grp {
+	struct clp_req_query_pci_grp request;
+	struct clp_rsp_query_pci_grp response;
+} __packed;
+
+#endif
diff --git a/arch/s390/include/asm/pci_debug.h b/arch/s390/include/asm/pci_debug.h
new file mode 100644
index 0000000000..3bb4e7e33a
--- /dev/null
+++ b/arch/s390/include/asm/pci_debug.h
@@ -0,0 +1,30 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _S390_ASM_PCI_DEBUG_H
+#define _S390_ASM_PCI_DEBUG_H
+
+#include <asm/debug.h>
+
+extern debug_info_t *pci_debug_msg_id;
+extern debug_info_t *pci_debug_err_id;
+
+#define zpci_dbg(imp, fmt, args...)				\
+	debug_sprintf_event(pci_debug_msg_id, imp, fmt, ##args)
+
+#define zpci_err(text...)							\
+	do {									\
+		char debug_buffer[16];						\
+		snprintf(debug_buffer, 16, text);				\
+		debug_text_event(pci_debug_err_id, 0, debug_buffer);		\
+	} while (0)
+
+static inline void zpci_err_hex_level(int level, void *addr, int len)
+{
+	debug_event(pci_debug_err_id, level, addr, len);
+}
+
+static inline void zpci_err_hex(void *addr, int len)
+{
+	zpci_err_hex_level(0, addr, len);
+}
+
+#endif
diff --git a/arch/s390/include/asm/pci_dma.h b/arch/s390/include/asm/pci_dma.h
new file mode 100644
index 0000000000..7119c04c51
--- /dev/null
+++ b/arch/s390/include/asm/pci_dma.h
@@ -0,0 +1,198 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_S390_PCI_DMA_H
+#define _ASM_S390_PCI_DMA_H
+
+/* I/O Translation Anchor (IOTA) */
+enum zpci_ioat_dtype {
+	ZPCI_IOTA_STO = 0,
+	ZPCI_IOTA_RTTO = 1,
+	ZPCI_IOTA_RSTO = 2,
+	ZPCI_IOTA_RFTO = 3,
+	ZPCI_IOTA_PFAA = 4,
+	ZPCI_IOTA_IOPFAA = 5,
+	ZPCI_IOTA_IOPTO = 7
+};
+
+#define ZPCI_IOTA_IOT_ENABLED		0x800UL
+#define ZPCI_IOTA_DT_ST			(ZPCI_IOTA_STO	<< 2)
+#define ZPCI_IOTA_DT_RT			(ZPCI_IOTA_RTTO << 2)
+#define ZPCI_IOTA_DT_RS			(ZPCI_IOTA_RSTO << 2)
+#define ZPCI_IOTA_DT_RF			(ZPCI_IOTA_RFTO << 2)
+#define ZPCI_IOTA_DT_PF			(ZPCI_IOTA_PFAA << 2)
+#define ZPCI_IOTA_FS_4K			0
+#define ZPCI_IOTA_FS_1M			1
+#define ZPCI_IOTA_FS_2G			2
+#define ZPCI_KEY			(PAGE_DEFAULT_KEY << 5)
+
+#define ZPCI_TABLE_SIZE_RT	(1UL << 42)
+
+#define ZPCI_IOTA_STO_FLAG	(ZPCI_IOTA_IOT_ENABLED | ZPCI_KEY | ZPCI_IOTA_DT_ST)
+#define ZPCI_IOTA_RTTO_FLAG	(ZPCI_IOTA_IOT_ENABLED | ZPCI_KEY | ZPCI_IOTA_DT_RT)
+#define ZPCI_IOTA_RSTO_FLAG	(ZPCI_IOTA_IOT_ENABLED | ZPCI_KEY | ZPCI_IOTA_DT_RS)
+#define ZPCI_IOTA_RFTO_FLAG	(ZPCI_IOTA_IOT_ENABLED | ZPCI_KEY | ZPCI_IOTA_DT_RF)
+#define ZPCI_IOTA_RFAA_FLAG	(ZPCI_IOTA_IOT_ENABLED | ZPCI_KEY | ZPCI_IOTA_DT_PF | ZPCI_IOTA_FS_2G)
+
+/* I/O Region and segment tables */
+#define ZPCI_INDEX_MASK			0x7ffUL
+
+#define ZPCI_TABLE_TYPE_MASK		0xc
+#define ZPCI_TABLE_TYPE_RFX		0xc
+#define ZPCI_TABLE_TYPE_RSX		0x8
+#define ZPCI_TABLE_TYPE_RTX		0x4
+#define ZPCI_TABLE_TYPE_SX		0x0
+
+#define ZPCI_TABLE_LEN_RFX		0x3
+#define ZPCI_TABLE_LEN_RSX		0x3
+#define ZPCI_TABLE_LEN_RTX		0x3
+
+#define ZPCI_TABLE_OFFSET_MASK		0xc0
+#define ZPCI_TABLE_SIZE			0x4000
+#define ZPCI_TABLE_ALIGN		ZPCI_TABLE_SIZE
+#define ZPCI_TABLE_ENTRY_SIZE		(sizeof(unsigned long))
+#define ZPCI_TABLE_ENTRIES		(ZPCI_TABLE_SIZE / ZPCI_TABLE_ENTRY_SIZE)
+
+#define ZPCI_TABLE_BITS			11
+#define ZPCI_PT_BITS			8
+#define ZPCI_ST_SHIFT			(ZPCI_PT_BITS + PAGE_SHIFT)
+#define ZPCI_RT_SHIFT			(ZPCI_ST_SHIFT + ZPCI_TABLE_BITS)
+
+#define ZPCI_RTE_FLAG_MASK		0x3fffUL
+#define ZPCI_RTE_ADDR_MASK		(~ZPCI_RTE_FLAG_MASK)
+#define ZPCI_STE_FLAG_MASK		0x7ffUL
+#define ZPCI_STE_ADDR_MASK		(~ZPCI_STE_FLAG_MASK)
+
+/* I/O Page tables */
+#define ZPCI_PTE_VALID_MASK		0x400
+#define ZPCI_PTE_INVALID		0x400
+#define ZPCI_PTE_VALID			0x000
+#define ZPCI_PT_SIZE			0x800
+#define ZPCI_PT_ALIGN			ZPCI_PT_SIZE
+#define ZPCI_PT_ENTRIES			(ZPCI_PT_SIZE / ZPCI_TABLE_ENTRY_SIZE)
+#define ZPCI_PT_MASK			(ZPCI_PT_ENTRIES - 1)
+
+#define ZPCI_PTE_FLAG_MASK		0xfffUL
+#define ZPCI_PTE_ADDR_MASK		(~ZPCI_PTE_FLAG_MASK)
+
+/* Shared bits */
+#define ZPCI_TABLE_VALID		0x00
+#define ZPCI_TABLE_INVALID		0x20
+#define ZPCI_TABLE_PROTECTED		0x200
+#define ZPCI_TABLE_UNPROTECTED		0x000
+
+#define ZPCI_TABLE_VALID_MASK		0x20
+#define ZPCI_TABLE_PROT_MASK		0x200
+
+static inline unsigned int calc_rtx(dma_addr_t ptr)
+{
+	return ((unsigned long) ptr >> ZPCI_RT_SHIFT) & ZPCI_INDEX_MASK;
+}
+
+static inline unsigned int calc_sx(dma_addr_t ptr)
+{
+	return ((unsigned long) ptr >> ZPCI_ST_SHIFT) & ZPCI_INDEX_MASK;
+}
+
+static inline unsigned int calc_px(dma_addr_t ptr)
+{
+	return ((unsigned long) ptr >> PAGE_SHIFT) & ZPCI_PT_MASK;
+}
+
+static inline void set_pt_pfaa(unsigned long *entry, phys_addr_t pfaa)
+{
+	*entry &= ZPCI_PTE_FLAG_MASK;
+	*entry |= (pfaa & ZPCI_PTE_ADDR_MASK);
+}
+
+static inline void set_rt_sto(unsigned long *entry, phys_addr_t sto)
+{
+	*entry &= ZPCI_RTE_FLAG_MASK;
+	*entry |= (sto & ZPCI_RTE_ADDR_MASK);
+	*entry |= ZPCI_TABLE_TYPE_RTX;
+}
+
+static inline void set_st_pto(unsigned long *entry, phys_addr_t pto)
+{
+	*entry &= ZPCI_STE_FLAG_MASK;
+	*entry |= (pto & ZPCI_STE_ADDR_MASK);
+	*entry |= ZPCI_TABLE_TYPE_SX;
+}
+
+static inline void validate_rt_entry(unsigned long *entry)
+{
+	*entry &= ~ZPCI_TABLE_VALID_MASK;
+	*entry &= ~ZPCI_TABLE_OFFSET_MASK;
+	*entry |= ZPCI_TABLE_VALID;
+	*entry |= ZPCI_TABLE_LEN_RTX;
+}
+
+static inline void validate_st_entry(unsigned long *entry)
+{
+	*entry &= ~ZPCI_TABLE_VALID_MASK;
+	*entry |= ZPCI_TABLE_VALID;
+}
+
+static inline void invalidate_pt_entry(unsigned long *entry)
+{
+	WARN_ON_ONCE((*entry & ZPCI_PTE_VALID_MASK) == ZPCI_PTE_INVALID);
+	*entry &= ~ZPCI_PTE_VALID_MASK;
+	*entry |= ZPCI_PTE_INVALID;
+}
+
+static inline void validate_pt_entry(unsigned long *entry)
+{
+	WARN_ON_ONCE((*entry & ZPCI_PTE_VALID_MASK) == ZPCI_PTE_VALID);
+	*entry &= ~ZPCI_PTE_VALID_MASK;
+	*entry |= ZPCI_PTE_VALID;
+}
+
+static inline void entry_set_protected(unsigned long *entry)
+{
+	*entry &= ~ZPCI_TABLE_PROT_MASK;
+	*entry |= ZPCI_TABLE_PROTECTED;
+}
+
+static inline void entry_clr_protected(unsigned long *entry)
+{
+	*entry &= ~ZPCI_TABLE_PROT_MASK;
+	*entry |= ZPCI_TABLE_UNPROTECTED;
+}
+
+static inline int reg_entry_isvalid(unsigned long entry)
+{
+	return (entry & ZPCI_TABLE_VALID_MASK) == ZPCI_TABLE_VALID;
+}
+
+static inline int pt_entry_isvalid(unsigned long entry)
+{
+	return (entry & ZPCI_PTE_VALID_MASK) == ZPCI_PTE_VALID;
+}
+
+static inline unsigned long *get_rt_sto(unsigned long entry)
+{
+	if ((entry & ZPCI_TABLE_TYPE_MASK) == ZPCI_TABLE_TYPE_RTX)
+		return phys_to_virt(entry & ZPCI_RTE_ADDR_MASK);
+	else
+		return NULL;
+
+}
+
+static inline unsigned long *get_st_pto(unsigned long entry)
+{
+	if ((entry & ZPCI_TABLE_TYPE_MASK) == ZPCI_TABLE_TYPE_SX)
+		return phys_to_virt(entry & ZPCI_STE_ADDR_MASK);
+	else
+		return NULL;
+}
+
+/* Prototypes */
+void dma_free_seg_table(unsigned long);
+unsigned long *dma_alloc_cpu_table(gfp_t gfp);
+void dma_cleanup_tables(unsigned long *);
+unsigned long *dma_walk_cpu_trans(unsigned long *rto, dma_addr_t dma_addr,
+				  gfp_t gfp);
+void dma_update_cpu_trans(unsigned long *entry, phys_addr_t page_addr, int flags);
+
+extern const struct dma_map_ops s390_pci_dma_ops;
+
+
+#endif
diff --git a/arch/s390/include/asm/pci_insn.h b/arch/s390/include/asm/pci_insn.h
new file mode 100644
index 0000000000..e5f57cfe1d
--- /dev/null
+++ b/arch/s390/include/asm/pci_insn.h
@@ -0,0 +1,159 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_S390_PCI_INSN_H
+#define _ASM_S390_PCI_INSN_H
+
+#include <linux/jump_label.h>
+
+/* Load/Store status codes */
+#define ZPCI_PCI_ST_FUNC_NOT_ENABLED		4
+#define ZPCI_PCI_ST_FUNC_IN_ERR			8
+#define ZPCI_PCI_ST_BLOCKED			12
+#define ZPCI_PCI_ST_INSUF_RES			16
+#define ZPCI_PCI_ST_INVAL_AS			20
+#define ZPCI_PCI_ST_FUNC_ALREADY_ENABLED	24
+#define ZPCI_PCI_ST_DMA_AS_NOT_ENABLED		28
+#define ZPCI_PCI_ST_2ND_OP_IN_INV_AS		36
+#define ZPCI_PCI_ST_FUNC_NOT_AVAIL		40
+#define ZPCI_PCI_ST_ALREADY_IN_RQ_STATE		44
+
+/* Load/Store return codes */
+#define ZPCI_PCI_LS_OK				0
+#define ZPCI_PCI_LS_ERR				1
+#define ZPCI_PCI_LS_BUSY			2
+#define ZPCI_PCI_LS_INVAL_HANDLE		3
+
+/* Load/Store address space identifiers */
+#define ZPCI_PCIAS_MEMIO_0			0
+#define ZPCI_PCIAS_MEMIO_1			1
+#define ZPCI_PCIAS_MEMIO_2			2
+#define ZPCI_PCIAS_MEMIO_3			3
+#define ZPCI_PCIAS_MEMIO_4			4
+#define ZPCI_PCIAS_MEMIO_5			5
+#define ZPCI_PCIAS_CFGSPC			15
+
+/* Modify PCI Function Controls */
+#define ZPCI_MOD_FC_REG_INT	2
+#define ZPCI_MOD_FC_DEREG_INT	3
+#define ZPCI_MOD_FC_REG_IOAT	4
+#define ZPCI_MOD_FC_DEREG_IOAT	5
+#define ZPCI_MOD_FC_REREG_IOAT	6
+#define ZPCI_MOD_FC_RESET_ERROR	7
+#define ZPCI_MOD_FC_RESET_BLOCK	9
+#define ZPCI_MOD_FC_SET_MEASURE	10
+#define ZPCI_MOD_FC_REG_INT_D	16
+#define ZPCI_MOD_FC_DEREG_INT_D	17
+
+/* FIB function controls */
+#define ZPCI_FIB_FC_ENABLED	0x80
+#define ZPCI_FIB_FC_ERROR	0x40
+#define ZPCI_FIB_FC_LS_BLOCKED	0x20
+#define ZPCI_FIB_FC_DMAAS_REG	0x10
+
+/* FIB function controls */
+#define ZPCI_FIB_FC_ENABLED	0x80
+#define ZPCI_FIB_FC_ERROR	0x40
+#define ZPCI_FIB_FC_LS_BLOCKED	0x20
+#define ZPCI_FIB_FC_DMAAS_REG	0x10
+
+struct zpci_fib_fmt0 {
+	u32		:  1;
+	u32 isc		:  3;	/* Interrupt subclass */
+	u32 noi		: 12;	/* Number of interrupts */
+	u32		:  2;
+	u32 aibvo	:  6;	/* Adapter interrupt bit vector offset */
+	u32 sum		:  1;	/* Adapter int summary bit enabled */
+	u32		:  1;
+	u32 aisbo	:  6;	/* Adapter int summary bit offset */
+	u32		: 32;
+	u64 aibv;		/* Adapter int bit vector address */
+	u64 aisb;		/* Adapter int summary bit address */
+};
+
+struct zpci_fib_fmt1 {
+	u32		:  4;
+	u32 noi		: 12;
+	u32		: 16;
+	u32 dibvo	: 16;
+	u32		: 16;
+	u64		: 64;
+	u64		: 64;
+};
+
+/* Function Information Block */
+struct zpci_fib {
+	u32 fmt		:  8;	/* format */
+	u32		: 24;
+	u32		: 32;
+	u8 fc;			/* function controls */
+	u64		: 56;
+	u64 pba;		/* PCI base address */
+	u64 pal;		/* PCI address limit */
+	u64 iota;		/* I/O Translation Anchor */
+	union {
+		struct zpci_fib_fmt0 fmt0;
+		struct zpci_fib_fmt1 fmt1;
+	};
+	u64 fmb_addr;		/* Function measurement block address and key */
+	u32		: 32;
+	u32 gd;
+} __packed __aligned(8);
+
+/* Set Interruption Controls Operation Controls  */
+#define	SIC_IRQ_MODE_ALL		0
+#define	SIC_IRQ_MODE_SINGLE		1
+#define	SIC_SET_AENI_CONTROLS		2
+#define	SIC_IRQ_MODE_DIRECT		4
+#define	SIC_IRQ_MODE_D_ALL		16
+#define	SIC_IRQ_MODE_D_SINGLE		17
+#define	SIC_IRQ_MODE_SET_CPU		18
+
+/* directed interruption information block */
+struct zpci_diib {
+	u32 : 1;
+	u32 isc : 3;
+	u32 : 28;
+	u16 : 16;
+	u16 nr_cpus;
+	u64 disb_addr;
+	u64 : 64;
+	u64 : 64;
+} __packed __aligned(8);
+
+/* cpu directed interruption information block */
+struct zpci_cdiib {
+	u64 : 64;
+	u64 dibv_addr;
+	u64 : 64;
+	u64 : 64;
+	u64 : 64;
+} __packed __aligned(8);
+
+/* adapter interruption parameters block */
+struct zpci_aipb {
+	u64 faisb;
+	u64 gait;
+	u16 : 13;
+	u16 afi : 3;
+	u32 : 32;
+	u16 faal;
+} __packed __aligned(8);
+
+union zpci_sic_iib {
+	struct zpci_diib diib;
+	struct zpci_cdiib cdiib;
+	struct zpci_aipb aipb;
+};
+
+DECLARE_STATIC_KEY_FALSE(have_mio);
+
+u8 zpci_mod_fc(u64 req, struct zpci_fib *fib, u8 *status);
+int zpci_refresh_trans(u64 fn, u64 addr, u64 range);
+int __zpci_load(u64 *data, u64 req, u64 offset);
+int zpci_load(u64 *data, const volatile void __iomem *addr, unsigned long len);
+int __zpci_store(u64 data, u64 req, u64 offset);
+int zpci_store(const volatile void __iomem *addr, u64 data, unsigned long len);
+int __zpci_store_block(const u64 *data, u64 req, u64 offset);
+void zpci_barrier(void);
+int zpci_set_irq_ctrl(u16 ctl, u8 isc, union zpci_sic_iib *iib);
+
+#endif
diff --git a/arch/s390/include/asm/pci_io.h b/arch/s390/include/asm/pci_io.h
new file mode 100644
index 0000000000..2686bee800
--- /dev/null
+++ b/arch/s390/include/asm/pci_io.h
@@ -0,0 +1,206 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_S390_PCI_IO_H
+#define _ASM_S390_PCI_IO_H
+
+#ifdef CONFIG_PCI
+
+#include <linux/kernel.h>
+#include <linux/slab.h>
+#include <asm/pci_insn.h>
+
+/* I/O size constraints */
+#define ZPCI_MAX_READ_SIZE	8
+#define ZPCI_MAX_WRITE_SIZE	128
+#define ZPCI_BOUNDARY_SIZE	(1 << 12)
+#define ZPCI_BOUNDARY_MASK	(ZPCI_BOUNDARY_SIZE - 1)
+
+/* I/O Map */
+#define ZPCI_IOMAP_SHIFT		48
+#define ZPCI_IOMAP_ADDR_SHIFT		62
+#define ZPCI_IOMAP_ADDR_BASE		(1UL << ZPCI_IOMAP_ADDR_SHIFT)
+#define ZPCI_IOMAP_ADDR_OFF_MASK	((1UL << ZPCI_IOMAP_SHIFT) - 1)
+#define ZPCI_IOMAP_MAX_ENTRIES							\
+	(1UL << (ZPCI_IOMAP_ADDR_SHIFT - ZPCI_IOMAP_SHIFT))
+#define ZPCI_IOMAP_ADDR_IDX_MASK						\
+	((ZPCI_IOMAP_ADDR_BASE - 1) & ~ZPCI_IOMAP_ADDR_OFF_MASK)
+
+struct zpci_iomap_entry {
+	u32 fh;
+	u8 bar;
+	u16 count;
+};
+
+extern struct zpci_iomap_entry *zpci_iomap_start;
+
+#define ZPCI_ADDR(idx) (ZPCI_IOMAP_ADDR_BASE | ((u64) idx << ZPCI_IOMAP_SHIFT))
+#define ZPCI_IDX(addr)								\
+	(((__force u64) addr & ZPCI_IOMAP_ADDR_IDX_MASK) >> ZPCI_IOMAP_SHIFT)
+#define ZPCI_OFFSET(addr)							\
+	((__force u64) addr & ZPCI_IOMAP_ADDR_OFF_MASK)
+
+#define ZPCI_CREATE_REQ(handle, space, len)					\
+	((u64) handle << 32 | space << 16 | len)
+
+#define zpci_read(LENGTH, RETTYPE)						\
+static inline RETTYPE zpci_read_##RETTYPE(const volatile void __iomem *addr)	\
+{										\
+	u64 data;								\
+	int rc;									\
+										\
+	rc = zpci_load(&data, addr, LENGTH);					\
+	if (rc)									\
+		data = -1ULL;							\
+	return (RETTYPE) data;							\
+}
+
+#define zpci_write(LENGTH, VALTYPE)						\
+static inline void zpci_write_##VALTYPE(VALTYPE val,				\
+					const volatile void __iomem *addr)	\
+{										\
+	u64 data = (VALTYPE) val;						\
+										\
+	zpci_store(addr, data, LENGTH);						\
+}
+
+zpci_read(8, u64)
+zpci_read(4, u32)
+zpci_read(2, u16)
+zpci_read(1, u8)
+zpci_write(8, u64)
+zpci_write(4, u32)
+zpci_write(2, u16)
+zpci_write(1, u8)
+
+static inline int zpci_write_single(volatile void __iomem *dst, const void *src,
+				    unsigned long len)
+{
+	u64 val;
+
+	switch (len) {
+	case 1:
+		val = (u64) *((u8 *) src);
+		break;
+	case 2:
+		val = (u64) *((u16 *) src);
+		break;
+	case 4:
+		val = (u64) *((u32 *) src);
+		break;
+	case 8:
+		val = (u64) *((u64 *) src);
+		break;
+	default:
+		val = 0;		/* let FW report error */
+		break;
+	}
+	return zpci_store(dst, val, len);
+}
+
+static inline int zpci_read_single(void *dst, const volatile void __iomem *src,
+				   unsigned long len)
+{
+	u64 data;
+	int cc;
+
+	cc = zpci_load(&data, src, len);
+	if (cc)
+		goto out;
+
+	switch (len) {
+	case 1:
+		*((u8 *) dst) = (u8) data;
+		break;
+	case 2:
+		*((u16 *) dst) = (u16) data;
+		break;
+	case 4:
+		*((u32 *) dst) = (u32) data;
+		break;
+	case 8:
+		*((u64 *) dst) = (u64) data;
+		break;
+	}
+out:
+	return cc;
+}
+
+int zpci_write_block(volatile void __iomem *dst, const void *src,
+		     unsigned long len);
+
+static inline int zpci_get_max_io_size(u64 src, u64 dst, int len, int max)
+{
+	int offset = dst & ZPCI_BOUNDARY_MASK;
+	int size;
+
+	size = min3(len, ZPCI_BOUNDARY_SIZE - offset, max);
+	if (IS_ALIGNED(src, 8) && IS_ALIGNED(dst, 8) && IS_ALIGNED(size, 8))
+		return size;
+
+	if (size >= 8)
+		return 8;
+	return rounddown_pow_of_two(size);
+}
+
+static inline int zpci_memcpy_fromio(void *dst,
+				     const volatile void __iomem *src,
+				     unsigned long n)
+{
+	int size, rc = 0;
+
+	while (n > 0) {
+		size = zpci_get_max_io_size((u64 __force) src,
+					    (u64) dst, n,
+					    ZPCI_MAX_READ_SIZE);
+		rc = zpci_read_single(dst, src, size);
+		if (rc)
+			break;
+		src += size;
+		dst += size;
+		n -= size;
+	}
+	return rc;
+}
+
+static inline int zpci_memcpy_toio(volatile void __iomem *dst,
+				   const void *src, unsigned long n)
+{
+	int size, rc = 0;
+
+	if (!src)
+		return -EINVAL;
+
+	while (n > 0) {
+		size = zpci_get_max_io_size((u64 __force) dst,
+					    (u64) src, n,
+					    ZPCI_MAX_WRITE_SIZE);
+		if (size > 8) /* main path */
+			rc = zpci_write_block(dst, src, size);
+		else
+			rc = zpci_write_single(dst, src, size);
+		if (rc)
+			break;
+		src += size;
+		dst += size;
+		n -= size;
+	}
+	return rc;
+}
+
+static inline int zpci_memset_io(volatile void __iomem *dst,
+				 unsigned char val, size_t count)
+{
+	u8 *src = kmalloc(count, GFP_KERNEL);
+	int rc;
+
+	if (src == NULL)
+		return -ENOMEM;
+	memset(src, val, count);
+
+	rc = zpci_memcpy_toio(dst, src, count);
+	kfree(src);
+	return rc;
+}
+
+#endif /* CONFIG_PCI */
+
+#endif /* _ASM_S390_PCI_IO_H */
diff --git a/arch/s390/include/asm/percpu.h b/arch/s390/include/asm/percpu.h
new file mode 100644
index 0000000000..264095dd84
--- /dev/null
+++ b/arch/s390/include/asm/percpu.h
@@ -0,0 +1,185 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __ARCH_S390_PERCPU__
+#define __ARCH_S390_PERCPU__
+
+#include <linux/preempt.h>
+#include <asm/cmpxchg.h>
+
+/*
+ * s390 uses its own implementation for per cpu data, the offset of
+ * the cpu local data area is cached in the cpu's lowcore memory.
+ */
+#define __my_cpu_offset S390_lowcore.percpu_offset
+
+/*
+ * For 64 bit module code, the module may be more than 4G above the
+ * per cpu area, use weak definitions to force the compiler to
+ * generate external references.
+ */
+#if defined(MODULE)
+#define ARCH_NEEDS_WEAK_PER_CPU
+#endif
+
+/*
+ * We use a compare-and-swap loop since that uses less cpu cycles than
+ * disabling and enabling interrupts like the generic variant would do.
+ */
+#define arch_this_cpu_to_op_simple(pcp, val, op)			\
+({									\
+	typedef typeof(pcp) pcp_op_T__;					\
+	pcp_op_T__ old__, new__, prev__;				\
+	pcp_op_T__ *ptr__;						\
+	preempt_disable_notrace();					\
+	ptr__ = raw_cpu_ptr(&(pcp));					\
+	prev__ = READ_ONCE(*ptr__);					\
+	do {								\
+		old__ = prev__;						\
+		new__ = old__ op (val);					\
+		prev__ = cmpxchg(ptr__, old__, new__);			\
+	} while (prev__ != old__);					\
+	preempt_enable_notrace();					\
+	new__;								\
+})
+
+#define this_cpu_add_1(pcp, val)	arch_this_cpu_to_op_simple(pcp, val, +)
+#define this_cpu_add_2(pcp, val)	arch_this_cpu_to_op_simple(pcp, val, +)
+#define this_cpu_add_return_1(pcp, val) arch_this_cpu_to_op_simple(pcp, val, +)
+#define this_cpu_add_return_2(pcp, val) arch_this_cpu_to_op_simple(pcp, val, +)
+#define this_cpu_and_1(pcp, val)	arch_this_cpu_to_op_simple(pcp, val, &)
+#define this_cpu_and_2(pcp, val)	arch_this_cpu_to_op_simple(pcp, val, &)
+#define this_cpu_or_1(pcp, val)		arch_this_cpu_to_op_simple(pcp, val, |)
+#define this_cpu_or_2(pcp, val)		arch_this_cpu_to_op_simple(pcp, val, |)
+
+#ifndef CONFIG_HAVE_MARCH_Z196_FEATURES
+
+#define this_cpu_add_4(pcp, val)	arch_this_cpu_to_op_simple(pcp, val, +)
+#define this_cpu_add_8(pcp, val)	arch_this_cpu_to_op_simple(pcp, val, +)
+#define this_cpu_add_return_4(pcp, val) arch_this_cpu_to_op_simple(pcp, val, +)
+#define this_cpu_add_return_8(pcp, val) arch_this_cpu_to_op_simple(pcp, val, +)
+#define this_cpu_and_4(pcp, val)	arch_this_cpu_to_op_simple(pcp, val, &)
+#define this_cpu_and_8(pcp, val)	arch_this_cpu_to_op_simple(pcp, val, &)
+#define this_cpu_or_4(pcp, val)		arch_this_cpu_to_op_simple(pcp, val, |)
+#define this_cpu_or_8(pcp, val)		arch_this_cpu_to_op_simple(pcp, val, |)
+
+#else /* CONFIG_HAVE_MARCH_Z196_FEATURES */
+
+#define arch_this_cpu_add(pcp, val, op1, op2, szcast)			\
+{									\
+	typedef typeof(pcp) pcp_op_T__; 				\
+	pcp_op_T__ val__ = (val);					\
+	pcp_op_T__ old__, *ptr__;					\
+	preempt_disable_notrace();					\
+	ptr__ = raw_cpu_ptr(&(pcp)); 				\
+	if (__builtin_constant_p(val__) &&				\
+	    ((szcast)val__ > -129) && ((szcast)val__ < 128)) {		\
+		asm volatile(						\
+			op2 "   %[ptr__],%[val__]\n"			\
+			: [ptr__] "+Q" (*ptr__) 			\
+			: [val__] "i" ((szcast)val__)			\
+			: "cc");					\
+	} else {							\
+		asm volatile(						\
+			op1 "   %[old__],%[val__],%[ptr__]\n"		\
+			: [old__] "=d" (old__), [ptr__] "+Q" (*ptr__)	\
+			: [val__] "d" (val__)				\
+			: "cc");					\
+	}								\
+	preempt_enable_notrace();					\
+}
+
+#define this_cpu_add_4(pcp, val) arch_this_cpu_add(pcp, val, "laa", "asi", int)
+#define this_cpu_add_8(pcp, val) arch_this_cpu_add(pcp, val, "laag", "agsi", long)
+
+#define arch_this_cpu_add_return(pcp, val, op)				\
+({									\
+	typedef typeof(pcp) pcp_op_T__; 				\
+	pcp_op_T__ val__ = (val);					\
+	pcp_op_T__ old__, *ptr__;					\
+	preempt_disable_notrace();					\
+	ptr__ = raw_cpu_ptr(&(pcp));	 				\
+	asm volatile(							\
+		op "    %[old__],%[val__],%[ptr__]\n"			\
+		: [old__] "=d" (old__), [ptr__] "+Q" (*ptr__)		\
+		: [val__] "d" (val__)					\
+		: "cc");						\
+	preempt_enable_notrace();						\
+	old__ + val__;							\
+})
+
+#define this_cpu_add_return_4(pcp, val) arch_this_cpu_add_return(pcp, val, "laa")
+#define this_cpu_add_return_8(pcp, val) arch_this_cpu_add_return(pcp, val, "laag")
+
+#define arch_this_cpu_to_op(pcp, val, op)				\
+{									\
+	typedef typeof(pcp) pcp_op_T__; 				\
+	pcp_op_T__ val__ = (val);					\
+	pcp_op_T__ old__, *ptr__;					\
+	preempt_disable_notrace();					\
+	ptr__ = raw_cpu_ptr(&(pcp));	 				\
+	asm volatile(							\
+		op "    %[old__],%[val__],%[ptr__]\n"			\
+		: [old__] "=d" (old__), [ptr__] "+Q" (*ptr__)		\
+		: [val__] "d" (val__)					\
+		: "cc");						\
+	preempt_enable_notrace();					\
+}
+
+#define this_cpu_and_4(pcp, val)	arch_this_cpu_to_op(pcp, val, "lan")
+#define this_cpu_and_8(pcp, val)	arch_this_cpu_to_op(pcp, val, "lang")
+#define this_cpu_or_4(pcp, val)		arch_this_cpu_to_op(pcp, val, "lao")
+#define this_cpu_or_8(pcp, val)		arch_this_cpu_to_op(pcp, val, "laog")
+
+#endif /* CONFIG_HAVE_MARCH_Z196_FEATURES */
+
+#define arch_this_cpu_cmpxchg(pcp, oval, nval)				\
+({									\
+	typedef typeof(pcp) pcp_op_T__;					\
+	pcp_op_T__ ret__;						\
+	pcp_op_T__ *ptr__;						\
+	preempt_disable_notrace();					\
+	ptr__ = raw_cpu_ptr(&(pcp));					\
+	ret__ = cmpxchg(ptr__, oval, nval);				\
+	preempt_enable_notrace();					\
+	ret__;								\
+})
+
+#define this_cpu_cmpxchg_1(pcp, oval, nval) arch_this_cpu_cmpxchg(pcp, oval, nval)
+#define this_cpu_cmpxchg_2(pcp, oval, nval) arch_this_cpu_cmpxchg(pcp, oval, nval)
+#define this_cpu_cmpxchg_4(pcp, oval, nval) arch_this_cpu_cmpxchg(pcp, oval, nval)
+#define this_cpu_cmpxchg_8(pcp, oval, nval) arch_this_cpu_cmpxchg(pcp, oval, nval)
+
+#define this_cpu_cmpxchg64(pcp, o, n)	this_cpu_cmpxchg_8(pcp, o, n)
+
+#define this_cpu_cmpxchg128(pcp, oval, nval)				\
+({									\
+	typedef typeof(pcp) pcp_op_T__;					\
+	u128 old__, new__, ret__;					\
+	pcp_op_T__ *ptr__;						\
+	old__ = oval;							\
+	new__ = nval;							\
+	preempt_disable_notrace();					\
+	ptr__ = raw_cpu_ptr(&(pcp));					\
+	ret__ = cmpxchg128((void *)ptr__, old__, new__);		\
+	preempt_enable_notrace();					\
+	ret__;								\
+})
+
+#define arch_this_cpu_xchg(pcp, nval)					\
+({									\
+	typeof(pcp) *ptr__;						\
+	typeof(pcp) ret__;						\
+	preempt_disable_notrace();					\
+	ptr__ = raw_cpu_ptr(&(pcp));					\
+	ret__ = xchg(ptr__, nval);					\
+	preempt_enable_notrace();					\
+	ret__;								\
+})
+
+#define this_cpu_xchg_1(pcp, nval) arch_this_cpu_xchg(pcp, nval)
+#define this_cpu_xchg_2(pcp, nval) arch_this_cpu_xchg(pcp, nval)
+#define this_cpu_xchg_4(pcp, nval) arch_this_cpu_xchg(pcp, nval)
+#define this_cpu_xchg_8(pcp, nval) arch_this_cpu_xchg(pcp, nval)
+
+#include <asm-generic/percpu.h>
+
+#endif /* __ARCH_S390_PERCPU__ */
diff --git a/arch/s390/include/asm/perf_event.h b/arch/s390/include/asm/perf_event.h
new file mode 100644
index 0000000000..9917e2717b
--- /dev/null
+++ b/arch/s390/include/asm/perf_event.h
@@ -0,0 +1,81 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Performance event support - s390 specific definitions.
+ *
+ * Copyright IBM Corp. 2009, 2017
+ * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>
+ *	      Hendrik Brueckner <brueckner@linux.vnet.ibm.com>
+ */
+
+#ifndef _ASM_S390_PERF_EVENT_H
+#define _ASM_S390_PERF_EVENT_H
+
+#include <linux/perf_event.h>
+#include <linux/device.h>
+#include <asm/stacktrace.h>
+
+/* Per-CPU flags for PMU states */
+#define PMU_F_RESERVED			0x1000
+#define PMU_F_ENABLED			0x2000
+#define PMU_F_IN_USE			0x4000
+#define PMU_F_ERR_IBE			0x0100
+#define PMU_F_ERR_LSDA			0x0200
+#define PMU_F_ERR_MASK			(PMU_F_ERR_IBE|PMU_F_ERR_LSDA)
+
+/* Perf definitions for PMU event attributes in sysfs */
+extern __init const struct attribute_group **cpumf_cf_event_group(void);
+extern ssize_t cpumf_events_sysfs_show(struct device *dev,
+				       struct device_attribute *attr,
+				       char *page);
+#define EVENT_VAR(_cat, _name)		event_attr_##_cat##_##_name
+#define EVENT_PTR(_cat, _name)		(&EVENT_VAR(_cat, _name).attr.attr)
+
+#define CPUMF_EVENT_ATTR(cat, name, id)			\
+	PMU_EVENT_ATTR(name, EVENT_VAR(cat, name), id, cpumf_events_sysfs_show)
+#define CPUMF_EVENT_PTR(cat, name)	EVENT_PTR(cat, name)
+
+
+/* Perf callbacks */
+struct pt_regs;
+extern unsigned long perf_instruction_pointer(struct pt_regs *regs);
+extern unsigned long perf_misc_flags(struct pt_regs *regs);
+#define perf_misc_flags(regs) perf_misc_flags(regs)
+#define perf_arch_bpf_user_pt_regs(regs) &regs->user_regs
+
+/* Perf pt_regs extension for sample-data-entry indicators */
+struct perf_sf_sde_regs {
+	unsigned char in_guest:1;	  /* guest sample */
+	unsigned long reserved:63;	  /* reserved */
+};
+
+/* Perf PMU definitions for the counter facility */
+#define PERF_CPUM_CF_MAX_CTR		0xffffUL  /* Max ctr for ECCTR */
+
+/* Perf PMU definitions for the sampling facility */
+#define PERF_CPUM_SF_MAX_CTR		2
+#define PERF_EVENT_CPUM_SF		0xB0000UL /* Event: Basic-sampling */
+#define PERF_EVENT_CPUM_SF_DIAG		0xBD000UL /* Event: Combined-sampling */
+#define PERF_EVENT_CPUM_CF_DIAG		0xBC000UL /* Event: Counter sets */
+#define PERF_CPUM_SF_BASIC_MODE		0x0001	  /* Basic-sampling flag */
+#define PERF_CPUM_SF_DIAG_MODE		0x0002	  /* Diagnostic-sampling flag */
+#define PERF_CPUM_SF_MODE_MASK		(PERF_CPUM_SF_BASIC_MODE| \
+					 PERF_CPUM_SF_DIAG_MODE)
+#define PERF_CPUM_SF_FREQ_MODE		0x0008	  /* Sampling with frequency */
+
+#define REG_NONE		0
+#define REG_OVERFLOW		1
+#define OVERFLOW_REG(hwc)	((hwc)->extra_reg.config)
+#define SFB_ALLOC_REG(hwc)	((hwc)->extra_reg.alloc)
+#define TEAR_REG(hwc)		((hwc)->last_tag)
+#define SAMPL_RATE(hwc)		((hwc)->event_base)
+#define SAMPL_FLAGS(hwc)	((hwc)->config_base)
+#define SAMPL_DIAG_MODE(hwc)	(SAMPL_FLAGS(hwc) & PERF_CPUM_SF_DIAG_MODE)
+#define SAMPLE_FREQ_MODE(hwc)	(SAMPL_FLAGS(hwc) & PERF_CPUM_SF_FREQ_MODE)
+
+#define perf_arch_fetch_caller_regs(regs, __ip) do {			\
+	(regs)->psw.addr = (__ip);					\
+	(regs)->gprs[15] = (unsigned long)__builtin_frame_address(0) -	\
+		offsetof(struct stack_frame, back_chain);		\
+} while (0)
+
+#endif /* _ASM_S390_PERF_EVENT_H */
diff --git a/arch/s390/include/asm/pfault.h b/arch/s390/include/asm/pfault.h
new file mode 100644
index 0000000000..a1bee4a1e4
--- /dev/null
+++ b/arch/s390/include/asm/pfault.h
@@ -0,0 +1,26 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ *    Copyright IBM Corp. 1999, 2023
+ */
+#ifndef _ASM_S390_PFAULT_H
+#define _ASM_S390_PFAULT_H
+
+#include <linux/errno.h>
+
+int __pfault_init(void);
+void __pfault_fini(void);
+
+static inline int pfault_init(void)
+{
+	if (IS_ENABLED(CONFIG_PFAULT))
+		return __pfault_init();
+	return -EOPNOTSUPP;
+}
+
+static inline void pfault_fini(void)
+{
+	if (IS_ENABLED(CONFIG_PFAULT))
+		__pfault_fini();
+}
+
+#endif /* _ASM_S390_PFAULT_H */
diff --git a/arch/s390/include/asm/pgalloc.h b/arch/s390/include/asm/pgalloc.h
new file mode 100644
index 0000000000..376b4b23bd
--- /dev/null
+++ b/arch/s390/include/asm/pgalloc.h
@@ -0,0 +1,157 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ *  S390 version
+ *    Copyright IBM Corp. 1999, 2000
+ *    Author(s): Hartmut Penner (hp@de.ibm.com)
+ *               Martin Schwidefsky (schwidefsky@de.ibm.com)
+ *
+ *  Derived from "include/asm-i386/pgalloc.h"
+ *    Copyright (C) 1994  Linus Torvalds
+ */
+
+#ifndef _S390_PGALLOC_H
+#define _S390_PGALLOC_H
+
+#include <linux/threads.h>
+#include <linux/string.h>
+#include <linux/gfp.h>
+#include <linux/mm.h>
+
+#define CRST_ALLOC_ORDER 2
+
+unsigned long *crst_table_alloc(struct mm_struct *);
+void crst_table_free(struct mm_struct *, unsigned long *);
+
+unsigned long *page_table_alloc(struct mm_struct *);
+struct page *page_table_alloc_pgste(struct mm_struct *mm);
+void page_table_free(struct mm_struct *, unsigned long *);
+void page_table_free_rcu(struct mmu_gather *, unsigned long *, unsigned long);
+void page_table_free_pgste(struct page *page);
+extern int page_table_allocate_pgste;
+
+static inline void crst_table_init(unsigned long *crst, unsigned long entry)
+{
+	memset64((u64 *)crst, entry, _CRST_ENTRIES);
+}
+
+int crst_table_upgrade(struct mm_struct *mm, unsigned long limit);
+
+static inline unsigned long check_asce_limit(struct mm_struct *mm, unsigned long addr,
+					     unsigned long len)
+{
+	int rc;
+
+	if (addr + len > mm->context.asce_limit &&
+	    addr + len <= TASK_SIZE) {
+		rc = crst_table_upgrade(mm, addr + len);
+		if (rc)
+			return (unsigned long) rc;
+	}
+	return addr;
+}
+
+static inline p4d_t *p4d_alloc_one(struct mm_struct *mm, unsigned long address)
+{
+	unsigned long *table = crst_table_alloc(mm);
+
+	if (table)
+		crst_table_init(table, _REGION2_ENTRY_EMPTY);
+	return (p4d_t *) table;
+}
+
+static inline void p4d_free(struct mm_struct *mm, p4d_t *p4d)
+{
+	if (!mm_p4d_folded(mm))
+		crst_table_free(mm, (unsigned long *) p4d);
+}
+
+static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long address)
+{
+	unsigned long *table = crst_table_alloc(mm);
+	if (table)
+		crst_table_init(table, _REGION3_ENTRY_EMPTY);
+	return (pud_t *) table;
+}
+
+static inline void pud_free(struct mm_struct *mm, pud_t *pud)
+{
+	if (!mm_pud_folded(mm))
+		crst_table_free(mm, (unsigned long *) pud);
+}
+
+static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long vmaddr)
+{
+	unsigned long *table = crst_table_alloc(mm);
+
+	if (!table)
+		return NULL;
+	crst_table_init(table, _SEGMENT_ENTRY_EMPTY);
+	if (!pagetable_pmd_ctor(virt_to_ptdesc(table))) {
+		crst_table_free(mm, table);
+		return NULL;
+	}
+	return (pmd_t *) table;
+}
+
+static inline void pmd_free(struct mm_struct *mm, pmd_t *pmd)
+{
+	if (mm_pmd_folded(mm))
+		return;
+	pagetable_pmd_dtor(virt_to_ptdesc(pmd));
+	crst_table_free(mm, (unsigned long *) pmd);
+}
+
+static inline void pgd_populate(struct mm_struct *mm, pgd_t *pgd, p4d_t *p4d)
+{
+	set_pgd(pgd, __pgd(_REGION1_ENTRY | __pa(p4d)));
+}
+
+static inline void p4d_populate(struct mm_struct *mm, p4d_t *p4d, pud_t *pud)
+{
+	set_p4d(p4d, __p4d(_REGION2_ENTRY | __pa(pud)));
+}
+
+static inline void pud_populate(struct mm_struct *mm, pud_t *pud, pmd_t *pmd)
+{
+	set_pud(pud, __pud(_REGION3_ENTRY | __pa(pmd)));
+}
+
+static inline pgd_t *pgd_alloc(struct mm_struct *mm)
+{
+	return (pgd_t *) crst_table_alloc(mm);
+}
+
+static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd)
+{
+	crst_table_free(mm, (unsigned long *) pgd);
+}
+
+static inline void pmd_populate(struct mm_struct *mm,
+				pmd_t *pmd, pgtable_t pte)
+{
+	set_pmd(pmd, __pmd(_SEGMENT_ENTRY | __pa(pte)));
+}
+
+#define pmd_populate_kernel(mm, pmd, pte) pmd_populate(mm, pmd, pte)
+
+/*
+ * page table entry allocation/free routines.
+ */
+#define pte_alloc_one_kernel(mm) ((pte_t *)page_table_alloc(mm))
+#define pte_alloc_one(mm) ((pte_t *)page_table_alloc(mm))
+
+#define pte_free_kernel(mm, pte) page_table_free(mm, (unsigned long *) pte)
+#define pte_free(mm, pte) page_table_free(mm, (unsigned long *) pte)
+
+/* arch use pte_free_defer() implementation in arch/s390/mm/pgalloc.c */
+#define pte_free_defer pte_free_defer
+void pte_free_defer(struct mm_struct *mm, pgtable_t pgtable);
+
+void vmem_map_init(void);
+void *vmem_crst_alloc(unsigned long val);
+pte_t *vmem_pte_alloc(void);
+
+unsigned long base_asce_alloc(unsigned long addr, unsigned long num_pages);
+void base_asce_free(unsigned long asce);
+
+#endif /* _S390_PGALLOC_H */
diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h
new file mode 100644
index 0000000000..fb3ee7758b
--- /dev/null
+++ b/arch/s390/include/asm/pgtable.h
@@ -0,0 +1,1872 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ *  S390 version
+ *    Copyright IBM Corp. 1999, 2000
+ *    Author(s): Hartmut Penner (hp@de.ibm.com)
+ *               Ulrich Weigand (weigand@de.ibm.com)
+ *               Martin Schwidefsky (schwidefsky@de.ibm.com)
+ *
+ *  Derived from "include/asm-i386/pgtable.h"
+ */
+
+#ifndef _ASM_S390_PGTABLE_H
+#define _ASM_S390_PGTABLE_H
+
+#include <linux/sched.h>
+#include <linux/mm_types.h>
+#include <linux/page-flags.h>
+#include <linux/radix-tree.h>
+#include <linux/atomic.h>
+#include <asm/sections.h>
+#include <asm/bug.h>
+#include <asm/page.h>
+#include <asm/uv.h>
+
+extern pgd_t swapper_pg_dir[];
+extern pgd_t invalid_pg_dir[];
+extern void paging_init(void);
+extern unsigned long s390_invalid_asce;
+
+enum {
+	PG_DIRECT_MAP_4K = 0,
+	PG_DIRECT_MAP_1M,
+	PG_DIRECT_MAP_2G,
+	PG_DIRECT_MAP_MAX
+};
+
+extern atomic_long_t __bootdata_preserved(direct_pages_count[PG_DIRECT_MAP_MAX]);
+
+static inline void update_page_count(int level, long count)
+{
+	if (IS_ENABLED(CONFIG_PROC_FS))
+		atomic_long_add(count, &direct_pages_count[level]);
+}
+
+/*
+ * The S390 doesn't have any external MMU info: the kernel page
+ * tables contain all the necessary information.
+ */
+#define update_mmu_cache(vma, address, ptep)     do { } while (0)
+#define update_mmu_cache_range(vmf, vma, addr, ptep, nr) do { } while (0)
+#define update_mmu_cache_pmd(vma, address, ptep) do { } while (0)
+
+/*
+ * ZERO_PAGE is a global shared page that is always zero; used
+ * for zero-mapped memory areas etc..
+ */
+
+extern unsigned long empty_zero_page;
+extern unsigned long zero_page_mask;
+
+#define ZERO_PAGE(vaddr) \
+	(virt_to_page((void *)(empty_zero_page + \
+	 (((unsigned long)(vaddr)) &zero_page_mask))))
+#define __HAVE_COLOR_ZERO_PAGE
+
+/* TODO: s390 cannot support io_remap_pfn_range... */
+
+#define pte_ERROR(e) \
+	pr_err("%s:%d: bad pte %016lx.\n", __FILE__, __LINE__, pte_val(e))
+#define pmd_ERROR(e) \
+	pr_err("%s:%d: bad pmd %016lx.\n", __FILE__, __LINE__, pmd_val(e))
+#define pud_ERROR(e) \
+	pr_err("%s:%d: bad pud %016lx.\n", __FILE__, __LINE__, pud_val(e))
+#define p4d_ERROR(e) \
+	pr_err("%s:%d: bad p4d %016lx.\n", __FILE__, __LINE__, p4d_val(e))
+#define pgd_ERROR(e) \
+	pr_err("%s:%d: bad pgd %016lx.\n", __FILE__, __LINE__, pgd_val(e))
+
+/*
+ * The vmalloc and module area will always be on the topmost area of the
+ * kernel mapping. 512GB are reserved for vmalloc by default.
+ * At the top of the vmalloc area a 2GB area is reserved where modules
+ * will reside. That makes sure that inter module branches always
+ * happen without trampolines and in addition the placement within a
+ * 2GB frame is branch prediction unit friendly.
+ */
+extern unsigned long __bootdata_preserved(VMALLOC_START);
+extern unsigned long __bootdata_preserved(VMALLOC_END);
+#define VMALLOC_DEFAULT_SIZE	((512UL << 30) - MODULES_LEN)
+extern struct page *__bootdata_preserved(vmemmap);
+extern unsigned long __bootdata_preserved(vmemmap_size);
+
+extern unsigned long __bootdata_preserved(MODULES_VADDR);
+extern unsigned long __bootdata_preserved(MODULES_END);
+#define MODULES_VADDR	MODULES_VADDR
+#define MODULES_END	MODULES_END
+#define MODULES_LEN	(1UL << 31)
+
+static inline int is_module_addr(void *addr)
+{
+	BUILD_BUG_ON(MODULES_LEN > (1UL << 31));
+	if (addr < (void *)MODULES_VADDR)
+		return 0;
+	if (addr > (void *)MODULES_END)
+		return 0;
+	return 1;
+}
+
+/*
+ * A 64 bit pagetable entry of S390 has following format:
+ * |			 PFRA			      |0IPC|  OS  |
+ * 0000000000111111111122222222223333333333444444444455555555556666
+ * 0123456789012345678901234567890123456789012345678901234567890123
+ *
+ * I Page-Invalid Bit:    Page is not available for address-translation
+ * P Page-Protection Bit: Store access not possible for page
+ * C Change-bit override: HW is not required to set change bit
+ *
+ * A 64 bit segmenttable entry of S390 has following format:
+ * |        P-table origin                              |      TT
+ * 0000000000111111111122222222223333333333444444444455555555556666
+ * 0123456789012345678901234567890123456789012345678901234567890123
+ *
+ * I Segment-Invalid Bit:    Segment is not available for address-translation
+ * C Common-Segment Bit:     Segment is not private (PoP 3-30)
+ * P Page-Protection Bit: Store access not possible for page
+ * TT Type 00
+ *
+ * A 64 bit region table entry of S390 has following format:
+ * |        S-table origin                             |   TF  TTTL
+ * 0000000000111111111122222222223333333333444444444455555555556666
+ * 0123456789012345678901234567890123456789012345678901234567890123
+ *
+ * I Segment-Invalid Bit:    Segment is not available for address-translation
+ * TT Type 01
+ * TF
+ * TL Table length
+ *
+ * The 64 bit regiontable origin of S390 has following format:
+ * |      region table origon                          |       DTTL
+ * 0000000000111111111122222222223333333333444444444455555555556666
+ * 0123456789012345678901234567890123456789012345678901234567890123
+ *
+ * X Space-Switch event:
+ * G Segment-Invalid Bit:  
+ * P Private-Space Bit:    
+ * S Storage-Alteration:
+ * R Real space
+ * TL Table-Length:
+ *
+ * A storage key has the following format:
+ * | ACC |F|R|C|0|
+ *  0   3 4 5 6 7
+ * ACC: access key
+ * F  : fetch protection bit
+ * R  : referenced bit
+ * C  : changed bit
+ */
+
+/* Hardware bits in the page table entry */
+#define _PAGE_NOEXEC	0x100		/* HW no-execute bit  */
+#define _PAGE_PROTECT	0x200		/* HW read-only bit  */
+#define _PAGE_INVALID	0x400		/* HW invalid bit    */
+#define _PAGE_LARGE	0x800		/* Bit to mark a large pte */
+
+/* Software bits in the page table entry */
+#define _PAGE_PRESENT	0x001		/* SW pte present bit */
+#define _PAGE_YOUNG	0x004		/* SW pte young bit */
+#define _PAGE_DIRTY	0x008		/* SW pte dirty bit */
+#define _PAGE_READ	0x010		/* SW pte read bit */
+#define _PAGE_WRITE	0x020		/* SW pte write bit */
+#define _PAGE_SPECIAL	0x040		/* SW associated with special page */
+#define _PAGE_UNUSED	0x080		/* SW bit for pgste usage state */
+
+#ifdef CONFIG_MEM_SOFT_DIRTY
+#define _PAGE_SOFT_DIRTY 0x002		/* SW pte soft dirty bit */
+#else
+#define _PAGE_SOFT_DIRTY 0x000
+#endif
+
+#define _PAGE_SW_BITS	0xffUL		/* All SW bits */
+
+#define _PAGE_SWP_EXCLUSIVE _PAGE_LARGE	/* SW pte exclusive swap bit */
+
+/* Set of bits not changed in pte_modify */
+#define _PAGE_CHG_MASK		(PAGE_MASK | _PAGE_SPECIAL | _PAGE_DIRTY | \
+				 _PAGE_YOUNG | _PAGE_SOFT_DIRTY)
+
+/*
+ * Mask of bits that must not be changed with RDP. Allow only _PAGE_PROTECT
+ * HW bit and all SW bits.
+ */
+#define _PAGE_RDP_MASK		~(_PAGE_PROTECT | _PAGE_SW_BITS)
+
+/*
+ * handle_pte_fault uses pte_present and pte_none to find out the pte type
+ * WITHOUT holding the page table lock. The _PAGE_PRESENT bit is used to
+ * distinguish present from not-present ptes. It is changed only with the page
+ * table lock held.
+ *
+ * The following table gives the different possible bit combinations for
+ * the pte hardware and software bits in the last 12 bits of a pte
+ * (. unassigned bit, x don't care, t swap type):
+ *
+ *				842100000000
+ *				000084210000
+ *				000000008421
+ *				.IR.uswrdy.p
+ * empty			.10.00000000
+ * swap				.11..ttttt.0
+ * prot-none, clean, old	.11.xx0000.1
+ * prot-none, clean, young	.11.xx0001.1
+ * prot-none, dirty, old	.11.xx0010.1
+ * prot-none, dirty, young	.11.xx0011.1
+ * read-only, clean, old	.11.xx0100.1
+ * read-only, clean, young	.01.xx0101.1
+ * read-only, dirty, old	.11.xx0110.1
+ * read-only, dirty, young	.01.xx0111.1
+ * read-write, clean, old	.11.xx1100.1
+ * read-write, clean, young	.01.xx1101.1
+ * read-write, dirty, old	.10.xx1110.1
+ * read-write, dirty, young	.00.xx1111.1
+ * HW-bits: R read-only, I invalid
+ * SW-bits: p present, y young, d dirty, r read, w write, s special,
+ *	    u unused, l large
+ *
+ * pte_none    is true for the bit pattern .10.00000000, pte == 0x400
+ * pte_swap    is true for the bit pattern .11..ooooo.0, (pte & 0x201) == 0x200
+ * pte_present is true for the bit pattern .xx.xxxxxx.1, (pte & 0x001) == 0x001
+ */
+
+/* Bits in the segment/region table address-space-control-element */
+#define _ASCE_ORIGIN		~0xfffUL/* region/segment table origin	    */
+#define _ASCE_PRIVATE_SPACE	0x100	/* private space control	    */
+#define _ASCE_ALT_EVENT		0x80	/* storage alteration event control */
+#define _ASCE_SPACE_SWITCH	0x40	/* space switch event		    */
+#define _ASCE_REAL_SPACE	0x20	/* real space control		    */
+#define _ASCE_TYPE_MASK		0x0c	/* asce table type mask		    */
+#define _ASCE_TYPE_REGION1	0x0c	/* region first table type	    */
+#define _ASCE_TYPE_REGION2	0x08	/* region second table type	    */
+#define _ASCE_TYPE_REGION3	0x04	/* region third table type	    */
+#define _ASCE_TYPE_SEGMENT	0x00	/* segment table type		    */
+#define _ASCE_TABLE_LENGTH	0x03	/* region table length		    */
+
+/* Bits in the region table entry */
+#define _REGION_ENTRY_ORIGIN	~0xfffUL/* region/segment table origin	    */
+#define _REGION_ENTRY_PROTECT	0x200	/* region protection bit	    */
+#define _REGION_ENTRY_NOEXEC	0x100	/* region no-execute bit	    */
+#define _REGION_ENTRY_OFFSET	0xc0	/* region table offset		    */
+#define _REGION_ENTRY_INVALID	0x20	/* invalid region table entry	    */
+#define _REGION_ENTRY_TYPE_MASK	0x0c	/* region table type mask	    */
+#define _REGION_ENTRY_TYPE_R1	0x0c	/* region first table type	    */
+#define _REGION_ENTRY_TYPE_R2	0x08	/* region second table type	    */
+#define _REGION_ENTRY_TYPE_R3	0x04	/* region third table type	    */
+#define _REGION_ENTRY_LENGTH	0x03	/* region third length		    */
+
+#define _REGION1_ENTRY		(_REGION_ENTRY_TYPE_R1 | _REGION_ENTRY_LENGTH)
+#define _REGION1_ENTRY_EMPTY	(_REGION_ENTRY_TYPE_R1 | _REGION_ENTRY_INVALID)
+#define _REGION2_ENTRY		(_REGION_ENTRY_TYPE_R2 | _REGION_ENTRY_LENGTH)
+#define _REGION2_ENTRY_EMPTY	(_REGION_ENTRY_TYPE_R2 | _REGION_ENTRY_INVALID)
+#define _REGION3_ENTRY		(_REGION_ENTRY_TYPE_R3 | _REGION_ENTRY_LENGTH)
+#define _REGION3_ENTRY_EMPTY	(_REGION_ENTRY_TYPE_R3 | _REGION_ENTRY_INVALID)
+
+#define _REGION3_ENTRY_ORIGIN_LARGE ~0x7fffffffUL /* large page address	     */
+#define _REGION3_ENTRY_DIRTY	0x2000	/* SW region dirty bit */
+#define _REGION3_ENTRY_YOUNG	0x1000	/* SW region young bit */
+#define _REGION3_ENTRY_LARGE	0x0400	/* RTTE-format control, large page  */
+#define _REGION3_ENTRY_READ	0x0002	/* SW region read bit */
+#define _REGION3_ENTRY_WRITE	0x0001	/* SW region write bit */
+
+#ifdef CONFIG_MEM_SOFT_DIRTY
+#define _REGION3_ENTRY_SOFT_DIRTY 0x4000 /* SW region soft dirty bit */
+#else
+#define _REGION3_ENTRY_SOFT_DIRTY 0x0000 /* SW region soft dirty bit */
+#endif
+
+#define _REGION_ENTRY_BITS	 0xfffffffffffff22fUL
+
+/* Bits in the segment table entry */
+#define _SEGMENT_ENTRY_BITS			0xfffffffffffffe33UL
+#define _SEGMENT_ENTRY_HARDWARE_BITS		0xfffffffffffffe30UL
+#define _SEGMENT_ENTRY_HARDWARE_BITS_LARGE	0xfffffffffff00730UL
+#define _SEGMENT_ENTRY_ORIGIN_LARGE ~0xfffffUL /* large page address	    */
+#define _SEGMENT_ENTRY_ORIGIN	~0x7ffUL/* page table origin		    */
+#define _SEGMENT_ENTRY_PROTECT	0x200	/* segment protection bit	    */
+#define _SEGMENT_ENTRY_NOEXEC	0x100	/* segment no-execute bit	    */
+#define _SEGMENT_ENTRY_INVALID	0x20	/* invalid segment table entry	    */
+#define _SEGMENT_ENTRY_TYPE_MASK 0x0c	/* segment table type mask	    */
+
+#define _SEGMENT_ENTRY		(0)
+#define _SEGMENT_ENTRY_EMPTY	(_SEGMENT_ENTRY_INVALID)
+
+#define _SEGMENT_ENTRY_DIRTY	0x2000	/* SW segment dirty bit */
+#define _SEGMENT_ENTRY_YOUNG	0x1000	/* SW segment young bit */
+#define _SEGMENT_ENTRY_LARGE	0x0400	/* STE-format control, large page */
+#define _SEGMENT_ENTRY_WRITE	0x0002	/* SW segment write bit */
+#define _SEGMENT_ENTRY_READ	0x0001	/* SW segment read bit */
+
+#ifdef CONFIG_MEM_SOFT_DIRTY
+#define _SEGMENT_ENTRY_SOFT_DIRTY 0x4000 /* SW segment soft dirty bit */
+#else
+#define _SEGMENT_ENTRY_SOFT_DIRTY 0x0000 /* SW segment soft dirty bit */
+#endif
+
+#define _CRST_ENTRIES	2048	/* number of region/segment table entries */
+#define _PAGE_ENTRIES	256	/* number of page table entries	*/
+
+#define _CRST_TABLE_SIZE (_CRST_ENTRIES * 8)
+#define _PAGE_TABLE_SIZE (_PAGE_ENTRIES * 8)
+
+#define _REGION1_SHIFT	53
+#define _REGION2_SHIFT	42
+#define _REGION3_SHIFT	31
+#define _SEGMENT_SHIFT	20
+
+#define _REGION1_INDEX	(0x7ffUL << _REGION1_SHIFT)
+#define _REGION2_INDEX	(0x7ffUL << _REGION2_SHIFT)
+#define _REGION3_INDEX	(0x7ffUL << _REGION3_SHIFT)
+#define _SEGMENT_INDEX	(0x7ffUL << _SEGMENT_SHIFT)
+#define _PAGE_INDEX	(0xffUL  << _PAGE_SHIFT)
+
+#define _REGION1_SIZE	(1UL << _REGION1_SHIFT)
+#define _REGION2_SIZE	(1UL << _REGION2_SHIFT)
+#define _REGION3_SIZE	(1UL << _REGION3_SHIFT)
+#define _SEGMENT_SIZE	(1UL << _SEGMENT_SHIFT)
+
+#define _REGION1_MASK	(~(_REGION1_SIZE - 1))
+#define _REGION2_MASK	(~(_REGION2_SIZE - 1))
+#define _REGION3_MASK	(~(_REGION3_SIZE - 1))
+#define _SEGMENT_MASK	(~(_SEGMENT_SIZE - 1))
+
+#define PMD_SHIFT	_SEGMENT_SHIFT
+#define PUD_SHIFT	_REGION3_SHIFT
+#define P4D_SHIFT	_REGION2_SHIFT
+#define PGDIR_SHIFT	_REGION1_SHIFT
+
+#define PMD_SIZE	_SEGMENT_SIZE
+#define PUD_SIZE	_REGION3_SIZE
+#define P4D_SIZE	_REGION2_SIZE
+#define PGDIR_SIZE	_REGION1_SIZE
+
+#define PMD_MASK	_SEGMENT_MASK
+#define PUD_MASK	_REGION3_MASK
+#define P4D_MASK	_REGION2_MASK
+#define PGDIR_MASK	_REGION1_MASK
+
+#define PTRS_PER_PTE	_PAGE_ENTRIES
+#define PTRS_PER_PMD	_CRST_ENTRIES
+#define PTRS_PER_PUD	_CRST_ENTRIES
+#define PTRS_PER_P4D	_CRST_ENTRIES
+#define PTRS_PER_PGD	_CRST_ENTRIES
+
+/*
+ * Segment table and region3 table entry encoding
+ * (R = read-only, I = invalid, y = young bit):
+ *				dy..R...I...wr
+ * prot-none, clean, old	00..1...1...00
+ * prot-none, clean, young	01..1...1...00
+ * prot-none, dirty, old	10..1...1...00
+ * prot-none, dirty, young	11..1...1...00
+ * read-only, clean, old	00..1...1...01
+ * read-only, clean, young	01..1...0...01
+ * read-only, dirty, old	10..1...1...01
+ * read-only, dirty, young	11..1...0...01
+ * read-write, clean, old	00..1...1...11
+ * read-write, clean, young	01..1...0...11
+ * read-write, dirty, old	10..0...1...11
+ * read-write, dirty, young	11..0...0...11
+ * The segment table origin is used to distinguish empty (origin==0) from
+ * read-write, old segment table entries (origin!=0)
+ * HW-bits: R read-only, I invalid
+ * SW-bits: y young, d dirty, r read, w write
+ */
+
+/* Page status table bits for virtualization */
+#define PGSTE_ACC_BITS	0xf000000000000000UL
+#define PGSTE_FP_BIT	0x0800000000000000UL
+#define PGSTE_PCL_BIT	0x0080000000000000UL
+#define PGSTE_HR_BIT	0x0040000000000000UL
+#define PGSTE_HC_BIT	0x0020000000000000UL
+#define PGSTE_GR_BIT	0x0004000000000000UL
+#define PGSTE_GC_BIT	0x0002000000000000UL
+#define PGSTE_UC_BIT	0x0000800000000000UL	/* user dirty (migration) */
+#define PGSTE_IN_BIT	0x0000400000000000UL	/* IPTE notify bit */
+#define PGSTE_VSIE_BIT	0x0000200000000000UL	/* ref'd in a shadow table */
+
+/* Guest Page State used for virtualization */
+#define _PGSTE_GPS_ZERO			0x0000000080000000UL
+#define _PGSTE_GPS_NODAT		0x0000000040000000UL
+#define _PGSTE_GPS_USAGE_MASK		0x0000000003000000UL
+#define _PGSTE_GPS_USAGE_STABLE		0x0000000000000000UL
+#define _PGSTE_GPS_USAGE_UNUSED		0x0000000001000000UL
+#define _PGSTE_GPS_USAGE_POT_VOLATILE	0x0000000002000000UL
+#define _PGSTE_GPS_USAGE_VOLATILE	_PGSTE_GPS_USAGE_MASK
+
+/*
+ * A user page table pointer has the space-switch-event bit, the
+ * private-space-control bit and the storage-alteration-event-control
+ * bit set. A kernel page table pointer doesn't need them.
+ */
+#define _ASCE_USER_BITS		(_ASCE_SPACE_SWITCH | _ASCE_PRIVATE_SPACE | \
+				 _ASCE_ALT_EVENT)
+
+/*
+ * Page protection definitions.
+ */
+#define PAGE_NONE	__pgprot(_PAGE_PRESENT | _PAGE_INVALID | _PAGE_PROTECT)
+#define PAGE_RO		__pgprot(_PAGE_PRESENT | _PAGE_READ | \
+				 _PAGE_NOEXEC  | _PAGE_INVALID | _PAGE_PROTECT)
+#define PAGE_RX		__pgprot(_PAGE_PRESENT | _PAGE_READ | \
+				 _PAGE_INVALID | _PAGE_PROTECT)
+#define PAGE_RW		__pgprot(_PAGE_PRESENT | _PAGE_READ | _PAGE_WRITE | \
+				 _PAGE_NOEXEC  | _PAGE_INVALID | _PAGE_PROTECT)
+#define PAGE_RWX	__pgprot(_PAGE_PRESENT | _PAGE_READ | _PAGE_WRITE | \
+				 _PAGE_INVALID | _PAGE_PROTECT)
+
+#define PAGE_SHARED	__pgprot(_PAGE_PRESENT | _PAGE_READ | _PAGE_WRITE | \
+				 _PAGE_YOUNG | _PAGE_DIRTY | _PAGE_NOEXEC)
+#define PAGE_KERNEL	__pgprot(_PAGE_PRESENT | _PAGE_READ | _PAGE_WRITE | \
+				 _PAGE_YOUNG | _PAGE_DIRTY | _PAGE_NOEXEC)
+#define PAGE_KERNEL_RO	__pgprot(_PAGE_PRESENT | _PAGE_READ | _PAGE_YOUNG | \
+				 _PAGE_PROTECT | _PAGE_NOEXEC)
+#define PAGE_KERNEL_EXEC __pgprot(_PAGE_PRESENT | _PAGE_READ | _PAGE_WRITE | \
+				  _PAGE_YOUNG |	_PAGE_DIRTY)
+
+/*
+ * On s390 the page table entry has an invalid bit and a read-only bit.
+ * Read permission implies execute permission and write permission
+ * implies read permission.
+ */
+         /*xwr*/
+
+/*
+ * Segment entry (large page) protection definitions.
+ */
+#define SEGMENT_NONE	__pgprot(_SEGMENT_ENTRY_INVALID | \
+				 _SEGMENT_ENTRY_PROTECT)
+#define SEGMENT_RO	__pgprot(_SEGMENT_ENTRY_PROTECT | \
+				 _SEGMENT_ENTRY_READ | \
+				 _SEGMENT_ENTRY_NOEXEC)
+#define SEGMENT_RX	__pgprot(_SEGMENT_ENTRY_PROTECT | \
+				 _SEGMENT_ENTRY_READ)
+#define SEGMENT_RW	__pgprot(_SEGMENT_ENTRY_READ | \
+				 _SEGMENT_ENTRY_WRITE | \
+				 _SEGMENT_ENTRY_NOEXEC)
+#define SEGMENT_RWX	__pgprot(_SEGMENT_ENTRY_READ | \
+				 _SEGMENT_ENTRY_WRITE)
+#define SEGMENT_KERNEL	__pgprot(_SEGMENT_ENTRY |	\
+				 _SEGMENT_ENTRY_LARGE |	\
+				 _SEGMENT_ENTRY_READ |	\
+				 _SEGMENT_ENTRY_WRITE | \
+				 _SEGMENT_ENTRY_YOUNG | \
+				 _SEGMENT_ENTRY_DIRTY | \
+				 _SEGMENT_ENTRY_NOEXEC)
+#define SEGMENT_KERNEL_RO __pgprot(_SEGMENT_ENTRY |	\
+				 _SEGMENT_ENTRY_LARGE |	\
+				 _SEGMENT_ENTRY_READ |	\
+				 _SEGMENT_ENTRY_YOUNG |	\
+				 _SEGMENT_ENTRY_PROTECT | \
+				 _SEGMENT_ENTRY_NOEXEC)
+#define SEGMENT_KERNEL_EXEC __pgprot(_SEGMENT_ENTRY |	\
+				 _SEGMENT_ENTRY_LARGE |	\
+				 _SEGMENT_ENTRY_READ |	\
+				 _SEGMENT_ENTRY_WRITE | \
+				 _SEGMENT_ENTRY_YOUNG |	\
+				 _SEGMENT_ENTRY_DIRTY)
+
+/*
+ * Region3 entry (large page) protection definitions.
+ */
+
+#define REGION3_KERNEL	__pgprot(_REGION_ENTRY_TYPE_R3 | \
+				 _REGION3_ENTRY_LARGE |	 \
+				 _REGION3_ENTRY_READ |	 \
+				 _REGION3_ENTRY_WRITE |	 \
+				 _REGION3_ENTRY_YOUNG |	 \
+				 _REGION3_ENTRY_DIRTY | \
+				 _REGION_ENTRY_NOEXEC)
+#define REGION3_KERNEL_RO __pgprot(_REGION_ENTRY_TYPE_R3 | \
+				   _REGION3_ENTRY_LARGE |  \
+				   _REGION3_ENTRY_READ |   \
+				   _REGION3_ENTRY_YOUNG |  \
+				   _REGION_ENTRY_PROTECT | \
+				   _REGION_ENTRY_NOEXEC)
+#define REGION3_KERNEL_EXEC __pgprot(_REGION_ENTRY_TYPE_R3 | \
+				 _REGION3_ENTRY_LARGE |	 \
+				 _REGION3_ENTRY_READ |	 \
+				 _REGION3_ENTRY_WRITE |	 \
+				 _REGION3_ENTRY_YOUNG |	 \
+				 _REGION3_ENTRY_DIRTY)
+
+static inline bool mm_p4d_folded(struct mm_struct *mm)
+{
+	return mm->context.asce_limit <= _REGION1_SIZE;
+}
+#define mm_p4d_folded(mm) mm_p4d_folded(mm)
+
+static inline bool mm_pud_folded(struct mm_struct *mm)
+{
+	return mm->context.asce_limit <= _REGION2_SIZE;
+}
+#define mm_pud_folded(mm) mm_pud_folded(mm)
+
+static inline bool mm_pmd_folded(struct mm_struct *mm)
+{
+	return mm->context.asce_limit <= _REGION3_SIZE;
+}
+#define mm_pmd_folded(mm) mm_pmd_folded(mm)
+
+static inline int mm_has_pgste(struct mm_struct *mm)
+{
+#ifdef CONFIG_PGSTE
+	if (unlikely(mm->context.has_pgste))
+		return 1;
+#endif
+	return 0;
+}
+
+static inline int mm_is_protected(struct mm_struct *mm)
+{
+#ifdef CONFIG_PGSTE
+	if (unlikely(atomic_read(&mm->context.protected_count)))
+		return 1;
+#endif
+	return 0;
+}
+
+static inline int mm_alloc_pgste(struct mm_struct *mm)
+{
+#ifdef CONFIG_PGSTE
+	if (unlikely(mm->context.alloc_pgste))
+		return 1;
+#endif
+	return 0;
+}
+
+static inline pte_t clear_pte_bit(pte_t pte, pgprot_t prot)
+{
+	return __pte(pte_val(pte) & ~pgprot_val(prot));
+}
+
+static inline pte_t set_pte_bit(pte_t pte, pgprot_t prot)
+{
+	return __pte(pte_val(pte) | pgprot_val(prot));
+}
+
+static inline pmd_t clear_pmd_bit(pmd_t pmd, pgprot_t prot)
+{
+	return __pmd(pmd_val(pmd) & ~pgprot_val(prot));
+}
+
+static inline pmd_t set_pmd_bit(pmd_t pmd, pgprot_t prot)
+{
+	return __pmd(pmd_val(pmd) | pgprot_val(prot));
+}
+
+static inline pud_t clear_pud_bit(pud_t pud, pgprot_t prot)
+{
+	return __pud(pud_val(pud) & ~pgprot_val(prot));
+}
+
+static inline pud_t set_pud_bit(pud_t pud, pgprot_t prot)
+{
+	return __pud(pud_val(pud) | pgprot_val(prot));
+}
+
+/*
+ * In the case that a guest uses storage keys
+ * faults should no longer be backed by zero pages
+ */
+#define mm_forbids_zeropage mm_has_pgste
+static inline int mm_uses_skeys(struct mm_struct *mm)
+{
+#ifdef CONFIG_PGSTE
+	if (mm->context.uses_skeys)
+		return 1;
+#endif
+	return 0;
+}
+
+static inline void csp(unsigned int *ptr, unsigned int old, unsigned int new)
+{
+	union register_pair r1 = { .even = old, .odd = new, };
+	unsigned long address = (unsigned long)ptr | 1;
+
+	asm volatile(
+		"	csp	%[r1],%[address]"
+		: [r1] "+&d" (r1.pair), "+m" (*ptr)
+		: [address] "d" (address)
+		: "cc");
+}
+
+static inline void cspg(unsigned long *ptr, unsigned long old, unsigned long new)
+{
+	union register_pair r1 = { .even = old, .odd = new, };
+	unsigned long address = (unsigned long)ptr | 1;
+
+	asm volatile(
+		"	cspg	%[r1],%[address]"
+		: [r1] "+&d" (r1.pair), "+m" (*ptr)
+		: [address] "d" (address)
+		: "cc");
+}
+
+#define CRDTE_DTT_PAGE		0x00UL
+#define CRDTE_DTT_SEGMENT	0x10UL
+#define CRDTE_DTT_REGION3	0x14UL
+#define CRDTE_DTT_REGION2	0x18UL
+#define CRDTE_DTT_REGION1	0x1cUL
+
+static inline void crdte(unsigned long old, unsigned long new,
+			 unsigned long *table, unsigned long dtt,
+			 unsigned long address, unsigned long asce)
+{
+	union register_pair r1 = { .even = old, .odd = new, };
+	union register_pair r2 = { .even = __pa(table) | dtt, .odd = address, };
+
+	asm volatile(".insn rrf,0xb98f0000,%[r1],%[r2],%[asce],0"
+		     : [r1] "+&d" (r1.pair)
+		     : [r2] "d" (r2.pair), [asce] "a" (asce)
+		     : "memory", "cc");
+}
+
+/*
+ * pgd/p4d/pud/pmd/pte query functions
+ */
+static inline int pgd_folded(pgd_t pgd)
+{
+	return (pgd_val(pgd) & _REGION_ENTRY_TYPE_MASK) < _REGION_ENTRY_TYPE_R1;
+}
+
+static inline int pgd_present(pgd_t pgd)
+{
+	if (pgd_folded(pgd))
+		return 1;
+	return (pgd_val(pgd) & _REGION_ENTRY_ORIGIN) != 0UL;
+}
+
+static inline int pgd_none(pgd_t pgd)
+{
+	if (pgd_folded(pgd))
+		return 0;
+	return (pgd_val(pgd) & _REGION_ENTRY_INVALID) != 0UL;
+}
+
+static inline int pgd_bad(pgd_t pgd)
+{
+	if ((pgd_val(pgd) & _REGION_ENTRY_TYPE_MASK) < _REGION_ENTRY_TYPE_R1)
+		return 0;
+	return (pgd_val(pgd) & ~_REGION_ENTRY_BITS) != 0;
+}
+
+static inline unsigned long pgd_pfn(pgd_t pgd)
+{
+	unsigned long origin_mask;
+
+	origin_mask = _REGION_ENTRY_ORIGIN;
+	return (pgd_val(pgd) & origin_mask) >> PAGE_SHIFT;
+}
+
+static inline int p4d_folded(p4d_t p4d)
+{
+	return (p4d_val(p4d) & _REGION_ENTRY_TYPE_MASK) < _REGION_ENTRY_TYPE_R2;
+}
+
+static inline int p4d_present(p4d_t p4d)
+{
+	if (p4d_folded(p4d))
+		return 1;
+	return (p4d_val(p4d) & _REGION_ENTRY_ORIGIN) != 0UL;
+}
+
+static inline int p4d_none(p4d_t p4d)
+{
+	if (p4d_folded(p4d))
+		return 0;
+	return p4d_val(p4d) == _REGION2_ENTRY_EMPTY;
+}
+
+static inline unsigned long p4d_pfn(p4d_t p4d)
+{
+	unsigned long origin_mask;
+
+	origin_mask = _REGION_ENTRY_ORIGIN;
+	return (p4d_val(p4d) & origin_mask) >> PAGE_SHIFT;
+}
+
+static inline int pud_folded(pud_t pud)
+{
+	return (pud_val(pud) & _REGION_ENTRY_TYPE_MASK) < _REGION_ENTRY_TYPE_R3;
+}
+
+static inline int pud_present(pud_t pud)
+{
+	if (pud_folded(pud))
+		return 1;
+	return (pud_val(pud) & _REGION_ENTRY_ORIGIN) != 0UL;
+}
+
+static inline int pud_none(pud_t pud)
+{
+	if (pud_folded(pud))
+		return 0;
+	return pud_val(pud) == _REGION3_ENTRY_EMPTY;
+}
+
+#define pud_leaf	pud_large
+static inline int pud_large(pud_t pud)
+{
+	if ((pud_val(pud) & _REGION_ENTRY_TYPE_MASK) != _REGION_ENTRY_TYPE_R3)
+		return 0;
+	return !!(pud_val(pud) & _REGION3_ENTRY_LARGE);
+}
+
+#define pmd_leaf	pmd_large
+static inline int pmd_large(pmd_t pmd)
+{
+	return (pmd_val(pmd) & _SEGMENT_ENTRY_LARGE) != 0;
+}
+
+static inline int pmd_bad(pmd_t pmd)
+{
+	if ((pmd_val(pmd) & _SEGMENT_ENTRY_TYPE_MASK) > 0 || pmd_large(pmd))
+		return 1;
+	return (pmd_val(pmd) & ~_SEGMENT_ENTRY_BITS) != 0;
+}
+
+static inline int pud_bad(pud_t pud)
+{
+	unsigned long type = pud_val(pud) & _REGION_ENTRY_TYPE_MASK;
+
+	if (type > _REGION_ENTRY_TYPE_R3 || pud_large(pud))
+		return 1;
+	if (type < _REGION_ENTRY_TYPE_R3)
+		return 0;
+	return (pud_val(pud) & ~_REGION_ENTRY_BITS) != 0;
+}
+
+static inline int p4d_bad(p4d_t p4d)
+{
+	unsigned long type = p4d_val(p4d) & _REGION_ENTRY_TYPE_MASK;
+
+	if (type > _REGION_ENTRY_TYPE_R2)
+		return 1;
+	if (type < _REGION_ENTRY_TYPE_R2)
+		return 0;
+	return (p4d_val(p4d) & ~_REGION_ENTRY_BITS) != 0;
+}
+
+static inline int pmd_present(pmd_t pmd)
+{
+	return pmd_val(pmd) != _SEGMENT_ENTRY_EMPTY;
+}
+
+static inline int pmd_none(pmd_t pmd)
+{
+	return pmd_val(pmd) == _SEGMENT_ENTRY_EMPTY;
+}
+
+#define pmd_write pmd_write
+static inline int pmd_write(pmd_t pmd)
+{
+	return (pmd_val(pmd) & _SEGMENT_ENTRY_WRITE) != 0;
+}
+
+#define pud_write pud_write
+static inline int pud_write(pud_t pud)
+{
+	return (pud_val(pud) & _REGION3_ENTRY_WRITE) != 0;
+}
+
+static inline int pmd_dirty(pmd_t pmd)
+{
+	return (pmd_val(pmd) & _SEGMENT_ENTRY_DIRTY) != 0;
+}
+
+#define pmd_young pmd_young
+static inline int pmd_young(pmd_t pmd)
+{
+	return (pmd_val(pmd) & _SEGMENT_ENTRY_YOUNG) != 0;
+}
+
+static inline int pte_present(pte_t pte)
+{
+	/* Bit pattern: (pte & 0x001) == 0x001 */
+	return (pte_val(pte) & _PAGE_PRESENT) != 0;
+}
+
+static inline int pte_none(pte_t pte)
+{
+	/* Bit pattern: pte == 0x400 */
+	return pte_val(pte) == _PAGE_INVALID;
+}
+
+static inline int pte_swap(pte_t pte)
+{
+	/* Bit pattern: (pte & 0x201) == 0x200 */
+	return (pte_val(pte) & (_PAGE_PROTECT | _PAGE_PRESENT))
+		== _PAGE_PROTECT;
+}
+
+static inline int pte_special(pte_t pte)
+{
+	return (pte_val(pte) & _PAGE_SPECIAL);
+}
+
+#define __HAVE_ARCH_PTE_SAME
+static inline int pte_same(pte_t a, pte_t b)
+{
+	return pte_val(a) == pte_val(b);
+}
+
+#ifdef CONFIG_NUMA_BALANCING
+static inline int pte_protnone(pte_t pte)
+{
+	return pte_present(pte) && !(pte_val(pte) & _PAGE_READ);
+}
+
+static inline int pmd_protnone(pmd_t pmd)
+{
+	/* pmd_large(pmd) implies pmd_present(pmd) */
+	return pmd_large(pmd) && !(pmd_val(pmd) & _SEGMENT_ENTRY_READ);
+}
+#endif
+
+static inline int pte_swp_exclusive(pte_t pte)
+{
+	return pte_val(pte) & _PAGE_SWP_EXCLUSIVE;
+}
+
+static inline pte_t pte_swp_mkexclusive(pte_t pte)
+{
+	return set_pte_bit(pte, __pgprot(_PAGE_SWP_EXCLUSIVE));
+}
+
+static inline pte_t pte_swp_clear_exclusive(pte_t pte)
+{
+	return clear_pte_bit(pte, __pgprot(_PAGE_SWP_EXCLUSIVE));
+}
+
+static inline int pte_soft_dirty(pte_t pte)
+{
+	return pte_val(pte) & _PAGE_SOFT_DIRTY;
+}
+#define pte_swp_soft_dirty pte_soft_dirty
+
+static inline pte_t pte_mksoft_dirty(pte_t pte)
+{
+	return set_pte_bit(pte, __pgprot(_PAGE_SOFT_DIRTY));
+}
+#define pte_swp_mksoft_dirty pte_mksoft_dirty
+
+static inline pte_t pte_clear_soft_dirty(pte_t pte)
+{
+	return clear_pte_bit(pte, __pgprot(_PAGE_SOFT_DIRTY));
+}
+#define pte_swp_clear_soft_dirty pte_clear_soft_dirty
+
+static inline int pmd_soft_dirty(pmd_t pmd)
+{
+	return pmd_val(pmd) & _SEGMENT_ENTRY_SOFT_DIRTY;
+}
+
+static inline pmd_t pmd_mksoft_dirty(pmd_t pmd)
+{
+	return set_pmd_bit(pmd, __pgprot(_SEGMENT_ENTRY_SOFT_DIRTY));
+}
+
+static inline pmd_t pmd_clear_soft_dirty(pmd_t pmd)
+{
+	return clear_pmd_bit(pmd, __pgprot(_SEGMENT_ENTRY_SOFT_DIRTY));
+}
+
+/*
+ * query functions pte_write/pte_dirty/pte_young only work if
+ * pte_present() is true. Undefined behaviour if not..
+ */
+static inline int pte_write(pte_t pte)
+{
+	return (pte_val(pte) & _PAGE_WRITE) != 0;
+}
+
+static inline int pte_dirty(pte_t pte)
+{
+	return (pte_val(pte) & _PAGE_DIRTY) != 0;
+}
+
+static inline int pte_young(pte_t pte)
+{
+	return (pte_val(pte) & _PAGE_YOUNG) != 0;
+}
+
+#define __HAVE_ARCH_PTE_UNUSED
+static inline int pte_unused(pte_t pte)
+{
+	return pte_val(pte) & _PAGE_UNUSED;
+}
+
+/*
+ * Extract the pgprot value from the given pte while at the same time making it
+ * usable for kernel address space mappings where fault driven dirty and
+ * young/old accounting is not supported, i.e _PAGE_PROTECT and _PAGE_INVALID
+ * must not be set.
+ */
+static inline pgprot_t pte_pgprot(pte_t pte)
+{
+	unsigned long pte_flags = pte_val(pte) & _PAGE_CHG_MASK;
+
+	if (pte_write(pte))
+		pte_flags |= pgprot_val(PAGE_KERNEL);
+	else
+		pte_flags |= pgprot_val(PAGE_KERNEL_RO);
+	pte_flags |= pte_val(pte) & mio_wb_bit_mask;
+
+	return __pgprot(pte_flags);
+}
+
+/*
+ * pgd/pmd/pte modification functions
+ */
+
+static inline void set_pgd(pgd_t *pgdp, pgd_t pgd)
+{
+	WRITE_ONCE(*pgdp, pgd);
+}
+
+static inline void set_p4d(p4d_t *p4dp, p4d_t p4d)
+{
+	WRITE_ONCE(*p4dp, p4d);
+}
+
+static inline void set_pud(pud_t *pudp, pud_t pud)
+{
+	WRITE_ONCE(*pudp, pud);
+}
+
+static inline void set_pmd(pmd_t *pmdp, pmd_t pmd)
+{
+	WRITE_ONCE(*pmdp, pmd);
+}
+
+static inline void set_pte(pte_t *ptep, pte_t pte)
+{
+	WRITE_ONCE(*ptep, pte);
+}
+
+static inline void pgd_clear(pgd_t *pgd)
+{
+	if ((pgd_val(*pgd) & _REGION_ENTRY_TYPE_MASK) == _REGION_ENTRY_TYPE_R1)
+		set_pgd(pgd, __pgd(_REGION1_ENTRY_EMPTY));
+}
+
+static inline void p4d_clear(p4d_t *p4d)
+{
+	if ((p4d_val(*p4d) & _REGION_ENTRY_TYPE_MASK) == _REGION_ENTRY_TYPE_R2)
+		set_p4d(p4d, __p4d(_REGION2_ENTRY_EMPTY));
+}
+
+static inline void pud_clear(pud_t *pud)
+{
+	if ((pud_val(*pud) & _REGION_ENTRY_TYPE_MASK) == _REGION_ENTRY_TYPE_R3)
+		set_pud(pud, __pud(_REGION3_ENTRY_EMPTY));
+}
+
+static inline void pmd_clear(pmd_t *pmdp)
+{
+	set_pmd(pmdp, __pmd(_SEGMENT_ENTRY_EMPTY));
+}
+
+static inline void pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
+{
+	set_pte(ptep, __pte(_PAGE_INVALID));
+}
+
+/*
+ * The following pte modification functions only work if
+ * pte_present() is true. Undefined behaviour if not..
+ */
+static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
+{
+	pte = clear_pte_bit(pte, __pgprot(~_PAGE_CHG_MASK));
+	pte = set_pte_bit(pte, newprot);
+	/*
+	 * newprot for PAGE_NONE, PAGE_RO, PAGE_RX, PAGE_RW and PAGE_RWX
+	 * has the invalid bit set, clear it again for readable, young pages
+	 */
+	if ((pte_val(pte) & _PAGE_YOUNG) && (pte_val(pte) & _PAGE_READ))
+		pte = clear_pte_bit(pte, __pgprot(_PAGE_INVALID));
+	/*
+	 * newprot for PAGE_RO, PAGE_RX, PAGE_RW and PAGE_RWX has the page
+	 * protection bit set, clear it again for writable, dirty pages
+	 */
+	if ((pte_val(pte) & _PAGE_DIRTY) && (pte_val(pte) & _PAGE_WRITE))
+		pte = clear_pte_bit(pte, __pgprot(_PAGE_PROTECT));
+	return pte;
+}
+
+static inline pte_t pte_wrprotect(pte_t pte)
+{
+	pte = clear_pte_bit(pte, __pgprot(_PAGE_WRITE));
+	return set_pte_bit(pte, __pgprot(_PAGE_PROTECT));
+}
+
+static inline pte_t pte_mkwrite_novma(pte_t pte)
+{
+	pte = set_pte_bit(pte, __pgprot(_PAGE_WRITE));
+	if (pte_val(pte) & _PAGE_DIRTY)
+		pte = clear_pte_bit(pte, __pgprot(_PAGE_PROTECT));
+	return pte;
+}
+
+static inline pte_t pte_mkclean(pte_t pte)
+{
+	pte = clear_pte_bit(pte, __pgprot(_PAGE_DIRTY));
+	return set_pte_bit(pte, __pgprot(_PAGE_PROTECT));
+}
+
+static inline pte_t pte_mkdirty(pte_t pte)
+{
+	pte = set_pte_bit(pte, __pgprot(_PAGE_DIRTY | _PAGE_SOFT_DIRTY));
+	if (pte_val(pte) & _PAGE_WRITE)
+		pte = clear_pte_bit(pte, __pgprot(_PAGE_PROTECT));
+	return pte;
+}
+
+static inline pte_t pte_mkold(pte_t pte)
+{
+	pte = clear_pte_bit(pte, __pgprot(_PAGE_YOUNG));
+	return set_pte_bit(pte, __pgprot(_PAGE_INVALID));
+}
+
+static inline pte_t pte_mkyoung(pte_t pte)
+{
+	pte = set_pte_bit(pte, __pgprot(_PAGE_YOUNG));
+	if (pte_val(pte) & _PAGE_READ)
+		pte = clear_pte_bit(pte, __pgprot(_PAGE_INVALID));
+	return pte;
+}
+
+static inline pte_t pte_mkspecial(pte_t pte)
+{
+	return set_pte_bit(pte, __pgprot(_PAGE_SPECIAL));
+}
+
+#ifdef CONFIG_HUGETLB_PAGE
+static inline pte_t pte_mkhuge(pte_t pte)
+{
+	return set_pte_bit(pte, __pgprot(_PAGE_LARGE));
+}
+#endif
+
+#define IPTE_GLOBAL	0
+#define	IPTE_LOCAL	1
+
+#define IPTE_NODAT	0x400
+#define IPTE_GUEST_ASCE	0x800
+
+static __always_inline void __ptep_rdp(unsigned long addr, pte_t *ptep,
+				       unsigned long opt, unsigned long asce,
+				       int local)
+{
+	unsigned long pto;
+
+	pto = __pa(ptep) & ~(PTRS_PER_PTE * sizeof(pte_t) - 1);
+	asm volatile(".insn rrf,0xb98b0000,%[r1],%[r2],%[asce],%[m4]"
+		     : "+m" (*ptep)
+		     : [r1] "a" (pto), [r2] "a" ((addr & PAGE_MASK) | opt),
+		       [asce] "a" (asce), [m4] "i" (local));
+}
+
+static __always_inline void __ptep_ipte(unsigned long address, pte_t *ptep,
+					unsigned long opt, unsigned long asce,
+					int local)
+{
+	unsigned long pto = __pa(ptep);
+
+	if (__builtin_constant_p(opt) && opt == 0) {
+		/* Invalidation + TLB flush for the pte */
+		asm volatile(
+			"	ipte	%[r1],%[r2],0,%[m4]"
+			: "+m" (*ptep) : [r1] "a" (pto), [r2] "a" (address),
+			  [m4] "i" (local));
+		return;
+	}
+
+	/* Invalidate ptes with options + TLB flush of the ptes */
+	opt = opt | (asce & _ASCE_ORIGIN);
+	asm volatile(
+		"	ipte	%[r1],%[r2],%[r3],%[m4]"
+		: [r2] "+a" (address), [r3] "+a" (opt)
+		: [r1] "a" (pto), [m4] "i" (local) : "memory");
+}
+
+static __always_inline void __ptep_ipte_range(unsigned long address, int nr,
+					      pte_t *ptep, int local)
+{
+	unsigned long pto = __pa(ptep);
+
+	/* Invalidate a range of ptes + TLB flush of the ptes */
+	do {
+		asm volatile(
+			"	ipte %[r1],%[r2],%[r3],%[m4]"
+			: [r2] "+a" (address), [r3] "+a" (nr)
+			: [r1] "a" (pto), [m4] "i" (local) : "memory");
+	} while (nr != 255);
+}
+
+/*
+ * This is hard to understand. ptep_get_and_clear and ptep_clear_flush
+ * both clear the TLB for the unmapped pte. The reason is that
+ * ptep_get_and_clear is used in common code (e.g. change_pte_range)
+ * to modify an active pte. The sequence is
+ *   1) ptep_get_and_clear
+ *   2) set_pte_at
+ *   3) flush_tlb_range
+ * On s390 the tlb needs to get flushed with the modification of the pte
+ * if the pte is active. The only way how this can be implemented is to
+ * have ptep_get_and_clear do the tlb flush. In exchange flush_tlb_range
+ * is a nop.
+ */
+pte_t ptep_xchg_direct(struct mm_struct *, unsigned long, pte_t *, pte_t);
+pte_t ptep_xchg_lazy(struct mm_struct *, unsigned long, pte_t *, pte_t);
+
+#define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG
+static inline int ptep_test_and_clear_young(struct vm_area_struct *vma,
+					    unsigned long addr, pte_t *ptep)
+{
+	pte_t pte = *ptep;
+
+	pte = ptep_xchg_direct(vma->vm_mm, addr, ptep, pte_mkold(pte));
+	return pte_young(pte);
+}
+
+#define __HAVE_ARCH_PTEP_CLEAR_YOUNG_FLUSH
+static inline int ptep_clear_flush_young(struct vm_area_struct *vma,
+					 unsigned long address, pte_t *ptep)
+{
+	return ptep_test_and_clear_young(vma, address, ptep);
+}
+
+#define __HAVE_ARCH_PTEP_GET_AND_CLEAR
+static inline pte_t ptep_get_and_clear(struct mm_struct *mm,
+				       unsigned long addr, pte_t *ptep)
+{
+	pte_t res;
+
+	res = ptep_xchg_lazy(mm, addr, ptep, __pte(_PAGE_INVALID));
+	/* At this point the reference through the mapping is still present */
+	if (mm_is_protected(mm) && pte_present(res))
+		uv_convert_owned_from_secure(pte_val(res) & PAGE_MASK);
+	return res;
+}
+
+#define __HAVE_ARCH_PTEP_MODIFY_PROT_TRANSACTION
+pte_t ptep_modify_prot_start(struct vm_area_struct *, unsigned long, pte_t *);
+void ptep_modify_prot_commit(struct vm_area_struct *, unsigned long,
+			     pte_t *, pte_t, pte_t);
+
+#define __HAVE_ARCH_PTEP_CLEAR_FLUSH
+static inline pte_t ptep_clear_flush(struct vm_area_struct *vma,
+				     unsigned long addr, pte_t *ptep)
+{
+	pte_t res;
+
+	res = ptep_xchg_direct(vma->vm_mm, addr, ptep, __pte(_PAGE_INVALID));
+	/* At this point the reference through the mapping is still present */
+	if (mm_is_protected(vma->vm_mm) && pte_present(res))
+		uv_convert_owned_from_secure(pte_val(res) & PAGE_MASK);
+	return res;
+}
+
+/*
+ * The batched pte unmap code uses ptep_get_and_clear_full to clear the
+ * ptes. Here an optimization is possible. tlb_gather_mmu flushes all
+ * tlbs of an mm if it can guarantee that the ptes of the mm_struct
+ * cannot be accessed while the batched unmap is running. In this case
+ * full==1 and a simple pte_clear is enough. See tlb.h.
+ */
+#define __HAVE_ARCH_PTEP_GET_AND_CLEAR_FULL
+static inline pte_t ptep_get_and_clear_full(struct mm_struct *mm,
+					    unsigned long addr,
+					    pte_t *ptep, int full)
+{
+	pte_t res;
+
+	if (full) {
+		res = *ptep;
+		set_pte(ptep, __pte(_PAGE_INVALID));
+	} else {
+		res = ptep_xchg_lazy(mm, addr, ptep, __pte(_PAGE_INVALID));
+	}
+	/* Nothing to do */
+	if (!mm_is_protected(mm) || !pte_present(res))
+		return res;
+	/*
+	 * At this point the reference through the mapping is still present.
+	 * The notifier should have destroyed all protected vCPUs at this
+	 * point, so the destroy should be successful.
+	 */
+	if (full && !uv_destroy_owned_page(pte_val(res) & PAGE_MASK))
+		return res;
+	/*
+	 * If something went wrong and the page could not be destroyed, or
+	 * if this is not a mm teardown, the slower export is used as
+	 * fallback instead.
+	 */
+	uv_convert_owned_from_secure(pte_val(res) & PAGE_MASK);
+	return res;
+}
+
+#define __HAVE_ARCH_PTEP_SET_WRPROTECT
+static inline void ptep_set_wrprotect(struct mm_struct *mm,
+				      unsigned long addr, pte_t *ptep)
+{
+	pte_t pte = *ptep;
+
+	if (pte_write(pte))
+		ptep_xchg_lazy(mm, addr, ptep, pte_wrprotect(pte));
+}
+
+/*
+ * Check if PTEs only differ in _PAGE_PROTECT HW bit, but also allow SW PTE
+ * bits in the comparison. Those might change e.g. because of dirty and young
+ * tracking.
+ */
+static inline int pte_allow_rdp(pte_t old, pte_t new)
+{
+	/*
+	 * Only allow changes from RO to RW
+	 */
+	if (!(pte_val(old) & _PAGE_PROTECT) || pte_val(new) & _PAGE_PROTECT)
+		return 0;
+
+	return (pte_val(old) & _PAGE_RDP_MASK) == (pte_val(new) & _PAGE_RDP_MASK);
+}
+
+static inline void flush_tlb_fix_spurious_fault(struct vm_area_struct *vma,
+						unsigned long address,
+						pte_t *ptep)
+{
+	/*
+	 * RDP might not have propagated the PTE protection reset to all CPUs,
+	 * so there could be spurious TLB protection faults.
+	 * NOTE: This will also be called when a racing pagetable update on
+	 * another thread already installed the correct PTE. Both cases cannot
+	 * really be distinguished.
+	 * Therefore, only do the local TLB flush when RDP can be used, and the
+	 * PTE does not have _PAGE_PROTECT set, to avoid unnecessary overhead.
+	 * A local RDP can be used to do the flush.
+	 */
+	if (MACHINE_HAS_RDP && !(pte_val(*ptep) & _PAGE_PROTECT))
+		__ptep_rdp(address, ptep, 0, 0, 1);
+}
+#define flush_tlb_fix_spurious_fault flush_tlb_fix_spurious_fault
+
+void ptep_reset_dat_prot(struct mm_struct *mm, unsigned long addr, pte_t *ptep,
+			 pte_t new);
+
+#define __HAVE_ARCH_PTEP_SET_ACCESS_FLAGS
+static inline int ptep_set_access_flags(struct vm_area_struct *vma,
+					unsigned long addr, pte_t *ptep,
+					pte_t entry, int dirty)
+{
+	if (pte_same(*ptep, entry))
+		return 0;
+	if (MACHINE_HAS_RDP && !mm_has_pgste(vma->vm_mm) && pte_allow_rdp(*ptep, entry))
+		ptep_reset_dat_prot(vma->vm_mm, addr, ptep, entry);
+	else
+		ptep_xchg_direct(vma->vm_mm, addr, ptep, entry);
+	return 1;
+}
+
+/*
+ * Additional functions to handle KVM guest page tables
+ */
+void ptep_set_pte_at(struct mm_struct *mm, unsigned long addr,
+		     pte_t *ptep, pte_t entry);
+void ptep_set_notify(struct mm_struct *mm, unsigned long addr, pte_t *ptep);
+void ptep_notify(struct mm_struct *mm, unsigned long addr,
+		 pte_t *ptep, unsigned long bits);
+int ptep_force_prot(struct mm_struct *mm, unsigned long gaddr,
+		    pte_t *ptep, int prot, unsigned long bit);
+void ptep_zap_unused(struct mm_struct *mm, unsigned long addr,
+		     pte_t *ptep , int reset);
+void ptep_zap_key(struct mm_struct *mm, unsigned long addr, pte_t *ptep);
+int ptep_shadow_pte(struct mm_struct *mm, unsigned long saddr,
+		    pte_t *sptep, pte_t *tptep, pte_t pte);
+void ptep_unshadow_pte(struct mm_struct *mm, unsigned long saddr, pte_t *ptep);
+
+bool ptep_test_and_clear_uc(struct mm_struct *mm, unsigned long address,
+			    pte_t *ptep);
+int set_guest_storage_key(struct mm_struct *mm, unsigned long addr,
+			  unsigned char key, bool nq);
+int cond_set_guest_storage_key(struct mm_struct *mm, unsigned long addr,
+			       unsigned char key, unsigned char *oldkey,
+			       bool nq, bool mr, bool mc);
+int reset_guest_reference_bit(struct mm_struct *mm, unsigned long addr);
+int get_guest_storage_key(struct mm_struct *mm, unsigned long addr,
+			  unsigned char *key);
+
+int set_pgste_bits(struct mm_struct *mm, unsigned long addr,
+				unsigned long bits, unsigned long value);
+int get_pgste(struct mm_struct *mm, unsigned long hva, unsigned long *pgstep);
+int pgste_perform_essa(struct mm_struct *mm, unsigned long hva, int orc,
+			unsigned long *oldpte, unsigned long *oldpgste);
+void gmap_pmdp_csp(struct mm_struct *mm, unsigned long vmaddr);
+void gmap_pmdp_invalidate(struct mm_struct *mm, unsigned long vmaddr);
+void gmap_pmdp_idte_local(struct mm_struct *mm, unsigned long vmaddr);
+void gmap_pmdp_idte_global(struct mm_struct *mm, unsigned long vmaddr);
+
+#define pgprot_writecombine	pgprot_writecombine
+pgprot_t pgprot_writecombine(pgprot_t prot);
+
+#define pgprot_writethrough	pgprot_writethrough
+pgprot_t pgprot_writethrough(pgprot_t prot);
+
+/*
+ * Set multiple PTEs to consecutive pages with a single call.  All PTEs
+ * are within the same folio, PMD and VMA.
+ */
+static inline void set_ptes(struct mm_struct *mm, unsigned long addr,
+			      pte_t *ptep, pte_t entry, unsigned int nr)
+{
+	if (pte_present(entry))
+		entry = clear_pte_bit(entry, __pgprot(_PAGE_UNUSED));
+	if (mm_has_pgste(mm)) {
+		for (;;) {
+			ptep_set_pte_at(mm, addr, ptep, entry);
+			if (--nr == 0)
+				break;
+			ptep++;
+			entry = __pte(pte_val(entry) + PAGE_SIZE);
+			addr += PAGE_SIZE;
+		}
+	} else {
+		for (;;) {
+			set_pte(ptep, entry);
+			if (--nr == 0)
+				break;
+			ptep++;
+			entry = __pte(pte_val(entry) + PAGE_SIZE);
+		}
+	}
+}
+#define set_ptes set_ptes
+
+/*
+ * Conversion functions: convert a page and protection to a page entry,
+ * and a page entry and page directory to the page they refer to.
+ */
+static inline pte_t mk_pte_phys(unsigned long physpage, pgprot_t pgprot)
+{
+	pte_t __pte;
+
+	__pte = __pte(physpage | pgprot_val(pgprot));
+	if (!MACHINE_HAS_NX)
+		__pte = clear_pte_bit(__pte, __pgprot(_PAGE_NOEXEC));
+	return pte_mkyoung(__pte);
+}
+
+static inline pte_t mk_pte(struct page *page, pgprot_t pgprot)
+{
+	unsigned long physpage = page_to_phys(page);
+	pte_t __pte = mk_pte_phys(physpage, pgprot);
+
+	if (pte_write(__pte) && PageDirty(page))
+		__pte = pte_mkdirty(__pte);
+	return __pte;
+}
+
+#define pgd_index(address) (((address) >> PGDIR_SHIFT) & (PTRS_PER_PGD-1))
+#define p4d_index(address) (((address) >> P4D_SHIFT) & (PTRS_PER_P4D-1))
+#define pud_index(address) (((address) >> PUD_SHIFT) & (PTRS_PER_PUD-1))
+#define pmd_index(address) (((address) >> PMD_SHIFT) & (PTRS_PER_PMD-1))
+
+#define p4d_deref(pud) ((unsigned long)__va(p4d_val(pud) & _REGION_ENTRY_ORIGIN))
+#define pgd_deref(pgd) ((unsigned long)__va(pgd_val(pgd) & _REGION_ENTRY_ORIGIN))
+
+static inline unsigned long pmd_deref(pmd_t pmd)
+{
+	unsigned long origin_mask;
+
+	origin_mask = _SEGMENT_ENTRY_ORIGIN;
+	if (pmd_large(pmd))
+		origin_mask = _SEGMENT_ENTRY_ORIGIN_LARGE;
+	return (unsigned long)__va(pmd_val(pmd) & origin_mask);
+}
+
+static inline unsigned long pmd_pfn(pmd_t pmd)
+{
+	return __pa(pmd_deref(pmd)) >> PAGE_SHIFT;
+}
+
+static inline unsigned long pud_deref(pud_t pud)
+{
+	unsigned long origin_mask;
+
+	origin_mask = _REGION_ENTRY_ORIGIN;
+	if (pud_large(pud))
+		origin_mask = _REGION3_ENTRY_ORIGIN_LARGE;
+	return (unsigned long)__va(pud_val(pud) & origin_mask);
+}
+
+static inline unsigned long pud_pfn(pud_t pud)
+{
+	return __pa(pud_deref(pud)) >> PAGE_SHIFT;
+}
+
+/*
+ * The pgd_offset function *always* adds the index for the top-level
+ * region/segment table. This is done to get a sequence like the
+ * following to work:
+ *	pgdp = pgd_offset(current->mm, addr);
+ *	pgd = READ_ONCE(*pgdp);
+ *	p4dp = p4d_offset(&pgd, addr);
+ *	...
+ * The subsequent p4d_offset, pud_offset and pmd_offset functions
+ * only add an index if they dereferenced the pointer.
+ */
+static inline pgd_t *pgd_offset_raw(pgd_t *pgd, unsigned long address)
+{
+	unsigned long rste;
+	unsigned int shift;
+
+	/* Get the first entry of the top level table */
+	rste = pgd_val(*pgd);
+	/* Pick up the shift from the table type of the first entry */
+	shift = ((rste & _REGION_ENTRY_TYPE_MASK) >> 2) * 11 + 20;
+	return pgd + ((address >> shift) & (PTRS_PER_PGD - 1));
+}
+
+#define pgd_offset(mm, address) pgd_offset_raw(READ_ONCE((mm)->pgd), address)
+
+static inline p4d_t *p4d_offset_lockless(pgd_t *pgdp, pgd_t pgd, unsigned long address)
+{
+	if ((pgd_val(pgd) & _REGION_ENTRY_TYPE_MASK) >= _REGION_ENTRY_TYPE_R1)
+		return (p4d_t *) pgd_deref(pgd) + p4d_index(address);
+	return (p4d_t *) pgdp;
+}
+#define p4d_offset_lockless p4d_offset_lockless
+
+static inline p4d_t *p4d_offset(pgd_t *pgdp, unsigned long address)
+{
+	return p4d_offset_lockless(pgdp, *pgdp, address);
+}
+
+static inline pud_t *pud_offset_lockless(p4d_t *p4dp, p4d_t p4d, unsigned long address)
+{
+	if ((p4d_val(p4d) & _REGION_ENTRY_TYPE_MASK) >= _REGION_ENTRY_TYPE_R2)
+		return (pud_t *) p4d_deref(p4d) + pud_index(address);
+	return (pud_t *) p4dp;
+}
+#define pud_offset_lockless pud_offset_lockless
+
+static inline pud_t *pud_offset(p4d_t *p4dp, unsigned long address)
+{
+	return pud_offset_lockless(p4dp, *p4dp, address);
+}
+#define pud_offset pud_offset
+
+static inline pmd_t *pmd_offset_lockless(pud_t *pudp, pud_t pud, unsigned long address)
+{
+	if ((pud_val(pud) & _REGION_ENTRY_TYPE_MASK) >= _REGION_ENTRY_TYPE_R3)
+		return (pmd_t *) pud_deref(pud) + pmd_index(address);
+	return (pmd_t *) pudp;
+}
+#define pmd_offset_lockless pmd_offset_lockless
+
+static inline pmd_t *pmd_offset(pud_t *pudp, unsigned long address)
+{
+	return pmd_offset_lockless(pudp, *pudp, address);
+}
+#define pmd_offset pmd_offset
+
+static inline unsigned long pmd_page_vaddr(pmd_t pmd)
+{
+	return (unsigned long) pmd_deref(pmd);
+}
+
+static inline bool gup_fast_permitted(unsigned long start, unsigned long end)
+{
+	return end <= current->mm->context.asce_limit;
+}
+#define gup_fast_permitted gup_fast_permitted
+
+#define pfn_pte(pfn, pgprot)	mk_pte_phys(((pfn) << PAGE_SHIFT), (pgprot))
+#define pte_pfn(x) (pte_val(x) >> PAGE_SHIFT)
+#define pte_page(x) pfn_to_page(pte_pfn(x))
+
+#define pmd_page(pmd) pfn_to_page(pmd_pfn(pmd))
+#define pud_page(pud) pfn_to_page(pud_pfn(pud))
+#define p4d_page(p4d) pfn_to_page(p4d_pfn(p4d))
+#define pgd_page(pgd) pfn_to_page(pgd_pfn(pgd))
+
+static inline pmd_t pmd_wrprotect(pmd_t pmd)
+{
+	pmd = clear_pmd_bit(pmd, __pgprot(_SEGMENT_ENTRY_WRITE));
+	return set_pmd_bit(pmd, __pgprot(_SEGMENT_ENTRY_PROTECT));
+}
+
+static inline pmd_t pmd_mkwrite_novma(pmd_t pmd)
+{
+	pmd = set_pmd_bit(pmd, __pgprot(_SEGMENT_ENTRY_WRITE));
+	if (pmd_val(pmd) & _SEGMENT_ENTRY_DIRTY)
+		pmd = clear_pmd_bit(pmd, __pgprot(_SEGMENT_ENTRY_PROTECT));
+	return pmd;
+}
+
+static inline pmd_t pmd_mkclean(pmd_t pmd)
+{
+	pmd = clear_pmd_bit(pmd, __pgprot(_SEGMENT_ENTRY_DIRTY));
+	return set_pmd_bit(pmd, __pgprot(_SEGMENT_ENTRY_PROTECT));
+}
+
+static inline pmd_t pmd_mkdirty(pmd_t pmd)
+{
+	pmd = set_pmd_bit(pmd, __pgprot(_SEGMENT_ENTRY_DIRTY | _SEGMENT_ENTRY_SOFT_DIRTY));
+	if (pmd_val(pmd) & _SEGMENT_ENTRY_WRITE)
+		pmd = clear_pmd_bit(pmd, __pgprot(_SEGMENT_ENTRY_PROTECT));
+	return pmd;
+}
+
+static inline pud_t pud_wrprotect(pud_t pud)
+{
+	pud = clear_pud_bit(pud, __pgprot(_REGION3_ENTRY_WRITE));
+	return set_pud_bit(pud, __pgprot(_REGION_ENTRY_PROTECT));
+}
+
+static inline pud_t pud_mkwrite(pud_t pud)
+{
+	pud = set_pud_bit(pud, __pgprot(_REGION3_ENTRY_WRITE));
+	if (pud_val(pud) & _REGION3_ENTRY_DIRTY)
+		pud = clear_pud_bit(pud, __pgprot(_REGION_ENTRY_PROTECT));
+	return pud;
+}
+
+static inline pud_t pud_mkclean(pud_t pud)
+{
+	pud = clear_pud_bit(pud, __pgprot(_REGION3_ENTRY_DIRTY));
+	return set_pud_bit(pud, __pgprot(_REGION_ENTRY_PROTECT));
+}
+
+static inline pud_t pud_mkdirty(pud_t pud)
+{
+	pud = set_pud_bit(pud, __pgprot(_REGION3_ENTRY_DIRTY | _REGION3_ENTRY_SOFT_DIRTY));
+	if (pud_val(pud) & _REGION3_ENTRY_WRITE)
+		pud = clear_pud_bit(pud, __pgprot(_REGION_ENTRY_PROTECT));
+	return pud;
+}
+
+#if defined(CONFIG_TRANSPARENT_HUGEPAGE) || defined(CONFIG_HUGETLB_PAGE)
+static inline unsigned long massage_pgprot_pmd(pgprot_t pgprot)
+{
+	/*
+	 * pgprot is PAGE_NONE, PAGE_RO, PAGE_RX, PAGE_RW or PAGE_RWX
+	 * (see __Pxxx / __Sxxx). Convert to segment table entry format.
+	 */
+	if (pgprot_val(pgprot) == pgprot_val(PAGE_NONE))
+		return pgprot_val(SEGMENT_NONE);
+	if (pgprot_val(pgprot) == pgprot_val(PAGE_RO))
+		return pgprot_val(SEGMENT_RO);
+	if (pgprot_val(pgprot) == pgprot_val(PAGE_RX))
+		return pgprot_val(SEGMENT_RX);
+	if (pgprot_val(pgprot) == pgprot_val(PAGE_RW))
+		return pgprot_val(SEGMENT_RW);
+	return pgprot_val(SEGMENT_RWX);
+}
+
+static inline pmd_t pmd_mkyoung(pmd_t pmd)
+{
+	pmd = set_pmd_bit(pmd, __pgprot(_SEGMENT_ENTRY_YOUNG));
+	if (pmd_val(pmd) & _SEGMENT_ENTRY_READ)
+		pmd = clear_pmd_bit(pmd, __pgprot(_SEGMENT_ENTRY_INVALID));
+	return pmd;
+}
+
+static inline pmd_t pmd_mkold(pmd_t pmd)
+{
+	pmd = clear_pmd_bit(pmd, __pgprot(_SEGMENT_ENTRY_YOUNG));
+	return set_pmd_bit(pmd, __pgprot(_SEGMENT_ENTRY_INVALID));
+}
+
+static inline pmd_t pmd_modify(pmd_t pmd, pgprot_t newprot)
+{
+	unsigned long mask;
+
+	mask  = _SEGMENT_ENTRY_ORIGIN_LARGE;
+	mask |= _SEGMENT_ENTRY_DIRTY;
+	mask |= _SEGMENT_ENTRY_YOUNG;
+	mask |=	_SEGMENT_ENTRY_LARGE;
+	mask |= _SEGMENT_ENTRY_SOFT_DIRTY;
+	pmd = __pmd(pmd_val(pmd) & mask);
+	pmd = set_pmd_bit(pmd, __pgprot(massage_pgprot_pmd(newprot)));
+	if (!(pmd_val(pmd) & _SEGMENT_ENTRY_DIRTY))
+		pmd = set_pmd_bit(pmd, __pgprot(_SEGMENT_ENTRY_PROTECT));
+	if (!(pmd_val(pmd) & _SEGMENT_ENTRY_YOUNG))
+		pmd = set_pmd_bit(pmd, __pgprot(_SEGMENT_ENTRY_INVALID));
+	return pmd;
+}
+
+static inline pmd_t mk_pmd_phys(unsigned long physpage, pgprot_t pgprot)
+{
+	return __pmd(physpage + massage_pgprot_pmd(pgprot));
+}
+
+#endif /* CONFIG_TRANSPARENT_HUGEPAGE || CONFIG_HUGETLB_PAGE */
+
+static inline void __pmdp_csp(pmd_t *pmdp)
+{
+	csp((unsigned int *)pmdp + 1, pmd_val(*pmdp),
+	    pmd_val(*pmdp) | _SEGMENT_ENTRY_INVALID);
+}
+
+#define IDTE_GLOBAL	0
+#define IDTE_LOCAL	1
+
+#define IDTE_PTOA	0x0800
+#define IDTE_NODAT	0x1000
+#define IDTE_GUEST_ASCE	0x2000
+
+static __always_inline void __pmdp_idte(unsigned long addr, pmd_t *pmdp,
+					unsigned long opt, unsigned long asce,
+					int local)
+{
+	unsigned long sto;
+
+	sto = __pa(pmdp) - pmd_index(addr) * sizeof(pmd_t);
+	if (__builtin_constant_p(opt) && opt == 0) {
+		/* flush without guest asce */
+		asm volatile(
+			"	idte	%[r1],0,%[r2],%[m4]"
+			: "+m" (*pmdp)
+			: [r1] "a" (sto), [r2] "a" ((addr & HPAGE_MASK)),
+			  [m4] "i" (local)
+			: "cc" );
+	} else {
+		/* flush with guest asce */
+		asm volatile(
+			"	idte	%[r1],%[r3],%[r2],%[m4]"
+			: "+m" (*pmdp)
+			: [r1] "a" (sto), [r2] "a" ((addr & HPAGE_MASK) | opt),
+			  [r3] "a" (asce), [m4] "i" (local)
+			: "cc" );
+	}
+}
+
+static __always_inline void __pudp_idte(unsigned long addr, pud_t *pudp,
+					unsigned long opt, unsigned long asce,
+					int local)
+{
+	unsigned long r3o;
+
+	r3o = __pa(pudp) - pud_index(addr) * sizeof(pud_t);
+	r3o |= _ASCE_TYPE_REGION3;
+	if (__builtin_constant_p(opt) && opt == 0) {
+		/* flush without guest asce */
+		asm volatile(
+			"	idte	%[r1],0,%[r2],%[m4]"
+			: "+m" (*pudp)
+			: [r1] "a" (r3o), [r2] "a" ((addr & PUD_MASK)),
+			  [m4] "i" (local)
+			: "cc");
+	} else {
+		/* flush with guest asce */
+		asm volatile(
+			"	idte	%[r1],%[r3],%[r2],%[m4]"
+			: "+m" (*pudp)
+			: [r1] "a" (r3o), [r2] "a" ((addr & PUD_MASK) | opt),
+			  [r3] "a" (asce), [m4] "i" (local)
+			: "cc" );
+	}
+}
+
+pmd_t pmdp_xchg_direct(struct mm_struct *, unsigned long, pmd_t *, pmd_t);
+pmd_t pmdp_xchg_lazy(struct mm_struct *, unsigned long, pmd_t *, pmd_t);
+pud_t pudp_xchg_direct(struct mm_struct *, unsigned long, pud_t *, pud_t);
+
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+
+#define __HAVE_ARCH_PGTABLE_DEPOSIT
+void pgtable_trans_huge_deposit(struct mm_struct *mm, pmd_t *pmdp,
+				pgtable_t pgtable);
+
+#define __HAVE_ARCH_PGTABLE_WITHDRAW
+pgtable_t pgtable_trans_huge_withdraw(struct mm_struct *mm, pmd_t *pmdp);
+
+#define  __HAVE_ARCH_PMDP_SET_ACCESS_FLAGS
+static inline int pmdp_set_access_flags(struct vm_area_struct *vma,
+					unsigned long addr, pmd_t *pmdp,
+					pmd_t entry, int dirty)
+{
+	VM_BUG_ON(addr & ~HPAGE_MASK);
+
+	entry = pmd_mkyoung(entry);
+	if (dirty)
+		entry = pmd_mkdirty(entry);
+	if (pmd_val(*pmdp) == pmd_val(entry))
+		return 0;
+	pmdp_xchg_direct(vma->vm_mm, addr, pmdp, entry);
+	return 1;
+}
+
+#define __HAVE_ARCH_PMDP_TEST_AND_CLEAR_YOUNG
+static inline int pmdp_test_and_clear_young(struct vm_area_struct *vma,
+					    unsigned long addr, pmd_t *pmdp)
+{
+	pmd_t pmd = *pmdp;
+
+	pmd = pmdp_xchg_direct(vma->vm_mm, addr, pmdp, pmd_mkold(pmd));
+	return pmd_young(pmd);
+}
+
+#define __HAVE_ARCH_PMDP_CLEAR_YOUNG_FLUSH
+static inline int pmdp_clear_flush_young(struct vm_area_struct *vma,
+					 unsigned long addr, pmd_t *pmdp)
+{
+	VM_BUG_ON(addr & ~HPAGE_MASK);
+	return pmdp_test_and_clear_young(vma, addr, pmdp);
+}
+
+static inline void set_pmd_at(struct mm_struct *mm, unsigned long addr,
+			      pmd_t *pmdp, pmd_t entry)
+{
+	if (!MACHINE_HAS_NX)
+		entry = clear_pmd_bit(entry, __pgprot(_SEGMENT_ENTRY_NOEXEC));
+	set_pmd(pmdp, entry);
+}
+
+static inline pmd_t pmd_mkhuge(pmd_t pmd)
+{
+	pmd = set_pmd_bit(pmd, __pgprot(_SEGMENT_ENTRY_LARGE));
+	pmd = set_pmd_bit(pmd, __pgprot(_SEGMENT_ENTRY_YOUNG));
+	return set_pmd_bit(pmd, __pgprot(_SEGMENT_ENTRY_PROTECT));
+}
+
+#define __HAVE_ARCH_PMDP_HUGE_GET_AND_CLEAR
+static inline pmd_t pmdp_huge_get_and_clear(struct mm_struct *mm,
+					    unsigned long addr, pmd_t *pmdp)
+{
+	return pmdp_xchg_direct(mm, addr, pmdp, __pmd(_SEGMENT_ENTRY_EMPTY));
+}
+
+#define __HAVE_ARCH_PMDP_HUGE_GET_AND_CLEAR_FULL
+static inline pmd_t pmdp_huge_get_and_clear_full(struct vm_area_struct *vma,
+						 unsigned long addr,
+						 pmd_t *pmdp, int full)
+{
+	if (full) {
+		pmd_t pmd = *pmdp;
+		set_pmd(pmdp, __pmd(_SEGMENT_ENTRY_EMPTY));
+		return pmd;
+	}
+	return pmdp_xchg_lazy(vma->vm_mm, addr, pmdp, __pmd(_SEGMENT_ENTRY_EMPTY));
+}
+
+#define __HAVE_ARCH_PMDP_HUGE_CLEAR_FLUSH
+static inline pmd_t pmdp_huge_clear_flush(struct vm_area_struct *vma,
+					  unsigned long addr, pmd_t *pmdp)
+{
+	return pmdp_huge_get_and_clear(vma->vm_mm, addr, pmdp);
+}
+
+#define __HAVE_ARCH_PMDP_INVALIDATE
+static inline pmd_t pmdp_invalidate(struct vm_area_struct *vma,
+				   unsigned long addr, pmd_t *pmdp)
+{
+	pmd_t pmd = __pmd(pmd_val(*pmdp) | _SEGMENT_ENTRY_INVALID);
+
+	return pmdp_xchg_direct(vma->vm_mm, addr, pmdp, pmd);
+}
+
+#define __HAVE_ARCH_PMDP_SET_WRPROTECT
+static inline void pmdp_set_wrprotect(struct mm_struct *mm,
+				      unsigned long addr, pmd_t *pmdp)
+{
+	pmd_t pmd = *pmdp;
+
+	if (pmd_write(pmd))
+		pmd = pmdp_xchg_lazy(mm, addr, pmdp, pmd_wrprotect(pmd));
+}
+
+static inline pmd_t pmdp_collapse_flush(struct vm_area_struct *vma,
+					unsigned long address,
+					pmd_t *pmdp)
+{
+	return pmdp_huge_get_and_clear(vma->vm_mm, address, pmdp);
+}
+#define pmdp_collapse_flush pmdp_collapse_flush
+
+#define pfn_pmd(pfn, pgprot)	mk_pmd_phys(((pfn) << PAGE_SHIFT), (pgprot))
+#define mk_pmd(page, pgprot)	pfn_pmd(page_to_pfn(page), (pgprot))
+
+static inline int pmd_trans_huge(pmd_t pmd)
+{
+	return pmd_val(pmd) & _SEGMENT_ENTRY_LARGE;
+}
+
+#define has_transparent_hugepage has_transparent_hugepage
+static inline int has_transparent_hugepage(void)
+{
+	return MACHINE_HAS_EDAT1 ? 1 : 0;
+}
+#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
+
+/*
+ * 64 bit swap entry format:
+ * A page-table entry has some bits we have to treat in a special way.
+ * Bits 54 and 63 are used to indicate the page type. Bit 53 marks the pte
+ * as invalid.
+ * A swap pte is indicated by bit pattern (pte & 0x201) == 0x200
+ * |			  offset			|E11XX|type |S0|
+ * |0000000000111111111122222222223333333333444444444455|55555|55566|66|
+ * |0123456789012345678901234567890123456789012345678901|23456|78901|23|
+ *
+ * Bits 0-51 store the offset.
+ * Bit 52 (E) is used to remember PG_anon_exclusive.
+ * Bits 57-61 store the type.
+ * Bit 62 (S) is used for softdirty tracking.
+ * Bits 55 and 56 (X) are unused.
+ */
+
+#define __SWP_OFFSET_MASK	((1UL << 52) - 1)
+#define __SWP_OFFSET_SHIFT	12
+#define __SWP_TYPE_MASK		((1UL << 5) - 1)
+#define __SWP_TYPE_SHIFT	2
+
+static inline pte_t mk_swap_pte(unsigned long type, unsigned long offset)
+{
+	unsigned long pteval;
+
+	pteval = _PAGE_INVALID | _PAGE_PROTECT;
+	pteval |= (offset & __SWP_OFFSET_MASK) << __SWP_OFFSET_SHIFT;
+	pteval |= (type & __SWP_TYPE_MASK) << __SWP_TYPE_SHIFT;
+	return __pte(pteval);
+}
+
+static inline unsigned long __swp_type(swp_entry_t entry)
+{
+	return (entry.val >> __SWP_TYPE_SHIFT) & __SWP_TYPE_MASK;
+}
+
+static inline unsigned long __swp_offset(swp_entry_t entry)
+{
+	return (entry.val >> __SWP_OFFSET_SHIFT) & __SWP_OFFSET_MASK;
+}
+
+static inline swp_entry_t __swp_entry(unsigned long type, unsigned long offset)
+{
+	return (swp_entry_t) { pte_val(mk_swap_pte(type, offset)) };
+}
+
+#define __pte_to_swp_entry(pte)	((swp_entry_t) { pte_val(pte) })
+#define __swp_entry_to_pte(x)	((pte_t) { (x).val })
+
+extern int vmem_add_mapping(unsigned long start, unsigned long size);
+extern void vmem_remove_mapping(unsigned long start, unsigned long size);
+extern int __vmem_map_4k_page(unsigned long addr, unsigned long phys, pgprot_t prot, bool alloc);
+extern int vmem_map_4k_page(unsigned long addr, unsigned long phys, pgprot_t prot);
+extern void vmem_unmap_4k_page(unsigned long addr);
+extern pte_t *vmem_get_alloc_pte(unsigned long addr, bool alloc);
+extern int s390_enable_sie(void);
+extern int s390_enable_skey(void);
+extern void s390_reset_cmma(struct mm_struct *mm);
+
+/* s390 has a private copy of get unmapped area to deal with cache synonyms */
+#define HAVE_ARCH_UNMAPPED_AREA
+#define HAVE_ARCH_UNMAPPED_AREA_TOPDOWN
+
+#define pmd_pgtable(pmd) \
+	((pgtable_t)__va(pmd_val(pmd) & -sizeof(pte_t)*PTRS_PER_PTE))
+
+#endif /* _S390_PAGE_H */
diff --git a/arch/s390/include/asm/physmem_info.h b/arch/s390/include/asm/physmem_info.h
new file mode 100644
index 0000000000..9e41a74fce
--- /dev/null
+++ b/arch/s390/include/asm/physmem_info.h
@@ -0,0 +1,172 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_S390_MEM_DETECT_H
+#define _ASM_S390_MEM_DETECT_H
+
+#include <linux/types.h>
+#include <asm/page.h>
+
+enum physmem_info_source {
+	MEM_DETECT_NONE = 0,
+	MEM_DETECT_SCLP_STOR_INFO,
+	MEM_DETECT_DIAG260,
+	MEM_DETECT_SCLP_READ_INFO,
+	MEM_DETECT_BIN_SEARCH
+};
+
+struct physmem_range {
+	u64 start;
+	u64 end;
+};
+
+enum reserved_range_type {
+	RR_DECOMPRESSOR,
+	RR_INITRD,
+	RR_VMLINUX,
+	RR_AMODE31,
+	RR_IPLREPORT,
+	RR_CERT_COMP_LIST,
+	RR_MEM_DETECT_EXTENDED,
+	RR_VMEM,
+	RR_MAX
+};
+
+struct reserved_range {
+	unsigned long start;
+	unsigned long end;
+	struct reserved_range *chain;
+};
+
+/*
+ * Storage element id is defined as 1 byte (up to 256 storage elements).
+ * In practise only storage element id 0 and 1 are used).
+ * According to architecture one storage element could have as much as
+ * 1020 subincrements. 255 physmem_ranges are embedded in physmem_info.
+ * If more physmem_ranges are required, a block of memory from already
+ * known physmem_range is taken (online_extended points to it).
+ */
+#define MEM_INLINED_ENTRIES 255 /* (PAGE_SIZE - 16) / 16 */
+
+struct physmem_info {
+	u32 range_count;
+	u8 info_source;
+	unsigned long usable;
+	struct reserved_range reserved[RR_MAX];
+	struct physmem_range online[MEM_INLINED_ENTRIES];
+	struct physmem_range *online_extended;
+};
+
+extern struct physmem_info physmem_info;
+
+void add_physmem_online_range(u64 start, u64 end);
+
+static inline int __get_physmem_range(u32 n, unsigned long *start,
+				      unsigned long *end, bool respect_usable_limit)
+{
+	if (n >= physmem_info.range_count) {
+		*start = 0;
+		*end = 0;
+		return -1;
+	}
+
+	if (n < MEM_INLINED_ENTRIES) {
+		*start = (unsigned long)physmem_info.online[n].start;
+		*end = (unsigned long)physmem_info.online[n].end;
+	} else {
+		*start = (unsigned long)physmem_info.online_extended[n - MEM_INLINED_ENTRIES].start;
+		*end = (unsigned long)physmem_info.online_extended[n - MEM_INLINED_ENTRIES].end;
+	}
+
+	if (respect_usable_limit && physmem_info.usable) {
+		if (*start >= physmem_info.usable)
+			return -1;
+		if (*end > physmem_info.usable)
+			*end = physmem_info.usable;
+	}
+	return 0;
+}
+
+/**
+ * for_each_physmem_usable_range - early online memory range iterator
+ * @i: an integer used as loop variable
+ * @p_start: ptr to unsigned long for start address of the range
+ * @p_end: ptr to unsigned long for end address of the range
+ *
+ * Walks over detected online memory ranges below usable limit.
+ */
+#define for_each_physmem_usable_range(i, p_start, p_end)		\
+	for (i = 0; !__get_physmem_range(i, p_start, p_end, true); i++)
+
+/* Walks over all detected online memory ranges disregarding usable limit. */
+#define for_each_physmem_online_range(i, p_start, p_end)		\
+	for (i = 0; !__get_physmem_range(i, p_start, p_end, false); i++)
+
+static inline const char *get_physmem_info_source(void)
+{
+	switch (physmem_info.info_source) {
+	case MEM_DETECT_SCLP_STOR_INFO:
+		return "sclp storage info";
+	case MEM_DETECT_DIAG260:
+		return "diag260";
+	case MEM_DETECT_SCLP_READ_INFO:
+		return "sclp read info";
+	case MEM_DETECT_BIN_SEARCH:
+		return "binary search";
+	}
+	return "none";
+}
+
+#define RR_TYPE_NAME(t) case RR_ ## t: return #t
+static inline const char *get_rr_type_name(enum reserved_range_type t)
+{
+	switch (t) {
+	RR_TYPE_NAME(DECOMPRESSOR);
+	RR_TYPE_NAME(INITRD);
+	RR_TYPE_NAME(VMLINUX);
+	RR_TYPE_NAME(AMODE31);
+	RR_TYPE_NAME(IPLREPORT);
+	RR_TYPE_NAME(CERT_COMP_LIST);
+	RR_TYPE_NAME(MEM_DETECT_EXTENDED);
+	RR_TYPE_NAME(VMEM);
+	default:
+		return "UNKNOWN";
+	}
+}
+
+#define for_each_physmem_reserved_type_range(t, range, p_start, p_end)				\
+	for (range = &physmem_info.reserved[t], *p_start = range->start, *p_end = range->end;	\
+	     range && range->end; range = range->chain ? __va(range->chain) : NULL,		\
+	     *p_start = range ? range->start : 0, *p_end = range ? range->end : 0)
+
+static inline struct reserved_range *__physmem_reserved_next(enum reserved_range_type *t,
+							     struct reserved_range *range)
+{
+	if (!range) {
+		range = &physmem_info.reserved[*t];
+		if (range->end)
+			return range;
+	}
+	if (range->chain)
+		return __va(range->chain);
+	while (++*t < RR_MAX) {
+		range = &physmem_info.reserved[*t];
+		if (range->end)
+			return range;
+	}
+	return NULL;
+}
+
+#define for_each_physmem_reserved_range(t, range, p_start, p_end)			\
+	for (t = 0, range = __physmem_reserved_next(&t, NULL),			\
+	    *p_start = range ? range->start : 0, *p_end = range ? range->end : 0;	\
+	     range; range = __physmem_reserved_next(&t, range),			\
+	    *p_start = range ? range->start : 0, *p_end = range ? range->end : 0)
+
+static inline unsigned long get_physmem_reserved(enum reserved_range_type type,
+						 unsigned long *addr, unsigned long *size)
+{
+	*addr = physmem_info.reserved[type].start;
+	*size = physmem_info.reserved[type].end - physmem_info.reserved[type].start;
+	return *size;
+}
+
+#endif
diff --git a/arch/s390/include/asm/pkey.h b/arch/s390/include/asm/pkey.h
new file mode 100644
index 0000000000..47d80a7451
--- /dev/null
+++ b/arch/s390/include/asm/pkey.h
@@ -0,0 +1,28 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Kernelspace interface to the pkey device driver
+ *
+ * Copyright IBM Corp. 2016, 2023
+ *
+ * Author: Harald Freudenberger <freude@de.ibm.com>
+ *
+ */
+
+#ifndef _KAPI_PKEY_H
+#define _KAPI_PKEY_H
+
+#include <linux/ioctl.h>
+#include <linux/types.h>
+#include <uapi/asm/pkey.h>
+
+/*
+ * In-kernel API: Transform an key blob (of any type) into a protected key.
+ * @param key pointer to a buffer containing the key blob
+ * @param keylen size of the key blob in bytes
+ * @param protkey pointer to buffer receiving the protected key
+ * @return 0 on success, negative errno value on failure
+ */
+int pkey_keyblob2pkey(const u8 *key, u32 keylen,
+		      u8 *protkey, u32 *protkeylen, u32 *protkeytype);
+
+#endif /* _KAPI_PKEY_H */
diff --git a/arch/s390/include/asm/pnet.h b/arch/s390/include/asm/pnet.h
new file mode 100644
index 0000000000..5739276b45
--- /dev/null
+++ b/arch/s390/include/asm/pnet.h
@@ -0,0 +1,15 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ *  IBM System z PNET ID Support
+ *
+ *    Copyright IBM Corp. 2018
+ */
+
+#ifndef _ASM_S390_PNET_H
+#define _ASM_S390_PNET_H
+
+#include <linux/device.h>
+#include <linux/types.h>
+
+int pnet_id_by_dev_port(struct device *dev, unsigned short port, u8 *pnetid);
+#endif /* _ASM_S390_PNET_H */
diff --git a/arch/s390/include/asm/preempt.h b/arch/s390/include/asm/preempt.h
new file mode 100644
index 0000000000..bf15da0fed
--- /dev/null
+++ b/arch/s390/include/asm/preempt.h
@@ -0,0 +1,139 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __ASM_PREEMPT_H
+#define __ASM_PREEMPT_H
+
+#include <asm/current.h>
+#include <linux/thread_info.h>
+#include <asm/atomic_ops.h>
+
+#ifdef CONFIG_HAVE_MARCH_Z196_FEATURES
+
+/* We use the MSB mostly because its available */
+#define PREEMPT_NEED_RESCHED	0x80000000
+#define PREEMPT_ENABLED	(0 + PREEMPT_NEED_RESCHED)
+
+static inline int preempt_count(void)
+{
+	return READ_ONCE(S390_lowcore.preempt_count) & ~PREEMPT_NEED_RESCHED;
+}
+
+static inline void preempt_count_set(int pc)
+{
+	int old, new;
+
+	do {
+		old = READ_ONCE(S390_lowcore.preempt_count);
+		new = (old & PREEMPT_NEED_RESCHED) |
+			(pc & ~PREEMPT_NEED_RESCHED);
+	} while (__atomic_cmpxchg(&S390_lowcore.preempt_count,
+				  old, new) != old);
+}
+
+static inline void set_preempt_need_resched(void)
+{
+	__atomic_and(~PREEMPT_NEED_RESCHED, &S390_lowcore.preempt_count);
+}
+
+static inline void clear_preempt_need_resched(void)
+{
+	__atomic_or(PREEMPT_NEED_RESCHED, &S390_lowcore.preempt_count);
+}
+
+static inline bool test_preempt_need_resched(void)
+{
+	return !(READ_ONCE(S390_lowcore.preempt_count) & PREEMPT_NEED_RESCHED);
+}
+
+static inline void __preempt_count_add(int val)
+{
+	/*
+	 * With some obscure config options and CONFIG_PROFILE_ALL_BRANCHES
+	 * enabled, gcc 12 fails to handle __builtin_constant_p().
+	 */
+	if (!IS_ENABLED(CONFIG_PROFILE_ALL_BRANCHES)) {
+		if (__builtin_constant_p(val) && (val >= -128) && (val <= 127)) {
+			__atomic_add_const(val, &S390_lowcore.preempt_count);
+			return;
+		}
+	}
+	__atomic_add(val, &S390_lowcore.preempt_count);
+}
+
+static inline void __preempt_count_sub(int val)
+{
+	__preempt_count_add(-val);
+}
+
+static inline bool __preempt_count_dec_and_test(void)
+{
+	return __atomic_add(-1, &S390_lowcore.preempt_count) == 1;
+}
+
+static inline bool should_resched(int preempt_offset)
+{
+	return unlikely(READ_ONCE(S390_lowcore.preempt_count) ==
+			preempt_offset);
+}
+
+#else /* CONFIG_HAVE_MARCH_Z196_FEATURES */
+
+#define PREEMPT_ENABLED	(0)
+
+static inline int preempt_count(void)
+{
+	return READ_ONCE(S390_lowcore.preempt_count);
+}
+
+static inline void preempt_count_set(int pc)
+{
+	S390_lowcore.preempt_count = pc;
+}
+
+static inline void set_preempt_need_resched(void)
+{
+}
+
+static inline void clear_preempt_need_resched(void)
+{
+}
+
+static inline bool test_preempt_need_resched(void)
+{
+	return false;
+}
+
+static inline void __preempt_count_add(int val)
+{
+	S390_lowcore.preempt_count += val;
+}
+
+static inline void __preempt_count_sub(int val)
+{
+	S390_lowcore.preempt_count -= val;
+}
+
+static inline bool __preempt_count_dec_and_test(void)
+{
+	return !--S390_lowcore.preempt_count && tif_need_resched();
+}
+
+static inline bool should_resched(int preempt_offset)
+{
+	return unlikely(preempt_count() == preempt_offset &&
+			tif_need_resched());
+}
+
+#endif /* CONFIG_HAVE_MARCH_Z196_FEATURES */
+
+#define init_task_preempt_count(p)	do { } while (0)
+/* Deferred to CPU bringup time */
+#define init_idle_preempt_count(p, cpu)	do { } while (0)
+
+#ifdef CONFIG_PREEMPTION
+extern void preempt_schedule(void);
+#define __preempt_schedule() preempt_schedule()
+extern void preempt_schedule_notrace(void);
+#define __preempt_schedule_notrace() preempt_schedule_notrace()
+#endif /* CONFIG_PREEMPTION */
+
+#endif /* __ASM_PREEMPT_H */
diff --git a/arch/s390/include/asm/processor.h b/arch/s390/include/asm/processor.h
new file mode 100644
index 0000000000..dc17896a00
--- /dev/null
+++ b/arch/s390/include/asm/processor.h
@@ -0,0 +1,380 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ *  S390 version
+ *    Copyright IBM Corp. 1999
+ *    Author(s): Hartmut Penner (hp@de.ibm.com),
+ *               Martin Schwidefsky (schwidefsky@de.ibm.com)
+ *
+ *  Derived from "include/asm-i386/processor.h"
+ *    Copyright (C) 1994, Linus Torvalds
+ */
+
+#ifndef __ASM_S390_PROCESSOR_H
+#define __ASM_S390_PROCESSOR_H
+
+#include <linux/bits.h>
+
+#define CIF_NOHZ_DELAY		2	/* delay HZ disable for a tick */
+#define CIF_FPU			3	/* restore FPU registers */
+#define CIF_ENABLED_WAIT	5	/* in enabled wait state */
+#define CIF_MCCK_GUEST		6	/* machine check happening in guest */
+#define CIF_DEDICATED_CPU	7	/* this CPU is dedicated */
+
+#define _CIF_NOHZ_DELAY		BIT(CIF_NOHZ_DELAY)
+#define _CIF_FPU		BIT(CIF_FPU)
+#define _CIF_ENABLED_WAIT	BIT(CIF_ENABLED_WAIT)
+#define _CIF_MCCK_GUEST		BIT(CIF_MCCK_GUEST)
+#define _CIF_DEDICATED_CPU	BIT(CIF_DEDICATED_CPU)
+
+#define RESTART_FLAG_CTLREGS	_AC(1 << 0, U)
+
+#ifndef __ASSEMBLY__
+
+#include <linux/cpumask.h>
+#include <linux/linkage.h>
+#include <linux/irqflags.h>
+#include <asm/cpu.h>
+#include <asm/page.h>
+#include <asm/ptrace.h>
+#include <asm/setup.h>
+#include <asm/runtime_instr.h>
+#include <asm/fpu/types.h>
+#include <asm/fpu/internal.h>
+#include <asm/irqflags.h>
+
+typedef long (*sys_call_ptr_t)(struct pt_regs *regs);
+
+static __always_inline void set_cpu_flag(int flag)
+{
+	S390_lowcore.cpu_flags |= (1UL << flag);
+}
+
+static __always_inline void clear_cpu_flag(int flag)
+{
+	S390_lowcore.cpu_flags &= ~(1UL << flag);
+}
+
+static __always_inline bool test_cpu_flag(int flag)
+{
+	return S390_lowcore.cpu_flags & (1UL << flag);
+}
+
+static __always_inline bool test_and_set_cpu_flag(int flag)
+{
+	if (test_cpu_flag(flag))
+		return true;
+	set_cpu_flag(flag);
+	return false;
+}
+
+static __always_inline bool test_and_clear_cpu_flag(int flag)
+{
+	if (!test_cpu_flag(flag))
+		return false;
+	clear_cpu_flag(flag);
+	return true;
+}
+
+/*
+ * Test CIF flag of another CPU. The caller needs to ensure that
+ * CPU hotplug can not happen, e.g. by disabling preemption.
+ */
+static __always_inline bool test_cpu_flag_of(int flag, int cpu)
+{
+	struct lowcore *lc = lowcore_ptr[cpu];
+
+	return lc->cpu_flags & (1UL << flag);
+}
+
+#define arch_needs_cpu() test_cpu_flag(CIF_NOHZ_DELAY)
+
+static inline void get_cpu_id(struct cpuid *ptr)
+{
+	asm volatile("stidp %0" : "=Q" (*ptr));
+}
+
+void s390_adjust_jiffies(void);
+void s390_update_cpu_mhz(void);
+void cpu_detect_mhz_feature(void);
+
+extern const struct seq_operations cpuinfo_op;
+extern void execve_tail(void);
+unsigned long vdso_size(void);
+
+/*
+ * User space process size: 2GB for 31 bit, 4TB or 8PT for 64 bit.
+ */
+
+#define TASK_SIZE		(test_thread_flag(TIF_31BIT) ? \
+					_REGION3_SIZE : TASK_SIZE_MAX)
+#define TASK_UNMAPPED_BASE	(test_thread_flag(TIF_31BIT) ? \
+					(_REGION3_SIZE >> 1) : (_REGION2_SIZE >> 1))
+#define TASK_SIZE_MAX		(-PAGE_SIZE)
+
+#define VDSO_BASE		(STACK_TOP + PAGE_SIZE)
+#define VDSO_LIMIT		(test_thread_flag(TIF_31BIT) ? _REGION3_SIZE : _REGION2_SIZE)
+#define STACK_TOP		(VDSO_LIMIT - vdso_size() - PAGE_SIZE)
+#define STACK_TOP_MAX		(_REGION2_SIZE - vdso_size() - PAGE_SIZE)
+
+#define HAVE_ARCH_PICK_MMAP_LAYOUT
+
+#define __stackleak_poison __stackleak_poison
+static __always_inline void __stackleak_poison(unsigned long erase_low,
+					       unsigned long erase_high,
+					       unsigned long poison)
+{
+	unsigned long tmp, count;
+
+	count = erase_high - erase_low;
+	if (!count)
+		return;
+	asm volatile(
+		"	cghi	%[count],8\n"
+		"	je	2f\n"
+		"	aghi	%[count],-(8+1)\n"
+		"	srlg	%[tmp],%[count],8\n"
+		"	ltgr	%[tmp],%[tmp]\n"
+		"	jz	1f\n"
+		"0:	stg	%[poison],0(%[addr])\n"
+		"	mvc	8(256-8,%[addr]),0(%[addr])\n"
+		"	la	%[addr],256(%[addr])\n"
+		"	brctg	%[tmp],0b\n"
+		"1:	stg	%[poison],0(%[addr])\n"
+		"	larl	%[tmp],3f\n"
+		"	ex	%[count],0(%[tmp])\n"
+		"	j	4f\n"
+		"2:	stg	%[poison],0(%[addr])\n"
+		"	j	4f\n"
+		"3:	mvc	8(1,%[addr]),0(%[addr])\n"
+		"4:\n"
+		: [addr] "+&a" (erase_low), [count] "+&d" (count), [tmp] "=&a" (tmp)
+		: [poison] "d" (poison)
+		: "memory", "cc"
+		);
+}
+
+/*
+ * Thread structure
+ */
+struct thread_struct {
+	unsigned int  acrs[NUM_ACRS];
+	unsigned long ksp;			/* kernel stack pointer */
+	unsigned long user_timer;		/* task cputime in user space */
+	unsigned long guest_timer;		/* task cputime in kvm guest */
+	unsigned long system_timer;		/* task cputime in kernel space */
+	unsigned long hardirq_timer;		/* task cputime in hardirq context */
+	unsigned long softirq_timer;		/* task cputime in softirq context */
+	const sys_call_ptr_t *sys_call_table;	/* system call table address */
+	unsigned long gmap_addr;		/* address of last gmap fault. */
+	unsigned int gmap_write_flag;		/* gmap fault write indication */
+	unsigned int gmap_int_code;		/* int code of last gmap fault */
+	unsigned int gmap_pfault;		/* signal of a pending guest pfault */
+
+	/* Per-thread information related to debugging */
+	struct per_regs per_user;		/* User specified PER registers */
+	struct per_event per_event;		/* Cause of the last PER trap */
+	unsigned long per_flags;		/* Flags to control debug behavior */
+	unsigned int system_call;		/* system call number in signal */
+	unsigned long last_break;		/* last breaking-event-address. */
+	/* pfault_wait is used to block the process on a pfault event */
+	unsigned long pfault_wait;
+	struct list_head list;
+	/* cpu runtime instrumentation */
+	struct runtime_instr_cb *ri_cb;
+	struct gs_cb *gs_cb;			/* Current guarded storage cb */
+	struct gs_cb *gs_bc_cb;			/* Broadcast guarded storage cb */
+	struct pgm_tdb trap_tdb;		/* Transaction abort diagnose block */
+	/*
+	 * Warning: 'fpu' is dynamically-sized. It *MUST* be at
+	 * the end.
+	 */
+	struct fpu fpu;			/* FP and VX register save area */
+};
+
+/* Flag to disable transactions. */
+#define PER_FLAG_NO_TE			1UL
+/* Flag to enable random transaction aborts. */
+#define PER_FLAG_TE_ABORT_RAND		2UL
+/* Flag to specify random transaction abort mode:
+ * - abort each transaction at a random instruction before TEND if set.
+ * - abort random transactions at a random instruction if cleared.
+ */
+#define PER_FLAG_TE_ABORT_RAND_TEND	4UL
+
+typedef struct thread_struct thread_struct;
+
+#define ARCH_MIN_TASKALIGN	8
+
+#define INIT_THREAD {							\
+	.ksp = sizeof(init_stack) + (unsigned long) &init_stack,	\
+	.fpu.regs = (void *) init_task.thread.fpu.fprs,			\
+	.last_break = 1,						\
+}
+
+/*
+ * Do necessary setup to start up a new thread.
+ */
+#define start_thread(regs, new_psw, new_stackp) do {			\
+	regs->psw.mask	= PSW_USER_BITS | PSW_MASK_EA | PSW_MASK_BA;	\
+	regs->psw.addr	= new_psw;					\
+	regs->gprs[15]	= new_stackp;					\
+	execve_tail();							\
+} while (0)
+
+#define start_thread31(regs, new_psw, new_stackp) do {			\
+	regs->psw.mask	= PSW_USER_BITS | PSW_MASK_BA;			\
+	regs->psw.addr	= new_psw;					\
+	regs->gprs[15]	= new_stackp;					\
+	execve_tail();							\
+} while (0)
+
+/* Forward declaration, a strange C thing */
+struct task_struct;
+struct mm_struct;
+struct seq_file;
+struct pt_regs;
+
+void show_registers(struct pt_regs *regs);
+void show_cacheinfo(struct seq_file *m);
+
+/* Free guarded storage control block */
+void guarded_storage_release(struct task_struct *tsk);
+void gs_load_bc_cb(struct pt_regs *regs);
+
+unsigned long __get_wchan(struct task_struct *p);
+#define task_pt_regs(tsk) ((struct pt_regs *) \
+        (task_stack_page(tsk) + THREAD_SIZE) - 1)
+#define KSTK_EIP(tsk)	(task_pt_regs(tsk)->psw.addr)
+#define KSTK_ESP(tsk)	(task_pt_regs(tsk)->gprs[15])
+
+/* Has task runtime instrumentation enabled ? */
+#define is_ri_task(tsk) (!!(tsk)->thread.ri_cb)
+
+/* avoid using global register due to gcc bug in versions < 8.4 */
+#define current_stack_pointer (__current_stack_pointer())
+
+static __always_inline unsigned long __current_stack_pointer(void)
+{
+	unsigned long sp;
+
+	asm volatile("lgr %0,15" : "=d" (sp));
+	return sp;
+}
+
+static __always_inline bool on_thread_stack(void)
+{
+	unsigned long ksp = S390_lowcore.kernel_stack;
+
+	return !((ksp ^ current_stack_pointer) & ~(THREAD_SIZE - 1));
+}
+
+static __always_inline unsigned short stap(void)
+{
+	unsigned short cpu_address;
+
+	asm volatile("stap %0" : "=Q" (cpu_address));
+	return cpu_address;
+}
+
+#define cpu_relax() barrier()
+
+#define ECAG_CACHE_ATTRIBUTE	0
+#define ECAG_CPU_ATTRIBUTE	1
+
+static inline unsigned long __ecag(unsigned int asi, unsigned char parm)
+{
+	unsigned long val;
+
+	asm volatile("ecag %0,0,0(%1)" : "=d" (val) : "a" (asi << 8 | parm));
+	return val;
+}
+
+static inline void psw_set_key(unsigned int key)
+{
+	asm volatile("spka 0(%0)" : : "d" (key));
+}
+
+/*
+ * Set PSW to specified value.
+ */
+static inline void __load_psw(psw_t psw)
+{
+	asm volatile("lpswe %0" : : "Q" (psw) : "cc");
+}
+
+/*
+ * Set PSW mask to specified value, while leaving the
+ * PSW addr pointing to the next instruction.
+ */
+static __always_inline void __load_psw_mask(unsigned long mask)
+{
+	unsigned long addr;
+	psw_t psw;
+
+	psw.mask = mask;
+
+	asm volatile(
+		"	larl	%0,1f\n"
+		"	stg	%0,%1\n"
+		"	lpswe	%2\n"
+		"1:"
+		: "=&d" (addr), "=Q" (psw.addr) : "Q" (psw) : "memory", "cc");
+}
+
+/*
+ * Extract current PSW mask
+ */
+static inline unsigned long __extract_psw(void)
+{
+	unsigned int reg1, reg2;
+
+	asm volatile("epsw %0,%1" : "=d" (reg1), "=a" (reg2));
+	return (((unsigned long) reg1) << 32) | ((unsigned long) reg2);
+}
+
+static inline void local_mcck_enable(void)
+{
+	__load_psw_mask(__extract_psw() | PSW_MASK_MCHECK);
+}
+
+static inline void local_mcck_disable(void)
+{
+	__load_psw_mask(__extract_psw() & ~PSW_MASK_MCHECK);
+}
+
+/*
+ * Rewind PSW instruction address by specified number of bytes.
+ */
+static inline unsigned long __rewind_psw(psw_t psw, unsigned long ilc)
+{
+	unsigned long mask;
+
+	mask = (psw.mask & PSW_MASK_EA) ? -1UL :
+	       (psw.mask & PSW_MASK_BA) ? (1UL << 31) - 1 :
+					  (1UL << 24) - 1;
+	return (psw.addr - ilc) & mask;
+}
+
+/*
+ * Function to drop a processor into disabled wait state
+ */
+static __always_inline void __noreturn disabled_wait(void)
+{
+	psw_t psw;
+
+	psw.mask = PSW_MASK_BASE | PSW_MASK_WAIT | PSW_MASK_BA | PSW_MASK_EA;
+	psw.addr = _THIS_IP_;
+	__load_psw(psw);
+	while (1);
+}
+
+#define ARCH_LOW_ADDRESS_LIMIT	0x7fffffffUL
+
+static __always_inline bool regs_irqs_disabled(struct pt_regs *regs)
+{
+	return arch_irqs_disabled_flags(regs->psw.mask);
+}
+
+#endif /* __ASSEMBLY__ */
+
+#endif /* __ASM_S390_PROCESSOR_H */
diff --git a/arch/s390/include/asm/ptdump.h b/arch/s390/include/asm/ptdump.h
new file mode 100644
index 0000000000..f960b28966
--- /dev/null
+++ b/arch/s390/include/asm/ptdump.h
@@ -0,0 +1,14 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#ifndef _ASM_S390_PTDUMP_H
+#define _ASM_S390_PTDUMP_H
+
+void ptdump_check_wx(void);
+
+static inline void debug_checkwx(void)
+{
+	if (IS_ENABLED(CONFIG_DEBUG_WX))
+		ptdump_check_wx();
+}
+
+#endif /* _ASM_S390_PTDUMP_H */
diff --git a/arch/s390/include/asm/ptrace.h b/arch/s390/include/asm/ptrace.h
new file mode 100644
index 0000000000..d28bf8fb27
--- /dev/null
+++ b/arch/s390/include/asm/ptrace.h
@@ -0,0 +1,263 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ *  S390 version
+ *    Copyright IBM Corp. 1999, 2000
+ *    Author(s): Denis Joseph Barrow (djbarrow@de.ibm.com,barrow_dj@yahoo.com)
+ */
+#ifndef _S390_PTRACE_H
+#define _S390_PTRACE_H
+
+#include <linux/bits.h>
+#include <uapi/asm/ptrace.h>
+#include <asm/tpi.h>
+
+#define PIF_SYSCALL			0	/* inside a system call */
+#define PIF_EXECVE_PGSTE_RESTART	1	/* restart execve for PGSTE binaries */
+#define PIF_SYSCALL_RET_SET		2	/* return value was set via ptrace */
+#define PIF_GUEST_FAULT			3	/* indicates program check in sie64a */
+#define PIF_FTRACE_FULL_REGS		4	/* all register contents valid (ftrace) */
+
+#define _PIF_SYSCALL			BIT(PIF_SYSCALL)
+#define _PIF_EXECVE_PGSTE_RESTART	BIT(PIF_EXECVE_PGSTE_RESTART)
+#define _PIF_SYSCALL_RET_SET		BIT(PIF_SYSCALL_RET_SET)
+#define _PIF_GUEST_FAULT		BIT(PIF_GUEST_FAULT)
+#define _PIF_FTRACE_FULL_REGS		BIT(PIF_FTRACE_FULL_REGS)
+
+#define PSW32_MASK_PER		_AC(0x40000000, UL)
+#define PSW32_MASK_DAT		_AC(0x04000000, UL)
+#define PSW32_MASK_IO		_AC(0x02000000, UL)
+#define PSW32_MASK_EXT		_AC(0x01000000, UL)
+#define PSW32_MASK_KEY		_AC(0x00F00000, UL)
+#define PSW32_MASK_BASE		_AC(0x00080000, UL)	/* Always one */
+#define PSW32_MASK_MCHECK	_AC(0x00040000, UL)
+#define PSW32_MASK_WAIT		_AC(0x00020000, UL)
+#define PSW32_MASK_PSTATE	_AC(0x00010000, UL)
+#define PSW32_MASK_ASC		_AC(0x0000C000, UL)
+#define PSW32_MASK_CC		_AC(0x00003000, UL)
+#define PSW32_MASK_PM		_AC(0x00000f00, UL)
+#define PSW32_MASK_RI		_AC(0x00000080, UL)
+
+#define PSW32_ADDR_AMODE	_AC(0x80000000, UL)
+#define PSW32_ADDR_INSN		_AC(0x7FFFFFFF, UL)
+
+#define PSW32_DEFAULT_KEY	((PAGE_DEFAULT_ACC) << 20)
+
+#define PSW32_ASC_PRIMARY	_AC(0x00000000, UL)
+#define PSW32_ASC_ACCREG	_AC(0x00004000, UL)
+#define PSW32_ASC_SECONDARY	_AC(0x00008000, UL)
+#define PSW32_ASC_HOME		_AC(0x0000C000, UL)
+
+#define PSW_DEFAULT_KEY			((PAGE_DEFAULT_ACC) << 52)
+
+#define PSW_KERNEL_BITS	(PSW_DEFAULT_KEY | PSW_MASK_BASE | PSW_ASC_HOME | \
+			 PSW_MASK_EA | PSW_MASK_BA | PSW_MASK_DAT)
+#define PSW_USER_BITS	(PSW_MASK_DAT | PSW_MASK_IO | PSW_MASK_EXT | \
+			 PSW_DEFAULT_KEY | PSW_MASK_BASE | PSW_MASK_MCHECK | \
+			 PSW_MASK_PSTATE | PSW_ASC_PRIMARY)
+
+#ifndef __ASSEMBLY__
+
+struct psw_bits {
+	unsigned long	     :	1;
+	unsigned long per    :	1; /* PER-Mask */
+	unsigned long	     :	3;
+	unsigned long dat    :	1; /* DAT Mode */
+	unsigned long io     :	1; /* Input/Output Mask */
+	unsigned long ext    :	1; /* External Mask */
+	unsigned long key    :	4; /* PSW Key */
+	unsigned long	     :	1;
+	unsigned long mcheck :	1; /* Machine-Check Mask */
+	unsigned long wait   :	1; /* Wait State */
+	unsigned long pstate :	1; /* Problem State */
+	unsigned long as     :	2; /* Address Space Control */
+	unsigned long cc     :	2; /* Condition Code */
+	unsigned long pm     :	4; /* Program Mask */
+	unsigned long ri     :	1; /* Runtime Instrumentation */
+	unsigned long	     :	6;
+	unsigned long eaba   :	2; /* Addressing Mode */
+	unsigned long	     : 31;
+	unsigned long ia     : 64; /* Instruction Address */
+};
+
+enum {
+	PSW_BITS_AMODE_24BIT = 0,
+	PSW_BITS_AMODE_31BIT = 1,
+	PSW_BITS_AMODE_64BIT = 3
+};
+
+enum {
+	PSW_BITS_AS_PRIMARY	= 0,
+	PSW_BITS_AS_ACCREG	= 1,
+	PSW_BITS_AS_SECONDARY	= 2,
+	PSW_BITS_AS_HOME	= 3
+};
+
+#define psw_bits(__psw) (*({			\
+	typecheck(psw_t, __psw);		\
+	&(*(struct psw_bits *)(&(__psw)));	\
+}))
+
+typedef struct {
+	unsigned int mask;
+	unsigned int addr;
+} psw_t32 __aligned(8);
+
+#define PGM_INT_CODE_MASK	0x7f
+#define PGM_INT_CODE_PER	0x80
+
+/*
+ * The pt_regs struct defines the way the registers are stored on
+ * the stack during a system call.
+ */
+struct pt_regs {
+	union {
+		user_pt_regs user_regs;
+		struct {
+			unsigned long args[1];
+			psw_t psw;
+			unsigned long gprs[NUM_GPRS];
+		};
+	};
+	unsigned long orig_gpr2;
+	union {
+		struct {
+			unsigned int int_code;
+			unsigned int int_parm;
+			unsigned long int_parm_long;
+		};
+		struct tpi_info tpi_info;
+	};
+	unsigned long flags;
+	unsigned long cr1;
+	unsigned long last_break;
+};
+
+/*
+ * Program event recording (PER) register set.
+ */
+struct per_regs {
+	unsigned long control;		/* PER control bits */
+	unsigned long start;		/* PER starting address */
+	unsigned long end;		/* PER ending address */
+};
+
+/*
+ * PER event contains information about the cause of the last PER exception.
+ */
+struct per_event {
+	unsigned short cause;		/* PER code, ATMID and AI */
+	unsigned long address;		/* PER address */
+	unsigned char paid;		/* PER access identification */
+};
+
+/*
+ * Simplified per_info structure used to decode the ptrace user space ABI.
+ */
+struct per_struct_kernel {
+	unsigned long cr9;		/* PER control bits */
+	unsigned long cr10;		/* PER starting address */
+	unsigned long cr11;		/* PER ending address */
+	unsigned long bits;		/* Obsolete software bits */
+	unsigned long starting_addr;	/* User specified start address */
+	unsigned long ending_addr;	/* User specified end address */
+	unsigned short perc_atmid;	/* PER trap ATMID */
+	unsigned long address;		/* PER trap instruction address */
+	unsigned char access_id;	/* PER trap access identification */
+};
+
+#define PER_EVENT_MASK			0xEB000000UL
+
+#define PER_EVENT_BRANCH		0x80000000UL
+#define PER_EVENT_IFETCH		0x40000000UL
+#define PER_EVENT_STORE			0x20000000UL
+#define PER_EVENT_STORE_REAL		0x08000000UL
+#define PER_EVENT_TRANSACTION_END	0x02000000UL
+#define PER_EVENT_NULLIFICATION		0x01000000UL
+
+#define PER_CONTROL_MASK		0x00e00000UL
+
+#define PER_CONTROL_BRANCH_ADDRESS	0x00800000UL
+#define PER_CONTROL_SUSPENSION		0x00400000UL
+#define PER_CONTROL_ALTERATION		0x00200000UL
+
+static inline void set_pt_regs_flag(struct pt_regs *regs, int flag)
+{
+	regs->flags |= (1UL << flag);
+}
+
+static inline void clear_pt_regs_flag(struct pt_regs *regs, int flag)
+{
+	regs->flags &= ~(1UL << flag);
+}
+
+static inline int test_pt_regs_flag(struct pt_regs *regs, int flag)
+{
+	return !!(regs->flags & (1UL << flag));
+}
+
+static inline int test_and_clear_pt_regs_flag(struct pt_regs *regs, int flag)
+{
+	int ret = test_pt_regs_flag(regs, flag);
+
+	clear_pt_regs_flag(regs, flag);
+	return ret;
+}
+
+/*
+ * These are defined as per linux/ptrace.h, which see.
+ */
+#define arch_has_single_step()	(1)
+#define arch_has_block_step()	(1)
+
+#define user_mode(regs) (((regs)->psw.mask & PSW_MASK_PSTATE) != 0)
+#define instruction_pointer(regs) ((regs)->psw.addr)
+#define user_stack_pointer(regs)((regs)->gprs[15])
+#define profile_pc(regs) instruction_pointer(regs)
+
+static inline long regs_return_value(struct pt_regs *regs)
+{
+	return regs->gprs[2];
+}
+
+static inline void instruction_pointer_set(struct pt_regs *regs,
+					   unsigned long val)
+{
+	regs->psw.addr = val;
+}
+
+int regs_query_register_offset(const char *name);
+const char *regs_query_register_name(unsigned int offset);
+unsigned long regs_get_register(struct pt_regs *regs, unsigned int offset);
+unsigned long regs_get_kernel_stack_nth(struct pt_regs *regs, unsigned int n);
+
+/**
+ * regs_get_kernel_argument() - get Nth function argument in kernel
+ * @regs:	pt_regs of that context
+ * @n:		function argument number (start from 0)
+ *
+ * regs_get_kernel_argument() returns @n th argument of the function call.
+ */
+static inline unsigned long regs_get_kernel_argument(struct pt_regs *regs,
+						     unsigned int n)
+{
+	unsigned int argoffset = STACK_FRAME_OVERHEAD / sizeof(long);
+
+#define NR_REG_ARGUMENTS 5
+	if (n < NR_REG_ARGUMENTS)
+		return regs_get_register(regs, 2 + n);
+	n -= NR_REG_ARGUMENTS;
+	return regs_get_kernel_stack_nth(regs, argoffset + n);
+}
+
+static inline unsigned long kernel_stack_pointer(struct pt_regs *regs)
+{
+	return regs->gprs[15];
+}
+
+static inline void regs_set_return_value(struct pt_regs *regs, unsigned long rc)
+{
+	regs->gprs[2] = rc;
+}
+
+#endif /* __ASSEMBLY__ */
+#endif /* _S390_PTRACE_H */
diff --git a/arch/s390/include/asm/purgatory.h b/arch/s390/include/asm/purgatory.h
new file mode 100644
index 0000000000..e297bcfc47
--- /dev/null
+++ b/arch/s390/include/asm/purgatory.h
@@ -0,0 +1,17 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright IBM Corp. 2018
+ *
+ * Author(s): Philipp Rudo <prudo@linux.vnet.ibm.com>
+ */
+
+#ifndef _S390_PURGATORY_H_
+#define _S390_PURGATORY_H_
+#ifndef __ASSEMBLY__
+
+#include <linux/purgatory.h>
+
+int verify_sha256_digest(void);
+
+#endif	/* __ASSEMBLY__ */
+#endif /* _S390_PURGATORY_H_ */
diff --git a/arch/s390/include/asm/qdio.h b/arch/s390/include/asm/qdio.h
new file mode 100644
index 0000000000..2f983e0b95
--- /dev/null
+++ b/arch/s390/include/asm/qdio.h
@@ -0,0 +1,364 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright IBM Corp. 2000, 2008
+ * Author(s): Utz Bacher <utz.bacher@de.ibm.com>
+ *	      Jan Glauber <jang@linux.vnet.ibm.com>
+ *
+ */
+#ifndef __QDIO_H__
+#define __QDIO_H__
+
+#include <linux/interrupt.h>
+#include <asm/cio.h>
+#include <asm/ccwdev.h>
+
+/* only use 4 queues to save some cachelines */
+#define QDIO_MAX_QUEUES_PER_IRQ		4
+#define QDIO_MAX_BUFFERS_PER_Q		128
+#define QDIO_MAX_BUFFERS_MASK		(QDIO_MAX_BUFFERS_PER_Q - 1)
+#define QDIO_BUFNR(num)			((num) & QDIO_MAX_BUFFERS_MASK)
+#define QDIO_MAX_ELEMENTS_PER_BUFFER	16
+
+#define QDIO_QETH_QFMT			0
+#define QDIO_ZFCP_QFMT			1
+#define QDIO_IQDIO_QFMT			2
+
+/**
+ * struct qdesfmt0 - queue descriptor, format 0
+ * @sliba: absolute address of storage list information block
+ * @sla: absolute address of storage list
+ * @slsba: absolute address of storage list state block
+ * @akey: access key for SLIB
+ * @bkey: access key for SL
+ * @ckey: access key for SBALs
+ * @dkey: access key for SLSB
+ */
+struct qdesfmt0 {
+	u64 sliba;
+	u64 sla;
+	u64 slsba;
+	u32	 : 32;
+	u32 akey : 4;
+	u32 bkey : 4;
+	u32 ckey : 4;
+	u32 dkey : 4;
+	u32	 : 16;
+} __attribute__ ((packed));
+
+#define QDR_AC_MULTI_BUFFER_ENABLE 0x01
+
+/**
+ * struct qdr - queue description record (QDR)
+ * @qfmt: queue format
+ * @ac: adapter characteristics
+ * @iqdcnt: input queue descriptor count
+ * @oqdcnt: output queue descriptor count
+ * @iqdsz: input queue descriptor size
+ * @oqdsz: output queue descriptor size
+ * @qiba: absolute address of queue information block
+ * @qkey: queue information block key
+ * @qdf0: queue descriptions
+ */
+struct qdr {
+	u32 qfmt   : 8;
+	u32	   : 16;
+	u32 ac	   : 8;
+	u32	   : 8;
+	u32 iqdcnt : 8;
+	u32	   : 8;
+	u32 oqdcnt : 8;
+	u32	   : 8;
+	u32 iqdsz  : 8;
+	u32	   : 8;
+	u32 oqdsz  : 8;
+	/* private: */
+	u32 res[9];
+	/* public: */
+	u64 qiba;
+	u32	   : 32;
+	u32 qkey   : 4;
+	u32	   : 28;
+	struct qdesfmt0 qdf0[126];
+} __packed __aligned(PAGE_SIZE);
+
+#define QIB_AC_OUTBOUND_PCI_SUPPORTED	0x40
+#define QIB_RFLAGS_ENABLE_QEBSM		0x80
+#define QIB_RFLAGS_ENABLE_DATA_DIV	0x02
+
+/**
+ * struct qib - queue information block (QIB)
+ * @qfmt: queue format
+ * @pfmt: implementation dependent parameter format
+ * @rflags: QEBSM
+ * @ac: adapter characteristics
+ * @isliba: logical address of first input SLIB
+ * @osliba: logical address of first output SLIB
+ * @ebcnam: adapter identifier in EBCDIC
+ * @parm: implementation dependent parameters
+ */
+struct qib {
+	u32 qfmt   : 8;
+	u32 pfmt   : 8;
+	u32 rflags : 8;
+	u32 ac	   : 8;
+	u32	   : 32;
+	u64 isliba;
+	u64 osliba;
+	u32	   : 32;
+	u32	   : 32;
+	u8 ebcnam[8];
+	/* private: */
+	u8 res[88];
+	/* public: */
+	u8 parm[128];
+} __attribute__ ((packed, aligned(256)));
+
+/**
+ * struct slibe - storage list information block element (SLIBE)
+ * @parms: implementation dependent parameters
+ */
+struct slibe {
+	u64 parms;
+};
+
+/**
+ * struct qaob - queue asynchronous operation block
+ * @res0: reserved parameters
+ * @res1: reserved parameter
+ * @res2: reserved parameter
+ * @res3: reserved parameter
+ * @aorc: asynchronous operation return code
+ * @flags: internal flags
+ * @cbtbs: control block type
+ * @sb_count: number of storage blocks
+ * @sba: storage block element addresses
+ * @dcount: size of storage block elements
+ * @user0: user definable value
+ * @res4: reserved parameter
+ * @user1: user definable value
+ */
+struct qaob {
+	u64 res0[6];
+	u8 res1;
+	u8 res2;
+	u8 res3;
+	u8 aorc;
+	u8 flags;
+	u16 cbtbs;
+	u8 sb_count;
+	u64 sba[QDIO_MAX_ELEMENTS_PER_BUFFER];
+	u16 dcount[QDIO_MAX_ELEMENTS_PER_BUFFER];
+	u64 user0;
+	u64 res4[2];
+	u8 user1[16];
+} __attribute__ ((packed, aligned(256)));
+
+/**
+ * struct slib - storage list information block (SLIB)
+ * @nsliba: next SLIB address (if any)
+ * @sla: SL address
+ * @slsba: SLSB address
+ * @slibe: SLIB elements
+ */
+struct slib {
+	u64 nsliba;
+	u64 sla;
+	u64 slsba;
+	/* private: */
+	u8 res[1000];
+	/* public: */
+	struct slibe slibe[QDIO_MAX_BUFFERS_PER_Q];
+} __attribute__ ((packed, aligned(2048)));
+
+#define SBAL_EFLAGS_LAST_ENTRY		0x40
+#define SBAL_EFLAGS_CONTIGUOUS		0x20
+#define SBAL_EFLAGS_FIRST_FRAG		0x04
+#define SBAL_EFLAGS_MIDDLE_FRAG		0x08
+#define SBAL_EFLAGS_LAST_FRAG		0x0c
+#define SBAL_EFLAGS_MASK		0x6f
+
+#define SBAL_SFLAGS0_PCI_REQ		0x40
+#define SBAL_SFLAGS0_DATA_CONTINUATION	0x20
+
+/* Awesome OpenFCP extensions */
+#define SBAL_SFLAGS0_TYPE_STATUS	0x00
+#define SBAL_SFLAGS0_TYPE_WRITE		0x08
+#define SBAL_SFLAGS0_TYPE_READ		0x10
+#define SBAL_SFLAGS0_TYPE_WRITE_READ	0x18
+#define SBAL_SFLAGS0_MORE_SBALS		0x04
+#define SBAL_SFLAGS0_COMMAND		0x02
+#define SBAL_SFLAGS0_LAST_SBAL		0x00
+#define SBAL_SFLAGS0_ONLY_SBAL		SBAL_SFLAGS0_COMMAND
+#define SBAL_SFLAGS0_MIDDLE_SBAL	SBAL_SFLAGS0_MORE_SBALS
+#define SBAL_SFLAGS0_FIRST_SBAL (SBAL_SFLAGS0_MORE_SBALS | SBAL_SFLAGS0_COMMAND)
+
+/**
+ * struct qdio_buffer_element - SBAL entry
+ * @eflags: SBAL entry flags
+ * @scount: SBAL count
+ * @sflags: whole SBAL flags
+ * @length: length
+ * @addr: absolute data address
+*/
+struct qdio_buffer_element {
+	u8 eflags;
+	/* private: */
+	u8 res1;
+	/* public: */
+	u8 scount;
+	u8 sflags;
+	u32 length;
+	u64 addr;
+} __attribute__ ((packed, aligned(16)));
+
+/**
+ * struct qdio_buffer - storage block address list (SBAL)
+ * @element: SBAL entries
+ */
+struct qdio_buffer {
+	struct qdio_buffer_element element[QDIO_MAX_ELEMENTS_PER_BUFFER];
+} __attribute__ ((packed, aligned(256)));
+
+/**
+ * struct sl_element - storage list entry
+ * @sbal: absolute SBAL address
+ */
+struct sl_element {
+	u64 sbal;
+} __attribute__ ((packed));
+
+/**
+ * struct sl - storage list (SL)
+ * @element: SL entries
+ */
+struct sl {
+	struct sl_element element[QDIO_MAX_BUFFERS_PER_Q];
+} __attribute__ ((packed, aligned(1024)));
+
+/**
+ * struct slsb - storage list state block (SLSB)
+ * @val: state per buffer
+ */
+struct slsb {
+	u8 val[QDIO_MAX_BUFFERS_PER_Q];
+} __attribute__ ((packed, aligned(256)));
+
+/* qdio adapter-characteristics-1 flag */
+#define CHSC_AC1_INITIATE_INPUTQ	0x80
+#define AC1_SIGA_INPUT_NEEDED		0x40	/* process input queues */
+#define AC1_SIGA_OUTPUT_NEEDED		0x20	/* process output queues */
+#define AC1_SIGA_SYNC_NEEDED		0x10	/* ask hypervisor to sync */
+#define AC1_AUTOMATIC_SYNC_ON_THININT	0x08	/* set by hypervisor */
+#define AC1_AUTOMATIC_SYNC_ON_OUT_PCI	0x04	/* set by hypervisor */
+#define AC1_SC_QEBSM_AVAILABLE		0x02	/* available for subchannel */
+#define AC1_SC_QEBSM_ENABLED		0x01	/* enabled for subchannel */
+
+#define CHSC_AC2_MULTI_BUFFER_AVAILABLE	0x0080
+#define CHSC_AC2_MULTI_BUFFER_ENABLED	0x0040
+#define CHSC_AC2_DATA_DIV_AVAILABLE	0x0010
+#define CHSC_AC2_SNIFFER_AVAILABLE	0x0008
+#define CHSC_AC2_DATA_DIV_ENABLED	0x0002
+
+#define CHSC_AC3_FORMAT2_CQ_AVAILABLE	0x8000
+
+struct qdio_ssqd_desc {
+	u8 flags;
+	u8:8;
+	u16 sch;
+	u8 qfmt;
+	u8 parm;
+	u8 qdioac1;
+	u8 sch_class;
+	u8 pcnt;
+	u8 icnt;
+	u8:8;
+	u8 ocnt;
+	u8:8;
+	u8 mbccnt;
+	u16 qdioac2;
+	u64 sch_token;
+	u8 mro;
+	u8 mri;
+	u16 qdioac3;
+	u16:16;
+	u8:8;
+	u8 mmwc;
+} __attribute__ ((packed));
+
+/* params are: ccw_device, qdio_error, queue_number,
+   first element processed, number of elements processed, int_parm */
+typedef void qdio_handler_t(struct ccw_device *, unsigned int, int,
+			    int, int, unsigned long);
+
+/* qdio errors reported through the queue handlers: */
+#define QDIO_ERROR_ACTIVATE			0x0001
+#define QDIO_ERROR_GET_BUF_STATE		0x0002
+#define QDIO_ERROR_SET_BUF_STATE		0x0004
+
+/* extra info for completed SBALs: */
+#define QDIO_ERROR_SLSB_STATE			0x0100
+#define QDIO_ERROR_SLSB_PENDING			0x0200
+
+/* for qdio_cleanup */
+#define QDIO_FLAG_CLEANUP_USING_CLEAR		0x01
+#define QDIO_FLAG_CLEANUP_USING_HALT		0x02
+
+/**
+ * struct qdio_initialize - qdio initialization data
+ * @q_format: queue format
+ * @qdr_ac: feature flags to set
+ * @qib_param_field_format: format for qib_parm_field
+ * @qib_param_field: pointer to 128 bytes or NULL, if no param field
+ * @qib_rflags: rflags to set
+ * @no_input_qs: number of input queues
+ * @no_output_qs: number of output queues
+ * @input_handler: handler to be called for input queues, and device-wide errors
+ * @output_handler: handler to be called for output queues
+ * @irq_poll: Data IRQ polling handler
+ * @scan_threshold: # of in-use buffers that triggers scan on output queue
+ * @int_parm: interruption parameter
+ * @input_sbal_addr_array:  per-queue array, each element points to 128 SBALs
+ * @output_sbal_addr_array: per-queue array, each element points to 128 SBALs
+ */
+struct qdio_initialize {
+	unsigned char q_format;
+	unsigned char qdr_ac;
+	unsigned int qib_param_field_format;
+	unsigned char *qib_param_field;
+	unsigned char qib_rflags;
+	unsigned int no_input_qs;
+	unsigned int no_output_qs;
+	qdio_handler_t *input_handler;
+	qdio_handler_t *output_handler;
+	void (*irq_poll)(struct ccw_device *cdev, unsigned long data);
+	unsigned long int_parm;
+	struct qdio_buffer ***input_sbal_addr_array;
+	struct qdio_buffer ***output_sbal_addr_array;
+};
+
+int qdio_alloc_buffers(struct qdio_buffer **buf, unsigned int count);
+void qdio_free_buffers(struct qdio_buffer **buf, unsigned int count);
+void qdio_reset_buffers(struct qdio_buffer **buf, unsigned int count);
+
+extern int qdio_allocate(struct ccw_device *cdev, unsigned int no_input_qs,
+			 unsigned int no_output_qs);
+extern int qdio_establish(struct ccw_device *cdev,
+			  struct qdio_initialize *init_data);
+extern int qdio_activate(struct ccw_device *);
+extern int qdio_start_irq(struct ccw_device *cdev);
+extern int qdio_stop_irq(struct ccw_device *cdev);
+extern int qdio_inspect_input_queue(struct ccw_device *cdev, unsigned int nr,
+				    unsigned int *bufnr, unsigned int *error);
+extern int qdio_inspect_output_queue(struct ccw_device *cdev, unsigned int nr,
+				     unsigned int *bufnr, unsigned int *error);
+extern int qdio_add_bufs_to_input_queue(struct ccw_device *cdev,
+					unsigned int q_nr, unsigned int bufnr,
+					unsigned int count);
+extern int qdio_add_bufs_to_output_queue(struct ccw_device *cdev,
+					 unsigned int q_nr, unsigned int bufnr,
+					 unsigned int count, struct qaob *aob);
+extern int qdio_shutdown(struct ccw_device *, int);
+extern int qdio_free(struct ccw_device *);
+extern int qdio_get_ssqd_desc(struct ccw_device *, struct qdio_ssqd_desc *);
+
+#endif /* __QDIO_H__ */
diff --git a/arch/s390/include/asm/runtime_instr.h b/arch/s390/include/asm/runtime_instr.h
new file mode 100644
index 0000000000..0e1605538c
--- /dev/null
+++ b/arch/s390/include/asm/runtime_instr.h
@@ -0,0 +1,28 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _RUNTIME_INSTR_H
+#define _RUNTIME_INSTR_H
+
+#include <uapi/asm/runtime_instr.h>
+
+extern struct runtime_instr_cb runtime_instr_empty_cb;
+
+static inline void save_ri_cb(struct runtime_instr_cb *cb_prev)
+{
+	if (cb_prev)
+		store_runtime_instr_cb(cb_prev);
+}
+
+static inline void restore_ri_cb(struct runtime_instr_cb *cb_next,
+				 struct runtime_instr_cb *cb_prev)
+{
+	if (cb_next)
+		load_runtime_instr_cb(cb_next);
+	else if (cb_prev)
+		load_runtime_instr_cb(&runtime_instr_empty_cb);
+}
+
+struct task_struct;
+
+void runtime_instr_release(struct task_struct *tsk);
+
+#endif /* _RUNTIME_INSTR_H */
diff --git a/arch/s390/include/asm/rwonce.h b/arch/s390/include/asm/rwonce.h
new file mode 100644
index 0000000000..91fc24520e
--- /dev/null
+++ b/arch/s390/include/asm/rwonce.h
@@ -0,0 +1,31 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#ifndef __ASM_S390_RWONCE_H
+#define __ASM_S390_RWONCE_H
+
+#include <linux/compiler_types.h>
+
+/*
+ * Use READ_ONCE_ALIGNED_128() for 128-bit block concurrent (atomic) read
+ * accesses. Note that x must be 128-bit aligned, otherwise a specification
+ * exception is generated.
+ */
+#define READ_ONCE_ALIGNED_128(x)			\
+({							\
+	union {						\
+		typeof(x) __x;				\
+		__uint128_t val;			\
+	} __u;						\
+							\
+	BUILD_BUG_ON(sizeof(x) != 16);			\
+	asm volatile(					\
+		"	lpq	%[val],%[_x]\n"		\
+		: [val] "=d" (__u.val)			\
+		: [_x] "QS" (x)				\
+		: "memory");				\
+	__u.__x;					\
+})
+
+#include <asm-generic/rwonce.h>
+
+#endif	/* __ASM_S390_RWONCE_H */
diff --git a/arch/s390/include/asm/schid.h b/arch/s390/include/asm/schid.h
new file mode 100644
index 0000000000..3ac405a67f
--- /dev/null
+++ b/arch/s390/include/asm/schid.h
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef ASM_SCHID_H
+#define ASM_SCHID_H
+
+#include <linux/string.h>
+#include <uapi/asm/schid.h>
+
+/* Helper function for sane state of pre-allocated subchannel_id. */
+static inline void
+init_subchannel_id(struct subchannel_id *schid)
+{
+	memset(schid, 0, sizeof(struct subchannel_id));
+	schid->one = 1;
+}
+
+static inline int
+schid_equal(struct subchannel_id *schid1, struct subchannel_id *schid2)
+{
+	return !memcmp(schid1, schid2, sizeof(struct subchannel_id));
+}
+
+#endif /* ASM_SCHID_H */
diff --git a/arch/s390/include/asm/sclp.h b/arch/s390/include/asm/sclp.h
new file mode 100644
index 0000000000..5742d23bba
--- /dev/null
+++ b/arch/s390/include/asm/sclp.h
@@ -0,0 +1,164 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ *    Copyright IBM Corp. 2007
+ */
+
+#ifndef _ASM_S390_SCLP_H
+#define _ASM_S390_SCLP_H
+
+#include <linux/types.h>
+
+#define SCLP_CHP_INFO_MASK_SIZE		32
+#define EARLY_SCCB_SIZE		PAGE_SIZE
+#define SCLP_MAX_CORES		512
+/* 144 + 16 * SCLP_MAX_CORES + 2 * (SCLP_MAX_CORES - 1) */
+#define EXT_SCCB_READ_SCP	(3 * PAGE_SIZE)
+/* 24 + 16 * SCLP_MAX_CORES */
+#define EXT_SCCB_READ_CPU	(3 * PAGE_SIZE)
+
+#ifndef __ASSEMBLY__
+#include <linux/uio.h>
+#include <asm/chpid.h>
+#include <asm/cpu.h>
+
+struct sclp_chp_info {
+	u8 recognized[SCLP_CHP_INFO_MASK_SIZE];
+	u8 standby[SCLP_CHP_INFO_MASK_SIZE];
+	u8 configured[SCLP_CHP_INFO_MASK_SIZE];
+};
+
+#define LOADPARM_LEN 8
+
+struct sclp_ipl_info {
+	int is_valid;
+	int has_dump;
+	char loadparm[LOADPARM_LEN];
+};
+
+struct sclp_core_entry {
+	u8 core_id;
+	u8 reserved0;
+	u8 : 4;
+	u8 sief2 : 1;
+	u8 skey : 1;
+	u8 : 2;
+	u8 : 2;
+	u8 gpere : 1;
+	u8 siif : 1;
+	u8 sigpif : 1;
+	u8 : 3;
+	u8 reserved2[3];
+	u8 : 2;
+	u8 ib : 1;
+	u8 cei : 1;
+	u8 : 4;
+	u8 reserved3[6];
+	u8 type;
+	u8 reserved1;
+} __attribute__((packed));
+
+struct sclp_core_info {
+	unsigned int configured;
+	unsigned int standby;
+	unsigned int combined;
+	struct sclp_core_entry core[SCLP_MAX_CORES];
+};
+
+struct sclp_info {
+	unsigned char has_linemode : 1;
+	unsigned char has_vt220 : 1;
+	unsigned char has_siif : 1;
+	unsigned char has_sigpif : 1;
+	unsigned char has_core_type : 1;
+	unsigned char has_sprp : 1;
+	unsigned char has_hvs : 1;
+	unsigned char has_esca : 1;
+	unsigned char has_sief2 : 1;
+	unsigned char has_64bscao : 1;
+	unsigned char has_gpere : 1;
+	unsigned char has_cmma : 1;
+	unsigned char has_gsls : 1;
+	unsigned char has_ib : 1;
+	unsigned char has_cei : 1;
+	unsigned char has_pfmfi : 1;
+	unsigned char has_ibs : 1;
+	unsigned char has_skey : 1;
+	unsigned char has_kss : 1;
+	unsigned char has_gisaf : 1;
+	unsigned char has_diag318 : 1;
+	unsigned char has_diag320 : 1;
+	unsigned char has_sipl : 1;
+	unsigned char has_sipl_eckd : 1;
+	unsigned char has_dirq : 1;
+	unsigned char has_iplcc : 1;
+	unsigned char has_zpci_lsi : 1;
+	unsigned char has_aisii : 1;
+	unsigned char has_aeni : 1;
+	unsigned char has_aisi : 1;
+	unsigned int ibc;
+	unsigned int mtid;
+	unsigned int mtid_cp;
+	unsigned int mtid_prev;
+	unsigned long rzm;
+	unsigned long rnmax;
+	unsigned long hamax;
+	unsigned int max_cores;
+	unsigned long hsa_size;
+	unsigned long facilities;
+	unsigned int hmfai;
+};
+extern struct sclp_info sclp;
+
+struct zpci_report_error_header {
+	u8 version;	/* Interface version byte */
+	u8 action;	/* Action qualifier byte
+			 * 0: Adapter Reset Request
+			 * 1: Deconfigure and repair action requested
+			 *	(OpenCrypto Problem Call Home)
+			 * 2: Informational Report
+			 *	(OpenCrypto Successful Diagnostics Execution)
+			 */
+	u16 length;	/* Length of Subsequent Data (up to 4K – SCLP header */
+	u8 data[];	/* Subsequent Data passed verbatim to SCLP ET 24 */
+} __packed;
+
+extern char *sclp_early_sccb;
+
+void sclp_early_adjust_va(void);
+void sclp_early_set_buffer(void *sccb);
+int sclp_early_read_info(void);
+int sclp_early_read_storage_info(void);
+int sclp_early_get_core_info(struct sclp_core_info *info);
+void sclp_early_get_ipl_info(struct sclp_ipl_info *info);
+void sclp_early_detect(void);
+void sclp_early_printk(const char *s);
+void __sclp_early_printk(const char *s, unsigned int len);
+void sclp_emergency_printk(const char *s);
+
+int sclp_early_get_memsize(unsigned long *mem);
+int sclp_early_get_hsa_size(unsigned long *hsa_size);
+int _sclp_get_core_info(struct sclp_core_info *info);
+int sclp_core_configure(u8 core);
+int sclp_core_deconfigure(u8 core);
+int sclp_sdias_blk_count(void);
+int sclp_sdias_copy(void *dest, int blk_num, int nr_blks);
+int sclp_chp_configure(struct chp_id chpid);
+int sclp_chp_deconfigure(struct chp_id chpid);
+int sclp_chp_read_info(struct sclp_chp_info *info);
+int sclp_pci_configure(u32 fid);
+int sclp_pci_deconfigure(u32 fid);
+int sclp_ap_configure(u32 apid);
+int sclp_ap_deconfigure(u32 apid);
+int sclp_pci_report(struct zpci_report_error_header *report, u32 fh, u32 fid);
+size_t memcpy_hsa_iter(struct iov_iter *iter, unsigned long src, size_t count);
+void sclp_ocf_cpc_name_copy(char *dst);
+
+static inline int sclp_get_core_info(struct sclp_core_info *info, int early)
+{
+	if (early)
+		return sclp_early_get_core_info(info);
+	return _sclp_get_core_info(info);
+}
+
+#endif /* __ASSEMBLY__ */
+#endif /* _ASM_S390_SCLP_H */
diff --git a/arch/s390/include/asm/scsw.h b/arch/s390/include/asm/scsw.h
new file mode 100644
index 0000000000..322bdcd4b6
--- /dev/null
+++ b/arch/s390/include/asm/scsw.h
@@ -0,0 +1,1050 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ *  Helper functions for scsw access.
+ *
+ *    Copyright IBM Corp. 2008, 2012
+ *    Author(s): Peter Oberparleiter <peter.oberparleiter@de.ibm.com>
+ */
+
+#ifndef _ASM_S390_SCSW_H_
+#define _ASM_S390_SCSW_H_
+
+#include <linux/types.h>
+#include <asm/css_chars.h>
+#include <asm/cio.h>
+
+/**
+ * struct cmd_scsw - command-mode subchannel status word
+ * @key: subchannel key
+ * @sctl: suspend control
+ * @eswf: esw format
+ * @cc: deferred condition code
+ * @fmt: format
+ * @pfch: prefetch
+ * @isic: initial-status interruption control
+ * @alcc: address-limit checking control
+ * @ssi: suppress-suspended interruption
+ * @zcc: zero condition code
+ * @ectl: extended control
+ * @pno: path not operational
+ * @res: reserved
+ * @fctl: function control
+ * @actl: activity control
+ * @stctl: status control
+ * @cpa: channel program address
+ * @dstat: device status
+ * @cstat: subchannel status
+ * @count: residual count
+ */
+struct cmd_scsw {
+	__u32 key  : 4;
+	__u32 sctl : 1;
+	__u32 eswf : 1;
+	__u32 cc   : 2;
+	__u32 fmt  : 1;
+	__u32 pfch : 1;
+	__u32 isic : 1;
+	__u32 alcc : 1;
+	__u32 ssi  : 1;
+	__u32 zcc  : 1;
+	__u32 ectl : 1;
+	__u32 pno  : 1;
+	__u32 res  : 1;
+	__u32 fctl : 3;
+	__u32 actl : 7;
+	__u32 stctl : 5;
+	__u32 cpa;
+	__u32 dstat : 8;
+	__u32 cstat : 8;
+	__u32 count : 16;
+} __attribute__ ((packed));
+
+/**
+ * struct tm_scsw - transport-mode subchannel status word
+ * @key: subchannel key
+ * @eswf: esw format
+ * @cc: deferred condition code
+ * @fmt: format
+ * @x: IRB-format control
+ * @q: interrogate-complete
+ * @ectl: extended control
+ * @pno: path not operational
+ * @fctl: function control
+ * @actl: activity control
+ * @stctl: status control
+ * @tcw: TCW address
+ * @dstat: device status
+ * @cstat: subchannel status
+ * @fcxs: FCX status
+ * @schxs: subchannel-extended status
+ */
+struct tm_scsw {
+	u32 key:4;
+	u32 :1;
+	u32 eswf:1;
+	u32 cc:2;
+	u32 fmt:3;
+	u32 x:1;
+	u32 q:1;
+	u32 :1;
+	u32 ectl:1;
+	u32 pno:1;
+	u32 :1;
+	u32 fctl:3;
+	u32 actl:7;
+	u32 stctl:5;
+	u32 tcw;
+	u32 dstat:8;
+	u32 cstat:8;
+	u32 fcxs:8;
+	u32 ifob:1;
+	u32 sesq:7;
+} __attribute__ ((packed));
+
+/**
+ * struct eadm_scsw - subchannel status word for eadm subchannels
+ * @key: subchannel key
+ * @eswf: esw format
+ * @cc: deferred condition code
+ * @ectl: extended control
+ * @fctl: function control
+ * @actl: activity control
+ * @stctl: status control
+ * @aob: AOB address
+ * @dstat: device status
+ * @cstat: subchannel status
+ */
+struct eadm_scsw {
+	u32 key:4;
+	u32:1;
+	u32 eswf:1;
+	u32 cc:2;
+	u32:6;
+	u32 ectl:1;
+	u32:2;
+	u32 fctl:3;
+	u32 actl:7;
+	u32 stctl:5;
+	u32 aob;
+	u32 dstat:8;
+	u32 cstat:8;
+	u32:16;
+} __packed;
+
+/**
+ * union scsw - subchannel status word
+ * @cmd: command-mode SCSW
+ * @tm: transport-mode SCSW
+ * @eadm: eadm SCSW
+ */
+union scsw {
+	struct cmd_scsw cmd;
+	struct tm_scsw tm;
+	struct eadm_scsw eadm;
+} __packed;
+
+#define SCSW_FCTL_CLEAR_FUNC	 0x1
+#define SCSW_FCTL_HALT_FUNC	 0x2
+#define SCSW_FCTL_START_FUNC	 0x4
+
+#define SCSW_ACTL_SUSPENDED	 0x1
+#define SCSW_ACTL_DEVACT	 0x2
+#define SCSW_ACTL_SCHACT	 0x4
+#define SCSW_ACTL_CLEAR_PEND	 0x8
+#define SCSW_ACTL_HALT_PEND	 0x10
+#define SCSW_ACTL_START_PEND	 0x20
+#define SCSW_ACTL_RESUME_PEND	 0x40
+
+#define SCSW_STCTL_STATUS_PEND	 0x1
+#define SCSW_STCTL_SEC_STATUS	 0x2
+#define SCSW_STCTL_PRIM_STATUS	 0x4
+#define SCSW_STCTL_INTER_STATUS	 0x8
+#define SCSW_STCTL_ALERT_STATUS	 0x10
+
+#define DEV_STAT_ATTENTION	 0x80
+#define DEV_STAT_STAT_MOD	 0x40
+#define DEV_STAT_CU_END		 0x20
+#define DEV_STAT_BUSY		 0x10
+#define DEV_STAT_CHN_END	 0x08
+#define DEV_STAT_DEV_END	 0x04
+#define DEV_STAT_UNIT_CHECK	 0x02
+#define DEV_STAT_UNIT_EXCEP	 0x01
+
+#define SCHN_STAT_PCI		 0x80
+#define SCHN_STAT_INCORR_LEN	 0x40
+#define SCHN_STAT_PROG_CHECK	 0x20
+#define SCHN_STAT_PROT_CHECK	 0x10
+#define SCHN_STAT_CHN_DATA_CHK	 0x08
+#define SCHN_STAT_CHN_CTRL_CHK	 0x04
+#define SCHN_STAT_INTF_CTRL_CHK	 0x02
+#define SCHN_STAT_CHAIN_CHECK	 0x01
+
+#define SCSW_SESQ_DEV_NOFCX	 3
+#define SCSW_SESQ_PATH_NOFCX	 4
+
+/*
+ * architectured values for first sense byte
+ */
+#define SNS0_CMD_REJECT		0x80
+#define SNS_CMD_REJECT		SNS0_CMD_REJEC
+#define SNS0_INTERVENTION_REQ	0x40
+#define SNS0_BUS_OUT_CHECK	0x20
+#define SNS0_EQUIPMENT_CHECK	0x10
+#define SNS0_DATA_CHECK		0x08
+#define SNS0_OVERRUN		0x04
+#define SNS0_INCOMPL_DOMAIN	0x01
+
+/*
+ * architectured values for second sense byte
+ */
+#define SNS1_PERM_ERR		0x80
+#define SNS1_INV_TRACK_FORMAT	0x40
+#define SNS1_EOC		0x20
+#define SNS1_MESSAGE_TO_OPER	0x10
+#define SNS1_NO_REC_FOUND	0x08
+#define SNS1_FILE_PROTECTED	0x04
+#define SNS1_WRITE_INHIBITED	0x02
+#define SNS1_INPRECISE_END	0x01
+
+/*
+ * architectured values for third sense byte
+ */
+#define SNS2_REQ_INH_WRITE	0x80
+#define SNS2_CORRECTABLE	0x40
+#define SNS2_FIRST_LOG_ERR	0x20
+#define SNS2_ENV_DATA_PRESENT	0x10
+#define SNS2_INPRECISE_END	0x04
+
+/*
+ * architectured values for PPRC errors
+ */
+#define SNS7_INVALID_ON_SEC	0x0e
+
+/**
+ * scsw_is_tm - check for transport mode scsw
+ * @scsw: pointer to scsw
+ *
+ * Return non-zero if the specified scsw is a transport mode scsw, zero
+ * otherwise.
+ */
+static inline int scsw_is_tm(union scsw *scsw)
+{
+	return css_general_characteristics.fcx && (scsw->tm.x == 1);
+}
+
+/**
+ * scsw_key - return scsw key field
+ * @scsw: pointer to scsw
+ *
+ * Return the value of the key field of the specified scsw, regardless of
+ * whether it is a transport mode or command mode scsw.
+ */
+static inline u32 scsw_key(union scsw *scsw)
+{
+	if (scsw_is_tm(scsw))
+		return scsw->tm.key;
+	else
+		return scsw->cmd.key;
+}
+
+/**
+ * scsw_eswf - return scsw eswf field
+ * @scsw: pointer to scsw
+ *
+ * Return the value of the eswf field of the specified scsw, regardless of
+ * whether it is a transport mode or command mode scsw.
+ */
+static inline u32 scsw_eswf(union scsw *scsw)
+{
+	if (scsw_is_tm(scsw))
+		return scsw->tm.eswf;
+	else
+		return scsw->cmd.eswf;
+}
+
+/**
+ * scsw_cc - return scsw cc field
+ * @scsw: pointer to scsw
+ *
+ * Return the value of the cc field of the specified scsw, regardless of
+ * whether it is a transport mode or command mode scsw.
+ */
+static inline u32 scsw_cc(union scsw *scsw)
+{
+	if (scsw_is_tm(scsw))
+		return scsw->tm.cc;
+	else
+		return scsw->cmd.cc;
+}
+
+/**
+ * scsw_ectl - return scsw ectl field
+ * @scsw: pointer to scsw
+ *
+ * Return the value of the ectl field of the specified scsw, regardless of
+ * whether it is a transport mode or command mode scsw.
+ */
+static inline u32 scsw_ectl(union scsw *scsw)
+{
+	if (scsw_is_tm(scsw))
+		return scsw->tm.ectl;
+	else
+		return scsw->cmd.ectl;
+}
+
+/**
+ * scsw_pno - return scsw pno field
+ * @scsw: pointer to scsw
+ *
+ * Return the value of the pno field of the specified scsw, regardless of
+ * whether it is a transport mode or command mode scsw.
+ */
+static inline u32 scsw_pno(union scsw *scsw)
+{
+	if (scsw_is_tm(scsw))
+		return scsw->tm.pno;
+	else
+		return scsw->cmd.pno;
+}
+
+/**
+ * scsw_fctl - return scsw fctl field
+ * @scsw: pointer to scsw
+ *
+ * Return the value of the fctl field of the specified scsw, regardless of
+ * whether it is a transport mode or command mode scsw.
+ */
+static inline u32 scsw_fctl(union scsw *scsw)
+{
+	if (scsw_is_tm(scsw))
+		return scsw->tm.fctl;
+	else
+		return scsw->cmd.fctl;
+}
+
+/**
+ * scsw_actl - return scsw actl field
+ * @scsw: pointer to scsw
+ *
+ * Return the value of the actl field of the specified scsw, regardless of
+ * whether it is a transport mode or command mode scsw.
+ */
+static inline u32 scsw_actl(union scsw *scsw)
+{
+	if (scsw_is_tm(scsw))
+		return scsw->tm.actl;
+	else
+		return scsw->cmd.actl;
+}
+
+/**
+ * scsw_stctl - return scsw stctl field
+ * @scsw: pointer to scsw
+ *
+ * Return the value of the stctl field of the specified scsw, regardless of
+ * whether it is a transport mode or command mode scsw.
+ */
+static inline u32 scsw_stctl(union scsw *scsw)
+{
+	if (scsw_is_tm(scsw))
+		return scsw->tm.stctl;
+	else
+		return scsw->cmd.stctl;
+}
+
+/**
+ * scsw_dstat - return scsw dstat field
+ * @scsw: pointer to scsw
+ *
+ * Return the value of the dstat field of the specified scsw, regardless of
+ * whether it is a transport mode or command mode scsw.
+ */
+static inline u32 scsw_dstat(union scsw *scsw)
+{
+	if (scsw_is_tm(scsw))
+		return scsw->tm.dstat;
+	else
+		return scsw->cmd.dstat;
+}
+
+/**
+ * scsw_cstat - return scsw cstat field
+ * @scsw: pointer to scsw
+ *
+ * Return the value of the cstat field of the specified scsw, regardless of
+ * whether it is a transport mode or command mode scsw.
+ */
+static inline u32 scsw_cstat(union scsw *scsw)
+{
+	if (scsw_is_tm(scsw))
+		return scsw->tm.cstat;
+	else
+		return scsw->cmd.cstat;
+}
+
+/**
+ * scsw_cmd_is_valid_key - check key field validity
+ * @scsw: pointer to scsw
+ *
+ * Return non-zero if the key field of the specified command mode scsw is
+ * valid, zero otherwise.
+ */
+static inline int scsw_cmd_is_valid_key(union scsw *scsw)
+{
+	return (scsw->cmd.fctl & SCSW_FCTL_START_FUNC);
+}
+
+/**
+ * scsw_cmd_is_valid_sctl - check sctl field validity
+ * @scsw: pointer to scsw
+ *
+ * Return non-zero if the sctl field of the specified command mode scsw is
+ * valid, zero otherwise.
+ */
+static inline int scsw_cmd_is_valid_sctl(union scsw *scsw)
+{
+	return (scsw->cmd.fctl & SCSW_FCTL_START_FUNC);
+}
+
+/**
+ * scsw_cmd_is_valid_eswf - check eswf field validity
+ * @scsw: pointer to scsw
+ *
+ * Return non-zero if the eswf field of the specified command mode scsw is
+ * valid, zero otherwise.
+ */
+static inline int scsw_cmd_is_valid_eswf(union scsw *scsw)
+{
+	return (scsw->cmd.stctl & SCSW_STCTL_STATUS_PEND);
+}
+
+/**
+ * scsw_cmd_is_valid_cc - check cc field validity
+ * @scsw: pointer to scsw
+ *
+ * Return non-zero if the cc field of the specified command mode scsw is
+ * valid, zero otherwise.
+ */
+static inline int scsw_cmd_is_valid_cc(union scsw *scsw)
+{
+	return (scsw->cmd.fctl & SCSW_FCTL_START_FUNC) &&
+	       (scsw->cmd.stctl & SCSW_STCTL_STATUS_PEND);
+}
+
+/**
+ * scsw_cmd_is_valid_fmt - check fmt field validity
+ * @scsw: pointer to scsw
+ *
+ * Return non-zero if the fmt field of the specified command mode scsw is
+ * valid, zero otherwise.
+ */
+static inline int scsw_cmd_is_valid_fmt(union scsw *scsw)
+{
+	return (scsw->cmd.fctl & SCSW_FCTL_START_FUNC);
+}
+
+/**
+ * scsw_cmd_is_valid_pfch - check pfch field validity
+ * @scsw: pointer to scsw
+ *
+ * Return non-zero if the pfch field of the specified command mode scsw is
+ * valid, zero otherwise.
+ */
+static inline int scsw_cmd_is_valid_pfch(union scsw *scsw)
+{
+	return (scsw->cmd.fctl & SCSW_FCTL_START_FUNC);
+}
+
+/**
+ * scsw_cmd_is_valid_isic - check isic field validity
+ * @scsw: pointer to scsw
+ *
+ * Return non-zero if the isic field of the specified command mode scsw is
+ * valid, zero otherwise.
+ */
+static inline int scsw_cmd_is_valid_isic(union scsw *scsw)
+{
+	return (scsw->cmd.fctl & SCSW_FCTL_START_FUNC);
+}
+
+/**
+ * scsw_cmd_is_valid_alcc - check alcc field validity
+ * @scsw: pointer to scsw
+ *
+ * Return non-zero if the alcc field of the specified command mode scsw is
+ * valid, zero otherwise.
+ */
+static inline int scsw_cmd_is_valid_alcc(union scsw *scsw)
+{
+	return (scsw->cmd.fctl & SCSW_FCTL_START_FUNC);
+}
+
+/**
+ * scsw_cmd_is_valid_ssi - check ssi field validity
+ * @scsw: pointer to scsw
+ *
+ * Return non-zero if the ssi field of the specified command mode scsw is
+ * valid, zero otherwise.
+ */
+static inline int scsw_cmd_is_valid_ssi(union scsw *scsw)
+{
+	return (scsw->cmd.fctl & SCSW_FCTL_START_FUNC);
+}
+
+/**
+ * scsw_cmd_is_valid_zcc - check zcc field validity
+ * @scsw: pointer to scsw
+ *
+ * Return non-zero if the zcc field of the specified command mode scsw is
+ * valid, zero otherwise.
+ */
+static inline int scsw_cmd_is_valid_zcc(union scsw *scsw)
+{
+	return (scsw->cmd.fctl & SCSW_FCTL_START_FUNC) &&
+	       (scsw->cmd.stctl & SCSW_STCTL_INTER_STATUS);
+}
+
+/**
+ * scsw_cmd_is_valid_ectl - check ectl field validity
+ * @scsw: pointer to scsw
+ *
+ * Return non-zero if the ectl field of the specified command mode scsw is
+ * valid, zero otherwise.
+ */
+static inline int scsw_cmd_is_valid_ectl(union scsw *scsw)
+{
+	/* Must be status pending. */
+	if (!(scsw->cmd.stctl & SCSW_STCTL_STATUS_PEND))
+		return 0;
+
+	/* Must have alert status. */
+	if (!(scsw->cmd.stctl & SCSW_STCTL_ALERT_STATUS))
+		return 0;
+
+	/* Must be alone or together with primary, secondary or both,
+	 * => no intermediate status.
+	 */
+	if (scsw->cmd.stctl & SCSW_STCTL_INTER_STATUS)
+		return 0;
+
+	return 1;
+}
+
+/**
+ * scsw_cmd_is_valid_pno - check pno field validity
+ * @scsw: pointer to scsw
+ *
+ * Return non-zero if the pno field of the specified command mode scsw is
+ * valid, zero otherwise.
+ */
+static inline int scsw_cmd_is_valid_pno(union scsw *scsw)
+{
+	/* Must indicate at least one I/O function. */
+	if (!scsw->cmd.fctl)
+		return 0;
+
+	/* Must be status pending. */
+	if (!(scsw->cmd.stctl & SCSW_STCTL_STATUS_PEND))
+		return 0;
+
+	/* Can be status pending alone, or with any combination of primary,
+	 * secondary and alert => no intermediate status.
+	 */
+	if (!(scsw->cmd.stctl & SCSW_STCTL_INTER_STATUS))
+		return 1;
+
+	/* If intermediate, must be suspended. */
+	if (scsw->cmd.actl & SCSW_ACTL_SUSPENDED)
+		return 1;
+
+	return 0;
+}
+
+/**
+ * scsw_cmd_is_valid_fctl - check fctl field validity
+ * @scsw: pointer to scsw
+ *
+ * Return non-zero if the fctl field of the specified command mode scsw is
+ * valid, zero otherwise.
+ */
+static inline int scsw_cmd_is_valid_fctl(union scsw *scsw)
+{
+	/* Only valid if pmcw.dnv == 1*/
+	return 1;
+}
+
+/**
+ * scsw_cmd_is_valid_actl - check actl field validity
+ * @scsw: pointer to scsw
+ *
+ * Return non-zero if the actl field of the specified command mode scsw is
+ * valid, zero otherwise.
+ */
+static inline int scsw_cmd_is_valid_actl(union scsw *scsw)
+{
+	/* Only valid if pmcw.dnv == 1*/
+	return 1;
+}
+
+/**
+ * scsw_cmd_is_valid_stctl - check stctl field validity
+ * @scsw: pointer to scsw
+ *
+ * Return non-zero if the stctl field of the specified command mode scsw is
+ * valid, zero otherwise.
+ */
+static inline int scsw_cmd_is_valid_stctl(union scsw *scsw)
+{
+	/* Only valid if pmcw.dnv == 1*/
+	return 1;
+}
+
+/**
+ * scsw_cmd_is_valid_dstat - check dstat field validity
+ * @scsw: pointer to scsw
+ *
+ * Return non-zero if the dstat field of the specified command mode scsw is
+ * valid, zero otherwise.
+ */
+static inline int scsw_cmd_is_valid_dstat(union scsw *scsw)
+{
+	return (scsw->cmd.stctl & SCSW_STCTL_STATUS_PEND) &&
+	       (scsw->cmd.cc != 3);
+}
+
+/**
+ * scsw_cmd_is_valid_cstat - check cstat field validity
+ * @scsw: pointer to scsw
+ *
+ * Return non-zero if the cstat field of the specified command mode scsw is
+ * valid, zero otherwise.
+ */
+static inline int scsw_cmd_is_valid_cstat(union scsw *scsw)
+{
+	return (scsw->cmd.stctl & SCSW_STCTL_STATUS_PEND) &&
+	       (scsw->cmd.cc != 3);
+}
+
+/**
+ * scsw_tm_is_valid_key - check key field validity
+ * @scsw: pointer to scsw
+ *
+ * Return non-zero if the key field of the specified transport mode scsw is
+ * valid, zero otherwise.
+ */
+static inline int scsw_tm_is_valid_key(union scsw *scsw)
+{
+	return (scsw->tm.fctl & SCSW_FCTL_START_FUNC);
+}
+
+/**
+ * scsw_tm_is_valid_eswf - check eswf field validity
+ * @scsw: pointer to scsw
+ *
+ * Return non-zero if the eswf field of the specified transport mode scsw is
+ * valid, zero otherwise.
+ */
+static inline int scsw_tm_is_valid_eswf(union scsw *scsw)
+{
+	return (scsw->tm.stctl & SCSW_STCTL_STATUS_PEND);
+}
+
+/**
+ * scsw_tm_is_valid_cc - check cc field validity
+ * @scsw: pointer to scsw
+ *
+ * Return non-zero if the cc field of the specified transport mode scsw is
+ * valid, zero otherwise.
+ */
+static inline int scsw_tm_is_valid_cc(union scsw *scsw)
+{
+	return (scsw->tm.fctl & SCSW_FCTL_START_FUNC) &&
+	       (scsw->tm.stctl & SCSW_STCTL_STATUS_PEND);
+}
+
+/**
+ * scsw_tm_is_valid_fmt - check fmt field validity
+ * @scsw: pointer to scsw
+ *
+ * Return non-zero if the fmt field of the specified transport mode scsw is
+ * valid, zero otherwise.
+ */
+static inline int scsw_tm_is_valid_fmt(union scsw *scsw)
+{
+	return 1;
+}
+
+/**
+ * scsw_tm_is_valid_x - check x field validity
+ * @scsw: pointer to scsw
+ *
+ * Return non-zero if the x field of the specified transport mode scsw is
+ * valid, zero otherwise.
+ */
+static inline int scsw_tm_is_valid_x(union scsw *scsw)
+{
+	return 1;
+}
+
+/**
+ * scsw_tm_is_valid_q - check q field validity
+ * @scsw: pointer to scsw
+ *
+ * Return non-zero if the q field of the specified transport mode scsw is
+ * valid, zero otherwise.
+ */
+static inline int scsw_tm_is_valid_q(union scsw *scsw)
+{
+	return 1;
+}
+
+/**
+ * scsw_tm_is_valid_ectl - check ectl field validity
+ * @scsw: pointer to scsw
+ *
+ * Return non-zero if the ectl field of the specified transport mode scsw is
+ * valid, zero otherwise.
+ */
+static inline int scsw_tm_is_valid_ectl(union scsw *scsw)
+{
+	/* Must be status pending. */
+	if (!(scsw->tm.stctl & SCSW_STCTL_STATUS_PEND))
+		return 0;
+
+	/* Must have alert status. */
+	if (!(scsw->tm.stctl & SCSW_STCTL_ALERT_STATUS))
+		return 0;
+
+	/* Must be alone or together with primary, secondary or both,
+	 * => no intermediate status.
+	 */
+	if (scsw->tm.stctl & SCSW_STCTL_INTER_STATUS)
+		return 0;
+
+	return 1;
+}
+
+/**
+ * scsw_tm_is_valid_pno - check pno field validity
+ * @scsw: pointer to scsw
+ *
+ * Return non-zero if the pno field of the specified transport mode scsw is
+ * valid, zero otherwise.
+ */
+static inline int scsw_tm_is_valid_pno(union scsw *scsw)
+{
+	/* Must indicate at least one I/O function. */
+	if (!scsw->tm.fctl)
+		return 0;
+
+	/* Must be status pending. */
+	if (!(scsw->tm.stctl & SCSW_STCTL_STATUS_PEND))
+		return 0;
+
+	/* Can be status pending alone, or with any combination of primary,
+	 * secondary and alert => no intermediate status.
+	 */
+	if (!(scsw->tm.stctl & SCSW_STCTL_INTER_STATUS))
+		return 1;
+
+	/* If intermediate, must be suspended. */
+	if (scsw->tm.actl & SCSW_ACTL_SUSPENDED)
+		return 1;
+
+	return 0;
+}
+
+/**
+ * scsw_tm_is_valid_fctl - check fctl field validity
+ * @scsw: pointer to scsw
+ *
+ * Return non-zero if the fctl field of the specified transport mode scsw is
+ * valid, zero otherwise.
+ */
+static inline int scsw_tm_is_valid_fctl(union scsw *scsw)
+{
+	/* Only valid if pmcw.dnv == 1*/
+	return 1;
+}
+
+/**
+ * scsw_tm_is_valid_actl - check actl field validity
+ * @scsw: pointer to scsw
+ *
+ * Return non-zero if the actl field of the specified transport mode scsw is
+ * valid, zero otherwise.
+ */
+static inline int scsw_tm_is_valid_actl(union scsw *scsw)
+{
+	/* Only valid if pmcw.dnv == 1*/
+	return 1;
+}
+
+/**
+ * scsw_tm_is_valid_stctl - check stctl field validity
+ * @scsw: pointer to scsw
+ *
+ * Return non-zero if the stctl field of the specified transport mode scsw is
+ * valid, zero otherwise.
+ */
+static inline int scsw_tm_is_valid_stctl(union scsw *scsw)
+{
+	/* Only valid if pmcw.dnv == 1*/
+	return 1;
+}
+
+/**
+ * scsw_tm_is_valid_dstat - check dstat field validity
+ * @scsw: pointer to scsw
+ *
+ * Return non-zero if the dstat field of the specified transport mode scsw is
+ * valid, zero otherwise.
+ */
+static inline int scsw_tm_is_valid_dstat(union scsw *scsw)
+{
+	return (scsw->tm.stctl & SCSW_STCTL_STATUS_PEND) &&
+	       (scsw->tm.cc != 3);
+}
+
+/**
+ * scsw_tm_is_valid_cstat - check cstat field validity
+ * @scsw: pointer to scsw
+ *
+ * Return non-zero if the cstat field of the specified transport mode scsw is
+ * valid, zero otherwise.
+ */
+static inline int scsw_tm_is_valid_cstat(union scsw *scsw)
+{
+	return (scsw->tm.stctl & SCSW_STCTL_STATUS_PEND) &&
+	       (scsw->tm.cc != 3);
+}
+
+/**
+ * scsw_tm_is_valid_fcxs - check fcxs field validity
+ * @scsw: pointer to scsw
+ *
+ * Return non-zero if the fcxs field of the specified transport mode scsw is
+ * valid, zero otherwise.
+ */
+static inline int scsw_tm_is_valid_fcxs(union scsw *scsw)
+{
+	return 1;
+}
+
+/**
+ * scsw_tm_is_valid_schxs - check schxs field validity
+ * @scsw: pointer to scsw
+ *
+ * Return non-zero if the schxs field of the specified transport mode scsw is
+ * valid, zero otherwise.
+ */
+static inline int scsw_tm_is_valid_schxs(union scsw *scsw)
+{
+	return (scsw->tm.cstat & (SCHN_STAT_PROG_CHECK |
+				  SCHN_STAT_INTF_CTRL_CHK |
+				  SCHN_STAT_PROT_CHECK |
+				  SCHN_STAT_CHN_DATA_CHK));
+}
+
+/**
+ * scsw_is_valid_actl - check actl field validity
+ * @scsw: pointer to scsw
+ *
+ * Return non-zero if the actl field of the specified scsw is valid,
+ * regardless of whether it is a transport mode or command mode scsw.
+ * Return zero if the field does not contain a valid value.
+ */
+static inline int scsw_is_valid_actl(union scsw *scsw)
+{
+	if (scsw_is_tm(scsw))
+		return scsw_tm_is_valid_actl(scsw);
+	else
+		return scsw_cmd_is_valid_actl(scsw);
+}
+
+/**
+ * scsw_is_valid_cc - check cc field validity
+ * @scsw: pointer to scsw
+ *
+ * Return non-zero if the cc field of the specified scsw is valid,
+ * regardless of whether it is a transport mode or command mode scsw.
+ * Return zero if the field does not contain a valid value.
+ */
+static inline int scsw_is_valid_cc(union scsw *scsw)
+{
+	if (scsw_is_tm(scsw))
+		return scsw_tm_is_valid_cc(scsw);
+	else
+		return scsw_cmd_is_valid_cc(scsw);
+}
+
+/**
+ * scsw_is_valid_cstat - check cstat field validity
+ * @scsw: pointer to scsw
+ *
+ * Return non-zero if the cstat field of the specified scsw is valid,
+ * regardless of whether it is a transport mode or command mode scsw.
+ * Return zero if the field does not contain a valid value.
+ */
+static inline int scsw_is_valid_cstat(union scsw *scsw)
+{
+	if (scsw_is_tm(scsw))
+		return scsw_tm_is_valid_cstat(scsw);
+	else
+		return scsw_cmd_is_valid_cstat(scsw);
+}
+
+/**
+ * scsw_is_valid_dstat - check dstat field validity
+ * @scsw: pointer to scsw
+ *
+ * Return non-zero if the dstat field of the specified scsw is valid,
+ * regardless of whether it is a transport mode or command mode scsw.
+ * Return zero if the field does not contain a valid value.
+ */
+static inline int scsw_is_valid_dstat(union scsw *scsw)
+{
+	if (scsw_is_tm(scsw))
+		return scsw_tm_is_valid_dstat(scsw);
+	else
+		return scsw_cmd_is_valid_dstat(scsw);
+}
+
+/**
+ * scsw_is_valid_ectl - check ectl field validity
+ * @scsw: pointer to scsw
+ *
+ * Return non-zero if the ectl field of the specified scsw is valid,
+ * regardless of whether it is a transport mode or command mode scsw.
+ * Return zero if the field does not contain a valid value.
+ */
+static inline int scsw_is_valid_ectl(union scsw *scsw)
+{
+	if (scsw_is_tm(scsw))
+		return scsw_tm_is_valid_ectl(scsw);
+	else
+		return scsw_cmd_is_valid_ectl(scsw);
+}
+
+/**
+ * scsw_is_valid_eswf - check eswf field validity
+ * @scsw: pointer to scsw
+ *
+ * Return non-zero if the eswf field of the specified scsw is valid,
+ * regardless of whether it is a transport mode or command mode scsw.
+ * Return zero if the field does not contain a valid value.
+ */
+static inline int scsw_is_valid_eswf(union scsw *scsw)
+{
+	if (scsw_is_tm(scsw))
+		return scsw_tm_is_valid_eswf(scsw);
+	else
+		return scsw_cmd_is_valid_eswf(scsw);
+}
+
+/**
+ * scsw_is_valid_fctl - check fctl field validity
+ * @scsw: pointer to scsw
+ *
+ * Return non-zero if the fctl field of the specified scsw is valid,
+ * regardless of whether it is a transport mode or command mode scsw.
+ * Return zero if the field does not contain a valid value.
+ */
+static inline int scsw_is_valid_fctl(union scsw *scsw)
+{
+	if (scsw_is_tm(scsw))
+		return scsw_tm_is_valid_fctl(scsw);
+	else
+		return scsw_cmd_is_valid_fctl(scsw);
+}
+
+/**
+ * scsw_is_valid_key - check key field validity
+ * @scsw: pointer to scsw
+ *
+ * Return non-zero if the key field of the specified scsw is valid,
+ * regardless of whether it is a transport mode or command mode scsw.
+ * Return zero if the field does not contain a valid value.
+ */
+static inline int scsw_is_valid_key(union scsw *scsw)
+{
+	if (scsw_is_tm(scsw))
+		return scsw_tm_is_valid_key(scsw);
+	else
+		return scsw_cmd_is_valid_key(scsw);
+}
+
+/**
+ * scsw_is_valid_pno - check pno field validity
+ * @scsw: pointer to scsw
+ *
+ * Return non-zero if the pno field of the specified scsw is valid,
+ * regardless of whether it is a transport mode or command mode scsw.
+ * Return zero if the field does not contain a valid value.
+ */
+static inline int scsw_is_valid_pno(union scsw *scsw)
+{
+	if (scsw_is_tm(scsw))
+		return scsw_tm_is_valid_pno(scsw);
+	else
+		return scsw_cmd_is_valid_pno(scsw);
+}
+
+/**
+ * scsw_is_valid_stctl - check stctl field validity
+ * @scsw: pointer to scsw
+ *
+ * Return non-zero if the stctl field of the specified scsw is valid,
+ * regardless of whether it is a transport mode or command mode scsw.
+ * Return zero if the field does not contain a valid value.
+ */
+static inline int scsw_is_valid_stctl(union scsw *scsw)
+{
+	if (scsw_is_tm(scsw))
+		return scsw_tm_is_valid_stctl(scsw);
+	else
+		return scsw_cmd_is_valid_stctl(scsw);
+}
+
+/**
+ * scsw_cmd_is_solicited - check for solicited scsw
+ * @scsw: pointer to scsw
+ *
+ * Return non-zero if the command mode scsw indicates that the associated
+ * status condition is solicited, zero if it is unsolicited.
+ */
+static inline int scsw_cmd_is_solicited(union scsw *scsw)
+{
+	return (scsw->cmd.cc != 0) || (scsw->cmd.stctl !=
+		(SCSW_STCTL_STATUS_PEND | SCSW_STCTL_ALERT_STATUS));
+}
+
+/**
+ * scsw_tm_is_solicited - check for solicited scsw
+ * @scsw: pointer to scsw
+ *
+ * Return non-zero if the transport mode scsw indicates that the associated
+ * status condition is solicited, zero if it is unsolicited.
+ */
+static inline int scsw_tm_is_solicited(union scsw *scsw)
+{
+	return (scsw->tm.cc != 0) || (scsw->tm.stctl !=
+		(SCSW_STCTL_STATUS_PEND | SCSW_STCTL_ALERT_STATUS));
+}
+
+/**
+ * scsw_is_solicited - check for solicited scsw
+ * @scsw: pointer to scsw
+ *
+ * Return non-zero if the transport or command mode scsw indicates that the
+ * associated status condition is solicited, zero if it is unsolicited.
+ */
+static inline int scsw_is_solicited(union scsw *scsw)
+{
+	if (scsw_is_tm(scsw))
+		return scsw_tm_is_solicited(scsw);
+	else
+		return scsw_cmd_is_solicited(scsw);
+}
+
+#endif /* _ASM_S390_SCSW_H_ */
diff --git a/arch/s390/include/asm/seccomp.h b/arch/s390/include/asm/seccomp.h
new file mode 100644
index 0000000000..71d46f0ba9
--- /dev/null
+++ b/arch/s390/include/asm/seccomp.h
@@ -0,0 +1,28 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_S390_SECCOMP_H
+#define _ASM_S390_SECCOMP_H
+
+#include <linux/unistd.h>
+
+#define __NR_seccomp_read	__NR_read
+#define __NR_seccomp_write	__NR_write
+#define __NR_seccomp_exit	__NR_exit
+#define __NR_seccomp_sigreturn	__NR_sigreturn
+
+#define __NR_seccomp_read_32	__NR_read
+#define __NR_seccomp_write_32	__NR_write
+#define __NR_seccomp_exit_32	__NR_exit
+#define __NR_seccomp_sigreturn_32 __NR_sigreturn
+
+#include <asm-generic/seccomp.h>
+
+#define SECCOMP_ARCH_NATIVE		AUDIT_ARCH_S390X
+#define SECCOMP_ARCH_NATIVE_NR		NR_syscalls
+#define SECCOMP_ARCH_NATIVE_NAME	"s390x"
+#ifdef CONFIG_COMPAT
+# define SECCOMP_ARCH_COMPAT		AUDIT_ARCH_S390
+# define SECCOMP_ARCH_COMPAT_NR		NR_syscalls
+# define SECCOMP_ARCH_COMPAT_NAME	"s390"
+#endif
+
+#endif	/* _ASM_S390_SECCOMP_H */
diff --git a/arch/s390/include/asm/sections.h b/arch/s390/include/asm/sections.h
new file mode 100644
index 0000000000..0486e6ef62
--- /dev/null
+++ b/arch/s390/include/asm/sections.h
@@ -0,0 +1,29 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _S390_SECTIONS_H
+#define _S390_SECTIONS_H
+
+#include <asm-generic/sections.h>
+
+/*
+ * .boot.data section contains variables "shared" between the decompressor and
+ * the decompressed kernel. The decompressor will store values in them, and
+ * copy over to the decompressed image before starting it.
+ *
+ * Each variable end up in its own intermediate section .boot.data.<var name>,
+ * those sections are later sorted by alignment + name and merged together into
+ * final .boot.data section, which should be identical in the decompressor and
+ * the decompressed kernel (that is checked during the build).
+ */
+#define __bootdata(var) __section(".boot.data." #var) var
+
+/*
+ * .boot.preserved.data is similar to .boot.data, but it is not part of the
+ * .init section and thus will be preserved for later use in the decompressed
+ * kernel.
+ */
+#define __bootdata_preserved(var) __section(".boot.preserved.data." #var) var
+
+extern char *__samode31, *__eamode31;
+extern char *__stext_amode31, *__etext_amode31;
+
+#endif
diff --git a/arch/s390/include/asm/set_memory.h b/arch/s390/include/asm/set_memory.h
new file mode 100644
index 0000000000..06fbabe2f6
--- /dev/null
+++ b/arch/s390/include/asm/set_memory.h
@@ -0,0 +1,66 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASMS390_SET_MEMORY_H
+#define _ASMS390_SET_MEMORY_H
+
+#include <linux/mutex.h>
+
+extern struct mutex cpa_mutex;
+
+enum {
+	_SET_MEMORY_RO_BIT,
+	_SET_MEMORY_RW_BIT,
+	_SET_MEMORY_NX_BIT,
+	_SET_MEMORY_X_BIT,
+	_SET_MEMORY_4K_BIT,
+	_SET_MEMORY_INV_BIT,
+	_SET_MEMORY_DEF_BIT,
+};
+
+#define SET_MEMORY_RO	BIT(_SET_MEMORY_RO_BIT)
+#define SET_MEMORY_RW	BIT(_SET_MEMORY_RW_BIT)
+#define SET_MEMORY_NX	BIT(_SET_MEMORY_NX_BIT)
+#define SET_MEMORY_X	BIT(_SET_MEMORY_X_BIT)
+#define SET_MEMORY_4K	BIT(_SET_MEMORY_4K_BIT)
+#define SET_MEMORY_INV	BIT(_SET_MEMORY_INV_BIT)
+#define SET_MEMORY_DEF	BIT(_SET_MEMORY_DEF_BIT)
+
+int __set_memory(unsigned long addr, unsigned long numpages, unsigned long flags);
+
+#define set_memory_rox set_memory_rox
+
+/*
+ * Generate two variants of each set_memory() function:
+ *
+ * set_memory_yy(unsigned long addr, int numpages);
+ * __set_memory_yy(void *start, void *end);
+ *
+ * The second variant exists for both convenience to avoid the usual
+ * (unsigned long) casts, but unlike the first variant it can also be used
+ * for areas larger than 8TB, which may happen at memory initialization.
+ */
+#define __SET_MEMORY_FUNC(fname, flags)					\
+static inline int fname(unsigned long addr, int numpages)		\
+{									\
+	return __set_memory(addr, numpages, (flags));			\
+}									\
+									\
+static inline int __##fname(void *start, void *end)			\
+{									\
+	unsigned long numpages;						\
+									\
+	numpages = (end - start) >> PAGE_SHIFT;				\
+	return __set_memory((unsigned long)start, numpages, (flags));	\
+}
+
+__SET_MEMORY_FUNC(set_memory_ro, SET_MEMORY_RO)
+__SET_MEMORY_FUNC(set_memory_rw, SET_MEMORY_RW)
+__SET_MEMORY_FUNC(set_memory_nx, SET_MEMORY_NX)
+__SET_MEMORY_FUNC(set_memory_x, SET_MEMORY_X)
+__SET_MEMORY_FUNC(set_memory_rox, SET_MEMORY_RO | SET_MEMORY_X)
+__SET_MEMORY_FUNC(set_memory_rwnx, SET_MEMORY_RW | SET_MEMORY_NX)
+__SET_MEMORY_FUNC(set_memory_4k, SET_MEMORY_4K)
+
+int set_direct_map_invalid_noflush(struct page *page);
+int set_direct_map_default_noflush(struct page *page);
+
+#endif
diff --git a/arch/s390/include/asm/setup.h b/arch/s390/include/asm/setup.h
new file mode 100644
index 0000000000..25cadc2b9c
--- /dev/null
+++ b/arch/s390/include/asm/setup.h
@@ -0,0 +1,161 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ *  S390 version
+ *    Copyright IBM Corp. 1999, 2017
+ */
+#ifndef _ASM_S390_SETUP_H
+#define _ASM_S390_SETUP_H
+
+#include <linux/bits.h>
+#include <uapi/asm/setup.h>
+#include <linux/build_bug.h>
+
+#define PARMAREA		0x10400
+
+#define COMMAND_LINE_SIZE CONFIG_COMMAND_LINE_SIZE
+/*
+ * Machine features detected in early.c
+ */
+
+#define MACHINE_FLAG_VM		BIT(0)
+#define MACHINE_FLAG_KVM	BIT(1)
+#define MACHINE_FLAG_LPAR	BIT(2)
+#define MACHINE_FLAG_DIAG9C	BIT(3)
+#define MACHINE_FLAG_ESOP	BIT(4)
+#define MACHINE_FLAG_IDTE	BIT(5)
+#define MACHINE_FLAG_EDAT1	BIT(7)
+#define MACHINE_FLAG_EDAT2	BIT(8)
+#define MACHINE_FLAG_TOPOLOGY	BIT(10)
+#define MACHINE_FLAG_TE		BIT(11)
+#define MACHINE_FLAG_TLB_LC	BIT(12)
+#define MACHINE_FLAG_VX		BIT(13)
+#define MACHINE_FLAG_TLB_GUEST	BIT(14)
+#define MACHINE_FLAG_NX		BIT(15)
+#define MACHINE_FLAG_GS		BIT(16)
+#define MACHINE_FLAG_SCC	BIT(17)
+#define MACHINE_FLAG_PCI_MIO	BIT(18)
+#define MACHINE_FLAG_RDP	BIT(19)
+
+#define LPP_MAGIC		BIT(31)
+#define LPP_PID_MASK		_AC(0xffffffff, UL)
+
+/* Offsets to entry points in kernel/head.S  */
+
+#define STARTUP_NORMAL_OFFSET	0x10000
+#define STARTUP_KDUMP_OFFSET	0x10010
+
+#define LEGACY_COMMAND_LINE_SIZE	896
+
+#ifndef __ASSEMBLY__
+
+#include <asm/lowcore.h>
+#include <asm/types.h>
+
+struct parmarea {
+	unsigned long ipl_device;			/* 0x10400 */
+	unsigned long initrd_start;			/* 0x10408 */
+	unsigned long initrd_size;			/* 0x10410 */
+	unsigned long oldmem_base;			/* 0x10418 */
+	unsigned long oldmem_size;			/* 0x10420 */
+	unsigned long kernel_version;			/* 0x10428 */
+	unsigned long max_command_line_size;		/* 0x10430 */
+	char pad1[0x10480-0x10438];			/* 0x10438 - 0x10480 */
+	char command_line[COMMAND_LINE_SIZE];		/* 0x10480 */
+};
+
+extern struct parmarea parmarea;
+
+extern unsigned int zlib_dfltcc_support;
+#define ZLIB_DFLTCC_DISABLED		0
+#define ZLIB_DFLTCC_FULL		1
+#define ZLIB_DFLTCC_DEFLATE_ONLY	2
+#define ZLIB_DFLTCC_INFLATE_ONLY	3
+#define ZLIB_DFLTCC_FULL_DEBUG		4
+
+extern unsigned long ident_map_size;
+extern unsigned long max_mappable;
+
+/* The Write Back bit position in the physaddr is given by the SLPC PCI */
+extern unsigned long mio_wb_bit_mask;
+
+#define MACHINE_IS_VM		(S390_lowcore.machine_flags & MACHINE_FLAG_VM)
+#define MACHINE_IS_KVM		(S390_lowcore.machine_flags & MACHINE_FLAG_KVM)
+#define MACHINE_IS_LPAR		(S390_lowcore.machine_flags & MACHINE_FLAG_LPAR)
+
+#define MACHINE_HAS_DIAG9C	(S390_lowcore.machine_flags & MACHINE_FLAG_DIAG9C)
+#define MACHINE_HAS_ESOP	(S390_lowcore.machine_flags & MACHINE_FLAG_ESOP)
+#define MACHINE_HAS_IDTE	(S390_lowcore.machine_flags & MACHINE_FLAG_IDTE)
+#define MACHINE_HAS_EDAT1	(S390_lowcore.machine_flags & MACHINE_FLAG_EDAT1)
+#define MACHINE_HAS_EDAT2	(S390_lowcore.machine_flags & MACHINE_FLAG_EDAT2)
+#define MACHINE_HAS_TOPOLOGY	(S390_lowcore.machine_flags & MACHINE_FLAG_TOPOLOGY)
+#define MACHINE_HAS_TE		(S390_lowcore.machine_flags & MACHINE_FLAG_TE)
+#define MACHINE_HAS_TLB_LC	(S390_lowcore.machine_flags & MACHINE_FLAG_TLB_LC)
+#define MACHINE_HAS_VX		(S390_lowcore.machine_flags & MACHINE_FLAG_VX)
+#define MACHINE_HAS_TLB_GUEST	(S390_lowcore.machine_flags & MACHINE_FLAG_TLB_GUEST)
+#define MACHINE_HAS_NX		(S390_lowcore.machine_flags & MACHINE_FLAG_NX)
+#define MACHINE_HAS_GS		(S390_lowcore.machine_flags & MACHINE_FLAG_GS)
+#define MACHINE_HAS_SCC		(S390_lowcore.machine_flags & MACHINE_FLAG_SCC)
+#define MACHINE_HAS_PCI_MIO	(S390_lowcore.machine_flags & MACHINE_FLAG_PCI_MIO)
+#define MACHINE_HAS_RDP		(S390_lowcore.machine_flags & MACHINE_FLAG_RDP)
+
+/*
+ * Console mode. Override with conmode=
+ */
+extern unsigned int console_mode;
+extern unsigned int console_devno;
+extern unsigned int console_irq;
+
+#define CONSOLE_IS_UNDEFINED	(console_mode == 0)
+#define CONSOLE_IS_SCLP		(console_mode == 1)
+#define CONSOLE_IS_3215		(console_mode == 2)
+#define CONSOLE_IS_3270		(console_mode == 3)
+#define CONSOLE_IS_VT220	(console_mode == 4)
+#define CONSOLE_IS_HVC		(console_mode == 5)
+#define SET_CONSOLE_SCLP	do { console_mode = 1; } while (0)
+#define SET_CONSOLE_3215	do { console_mode = 2; } while (0)
+#define SET_CONSOLE_3270	do { console_mode = 3; } while (0)
+#define SET_CONSOLE_VT220	do { console_mode = 4; } while (0)
+#define SET_CONSOLE_HVC		do { console_mode = 5; } while (0)
+
+#ifdef CONFIG_VMCP
+void vmcp_cma_reserve(void);
+#else
+static inline void vmcp_cma_reserve(void) { }
+#endif
+
+void report_user_fault(struct pt_regs *regs, long signr, int is_mm_fault);
+
+void cmma_init(void);
+void cmma_init_nodat(void);
+
+extern void (*_machine_restart)(char *command);
+extern void (*_machine_halt)(void);
+extern void (*_machine_power_off)(void);
+
+extern unsigned long __kaslr_offset;
+static inline unsigned long kaslr_offset(void)
+{
+	return __kaslr_offset;
+}
+
+extern int __kaslr_enabled;
+static inline int kaslr_enabled(void)
+{
+	if (IS_ENABLED(CONFIG_RANDOMIZE_BASE))
+		return __kaslr_enabled;
+	return 0;
+}
+
+struct oldmem_data {
+	unsigned long start;
+	unsigned long size;
+};
+extern struct oldmem_data oldmem_data;
+
+static __always_inline u32 gen_lpswe(unsigned long addr)
+{
+	BUILD_BUG_ON(addr > 0xfff);
+	return 0xb2b20000 | addr;
+}
+#endif /* __ASSEMBLY__ */
+#endif /* _ASM_S390_SETUP_H */
diff --git a/arch/s390/include/asm/signal.h b/arch/s390/include/asm/signal.h
new file mode 100644
index 0000000000..7daf4d8b5e
--- /dev/null
+++ b/arch/s390/include/asm/signal.h
@@ -0,0 +1,26 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ *  S390 version
+ *
+ *  Derived from "include/asm-i386/signal.h"
+ */
+#ifndef _ASMS390_SIGNAL_H
+#define _ASMS390_SIGNAL_H
+
+#include <uapi/asm/signal.h>
+
+/* Most things should be clean enough to redefine this at will, if care
+   is taken to make libc match.  */
+#include <asm/sigcontext.h>
+#define _NSIG           _SIGCONTEXT_NSIG
+#define _NSIG_BPW       _SIGCONTEXT_NSIG_BPW
+#define _NSIG_WORDS     _SIGCONTEXT_NSIG_WORDS
+
+typedef unsigned long old_sigset_t;             /* at least 32 bits */
+
+typedef struct {
+        unsigned long sig[_NSIG_WORDS];
+} sigset_t;
+
+#define __ARCH_HAS_SA_RESTORER
+#endif
diff --git a/arch/s390/include/asm/sigp.h b/arch/s390/include/asm/sigp.h
new file mode 100644
index 0000000000..edee63da08
--- /dev/null
+++ b/arch/s390/include/asm/sigp.h
@@ -0,0 +1,72 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __S390_ASM_SIGP_H
+#define __S390_ASM_SIGP_H
+
+/* SIGP order codes */
+#define SIGP_SENSE		      1
+#define SIGP_EXTERNAL_CALL	      2
+#define SIGP_EMERGENCY_SIGNAL	      3
+#define SIGP_START		      4
+#define SIGP_STOP		      5
+#define SIGP_RESTART		      6
+#define SIGP_STOP_AND_STORE_STATUS    9
+#define SIGP_INITIAL_CPU_RESET	     11
+#define SIGP_CPU_RESET		     12
+#define SIGP_SET_PREFIX		     13
+#define SIGP_STORE_STATUS_AT_ADDRESS 14
+#define SIGP_SET_ARCHITECTURE	     18
+#define SIGP_COND_EMERGENCY_SIGNAL   19
+#define SIGP_SENSE_RUNNING	     21
+#define SIGP_SET_MULTI_THREADING     22
+#define SIGP_STORE_ADDITIONAL_STATUS 23
+
+/* SIGP condition codes */
+#define SIGP_CC_ORDER_CODE_ACCEPTED 0
+#define SIGP_CC_STATUS_STORED	    1
+#define SIGP_CC_BUSY		    2
+#define SIGP_CC_NOT_OPERATIONAL	    3
+
+/* SIGP cpu status bits */
+
+#define SIGP_STATUS_INVALID_ORDER	0x00000002UL
+#define SIGP_STATUS_CHECK_STOP		0x00000010UL
+#define SIGP_STATUS_STOPPED		0x00000040UL
+#define SIGP_STATUS_EXT_CALL_PENDING	0x00000080UL
+#define SIGP_STATUS_INVALID_PARAMETER	0x00000100UL
+#define SIGP_STATUS_INCORRECT_STATE	0x00000200UL
+#define SIGP_STATUS_NOT_RUNNING		0x00000400UL
+
+#ifndef __ASSEMBLY__
+
+static inline int ____pcpu_sigp(u16 addr, u8 order, unsigned long parm,
+				u32 *status)
+{
+	union register_pair r1 = { .odd = parm, };
+	int cc;
+
+	asm volatile(
+		"	sigp	%[r1],%[addr],0(%[order])\n"
+		"	ipm	%[cc]\n"
+		"	srl	%[cc],28\n"
+		: [cc] "=&d" (cc), [r1] "+&d" (r1.pair)
+		: [addr] "d" (addr), [order] "a" (order)
+		: "cc");
+	*status = r1.even;
+	return cc;
+}
+
+static inline int __pcpu_sigp(u16 addr, u8 order, unsigned long parm,
+			      u32 *status)
+{
+	u32 _status;
+	int cc;
+
+	cc = ____pcpu_sigp(addr, order, parm, &_status);
+	if (status && cc == SIGP_CC_STATUS_STORED)
+		*status = _status;
+	return cc;
+}
+
+#endif /* __ASSEMBLY__ */
+
+#endif /* __S390_ASM_SIGP_H */
diff --git a/arch/s390/include/asm/smp.h b/arch/s390/include/asm/smp.h
new file mode 100644
index 0000000000..73ed278107
--- /dev/null
+++ b/arch/s390/include/asm/smp.h
@@ -0,0 +1,70 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ *    Copyright IBM Corp. 1999, 2012
+ *    Author(s): Denis Joseph Barrow,
+ *		 Martin Schwidefsky <schwidefsky@de.ibm.com>,
+ */
+#ifndef __ASM_SMP_H
+#define __ASM_SMP_H
+
+#include <asm/sigp.h>
+#include <asm/lowcore.h>
+#include <asm/processor.h>
+
+#define raw_smp_processor_id()	(S390_lowcore.cpu_nr)
+
+extern struct mutex smp_cpu_state_mutex;
+extern unsigned int smp_cpu_mt_shift;
+extern unsigned int smp_cpu_mtid;
+extern __vector128 __initdata boot_cpu_vector_save_area[__NUM_VXRS];
+extern cpumask_t cpu_setup_mask;
+
+extern int __cpu_up(unsigned int cpu, struct task_struct *tidle);
+
+extern void arch_send_call_function_single_ipi(int cpu);
+extern void arch_send_call_function_ipi_mask(const struct cpumask *mask);
+
+extern void smp_call_online_cpu(void (*func)(void *), void *);
+extern void smp_call_ipl_cpu(void (*func)(void *), void *);
+extern void smp_emergency_stop(void);
+
+extern int smp_find_processor_id(u16 address);
+extern int smp_store_status(int cpu);
+extern void smp_save_dump_ipl_cpu(void);
+extern void smp_save_dump_secondary_cpus(void);
+extern void smp_yield_cpu(int cpu);
+extern void smp_cpu_set_polarization(int cpu, int val);
+extern int smp_cpu_get_polarization(int cpu);
+extern int smp_cpu_get_cpu_address(int cpu);
+extern void smp_fill_possible_mask(void);
+extern void smp_detect_cpus(void);
+
+static inline void smp_stop_cpu(void)
+{
+	u16 pcpu = stap();
+
+	for (;;) {
+		__pcpu_sigp(pcpu, SIGP_STOP, 0, NULL);
+		cpu_relax();
+	}
+}
+
+/* Return thread 0 CPU number as base CPU */
+static inline int smp_get_base_cpu(int cpu)
+{
+	return cpu - (cpu % (smp_cpu_mtid + 1));
+}
+
+static inline void smp_cpus_done(unsigned int max_cpus)
+{
+}
+
+extern int smp_reinit_ipl_cpu(void);
+extern int smp_rescan_cpus(void);
+extern void __noreturn cpu_die(void);
+extern void __cpu_die(unsigned int cpu);
+extern int __cpu_disable(void);
+extern void schedule_mcck_handler(void);
+void notrace smp_yield_cpu(int cpu);
+
+#endif /* __ASM_SMP_H */
diff --git a/arch/s390/include/asm/softirq_stack.h b/arch/s390/include/asm/softirq_stack.h
new file mode 100644
index 0000000000..1ac5115d31
--- /dev/null
+++ b/arch/s390/include/asm/softirq_stack.h
@@ -0,0 +1,14 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+#ifndef __ASM_S390_SOFTIRQ_STACK_H
+#define __ASM_S390_SOFTIRQ_STACK_H
+
+#include <asm/lowcore.h>
+#include <asm/stacktrace.h>
+
+#ifdef CONFIG_SOFTIRQ_ON_OWN_STACK
+static inline void do_softirq_own_stack(void)
+{
+	call_on_stack(0, S390_lowcore.async_stack, void, __do_softirq);
+}
+#endif
+#endif /* __ASM_S390_SOFTIRQ_STACK_H */
diff --git a/arch/s390/include/asm/sparsemem.h b/arch/s390/include/asm/sparsemem.h
new file mode 100644
index 0000000000..c549893602
--- /dev/null
+++ b/arch/s390/include/asm/sparsemem.h
@@ -0,0 +1,8 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_S390_SPARSEMEM_H
+#define _ASM_S390_SPARSEMEM_H
+
+#define SECTION_SIZE_BITS	28
+#define MAX_PHYSMEM_BITS	CONFIG_MAX_PHYSMEM_BITS
+
+#endif /* _ASM_S390_SPARSEMEM_H */
diff --git a/arch/s390/include/asm/spinlock.h b/arch/s390/include/asm/spinlock.h
new file mode 100644
index 0000000000..37127cd774
--- /dev/null
+++ b/arch/s390/include/asm/spinlock.h
@@ -0,0 +1,148 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ *  S390 version
+ *    Copyright IBM Corp. 1999
+ *    Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com)
+ *
+ *  Derived from "include/asm-i386/spinlock.h"
+ */
+
+#ifndef __ASM_SPINLOCK_H
+#define __ASM_SPINLOCK_H
+
+#include <linux/smp.h>
+#include <asm/atomic_ops.h>
+#include <asm/barrier.h>
+#include <asm/processor.h>
+#include <asm/alternative.h>
+
+#define SPINLOCK_LOCKVAL (S390_lowcore.spinlock_lockval)
+
+extern int spin_retry;
+
+bool arch_vcpu_is_preempted(int cpu);
+
+#define vcpu_is_preempted arch_vcpu_is_preempted
+
+/*
+ * Simple spin lock operations.  There are two variants, one clears IRQ's
+ * on the local processor, one does not.
+ *
+ * We make no fairness assumptions. They have a cost.
+ *
+ * (the type definitions are in asm/spinlock_types.h)
+ */
+
+void arch_spin_relax(arch_spinlock_t *lock);
+#define arch_spin_relax	arch_spin_relax
+
+void arch_spin_lock_wait(arch_spinlock_t *);
+int arch_spin_trylock_retry(arch_spinlock_t *);
+void arch_spin_lock_setup(int cpu);
+
+static inline u32 arch_spin_lockval(int cpu)
+{
+	return cpu + 1;
+}
+
+static inline int arch_spin_value_unlocked(arch_spinlock_t lock)
+{
+	return lock.lock == 0;
+}
+
+static inline int arch_spin_is_locked(arch_spinlock_t *lp)
+{
+	return READ_ONCE(lp->lock) != 0;
+}
+
+static inline int arch_spin_trylock_once(arch_spinlock_t *lp)
+{
+	barrier();
+	return likely(__atomic_cmpxchg_bool(&lp->lock, 0, SPINLOCK_LOCKVAL));
+}
+
+static inline void arch_spin_lock(arch_spinlock_t *lp)
+{
+	if (!arch_spin_trylock_once(lp))
+		arch_spin_lock_wait(lp);
+}
+
+static inline int arch_spin_trylock(arch_spinlock_t *lp)
+{
+	if (!arch_spin_trylock_once(lp))
+		return arch_spin_trylock_retry(lp);
+	return 1;
+}
+
+static inline void arch_spin_unlock(arch_spinlock_t *lp)
+{
+	typecheck(int, lp->lock);
+	kcsan_release();
+	asm_inline volatile(
+		ALTERNATIVE("nop", ".insn rre,0xb2fa0000,7,0", 49) /* NIAI 7 */
+		"	sth	%1,%0\n"
+		: "=R" (((unsigned short *) &lp->lock)[1])
+		: "d" (0) : "cc", "memory");
+}
+
+/*
+ * Read-write spinlocks, allowing multiple readers
+ * but only one writer.
+ *
+ * NOTE! it is quite common to have readers in interrupts
+ * but no interrupt writers. For those circumstances we
+ * can "mix" irq-safe locks - any writer needs to get a
+ * irq-safe write-lock, but readers can get non-irqsafe
+ * read-locks.
+ */
+
+#define arch_read_relax(rw) barrier()
+#define arch_write_relax(rw) barrier()
+
+void arch_read_lock_wait(arch_rwlock_t *lp);
+void arch_write_lock_wait(arch_rwlock_t *lp);
+
+static inline void arch_read_lock(arch_rwlock_t *rw)
+{
+	int old;
+
+	old = __atomic_add(1, &rw->cnts);
+	if (old & 0xffff0000)
+		arch_read_lock_wait(rw);
+}
+
+static inline void arch_read_unlock(arch_rwlock_t *rw)
+{
+	__atomic_add_const_barrier(-1, &rw->cnts);
+}
+
+static inline void arch_write_lock(arch_rwlock_t *rw)
+{
+	if (!__atomic_cmpxchg_bool(&rw->cnts, 0, 0x30000))
+		arch_write_lock_wait(rw);
+}
+
+static inline void arch_write_unlock(arch_rwlock_t *rw)
+{
+	__atomic_add_barrier(-0x30000, &rw->cnts);
+}
+
+
+static inline int arch_read_trylock(arch_rwlock_t *rw)
+{
+	int old;
+
+	old = READ_ONCE(rw->cnts);
+	return (!(old & 0xffff0000) &&
+		__atomic_cmpxchg_bool(&rw->cnts, old, old + 1));
+}
+
+static inline int arch_write_trylock(arch_rwlock_t *rw)
+{
+	int old;
+
+	old = READ_ONCE(rw->cnts);
+	return !old && __atomic_cmpxchg_bool(&rw->cnts, 0, 0x30000);
+}
+
+#endif /* __ASM_SPINLOCK_H */
diff --git a/arch/s390/include/asm/spinlock_types.h b/arch/s390/include/asm/spinlock_types.h
new file mode 100644
index 0000000000..b69695e399
--- /dev/null
+++ b/arch/s390/include/asm/spinlock_types.h
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __ASM_SPINLOCK_TYPES_H
+#define __ASM_SPINLOCK_TYPES_H
+
+#ifndef __LINUX_SPINLOCK_TYPES_RAW_H
+# error "please don't include this file directly"
+#endif
+
+typedef struct {
+	int lock;
+} arch_spinlock_t;
+
+#define __ARCH_SPIN_LOCK_UNLOCKED { .lock = 0, }
+
+typedef struct {
+	int cnts;
+	arch_spinlock_t wait;
+} arch_rwlock_t;
+
+#define __ARCH_RW_LOCK_UNLOCKED		{ 0 }
+
+#endif
diff --git a/arch/s390/include/asm/stacktrace.h b/arch/s390/include/asm/stacktrace.h
new file mode 100644
index 0000000000..78f7b729b6
--- /dev/null
+++ b/arch/s390/include/asm/stacktrace.h
@@ -0,0 +1,241 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_S390_STACKTRACE_H
+#define _ASM_S390_STACKTRACE_H
+
+#include <linux/uaccess.h>
+#include <linux/ptrace.h>
+#include <asm/switch_to.h>
+
+enum stack_type {
+	STACK_TYPE_UNKNOWN,
+	STACK_TYPE_TASK,
+	STACK_TYPE_IRQ,
+	STACK_TYPE_NODAT,
+	STACK_TYPE_RESTART,
+	STACK_TYPE_MCCK,
+};
+
+struct stack_info {
+	enum stack_type type;
+	unsigned long begin, end;
+};
+
+const char *stack_type_name(enum stack_type type);
+int get_stack_info(unsigned long sp, struct task_struct *task,
+		   struct stack_info *info, unsigned long *visit_mask);
+
+static inline bool on_stack(struct stack_info *info,
+			    unsigned long addr, size_t len)
+{
+	if (info->type == STACK_TYPE_UNKNOWN)
+		return false;
+	if (addr + len < addr)
+		return false;
+	return addr >= info->begin && addr + len <= info->end;
+}
+
+/*
+ * Stack layout of a C stack frame.
+ * Kernel uses the packed stack layout (-mpacked-stack).
+ */
+struct stack_frame {
+	union {
+		unsigned long empty[9];
+		struct {
+			unsigned long sie_control_block;
+			unsigned long sie_savearea;
+			unsigned long sie_reason;
+			unsigned long sie_flags;
+			unsigned long sie_control_block_phys;
+		};
+	};
+	unsigned long gprs[10];
+	unsigned long back_chain;
+};
+
+/*
+ * Unlike current_stack_pointer which simply contains the current value of %r15
+ * current_frame_address() returns function stack frame address, which matches
+ * %r15 upon function invocation. It may differ from %r15 later if function
+ * allocates stack for local variables or new stack frame to call other
+ * functions.
+ */
+#define current_frame_address()						\
+	((unsigned long)__builtin_frame_address(0) -			\
+	 offsetof(struct stack_frame, back_chain))
+
+static __always_inline unsigned long get_stack_pointer(struct task_struct *task,
+						       struct pt_regs *regs)
+{
+	if (regs)
+		return (unsigned long)kernel_stack_pointer(regs);
+	if (task == current)
+		return current_frame_address();
+	return (unsigned long)task->thread.ksp;
+}
+
+/*
+ * To keep this simple mark register 2-6 as being changed (volatile)
+ * by the called function, even though register 6 is saved/nonvolatile.
+ */
+#define CALL_FMT_0 "=&d" (r2)
+#define CALL_FMT_1 "+&d" (r2)
+#define CALL_FMT_2 CALL_FMT_1, "+&d" (r3)
+#define CALL_FMT_3 CALL_FMT_2, "+&d" (r4)
+#define CALL_FMT_4 CALL_FMT_3, "+&d" (r5)
+#define CALL_FMT_5 CALL_FMT_4, "+&d" (r6)
+
+#define CALL_CLOBBER_5 "0", "1", "14", "cc", "memory"
+#define CALL_CLOBBER_4 CALL_CLOBBER_5
+#define CALL_CLOBBER_3 CALL_CLOBBER_4, "5"
+#define CALL_CLOBBER_2 CALL_CLOBBER_3, "4"
+#define CALL_CLOBBER_1 CALL_CLOBBER_2, "3"
+#define CALL_CLOBBER_0 CALL_CLOBBER_1
+
+#define CALL_LARGS_0(...)						\
+	long dummy = 0
+#define CALL_LARGS_1(t1, a1)						\
+	long arg1  = (long)(t1)(a1)
+#define CALL_LARGS_2(t1, a1, t2, a2)					\
+	CALL_LARGS_1(t1, a1);						\
+	long arg2 = (long)(t2)(a2)
+#define CALL_LARGS_3(t1, a1, t2, a2, t3, a3)				\
+	CALL_LARGS_2(t1, a1, t2, a2);					\
+	long arg3 = (long)(t3)(a3)
+#define CALL_LARGS_4(t1, a1, t2, a2, t3, a3, t4, a4)			\
+	CALL_LARGS_3(t1, a1, t2, a2, t3, a3);				\
+	long arg4  = (long)(t4)(a4)
+#define CALL_LARGS_5(t1, a1, t2, a2, t3, a3, t4, a4, t5, a5)		\
+	CALL_LARGS_4(t1, a1, t2, a2, t3, a3, t4, a4);			\
+	long arg5 = (long)(t5)(a5)
+
+#define CALL_REGS_0							\
+	register long r2 asm("2") = dummy
+#define CALL_REGS_1							\
+	register long r2 asm("2") = arg1
+#define CALL_REGS_2							\
+	CALL_REGS_1;							\
+	register long r3 asm("3") = arg2
+#define CALL_REGS_3							\
+	CALL_REGS_2;							\
+	register long r4 asm("4") = arg3
+#define CALL_REGS_4							\
+	CALL_REGS_3;							\
+	register long r5 asm("5") = arg4
+#define CALL_REGS_5							\
+	CALL_REGS_4;							\
+	register long r6 asm("6") = arg5
+
+#define CALL_TYPECHECK_0(...)
+#define CALL_TYPECHECK_1(t, a, ...)					\
+	typecheck(t, a)
+#define CALL_TYPECHECK_2(t, a, ...)					\
+	CALL_TYPECHECK_1(__VA_ARGS__);					\
+	typecheck(t, a)
+#define CALL_TYPECHECK_3(t, a, ...)					\
+	CALL_TYPECHECK_2(__VA_ARGS__);					\
+	typecheck(t, a)
+#define CALL_TYPECHECK_4(t, a, ...)					\
+	CALL_TYPECHECK_3(__VA_ARGS__);					\
+	typecheck(t, a)
+#define CALL_TYPECHECK_5(t, a, ...)					\
+	CALL_TYPECHECK_4(__VA_ARGS__);					\
+	typecheck(t, a)
+
+#define CALL_PARM_0(...) void
+#define CALL_PARM_1(t, a, ...) t
+#define CALL_PARM_2(t, a, ...) t, CALL_PARM_1(__VA_ARGS__)
+#define CALL_PARM_3(t, a, ...) t, CALL_PARM_2(__VA_ARGS__)
+#define CALL_PARM_4(t, a, ...) t, CALL_PARM_3(__VA_ARGS__)
+#define CALL_PARM_5(t, a, ...) t, CALL_PARM_4(__VA_ARGS__)
+#define CALL_PARM_6(t, a, ...) t, CALL_PARM_5(__VA_ARGS__)
+
+/*
+ * Use call_on_stack() to call a function switching to a specified
+ * stack. Proper sign and zero extension of function arguments is
+ * done. Usage:
+ *
+ * rc = call_on_stack(nr, stack, rettype, fn, t1, a1, t2, a2, ...)
+ *
+ * - nr specifies the number of function arguments of fn.
+ * - stack specifies the stack to be used.
+ * - fn is the function to be called.
+ * - rettype is the return type of fn.
+ * - t1, a1, ... are pairs, where t1 must match the type of the first
+ *   argument of fn, t2 the second, etc. a1 is the corresponding
+ *   first function argument (not name), etc.
+ */
+#define call_on_stack(nr, stack, rettype, fn, ...)			\
+({									\
+	rettype (*__fn)(CALL_PARM_##nr(__VA_ARGS__)) = fn;		\
+	unsigned long frame = current_frame_address();			\
+	unsigned long __stack = stack;					\
+	unsigned long prev;						\
+	CALL_LARGS_##nr(__VA_ARGS__);					\
+	CALL_REGS_##nr;							\
+									\
+	CALL_TYPECHECK_##nr(__VA_ARGS__);				\
+	asm volatile(							\
+		"	lgr	%[_prev],15\n"				\
+		"	lg	15,%[_stack]\n"				\
+		"	stg	%[_frame],%[_bc](15)\n"			\
+		"	brasl	14,%[_fn]\n"				\
+		"	lgr	15,%[_prev]\n"				\
+		: [_prev] "=&d" (prev), CALL_FMT_##nr			\
+		: [_stack] "R" (__stack),				\
+		  [_bc] "i" (offsetof(struct stack_frame, back_chain)),	\
+		  [_frame] "d" (frame),					\
+		  [_fn] "X" (__fn) : CALL_CLOBBER_##nr);		\
+	(rettype)r2;							\
+})
+
+/*
+ * Use call_nodat() to call a function with DAT disabled.
+ * Proper sign and zero extension of function arguments is done.
+ * Usage:
+ *
+ * rc = call_nodat(nr, rettype, fn, t1, a1, t2, a2, ...)
+ *
+ * - nr specifies the number of function arguments of fn.
+ * - fn is the function to be called, where fn is a physical address.
+ * - rettype is the return type of fn.
+ * - t1, a1, ... are pairs, where t1 must match the type of the first
+ *   argument of fn, t2 the second, etc. a1 is the corresponding
+ *   first function argument (not name), etc.
+ *
+ * fn() is called with standard C function call ABI, with the exception
+ * that no useful stackframe or stackpointer is passed via register 15.
+ * Therefore the called function must not use r15 to access the stack.
+ */
+#define call_nodat(nr, rettype, fn, ...)				\
+({									\
+	rettype (*__fn)(CALL_PARM_##nr(__VA_ARGS__)) = (fn);		\
+	/* aligned since psw_leave must not cross page boundary */	\
+	psw_t __aligned(16) psw_leave;					\
+	psw_t psw_enter;						\
+	CALL_LARGS_##nr(__VA_ARGS__);					\
+	CALL_REGS_##nr;							\
+									\
+	CALL_TYPECHECK_##nr(__VA_ARGS__);				\
+	psw_enter.mask = PSW_KERNEL_BITS & ~PSW_MASK_DAT;		\
+	psw_enter.addr = (unsigned long)__fn;				\
+	asm volatile(							\
+		"	epsw	0,1\n"					\
+		"	risbg	1,0,0,31,32\n"				\
+		"	larl	7,1f\n"					\
+		"	stg	1,%[psw_leave]\n"			\
+		"	stg	7,8+%[psw_leave]\n"			\
+		"	la	7,%[psw_leave]\n"			\
+		"	lra	7,0(7)\n"				\
+		"	larl	1,0f\n"					\
+		"	lra	14,0(1)\n"				\
+		"	lpswe	%[psw_enter]\n"				\
+		"0:	lpswe	0(7)\n"					\
+		"1:\n"							\
+		: CALL_FMT_##nr, [psw_leave] "=Q" (psw_leave)		\
+		: [psw_enter] "Q" (psw_enter)				\
+		: "7", CALL_CLOBBER_##nr);				\
+	(rettype)r2;							\
+})
+
+#endif /* _ASM_S390_STACKTRACE_H */
diff --git a/arch/s390/include/asm/stp.h b/arch/s390/include/asm/stp.h
new file mode 100644
index 0000000000..4d74d7e333
--- /dev/null
+++ b/arch/s390/include/asm/stp.h
@@ -0,0 +1,98 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ *  Copyright IBM Corp. 2006
+ *  Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com)
+ */
+#ifndef __S390_STP_H
+#define __S390_STP_H
+
+#include <linux/compiler.h>
+
+/* notifier for syncs */
+extern struct atomic_notifier_head s390_epoch_delta_notifier;
+
+/* STP interruption parameter */
+struct stp_irq_parm {
+	u32		: 14;
+	u32 tsc		:  1;	/* Timing status change */
+	u32 lac		:  1;	/* Link availability change */
+	u32 tcpc	:  1;	/* Time control parameter change */
+	u32		: 15;
+} __packed;
+
+#define STP_OP_SYNC	1
+#define STP_OP_CTRL	3
+
+struct stp_sstpi {
+	u32		: 32;
+	u32 tu		:  1;
+	u32 lu		:  1;
+	u32		:  6;
+	u32 stratum	:  8;
+	u32 vbits	: 16;
+	u32 leaps	: 16;
+	u32 tmd		:  4;
+	u32 ctn		:  4;
+	u32		:  3;
+	u32 c		:  1;
+	u32 tst		:  4;
+	u32 tzo		: 16;
+	u32 dsto	: 16;
+	u32 ctrl	: 16;
+	u32		: 16;
+	u32 tto;
+	u32		: 32;
+	u32 ctnid[3];
+	u32		: 32;
+	u64 todoff;
+	u32 rsvd[50];
+} __packed;
+
+struct stp_tzib {
+	u32 tzan	: 16;
+	u32		: 16;
+	u32 tzo		: 16;
+	u32 dsto	: 16;
+	u32 stn;
+	u32 dstn;
+	u64 dst_on_alg;
+	u64 dst_off_alg;
+} __packed;
+
+struct stp_tcpib {
+	u32 atcode	: 4;
+	u32 ntcode	: 4;
+	u32 d		: 1;
+	u32		: 23;
+	s32 tto;
+	struct stp_tzib atzib;
+	struct stp_tzib ntzib;
+	s32 adst_offset : 16;
+	s32 ndst_offset : 16;
+	u32 rsvd1;
+	u64 ntzib_update;
+	u64 ndsto_update;
+} __packed;
+
+struct stp_lsoib {
+	u32 p		: 1;
+	u32		: 31;
+	s32 also	: 16;
+	s32 nlso	: 16;
+	u64 nlsout;
+} __packed;
+
+struct stp_stzi {
+	u32 rsvd0[3];
+	u64 data_ts;
+	u32 rsvd1[22];
+	struct stp_tcpib tcpib;
+	struct stp_lsoib lsoib;
+} __packed;
+
+/* Functions needed by the machine check handler */
+int stp_sync_check(void);
+int stp_island_check(void);
+void stp_queue_work(void);
+
+#endif /* __S390_STP_H */
diff --git a/arch/s390/include/asm/string.h b/arch/s390/include/asm/string.h
new file mode 100644
index 0000000000..351685de53
--- /dev/null
+++ b/arch/s390/include/asm/string.h
@@ -0,0 +1,206 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ *  S390 version
+ *    Copyright IBM Corp. 1999
+ *    Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com),
+ */
+
+#ifndef _S390_STRING_H_
+#define _S390_STRING_H_
+
+#ifndef _LINUX_TYPES_H
+#include <linux/types.h>
+#endif
+
+#define __HAVE_ARCH_MEMCPY	/* gcc builtin & arch function */
+#define __HAVE_ARCH_MEMMOVE	/* gcc builtin & arch function */
+#define __HAVE_ARCH_MEMSET	/* gcc builtin & arch function */
+#define __HAVE_ARCH_MEMSET16	/* arch function */
+#define __HAVE_ARCH_MEMSET32	/* arch function */
+#define __HAVE_ARCH_MEMSET64	/* arch function */
+
+void *memcpy(void *dest, const void *src, size_t n);
+void *memset(void *s, int c, size_t n);
+void *memmove(void *dest, const void *src, size_t n);
+
+#ifndef CONFIG_KASAN
+#define __HAVE_ARCH_MEMCHR	/* inline & arch function */
+#define __HAVE_ARCH_MEMCMP	/* arch function */
+#define __HAVE_ARCH_MEMSCAN	/* inline & arch function */
+#define __HAVE_ARCH_STRCAT	/* inline & arch function */
+#define __HAVE_ARCH_STRCMP	/* arch function */
+#define __HAVE_ARCH_STRCPY	/* inline & arch function */
+#define __HAVE_ARCH_STRLCAT	/* arch function */
+#define __HAVE_ARCH_STRLEN	/* inline & arch function */
+#define __HAVE_ARCH_STRNCAT	/* arch function */
+#define __HAVE_ARCH_STRNCPY	/* arch function */
+#define __HAVE_ARCH_STRNLEN	/* inline & arch function */
+#define __HAVE_ARCH_STRSTR	/* arch function */
+
+/* Prototypes for non-inlined arch strings functions. */
+int memcmp(const void *s1, const void *s2, size_t n);
+int strcmp(const char *s1, const char *s2);
+size_t strlcat(char *dest, const char *src, size_t n);
+char *strncat(char *dest, const char *src, size_t n);
+char *strncpy(char *dest, const char *src, size_t n);
+char *strstr(const char *s1, const char *s2);
+#endif /* !CONFIG_KASAN */
+
+#undef __HAVE_ARCH_STRCHR
+#undef __HAVE_ARCH_STRNCHR
+#undef __HAVE_ARCH_STRNCMP
+#undef __HAVE_ARCH_STRPBRK
+#undef __HAVE_ARCH_STRSEP
+#undef __HAVE_ARCH_STRSPN
+
+#if defined(CONFIG_KASAN) && !defined(__SANITIZE_ADDRESS__)
+
+#define strlen(s) __strlen(s)
+
+#define __no_sanitize_prefix_strfunc(x) __##x
+
+#ifndef __NO_FORTIFY
+#define __NO_FORTIFY /* FORTIFY_SOURCE uses __builtin_memcpy, etc. */
+#endif
+
+#else
+#define __no_sanitize_prefix_strfunc(x) x
+#endif /* defined(CONFIG_KASAN) && !defined(__SANITIZE_ADDRESS__) */
+
+void *__memcpy(void *dest, const void *src, size_t n);
+void *__memset(void *s, int c, size_t n);
+void *__memmove(void *dest, const void *src, size_t n);
+void *__memset16(uint16_t *s, uint16_t v, size_t count);
+void *__memset32(uint32_t *s, uint32_t v, size_t count);
+void *__memset64(uint64_t *s, uint64_t v, size_t count);
+
+static inline void *memset16(uint16_t *s, uint16_t v, size_t count)
+{
+	return __memset16(s, v, count * sizeof(v));
+}
+
+static inline void *memset32(uint32_t *s, uint32_t v, size_t count)
+{
+	return __memset32(s, v, count * sizeof(v));
+}
+
+static inline void *memset64(uint64_t *s, uint64_t v, size_t count)
+{
+	return __memset64(s, v, count * sizeof(v));
+}
+
+#if !defined(IN_ARCH_STRING_C) && (!defined(CONFIG_FORTIFY_SOURCE) || defined(__NO_FORTIFY))
+
+#ifdef __HAVE_ARCH_MEMCHR
+static inline void *memchr(const void * s, int c, size_t n)
+{
+	const void *ret = s + n;
+
+	asm volatile(
+		"	lgr	0,%[c]\n"
+		"0:	srst	%[ret],%[s]\n"
+		"	jo	0b\n"
+		"	jl	1f\n"
+		"	la	%[ret],0\n"
+		"1:"
+		: [ret] "+&a" (ret), [s] "+&a" (s)
+		: [c] "d" (c)
+		: "cc", "memory", "0");
+	return (void *) ret;
+}
+#endif
+
+#ifdef __HAVE_ARCH_MEMSCAN
+static inline void *memscan(void *s, int c, size_t n)
+{
+	const void *ret = s + n;
+
+	asm volatile(
+		"	lgr	0,%[c]\n"
+		"0:	srst	%[ret],%[s]\n"
+		"	jo	0b\n"
+		: [ret] "+&a" (ret), [s] "+&a" (s)
+		: [c] "d" (c)
+		: "cc", "memory", "0");
+	return (void *) ret;
+}
+#endif
+
+#ifdef __HAVE_ARCH_STRCAT
+static inline char *strcat(char *dst, const char *src)
+{
+	unsigned long dummy = 0;
+	char *ret = dst;
+
+	asm volatile(
+		"	lghi	0,0\n"
+		"0:	srst	%[dummy],%[dst]\n"
+		"	jo	0b\n"
+		"1:	mvst	%[dummy],%[src]\n"
+		"	jo	1b"
+		: [dummy] "+&a" (dummy), [dst] "+&a" (dst), [src] "+&a" (src)
+		:
+		: "cc", "memory", "0");
+	return ret;
+}
+#endif
+
+#ifdef __HAVE_ARCH_STRCPY
+static inline char *strcpy(char *dst, const char *src)
+{
+	char *ret = dst;
+
+	asm volatile(
+		"	lghi	0,0\n"
+		"0:	mvst	%[dst],%[src]\n"
+		"	jo	0b"
+		: [dst] "+&a" (dst), [src] "+&a" (src)
+		:
+		: "cc", "memory", "0");
+	return ret;
+}
+#endif
+
+#if defined(__HAVE_ARCH_STRLEN) || (defined(CONFIG_KASAN) && !defined(__SANITIZE_ADDRESS__))
+static inline size_t __no_sanitize_prefix_strfunc(strlen)(const char *s)
+{
+	unsigned long end = 0;
+	const char *tmp = s;
+
+	asm volatile(
+		"	lghi	0,0\n"
+		"0:	srst	%[end],%[tmp]\n"
+		"	jo	0b"
+		: [end] "+&a" (end), [tmp] "+&a" (tmp)
+		:
+		: "cc", "memory", "0");
+	return end - (unsigned long)s;
+}
+#endif
+
+#ifdef __HAVE_ARCH_STRNLEN
+static inline size_t strnlen(const char * s, size_t n)
+{
+	const char *tmp = s;
+	const char *end = s + n;
+
+	asm volatile(
+		"	lghi	0,0\n"
+		"0:	srst	%[end],%[tmp]\n"
+		"	jo	0b"
+		: [end] "+&a" (end), [tmp] "+&a" (tmp)
+		:
+		: "cc", "memory", "0");
+	return end - s;
+}
+#endif
+#else /* IN_ARCH_STRING_C */
+void *memchr(const void * s, int c, size_t n);
+void *memscan(void *s, int c, size_t n);
+char *strcat(char *dst, const char *src);
+char *strcpy(char *dst, const char *src);
+size_t strlen(const char *s);
+size_t strnlen(const char * s, size_t n);
+#endif /* !IN_ARCH_STRING_C */
+
+#endif /* __S390_STRING_H_ */
diff --git a/arch/s390/include/asm/switch_to.h b/arch/s390/include/asm/switch_to.h
new file mode 100644
index 0000000000..c61b2cc1a8
--- /dev/null
+++ b/arch/s390/include/asm/switch_to.h
@@ -0,0 +1,49 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright IBM Corp. 1999, 2009
+ *
+ * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>
+ */
+
+#ifndef __ASM_SWITCH_TO_H
+#define __ASM_SWITCH_TO_H
+
+#include <linux/thread_info.h>
+#include <asm/fpu/api.h>
+#include <asm/ptrace.h>
+#include <asm/guarded_storage.h>
+
+extern struct task_struct *__switch_to(void *, void *);
+extern void update_cr_regs(struct task_struct *task);
+
+static inline void save_access_regs(unsigned int *acrs)
+{
+	typedef struct { int _[NUM_ACRS]; } acrstype;
+
+	asm volatile("stam 0,15,%0" : "=Q" (*(acrstype *)acrs));
+}
+
+static inline void restore_access_regs(unsigned int *acrs)
+{
+	typedef struct { int _[NUM_ACRS]; } acrstype;
+
+	asm volatile("lam 0,15,%0" : : "Q" (*(acrstype *)acrs));
+}
+
+#define switch_to(prev, next, last) do {				\
+	/* save_fpu_regs() sets the CIF_FPU flag, which enforces	\
+	 * a restore of the floating point / vector registers as	\
+	 * soon as the next task returns to user space			\
+	 */								\
+	save_fpu_regs();						\
+	save_access_regs(&prev->thread.acrs[0]);			\
+	save_ri_cb(prev->thread.ri_cb);					\
+	save_gs_cb(prev->thread.gs_cb);					\
+	update_cr_regs(next);						\
+	restore_access_regs(&next->thread.acrs[0]);			\
+	restore_ri_cb(next->thread.ri_cb, prev->thread.ri_cb);		\
+	restore_gs_cb(next->thread.gs_cb);				\
+	prev = __switch_to(prev, next);					\
+} while (0)
+
+#endif /* __ASM_SWITCH_TO_H */
diff --git a/arch/s390/include/asm/syscall.h b/arch/s390/include/asm/syscall.h
new file mode 100644
index 0000000000..27e3d804b3
--- /dev/null
+++ b/arch/s390/include/asm/syscall.h
@@ -0,0 +1,154 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Access to user system call parameters and results
+ *
+ *  Copyright IBM Corp. 2008
+ *  Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com)
+ */
+
+#ifndef _ASM_SYSCALL_H
+#define _ASM_SYSCALL_H	1
+
+#include <uapi/linux/audit.h>
+#include <linux/sched.h>
+#include <linux/err.h>
+#include <asm/ptrace.h>
+
+extern const sys_call_ptr_t sys_call_table[];
+extern const sys_call_ptr_t sys_call_table_emu[];
+
+static inline long syscall_get_nr(struct task_struct *task,
+				  struct pt_regs *regs)
+{
+	return test_pt_regs_flag(regs, PIF_SYSCALL) ?
+		(regs->int_code & 0xffff) : -1;
+}
+
+static inline void syscall_rollback(struct task_struct *task,
+				    struct pt_regs *regs)
+{
+	regs->gprs[2] = regs->orig_gpr2;
+}
+
+static inline long syscall_get_error(struct task_struct *task,
+				     struct pt_regs *regs)
+{
+	unsigned long error = regs->gprs[2];
+#ifdef CONFIG_COMPAT
+	if (test_tsk_thread_flag(task, TIF_31BIT)) {
+		/*
+		 * Sign-extend the value so (int)-EFOO becomes (long)-EFOO
+		 * and will match correctly in comparisons.
+		 */
+		error = (long)(int)error;
+	}
+#endif
+	return IS_ERR_VALUE(error) ? error : 0;
+}
+
+static inline long syscall_get_return_value(struct task_struct *task,
+					    struct pt_regs *regs)
+{
+	return regs->gprs[2];
+}
+
+static inline void syscall_set_return_value(struct task_struct *task,
+					    struct pt_regs *regs,
+					    int error, long val)
+{
+	set_pt_regs_flag(regs, PIF_SYSCALL_RET_SET);
+	regs->gprs[2] = error ? error : val;
+}
+
+static inline void syscall_get_arguments(struct task_struct *task,
+					 struct pt_regs *regs,
+					 unsigned long *args)
+{
+	unsigned long mask = -1UL;
+	unsigned int n = 6;
+
+#ifdef CONFIG_COMPAT
+	if (test_tsk_thread_flag(task, TIF_31BIT))
+		mask = 0xffffffff;
+#endif
+	while (n-- > 0)
+		if (n > 0)
+			args[n] = regs->gprs[2 + n] & mask;
+
+	args[0] = regs->orig_gpr2 & mask;
+}
+
+static inline int syscall_get_arch(struct task_struct *task)
+{
+#ifdef CONFIG_COMPAT
+	if (test_tsk_thread_flag(task, TIF_31BIT))
+		return AUDIT_ARCH_S390;
+#endif
+	return AUDIT_ARCH_S390X;
+}
+
+static inline bool arch_syscall_is_vdso_sigreturn(struct pt_regs *regs)
+{
+	return false;
+}
+
+#define SYSCALL_FMT_0
+#define SYSCALL_FMT_1 , "0" (r2)
+#define SYSCALL_FMT_2 , "d" (r3) SYSCALL_FMT_1
+#define SYSCALL_FMT_3 , "d" (r4) SYSCALL_FMT_2
+#define SYSCALL_FMT_4 , "d" (r5) SYSCALL_FMT_3
+#define SYSCALL_FMT_5 , "d" (r6) SYSCALL_FMT_4
+#define SYSCALL_FMT_6 , "d" (r7) SYSCALL_FMT_5
+
+#define SYSCALL_PARM_0
+#define SYSCALL_PARM_1 , long arg1
+#define SYSCALL_PARM_2 SYSCALL_PARM_1, long arg2
+#define SYSCALL_PARM_3 SYSCALL_PARM_2, long arg3
+#define SYSCALL_PARM_4 SYSCALL_PARM_3, long arg4
+#define SYSCALL_PARM_5 SYSCALL_PARM_4, long arg5
+#define SYSCALL_PARM_6 SYSCALL_PARM_5, long arg6
+
+#define SYSCALL_REGS_0
+#define SYSCALL_REGS_1							\
+	register long r2 asm("2") = arg1
+#define SYSCALL_REGS_2							\
+	SYSCALL_REGS_1;							\
+	register long r3 asm("3") = arg2
+#define SYSCALL_REGS_3							\
+	SYSCALL_REGS_2;							\
+	register long r4 asm("4") = arg3
+#define SYSCALL_REGS_4							\
+	SYSCALL_REGS_3;							\
+	register long r5 asm("5") = arg4
+#define SYSCALL_REGS_5							\
+	SYSCALL_REGS_4;							\
+	register long r6 asm("6") = arg5
+#define SYSCALL_REGS_6							\
+	SYSCALL_REGS_5;							\
+	register long r7 asm("7") = arg6
+
+#define GENERATE_SYSCALL_FUNC(nr)					\
+static __always_inline							\
+long syscall##nr(unsigned long syscall SYSCALL_PARM_##nr)		\
+{									\
+	register unsigned long r1 asm ("1") = syscall;			\
+	register long rc asm ("2");					\
+	SYSCALL_REGS_##nr;						\
+									\
+	asm volatile (							\
+		"	svc	0\n"					\
+		: "=d" (rc)						\
+		: "d" (r1) SYSCALL_FMT_##nr				\
+		: "memory");						\
+	return rc;							\
+}
+
+GENERATE_SYSCALL_FUNC(0)
+GENERATE_SYSCALL_FUNC(1)
+GENERATE_SYSCALL_FUNC(2)
+GENERATE_SYSCALL_FUNC(3)
+GENERATE_SYSCALL_FUNC(4)
+GENERATE_SYSCALL_FUNC(5)
+GENERATE_SYSCALL_FUNC(6)
+
+#endif	/* _ASM_SYSCALL_H */
diff --git a/arch/s390/include/asm/syscall_wrapper.h b/arch/s390/include/asm/syscall_wrapper.h
new file mode 100644
index 0000000000..35c1d1b860
--- /dev/null
+++ b/arch/s390/include/asm/syscall_wrapper.h
@@ -0,0 +1,140 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * syscall_wrapper.h - s390 specific wrappers to syscall definitions
+ *
+ */
+
+#ifndef _ASM_S390_SYSCALL_WRAPPER_H
+#define _ASM_S390_SYSCALL_WRAPPER_H
+
+/* Mapping of registers to parameters for syscalls */
+#define SC_S390_REGS_TO_ARGS(x, ...)					\
+	__MAP(x, __SC_ARGS						\
+	      ,, regs->orig_gpr2,, regs->gprs[3],, regs->gprs[4]	\
+	      ,, regs->gprs[5],, regs->gprs[6],, regs->gprs[7])
+
+#ifdef CONFIG_COMPAT
+
+#define __SC_COMPAT_CAST(t, a)						\
+({									\
+	long __ReS = a;							\
+									\
+	BUILD_BUG_ON((sizeof(t) > 4) && !__TYPE_IS_L(t) &&		\
+		     !__TYPE_IS_UL(t) && !__TYPE_IS_PTR(t) &&		\
+		     !__TYPE_IS_LL(t));					\
+	if (__TYPE_IS_L(t))						\
+		__ReS = (s32)a;						\
+	if (__TYPE_IS_UL(t))						\
+		__ReS = (u32)a;						\
+	if (__TYPE_IS_PTR(t))						\
+		__ReS = a & 0x7fffffff;					\
+	if (__TYPE_IS_LL(t))						\
+		return -ENOSYS;						\
+	(t)__ReS;							\
+})
+
+/*
+ * To keep the naming coherent, re-define SYSCALL_DEFINE0 to create an alias
+ * named __s390x_sys_*()
+ */
+#define COMPAT_SYSCALL_DEFINE0(sname)					\
+	long __s390_compat_sys_##sname(void);				\
+	ALLOW_ERROR_INJECTION(__s390_compat_sys_##sname, ERRNO);	\
+	long __s390_compat_sys_##sname(void)
+
+#define SYSCALL_DEFINE0(sname)						\
+	SYSCALL_METADATA(_##sname, 0);					\
+	long __s390_sys_##sname(void);					\
+	ALLOW_ERROR_INJECTION(__s390_sys_##sname, ERRNO);		\
+	long __s390x_sys_##sname(void);					\
+	ALLOW_ERROR_INJECTION(__s390x_sys_##sname, ERRNO);		\
+	static inline long __do_sys_##sname(void);			\
+	long __s390_sys_##sname(void)					\
+	{								\
+		return __do_sys_##sname();				\
+	}								\
+	long __s390x_sys_##sname(void)					\
+	{								\
+		return __do_sys_##sname();				\
+	}								\
+	static inline long __do_sys_##sname(void)
+
+#define COND_SYSCALL(name)						\
+	cond_syscall(__s390x_sys_##name);				\
+	cond_syscall(__s390_sys_##name)
+
+#define COMPAT_SYSCALL_DEFINEx(x, name, ...)						\
+	long __s390_compat_sys##name(struct pt_regs *regs);				\
+	ALLOW_ERROR_INJECTION(__s390_compat_sys##name, ERRNO);				\
+	static inline long __se_compat_sys##name(__MAP(x, __SC_LONG, __VA_ARGS__));	\
+	static inline long __do_compat_sys##name(__MAP(x, __SC_DECL, __VA_ARGS__));	\
+	long __s390_compat_sys##name(struct pt_regs *regs)				\
+	{										\
+		return __se_compat_sys##name(SC_S390_REGS_TO_ARGS(x, __VA_ARGS__));	\
+	}										\
+	static inline long __se_compat_sys##name(__MAP(x, __SC_LONG, __VA_ARGS__))	\
+	{										\
+		__MAP(x, __SC_TEST, __VA_ARGS__);					\
+		return __do_compat_sys##name(__MAP(x, __SC_DELOUSE, __VA_ARGS__));	\
+	}										\
+	static inline long __do_compat_sys##name(__MAP(x, __SC_DECL, __VA_ARGS__))
+
+/*
+ * As some compat syscalls may not be implemented, we need to expand
+ * COND_SYSCALL_COMPAT in kernel/sys_ni.c to cover this case as well.
+ */
+#define COND_SYSCALL_COMPAT(name)					\
+	cond_syscall(__s390_compat_sys_##name)
+
+#define __S390_SYS_STUBx(x, name, ...)						\
+	long __s390_sys##name(struct pt_regs *regs);				\
+	ALLOW_ERROR_INJECTION(__s390_sys##name, ERRNO);				\
+	static inline long ___se_sys##name(__MAP(x, __SC_LONG, __VA_ARGS__));	\
+	long __s390_sys##name(struct pt_regs *regs)				\
+	{									\
+		return ___se_sys##name(SC_S390_REGS_TO_ARGS(x, __VA_ARGS__));	\
+	}									\
+	static inline long ___se_sys##name(__MAP(x, __SC_LONG, __VA_ARGS__))	\
+	{									\
+		__MAP(x, __SC_TEST, __VA_ARGS__);				\
+		return __do_sys##name(__MAP(x, __SC_COMPAT_CAST, __VA_ARGS__));	\
+	}
+
+#else /* CONFIG_COMPAT */
+
+#define SYSCALL_DEFINE0(sname)						\
+	SYSCALL_METADATA(_##sname, 0);					\
+	long __s390x_sys_##sname(void);					\
+	ALLOW_ERROR_INJECTION(__s390x_sys_##sname, ERRNO);		\
+	static inline long __do_sys_##sname(void);			\
+	long __s390x_sys_##sname(void)					\
+	{								\
+		return __do_sys_##sname();				\
+	}								\
+	static inline long __do_sys_##sname(void)
+
+#define COND_SYSCALL(name)						\
+	cond_syscall(__s390x_sys_##name)
+
+#define __S390_SYS_STUBx(x, fullname, name, ...)
+
+#endif /* CONFIG_COMPAT */
+
+#define __SYSCALL_DEFINEx(x, name, ...)						\
+	long __s390x_sys##name(struct pt_regs *regs);				\
+	ALLOW_ERROR_INJECTION(__s390x_sys##name, ERRNO);			\
+	static inline long __se_sys##name(__MAP(x, __SC_LONG, __VA_ARGS__));	\
+	static inline long __do_sys##name(__MAP(x, __SC_DECL, __VA_ARGS__));	\
+	__S390_SYS_STUBx(x, name, __VA_ARGS__);					\
+	long __s390x_sys##name(struct pt_regs *regs)				\
+	{									\
+		return __se_sys##name(SC_S390_REGS_TO_ARGS(x, __VA_ARGS__));	\
+	}									\
+	static inline long __se_sys##name(__MAP(x, __SC_LONG, __VA_ARGS__))	\
+	{									\
+		__MAP(x, __SC_TEST, __VA_ARGS__);				\
+		return __do_sys##name(__MAP(x, __SC_CAST, __VA_ARGS__));	\
+	}									\
+	static inline long __do_sys##name(__MAP(x, __SC_DECL, __VA_ARGS__))
+
+#endif /* _ASM_S390_SYSCALL_WRAPPER_H */
diff --git a/arch/s390/include/asm/sysinfo.h b/arch/s390/include/asm/sysinfo.h
new file mode 100644
index 0000000000..ab1c631605
--- /dev/null
+++ b/arch/s390/include/asm/sysinfo.h
@@ -0,0 +1,201 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * definition for store system information stsi
+ *
+ * Copyright IBM Corp. 2001, 2008
+ *
+ *    Author(s): Ulrich Weigand <weigand@de.ibm.com>
+ *		 Christian Borntraeger <borntraeger@de.ibm.com>
+ */
+
+#ifndef __ASM_S390_SYSINFO_H
+#define __ASM_S390_SYSINFO_H
+
+#include <asm/bitsperlong.h>
+#include <linux/uuid.h>
+
+struct sysinfo_1_1_1 {
+	unsigned char p:1;
+	unsigned char :6;
+	unsigned char t:1;
+	unsigned char :8;
+	unsigned char ccr;
+	unsigned char cai;
+	char reserved_0[20];
+	unsigned long lic;
+	char manufacturer[16];
+	char type[4];
+	char reserved_1[12];
+	char model_capacity[16];
+	char sequence[16];
+	char plant[4];
+	char model[16];
+	char model_perm_cap[16];
+	char model_temp_cap[16];
+	unsigned int model_cap_rating;
+	unsigned int model_perm_cap_rating;
+	unsigned int model_temp_cap_rating;
+	unsigned char typepct[5];
+	unsigned char reserved_2[3];
+	unsigned int ncr;
+	unsigned int npr;
+	unsigned int ntr;
+};
+
+struct sysinfo_1_2_1 {
+	char reserved_0[80];
+	char sequence[16];
+	char plant[4];
+	char reserved_1[2];
+	unsigned short cpu_address;
+};
+
+struct sysinfo_1_2_2 {
+	char format;
+	char reserved_0[1];
+	unsigned short acc_offset;
+	unsigned char mt_installed :1;
+	unsigned char :2;
+	unsigned char mt_stid :5;
+	unsigned char :3;
+	unsigned char mt_gtid :5;
+	char reserved_1[18];
+	unsigned int nominal_cap;
+	unsigned int secondary_cap;
+	unsigned int capability;
+	unsigned short cpus_total;
+	unsigned short cpus_configured;
+	unsigned short cpus_standby;
+	unsigned short cpus_reserved;
+	unsigned short adjustment[];
+};
+
+struct sysinfo_1_2_2_extension {
+	unsigned int alt_capability;
+	unsigned short alt_adjustment[];
+};
+
+struct sysinfo_2_2_1 {
+	char reserved_0[80];
+	char sequence[16];
+	char plant[4];
+	unsigned short cpu_id;
+	unsigned short cpu_address;
+};
+
+struct sysinfo_2_2_2 {
+	char reserved_0[32];
+	unsigned short lpar_number;
+	char reserved_1;
+	unsigned char characteristics;
+	unsigned short cpus_total;
+	unsigned short cpus_configured;
+	unsigned short cpus_standby;
+	unsigned short cpus_reserved;
+	char name[8];
+	unsigned int caf;
+	char reserved_2[8];
+	unsigned char mt_installed :1;
+	unsigned char :2;
+	unsigned char mt_stid :5;
+	unsigned char :3;
+	unsigned char mt_gtid :5;
+	unsigned char :3;
+	unsigned char mt_psmtid :5;
+	char reserved_3[5];
+	unsigned short cpus_dedicated;
+	unsigned short cpus_shared;
+	char reserved_4[3];
+	unsigned char vsne;
+	uuid_t uuid;
+	char reserved_5[160];
+	char ext_name[256];
+};
+
+#define LPAR_CHAR_DEDICATED	(1 << 7)
+#define LPAR_CHAR_SHARED	(1 << 6)
+#define LPAR_CHAR_LIMITED	(1 << 5)
+
+struct sysinfo_3_2_2 {
+	char reserved_0[31];
+	unsigned char :4;
+	unsigned char count:4;
+	struct {
+		char reserved_0[4];
+		unsigned short cpus_total;
+		unsigned short cpus_configured;
+		unsigned short cpus_standby;
+		unsigned short cpus_reserved;
+		char name[8];
+		unsigned int caf;
+		char cpi[16];
+		char reserved_1[3];
+		unsigned char evmne;
+		unsigned int reserved_2;
+		uuid_t uuid;
+	} vm[8];
+	char reserved_3[1504];
+	char ext_names[8][256];
+};
+
+extern int topology_max_mnest;
+
+/*
+ * Returns the maximum nesting level supported by the cpu topology code.
+ * The current maximum level is 4 which is the drawer level.
+ */
+static inline unsigned char topology_mnest_limit(void)
+{
+	return min(topology_max_mnest, 4);
+}
+
+#define TOPOLOGY_NR_MAG		6
+
+struct topology_core {
+	unsigned char nl;
+	unsigned char reserved0[3];
+	unsigned char :5;
+	unsigned char d:1;
+	unsigned char pp:2;
+	unsigned char reserved1;
+	unsigned short origin;
+	unsigned long mask;
+};
+
+struct topology_container {
+	unsigned char nl;
+	unsigned char reserved[6];
+	unsigned char id;
+};
+
+union topology_entry {
+	unsigned char nl;
+	struct topology_core cpu;
+	struct topology_container container;
+};
+
+struct sysinfo_15_1_x {
+	unsigned char reserved0[2];
+	unsigned short length;
+	unsigned char mag[TOPOLOGY_NR_MAG];
+	unsigned char reserved1;
+	unsigned char mnest;
+	unsigned char reserved2[4];
+	union topology_entry tle[];
+};
+
+int stsi(void *sysinfo, int fc, int sel1, int sel2);
+
+/*
+ * Service level reporting interface.
+ */
+struct service_level {
+	struct list_head list;
+	void (*seq_print)(struct seq_file *, struct service_level *);
+};
+
+int register_service_level(struct service_level *);
+int unregister_service_level(struct service_level *);
+
+int sthyi_fill(void *dst, u64 *rc);
+#endif /* __ASM_S390_SYSINFO_H */
diff --git a/arch/s390/include/asm/text-patching.h b/arch/s390/include/asm/text-patching.h
new file mode 100644
index 0000000000..b219056a88
--- /dev/null
+++ b/arch/s390/include/asm/text-patching.h
@@ -0,0 +1,16 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#ifndef _ASM_S390_TEXT_PATCHING_H
+#define _ASM_S390_TEXT_PATCHING_H
+
+#include <asm/barrier.h>
+
+static __always_inline void sync_core(void)
+{
+	bcr_serialize();
+}
+
+void text_poke_sync(void);
+void text_poke_sync_lock(void);
+
+#endif /* _ASM_S390_TEXT_PATCHING_H */
diff --git a/arch/s390/include/asm/thread_info.h b/arch/s390/include/asm/thread_info.h
new file mode 100644
index 0000000000..a674c7d25d
--- /dev/null
+++ b/arch/s390/include/asm/thread_info.h
@@ -0,0 +1,106 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ *  S390 version
+ *    Copyright IBM Corp. 2002, 2006
+ *    Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com)
+ */
+
+#ifndef _ASM_THREAD_INFO_H
+#define _ASM_THREAD_INFO_H
+
+#include <linux/bits.h>
+#ifndef ASM_OFFSETS_C
+#include <asm/asm-offsets.h>
+#endif
+
+/*
+ * General size of kernel stacks
+ */
+#ifdef CONFIG_KASAN
+#define THREAD_SIZE_ORDER 4
+#else
+#define THREAD_SIZE_ORDER 2
+#endif
+#define BOOT_STACK_SIZE (PAGE_SIZE << 2)
+#define THREAD_SIZE (PAGE_SIZE << THREAD_SIZE_ORDER)
+
+#define STACK_INIT_OFFSET (THREAD_SIZE - STACK_FRAME_OVERHEAD - __PT_SIZE)
+
+#ifndef __ASSEMBLY__
+#include <asm/lowcore.h>
+#include <asm/page.h>
+
+/*
+ * low level task data that entry.S needs immediate access to
+ * - this struct should fit entirely inside of one cache line
+ * - this struct shares the supervisor stack pages
+ * - if the contents of this structure are changed, the assembly constants must also be changed
+ */
+struct thread_info {
+	unsigned long		flags;		/* low level flags */
+	unsigned long		syscall_work;	/* SYSCALL_WORK_ flags */
+	unsigned int		cpu;		/* current CPU */
+};
+
+/*
+ * macros/functions for gaining access to the thread information structure
+ */
+#define INIT_THREAD_INFO(tsk)			\
+{						\
+	.flags		= 0,			\
+}
+
+struct task_struct;
+
+void arch_setup_new_exec(void);
+#define arch_setup_new_exec arch_setup_new_exec
+
+#endif
+
+/*
+ * thread information flags bit numbers
+ */
+/* _TIF_WORK bits */
+#define TIF_NOTIFY_RESUME	0	/* callback before returning to user */
+#define TIF_SIGPENDING		1	/* signal pending */
+#define TIF_NEED_RESCHED	2	/* rescheduling necessary */
+#define TIF_UPROBE		3	/* breakpointed or single-stepping */
+#define TIF_GUARDED_STORAGE	4	/* load guarded storage control block */
+#define TIF_PATCH_PENDING	5	/* pending live patching update */
+#define TIF_PGSTE		6	/* New mm's will use 4K page tables */
+#define TIF_NOTIFY_SIGNAL	7	/* signal notifications exist */
+#define TIF_ISOLATE_BP_GUEST	9	/* Run KVM guests with isolated BP */
+#define TIF_PER_TRAP		10	/* Need to handle PER trap on exit to usermode */
+
+#define TIF_31BIT		16	/* 32bit process */
+#define TIF_MEMDIE		17	/* is terminating due to OOM killer */
+#define TIF_RESTORE_SIGMASK	18	/* restore signal mask in do_signal() */
+#define TIF_SINGLE_STEP		19	/* This task is single stepped */
+#define TIF_BLOCK_STEP		20	/* This task is block stepped */
+#define TIF_UPROBE_SINGLESTEP	21	/* This task is uprobe single stepped */
+
+/* _TIF_TRACE bits */
+#define TIF_SYSCALL_TRACE	24	/* syscall trace active */
+#define TIF_SYSCALL_AUDIT	25	/* syscall auditing active */
+#define TIF_SECCOMP		26	/* secure computing */
+#define TIF_SYSCALL_TRACEPOINT	27	/* syscall tracepoint instrumentation */
+
+#define _TIF_NOTIFY_RESUME	BIT(TIF_NOTIFY_RESUME)
+#define _TIF_NOTIFY_SIGNAL	BIT(TIF_NOTIFY_SIGNAL)
+#define _TIF_SIGPENDING		BIT(TIF_SIGPENDING)
+#define _TIF_NEED_RESCHED	BIT(TIF_NEED_RESCHED)
+#define _TIF_UPROBE		BIT(TIF_UPROBE)
+#define _TIF_GUARDED_STORAGE	BIT(TIF_GUARDED_STORAGE)
+#define _TIF_PATCH_PENDING	BIT(TIF_PATCH_PENDING)
+#define _TIF_ISOLATE_BP_GUEST	BIT(TIF_ISOLATE_BP_GUEST)
+#define _TIF_PER_TRAP		BIT(TIF_PER_TRAP)
+
+#define _TIF_31BIT		BIT(TIF_31BIT)
+#define _TIF_SINGLE_STEP	BIT(TIF_SINGLE_STEP)
+
+#define _TIF_SYSCALL_TRACE	BIT(TIF_SYSCALL_TRACE)
+#define _TIF_SYSCALL_AUDIT	BIT(TIF_SYSCALL_AUDIT)
+#define _TIF_SECCOMP		BIT(TIF_SECCOMP)
+#define _TIF_SYSCALL_TRACEPOINT	BIT(TIF_SYSCALL_TRACEPOINT)
+
+#endif /* _ASM_THREAD_INFO_H */
diff --git a/arch/s390/include/asm/timex.h b/arch/s390/include/asm/timex.h
new file mode 100644
index 0000000000..4d646659a5
--- /dev/null
+++ b/arch/s390/include/asm/timex.h
@@ -0,0 +1,281 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ *  S390 version
+ *    Copyright IBM Corp. 1999
+ *
+ *  Derived from "include/asm-i386/timex.h"
+ *    Copyright (C) 1992, Linus Torvalds
+ */
+
+#ifndef _ASM_S390_TIMEX_H
+#define _ASM_S390_TIMEX_H
+
+#include <linux/preempt.h>
+#include <linux/time64.h>
+#include <asm/lowcore.h>
+
+/* The value of the TOD clock for 1.1.1970. */
+#define TOD_UNIX_EPOCH 0x7d91048bca000000ULL
+
+extern u64 clock_comparator_max;
+
+union tod_clock {
+	__uint128_t val;
+	struct {
+		__uint128_t ei	:  8; /* epoch index */
+		__uint128_t tod : 64; /* bits 0-63 of tod clock */
+		__uint128_t	: 40;
+		__uint128_t pf	: 16; /* programmable field */
+	};
+	struct {
+		__uint128_t eitod : 72; /* epoch index + bits 0-63 tod clock */
+		__uint128_t	  : 56;
+	};
+	struct {
+		__uint128_t us	: 60; /* micro-seconds */
+		__uint128_t sus	: 12; /* sub-microseconds */
+		__uint128_t	: 56;
+	};
+} __packed;
+
+/* Inline functions for clock register access. */
+static inline int set_tod_clock(__u64 time)
+{
+	int cc;
+
+	asm volatile(
+		"   sck   %1\n"
+		"   ipm   %0\n"
+		"   srl   %0,28\n"
+		: "=d" (cc) : "Q" (time) : "cc");
+	return cc;
+}
+
+static inline int store_tod_clock_ext_cc(union tod_clock *clk)
+{
+	int cc;
+
+	asm volatile(
+		"   stcke  %1\n"
+		"   ipm   %0\n"
+		"   srl   %0,28\n"
+		: "=d" (cc), "=Q" (*clk) : : "cc");
+	return cc;
+}
+
+static __always_inline void store_tod_clock_ext(union tod_clock *tod)
+{
+	asm volatile("stcke %0" : "=Q" (*tod) : : "cc");
+}
+
+static inline void set_clock_comparator(__u64 time)
+{
+	asm volatile("sckc %0" : : "Q" (time));
+}
+
+static inline void set_tod_programmable_field(u16 val)
+{
+	asm volatile(
+		"	lgr	0,%[val]\n"
+		"	sckpf\n"
+		:
+		: [val] "d" ((unsigned long)val)
+		: "0");
+}
+
+void clock_comparator_work(void);
+
+void __init time_early_init(void);
+
+extern unsigned char ptff_function_mask[16];
+
+/* Function codes for the ptff instruction. */
+#define PTFF_QAF	0x00	/* query available functions */
+#define PTFF_QTO	0x01	/* query tod offset */
+#define PTFF_QSI	0x02	/* query steering information */
+#define PTFF_QUI	0x04	/* query UTC information */
+#define PTFF_ATO	0x40	/* adjust tod offset */
+#define PTFF_STO	0x41	/* set tod offset */
+#define PTFF_SFS	0x42	/* set fine steering rate */
+#define PTFF_SGS	0x43	/* set gross steering rate */
+
+/* Query TOD offset result */
+struct ptff_qto {
+	unsigned long physical_clock;
+	unsigned long tod_offset;
+	unsigned long logical_tod_offset;
+	unsigned long tod_epoch_difference;
+} __packed;
+
+static inline int ptff_query(unsigned int nr)
+{
+	unsigned char *ptr;
+
+	ptr = ptff_function_mask + (nr >> 3);
+	return (*ptr & (0x80 >> (nr & 7))) != 0;
+}
+
+/* Query UTC information result */
+struct ptff_qui {
+	unsigned int tm : 2;
+	unsigned int ts : 2;
+	unsigned int : 28;
+	unsigned int pad_0x04;
+	unsigned long leap_event;
+	short old_leap;
+	short new_leap;
+	unsigned int pad_0x14;
+	unsigned long prt[5];
+	unsigned long cst[3];
+	unsigned int skew;
+	unsigned int pad_0x5c[41];
+} __packed;
+
+/*
+ * ptff - Perform timing facility function
+ * @ptff_block: Pointer to ptff parameter block
+ * @len: Length of parameter block
+ * @func: Function code
+ * Returns: Condition code (0 on success)
+ */
+#define ptff(ptff_block, len, func)					\
+({									\
+	struct addrtype { char _[len]; };				\
+	unsigned int reg0 = func;					\
+	unsigned long reg1 = (unsigned long)(ptff_block);		\
+	int rc;								\
+									\
+	asm volatile(							\
+		"	lgr	0,%[reg0]\n"				\
+		"	lgr	1,%[reg1]\n"				\
+		"	ptff\n"						\
+		"	ipm	%[rc]\n"				\
+		"	srl	%[rc],28\n"				\
+		: [rc] "=&d" (rc), "+m" (*(struct addrtype *)reg1)	\
+		: [reg0] "d" (reg0), [reg1] "d" (reg1)			\
+		: "cc", "0", "1");					\
+	rc;								\
+})
+
+static inline unsigned long local_tick_disable(void)
+{
+	unsigned long old;
+
+	old = S390_lowcore.clock_comparator;
+	S390_lowcore.clock_comparator = clock_comparator_max;
+	set_clock_comparator(S390_lowcore.clock_comparator);
+	return old;
+}
+
+static inline void local_tick_enable(unsigned long comp)
+{
+	S390_lowcore.clock_comparator = comp;
+	set_clock_comparator(S390_lowcore.clock_comparator);
+}
+
+#define CLOCK_TICK_RATE		1193180 /* Underlying HZ */
+
+typedef unsigned long cycles_t;
+
+static __always_inline unsigned long get_tod_clock(void)
+{
+	union tod_clock clk;
+
+	store_tod_clock_ext(&clk);
+	return clk.tod;
+}
+
+static inline unsigned long get_tod_clock_fast(void)
+{
+	unsigned long clk;
+
+	asm volatile("stckf %0" : "=Q" (clk) : : "cc");
+	return clk;
+}
+
+static inline cycles_t get_cycles(void)
+{
+	return (cycles_t) get_tod_clock() >> 2;
+}
+#define get_cycles get_cycles
+
+int get_phys_clock(unsigned long *clock);
+void init_cpu_timer(void);
+
+extern union tod_clock tod_clock_base;
+
+static __always_inline unsigned long __get_tod_clock_monotonic(void)
+{
+	return get_tod_clock() - tod_clock_base.tod;
+}
+
+/**
+ * get_clock_monotonic - returns current time in clock rate units
+ *
+ * The clock and tod_clock_base get changed via stop_machine.
+ * Therefore preemption must be disabled, otherwise the returned
+ * value is not guaranteed to be monotonic.
+ */
+static inline unsigned long get_tod_clock_monotonic(void)
+{
+	unsigned long tod;
+
+	preempt_disable_notrace();
+	tod = __get_tod_clock_monotonic();
+	preempt_enable_notrace();
+	return tod;
+}
+
+/**
+ * tod_to_ns - convert a TOD format value to nanoseconds
+ * @todval: to be converted TOD format value
+ * Returns: number of nanoseconds that correspond to the TOD format value
+ *
+ * Converting a 64 Bit TOD format value to nanoseconds means that the value
+ * must be divided by 4.096. In order to achieve that we multiply with 125
+ * and divide by 512:
+ *
+ *    ns = (todval * 125) >> 9;
+ *
+ * In order to avoid an overflow with the multiplication we can rewrite this.
+ * With a split todval == 2^9 * th + tl (th upper 55 bits, tl lower 9 bits)
+ * we end up with
+ *
+ *    ns = ((2^9 * th + tl) * 125 ) >> 9;
+ * -> ns = (th * 125) + ((tl * 125) >> 9);
+ *
+ */
+static __always_inline unsigned long tod_to_ns(unsigned long todval)
+{
+	return ((todval >> 9) * 125) + (((todval & 0x1ff) * 125) >> 9);
+}
+
+/**
+ * tod_after - compare two 64 bit TOD values
+ * @a: first 64 bit TOD timestamp
+ * @b: second 64 bit TOD timestamp
+ *
+ * Returns: true if a is later than b
+ */
+static inline int tod_after(unsigned long a, unsigned long b)
+{
+	if (MACHINE_HAS_SCC)
+		return (long) a > (long) b;
+	return a > b;
+}
+
+/**
+ * tod_after_eq - compare two 64 bit TOD values
+ * @a: first 64 bit TOD timestamp
+ * @b: second 64 bit TOD timestamp
+ *
+ * Returns: true if a is later than b
+ */
+static inline int tod_after_eq(unsigned long a, unsigned long b)
+{
+	if (MACHINE_HAS_SCC)
+		return (long) a >= (long) b;
+	return a >= b;
+}
+
+#endif
diff --git a/arch/s390/include/asm/tlb.h b/arch/s390/include/asm/tlb.h
new file mode 100644
index 0000000000..383b1f9144
--- /dev/null
+++ b/arch/s390/include/asm/tlb.h
@@ -0,0 +1,137 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _S390_TLB_H
+#define _S390_TLB_H
+
+/*
+ * TLB flushing on s390 is complicated. The following requirement
+ * from the principles of operation is the most arduous:
+ *
+ * "A valid table entry must not be changed while it is attached
+ * to any CPU and may be used for translation by that CPU except to
+ * (1) invalidate the entry by using INVALIDATE PAGE TABLE ENTRY,
+ * or INVALIDATE DAT TABLE ENTRY, (2) alter bits 56-63 of a page
+ * table entry, or (3) make a change by means of a COMPARE AND SWAP
+ * AND PURGE instruction that purges the TLB."
+ *
+ * The modification of a pte of an active mm struct therefore is
+ * a two step process: i) invalidate the pte, ii) store the new pte.
+ * This is true for the page protection bit as well.
+ * The only possible optimization is to flush at the beginning of
+ * a tlb_gather_mmu cycle if the mm_struct is currently not in use.
+ *
+ * Pages used for the page tables is a different story. FIXME: more
+ */
+
+void __tlb_remove_table(void *_table);
+static inline void tlb_flush(struct mmu_gather *tlb);
+static inline bool __tlb_remove_page_size(struct mmu_gather *tlb,
+					  struct encoded_page *page,
+					  int page_size);
+
+#define tlb_flush tlb_flush
+#define pte_free_tlb pte_free_tlb
+#define pmd_free_tlb pmd_free_tlb
+#define p4d_free_tlb p4d_free_tlb
+#define pud_free_tlb pud_free_tlb
+
+#include <asm/tlbflush.h>
+#include <asm-generic/tlb.h>
+
+/*
+ * Release the page cache reference for a pte removed by
+ * tlb_ptep_clear_flush. In both flush modes the tlb for a page cache page
+ * has already been freed, so just do free_page_and_swap_cache.
+ *
+ * s390 doesn't delay rmap removal, so there is nothing encoded in
+ * the page pointer.
+ */
+static inline bool __tlb_remove_page_size(struct mmu_gather *tlb,
+					  struct encoded_page *page,
+					  int page_size)
+{
+	free_page_and_swap_cache(encoded_page_ptr(page));
+	return false;
+}
+
+static inline void tlb_flush(struct mmu_gather *tlb)
+{
+	__tlb_flush_mm_lazy(tlb->mm);
+}
+
+/*
+ * pte_free_tlb frees a pte table and clears the CRSTE for the
+ * page table from the tlb.
+ */
+static inline void pte_free_tlb(struct mmu_gather *tlb, pgtable_t pte,
+                                unsigned long address)
+{
+	__tlb_adjust_range(tlb, address, PAGE_SIZE);
+	tlb->mm->context.flush_mm = 1;
+	tlb->freed_tables = 1;
+	tlb->cleared_pmds = 1;
+	/*
+	 * page_table_free_rcu takes care of the allocation bit masks
+	 * of the 2K table fragments in the 4K page table page,
+	 * then calls tlb_remove_table.
+	 */
+	page_table_free_rcu(tlb, (unsigned long *) pte, address);
+}
+
+/*
+ * pmd_free_tlb frees a pmd table and clears the CRSTE for the
+ * segment table entry from the tlb.
+ * If the mm uses a two level page table the single pmd is freed
+ * as the pgd. pmd_free_tlb checks the asce_limit against 2GB
+ * to avoid the double free of the pmd in this case.
+ */
+static inline void pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmd,
+				unsigned long address)
+{
+	if (mm_pmd_folded(tlb->mm))
+		return;
+	pagetable_pmd_dtor(virt_to_ptdesc(pmd));
+	__tlb_adjust_range(tlb, address, PAGE_SIZE);
+	tlb->mm->context.flush_mm = 1;
+	tlb->freed_tables = 1;
+	tlb->cleared_puds = 1;
+	tlb_remove_ptdesc(tlb, pmd);
+}
+
+/*
+ * p4d_free_tlb frees a pud table and clears the CRSTE for the
+ * region second table entry from the tlb.
+ * If the mm uses a four level page table the single p4d is freed
+ * as the pgd. p4d_free_tlb checks the asce_limit against 8PB
+ * to avoid the double free of the p4d in this case.
+ */
+static inline void p4d_free_tlb(struct mmu_gather *tlb, p4d_t *p4d,
+				unsigned long address)
+{
+	if (mm_p4d_folded(tlb->mm))
+		return;
+	__tlb_adjust_range(tlb, address, PAGE_SIZE);
+	tlb->mm->context.flush_mm = 1;
+	tlb->freed_tables = 1;
+	tlb_remove_table(tlb, p4d);
+}
+
+/*
+ * pud_free_tlb frees a pud table and clears the CRSTE for the
+ * region third table entry from the tlb.
+ * If the mm uses a three level page table the single pud is freed
+ * as the pgd. pud_free_tlb checks the asce_limit against 4TB
+ * to avoid the double free of the pud in this case.
+ */
+static inline void pud_free_tlb(struct mmu_gather *tlb, pud_t *pud,
+				unsigned long address)
+{
+	if (mm_pud_folded(tlb->mm))
+		return;
+	tlb->mm->context.flush_mm = 1;
+	tlb->freed_tables = 1;
+	tlb->cleared_p4ds = 1;
+	tlb_remove_table(tlb, pud);
+}
+
+
+#endif /* _S390_TLB_H */
diff --git a/arch/s390/include/asm/tlbflush.h b/arch/s390/include/asm/tlbflush.h
new file mode 100644
index 0000000000..a6e2cd89b6
--- /dev/null
+++ b/arch/s390/include/asm/tlbflush.h
@@ -0,0 +1,129 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _S390_TLBFLUSH_H
+#define _S390_TLBFLUSH_H
+
+#include <linux/mm.h>
+#include <linux/sched.h>
+#include <asm/processor.h>
+
+/*
+ * Flush all TLB entries on the local CPU.
+ */
+static inline void __tlb_flush_local(void)
+{
+	asm volatile("ptlb" : : : "memory");
+}
+
+/*
+ * Flush TLB entries for a specific ASCE on all CPUs
+ */
+static inline void __tlb_flush_idte(unsigned long asce)
+{
+	unsigned long opt;
+
+	opt = IDTE_PTOA;
+	if (MACHINE_HAS_TLB_GUEST)
+		opt |= IDTE_GUEST_ASCE;
+	/* Global TLB flush for the mm */
+	asm volatile("idte 0,%1,%0" : : "a" (opt), "a" (asce) : "cc");
+}
+
+/*
+ * Flush all TLB entries on all CPUs.
+ */
+static inline void __tlb_flush_global(void)
+{
+	unsigned int dummy = 0;
+
+	csp(&dummy, 0, 0);
+}
+
+/*
+ * Flush TLB entries for a specific mm on all CPUs (in case gmap is used
+ * this implicates multiple ASCEs!).
+ */
+static inline void __tlb_flush_mm(struct mm_struct *mm)
+{
+	unsigned long gmap_asce;
+
+	/*
+	 * If the machine has IDTE we prefer to do a per mm flush
+	 * on all cpus instead of doing a local flush if the mm
+	 * only ran on the local cpu.
+	 */
+	preempt_disable();
+	atomic_inc(&mm->context.flush_count);
+	/* Reset TLB flush mask */
+	cpumask_copy(mm_cpumask(mm), &mm->context.cpu_attach_mask);
+	barrier();
+	gmap_asce = READ_ONCE(mm->context.gmap_asce);
+	if (MACHINE_HAS_IDTE && gmap_asce != -1UL) {
+		if (gmap_asce)
+			__tlb_flush_idte(gmap_asce);
+		__tlb_flush_idte(mm->context.asce);
+	} else {
+		/* Global TLB flush */
+		__tlb_flush_global();
+	}
+	atomic_dec(&mm->context.flush_count);
+	preempt_enable();
+}
+
+static inline void __tlb_flush_kernel(void)
+{
+	if (MACHINE_HAS_IDTE)
+		__tlb_flush_idte(init_mm.context.asce);
+	else
+		__tlb_flush_global();
+}
+
+static inline void __tlb_flush_mm_lazy(struct mm_struct * mm)
+{
+	spin_lock(&mm->context.lock);
+	if (mm->context.flush_mm) {
+		mm->context.flush_mm = 0;
+		__tlb_flush_mm(mm);
+	}
+	spin_unlock(&mm->context.lock);
+}
+
+/*
+ * TLB flushing:
+ *  flush_tlb() - flushes the current mm struct TLBs
+ *  flush_tlb_all() - flushes all processes TLBs
+ *  flush_tlb_mm(mm) - flushes the specified mm context TLB's
+ *  flush_tlb_page(vma, vmaddr) - flushes one page
+ *  flush_tlb_range(vma, start, end) - flushes a range of pages
+ *  flush_tlb_kernel_range(start, end) - flushes a range of kernel pages
+ */
+
+/*
+ * flush_tlb_mm goes together with ptep_set_wrprotect for the
+ * copy_page_range operation and flush_tlb_range is related to
+ * ptep_get_and_clear for change_protection. ptep_set_wrprotect and
+ * ptep_get_and_clear do not flush the TLBs directly if the mm has
+ * only one user. At the end of the update the flush_tlb_mm and
+ * flush_tlb_range functions need to do the flush.
+ */
+#define flush_tlb()				do { } while (0)
+#define flush_tlb_all()				do { } while (0)
+#define flush_tlb_page(vma, addr)		do { } while (0)
+
+static inline void flush_tlb_mm(struct mm_struct *mm)
+{
+	__tlb_flush_mm_lazy(mm);
+}
+
+static inline void flush_tlb_range(struct vm_area_struct *vma,
+				   unsigned long start, unsigned long end)
+{
+	__tlb_flush_mm_lazy(vma->vm_mm);
+}
+
+static inline void flush_tlb_kernel_range(unsigned long start,
+					  unsigned long end)
+{
+	__tlb_flush_kernel();
+}
+
+#endif /* _S390_TLBFLUSH_H */
diff --git a/arch/s390/include/asm/topology.h b/arch/s390/include/asm/topology.h
new file mode 100644
index 0000000000..3a0ac0c7a9
--- /dev/null
+++ b/arch/s390/include/asm/topology.h
@@ -0,0 +1,101 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_S390_TOPOLOGY_H
+#define _ASM_S390_TOPOLOGY_H
+
+#include <linux/cpumask.h>
+#include <asm/numa.h>
+
+struct sysinfo_15_1_x;
+struct cpu;
+
+#ifdef CONFIG_SCHED_TOPOLOGY
+
+struct cpu_topology_s390 {
+	unsigned short thread_id;
+	unsigned short core_id;
+	unsigned short socket_id;
+	unsigned short book_id;
+	unsigned short drawer_id;
+	unsigned short dedicated : 1;
+	int booted_cores;
+	cpumask_t thread_mask;
+	cpumask_t core_mask;
+	cpumask_t book_mask;
+	cpumask_t drawer_mask;
+};
+
+extern struct cpu_topology_s390 cpu_topology[NR_CPUS];
+
+#define topology_physical_package_id(cpu) (cpu_topology[cpu].socket_id)
+#define topology_thread_id(cpu)		  (cpu_topology[cpu].thread_id)
+#define topology_sibling_cpumask(cpu)	  (&cpu_topology[cpu].thread_mask)
+#define topology_core_id(cpu)		  (cpu_topology[cpu].core_id)
+#define topology_core_cpumask(cpu)	  (&cpu_topology[cpu].core_mask)
+#define topology_book_id(cpu)		  (cpu_topology[cpu].book_id)
+#define topology_book_cpumask(cpu)	  (&cpu_topology[cpu].book_mask)
+#define topology_drawer_id(cpu)		  (cpu_topology[cpu].drawer_id)
+#define topology_drawer_cpumask(cpu)	  (&cpu_topology[cpu].drawer_mask)
+#define topology_cpu_dedicated(cpu)	  (cpu_topology[cpu].dedicated)
+#define topology_booted_cores(cpu)	  (cpu_topology[cpu].booted_cores)
+
+#define mc_capable() 1
+
+void topology_init_early(void);
+int topology_cpu_init(struct cpu *);
+int topology_set_cpu_management(int fc);
+void topology_schedule_update(void);
+void store_topology(struct sysinfo_15_1_x *info);
+void update_cpu_masks(void);
+void topology_expect_change(void);
+const struct cpumask *cpu_coregroup_mask(int cpu);
+
+#else /* CONFIG_SCHED_TOPOLOGY */
+
+static inline void topology_init_early(void) { }
+static inline void topology_schedule_update(void) { }
+static inline int topology_cpu_init(struct cpu *cpu) { return 0; }
+static inline int topology_cpu_dedicated(int cpu_nr) { return 0; }
+static inline int topology_booted_cores(int cpu_nr) { return 1; }
+static inline void update_cpu_masks(void) { }
+static inline void topology_expect_change(void) { }
+
+#endif /* CONFIG_SCHED_TOPOLOGY */
+
+#define POLARIZATION_UNKNOWN	(-1)
+#define POLARIZATION_HRZ	(0)
+#define POLARIZATION_VL		(1)
+#define POLARIZATION_VM		(2)
+#define POLARIZATION_VH		(3)
+
+#define SD_BOOK_INIT	SD_CPU_INIT
+
+#ifdef CONFIG_NUMA
+
+#define cpu_to_node cpu_to_node
+static inline int cpu_to_node(int cpu)
+{
+	return 0;
+}
+
+/* Returns a pointer to the cpumask of CPUs on node 'node'. */
+#define cpumask_of_node cpumask_of_node
+static inline const struct cpumask *cpumask_of_node(int node)
+{
+	return cpu_possible_mask;
+}
+
+#define pcibus_to_node(bus) __pcibus_to_node(bus)
+
+#else /* !CONFIG_NUMA */
+
+#define numa_node_id numa_node_id
+static inline int numa_node_id(void)
+{
+	return 0;
+}
+
+#endif /* CONFIG_NUMA */
+
+#include <asm-generic/topology.h>
+
+#endif /* _ASM_S390_TOPOLOGY_H */
diff --git a/arch/s390/include/asm/tpi.h b/arch/s390/include/asm/tpi.h
new file mode 100644
index 0000000000..f76e5fdff2
--- /dev/null
+++ b/arch/s390/include/asm/tpi.h
@@ -0,0 +1,37 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+#ifndef _ASM_S390_TPI_H
+#define _ASM_S390_TPI_H
+
+#include <linux/types.h>
+#include <uapi/asm/schid.h>
+
+#ifndef __ASSEMBLY__
+
+/* I/O-Interruption Code as stored by TEST PENDING INTERRUPTION (TPI). */
+struct tpi_info {
+	struct subchannel_id schid;
+	u32 intparm;
+	u32 adapter_IO:1;
+	u32 directed_irq:1;
+	u32 isc:3;
+	u32 :12;
+	u32 type:3;
+	u32 :12;
+} __packed __aligned(4);
+
+/* I/O-Interruption Code as stored by TPI for an Adapter I/O */
+struct tpi_adapter_info {
+	u32 aism:8;
+	u32 :22;
+	u32 error:1;
+	u32 forward:1;
+	u32 reserved;
+	u32 adapter_IO:1;
+	u32 directed_irq:1;
+	u32 isc:3;
+	u32 :27;
+} __packed __aligned(4);
+
+#endif /* __ASSEMBLY__ */
+
+#endif /* _ASM_S390_TPI_H */
diff --git a/arch/s390/include/asm/trace/diag.h b/arch/s390/include/asm/trace/diag.h
new file mode 100644
index 0000000000..22fcac4ff9
--- /dev/null
+++ b/arch/s390/include/asm/trace/diag.h
@@ -0,0 +1,44 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Tracepoint header for s390 diagnose calls
+ *
+ * Copyright IBM Corp. 2015
+ * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>
+ */
+
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM s390
+
+#if !defined(_TRACE_S390_DIAG_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_S390_DIAG_H
+
+#include <linux/tracepoint.h>
+
+#undef TRACE_INCLUDE_PATH
+#undef TRACE_INCLUDE_FILE
+
+#define TRACE_INCLUDE_PATH asm/trace
+#define TRACE_INCLUDE_FILE diag
+
+TRACE_EVENT(s390_diagnose,
+	TP_PROTO(unsigned short nr),
+	TP_ARGS(nr),
+	TP_STRUCT__entry(
+		__field(unsigned short, nr)
+	),
+	TP_fast_assign(
+		__entry->nr = nr;
+	),
+	TP_printk("nr=0x%x", __entry->nr)
+);
+
+#ifdef CONFIG_TRACEPOINTS
+void trace_s390_diagnose_norecursion(int diag_nr);
+#else
+static inline void trace_s390_diagnose_norecursion(int diag_nr) { }
+#endif
+
+#endif /* _TRACE_S390_DIAG_H */
+
+/* This part must be outside protection */
+#include <trace/define_trace.h>
diff --git a/arch/s390/include/asm/trace/zcrypt.h b/arch/s390/include/asm/trace/zcrypt.h
new file mode 100644
index 0000000000..457ddaa99e
--- /dev/null
+++ b/arch/s390/include/asm/trace/zcrypt.h
@@ -0,0 +1,123 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Tracepoint definitions for the s390 zcrypt device driver
+ *
+ * Copyright IBM Corp. 2016
+ * Author(s): Harald Freudenberger <freude@de.ibm.com>
+ *
+ * Currently there are two tracepoint events defined here.
+ * An s390_zcrypt_req request event occurs as soon as the request is
+ * recognized by the zcrypt ioctl function. This event may act as some kind
+ * of request-processing-starts-now indication.
+ * As late as possible within the zcrypt ioctl function there occurs the
+ * s390_zcrypt_rep event which may act as the point in time where the
+ * request has been processed by the kernel and the result is about to be
+ * transferred back to userspace.
+ * The glue which binds together request and reply event is the ptr
+ * parameter, which is the local buffer address where the request from
+ * userspace has been stored by the ioctl function.
+ *
+ * The main purpose of this zcrypt tracepoint api is to get some data for
+ * performance measurements together with information about on which card
+ * and queue the request has been processed. It is not an ffdc interface as
+ * there is already code in the zcrypt device driver to serve the s390
+ * debug feature interface.
+ */
+
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM s390
+
+#if !defined(_TRACE_S390_ZCRYPT_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_S390_ZCRYPT_H
+
+#include <linux/tracepoint.h>
+
+#define TP_ICARSAMODEXPO  0x0001
+#define TP_ICARSACRT	  0x0002
+#define TB_ZSECSENDCPRB   0x0003
+#define TP_ZSENDEP11CPRB  0x0004
+#define TP_HWRNGCPRB	  0x0005
+
+#define show_zcrypt_tp_type(type)				\
+	__print_symbolic(type,					\
+			 { TP_ICARSAMODEXPO, "ICARSAMODEXPO" }, \
+			 { TP_ICARSACRT, "ICARSACRT" },		\
+			 { TB_ZSECSENDCPRB, "ZSECSENDCPRB" },	\
+			 { TP_ZSENDEP11CPRB, "ZSENDEP11CPRB" }, \
+			 { TP_HWRNGCPRB, "HWRNGCPRB" })
+
+/**
+ * trace_s390_zcrypt_req - zcrypt request tracepoint function
+ * @ptr:  Address of the local buffer where the request from userspace
+ *	  is stored. Can be used as a unique id to relate together
+ *	  request and reply.
+ * @type: One of the TP_ defines above.
+ *
+ * Called when a request from userspace is recognised within the ioctl
+ * function of the zcrypt device driver and may act as an entry
+ * timestamp.
+ */
+TRACE_EVENT(s390_zcrypt_req,
+	    TP_PROTO(void *ptr, u32 type),
+	    TP_ARGS(ptr, type),
+	    TP_STRUCT__entry(
+		    __field(void *, ptr)
+		    __field(u32, type)),
+	    TP_fast_assign(
+		    __entry->ptr = ptr;
+		    __entry->type = type;),
+	    TP_printk("ptr=%p type=%s",
+		      __entry->ptr,
+		      show_zcrypt_tp_type(__entry->type))
+);
+
+/**
+ * trace_s390_zcrypt_rep - zcrypt reply tracepoint function
+ * @ptr:  Address of the local buffer where the request from userspace
+ *	  is stored. Can be used as a unique id to match together
+ *	  request and reply.
+ * @fc:   Function code.
+ * @rc:   The bare returncode as returned by the device driver ioctl
+ *	  function.
+ * @dev:  The adapter nr where this request was actually processed.
+ * @dom:  Domain id of the device where this request was processed.
+ *
+ * Called upon recognising the reply from the crypto adapter. This
+ * message may act as the exit timestamp for the request but also
+ * carries some info about on which adapter the request was processed
+ * and the returncode from the device driver.
+ */
+TRACE_EVENT(s390_zcrypt_rep,
+	    TP_PROTO(void *ptr, u32 fc, u32 rc, u16 dev, u16 dom),
+	    TP_ARGS(ptr, fc, rc, dev, dom),
+	    TP_STRUCT__entry(
+		    __field(void *, ptr)
+		    __field(u32, fc)
+		    __field(u32, rc)
+		    __field(u16, device)
+		    __field(u16, domain)),
+	    TP_fast_assign(
+		    __entry->ptr = ptr;
+		    __entry->fc = fc;
+		    __entry->rc = rc;
+		    __entry->device = dev;
+		    __entry->domain = dom;),
+	    TP_printk("ptr=%p fc=0x%04x rc=%d dev=0x%02hx domain=0x%04hx",
+		      __entry->ptr,
+		      (unsigned int) __entry->fc,
+		      (int) __entry->rc,
+		      (unsigned short) __entry->device,
+		      (unsigned short) __entry->domain)
+);
+
+#endif /* _TRACE_S390_ZCRYPT_H */
+
+/* This part must be outside protection */
+
+#undef TRACE_INCLUDE_PATH
+#undef TRACE_INCLUDE_FILE
+
+#define TRACE_INCLUDE_PATH asm/trace
+#define TRACE_INCLUDE_FILE zcrypt
+
+#include <trace/define_trace.h>
diff --git a/arch/s390/include/asm/types.h b/arch/s390/include/asm/types.h
new file mode 100644
index 0000000000..0b5d550a04
--- /dev/null
+++ b/arch/s390/include/asm/types.h
@@ -0,0 +1,19 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+
+#ifndef _ASM_S390_TYPES_H
+#define _ASM_S390_TYPES_H
+
+#include <uapi/asm/types.h>
+
+#ifndef __ASSEMBLY__
+
+union register_pair {
+	unsigned __int128 pair;
+	struct {
+		unsigned long even;
+		unsigned long odd;
+	};
+};
+
+#endif /* __ASSEMBLY__ */
+#endif /* _ASM_S390_TYPES_H */
diff --git a/arch/s390/include/asm/uaccess.h b/arch/s390/include/asm/uaccess.h
new file mode 100644
index 0000000000..8a8c64a678
--- /dev/null
+++ b/arch/s390/include/asm/uaccess.h
@@ -0,0 +1,601 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ *  S390 version
+ *    Copyright IBM Corp. 1999, 2000
+ *    Author(s): Hartmut Penner (hp@de.ibm.com),
+ *		 Martin Schwidefsky (schwidefsky@de.ibm.com)
+ *
+ *  Derived from "include/asm-i386/uaccess.h"
+ */
+#ifndef __S390_UACCESS_H
+#define __S390_UACCESS_H
+
+/*
+ * User space memory access functions
+ */
+#include <asm/asm-extable.h>
+#include <asm/processor.h>
+#include <asm/ctl_reg.h>
+#include <asm/extable.h>
+#include <asm/facility.h>
+#include <asm-generic/access_ok.h>
+
+void debug_user_asce(int exit);
+
+unsigned long __must_check
+raw_copy_from_user(void *to, const void __user *from, unsigned long n);
+
+unsigned long __must_check
+raw_copy_to_user(void __user *to, const void *from, unsigned long n);
+
+#ifndef CONFIG_KASAN
+#define INLINE_COPY_FROM_USER
+#define INLINE_COPY_TO_USER
+#endif
+
+unsigned long __must_check
+_copy_from_user_key(void *to, const void __user *from, unsigned long n, unsigned long key);
+
+static __always_inline unsigned long __must_check
+copy_from_user_key(void *to, const void __user *from, unsigned long n, unsigned long key)
+{
+	if (check_copy_size(to, n, false))
+		n = _copy_from_user_key(to, from, n, key);
+	return n;
+}
+
+unsigned long __must_check
+_copy_to_user_key(void __user *to, const void *from, unsigned long n, unsigned long key);
+
+static __always_inline unsigned long __must_check
+copy_to_user_key(void __user *to, const void *from, unsigned long n, unsigned long key)
+{
+	if (check_copy_size(from, n, true))
+		n = _copy_to_user_key(to, from, n, key);
+	return n;
+}
+
+union oac {
+	unsigned int val;
+	struct {
+		struct {
+			unsigned short key : 4;
+			unsigned short	   : 4;
+			unsigned short as  : 2;
+			unsigned short	   : 4;
+			unsigned short k   : 1;
+			unsigned short a   : 1;
+		} oac1;
+		struct {
+			unsigned short key : 4;
+			unsigned short	   : 4;
+			unsigned short as  : 2;
+			unsigned short	   : 4;
+			unsigned short k   : 1;
+			unsigned short a   : 1;
+		} oac2;
+	};
+};
+
+int __noreturn __put_user_bad(void);
+
+#define __put_user_asm(to, from, size)					\
+({									\
+	union oac __oac_spec = {					\
+		.oac1.as = PSW_BITS_AS_SECONDARY,			\
+		.oac1.a = 1,						\
+	};								\
+	int __rc;							\
+									\
+	asm volatile(							\
+		"	lr	0,%[spec]\n"				\
+		"0:	mvcos	%[_to],%[_from],%[_size]\n"		\
+		"1:	xr	%[rc],%[rc]\n"				\
+		"2:\n"							\
+		EX_TABLE_UA_STORE(0b, 2b, %[rc])			\
+		EX_TABLE_UA_STORE(1b, 2b, %[rc])			\
+		: [rc] "=&d" (__rc), [_to] "+Q" (*(to))			\
+		: [_size] "d" (size), [_from] "Q" (*(from)),		\
+		  [spec] "d" (__oac_spec.val)				\
+		: "cc", "0");						\
+	__rc;								\
+})
+
+static __always_inline int __put_user_fn(void *x, void __user *ptr, unsigned long size)
+{
+	int rc;
+
+	switch (size) {
+	case 1:
+		rc = __put_user_asm((unsigned char __user *)ptr,
+				    (unsigned char *)x,
+				    size);
+		break;
+	case 2:
+		rc = __put_user_asm((unsigned short __user *)ptr,
+				    (unsigned short *)x,
+				    size);
+		break;
+	case 4:
+		rc = __put_user_asm((unsigned int __user *)ptr,
+				    (unsigned int *)x,
+				    size);
+		break;
+	case 8:
+		rc = __put_user_asm((unsigned long __user *)ptr,
+				    (unsigned long *)x,
+				    size);
+		break;
+	default:
+		__put_user_bad();
+		break;
+	}
+	return rc;
+}
+
+int __noreturn __get_user_bad(void);
+
+#define __get_user_asm(to, from, size)					\
+({									\
+	union oac __oac_spec = {					\
+		.oac2.as = PSW_BITS_AS_SECONDARY,			\
+		.oac2.a = 1,						\
+	};								\
+	int __rc;							\
+									\
+	asm volatile(							\
+		"	lr	0,%[spec]\n"				\
+		"0:	mvcos	0(%[_to]),%[_from],%[_size]\n"		\
+		"1:	xr	%[rc],%[rc]\n"				\
+		"2:\n"							\
+		EX_TABLE_UA_LOAD_MEM(0b, 2b, %[rc], %[_to], %[_ksize])	\
+		EX_TABLE_UA_LOAD_MEM(1b, 2b, %[rc], %[_to], %[_ksize])	\
+		: [rc] "=&d" (__rc), "=Q" (*(to))			\
+		: [_size] "d" (size), [_from] "Q" (*(from)),		\
+		  [spec] "d" (__oac_spec.val), [_to] "a" (to),		\
+		  [_ksize] "K" (size)					\
+		: "cc", "0");						\
+	__rc;								\
+})
+
+static __always_inline int __get_user_fn(void *x, const void __user *ptr, unsigned long size)
+{
+	int rc;
+
+	switch (size) {
+	case 1:
+		rc = __get_user_asm((unsigned char *)x,
+				    (unsigned char __user *)ptr,
+				    size);
+		break;
+	case 2:
+		rc = __get_user_asm((unsigned short *)x,
+				    (unsigned short __user *)ptr,
+				    size);
+		break;
+	case 4:
+		rc = __get_user_asm((unsigned int *)x,
+				    (unsigned int __user *)ptr,
+				    size);
+		break;
+	case 8:
+		rc = __get_user_asm((unsigned long *)x,
+				    (unsigned long __user *)ptr,
+				    size);
+		break;
+	default:
+		__get_user_bad();
+		break;
+	}
+	return rc;
+}
+
+/*
+ * These are the main single-value transfer routines.  They automatically
+ * use the right size if we just have the right pointer type.
+ */
+#define __put_user(x, ptr)						\
+({									\
+	__typeof__(*(ptr)) __x = (x);					\
+	int __pu_err = -EFAULT;						\
+									\
+	__chk_user_ptr(ptr);						\
+	switch (sizeof(*(ptr))) {					\
+	case 1:								\
+	case 2:								\
+	case 4:								\
+	case 8:								\
+		__pu_err = __put_user_fn(&__x, ptr, sizeof(*(ptr)));	\
+		break;							\
+	default:							\
+		__put_user_bad();					\
+		break;							\
+	}								\
+	__builtin_expect(__pu_err, 0);					\
+})
+
+#define put_user(x, ptr)						\
+({									\
+	might_fault();							\
+	__put_user(x, ptr);						\
+})
+
+#define __get_user(x, ptr)						\
+({									\
+	int __gu_err = -EFAULT;						\
+									\
+	__chk_user_ptr(ptr);						\
+	switch (sizeof(*(ptr))) {					\
+	case 1: {							\
+		unsigned char __x;					\
+									\
+		__gu_err = __get_user_fn(&__x, ptr, sizeof(*(ptr)));	\
+		(x) = *(__force __typeof__(*(ptr)) *)&__x;		\
+		break;							\
+	};								\
+	case 2: {							\
+		unsigned short __x;					\
+									\
+		__gu_err = __get_user_fn(&__x, ptr, sizeof(*(ptr)));	\
+		(x) = *(__force __typeof__(*(ptr)) *)&__x;		\
+		break;							\
+	};								\
+	case 4: {							\
+		unsigned int __x;					\
+									\
+		__gu_err = __get_user_fn(&__x, ptr, sizeof(*(ptr)));	\
+		(x) = *(__force __typeof__(*(ptr)) *)&__x;		\
+		break;							\
+	};								\
+	case 8: {							\
+		unsigned long __x;					\
+									\
+		__gu_err = __get_user_fn(&__x, ptr, sizeof(*(ptr)));	\
+		(x) = *(__force __typeof__(*(ptr)) *)&__x;		\
+		break;							\
+	};								\
+	default:							\
+		__get_user_bad();					\
+		break;							\
+	}								\
+	__builtin_expect(__gu_err, 0);					\
+})
+
+#define get_user(x, ptr)						\
+({									\
+	might_fault();							\
+	__get_user(x, ptr);						\
+})
+
+/*
+ * Copy a null terminated string from userspace.
+ */
+long __must_check strncpy_from_user(char *dst, const char __user *src, long count);
+
+long __must_check strnlen_user(const char __user *src, long count);
+
+/*
+ * Zero Userspace
+ */
+unsigned long __must_check __clear_user(void __user *to, unsigned long size);
+
+static inline unsigned long __must_check clear_user(void __user *to, unsigned long n)
+{
+	might_fault();
+	return __clear_user(to, n);
+}
+
+void *s390_kernel_write(void *dst, const void *src, size_t size);
+
+int __noreturn __put_kernel_bad(void);
+
+#define __put_kernel_asm(val, to, insn)					\
+({									\
+	int __rc;							\
+									\
+	asm volatile(							\
+		"0:   " insn "  %[_val],%[_to]\n"			\
+		"1:	xr	%[rc],%[rc]\n"				\
+		"2:\n"							\
+		EX_TABLE_UA_STORE(0b, 2b, %[rc])			\
+		EX_TABLE_UA_STORE(1b, 2b, %[rc])			\
+		: [rc] "=d" (__rc), [_to] "+Q" (*(to))			\
+		: [_val] "d" (val)					\
+		: "cc");						\
+	__rc;								\
+})
+
+#define __put_kernel_nofault(dst, src, type, err_label)			\
+do {									\
+	unsigned long __x = (unsigned long)(*((type *)(src)));		\
+	int __pk_err;							\
+									\
+	switch (sizeof(type)) {						\
+	case 1:								\
+		__pk_err = __put_kernel_asm(__x, (type *)(dst), "stc"); \
+		break;							\
+	case 2:								\
+		__pk_err = __put_kernel_asm(__x, (type *)(dst), "sth"); \
+		break;							\
+	case 4:								\
+		__pk_err = __put_kernel_asm(__x, (type *)(dst), "st");	\
+		break;							\
+	case 8:								\
+		__pk_err = __put_kernel_asm(__x, (type *)(dst), "stg"); \
+		break;							\
+	default:							\
+		__pk_err = __put_kernel_bad();				\
+		break;							\
+	}								\
+	if (unlikely(__pk_err))						\
+		goto err_label;						\
+} while (0)
+
+int __noreturn __get_kernel_bad(void);
+
+#define __get_kernel_asm(val, from, insn)				\
+({									\
+	int __rc;							\
+									\
+	asm volatile(							\
+		"0:   " insn "  %[_val],%[_from]\n"			\
+		"1:	xr	%[rc],%[rc]\n"				\
+		"2:\n"							\
+		EX_TABLE_UA_LOAD_REG(0b, 2b, %[rc], %[_val])		\
+		EX_TABLE_UA_LOAD_REG(1b, 2b, %[rc], %[_val])		\
+		: [rc] "=d" (__rc), [_val] "=d" (val)			\
+		: [_from] "Q" (*(from))					\
+		: "cc");						\
+	__rc;								\
+})
+
+#define __get_kernel_nofault(dst, src, type, err_label)			\
+do {									\
+	int __gk_err;							\
+									\
+	switch (sizeof(type)) {						\
+	case 1: {							\
+		unsigned char __x;					\
+									\
+		__gk_err = __get_kernel_asm(__x, (type *)(src), "ic");	\
+		*((type *)(dst)) = (type)__x;				\
+		break;							\
+	};								\
+	case 2: {							\
+		unsigned short __x;					\
+									\
+		__gk_err = __get_kernel_asm(__x, (type *)(src), "lh");	\
+		*((type *)(dst)) = (type)__x;				\
+		break;							\
+	};								\
+	case 4: {							\
+		unsigned int __x;					\
+									\
+		__gk_err = __get_kernel_asm(__x, (type *)(src), "l");	\
+		*((type *)(dst)) = (type)__x;				\
+		break;							\
+	};								\
+	case 8: {							\
+		unsigned long __x;					\
+									\
+		__gk_err = __get_kernel_asm(__x, (type *)(src), "lg");	\
+		*((type *)(dst)) = (type)__x;				\
+		break;							\
+	};								\
+	default:							\
+		__gk_err = __get_kernel_bad();				\
+		break;							\
+	}								\
+	if (unlikely(__gk_err))						\
+		goto err_label;						\
+} while (0)
+
+void __cmpxchg_user_key_called_with_bad_pointer(void);
+
+#define CMPXCHG_USER_KEY_MAX_LOOPS 128
+
+static __always_inline int __cmpxchg_user_key(unsigned long address, void *uval,
+					      __uint128_t old, __uint128_t new,
+					      unsigned long key, int size)
+{
+	int rc = 0;
+
+	switch (size) {
+	case 1: {
+		unsigned int prev, shift, mask, _old, _new;
+		unsigned long count;
+
+		shift = (3 ^ (address & 3)) << 3;
+		address ^= address & 3;
+		_old = ((unsigned int)old & 0xff) << shift;
+		_new = ((unsigned int)new & 0xff) << shift;
+		mask = ~(0xff << shift);
+		asm volatile(
+			"	spka	0(%[key])\n"
+			"	sacf	256\n"
+			"	llill	%[count],%[max_loops]\n"
+			"0:	l	%[prev],%[address]\n"
+			"1:	nr	%[prev],%[mask]\n"
+			"	xilf	%[mask],0xffffffff\n"
+			"	or	%[new],%[prev]\n"
+			"	or	%[prev],%[tmp]\n"
+			"2:	lr	%[tmp],%[prev]\n"
+			"3:	cs	%[prev],%[new],%[address]\n"
+			"4:	jnl	5f\n"
+			"	xr	%[tmp],%[prev]\n"
+			"	xr	%[new],%[tmp]\n"
+			"	nr	%[tmp],%[mask]\n"
+			"	jnz	5f\n"
+			"	brct	%[count],2b\n"
+			"5:	sacf	768\n"
+			"	spka	%[default_key]\n"
+			EX_TABLE_UA_LOAD_REG(0b, 5b, %[rc], %[prev])
+			EX_TABLE_UA_LOAD_REG(1b, 5b, %[rc], %[prev])
+			EX_TABLE_UA_LOAD_REG(3b, 5b, %[rc], %[prev])
+			EX_TABLE_UA_LOAD_REG(4b, 5b, %[rc], %[prev])
+			: [rc] "+&d" (rc),
+			  [prev] "=&d" (prev),
+			  [address] "+Q" (*(int *)address),
+			  [tmp] "+&d" (_old),
+			  [new] "+&d" (_new),
+			  [mask] "+&d" (mask),
+			  [count] "=a" (count)
+			: [key] "%[count]" (key << 4),
+			  [default_key] "J" (PAGE_DEFAULT_KEY),
+			  [max_loops] "J" (CMPXCHG_USER_KEY_MAX_LOOPS)
+			: "memory", "cc");
+		*(unsigned char *)uval = prev >> shift;
+		if (!count)
+			rc = -EAGAIN;
+		return rc;
+	}
+	case 2: {
+		unsigned int prev, shift, mask, _old, _new;
+		unsigned long count;
+
+		shift = (2 ^ (address & 2)) << 3;
+		address ^= address & 2;
+		_old = ((unsigned int)old & 0xffff) << shift;
+		_new = ((unsigned int)new & 0xffff) << shift;
+		mask = ~(0xffff << shift);
+		asm volatile(
+			"	spka	0(%[key])\n"
+			"	sacf	256\n"
+			"	llill	%[count],%[max_loops]\n"
+			"0:	l	%[prev],%[address]\n"
+			"1:	nr	%[prev],%[mask]\n"
+			"	xilf	%[mask],0xffffffff\n"
+			"	or	%[new],%[prev]\n"
+			"	or	%[prev],%[tmp]\n"
+			"2:	lr	%[tmp],%[prev]\n"
+			"3:	cs	%[prev],%[new],%[address]\n"
+			"4:	jnl	5f\n"
+			"	xr	%[tmp],%[prev]\n"
+			"	xr	%[new],%[tmp]\n"
+			"	nr	%[tmp],%[mask]\n"
+			"	jnz	5f\n"
+			"	brct	%[count],2b\n"
+			"5:	sacf	768\n"
+			"	spka	%[default_key]\n"
+			EX_TABLE_UA_LOAD_REG(0b, 5b, %[rc], %[prev])
+			EX_TABLE_UA_LOAD_REG(1b, 5b, %[rc], %[prev])
+			EX_TABLE_UA_LOAD_REG(3b, 5b, %[rc], %[prev])
+			EX_TABLE_UA_LOAD_REG(4b, 5b, %[rc], %[prev])
+			: [rc] "+&d" (rc),
+			  [prev] "=&d" (prev),
+			  [address] "+Q" (*(int *)address),
+			  [tmp] "+&d" (_old),
+			  [new] "+&d" (_new),
+			  [mask] "+&d" (mask),
+			  [count] "=a" (count)
+			: [key] "%[count]" (key << 4),
+			  [default_key] "J" (PAGE_DEFAULT_KEY),
+			  [max_loops] "J" (CMPXCHG_USER_KEY_MAX_LOOPS)
+			: "memory", "cc");
+		*(unsigned short *)uval = prev >> shift;
+		if (!count)
+			rc = -EAGAIN;
+		return rc;
+	}
+	case 4:	{
+		unsigned int prev = old;
+
+		asm volatile(
+			"	spka	0(%[key])\n"
+			"	sacf	256\n"
+			"0:	cs	%[prev],%[new],%[address]\n"
+			"1:	sacf	768\n"
+			"	spka	%[default_key]\n"
+			EX_TABLE_UA_LOAD_REG(0b, 1b, %[rc], %[prev])
+			EX_TABLE_UA_LOAD_REG(1b, 1b, %[rc], %[prev])
+			: [rc] "+&d" (rc),
+			  [prev] "+&d" (prev),
+			  [address] "+Q" (*(int *)address)
+			: [new] "d" ((unsigned int)new),
+			  [key] "a" (key << 4),
+			  [default_key] "J" (PAGE_DEFAULT_KEY)
+			: "memory", "cc");
+		*(unsigned int *)uval = prev;
+		return rc;
+	}
+	case 8: {
+		unsigned long prev = old;
+
+		asm volatile(
+			"	spka	0(%[key])\n"
+			"	sacf	256\n"
+			"0:	csg	%[prev],%[new],%[address]\n"
+			"1:	sacf	768\n"
+			"	spka	%[default_key]\n"
+			EX_TABLE_UA_LOAD_REG(0b, 1b, %[rc], %[prev])
+			EX_TABLE_UA_LOAD_REG(1b, 1b, %[rc], %[prev])
+			: [rc] "+&d" (rc),
+			  [prev] "+&d" (prev),
+			  [address] "+QS" (*(long *)address)
+			: [new] "d" ((unsigned long)new),
+			  [key] "a" (key << 4),
+			  [default_key] "J" (PAGE_DEFAULT_KEY)
+			: "memory", "cc");
+		*(unsigned long *)uval = prev;
+		return rc;
+	}
+	case 16: {
+		__uint128_t prev = old;
+
+		asm volatile(
+			"	spka	0(%[key])\n"
+			"	sacf	256\n"
+			"0:	cdsg	%[prev],%[new],%[address]\n"
+			"1:	sacf	768\n"
+			"	spka	%[default_key]\n"
+			EX_TABLE_UA_LOAD_REGPAIR(0b, 1b, %[rc], %[prev])
+			EX_TABLE_UA_LOAD_REGPAIR(1b, 1b, %[rc], %[prev])
+			: [rc] "+&d" (rc),
+			  [prev] "+&d" (prev),
+			  [address] "+QS" (*(__int128_t *)address)
+			: [new] "d" (new),
+			  [key] "a" (key << 4),
+			  [default_key] "J" (PAGE_DEFAULT_KEY)
+			: "memory", "cc");
+		*(__uint128_t *)uval = prev;
+		return rc;
+	}
+	}
+	__cmpxchg_user_key_called_with_bad_pointer();
+	return rc;
+}
+
+/**
+ * cmpxchg_user_key() - cmpxchg with user space target, honoring storage keys
+ * @ptr: User space address of value to compare to @old and exchange with
+ *	 @new. Must be aligned to sizeof(*@ptr).
+ * @uval: Address where the old value of *@ptr is written to.
+ * @old: Old value. Compared to the content pointed to by @ptr in order to
+ *	 determine if the exchange occurs. The old value read from *@ptr is
+ *	 written to *@uval.
+ * @new: New value to place at *@ptr.
+ * @key: Access key to use for checking storage key protection.
+ *
+ * Perform a cmpxchg on a user space target, honoring storage key protection.
+ * @key alone determines how key checking is performed, neither
+ * storage-protection-override nor fetch-protection-override apply.
+ * The caller must compare *@uval and @old to determine if values have been
+ * exchanged. In case of an exception *@uval is set to zero.
+ *
+ * Return:     0: cmpxchg executed
+ *	       -EFAULT: an exception happened when trying to access *@ptr
+ *	       -EAGAIN: maxed out number of retries (byte and short only)
+ */
+#define cmpxchg_user_key(ptr, uval, old, new, key)			\
+({									\
+	__typeof__(ptr) __ptr = (ptr);					\
+	__typeof__(uval) __uval = (uval);				\
+									\
+	BUILD_BUG_ON(sizeof(*(__ptr)) != sizeof(*(__uval)));		\
+	might_fault();							\
+	__chk_user_ptr(__ptr);						\
+	__cmpxchg_user_key((unsigned long)(__ptr), (void *)(__uval),	\
+			   (old), (new), (key), sizeof(*(__ptr)));	\
+})
+
+#endif /* __S390_UACCESS_H */
diff --git a/arch/s390/include/asm/unistd.h b/arch/s390/include/asm/unistd.h
new file mode 100644
index 0000000000..4260bc5ce7
--- /dev/null
+++ b/arch/s390/include/asm/unistd.h
@@ -0,0 +1,40 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ *  S390 version
+ *
+ *  Derived from "include/asm-i386/unistd.h"
+ */
+#ifndef _ASM_S390_UNISTD_H_
+#define _ASM_S390_UNISTD_H_
+
+#include <uapi/asm/unistd.h>
+#include <asm/unistd_nr.h>
+
+#define __ARCH_WANT_NEW_STAT
+#define __ARCH_WANT_OLD_READDIR
+#define __ARCH_WANT_SYS_ALARM
+#define __ARCH_WANT_SYS_GETHOSTNAME
+#define __ARCH_WANT_SYS_PAUSE
+#define __ARCH_WANT_SYS_SIGNAL
+#define __ARCH_WANT_SYS_UTIME
+#define __ARCH_WANT_SYS_SOCKETCALL
+#define __ARCH_WANT_SYS_IPC
+#define __ARCH_WANT_SYS_FADVISE64
+#define __ARCH_WANT_SYS_GETPGRP
+#define __ARCH_WANT_SYS_NICE
+#define __ARCH_WANT_SYS_OLD_GETRLIMIT
+#define __ARCH_WANT_SYS_OLD_MMAP
+#define __ARCH_WANT_SYS_OLDUMOUNT
+#define __ARCH_WANT_SYS_SIGPENDING
+#define __ARCH_WANT_SYS_SIGPROCMASK
+# ifdef CONFIG_COMPAT
+#   define __ARCH_WANT_COMPAT_STAT
+#   define __ARCH_WANT_SYS_TIME32
+#   define __ARCH_WANT_SYS_UTIME32
+# endif
+#define __ARCH_WANT_SYS_FORK
+#define __ARCH_WANT_SYS_VFORK
+#define __ARCH_WANT_SYS_CLONE
+#define __ARCH_WANT_SYS_CLONE3
+
+#endif /* _ASM_S390_UNISTD_H_ */
diff --git a/arch/s390/include/asm/unwind.h b/arch/s390/include/asm/unwind.h
new file mode 100644
index 0000000000..b8ecf04e34
--- /dev/null
+++ b/arch/s390/include/asm/unwind.h
@@ -0,0 +1,98 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_S390_UNWIND_H
+#define _ASM_S390_UNWIND_H
+
+#include <linux/sched.h>
+#include <linux/ftrace.h>
+#include <linux/rethook.h>
+#include <linux/llist.h>
+#include <asm/ptrace.h>
+#include <asm/stacktrace.h>
+
+/*
+ * To use the stack unwinder it has to be initialized with unwind_start.
+ * There four combinations for task and regs:
+ * 1) task==NULL, regs==NULL: the unwind starts for the task that is currently
+ *    running, sp/ip picked up from the CPU registers
+ * 2) task==NULL, regs!=NULL: the unwind starts from the sp/ip found in
+ *    the struct pt_regs of an interrupt frame for the current task
+ * 3) task!=NULL, regs==NULL: the unwind starts for an inactive task with
+ *    the sp picked up from task->thread.ksp and the ip picked up from the
+ *    return address stored by __switch_to
+ * 4) task!=NULL, regs!=NULL: the sp/ip are picked up from the interrupt
+ *    frame 'regs' of a inactive task
+ * If 'first_frame' is not zero unwind_start skips unwind frames until it
+ * reaches the specified stack pointer.
+ * The end of the unwinding is indicated with unwind_done, this can be true
+ * right after unwind_start, e.g. with first_frame!=0 that can not be found.
+ * unwind_next_frame skips to the next frame.
+ * Once the unwind is completed unwind_error() can be used to check if there
+ * has been a situation where the unwinder could not correctly understand
+ * the tasks call chain.
+ */
+
+struct unwind_state {
+	struct stack_info stack_info;
+	unsigned long stack_mask;
+	struct task_struct *task;
+	struct pt_regs *regs;
+	unsigned long sp, ip;
+	int graph_idx;
+	struct llist_node *kr_cur;
+	bool reliable;
+	bool error;
+};
+
+/* Recover the return address modified by rethook and ftrace_graph. */
+static inline unsigned long unwind_recover_ret_addr(struct unwind_state *state,
+						    unsigned long ip)
+{
+	ip = ftrace_graph_ret_addr(state->task, &state->graph_idx, ip, (void *)state->sp);
+#ifdef CONFIG_RETHOOK
+	if (is_rethook_trampoline(ip))
+		ip = rethook_find_ret_addr(state->task, state->sp, &state->kr_cur);
+#endif
+	return ip;
+}
+
+void __unwind_start(struct unwind_state *state, struct task_struct *task,
+		    struct pt_regs *regs, unsigned long first_frame);
+bool unwind_next_frame(struct unwind_state *state);
+unsigned long unwind_get_return_address(struct unwind_state *state);
+
+static inline bool unwind_done(struct unwind_state *state)
+{
+	return state->stack_info.type == STACK_TYPE_UNKNOWN;
+}
+
+static inline bool unwind_error(struct unwind_state *state)
+{
+	return state->error;
+}
+
+static __always_inline void unwind_start(struct unwind_state *state,
+					 struct task_struct *task,
+					 struct pt_regs *regs,
+					 unsigned long first_frame)
+{
+	task = task ?: current;
+	first_frame = first_frame ?: get_stack_pointer(task, regs);
+	__unwind_start(state, task, regs, first_frame);
+}
+
+static inline struct pt_regs *unwind_get_entry_regs(struct unwind_state *state)
+{
+	return unwind_done(state) ? NULL : state->regs;
+}
+
+#define unwind_for_each_frame(state, task, regs, first_frame)	\
+	for (unwind_start(state, task, regs, first_frame);	\
+	     !unwind_done(state);				\
+	     unwind_next_frame(state))
+
+static inline void unwind_init(void) {}
+static inline void unwind_module_init(struct module *mod, void *orc_ip,
+				      size_t orc_ip_size, void *orc,
+				      size_t orc_size) {}
+
+#endif /* _ASM_S390_UNWIND_H */
diff --git a/arch/s390/include/asm/uprobes.h b/arch/s390/include/asm/uprobes.h
new file mode 100644
index 0000000000..b60b3c7ef6
--- /dev/null
+++ b/arch/s390/include/asm/uprobes.h
@@ -0,0 +1,33 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ *    User-space Probes (UProbes) for s390
+ *
+ *    Copyright IBM Corp. 2014
+ *    Author(s): Jan Willeke,
+ */
+
+#ifndef _ASM_UPROBES_H
+#define _ASM_UPROBES_H
+
+#include <linux/notifier.h>
+
+typedef u16 uprobe_opcode_t;
+
+#define UPROBE_XOL_SLOT_BYTES	256 /* cache aligned */
+
+#define UPROBE_SWBP_INSN	0x0002
+#define UPROBE_SWBP_INSN_SIZE	2
+
+struct arch_uprobe {
+	union{
+		uprobe_opcode_t insn[3];
+		uprobe_opcode_t ixol[3];
+	};
+	unsigned int saved_per : 1;
+	unsigned int saved_int_code;
+};
+
+struct arch_uprobe_task {
+};
+
+#endif	/* _ASM_UPROBES_H */
diff --git a/arch/s390/include/asm/user.h b/arch/s390/include/asm/user.h
new file mode 100644
index 0000000000..8e8aaf4858
--- /dev/null
+++ b/arch/s390/include/asm/user.h
@@ -0,0 +1,71 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ *  S390 version
+ *
+ *  Derived from "include/asm-i386/usr.h"
+ */
+
+#ifndef _S390_USER_H
+#define _S390_USER_H
+
+#include <asm/page.h>
+#include <asm/ptrace.h>
+/* Core file format: The core file is written in such a way that gdb
+   can understand it and provide useful information to the user (under
+   linux we use the 'trad-core' bfd).  There are quite a number of
+   obstacles to being able to view the contents of the floating point
+   registers, and until these are solved you will not be able to view the
+   contents of them.  Actually, you can read in the core file and look at
+   the contents of the user struct to find out what the floating point
+   registers contain.
+   The actual file contents are as follows:
+   UPAGE: 1 page consisting of a user struct that tells gdb what is present
+   in the file.  Directly after this is a copy of the task_struct, which
+   is currently not used by gdb, but it may come in useful at some point.
+   All of the registers are stored as part of the upage.  The upage should
+   always be only one page.
+   DATA: The data area is stored.  We use current->end_text to
+   current->brk to pick up all of the user variables, plus any memory
+   that may have been malloced.  No attempt is made to determine if a page
+   is demand-zero or if a page is totally unused, we just cover the entire
+   range.  All of the addresses are rounded in such a way that an integral
+   number of pages is written.
+   STACK: We need the stack information in order to get a meaningful
+   backtrace.  We need to write the data from (esp) to
+   current->start_stack, so we round each of these off in order to be able
+   to write an integer number of pages.
+   The minimum core file size is 3 pages, or 12288 bytes.
+*/
+
+
+/*
+ * This is the old layout of "struct pt_regs", and
+ * is still the layout used by user mode (the new
+ * pt_regs doesn't have all registers as the kernel
+ * doesn't use the extra segment registers)
+ */
+
+/* When the kernel dumps core, it starts by dumping the user struct -
+   this will be used by gdb to figure out where the data and stack segments
+   are within the file, and what virtual addresses to use. */
+struct user {
+/* We start with the registers, to mimic the way that "memory" is returned
+   from the ptrace(3,...) function.  */
+  struct user_regs_struct regs;		/* Where the registers are actually stored */
+/* The rest of this junk is to help gdb figure out what goes where */
+  unsigned long int u_tsize;	/* Text segment size (pages). */
+  unsigned long int u_dsize;	/* Data segment size (pages). */
+  unsigned long int u_ssize;	/* Stack segment size (pages). */
+  unsigned long start_code;     /* Starting virtual address of text. */
+  unsigned long start_stack;	/* Starting virtual address of stack area.
+				   This is actually the bottom of the stack,
+				   the top of the stack is always found in the
+				   esp register.  */
+  long int signal;     		/* Signal that caused the core dump. */
+  unsigned long u_ar0;		/* Used by gdb to help find the values for */
+				/* the registers. */
+  unsigned long magic;		/* To uniquely identify a core file */
+  char u_comm[32];		/* User command that was responsible */
+};
+
+#endif /* _S390_USER_H */
diff --git a/arch/s390/include/asm/uv.h b/arch/s390/include/asm/uv.h
new file mode 100644
index 0000000000..0e7bd38739
--- /dev/null
+++ b/arch/s390/include/asm/uv.h
@@ -0,0 +1,517 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Ultravisor Interfaces
+ *
+ * Copyright IBM Corp. 2019, 2022
+ *
+ * Author(s):
+ *	Vasily Gorbik <gor@linux.ibm.com>
+ *	Janosch Frank <frankja@linux.ibm.com>
+ */
+#ifndef _ASM_S390_UV_H
+#define _ASM_S390_UV_H
+
+#include <linux/types.h>
+#include <linux/errno.h>
+#include <linux/bug.h>
+#include <linux/sched.h>
+#include <asm/page.h>
+#include <asm/gmap.h>
+
+#define UVC_CC_OK	0
+#define UVC_CC_ERROR	1
+#define UVC_CC_BUSY	2
+#define UVC_CC_PARTIAL	3
+
+#define UVC_RC_EXECUTED		0x0001
+#define UVC_RC_INV_CMD		0x0002
+#define UVC_RC_INV_STATE	0x0003
+#define UVC_RC_INV_LEN		0x0005
+#define UVC_RC_NO_RESUME	0x0007
+#define UVC_RC_NEED_DESTROY	0x8000
+
+#define UVC_CMD_QUI			0x0001
+#define UVC_CMD_INIT_UV			0x000f
+#define UVC_CMD_CREATE_SEC_CONF		0x0100
+#define UVC_CMD_DESTROY_SEC_CONF	0x0101
+#define UVC_CMD_DESTROY_SEC_CONF_FAST	0x0102
+#define UVC_CMD_CREATE_SEC_CPU		0x0120
+#define UVC_CMD_DESTROY_SEC_CPU		0x0121
+#define UVC_CMD_CONV_TO_SEC_STOR	0x0200
+#define UVC_CMD_CONV_FROM_SEC_STOR	0x0201
+#define UVC_CMD_DESTR_SEC_STOR		0x0202
+#define UVC_CMD_SET_SEC_CONF_PARAMS	0x0300
+#define UVC_CMD_UNPACK_IMG		0x0301
+#define UVC_CMD_VERIFY_IMG		0x0302
+#define UVC_CMD_CPU_RESET		0x0310
+#define UVC_CMD_CPU_RESET_INITIAL	0x0311
+#define UVC_CMD_PREPARE_RESET		0x0320
+#define UVC_CMD_CPU_RESET_CLEAR		0x0321
+#define UVC_CMD_CPU_SET_STATE		0x0330
+#define UVC_CMD_SET_UNSHARE_ALL		0x0340
+#define UVC_CMD_PIN_PAGE_SHARED		0x0341
+#define UVC_CMD_UNPIN_PAGE_SHARED	0x0342
+#define UVC_CMD_DUMP_INIT		0x0400
+#define UVC_CMD_DUMP_CONF_STOR_STATE	0x0401
+#define UVC_CMD_DUMP_CPU		0x0402
+#define UVC_CMD_DUMP_COMPLETE		0x0403
+#define UVC_CMD_SET_SHARED_ACCESS	0x1000
+#define UVC_CMD_REMOVE_SHARED_ACCESS	0x1001
+#define UVC_CMD_RETR_ATTEST		0x1020
+#define UVC_CMD_ADD_SECRET		0x1031
+#define UVC_CMD_LIST_SECRETS		0x1033
+#define UVC_CMD_LOCK_SECRETS		0x1034
+
+/* Bits in installed uv calls */
+enum uv_cmds_inst {
+	BIT_UVC_CMD_QUI = 0,
+	BIT_UVC_CMD_INIT_UV = 1,
+	BIT_UVC_CMD_CREATE_SEC_CONF = 2,
+	BIT_UVC_CMD_DESTROY_SEC_CONF = 3,
+	BIT_UVC_CMD_CREATE_SEC_CPU = 4,
+	BIT_UVC_CMD_DESTROY_SEC_CPU = 5,
+	BIT_UVC_CMD_CONV_TO_SEC_STOR = 6,
+	BIT_UVC_CMD_CONV_FROM_SEC_STOR = 7,
+	BIT_UVC_CMD_SET_SHARED_ACCESS = 8,
+	BIT_UVC_CMD_REMOVE_SHARED_ACCESS = 9,
+	BIT_UVC_CMD_SET_SEC_PARMS = 11,
+	BIT_UVC_CMD_UNPACK_IMG = 13,
+	BIT_UVC_CMD_VERIFY_IMG = 14,
+	BIT_UVC_CMD_CPU_RESET = 15,
+	BIT_UVC_CMD_CPU_RESET_INITIAL = 16,
+	BIT_UVC_CMD_CPU_SET_STATE = 17,
+	BIT_UVC_CMD_PREPARE_RESET = 18,
+	BIT_UVC_CMD_CPU_PERFORM_CLEAR_RESET = 19,
+	BIT_UVC_CMD_UNSHARE_ALL = 20,
+	BIT_UVC_CMD_PIN_PAGE_SHARED = 21,
+	BIT_UVC_CMD_UNPIN_PAGE_SHARED = 22,
+	BIT_UVC_CMD_DESTROY_SEC_CONF_FAST = 23,
+	BIT_UVC_CMD_DUMP_INIT = 24,
+	BIT_UVC_CMD_DUMP_CONFIG_STOR_STATE = 25,
+	BIT_UVC_CMD_DUMP_CPU = 26,
+	BIT_UVC_CMD_DUMP_COMPLETE = 27,
+	BIT_UVC_CMD_RETR_ATTEST = 28,
+	BIT_UVC_CMD_ADD_SECRET = 29,
+	BIT_UVC_CMD_LIST_SECRETS = 30,
+	BIT_UVC_CMD_LOCK_SECRETS = 31,
+};
+
+enum uv_feat_ind {
+	BIT_UV_FEAT_MISC = 0,
+	BIT_UV_FEAT_AIV = 1,
+	BIT_UV_FEAT_AP = 4,
+	BIT_UV_FEAT_AP_INTR = 5,
+};
+
+struct uv_cb_header {
+	u16 len;
+	u16 cmd;	/* Command Code */
+	u16 rc;		/* Response Code */
+	u16 rrc;	/* Return Reason Code */
+} __packed __aligned(8);
+
+/* Query Ultravisor Information */
+struct uv_cb_qui {
+	struct uv_cb_header header;		/* 0x0000 */
+	u64 reserved08;				/* 0x0008 */
+	u64 inst_calls_list[4];			/* 0x0010 */
+	u64 reserved30[2];			/* 0x0030 */
+	u64 uv_base_stor_len;			/* 0x0040 */
+	u64 reserved48;				/* 0x0048 */
+	u64 conf_base_phys_stor_len;		/* 0x0050 */
+	u64 conf_base_virt_stor_len;		/* 0x0058 */
+	u64 conf_virt_var_stor_len;		/* 0x0060 */
+	u64 cpu_stor_len;			/* 0x0068 */
+	u32 reserved70[3];			/* 0x0070 */
+	u32 max_num_sec_conf;			/* 0x007c */
+	u64 max_guest_stor_addr;		/* 0x0080 */
+	u8  reserved88[0x9e - 0x88];		/* 0x0088 */
+	u16 max_guest_cpu_id;			/* 0x009e */
+	u64 uv_feature_indications;		/* 0x00a0 */
+	u64 reserveda8;				/* 0x00a8 */
+	u64 supp_se_hdr_versions;		/* 0x00b0 */
+	u64 supp_se_hdr_pcf;			/* 0x00b8 */
+	u64 reservedc0;				/* 0x00c0 */
+	u64 conf_dump_storage_state_len;	/* 0x00c8 */
+	u64 conf_dump_finalize_len;		/* 0x00d0 */
+	u64 reservedd8;				/* 0x00d8 */
+	u64 supp_att_req_hdr_ver;		/* 0x00e0 */
+	u64 supp_att_pflags;			/* 0x00e8 */
+	u64 reservedf0;				/* 0x00f0 */
+	u64 supp_add_secret_req_ver;		/* 0x00f8 */
+	u64 supp_add_secret_pcf;		/* 0x0100 */
+	u64 supp_secret_types;			/* 0x0180 */
+	u16 max_secrets;			/* 0x0110 */
+	u8 reserved112[0x120 - 0x112];		/* 0x0112 */
+} __packed __aligned(8);
+
+/* Initialize Ultravisor */
+struct uv_cb_init {
+	struct uv_cb_header header;
+	u64 reserved08[2];
+	u64 stor_origin;
+	u64 stor_len;
+	u64 reserved28[4];
+} __packed __aligned(8);
+
+/* Create Guest Configuration */
+struct uv_cb_cgc {
+	struct uv_cb_header header;
+	u64 reserved08[2];
+	u64 guest_handle;
+	u64 conf_base_stor_origin;
+	u64 conf_virt_stor_origin;
+	u8  reserved30[6];
+	union {
+		struct {
+			u16 : 14;
+			u16 ap_instr_intr : 1;
+			u16 ap_allow_instr : 1;
+		};
+		u16 raw;
+	} flags;
+	u64 guest_stor_origin;
+	u64 guest_stor_len;
+	u64 guest_sca;
+	u64 guest_asce;
+	u64 reserved58[5];
+} __packed __aligned(8);
+
+/* Create Secure CPU */
+struct uv_cb_csc {
+	struct uv_cb_header header;
+	u64 reserved08[2];
+	u64 cpu_handle;
+	u64 guest_handle;
+	u64 stor_origin;
+	u8  reserved30[6];
+	u16 num;
+	u64 state_origin;
+	u64 reserved40[4];
+} __packed __aligned(8);
+
+/* Convert to Secure */
+struct uv_cb_cts {
+	struct uv_cb_header header;
+	u64 reserved08[2];
+	u64 guest_handle;
+	u64 gaddr;
+} __packed __aligned(8);
+
+/* Convert from Secure / Pin Page Shared */
+struct uv_cb_cfs {
+	struct uv_cb_header header;
+	u64 reserved08[2];
+	u64 paddr;
+} __packed __aligned(8);
+
+/* Set Secure Config Parameter */
+struct uv_cb_ssc {
+	struct uv_cb_header header;
+	u64 reserved08[2];
+	u64 guest_handle;
+	u64 sec_header_origin;
+	u32 sec_header_len;
+	u32 reserved2c;
+	u64 reserved30[4];
+} __packed __aligned(8);
+
+/* Unpack */
+struct uv_cb_unp {
+	struct uv_cb_header header;
+	u64 reserved08[2];
+	u64 guest_handle;
+	u64 gaddr;
+	u64 tweak[2];
+	u64 reserved38[3];
+} __packed __aligned(8);
+
+#define PV_CPU_STATE_OPR	1
+#define PV_CPU_STATE_STP	2
+#define PV_CPU_STATE_CHKSTP	3
+#define PV_CPU_STATE_OPR_LOAD	5
+
+struct uv_cb_cpu_set_state {
+	struct uv_cb_header header;
+	u64 reserved08[2];
+	u64 cpu_handle;
+	u8  reserved20[7];
+	u8  state;
+	u64 reserved28[5];
+};
+
+/*
+ * A common UV call struct for calls that take no payload
+ * Examples:
+ * Destroy cpu/config
+ * Verify
+ */
+struct uv_cb_nodata {
+	struct uv_cb_header header;
+	u64 reserved08[2];
+	u64 handle;
+	u64 reserved20[4];
+} __packed __aligned(8);
+
+/* Destroy Configuration Fast */
+struct uv_cb_destroy_fast {
+	struct uv_cb_header header;
+	u64 reserved08[2];
+	u64 handle;
+	u64 reserved20[5];
+} __packed __aligned(8);
+
+/* Set Shared Access */
+struct uv_cb_share {
+	struct uv_cb_header header;
+	u64 reserved08[3];
+	u64 paddr;
+	u64 reserved28;
+} __packed __aligned(8);
+
+/* Retrieve Attestation Measurement */
+struct uv_cb_attest {
+	struct uv_cb_header header;	/* 0x0000 */
+	u64 reserved08[2];		/* 0x0008 */
+	u64 arcb_addr;			/* 0x0018 */
+	u64 cont_token;			/* 0x0020 */
+	u8  reserved28[6];		/* 0x0028 */
+	u16 user_data_len;		/* 0x002e */
+	u8  user_data[256];		/* 0x0030 */
+	u32 reserved130[3];		/* 0x0130 */
+	u32 meas_len;			/* 0x013c */
+	u64 meas_addr;			/* 0x0140 */
+	u8  config_uid[16];		/* 0x0148 */
+	u32 reserved158;		/* 0x0158 */
+	u32 add_data_len;		/* 0x015c */
+	u64 add_data_addr;		/* 0x0160 */
+	u64 reserved168[4];		/* 0x0168 */
+} __packed __aligned(8);
+
+struct uv_cb_dump_cpu {
+	struct uv_cb_header header;
+	u64 reserved08[2];
+	u64 cpu_handle;
+	u64 dump_area_origin;
+	u64 reserved28[5];
+} __packed __aligned(8);
+
+struct uv_cb_dump_stor_state {
+	struct uv_cb_header header;
+	u64 reserved08[2];
+	u64 config_handle;
+	u64 dump_area_origin;
+	u64 gaddr;
+	u64 reserved28[4];
+} __packed __aligned(8);
+
+struct uv_cb_dump_complete {
+	struct uv_cb_header header;
+	u64 reserved08[2];
+	u64 config_handle;
+	u64 dump_area_origin;
+	u64 reserved30[5];
+} __packed __aligned(8);
+
+/*
+ * A common UV call struct for pv guests that contains a single address
+ * Examples:
+ * Add Secret
+ * List Secrets
+ */
+struct uv_cb_guest_addr {
+	struct uv_cb_header header;
+	u64 reserved08[3];
+	u64 addr;
+	u64 reserved28[4];
+} __packed __aligned(8);
+
+static inline int __uv_call(unsigned long r1, unsigned long r2)
+{
+	int cc;
+
+	asm volatile(
+		"	.insn rrf,0xB9A40000,%[r1],%[r2],0,0\n"
+		"	ipm	%[cc]\n"
+		"	srl	%[cc],28\n"
+		: [cc] "=d" (cc)
+		: [r1] "a" (r1), [r2] "a" (r2)
+		: "memory", "cc");
+	return cc;
+}
+
+static inline int uv_call(unsigned long r1, unsigned long r2)
+{
+	int cc;
+
+	do {
+		cc = __uv_call(r1, r2);
+	} while (cc > 1);
+	return cc;
+}
+
+/* Low level uv_call that avoids stalls for long running busy conditions  */
+static inline int uv_call_sched(unsigned long r1, unsigned long r2)
+{
+	int cc;
+
+	do {
+		cc = __uv_call(r1, r2);
+		cond_resched();
+	} while (cc > 1);
+	return cc;
+}
+
+/*
+ * special variant of uv_call that only transports the cpu or guest
+ * handle and the command, like destroy or verify.
+ */
+static inline int uv_cmd_nodata(u64 handle, u16 cmd, u16 *rc, u16 *rrc)
+{
+	struct uv_cb_nodata uvcb = {
+		.header.cmd = cmd,
+		.header.len = sizeof(uvcb),
+		.handle = handle,
+	};
+	int cc;
+
+	WARN(!handle, "No handle provided to Ultravisor call cmd %x\n", cmd);
+	cc = uv_call_sched(0, (u64)&uvcb);
+	*rc = uvcb.header.rc;
+	*rrc = uvcb.header.rrc;
+	return cc ? -EINVAL : 0;
+}
+
+struct uv_info {
+	unsigned long inst_calls_list[4];
+	unsigned long uv_base_stor_len;
+	unsigned long guest_base_stor_len;
+	unsigned long guest_virt_base_stor_len;
+	unsigned long guest_virt_var_stor_len;
+	unsigned long guest_cpu_stor_len;
+	unsigned long max_sec_stor_addr;
+	unsigned int max_num_sec_conf;
+	unsigned short max_guest_cpu_id;
+	unsigned long uv_feature_indications;
+	unsigned long supp_se_hdr_ver;
+	unsigned long supp_se_hdr_pcf;
+	unsigned long conf_dump_storage_state_len;
+	unsigned long conf_dump_finalize_len;
+	unsigned long supp_att_req_hdr_ver;
+	unsigned long supp_att_pflags;
+	unsigned long supp_add_secret_req_ver;
+	unsigned long supp_add_secret_pcf;
+	unsigned long supp_secret_types;
+	unsigned short max_secrets;
+};
+
+extern struct uv_info uv_info;
+
+static inline bool uv_has_feature(u8 feature_bit)
+{
+	if (feature_bit >= sizeof(uv_info.uv_feature_indications) * 8)
+		return false;
+	return test_bit_inv(feature_bit, &uv_info.uv_feature_indications);
+}
+
+#ifdef CONFIG_PROTECTED_VIRTUALIZATION_GUEST
+extern int prot_virt_guest;
+
+static inline int is_prot_virt_guest(void)
+{
+	return prot_virt_guest;
+}
+
+static inline int share(unsigned long addr, u16 cmd)
+{
+	struct uv_cb_share uvcb = {
+		.header.cmd = cmd,
+		.header.len = sizeof(uvcb),
+		.paddr = addr
+	};
+
+	if (!is_prot_virt_guest())
+		return -EOPNOTSUPP;
+	/*
+	 * Sharing is page wise, if we encounter addresses that are
+	 * not page aligned, we assume something went wrong. If
+	 * malloced structs are passed to this function, we could leak
+	 * data to the hypervisor.
+	 */
+	BUG_ON(addr & ~PAGE_MASK);
+
+	if (!uv_call(0, (u64)&uvcb))
+		return 0;
+	return -EINVAL;
+}
+
+/*
+ * Guest 2 request to the Ultravisor to make a page shared with the
+ * hypervisor for IO.
+ *
+ * @addr: Real or absolute address of the page to be shared
+ */
+static inline int uv_set_shared(unsigned long addr)
+{
+	return share(addr, UVC_CMD_SET_SHARED_ACCESS);
+}
+
+/*
+ * Guest 2 request to the Ultravisor to make a page unshared.
+ *
+ * @addr: Real or absolute address of the page to be unshared
+ */
+static inline int uv_remove_shared(unsigned long addr)
+{
+	return share(addr, UVC_CMD_REMOVE_SHARED_ACCESS);
+}
+
+#else
+#define is_prot_virt_guest() 0
+static inline int uv_set_shared(unsigned long addr) { return 0; }
+static inline int uv_remove_shared(unsigned long addr) { return 0; }
+#endif
+
+#if IS_ENABLED(CONFIG_KVM)
+extern int prot_virt_host;
+
+static inline int is_prot_virt_host(void)
+{
+	return prot_virt_host;
+}
+
+int uv_pin_shared(unsigned long paddr);
+int gmap_make_secure(struct gmap *gmap, unsigned long gaddr, void *uvcb);
+int gmap_destroy_page(struct gmap *gmap, unsigned long gaddr);
+int uv_destroy_owned_page(unsigned long paddr);
+int uv_convert_from_secure(unsigned long paddr);
+int uv_convert_owned_from_secure(unsigned long paddr);
+int gmap_convert_to_secure(struct gmap *gmap, unsigned long gaddr);
+
+void setup_uv(void);
+#else
+#define is_prot_virt_host() 0
+static inline void setup_uv(void) {}
+
+static inline int uv_pin_shared(unsigned long paddr)
+{
+	return 0;
+}
+
+static inline int uv_destroy_owned_page(unsigned long paddr)
+{
+	return 0;
+}
+
+static inline int uv_convert_from_secure(unsigned long paddr)
+{
+	return 0;
+}
+
+static inline int uv_convert_owned_from_secure(unsigned long paddr)
+{
+	return 0;
+}
+#endif
+
+#endif /* _ASM_S390_UV_H */
diff --git a/arch/s390/include/asm/vdso.h b/arch/s390/include/asm/vdso.h
new file mode 100644
index 0000000000..53165aa781
--- /dev/null
+++ b/arch/s390/include/asm/vdso.h
@@ -0,0 +1,34 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __S390_VDSO_H__
+#define __S390_VDSO_H__
+
+#include <vdso/datapage.h>
+
+#ifndef __ASSEMBLY__
+
+#include <generated/vdso64-offsets.h>
+#ifdef CONFIG_COMPAT
+#include <generated/vdso32-offsets.h>
+#endif
+
+#define VDSO64_SYMBOL(tsk, name) ((tsk)->mm->context.vdso_base + (vdso64_offset_##name))
+#ifdef CONFIG_COMPAT
+#define VDSO32_SYMBOL(tsk, name) ((tsk)->mm->context.vdso_base + (vdso32_offset_##name))
+#else
+#define VDSO32_SYMBOL(tsk, name) (-1UL)
+#endif
+
+extern struct vdso_data *vdso_data;
+
+int vdso_getcpu_init(void);
+
+#endif /* __ASSEMBLY__ */
+
+/* Default link address for the vDSO */
+#define VDSO_LBASE	0
+
+#define __VVAR_PAGES	2
+
+#define VDSO_VERSION_STRING	LINUX_2.6.29
+
+#endif /* __S390_VDSO_H__ */
diff --git a/arch/s390/include/asm/vdso/clocksource.h b/arch/s390/include/asm/vdso/clocksource.h
new file mode 100644
index 0000000000..a93eda0ce7
--- /dev/null
+++ b/arch/s390/include/asm/vdso/clocksource.h
@@ -0,0 +1,8 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __ASM_VDSO_CLOCKSOURCE_H
+#define __ASM_VDSO_CLOCKSOURCE_H
+
+#define VDSO_ARCH_CLOCKMODES	\
+	VDSO_CLOCKMODE_TOD
+
+#endif /* __ASM_VDSO_CLOCKSOURCE_H */
diff --git a/arch/s390/include/asm/vdso/data.h b/arch/s390/include/asm/vdso/data.h
new file mode 100644
index 0000000000..73ee891426
--- /dev/null
+++ b/arch/s390/include/asm/vdso/data.h
@@ -0,0 +1,13 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __S390_ASM_VDSO_DATA_H
+#define __S390_ASM_VDSO_DATA_H
+
+#include <linux/types.h>
+#include <vdso/datapage.h>
+
+struct arch_vdso_data {
+	__s64 tod_steering_delta;
+	__u64 tod_steering_end;
+};
+
+#endif /* __S390_ASM_VDSO_DATA_H */
diff --git a/arch/s390/include/asm/vdso/gettimeofday.h b/arch/s390/include/asm/vdso/gettimeofday.h
new file mode 100644
index 0000000000..db84942eb7
--- /dev/null
+++ b/arch/s390/include/asm/vdso/gettimeofday.h
@@ -0,0 +1,63 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef ASM_VDSO_GETTIMEOFDAY_H
+#define ASM_VDSO_GETTIMEOFDAY_H
+
+#define VDSO_HAS_TIME 1
+
+#define VDSO_HAS_CLOCK_GETRES 1
+
+#include <asm/syscall.h>
+#include <asm/timex.h>
+#include <asm/unistd.h>
+#include <linux/compiler.h>
+
+#define vdso_calc_delta __arch_vdso_calc_delta
+static __always_inline u64 __arch_vdso_calc_delta(u64 cycles, u64 last, u64 mask, u32 mult)
+{
+	return (cycles - last) * mult;
+}
+
+static __always_inline const struct vdso_data *__arch_get_vdso_data(void)
+{
+	return _vdso_data;
+}
+
+static inline u64 __arch_get_hw_counter(s32 clock_mode, const struct vdso_data *vd)
+{
+	u64 adj, now;
+
+	now = get_tod_clock();
+	adj = vd->arch_data.tod_steering_end - now;
+	if (unlikely((s64) adj > 0))
+		now += (vd->arch_data.tod_steering_delta < 0) ? (adj >> 15) : -(adj >> 15);
+	return now;
+}
+
+static __always_inline
+long clock_gettime_fallback(clockid_t clkid, struct __kernel_timespec *ts)
+{
+	return syscall2(__NR_clock_gettime, (long)clkid, (long)ts);
+}
+
+static __always_inline
+long gettimeofday_fallback(register struct __kernel_old_timeval *tv,
+			   register struct timezone *tz)
+{
+	return syscall2(__NR_gettimeofday, (long)tv, (long)tz);
+}
+
+static __always_inline
+long clock_getres_fallback(clockid_t clkid, struct __kernel_timespec *ts)
+{
+	return syscall2(__NR_clock_getres, (long)clkid, (long)ts);
+}
+
+#ifdef CONFIG_TIME_NS
+static __always_inline
+const struct vdso_data *__arch_get_timens_vdso_data(const struct vdso_data *vd)
+{
+	return _timens_data;
+}
+#endif
+
+#endif
diff --git a/arch/s390/include/asm/vdso/processor.h b/arch/s390/include/asm/vdso/processor.h
new file mode 100644
index 0000000000..cfcc3e117c
--- /dev/null
+++ b/arch/s390/include/asm/vdso/processor.h
@@ -0,0 +1,7 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+#ifndef __ASM_VDSO_PROCESSOR_H
+#define __ASM_VDSO_PROCESSOR_H
+
+#define cpu_relax() barrier()
+
+#endif /* __ASM_VDSO_PROCESSOR_H */
diff --git a/arch/s390/include/asm/vdso/vsyscall.h b/arch/s390/include/asm/vdso/vsyscall.h
new file mode 100644
index 0000000000..6c67c08cef
--- /dev/null
+++ b/arch/s390/include/asm/vdso/vsyscall.h
@@ -0,0 +1,26 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __ASM_VDSO_VSYSCALL_H
+#define __ASM_VDSO_VSYSCALL_H
+
+#ifndef __ASSEMBLY__
+
+#include <linux/hrtimer.h>
+#include <linux/timekeeper_internal.h>
+#include <vdso/datapage.h>
+#include <asm/vdso.h>
+/*
+ * Update the vDSO data page to keep in sync with kernel timekeeping.
+ */
+
+static __always_inline struct vdso_data *__s390_get_k_vdso_data(void)
+{
+	return vdso_data;
+}
+#define __arch_get_k_vdso_data __s390_get_k_vdso_data
+
+/* The asm-generic header needs to be included after the definitions above */
+#include <asm-generic/vdso/vsyscall.h>
+
+#endif /* !__ASSEMBLY__ */
+
+#endif /* __ASM_VDSO_VSYSCALL_H */
diff --git a/arch/s390/include/asm/vmalloc.h b/arch/s390/include/asm/vmalloc.h
new file mode 100644
index 0000000000..3ba3a6bdca
--- /dev/null
+++ b/arch/s390/include/asm/vmalloc.h
@@ -0,0 +1,4 @@
+#ifndef _ASM_S390_VMALLOC_H
+#define _ASM_S390_VMALLOC_H
+
+#endif /* _ASM_S390_VMALLOC_H */
diff --git a/arch/s390/include/asm/vmlinux.lds.h b/arch/s390/include/asm/vmlinux.lds.h
new file mode 100644
index 0000000000..cbe670a686
--- /dev/null
+++ b/arch/s390/include/asm/vmlinux.lds.h
@@ -0,0 +1,33 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#include <asm/page.h>
+
+/*
+ * .boot.data section is shared between the decompressor code and the
+ * decompressed kernel. The decompressor will store values in it, and copy
+ * over to the decompressed image before starting it.
+ *
+ * .boot.data variables are kept in separate .boot.data.<var name> sections,
+ * which are sorted by alignment first, then by name before being merged
+ * into single .boot.data section. This way big holes cased by page aligned
+ * structs are avoided and linker produces consistent result.
+ */
+#define BOOT_DATA							\
+	. = ALIGN(PAGE_SIZE);						\
+	.boot.data : {							\
+		__boot_data_start = .;					\
+		*(SORT_BY_ALIGNMENT(SORT_BY_NAME(.boot.data*)))		\
+		__boot_data_end = .;					\
+	}
+
+/*
+ * .boot.preserved.data is similar to .boot.data, but it is not part of the
+ * .init section and thus will be preserved for later use in the decompressed
+ * kernel.
+ */
+#define BOOT_DATA_PRESERVED						\
+	. = ALIGN(PAGE_SIZE);						\
+	.boot.preserved.data : {					\
+		__boot_data_preserved_start = .;			\
+		*(SORT_BY_ALIGNMENT(SORT_BY_NAME(.boot.preserved.data*))) \
+		__boot_data_preserved_end = .;				\
+	}
diff --git a/arch/s390/include/asm/vtime.h b/arch/s390/include/asm/vtime.h
new file mode 100644
index 0000000000..fe17e448c0
--- /dev/null
+++ b/arch/s390/include/asm/vtime.h
@@ -0,0 +1,21 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _S390_VTIME_H
+#define _S390_VTIME_H
+
+#define __ARCH_HAS_VTIME_TASK_SWITCH
+
+static inline void update_timer_sys(void)
+{
+	S390_lowcore.system_timer += S390_lowcore.last_update_timer - S390_lowcore.exit_timer;
+	S390_lowcore.user_timer += S390_lowcore.exit_timer - S390_lowcore.sys_enter_timer;
+	S390_lowcore.last_update_timer = S390_lowcore.sys_enter_timer;
+}
+
+static inline void update_timer_mcck(void)
+{
+	S390_lowcore.system_timer += S390_lowcore.last_update_timer - S390_lowcore.exit_timer;
+	S390_lowcore.user_timer += S390_lowcore.exit_timer - S390_lowcore.mcck_enter_timer;
+	S390_lowcore.last_update_timer = S390_lowcore.mcck_enter_timer;
+}
+
+#endif /* _S390_VTIME_H */
diff --git a/arch/s390/include/asm/vtimer.h b/arch/s390/include/asm/vtimer.h
new file mode 100644
index 0000000000..e601adaa63
--- /dev/null
+++ b/arch/s390/include/asm/vtimer.h
@@ -0,0 +1,30 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ *  Copyright IBM Corp. 2003, 2012
+ *  Virtual CPU timer
+ *
+ *  Author(s): Jan Glauber <jan.glauber@de.ibm.com>
+ */
+
+#ifndef _ASM_S390_TIMER_H
+#define _ASM_S390_TIMER_H
+
+#define VTIMER_MAX_SLICE (0x7fffffffffffffffULL)
+
+struct vtimer_list {
+	struct list_head entry;
+	u64 expires;
+	u64 interval;
+	void (*function)(unsigned long);
+	unsigned long data;
+};
+
+extern void init_virt_timer(struct vtimer_list *timer);
+extern void add_virt_timer(struct vtimer_list *timer);
+extern void add_virt_timer_periodic(struct vtimer_list *timer);
+extern int mod_virt_timer(struct vtimer_list *timer, u64 expires);
+extern int mod_virt_timer_periodic(struct vtimer_list *timer, u64 expires);
+extern int del_virt_timer(struct vtimer_list *timer);
+extern void vtime_init(void);
+
+#endif /* _ASM_S390_TIMER_H */
diff --git a/arch/s390/include/asm/vx-insn-asm.h b/arch/s390/include/asm/vx-insn-asm.h
new file mode 100644
index 0000000000..360f8b36d9
--- /dev/null
+++ b/arch/s390/include/asm/vx-insn-asm.h
@@ -0,0 +1,681 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Support for Vector Instructions
+ *
+ * Assembler macros to generate .byte/.word code for particular
+ * vector instructions that are supported by recent binutils (>= 2.26) only.
+ *
+ * Copyright IBM Corp. 2015
+ * Author(s): Hendrik Brueckner <brueckner@linux.vnet.ibm.com>
+ */
+
+#ifndef __ASM_S390_VX_INSN_INTERNAL_H
+#define __ASM_S390_VX_INSN_INTERNAL_H
+
+#ifndef __ASM_S390_VX_INSN_H
+#error only <asm/vx-insn.h> can be included directly
+#endif
+
+#ifdef __ASSEMBLY__
+
+/* Macros to generate vector instruction byte code */
+
+/* GR_NUM - Retrieve general-purpose register number
+ *
+ * @opd:	Operand to store register number
+ * @r64:	String designation register in the format "%rN"
+ */
+.macro	GR_NUM	opd gr
+	\opd = 255
+	.ifc \gr,%r0
+		\opd = 0
+	.endif
+	.ifc \gr,%r1
+		\opd = 1
+	.endif
+	.ifc \gr,%r2
+		\opd = 2
+	.endif
+	.ifc \gr,%r3
+		\opd = 3
+	.endif
+	.ifc \gr,%r4
+		\opd = 4
+	.endif
+	.ifc \gr,%r5
+		\opd = 5
+	.endif
+	.ifc \gr,%r6
+		\opd = 6
+	.endif
+	.ifc \gr,%r7
+		\opd = 7
+	.endif
+	.ifc \gr,%r8
+		\opd = 8
+	.endif
+	.ifc \gr,%r9
+		\opd = 9
+	.endif
+	.ifc \gr,%r10
+		\opd = 10
+	.endif
+	.ifc \gr,%r11
+		\opd = 11
+	.endif
+	.ifc \gr,%r12
+		\opd = 12
+	.endif
+	.ifc \gr,%r13
+		\opd = 13
+	.endif
+	.ifc \gr,%r14
+		\opd = 14
+	.endif
+	.ifc \gr,%r15
+		\opd = 15
+	.endif
+	.if \opd == 255
+		\opd = \gr
+	.endif
+.endm
+
+/* VX_NUM - Retrieve vector register number
+ *
+ * @opd:	Operand to store register number
+ * @vxr:	String designation register in the format "%vN"
+ *
+ * The vector register number is used for as input number to the
+ * instruction and, as well as, to compute the RXB field of the
+ * instruction.
+ */
+.macro	VX_NUM	opd vxr
+	\opd = 255
+	.ifc \vxr,%v0
+		\opd = 0
+	.endif
+	.ifc \vxr,%v1
+		\opd = 1
+	.endif
+	.ifc \vxr,%v2
+		\opd = 2
+	.endif
+	.ifc \vxr,%v3
+		\opd = 3
+	.endif
+	.ifc \vxr,%v4
+		\opd = 4
+	.endif
+	.ifc \vxr,%v5
+		\opd = 5
+	.endif
+	.ifc \vxr,%v6
+		\opd = 6
+	.endif
+	.ifc \vxr,%v7
+		\opd = 7
+	.endif
+	.ifc \vxr,%v8
+		\opd = 8
+	.endif
+	.ifc \vxr,%v9
+		\opd = 9
+	.endif
+	.ifc \vxr,%v10
+		\opd = 10
+	.endif
+	.ifc \vxr,%v11
+		\opd = 11
+	.endif
+	.ifc \vxr,%v12
+		\opd = 12
+	.endif
+	.ifc \vxr,%v13
+		\opd = 13
+	.endif
+	.ifc \vxr,%v14
+		\opd = 14
+	.endif
+	.ifc \vxr,%v15
+		\opd = 15
+	.endif
+	.ifc \vxr,%v16
+		\opd = 16
+	.endif
+	.ifc \vxr,%v17
+		\opd = 17
+	.endif
+	.ifc \vxr,%v18
+		\opd = 18
+	.endif
+	.ifc \vxr,%v19
+		\opd = 19
+	.endif
+	.ifc \vxr,%v20
+		\opd = 20
+	.endif
+	.ifc \vxr,%v21
+		\opd = 21
+	.endif
+	.ifc \vxr,%v22
+		\opd = 22
+	.endif
+	.ifc \vxr,%v23
+		\opd = 23
+	.endif
+	.ifc \vxr,%v24
+		\opd = 24
+	.endif
+	.ifc \vxr,%v25
+		\opd = 25
+	.endif
+	.ifc \vxr,%v26
+		\opd = 26
+	.endif
+	.ifc \vxr,%v27
+		\opd = 27
+	.endif
+	.ifc \vxr,%v28
+		\opd = 28
+	.endif
+	.ifc \vxr,%v29
+		\opd = 29
+	.endif
+	.ifc \vxr,%v30
+		\opd = 30
+	.endif
+	.ifc \vxr,%v31
+		\opd = 31
+	.endif
+	.if \opd == 255
+		\opd = \vxr
+	.endif
+.endm
+
+/* RXB - Compute most significant bit used vector registers
+ *
+ * @rxb:	Operand to store computed RXB value
+ * @v1:		First vector register designated operand
+ * @v2:		Second vector register designated operand
+ * @v3:		Third vector register designated operand
+ * @v4:		Fourth vector register designated operand
+ */
+.macro	RXB	rxb v1 v2=0 v3=0 v4=0
+	\rxb = 0
+	.if \v1 & 0x10
+		\rxb = \rxb | 0x08
+	.endif
+	.if \v2 & 0x10
+		\rxb = \rxb | 0x04
+	.endif
+	.if \v3 & 0x10
+		\rxb = \rxb | 0x02
+	.endif
+	.if \v4 & 0x10
+		\rxb = \rxb | 0x01
+	.endif
+.endm
+
+/* MRXB - Generate Element Size Control and RXB value
+ *
+ * @m:		Element size control
+ * @v1:		First vector register designated operand (for RXB)
+ * @v2:		Second vector register designated operand (for RXB)
+ * @v3:		Third vector register designated operand (for RXB)
+ * @v4:		Fourth vector register designated operand (for RXB)
+ */
+.macro	MRXB	m v1 v2=0 v3=0 v4=0
+	rxb = 0
+	RXB	rxb, \v1, \v2, \v3, \v4
+	.byte	(\m << 4) | rxb
+.endm
+
+/* MRXBOPC - Generate Element Size Control, RXB, and final Opcode fields
+ *
+ * @m:		Element size control
+ * @opc:	Opcode
+ * @v1:		First vector register designated operand (for RXB)
+ * @v2:		Second vector register designated operand (for RXB)
+ * @v3:		Third vector register designated operand (for RXB)
+ * @v4:		Fourth vector register designated operand (for RXB)
+ */
+.macro	MRXBOPC	m opc v1 v2=0 v3=0 v4=0
+	MRXB	\m, \v1, \v2, \v3, \v4
+	.byte	\opc
+.endm
+
+/* Vector support instructions */
+
+/* VECTOR GENERATE BYTE MASK */
+.macro	VGBM	vr imm2
+	VX_NUM	v1, \vr
+	.word	(0xE700 | ((v1&15) << 4))
+	.word	\imm2
+	MRXBOPC	0, 0x44, v1
+.endm
+.macro	VZERO	vxr
+	VGBM	\vxr, 0
+.endm
+.macro	VONE	vxr
+	VGBM	\vxr, 0xFFFF
+.endm
+
+/* VECTOR LOAD VR ELEMENT FROM GR */
+.macro	VLVG	v, gr, disp, m
+	VX_NUM	v1, \v
+	GR_NUM	b2, "%r0"
+	GR_NUM	r3, \gr
+	.word	0xE700 | ((v1&15) << 4) | r3
+	.word	(b2 << 12) | (\disp)
+	MRXBOPC	\m, 0x22, v1
+.endm
+.macro	VLVGB	v, gr, index, base
+	VLVG	\v, \gr, \index, \base, 0
+.endm
+.macro	VLVGH	v, gr, index
+	VLVG	\v, \gr, \index, 1
+.endm
+.macro	VLVGF	v, gr, index
+	VLVG	\v, \gr, \index, 2
+.endm
+.macro	VLVGG	v, gr, index
+	VLVG	\v, \gr, \index, 3
+.endm
+
+/* VECTOR LOAD REGISTER */
+.macro	VLR	v1, v2
+	VX_NUM	v1, \v1
+	VX_NUM	v2, \v2
+	.word	0xE700 | ((v1&15) << 4) | (v2&15)
+	.word	0
+	MRXBOPC	0, 0x56, v1, v2
+.endm
+
+/* VECTOR LOAD */
+.macro	VL	v, disp, index="%r0", base
+	VX_NUM	v1, \v
+	GR_NUM	x2, \index
+	GR_NUM	b2, \base
+	.word	0xE700 | ((v1&15) << 4) | x2
+	.word	(b2 << 12) | (\disp)
+	MRXBOPC 0, 0x06, v1
+.endm
+
+/* VECTOR LOAD ELEMENT */
+.macro	VLEx	vr1, disp, index="%r0", base, m3, opc
+	VX_NUM	v1, \vr1
+	GR_NUM	x2, \index
+	GR_NUM	b2, \base
+	.word	0xE700 | ((v1&15) << 4) | x2
+	.word	(b2 << 12) | (\disp)
+	MRXBOPC	\m3, \opc, v1
+.endm
+.macro	VLEB	vr1, disp, index="%r0", base, m3
+	VLEx	\vr1, \disp, \index, \base, \m3, 0x00
+.endm
+.macro	VLEH	vr1, disp, index="%r0", base, m3
+	VLEx	\vr1, \disp, \index, \base, \m3, 0x01
+.endm
+.macro	VLEF	vr1, disp, index="%r0", base, m3
+	VLEx	\vr1, \disp, \index, \base, \m3, 0x03
+.endm
+.macro	VLEG	vr1, disp, index="%r0", base, m3
+	VLEx	\vr1, \disp, \index, \base, \m3, 0x02
+.endm
+
+/* VECTOR LOAD ELEMENT IMMEDIATE */
+.macro	VLEIx	vr1, imm2, m3, opc
+	VX_NUM	v1, \vr1
+	.word	0xE700 | ((v1&15) << 4)
+	.word	\imm2
+	MRXBOPC	\m3, \opc, v1
+.endm
+.macro	VLEIB	vr1, imm2, index
+	VLEIx	\vr1, \imm2, \index, 0x40
+.endm
+.macro	VLEIH	vr1, imm2, index
+	VLEIx	\vr1, \imm2, \index, 0x41
+.endm
+.macro	VLEIF	vr1, imm2, index
+	VLEIx	\vr1, \imm2, \index, 0x43
+.endm
+.macro	VLEIG	vr1, imm2, index
+	VLEIx	\vr1, \imm2, \index, 0x42
+.endm
+
+/* VECTOR LOAD GR FROM VR ELEMENT */
+.macro	VLGV	gr, vr, disp, base="%r0", m
+	GR_NUM	r1, \gr
+	GR_NUM	b2, \base
+	VX_NUM	v3, \vr
+	.word	0xE700 | (r1 << 4) | (v3&15)
+	.word	(b2 << 12) | (\disp)
+	MRXBOPC	\m, 0x21, v3
+.endm
+.macro	VLGVB	gr, vr, disp, base="%r0"
+	VLGV	\gr, \vr, \disp, \base, 0
+.endm
+.macro	VLGVH	gr, vr, disp, base="%r0"
+	VLGV	\gr, \vr, \disp, \base, 1
+.endm
+.macro	VLGVF	gr, vr, disp, base="%r0"
+	VLGV	\gr, \vr, \disp, \base, 2
+.endm
+.macro	VLGVG	gr, vr, disp, base="%r0"
+	VLGV	\gr, \vr, \disp, \base, 3
+.endm
+
+/* VECTOR LOAD MULTIPLE */
+.macro	VLM	vfrom, vto, disp, base, hint=3
+	VX_NUM	v1, \vfrom
+	VX_NUM	v3, \vto
+	GR_NUM	b2, \base
+	.word	0xE700 | ((v1&15) << 4) | (v3&15)
+	.word	(b2 << 12) | (\disp)
+	MRXBOPC	\hint, 0x36, v1, v3
+.endm
+
+/* VECTOR STORE */
+.macro	VST	vr1, disp, index="%r0", base
+	VX_NUM	v1, \vr1
+	GR_NUM	x2, \index
+	GR_NUM	b2, \base
+	.word	0xE700 | ((v1&15) << 4) | (x2&15)
+	.word	(b2 << 12) | (\disp)
+	MRXBOPC	0, 0x0E, v1
+.endm
+
+/* VECTOR STORE MULTIPLE */
+.macro	VSTM	vfrom, vto, disp, base, hint=3
+	VX_NUM	v1, \vfrom
+	VX_NUM	v3, \vto
+	GR_NUM	b2, \base
+	.word	0xE700 | ((v1&15) << 4) | (v3&15)
+	.word	(b2 << 12) | (\disp)
+	MRXBOPC	\hint, 0x3E, v1, v3
+.endm
+
+/* VECTOR PERMUTE */
+.macro	VPERM	vr1, vr2, vr3, vr4
+	VX_NUM	v1, \vr1
+	VX_NUM	v2, \vr2
+	VX_NUM	v3, \vr3
+	VX_NUM	v4, \vr4
+	.word	0xE700 | ((v1&15) << 4) | (v2&15)
+	.word	((v3&15) << 12)
+	MRXBOPC	(v4&15), 0x8C, v1, v2, v3, v4
+.endm
+
+/* VECTOR UNPACK LOGICAL LOW */
+.macro	VUPLL	vr1, vr2, m3
+	VX_NUM	v1, \vr1
+	VX_NUM	v2, \vr2
+	.word	0xE700 | ((v1&15) << 4) | (v2&15)
+	.word	0x0000
+	MRXBOPC	\m3, 0xD4, v1, v2
+.endm
+.macro	VUPLLB	vr1, vr2
+	VUPLL	\vr1, \vr2, 0
+.endm
+.macro	VUPLLH	vr1, vr2
+	VUPLL	\vr1, \vr2, 1
+.endm
+.macro	VUPLLF	vr1, vr2
+	VUPLL	\vr1, \vr2, 2
+.endm
+
+/* VECTOR PERMUTE DOUBLEWORD IMMEDIATE */
+.macro	VPDI	vr1, vr2, vr3, m4
+	VX_NUM	v1, \vr1
+	VX_NUM	v2, \vr2
+	VX_NUM	v3, \vr3
+	.word	0xE700 | ((v1&15) << 4) | (v2&15)
+	.word	((v3&15) << 12)
+	MRXBOPC	\m4, 0x84, v1, v2, v3
+.endm
+
+/* VECTOR REPLICATE */
+.macro	VREP	vr1, vr3, imm2, m4
+	VX_NUM	v1, \vr1
+	VX_NUM	v3, \vr3
+	.word	0xE700 | ((v1&15) << 4) | (v3&15)
+	.word	\imm2
+	MRXBOPC	\m4, 0x4D, v1, v3
+.endm
+.macro	VREPB	vr1, vr3, imm2
+	VREP	\vr1, \vr3, \imm2, 0
+.endm
+.macro	VREPH	vr1, vr3, imm2
+	VREP	\vr1, \vr3, \imm2, 1
+.endm
+.macro	VREPF	vr1, vr3, imm2
+	VREP	\vr1, \vr3, \imm2, 2
+.endm
+.macro	VREPG	vr1, vr3, imm2
+	VREP	\vr1, \vr3, \imm2, 3
+.endm
+
+/* VECTOR MERGE HIGH */
+.macro	VMRH	vr1, vr2, vr3, m4
+	VX_NUM	v1, \vr1
+	VX_NUM	v2, \vr2
+	VX_NUM	v3, \vr3
+	.word	0xE700 | ((v1&15) << 4) | (v2&15)
+	.word	((v3&15) << 12)
+	MRXBOPC	\m4, 0x61, v1, v2, v3
+.endm
+.macro	VMRHB	vr1, vr2, vr3
+	VMRH	\vr1, \vr2, \vr3, 0
+.endm
+.macro	VMRHH	vr1, vr2, vr3
+	VMRH	\vr1, \vr2, \vr3, 1
+.endm
+.macro	VMRHF	vr1, vr2, vr3
+	VMRH	\vr1, \vr2, \vr3, 2
+.endm
+.macro	VMRHG	vr1, vr2, vr3
+	VMRH	\vr1, \vr2, \vr3, 3
+.endm
+
+/* VECTOR MERGE LOW */
+.macro	VMRL	vr1, vr2, vr3, m4
+	VX_NUM	v1, \vr1
+	VX_NUM	v2, \vr2
+	VX_NUM	v3, \vr3
+	.word	0xE700 | ((v1&15) << 4) | (v2&15)
+	.word	((v3&15) << 12)
+	MRXBOPC	\m4, 0x60, v1, v2, v3
+.endm
+.macro	VMRLB	vr1, vr2, vr3
+	VMRL	\vr1, \vr2, \vr3, 0
+.endm
+.macro	VMRLH	vr1, vr2, vr3
+	VMRL	\vr1, \vr2, \vr3, 1
+.endm
+.macro	VMRLF	vr1, vr2, vr3
+	VMRL	\vr1, \vr2, \vr3, 2
+.endm
+.macro	VMRLG	vr1, vr2, vr3
+	VMRL	\vr1, \vr2, \vr3, 3
+.endm
+
+
+/* Vector integer instructions */
+
+/* VECTOR AND */
+.macro	VN	vr1, vr2, vr3
+	VX_NUM	v1, \vr1
+	VX_NUM	v2, \vr2
+	VX_NUM	v3, \vr3
+	.word	0xE700 | ((v1&15) << 4) | (v2&15)
+	.word	((v3&15) << 12)
+	MRXBOPC	0, 0x68, v1, v2, v3
+.endm
+
+/* VECTOR EXCLUSIVE OR */
+.macro	VX	vr1, vr2, vr3
+	VX_NUM	v1, \vr1
+	VX_NUM	v2, \vr2
+	VX_NUM	v3, \vr3
+	.word	0xE700 | ((v1&15) << 4) | (v2&15)
+	.word	((v3&15) << 12)
+	MRXBOPC	0, 0x6D, v1, v2, v3
+.endm
+
+/* VECTOR GALOIS FIELD MULTIPLY SUM */
+.macro	VGFM	vr1, vr2, vr3, m4
+	VX_NUM	v1, \vr1
+	VX_NUM	v2, \vr2
+	VX_NUM	v3, \vr3
+	.word	0xE700 | ((v1&15) << 4) | (v2&15)
+	.word	((v3&15) << 12)
+	MRXBOPC	\m4, 0xB4, v1, v2, v3
+.endm
+.macro	VGFMB	vr1, vr2, vr3
+	VGFM	\vr1, \vr2, \vr3, 0
+.endm
+.macro	VGFMH	vr1, vr2, vr3
+	VGFM	\vr1, \vr2, \vr3, 1
+.endm
+.macro	VGFMF	vr1, vr2, vr3
+	VGFM	\vr1, \vr2, \vr3, 2
+.endm
+.macro	VGFMG	vr1, vr2, vr3
+	VGFM	\vr1, \vr2, \vr3, 3
+.endm
+
+/* VECTOR GALOIS FIELD MULTIPLY SUM AND ACCUMULATE */
+.macro	VGFMA	vr1, vr2, vr3, vr4, m5
+	VX_NUM	v1, \vr1
+	VX_NUM	v2, \vr2
+	VX_NUM	v3, \vr3
+	VX_NUM	v4, \vr4
+	.word	0xE700 | ((v1&15) << 4) | (v2&15)
+	.word	((v3&15) << 12) | (\m5 << 8)
+	MRXBOPC	(v4&15), 0xBC, v1, v2, v3, v4
+.endm
+.macro	VGFMAB	vr1, vr2, vr3, vr4
+	VGFMA	\vr1, \vr2, \vr3, \vr4, 0
+.endm
+.macro	VGFMAH	vr1, vr2, vr3, vr4
+	VGFMA	\vr1, \vr2, \vr3, \vr4, 1
+.endm
+.macro	VGFMAF	vr1, vr2, vr3, vr4
+	VGFMA	\vr1, \vr2, \vr3, \vr4, 2
+.endm
+.macro	VGFMAG	vr1, vr2, vr3, vr4
+	VGFMA	\vr1, \vr2, \vr3, \vr4, 3
+.endm
+
+/* VECTOR SHIFT RIGHT LOGICAL BY BYTE */
+.macro	VSRLB	vr1, vr2, vr3
+	VX_NUM	v1, \vr1
+	VX_NUM	v2, \vr2
+	VX_NUM	v3, \vr3
+	.word	0xE700 | ((v1&15) << 4) | (v2&15)
+	.word	((v3&15) << 12)
+	MRXBOPC	0, 0x7D, v1, v2, v3
+.endm
+
+/* VECTOR REPLICATE IMMEDIATE */
+.macro	VREPI	vr1, imm2, m3
+	VX_NUM	v1, \vr1
+	.word	0xE700 | ((v1&15) << 4)
+	.word	\imm2
+	MRXBOPC	\m3, 0x45, v1
+.endm
+.macro	VREPIB	vr1, imm2
+	VREPI	\vr1, \imm2, 0
+.endm
+.macro	VREPIH	vr1, imm2
+	VREPI	\vr1, \imm2, 1
+.endm
+.macro	VREPIF	vr1, imm2
+	VREPI	\vr1, \imm2, 2
+.endm
+.macro	VREPIG	vr1, imm2
+	VREP	\vr1, \imm2, 3
+.endm
+
+/* VECTOR ADD */
+.macro	VA	vr1, vr2, vr3, m4
+	VX_NUM	v1, \vr1
+	VX_NUM	v2, \vr2
+	VX_NUM	v3, \vr3
+	.word	0xE700 | ((v1&15) << 4) | (v2&15)
+	.word	((v3&15) << 12)
+	MRXBOPC	\m4, 0xF3, v1, v2, v3
+.endm
+.macro	VAB	vr1, vr2, vr3
+	VA	\vr1, \vr2, \vr3, 0
+.endm
+.macro	VAH	vr1, vr2, vr3
+	VA	\vr1, \vr2, \vr3, 1
+.endm
+.macro	VAF	vr1, vr2, vr3
+	VA	\vr1, \vr2, \vr3, 2
+.endm
+.macro	VAG	vr1, vr2, vr3
+	VA	\vr1, \vr2, \vr3, 3
+.endm
+.macro	VAQ	vr1, vr2, vr3
+	VA	\vr1, \vr2, \vr3, 4
+.endm
+
+/* VECTOR ELEMENT SHIFT RIGHT ARITHMETIC */
+.macro	VESRAV	vr1, vr2, vr3, m4
+	VX_NUM	v1, \vr1
+	VX_NUM	v2, \vr2
+	VX_NUM	v3, \vr3
+	.word	0xE700 | ((v1&15) << 4) | (v2&15)
+	.word	((v3&15) << 12)
+	MRXBOPC \m4, 0x7A, v1, v2, v3
+.endm
+
+.macro	VESRAVB	vr1, vr2, vr3
+	VESRAV	\vr1, \vr2, \vr3, 0
+.endm
+.macro	VESRAVH	vr1, vr2, vr3
+	VESRAV	\vr1, \vr2, \vr3, 1
+.endm
+.macro	VESRAVF	vr1, vr2, vr3
+	VESRAV	\vr1, \vr2, \vr3, 2
+.endm
+.macro	VESRAVG	vr1, vr2, vr3
+	VESRAV	\vr1, \vr2, \vr3, 3
+.endm
+
+/* VECTOR ELEMENT ROTATE LEFT LOGICAL */
+.macro	VERLL	vr1, vr3, disp, base="%r0", m4
+	VX_NUM	v1, \vr1
+	VX_NUM	v3, \vr3
+	GR_NUM	b2, \base
+	.word	0xE700 | ((v1&15) << 4) | (v3&15)
+	.word	(b2 << 12) | (\disp)
+	MRXBOPC	\m4, 0x33, v1, v3
+.endm
+.macro	VERLLB	vr1, vr3, disp, base="%r0"
+	VERLL	\vr1, \vr3, \disp, \base, 0
+.endm
+.macro	VERLLH	vr1, vr3, disp, base="%r0"
+	VERLL	\vr1, \vr3, \disp, \base, 1
+.endm
+.macro	VERLLF	vr1, vr3, disp, base="%r0"
+	VERLL	\vr1, \vr3, \disp, \base, 2
+.endm
+.macro	VERLLG	vr1, vr3, disp, base="%r0"
+	VERLL	\vr1, \vr3, \disp, \base, 3
+.endm
+
+/* VECTOR SHIFT LEFT DOUBLE BY BYTE */
+.macro	VSLDB	vr1, vr2, vr3, imm4
+	VX_NUM	v1, \vr1
+	VX_NUM	v2, \vr2
+	VX_NUM	v3, \vr3
+	.word	0xE700 | ((v1&15) << 4) | (v2&15)
+	.word	((v3&15) << 12) | (\imm4)
+	MRXBOPC	0, 0x77, v1, v2, v3
+.endm
+
+#endif	/* __ASSEMBLY__ */
+#endif	/* __ASM_S390_VX_INSN_INTERNAL_H */
diff --git a/arch/s390/include/asm/vx-insn.h b/arch/s390/include/asm/vx-insn.h
new file mode 100644
index 0000000000..8c188f1c6d
--- /dev/null
+++ b/arch/s390/include/asm/vx-insn.h
@@ -0,0 +1,19 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Support for Vector Instructions
+ *
+ * This wrapper header file allows to use the vector instruction macros in
+ * both assembler files as well as in inline assemblies in C files.
+ */
+
+#ifndef __ASM_S390_VX_INSN_H
+#define __ASM_S390_VX_INSN_H
+
+#include <asm/vx-insn-asm.h>
+
+#ifndef __ASSEMBLY__
+
+asm(".include \"asm/vx-insn-asm.h\"\n");
+
+#endif /* __ASSEMBLY__ */
+#endif	/* __ASM_S390_VX_INSN_H */
diff --git a/arch/s390/include/asm/xor.h b/arch/s390/include/asm/xor.h
new file mode 100644
index 0000000000..857d6759b6
--- /dev/null
+++ b/arch/s390/include/asm/xor.h
@@ -0,0 +1,21 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Optimited xor routines
+ *
+ * Copyright IBM Corp. 2016
+ * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>
+ */
+#ifndef _ASM_S390_XOR_H
+#define _ASM_S390_XOR_H
+
+extern struct xor_block_template xor_block_xc;
+
+#undef XOR_TRY_TEMPLATES
+#define XOR_TRY_TEMPLATES				\
+do {							\
+	xor_speed(&xor_block_xc);			\
+} while (0)
+
+#define XOR_SELECT_TEMPLATE(FASTEST)	(&xor_block_xc)
+
+#endif /* _ASM_S390_XOR_H */
diff --git a/arch/s390/include/uapi/asm/Kbuild b/arch/s390/include/uapi/asm/Kbuild
new file mode 100644
index 0000000000..46c1ff0b84
--- /dev/null
+++ b/arch/s390/include/uapi/asm/Kbuild
@@ -0,0 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0
+
+generated-y += unistd_32.h
+generated-y += unistd_64.h
diff --git a/arch/s390/include/uapi/asm/auxvec.h b/arch/s390/include/uapi/asm/auxvec.h
new file mode 100644
index 0000000000..a056c4637f
--- /dev/null
+++ b/arch/s390/include/uapi/asm/auxvec.h
@@ -0,0 +1,9 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+#ifndef __ASMS390_AUXVEC_H
+#define __ASMS390_AUXVEC_H
+
+#define AT_SYSINFO_EHDR		33
+
+#define AT_VECTOR_SIZE_ARCH 1 /* entries in ARCH_DLINFO */
+
+#endif
diff --git a/arch/s390/include/uapi/asm/bitsperlong.h b/arch/s390/include/uapi/asm/bitsperlong.h
new file mode 100644
index 0000000000..cceaf47b02
--- /dev/null
+++ b/arch/s390/include/uapi/asm/bitsperlong.h
@@ -0,0 +1,14 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+#ifndef __ASM_S390_BITSPERLONG_H
+#define __ASM_S390_BITSPERLONG_H
+
+#ifndef __s390x__
+#define __BITS_PER_LONG 32
+#else
+#define __BITS_PER_LONG 64
+#endif
+
+#include <asm-generic/bitsperlong.h>
+
+#endif /* __ASM_S390_BITSPERLONG_H */
+
diff --git a/arch/s390/include/uapi/asm/bpf_perf_event.h b/arch/s390/include/uapi/asm/bpf_perf_event.h
new file mode 100644
index 0000000000..3ed42ff6da
--- /dev/null
+++ b/arch/s390/include/uapi/asm/bpf_perf_event.h
@@ -0,0 +1,9 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+#ifndef _UAPI__ASM_BPF_PERF_EVENT_H__
+#define _UAPI__ASM_BPF_PERF_EVENT_H__
+
+#include <asm/ptrace.h>
+
+typedef user_pt_regs bpf_user_pt_regs_t;
+
+#endif /* _UAPI__ASM_BPF_PERF_EVENT_H__ */
diff --git a/arch/s390/include/uapi/asm/byteorder.h b/arch/s390/include/uapi/asm/byteorder.h
new file mode 100644
index 0000000000..1442b57dd9
--- /dev/null
+++ b/arch/s390/include/uapi/asm/byteorder.h
@@ -0,0 +1,7 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+#ifndef _S390_BYTEORDER_H
+#define _S390_BYTEORDER_H
+
+#include <linux/byteorder/big_endian.h>
+
+#endif /* _S390_BYTEORDER_H */
diff --git a/arch/s390/include/uapi/asm/chpid.h b/arch/s390/include/uapi/asm/chpid.h
new file mode 100644
index 0000000000..2ae2ed8c09
--- /dev/null
+++ b/arch/s390/include/uapi/asm/chpid.h
@@ -0,0 +1,23 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/*
+ *    Copyright IBM Corp. 2007, 2012
+ *    Author(s): Peter Oberparleiter <peter.oberparleiter@de.ibm.com>
+ */
+
+#ifndef _UAPI_ASM_S390_CHPID_H
+#define _UAPI_ASM_S390_CHPID_H
+
+#include <linux/string.h>
+#include <linux/types.h>
+
+#define __MAX_CHPID 255
+
+struct chp_id {
+	__u8 reserved1;
+	__u8 cssid;
+	__u8 reserved2;
+	__u8 id;
+} __attribute__((packed));
+
+
+#endif /* _UAPI_ASM_S390_CHPID_H */
diff --git a/arch/s390/include/uapi/asm/chsc.h b/arch/s390/include/uapi/asm/chsc.h
new file mode 100644
index 0000000000..83a574e95b
--- /dev/null
+++ b/arch/s390/include/uapi/asm/chsc.h
@@ -0,0 +1,144 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/*
+ * ioctl interface for /dev/chsc
+ *
+ * Copyright IBM Corp. 2008, 2012
+ * Author(s): Cornelia Huck <cornelia.huck@de.ibm.com>
+ */
+
+#ifndef _ASM_CHSC_H
+#define _ASM_CHSC_H
+
+#include <linux/types.h>
+#include <linux/ioctl.h>
+#include <asm/chpid.h>
+#include <asm/schid.h>
+
+#define CHSC_SIZE 0x1000
+
+struct chsc_async_header {
+	__u16 length;
+	__u16 code;
+	__u32 cmd_dependend;
+	__u32 key : 4;
+	__u32 : 28;
+	struct subchannel_id sid;
+};
+
+struct chsc_async_area {
+	struct chsc_async_header header;
+	__u8 data[CHSC_SIZE - sizeof(struct chsc_async_header)];
+};
+
+struct chsc_header {
+	__u16 length;
+	__u16 code;
+};
+
+struct chsc_sync_area {
+	struct chsc_header header;
+	__u8 data[CHSC_SIZE - sizeof(struct chsc_header)];
+};
+
+struct chsc_response_struct {
+	__u16 length;
+	__u16 code;
+	__u32 parms;
+	__u8 data[CHSC_SIZE - 2 * sizeof(__u16) - sizeof(__u32)];
+};
+
+struct chsc_chp_cd {
+	struct chp_id chpid;
+	int m;
+	int fmt;
+	struct chsc_response_struct cpcb;
+};
+
+struct chsc_cu_cd {
+	__u16 cun;
+	__u8 cssid;
+	int m;
+	int fmt;
+	struct chsc_response_struct cucb;
+};
+
+struct chsc_sch_cud {
+	struct subchannel_id schid;
+	int fmt;
+	struct chsc_response_struct scub;
+};
+
+struct conf_id {
+	int m;
+	__u8 cssid;
+	__u8 ssid;
+};
+
+struct chsc_conf_info {
+	struct conf_id id;
+	int fmt;
+	struct chsc_response_struct scid;
+};
+
+struct ccl_parm_chpid {
+	int m;
+	struct chp_id chp;
+};
+
+struct ccl_parm_cssids {
+	__u8 f_cssid;
+	__u8 l_cssid;
+};
+
+struct chsc_comp_list {
+	struct {
+		enum {
+			CCL_CU_ON_CHP = 1,
+			CCL_CHP_TYPE_CAP = 2,
+			CCL_CSS_IMG = 4,
+			CCL_CSS_IMG_CONF_CHAR = 5,
+			CCL_IOP_CHP = 6,
+		} ctype;
+		int fmt;
+		struct ccl_parm_chpid chpid;
+		struct ccl_parm_cssids cssids;
+	} req;
+	struct chsc_response_struct sccl;
+};
+
+struct chsc_dcal {
+	struct {
+		enum {
+			DCAL_CSS_IID_PN = 4,
+		} atype;
+		__u32 list_parm[2];
+		int fmt;
+	} req;
+	struct chsc_response_struct sdcal;
+};
+
+struct chsc_cpd_info {
+	struct chp_id chpid;
+	int m;
+	int fmt;
+	int rfmt;
+	int c;
+	struct chsc_response_struct chpdb;
+};
+
+#define CHSC_IOCTL_MAGIC 'c'
+
+#define CHSC_START _IOWR(CHSC_IOCTL_MAGIC, 0x81, struct chsc_async_area)
+#define CHSC_INFO_CHANNEL_PATH _IOWR(CHSC_IOCTL_MAGIC, 0x82, \
+				    struct chsc_chp_cd)
+#define CHSC_INFO_CU _IOWR(CHSC_IOCTL_MAGIC, 0x83, struct chsc_cu_cd)
+#define CHSC_INFO_SCH_CU _IOWR(CHSC_IOCTL_MAGIC, 0x84, struct chsc_sch_cud)
+#define CHSC_INFO_CI _IOWR(CHSC_IOCTL_MAGIC, 0x85, struct chsc_conf_info)
+#define CHSC_INFO_CCL _IOWR(CHSC_IOCTL_MAGIC, 0x86, struct chsc_comp_list)
+#define CHSC_INFO_CPD _IOWR(CHSC_IOCTL_MAGIC, 0x87, struct chsc_cpd_info)
+#define CHSC_INFO_DCAL _IOWR(CHSC_IOCTL_MAGIC, 0x88, struct chsc_dcal)
+#define CHSC_START_SYNC _IOWR(CHSC_IOCTL_MAGIC, 0x89, struct chsc_sync_area)
+#define CHSC_ON_CLOSE_SET _IOWR(CHSC_IOCTL_MAGIC, 0x8a, struct chsc_async_area)
+#define CHSC_ON_CLOSE_REMOVE _IO(CHSC_IOCTL_MAGIC, 0x8b)
+
+#endif
diff --git a/arch/s390/include/uapi/asm/clp.h b/arch/s390/include/uapi/asm/clp.h
new file mode 100644
index 0000000000..b36d9e9cdd
--- /dev/null
+++ b/arch/s390/include/uapi/asm/clp.h
@@ -0,0 +1,29 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/*
+ * ioctl interface for /dev/clp
+ *
+ * Copyright IBM Corp. 2016
+ * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>
+ */
+
+#ifndef _ASM_CLP_H
+#define _ASM_CLP_H
+
+#include <linux/types.h>
+#include <linux/ioctl.h>
+
+struct clp_req {
+	unsigned int c : 1;
+	unsigned int r : 1;
+	unsigned int lps : 6;
+	unsigned int cmd : 8;
+	unsigned int : 16;
+	unsigned int reserved;
+	__u64 data_p;
+};
+
+#define CLP_IOCTL_MAGIC 'c'
+
+#define CLP_SYNC _IOWR(CLP_IOCTL_MAGIC, 0xC1, struct clp_req)
+
+#endif
diff --git a/arch/s390/include/uapi/asm/cmb.h b/arch/s390/include/uapi/asm/cmb.h
new file mode 100644
index 0000000000..115434ab98
--- /dev/null
+++ b/arch/s390/include/uapi/asm/cmb.h
@@ -0,0 +1,54 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+#ifndef _UAPIS390_CMB_H
+#define _UAPIS390_CMB_H
+
+#include <linux/types.h>
+
+/**
+ * struct cmbdata - channel measurement block data for user space
+ * @size: size of the stored data
+ * @elapsed_time: time since last sampling
+ * @ssch_rsch_count: number of ssch and rsch
+ * @sample_count: number of samples
+ * @device_connect_time: time of device connect
+ * @function_pending_time: time of function pending
+ * @device_disconnect_time: time of device disconnect
+ * @control_unit_queuing_time: time of control unit queuing
+ * @device_active_only_time: time of device active only
+ * @device_busy_time: time of device busy (ext. format)
+ * @initial_command_response_time: initial command response time (ext. format)
+ *
+ * All values are stored as 64 bit for simplicity, especially
+ * in 32 bit emulation mode. All time values are normalized to
+ * nanoseconds.
+ * Currently, two formats are known, which differ by the size of
+ * this structure, i.e. the last two members are only set when
+ * the extended channel measurement facility (first shipped in
+ * z990 machines) is activated.
+ * Potentially, more fields could be added, which would result in a
+ * new ioctl number.
+ */
+struct cmbdata {
+	__u64 size;
+	__u64 elapsed_time;
+ /* basic and extended format: */
+	__u64 ssch_rsch_count;
+	__u64 sample_count;
+	__u64 device_connect_time;
+	__u64 function_pending_time;
+	__u64 device_disconnect_time;
+	__u64 control_unit_queuing_time;
+	__u64 device_active_only_time;
+ /* extended format only: */
+	__u64 device_busy_time;
+	__u64 initial_command_response_time;
+};
+
+/* enable channel measurement */
+#define BIODASDCMFENABLE	_IO(DASD_IOCTL_LETTER, 32)
+/* enable channel measurement */
+#define BIODASDCMFDISABLE	_IO(DASD_IOCTL_LETTER, 33)
+/* read channel measurement data */
+#define BIODASDREADALLCMB	_IOWR(DASD_IOCTL_LETTER, 33, struct cmbdata)
+
+#endif /* _UAPIS390_CMB_H */
diff --git a/arch/s390/include/uapi/asm/dasd.h b/arch/s390/include/uapi/asm/dasd.h
new file mode 100644
index 0000000000..b11d988004
--- /dev/null
+++ b/arch/s390/include/uapi/asm/dasd.h
@@ -0,0 +1,354 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/*
+ * Author(s)......: Holger Smolinski <Holger.Smolinski@de.ibm.com>
+ * Bugreports.to..: <Linux390@de.ibm.com>
+ * Copyright IBM Corp. 1999, 2000
+ * EMC Symmetrix ioctl Copyright EMC Corporation, 2008
+ * Author.........: Nigel Hislop <hislop_nigel@emc.com>
+ *
+ * This file is the interface of the DASD device driver, which is exported to user space
+ * any future changes wrt the API will result in a change of the APIVERSION reported
+ * to userspace by the DASDAPIVER-ioctl
+ *
+ */
+
+#ifndef DASD_H
+#define DASD_H
+#include <linux/types.h>
+#include <linux/ioctl.h>
+
+#define DASD_IOCTL_LETTER 'D'
+
+#define DASD_API_VERSION 6
+
+/*
+ * struct dasd_information2_t
+ * represents any data about the device, which is visible to userspace.
+ *  including format and featueres.
+ */
+typedef struct dasd_information2_t {
+	unsigned int devno;	    /* S/390 devno */
+	unsigned int real_devno;    /* for aliases */
+	unsigned int schid;	    /* S/390 subchannel identifier */
+	unsigned int cu_type  : 16; /* from SenseID */
+	unsigned int cu_model :  8; /* from SenseID */
+	unsigned int dev_type : 16; /* from SenseID */
+	unsigned int dev_model : 8; /* from SenseID */
+	unsigned int open_count;
+	unsigned int req_queue_len;
+	unsigned int chanq_len;     /* length of chanq */
+	char type[4];		    /* from discipline.name, 'none' for unknown */
+	unsigned int status;	    /* current device level */
+	unsigned int label_block;   /* where to find the VOLSER */
+	unsigned int FBA_layout;    /* fixed block size (like AIXVOL) */
+	unsigned int characteristics_size;
+	unsigned int confdata_size;
+	char characteristics[64];   /* from read_device_characteristics */
+	char configuration_data[256]; /* from read_configuration_data */
+	unsigned int format;	      /* format info like formatted/cdl/ldl/... */
+	unsigned int features;	      /* dasd features like 'ro',...		*/
+	unsigned int reserved0;       /* reserved for further use ,...		*/
+	unsigned int reserved1;       /* reserved for further use ,...		*/
+	unsigned int reserved2;       /* reserved for further use ,...		*/
+	unsigned int reserved3;       /* reserved for further use ,...		*/
+	unsigned int reserved4;       /* reserved for further use ,...		*/
+	unsigned int reserved5;       /* reserved for further use ,...		*/
+	unsigned int reserved6;       /* reserved for further use ,...		*/
+	unsigned int reserved7;       /* reserved for further use ,...		*/
+} dasd_information2_t;
+
+/*
+ * values to be used for dasd_information_t.format
+ * 0x00: NOT formatted
+ * 0x01: Linux disc layout
+ * 0x02: Common disc layout
+ */
+#define DASD_FORMAT_NONE 0
+#define DASD_FORMAT_LDL  1
+#define DASD_FORMAT_CDL  2
+/*
+ * values to be used for dasd_information_t.features
+ * 0x100: default features
+ * 0x001: readonly (ro)
+ * 0x002: use diag discipline (diag)
+ * 0x004: set the device initially online (internal use only)
+ * 0x008: enable ERP related logging
+ * 0x010: allow I/O to fail on lost paths
+ * 0x020: allow I/O to fail when a lock was stolen
+ * 0x040: give access to raw eckd data
+ * 0x080: enable discard support
+ * 0x100: enable autodisable for IFCC errors (default)
+ * 0x200: enable requeue of all requests on autoquiesce
+ */
+#define DASD_FEATURE_READONLY	      0x001
+#define DASD_FEATURE_USEDIAG	      0x002
+#define DASD_FEATURE_INITIAL_ONLINE   0x004
+#define DASD_FEATURE_ERPLOG	      0x008
+#define DASD_FEATURE_FAILFAST	      0x010
+#define DASD_FEATURE_FAILONSLCK       0x020
+#define DASD_FEATURE_USERAW	      0x040
+#define DASD_FEATURE_DISCARD	      0x080
+#define DASD_FEATURE_PATH_AUTODISABLE 0x100
+#define DASD_FEATURE_REQUEUEQUIESCE   0x200
+#define DASD_FEATURE_DEFAULT	      DASD_FEATURE_PATH_AUTODISABLE
+
+#define DASD_PARTN_BITS 2
+
+/*
+ * struct dasd_information_t
+ * represents any data about the data, which is visible to userspace
+ */
+typedef struct dasd_information_t {
+	unsigned int devno;	    /* S/390 devno */
+	unsigned int real_devno;    /* for aliases */
+	unsigned int schid;	    /* S/390 subchannel identifier */
+	unsigned int cu_type  : 16; /* from SenseID */
+	unsigned int cu_model :  8; /* from SenseID */
+	unsigned int dev_type : 16; /* from SenseID */
+	unsigned int dev_model : 8; /* from SenseID */
+	unsigned int open_count;
+	unsigned int req_queue_len;
+	unsigned int chanq_len;     /* length of chanq */
+	char type[4];		    /* from discipline.name, 'none' for unknown */
+	unsigned int status;	    /* current device level */
+	unsigned int label_block;   /* where to find the VOLSER */
+	unsigned int FBA_layout;    /* fixed block size (like AIXVOL) */
+	unsigned int characteristics_size;
+	unsigned int confdata_size;
+	char characteristics[64];   /* from read_device_characteristics */
+	char configuration_data[256]; /* from read_configuration_data */
+} dasd_information_t;
+
+/*
+ * Read Subsystem Data - Performance Statistics
+ */
+typedef struct dasd_rssd_perf_stats_t {
+	unsigned char  invalid:1;
+	unsigned char  format:3;
+	unsigned char  data_format:4;
+	unsigned char  unit_address;
+	unsigned short device_status;
+	unsigned int   nr_read_normal;
+	unsigned int   nr_read_normal_hits;
+	unsigned int   nr_write_normal;
+	unsigned int   nr_write_fast_normal_hits;
+	unsigned int   nr_read_seq;
+	unsigned int   nr_read_seq_hits;
+	unsigned int   nr_write_seq;
+	unsigned int   nr_write_fast_seq_hits;
+	unsigned int   nr_read_cache;
+	unsigned int   nr_read_cache_hits;
+	unsigned int   nr_write_cache;
+	unsigned int   nr_write_fast_cache_hits;
+	unsigned int   nr_inhibit_cache;
+	unsigned int   nr_bybass_cache;
+	unsigned int   nr_seq_dasd_to_cache;
+	unsigned int   nr_dasd_to_cache;
+	unsigned int   nr_cache_to_dasd;
+	unsigned int   nr_delayed_fast_write;
+	unsigned int   nr_normal_fast_write;
+	unsigned int   nr_seq_fast_write;
+	unsigned int   nr_cache_miss;
+	unsigned char  status2;
+	unsigned int   nr_quick_write_promotes;
+	unsigned char  reserved;
+	unsigned short ssid;
+	unsigned char  reseved2[96];
+} __attribute__((packed)) dasd_rssd_perf_stats_t;
+
+/*
+ * struct profile_info_t
+ * holds the profinling information
+ */
+typedef struct dasd_profile_info_t {
+	unsigned int dasd_io_reqs;	 /* number of requests processed at all */
+	unsigned int dasd_io_sects;	 /* number of sectors processed at all */
+	unsigned int dasd_io_secs[32];	 /* histogram of request's sizes */
+	unsigned int dasd_io_times[32];	 /* histogram of requests's times */
+	unsigned int dasd_io_timps[32];	 /* histogram of requests's times per sector */
+	unsigned int dasd_io_time1[32];	 /* histogram of time from build to start */
+	unsigned int dasd_io_time2[32];	 /* histogram of time from start to irq */
+	unsigned int dasd_io_time2ps[32]; /* histogram of time from start to irq */
+	unsigned int dasd_io_time3[32];	 /* histogram of time from irq to end */
+	unsigned int dasd_io_nr_req[32]; /* histogram of # of requests in chanq */
+} dasd_profile_info_t;
+
+/*
+ * struct format_data_t
+ * represents all data necessary to format a dasd
+ */
+typedef struct format_data_t {
+	unsigned int start_unit; /* from track */
+	unsigned int stop_unit;  /* to track */
+	unsigned int blksize;	 /* sectorsize */
+	unsigned int intensity;
+} format_data_t;
+
+/*
+ * struct dasd_copypair_swap_data_t
+ * represents all data necessary to issue a swap of the copy pair relation
+ */
+struct dasd_copypair_swap_data_t {
+	char primary[20]; /* BUSID of primary */
+	char secondary[20]; /* BUSID of secondary */
+
+	/* Reserved for future updates. */
+	__u8 reserved[64];
+};
+
+/*
+ * values to be used for format_data_t.intensity
+ * 0/8: normal format
+ * 1/9: also write record zero
+ * 3/11: also write home address
+ * 4/12: invalidate track
+ */
+#define DASD_FMT_INT_FMT_R0	1	/* write record zero */
+#define DASD_FMT_INT_FMT_HA	2	/* write home address, also set FMT_R0 ! */
+#define DASD_FMT_INT_INVAL	4	/* invalidate tracks */
+#define DASD_FMT_INT_COMPAT	8	/* use OS/390 compatible disk layout */
+#define DASD_FMT_INT_FMT_NOR0	16	/* remove permission to write record zero */
+#define DASD_FMT_INT_ESE_FULL	32	/* release space for entire volume */
+
+/*
+ * struct format_check_t
+ * represents all data necessary to evaluate the format of
+ * different tracks of a dasd
+ */
+typedef struct format_check_t {
+	/* Input */
+	struct format_data_t expect;
+
+	/* Output */
+	unsigned int result;		/* Error indication (DASD_FMT_ERR_*) */
+	unsigned int unit;		/* Track that is in error */
+	unsigned int rec;		/* Record that is in error */
+	unsigned int num_records;	/* Records in the track in error */
+	unsigned int blksize;		/* Blocksize of first record in error */
+	unsigned int key_length;	/* Key length of first record in error */
+} format_check_t;
+
+/* Values returned in format_check_t when a format error is detected: */
+/* Too few records were found on a single track */
+#define DASD_FMT_ERR_TOO_FEW_RECORDS	1
+/* Too many records were found on a single track */
+#define DASD_FMT_ERR_TOO_MANY_RECORDS	2
+/* Blocksize/data-length of a record was wrong */
+#define DASD_FMT_ERR_BLKSIZE		3
+/* A record ID is defined by cylinder, head, and record number (CHR). */
+/* On mismatch, this error is set */
+#define DASD_FMT_ERR_RECORD_ID		4
+/* If key-length was != 0 */
+#define DASD_FMT_ERR_KEY_LENGTH		5
+
+/*
+ * struct attrib_data_t
+ * represents the operation (cache) bits for the device.
+ * Used in DE to influence caching of the DASD.
+ */
+typedef struct attrib_data_t {
+	unsigned char operation:3;     /* cache operation mode */
+	unsigned char reserved:5;      /* cache operation mode */
+	__u16         nr_cyl;          /* no of cyliners for read ahaed */
+	__u8          reserved2[29];   /* for future use */
+} __attribute__ ((packed)) attrib_data_t;
+
+/* definition of operation (cache) bits within attributes of DE */
+#define DASD_NORMAL_CACHE  0x0
+#define DASD_BYPASS_CACHE  0x1
+#define DASD_INHIBIT_LOAD  0x2
+#define DASD_SEQ_ACCESS    0x3
+#define DASD_SEQ_PRESTAGE  0x4
+#define DASD_REC_ACCESS    0x5
+
+/*
+ * Perform EMC Symmetrix I/O
+ */
+typedef struct dasd_symmio_parms {
+	unsigned char reserved[8];	/* compat with older releases */
+	unsigned long long psf_data;	/* char * cast to u64 */
+	unsigned long long rssd_result; /* char * cast to u64 */
+	int psf_data_len;
+	int rssd_result_len;
+} __attribute__ ((packed)) dasd_symmio_parms_t;
+
+/*
+ * Data returned by Sense Path Group ID (SNID)
+ */
+struct dasd_snid_data {
+	struct {
+		__u8 group:2;
+		__u8 reserve:2;
+		__u8 mode:1;
+		__u8 res:3;
+	} __attribute__ ((packed)) path_state;
+	__u8 pgid[11];
+} __attribute__ ((packed));
+
+struct dasd_snid_ioctl_data {
+	struct dasd_snid_data data;
+	__u8 path_mask;
+} __attribute__ ((packed));
+
+
+/********************************************************************************
+ * SECTION: Definition of IOCTLs
+ *
+ * Here ist how the ioctl-nr should be used:
+ *    0 -   31   DASD driver itself
+ *   32 -  239   still open
+ *  240 -  255	 reserved for EMC
+ *******************************************************************************/
+
+/* Disable the volume (for Linux) */
+#define BIODASDDISABLE _IO(DASD_IOCTL_LETTER,0)
+/* Enable the volume (for Linux) */
+#define BIODASDENABLE  _IO(DASD_IOCTL_LETTER,1)
+/* Issue a reserve/release command, rsp. */
+#define BIODASDRSRV    _IO(DASD_IOCTL_LETTER,2) /* reserve */
+#define BIODASDRLSE    _IO(DASD_IOCTL_LETTER,3) /* release */
+#define BIODASDSLCK    _IO(DASD_IOCTL_LETTER,4) /* steal lock */
+/* reset profiling information of a device */
+#define BIODASDPRRST   _IO(DASD_IOCTL_LETTER,5)
+/* Quiesce IO on device */
+#define BIODASDQUIESCE _IO(DASD_IOCTL_LETTER,6)
+/* Resume IO on device */
+#define BIODASDRESUME  _IO(DASD_IOCTL_LETTER,7)
+/* Abort all I/O on a device */
+#define BIODASDABORTIO _IO(DASD_IOCTL_LETTER, 240)
+/* Allow I/O on a device */
+#define BIODASDALLOWIO _IO(DASD_IOCTL_LETTER, 241)
+
+
+/* retrieve API version number */
+#define DASDAPIVER     _IOR(DASD_IOCTL_LETTER,0,int)
+/* Get information on a dasd device */
+#define BIODASDINFO    _IOR(DASD_IOCTL_LETTER,1,dasd_information_t)
+/* retrieve profiling information of a device */
+#define BIODASDPRRD    _IOR(DASD_IOCTL_LETTER,2,dasd_profile_info_t)
+/* Get information on a dasd device (enhanced) */
+#define BIODASDINFO2   _IOR(DASD_IOCTL_LETTER,3,dasd_information2_t)
+/* Performance Statistics Read */
+#define BIODASDPSRD    _IOR(DASD_IOCTL_LETTER,4,dasd_rssd_perf_stats_t)
+/* Get Attributes (cache operations) */
+#define BIODASDGATTR   _IOR(DASD_IOCTL_LETTER,5,attrib_data_t)
+
+
+/* #define BIODASDFORMAT  _IOW(IOCTL_LETTER,0,format_data_t) , deprecated */
+#define BIODASDFMT     _IOW(DASD_IOCTL_LETTER,1,format_data_t)
+/* Set Attributes (cache operations) */
+#define BIODASDSATTR   _IOW(DASD_IOCTL_LETTER,2,attrib_data_t)
+/* Release Allocated Space */
+#define BIODASDRAS     _IOW(DASD_IOCTL_LETTER, 3, format_data_t)
+/* Swap copy pair relation */
+#define BIODASDCOPYPAIRSWAP _IOW(DASD_IOCTL_LETTER, 4, struct dasd_copypair_swap_data_t)
+
+/* Get Sense Path Group ID (SNID) data */
+#define BIODASDSNID    _IOWR(DASD_IOCTL_LETTER, 1, struct dasd_snid_ioctl_data)
+/* Check device format according to format_check_t */
+#define BIODASDCHECKFMT _IOWR(DASD_IOCTL_LETTER, 2, format_check_t)
+
+#define BIODASDSYMMIO  _IOWR(DASD_IOCTL_LETTER, 240, dasd_symmio_parms_t)
+
+#endif				/* DASD_H */
+
diff --git a/arch/s390/include/uapi/asm/fs3270.h b/arch/s390/include/uapi/asm/fs3270.h
new file mode 100644
index 0000000000..c4bc1108af
--- /dev/null
+++ b/arch/s390/include/uapi/asm/fs3270.h
@@ -0,0 +1,25 @@
+/* SPDX-License-Identifier:  GPL-2.0 WITH Linux-syscall-note */
+#ifndef __ASM_S390_UAPI_FS3270_H
+#define __ASM_S390_UAPI_FS3270_H
+
+#include <linux/types.h>
+#include <asm/ioctl.h>
+
+/* ioctls for fullscreen 3270 */
+#define TUBICMD		_IO('3', 3)	/* set ccw command for fs reads. */
+#define TUBOCMD		_IO('3', 4)	/* set ccw command for fs writes. */
+#define TUBGETI		_IO('3', 7)	/* get ccw command for fs reads. */
+#define TUBGETO		_IO('3', 8)	/* get ccw command for fs writes. */
+#define TUBGETMOD	_IO('3', 13)	/* get characteristics like model, cols, rows */
+
+/* For TUBGETMOD */
+struct raw3270_iocb {
+	__u16 model;
+	__u16 line_cnt;
+	__u16 col_cnt;
+	__u16 pf_cnt;
+	__u16 re_cnt;
+	__u16 map;
+};
+
+#endif /* __ASM_S390_UAPI_FS3270_H */
diff --git a/arch/s390/include/uapi/asm/guarded_storage.h b/arch/s390/include/uapi/asm/guarded_storage.h
new file mode 100644
index 0000000000..666af1c33b
--- /dev/null
+++ b/arch/s390/include/uapi/asm/guarded_storage.h
@@ -0,0 +1,78 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+#ifndef _GUARDED_STORAGE_H
+#define _GUARDED_STORAGE_H
+
+#include <linux/types.h>
+
+struct gs_cb {
+	__u64 reserved;
+	__u64 gsd;
+	__u64 gssm;
+	__u64 gs_epl_a;
+};
+
+struct gs_epl {
+	__u8 pad1;
+	union {
+		__u8 gs_eam;
+		struct {
+			__u8	: 6;
+			__u8 e	: 1;
+			__u8 b	: 1;
+		};
+	};
+	union {
+		__u8 gs_eci;
+		struct {
+			__u8 tx	: 1;
+			__u8 cx	: 1;
+			__u8	: 5;
+			__u8 in	: 1;
+		};
+	};
+	union {
+		__u8 gs_eai;
+		struct {
+			__u8	: 1;
+			__u8 t	: 1;
+			__u8 as	: 2;
+			__u8 ar	: 4;
+		};
+	};
+	__u32 pad2;
+	__u64 gs_eha;
+	__u64 gs_eia;
+	__u64 gs_eoa;
+	__u64 gs_eir;
+	__u64 gs_era;
+};
+
+#define GS_ENABLE	0
+#define	GS_DISABLE	1
+#define GS_SET_BC_CB	2
+#define GS_CLEAR_BC_CB	3
+#define GS_BROADCAST	4
+
+static inline void load_gs_cb(struct gs_cb *gs_cb)
+{
+	asm volatile(".insn rxy,0xe3000000004d,0,%0" : : "Q" (*gs_cb));
+}
+
+static inline void store_gs_cb(struct gs_cb *gs_cb)
+{
+	asm volatile(".insn rxy,0xe30000000049,0,%0" : : "Q" (*gs_cb));
+}
+
+static inline void save_gs_cb(struct gs_cb *gs_cb)
+{
+	if (gs_cb)
+		store_gs_cb(gs_cb);
+}
+
+static inline void restore_gs_cb(struct gs_cb *gs_cb)
+{
+	if (gs_cb)
+		load_gs_cb(gs_cb);
+}
+
+#endif /* _GUARDED_STORAGE_H */
diff --git a/arch/s390/include/uapi/asm/hwctrset.h b/arch/s390/include/uapi/asm/hwctrset.h
new file mode 100644
index 0000000000..e56b9dd23a
--- /dev/null
+++ b/arch/s390/include/uapi/asm/hwctrset.h
@@ -0,0 +1,51 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/*
+ * Copyright IBM Corp. 2021
+ * Interface implementation for communication with the CPU Measurement
+ * counter facility device driver.
+ *
+ * Author(s): Thomas Richter <tmricht@linux.ibm.com>
+ *
+ * Define for ioctl() commands to communicate with the CPU Measurement
+ * counter facility device driver.
+ */
+
+#ifndef _PERF_CPUM_CF_DIAG_H
+#define _PERF_CPUM_CF_DIAG_H
+
+#include <linux/ioctl.h>
+#include <linux/types.h>
+
+#define S390_HWCTR_DEVICE		"hwctr"
+#define S390_HWCTR_START_VERSION	1
+
+struct s390_ctrset_start {		/* Set CPUs to operate on */
+	__u64 version;			/* Version of interface */
+	__u64 data_bytes;		/* # of bytes required */
+	__u64 cpumask_len;		/* Length of CPU mask in bytes */
+	__u64 *cpumask;			/* Pointer to CPU mask */
+	__u64 counter_sets;		/* Bit mask of counter sets to get */
+};
+
+struct s390_ctrset_setdata {		/* Counter set data */
+	__u32 set;			/* Counter set number */
+	__u32 no_cnts;			/* # of counters stored in cv[] */
+	__u64 cv[];			/* Counter values (variable length) */
+};
+
+struct s390_ctrset_cpudata {		/* Counter set data per CPU */
+	__u32 cpu_nr;			/* CPU number */
+	__u32 no_sets;			/* # of counters sets in data[] */
+	struct s390_ctrset_setdata data[];
+};
+
+struct s390_ctrset_read {		/* Structure to get all ctr sets */
+	__u64 no_cpus;			/* Total # of CPUs data taken from */
+	struct s390_ctrset_cpudata data[];
+};
+
+#define S390_HWCTR_MAGIC	'C'	/* Random magic # for ioctls */
+#define	S390_HWCTR_START	_IOWR(S390_HWCTR_MAGIC, 1, struct s390_ctrset_start)
+#define	S390_HWCTR_STOP		_IO(S390_HWCTR_MAGIC, 2)
+#define	S390_HWCTR_READ		_IOWR(S390_HWCTR_MAGIC, 3, struct s390_ctrset_read)
+#endif
diff --git a/arch/s390/include/uapi/asm/hypfs.h b/arch/s390/include/uapi/asm/hypfs.h
new file mode 100644
index 0000000000..fe6174e142
--- /dev/null
+++ b/arch/s390/include/uapi/asm/hypfs.h
@@ -0,0 +1,55 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/*
+ * Structures for hypfs interface
+ *
+ * Copyright IBM Corp. 2013
+ *
+ * Author: Martin Schwidefsky <schwidefsky@de.ibm.com>
+ */
+
+#ifndef _ASM_HYPFS_H
+#define _ASM_HYPFS_H
+
+#include <linux/types.h>
+
+/*
+ * IOCTL for binary interface /sys/kernel/debug/diag_304
+ */
+struct hypfs_diag304 {
+	__u32	args[2];
+	__u64	data;
+	__u64	rc;
+} __attribute__((packed));
+
+#define HYPFS_IOCTL_MAGIC 0x10
+
+#define HYPFS_DIAG304 \
+	_IOWR(HYPFS_IOCTL_MAGIC, 0x20, struct hypfs_diag304)
+
+/*
+ * Structures for binary interface /sys/kernel/debug/diag_0c
+ */
+struct hypfs_diag0c_hdr {
+	__u64	len;		/* Length of diag0c buffer without header */
+	__u16	version;	/* Version of header */
+	char	reserved1[6];	/* Reserved */
+	char	tod_ext[16];	/* TOD clock for diag0c */
+	__u64	count;		/* Number of entries (CPUs) in diag0c array */
+	char	reserved2[24];	/* Reserved */
+};
+
+struct hypfs_diag0c_entry {
+	char	date[8];	/* MM/DD/YY in EBCDIC */
+	char	time[8];	/* HH:MM:SS in EBCDIC */
+	__u64	virtcpu;	/* Virtual time consumed by the virt CPU (us) */
+	__u64	totalproc;	/* Total of virtual and simulation time (us) */
+	__u32	cpu;		/* Linux logical CPU number */
+	__u32	reserved;	/* Align to 8 byte */
+};
+
+struct hypfs_diag0c_data {
+	struct hypfs_diag0c_hdr		hdr;		/* 64 byte header */
+	struct hypfs_diag0c_entry	entry[];	/* diag0c entry array */
+};
+
+#endif
diff --git a/arch/s390/include/uapi/asm/ioctls.h b/arch/s390/include/uapi/asm/ioctls.h
new file mode 100644
index 0000000000..342a3284e6
--- /dev/null
+++ b/arch/s390/include/uapi/asm/ioctls.h
@@ -0,0 +1,9 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+#ifndef __ARCH_S390_IOCTLS_H__
+#define __ARCH_S390_IOCTLS_H__
+
+#define FIOQSIZE	0x545E
+
+#include <asm-generic/ioctls.h>
+
+#endif
diff --git a/arch/s390/include/uapi/asm/ipcbuf.h b/arch/s390/include/uapi/asm/ipcbuf.h
new file mode 100644
index 0000000000..1030cd1868
--- /dev/null
+++ b/arch/s390/include/uapi/asm/ipcbuf.h
@@ -0,0 +1,34 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+#ifndef __S390_IPCBUF_H__
+#define __S390_IPCBUF_H__
+
+#include <linux/posix_types.h>
+
+/*
+ * The user_ipc_perm structure for S/390 architecture.
+ * Note extra padding because this structure is passed back and forth
+ * between kernel and user space.
+ *
+ * Pad space is left for:
+ * - 32-bit mode_t and seq
+ * - 2 miscellaneous 32-bit values
+ */
+
+struct ipc64_perm
+{
+	__kernel_key_t		key;
+	__kernel_uid32_t	uid;
+	__kernel_gid32_t	gid;
+	__kernel_uid32_t	cuid;
+	__kernel_gid32_t	cgid;
+	__kernel_mode_t		mode;
+	unsigned short		__pad1;
+	unsigned short		seq;
+#ifndef __s390x__
+	unsigned short		__pad2;
+#endif /* ! __s390x__ */
+	unsigned long		__unused1;
+	unsigned long		__unused2;
+};
+
+#endif /* __S390_IPCBUF_H__ */
diff --git a/arch/s390/include/uapi/asm/ipl.h b/arch/s390/include/uapi/asm/ipl.h
new file mode 100644
index 0000000000..2cd28af50d
--- /dev/null
+++ b/arch/s390/include/uapi/asm/ipl.h
@@ -0,0 +1,208 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+#ifndef _ASM_S390_UAPI_IPL_H
+#define _ASM_S390_UAPI_IPL_H
+
+#include <linux/types.h>
+
+/* IPL Parameter List header */
+struct ipl_pl_hdr {
+	__u32 len;
+	__u8  flags;
+	__u8  reserved1[2];
+	__u8  version;
+} __packed;
+
+#define IPL_PL_FLAG_IPLPS	0x80
+#define IPL_PL_FLAG_SIPL	0x40
+#define IPL_PL_FLAG_IPLSR	0x20
+
+/* IPL Parameter Block header */
+struct ipl_pb_hdr {
+	__u32 len;
+	__u8  pbt;
+} __packed;
+
+/* IPL Parameter Block types */
+enum ipl_pbt {
+	IPL_PBT_FCP = 0,
+	IPL_PBT_SCP_DATA = 1,
+	IPL_PBT_CCW = 2,
+	IPL_PBT_ECKD = 3,
+	IPL_PBT_NVME = 4,
+};
+
+/* IPL Parameter Block 0 with common fields */
+struct ipl_pb0_common {
+	__u32 len;
+	__u8  pbt;
+	__u8  flags;
+	__u8  reserved1[2];
+	__u8  loadparm[8];
+	__u8  reserved2[84];
+} __packed;
+
+#define IPL_PB0_FLAG_LOADPARM	0x80
+
+/* IPL Parameter Block 0 for FCP */
+struct ipl_pb0_fcp {
+	__u32 len;
+	__u8  pbt;
+	__u8  reserved1[3];
+	__u8  loadparm[8];
+	__u8  reserved2[304];
+	__u8  opt;
+	__u8  reserved3[3];
+	__u8  cssid;
+	__u8  reserved4[1];
+	__u16 devno;
+	__u8  reserved5[4];
+	__u64 wwpn;
+	__u64 lun;
+	__u32 bootprog;
+	__u8  reserved6[12];
+	__u64 br_lba;
+	__u32 scp_data_len;
+	__u8  reserved7[260];
+	__u8  scp_data[];
+} __packed;
+
+#define IPL_PB0_FCP_OPT_IPL	0x10
+#define IPL_PB0_FCP_OPT_DUMP	0x20
+
+/* IPL Parameter Block 0 for NVMe */
+struct ipl_pb0_nvme {
+	__u32 len;
+	__u8  pbt;
+	__u8  reserved1[3];
+	__u8  loadparm[8];
+	__u8  reserved2[304];
+	__u8  opt;
+	__u8  reserved3[3];
+	__u32 fid;
+	__u8 reserved4[12];
+	__u32 nsid;
+	__u8 reserved5[4];
+	__u32 bootprog;
+	__u8 reserved6[12];
+	__u64 br_lba;
+	__u32 scp_data_len;
+	__u8  reserved7[260];
+	__u8  scp_data[];
+} __packed;
+
+#define IPL_PB0_NVME_OPT_IPL	0x10
+#define IPL_PB0_NVME_OPT_DUMP	0x20
+
+/* IPL Parameter Block 0 for CCW */
+struct ipl_pb0_ccw {
+	__u32 len;
+	__u8  pbt;
+	__u8  flags;
+	__u8  reserved1[2];
+	__u8  loadparm[8];
+	__u8  reserved2[84];
+	__u16 reserved3 : 13;
+	__u8  ssid : 3;
+	__u16 devno;
+	__u8  vm_flags;
+	__u8  reserved4[3];
+	__u32 vm_parm_len;
+	__u8  nss_name[8];
+	__u8  vm_parm[64];
+	__u8  reserved5[8];
+} __packed;
+
+/* IPL Parameter Block 0 for ECKD */
+struct ipl_pb0_eckd {
+	__u32 len;
+	__u8  pbt;
+	__u8  reserved1[3];
+	__u32 reserved2[78];
+	__u8  opt;
+	__u8  reserved4[4];
+	__u8  reserved5:5;
+	__u8  ssid:3;
+	__u16 devno;
+	__u32 reserved6[5];
+	__u32 bootprog;
+	__u8  reserved7[12];
+	struct {
+		__u16 cyl;
+		__u8 head;
+		__u8 record;
+		__u32 reserved;
+	} br_chr __packed;
+	__u32 scp_data_len;
+	__u8  reserved8[260];
+	__u8  scp_data[];
+} __packed;
+
+#define IPL_PB0_ECKD_OPT_IPL	0x10
+#define IPL_PB0_ECKD_OPT_DUMP	0x20
+
+#define IPL_PB0_CCW_VM_FLAG_NSS		0x80
+#define IPL_PB0_CCW_VM_FLAG_VP		0x40
+
+/* IPL Parameter Block 1 for additional SCP data */
+struct ipl_pb1_scp_data {
+	__u32 len;
+	__u8  pbt;
+	__u8  scp_data[];
+} __packed;
+
+/* IPL Report List header */
+struct ipl_rl_hdr {
+	__u32 len;
+	__u8  flags;
+	__u8  reserved1[2];
+	__u8  version;
+	__u8  reserved2[8];
+} __packed;
+
+/* IPL Report Block header */
+struct ipl_rb_hdr {
+	__u32 len;
+	__u8  rbt;
+	__u8  reserved1[11];
+} __packed;
+
+/* IPL Report Block types */
+enum ipl_rbt {
+	IPL_RBT_CERTIFICATES = 1,
+	IPL_RBT_COMPONENTS = 2,
+};
+
+/* IPL Report Block for the certificate list */
+struct ipl_rb_certificate_entry {
+	__u64 addr;
+	__u64 len;
+} __packed;
+
+struct ipl_rb_certificates {
+	__u32 len;
+	__u8  rbt;
+	__u8  reserved1[11];
+	struct ipl_rb_certificate_entry entries[];
+} __packed;
+
+/* IPL Report Block for the component list */
+struct ipl_rb_component_entry {
+	__u64 addr;
+	__u64 len;
+	__u8  flags;
+	__u8  reserved1[5];
+	__u16 certificate_index;
+	__u8  reserved2[8];
+};
+
+#define IPL_RB_COMPONENT_FLAG_SIGNED	0x80
+#define IPL_RB_COMPONENT_FLAG_VERIFIED	0x40
+
+struct ipl_rb_components {
+	__u32 len;
+	__u8  rbt;
+	__u8  reserved1[11];
+	struct ipl_rb_component_entry entries[];
+} __packed;
+
+#endif
diff --git a/arch/s390/include/uapi/asm/kvm.h b/arch/s390/include/uapi/asm/kvm.h
new file mode 100644
index 0000000000..abe926d43c
--- /dev/null
+++ b/arch/s390/include/uapi/asm/kvm.h
@@ -0,0 +1,308 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+#ifndef __LINUX_KVM_S390_H
+#define __LINUX_KVM_S390_H
+/*
+ * KVM s390 specific structures and definitions
+ *
+ * Copyright IBM Corp. 2008, 2018
+ *
+ *    Author(s): Carsten Otte <cotte@de.ibm.com>
+ *               Christian Borntraeger <borntraeger@de.ibm.com>
+ */
+#include <linux/types.h>
+
+#define __KVM_S390
+#define __KVM_HAVE_GUEST_DEBUG
+
+/* Device control API: s390-specific devices */
+#define KVM_DEV_FLIC_GET_ALL_IRQS	1
+#define KVM_DEV_FLIC_ENQUEUE		2
+#define KVM_DEV_FLIC_CLEAR_IRQS		3
+#define KVM_DEV_FLIC_APF_ENABLE		4
+#define KVM_DEV_FLIC_APF_DISABLE_WAIT	5
+#define KVM_DEV_FLIC_ADAPTER_REGISTER	6
+#define KVM_DEV_FLIC_ADAPTER_MODIFY	7
+#define KVM_DEV_FLIC_CLEAR_IO_IRQ	8
+#define KVM_DEV_FLIC_AISM		9
+#define KVM_DEV_FLIC_AIRQ_INJECT	10
+#define KVM_DEV_FLIC_AISM_ALL		11
+/*
+ * We can have up to 4*64k pending subchannels + 8 adapter interrupts,
+ * as well as up  to ASYNC_PF_PER_VCPU*KVM_MAX_VCPUS pfault done interrupts.
+ * There are also sclp and machine checks. This gives us
+ * sizeof(kvm_s390_irq)*(4*65536+8+64*64+1+1) = 72 * 266250 = 19170000
+ * Lets round up to 8192 pages.
+ */
+#define KVM_S390_MAX_FLOAT_IRQS	266250
+#define KVM_S390_FLIC_MAX_BUFFER	0x2000000
+
+struct kvm_s390_io_adapter {
+	__u32 id;
+	__u8 isc;
+	__u8 maskable;
+	__u8 swap;
+	__u8 flags;
+};
+
+#define KVM_S390_ADAPTER_SUPPRESSIBLE 0x01
+
+struct kvm_s390_ais_req {
+	__u8 isc;
+	__u16 mode;
+};
+
+struct kvm_s390_ais_all {
+	__u8 simm;
+	__u8 nimm;
+};
+
+#define KVM_S390_IO_ADAPTER_MASK 1
+#define KVM_S390_IO_ADAPTER_MAP 2
+#define KVM_S390_IO_ADAPTER_UNMAP 3
+
+struct kvm_s390_io_adapter_req {
+	__u32 id;
+	__u8 type;
+	__u8 mask;
+	__u16 pad0;
+	__u64 addr;
+};
+
+/* kvm attr_group  on vm fd */
+#define KVM_S390_VM_MEM_CTRL		0
+#define KVM_S390_VM_TOD			1
+#define KVM_S390_VM_CRYPTO		2
+#define KVM_S390_VM_CPU_MODEL		3
+#define KVM_S390_VM_MIGRATION		4
+#define KVM_S390_VM_CPU_TOPOLOGY	5
+
+/* kvm attributes for mem_ctrl */
+#define KVM_S390_VM_MEM_ENABLE_CMMA	0
+#define KVM_S390_VM_MEM_CLR_CMMA	1
+#define KVM_S390_VM_MEM_LIMIT_SIZE	2
+
+#define KVM_S390_NO_MEM_LIMIT		U64_MAX
+
+/* kvm attributes for KVM_S390_VM_TOD */
+#define KVM_S390_VM_TOD_LOW		0
+#define KVM_S390_VM_TOD_HIGH		1
+#define KVM_S390_VM_TOD_EXT		2
+
+struct kvm_s390_vm_tod_clock {
+	__u8  epoch_idx;
+	__u64 tod;
+};
+
+/* kvm attributes for KVM_S390_VM_CPU_MODEL */
+/* processor related attributes are r/w */
+#define KVM_S390_VM_CPU_PROCESSOR	0
+struct kvm_s390_vm_cpu_processor {
+	__u64 cpuid;
+	__u16 ibc;
+	__u8  pad[6];
+	__u64 fac_list[256];
+};
+
+/* machine related attributes are r/o */
+#define KVM_S390_VM_CPU_MACHINE		1
+struct kvm_s390_vm_cpu_machine {
+	__u64 cpuid;
+	__u32 ibc;
+	__u8  pad[4];
+	__u64 fac_mask[256];
+	__u64 fac_list[256];
+};
+
+#define KVM_S390_VM_CPU_PROCESSOR_FEAT	2
+#define KVM_S390_VM_CPU_MACHINE_FEAT	3
+
+#define KVM_S390_VM_CPU_FEAT_NR_BITS	1024
+#define KVM_S390_VM_CPU_FEAT_ESOP	0
+#define KVM_S390_VM_CPU_FEAT_SIEF2	1
+#define KVM_S390_VM_CPU_FEAT_64BSCAO	2
+#define KVM_S390_VM_CPU_FEAT_SIIF	3
+#define KVM_S390_VM_CPU_FEAT_GPERE	4
+#define KVM_S390_VM_CPU_FEAT_GSLS	5
+#define KVM_S390_VM_CPU_FEAT_IB		6
+#define KVM_S390_VM_CPU_FEAT_CEI	7
+#define KVM_S390_VM_CPU_FEAT_IBS	8
+#define KVM_S390_VM_CPU_FEAT_SKEY	9
+#define KVM_S390_VM_CPU_FEAT_CMMA	10
+#define KVM_S390_VM_CPU_FEAT_PFMFI	11
+#define KVM_S390_VM_CPU_FEAT_SIGPIF	12
+#define KVM_S390_VM_CPU_FEAT_KSS	13
+struct kvm_s390_vm_cpu_feat {
+	__u64 feat[16];
+};
+
+#define KVM_S390_VM_CPU_PROCESSOR_SUBFUNC	4
+#define KVM_S390_VM_CPU_MACHINE_SUBFUNC		5
+/* for "test bit" instructions MSB 0 bit ordering, for "query" raw blocks */
+struct kvm_s390_vm_cpu_subfunc {
+	__u8 plo[32];		/* always */
+	__u8 ptff[16];		/* with TOD-clock steering */
+	__u8 kmac[16];		/* with MSA */
+	__u8 kmc[16];		/* with MSA */
+	__u8 km[16];		/* with MSA */
+	__u8 kimd[16];		/* with MSA */
+	__u8 klmd[16];		/* with MSA */
+	__u8 pckmo[16];		/* with MSA3 */
+	__u8 kmctr[16];		/* with MSA4 */
+	__u8 kmf[16];		/* with MSA4 */
+	__u8 kmo[16];		/* with MSA4 */
+	__u8 pcc[16];		/* with MSA4 */
+	__u8 ppno[16];		/* with MSA5 */
+	__u8 kma[16];		/* with MSA8 */
+	__u8 kdsa[16];		/* with MSA9 */
+	__u8 sortl[32];		/* with STFLE.150 */
+	__u8 dfltcc[32];	/* with STFLE.151 */
+	__u8 reserved[1728];
+};
+
+#define KVM_S390_VM_CPU_PROCESSOR_UV_FEAT_GUEST	6
+#define KVM_S390_VM_CPU_MACHINE_UV_FEAT_GUEST	7
+
+#define KVM_S390_VM_CPU_UV_FEAT_NR_BITS	64
+struct kvm_s390_vm_cpu_uv_feat {
+	union {
+		struct {
+			__u64 : 4;
+			__u64 ap : 1;		/* bit 4 */
+			__u64 ap_intr : 1;	/* bit 5 */
+			__u64 : 58;
+		};
+		__u64 feat;
+	};
+};
+
+/* kvm attributes for crypto */
+#define KVM_S390_VM_CRYPTO_ENABLE_AES_KW	0
+#define KVM_S390_VM_CRYPTO_ENABLE_DEA_KW	1
+#define KVM_S390_VM_CRYPTO_DISABLE_AES_KW	2
+#define KVM_S390_VM_CRYPTO_DISABLE_DEA_KW	3
+#define KVM_S390_VM_CRYPTO_ENABLE_APIE		4
+#define KVM_S390_VM_CRYPTO_DISABLE_APIE		5
+
+/* kvm attributes for migration mode */
+#define KVM_S390_VM_MIGRATION_STOP	0
+#define KVM_S390_VM_MIGRATION_START	1
+#define KVM_S390_VM_MIGRATION_STATUS	2
+
+/* for KVM_GET_REGS and KVM_SET_REGS */
+struct kvm_regs {
+	/* general purpose regs for s390 */
+	__u64 gprs[16];
+};
+
+/* for KVM_GET_SREGS and KVM_SET_SREGS */
+struct kvm_sregs {
+	__u32 acrs[16];
+	__u64 crs[16];
+};
+
+/* for KVM_GET_FPU and KVM_SET_FPU */
+struct kvm_fpu {
+	__u32 fpc;
+	__u64 fprs[16];
+};
+
+#define KVM_GUESTDBG_USE_HW_BP		0x00010000
+
+#define KVM_HW_BP			1
+#define KVM_HW_WP_WRITE			2
+#define KVM_SINGLESTEP			4
+
+struct kvm_debug_exit_arch {
+	__u64 addr;
+	__u8 type;
+	__u8 pad[7]; /* Should be set to 0 */
+};
+
+struct kvm_hw_breakpoint {
+	__u64 addr;
+	__u64 phys_addr;
+	__u64 len;
+	__u8 type;
+	__u8 pad[7]; /* Should be set to 0 */
+};
+
+/* for KVM_SET_GUEST_DEBUG */
+struct kvm_guest_debug_arch {
+	__u32 nr_hw_bp;
+	__u32 pad; /* Should be set to 0 */
+	struct kvm_hw_breakpoint __user *hw_bp;
+};
+
+/* for KVM_SYNC_PFAULT and KVM_REG_S390_PFTOKEN */
+#define KVM_S390_PFAULT_TOKEN_INVALID	0xffffffffffffffffULL
+
+#define KVM_SYNC_PREFIX (1UL << 0)
+#define KVM_SYNC_GPRS   (1UL << 1)
+#define KVM_SYNC_ACRS   (1UL << 2)
+#define KVM_SYNC_CRS    (1UL << 3)
+#define KVM_SYNC_ARCH0  (1UL << 4)
+#define KVM_SYNC_PFAULT (1UL << 5)
+#define KVM_SYNC_VRS    (1UL << 6)
+#define KVM_SYNC_RICCB  (1UL << 7)
+#define KVM_SYNC_FPRS   (1UL << 8)
+#define KVM_SYNC_GSCB   (1UL << 9)
+#define KVM_SYNC_BPBC   (1UL << 10)
+#define KVM_SYNC_ETOKEN (1UL << 11)
+#define KVM_SYNC_DIAG318 (1UL << 12)
+
+#define KVM_SYNC_S390_VALID_FIELDS \
+	(KVM_SYNC_PREFIX | KVM_SYNC_GPRS | KVM_SYNC_ACRS | KVM_SYNC_CRS | \
+	 KVM_SYNC_ARCH0 | KVM_SYNC_PFAULT | KVM_SYNC_VRS | KVM_SYNC_RICCB | \
+	 KVM_SYNC_FPRS | KVM_SYNC_GSCB | KVM_SYNC_BPBC | KVM_SYNC_ETOKEN | \
+	 KVM_SYNC_DIAG318)
+
+/* length and alignment of the sdnx as a power of two */
+#define SDNXC 8
+#define SDNXL (1UL << SDNXC)
+/* definition of registers in kvm_run */
+struct kvm_sync_regs {
+	__u64 prefix;	/* prefix register */
+	__u64 gprs[16];	/* general purpose registers */
+	__u32 acrs[16];	/* access registers */
+	__u64 crs[16];	/* control registers */
+	__u64 todpr;	/* tod programmable register [ARCH0] */
+	__u64 cputm;	/* cpu timer [ARCH0] */
+	__u64 ckc;	/* clock comparator [ARCH0] */
+	__u64 pp;	/* program parameter [ARCH0] */
+	__u64 gbea;	/* guest breaking-event address [ARCH0] */
+	__u64 pft;	/* pfault token [PFAULT] */
+	__u64 pfs;	/* pfault select [PFAULT] */
+	__u64 pfc;	/* pfault compare [PFAULT] */
+	union {
+		__u64 vrs[32][2];	/* vector registers (KVM_SYNC_VRS) */
+		__u64 fprs[16];		/* fp registers (KVM_SYNC_FPRS) */
+	};
+	__u8  reserved[512];	/* for future vector expansion */
+	__u32 fpc;		/* valid on KVM_SYNC_VRS or KVM_SYNC_FPRS */
+	__u8 bpbc : 1;		/* bp mode */
+	__u8 reserved2 : 7;
+	__u8 padding1[51];	/* riccb needs to be 64byte aligned */
+	__u8 riccb[64];		/* runtime instrumentation controls block */
+	__u64 diag318;		/* diagnose 0x318 info */
+	__u8 padding2[184];	/* sdnx needs to be 256byte aligned */
+	union {
+		__u8 sdnx[SDNXL];  /* state description annex */
+		struct {
+			__u64 reserved1[2];
+			__u64 gscb[4];
+			__u64 etoken;
+			__u64 etoken_extension;
+		};
+	};
+};
+
+#define KVM_REG_S390_TODPR	(KVM_REG_S390 | KVM_REG_SIZE_U32 | 0x1)
+#define KVM_REG_S390_EPOCHDIFF	(KVM_REG_S390 | KVM_REG_SIZE_U64 | 0x2)
+#define KVM_REG_S390_CPU_TIMER  (KVM_REG_S390 | KVM_REG_SIZE_U64 | 0x3)
+#define KVM_REG_S390_CLOCK_COMP (KVM_REG_S390 | KVM_REG_SIZE_U64 | 0x4)
+#define KVM_REG_S390_PFTOKEN	(KVM_REG_S390 | KVM_REG_SIZE_U64 | 0x5)
+#define KVM_REG_S390_PFCOMPARE	(KVM_REG_S390 | KVM_REG_SIZE_U64 | 0x6)
+#define KVM_REG_S390_PFSELECT	(KVM_REG_S390 | KVM_REG_SIZE_U64 | 0x7)
+#define KVM_REG_S390_PP		(KVM_REG_S390 | KVM_REG_SIZE_U64 | 0x8)
+#define KVM_REG_S390_GBEA	(KVM_REG_S390 | KVM_REG_SIZE_U64 | 0x9)
+#endif
diff --git a/arch/s390/include/uapi/asm/kvm_para.h b/arch/s390/include/uapi/asm/kvm_para.h
new file mode 100644
index 0000000000..b9ab584adf
--- /dev/null
+++ b/arch/s390/include/uapi/asm/kvm_para.h
@@ -0,0 +1,8 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/*
+ * User API definitions for paravirtual devices on s390
+ *
+ * Copyright IBM Corp. 2008
+ *
+ *    Author(s): Christian Borntraeger <borntraeger@de.ibm.com>
+ */
diff --git a/arch/s390/include/uapi/asm/kvm_perf.h b/arch/s390/include/uapi/asm/kvm_perf.h
new file mode 100644
index 0000000000..84606b8cc4
--- /dev/null
+++ b/arch/s390/include/uapi/asm/kvm_perf.h
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/*
+ * Definitions for perf-kvm on s390
+ *
+ * Copyright 2014 IBM Corp.
+ * Author(s): Alexander Yarygin <yarygin@linux.vnet.ibm.com>
+ */
+
+#ifndef __LINUX_KVM_PERF_S390_H
+#define __LINUX_KVM_PERF_S390_H
+
+#include <asm/sie.h>
+
+#define DECODE_STR_LEN 40
+
+#define VCPU_ID "id"
+
+#define KVM_ENTRY_TRACE "kvm:kvm_s390_sie_enter"
+#define KVM_EXIT_TRACE "kvm:kvm_s390_sie_exit"
+#define KVM_EXIT_REASON "icptcode"
+
+#endif
diff --git a/arch/s390/include/uapi/asm/monwriter.h b/arch/s390/include/uapi/asm/monwriter.h
new file mode 100644
index 0000000000..03d172f314
--- /dev/null
+++ b/arch/s390/include/uapi/asm/monwriter.h
@@ -0,0 +1,32 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/*
+ * Copyright IBM Corp. 2006
+ * Character device driver for writing z/VM APPLDATA monitor records
+ * Version 1.0
+ * Author(s): Melissa Howland <melissah@us.ibm.com>
+ *
+ */
+
+#ifndef _ASM_390_MONWRITER_H
+#define _ASM_390_MONWRITER_H
+
+/* mon_function values */
+#define MONWRITE_START_INTERVAL	0x00 /* start interval recording */
+#define MONWRITE_STOP_INTERVAL	0x01 /* stop interval or config recording */
+#define MONWRITE_GEN_EVENT	0x02 /* generate event record */
+#define MONWRITE_START_CONFIG	0x03 /* start configuration recording */
+
+/* the header the app uses in its write() data */
+struct monwrite_hdr {
+	unsigned char mon_function;
+	unsigned short applid;
+	unsigned char record_num;
+	unsigned short version;
+	unsigned short release;
+	unsigned short mod_level;
+	unsigned short datalen;
+	unsigned char hdrlen;
+
+} __attribute__((packed));
+
+#endif /* _ASM_390_MONWRITER_H */
diff --git a/arch/s390/include/uapi/asm/perf_regs.h b/arch/s390/include/uapi/asm/perf_regs.h
new file mode 100644
index 0000000000..d17dd9e5d5
--- /dev/null
+++ b/arch/s390/include/uapi/asm/perf_regs.h
@@ -0,0 +1,44 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+#ifndef _ASM_S390_PERF_REGS_H
+#define _ASM_S390_PERF_REGS_H
+
+enum perf_event_s390_regs {
+	PERF_REG_S390_R0,
+	PERF_REG_S390_R1,
+	PERF_REG_S390_R2,
+	PERF_REG_S390_R3,
+	PERF_REG_S390_R4,
+	PERF_REG_S390_R5,
+	PERF_REG_S390_R6,
+	PERF_REG_S390_R7,
+	PERF_REG_S390_R8,
+	PERF_REG_S390_R9,
+	PERF_REG_S390_R10,
+	PERF_REG_S390_R11,
+	PERF_REG_S390_R12,
+	PERF_REG_S390_R13,
+	PERF_REG_S390_R14,
+	PERF_REG_S390_R15,
+	PERF_REG_S390_FP0,
+	PERF_REG_S390_FP1,
+	PERF_REG_S390_FP2,
+	PERF_REG_S390_FP3,
+	PERF_REG_S390_FP4,
+	PERF_REG_S390_FP5,
+	PERF_REG_S390_FP6,
+	PERF_REG_S390_FP7,
+	PERF_REG_S390_FP8,
+	PERF_REG_S390_FP9,
+	PERF_REG_S390_FP10,
+	PERF_REG_S390_FP11,
+	PERF_REG_S390_FP12,
+	PERF_REG_S390_FP13,
+	PERF_REG_S390_FP14,
+	PERF_REG_S390_FP15,
+	PERF_REG_S390_MASK,
+	PERF_REG_S390_PC,
+
+	PERF_REG_S390_MAX
+};
+
+#endif /* _ASM_S390_PERF_REGS_H */
diff --git a/arch/s390/include/uapi/asm/pkey.h b/arch/s390/include/uapi/asm/pkey.h
new file mode 100644
index 0000000000..5ad76471e7
--- /dev/null
+++ b/arch/s390/include/uapi/asm/pkey.h
@@ -0,0 +1,452 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/*
+ * Userspace interface to the pkey device driver
+ *
+ * Copyright IBM Corp. 2017, 2023
+ *
+ * Author: Harald Freudenberger <freude@de.ibm.com>
+ *
+ */
+
+#ifndef _UAPI_PKEY_H
+#define _UAPI_PKEY_H
+
+#include <linux/ioctl.h>
+#include <linux/types.h>
+
+/*
+ * Ioctl calls supported by the pkey device driver
+ */
+
+#define PKEY_IOCTL_MAGIC 'p'
+
+#define SECKEYBLOBSIZE	64	   /* secure key blob size is always 64 bytes */
+#define PROTKEYBLOBSIZE 80	/* protected key blob size is always 80 bytes */
+#define MAXPROTKEYSIZE	64	/* a protected key blob may be up to 64 bytes */
+#define MAXCLRKEYSIZE	32	   /* a clear key value may be up to 32 bytes */
+#define MAXAESCIPHERKEYSIZE 136  /* our aes cipher keys have always 136 bytes */
+#define MINEP11AESKEYBLOBSIZE 256  /* min EP11 AES key blob size  */
+#define MAXEP11AESKEYBLOBSIZE 336  /* max EP11 AES key blob size */
+
+/* Minimum size of a key blob */
+#define MINKEYBLOBSIZE	SECKEYBLOBSIZE
+
+/* defines for the type field within the pkey_protkey struct */
+#define PKEY_KEYTYPE_AES_128		1
+#define PKEY_KEYTYPE_AES_192		2
+#define PKEY_KEYTYPE_AES_256		3
+#define PKEY_KEYTYPE_ECC		4
+#define PKEY_KEYTYPE_ECC_P256		5
+#define PKEY_KEYTYPE_ECC_P384		6
+#define PKEY_KEYTYPE_ECC_P521		7
+#define PKEY_KEYTYPE_ECC_ED25519	8
+#define PKEY_KEYTYPE_ECC_ED448		9
+
+/* the newer ioctls use a pkey_key_type enum for type information */
+enum pkey_key_type {
+	PKEY_TYPE_CCA_DATA   = (__u32) 1,
+	PKEY_TYPE_CCA_CIPHER = (__u32) 2,
+	PKEY_TYPE_EP11	     = (__u32) 3,
+	PKEY_TYPE_CCA_ECC    = (__u32) 0x1f,
+	PKEY_TYPE_EP11_AES   = (__u32) 6,
+	PKEY_TYPE_EP11_ECC   = (__u32) 7,
+};
+
+/* the newer ioctls use a pkey_key_size enum for key size information */
+enum pkey_key_size {
+	PKEY_SIZE_AES_128 = (__u32) 128,
+	PKEY_SIZE_AES_192 = (__u32) 192,
+	PKEY_SIZE_AES_256 = (__u32) 256,
+	PKEY_SIZE_UNKNOWN = (__u32) 0xFFFFFFFF,
+};
+
+/* some of the newer ioctls use these flags */
+#define PKEY_FLAGS_MATCH_CUR_MKVP  0x00000002
+#define PKEY_FLAGS_MATCH_ALT_MKVP  0x00000004
+
+/* keygenflags defines for CCA AES cipher keys */
+#define PKEY_KEYGEN_XPRT_SYM  0x00008000
+#define PKEY_KEYGEN_XPRT_UASY 0x00004000
+#define PKEY_KEYGEN_XPRT_AASY 0x00002000
+#define PKEY_KEYGEN_XPRT_RAW  0x00001000
+#define PKEY_KEYGEN_XPRT_CPAC 0x00000800
+#define PKEY_KEYGEN_XPRT_DES  0x00000080
+#define PKEY_KEYGEN_XPRT_AES  0x00000040
+#define PKEY_KEYGEN_XPRT_RSA  0x00000008
+
+/* Struct to hold apqn target info (card/domain pair) */
+struct pkey_apqn {
+	__u16 card;
+	__u16 domain;
+};
+
+/* Struct to hold a CCA AES secure key blob */
+struct pkey_seckey {
+	__u8  seckey[SECKEYBLOBSIZE];		  /* the secure key blob */
+};
+
+/* Struct to hold protected key and length info */
+struct pkey_protkey {
+	__u32 type;	 /* key type, one of the PKEY_KEYTYPE_AES values */
+	__u32 len;		/* bytes actually stored in protkey[]	 */
+	__u8  protkey[MAXPROTKEYSIZE];	       /* the protected key blob */
+};
+
+/* Struct to hold an AES clear key value */
+struct pkey_clrkey {
+	__u8  clrkey[MAXCLRKEYSIZE]; /* 16, 24, or 32 byte clear key value */
+};
+
+/*
+ * EP11 key blobs of type PKEY_TYPE_EP11_AES and PKEY_TYPE_EP11_ECC
+ * are ep11 blobs prepended by this header:
+ */
+struct ep11kblob_header {
+	__u8  type;	/* always 0x00 */
+	__u8  hver;	/* header version,  currently needs to be 0x00 */
+	__u16 len;	/* total length in bytes (including this header) */
+	__u8  version;	/* PKEY_TYPE_EP11_AES or PKEY_TYPE_EP11_ECC */
+	__u8  res0;	/* unused */
+	__u16 bitlen;	/* clear key bit len, 0 for unknown */
+	__u8  res1[8];	/* unused */
+} __packed;
+
+/*
+ * Generate CCA AES secure key.
+ */
+struct pkey_genseck {
+	__u16 cardnr;		    /* in: card to use or FFFF for any	 */
+	__u16 domain;		    /* in: domain or FFFF for any	 */
+	__u32 keytype;		    /* in: key type to generate		 */
+	struct pkey_seckey seckey;  /* out: the secure key blob		 */
+};
+#define PKEY_GENSECK _IOWR(PKEY_IOCTL_MAGIC, 0x01, struct pkey_genseck)
+
+/*
+ * Construct CCA AES secure key from clear key value
+ */
+struct pkey_clr2seck {
+	__u16 cardnr;		    /* in: card to use or FFFF for any	 */
+	__u16 domain;		    /* in: domain or FFFF for any	 */
+	__u32 keytype;		    /* in: key type to generate		 */
+	struct pkey_clrkey clrkey;  /* in: the clear key value		 */
+	struct pkey_seckey seckey;  /* out: the secure key blob		 */
+};
+#define PKEY_CLR2SECK _IOWR(PKEY_IOCTL_MAGIC, 0x02, struct pkey_clr2seck)
+
+/*
+ * Fabricate AES protected key from a CCA AES secure key
+ */
+struct pkey_sec2protk {
+	__u16 cardnr;		     /* in: card to use or FFFF for any   */
+	__u16 domain;		     /* in: domain or FFFF for any	  */
+	struct pkey_seckey seckey;   /* in: the secure key blob		  */
+	struct pkey_protkey protkey; /* out: the protected key		  */
+};
+#define PKEY_SEC2PROTK _IOWR(PKEY_IOCTL_MAGIC, 0x03, struct pkey_sec2protk)
+
+/*
+ * Fabricate AES protected key from clear key value
+ */
+struct pkey_clr2protk {
+	__u32 keytype;		     /* in: key type to generate	  */
+	struct pkey_clrkey clrkey;   /* in: the clear key value		  */
+	struct pkey_protkey protkey; /* out: the protected key		  */
+};
+#define PKEY_CLR2PROTK _IOWR(PKEY_IOCTL_MAGIC, 0x04, struct pkey_clr2protk)
+
+/*
+ * Search for matching crypto card based on the Master Key
+ * Verification Pattern provided inside a CCA AES secure key.
+ */
+struct pkey_findcard {
+	struct pkey_seckey seckey;	       /* in: the secure key blob */
+	__u16  cardnr;			       /* out: card number	  */
+	__u16  domain;			       /* out: domain number	  */
+};
+#define PKEY_FINDCARD _IOWR(PKEY_IOCTL_MAGIC, 0x05, struct pkey_findcard)
+
+/*
+ * Combined together: findcard + sec2prot
+ */
+struct pkey_skey2pkey {
+	struct pkey_seckey seckey;   /* in: the secure key blob		  */
+	struct pkey_protkey protkey; /* out: the protected key		  */
+};
+#define PKEY_SKEY2PKEY _IOWR(PKEY_IOCTL_MAGIC, 0x06, struct pkey_skey2pkey)
+
+/*
+ * Verify the given CCA AES secure key for being able to be usable with
+ * the pkey module. Check for correct key type and check for having at
+ * least one crypto card being able to handle this key (master key
+ * or old master key verification pattern matches).
+ * Return some info about the key: keysize in bits, keytype (currently
+ * only AES), flag if key is wrapped with an old MKVP.
+ */
+struct pkey_verifykey {
+	struct pkey_seckey seckey;	       /* in: the secure key blob */
+	__u16  cardnr;			       /* out: card number	  */
+	__u16  domain;			       /* out: domain number	  */
+	__u16  keysize;			       /* out: key size in bits   */
+	__u32  attributes;		       /* out: attribute bits	  */
+};
+#define PKEY_VERIFYKEY _IOWR(PKEY_IOCTL_MAGIC, 0x07, struct pkey_verifykey)
+#define PKEY_VERIFY_ATTR_AES	   0x00000001  /* key is an AES key */
+#define PKEY_VERIFY_ATTR_OLD_MKVP  0x00000100  /* key has old MKVP value */
+
+/*
+ * Generate AES random protected key.
+ */
+struct pkey_genprotk {
+	__u32 keytype;			       /* in: key type to generate */
+	struct pkey_protkey protkey;	       /* out: the protected key   */
+};
+
+#define PKEY_GENPROTK _IOWR(PKEY_IOCTL_MAGIC, 0x08, struct pkey_genprotk)
+
+/*
+ * Verify an AES protected key.
+ */
+struct pkey_verifyprotk {
+	struct pkey_protkey protkey;	/* in: the protected key to verify */
+};
+
+#define PKEY_VERIFYPROTK _IOW(PKEY_IOCTL_MAGIC, 0x09, struct pkey_verifyprotk)
+
+/*
+ * Transform an key blob (of any type) into a protected key
+ */
+struct pkey_kblob2pkey {
+	__u8 __user *key;		/* in: the key blob	   */
+	__u32 keylen;			/* in: the key blob length */
+	struct pkey_protkey protkey;	/* out: the protected key  */
+};
+#define PKEY_KBLOB2PROTK _IOWR(PKEY_IOCTL_MAGIC, 0x0A, struct pkey_kblob2pkey)
+
+/*
+ * Generate secure key, version 2.
+ * Generate CCA AES secure key, CCA AES cipher key or EP11 AES secure key.
+ * There needs to be a list of apqns given with at least one entry in there.
+ * All apqns in the list need to be exact apqns, 0xFFFF as ANY card or domain
+ * is not supported. The implementation walks through the list of apqns and
+ * tries to send the request to each apqn without any further checking (like
+ * card type or online state). If the apqn fails, simple the next one in the
+ * list is tried until success (return 0) or the end of the list is reached
+ * (return -1 with errno ENODEV). You may use the PKEY_APQNS4KT ioctl to
+ * generate a list of apqns based on the key type to generate.
+ * The keygenflags argument is passed to the low level generation functions
+ * individual for the key type and has a key type specific meaning. When
+ * generating CCA cipher keys you can use one or more of the PKEY_KEYGEN_*
+ * flags to widen the export possibilities. By default a cipher key is
+ * only exportable for CPACF (PKEY_KEYGEN_XPRT_CPAC).
+ * The keygenflag argument for generating an EP11 AES key should either be 0
+ * to use the defaults which are XCP_BLOB_ENCRYPT, XCP_BLOB_DECRYPT and
+ * XCP_BLOB_PROTKEY_EXTRACTABLE or a valid combination of XCP_BLOB_* flags.
+ */
+struct pkey_genseck2 {
+	struct pkey_apqn __user *apqns; /* in: ptr to list of apqn targets*/
+	__u32 apqn_entries;	    /* in: # of apqn target list entries  */
+	enum pkey_key_type type;    /* in: key type to generate		  */
+	enum pkey_key_size size;    /* in: key size to generate		  */
+	__u32 keygenflags;	    /* in: key generation flags		  */
+	__u8 __user *key;	    /* in: pointer to key blob buffer	  */
+	__u32 keylen;		    /* in: available key blob buffer size */
+				    /* out: actual key blob size	  */
+};
+#define PKEY_GENSECK2 _IOWR(PKEY_IOCTL_MAGIC, 0x11, struct pkey_genseck2)
+
+/*
+ * Generate secure key from clear key value, version 2.
+ * Construct an CCA AES secure key, CCA AES cipher key or EP11 AES secure
+ * key from a given clear key value.
+ * There needs to be a list of apqns given with at least one entry in there.
+ * All apqns in the list need to be exact apqns, 0xFFFF as ANY card or domain
+ * is not supported. The implementation walks through the list of apqns and
+ * tries to send the request to each apqn without any further checking (like
+ * card type or online state). If the apqn fails, simple the next one in the
+ * list is tried until success (return 0) or the end of the list is reached
+ * (return -1 with errno ENODEV). You may use the PKEY_APQNS4KT ioctl to
+ * generate a list of apqns based on the key type to generate.
+ * The keygenflags argument is passed to the low level generation functions
+ * individual for the key type and has a key type specific meaning. When
+ * generating CCA cipher keys you can use one or more of the PKEY_KEYGEN_*
+ * flags to widen the export possibilities. By default a cipher key is
+ * only exportable for CPACF (PKEY_KEYGEN_XPRT_CPAC).
+ * The keygenflag argument for generating an EP11 AES key should either be 0
+ * to use the defaults which are XCP_BLOB_ENCRYPT, XCP_BLOB_DECRYPT and
+ * XCP_BLOB_PROTKEY_EXTRACTABLE or a valid combination of XCP_BLOB_* flags.
+ */
+struct pkey_clr2seck2 {
+	struct pkey_apqn __user *apqns; /* in: ptr to list of apqn targets */
+	__u32 apqn_entries;	    /* in: # of apqn target list entries   */
+	enum pkey_key_type type;    /* in: key type to generate		   */
+	enum pkey_key_size size;    /* in: key size to generate		   */
+	__u32 keygenflags;	    /* in: key generation flags		   */
+	struct pkey_clrkey clrkey;  /* in: the clear key value		   */
+	__u8 __user *key;	    /* in: pointer to key blob buffer	   */
+	__u32 keylen;		    /* in: available key blob buffer size  */
+				    /* out: actual key blob size	   */
+};
+#define PKEY_CLR2SECK2 _IOWR(PKEY_IOCTL_MAGIC, 0x12, struct pkey_clr2seck2)
+
+/*
+ * Verify the given secure key, version 2.
+ * Check for correct key type. If cardnr and domain are given (are not
+ * 0xFFFF) also check if this apqn is able to handle this type of key.
+ * If cardnr and/or domain is 0xFFFF, on return these values are filled
+ * with one apqn able to handle this key.
+ * The function also checks for the master key verification patterns
+ * of the key matching to the current or alternate mkvp of the apqn.
+ * For CCA AES secure keys and CCA AES cipher keys this means to check
+ * the key's mkvp against the current or old mkvp of the apqns. The flags
+ * field is updated with some additional info about the apqn mkvp
+ * match: If the current mkvp matches to the key's mkvp then the
+ * PKEY_FLAGS_MATCH_CUR_MKVP bit is set, if the alternate mkvp matches to
+ * the key's mkvp the PKEY_FLAGS_MATCH_ALT_MKVP is set. For CCA keys the
+ * alternate mkvp is the old master key verification pattern.
+ * CCA AES secure keys are also checked to have the CPACF export allowed
+ * bit enabled (XPRTCPAC) in the kmf1 field.
+ * EP11 keys are also supported and the wkvp of the key is checked against
+ * the current wkvp of the apqns. There is no alternate for this type of
+ * key and so on a match the flag PKEY_FLAGS_MATCH_CUR_MKVP always is set.
+ * EP11 keys are also checked to have XCP_BLOB_PROTKEY_EXTRACTABLE set.
+ * The ioctl returns 0 as long as the given or found apqn matches to
+ * matches with the current or alternate mkvp to the key's mkvp. If the given
+ * apqn does not match or there is no such apqn found, -1 with errno
+ * ENODEV is returned.
+ */
+struct pkey_verifykey2 {
+	__u8 __user *key;	    /* in: pointer to key blob		 */
+	__u32 keylen;		    /* in: key blob size		 */
+	__u16 cardnr;		    /* in/out: card number		 */
+	__u16 domain;		    /* in/out: domain number		 */
+	enum pkey_key_type type;    /* out: the key type		 */
+	enum pkey_key_size size;    /* out: the key size		 */
+	__u32 flags;		    /* out: additional key info flags	 */
+};
+#define PKEY_VERIFYKEY2 _IOWR(PKEY_IOCTL_MAGIC, 0x17, struct pkey_verifykey2)
+
+/*
+ * Transform a key blob into a protected key, version 2.
+ * There needs to be a list of apqns given with at least one entry in there.
+ * All apqns in the list need to be exact apqns, 0xFFFF as ANY card or domain
+ * is not supported. The implementation walks through the list of apqns and
+ * tries to send the request to each apqn without any further checking (like
+ * card type or online state). If the apqn fails, simple the next one in the
+ * list is tried until success (return 0) or the end of the list is reached
+ * (return -1 with errno ENODEV). You may use the PKEY_APQNS4K ioctl to
+ * generate a list of apqns based on the key.
+ * Deriving ECC protected keys from ECC secure keys is not supported with
+ * this ioctl, use PKEY_KBLOB2PROTK3 for this purpose.
+ */
+struct pkey_kblob2pkey2 {
+	__u8 __user *key;	     /* in: pointer to key blob		   */
+	__u32 keylen;		     /* in: key blob size		   */
+	struct pkey_apqn __user *apqns; /* in: ptr to list of apqn targets */
+	__u32 apqn_entries;	     /* in: # of apqn target list entries  */
+	struct pkey_protkey protkey; /* out: the protected key		   */
+};
+#define PKEY_KBLOB2PROTK2 _IOWR(PKEY_IOCTL_MAGIC, 0x1A, struct pkey_kblob2pkey2)
+
+/*
+ * Build a list of APQNs based on a key blob given.
+ * Is able to find out which type of secure key is given (CCA AES secure
+ * key, CCA AES cipher key, CCA ECC private key, EP11 AES key, EP11 ECC private
+ * key) and tries to find all matching crypto cards based on the MKVP and maybe
+ * other criteria (like CCA AES cipher keys need a CEX5C or higher, EP11 keys
+ * with BLOB_PKEY_EXTRACTABLE need a CEX7 and EP11 api version 4). The list of
+ * APQNs is further filtered by the key's mkvp which needs to match to either
+ * the current mkvp (CCA and EP11) or the alternate mkvp (old mkvp, CCA adapters
+ * only) of the apqns. The flags argument may be used to limit the matching
+ * apqns. If the PKEY_FLAGS_MATCH_CUR_MKVP is given, only the current mkvp of
+ * each apqn is compared. Likewise with the PKEY_FLAGS_MATCH_ALT_MKVP. If both
+ * are given, it is assumed to return apqns where either the current or the
+ * alternate mkvp matches. At least one of the matching flags needs to be given.
+ * The flags argument for EP11 keys has no further action and is currently
+ * ignored (but needs to be given as PKEY_FLAGS_MATCH_CUR_MKVP) as there is only
+ * the wkvp from the key to match against the apqn's wkvp.
+ * The list of matching apqns is stored into the space given by the apqns
+ * argument and the number of stored entries goes into apqn_entries. If the list
+ * is empty (apqn_entries is 0) the apqn_entries field is updated to the number
+ * of apqn targets found and the ioctl returns with 0. If apqn_entries is > 0
+ * but the number of apqn targets does not fit into the list, the apqn_targets
+ * field is updated with the number of required entries but there are no apqn
+ * values stored in the list and the ioctl returns with ENOSPC. If no matching
+ * APQN is found, the ioctl returns with 0 but the apqn_entries value is 0.
+ */
+struct pkey_apqns4key {
+	__u8 __user *key;	   /* in: pointer to key blob		      */
+	__u32 keylen;		   /* in: key blob size			      */
+	__u32 flags;		   /* in: match controlling flags	      */
+	struct pkey_apqn __user *apqns; /* in/out: ptr to list of apqn targets*/
+	__u32 apqn_entries;	   /* in: max # of apqn entries in the list   */
+				   /* out: # apqns stored into the list	      */
+};
+#define PKEY_APQNS4K _IOWR(PKEY_IOCTL_MAGIC, 0x1B, struct pkey_apqns4key)
+
+/*
+ * Build a list of APQNs based on a key type given.
+ * Build a list of APQNs based on a given key type and maybe further
+ * restrict the list by given master key verification patterns.
+ * For different key types there may be different ways to match the
+ * master key verification patterns. For CCA keys (CCA data key and CCA
+ * cipher key) the first 8 bytes of cur_mkvp refer to the current AES mkvp value
+ * of the apqn and the first 8 bytes of the alt_mkvp refer to the old AES mkvp.
+ * For CCA ECC keys it is similar but the match is against the APKA current/old
+ * mkvp. The flags argument controls if the apqns current and/or alternate mkvp
+ * should match. If the PKEY_FLAGS_MATCH_CUR_MKVP is given, only the current
+ * mkvp of each apqn is compared. Likewise with the PKEY_FLAGS_MATCH_ALT_MKVP.
+ * If both are given, it is assumed to return apqns where either the
+ * current or the alternate mkvp matches. If no match flag is given
+ * (flags is 0) the mkvp values are ignored for the match process.
+ * For EP11 keys there is only the current wkvp. So if the apqns should also
+ * match to a given wkvp, then the PKEY_FLAGS_MATCH_CUR_MKVP flag should be
+ * set. The wkvp value is 32 bytes but only the leftmost 16 bytes are compared
+ * against the leftmost 16 byte of the wkvp of the apqn.
+ * The list of matching apqns is stored into the space given by the apqns
+ * argument and the number of stored entries goes into apqn_entries. If the list
+ * is empty (apqn_entries is 0) the apqn_entries field is updated to the number
+ * of apqn targets found and the ioctl returns with 0. If apqn_entries is > 0
+ * but the number of apqn targets does not fit into the list, the apqn_targets
+ * field is updated with the number of required entries but there are no apqn
+ * values stored in the list and the ioctl returns with ENOSPC. If no matching
+ * APQN is found, the ioctl returns with 0 but the apqn_entries value is 0.
+ */
+struct pkey_apqns4keytype {
+	enum pkey_key_type type;   /* in: key type			      */
+	__u8  cur_mkvp[32];	   /* in: current mkvp			      */
+	__u8  alt_mkvp[32];	   /* in: alternate mkvp		      */
+	__u32 flags;		   /* in: match controlling flags	      */
+	struct pkey_apqn __user *apqns; /* in/out: ptr to list of apqn targets*/
+	__u32 apqn_entries;	   /* in: max # of apqn entries in the list   */
+				   /* out: # apqns stored into the list	      */
+};
+#define PKEY_APQNS4KT _IOWR(PKEY_IOCTL_MAGIC, 0x1C, struct pkey_apqns4keytype)
+
+/*
+ * Transform a key blob into a protected key, version 3.
+ * The difference to version 2 of this ioctl is that the protected key
+ * buffer is now explicitly and not within a struct pkey_protkey any more.
+ * So this ioctl is also able to handle EP11 and CCA ECC secure keys and
+ * provide ECC protected keys.
+ * There needs to be a list of apqns given with at least one entry in there.
+ * All apqns in the list need to be exact apqns, 0xFFFF as ANY card or domain
+ * is not supported. The implementation walks through the list of apqns and
+ * tries to send the request to each apqn without any further checking (like
+ * card type or online state). If the apqn fails, simple the next one in the
+ * list is tried until success (return 0) or the end of the list is reached
+ * (return -1 with errno ENODEV). You may use the PKEY_APQNS4K ioctl to
+ * generate a list of apqns based on the key.
+ */
+struct pkey_kblob2pkey3 {
+	__u8 __user *key;	     /* in: pointer to key blob		   */
+	__u32 keylen;		     /* in: key blob size		   */
+	struct pkey_apqn __user *apqns; /* in: ptr to list of apqn targets */
+	__u32 apqn_entries;	     /* in: # of apqn target list entries  */
+	__u32 pkeytype;		/* out: prot key type (enum pkey_key_type) */
+	__u32 pkeylen;	 /* in/out: size of pkey buffer/actual len of pkey */
+	__u8 __user *pkey;		 /* in: pkey blob buffer space ptr */
+};
+#define PKEY_KBLOB2PROTK3 _IOWR(PKEY_IOCTL_MAGIC, 0x1D, struct pkey_kblob2pkey3)
+
+#endif /* _UAPI_PKEY_H */
diff --git a/arch/s390/include/uapi/asm/posix_types.h b/arch/s390/include/uapi/asm/posix_types.h
new file mode 100644
index 0000000000..1913613e71
--- /dev/null
+++ b/arch/s390/include/uapi/asm/posix_types.h
@@ -0,0 +1,58 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/*
+ *  S390 version
+ *
+ */
+
+#ifndef __ARCH_S390_POSIX_TYPES_H
+#define __ARCH_S390_POSIX_TYPES_H
+
+/*
+ * This file is generally used by user-level software, so you need to
+ * be a little careful about namespace pollution etc.  Also, we cannot
+ * assume GCC is being used.
+ */
+
+typedef unsigned long   __kernel_size_t;
+typedef long            __kernel_ssize_t;
+#define __kernel_size_t __kernel_size_t
+
+typedef unsigned short	__kernel_old_dev_t;
+#define __kernel_old_dev_t __kernel_old_dev_t
+
+#ifdef __KERNEL__
+typedef unsigned short __kernel_old_uid_t;
+typedef unsigned short __kernel_old_gid_t;
+#define __kernel_old_uid_t __kernel_old_uid_t
+#endif
+
+#ifndef __s390x__
+
+typedef unsigned long   __kernel_ino_t;
+typedef unsigned short  __kernel_mode_t;
+typedef unsigned short  __kernel_ipc_pid_t;
+typedef unsigned short  __kernel_uid_t;
+typedef unsigned short  __kernel_gid_t;
+typedef int             __kernel_ptrdiff_t;
+
+#else /* __s390x__ */
+
+typedef unsigned int    __kernel_ino_t;
+typedef unsigned int    __kernel_mode_t;
+typedef int             __kernel_ipc_pid_t;
+typedef unsigned int    __kernel_uid_t;
+typedef unsigned int    __kernel_gid_t;
+typedef long            __kernel_ptrdiff_t;
+typedef unsigned long   __kernel_sigset_t;      /* at least 32 bits */
+
+#endif /* __s390x__ */
+
+#define __kernel_ino_t  __kernel_ino_t
+#define __kernel_mode_t __kernel_mode_t
+#define __kernel_ipc_pid_t __kernel_ipc_pid_t
+#define __kernel_uid_t __kernel_uid_t
+#define __kernel_gid_t __kernel_gid_t
+
+#include <asm-generic/posix_types.h>
+
+#endif
diff --git a/arch/s390/include/uapi/asm/ptrace.h b/arch/s390/include/uapi/asm/ptrace.h
new file mode 100644
index 0000000000..bb0826024b
--- /dev/null
+++ b/arch/s390/include/uapi/asm/ptrace.h
@@ -0,0 +1,455 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/*
+ *  S390 version
+ *    Copyright IBM Corp. 1999, 2000
+ *    Author(s): Denis Joseph Barrow (djbarrow@de.ibm.com,barrow_dj@yahoo.com)
+ */
+
+#ifndef _UAPI_S390_PTRACE_H
+#define _UAPI_S390_PTRACE_H
+
+#include <linux/const.h>
+
+/*
+ * Offsets in the user_regs_struct. They are used for the ptrace
+ * system call and in entry.S
+ */
+#ifndef __s390x__
+
+#define PT_PSWMASK  0x00
+#define PT_PSWADDR  0x04
+#define PT_GPR0     0x08
+#define PT_GPR1     0x0C
+#define PT_GPR2     0x10
+#define PT_GPR3     0x14
+#define PT_GPR4     0x18
+#define PT_GPR5     0x1C
+#define PT_GPR6     0x20
+#define PT_GPR7     0x24
+#define PT_GPR8     0x28
+#define PT_GPR9     0x2C
+#define PT_GPR10    0x30
+#define PT_GPR11    0x34
+#define PT_GPR12    0x38
+#define PT_GPR13    0x3C
+#define PT_GPR14    0x40
+#define PT_GPR15    0x44
+#define PT_ACR0     0x48
+#define PT_ACR1     0x4C
+#define PT_ACR2     0x50
+#define PT_ACR3     0x54
+#define PT_ACR4	    0x58
+#define PT_ACR5	    0x5C
+#define PT_ACR6	    0x60
+#define PT_ACR7	    0x64
+#define PT_ACR8	    0x68
+#define PT_ACR9	    0x6C
+#define PT_ACR10    0x70
+#define PT_ACR11    0x74
+#define PT_ACR12    0x78
+#define PT_ACR13    0x7C
+#define PT_ACR14    0x80
+#define PT_ACR15    0x84
+#define PT_ORIGGPR2 0x88
+#define PT_FPC	    0x90
+/*
+ * A nasty fact of life that the ptrace api
+ * only supports passing of longs.
+ */
+#define PT_FPR0_HI  0x98
+#define PT_FPR0_LO  0x9C
+#define PT_FPR1_HI  0xA0
+#define PT_FPR1_LO  0xA4
+#define PT_FPR2_HI  0xA8
+#define PT_FPR2_LO  0xAC
+#define PT_FPR3_HI  0xB0
+#define PT_FPR3_LO  0xB4
+#define PT_FPR4_HI  0xB8
+#define PT_FPR4_LO  0xBC
+#define PT_FPR5_HI  0xC0
+#define PT_FPR5_LO  0xC4
+#define PT_FPR6_HI  0xC8
+#define PT_FPR6_LO  0xCC
+#define PT_FPR7_HI  0xD0
+#define PT_FPR7_LO  0xD4
+#define PT_FPR8_HI  0xD8
+#define PT_FPR8_LO  0XDC
+#define PT_FPR9_HI  0xE0
+#define PT_FPR9_LO  0xE4
+#define PT_FPR10_HI 0xE8
+#define PT_FPR10_LO 0xEC
+#define PT_FPR11_HI 0xF0
+#define PT_FPR11_LO 0xF4
+#define PT_FPR12_HI 0xF8
+#define PT_FPR12_LO 0xFC
+#define PT_FPR13_HI 0x100
+#define PT_FPR13_LO 0x104
+#define PT_FPR14_HI 0x108
+#define PT_FPR14_LO 0x10C
+#define PT_FPR15_HI 0x110
+#define PT_FPR15_LO 0x114
+#define PT_CR_9	    0x118
+#define PT_CR_10    0x11C
+#define PT_CR_11    0x120
+#define PT_IEEE_IP  0x13C
+#define PT_LASTOFF  PT_IEEE_IP
+#define PT_ENDREGS  0x140-1
+
+#define GPR_SIZE	4
+#define CR_SIZE		4
+
+#define STACK_FRAME_OVERHEAD	96	/* size of minimum stack frame */
+
+#else /* __s390x__ */
+
+#define PT_PSWMASK  0x00
+#define PT_PSWADDR  0x08
+#define PT_GPR0     0x10
+#define PT_GPR1     0x18
+#define PT_GPR2     0x20
+#define PT_GPR3     0x28
+#define PT_GPR4     0x30
+#define PT_GPR5     0x38
+#define PT_GPR6     0x40
+#define PT_GPR7     0x48
+#define PT_GPR8     0x50
+#define PT_GPR9     0x58
+#define PT_GPR10    0x60
+#define PT_GPR11    0x68
+#define PT_GPR12    0x70
+#define PT_GPR13    0x78
+#define PT_GPR14    0x80
+#define PT_GPR15    0x88
+#define PT_ACR0     0x90
+#define PT_ACR1     0x94
+#define PT_ACR2     0x98
+#define PT_ACR3     0x9C
+#define PT_ACR4	    0xA0
+#define PT_ACR5	    0xA4
+#define PT_ACR6	    0xA8
+#define PT_ACR7	    0xAC
+#define PT_ACR8	    0xB0
+#define PT_ACR9	    0xB4
+#define PT_ACR10    0xB8
+#define PT_ACR11    0xBC
+#define PT_ACR12    0xC0
+#define PT_ACR13    0xC4
+#define PT_ACR14    0xC8
+#define PT_ACR15    0xCC
+#define PT_ORIGGPR2 0xD0
+#define PT_FPC	    0xD8
+#define PT_FPR0     0xE0
+#define PT_FPR1     0xE8
+#define PT_FPR2     0xF0
+#define PT_FPR3     0xF8
+#define PT_FPR4     0x100
+#define PT_FPR5     0x108
+#define PT_FPR6     0x110
+#define PT_FPR7     0x118
+#define PT_FPR8     0x120
+#define PT_FPR9     0x128
+#define PT_FPR10    0x130
+#define PT_FPR11    0x138
+#define PT_FPR12    0x140
+#define PT_FPR13    0x148
+#define PT_FPR14    0x150
+#define PT_FPR15    0x158
+#define PT_CR_9     0x160
+#define PT_CR_10    0x168
+#define PT_CR_11    0x170
+#define PT_IEEE_IP  0x1A8
+#define PT_LASTOFF  PT_IEEE_IP
+#define PT_ENDREGS  0x1B0-1
+
+#define GPR_SIZE	8
+#define CR_SIZE		8
+
+#define STACK_FRAME_OVERHEAD	160	 /* size of minimum stack frame */
+
+#endif /* __s390x__ */
+
+#ifndef __s390x__
+
+#define PSW_MASK_PER		_AC(0x40000000, UL)
+#define PSW_MASK_DAT		_AC(0x04000000, UL)
+#define PSW_MASK_IO		_AC(0x02000000, UL)
+#define PSW_MASK_EXT		_AC(0x01000000, UL)
+#define PSW_MASK_KEY		_AC(0x00F00000, UL)
+#define PSW_MASK_BASE		_AC(0x00080000, UL)	/* always one */
+#define PSW_MASK_MCHECK		_AC(0x00040000, UL)
+#define PSW_MASK_WAIT		_AC(0x00020000, UL)
+#define PSW_MASK_PSTATE		_AC(0x00010000, UL)
+#define PSW_MASK_ASC		_AC(0x0000C000, UL)
+#define PSW_MASK_CC		_AC(0x00003000, UL)
+#define PSW_MASK_PM		_AC(0x00000F00, UL)
+#define PSW_MASK_RI		_AC(0x00000000, UL)
+#define PSW_MASK_EA		_AC(0x00000000, UL)
+#define PSW_MASK_BA		_AC(0x00000000, UL)
+
+#define PSW_MASK_USER		_AC(0x0000FF00, UL)
+
+#define PSW_ADDR_AMODE		_AC(0x80000000, UL)
+#define PSW_ADDR_INSN		_AC(0x7FFFFFFF, UL)
+
+#define PSW_ASC_PRIMARY		_AC(0x00000000, UL)
+#define PSW_ASC_ACCREG		_AC(0x00004000, UL)
+#define PSW_ASC_SECONDARY	_AC(0x00008000, UL)
+#define PSW_ASC_HOME		_AC(0x0000C000, UL)
+
+#else /* __s390x__ */
+
+#define PSW_MASK_PER		_AC(0x4000000000000000, UL)
+#define PSW_MASK_DAT		_AC(0x0400000000000000, UL)
+#define PSW_MASK_IO		_AC(0x0200000000000000, UL)
+#define PSW_MASK_EXT		_AC(0x0100000000000000, UL)
+#define PSW_MASK_BASE		_AC(0x0000000000000000, UL)
+#define PSW_MASK_KEY		_AC(0x00F0000000000000, UL)
+#define PSW_MASK_MCHECK		_AC(0x0004000000000000, UL)
+#define PSW_MASK_WAIT		_AC(0x0002000000000000, UL)
+#define PSW_MASK_PSTATE		_AC(0x0001000000000000, UL)
+#define PSW_MASK_ASC		_AC(0x0000C00000000000, UL)
+#define PSW_MASK_CC		_AC(0x0000300000000000, UL)
+#define PSW_MASK_PM		_AC(0x00000F0000000000, UL)
+#define PSW_MASK_RI		_AC(0x0000008000000000, UL)
+#define PSW_MASK_EA		_AC(0x0000000100000000, UL)
+#define PSW_MASK_BA		_AC(0x0000000080000000, UL)
+
+#define PSW_MASK_USER		_AC(0x0000FF0180000000, UL)
+
+#define PSW_ADDR_AMODE		_AC(0x0000000000000000, UL)
+#define PSW_ADDR_INSN		_AC(0xFFFFFFFFFFFFFFFF, UL)
+
+#define PSW_ASC_PRIMARY		_AC(0x0000000000000000, UL)
+#define PSW_ASC_ACCREG		_AC(0x0000400000000000, UL)
+#define PSW_ASC_SECONDARY	_AC(0x0000800000000000, UL)
+#define PSW_ASC_HOME		_AC(0x0000C00000000000, UL)
+
+#endif /* __s390x__ */
+
+#define NUM_GPRS	16
+#define NUM_FPRS	16
+#define NUM_CRS		16
+#define NUM_ACRS	16
+
+#define NUM_CR_WORDS	3
+
+#define FPR_SIZE	8
+#define FPC_SIZE	4
+#define FPC_PAD_SIZE	4 /* gcc insists on aligning the fpregs */
+#define ACR_SIZE	4
+
+
+#define PTRACE_OLDSETOPTIONS		21
+#define PTRACE_SYSEMU			31
+#define PTRACE_SYSEMU_SINGLESTEP	32
+#ifndef __ASSEMBLY__
+#include <linux/stddef.h>
+#include <linux/types.h>
+
+typedef union {
+	float	f;
+	double	d;
+	__u64	ui;
+	struct
+	{
+		__u32 hi;
+		__u32 lo;
+	} fp;
+} freg_t;
+
+typedef struct {
+	__u32	fpc;
+	__u32	pad;
+	freg_t	fprs[NUM_FPRS];
+} s390_fp_regs;
+
+#define FPC_EXCEPTION_MASK	0xF8000000
+#define FPC_FLAGS_MASK		0x00F80000
+#define FPC_DXC_MASK		0x0000FF00
+#define FPC_RM_MASK		0x00000003
+
+/* this typedef defines how a Program Status Word looks like */
+typedef struct {
+	unsigned long mask;
+	unsigned long addr;
+} __attribute__ ((aligned(8))) psw_t;
+
+/*
+ * The s390_regs structure is used to define the elf_gregset_t.
+ */
+typedef struct {
+	psw_t psw;
+	unsigned long gprs[NUM_GPRS];
+	unsigned int  acrs[NUM_ACRS];
+	unsigned long orig_gpr2;
+} s390_regs;
+
+/*
+ * The user_pt_regs structure exports the beginning of
+ * the in-kernel pt_regs structure to user space.
+ */
+typedef struct {
+	unsigned long args[1];
+	psw_t psw;
+	unsigned long gprs[NUM_GPRS];
+} user_pt_regs;
+
+/*
+ * Now for the user space program event recording (trace) definitions.
+ * The following structures are used only for the ptrace interface, don't
+ * touch or even look at it if you don't want to modify the user-space
+ * ptrace interface. In particular stay away from it for in-kernel PER.
+ */
+typedef struct {
+	unsigned long cr[NUM_CR_WORDS];
+} per_cr_words;
+
+#define PER_EM_MASK 0xE8000000UL
+
+typedef struct {
+#ifdef __s390x__
+	unsigned		       : 32;
+#endif /* __s390x__ */
+	unsigned em_branching	       : 1;
+	unsigned em_instruction_fetch  : 1;
+	/*
+	 * Switching on storage alteration automatically fixes
+	 * the storage alteration event bit in the users std.
+	 */
+	unsigned em_storage_alteration : 1;
+	unsigned em_gpr_alt_unused     : 1;
+	unsigned em_store_real_address : 1;
+	unsigned		       : 3;
+	unsigned branch_addr_ctl       : 1;
+	unsigned		       : 1;
+	unsigned storage_alt_space_ctl : 1;
+	unsigned		       : 21;
+	unsigned long starting_addr;
+	unsigned long ending_addr;
+} per_cr_bits;
+
+typedef struct {
+	unsigned short perc_atmid;
+	unsigned long address;
+	unsigned char access_id;
+} per_lowcore_words;
+
+typedef struct {
+	unsigned perc_branching		 : 1;
+	unsigned perc_instruction_fetch  : 1;
+	unsigned perc_storage_alteration : 1;
+	unsigned perc_gpr_alt_unused	 : 1;
+	unsigned perc_store_real_address : 1;
+	unsigned			 : 3;
+	unsigned atmid_psw_bit_31	 : 1;
+	unsigned atmid_validity_bit	 : 1;
+	unsigned atmid_psw_bit_32	 : 1;
+	unsigned atmid_psw_bit_5	 : 1;
+	unsigned atmid_psw_bit_16	 : 1;
+	unsigned atmid_psw_bit_17	 : 1;
+	unsigned si			 : 2;
+	unsigned long address;
+	unsigned			 : 4;
+	unsigned access_id		 : 4;
+} per_lowcore_bits;
+
+typedef struct {
+	union {
+		per_cr_words   words;
+		per_cr_bits    bits;
+	} control_regs;
+	/*
+	 * The single_step and instruction_fetch bits are obsolete,
+	 * the kernel always sets them to zero. To enable single
+	 * stepping use ptrace(PTRACE_SINGLESTEP) instead.
+	 */
+	unsigned  single_step	    : 1;
+	unsigned  instruction_fetch : 1;
+	unsigned		    : 30;
+	/*
+	 * These addresses are copied into cr10 & cr11 if single
+	 * stepping is switched off
+	 */
+	unsigned long starting_addr;
+	unsigned long ending_addr;
+	union {
+		per_lowcore_words words;
+		per_lowcore_bits  bits;
+	} lowcore;
+} per_struct;
+
+typedef struct {
+	unsigned int  len;
+	unsigned long kernel_addr;
+	unsigned long process_addr;
+} ptrace_area;
+
+/*
+ * S/390 specific non posix ptrace requests. I chose unusual values so
+ * they are unlikely to clash with future ptrace definitions.
+ */
+#define PTRACE_PEEKUSR_AREA	      0x5000
+#define PTRACE_POKEUSR_AREA	      0x5001
+#define PTRACE_PEEKTEXT_AREA	      0x5002
+#define PTRACE_PEEKDATA_AREA	      0x5003
+#define PTRACE_POKETEXT_AREA	      0x5004
+#define PTRACE_POKEDATA_AREA	      0x5005
+#define PTRACE_GET_LAST_BREAK	      0x5006
+#define PTRACE_PEEK_SYSTEM_CALL       0x5007
+#define PTRACE_POKE_SYSTEM_CALL	      0x5008
+#define PTRACE_ENABLE_TE	      0x5009
+#define PTRACE_DISABLE_TE	      0x5010
+#define PTRACE_TE_ABORT_RAND	      0x5011
+
+/*
+ * The numbers chosen here are somewhat arbitrary but absolutely MUST
+ * not overlap with any of the number assigned in <linux/ptrace.h>.
+ */
+#define PTRACE_SINGLEBLOCK	12	/* resume execution until next branch */
+
+/*
+ * PT_PROT definition is loosely based on hppa bsd definition in
+ * gdb/hppab-nat.c
+ */
+#define PTRACE_PROT			  21
+
+typedef enum {
+	ptprot_set_access_watchpoint,
+	ptprot_set_write_watchpoint,
+	ptprot_disable_watchpoint
+} ptprot_flags;
+
+typedef struct {
+	unsigned long lowaddr;
+	unsigned long hiaddr;
+	ptprot_flags prot;
+} ptprot_area;
+
+/* Sequence of bytes for breakpoint illegal instruction.  */
+#define S390_BREAKPOINT     {0x0,0x1}
+#define S390_BREAKPOINT_U16 ((__u16)0x0001)
+#define S390_SYSCALL_OPCODE ((__u16)0x0a00)
+#define S390_SYSCALL_SIZE   2
+
+/*
+ * The user_regs_struct defines the way the user registers are
+ * store on the stack for signal handling.
+ */
+struct user_regs_struct {
+	psw_t psw;
+	unsigned long gprs[NUM_GPRS];
+	unsigned int  acrs[NUM_ACRS];
+	unsigned long orig_gpr2;
+	s390_fp_regs fp_regs;
+	/*
+	 * These per registers are in here so that gdb can modify them
+	 * itself as there is no "official" ptrace interface for hardware
+	 * watchpoints. This is the way intel does it.
+	 */
+	per_struct per_info;
+	unsigned long ieee_instruction_pointer;	/* obsolete, always 0 */
+};
+
+#endif /* __ASSEMBLY__ */
+
+#endif /* _UAPI_S390_PTRACE_H */
diff --git a/arch/s390/include/uapi/asm/qeth.h b/arch/s390/include/uapi/asm/qeth.h
new file mode 100644
index 0000000000..fac9995dfe
--- /dev/null
+++ b/arch/s390/include/uapi/asm/qeth.h
@@ -0,0 +1,116 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/*
+ * ioctl definitions for qeth driver
+ *
+ * Copyright IBM Corp. 2004
+ *
+ * Author(s):	Thomas Spatzier <tspat@de.ibm.com>
+ *
+ */
+#ifndef __ASM_S390_QETH_IOCTL_H__
+#define __ASM_S390_QETH_IOCTL_H__
+#include <linux/types.h>
+#include <linux/ioctl.h>
+
+#define SIOC_QETH_ARP_SET_NO_ENTRIES    (SIOCDEVPRIVATE)
+#define SIOC_QETH_ARP_QUERY_INFO        (SIOCDEVPRIVATE + 1)
+#define SIOC_QETH_ARP_ADD_ENTRY         (SIOCDEVPRIVATE + 2)
+#define SIOC_QETH_ARP_REMOVE_ENTRY      (SIOCDEVPRIVATE + 3)
+#define SIOC_QETH_ARP_FLUSH_CACHE       (SIOCDEVPRIVATE + 4)
+#define SIOC_QETH_ADP_SET_SNMP_CONTROL  (SIOCDEVPRIVATE + 5)
+#define SIOC_QETH_GET_CARD_TYPE         (SIOCDEVPRIVATE + 6)
+#define SIOC_QETH_QUERY_OAT		(SIOCDEVPRIVATE + 7)
+
+struct qeth_arp_cache_entry {
+	__u8  macaddr[6];
+	__u8  reserved1[2];
+	__u8  ipaddr[16]; /* for both  IPv4 and IPv6 */
+	__u8  reserved2[32];
+} __attribute__ ((packed));
+
+enum qeth_arp_ipaddrtype {
+	QETHARP_IP_ADDR_V4 = 1,
+	QETHARP_IP_ADDR_V6 = 2,
+};
+struct qeth_arp_entrytype {
+	__u8 mac;
+	__u8 ip;
+} __attribute__((packed));
+
+#define QETH_QARP_MEDIASPECIFIC_BYTES 32
+#define QETH_QARP_MACADDRTYPE_BYTES 1
+struct qeth_arp_qi_entry7 {
+	__u8 media_specific[QETH_QARP_MEDIASPECIFIC_BYTES];
+	struct qeth_arp_entrytype type;
+	__u8 macaddr[6];
+	__u8 ipaddr[4];
+} __attribute__((packed));
+
+struct qeth_arp_qi_entry7_ipv6 {
+	__u8 media_specific[QETH_QARP_MEDIASPECIFIC_BYTES];
+	struct qeth_arp_entrytype type;
+	__u8 macaddr[6];
+	__u8 ipaddr[16];
+} __attribute__((packed));
+
+struct qeth_arp_qi_entry7_short {
+	struct qeth_arp_entrytype type;
+	__u8 macaddr[6];
+	__u8 ipaddr[4];
+} __attribute__((packed));
+
+struct qeth_arp_qi_entry7_short_ipv6 {
+	struct qeth_arp_entrytype type;
+	__u8 macaddr[6];
+	__u8 ipaddr[16];
+} __attribute__((packed));
+
+struct qeth_arp_qi_entry5 {
+	__u8 media_specific[QETH_QARP_MEDIASPECIFIC_BYTES];
+	struct qeth_arp_entrytype type;
+	__u8 ipaddr[4];
+} __attribute__((packed));
+
+struct qeth_arp_qi_entry5_ipv6 {
+	__u8 media_specific[QETH_QARP_MEDIASPECIFIC_BYTES];
+	struct qeth_arp_entrytype type;
+	__u8 ipaddr[16];
+} __attribute__((packed));
+
+struct qeth_arp_qi_entry5_short {
+	struct qeth_arp_entrytype type;
+	__u8 ipaddr[4];
+} __attribute__((packed));
+
+struct qeth_arp_qi_entry5_short_ipv6 {
+	struct qeth_arp_entrytype type;
+	__u8 ipaddr[16];
+} __attribute__((packed));
+/*
+ * can be set by user if no "media specific information" is wanted
+ * -> saves a lot of space in user space buffer
+ */
+#define QETH_QARP_STRIP_ENTRIES  0x8000
+#define QETH_QARP_WITH_IPV6	 0x4000
+#define QETH_QARP_REQUEST_MASK   0x00ff
+
+/* data sent to user space as result of query arp ioctl */
+#define QETH_QARP_USER_DATA_SIZE 20000
+#define QETH_QARP_MASK_OFFSET    4
+#define QETH_QARP_ENTRIES_OFFSET 6
+struct qeth_arp_query_user_data {
+	union {
+		__u32 data_len;		/* set by user space program */
+		__u32 no_entries;	/* set by kernel */
+	} u;
+	__u16 mask_bits;
+	char *entries;
+} __attribute__((packed));
+
+struct qeth_query_oat_data {
+	__u32 command;
+	__u32 buffer_len;
+	__u32 response_len;
+	__u64 ptr;
+};
+#endif /* __ASM_S390_QETH_IOCTL_H__ */
diff --git a/arch/s390/include/uapi/asm/raw3270.h b/arch/s390/include/uapi/asm/raw3270.h
new file mode 100644
index 0000000000..6676f102bd
--- /dev/null
+++ b/arch/s390/include/uapi/asm/raw3270.h
@@ -0,0 +1,75 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+#ifndef __ASM_S390_UAPI_RAW3270_H
+#define __ASM_S390_UAPI_RAW3270_H
+
+/* Local Channel Commands */
+#define TC_WRITE	0x01		/* Write */
+#define TC_RDBUF	0x02		/* Read Buffer */
+#define TC_EWRITE	0x05		/* Erase write */
+#define TC_READMOD	0x06		/* Read modified */
+#define TC_EWRITEA	0x0d		/* Erase write alternate */
+#define TC_WRITESF	0x11		/* Write structured field */
+
+/* Buffer Control Orders */
+#define TO_GE		0x08		/* Graphics Escape */
+#define TO_SF		0x1d		/* Start field */
+#define TO_SBA		0x11		/* Set buffer address */
+#define TO_IC		0x13		/* Insert cursor */
+#define TO_PT		0x05		/* Program tab */
+#define TO_RA		0x3c		/* Repeat to address */
+#define TO_SFE		0x29		/* Start field extended */
+#define TO_EUA		0x12		/* Erase unprotected to address */
+#define TO_MF		0x2c		/* Modify field */
+#define TO_SA		0x28		/* Set attribute */
+
+/* Field Attribute Bytes */
+#define TF_INPUT	0x40		/* Visible input */
+#define TF_INPUTN	0x4c		/* Invisible input */
+#define TF_INMDT	0xc1		/* Visible, Set-MDT */
+#define TF_LOG		0x60
+
+/* Character Attribute Bytes */
+#define TAT_RESET	0x00
+#define TAT_FIELD	0xc0
+#define TAT_EXTHI	0x41
+#define TAT_FGCOLOR	0x42
+#define TAT_CHARS	0x43
+#define TAT_BGCOLOR	0x45
+#define TAT_TRANS	0x46
+
+/* Extended-Highlighting Bytes */
+#define TAX_RESET	0x00
+#define TAX_BLINK	0xf1
+#define TAX_REVER	0xf2
+#define TAX_UNDER	0xf4
+
+/* Reset value */
+#define TAR_RESET	0x00
+
+/* Color values */
+#define TAC_RESET	0x00
+#define TAC_BLUE	0xf1
+#define TAC_RED		0xf2
+#define TAC_PINK	0xf3
+#define TAC_GREEN	0xf4
+#define TAC_TURQ	0xf5
+#define TAC_YELLOW	0xf6
+#define TAC_WHITE	0xf7
+#define TAC_DEFAULT	0x00
+
+/* Write Control Characters */
+#define TW_NONE		0x40		/* No particular action */
+#define TW_KR		0xc2		/* Keyboard restore */
+#define TW_PLUSALARM	0x04		/* Add this bit for alarm */
+
+#define RAW3270_FIRSTMINOR	1	/* First minor number */
+#define RAW3270_MAXDEVS		255	/* Max number of 3270 devices */
+
+#define AID_CLEAR		0x6d
+#define AID_ENTER		0x7d
+#define AID_PF3			0xf3
+#define AID_PF7			0xf7
+#define AID_PF8			0xf8
+#define AID_READ_PARTITION	0x88
+
+#endif /* __ASM_S390_UAPI_RAW3270_H */
diff --git a/arch/s390/include/uapi/asm/runtime_instr.h b/arch/s390/include/uapi/asm/runtime_instr.h
new file mode 100644
index 0000000000..455da46e31
--- /dev/null
+++ b/arch/s390/include/uapi/asm/runtime_instr.h
@@ -0,0 +1,74 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+#ifndef _S390_UAPI_RUNTIME_INSTR_H
+#define _S390_UAPI_RUNTIME_INSTR_H
+
+#include <linux/types.h>
+
+#define S390_RUNTIME_INSTR_START	0x1
+#define S390_RUNTIME_INSTR_STOP		0x2
+
+struct runtime_instr_cb {
+	__u64 rca;
+	__u64 roa;
+	__u64 rla;
+
+	__u32 v			: 1;
+	__u32 s			: 1;
+	__u32 k			: 1;
+	__u32 h			: 1;
+	__u32 a			: 1;
+	__u32 reserved1		: 3;
+	__u32 ps		: 1;
+	__u32 qs		: 1;
+	__u32 pc		: 1;
+	__u32 qc		: 1;
+	__u32 reserved2		: 1;
+	__u32 g			: 1;
+	__u32 u			: 1;
+	__u32 l			: 1;
+	__u32 key		: 4;
+	__u32 reserved3		: 8;
+	__u32 t			: 1;
+	__u32 rgs		: 3;
+
+	__u32 m			: 4;
+	__u32 n			: 1;
+	__u32 mae		: 1;
+	__u32 reserved4		: 2;
+	__u32 c			: 1;
+	__u32 r			: 1;
+	__u32 b			: 1;
+	__u32 j			: 1;
+	__u32 e			: 1;
+	__u32 x			: 1;
+	__u32 reserved5		: 2;
+	__u32 bpxn		: 1;
+	__u32 bpxt		: 1;
+	__u32 bpti		: 1;
+	__u32 bpni		: 1;
+	__u32 reserved6		: 2;
+
+	__u32 d			: 1;
+	__u32 f			: 1;
+	__u32 ic		: 4;
+	__u32 dc		: 4;
+
+	__u64 reserved7;
+	__u64 sf;
+	__u64 rsic;
+	__u64 reserved8;
+} __attribute__((__packed__, __aligned__(8)));
+
+static inline void load_runtime_instr_cb(struct runtime_instr_cb *cb)
+{
+	asm volatile(".insn	rsy,0xeb0000000060,0,0,%0"	/* LRIC */
+		: : "Q" (*cb));
+}
+
+static inline void store_runtime_instr_cb(struct runtime_instr_cb *cb)
+{
+	asm volatile(".insn	rsy,0xeb0000000061,0,0,%0"	/* STRIC */
+		: "=Q" (*cb) : : "cc");
+}
+
+#endif /* _S390_UAPI_RUNTIME_INSTR_H */
diff --git a/arch/s390/include/uapi/asm/schid.h b/arch/s390/include/uapi/asm/schid.h
new file mode 100644
index 0000000000..a3e1cf1685
--- /dev/null
+++ b/arch/s390/include/uapi/asm/schid.h
@@ -0,0 +1,20 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+#ifndef _UAPIASM_SCHID_H
+#define _UAPIASM_SCHID_H
+
+#include <linux/types.h>
+
+#ifndef __ASSEMBLY__
+
+struct subchannel_id {
+	__u32 cssid : 8;
+	__u32 : 4;
+	__u32 m : 1;
+	__u32 ssid : 2;
+	__u32 one : 1;
+	__u32 sch_no : 16;
+} __attribute__ ((packed, aligned(4)));
+
+#endif /* __ASSEMBLY__ */
+
+#endif /* _UAPIASM_SCHID_H */
diff --git a/arch/s390/include/uapi/asm/sclp_ctl.h b/arch/s390/include/uapi/asm/sclp_ctl.h
new file mode 100644
index 0000000000..e4e8c4dcd1
--- /dev/null
+++ b/arch/s390/include/uapi/asm/sclp_ctl.h
@@ -0,0 +1,25 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/*
+ * IOCTL interface for SCLP
+ *
+ * Copyright IBM Corp. 2012
+ *
+ * Author: Michael Holzheu <holzheu@linux.vnet.ibm.com>
+ */
+
+#ifndef _ASM_SCLP_CTL_H
+#define _ASM_SCLP_CTL_H
+
+#include <linux/types.h>
+
+struct sclp_ctl_sccb {
+	__u32	cmdw;
+	__u64	sccb;
+} __attribute__((packed));
+
+#define SCLP_CTL_IOCTL_MAGIC 0x10
+
+#define SCLP_CTL_SCCB \
+	_IOWR(SCLP_CTL_IOCTL_MAGIC, 0x10, struct sclp_ctl_sccb)
+
+#endif
diff --git a/arch/s390/include/uapi/asm/setup.h b/arch/s390/include/uapi/asm/setup.h
new file mode 100644
index 0000000000..598d769e76
--- /dev/null
+++ b/arch/s390/include/uapi/asm/setup.h
@@ -0,0 +1 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
diff --git a/arch/s390/include/uapi/asm/sie.h b/arch/s390/include/uapi/asm/sie.h
new file mode 100644
index 0000000000..ede318653c
--- /dev/null
+++ b/arch/s390/include/uapi/asm/sie.h
@@ -0,0 +1,252 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+#ifndef _UAPI_ASM_S390_SIE_H
+#define _UAPI_ASM_S390_SIE_H
+
+#define diagnose_codes						\
+	{ 0x10, "DIAG (0x10) release pages" },			\
+	{ 0x44, "DIAG (0x44) time slice end" },			\
+	{ 0x9c, "DIAG (0x9c) time slice end directed" },	\
+	{ 0x204, "DIAG (0x204) logical-cpu utilization" },	\
+	{ 0x258, "DIAG (0x258) page-reference services" },	\
+	{ 0x288, "DIAG (0x288) watchdog functions" },		\
+	{ 0x308, "DIAG (0x308) ipl functions" },		\
+	{ 0x500, "DIAG (0x500) KVM virtio functions" },		\
+	{ 0x501, "DIAG (0x501) KVM breakpoint" }
+
+#define sigp_order_codes					\
+	{ 0x01, "SIGP sense" },					\
+	{ 0x02, "SIGP external call" },				\
+	{ 0x03, "SIGP emergency signal" },			\
+	{ 0x04, "SIGP start" },					\
+	{ 0x05, "SIGP stop" },					\
+	{ 0x06, "SIGP restart" },				\
+	{ 0x09, "SIGP stop and store status" },			\
+	{ 0x0b, "SIGP initial cpu reset" },			\
+	{ 0x0c, "SIGP cpu reset" },				\
+	{ 0x0d, "SIGP set prefix" },				\
+	{ 0x0e, "SIGP store status at address" },		\
+	{ 0x12, "SIGP set architecture" },			\
+	{ 0x13, "SIGP conditional emergency signal" },		\
+	{ 0x15, "SIGP sense running" },				\
+	{ 0x16, "SIGP set multithreading"},			\
+	{ 0x17, "SIGP store additional status at address"}
+
+#define icpt_prog_codes						\
+	{ 0x0001, "Prog Operation" },				\
+	{ 0x0002, "Prog Privileged Operation" },		\
+	{ 0x0003, "Prog Execute" },				\
+	{ 0x0004, "Prog Protection" },				\
+	{ 0x0005, "Prog Addressing" },				\
+	{ 0x0006, "Prog Specification" },			\
+	{ 0x0007, "Prog Data" },				\
+	{ 0x0008, "Prog Fixedpoint overflow" },			\
+	{ 0x0009, "Prog Fixedpoint divide" },			\
+	{ 0x000A, "Prog Decimal overflow" },			\
+	{ 0x000B, "Prog Decimal divide" },			\
+	{ 0x000C, "Prog HFP exponent overflow" },		\
+	{ 0x000D, "Prog HFP exponent underflow" },		\
+	{ 0x000E, "Prog HFP significance" },			\
+	{ 0x000F, "Prog HFP divide" },				\
+	{ 0x0010, "Prog Segment translation" },			\
+	{ 0x0011, "Prog Page translation" },			\
+	{ 0x0012, "Prog Translation specification" },		\
+	{ 0x0013, "Prog Special operation" },			\
+	{ 0x0015, "Prog Operand" },				\
+	{ 0x0016, "Prog Trace table" },				\
+	{ 0x0017, "Prog ASNtranslation specification" },	\
+	{ 0x001C, "Prog Spaceswitch event" },			\
+	{ 0x001D, "Prog HFP square root" },			\
+	{ 0x001F, "Prog PCtranslation specification" },		\
+	{ 0x0020, "Prog AFX translation" },			\
+	{ 0x0021, "Prog ASX translation" },			\
+	{ 0x0022, "Prog LX translation" },			\
+	{ 0x0023, "Prog EX translation" },			\
+	{ 0x0024, "Prog Primary authority" },			\
+	{ 0x0025, "Prog Secondary authority" },			\
+	{ 0x0026, "Prog LFXtranslation exception" },		\
+	{ 0x0027, "Prog LSXtranslation exception" },		\
+	{ 0x0028, "Prog ALET specification" },			\
+	{ 0x0029, "Prog ALEN translation" },			\
+	{ 0x002A, "Prog ALE sequence" },			\
+	{ 0x002B, "Prog ASTE validity" },			\
+	{ 0x002C, "Prog ASTE sequence" },			\
+	{ 0x002D, "Prog Extended authority" },			\
+	{ 0x002E, "Prog LSTE sequence" },			\
+	{ 0x002F, "Prog ASTE instance" },			\
+	{ 0x0030, "Prog Stack full" },				\
+	{ 0x0031, "Prog Stack empty" },				\
+	{ 0x0032, "Prog Stack specification" },			\
+	{ 0x0033, "Prog Stack type" },				\
+	{ 0x0034, "Prog Stack operation" },			\
+	{ 0x0039, "Prog Region first translation" },		\
+	{ 0x003A, "Prog Region second translation" },		\
+	{ 0x003B, "Prog Region third translation" },		\
+	{ 0x0040, "Prog Monitor event" },			\
+	{ 0x0080, "Prog PER event" },				\
+	{ 0x0119, "Prog Crypto operation" }
+
+#define exit_code_ipa0(ipa0, opcode, mnemonic)		\
+	{ (ipa0 << 8 | opcode), #ipa0 " " mnemonic }
+#define exit_code(opcode, mnemonic)			\
+	{ opcode, mnemonic }
+
+#define icpt_insn_codes				\
+	exit_code_ipa0(0x01, 0x01, "PR"),	\
+	exit_code_ipa0(0x01, 0x04, "PTFF"),	\
+	exit_code_ipa0(0x01, 0x07, "SCKPF"),	\
+	exit_code_ipa0(0xAA, 0x00, "RINEXT"),	\
+	exit_code_ipa0(0xAA, 0x01, "RION"),	\
+	exit_code_ipa0(0xAA, 0x02, "TRIC"),	\
+	exit_code_ipa0(0xAA, 0x03, "RIOFF"),	\
+	exit_code_ipa0(0xAA, 0x04, "RIEMIT"),	\
+	exit_code_ipa0(0xB2, 0x02, "STIDP"),	\
+	exit_code_ipa0(0xB2, 0x04, "SCK"),	\
+	exit_code_ipa0(0xB2, 0x05, "STCK"),	\
+	exit_code_ipa0(0xB2, 0x06, "SCKC"),	\
+	exit_code_ipa0(0xB2, 0x07, "STCKC"),	\
+	exit_code_ipa0(0xB2, 0x08, "SPT"),	\
+	exit_code_ipa0(0xB2, 0x09, "STPT"),	\
+	exit_code_ipa0(0xB2, 0x0d, "PTLB"),	\
+	exit_code_ipa0(0xB2, 0x10, "SPX"),	\
+	exit_code_ipa0(0xB2, 0x11, "STPX"),	\
+	exit_code_ipa0(0xB2, 0x12, "STAP"),	\
+	exit_code_ipa0(0xB2, 0x14, "SIE"),	\
+	exit_code_ipa0(0xB2, 0x16, "SETR"),	\
+	exit_code_ipa0(0xB2, 0x17, "STETR"),	\
+	exit_code_ipa0(0xB2, 0x18, "PC"),	\
+	exit_code_ipa0(0xB2, 0x20, "SERVC"),	\
+	exit_code_ipa0(0xB2, 0x21, "IPTE"),	\
+	exit_code_ipa0(0xB2, 0x28, "PT"),	\
+	exit_code_ipa0(0xB2, 0x29, "ISKE"),	\
+	exit_code_ipa0(0xB2, 0x2a, "RRBE"),	\
+	exit_code_ipa0(0xB2, 0x2b, "SSKE"),	\
+	exit_code_ipa0(0xB2, 0x2c, "TB"),	\
+	exit_code_ipa0(0xB2, 0x2e, "PGIN"),	\
+	exit_code_ipa0(0xB2, 0x2f, "PGOUT"),	\
+	exit_code_ipa0(0xB2, 0x30, "CSCH"),	\
+	exit_code_ipa0(0xB2, 0x31, "HSCH"),	\
+	exit_code_ipa0(0xB2, 0x32, "MSCH"),	\
+	exit_code_ipa0(0xB2, 0x33, "SSCH"),	\
+	exit_code_ipa0(0xB2, 0x34, "STSCH"),	\
+	exit_code_ipa0(0xB2, 0x35, "TSCH"),	\
+	exit_code_ipa0(0xB2, 0x36, "TPI"),	\
+	exit_code_ipa0(0xB2, 0x37, "SAL"),	\
+	exit_code_ipa0(0xB2, 0x38, "RSCH"),	\
+	exit_code_ipa0(0xB2, 0x39, "STCRW"),	\
+	exit_code_ipa0(0xB2, 0x3a, "STCPS"),	\
+	exit_code_ipa0(0xB2, 0x3b, "RCHP"),	\
+	exit_code_ipa0(0xB2, 0x3c, "SCHM"),	\
+	exit_code_ipa0(0xB2, 0x40, "BAKR"),	\
+	exit_code_ipa0(0xB2, 0x48, "PALB"),	\
+	exit_code_ipa0(0xB2, 0x4c, "TAR"),	\
+	exit_code_ipa0(0xB2, 0x50, "CSP"),	\
+	exit_code_ipa0(0xB2, 0x54, "MVPG"),	\
+	exit_code_ipa0(0xB2, 0x56, "STHYI"),	\
+	exit_code_ipa0(0xB2, 0x58, "BSG"),	\
+	exit_code_ipa0(0xB2, 0x5a, "BSA"),	\
+	exit_code_ipa0(0xB2, 0x5f, "CHSC"),	\
+	exit_code_ipa0(0xB2, 0x74, "SIGA"),	\
+	exit_code_ipa0(0xB2, 0x76, "XSCH"),	\
+	exit_code_ipa0(0xB2, 0x78, "STCKE"),	\
+	exit_code_ipa0(0xB2, 0x7c, "STCKF"),	\
+	exit_code_ipa0(0xB2, 0x7d, "STSI"),	\
+	exit_code_ipa0(0xB2, 0xb0, "STFLE"),	\
+	exit_code_ipa0(0xB2, 0xb1, "STFL"),	\
+	exit_code_ipa0(0xB2, 0xb2, "LPSWE"),	\
+	exit_code_ipa0(0xB2, 0xf8, "TEND"),	\
+	exit_code_ipa0(0xB2, 0xfc, "TABORT"),	\
+	exit_code_ipa0(0xB9, 0x1e, "KMAC"),	\
+	exit_code_ipa0(0xB9, 0x28, "PCKMO"),	\
+	exit_code_ipa0(0xB9, 0x2a, "KMF"),	\
+	exit_code_ipa0(0xB9, 0x2b, "KMO"),	\
+	exit_code_ipa0(0xB9, 0x2d, "KMCTR"),	\
+	exit_code_ipa0(0xB9, 0x2e, "KM"),	\
+	exit_code_ipa0(0xB9, 0x2f, "KMC"),	\
+	exit_code_ipa0(0xB9, 0x3e, "KIMD"),	\
+	exit_code_ipa0(0xB9, 0x3f, "KLMD"),	\
+	exit_code_ipa0(0xB9, 0x8a, "CSPG"),	\
+	exit_code_ipa0(0xB9, 0x8d, "EPSW"),	\
+	exit_code_ipa0(0xB9, 0x8e, "IDTE"),	\
+	exit_code_ipa0(0xB9, 0x8f, "CRDTE"),	\
+	exit_code_ipa0(0xB9, 0x9c, "EQBS"),	\
+	exit_code_ipa0(0xB9, 0xa2, "PTF"),	\
+	exit_code_ipa0(0xB9, 0xab, "ESSA"),	\
+	exit_code_ipa0(0xB9, 0xae, "RRBM"),	\
+	exit_code_ipa0(0xB9, 0xaf, "PFMF"),	\
+	exit_code_ipa0(0xE3, 0x03, "LRAG"),	\
+	exit_code_ipa0(0xE3, 0x13, "LRAY"),	\
+	exit_code_ipa0(0xE3, 0x25, "NTSTG"),	\
+	exit_code_ipa0(0xE5, 0x00, "LASP"),	\
+	exit_code_ipa0(0xE5, 0x01, "TPROT"),	\
+	exit_code_ipa0(0xE5, 0x60, "TBEGIN"),	\
+	exit_code_ipa0(0xE5, 0x61, "TBEGINC"),	\
+	exit_code_ipa0(0xEB, 0x25, "STCTG"),	\
+	exit_code_ipa0(0xEB, 0x2f, "LCTLG"),	\
+	exit_code_ipa0(0xEB, 0x60, "LRIC"),	\
+	exit_code_ipa0(0xEB, 0x61, "STRIC"),	\
+	exit_code_ipa0(0xEB, 0x62, "MRIC"),	\
+	exit_code_ipa0(0xEB, 0x8a, "SQBS"),	\
+	exit_code_ipa0(0xC8, 0x01, "ECTG"),	\
+	exit_code(0x0a, "SVC"),			\
+	exit_code(0x80, "SSM"),			\
+	exit_code(0x82, "LPSW"),		\
+	exit_code(0x83, "DIAG"),		\
+	exit_code(0xae, "SIGP"),		\
+	exit_code(0xac, "STNSM"),		\
+	exit_code(0xad, "STOSM"),		\
+	exit_code(0xb1, "LRA"),			\
+	exit_code(0xb6, "STCTL"),		\
+	exit_code(0xb7, "LCTL"),		\
+	exit_code(0xee, "PLO")
+
+#define sie_intercept_code					\
+	{ 0x00, "Host interruption" },				\
+	{ 0x04, "Instruction" },				\
+	{ 0x08, "Program interruption" },			\
+	{ 0x0c, "Instruction and program interruption" },	\
+	{ 0x10, "External request" },				\
+	{ 0x14, "External interruption" },			\
+	{ 0x18, "I/O request" },				\
+	{ 0x1c, "Wait state" },					\
+	{ 0x20, "Validity" },					\
+	{ 0x28, "Stop request" },				\
+	{ 0x2c, "Operation exception" },			\
+	{ 0x38, "Partial-execution" },				\
+	{ 0x3c, "I/O interruption" },				\
+	{ 0x40, "I/O instruction" },				\
+	{ 0x48, "Timing subset" }
+
+/*
+ * This is the simple interceptable instructions decoder.
+ *
+ * It will be used as userspace interface and it can be used in places
+ * that does not allow to use general decoder functions,
+ * such as trace events declarations.
+ *
+ * Some userspace tools may want to parse this code
+ * and would be confused by switch(), if() and other statements,
+ * but they can understand conditional operator.
+ */
+#define INSN_DECODE_IPA0(ipa0, insn, rshift, mask)		\
+	(insn >> 56) == (ipa0) ?				\
+		((ipa0 << 8) | ((insn >> rshift) & mask)) :
+
+#define INSN_DECODE(insn) (insn >> 56)
+
+/*
+ * The macro icpt_insn_decoder() takes an intercepted instruction
+ * and returns a key, which can be used to find a mnemonic name
+ * of the instruction in the icpt_insn_codes table.
+ */
+#define icpt_insn_decoder(insn) (		\
+	INSN_DECODE_IPA0(0x01, insn, 48, 0xff)	\
+	INSN_DECODE_IPA0(0xaa, insn, 48, 0x0f)	\
+	INSN_DECODE_IPA0(0xb2, insn, 48, 0xff)	\
+	INSN_DECODE_IPA0(0xb9, insn, 48, 0xff)	\
+	INSN_DECODE_IPA0(0xe3, insn, 48, 0xff)	\
+	INSN_DECODE_IPA0(0xe5, insn, 48, 0xff)	\
+	INSN_DECODE_IPA0(0xeb, insn, 16, 0xff)	\
+	INSN_DECODE_IPA0(0xc8, insn, 48, 0x0f)	\
+	INSN_DECODE(insn))
+
+#endif /* _UAPI_ASM_S390_SIE_H */
diff --git a/arch/s390/include/uapi/asm/sigcontext.h b/arch/s390/include/uapi/asm/sigcontext.h
new file mode 100644
index 0000000000..8b35033334
--- /dev/null
+++ b/arch/s390/include/uapi/asm/sigcontext.h
@@ -0,0 +1,85 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/*
+ *  S390 version
+ *    Copyright IBM Corp. 1999, 2000
+ */
+
+#ifndef _ASM_S390_SIGCONTEXT_H
+#define _ASM_S390_SIGCONTEXT_H
+
+#include <linux/compiler.h>
+#include <linux/types.h>
+
+#define __NUM_GPRS		16
+#define __NUM_FPRS		16
+#define __NUM_ACRS		16
+#define __NUM_VXRS		32
+#define __NUM_VXRS_LOW		16
+#define __NUM_VXRS_HIGH		16
+
+#ifndef __s390x__
+
+/* Has to be at least _NSIG_WORDS from asm/signal.h */
+#define _SIGCONTEXT_NSIG	64
+#define _SIGCONTEXT_NSIG_BPW	32
+/* Size of stack frame allocated when calling signal handler. */
+#define __SIGNAL_FRAMESIZE	96
+
+#else /* __s390x__ */
+
+/* Has to be at least _NSIG_WORDS from asm/signal.h */
+#define _SIGCONTEXT_NSIG	64
+#define _SIGCONTEXT_NSIG_BPW	64 
+/* Size of stack frame allocated when calling signal handler. */
+#define __SIGNAL_FRAMESIZE	160
+
+#endif /* __s390x__ */
+
+#define _SIGCONTEXT_NSIG_WORDS	(_SIGCONTEXT_NSIG / _SIGCONTEXT_NSIG_BPW)
+#define _SIGMASK_COPY_SIZE	(sizeof(unsigned long)*_SIGCONTEXT_NSIG_WORDS)
+
+typedef struct 
+{
+        unsigned long mask;
+        unsigned long addr;
+} __attribute__ ((aligned(8))) _psw_t;
+
+typedef struct
+{
+	_psw_t psw;
+	unsigned long gprs[__NUM_GPRS];
+	unsigned int  acrs[__NUM_ACRS];
+} _s390_regs_common;
+
+typedef struct
+{
+	unsigned int fpc;
+	unsigned int pad;
+	double   fprs[__NUM_FPRS];
+} _s390_fp_regs;
+
+typedef struct
+{
+	_s390_regs_common regs;
+	_s390_fp_regs     fpregs;
+} _sigregs;
+
+typedef struct
+{
+#ifndef __s390x__
+	unsigned long gprs_high[__NUM_GPRS];
+#endif
+	unsigned long long vxrs_low[__NUM_VXRS_LOW];
+	__vector128 vxrs_high[__NUM_VXRS_HIGH];
+	unsigned char __reserved[128];
+} _sigregs_ext;
+
+struct sigcontext
+{
+	unsigned long	oldmask[_SIGCONTEXT_NSIG_WORDS];
+	_sigregs        __user *sregs;
+};
+
+
+#endif
+
diff --git a/arch/s390/include/uapi/asm/signal.h b/arch/s390/include/uapi/asm/signal.h
new file mode 100644
index 0000000000..e74d6ba1bd
--- /dev/null
+++ b/arch/s390/include/uapi/asm/signal.h
@@ -0,0 +1,115 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/*
+ *  S390 version
+ *
+ *  Derived from "include/asm-i386/signal.h"
+ */
+
+#ifndef _UAPI_ASMS390_SIGNAL_H
+#define _UAPI_ASMS390_SIGNAL_H
+
+#include <linux/types.h>
+#include <linux/time.h>
+
+/* Avoid too many header ordering problems.  */
+struct siginfo;
+struct pt_regs;
+
+#ifndef __KERNEL__
+/* Here we must cater to libcs that poke about in kernel headers.  */
+
+#define NSIG            32
+typedef unsigned long sigset_t;
+
+#endif /* __KERNEL__ */
+
+#define SIGHUP           1
+#define SIGINT           2
+#define SIGQUIT          3
+#define SIGILL           4
+#define SIGTRAP          5
+#define SIGABRT          6
+#define SIGIOT           6
+#define SIGBUS           7
+#define SIGFPE           8
+#define SIGKILL          9
+#define SIGUSR1         10
+#define SIGSEGV         11
+#define SIGUSR2         12
+#define SIGPIPE         13
+#define SIGALRM         14
+#define SIGTERM         15
+#define SIGSTKFLT       16
+#define SIGCHLD         17
+#define SIGCONT         18
+#define SIGSTOP         19
+#define SIGTSTP         20
+#define SIGTTIN         21
+#define SIGTTOU         22
+#define SIGURG          23
+#define SIGXCPU         24
+#define SIGXFSZ         25
+#define SIGVTALRM       26
+#define SIGPROF         27
+#define SIGWINCH        28
+#define SIGIO           29
+#define SIGPOLL         SIGIO
+/*
+#define SIGLOST         29
+*/
+#define SIGPWR          30
+#define SIGSYS		31
+#define SIGUNUSED       31
+
+/* These should not be considered constants from userland.  */
+#define SIGRTMIN        32
+#define SIGRTMAX        _NSIG
+
+#define SA_RESTORER     0x04000000
+
+#define MINSIGSTKSZ     2048
+#define SIGSTKSZ        8192
+
+#include <asm-generic/signal-defs.h>
+
+#ifndef __KERNEL__
+
+/*
+ * There are two system calls in regard to sigaction, sys_rt_sigaction
+ * and sys_sigaction. Internally the kernel uses the struct old_sigaction
+ * for the older sys_sigaction system call, and the kernel version of the
+ * struct sigaction for the newer sys_rt_sigaction.
+ *
+ * The uapi definition for struct sigaction has made a strange distinction
+ * between 31-bit and 64-bit in the past. For 64-bit the uapi structure
+ * looks like the kernel struct sigaction, but for 31-bit it used to
+ * look like the kernel struct old_sigaction. That practically made the
+ * structure unusable for either system call. To get around this problem
+ * the glibc always had its own definitions for the sigaction structures.
+ *
+ * The current struct sigaction uapi definition below is suitable for the
+ * sys_rt_sigaction system call only.
+ */
+struct sigaction {
+        union {
+          __sighandler_t _sa_handler;
+          void (*_sa_sigaction)(int, struct siginfo *, void *);
+        } _u;
+        unsigned long sa_flags;
+        void (*sa_restorer)(void);
+	sigset_t sa_mask;
+};
+
+#define sa_handler      _u._sa_handler
+#define sa_sigaction    _u._sa_sigaction
+
+#endif /* __KERNEL__ */
+
+typedef struct sigaltstack {
+        void __user *ss_sp;
+        int ss_flags;
+	__kernel_size_t ss_size;
+} stack_t;
+
+
+#endif /* _UAPI_ASMS390_SIGNAL_H */
diff --git a/arch/s390/include/uapi/asm/stat.h b/arch/s390/include/uapi/asm/stat.h
new file mode 100644
index 0000000000..ac253d2360
--- /dev/null
+++ b/arch/s390/include/uapi/asm/stat.h
@@ -0,0 +1,104 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/*
+ *  S390 version
+ *
+ *  Derived from "include/asm-i386/stat.h"
+ */
+
+#ifndef _S390_STAT_H
+#define _S390_STAT_H
+
+#ifndef __s390x__
+struct __old_kernel_stat {
+        unsigned short st_dev;
+        unsigned short st_ino;
+        unsigned short st_mode;
+        unsigned short st_nlink;
+        unsigned short st_uid;
+        unsigned short st_gid;
+        unsigned short st_rdev;
+        unsigned long  st_size;
+        unsigned long  st_atime;
+        unsigned long  st_mtime;
+        unsigned long  st_ctime;
+};
+
+struct stat {
+        unsigned short st_dev;
+        unsigned short __pad1;
+        unsigned long  st_ino;
+        unsigned short st_mode;
+        unsigned short st_nlink;
+        unsigned short st_uid;
+        unsigned short st_gid;
+        unsigned short st_rdev;
+        unsigned short __pad2;
+        unsigned long  st_size;
+        unsigned long  st_blksize;
+        unsigned long  st_blocks;
+        unsigned long  st_atime;
+        unsigned long  st_atime_nsec;
+        unsigned long  st_mtime;
+        unsigned long  st_mtime_nsec;
+        unsigned long  st_ctime;
+        unsigned long  st_ctime_nsec;
+        unsigned long  __unused4;
+        unsigned long  __unused5;
+};
+
+/* This matches struct stat64 in glibc2.1, hence the absolutely
+ * insane amounts of padding around dev_t's.
+ */
+struct stat64 {
+        unsigned long long	st_dev;
+        unsigned int    __pad1;
+#define STAT64_HAS_BROKEN_ST_INO        1
+        unsigned long   __st_ino;
+        unsigned int    st_mode;
+        unsigned int    st_nlink;
+        unsigned long   st_uid;
+        unsigned long   st_gid;
+        unsigned long long	st_rdev;
+        unsigned int    __pad3;
+        long long	st_size;
+        unsigned long   st_blksize;
+        unsigned char   __pad4[4];
+        unsigned long   __pad5;     /* future possible st_blocks high bits */
+        unsigned long   st_blocks;  /* Number 512-byte blocks allocated. */
+        unsigned long   st_atime;
+        unsigned long   st_atime_nsec;
+        unsigned long   st_mtime;
+        unsigned long   st_mtime_nsec;
+        unsigned long   st_ctime;
+        unsigned long   st_ctime_nsec;  /* will be high 32 bits of ctime someday */
+        unsigned long long	st_ino;
+};
+
+#else /* __s390x__ */
+
+struct stat {
+        unsigned long  st_dev;
+        unsigned long  st_ino;
+        unsigned long  st_nlink;
+        unsigned int   st_mode;
+        unsigned int   st_uid;
+        unsigned int   st_gid;
+        unsigned int   __pad1;
+        unsigned long  st_rdev;
+        unsigned long  st_size;
+        unsigned long  st_atime;
+	unsigned long  st_atime_nsec;
+        unsigned long  st_mtime;
+	unsigned long  st_mtime_nsec;
+        unsigned long  st_ctime;
+	unsigned long  st_ctime_nsec;
+        unsigned long  st_blksize;
+        long           st_blocks;
+        unsigned long  __unused[3];
+};
+
+#endif /* __s390x__ */
+
+#define STAT_HAVE_NSEC 1
+
+#endif
diff --git a/arch/s390/include/uapi/asm/statfs.h b/arch/s390/include/uapi/asm/statfs.h
new file mode 100644
index 0000000000..f85b50723d
--- /dev/null
+++ b/arch/s390/include/uapi/asm/statfs.h
@@ -0,0 +1,51 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/*
+ *  S390 version
+ *
+ *  Derived from "include/asm-i386/statfs.h"
+ */
+
+#ifndef _S390_STATFS_H
+#define _S390_STATFS_H
+
+/*
+ * We can't use <asm-generic/statfs.h> because in 64-bit mode
+ * we mix ints of different sizes in our struct statfs.
+ */
+
+#ifndef __KERNEL_STRICT_NAMES
+#include <linux/types.h>
+typedef __kernel_fsid_t	fsid_t;
+#endif
+
+struct statfs {
+	unsigned int	f_type;
+	unsigned int	f_bsize;
+	unsigned long	f_blocks;
+	unsigned long	f_bfree;
+	unsigned long	f_bavail;
+	unsigned long	f_files;
+	unsigned long	f_ffree;
+	__kernel_fsid_t f_fsid;
+	unsigned int	f_namelen;
+	unsigned int	f_frsize;
+	unsigned int	f_flags;
+	unsigned int	f_spare[5];
+};
+
+struct statfs64 {
+	unsigned int	f_type;
+	unsigned int	f_bsize;
+	unsigned long long f_blocks;
+	unsigned long long f_bfree;
+	unsigned long long f_bavail;
+	unsigned long long f_files;
+	unsigned long long f_ffree;
+	__kernel_fsid_t f_fsid;
+	unsigned int	f_namelen;
+	unsigned int	f_frsize;
+	unsigned int	f_flags;
+	unsigned int	f_spare[5];
+};
+
+#endif
diff --git a/arch/s390/include/uapi/asm/sthyi.h b/arch/s390/include/uapi/asm/sthyi.h
new file mode 100644
index 0000000000..b1b0223169
--- /dev/null
+++ b/arch/s390/include/uapi/asm/sthyi.h
@@ -0,0 +1,7 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+#ifndef _UAPI_ASM_STHYI_H
+#define _UAPI_ASM_STHYI_H
+
+#define STHYI_FC_CP_IFL_CAP	0
+
+#endif /* _UAPI_ASM_STHYI_H */
diff --git a/arch/s390/include/uapi/asm/tape390.h b/arch/s390/include/uapi/asm/tape390.h
new file mode 100644
index 0000000000..90266c6964
--- /dev/null
+++ b/arch/s390/include/uapi/asm/tape390.h
@@ -0,0 +1,103 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/*************************************************************************
+ *
+ *	   enables user programs to display messages and control encryption
+ *	   on s390 tape devices
+ *
+ *	   Copyright IBM Corp. 2001, 2006
+ *	   Author(s): Michael Holzheu <holzheu@de.ibm.com>
+ *
+ *************************************************************************/
+
+#ifndef _TAPE390_H
+#define _TAPE390_H
+
+#define TAPE390_DISPLAY _IOW('d', 1, struct display_struct)
+
+/*
+ * The TAPE390_DISPLAY ioctl calls the Load Display command
+ * which transfers 17 bytes of data from the channel to the subsystem:
+ *     - 1 format control byte, and
+ *     - two 8-byte messages
+ *
+ * Format control byte:
+ *   0-2: New Message Overlay
+ *     3: Alternate Messages
+ *     4: Blink Message
+ *     5: Display Low/High Message
+ *     6: Reserved
+ *     7: Automatic Load Request
+ *
+ */
+
+typedef struct display_struct {
+        char cntrl;
+        char message1[8];
+        char message2[8];
+} display_struct;
+
+/*
+ * Tape encryption support
+ */
+
+struct tape390_crypt_info {
+	char capability;
+	char status;
+	char medium_status;
+} __attribute__ ((packed));
+
+
+/* Macros for "capable" field */
+#define TAPE390_CRYPT_SUPPORTED_MASK 0x01
+#define TAPE390_CRYPT_SUPPORTED(x) \
+	((x.capability & TAPE390_CRYPT_SUPPORTED_MASK))
+
+/* Macros for "status" field */
+#define TAPE390_CRYPT_ON_MASK 0x01
+#define TAPE390_CRYPT_ON(x) (((x.status) & TAPE390_CRYPT_ON_MASK))
+
+/* Macros for "medium status" field */
+#define TAPE390_MEDIUM_LOADED_MASK 0x01
+#define TAPE390_MEDIUM_ENCRYPTED_MASK 0x02
+#define TAPE390_MEDIUM_ENCRYPTED(x) \
+	(((x.medium_status) & TAPE390_MEDIUM_ENCRYPTED_MASK))
+#define TAPE390_MEDIUM_LOADED(x) \
+	(((x.medium_status) & TAPE390_MEDIUM_LOADED_MASK))
+
+/*
+ * The TAPE390_CRYPT_SET ioctl is used to switch on/off encryption.
+ * The "encryption_capable" and "tape_status" fields are ignored for this ioctl!
+ */
+#define TAPE390_CRYPT_SET _IOW('d', 2, struct tape390_crypt_info)
+
+/*
+ * The TAPE390_CRYPT_QUERY ioctl is used to query the encryption state.
+ */
+#define TAPE390_CRYPT_QUERY _IOR('d', 3, struct tape390_crypt_info)
+
+/* Values for "kekl1/2_type" and "kekl1/2_type_on_tape" fields */
+#define TAPE390_KEKL_TYPE_NONE 0
+#define TAPE390_KEKL_TYPE_LABEL 1
+#define TAPE390_KEKL_TYPE_HASH 2
+
+struct tape390_kekl {
+	unsigned char type;
+	unsigned char type_on_tape;
+	char label[65];
+} __attribute__ ((packed));
+
+struct tape390_kekl_pair {
+	struct tape390_kekl kekl[2];
+} __attribute__ ((packed));
+
+/*
+ * The TAPE390_KEKL_SET ioctl is used to set Key Encrypting Key labels.
+ */
+#define TAPE390_KEKL_SET _IOW('d', 4, struct tape390_kekl_pair)
+
+/*
+ * The TAPE390_KEKL_QUERY ioctl is used to query Key Encrypting Key labels.
+ */
+#define TAPE390_KEKL_QUERY _IOR('d', 5, struct tape390_kekl_pair)
+
+#endif 
diff --git a/arch/s390/include/uapi/asm/types.h b/arch/s390/include/uapi/asm/types.h
new file mode 100644
index 0000000000..84457dbb26
--- /dev/null
+++ b/arch/s390/include/uapi/asm/types.h
@@ -0,0 +1,30 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/*
+ *  S390 version
+ *
+ *  Derived from "include/asm-i386/types.h"
+ */
+
+#ifndef _UAPI_S390_TYPES_H
+#define _UAPI_S390_TYPES_H
+
+#include <asm-generic/int-ll64.h>
+
+#ifndef __ASSEMBLY__
+
+typedef unsigned long addr_t;
+typedef __signed__ long saddr_t;
+
+typedef struct {
+	union {
+		struct {
+			__u64 high;
+			__u64 low;
+		};
+		__u32 u[4];
+	};
+} __attribute__((packed, aligned(4))) __vector128;
+
+#endif /* __ASSEMBLY__ */
+
+#endif /* _UAPI_S390_TYPES_H */
diff --git a/arch/s390/include/uapi/asm/ucontext.h b/arch/s390/include/uapi/asm/ucontext.h
new file mode 100644
index 0000000000..c95f42e853
--- /dev/null
+++ b/arch/s390/include/uapi/asm/ucontext.h
@@ -0,0 +1,41 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/*
+ *  S390 version
+ *
+ *  Derived from "include/asm-i386/ucontext.h"
+ */
+
+#ifndef _ASM_S390_UCONTEXT_H
+#define _ASM_S390_UCONTEXT_H
+
+#define UC_GPRS_HIGH	1	/* uc_mcontext_ext has valid high gprs */
+#define UC_VXRS		2	/* uc_mcontext_ext has valid vector regs */
+
+/*
+ * The struct ucontext_extended describes how the registers are stored
+ * on a rt signal frame. Please note that the structure is not fixed,
+ * if new CPU registers are added to the user state the size of the
+ * struct ucontext_extended will increase.
+ */
+struct ucontext_extended {
+	unsigned long	  uc_flags;
+	struct ucontext  *uc_link;
+	stack_t		  uc_stack;
+	_sigregs	  uc_mcontext;
+	sigset_t	  uc_sigmask;
+	/* Allow for uc_sigmask growth.  Glibc uses a 1024-bit sigset_t.  */
+	unsigned char	  __unused[128 - sizeof(sigset_t)];
+	_sigregs_ext	  uc_mcontext_ext;
+};
+
+struct ucontext {
+	unsigned long	  uc_flags;
+	struct ucontext  *uc_link;
+	stack_t		  uc_stack;
+	_sigregs          uc_mcontext;
+	sigset_t	  uc_sigmask;
+	/* Allow for uc_sigmask growth.  Glibc uses a 1024-bit sigset_t.  */
+	unsigned char	  __unused[128 - sizeof(sigset_t)];
+};
+
+#endif /* !_ASM_S390_UCONTEXT_H */
diff --git a/arch/s390/include/uapi/asm/unistd.h b/arch/s390/include/uapi/asm/unistd.h
new file mode 100644
index 0000000000..01b5fe8b9d
--- /dev/null
+++ b/arch/s390/include/uapi/asm/unistd.h
@@ -0,0 +1,17 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/*
+ *  S390 version
+ *
+ *  Derived from "include/asm-i386/unistd.h"
+ */
+
+#ifndef _UAPI_ASM_S390_UNISTD_H_
+#define _UAPI_ASM_S390_UNISTD_H_
+
+#ifdef __s390x__
+#include <asm/unistd_64.h>
+#else
+#include <asm/unistd_32.h>
+#endif
+
+#endif /* _UAPI_ASM_S390_UNISTD_H_ */
diff --git a/arch/s390/include/uapi/asm/uvdevice.h b/arch/s390/include/uapi/asm/uvdevice.h
new file mode 100644
index 0000000000..b9c2f14a6a
--- /dev/null
+++ b/arch/s390/include/uapi/asm/uvdevice.h
@@ -0,0 +1,102 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/*
+ *  Copyright IBM Corp. 2022
+ *  Author(s): Steffen Eiden <seiden@linux.ibm.com>
+ */
+#ifndef __S390_ASM_UVDEVICE_H
+#define __S390_ASM_UVDEVICE_H
+
+#include <linux/types.h>
+
+struct uvio_ioctl_cb {
+	__u32 flags;
+	__u16 uv_rc;			/* UV header rc value */
+	__u16 uv_rrc;			/* UV header rrc value */
+	__u64 argument_addr;		/* Userspace address of uvio argument */
+	__u32 argument_len;
+	__u8  reserved14[0x40 - 0x14];	/* must be zero */
+};
+
+#define UVIO_ATT_USER_DATA_LEN		0x100
+#define UVIO_ATT_UID_LEN		0x10
+struct uvio_attest {
+	__u64 arcb_addr;				/* 0x0000 */
+	__u64 meas_addr;				/* 0x0008 */
+	__u64 add_data_addr;				/* 0x0010 */
+	__u8  user_data[UVIO_ATT_USER_DATA_LEN];	/* 0x0018 */
+	__u8  config_uid[UVIO_ATT_UID_LEN];		/* 0x0118 */
+	__u32 arcb_len;					/* 0x0128 */
+	__u32 meas_len;					/* 0x012c */
+	__u32 add_data_len;				/* 0x0130 */
+	__u16 user_data_len;				/* 0x0134 */
+	__u16 reserved136;				/* 0x0136 */
+};
+
+/**
+ * uvio_uvdev_info - Information of supported functions
+ * @supp_uvio_cmds - supported IOCTLs by this device
+ * @supp_uv_cmds - supported UVCs corresponding to the IOCTL
+ *
+ * UVIO request to get information about supported request types by this
+ * uvdevice and the Ultravisor.  Everything is output. Bits are in LSB0
+ * ordering.  If the bit is set in both, @supp_uvio_cmds and @supp_uv_cmds, the
+ * uvdevice and the Ultravisor support that call.
+ *
+ * Note that bit 0 (UVIO_IOCTL_UVDEV_INFO_NR) is always zero for `supp_uv_cmds`
+ * as there is no corresponding UV-call.
+ */
+struct uvio_uvdev_info {
+	/*
+	 * If bit `n` is set, this device supports the IOCTL with nr `n`.
+	 */
+	__u64 supp_uvio_cmds;
+	/*
+	 * If bit `n` is set, the Ultravisor(UV) supports the UV-call
+	 * corresponding to the IOCTL with nr `n` in the calling contextx (host
+	 * or guest).  The value is only valid if the corresponding bit in
+	 * @supp_uvio_cmds is set as well.
+	 */
+	__u64 supp_uv_cmds;
+};
+
+/*
+ * The following max values define an upper length for the IOCTL in/out buffers.
+ * However, they do not represent the maximum the Ultravisor allows which is
+ * often way smaller. By allowing larger buffer sizes we hopefully do not need
+ * to update the code with every machine update. It is therefore possible for
+ * userspace to request more memory than actually used by kernel/UV.
+ */
+#define UVIO_ATT_ARCB_MAX_LEN		0x100000
+#define UVIO_ATT_MEASUREMENT_MAX_LEN	0x8000
+#define UVIO_ATT_ADDITIONAL_MAX_LEN	0x8000
+#define UVIO_ADD_SECRET_MAX_LEN		0x100000
+#define UVIO_LIST_SECRETS_LEN		0x1000
+
+#define UVIO_DEVICE_NAME "uv"
+#define UVIO_TYPE_UVC 'u'
+
+enum UVIO_IOCTL_NR {
+	UVIO_IOCTL_UVDEV_INFO_NR = 0x00,
+	UVIO_IOCTL_ATT_NR,
+	UVIO_IOCTL_ADD_SECRET_NR,
+	UVIO_IOCTL_LIST_SECRETS_NR,
+	UVIO_IOCTL_LOCK_SECRETS_NR,
+	/* must be the last entry */
+	UVIO_IOCTL_NUM_IOCTLS
+};
+
+#define UVIO_IOCTL(nr)		_IOWR(UVIO_TYPE_UVC, nr, struct uvio_ioctl_cb)
+#define UVIO_IOCTL_UVDEV_INFO	UVIO_IOCTL(UVIO_IOCTL_UVDEV_INFO_NR)
+#define UVIO_IOCTL_ATT		UVIO_IOCTL(UVIO_IOCTL_ATT_NR)
+#define UVIO_IOCTL_ADD_SECRET	UVIO_IOCTL(UVIO_IOCTL_ADD_SECRET_NR)
+#define UVIO_IOCTL_LIST_SECRETS	UVIO_IOCTL(UVIO_IOCTL_LIST_SECRETS_NR)
+#define UVIO_IOCTL_LOCK_SECRETS	UVIO_IOCTL(UVIO_IOCTL_LOCK_SECRETS_NR)
+
+#define UVIO_SUPP_CALL(nr)	(1ULL << (nr))
+#define UVIO_SUPP_UDEV_INFO	UVIO_SUPP_CALL(UVIO_IOCTL_UDEV_INFO_NR)
+#define UVIO_SUPP_ATT		UVIO_SUPP_CALL(UVIO_IOCTL_ATT_NR)
+#define UVIO_SUPP_ADD_SECRET	UVIO_SUPP_CALL(UVIO_IOCTL_ADD_SECRET_NR)
+#define UVIO_SUPP_LIST_SECRETS	UVIO_SUPP_CALL(UVIO_IOCTL_LIST_SECRETS_NR)
+#define UVIO_SUPP_LOCK_SECRETS	UVIO_SUPP_CALL(UVIO_IOCTL_LOCK_SECRETS_NR)
+
+#endif /* __S390_ASM_UVDEVICE_H */
diff --git a/arch/s390/include/uapi/asm/virtio-ccw.h b/arch/s390/include/uapi/asm/virtio-ccw.h
new file mode 100644
index 0000000000..2b605f7e84
--- /dev/null
+++ b/arch/s390/include/uapi/asm/virtio-ccw.h
@@ -0,0 +1,18 @@
+/* SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-3-Clause) */
+/*
+ * Definitions for virtio-ccw devices.
+ *
+ * Copyright IBM Corp. 2013
+ *
+ *  Author(s): Cornelia Huck <cornelia.huck@de.ibm.com>
+ */
+#ifndef __KVM_VIRTIO_CCW_H
+#define __KVM_VIRTIO_CCW_H
+
+/* Alignment of vring buffers. */
+#define KVM_VIRTIO_CCW_RING_ALIGN 4096
+
+/* Subcode for diagnose 500 (virtio hypercall). */
+#define KVM_S390_VIRTIO_CCW_NOTIFY 3
+
+#endif
diff --git a/arch/s390/include/uapi/asm/vmcp.h b/arch/s390/include/uapi/asm/vmcp.h
new file mode 100644
index 0000000000..aeaaa03003
--- /dev/null
+++ b/arch/s390/include/uapi/asm/vmcp.h
@@ -0,0 +1,25 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/*
+ * Copyright IBM Corp. 2004, 2005
+ * Interface implementation for communication with the z/VM control program
+ * Version 1.0
+ * Author(s): Christian Borntraeger <cborntra@de.ibm.com>
+ *
+ *
+ * z/VMs CP offers the possibility to issue commands via the diagnose code 8
+ * this driver implements a character device that issues these commands and
+ * returns the answer of CP.
+ *
+ * The idea of this driver is based on cpint from Neale Ferguson
+ */
+
+#ifndef _UAPI_ASM_VMCP_H
+#define _UAPI_ASM_VMCP_H
+
+#include <linux/ioctl.h>
+
+#define VMCP_GETCODE	_IOR(0x10, 1, int)
+#define VMCP_SETBUF	_IOW(0x10, 2, int)
+#define VMCP_GETSIZE	_IOR(0x10, 3, int)
+
+#endif /* _UAPI_ASM_VMCP_H */
diff --git a/arch/s390/include/uapi/asm/vtoc.h b/arch/s390/include/uapi/asm/vtoc.h
new file mode 100644
index 0000000000..50c1d7b9e8
--- /dev/null
+++ b/arch/s390/include/uapi/asm/vtoc.h
@@ -0,0 +1,214 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/*
+ * This file contains volume label definitions for DASD devices.
+ *
+ * Copyright IBM Corp. 2005
+ *
+ * Author(s): Volker Sameske <sameske@de.ibm.com>
+ *
+ */
+
+#ifndef _ASM_S390_VTOC_H
+#define _ASM_S390_VTOC_H
+
+#include <linux/types.h>
+
+struct vtoc_ttr
+{
+	__u16 tt;
+	__u8 r;
+} __attribute__ ((packed));
+
+struct vtoc_cchhb
+{
+	__u16 cc;
+	__u16 hh;
+	__u8 b;
+} __attribute__ ((packed));
+
+struct vtoc_cchh
+{
+	__u16 cc;
+	__u16 hh;
+} __attribute__ ((packed));
+
+struct vtoc_labeldate
+{
+	__u8 year;
+	__u16 day;
+} __attribute__ ((packed));
+
+struct vtoc_volume_label_cdl
+{
+	char volkey[4];		/* volume key = volume label */
+	char vollbl[4];		/* volume label */
+	char volid[6];		/* volume identifier */
+	__u8 security;		/* security byte */
+	struct vtoc_cchhb vtoc;	/* VTOC address */
+	char res1[5];		/* reserved */
+	char cisize[4];		/* CI-size for FBA,... */
+				/* ...blanks for CKD */
+	char blkperci[4];	/* no of blocks per CI (FBA), blanks for CKD */
+	char labperci[4];	/* no of labels per CI (FBA), blanks for CKD */
+	char res2[4];		/* reserved */
+	char lvtoc[14];		/* owner code for LVTOC */
+	char res3[29];		/* reserved */
+} __attribute__ ((packed));
+
+struct vtoc_volume_label_ldl {
+	char vollbl[4];		/* volume label */
+	char volid[6];		/* volume identifier */
+	char res3[69];		/* reserved */
+	char ldl_version;	/* version number, valid for ldl format */
+	__u64 formatted_blocks; /* valid when ldl_version >= f2  */
+} __attribute__ ((packed));
+
+struct vtoc_extent
+{
+	__u8 typeind;			/* extent type indicator */
+	__u8 seqno;			/* extent sequence number */
+	struct vtoc_cchh llimit;	/* starting point of this extent */
+	struct vtoc_cchh ulimit;	/* ending point of this extent */
+} __attribute__ ((packed));
+
+struct vtoc_dev_const
+{
+	__u16 DS4DSCYL;	/* number of logical cyls */
+	__u16 DS4DSTRK;	/* number of tracks in a logical cylinder */
+	__u16 DS4DEVTK;	/* device track length */
+	__u8 DS4DEVI;	/* non-last keyed record overhead */
+	__u8 DS4DEVL;	/* last keyed record overhead */
+	__u8 DS4DEVK;	/* non-keyed record overhead differential */
+	__u8 DS4DEVFG;	/* flag byte */
+	__u16 DS4DEVTL;	/* device tolerance */
+	__u8 DS4DEVDT;	/* number of DSCB's per track */
+	__u8 DS4DEVDB;	/* number of directory blocks per track */
+} __attribute__ ((packed));
+
+struct vtoc_format1_label
+{
+	char DS1DSNAM[44];	/* data set name */
+	__u8 DS1FMTID;		/* format identifier */
+	char DS1DSSN[6];	/* data set serial number */
+	__u16 DS1VOLSQ;		/* volume sequence number */
+	struct vtoc_labeldate DS1CREDT; /* creation date: ydd */
+	struct vtoc_labeldate DS1EXPDT; /* expiration date */
+	__u8 DS1NOEPV;		/* number of extents on volume */
+	__u8 DS1NOBDB;		/* no. of bytes used in last direction blk */
+	__u8 DS1FLAG1;		/* flag 1 */
+	char DS1SYSCD[13];	/* system code */
+	struct vtoc_labeldate DS1REFD; /* date last referenced	*/
+	__u8 DS1SMSFG;		/* system managed storage indicators */
+	__u8 DS1SCXTF;		/* sec. space extension flag byte */
+	__u16 DS1SCXTV;		/* secondary space extension value */
+	__u8 DS1DSRG1;		/* data set organisation byte 1 */
+	__u8 DS1DSRG2;		/* data set organisation byte 2 */
+	__u8 DS1RECFM;		/* record format */
+	__u8 DS1OPTCD;		/* option code */
+	__u16 DS1BLKL;		/* block length */
+	__u16 DS1LRECL;		/* record length */
+	__u8 DS1KEYL;		/* key length */
+	__u16 DS1RKP;		/* relative key position */
+	__u8 DS1DSIND;		/* data set indicators */
+	__u8 DS1SCAL1;		/* secondary allocation flag byte */
+	char DS1SCAL3[3];	/* secondary allocation quantity */
+	struct vtoc_ttr DS1LSTAR; /* last used track and block on track */
+	__u16 DS1TRBAL;		/* space remaining on last used track */
+	__u16 res1;		/* reserved */
+	struct vtoc_extent DS1EXT1; /* first extent description */
+	struct vtoc_extent DS1EXT2; /* second extent description */
+	struct vtoc_extent DS1EXT3; /* third extent description */
+	struct vtoc_cchhb DS1PTRDS; /* possible pointer to f2 or f3 DSCB */
+} __attribute__ ((packed));
+
+struct vtoc_format4_label
+{
+	char DS4KEYCD[44];	/* key code for VTOC labels: 44 times 0x04 */
+	__u8 DS4IDFMT;		/* format identifier */
+	struct vtoc_cchhb DS4HPCHR; /* highest address of a format 1 DSCB */
+	__u16 DS4DSREC;		/* number of available DSCB's */
+	struct vtoc_cchh DS4HCCHH; /* CCHH of next available alternate track */
+	__u16 DS4NOATK;		/* number of remaining alternate tracks */
+	__u8 DS4VTOCI;		/* VTOC indicators */
+	__u8 DS4NOEXT;		/* number of extents in VTOC */
+	__u8 DS4SMSFG;		/* system managed storage indicators */
+	__u8 DS4DEVAC;		/* number of alternate cylinders.
+				 * Subtract from first two bytes of
+				 * DS4DEVSZ to get number of usable
+				 * cylinders. can be zero. valid
+				 * only if DS4DEVAV on. */
+	struct vtoc_dev_const DS4DEVCT;	/* device constants */
+	char DS4AMTIM[8];	/* VSAM time stamp */
+	char DS4AMCAT[3];	/* VSAM catalog indicator */
+	char DS4R2TIM[8];	/* VSAM volume/catalog match time stamp */
+	char res1[5];		/* reserved */
+	char DS4F6PTR[5];	/* pointer to first format 6 DSCB */
+	struct vtoc_extent DS4VTOCE; /* VTOC extent description */
+	char res2[10];		/* reserved */
+	__u8 DS4EFLVL;		/* extended free-space management level */
+	struct vtoc_cchhb DS4EFPTR; /* pointer to extended free-space info */
+	char res3;		/* reserved */
+	__u32 DS4DCYL;		/* number of logical cyls */
+	char res4[2];		/* reserved */
+	__u8 DS4DEVF2;		/* device flags */
+	char res5;		/* reserved */
+} __attribute__ ((packed));
+
+struct vtoc_ds5ext
+{
+	__u16 t;	/* RTA of the first track of free extent */
+	__u16 fc;	/* number of whole cylinders in free ext. */
+	__u8 ft;	/* number of remaining free tracks */
+} __attribute__ ((packed));
+
+struct vtoc_format5_label
+{
+	char DS5KEYID[4];	/* key identifier */
+	struct vtoc_ds5ext DS5AVEXT; /* first available (free-space) extent. */
+	struct vtoc_ds5ext DS5EXTAV[7]; /* seven available extents */
+	__u8 DS5FMTID;		/* format identifier */
+	struct vtoc_ds5ext DS5MAVET[18]; /* eighteen available extents */
+	struct vtoc_cchhb DS5PTRDS; /* pointer to next format5 DSCB */
+} __attribute__ ((packed));
+
+struct vtoc_ds7ext
+{
+	__u32 a; /* starting RTA value */
+	__u32 b; /* ending RTA value + 1 */
+} __attribute__ ((packed));
+
+struct vtoc_format7_label
+{
+	char DS7KEYID[4];	/* key identifier */
+	struct vtoc_ds7ext DS7EXTNT[5]; /* space for 5 extent descriptions */
+	__u8 DS7FMTID;		/* format identifier */
+	struct vtoc_ds7ext DS7ADEXT[11]; /* space for 11 extent descriptions */
+	char res1[2];		/* reserved */
+	struct vtoc_cchhb DS7PTRDS; /* pointer to next FMT7 DSCB */
+} __attribute__ ((packed));
+
+struct vtoc_cms_label {
+	__u8 label_id[4];		/* Label identifier */
+	__u8 vol_id[6];		/* Volid */
+	__u16 version_id;		/* Version identifier */
+	__u32 block_size;		/* Disk block size */
+	__u32 origin_ptr;		/* Disk origin pointer */
+	__u32 usable_count;	/* Number of usable cylinders/blocks */
+	__u32 formatted_count;	/* Maximum number of formatted cylinders/
+				 * blocks */
+	__u32 block_count;	/* Disk size in CMS blocks */
+	__u32 used_count;		/* Number of CMS blocks in use */
+	__u32 fst_size;		/* File Status Table (FST) size */
+	__u32 fst_count;		/* Number of FSTs per CMS block */
+	__u8 format_date[6];	/* Disk FORMAT date */
+	__u8 reserved1[2];
+	__u32 disk_offset;	/* Disk offset when reserved*/
+	__u32 map_block;		/* Allocation Map Block with next hole */
+	__u32 hblk_disp;		/* Displacement into HBLK data of next hole */
+	__u32 user_disp;		/* Displacement into user part of Allocation
+				 * map */
+	__u8 reserved2[4];
+	__u8 segment_name[8];	/* Name of shared segment */
+} __attribute__ ((packed));
+
+#endif /* _ASM_S390_VTOC_H */
diff --git a/arch/s390/include/uapi/asm/zcrypt.h b/arch/s390/include/uapi/asm/zcrypt.h
new file mode 100644
index 0000000000..f4785abe1b
--- /dev/null
+++ b/arch/s390/include/uapi/asm/zcrypt.h
@@ -0,0 +1,373 @@
+/* SPDX-License-Identifier: GPL-2.0+ WITH Linux-syscall-note */
+/*
+ *  include/asm-s390/zcrypt.h
+ *
+ *  zcrypt 2.2.1 (user-visible header)
+ *
+ *  Copyright IBM Corp. 2001, 2022
+ *  Author(s): Robert Burroughs
+ *	       Eric Rossman (edrossma@us.ibm.com)
+ *
+ *  Hotplug & misc device support: Jochen Roehrig (roehrig@de.ibm.com)
+ */
+
+#ifndef __ASM_S390_ZCRYPT_H
+#define __ASM_S390_ZCRYPT_H
+
+#define ZCRYPT_VERSION 2
+#define ZCRYPT_RELEASE 2
+#define ZCRYPT_VARIANT 1
+
+#include <linux/ioctl.h>
+#include <linux/compiler.h>
+#include <linux/types.h>
+
+/* Name of the zcrypt device driver. */
+#define ZCRYPT_NAME "zcrypt"
+
+/**
+ * struct ica_rsa_modexpo
+ *
+ * Requirements:
+ * - outputdatalength is at least as large as inputdatalength.
+ * - All key parts are right justified in their fields, padded on
+ *   the left with zeroes.
+ * - length(b_key) = inputdatalength
+ * - length(n_modulus) = inputdatalength
+ */
+struct ica_rsa_modexpo {
+	__u8 __user  *inputdata;
+	__u32	      inputdatalength;
+	__u8 __user  *outputdata;
+	__u32	      outputdatalength;
+	__u8 __user  *b_key;
+	__u8 __user  *n_modulus;
+};
+
+/**
+ * struct ica_rsa_modexpo_crt
+ *
+ * Requirements:
+ * - inputdatalength is even.
+ * - outputdatalength is at least as large as inputdatalength.
+ * - All key parts are right justified in their fields, padded on
+ *   the left with zeroes.
+ * - length(bp_key)	= inputdatalength/2 + 8
+ * - length(bq_key)	= inputdatalength/2
+ * - length(np_key)	= inputdatalength/2 + 8
+ * - length(nq_key)	= inputdatalength/2
+ * - length(u_mult_inv) = inputdatalength/2 + 8
+ */
+struct ica_rsa_modexpo_crt {
+	__u8 __user  *inputdata;
+	__u32	      inputdatalength;
+	__u8 __user  *outputdata;
+	__u32	      outputdatalength;
+	__u8 __user  *bp_key;
+	__u8 __user  *bq_key;
+	__u8 __user  *np_prime;
+	__u8 __user  *nq_prime;
+	__u8 __user  *u_mult_inv;
+};
+
+/**
+ * CPRBX
+ *	  Note that all shorts and ints are big-endian.
+ *	  All pointer fields are 16 bytes long, and mean nothing.
+ *
+ *	  A request CPRB is followed by a request_parameter_block.
+ *
+ *	  The request (or reply) parameter block is organized thus:
+ *	    function code
+ *	    VUD block
+ *	    key block
+ */
+struct CPRBX {
+	__u16	     cprb_len;		/* CPRB length	      220	 */
+	__u8	     cprb_ver_id;	/* CPRB version id.   0x02	 */
+	__u8	     ctfm;		/* Command Type Filtering Mask	 */
+	__u8	     pad_000[2];	/* Alignment pad bytes		 */
+	__u8	     func_id[2];	/* function id	      0x5432	 */
+	__u8	     cprb_flags[4];	/* Flags			 */
+	__u32	     req_parml;		/* request parameter buffer len	 */
+	__u32	     req_datal;		/* request data buffer		 */
+	__u32	     rpl_msgbl;		/* reply  message block length	 */
+	__u32	     rpld_parml;	/* replied parameter block len	 */
+	__u32	     rpl_datal;		/* reply data block len		 */
+	__u32	     rpld_datal;	/* replied data block len	 */
+	__u32	     req_extbl;		/* request extension block len	 */
+	__u8	     _pad_001[4];	/* reserved			 */
+	__u32	     rpld_extbl;	/* replied extension block len	 */
+	__u8	     _pad_002[16 - sizeof(__u8 *)];
+	__u8 __user *req_parmb;		/* request parm block 'address'	 */
+	__u8	     _pad_003[16 - sizeof(__u8 *)];
+	__u8 __user *req_datab;		/* request data block 'address'	 */
+	__u8	     _pad_004[16 - sizeof(__u8 *)];
+	__u8 __user *rpl_parmb;		/* reply parm block 'address'	 */
+	__u8	     _pad_005[16 - sizeof(__u8 *)];
+	__u8 __user *rpl_datab;		/* reply data block 'address'	 */
+	__u8	     _pad_006[16 - sizeof(__u8 *)];
+	__u8 __user *req_extb;		/* request extension block 'addr'*/
+	__u8	     _pad_007[16 - sizeof(__u8 *)];
+	__u8 __user *rpl_extb;		/* reply extension block 'address'*/
+	__u16	     ccp_rtcode;	/* server return code		 */
+	__u16	     ccp_rscode;	/* server reason code		 */
+	__u32	     mac_data_len;	/* Mac Data Length		 */
+	__u8	     logon_id[8];	/* Logon Identifier		 */
+	__u8	     mac_value[8];	/* Mac Value			 */
+	__u8	     mac_content_flgs;	/* Mac content flag byte	 */
+	__u8	     _pad_008;		/* Alignment			 */
+	__u16	     domain;		/* Domain			 */
+	__u8	     _pad_009[12];	/* reserved, checked for zeros	 */
+	__u8	     _pad_010[36];	/* reserved			 */
+} __attribute__((packed));
+
+/**
+ * xcRB
+ */
+struct ica_xcRB {
+	__u16	      agent_ID;
+	__u32	      user_defined;
+	__u16	      request_ID;
+	__u32	      request_control_blk_length;
+	__u8	      _padding1[16 - sizeof(__u8 *)];
+	__u8 __user  *request_control_blk_addr;
+	__u32	      request_data_length;
+	__u8	      _padding2[16 - sizeof(__u8 *)];
+	__u8 __user  *request_data_address;
+	__u32	      reply_control_blk_length;
+	__u8	      _padding3[16 - sizeof(__u8 *)];
+	__u8 __user  *reply_control_blk_addr;
+	__u32	      reply_data_length;
+	__u8	      __padding4[16 - sizeof(__u8 *)];
+	__u8 __user  *reply_data_addr;
+	__u16	      priority_window;
+	__u32	      status;
+} __attribute__((packed));
+
+/**
+ * struct ep11_cprb - EP11 connectivity programming request block
+ * @cprb_len:		CPRB header length [0x0020]
+ * @cprb_ver_id:	CPRB version id.   [0x04]
+ * @pad_000:		Alignment pad bytes
+ * @flags:		Admin bit [0x80], Special bit [0x20]
+ * @func_id:		Function id / subtype [0x5434] "T4"
+ * @source_id:		Source id [originator id]
+ * @target_id:		Target id [usage/ctrl domain id]
+ * @ret_code:		Return code
+ * @reserved1:		Reserved
+ * @reserved2:		Reserved
+ * @payload_len:	Payload length
+ */
+struct ep11_cprb {
+	__u16	cprb_len;
+	__u8	cprb_ver_id;
+	__u8	pad_000[2];
+	__u8	flags;
+	__u8	func_id[2];
+	__u32	source_id;
+	__u32	target_id;
+	__u32	ret_code;
+	__u32	reserved1;
+	__u32	reserved2;
+	__u32	payload_len;
+} __attribute__((packed));
+
+/**
+ * struct ep11_target_dev - EP11 target device list
+ * @ap_id:	AP device id
+ * @dom_id:	Usage domain id
+ */
+struct ep11_target_dev {
+	__u16 ap_id;
+	__u16 dom_id;
+};
+
+/**
+ * struct ep11_urb - EP11 user request block
+ * @targets_num:	Number of target adapters
+ * @targets:		Addr to target adapter list
+ * @weight:		Level of request priority
+ * @req_no:		Request id/number
+ * @req_len:		Request length
+ * @req:		Addr to request block
+ * @resp_len:		Response length
+ * @resp:		Addr to response block
+ */
+struct ep11_urb {
+	__u16		targets_num;
+	__u8 __user    *targets;
+	__u64		weight;
+	__u64		req_no;
+	__u64		req_len;
+	__u8 __user    *req;
+	__u64		resp_len;
+	__u8 __user    *resp;
+} __attribute__((packed));
+
+/**
+ * struct zcrypt_device_status_ext
+ * @hwtype:		raw hardware type
+ * @qid:		8 bit device index, 8 bit domain
+ * @functions:		AP device function bit field 'abcdef'
+ *			a, b, c = reserved
+ *			d = CCA coprocessor
+ *			e = Accelerator
+ *			f = EP11 coprocessor
+ * @online		online status
+ * @reserved		reserved
+ */
+struct zcrypt_device_status_ext {
+	unsigned int hwtype:8;
+	unsigned int qid:16;
+	unsigned int online:1;
+	unsigned int functions:6;
+	unsigned int reserved:1;
+};
+
+#define MAX_ZDEV_CARDIDS_EXT 256
+#define MAX_ZDEV_DOMAINS_EXT 256
+
+/* Maximum number of zcrypt devices */
+#define MAX_ZDEV_ENTRIES_EXT (MAX_ZDEV_CARDIDS_EXT * MAX_ZDEV_DOMAINS_EXT)
+
+/* Device matrix of all zcrypt devices */
+struct zcrypt_device_matrix_ext {
+	struct zcrypt_device_status_ext device[MAX_ZDEV_ENTRIES_EXT];
+};
+
+#define AUTOSELECT  0xFFFFFFFF
+#define AUTOSEL_AP  ((__u16)0xFFFF)
+#define AUTOSEL_DOM ((__u16)0xFFFF)
+
+#define ZCRYPT_IOCTL_MAGIC 'z'
+
+/**
+ * Interface notes:
+ *
+ * The ioctl()s which are implemented (along with relevant details)
+ * are:
+ *
+ *   ICARSAMODEXPO
+ *     Perform an RSA operation using a Modulus-Exponent pair
+ *     This takes an ica_rsa_modexpo struct as its arg.
+ *
+ *     NOTE: please refer to the comments preceding this structure
+ *	     for the implementation details for the contents of the
+ *	     block
+ *
+ *   ICARSACRT
+ *     Perform an RSA operation using a Chinese-Remainder Theorem key
+ *     This takes an ica_rsa_modexpo_crt struct as its arg.
+ *
+ *     NOTE: please refer to the comments preceding this structure
+ *	     for the implementation details for the contents of the
+ *	     block
+ *
+ *   ZSECSENDCPRB
+ *     Send an arbitrary CPRB to a crypto card.
+ *
+ *   ZSENDEP11CPRB
+ *     Send an arbitrary EP11 CPRB to an EP11 coprocessor crypto card.
+ *
+ *   ZCRYPT_DEVICE_STATUS
+ *     The given struct zcrypt_device_matrix_ext is updated with
+ *     status information for each currently known apqn.
+ *
+ *   ZCRYPT_STATUS_MASK
+ *     Return an MAX_ZDEV_CARDIDS_EXT element array of unsigned chars for the
+ *     status of all devices.
+ *	 0x01: PCICA
+ *	 0x02: PCICC
+ *	 0x03: PCIXCC_MCL2
+ *	 0x04: PCIXCC_MCL3
+ *	 0x05: CEX2C
+ *	 0x06: CEX2A
+ *	 0x07: CEX3C
+ *	 0x08: CEX3A
+ *	 0x0a: CEX4
+ *	 0x0b: CEX5
+ *	 0x0c: CEX6, CEX7 or CEX8
+ *	 0x0d: device is disabled
+ *
+ *   ZCRYPT_QDEPTH_MASK
+ *     Return an MAX_ZDEV_CARDIDS_EXT element array of unsigned chars for the
+ *     queue depth of all devices.
+ *
+ *   ZCRYPT_PERDEV_REQCNT
+ *     Return an MAX_ZDEV_CARDIDS_EXT element array of unsigned integers for
+ *     the number of successfully completed requests per device since the
+ *     device was detected and made available.
+ *
+ */
+
+/**
+ * Supported ioctl calls
+ */
+#define ICARSAMODEXPO  _IOC(_IOC_READ | _IOC_WRITE, ZCRYPT_IOCTL_MAGIC, 0x05, 0)
+#define ICARSACRT      _IOC(_IOC_READ | _IOC_WRITE, ZCRYPT_IOCTL_MAGIC, 0x06, 0)
+#define ZSECSENDCPRB   _IOC(_IOC_READ | _IOC_WRITE, ZCRYPT_IOCTL_MAGIC, 0x81, 0)
+#define ZSENDEP11CPRB  _IOC(_IOC_READ | _IOC_WRITE, ZCRYPT_IOCTL_MAGIC, 0x04, 0)
+
+#define ZCRYPT_DEVICE_STATUS _IOC(_IOC_READ | _IOC_WRITE, ZCRYPT_IOCTL_MAGIC, 0x5f, 0)
+#define ZCRYPT_STATUS_MASK   _IOR(ZCRYPT_IOCTL_MAGIC, 0x58, char[MAX_ZDEV_CARDIDS_EXT])
+#define ZCRYPT_QDEPTH_MASK   _IOR(ZCRYPT_IOCTL_MAGIC, 0x59, char[MAX_ZDEV_CARDIDS_EXT])
+#define ZCRYPT_PERDEV_REQCNT _IOR(ZCRYPT_IOCTL_MAGIC, 0x5a, int[MAX_ZDEV_CARDIDS_EXT])
+
+/*
+ * Support for multiple zcrypt device nodes.
+ */
+
+/* Nr of minor device node numbers to allocate. */
+#define ZCRYPT_MAX_MINOR_NODES 256
+
+/* Max amount of possible ioctls */
+#define MAX_ZDEV_IOCTLS (1 << _IOC_NRBITS)
+
+/*
+ * Only deprecated defines, structs and ioctls below this line.
+ */
+
+/* Deprecated: use MAX_ZDEV_CARDIDS_EXT */
+#define MAX_ZDEV_CARDIDS 64
+/* Deprecated: use MAX_ZDEV_DOMAINS_EXT */
+#define MAX_ZDEV_DOMAINS 256
+
+/* Deprecated: use MAX_ZDEV_ENTRIES_EXT */
+#define MAX_ZDEV_ENTRIES (MAX_ZDEV_CARDIDS * MAX_ZDEV_DOMAINS)
+
+/* Deprecated: use struct zcrypt_device_status_ext */
+struct zcrypt_device_status {
+	unsigned int hwtype:8;
+	unsigned int qid:14;
+	unsigned int online:1;
+	unsigned int functions:6;
+	unsigned int reserved:3;
+};
+
+/* Deprecated: use struct zcrypt_device_matrix_ext */
+struct zcrypt_device_matrix {
+	struct zcrypt_device_status device[MAX_ZDEV_ENTRIES];
+};
+
+/* Deprecated: use ZCRYPT_DEVICE_STATUS */
+#define ZDEVICESTATUS _IOC(_IOC_READ | _IOC_WRITE, ZCRYPT_IOCTL_MAGIC, 0x4f, 0)
+/* Deprecated: use ZCRYPT_STATUS_MASK */
+#define Z90STAT_STATUS_MASK _IOR(ZCRYPT_IOCTL_MAGIC, 0x48, char[64])
+/* Deprecated: use ZCRYPT_QDEPTH_MASK */
+#define Z90STAT_QDEPTH_MASK _IOR(ZCRYPT_IOCTL_MAGIC, 0x49, char[64])
+/* Deprecated: use ZCRYPT_PERDEV_REQCNT */
+#define Z90STAT_PERDEV_REQCNT _IOR(ZCRYPT_IOCTL_MAGIC, 0x4a, int[64])
+
+/* Deprecated: use sysfs to query these values */
+#define Z90STAT_REQUESTQ_COUNT	_IOR(ZCRYPT_IOCTL_MAGIC, 0x44, int)
+#define Z90STAT_PENDINGQ_COUNT	_IOR(ZCRYPT_IOCTL_MAGIC, 0x45, int)
+#define Z90STAT_TOTALOPEN_COUNT _IOR(ZCRYPT_IOCTL_MAGIC, 0x46, int)
+#define Z90STAT_DOMAIN_INDEX	_IOR(ZCRYPT_IOCTL_MAGIC, 0x47, int)
+
+/*
+ * The ioctl number ranges 0x40 - 0x42 and 0x4b - 0x4e had been used in the
+ * past, don't assign new ioctls for these.
+ */
+
+#endif /* __ASM_S390_ZCRYPT_H */
diff --git a/arch/s390/kernel/.gitignore b/arch/s390/kernel/.gitignore
new file mode 100644
index 0000000000..bbb90f92d0
--- /dev/null
+++ b/arch/s390/kernel/.gitignore
@@ -0,0 +1,2 @@
+# SPDX-License-Identifier: GPL-2.0-only
+vmlinux.lds
diff --git a/arch/s390/kernel/Makefile b/arch/s390/kernel/Makefile
new file mode 100644
index 0000000000..0df2b88cc0
--- /dev/null
+++ b/arch/s390/kernel/Makefile
@@ -0,0 +1,85 @@
+# SPDX-License-Identifier: GPL-2.0
+#
+# Makefile for the linux kernel.
+#
+
+ifdef CONFIG_FUNCTION_TRACER
+
+# Do not trace tracer code
+CFLAGS_REMOVE_ftrace.o		= $(CC_FLAGS_FTRACE)
+
+# Do not trace early setup code
+CFLAGS_REMOVE_early.o		= $(CC_FLAGS_FTRACE)
+CFLAGS_REMOVE_rethook.o		= $(CC_FLAGS_FTRACE)
+
+endif
+
+GCOV_PROFILE_early.o		:= n
+KCOV_INSTRUMENT_early.o		:= n
+UBSAN_SANITIZE_early.o		:= n
+KASAN_SANITIZE_ipl.o		:= n
+KASAN_SANITIZE_machine_kexec.o	:= n
+
+#
+# Passing null pointers is ok for smp code, since we access the lowcore here.
+#
+CFLAGS_smp.o		:= -Wno-nonnull
+
+#
+# Disable tailcall optimizations for stack / callchain walking functions
+# since this might generate broken code when accessing register 15 and
+# passing its content to other functions.
+#
+CFLAGS_stacktrace.o	+= -fno-optimize-sibling-calls
+CFLAGS_dumpstack.o	+= -fno-optimize-sibling-calls
+CFLAGS_unwind_bc.o	+= -fno-optimize-sibling-calls
+
+obj-y	:= head64.o traps.o time.o process.o earlypgm.o early.o setup.o idle.o vtime.o
+obj-y	+= processor.o syscall.o ptrace.o signal.o cpcmd.o ebcdic.o nmi.o
+obj-y	+= debug.o irq.o ipl.o dis.o diag.o vdso.o cpufeature.o
+obj-y	+= sysinfo.o lgr.o os_info.o
+obj-y	+= runtime_instr.o cache.o fpu.o dumpstack.o guarded_storage.o sthyi.o
+obj-y	+= entry.o reipl.o kdebugfs.o alternative.o
+obj-y	+= nospec-branch.o ipl_vmparm.o machine_kexec_reloc.o unwind_bc.o
+obj-y	+= smp.o text_amode31.o stacktrace.o abs_lowcore.o
+
+extra-y				+= vmlinux.lds
+
+obj-$(CONFIG_SYSFS)		+= nospec-sysfs.o
+CFLAGS_REMOVE_nospec-branch.o	+= $(CC_FLAGS_EXPOLINE)
+
+obj-$(CONFIG_MODULES)		+= module.o
+obj-$(CONFIG_SCHED_TOPOLOGY)	+= topology.o
+obj-$(CONFIG_NUMA)		+= numa.o
+obj-$(CONFIG_AUDIT)		+= audit.o
+compat-obj-$(CONFIG_AUDIT)	+= compat_audit.o
+obj-$(CONFIG_COMPAT)		+= compat_linux.o compat_signal.o
+obj-$(CONFIG_COMPAT)		+= $(compat-obj-y)
+obj-$(CONFIG_EARLY_PRINTK)	+= early_printk.o
+obj-$(CONFIG_KPROBES)		+= kprobes.o
+obj-$(CONFIG_KPROBES)		+= kprobes_insn_page.o
+obj-$(CONFIG_KPROBES)		+= mcount.o
+obj-$(CONFIG_RETHOOK)		+= rethook.o
+obj-$(CONFIG_FUNCTION_TRACER)	+= ftrace.o
+obj-$(CONFIG_FUNCTION_TRACER)	+= mcount.o
+obj-$(CONFIG_CRASH_DUMP)	+= crash_dump.o
+obj-$(CONFIG_KEXEC_CORE)	+= machine_kexec.o relocate_kernel.o
+obj-$(CONFIG_UPROBES)		+= uprobes.o
+obj-$(CONFIG_JUMP_LABEL)	+= jump_label.o
+
+obj-$(CONFIG_KEXEC_FILE)	+= machine_kexec_file.o kexec_image.o
+obj-$(CONFIG_KEXEC_FILE)	+= kexec_elf.o
+obj-$(CONFIG_CERT_STORE)	+= cert_store.o
+obj-$(CONFIG_IMA_SECURE_AND_OR_TRUSTED_BOOT)	+= ima_arch.o
+
+obj-$(CONFIG_PERF_EVENTS)	+= perf_event.o
+obj-$(CONFIG_PERF_EVENTS)	+= perf_cpum_cf.o perf_cpum_sf.o
+obj-$(CONFIG_PERF_EVENTS)	+= perf_cpum_cf_events.o perf_regs.o
+obj-$(CONFIG_PERF_EVENTS)	+= perf_pai_crypto.o perf_pai_ext.o
+
+obj-$(CONFIG_TRACEPOINTS)	+= trace.o
+obj-$(findstring y, $(CONFIG_PROTECTED_VIRTUALIZATION_GUEST) $(CONFIG_PGSTE))	+= uv.o
+
+# vdso
+obj-y				+= vdso64/
+obj-$(CONFIG_COMPAT)		+= vdso32/
diff --git a/arch/s390/kernel/abs_lowcore.c b/arch/s390/kernel/abs_lowcore.c
new file mode 100644
index 0000000000..f9efc54ec4
--- /dev/null
+++ b/arch/s390/kernel/abs_lowcore.c
@@ -0,0 +1,46 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/pgtable.h>
+#include <asm/abs_lowcore.h>
+
+unsigned long __bootdata_preserved(__abs_lowcore);
+
+int abs_lowcore_map(int cpu, struct lowcore *lc, bool alloc)
+{
+	unsigned long addr = __abs_lowcore + (cpu * sizeof(struct lowcore));
+	unsigned long phys = __pa(lc);
+	int rc, i;
+
+	for (i = 0; i < LC_PAGES; i++) {
+		rc = __vmem_map_4k_page(addr, phys, PAGE_KERNEL, alloc);
+		if (rc) {
+			/*
+			 * Do not unmap allocated page tables in case the
+			 * allocation was not requested. In such a case the
+			 * request is expected coming from an atomic context,
+			 * while the unmap attempt might sleep.
+			 */
+			if (alloc) {
+				for (--i; i >= 0; i--) {
+					addr -= PAGE_SIZE;
+					vmem_unmap_4k_page(addr);
+				}
+			}
+			return rc;
+		}
+		addr += PAGE_SIZE;
+		phys += PAGE_SIZE;
+	}
+	return 0;
+}
+
+void abs_lowcore_unmap(int cpu)
+{
+	unsigned long addr = __abs_lowcore + (cpu * sizeof(struct lowcore));
+	int i;
+
+	for (i = 0; i < LC_PAGES; i++) {
+		vmem_unmap_4k_page(addr);
+		addr += PAGE_SIZE;
+	}
+}
diff --git a/arch/s390/kernel/alternative.c b/arch/s390/kernel/alternative.c
new file mode 100644
index 0000000000..e7bca29f9c
--- /dev/null
+++ b/arch/s390/kernel/alternative.c
@@ -0,0 +1,75 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/module.h>
+#include <linux/cpu.h>
+#include <linux/smp.h>
+#include <asm/text-patching.h>
+#include <asm/alternative.h>
+#include <asm/facility.h>
+#include <asm/nospec-branch.h>
+
+static int __initdata_or_module alt_instr_disabled;
+
+static int __init disable_alternative_instructions(char *str)
+{
+	alt_instr_disabled = 1;
+	return 0;
+}
+
+early_param("noaltinstr", disable_alternative_instructions);
+
+static void __init_or_module __apply_alternatives(struct alt_instr *start,
+						  struct alt_instr *end)
+{
+	struct alt_instr *a;
+	u8 *instr, *replacement;
+
+	/*
+	 * The scan order should be from start to end. A later scanned
+	 * alternative code can overwrite previously scanned alternative code.
+	 */
+	for (a = start; a < end; a++) {
+		instr = (u8 *)&a->instr_offset + a->instr_offset;
+		replacement = (u8 *)&a->repl_offset + a->repl_offset;
+
+		if (!__test_facility(a->facility, alt_stfle_fac_list))
+			continue;
+
+		if (unlikely(a->instrlen % 2)) {
+			WARN_ONCE(1, "cpu alternatives instructions length is "
+				     "odd, skipping patching\n");
+			continue;
+		}
+
+		s390_kernel_write(instr, replacement, a->instrlen);
+	}
+}
+
+void __init_or_module apply_alternatives(struct alt_instr *start,
+					 struct alt_instr *end)
+{
+	if (!alt_instr_disabled)
+		__apply_alternatives(start, end);
+}
+
+extern struct alt_instr __alt_instructions[], __alt_instructions_end[];
+void __init apply_alternative_instructions(void)
+{
+	apply_alternatives(__alt_instructions, __alt_instructions_end);
+}
+
+static void do_sync_core(void *info)
+{
+	sync_core();
+}
+
+void text_poke_sync(void)
+{
+	on_each_cpu(do_sync_core, NULL, 1);
+}
+
+void text_poke_sync_lock(void)
+{
+	cpus_read_lock();
+	text_poke_sync();
+	cpus_read_unlock();
+}
diff --git a/arch/s390/kernel/asm-offsets.c b/arch/s390/kernel/asm-offsets.c
new file mode 100644
index 0000000000..fa5f6885c7
--- /dev/null
+++ b/arch/s390/kernel/asm-offsets.c
@@ -0,0 +1,190 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Generate definitions needed by assembly language modules.
+ * This code generates raw asm output which is post-processed to extract
+ * and format the required data.
+ */
+
+#define ASM_OFFSETS_C
+
+#include <linux/kbuild.h>
+#include <linux/kvm_host.h>
+#include <linux/sched.h>
+#include <linux/purgatory.h>
+#include <linux/pgtable.h>
+#include <linux/ftrace.h>
+#include <asm/idle.h>
+#include <asm/gmap.h>
+#include <asm/stacktrace.h>
+
+int main(void)
+{
+	/* task struct offsets */
+	OFFSET(__TASK_stack, task_struct, stack);
+	OFFSET(__TASK_thread, task_struct, thread);
+	OFFSET(__TASK_pid, task_struct, pid);
+	BLANK();
+	/* thread struct offsets */
+	OFFSET(__THREAD_ksp, thread_struct, ksp);
+	BLANK();
+	/* thread info offsets */
+	OFFSET(__TI_flags, task_struct, thread_info.flags);
+	BLANK();
+	/* pt_regs offsets */
+	OFFSET(__PT_PSW, pt_regs, psw);
+	OFFSET(__PT_GPRS, pt_regs, gprs);
+	OFFSET(__PT_R0, pt_regs, gprs[0]);
+	OFFSET(__PT_R1, pt_regs, gprs[1]);
+	OFFSET(__PT_R2, pt_regs, gprs[2]);
+	OFFSET(__PT_R3, pt_regs, gprs[3]);
+	OFFSET(__PT_R4, pt_regs, gprs[4]);
+	OFFSET(__PT_R5, pt_regs, gprs[5]);
+	OFFSET(__PT_R6, pt_regs, gprs[6]);
+	OFFSET(__PT_R7, pt_regs, gprs[7]);
+	OFFSET(__PT_R8, pt_regs, gprs[8]);
+	OFFSET(__PT_R9, pt_regs, gprs[9]);
+	OFFSET(__PT_R10, pt_regs, gprs[10]);
+	OFFSET(__PT_R11, pt_regs, gprs[11]);
+	OFFSET(__PT_R12, pt_regs, gprs[12]);
+	OFFSET(__PT_R13, pt_regs, gprs[13]);
+	OFFSET(__PT_R14, pt_regs, gprs[14]);
+	OFFSET(__PT_R15, pt_regs, gprs[15]);
+	OFFSET(__PT_ORIG_GPR2, pt_regs, orig_gpr2);
+	OFFSET(__PT_FLAGS, pt_regs, flags);
+	OFFSET(__PT_CR1, pt_regs, cr1);
+	OFFSET(__PT_LAST_BREAK, pt_regs, last_break);
+	DEFINE(__PT_SIZE, sizeof(struct pt_regs));
+	BLANK();
+	/* stack_frame offsets */
+	OFFSET(__SF_BACKCHAIN, stack_frame, back_chain);
+	OFFSET(__SF_GPRS, stack_frame, gprs);
+	OFFSET(__SF_EMPTY, stack_frame, empty[0]);
+	OFFSET(__SF_SIE_CONTROL, stack_frame, sie_control_block);
+	OFFSET(__SF_SIE_SAVEAREA, stack_frame, sie_savearea);
+	OFFSET(__SF_SIE_REASON, stack_frame, sie_reason);
+	OFFSET(__SF_SIE_FLAGS, stack_frame, sie_flags);
+	OFFSET(__SF_SIE_CONTROL_PHYS, stack_frame, sie_control_block_phys);
+	DEFINE(STACK_FRAME_OVERHEAD, sizeof(struct stack_frame));
+	BLANK();
+	/* idle data offsets */
+	OFFSET(__CLOCK_IDLE_ENTER, s390_idle_data, clock_idle_enter);
+	OFFSET(__TIMER_IDLE_ENTER, s390_idle_data, timer_idle_enter);
+	OFFSET(__MT_CYCLES_ENTER, s390_idle_data, mt_cycles_enter);
+	BLANK();
+	/* hardware defined lowcore locations 0x000 - 0x1ff */
+	OFFSET(__LC_EXT_PARAMS, lowcore, ext_params);
+	OFFSET(__LC_EXT_CPU_ADDR, lowcore, ext_cpu_addr);
+	OFFSET(__LC_EXT_INT_CODE, lowcore, ext_int_code);
+	OFFSET(__LC_PGM_ILC, lowcore, pgm_ilc);
+	OFFSET(__LC_PGM_INT_CODE, lowcore, pgm_code);
+	OFFSET(__LC_DATA_EXC_CODE, lowcore, data_exc_code);
+	OFFSET(__LC_MON_CLASS_NR, lowcore, mon_class_num);
+	OFFSET(__LC_PER_CODE, lowcore, per_code);
+	OFFSET(__LC_PER_ATMID, lowcore, per_atmid);
+	OFFSET(__LC_PER_ADDRESS, lowcore, per_address);
+	OFFSET(__LC_EXC_ACCESS_ID, lowcore, exc_access_id);
+	OFFSET(__LC_PER_ACCESS_ID, lowcore, per_access_id);
+	OFFSET(__LC_OP_ACCESS_ID, lowcore, op_access_id);
+	OFFSET(__LC_AR_MODE_ID, lowcore, ar_mode_id);
+	OFFSET(__LC_TRANS_EXC_CODE, lowcore, trans_exc_code);
+	OFFSET(__LC_MON_CODE, lowcore, monitor_code);
+	OFFSET(__LC_SUBCHANNEL_ID, lowcore, subchannel_id);
+	OFFSET(__LC_SUBCHANNEL_NR, lowcore, subchannel_nr);
+	OFFSET(__LC_IO_INT_PARM, lowcore, io_int_parm);
+	OFFSET(__LC_IO_INT_WORD, lowcore, io_int_word);
+	OFFSET(__LC_MCCK_CODE, lowcore, mcck_interruption_code);
+	OFFSET(__LC_EXT_DAMAGE_CODE, lowcore, external_damage_code);
+	OFFSET(__LC_MCCK_FAIL_STOR_ADDR, lowcore, failing_storage_address);
+	OFFSET(__LC_PGM_LAST_BREAK, lowcore, pgm_last_break);
+	OFFSET(__LC_RETURN_LPSWE, lowcore, return_lpswe);
+	OFFSET(__LC_RETURN_MCCK_LPSWE, lowcore, return_mcck_lpswe);
+	OFFSET(__LC_RST_OLD_PSW, lowcore, restart_old_psw);
+	OFFSET(__LC_EXT_OLD_PSW, lowcore, external_old_psw);
+	OFFSET(__LC_SVC_OLD_PSW, lowcore, svc_old_psw);
+	OFFSET(__LC_PGM_OLD_PSW, lowcore, program_old_psw);
+	OFFSET(__LC_MCK_OLD_PSW, lowcore, mcck_old_psw);
+	OFFSET(__LC_IO_OLD_PSW, lowcore, io_old_psw);
+	OFFSET(__LC_RST_NEW_PSW, lowcore, restart_psw);
+	OFFSET(__LC_EXT_NEW_PSW, lowcore, external_new_psw);
+	OFFSET(__LC_SVC_NEW_PSW, lowcore, svc_new_psw);
+	OFFSET(__LC_PGM_NEW_PSW, lowcore, program_new_psw);
+	OFFSET(__LC_MCK_NEW_PSW, lowcore, mcck_new_psw);
+	OFFSET(__LC_IO_NEW_PSW, lowcore, io_new_psw);
+	/* software defined lowcore locations 0x200 - 0xdff*/
+	OFFSET(__LC_SAVE_AREA_SYNC, lowcore, save_area_sync);
+	OFFSET(__LC_SAVE_AREA_ASYNC, lowcore, save_area_async);
+	OFFSET(__LC_SAVE_AREA_RESTART, lowcore, save_area_restart);
+	OFFSET(__LC_CPU_FLAGS, lowcore, cpu_flags);
+	OFFSET(__LC_RETURN_PSW, lowcore, return_psw);
+	OFFSET(__LC_RETURN_MCCK_PSW, lowcore, return_mcck_psw);
+	OFFSET(__LC_SYS_ENTER_TIMER, lowcore, sys_enter_timer);
+	OFFSET(__LC_MCCK_ENTER_TIMER, lowcore, mcck_enter_timer);
+	OFFSET(__LC_EXIT_TIMER, lowcore, exit_timer);
+	OFFSET(__LC_LAST_UPDATE_TIMER, lowcore, last_update_timer);
+	OFFSET(__LC_LAST_UPDATE_CLOCK, lowcore, last_update_clock);
+	OFFSET(__LC_INT_CLOCK, lowcore, int_clock);
+	OFFSET(__LC_BOOT_CLOCK, lowcore, boot_clock);
+	OFFSET(__LC_CURRENT, lowcore, current_task);
+	OFFSET(__LC_KERNEL_STACK, lowcore, kernel_stack);
+	OFFSET(__LC_ASYNC_STACK, lowcore, async_stack);
+	OFFSET(__LC_NODAT_STACK, lowcore, nodat_stack);
+	OFFSET(__LC_RESTART_STACK, lowcore, restart_stack);
+	OFFSET(__LC_MCCK_STACK, lowcore, mcck_stack);
+	OFFSET(__LC_RESTART_FN, lowcore, restart_fn);
+	OFFSET(__LC_RESTART_DATA, lowcore, restart_data);
+	OFFSET(__LC_RESTART_SOURCE, lowcore, restart_source);
+	OFFSET(__LC_RESTART_FLAGS, lowcore, restart_flags);
+	OFFSET(__LC_KERNEL_ASCE, lowcore, kernel_asce);
+	OFFSET(__LC_USER_ASCE, lowcore, user_asce);
+	OFFSET(__LC_LPP, lowcore, lpp);
+	OFFSET(__LC_CURRENT_PID, lowcore, current_pid);
+	OFFSET(__LC_GMAP, lowcore, gmap);
+	OFFSET(__LC_LAST_BREAK, lowcore, last_break);
+	/* software defined ABI-relevant lowcore locations 0xe00 - 0xe20 */
+	OFFSET(__LC_DUMP_REIPL, lowcore, ipib);
+	OFFSET(__LC_VMCORE_INFO, lowcore, vmcore_info);
+	OFFSET(__LC_OS_INFO, lowcore, os_info);
+	/* hardware defined lowcore locations 0x1000 - 0x18ff */
+	OFFSET(__LC_MCESAD, lowcore, mcesad);
+	OFFSET(__LC_EXT_PARAMS2, lowcore, ext_params2);
+	OFFSET(__LC_FPREGS_SAVE_AREA, lowcore, floating_pt_save_area);
+	OFFSET(__LC_GPREGS_SAVE_AREA, lowcore, gpregs_save_area);
+	OFFSET(__LC_PSW_SAVE_AREA, lowcore, psw_save_area);
+	OFFSET(__LC_PREFIX_SAVE_AREA, lowcore, prefixreg_save_area);
+	OFFSET(__LC_FP_CREG_SAVE_AREA, lowcore, fpt_creg_save_area);
+	OFFSET(__LC_TOD_PROGREG_SAVE_AREA, lowcore, tod_progreg_save_area);
+	OFFSET(__LC_CPU_TIMER_SAVE_AREA, lowcore, cpu_timer_save_area);
+	OFFSET(__LC_CLOCK_COMP_SAVE_AREA, lowcore, clock_comp_save_area);
+	OFFSET(__LC_LAST_BREAK_SAVE_AREA, lowcore, last_break_save_area);
+	OFFSET(__LC_AREGS_SAVE_AREA, lowcore, access_regs_save_area);
+	OFFSET(__LC_CREGS_SAVE_AREA, lowcore, cregs_save_area);
+	OFFSET(__LC_PGM_TDB, lowcore, pgm_tdb);
+	BLANK();
+	/* gmap/sie offsets */
+	OFFSET(__GMAP_ASCE, gmap, asce);
+	OFFSET(__SIE_PROG0C, kvm_s390_sie_block, prog0c);
+	OFFSET(__SIE_PROG20, kvm_s390_sie_block, prog20);
+	/* kexec_sha_region */
+	OFFSET(__KEXEC_SHA_REGION_START, kexec_sha_region, start);
+	OFFSET(__KEXEC_SHA_REGION_LEN, kexec_sha_region, len);
+	DEFINE(__KEXEC_SHA_REGION_SIZE, sizeof(struct kexec_sha_region));
+	/* sizeof kernel parameter area */
+	DEFINE(__PARMAREA_SIZE, sizeof(struct parmarea));
+	/* kernel parameter area offsets */
+	DEFINE(IPL_DEVICE, PARMAREA + offsetof(struct parmarea, ipl_device));
+	DEFINE(INITRD_START, PARMAREA + offsetof(struct parmarea, initrd_start));
+	DEFINE(INITRD_SIZE, PARMAREA + offsetof(struct parmarea, initrd_size));
+	DEFINE(OLDMEM_BASE, PARMAREA + offsetof(struct parmarea, oldmem_base));
+	DEFINE(OLDMEM_SIZE, PARMAREA + offsetof(struct parmarea, oldmem_size));
+	DEFINE(COMMAND_LINE, PARMAREA + offsetof(struct parmarea, command_line));
+	DEFINE(MAX_COMMAND_LINE_SIZE, PARMAREA + offsetof(struct parmarea, max_command_line_size));
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+	/* function graph return value tracing */
+	OFFSET(__FGRAPH_RET_GPR2, fgraph_ret_regs, gpr2);
+	OFFSET(__FGRAPH_RET_FP, fgraph_ret_regs, fp);
+	DEFINE(__FGRAPH_RET_SIZE, sizeof(struct fgraph_ret_regs));
+#endif
+	OFFSET(__FTRACE_REGS_PT_REGS, ftrace_regs, regs);
+	DEFINE(__FTRACE_REGS_SIZE, sizeof(struct ftrace_regs));
+	return 0;
+}
diff --git a/arch/s390/kernel/audit.c b/arch/s390/kernel/audit.c
new file mode 100644
index 0000000000..02051a596b
--- /dev/null
+++ b/arch/s390/kernel/audit.c
@@ -0,0 +1,81 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/init.h>
+#include <linux/types.h>
+#include <linux/audit.h>
+#include <asm/unistd.h>
+#include "audit.h"
+
+static unsigned dir_class[] = {
+#include <asm-generic/audit_dir_write.h>
+~0U
+};
+
+static unsigned read_class[] = {
+#include <asm-generic/audit_read.h>
+~0U
+};
+
+static unsigned write_class[] = {
+#include <asm-generic/audit_write.h>
+~0U
+};
+
+static unsigned chattr_class[] = {
+#include <asm-generic/audit_change_attr.h>
+~0U
+};
+
+static unsigned signal_class[] = {
+#include <asm-generic/audit_signal.h>
+~0U
+};
+
+int audit_classify_arch(int arch)
+{
+#ifdef CONFIG_COMPAT
+	if (arch == AUDIT_ARCH_S390)
+		return 1;
+#endif
+	return 0;
+}
+
+int audit_classify_syscall(int abi, unsigned syscall)
+{
+#ifdef CONFIG_COMPAT
+	if (abi == AUDIT_ARCH_S390)
+		return s390_classify_syscall(syscall);
+#endif
+	switch(syscall) {
+	case __NR_open:
+		return AUDITSC_OPEN;
+	case __NR_openat:
+		return AUDITSC_OPENAT;
+	case __NR_socketcall:
+		return AUDITSC_SOCKETCALL;
+	case __NR_execve:
+		return AUDITSC_EXECVE;
+	case __NR_openat2:
+		return AUDITSC_OPENAT2;
+	default:
+		return AUDITSC_NATIVE;
+	}
+}
+
+static int __init audit_classes_init(void)
+{
+#ifdef CONFIG_COMPAT
+	audit_register_class(AUDIT_CLASS_WRITE_32, s390_write_class);
+	audit_register_class(AUDIT_CLASS_READ_32, s390_read_class);
+	audit_register_class(AUDIT_CLASS_DIR_WRITE_32, s390_dir_class);
+	audit_register_class(AUDIT_CLASS_CHATTR_32, s390_chattr_class);
+	audit_register_class(AUDIT_CLASS_SIGNAL_32, s390_signal_class);
+#endif
+	audit_register_class(AUDIT_CLASS_WRITE, write_class);
+	audit_register_class(AUDIT_CLASS_READ, read_class);
+	audit_register_class(AUDIT_CLASS_DIR_WRITE, dir_class);
+	audit_register_class(AUDIT_CLASS_CHATTR, chattr_class);
+	audit_register_class(AUDIT_CLASS_SIGNAL, signal_class);
+	return 0;
+}
+
+__initcall(audit_classes_init);
diff --git a/arch/s390/kernel/audit.h b/arch/s390/kernel/audit.h
new file mode 100644
index 0000000000..4d4b596412
--- /dev/null
+++ b/arch/s390/kernel/audit.h
@@ -0,0 +1,16 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __ARCH_S390_KERNEL_AUDIT_H
+#define __ARCH_S390_KERNEL_AUDIT_H
+
+#include <linux/types.h>
+
+#ifdef CONFIG_COMPAT
+extern int s390_classify_syscall(unsigned);
+extern __u32 s390_dir_class[];
+extern __u32 s390_write_class[];
+extern __u32 s390_read_class[];
+extern __u32 s390_chattr_class[];
+extern __u32 s390_signal_class[];
+#endif /* CONFIG_COMPAT */
+
+#endif /* __ARCH_S390_KERNEL_AUDIT_H */
diff --git a/arch/s390/kernel/cache.c b/arch/s390/kernel/cache.c
new file mode 100644
index 0000000000..56254fa06f
--- /dev/null
+++ b/arch/s390/kernel/cache.c
@@ -0,0 +1,170 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Extract CPU cache information and expose them via sysfs.
+ *
+ *    Copyright IBM Corp. 2012
+ */
+
+#include <linux/seq_file.h>
+#include <linux/cpu.h>
+#include <linux/cacheinfo.h>
+#include <asm/facility.h>
+
+enum {
+	CACHE_SCOPE_NOTEXISTS,
+	CACHE_SCOPE_PRIVATE,
+	CACHE_SCOPE_SHARED,
+	CACHE_SCOPE_RESERVED,
+};
+
+enum {
+	CTYPE_SEPARATE,
+	CTYPE_DATA,
+	CTYPE_INSTRUCTION,
+	CTYPE_UNIFIED,
+};
+
+enum {
+	EXTRACT_TOPOLOGY,
+	EXTRACT_LINE_SIZE,
+	EXTRACT_SIZE,
+	EXTRACT_ASSOCIATIVITY,
+};
+
+enum {
+	CACHE_TI_UNIFIED = 0,
+	CACHE_TI_DATA = 0,
+	CACHE_TI_INSTRUCTION,
+};
+
+struct cache_info {
+	unsigned char	    : 4;
+	unsigned char scope : 2;
+	unsigned char type  : 2;
+};
+
+#define CACHE_MAX_LEVEL 8
+union cache_topology {
+	struct cache_info ci[CACHE_MAX_LEVEL];
+	unsigned long raw;
+};
+
+static const char * const cache_type_string[] = {
+	"",
+	"Instruction",
+	"Data",
+	"",
+	"Unified",
+};
+
+static const enum cache_type cache_type_map[] = {
+	[CTYPE_SEPARATE] = CACHE_TYPE_SEPARATE,
+	[CTYPE_DATA] = CACHE_TYPE_DATA,
+	[CTYPE_INSTRUCTION] = CACHE_TYPE_INST,
+	[CTYPE_UNIFIED] = CACHE_TYPE_UNIFIED,
+};
+
+void show_cacheinfo(struct seq_file *m)
+{
+	struct cpu_cacheinfo *this_cpu_ci;
+	struct cacheinfo *cache;
+	int idx;
+
+	this_cpu_ci = get_cpu_cacheinfo(cpumask_any(cpu_online_mask));
+	for (idx = 0; idx < this_cpu_ci->num_leaves; idx++) {
+		cache = this_cpu_ci->info_list + idx;
+		seq_printf(m, "cache%-11d: ", idx);
+		seq_printf(m, "level=%d ", cache->level);
+		seq_printf(m, "type=%s ", cache_type_string[cache->type]);
+		seq_printf(m, "scope=%s ",
+			   cache->disable_sysfs ? "Shared" : "Private");
+		seq_printf(m, "size=%dK ", cache->size >> 10);
+		seq_printf(m, "line_size=%u ", cache->coherency_line_size);
+		seq_printf(m, "associativity=%d", cache->ways_of_associativity);
+		seq_puts(m, "\n");
+	}
+}
+
+static inline enum cache_type get_cache_type(struct cache_info *ci, int level)
+{
+	if (level >= CACHE_MAX_LEVEL)
+		return CACHE_TYPE_NOCACHE;
+	ci += level;
+	if (ci->scope != CACHE_SCOPE_SHARED && ci->scope != CACHE_SCOPE_PRIVATE)
+		return CACHE_TYPE_NOCACHE;
+	return cache_type_map[ci->type];
+}
+
+static inline unsigned long ecag(int ai, int li, int ti)
+{
+	return __ecag(ECAG_CACHE_ATTRIBUTE, ai << 4 | li << 1 | ti);
+}
+
+static void ci_leaf_init(struct cacheinfo *this_leaf, int private,
+			 enum cache_type type, unsigned int level, int cpu)
+{
+	int ti, num_sets;
+
+	if (type == CACHE_TYPE_INST)
+		ti = CACHE_TI_INSTRUCTION;
+	else
+		ti = CACHE_TI_UNIFIED;
+	this_leaf->level = level + 1;
+	this_leaf->type = type;
+	this_leaf->coherency_line_size = ecag(EXTRACT_LINE_SIZE, level, ti);
+	this_leaf->ways_of_associativity = ecag(EXTRACT_ASSOCIATIVITY, level, ti);
+	this_leaf->size = ecag(EXTRACT_SIZE, level, ti);
+	num_sets = this_leaf->size / this_leaf->coherency_line_size;
+	num_sets /= this_leaf->ways_of_associativity;
+	this_leaf->number_of_sets = num_sets;
+	cpumask_set_cpu(cpu, &this_leaf->shared_cpu_map);
+	if (!private)
+		this_leaf->disable_sysfs = true;
+}
+
+int init_cache_level(unsigned int cpu)
+{
+	struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu);
+	unsigned int level = 0, leaves = 0;
+	union cache_topology ct;
+	enum cache_type ctype;
+
+	if (!this_cpu_ci)
+		return -EINVAL;
+	ct.raw = ecag(EXTRACT_TOPOLOGY, 0, 0);
+	do {
+		ctype = get_cache_type(&ct.ci[0], level);
+		if (ctype == CACHE_TYPE_NOCACHE)
+			break;
+		/* Separate instruction and data caches */
+		leaves += (ctype == CACHE_TYPE_SEPARATE) ? 2 : 1;
+	} while (++level < CACHE_MAX_LEVEL);
+	this_cpu_ci->num_levels = level;
+	this_cpu_ci->num_leaves = leaves;
+	return 0;
+}
+
+int populate_cache_leaves(unsigned int cpu)
+{
+	struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu);
+	struct cacheinfo *this_leaf = this_cpu_ci->info_list;
+	unsigned int level, idx, pvt;
+	union cache_topology ct;
+	enum cache_type ctype;
+
+	ct.raw = ecag(EXTRACT_TOPOLOGY, 0, 0);
+	for (idx = 0, level = 0; level < this_cpu_ci->num_levels &&
+	     idx < this_cpu_ci->num_leaves; idx++, level++) {
+		if (!this_leaf)
+			return -EINVAL;
+		pvt = (ct.ci[level].scope == CACHE_SCOPE_PRIVATE) ? 1 : 0;
+		ctype = get_cache_type(&ct.ci[0], level);
+		if (ctype == CACHE_TYPE_SEPARATE) {
+			ci_leaf_init(this_leaf++, pvt, CACHE_TYPE_DATA, level, cpu);
+			ci_leaf_init(this_leaf++, pvt, CACHE_TYPE_INST, level, cpu);
+		} else {
+			ci_leaf_init(this_leaf++, pvt, ctype, level, cpu);
+		}
+	}
+	return 0;
+}
diff --git a/arch/s390/kernel/cert_store.c b/arch/s390/kernel/cert_store.c
new file mode 100644
index 0000000000..554447768b
--- /dev/null
+++ b/arch/s390/kernel/cert_store.c
@@ -0,0 +1,812 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * DIAG 0x320 support and certificate store handling
+ *
+ * Copyright IBM Corp. 2023
+ * Author(s):	Anastasia Eskova <anastasia.eskova@ibm.com>
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/delay.h>
+#include <linux/device.h>
+#include <linux/fs.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/key-type.h>
+#include <linux/key.h>
+#include <linux/keyctl.h>
+#include <linux/kobject.h>
+#include <linux/module.h>
+#include <linux/seq_file.h>
+#include <linux/slab.h>
+#include <linux/sysfs.h>
+#include <crypto/sha2.h>
+#include <keys/user-type.h>
+#include <asm/debug.h>
+#include <asm/diag.h>
+#include <asm/ebcdic.h>
+#include <asm/sclp.h>
+
+#define DIAG_MAX_RETRIES		10
+
+#define VCE_FLAGS_VALID_MASK		0x80
+
+#define ISM_LEN_DWORDS			4
+#define VCSSB_LEN_BYTES			128
+#define VCSSB_LEN_NO_CERTS		4
+#define VCB_LEN_NO_CERTS		64
+#define VC_NAME_LEN_BYTES		64
+
+#define CERT_STORE_KEY_TYPE_NAME	"cert_store_key"
+#define CERT_STORE_KEYRING_NAME		"cert_store"
+
+static debug_info_t *cert_store_dbf;
+static debug_info_t *cert_store_hexdump;
+
+#define pr_dbf_msg(fmt, ...) \
+	debug_sprintf_event(cert_store_dbf, 3, fmt "\n", ## __VA_ARGS__)
+
+enum diag320_subcode {
+	DIAG320_SUBCODES	= 0,
+	DIAG320_STORAGE		= 1,
+	DIAG320_CERT_BLOCK	= 2,
+};
+
+enum diag320_rc {
+	DIAG320_RC_OK		= 0x0001,
+	DIAG320_RC_CS_NOMATCH	= 0x0306,
+};
+
+/* Verification Certificates Store Support Block (VCSSB). */
+struct vcssb {
+	u32 vcssb_length;
+	u8  pad_0x04[3];
+	u8  version;
+	u8  pad_0x08[8];
+	u32 cs_token;
+	u8  pad_0x14[12];
+	u16 total_vc_index_count;
+	u16 max_vc_index_count;
+	u8  pad_0x24[28];
+	u32 max_vce_length;
+	u32 max_vcxe_length;
+	u8  pad_0x48[8];
+	u32 max_single_vcb_length;
+	u32 total_vcb_length;
+	u32 max_single_vcxb_length;
+	u32 total_vcxb_length;
+	u8  pad_0x60[32];
+} __packed __aligned(8);
+
+/* Verification Certificate Entry (VCE) Header. */
+struct vce_header {
+	u32 vce_length;
+	u8  flags;
+	u8  key_type;
+	u16 vc_index;
+	u8  vc_name[VC_NAME_LEN_BYTES]; /* EBCDIC */
+	u8  vc_format;
+	u8  pad_0x49;
+	u16 key_id_length;
+	u8  pad_0x4c;
+	u8  vc_hash_type;
+	u16 vc_hash_length;
+	u8  pad_0x50[4];
+	u32 vc_length;
+	u8  pad_0x58[8];
+	u16 vc_hash_offset;
+	u16 vc_offset;
+	u8  pad_0x64[28];
+} __packed __aligned(4);
+
+/* Verification Certificate Block (VCB) Header. */
+struct vcb_header {
+	u32 vcb_input_length;
+	u8  pad_0x04[4];
+	u16 first_vc_index;
+	u16 last_vc_index;
+	u32 pad_0x0c;
+	u32 cs_token;
+	u8  pad_0x14[12];
+	u32 vcb_output_length;
+	u8  pad_0x24[3];
+	u8  version;
+	u16 stored_vc_count;
+	u16 remaining_vc_count;
+	u8  pad_0x2c[20];
+} __packed __aligned(4);
+
+/* Verification Certificate Block (VCB). */
+struct vcb {
+	struct vcb_header vcb_hdr;
+	u8 vcb_buf[];
+} __packed __aligned(4);
+
+/* Verification Certificate Entry (VCE). */
+struct vce {
+	struct vce_header vce_hdr;
+	u8 cert_data_buf[];
+} __packed __aligned(4);
+
+static void cert_store_key_describe(const struct key *key, struct seq_file *m)
+{
+	char ascii[VC_NAME_LEN_BYTES + 1];
+
+	/*
+	 * First 64 bytes of the key description is key name in EBCDIC CP 500.
+	 * Convert it to ASCII for displaying in /proc/keys.
+	 */
+	strscpy(ascii, key->description, sizeof(ascii));
+	EBCASC_500(ascii, VC_NAME_LEN_BYTES);
+	seq_puts(m, ascii);
+
+	seq_puts(m, &key->description[VC_NAME_LEN_BYTES]);
+	if (key_is_positive(key))
+		seq_printf(m, ": %u", key->datalen);
+}
+
+/*
+ * Certificate store key type takes over properties of
+ * user key but cannot be updated.
+ */
+static struct key_type key_type_cert_store_key = {
+	.name		= CERT_STORE_KEY_TYPE_NAME,
+	.preparse	= user_preparse,
+	.free_preparse	= user_free_preparse,
+	.instantiate	= generic_key_instantiate,
+	.revoke		= user_revoke,
+	.destroy	= user_destroy,
+	.describe	= cert_store_key_describe,
+	.read		= user_read,
+};
+
+/* Logging functions. */
+static void pr_dbf_vcb(const struct vcb *b)
+{
+	pr_dbf_msg("VCB Header:");
+	pr_dbf_msg("vcb_input_length: %d", b->vcb_hdr.vcb_input_length);
+	pr_dbf_msg("first_vc_index: %d", b->vcb_hdr.first_vc_index);
+	pr_dbf_msg("last_vc_index: %d", b->vcb_hdr.last_vc_index);
+	pr_dbf_msg("cs_token: %d", b->vcb_hdr.cs_token);
+	pr_dbf_msg("vcb_output_length: %d", b->vcb_hdr.vcb_output_length);
+	pr_dbf_msg("version: %d", b->vcb_hdr.version);
+	pr_dbf_msg("stored_vc_count: %d", b->vcb_hdr.stored_vc_count);
+	pr_dbf_msg("remaining_vc_count: %d", b->vcb_hdr.remaining_vc_count);
+}
+
+static void pr_dbf_vce(const struct vce *e)
+{
+	unsigned char vc_name[VC_NAME_LEN_BYTES + 1];
+	char log_string[VC_NAME_LEN_BYTES + 40];
+
+	pr_dbf_msg("VCE Header:");
+	pr_dbf_msg("vce_hdr.vce_length: %d", e->vce_hdr.vce_length);
+	pr_dbf_msg("vce_hdr.flags: %d", e->vce_hdr.flags);
+	pr_dbf_msg("vce_hdr.key_type: %d", e->vce_hdr.key_type);
+	pr_dbf_msg("vce_hdr.vc_index: %d", e->vce_hdr.vc_index);
+	pr_dbf_msg("vce_hdr.vc_format: %d", e->vce_hdr.vc_format);
+	pr_dbf_msg("vce_hdr.key_id_length: %d", e->vce_hdr.key_id_length);
+	pr_dbf_msg("vce_hdr.vc_hash_type: %d", e->vce_hdr.vc_hash_type);
+	pr_dbf_msg("vce_hdr.vc_hash_length: %d", e->vce_hdr.vc_hash_length);
+	pr_dbf_msg("vce_hdr.vc_hash_offset: %d", e->vce_hdr.vc_hash_offset);
+	pr_dbf_msg("vce_hdr.vc_length: %d", e->vce_hdr.vc_length);
+	pr_dbf_msg("vce_hdr.vc_offset: %d", e->vce_hdr.vc_offset);
+
+	/* Certificate name in ASCII. */
+	memcpy(vc_name, e->vce_hdr.vc_name, VC_NAME_LEN_BYTES);
+	EBCASC_500(vc_name, VC_NAME_LEN_BYTES);
+	vc_name[VC_NAME_LEN_BYTES] = '\0';
+
+	snprintf(log_string, sizeof(log_string),
+		 "index: %d vce_hdr.vc_name (ASCII): %s",
+		 e->vce_hdr.vc_index, vc_name);
+	debug_text_event(cert_store_hexdump, 3, log_string);
+
+	/* Certificate data. */
+	debug_text_event(cert_store_hexdump, 3, "VCE: Certificate data start");
+	debug_event(cert_store_hexdump, 3, (u8 *)e->cert_data_buf, 128);
+	debug_text_event(cert_store_hexdump, 3, "VCE: Certificate data end");
+	debug_event(cert_store_hexdump, 3,
+		    (u8 *)e->cert_data_buf + e->vce_hdr.vce_length - 128, 128);
+}
+
+static void pr_dbf_vcssb(const struct vcssb *s)
+{
+	debug_text_event(cert_store_hexdump, 3, "DIAG320 Subcode1");
+	debug_event(cert_store_hexdump, 3, (u8 *)s, VCSSB_LEN_BYTES);
+
+	pr_dbf_msg("VCSSB:");
+	pr_dbf_msg("vcssb_length: %u", s->vcssb_length);
+	pr_dbf_msg("version: %u", s->version);
+	pr_dbf_msg("cs_token: %u", s->cs_token);
+	pr_dbf_msg("total_vc_index_count: %u", s->total_vc_index_count);
+	pr_dbf_msg("max_vc_index_count: %u", s->max_vc_index_count);
+	pr_dbf_msg("max_vce_length: %u", s->max_vce_length);
+	pr_dbf_msg("max_vcxe_length: %u", s->max_vce_length);
+	pr_dbf_msg("max_single_vcb_length: %u", s->max_single_vcb_length);
+	pr_dbf_msg("total_vcb_length: %u", s->total_vcb_length);
+	pr_dbf_msg("max_single_vcxb_length: %u", s->max_single_vcxb_length);
+	pr_dbf_msg("total_vcxb_length: %u", s->total_vcxb_length);
+}
+
+static int __diag320(unsigned long subcode, void *addr)
+{
+	union register_pair rp = { .even = (unsigned long)addr, };
+
+	asm volatile(
+		"	diag	%[rp],%[subcode],0x320\n"
+		"0:	nopr	%%r7\n"
+		EX_TABLE(0b, 0b)
+		: [rp] "+d" (rp.pair)
+		: [subcode] "d" (subcode)
+		: "cc", "memory");
+
+	return rp.odd;
+}
+
+static int diag320(unsigned long subcode, void *addr)
+{
+	diag_stat_inc(DIAG_STAT_X320);
+
+	return __diag320(subcode, addr);
+}
+
+/*
+ * Calculate SHA256 hash of the VCE certificate and compare it to hash stored in
+ * VCE. Return -EINVAL if hashes don't match.
+ */
+static int check_certificate_hash(const struct vce *vce)
+{
+	u8 hash[SHA256_DIGEST_SIZE];
+	u16 vc_hash_length;
+	u8 *vce_hash;
+
+	vce_hash = (u8 *)vce + vce->vce_hdr.vc_hash_offset;
+	vc_hash_length = vce->vce_hdr.vc_hash_length;
+	sha256((u8 *)vce + vce->vce_hdr.vc_offset, vce->vce_hdr.vc_length, hash);
+	if (memcmp(vce_hash, hash, vc_hash_length) == 0)
+		return 0;
+
+	pr_dbf_msg("SHA256 hash of received certificate does not match");
+	debug_text_event(cert_store_hexdump, 3, "VCE hash:");
+	debug_event(cert_store_hexdump, 3, vce_hash, SHA256_DIGEST_SIZE);
+	debug_text_event(cert_store_hexdump, 3, "Calculated hash:");
+	debug_event(cert_store_hexdump, 3, hash, SHA256_DIGEST_SIZE);
+
+	return -EINVAL;
+}
+
+static int check_certificate_valid(const struct vce *vce)
+{
+	if (!(vce->vce_hdr.flags & VCE_FLAGS_VALID_MASK)) {
+		pr_dbf_msg("Certificate entry is invalid");
+		return -EINVAL;
+	}
+	if (vce->vce_hdr.vc_format != 1) {
+		pr_dbf_msg("Certificate format is not supported");
+		return -EINVAL;
+	}
+	if (vce->vce_hdr.vc_hash_type != 1) {
+		pr_dbf_msg("Hash type is not supported");
+		return -EINVAL;
+	}
+
+	return check_certificate_hash(vce);
+}
+
+static struct key *get_user_session_keyring(void)
+{
+	key_ref_t us_keyring_ref;
+
+	us_keyring_ref = lookup_user_key(KEY_SPEC_USER_SESSION_KEYRING,
+					 KEY_LOOKUP_CREATE, KEY_NEED_LINK);
+	if (IS_ERR(us_keyring_ref)) {
+		pr_dbf_msg("Couldn't get user session keyring: %ld",
+			   PTR_ERR(us_keyring_ref));
+		return ERR_PTR(-ENOKEY);
+	}
+	key_ref_put(us_keyring_ref);
+	return key_ref_to_ptr(us_keyring_ref);
+}
+
+/* Invalidate all keys from cert_store keyring. */
+static int invalidate_keyring_keys(struct key *keyring)
+{
+	unsigned long num_keys, key_index;
+	size_t keyring_payload_len;
+	key_serial_t *key_array;
+	struct key *current_key;
+	int rc;
+
+	keyring_payload_len = key_type_keyring.read(keyring, NULL, 0);
+	num_keys = keyring_payload_len / sizeof(key_serial_t);
+	key_array = kcalloc(num_keys, sizeof(key_serial_t), GFP_KERNEL);
+	if (!key_array)
+		return -ENOMEM;
+
+	rc = key_type_keyring.read(keyring, (char *)key_array, keyring_payload_len);
+	if (rc != keyring_payload_len) {
+		pr_dbf_msg("Couldn't read keyring payload");
+		goto out;
+	}
+
+	for (key_index = 0; key_index < num_keys; key_index++) {
+		current_key = key_lookup(key_array[key_index]);
+		pr_dbf_msg("Invalidating key %08x", current_key->serial);
+
+		key_invalidate(current_key);
+		key_put(current_key);
+		rc = key_unlink(keyring, current_key);
+		if (rc) {
+			pr_dbf_msg("Couldn't unlink key %08x: %d", current_key->serial, rc);
+			break;
+		}
+	}
+out:
+	kfree(key_array);
+	return rc;
+}
+
+static struct key *find_cs_keyring(void)
+{
+	key_ref_t cs_keyring_ref;
+	struct key *cs_keyring;
+
+	cs_keyring_ref = keyring_search(make_key_ref(get_user_session_keyring(), true),
+					&key_type_keyring, CERT_STORE_KEYRING_NAME,
+					false);
+	if (!IS_ERR(cs_keyring_ref)) {
+		cs_keyring = key_ref_to_ptr(cs_keyring_ref);
+		key_ref_put(cs_keyring_ref);
+		goto found;
+	}
+	/* Search default locations: thread, process, session keyrings */
+	cs_keyring = request_key(&key_type_keyring, CERT_STORE_KEYRING_NAME, NULL);
+	if (IS_ERR(cs_keyring))
+		return NULL;
+	key_put(cs_keyring);
+found:
+	return cs_keyring;
+}
+
+static void cleanup_cs_keys(void)
+{
+	struct key *cs_keyring;
+
+	cs_keyring = find_cs_keyring();
+	if (!cs_keyring)
+		return;
+
+	pr_dbf_msg("Found cert_store keyring. Purging...");
+	/*
+	 * Remove cert_store_key_type in case invalidation
+	 * of old cert_store keys failed (= severe error).
+	 */
+	if (invalidate_keyring_keys(cs_keyring))
+		unregister_key_type(&key_type_cert_store_key);
+
+	keyring_clear(cs_keyring);
+	key_invalidate(cs_keyring);
+	key_put(cs_keyring);
+	key_unlink(get_user_session_keyring(), cs_keyring);
+}
+
+static struct key *create_cs_keyring(void)
+{
+	static struct key *cs_keyring;
+
+	/* Cleanup previous cs_keyring and all associated keys if any. */
+	cleanup_cs_keys();
+	cs_keyring = keyring_alloc(CERT_STORE_KEYRING_NAME, GLOBAL_ROOT_UID,
+				   GLOBAL_ROOT_GID, current_cred(),
+				   (KEY_POS_ALL & ~KEY_POS_SETATTR) | KEY_USR_VIEW | KEY_USR_READ,
+				   KEY_ALLOC_NOT_IN_QUOTA | KEY_ALLOC_SET_KEEP,
+				   NULL, get_user_session_keyring());
+	if (IS_ERR(cs_keyring)) {
+		pr_dbf_msg("Can't allocate cert_store keyring");
+		return NULL;
+	}
+
+	pr_dbf_msg("Successfully allocated cert_store keyring: %08x", cs_keyring->serial);
+
+	/*
+	 * In case a previous clean-up ran into an
+	 * error and unregistered key type.
+	 */
+	register_key_type(&key_type_cert_store_key);
+
+	return cs_keyring;
+}
+
+/*
+ * Allocate memory and create key description in format
+ * [key name in EBCDIC]:[VCE index]:[CS token].
+ * Return a pointer to key description or NULL if memory
+ * allocation failed. Memory should be freed by caller.
+ */
+static char *get_key_description(struct vcssb *vcssb, const struct vce *vce)
+{
+	size_t len, name_len;
+	u32 cs_token;
+	char *desc;
+
+	cs_token = vcssb->cs_token;
+	/* Description string contains "%64s:%05u:%010u\0". */
+	name_len = sizeof(vce->vce_hdr.vc_name);
+	len = name_len + 1 + 5 + 1 + 10 + 1;
+	desc = kmalloc(len, GFP_KERNEL);
+	if (!desc)
+		return NULL;
+
+	memcpy(desc, vce->vce_hdr.vc_name, name_len);
+	snprintf(desc + name_len, len - name_len, ":%05u:%010u",
+		 vce->vce_hdr.vc_index, cs_token);
+
+	return desc;
+}
+
+/*
+ * Create a key of type "cert_store_key" using the data from VCE for key
+ * payload and key description. Link the key to "cert_store" keyring.
+ */
+static int create_key_from_vce(struct vcssb *vcssb, struct vce *vce,
+			       struct key *keyring)
+{
+	key_ref_t newkey;
+	char *desc;
+	int rc;
+
+	desc = get_key_description(vcssb, vce);
+	if (!desc)
+		return -ENOMEM;
+
+	newkey = key_create_or_update(
+		make_key_ref(keyring, true), CERT_STORE_KEY_TYPE_NAME,
+		desc, (u8 *)vce + vce->vce_hdr.vc_offset,
+		vce->vce_hdr.vc_length,
+		(KEY_POS_ALL & ~KEY_POS_SETATTR)  | KEY_USR_VIEW | KEY_USR_READ,
+		KEY_ALLOC_NOT_IN_QUOTA);
+
+	rc = PTR_ERR_OR_ZERO(newkey);
+	if (rc) {
+		pr_dbf_msg("Couldn't create a key from Certificate Entry (%d)", rc);
+		rc = -ENOKEY;
+		goto out;
+	}
+
+	key_ref_put(newkey);
+out:
+	kfree(desc);
+	return rc;
+}
+
+/* Get Verification Certificate Storage Size block with DIAG320 subcode2. */
+static int get_vcssb(struct vcssb *vcssb)
+{
+	int diag320_rc;
+
+	memset(vcssb, 0, sizeof(*vcssb));
+	vcssb->vcssb_length = VCSSB_LEN_BYTES;
+	diag320_rc = diag320(DIAG320_STORAGE, vcssb);
+	pr_dbf_vcssb(vcssb);
+
+	if (diag320_rc != DIAG320_RC_OK) {
+		pr_dbf_msg("Diag 320 Subcode 1 returned bad RC: %04x", diag320_rc);
+		return -EIO;
+	}
+	if (vcssb->vcssb_length == VCSSB_LEN_NO_CERTS) {
+		pr_dbf_msg("No certificates available for current configuration");
+		return -ENOKEY;
+	}
+
+	return 0;
+}
+
+static u32 get_4k_mult_vcb_size(struct vcssb *vcssb)
+{
+	return round_up(vcssb->max_single_vcb_length, PAGE_SIZE);
+}
+
+/* Fill input fields of single-entry VCB that will be read by LPAR. */
+static void fill_vcb_input(struct vcssb *vcssb, struct vcb *vcb, u16 index)
+{
+	memset(vcb, 0, sizeof(*vcb));
+	vcb->vcb_hdr.vcb_input_length = get_4k_mult_vcb_size(vcssb);
+	vcb->vcb_hdr.cs_token = vcssb->cs_token;
+
+	/* Request single entry. */
+	vcb->vcb_hdr.first_vc_index = index;
+	vcb->vcb_hdr.last_vc_index = index;
+}
+
+static void extract_vce_from_sevcb(struct vcb *vcb, struct vce *vce)
+{
+	struct vce *extracted_vce;
+
+	extracted_vce = (struct vce *)vcb->vcb_buf;
+	memcpy(vce, vcb->vcb_buf, extracted_vce->vce_hdr.vce_length);
+	pr_dbf_vce(vce);
+}
+
+static int get_sevcb(struct vcssb *vcssb, u16 index, struct vcb *vcb)
+{
+	int rc, diag320_rc;
+
+	fill_vcb_input(vcssb, vcb, index);
+
+	diag320_rc = diag320(DIAG320_CERT_BLOCK, vcb);
+	pr_dbf_msg("Diag 320 Subcode2 RC %2x", diag320_rc);
+	pr_dbf_vcb(vcb);
+
+	switch (diag320_rc) {
+	case DIAG320_RC_OK:
+		rc = 0;
+		if (vcb->vcb_hdr.vcb_output_length == VCB_LEN_NO_CERTS) {
+			pr_dbf_msg("No certificate entry for index %u", index);
+			rc = -ENOKEY;
+		} else if (vcb->vcb_hdr.remaining_vc_count != 0) {
+			/* Retry on insufficient space. */
+			pr_dbf_msg("Couldn't get all requested certificates");
+			rc = -EAGAIN;
+		}
+		break;
+	case DIAG320_RC_CS_NOMATCH:
+		pr_dbf_msg("Certificate Store token mismatch");
+		rc = -EAGAIN;
+		break;
+	default:
+		pr_dbf_msg("Diag 320 Subcode2 returned bad rc (0x%4x)", diag320_rc);
+		rc = -EINVAL;
+		break;
+	}
+
+	return rc;
+}
+
+/*
+ * Allocate memory for single-entry VCB, get VCB via DIAG320 subcode 2 call,
+ * extract VCE and create a key from its' certificate.
+ */
+static int create_key_from_sevcb(struct vcssb *vcssb, u16 index,
+				 struct key *keyring)
+{
+	struct vcb *vcb;
+	struct vce *vce;
+	int rc;
+
+	rc = -ENOMEM;
+	vcb = vmalloc(get_4k_mult_vcb_size(vcssb));
+	vce = vmalloc(vcssb->max_single_vcb_length - sizeof(vcb->vcb_hdr));
+	if (!vcb || !vce)
+		goto out;
+
+	rc = get_sevcb(vcssb, index, vcb);
+	if (rc)
+		goto out;
+
+	extract_vce_from_sevcb(vcb, vce);
+	rc = check_certificate_valid(vce);
+	if (rc)
+		goto out;
+
+	rc = create_key_from_vce(vcssb, vce, keyring);
+	if (rc)
+		goto out;
+
+	pr_dbf_msg("Successfully created key from Certificate Entry %d", index);
+out:
+	vfree(vce);
+	vfree(vcb);
+	return rc;
+}
+
+/*
+ * Request a single-entry VCB for each VCE available for the partition.
+ * Create a key from it and link it to cert_store keyring. If no keys
+ * could be created (i.e. VCEs were invalid) return -ENOKEY.
+ */
+static int add_certificates_to_keyring(struct vcssb *vcssb, struct key *keyring)
+{
+	int rc, index, count, added;
+
+	count = 0;
+	added = 0;
+	/* Certificate Store entries indices start with 1 and have no gaps. */
+	for (index = 1; index < vcssb->total_vc_index_count + 1; index++) {
+		pr_dbf_msg("Creating key from VCE %u", index);
+		rc = create_key_from_sevcb(vcssb, index, keyring);
+		count++;
+
+		if (rc == -EAGAIN)
+			return rc;
+
+		if (rc)
+			pr_dbf_msg("Creating key from VCE %u failed (%d)", index, rc);
+		else
+			added++;
+	}
+
+	if (added == 0) {
+		pr_dbf_msg("Processed %d entries. No keys created", count);
+		return -ENOKEY;
+	}
+
+	pr_info("Added %d of %d keys to cert_store keyring", added, count);
+
+	/*
+	 * Do not allow to link more keys to certificate store keyring after all
+	 * the VCEs were processed.
+	 */
+	rc = keyring_restrict(make_key_ref(keyring, true), NULL, NULL);
+	if (rc)
+		pr_dbf_msg("Failed to set restriction to cert_store keyring (%d)", rc);
+
+	return 0;
+}
+
+/*
+ * Check which DIAG320 subcodes are installed.
+ * Return -ENOENT if subcodes 1 or 2 are not available.
+ */
+static int query_diag320_subcodes(void)
+{
+	unsigned long ism[ISM_LEN_DWORDS];
+	int rc;
+
+	rc = diag320(0, ism);
+	if (rc != DIAG320_RC_OK) {
+		pr_dbf_msg("DIAG320 subcode query returned %04x", rc);
+		return -ENOENT;
+	}
+
+	debug_text_event(cert_store_hexdump, 3, "DIAG320 Subcode 0");
+	debug_event(cert_store_hexdump, 3, ism, sizeof(ism));
+
+	if (!test_bit_inv(1, ism) || !test_bit_inv(2, ism)) {
+		pr_dbf_msg("Not all required DIAG320 subcodes are installed");
+		return -ENOENT;
+	}
+
+	return 0;
+}
+
+/*
+ * Check if Certificate Store is supported by the firmware and DIAG320 subcodes
+ * 1 and 2 are installed. Create cert_store keyring and link all certificates
+ * available for the current partition to it as "cert_store_key" type
+ * keys. On refresh or error invalidate cert_store keyring and destroy
+ * all keys of "cert_store_key" type.
+ */
+static int fill_cs_keyring(void)
+{
+	struct key *cs_keyring;
+	struct vcssb *vcssb;
+	int rc;
+
+	rc = -ENOMEM;
+	vcssb = kmalloc(VCSSB_LEN_BYTES, GFP_KERNEL);
+	if (!vcssb)
+		goto cleanup_keys;
+
+	rc = -ENOENT;
+	if (!sclp.has_diag320) {
+		pr_dbf_msg("Certificate Store is not supported");
+		goto cleanup_keys;
+	}
+
+	rc = query_diag320_subcodes();
+	if (rc)
+		goto cleanup_keys;
+
+	rc = get_vcssb(vcssb);
+	if (rc)
+		goto cleanup_keys;
+
+	rc = -ENOMEM;
+	cs_keyring = create_cs_keyring();
+	if (!cs_keyring)
+		goto cleanup_keys;
+
+	rc = add_certificates_to_keyring(vcssb, cs_keyring);
+	if (rc)
+		goto cleanup_cs_keyring;
+
+	goto out;
+
+cleanup_cs_keyring:
+	key_put(cs_keyring);
+cleanup_keys:
+	cleanup_cs_keys();
+out:
+	kfree(vcssb);
+	return rc;
+}
+
+static DEFINE_MUTEX(cs_refresh_lock);
+static int cs_status_val = -1;
+
+static ssize_t cs_status_show(struct kobject *kobj,
+			      struct kobj_attribute *attr, char *buf)
+{
+	if (cs_status_val == -1)
+		return sysfs_emit(buf, "uninitialized\n");
+	else if (cs_status_val == 0)
+		return sysfs_emit(buf, "ok\n");
+
+	return sysfs_emit(buf, "failed (%d)\n", cs_status_val);
+}
+
+static struct kobj_attribute cs_status_attr = __ATTR_RO(cs_status);
+
+static ssize_t refresh_store(struct kobject *kobj, struct kobj_attribute *attr,
+			     const char *buf, size_t count)
+{
+	int rc, retries;
+
+	pr_dbf_msg("Refresh certificate store information requested");
+	rc = mutex_lock_interruptible(&cs_refresh_lock);
+	if (rc)
+		return rc;
+
+	for (retries = 0; retries < DIAG_MAX_RETRIES; retries++) {
+		/* Request certificates from certificate store. */
+		rc = fill_cs_keyring();
+		if (rc)
+			pr_dbf_msg("Failed to refresh certificate store information (%d)", rc);
+		if (rc != -EAGAIN)
+			break;
+	}
+	cs_status_val = rc;
+	mutex_unlock(&cs_refresh_lock);
+
+	return rc ?: count;
+}
+
+static struct kobj_attribute refresh_attr = __ATTR_WO(refresh);
+
+static const struct attribute *cert_store_attrs[] __initconst = {
+	&cs_status_attr.attr,
+	&refresh_attr.attr,
+	NULL,
+};
+
+static struct kobject *cert_store_kobj;
+
+static int __init cert_store_init(void)
+{
+	int rc = -ENOMEM;
+
+	cert_store_dbf = debug_register("cert_store_msg", 10, 1, 64);
+	if (!cert_store_dbf)
+		goto cleanup_dbf;
+
+	cert_store_hexdump = debug_register("cert_store_hexdump", 3, 1, 128);
+	if (!cert_store_hexdump)
+		goto cleanup_dbf;
+
+	debug_register_view(cert_store_hexdump, &debug_hex_ascii_view);
+	debug_register_view(cert_store_dbf, &debug_sprintf_view);
+
+	/* Create directory /sys/firmware/cert_store. */
+	cert_store_kobj = kobject_create_and_add("cert_store", firmware_kobj);
+	if (!cert_store_kobj)
+		goto cleanup_dbf;
+
+	rc = sysfs_create_files(cert_store_kobj, cert_store_attrs);
+	if (rc)
+		goto cleanup_kobj;
+
+	register_key_type(&key_type_cert_store_key);
+
+	return rc;
+
+cleanup_kobj:
+	kobject_put(cert_store_kobj);
+cleanup_dbf:
+	debug_unregister(cert_store_dbf);
+	debug_unregister(cert_store_hexdump);
+
+	return rc;
+}
+device_initcall(cert_store_init);
diff --git a/arch/s390/kernel/compat_audit.c b/arch/s390/kernel/compat_audit.c
new file mode 100644
index 0000000000..a7c46e8310
--- /dev/null
+++ b/arch/s390/kernel/compat_audit.c
@@ -0,0 +1,48 @@
+// SPDX-License-Identifier: GPL-2.0
+#undef __s390x__
+#include <linux/audit_arch.h>
+#include <asm/unistd.h>
+#include "audit.h"
+
+unsigned s390_dir_class[] = {
+#include <asm-generic/audit_dir_write.h>
+~0U
+};
+
+unsigned s390_chattr_class[] = {
+#include <asm-generic/audit_change_attr.h>
+~0U
+};
+
+unsigned s390_write_class[] = {
+#include <asm-generic/audit_write.h>
+~0U
+};
+
+unsigned s390_read_class[] = {
+#include <asm-generic/audit_read.h>
+~0U
+};
+
+unsigned s390_signal_class[] = {
+#include <asm-generic/audit_signal.h>
+~0U
+};
+
+int s390_classify_syscall(unsigned syscall)
+{
+	switch(syscall) {
+	case __NR_open:
+		return AUDITSC_OPEN;
+	case __NR_openat:
+		return AUDITSC_OPENAT;
+	case __NR_socketcall:
+		return AUDITSC_SOCKETCALL;
+	case __NR_execve:
+		return AUDITSC_EXECVE;
+	case __NR_openat2:
+		return AUDITSC_OPENAT2;
+	default:
+		return AUDITSC_COMPAT;
+	}
+}
diff --git a/arch/s390/kernel/compat_linux.c b/arch/s390/kernel/compat_linux.c
new file mode 100644
index 0000000000..f9d418d1b6
--- /dev/null
+++ b/arch/s390/kernel/compat_linux.c
@@ -0,0 +1,289 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ *  S390 version
+ *    Copyright IBM Corp. 2000
+ *    Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com),
+ *               Gerhard Tonn (ton@de.ibm.com)   
+ *               Thomas Spatzier (tspat@de.ibm.com)
+ *
+ *  Conversion between 31bit and 64bit native syscalls.
+ *
+ * Heavily inspired by the 32-bit Sparc compat code which is 
+ * Copyright (C) 1997,1998 Jakub Jelinek (jj@sunsite.mff.cuni.cz)
+ * Copyright (C) 1997 David S. Miller (davem@caip.rutgers.edu)
+ *
+ */
+
+
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/fs.h> 
+#include <linux/mm.h> 
+#include <linux/file.h> 
+#include <linux/signal.h>
+#include <linux/resource.h>
+#include <linux/times.h>
+#include <linux/smp.h>
+#include <linux/sem.h>
+#include <linux/msg.h>
+#include <linux/shm.h>
+#include <linux/uio.h>
+#include <linux/quota.h>
+#include <linux/poll.h>
+#include <linux/personality.h>
+#include <linux/stat.h>
+#include <linux/filter.h>
+#include <linux/highmem.h>
+#include <linux/mman.h>
+#include <linux/ipv6.h>
+#include <linux/in.h>
+#include <linux/icmpv6.h>
+#include <linux/syscalls.h>
+#include <linux/sysctl.h>
+#include <linux/binfmts.h>
+#include <linux/capability.h>
+#include <linux/compat.h>
+#include <linux/vfs.h>
+#include <linux/ptrace.h>
+#include <linux/fadvise.h>
+#include <linux/ipc.h>
+#include <linux/slab.h>
+
+#include <asm/types.h>
+#include <linux/uaccess.h>
+
+#include <net/scm.h>
+#include <net/sock.h>
+
+#include "compat_linux.h"
+
+#ifdef CONFIG_SYSVIPC
+COMPAT_SYSCALL_DEFINE5(s390_ipc, uint, call, int, first, compat_ulong_t, second,
+		compat_ulong_t, third, compat_uptr_t, ptr)
+{
+	if (call >> 16)		/* hack for backward compatibility */
+		return -EINVAL;
+	return compat_ksys_ipc(call, first, second, third, ptr, third);
+}
+#endif
+
+COMPAT_SYSCALL_DEFINE3(s390_truncate64, const char __user *, path, u32, high, u32, low)
+{
+	return ksys_truncate(path, (unsigned long)high << 32 | low);
+}
+
+COMPAT_SYSCALL_DEFINE3(s390_ftruncate64, unsigned int, fd, u32, high, u32, low)
+{
+	return ksys_ftruncate(fd, (unsigned long)high << 32 | low);
+}
+
+COMPAT_SYSCALL_DEFINE5(s390_pread64, unsigned int, fd, char __user *, ubuf,
+		       compat_size_t, count, u32, high, u32, low)
+{
+	if ((compat_ssize_t) count < 0)
+		return -EINVAL;
+	return ksys_pread64(fd, ubuf, count, (unsigned long)high << 32 | low);
+}
+
+COMPAT_SYSCALL_DEFINE5(s390_pwrite64, unsigned int, fd, const char __user *, ubuf,
+		       compat_size_t, count, u32, high, u32, low)
+{
+	if ((compat_ssize_t) count < 0)
+		return -EINVAL;
+	return ksys_pwrite64(fd, ubuf, count, (unsigned long)high << 32 | low);
+}
+
+COMPAT_SYSCALL_DEFINE4(s390_readahead, int, fd, u32, high, u32, low, s32, count)
+{
+	return ksys_readahead(fd, (unsigned long)high << 32 | low, count);
+}
+
+struct stat64_emu31 {
+	unsigned long long  st_dev;
+	unsigned int    __pad1;
+#define STAT64_HAS_BROKEN_ST_INO        1
+	u32             __st_ino;
+	unsigned int    st_mode;
+	unsigned int    st_nlink;
+	u32             st_uid;
+	u32             st_gid;
+	unsigned long long  st_rdev;
+	unsigned int    __pad3;
+	long            st_size;
+	u32             st_blksize;
+	unsigned char   __pad4[4];
+	u32             __pad5;     /* future possible st_blocks high bits */
+	u32             st_blocks;  /* Number 512-byte blocks allocated. */
+	u32             st_atime;
+	u32             __pad6;
+	u32             st_mtime;
+	u32             __pad7;
+	u32             st_ctime;
+	u32             __pad8;     /* will be high 32 bits of ctime someday */
+	unsigned long   st_ino;
+};	
+
+static int cp_stat64(struct stat64_emu31 __user *ubuf, struct kstat *stat)
+{
+	struct stat64_emu31 tmp;
+
+	memset(&tmp, 0, sizeof(tmp));
+
+	tmp.st_dev = huge_encode_dev(stat->dev);
+	tmp.st_ino = stat->ino;
+	tmp.__st_ino = (u32)stat->ino;
+	tmp.st_mode = stat->mode;
+	tmp.st_nlink = (unsigned int)stat->nlink;
+	tmp.st_uid = from_kuid_munged(current_user_ns(), stat->uid);
+	tmp.st_gid = from_kgid_munged(current_user_ns(), stat->gid);
+	tmp.st_rdev = huge_encode_dev(stat->rdev);
+	tmp.st_size = stat->size;
+	tmp.st_blksize = (u32)stat->blksize;
+	tmp.st_blocks = (u32)stat->blocks;
+	tmp.st_atime = (u32)stat->atime.tv_sec;
+	tmp.st_mtime = (u32)stat->mtime.tv_sec;
+	tmp.st_ctime = (u32)stat->ctime.tv_sec;
+
+	return copy_to_user(ubuf,&tmp,sizeof(tmp)) ? -EFAULT : 0; 
+}
+
+COMPAT_SYSCALL_DEFINE2(s390_stat64, const char __user *, filename, struct stat64_emu31 __user *, statbuf)
+{
+	struct kstat stat;
+	int ret = vfs_stat(filename, &stat);
+	if (!ret)
+		ret = cp_stat64(statbuf, &stat);
+	return ret;
+}
+
+COMPAT_SYSCALL_DEFINE2(s390_lstat64, const char __user *, filename, struct stat64_emu31 __user *, statbuf)
+{
+	struct kstat stat;
+	int ret = vfs_lstat(filename, &stat);
+	if (!ret)
+		ret = cp_stat64(statbuf, &stat);
+	return ret;
+}
+
+COMPAT_SYSCALL_DEFINE2(s390_fstat64, unsigned int, fd, struct stat64_emu31 __user *, statbuf)
+{
+	struct kstat stat;
+	int ret = vfs_fstat(fd, &stat);
+	if (!ret)
+		ret = cp_stat64(statbuf, &stat);
+	return ret;
+}
+
+COMPAT_SYSCALL_DEFINE4(s390_fstatat64, unsigned int, dfd, const char __user *, filename,
+		       struct stat64_emu31 __user *, statbuf, int, flag)
+{
+	struct kstat stat;
+	int error;
+
+	error = vfs_fstatat(dfd, filename, &stat, flag);
+	if (error)
+		return error;
+	return cp_stat64(statbuf, &stat);
+}
+
+/*
+ * Linux/i386 didn't use to be able to handle more than
+ * 4 system call parameters, so these system calls used a memory
+ * block for parameter passing..
+ */
+
+struct mmap_arg_struct_emu31 {
+	compat_ulong_t addr;
+	compat_ulong_t len;
+	compat_ulong_t prot;
+	compat_ulong_t flags;
+	compat_ulong_t fd;
+	compat_ulong_t offset;
+};
+
+COMPAT_SYSCALL_DEFINE1(s390_old_mmap, struct mmap_arg_struct_emu31 __user *, arg)
+{
+	struct mmap_arg_struct_emu31 a;
+
+	if (copy_from_user(&a, arg, sizeof(a)))
+		return -EFAULT;
+	if (a.offset & ~PAGE_MASK)
+		return -EINVAL;
+	return ksys_mmap_pgoff(a.addr, a.len, a.prot, a.flags, a.fd,
+			       a.offset >> PAGE_SHIFT);
+}
+
+COMPAT_SYSCALL_DEFINE1(s390_mmap2, struct mmap_arg_struct_emu31 __user *, arg)
+{
+	struct mmap_arg_struct_emu31 a;
+
+	if (copy_from_user(&a, arg, sizeof(a)))
+		return -EFAULT;
+	return ksys_mmap_pgoff(a.addr, a.len, a.prot, a.flags, a.fd, a.offset);
+}
+
+COMPAT_SYSCALL_DEFINE3(s390_read, unsigned int, fd, char __user *, buf, compat_size_t, count)
+{
+	if ((compat_ssize_t) count < 0)
+		return -EINVAL; 
+
+	return ksys_read(fd, buf, count);
+}
+
+COMPAT_SYSCALL_DEFINE3(s390_write, unsigned int, fd, const char __user *, buf, compat_size_t, count)
+{
+	if ((compat_ssize_t) count < 0)
+		return -EINVAL; 
+
+	return ksys_write(fd, buf, count);
+}
+
+/*
+ * 31 bit emulation wrapper functions for sys_fadvise64/fadvise64_64.
+ * These need to rewrite the advise values for POSIX_FADV_{DONTNEED,NOREUSE}
+ * because the 31 bit values differ from the 64 bit values.
+ */
+
+COMPAT_SYSCALL_DEFINE5(s390_fadvise64, int, fd, u32, high, u32, low, compat_size_t, len, int, advise)
+{
+	if (advise == 4)
+		advise = POSIX_FADV_DONTNEED;
+	else if (advise == 5)
+		advise = POSIX_FADV_NOREUSE;
+	return ksys_fadvise64_64(fd, (unsigned long)high << 32 | low, len,
+				 advise);
+}
+
+struct fadvise64_64_args {
+	int fd;
+	long long offset;
+	long long len;
+	int advice;
+};
+
+COMPAT_SYSCALL_DEFINE1(s390_fadvise64_64, struct fadvise64_64_args __user *, args)
+{
+	struct fadvise64_64_args a;
+
+	if ( copy_from_user(&a, args, sizeof(a)) )
+		return -EFAULT;
+	if (a.advice == 4)
+		a.advice = POSIX_FADV_DONTNEED;
+	else if (a.advice == 5)
+		a.advice = POSIX_FADV_NOREUSE;
+	return ksys_fadvise64_64(a.fd, a.offset, a.len, a.advice);
+}
+
+COMPAT_SYSCALL_DEFINE6(s390_sync_file_range, int, fd, u32, offhigh, u32, offlow,
+		       u32, nhigh, u32, nlow, unsigned int, flags)
+{
+	return ksys_sync_file_range(fd, ((loff_t)offhigh << 32) + offlow,
+				   ((u64)nhigh << 32) + nlow, flags);
+}
+
+COMPAT_SYSCALL_DEFINE6(s390_fallocate, int, fd, int, mode, u32, offhigh, u32, offlow,
+		       u32, lenhigh, u32, lenlow)
+{
+	return ksys_fallocate(fd, mode, ((loff_t)offhigh << 32) + offlow,
+			      ((u64)lenhigh << 32) + lenlow);
+}
diff --git a/arch/s390/kernel/compat_linux.h b/arch/s390/kernel/compat_linux.h
new file mode 100644
index 0000000000..ef23739b27
--- /dev/null
+++ b/arch/s390/kernel/compat_linux.h
@@ -0,0 +1,101 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_S390X_S390_H
+#define _ASM_S390X_S390_H
+
+#include <linux/compat.h>
+#include <linux/socket.h>
+#include <linux/syscalls.h>
+#include <asm/ptrace.h>
+
+/*
+ * Macro that masks the high order bit of a 32 bit pointer and
+ * converts it to a 64 bit pointer.
+ */
+#define A(__x)	((unsigned long)((__x) & 0x7FFFFFFFUL))
+#define AA(__x)	((unsigned long)(__x))
+
+/* Now 32bit compatibility types */
+struct ipc_kludge_32 {
+	__u32	msgp;	/* pointer */
+	__s32	msgtyp;
+};
+
+/* asm/sigcontext.h */
+typedef union {
+	__u64	d;
+	__u32	f;
+} freg_t32;
+
+typedef struct {
+	unsigned int	fpc;
+	unsigned int	pad;
+	freg_t32	fprs[__NUM_FPRS];
+} _s390_fp_regs32;
+
+typedef struct {
+	psw_t32		psw;
+	__u32		gprs[__NUM_GPRS];
+	__u32		acrs[__NUM_ACRS];
+} _s390_regs_common32;
+
+typedef struct {
+	_s390_regs_common32 regs;
+	_s390_fp_regs32	    fpregs;
+} _sigregs32;
+
+typedef struct {
+	__u32		gprs_high[__NUM_GPRS];
+	__u64		vxrs_low[__NUM_VXRS_LOW];
+	__vector128	vxrs_high[__NUM_VXRS_HIGH];
+	__u8		__reserved[128];
+} _sigregs_ext32;
+
+#define _SIGCONTEXT_NSIG32	64
+#define _SIGCONTEXT_NSIG_BPW32	32
+#define __SIGNAL_FRAMESIZE32	96
+#define _SIGMASK_COPY_SIZE32	(sizeof(u32) * 2)
+
+struct sigcontext32 {
+	__u32	oldmask[_COMPAT_NSIG_WORDS];
+	__u32	sregs;	/* pointer */
+};
+
+/* asm/signal.h */
+
+/* asm/ucontext.h */
+struct ucontext32 {
+	__u32			uc_flags;
+	__u32			uc_link;	/* pointer */
+	compat_stack_t		uc_stack;
+	_sigregs32		uc_mcontext;
+	compat_sigset_t		uc_sigmask;
+	/* Allow for uc_sigmask growth. Glibc uses a 1024-bit sigset_t. */
+	unsigned char		__unused[128 - sizeof(compat_sigset_t)];
+	_sigregs_ext32		uc_mcontext_ext;
+};
+
+struct stat64_emu31;
+struct mmap_arg_struct_emu31;
+struct fadvise64_64_args;
+
+long compat_sys_s390_truncate64(const char __user *path, u32 high, u32 low);
+long compat_sys_s390_ftruncate64(unsigned int fd, u32 high, u32 low);
+long compat_sys_s390_pread64(unsigned int fd, char __user *ubuf, compat_size_t count, u32 high, u32 low);
+long compat_sys_s390_pwrite64(unsigned int fd, const char __user *ubuf, compat_size_t count, u32 high, u32 low);
+long compat_sys_s390_readahead(int fd, u32 high, u32 low, s32 count);
+long compat_sys_s390_stat64(const char __user *filename, struct stat64_emu31 __user *statbuf);
+long compat_sys_s390_lstat64(const char __user *filename, struct stat64_emu31 __user *statbuf);
+long compat_sys_s390_fstat64(unsigned int fd, struct stat64_emu31 __user *statbuf);
+long compat_sys_s390_fstatat64(unsigned int dfd, const char __user *filename, struct stat64_emu31 __user *statbuf, int flag);
+long compat_sys_s390_old_mmap(struct mmap_arg_struct_emu31 __user *arg);
+long compat_sys_s390_mmap2(struct mmap_arg_struct_emu31 __user *arg);
+long compat_sys_s390_read(unsigned int fd, char __user *buf, compat_size_t count);
+long compat_sys_s390_write(unsigned int fd, const char __user *buf, compat_size_t count);
+long compat_sys_s390_fadvise64(int fd, u32 high, u32 low, compat_size_t len, int advise);
+long compat_sys_s390_fadvise64_64(struct fadvise64_64_args __user *args);
+long compat_sys_s390_sync_file_range(int fd, u32 offhigh, u32 offlow, u32 nhigh, u32 nlow, unsigned int flags);
+long compat_sys_s390_fallocate(int fd, int mode, u32 offhigh, u32 offlow, u32 lenhigh, u32 lenlow);
+long compat_sys_sigreturn(void);
+long compat_sys_rt_sigreturn(void);
+
+#endif /* _ASM_S390X_S390_H */
diff --git a/arch/s390/kernel/compat_ptrace.h b/arch/s390/kernel/compat_ptrace.h
new file mode 100644
index 0000000000..3c400fc7e9
--- /dev/null
+++ b/arch/s390/kernel/compat_ptrace.h
@@ -0,0 +1,64 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _PTRACE32_H
+#define _PTRACE32_H
+
+#include <asm/ptrace.h>    /* needed for NUM_CR_WORDS */
+#include "compat_linux.h"  /* needed for psw_compat_t */
+
+struct compat_per_struct_kernel {
+	__u32 cr9;		/* PER control bits */
+	__u32 cr10;		/* PER starting address */
+	__u32 cr11;		/* PER ending address */
+	__u32 bits;		/* Obsolete software bits */
+	__u32 starting_addr;	/* User specified start address */
+	__u32 ending_addr;	/* User specified end address */
+	__u16 perc_atmid;	/* PER trap ATMID */
+	__u32 address;		/* PER trap instruction address */
+	__u8  access_id;	/* PER trap access identification */
+};
+
+struct compat_user_regs_struct
+{
+	psw_compat_t psw;
+	u32 gprs[NUM_GPRS];
+	u32 acrs[NUM_ACRS];
+	u32 orig_gpr2;
+	/* nb: there's a 4-byte hole here */
+	s390_fp_regs fp_regs;
+	/*
+	 * These per registers are in here so that gdb can modify them
+	 * itself as there is no "official" ptrace interface for hardware
+	 * watchpoints. This is the way intel does it.
+	 */
+	struct compat_per_struct_kernel per_info;
+	u32  ieee_instruction_pointer;	/* obsolete, always 0 */
+};
+
+struct compat_user {
+	/* We start with the registers, to mimic the way that "memory"
+	   is returned from the ptrace(3,...) function.  */
+	struct compat_user_regs_struct regs;
+	/* The rest of this junk is to help gdb figure out what goes where */
+	u32 u_tsize;		/* Text segment size (pages). */
+	u32 u_dsize;	        /* Data segment size (pages). */
+	u32 u_ssize;	        /* Stack segment size (pages). */
+	u32 start_code;         /* Starting virtual address of text. */
+	u32 start_stack;	/* Starting virtual address of stack area.
+				   This is actually the bottom of the stack,
+				   the top of the stack is always found in the
+				   esp register.  */
+	s32 signal;     	 /* Signal that caused the core dump. */
+	u32 u_ar0;               /* Used by gdb to help find the values for */
+	                         /* the registers. */
+	u32 magic;		 /* To uniquely identify a core file */
+	char u_comm[32];	 /* User command that was responsible */
+};
+
+typedef struct
+{
+	__u32   len;
+	__u32   kernel_addr;
+	__u32   process_addr;
+} compat_ptrace_area;
+
+#endif /* _PTRACE32_H */
diff --git a/arch/s390/kernel/compat_signal.c b/arch/s390/kernel/compat_signal.c
new file mode 100644
index 0000000000..cecedd01d4
--- /dev/null
+++ b/arch/s390/kernel/compat_signal.c
@@ -0,0 +1,422 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ *    Copyright IBM Corp. 2000, 2006
+ *    Author(s): Denis Joseph Barrow (djbarrow@de.ibm.com,barrow_dj@yahoo.com)
+ *               Gerhard Tonn (ton@de.ibm.com)                  
+ *
+ *  Copyright (C) 1991, 1992  Linus Torvalds
+ *
+ *  1997-11-28  Modified for POSIX.1b signals by Richard Henderson
+ */
+
+#include <linux/compat.h>
+#include <linux/sched.h>
+#include <linux/sched/task_stack.h>
+#include <linux/mm.h>
+#include <linux/smp.h>
+#include <linux/kernel.h>
+#include <linux/signal.h>
+#include <linux/errno.h>
+#include <linux/wait.h>
+#include <linux/ptrace.h>
+#include <linux/unistd.h>
+#include <linux/stddef.h>
+#include <linux/tty.h>
+#include <linux/personality.h>
+#include <linux/binfmts.h>
+#include <asm/ucontext.h>
+#include <linux/uaccess.h>
+#include <asm/lowcore.h>
+#include <asm/switch_to.h>
+#include <asm/vdso.h>
+#include "compat_linux.h"
+#include "compat_ptrace.h"
+#include "entry.h"
+
+typedef struct 
+{
+	__u8 callee_used_stack[__SIGNAL_FRAMESIZE32];
+	struct sigcontext32 sc;
+	_sigregs32 sregs;
+	int signo;
+	_sigregs_ext32 sregs_ext;
+	__u16 svc_insn;		/* Offset of svc_insn is NOT fixed! */
+} sigframe32;
+
+typedef struct 
+{
+	__u8 callee_used_stack[__SIGNAL_FRAMESIZE32];
+	__u16 svc_insn;
+	compat_siginfo_t info;
+	struct ucontext32 uc;
+} rt_sigframe32;
+
+/* Store registers needed to create the signal frame */
+static void store_sigregs(void)
+{
+	save_access_regs(current->thread.acrs);
+	save_fpu_regs();
+}
+
+/* Load registers after signal return */
+static void load_sigregs(void)
+{
+	restore_access_regs(current->thread.acrs);
+}
+
+static int save_sigregs32(struct pt_regs *regs, _sigregs32 __user *sregs)
+{
+	_sigregs32 user_sregs;
+	int i;
+
+	user_sregs.regs.psw.mask = (__u32)(regs->psw.mask >> 32);
+	user_sregs.regs.psw.mask &= PSW32_MASK_USER | PSW32_MASK_RI;
+	user_sregs.regs.psw.mask |= PSW32_USER_BITS;
+	user_sregs.regs.psw.addr = (__u32) regs->psw.addr |
+		(__u32)(regs->psw.mask & PSW_MASK_BA);
+	for (i = 0; i < NUM_GPRS; i++)
+		user_sregs.regs.gprs[i] = (__u32) regs->gprs[i];
+	memcpy(&user_sregs.regs.acrs, current->thread.acrs,
+	       sizeof(user_sregs.regs.acrs));
+	fpregs_store((_s390_fp_regs *) &user_sregs.fpregs, &current->thread.fpu);
+	if (__copy_to_user(sregs, &user_sregs, sizeof(_sigregs32)))
+		return -EFAULT;
+	return 0;
+}
+
+static int restore_sigregs32(struct pt_regs *regs,_sigregs32 __user *sregs)
+{
+	_sigregs32 user_sregs;
+	int i;
+
+	/* Always make any pending restarted system call return -EINTR */
+	current->restart_block.fn = do_no_restart_syscall;
+
+	if (__copy_from_user(&user_sregs, &sregs->regs, sizeof(user_sregs)))
+		return -EFAULT;
+
+	if (!is_ri_task(current) && (user_sregs.regs.psw.mask & PSW32_MASK_RI))
+		return -EINVAL;
+
+	/* Test the floating-point-control word. */
+	if (test_fp_ctl(user_sregs.fpregs.fpc))
+		return -EINVAL;
+
+	/* Use regs->psw.mask instead of PSW_USER_BITS to preserve PER bit. */
+	regs->psw.mask = (regs->psw.mask & ~(PSW_MASK_USER | PSW_MASK_RI)) |
+		(__u64)(user_sregs.regs.psw.mask & PSW32_MASK_USER) << 32 |
+		(__u64)(user_sregs.regs.psw.mask & PSW32_MASK_RI) << 32 |
+		(__u64)(user_sregs.regs.psw.addr & PSW32_ADDR_AMODE);
+	/* Check for invalid user address space control. */
+	if ((regs->psw.mask & PSW_MASK_ASC) == PSW_ASC_HOME)
+		regs->psw.mask = PSW_ASC_PRIMARY |
+			(regs->psw.mask & ~PSW_MASK_ASC);
+	regs->psw.addr = (__u64)(user_sregs.regs.psw.addr & PSW32_ADDR_INSN);
+	for (i = 0; i < NUM_GPRS; i++)
+		regs->gprs[i] = (__u64) user_sregs.regs.gprs[i];
+	memcpy(&current->thread.acrs, &user_sregs.regs.acrs,
+	       sizeof(current->thread.acrs));
+	fpregs_load((_s390_fp_regs *) &user_sregs.fpregs, &current->thread.fpu);
+
+	clear_pt_regs_flag(regs, PIF_SYSCALL); /* No longer in a system call */
+	return 0;
+}
+
+static int save_sigregs_ext32(struct pt_regs *regs,
+			      _sigregs_ext32 __user *sregs_ext)
+{
+	__u32 gprs_high[NUM_GPRS];
+	__u64 vxrs[__NUM_VXRS_LOW];
+	int i;
+
+	/* Save high gprs to signal stack */
+	for (i = 0; i < NUM_GPRS; i++)
+		gprs_high[i] = regs->gprs[i] >> 32;
+	if (__copy_to_user(&sregs_ext->gprs_high, &gprs_high,
+			   sizeof(sregs_ext->gprs_high)))
+		return -EFAULT;
+
+	/* Save vector registers to signal stack */
+	if (MACHINE_HAS_VX) {
+		for (i = 0; i < __NUM_VXRS_LOW; i++)
+			vxrs[i] = current->thread.fpu.vxrs[i].low;
+		if (__copy_to_user(&sregs_ext->vxrs_low, vxrs,
+				   sizeof(sregs_ext->vxrs_low)) ||
+		    __copy_to_user(&sregs_ext->vxrs_high,
+				   current->thread.fpu.vxrs + __NUM_VXRS_LOW,
+				   sizeof(sregs_ext->vxrs_high)))
+			return -EFAULT;
+	}
+	return 0;
+}
+
+static int restore_sigregs_ext32(struct pt_regs *regs,
+				 _sigregs_ext32 __user *sregs_ext)
+{
+	__u32 gprs_high[NUM_GPRS];
+	__u64 vxrs[__NUM_VXRS_LOW];
+	int i;
+
+	/* Restore high gprs from signal stack */
+	if (__copy_from_user(&gprs_high, &sregs_ext->gprs_high,
+			     sizeof(sregs_ext->gprs_high)))
+		return -EFAULT;
+	for (i = 0; i < NUM_GPRS; i++)
+		*(__u32 *)&regs->gprs[i] = gprs_high[i];
+
+	/* Restore vector registers from signal stack */
+	if (MACHINE_HAS_VX) {
+		if (__copy_from_user(vxrs, &sregs_ext->vxrs_low,
+				     sizeof(sregs_ext->vxrs_low)) ||
+		    __copy_from_user(current->thread.fpu.vxrs + __NUM_VXRS_LOW,
+				     &sregs_ext->vxrs_high,
+				     sizeof(sregs_ext->vxrs_high)))
+			return -EFAULT;
+		for (i = 0; i < __NUM_VXRS_LOW; i++)
+			current->thread.fpu.vxrs[i].low = vxrs[i];
+	}
+	return 0;
+}
+
+COMPAT_SYSCALL_DEFINE0(sigreturn)
+{
+	struct pt_regs *regs = task_pt_regs(current);
+	sigframe32 __user *frame = (sigframe32 __user *)regs->gprs[15];
+	sigset_t set;
+
+	if (get_compat_sigset(&set, (compat_sigset_t __user *)frame->sc.oldmask))
+		goto badframe;
+	set_current_blocked(&set);
+	save_fpu_regs();
+	if (restore_sigregs32(regs, &frame->sregs))
+		goto badframe;
+	if (restore_sigregs_ext32(regs, &frame->sregs_ext))
+		goto badframe;
+	load_sigregs();
+	return regs->gprs[2];
+badframe:
+	force_sig(SIGSEGV);
+	return 0;
+}
+
+COMPAT_SYSCALL_DEFINE0(rt_sigreturn)
+{
+	struct pt_regs *regs = task_pt_regs(current);
+	rt_sigframe32 __user *frame = (rt_sigframe32 __user *)regs->gprs[15];
+	sigset_t set;
+
+	if (get_compat_sigset(&set, &frame->uc.uc_sigmask))
+		goto badframe;
+	set_current_blocked(&set);
+	if (compat_restore_altstack(&frame->uc.uc_stack))
+		goto badframe;
+	save_fpu_regs();
+	if (restore_sigregs32(regs, &frame->uc.uc_mcontext))
+		goto badframe;
+	if (restore_sigregs_ext32(regs, &frame->uc.uc_mcontext_ext))
+		goto badframe;
+	load_sigregs();
+	return regs->gprs[2];
+badframe:
+	force_sig(SIGSEGV);
+	return 0;
+}	
+
+/*
+ * Set up a signal frame.
+ */
+
+
+/*
+ * Determine which stack to use..
+ */
+static inline void __user *
+get_sigframe(struct k_sigaction *ka, struct pt_regs * regs, size_t frame_size)
+{
+	unsigned long sp;
+
+	/* Default to using normal stack */
+	sp = (unsigned long) A(regs->gprs[15]);
+
+	/* Overflow on alternate signal stack gives SIGSEGV. */
+	if (on_sig_stack(sp) && !on_sig_stack((sp - frame_size) & -8UL))
+		return (void __user *) -1UL;
+
+	/* This is the X/Open sanctioned signal stack switching.  */
+	if (ka->sa.sa_flags & SA_ONSTACK) {
+		if (! sas_ss_flags(sp))
+			sp = current->sas_ss_sp + current->sas_ss_size;
+	}
+
+	return (void __user *)((sp - frame_size) & -8ul);
+}
+
+static int setup_frame32(struct ksignal *ksig, sigset_t *set,
+			 struct pt_regs *regs)
+{
+	int sig = ksig->sig;
+	sigframe32 __user *frame;
+	unsigned long restorer;
+	size_t frame_size;
+
+	/*
+	 * gprs_high are always present for 31-bit compat tasks.
+	 * The space for vector registers is only allocated if
+	 * the machine supports it
+	 */
+	frame_size = sizeof(*frame) - sizeof(frame->sregs_ext.__reserved);
+	if (!MACHINE_HAS_VX)
+		frame_size -= sizeof(frame->sregs_ext.vxrs_low) +
+			      sizeof(frame->sregs_ext.vxrs_high);
+	frame = get_sigframe(&ksig->ka, regs, frame_size);
+	if (frame == (void __user *) -1UL)
+		return -EFAULT;
+
+	/* Set up backchain. */
+	if (__put_user(regs->gprs[15], (unsigned int __user *) frame))
+		return -EFAULT;
+
+	/* Create struct sigcontext32 on the signal stack */
+	if (put_compat_sigset((compat_sigset_t __user *)frame->sc.oldmask,
+			      set, sizeof(compat_sigset_t)))
+		return -EFAULT;
+	if (__put_user(ptr_to_compat(&frame->sregs), &frame->sc.sregs))
+		return -EFAULT;
+
+	/* Store registers needed to create the signal frame */
+	store_sigregs();
+
+	/* Create _sigregs32 on the signal stack */
+	if (save_sigregs32(regs, &frame->sregs))
+		return -EFAULT;
+
+	/* Place signal number on stack to allow backtrace from handler.  */
+	if (__put_user(regs->gprs[2], (int __force __user *) &frame->signo))
+		return -EFAULT;
+
+	/* Create _sigregs_ext32 on the signal stack */
+	if (save_sigregs_ext32(regs, &frame->sregs_ext))
+		return -EFAULT;
+
+	/* Set up to return from userspace.  If provided, use a stub
+	   already in userspace.  */
+	if (ksig->ka.sa.sa_flags & SA_RESTORER) {
+		restorer = (unsigned long __force)
+			ksig->ka.sa.sa_restorer | PSW32_ADDR_AMODE;
+	} else {
+		restorer = VDSO32_SYMBOL(current, sigreturn);
+        }
+
+	/* Set up registers for signal handler */
+	regs->gprs[14] = restorer;
+	regs->gprs[15] = (__force __u64) frame;
+	/* Force 31 bit amode and default user address space control. */
+	regs->psw.mask = PSW_MASK_BA |
+		(PSW_USER_BITS & PSW_MASK_ASC) |
+		(regs->psw.mask & ~PSW_MASK_ASC);
+	regs->psw.addr = (__force __u64) ksig->ka.sa.sa_handler;
+
+	regs->gprs[2] = sig;
+	regs->gprs[3] = (__force __u64) &frame->sc;
+
+	/* We forgot to include these in the sigcontext.
+	   To avoid breaking binary compatibility, they are passed as args. */
+	if (sig == SIGSEGV || sig == SIGBUS || sig == SIGILL ||
+	    sig == SIGTRAP || sig == SIGFPE) {
+		/* set extra registers only for synchronous signals */
+		regs->gprs[4] = regs->int_code & 127;
+		regs->gprs[5] = regs->int_parm_long;
+		regs->gprs[6] = current->thread.last_break;
+	}
+
+	return 0;
+}
+
+static int setup_rt_frame32(struct ksignal *ksig, sigset_t *set,
+			    struct pt_regs *regs)
+{
+	rt_sigframe32 __user *frame;
+	unsigned long restorer;
+	size_t frame_size;
+	u32 uc_flags;
+
+	frame_size = sizeof(*frame) -
+		     sizeof(frame->uc.uc_mcontext_ext.__reserved);
+	/*
+	 * gprs_high are always present for 31-bit compat tasks.
+	 * The space for vector registers is only allocated if
+	 * the machine supports it
+	 */
+	uc_flags = UC_GPRS_HIGH;
+	if (MACHINE_HAS_VX) {
+		uc_flags |= UC_VXRS;
+	} else
+		frame_size -= sizeof(frame->uc.uc_mcontext_ext.vxrs_low) +
+			      sizeof(frame->uc.uc_mcontext_ext.vxrs_high);
+	frame = get_sigframe(&ksig->ka, regs, frame_size);
+	if (frame == (void __user *) -1UL)
+		return -EFAULT;
+
+	/* Set up backchain. */
+	if (__put_user(regs->gprs[15], (unsigned int __force __user *) frame))
+		return -EFAULT;
+
+	/* Set up to return from userspace.  If provided, use a stub
+	   already in userspace.  */
+	if (ksig->ka.sa.sa_flags & SA_RESTORER) {
+		restorer = (unsigned long __force)
+			ksig->ka.sa.sa_restorer | PSW32_ADDR_AMODE;
+	} else {
+		restorer = VDSO32_SYMBOL(current, rt_sigreturn);
+	}
+
+	/* Create siginfo on the signal stack */
+	if (copy_siginfo_to_user32(&frame->info, &ksig->info))
+		return -EFAULT;
+
+	/* Store registers needed to create the signal frame */
+	store_sigregs();
+
+	/* Create ucontext on the signal stack. */
+	if (__put_user(uc_flags, &frame->uc.uc_flags) ||
+	    __put_user(0, &frame->uc.uc_link) ||
+	    __compat_save_altstack(&frame->uc.uc_stack, regs->gprs[15]) ||
+	    save_sigregs32(regs, &frame->uc.uc_mcontext) ||
+	    put_compat_sigset(&frame->uc.uc_sigmask, set, sizeof(compat_sigset_t)) ||
+	    save_sigregs_ext32(regs, &frame->uc.uc_mcontext_ext))
+		return -EFAULT;
+
+	/* Set up registers for signal handler */
+	regs->gprs[14] = restorer;
+	regs->gprs[15] = (__force __u64) frame;
+	/* Force 31 bit amode and default user address space control. */
+	regs->psw.mask = PSW_MASK_BA |
+		(PSW_USER_BITS & PSW_MASK_ASC) |
+		(regs->psw.mask & ~PSW_MASK_ASC);
+	regs->psw.addr = (__u64 __force) ksig->ka.sa.sa_handler;
+
+	regs->gprs[2] = ksig->sig;
+	regs->gprs[3] = (__force __u64) &frame->info;
+	regs->gprs[4] = (__force __u64) &frame->uc;
+	regs->gprs[5] = current->thread.last_break;
+	return 0;
+}
+
+/*
+ * OK, we're invoking a handler
+ */	
+
+void handle_signal32(struct ksignal *ksig, sigset_t *oldset,
+		     struct pt_regs *regs)
+{
+	int ret;
+
+	/* Set up the stack frame */
+	if (ksig->ka.sa.sa_flags & SA_SIGINFO)
+		ret = setup_rt_frame32(ksig, oldset, regs);
+	else
+		ret = setup_frame32(ksig, oldset, regs);
+
+	signal_setup_done(ret, ksig, test_thread_flag(TIF_SINGLE_STEP));
+}
+
diff --git a/arch/s390/kernel/cpcmd.c b/arch/s390/kernel/cpcmd.c
new file mode 100644
index 0000000000..b210a29d3e
--- /dev/null
+++ b/arch/s390/kernel/cpcmd.c
@@ -0,0 +1,114 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ *  S390 version
+ *    Copyright IBM Corp. 1999, 2007
+ *    Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com),
+ *               Christian Borntraeger (cborntra@de.ibm.com),
+ */
+
+#define KMSG_COMPONENT "cpcmd"
+#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
+
+#include <linux/kernel.h>
+#include <linux/export.h>
+#include <linux/slab.h>
+#include <linux/spinlock.h>
+#include <linux/stddef.h>
+#include <linux/string.h>
+#include <linux/mm.h>
+#include <linux/io.h>
+#include <asm/diag.h>
+#include <asm/ebcdic.h>
+#include <asm/cpcmd.h>
+
+static DEFINE_SPINLOCK(cpcmd_lock);
+static char cpcmd_buf[241];
+
+static int diag8_noresponse(int cmdlen)
+{
+	asm volatile(
+		"	diag	%[rx],%[ry],0x8\n"
+		: [ry] "+&d" (cmdlen)
+		: [rx] "d" (__pa(cpcmd_buf))
+		: "cc");
+	return cmdlen;
+}
+
+static int diag8_response(int cmdlen, char *response, int *rlen)
+{
+	union register_pair rx, ry;
+	int cc;
+
+	rx.even = __pa(cpcmd_buf);
+	rx.odd	= __pa(response);
+	ry.even = cmdlen | 0x40000000L;
+	ry.odd	= *rlen;
+	asm volatile(
+		"	diag	%[rx],%[ry],0x8\n"
+		"	ipm	%[cc]\n"
+		"	srl	%[cc],28\n"
+		: [cc] "=&d" (cc), [ry] "+&d" (ry.pair)
+		: [rx] "d" (rx.pair)
+		: "cc");
+	if (cc)
+		*rlen += ry.odd;
+	else
+		*rlen = ry.odd;
+	return ry.even;
+}
+
+/*
+ * __cpcmd has some restrictions over cpcmd
+ *  - __cpcmd is unlocked and therefore not SMP-safe
+ */
+int  __cpcmd(const char *cmd, char *response, int rlen, int *response_code)
+{
+	int cmdlen;
+	int rc;
+	int response_len;
+
+	cmdlen = strlen(cmd);
+	BUG_ON(cmdlen > 240);
+	memcpy(cpcmd_buf, cmd, cmdlen);
+	ASCEBC(cpcmd_buf, cmdlen);
+
+	diag_stat_inc(DIAG_STAT_X008);
+	if (response) {
+		memset(response, 0, rlen);
+		response_len = rlen;
+		rc = diag8_response(cmdlen, response, &rlen);
+		EBCASC(response, response_len);
+        } else {
+		rc = diag8_noresponse(cmdlen);
+        }
+	if (response_code)
+		*response_code = rc;
+	return rlen;
+}
+EXPORT_SYMBOL(__cpcmd);
+
+int cpcmd(const char *cmd, char *response, int rlen, int *response_code)
+{
+	unsigned long flags;
+	char *lowbuf;
+	int len;
+
+	if (is_vmalloc_or_module_addr(response)) {
+		lowbuf = kmalloc(rlen, GFP_KERNEL);
+		if (!lowbuf) {
+			pr_warn("The cpcmd kernel function failed to allocate a response buffer\n");
+			return -ENOMEM;
+		}
+		spin_lock_irqsave(&cpcmd_lock, flags);
+		len = __cpcmd(cmd, lowbuf, rlen, response_code);
+		spin_unlock_irqrestore(&cpcmd_lock, flags);
+		memcpy(response, lowbuf, rlen);
+		kfree(lowbuf);
+	} else {
+		spin_lock_irqsave(&cpcmd_lock, flags);
+		len = __cpcmd(cmd, response, rlen, response_code);
+		spin_unlock_irqrestore(&cpcmd_lock, flags);
+	}
+	return len;
+}
+EXPORT_SYMBOL(cpcmd);
diff --git a/arch/s390/kernel/cpufeature.c b/arch/s390/kernel/cpufeature.c
new file mode 100644
index 0000000000..1b2ae42a0c
--- /dev/null
+++ b/arch/s390/kernel/cpufeature.c
@@ -0,0 +1,46 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright IBM Corp. 2022
+ */
+
+#include <linux/cpufeature.h>
+#include <linux/bug.h>
+#include <asm/elf.h>
+
+enum {
+	TYPE_HWCAP,
+	TYPE_FACILITY,
+};
+
+struct s390_cpu_feature {
+	unsigned int type	: 4;
+	unsigned int num	: 28;
+};
+
+static struct s390_cpu_feature s390_cpu_features[MAX_CPU_FEATURES] = {
+	[S390_CPU_FEATURE_MSA]	= {.type = TYPE_HWCAP, .num = HWCAP_NR_MSA},
+	[S390_CPU_FEATURE_VXRS]	= {.type = TYPE_HWCAP, .num = HWCAP_NR_VXRS},
+	[S390_CPU_FEATURE_UV]	= {.type = TYPE_FACILITY, .num = 158},
+};
+
+/*
+ * cpu_have_feature - Test CPU features on module initialization
+ */
+int cpu_have_feature(unsigned int num)
+{
+	struct s390_cpu_feature *feature;
+
+	if (WARN_ON_ONCE(num >= MAX_CPU_FEATURES))
+		return 0;
+	feature = &s390_cpu_features[num];
+	switch (feature->type) {
+	case TYPE_HWCAP:
+		return !!(elf_hwcap & BIT(feature->num));
+	case TYPE_FACILITY:
+		return test_facility(feature->num);
+	default:
+		WARN_ON_ONCE(1);
+		return 0;
+	}
+}
+EXPORT_SYMBOL(cpu_have_feature);
diff --git a/arch/s390/kernel/crash_dump.c b/arch/s390/kernel/crash_dump.c
new file mode 100644
index 0000000000..7af69948b2
--- /dev/null
+++ b/arch/s390/kernel/crash_dump.c
@@ -0,0 +1,652 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * S390 kdump implementation
+ *
+ * Copyright IBM Corp. 2011
+ * Author(s): Michael Holzheu <holzheu@linux.vnet.ibm.com>
+ */
+
+#include <linux/crash_dump.h>
+#include <asm/lowcore.h>
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/mm.h>
+#include <linux/gfp.h>
+#include <linux/slab.h>
+#include <linux/memblock.h>
+#include <linux/elf.h>
+#include <linux/uio.h>
+#include <asm/asm-offsets.h>
+#include <asm/os_info.h>
+#include <asm/elf.h>
+#include <asm/ipl.h>
+#include <asm/sclp.h>
+#include <asm/maccess.h>
+
+#define PTR_ADD(x, y) (((char *) (x)) + ((unsigned long) (y)))
+#define PTR_SUB(x, y) (((char *) (x)) - ((unsigned long) (y)))
+#define PTR_DIFF(x, y) ((unsigned long)(((char *) (x)) - ((unsigned long) (y))))
+
+static struct memblock_region oldmem_region;
+
+static struct memblock_type oldmem_type = {
+	.cnt = 1,
+	.max = 1,
+	.total_size = 0,
+	.regions = &oldmem_region,
+	.name = "oldmem",
+};
+
+struct save_area {
+	struct list_head list;
+	u64 psw[2];
+	u64 ctrs[16];
+	u64 gprs[16];
+	u32 acrs[16];
+	u64 fprs[16];
+	u32 fpc;
+	u32 prefix;
+	u32 todpreg;
+	u64 timer;
+	u64 todcmp;
+	u64 vxrs_low[16];
+	__vector128 vxrs_high[16];
+};
+
+static LIST_HEAD(dump_save_areas);
+
+/*
+ * Allocate a save area
+ */
+struct save_area * __init save_area_alloc(bool is_boot_cpu)
+{
+	struct save_area *sa;
+
+	sa = memblock_alloc(sizeof(*sa), 8);
+	if (!sa)
+		return NULL;
+
+	if (is_boot_cpu)
+		list_add(&sa->list, &dump_save_areas);
+	else
+		list_add_tail(&sa->list, &dump_save_areas);
+	return sa;
+}
+
+/*
+ * Return the address of the save area for the boot CPU
+ */
+struct save_area * __init save_area_boot_cpu(void)
+{
+	return list_first_entry_or_null(&dump_save_areas, struct save_area, list);
+}
+
+/*
+ * Copy CPU registers into the save area
+ */
+void __init save_area_add_regs(struct save_area *sa, void *regs)
+{
+	struct lowcore *lc;
+
+	lc = (struct lowcore *)(regs - __LC_FPREGS_SAVE_AREA);
+	memcpy(&sa->psw, &lc->psw_save_area, sizeof(sa->psw));
+	memcpy(&sa->ctrs, &lc->cregs_save_area, sizeof(sa->ctrs));
+	memcpy(&sa->gprs, &lc->gpregs_save_area, sizeof(sa->gprs));
+	memcpy(&sa->acrs, &lc->access_regs_save_area, sizeof(sa->acrs));
+	memcpy(&sa->fprs, &lc->floating_pt_save_area, sizeof(sa->fprs));
+	memcpy(&sa->fpc, &lc->fpt_creg_save_area, sizeof(sa->fpc));
+	memcpy(&sa->prefix, &lc->prefixreg_save_area, sizeof(sa->prefix));
+	memcpy(&sa->todpreg, &lc->tod_progreg_save_area, sizeof(sa->todpreg));
+	memcpy(&sa->timer, &lc->cpu_timer_save_area, sizeof(sa->timer));
+	memcpy(&sa->todcmp, &lc->clock_comp_save_area, sizeof(sa->todcmp));
+}
+
+/*
+ * Copy vector registers into the save area
+ */
+void __init save_area_add_vxrs(struct save_area *sa, __vector128 *vxrs)
+{
+	int i;
+
+	/* Copy lower halves of vector registers 0-15 */
+	for (i = 0; i < 16; i++)
+		sa->vxrs_low[i] = vxrs[i].low;
+	/* Copy vector registers 16-31 */
+	memcpy(sa->vxrs_high, vxrs + 16, 16 * sizeof(__vector128));
+}
+
+static size_t copy_oldmem_iter(struct iov_iter *iter, unsigned long src, size_t count)
+{
+	size_t len, copied, res = 0;
+
+	while (count) {
+		if (!oldmem_data.start && src < sclp.hsa_size) {
+			/* Copy from zfcp/nvme dump HSA area */
+			len = min(count, sclp.hsa_size - src);
+			copied = memcpy_hsa_iter(iter, src, len);
+		} else {
+			/* Check for swapped kdump oldmem areas */
+			if (oldmem_data.start && src - oldmem_data.start < oldmem_data.size) {
+				src -= oldmem_data.start;
+				len = min(count, oldmem_data.size - src);
+			} else if (oldmem_data.start && src < oldmem_data.size) {
+				len = min(count, oldmem_data.size - src);
+				src += oldmem_data.start;
+			} else {
+				len = count;
+			}
+			copied = memcpy_real_iter(iter, src, len);
+		}
+		count -= copied;
+		src += copied;
+		res += copied;
+		if (copied < len)
+			break;
+	}
+	return res;
+}
+
+int copy_oldmem_kernel(void *dst, unsigned long src, size_t count)
+{
+	struct iov_iter iter;
+	struct kvec kvec;
+
+	kvec.iov_base = dst;
+	kvec.iov_len = count;
+	iov_iter_kvec(&iter, ITER_DEST, &kvec, 1, count);
+	if (copy_oldmem_iter(&iter, src, count) < count)
+		return -EFAULT;
+	return 0;
+}
+
+/*
+ * Copy one page from "oldmem"
+ */
+ssize_t copy_oldmem_page(struct iov_iter *iter, unsigned long pfn, size_t csize,
+			 unsigned long offset)
+{
+	unsigned long src;
+
+	src = pfn_to_phys(pfn) + offset;
+	return copy_oldmem_iter(iter, src, csize);
+}
+
+/*
+ * Remap "oldmem" for kdump
+ *
+ * For the kdump reserved memory this functions performs a swap operation:
+ * [0 - OLDMEM_SIZE] is mapped to [OLDMEM_BASE - OLDMEM_BASE + OLDMEM_SIZE]
+ */
+static int remap_oldmem_pfn_range_kdump(struct vm_area_struct *vma,
+					unsigned long from, unsigned long pfn,
+					unsigned long size, pgprot_t prot)
+{
+	unsigned long size_old;
+	int rc;
+
+	if (pfn < oldmem_data.size >> PAGE_SHIFT) {
+		size_old = min(size, oldmem_data.size - (pfn << PAGE_SHIFT));
+		rc = remap_pfn_range(vma, from,
+				     pfn + (oldmem_data.start >> PAGE_SHIFT),
+				     size_old, prot);
+		if (rc || size == size_old)
+			return rc;
+		size -= size_old;
+		from += size_old;
+		pfn += size_old >> PAGE_SHIFT;
+	}
+	return remap_pfn_range(vma, from, pfn, size, prot);
+}
+
+/*
+ * Remap "oldmem" for zfcp/nvme dump
+ *
+ * We only map available memory above HSA size. Memory below HSA size
+ * is read on demand using the copy_oldmem_page() function.
+ */
+static int remap_oldmem_pfn_range_zfcpdump(struct vm_area_struct *vma,
+					   unsigned long from,
+					   unsigned long pfn,
+					   unsigned long size, pgprot_t prot)
+{
+	unsigned long hsa_end = sclp.hsa_size;
+	unsigned long size_hsa;
+
+	if (pfn < hsa_end >> PAGE_SHIFT) {
+		size_hsa = min(size, hsa_end - (pfn << PAGE_SHIFT));
+		if (size == size_hsa)
+			return 0;
+		size -= size_hsa;
+		from += size_hsa;
+		pfn += size_hsa >> PAGE_SHIFT;
+	}
+	return remap_pfn_range(vma, from, pfn, size, prot);
+}
+
+/*
+ * Remap "oldmem" for kdump or zfcp/nvme dump
+ */
+int remap_oldmem_pfn_range(struct vm_area_struct *vma, unsigned long from,
+			   unsigned long pfn, unsigned long size, pgprot_t prot)
+{
+	if (oldmem_data.start)
+		return remap_oldmem_pfn_range_kdump(vma, from, pfn, size, prot);
+	else
+		return remap_oldmem_pfn_range_zfcpdump(vma, from, pfn, size,
+						       prot);
+}
+
+static const char *nt_name(Elf64_Word type)
+{
+	const char *name = "LINUX";
+
+	if (type == NT_PRPSINFO || type == NT_PRSTATUS || type == NT_PRFPREG)
+		name = KEXEC_CORE_NOTE_NAME;
+	return name;
+}
+
+/*
+ * Initialize ELF note
+ */
+static void *nt_init_name(void *buf, Elf64_Word type, void *desc, int d_len,
+			  const char *name)
+{
+	Elf64_Nhdr *note;
+	u64 len;
+
+	note = (Elf64_Nhdr *)buf;
+	note->n_namesz = strlen(name) + 1;
+	note->n_descsz = d_len;
+	note->n_type = type;
+	len = sizeof(Elf64_Nhdr);
+
+	memcpy(buf + len, name, note->n_namesz);
+	len = roundup(len + note->n_namesz, 4);
+
+	memcpy(buf + len, desc, note->n_descsz);
+	len = roundup(len + note->n_descsz, 4);
+
+	return PTR_ADD(buf, len);
+}
+
+static inline void *nt_init(void *buf, Elf64_Word type, void *desc, int d_len)
+{
+	return nt_init_name(buf, type, desc, d_len, nt_name(type));
+}
+
+/*
+ * Calculate the size of ELF note
+ */
+static size_t nt_size_name(int d_len, const char *name)
+{
+	size_t size;
+
+	size = sizeof(Elf64_Nhdr);
+	size += roundup(strlen(name) + 1, 4);
+	size += roundup(d_len, 4);
+
+	return size;
+}
+
+static inline size_t nt_size(Elf64_Word type, int d_len)
+{
+	return nt_size_name(d_len, nt_name(type));
+}
+
+/*
+ * Fill ELF notes for one CPU with save area registers
+ */
+static void *fill_cpu_elf_notes(void *ptr, int cpu, struct save_area *sa)
+{
+	struct elf_prstatus nt_prstatus;
+	elf_fpregset_t nt_fpregset;
+
+	/* Prepare prstatus note */
+	memset(&nt_prstatus, 0, sizeof(nt_prstatus));
+	memcpy(&nt_prstatus.pr_reg.gprs, sa->gprs, sizeof(sa->gprs));
+	memcpy(&nt_prstatus.pr_reg.psw, sa->psw, sizeof(sa->psw));
+	memcpy(&nt_prstatus.pr_reg.acrs, sa->acrs, sizeof(sa->acrs));
+	nt_prstatus.common.pr_pid = cpu;
+	/* Prepare fpregset (floating point) note */
+	memset(&nt_fpregset, 0, sizeof(nt_fpregset));
+	memcpy(&nt_fpregset.fpc, &sa->fpc, sizeof(sa->fpc));
+	memcpy(&nt_fpregset.fprs, &sa->fprs, sizeof(sa->fprs));
+	/* Create ELF notes for the CPU */
+	ptr = nt_init(ptr, NT_PRSTATUS, &nt_prstatus, sizeof(nt_prstatus));
+	ptr = nt_init(ptr, NT_PRFPREG, &nt_fpregset, sizeof(nt_fpregset));
+	ptr = nt_init(ptr, NT_S390_TIMER, &sa->timer, sizeof(sa->timer));
+	ptr = nt_init(ptr, NT_S390_TODCMP, &sa->todcmp, sizeof(sa->todcmp));
+	ptr = nt_init(ptr, NT_S390_TODPREG, &sa->todpreg, sizeof(sa->todpreg));
+	ptr = nt_init(ptr, NT_S390_CTRS, &sa->ctrs, sizeof(sa->ctrs));
+	ptr = nt_init(ptr, NT_S390_PREFIX, &sa->prefix, sizeof(sa->prefix));
+	if (MACHINE_HAS_VX) {
+		ptr = nt_init(ptr, NT_S390_VXRS_HIGH,
+			      &sa->vxrs_high, sizeof(sa->vxrs_high));
+		ptr = nt_init(ptr, NT_S390_VXRS_LOW,
+			      &sa->vxrs_low, sizeof(sa->vxrs_low));
+	}
+	return ptr;
+}
+
+/*
+ * Calculate size of ELF notes per cpu
+ */
+static size_t get_cpu_elf_notes_size(void)
+{
+	struct save_area *sa = NULL;
+	size_t size;
+
+	size =	nt_size(NT_PRSTATUS, sizeof(struct elf_prstatus));
+	size +=  nt_size(NT_PRFPREG, sizeof(elf_fpregset_t));
+	size +=  nt_size(NT_S390_TIMER, sizeof(sa->timer));
+	size +=  nt_size(NT_S390_TODCMP, sizeof(sa->todcmp));
+	size +=  nt_size(NT_S390_TODPREG, sizeof(sa->todpreg));
+	size +=  nt_size(NT_S390_CTRS, sizeof(sa->ctrs));
+	size +=  nt_size(NT_S390_PREFIX, sizeof(sa->prefix));
+	if (MACHINE_HAS_VX) {
+		size += nt_size(NT_S390_VXRS_HIGH, sizeof(sa->vxrs_high));
+		size += nt_size(NT_S390_VXRS_LOW, sizeof(sa->vxrs_low));
+	}
+
+	return size;
+}
+
+/*
+ * Initialize prpsinfo note (new kernel)
+ */
+static void *nt_prpsinfo(void *ptr)
+{
+	struct elf_prpsinfo prpsinfo;
+
+	memset(&prpsinfo, 0, sizeof(prpsinfo));
+	prpsinfo.pr_sname = 'R';
+	strcpy(prpsinfo.pr_fname, "vmlinux");
+	return nt_init(ptr, NT_PRPSINFO, &prpsinfo, sizeof(prpsinfo));
+}
+
+/*
+ * Get vmcoreinfo using lowcore->vmcore_info (new kernel)
+ */
+static void *get_vmcoreinfo_old(unsigned long *size)
+{
+	char nt_name[11], *vmcoreinfo;
+	unsigned long addr;
+	Elf64_Nhdr note;
+
+	if (copy_oldmem_kernel(&addr, __LC_VMCORE_INFO, sizeof(addr)))
+		return NULL;
+	memset(nt_name, 0, sizeof(nt_name));
+	if (copy_oldmem_kernel(&note, addr, sizeof(note)))
+		return NULL;
+	if (copy_oldmem_kernel(nt_name, addr + sizeof(note),
+			       sizeof(nt_name) - 1))
+		return NULL;
+	if (strcmp(nt_name, VMCOREINFO_NOTE_NAME) != 0)
+		return NULL;
+	vmcoreinfo = kzalloc(note.n_descsz, GFP_KERNEL);
+	if (!vmcoreinfo)
+		return NULL;
+	if (copy_oldmem_kernel(vmcoreinfo, addr + 24, note.n_descsz)) {
+		kfree(vmcoreinfo);
+		return NULL;
+	}
+	*size = note.n_descsz;
+	return vmcoreinfo;
+}
+
+/*
+ * Initialize vmcoreinfo note (new kernel)
+ */
+static void *nt_vmcoreinfo(void *ptr)
+{
+	const char *name = VMCOREINFO_NOTE_NAME;
+	unsigned long size;
+	void *vmcoreinfo;
+
+	vmcoreinfo = os_info_old_entry(OS_INFO_VMCOREINFO, &size);
+	if (vmcoreinfo)
+		return nt_init_name(ptr, 0, vmcoreinfo, size, name);
+
+	vmcoreinfo = get_vmcoreinfo_old(&size);
+	if (!vmcoreinfo)
+		return ptr;
+	ptr = nt_init_name(ptr, 0, vmcoreinfo, size, name);
+	kfree(vmcoreinfo);
+	return ptr;
+}
+
+static size_t nt_vmcoreinfo_size(void)
+{
+	const char *name = VMCOREINFO_NOTE_NAME;
+	unsigned long size;
+	void *vmcoreinfo;
+
+	vmcoreinfo = os_info_old_entry(OS_INFO_VMCOREINFO, &size);
+	if (vmcoreinfo)
+		return nt_size_name(size, name);
+
+	vmcoreinfo = get_vmcoreinfo_old(&size);
+	if (!vmcoreinfo)
+		return 0;
+
+	kfree(vmcoreinfo);
+	return nt_size_name(size, name);
+}
+
+/*
+ * Initialize final note (needed for /proc/vmcore code)
+ */
+static void *nt_final(void *ptr)
+{
+	Elf64_Nhdr *note;
+
+	note = (Elf64_Nhdr *) ptr;
+	note->n_namesz = 0;
+	note->n_descsz = 0;
+	note->n_type = 0;
+	return PTR_ADD(ptr, sizeof(Elf64_Nhdr));
+}
+
+/*
+ * Initialize ELF header (new kernel)
+ */
+static void *ehdr_init(Elf64_Ehdr *ehdr, int mem_chunk_cnt)
+{
+	memset(ehdr, 0, sizeof(*ehdr));
+	memcpy(ehdr->e_ident, ELFMAG, SELFMAG);
+	ehdr->e_ident[EI_CLASS] = ELFCLASS64;
+	ehdr->e_ident[EI_DATA] = ELFDATA2MSB;
+	ehdr->e_ident[EI_VERSION] = EV_CURRENT;
+	memset(ehdr->e_ident + EI_PAD, 0, EI_NIDENT - EI_PAD);
+	ehdr->e_type = ET_CORE;
+	ehdr->e_machine = EM_S390;
+	ehdr->e_version = EV_CURRENT;
+	ehdr->e_phoff = sizeof(Elf64_Ehdr);
+	ehdr->e_ehsize = sizeof(Elf64_Ehdr);
+	ehdr->e_phentsize = sizeof(Elf64_Phdr);
+	ehdr->e_phnum = mem_chunk_cnt + 1;
+	return ehdr + 1;
+}
+
+/*
+ * Return CPU count for ELF header (new kernel)
+ */
+static int get_cpu_cnt(void)
+{
+	struct save_area *sa;
+	int cpus = 0;
+
+	list_for_each_entry(sa, &dump_save_areas, list)
+		if (sa->prefix != 0)
+			cpus++;
+	return cpus;
+}
+
+/*
+ * Return memory chunk count for ELF header (new kernel)
+ */
+static int get_mem_chunk_cnt(void)
+{
+	int cnt = 0;
+	u64 idx;
+
+	for_each_physmem_range(idx, &oldmem_type, NULL, NULL)
+		cnt++;
+	return cnt;
+}
+
+/*
+ * Initialize ELF loads (new kernel)
+ */
+static void loads_init(Elf64_Phdr *phdr, u64 loads_offset)
+{
+	phys_addr_t start, end;
+	u64 idx;
+
+	for_each_physmem_range(idx, &oldmem_type, &start, &end) {
+		phdr->p_filesz = end - start;
+		phdr->p_type = PT_LOAD;
+		phdr->p_offset = start;
+		phdr->p_vaddr = start;
+		phdr->p_paddr = start;
+		phdr->p_memsz = end - start;
+		phdr->p_flags = PF_R | PF_W | PF_X;
+		phdr->p_align = PAGE_SIZE;
+		phdr++;
+	}
+}
+
+/*
+ * Initialize notes (new kernel)
+ */
+static void *notes_init(Elf64_Phdr *phdr, void *ptr, u64 notes_offset)
+{
+	struct save_area *sa;
+	void *ptr_start = ptr;
+	int cpu;
+
+	ptr = nt_prpsinfo(ptr);
+
+	cpu = 1;
+	list_for_each_entry(sa, &dump_save_areas, list)
+		if (sa->prefix != 0)
+			ptr = fill_cpu_elf_notes(ptr, cpu++, sa);
+	ptr = nt_vmcoreinfo(ptr);
+	ptr = nt_final(ptr);
+	memset(phdr, 0, sizeof(*phdr));
+	phdr->p_type = PT_NOTE;
+	phdr->p_offset = notes_offset;
+	phdr->p_filesz = (unsigned long) PTR_SUB(ptr, ptr_start);
+	phdr->p_memsz = phdr->p_filesz;
+	return ptr;
+}
+
+static size_t get_elfcorehdr_size(int mem_chunk_cnt)
+{
+	size_t size;
+
+	size = sizeof(Elf64_Ehdr);
+	/* PT_NOTES */
+	size += sizeof(Elf64_Phdr);
+	/* nt_prpsinfo */
+	size += nt_size(NT_PRPSINFO, sizeof(struct elf_prpsinfo));
+	/* regsets */
+	size += get_cpu_cnt() * get_cpu_elf_notes_size();
+	/* nt_vmcoreinfo */
+	size += nt_vmcoreinfo_size();
+	/* nt_final */
+	size += sizeof(Elf64_Nhdr);
+	/* PT_LOADS */
+	size += mem_chunk_cnt * sizeof(Elf64_Phdr);
+
+	return size;
+}
+
+/*
+ * Create ELF core header (new kernel)
+ */
+int elfcorehdr_alloc(unsigned long long *addr, unsigned long long *size)
+{
+	Elf64_Phdr *phdr_notes, *phdr_loads;
+	size_t alloc_size;
+	int mem_chunk_cnt;
+	void *ptr, *hdr;
+	u64 hdr_off;
+
+	/* If we are not in kdump or zfcp/nvme dump mode return */
+	if (!oldmem_data.start && !is_ipl_type_dump())
+		return 0;
+	/* If we cannot get HSA size for zfcp/nvme dump return error */
+	if (is_ipl_type_dump() && !sclp.hsa_size)
+		return -ENODEV;
+
+	/* For kdump, exclude previous crashkernel memory */
+	if (oldmem_data.start) {
+		oldmem_region.base = oldmem_data.start;
+		oldmem_region.size = oldmem_data.size;
+		oldmem_type.total_size = oldmem_data.size;
+	}
+
+	mem_chunk_cnt = get_mem_chunk_cnt();
+
+	alloc_size = get_elfcorehdr_size(mem_chunk_cnt);
+
+	hdr = kzalloc(alloc_size, GFP_KERNEL);
+
+	/* Without elfcorehdr /proc/vmcore cannot be created. Thus creating
+	 * a dump with this crash kernel will fail. Panic now to allow other
+	 * dump mechanisms to take over.
+	 */
+	if (!hdr)
+		panic("s390 kdump allocating elfcorehdr failed");
+
+	/* Init elf header */
+	ptr = ehdr_init(hdr, mem_chunk_cnt);
+	/* Init program headers */
+	phdr_notes = ptr;
+	ptr = PTR_ADD(ptr, sizeof(Elf64_Phdr));
+	phdr_loads = ptr;
+	ptr = PTR_ADD(ptr, sizeof(Elf64_Phdr) * mem_chunk_cnt);
+	/* Init notes */
+	hdr_off = PTR_DIFF(ptr, hdr);
+	ptr = notes_init(phdr_notes, ptr, ((unsigned long) hdr) + hdr_off);
+	/* Init loads */
+	hdr_off = PTR_DIFF(ptr, hdr);
+	loads_init(phdr_loads, hdr_off);
+	*addr = (unsigned long long) hdr;
+	*size = (unsigned long long) hdr_off;
+	BUG_ON(elfcorehdr_size > alloc_size);
+	return 0;
+}
+
+/*
+ * Free ELF core header (new kernel)
+ */
+void elfcorehdr_free(unsigned long long addr)
+{
+	kfree((void *)(unsigned long)addr);
+}
+
+/*
+ * Read from ELF header
+ */
+ssize_t elfcorehdr_read(char *buf, size_t count, u64 *ppos)
+{
+	void *src = (void *)(unsigned long)*ppos;
+
+	memcpy(buf, src, count);
+	*ppos += count;
+	return count;
+}
+
+/*
+ * Read from ELF notes data
+ */
+ssize_t elfcorehdr_read_notes(char *buf, size_t count, u64 *ppos)
+{
+	void *src = (void *)(unsigned long)*ppos;
+
+	memcpy(buf, src, count);
+	*ppos += count;
+	return count;
+}
diff --git a/arch/s390/kernel/debug.c b/arch/s390/kernel/debug.c
new file mode 100644
index 0000000000..a85e0c3e70
--- /dev/null
+++ b/arch/s390/kernel/debug.c
@@ -0,0 +1,1574 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ *   S/390 debug facility
+ *
+ *    Copyright IBM Corp. 1999, 2020
+ *
+ *    Author(s): Michael Holzheu (holzheu@de.ibm.com),
+ *		 Holger Smolinski (Holger.Smolinski@de.ibm.com)
+ *
+ *    Bugreports to: <Linux390@de.ibm.com>
+ */
+
+#define KMSG_COMPONENT "s390dbf"
+#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
+
+#include <linux/stddef.h>
+#include <linux/kernel.h>
+#include <linux/errno.h>
+#include <linux/slab.h>
+#include <linux/ctype.h>
+#include <linux/string.h>
+#include <linux/sysctl.h>
+#include <linux/uaccess.h>
+#include <linux/export.h>
+#include <linux/init.h>
+#include <linux/fs.h>
+#include <linux/minmax.h>
+#include <linux/debugfs.h>
+
+#include <asm/debug.h>
+
+#define DEBUG_PROLOG_ENTRY -1
+
+#define ALL_AREAS 0 /* copy all debug areas */
+#define NO_AREAS  1 /* copy no debug areas */
+
+/* typedefs */
+
+typedef struct file_private_info {
+	loff_t offset;			/* offset of last read in file */
+	int    act_area;		/* number of last formated area */
+	int    act_page;		/* act page in given area */
+	int    act_entry;		/* last formated entry (offset */
+					/* relative to beginning of last */
+					/* formated page) */
+	size_t act_entry_offset;	/* up to this offset we copied */
+					/* in last read the last formated */
+					/* entry to userland */
+	char   temp_buf[2048];		/* buffer for output */
+	debug_info_t *debug_info_org;	/* original debug information */
+	debug_info_t *debug_info_snap;	/* snapshot of debug information */
+	struct debug_view *view;	/* used view of debug info */
+} file_private_info_t;
+
+typedef struct {
+	char *string;
+	/*
+	 * This assumes that all args are converted into longs
+	 * on L/390 this is the case for all types of parameter
+	 * except of floats, and long long (32 bit)
+	 *
+	 */
+	long args[];
+} debug_sprintf_entry_t;
+
+/* internal function prototyes */
+
+static int debug_init(void);
+static ssize_t debug_output(struct file *file, char __user *user_buf,
+			    size_t user_len, loff_t *offset);
+static ssize_t debug_input(struct file *file, const char __user *user_buf,
+			   size_t user_len, loff_t *offset);
+static int debug_open(struct inode *inode, struct file *file);
+static int debug_close(struct inode *inode, struct file *file);
+static debug_info_t *debug_info_create(const char *name, int pages_per_area,
+				       int nr_areas, int buf_size, umode_t mode);
+static void debug_info_get(debug_info_t *);
+static void debug_info_put(debug_info_t *);
+static int debug_prolog_level_fn(debug_info_t *id,
+				 struct debug_view *view, char *out_buf);
+static int debug_input_level_fn(debug_info_t *id, struct debug_view *view,
+				struct file *file, const char __user *user_buf,
+				size_t user_buf_size, loff_t *offset);
+static int debug_prolog_pages_fn(debug_info_t *id,
+				 struct debug_view *view, char *out_buf);
+static int debug_input_pages_fn(debug_info_t *id, struct debug_view *view,
+				struct file *file, const char __user *user_buf,
+				size_t user_buf_size, loff_t *offset);
+static int debug_input_flush_fn(debug_info_t *id, struct debug_view *view,
+				struct file *file, const char __user *user_buf,
+				size_t user_buf_size, loff_t *offset);
+static int debug_hex_ascii_format_fn(debug_info_t *id, struct debug_view *view,
+				     char *out_buf, const char *in_buf);
+static int debug_sprintf_format_fn(debug_info_t *id, struct debug_view *view,
+				   char *out_buf, const char *inbuf);
+static void debug_areas_swap(debug_info_t *a, debug_info_t *b);
+static void debug_events_append(debug_info_t *dest, debug_info_t *src);
+
+/* globals */
+
+struct debug_view debug_hex_ascii_view = {
+	"hex_ascii",
+	NULL,
+	&debug_dflt_header_fn,
+	&debug_hex_ascii_format_fn,
+	NULL,
+	NULL
+};
+EXPORT_SYMBOL(debug_hex_ascii_view);
+
+static struct debug_view debug_level_view = {
+	"level",
+	&debug_prolog_level_fn,
+	NULL,
+	NULL,
+	&debug_input_level_fn,
+	NULL
+};
+
+static struct debug_view debug_pages_view = {
+	"pages",
+	&debug_prolog_pages_fn,
+	NULL,
+	NULL,
+	&debug_input_pages_fn,
+	NULL
+};
+
+static struct debug_view debug_flush_view = {
+	"flush",
+	NULL,
+	NULL,
+	NULL,
+	&debug_input_flush_fn,
+	NULL
+};
+
+struct debug_view debug_sprintf_view = {
+	"sprintf",
+	NULL,
+	&debug_dflt_header_fn,
+	&debug_sprintf_format_fn,
+	NULL,
+	NULL
+};
+EXPORT_SYMBOL(debug_sprintf_view);
+
+/* used by dump analysis tools to determine version of debug feature */
+static unsigned int __used debug_feature_version = __DEBUG_FEATURE_VERSION;
+
+/* static globals */
+
+static debug_info_t *debug_area_first;
+static debug_info_t *debug_area_last;
+static DEFINE_MUTEX(debug_mutex);
+
+static int initialized;
+static int debug_critical;
+
+static const struct file_operations debug_file_ops = {
+	.owner	 = THIS_MODULE,
+	.read	 = debug_output,
+	.write	 = debug_input,
+	.open	 = debug_open,
+	.release = debug_close,
+	.llseek  = no_llseek,
+};
+
+static struct dentry *debug_debugfs_root_entry;
+
+/* functions */
+
+/*
+ * debug_areas_alloc
+ * - Debug areas are implemented as a threedimensonal array:
+ *   areas[areanumber][pagenumber][pageoffset]
+ */
+
+static debug_entry_t ***debug_areas_alloc(int pages_per_area, int nr_areas)
+{
+	debug_entry_t ***areas;
+	int i, j;
+
+	areas = kmalloc_array(nr_areas, sizeof(debug_entry_t **), GFP_KERNEL);
+	if (!areas)
+		goto fail_malloc_areas;
+	for (i = 0; i < nr_areas; i++) {
+		/* GFP_NOWARN to avoid user triggerable WARN, we handle fails */
+		areas[i] = kmalloc_array(pages_per_area,
+					 sizeof(debug_entry_t *),
+					 GFP_KERNEL | __GFP_NOWARN);
+		if (!areas[i])
+			goto fail_malloc_areas2;
+		for (j = 0; j < pages_per_area; j++) {
+			areas[i][j] = kzalloc(PAGE_SIZE, GFP_KERNEL);
+			if (!areas[i][j]) {
+				for (j--; j >= 0 ; j--)
+					kfree(areas[i][j]);
+				kfree(areas[i]);
+				goto fail_malloc_areas2;
+			}
+		}
+	}
+	return areas;
+
+fail_malloc_areas2:
+	for (i--; i >= 0; i--) {
+		for (j = 0; j < pages_per_area; j++)
+			kfree(areas[i][j]);
+		kfree(areas[i]);
+	}
+	kfree(areas);
+fail_malloc_areas:
+	return NULL;
+}
+
+/*
+ * debug_info_alloc
+ * - alloc new debug-info
+ */
+static debug_info_t *debug_info_alloc(const char *name, int pages_per_area,
+				      int nr_areas, int buf_size, int level,
+				      int mode)
+{
+	debug_info_t *rc;
+
+	/* alloc everything */
+	rc = kmalloc(sizeof(debug_info_t), GFP_KERNEL);
+	if (!rc)
+		goto fail_malloc_rc;
+	rc->active_entries = kcalloc(nr_areas, sizeof(int), GFP_KERNEL);
+	if (!rc->active_entries)
+		goto fail_malloc_active_entries;
+	rc->active_pages = kcalloc(nr_areas, sizeof(int), GFP_KERNEL);
+	if (!rc->active_pages)
+		goto fail_malloc_active_pages;
+	if ((mode == ALL_AREAS) && (pages_per_area != 0)) {
+		rc->areas = debug_areas_alloc(pages_per_area, nr_areas);
+		if (!rc->areas)
+			goto fail_malloc_areas;
+	} else {
+		rc->areas = NULL;
+	}
+
+	/* initialize members */
+	spin_lock_init(&rc->lock);
+	rc->pages_per_area = pages_per_area;
+	rc->nr_areas	   = nr_areas;
+	rc->active_area    = 0;
+	rc->level	   = level;
+	rc->buf_size	   = buf_size;
+	rc->entry_size	   = sizeof(debug_entry_t) + buf_size;
+	strscpy(rc->name, name, sizeof(rc->name));
+	memset(rc->views, 0, DEBUG_MAX_VIEWS * sizeof(struct debug_view *));
+	memset(rc->debugfs_entries, 0, DEBUG_MAX_VIEWS * sizeof(struct dentry *));
+	refcount_set(&(rc->ref_count), 0);
+
+	return rc;
+
+fail_malloc_areas:
+	kfree(rc->active_pages);
+fail_malloc_active_pages:
+	kfree(rc->active_entries);
+fail_malloc_active_entries:
+	kfree(rc);
+fail_malloc_rc:
+	return NULL;
+}
+
+/*
+ * debug_areas_free
+ * - free all debug areas
+ */
+static void debug_areas_free(debug_info_t *db_info)
+{
+	int i, j;
+
+	if (!db_info->areas)
+		return;
+	for (i = 0; i < db_info->nr_areas; i++) {
+		for (j = 0; j < db_info->pages_per_area; j++)
+			kfree(db_info->areas[i][j]);
+		kfree(db_info->areas[i]);
+	}
+	kfree(db_info->areas);
+	db_info->areas = NULL;
+}
+
+/*
+ * debug_info_free
+ * - free memory debug-info
+ */
+static void debug_info_free(debug_info_t *db_info)
+{
+	debug_areas_free(db_info);
+	kfree(db_info->active_entries);
+	kfree(db_info->active_pages);
+	kfree(db_info);
+}
+
+/*
+ * debug_info_create
+ * - create new debug-info
+ */
+
+static debug_info_t *debug_info_create(const char *name, int pages_per_area,
+				       int nr_areas, int buf_size, umode_t mode)
+{
+	debug_info_t *rc;
+
+	rc = debug_info_alloc(name, pages_per_area, nr_areas, buf_size,
+			      DEBUG_DEFAULT_LEVEL, ALL_AREAS);
+	if (!rc)
+		goto out;
+
+	rc->mode = mode & ~S_IFMT;
+	refcount_set(&rc->ref_count, 1);
+out:
+	return rc;
+}
+
+/*
+ * debug_info_copy
+ * - copy debug-info
+ */
+static debug_info_t *debug_info_copy(debug_info_t *in, int mode)
+{
+	unsigned long flags;
+	debug_info_t *rc;
+	int i, j;
+
+	/* get a consistent copy of the debug areas */
+	do {
+		rc = debug_info_alloc(in->name, in->pages_per_area,
+			in->nr_areas, in->buf_size, in->level, mode);
+		spin_lock_irqsave(&in->lock, flags);
+		if (!rc)
+			goto out;
+		/* has something changed in the meantime ? */
+		if ((rc->pages_per_area == in->pages_per_area) &&
+		    (rc->nr_areas == in->nr_areas)) {
+			break;
+		}
+		spin_unlock_irqrestore(&in->lock, flags);
+		debug_info_free(rc);
+	} while (1);
+
+	if (mode == NO_AREAS)
+		goto out;
+
+	for (i = 0; i < in->nr_areas; i++) {
+		for (j = 0; j < in->pages_per_area; j++)
+			memcpy(rc->areas[i][j], in->areas[i][j], PAGE_SIZE);
+	}
+out:
+	spin_unlock_irqrestore(&in->lock, flags);
+	return rc;
+}
+
+/*
+ * debug_info_get
+ * - increments reference count for debug-info
+ */
+static void debug_info_get(debug_info_t *db_info)
+{
+	if (db_info)
+		refcount_inc(&db_info->ref_count);
+}
+
+/*
+ * debug_info_put:
+ * - decreases reference count for debug-info and frees it if necessary
+ */
+static void debug_info_put(debug_info_t *db_info)
+{
+	if (!db_info)
+		return;
+	if (refcount_dec_and_test(&db_info->ref_count))
+		debug_info_free(db_info);
+}
+
+/*
+ * debug_format_entry:
+ * - format one debug entry and return size of formated data
+ */
+static int debug_format_entry(file_private_info_t *p_info)
+{
+	debug_info_t *id_snap	= p_info->debug_info_snap;
+	struct debug_view *view = p_info->view;
+	debug_entry_t *act_entry;
+	size_t len = 0;
+
+	if (p_info->act_entry == DEBUG_PROLOG_ENTRY) {
+		/* print prolog */
+		if (view->prolog_proc)
+			len += view->prolog_proc(id_snap, view, p_info->temp_buf);
+		goto out;
+	}
+	if (!id_snap->areas) /* this is true, if we have a prolog only view */
+		goto out;    /* or if 'pages_per_area' is 0 */
+	act_entry = (debug_entry_t *) ((char *)id_snap->areas[p_info->act_area]
+				       [p_info->act_page] + p_info->act_entry);
+
+	if (act_entry->clock == 0LL)
+		goto out; /* empty entry */
+	if (view->header_proc)
+		len += view->header_proc(id_snap, view, p_info->act_area,
+					 act_entry, p_info->temp_buf + len);
+	if (view->format_proc)
+		len += view->format_proc(id_snap, view, p_info->temp_buf + len,
+					 DEBUG_DATA(act_entry));
+out:
+	return len;
+}
+
+/*
+ * debug_next_entry:
+ * - goto next entry in p_info
+ */
+static inline int debug_next_entry(file_private_info_t *p_info)
+{
+	debug_info_t *id;
+
+	id = p_info->debug_info_snap;
+	if (p_info->act_entry == DEBUG_PROLOG_ENTRY) {
+		p_info->act_entry = 0;
+		p_info->act_page  = 0;
+		goto out;
+	}
+	if (!id->areas)
+		return 1;
+	p_info->act_entry += id->entry_size;
+	/* switch to next page, if we reached the end of the page  */
+	if (p_info->act_entry > (PAGE_SIZE - id->entry_size)) {
+		/* next page */
+		p_info->act_entry = 0;
+		p_info->act_page += 1;
+		if ((p_info->act_page % id->pages_per_area) == 0) {
+			/* next area */
+			p_info->act_area++;
+			p_info->act_page = 0;
+		}
+		if (p_info->act_area >= id->nr_areas)
+			return 1;
+	}
+out:
+	return 0;
+}
+
+/*
+ * debug_output:
+ * - called for user read()
+ * - copies formated debug entries to the user buffer
+ */
+static ssize_t debug_output(struct file *file,		/* file descriptor */
+			    char __user *user_buf,	/* user buffer */
+			    size_t len,			/* length of buffer */
+			    loff_t *offset)		/* offset in the file */
+{
+	size_t count = 0;
+	size_t entry_offset;
+	file_private_info_t *p_info;
+
+	p_info = (file_private_info_t *) file->private_data;
+	if (*offset != p_info->offset)
+		return -EPIPE;
+	if (p_info->act_area >= p_info->debug_info_snap->nr_areas)
+		return 0;
+	entry_offset = p_info->act_entry_offset;
+	while (count < len) {
+		int formatted_line_residue;
+		int formatted_line_size;
+		int user_buf_residue;
+		size_t copy_size;
+
+		formatted_line_size = debug_format_entry(p_info);
+		formatted_line_residue = formatted_line_size - entry_offset;
+		user_buf_residue = len-count;
+		copy_size = min(user_buf_residue, formatted_line_residue);
+		if (copy_size) {
+			if (copy_to_user(user_buf + count, p_info->temp_buf
+					 + entry_offset, copy_size))
+				return -EFAULT;
+			count += copy_size;
+			entry_offset += copy_size;
+		}
+		if (copy_size == formatted_line_residue) {
+			entry_offset = 0;
+			if (debug_next_entry(p_info))
+				goto out;
+		}
+	}
+out:
+	p_info->offset		 = *offset + count;
+	p_info->act_entry_offset = entry_offset;
+	*offset = p_info->offset;
+	return count;
+}
+
+/*
+ * debug_input:
+ * - called for user write()
+ * - calls input function of view
+ */
+static ssize_t debug_input(struct file *file, const char __user *user_buf,
+			   size_t length, loff_t *offset)
+{
+	file_private_info_t *p_info;
+	int rc = 0;
+
+	mutex_lock(&debug_mutex);
+	p_info = ((file_private_info_t *) file->private_data);
+	if (p_info->view->input_proc) {
+		rc = p_info->view->input_proc(p_info->debug_info_org,
+					      p_info->view, file, user_buf,
+					      length, offset);
+	} else {
+		rc = -EPERM;
+	}
+	mutex_unlock(&debug_mutex);
+	return rc; /* number of input characters */
+}
+
+/*
+ * debug_open:
+ * - called for user open()
+ * - copies formated output to private_data area of the file
+ *   handle
+ */
+static int debug_open(struct inode *inode, struct file *file)
+{
+	debug_info_t *debug_info, *debug_info_snapshot;
+	file_private_info_t *p_info;
+	int i, rc = 0;
+
+	mutex_lock(&debug_mutex);
+	debug_info = file_inode(file)->i_private;
+	/* find debug view */
+	for (i = 0; i < DEBUG_MAX_VIEWS; i++) {
+		if (!debug_info->views[i])
+			continue;
+		else if (debug_info->debugfs_entries[i] == file->f_path.dentry)
+			goto found; /* found view ! */
+	}
+	/* no entry found */
+	rc = -EINVAL;
+	goto out;
+
+found:
+
+	/* Make snapshot of current debug areas to get it consistent.	  */
+	/* To copy all the areas is only needed, if we have a view which  */
+	/* formats the debug areas. */
+
+	if (!debug_info->views[i]->format_proc && !debug_info->views[i]->header_proc)
+		debug_info_snapshot = debug_info_copy(debug_info, NO_AREAS);
+	else
+		debug_info_snapshot = debug_info_copy(debug_info, ALL_AREAS);
+
+	if (!debug_info_snapshot) {
+		rc = -ENOMEM;
+		goto out;
+	}
+	p_info = kmalloc(sizeof(file_private_info_t), GFP_KERNEL);
+	if (!p_info) {
+		debug_info_free(debug_info_snapshot);
+		rc = -ENOMEM;
+		goto out;
+	}
+	p_info->offset = 0;
+	p_info->debug_info_snap = debug_info_snapshot;
+	p_info->debug_info_org	= debug_info;
+	p_info->view = debug_info->views[i];
+	p_info->act_area = 0;
+	p_info->act_page = 0;
+	p_info->act_entry = DEBUG_PROLOG_ENTRY;
+	p_info->act_entry_offset = 0;
+	file->private_data = p_info;
+	debug_info_get(debug_info);
+	nonseekable_open(inode, file);
+out:
+	mutex_unlock(&debug_mutex);
+	return rc;
+}
+
+/*
+ * debug_close:
+ * - called for user close()
+ * - deletes  private_data area of the file handle
+ */
+static int debug_close(struct inode *inode, struct file *file)
+{
+	file_private_info_t *p_info;
+
+	p_info = (file_private_info_t *) file->private_data;
+	if (p_info->debug_info_snap)
+		debug_info_free(p_info->debug_info_snap);
+	debug_info_put(p_info->debug_info_org);
+	kfree(file->private_data);
+	return 0; /* success */
+}
+
+/* Create debugfs entries and add to internal list. */
+static void _debug_register(debug_info_t *id)
+{
+	/* create root directory */
+	id->debugfs_root_entry = debugfs_create_dir(id->name,
+						    debug_debugfs_root_entry);
+
+	/* append new element to linked list */
+	if (!debug_area_first) {
+		/* first element in list */
+		debug_area_first = id;
+		id->prev = NULL;
+	} else {
+		/* append element to end of list */
+		debug_area_last->next = id;
+		id->prev = debug_area_last;
+	}
+	debug_area_last = id;
+	id->next = NULL;
+
+	debug_register_view(id, &debug_level_view);
+	debug_register_view(id, &debug_flush_view);
+	debug_register_view(id, &debug_pages_view);
+}
+
+/**
+ * debug_register_mode() - creates and initializes debug area.
+ *
+ * @name:	Name of debug log (e.g. used for debugfs entry)
+ * @pages_per_area:	Number of pages, which will be allocated per area
+ * @nr_areas:	Number of debug areas
+ * @buf_size:	Size of data area in each debug entry
+ * @mode:	File mode for debugfs files. E.g. S_IRWXUGO
+ * @uid:	User ID for debugfs files. Currently only 0 is supported.
+ * @gid:	Group ID for debugfs files. Currently only 0 is supported.
+ *
+ * Return:
+ * - Handle for generated debug area
+ * - %NULL if register failed
+ *
+ * Allocates memory for a debug log.
+ * Must not be called within an interrupt handler.
+ */
+debug_info_t *debug_register_mode(const char *name, int pages_per_area,
+				  int nr_areas, int buf_size, umode_t mode,
+				  uid_t uid, gid_t gid)
+{
+	debug_info_t *rc = NULL;
+
+	/* Since debugfs currently does not support uid/gid other than root, */
+	/* we do not allow gid/uid != 0 until we get support for that. */
+	if ((uid != 0) || (gid != 0))
+		pr_warn("Root becomes the owner of all s390dbf files in sysfs\n");
+	BUG_ON(!initialized);
+
+	/* create new debug_info */
+	rc = debug_info_create(name, pages_per_area, nr_areas, buf_size, mode);
+	if (rc) {
+		mutex_lock(&debug_mutex);
+		_debug_register(rc);
+		mutex_unlock(&debug_mutex);
+	} else {
+		pr_err("Registering debug feature %s failed\n", name);
+	}
+	return rc;
+}
+EXPORT_SYMBOL(debug_register_mode);
+
+/**
+ * debug_register() - creates and initializes debug area with default file mode.
+ *
+ * @name:	Name of debug log (e.g. used for debugfs entry)
+ * @pages_per_area:	Number of pages, which will be allocated per area
+ * @nr_areas:	Number of debug areas
+ * @buf_size:	Size of data area in each debug entry
+ *
+ * Return:
+ * - Handle for generated debug area
+ * - %NULL if register failed
+ *
+ * Allocates memory for a debug log.
+ * The debugfs file mode access permissions are read and write for user.
+ * Must not be called within an interrupt handler.
+ */
+debug_info_t *debug_register(const char *name, int pages_per_area,
+			     int nr_areas, int buf_size)
+{
+	return debug_register_mode(name, pages_per_area, nr_areas, buf_size,
+				   S_IRUSR | S_IWUSR, 0, 0);
+}
+EXPORT_SYMBOL(debug_register);
+
+/**
+ * debug_register_static() - registers a static debug area
+ *
+ * @id: Handle for static debug area
+ * @pages_per_area: Number of pages per area
+ * @nr_areas: Number of debug areas
+ *
+ * Register debug_info_t defined using DEFINE_STATIC_DEBUG_INFO.
+ *
+ * Note: This function is called automatically via an initcall generated by
+ *	 DEFINE_STATIC_DEBUG_INFO.
+ */
+void debug_register_static(debug_info_t *id, int pages_per_area, int nr_areas)
+{
+	unsigned long flags;
+	debug_info_t *copy;
+
+	if (!initialized) {
+		pr_err("Tried to register debug feature %s too early\n",
+		       id->name);
+		return;
+	}
+
+	copy = debug_info_alloc("", pages_per_area, nr_areas, id->buf_size,
+				id->level, ALL_AREAS);
+	if (!copy) {
+		pr_err("Registering debug feature %s failed\n", id->name);
+
+		/* Clear pointers to prevent tracing into released initdata. */
+		spin_lock_irqsave(&id->lock, flags);
+		id->areas = NULL;
+		id->active_pages = NULL;
+		id->active_entries = NULL;
+		spin_unlock_irqrestore(&id->lock, flags);
+
+		return;
+	}
+
+	/* Replace static trace area with dynamic copy. */
+	spin_lock_irqsave(&id->lock, flags);
+	debug_events_append(copy, id);
+	debug_areas_swap(id, copy);
+	spin_unlock_irqrestore(&id->lock, flags);
+
+	/* Clear pointers to initdata and discard copy. */
+	copy->areas = NULL;
+	copy->active_pages = NULL;
+	copy->active_entries = NULL;
+	debug_info_free(copy);
+
+	mutex_lock(&debug_mutex);
+	_debug_register(id);
+	mutex_unlock(&debug_mutex);
+}
+
+/* Remove debugfs entries and remove from internal list. */
+static void _debug_unregister(debug_info_t *id)
+{
+	int i;
+
+	for (i = 0; i < DEBUG_MAX_VIEWS; i++) {
+		if (!id->views[i])
+			continue;
+		debugfs_remove(id->debugfs_entries[i]);
+	}
+	debugfs_remove(id->debugfs_root_entry);
+	if (id == debug_area_first)
+		debug_area_first = id->next;
+	if (id == debug_area_last)
+		debug_area_last = id->prev;
+	if (id->prev)
+		id->prev->next = id->next;
+	if (id->next)
+		id->next->prev = id->prev;
+}
+
+/**
+ * debug_unregister() - give back debug area.
+ *
+ * @id:		handle for debug log
+ *
+ * Return:
+ *    none
+ */
+void debug_unregister(debug_info_t *id)
+{
+	if (!id)
+		return;
+	mutex_lock(&debug_mutex);
+	_debug_unregister(id);
+	mutex_unlock(&debug_mutex);
+
+	debug_info_put(id);
+}
+EXPORT_SYMBOL(debug_unregister);
+
+/*
+ * debug_set_size:
+ * - set area size (number of pages) and number of areas
+ */
+static int debug_set_size(debug_info_t *id, int nr_areas, int pages_per_area)
+{
+	debug_info_t *new_id;
+	unsigned long flags;
+
+	if (!id || (nr_areas <= 0) || (pages_per_area < 0))
+		return -EINVAL;
+
+	new_id = debug_info_alloc("", pages_per_area, nr_areas, id->buf_size,
+				  id->level, ALL_AREAS);
+	if (!new_id) {
+		pr_info("Allocating memory for %i pages failed\n",
+			pages_per_area);
+		return -ENOMEM;
+	}
+
+	spin_lock_irqsave(&id->lock, flags);
+	debug_events_append(new_id, id);
+	debug_areas_swap(new_id, id);
+	debug_info_free(new_id);
+	spin_unlock_irqrestore(&id->lock, flags);
+	pr_info("%s: set new size (%i pages)\n", id->name, pages_per_area);
+
+	return 0;
+}
+
+/**
+ * debug_set_level() - Sets new actual debug level if new_level is valid.
+ *
+ * @id:		handle for debug log
+ * @new_level:	new debug level
+ *
+ * Return:
+ *    none
+ */
+void debug_set_level(debug_info_t *id, int new_level)
+{
+	unsigned long flags;
+
+	if (!id)
+		return;
+
+	if (new_level == DEBUG_OFF_LEVEL) {
+		pr_info("%s: switched off\n", id->name);
+	} else if ((new_level > DEBUG_MAX_LEVEL) || (new_level < 0)) {
+		pr_info("%s: level %i is out of range (%i - %i)\n",
+			id->name, new_level, 0, DEBUG_MAX_LEVEL);
+		return;
+	}
+
+	spin_lock_irqsave(&id->lock, flags);
+	id->level = new_level;
+	spin_unlock_irqrestore(&id->lock, flags);
+}
+EXPORT_SYMBOL(debug_set_level);
+
+/*
+ * proceed_active_entry:
+ * - set active entry to next in the ring buffer
+ */
+static inline void proceed_active_entry(debug_info_t *id)
+{
+	if ((id->active_entries[id->active_area] += id->entry_size)
+	    > (PAGE_SIZE - id->entry_size)) {
+		id->active_entries[id->active_area] = 0;
+		id->active_pages[id->active_area] =
+			(id->active_pages[id->active_area] + 1) %
+			id->pages_per_area;
+	}
+}
+
+/*
+ * proceed_active_area:
+ * - set active area to next in the ring buffer
+ */
+static inline void proceed_active_area(debug_info_t *id)
+{
+	id->active_area++;
+	id->active_area = id->active_area % id->nr_areas;
+}
+
+/*
+ * get_active_entry:
+ */
+static inline debug_entry_t *get_active_entry(debug_info_t *id)
+{
+	return (debug_entry_t *) (((char *) id->areas[id->active_area]
+				   [id->active_pages[id->active_area]]) +
+				  id->active_entries[id->active_area]);
+}
+
+/* Swap debug areas of a and b. */
+static void debug_areas_swap(debug_info_t *a, debug_info_t *b)
+{
+	swap(a->nr_areas, b->nr_areas);
+	swap(a->pages_per_area, b->pages_per_area);
+	swap(a->areas, b->areas);
+	swap(a->active_area, b->active_area);
+	swap(a->active_pages, b->active_pages);
+	swap(a->active_entries, b->active_entries);
+}
+
+/* Append all debug events in active area from source to destination log. */
+static void debug_events_append(debug_info_t *dest, debug_info_t *src)
+{
+	debug_entry_t *from, *to, *last;
+
+	if (!src->areas || !dest->areas)
+		return;
+
+	/* Loop over all entries in src, starting with oldest. */
+	from = get_active_entry(src);
+	last = from;
+	do {
+		if (from->clock != 0LL) {
+			to = get_active_entry(dest);
+			memset(to, 0, dest->entry_size);
+			memcpy(to, from, min(src->entry_size,
+					     dest->entry_size));
+			proceed_active_entry(dest);
+		}
+
+		proceed_active_entry(src);
+		from = get_active_entry(src);
+	} while (from != last);
+}
+
+/*
+ * debug_finish_entry:
+ * - set timestamp, caller address, cpu number etc.
+ */
+
+static inline void debug_finish_entry(debug_info_t *id, debug_entry_t *active,
+				      int level, int exception)
+{
+	unsigned long timestamp;
+	union tod_clock clk;
+
+	store_tod_clock_ext(&clk);
+	timestamp = clk.us;
+	timestamp -= TOD_UNIX_EPOCH >> 12;
+	active->clock = timestamp;
+	active->cpu = smp_processor_id();
+	active->caller = __builtin_return_address(0);
+	active->exception = exception;
+	active->level = level;
+	proceed_active_entry(id);
+	if (exception)
+		proceed_active_area(id);
+}
+
+static int debug_stoppable = 1;
+static int debug_active = 1;
+
+#define CTL_S390DBF_STOPPABLE 5678
+#define CTL_S390DBF_ACTIVE 5679
+
+/*
+ * proc handler for the running debug_active sysctl
+ * always allow read, allow write only if debug_stoppable is set or
+ * if debug_active is already off
+ */
+static int s390dbf_procactive(struct ctl_table *table, int write,
+			      void *buffer, size_t *lenp, loff_t *ppos)
+{
+	if (!write || debug_stoppable || !debug_active)
+		return proc_dointvec(table, write, buffer, lenp, ppos);
+	else
+		return 0;
+}
+
+static struct ctl_table s390dbf_table[] = {
+	{
+		.procname	= "debug_stoppable",
+		.data		= &debug_stoppable,
+		.maxlen		= sizeof(int),
+		.mode		= S_IRUGO | S_IWUSR,
+		.proc_handler	= proc_dointvec,
+	},
+	{
+		.procname	= "debug_active",
+		.data		= &debug_active,
+		.maxlen		= sizeof(int),
+		.mode		= S_IRUGO | S_IWUSR,
+		.proc_handler	= s390dbf_procactive,
+	},
+	{ }
+};
+
+static struct ctl_table_header *s390dbf_sysctl_header;
+
+/**
+ * debug_stop_all() - stops the debug feature if stopping is allowed.
+ *
+ * Return:
+ * -   none
+ *
+ * Currently used in case of a kernel oops.
+ */
+void debug_stop_all(void)
+{
+	if (debug_stoppable)
+		debug_active = 0;
+}
+EXPORT_SYMBOL(debug_stop_all);
+
+/**
+ * debug_set_critical() - event/exception functions try lock instead of spin.
+ *
+ * Return:
+ * -   none
+ *
+ * Currently used in case of stopping all CPUs but the current one.
+ * Once in this state, functions to write a debug entry for an
+ * event or exception no longer spin on the debug area lock,
+ * but only try to get it and fail if they do not get the lock.
+ */
+void debug_set_critical(void)
+{
+	debug_critical = 1;
+}
+
+/*
+ * debug_event_common:
+ * - write debug entry with given size
+ */
+debug_entry_t *debug_event_common(debug_info_t *id, int level, const void *buf,
+				  int len)
+{
+	debug_entry_t *active;
+	unsigned long flags;
+
+	if (!debug_active || !id->areas)
+		return NULL;
+	if (debug_critical) {
+		if (!spin_trylock_irqsave(&id->lock, flags))
+			return NULL;
+	} else {
+		spin_lock_irqsave(&id->lock, flags);
+	}
+	do {
+		active = get_active_entry(id);
+		memcpy(DEBUG_DATA(active), buf, min(len, id->buf_size));
+		if (len < id->buf_size)
+			memset((DEBUG_DATA(active)) + len, 0, id->buf_size - len);
+		debug_finish_entry(id, active, level, 0);
+		len -= id->buf_size;
+		buf += id->buf_size;
+	} while (len > 0);
+
+	spin_unlock_irqrestore(&id->lock, flags);
+	return active;
+}
+EXPORT_SYMBOL(debug_event_common);
+
+/*
+ * debug_exception_common:
+ * - write debug entry with given size and switch to next debug area
+ */
+debug_entry_t *debug_exception_common(debug_info_t *id, int level,
+				      const void *buf, int len)
+{
+	debug_entry_t *active;
+	unsigned long flags;
+
+	if (!debug_active || !id->areas)
+		return NULL;
+	if (debug_critical) {
+		if (!spin_trylock_irqsave(&id->lock, flags))
+			return NULL;
+	} else {
+		spin_lock_irqsave(&id->lock, flags);
+	}
+	do {
+		active = get_active_entry(id);
+		memcpy(DEBUG_DATA(active), buf, min(len, id->buf_size));
+		if (len < id->buf_size)
+			memset((DEBUG_DATA(active)) + len, 0, id->buf_size - len);
+		debug_finish_entry(id, active, level, len <= id->buf_size);
+		len -= id->buf_size;
+		buf += id->buf_size;
+	} while (len > 0);
+
+	spin_unlock_irqrestore(&id->lock, flags);
+	return active;
+}
+EXPORT_SYMBOL(debug_exception_common);
+
+/*
+ * counts arguments in format string for sprintf view
+ */
+static inline int debug_count_numargs(char *string)
+{
+	int numargs = 0;
+
+	while (*string) {
+		if (*string++ == '%')
+			numargs++;
+	}
+	return numargs;
+}
+
+/*
+ * debug_sprintf_event:
+ */
+debug_entry_t *__debug_sprintf_event(debug_info_t *id, int level, char *string, ...)
+{
+	debug_sprintf_entry_t *curr_event;
+	debug_entry_t *active;
+	unsigned long flags;
+	int numargs, idx;
+	va_list ap;
+
+	if (!debug_active || !id->areas)
+		return NULL;
+	numargs = debug_count_numargs(string);
+
+	if (debug_critical) {
+		if (!spin_trylock_irqsave(&id->lock, flags))
+			return NULL;
+	} else {
+		spin_lock_irqsave(&id->lock, flags);
+	}
+	active = get_active_entry(id);
+	curr_event = (debug_sprintf_entry_t *) DEBUG_DATA(active);
+	va_start(ap, string);
+	curr_event->string = string;
+	for (idx = 0; idx < min(numargs, (int)(id->buf_size / sizeof(long)) - 1); idx++)
+		curr_event->args[idx] = va_arg(ap, long);
+	va_end(ap);
+	debug_finish_entry(id, active, level, 0);
+	spin_unlock_irqrestore(&id->lock, flags);
+
+	return active;
+}
+EXPORT_SYMBOL(__debug_sprintf_event);
+
+/*
+ * debug_sprintf_exception:
+ */
+debug_entry_t *__debug_sprintf_exception(debug_info_t *id, int level, char *string, ...)
+{
+	debug_sprintf_entry_t *curr_event;
+	debug_entry_t *active;
+	unsigned long flags;
+	int numargs, idx;
+	va_list ap;
+
+	if (!debug_active || !id->areas)
+		return NULL;
+
+	numargs = debug_count_numargs(string);
+
+	if (debug_critical) {
+		if (!spin_trylock_irqsave(&id->lock, flags))
+			return NULL;
+	} else {
+		spin_lock_irqsave(&id->lock, flags);
+	}
+	active = get_active_entry(id);
+	curr_event = (debug_sprintf_entry_t *)DEBUG_DATA(active);
+	va_start(ap, string);
+	curr_event->string = string;
+	for (idx = 0; idx < min(numargs, (int)(id->buf_size / sizeof(long)) - 1); idx++)
+		curr_event->args[idx] = va_arg(ap, long);
+	va_end(ap);
+	debug_finish_entry(id, active, level, 1);
+	spin_unlock_irqrestore(&id->lock, flags);
+
+	return active;
+}
+EXPORT_SYMBOL(__debug_sprintf_exception);
+
+/**
+ * debug_register_view() - registers new debug view and creates debugfs
+ *			   dir entry
+ *
+ * @id:		handle for debug log
+ * @view:	pointer to debug view struct
+ *
+ * Return:
+ * -   0  : ok
+ * -   < 0: Error
+ */
+int debug_register_view(debug_info_t *id, struct debug_view *view)
+{
+	unsigned long flags;
+	struct dentry *pde;
+	umode_t mode;
+	int rc = 0;
+	int i;
+
+	if (!id)
+		goto out;
+	mode = (id->mode | S_IFREG) & ~S_IXUGO;
+	if (!(view->prolog_proc || view->format_proc || view->header_proc))
+		mode &= ~(S_IRUSR | S_IRGRP | S_IROTH);
+	if (!view->input_proc)
+		mode &= ~(S_IWUSR | S_IWGRP | S_IWOTH);
+	pde = debugfs_create_file(view->name, mode, id->debugfs_root_entry,
+				  id, &debug_file_ops);
+	spin_lock_irqsave(&id->lock, flags);
+	for (i = 0; i < DEBUG_MAX_VIEWS; i++) {
+		if (!id->views[i])
+			break;
+	}
+	if (i == DEBUG_MAX_VIEWS) {
+		rc = -1;
+	} else {
+		id->views[i] = view;
+		id->debugfs_entries[i] = pde;
+	}
+	spin_unlock_irqrestore(&id->lock, flags);
+	if (rc) {
+		pr_err("Registering view %s/%s would exceed the maximum "
+		       "number of views %i\n", id->name, view->name, i);
+		debugfs_remove(pde);
+	}
+out:
+	return rc;
+}
+EXPORT_SYMBOL(debug_register_view);
+
+/**
+ * debug_unregister_view() - unregisters debug view and removes debugfs
+ *			     dir entry
+ *
+ * @id:		handle for debug log
+ * @view:	pointer to debug view struct
+ *
+ * Return:
+ * -   0  : ok
+ * -   < 0: Error
+ */
+int debug_unregister_view(debug_info_t *id, struct debug_view *view)
+{
+	struct dentry *dentry = NULL;
+	unsigned long flags;
+	int i, rc = 0;
+
+	if (!id)
+		goto out;
+	spin_lock_irqsave(&id->lock, flags);
+	for (i = 0; i < DEBUG_MAX_VIEWS; i++) {
+		if (id->views[i] == view)
+			break;
+	}
+	if (i == DEBUG_MAX_VIEWS) {
+		rc = -1;
+	} else {
+		dentry = id->debugfs_entries[i];
+		id->views[i] = NULL;
+		id->debugfs_entries[i] = NULL;
+	}
+	spin_unlock_irqrestore(&id->lock, flags);
+	debugfs_remove(dentry);
+out:
+	return rc;
+}
+EXPORT_SYMBOL(debug_unregister_view);
+
+static inline char *debug_get_user_string(const char __user *user_buf,
+					  size_t user_len)
+{
+	char *buffer;
+
+	buffer = kmalloc(user_len + 1, GFP_KERNEL);
+	if (!buffer)
+		return ERR_PTR(-ENOMEM);
+	if (copy_from_user(buffer, user_buf, user_len) != 0) {
+		kfree(buffer);
+		return ERR_PTR(-EFAULT);
+	}
+	/* got the string, now strip linefeed. */
+	if (buffer[user_len - 1] == '\n')
+		buffer[user_len - 1] = 0;
+	else
+		buffer[user_len] = 0;
+	return buffer;
+}
+
+static inline int debug_get_uint(char *buf)
+{
+	int rc;
+
+	buf = skip_spaces(buf);
+	rc = simple_strtoul(buf, &buf, 10);
+	if (*buf)
+		rc = -EINVAL;
+	return rc;
+}
+
+/*
+ * functions for debug-views
+ ***********************************
+*/
+
+/*
+ * prints out actual debug level
+ */
+
+static int debug_prolog_pages_fn(debug_info_t *id, struct debug_view *view,
+				 char *out_buf)
+{
+	return sprintf(out_buf, "%i\n", id->pages_per_area);
+}
+
+/*
+ * reads new size (number of pages per debug area)
+ */
+
+static int debug_input_pages_fn(debug_info_t *id, struct debug_view *view,
+				struct file *file, const char __user *user_buf,
+				size_t user_len, loff_t *offset)
+{
+	int rc, new_pages;
+	char *str;
+
+	if (user_len > 0x10000)
+		user_len = 0x10000;
+	if (*offset != 0) {
+		rc = -EPIPE;
+		goto out;
+	}
+	str = debug_get_user_string(user_buf, user_len);
+	if (IS_ERR(str)) {
+		rc = PTR_ERR(str);
+		goto out;
+	}
+	new_pages = debug_get_uint(str);
+	if (new_pages < 0) {
+		rc = -EINVAL;
+		goto free_str;
+	}
+	rc = debug_set_size(id, id->nr_areas, new_pages);
+	if (rc != 0) {
+		rc = -EINVAL;
+		goto free_str;
+	}
+	rc = user_len;
+free_str:
+	kfree(str);
+out:
+	*offset += user_len;
+	return rc;		/* number of input characters */
+}
+
+/*
+ * prints out actual debug level
+ */
+static int debug_prolog_level_fn(debug_info_t *id, struct debug_view *view,
+				 char *out_buf)
+{
+	int rc = 0;
+
+	if (id->level == DEBUG_OFF_LEVEL)
+		rc = sprintf(out_buf, "-\n");
+	else
+		rc = sprintf(out_buf, "%i\n", id->level);
+	return rc;
+}
+
+/*
+ * reads new debug level
+ */
+static int debug_input_level_fn(debug_info_t *id, struct debug_view *view,
+				struct file *file, const char __user *user_buf,
+				size_t user_len, loff_t *offset)
+{
+	int rc, new_level;
+	char *str;
+
+	if (user_len > 0x10000)
+		user_len = 0x10000;
+	if (*offset != 0) {
+		rc = -EPIPE;
+		goto out;
+	}
+	str = debug_get_user_string(user_buf, user_len);
+	if (IS_ERR(str)) {
+		rc = PTR_ERR(str);
+		goto out;
+	}
+	if (str[0] == '-') {
+		debug_set_level(id, DEBUG_OFF_LEVEL);
+		rc = user_len;
+		goto free_str;
+	} else {
+		new_level = debug_get_uint(str);
+	}
+	if (new_level < 0) {
+		pr_warn("%s is not a valid level for a debug feature\n", str);
+		rc = -EINVAL;
+	} else {
+		debug_set_level(id, new_level);
+		rc = user_len;
+	}
+free_str:
+	kfree(str);
+out:
+	*offset += user_len;
+	return rc;		/* number of input characters */
+}
+
+/*
+ * flushes debug areas
+ */
+static void debug_flush(debug_info_t *id, int area)
+{
+	unsigned long flags;
+	int i, j;
+
+	if (!id || !id->areas)
+		return;
+	spin_lock_irqsave(&id->lock, flags);
+	if (area == DEBUG_FLUSH_ALL) {
+		id->active_area = 0;
+		memset(id->active_entries, 0, id->nr_areas * sizeof(int));
+		for (i = 0; i < id->nr_areas; i++) {
+			id->active_pages[i] = 0;
+			for (j = 0; j < id->pages_per_area; j++)
+				memset(id->areas[i][j], 0, PAGE_SIZE);
+		}
+	} else if (area >= 0 && area < id->nr_areas) {
+		id->active_entries[area] = 0;
+		id->active_pages[area] = 0;
+		for (i = 0; i < id->pages_per_area; i++)
+			memset(id->areas[area][i], 0, PAGE_SIZE);
+	}
+	spin_unlock_irqrestore(&id->lock, flags);
+}
+
+/*
+ * view function: flushes debug areas
+ */
+static int debug_input_flush_fn(debug_info_t *id, struct debug_view *view,
+				struct file *file, const char __user *user_buf,
+				size_t user_len, loff_t *offset)
+{
+	char input_buf[1];
+	int rc = user_len;
+
+	if (user_len > 0x10000)
+		user_len = 0x10000;
+	if (*offset != 0) {
+		rc = -EPIPE;
+		goto out;
+	}
+	if (copy_from_user(input_buf, user_buf, 1)) {
+		rc = -EFAULT;
+		goto out;
+	}
+	if (input_buf[0] == '-') {
+		debug_flush(id, DEBUG_FLUSH_ALL);
+		goto out;
+	}
+	if (isdigit(input_buf[0])) {
+		int area = ((int) input_buf[0] - (int) '0');
+
+		debug_flush(id, area);
+		goto out;
+	}
+
+	pr_info("Flushing debug data failed because %c is not a valid "
+		 "area\n", input_buf[0]);
+
+out:
+	*offset += user_len;
+	return rc;		/* number of input characters */
+}
+
+/*
+ * prints debug data in hex/ascii format
+ */
+static int debug_hex_ascii_format_fn(debug_info_t *id, struct debug_view *view,
+				     char *out_buf, const char *in_buf)
+{
+	int i, rc = 0;
+
+	for (i = 0; i < id->buf_size; i++)
+		rc += sprintf(out_buf + rc, "%02x ", ((unsigned char *) in_buf)[i]);
+	rc += sprintf(out_buf + rc, "| ");
+	for (i = 0; i < id->buf_size; i++) {
+		unsigned char c = in_buf[i];
+
+		if (isascii(c) && isprint(c))
+			rc += sprintf(out_buf + rc, "%c", c);
+		else
+			rc += sprintf(out_buf + rc, ".");
+	}
+	rc += sprintf(out_buf + rc, "\n");
+	return rc;
+}
+
+/*
+ * prints header for debug entry
+ */
+int debug_dflt_header_fn(debug_info_t *id, struct debug_view *view,
+			 int area, debug_entry_t *entry, char *out_buf)
+{
+	unsigned long sec, usec;
+	unsigned long caller;
+	unsigned int level;
+	char *except_str;
+	int rc = 0;
+
+	level = entry->level;
+	sec = entry->clock;
+	usec = do_div(sec, USEC_PER_SEC);
+
+	if (entry->exception)
+		except_str = "*";
+	else
+		except_str = "-";
+	caller = (unsigned long) entry->caller;
+	rc += sprintf(out_buf, "%02i %011ld:%06lu %1u %1s %04u %px  ",
+		      area, sec, usec, level, except_str,
+		      entry->cpu, (void *)caller);
+	return rc;
+}
+EXPORT_SYMBOL(debug_dflt_header_fn);
+
+/*
+ * prints debug data sprintf-formated:
+ * debug_sprinf_event/exception calls must be used together with this view
+ */
+
+#define DEBUG_SPRINTF_MAX_ARGS 10
+
+static int debug_sprintf_format_fn(debug_info_t *id, struct debug_view *view,
+				   char *out_buf, const char *inbuf)
+{
+	debug_sprintf_entry_t *curr_event = (debug_sprintf_entry_t *)inbuf;
+	int num_longs, num_used_args = 0, i, rc = 0;
+	int index[DEBUG_SPRINTF_MAX_ARGS];
+
+	/* count of longs fit into one entry */
+	num_longs = id->buf_size / sizeof(long);
+
+	if (num_longs < 1)
+		goto out; /* bufsize of entry too small */
+	if (num_longs == 1) {
+		/* no args, we use only the string */
+		strcpy(out_buf, curr_event->string);
+		rc = strlen(curr_event->string);
+		goto out;
+	}
+
+	/* number of arguments used for sprintf (without the format string) */
+	num_used_args = min(DEBUG_SPRINTF_MAX_ARGS, (num_longs - 1));
+
+	memset(index, 0, DEBUG_SPRINTF_MAX_ARGS * sizeof(int));
+
+	for (i = 0; i < num_used_args; i++)
+		index[i] = i;
+
+	rc = sprintf(out_buf, curr_event->string, curr_event->args[index[0]],
+		     curr_event->args[index[1]], curr_event->args[index[2]],
+		     curr_event->args[index[3]], curr_event->args[index[4]],
+		     curr_event->args[index[5]], curr_event->args[index[6]],
+		     curr_event->args[index[7]], curr_event->args[index[8]],
+		     curr_event->args[index[9]]);
+out:
+	return rc;
+}
+
+/*
+ * debug_init:
+ * - is called exactly once to initialize the debug feature
+ */
+static int __init debug_init(void)
+{
+	s390dbf_sysctl_header = register_sysctl("s390dbf", s390dbf_table);
+	mutex_lock(&debug_mutex);
+	debug_debugfs_root_entry = debugfs_create_dir(DEBUG_DIR_ROOT, NULL);
+	initialized = 1;
+	mutex_unlock(&debug_mutex);
+	return 0;
+}
+postcore_initcall(debug_init);
diff --git a/arch/s390/kernel/diag.c b/arch/s390/kernel/diag.c
new file mode 100644
index 0000000000..f9f06cd8fc
--- /dev/null
+++ b/arch/s390/kernel/diag.c
@@ -0,0 +1,269 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Implementation of s390 diagnose codes
+ *
+ * Copyright IBM Corp. 2007
+ * Author(s): Michael Holzheu <holzheu@de.ibm.com>
+ */
+
+#include <linux/export.h>
+#include <linux/init.h>
+#include <linux/cpu.h>
+#include <linux/seq_file.h>
+#include <linux/debugfs.h>
+#include <linux/vmalloc.h>
+#include <asm/asm-extable.h>
+#include <asm/diag.h>
+#include <asm/trace/diag.h>
+#include <asm/sections.h>
+#include "entry.h"
+
+struct diag_stat {
+	unsigned int counter[NR_DIAG_STAT];
+};
+
+static DEFINE_PER_CPU(struct diag_stat, diag_stat);
+
+struct diag_desc {
+	int code;
+	char *name;
+};
+
+static const struct diag_desc diag_map[NR_DIAG_STAT] = {
+	[DIAG_STAT_X008] = { .code = 0x008, .name = "Console Function" },
+	[DIAG_STAT_X00C] = { .code = 0x00c, .name = "Pseudo Timer" },
+	[DIAG_STAT_X010] = { .code = 0x010, .name = "Release Pages" },
+	[DIAG_STAT_X014] = { .code = 0x014, .name = "Spool File Services" },
+	[DIAG_STAT_X044] = { .code = 0x044, .name = "Voluntary Timeslice End" },
+	[DIAG_STAT_X064] = { .code = 0x064, .name = "NSS Manipulation" },
+	[DIAG_STAT_X08C] = { .code = 0x08c, .name = "Access 3270 Display Device Information" },
+	[DIAG_STAT_X09C] = { .code = 0x09c, .name = "Relinquish Timeslice" },
+	[DIAG_STAT_X0DC] = { .code = 0x0dc, .name = "Appldata Control" },
+	[DIAG_STAT_X204] = { .code = 0x204, .name = "Logical-CPU Utilization" },
+	[DIAG_STAT_X210] = { .code = 0x210, .name = "Device Information" },
+	[DIAG_STAT_X224] = { .code = 0x224, .name = "EBCDIC-Name Table" },
+	[DIAG_STAT_X250] = { .code = 0x250, .name = "Block I/O" },
+	[DIAG_STAT_X258] = { .code = 0x258, .name = "Page-Reference Services" },
+	[DIAG_STAT_X26C] = { .code = 0x26c, .name = "Certain System Information" },
+	[DIAG_STAT_X288] = { .code = 0x288, .name = "Time Bomb" },
+	[DIAG_STAT_X2C4] = { .code = 0x2c4, .name = "FTP Services" },
+	[DIAG_STAT_X2FC] = { .code = 0x2fc, .name = "Guest Performance Data" },
+	[DIAG_STAT_X304] = { .code = 0x304, .name = "Partition-Resource Service" },
+	[DIAG_STAT_X308] = { .code = 0x308, .name = "List-Directed IPL" },
+	[DIAG_STAT_X318] = { .code = 0x318, .name = "CP Name and Version Codes" },
+	[DIAG_STAT_X320] = { .code = 0x320, .name = "Certificate Store" },
+	[DIAG_STAT_X500] = { .code = 0x500, .name = "Virtio Service" },
+};
+
+struct diag_ops __amode31_ref diag_amode31_ops = {
+	.diag210 = _diag210_amode31,
+	.diag26c = _diag26c_amode31,
+	.diag14 = _diag14_amode31,
+	.diag0c = _diag0c_amode31,
+	.diag8c = _diag8c_amode31,
+	.diag308_reset = _diag308_reset_amode31
+};
+
+static struct diag210 _diag210_tmp_amode31 __section(".amode31.data");
+struct diag210 __amode31_ref *__diag210_tmp_amode31 = &_diag210_tmp_amode31;
+
+static struct diag8c _diag8c_tmp_amode31 __section(".amode31.data");
+static struct diag8c __amode31_ref *__diag8c_tmp_amode31 = &_diag8c_tmp_amode31;
+
+static int show_diag_stat(struct seq_file *m, void *v)
+{
+	struct diag_stat *stat;
+	unsigned long n = (unsigned long) v - 1;
+	int cpu, prec, tmp;
+
+	cpus_read_lock();
+	if (n == 0) {
+		seq_puts(m, "         ");
+
+		for_each_online_cpu(cpu) {
+			prec = 10;
+			for (tmp = 10; cpu >= tmp; tmp *= 10)
+				prec--;
+			seq_printf(m, "%*s%d", prec, "CPU", cpu);
+		}
+		seq_putc(m, '\n');
+	} else if (n <= NR_DIAG_STAT) {
+		seq_printf(m, "diag %03x:", diag_map[n-1].code);
+		for_each_online_cpu(cpu) {
+			stat = &per_cpu(diag_stat, cpu);
+			seq_printf(m, " %10u", stat->counter[n-1]);
+		}
+		seq_printf(m, "    %s\n", diag_map[n-1].name);
+	}
+	cpus_read_unlock();
+	return 0;
+}
+
+static void *show_diag_stat_start(struct seq_file *m, loff_t *pos)
+{
+	return *pos <= NR_DIAG_STAT ? (void *)((unsigned long) *pos + 1) : NULL;
+}
+
+static void *show_diag_stat_next(struct seq_file *m, void *v, loff_t *pos)
+{
+	++*pos;
+	return show_diag_stat_start(m, pos);
+}
+
+static void show_diag_stat_stop(struct seq_file *m, void *v)
+{
+}
+
+static const struct seq_operations show_diag_stat_sops = {
+	.start	= show_diag_stat_start,
+	.next	= show_diag_stat_next,
+	.stop	= show_diag_stat_stop,
+	.show	= show_diag_stat,
+};
+
+DEFINE_SEQ_ATTRIBUTE(show_diag_stat);
+
+static int __init show_diag_stat_init(void)
+{
+	debugfs_create_file("diag_stat", 0400, NULL, NULL,
+			    &show_diag_stat_fops);
+	return 0;
+}
+
+device_initcall(show_diag_stat_init);
+
+void diag_stat_inc(enum diag_stat_enum nr)
+{
+	this_cpu_inc(diag_stat.counter[nr]);
+	trace_s390_diagnose(diag_map[nr].code);
+}
+EXPORT_SYMBOL(diag_stat_inc);
+
+void notrace diag_stat_inc_norecursion(enum diag_stat_enum nr)
+{
+	this_cpu_inc(diag_stat.counter[nr]);
+	trace_s390_diagnose_norecursion(diag_map[nr].code);
+}
+EXPORT_SYMBOL(diag_stat_inc_norecursion);
+
+/*
+ * Diagnose 14: Input spool file manipulation
+ */
+int diag14(unsigned long rx, unsigned long ry1, unsigned long subcode)
+{
+	diag_stat_inc(DIAG_STAT_X014);
+	return diag_amode31_ops.diag14(rx, ry1, subcode);
+}
+EXPORT_SYMBOL(diag14);
+
+static inline int __diag204(unsigned long *subcode, unsigned long size, void *addr)
+{
+	union register_pair rp = { .even = *subcode, .odd = size };
+
+	asm volatile(
+		"	diag	%[addr],%[rp],0x204\n"
+		"0:	nopr	%%r7\n"
+		EX_TABLE(0b,0b)
+		: [rp] "+&d" (rp.pair) : [addr] "d" (addr) : "memory");
+	*subcode = rp.even;
+	return rp.odd;
+}
+
+/**
+ * diag204() - Issue diagnose 204 call.
+ * @subcode: Subcode of diagnose 204 to be executed.
+ * @size: Size of area in pages which @area points to, if given.
+ * @addr: Vmalloc'ed memory area where the result is written to.
+ *
+ * Execute diagnose 204 with the given subcode and write the result to the
+ * memory area specified with @addr. For subcodes which do not write a
+ * result to memory both @size and @addr must be zero. If @addr is
+ * specified it must be page aligned and must have been allocated with
+ * vmalloc(). Conversion to real / physical addresses will be handled by
+ * this function if required.
+ */
+int diag204(unsigned long subcode, unsigned long size, void *addr)
+{
+	if (addr) {
+		if (WARN_ON_ONCE(!is_vmalloc_addr(addr)))
+			return -1;
+		if (WARN_ON_ONCE(!IS_ALIGNED((unsigned long)addr, PAGE_SIZE)))
+			return -1;
+	}
+	if ((subcode & DIAG204_SUBCODE_MASK) == DIAG204_SUBC_STIB4)
+		addr = (void *)pfn_to_phys(vmalloc_to_pfn(addr));
+	diag_stat_inc(DIAG_STAT_X204);
+	size = __diag204(&subcode, size, addr);
+	if (subcode)
+		return -1;
+	return size;
+}
+EXPORT_SYMBOL(diag204);
+
+/*
+ * Diagnose 210: Get information about a virtual device
+ */
+int diag210(struct diag210 *addr)
+{
+	static DEFINE_SPINLOCK(diag210_lock);
+	unsigned long flags;
+	int ccode;
+
+	spin_lock_irqsave(&diag210_lock, flags);
+	*__diag210_tmp_amode31 = *addr;
+
+	diag_stat_inc(DIAG_STAT_X210);
+	ccode = diag_amode31_ops.diag210(__diag210_tmp_amode31);
+
+	*addr = *__diag210_tmp_amode31;
+	spin_unlock_irqrestore(&diag210_lock, flags);
+
+	return ccode;
+}
+EXPORT_SYMBOL(diag210);
+
+/*
+ * Diagnose 8C: Access 3270 Display Device Information
+ */
+int diag8c(struct diag8c *addr, struct ccw_dev_id *devno)
+{
+	static DEFINE_SPINLOCK(diag8c_lock);
+	unsigned long flags;
+	int ccode;
+
+	spin_lock_irqsave(&diag8c_lock, flags);
+
+	diag_stat_inc(DIAG_STAT_X08C);
+	ccode = diag_amode31_ops.diag8c(__diag8c_tmp_amode31, devno, sizeof(*addr));
+
+	*addr = *__diag8c_tmp_amode31;
+	spin_unlock_irqrestore(&diag8c_lock, flags);
+
+	return ccode;
+}
+EXPORT_SYMBOL(diag8c);
+
+int diag224(void *ptr)
+{
+	int rc = -EOPNOTSUPP;
+
+	diag_stat_inc(DIAG_STAT_X224);
+	asm volatile(
+		"	diag	%1,%2,0x224\n"
+		"0:	lhi	%0,0x0\n"
+		"1:\n"
+		EX_TABLE(0b,1b)
+		: "+d" (rc) :"d" (0), "d" (ptr) : "memory");
+	return rc;
+}
+EXPORT_SYMBOL(diag224);
+
+/*
+ * Diagnose 26C: Access Certain System Information
+ */
+int diag26c(void *req, void *resp, enum diag26c_sc subcode)
+{
+	diag_stat_inc(DIAG_STAT_X26C);
+	return diag_amode31_ops.diag26c(req, resp, subcode);
+}
+EXPORT_SYMBOL(diag26c);
diff --git a/arch/s390/kernel/dis.c b/arch/s390/kernel/dis.c
new file mode 100644
index 0000000000..89dc826a8d
--- /dev/null
+++ b/arch/s390/kernel/dis.c
@@ -0,0 +1,590 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Disassemble s390 instructions.
+ *
+ * Copyright IBM Corp. 2007
+ * Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com),
+ */
+
+#include <linux/sched.h>
+#include <linux/kernel.h>
+#include <linux/string.h>
+#include <linux/errno.h>
+#include <linux/ptrace.h>
+#include <linux/timer.h>
+#include <linux/mm.h>
+#include <linux/smp.h>
+#include <linux/init.h>
+#include <linux/interrupt.h>
+#include <linux/delay.h>
+#include <linux/export.h>
+#include <linux/kallsyms.h>
+#include <linux/reboot.h>
+#include <linux/kprobes.h>
+#include <linux/kdebug.h>
+#include <linux/uaccess.h>
+#include <linux/atomic.h>
+#include <linux/io.h>
+#include <asm/dis.h>
+#include <asm/cpcmd.h>
+#include <asm/lowcore.h>
+#include <asm/debug.h>
+#include <asm/irq.h>
+
+/* Type of operand */
+#define OPERAND_GPR	0x1	/* Operand printed as %rx */
+#define OPERAND_FPR	0x2	/* Operand printed as %fx */
+#define OPERAND_AR	0x4	/* Operand printed as %ax */
+#define OPERAND_CR	0x8	/* Operand printed as %cx */
+#define OPERAND_VR	0x10	/* Operand printed as %vx */
+#define OPERAND_DISP	0x20	/* Operand printed as displacement */
+#define OPERAND_BASE	0x40	/* Operand printed as base register */
+#define OPERAND_INDEX	0x80	/* Operand printed as index register */
+#define OPERAND_PCREL	0x100	/* Operand printed as pc-relative symbol */
+#define OPERAND_SIGNED	0x200	/* Operand printed as signed value */
+#define OPERAND_LENGTH	0x400	/* Operand printed as length (+1) */
+
+struct s390_operand {
+	unsigned char bits;	/* The number of bits in the operand. */
+	unsigned char shift;	/* The number of bits to shift. */
+	unsigned short flags;	/* One bit syntax flags. */
+};
+
+struct s390_insn {
+	union {
+		const char name[5];
+		struct {
+			unsigned char zero;
+			unsigned int offset;
+		} __packed;
+	};
+	unsigned char opfrag;
+	unsigned char format;
+};
+
+struct s390_opcode_offset {
+	unsigned char opcode;
+	unsigned char mask;
+	unsigned char byte;
+	unsigned short offset;
+	unsigned short count;
+} __packed;
+
+enum {
+	UNUSED,
+	A_8,	/* Access reg. starting at position 8 */
+	A_12,	/* Access reg. starting at position 12 */
+	A_24,	/* Access reg. starting at position 24 */
+	A_28,	/* Access reg. starting at position 28 */
+	B_16,	/* Base register starting at position 16 */
+	B_32,	/* Base register starting at position 32 */
+	C_8,	/* Control reg. starting at position 8 */
+	C_12,	/* Control reg. starting at position 12 */
+	D20_20, /* 20 bit displacement starting at 20 */
+	D_20,	/* Displacement starting at position 20 */
+	D_36,	/* Displacement starting at position 36 */
+	F_8,	/* FPR starting at position 8 */
+	F_12,	/* FPR starting at position 12 */
+	F_16,	/* FPR starting at position 16 */
+	F_24,	/* FPR starting at position 24 */
+	F_28,	/* FPR starting at position 28 */
+	F_32,	/* FPR starting at position 32 */
+	I8_8,	/* 8 bit signed value starting at 8 */
+	I8_32,	/* 8 bit signed value starting at 32 */
+	I16_16, /* 16 bit signed value starting at 16 */
+	I16_32, /* 16 bit signed value starting at 32 */
+	I32_16, /* 32 bit signed value starting at 16 */
+	J12_12, /* 12 bit PC relative offset at 12 */
+	J16_16, /* 16 bit PC relative offset at 16 */
+	J16_32, /* 16 bit PC relative offset at 32 */
+	J24_24, /* 24 bit PC relative offset at 24 */
+	J32_16, /* 32 bit PC relative offset at 16 */
+	L4_8,	/* 4 bit length starting at position 8 */
+	L4_12,	/* 4 bit length starting at position 12 */
+	L8_8,	/* 8 bit length starting at position 8 */
+	R_8,	/* GPR starting at position 8 */
+	R_12,	/* GPR starting at position 12 */
+	R_16,	/* GPR starting at position 16 */
+	R_24,	/* GPR starting at position 24 */
+	R_28,	/* GPR starting at position 28 */
+	U4_8,	/* 4 bit unsigned value starting at 8 */
+	U4_12,	/* 4 bit unsigned value starting at 12 */
+	U4_16,	/* 4 bit unsigned value starting at 16 */
+	U4_20,	/* 4 bit unsigned value starting at 20 */
+	U4_24,	/* 4 bit unsigned value starting at 24 */
+	U4_28,	/* 4 bit unsigned value starting at 28 */
+	U4_32,	/* 4 bit unsigned value starting at 32 */
+	U4_36,	/* 4 bit unsigned value starting at 36 */
+	U8_8,	/* 8 bit unsigned value starting at 8 */
+	U8_16,	/* 8 bit unsigned value starting at 16 */
+	U8_24,	/* 8 bit unsigned value starting at 24 */
+	U8_28,	/* 8 bit unsigned value starting at 28 */
+	U8_32,	/* 8 bit unsigned value starting at 32 */
+	U12_16, /* 12 bit unsigned value starting at 16 */
+	U16_16, /* 16 bit unsigned value starting at 16 */
+	U16_32, /* 16 bit unsigned value starting at 32 */
+	U32_16, /* 32 bit unsigned value starting at 16 */
+	VX_12,	/* Vector index register starting at position 12 */
+	V_8,	/* Vector reg. starting at position 8 */
+	V_12,	/* Vector reg. starting at position 12 */
+	V_16,	/* Vector reg. starting at position 16 */
+	V_32,	/* Vector reg. starting at position 32 */
+	X_12,	/* Index register starting at position 12 */
+};
+
+static const struct s390_operand operands[] = {
+	[UNUSED] = {  0,  0, 0 },
+	[A_8]	 = {  4,  8, OPERAND_AR },
+	[A_12]	 = {  4, 12, OPERAND_AR },
+	[A_24]	 = {  4, 24, OPERAND_AR },
+	[A_28]	 = {  4, 28, OPERAND_AR },
+	[B_16]	 = {  4, 16, OPERAND_BASE | OPERAND_GPR },
+	[B_32]	 = {  4, 32, OPERAND_BASE | OPERAND_GPR },
+	[C_8]	 = {  4,  8, OPERAND_CR },
+	[C_12]	 = {  4, 12, OPERAND_CR },
+	[D20_20] = { 20, 20, OPERAND_DISP | OPERAND_SIGNED },
+	[D_20]	 = { 12, 20, OPERAND_DISP },
+	[D_36]	 = { 12, 36, OPERAND_DISP },
+	[F_8]	 = {  4,  8, OPERAND_FPR },
+	[F_12]	 = {  4, 12, OPERAND_FPR },
+	[F_16]	 = {  4, 16, OPERAND_FPR },
+	[F_24]	 = {  4, 24, OPERAND_FPR },
+	[F_28]	 = {  4, 28, OPERAND_FPR },
+	[F_32]	 = {  4, 32, OPERAND_FPR },
+	[I8_8]	 = {  8,  8, OPERAND_SIGNED },
+	[I8_32]	 = {  8, 32, OPERAND_SIGNED },
+	[I16_16] = { 16, 16, OPERAND_SIGNED },
+	[I16_32] = { 16, 32, OPERAND_SIGNED },
+	[I32_16] = { 32, 16, OPERAND_SIGNED },
+	[J12_12] = { 12, 12, OPERAND_PCREL },
+	[J16_16] = { 16, 16, OPERAND_PCREL },
+	[J16_32] = { 16, 32, OPERAND_PCREL },
+	[J24_24] = { 24, 24, OPERAND_PCREL },
+	[J32_16] = { 32, 16, OPERAND_PCREL },
+	[L4_8]	 = {  4,  8, OPERAND_LENGTH },
+	[L4_12]	 = {  4, 12, OPERAND_LENGTH },
+	[L8_8]	 = {  8,  8, OPERAND_LENGTH },
+	[R_8]	 = {  4,  8, OPERAND_GPR },
+	[R_12]	 = {  4, 12, OPERAND_GPR },
+	[R_16]	 = {  4, 16, OPERAND_GPR },
+	[R_24]	 = {  4, 24, OPERAND_GPR },
+	[R_28]	 = {  4, 28, OPERAND_GPR },
+	[U4_8]	 = {  4,  8, 0 },
+	[U4_12]	 = {  4, 12, 0 },
+	[U4_16]	 = {  4, 16, 0 },
+	[U4_20]	 = {  4, 20, 0 },
+	[U4_24]	 = {  4, 24, 0 },
+	[U4_28]	 = {  4, 28, 0 },
+	[U4_32]	 = {  4, 32, 0 },
+	[U4_36]	 = {  4, 36, 0 },
+	[U8_8]	 = {  8,  8, 0 },
+	[U8_16]	 = {  8, 16, 0 },
+	[U8_24]	 = {  8, 24, 0 },
+	[U8_28]	 = {  8, 28, 0 },
+	[U8_32]	 = {  8, 32, 0 },
+	[U12_16] = { 12, 16, 0 },
+	[U16_16] = { 16, 16, 0 },
+	[U16_32] = { 16, 32, 0 },
+	[U32_16] = { 32, 16, 0 },
+	[VX_12]	 = {  4, 12, OPERAND_INDEX | OPERAND_VR },
+	[V_8]	 = {  4,  8, OPERAND_VR },
+	[V_12]	 = {  4, 12, OPERAND_VR },
+	[V_16]	 = {  4, 16, OPERAND_VR },
+	[V_32]	 = {  4, 32, OPERAND_VR },
+	[X_12]	 = {  4, 12, OPERAND_INDEX | OPERAND_GPR },
+};
+
+static const unsigned char formats[][6] = {
+	[INSTR_E]	     = { 0, 0, 0, 0, 0, 0 },
+	[INSTR_IE_UU]	     = { U4_24, U4_28, 0, 0, 0, 0 },
+	[INSTR_MII_UPP]	     = { U4_8, J12_12, J24_24 },
+	[INSTR_RIE_R0IU]     = { R_8, I16_16, U4_32, 0, 0, 0 },
+	[INSTR_RIE_R0UU]     = { R_8, U16_16, U4_32, 0, 0, 0 },
+	[INSTR_RIE_RRI0]     = { R_8, R_12, I16_16, 0, 0, 0 },
+	[INSTR_RIE_RRP]	     = { R_8, R_12, J16_16, 0, 0, 0 },
+	[INSTR_RIE_RRPU]     = { R_8, R_12, U4_32, J16_16, 0, 0 },
+	[INSTR_RIE_RRUUU]    = { R_8, R_12, U8_16, U8_24, U8_32, 0 },
+	[INSTR_RIE_RUI0]     = { R_8, I16_16, U4_12, 0, 0, 0 },
+	[INSTR_RIE_RUPI]     = { R_8, I8_32, U4_12, J16_16, 0, 0 },
+	[INSTR_RIE_RUPU]     = { R_8, U8_32, U4_12, J16_16, 0, 0 },
+	[INSTR_RIL_RI]	     = { R_8, I32_16, 0, 0, 0, 0 },
+	[INSTR_RIL_RP]	     = { R_8, J32_16, 0, 0, 0, 0 },
+	[INSTR_RIL_RU]	     = { R_8, U32_16, 0, 0, 0, 0 },
+	[INSTR_RIL_UP]	     = { U4_8, J32_16, 0, 0, 0, 0 },
+	[INSTR_RIS_RURDI]    = { R_8, I8_32, U4_12, D_20, B_16, 0 },
+	[INSTR_RIS_RURDU]    = { R_8, U8_32, U4_12, D_20, B_16, 0 },
+	[INSTR_RI_RI]	     = { R_8, I16_16, 0, 0, 0, 0 },
+	[INSTR_RI_RP]	     = { R_8, J16_16, 0, 0, 0, 0 },
+	[INSTR_RI_RU]	     = { R_8, U16_16, 0, 0, 0, 0 },
+	[INSTR_RI_UP]	     = { U4_8, J16_16, 0, 0, 0, 0 },
+	[INSTR_RRE_00]	     = { 0, 0, 0, 0, 0, 0 },
+	[INSTR_RRE_AA]	     = { A_24, A_28, 0, 0, 0, 0 },
+	[INSTR_RRE_AR]	     = { A_24, R_28, 0, 0, 0, 0 },
+	[INSTR_RRE_F0]	     = { F_24, 0, 0, 0, 0, 0 },
+	[INSTR_RRE_FF]	     = { F_24, F_28, 0, 0, 0, 0 },
+	[INSTR_RRE_FR]	     = { F_24, R_28, 0, 0, 0, 0 },
+	[INSTR_RRE_R0]	     = { R_24, 0, 0, 0, 0, 0 },
+	[INSTR_RRE_RA]	     = { R_24, A_28, 0, 0, 0, 0 },
+	[INSTR_RRE_RF]	     = { R_24, F_28, 0, 0, 0, 0 },
+	[INSTR_RRE_RR]	     = { R_24, R_28, 0, 0, 0, 0 },
+	[INSTR_RRF_0UFF]     = { F_24, F_28, U4_20, 0, 0, 0 },
+	[INSTR_RRF_0URF]     = { R_24, F_28, U4_20, 0, 0, 0 },
+	[INSTR_RRF_F0FF]     = { F_16, F_24, F_28, 0, 0, 0 },
+	[INSTR_RRF_F0FF2]    = { F_24, F_16, F_28, 0, 0, 0 },
+	[INSTR_RRF_F0FR]     = { F_24, F_16, R_28, 0, 0, 0 },
+	[INSTR_RRF_FFRU]     = { F_24, F_16, R_28, U4_20, 0, 0 },
+	[INSTR_RRF_FUFF]     = { F_24, F_16, F_28, U4_20, 0, 0 },
+	[INSTR_RRF_FUFF2]    = { F_24, F_28, F_16, U4_20, 0, 0 },
+	[INSTR_RRF_R0RR]     = { R_24, R_16, R_28, 0, 0, 0 },
+	[INSTR_RRF_R0RR2]    = { R_24, R_28, R_16, 0, 0, 0 },
+	[INSTR_RRF_RURR]     = { R_24, R_28, R_16, U4_20, 0, 0 },
+	[INSTR_RRF_RURR2]    = { R_24, R_16, R_28, U4_20, 0, 0 },
+	[INSTR_RRF_U0FF]     = { F_24, U4_16, F_28, 0, 0, 0 },
+	[INSTR_RRF_U0RF]     = { R_24, U4_16, F_28, 0, 0, 0 },
+	[INSTR_RRF_U0RR]     = { R_24, R_28, U4_16, 0, 0, 0 },
+	[INSTR_RRF_URR]	     = { R_24, R_28, U8_16, 0, 0, 0 },
+	[INSTR_RRF_UUFF]     = { F_24, U4_16, F_28, U4_20, 0, 0 },
+	[INSTR_RRF_UUFR]     = { F_24, U4_16, R_28, U4_20, 0, 0 },
+	[INSTR_RRF_UURF]     = { R_24, U4_16, F_28, U4_20, 0, 0 },
+	[INSTR_RRS_RRRDU]    = { R_8, R_12, U4_32, D_20, B_16 },
+	[INSTR_RR_FF]	     = { F_8, F_12, 0, 0, 0, 0 },
+	[INSTR_RR_R0]	     = { R_8,  0, 0, 0, 0, 0 },
+	[INSTR_RR_RR]	     = { R_8, R_12, 0, 0, 0, 0 },
+	[INSTR_RR_U0]	     = { U8_8,	0, 0, 0, 0, 0 },
+	[INSTR_RR_UR]	     = { U4_8, R_12, 0, 0, 0, 0 },
+	[INSTR_RSI_RRP]	     = { R_8, R_12, J16_16, 0, 0, 0 },
+	[INSTR_RSL_LRDFU]    = { F_32, D_20, L8_8, B_16, U4_36, 0 },
+	[INSTR_RSL_R0RD]     = { D_20, L4_8, B_16, 0, 0, 0 },
+	[INSTR_RSY_AARD]     = { A_8, A_12, D20_20, B_16, 0, 0 },
+	[INSTR_RSY_CCRD]     = { C_8, C_12, D20_20, B_16, 0, 0 },
+	[INSTR_RSY_RDRU]     = { R_8, D20_20, B_16, U4_12, 0, 0 },
+	[INSTR_RSY_RRRD]     = { R_8, R_12, D20_20, B_16, 0, 0 },
+	[INSTR_RSY_RURD]     = { R_8, U4_12, D20_20, B_16, 0, 0 },
+	[INSTR_RSY_RURD2]    = { R_8, D20_20, B_16, U4_12, 0, 0 },
+	[INSTR_RS_AARD]	     = { A_8, A_12, D_20, B_16, 0, 0 },
+	[INSTR_RS_CCRD]	     = { C_8, C_12, D_20, B_16, 0, 0 },
+	[INSTR_RS_R0RD]	     = { R_8, D_20, B_16, 0, 0, 0 },
+	[INSTR_RS_RRRD]	     = { R_8, R_12, D_20, B_16, 0, 0 },
+	[INSTR_RS_RURD]	     = { R_8, U4_12, D_20, B_16, 0, 0 },
+	[INSTR_RXE_FRRD]     = { F_8, D_20, X_12, B_16, 0, 0 },
+	[INSTR_RXE_RRRDU]    = { R_8, D_20, X_12, B_16, U4_32, 0 },
+	[INSTR_RXF_FRRDF]    = { F_32, F_8, D_20, X_12, B_16, 0 },
+	[INSTR_RXY_FRRD]     = { F_8, D20_20, X_12, B_16, 0, 0 },
+	[INSTR_RXY_RRRD]     = { R_8, D20_20, X_12, B_16, 0, 0 },
+	[INSTR_RXY_URRD]     = { U4_8, D20_20, X_12, B_16, 0, 0 },
+	[INSTR_RX_FRRD]	     = { F_8, D_20, X_12, B_16, 0, 0 },
+	[INSTR_RX_RRRD]	     = { R_8, D_20, X_12, B_16, 0, 0 },
+	[INSTR_RX_URRD]	     = { U4_8, D_20, X_12, B_16, 0, 0 },
+	[INSTR_SIL_RDI]	     = { D_20, B_16, I16_32, 0, 0, 0 },
+	[INSTR_SIL_RDU]	     = { D_20, B_16, U16_32, 0, 0, 0 },
+	[INSTR_SIY_IRD]	     = { D20_20, B_16, I8_8, 0, 0, 0 },
+	[INSTR_SIY_RD]	     = { D20_20, B_16, 0, 0, 0, 0 },
+	[INSTR_SIY_URD]	     = { D20_20, B_16, U8_8, 0, 0, 0 },
+	[INSTR_SI_RD]	     = { D_20, B_16, 0, 0, 0, 0 },
+	[INSTR_SI_URD]	     = { D_20, B_16, U8_8, 0, 0, 0 },
+	[INSTR_SMI_U0RDP]    = { U4_8, J16_32, D_20, B_16, 0, 0 },
+	[INSTR_SSE_RDRD]     = { D_20, B_16, D_36, B_32, 0, 0 },
+	[INSTR_SSF_RRDRD]    = { D_20, B_16, D_36, B_32, R_8, 0 },
+	[INSTR_SSF_RRDRD2]   = { R_8, D_20, B_16, D_36, B_32, 0 },
+	[INSTR_SS_L0RDRD]    = { D_20, L8_8, B_16, D_36, B_32, 0 },
+	[INSTR_SS_L2RDRD]    = { D_20, B_16, D_36, L8_8, B_32, 0 },
+	[INSTR_SS_LIRDRD]    = { D_20, L4_8, B_16, D_36, B_32, U4_12 },
+	[INSTR_SS_LLRDRD]    = { D_20, L4_8, B_16, D_36, L4_12, B_32 },
+	[INSTR_SS_RRRDRD]    = { D_20, R_8, B_16, D_36, B_32, R_12 },
+	[INSTR_SS_RRRDRD2]   = { R_8, D_20, B_16, R_12, D_36, B_32 },
+	[INSTR_SS_RRRDRD3]   = { R_8, R_12, D_20, B_16, D_36, B_32 },
+	[INSTR_S_00]	     = { 0, 0, 0, 0, 0, 0 },
+	[INSTR_S_RD]	     = { D_20, B_16, 0, 0, 0, 0 },
+	[INSTR_VRI_V0IU]     = { V_8, I16_16, U4_32, 0, 0, 0 },
+	[INSTR_VRI_V0U]	     = { V_8, U16_16, 0, 0, 0, 0 },
+	[INSTR_VRI_V0UU2]    = { V_8, U16_16, U4_32, 0, 0, 0 },
+	[INSTR_VRI_V0UUU]    = { V_8, U8_16, U8_24, U4_32, 0, 0 },
+	[INSTR_VRI_VR0UU]    = { V_8, R_12, U8_28, U4_24, 0, 0 },
+	[INSTR_VRI_VVUU]     = { V_8, V_12, U16_16, U4_32, 0, 0 },
+	[INSTR_VRI_VVUUU]    = { V_8, V_12, U12_16, U4_32, U4_28, 0 },
+	[INSTR_VRI_VVUUU2]   = { V_8, V_12, U8_28, U8_16, U4_24, 0 },
+	[INSTR_VRI_VVV0U]    = { V_8, V_12, V_16, U8_24, 0, 0 },
+	[INSTR_VRI_VVV0UU]   = { V_8, V_12, V_16, U8_24, U4_32, 0 },
+	[INSTR_VRI_VVV0UU2]  = { V_8, V_12, V_16, U8_28, U4_24, 0 },
+	[INSTR_VRR_0V]	     = { V_12, 0, 0, 0, 0, 0 },
+	[INSTR_VRR_0VV0U]    = { V_12, V_16, U4_24, 0, 0, 0 },
+	[INSTR_VRR_RV0UU]    = { R_8, V_12, U4_24, U4_28, 0, 0 },
+	[INSTR_VRR_VRR]	     = { V_8, R_12, R_16, 0, 0, 0 },
+	[INSTR_VRR_VV]	     = { V_8, V_12, 0, 0, 0, 0 },
+	[INSTR_VRR_VV0U]     = { V_8, V_12, U4_32, 0, 0, 0 },
+	[INSTR_VRR_VV0U0U]   = { V_8, V_12, U4_32, U4_24, 0, 0 },
+	[INSTR_VRR_VV0U2]    = { V_8, V_12, U4_24, 0, 0, 0 },
+	[INSTR_VRR_VV0UU2]   = { V_8, V_12, U4_32, U4_28, 0, 0 },
+	[INSTR_VRR_VV0UUU]   = { V_8, V_12, U4_32, U4_28, U4_24, 0 },
+	[INSTR_VRR_VVV]	     = { V_8, V_12, V_16, 0, 0, 0 },
+	[INSTR_VRR_VVV0U]    = { V_8, V_12, V_16, U4_32, 0, 0 },
+	[INSTR_VRR_VVV0U0]   = { V_8, V_12, V_16, U4_24, 0, 0 },
+	[INSTR_VRR_VVV0U0U]  = { V_8, V_12, V_16, U4_32, U4_24, 0 },
+	[INSTR_VRR_VVV0UU]   = { V_8, V_12, V_16, U4_32, U4_28, 0 },
+	[INSTR_VRR_VVV0UUU]  = { V_8, V_12, V_16, U4_32, U4_28, U4_24 },
+	[INSTR_VRR_VVV0V]    = { V_8, V_12, V_16, V_32, 0, 0 },
+	[INSTR_VRR_VVVU0UV]  = { V_8, V_12, V_16, V_32, U4_28, U4_20 },
+	[INSTR_VRR_VVVU0V]   = { V_8, V_12, V_16, V_32, U4_20, 0 },
+	[INSTR_VRR_VVVUU0V]  = { V_8, V_12, V_16, V_32, U4_20, U4_24 },
+	[INSTR_VRS_RRDV]     = { V_32, R_12, D_20, B_16, 0, 0 },
+	[INSTR_VRS_RVRDU]    = { R_8, V_12, D_20, B_16, U4_32, 0 },
+	[INSTR_VRS_VRRD]     = { V_8, R_12, D_20, B_16, 0, 0 },
+	[INSTR_VRS_VRRDU]    = { V_8, R_12, D_20, B_16, U4_32, 0 },
+	[INSTR_VRS_VVRDU]    = { V_8, V_12, D_20, B_16, U4_32, 0 },
+	[INSTR_VRV_VVXRDU]   = { V_8, D_20, VX_12, B_16, U4_32, 0 },
+	[INSTR_VRX_VRRDU]    = { V_8, D_20, X_12, B_16, U4_32, 0 },
+	[INSTR_VRX_VV]	     = { V_8, V_12, 0, 0, 0, 0 },
+	[INSTR_VSI_URDV]     = { V_32, D_20, B_16, U8_8, 0, 0 },
+};
+
+static char long_insn_name[][7] = LONG_INSN_INITIALIZER;
+static struct s390_insn opcode[] = OPCODE_TABLE_INITIALIZER;
+static struct s390_opcode_offset opcode_offset[] = OPCODE_OFFSET_INITIALIZER;
+
+/* Extracts an operand value from an instruction.  */
+static unsigned int extract_operand(unsigned char *code,
+				    const struct s390_operand *operand)
+{
+	unsigned char *cp;
+	unsigned int val;
+	int bits;
+
+	/* Extract fragments of the operand byte for byte.  */
+	cp = code + operand->shift / 8;
+	bits = (operand->shift & 7) + operand->bits;
+	val = 0;
+	do {
+		val <<= 8;
+		val |= (unsigned int) *cp++;
+		bits -= 8;
+	} while (bits > 0);
+	val >>= -bits;
+	val &= ((1U << (operand->bits - 1)) << 1) - 1;
+
+	/* Check for special long displacement case.  */
+	if (operand->bits == 20 && operand->shift == 20)
+		val = (val & 0xff) << 12 | (val & 0xfff00) >> 8;
+
+	/* Check for register extensions bits for vector registers. */
+	if (operand->flags & OPERAND_VR) {
+		if (operand->shift == 8)
+			val |= (code[4] & 8) << 1;
+		else if (operand->shift == 12)
+			val |= (code[4] & 4) << 2;
+		else if (operand->shift == 16)
+			val |= (code[4] & 2) << 3;
+		else if (operand->shift == 32)
+			val |= (code[4] & 1) << 4;
+	}
+
+	/* Sign extend value if the operand is signed or pc relative.  */
+	if ((operand->flags & (OPERAND_SIGNED | OPERAND_PCREL)) &&
+	    (val & (1U << (operand->bits - 1))))
+		val |= (-1U << (operand->bits - 1)) << 1;
+
+	/* Double value if the operand is pc relative.	*/
+	if (operand->flags & OPERAND_PCREL)
+		val <<= 1;
+
+	/* Length x in an instructions has real length x + 1.  */
+	if (operand->flags & OPERAND_LENGTH)
+		val++;
+	return val;
+}
+
+struct s390_insn *find_insn(unsigned char *code)
+{
+	struct s390_opcode_offset *entry;
+	struct s390_insn *insn;
+	unsigned char opfrag;
+	int i;
+
+	/* Search the opcode offset table to find an entry which
+	 * matches the beginning of the opcode. If there is no match
+	 * the last entry will be used, which is the default entry for
+	 * unknown instructions as well as 1-byte opcode instructions.
+	 */
+	for (i = 0; i < ARRAY_SIZE(opcode_offset); i++) {
+		entry = &opcode_offset[i];
+		if (entry->opcode == code[0])
+			break;
+	}
+
+	opfrag = *(code + entry->byte) & entry->mask;
+
+	insn = &opcode[entry->offset];
+	for (i = 0; i < entry->count; i++) {
+		if (insn->opfrag == opfrag)
+			return insn;
+		insn++;
+	}
+	return NULL;
+}
+
+static int print_insn(char *buffer, unsigned char *code, unsigned long addr)
+{
+	struct s390_insn *insn;
+	const unsigned char *ops;
+	const struct s390_operand *operand;
+	unsigned int value;
+	char separator;
+	char *ptr;
+	int i;
+
+	ptr = buffer;
+	insn = find_insn(code);
+	if (insn) {
+		if (insn->zero == 0)
+			ptr += sprintf(ptr, "%.7s\t",
+				       long_insn_name[insn->offset]);
+		else
+			ptr += sprintf(ptr, "%.5s\t", insn->name);
+		/* Extract the operands. */
+		separator = 0;
+		for (ops = formats[insn->format], i = 0;
+		     *ops != 0 && i < 6; ops++, i++) {
+			operand = operands + *ops;
+			value = extract_operand(code, operand);
+			if ((operand->flags & OPERAND_INDEX)  && value == 0)
+				continue;
+			if ((operand->flags & OPERAND_BASE) &&
+			    value == 0 && separator == '(') {
+				separator = ',';
+				continue;
+			}
+			if (separator)
+				ptr += sprintf(ptr, "%c", separator);
+			if (operand->flags & OPERAND_GPR)
+				ptr += sprintf(ptr, "%%r%i", value);
+			else if (operand->flags & OPERAND_FPR)
+				ptr += sprintf(ptr, "%%f%i", value);
+			else if (operand->flags & OPERAND_AR)
+				ptr += sprintf(ptr, "%%a%i", value);
+			else if (operand->flags & OPERAND_CR)
+				ptr += sprintf(ptr, "%%c%i", value);
+			else if (operand->flags & OPERAND_VR)
+				ptr += sprintf(ptr, "%%v%i", value);
+			else if (operand->flags & OPERAND_PCREL) {
+				void *pcrel = (void *)((int)value + addr);
+
+				ptr += sprintf(ptr, "%px", pcrel);
+			} else if (operand->flags & OPERAND_SIGNED)
+				ptr += sprintf(ptr, "%i", value);
+			else
+				ptr += sprintf(ptr, "%u", value);
+			if (operand->flags & OPERAND_DISP)
+				separator = '(';
+			else if (operand->flags & OPERAND_BASE) {
+				ptr += sprintf(ptr, ")");
+				separator = ',';
+			} else
+				separator = ',';
+		}
+	} else
+		ptr += sprintf(ptr, "unknown");
+	return (int) (ptr - buffer);
+}
+
+static int copy_from_regs(struct pt_regs *regs, void *dst, void *src, int len)
+{
+	if (user_mode(regs)) {
+		if (copy_from_user(dst, (char __user *)src, len))
+			return -EFAULT;
+	} else {
+		if (copy_from_kernel_nofault(dst, src, len))
+			return -EFAULT;
+	}
+	return 0;
+}
+
+void show_code(struct pt_regs *regs)
+{
+	char *mode = user_mode(regs) ? "User" : "Krnl";
+	unsigned char code[64];
+	char buffer[128], *ptr;
+	unsigned long addr;
+	int start, end, opsize, hops, i;
+
+	/* Get a snapshot of the 64 bytes surrounding the fault address. */
+	for (start = 32; start && regs->psw.addr >= 34 - start; start -= 2) {
+		addr = regs->psw.addr - 34 + start;
+		if (copy_from_regs(regs, code + start - 2, (void *)addr, 2))
+			break;
+	}
+	for (end = 32; end < 64; end += 2) {
+		addr = regs->psw.addr + end - 32;
+		if (copy_from_regs(regs, code + end, (void *)addr, 2))
+			break;
+	}
+	/* Code snapshot usable ? */
+	if ((regs->psw.addr & 1) || start >= end) {
+		printk("%s Code: Bad PSW.\n", mode);
+		return;
+	}
+	/* Find a starting point for the disassembly. */
+	while (start < 32) {
+		for (i = 0, hops = 0; start + i < 32 && hops < 3; hops++) {
+			if (!find_insn(code + start + i))
+				break;
+			i += insn_length(code[start + i]);
+		}
+		if (start + i == 32)
+			/* Looks good, sequence ends at PSW. */
+			break;
+		start += 2;
+	}
+	/* Decode the instructions. */
+	ptr = buffer;
+	ptr += sprintf(ptr, "%s Code:", mode);
+	hops = 0;
+	while (start < end && hops < 8) {
+		opsize = insn_length(code[start]);
+		if  (start + opsize == 32)
+			*ptr++ = '#';
+		else if (start == 32)
+			*ptr++ = '>';
+		else
+			*ptr++ = ' ';
+		addr = regs->psw.addr + start - 32;
+		ptr += sprintf(ptr, "%px: ", (void *)addr);
+		if (start + opsize >= end)
+			break;
+		for (i = 0; i < opsize; i++)
+			ptr += sprintf(ptr, "%02x", code[start + i]);
+		*ptr++ = '\t';
+		if (i < 6)
+			*ptr++ = '\t';
+		ptr += print_insn(ptr, code + start, addr);
+		start += opsize;
+		pr_cont("%s", buffer);
+		ptr = buffer;
+		ptr += sprintf(ptr, "\n          ");
+		hops++;
+	}
+	pr_cont("\n");
+}
+
+void print_fn_code(unsigned char *code, unsigned long len)
+{
+	char buffer[128], *ptr;
+	int opsize, i;
+
+	while (len) {
+		ptr = buffer;
+		opsize = insn_length(*code);
+		if (opsize > len)
+			break;
+		ptr += sprintf(ptr, "%px: ", code);
+		for (i = 0; i < opsize; i++)
+			ptr += sprintf(ptr, "%02x", code[i]);
+		*ptr++ = '\t';
+		if (i < 4)
+			*ptr++ = '\t';
+		ptr += print_insn(ptr, code, (unsigned long) code);
+		*ptr++ = '\n';
+		*ptr++ = 0;
+		printk("%s", buffer);
+		code += opsize;
+		len -= opsize;
+	}
+}
diff --git a/arch/s390/kernel/dumpstack.c b/arch/s390/kernel/dumpstack.c
new file mode 100644
index 0000000000..d2012635b0
--- /dev/null
+++ b/arch/s390/kernel/dumpstack.c
@@ -0,0 +1,224 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Stack dumping functions
+ *
+ *  Copyright IBM Corp. 1999, 2013
+ */
+
+#include <linux/kallsyms.h>
+#include <linux/hardirq.h>
+#include <linux/kprobes.h>
+#include <linux/utsname.h>
+#include <linux/export.h>
+#include <linux/kdebug.h>
+#include <linux/ptrace.h>
+#include <linux/mm.h>
+#include <linux/module.h>
+#include <linux/sched.h>
+#include <linux/sched/debug.h>
+#include <linux/sched/task_stack.h>
+#include <asm/processor.h>
+#include <asm/debug.h>
+#include <asm/dis.h>
+#include <asm/ipl.h>
+#include <asm/unwind.h>
+
+const char *stack_type_name(enum stack_type type)
+{
+	switch (type) {
+	case STACK_TYPE_TASK:
+		return "task";
+	case STACK_TYPE_IRQ:
+		return "irq";
+	case STACK_TYPE_NODAT:
+		return "nodat";
+	case STACK_TYPE_RESTART:
+		return "restart";
+	default:
+		return "unknown";
+	}
+}
+EXPORT_SYMBOL_GPL(stack_type_name);
+
+static inline bool in_stack(unsigned long sp, struct stack_info *info,
+			    enum stack_type type, unsigned long stack)
+{
+	if (sp < stack || sp >= stack + THREAD_SIZE)
+		return false;
+	info->type = type;
+	info->begin = stack;
+	info->end = stack + THREAD_SIZE;
+	return true;
+}
+
+static bool in_task_stack(unsigned long sp, struct task_struct *task,
+			  struct stack_info *info)
+{
+	unsigned long stack = (unsigned long)task_stack_page(task);
+
+	return in_stack(sp, info, STACK_TYPE_TASK, stack);
+}
+
+static bool in_irq_stack(unsigned long sp, struct stack_info *info)
+{
+	unsigned long stack = S390_lowcore.async_stack - STACK_INIT_OFFSET;
+
+	return in_stack(sp, info, STACK_TYPE_IRQ, stack);
+}
+
+static bool in_nodat_stack(unsigned long sp, struct stack_info *info)
+{
+	unsigned long stack = S390_lowcore.nodat_stack - STACK_INIT_OFFSET;
+
+	return in_stack(sp, info, STACK_TYPE_NODAT, stack);
+}
+
+static bool in_mcck_stack(unsigned long sp, struct stack_info *info)
+{
+	unsigned long stack = S390_lowcore.mcck_stack - STACK_INIT_OFFSET;
+
+	return in_stack(sp, info, STACK_TYPE_MCCK, stack);
+}
+
+static bool in_restart_stack(unsigned long sp, struct stack_info *info)
+{
+	unsigned long stack = S390_lowcore.restart_stack - STACK_INIT_OFFSET;
+
+	return in_stack(sp, info, STACK_TYPE_RESTART, stack);
+}
+
+int get_stack_info(unsigned long sp, struct task_struct *task,
+		   struct stack_info *info, unsigned long *visit_mask)
+{
+	if (!sp)
+		goto unknown;
+
+	/* Sanity check: ABI requires SP to be aligned 8 bytes. */
+	if (sp & 0x7)
+		goto unknown;
+
+	/* Check per-task stack */
+	if (in_task_stack(sp, task, info))
+		goto recursion_check;
+
+	if (task != current)
+		goto unknown;
+
+	/* Check per-cpu stacks */
+	if (!in_irq_stack(sp, info) &&
+	    !in_nodat_stack(sp, info) &&
+	    !in_restart_stack(sp, info) &&
+	    !in_mcck_stack(sp, info))
+		goto unknown;
+
+recursion_check:
+	/*
+	 * Make sure we don't iterate through any given stack more than once.
+	 * If it comes up a second time then there's something wrong going on:
+	 * just break out and report an unknown stack type.
+	 */
+	if (*visit_mask & (1UL << info->type))
+		goto unknown;
+	*visit_mask |= 1UL << info->type;
+	return 0;
+unknown:
+	info->type = STACK_TYPE_UNKNOWN;
+	return -EINVAL;
+}
+
+void show_stack(struct task_struct *task, unsigned long *stack,
+		       const char *loglvl)
+{
+	struct unwind_state state;
+
+	printk("%sCall Trace:\n", loglvl);
+	unwind_for_each_frame(&state, task, NULL, (unsigned long) stack)
+		printk(state.reliable ? "%s [<%016lx>] %pSR \n" :
+					"%s([<%016lx>] %pSR)\n",
+		       loglvl, state.ip, (void *) state.ip);
+	debug_show_held_locks(task ? : current);
+}
+
+static void show_last_breaking_event(struct pt_regs *regs)
+{
+	printk("Last Breaking-Event-Address:\n");
+	printk(" [<%016lx>] ", regs->last_break);
+	if (user_mode(regs)) {
+		print_vma_addr(KERN_CONT, regs->last_break);
+		pr_cont("\n");
+	} else {
+		pr_cont("%pSR\n", (void *)regs->last_break);
+	}
+}
+
+void show_registers(struct pt_regs *regs)
+{
+	struct psw_bits *psw = &psw_bits(regs->psw);
+	char *mode;
+
+	mode = user_mode(regs) ? "User" : "Krnl";
+	printk("%s PSW : %px %px", mode, (void *)regs->psw.mask, (void *)regs->psw.addr);
+	if (!user_mode(regs))
+		pr_cont(" (%pSR)", (void *)regs->psw.addr);
+	pr_cont("\n");
+	printk("           R:%x T:%x IO:%x EX:%x Key:%x M:%x W:%x "
+	       "P:%x AS:%x CC:%x PM:%x", psw->per, psw->dat, psw->io, psw->ext,
+	       psw->key, psw->mcheck, psw->wait, psw->pstate, psw->as, psw->cc, psw->pm);
+	pr_cont(" RI:%x EA:%x\n", psw->ri, psw->eaba);
+	printk("%s GPRS: %016lx %016lx %016lx %016lx\n", mode,
+	       regs->gprs[0], regs->gprs[1], regs->gprs[2], regs->gprs[3]);
+	printk("           %016lx %016lx %016lx %016lx\n",
+	       regs->gprs[4], regs->gprs[5], regs->gprs[6], regs->gprs[7]);
+	printk("           %016lx %016lx %016lx %016lx\n",
+	       regs->gprs[8], regs->gprs[9], regs->gprs[10], regs->gprs[11]);
+	printk("           %016lx %016lx %016lx %016lx\n",
+	       regs->gprs[12], regs->gprs[13], regs->gprs[14], regs->gprs[15]);
+	show_code(regs);
+}
+
+void show_regs(struct pt_regs *regs)
+{
+	show_regs_print_info(KERN_DEFAULT);
+	show_registers(regs);
+	/* Show stack backtrace if pt_regs is from kernel mode */
+	if (!user_mode(regs))
+		show_stack(NULL, (unsigned long *) regs->gprs[15], KERN_DEFAULT);
+	show_last_breaking_event(regs);
+}
+
+static DEFINE_SPINLOCK(die_lock);
+
+void __noreturn die(struct pt_regs *regs, const char *str)
+{
+	static int die_counter;
+
+	oops_enter();
+	lgr_info_log();
+	debug_stop_all();
+	console_verbose();
+	spin_lock_irq(&die_lock);
+	bust_spinlocks(1);
+	printk("%s: %04x ilc:%d [#%d] ", str, regs->int_code & 0xffff,
+	       regs->int_code >> 17, ++die_counter);
+#ifdef CONFIG_PREEMPT
+	pr_cont("PREEMPT ");
+#elif defined(CONFIG_PREEMPT_RT)
+	pr_cont("PREEMPT_RT ");
+#endif
+	pr_cont("SMP ");
+	if (debug_pagealloc_enabled())
+		pr_cont("DEBUG_PAGEALLOC");
+	pr_cont("\n");
+	notify_die(DIE_OOPS, str, regs, 0, regs->int_code & 0xffff, SIGSEGV);
+	print_modules();
+	show_regs(regs);
+	bust_spinlocks(0);
+	add_taint(TAINT_DIE, LOCKDEP_NOW_UNRELIABLE);
+	spin_unlock_irq(&die_lock);
+	if (in_interrupt())
+		panic("Fatal exception in interrupt");
+	if (panic_on_oops)
+		panic("Fatal exception: panic_on_oops");
+	oops_exit();
+	make_task_dead(SIGSEGV);
+}
diff --git a/arch/s390/kernel/early.c b/arch/s390/kernel/early.c
new file mode 100644
index 0000000000..442ce0489e
--- /dev/null
+++ b/arch/s390/kernel/early.c
@@ -0,0 +1,320 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ *    Copyright IBM Corp. 2007, 2009
+ *    Author(s): Hongjie Yang <hongjie@us.ibm.com>,
+ */
+
+#define KMSG_COMPONENT "setup"
+#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
+
+#include <linux/compiler.h>
+#include <linux/init.h>
+#include <linux/errno.h>
+#include <linux/string.h>
+#include <linux/ctype.h>
+#include <linux/lockdep.h>
+#include <linux/extable.h>
+#include <linux/pfn.h>
+#include <linux/uaccess.h>
+#include <linux/kernel.h>
+#include <asm/asm-extable.h>
+#include <linux/memblock.h>
+#include <asm/diag.h>
+#include <asm/ebcdic.h>
+#include <asm/ipl.h>
+#include <asm/lowcore.h>
+#include <asm/processor.h>
+#include <asm/sections.h>
+#include <asm/setup.h>
+#include <asm/sysinfo.h>
+#include <asm/cpcmd.h>
+#include <asm/sclp.h>
+#include <asm/facility.h>
+#include <asm/boot_data.h>
+#include <asm/switch_to.h>
+#include "entry.h"
+
+#define decompressor_handled_param(param)			\
+static int __init ignore_decompressor_param_##param(char *s)	\
+{								\
+	return 0;						\
+}								\
+early_param(#param, ignore_decompressor_param_##param)
+
+decompressor_handled_param(mem);
+decompressor_handled_param(vmalloc);
+decompressor_handled_param(dfltcc);
+decompressor_handled_param(facilities);
+decompressor_handled_param(nokaslr);
+#if IS_ENABLED(CONFIG_KVM)
+decompressor_handled_param(prot_virt);
+#endif
+
+static void __init kasan_early_init(void)
+{
+#ifdef CONFIG_KASAN
+	init_task.kasan_depth = 0;
+	sclp_early_printk("KernelAddressSanitizer initialized\n");
+#endif
+}
+
+static void __init reset_tod_clock(void)
+{
+	union tod_clock clk;
+
+	if (store_tod_clock_ext_cc(&clk) == 0)
+		return;
+	/* TOD clock not running. Set the clock to Unix Epoch. */
+	if (set_tod_clock(TOD_UNIX_EPOCH) || store_tod_clock_ext_cc(&clk))
+		disabled_wait();
+
+	memset(&tod_clock_base, 0, sizeof(tod_clock_base));
+	tod_clock_base.tod = TOD_UNIX_EPOCH;
+	S390_lowcore.last_update_clock = TOD_UNIX_EPOCH;
+}
+
+/*
+ * Initialize storage key for kernel pages
+ */
+static noinline __init void init_kernel_storage_key(void)
+{
+#if PAGE_DEFAULT_KEY
+	unsigned long end_pfn, init_pfn;
+
+	end_pfn = PFN_UP(__pa(_end));
+
+	for (init_pfn = 0 ; init_pfn < end_pfn; init_pfn++)
+		page_set_storage_key(init_pfn << PAGE_SHIFT,
+				     PAGE_DEFAULT_KEY, 0);
+#endif
+}
+
+static __initdata char sysinfo_page[PAGE_SIZE] __aligned(PAGE_SIZE);
+
+static noinline __init void detect_machine_type(void)
+{
+	struct sysinfo_3_2_2 *vmms = (struct sysinfo_3_2_2 *)&sysinfo_page;
+
+	/* Check current-configuration-level */
+	if (stsi(NULL, 0, 0, 0) <= 2) {
+		S390_lowcore.machine_flags |= MACHINE_FLAG_LPAR;
+		return;
+	}
+	/* Get virtual-machine cpu information. */
+	if (stsi(vmms, 3, 2, 2) || !vmms->count)
+		return;
+
+	/* Detect known hypervisors */
+	if (!memcmp(vmms->vm[0].cpi, "\xd2\xe5\xd4", 3))
+		S390_lowcore.machine_flags |= MACHINE_FLAG_KVM;
+	else if (!memcmp(vmms->vm[0].cpi, "\xa9\x61\xe5\xd4", 4))
+		S390_lowcore.machine_flags |= MACHINE_FLAG_VM;
+}
+
+/* Remove leading, trailing and double whitespace. */
+static inline void strim_all(char *str)
+{
+	char *s;
+
+	s = strim(str);
+	if (s != str)
+		memmove(str, s, strlen(s));
+	while (*str) {
+		if (!isspace(*str++))
+			continue;
+		if (isspace(*str)) {
+			s = skip_spaces(str);
+			memmove(str, s, strlen(s) + 1);
+		}
+	}
+}
+
+static noinline __init void setup_arch_string(void)
+{
+	struct sysinfo_1_1_1 *mach = (struct sysinfo_1_1_1 *)&sysinfo_page;
+	struct sysinfo_3_2_2 *vm = (struct sysinfo_3_2_2 *)&sysinfo_page;
+	char mstr[80], hvstr[17];
+
+	if (stsi(mach, 1, 1, 1))
+		return;
+	EBCASC(mach->manufacturer, sizeof(mach->manufacturer));
+	EBCASC(mach->type, sizeof(mach->type));
+	EBCASC(mach->model, sizeof(mach->model));
+	EBCASC(mach->model_capacity, sizeof(mach->model_capacity));
+	sprintf(mstr, "%-16.16s %-4.4s %-16.16s %-16.16s",
+		mach->manufacturer, mach->type,
+		mach->model, mach->model_capacity);
+	strim_all(mstr);
+	if (stsi(vm, 3, 2, 2) == 0 && vm->count) {
+		EBCASC(vm->vm[0].cpi, sizeof(vm->vm[0].cpi));
+		sprintf(hvstr, "%-16.16s", vm->vm[0].cpi);
+		strim_all(hvstr);
+	} else {
+		sprintf(hvstr, "%s",
+			MACHINE_IS_LPAR ? "LPAR" :
+			MACHINE_IS_VM ? "z/VM" :
+			MACHINE_IS_KVM ? "KVM" : "unknown");
+	}
+	dump_stack_set_arch_desc("%s (%s)", mstr, hvstr);
+}
+
+static __init void setup_topology(void)
+{
+	int max_mnest;
+
+	if (!test_facility(11))
+		return;
+	S390_lowcore.machine_flags |= MACHINE_FLAG_TOPOLOGY;
+	for (max_mnest = 6; max_mnest > 1; max_mnest--) {
+		if (stsi(&sysinfo_page, 15, 1, max_mnest) == 0)
+			break;
+	}
+	topology_max_mnest = max_mnest;
+}
+
+void __do_early_pgm_check(struct pt_regs *regs)
+{
+	if (!fixup_exception(regs))
+		disabled_wait();
+}
+
+static noinline __init void setup_lowcore_early(void)
+{
+	psw_t psw;
+
+	psw.addr = (unsigned long)early_pgm_check_handler;
+	psw.mask = PSW_KERNEL_BITS;
+	S390_lowcore.program_new_psw = psw;
+	S390_lowcore.preempt_count = INIT_PREEMPT_COUNT;
+}
+
+static noinline __init void setup_facility_list(void)
+{
+	memcpy(alt_stfle_fac_list, stfle_fac_list, sizeof(alt_stfle_fac_list));
+	if (!IS_ENABLED(CONFIG_KERNEL_NOBP))
+		__clear_facility(82, alt_stfle_fac_list);
+}
+
+static __init void detect_diag9c(void)
+{
+	unsigned int cpu_address;
+	int rc;
+
+	cpu_address = stap();
+	diag_stat_inc(DIAG_STAT_X09C);
+	asm volatile(
+		"	diag	%2,0,0x9c\n"
+		"0:	la	%0,0\n"
+		"1:\n"
+		EX_TABLE(0b,1b)
+		: "=d" (rc) : "0" (-EOPNOTSUPP), "d" (cpu_address) : "cc");
+	if (!rc)
+		S390_lowcore.machine_flags |= MACHINE_FLAG_DIAG9C;
+}
+
+static __init void detect_machine_facilities(void)
+{
+	if (test_facility(8)) {
+		S390_lowcore.machine_flags |= MACHINE_FLAG_EDAT1;
+		__ctl_set_bit(0, 23);
+	}
+	if (test_facility(78))
+		S390_lowcore.machine_flags |= MACHINE_FLAG_EDAT2;
+	if (test_facility(3))
+		S390_lowcore.machine_flags |= MACHINE_FLAG_IDTE;
+	if (test_facility(50) && test_facility(73)) {
+		S390_lowcore.machine_flags |= MACHINE_FLAG_TE;
+		__ctl_set_bit(0, 55);
+	}
+	if (test_facility(51))
+		S390_lowcore.machine_flags |= MACHINE_FLAG_TLB_LC;
+	if (test_facility(129)) {
+		S390_lowcore.machine_flags |= MACHINE_FLAG_VX;
+		__ctl_set_bit(0, 17);
+	}
+	if (test_facility(130))
+		S390_lowcore.machine_flags |= MACHINE_FLAG_NX;
+	if (test_facility(133))
+		S390_lowcore.machine_flags |= MACHINE_FLAG_GS;
+	if (test_facility(139) && (tod_clock_base.tod >> 63)) {
+		/* Enabled signed clock comparator comparisons */
+		S390_lowcore.machine_flags |= MACHINE_FLAG_SCC;
+		clock_comparator_max = -1ULL >> 1;
+		__ctl_set_bit(0, 53);
+	}
+	if (IS_ENABLED(CONFIG_PCI) && test_facility(153)) {
+		S390_lowcore.machine_flags |= MACHINE_FLAG_PCI_MIO;
+		/* the control bit is set during PCI initialization */
+	}
+	if (test_facility(194))
+		S390_lowcore.machine_flags |= MACHINE_FLAG_RDP;
+}
+
+static inline void save_vector_registers(void)
+{
+#ifdef CONFIG_CRASH_DUMP
+	if (test_facility(129))
+		save_vx_regs(boot_cpu_vector_save_area);
+#endif
+}
+
+static inline void setup_control_registers(void)
+{
+	unsigned long reg;
+
+	__ctl_store(reg, 0, 0);
+	reg |= CR0_LOW_ADDRESS_PROTECTION;
+	reg |= CR0_EMERGENCY_SIGNAL_SUBMASK;
+	reg |= CR0_EXTERNAL_CALL_SUBMASK;
+	__ctl_load(reg, 0, 0);
+}
+
+static inline void setup_access_registers(void)
+{
+	unsigned int acrs[NUM_ACRS] = { 0 };
+
+	restore_access_regs(acrs);
+}
+
+static int __init disable_vector_extension(char *str)
+{
+	S390_lowcore.machine_flags &= ~MACHINE_FLAG_VX;
+	__ctl_clear_bit(0, 17);
+	return 0;
+}
+early_param("novx", disable_vector_extension);
+
+char __bootdata(early_command_line)[COMMAND_LINE_SIZE];
+static void __init setup_boot_command_line(void)
+{
+	/* copy arch command line */
+	strscpy(boot_command_line, early_command_line, COMMAND_LINE_SIZE);
+}
+
+static void __init sort_amode31_extable(void)
+{
+	sort_extable(__start_amode31_ex_table, __stop_amode31_ex_table);
+}
+
+void __init startup_init(void)
+{
+	kasan_early_init();
+	reset_tod_clock();
+	time_early_init();
+	init_kernel_storage_key();
+	lockdep_off();
+	sort_amode31_extable();
+	setup_lowcore_early();
+	setup_facility_list();
+	detect_machine_type();
+	setup_arch_string();
+	setup_boot_command_line();
+	detect_diag9c();
+	detect_machine_facilities();
+	save_vector_registers();
+	setup_topology();
+	sclp_early_detect();
+	setup_control_registers();
+	setup_access_registers();
+	lockdep_on();
+}
diff --git a/arch/s390/kernel/early_printk.c b/arch/s390/kernel/early_printk.c
new file mode 100644
index 0000000000..d9d53f4400
--- /dev/null
+++ b/arch/s390/kernel/early_printk.c
@@ -0,0 +1,36 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ *    Copyright IBM Corp. 2017
+ */
+
+#include <linux/console.h>
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <asm/sclp.h>
+
+static void sclp_early_write(struct console *con, const char *s, unsigned int len)
+{
+	__sclp_early_printk(s, len);
+}
+
+static struct console sclp_early_console = {
+	.name  = "earlysclp",
+	.write = sclp_early_write,
+	.flags = CON_PRINTBUFFER | CON_BOOT,
+	.index = -1,
+};
+
+static int __init setup_early_printk(char *buf)
+{
+	if (early_console)
+		return 0;
+	/* Accept only "earlyprintk" and "earlyprintk=sclp" */
+	if (buf && !str_has_prefix(buf, "sclp"))
+		return 0;
+	if (!sclp.has_linemode && !sclp.has_vt220)
+		return 0;
+	early_console = &sclp_early_console;
+	register_console(early_console);
+	return 0;
+}
+early_param("earlyprintk", setup_early_printk);
diff --git a/arch/s390/kernel/earlypgm.S b/arch/s390/kernel/earlypgm.S
new file mode 100644
index 0000000000..c634871f0d
--- /dev/null
+++ b/arch/s390/kernel/earlypgm.S
@@ -0,0 +1,23 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ *    Copyright IBM Corp. 2006, 2007
+ *    Author(s): Michael Holzheu <holzheu@de.ibm.com>
+ */
+
+#include <linux/linkage.h>
+#include <asm/asm-offsets.h>
+
+SYM_CODE_START(early_pgm_check_handler)
+	stmg	%r8,%r15,__LC_SAVE_AREA_SYNC
+	aghi	%r15,-(STACK_FRAME_OVERHEAD+__PT_SIZE)
+	la	%r11,STACK_FRAME_OVERHEAD(%r15)
+	xc	__SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15)
+	stmg	%r0,%r7,__PT_R0(%r11)
+	mvc	__PT_PSW(16,%r11),__LC_PGM_OLD_PSW
+	mvc	__PT_R8(64,%r11),__LC_SAVE_AREA_SYNC
+	lgr	%r2,%r11
+	brasl	%r14,__do_early_pgm_check
+	mvc	__LC_RETURN_PSW(16),STACK_FRAME_OVERHEAD+__PT_PSW(%r15)
+	lmg	%r0,%r15,STACK_FRAME_OVERHEAD+__PT_R0(%r15)
+	lpswe	__LC_RETURN_PSW
+SYM_CODE_END(early_pgm_check_handler)
diff --git a/arch/s390/kernel/ebcdic.c b/arch/s390/kernel/ebcdic.c
new file mode 100644
index 0000000000..0e51fa5372
--- /dev/null
+++ b/arch/s390/kernel/ebcdic.c
@@ -0,0 +1,401 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ *    EBCDIC -> ASCII, ASCII -> EBCDIC,
+ *    upper to lower case (EBCDIC) conversion tables.
+ *
+ *  S390 version
+ *    Copyright IBM Corp. 1999
+ *    Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>
+ *               Martin Peschke <peschke@fh-brandenburg.de>
+ */
+
+#include <linux/types.h>
+#include <linux/export.h>
+#include <asm/ebcdic.h>
+
+/*
+ * ASCII (IBM PC 437)  -> EBCDIC 037
+ */
+__u8 _ascebc[256] =
+{
+ /*00 NUL   SOH   STX   ETX   EOT   ENQ   ACK   BEL */
+     0x00, 0x01, 0x02, 0x03, 0x37, 0x2D, 0x2E, 0x2F,
+ /*08  BS    HT    LF    VT    FF    CR    SO    SI */
+ /*              ->NL                               */
+     0x16, 0x05, 0x15, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F,
+ /*10 DLE   DC1   DC2   DC3   DC4   NAK   SYN   ETB */
+     0x10, 0x11, 0x12, 0x13, 0x3C, 0x3D, 0x32, 0x26,
+ /*18 CAN    EM   SUB   ESC    FS    GS    RS    US */
+ /*                               ->IGS ->IRS ->IUS */
+     0x18, 0x19, 0x3F, 0x27, 0x22, 0x1D, 0x1E, 0x1F,
+ /*20  SP     !     "     #     $     %     &     ' */
+     0x40, 0x5A, 0x7F, 0x7B, 0x5B, 0x6C, 0x50, 0x7D,
+ /*28   (     )     *     +     ,     -    .      / */
+     0x4D, 0x5D, 0x5C, 0x4E, 0x6B, 0x60, 0x4B, 0x61,
+ /*30   0     1     2     3     4     5     6     7 */
+     0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0xF7,
+ /*38   8     9     :     ;     <     =     >     ? */
+     0xF8, 0xF9, 0x7A, 0x5E, 0x4C, 0x7E, 0x6E, 0x6F,
+ /*40   @     A     B     C     D     E     F     G */
+     0x7C, 0xC1, 0xC2, 0xC3, 0xC4, 0xC5, 0xC6, 0xC7,
+ /*48   H     I     J     K     L     M     N     O */
+     0xC8, 0xC9, 0xD1, 0xD2, 0xD3, 0xD4, 0xD5, 0xD6,
+ /*50   P     Q     R     S     T     U     V     W */
+     0xD7, 0xD8, 0xD9, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6,
+ /*58   X     Y     Z     [     \     ]     ^     _ */
+     0xE7, 0xE8, 0xE9, 0xBA, 0xE0, 0xBB, 0xB0, 0x6D,
+ /*60   `     a     b     c     d     e     f     g */
+     0x79, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87,
+ /*68   h     i     j     k     l     m     n     o */
+     0x88, 0x89, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96,
+ /*70   p     q     r     s     t     u     v     w */
+     0x97, 0x98, 0x99, 0xA2, 0xA3, 0xA4, 0xA5, 0xA6,
+ /*78   x     y     z     {     |     }     ~    DL */
+     0xA7, 0xA8, 0xA9, 0xC0, 0x4F, 0xD0, 0xA1, 0x07,
+ /*80*/
+     0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F,
+ /*88*/
+     0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F,
+ /*90*/
+     0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F,
+ /*98*/
+     0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F,
+ /*A0*/
+     0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F,
+ /*A8*/
+     0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F,
+ /*B0*/
+     0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F,
+ /*B8*/
+     0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F,
+ /*C0*/
+     0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F,
+ /*C8*/
+     0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F,
+ /*D0*/
+     0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F,
+ /*D8*/
+     0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F,
+ /*E0        sz						*/
+     0x3F, 0x59, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F,
+ /*E8*/
+     0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F,
+ /*F0*/
+     0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F,
+ /*F8*/
+     0x90, 0x3F, 0x3F, 0x3F, 0x3F, 0xEA, 0x3F, 0xFF
+};
+
+/*
+ * EBCDIC 037 -> ASCII (IBM PC 437)
+ */
+__u8 _ebcasc[256] =
+{
+ /* 0x00   NUL   SOH   STX   ETX  *SEL    HT  *RNL   DEL */
+          0x00, 0x01, 0x02, 0x03, 0x07, 0x09, 0x07, 0x7F,
+ /* 0x08   -GE  -SPS  -RPT    VT    FF    CR    SO    SI */
+          0x07, 0x07, 0x07, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F,
+ /* 0x10   DLE   DC1   DC2   DC3  -RES   -NL    BS  -POC
+                                  -ENP  ->LF             */
+          0x10, 0x11, 0x12, 0x13, 0x07, 0x0A, 0x08, 0x07,
+ /* 0x18   CAN    EM  -UBS  -CU1  -IFS  -IGS  -IRS  -ITB
+                                                    -IUS */
+          0x18, 0x19, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07,
+ /* 0x20   -DS  -SOS    FS  -WUS  -BYP    LF   ETB   ESC
+                                  -INP                   */
+          0x07, 0x07, 0x1C, 0x07, 0x07, 0x0A, 0x17, 0x1B,
+ /* 0x28   -SA  -SFE   -SM  -CSP  -MFA   ENQ   ACK   BEL
+                       -SW                               */ 
+          0x07, 0x07, 0x07, 0x07, 0x07, 0x05, 0x06, 0x07,
+ /* 0x30  ----  ----   SYN   -IR   -PP  -TRN  -NBS   EOT */
+          0x07, 0x07, 0x16, 0x07, 0x07, 0x07, 0x07, 0x04,
+ /* 0x38  -SBS   -IT  -RFF  -CU3   DC4   NAK  ----   SUB */
+          0x07, 0x07, 0x07, 0x07, 0x14, 0x15, 0x07, 0x1A,
+ /* 0x40    SP   RSP           ä              ----       */
+          0x20, 0xFF, 0x83, 0x84, 0x85, 0xA0, 0x07, 0x86,
+ /* 0x48                       .     <     (     +     | */
+          0x87, 0xA4, 0x9B, 0x2E, 0x3C, 0x28, 0x2B, 0x7C,
+ /* 0x50     &                                      ---- */
+          0x26, 0x82, 0x88, 0x89, 0x8A, 0xA1, 0x8C, 0x07,
+ /* 0x58           ß     !     $     *     )     ;       */
+          0x8D, 0xE1, 0x21, 0x24, 0x2A, 0x29, 0x3B, 0xAA,
+ /* 0x60     -     /  ----     Ä  ----  ----  ----       */
+          0x2D, 0x2F, 0x07, 0x8E, 0x07, 0x07, 0x07, 0x8F,
+ /* 0x68              ----     ,     %     _     >     ? */ 
+          0x80, 0xA5, 0x07, 0x2C, 0x25, 0x5F, 0x3E, 0x3F,
+ /* 0x70  ----        ----  ----  ----  ----  ----  ---- */
+          0x07, 0x90, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07,
+ /* 0x78     *     `     :     #     @     '     =     " */
+          0x70, 0x60, 0x3A, 0x23, 0x40, 0x27, 0x3D, 0x22,
+ /* 0x80     *     a     b     c     d     e     f     g */
+          0x07, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
+ /* 0x88     h     i              ----  ----  ----       */
+          0x68, 0x69, 0xAE, 0xAF, 0x07, 0x07, 0x07, 0xF1,
+ /* 0x90     °     j     k     l     m     n     o     p */
+          0xF8, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F, 0x70,
+ /* 0x98     q     r                    ----        ---- */
+          0x71, 0x72, 0xA6, 0xA7, 0x91, 0x07, 0x92, 0x07,
+ /* 0xA0           ~     s     t     u     v     w     x */
+          0xE6, 0x7E, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78,
+ /* 0xA8     y     z              ----  ----  ----  ---- */
+          0x79, 0x7A, 0xAD, 0xAB, 0x07, 0x07, 0x07, 0x07,
+ /* 0xB0     ^                    ----     §  ----       */
+          0x5E, 0x9C, 0x9D, 0xFA, 0x07, 0x07, 0x07, 0xAC,
+ /* 0xB8        ----     [     ]  ----  ----  ----  ---- */
+          0xAB, 0x07, 0x5B, 0x5D, 0x07, 0x07, 0x07, 0x07,
+ /* 0xC0     {     A     B     C     D     E     F     G */
+          0x7B, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47,
+ /* 0xC8     H     I  ----           ö              ---- */
+          0x48, 0x49, 0x07, 0x93, 0x94, 0x95, 0xA2, 0x07,
+ /* 0xD0     }     J     K     L     M     N     O     P */
+          0x7D, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F, 0x50,
+ /* 0xD8     Q     R  ----           ü                   */
+          0x51, 0x52, 0x07, 0x96, 0x81, 0x97, 0xA3, 0x98,
+ /* 0xE0     \           S     T     U     V     W     X */
+          0x5C, 0xF6, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58,
+ /* 0xE8     Y     Z        ----     Ö  ----  ----  ---- */
+          0x59, 0x5A, 0xFD, 0x07, 0x99, 0x07, 0x07, 0x07,
+ /* 0xF0     0     1     2     3     4     5     6     7 */
+          0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
+ /* 0xF8     8     9  ----  ----     Ü  ----  ----  ---- */
+          0x38, 0x39, 0x07, 0x07, 0x9A, 0x07, 0x07, 0x07
+};
+
+
+/*
+ * ASCII (IBM PC 437)  -> EBCDIC 500
+ */
+__u8 _ascebc_500[256] =
+{
+ /*00 NUL   SOH   STX   ETX   EOT   ENQ   ACK   BEL */
+     0x00, 0x01, 0x02, 0x03, 0x37, 0x2D, 0x2E, 0x2F,
+ /*08  BS    HT    LF    VT    FF    CR    SO    SI */
+ /*              ->NL                               */
+     0x16, 0x05, 0x15, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F,
+ /*10 DLE   DC1   DC2   DC3   DC4   NAK   SYN   ETB */
+     0x10, 0x11, 0x12, 0x13, 0x3C, 0x3D, 0x32, 0x26,
+ /*18 CAN    EM   SUB   ESC    FS    GS    RS    US */
+ /*                               ->IGS ->IRS ->IUS */
+     0x18, 0x19, 0x3F, 0x27, 0x22, 0x1D, 0x1E, 0x1F,
+ /*20  SP     !     "     #     $     %     &     ' */
+     0x40, 0x4F, 0x7F, 0x7B, 0x5B, 0x6C, 0x50, 0x7D,
+ /*28   (     )     *     +     ,     -    .      / */
+     0x4D, 0x5D, 0x5C, 0x4E, 0x6B, 0x60, 0x4B, 0x61,
+ /*30   0     1     2     3     4     5     6     7 */
+     0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0xF7,
+ /*38   8     9     :     ;     <     =     >     ? */
+     0xF8, 0xF9, 0x7A, 0x5E, 0x4C, 0x7E, 0x6E, 0x6F,
+ /*40   @     A     B     C     D     E     F     G */
+     0x7C, 0xC1, 0xC2, 0xC3, 0xC4, 0xC5, 0xC6, 0xC7,
+ /*48   H     I     J     K     L     M     N     O */
+     0xC8, 0xC9, 0xD1, 0xD2, 0xD3, 0xD4, 0xD5, 0xD6,
+ /*50   P     Q     R     S     T     U     V     W */
+     0xD7, 0xD8, 0xD9, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6,
+ /*58   X     Y     Z     [     \     ]     ^     _ */
+     0xE7, 0xE8, 0xE9, 0x4A, 0xE0, 0x5A, 0x5F, 0x6D,
+ /*60   `     a     b     c     d     e     f     g */
+     0x79, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87,
+ /*68   h     i     j     k     l     m     n     o */
+     0x88, 0x89, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96,
+ /*70   p     q     r     s     t     u     v     w */
+     0x97, 0x98, 0x99, 0xA2, 0xA3, 0xA4, 0xA5, 0xA6,
+ /*78   x     y     z     {     |     }     ~    DL */
+     0xA7, 0xA8, 0xA9, 0xC0, 0xBB, 0xD0, 0xA1, 0x07,
+ /*80*/
+     0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F,
+ /*88*/
+     0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F,
+ /*90*/
+     0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F,
+ /*98*/
+     0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F,
+ /*A0*/
+     0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F,
+ /*A8*/
+     0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F,
+ /*B0*/
+     0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F,
+ /*B8*/
+     0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F,
+ /*C0*/
+     0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F,
+ /*C8*/
+     0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F,
+ /*D0*/
+     0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F,
+ /*D8*/
+     0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F,
+ /*E0        sz						*/
+     0x3F, 0x59, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F,
+ /*E8*/
+     0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F,
+ /*F0*/
+     0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F,
+ /*F8*/
+     0x90, 0x3F, 0x3F, 0x3F, 0x3F, 0xEA, 0x3F, 0xFF
+};
+
+/*
+ * EBCDIC 500 -> ASCII (IBM PC 437)
+ */
+__u8 _ebcasc_500[256] =
+{
+ /* 0x00   NUL   SOH   STX   ETX  *SEL    HT  *RNL   DEL */
+          0x00, 0x01, 0x02, 0x03, 0x07, 0x09, 0x07, 0x7F,
+ /* 0x08   -GE  -SPS  -RPT    VT    FF    CR    SO    SI */
+          0x07, 0x07, 0x07, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F,
+ /* 0x10   DLE   DC1   DC2   DC3  -RES   -NL    BS  -POC
+                                  -ENP  ->LF             */
+          0x10, 0x11, 0x12, 0x13, 0x07, 0x0A, 0x08, 0x07,
+ /* 0x18   CAN    EM  -UBS  -CU1  -IFS  -IGS  -IRS  -ITB
+                                                    -IUS */
+          0x18, 0x19, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07,
+ /* 0x20   -DS  -SOS    FS  -WUS  -BYP    LF   ETB   ESC
+                                  -INP                   */
+          0x07, 0x07, 0x1C, 0x07, 0x07, 0x0A, 0x17, 0x1B,
+ /* 0x28   -SA  -SFE   -SM  -CSP  -MFA   ENQ   ACK   BEL
+                       -SW                               */ 
+          0x07, 0x07, 0x07, 0x07, 0x07, 0x05, 0x06, 0x07,
+ /* 0x30  ----  ----   SYN   -IR   -PP  -TRN  -NBS   EOT */
+          0x07, 0x07, 0x16, 0x07, 0x07, 0x07, 0x07, 0x04,
+ /* 0x38  -SBS   -IT  -RFF  -CU3   DC4   NAK  ----   SUB */
+          0x07, 0x07, 0x07, 0x07, 0x14, 0x15, 0x07, 0x1A,
+ /* 0x40    SP   RSP           ä              ----       */
+          0x20, 0xFF, 0x83, 0x84, 0x85, 0xA0, 0x07, 0x86,
+ /* 0x48                 [     .     <     (     +     ! */
+          0x87, 0xA4, 0x5B, 0x2E, 0x3C, 0x28, 0x2B, 0x21,
+ /* 0x50     &                                      ---- */
+          0x26, 0x82, 0x88, 0x89, 0x8A, 0xA1, 0x8C, 0x07,
+ /* 0x58           ß     ]     $     *     )     ;     ^ */
+          0x8D, 0xE1, 0x5D, 0x24, 0x2A, 0x29, 0x3B, 0x5E,
+ /* 0x60     -     /  ----     Ä  ----  ----  ----       */
+          0x2D, 0x2F, 0x07, 0x8E, 0x07, 0x07, 0x07, 0x8F,
+ /* 0x68              ----     ,     %     _     >     ? */ 
+          0x80, 0xA5, 0x07, 0x2C, 0x25, 0x5F, 0x3E, 0x3F,
+ /* 0x70  ----        ----  ----  ----  ----  ----  ---- */
+          0x07, 0x90, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07,
+ /* 0x78     *     `     :     #     @     '     =     " */
+          0x70, 0x60, 0x3A, 0x23, 0x40, 0x27, 0x3D, 0x22,
+ /* 0x80     *     a     b     c     d     e     f     g */
+          0x07, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
+ /* 0x88     h     i              ----  ----  ----       */
+          0x68, 0x69, 0xAE, 0xAF, 0x07, 0x07, 0x07, 0xF1,
+ /* 0x90     °     j     k     l     m     n     o     p */
+          0xF8, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F, 0x70,
+ /* 0x98     q     r                    ----        ---- */
+          0x71, 0x72, 0xA6, 0xA7, 0x91, 0x07, 0x92, 0x07,
+ /* 0xA0           ~     s     t     u     v     w     x */
+          0xE6, 0x7E, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78,
+ /* 0xA8     y     z              ----  ----  ----  ---- */
+          0x79, 0x7A, 0xAD, 0xAB, 0x07, 0x07, 0x07, 0x07,
+ /* 0xB0                          ----     §  ----       */
+          0x9B, 0x9C, 0x9D, 0xFA, 0x07, 0x07, 0x07, 0xAC,
+ /* 0xB8        ----           |  ----  ----  ----  ---- */
+          0xAB, 0x07, 0xAA, 0x7C, 0x07, 0x07, 0x07, 0x07,
+ /* 0xC0     {     A     B     C     D     E     F     G */
+          0x7B, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47,
+ /* 0xC8     H     I  ----           ö              ---- */
+          0x48, 0x49, 0x07, 0x93, 0x94, 0x95, 0xA2, 0x07,
+ /* 0xD0     }     J     K     L     M     N     O     P */
+          0x7D, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F, 0x50,
+ /* 0xD8     Q     R  ----           ü                   */
+          0x51, 0x52, 0x07, 0x96, 0x81, 0x97, 0xA3, 0x98,
+ /* 0xE0     \           S     T     U     V     W     X */
+          0x5C, 0xF6, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58,
+ /* 0xE8     Y     Z        ----     Ö  ----  ----  ---- */
+          0x59, 0x5A, 0xFD, 0x07, 0x99, 0x07, 0x07, 0x07,
+ /* 0xF0     0     1     2     3     4     5     6     7 */
+          0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
+ /* 0xF8     8     9  ----  ----     Ü  ----  ----  ---- */
+          0x38, 0x39, 0x07, 0x07, 0x9A, 0x07, 0x07, 0x07
+};
+
+
+/*
+ * EBCDIC 037/500 conversion table:
+ * from upper to lower case
+ */
+__u8 _ebc_tolower[256] =
+{
+	0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
+	0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F,
+	0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
+	0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F,
+	0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27,
+	0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F,
+	0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
+	0x38, 0x39, 0x3A, 0x3B, 0x3C, 0x3D, 0x3E, 0x3F,
+	0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47,
+	0x48, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F,
+	0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57,
+	0x58, 0x59, 0x5A, 0x5B, 0x5C, 0x5D, 0x5E, 0x5F,
+	0x60, 0x61, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47,
+	0x48, 0x49, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
+	0x70, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57,
+	0x58, 0x79, 0x7A, 0x7B, 0x7C, 0x7D, 0x7E, 0x7F,
+	0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87,
+	0x88, 0x89, 0x8A, 0x8B, 0x8C, 0x8D, 0x8E, 0x8F,
+	0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97,
+	0x98, 0x99, 0x9A, 0x9B, 0x9C, 0x9D, 0x9C, 0x9F,
+	0xA0, 0xA1, 0xA2, 0xA3, 0xA4, 0xA5, 0xA6, 0xA7,
+	0xA8, 0xA9, 0xAA, 0xAB, 0x8C, 0x8D, 0x8E, 0xAF,
+	0xB0, 0xB1, 0xB2, 0xB3, 0xB4, 0xB5, 0xB6, 0xB7,
+	0xB8, 0xB9, 0xBA, 0xBB, 0xBC, 0xBD, 0xBE, 0xBF,
+	0xC0, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87,
+	0x88, 0x89, 0xCA, 0xCB, 0xCC, 0xCD, 0xCE, 0xCF,
+	0xD0, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97,
+	0x98, 0x99, 0xDA, 0xDB, 0xDC, 0xDD, 0xDE, 0xDF,
+	0xE0, 0xE1, 0xA2, 0xA3, 0xA4, 0xA5, 0xA6, 0xA7,
+	0xA8, 0xA9, 0xEA, 0xCB, 0xCC, 0xCD, 0xCE, 0xCF,
+	0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0xF7,
+	0xF8, 0xF9, 0xFA, 0xDB, 0xDC, 0xDD, 0xDE, 0xFF
+};
+
+
+/*
+ * EBCDIC 037/500 conversion table:
+ * from lower to upper case
+ */
+__u8 _ebc_toupper[256] =
+{
+	0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
+	0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F,
+	0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
+	0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F,
+	0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27,
+	0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F,
+	0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
+	0x38, 0x39, 0x3A, 0x3B, 0x3C, 0x3D, 0x3E, 0x3F,
+	0x40, 0x41, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
+	0x68, 0x69, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F,
+	0x50, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
+	0x78, 0x59, 0x5A, 0x5B, 0x5C, 0x5D, 0x5E, 0x5F,
+	0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
+	0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
+	0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
+	0x78, 0x79, 0x7A, 0x7B, 0x7C, 0x7D, 0x7E, 0x7F,
+	0x80, 0xC1, 0xC2, 0xC3, 0xC4, 0xC5, 0xC6, 0xC7,
+	0xC8, 0xC9, 0x8A, 0x8B, 0xAC, 0xAD, 0xAE, 0x8F,
+	0x90, 0xD1, 0xD2, 0xD3, 0xD4, 0xD5, 0xD6, 0xD7,
+	0xD8, 0xD9, 0x9A, 0x9B, 0x9E, 0x9D, 0x9E, 0x9F,
+	0xA0, 0xA1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7,
+	0xE8, 0xE9, 0xAA, 0xAB, 0xAC, 0xAD, 0xAE, 0xAF,
+	0xB0, 0xB1, 0xB2, 0xB3, 0xB4, 0xB5, 0xB6, 0xB7,
+	0xB8, 0xB9, 0xBA, 0xBB, 0xBC, 0xBD, 0xBE, 0xBF,
+	0xC0, 0xC1, 0xC2, 0xC3, 0xC4, 0xC5, 0xC6, 0xC7,
+	0xC8, 0xC9, 0xCA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF,
+	0xD0, 0xD1, 0xD2, 0xD3, 0xD4, 0xD5, 0xD6, 0xD7,
+	0xD8, 0xD9, 0xDA, 0xFB, 0xFC, 0xFD, 0xFE, 0xDF,
+	0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7,
+	0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF,
+	0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0xF7,
+	0xF8, 0xF9, 0xFA, 0xFB, 0xFC, 0xFD, 0xFE, 0xFF
+};
+
+EXPORT_SYMBOL(_ascebc_500);
+EXPORT_SYMBOL(_ebcasc_500);
+EXPORT_SYMBOL(_ascebc);
+EXPORT_SYMBOL(_ebcasc);
+EXPORT_SYMBOL(_ebc_tolower);
+EXPORT_SYMBOL(_ebc_toupper);
+
diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S
new file mode 100644
index 0000000000..49a11f6dd7
--- /dev/null
+++ b/arch/s390/kernel/entry.S
@@ -0,0 +1,669 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ *    S390 low-level entry points.
+ *
+ *    Copyright IBM Corp. 1999, 2012
+ *    Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com),
+ *		 Hartmut Penner (hp@de.ibm.com),
+ *		 Denis Joseph Barrow (djbarrow@de.ibm.com,barrow_dj@yahoo.com),
+ */
+
+#include <linux/export.h>
+#include <linux/init.h>
+#include <linux/linkage.h>
+#include <asm/asm-extable.h>
+#include <asm/alternative-asm.h>
+#include <asm/processor.h>
+#include <asm/cache.h>
+#include <asm/dwarf.h>
+#include <asm/errno.h>
+#include <asm/ptrace.h>
+#include <asm/thread_info.h>
+#include <asm/asm-offsets.h>
+#include <asm/unistd.h>
+#include <asm/page.h>
+#include <asm/sigp.h>
+#include <asm/irq.h>
+#include <asm/vx-insn.h>
+#include <asm/setup.h>
+#include <asm/nmi.h>
+#include <asm/nospec-insn.h>
+
+_LPP_OFFSET	= __LC_LPP
+
+	.macro STBEAR address
+	ALTERNATIVE "nop", ".insn s,0xb2010000,\address", 193
+	.endm
+
+	.macro LBEAR address
+	ALTERNATIVE "nop", ".insn s,0xb2000000,\address", 193
+	.endm
+
+	.macro LPSWEY address,lpswe
+	ALTERNATIVE "b \lpswe; nopr", ".insn siy,0xeb0000000071,\address,0", 193
+	.endm
+
+	.macro MBEAR reg
+	ALTERNATIVE "brcl 0,0", __stringify(mvc __PT_LAST_BREAK(8,\reg),__LC_LAST_BREAK), 193
+	.endm
+
+	.macro	CHECK_STACK savearea
+#ifdef CONFIG_CHECK_STACK
+	tml	%r15,THREAD_SIZE - CONFIG_STACK_GUARD
+	lghi	%r14,\savearea
+	jz	stack_overflow
+#endif
+	.endm
+
+	.macro	CHECK_VMAP_STACK savearea,oklabel
+#ifdef CONFIG_VMAP_STACK
+	lgr	%r14,%r15
+	nill	%r14,0x10000 - THREAD_SIZE
+	oill	%r14,STACK_INIT_OFFSET
+	clg	%r14,__LC_KERNEL_STACK
+	je	\oklabel
+	clg	%r14,__LC_ASYNC_STACK
+	je	\oklabel
+	clg	%r14,__LC_MCCK_STACK
+	je	\oklabel
+	clg	%r14,__LC_NODAT_STACK
+	je	\oklabel
+	clg	%r14,__LC_RESTART_STACK
+	je	\oklabel
+	lghi	%r14,\savearea
+	j	stack_overflow
+#else
+	j	\oklabel
+#endif
+	.endm
+
+	/*
+	 * The TSTMSK macro generates a test-under-mask instruction by
+	 * calculating the memory offset for the specified mask value.
+	 * Mask value can be any constant.  The macro shifts the mask
+	 * value to calculate the memory offset for the test-under-mask
+	 * instruction.
+	 */
+	.macro TSTMSK addr, mask, size=8, bytepos=0
+		.if (\bytepos < \size) && (\mask >> 8)
+			.if (\mask & 0xff)
+				.error "Mask exceeds byte boundary"
+			.endif
+			TSTMSK \addr, "(\mask >> 8)", \size, "(\bytepos + 1)"
+			.exitm
+		.endif
+		.ifeq \mask
+			.error "Mask must not be zero"
+		.endif
+		off = \size - \bytepos - 1
+		tm	off+\addr, \mask
+	.endm
+
+	.macro BPOFF
+	ALTERNATIVE "nop", ".insn rrf,0xb2e80000,0,0,12,0", 82
+	.endm
+
+	.macro BPON
+	ALTERNATIVE "nop", ".insn rrf,0xb2e80000,0,0,13,0", 82
+	.endm
+
+	.macro BPENTER tif_ptr,tif_mask
+	ALTERNATIVE "TSTMSK \tif_ptr,\tif_mask; jz .+8; .insn rrf,0xb2e80000,0,0,13,0", \
+		    "j .+12; nop; nop", 82
+	.endm
+
+	.macro BPEXIT tif_ptr,tif_mask
+	TSTMSK	\tif_ptr,\tif_mask
+	ALTERNATIVE "jz .+8;  .insn rrf,0xb2e80000,0,0,12,0", \
+		    "jnz .+8; .insn rrf,0xb2e80000,0,0,13,0", 82
+	.endm
+
+#if IS_ENABLED(CONFIG_KVM)
+	/*
+	 * The OUTSIDE macro jumps to the provided label in case the value
+	 * in the provided register is outside of the provided range. The
+	 * macro is useful for checking whether a PSW stored in a register
+	 * pair points inside or outside of a block of instructions.
+	 * @reg: register to check
+	 * @start: start of the range
+	 * @end: end of the range
+	 * @outside_label: jump here if @reg is outside of [@start..@end)
+	 */
+	.macro OUTSIDE reg,start,end,outside_label
+	lgr	%r14,\reg
+	larl	%r13,\start
+	slgr	%r14,%r13
+	clgfrl	%r14,.Lrange_size\@
+	jhe	\outside_label
+	.section .rodata, "a"
+	.balign 4
+.Lrange_size\@:
+	.long	\end - \start
+	.previous
+	.endm
+
+	.macro SIEEXIT
+	lg	%r9,__SF_SIE_CONTROL(%r15)	# get control block pointer
+	ni	__SIE_PROG0C+3(%r9),0xfe	# no longer in SIE
+	lctlg	%c1,%c1,__LC_KERNEL_ASCE	# load primary asce
+	larl	%r9,sie_exit			# skip forward to sie_exit
+	.endm
+#endif
+
+	.macro STACKLEAK_ERASE
+#ifdef CONFIG_GCC_PLUGIN_STACKLEAK
+	brasl	%r14,stackleak_erase_on_task_stack
+#endif
+	.endm
+
+	GEN_BR_THUNK %r14
+
+	.section .kprobes.text, "ax"
+.Ldummy:
+	/*
+	 * The following nop exists only in order to avoid that the next
+	 * symbol starts at the beginning of the kprobes text section.
+	 * In that case there would be several symbols at the same address.
+	 * E.g. objdump would take an arbitrary symbol when disassembling
+	 * the code.
+	 * With the added nop in between this cannot happen.
+	 */
+	nop	0
+
+/*
+ * Scheduler resume function, called by switch_to
+ *  gpr2 = (task_struct *) prev
+ *  gpr3 = (task_struct *) next
+ * Returns:
+ *  gpr2 = prev
+ */
+SYM_FUNC_START(__switch_to)
+	stmg	%r6,%r15,__SF_GPRS(%r15)	# store gprs of prev task
+	lghi	%r4,__TASK_stack
+	lghi	%r1,__TASK_thread
+	llill	%r5,STACK_INIT_OFFSET
+	stg	%r15,__THREAD_ksp(%r1,%r2)	# store kernel stack of prev
+	lg	%r15,0(%r4,%r3)			# start of kernel stack of next
+	agr	%r15,%r5			# end of kernel stack of next
+	stg	%r3,__LC_CURRENT		# store task struct of next
+	stg	%r15,__LC_KERNEL_STACK		# store end of kernel stack
+	lg	%r15,__THREAD_ksp(%r1,%r3)	# load kernel stack of next
+	aghi	%r3,__TASK_pid
+	mvc	__LC_CURRENT_PID(4,%r0),0(%r3)	# store pid of next
+	lmg	%r6,%r15,__SF_GPRS(%r15)	# load gprs of next task
+	ALTERNATIVE "nop", "lpp _LPP_OFFSET", 40
+	BR_EX	%r14
+SYM_FUNC_END(__switch_to)
+
+#if IS_ENABLED(CONFIG_KVM)
+/*
+ * __sie64a calling convention:
+ * %r2 pointer to sie control block phys
+ * %r3 pointer to sie control block virt
+ * %r4 guest register save area
+ */
+SYM_FUNC_START(__sie64a)
+	stmg	%r6,%r14,__SF_GPRS(%r15)	# save kernel registers
+	lg	%r12,__LC_CURRENT
+	stg	%r2,__SF_SIE_CONTROL_PHYS(%r15)	# save sie block physical..
+	stg	%r3,__SF_SIE_CONTROL(%r15)	# ...and virtual addresses
+	stg	%r4,__SF_SIE_SAVEAREA(%r15)	# save guest register save area
+	xc	__SF_SIE_REASON(8,%r15),__SF_SIE_REASON(%r15) # reason code = 0
+	mvc	__SF_SIE_FLAGS(8,%r15),__TI_flags(%r12) # copy thread flags
+	lmg	%r0,%r13,0(%r4)			# load guest gprs 0-13
+	lg	%r14,__LC_GMAP			# get gmap pointer
+	ltgr	%r14,%r14
+	jz	.Lsie_gmap
+	lctlg	%c1,%c1,__GMAP_ASCE(%r14)	# load primary asce
+.Lsie_gmap:
+	lg	%r14,__SF_SIE_CONTROL(%r15)	# get control block pointer
+	oi	__SIE_PROG0C+3(%r14),1		# we are going into SIE now
+	tm	__SIE_PROG20+3(%r14),3		# last exit...
+	jnz	.Lsie_skip
+	TSTMSK	__LC_CPU_FLAGS,_CIF_FPU
+	jo	.Lsie_skip			# exit if fp/vx regs changed
+	lg	%r14,__SF_SIE_CONTROL_PHYS(%r15)	# get sie block phys addr
+	BPEXIT	__SF_SIE_FLAGS(%r15),_TIF_ISOLATE_BP_GUEST
+.Lsie_entry:
+	sie	0(%r14)
+# Let the next instruction be NOP to avoid triggering a machine check
+# and handling it in a guest as result of the instruction execution.
+	nopr	7
+.Lsie_leave:
+	BPOFF
+	BPENTER	__SF_SIE_FLAGS(%r15),_TIF_ISOLATE_BP_GUEST
+.Lsie_skip:
+	lg	%r14,__SF_SIE_CONTROL(%r15)	# get control block pointer
+	ni	__SIE_PROG0C+3(%r14),0xfe	# no longer in SIE
+	lctlg	%c1,%c1,__LC_KERNEL_ASCE	# load primary asce
+.Lsie_done:
+# some program checks are suppressing. C code (e.g. do_protection_exception)
+# will rewind the PSW by the ILC, which is often 4 bytes in case of SIE. There
+# are some corner cases (e.g. runtime instrumentation) where ILC is unpredictable.
+# Other instructions between __sie64a and .Lsie_done should not cause program
+# interrupts. So lets use 3 nops as a landing pad for all possible rewinds.
+.Lrewind_pad6:
+	nopr	7
+.Lrewind_pad4:
+	nopr	7
+.Lrewind_pad2:
+	nopr	7
+SYM_INNER_LABEL(sie_exit, SYM_L_GLOBAL)
+	lg	%r14,__SF_SIE_SAVEAREA(%r15)	# load guest register save area
+	stmg	%r0,%r13,0(%r14)		# save guest gprs 0-13
+	xgr	%r0,%r0				# clear guest registers to
+	xgr	%r1,%r1				# prevent speculative use
+	xgr	%r3,%r3
+	xgr	%r4,%r4
+	xgr	%r5,%r5
+	lmg	%r6,%r14,__SF_GPRS(%r15)	# restore kernel registers
+	lg	%r2,__SF_SIE_REASON(%r15)	# return exit reason code
+	BR_EX	%r14
+.Lsie_fault:
+	lghi	%r14,-EFAULT
+	stg	%r14,__SF_SIE_REASON(%r15)	# set exit reason code
+	j	sie_exit
+
+	EX_TABLE(.Lrewind_pad6,.Lsie_fault)
+	EX_TABLE(.Lrewind_pad4,.Lsie_fault)
+	EX_TABLE(.Lrewind_pad2,.Lsie_fault)
+	EX_TABLE(sie_exit,.Lsie_fault)
+SYM_FUNC_END(__sie64a)
+EXPORT_SYMBOL(__sie64a)
+EXPORT_SYMBOL(sie_exit)
+#endif
+
+/*
+ * SVC interrupt handler routine. System calls are synchronous events and
+ * are entered with interrupts disabled.
+ */
+
+SYM_CODE_START(system_call)
+	stpt	__LC_SYS_ENTER_TIMER
+	stmg	%r8,%r15,__LC_SAVE_AREA_SYNC
+	BPOFF
+	lghi	%r14,0
+.Lsysc_per:
+	STBEAR	__LC_LAST_BREAK
+	lctlg	%c1,%c1,__LC_KERNEL_ASCE
+	lg	%r15,__LC_KERNEL_STACK
+	xc	__SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15)
+	stmg	%r0,%r7,STACK_FRAME_OVERHEAD+__PT_R0(%r15)
+	# clear user controlled register to prevent speculative use
+	xgr	%r0,%r0
+	xgr	%r1,%r1
+	xgr	%r4,%r4
+	xgr	%r5,%r5
+	xgr	%r6,%r6
+	xgr	%r7,%r7
+	xgr	%r8,%r8
+	xgr	%r9,%r9
+	xgr	%r10,%r10
+	xgr	%r11,%r11
+	la	%r2,STACK_FRAME_OVERHEAD(%r15)	# pointer to pt_regs
+	mvc	__PT_R8(64,%r2),__LC_SAVE_AREA_SYNC
+	MBEAR	%r2
+	lgr	%r3,%r14
+	brasl	%r14,__do_syscall
+	STACKLEAK_ERASE
+	lctlg	%c1,%c1,__LC_USER_ASCE
+	mvc	__LC_RETURN_PSW(16),STACK_FRAME_OVERHEAD+__PT_PSW(%r15)
+	BPON
+	LBEAR	STACK_FRAME_OVERHEAD+__PT_LAST_BREAK(%r15)
+	lmg	%r0,%r15,STACK_FRAME_OVERHEAD+__PT_R0(%r15)
+	stpt	__LC_EXIT_TIMER
+	LPSWEY	__LC_RETURN_PSW,__LC_RETURN_LPSWE
+SYM_CODE_END(system_call)
+
+#
+# a new process exits the kernel with ret_from_fork
+#
+SYM_CODE_START(ret_from_fork)
+	lgr	%r3,%r11
+	brasl	%r14,__ret_from_fork
+	STACKLEAK_ERASE
+	lctlg	%c1,%c1,__LC_USER_ASCE
+	mvc	__LC_RETURN_PSW(16),STACK_FRAME_OVERHEAD+__PT_PSW(%r15)
+	BPON
+	LBEAR	STACK_FRAME_OVERHEAD+__PT_LAST_BREAK(%r15)
+	lmg	%r0,%r15,STACK_FRAME_OVERHEAD+__PT_R0(%r15)
+	stpt	__LC_EXIT_TIMER
+	LPSWEY	__LC_RETURN_PSW,__LC_RETURN_LPSWE
+SYM_CODE_END(ret_from_fork)
+
+/*
+ * Program check handler routine
+ */
+
+SYM_CODE_START(pgm_check_handler)
+	stpt	__LC_SYS_ENTER_TIMER
+	BPOFF
+	stmg	%r8,%r15,__LC_SAVE_AREA_SYNC
+	lghi	%r10,0
+	lmg	%r8,%r9,__LC_PGM_OLD_PSW
+	tmhh	%r8,0x0001		# coming from user space?
+	jno	.Lpgm_skip_asce
+	lctlg	%c1,%c1,__LC_KERNEL_ASCE
+	j	3f			# -> fault in user space
+.Lpgm_skip_asce:
+#if IS_ENABLED(CONFIG_KVM)
+	# cleanup critical section for program checks in __sie64a
+	OUTSIDE	%r9,.Lsie_gmap,.Lsie_done,1f
+	BPENTER	__SF_SIE_FLAGS(%r15),_TIF_ISOLATE_BP_GUEST
+	SIEEXIT
+	lghi	%r10,_PIF_GUEST_FAULT
+#endif
+1:	tmhh	%r8,0x4000		# PER bit set in old PSW ?
+	jnz	2f			# -> enabled, can't be a double fault
+	tm	__LC_PGM_ILC+3,0x80	# check for per exception
+	jnz	.Lpgm_svcper		# -> single stepped svc
+2:	CHECK_STACK __LC_SAVE_AREA_SYNC
+	aghi	%r15,-(STACK_FRAME_OVERHEAD + __PT_SIZE)
+	# CHECK_VMAP_STACK branches to stack_overflow or 4f
+	CHECK_VMAP_STACK __LC_SAVE_AREA_SYNC,4f
+3:	lg	%r15,__LC_KERNEL_STACK
+4:	la	%r11,STACK_FRAME_OVERHEAD(%r15)
+	stg	%r10,__PT_FLAGS(%r11)
+	xc	__SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15)
+	stmg	%r0,%r7,__PT_R0(%r11)
+	mvc	__PT_R8(64,%r11),__LC_SAVE_AREA_SYNC
+	mvc	__PT_LAST_BREAK(8,%r11),__LC_PGM_LAST_BREAK
+	stmg	%r8,%r9,__PT_PSW(%r11)
+
+	# clear user controlled registers to prevent speculative use
+	xgr	%r0,%r0
+	xgr	%r1,%r1
+	xgr	%r3,%r3
+	xgr	%r4,%r4
+	xgr	%r5,%r5
+	xgr	%r6,%r6
+	xgr	%r7,%r7
+	lgr	%r2,%r11
+	brasl	%r14,__do_pgm_check
+	tmhh	%r8,0x0001		# returning to user space?
+	jno	.Lpgm_exit_kernel
+	STACKLEAK_ERASE
+	lctlg	%c1,%c1,__LC_USER_ASCE
+	BPON
+	stpt	__LC_EXIT_TIMER
+.Lpgm_exit_kernel:
+	mvc	__LC_RETURN_PSW(16),STACK_FRAME_OVERHEAD+__PT_PSW(%r15)
+	LBEAR	STACK_FRAME_OVERHEAD+__PT_LAST_BREAK(%r15)
+	lmg	%r0,%r15,STACK_FRAME_OVERHEAD+__PT_R0(%r15)
+	LPSWEY	__LC_RETURN_PSW,__LC_RETURN_LPSWE
+
+#
+# single stepped system call
+#
+.Lpgm_svcper:
+	mvc	__LC_RETURN_PSW(8),__LC_SVC_NEW_PSW
+	larl	%r14,.Lsysc_per
+	stg	%r14,__LC_RETURN_PSW+8
+	lghi	%r14,1
+	LBEAR	__LC_PGM_LAST_BREAK
+	LPSWEY	__LC_RETURN_PSW,__LC_RETURN_LPSWE # branch to .Lsysc_per
+SYM_CODE_END(pgm_check_handler)
+
+/*
+ * Interrupt handler macro used for external and IO interrupts.
+ */
+.macro INT_HANDLER name,lc_old_psw,handler
+SYM_CODE_START(\name)
+	stckf	__LC_INT_CLOCK
+	stpt	__LC_SYS_ENTER_TIMER
+	STBEAR	__LC_LAST_BREAK
+	BPOFF
+	stmg	%r8,%r15,__LC_SAVE_AREA_ASYNC
+	lmg	%r8,%r9,\lc_old_psw
+	tmhh	%r8,0x0001			# interrupting from user ?
+	jnz	1f
+#if IS_ENABLED(CONFIG_KVM)
+	OUTSIDE	%r9,.Lsie_gmap,.Lsie_done,0f
+	BPENTER	__SF_SIE_FLAGS(%r15),_TIF_ISOLATE_BP_GUEST
+	SIEEXIT
+#endif
+0:	CHECK_STACK __LC_SAVE_AREA_ASYNC
+	aghi	%r15,-(STACK_FRAME_OVERHEAD + __PT_SIZE)
+	j	2f
+1:	lctlg	%c1,%c1,__LC_KERNEL_ASCE
+	lg	%r15,__LC_KERNEL_STACK
+2:	xc	__SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15)
+	la	%r11,STACK_FRAME_OVERHEAD(%r15)
+	stmg	%r0,%r7,__PT_R0(%r11)
+	# clear user controlled registers to prevent speculative use
+	xgr	%r0,%r0
+	xgr	%r1,%r1
+	xgr	%r3,%r3
+	xgr	%r4,%r4
+	xgr	%r5,%r5
+	xgr	%r6,%r6
+	xgr	%r7,%r7
+	xgr	%r10,%r10
+	xc	__PT_FLAGS(8,%r11),__PT_FLAGS(%r11)
+	mvc	__PT_R8(64,%r11),__LC_SAVE_AREA_ASYNC
+	MBEAR	%r11
+	stmg	%r8,%r9,__PT_PSW(%r11)
+	lgr	%r2,%r11		# pass pointer to pt_regs
+	brasl	%r14,\handler
+	mvc	__LC_RETURN_PSW(16),__PT_PSW(%r11)
+	tmhh	%r8,0x0001		# returning to user ?
+	jno	2f
+	STACKLEAK_ERASE
+	lctlg	%c1,%c1,__LC_USER_ASCE
+	BPON
+	stpt	__LC_EXIT_TIMER
+2:	LBEAR	__PT_LAST_BREAK(%r11)
+	lmg	%r0,%r15,__PT_R0(%r11)
+	LPSWEY	__LC_RETURN_PSW,__LC_RETURN_LPSWE
+SYM_CODE_END(\name)
+.endm
+
+INT_HANDLER ext_int_handler,__LC_EXT_OLD_PSW,do_ext_irq
+INT_HANDLER io_int_handler,__LC_IO_OLD_PSW,do_io_irq
+
+/*
+ * Load idle PSW.
+ */
+SYM_FUNC_START(psw_idle)
+	stg	%r14,(__SF_GPRS+8*8)(%r15)
+	stg	%r3,__SF_EMPTY(%r15)
+	larl	%r1,psw_idle_exit
+	stg	%r1,__SF_EMPTY+8(%r15)
+	larl	%r1,smp_cpu_mtid
+	llgf	%r1,0(%r1)
+	ltgr	%r1,%r1
+	jz	.Lpsw_idle_stcctm
+	.insn	rsy,0xeb0000000017,%r1,5,__MT_CYCLES_ENTER(%r2)
+.Lpsw_idle_stcctm:
+	oi	__LC_CPU_FLAGS+7,_CIF_ENABLED_WAIT
+	BPON
+	stckf	__CLOCK_IDLE_ENTER(%r2)
+	stpt	__TIMER_IDLE_ENTER(%r2)
+	lpswe	__SF_EMPTY(%r15)
+SYM_INNER_LABEL(psw_idle_exit, SYM_L_GLOBAL)
+	BR_EX	%r14
+SYM_FUNC_END(psw_idle)
+
+/*
+ * Machine check handler routines
+ */
+SYM_CODE_START(mcck_int_handler)
+	BPOFF
+	la	%r1,4095		# validate r1
+	spt	__LC_CPU_TIMER_SAVE_AREA-4095(%r1)	# validate cpu timer
+	LBEAR	__LC_LAST_BREAK_SAVE_AREA-4095(%r1)		# validate bear
+	lmg	%r0,%r15,__LC_GPREGS_SAVE_AREA		# validate gprs
+	lmg	%r8,%r9,__LC_MCK_OLD_PSW
+	TSTMSK	__LC_MCCK_CODE,MCCK_CODE_SYSTEM_DAMAGE
+	jo	.Lmcck_panic		# yes -> rest of mcck code invalid
+	TSTMSK	__LC_MCCK_CODE,MCCK_CODE_CR_VALID
+	jno	.Lmcck_panic		# control registers invalid -> panic
+	lctlg	%c0,%c15,__LC_CREGS_SAVE_AREA		# validate ctl regs
+	ptlb
+	lghi	%r14,__LC_CPU_TIMER_SAVE_AREA
+	mvc	__LC_MCCK_ENTER_TIMER(8),0(%r14)
+	TSTMSK	__LC_MCCK_CODE,MCCK_CODE_CPU_TIMER_VALID
+	jo	3f
+	la	%r14,__LC_SYS_ENTER_TIMER
+	clc	0(8,%r14),__LC_EXIT_TIMER
+	jl	1f
+	la	%r14,__LC_EXIT_TIMER
+1:	clc	0(8,%r14),__LC_LAST_UPDATE_TIMER
+	jl	2f
+	la	%r14,__LC_LAST_UPDATE_TIMER
+2:	spt	0(%r14)
+	mvc	__LC_MCCK_ENTER_TIMER(8),0(%r14)
+3:	TSTMSK	__LC_MCCK_CODE,MCCK_CODE_PSW_MWP_VALID
+	jno	.Lmcck_panic
+	tmhh	%r8,0x0001		# interrupting from user ?
+	jnz	.Lmcck_user
+	TSTMSK	__LC_MCCK_CODE,MCCK_CODE_PSW_IA_VALID
+	jno	.Lmcck_panic
+#if IS_ENABLED(CONFIG_KVM)
+	OUTSIDE	%r9,.Lsie_gmap,.Lsie_done,.Lmcck_user
+	OUTSIDE	%r9,.Lsie_entry,.Lsie_leave,4f
+	oi	__LC_CPU_FLAGS+7, _CIF_MCCK_GUEST
+4:	BPENTER	__SF_SIE_FLAGS(%r15),_TIF_ISOLATE_BP_GUEST
+	SIEEXIT
+#endif
+.Lmcck_user:
+	lg	%r15,__LC_MCCK_STACK
+	la	%r11,STACK_FRAME_OVERHEAD(%r15)
+	stctg	%c1,%c1,__PT_CR1(%r11)
+	lctlg	%c1,%c1,__LC_KERNEL_ASCE
+	xc	__SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15)
+	lghi	%r14,__LC_GPREGS_SAVE_AREA+64
+	stmg	%r0,%r7,__PT_R0(%r11)
+	# clear user controlled registers to prevent speculative use
+	xgr	%r0,%r0
+	xgr	%r1,%r1
+	xgr	%r3,%r3
+	xgr	%r4,%r4
+	xgr	%r5,%r5
+	xgr	%r6,%r6
+	xgr	%r7,%r7
+	xgr	%r10,%r10
+	mvc	__PT_R8(64,%r11),0(%r14)
+	stmg	%r8,%r9,__PT_PSW(%r11)
+	xc	__PT_FLAGS(8,%r11),__PT_FLAGS(%r11)
+	xc	__SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15)
+	lgr	%r2,%r11		# pass pointer to pt_regs
+	brasl	%r14,s390_do_machine_check
+	lctlg	%c1,%c1,__PT_CR1(%r11)
+	lmg	%r0,%r10,__PT_R0(%r11)
+	mvc	__LC_RETURN_MCCK_PSW(16),__PT_PSW(%r11) # move return PSW
+	tm	__LC_RETURN_MCCK_PSW+1,0x01 # returning to user ?
+	jno	0f
+	BPON
+	stpt	__LC_EXIT_TIMER
+0:	ALTERNATIVE "nop", __stringify(lghi %r12,__LC_LAST_BREAK_SAVE_AREA),193
+	LBEAR	0(%r12)
+	lmg	%r11,%r15,__PT_R11(%r11)
+	LPSWEY	__LC_RETURN_MCCK_PSW,__LC_RETURN_MCCK_LPSWE
+
+.Lmcck_panic:
+	/*
+	 * Iterate over all possible CPU addresses in the range 0..0xffff
+	 * and stop each CPU using signal processor. Use compare and swap
+	 * to allow just one CPU-stopper and prevent concurrent CPUs from
+	 * stopping each other while leaving the others running.
+	 */
+	lhi	%r5,0
+	lhi	%r6,1
+	larl	%r7,stop_lock
+	cs	%r5,%r6,0(%r7)		# single CPU-stopper only
+	jnz	4f
+	larl	%r7,this_cpu
+	stap	0(%r7)			# this CPU address
+	lh	%r4,0(%r7)
+	nilh	%r4,0
+	lhi	%r0,1
+	sll	%r0,16			# CPU counter
+	lhi	%r3,0			# next CPU address
+0:	cr	%r3,%r4
+	je	2f
+1:	sigp	%r1,%r3,SIGP_STOP	# stop next CPU
+	brc	SIGP_CC_BUSY,1b
+2:	ahi	%r3,1
+	brct	%r0,0b
+3:	sigp	%r1,%r4,SIGP_STOP	# stop this CPU
+	brc	SIGP_CC_BUSY,3b
+4:	j	4b
+SYM_CODE_END(mcck_int_handler)
+
+SYM_CODE_START(restart_int_handler)
+	ALTERNATIVE "nop", "lpp _LPP_OFFSET", 40
+	stg	%r15,__LC_SAVE_AREA_RESTART
+	TSTMSK	__LC_RESTART_FLAGS,RESTART_FLAG_CTLREGS,4
+	jz	0f
+	lctlg	%c0,%c15,__LC_CREGS_SAVE_AREA
+0:	larl	%r15,daton_psw
+	lpswe	0(%r15)				# turn dat on, keep irqs off
+.Ldaton:
+	lg	%r15,__LC_RESTART_STACK
+	xc	STACK_FRAME_OVERHEAD(__PT_SIZE,%r15),STACK_FRAME_OVERHEAD(%r15)
+	stmg	%r0,%r14,STACK_FRAME_OVERHEAD+__PT_R0(%r15)
+	mvc	STACK_FRAME_OVERHEAD+__PT_R15(8,%r15),__LC_SAVE_AREA_RESTART
+	mvc	STACK_FRAME_OVERHEAD+__PT_PSW(16,%r15),__LC_RST_OLD_PSW
+	xc	0(STACK_FRAME_OVERHEAD,%r15),0(%r15)
+	lg	%r1,__LC_RESTART_FN		# load fn, parm & source cpu
+	lg	%r2,__LC_RESTART_DATA
+	lgf	%r3,__LC_RESTART_SOURCE
+	ltgr	%r3,%r3				# test source cpu address
+	jm	1f				# negative -> skip source stop
+0:	sigp	%r4,%r3,SIGP_SENSE		# sigp sense to source cpu
+	brc	10,0b				# wait for status stored
+1:	basr	%r14,%r1			# call function
+	stap	__SF_EMPTY(%r15)		# store cpu address
+	llgh	%r3,__SF_EMPTY(%r15)
+2:	sigp	%r4,%r3,SIGP_STOP		# sigp stop to current cpu
+	brc	2,2b
+3:	j	3b
+SYM_CODE_END(restart_int_handler)
+
+	.section .kprobes.text, "ax"
+
+#if defined(CONFIG_CHECK_STACK) || defined(CONFIG_VMAP_STACK)
+/*
+ * The synchronous or the asynchronous stack overflowed. We are dead.
+ * No need to properly save the registers, we are going to panic anyway.
+ * Setup a pt_regs so that show_trace can provide a good call trace.
+ */
+SYM_CODE_START(stack_overflow)
+	lg	%r15,__LC_NODAT_STACK	# change to panic stack
+	la	%r11,STACK_FRAME_OVERHEAD(%r15)
+	stmg	%r0,%r7,__PT_R0(%r11)
+	stmg	%r8,%r9,__PT_PSW(%r11)
+	mvc	__PT_R8(64,%r11),0(%r14)
+	stg	%r10,__PT_ORIG_GPR2(%r11) # store last break to orig_gpr2
+	xc	__SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15)
+	lgr	%r2,%r11		# pass pointer to pt_regs
+	jg	kernel_stack_overflow
+SYM_CODE_END(stack_overflow)
+#endif
+
+	.section .data, "aw"
+	.balign	4
+SYM_DATA_LOCAL(stop_lock,	.long 0)
+SYM_DATA_LOCAL(this_cpu,	.short 0)
+	.balign	8
+SYM_DATA_START_LOCAL(daton_psw)
+	.quad	PSW_KERNEL_BITS
+	.quad	.Ldaton
+SYM_DATA_END(daton_psw)
+
+	.section .rodata, "a"
+#define SYSCALL(esame,emu)	.quad __s390x_ ## esame
+SYM_DATA_START(sys_call_table)
+#include "asm/syscall_table.h"
+SYM_DATA_END(sys_call_table)
+#undef SYSCALL
+
+#ifdef CONFIG_COMPAT
+
+#define SYSCALL(esame,emu)	.quad __s390_ ## emu
+SYM_DATA_START(sys_call_table_emu)
+#include "asm/syscall_table.h"
+SYM_DATA_END(sys_call_table_emu)
+#undef SYSCALL
+#endif
diff --git a/arch/s390/kernel/entry.h b/arch/s390/kernel/entry.h
new file mode 100644
index 0000000000..9f41853f36
--- /dev/null
+++ b/arch/s390/kernel/entry.h
@@ -0,0 +1,75 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ENTRY_H
+#define _ENTRY_H
+
+#include <linux/percpu.h>
+#include <linux/types.h>
+#include <linux/signal.h>
+#include <asm/extable.h>
+#include <asm/ptrace.h>
+#include <asm/idle.h>
+
+extern void *restart_stack;
+
+void system_call(void);
+void pgm_check_handler(void);
+void ext_int_handler(void);
+void io_int_handler(void);
+void mcck_int_handler(void);
+void restart_int_handler(void);
+void early_pgm_check_handler(void);
+
+void __ret_from_fork(struct task_struct *prev, struct pt_regs *regs);
+void __do_pgm_check(struct pt_regs *regs);
+void __do_syscall(struct pt_regs *regs, int per_trap);
+void __do_early_pgm_check(struct pt_regs *regs);
+
+void do_protection_exception(struct pt_regs *regs);
+void do_dat_exception(struct pt_regs *regs);
+void do_secure_storage_access(struct pt_regs *regs);
+void do_non_secure_storage_access(struct pt_regs *regs);
+void do_secure_storage_violation(struct pt_regs *regs);
+void do_report_trap(struct pt_regs *regs, int si_signo, int si_code, char *str);
+void kernel_stack_overflow(struct pt_regs * regs);
+void handle_signal32(struct ksignal *ksig, sigset_t *oldset,
+		     struct pt_regs *regs);
+
+void do_io_irq(struct pt_regs *regs);
+void do_ext_irq(struct pt_regs *regs);
+void do_restart(void *arg);
+void __init startup_init(void);
+void die(struct pt_regs *regs, const char *str);
+int setup_profiling_timer(unsigned int multiplier);
+unsigned long prepare_ftrace_return(unsigned long parent, unsigned long sp, unsigned long ip);
+
+struct s390_mmap_arg_struct;
+struct fadvise64_64_args;
+struct old_sigaction;
+
+long sys_rt_sigreturn(void);
+long sys_sigreturn(void);
+
+long sys_s390_personality(unsigned int personality);
+long sys_s390_runtime_instr(int command, int signum);
+long sys_s390_guarded_storage(int command, struct gs_cb __user *);
+long sys_s390_pci_mmio_write(unsigned long, const void __user *, size_t);
+long sys_s390_pci_mmio_read(unsigned long, void __user *, size_t);
+long sys_s390_sthyi(unsigned long function_code, void __user *buffer, u64 __user *return_code, unsigned long flags);
+
+DECLARE_PER_CPU(u64, mt_cycles[8]);
+
+unsigned long stack_alloc(void);
+void stack_free(unsigned long stack);
+
+extern char kprobes_insn_page[];
+
+extern char _samode31[], _eamode31[];
+extern char _stext_amode31[], _etext_amode31[];
+extern struct exception_table_entry _start_amode31_ex_table[];
+extern struct exception_table_entry _stop_amode31_ex_table[];
+
+#define __amode31_data __section(".amode31.data")
+#define __amode31_ref __section(".amode31.refs")
+extern long _start_amode31_refs[], _end_amode31_refs[];
+
+#endif /* _ENTRY_H */
diff --git a/arch/s390/kernel/fpu.c b/arch/s390/kernel/fpu.c
new file mode 100644
index 0000000000..4666b29ac8
--- /dev/null
+++ b/arch/s390/kernel/fpu.c
@@ -0,0 +1,264 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * In-kernel vector facility support functions
+ *
+ * Copyright IBM Corp. 2015
+ * Author(s): Hendrik Brueckner <brueckner@linux.vnet.ibm.com>
+ */
+#include <linux/kernel.h>
+#include <linux/cpu.h>
+#include <linux/sched.h>
+#include <asm/fpu/types.h>
+#include <asm/fpu/api.h>
+#include <asm/vx-insn.h>
+
+void __kernel_fpu_begin(struct kernel_fpu *state, u32 flags)
+{
+	/*
+	 * Limit the save to the FPU/vector registers already
+	 * in use by the previous context
+	 */
+	flags &= state->mask;
+
+	if (flags & KERNEL_FPC)
+		/* Save floating point control */
+		asm volatile("stfpc %0" : "=Q" (state->fpc));
+
+	if (!MACHINE_HAS_VX) {
+		if (flags & KERNEL_VXR_V0V7) {
+			/* Save floating-point registers */
+			asm volatile("std 0,%0" : "=Q" (state->fprs[0]));
+			asm volatile("std 1,%0" : "=Q" (state->fprs[1]));
+			asm volatile("std 2,%0" : "=Q" (state->fprs[2]));
+			asm volatile("std 3,%0" : "=Q" (state->fprs[3]));
+			asm volatile("std 4,%0" : "=Q" (state->fprs[4]));
+			asm volatile("std 5,%0" : "=Q" (state->fprs[5]));
+			asm volatile("std 6,%0" : "=Q" (state->fprs[6]));
+			asm volatile("std 7,%0" : "=Q" (state->fprs[7]));
+			asm volatile("std 8,%0" : "=Q" (state->fprs[8]));
+			asm volatile("std 9,%0" : "=Q" (state->fprs[9]));
+			asm volatile("std 10,%0" : "=Q" (state->fprs[10]));
+			asm volatile("std 11,%0" : "=Q" (state->fprs[11]));
+			asm volatile("std 12,%0" : "=Q" (state->fprs[12]));
+			asm volatile("std 13,%0" : "=Q" (state->fprs[13]));
+			asm volatile("std 14,%0" : "=Q" (state->fprs[14]));
+			asm volatile("std 15,%0" : "=Q" (state->fprs[15]));
+		}
+		return;
+	}
+
+	/* Test and save vector registers */
+	asm volatile (
+		/*
+		 * Test if any vector register must be saved and, if so,
+		 * test if all register can be saved.
+		 */
+		"	la	1,%[vxrs]\n"	/* load save area */
+		"	tmll	%[m],30\n"	/* KERNEL_VXR */
+		"	jz	7f\n"		/* no work -> done */
+		"	jo	5f\n"		/* -> save V0..V31 */
+		/*
+		 * Test for special case KERNEL_FPU_MID only. In this
+		 * case a vstm V8..V23 is the best instruction
+		 */
+		"	chi	%[m],12\n"	/* KERNEL_VXR_MID */
+		"	jne	0f\n"		/* -> save V8..V23 */
+		"	VSTM	8,23,128,1\n"	/* vstm %v8,%v23,128(%r1) */
+		"	j	7f\n"
+		/* Test and save the first half of 16 vector registers */
+		"0:	tmll	%[m],6\n"	/* KERNEL_VXR_LOW */
+		"	jz	3f\n"		/* -> KERNEL_VXR_HIGH */
+		"	jo	2f\n"		/* 11 -> save V0..V15 */
+		"	brc	2,1f\n"		/* 10 -> save V8..V15 */
+		"	VSTM	0,7,0,1\n"	/* vstm %v0,%v7,0(%r1) */
+		"	j	3f\n"
+		"1:	VSTM	8,15,128,1\n"	/* vstm %v8,%v15,128(%r1) */
+		"	j	3f\n"
+		"2:	VSTM	0,15,0,1\n"	/* vstm %v0,%v15,0(%r1) */
+		/* Test and save the second half of 16 vector registers */
+		"3:	tmll	%[m],24\n"	/* KERNEL_VXR_HIGH */
+		"	jz	7f\n"
+		"	jo	6f\n"		/* 11 -> save V16..V31 */
+		"	brc	2,4f\n"		/* 10 -> save V24..V31 */
+		"	VSTM	16,23,256,1\n"	/* vstm %v16,%v23,256(%r1) */
+		"	j	7f\n"
+		"4:	VSTM	24,31,384,1\n"	/* vstm %v24,%v31,384(%r1) */
+		"	j	7f\n"
+		"5:	VSTM	0,15,0,1\n"	/* vstm %v0,%v15,0(%r1) */
+		"6:	VSTM	16,31,256,1\n"	/* vstm %v16,%v31,256(%r1) */
+		"7:"
+		: [vxrs] "=Q" (*(struct vx_array *) &state->vxrs)
+		: [m] "d" (flags)
+		: "1", "cc");
+}
+EXPORT_SYMBOL(__kernel_fpu_begin);
+
+void __kernel_fpu_end(struct kernel_fpu *state, u32 flags)
+{
+	/*
+	 * Limit the restore to the FPU/vector registers of the
+	 * previous context that have been overwritte by the
+	 * current context
+	 */
+	flags &= state->mask;
+
+	if (flags & KERNEL_FPC)
+		/* Restore floating-point controls */
+		asm volatile("lfpc %0" : : "Q" (state->fpc));
+
+	if (!MACHINE_HAS_VX) {
+		if (flags & KERNEL_VXR_V0V7) {
+			/* Restore floating-point registers */
+			asm volatile("ld 0,%0" : : "Q" (state->fprs[0]));
+			asm volatile("ld 1,%0" : : "Q" (state->fprs[1]));
+			asm volatile("ld 2,%0" : : "Q" (state->fprs[2]));
+			asm volatile("ld 3,%0" : : "Q" (state->fprs[3]));
+			asm volatile("ld 4,%0" : : "Q" (state->fprs[4]));
+			asm volatile("ld 5,%0" : : "Q" (state->fprs[5]));
+			asm volatile("ld 6,%0" : : "Q" (state->fprs[6]));
+			asm volatile("ld 7,%0" : : "Q" (state->fprs[7]));
+			asm volatile("ld 8,%0" : : "Q" (state->fprs[8]));
+			asm volatile("ld 9,%0" : : "Q" (state->fprs[9]));
+			asm volatile("ld 10,%0" : : "Q" (state->fprs[10]));
+			asm volatile("ld 11,%0" : : "Q" (state->fprs[11]));
+			asm volatile("ld 12,%0" : : "Q" (state->fprs[12]));
+			asm volatile("ld 13,%0" : : "Q" (state->fprs[13]));
+			asm volatile("ld 14,%0" : : "Q" (state->fprs[14]));
+			asm volatile("ld 15,%0" : : "Q" (state->fprs[15]));
+		}
+		return;
+	}
+
+	/* Test and restore (load) vector registers */
+	asm volatile (
+		/*
+		 * Test if any vector register must be loaded and, if so,
+		 * test if all registers can be loaded at once.
+		 */
+		"	la	1,%[vxrs]\n"	/* load restore area */
+		"	tmll	%[m],30\n"	/* KERNEL_VXR */
+		"	jz	7f\n"		/* no work -> done */
+		"	jo	5f\n"		/* -> restore V0..V31 */
+		/*
+		 * Test for special case KERNEL_FPU_MID only. In this
+		 * case a vlm V8..V23 is the best instruction
+		 */
+		"	chi	%[m],12\n"	/* KERNEL_VXR_MID */
+		"	jne	0f\n"		/* -> restore V8..V23 */
+		"	VLM	8,23,128,1\n"	/* vlm %v8,%v23,128(%r1) */
+		"	j	7f\n"
+		/* Test and restore the first half of 16 vector registers */
+		"0:	tmll	%[m],6\n"	/* KERNEL_VXR_LOW */
+		"	jz	3f\n"		/* -> KERNEL_VXR_HIGH */
+		"	jo	2f\n"		/* 11 -> restore V0..V15 */
+		"	brc	2,1f\n"		/* 10 -> restore V8..V15 */
+		"	VLM	0,7,0,1\n"	/* vlm %v0,%v7,0(%r1) */
+		"	j	3f\n"
+		"1:	VLM	8,15,128,1\n"	/* vlm %v8,%v15,128(%r1) */
+		"	j	3f\n"
+		"2:	VLM	0,15,0,1\n"	/* vlm %v0,%v15,0(%r1) */
+		/* Test and restore the second half of 16 vector registers */
+		"3:	tmll	%[m],24\n"	/* KERNEL_VXR_HIGH */
+		"	jz	7f\n"
+		"	jo	6f\n"		/* 11 -> restore V16..V31 */
+		"	brc	2,4f\n"		/* 10 -> restore V24..V31 */
+		"	VLM	16,23,256,1\n"	/* vlm %v16,%v23,256(%r1) */
+		"	j	7f\n"
+		"4:	VLM	24,31,384,1\n"	/* vlm %v24,%v31,384(%r1) */
+		"	j	7f\n"
+		"5:	VLM	0,15,0,1\n"	/* vlm %v0,%v15,0(%r1) */
+		"6:	VLM	16,31,256,1\n"	/* vlm %v16,%v31,256(%r1) */
+		"7:"
+		: [vxrs] "=Q" (*(struct vx_array *) &state->vxrs)
+		: [m] "d" (flags)
+		: "1", "cc");
+}
+EXPORT_SYMBOL(__kernel_fpu_end);
+
+void __load_fpu_regs(void)
+{
+	struct fpu *state = &current->thread.fpu;
+	unsigned long *regs = current->thread.fpu.regs;
+
+	asm volatile("lfpc %0" : : "Q" (state->fpc));
+	if (likely(MACHINE_HAS_VX)) {
+		asm volatile("lgr	1,%0\n"
+			     "VLM	0,15,0,1\n"
+			     "VLM	16,31,256,1\n"
+			     :
+			     : "d" (regs)
+			     : "1", "cc", "memory");
+	} else {
+		asm volatile("ld 0,%0" : : "Q" (regs[0]));
+		asm volatile("ld 1,%0" : : "Q" (regs[1]));
+		asm volatile("ld 2,%0" : : "Q" (regs[2]));
+		asm volatile("ld 3,%0" : : "Q" (regs[3]));
+		asm volatile("ld 4,%0" : : "Q" (regs[4]));
+		asm volatile("ld 5,%0" : : "Q" (regs[5]));
+		asm volatile("ld 6,%0" : : "Q" (regs[6]));
+		asm volatile("ld 7,%0" : : "Q" (regs[7]));
+		asm volatile("ld 8,%0" : : "Q" (regs[8]));
+		asm volatile("ld 9,%0" : : "Q" (regs[9]));
+		asm volatile("ld 10,%0" : : "Q" (regs[10]));
+		asm volatile("ld 11,%0" : : "Q" (regs[11]));
+		asm volatile("ld 12,%0" : : "Q" (regs[12]));
+		asm volatile("ld 13,%0" : : "Q" (regs[13]));
+		asm volatile("ld 14,%0" : : "Q" (regs[14]));
+		asm volatile("ld 15,%0" : : "Q" (regs[15]));
+	}
+	clear_cpu_flag(CIF_FPU);
+}
+EXPORT_SYMBOL(__load_fpu_regs);
+
+void load_fpu_regs(void)
+{
+	raw_local_irq_disable();
+	__load_fpu_regs();
+	raw_local_irq_enable();
+}
+EXPORT_SYMBOL(load_fpu_regs);
+
+void save_fpu_regs(void)
+{
+	unsigned long flags, *regs;
+	struct fpu *state;
+
+	local_irq_save(flags);
+
+	if (test_cpu_flag(CIF_FPU))
+		goto out;
+
+	state = &current->thread.fpu;
+	regs = current->thread.fpu.regs;
+
+	asm volatile("stfpc %0" : "=Q" (state->fpc));
+	if (likely(MACHINE_HAS_VX)) {
+		asm volatile("lgr	1,%0\n"
+			     "VSTM	0,15,0,1\n"
+			     "VSTM	16,31,256,1\n"
+			     :
+			     : "d" (regs)
+			     : "1", "cc", "memory");
+	} else {
+		asm volatile("std 0,%0" : "=Q" (regs[0]));
+		asm volatile("std 1,%0" : "=Q" (regs[1]));
+		asm volatile("std 2,%0" : "=Q" (regs[2]));
+		asm volatile("std 3,%0" : "=Q" (regs[3]));
+		asm volatile("std 4,%0" : "=Q" (regs[4]));
+		asm volatile("std 5,%0" : "=Q" (regs[5]));
+		asm volatile("std 6,%0" : "=Q" (regs[6]));
+		asm volatile("std 7,%0" : "=Q" (regs[7]));
+		asm volatile("std 8,%0" : "=Q" (regs[8]));
+		asm volatile("std 9,%0" : "=Q" (regs[9]));
+		asm volatile("std 10,%0" : "=Q" (regs[10]));
+		asm volatile("std 11,%0" : "=Q" (regs[11]));
+		asm volatile("std 12,%0" : "=Q" (regs[12]));
+		asm volatile("std 13,%0" : "=Q" (regs[13]));
+		asm volatile("std 14,%0" : "=Q" (regs[14]));
+		asm volatile("std 15,%0" : "=Q" (regs[15]));
+	}
+	set_cpu_flag(CIF_FPU);
+out:
+	local_irq_restore(flags);
+}
+EXPORT_SYMBOL(save_fpu_regs);
diff --git a/arch/s390/kernel/ftrace.c b/arch/s390/kernel/ftrace.c
new file mode 100644
index 0000000000..c46381ea04
--- /dev/null
+++ b/arch/s390/kernel/ftrace.c
@@ -0,0 +1,340 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Dynamic function tracer architecture backend.
+ *
+ * Copyright IBM Corp. 2009,2014
+ *
+ *   Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>
+ */
+
+#include <linux/moduleloader.h>
+#include <linux/hardirq.h>
+#include <linux/uaccess.h>
+#include <linux/ftrace.h>
+#include <linux/kernel.h>
+#include <linux/types.h>
+#include <linux/kprobes.h>
+#include <trace/syscall.h>
+#include <asm/asm-offsets.h>
+#include <asm/text-patching.h>
+#include <asm/cacheflush.h>
+#include <asm/ftrace.lds.h>
+#include <asm/nospec-branch.h>
+#include <asm/set_memory.h>
+#include "entry.h"
+#include "ftrace.h"
+
+/*
+ * To generate function prologue either gcc's hotpatch feature (since gcc 4.8)
+ * or a combination of -pg -mrecord-mcount -mnop-mcount -mfentry flags
+ * (since gcc 9 / clang 10) is used.
+ * In both cases the original and also the disabled function prologue contains
+ * only a single six byte instruction and looks like this:
+ * >	brcl	0,0			# offset 0
+ * To enable ftrace the code gets patched like above and afterwards looks
+ * like this:
+ * >	brasl	%r0,ftrace_caller	# offset 0
+ *
+ * The instruction will be patched by ftrace_make_call / ftrace_make_nop.
+ * The ftrace function gets called with a non-standard C function call ABI
+ * where r0 contains the return address. It is also expected that the called
+ * function only clobbers r0 and r1, but restores r2-r15.
+ * For module code we can't directly jump to ftrace caller, but need a
+ * trampoline (ftrace_plt), which clobbers also r1.
+ */
+
+void *ftrace_func __read_mostly = ftrace_stub;
+struct ftrace_insn {
+	u16 opc;
+	s32 disp;
+} __packed;
+
+#ifdef CONFIG_MODULES
+static char *ftrace_plt;
+#endif /* CONFIG_MODULES */
+
+static const char *ftrace_shared_hotpatch_trampoline(const char **end)
+{
+	const char *tstart, *tend;
+
+	tstart = ftrace_shared_hotpatch_trampoline_br;
+	tend = ftrace_shared_hotpatch_trampoline_br_end;
+#ifdef CONFIG_EXPOLINE
+	if (!nospec_disable) {
+		tstart = ftrace_shared_hotpatch_trampoline_exrl;
+		tend = ftrace_shared_hotpatch_trampoline_exrl_end;
+	}
+#endif /* CONFIG_EXPOLINE */
+	if (end)
+		*end = tend;
+	return tstart;
+}
+
+bool ftrace_need_init_nop(void)
+{
+	return true;
+}
+
+int ftrace_init_nop(struct module *mod, struct dyn_ftrace *rec)
+{
+	static struct ftrace_hotpatch_trampoline *next_vmlinux_trampoline =
+		__ftrace_hotpatch_trampolines_start;
+	static const char orig[6] = { 0xc0, 0x04, 0x00, 0x00, 0x00, 0x00 };
+	static struct ftrace_hotpatch_trampoline *trampoline;
+	struct ftrace_hotpatch_trampoline **next_trampoline;
+	struct ftrace_hotpatch_trampoline *trampolines_end;
+	struct ftrace_hotpatch_trampoline tmp;
+	struct ftrace_insn *insn;
+	const char *shared;
+	s32 disp;
+
+	BUILD_BUG_ON(sizeof(struct ftrace_hotpatch_trampoline) !=
+		     SIZEOF_FTRACE_HOTPATCH_TRAMPOLINE);
+
+	next_trampoline = &next_vmlinux_trampoline;
+	trampolines_end = __ftrace_hotpatch_trampolines_end;
+	shared = ftrace_shared_hotpatch_trampoline(NULL);
+#ifdef CONFIG_MODULES
+	if (mod) {
+		next_trampoline = &mod->arch.next_trampoline;
+		trampolines_end = mod->arch.trampolines_end;
+		shared = ftrace_plt;
+	}
+#endif
+
+	if (WARN_ON_ONCE(*next_trampoline >= trampolines_end))
+		return -ENOMEM;
+	trampoline = (*next_trampoline)++;
+
+	/* Check for the compiler-generated fentry nop (brcl 0, .). */
+	if (WARN_ON_ONCE(memcmp((const void *)rec->ip, &orig, sizeof(orig))))
+		return -EINVAL;
+
+	/* Generate the trampoline. */
+	tmp.brasl_opc = 0xc015; /* brasl %r1, shared */
+	tmp.brasl_disp = (shared - (const char *)&trampoline->brasl_opc) / 2;
+	tmp.interceptor = FTRACE_ADDR;
+	tmp.rest_of_intercepted_function = rec->ip + sizeof(struct ftrace_insn);
+	s390_kernel_write(trampoline, &tmp, sizeof(tmp));
+
+	/* Generate a jump to the trampoline. */
+	disp = ((char *)trampoline - (char *)rec->ip) / 2;
+	insn = (struct ftrace_insn *)rec->ip;
+	s390_kernel_write(&insn->disp, &disp, sizeof(disp));
+
+	return 0;
+}
+
+static struct ftrace_hotpatch_trampoline *ftrace_get_trampoline(struct dyn_ftrace *rec)
+{
+	struct ftrace_hotpatch_trampoline *trampoline;
+	struct ftrace_insn insn;
+	s64 disp;
+	u16 opc;
+
+	if (copy_from_kernel_nofault(&insn, (void *)rec->ip, sizeof(insn)))
+		return ERR_PTR(-EFAULT);
+	disp = (s64)insn.disp * 2;
+	trampoline = (void *)(rec->ip + disp);
+	if (get_kernel_nofault(opc, &trampoline->brasl_opc))
+		return ERR_PTR(-EFAULT);
+	if (opc != 0xc015)
+		return ERR_PTR(-EINVAL);
+	return trampoline;
+}
+
+int ftrace_modify_call(struct dyn_ftrace *rec, unsigned long old_addr,
+		       unsigned long addr)
+{
+	struct ftrace_hotpatch_trampoline *trampoline;
+	u64 old;
+
+	trampoline = ftrace_get_trampoline(rec);
+	if (IS_ERR(trampoline))
+		return PTR_ERR(trampoline);
+	if (get_kernel_nofault(old, &trampoline->interceptor))
+		return -EFAULT;
+	if (old != old_addr)
+		return -EINVAL;
+	s390_kernel_write(&trampoline->interceptor, &addr, sizeof(addr));
+	return 0;
+}
+
+static int ftrace_patch_branch_mask(void *addr, u16 expected, bool enable)
+{
+	u16 old;
+	u8 op;
+
+	if (get_kernel_nofault(old, addr))
+		return -EFAULT;
+	if (old != expected)
+		return -EINVAL;
+	/* set mask field to all ones or zeroes */
+	op = enable ? 0xf4 : 0x04;
+	s390_kernel_write((char *)addr + 1, &op, sizeof(op));
+	return 0;
+}
+
+int ftrace_make_nop(struct module *mod, struct dyn_ftrace *rec,
+		    unsigned long addr)
+{
+	/* Expect brcl 0xf,... */
+	return ftrace_patch_branch_mask((void *)rec->ip, 0xc0f4, false);
+}
+
+int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
+{
+	struct ftrace_hotpatch_trampoline *trampoline;
+
+	trampoline = ftrace_get_trampoline(rec);
+	if (IS_ERR(trampoline))
+		return PTR_ERR(trampoline);
+	s390_kernel_write(&trampoline->interceptor, &addr, sizeof(addr));
+	/* Expect brcl 0x0,... */
+	return ftrace_patch_branch_mask((void *)rec->ip, 0xc004, true);
+}
+
+int ftrace_update_ftrace_func(ftrace_func_t func)
+{
+	ftrace_func = func;
+	return 0;
+}
+
+void arch_ftrace_update_code(int command)
+{
+	ftrace_modify_all_code(command);
+}
+
+void ftrace_arch_code_modify_post_process(void)
+{
+	/*
+	 * Flush any pre-fetched instructions on all
+	 * CPUs to make the new code visible.
+	 */
+	text_poke_sync_lock();
+}
+
+#ifdef CONFIG_MODULES
+
+static int __init ftrace_plt_init(void)
+{
+	const char *start, *end;
+
+	ftrace_plt = module_alloc(PAGE_SIZE);
+	if (!ftrace_plt)
+		panic("cannot allocate ftrace plt\n");
+
+	start = ftrace_shared_hotpatch_trampoline(&end);
+	memcpy(ftrace_plt, start, end - start);
+	set_memory_rox((unsigned long)ftrace_plt, 1);
+	return 0;
+}
+device_initcall(ftrace_plt_init);
+
+#endif /* CONFIG_MODULES */
+
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+/*
+ * Hook the return address and push it in the stack of return addresses
+ * in current thread info.
+ */
+unsigned long prepare_ftrace_return(unsigned long ra, unsigned long sp,
+				    unsigned long ip)
+{
+	if (unlikely(ftrace_graph_is_dead()))
+		goto out;
+	if (unlikely(atomic_read(&current->tracing_graph_pause)))
+		goto out;
+	ip -= MCOUNT_INSN_SIZE;
+	if (!function_graph_enter(ra, ip, 0, (void *) sp))
+		ra = (unsigned long) return_to_handler;
+out:
+	return ra;
+}
+NOKPROBE_SYMBOL(prepare_ftrace_return);
+
+/*
+ * Patch the kernel code at ftrace_graph_caller location. The instruction
+ * there is branch relative on condition. To enable the ftrace graph code
+ * block, we simply patch the mask field of the instruction to zero and
+ * turn the instruction into a nop.
+ * To disable the ftrace graph code the mask field will be patched to
+ * all ones, which turns the instruction into an unconditional branch.
+ */
+int ftrace_enable_ftrace_graph_caller(void)
+{
+	int rc;
+
+	/* Expect brc 0xf,... */
+	rc = ftrace_patch_branch_mask(ftrace_graph_caller, 0xa7f4, false);
+	if (rc)
+		return rc;
+	text_poke_sync_lock();
+	return 0;
+}
+
+int ftrace_disable_ftrace_graph_caller(void)
+{
+	int rc;
+
+	/* Expect brc 0x0,... */
+	rc = ftrace_patch_branch_mask(ftrace_graph_caller, 0xa704, true);
+	if (rc)
+		return rc;
+	text_poke_sync_lock();
+	return 0;
+}
+
+#endif /* CONFIG_FUNCTION_GRAPH_TRACER */
+
+#ifdef CONFIG_KPROBES_ON_FTRACE
+void kprobe_ftrace_handler(unsigned long ip, unsigned long parent_ip,
+		struct ftrace_ops *ops, struct ftrace_regs *fregs)
+{
+	struct kprobe_ctlblk *kcb;
+	struct pt_regs *regs;
+	struct kprobe *p;
+	int bit;
+
+	bit = ftrace_test_recursion_trylock(ip, parent_ip);
+	if (bit < 0)
+		return;
+
+	regs = ftrace_get_regs(fregs);
+	p = get_kprobe((kprobe_opcode_t *)ip);
+	if (!regs || unlikely(!p) || kprobe_disabled(p))
+		goto out;
+
+	if (kprobe_running()) {
+		kprobes_inc_nmissed_count(p);
+		goto out;
+	}
+
+	__this_cpu_write(current_kprobe, p);
+
+	kcb = get_kprobe_ctlblk();
+	kcb->kprobe_status = KPROBE_HIT_ACTIVE;
+
+	instruction_pointer_set(regs, ip);
+
+	if (!p->pre_handler || !p->pre_handler(p, regs)) {
+
+		instruction_pointer_set(regs, ip + MCOUNT_INSN_SIZE);
+
+		if (unlikely(p->post_handler)) {
+			kcb->kprobe_status = KPROBE_HIT_SSDONE;
+			p->post_handler(p, regs, 0);
+		}
+	}
+	__this_cpu_write(current_kprobe, NULL);
+out:
+	ftrace_test_recursion_unlock(bit);
+}
+NOKPROBE_SYMBOL(kprobe_ftrace_handler);
+
+int arch_prepare_kprobe_ftrace(struct kprobe *p)
+{
+	p->ainsn.insn = NULL;
+	return 0;
+}
+#endif
diff --git a/arch/s390/kernel/ftrace.h b/arch/s390/kernel/ftrace.h
new file mode 100644
index 0000000000..7f75a96164
--- /dev/null
+++ b/arch/s390/kernel/ftrace.h
@@ -0,0 +1,24 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _FTRACE_H
+#define _FTRACE_H
+
+#include <asm/types.h>
+
+struct ftrace_hotpatch_trampoline {
+	u16 brasl_opc;
+	s32 brasl_disp;
+	s16: 16;
+	u64 rest_of_intercepted_function;
+	u64 interceptor;
+} __packed;
+
+extern struct ftrace_hotpatch_trampoline __ftrace_hotpatch_trampolines_start[];
+extern struct ftrace_hotpatch_trampoline __ftrace_hotpatch_trampolines_end[];
+extern const char ftrace_shared_hotpatch_trampoline_br[];
+extern const char ftrace_shared_hotpatch_trampoline_br_end[];
+extern const char ftrace_shared_hotpatch_trampoline_exrl[];
+extern const char ftrace_shared_hotpatch_trampoline_exrl_end[];
+extern const char ftrace_plt_template[];
+extern const char ftrace_plt_template_end[];
+
+#endif /* _FTRACE_H */
diff --git a/arch/s390/kernel/guarded_storage.c b/arch/s390/kernel/guarded_storage.c
new file mode 100644
index 0000000000..d14dd1c2e5
--- /dev/null
+++ b/arch/s390/kernel/guarded_storage.c
@@ -0,0 +1,128 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright IBM Corp. 2016
+ * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>
+ */
+
+#include <linux/kernel.h>
+#include <linux/syscalls.h>
+#include <linux/signal.h>
+#include <linux/mm.h>
+#include <linux/slab.h>
+#include <asm/guarded_storage.h>
+#include "entry.h"
+
+void guarded_storage_release(struct task_struct *tsk)
+{
+	kfree(tsk->thread.gs_cb);
+	kfree(tsk->thread.gs_bc_cb);
+}
+
+static int gs_enable(void)
+{
+	struct gs_cb *gs_cb;
+
+	if (!current->thread.gs_cb) {
+		gs_cb = kzalloc(sizeof(*gs_cb), GFP_KERNEL);
+		if (!gs_cb)
+			return -ENOMEM;
+		gs_cb->gsd = 25;
+		preempt_disable();
+		__ctl_set_bit(2, 4);
+		load_gs_cb(gs_cb);
+		current->thread.gs_cb = gs_cb;
+		preempt_enable();
+	}
+	return 0;
+}
+
+static int gs_disable(void)
+{
+	if (current->thread.gs_cb) {
+		preempt_disable();
+		kfree(current->thread.gs_cb);
+		current->thread.gs_cb = NULL;
+		__ctl_clear_bit(2, 4);
+		preempt_enable();
+	}
+	return 0;
+}
+
+static int gs_set_bc_cb(struct gs_cb __user *u_gs_cb)
+{
+	struct gs_cb *gs_cb;
+
+	gs_cb = current->thread.gs_bc_cb;
+	if (!gs_cb) {
+		gs_cb = kzalloc(sizeof(*gs_cb), GFP_KERNEL);
+		if (!gs_cb)
+			return -ENOMEM;
+		current->thread.gs_bc_cb = gs_cb;
+	}
+	if (copy_from_user(gs_cb, u_gs_cb, sizeof(*gs_cb)))
+		return -EFAULT;
+	return 0;
+}
+
+static int gs_clear_bc_cb(void)
+{
+	struct gs_cb *gs_cb;
+
+	gs_cb = current->thread.gs_bc_cb;
+	current->thread.gs_bc_cb = NULL;
+	kfree(gs_cb);
+	return 0;
+}
+
+void gs_load_bc_cb(struct pt_regs *regs)
+{
+	struct gs_cb *gs_cb;
+
+	preempt_disable();
+	clear_thread_flag(TIF_GUARDED_STORAGE);
+	gs_cb = current->thread.gs_bc_cb;
+	if (gs_cb) {
+		kfree(current->thread.gs_cb);
+		current->thread.gs_bc_cb = NULL;
+		__ctl_set_bit(2, 4);
+		load_gs_cb(gs_cb);
+		current->thread.gs_cb = gs_cb;
+	}
+	preempt_enable();
+}
+
+static int gs_broadcast(void)
+{
+	struct task_struct *sibling;
+
+	read_lock(&tasklist_lock);
+	for_each_thread(current, sibling) {
+		if (!sibling->thread.gs_bc_cb)
+			continue;
+		if (test_and_set_tsk_thread_flag(sibling, TIF_GUARDED_STORAGE))
+			kick_process(sibling);
+	}
+	read_unlock(&tasklist_lock);
+	return 0;
+}
+
+SYSCALL_DEFINE2(s390_guarded_storage, int, command,
+		struct gs_cb __user *, gs_cb)
+{
+	if (!MACHINE_HAS_GS)
+		return -EOPNOTSUPP;
+	switch (command) {
+	case GS_ENABLE:
+		return gs_enable();
+	case GS_DISABLE:
+		return gs_disable();
+	case GS_SET_BC_CB:
+		return gs_set_bc_cb(gs_cb);
+	case GS_CLEAR_BC_CB:
+		return gs_clear_bc_cb();
+	case GS_BROADCAST:
+		return gs_broadcast();
+	default:
+		return -EINVAL;
+	}
+}
diff --git a/arch/s390/kernel/head64.S b/arch/s390/kernel/head64.S
new file mode 100644
index 0000000000..45413b04ef
--- /dev/null
+++ b/arch/s390/kernel/head64.S
@@ -0,0 +1,40 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright IBM Corp. 1999, 2010
+ *
+ *   Author(s):	Hartmut Penner <hp@de.ibm.com>
+ *		Martin Schwidefsky <schwidefsky@de.ibm.com>
+ *		Rob van der Heij <rvdhei@iae.nl>
+ *
+ */
+
+#include <linux/init.h>
+#include <linux/linkage.h>
+#include <asm/asm-offsets.h>
+#include <asm/thread_info.h>
+#include <asm/page.h>
+#include <asm/ptrace.h>
+
+__HEAD
+SYM_CODE_START(startup_continue)
+	larl	%r1,tod_clock_base
+	mvc	0(16,%r1),__LC_BOOT_CLOCK
+#
+# Setup stack
+#
+	larl	%r14,init_task
+	stg	%r14,__LC_CURRENT
+	larl	%r15,init_thread_union+STACK_INIT_OFFSET
+	stg	%r15,__LC_KERNEL_STACK
+	brasl	%r14,sclp_early_adjust_va	# allow sclp_early_printk
+	brasl	%r14,startup_init		# s390 specific early init
+	brasl	%r14,start_kernel		# common init code
+#
+# We returned from start_kernel ?!? PANIK
+#
+	basr	%r13,0
+	lpswe	dw_psw-.(%r13)		# load disabled wait psw
+SYM_CODE_END(startup_continue)
+
+	.balign	16
+SYM_DATA_LOCAL(dw_psw,	.quad 0x0002000180000000,0x0000000000000000)
diff --git a/arch/s390/kernel/idle.c b/arch/s390/kernel/idle.c
new file mode 100644
index 0000000000..e7239aaf42
--- /dev/null
+++ b/arch/s390/kernel/idle.c
@@ -0,0 +1,94 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Idle functions for s390.
+ *
+ * Copyright IBM Corp. 2014
+ *
+ * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>
+ */
+
+#include <linux/kernel.h>
+#include <linux/kernel_stat.h>
+#include <linux/notifier.h>
+#include <linux/init.h>
+#include <linux/cpu.h>
+#include <trace/events/power.h>
+#include <asm/cpu_mf.h>
+#include <asm/cputime.h>
+#include <asm/nmi.h>
+#include <asm/smp.h>
+#include "entry.h"
+
+static DEFINE_PER_CPU(struct s390_idle_data, s390_idle);
+
+void account_idle_time_irq(void)
+{
+	struct s390_idle_data *idle = this_cpu_ptr(&s390_idle);
+	unsigned long idle_time;
+	u64 cycles_new[8];
+	int i;
+
+	if (smp_cpu_mtid) {
+		stcctm(MT_DIAG, smp_cpu_mtid, cycles_new);
+		for (i = 0; i < smp_cpu_mtid; i++)
+			this_cpu_add(mt_cycles[i], cycles_new[i] - idle->mt_cycles_enter[i]);
+	}
+
+	idle_time = S390_lowcore.int_clock - idle->clock_idle_enter;
+
+	S390_lowcore.steal_timer += idle->clock_idle_enter - S390_lowcore.last_update_clock;
+	S390_lowcore.last_update_clock = S390_lowcore.int_clock;
+
+	S390_lowcore.system_timer += S390_lowcore.last_update_timer - idle->timer_idle_enter;
+	S390_lowcore.last_update_timer = S390_lowcore.sys_enter_timer;
+
+	/* Account time spent with enabled wait psw loaded as idle time. */
+	WRITE_ONCE(idle->idle_time, READ_ONCE(idle->idle_time) + idle_time);
+	WRITE_ONCE(idle->idle_count, READ_ONCE(idle->idle_count) + 1);
+	account_idle_time(cputime_to_nsecs(idle_time));
+}
+
+void noinstr arch_cpu_idle(void)
+{
+	struct s390_idle_data *idle = this_cpu_ptr(&s390_idle);
+	unsigned long psw_mask;
+
+	/* Wait for external, I/O or machine check interrupt. */
+	psw_mask = PSW_KERNEL_BITS | PSW_MASK_WAIT |
+		   PSW_MASK_IO | PSW_MASK_EXT | PSW_MASK_MCHECK;
+	clear_cpu_flag(CIF_NOHZ_DELAY);
+
+	/* psw_idle() returns with interrupts disabled. */
+	psw_idle(idle, psw_mask);
+}
+
+static ssize_t show_idle_count(struct device *dev,
+			       struct device_attribute *attr, char *buf)
+{
+	struct s390_idle_data *idle = &per_cpu(s390_idle, dev->id);
+
+	return sysfs_emit(buf, "%lu\n", READ_ONCE(idle->idle_count));
+}
+DEVICE_ATTR(idle_count, 0444, show_idle_count, NULL);
+
+static ssize_t show_idle_time(struct device *dev,
+			      struct device_attribute *attr, char *buf)
+{
+	struct s390_idle_data *idle = &per_cpu(s390_idle, dev->id);
+
+	return sysfs_emit(buf, "%lu\n", READ_ONCE(idle->idle_time) >> 12);
+}
+DEVICE_ATTR(idle_time_us, 0444, show_idle_time, NULL);
+
+void arch_cpu_idle_enter(void)
+{
+}
+
+void arch_cpu_idle_exit(void)
+{
+}
+
+void __noreturn arch_cpu_idle_dead(void)
+{
+	cpu_die();
+}
diff --git a/arch/s390/kernel/ima_arch.c b/arch/s390/kernel/ima_arch.c
new file mode 100644
index 0000000000..f3c3e6e1c5
--- /dev/null
+++ b/arch/s390/kernel/ima_arch.c
@@ -0,0 +1,14 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/ima.h>
+#include <asm/boot_data.h>
+
+bool arch_ima_get_secureboot(void)
+{
+	return ipl_secure_flag;
+}
+
+const char * const *arch_get_ima_policy(void)
+{
+	return NULL;
+}
diff --git a/arch/s390/kernel/ipl.c b/arch/s390/kernel/ipl.c
new file mode 100644
index 0000000000..8d0b95c173
--- /dev/null
+++ b/arch/s390/kernel/ipl.c
@@ -0,0 +1,2522 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ *    ipl/reipl/dump support for Linux on s390.
+ *
+ *    Copyright IBM Corp. 2005, 2012
+ *    Author(s): Michael Holzheu <holzheu@de.ibm.com>
+ *		 Volker Sameske <sameske@de.ibm.com>
+ */
+
+#include <linux/types.h>
+#include <linux/export.h>
+#include <linux/init.h>
+#include <linux/device.h>
+#include <linux/delay.h>
+#include <linux/kstrtox.h>
+#include <linux/panic_notifier.h>
+#include <linux/reboot.h>
+#include <linux/ctype.h>
+#include <linux/fs.h>
+#include <linux/gfp.h>
+#include <linux/crash_dump.h>
+#include <linux/debug_locks.h>
+#include <asm/asm-extable.h>
+#include <asm/diag.h>
+#include <asm/ipl.h>
+#include <asm/smp.h>
+#include <asm/setup.h>
+#include <asm/cpcmd.h>
+#include <asm/ebcdic.h>
+#include <asm/sclp.h>
+#include <asm/checksum.h>
+#include <asm/debug.h>
+#include <asm/abs_lowcore.h>
+#include <asm/os_info.h>
+#include <asm/sections.h>
+#include <asm/boot_data.h>
+#include "entry.h"
+
+#define IPL_PARM_BLOCK_VERSION 0
+
+#define IPL_UNKNOWN_STR		"unknown"
+#define IPL_CCW_STR		"ccw"
+#define IPL_ECKD_STR		"eckd"
+#define IPL_ECKD_DUMP_STR	"eckd_dump"
+#define IPL_FCP_STR		"fcp"
+#define IPL_FCP_DUMP_STR	"fcp_dump"
+#define IPL_NVME_STR		"nvme"
+#define IPL_NVME_DUMP_STR	"nvme_dump"
+#define IPL_NSS_STR		"nss"
+
+#define DUMP_CCW_STR		"ccw"
+#define DUMP_ECKD_STR		"eckd"
+#define DUMP_FCP_STR		"fcp"
+#define DUMP_NVME_STR		"nvme"
+#define DUMP_NONE_STR		"none"
+
+/*
+ * Four shutdown trigger types are supported:
+ * - panic
+ * - halt
+ * - power off
+ * - reipl
+ * - restart
+ */
+#define ON_PANIC_STR		"on_panic"
+#define ON_HALT_STR		"on_halt"
+#define ON_POFF_STR		"on_poff"
+#define ON_REIPL_STR		"on_reboot"
+#define ON_RESTART_STR		"on_restart"
+
+struct shutdown_action;
+struct shutdown_trigger {
+	char *name;
+	struct shutdown_action *action;
+};
+
+/*
+ * The following shutdown action types are supported:
+ */
+#define SHUTDOWN_ACTION_IPL_STR		"ipl"
+#define SHUTDOWN_ACTION_REIPL_STR	"reipl"
+#define SHUTDOWN_ACTION_DUMP_STR	"dump"
+#define SHUTDOWN_ACTION_VMCMD_STR	"vmcmd"
+#define SHUTDOWN_ACTION_STOP_STR	"stop"
+#define SHUTDOWN_ACTION_DUMP_REIPL_STR	"dump_reipl"
+
+struct shutdown_action {
+	char *name;
+	void (*fn) (struct shutdown_trigger *trigger);
+	int (*init) (void);
+	int init_rc;
+};
+
+static char *ipl_type_str(enum ipl_type type)
+{
+	switch (type) {
+	case IPL_TYPE_CCW:
+		return IPL_CCW_STR;
+	case IPL_TYPE_ECKD:
+		return IPL_ECKD_STR;
+	case IPL_TYPE_ECKD_DUMP:
+		return IPL_ECKD_DUMP_STR;
+	case IPL_TYPE_FCP:
+		return IPL_FCP_STR;
+	case IPL_TYPE_FCP_DUMP:
+		return IPL_FCP_DUMP_STR;
+	case IPL_TYPE_NSS:
+		return IPL_NSS_STR;
+	case IPL_TYPE_NVME:
+		return IPL_NVME_STR;
+	case IPL_TYPE_NVME_DUMP:
+		return IPL_NVME_DUMP_STR;
+	case IPL_TYPE_UNKNOWN:
+	default:
+		return IPL_UNKNOWN_STR;
+	}
+}
+
+enum dump_type {
+	DUMP_TYPE_NONE	= 1,
+	DUMP_TYPE_CCW	= 2,
+	DUMP_TYPE_FCP	= 4,
+	DUMP_TYPE_NVME	= 8,
+	DUMP_TYPE_ECKD	= 16,
+};
+
+static char *dump_type_str(enum dump_type type)
+{
+	switch (type) {
+	case DUMP_TYPE_NONE:
+		return DUMP_NONE_STR;
+	case DUMP_TYPE_CCW:
+		return DUMP_CCW_STR;
+	case DUMP_TYPE_ECKD:
+		return DUMP_ECKD_STR;
+	case DUMP_TYPE_FCP:
+		return DUMP_FCP_STR;
+	case DUMP_TYPE_NVME:
+		return DUMP_NVME_STR;
+	default:
+		return NULL;
+	}
+}
+
+int __bootdata_preserved(ipl_block_valid);
+struct ipl_parameter_block __bootdata_preserved(ipl_block);
+int __bootdata_preserved(ipl_secure_flag);
+
+unsigned long __bootdata_preserved(ipl_cert_list_addr);
+unsigned long __bootdata_preserved(ipl_cert_list_size);
+
+unsigned long __bootdata(early_ipl_comp_list_addr);
+unsigned long __bootdata(early_ipl_comp_list_size);
+
+static int reipl_capabilities = IPL_TYPE_UNKNOWN;
+
+static enum ipl_type reipl_type = IPL_TYPE_UNKNOWN;
+static struct ipl_parameter_block *reipl_block_fcp;
+static struct ipl_parameter_block *reipl_block_nvme;
+static struct ipl_parameter_block *reipl_block_ccw;
+static struct ipl_parameter_block *reipl_block_eckd;
+static struct ipl_parameter_block *reipl_block_nss;
+static struct ipl_parameter_block *reipl_block_actual;
+
+static int dump_capabilities = DUMP_TYPE_NONE;
+static enum dump_type dump_type = DUMP_TYPE_NONE;
+static struct ipl_parameter_block *dump_block_fcp;
+static struct ipl_parameter_block *dump_block_nvme;
+static struct ipl_parameter_block *dump_block_ccw;
+static struct ipl_parameter_block *dump_block_eckd;
+
+static struct sclp_ipl_info sclp_ipl_info;
+
+static bool reipl_nvme_clear;
+static bool reipl_fcp_clear;
+static bool reipl_ccw_clear;
+static bool reipl_eckd_clear;
+
+static unsigned long os_info_flags;
+
+static inline int __diag308(unsigned long subcode, unsigned long addr)
+{
+	union register_pair r1;
+
+	r1.even = addr;
+	r1.odd	= 0;
+	asm volatile(
+		"	diag	%[r1],%[subcode],0x308\n"
+		"0:	nopr	%%r7\n"
+		EX_TABLE(0b,0b)
+		: [r1] "+&d" (r1.pair)
+		: [subcode] "d" (subcode)
+		: "cc", "memory");
+	return r1.odd;
+}
+
+int diag308(unsigned long subcode, void *addr)
+{
+	diag_stat_inc(DIAG_STAT_X308);
+	return __diag308(subcode, addr ? virt_to_phys(addr) : 0);
+}
+EXPORT_SYMBOL_GPL(diag308);
+
+/* SYSFS */
+
+#define IPL_ATTR_SHOW_FN(_prefix, _name, _format, args...)		\
+static ssize_t sys_##_prefix##_##_name##_show(struct kobject *kobj,	\
+		struct kobj_attribute *attr,				\
+		char *page)						\
+{									\
+	return scnprintf(page, PAGE_SIZE, _format, ##args);		\
+}
+
+#define IPL_ATTR_CCW_STORE_FN(_prefix, _name, _ipl_blk)			\
+static ssize_t sys_##_prefix##_##_name##_store(struct kobject *kobj,	\
+		struct kobj_attribute *attr,				\
+		const char *buf, size_t len)				\
+{									\
+	unsigned long long ssid, devno;					\
+									\
+	if (sscanf(buf, "0.%llx.%llx\n", &ssid, &devno) != 2)		\
+		return -EINVAL;						\
+									\
+	if (ssid > __MAX_SSID || devno > __MAX_SUBCHANNEL)		\
+		return -EINVAL;						\
+									\
+	_ipl_blk.ssid = ssid;						\
+	_ipl_blk.devno = devno;						\
+	return len;							\
+}
+
+#define DEFINE_IPL_CCW_ATTR_RW(_prefix, _name, _ipl_blk)		\
+IPL_ATTR_SHOW_FN(_prefix, _name, "0.%x.%04x\n",				\
+		 _ipl_blk.ssid, _ipl_blk.devno);			\
+IPL_ATTR_CCW_STORE_FN(_prefix, _name, _ipl_blk);			\
+static struct kobj_attribute sys_##_prefix##_##_name##_attr =		\
+	__ATTR(_name, 0644,						\
+	       sys_##_prefix##_##_name##_show,				\
+	       sys_##_prefix##_##_name##_store)				\
+
+#define DEFINE_IPL_ATTR_RO(_prefix, _name, _format, _value)		\
+IPL_ATTR_SHOW_FN(_prefix, _name, _format, _value)			\
+static struct kobj_attribute sys_##_prefix##_##_name##_attr =		\
+	__ATTR(_name, 0444, sys_##_prefix##_##_name##_show, NULL)
+
+#define DEFINE_IPL_ATTR_RW(_prefix, _name, _fmt_out, _fmt_in, _value)	\
+IPL_ATTR_SHOW_FN(_prefix, _name, _fmt_out, (unsigned long long) _value)	\
+static ssize_t sys_##_prefix##_##_name##_store(struct kobject *kobj,	\
+		struct kobj_attribute *attr,				\
+		const char *buf, size_t len)				\
+{									\
+	unsigned long long value;					\
+	if (sscanf(buf, _fmt_in, &value) != 1)				\
+		return -EINVAL;						\
+	_value = value;							\
+	return len;							\
+}									\
+static struct kobj_attribute sys_##_prefix##_##_name##_attr =		\
+	__ATTR(_name, 0644,						\
+			sys_##_prefix##_##_name##_show,			\
+			sys_##_prefix##_##_name##_store)
+
+#define DEFINE_IPL_ATTR_STR_RW(_prefix, _name, _fmt_out, _fmt_in, _value)\
+IPL_ATTR_SHOW_FN(_prefix, _name, _fmt_out, _value)			\
+static ssize_t sys_##_prefix##_##_name##_store(struct kobject *kobj,	\
+		struct kobj_attribute *attr,				\
+		const char *buf, size_t len)				\
+{									\
+	strscpy(_value, buf, sizeof(_value));				\
+	strim(_value);							\
+	return len;							\
+}									\
+static struct kobj_attribute sys_##_prefix##_##_name##_attr =		\
+	__ATTR(_name, 0644,						\
+			sys_##_prefix##_##_name##_show,			\
+			sys_##_prefix##_##_name##_store)
+
+/*
+ * ipl section
+ */
+
+static __init enum ipl_type get_ipl_type(void)
+{
+	if (!ipl_block_valid)
+		return IPL_TYPE_UNKNOWN;
+
+	switch (ipl_block.pb0_hdr.pbt) {
+	case IPL_PBT_CCW:
+		return IPL_TYPE_CCW;
+	case IPL_PBT_FCP:
+		if (ipl_block.fcp.opt == IPL_PB0_FCP_OPT_DUMP)
+			return IPL_TYPE_FCP_DUMP;
+		else
+			return IPL_TYPE_FCP;
+	case IPL_PBT_NVME:
+		if (ipl_block.nvme.opt == IPL_PB0_NVME_OPT_DUMP)
+			return IPL_TYPE_NVME_DUMP;
+		else
+			return IPL_TYPE_NVME;
+	case IPL_PBT_ECKD:
+		if (ipl_block.eckd.opt == IPL_PB0_ECKD_OPT_DUMP)
+			return IPL_TYPE_ECKD_DUMP;
+		else
+			return IPL_TYPE_ECKD;
+	}
+	return IPL_TYPE_UNKNOWN;
+}
+
+struct ipl_info ipl_info;
+EXPORT_SYMBOL_GPL(ipl_info);
+
+static ssize_t ipl_type_show(struct kobject *kobj, struct kobj_attribute *attr,
+			     char *page)
+{
+	return sprintf(page, "%s\n", ipl_type_str(ipl_info.type));
+}
+
+static struct kobj_attribute sys_ipl_type_attr = __ATTR_RO(ipl_type);
+
+static ssize_t ipl_secure_show(struct kobject *kobj,
+			       struct kobj_attribute *attr, char *page)
+{
+	return sprintf(page, "%i\n", !!ipl_secure_flag);
+}
+
+static struct kobj_attribute sys_ipl_secure_attr =
+	__ATTR(secure, 0444, ipl_secure_show, NULL);
+
+static ssize_t ipl_has_secure_show(struct kobject *kobj,
+				   struct kobj_attribute *attr, char *page)
+{
+	return sprintf(page, "%i\n", !!sclp.has_sipl);
+}
+
+static struct kobj_attribute sys_ipl_has_secure_attr =
+	__ATTR(has_secure, 0444, ipl_has_secure_show, NULL);
+
+static ssize_t ipl_vm_parm_show(struct kobject *kobj,
+				struct kobj_attribute *attr, char *page)
+{
+	char parm[DIAG308_VMPARM_SIZE + 1] = {};
+
+	if (ipl_block_valid && (ipl_block.pb0_hdr.pbt == IPL_PBT_CCW))
+		ipl_block_get_ascii_vmparm(parm, sizeof(parm), &ipl_block);
+	return sprintf(page, "%s\n", parm);
+}
+
+static struct kobj_attribute sys_ipl_vm_parm_attr =
+	__ATTR(parm, 0444, ipl_vm_parm_show, NULL);
+
+static ssize_t sys_ipl_device_show(struct kobject *kobj,
+				   struct kobj_attribute *attr, char *page)
+{
+	switch (ipl_info.type) {
+	case IPL_TYPE_CCW:
+		return sprintf(page, "0.%x.%04x\n", ipl_block.ccw.ssid,
+			       ipl_block.ccw.devno);
+	case IPL_TYPE_ECKD:
+	case IPL_TYPE_ECKD_DUMP:
+		return sprintf(page, "0.%x.%04x\n", ipl_block.eckd.ssid,
+			       ipl_block.eckd.devno);
+	case IPL_TYPE_FCP:
+	case IPL_TYPE_FCP_DUMP:
+		return sprintf(page, "0.0.%04x\n", ipl_block.fcp.devno);
+	case IPL_TYPE_NVME:
+	case IPL_TYPE_NVME_DUMP:
+		return sprintf(page, "%08ux\n", ipl_block.nvme.fid);
+	default:
+		return 0;
+	}
+}
+
+static struct kobj_attribute sys_ipl_device_attr =
+	__ATTR(device, 0444, sys_ipl_device_show, NULL);
+
+static ssize_t ipl_parameter_read(struct file *filp, struct kobject *kobj,
+				  struct bin_attribute *attr, char *buf,
+				  loff_t off, size_t count)
+{
+	return memory_read_from_buffer(buf, count, &off, &ipl_block,
+				       ipl_block.hdr.len);
+}
+static struct bin_attribute ipl_parameter_attr =
+	__BIN_ATTR(binary_parameter, 0444, ipl_parameter_read, NULL,
+		   PAGE_SIZE);
+
+static ssize_t ipl_scp_data_read(struct file *filp, struct kobject *kobj,
+				 struct bin_attribute *attr, char *buf,
+				 loff_t off, size_t count)
+{
+	unsigned int size = ipl_block.fcp.scp_data_len;
+	void *scp_data = &ipl_block.fcp.scp_data;
+
+	return memory_read_from_buffer(buf, count, &off, scp_data, size);
+}
+
+static ssize_t ipl_nvme_scp_data_read(struct file *filp, struct kobject *kobj,
+				 struct bin_attribute *attr, char *buf,
+				 loff_t off, size_t count)
+{
+	unsigned int size = ipl_block.nvme.scp_data_len;
+	void *scp_data = &ipl_block.nvme.scp_data;
+
+	return memory_read_from_buffer(buf, count, &off, scp_data, size);
+}
+
+static ssize_t ipl_eckd_scp_data_read(struct file *filp, struct kobject *kobj,
+				      struct bin_attribute *attr, char *buf,
+				      loff_t off, size_t count)
+{
+	unsigned int size = ipl_block.eckd.scp_data_len;
+	void *scp_data = &ipl_block.eckd.scp_data;
+
+	return memory_read_from_buffer(buf, count, &off, scp_data, size);
+}
+
+static struct bin_attribute ipl_scp_data_attr =
+	__BIN_ATTR(scp_data, 0444, ipl_scp_data_read, NULL, PAGE_SIZE);
+
+static struct bin_attribute ipl_nvme_scp_data_attr =
+	__BIN_ATTR(scp_data, 0444, ipl_nvme_scp_data_read, NULL, PAGE_SIZE);
+
+static struct bin_attribute ipl_eckd_scp_data_attr =
+	__BIN_ATTR(scp_data, 0444, ipl_eckd_scp_data_read, NULL, PAGE_SIZE);
+
+static struct bin_attribute *ipl_fcp_bin_attrs[] = {
+	&ipl_parameter_attr,
+	&ipl_scp_data_attr,
+	NULL,
+};
+
+static struct bin_attribute *ipl_nvme_bin_attrs[] = {
+	&ipl_parameter_attr,
+	&ipl_nvme_scp_data_attr,
+	NULL,
+};
+
+static struct bin_attribute *ipl_eckd_bin_attrs[] = {
+	&ipl_parameter_attr,
+	&ipl_eckd_scp_data_attr,
+	NULL,
+};
+
+/* FCP ipl device attributes */
+
+DEFINE_IPL_ATTR_RO(ipl_fcp, wwpn, "0x%016llx\n",
+		   (unsigned long long)ipl_block.fcp.wwpn);
+DEFINE_IPL_ATTR_RO(ipl_fcp, lun, "0x%016llx\n",
+		   (unsigned long long)ipl_block.fcp.lun);
+DEFINE_IPL_ATTR_RO(ipl_fcp, bootprog, "%lld\n",
+		   (unsigned long long)ipl_block.fcp.bootprog);
+DEFINE_IPL_ATTR_RO(ipl_fcp, br_lba, "%lld\n",
+		   (unsigned long long)ipl_block.fcp.br_lba);
+
+/* NVMe ipl device attributes */
+DEFINE_IPL_ATTR_RO(ipl_nvme, fid, "0x%08llx\n",
+		   (unsigned long long)ipl_block.nvme.fid);
+DEFINE_IPL_ATTR_RO(ipl_nvme, nsid, "0x%08llx\n",
+		   (unsigned long long)ipl_block.nvme.nsid);
+DEFINE_IPL_ATTR_RO(ipl_nvme, bootprog, "%lld\n",
+		   (unsigned long long)ipl_block.nvme.bootprog);
+DEFINE_IPL_ATTR_RO(ipl_nvme, br_lba, "%lld\n",
+		   (unsigned long long)ipl_block.nvme.br_lba);
+
+/* ECKD ipl device attributes */
+DEFINE_IPL_ATTR_RO(ipl_eckd, bootprog, "%lld\n",
+		   (unsigned long long)ipl_block.eckd.bootprog);
+
+#define IPL_ATTR_BR_CHR_SHOW_FN(_name, _ipb)				\
+static ssize_t eckd_##_name##_br_chr_show(struct kobject *kobj,		\
+					  struct kobj_attribute *attr,	\
+					  char *buf)			\
+{									\
+	struct ipl_pb0_eckd *ipb = &(_ipb);				\
+									\
+	if (!ipb->br_chr.cyl &&						\
+	    !ipb->br_chr.head &&					\
+	    !ipb->br_chr.record)					\
+		return sprintf(buf, "auto\n");				\
+									\
+	return sprintf(buf, "0x%x,0x%x,0x%x\n",				\
+			ipb->br_chr.cyl,				\
+			ipb->br_chr.head,				\
+			ipb->br_chr.record);				\
+}
+
+#define IPL_ATTR_BR_CHR_STORE_FN(_name, _ipb)				\
+static ssize_t eckd_##_name##_br_chr_store(struct kobject *kobj,	\
+					   struct kobj_attribute *attr,	\
+					   const char *buf, size_t len)	\
+{									\
+	struct ipl_pb0_eckd *ipb = &(_ipb);				\
+	unsigned long args[3] = { 0 };					\
+	char *p, *p1, *tmp = NULL;					\
+	int i, rc;							\
+									\
+	if (!strncmp(buf, "auto", 4))					\
+		goto out;						\
+									\
+	tmp = kstrdup(buf, GFP_KERNEL);					\
+	p = tmp;							\
+	for (i = 0; i < 3; i++) {					\
+		p1 = strsep(&p, ", ");					\
+		if (!p1) {						\
+			rc = -EINVAL;					\
+			goto err;					\
+		}							\
+		rc = kstrtoul(p1, 0, args + i);				\
+		if (rc)							\
+			goto err;					\
+	}								\
+									\
+	rc = -EINVAL;							\
+	if (i != 3)							\
+		goto err;						\
+									\
+	if ((args[0] || args[1]) && !args[2])				\
+		goto err;						\
+									\
+	if (args[0] > UINT_MAX || args[1] > 255 || args[2] > 255)	\
+		goto err;						\
+									\
+out:									\
+	ipb->br_chr.cyl = args[0];					\
+	ipb->br_chr.head = args[1];					\
+	ipb->br_chr.record = args[2];					\
+	rc = len;							\
+err:									\
+	kfree(tmp);							\
+	return rc;							\
+}
+
+IPL_ATTR_BR_CHR_SHOW_FN(ipl, ipl_block.eckd);
+static struct kobj_attribute sys_ipl_eckd_br_chr_attr =
+	__ATTR(br_chr, 0644, eckd_ipl_br_chr_show, NULL);
+
+IPL_ATTR_BR_CHR_SHOW_FN(reipl, reipl_block_eckd->eckd);
+IPL_ATTR_BR_CHR_STORE_FN(reipl, reipl_block_eckd->eckd);
+
+static struct kobj_attribute sys_reipl_eckd_br_chr_attr =
+	__ATTR(br_chr, 0644, eckd_reipl_br_chr_show, eckd_reipl_br_chr_store);
+
+static ssize_t ipl_ccw_loadparm_show(struct kobject *kobj,
+				     struct kobj_attribute *attr, char *page)
+{
+	char loadparm[LOADPARM_LEN + 1] = {};
+
+	if (!sclp_ipl_info.is_valid)
+		return sprintf(page, "#unknown#\n");
+	memcpy(loadparm, &sclp_ipl_info.loadparm, LOADPARM_LEN);
+	EBCASC(loadparm, LOADPARM_LEN);
+	strim(loadparm);
+	return sprintf(page, "%s\n", loadparm);
+}
+
+static struct kobj_attribute sys_ipl_ccw_loadparm_attr =
+	__ATTR(loadparm, 0444, ipl_ccw_loadparm_show, NULL);
+
+static struct attribute *ipl_fcp_attrs[] = {
+	&sys_ipl_device_attr.attr,
+	&sys_ipl_fcp_wwpn_attr.attr,
+	&sys_ipl_fcp_lun_attr.attr,
+	&sys_ipl_fcp_bootprog_attr.attr,
+	&sys_ipl_fcp_br_lba_attr.attr,
+	&sys_ipl_ccw_loadparm_attr.attr,
+	NULL,
+};
+
+static struct attribute_group ipl_fcp_attr_group = {
+	.attrs = ipl_fcp_attrs,
+	.bin_attrs = ipl_fcp_bin_attrs,
+};
+
+static struct attribute *ipl_nvme_attrs[] = {
+	&sys_ipl_nvme_fid_attr.attr,
+	&sys_ipl_nvme_nsid_attr.attr,
+	&sys_ipl_nvme_bootprog_attr.attr,
+	&sys_ipl_nvme_br_lba_attr.attr,
+	&sys_ipl_ccw_loadparm_attr.attr,
+	NULL,
+};
+
+static struct attribute_group ipl_nvme_attr_group = {
+	.attrs = ipl_nvme_attrs,
+	.bin_attrs = ipl_nvme_bin_attrs,
+};
+
+static struct attribute *ipl_eckd_attrs[] = {
+	&sys_ipl_eckd_bootprog_attr.attr,
+	&sys_ipl_eckd_br_chr_attr.attr,
+	&sys_ipl_ccw_loadparm_attr.attr,
+	&sys_ipl_device_attr.attr,
+	NULL,
+};
+
+static struct attribute_group ipl_eckd_attr_group = {
+	.attrs = ipl_eckd_attrs,
+	.bin_attrs = ipl_eckd_bin_attrs,
+};
+
+/* CCW ipl device attributes */
+
+static struct attribute *ipl_ccw_attrs_vm[] = {
+	&sys_ipl_device_attr.attr,
+	&sys_ipl_ccw_loadparm_attr.attr,
+	&sys_ipl_vm_parm_attr.attr,
+	NULL,
+};
+
+static struct attribute *ipl_ccw_attrs_lpar[] = {
+	&sys_ipl_device_attr.attr,
+	&sys_ipl_ccw_loadparm_attr.attr,
+	NULL,
+};
+
+static struct attribute_group ipl_ccw_attr_group_vm = {
+	.attrs = ipl_ccw_attrs_vm,
+};
+
+static struct attribute_group ipl_ccw_attr_group_lpar = {
+	.attrs = ipl_ccw_attrs_lpar
+};
+
+static struct attribute *ipl_common_attrs[] = {
+	&sys_ipl_type_attr.attr,
+	&sys_ipl_secure_attr.attr,
+	&sys_ipl_has_secure_attr.attr,
+	NULL,
+};
+
+static struct attribute_group ipl_common_attr_group = {
+	.attrs = ipl_common_attrs,
+};
+
+static struct kset *ipl_kset;
+
+static void __ipl_run(void *unused)
+{
+	diag308(DIAG308_LOAD_CLEAR, NULL);
+}
+
+static void ipl_run(struct shutdown_trigger *trigger)
+{
+	smp_call_ipl_cpu(__ipl_run, NULL);
+}
+
+static int __init ipl_init(void)
+{
+	int rc;
+
+	ipl_kset = kset_create_and_add("ipl", NULL, firmware_kobj);
+	if (!ipl_kset) {
+		rc = -ENOMEM;
+		goto out;
+	}
+	rc = sysfs_create_group(&ipl_kset->kobj, &ipl_common_attr_group);
+	if (rc)
+		goto out;
+	switch (ipl_info.type) {
+	case IPL_TYPE_CCW:
+		if (MACHINE_IS_VM)
+			rc = sysfs_create_group(&ipl_kset->kobj,
+						&ipl_ccw_attr_group_vm);
+		else
+			rc = sysfs_create_group(&ipl_kset->kobj,
+						&ipl_ccw_attr_group_lpar);
+		break;
+	case IPL_TYPE_ECKD:
+	case IPL_TYPE_ECKD_DUMP:
+		rc = sysfs_create_group(&ipl_kset->kobj, &ipl_eckd_attr_group);
+		break;
+	case IPL_TYPE_FCP:
+	case IPL_TYPE_FCP_DUMP:
+		rc = sysfs_create_group(&ipl_kset->kobj, &ipl_fcp_attr_group);
+		break;
+	case IPL_TYPE_NVME:
+	case IPL_TYPE_NVME_DUMP:
+		rc = sysfs_create_group(&ipl_kset->kobj, &ipl_nvme_attr_group);
+		break;
+	default:
+		break;
+	}
+out:
+	if (rc)
+		panic("ipl_init failed: rc = %i\n", rc);
+
+	return 0;
+}
+
+static struct shutdown_action __refdata ipl_action = {
+	.name	= SHUTDOWN_ACTION_IPL_STR,
+	.fn	= ipl_run,
+	.init	= ipl_init,
+};
+
+/*
+ * reipl shutdown action: Reboot Linux on shutdown.
+ */
+
+/* VM IPL PARM attributes */
+static ssize_t reipl_generic_vmparm_show(struct ipl_parameter_block *ipb,
+					  char *page)
+{
+	char vmparm[DIAG308_VMPARM_SIZE + 1] = {};
+
+	ipl_block_get_ascii_vmparm(vmparm, sizeof(vmparm), ipb);
+	return sprintf(page, "%s\n", vmparm);
+}
+
+static ssize_t reipl_generic_vmparm_store(struct ipl_parameter_block *ipb,
+					  size_t vmparm_max,
+					  const char *buf, size_t len)
+{
+	int i, ip_len;
+
+	/* ignore trailing newline */
+	ip_len = len;
+	if ((len > 0) && (buf[len - 1] == '\n'))
+		ip_len--;
+
+	if (ip_len > vmparm_max)
+		return -EINVAL;
+
+	/* parm is used to store kernel options, check for common chars */
+	for (i = 0; i < ip_len; i++)
+		if (!(isalnum(buf[i]) || isascii(buf[i]) || isprint(buf[i])))
+			return -EINVAL;
+
+	memset(ipb->ccw.vm_parm, 0, DIAG308_VMPARM_SIZE);
+	ipb->ccw.vm_parm_len = ip_len;
+	if (ip_len > 0) {
+		ipb->ccw.vm_flags |= IPL_PB0_CCW_VM_FLAG_VP;
+		memcpy(ipb->ccw.vm_parm, buf, ip_len);
+		ASCEBC(ipb->ccw.vm_parm, ip_len);
+	} else {
+		ipb->ccw.vm_flags &= ~IPL_PB0_CCW_VM_FLAG_VP;
+	}
+
+	return len;
+}
+
+/* NSS wrapper */
+static ssize_t reipl_nss_vmparm_show(struct kobject *kobj,
+				     struct kobj_attribute *attr, char *page)
+{
+	return reipl_generic_vmparm_show(reipl_block_nss, page);
+}
+
+static ssize_t reipl_nss_vmparm_store(struct kobject *kobj,
+				      struct kobj_attribute *attr,
+				      const char *buf, size_t len)
+{
+	return reipl_generic_vmparm_store(reipl_block_nss, 56, buf, len);
+}
+
+/* CCW wrapper */
+static ssize_t reipl_ccw_vmparm_show(struct kobject *kobj,
+				     struct kobj_attribute *attr, char *page)
+{
+	return reipl_generic_vmparm_show(reipl_block_ccw, page);
+}
+
+static ssize_t reipl_ccw_vmparm_store(struct kobject *kobj,
+				      struct kobj_attribute *attr,
+				      const char *buf, size_t len)
+{
+	return reipl_generic_vmparm_store(reipl_block_ccw, 64, buf, len);
+}
+
+static struct kobj_attribute sys_reipl_nss_vmparm_attr =
+	__ATTR(parm, 0644, reipl_nss_vmparm_show,
+	       reipl_nss_vmparm_store);
+static struct kobj_attribute sys_reipl_ccw_vmparm_attr =
+	__ATTR(parm, 0644, reipl_ccw_vmparm_show,
+	       reipl_ccw_vmparm_store);
+
+/* FCP reipl device attributes */
+
+static ssize_t reipl_fcp_scpdata_read(struct file *filp, struct kobject *kobj,
+				      struct bin_attribute *attr,
+				      char *buf, loff_t off, size_t count)
+{
+	size_t size = reipl_block_fcp->fcp.scp_data_len;
+	void *scp_data = reipl_block_fcp->fcp.scp_data;
+
+	return memory_read_from_buffer(buf, count, &off, scp_data, size);
+}
+
+static ssize_t reipl_fcp_scpdata_write(struct file *filp, struct kobject *kobj,
+				       struct bin_attribute *attr,
+				       char *buf, loff_t off, size_t count)
+{
+	size_t scpdata_len = count;
+	size_t padding;
+
+
+	if (off)
+		return -EINVAL;
+
+	memcpy(reipl_block_fcp->fcp.scp_data, buf, count);
+	if (scpdata_len % 8) {
+		padding = 8 - (scpdata_len % 8);
+		memset(reipl_block_fcp->fcp.scp_data + scpdata_len,
+		       0, padding);
+		scpdata_len += padding;
+	}
+
+	reipl_block_fcp->hdr.len = IPL_BP_FCP_LEN + scpdata_len;
+	reipl_block_fcp->fcp.len = IPL_BP0_FCP_LEN + scpdata_len;
+	reipl_block_fcp->fcp.scp_data_len = scpdata_len;
+
+	return count;
+}
+static struct bin_attribute sys_reipl_fcp_scp_data_attr =
+	__BIN_ATTR(scp_data, 0644, reipl_fcp_scpdata_read,
+		   reipl_fcp_scpdata_write, DIAG308_SCPDATA_SIZE);
+
+static struct bin_attribute *reipl_fcp_bin_attrs[] = {
+	&sys_reipl_fcp_scp_data_attr,
+	NULL,
+};
+
+DEFINE_IPL_ATTR_RW(reipl_fcp, wwpn, "0x%016llx\n", "%llx\n",
+		   reipl_block_fcp->fcp.wwpn);
+DEFINE_IPL_ATTR_RW(reipl_fcp, lun, "0x%016llx\n", "%llx\n",
+		   reipl_block_fcp->fcp.lun);
+DEFINE_IPL_ATTR_RW(reipl_fcp, bootprog, "%lld\n", "%lld\n",
+		   reipl_block_fcp->fcp.bootprog);
+DEFINE_IPL_ATTR_RW(reipl_fcp, br_lba, "%lld\n", "%lld\n",
+		   reipl_block_fcp->fcp.br_lba);
+DEFINE_IPL_ATTR_RW(reipl_fcp, device, "0.0.%04llx\n", "0.0.%llx\n",
+		   reipl_block_fcp->fcp.devno);
+
+static void reipl_get_ascii_loadparm(char *loadparm,
+				     struct ipl_parameter_block *ibp)
+{
+	memcpy(loadparm, ibp->common.loadparm, LOADPARM_LEN);
+	EBCASC(loadparm, LOADPARM_LEN);
+	loadparm[LOADPARM_LEN] = 0;
+	strim(loadparm);
+}
+
+static ssize_t reipl_generic_loadparm_show(struct ipl_parameter_block *ipb,
+					   char *page)
+{
+	char buf[LOADPARM_LEN + 1];
+
+	reipl_get_ascii_loadparm(buf, ipb);
+	return sprintf(page, "%s\n", buf);
+}
+
+static ssize_t reipl_generic_loadparm_store(struct ipl_parameter_block *ipb,
+					    const char *buf, size_t len)
+{
+	int i, lp_len;
+
+	/* ignore trailing newline */
+	lp_len = len;
+	if ((len > 0) && (buf[len - 1] == '\n'))
+		lp_len--;
+	/* loadparm can have max 8 characters and must not start with a blank */
+	if ((lp_len > LOADPARM_LEN) || ((lp_len > 0) && (buf[0] == ' ')))
+		return -EINVAL;
+	/* loadparm can only contain "a-z,A-Z,0-9,SP,." */
+	for (i = 0; i < lp_len; i++) {
+		if (isalpha(buf[i]) || isdigit(buf[i]) || (buf[i] == ' ') ||
+		    (buf[i] == '.'))
+			continue;
+		return -EINVAL;
+	}
+	/* initialize loadparm with blanks */
+	memset(ipb->common.loadparm, ' ', LOADPARM_LEN);
+	/* copy and convert to ebcdic */
+	memcpy(ipb->common.loadparm, buf, lp_len);
+	ASCEBC(ipb->common.loadparm, LOADPARM_LEN);
+	ipb->common.flags |= IPL_PB0_FLAG_LOADPARM;
+	return len;
+}
+
+#define DEFINE_GENERIC_LOADPARM(name)							\
+static ssize_t reipl_##name##_loadparm_show(struct kobject *kobj,			\
+					    struct kobj_attribute *attr, char *page)	\
+{											\
+	return reipl_generic_loadparm_show(reipl_block_##name, page);			\
+}											\
+static ssize_t reipl_##name##_loadparm_store(struct kobject *kobj,			\
+					     struct kobj_attribute *attr,		\
+					     const char *buf, size_t len)		\
+{											\
+	return reipl_generic_loadparm_store(reipl_block_##name, buf, len);		\
+}											\
+static struct kobj_attribute sys_reipl_##name##_loadparm_attr =				\
+	__ATTR(loadparm, 0644, reipl_##name##_loadparm_show,				\
+	       reipl_##name##_loadparm_store)
+
+DEFINE_GENERIC_LOADPARM(fcp);
+DEFINE_GENERIC_LOADPARM(nvme);
+DEFINE_GENERIC_LOADPARM(ccw);
+DEFINE_GENERIC_LOADPARM(nss);
+DEFINE_GENERIC_LOADPARM(eckd);
+
+static ssize_t reipl_fcp_clear_show(struct kobject *kobj,
+				    struct kobj_attribute *attr, char *page)
+{
+	return sprintf(page, "%u\n", reipl_fcp_clear);
+}
+
+static ssize_t reipl_fcp_clear_store(struct kobject *kobj,
+				     struct kobj_attribute *attr,
+				     const char *buf, size_t len)
+{
+	if (kstrtobool(buf, &reipl_fcp_clear) < 0)
+		return -EINVAL;
+	return len;
+}
+
+static struct attribute *reipl_fcp_attrs[] = {
+	&sys_reipl_fcp_device_attr.attr,
+	&sys_reipl_fcp_wwpn_attr.attr,
+	&sys_reipl_fcp_lun_attr.attr,
+	&sys_reipl_fcp_bootprog_attr.attr,
+	&sys_reipl_fcp_br_lba_attr.attr,
+	&sys_reipl_fcp_loadparm_attr.attr,
+	NULL,
+};
+
+static struct attribute_group reipl_fcp_attr_group = {
+	.attrs = reipl_fcp_attrs,
+	.bin_attrs = reipl_fcp_bin_attrs,
+};
+
+static struct kobj_attribute sys_reipl_fcp_clear_attr =
+	__ATTR(clear, 0644, reipl_fcp_clear_show, reipl_fcp_clear_store);
+
+/* NVME reipl device attributes */
+
+static ssize_t reipl_nvme_scpdata_read(struct file *filp, struct kobject *kobj,
+				      struct bin_attribute *attr,
+				      char *buf, loff_t off, size_t count)
+{
+	size_t size = reipl_block_nvme->nvme.scp_data_len;
+	void *scp_data = reipl_block_nvme->nvme.scp_data;
+
+	return memory_read_from_buffer(buf, count, &off, scp_data, size);
+}
+
+static ssize_t reipl_nvme_scpdata_write(struct file *filp, struct kobject *kobj,
+				       struct bin_attribute *attr,
+				       char *buf, loff_t off, size_t count)
+{
+	size_t scpdata_len = count;
+	size_t padding;
+
+	if (off)
+		return -EINVAL;
+
+	memcpy(reipl_block_nvme->nvme.scp_data, buf, count);
+	if (scpdata_len % 8) {
+		padding = 8 - (scpdata_len % 8);
+		memset(reipl_block_nvme->nvme.scp_data + scpdata_len,
+		       0, padding);
+		scpdata_len += padding;
+	}
+
+	reipl_block_nvme->hdr.len = IPL_BP_FCP_LEN + scpdata_len;
+	reipl_block_nvme->nvme.len = IPL_BP0_FCP_LEN + scpdata_len;
+	reipl_block_nvme->nvme.scp_data_len = scpdata_len;
+
+	return count;
+}
+
+static struct bin_attribute sys_reipl_nvme_scp_data_attr =
+	__BIN_ATTR(scp_data, 0644, reipl_nvme_scpdata_read,
+		   reipl_nvme_scpdata_write, DIAG308_SCPDATA_SIZE);
+
+static struct bin_attribute *reipl_nvme_bin_attrs[] = {
+	&sys_reipl_nvme_scp_data_attr,
+	NULL,
+};
+
+DEFINE_IPL_ATTR_RW(reipl_nvme, fid, "0x%08llx\n", "%llx\n",
+		   reipl_block_nvme->nvme.fid);
+DEFINE_IPL_ATTR_RW(reipl_nvme, nsid, "0x%08llx\n", "%llx\n",
+		   reipl_block_nvme->nvme.nsid);
+DEFINE_IPL_ATTR_RW(reipl_nvme, bootprog, "%lld\n", "%lld\n",
+		   reipl_block_nvme->nvme.bootprog);
+DEFINE_IPL_ATTR_RW(reipl_nvme, br_lba, "%lld\n", "%lld\n",
+		   reipl_block_nvme->nvme.br_lba);
+
+static struct attribute *reipl_nvme_attrs[] = {
+	&sys_reipl_nvme_fid_attr.attr,
+	&sys_reipl_nvme_nsid_attr.attr,
+	&sys_reipl_nvme_bootprog_attr.attr,
+	&sys_reipl_nvme_br_lba_attr.attr,
+	&sys_reipl_nvme_loadparm_attr.attr,
+	NULL,
+};
+
+static struct attribute_group reipl_nvme_attr_group = {
+	.attrs = reipl_nvme_attrs,
+	.bin_attrs = reipl_nvme_bin_attrs
+};
+
+static ssize_t reipl_nvme_clear_show(struct kobject *kobj,
+				     struct kobj_attribute *attr, char *page)
+{
+	return sprintf(page, "%u\n", reipl_nvme_clear);
+}
+
+static ssize_t reipl_nvme_clear_store(struct kobject *kobj,
+				      struct kobj_attribute *attr,
+				      const char *buf, size_t len)
+{
+	if (kstrtobool(buf, &reipl_nvme_clear) < 0)
+		return -EINVAL;
+	return len;
+}
+
+static struct kobj_attribute sys_reipl_nvme_clear_attr =
+	__ATTR(clear, 0644, reipl_nvme_clear_show, reipl_nvme_clear_store);
+
+/* CCW reipl device attributes */
+DEFINE_IPL_CCW_ATTR_RW(reipl_ccw, device, reipl_block_ccw->ccw);
+
+static ssize_t reipl_ccw_clear_show(struct kobject *kobj,
+				    struct kobj_attribute *attr, char *page)
+{
+	return sprintf(page, "%u\n", reipl_ccw_clear);
+}
+
+static ssize_t reipl_ccw_clear_store(struct kobject *kobj,
+				     struct kobj_attribute *attr,
+				     const char *buf, size_t len)
+{
+	if (kstrtobool(buf, &reipl_ccw_clear) < 0)
+		return -EINVAL;
+	return len;
+}
+
+static struct kobj_attribute sys_reipl_ccw_clear_attr =
+	__ATTR(clear, 0644, reipl_ccw_clear_show, reipl_ccw_clear_store);
+
+static struct attribute *reipl_ccw_attrs_vm[] = {
+	&sys_reipl_ccw_device_attr.attr,
+	&sys_reipl_ccw_loadparm_attr.attr,
+	&sys_reipl_ccw_vmparm_attr.attr,
+	&sys_reipl_ccw_clear_attr.attr,
+	NULL,
+};
+
+static struct attribute *reipl_ccw_attrs_lpar[] = {
+	&sys_reipl_ccw_device_attr.attr,
+	&sys_reipl_ccw_loadparm_attr.attr,
+	&sys_reipl_ccw_clear_attr.attr,
+	NULL,
+};
+
+static struct attribute_group reipl_ccw_attr_group_vm = {
+	.name  = IPL_CCW_STR,
+	.attrs = reipl_ccw_attrs_vm,
+};
+
+static struct attribute_group reipl_ccw_attr_group_lpar = {
+	.name  = IPL_CCW_STR,
+	.attrs = reipl_ccw_attrs_lpar,
+};
+
+/* ECKD reipl device attributes */
+
+static ssize_t reipl_eckd_scpdata_read(struct file *filp, struct kobject *kobj,
+				       struct bin_attribute *attr,
+				       char *buf, loff_t off, size_t count)
+{
+	size_t size = reipl_block_eckd->eckd.scp_data_len;
+	void *scp_data = reipl_block_eckd->eckd.scp_data;
+
+	return memory_read_from_buffer(buf, count, &off, scp_data, size);
+}
+
+static ssize_t reipl_eckd_scpdata_write(struct file *filp, struct kobject *kobj,
+					struct bin_attribute *attr,
+					char *buf, loff_t off, size_t count)
+{
+	size_t scpdata_len = count;
+	size_t padding;
+
+	if (off)
+		return -EINVAL;
+
+	memcpy(reipl_block_eckd->eckd.scp_data, buf, count);
+	if (scpdata_len % 8) {
+		padding = 8 - (scpdata_len % 8);
+		memset(reipl_block_eckd->eckd.scp_data + scpdata_len,
+		       0, padding);
+		scpdata_len += padding;
+	}
+
+	reipl_block_eckd->hdr.len = IPL_BP_ECKD_LEN + scpdata_len;
+	reipl_block_eckd->eckd.len = IPL_BP0_ECKD_LEN + scpdata_len;
+	reipl_block_eckd->eckd.scp_data_len = scpdata_len;
+
+	return count;
+}
+
+static struct bin_attribute sys_reipl_eckd_scp_data_attr =
+	__BIN_ATTR(scp_data, 0644, reipl_eckd_scpdata_read,
+		   reipl_eckd_scpdata_write, DIAG308_SCPDATA_SIZE);
+
+static struct bin_attribute *reipl_eckd_bin_attrs[] = {
+	&sys_reipl_eckd_scp_data_attr,
+	NULL,
+};
+
+DEFINE_IPL_CCW_ATTR_RW(reipl_eckd, device, reipl_block_eckd->eckd);
+DEFINE_IPL_ATTR_RW(reipl_eckd, bootprog, "%lld\n", "%lld\n",
+		   reipl_block_eckd->eckd.bootprog);
+
+static struct attribute *reipl_eckd_attrs[] = {
+	&sys_reipl_eckd_device_attr.attr,
+	&sys_reipl_eckd_bootprog_attr.attr,
+	&sys_reipl_eckd_br_chr_attr.attr,
+	&sys_reipl_eckd_loadparm_attr.attr,
+	NULL,
+};
+
+static struct attribute_group reipl_eckd_attr_group = {
+	.attrs = reipl_eckd_attrs,
+	.bin_attrs = reipl_eckd_bin_attrs
+};
+
+static ssize_t reipl_eckd_clear_show(struct kobject *kobj,
+				     struct kobj_attribute *attr, char *page)
+{
+	return sprintf(page, "%u\n", reipl_eckd_clear);
+}
+
+static ssize_t reipl_eckd_clear_store(struct kobject *kobj,
+				      struct kobj_attribute *attr,
+				      const char *buf, size_t len)
+{
+	if (kstrtobool(buf, &reipl_eckd_clear) < 0)
+		return -EINVAL;
+	return len;
+}
+
+static struct kobj_attribute sys_reipl_eckd_clear_attr =
+	__ATTR(clear, 0644, reipl_eckd_clear_show, reipl_eckd_clear_store);
+
+/* NSS reipl device attributes */
+static void reipl_get_ascii_nss_name(char *dst,
+				     struct ipl_parameter_block *ipb)
+{
+	memcpy(dst, ipb->ccw.nss_name, NSS_NAME_SIZE);
+	EBCASC(dst, NSS_NAME_SIZE);
+	dst[NSS_NAME_SIZE] = 0;
+}
+
+static ssize_t reipl_nss_name_show(struct kobject *kobj,
+				   struct kobj_attribute *attr, char *page)
+{
+	char nss_name[NSS_NAME_SIZE + 1] = {};
+
+	reipl_get_ascii_nss_name(nss_name, reipl_block_nss);
+	return sprintf(page, "%s\n", nss_name);
+}
+
+static ssize_t reipl_nss_name_store(struct kobject *kobj,
+				    struct kobj_attribute *attr,
+				    const char *buf, size_t len)
+{
+	int nss_len;
+
+	/* ignore trailing newline */
+	nss_len = len;
+	if ((len > 0) && (buf[len - 1] == '\n'))
+		nss_len--;
+
+	if (nss_len > NSS_NAME_SIZE)
+		return -EINVAL;
+
+	memset(reipl_block_nss->ccw.nss_name, 0x40, NSS_NAME_SIZE);
+	if (nss_len > 0) {
+		reipl_block_nss->ccw.vm_flags |= IPL_PB0_CCW_VM_FLAG_NSS;
+		memcpy(reipl_block_nss->ccw.nss_name, buf, nss_len);
+		ASCEBC(reipl_block_nss->ccw.nss_name, nss_len);
+		EBC_TOUPPER(reipl_block_nss->ccw.nss_name, nss_len);
+	} else {
+		reipl_block_nss->ccw.vm_flags &= ~IPL_PB0_CCW_VM_FLAG_NSS;
+	}
+
+	return len;
+}
+
+static struct kobj_attribute sys_reipl_nss_name_attr =
+	__ATTR(name, 0644, reipl_nss_name_show,
+	       reipl_nss_name_store);
+
+static struct attribute *reipl_nss_attrs[] = {
+	&sys_reipl_nss_name_attr.attr,
+	&sys_reipl_nss_loadparm_attr.attr,
+	&sys_reipl_nss_vmparm_attr.attr,
+	NULL,
+};
+
+static struct attribute_group reipl_nss_attr_group = {
+	.name  = IPL_NSS_STR,
+	.attrs = reipl_nss_attrs,
+};
+
+void set_os_info_reipl_block(void)
+{
+	os_info_entry_add(OS_INFO_REIPL_BLOCK, reipl_block_actual,
+			  reipl_block_actual->hdr.len);
+}
+
+/* reipl type */
+
+static int reipl_set_type(enum ipl_type type)
+{
+	if (!(reipl_capabilities & type))
+		return -EINVAL;
+
+	switch(type) {
+	case IPL_TYPE_CCW:
+		reipl_block_actual = reipl_block_ccw;
+		break;
+	case IPL_TYPE_ECKD:
+		reipl_block_actual = reipl_block_eckd;
+		break;
+	case IPL_TYPE_FCP:
+		reipl_block_actual = reipl_block_fcp;
+		break;
+	case IPL_TYPE_NVME:
+		reipl_block_actual = reipl_block_nvme;
+		break;
+	case IPL_TYPE_NSS:
+		reipl_block_actual = reipl_block_nss;
+		break;
+	default:
+		break;
+	}
+	reipl_type = type;
+	return 0;
+}
+
+static ssize_t reipl_type_show(struct kobject *kobj,
+			       struct kobj_attribute *attr, char *page)
+{
+	return sprintf(page, "%s\n", ipl_type_str(reipl_type));
+}
+
+static ssize_t reipl_type_store(struct kobject *kobj,
+				struct kobj_attribute *attr,
+				const char *buf, size_t len)
+{
+	int rc = -EINVAL;
+
+	if (strncmp(buf, IPL_CCW_STR, strlen(IPL_CCW_STR)) == 0)
+		rc = reipl_set_type(IPL_TYPE_CCW);
+	else if (strncmp(buf, IPL_ECKD_STR, strlen(IPL_ECKD_STR)) == 0)
+		rc = reipl_set_type(IPL_TYPE_ECKD);
+	else if (strncmp(buf, IPL_FCP_STR, strlen(IPL_FCP_STR)) == 0)
+		rc = reipl_set_type(IPL_TYPE_FCP);
+	else if (strncmp(buf, IPL_NVME_STR, strlen(IPL_NVME_STR)) == 0)
+		rc = reipl_set_type(IPL_TYPE_NVME);
+	else if (strncmp(buf, IPL_NSS_STR, strlen(IPL_NSS_STR)) == 0)
+		rc = reipl_set_type(IPL_TYPE_NSS);
+	return (rc != 0) ? rc : len;
+}
+
+static struct kobj_attribute reipl_type_attr =
+	__ATTR(reipl_type, 0644, reipl_type_show, reipl_type_store);
+
+static struct kset *reipl_kset;
+static struct kset *reipl_fcp_kset;
+static struct kset *reipl_nvme_kset;
+static struct kset *reipl_eckd_kset;
+
+static void __reipl_run(void *unused)
+{
+	switch (reipl_type) {
+	case IPL_TYPE_CCW:
+		diag308(DIAG308_SET, reipl_block_ccw);
+		if (reipl_ccw_clear)
+			diag308(DIAG308_LOAD_CLEAR, NULL);
+		else
+			diag308(DIAG308_LOAD_NORMAL_DUMP, NULL);
+		break;
+	case IPL_TYPE_ECKD:
+		diag308(DIAG308_SET, reipl_block_eckd);
+		if (reipl_eckd_clear)
+			diag308(DIAG308_LOAD_CLEAR, NULL);
+		else
+			diag308(DIAG308_LOAD_NORMAL, NULL);
+		break;
+	case IPL_TYPE_FCP:
+		diag308(DIAG308_SET, reipl_block_fcp);
+		if (reipl_fcp_clear)
+			diag308(DIAG308_LOAD_CLEAR, NULL);
+		else
+			diag308(DIAG308_LOAD_NORMAL, NULL);
+		break;
+	case IPL_TYPE_NVME:
+		diag308(DIAG308_SET, reipl_block_nvme);
+		if (reipl_nvme_clear)
+			diag308(DIAG308_LOAD_CLEAR, NULL);
+		else
+			diag308(DIAG308_LOAD_NORMAL, NULL);
+		break;
+	case IPL_TYPE_NSS:
+		diag308(DIAG308_SET, reipl_block_nss);
+		diag308(DIAG308_LOAD_CLEAR, NULL);
+		break;
+	case IPL_TYPE_UNKNOWN:
+		diag308(DIAG308_LOAD_CLEAR, NULL);
+		break;
+	case IPL_TYPE_FCP_DUMP:
+	case IPL_TYPE_NVME_DUMP:
+	case IPL_TYPE_ECKD_DUMP:
+		break;
+	}
+	disabled_wait();
+}
+
+static void reipl_run(struct shutdown_trigger *trigger)
+{
+	smp_call_ipl_cpu(__reipl_run, NULL);
+}
+
+static void reipl_block_ccw_init(struct ipl_parameter_block *ipb)
+{
+	ipb->hdr.len = IPL_BP_CCW_LEN;
+	ipb->hdr.version = IPL_PARM_BLOCK_VERSION;
+	ipb->pb0_hdr.len = IPL_BP0_CCW_LEN;
+	ipb->pb0_hdr.pbt = IPL_PBT_CCW;
+}
+
+static void reipl_block_ccw_fill_parms(struct ipl_parameter_block *ipb)
+{
+	/* LOADPARM */
+	/* check if read scp info worked and set loadparm */
+	if (sclp_ipl_info.is_valid)
+		memcpy(ipb->ccw.loadparm, &sclp_ipl_info.loadparm, LOADPARM_LEN);
+	else
+		/* read scp info failed: set empty loadparm (EBCDIC blanks) */
+		memset(ipb->ccw.loadparm, 0x40, LOADPARM_LEN);
+	ipb->ccw.flags = IPL_PB0_FLAG_LOADPARM;
+
+	/* VM PARM */
+	if (MACHINE_IS_VM && ipl_block_valid &&
+	    (ipl_block.ccw.vm_flags & IPL_PB0_CCW_VM_FLAG_VP)) {
+
+		ipb->ccw.vm_flags |= IPL_PB0_CCW_VM_FLAG_VP;
+		ipb->ccw.vm_parm_len = ipl_block.ccw.vm_parm_len;
+		memcpy(ipb->ccw.vm_parm,
+		       ipl_block.ccw.vm_parm, DIAG308_VMPARM_SIZE);
+	}
+}
+
+static int __init reipl_nss_init(void)
+{
+	int rc;
+
+	if (!MACHINE_IS_VM)
+		return 0;
+
+	reipl_block_nss = (void *) get_zeroed_page(GFP_KERNEL);
+	if (!reipl_block_nss)
+		return -ENOMEM;
+
+	rc = sysfs_create_group(&reipl_kset->kobj, &reipl_nss_attr_group);
+	if (rc)
+		return rc;
+
+	reipl_block_ccw_init(reipl_block_nss);
+	reipl_capabilities |= IPL_TYPE_NSS;
+	return 0;
+}
+
+static int __init reipl_ccw_init(void)
+{
+	int rc;
+
+	reipl_block_ccw = (void *) get_zeroed_page(GFP_KERNEL);
+	if (!reipl_block_ccw)
+		return -ENOMEM;
+
+	rc = sysfs_create_group(&reipl_kset->kobj,
+				MACHINE_IS_VM ? &reipl_ccw_attr_group_vm
+					      : &reipl_ccw_attr_group_lpar);
+	if (rc)
+		return rc;
+
+	reipl_block_ccw_init(reipl_block_ccw);
+	if (ipl_info.type == IPL_TYPE_CCW) {
+		reipl_block_ccw->ccw.ssid = ipl_block.ccw.ssid;
+		reipl_block_ccw->ccw.devno = ipl_block.ccw.devno;
+		reipl_block_ccw_fill_parms(reipl_block_ccw);
+	}
+
+	reipl_capabilities |= IPL_TYPE_CCW;
+	return 0;
+}
+
+static int __init reipl_fcp_init(void)
+{
+	int rc;
+
+	reipl_block_fcp = (void *) get_zeroed_page(GFP_KERNEL);
+	if (!reipl_block_fcp)
+		return -ENOMEM;
+
+	/* sysfs: create fcp kset for mixing attr group and bin attrs */
+	reipl_fcp_kset = kset_create_and_add(IPL_FCP_STR, NULL,
+					     &reipl_kset->kobj);
+	if (!reipl_fcp_kset) {
+		free_page((unsigned long) reipl_block_fcp);
+		return -ENOMEM;
+	}
+
+	rc = sysfs_create_group(&reipl_fcp_kset->kobj, &reipl_fcp_attr_group);
+	if (rc)
+		goto out1;
+
+	if (test_facility(141)) {
+		rc = sysfs_create_file(&reipl_fcp_kset->kobj,
+				       &sys_reipl_fcp_clear_attr.attr);
+		if (rc)
+			goto out2;
+	} else {
+		reipl_fcp_clear = true;
+	}
+
+	if (ipl_info.type == IPL_TYPE_FCP) {
+		memcpy(reipl_block_fcp, &ipl_block, sizeof(ipl_block));
+		/*
+		 * Fix loadparm: There are systems where the (SCSI) LOADPARM
+		 * is invalid in the SCSI IPL parameter block, so take it
+		 * always from sclp_ipl_info.
+		 */
+		memcpy(reipl_block_fcp->fcp.loadparm, sclp_ipl_info.loadparm,
+		       LOADPARM_LEN);
+	} else {
+		reipl_block_fcp->hdr.len = IPL_BP_FCP_LEN;
+		reipl_block_fcp->hdr.version = IPL_PARM_BLOCK_VERSION;
+		reipl_block_fcp->fcp.len = IPL_BP0_FCP_LEN;
+		reipl_block_fcp->fcp.pbt = IPL_PBT_FCP;
+		reipl_block_fcp->fcp.opt = IPL_PB0_FCP_OPT_IPL;
+	}
+	reipl_capabilities |= IPL_TYPE_FCP;
+	return 0;
+
+out2:
+	sysfs_remove_group(&reipl_fcp_kset->kobj, &reipl_fcp_attr_group);
+out1:
+	kset_unregister(reipl_fcp_kset);
+	free_page((unsigned long) reipl_block_fcp);
+	return rc;
+}
+
+static int __init reipl_nvme_init(void)
+{
+	int rc;
+
+	reipl_block_nvme = (void *) get_zeroed_page(GFP_KERNEL);
+	if (!reipl_block_nvme)
+		return -ENOMEM;
+
+	/* sysfs: create kset for mixing attr group and bin attrs */
+	reipl_nvme_kset = kset_create_and_add(IPL_NVME_STR, NULL,
+					     &reipl_kset->kobj);
+	if (!reipl_nvme_kset) {
+		free_page((unsigned long) reipl_block_nvme);
+		return -ENOMEM;
+	}
+
+	rc = sysfs_create_group(&reipl_nvme_kset->kobj, &reipl_nvme_attr_group);
+	if (rc)
+		goto out1;
+
+	if (test_facility(141)) {
+		rc = sysfs_create_file(&reipl_nvme_kset->kobj,
+				       &sys_reipl_nvme_clear_attr.attr);
+		if (rc)
+			goto out2;
+	} else {
+		reipl_nvme_clear = true;
+	}
+
+	if (ipl_info.type == IPL_TYPE_NVME) {
+		memcpy(reipl_block_nvme, &ipl_block, sizeof(ipl_block));
+		/*
+		 * Fix loadparm: There are systems where the (SCSI) LOADPARM
+		 * is invalid in the IPL parameter block, so take it
+		 * always from sclp_ipl_info.
+		 */
+		memcpy(reipl_block_nvme->nvme.loadparm, sclp_ipl_info.loadparm,
+		       LOADPARM_LEN);
+	} else {
+		reipl_block_nvme->hdr.len = IPL_BP_NVME_LEN;
+		reipl_block_nvme->hdr.version = IPL_PARM_BLOCK_VERSION;
+		reipl_block_nvme->nvme.len = IPL_BP0_NVME_LEN;
+		reipl_block_nvme->nvme.pbt = IPL_PBT_NVME;
+		reipl_block_nvme->nvme.opt = IPL_PB0_NVME_OPT_IPL;
+	}
+	reipl_capabilities |= IPL_TYPE_NVME;
+	return 0;
+
+out2:
+	sysfs_remove_group(&reipl_nvme_kset->kobj, &reipl_nvme_attr_group);
+out1:
+	kset_unregister(reipl_nvme_kset);
+	free_page((unsigned long) reipl_block_nvme);
+	return rc;
+}
+
+static int __init reipl_eckd_init(void)
+{
+	int rc;
+
+	if (!sclp.has_sipl_eckd)
+		return 0;
+
+	reipl_block_eckd = (void *)get_zeroed_page(GFP_KERNEL);
+	if (!reipl_block_eckd)
+		return -ENOMEM;
+
+	/* sysfs: create kset for mixing attr group and bin attrs */
+	reipl_eckd_kset = kset_create_and_add(IPL_ECKD_STR, NULL,
+					      &reipl_kset->kobj);
+	if (!reipl_eckd_kset) {
+		free_page((unsigned long)reipl_block_eckd);
+		return -ENOMEM;
+	}
+
+	rc = sysfs_create_group(&reipl_eckd_kset->kobj, &reipl_eckd_attr_group);
+	if (rc)
+		goto out1;
+
+	if (test_facility(141)) {
+		rc = sysfs_create_file(&reipl_eckd_kset->kobj,
+				       &sys_reipl_eckd_clear_attr.attr);
+		if (rc)
+			goto out2;
+	} else {
+		reipl_eckd_clear = true;
+	}
+
+	if (ipl_info.type == IPL_TYPE_ECKD) {
+		memcpy(reipl_block_eckd, &ipl_block, sizeof(ipl_block));
+	} else {
+		reipl_block_eckd->hdr.len = IPL_BP_ECKD_LEN;
+		reipl_block_eckd->hdr.version = IPL_PARM_BLOCK_VERSION;
+		reipl_block_eckd->eckd.len = IPL_BP0_ECKD_LEN;
+		reipl_block_eckd->eckd.pbt = IPL_PBT_ECKD;
+		reipl_block_eckd->eckd.opt = IPL_PB0_ECKD_OPT_IPL;
+	}
+	reipl_capabilities |= IPL_TYPE_ECKD;
+	return 0;
+
+out2:
+	sysfs_remove_group(&reipl_eckd_kset->kobj, &reipl_eckd_attr_group);
+out1:
+	kset_unregister(reipl_eckd_kset);
+	free_page((unsigned long)reipl_block_eckd);
+	return rc;
+}
+
+static int __init reipl_type_init(void)
+{
+	enum ipl_type reipl_type = ipl_info.type;
+	struct ipl_parameter_block *reipl_block;
+	unsigned long size;
+
+	reipl_block = os_info_old_entry(OS_INFO_REIPL_BLOCK, &size);
+	if (!reipl_block)
+		goto out;
+	/*
+	 * If we have an OS info reipl block, this will be used
+	 */
+	if (reipl_block->pb0_hdr.pbt == IPL_PBT_FCP) {
+		memcpy(reipl_block_fcp, reipl_block, size);
+		reipl_type = IPL_TYPE_FCP;
+	} else if (reipl_block->pb0_hdr.pbt == IPL_PBT_NVME) {
+		memcpy(reipl_block_nvme, reipl_block, size);
+		reipl_type = IPL_TYPE_NVME;
+	} else if (reipl_block->pb0_hdr.pbt == IPL_PBT_CCW) {
+		memcpy(reipl_block_ccw, reipl_block, size);
+		reipl_type = IPL_TYPE_CCW;
+	} else if (reipl_block->pb0_hdr.pbt == IPL_PBT_ECKD) {
+		memcpy(reipl_block_eckd, reipl_block, size);
+		reipl_type = IPL_TYPE_ECKD;
+	}
+out:
+	return reipl_set_type(reipl_type);
+}
+
+static int __init reipl_init(void)
+{
+	int rc;
+
+	reipl_kset = kset_create_and_add("reipl", NULL, firmware_kobj);
+	if (!reipl_kset)
+		return -ENOMEM;
+	rc = sysfs_create_file(&reipl_kset->kobj, &reipl_type_attr.attr);
+	if (rc) {
+		kset_unregister(reipl_kset);
+		return rc;
+	}
+	rc = reipl_ccw_init();
+	if (rc)
+		return rc;
+	rc = reipl_eckd_init();
+	if (rc)
+		return rc;
+	rc = reipl_fcp_init();
+	if (rc)
+		return rc;
+	rc = reipl_nvme_init();
+	if (rc)
+		return rc;
+	rc = reipl_nss_init();
+	if (rc)
+		return rc;
+	return reipl_type_init();
+}
+
+static struct shutdown_action __refdata reipl_action = {
+	.name	= SHUTDOWN_ACTION_REIPL_STR,
+	.fn	= reipl_run,
+	.init	= reipl_init,
+};
+
+/*
+ * dump shutdown action: Dump Linux on shutdown.
+ */
+
+/* FCP dump device attributes */
+
+DEFINE_IPL_ATTR_RW(dump_fcp, wwpn, "0x%016llx\n", "%llx\n",
+		   dump_block_fcp->fcp.wwpn);
+DEFINE_IPL_ATTR_RW(dump_fcp, lun, "0x%016llx\n", "%llx\n",
+		   dump_block_fcp->fcp.lun);
+DEFINE_IPL_ATTR_RW(dump_fcp, bootprog, "%lld\n", "%lld\n",
+		   dump_block_fcp->fcp.bootprog);
+DEFINE_IPL_ATTR_RW(dump_fcp, br_lba, "%lld\n", "%lld\n",
+		   dump_block_fcp->fcp.br_lba);
+DEFINE_IPL_ATTR_RW(dump_fcp, device, "0.0.%04llx\n", "0.0.%llx\n",
+		   dump_block_fcp->fcp.devno);
+
+static struct attribute *dump_fcp_attrs[] = {
+	&sys_dump_fcp_device_attr.attr,
+	&sys_dump_fcp_wwpn_attr.attr,
+	&sys_dump_fcp_lun_attr.attr,
+	&sys_dump_fcp_bootprog_attr.attr,
+	&sys_dump_fcp_br_lba_attr.attr,
+	NULL,
+};
+
+static struct attribute_group dump_fcp_attr_group = {
+	.name  = IPL_FCP_STR,
+	.attrs = dump_fcp_attrs,
+};
+
+/* NVME dump device attributes */
+DEFINE_IPL_ATTR_RW(dump_nvme, fid, "0x%08llx\n", "%llx\n",
+		   dump_block_nvme->nvme.fid);
+DEFINE_IPL_ATTR_RW(dump_nvme, nsid, "0x%08llx\n", "%llx\n",
+		   dump_block_nvme->nvme.nsid);
+DEFINE_IPL_ATTR_RW(dump_nvme, bootprog, "%lld\n", "%llx\n",
+		   dump_block_nvme->nvme.bootprog);
+DEFINE_IPL_ATTR_RW(dump_nvme, br_lba, "%lld\n", "%llx\n",
+		   dump_block_nvme->nvme.br_lba);
+
+static struct attribute *dump_nvme_attrs[] = {
+	&sys_dump_nvme_fid_attr.attr,
+	&sys_dump_nvme_nsid_attr.attr,
+	&sys_dump_nvme_bootprog_attr.attr,
+	&sys_dump_nvme_br_lba_attr.attr,
+	NULL,
+};
+
+static struct attribute_group dump_nvme_attr_group = {
+	.name  = IPL_NVME_STR,
+	.attrs = dump_nvme_attrs,
+};
+
+/* ECKD dump device attributes */
+DEFINE_IPL_CCW_ATTR_RW(dump_eckd, device, dump_block_eckd->eckd);
+DEFINE_IPL_ATTR_RW(dump_eckd, bootprog, "%lld\n", "%llx\n",
+		   dump_block_eckd->eckd.bootprog);
+
+IPL_ATTR_BR_CHR_SHOW_FN(dump, dump_block_eckd->eckd);
+IPL_ATTR_BR_CHR_STORE_FN(dump, dump_block_eckd->eckd);
+
+static struct kobj_attribute sys_dump_eckd_br_chr_attr =
+	__ATTR(br_chr, 0644, eckd_dump_br_chr_show, eckd_dump_br_chr_store);
+
+static struct attribute *dump_eckd_attrs[] = {
+	&sys_dump_eckd_device_attr.attr,
+	&sys_dump_eckd_bootprog_attr.attr,
+	&sys_dump_eckd_br_chr_attr.attr,
+	NULL,
+};
+
+static struct attribute_group dump_eckd_attr_group = {
+	.name  = IPL_ECKD_STR,
+	.attrs = dump_eckd_attrs,
+};
+
+/* CCW dump device attributes */
+DEFINE_IPL_CCW_ATTR_RW(dump_ccw, device, dump_block_ccw->ccw);
+
+static struct attribute *dump_ccw_attrs[] = {
+	&sys_dump_ccw_device_attr.attr,
+	NULL,
+};
+
+static struct attribute_group dump_ccw_attr_group = {
+	.name  = IPL_CCW_STR,
+	.attrs = dump_ccw_attrs,
+};
+
+/* dump type */
+
+static int dump_set_type(enum dump_type type)
+{
+	if (!(dump_capabilities & type))
+		return -EINVAL;
+	dump_type = type;
+	return 0;
+}
+
+static ssize_t dump_type_show(struct kobject *kobj,
+			      struct kobj_attribute *attr, char *page)
+{
+	return sprintf(page, "%s\n", dump_type_str(dump_type));
+}
+
+static ssize_t dump_type_store(struct kobject *kobj,
+			       struct kobj_attribute *attr,
+			       const char *buf, size_t len)
+{
+	int rc = -EINVAL;
+
+	if (strncmp(buf, DUMP_NONE_STR, strlen(DUMP_NONE_STR)) == 0)
+		rc = dump_set_type(DUMP_TYPE_NONE);
+	else if (strncmp(buf, DUMP_CCW_STR, strlen(DUMP_CCW_STR)) == 0)
+		rc = dump_set_type(DUMP_TYPE_CCW);
+	else if (strncmp(buf, DUMP_ECKD_STR, strlen(DUMP_ECKD_STR)) == 0)
+		rc = dump_set_type(DUMP_TYPE_ECKD);
+	else if (strncmp(buf, DUMP_FCP_STR, strlen(DUMP_FCP_STR)) == 0)
+		rc = dump_set_type(DUMP_TYPE_FCP);
+	else if (strncmp(buf, DUMP_NVME_STR, strlen(DUMP_NVME_STR)) == 0)
+		rc = dump_set_type(DUMP_TYPE_NVME);
+	return (rc != 0) ? rc : len;
+}
+
+static struct kobj_attribute dump_type_attr =
+	__ATTR(dump_type, 0644, dump_type_show, dump_type_store);
+
+static struct kset *dump_kset;
+
+static void diag308_dump(void *dump_block)
+{
+	diag308(DIAG308_SET, dump_block);
+	while (1) {
+		if (diag308(DIAG308_LOAD_NORMAL_DUMP, NULL) != 0x302)
+			break;
+		udelay(USEC_PER_SEC);
+	}
+}
+
+static void __dump_run(void *unused)
+{
+	switch (dump_type) {
+	case DUMP_TYPE_CCW:
+		diag308_dump(dump_block_ccw);
+		break;
+	case DUMP_TYPE_ECKD:
+		diag308_dump(dump_block_eckd);
+		break;
+	case DUMP_TYPE_FCP:
+		diag308_dump(dump_block_fcp);
+		break;
+	case DUMP_TYPE_NVME:
+		diag308_dump(dump_block_nvme);
+		break;
+	default:
+		break;
+	}
+}
+
+static void dump_run(struct shutdown_trigger *trigger)
+{
+	if (dump_type == DUMP_TYPE_NONE)
+		return;
+	smp_send_stop();
+	smp_call_ipl_cpu(__dump_run, NULL);
+}
+
+static int __init dump_ccw_init(void)
+{
+	int rc;
+
+	dump_block_ccw = (void *) get_zeroed_page(GFP_KERNEL);
+	if (!dump_block_ccw)
+		return -ENOMEM;
+	rc = sysfs_create_group(&dump_kset->kobj, &dump_ccw_attr_group);
+	if (rc) {
+		free_page((unsigned long)dump_block_ccw);
+		return rc;
+	}
+	dump_block_ccw->hdr.len = IPL_BP_CCW_LEN;
+	dump_block_ccw->hdr.version = IPL_PARM_BLOCK_VERSION;
+	dump_block_ccw->ccw.len = IPL_BP0_CCW_LEN;
+	dump_block_ccw->ccw.pbt = IPL_PBT_CCW;
+	dump_capabilities |= DUMP_TYPE_CCW;
+	return 0;
+}
+
+static int __init dump_fcp_init(void)
+{
+	int rc;
+
+	if (!sclp_ipl_info.has_dump)
+		return 0; /* LDIPL DUMP is not installed */
+	dump_block_fcp = (void *) get_zeroed_page(GFP_KERNEL);
+	if (!dump_block_fcp)
+		return -ENOMEM;
+	rc = sysfs_create_group(&dump_kset->kobj, &dump_fcp_attr_group);
+	if (rc) {
+		free_page((unsigned long)dump_block_fcp);
+		return rc;
+	}
+	dump_block_fcp->hdr.len = IPL_BP_FCP_LEN;
+	dump_block_fcp->hdr.version = IPL_PARM_BLOCK_VERSION;
+	dump_block_fcp->fcp.len = IPL_BP0_FCP_LEN;
+	dump_block_fcp->fcp.pbt = IPL_PBT_FCP;
+	dump_block_fcp->fcp.opt = IPL_PB0_FCP_OPT_DUMP;
+	dump_capabilities |= DUMP_TYPE_FCP;
+	return 0;
+}
+
+static int __init dump_nvme_init(void)
+{
+	int rc;
+
+	if (!sclp_ipl_info.has_dump)
+		return 0; /* LDIPL DUMP is not installed */
+	dump_block_nvme = (void *) get_zeroed_page(GFP_KERNEL);
+	if (!dump_block_nvme)
+		return -ENOMEM;
+	rc = sysfs_create_group(&dump_kset->kobj, &dump_nvme_attr_group);
+	if (rc) {
+		free_page((unsigned long)dump_block_nvme);
+		return rc;
+	}
+	dump_block_nvme->hdr.len = IPL_BP_NVME_LEN;
+	dump_block_nvme->hdr.version = IPL_PARM_BLOCK_VERSION;
+	dump_block_nvme->fcp.len = IPL_BP0_NVME_LEN;
+	dump_block_nvme->fcp.pbt = IPL_PBT_NVME;
+	dump_block_nvme->fcp.opt = IPL_PB0_NVME_OPT_DUMP;
+	dump_capabilities |= DUMP_TYPE_NVME;
+	return 0;
+}
+
+static int __init dump_eckd_init(void)
+{
+	int rc;
+
+	if (!sclp_ipl_info.has_dump || !sclp.has_sipl_eckd)
+		return 0; /* LDIPL DUMP is not installed */
+	dump_block_eckd = (void *)get_zeroed_page(GFP_KERNEL);
+	if (!dump_block_eckd)
+		return -ENOMEM;
+	rc = sysfs_create_group(&dump_kset->kobj, &dump_eckd_attr_group);
+	if (rc) {
+		free_page((unsigned long)dump_block_eckd);
+		return rc;
+	}
+	dump_block_eckd->hdr.len = IPL_BP_ECKD_LEN;
+	dump_block_eckd->hdr.version = IPL_PARM_BLOCK_VERSION;
+	dump_block_eckd->eckd.len = IPL_BP0_ECKD_LEN;
+	dump_block_eckd->eckd.pbt = IPL_PBT_ECKD;
+	dump_block_eckd->eckd.opt = IPL_PB0_ECKD_OPT_DUMP;
+	dump_capabilities |= DUMP_TYPE_ECKD;
+	return 0;
+}
+
+static int __init dump_init(void)
+{
+	int rc;
+
+	dump_kset = kset_create_and_add("dump", NULL, firmware_kobj);
+	if (!dump_kset)
+		return -ENOMEM;
+	rc = sysfs_create_file(&dump_kset->kobj, &dump_type_attr.attr);
+	if (rc) {
+		kset_unregister(dump_kset);
+		return rc;
+	}
+	rc = dump_ccw_init();
+	if (rc)
+		return rc;
+	rc = dump_eckd_init();
+	if (rc)
+		return rc;
+	rc = dump_fcp_init();
+	if (rc)
+		return rc;
+	rc = dump_nvme_init();
+	if (rc)
+		return rc;
+	dump_set_type(DUMP_TYPE_NONE);
+	return 0;
+}
+
+static struct shutdown_action __refdata dump_action = {
+	.name	= SHUTDOWN_ACTION_DUMP_STR,
+	.fn	= dump_run,
+	.init	= dump_init,
+};
+
+static void dump_reipl_run(struct shutdown_trigger *trigger)
+{
+	struct lowcore *abs_lc;
+	unsigned int csum;
+
+	/*
+	 * Set REIPL_CLEAR flag in os_info flags entry indicating
+	 * 'clear' sysfs attribute has been set on the panicked system
+	 * for specified reipl type.
+	 * Always set for IPL_TYPE_NSS and IPL_TYPE_UNKNOWN.
+	 */
+	if ((reipl_type == IPL_TYPE_CCW && reipl_ccw_clear) ||
+	    (reipl_type == IPL_TYPE_ECKD && reipl_eckd_clear) ||
+	    (reipl_type == IPL_TYPE_FCP && reipl_fcp_clear) ||
+	    (reipl_type == IPL_TYPE_NVME && reipl_nvme_clear) ||
+	    reipl_type == IPL_TYPE_NSS ||
+	    reipl_type == IPL_TYPE_UNKNOWN)
+		os_info_flags |= OS_INFO_FLAG_REIPL_CLEAR;
+	os_info_entry_add(OS_INFO_FLAGS_ENTRY, &os_info_flags, sizeof(os_info_flags));
+	csum = (__force unsigned int)
+	       csum_partial(reipl_block_actual, reipl_block_actual->hdr.len, 0);
+	abs_lc = get_abs_lowcore();
+	abs_lc->ipib = __pa(reipl_block_actual);
+	abs_lc->ipib_checksum = csum;
+	put_abs_lowcore(abs_lc);
+	dump_run(trigger);
+}
+
+static struct shutdown_action __refdata dump_reipl_action = {
+	.name	= SHUTDOWN_ACTION_DUMP_REIPL_STR,
+	.fn	= dump_reipl_run,
+};
+
+/*
+ * vmcmd shutdown action: Trigger vm command on shutdown.
+ */
+
+static char vmcmd_on_reboot[128];
+static char vmcmd_on_panic[128];
+static char vmcmd_on_halt[128];
+static char vmcmd_on_poff[128];
+static char vmcmd_on_restart[128];
+
+DEFINE_IPL_ATTR_STR_RW(vmcmd, on_reboot, "%s\n", "%s\n", vmcmd_on_reboot);
+DEFINE_IPL_ATTR_STR_RW(vmcmd, on_panic, "%s\n", "%s\n", vmcmd_on_panic);
+DEFINE_IPL_ATTR_STR_RW(vmcmd, on_halt, "%s\n", "%s\n", vmcmd_on_halt);
+DEFINE_IPL_ATTR_STR_RW(vmcmd, on_poff, "%s\n", "%s\n", vmcmd_on_poff);
+DEFINE_IPL_ATTR_STR_RW(vmcmd, on_restart, "%s\n", "%s\n", vmcmd_on_restart);
+
+static struct attribute *vmcmd_attrs[] = {
+	&sys_vmcmd_on_reboot_attr.attr,
+	&sys_vmcmd_on_panic_attr.attr,
+	&sys_vmcmd_on_halt_attr.attr,
+	&sys_vmcmd_on_poff_attr.attr,
+	&sys_vmcmd_on_restart_attr.attr,
+	NULL,
+};
+
+static struct attribute_group vmcmd_attr_group = {
+	.attrs = vmcmd_attrs,
+};
+
+static struct kset *vmcmd_kset;
+
+static void vmcmd_run(struct shutdown_trigger *trigger)
+{
+	char *cmd;
+
+	if (strcmp(trigger->name, ON_REIPL_STR) == 0)
+		cmd = vmcmd_on_reboot;
+	else if (strcmp(trigger->name, ON_PANIC_STR) == 0)
+		cmd = vmcmd_on_panic;
+	else if (strcmp(trigger->name, ON_HALT_STR) == 0)
+		cmd = vmcmd_on_halt;
+	else if (strcmp(trigger->name, ON_POFF_STR) == 0)
+		cmd = vmcmd_on_poff;
+	else if (strcmp(trigger->name, ON_RESTART_STR) == 0)
+		cmd = vmcmd_on_restart;
+	else
+		return;
+
+	if (strlen(cmd) == 0)
+		return;
+	__cpcmd(cmd, NULL, 0, NULL);
+}
+
+static int vmcmd_init(void)
+{
+	if (!MACHINE_IS_VM)
+		return -EOPNOTSUPP;
+	vmcmd_kset = kset_create_and_add("vmcmd", NULL, firmware_kobj);
+	if (!vmcmd_kset)
+		return -ENOMEM;
+	return sysfs_create_group(&vmcmd_kset->kobj, &vmcmd_attr_group);
+}
+
+static struct shutdown_action vmcmd_action = {SHUTDOWN_ACTION_VMCMD_STR,
+					      vmcmd_run, vmcmd_init};
+
+/*
+ * stop shutdown action: Stop Linux on shutdown.
+ */
+
+static void stop_run(struct shutdown_trigger *trigger)
+{
+	if (strcmp(trigger->name, ON_PANIC_STR) == 0 ||
+	    strcmp(trigger->name, ON_RESTART_STR) == 0)
+		disabled_wait();
+	smp_stop_cpu();
+}
+
+static struct shutdown_action stop_action = {SHUTDOWN_ACTION_STOP_STR,
+					     stop_run, NULL};
+
+/* action list */
+
+static struct shutdown_action *shutdown_actions_list[] = {
+	&ipl_action, &reipl_action, &dump_reipl_action, &dump_action,
+	&vmcmd_action, &stop_action};
+#define SHUTDOWN_ACTIONS_COUNT (sizeof(shutdown_actions_list) / sizeof(void *))
+
+/*
+ * Trigger section
+ */
+
+static struct kset *shutdown_actions_kset;
+
+static int set_trigger(const char *buf, struct shutdown_trigger *trigger,
+		       size_t len)
+{
+	int i;
+
+	for (i = 0; i < SHUTDOWN_ACTIONS_COUNT; i++) {
+		if (sysfs_streq(buf, shutdown_actions_list[i]->name)) {
+			if (shutdown_actions_list[i]->init_rc) {
+				return shutdown_actions_list[i]->init_rc;
+			} else {
+				trigger->action = shutdown_actions_list[i];
+				return len;
+			}
+		}
+	}
+	return -EINVAL;
+}
+
+/* on reipl */
+
+static struct shutdown_trigger on_reboot_trigger = {ON_REIPL_STR,
+						    &reipl_action};
+
+static ssize_t on_reboot_show(struct kobject *kobj,
+			      struct kobj_attribute *attr, char *page)
+{
+	return sprintf(page, "%s\n", on_reboot_trigger.action->name);
+}
+
+static ssize_t on_reboot_store(struct kobject *kobj,
+			       struct kobj_attribute *attr,
+			       const char *buf, size_t len)
+{
+	return set_trigger(buf, &on_reboot_trigger, len);
+}
+static struct kobj_attribute on_reboot_attr = __ATTR_RW(on_reboot);
+
+static void do_machine_restart(char *__unused)
+{
+	smp_send_stop();
+	on_reboot_trigger.action->fn(&on_reboot_trigger);
+	reipl_run(NULL);
+}
+void (*_machine_restart)(char *command) = do_machine_restart;
+
+/* on panic */
+
+static struct shutdown_trigger on_panic_trigger = {ON_PANIC_STR, &stop_action};
+
+static ssize_t on_panic_show(struct kobject *kobj,
+			     struct kobj_attribute *attr, char *page)
+{
+	return sprintf(page, "%s\n", on_panic_trigger.action->name);
+}
+
+static ssize_t on_panic_store(struct kobject *kobj,
+			      struct kobj_attribute *attr,
+			      const char *buf, size_t len)
+{
+	return set_trigger(buf, &on_panic_trigger, len);
+}
+static struct kobj_attribute on_panic_attr = __ATTR_RW(on_panic);
+
+static void do_panic(void)
+{
+	lgr_info_log();
+	on_panic_trigger.action->fn(&on_panic_trigger);
+	stop_run(&on_panic_trigger);
+}
+
+/* on restart */
+
+static struct shutdown_trigger on_restart_trigger = {ON_RESTART_STR,
+	&stop_action};
+
+static ssize_t on_restart_show(struct kobject *kobj,
+			       struct kobj_attribute *attr, char *page)
+{
+	return sprintf(page, "%s\n", on_restart_trigger.action->name);
+}
+
+static ssize_t on_restart_store(struct kobject *kobj,
+				struct kobj_attribute *attr,
+				const char *buf, size_t len)
+{
+	return set_trigger(buf, &on_restart_trigger, len);
+}
+static struct kobj_attribute on_restart_attr = __ATTR_RW(on_restart);
+
+static void __do_restart(void *ignore)
+{
+	smp_send_stop();
+#ifdef CONFIG_CRASH_DUMP
+	crash_kexec(NULL);
+#endif
+	on_restart_trigger.action->fn(&on_restart_trigger);
+	stop_run(&on_restart_trigger);
+}
+
+void do_restart(void *arg)
+{
+	tracing_off();
+	debug_locks_off();
+	lgr_info_log();
+	smp_call_online_cpu(__do_restart, arg);
+}
+
+/* on halt */
+
+static struct shutdown_trigger on_halt_trigger = {ON_HALT_STR, &stop_action};
+
+static ssize_t on_halt_show(struct kobject *kobj,
+			    struct kobj_attribute *attr, char *page)
+{
+	return sprintf(page, "%s\n", on_halt_trigger.action->name);
+}
+
+static ssize_t on_halt_store(struct kobject *kobj,
+			     struct kobj_attribute *attr,
+			     const char *buf, size_t len)
+{
+	return set_trigger(buf, &on_halt_trigger, len);
+}
+static struct kobj_attribute on_halt_attr = __ATTR_RW(on_halt);
+
+static void do_machine_halt(void)
+{
+	smp_send_stop();
+	on_halt_trigger.action->fn(&on_halt_trigger);
+	stop_run(&on_halt_trigger);
+}
+void (*_machine_halt)(void) = do_machine_halt;
+
+/* on power off */
+
+static struct shutdown_trigger on_poff_trigger = {ON_POFF_STR, &stop_action};
+
+static ssize_t on_poff_show(struct kobject *kobj,
+			    struct kobj_attribute *attr, char *page)
+{
+	return sprintf(page, "%s\n", on_poff_trigger.action->name);
+}
+
+static ssize_t on_poff_store(struct kobject *kobj,
+			     struct kobj_attribute *attr,
+			     const char *buf, size_t len)
+{
+	return set_trigger(buf, &on_poff_trigger, len);
+}
+static struct kobj_attribute on_poff_attr = __ATTR_RW(on_poff);
+
+static void do_machine_power_off(void)
+{
+	smp_send_stop();
+	on_poff_trigger.action->fn(&on_poff_trigger);
+	stop_run(&on_poff_trigger);
+}
+void (*_machine_power_off)(void) = do_machine_power_off;
+
+static struct attribute *shutdown_action_attrs[] = {
+	&on_restart_attr.attr,
+	&on_reboot_attr.attr,
+	&on_panic_attr.attr,
+	&on_halt_attr.attr,
+	&on_poff_attr.attr,
+	NULL,
+};
+
+static struct attribute_group shutdown_action_attr_group = {
+	.attrs = shutdown_action_attrs,
+};
+
+static void __init shutdown_triggers_init(void)
+{
+	shutdown_actions_kset = kset_create_and_add("shutdown_actions", NULL,
+						    firmware_kobj);
+	if (!shutdown_actions_kset)
+		goto fail;
+	if (sysfs_create_group(&shutdown_actions_kset->kobj,
+			       &shutdown_action_attr_group))
+		goto fail;
+	return;
+fail:
+	panic("shutdown_triggers_init failed\n");
+}
+
+static void __init shutdown_actions_init(void)
+{
+	int i;
+
+	for (i = 0; i < SHUTDOWN_ACTIONS_COUNT; i++) {
+		if (!shutdown_actions_list[i]->init)
+			continue;
+		shutdown_actions_list[i]->init_rc =
+			shutdown_actions_list[i]->init();
+	}
+}
+
+static int __init s390_ipl_init(void)
+{
+	char str[8] = {0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40};
+
+	sclp_early_get_ipl_info(&sclp_ipl_info);
+	/*
+	 * Fix loadparm: There are systems where the (SCSI) LOADPARM
+	 * returned by read SCP info is invalid (contains EBCDIC blanks)
+	 * when the system has been booted via diag308. In that case we use
+	 * the value from diag308, if available.
+	 *
+	 * There are also systems where diag308 store does not work in
+	 * case the system is booted from HMC. Fortunately in this case
+	 * READ SCP info provides the correct value.
+	 */
+	if (memcmp(sclp_ipl_info.loadparm, str, sizeof(str)) == 0 && ipl_block_valid)
+		memcpy(sclp_ipl_info.loadparm, ipl_block.ccw.loadparm, LOADPARM_LEN);
+	shutdown_actions_init();
+	shutdown_triggers_init();
+	return 0;
+}
+
+__initcall(s390_ipl_init);
+
+static void __init strncpy_skip_quote(char *dst, char *src, int n)
+{
+	int sx, dx;
+
+	dx = 0;
+	for (sx = 0; src[sx] != 0; sx++) {
+		if (src[sx] == '"')
+			continue;
+		dst[dx++] = src[sx];
+		if (dx >= n)
+			break;
+	}
+}
+
+static int __init vmcmd_on_reboot_setup(char *str)
+{
+	if (!MACHINE_IS_VM)
+		return 1;
+	strncpy_skip_quote(vmcmd_on_reboot, str, 127);
+	vmcmd_on_reboot[127] = 0;
+	on_reboot_trigger.action = &vmcmd_action;
+	return 1;
+}
+__setup("vmreboot=", vmcmd_on_reboot_setup);
+
+static int __init vmcmd_on_panic_setup(char *str)
+{
+	if (!MACHINE_IS_VM)
+		return 1;
+	strncpy_skip_quote(vmcmd_on_panic, str, 127);
+	vmcmd_on_panic[127] = 0;
+	on_panic_trigger.action = &vmcmd_action;
+	return 1;
+}
+__setup("vmpanic=", vmcmd_on_panic_setup);
+
+static int __init vmcmd_on_halt_setup(char *str)
+{
+	if (!MACHINE_IS_VM)
+		return 1;
+	strncpy_skip_quote(vmcmd_on_halt, str, 127);
+	vmcmd_on_halt[127] = 0;
+	on_halt_trigger.action = &vmcmd_action;
+	return 1;
+}
+__setup("vmhalt=", vmcmd_on_halt_setup);
+
+static int __init vmcmd_on_poff_setup(char *str)
+{
+	if (!MACHINE_IS_VM)
+		return 1;
+	strncpy_skip_quote(vmcmd_on_poff, str, 127);
+	vmcmd_on_poff[127] = 0;
+	on_poff_trigger.action = &vmcmd_action;
+	return 1;
+}
+__setup("vmpoff=", vmcmd_on_poff_setup);
+
+static int on_panic_notify(struct notifier_block *self,
+			   unsigned long event, void *data)
+{
+	do_panic();
+	return NOTIFY_OK;
+}
+
+static struct notifier_block on_panic_nb = {
+	.notifier_call = on_panic_notify,
+	.priority = INT_MIN,
+};
+
+void __init setup_ipl(void)
+{
+	BUILD_BUG_ON(sizeof(struct ipl_parameter_block) != PAGE_SIZE);
+
+	ipl_info.type = get_ipl_type();
+	switch (ipl_info.type) {
+	case IPL_TYPE_CCW:
+		ipl_info.data.ccw.dev_id.ssid = ipl_block.ccw.ssid;
+		ipl_info.data.ccw.dev_id.devno = ipl_block.ccw.devno;
+		break;
+	case IPL_TYPE_ECKD:
+	case IPL_TYPE_ECKD_DUMP:
+		ipl_info.data.eckd.dev_id.ssid = ipl_block.eckd.ssid;
+		ipl_info.data.eckd.dev_id.devno = ipl_block.eckd.devno;
+		break;
+	case IPL_TYPE_FCP:
+	case IPL_TYPE_FCP_DUMP:
+		ipl_info.data.fcp.dev_id.ssid = 0;
+		ipl_info.data.fcp.dev_id.devno = ipl_block.fcp.devno;
+		ipl_info.data.fcp.wwpn = ipl_block.fcp.wwpn;
+		ipl_info.data.fcp.lun = ipl_block.fcp.lun;
+		break;
+	case IPL_TYPE_NVME:
+	case IPL_TYPE_NVME_DUMP:
+		ipl_info.data.nvme.fid = ipl_block.nvme.fid;
+		ipl_info.data.nvme.nsid = ipl_block.nvme.nsid;
+		break;
+	case IPL_TYPE_NSS:
+	case IPL_TYPE_UNKNOWN:
+		/* We have no info to copy */
+		break;
+	}
+	atomic_notifier_chain_register(&panic_notifier_list, &on_panic_nb);
+}
+
+void s390_reset_system(void)
+{
+	/* Disable prefixing */
+	set_prefix(0);
+
+	/* Disable lowcore protection */
+	__ctl_clear_bit(0, 28);
+	diag_amode31_ops.diag308_reset();
+}
+
+#ifdef CONFIG_KEXEC_FILE
+
+int ipl_report_add_component(struct ipl_report *report, struct kexec_buf *kbuf,
+			     unsigned char flags, unsigned short cert)
+{
+	struct ipl_report_component *comp;
+
+	comp = vzalloc(sizeof(*comp));
+	if (!comp)
+		return -ENOMEM;
+	list_add_tail(&comp->list, &report->components);
+
+	comp->entry.addr = kbuf->mem;
+	comp->entry.len = kbuf->memsz;
+	comp->entry.flags = flags;
+	comp->entry.certificate_index = cert;
+
+	report->size += sizeof(comp->entry);
+
+	return 0;
+}
+
+int ipl_report_add_certificate(struct ipl_report *report, void *key,
+			       unsigned long addr, unsigned long len)
+{
+	struct ipl_report_certificate *cert;
+
+	cert = vzalloc(sizeof(*cert));
+	if (!cert)
+		return -ENOMEM;
+	list_add_tail(&cert->list, &report->certificates);
+
+	cert->entry.addr = addr;
+	cert->entry.len = len;
+	cert->key = key;
+
+	report->size += sizeof(cert->entry);
+	report->size += cert->entry.len;
+
+	return 0;
+}
+
+struct ipl_report *ipl_report_init(struct ipl_parameter_block *ipib)
+{
+	struct ipl_report *report;
+
+	report = vzalloc(sizeof(*report));
+	if (!report)
+		return ERR_PTR(-ENOMEM);
+
+	report->ipib = ipib;
+	INIT_LIST_HEAD(&report->components);
+	INIT_LIST_HEAD(&report->certificates);
+
+	report->size = ALIGN(ipib->hdr.len, 8);
+	report->size += sizeof(struct ipl_rl_hdr);
+	report->size += sizeof(struct ipl_rb_components);
+	report->size += sizeof(struct ipl_rb_certificates);
+
+	return report;
+}
+
+void *ipl_report_finish(struct ipl_report *report)
+{
+	struct ipl_report_certificate *cert;
+	struct ipl_report_component *comp;
+	struct ipl_rb_certificates *certs;
+	struct ipl_parameter_block *ipib;
+	struct ipl_rb_components *comps;
+	struct ipl_rl_hdr *rl_hdr;
+	void *buf, *ptr;
+
+	buf = vzalloc(report->size);
+	if (!buf)
+		goto out;
+	ptr = buf;
+
+	memcpy(ptr, report->ipib, report->ipib->hdr.len);
+	ipib = ptr;
+	if (ipl_secure_flag)
+		ipib->hdr.flags |= IPL_PL_FLAG_SIPL;
+	ipib->hdr.flags |= IPL_PL_FLAG_IPLSR;
+	ptr += report->ipib->hdr.len;
+	ptr = PTR_ALIGN(ptr, 8);
+
+	rl_hdr = ptr;
+	ptr += sizeof(*rl_hdr);
+
+	comps = ptr;
+	comps->rbt = IPL_RBT_COMPONENTS;
+	ptr += sizeof(*comps);
+	list_for_each_entry(comp, &report->components, list) {
+		memcpy(ptr, &comp->entry, sizeof(comp->entry));
+		ptr += sizeof(comp->entry);
+	}
+	comps->len = ptr - (void *)comps;
+
+	certs = ptr;
+	certs->rbt = IPL_RBT_CERTIFICATES;
+	ptr += sizeof(*certs);
+	list_for_each_entry(cert, &report->certificates, list) {
+		memcpy(ptr, &cert->entry, sizeof(cert->entry));
+		ptr += sizeof(cert->entry);
+	}
+	certs->len = ptr - (void *)certs;
+	rl_hdr->len = ptr - (void *)rl_hdr;
+
+	list_for_each_entry(cert, &report->certificates, list) {
+		memcpy(ptr, cert->key, cert->entry.len);
+		ptr += cert->entry.len;
+	}
+
+	BUG_ON(ptr > buf + report->size);
+out:
+	return buf;
+}
+
+int ipl_report_free(struct ipl_report *report)
+{
+	struct ipl_report_component *comp, *ncomp;
+	struct ipl_report_certificate *cert, *ncert;
+
+	list_for_each_entry_safe(comp, ncomp, &report->components, list)
+		vfree(comp);
+
+	list_for_each_entry_safe(cert, ncert, &report->certificates, list)
+		vfree(cert);
+
+	vfree(report);
+
+	return 0;
+}
+
+#endif
diff --git a/arch/s390/kernel/ipl_vmparm.c b/arch/s390/kernel/ipl_vmparm.c
new file mode 100644
index 0000000000..b5245fadcf
--- /dev/null
+++ b/arch/s390/kernel/ipl_vmparm.c
@@ -0,0 +1,38 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/minmax.h>
+#include <linux/string.h>
+#include <asm/ebcdic.h>
+#include <asm/ipl.h>
+
+/* VM IPL PARM routines */
+size_t ipl_block_get_ascii_vmparm(char *dest, size_t size,
+				  const struct ipl_parameter_block *ipb)
+{
+	int i;
+	size_t len;
+	char has_lowercase = 0;
+
+	len = 0;
+	if ((ipb->ccw.vm_flags & IPL_PB0_CCW_VM_FLAG_VP) &&
+	    (ipb->ccw.vm_parm_len > 0)) {
+
+		len = min_t(size_t, size - 1, ipb->ccw.vm_parm_len);
+		memcpy(dest, ipb->ccw.vm_parm, len);
+		/* If at least one character is lowercase, we assume mixed
+		 * case; otherwise we convert everything to lowercase.
+		 */
+		for (i = 0; i < len; i++)
+			if ((dest[i] > 0x80 && dest[i] < 0x8a) || /* a-i */
+			    (dest[i] > 0x90 && dest[i] < 0x9a) || /* j-r */
+			    (dest[i] > 0xa1 && dest[i] < 0xaa)) { /* s-z */
+				has_lowercase = 1;
+				break;
+			}
+		if (!has_lowercase)
+			EBC_TOLOWER(dest, len);
+		EBCASC(dest, len);
+	}
+	dest[len] = 0;
+
+	return len;
+}
diff --git a/arch/s390/kernel/irq.c b/arch/s390/kernel/irq.c
new file mode 100644
index 0000000000..b020ff17d2
--- /dev/null
+++ b/arch/s390/kernel/irq.c
@@ -0,0 +1,402 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ *    Copyright IBM Corp. 2004, 2011
+ *    Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>,
+ *		 Holger Smolinski <Holger.Smolinski@de.ibm.com>,
+ *		 Thomas Spatzier <tspat@de.ibm.com>,
+ *
+ * This file contains interrupt related functions.
+ */
+
+#include <linux/kernel_stat.h>
+#include <linux/interrupt.h>
+#include <linux/seq_file.h>
+#include <linux/proc_fs.h>
+#include <linux/profile.h>
+#include <linux/export.h>
+#include <linux/kernel.h>
+#include <linux/ftrace.h>
+#include <linux/errno.h>
+#include <linux/slab.h>
+#include <linux/init.h>
+#include <linux/cpu.h>
+#include <linux/irq.h>
+#include <linux/entry-common.h>
+#include <asm/irq_regs.h>
+#include <asm/cputime.h>
+#include <asm/lowcore.h>
+#include <asm/irq.h>
+#include <asm/hw_irq.h>
+#include <asm/stacktrace.h>
+#include <asm/softirq_stack.h>
+#include "entry.h"
+
+DEFINE_PER_CPU_SHARED_ALIGNED(struct irq_stat, irq_stat);
+EXPORT_PER_CPU_SYMBOL_GPL(irq_stat);
+
+struct irq_class {
+	int irq;
+	char *name;
+	char *desc;
+};
+
+/*
+ * The list of "main" irq classes on s390. This is the list of interrupts
+ * that appear both in /proc/stat ("intr" line) and /proc/interrupts.
+ * Historically only external and I/O interrupts have been part of /proc/stat.
+ * We can't add the split external and I/O sub classes since the first field
+ * in the "intr" line in /proc/stat is supposed to be the sum of all other
+ * fields.
+ * Since the external and I/O interrupt fields are already sums we would end
+ * up with having a sum which accounts each interrupt twice.
+ */
+static const struct irq_class irqclass_main_desc[NR_IRQS_BASE] = {
+	{.irq = EXT_INTERRUPT,	.name = "EXT"},
+	{.irq = IO_INTERRUPT,	.name = "I/O"},
+	{.irq = THIN_INTERRUPT, .name = "AIO"},
+};
+
+/*
+ * The list of split external and I/O interrupts that appear only in
+ * /proc/interrupts.
+ * In addition this list contains non external / I/O events like NMIs.
+ */
+static const struct irq_class irqclass_sub_desc[] = {
+	{.irq = IRQEXT_CLK, .name = "CLK", .desc = "[EXT] Clock Comparator"},
+	{.irq = IRQEXT_EXC, .name = "EXC", .desc = "[EXT] External Call"},
+	{.irq = IRQEXT_EMS, .name = "EMS", .desc = "[EXT] Emergency Signal"},
+	{.irq = IRQEXT_TMR, .name = "TMR", .desc = "[EXT] CPU Timer"},
+	{.irq = IRQEXT_TLA, .name = "TAL", .desc = "[EXT] Timing Alert"},
+	{.irq = IRQEXT_PFL, .name = "PFL", .desc = "[EXT] Pseudo Page Fault"},
+	{.irq = IRQEXT_DSD, .name = "DSD", .desc = "[EXT] DASD Diag"},
+	{.irq = IRQEXT_VRT, .name = "VRT", .desc = "[EXT] Virtio"},
+	{.irq = IRQEXT_SCP, .name = "SCP", .desc = "[EXT] Service Call"},
+	{.irq = IRQEXT_IUC, .name = "IUC", .desc = "[EXT] IUCV"},
+	{.irq = IRQEXT_CMS, .name = "CMS", .desc = "[EXT] CPU-Measurement: Sampling"},
+	{.irq = IRQEXT_CMC, .name = "CMC", .desc = "[EXT] CPU-Measurement: Counter"},
+	{.irq = IRQEXT_FTP, .name = "FTP", .desc = "[EXT] HMC FTP Service"},
+	{.irq = IRQIO_CIO,  .name = "CIO", .desc = "[I/O] Common I/O Layer Interrupt"},
+	{.irq = IRQIO_DAS,  .name = "DAS", .desc = "[I/O] DASD"},
+	{.irq = IRQIO_C15,  .name = "C15", .desc = "[I/O] 3215"},
+	{.irq = IRQIO_C70,  .name = "C70", .desc = "[I/O] 3270"},
+	{.irq = IRQIO_TAP,  .name = "TAP", .desc = "[I/O] Tape"},
+	{.irq = IRQIO_VMR,  .name = "VMR", .desc = "[I/O] Unit Record Devices"},
+	{.irq = IRQIO_LCS,  .name = "LCS", .desc = "[I/O] LCS"},
+	{.irq = IRQIO_CTC,  .name = "CTC", .desc = "[I/O] CTC"},
+	{.irq = IRQIO_ADM,  .name = "ADM", .desc = "[I/O] EADM Subchannel"},
+	{.irq = IRQIO_CSC,  .name = "CSC", .desc = "[I/O] CHSC Subchannel"},
+	{.irq = IRQIO_VIR,  .name = "VIR", .desc = "[I/O] Virtual I/O Devices"},
+	{.irq = IRQIO_QAI,  .name = "QAI", .desc = "[AIO] QDIO Adapter Interrupt"},
+	{.irq = IRQIO_APB,  .name = "APB", .desc = "[AIO] AP Bus"},
+	{.irq = IRQIO_PCF,  .name = "PCF", .desc = "[AIO] PCI Floating Interrupt"},
+	{.irq = IRQIO_PCD,  .name = "PCD", .desc = "[AIO] PCI Directed Interrupt"},
+	{.irq = IRQIO_MSI,  .name = "MSI", .desc = "[AIO] MSI Interrupt"},
+	{.irq = IRQIO_VAI,  .name = "VAI", .desc = "[AIO] Virtual I/O Devices AI"},
+	{.irq = IRQIO_GAL,  .name = "GAL", .desc = "[AIO] GIB Alert"},
+	{.irq = NMI_NMI,    .name = "NMI", .desc = "[NMI] Machine Check"},
+	{.irq = CPU_RST,    .name = "RST", .desc = "[CPU] CPU Restart"},
+};
+
+static void do_IRQ(struct pt_regs *regs, int irq)
+{
+	if (tod_after_eq(S390_lowcore.int_clock,
+			 S390_lowcore.clock_comparator))
+		/* Serve timer interrupts first. */
+		clock_comparator_work();
+	generic_handle_irq(irq);
+}
+
+static int on_async_stack(void)
+{
+	unsigned long frame = current_frame_address();
+
+	return ((S390_lowcore.async_stack ^ frame) & ~(THREAD_SIZE - 1)) == 0;
+}
+
+static void do_irq_async(struct pt_regs *regs, int irq)
+{
+	if (on_async_stack()) {
+		do_IRQ(regs, irq);
+	} else {
+		call_on_stack(2, S390_lowcore.async_stack, void, do_IRQ,
+			      struct pt_regs *, regs, int, irq);
+	}
+}
+
+static int irq_pending(struct pt_regs *regs)
+{
+	int cc;
+
+	asm volatile("tpi 0\n"
+		     "ipm %0" : "=d" (cc) : : "cc");
+	return cc >> 28;
+}
+
+void noinstr do_io_irq(struct pt_regs *regs)
+{
+	irqentry_state_t state = irqentry_enter(regs);
+	struct pt_regs *old_regs = set_irq_regs(regs);
+	bool from_idle;
+
+	irq_enter_rcu();
+
+	if (user_mode(regs)) {
+		update_timer_sys();
+		if (static_branch_likely(&cpu_has_bear))
+			current->thread.last_break = regs->last_break;
+	}
+
+	from_idle = test_and_clear_cpu_flag(CIF_ENABLED_WAIT);
+	if (from_idle)
+		account_idle_time_irq();
+
+	do {
+		regs->tpi_info = S390_lowcore.tpi_info;
+		if (S390_lowcore.tpi_info.adapter_IO)
+			do_irq_async(regs, THIN_INTERRUPT);
+		else
+			do_irq_async(regs, IO_INTERRUPT);
+	} while (MACHINE_IS_LPAR && irq_pending(regs));
+
+	irq_exit_rcu();
+
+	set_irq_regs(old_regs);
+	irqentry_exit(regs, state);
+
+	if (from_idle)
+		regs->psw.mask &= ~(PSW_MASK_EXT | PSW_MASK_IO | PSW_MASK_WAIT);
+}
+
+void noinstr do_ext_irq(struct pt_regs *regs)
+{
+	irqentry_state_t state = irqentry_enter(regs);
+	struct pt_regs *old_regs = set_irq_regs(regs);
+	bool from_idle;
+
+	irq_enter_rcu();
+
+	if (user_mode(regs)) {
+		update_timer_sys();
+		if (static_branch_likely(&cpu_has_bear))
+			current->thread.last_break = regs->last_break;
+	}
+
+	regs->int_code = S390_lowcore.ext_int_code_addr;
+	regs->int_parm = S390_lowcore.ext_params;
+	regs->int_parm_long = S390_lowcore.ext_params2;
+
+	from_idle = test_and_clear_cpu_flag(CIF_ENABLED_WAIT);
+	if (from_idle)
+		account_idle_time_irq();
+
+	do_irq_async(regs, EXT_INTERRUPT);
+
+	irq_exit_rcu();
+	set_irq_regs(old_regs);
+	irqentry_exit(regs, state);
+
+	if (from_idle)
+		regs->psw.mask &= ~(PSW_MASK_EXT | PSW_MASK_IO | PSW_MASK_WAIT);
+}
+
+static void show_msi_interrupt(struct seq_file *p, int irq)
+{
+	struct irq_desc *desc;
+	unsigned long flags;
+	int cpu;
+
+	rcu_read_lock();
+	desc = irq_to_desc(irq);
+	if (!desc)
+		goto out;
+
+	raw_spin_lock_irqsave(&desc->lock, flags);
+	seq_printf(p, "%3d: ", irq);
+	for_each_online_cpu(cpu)
+		seq_printf(p, "%10u ", irq_desc_kstat_cpu(desc, cpu));
+
+	if (desc->irq_data.chip)
+		seq_printf(p, " %8s", desc->irq_data.chip->name);
+
+	if (desc->action)
+		seq_printf(p, "  %s", desc->action->name);
+
+	seq_putc(p, '\n');
+	raw_spin_unlock_irqrestore(&desc->lock, flags);
+out:
+	rcu_read_unlock();
+}
+
+/*
+ * show_interrupts is needed by /proc/interrupts.
+ */
+int show_interrupts(struct seq_file *p, void *v)
+{
+	int index = *(loff_t *) v;
+	int cpu, irq;
+
+	cpus_read_lock();
+	if (index == 0) {
+		seq_puts(p, "           ");
+		for_each_online_cpu(cpu)
+			seq_printf(p, "CPU%-8d", cpu);
+		seq_putc(p, '\n');
+	}
+	if (index < NR_IRQS_BASE) {
+		seq_printf(p, "%s: ", irqclass_main_desc[index].name);
+		irq = irqclass_main_desc[index].irq;
+		for_each_online_cpu(cpu)
+			seq_printf(p, "%10u ", kstat_irqs_cpu(irq, cpu));
+		seq_putc(p, '\n');
+		goto out;
+	}
+	if (index < nr_irqs) {
+		show_msi_interrupt(p, index);
+		goto out;
+	}
+	for (index = 0; index < NR_ARCH_IRQS; index++) {
+		seq_printf(p, "%s: ", irqclass_sub_desc[index].name);
+		irq = irqclass_sub_desc[index].irq;
+		for_each_online_cpu(cpu)
+			seq_printf(p, "%10u ",
+				   per_cpu(irq_stat, cpu).irqs[irq]);
+		if (irqclass_sub_desc[index].desc)
+			seq_printf(p, "  %s", irqclass_sub_desc[index].desc);
+		seq_putc(p, '\n');
+	}
+out:
+	cpus_read_unlock();
+	return 0;
+}
+
+unsigned int arch_dynirq_lower_bound(unsigned int from)
+{
+	return from < NR_IRQS_BASE ? NR_IRQS_BASE : from;
+}
+
+/*
+ * ext_int_hash[index] is the list head for all external interrupts that hash
+ * to this index.
+ */
+static struct hlist_head ext_int_hash[32] ____cacheline_aligned;
+
+struct ext_int_info {
+	ext_int_handler_t handler;
+	struct hlist_node entry;
+	struct rcu_head rcu;
+	u16 code;
+};
+
+/* ext_int_hash_lock protects the handler lists for external interrupts */
+static DEFINE_SPINLOCK(ext_int_hash_lock);
+
+static inline int ext_hash(u16 code)
+{
+	BUILD_BUG_ON(!is_power_of_2(ARRAY_SIZE(ext_int_hash)));
+
+	return (code + (code >> 9)) & (ARRAY_SIZE(ext_int_hash) - 1);
+}
+
+int register_external_irq(u16 code, ext_int_handler_t handler)
+{
+	struct ext_int_info *p;
+	unsigned long flags;
+	int index;
+
+	p = kmalloc(sizeof(*p), GFP_ATOMIC);
+	if (!p)
+		return -ENOMEM;
+	p->code = code;
+	p->handler = handler;
+	index = ext_hash(code);
+
+	spin_lock_irqsave(&ext_int_hash_lock, flags);
+	hlist_add_head_rcu(&p->entry, &ext_int_hash[index]);
+	spin_unlock_irqrestore(&ext_int_hash_lock, flags);
+	return 0;
+}
+EXPORT_SYMBOL(register_external_irq);
+
+int unregister_external_irq(u16 code, ext_int_handler_t handler)
+{
+	struct ext_int_info *p;
+	unsigned long flags;
+	int index = ext_hash(code);
+
+	spin_lock_irqsave(&ext_int_hash_lock, flags);
+	hlist_for_each_entry_rcu(p, &ext_int_hash[index], entry) {
+		if (p->code == code && p->handler == handler) {
+			hlist_del_rcu(&p->entry);
+			kfree_rcu(p, rcu);
+		}
+	}
+	spin_unlock_irqrestore(&ext_int_hash_lock, flags);
+	return 0;
+}
+EXPORT_SYMBOL(unregister_external_irq);
+
+static irqreturn_t do_ext_interrupt(int irq, void *dummy)
+{
+	struct pt_regs *regs = get_irq_regs();
+	struct ext_code ext_code;
+	struct ext_int_info *p;
+	int index;
+
+	ext_code.int_code = regs->int_code;
+	if (ext_code.code != EXT_IRQ_CLK_COMP)
+		set_cpu_flag(CIF_NOHZ_DELAY);
+
+	index = ext_hash(ext_code.code);
+	rcu_read_lock();
+	hlist_for_each_entry_rcu(p, &ext_int_hash[index], entry) {
+		if (unlikely(p->code != ext_code.code))
+			continue;
+		p->handler(ext_code, regs->int_parm, regs->int_parm_long);
+	}
+	rcu_read_unlock();
+	return IRQ_HANDLED;
+}
+
+static void __init init_ext_interrupts(void)
+{
+	int idx;
+
+	for (idx = 0; idx < ARRAY_SIZE(ext_int_hash); idx++)
+		INIT_HLIST_HEAD(&ext_int_hash[idx]);
+
+	irq_set_chip_and_handler(EXT_INTERRUPT,
+				 &dummy_irq_chip, handle_percpu_irq);
+	if (request_irq(EXT_INTERRUPT, do_ext_interrupt, 0, "EXT", NULL))
+		panic("Failed to register EXT interrupt\n");
+}
+
+void __init init_IRQ(void)
+{
+	BUILD_BUG_ON(ARRAY_SIZE(irqclass_sub_desc) != NR_ARCH_IRQS);
+	init_cio_interrupts();
+	init_airq_interrupts();
+	init_ext_interrupts();
+}
+
+static DEFINE_SPINLOCK(irq_subclass_lock);
+static unsigned char irq_subclass_refcount[64];
+
+void irq_subclass_register(enum irq_subclass subclass)
+{
+	spin_lock(&irq_subclass_lock);
+	if (!irq_subclass_refcount[subclass])
+		ctl_set_bit(0, subclass);
+	irq_subclass_refcount[subclass]++;
+	spin_unlock(&irq_subclass_lock);
+}
+EXPORT_SYMBOL(irq_subclass_register);
+
+void irq_subclass_unregister(enum irq_subclass subclass)
+{
+	spin_lock(&irq_subclass_lock);
+	irq_subclass_refcount[subclass]--;
+	if (!irq_subclass_refcount[subclass])
+		ctl_clear_bit(0, subclass);
+	spin_unlock(&irq_subclass_lock);
+}
+EXPORT_SYMBOL(irq_subclass_unregister);
diff --git a/arch/s390/kernel/jump_label.c b/arch/s390/kernel/jump_label.c
new file mode 100644
index 0000000000..e808bb8bc0
--- /dev/null
+++ b/arch/s390/kernel/jump_label.c
@@ -0,0 +1,82 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Jump label s390 support
+ *
+ * Copyright IBM Corp. 2011
+ * Author(s): Jan Glauber <jang@linux.vnet.ibm.com>
+ */
+#include <linux/uaccess.h>
+#include <linux/jump_label.h>
+#include <linux/module.h>
+#include <asm/text-patching.h>
+#include <asm/ipl.h>
+
+struct insn {
+	u16 opcode;
+	s32 offset;
+} __packed;
+
+static void jump_label_make_nop(struct jump_entry *entry, struct insn *insn)
+{
+	/* brcl 0,offset */
+	insn->opcode = 0xc004;
+	insn->offset = (jump_entry_target(entry) - jump_entry_code(entry)) >> 1;
+}
+
+static void jump_label_make_branch(struct jump_entry *entry, struct insn *insn)
+{
+	/* brcl 15,offset */
+	insn->opcode = 0xc0f4;
+	insn->offset = (jump_entry_target(entry) - jump_entry_code(entry)) >> 1;
+}
+
+static void jump_label_bug(struct jump_entry *entry, struct insn *expected,
+			   struct insn *new)
+{
+	unsigned char *ipc = (unsigned char *)jump_entry_code(entry);
+	unsigned char *ipe = (unsigned char *)expected;
+	unsigned char *ipn = (unsigned char *)new;
+
+	pr_emerg("Jump label code mismatch at %pS [%px]\n", ipc, ipc);
+	pr_emerg("Found:    %6ph\n", ipc);
+	pr_emerg("Expected: %6ph\n", ipe);
+	pr_emerg("New:      %6ph\n", ipn);
+	panic("Corrupted kernel text");
+}
+
+static void jump_label_transform(struct jump_entry *entry,
+				 enum jump_label_type type)
+{
+	void *code = (void *)jump_entry_code(entry);
+	struct insn old, new;
+
+	if (type == JUMP_LABEL_JMP) {
+		jump_label_make_nop(entry, &old);
+		jump_label_make_branch(entry, &new);
+	} else {
+		jump_label_make_branch(entry, &old);
+		jump_label_make_nop(entry, &new);
+	}
+	if (memcmp(code, &old, sizeof(old)))
+		jump_label_bug(entry, &old, &new);
+	s390_kernel_write(code, &new, sizeof(new));
+}
+
+void arch_jump_label_transform(struct jump_entry *entry,
+			       enum jump_label_type type)
+{
+	jump_label_transform(entry, type);
+	text_poke_sync();
+}
+
+bool arch_jump_label_transform_queue(struct jump_entry *entry,
+				     enum jump_label_type type)
+{
+	jump_label_transform(entry, type);
+	return true;
+}
+
+void arch_jump_label_transform_apply(void)
+{
+	text_poke_sync();
+}
diff --git a/arch/s390/kernel/kdebugfs.c b/arch/s390/kernel/kdebugfs.c
new file mode 100644
index 0000000000..33130c7daf
--- /dev/null
+++ b/arch/s390/kernel/kdebugfs.c
@@ -0,0 +1,14 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/debugfs.h>
+#include <linux/export.h>
+#include <linux/init.h>
+
+struct dentry *arch_debugfs_dir;
+EXPORT_SYMBOL(arch_debugfs_dir);
+
+static int __init arch_kdebugfs_init(void)
+{
+	arch_debugfs_dir = debugfs_create_dir("s390", NULL);
+	return 0;
+}
+postcore_initcall(arch_kdebugfs_init);
diff --git a/arch/s390/kernel/kexec_elf.c b/arch/s390/kernel/kexec_elf.c
new file mode 100644
index 0000000000..9da6fa30c4
--- /dev/null
+++ b/arch/s390/kernel/kexec_elf.c
@@ -0,0 +1,136 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * ELF loader for kexec_file_load system call.
+ *
+ * Copyright IBM Corp. 2018
+ *
+ * Author(s): Philipp Rudo <prudo@linux.vnet.ibm.com>
+ */
+
+#include <linux/errno.h>
+#include <linux/kernel.h>
+#include <linux/kexec.h>
+#include <asm/ipl.h>
+#include <asm/setup.h>
+
+static int kexec_file_add_kernel_elf(struct kimage *image,
+				     struct s390_load_data *data)
+{
+	struct kexec_buf buf;
+	const Elf_Ehdr *ehdr;
+	const Elf_Phdr *phdr;
+	Elf_Addr entry;
+	void *kernel;
+	int i, ret;
+
+	kernel = image->kernel_buf;
+	ehdr = (Elf_Ehdr *)kernel;
+	buf.image = image;
+	if (image->type == KEXEC_TYPE_CRASH)
+		entry = STARTUP_KDUMP_OFFSET;
+	else
+		entry = ehdr->e_entry;
+
+	phdr = (void *)ehdr + ehdr->e_phoff;
+	for (i = 0; i < ehdr->e_phnum; i++, phdr++) {
+		if (phdr->p_type != PT_LOAD)
+			continue;
+
+		buf.buffer = kernel + phdr->p_offset;
+		buf.bufsz = phdr->p_filesz;
+
+		buf.mem = ALIGN(phdr->p_paddr, phdr->p_align);
+		if (image->type == KEXEC_TYPE_CRASH)
+			buf.mem += crashk_res.start;
+		buf.memsz = phdr->p_memsz;
+		data->memsz = ALIGN(data->memsz, phdr->p_align) + buf.memsz;
+
+		if (entry - phdr->p_paddr < phdr->p_memsz) {
+			data->kernel_buf = buf.buffer;
+			data->kernel_mem = buf.mem;
+			data->parm = buf.buffer + PARMAREA;
+		}
+
+		ipl_report_add_component(data->report, &buf,
+					 IPL_RB_COMPONENT_FLAG_SIGNED |
+					 IPL_RB_COMPONENT_FLAG_VERIFIED,
+					 IPL_RB_CERT_UNKNOWN);
+		ret = kexec_add_buffer(&buf);
+		if (ret)
+			return ret;
+	}
+
+	return data->memsz ? 0 : -EINVAL;
+}
+
+static void *s390_elf_load(struct kimage *image,
+			   char *kernel, unsigned long kernel_len,
+			   char *initrd, unsigned long initrd_len,
+			   char *cmdline, unsigned long cmdline_len)
+{
+	const Elf_Ehdr *ehdr;
+	const Elf_Phdr *phdr;
+	size_t size;
+	int i;
+
+	/* image->fobs->probe already checked for valid ELF magic number. */
+	ehdr = (Elf_Ehdr *)kernel;
+
+	if (ehdr->e_type != ET_EXEC ||
+	    ehdr->e_ident[EI_CLASS] != ELFCLASS64 ||
+	    !elf_check_arch(ehdr))
+		return ERR_PTR(-EINVAL);
+
+	if (!ehdr->e_phnum || ehdr->e_phentsize != sizeof(Elf_Phdr))
+		return ERR_PTR(-EINVAL);
+
+	size = ehdr->e_ehsize + ehdr->e_phoff;
+	size += ehdr->e_phentsize * ehdr->e_phnum;
+	if (size > kernel_len)
+		return ERR_PTR(-EINVAL);
+
+	phdr = (void *)ehdr + ehdr->e_phoff;
+	size = ALIGN(size, phdr->p_align);
+	for (i = 0; i < ehdr->e_phnum; i++, phdr++) {
+		if (phdr->p_type == PT_INTERP)
+			return ERR_PTR(-EINVAL);
+
+		if (phdr->p_offset > kernel_len)
+			return ERR_PTR(-EINVAL);
+
+		size += ALIGN(phdr->p_filesz, phdr->p_align);
+	}
+
+	if (size > kernel_len)
+		return ERR_PTR(-EINVAL);
+
+	return kexec_file_add_components(image, kexec_file_add_kernel_elf);
+}
+
+static int s390_elf_probe(const char *buf, unsigned long len)
+{
+	const Elf_Ehdr *ehdr;
+
+	if (len < sizeof(Elf_Ehdr))
+		return -ENOEXEC;
+
+	ehdr = (Elf_Ehdr *)buf;
+
+	/* Only check the ELF magic number here and do proper validity check
+	 * in the loader. Any check here that fails would send the erroneous
+	 * ELF file to the image loader that does not care what it gets.
+	 * (Most likely) causing behavior not intended by the user.
+	 */
+	if (memcmp(ehdr->e_ident, ELFMAG, SELFMAG) != 0)
+		return -ENOEXEC;
+
+	return 0;
+}
+
+const struct kexec_file_ops s390_kexec_elf_ops = {
+	.probe = s390_elf_probe,
+	.load = s390_elf_load,
+#ifdef CONFIG_KEXEC_SIG
+	.verify_sig = s390_verify_sig,
+#endif /* CONFIG_KEXEC_SIG */
+};
diff --git a/arch/s390/kernel/kexec_image.c b/arch/s390/kernel/kexec_image.c
new file mode 100644
index 0000000000..af23eff577
--- /dev/null
+++ b/arch/s390/kernel/kexec_image.c
@@ -0,0 +1,65 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Image loader for kexec_file_load system call.
+ *
+ * Copyright IBM Corp. 2018
+ *
+ * Author(s): Philipp Rudo <prudo@linux.vnet.ibm.com>
+ */
+
+#include <linux/errno.h>
+#include <linux/kernel.h>
+#include <linux/kexec.h>
+#include <asm/ipl.h>
+#include <asm/setup.h>
+
+static int kexec_file_add_kernel_image(struct kimage *image,
+				       struct s390_load_data *data)
+{
+	struct kexec_buf buf;
+
+	buf.image = image;
+
+	buf.buffer = image->kernel_buf;
+	buf.bufsz = image->kernel_buf_len;
+
+	buf.mem = 0;
+	if (image->type == KEXEC_TYPE_CRASH)
+		buf.mem += crashk_res.start;
+	buf.memsz = buf.bufsz;
+
+	data->kernel_buf = image->kernel_buf;
+	data->kernel_mem = buf.mem;
+	data->parm = image->kernel_buf + PARMAREA;
+	data->memsz += buf.memsz;
+
+	ipl_report_add_component(data->report, &buf,
+				 IPL_RB_COMPONENT_FLAG_SIGNED |
+				 IPL_RB_COMPONENT_FLAG_VERIFIED,
+				 IPL_RB_CERT_UNKNOWN);
+	return kexec_add_buffer(&buf);
+}
+
+static void *s390_image_load(struct kimage *image,
+			     char *kernel, unsigned long kernel_len,
+			     char *initrd, unsigned long initrd_len,
+			     char *cmdline, unsigned long cmdline_len)
+{
+	return kexec_file_add_components(image, kexec_file_add_kernel_image);
+}
+
+static int s390_image_probe(const char *buf, unsigned long len)
+{
+	/* Can't reliably tell if an image is valid.  Therefore give the
+	 * user whatever he wants.
+	 */
+	return 0;
+}
+
+const struct kexec_file_ops s390_kexec_image_ops = {
+	.probe = s390_image_probe,
+	.load = s390_image_load,
+#ifdef CONFIG_KEXEC_SIG
+	.verify_sig = s390_verify_sig,
+#endif /* CONFIG_KEXEC_SIG */
+};
diff --git a/arch/s390/kernel/kprobes.c b/arch/s390/kernel/kprobes.c
new file mode 100644
index 0000000000..d4b863ed0a
--- /dev/null
+++ b/arch/s390/kernel/kprobes.c
@@ -0,0 +1,525 @@
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ *  Kernel Probes (KProbes)
+ *
+ * Copyright IBM Corp. 2002, 2006
+ *
+ * s390 port, used ppc64 as template. Mike Grundy <grundym@us.ibm.com>
+ */
+
+#define pr_fmt(fmt) "kprobes: " fmt
+
+#include <linux/moduleloader.h>
+#include <linux/kprobes.h>
+#include <linux/ptrace.h>
+#include <linux/preempt.h>
+#include <linux/stop_machine.h>
+#include <linux/kdebug.h>
+#include <linux/uaccess.h>
+#include <linux/extable.h>
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <linux/hardirq.h>
+#include <linux/ftrace.h>
+#include <asm/set_memory.h>
+#include <asm/sections.h>
+#include <asm/dis.h>
+#include "kprobes.h"
+#include "entry.h"
+
+DEFINE_PER_CPU(struct kprobe *, current_kprobe);
+DEFINE_PER_CPU(struct kprobe_ctlblk, kprobe_ctlblk);
+
+struct kretprobe_blackpoint kretprobe_blacklist[] = { };
+
+static int insn_page_in_use;
+
+void *alloc_insn_page(void)
+{
+	void *page;
+
+	page = module_alloc(PAGE_SIZE);
+	if (!page)
+		return NULL;
+	set_memory_rox((unsigned long)page, 1);
+	return page;
+}
+
+static void *alloc_s390_insn_page(void)
+{
+	if (xchg(&insn_page_in_use, 1) == 1)
+		return NULL;
+	return &kprobes_insn_page;
+}
+
+static void free_s390_insn_page(void *page)
+{
+	xchg(&insn_page_in_use, 0);
+}
+
+struct kprobe_insn_cache kprobe_s390_insn_slots = {
+	.mutex = __MUTEX_INITIALIZER(kprobe_s390_insn_slots.mutex),
+	.alloc = alloc_s390_insn_page,
+	.free = free_s390_insn_page,
+	.pages = LIST_HEAD_INIT(kprobe_s390_insn_slots.pages),
+	.insn_size = MAX_INSN_SIZE,
+};
+
+static void copy_instruction(struct kprobe *p)
+{
+	kprobe_opcode_t insn[MAX_INSN_SIZE];
+	s64 disp, new_disp;
+	u64 addr, new_addr;
+	unsigned int len;
+
+	len = insn_length(*p->addr >> 8);
+	memcpy(&insn, p->addr, len);
+	p->opcode = insn[0];
+	if (probe_is_insn_relative_long(&insn[0])) {
+		/*
+		 * For pc-relative instructions in RIL-b or RIL-c format patch
+		 * the RI2 displacement field. We have already made sure that
+		 * the insn slot for the patched instruction is within the same
+		 * 2GB area as the original instruction (either kernel image or
+		 * module area). Therefore the new displacement will always fit.
+		 */
+		disp = *(s32 *)&insn[1];
+		addr = (u64)(unsigned long)p->addr;
+		new_addr = (u64)(unsigned long)p->ainsn.insn;
+		new_disp = ((addr + (disp * 2)) - new_addr) / 2;
+		*(s32 *)&insn[1] = new_disp;
+	}
+	s390_kernel_write(p->ainsn.insn, &insn, len);
+}
+NOKPROBE_SYMBOL(copy_instruction);
+
+static int s390_get_insn_slot(struct kprobe *p)
+{
+	/*
+	 * Get an insn slot that is within the same 2GB area like the original
+	 * instruction. That way instructions with a 32bit signed displacement
+	 * field can be patched and executed within the insn slot.
+	 */
+	p->ainsn.insn = NULL;
+	if (is_kernel((unsigned long)p->addr))
+		p->ainsn.insn = get_s390_insn_slot();
+	else if (is_module_addr(p->addr))
+		p->ainsn.insn = get_insn_slot();
+	return p->ainsn.insn ? 0 : -ENOMEM;
+}
+NOKPROBE_SYMBOL(s390_get_insn_slot);
+
+static void s390_free_insn_slot(struct kprobe *p)
+{
+	if (!p->ainsn.insn)
+		return;
+	if (is_kernel((unsigned long)p->addr))
+		free_s390_insn_slot(p->ainsn.insn, 0);
+	else
+		free_insn_slot(p->ainsn.insn, 0);
+	p->ainsn.insn = NULL;
+}
+NOKPROBE_SYMBOL(s390_free_insn_slot);
+
+/* Check if paddr is at an instruction boundary */
+static bool can_probe(unsigned long paddr)
+{
+	unsigned long addr, offset = 0;
+	kprobe_opcode_t insn;
+	struct kprobe *kp;
+
+	if (paddr & 0x01)
+		return false;
+
+	if (!kallsyms_lookup_size_offset(paddr, NULL, &offset))
+		return false;
+
+	/* Decode instructions */
+	addr = paddr - offset;
+	while (addr < paddr) {
+		if (copy_from_kernel_nofault(&insn, (void *)addr, sizeof(insn)))
+			return false;
+
+		if (insn >> 8 == 0) {
+			if (insn != BREAKPOINT_INSTRUCTION) {
+				/*
+				 * Note that QEMU inserts opcode 0x0000 to implement
+				 * software breakpoints for guests. Since the size of
+				 * the original instruction is unknown, stop following
+				 * instructions and prevent setting a kprobe.
+				 */
+				return false;
+			}
+			/*
+			 * Check if the instruction has been modified by another
+			 * kprobe, in which case the original instruction is
+			 * decoded.
+			 */
+			kp = get_kprobe((void *)addr);
+			if (!kp) {
+				/* not a kprobe */
+				return false;
+			}
+			insn = kp->opcode;
+		}
+		addr += insn_length(insn >> 8);
+	}
+	return addr == paddr;
+}
+
+int arch_prepare_kprobe(struct kprobe *p)
+{
+	if (!can_probe((unsigned long)p->addr))
+		return -EINVAL;
+	/* Make sure the probe isn't going on a difficult instruction */
+	if (probe_is_prohibited_opcode(p->addr))
+		return -EINVAL;
+	if (s390_get_insn_slot(p))
+		return -ENOMEM;
+	copy_instruction(p);
+	return 0;
+}
+NOKPROBE_SYMBOL(arch_prepare_kprobe);
+
+struct swap_insn_args {
+	struct kprobe *p;
+	unsigned int arm_kprobe : 1;
+};
+
+static int swap_instruction(void *data)
+{
+	struct swap_insn_args *args = data;
+	struct kprobe *p = args->p;
+	u16 opc;
+
+	opc = args->arm_kprobe ? BREAKPOINT_INSTRUCTION : p->opcode;
+	s390_kernel_write(p->addr, &opc, sizeof(opc));
+	return 0;
+}
+NOKPROBE_SYMBOL(swap_instruction);
+
+void arch_arm_kprobe(struct kprobe *p)
+{
+	struct swap_insn_args args = {.p = p, .arm_kprobe = 1};
+
+	stop_machine_cpuslocked(swap_instruction, &args, NULL);
+}
+NOKPROBE_SYMBOL(arch_arm_kprobe);
+
+void arch_disarm_kprobe(struct kprobe *p)
+{
+	struct swap_insn_args args = {.p = p, .arm_kprobe = 0};
+
+	stop_machine_cpuslocked(swap_instruction, &args, NULL);
+}
+NOKPROBE_SYMBOL(arch_disarm_kprobe);
+
+void arch_remove_kprobe(struct kprobe *p)
+{
+	s390_free_insn_slot(p);
+}
+NOKPROBE_SYMBOL(arch_remove_kprobe);
+
+static void enable_singlestep(struct kprobe_ctlblk *kcb,
+			      struct pt_regs *regs,
+			      unsigned long ip)
+{
+	struct per_regs per_kprobe;
+
+	/* Set up the PER control registers %cr9-%cr11 */
+	per_kprobe.control = PER_EVENT_IFETCH;
+	per_kprobe.start = ip;
+	per_kprobe.end = ip;
+
+	/* Save control regs and psw mask */
+	__ctl_store(kcb->kprobe_saved_ctl, 9, 11);
+	kcb->kprobe_saved_imask = regs->psw.mask &
+		(PSW_MASK_PER | PSW_MASK_IO | PSW_MASK_EXT);
+
+	/* Set PER control regs, turns on single step for the given address */
+	__ctl_load(per_kprobe, 9, 11);
+	regs->psw.mask |= PSW_MASK_PER;
+	regs->psw.mask &= ~(PSW_MASK_IO | PSW_MASK_EXT);
+	regs->psw.addr = ip;
+}
+NOKPROBE_SYMBOL(enable_singlestep);
+
+static void disable_singlestep(struct kprobe_ctlblk *kcb,
+			       struct pt_regs *regs,
+			       unsigned long ip)
+{
+	/* Restore control regs and psw mask, set new psw address */
+	__ctl_load(kcb->kprobe_saved_ctl, 9, 11);
+	regs->psw.mask &= ~PSW_MASK_PER;
+	regs->psw.mask |= kcb->kprobe_saved_imask;
+	regs->psw.addr = ip;
+}
+NOKPROBE_SYMBOL(disable_singlestep);
+
+/*
+ * Activate a kprobe by storing its pointer to current_kprobe. The
+ * previous kprobe is stored in kcb->prev_kprobe. A stack of up to
+ * two kprobes can be active, see KPROBE_REENTER.
+ */
+static void push_kprobe(struct kprobe_ctlblk *kcb, struct kprobe *p)
+{
+	kcb->prev_kprobe.kp = __this_cpu_read(current_kprobe);
+	kcb->prev_kprobe.status = kcb->kprobe_status;
+	__this_cpu_write(current_kprobe, p);
+}
+NOKPROBE_SYMBOL(push_kprobe);
+
+/*
+ * Deactivate a kprobe by backing up to the previous state. If the
+ * current state is KPROBE_REENTER prev_kprobe.kp will be non-NULL,
+ * for any other state prev_kprobe.kp will be NULL.
+ */
+static void pop_kprobe(struct kprobe_ctlblk *kcb)
+{
+	__this_cpu_write(current_kprobe, kcb->prev_kprobe.kp);
+	kcb->kprobe_status = kcb->prev_kprobe.status;
+	kcb->prev_kprobe.kp = NULL;
+}
+NOKPROBE_SYMBOL(pop_kprobe);
+
+static void kprobe_reenter_check(struct kprobe_ctlblk *kcb, struct kprobe *p)
+{
+	switch (kcb->kprobe_status) {
+	case KPROBE_HIT_SSDONE:
+	case KPROBE_HIT_ACTIVE:
+		kprobes_inc_nmissed_count(p);
+		break;
+	case KPROBE_HIT_SS:
+	case KPROBE_REENTER:
+	default:
+		/*
+		 * A kprobe on the code path to single step an instruction
+		 * is a BUG. The code path resides in the .kprobes.text
+		 * section and is executed with interrupts disabled.
+		 */
+		pr_err("Failed to recover from reentered kprobes.\n");
+		dump_kprobe(p);
+		BUG();
+	}
+}
+NOKPROBE_SYMBOL(kprobe_reenter_check);
+
+static int kprobe_handler(struct pt_regs *regs)
+{
+	struct kprobe_ctlblk *kcb;
+	struct kprobe *p;
+
+	/*
+	 * We want to disable preemption for the entire duration of kprobe
+	 * processing. That includes the calls to the pre/post handlers
+	 * and single stepping the kprobe instruction.
+	 */
+	preempt_disable();
+	kcb = get_kprobe_ctlblk();
+	p = get_kprobe((void *)(regs->psw.addr - 2));
+
+	if (p) {
+		if (kprobe_running()) {
+			/*
+			 * We have hit a kprobe while another is still
+			 * active. This can happen in the pre and post
+			 * handler. Single step the instruction of the
+			 * new probe but do not call any handler function
+			 * of this secondary kprobe.
+			 * push_kprobe and pop_kprobe saves and restores
+			 * the currently active kprobe.
+			 */
+			kprobe_reenter_check(kcb, p);
+			push_kprobe(kcb, p);
+			kcb->kprobe_status = KPROBE_REENTER;
+		} else {
+			/*
+			 * If we have no pre-handler or it returned 0, we
+			 * continue with single stepping. If we have a
+			 * pre-handler and it returned non-zero, it prepped
+			 * for changing execution path, so get out doing
+			 * nothing more here.
+			 */
+			push_kprobe(kcb, p);
+			kcb->kprobe_status = KPROBE_HIT_ACTIVE;
+			if (p->pre_handler && p->pre_handler(p, regs)) {
+				pop_kprobe(kcb);
+				preempt_enable_no_resched();
+				return 1;
+			}
+			kcb->kprobe_status = KPROBE_HIT_SS;
+		}
+		enable_singlestep(kcb, regs, (unsigned long) p->ainsn.insn);
+		return 1;
+	} /* else:
+	   * No kprobe at this address and no active kprobe. The trap has
+	   * not been caused by a kprobe breakpoint. The race of breakpoint
+	   * vs. kprobe remove does not exist because on s390 as we use
+	   * stop_machine to arm/disarm the breakpoints.
+	   */
+	preempt_enable_no_resched();
+	return 0;
+}
+NOKPROBE_SYMBOL(kprobe_handler);
+
+/*
+ * Called after single-stepping.  p->addr is the address of the
+ * instruction whose first byte has been replaced by the "breakpoint"
+ * instruction.  To avoid the SMP problems that can occur when we
+ * temporarily put back the original opcode to single-step, we
+ * single-stepped a copy of the instruction.  The address of this
+ * copy is p->ainsn.insn.
+ */
+static void resume_execution(struct kprobe *p, struct pt_regs *regs)
+{
+	struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
+	unsigned long ip = regs->psw.addr;
+	int fixup = probe_get_fixup_type(p->ainsn.insn);
+
+	if (fixup & FIXUP_PSW_NORMAL)
+		ip += (unsigned long) p->addr - (unsigned long) p->ainsn.insn;
+
+	if (fixup & FIXUP_BRANCH_NOT_TAKEN) {
+		int ilen = insn_length(p->ainsn.insn[0] >> 8);
+		if (ip - (unsigned long) p->ainsn.insn == ilen)
+			ip = (unsigned long) p->addr + ilen;
+	}
+
+	if (fixup & FIXUP_RETURN_REGISTER) {
+		int reg = (p->ainsn.insn[0] & 0xf0) >> 4;
+		regs->gprs[reg] += (unsigned long) p->addr -
+				   (unsigned long) p->ainsn.insn;
+	}
+
+	disable_singlestep(kcb, regs, ip);
+}
+NOKPROBE_SYMBOL(resume_execution);
+
+static int post_kprobe_handler(struct pt_regs *regs)
+{
+	struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
+	struct kprobe *p = kprobe_running();
+
+	if (!p)
+		return 0;
+
+	resume_execution(p, regs);
+	if (kcb->kprobe_status != KPROBE_REENTER && p->post_handler) {
+		kcb->kprobe_status = KPROBE_HIT_SSDONE;
+		p->post_handler(p, regs, 0);
+	}
+	pop_kprobe(kcb);
+	preempt_enable_no_resched();
+
+	/*
+	 * if somebody else is singlestepping across a probe point, psw mask
+	 * will have PER set, in which case, continue the remaining processing
+	 * of do_single_step, as if this is not a probe hit.
+	 */
+	if (regs->psw.mask & PSW_MASK_PER)
+		return 0;
+
+	return 1;
+}
+NOKPROBE_SYMBOL(post_kprobe_handler);
+
+static int kprobe_trap_handler(struct pt_regs *regs, int trapnr)
+{
+	struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
+	struct kprobe *p = kprobe_running();
+
+	switch(kcb->kprobe_status) {
+	case KPROBE_HIT_SS:
+	case KPROBE_REENTER:
+		/*
+		 * We are here because the instruction being single
+		 * stepped caused a page fault. We reset the current
+		 * kprobe and the nip points back to the probe address
+		 * and allow the page fault handler to continue as a
+		 * normal page fault.
+		 */
+		disable_singlestep(kcb, regs, (unsigned long) p->addr);
+		pop_kprobe(kcb);
+		preempt_enable_no_resched();
+		break;
+	case KPROBE_HIT_ACTIVE:
+	case KPROBE_HIT_SSDONE:
+		/*
+		 * In case the user-specified fault handler returned
+		 * zero, try to fix up.
+		 */
+		if (fixup_exception(regs))
+			return 1;
+		/*
+		 * fixup_exception() could not handle it,
+		 * Let do_page_fault() fix it.
+		 */
+		break;
+	default:
+		break;
+	}
+	return 0;
+}
+NOKPROBE_SYMBOL(kprobe_trap_handler);
+
+int kprobe_fault_handler(struct pt_regs *regs, int trapnr)
+{
+	int ret;
+
+	if (regs->psw.mask & (PSW_MASK_IO | PSW_MASK_EXT))
+		local_irq_disable();
+	ret = kprobe_trap_handler(regs, trapnr);
+	if (regs->psw.mask & (PSW_MASK_IO | PSW_MASK_EXT))
+		local_irq_restore(regs->psw.mask & ~PSW_MASK_PER);
+	return ret;
+}
+NOKPROBE_SYMBOL(kprobe_fault_handler);
+
+/*
+ * Wrapper routine to for handling exceptions.
+ */
+int kprobe_exceptions_notify(struct notifier_block *self,
+			     unsigned long val, void *data)
+{
+	struct die_args *args = (struct die_args *) data;
+	struct pt_regs *regs = args->regs;
+	int ret = NOTIFY_DONE;
+
+	if (regs->psw.mask & (PSW_MASK_IO | PSW_MASK_EXT))
+		local_irq_disable();
+
+	switch (val) {
+	case DIE_BPT:
+		if (kprobe_handler(regs))
+			ret = NOTIFY_STOP;
+		break;
+	case DIE_SSTEP:
+		if (post_kprobe_handler(regs))
+			ret = NOTIFY_STOP;
+		break;
+	case DIE_TRAP:
+		if (!preemptible() && kprobe_running() &&
+		    kprobe_trap_handler(regs, args->trapnr))
+			ret = NOTIFY_STOP;
+		break;
+	default:
+		break;
+	}
+
+	if (regs->psw.mask & (PSW_MASK_IO | PSW_MASK_EXT))
+		local_irq_restore(regs->psw.mask & ~PSW_MASK_PER);
+
+	return ret;
+}
+NOKPROBE_SYMBOL(kprobe_exceptions_notify);
+
+int __init arch_init_kprobes(void)
+{
+	return 0;
+}
+
+int arch_trampoline_kprobe(struct kprobe *p)
+{
+	return 0;
+}
+NOKPROBE_SYMBOL(arch_trampoline_kprobe);
diff --git a/arch/s390/kernel/kprobes.h b/arch/s390/kernel/kprobes.h
new file mode 100644
index 0000000000..dc3ed5098e
--- /dev/null
+++ b/arch/s390/kernel/kprobes.h
@@ -0,0 +1,9 @@
+/* SPDX-License-Identifier: GPL-2.0+ */
+#ifndef _ARCH_S390_KPROBES_H
+#define _ARCH_S390_KPROBES_H
+
+#include <linux/kprobes.h>
+
+DEFINE_INSN_CACHE_OPS(s390_insn);
+
+#endif
diff --git a/arch/s390/kernel/kprobes_insn_page.S b/arch/s390/kernel/kprobes_insn_page.S
new file mode 100644
index 0000000000..0fe4d725e9
--- /dev/null
+++ b/arch/s390/kernel/kprobes_insn_page.S
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#include <linux/linkage.h>
+
+/*
+ * insn_page is a special 4k aligned dummy function for kprobes.
+ * It will contain all kprobed instructions that are out-of-line executed.
+ * The page must be within the kernel image to guarantee that the
+ * out-of-line instructions are within 2GB distance of their original
+ * location. Using a dummy function ensures that the insn_page is within
+ * the text section of the kernel and mapped read-only/executable from
+ * the beginning on, thus avoiding to split large mappings if the page
+ * would be in the data section instead.
+ */
+	.section .kprobes.text, "ax"
+	.balign 4096
+SYM_CODE_START(kprobes_insn_page)
+	.rept 2048
+	.word 0x07fe
+	.endr
+SYM_CODE_END(kprobes_insn_page)
+	.previous
diff --git a/arch/s390/kernel/lgr.c b/arch/s390/kernel/lgr.c
new file mode 100644
index 0000000000..6652e54cf3
--- /dev/null
+++ b/arch/s390/kernel/lgr.c
@@ -0,0 +1,187 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Linux Guest Relocation (LGR) detection
+ *
+ * Copyright IBM Corp. 2012
+ * Author(s): Michael Holzheu <holzheu@linux.vnet.ibm.com>
+ */
+
+#include <linux/init.h>
+#include <linux/export.h>
+#include <linux/timer.h>
+#include <linux/slab.h>
+#include <asm/facility.h>
+#include <asm/sysinfo.h>
+#include <asm/ebcdic.h>
+#include <asm/debug.h>
+#include <asm/ipl.h>
+
+#define LGR_TIMER_INTERVAL_SECS (30 * 60)
+#define VM_LEVEL_MAX 2 /* Maximum is 8, but we only record two levels */
+
+/*
+ * LGR info: Contains stfle and stsi data
+ */
+struct lgr_info {
+	/* Bit field with facility information: 4 DWORDs are stored */
+	u64 stfle_fac_list[4];
+	/* Level of system (1 = CEC, 2 = LPAR, 3 = z/VM */
+	u32 level;
+	/* Level 1: CEC info (stsi 1.1.1) */
+	char manufacturer[16];
+	char type[4];
+	char sequence[16];
+	char plant[4];
+	char model[16];
+	/* Level 2: LPAR info (stsi 2.2.2) */
+	u16 lpar_number;
+	char name[8];
+	/* Level 3: VM info (stsi 3.2.2) */
+	u8 vm_count;
+	struct {
+		char name[8];
+		char cpi[16];
+	} vm[VM_LEVEL_MAX];
+} __packed __aligned(8);
+
+/*
+ * LGR globals
+ */
+static char lgr_page[PAGE_SIZE] __aligned(PAGE_SIZE);
+static struct lgr_info lgr_info_last;
+static struct lgr_info lgr_info_cur;
+static struct debug_info *lgr_dbf;
+
+/*
+ * Copy buffer and then convert it to ASCII
+ */
+static void cpascii(char *dst, char *src, int size)
+{
+	memcpy(dst, src, size);
+	EBCASC(dst, size);
+}
+
+/*
+ * Fill LGR info with 1.1.1 stsi data
+ */
+static void lgr_stsi_1_1_1(struct lgr_info *lgr_info)
+{
+	struct sysinfo_1_1_1 *si = (void *) lgr_page;
+
+	if (stsi(si, 1, 1, 1))
+		return;
+	cpascii(lgr_info->manufacturer, si->manufacturer,
+		sizeof(si->manufacturer));
+	cpascii(lgr_info->type, si->type, sizeof(si->type));
+	cpascii(lgr_info->model, si->model, sizeof(si->model));
+	cpascii(lgr_info->sequence, si->sequence, sizeof(si->sequence));
+	cpascii(lgr_info->plant, si->plant, sizeof(si->plant));
+}
+
+/*
+ * Fill LGR info with 2.2.2 stsi data
+ */
+static void lgr_stsi_2_2_2(struct lgr_info *lgr_info)
+{
+	struct sysinfo_2_2_2 *si = (void *) lgr_page;
+
+	if (stsi(si, 2, 2, 2))
+		return;
+	cpascii(lgr_info->name, si->name, sizeof(si->name));
+	lgr_info->lpar_number = si->lpar_number;
+}
+
+/*
+ * Fill LGR info with 3.2.2 stsi data
+ */
+static void lgr_stsi_3_2_2(struct lgr_info *lgr_info)
+{
+	struct sysinfo_3_2_2 *si = (void *) lgr_page;
+	int i;
+
+	if (stsi(si, 3, 2, 2))
+		return;
+	for (i = 0; i < min_t(u8, si->count, VM_LEVEL_MAX); i++) {
+		cpascii(lgr_info->vm[i].name, si->vm[i].name,
+			sizeof(si->vm[i].name));
+		cpascii(lgr_info->vm[i].cpi, si->vm[i].cpi,
+			sizeof(si->vm[i].cpi));
+	}
+	lgr_info->vm_count = si->count;
+}
+
+/*
+ * Fill LGR info with current data
+ */
+static void lgr_info_get(struct lgr_info *lgr_info)
+{
+	int level;
+
+	memset(lgr_info, 0, sizeof(*lgr_info));
+	stfle(lgr_info->stfle_fac_list, ARRAY_SIZE(lgr_info->stfle_fac_list));
+	level = stsi(NULL, 0, 0, 0);
+	lgr_info->level = level;
+	if (level >= 1)
+		lgr_stsi_1_1_1(lgr_info);
+	if (level >= 2)
+		lgr_stsi_2_2_2(lgr_info);
+	if (level >= 3)
+		lgr_stsi_3_2_2(lgr_info);
+}
+
+/*
+ * Check if LGR info has changed and if yes log new LGR info to s390dbf
+ */
+void lgr_info_log(void)
+{
+	static DEFINE_SPINLOCK(lgr_info_lock);
+	unsigned long flags;
+
+	if (!spin_trylock_irqsave(&lgr_info_lock, flags))
+		return;
+	lgr_info_get(&lgr_info_cur);
+	if (memcmp(&lgr_info_last, &lgr_info_cur, sizeof(lgr_info_cur)) != 0) {
+		debug_event(lgr_dbf, 1, &lgr_info_cur, sizeof(lgr_info_cur));
+		lgr_info_last = lgr_info_cur;
+	}
+	spin_unlock_irqrestore(&lgr_info_lock, flags);
+}
+EXPORT_SYMBOL_GPL(lgr_info_log);
+
+static void lgr_timer_set(void);
+
+/*
+ * LGR timer callback
+ */
+static void lgr_timer_fn(struct timer_list *unused)
+{
+	lgr_info_log();
+	lgr_timer_set();
+}
+
+static struct timer_list lgr_timer;
+
+/*
+ * Setup next LGR timer
+ */
+static void lgr_timer_set(void)
+{
+	mod_timer(&lgr_timer, jiffies + msecs_to_jiffies(LGR_TIMER_INTERVAL_SECS * MSEC_PER_SEC));
+}
+
+/*
+ * Initialize LGR: Add s390dbf, write initial lgr_info and setup timer
+ */
+static int __init lgr_init(void)
+{
+	lgr_dbf = debug_register("lgr", 1, 1, sizeof(struct lgr_info));
+	if (!lgr_dbf)
+		return -ENOMEM;
+	debug_register_view(lgr_dbf, &debug_hex_ascii_view);
+	lgr_info_get(&lgr_info_last);
+	debug_event(lgr_dbf, 1, &lgr_info_last, sizeof(lgr_info_last));
+	timer_setup(&lgr_timer, lgr_timer_fn, TIMER_DEFERRABLE);
+	lgr_timer_set();
+	return 0;
+}
+device_initcall(lgr_init);
diff --git a/arch/s390/kernel/machine_kexec.c b/arch/s390/kernel/machine_kexec.c
new file mode 100644
index 0000000000..ce65fc0167
--- /dev/null
+++ b/arch/s390/kernel/machine_kexec.c
@@ -0,0 +1,286 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright IBM Corp. 2005, 2011
+ *
+ * Author(s): Rolf Adelsberger,
+ *	      Michael Holzheu <holzheu@linux.vnet.ibm.com>
+ */
+
+#include <linux/device.h>
+#include <linux/mm.h>
+#include <linux/kexec.h>
+#include <linux/delay.h>
+#include <linux/reboot.h>
+#include <linux/ftrace.h>
+#include <linux/debug_locks.h>
+#include <asm/pfault.h>
+#include <asm/cio.h>
+#include <asm/setup.h>
+#include <asm/smp.h>
+#include <asm/ipl.h>
+#include <asm/diag.h>
+#include <asm/elf.h>
+#include <asm/asm-offsets.h>
+#include <asm/cacheflush.h>
+#include <asm/abs_lowcore.h>
+#include <asm/os_info.h>
+#include <asm/set_memory.h>
+#include <asm/stacktrace.h>
+#include <asm/switch_to.h>
+#include <asm/nmi.h>
+#include <asm/sclp.h>
+
+typedef void (*relocate_kernel_t)(unsigned long, unsigned long, unsigned long);
+typedef int (*purgatory_t)(int);
+
+extern const unsigned char relocate_kernel[];
+extern const unsigned long long relocate_kernel_len;
+
+#ifdef CONFIG_CRASH_DUMP
+
+/*
+ * Reset the system, copy boot CPU registers to absolute zero,
+ * and jump to the kdump image
+ */
+static void __do_machine_kdump(void *data)
+{
+	struct kimage *image = data;
+	purgatory_t purgatory;
+	unsigned long prefix;
+
+	purgatory = (purgatory_t)image->start;
+
+	/* store_status() saved the prefix register to lowcore */
+	prefix = (unsigned long) S390_lowcore.prefixreg_save_area;
+
+	/* Now do the reset  */
+	s390_reset_system();
+
+	/*
+	 * Copy dump CPU store status info to absolute zero.
+	 * This need to be done *after* s390_reset_system set the
+	 * prefix register of this CPU to zero
+	 */
+	memcpy(absolute_pointer(__LC_FPREGS_SAVE_AREA),
+	       phys_to_virt(prefix + __LC_FPREGS_SAVE_AREA), 512);
+
+	call_nodat(1, int, purgatory, int, 1);
+
+	/* Die if kdump returns */
+	disabled_wait();
+}
+
+/*
+ * Start kdump: create a LGR log entry, store status of all CPUs and
+ * branch to __do_machine_kdump.
+ */
+static noinline void __machine_kdump(void *image)
+{
+	struct mcesa *mcesa;
+	union ctlreg2 cr2_old, cr2_new;
+	int this_cpu, cpu;
+
+	lgr_info_log();
+	/* Get status of the other CPUs */
+	this_cpu = smp_find_processor_id(stap());
+	for_each_online_cpu(cpu) {
+		if (cpu == this_cpu)
+			continue;
+		if (smp_store_status(cpu))
+			continue;
+	}
+	/* Store status of the boot CPU */
+	mcesa = __va(S390_lowcore.mcesad & MCESA_ORIGIN_MASK);
+	if (MACHINE_HAS_VX)
+		save_vx_regs((__vector128 *) mcesa->vector_save_area);
+	if (MACHINE_HAS_GS) {
+		__ctl_store(cr2_old.val, 2, 2);
+		cr2_new = cr2_old;
+		cr2_new.gse = 1;
+		__ctl_load(cr2_new.val, 2, 2);
+		save_gs_cb((struct gs_cb *) mcesa->guarded_storage_save_area);
+		__ctl_load(cr2_old.val, 2, 2);
+	}
+	/*
+	 * To create a good backchain for this CPU in the dump store_status
+	 * is passed the address of a function. The address is saved into
+	 * the PSW save area of the boot CPU and the function is invoked as
+	 * a tail call of store_status. The backchain in the dump will look
+	 * like this:
+	 *   restart_int_handler ->  __machine_kexec -> __do_machine_kdump
+	 * The call to store_status() will not return.
+	 */
+	store_status(__do_machine_kdump, image);
+}
+
+#endif /* CONFIG_CRASH_DUMP */
+
+/*
+ * Check if kdump checksums are valid: We call purgatory with parameter "0"
+ */
+static bool kdump_csum_valid(struct kimage *image)
+{
+#ifdef CONFIG_CRASH_DUMP
+	purgatory_t purgatory = (purgatory_t)image->start;
+	int rc;
+
+	rc = call_nodat(1, int, purgatory, int, 0);
+	return rc == 0;
+#else
+	return false;
+#endif
+}
+
+#ifdef CONFIG_CRASH_DUMP
+
+void crash_free_reserved_phys_range(unsigned long begin, unsigned long end)
+{
+	unsigned long addr, size;
+
+	for (addr = begin; addr < end; addr += PAGE_SIZE)
+		free_reserved_page(pfn_to_page(addr >> PAGE_SHIFT));
+	size = begin - crashk_res.start;
+	if (size)
+		os_info_crashkernel_add(crashk_res.start, size);
+	else
+		os_info_crashkernel_add(0, 0);
+}
+
+static void crash_protect_pages(int protect)
+{
+	unsigned long size;
+
+	if (!crashk_res.end)
+		return;
+	size = resource_size(&crashk_res);
+	if (protect)
+		set_memory_ro(crashk_res.start, size >> PAGE_SHIFT);
+	else
+		set_memory_rw(crashk_res.start, size >> PAGE_SHIFT);
+}
+
+void arch_kexec_protect_crashkres(void)
+{
+	crash_protect_pages(1);
+}
+
+void arch_kexec_unprotect_crashkres(void)
+{
+	crash_protect_pages(0);
+}
+
+#endif
+
+/*
+ * Give back memory to hypervisor before new kdump is loaded
+ */
+static int machine_kexec_prepare_kdump(void)
+{
+#ifdef CONFIG_CRASH_DUMP
+	if (MACHINE_IS_VM)
+		diag10_range(PFN_DOWN(crashk_res.start),
+			     PFN_DOWN(crashk_res.end - crashk_res.start + 1));
+	return 0;
+#else
+	return -EINVAL;
+#endif
+}
+
+int machine_kexec_prepare(struct kimage *image)
+{
+	void *reboot_code_buffer;
+
+	if (image->type == KEXEC_TYPE_CRASH)
+		return machine_kexec_prepare_kdump();
+
+	/* We don't support anything but the default image type for now. */
+	if (image->type != KEXEC_TYPE_DEFAULT)
+		return -EINVAL;
+
+	/* Get the destination where the assembler code should be copied to.*/
+	reboot_code_buffer = page_to_virt(image->control_code_page);
+
+	/* Then copy it */
+	memcpy(reboot_code_buffer, relocate_kernel, relocate_kernel_len);
+	return 0;
+}
+
+void machine_kexec_cleanup(struct kimage *image)
+{
+}
+
+void arch_crash_save_vmcoreinfo(void)
+{
+	struct lowcore *abs_lc;
+
+	VMCOREINFO_SYMBOL(lowcore_ptr);
+	VMCOREINFO_SYMBOL(high_memory);
+	VMCOREINFO_LENGTH(lowcore_ptr, NR_CPUS);
+	vmcoreinfo_append_str("SAMODE31=%lx\n", (unsigned long)__samode31);
+	vmcoreinfo_append_str("EAMODE31=%lx\n", (unsigned long)__eamode31);
+	vmcoreinfo_append_str("KERNELOFFSET=%lx\n", kaslr_offset());
+	abs_lc = get_abs_lowcore();
+	abs_lc->vmcore_info = paddr_vmcoreinfo_note();
+	put_abs_lowcore(abs_lc);
+}
+
+void machine_shutdown(void)
+{
+}
+
+void machine_crash_shutdown(struct pt_regs *regs)
+{
+	set_os_info_reipl_block();
+}
+
+/*
+ * Do normal kexec
+ */
+static void __do_machine_kexec(void *data)
+{
+	unsigned long data_mover, entry, diag308_subcode;
+	struct kimage *image = data;
+
+	data_mover = page_to_phys(image->control_code_page);
+	entry = virt_to_phys(&image->head);
+	diag308_subcode = DIAG308_CLEAR_RESET;
+	if (sclp.has_iplcc)
+		diag308_subcode |= DIAG308_FLAG_EI;
+	s390_reset_system();
+
+	call_nodat(3, void, (relocate_kernel_t)data_mover,
+		   unsigned long, entry,
+		   unsigned long, image->start,
+		   unsigned long, diag308_subcode);
+
+	/* Die if kexec returns */
+	disabled_wait();
+}
+
+/*
+ * Reset system and call either kdump or normal kexec
+ */
+static void __machine_kexec(void *data)
+{
+	pfault_fini();
+	tracing_off();
+	debug_locks_off();
+#ifdef CONFIG_CRASH_DUMP
+	if (((struct kimage *) data)->type == KEXEC_TYPE_CRASH)
+		__machine_kdump(data);
+#endif
+	__do_machine_kexec(data);
+}
+
+/*
+ * Do either kdump or normal kexec. In case of kdump we first ask
+ * purgatory, if kdump checksums are valid.
+ */
+void machine_kexec(struct kimage *image)
+{
+	if (image->type == KEXEC_TYPE_CRASH && !kdump_csum_valid(image))
+		return;
+	tracer_disable();
+	smp_send_stop();
+	smp_call_ipl_cpu(__machine_kexec, image);
+}
diff --git a/arch/s390/kernel/machine_kexec_file.c b/arch/s390/kernel/machine_kexec_file.c
new file mode 100644
index 0000000000..8d207b82d9
--- /dev/null
+++ b/arch/s390/kernel/machine_kexec_file.c
@@ -0,0 +1,379 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * s390 code for kexec_file_load system call
+ *
+ * Copyright IBM Corp. 2018
+ *
+ * Author(s): Philipp Rudo <prudo@linux.vnet.ibm.com>
+ */
+
+#define pr_fmt(fmt)	"kexec: " fmt
+
+#include <linux/elf.h>
+#include <linux/errno.h>
+#include <linux/kexec.h>
+#include <linux/module_signature.h>
+#include <linux/verification.h>
+#include <linux/vmalloc.h>
+#include <asm/boot_data.h>
+#include <asm/ipl.h>
+#include <asm/setup.h>
+
+const struct kexec_file_ops * const kexec_file_loaders[] = {
+	&s390_kexec_elf_ops,
+	&s390_kexec_image_ops,
+	NULL,
+};
+
+#ifdef CONFIG_KEXEC_SIG
+int s390_verify_sig(const char *kernel, unsigned long kernel_len)
+{
+	const unsigned long marker_len = sizeof(MODULE_SIG_STRING) - 1;
+	struct module_signature *ms;
+	unsigned long sig_len;
+	int ret;
+
+	/* Skip signature verification when not secure IPLed. */
+	if (!ipl_secure_flag)
+		return 0;
+
+	if (marker_len > kernel_len)
+		return -EKEYREJECTED;
+
+	if (memcmp(kernel + kernel_len - marker_len, MODULE_SIG_STRING,
+		   marker_len))
+		return -EKEYREJECTED;
+	kernel_len -= marker_len;
+
+	ms = (void *)kernel + kernel_len - sizeof(*ms);
+	kernel_len -= sizeof(*ms);
+
+	sig_len = be32_to_cpu(ms->sig_len);
+	if (sig_len >= kernel_len)
+		return -EKEYREJECTED;
+	kernel_len -= sig_len;
+
+	if (ms->id_type != PKEY_ID_PKCS7)
+		return -EKEYREJECTED;
+
+	if (ms->algo != 0 ||
+	    ms->hash != 0 ||
+	    ms->signer_len != 0 ||
+	    ms->key_id_len != 0 ||
+	    ms->__pad[0] != 0 ||
+	    ms->__pad[1] != 0 ||
+	    ms->__pad[2] != 0) {
+		return -EBADMSG;
+	}
+
+	ret = verify_pkcs7_signature(kernel, kernel_len,
+				     kernel + kernel_len, sig_len,
+				     VERIFY_USE_SECONDARY_KEYRING,
+				     VERIFYING_MODULE_SIGNATURE,
+				     NULL, NULL);
+	if (ret == -ENOKEY && IS_ENABLED(CONFIG_INTEGRITY_PLATFORM_KEYRING))
+		ret = verify_pkcs7_signature(kernel, kernel_len,
+					     kernel + kernel_len, sig_len,
+					     VERIFY_USE_PLATFORM_KEYRING,
+					     VERIFYING_MODULE_SIGNATURE,
+					     NULL, NULL);
+	return ret;
+}
+#endif /* CONFIG_KEXEC_SIG */
+
+static int kexec_file_update_purgatory(struct kimage *image,
+				       struct s390_load_data *data)
+{
+	u64 entry, type;
+	int ret;
+
+	if (image->type == KEXEC_TYPE_CRASH) {
+		entry = STARTUP_KDUMP_OFFSET;
+		type = KEXEC_TYPE_CRASH;
+	} else {
+		entry = STARTUP_NORMAL_OFFSET;
+		type = KEXEC_TYPE_DEFAULT;
+	}
+
+	ret = kexec_purgatory_get_set_symbol(image, "kernel_entry", &entry,
+					     sizeof(entry), false);
+	if (ret)
+		return ret;
+
+	ret = kexec_purgatory_get_set_symbol(image, "kernel_type", &type,
+					     sizeof(type), false);
+	if (ret)
+		return ret;
+
+	if (image->type == KEXEC_TYPE_CRASH) {
+		u64 crash_size;
+
+		ret = kexec_purgatory_get_set_symbol(image, "crash_start",
+						     &crashk_res.start,
+						     sizeof(crashk_res.start),
+						     false);
+		if (ret)
+			return ret;
+
+		crash_size = crashk_res.end - crashk_res.start + 1;
+		ret = kexec_purgatory_get_set_symbol(image, "crash_size",
+						     &crash_size,
+						     sizeof(crash_size),
+						     false);
+	}
+	return ret;
+}
+
+static int kexec_file_add_purgatory(struct kimage *image,
+				    struct s390_load_data *data)
+{
+	struct kexec_buf buf;
+	int ret;
+
+	buf.image = image;
+
+	data->memsz = ALIGN(data->memsz, PAGE_SIZE);
+	buf.mem = data->memsz;
+	if (image->type == KEXEC_TYPE_CRASH)
+		buf.mem += crashk_res.start;
+
+	ret = kexec_load_purgatory(image, &buf);
+	if (ret)
+		return ret;
+	data->memsz += buf.memsz;
+
+	return kexec_file_update_purgatory(image, data);
+}
+
+static int kexec_file_add_initrd(struct kimage *image,
+				 struct s390_load_data *data)
+{
+	struct kexec_buf buf;
+	int ret;
+
+	buf.image = image;
+
+	buf.buffer = image->initrd_buf;
+	buf.bufsz = image->initrd_buf_len;
+
+	data->memsz = ALIGN(data->memsz, PAGE_SIZE);
+	buf.mem = data->memsz;
+	if (image->type == KEXEC_TYPE_CRASH)
+		buf.mem += crashk_res.start;
+	buf.memsz = buf.bufsz;
+
+	data->parm->initrd_start = data->memsz;
+	data->parm->initrd_size = buf.memsz;
+	data->memsz += buf.memsz;
+
+	ret = kexec_add_buffer(&buf);
+	if (ret)
+		return ret;
+
+	return ipl_report_add_component(data->report, &buf, 0, 0);
+}
+
+static int kexec_file_add_ipl_report(struct kimage *image,
+				     struct s390_load_data *data)
+{
+	__u32 *lc_ipl_parmblock_ptr;
+	unsigned int len, ncerts;
+	struct kexec_buf buf;
+	unsigned long addr;
+	void *ptr, *end;
+	int ret;
+
+	buf.image = image;
+
+	data->memsz = ALIGN(data->memsz, PAGE_SIZE);
+	buf.mem = data->memsz;
+
+	ptr = __va(ipl_cert_list_addr);
+	end = ptr + ipl_cert_list_size;
+	ncerts = 0;
+	while (ptr < end) {
+		ncerts++;
+		len = *(unsigned int *)ptr;
+		ptr += sizeof(len);
+		ptr += len;
+	}
+
+	addr = data->memsz + data->report->size;
+	addr += ncerts * sizeof(struct ipl_rb_certificate_entry);
+	ptr = __va(ipl_cert_list_addr);
+	while (ptr < end) {
+		len = *(unsigned int *)ptr;
+		ptr += sizeof(len);
+		ipl_report_add_certificate(data->report, ptr, addr, len);
+		addr += len;
+		ptr += len;
+	}
+
+	ret = -ENOMEM;
+	buf.buffer = ipl_report_finish(data->report);
+	if (!buf.buffer)
+		goto out;
+	buf.bufsz = data->report->size;
+	buf.memsz = buf.bufsz;
+	image->arch.ipl_buf = buf.buffer;
+
+	data->memsz += buf.memsz;
+
+	lc_ipl_parmblock_ptr =
+		data->kernel_buf + offsetof(struct lowcore, ipl_parmblock_ptr);
+	*lc_ipl_parmblock_ptr = (__u32)buf.mem;
+
+	if (image->type == KEXEC_TYPE_CRASH)
+		buf.mem += crashk_res.start;
+
+	ret = kexec_add_buffer(&buf);
+out:
+	return ret;
+}
+
+void *kexec_file_add_components(struct kimage *image,
+				int (*add_kernel)(struct kimage *image,
+						  struct s390_load_data *data))
+{
+	unsigned long max_command_line_size = LEGACY_COMMAND_LINE_SIZE;
+	struct s390_load_data data = {0};
+	unsigned long minsize;
+	int ret;
+
+	data.report = ipl_report_init(&ipl_block);
+	if (IS_ERR(data.report))
+		return data.report;
+
+	ret = add_kernel(image, &data);
+	if (ret)
+		goto out;
+
+	ret = -EINVAL;
+	minsize = PARMAREA + offsetof(struct parmarea, command_line);
+	if (image->kernel_buf_len < minsize)
+		goto out;
+
+	if (data.parm->max_command_line_size)
+		max_command_line_size = data.parm->max_command_line_size;
+
+	if (minsize + max_command_line_size < minsize)
+		goto out;
+
+	if (image->kernel_buf_len < minsize + max_command_line_size)
+		goto out;
+
+	if (image->cmdline_buf_len >= max_command_line_size)
+		goto out;
+
+	memcpy(data.parm->command_line, image->cmdline_buf,
+	       image->cmdline_buf_len);
+
+	if (image->type == KEXEC_TYPE_CRASH) {
+		data.parm->oldmem_base = crashk_res.start;
+		data.parm->oldmem_size = crashk_res.end - crashk_res.start + 1;
+	}
+
+	if (image->initrd_buf) {
+		ret = kexec_file_add_initrd(image, &data);
+		if (ret)
+			goto out;
+	}
+
+	ret = kexec_file_add_purgatory(image, &data);
+	if (ret)
+		goto out;
+
+	if (data.kernel_mem == 0) {
+		unsigned long restart_psw =  0x0008000080000000UL;
+		restart_psw += image->start;
+		memcpy(data.kernel_buf, &restart_psw, sizeof(restart_psw));
+		image->start = 0;
+	}
+
+	ret = kexec_file_add_ipl_report(image, &data);
+out:
+	ipl_report_free(data.report);
+	return ERR_PTR(ret);
+}
+
+int arch_kexec_apply_relocations_add(struct purgatory_info *pi,
+				     Elf_Shdr *section,
+				     const Elf_Shdr *relsec,
+				     const Elf_Shdr *symtab)
+{
+	const char *strtab, *name, *shstrtab;
+	const Elf_Shdr *sechdrs;
+	Elf_Rela *relas;
+	int i, r_type;
+	int ret;
+
+	/* String & section header string table */
+	sechdrs = (void *)pi->ehdr + pi->ehdr->e_shoff;
+	strtab = (char *)pi->ehdr + sechdrs[symtab->sh_link].sh_offset;
+	shstrtab = (char *)pi->ehdr + sechdrs[pi->ehdr->e_shstrndx].sh_offset;
+
+	relas = (void *)pi->ehdr + relsec->sh_offset;
+
+	for (i = 0; i < relsec->sh_size / sizeof(*relas); i++) {
+		const Elf_Sym *sym;	/* symbol to relocate */
+		unsigned long addr;	/* final location after relocation */
+		unsigned long val;	/* relocated symbol value */
+		void *loc;		/* tmp location to modify */
+
+		sym = (void *)pi->ehdr + symtab->sh_offset;
+		sym += ELF64_R_SYM(relas[i].r_info);
+
+		if (sym->st_name)
+			name = strtab + sym->st_name;
+		else
+			name = shstrtab + sechdrs[sym->st_shndx].sh_name;
+
+		if (sym->st_shndx == SHN_UNDEF) {
+			pr_err("Undefined symbol: %s\n", name);
+			return -ENOEXEC;
+		}
+
+		if (sym->st_shndx == SHN_COMMON) {
+			pr_err("symbol '%s' in common section\n", name);
+			return -ENOEXEC;
+		}
+
+		if (sym->st_shndx >= pi->ehdr->e_shnum &&
+		    sym->st_shndx != SHN_ABS) {
+			pr_err("Invalid section %d for symbol %s\n",
+			       sym->st_shndx, name);
+			return -ENOEXEC;
+		}
+
+		loc = pi->purgatory_buf;
+		loc += section->sh_offset;
+		loc += relas[i].r_offset;
+
+		val = sym->st_value;
+		if (sym->st_shndx != SHN_ABS)
+			val += pi->sechdrs[sym->st_shndx].sh_addr;
+		val += relas[i].r_addend;
+
+		addr = section->sh_addr + relas[i].r_offset;
+
+		r_type = ELF64_R_TYPE(relas[i].r_info);
+
+		if (r_type == R_390_PLT32DBL)
+			r_type = R_390_PC32DBL;
+
+		ret = arch_kexec_do_relocs(r_type, loc, val, addr);
+		if (ret) {
+			pr_err("Unknown rela relocation: %d\n", r_type);
+			return -ENOEXEC;
+		}
+	}
+	return 0;
+}
+
+int arch_kimage_file_post_load_cleanup(struct kimage *image)
+{
+	vfree(image->arch.ipl_buf);
+	image->arch.ipl_buf = NULL;
+
+	return kexec_image_post_load_cleanup_default(image);
+}
diff --git a/arch/s390/kernel/machine_kexec_reloc.c b/arch/s390/kernel/machine_kexec_reloc.c
new file mode 100644
index 0000000000..b7182cec48
--- /dev/null
+++ b/arch/s390/kernel/machine_kexec_reloc.c
@@ -0,0 +1,56 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/elf.h>
+#include <asm/kexec.h>
+
+int arch_kexec_do_relocs(int r_type, void *loc, unsigned long val,
+			 unsigned long addr)
+{
+	switch (r_type) {
+	case R_390_NONE:
+		break;
+	case R_390_8:		/* Direct 8 bit.   */
+		*(u8 *)loc = val;
+		break;
+	case R_390_12:		/* Direct 12 bit.  */
+		*(u16 *)loc &= 0xf000;
+		*(u16 *)loc |= val & 0xfff;
+		break;
+	case R_390_16:		/* Direct 16 bit.  */
+		*(u16 *)loc = val;
+		break;
+	case R_390_20:		/* Direct 20 bit.  */
+		*(u32 *)loc &= 0xf00000ff;
+		*(u32 *)loc |= (val & 0xfff) << 16;	/* DL */
+		*(u32 *)loc |= (val & 0xff000) >> 4;	/* DH */
+		break;
+	case R_390_32:		/* Direct 32 bit.  */
+		*(u32 *)loc = val;
+		break;
+	case R_390_64:		/* Direct 64 bit.  */
+	case R_390_GLOB_DAT:
+	case R_390_JMP_SLOT:
+		*(u64 *)loc = val;
+		break;
+	case R_390_PC16:	/* PC relative 16 bit.	*/
+		*(u16 *)loc = (val - addr);
+		break;
+	case R_390_PC16DBL:	/* PC relative 16 bit shifted by 1.  */
+		*(u16 *)loc = (val - addr) >> 1;
+		break;
+	case R_390_PC32DBL:	/* PC relative 32 bit shifted by 1.  */
+		*(u32 *)loc = (val - addr) >> 1;
+		break;
+	case R_390_PC32:	/* PC relative 32 bit.	*/
+		*(u32 *)loc = (val - addr);
+		break;
+	case R_390_PC64:	/* PC relative 64 bit.	*/
+		*(u64 *)loc = (val - addr);
+		break;
+	case R_390_RELATIVE:
+		*(unsigned long *) loc = val;
+		break;
+	default:
+		return 1;
+	}
+	return 0;
+}
diff --git a/arch/s390/kernel/mcount.S b/arch/s390/kernel/mcount.S
new file mode 100644
index 0000000000..ae4d4fd9af
--- /dev/null
+++ b/arch/s390/kernel/mcount.S
@@ -0,0 +1,195 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright IBM Corp. 2008, 2009
+ *
+ */
+
+#include <linux/linkage.h>
+#include <asm/asm-offsets.h>
+#include <asm/ftrace.h>
+#include <asm/nospec-insn.h>
+#include <asm/ptrace.h>
+
+#define STACK_FRAME_SIZE_PTREGS		(STACK_FRAME_OVERHEAD + __PT_SIZE)
+#define STACK_PTREGS			(STACK_FRAME_OVERHEAD)
+#define STACK_PTREGS_GPRS		(STACK_PTREGS + __PT_GPRS)
+#define STACK_PTREGS_PSW		(STACK_PTREGS + __PT_PSW)
+
+#define STACK_FRAME_SIZE_FREGS		(STACK_FRAME_OVERHEAD + __FTRACE_REGS_SIZE)
+#define STACK_FREGS			(STACK_FRAME_OVERHEAD)
+#define STACK_FREGS_PTREGS		(STACK_FRAME_OVERHEAD + __FTRACE_REGS_PT_REGS)
+#define STACK_FREGS_PTREGS_GPRS		(STACK_FREGS_PTREGS + __PT_GPRS)
+#define STACK_FREGS_PTREGS_PSW		(STACK_FREGS_PTREGS + __PT_PSW)
+#define STACK_FREGS_PTREGS_ORIG_GPR2	(STACK_FREGS_PTREGS + __PT_ORIG_GPR2)
+#define STACK_FREGS_PTREGS_FLAGS	(STACK_FREGS_PTREGS + __PT_FLAGS)
+
+/* packed stack: allocate just enough for r14, r15 and backchain */
+#define TRACED_FUNC_FRAME_SIZE	24
+
+#ifdef CONFIG_FUNCTION_TRACER
+
+	GEN_BR_THUNK %r1
+	GEN_BR_THUNK %r14
+
+	.section .kprobes.text, "ax"
+
+SYM_FUNC_START(ftrace_stub)
+	BR_EX	%r14
+SYM_FUNC_END(ftrace_stub)
+
+SYM_CODE_START(ftrace_stub_direct_tramp)
+	lgr	%r1, %r0
+	BR_EX	%r1
+SYM_CODE_END(ftrace_stub_direct_tramp)
+
+	.macro	ftrace_regs_entry, allregs=0
+	stg	%r14,(__SF_GPRS+8*8)(%r15)	# save traced function caller
+
+	.if \allregs == 1
+	# save psw mask
+	# don't put any instructions clobbering CC before this point
+	epsw	%r1,%r14
+	risbg	%r14,%r1,0,31,32
+	.endif
+
+	lgr	%r1,%r15
+	# allocate stack frame for ftrace_caller to contain traced function
+	aghi	%r15,-TRACED_FUNC_FRAME_SIZE
+	stg	%r1,__SF_BACKCHAIN(%r15)
+	stg	%r0,(__SF_GPRS+8*8)(%r15)
+	stg	%r15,(__SF_GPRS+9*8)(%r15)
+	# allocate ftrace_regs and stack frame for ftrace_trace_function
+	aghi	%r15,-STACK_FRAME_SIZE_FREGS
+	stg	%r1,(STACK_FREGS_PTREGS_GPRS+15*8)(%r15)
+	xc	STACK_FREGS_PTREGS_ORIG_GPR2(8,%r15),STACK_FREGS_PTREGS_ORIG_GPR2(%r15)
+
+	.if \allregs == 1
+	stg	%r14,(STACK_FREGS_PTREGS_PSW)(%r15)
+	mvghi	STACK_FREGS_PTREGS_FLAGS(%r15),_PIF_FTRACE_FULL_REGS
+	.else
+	xc	STACK_FREGS_PTREGS_FLAGS(8,%r15),STACK_FREGS_PTREGS_FLAGS(%r15)
+	.endif
+
+	lg	%r14,(__SF_GPRS+8*8)(%r1)	# restore original return address
+	aghi	%r1,-TRACED_FUNC_FRAME_SIZE
+	stg	%r1,__SF_BACKCHAIN(%r15)
+	stg	%r0,(STACK_FREGS_PTREGS_PSW+8)(%r15)
+	stmg	%r2,%r14,(STACK_FREGS_PTREGS_GPRS+2*8)(%r15)
+	.endm
+
+SYM_CODE_START(ftrace_regs_caller)
+	ftrace_regs_entry	1
+	j	ftrace_common
+SYM_CODE_END(ftrace_regs_caller)
+
+SYM_CODE_START(ftrace_caller)
+	ftrace_regs_entry	0
+	j	ftrace_common
+SYM_CODE_END(ftrace_caller)
+
+SYM_CODE_START(ftrace_common)
+#ifdef CONFIG_HAVE_MARCH_Z196_FEATURES
+	aghik	%r2,%r0,-MCOUNT_INSN_SIZE
+	lgrl	%r4,function_trace_op
+	lgrl	%r1,ftrace_func
+#else
+	lgr	%r2,%r0
+	aghi	%r2,-MCOUNT_INSN_SIZE
+	larl	%r4,function_trace_op
+	lg	%r4,0(%r4)
+	larl	%r1,ftrace_func
+	lg	%r1,0(%r1)
+#endif
+	lgr	%r3,%r14
+	la	%r5,STACK_FREGS(%r15)
+	BASR_EX	%r14,%r1
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+# The j instruction gets runtime patched to a nop instruction.
+# See ftrace_enable_ftrace_graph_caller.
+SYM_INNER_LABEL(ftrace_graph_caller, SYM_L_GLOBAL)
+	j	.Lftrace_graph_caller_end
+	lmg	%r2,%r3,(STACK_FREGS_PTREGS_GPRS+14*8)(%r15)
+	lg	%r4,(STACK_FREGS_PTREGS_PSW+8)(%r15)
+	brasl	%r14,prepare_ftrace_return
+	stg	%r2,(STACK_FREGS_PTREGS_GPRS+14*8)(%r15)
+.Lftrace_graph_caller_end:
+#endif
+	lg	%r0,(STACK_FREGS_PTREGS_PSW+8)(%r15)
+#ifdef CONFIG_HAVE_MARCH_Z196_FEATURES
+	ltg	%r1,STACK_FREGS_PTREGS_ORIG_GPR2(%r15)
+	locgrz	%r1,%r0
+#else
+	lg	%r1,STACK_FREGS_PTREGS_ORIG_GPR2(%r15)
+	ltgr	%r1,%r1
+	jnz	0f
+	lgr	%r1,%r0
+#endif
+0:	lmg	%r2,%r15,(STACK_FREGS_PTREGS_GPRS+2*8)(%r15)
+	BR_EX	%r1
+SYM_CODE_END(ftrace_common)
+
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+
+SYM_FUNC_START(return_to_handler)
+	stmg	%r2,%r5,32(%r15)
+	lgr	%r1,%r15
+	aghi	%r15,-(STACK_FRAME_OVERHEAD+__FGRAPH_RET_SIZE)
+	stg	%r1,__SF_BACKCHAIN(%r15)
+	la	%r3,STACK_FRAME_OVERHEAD(%r15)
+	stg	%r1,__FGRAPH_RET_FP(%r3)
+	stg	%r2,__FGRAPH_RET_GPR2(%r3)
+	lgr	%r2,%r3
+	brasl	%r14,ftrace_return_to_handler
+	aghi	%r15,STACK_FRAME_OVERHEAD+__FGRAPH_RET_SIZE
+	lgr	%r14,%r2
+	lmg	%r2,%r5,32(%r15)
+	BR_EX	%r14
+SYM_FUNC_END(return_to_handler)
+
+#endif
+#endif /* CONFIG_FUNCTION_TRACER */
+
+SYM_CODE_START(ftrace_shared_hotpatch_trampoline_br)
+	lmg	%r0,%r1,2(%r1)
+	br	%r1
+SYM_INNER_LABEL(ftrace_shared_hotpatch_trampoline_br_end, SYM_L_GLOBAL)
+SYM_CODE_END(ftrace_shared_hotpatch_trampoline_br)
+
+#ifdef CONFIG_EXPOLINE
+SYM_CODE_START(ftrace_shared_hotpatch_trampoline_exrl)
+	lmg	%r0,%r1,2(%r1)
+	exrl	%r0,0f
+	j	.
+0:	br	%r1
+SYM_INNER_LABEL(ftrace_shared_hotpatch_trampoline_exrl_end, SYM_L_GLOBAL)
+SYM_CODE_END(ftrace_shared_hotpatch_trampoline_exrl)
+#endif /* CONFIG_EXPOLINE */
+
+#ifdef CONFIG_RETHOOK
+
+SYM_CODE_START(arch_rethook_trampoline)
+	stg	%r14,(__SF_GPRS+8*8)(%r15)
+	lay	%r15,-STACK_FRAME_SIZE_PTREGS(%r15)
+	stmg	%r0,%r14,STACK_PTREGS_GPRS(%r15)
+
+	# store original stack pointer in backchain and pt_regs
+	lay	%r7,STACK_FRAME_SIZE_PTREGS(%r15)
+	stg	%r7,__SF_BACKCHAIN(%r15)
+	stg	%r7,STACK_PTREGS_GPRS+(15*8)(%r15)
+
+	# store full psw
+	epsw	%r2,%r3
+	risbg	%r3,%r2,0,31,32
+	stg	%r3,STACK_PTREGS_PSW(%r15)
+	larl	%r1,arch_rethook_trampoline
+	stg	%r1,STACK_PTREGS_PSW+8(%r15)
+
+	lay	%r2,STACK_PTREGS(%r15)
+	brasl	%r14,arch_rethook_trampoline_callback
+
+	mvc	__SF_EMPTY(16,%r7),STACK_PTREGS_PSW(%r15)
+	lmg	%r0,%r15,STACK_PTREGS_GPRS(%r15)
+	lpswe	__SF_EMPTY(%r15)
+SYM_CODE_END(arch_rethook_trampoline)
+
+#endif /* CONFIG_RETHOOK */
diff --git a/arch/s390/kernel/module.c b/arch/s390/kernel/module.c
new file mode 100644
index 0000000000..42215f9404
--- /dev/null
+++ b/arch/s390/kernel/module.c
@@ -0,0 +1,576 @@
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ *  Kernel module help for s390.
+ *
+ *  S390 version
+ *    Copyright IBM Corp. 2002, 2003
+ *    Author(s): Arnd Bergmann (arndb@de.ibm.com)
+ *		 Martin Schwidefsky (schwidefsky@de.ibm.com)
+ *
+ *  based on i386 version
+ *    Copyright (C) 2001 Rusty Russell.
+ */
+#include <linux/module.h>
+#include <linux/elf.h>
+#include <linux/vmalloc.h>
+#include <linux/fs.h>
+#include <linux/ftrace.h>
+#include <linux/string.h>
+#include <linux/kernel.h>
+#include <linux/kasan.h>
+#include <linux/moduleloader.h>
+#include <linux/bug.h>
+#include <linux/memory.h>
+#include <asm/alternative.h>
+#include <asm/nospec-branch.h>
+#include <asm/facility.h>
+#include <asm/ftrace.lds.h>
+#include <asm/set_memory.h>
+#include <asm/setup.h>
+
+#if 0
+#define DEBUGP printk
+#else
+#define DEBUGP(fmt , ...)
+#endif
+
+#define PLT_ENTRY_SIZE 22
+
+static unsigned long get_module_load_offset(void)
+{
+	static DEFINE_MUTEX(module_kaslr_mutex);
+	static unsigned long module_load_offset;
+
+	if (!kaslr_enabled())
+		return 0;
+	/*
+	 * Calculate the module_load_offset the first time this code
+	 * is called. Once calculated it stays the same until reboot.
+	 */
+	mutex_lock(&module_kaslr_mutex);
+	if (!module_load_offset)
+		module_load_offset = get_random_u32_inclusive(1, 1024) * PAGE_SIZE;
+	mutex_unlock(&module_kaslr_mutex);
+	return module_load_offset;
+}
+
+void *module_alloc(unsigned long size)
+{
+	gfp_t gfp_mask = GFP_KERNEL;
+	void *p;
+
+	if (PAGE_ALIGN(size) > MODULES_LEN)
+		return NULL;
+	p = __vmalloc_node_range(size, MODULE_ALIGN,
+				 MODULES_VADDR + get_module_load_offset(),
+				 MODULES_END, gfp_mask, PAGE_KERNEL,
+				 VM_FLUSH_RESET_PERMS | VM_DEFER_KMEMLEAK,
+				 NUMA_NO_NODE, __builtin_return_address(0));
+	if (p && (kasan_alloc_module_shadow(p, size, gfp_mask) < 0)) {
+		vfree(p);
+		return NULL;
+	}
+	return p;
+}
+
+#ifdef CONFIG_FUNCTION_TRACER
+void module_arch_cleanup(struct module *mod)
+{
+	module_memfree(mod->arch.trampolines_start);
+}
+#endif
+
+void module_arch_freeing_init(struct module *mod)
+{
+	if (is_livepatch_module(mod) &&
+	    mod->state == MODULE_STATE_LIVE)
+		return;
+
+	vfree(mod->arch.syminfo);
+	mod->arch.syminfo = NULL;
+}
+
+static void check_rela(Elf_Rela *rela, struct module *me)
+{
+	struct mod_arch_syminfo *info;
+
+	info = me->arch.syminfo + ELF_R_SYM (rela->r_info);
+	switch (ELF_R_TYPE (rela->r_info)) {
+	case R_390_GOT12:	/* 12 bit GOT offset.  */
+	case R_390_GOT16:	/* 16 bit GOT offset.  */
+	case R_390_GOT20:	/* 20 bit GOT offset.  */
+	case R_390_GOT32:	/* 32 bit GOT offset.  */
+	case R_390_GOT64:	/* 64 bit GOT offset.  */
+	case R_390_GOTENT:	/* 32 bit PC rel. to GOT entry shifted by 1. */
+	case R_390_GOTPLT12:	/* 12 bit offset to jump slot.	*/
+	case R_390_GOTPLT16:	/* 16 bit offset to jump slot.  */
+	case R_390_GOTPLT20:	/* 20 bit offset to jump slot.  */
+	case R_390_GOTPLT32:	/* 32 bit offset to jump slot.  */
+	case R_390_GOTPLT64:	/* 64 bit offset to jump slot.	*/
+	case R_390_GOTPLTENT:	/* 32 bit rel. offset to jump slot >> 1. */
+		if (info->got_offset == -1UL) {
+			info->got_offset = me->arch.got_size;
+			me->arch.got_size += sizeof(void*);
+		}
+		break;
+	case R_390_PLT16DBL:	/* 16 bit PC rel. PLT shifted by 1.  */
+	case R_390_PLT32DBL:	/* 32 bit PC rel. PLT shifted by 1.  */
+	case R_390_PLT32:	/* 32 bit PC relative PLT address.  */
+	case R_390_PLT64:	/* 64 bit PC relative PLT address.  */
+	case R_390_PLTOFF16:	/* 16 bit offset from GOT to PLT. */
+	case R_390_PLTOFF32:	/* 32 bit offset from GOT to PLT. */
+	case R_390_PLTOFF64:	/* 16 bit offset from GOT to PLT. */
+		if (info->plt_offset == -1UL) {
+			info->plt_offset = me->arch.plt_size;
+			me->arch.plt_size += PLT_ENTRY_SIZE;
+		}
+		break;
+	case R_390_COPY:
+	case R_390_GLOB_DAT:
+	case R_390_JMP_SLOT:
+	case R_390_RELATIVE:
+		/* Only needed if we want to support loading of 
+		   modules linked with -shared. */
+		break;
+	}
+}
+
+/*
+ * Account for GOT and PLT relocations. We can't add sections for
+ * got and plt but we can increase the core module size.
+ */
+int module_frob_arch_sections(Elf_Ehdr *hdr, Elf_Shdr *sechdrs,
+			      char *secstrings, struct module *me)
+{
+	Elf_Shdr *symtab;
+	Elf_Sym *symbols;
+	Elf_Rela *rela;
+	char *strings;
+	int nrela, i, j;
+	struct module_memory *mod_mem;
+
+	/* Find symbol table and string table. */
+	symtab = NULL;
+	for (i = 0; i < hdr->e_shnum; i++)
+		switch (sechdrs[i].sh_type) {
+		case SHT_SYMTAB:
+			symtab = sechdrs + i;
+			break;
+		}
+	if (!symtab) {
+		printk(KERN_ERR "module %s: no symbol table\n", me->name);
+		return -ENOEXEC;
+	}
+
+	/* Allocate one syminfo structure per symbol. */
+	me->arch.nsyms = symtab->sh_size / sizeof(Elf_Sym);
+	me->arch.syminfo = vmalloc(array_size(sizeof(struct mod_arch_syminfo),
+					      me->arch.nsyms));
+	if (!me->arch.syminfo)
+		return -ENOMEM;
+	symbols = (void *) hdr + symtab->sh_offset;
+	strings = (void *) hdr + sechdrs[symtab->sh_link].sh_offset;
+	for (i = 0; i < me->arch.nsyms; i++) {
+		if (symbols[i].st_shndx == SHN_UNDEF &&
+		    strcmp(strings + symbols[i].st_name,
+			   "_GLOBAL_OFFSET_TABLE_") == 0)
+			/* "Define" it as absolute. */
+			symbols[i].st_shndx = SHN_ABS;
+		me->arch.syminfo[i].got_offset = -1UL;
+		me->arch.syminfo[i].plt_offset = -1UL;
+		me->arch.syminfo[i].got_initialized = 0;
+		me->arch.syminfo[i].plt_initialized = 0;
+	}
+
+	/* Search for got/plt relocations. */
+	me->arch.got_size = me->arch.plt_size = 0;
+	for (i = 0; i < hdr->e_shnum; i++) {
+		if (sechdrs[i].sh_type != SHT_RELA)
+			continue;
+		nrela = sechdrs[i].sh_size / sizeof(Elf_Rela);
+		rela = (void *) hdr + sechdrs[i].sh_offset;
+		for (j = 0; j < nrela; j++)
+			check_rela(rela + j, me);
+	}
+
+	/* Increase core size by size of got & plt and set start
+	   offsets for got and plt. */
+	mod_mem = &me->mem[MOD_TEXT];
+	mod_mem->size = ALIGN(mod_mem->size, 4);
+	me->arch.got_offset = mod_mem->size;
+	mod_mem->size += me->arch.got_size;
+	me->arch.plt_offset = mod_mem->size;
+	if (me->arch.plt_size) {
+		if (IS_ENABLED(CONFIG_EXPOLINE) && !nospec_disable)
+			me->arch.plt_size += PLT_ENTRY_SIZE;
+		mod_mem->size += me->arch.plt_size;
+	}
+	return 0;
+}
+
+static int apply_rela_bits(Elf_Addr loc, Elf_Addr val,
+			   int sign, int bits, int shift,
+			   void *(*write)(void *dest, const void *src, size_t len))
+{
+	unsigned long umax;
+	long min, max;
+	void *dest = (void *)loc;
+
+	if (val & ((1UL << shift) - 1))
+		return -ENOEXEC;
+	if (sign) {
+		val = (Elf_Addr)(((long) val) >> shift);
+		min = -(1L << (bits - 1));
+		max = (1L << (bits - 1)) - 1;
+		if ((long) val < min || (long) val > max)
+			return -ENOEXEC;
+	} else {
+		val >>= shift;
+		umax = ((1UL << (bits - 1)) << 1) - 1;
+		if ((unsigned long) val > umax)
+			return -ENOEXEC;
+	}
+
+	if (bits == 8) {
+		unsigned char tmp = val;
+		write(dest, &tmp, 1);
+	} else if (bits == 12) {
+		unsigned short tmp = (val & 0xfff) |
+			(*(unsigned short *) loc & 0xf000);
+		write(dest, &tmp, 2);
+	} else if (bits == 16) {
+		unsigned short tmp = val;
+		write(dest, &tmp, 2);
+	} else if (bits == 20) {
+		unsigned int tmp = (val & 0xfff) << 16 |
+			(val & 0xff000) >> 4 | (*(unsigned int *) loc & 0xf00000ff);
+		write(dest, &tmp, 4);
+	} else if (bits == 32) {
+		unsigned int tmp = val;
+		write(dest, &tmp, 4);
+	} else if (bits == 64) {
+		unsigned long tmp = val;
+		write(dest, &tmp, 8);
+	}
+	return 0;
+}
+
+static int apply_rela(Elf_Rela *rela, Elf_Addr base, Elf_Sym *symtab,
+		      const char *strtab, struct module *me,
+		      void *(*write)(void *dest, const void *src, size_t len))
+{
+	struct mod_arch_syminfo *info;
+	Elf_Addr loc, val;
+	int r_type, r_sym;
+	int rc = -ENOEXEC;
+
+	/* This is where to make the change */
+	loc = base + rela->r_offset;
+	/* This is the symbol it is referring to.  Note that all
+	   undefined symbols have been resolved.  */
+	r_sym = ELF_R_SYM(rela->r_info);
+	r_type = ELF_R_TYPE(rela->r_info);
+	info = me->arch.syminfo + r_sym;
+	val = symtab[r_sym].st_value;
+
+	switch (r_type) {
+	case R_390_NONE:	/* No relocation.  */
+		rc = 0;
+		break;
+	case R_390_8:		/* Direct 8 bit.   */
+	case R_390_12:		/* Direct 12 bit.  */
+	case R_390_16:		/* Direct 16 bit.  */
+	case R_390_20:		/* Direct 20 bit.  */
+	case R_390_32:		/* Direct 32 bit.  */
+	case R_390_64:		/* Direct 64 bit.  */
+		val += rela->r_addend;
+		if (r_type == R_390_8)
+			rc = apply_rela_bits(loc, val, 0, 8, 0, write);
+		else if (r_type == R_390_12)
+			rc = apply_rela_bits(loc, val, 0, 12, 0, write);
+		else if (r_type == R_390_16)
+			rc = apply_rela_bits(loc, val, 0, 16, 0, write);
+		else if (r_type == R_390_20)
+			rc = apply_rela_bits(loc, val, 1, 20, 0, write);
+		else if (r_type == R_390_32)
+			rc = apply_rela_bits(loc, val, 0, 32, 0, write);
+		else if (r_type == R_390_64)
+			rc = apply_rela_bits(loc, val, 0, 64, 0, write);
+		break;
+	case R_390_PC16:	/* PC relative 16 bit.  */
+	case R_390_PC16DBL:	/* PC relative 16 bit shifted by 1.  */
+	case R_390_PC32DBL:	/* PC relative 32 bit shifted by 1.  */
+	case R_390_PC32:	/* PC relative 32 bit.  */
+	case R_390_PC64:	/* PC relative 64 bit.	*/
+		val += rela->r_addend - loc;
+		if (r_type == R_390_PC16)
+			rc = apply_rela_bits(loc, val, 1, 16, 0, write);
+		else if (r_type == R_390_PC16DBL)
+			rc = apply_rela_bits(loc, val, 1, 16, 1, write);
+		else if (r_type == R_390_PC32DBL)
+			rc = apply_rela_bits(loc, val, 1, 32, 1, write);
+		else if (r_type == R_390_PC32)
+			rc = apply_rela_bits(loc, val, 1, 32, 0, write);
+		else if (r_type == R_390_PC64)
+			rc = apply_rela_bits(loc, val, 1, 64, 0, write);
+		break;
+	case R_390_GOT12:	/* 12 bit GOT offset.  */
+	case R_390_GOT16:	/* 16 bit GOT offset.  */
+	case R_390_GOT20:	/* 20 bit GOT offset.  */
+	case R_390_GOT32:	/* 32 bit GOT offset.  */
+	case R_390_GOT64:	/* 64 bit GOT offset.  */
+	case R_390_GOTENT:	/* 32 bit PC rel. to GOT entry shifted by 1. */
+	case R_390_GOTPLT12:	/* 12 bit offset to jump slot.	*/
+	case R_390_GOTPLT20:	/* 20 bit offset to jump slot.  */
+	case R_390_GOTPLT16:	/* 16 bit offset to jump slot.  */
+	case R_390_GOTPLT32:	/* 32 bit offset to jump slot.  */
+	case R_390_GOTPLT64:	/* 64 bit offset to jump slot.	*/
+	case R_390_GOTPLTENT:	/* 32 bit rel. offset to jump slot >> 1. */
+		if (info->got_initialized == 0) {
+			Elf_Addr *gotent = me->mem[MOD_TEXT].base +
+					   me->arch.got_offset +
+					   info->got_offset;
+
+			write(gotent, &val, sizeof(*gotent));
+			info->got_initialized = 1;
+		}
+		val = info->got_offset + rela->r_addend;
+		if (r_type == R_390_GOT12 ||
+		    r_type == R_390_GOTPLT12)
+			rc = apply_rela_bits(loc, val, 0, 12, 0, write);
+		else if (r_type == R_390_GOT16 ||
+			 r_type == R_390_GOTPLT16)
+			rc = apply_rela_bits(loc, val, 0, 16, 0, write);
+		else if (r_type == R_390_GOT20 ||
+			 r_type == R_390_GOTPLT20)
+			rc = apply_rela_bits(loc, val, 1, 20, 0, write);
+		else if (r_type == R_390_GOT32 ||
+			 r_type == R_390_GOTPLT32)
+			rc = apply_rela_bits(loc, val, 0, 32, 0, write);
+		else if (r_type == R_390_GOT64 ||
+			 r_type == R_390_GOTPLT64)
+			rc = apply_rela_bits(loc, val, 0, 64, 0, write);
+		else if (r_type == R_390_GOTENT ||
+			 r_type == R_390_GOTPLTENT) {
+			val += (Elf_Addr)me->mem[MOD_TEXT].base +
+				me->arch.got_offset - loc;
+			rc = apply_rela_bits(loc, val, 1, 32, 1, write);
+		}
+		break;
+	case R_390_PLT16DBL:	/* 16 bit PC rel. PLT shifted by 1.  */
+	case R_390_PLT32DBL:	/* 32 bit PC rel. PLT shifted by 1.  */
+	case R_390_PLT32:	/* 32 bit PC relative PLT address.  */
+	case R_390_PLT64:	/* 64 bit PC relative PLT address.  */
+	case R_390_PLTOFF16:	/* 16 bit offset from GOT to PLT. */
+	case R_390_PLTOFF32:	/* 32 bit offset from GOT to PLT. */
+	case R_390_PLTOFF64:	/* 16 bit offset from GOT to PLT. */
+		if (info->plt_initialized == 0) {
+			unsigned char insn[PLT_ENTRY_SIZE];
+			char *plt_base;
+			char *ip;
+
+			plt_base = me->mem[MOD_TEXT].base + me->arch.plt_offset;
+			ip = plt_base + info->plt_offset;
+			*(int *)insn = 0x0d10e310;	/* basr 1,0  */
+			*(int *)&insn[4] = 0x100c0004;	/* lg	1,12(1) */
+			if (IS_ENABLED(CONFIG_EXPOLINE) && !nospec_disable) {
+				char *jump_r1;
+
+				jump_r1 = plt_base + me->arch.plt_size -
+					PLT_ENTRY_SIZE;
+				/* brcl	0xf,__jump_r1 */
+				*(short *)&insn[8] = 0xc0f4;
+				*(int *)&insn[10] = (jump_r1 - (ip + 8)) / 2;
+			} else {
+				*(int *)&insn[8] = 0x07f10000;	/* br %r1 */
+			}
+			*(long *)&insn[14] = val;
+
+			write(ip, insn, sizeof(insn));
+			info->plt_initialized = 1;
+		}
+		if (r_type == R_390_PLTOFF16 ||
+		    r_type == R_390_PLTOFF32 ||
+		    r_type == R_390_PLTOFF64)
+			val = me->arch.plt_offset - me->arch.got_offset +
+				info->plt_offset + rela->r_addend;
+		else {
+			if (!((r_type == R_390_PLT16DBL &&
+			       val - loc + 0xffffUL < 0x1ffffeUL) ||
+			      (r_type == R_390_PLT32DBL &&
+			       val - loc + 0xffffffffULL < 0x1fffffffeULL)))
+				val = (Elf_Addr) me->mem[MOD_TEXT].base +
+					me->arch.plt_offset +
+					info->plt_offset;
+			val += rela->r_addend - loc;
+		}
+		if (r_type == R_390_PLT16DBL)
+			rc = apply_rela_bits(loc, val, 1, 16, 1, write);
+		else if (r_type == R_390_PLTOFF16)
+			rc = apply_rela_bits(loc, val, 0, 16, 0, write);
+		else if (r_type == R_390_PLT32DBL)
+			rc = apply_rela_bits(loc, val, 1, 32, 1, write);
+		else if (r_type == R_390_PLT32 ||
+			 r_type == R_390_PLTOFF32)
+			rc = apply_rela_bits(loc, val, 0, 32, 0, write);
+		else if (r_type == R_390_PLT64 ||
+			 r_type == R_390_PLTOFF64)
+			rc = apply_rela_bits(loc, val, 0, 64, 0, write);
+		break;
+	case R_390_GOTOFF16:	/* 16 bit offset to GOT.  */
+	case R_390_GOTOFF32:	/* 32 bit offset to GOT.  */
+	case R_390_GOTOFF64:	/* 64 bit offset to GOT. */
+		val = val + rela->r_addend -
+			((Elf_Addr) me->mem[MOD_TEXT].base + me->arch.got_offset);
+		if (r_type == R_390_GOTOFF16)
+			rc = apply_rela_bits(loc, val, 0, 16, 0, write);
+		else if (r_type == R_390_GOTOFF32)
+			rc = apply_rela_bits(loc, val, 0, 32, 0, write);
+		else if (r_type == R_390_GOTOFF64)
+			rc = apply_rela_bits(loc, val, 0, 64, 0, write);
+		break;
+	case R_390_GOTPC:	/* 32 bit PC relative offset to GOT. */
+	case R_390_GOTPCDBL:	/* 32 bit PC rel. off. to GOT shifted by 1. */
+		val = (Elf_Addr) me->mem[MOD_TEXT].base + me->arch.got_offset +
+			rela->r_addend - loc;
+		if (r_type == R_390_GOTPC)
+			rc = apply_rela_bits(loc, val, 1, 32, 0, write);
+		else if (r_type == R_390_GOTPCDBL)
+			rc = apply_rela_bits(loc, val, 1, 32, 1, write);
+		break;
+	case R_390_COPY:
+	case R_390_GLOB_DAT:	/* Create GOT entry.  */
+	case R_390_JMP_SLOT:	/* Create PLT entry.  */
+	case R_390_RELATIVE:	/* Adjust by program base.  */
+		/* Only needed if we want to support loading of 
+		   modules linked with -shared. */
+		return -ENOEXEC;
+	default:
+		printk(KERN_ERR "module %s: unknown relocation: %u\n",
+		       me->name, r_type);
+		return -ENOEXEC;
+	}
+	if (rc) {
+		printk(KERN_ERR "module %s: relocation error for symbol %s "
+		       "(r_type %i, value 0x%lx)\n",
+		       me->name, strtab + symtab[r_sym].st_name,
+		       r_type, (unsigned long) val);
+		return rc;
+	}
+	return 0;
+}
+
+static int __apply_relocate_add(Elf_Shdr *sechdrs, const char *strtab,
+		       unsigned int symindex, unsigned int relsec,
+		       struct module *me,
+		       void *(*write)(void *dest, const void *src, size_t len))
+{
+	Elf_Addr base;
+	Elf_Sym *symtab;
+	Elf_Rela *rela;
+	unsigned long i, n;
+	int rc;
+
+	DEBUGP("Applying relocate section %u to %u\n",
+	       relsec, sechdrs[relsec].sh_info);
+	base = sechdrs[sechdrs[relsec].sh_info].sh_addr;
+	symtab = (Elf_Sym *) sechdrs[symindex].sh_addr;
+	rela = (Elf_Rela *) sechdrs[relsec].sh_addr;
+	n = sechdrs[relsec].sh_size / sizeof(Elf_Rela);
+
+	for (i = 0; i < n; i++, rela++) {
+		rc = apply_rela(rela, base, symtab, strtab, me, write);
+		if (rc)
+			return rc;
+	}
+	return 0;
+}
+
+int apply_relocate_add(Elf_Shdr *sechdrs, const char *strtab,
+		       unsigned int symindex, unsigned int relsec,
+		       struct module *me)
+{
+	bool early = me->state == MODULE_STATE_UNFORMED;
+	void *(*write)(void *, const void *, size_t) = memcpy;
+
+	if (!early)
+		write = s390_kernel_write;
+
+	return __apply_relocate_add(sechdrs, strtab, symindex, relsec, me,
+				    write);
+}
+
+#ifdef CONFIG_FUNCTION_TRACER
+static int module_alloc_ftrace_hotpatch_trampolines(struct module *me,
+						    const Elf_Shdr *s)
+{
+	char *start, *end;
+	int numpages;
+	size_t size;
+
+	size = FTRACE_HOTPATCH_TRAMPOLINES_SIZE(s->sh_size);
+	numpages = DIV_ROUND_UP(size, PAGE_SIZE);
+	start = module_alloc(numpages * PAGE_SIZE);
+	if (!start)
+		return -ENOMEM;
+	set_memory_rox((unsigned long)start, numpages);
+	end = start + size;
+
+	me->arch.trampolines_start = (struct ftrace_hotpatch_trampoline *)start;
+	me->arch.trampolines_end = (struct ftrace_hotpatch_trampoline *)end;
+	me->arch.next_trampoline = me->arch.trampolines_start;
+
+	return 0;
+}
+#endif /* CONFIG_FUNCTION_TRACER */
+
+int module_finalize(const Elf_Ehdr *hdr,
+		    const Elf_Shdr *sechdrs,
+		    struct module *me)
+{
+	const Elf_Shdr *s;
+	char *secstrings, *secname;
+	void *aseg;
+#ifdef CONFIG_FUNCTION_TRACER
+	int ret;
+#endif
+
+	if (IS_ENABLED(CONFIG_EXPOLINE) &&
+	    !nospec_disable && me->arch.plt_size) {
+		unsigned int *ij;
+
+		ij = me->mem[MOD_TEXT].base + me->arch.plt_offset +
+			me->arch.plt_size - PLT_ENTRY_SIZE;
+		ij[0] = 0xc6000000;	/* exrl	%r0,.+10	*/
+		ij[1] = 0x0005a7f4;	/* j	.		*/
+		ij[2] = 0x000007f1;	/* br	%r1		*/
+	}
+
+	secstrings = (void *)hdr + sechdrs[hdr->e_shstrndx].sh_offset;
+	for (s = sechdrs; s < sechdrs + hdr->e_shnum; s++) {
+		aseg = (void *) s->sh_addr;
+		secname = secstrings + s->sh_name;
+
+		if (!strcmp(".altinstructions", secname))
+			/* patch .altinstructions */
+			apply_alternatives(aseg, aseg + s->sh_size);
+
+		if (IS_ENABLED(CONFIG_EXPOLINE) &&
+		    (str_has_prefix(secname, ".s390_indirect")))
+			nospec_revert(aseg, aseg + s->sh_size);
+
+		if (IS_ENABLED(CONFIG_EXPOLINE) &&
+		    (str_has_prefix(secname, ".s390_return")))
+			nospec_revert(aseg, aseg + s->sh_size);
+
+#ifdef CONFIG_FUNCTION_TRACER
+		if (!strcmp(FTRACE_CALLSITE_SECTION, secname)) {
+			ret = module_alloc_ftrace_hotpatch_trampolines(me, s);
+			if (ret < 0)
+				return ret;
+		}
+#endif /* CONFIG_FUNCTION_TRACER */
+	}
+
+	return 0;
+}
diff --git a/arch/s390/kernel/nmi.c b/arch/s390/kernel/nmi.c
new file mode 100644
index 0000000000..38ec048752
--- /dev/null
+++ b/arch/s390/kernel/nmi.c
@@ -0,0 +1,513 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ *   Machine check handler
+ *
+ *    Copyright IBM Corp. 2000, 2009
+ *    Author(s): Ingo Adlung <adlung@de.ibm.com>,
+ *		 Martin Schwidefsky <schwidefsky@de.ibm.com>,
+ *		 Cornelia Huck <cornelia.huck@de.ibm.com>,
+ */
+
+#include <linux/kernel_stat.h>
+#include <linux/init.h>
+#include <linux/errno.h>
+#include <linux/entry-common.h>
+#include <linux/hardirq.h>
+#include <linux/log2.h>
+#include <linux/kprobes.h>
+#include <linux/kmemleak.h>
+#include <linux/time.h>
+#include <linux/module.h>
+#include <linux/sched/signal.h>
+#include <linux/kvm_host.h>
+#include <linux/export.h>
+#include <asm/lowcore.h>
+#include <asm/smp.h>
+#include <asm/stp.h>
+#include <asm/cputime.h>
+#include <asm/nmi.h>
+#include <asm/crw.h>
+#include <asm/switch_to.h>
+#include <asm/ctl_reg.h>
+#include <asm/asm-offsets.h>
+#include <asm/pai.h>
+#include <asm/vx-insn.h>
+
+struct mcck_struct {
+	unsigned int kill_task : 1;
+	unsigned int channel_report : 1;
+	unsigned int warning : 1;
+	unsigned int stp_queue : 1;
+	unsigned long mcck_code;
+};
+
+static DEFINE_PER_CPU(struct mcck_struct, cpu_mcck);
+
+static inline int nmi_needs_mcesa(void)
+{
+	return MACHINE_HAS_VX || MACHINE_HAS_GS;
+}
+
+/*
+ * The initial machine check extended save area for the boot CPU.
+ * It will be replaced on the boot CPU reinit with an allocated
+ * structure. The structure is required for machine check happening
+ * early in the boot process.
+ */
+static struct mcesa boot_mcesa __aligned(MCESA_MAX_SIZE);
+
+void __init nmi_alloc_mcesa_early(u64 *mcesad)
+{
+	if (!nmi_needs_mcesa())
+		return;
+	*mcesad = __pa(&boot_mcesa);
+	if (MACHINE_HAS_GS)
+		*mcesad |= ilog2(MCESA_MAX_SIZE);
+}
+
+int nmi_alloc_mcesa(u64 *mcesad)
+{
+	unsigned long size;
+	void *origin;
+
+	*mcesad = 0;
+	if (!nmi_needs_mcesa())
+		return 0;
+	size = MACHINE_HAS_GS ? MCESA_MAX_SIZE : MCESA_MIN_SIZE;
+	origin = kmalloc(size, GFP_KERNEL);
+	if (!origin)
+		return -ENOMEM;
+	/* The pointer is stored with mcesa_bits ORed in */
+	kmemleak_not_leak(origin);
+	*mcesad = __pa(origin);
+	if (MACHINE_HAS_GS)
+		*mcesad |= ilog2(MCESA_MAX_SIZE);
+	return 0;
+}
+
+void nmi_free_mcesa(u64 *mcesad)
+{
+	if (!nmi_needs_mcesa())
+		return;
+	kfree(__va(*mcesad & MCESA_ORIGIN_MASK));
+}
+
+static __always_inline char *nmi_puts(char *dest, const char *src)
+{
+	while (*src)
+		*dest++ = *src++;
+	*dest = 0;
+	return dest;
+}
+
+static __always_inline char *u64_to_hex(char *dest, u64 val)
+{
+	int i, num;
+
+	for (i = 1; i <= 16; i++) {
+		num = (val >> (64 - 4 * i)) & 0xf;
+		if (num >= 10)
+			*dest++ = 'A' + num - 10;
+		else
+			*dest++ = '0' + num;
+	}
+	*dest = 0;
+	return dest;
+}
+
+static notrace void s390_handle_damage(void)
+{
+	union ctlreg0 cr0, cr0_new;
+	char message[100];
+	psw_t psw_save;
+	char *ptr;
+
+	smp_emergency_stop();
+	diag_amode31_ops.diag308_reset();
+	ptr = nmi_puts(message, "System stopped due to unrecoverable machine check, code: 0x");
+	u64_to_hex(ptr, S390_lowcore.mcck_interruption_code);
+
+	/*
+	 * Disable low address protection and make machine check new PSW a
+	 * disabled wait PSW. Any additional machine check cannot be handled.
+	 */
+	__ctl_store(cr0.val, 0, 0);
+	cr0_new = cr0;
+	cr0_new.lap = 0;
+	__ctl_load(cr0_new.val, 0, 0);
+	psw_save = S390_lowcore.mcck_new_psw;
+	psw_bits(S390_lowcore.mcck_new_psw).io = 0;
+	psw_bits(S390_lowcore.mcck_new_psw).ext = 0;
+	psw_bits(S390_lowcore.mcck_new_psw).wait = 1;
+	sclp_emergency_printk(message);
+
+	/*
+	 * Restore machine check new PSW and control register 0 to original
+	 * values. This makes possible system dump analysis easier.
+	 */
+	S390_lowcore.mcck_new_psw = psw_save;
+	__ctl_load(cr0.val, 0, 0);
+	disabled_wait();
+	while (1);
+}
+NOKPROBE_SYMBOL(s390_handle_damage);
+
+/*
+ * Main machine check handler function. Will be called with interrupts disabled
+ * and machine checks enabled.
+ */
+void s390_handle_mcck(void)
+{
+	struct mcck_struct mcck;
+
+	/*
+	 * Disable machine checks and get the current state of accumulated
+	 * machine checks. Afterwards delete the old state and enable machine
+	 * checks again.
+	 */
+	local_mcck_disable();
+	mcck = *this_cpu_ptr(&cpu_mcck);
+	memset(this_cpu_ptr(&cpu_mcck), 0, sizeof(mcck));
+	local_mcck_enable();
+
+	if (mcck.channel_report)
+		crw_handle_channel_report();
+	/*
+	 * A warning may remain for a prolonged period on the bare iron.
+	 * (actually until the machine is powered off, or the problem is gone)
+	 * So we just stop listening for the WARNING MCH and avoid continuously
+	 * being interrupted.  One caveat is however, that we must do this per
+	 * processor and cannot use the smp version of ctl_clear_bit().
+	 * On VM we only get one interrupt per virtally presented machinecheck.
+	 * Though one suffices, we may get one interrupt per (virtual) cpu.
+	 */
+	if (mcck.warning) {	/* WARNING pending ? */
+		static int mchchk_wng_posted = 0;
+
+		/* Use single cpu clear, as we cannot handle smp here. */
+		__ctl_clear_bit(14, 24);	/* Disable WARNING MCH */
+		if (xchg(&mchchk_wng_posted, 1) == 0)
+			kill_cad_pid(SIGPWR, 1);
+	}
+	if (mcck.stp_queue)
+		stp_queue_work();
+	if (mcck.kill_task) {
+		printk(KERN_EMERG "mcck: Terminating task because of machine "
+		       "malfunction (code 0x%016lx).\n", mcck.mcck_code);
+		printk(KERN_EMERG "mcck: task: %s, pid: %d.\n",
+		       current->comm, current->pid);
+		if (is_global_init(current))
+			panic("mcck: Attempting to kill init!\n");
+		do_send_sig_info(SIGKILL, SEND_SIG_PRIV, current, PIDTYPE_PID);
+	}
+}
+
+/*
+ * returns 0 if register contents could be validated
+ * returns 1 otherwise
+ */
+static int notrace s390_validate_registers(union mci mci)
+{
+	struct mcesa *mcesa;
+	void *fpt_save_area;
+	union ctlreg2 cr2;
+	int kill_task;
+	u64 zero;
+
+	kill_task = 0;
+	zero = 0;
+
+	if (!mci.gr || !mci.fp)
+		kill_task = 1;
+	fpt_save_area = &S390_lowcore.floating_pt_save_area;
+	if (!mci.fc) {
+		kill_task = 1;
+		asm volatile(
+			"	lfpc	%0\n"
+			:
+			: "Q" (zero));
+	} else {
+		asm volatile(
+			"	lfpc	%0\n"
+			:
+			: "Q" (S390_lowcore.fpt_creg_save_area));
+	}
+
+	mcesa = __va(S390_lowcore.mcesad & MCESA_ORIGIN_MASK);
+	if (!MACHINE_HAS_VX) {
+		/* Validate floating point registers */
+		asm volatile(
+			"	ld	0,0(%0)\n"
+			"	ld	1,8(%0)\n"
+			"	ld	2,16(%0)\n"
+			"	ld	3,24(%0)\n"
+			"	ld	4,32(%0)\n"
+			"	ld	5,40(%0)\n"
+			"	ld	6,48(%0)\n"
+			"	ld	7,56(%0)\n"
+			"	ld	8,64(%0)\n"
+			"	ld	9,72(%0)\n"
+			"	ld	10,80(%0)\n"
+			"	ld	11,88(%0)\n"
+			"	ld	12,96(%0)\n"
+			"	ld	13,104(%0)\n"
+			"	ld	14,112(%0)\n"
+			"	ld	15,120(%0)\n"
+			:
+			: "a" (fpt_save_area)
+			: "memory");
+	} else {
+		/* Validate vector registers */
+		union ctlreg0 cr0;
+
+		/*
+		 * The vector validity must only be checked if not running a
+		 * KVM guest. For KVM guests the machine check is forwarded by
+		 * KVM and it is the responsibility of the guest to take
+		 * appropriate actions. The host vector or FPU values have been
+		 * saved by KVM and will be restored by KVM.
+		 */
+		if (!mci.vr && !test_cpu_flag(CIF_MCCK_GUEST))
+			kill_task = 1;
+		cr0.val = S390_lowcore.cregs_save_area[0];
+		cr0.afp = cr0.vx = 1;
+		__ctl_load(cr0.val, 0, 0);
+		asm volatile(
+			"	la	1,%0\n"
+			"	VLM	0,15,0,1\n"
+			"	VLM	16,31,256,1\n"
+			:
+			: "Q" (*(struct vx_array *)mcesa->vector_save_area)
+			: "1");
+		__ctl_load(S390_lowcore.cregs_save_area[0], 0, 0);
+	}
+	/* Validate access registers */
+	asm volatile(
+		"	lam	0,15,0(%0)\n"
+		:
+		: "a" (&S390_lowcore.access_regs_save_area)
+		: "memory");
+	if (!mci.ar)
+		kill_task = 1;
+	/* Validate guarded storage registers */
+	cr2.val = S390_lowcore.cregs_save_area[2];
+	if (cr2.gse) {
+		if (!mci.gs) {
+			/*
+			 * 2 cases:
+			 * - machine check in kernel or userspace
+			 * - machine check while running SIE (KVM guest)
+			 * For kernel or userspace the userspace values of
+			 * guarded storage control can not be recreated, the
+			 * process must be terminated.
+			 * For SIE the guest values of guarded storage can not
+			 * be recreated. This is either due to a bug or due to
+			 * GS being disabled in the guest. The guest will be
+			 * notified by KVM code and the guests machine check
+			 * handling must take care of this.  The host values
+			 * are saved by KVM and are not affected.
+			 */
+			if (!test_cpu_flag(CIF_MCCK_GUEST))
+				kill_task = 1;
+		} else {
+			load_gs_cb((struct gs_cb *)mcesa->guarded_storage_save_area);
+		}
+	}
+	/*
+	 * The getcpu vdso syscall reads CPU number from the programmable
+	 * field of the TOD clock. Disregard the TOD programmable register
+	 * validity bit and load the CPU number into the TOD programmable
+	 * field unconditionally.
+	 */
+	set_tod_programmable_field(raw_smp_processor_id());
+	/* Validate clock comparator register */
+	set_clock_comparator(S390_lowcore.clock_comparator);
+
+	if (!mci.ms || !mci.pm || !mci.ia)
+		kill_task = 1;
+
+	return kill_task;
+}
+NOKPROBE_SYMBOL(s390_validate_registers);
+
+/*
+ * Backup the guest's machine check info to its description block
+ */
+static void notrace s390_backup_mcck_info(struct pt_regs *regs)
+{
+	struct mcck_volatile_info *mcck_backup;
+	struct sie_page *sie_page;
+
+	/* r14 contains the sie block, which was set in sie64a */
+	struct kvm_s390_sie_block *sie_block = phys_to_virt(regs->gprs[14]);
+
+	if (sie_block == NULL)
+		/* Something's seriously wrong, stop system. */
+		s390_handle_damage();
+
+	sie_page = container_of(sie_block, struct sie_page, sie_block);
+	mcck_backup = &sie_page->mcck_info;
+	mcck_backup->mcic = S390_lowcore.mcck_interruption_code &
+				~(MCCK_CODE_CP | MCCK_CODE_EXT_DAMAGE);
+	mcck_backup->ext_damage_code = S390_lowcore.external_damage_code;
+	mcck_backup->failing_storage_address
+			= S390_lowcore.failing_storage_address;
+}
+NOKPROBE_SYMBOL(s390_backup_mcck_info);
+
+#define MAX_IPD_COUNT	29
+#define MAX_IPD_TIME	(5 * 60 * USEC_PER_SEC) /* 5 minutes */
+
+#define ED_STP_ISLAND	6	/* External damage STP island check */
+#define ED_STP_SYNC	7	/* External damage STP sync check */
+
+#define MCCK_CODE_NO_GUEST	(MCCK_CODE_CP | MCCK_CODE_EXT_DAMAGE)
+
+/*
+ * machine check handler.
+ */
+void notrace s390_do_machine_check(struct pt_regs *regs)
+{
+	static int ipd_count;
+	static DEFINE_SPINLOCK(ipd_lock);
+	static unsigned long long last_ipd;
+	struct mcck_struct *mcck;
+	unsigned long long tmp;
+	irqentry_state_t irq_state;
+	union mci mci;
+	unsigned long mcck_dam_code;
+	int mcck_pending = 0;
+
+	irq_state = irqentry_nmi_enter(regs);
+
+	if (user_mode(regs))
+		update_timer_mcck();
+	inc_irq_stat(NMI_NMI);
+	mci.val = S390_lowcore.mcck_interruption_code;
+	mcck = this_cpu_ptr(&cpu_mcck);
+
+	/*
+	 * Reinject the instruction processing damages' machine checks
+	 * including Delayed Access Exception into the guest
+	 * instead of damaging the host if they happen in the guest.
+	 */
+	if (mci.pd && !test_cpu_flag(CIF_MCCK_GUEST)) {
+		if (mci.b) {
+			/* Processing backup -> verify if we can survive this */
+			u64 z_mcic, o_mcic, t_mcic;
+			z_mcic = (1ULL<<63 | 1ULL<<59 | 1ULL<<29);
+			o_mcic = (1ULL<<43 | 1ULL<<42 | 1ULL<<41 | 1ULL<<40 |
+				  1ULL<<36 | 1ULL<<35 | 1ULL<<34 | 1ULL<<32 |
+				  1ULL<<30 | 1ULL<<21 | 1ULL<<20 | 1ULL<<17 |
+				  1ULL<<16);
+			t_mcic = mci.val;
+
+			if (((t_mcic & z_mcic) != 0) ||
+			    ((t_mcic & o_mcic) != o_mcic)) {
+				s390_handle_damage();
+			}
+
+			/*
+			 * Nullifying exigent condition, therefore we might
+			 * retry this instruction.
+			 */
+			spin_lock(&ipd_lock);
+			tmp = get_tod_clock();
+			if (((tmp - last_ipd) >> 12) < MAX_IPD_TIME)
+				ipd_count++;
+			else
+				ipd_count = 1;
+			last_ipd = tmp;
+			if (ipd_count == MAX_IPD_COUNT)
+				s390_handle_damage();
+			spin_unlock(&ipd_lock);
+		} else {
+			/* Processing damage -> stopping machine */
+			s390_handle_damage();
+		}
+	}
+	if (s390_validate_registers(mci)) {
+		if (!user_mode(regs))
+			s390_handle_damage();
+		/*
+		 * Couldn't restore all register contents for the
+		 * user space process -> mark task for termination.
+		 */
+		mcck->kill_task = 1;
+		mcck->mcck_code = mci.val;
+		mcck_pending = 1;
+	}
+
+	/*
+	 * Backup the machine check's info if it happens when the guest
+	 * is running.
+	 */
+	if (test_cpu_flag(CIF_MCCK_GUEST))
+		s390_backup_mcck_info(regs);
+
+	if (mci.cd) {
+		/* Timing facility damage */
+		s390_handle_damage();
+	}
+	if (mci.ed && mci.ec) {
+		/* External damage */
+		if (S390_lowcore.external_damage_code & (1U << ED_STP_SYNC))
+			mcck->stp_queue |= stp_sync_check();
+		if (S390_lowcore.external_damage_code & (1U << ED_STP_ISLAND))
+			mcck->stp_queue |= stp_island_check();
+		mcck_pending = 1;
+	}
+	/*
+	 * Reinject storage related machine checks into the guest if they
+	 * happen when the guest is running.
+	 */
+	if (!test_cpu_flag(CIF_MCCK_GUEST)) {
+		/* Storage error uncorrected */
+		if (mci.se)
+			s390_handle_damage();
+		/* Storage key-error uncorrected */
+		if (mci.ke)
+			s390_handle_damage();
+		/* Storage degradation */
+		if (mci.ds && mci.fa)
+			s390_handle_damage();
+	}
+	if (mci.cp) {
+		/* Channel report word pending */
+		mcck->channel_report = 1;
+		mcck_pending = 1;
+	}
+	if (mci.w) {
+		/* Warning pending */
+		mcck->warning = 1;
+		mcck_pending = 1;
+	}
+
+	/*
+	 * If there are only Channel Report Pending and External Damage
+	 * machine checks, they will not be reinjected into the guest
+	 * because they refer to host conditions only.
+	 */
+	mcck_dam_code = (mci.val & MCIC_SUBCLASS_MASK);
+	if (test_cpu_flag(CIF_MCCK_GUEST) &&
+	(mcck_dam_code & MCCK_CODE_NO_GUEST) != mcck_dam_code) {
+		/* Set exit reason code for host's later handling */
+		*((long *)(regs->gprs[15] + __SF_SIE_REASON)) = -EINTR;
+	}
+	clear_cpu_flag(CIF_MCCK_GUEST);
+
+	if (mcck_pending)
+		schedule_mcck_handler();
+
+	irqentry_nmi_exit(regs, irq_state);
+}
+NOKPROBE_SYMBOL(s390_do_machine_check);
+
+static int __init machine_check_init(void)
+{
+	ctl_set_bit(14, 25);	/* enable external damage MCH */
+	ctl_set_bit(14, 27);	/* enable system recovery MCH */
+	ctl_set_bit(14, 24);	/* enable warning MCH */
+	return 0;
+}
+early_initcall(machine_check_init);
diff --git a/arch/s390/kernel/nospec-branch.c b/arch/s390/kernel/nospec-branch.c
new file mode 100644
index 0000000000..d1b16d83e4
--- /dev/null
+++ b/arch/s390/kernel/nospec-branch.c
@@ -0,0 +1,156 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/module.h>
+#include <linux/device.h>
+#include <linux/cpu.h>
+#include <asm/nospec-branch.h>
+
+static int __init nobp_setup_early(char *str)
+{
+	bool enabled;
+	int rc;
+
+	rc = kstrtobool(str, &enabled);
+	if (rc)
+		return rc;
+	if (enabled && test_facility(82)) {
+		/*
+		 * The user explicitly requested nobp=1, enable it and
+		 * disable the expoline support.
+		 */
+		__set_facility(82, alt_stfle_fac_list);
+		if (IS_ENABLED(CONFIG_EXPOLINE))
+			nospec_disable = 1;
+	} else {
+		__clear_facility(82, alt_stfle_fac_list);
+	}
+	return 0;
+}
+early_param("nobp", nobp_setup_early);
+
+static int __init nospec_setup_early(char *str)
+{
+	__clear_facility(82, alt_stfle_fac_list);
+	return 0;
+}
+early_param("nospec", nospec_setup_early);
+
+static int __init nospec_report(void)
+{
+	if (test_facility(156))
+		pr_info("Spectre V2 mitigation: etokens\n");
+	if (nospec_uses_trampoline())
+		pr_info("Spectre V2 mitigation: execute trampolines\n");
+	if (__test_facility(82, alt_stfle_fac_list))
+		pr_info("Spectre V2 mitigation: limited branch prediction\n");
+	return 0;
+}
+arch_initcall(nospec_report);
+
+#ifdef CONFIG_EXPOLINE
+
+int nospec_disable = IS_ENABLED(CONFIG_EXPOLINE_OFF);
+
+static int __init nospectre_v2_setup_early(char *str)
+{
+	nospec_disable = 1;
+	return 0;
+}
+early_param("nospectre_v2", nospectre_v2_setup_early);
+
+void __init nospec_auto_detect(void)
+{
+	if (test_facility(156) || cpu_mitigations_off()) {
+		/*
+		 * The machine supports etokens.
+		 * Disable expolines and disable nobp.
+		 */
+		if (__is_defined(CC_USING_EXPOLINE))
+			nospec_disable = 1;
+		__clear_facility(82, alt_stfle_fac_list);
+	} else if (__is_defined(CC_USING_EXPOLINE)) {
+		/*
+		 * The kernel has been compiled with expolines.
+		 * Keep expolines enabled and disable nobp.
+		 */
+		nospec_disable = 0;
+		__clear_facility(82, alt_stfle_fac_list);
+	}
+	/*
+	 * If the kernel has not been compiled with expolines the
+	 * nobp setting decides what is done, this depends on the
+	 * CONFIG_KERNEL_NP option and the nobp/nospec parameters.
+	 */
+}
+
+static int __init spectre_v2_setup_early(char *str)
+{
+	if (str && !strncmp(str, "on", 2)) {
+		nospec_disable = 0;
+		__clear_facility(82, alt_stfle_fac_list);
+	}
+	if (str && !strncmp(str, "off", 3))
+		nospec_disable = 1;
+	if (str && !strncmp(str, "auto", 4))
+		nospec_auto_detect();
+	return 0;
+}
+early_param("spectre_v2", spectre_v2_setup_early);
+
+static void __init_or_module __nospec_revert(s32 *start, s32 *end)
+{
+	enum { BRCL_EXPOLINE, BRASL_EXPOLINE } type;
+	static const u8 branch[] = { 0x47, 0x00, 0x07, 0x00 };
+	u8 *instr, *thunk, *br;
+	u8 insnbuf[6];
+	s32 *epo;
+
+	/* Second part of the instruction replace is always a nop */
+	memcpy(insnbuf + 2, branch, sizeof(branch));
+	for (epo = start; epo < end; epo++) {
+		instr = (u8 *) epo + *epo;
+		if (instr[0] == 0xc0 && (instr[1] & 0x0f) == 0x04)
+			type = BRCL_EXPOLINE;	/* brcl instruction */
+		else if (instr[0] == 0xc0 && (instr[1] & 0x0f) == 0x05)
+			type = BRASL_EXPOLINE;	/* brasl instruction */
+		else
+			continue;
+		thunk = instr + (*(int *)(instr + 2)) * 2;
+		if (thunk[0] == 0xc6 && thunk[1] == 0x00)
+			/* exrl %r0,<target-br> */
+			br = thunk + (*(int *)(thunk + 2)) * 2;
+		else
+			continue;
+		if (br[0] != 0x07 || (br[1] & 0xf0) != 0xf0)
+			continue;
+		switch (type) {
+		case BRCL_EXPOLINE:
+			/* brcl to thunk, replace with br + nop */
+			insnbuf[0] = br[0];
+			insnbuf[1] = (instr[1] & 0xf0) | (br[1] & 0x0f);
+			break;
+		case BRASL_EXPOLINE:
+			/* brasl to thunk, replace with basr + nop */
+			insnbuf[0] = 0x0d;
+			insnbuf[1] = (instr[1] & 0xf0) | (br[1] & 0x0f);
+			break;
+		}
+
+		s390_kernel_write(instr, insnbuf, 6);
+	}
+}
+
+void __init_or_module nospec_revert(s32 *start, s32 *end)
+{
+	if (nospec_disable)
+		__nospec_revert(start, end);
+}
+
+extern s32 __nospec_call_start[], __nospec_call_end[];
+extern s32 __nospec_return_start[], __nospec_return_end[];
+void __init nospec_init_branches(void)
+{
+	nospec_revert(__nospec_call_start, __nospec_call_end);
+	nospec_revert(__nospec_return_start, __nospec_return_end);
+}
+
+#endif /* CONFIG_EXPOLINE */
diff --git a/arch/s390/kernel/nospec-sysfs.c b/arch/s390/kernel/nospec-sysfs.c
new file mode 100644
index 0000000000..52d4353188
--- /dev/null
+++ b/arch/s390/kernel/nospec-sysfs.c
@@ -0,0 +1,23 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/device.h>
+#include <linux/cpu.h>
+#include <asm/facility.h>
+#include <asm/nospec-branch.h>
+
+ssize_t cpu_show_spectre_v1(struct device *dev,
+			    struct device_attribute *attr, char *buf)
+{
+	return sprintf(buf, "Mitigation: __user pointer sanitization\n");
+}
+
+ssize_t cpu_show_spectre_v2(struct device *dev,
+			    struct device_attribute *attr, char *buf)
+{
+	if (test_facility(156))
+		return sprintf(buf, "Mitigation: etokens\n");
+	if (nospec_uses_trampoline())
+		return sprintf(buf, "Mitigation: execute trampolines\n");
+	if (__test_facility(82, alt_stfle_fac_list))
+		return sprintf(buf, "Mitigation: limited branch prediction\n");
+	return sprintf(buf, "Vulnerable\n");
+}
diff --git a/arch/s390/kernel/numa.c b/arch/s390/kernel/numa.c
new file mode 100644
index 0000000000..23ab9f02f2
--- /dev/null
+++ b/arch/s390/kernel/numa.c
@@ -0,0 +1,35 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * NUMA support for s390
+ *
+ * Implement NUMA core code.
+ *
+ * Copyright IBM Corp. 2015
+ */
+
+#include <linux/kernel.h>
+#include <linux/mmzone.h>
+#include <linux/cpumask.h>
+#include <linux/memblock.h>
+#include <linux/node.h>
+#include <asm/numa.h>
+
+struct pglist_data *node_data[MAX_NUMNODES];
+EXPORT_SYMBOL(node_data);
+
+void __init numa_setup(void)
+{
+	int nid;
+
+	nodes_clear(node_possible_map);
+	node_set(0, node_possible_map);
+	node_set_online(0);
+	for (nid = 0; nid < MAX_NUMNODES; nid++) {
+		NODE_DATA(nid) = memblock_alloc(sizeof(pg_data_t), 8);
+		if (!NODE_DATA(nid))
+			panic("%s: Failed to allocate %zu bytes align=0x%x\n",
+			      __func__, sizeof(pg_data_t), 8);
+	}
+	NODE_DATA(0)->node_spanned_pages = memblock_end_of_DRAM() >> PAGE_SHIFT;
+	NODE_DATA(0)->node_id = 0;
+}
diff --git a/arch/s390/kernel/os_info.c b/arch/s390/kernel/os_info.c
new file mode 100644
index 0000000000..6e1824141b
--- /dev/null
+++ b/arch/s390/kernel/os_info.c
@@ -0,0 +1,173 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * OS info memory interface
+ *
+ * Copyright IBM Corp. 2012
+ * Author(s): Michael Holzheu <holzheu@linux.vnet.ibm.com>
+ */
+
+#define KMSG_COMPONENT "os_info"
+#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
+
+#include <linux/crash_dump.h>
+#include <linux/kernel.h>
+#include <linux/slab.h>
+#include <asm/checksum.h>
+#include <asm/abs_lowcore.h>
+#include <asm/os_info.h>
+#include <asm/maccess.h>
+#include <asm/asm-offsets.h>
+
+/*
+ * OS info structure has to be page aligned
+ */
+static struct os_info os_info __page_aligned_data;
+
+/*
+ * Compute checksum over OS info structure
+ */
+u32 os_info_csum(struct os_info *os_info)
+{
+	int size = sizeof(*os_info) - offsetof(struct os_info, version_major);
+	return (__force u32)csum_partial(&os_info->version_major, size, 0);
+}
+
+/*
+ * Add crashkernel info to OS info and update checksum
+ */
+void os_info_crashkernel_add(unsigned long base, unsigned long size)
+{
+	os_info.crashkernel_addr = (u64)(unsigned long)base;
+	os_info.crashkernel_size = (u64)(unsigned long)size;
+	os_info.csum = os_info_csum(&os_info);
+}
+
+/*
+ * Add OS info entry and update checksum
+ */
+void os_info_entry_add(int nr, void *ptr, u64 size)
+{
+	os_info.entry[nr].addr = __pa(ptr);
+	os_info.entry[nr].size = size;
+	os_info.entry[nr].csum = (__force u32)csum_partial(ptr, size, 0);
+	os_info.csum = os_info_csum(&os_info);
+}
+
+/*
+ * Initialize OS info structure and set lowcore pointer
+ */
+void __init os_info_init(void)
+{
+	struct lowcore *abs_lc;
+
+	os_info.version_major = OS_INFO_VERSION_MAJOR;
+	os_info.version_minor = OS_INFO_VERSION_MINOR;
+	os_info.magic = OS_INFO_MAGIC;
+	os_info.csum = os_info_csum(&os_info);
+	abs_lc = get_abs_lowcore();
+	abs_lc->os_info = __pa(&os_info);
+	put_abs_lowcore(abs_lc);
+}
+
+#ifdef CONFIG_CRASH_DUMP
+
+static struct os_info *os_info_old;
+
+/*
+ * Allocate and copy OS info entry from oldmem
+ */
+static void os_info_old_alloc(int nr, int align)
+{
+	unsigned long addr, size = 0;
+	char *buf, *buf_align, *msg;
+	u32 csum;
+
+	addr = os_info_old->entry[nr].addr;
+	if (!addr) {
+		msg = "not available";
+		goto fail;
+	}
+	size = os_info_old->entry[nr].size;
+	buf = kmalloc(size + align - 1, GFP_KERNEL);
+	if (!buf) {
+		msg = "alloc failed";
+		goto fail;
+	}
+	buf_align = PTR_ALIGN(buf, align);
+	if (copy_oldmem_kernel(buf_align, addr, size)) {
+		msg = "copy failed";
+		goto fail_free;
+	}
+	csum = (__force u32)csum_partial(buf_align, size, 0);
+	if (csum != os_info_old->entry[nr].csum) {
+		msg = "checksum failed";
+		goto fail_free;
+	}
+	os_info_old->entry[nr].addr = (u64)(unsigned long)buf_align;
+	msg = "copied";
+	goto out;
+fail_free:
+	kfree(buf);
+fail:
+	os_info_old->entry[nr].addr = 0;
+out:
+	pr_info("entry %i: %s (addr=0x%lx size=%lu)\n",
+		nr, msg, addr, size);
+}
+
+/*
+ * Initialize os info and os info entries from oldmem
+ */
+static void os_info_old_init(void)
+{
+	static int os_info_init;
+	unsigned long addr;
+
+	if (os_info_init)
+		return;
+	if (!oldmem_data.start)
+		goto fail;
+	if (copy_oldmem_kernel(&addr, __LC_OS_INFO, sizeof(addr)))
+		goto fail;
+	if (addr == 0 || addr % PAGE_SIZE)
+		goto fail;
+	os_info_old = kzalloc(sizeof(*os_info_old), GFP_KERNEL);
+	if (!os_info_old)
+		goto fail;
+	if (copy_oldmem_kernel(os_info_old, addr, sizeof(*os_info_old)))
+		goto fail_free;
+	if (os_info_old->magic != OS_INFO_MAGIC)
+		goto fail_free;
+	if (os_info_old->csum != os_info_csum(os_info_old))
+		goto fail_free;
+	if (os_info_old->version_major > OS_INFO_VERSION_MAJOR)
+		goto fail_free;
+	os_info_old_alloc(OS_INFO_VMCOREINFO, 1);
+	os_info_old_alloc(OS_INFO_REIPL_BLOCK, 1);
+	pr_info("crashkernel: addr=0x%lx size=%lu\n",
+		(unsigned long) os_info_old->crashkernel_addr,
+		(unsigned long) os_info_old->crashkernel_size);
+	os_info_init = 1;
+	return;
+fail_free:
+	kfree(os_info_old);
+fail:
+	os_info_init = 1;
+	os_info_old = NULL;
+}
+
+/*
+ * Return pointer to os infor entry and its size
+ */
+void *os_info_old_entry(int nr, unsigned long *size)
+{
+	os_info_old_init();
+
+	if (!os_info_old)
+		return NULL;
+	if (!os_info_old->entry[nr].addr)
+		return NULL;
+	*size = (unsigned long) os_info_old->entry[nr].size;
+	return (void *)(unsigned long)os_info_old->entry[nr].addr;
+}
+#endif
diff --git a/arch/s390/kernel/perf_cpum_cf.c b/arch/s390/kernel/perf_cpum_cf.c
new file mode 100644
index 0000000000..850c11ea63
--- /dev/null
+++ b/arch/s390/kernel/perf_cpum_cf.c
@@ -0,0 +1,1950 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Performance event support for s390x - CPU-measurement Counter Facility
+ *
+ *  Copyright IBM Corp. 2012, 2023
+ *  Author(s): Hendrik Brueckner <brueckner@linux.ibm.com>
+ *	       Thomas Richter <tmricht@linux.ibm.com>
+ */
+#define KMSG_COMPONENT	"cpum_cf"
+#define pr_fmt(fmt)	KMSG_COMPONENT ": " fmt
+
+#include <linux/kernel.h>
+#include <linux/kernel_stat.h>
+#include <linux/percpu.h>
+#include <linux/notifier.h>
+#include <linux/init.h>
+#include <linux/export.h>
+#include <linux/miscdevice.h>
+#include <linux/perf_event.h>
+
+#include <asm/cpu_mf.h>
+#include <asm/hwctrset.h>
+#include <asm/debug.h>
+
+enum cpumf_ctr_set {
+	CPUMF_CTR_SET_BASIC   = 0,    /* Basic Counter Set */
+	CPUMF_CTR_SET_USER    = 1,    /* Problem-State Counter Set */
+	CPUMF_CTR_SET_CRYPTO  = 2,    /* Crypto-Activity Counter Set */
+	CPUMF_CTR_SET_EXT     = 3,    /* Extended Counter Set */
+	CPUMF_CTR_SET_MT_DIAG = 4,    /* MT-diagnostic Counter Set */
+
+	/* Maximum number of counter sets */
+	CPUMF_CTR_SET_MAX,
+};
+
+#define CPUMF_LCCTL_ENABLE_SHIFT    16
+#define CPUMF_LCCTL_ACTCTL_SHIFT     0
+
+static inline void ctr_set_enable(u64 *state, u64 ctrsets)
+{
+	*state |= ctrsets << CPUMF_LCCTL_ENABLE_SHIFT;
+}
+
+static inline void ctr_set_disable(u64 *state, u64 ctrsets)
+{
+	*state &= ~(ctrsets << CPUMF_LCCTL_ENABLE_SHIFT);
+}
+
+static inline void ctr_set_start(u64 *state, u64 ctrsets)
+{
+	*state |= ctrsets << CPUMF_LCCTL_ACTCTL_SHIFT;
+}
+
+static inline void ctr_set_stop(u64 *state, u64 ctrsets)
+{
+	*state &= ~(ctrsets << CPUMF_LCCTL_ACTCTL_SHIFT);
+}
+
+static inline int ctr_stcctm(enum cpumf_ctr_set set, u64 range, u64 *dest)
+{
+	switch (set) {
+	case CPUMF_CTR_SET_BASIC:
+		return stcctm(BASIC, range, dest);
+	case CPUMF_CTR_SET_USER:
+		return stcctm(PROBLEM_STATE, range, dest);
+	case CPUMF_CTR_SET_CRYPTO:
+		return stcctm(CRYPTO_ACTIVITY, range, dest);
+	case CPUMF_CTR_SET_EXT:
+		return stcctm(EXTENDED, range, dest);
+	case CPUMF_CTR_SET_MT_DIAG:
+		return stcctm(MT_DIAG_CLEARING, range, dest);
+	case CPUMF_CTR_SET_MAX:
+		return 3;
+	}
+	return 3;
+}
+
+struct cpu_cf_events {
+	refcount_t refcnt;		/* Reference count */
+	atomic_t		ctr_set[CPUMF_CTR_SET_MAX];
+	u64			state;		/* For perf_event_open SVC */
+	u64			dev_state;	/* For /dev/hwctr */
+	unsigned int		flags;
+	size_t used;			/* Bytes used in data */
+	size_t usedss;			/* Bytes used in start/stop */
+	unsigned char start[PAGE_SIZE];	/* Counter set at event add */
+	unsigned char stop[PAGE_SIZE];	/* Counter set at event delete */
+	unsigned char data[PAGE_SIZE];	/* Counter set at /dev/hwctr */
+	unsigned int sets;		/* # Counter set saved in memory */
+};
+
+static unsigned int cfdiag_cpu_speed;	/* CPU speed for CF_DIAG trailer */
+static debug_info_t *cf_dbg;
+
+/*
+ * The CPU Measurement query counter information instruction contains
+ * information which varies per machine generation, but is constant and
+ * does not change when running on a particular machine, such as counter
+ * first and second version number. This is needed to determine the size
+ * of counter sets. Extract this information at device driver initialization.
+ */
+static struct cpumf_ctr_info	cpumf_ctr_info;
+
+struct cpu_cf_ptr {
+	struct cpu_cf_events *cpucf;
+};
+
+static struct cpu_cf_root {		/* Anchor to per CPU data */
+	refcount_t refcnt;		/* Overall active events */
+	struct cpu_cf_ptr __percpu *cfptr;
+} cpu_cf_root;
+
+/*
+ * Serialize event initialization and event removal. Both are called from
+ * user space in task context with perf_event_open() and close()
+ * system calls.
+ *
+ * This mutex serializes functions cpum_cf_alloc_cpu() called at event
+ * initialization via cpumf_pmu_event_init() and function cpum_cf_free_cpu()
+ * called at event removal via call back function hw_perf_event_destroy()
+ * when the event is deleted. They are serialized to enforce correct
+ * bookkeeping of pointer and reference counts anchored by
+ * struct cpu_cf_root and the access to cpu_cf_root::refcnt and the
+ * per CPU pointers stored in cpu_cf_root::cfptr.
+ */
+static DEFINE_MUTEX(pmc_reserve_mutex);
+
+/*
+ * Get pointer to per-cpu structure.
+ *
+ * Function get_cpu_cfhw() is called from
+ * - cfset_copy_all(): This function is protected by cpus_read_lock(), so
+ *   CPU hot plug remove can not happen. Event removal requires a close()
+ *   first.
+ *
+ * Function this_cpu_cfhw() is called from perf common code functions:
+ * - pmu_{en|dis}able(), pmu_{add|del}()and pmu_{start|stop}():
+ *   All functions execute with interrupts disabled on that particular CPU.
+ * - cfset_ioctl_{on|off}, cfset_cpu_read(): see comment cfset_copy_all().
+ *
+ * Therefore it is safe to access the CPU specific pointer to the event.
+ */
+static struct cpu_cf_events *get_cpu_cfhw(int cpu)
+{
+	struct cpu_cf_ptr __percpu *p = cpu_cf_root.cfptr;
+
+	if (p) {
+		struct cpu_cf_ptr *q = per_cpu_ptr(p, cpu);
+
+		return q->cpucf;
+	}
+	return NULL;
+}
+
+static struct cpu_cf_events *this_cpu_cfhw(void)
+{
+	return get_cpu_cfhw(smp_processor_id());
+}
+
+/* Disable counter sets on dedicated CPU */
+static void cpum_cf_reset_cpu(void *flags)
+{
+	lcctl(0);
+}
+
+/* Free per CPU data when the last event is removed. */
+static void cpum_cf_free_root(void)
+{
+	if (!refcount_dec_and_test(&cpu_cf_root.refcnt))
+		return;
+	free_percpu(cpu_cf_root.cfptr);
+	cpu_cf_root.cfptr = NULL;
+	irq_subclass_unregister(IRQ_SUBCLASS_MEASUREMENT_ALERT);
+	on_each_cpu(cpum_cf_reset_cpu, NULL, 1);
+	debug_sprintf_event(cf_dbg, 4, "%s root.refcnt %u cfptr %d\n",
+			    __func__, refcount_read(&cpu_cf_root.refcnt),
+			    !cpu_cf_root.cfptr);
+}
+
+/*
+ * On initialization of first event also allocate per CPU data dynamically.
+ * Start with an array of pointers, the array size is the maximum number of
+ * CPUs possible, which might be larger than the number of CPUs currently
+ * online.
+ */
+static int cpum_cf_alloc_root(void)
+{
+	int rc = 0;
+
+	if (refcount_inc_not_zero(&cpu_cf_root.refcnt))
+		return rc;
+
+	/* The memory is already zeroed. */
+	cpu_cf_root.cfptr = alloc_percpu(struct cpu_cf_ptr);
+	if (cpu_cf_root.cfptr) {
+		refcount_set(&cpu_cf_root.refcnt, 1);
+		on_each_cpu(cpum_cf_reset_cpu, NULL, 1);
+		irq_subclass_register(IRQ_SUBCLASS_MEASUREMENT_ALERT);
+	} else {
+		rc = -ENOMEM;
+	}
+
+	return rc;
+}
+
+/* Free CPU counter data structure for a PMU */
+static void cpum_cf_free_cpu(int cpu)
+{
+	struct cpu_cf_events *cpuhw;
+	struct cpu_cf_ptr *p;
+
+	mutex_lock(&pmc_reserve_mutex);
+	/*
+	 * When invoked via CPU hotplug handler, there might be no events
+	 * installed or that particular CPU might not have an
+	 * event installed. This anchor pointer can be NULL!
+	 */
+	if (!cpu_cf_root.cfptr)
+		goto out;
+	p = per_cpu_ptr(cpu_cf_root.cfptr, cpu);
+	cpuhw = p->cpucf;
+	/*
+	 * Might be zero when called from CPU hotplug handler and no event
+	 * installed on that CPU, but on different CPUs.
+	 */
+	if (!cpuhw)
+		goto out;
+
+	if (refcount_dec_and_test(&cpuhw->refcnt)) {
+		kfree(cpuhw);
+		p->cpucf = NULL;
+	}
+	cpum_cf_free_root();
+out:
+	mutex_unlock(&pmc_reserve_mutex);
+}
+
+/* Allocate CPU counter data structure for a PMU. Called under mutex lock. */
+static int cpum_cf_alloc_cpu(int cpu)
+{
+	struct cpu_cf_events *cpuhw;
+	struct cpu_cf_ptr *p;
+	int rc;
+
+	mutex_lock(&pmc_reserve_mutex);
+	rc = cpum_cf_alloc_root();
+	if (rc)
+		goto unlock;
+	p = per_cpu_ptr(cpu_cf_root.cfptr, cpu);
+	cpuhw = p->cpucf;
+
+	if (!cpuhw) {
+		cpuhw = kzalloc(sizeof(*cpuhw), GFP_KERNEL);
+		if (cpuhw) {
+			p->cpucf = cpuhw;
+			refcount_set(&cpuhw->refcnt, 1);
+		} else {
+			rc = -ENOMEM;
+		}
+	} else {
+		refcount_inc(&cpuhw->refcnt);
+	}
+	if (rc) {
+		/*
+		 * Error in allocation of event, decrement anchor. Since
+		 * cpu_cf_event in not created, its destroy() function is not
+		 * invoked. Adjust the reference counter for the anchor.
+		 */
+		cpum_cf_free_root();
+	}
+unlock:
+	mutex_unlock(&pmc_reserve_mutex);
+	return rc;
+}
+
+/*
+ * Create/delete per CPU data structures for /dev/hwctr interface and events
+ * created by perf_event_open().
+ * If cpu is -1, track task on all available CPUs. This requires
+ * allocation of hardware data structures for all CPUs. This setup handles
+ * perf_event_open() with task context and /dev/hwctr interface.
+ * If cpu is non-zero install event on this CPU only. This setup handles
+ * perf_event_open() with CPU context.
+ */
+static int cpum_cf_alloc(int cpu)
+{
+	cpumask_var_t mask;
+	int rc;
+
+	if (cpu == -1) {
+		if (!zalloc_cpumask_var(&mask, GFP_KERNEL))
+			return -ENOMEM;
+		for_each_online_cpu(cpu) {
+			rc = cpum_cf_alloc_cpu(cpu);
+			if (rc) {
+				for_each_cpu(cpu, mask)
+					cpum_cf_free_cpu(cpu);
+				break;
+			}
+			cpumask_set_cpu(cpu, mask);
+		}
+		free_cpumask_var(mask);
+	} else {
+		rc = cpum_cf_alloc_cpu(cpu);
+	}
+	return rc;
+}
+
+static void cpum_cf_free(int cpu)
+{
+	if (cpu == -1) {
+		for_each_online_cpu(cpu)
+			cpum_cf_free_cpu(cpu);
+	} else {
+		cpum_cf_free_cpu(cpu);
+	}
+}
+
+#define	CF_DIAG_CTRSET_DEF		0xfeef	/* Counter set header mark */
+						/* interval in seconds */
+
+/* Counter sets are stored as data stream in a page sized memory buffer and
+ * exported to user space via raw data attached to the event sample data.
+ * Each counter set starts with an eight byte header consisting of:
+ * - a two byte eye catcher (0xfeef)
+ * - a one byte counter set number
+ * - a two byte counter set size (indicates the number of counters in this set)
+ * - a three byte reserved value (must be zero) to make the header the same
+ *   size as a counter value.
+ * All counter values are eight byte in size.
+ *
+ * All counter sets are followed by a 64 byte trailer.
+ * The trailer consists of a:
+ * - flag field indicating valid fields when corresponding bit set
+ * - the counter facility first and second version number
+ * - the CPU speed if nonzero
+ * - the time stamp the counter sets have been collected
+ * - the time of day (TOD) base value
+ * - the machine type.
+ *
+ * The counter sets are saved when the process is prepared to be executed on a
+ * CPU and saved again when the process is going to be removed from a CPU.
+ * The difference of both counter sets are calculated and stored in the event
+ * sample data area.
+ */
+struct cf_ctrset_entry {	/* CPU-M CF counter set entry (8 byte) */
+	unsigned int def:16;	/* 0-15  Data Entry Format */
+	unsigned int set:16;	/* 16-31 Counter set identifier */
+	unsigned int ctr:16;	/* 32-47 Number of stored counters */
+	unsigned int res1:16;	/* 48-63 Reserved */
+};
+
+struct cf_trailer_entry {	/* CPU-M CF_DIAG trailer (64 byte) */
+	/* 0 - 7 */
+	union {
+		struct {
+			unsigned int clock_base:1;	/* TOD clock base set */
+			unsigned int speed:1;		/* CPU speed set */
+			/* Measurement alerts */
+			unsigned int mtda:1;	/* Loss of MT ctr. data alert */
+			unsigned int caca:1;	/* Counter auth. change alert */
+			unsigned int lcda:1;	/* Loss of counter data alert */
+		};
+		unsigned long flags;	/* 0-63    All indicators */
+	};
+	/* 8 - 15 */
+	unsigned int cfvn:16;			/* 64-79   Ctr First Version */
+	unsigned int csvn:16;			/* 80-95   Ctr Second Version */
+	unsigned int cpu_speed:32;		/* 96-127  CPU speed */
+	/* 16 - 23 */
+	unsigned long timestamp;		/* 128-191 Timestamp (TOD) */
+	/* 24 - 55 */
+	union {
+		struct {
+			unsigned long progusage1;
+			unsigned long progusage2;
+			unsigned long progusage3;
+			unsigned long tod_base;
+		};
+		unsigned long progusage[4];
+	};
+	/* 56 - 63 */
+	unsigned int mach_type:16;		/* Machine type */
+	unsigned int res1:16;			/* Reserved */
+	unsigned int res2:32;			/* Reserved */
+};
+
+/* Create the trailer data at the end of a page. */
+static void cfdiag_trailer(struct cf_trailer_entry *te)
+{
+	struct cpuid cpuid;
+
+	te->cfvn = cpumf_ctr_info.cfvn;		/* Counter version numbers */
+	te->csvn = cpumf_ctr_info.csvn;
+
+	get_cpu_id(&cpuid);			/* Machine type */
+	te->mach_type = cpuid.machine;
+	te->cpu_speed = cfdiag_cpu_speed;
+	if (te->cpu_speed)
+		te->speed = 1;
+	te->clock_base = 1;			/* Save clock base */
+	te->tod_base = tod_clock_base.tod;
+	te->timestamp = get_tod_clock_fast();
+}
+
+/*
+ * The number of counters per counter set varies between machine generations,
+ * but is constant when running on a particular machine generation.
+ * Determine each counter set size at device driver initialization and
+ * retrieve it later.
+ */
+static size_t cpumf_ctr_setsizes[CPUMF_CTR_SET_MAX];
+static void cpum_cf_make_setsize(enum cpumf_ctr_set ctrset)
+{
+	size_t ctrset_size = 0;
+
+	switch (ctrset) {
+	case CPUMF_CTR_SET_BASIC:
+		if (cpumf_ctr_info.cfvn >= 1)
+			ctrset_size = 6;
+		break;
+	case CPUMF_CTR_SET_USER:
+		if (cpumf_ctr_info.cfvn == 1)
+			ctrset_size = 6;
+		else if (cpumf_ctr_info.cfvn >= 3)
+			ctrset_size = 2;
+		break;
+	case CPUMF_CTR_SET_CRYPTO:
+		if (cpumf_ctr_info.csvn >= 1 && cpumf_ctr_info.csvn <= 5)
+			ctrset_size = 16;
+		else if (cpumf_ctr_info.csvn == 6 || cpumf_ctr_info.csvn == 7)
+			ctrset_size = 20;
+		break;
+	case CPUMF_CTR_SET_EXT:
+		if (cpumf_ctr_info.csvn == 1)
+			ctrset_size = 32;
+		else if (cpumf_ctr_info.csvn == 2)
+			ctrset_size = 48;
+		else if (cpumf_ctr_info.csvn >= 3 && cpumf_ctr_info.csvn <= 5)
+			ctrset_size = 128;
+		else if (cpumf_ctr_info.csvn == 6 || cpumf_ctr_info.csvn == 7)
+			ctrset_size = 160;
+		break;
+	case CPUMF_CTR_SET_MT_DIAG:
+		if (cpumf_ctr_info.csvn > 3)
+			ctrset_size = 48;
+		break;
+	case CPUMF_CTR_SET_MAX:
+		break;
+	}
+	cpumf_ctr_setsizes[ctrset] = ctrset_size;
+}
+
+/*
+ * Return the maximum possible counter set size (in number of 8 byte counters)
+ * depending on type and model number.
+ */
+static size_t cpum_cf_read_setsize(enum cpumf_ctr_set ctrset)
+{
+	return cpumf_ctr_setsizes[ctrset];
+}
+
+/* Read a counter set. The counter set number determines the counter set and
+ * the CPUM-CF first and second version number determine the number of
+ * available counters in each counter set.
+ * Each counter set starts with header containing the counter set number and
+ * the number of eight byte counters.
+ *
+ * The functions returns the number of bytes occupied by this counter set
+ * including the header.
+ * If there is no counter in the counter set, this counter set is useless and
+ * zero is returned on this case.
+ *
+ * Note that the counter sets may not be enabled or active and the stcctm
+ * instruction might return error 3. Depending on error_ok value this is ok,
+ * for example when called from cpumf_pmu_start() call back function.
+ */
+static size_t cfdiag_getctrset(struct cf_ctrset_entry *ctrdata, int ctrset,
+			       size_t room, bool error_ok)
+{
+	size_t ctrset_size, need = 0;
+	int rc = 3;				/* Assume write failure */
+
+	ctrdata->def = CF_DIAG_CTRSET_DEF;
+	ctrdata->set = ctrset;
+	ctrdata->res1 = 0;
+	ctrset_size = cpum_cf_read_setsize(ctrset);
+
+	if (ctrset_size) {			/* Save data */
+		need = ctrset_size * sizeof(u64) + sizeof(*ctrdata);
+		if (need <= room) {
+			rc = ctr_stcctm(ctrset, ctrset_size,
+					(u64 *)(ctrdata + 1));
+		}
+		if (rc != 3 || error_ok)
+			ctrdata->ctr = ctrset_size;
+		else
+			need = 0;
+	}
+
+	return need;
+}
+
+static const u64 cpumf_ctr_ctl[CPUMF_CTR_SET_MAX] = {
+	[CPUMF_CTR_SET_BASIC]	= 0x02,
+	[CPUMF_CTR_SET_USER]	= 0x04,
+	[CPUMF_CTR_SET_CRYPTO]	= 0x08,
+	[CPUMF_CTR_SET_EXT]	= 0x01,
+	[CPUMF_CTR_SET_MT_DIAG] = 0x20,
+};
+
+/* Read out all counter sets and save them in the provided data buffer.
+ * The last 64 byte host an artificial trailer entry.
+ */
+static size_t cfdiag_getctr(void *data, size_t sz, unsigned long auth,
+			    bool error_ok)
+{
+	struct cf_trailer_entry *trailer;
+	size_t offset = 0, done;
+	int i;
+
+	memset(data, 0, sz);
+	sz -= sizeof(*trailer);		/* Always room for trailer */
+	for (i = CPUMF_CTR_SET_BASIC; i < CPUMF_CTR_SET_MAX; ++i) {
+		struct cf_ctrset_entry *ctrdata = data + offset;
+
+		if (!(auth & cpumf_ctr_ctl[i]))
+			continue;	/* Counter set not authorized */
+
+		done = cfdiag_getctrset(ctrdata, i, sz - offset, error_ok);
+		offset += done;
+	}
+	trailer = data + offset;
+	cfdiag_trailer(trailer);
+	return offset + sizeof(*trailer);
+}
+
+/* Calculate the difference for each counter in a counter set. */
+static void cfdiag_diffctrset(u64 *pstart, u64 *pstop, int counters)
+{
+	for (; --counters >= 0; ++pstart, ++pstop)
+		if (*pstop >= *pstart)
+			*pstop -= *pstart;
+		else
+			*pstop = *pstart - *pstop + 1;
+}
+
+/* Scan the counter sets and calculate the difference of each counter
+ * in each set. The result is the increment of each counter during the
+ * period the counter set has been activated.
+ *
+ * Return true on success.
+ */
+static int cfdiag_diffctr(struct cpu_cf_events *cpuhw, unsigned long auth)
+{
+	struct cf_trailer_entry *trailer_start, *trailer_stop;
+	struct cf_ctrset_entry *ctrstart, *ctrstop;
+	size_t offset = 0;
+
+	auth &= (1 << CPUMF_LCCTL_ENABLE_SHIFT) - 1;
+	do {
+		ctrstart = (struct cf_ctrset_entry *)(cpuhw->start + offset);
+		ctrstop = (struct cf_ctrset_entry *)(cpuhw->stop + offset);
+
+		if (memcmp(ctrstop, ctrstart, sizeof(*ctrstop))) {
+			pr_err_once("cpum_cf_diag counter set compare error "
+				    "in set %i\n", ctrstart->set);
+			return 0;
+		}
+		auth &= ~cpumf_ctr_ctl[ctrstart->set];
+		if (ctrstart->def == CF_DIAG_CTRSET_DEF) {
+			cfdiag_diffctrset((u64 *)(ctrstart + 1),
+					  (u64 *)(ctrstop + 1), ctrstart->ctr);
+			offset += ctrstart->ctr * sizeof(u64) +
+							sizeof(*ctrstart);
+		}
+	} while (ctrstart->def && auth);
+
+	/* Save time_stamp from start of event in stop's trailer */
+	trailer_start = (struct cf_trailer_entry *)(cpuhw->start + offset);
+	trailer_stop = (struct cf_trailer_entry *)(cpuhw->stop + offset);
+	trailer_stop->progusage[0] = trailer_start->timestamp;
+
+	return 1;
+}
+
+static enum cpumf_ctr_set get_counter_set(u64 event)
+{
+	int set = CPUMF_CTR_SET_MAX;
+
+	if (event < 32)
+		set = CPUMF_CTR_SET_BASIC;
+	else if (event < 64)
+		set = CPUMF_CTR_SET_USER;
+	else if (event < 128)
+		set = CPUMF_CTR_SET_CRYPTO;
+	else if (event < 288)
+		set = CPUMF_CTR_SET_EXT;
+	else if (event >= 448 && event < 496)
+		set = CPUMF_CTR_SET_MT_DIAG;
+
+	return set;
+}
+
+static int validate_ctr_version(const u64 config, enum cpumf_ctr_set set)
+{
+	u16 mtdiag_ctl;
+	int err = 0;
+
+	/* check required version for counter sets */
+	switch (set) {
+	case CPUMF_CTR_SET_BASIC:
+	case CPUMF_CTR_SET_USER:
+		if (cpumf_ctr_info.cfvn < 1)
+			err = -EOPNOTSUPP;
+		break;
+	case CPUMF_CTR_SET_CRYPTO:
+		if ((cpumf_ctr_info.csvn >= 1 && cpumf_ctr_info.csvn <= 5 &&
+		     config > 79) || (cpumf_ctr_info.csvn >= 6 && config > 83))
+			err = -EOPNOTSUPP;
+		break;
+	case CPUMF_CTR_SET_EXT:
+		if (cpumf_ctr_info.csvn < 1)
+			err = -EOPNOTSUPP;
+		if ((cpumf_ctr_info.csvn == 1 && config > 159) ||
+		    (cpumf_ctr_info.csvn == 2 && config > 175) ||
+		    (cpumf_ctr_info.csvn >= 3 && cpumf_ctr_info.csvn <= 5 &&
+		     config > 255) ||
+		    (cpumf_ctr_info.csvn >= 6 && config > 287))
+			err = -EOPNOTSUPP;
+		break;
+	case CPUMF_CTR_SET_MT_DIAG:
+		if (cpumf_ctr_info.csvn <= 3)
+			err = -EOPNOTSUPP;
+		/*
+		 * MT-diagnostic counters are read-only.  The counter set
+		 * is automatically enabled and activated on all CPUs with
+		 * multithreading (SMT).  Deactivation of multithreading
+		 * also disables the counter set.  State changes are ignored
+		 * by lcctl().	Because Linux controls SMT enablement through
+		 * a kernel parameter only, the counter set is either disabled
+		 * or enabled and active.
+		 *
+		 * Thus, the counters can only be used if SMT is on and the
+		 * counter set is enabled and active.
+		 */
+		mtdiag_ctl = cpumf_ctr_ctl[CPUMF_CTR_SET_MT_DIAG];
+		if (!((cpumf_ctr_info.auth_ctl & mtdiag_ctl) &&
+		      (cpumf_ctr_info.enable_ctl & mtdiag_ctl) &&
+		      (cpumf_ctr_info.act_ctl & mtdiag_ctl)))
+			err = -EOPNOTSUPP;
+		break;
+	case CPUMF_CTR_SET_MAX:
+		err = -EOPNOTSUPP;
+	}
+
+	return err;
+}
+
+/*
+ * Change the CPUMF state to active.
+ * Enable and activate the CPU-counter sets according
+ * to the per-cpu control state.
+ */
+static void cpumf_pmu_enable(struct pmu *pmu)
+{
+	struct cpu_cf_events *cpuhw = this_cpu_cfhw();
+	int err;
+
+	if (!cpuhw || (cpuhw->flags & PMU_F_ENABLED))
+		return;
+
+	err = lcctl(cpuhw->state | cpuhw->dev_state);
+	if (err)
+		pr_err("Enabling the performance measuring unit failed with rc=%x\n", err);
+	else
+		cpuhw->flags |= PMU_F_ENABLED;
+}
+
+/*
+ * Change the CPUMF state to inactive.
+ * Disable and enable (inactive) the CPU-counter sets according
+ * to the per-cpu control state.
+ */
+static void cpumf_pmu_disable(struct pmu *pmu)
+{
+	struct cpu_cf_events *cpuhw = this_cpu_cfhw();
+	u64 inactive;
+	int err;
+
+	if (!cpuhw || !(cpuhw->flags & PMU_F_ENABLED))
+		return;
+
+	inactive = cpuhw->state & ~((1 << CPUMF_LCCTL_ENABLE_SHIFT) - 1);
+	inactive |= cpuhw->dev_state;
+	err = lcctl(inactive);
+	if (err)
+		pr_err("Disabling the performance measuring unit failed with rc=%x\n", err);
+	else
+		cpuhw->flags &= ~PMU_F_ENABLED;
+}
+
+/* Release the PMU if event is the last perf event */
+static void hw_perf_event_destroy(struct perf_event *event)
+{
+	cpum_cf_free(event->cpu);
+}
+
+/* CPUMF <-> perf event mappings for kernel+userspace (basic set) */
+static const int cpumf_generic_events_basic[] = {
+	[PERF_COUNT_HW_CPU_CYCLES]	    = 0,
+	[PERF_COUNT_HW_INSTRUCTIONS]	    = 1,
+	[PERF_COUNT_HW_CACHE_REFERENCES]    = -1,
+	[PERF_COUNT_HW_CACHE_MISSES]	    = -1,
+	[PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = -1,
+	[PERF_COUNT_HW_BRANCH_MISSES]	    = -1,
+	[PERF_COUNT_HW_BUS_CYCLES]	    = -1,
+};
+/* CPUMF <-> perf event mappings for userspace (problem-state set) */
+static const int cpumf_generic_events_user[] = {
+	[PERF_COUNT_HW_CPU_CYCLES]	    = 32,
+	[PERF_COUNT_HW_INSTRUCTIONS]	    = 33,
+	[PERF_COUNT_HW_CACHE_REFERENCES]    = -1,
+	[PERF_COUNT_HW_CACHE_MISSES]	    = -1,
+	[PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = -1,
+	[PERF_COUNT_HW_BRANCH_MISSES]	    = -1,
+	[PERF_COUNT_HW_BUS_CYCLES]	    = -1,
+};
+
+static int is_userspace_event(u64 ev)
+{
+	return cpumf_generic_events_user[PERF_COUNT_HW_CPU_CYCLES] == ev ||
+	       cpumf_generic_events_user[PERF_COUNT_HW_INSTRUCTIONS] == ev;
+}
+
+static int __hw_perf_event_init(struct perf_event *event, unsigned int type)
+{
+	struct perf_event_attr *attr = &event->attr;
+	struct hw_perf_event *hwc = &event->hw;
+	enum cpumf_ctr_set set;
+	u64 ev;
+
+	switch (type) {
+	case PERF_TYPE_RAW:
+		/* Raw events are used to access counters directly,
+		 * hence do not permit excludes */
+		if (attr->exclude_kernel || attr->exclude_user ||
+		    attr->exclude_hv)
+			return -EOPNOTSUPP;
+		ev = attr->config;
+		break;
+
+	case PERF_TYPE_HARDWARE:
+		if (is_sampling_event(event))	/* No sampling support */
+			return -ENOENT;
+		ev = attr->config;
+		if (!attr->exclude_user && attr->exclude_kernel) {
+			/*
+			 * Count user space (problem-state) only
+			 * Handle events 32 and 33 as 0:u and 1:u
+			 */
+			if (!is_userspace_event(ev)) {
+				if (ev >= ARRAY_SIZE(cpumf_generic_events_user))
+					return -EOPNOTSUPP;
+				ev = cpumf_generic_events_user[ev];
+			}
+		} else if (!attr->exclude_kernel && attr->exclude_user) {
+			/* No support for kernel space counters only */
+			return -EOPNOTSUPP;
+		} else {
+			/* Count user and kernel space, incl. events 32 + 33 */
+			if (!is_userspace_event(ev)) {
+				if (ev >= ARRAY_SIZE(cpumf_generic_events_basic))
+					return -EOPNOTSUPP;
+				ev = cpumf_generic_events_basic[ev];
+			}
+		}
+		break;
+
+	default:
+		return -ENOENT;
+	}
+
+	if (ev == -1)
+		return -ENOENT;
+
+	if (ev > PERF_CPUM_CF_MAX_CTR)
+		return -ENOENT;
+
+	/* Obtain the counter set to which the specified counter belongs */
+	set = get_counter_set(ev);
+	switch (set) {
+	case CPUMF_CTR_SET_BASIC:
+	case CPUMF_CTR_SET_USER:
+	case CPUMF_CTR_SET_CRYPTO:
+	case CPUMF_CTR_SET_EXT:
+	case CPUMF_CTR_SET_MT_DIAG:
+		/*
+		 * Use the hardware perf event structure to store the
+		 * counter number in the 'config' member and the counter
+		 * set number in the 'config_base' as bit mask.
+		 * It is later used to enable/disable the counter(s).
+		 */
+		hwc->config = ev;
+		hwc->config_base = cpumf_ctr_ctl[set];
+		break;
+	case CPUMF_CTR_SET_MAX:
+		/* The counter could not be associated to a counter set */
+		return -EINVAL;
+	}
+
+	/* Initialize for using the CPU-measurement counter facility */
+	if (cpum_cf_alloc(event->cpu))
+		return -ENOMEM;
+	event->destroy = hw_perf_event_destroy;
+
+	/*
+	 * Finally, validate version and authorization of the counter set.
+	 * If the particular CPU counter set is not authorized,
+	 * return with -ENOENT in order to fall back to other
+	 * PMUs that might suffice the event request.
+	 */
+	if (!(hwc->config_base & cpumf_ctr_info.auth_ctl))
+		return -ENOENT;
+	return validate_ctr_version(hwc->config, set);
+}
+
+/* Events CPU_CYLCES and INSTRUCTIONS can be submitted with two different
+ * attribute::type values:
+ * - PERF_TYPE_HARDWARE:
+ * - pmu->type:
+ * Handle both type of invocations identical. They address the same hardware.
+ * The result is different when event modifiers exclude_kernel and/or
+ * exclude_user are also set.
+ */
+static int cpumf_pmu_event_type(struct perf_event *event)
+{
+	u64 ev = event->attr.config;
+
+	if (cpumf_generic_events_basic[PERF_COUNT_HW_CPU_CYCLES] == ev ||
+	    cpumf_generic_events_basic[PERF_COUNT_HW_INSTRUCTIONS] == ev ||
+	    cpumf_generic_events_user[PERF_COUNT_HW_CPU_CYCLES] == ev ||
+	    cpumf_generic_events_user[PERF_COUNT_HW_INSTRUCTIONS] == ev)
+		return PERF_TYPE_HARDWARE;
+	return PERF_TYPE_RAW;
+}
+
+static int cpumf_pmu_event_init(struct perf_event *event)
+{
+	unsigned int type = event->attr.type;
+	int err;
+
+	if (type == PERF_TYPE_HARDWARE || type == PERF_TYPE_RAW)
+		err = __hw_perf_event_init(event, type);
+	else if (event->pmu->type == type)
+		/* Registered as unknown PMU */
+		err = __hw_perf_event_init(event, cpumf_pmu_event_type(event));
+	else
+		return -ENOENT;
+
+	if (unlikely(err) && event->destroy)
+		event->destroy(event);
+
+	return err;
+}
+
+static int hw_perf_event_reset(struct perf_event *event)
+{
+	u64 prev, new;
+	int err;
+
+	do {
+		prev = local64_read(&event->hw.prev_count);
+		err = ecctr(event->hw.config, &new);
+		if (err) {
+			if (err != 3)
+				break;
+			/* The counter is not (yet) available. This
+			 * might happen if the counter set to which
+			 * this counter belongs is in the disabled
+			 * state.
+			 */
+			new = 0;
+		}
+	} while (local64_cmpxchg(&event->hw.prev_count, prev, new) != prev);
+
+	return err;
+}
+
+static void hw_perf_event_update(struct perf_event *event)
+{
+	u64 prev, new, delta;
+	int err;
+
+	do {
+		prev = local64_read(&event->hw.prev_count);
+		err = ecctr(event->hw.config, &new);
+		if (err)
+			return;
+	} while (local64_cmpxchg(&event->hw.prev_count, prev, new) != prev);
+
+	delta = (prev <= new) ? new - prev
+			      : (-1ULL - prev) + new + 1;	 /* overflow */
+	local64_add(delta, &event->count);
+}
+
+static void cpumf_pmu_read(struct perf_event *event)
+{
+	if (event->hw.state & PERF_HES_STOPPED)
+		return;
+
+	hw_perf_event_update(event);
+}
+
+static void cpumf_pmu_start(struct perf_event *event, int flags)
+{
+	struct cpu_cf_events *cpuhw = this_cpu_cfhw();
+	struct hw_perf_event *hwc = &event->hw;
+	int i;
+
+	if (!(hwc->state & PERF_HES_STOPPED))
+		return;
+
+	hwc->state = 0;
+
+	/* (Re-)enable and activate the counter set */
+	ctr_set_enable(&cpuhw->state, hwc->config_base);
+	ctr_set_start(&cpuhw->state, hwc->config_base);
+
+	/* The counter set to which this counter belongs can be already active.
+	 * Because all counters in a set are active, the event->hw.prev_count
+	 * needs to be synchronized.  At this point, the counter set can be in
+	 * the inactive or disabled state.
+	 */
+	if (hwc->config == PERF_EVENT_CPUM_CF_DIAG) {
+		cpuhw->usedss = cfdiag_getctr(cpuhw->start,
+					      sizeof(cpuhw->start),
+					      hwc->config_base, true);
+	} else {
+		hw_perf_event_reset(event);
+	}
+
+	/* Increment refcount for counter sets */
+	for (i = CPUMF_CTR_SET_BASIC; i < CPUMF_CTR_SET_MAX; ++i)
+		if ((hwc->config_base & cpumf_ctr_ctl[i]))
+			atomic_inc(&cpuhw->ctr_set[i]);
+}
+
+/* Create perf event sample with the counter sets as raw data.	The sample
+ * is then pushed to the event subsystem and the function checks for
+ * possible event overflows. If an event overflow occurs, the PMU is
+ * stopped.
+ *
+ * Return non-zero if an event overflow occurred.
+ */
+static int cfdiag_push_sample(struct perf_event *event,
+			      struct cpu_cf_events *cpuhw)
+{
+	struct perf_sample_data data;
+	struct perf_raw_record raw;
+	struct pt_regs regs;
+	int overflow;
+
+	/* Setup perf sample */
+	perf_sample_data_init(&data, 0, event->hw.last_period);
+	memset(&regs, 0, sizeof(regs));
+	memset(&raw, 0, sizeof(raw));
+
+	if (event->attr.sample_type & PERF_SAMPLE_CPU)
+		data.cpu_entry.cpu = event->cpu;
+	if (event->attr.sample_type & PERF_SAMPLE_RAW) {
+		raw.frag.size = cpuhw->usedss;
+		raw.frag.data = cpuhw->stop;
+		perf_sample_save_raw_data(&data, &raw);
+	}
+
+	overflow = perf_event_overflow(event, &data, &regs);
+	if (overflow)
+		event->pmu->stop(event, 0);
+
+	perf_event_update_userpage(event);
+	return overflow;
+}
+
+static void cpumf_pmu_stop(struct perf_event *event, int flags)
+{
+	struct cpu_cf_events *cpuhw = this_cpu_cfhw();
+	struct hw_perf_event *hwc = &event->hw;
+	int i;
+
+	if (!(hwc->state & PERF_HES_STOPPED)) {
+		/* Decrement reference count for this counter set and if this
+		 * is the last used counter in the set, clear activation
+		 * control and set the counter set state to inactive.
+		 */
+		for (i = CPUMF_CTR_SET_BASIC; i < CPUMF_CTR_SET_MAX; ++i) {
+			if (!(hwc->config_base & cpumf_ctr_ctl[i]))
+				continue;
+			if (!atomic_dec_return(&cpuhw->ctr_set[i]))
+				ctr_set_stop(&cpuhw->state, cpumf_ctr_ctl[i]);
+		}
+		hwc->state |= PERF_HES_STOPPED;
+	}
+
+	if ((flags & PERF_EF_UPDATE) && !(hwc->state & PERF_HES_UPTODATE)) {
+		if (hwc->config == PERF_EVENT_CPUM_CF_DIAG) {
+			local64_inc(&event->count);
+			cpuhw->usedss = cfdiag_getctr(cpuhw->stop,
+						      sizeof(cpuhw->stop),
+						      event->hw.config_base,
+						      false);
+			if (cfdiag_diffctr(cpuhw, event->hw.config_base))
+				cfdiag_push_sample(event, cpuhw);
+		} else {
+			hw_perf_event_update(event);
+		}
+		hwc->state |= PERF_HES_UPTODATE;
+	}
+}
+
+static int cpumf_pmu_add(struct perf_event *event, int flags)
+{
+	struct cpu_cf_events *cpuhw = this_cpu_cfhw();
+
+	ctr_set_enable(&cpuhw->state, event->hw.config_base);
+	event->hw.state = PERF_HES_UPTODATE | PERF_HES_STOPPED;
+
+	if (flags & PERF_EF_START)
+		cpumf_pmu_start(event, PERF_EF_RELOAD);
+
+	return 0;
+}
+
+static void cpumf_pmu_del(struct perf_event *event, int flags)
+{
+	struct cpu_cf_events *cpuhw = this_cpu_cfhw();
+	int i;
+
+	cpumf_pmu_stop(event, PERF_EF_UPDATE);
+
+	/* Check if any counter in the counter set is still used.  If not used,
+	 * change the counter set to the disabled state.  This also clears the
+	 * content of all counters in the set.
+	 *
+	 * When a new perf event has been added but not yet started, this can
+	 * clear enable control and resets all counters in a set.  Therefore,
+	 * cpumf_pmu_start() always has to reenable a counter set.
+	 */
+	for (i = CPUMF_CTR_SET_BASIC; i < CPUMF_CTR_SET_MAX; ++i)
+		if (!atomic_read(&cpuhw->ctr_set[i]))
+			ctr_set_disable(&cpuhw->state, cpumf_ctr_ctl[i]);
+}
+
+/* Performance monitoring unit for s390x */
+static struct pmu cpumf_pmu = {
+	.task_ctx_nr  = perf_sw_context,
+	.capabilities = PERF_PMU_CAP_NO_INTERRUPT,
+	.pmu_enable   = cpumf_pmu_enable,
+	.pmu_disable  = cpumf_pmu_disable,
+	.event_init   = cpumf_pmu_event_init,
+	.add	      = cpumf_pmu_add,
+	.del	      = cpumf_pmu_del,
+	.start	      = cpumf_pmu_start,
+	.stop	      = cpumf_pmu_stop,
+	.read	      = cpumf_pmu_read,
+};
+
+static struct cfset_session {		/* CPUs and counter set bit mask */
+	struct list_head head;		/* Head of list of active processes */
+} cfset_session = {
+	.head = LIST_HEAD_INIT(cfset_session.head)
+};
+
+static refcount_t cfset_opencnt = REFCOUNT_INIT(0);	/* Access count */
+/*
+ * Synchronize access to device /dev/hwc. This mutex protects against
+ * concurrent access to functions cfset_open() and cfset_release().
+ * Same for CPU hotplug add and remove events triggering
+ * cpum_cf_online_cpu() and cpum_cf_offline_cpu().
+ * It also serializes concurrent device ioctl access from multiple
+ * processes accessing /dev/hwc.
+ *
+ * The mutex protects concurrent access to the /dev/hwctr session management
+ * struct cfset_session and reference counting variable cfset_opencnt.
+ */
+static DEFINE_MUTEX(cfset_ctrset_mutex);
+
+/*
+ * CPU hotplug handles only /dev/hwctr device.
+ * For perf_event_open() the CPU hotplug handling is done on kernel common
+ * code:
+ * - CPU add: Nothing is done since a file descriptor can not be created
+ *   and returned to the user.
+ * - CPU delete: Handled by common code via pmu_disable(), pmu_stop() and
+ *   pmu_delete(). The event itself is removed when the file descriptor is
+ *   closed.
+ */
+static int cfset_online_cpu(unsigned int cpu);
+
+static int cpum_cf_online_cpu(unsigned int cpu)
+{
+	int rc = 0;
+
+	/*
+	 * Ignore notification for perf_event_open().
+	 * Handle only /dev/hwctr device sessions.
+	 */
+	mutex_lock(&cfset_ctrset_mutex);
+	if (refcount_read(&cfset_opencnt)) {
+		rc = cpum_cf_alloc_cpu(cpu);
+		if (!rc)
+			cfset_online_cpu(cpu);
+	}
+	mutex_unlock(&cfset_ctrset_mutex);
+	return rc;
+}
+
+static int cfset_offline_cpu(unsigned int cpu);
+
+static int cpum_cf_offline_cpu(unsigned int cpu)
+{
+	/*
+	 * During task exit processing of grouped perf events triggered by CPU
+	 * hotplug processing, pmu_disable() is called as part of perf context
+	 * removal process. Therefore do not trigger event removal now for
+	 * perf_event_open() created events. Perf common code triggers event
+	 * destruction when the event file descriptor is closed.
+	 *
+	 * Handle only /dev/hwctr device sessions.
+	 */
+	mutex_lock(&cfset_ctrset_mutex);
+	if (refcount_read(&cfset_opencnt)) {
+		cfset_offline_cpu(cpu);
+		cpum_cf_free_cpu(cpu);
+	}
+	mutex_unlock(&cfset_ctrset_mutex);
+	return 0;
+}
+
+/* Return true if store counter set multiple instruction is available */
+static inline int stccm_avail(void)
+{
+	return test_facility(142);
+}
+
+/* CPU-measurement alerts for the counter facility */
+static void cpumf_measurement_alert(struct ext_code ext_code,
+				    unsigned int alert, unsigned long unused)
+{
+	struct cpu_cf_events *cpuhw;
+
+	if (!(alert & CPU_MF_INT_CF_MASK))
+		return;
+
+	inc_irq_stat(IRQEXT_CMC);
+
+	/*
+	 * Measurement alerts are shared and might happen when the PMU
+	 * is not reserved.  Ignore these alerts in this case.
+	 */
+	cpuhw = this_cpu_cfhw();
+	if (!cpuhw)
+		return;
+
+	/* counter authorization change alert */
+	if (alert & CPU_MF_INT_CF_CACA)
+		qctri(&cpumf_ctr_info);
+
+	/* loss of counter data alert */
+	if (alert & CPU_MF_INT_CF_LCDA)
+		pr_err("CPU[%i] Counter data was lost\n", smp_processor_id());
+
+	/* loss of MT counter data alert */
+	if (alert & CPU_MF_INT_CF_MTDA)
+		pr_warn("CPU[%i] MT counter data was lost\n",
+			smp_processor_id());
+}
+
+static int cfset_init(void);
+static int __init cpumf_pmu_init(void)
+{
+	int rc;
+
+	/* Extract counter measurement facility information */
+	if (!cpum_cf_avail() || qctri(&cpumf_ctr_info))
+		return -ENODEV;
+
+	/* Determine and store counter set sizes for later reference */
+	for (rc = CPUMF_CTR_SET_BASIC; rc < CPUMF_CTR_SET_MAX; ++rc)
+		cpum_cf_make_setsize(rc);
+
+	/*
+	 * Clear bit 15 of cr0 to unauthorize problem-state to
+	 * extract measurement counters
+	 */
+	ctl_clear_bit(0, 48);
+
+	/* register handler for measurement-alert interruptions */
+	rc = register_external_irq(EXT_IRQ_MEASURE_ALERT,
+				   cpumf_measurement_alert);
+	if (rc) {
+		pr_err("Registering for CPU-measurement alerts failed with rc=%i\n", rc);
+		return rc;
+	}
+
+	/* Setup s390dbf facility */
+	cf_dbg = debug_register(KMSG_COMPONENT, 2, 1, 128);
+	if (!cf_dbg) {
+		pr_err("Registration of s390dbf(cpum_cf) failed\n");
+		rc = -ENOMEM;
+		goto out1;
+	}
+	debug_register_view(cf_dbg, &debug_sprintf_view);
+
+	cpumf_pmu.attr_groups = cpumf_cf_event_group();
+	rc = perf_pmu_register(&cpumf_pmu, "cpum_cf", -1);
+	if (rc) {
+		pr_err("Registering the cpum_cf PMU failed with rc=%i\n", rc);
+		goto out2;
+	} else if (stccm_avail()) {	/* Setup counter set device */
+		cfset_init();
+	}
+
+	rc = cpuhp_setup_state(CPUHP_AP_PERF_S390_CF_ONLINE,
+			       "perf/s390/cf:online",
+			       cpum_cf_online_cpu, cpum_cf_offline_cpu);
+	return rc;
+
+out2:
+	debug_unregister_view(cf_dbg, &debug_sprintf_view);
+	debug_unregister(cf_dbg);
+out1:
+	unregister_external_irq(EXT_IRQ_MEASURE_ALERT, cpumf_measurement_alert);
+	return rc;
+}
+
+/* Support for the CPU Measurement Facility counter set extraction using
+ * device /dev/hwctr. This allows user space programs to extract complete
+ * counter set via normal file operations.
+ */
+
+struct cfset_call_on_cpu_parm {		/* Parm struct for smp_call_on_cpu */
+	unsigned int sets;		/* Counter set bit mask */
+	atomic_t cpus_ack;		/* # CPUs successfully executed func */
+};
+
+struct cfset_request {			/* CPUs and counter set bit mask */
+	unsigned long ctrset;		/* Bit mask of counter set to read */
+	cpumask_t mask;			/* CPU mask to read from */
+	struct list_head node;		/* Chain to cfset_session.head */
+};
+
+static void cfset_session_init(void)
+{
+	INIT_LIST_HEAD(&cfset_session.head);
+}
+
+/* Remove current request from global bookkeeping. Maintain a counter set bit
+ * mask on a per CPU basis.
+ * Done in process context under mutex protection.
+ */
+static void cfset_session_del(struct cfset_request *p)
+{
+	list_del(&p->node);
+}
+
+/* Add current request to global bookkeeping. Maintain a counter set bit mask
+ * on a per CPU basis.
+ * Done in process context under mutex protection.
+ */
+static void cfset_session_add(struct cfset_request *p)
+{
+	list_add(&p->node, &cfset_session.head);
+}
+
+/* The /dev/hwctr device access uses PMU_F_IN_USE to mark the device access
+ * path is currently used.
+ * The cpu_cf_events::dev_state is used to denote counter sets in use by this
+ * interface. It is always or'ed in. If this interface is not active, its
+ * value is zero and no additional counter sets will be included.
+ *
+ * The cpu_cf_events::state is used by the perf_event_open SVC and remains
+ * unchanged.
+ *
+ * perf_pmu_enable() and perf_pmu_enable() and its call backs
+ * cpumf_pmu_enable() and  cpumf_pmu_disable() are called by the
+ * performance measurement subsystem to enable per process
+ * CPU Measurement counter facility.
+ * The XXX_enable() and XXX_disable functions are used to turn off
+ * x86 performance monitoring interrupt (PMI) during scheduling.
+ * s390 uses these calls to temporarily stop and resume the active CPU
+ * counters sets during scheduling.
+ *
+ * We do allow concurrent access of perf_event_open() SVC and /dev/hwctr
+ * device access.  The perf_event_open() SVC interface makes a lot of effort
+ * to only run the counters while the calling process is actively scheduled
+ * to run.
+ * When /dev/hwctr interface is also used at the same time, the counter sets
+ * will keep running, even when the process is scheduled off a CPU.
+ * However this is not a problem and does not lead to wrong counter values
+ * for the perf_event_open() SVC. The current counter value will be recorded
+ * during schedule-in. At schedule-out time the current counter value is
+ * extracted again and the delta is calculated and added to the event.
+ */
+/* Stop all counter sets via ioctl interface */
+static void cfset_ioctl_off(void *parm)
+{
+	struct cpu_cf_events *cpuhw = this_cpu_cfhw();
+	struct cfset_call_on_cpu_parm *p = parm;
+	int rc;
+
+	/* Check if any counter set used by /dev/hwctr */
+	for (rc = CPUMF_CTR_SET_BASIC; rc < CPUMF_CTR_SET_MAX; ++rc)
+		if ((p->sets & cpumf_ctr_ctl[rc])) {
+			if (!atomic_dec_return(&cpuhw->ctr_set[rc])) {
+				ctr_set_disable(&cpuhw->dev_state,
+						cpumf_ctr_ctl[rc]);
+				ctr_set_stop(&cpuhw->dev_state,
+					     cpumf_ctr_ctl[rc]);
+			}
+		}
+	/* Keep perf_event_open counter sets */
+	rc = lcctl(cpuhw->dev_state | cpuhw->state);
+	if (rc)
+		pr_err("Counter set stop %#llx of /dev/%s failed rc=%i\n",
+		       cpuhw->state, S390_HWCTR_DEVICE, rc);
+	if (!cpuhw->dev_state)
+		cpuhw->flags &= ~PMU_F_IN_USE;
+}
+
+/* Start counter sets on particular CPU */
+static void cfset_ioctl_on(void *parm)
+{
+	struct cpu_cf_events *cpuhw = this_cpu_cfhw();
+	struct cfset_call_on_cpu_parm *p = parm;
+	int rc;
+
+	cpuhw->flags |= PMU_F_IN_USE;
+	ctr_set_enable(&cpuhw->dev_state, p->sets);
+	ctr_set_start(&cpuhw->dev_state, p->sets);
+	for (rc = CPUMF_CTR_SET_BASIC; rc < CPUMF_CTR_SET_MAX; ++rc)
+		if ((p->sets & cpumf_ctr_ctl[rc]))
+			atomic_inc(&cpuhw->ctr_set[rc]);
+	rc = lcctl(cpuhw->dev_state | cpuhw->state);	/* Start counter sets */
+	if (!rc)
+		atomic_inc(&p->cpus_ack);
+	else
+		pr_err("Counter set start %#llx of /dev/%s failed rc=%i\n",
+		       cpuhw->dev_state | cpuhw->state, S390_HWCTR_DEVICE, rc);
+}
+
+static void cfset_release_cpu(void *p)
+{
+	struct cpu_cf_events *cpuhw = this_cpu_cfhw();
+	int rc;
+
+	cpuhw->dev_state = 0;
+	rc = lcctl(cpuhw->state);	/* Keep perf_event_open counter sets */
+	if (rc)
+		pr_err("Counter set release %#llx of /dev/%s failed rc=%i\n",
+		       cpuhw->state, S390_HWCTR_DEVICE, rc);
+}
+
+/* This modifies the process CPU mask to adopt it to the currently online
+ * CPUs. Offline CPUs can not be addresses. This call terminates the access
+ * and is usually followed by close() or a new iotcl(..., START, ...) which
+ * creates a new request structure.
+ */
+static void cfset_all_stop(struct cfset_request *req)
+{
+	struct cfset_call_on_cpu_parm p = {
+		.sets = req->ctrset,
+	};
+
+	cpumask_and(&req->mask, &req->mask, cpu_online_mask);
+	on_each_cpu_mask(&req->mask, cfset_ioctl_off, &p, 1);
+}
+
+/* Release function is also called when application gets terminated without
+ * doing a proper ioctl(..., S390_HWCTR_STOP, ...) command.
+ */
+static int cfset_release(struct inode *inode, struct file *file)
+{
+	mutex_lock(&cfset_ctrset_mutex);
+	/* Open followed by close/exit has no private_data */
+	if (file->private_data) {
+		cfset_all_stop(file->private_data);
+		cfset_session_del(file->private_data);
+		kfree(file->private_data);
+		file->private_data = NULL;
+	}
+	if (refcount_dec_and_test(&cfset_opencnt)) {	/* Last close */
+		on_each_cpu(cfset_release_cpu, NULL, 1);
+		cpum_cf_free(-1);
+	}
+	mutex_unlock(&cfset_ctrset_mutex);
+	return 0;
+}
+
+/*
+ * Open via /dev/hwctr device. Allocate all per CPU resources on the first
+ * open of the device. The last close releases all per CPU resources.
+ * Parallel perf_event_open system calls also use per CPU resources.
+ * These invocations are handled via reference counting on the per CPU data
+ * structures.
+ */
+static int cfset_open(struct inode *inode, struct file *file)
+{
+	int rc = 0;
+
+	if (!perfmon_capable())
+		return -EPERM;
+	file->private_data = NULL;
+
+	mutex_lock(&cfset_ctrset_mutex);
+	if (!refcount_inc_not_zero(&cfset_opencnt)) {	/* First open */
+		rc = cpum_cf_alloc(-1);
+		if (!rc) {
+			cfset_session_init();
+			refcount_set(&cfset_opencnt, 1);
+		}
+	}
+	mutex_unlock(&cfset_ctrset_mutex);
+
+	/* nonseekable_open() never fails */
+	return rc ?: nonseekable_open(inode, file);
+}
+
+static int cfset_all_start(struct cfset_request *req)
+{
+	struct cfset_call_on_cpu_parm p = {
+		.sets = req->ctrset,
+		.cpus_ack = ATOMIC_INIT(0),
+	};
+	cpumask_var_t mask;
+	int rc = 0;
+
+	if (!alloc_cpumask_var(&mask, GFP_KERNEL))
+		return -ENOMEM;
+	cpumask_and(mask, &req->mask, cpu_online_mask);
+	on_each_cpu_mask(mask, cfset_ioctl_on, &p, 1);
+	if (atomic_read(&p.cpus_ack) != cpumask_weight(mask)) {
+		on_each_cpu_mask(mask, cfset_ioctl_off, &p, 1);
+		rc = -EIO;
+	}
+	free_cpumask_var(mask);
+	return rc;
+}
+
+/* Return the maximum required space for all possible CPUs in case one
+ * CPU will be onlined during the START, READ, STOP cycles.
+ * To find out the size of the counter sets, any one CPU will do. They
+ * all have the same counter sets.
+ */
+static size_t cfset_needspace(unsigned int sets)
+{
+	size_t bytes = 0;
+	int i;
+
+	for (i = CPUMF_CTR_SET_BASIC; i < CPUMF_CTR_SET_MAX; ++i) {
+		if (!(sets & cpumf_ctr_ctl[i]))
+			continue;
+		bytes += cpum_cf_read_setsize(i) * sizeof(u64) +
+			 sizeof(((struct s390_ctrset_setdata *)0)->set) +
+			 sizeof(((struct s390_ctrset_setdata *)0)->no_cnts);
+	}
+	bytes = sizeof(((struct s390_ctrset_read *)0)->no_cpus) + nr_cpu_ids *
+		(bytes + sizeof(((struct s390_ctrset_cpudata *)0)->cpu_nr) +
+		     sizeof(((struct s390_ctrset_cpudata *)0)->no_sets));
+	return bytes;
+}
+
+static int cfset_all_copy(unsigned long arg, cpumask_t *mask)
+{
+	struct s390_ctrset_read __user *ctrset_read;
+	unsigned int cpu, cpus, rc = 0;
+	void __user *uptr;
+
+	ctrset_read = (struct s390_ctrset_read __user *)arg;
+	uptr = ctrset_read->data;
+	for_each_cpu(cpu, mask) {
+		struct cpu_cf_events *cpuhw = get_cpu_cfhw(cpu);
+		struct s390_ctrset_cpudata __user *ctrset_cpudata;
+
+		ctrset_cpudata = uptr;
+		rc  = put_user(cpu, &ctrset_cpudata->cpu_nr);
+		rc |= put_user(cpuhw->sets, &ctrset_cpudata->no_sets);
+		rc |= copy_to_user(ctrset_cpudata->data, cpuhw->data,
+				   cpuhw->used);
+		if (rc) {
+			rc = -EFAULT;
+			goto out;
+		}
+		uptr += sizeof(struct s390_ctrset_cpudata) + cpuhw->used;
+		cond_resched();
+	}
+	cpus = cpumask_weight(mask);
+	if (put_user(cpus, &ctrset_read->no_cpus))
+		rc = -EFAULT;
+out:
+	return rc;
+}
+
+static size_t cfset_cpuset_read(struct s390_ctrset_setdata *p, int ctrset,
+				int ctrset_size, size_t room)
+{
+	size_t need = 0;
+	int rc = -1;
+
+	need = sizeof(*p) + sizeof(u64) * ctrset_size;
+	if (need <= room) {
+		p->set = cpumf_ctr_ctl[ctrset];
+		p->no_cnts = ctrset_size;
+		rc = ctr_stcctm(ctrset, ctrset_size, (u64 *)p->cv);
+		if (rc == 3)		/* Nothing stored */
+			need = 0;
+	}
+	return need;
+}
+
+/* Read all counter sets. */
+static void cfset_cpu_read(void *parm)
+{
+	struct cpu_cf_events *cpuhw = this_cpu_cfhw();
+	struct cfset_call_on_cpu_parm *p = parm;
+	int set, set_size;
+	size_t space;
+
+	/* No data saved yet */
+	cpuhw->used = 0;
+	cpuhw->sets = 0;
+	memset(cpuhw->data, 0, sizeof(cpuhw->data));
+
+	/* Scan the counter sets */
+	for (set = CPUMF_CTR_SET_BASIC; set < CPUMF_CTR_SET_MAX; ++set) {
+		struct s390_ctrset_setdata *sp = (void *)cpuhw->data +
+						 cpuhw->used;
+
+		if (!(p->sets & cpumf_ctr_ctl[set]))
+			continue;	/* Counter set not in list */
+		set_size = cpum_cf_read_setsize(set);
+		space = sizeof(cpuhw->data) - cpuhw->used;
+		space = cfset_cpuset_read(sp, set, set_size, space);
+		if (space) {
+			cpuhw->used += space;
+			cpuhw->sets += 1;
+		}
+	}
+}
+
+static int cfset_all_read(unsigned long arg, struct cfset_request *req)
+{
+	struct cfset_call_on_cpu_parm p;
+	cpumask_var_t mask;
+	int rc;
+
+	if (!alloc_cpumask_var(&mask, GFP_KERNEL))
+		return -ENOMEM;
+
+	p.sets = req->ctrset;
+	cpumask_and(mask, &req->mask, cpu_online_mask);
+	on_each_cpu_mask(mask, cfset_cpu_read, &p, 1);
+	rc = cfset_all_copy(arg, mask);
+	free_cpumask_var(mask);
+	return rc;
+}
+
+static long cfset_ioctl_read(unsigned long arg, struct cfset_request *req)
+{
+	int ret = -ENODATA;
+
+	if (req && req->ctrset)
+		ret = cfset_all_read(arg, req);
+	return ret;
+}
+
+static long cfset_ioctl_stop(struct file *file)
+{
+	struct cfset_request *req = file->private_data;
+	int ret = -ENXIO;
+
+	if (req) {
+		cfset_all_stop(req);
+		cfset_session_del(req);
+		kfree(req);
+		file->private_data = NULL;
+		ret = 0;
+	}
+	return ret;
+}
+
+static long cfset_ioctl_start(unsigned long arg, struct file *file)
+{
+	struct s390_ctrset_start __user *ustart;
+	struct s390_ctrset_start start;
+	struct cfset_request *preq;
+	void __user *umask;
+	unsigned int len;
+	int ret = 0;
+	size_t need;
+
+	if (file->private_data)
+		return -EBUSY;
+	ustart = (struct s390_ctrset_start __user *)arg;
+	if (copy_from_user(&start, ustart, sizeof(start)))
+		return -EFAULT;
+	if (start.version != S390_HWCTR_START_VERSION)
+		return -EINVAL;
+	if (start.counter_sets & ~(cpumf_ctr_ctl[CPUMF_CTR_SET_BASIC] |
+				   cpumf_ctr_ctl[CPUMF_CTR_SET_USER] |
+				   cpumf_ctr_ctl[CPUMF_CTR_SET_CRYPTO] |
+				   cpumf_ctr_ctl[CPUMF_CTR_SET_EXT] |
+				   cpumf_ctr_ctl[CPUMF_CTR_SET_MT_DIAG]))
+		return -EINVAL;		/* Invalid counter set */
+	if (!start.counter_sets)
+		return -EINVAL;		/* No counter set at all? */
+
+	preq = kzalloc(sizeof(*preq), GFP_KERNEL);
+	if (!preq)
+		return -ENOMEM;
+	cpumask_clear(&preq->mask);
+	len = min_t(u64, start.cpumask_len, cpumask_size());
+	umask = (void __user *)start.cpumask;
+	if (copy_from_user(&preq->mask, umask, len)) {
+		kfree(preq);
+		return -EFAULT;
+	}
+	if (cpumask_empty(&preq->mask)) {
+		kfree(preq);
+		return -EINVAL;
+	}
+	need = cfset_needspace(start.counter_sets);
+	if (put_user(need, &ustart->data_bytes)) {
+		kfree(preq);
+		return -EFAULT;
+	}
+	preq->ctrset = start.counter_sets;
+	ret = cfset_all_start(preq);
+	if (!ret) {
+		cfset_session_add(preq);
+		file->private_data = preq;
+	} else {
+		kfree(preq);
+	}
+	return ret;
+}
+
+/* Entry point to the /dev/hwctr device interface.
+ * The ioctl system call supports three subcommands:
+ * S390_HWCTR_START: Start the specified counter sets on a CPU list. The
+ *    counter set keeps running until explicitly stopped. Returns the number
+ *    of bytes needed to store the counter values. If another S390_HWCTR_START
+ *    ioctl subcommand is called without a previous S390_HWCTR_STOP stop
+ *    command on the same file descriptor, -EBUSY is returned.
+ * S390_HWCTR_READ: Read the counter set values from specified CPU list given
+ *    with the S390_HWCTR_START command.
+ * S390_HWCTR_STOP: Stops the counter sets on the CPU list given with the
+ *    previous S390_HWCTR_START subcommand.
+ */
+static long cfset_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
+{
+	int ret;
+
+	cpus_read_lock();
+	mutex_lock(&cfset_ctrset_mutex);
+	switch (cmd) {
+	case S390_HWCTR_START:
+		ret = cfset_ioctl_start(arg, file);
+		break;
+	case S390_HWCTR_STOP:
+		ret = cfset_ioctl_stop(file);
+		break;
+	case S390_HWCTR_READ:
+		ret = cfset_ioctl_read(arg, file->private_data);
+		break;
+	default:
+		ret = -ENOTTY;
+		break;
+	}
+	mutex_unlock(&cfset_ctrset_mutex);
+	cpus_read_unlock();
+	return ret;
+}
+
+static const struct file_operations cfset_fops = {
+	.owner = THIS_MODULE,
+	.open = cfset_open,
+	.release = cfset_release,
+	.unlocked_ioctl	= cfset_ioctl,
+	.compat_ioctl = cfset_ioctl,
+	.llseek = no_llseek
+};
+
+static struct miscdevice cfset_dev = {
+	.name	= S390_HWCTR_DEVICE,
+	.minor	= MISC_DYNAMIC_MINOR,
+	.fops	= &cfset_fops,
+	.mode	= 0666,
+};
+
+/* Hotplug add of a CPU. Scan through all active processes and add
+ * that CPU to the list of CPUs supplied with ioctl(..., START, ...).
+ */
+static int cfset_online_cpu(unsigned int cpu)
+{
+	struct cfset_call_on_cpu_parm p;
+	struct cfset_request *rp;
+
+	if (!list_empty(&cfset_session.head)) {
+		list_for_each_entry(rp, &cfset_session.head, node) {
+			p.sets = rp->ctrset;
+			cfset_ioctl_on(&p);
+			cpumask_set_cpu(cpu, &rp->mask);
+		}
+	}
+	return 0;
+}
+
+/* Hotplug remove of a CPU. Scan through all active processes and clear
+ * that CPU from the list of CPUs supplied with ioctl(..., START, ...).
+ * Adjust reference counts.
+ */
+static int cfset_offline_cpu(unsigned int cpu)
+{
+	struct cfset_call_on_cpu_parm p;
+	struct cfset_request *rp;
+
+	if (!list_empty(&cfset_session.head)) {
+		list_for_each_entry(rp, &cfset_session.head, node) {
+			p.sets = rp->ctrset;
+			cfset_ioctl_off(&p);
+			cpumask_clear_cpu(cpu, &rp->mask);
+		}
+	}
+	return 0;
+}
+
+static void cfdiag_read(struct perf_event *event)
+{
+}
+
+static int get_authctrsets(void)
+{
+	unsigned long auth = 0;
+	enum cpumf_ctr_set i;
+
+	for (i = CPUMF_CTR_SET_BASIC; i < CPUMF_CTR_SET_MAX; ++i) {
+		if (cpumf_ctr_info.auth_ctl & cpumf_ctr_ctl[i])
+			auth |= cpumf_ctr_ctl[i];
+	}
+	return auth;
+}
+
+/* Setup the event. Test for authorized counter sets and only include counter
+ * sets which are authorized at the time of the setup. Including unauthorized
+ * counter sets result in specification exception (and panic).
+ */
+static int cfdiag_event_init2(struct perf_event *event)
+{
+	struct perf_event_attr *attr = &event->attr;
+	int err = 0;
+
+	/* Set sample_period to indicate sampling */
+	event->hw.config = attr->config;
+	event->hw.sample_period = attr->sample_period;
+	local64_set(&event->hw.period_left, event->hw.sample_period);
+	local64_set(&event->count, 0);
+	event->hw.last_period = event->hw.sample_period;
+
+	/* Add all authorized counter sets to config_base. The
+	 * the hardware init function is either called per-cpu or just once
+	 * for all CPUS (event->cpu == -1).  This depends on the whether
+	 * counting is started for all CPUs or on a per workload base where
+	 * the perf event moves from one CPU to another CPU.
+	 * Checking the authorization on any CPU is fine as the hardware
+	 * applies the same authorization settings to all CPUs.
+	 */
+	event->hw.config_base = get_authctrsets();
+
+	/* No authorized counter sets, nothing to count/sample */
+	if (!event->hw.config_base)
+		err = -EINVAL;
+
+	return err;
+}
+
+static int cfdiag_event_init(struct perf_event *event)
+{
+	struct perf_event_attr *attr = &event->attr;
+	int err = -ENOENT;
+
+	if (event->attr.config != PERF_EVENT_CPUM_CF_DIAG ||
+	    event->attr.type != event->pmu->type)
+		goto out;
+
+	/* Raw events are used to access counters directly,
+	 * hence do not permit excludes.
+	 * This event is useless without PERF_SAMPLE_RAW to return counter set
+	 * values as raw data.
+	 */
+	if (attr->exclude_kernel || attr->exclude_user || attr->exclude_hv ||
+	    !(attr->sample_type & (PERF_SAMPLE_CPU | PERF_SAMPLE_RAW))) {
+		err = -EOPNOTSUPP;
+		goto out;
+	}
+
+	/* Initialize for using the CPU-measurement counter facility */
+	if (cpum_cf_alloc(event->cpu))
+		return -ENOMEM;
+	event->destroy = hw_perf_event_destroy;
+
+	err = cfdiag_event_init2(event);
+	if (unlikely(err))
+		event->destroy(event);
+out:
+	return err;
+}
+
+/* Create cf_diag/events/CF_DIAG event sysfs file. This counter is used
+ * to collect the complete counter sets for a scheduled process. Target
+ * are complete counter sets attached as raw data to the artificial event.
+ * This results in complete counter sets available when a process is
+ * scheduled. Contains the delta of every counter while the process was
+ * running.
+ */
+CPUMF_EVENT_ATTR(CF_DIAG, CF_DIAG, PERF_EVENT_CPUM_CF_DIAG);
+
+static struct attribute *cfdiag_events_attr[] = {
+	CPUMF_EVENT_PTR(CF_DIAG, CF_DIAG),
+	NULL,
+};
+
+PMU_FORMAT_ATTR(event, "config:0-63");
+
+static struct attribute *cfdiag_format_attr[] = {
+	&format_attr_event.attr,
+	NULL,
+};
+
+static struct attribute_group cfdiag_events_group = {
+	.name = "events",
+	.attrs = cfdiag_events_attr,
+};
+static struct attribute_group cfdiag_format_group = {
+	.name = "format",
+	.attrs = cfdiag_format_attr,
+};
+static const struct attribute_group *cfdiag_attr_groups[] = {
+	&cfdiag_events_group,
+	&cfdiag_format_group,
+	NULL,
+};
+
+/* Performance monitoring unit for event CF_DIAG. Since this event
+ * is also started and stopped via the perf_event_open() system call, use
+ * the same event enable/disable call back functions. They do not
+ * have a pointer to the perf_event strcture as first parameter.
+ *
+ * The functions XXX_add, XXX_del, XXX_start and XXX_stop are also common.
+ * Reuse them and distinguish the event (always first parameter) via
+ * 'config' member.
+ */
+static struct pmu cf_diag = {
+	.task_ctx_nr  = perf_sw_context,
+	.event_init   = cfdiag_event_init,
+	.pmu_enable   = cpumf_pmu_enable,
+	.pmu_disable  = cpumf_pmu_disable,
+	.add	      = cpumf_pmu_add,
+	.del	      = cpumf_pmu_del,
+	.start	      = cpumf_pmu_start,
+	.stop	      = cpumf_pmu_stop,
+	.read	      = cfdiag_read,
+
+	.attr_groups  = cfdiag_attr_groups
+};
+
+/* Calculate memory needed to store all counter sets together with header and
+ * trailer data. This is independent of the counter set authorization which
+ * can vary depending on the configuration.
+ */
+static size_t cfdiag_maxsize(struct cpumf_ctr_info *info)
+{
+	size_t max_size = sizeof(struct cf_trailer_entry);
+	enum cpumf_ctr_set i;
+
+	for (i = CPUMF_CTR_SET_BASIC; i < CPUMF_CTR_SET_MAX; ++i) {
+		size_t size = cpum_cf_read_setsize(i);
+
+		if (size)
+			max_size += size * sizeof(u64) +
+				    sizeof(struct cf_ctrset_entry);
+	}
+	return max_size;
+}
+
+/* Get the CPU speed, try sampling facility first and CPU attributes second. */
+static void cfdiag_get_cpu_speed(void)
+{
+	unsigned long mhz;
+
+	if (cpum_sf_avail()) {			/* Sampling facility first */
+		struct hws_qsi_info_block si;
+
+		memset(&si, 0, sizeof(si));
+		if (!qsi(&si)) {
+			cfdiag_cpu_speed = si.cpu_speed;
+			return;
+		}
+	}
+
+	/* Fallback: CPU speed extract static part. Used in case
+	 * CPU Measurement Sampling Facility is turned off.
+	 */
+	mhz = __ecag(ECAG_CPU_ATTRIBUTE, 0);
+	if (mhz != -1UL)
+		cfdiag_cpu_speed = mhz & 0xffffffff;
+}
+
+static int cfset_init(void)
+{
+	size_t need;
+	int rc;
+
+	cfdiag_get_cpu_speed();
+	/* Make sure the counter set data fits into predefined buffer. */
+	need = cfdiag_maxsize(&cpumf_ctr_info);
+	if (need > sizeof(((struct cpu_cf_events *)0)->start)) {
+		pr_err("Insufficient memory for PMU(cpum_cf_diag) need=%zu\n",
+		       need);
+		return -ENOMEM;
+	}
+
+	rc = misc_register(&cfset_dev);
+	if (rc) {
+		pr_err("Registration of /dev/%s failed rc=%i\n",
+		       cfset_dev.name, rc);
+		goto out;
+	}
+
+	rc = perf_pmu_register(&cf_diag, "cpum_cf_diag", -1);
+	if (rc) {
+		misc_deregister(&cfset_dev);
+		pr_err("Registration of PMU(cpum_cf_diag) failed with rc=%i\n",
+		       rc);
+	}
+out:
+	return rc;
+}
+
+device_initcall(cpumf_pmu_init);
diff --git a/arch/s390/kernel/perf_cpum_cf_events.c b/arch/s390/kernel/perf_cpum_cf_events.c
new file mode 100644
index 0000000000..0d64aafd15
--- /dev/null
+++ b/arch/s390/kernel/perf_cpum_cf_events.c
@@ -0,0 +1,909 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Perf PMU sysfs events attributes for available CPU-measurement counters
+ *
+ */
+
+#include <linux/slab.h>
+#include <linux/perf_event.h>
+#include <asm/cpu_mf.h>
+
+
+/* BEGIN: CPUM_CF COUNTER DEFINITIONS =================================== */
+
+CPUMF_EVENT_ATTR(cf_fvn1, CPU_CYCLES, 0x0000);
+CPUMF_EVENT_ATTR(cf_fvn1, INSTRUCTIONS, 0x0001);
+CPUMF_EVENT_ATTR(cf_fvn1, L1I_DIR_WRITES, 0x0002);
+CPUMF_EVENT_ATTR(cf_fvn1, L1I_PENALTY_CYCLES, 0x0003);
+CPUMF_EVENT_ATTR(cf_fvn1, PROBLEM_STATE_CPU_CYCLES, 0x0020);
+CPUMF_EVENT_ATTR(cf_fvn1, PROBLEM_STATE_INSTRUCTIONS, 0x0021);
+CPUMF_EVENT_ATTR(cf_fvn1, PROBLEM_STATE_L1I_DIR_WRITES, 0x0022);
+CPUMF_EVENT_ATTR(cf_fvn1, PROBLEM_STATE_L1I_PENALTY_CYCLES, 0x0023);
+CPUMF_EVENT_ATTR(cf_fvn1, PROBLEM_STATE_L1D_DIR_WRITES, 0x0024);
+CPUMF_EVENT_ATTR(cf_fvn1, PROBLEM_STATE_L1D_PENALTY_CYCLES, 0x0025);
+CPUMF_EVENT_ATTR(cf_fvn1, L1D_DIR_WRITES, 0x0004);
+CPUMF_EVENT_ATTR(cf_fvn1, L1D_PENALTY_CYCLES, 0x0005);
+CPUMF_EVENT_ATTR(cf_fvn3, CPU_CYCLES, 0x0000);
+CPUMF_EVENT_ATTR(cf_fvn3, INSTRUCTIONS, 0x0001);
+CPUMF_EVENT_ATTR(cf_fvn3, L1I_DIR_WRITES, 0x0002);
+CPUMF_EVENT_ATTR(cf_fvn3, L1I_PENALTY_CYCLES, 0x0003);
+CPUMF_EVENT_ATTR(cf_fvn3, PROBLEM_STATE_CPU_CYCLES, 0x0020);
+CPUMF_EVENT_ATTR(cf_fvn3, PROBLEM_STATE_INSTRUCTIONS, 0x0021);
+CPUMF_EVENT_ATTR(cf_fvn3, L1D_DIR_WRITES, 0x0004);
+CPUMF_EVENT_ATTR(cf_fvn3, L1D_PENALTY_CYCLES, 0x0005);
+CPUMF_EVENT_ATTR(cf_svn_12345, PRNG_FUNCTIONS, 0x0040);
+CPUMF_EVENT_ATTR(cf_svn_12345, PRNG_CYCLES, 0x0041);
+CPUMF_EVENT_ATTR(cf_svn_12345, PRNG_BLOCKED_FUNCTIONS, 0x0042);
+CPUMF_EVENT_ATTR(cf_svn_12345, PRNG_BLOCKED_CYCLES, 0x0043);
+CPUMF_EVENT_ATTR(cf_svn_12345, SHA_FUNCTIONS, 0x0044);
+CPUMF_EVENT_ATTR(cf_svn_12345, SHA_CYCLES, 0x0045);
+CPUMF_EVENT_ATTR(cf_svn_12345, SHA_BLOCKED_FUNCTIONS, 0x0046);
+CPUMF_EVENT_ATTR(cf_svn_12345, SHA_BLOCKED_CYCLES, 0x0047);
+CPUMF_EVENT_ATTR(cf_svn_12345, DEA_FUNCTIONS, 0x0048);
+CPUMF_EVENT_ATTR(cf_svn_12345, DEA_CYCLES, 0x0049);
+CPUMF_EVENT_ATTR(cf_svn_12345, DEA_BLOCKED_FUNCTIONS, 0x004a);
+CPUMF_EVENT_ATTR(cf_svn_12345, DEA_BLOCKED_CYCLES, 0x004b);
+CPUMF_EVENT_ATTR(cf_svn_12345, AES_FUNCTIONS, 0x004c);
+CPUMF_EVENT_ATTR(cf_svn_12345, AES_CYCLES, 0x004d);
+CPUMF_EVENT_ATTR(cf_svn_12345, AES_BLOCKED_FUNCTIONS, 0x004e);
+CPUMF_EVENT_ATTR(cf_svn_12345, AES_BLOCKED_CYCLES, 0x004f);
+CPUMF_EVENT_ATTR(cf_svn_6, ECC_FUNCTION_COUNT, 0x0050);
+CPUMF_EVENT_ATTR(cf_svn_6, ECC_CYCLES_COUNT, 0x0051);
+CPUMF_EVENT_ATTR(cf_svn_6, ECC_BLOCKED_FUNCTION_COUNT, 0x0052);
+CPUMF_EVENT_ATTR(cf_svn_6, ECC_BLOCKED_CYCLES_COUNT, 0x0053);
+CPUMF_EVENT_ATTR(cf_z10, L1I_L2_SOURCED_WRITES, 0x0080);
+CPUMF_EVENT_ATTR(cf_z10, L1D_L2_SOURCED_WRITES, 0x0081);
+CPUMF_EVENT_ATTR(cf_z10, L1I_L3_LOCAL_WRITES, 0x0082);
+CPUMF_EVENT_ATTR(cf_z10, L1D_L3_LOCAL_WRITES, 0x0083);
+CPUMF_EVENT_ATTR(cf_z10, L1I_L3_REMOTE_WRITES, 0x0084);
+CPUMF_EVENT_ATTR(cf_z10, L1D_L3_REMOTE_WRITES, 0x0085);
+CPUMF_EVENT_ATTR(cf_z10, L1D_LMEM_SOURCED_WRITES, 0x0086);
+CPUMF_EVENT_ATTR(cf_z10, L1I_LMEM_SOURCED_WRITES, 0x0087);
+CPUMF_EVENT_ATTR(cf_z10, L1D_RO_EXCL_WRITES, 0x0088);
+CPUMF_EVENT_ATTR(cf_z10, L1I_CACHELINE_INVALIDATES, 0x0089);
+CPUMF_EVENT_ATTR(cf_z10, ITLB1_WRITES, 0x008a);
+CPUMF_EVENT_ATTR(cf_z10, DTLB1_WRITES, 0x008b);
+CPUMF_EVENT_ATTR(cf_z10, TLB2_PTE_WRITES, 0x008c);
+CPUMF_EVENT_ATTR(cf_z10, TLB2_CRSTE_WRITES, 0x008d);
+CPUMF_EVENT_ATTR(cf_z10, TLB2_CRSTE_HPAGE_WRITES, 0x008e);
+CPUMF_EVENT_ATTR(cf_z10, ITLB1_MISSES, 0x0091);
+CPUMF_EVENT_ATTR(cf_z10, DTLB1_MISSES, 0x0092);
+CPUMF_EVENT_ATTR(cf_z10, L2C_STORES_SENT, 0x0093);
+CPUMF_EVENT_ATTR(cf_z196, L1D_L2_SOURCED_WRITES, 0x0080);
+CPUMF_EVENT_ATTR(cf_z196, L1I_L2_SOURCED_WRITES, 0x0081);
+CPUMF_EVENT_ATTR(cf_z196, DTLB1_MISSES, 0x0082);
+CPUMF_EVENT_ATTR(cf_z196, ITLB1_MISSES, 0x0083);
+CPUMF_EVENT_ATTR(cf_z196, L2C_STORES_SENT, 0x0085);
+CPUMF_EVENT_ATTR(cf_z196, L1D_OFFBOOK_L3_SOURCED_WRITES, 0x0086);
+CPUMF_EVENT_ATTR(cf_z196, L1D_ONBOOK_L4_SOURCED_WRITES, 0x0087);
+CPUMF_EVENT_ATTR(cf_z196, L1I_ONBOOK_L4_SOURCED_WRITES, 0x0088);
+CPUMF_EVENT_ATTR(cf_z196, L1D_RO_EXCL_WRITES, 0x0089);
+CPUMF_EVENT_ATTR(cf_z196, L1D_OFFBOOK_L4_SOURCED_WRITES, 0x008a);
+CPUMF_EVENT_ATTR(cf_z196, L1I_OFFBOOK_L4_SOURCED_WRITES, 0x008b);
+CPUMF_EVENT_ATTR(cf_z196, DTLB1_HPAGE_WRITES, 0x008c);
+CPUMF_EVENT_ATTR(cf_z196, L1D_LMEM_SOURCED_WRITES, 0x008d);
+CPUMF_EVENT_ATTR(cf_z196, L1I_LMEM_SOURCED_WRITES, 0x008e);
+CPUMF_EVENT_ATTR(cf_z196, L1I_OFFBOOK_L3_SOURCED_WRITES, 0x008f);
+CPUMF_EVENT_ATTR(cf_z196, DTLB1_WRITES, 0x0090);
+CPUMF_EVENT_ATTR(cf_z196, ITLB1_WRITES, 0x0091);
+CPUMF_EVENT_ATTR(cf_z196, TLB2_PTE_WRITES, 0x0092);
+CPUMF_EVENT_ATTR(cf_z196, TLB2_CRSTE_HPAGE_WRITES, 0x0093);
+CPUMF_EVENT_ATTR(cf_z196, TLB2_CRSTE_WRITES, 0x0094);
+CPUMF_EVENT_ATTR(cf_z196, L1D_ONCHIP_L3_SOURCED_WRITES, 0x0096);
+CPUMF_EVENT_ATTR(cf_z196, L1D_OFFCHIP_L3_SOURCED_WRITES, 0x0098);
+CPUMF_EVENT_ATTR(cf_z196, L1I_ONCHIP_L3_SOURCED_WRITES, 0x0099);
+CPUMF_EVENT_ATTR(cf_z196, L1I_OFFCHIP_L3_SOURCED_WRITES, 0x009b);
+CPUMF_EVENT_ATTR(cf_zec12, DTLB1_MISSES, 0x0080);
+CPUMF_EVENT_ATTR(cf_zec12, ITLB1_MISSES, 0x0081);
+CPUMF_EVENT_ATTR(cf_zec12, L1D_L2I_SOURCED_WRITES, 0x0082);
+CPUMF_EVENT_ATTR(cf_zec12, L1I_L2I_SOURCED_WRITES, 0x0083);
+CPUMF_EVENT_ATTR(cf_zec12, L1D_L2D_SOURCED_WRITES, 0x0084);
+CPUMF_EVENT_ATTR(cf_zec12, DTLB1_WRITES, 0x0085);
+CPUMF_EVENT_ATTR(cf_zec12, L1D_LMEM_SOURCED_WRITES, 0x0087);
+CPUMF_EVENT_ATTR(cf_zec12, L1I_LMEM_SOURCED_WRITES, 0x0089);
+CPUMF_EVENT_ATTR(cf_zec12, L1D_RO_EXCL_WRITES, 0x008a);
+CPUMF_EVENT_ATTR(cf_zec12, DTLB1_HPAGE_WRITES, 0x008b);
+CPUMF_EVENT_ATTR(cf_zec12, ITLB1_WRITES, 0x008c);
+CPUMF_EVENT_ATTR(cf_zec12, TLB2_PTE_WRITES, 0x008d);
+CPUMF_EVENT_ATTR(cf_zec12, TLB2_CRSTE_HPAGE_WRITES, 0x008e);
+CPUMF_EVENT_ATTR(cf_zec12, TLB2_CRSTE_WRITES, 0x008f);
+CPUMF_EVENT_ATTR(cf_zec12, L1D_ONCHIP_L3_SOURCED_WRITES, 0x0090);
+CPUMF_EVENT_ATTR(cf_zec12, L1D_OFFCHIP_L3_SOURCED_WRITES, 0x0091);
+CPUMF_EVENT_ATTR(cf_zec12, L1D_OFFBOOK_L3_SOURCED_WRITES, 0x0092);
+CPUMF_EVENT_ATTR(cf_zec12, L1D_ONBOOK_L4_SOURCED_WRITES, 0x0093);
+CPUMF_EVENT_ATTR(cf_zec12, L1D_OFFBOOK_L4_SOURCED_WRITES, 0x0094);
+CPUMF_EVENT_ATTR(cf_zec12, TX_NC_TEND, 0x0095);
+CPUMF_EVENT_ATTR(cf_zec12, L1D_ONCHIP_L3_SOURCED_WRITES_IV, 0x0096);
+CPUMF_EVENT_ATTR(cf_zec12, L1D_OFFCHIP_L3_SOURCED_WRITES_IV, 0x0097);
+CPUMF_EVENT_ATTR(cf_zec12, L1D_OFFBOOK_L3_SOURCED_WRITES_IV, 0x0098);
+CPUMF_EVENT_ATTR(cf_zec12, L1I_ONCHIP_L3_SOURCED_WRITES, 0x0099);
+CPUMF_EVENT_ATTR(cf_zec12, L1I_OFFCHIP_L3_SOURCED_WRITES, 0x009a);
+CPUMF_EVENT_ATTR(cf_zec12, L1I_OFFBOOK_L3_SOURCED_WRITES, 0x009b);
+CPUMF_EVENT_ATTR(cf_zec12, L1I_ONBOOK_L4_SOURCED_WRITES, 0x009c);
+CPUMF_EVENT_ATTR(cf_zec12, L1I_OFFBOOK_L4_SOURCED_WRITES, 0x009d);
+CPUMF_EVENT_ATTR(cf_zec12, TX_C_TEND, 0x009e);
+CPUMF_EVENT_ATTR(cf_zec12, L1I_ONCHIP_L3_SOURCED_WRITES_IV, 0x009f);
+CPUMF_EVENT_ATTR(cf_zec12, L1I_OFFCHIP_L3_SOURCED_WRITES_IV, 0x00a0);
+CPUMF_EVENT_ATTR(cf_zec12, L1I_OFFBOOK_L3_SOURCED_WRITES_IV, 0x00a1);
+CPUMF_EVENT_ATTR(cf_zec12, TX_NC_TABORT, 0x00b1);
+CPUMF_EVENT_ATTR(cf_zec12, TX_C_TABORT_NO_SPECIAL, 0x00b2);
+CPUMF_EVENT_ATTR(cf_zec12, TX_C_TABORT_SPECIAL, 0x00b3);
+CPUMF_EVENT_ATTR(cf_z13, L1D_RO_EXCL_WRITES, 0x0080);
+CPUMF_EVENT_ATTR(cf_z13, DTLB1_WRITES, 0x0081);
+CPUMF_EVENT_ATTR(cf_z13, DTLB1_MISSES, 0x0082);
+CPUMF_EVENT_ATTR(cf_z13, DTLB1_HPAGE_WRITES, 0x0083);
+CPUMF_EVENT_ATTR(cf_z13, DTLB1_GPAGE_WRITES, 0x0084);
+CPUMF_EVENT_ATTR(cf_z13, L1D_L2D_SOURCED_WRITES, 0x0085);
+CPUMF_EVENT_ATTR(cf_z13, ITLB1_WRITES, 0x0086);
+CPUMF_EVENT_ATTR(cf_z13, ITLB1_MISSES, 0x0087);
+CPUMF_EVENT_ATTR(cf_z13, L1I_L2I_SOURCED_WRITES, 0x0088);
+CPUMF_EVENT_ATTR(cf_z13, TLB2_PTE_WRITES, 0x0089);
+CPUMF_EVENT_ATTR(cf_z13, TLB2_CRSTE_HPAGE_WRITES, 0x008a);
+CPUMF_EVENT_ATTR(cf_z13, TLB2_CRSTE_WRITES, 0x008b);
+CPUMF_EVENT_ATTR(cf_z13, TX_C_TEND, 0x008c);
+CPUMF_EVENT_ATTR(cf_z13, TX_NC_TEND, 0x008d);
+CPUMF_EVENT_ATTR(cf_z13, L1C_TLB1_MISSES, 0x008f);
+CPUMF_EVENT_ATTR(cf_z13, L1D_ONCHIP_L3_SOURCED_WRITES, 0x0090);
+CPUMF_EVENT_ATTR(cf_z13, L1D_ONCHIP_L3_SOURCED_WRITES_IV, 0x0091);
+CPUMF_EVENT_ATTR(cf_z13, L1D_ONNODE_L4_SOURCED_WRITES, 0x0092);
+CPUMF_EVENT_ATTR(cf_z13, L1D_ONNODE_L3_SOURCED_WRITES_IV, 0x0093);
+CPUMF_EVENT_ATTR(cf_z13, L1D_ONNODE_L3_SOURCED_WRITES, 0x0094);
+CPUMF_EVENT_ATTR(cf_z13, L1D_ONDRAWER_L4_SOURCED_WRITES, 0x0095);
+CPUMF_EVENT_ATTR(cf_z13, L1D_ONDRAWER_L3_SOURCED_WRITES_IV, 0x0096);
+CPUMF_EVENT_ATTR(cf_z13, L1D_ONDRAWER_L3_SOURCED_WRITES, 0x0097);
+CPUMF_EVENT_ATTR(cf_z13, L1D_OFFDRAWER_SCOL_L4_SOURCED_WRITES, 0x0098);
+CPUMF_EVENT_ATTR(cf_z13, L1D_OFFDRAWER_SCOL_L3_SOURCED_WRITES_IV, 0x0099);
+CPUMF_EVENT_ATTR(cf_z13, L1D_OFFDRAWER_SCOL_L3_SOURCED_WRITES, 0x009a);
+CPUMF_EVENT_ATTR(cf_z13, L1D_OFFDRAWER_FCOL_L4_SOURCED_WRITES, 0x009b);
+CPUMF_EVENT_ATTR(cf_z13, L1D_OFFDRAWER_FCOL_L3_SOURCED_WRITES_IV, 0x009c);
+CPUMF_EVENT_ATTR(cf_z13, L1D_OFFDRAWER_FCOL_L3_SOURCED_WRITES, 0x009d);
+CPUMF_EVENT_ATTR(cf_z13, L1D_ONNODE_MEM_SOURCED_WRITES, 0x009e);
+CPUMF_EVENT_ATTR(cf_z13, L1D_ONDRAWER_MEM_SOURCED_WRITES, 0x009f);
+CPUMF_EVENT_ATTR(cf_z13, L1D_OFFDRAWER_MEM_SOURCED_WRITES, 0x00a0);
+CPUMF_EVENT_ATTR(cf_z13, L1D_ONCHIP_MEM_SOURCED_WRITES, 0x00a1);
+CPUMF_EVENT_ATTR(cf_z13, L1I_ONCHIP_L3_SOURCED_WRITES, 0x00a2);
+CPUMF_EVENT_ATTR(cf_z13, L1I_ONCHIP_L3_SOURCED_WRITES_IV, 0x00a3);
+CPUMF_EVENT_ATTR(cf_z13, L1I_ONNODE_L4_SOURCED_WRITES, 0x00a4);
+CPUMF_EVENT_ATTR(cf_z13, L1I_ONNODE_L3_SOURCED_WRITES_IV, 0x00a5);
+CPUMF_EVENT_ATTR(cf_z13, L1I_ONNODE_L3_SOURCED_WRITES, 0x00a6);
+CPUMF_EVENT_ATTR(cf_z13, L1I_ONDRAWER_L4_SOURCED_WRITES, 0x00a7);
+CPUMF_EVENT_ATTR(cf_z13, L1I_ONDRAWER_L3_SOURCED_WRITES_IV, 0x00a8);
+CPUMF_EVENT_ATTR(cf_z13, L1I_ONDRAWER_L3_SOURCED_WRITES, 0x00a9);
+CPUMF_EVENT_ATTR(cf_z13, L1I_OFFDRAWER_SCOL_L4_SOURCED_WRITES, 0x00aa);
+CPUMF_EVENT_ATTR(cf_z13, L1I_OFFDRAWER_SCOL_L3_SOURCED_WRITES_IV, 0x00ab);
+CPUMF_EVENT_ATTR(cf_z13, L1I_OFFDRAWER_SCOL_L3_SOURCED_WRITES, 0x00ac);
+CPUMF_EVENT_ATTR(cf_z13, L1I_OFFDRAWER_FCOL_L4_SOURCED_WRITES, 0x00ad);
+CPUMF_EVENT_ATTR(cf_z13, L1I_OFFDRAWER_FCOL_L3_SOURCED_WRITES_IV, 0x00ae);
+CPUMF_EVENT_ATTR(cf_z13, L1I_OFFDRAWER_FCOL_L3_SOURCED_WRITES, 0x00af);
+CPUMF_EVENT_ATTR(cf_z13, L1I_ONNODE_MEM_SOURCED_WRITES, 0x00b0);
+CPUMF_EVENT_ATTR(cf_z13, L1I_ONDRAWER_MEM_SOURCED_WRITES, 0x00b1);
+CPUMF_EVENT_ATTR(cf_z13, L1I_OFFDRAWER_MEM_SOURCED_WRITES, 0x00b2);
+CPUMF_EVENT_ATTR(cf_z13, L1I_ONCHIP_MEM_SOURCED_WRITES, 0x00b3);
+CPUMF_EVENT_ATTR(cf_z13, TX_NC_TABORT, 0x00da);
+CPUMF_EVENT_ATTR(cf_z13, TX_C_TABORT_NO_SPECIAL, 0x00db);
+CPUMF_EVENT_ATTR(cf_z13, TX_C_TABORT_SPECIAL, 0x00dc);
+CPUMF_EVENT_ATTR(cf_z13, MT_DIAG_CYCLES_ONE_THR_ACTIVE, 0x01c0);
+CPUMF_EVENT_ATTR(cf_z13, MT_DIAG_CYCLES_TWO_THR_ACTIVE, 0x01c1);
+CPUMF_EVENT_ATTR(cf_z14, L1D_RO_EXCL_WRITES, 0x0080);
+CPUMF_EVENT_ATTR(cf_z14, DTLB2_WRITES, 0x0081);
+CPUMF_EVENT_ATTR(cf_z14, DTLB2_MISSES, 0x0082);
+CPUMF_EVENT_ATTR(cf_z14, DTLB2_HPAGE_WRITES, 0x0083);
+CPUMF_EVENT_ATTR(cf_z14, DTLB2_GPAGE_WRITES, 0x0084);
+CPUMF_EVENT_ATTR(cf_z14, L1D_L2D_SOURCED_WRITES, 0x0085);
+CPUMF_EVENT_ATTR(cf_z14, ITLB2_WRITES, 0x0086);
+CPUMF_EVENT_ATTR(cf_z14, ITLB2_MISSES, 0x0087);
+CPUMF_EVENT_ATTR(cf_z14, L1I_L2I_SOURCED_WRITES, 0x0088);
+CPUMF_EVENT_ATTR(cf_z14, TLB2_PTE_WRITES, 0x0089);
+CPUMF_EVENT_ATTR(cf_z14, TLB2_CRSTE_WRITES, 0x008a);
+CPUMF_EVENT_ATTR(cf_z14, TLB2_ENGINES_BUSY, 0x008b);
+CPUMF_EVENT_ATTR(cf_z14, TX_C_TEND, 0x008c);
+CPUMF_EVENT_ATTR(cf_z14, TX_NC_TEND, 0x008d);
+CPUMF_EVENT_ATTR(cf_z14, L1C_TLB2_MISSES, 0x008f);
+CPUMF_EVENT_ATTR(cf_z14, L1D_ONCHIP_L3_SOURCED_WRITES, 0x0090);
+CPUMF_EVENT_ATTR(cf_z14, L1D_ONCHIP_MEMORY_SOURCED_WRITES, 0x0091);
+CPUMF_EVENT_ATTR(cf_z14, L1D_ONCHIP_L3_SOURCED_WRITES_IV, 0x0092);
+CPUMF_EVENT_ATTR(cf_z14, L1D_ONCLUSTER_L3_SOURCED_WRITES, 0x0093);
+CPUMF_EVENT_ATTR(cf_z14, L1D_ONCLUSTER_MEMORY_SOURCED_WRITES, 0x0094);
+CPUMF_EVENT_ATTR(cf_z14, L1D_ONCLUSTER_L3_SOURCED_WRITES_IV, 0x0095);
+CPUMF_EVENT_ATTR(cf_z14, L1D_OFFCLUSTER_L3_SOURCED_WRITES, 0x0096);
+CPUMF_EVENT_ATTR(cf_z14, L1D_OFFCLUSTER_MEMORY_SOURCED_WRITES, 0x0097);
+CPUMF_EVENT_ATTR(cf_z14, L1D_OFFCLUSTER_L3_SOURCED_WRITES_IV, 0x0098);
+CPUMF_EVENT_ATTR(cf_z14, L1D_OFFDRAWER_L3_SOURCED_WRITES, 0x0099);
+CPUMF_EVENT_ATTR(cf_z14, L1D_OFFDRAWER_MEMORY_SOURCED_WRITES, 0x009a);
+CPUMF_EVENT_ATTR(cf_z14, L1D_OFFDRAWER_L3_SOURCED_WRITES_IV, 0x009b);
+CPUMF_EVENT_ATTR(cf_z14, L1D_ONDRAWER_L4_SOURCED_WRITES, 0x009c);
+CPUMF_EVENT_ATTR(cf_z14, L1D_OFFDRAWER_L4_SOURCED_WRITES, 0x009d);
+CPUMF_EVENT_ATTR(cf_z14, L1D_ONCHIP_L3_SOURCED_WRITES_RO, 0x009e);
+CPUMF_EVENT_ATTR(cf_z14, L1I_ONCHIP_L3_SOURCED_WRITES, 0x00a2);
+CPUMF_EVENT_ATTR(cf_z14, L1I_ONCHIP_MEMORY_SOURCED_WRITES, 0x00a3);
+CPUMF_EVENT_ATTR(cf_z14, L1I_ONCHIP_L3_SOURCED_WRITES_IV, 0x00a4);
+CPUMF_EVENT_ATTR(cf_z14, L1I_ONCLUSTER_L3_SOURCED_WRITES, 0x00a5);
+CPUMF_EVENT_ATTR(cf_z14, L1I_ONCLUSTER_MEMORY_SOURCED_WRITES, 0x00a6);
+CPUMF_EVENT_ATTR(cf_z14, L1I_ONCLUSTER_L3_SOURCED_WRITES_IV, 0x00a7);
+CPUMF_EVENT_ATTR(cf_z14, L1I_OFFCLUSTER_L3_SOURCED_WRITES, 0x00a8);
+CPUMF_EVENT_ATTR(cf_z14, L1I_OFFCLUSTER_MEMORY_SOURCED_WRITES, 0x00a9);
+CPUMF_EVENT_ATTR(cf_z14, L1I_OFFCLUSTER_L3_SOURCED_WRITES_IV, 0x00aa);
+CPUMF_EVENT_ATTR(cf_z14, L1I_OFFDRAWER_L3_SOURCED_WRITES, 0x00ab);
+CPUMF_EVENT_ATTR(cf_z14, L1I_OFFDRAWER_MEMORY_SOURCED_WRITES, 0x00ac);
+CPUMF_EVENT_ATTR(cf_z14, L1I_OFFDRAWER_L3_SOURCED_WRITES_IV, 0x00ad);
+CPUMF_EVENT_ATTR(cf_z14, L1I_ONDRAWER_L4_SOURCED_WRITES, 0x00ae);
+CPUMF_EVENT_ATTR(cf_z14, L1I_OFFDRAWER_L4_SOURCED_WRITES, 0x00af);
+CPUMF_EVENT_ATTR(cf_z14, BCD_DFP_EXECUTION_SLOTS, 0x00e0);
+CPUMF_EVENT_ATTR(cf_z14, VX_BCD_EXECUTION_SLOTS, 0x00e1);
+CPUMF_EVENT_ATTR(cf_z14, DECIMAL_INSTRUCTIONS, 0x00e2);
+CPUMF_EVENT_ATTR(cf_z14, LAST_HOST_TRANSLATIONS, 0x00e8);
+CPUMF_EVENT_ATTR(cf_z14, TX_NC_TABORT, 0x00f3);
+CPUMF_EVENT_ATTR(cf_z14, TX_C_TABORT_NO_SPECIAL, 0x00f4);
+CPUMF_EVENT_ATTR(cf_z14, TX_C_TABORT_SPECIAL, 0x00f5);
+CPUMF_EVENT_ATTR(cf_z14, MT_DIAG_CYCLES_ONE_THR_ACTIVE, 0x01c0);
+CPUMF_EVENT_ATTR(cf_z14, MT_DIAG_CYCLES_TWO_THR_ACTIVE, 0x01c1);
+
+CPUMF_EVENT_ATTR(cf_z15, L1D_RO_EXCL_WRITES, 0x0080);
+CPUMF_EVENT_ATTR(cf_z15, DTLB2_WRITES, 0x0081);
+CPUMF_EVENT_ATTR(cf_z15, DTLB2_MISSES, 0x0082);
+CPUMF_EVENT_ATTR(cf_z15, DTLB2_HPAGE_WRITES, 0x0083);
+CPUMF_EVENT_ATTR(cf_z15, DTLB2_GPAGE_WRITES, 0x0084);
+CPUMF_EVENT_ATTR(cf_z15, L1D_L2D_SOURCED_WRITES, 0x0085);
+CPUMF_EVENT_ATTR(cf_z15, ITLB2_WRITES, 0x0086);
+CPUMF_EVENT_ATTR(cf_z15, ITLB2_MISSES, 0x0087);
+CPUMF_EVENT_ATTR(cf_z15, L1I_L2I_SOURCED_WRITES, 0x0088);
+CPUMF_EVENT_ATTR(cf_z15, TLB2_PTE_WRITES, 0x0089);
+CPUMF_EVENT_ATTR(cf_z15, TLB2_CRSTE_WRITES, 0x008a);
+CPUMF_EVENT_ATTR(cf_z15, TLB2_ENGINES_BUSY, 0x008b);
+CPUMF_EVENT_ATTR(cf_z15, TX_C_TEND, 0x008c);
+CPUMF_EVENT_ATTR(cf_z15, TX_NC_TEND, 0x008d);
+CPUMF_EVENT_ATTR(cf_z15, L1C_TLB2_MISSES, 0x008f);
+CPUMF_EVENT_ATTR(cf_z15, L1D_ONCHIP_L3_SOURCED_WRITES, 0x0090);
+CPUMF_EVENT_ATTR(cf_z15, L1D_ONCHIP_MEMORY_SOURCED_WRITES, 0x0091);
+CPUMF_EVENT_ATTR(cf_z15, L1D_ONCHIP_L3_SOURCED_WRITES_IV, 0x0092);
+CPUMF_EVENT_ATTR(cf_z15, L1D_ONCLUSTER_L3_SOURCED_WRITES, 0x0093);
+CPUMF_EVENT_ATTR(cf_z15, L1D_ONCLUSTER_MEMORY_SOURCED_WRITES, 0x0094);
+CPUMF_EVENT_ATTR(cf_z15, L1D_ONCLUSTER_L3_SOURCED_WRITES_IV, 0x0095);
+CPUMF_EVENT_ATTR(cf_z15, L1D_OFFCLUSTER_L3_SOURCED_WRITES, 0x0096);
+CPUMF_EVENT_ATTR(cf_z15, L1D_OFFCLUSTER_MEMORY_SOURCED_WRITES, 0x0097);
+CPUMF_EVENT_ATTR(cf_z15, L1D_OFFCLUSTER_L3_SOURCED_WRITES_IV, 0x0098);
+CPUMF_EVENT_ATTR(cf_z15, L1D_OFFDRAWER_L3_SOURCED_WRITES, 0x0099);
+CPUMF_EVENT_ATTR(cf_z15, L1D_OFFDRAWER_MEMORY_SOURCED_WRITES, 0x009a);
+CPUMF_EVENT_ATTR(cf_z15, L1D_OFFDRAWER_L3_SOURCED_WRITES_IV, 0x009b);
+CPUMF_EVENT_ATTR(cf_z15, L1D_ONDRAWER_L4_SOURCED_WRITES, 0x009c);
+CPUMF_EVENT_ATTR(cf_z15, L1D_OFFDRAWER_L4_SOURCED_WRITES, 0x009d);
+CPUMF_EVENT_ATTR(cf_z15, L1D_ONCHIP_L3_SOURCED_WRITES_RO, 0x009e);
+CPUMF_EVENT_ATTR(cf_z15, L1I_ONCHIP_L3_SOURCED_WRITES, 0x00a2);
+CPUMF_EVENT_ATTR(cf_z15, L1I_ONCHIP_MEMORY_SOURCED_WRITES, 0x00a3);
+CPUMF_EVENT_ATTR(cf_z15, L1I_ONCHIP_L3_SOURCED_WRITES_IV, 0x00a4);
+CPUMF_EVENT_ATTR(cf_z15, L1I_ONCLUSTER_L3_SOURCED_WRITES, 0x00a5);
+CPUMF_EVENT_ATTR(cf_z15, L1I_ONCLUSTER_MEMORY_SOURCED_WRITES, 0x00a6);
+CPUMF_EVENT_ATTR(cf_z15, L1I_ONCLUSTER_L3_SOURCED_WRITES_IV, 0x00a7);
+CPUMF_EVENT_ATTR(cf_z15, L1I_OFFCLUSTER_L3_SOURCED_WRITES, 0x00a8);
+CPUMF_EVENT_ATTR(cf_z15, L1I_OFFCLUSTER_MEMORY_SOURCED_WRITES, 0x00a9);
+CPUMF_EVENT_ATTR(cf_z15, L1I_OFFCLUSTER_L3_SOURCED_WRITES_IV, 0x00aa);
+CPUMF_EVENT_ATTR(cf_z15, L1I_OFFDRAWER_L3_SOURCED_WRITES, 0x00ab);
+CPUMF_EVENT_ATTR(cf_z15, L1I_OFFDRAWER_MEMORY_SOURCED_WRITES, 0x00ac);
+CPUMF_EVENT_ATTR(cf_z15, L1I_OFFDRAWER_L3_SOURCED_WRITES_IV, 0x00ad);
+CPUMF_EVENT_ATTR(cf_z15, L1I_ONDRAWER_L4_SOURCED_WRITES, 0x00ae);
+CPUMF_EVENT_ATTR(cf_z15, L1I_OFFDRAWER_L4_SOURCED_WRITES, 0x00af);
+CPUMF_EVENT_ATTR(cf_z15, BCD_DFP_EXECUTION_SLOTS, 0x00e0);
+CPUMF_EVENT_ATTR(cf_z15, VX_BCD_EXECUTION_SLOTS, 0x00e1);
+CPUMF_EVENT_ATTR(cf_z15, DECIMAL_INSTRUCTIONS, 0x00e2);
+CPUMF_EVENT_ATTR(cf_z15, LAST_HOST_TRANSLATIONS, 0x00e8);
+CPUMF_EVENT_ATTR(cf_z15, TX_NC_TABORT, 0x00f3);
+CPUMF_EVENT_ATTR(cf_z15, TX_C_TABORT_NO_SPECIAL, 0x00f4);
+CPUMF_EVENT_ATTR(cf_z15, TX_C_TABORT_SPECIAL, 0x00f5);
+CPUMF_EVENT_ATTR(cf_z15, DFLT_ACCESS, 0x00f7);
+CPUMF_EVENT_ATTR(cf_z15, DFLT_CYCLES, 0x00fc);
+CPUMF_EVENT_ATTR(cf_z15, DFLT_CC, 0x00108);
+CPUMF_EVENT_ATTR(cf_z15, DFLT_CCFINISH, 0x00109);
+CPUMF_EVENT_ATTR(cf_z15, MT_DIAG_CYCLES_ONE_THR_ACTIVE, 0x01c0);
+CPUMF_EVENT_ATTR(cf_z15, MT_DIAG_CYCLES_TWO_THR_ACTIVE, 0x01c1);
+CPUMF_EVENT_ATTR(cf_z16, L1D_RO_EXCL_WRITES, 0x0080);
+CPUMF_EVENT_ATTR(cf_z16, DTLB2_WRITES, 0x0081);
+CPUMF_EVENT_ATTR(cf_z16, DTLB2_MISSES, 0x0082);
+CPUMF_EVENT_ATTR(cf_z16, CRSTE_1MB_WRITES, 0x0083);
+CPUMF_EVENT_ATTR(cf_z16, DTLB2_GPAGE_WRITES, 0x0084);
+CPUMF_EVENT_ATTR(cf_z16, ITLB2_WRITES, 0x0086);
+CPUMF_EVENT_ATTR(cf_z16, ITLB2_MISSES, 0x0087);
+CPUMF_EVENT_ATTR(cf_z16, TLB2_PTE_WRITES, 0x0089);
+CPUMF_EVENT_ATTR(cf_z16, TLB2_CRSTE_WRITES, 0x008a);
+CPUMF_EVENT_ATTR(cf_z16, TLB2_ENGINES_BUSY, 0x008b);
+CPUMF_EVENT_ATTR(cf_z16, TX_C_TEND, 0x008c);
+CPUMF_EVENT_ATTR(cf_z16, TX_NC_TEND, 0x008d);
+CPUMF_EVENT_ATTR(cf_z16, L1C_TLB2_MISSES, 0x008f);
+CPUMF_EVENT_ATTR(cf_z16, DCW_REQ, 0x0091);
+CPUMF_EVENT_ATTR(cf_z16, DCW_REQ_IV, 0x0092);
+CPUMF_EVENT_ATTR(cf_z16, DCW_REQ_CHIP_HIT, 0x0093);
+CPUMF_EVENT_ATTR(cf_z16, DCW_REQ_DRAWER_HIT, 0x0094);
+CPUMF_EVENT_ATTR(cf_z16, DCW_ON_CHIP, 0x0095);
+CPUMF_EVENT_ATTR(cf_z16, DCW_ON_CHIP_IV, 0x0096);
+CPUMF_EVENT_ATTR(cf_z16, DCW_ON_CHIP_CHIP_HIT, 0x0097);
+CPUMF_EVENT_ATTR(cf_z16, DCW_ON_CHIP_DRAWER_HIT, 0x0098);
+CPUMF_EVENT_ATTR(cf_z16, DCW_ON_MODULE, 0x0099);
+CPUMF_EVENT_ATTR(cf_z16, DCW_ON_DRAWER, 0x009a);
+CPUMF_EVENT_ATTR(cf_z16, DCW_OFF_DRAWER, 0x009b);
+CPUMF_EVENT_ATTR(cf_z16, DCW_ON_CHIP_MEMORY, 0x009c);
+CPUMF_EVENT_ATTR(cf_z16, DCW_ON_MODULE_MEMORY, 0x009d);
+CPUMF_EVENT_ATTR(cf_z16, DCW_ON_DRAWER_MEMORY, 0x009e);
+CPUMF_EVENT_ATTR(cf_z16, DCW_OFF_DRAWER_MEMORY, 0x009f);
+CPUMF_EVENT_ATTR(cf_z16, IDCW_ON_MODULE_IV, 0x00a0);
+CPUMF_EVENT_ATTR(cf_z16, IDCW_ON_MODULE_CHIP_HIT, 0x00a1);
+CPUMF_EVENT_ATTR(cf_z16, IDCW_ON_MODULE_DRAWER_HIT, 0x00a2);
+CPUMF_EVENT_ATTR(cf_z16, IDCW_ON_DRAWER_IV, 0x00a3);
+CPUMF_EVENT_ATTR(cf_z16, IDCW_ON_DRAWER_CHIP_HIT, 0x00a4);
+CPUMF_EVENT_ATTR(cf_z16, IDCW_ON_DRAWER_DRAWER_HIT, 0x00a5);
+CPUMF_EVENT_ATTR(cf_z16, IDCW_OFF_DRAWER_IV, 0x00a6);
+CPUMF_EVENT_ATTR(cf_z16, IDCW_OFF_DRAWER_CHIP_HIT, 0x00a7);
+CPUMF_EVENT_ATTR(cf_z16, IDCW_OFF_DRAWER_DRAWER_HIT, 0x00a8);
+CPUMF_EVENT_ATTR(cf_z16, ICW_REQ, 0x00a9);
+CPUMF_EVENT_ATTR(cf_z16, ICW_REQ_IV, 0x00aa);
+CPUMF_EVENT_ATTR(cf_z16, ICW_REQ_CHIP_HIT, 0x00ab);
+CPUMF_EVENT_ATTR(cf_z16, ICW_REQ_DRAWER_HIT, 0x00ac);
+CPUMF_EVENT_ATTR(cf_z16, ICW_ON_CHIP, 0x00ad);
+CPUMF_EVENT_ATTR(cf_z16, ICW_ON_CHIP_IV, 0x00ae);
+CPUMF_EVENT_ATTR(cf_z16, ICW_ON_CHIP_CHIP_HIT, 0x00af);
+CPUMF_EVENT_ATTR(cf_z16, ICW_ON_CHIP_DRAWER_HIT, 0x00b0);
+CPUMF_EVENT_ATTR(cf_z16, ICW_ON_MODULE, 0x00b1);
+CPUMF_EVENT_ATTR(cf_z16, ICW_ON_DRAWER, 0x00b2);
+CPUMF_EVENT_ATTR(cf_z16, ICW_OFF_DRAWER, 0x00b3);
+CPUMF_EVENT_ATTR(cf_z16, ICW_ON_CHIP_MEMORY, 0x00b4);
+CPUMF_EVENT_ATTR(cf_z16, ICW_ON_MODULE_MEMORY, 0x00b5);
+CPUMF_EVENT_ATTR(cf_z16, ICW_ON_DRAWER_MEMORY, 0x00b6);
+CPUMF_EVENT_ATTR(cf_z16, ICW_OFF_DRAWER_MEMORY, 0x00b7);
+CPUMF_EVENT_ATTR(cf_z16, BCD_DFP_EXECUTION_SLOTS, 0x00e0);
+CPUMF_EVENT_ATTR(cf_z16, VX_BCD_EXECUTION_SLOTS, 0x00e1);
+CPUMF_EVENT_ATTR(cf_z16, DECIMAL_INSTRUCTIONS, 0x00e2);
+CPUMF_EVENT_ATTR(cf_z16, LAST_HOST_TRANSLATIONS, 0x00e8);
+CPUMF_EVENT_ATTR(cf_z16, TX_NC_TABORT, 0x00f4);
+CPUMF_EVENT_ATTR(cf_z16, TX_C_TABORT_NO_SPECIAL, 0x00f5);
+CPUMF_EVENT_ATTR(cf_z16, TX_C_TABORT_SPECIAL, 0x00f6);
+CPUMF_EVENT_ATTR(cf_z16, DFLT_ACCESS, 0x00f8);
+CPUMF_EVENT_ATTR(cf_z16, DFLT_CYCLES, 0x00fd);
+CPUMF_EVENT_ATTR(cf_z16, SORTL, 0x0100);
+CPUMF_EVENT_ATTR(cf_z16, DFLT_CC, 0x0109);
+CPUMF_EVENT_ATTR(cf_z16, DFLT_CCFINISH, 0x010a);
+CPUMF_EVENT_ATTR(cf_z16, NNPA_INVOCATIONS, 0x010b);
+CPUMF_EVENT_ATTR(cf_z16, NNPA_COMPLETIONS, 0x010c);
+CPUMF_EVENT_ATTR(cf_z16, NNPA_WAIT_LOCK, 0x010d);
+CPUMF_EVENT_ATTR(cf_z16, NNPA_HOLD_LOCK, 0x010e);
+CPUMF_EVENT_ATTR(cf_z16, MT_DIAG_CYCLES_ONE_THR_ACTIVE, 0x01c0);
+CPUMF_EVENT_ATTR(cf_z16, MT_DIAG_CYCLES_TWO_THR_ACTIVE, 0x01c1);
+
+static struct attribute *cpumcf_fvn1_pmu_event_attr[] __initdata = {
+	CPUMF_EVENT_PTR(cf_fvn1, CPU_CYCLES),
+	CPUMF_EVENT_PTR(cf_fvn1, INSTRUCTIONS),
+	CPUMF_EVENT_PTR(cf_fvn1, L1I_DIR_WRITES),
+	CPUMF_EVENT_PTR(cf_fvn1, L1I_PENALTY_CYCLES),
+	CPUMF_EVENT_PTR(cf_fvn1, PROBLEM_STATE_CPU_CYCLES),
+	CPUMF_EVENT_PTR(cf_fvn1, PROBLEM_STATE_INSTRUCTIONS),
+	CPUMF_EVENT_PTR(cf_fvn1, PROBLEM_STATE_L1I_DIR_WRITES),
+	CPUMF_EVENT_PTR(cf_fvn1, PROBLEM_STATE_L1I_PENALTY_CYCLES),
+	CPUMF_EVENT_PTR(cf_fvn1, PROBLEM_STATE_L1D_DIR_WRITES),
+	CPUMF_EVENT_PTR(cf_fvn1, PROBLEM_STATE_L1D_PENALTY_CYCLES),
+	CPUMF_EVENT_PTR(cf_fvn1, L1D_DIR_WRITES),
+	CPUMF_EVENT_PTR(cf_fvn1, L1D_PENALTY_CYCLES),
+	NULL,
+};
+
+static struct attribute *cpumcf_fvn3_pmu_event_attr[] __initdata = {
+	CPUMF_EVENT_PTR(cf_fvn3, CPU_CYCLES),
+	CPUMF_EVENT_PTR(cf_fvn3, INSTRUCTIONS),
+	CPUMF_EVENT_PTR(cf_fvn3, L1I_DIR_WRITES),
+	CPUMF_EVENT_PTR(cf_fvn3, L1I_PENALTY_CYCLES),
+	CPUMF_EVENT_PTR(cf_fvn3, PROBLEM_STATE_CPU_CYCLES),
+	CPUMF_EVENT_PTR(cf_fvn3, PROBLEM_STATE_INSTRUCTIONS),
+	CPUMF_EVENT_PTR(cf_fvn3, L1D_DIR_WRITES),
+	CPUMF_EVENT_PTR(cf_fvn3, L1D_PENALTY_CYCLES),
+	NULL,
+};
+
+static struct attribute *cpumcf_svn_12345_pmu_event_attr[] __initdata = {
+	CPUMF_EVENT_PTR(cf_svn_12345, PRNG_FUNCTIONS),
+	CPUMF_EVENT_PTR(cf_svn_12345, PRNG_CYCLES),
+	CPUMF_EVENT_PTR(cf_svn_12345, PRNG_BLOCKED_FUNCTIONS),
+	CPUMF_EVENT_PTR(cf_svn_12345, PRNG_BLOCKED_CYCLES),
+	CPUMF_EVENT_PTR(cf_svn_12345, SHA_FUNCTIONS),
+	CPUMF_EVENT_PTR(cf_svn_12345, SHA_CYCLES),
+	CPUMF_EVENT_PTR(cf_svn_12345, SHA_BLOCKED_FUNCTIONS),
+	CPUMF_EVENT_PTR(cf_svn_12345, SHA_BLOCKED_CYCLES),
+	CPUMF_EVENT_PTR(cf_svn_12345, DEA_FUNCTIONS),
+	CPUMF_EVENT_PTR(cf_svn_12345, DEA_CYCLES),
+	CPUMF_EVENT_PTR(cf_svn_12345, DEA_BLOCKED_FUNCTIONS),
+	CPUMF_EVENT_PTR(cf_svn_12345, DEA_BLOCKED_CYCLES),
+	CPUMF_EVENT_PTR(cf_svn_12345, AES_FUNCTIONS),
+	CPUMF_EVENT_PTR(cf_svn_12345, AES_CYCLES),
+	CPUMF_EVENT_PTR(cf_svn_12345, AES_BLOCKED_FUNCTIONS),
+	CPUMF_EVENT_PTR(cf_svn_12345, AES_BLOCKED_CYCLES),
+	NULL,
+};
+
+static struct attribute *cpumcf_svn_67_pmu_event_attr[] __initdata = {
+	CPUMF_EVENT_PTR(cf_svn_12345, PRNG_FUNCTIONS),
+	CPUMF_EVENT_PTR(cf_svn_12345, PRNG_CYCLES),
+	CPUMF_EVENT_PTR(cf_svn_12345, PRNG_BLOCKED_FUNCTIONS),
+	CPUMF_EVENT_PTR(cf_svn_12345, PRNG_BLOCKED_CYCLES),
+	CPUMF_EVENT_PTR(cf_svn_12345, SHA_FUNCTIONS),
+	CPUMF_EVENT_PTR(cf_svn_12345, SHA_CYCLES),
+	CPUMF_EVENT_PTR(cf_svn_12345, SHA_BLOCKED_FUNCTIONS),
+	CPUMF_EVENT_PTR(cf_svn_12345, SHA_BLOCKED_CYCLES),
+	CPUMF_EVENT_PTR(cf_svn_12345, DEA_FUNCTIONS),
+	CPUMF_EVENT_PTR(cf_svn_12345, DEA_CYCLES),
+	CPUMF_EVENT_PTR(cf_svn_12345, DEA_BLOCKED_FUNCTIONS),
+	CPUMF_EVENT_PTR(cf_svn_12345, DEA_BLOCKED_CYCLES),
+	CPUMF_EVENT_PTR(cf_svn_12345, AES_FUNCTIONS),
+	CPUMF_EVENT_PTR(cf_svn_12345, AES_CYCLES),
+	CPUMF_EVENT_PTR(cf_svn_12345, AES_BLOCKED_FUNCTIONS),
+	CPUMF_EVENT_PTR(cf_svn_12345, AES_BLOCKED_CYCLES),
+	CPUMF_EVENT_PTR(cf_svn_6, ECC_FUNCTION_COUNT),
+	CPUMF_EVENT_PTR(cf_svn_6, ECC_CYCLES_COUNT),
+	CPUMF_EVENT_PTR(cf_svn_6, ECC_BLOCKED_FUNCTION_COUNT),
+	CPUMF_EVENT_PTR(cf_svn_6, ECC_BLOCKED_CYCLES_COUNT),
+	NULL,
+};
+
+static struct attribute *cpumcf_z10_pmu_event_attr[] __initdata = {
+	CPUMF_EVENT_PTR(cf_z10, L1I_L2_SOURCED_WRITES),
+	CPUMF_EVENT_PTR(cf_z10, L1D_L2_SOURCED_WRITES),
+	CPUMF_EVENT_PTR(cf_z10, L1I_L3_LOCAL_WRITES),
+	CPUMF_EVENT_PTR(cf_z10, L1D_L3_LOCAL_WRITES),
+	CPUMF_EVENT_PTR(cf_z10, L1I_L3_REMOTE_WRITES),
+	CPUMF_EVENT_PTR(cf_z10, L1D_L3_REMOTE_WRITES),
+	CPUMF_EVENT_PTR(cf_z10, L1D_LMEM_SOURCED_WRITES),
+	CPUMF_EVENT_PTR(cf_z10, L1I_LMEM_SOURCED_WRITES),
+	CPUMF_EVENT_PTR(cf_z10, L1D_RO_EXCL_WRITES),
+	CPUMF_EVENT_PTR(cf_z10, L1I_CACHELINE_INVALIDATES),
+	CPUMF_EVENT_PTR(cf_z10, ITLB1_WRITES),
+	CPUMF_EVENT_PTR(cf_z10, DTLB1_WRITES),
+	CPUMF_EVENT_PTR(cf_z10, TLB2_PTE_WRITES),
+	CPUMF_EVENT_PTR(cf_z10, TLB2_CRSTE_WRITES),
+	CPUMF_EVENT_PTR(cf_z10, TLB2_CRSTE_HPAGE_WRITES),
+	CPUMF_EVENT_PTR(cf_z10, ITLB1_MISSES),
+	CPUMF_EVENT_PTR(cf_z10, DTLB1_MISSES),
+	CPUMF_EVENT_PTR(cf_z10, L2C_STORES_SENT),
+	NULL,
+};
+
+static struct attribute *cpumcf_z196_pmu_event_attr[] __initdata = {
+	CPUMF_EVENT_PTR(cf_z196, L1D_L2_SOURCED_WRITES),
+	CPUMF_EVENT_PTR(cf_z196, L1I_L2_SOURCED_WRITES),
+	CPUMF_EVENT_PTR(cf_z196, DTLB1_MISSES),
+	CPUMF_EVENT_PTR(cf_z196, ITLB1_MISSES),
+	CPUMF_EVENT_PTR(cf_z196, L2C_STORES_SENT),
+	CPUMF_EVENT_PTR(cf_z196, L1D_OFFBOOK_L3_SOURCED_WRITES),
+	CPUMF_EVENT_PTR(cf_z196, L1D_ONBOOK_L4_SOURCED_WRITES),
+	CPUMF_EVENT_PTR(cf_z196, L1I_ONBOOK_L4_SOURCED_WRITES),
+	CPUMF_EVENT_PTR(cf_z196, L1D_RO_EXCL_WRITES),
+	CPUMF_EVENT_PTR(cf_z196, L1D_OFFBOOK_L4_SOURCED_WRITES),
+	CPUMF_EVENT_PTR(cf_z196, L1I_OFFBOOK_L4_SOURCED_WRITES),
+	CPUMF_EVENT_PTR(cf_z196, DTLB1_HPAGE_WRITES),
+	CPUMF_EVENT_PTR(cf_z196, L1D_LMEM_SOURCED_WRITES),
+	CPUMF_EVENT_PTR(cf_z196, L1I_LMEM_SOURCED_WRITES),
+	CPUMF_EVENT_PTR(cf_z196, L1I_OFFBOOK_L3_SOURCED_WRITES),
+	CPUMF_EVENT_PTR(cf_z196, DTLB1_WRITES),
+	CPUMF_EVENT_PTR(cf_z196, ITLB1_WRITES),
+	CPUMF_EVENT_PTR(cf_z196, TLB2_PTE_WRITES),
+	CPUMF_EVENT_PTR(cf_z196, TLB2_CRSTE_HPAGE_WRITES),
+	CPUMF_EVENT_PTR(cf_z196, TLB2_CRSTE_WRITES),
+	CPUMF_EVENT_PTR(cf_z196, L1D_ONCHIP_L3_SOURCED_WRITES),
+	CPUMF_EVENT_PTR(cf_z196, L1D_OFFCHIP_L3_SOURCED_WRITES),
+	CPUMF_EVENT_PTR(cf_z196, L1I_ONCHIP_L3_SOURCED_WRITES),
+	CPUMF_EVENT_PTR(cf_z196, L1I_OFFCHIP_L3_SOURCED_WRITES),
+	NULL,
+};
+
+static struct attribute *cpumcf_zec12_pmu_event_attr[] __initdata = {
+	CPUMF_EVENT_PTR(cf_zec12, DTLB1_MISSES),
+	CPUMF_EVENT_PTR(cf_zec12, ITLB1_MISSES),
+	CPUMF_EVENT_PTR(cf_zec12, L1D_L2I_SOURCED_WRITES),
+	CPUMF_EVENT_PTR(cf_zec12, L1I_L2I_SOURCED_WRITES),
+	CPUMF_EVENT_PTR(cf_zec12, L1D_L2D_SOURCED_WRITES),
+	CPUMF_EVENT_PTR(cf_zec12, DTLB1_WRITES),
+	CPUMF_EVENT_PTR(cf_zec12, L1D_LMEM_SOURCED_WRITES),
+	CPUMF_EVENT_PTR(cf_zec12, L1I_LMEM_SOURCED_WRITES),
+	CPUMF_EVENT_PTR(cf_zec12, L1D_RO_EXCL_WRITES),
+	CPUMF_EVENT_PTR(cf_zec12, DTLB1_HPAGE_WRITES),
+	CPUMF_EVENT_PTR(cf_zec12, ITLB1_WRITES),
+	CPUMF_EVENT_PTR(cf_zec12, TLB2_PTE_WRITES),
+	CPUMF_EVENT_PTR(cf_zec12, TLB2_CRSTE_HPAGE_WRITES),
+	CPUMF_EVENT_PTR(cf_zec12, TLB2_CRSTE_WRITES),
+	CPUMF_EVENT_PTR(cf_zec12, L1D_ONCHIP_L3_SOURCED_WRITES),
+	CPUMF_EVENT_PTR(cf_zec12, L1D_OFFCHIP_L3_SOURCED_WRITES),
+	CPUMF_EVENT_PTR(cf_zec12, L1D_OFFBOOK_L3_SOURCED_WRITES),
+	CPUMF_EVENT_PTR(cf_zec12, L1D_ONBOOK_L4_SOURCED_WRITES),
+	CPUMF_EVENT_PTR(cf_zec12, L1D_OFFBOOK_L4_SOURCED_WRITES),
+	CPUMF_EVENT_PTR(cf_zec12, TX_NC_TEND),
+	CPUMF_EVENT_PTR(cf_zec12, L1D_ONCHIP_L3_SOURCED_WRITES_IV),
+	CPUMF_EVENT_PTR(cf_zec12, L1D_OFFCHIP_L3_SOURCED_WRITES_IV),
+	CPUMF_EVENT_PTR(cf_zec12, L1D_OFFBOOK_L3_SOURCED_WRITES_IV),
+	CPUMF_EVENT_PTR(cf_zec12, L1I_ONCHIP_L3_SOURCED_WRITES),
+	CPUMF_EVENT_PTR(cf_zec12, L1I_OFFCHIP_L3_SOURCED_WRITES),
+	CPUMF_EVENT_PTR(cf_zec12, L1I_OFFBOOK_L3_SOURCED_WRITES),
+	CPUMF_EVENT_PTR(cf_zec12, L1I_ONBOOK_L4_SOURCED_WRITES),
+	CPUMF_EVENT_PTR(cf_zec12, L1I_OFFBOOK_L4_SOURCED_WRITES),
+	CPUMF_EVENT_PTR(cf_zec12, TX_C_TEND),
+	CPUMF_EVENT_PTR(cf_zec12, L1I_ONCHIP_L3_SOURCED_WRITES_IV),
+	CPUMF_EVENT_PTR(cf_zec12, L1I_OFFCHIP_L3_SOURCED_WRITES_IV),
+	CPUMF_EVENT_PTR(cf_zec12, L1I_OFFBOOK_L3_SOURCED_WRITES_IV),
+	CPUMF_EVENT_PTR(cf_zec12, TX_NC_TABORT),
+	CPUMF_EVENT_PTR(cf_zec12, TX_C_TABORT_NO_SPECIAL),
+	CPUMF_EVENT_PTR(cf_zec12, TX_C_TABORT_SPECIAL),
+	NULL,
+};
+
+static struct attribute *cpumcf_z13_pmu_event_attr[] __initdata = {
+	CPUMF_EVENT_PTR(cf_z13, L1D_RO_EXCL_WRITES),
+	CPUMF_EVENT_PTR(cf_z13, DTLB1_WRITES),
+	CPUMF_EVENT_PTR(cf_z13, DTLB1_MISSES),
+	CPUMF_EVENT_PTR(cf_z13, DTLB1_HPAGE_WRITES),
+	CPUMF_EVENT_PTR(cf_z13, DTLB1_GPAGE_WRITES),
+	CPUMF_EVENT_PTR(cf_z13, L1D_L2D_SOURCED_WRITES),
+	CPUMF_EVENT_PTR(cf_z13, ITLB1_WRITES),
+	CPUMF_EVENT_PTR(cf_z13, ITLB1_MISSES),
+	CPUMF_EVENT_PTR(cf_z13, L1I_L2I_SOURCED_WRITES),
+	CPUMF_EVENT_PTR(cf_z13, TLB2_PTE_WRITES),
+	CPUMF_EVENT_PTR(cf_z13, TLB2_CRSTE_HPAGE_WRITES),
+	CPUMF_EVENT_PTR(cf_z13, TLB2_CRSTE_WRITES),
+	CPUMF_EVENT_PTR(cf_z13, TX_C_TEND),
+	CPUMF_EVENT_PTR(cf_z13, TX_NC_TEND),
+	CPUMF_EVENT_PTR(cf_z13, L1C_TLB1_MISSES),
+	CPUMF_EVENT_PTR(cf_z13, L1D_ONCHIP_L3_SOURCED_WRITES),
+	CPUMF_EVENT_PTR(cf_z13, L1D_ONCHIP_L3_SOURCED_WRITES_IV),
+	CPUMF_EVENT_PTR(cf_z13, L1D_ONNODE_L4_SOURCED_WRITES),
+	CPUMF_EVENT_PTR(cf_z13, L1D_ONNODE_L3_SOURCED_WRITES_IV),
+	CPUMF_EVENT_PTR(cf_z13, L1D_ONNODE_L3_SOURCED_WRITES),
+	CPUMF_EVENT_PTR(cf_z13, L1D_ONDRAWER_L4_SOURCED_WRITES),
+	CPUMF_EVENT_PTR(cf_z13, L1D_ONDRAWER_L3_SOURCED_WRITES_IV),
+	CPUMF_EVENT_PTR(cf_z13, L1D_ONDRAWER_L3_SOURCED_WRITES),
+	CPUMF_EVENT_PTR(cf_z13, L1D_OFFDRAWER_SCOL_L4_SOURCED_WRITES),
+	CPUMF_EVENT_PTR(cf_z13, L1D_OFFDRAWER_SCOL_L3_SOURCED_WRITES_IV),
+	CPUMF_EVENT_PTR(cf_z13, L1D_OFFDRAWER_SCOL_L3_SOURCED_WRITES),
+	CPUMF_EVENT_PTR(cf_z13, L1D_OFFDRAWER_FCOL_L4_SOURCED_WRITES),
+	CPUMF_EVENT_PTR(cf_z13, L1D_OFFDRAWER_FCOL_L3_SOURCED_WRITES_IV),
+	CPUMF_EVENT_PTR(cf_z13, L1D_OFFDRAWER_FCOL_L3_SOURCED_WRITES),
+	CPUMF_EVENT_PTR(cf_z13, L1D_ONNODE_MEM_SOURCED_WRITES),
+	CPUMF_EVENT_PTR(cf_z13, L1D_ONDRAWER_MEM_SOURCED_WRITES),
+	CPUMF_EVENT_PTR(cf_z13, L1D_OFFDRAWER_MEM_SOURCED_WRITES),
+	CPUMF_EVENT_PTR(cf_z13, L1D_ONCHIP_MEM_SOURCED_WRITES),
+	CPUMF_EVENT_PTR(cf_z13, L1I_ONCHIP_L3_SOURCED_WRITES),
+	CPUMF_EVENT_PTR(cf_z13, L1I_ONCHIP_L3_SOURCED_WRITES_IV),
+	CPUMF_EVENT_PTR(cf_z13, L1I_ONNODE_L4_SOURCED_WRITES),
+	CPUMF_EVENT_PTR(cf_z13, L1I_ONNODE_L3_SOURCED_WRITES_IV),
+	CPUMF_EVENT_PTR(cf_z13, L1I_ONNODE_L3_SOURCED_WRITES),
+	CPUMF_EVENT_PTR(cf_z13, L1I_ONDRAWER_L4_SOURCED_WRITES),
+	CPUMF_EVENT_PTR(cf_z13, L1I_ONDRAWER_L3_SOURCED_WRITES_IV),
+	CPUMF_EVENT_PTR(cf_z13, L1I_ONDRAWER_L3_SOURCED_WRITES),
+	CPUMF_EVENT_PTR(cf_z13, L1I_OFFDRAWER_SCOL_L4_SOURCED_WRITES),
+	CPUMF_EVENT_PTR(cf_z13, L1I_OFFDRAWER_SCOL_L3_SOURCED_WRITES_IV),
+	CPUMF_EVENT_PTR(cf_z13, L1I_OFFDRAWER_SCOL_L3_SOURCED_WRITES),
+	CPUMF_EVENT_PTR(cf_z13, L1I_OFFDRAWER_FCOL_L4_SOURCED_WRITES),
+	CPUMF_EVENT_PTR(cf_z13, L1I_OFFDRAWER_FCOL_L3_SOURCED_WRITES_IV),
+	CPUMF_EVENT_PTR(cf_z13, L1I_OFFDRAWER_FCOL_L3_SOURCED_WRITES),
+	CPUMF_EVENT_PTR(cf_z13, L1I_ONNODE_MEM_SOURCED_WRITES),
+	CPUMF_EVENT_PTR(cf_z13, L1I_ONDRAWER_MEM_SOURCED_WRITES),
+	CPUMF_EVENT_PTR(cf_z13, L1I_OFFDRAWER_MEM_SOURCED_WRITES),
+	CPUMF_EVENT_PTR(cf_z13, L1I_ONCHIP_MEM_SOURCED_WRITES),
+	CPUMF_EVENT_PTR(cf_z13, TX_NC_TABORT),
+	CPUMF_EVENT_PTR(cf_z13, TX_C_TABORT_NO_SPECIAL),
+	CPUMF_EVENT_PTR(cf_z13, TX_C_TABORT_SPECIAL),
+	CPUMF_EVENT_PTR(cf_z13, MT_DIAG_CYCLES_ONE_THR_ACTIVE),
+	CPUMF_EVENT_PTR(cf_z13, MT_DIAG_CYCLES_TWO_THR_ACTIVE),
+	NULL,
+};
+
+static struct attribute *cpumcf_z14_pmu_event_attr[] __initdata = {
+	CPUMF_EVENT_PTR(cf_z14, L1D_RO_EXCL_WRITES),
+	CPUMF_EVENT_PTR(cf_z14, DTLB2_WRITES),
+	CPUMF_EVENT_PTR(cf_z14, DTLB2_MISSES),
+	CPUMF_EVENT_PTR(cf_z14, DTLB2_HPAGE_WRITES),
+	CPUMF_EVENT_PTR(cf_z14, DTLB2_GPAGE_WRITES),
+	CPUMF_EVENT_PTR(cf_z14, L1D_L2D_SOURCED_WRITES),
+	CPUMF_EVENT_PTR(cf_z14, ITLB2_WRITES),
+	CPUMF_EVENT_PTR(cf_z14, ITLB2_MISSES),
+	CPUMF_EVENT_PTR(cf_z14, L1I_L2I_SOURCED_WRITES),
+	CPUMF_EVENT_PTR(cf_z14, TLB2_PTE_WRITES),
+	CPUMF_EVENT_PTR(cf_z14, TLB2_CRSTE_WRITES),
+	CPUMF_EVENT_PTR(cf_z14, TLB2_ENGINES_BUSY),
+	CPUMF_EVENT_PTR(cf_z14, TX_C_TEND),
+	CPUMF_EVENT_PTR(cf_z14, TX_NC_TEND),
+	CPUMF_EVENT_PTR(cf_z14, L1C_TLB2_MISSES),
+	CPUMF_EVENT_PTR(cf_z14, L1D_ONCHIP_L3_SOURCED_WRITES),
+	CPUMF_EVENT_PTR(cf_z14, L1D_ONCHIP_MEMORY_SOURCED_WRITES),
+	CPUMF_EVENT_PTR(cf_z14, L1D_ONCHIP_L3_SOURCED_WRITES_IV),
+	CPUMF_EVENT_PTR(cf_z14, L1D_ONCLUSTER_L3_SOURCED_WRITES),
+	CPUMF_EVENT_PTR(cf_z14, L1D_ONCLUSTER_MEMORY_SOURCED_WRITES),
+	CPUMF_EVENT_PTR(cf_z14, L1D_ONCLUSTER_L3_SOURCED_WRITES_IV),
+	CPUMF_EVENT_PTR(cf_z14, L1D_OFFCLUSTER_L3_SOURCED_WRITES),
+	CPUMF_EVENT_PTR(cf_z14, L1D_OFFCLUSTER_MEMORY_SOURCED_WRITES),
+	CPUMF_EVENT_PTR(cf_z14, L1D_OFFCLUSTER_L3_SOURCED_WRITES_IV),
+	CPUMF_EVENT_PTR(cf_z14, L1D_OFFDRAWER_L3_SOURCED_WRITES),
+	CPUMF_EVENT_PTR(cf_z14, L1D_OFFDRAWER_MEMORY_SOURCED_WRITES),
+	CPUMF_EVENT_PTR(cf_z14, L1D_OFFDRAWER_L3_SOURCED_WRITES_IV),
+	CPUMF_EVENT_PTR(cf_z14, L1D_ONDRAWER_L4_SOURCED_WRITES),
+	CPUMF_EVENT_PTR(cf_z14, L1D_OFFDRAWER_L4_SOURCED_WRITES),
+	CPUMF_EVENT_PTR(cf_z14, L1D_ONCHIP_L3_SOURCED_WRITES_RO),
+	CPUMF_EVENT_PTR(cf_z14, L1I_ONCHIP_L3_SOURCED_WRITES),
+	CPUMF_EVENT_PTR(cf_z14, L1I_ONCHIP_MEMORY_SOURCED_WRITES),
+	CPUMF_EVENT_PTR(cf_z14, L1I_ONCHIP_L3_SOURCED_WRITES_IV),
+	CPUMF_EVENT_PTR(cf_z14, L1I_ONCLUSTER_L3_SOURCED_WRITES),
+	CPUMF_EVENT_PTR(cf_z14, L1I_ONCLUSTER_MEMORY_SOURCED_WRITES),
+	CPUMF_EVENT_PTR(cf_z14, L1I_ONCLUSTER_L3_SOURCED_WRITES_IV),
+	CPUMF_EVENT_PTR(cf_z14, L1I_OFFCLUSTER_L3_SOURCED_WRITES),
+	CPUMF_EVENT_PTR(cf_z14, L1I_OFFCLUSTER_MEMORY_SOURCED_WRITES),
+	CPUMF_EVENT_PTR(cf_z14, L1I_OFFCLUSTER_L3_SOURCED_WRITES_IV),
+	CPUMF_EVENT_PTR(cf_z14, L1I_OFFDRAWER_L3_SOURCED_WRITES),
+	CPUMF_EVENT_PTR(cf_z14, L1I_OFFDRAWER_MEMORY_SOURCED_WRITES),
+	CPUMF_EVENT_PTR(cf_z14, L1I_OFFDRAWER_L3_SOURCED_WRITES_IV),
+	CPUMF_EVENT_PTR(cf_z14, L1I_ONDRAWER_L4_SOURCED_WRITES),
+	CPUMF_EVENT_PTR(cf_z14, L1I_OFFDRAWER_L4_SOURCED_WRITES),
+	CPUMF_EVENT_PTR(cf_z14, BCD_DFP_EXECUTION_SLOTS),
+	CPUMF_EVENT_PTR(cf_z14, VX_BCD_EXECUTION_SLOTS),
+	CPUMF_EVENT_PTR(cf_z14, DECIMAL_INSTRUCTIONS),
+	CPUMF_EVENT_PTR(cf_z14, LAST_HOST_TRANSLATIONS),
+	CPUMF_EVENT_PTR(cf_z14, TX_NC_TABORT),
+	CPUMF_EVENT_PTR(cf_z14, TX_C_TABORT_NO_SPECIAL),
+	CPUMF_EVENT_PTR(cf_z14, TX_C_TABORT_SPECIAL),
+	CPUMF_EVENT_PTR(cf_z14, MT_DIAG_CYCLES_ONE_THR_ACTIVE),
+	CPUMF_EVENT_PTR(cf_z14, MT_DIAG_CYCLES_TWO_THR_ACTIVE),
+	NULL,
+};
+
+static struct attribute *cpumcf_z15_pmu_event_attr[] __initdata = {
+	CPUMF_EVENT_PTR(cf_z15, L1D_RO_EXCL_WRITES),
+	CPUMF_EVENT_PTR(cf_z15, DTLB2_WRITES),
+	CPUMF_EVENT_PTR(cf_z15, DTLB2_MISSES),
+	CPUMF_EVENT_PTR(cf_z15, DTLB2_HPAGE_WRITES),
+	CPUMF_EVENT_PTR(cf_z15, DTLB2_GPAGE_WRITES),
+	CPUMF_EVENT_PTR(cf_z15, L1D_L2D_SOURCED_WRITES),
+	CPUMF_EVENT_PTR(cf_z15, ITLB2_WRITES),
+	CPUMF_EVENT_PTR(cf_z15, ITLB2_MISSES),
+	CPUMF_EVENT_PTR(cf_z15, L1I_L2I_SOURCED_WRITES),
+	CPUMF_EVENT_PTR(cf_z15, TLB2_PTE_WRITES),
+	CPUMF_EVENT_PTR(cf_z15, TLB2_CRSTE_WRITES),
+	CPUMF_EVENT_PTR(cf_z15, TLB2_ENGINES_BUSY),
+	CPUMF_EVENT_PTR(cf_z15, TX_C_TEND),
+	CPUMF_EVENT_PTR(cf_z15, TX_NC_TEND),
+	CPUMF_EVENT_PTR(cf_z15, L1C_TLB2_MISSES),
+	CPUMF_EVENT_PTR(cf_z15, L1D_ONCHIP_L3_SOURCED_WRITES),
+	CPUMF_EVENT_PTR(cf_z15, L1D_ONCHIP_MEMORY_SOURCED_WRITES),
+	CPUMF_EVENT_PTR(cf_z15, L1D_ONCHIP_L3_SOURCED_WRITES_IV),
+	CPUMF_EVENT_PTR(cf_z15, L1D_ONCLUSTER_L3_SOURCED_WRITES),
+	CPUMF_EVENT_PTR(cf_z15, L1D_ONCLUSTER_MEMORY_SOURCED_WRITES),
+	CPUMF_EVENT_PTR(cf_z15, L1D_ONCLUSTER_L3_SOURCED_WRITES_IV),
+	CPUMF_EVENT_PTR(cf_z15, L1D_OFFCLUSTER_L3_SOURCED_WRITES),
+	CPUMF_EVENT_PTR(cf_z15, L1D_OFFCLUSTER_MEMORY_SOURCED_WRITES),
+	CPUMF_EVENT_PTR(cf_z15, L1D_OFFCLUSTER_L3_SOURCED_WRITES_IV),
+	CPUMF_EVENT_PTR(cf_z15, L1D_OFFDRAWER_L3_SOURCED_WRITES),
+	CPUMF_EVENT_PTR(cf_z15, L1D_OFFDRAWER_MEMORY_SOURCED_WRITES),
+	CPUMF_EVENT_PTR(cf_z15, L1D_OFFDRAWER_L3_SOURCED_WRITES_IV),
+	CPUMF_EVENT_PTR(cf_z15, L1D_ONDRAWER_L4_SOURCED_WRITES),
+	CPUMF_EVENT_PTR(cf_z15, L1D_OFFDRAWER_L4_SOURCED_WRITES),
+	CPUMF_EVENT_PTR(cf_z15, L1D_ONCHIP_L3_SOURCED_WRITES_RO),
+	CPUMF_EVENT_PTR(cf_z15, L1I_ONCHIP_L3_SOURCED_WRITES),
+	CPUMF_EVENT_PTR(cf_z15, L1I_ONCHIP_MEMORY_SOURCED_WRITES),
+	CPUMF_EVENT_PTR(cf_z15, L1I_ONCHIP_L3_SOURCED_WRITES_IV),
+	CPUMF_EVENT_PTR(cf_z15, L1I_ONCLUSTER_L3_SOURCED_WRITES),
+	CPUMF_EVENT_PTR(cf_z15, L1I_ONCLUSTER_MEMORY_SOURCED_WRITES),
+	CPUMF_EVENT_PTR(cf_z15, L1I_ONCLUSTER_L3_SOURCED_WRITES_IV),
+	CPUMF_EVENT_PTR(cf_z15, L1I_OFFCLUSTER_L3_SOURCED_WRITES),
+	CPUMF_EVENT_PTR(cf_z15, L1I_OFFCLUSTER_MEMORY_SOURCED_WRITES),
+	CPUMF_EVENT_PTR(cf_z15, L1I_OFFCLUSTER_L3_SOURCED_WRITES_IV),
+	CPUMF_EVENT_PTR(cf_z15, L1I_OFFDRAWER_L3_SOURCED_WRITES),
+	CPUMF_EVENT_PTR(cf_z15, L1I_OFFDRAWER_MEMORY_SOURCED_WRITES),
+	CPUMF_EVENT_PTR(cf_z15, L1I_OFFDRAWER_L3_SOURCED_WRITES_IV),
+	CPUMF_EVENT_PTR(cf_z15, L1I_ONDRAWER_L4_SOURCED_WRITES),
+	CPUMF_EVENT_PTR(cf_z15, L1I_OFFDRAWER_L4_SOURCED_WRITES),
+	CPUMF_EVENT_PTR(cf_z15, BCD_DFP_EXECUTION_SLOTS),
+	CPUMF_EVENT_PTR(cf_z15, VX_BCD_EXECUTION_SLOTS),
+	CPUMF_EVENT_PTR(cf_z15, DECIMAL_INSTRUCTIONS),
+	CPUMF_EVENT_PTR(cf_z15, LAST_HOST_TRANSLATIONS),
+	CPUMF_EVENT_PTR(cf_z15, TX_NC_TABORT),
+	CPUMF_EVENT_PTR(cf_z15, TX_C_TABORT_NO_SPECIAL),
+	CPUMF_EVENT_PTR(cf_z15, TX_C_TABORT_SPECIAL),
+	CPUMF_EVENT_PTR(cf_z15, DFLT_ACCESS),
+	CPUMF_EVENT_PTR(cf_z15, DFLT_CYCLES),
+	CPUMF_EVENT_PTR(cf_z15, DFLT_CC),
+	CPUMF_EVENT_PTR(cf_z15, DFLT_CCFINISH),
+	CPUMF_EVENT_PTR(cf_z15, MT_DIAG_CYCLES_ONE_THR_ACTIVE),
+	CPUMF_EVENT_PTR(cf_z15, MT_DIAG_CYCLES_TWO_THR_ACTIVE),
+	NULL,
+};
+
+static struct attribute *cpumcf_z16_pmu_event_attr[] __initdata = {
+	CPUMF_EVENT_PTR(cf_z16, L1D_RO_EXCL_WRITES),
+	CPUMF_EVENT_PTR(cf_z16, DTLB2_WRITES),
+	CPUMF_EVENT_PTR(cf_z16, DTLB2_MISSES),
+	CPUMF_EVENT_PTR(cf_z16, CRSTE_1MB_WRITES),
+	CPUMF_EVENT_PTR(cf_z16, DTLB2_GPAGE_WRITES),
+	CPUMF_EVENT_PTR(cf_z16, ITLB2_WRITES),
+	CPUMF_EVENT_PTR(cf_z16, ITLB2_MISSES),
+	CPUMF_EVENT_PTR(cf_z16, TLB2_PTE_WRITES),
+	CPUMF_EVENT_PTR(cf_z16, TLB2_CRSTE_WRITES),
+	CPUMF_EVENT_PTR(cf_z16, TLB2_ENGINES_BUSY),
+	CPUMF_EVENT_PTR(cf_z16, TX_C_TEND),
+	CPUMF_EVENT_PTR(cf_z16, TX_NC_TEND),
+	CPUMF_EVENT_PTR(cf_z16, L1C_TLB2_MISSES),
+	CPUMF_EVENT_PTR(cf_z16, DCW_REQ),
+	CPUMF_EVENT_PTR(cf_z16, DCW_REQ_IV),
+	CPUMF_EVENT_PTR(cf_z16, DCW_REQ_CHIP_HIT),
+	CPUMF_EVENT_PTR(cf_z16, DCW_REQ_DRAWER_HIT),
+	CPUMF_EVENT_PTR(cf_z16, DCW_ON_CHIP),
+	CPUMF_EVENT_PTR(cf_z16, DCW_ON_CHIP_IV),
+	CPUMF_EVENT_PTR(cf_z16, DCW_ON_CHIP_CHIP_HIT),
+	CPUMF_EVENT_PTR(cf_z16, DCW_ON_CHIP_DRAWER_HIT),
+	CPUMF_EVENT_PTR(cf_z16, DCW_ON_MODULE),
+	CPUMF_EVENT_PTR(cf_z16, DCW_ON_DRAWER),
+	CPUMF_EVENT_PTR(cf_z16, DCW_OFF_DRAWER),
+	CPUMF_EVENT_PTR(cf_z16, DCW_ON_CHIP_MEMORY),
+	CPUMF_EVENT_PTR(cf_z16, DCW_ON_MODULE_MEMORY),
+	CPUMF_EVENT_PTR(cf_z16, DCW_ON_DRAWER_MEMORY),
+	CPUMF_EVENT_PTR(cf_z16, DCW_OFF_DRAWER_MEMORY),
+	CPUMF_EVENT_PTR(cf_z16, IDCW_ON_MODULE_IV),
+	CPUMF_EVENT_PTR(cf_z16, IDCW_ON_MODULE_CHIP_HIT),
+	CPUMF_EVENT_PTR(cf_z16, IDCW_ON_MODULE_DRAWER_HIT),
+	CPUMF_EVENT_PTR(cf_z16, IDCW_ON_DRAWER_IV),
+	CPUMF_EVENT_PTR(cf_z16, IDCW_ON_DRAWER_CHIP_HIT),
+	CPUMF_EVENT_PTR(cf_z16, IDCW_ON_DRAWER_DRAWER_HIT),
+	CPUMF_EVENT_PTR(cf_z16, IDCW_OFF_DRAWER_IV),
+	CPUMF_EVENT_PTR(cf_z16, IDCW_OFF_DRAWER_CHIP_HIT),
+	CPUMF_EVENT_PTR(cf_z16, IDCW_OFF_DRAWER_DRAWER_HIT),
+	CPUMF_EVENT_PTR(cf_z16, ICW_REQ),
+	CPUMF_EVENT_PTR(cf_z16, ICW_REQ_IV),
+	CPUMF_EVENT_PTR(cf_z16, ICW_REQ_CHIP_HIT),
+	CPUMF_EVENT_PTR(cf_z16, ICW_REQ_DRAWER_HIT),
+	CPUMF_EVENT_PTR(cf_z16, ICW_ON_CHIP),
+	CPUMF_EVENT_PTR(cf_z16, ICW_ON_CHIP_IV),
+	CPUMF_EVENT_PTR(cf_z16, ICW_ON_CHIP_CHIP_HIT),
+	CPUMF_EVENT_PTR(cf_z16, ICW_ON_CHIP_DRAWER_HIT),
+	CPUMF_EVENT_PTR(cf_z16, ICW_ON_MODULE),
+	CPUMF_EVENT_PTR(cf_z16, ICW_ON_DRAWER),
+	CPUMF_EVENT_PTR(cf_z16, ICW_OFF_DRAWER),
+	CPUMF_EVENT_PTR(cf_z16, ICW_ON_CHIP_MEMORY),
+	CPUMF_EVENT_PTR(cf_z16, ICW_ON_MODULE_MEMORY),
+	CPUMF_EVENT_PTR(cf_z16, ICW_ON_DRAWER_MEMORY),
+	CPUMF_EVENT_PTR(cf_z16, ICW_OFF_DRAWER_MEMORY),
+	CPUMF_EVENT_PTR(cf_z16, BCD_DFP_EXECUTION_SLOTS),
+	CPUMF_EVENT_PTR(cf_z16, VX_BCD_EXECUTION_SLOTS),
+	CPUMF_EVENT_PTR(cf_z16, DECIMAL_INSTRUCTIONS),
+	CPUMF_EVENT_PTR(cf_z16, LAST_HOST_TRANSLATIONS),
+	CPUMF_EVENT_PTR(cf_z16, TX_NC_TABORT),
+	CPUMF_EVENT_PTR(cf_z16, TX_C_TABORT_NO_SPECIAL),
+	CPUMF_EVENT_PTR(cf_z16, TX_C_TABORT_SPECIAL),
+	CPUMF_EVENT_PTR(cf_z16, DFLT_ACCESS),
+	CPUMF_EVENT_PTR(cf_z16, DFLT_CYCLES),
+	CPUMF_EVENT_PTR(cf_z16, SORTL),
+	CPUMF_EVENT_PTR(cf_z16, DFLT_CC),
+	CPUMF_EVENT_PTR(cf_z16, DFLT_CCFINISH),
+	CPUMF_EVENT_PTR(cf_z16, NNPA_INVOCATIONS),
+	CPUMF_EVENT_PTR(cf_z16, NNPA_COMPLETIONS),
+	CPUMF_EVENT_PTR(cf_z16, NNPA_WAIT_LOCK),
+	CPUMF_EVENT_PTR(cf_z16, NNPA_HOLD_LOCK),
+	CPUMF_EVENT_PTR(cf_z16, MT_DIAG_CYCLES_ONE_THR_ACTIVE),
+	CPUMF_EVENT_PTR(cf_z16, MT_DIAG_CYCLES_TWO_THR_ACTIVE),
+	NULL,
+};
+
+/* END: CPUM_CF COUNTER DEFINITIONS ===================================== */
+
+static struct attribute_group cpumcf_pmu_events_group = {
+	.name = "events",
+};
+
+PMU_FORMAT_ATTR(event, "config:0-63");
+
+static struct attribute *cpumcf_pmu_format_attr[] = {
+	&format_attr_event.attr,
+	NULL,
+};
+
+static struct attribute_group cpumcf_pmu_format_group = {
+	.name = "format",
+	.attrs = cpumcf_pmu_format_attr,
+};
+
+static const struct attribute_group *cpumcf_pmu_attr_groups[] = {
+	&cpumcf_pmu_events_group,
+	&cpumcf_pmu_format_group,
+	NULL,
+};
+
+
+static __init struct attribute **merge_attr(struct attribute **a,
+					    struct attribute **b,
+					    struct attribute **c)
+{
+	struct attribute **new;
+	int j, i;
+
+	for (j = 0; a[j]; j++)
+		;
+	for (i = 0; b[i]; i++)
+		j++;
+	for (i = 0; c[i]; i++)
+		j++;
+	j++;
+
+	new = kmalloc_array(j, sizeof(struct attribute *), GFP_KERNEL);
+	if (!new)
+		return NULL;
+	j = 0;
+	for (i = 0; a[i]; i++)
+		new[j++] = a[i];
+	for (i = 0; b[i]; i++)
+		new[j++] = b[i];
+	for (i = 0; c[i]; i++)
+		new[j++] = c[i];
+	new[j] = NULL;
+
+	return new;
+}
+
+__init const struct attribute_group **cpumf_cf_event_group(void)
+{
+	struct attribute **combined, **model, **cfvn, **csvn;
+	struct attribute *none[] = { NULL };
+	struct cpumf_ctr_info ci;
+	struct cpuid cpu_id;
+
+	/* Determine generic counters set(s) */
+	qctri(&ci);
+	switch (ci.cfvn) {
+	case 1:
+		cfvn = cpumcf_fvn1_pmu_event_attr;
+		break;
+	case 3:
+		cfvn = cpumcf_fvn3_pmu_event_attr;
+		break;
+	default:
+		cfvn = none;
+	}
+
+	/* Determine version specific crypto set */
+	switch (ci.csvn) {
+	case 1 ... 5:
+		csvn = cpumcf_svn_12345_pmu_event_attr;
+		break;
+	case 6 ... 7:
+		csvn = cpumcf_svn_67_pmu_event_attr;
+		break;
+	default:
+		csvn = none;
+	}
+
+	/* Determine model-specific counter set(s) */
+	get_cpu_id(&cpu_id);
+	switch (cpu_id.machine) {
+	case 0x2097:
+	case 0x2098:
+		model = cpumcf_z10_pmu_event_attr;
+		break;
+	case 0x2817:
+	case 0x2818:
+		model = cpumcf_z196_pmu_event_attr;
+		break;
+	case 0x2827:
+	case 0x2828:
+		model = cpumcf_zec12_pmu_event_attr;
+		break;
+	case 0x2964:
+	case 0x2965:
+		model = cpumcf_z13_pmu_event_attr;
+		break;
+	case 0x3906:
+	case 0x3907:
+		model = cpumcf_z14_pmu_event_attr;
+		break;
+	case 0x8561:
+	case 0x8562:
+		model = cpumcf_z15_pmu_event_attr;
+		break;
+	case 0x3931:
+	case 0x3932:
+		model = cpumcf_z16_pmu_event_attr;
+		break;
+	default:
+		model = none;
+		break;
+	}
+
+	combined = merge_attr(cfvn, csvn, model);
+	if (combined)
+		cpumcf_pmu_events_group.attrs = combined;
+	return cpumcf_pmu_attr_groups;
+}
diff --git a/arch/s390/kernel/perf_cpum_sf.c b/arch/s390/kernel/perf_cpum_sf.c
new file mode 100644
index 0000000000..06efad5b4f
--- /dev/null
+++ b/arch/s390/kernel/perf_cpum_sf.c
@@ -0,0 +1,2280 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Performance event support for the System z CPU-measurement Sampling Facility
+ *
+ * Copyright IBM Corp. 2013, 2018
+ * Author(s): Hendrik Brueckner <brueckner@linux.vnet.ibm.com>
+ */
+#define KMSG_COMPONENT	"cpum_sf"
+#define pr_fmt(fmt)	KMSG_COMPONENT ": " fmt
+
+#include <linux/kernel.h>
+#include <linux/kernel_stat.h>
+#include <linux/perf_event.h>
+#include <linux/percpu.h>
+#include <linux/pid.h>
+#include <linux/notifier.h>
+#include <linux/export.h>
+#include <linux/slab.h>
+#include <linux/mm.h>
+#include <linux/moduleparam.h>
+#include <asm/cpu_mf.h>
+#include <asm/irq.h>
+#include <asm/debug.h>
+#include <asm/timex.h>
+#include <linux/io.h>
+
+/* Minimum number of sample-data-block-tables:
+ * At least one table is required for the sampling buffer structure.
+ * A single table contains up to 511 pointers to sample-data-blocks.
+ */
+#define CPUM_SF_MIN_SDBT	1
+
+/* Number of sample-data-blocks per sample-data-block-table (SDBT):
+ * A table contains SDB pointers (8 bytes) and one table-link entry
+ * that points to the origin of the next SDBT.
+ */
+#define CPUM_SF_SDB_PER_TABLE	((PAGE_SIZE - 8) / 8)
+
+/* Maximum page offset for an SDBT table-link entry:
+ * If this page offset is reached, a table-link entry to the next SDBT
+ * must be added.
+ */
+#define CPUM_SF_SDBT_TL_OFFSET	(CPUM_SF_SDB_PER_TABLE * 8)
+static inline int require_table_link(const void *sdbt)
+{
+	return ((unsigned long)sdbt & ~PAGE_MASK) == CPUM_SF_SDBT_TL_OFFSET;
+}
+
+/* Minimum and maximum sampling buffer sizes:
+ *
+ * This number represents the maximum size of the sampling buffer taking
+ * the number of sample-data-block-tables into account.  Note that these
+ * numbers apply to the basic-sampling function only.
+ * The maximum number of SDBs is increased by CPUM_SF_SDB_DIAG_FACTOR if
+ * the diagnostic-sampling function is active.
+ *
+ * Sampling buffer size		Buffer characteristics
+ * ---------------------------------------------------
+ *	 64KB		    ==	  16 pages (4KB per page)
+ *				   1 page  for SDB-tables
+ *				  15 pages for SDBs
+ *
+ *  32MB		    ==	8192 pages (4KB per page)
+ *				  16 pages for SDB-tables
+ *				8176 pages for SDBs
+ */
+static unsigned long __read_mostly CPUM_SF_MIN_SDB = 15;
+static unsigned long __read_mostly CPUM_SF_MAX_SDB = 8176;
+static unsigned long __read_mostly CPUM_SF_SDB_DIAG_FACTOR = 1;
+
+struct sf_buffer {
+	unsigned long	 *sdbt;	    /* Sample-data-block-table origin */
+	/* buffer characteristics (required for buffer increments) */
+	unsigned long  num_sdb;	    /* Number of sample-data-blocks */
+	unsigned long num_sdbt;	    /* Number of sample-data-block-tables */
+	unsigned long	 *tail;	    /* last sample-data-block-table */
+};
+
+struct aux_buffer {
+	struct sf_buffer sfb;
+	unsigned long head;	   /* index of SDB of buffer head */
+	unsigned long alert_mark;  /* index of SDB of alert request position */
+	unsigned long empty_mark;  /* mark of SDB not marked full */
+	unsigned long *sdb_index;  /* SDB address for fast lookup */
+	unsigned long *sdbt_index; /* SDBT address for fast lookup */
+};
+
+struct cpu_hw_sf {
+	/* CPU-measurement sampling information block */
+	struct hws_qsi_info_block qsi;
+	/* CPU-measurement sampling control block */
+	struct hws_lsctl_request_block lsctl;
+	struct sf_buffer sfb;	    /* Sampling buffer */
+	unsigned int flags;	    /* Status flags */
+	struct perf_event *event;   /* Scheduled perf event */
+	struct perf_output_handle handle; /* AUX buffer output handle */
+};
+static DEFINE_PER_CPU(struct cpu_hw_sf, cpu_hw_sf);
+
+/* Debug feature */
+static debug_info_t *sfdbg;
+
+/* Sampling control helper functions */
+static inline unsigned long freq_to_sample_rate(struct hws_qsi_info_block *qsi,
+						unsigned long freq)
+{
+	return (USEC_PER_SEC / freq) * qsi->cpu_speed;
+}
+
+static inline unsigned long sample_rate_to_freq(struct hws_qsi_info_block *qsi,
+						unsigned long rate)
+{
+	return USEC_PER_SEC * qsi->cpu_speed / rate;
+}
+
+/* Return TOD timestamp contained in an trailer entry */
+static inline unsigned long long trailer_timestamp(struct hws_trailer_entry *te)
+{
+	/* TOD in STCKE format */
+	if (te->header.t)
+		return *((unsigned long long *)&te->timestamp[1]);
+
+	/* TOD in STCK format */
+	return *((unsigned long long *)&te->timestamp[0]);
+}
+
+/* Return pointer to trailer entry of an sample data block */
+static inline struct hws_trailer_entry *trailer_entry_ptr(unsigned long v)
+{
+	void *ret;
+
+	ret = (void *)v;
+	ret += PAGE_SIZE;
+	ret -= sizeof(struct hws_trailer_entry);
+
+	return ret;
+}
+
+/*
+ * Return true if the entry in the sample data block table (sdbt)
+ * is a link to the next sdbt
+ */
+static inline int is_link_entry(unsigned long *s)
+{
+	return *s & 0x1UL ? 1 : 0;
+}
+
+/* Return pointer to the linked sdbt */
+static inline unsigned long *get_next_sdbt(unsigned long *s)
+{
+	return phys_to_virt(*s & ~0x1UL);
+}
+
+/*
+ * sf_disable() - Switch off sampling facility
+ */
+static int sf_disable(void)
+{
+	struct hws_lsctl_request_block sreq;
+
+	memset(&sreq, 0, sizeof(sreq));
+	return lsctl(&sreq);
+}
+
+/*
+ * sf_buffer_available() - Check for an allocated sampling buffer
+ */
+static int sf_buffer_available(struct cpu_hw_sf *cpuhw)
+{
+	return !!cpuhw->sfb.sdbt;
+}
+
+/*
+ * deallocate sampling facility buffer
+ */
+static void free_sampling_buffer(struct sf_buffer *sfb)
+{
+	unsigned long *sdbt, *curr;
+
+	if (!sfb->sdbt)
+		return;
+
+	sdbt = sfb->sdbt;
+	curr = sdbt;
+
+	/* Free the SDBT after all SDBs are processed... */
+	while (1) {
+		if (!*curr || !sdbt)
+			break;
+
+		/* Process table-link entries */
+		if (is_link_entry(curr)) {
+			curr = get_next_sdbt(curr);
+			if (sdbt)
+				free_page((unsigned long)sdbt);
+
+			/* If the origin is reached, sampling buffer is freed */
+			if (curr == sfb->sdbt)
+				break;
+			else
+				sdbt = curr;
+		} else {
+			/* Process SDB pointer */
+			if (*curr) {
+				free_page((unsigned long)phys_to_virt(*curr));
+				curr++;
+			}
+		}
+	}
+
+	debug_sprintf_event(sfdbg, 5, "%s: freed sdbt %#lx\n", __func__,
+			    (unsigned long)sfb->sdbt);
+	memset(sfb, 0, sizeof(*sfb));
+}
+
+static int alloc_sample_data_block(unsigned long *sdbt, gfp_t gfp_flags)
+{
+	struct hws_trailer_entry *te;
+	unsigned long sdb;
+
+	/* Allocate and initialize sample-data-block */
+	sdb = get_zeroed_page(gfp_flags);
+	if (!sdb)
+		return -ENOMEM;
+	te = trailer_entry_ptr(sdb);
+	te->header.a = 1;
+
+	/* Link SDB into the sample-data-block-table */
+	*sdbt = virt_to_phys((void *)sdb);
+
+	return 0;
+}
+
+/*
+ * realloc_sampling_buffer() - extend sampler memory
+ *
+ * Allocates new sample-data-blocks and adds them to the specified sampling
+ * buffer memory.
+ *
+ * Important: This modifies the sampling buffer and must be called when the
+ *	      sampling facility is disabled.
+ *
+ * Returns zero on success, non-zero otherwise.
+ */
+static int realloc_sampling_buffer(struct sf_buffer *sfb,
+				   unsigned long num_sdb, gfp_t gfp_flags)
+{
+	int i, rc;
+	unsigned long *new, *tail, *tail_prev = NULL;
+
+	if (!sfb->sdbt || !sfb->tail)
+		return -EINVAL;
+
+	if (!is_link_entry(sfb->tail))
+		return -EINVAL;
+
+	/* Append to the existing sampling buffer, overwriting the table-link
+	 * register.
+	 * The tail variables always points to the "tail" (last and table-link)
+	 * entry in an SDB-table.
+	 */
+	tail = sfb->tail;
+
+	/* Do a sanity check whether the table-link entry points to
+	 * the sampling buffer origin.
+	 */
+	if (sfb->sdbt != get_next_sdbt(tail)) {
+		debug_sprintf_event(sfdbg, 3, "%s: "
+				    "sampling buffer is not linked: origin %#lx"
+				    " tail %#lx\n", __func__,
+				    (unsigned long)sfb->sdbt,
+				    (unsigned long)tail);
+		return -EINVAL;
+	}
+
+	/* Allocate remaining SDBs */
+	rc = 0;
+	for (i = 0; i < num_sdb; i++) {
+		/* Allocate a new SDB-table if it is full. */
+		if (require_table_link(tail)) {
+			new = (unsigned long *)get_zeroed_page(gfp_flags);
+			if (!new) {
+				rc = -ENOMEM;
+				break;
+			}
+			sfb->num_sdbt++;
+			/* Link current page to tail of chain */
+			*tail = virt_to_phys((void *)new) + 1;
+			tail_prev = tail;
+			tail = new;
+		}
+
+		/* Allocate a new sample-data-block.
+		 * If there is not enough memory, stop the realloc process
+		 * and simply use what was allocated.  If this is a temporary
+		 * issue, a new realloc call (if required) might succeed.
+		 */
+		rc = alloc_sample_data_block(tail, gfp_flags);
+		if (rc) {
+			/* Undo last SDBT. An SDBT with no SDB at its first
+			 * entry but with an SDBT entry instead can not be
+			 * handled by the interrupt handler code.
+			 * Avoid this situation.
+			 */
+			if (tail_prev) {
+				sfb->num_sdbt--;
+				free_page((unsigned long)new);
+				tail = tail_prev;
+			}
+			break;
+		}
+		sfb->num_sdb++;
+		tail++;
+		tail_prev = new = NULL;	/* Allocated at least one SBD */
+	}
+
+	/* Link sampling buffer to its origin */
+	*tail = virt_to_phys(sfb->sdbt) + 1;
+	sfb->tail = tail;
+
+	debug_sprintf_event(sfdbg, 4, "%s: new buffer"
+			    " settings: sdbt %lu sdb %lu\n", __func__,
+			    sfb->num_sdbt, sfb->num_sdb);
+	return rc;
+}
+
+/*
+ * allocate_sampling_buffer() - allocate sampler memory
+ *
+ * Allocates and initializes a sampling buffer structure using the
+ * specified number of sample-data-blocks (SDB).  For each allocation,
+ * a 4K page is used.  The number of sample-data-block-tables (SDBT)
+ * are calculated from SDBs.
+ * Also set the ALERT_REQ mask in each SDBs trailer.
+ *
+ * Returns zero on success, non-zero otherwise.
+ */
+static int alloc_sampling_buffer(struct sf_buffer *sfb, unsigned long num_sdb)
+{
+	int rc;
+
+	if (sfb->sdbt)
+		return -EINVAL;
+
+	/* Allocate the sample-data-block-table origin */
+	sfb->sdbt = (unsigned long *)get_zeroed_page(GFP_KERNEL);
+	if (!sfb->sdbt)
+		return -ENOMEM;
+	sfb->num_sdb = 0;
+	sfb->num_sdbt = 1;
+
+	/* Link the table origin to point to itself to prepare for
+	 * realloc_sampling_buffer() invocation.
+	 */
+	sfb->tail = sfb->sdbt;
+	*sfb->tail = virt_to_phys((void *)sfb->sdbt) + 1;
+
+	/* Allocate requested number of sample-data-blocks */
+	rc = realloc_sampling_buffer(sfb, num_sdb, GFP_KERNEL);
+	if (rc) {
+		free_sampling_buffer(sfb);
+		debug_sprintf_event(sfdbg, 4, "%s: "
+			"realloc_sampling_buffer failed with rc %i\n",
+			__func__, rc);
+	} else
+		debug_sprintf_event(sfdbg, 4,
+			"%s: tear %#lx dear %#lx\n", __func__,
+			(unsigned long)sfb->sdbt, (unsigned long)*sfb->sdbt);
+	return rc;
+}
+
+static void sfb_set_limits(unsigned long min, unsigned long max)
+{
+	struct hws_qsi_info_block si;
+
+	CPUM_SF_MIN_SDB = min;
+	CPUM_SF_MAX_SDB = max;
+
+	memset(&si, 0, sizeof(si));
+	if (!qsi(&si))
+		CPUM_SF_SDB_DIAG_FACTOR = DIV_ROUND_UP(si.dsdes, si.bsdes);
+}
+
+static unsigned long sfb_max_limit(struct hw_perf_event *hwc)
+{
+	return SAMPL_DIAG_MODE(hwc) ? CPUM_SF_MAX_SDB * CPUM_SF_SDB_DIAG_FACTOR
+				    : CPUM_SF_MAX_SDB;
+}
+
+static unsigned long sfb_pending_allocs(struct sf_buffer *sfb,
+					struct hw_perf_event *hwc)
+{
+	if (!sfb->sdbt)
+		return SFB_ALLOC_REG(hwc);
+	if (SFB_ALLOC_REG(hwc) > sfb->num_sdb)
+		return SFB_ALLOC_REG(hwc) - sfb->num_sdb;
+	return 0;
+}
+
+static int sfb_has_pending_allocs(struct sf_buffer *sfb,
+				   struct hw_perf_event *hwc)
+{
+	return sfb_pending_allocs(sfb, hwc) > 0;
+}
+
+static void sfb_account_allocs(unsigned long num, struct hw_perf_event *hwc)
+{
+	/* Limit the number of SDBs to not exceed the maximum */
+	num = min_t(unsigned long, num, sfb_max_limit(hwc) - SFB_ALLOC_REG(hwc));
+	if (num)
+		SFB_ALLOC_REG(hwc) += num;
+}
+
+static void sfb_init_allocs(unsigned long num, struct hw_perf_event *hwc)
+{
+	SFB_ALLOC_REG(hwc) = 0;
+	sfb_account_allocs(num, hwc);
+}
+
+static void deallocate_buffers(struct cpu_hw_sf *cpuhw)
+{
+	if (cpuhw->sfb.sdbt)
+		free_sampling_buffer(&cpuhw->sfb);
+}
+
+static int allocate_buffers(struct cpu_hw_sf *cpuhw, struct hw_perf_event *hwc)
+{
+	unsigned long n_sdb, freq;
+	size_t sample_size;
+
+	/* Calculate sampling buffers using 4K pages
+	 *
+	 *    1. The sampling size is 32 bytes for basic sampling. This size
+	 *	 is the same for all machine types. Diagnostic
+	 *	 sampling uses auxlilary data buffer setup which provides the
+	 *	 memory for SDBs using linux common code auxiliary trace
+	 *	 setup.
+	 *
+	 *    2. Function alloc_sampling_buffer() sets the Alert Request
+	 *	 Control indicator to trigger a measurement-alert to harvest
+	 *	 sample-data-blocks (SDB). This is done per SDB. This
+	 *	 measurement alert interrupt fires quick enough to handle
+	 *	 one SDB, on very high frequency and work loads there might
+	 *	 be 2 to 3 SBDs available for sample processing.
+	 *	 Currently there is no need for setup alert request on every
+	 *	 n-th page. This is counterproductive as one IRQ triggers
+	 *	 a very high number of samples to be processed at one IRQ.
+	 *
+	 *    3. Use the sampling frequency as input.
+	 *	 Compute the number of SDBs and ensure a minimum
+	 *	 of CPUM_SF_MIN_SDB.  Depending on frequency add some more
+	 *	 SDBs to handle a higher sampling rate.
+	 *	 Use a minimum of CPUM_SF_MIN_SDB and allow for 100 samples
+	 *	 (one SDB) for every 10000 HZ frequency increment.
+	 *
+	 *    4. Compute the number of sample-data-block-tables (SDBT) and
+	 *	 ensure a minimum of CPUM_SF_MIN_SDBT (one table can manage up
+	 *	 to 511 SDBs).
+	 */
+	sample_size = sizeof(struct hws_basic_entry);
+	freq = sample_rate_to_freq(&cpuhw->qsi, SAMPL_RATE(hwc));
+	n_sdb = CPUM_SF_MIN_SDB + DIV_ROUND_UP(freq, 10000);
+
+	/* If there is already a sampling buffer allocated, it is very likely
+	 * that the sampling facility is enabled too.  If the event to be
+	 * initialized requires a greater sampling buffer, the allocation must
+	 * be postponed.  Changing the sampling buffer requires the sampling
+	 * facility to be in the disabled state.  So, account the number of
+	 * required SDBs and let cpumsf_pmu_enable() resize the buffer just
+	 * before the event is started.
+	 */
+	sfb_init_allocs(n_sdb, hwc);
+	if (sf_buffer_available(cpuhw))
+		return 0;
+
+	debug_sprintf_event(sfdbg, 3,
+			    "%s: rate %lu f %lu sdb %lu/%lu"
+			    " sample_size %lu cpuhw %p\n", __func__,
+			    SAMPL_RATE(hwc), freq, n_sdb, sfb_max_limit(hwc),
+			    sample_size, cpuhw);
+
+	return alloc_sampling_buffer(&cpuhw->sfb,
+				     sfb_pending_allocs(&cpuhw->sfb, hwc));
+}
+
+static unsigned long min_percent(unsigned int percent, unsigned long base,
+				 unsigned long min)
+{
+	return min_t(unsigned long, min, DIV_ROUND_UP(percent * base, 100));
+}
+
+static unsigned long compute_sfb_extent(unsigned long ratio, unsigned long base)
+{
+	/* Use a percentage-based approach to extend the sampling facility
+	 * buffer.  Accept up to 5% sample data loss.
+	 * Vary the extents between 1% to 5% of the current number of
+	 * sample-data-blocks.
+	 */
+	if (ratio <= 5)
+		return 0;
+	if (ratio <= 25)
+		return min_percent(1, base, 1);
+	if (ratio <= 50)
+		return min_percent(1, base, 1);
+	if (ratio <= 75)
+		return min_percent(2, base, 2);
+	if (ratio <= 100)
+		return min_percent(3, base, 3);
+	if (ratio <= 250)
+		return min_percent(4, base, 4);
+
+	return min_percent(5, base, 8);
+}
+
+static void sfb_account_overflows(struct cpu_hw_sf *cpuhw,
+				  struct hw_perf_event *hwc)
+{
+	unsigned long ratio, num;
+
+	if (!OVERFLOW_REG(hwc))
+		return;
+
+	/* The sample_overflow contains the average number of sample data
+	 * that has been lost because sample-data-blocks were full.
+	 *
+	 * Calculate the total number of sample data entries that has been
+	 * discarded.  Then calculate the ratio of lost samples to total samples
+	 * per second in percent.
+	 */
+	ratio = DIV_ROUND_UP(100 * OVERFLOW_REG(hwc) * cpuhw->sfb.num_sdb,
+			     sample_rate_to_freq(&cpuhw->qsi, SAMPL_RATE(hwc)));
+
+	/* Compute number of sample-data-blocks */
+	num = compute_sfb_extent(ratio, cpuhw->sfb.num_sdb);
+	if (num)
+		sfb_account_allocs(num, hwc);
+
+	debug_sprintf_event(sfdbg, 5, "%s: overflow %llu ratio %lu num %lu\n",
+			    __func__, OVERFLOW_REG(hwc), ratio, num);
+	OVERFLOW_REG(hwc) = 0;
+}
+
+/* extend_sampling_buffer() - Extend sampling buffer
+ * @sfb:	Sampling buffer structure (for local CPU)
+ * @hwc:	Perf event hardware structure
+ *
+ * Use this function to extend the sampling buffer based on the overflow counter
+ * and postponed allocation extents stored in the specified Perf event hardware.
+ *
+ * Important: This function disables the sampling facility in order to safely
+ *	      change the sampling buffer structure.  Do not call this function
+ *	      when the PMU is active.
+ */
+static void extend_sampling_buffer(struct sf_buffer *sfb,
+				   struct hw_perf_event *hwc)
+{
+	unsigned long num, num_old;
+	int rc;
+
+	num = sfb_pending_allocs(sfb, hwc);
+	if (!num)
+		return;
+	num_old = sfb->num_sdb;
+
+	/* Disable the sampling facility to reset any states and also
+	 * clear pending measurement alerts.
+	 */
+	sf_disable();
+
+	/* Extend the sampling buffer.
+	 * This memory allocation typically happens in an atomic context when
+	 * called by perf.  Because this is a reallocation, it is fine if the
+	 * new SDB-request cannot be satisfied immediately.
+	 */
+	rc = realloc_sampling_buffer(sfb, num, GFP_ATOMIC);
+	if (rc)
+		debug_sprintf_event(sfdbg, 5, "%s: realloc failed with rc %i\n",
+				    __func__, rc);
+
+	if (sfb_has_pending_allocs(sfb, hwc))
+		debug_sprintf_event(sfdbg, 5, "%s: "
+				    "req %lu alloc %lu remaining %lu\n",
+				    __func__, num, sfb->num_sdb - num_old,
+				    sfb_pending_allocs(sfb, hwc));
+}
+
+/* Number of perf events counting hardware events */
+static atomic_t num_events;
+/* Used to avoid races in calling reserve/release_cpumf_hardware */
+static DEFINE_MUTEX(pmc_reserve_mutex);
+
+#define PMC_INIT      0
+#define PMC_RELEASE   1
+#define PMC_FAILURE   2
+static void setup_pmc_cpu(void *flags)
+{
+	struct cpu_hw_sf *cpusf = this_cpu_ptr(&cpu_hw_sf);
+	int err = 0;
+
+	switch (*((int *)flags)) {
+	case PMC_INIT:
+		memset(cpusf, 0, sizeof(*cpusf));
+		err = qsi(&cpusf->qsi);
+		if (err)
+			break;
+		cpusf->flags |= PMU_F_RESERVED;
+		err = sf_disable();
+		break;
+	case PMC_RELEASE:
+		cpusf->flags &= ~PMU_F_RESERVED;
+		err = sf_disable();
+		if (!err)
+			deallocate_buffers(cpusf);
+		break;
+	}
+	if (err) {
+		*((int *)flags) |= PMC_FAILURE;
+		pr_err("Switching off the sampling facility failed with rc %i\n", err);
+	}
+}
+
+static void release_pmc_hardware(void)
+{
+	int flags = PMC_RELEASE;
+
+	irq_subclass_unregister(IRQ_SUBCLASS_MEASUREMENT_ALERT);
+	on_each_cpu(setup_pmc_cpu, &flags, 1);
+}
+
+static int reserve_pmc_hardware(void)
+{
+	int flags = PMC_INIT;
+
+	on_each_cpu(setup_pmc_cpu, &flags, 1);
+	if (flags & PMC_FAILURE) {
+		release_pmc_hardware();
+		return -ENODEV;
+	}
+	irq_subclass_register(IRQ_SUBCLASS_MEASUREMENT_ALERT);
+
+	return 0;
+}
+
+static void hw_perf_event_destroy(struct perf_event *event)
+{
+	/* Release PMC if this is the last perf event */
+	if (!atomic_add_unless(&num_events, -1, 1)) {
+		mutex_lock(&pmc_reserve_mutex);
+		if (atomic_dec_return(&num_events) == 0)
+			release_pmc_hardware();
+		mutex_unlock(&pmc_reserve_mutex);
+	}
+}
+
+static void hw_init_period(struct hw_perf_event *hwc, u64 period)
+{
+	hwc->sample_period = period;
+	hwc->last_period = hwc->sample_period;
+	local64_set(&hwc->period_left, hwc->sample_period);
+}
+
+static unsigned long hw_limit_rate(const struct hws_qsi_info_block *si,
+				   unsigned long rate)
+{
+	return clamp_t(unsigned long, rate,
+		       si->min_sampl_rate, si->max_sampl_rate);
+}
+
+static u32 cpumsf_pid_type(struct perf_event *event,
+			   u32 pid, enum pid_type type)
+{
+	struct task_struct *tsk;
+
+	/* Idle process */
+	if (!pid)
+		goto out;
+
+	tsk = find_task_by_pid_ns(pid, &init_pid_ns);
+	pid = -1;
+	if (tsk) {
+		/*
+		 * Only top level events contain the pid namespace in which
+		 * they are created.
+		 */
+		if (event->parent)
+			event = event->parent;
+		pid = __task_pid_nr_ns(tsk, type, event->ns);
+		/*
+		 * See also 1d953111b648
+		 * "perf/core: Don't report zero PIDs for exiting tasks".
+		 */
+		if (!pid && !pid_alive(tsk))
+			pid = -1;
+	}
+out:
+	return pid;
+}
+
+static void cpumsf_output_event_pid(struct perf_event *event,
+				    struct perf_sample_data *data,
+				    struct pt_regs *regs)
+{
+	u32 pid;
+	struct perf_event_header header;
+	struct perf_output_handle handle;
+
+	/*
+	 * Obtain the PID from the basic-sampling data entry and
+	 * correct the data->tid_entry.pid value.
+	 */
+	pid = data->tid_entry.pid;
+
+	/* Protect callchain buffers, tasks */
+	rcu_read_lock();
+
+	perf_prepare_sample(data, event, regs);
+	perf_prepare_header(&header, data, event, regs);
+	if (perf_output_begin(&handle, data, event, header.size))
+		goto out;
+
+	/* Update the process ID (see also kernel/events/core.c) */
+	data->tid_entry.pid = cpumsf_pid_type(event, pid, PIDTYPE_TGID);
+	data->tid_entry.tid = cpumsf_pid_type(event, pid, PIDTYPE_PID);
+
+	perf_output_sample(&handle, &header, data, event);
+	perf_output_end(&handle);
+out:
+	rcu_read_unlock();
+}
+
+static unsigned long getrate(bool freq, unsigned long sample,
+			     struct hws_qsi_info_block *si)
+{
+	unsigned long rate;
+
+	if (freq) {
+		rate = freq_to_sample_rate(si, sample);
+		rate = hw_limit_rate(si, rate);
+	} else {
+		/* The min/max sampling rates specifies the valid range
+		 * of sample periods.  If the specified sample period is
+		 * out of range, limit the period to the range boundary.
+		 */
+		rate = hw_limit_rate(si, sample);
+
+		/* The perf core maintains a maximum sample rate that is
+		 * configurable through the sysctl interface.  Ensure the
+		 * sampling rate does not exceed this value.  This also helps
+		 * to avoid throttling when pushing samples with
+		 * perf_event_overflow().
+		 */
+		if (sample_rate_to_freq(si, rate) >
+		    sysctl_perf_event_sample_rate) {
+			debug_sprintf_event(sfdbg, 1, "%s: "
+					    "Sampling rate exceeds maximum "
+					    "perf sample rate\n", __func__);
+			rate = 0;
+		}
+	}
+	return rate;
+}
+
+/* The sampling information (si) contains information about the
+ * min/max sampling intervals and the CPU speed.  So calculate the
+ * correct sampling interval and avoid the whole period adjust
+ * feedback loop.
+ *
+ * Since the CPU Measurement sampling facility can not handle frequency
+ * calculate the sampling interval when frequency is specified using
+ * this formula:
+ *	interval := cpu_speed * 1000000 / sample_freq
+ *
+ * Returns errno on bad input and zero on success with parameter interval
+ * set to the correct sampling rate.
+ *
+ * Note: This function turns off freq bit to avoid calling function
+ * perf_adjust_period(). This causes frequency adjustment in the common
+ * code part which causes tremendous variations in the counter values.
+ */
+static int __hw_perf_event_init_rate(struct perf_event *event,
+				     struct hws_qsi_info_block *si)
+{
+	struct perf_event_attr *attr = &event->attr;
+	struct hw_perf_event *hwc = &event->hw;
+	unsigned long rate;
+
+	if (attr->freq) {
+		if (!attr->sample_freq)
+			return -EINVAL;
+		rate = getrate(attr->freq, attr->sample_freq, si);
+		attr->freq = 0;		/* Don't call  perf_adjust_period() */
+		SAMPL_FLAGS(hwc) |= PERF_CPUM_SF_FREQ_MODE;
+	} else {
+		rate = getrate(attr->freq, attr->sample_period, si);
+		if (!rate)
+			return -EINVAL;
+	}
+	attr->sample_period = rate;
+	SAMPL_RATE(hwc) = rate;
+	hw_init_period(hwc, SAMPL_RATE(hwc));
+	debug_sprintf_event(sfdbg, 4, "%s: cpu %d period %#llx freq %d,%#lx\n",
+			    __func__, event->cpu, event->attr.sample_period,
+			    event->attr.freq, SAMPLE_FREQ_MODE(hwc));
+	return 0;
+}
+
+static int __hw_perf_event_init(struct perf_event *event)
+{
+	struct cpu_hw_sf *cpuhw;
+	struct hws_qsi_info_block si;
+	struct perf_event_attr *attr = &event->attr;
+	struct hw_perf_event *hwc = &event->hw;
+	int cpu, err;
+
+	/* Reserve CPU-measurement sampling facility */
+	err = 0;
+	if (!atomic_inc_not_zero(&num_events)) {
+		mutex_lock(&pmc_reserve_mutex);
+		if (atomic_read(&num_events) == 0 && reserve_pmc_hardware())
+			err = -EBUSY;
+		else
+			atomic_inc(&num_events);
+		mutex_unlock(&pmc_reserve_mutex);
+	}
+	event->destroy = hw_perf_event_destroy;
+
+	if (err)
+		goto out;
+
+	/* Access per-CPU sampling information (query sampling info) */
+	/*
+	 * The event->cpu value can be -1 to count on every CPU, for example,
+	 * when attaching to a task.  If this is specified, use the query
+	 * sampling info from the current CPU, otherwise use event->cpu to
+	 * retrieve the per-CPU information.
+	 * Later, cpuhw indicates whether to allocate sampling buffers for a
+	 * particular CPU (cpuhw!=NULL) or each online CPU (cpuw==NULL).
+	 */
+	memset(&si, 0, sizeof(si));
+	cpuhw = NULL;
+	if (event->cpu == -1)
+		qsi(&si);
+	else {
+		/* Event is pinned to a particular CPU, retrieve the per-CPU
+		 * sampling structure for accessing the CPU-specific QSI.
+		 */
+		cpuhw = &per_cpu(cpu_hw_sf, event->cpu);
+		si = cpuhw->qsi;
+	}
+
+	/* Check sampling facility authorization and, if not authorized,
+	 * fall back to other PMUs.  It is safe to check any CPU because
+	 * the authorization is identical for all configured CPUs.
+	 */
+	if (!si.as) {
+		err = -ENOENT;
+		goto out;
+	}
+
+	if (si.ribm & CPU_MF_SF_RIBM_NOTAV) {
+		pr_warn("CPU Measurement Facility sampling is temporarily not available\n");
+		err = -EBUSY;
+		goto out;
+	}
+
+	/* Always enable basic sampling */
+	SAMPL_FLAGS(hwc) = PERF_CPUM_SF_BASIC_MODE;
+
+	/* Check if diagnostic sampling is requested.  Deny if the required
+	 * sampling authorization is missing.
+	 */
+	if (attr->config == PERF_EVENT_CPUM_SF_DIAG) {
+		if (!si.ad) {
+			err = -EPERM;
+			goto out;
+		}
+		SAMPL_FLAGS(hwc) |= PERF_CPUM_SF_DIAG_MODE;
+	}
+
+	err =  __hw_perf_event_init_rate(event, &si);
+	if (err)
+		goto out;
+
+	/* Initialize sample data overflow accounting */
+	hwc->extra_reg.reg = REG_OVERFLOW;
+	OVERFLOW_REG(hwc) = 0;
+
+	/* Use AUX buffer. No need to allocate it by ourself */
+	if (attr->config == PERF_EVENT_CPUM_SF_DIAG)
+		return 0;
+
+	/* Allocate the per-CPU sampling buffer using the CPU information
+	 * from the event.  If the event is not pinned to a particular
+	 * CPU (event->cpu == -1; or cpuhw == NULL), allocate sampling
+	 * buffers for each online CPU.
+	 */
+	if (cpuhw)
+		/* Event is pinned to a particular CPU */
+		err = allocate_buffers(cpuhw, hwc);
+	else {
+		/* Event is not pinned, allocate sampling buffer on
+		 * each online CPU
+		 */
+		for_each_online_cpu(cpu) {
+			cpuhw = &per_cpu(cpu_hw_sf, cpu);
+			err = allocate_buffers(cpuhw, hwc);
+			if (err)
+				break;
+		}
+	}
+
+	/* If PID/TID sampling is active, replace the default overflow
+	 * handler to extract and resolve the PIDs from the basic-sampling
+	 * data entries.
+	 */
+	if (event->attr.sample_type & PERF_SAMPLE_TID)
+		if (is_default_overflow_handler(event))
+			event->overflow_handler = cpumsf_output_event_pid;
+out:
+	return err;
+}
+
+static bool is_callchain_event(struct perf_event *event)
+{
+	u64 sample_type = event->attr.sample_type;
+
+	return sample_type & (PERF_SAMPLE_CALLCHAIN | PERF_SAMPLE_REGS_USER |
+			      PERF_SAMPLE_STACK_USER);
+}
+
+static int cpumsf_pmu_event_init(struct perf_event *event)
+{
+	int err;
+
+	/* No support for taken branch sampling */
+	/* No support for callchain, stacks and registers */
+	if (has_branch_stack(event) || is_callchain_event(event))
+		return -EOPNOTSUPP;
+
+	switch (event->attr.type) {
+	case PERF_TYPE_RAW:
+		if ((event->attr.config != PERF_EVENT_CPUM_SF) &&
+		    (event->attr.config != PERF_EVENT_CPUM_SF_DIAG))
+			return -ENOENT;
+		break;
+	case PERF_TYPE_HARDWARE:
+		/* Support sampling of CPU cycles in addition to the
+		 * counter facility.  However, the counter facility
+		 * is more precise and, hence, restrict this PMU to
+		 * sampling events only.
+		 */
+		if (event->attr.config != PERF_COUNT_HW_CPU_CYCLES)
+			return -ENOENT;
+		if (!is_sampling_event(event))
+			return -ENOENT;
+		break;
+	default:
+		return -ENOENT;
+	}
+
+	/* Force reset of idle/hv excludes regardless of what the
+	 * user requested.
+	 */
+	if (event->attr.exclude_hv)
+		event->attr.exclude_hv = 0;
+	if (event->attr.exclude_idle)
+		event->attr.exclude_idle = 0;
+
+	err = __hw_perf_event_init(event);
+	if (unlikely(err))
+		if (event->destroy)
+			event->destroy(event);
+	return err;
+}
+
+static void cpumsf_pmu_enable(struct pmu *pmu)
+{
+	struct cpu_hw_sf *cpuhw = this_cpu_ptr(&cpu_hw_sf);
+	struct hw_perf_event *hwc;
+	int err;
+
+	if (cpuhw->flags & PMU_F_ENABLED)
+		return;
+
+	if (cpuhw->flags & PMU_F_ERR_MASK)
+		return;
+
+	/* Check whether to extent the sampling buffer.
+	 *
+	 * Two conditions trigger an increase of the sampling buffer for a
+	 * perf event:
+	 *    1. Postponed buffer allocations from the event initialization.
+	 *    2. Sampling overflows that contribute to pending allocations.
+	 *
+	 * Note that the extend_sampling_buffer() function disables the sampling
+	 * facility, but it can be fully re-enabled using sampling controls that
+	 * have been saved in cpumsf_pmu_disable().
+	 */
+	if (cpuhw->event) {
+		hwc = &cpuhw->event->hw;
+		if (!(SAMPL_DIAG_MODE(hwc))) {
+			/*
+			 * Account number of overflow-designated
+			 * buffer extents
+			 */
+			sfb_account_overflows(cpuhw, hwc);
+			extend_sampling_buffer(&cpuhw->sfb, hwc);
+		}
+		/* Rate may be adjusted with ioctl() */
+		cpuhw->lsctl.interval = SAMPL_RATE(&cpuhw->event->hw);
+	}
+
+	/* (Re)enable the PMU and sampling facility */
+	cpuhw->flags |= PMU_F_ENABLED;
+	barrier();
+
+	err = lsctl(&cpuhw->lsctl);
+	if (err) {
+		cpuhw->flags &= ~PMU_F_ENABLED;
+		pr_err("Loading sampling controls failed: op 1 err %i\n", err);
+		return;
+	}
+
+	/* Load current program parameter */
+	lpp(&S390_lowcore.lpp);
+
+	debug_sprintf_event(sfdbg, 6, "%s: es %i cs %i ed %i cd %i "
+			    "interval %#lx tear %#lx dear %#lx\n", __func__,
+			    cpuhw->lsctl.es, cpuhw->lsctl.cs, cpuhw->lsctl.ed,
+			    cpuhw->lsctl.cd, cpuhw->lsctl.interval,
+			    cpuhw->lsctl.tear, cpuhw->lsctl.dear);
+}
+
+static void cpumsf_pmu_disable(struct pmu *pmu)
+{
+	struct cpu_hw_sf *cpuhw = this_cpu_ptr(&cpu_hw_sf);
+	struct hws_lsctl_request_block inactive;
+	struct hws_qsi_info_block si;
+	int err;
+
+	if (!(cpuhw->flags & PMU_F_ENABLED))
+		return;
+
+	if (cpuhw->flags & PMU_F_ERR_MASK)
+		return;
+
+	/* Switch off sampling activation control */
+	inactive = cpuhw->lsctl;
+	inactive.cs = 0;
+	inactive.cd = 0;
+
+	err = lsctl(&inactive);
+	if (err) {
+		pr_err("Loading sampling controls failed: op 2 err %i\n", err);
+		return;
+	}
+
+	/* Save state of TEAR and DEAR register contents */
+	err = qsi(&si);
+	if (!err) {
+		/* TEAR/DEAR values are valid only if the sampling facility is
+		 * enabled.  Note that cpumsf_pmu_disable() might be called even
+		 * for a disabled sampling facility because cpumsf_pmu_enable()
+		 * controls the enable/disable state.
+		 */
+		if (si.es) {
+			cpuhw->lsctl.tear = si.tear;
+			cpuhw->lsctl.dear = si.dear;
+		}
+	} else
+		debug_sprintf_event(sfdbg, 3, "%s: qsi() failed with err %i\n",
+				    __func__, err);
+
+	cpuhw->flags &= ~PMU_F_ENABLED;
+}
+
+/* perf_exclude_event() - Filter event
+ * @event:	The perf event
+ * @regs:	pt_regs structure
+ * @sde_regs:	Sample-data-entry (sde) regs structure
+ *
+ * Filter perf events according to their exclude specification.
+ *
+ * Return non-zero if the event shall be excluded.
+ */
+static int perf_exclude_event(struct perf_event *event, struct pt_regs *regs,
+			      struct perf_sf_sde_regs *sde_regs)
+{
+	if (event->attr.exclude_user && user_mode(regs))
+		return 1;
+	if (event->attr.exclude_kernel && !user_mode(regs))
+		return 1;
+	if (event->attr.exclude_guest && sde_regs->in_guest)
+		return 1;
+	if (event->attr.exclude_host && !sde_regs->in_guest)
+		return 1;
+	return 0;
+}
+
+/* perf_push_sample() - Push samples to perf
+ * @event:	The perf event
+ * @sample:	Hardware sample data
+ *
+ * Use the hardware sample data to create perf event sample.  The sample
+ * is the pushed to the event subsystem and the function checks for
+ * possible event overflows.  If an event overflow occurs, the PMU is
+ * stopped.
+ *
+ * Return non-zero if an event overflow occurred.
+ */
+static int perf_push_sample(struct perf_event *event,
+			    struct hws_basic_entry *basic)
+{
+	int overflow;
+	struct pt_regs regs;
+	struct perf_sf_sde_regs *sde_regs;
+	struct perf_sample_data data;
+
+	/* Setup perf sample */
+	perf_sample_data_init(&data, 0, event->hw.last_period);
+
+	/* Setup pt_regs to look like an CPU-measurement external interrupt
+	 * using the Program Request Alert code.  The regs.int_parm_long
+	 * field which is unused contains additional sample-data-entry related
+	 * indicators.
+	 */
+	memset(&regs, 0, sizeof(regs));
+	regs.int_code = 0x1407;
+	regs.int_parm = CPU_MF_INT_SF_PRA;
+	sde_regs = (struct perf_sf_sde_regs *) &regs.int_parm_long;
+
+	psw_bits(regs.psw).ia	= basic->ia;
+	psw_bits(regs.psw).dat	= basic->T;
+	psw_bits(regs.psw).wait = basic->W;
+	psw_bits(regs.psw).pstate = basic->P;
+	psw_bits(regs.psw).as	= basic->AS;
+
+	/*
+	 * Use the hardware provided configuration level to decide if the
+	 * sample belongs to a guest or host. If that is not available,
+	 * fall back to the following heuristics:
+	 * A non-zero guest program parameter always indicates a guest
+	 * sample. Some early samples or samples from guests without
+	 * lpp usage would be misaccounted to the host. We use the asn
+	 * value as an addon heuristic to detect most of these guest samples.
+	 * If the value differs from 0xffff (the host value), we assume to
+	 * be a KVM guest.
+	 */
+	switch (basic->CL) {
+	case 1: /* logical partition */
+		sde_regs->in_guest = 0;
+		break;
+	case 2: /* virtual machine */
+		sde_regs->in_guest = 1;
+		break;
+	default: /* old machine, use heuristics */
+		if (basic->gpp || basic->prim_asn != 0xffff)
+			sde_regs->in_guest = 1;
+		break;
+	}
+
+	/*
+	 * Store the PID value from the sample-data-entry to be
+	 * processed and resolved by cpumsf_output_event_pid().
+	 */
+	data.tid_entry.pid = basic->hpp & LPP_PID_MASK;
+
+	overflow = 0;
+	if (perf_exclude_event(event, &regs, sde_regs))
+		goto out;
+	if (perf_event_overflow(event, &data, &regs)) {
+		overflow = 1;
+		event->pmu->stop(event, 0);
+	}
+	perf_event_update_userpage(event);
+out:
+	return overflow;
+}
+
+static void perf_event_count_update(struct perf_event *event, u64 count)
+{
+	local64_add(count, &event->count);
+}
+
+/* hw_collect_samples() - Walk through a sample-data-block and collect samples
+ * @event:	The perf event
+ * @sdbt:	Sample-data-block table
+ * @overflow:	Event overflow counter
+ *
+ * Walks through a sample-data-block and collects sampling data entries that are
+ * then pushed to the perf event subsystem.  Depending on the sampling function,
+ * there can be either basic-sampling or combined-sampling data entries.  A
+ * combined-sampling data entry consists of a basic- and a diagnostic-sampling
+ * data entry.	The sampling function is determined by the flags in the perf
+ * event hardware structure.  The function always works with a combined-sampling
+ * data entry but ignores the the diagnostic portion if it is not available.
+ *
+ * Note that the implementation focuses on basic-sampling data entries and, if
+ * such an entry is not valid, the entire combined-sampling data entry is
+ * ignored.
+ *
+ * The overflow variables counts the number of samples that has been discarded
+ * due to a perf event overflow.
+ */
+static void hw_collect_samples(struct perf_event *event, unsigned long *sdbt,
+			       unsigned long long *overflow)
+{
+	struct hws_trailer_entry *te;
+	struct hws_basic_entry *sample;
+
+	te = trailer_entry_ptr((unsigned long)sdbt);
+	sample = (struct hws_basic_entry *)sdbt;
+	while ((unsigned long *)sample < (unsigned long *)te) {
+		/* Check for an empty sample */
+		if (!sample->def || sample->LS)
+			break;
+
+		/* Update perf event period */
+		perf_event_count_update(event, SAMPL_RATE(&event->hw));
+
+		/* Check whether sample is valid */
+		if (sample->def == 0x0001) {
+			/* If an event overflow occurred, the PMU is stopped to
+			 * throttle event delivery.  Remaining sample data is
+			 * discarded.
+			 */
+			if (!*overflow) {
+				/* Check whether sample is consistent */
+				if (sample->I == 0 && sample->W == 0) {
+					/* Deliver sample data to perf */
+					*overflow = perf_push_sample(event,
+								     sample);
+				}
+			} else
+				/* Count discarded samples */
+				*overflow += 1;
+		} else {
+			debug_sprintf_event(sfdbg, 4,
+					    "%s: Found unknown"
+					    " sampling data entry: te->f %i"
+					    " basic.def %#4x (%p)\n", __func__,
+					    te->header.f, sample->def, sample);
+			/* Sample slot is not yet written or other record.
+			 *
+			 * This condition can occur if the buffer was reused
+			 * from a combined basic- and diagnostic-sampling.
+			 * If only basic-sampling is then active, entries are
+			 * written into the larger diagnostic entries.
+			 * This is typically the case for sample-data-blocks
+			 * that are not full.  Stop processing if the first
+			 * invalid format was detected.
+			 */
+			if (!te->header.f)
+				break;
+		}
+
+		/* Reset sample slot and advance to next sample */
+		sample->def = 0;
+		sample++;
+	}
+}
+
+/* hw_perf_event_update() - Process sampling buffer
+ * @event:	The perf event
+ * @flush_all:	Flag to also flush partially filled sample-data-blocks
+ *
+ * Processes the sampling buffer and create perf event samples.
+ * The sampling buffer position are retrieved and saved in the TEAR_REG
+ * register of the specified perf event.
+ *
+ * Only full sample-data-blocks are processed.	Specify the flush_all flag
+ * to also walk through partially filled sample-data-blocks.
+ */
+static void hw_perf_event_update(struct perf_event *event, int flush_all)
+{
+	unsigned long long event_overflow, sampl_overflow, num_sdb;
+	union hws_trailer_header old, prev, new;
+	struct hw_perf_event *hwc = &event->hw;
+	struct hws_trailer_entry *te;
+	unsigned long *sdbt, sdb;
+	int done;
+
+	/*
+	 * AUX buffer is used when in diagnostic sampling mode.
+	 * No perf events/samples are created.
+	 */
+	if (SAMPL_DIAG_MODE(&event->hw))
+		return;
+
+	sdbt = (unsigned long *)TEAR_REG(hwc);
+	done = event_overflow = sampl_overflow = num_sdb = 0;
+	while (!done) {
+		/* Get the trailer entry of the sample-data-block */
+		sdb = (unsigned long)phys_to_virt(*sdbt);
+		te = trailer_entry_ptr(sdb);
+
+		/* Leave loop if no more work to do (block full indicator) */
+		if (!te->header.f) {
+			done = 1;
+			if (!flush_all)
+				break;
+		}
+
+		/* Check the sample overflow count */
+		if (te->header.overflow)
+			/* Account sample overflows and, if a particular limit
+			 * is reached, extend the sampling buffer.
+			 * For details, see sfb_account_overflows().
+			 */
+			sampl_overflow += te->header.overflow;
+
+		/* Timestamps are valid for full sample-data-blocks only */
+		debug_sprintf_event(sfdbg, 6, "%s: sdbt %#lx/%#lx "
+				    "overflow %llu timestamp %#llx\n",
+				    __func__, sdb, (unsigned long)sdbt,
+				    te->header.overflow,
+				    (te->header.f) ? trailer_timestamp(te) : 0ULL);
+
+		/* Collect all samples from a single sample-data-block and
+		 * flag if an (perf) event overflow happened.  If so, the PMU
+		 * is stopped and remaining samples will be discarded.
+		 */
+		hw_collect_samples(event, (unsigned long *)sdb, &event_overflow);
+		num_sdb++;
+
+		/* Reset trailer (using compare-double-and-swap) */
+		prev.val = READ_ONCE_ALIGNED_128(te->header.val);
+		do {
+			old.val = prev.val;
+			new.val = prev.val;
+			new.f = 0;
+			new.a = 1;
+			new.overflow = 0;
+			prev.val = cmpxchg128(&te->header.val, old.val, new.val);
+		} while (prev.val != old.val);
+
+		/* Advance to next sample-data-block */
+		sdbt++;
+		if (is_link_entry(sdbt))
+			sdbt = get_next_sdbt(sdbt);
+
+		/* Update event hardware registers */
+		TEAR_REG(hwc) = (unsigned long) sdbt;
+
+		/* Stop processing sample-data if all samples of the current
+		 * sample-data-block were flushed even if it was not full.
+		 */
+		if (flush_all && done)
+			break;
+	}
+
+	/* Account sample overflows in the event hardware structure */
+	if (sampl_overflow)
+		OVERFLOW_REG(hwc) = DIV_ROUND_UP(OVERFLOW_REG(hwc) +
+						 sampl_overflow, 1 + num_sdb);
+
+	/* Perf_event_overflow() and perf_event_account_interrupt() limit
+	 * the interrupt rate to an upper limit. Roughly 1000 samples per
+	 * task tick.
+	 * Hitting this limit results in a large number
+	 * of throttled REF_REPORT_THROTTLE entries and the samples
+	 * are dropped.
+	 * Slightly increase the interval to avoid hitting this limit.
+	 */
+	if (event_overflow) {
+		SAMPL_RATE(hwc) += DIV_ROUND_UP(SAMPL_RATE(hwc), 10);
+		debug_sprintf_event(sfdbg, 1, "%s: rate adjustment %ld\n",
+				    __func__,
+				    DIV_ROUND_UP(SAMPL_RATE(hwc), 10));
+	}
+
+	if (sampl_overflow || event_overflow)
+		debug_sprintf_event(sfdbg, 4, "%s: "
+				    "overflows: sample %llu event %llu"
+				    " total %llu num_sdb %llu\n",
+				    __func__, sampl_overflow, event_overflow,
+				    OVERFLOW_REG(hwc), num_sdb);
+}
+
+static inline unsigned long aux_sdb_index(struct aux_buffer *aux,
+					  unsigned long i)
+{
+	return i % aux->sfb.num_sdb;
+}
+
+static inline unsigned long aux_sdb_num(unsigned long start, unsigned long end)
+{
+	return end >= start ? end - start + 1 : 0;
+}
+
+static inline unsigned long aux_sdb_num_alert(struct aux_buffer *aux)
+{
+	return aux_sdb_num(aux->head, aux->alert_mark);
+}
+
+static inline unsigned long aux_sdb_num_empty(struct aux_buffer *aux)
+{
+	return aux_sdb_num(aux->head, aux->empty_mark);
+}
+
+/*
+ * Get trailer entry by index of SDB.
+ */
+static struct hws_trailer_entry *aux_sdb_trailer(struct aux_buffer *aux,
+						 unsigned long index)
+{
+	unsigned long sdb;
+
+	index = aux_sdb_index(aux, index);
+	sdb = aux->sdb_index[index];
+	return trailer_entry_ptr(sdb);
+}
+
+/*
+ * Finish sampling on the cpu. Called by cpumsf_pmu_del() with pmu
+ * disabled. Collect the full SDBs in AUX buffer which have not reached
+ * the point of alert indicator. And ignore the SDBs which are not
+ * full.
+ *
+ * 1. Scan SDBs to see how much data is there and consume them.
+ * 2. Remove alert indicator in the buffer.
+ */
+static void aux_output_end(struct perf_output_handle *handle)
+{
+	unsigned long i, range_scan, idx;
+	struct aux_buffer *aux;
+	struct hws_trailer_entry *te;
+
+	aux = perf_get_aux(handle);
+	if (!aux)
+		return;
+
+	range_scan = aux_sdb_num_alert(aux);
+	for (i = 0, idx = aux->head; i < range_scan; i++, idx++) {
+		te = aux_sdb_trailer(aux, idx);
+		if (!te->header.f)
+			break;
+	}
+	/* i is num of SDBs which are full */
+	perf_aux_output_end(handle, i << PAGE_SHIFT);
+
+	/* Remove alert indicators in the buffer */
+	te = aux_sdb_trailer(aux, aux->alert_mark);
+	te->header.a = 0;
+
+	debug_sprintf_event(sfdbg, 6, "%s: SDBs %ld range %ld head %ld\n",
+			    __func__, i, range_scan, aux->head);
+}
+
+/*
+ * Start sampling on the CPU. Called by cpumsf_pmu_add() when an event
+ * is first added to the CPU or rescheduled again to the CPU. It is called
+ * with pmu disabled.
+ *
+ * 1. Reset the trailer of SDBs to get ready for new data.
+ * 2. Tell the hardware where to put the data by reset the SDBs buffer
+ *    head(tear/dear).
+ */
+static int aux_output_begin(struct perf_output_handle *handle,
+			    struct aux_buffer *aux,
+			    struct cpu_hw_sf *cpuhw)
+{
+	unsigned long range, i, range_scan, idx, head, base, offset;
+	struct hws_trailer_entry *te;
+
+	if (WARN_ON_ONCE(handle->head & ~PAGE_MASK))
+		return -EINVAL;
+
+	aux->head = handle->head >> PAGE_SHIFT;
+	range = (handle->size + 1) >> PAGE_SHIFT;
+	if (range <= 1)
+		return -ENOMEM;
+
+	/*
+	 * SDBs between aux->head and aux->empty_mark are already ready
+	 * for new data. range_scan is num of SDBs not within them.
+	 */
+	debug_sprintf_event(sfdbg, 6,
+			    "%s: range %ld head %ld alert %ld empty %ld\n",
+			    __func__, range, aux->head, aux->alert_mark,
+			    aux->empty_mark);
+	if (range > aux_sdb_num_empty(aux)) {
+		range_scan = range - aux_sdb_num_empty(aux);
+		idx = aux->empty_mark + 1;
+		for (i = 0; i < range_scan; i++, idx++) {
+			te = aux_sdb_trailer(aux, idx);
+			te->header.f = 0;
+			te->header.a = 0;
+			te->header.overflow = 0;
+		}
+		/* Save the position of empty SDBs */
+		aux->empty_mark = aux->head + range - 1;
+	}
+
+	/* Set alert indicator */
+	aux->alert_mark = aux->head + range/2 - 1;
+	te = aux_sdb_trailer(aux, aux->alert_mark);
+	te->header.a = 1;
+
+	/* Reset hardware buffer head */
+	head = aux_sdb_index(aux, aux->head);
+	base = aux->sdbt_index[head / CPUM_SF_SDB_PER_TABLE];
+	offset = head % CPUM_SF_SDB_PER_TABLE;
+	cpuhw->lsctl.tear = virt_to_phys((void *)base) + offset * sizeof(unsigned long);
+	cpuhw->lsctl.dear = virt_to_phys((void *)aux->sdb_index[head]);
+
+	debug_sprintf_event(sfdbg, 6, "%s: head %ld alert %ld empty %ld "
+			    "index %ld tear %#lx dear %#lx\n", __func__,
+			    aux->head, aux->alert_mark, aux->empty_mark,
+			    head / CPUM_SF_SDB_PER_TABLE,
+			    cpuhw->lsctl.tear, cpuhw->lsctl.dear);
+
+	return 0;
+}
+
+/*
+ * Set alert indicator on SDB at index @alert_index while sampler is running.
+ *
+ * Return true if successfully.
+ * Return false if full indicator is already set by hardware sampler.
+ */
+static bool aux_set_alert(struct aux_buffer *aux, unsigned long alert_index,
+			  unsigned long long *overflow)
+{
+	union hws_trailer_header old, prev, new;
+	struct hws_trailer_entry *te;
+
+	te = aux_sdb_trailer(aux, alert_index);
+	prev.val = READ_ONCE_ALIGNED_128(te->header.val);
+	do {
+		old.val = prev.val;
+		new.val = prev.val;
+		*overflow = old.overflow;
+		if (old.f) {
+			/*
+			 * SDB is already set by hardware.
+			 * Abort and try to set somewhere
+			 * behind.
+			 */
+			return false;
+		}
+		new.a = 1;
+		new.overflow = 0;
+		prev.val = cmpxchg128(&te->header.val, old.val, new.val);
+	} while (prev.val != old.val);
+	return true;
+}
+
+/*
+ * aux_reset_buffer() - Scan and setup SDBs for new samples
+ * @aux:	The AUX buffer to set
+ * @range:	The range of SDBs to scan started from aux->head
+ * @overflow:	Set to overflow count
+ *
+ * Set alert indicator on the SDB at index of aux->alert_mark. If this SDB is
+ * marked as empty, check if it is already set full by the hardware sampler.
+ * If yes, that means new data is already there before we can set an alert
+ * indicator. Caller should try to set alert indicator to some position behind.
+ *
+ * Scan the SDBs in AUX buffer from behind aux->empty_mark. They are used
+ * previously and have already been consumed by user space. Reset these SDBs
+ * (clear full indicator and alert indicator) for new data.
+ * If aux->alert_mark fall in this area, just set it. Overflow count is
+ * recorded while scanning.
+ *
+ * SDBs between aux->head and aux->empty_mark are already reset at last time.
+ * and ready for new samples. So scanning on this area could be skipped.
+ *
+ * Return true if alert indicator is set successfully and false if not.
+ */
+static bool aux_reset_buffer(struct aux_buffer *aux, unsigned long range,
+			     unsigned long long *overflow)
+{
+	unsigned long i, range_scan, idx, idx_old;
+	union hws_trailer_header old, prev, new;
+	unsigned long long orig_overflow;
+	struct hws_trailer_entry *te;
+
+	debug_sprintf_event(sfdbg, 6, "%s: range %ld head %ld alert %ld "
+			    "empty %ld\n", __func__, range, aux->head,
+			    aux->alert_mark, aux->empty_mark);
+	if (range <= aux_sdb_num_empty(aux))
+		/*
+		 * No need to scan. All SDBs in range are marked as empty.
+		 * Just set alert indicator. Should check race with hardware
+		 * sampler.
+		 */
+		return aux_set_alert(aux, aux->alert_mark, overflow);
+
+	if (aux->alert_mark <= aux->empty_mark)
+		/*
+		 * Set alert indicator on empty SDB. Should check race
+		 * with hardware sampler.
+		 */
+		if (!aux_set_alert(aux, aux->alert_mark, overflow))
+			return false;
+
+	/*
+	 * Scan the SDBs to clear full and alert indicator used previously.
+	 * Start scanning from one SDB behind empty_mark. If the new alert
+	 * indicator fall into this range, set it.
+	 */
+	range_scan = range - aux_sdb_num_empty(aux);
+	idx_old = idx = aux->empty_mark + 1;
+	for (i = 0; i < range_scan; i++, idx++) {
+		te = aux_sdb_trailer(aux, idx);
+		prev.val = READ_ONCE_ALIGNED_128(te->header.val);
+		do {
+			old.val = prev.val;
+			new.val = prev.val;
+			orig_overflow = old.overflow;
+			new.f = 0;
+			new.overflow = 0;
+			if (idx == aux->alert_mark)
+				new.a = 1;
+			else
+				new.a = 0;
+			prev.val = cmpxchg128(&te->header.val, old.val, new.val);
+		} while (prev.val != old.val);
+		*overflow += orig_overflow;
+	}
+
+	/* Update empty_mark to new position */
+	aux->empty_mark = aux->head + range - 1;
+
+	debug_sprintf_event(sfdbg, 6, "%s: range_scan %ld idx %ld..%ld "
+			    "empty %ld\n", __func__, range_scan, idx_old,
+			    idx - 1, aux->empty_mark);
+	return true;
+}
+
+/*
+ * Measurement alert handler for diagnostic mode sampling.
+ */
+static void hw_collect_aux(struct cpu_hw_sf *cpuhw)
+{
+	struct aux_buffer *aux;
+	int done = 0;
+	unsigned long range = 0, size;
+	unsigned long long overflow = 0;
+	struct perf_output_handle *handle = &cpuhw->handle;
+	unsigned long num_sdb;
+
+	aux = perf_get_aux(handle);
+	if (WARN_ON_ONCE(!aux))
+		return;
+
+	/* Inform user space new data arrived */
+	size = aux_sdb_num_alert(aux) << PAGE_SHIFT;
+	debug_sprintf_event(sfdbg, 6, "%s: #alert %ld\n", __func__,
+			    size >> PAGE_SHIFT);
+	perf_aux_output_end(handle, size);
+
+	num_sdb = aux->sfb.num_sdb;
+	while (!done) {
+		/* Get an output handle */
+		aux = perf_aux_output_begin(handle, cpuhw->event);
+		if (handle->size == 0) {
+			pr_err("The AUX buffer with %lu pages for the "
+			       "diagnostic-sampling mode is full\n",
+				num_sdb);
+			break;
+		}
+		if (WARN_ON_ONCE(!aux))
+			return;
+
+		/* Update head and alert_mark to new position */
+		aux->head = handle->head >> PAGE_SHIFT;
+		range = (handle->size + 1) >> PAGE_SHIFT;
+		if (range == 1)
+			aux->alert_mark = aux->head;
+		else
+			aux->alert_mark = aux->head + range/2 - 1;
+
+		if (aux_reset_buffer(aux, range, &overflow)) {
+			if (!overflow) {
+				done = 1;
+				break;
+			}
+			size = range << PAGE_SHIFT;
+			perf_aux_output_end(&cpuhw->handle, size);
+			pr_err("Sample data caused the AUX buffer with %lu "
+			       "pages to overflow\n", aux->sfb.num_sdb);
+			debug_sprintf_event(sfdbg, 1, "%s: head %ld range %ld "
+					    "overflow %lld\n", __func__,
+					    aux->head, range, overflow);
+		} else {
+			size = aux_sdb_num_alert(aux) << PAGE_SHIFT;
+			perf_aux_output_end(&cpuhw->handle, size);
+			debug_sprintf_event(sfdbg, 6, "%s: head %ld alert %ld "
+					    "already full, try another\n",
+					    __func__,
+					    aux->head, aux->alert_mark);
+		}
+	}
+
+	if (done)
+		debug_sprintf_event(sfdbg, 6, "%s: head %ld alert %ld "
+				    "empty %ld\n", __func__, aux->head,
+				    aux->alert_mark, aux->empty_mark);
+}
+
+/*
+ * Callback when freeing AUX buffers.
+ */
+static void aux_buffer_free(void *data)
+{
+	struct aux_buffer *aux = data;
+	unsigned long i, num_sdbt;
+
+	if (!aux)
+		return;
+
+	/* Free SDBT. SDB is freed by the caller */
+	num_sdbt = aux->sfb.num_sdbt;
+	for (i = 0; i < num_sdbt; i++)
+		free_page(aux->sdbt_index[i]);
+
+	kfree(aux->sdbt_index);
+	kfree(aux->sdb_index);
+	kfree(aux);
+
+	debug_sprintf_event(sfdbg, 4, "%s: SDBTs %lu\n", __func__, num_sdbt);
+}
+
+static void aux_sdb_init(unsigned long sdb)
+{
+	struct hws_trailer_entry *te;
+
+	te = trailer_entry_ptr(sdb);
+
+	/* Save clock base */
+	te->clock_base = 1;
+	te->progusage2 = tod_clock_base.tod;
+}
+
+/*
+ * aux_buffer_setup() - Setup AUX buffer for diagnostic mode sampling
+ * @event:	Event the buffer is setup for, event->cpu == -1 means current
+ * @pages:	Array of pointers to buffer pages passed from perf core
+ * @nr_pages:	Total pages
+ * @snapshot:	Flag for snapshot mode
+ *
+ * This is the callback when setup an event using AUX buffer. Perf tool can
+ * trigger this by an additional mmap() call on the event. Unlike the buffer
+ * for basic samples, AUX buffer belongs to the event. It is scheduled with
+ * the task among online cpus when it is a per-thread event.
+ *
+ * Return the private AUX buffer structure if success or NULL if fails.
+ */
+static void *aux_buffer_setup(struct perf_event *event, void **pages,
+			      int nr_pages, bool snapshot)
+{
+	struct sf_buffer *sfb;
+	struct aux_buffer *aux;
+	unsigned long *new, *tail;
+	int i, n_sdbt;
+
+	if (!nr_pages || !pages)
+		return NULL;
+
+	if (nr_pages > CPUM_SF_MAX_SDB * CPUM_SF_SDB_DIAG_FACTOR) {
+		pr_err("AUX buffer size (%i pages) is larger than the "
+		       "maximum sampling buffer limit\n",
+		       nr_pages);
+		return NULL;
+	} else if (nr_pages < CPUM_SF_MIN_SDB * CPUM_SF_SDB_DIAG_FACTOR) {
+		pr_err("AUX buffer size (%i pages) is less than the "
+		       "minimum sampling buffer limit\n",
+		       nr_pages);
+		return NULL;
+	}
+
+	/* Allocate aux_buffer struct for the event */
+	aux = kzalloc(sizeof(struct aux_buffer), GFP_KERNEL);
+	if (!aux)
+		goto no_aux;
+	sfb = &aux->sfb;
+
+	/* Allocate sdbt_index for fast reference */
+	n_sdbt = DIV_ROUND_UP(nr_pages, CPUM_SF_SDB_PER_TABLE);
+	aux->sdbt_index = kmalloc_array(n_sdbt, sizeof(void *), GFP_KERNEL);
+	if (!aux->sdbt_index)
+		goto no_sdbt_index;
+
+	/* Allocate sdb_index for fast reference */
+	aux->sdb_index = kmalloc_array(nr_pages, sizeof(void *), GFP_KERNEL);
+	if (!aux->sdb_index)
+		goto no_sdb_index;
+
+	/* Allocate the first SDBT */
+	sfb->num_sdbt = 0;
+	sfb->sdbt = (unsigned long *)get_zeroed_page(GFP_KERNEL);
+	if (!sfb->sdbt)
+		goto no_sdbt;
+	aux->sdbt_index[sfb->num_sdbt++] = (unsigned long)sfb->sdbt;
+	tail = sfb->tail = sfb->sdbt;
+
+	/*
+	 * Link the provided pages of AUX buffer to SDBT.
+	 * Allocate SDBT if needed.
+	 */
+	for (i = 0; i < nr_pages; i++, tail++) {
+		if (require_table_link(tail)) {
+			new = (unsigned long *)get_zeroed_page(GFP_KERNEL);
+			if (!new)
+				goto no_sdbt;
+			aux->sdbt_index[sfb->num_sdbt++] = (unsigned long)new;
+			/* Link current page to tail of chain */
+			*tail = virt_to_phys(new) + 1;
+			tail = new;
+		}
+		/* Tail is the entry in a SDBT */
+		*tail = virt_to_phys(pages[i]);
+		aux->sdb_index[i] = (unsigned long)pages[i];
+		aux_sdb_init((unsigned long)pages[i]);
+	}
+	sfb->num_sdb = nr_pages;
+
+	/* Link the last entry in the SDBT to the first SDBT */
+	*tail = virt_to_phys(sfb->sdbt) + 1;
+	sfb->tail = tail;
+
+	/*
+	 * Initial all SDBs are zeroed. Mark it as empty.
+	 * So there is no need to clear the full indicator
+	 * when this event is first added.
+	 */
+	aux->empty_mark = sfb->num_sdb - 1;
+
+	debug_sprintf_event(sfdbg, 4, "%s: SDBTs %lu SDBs %lu\n", __func__,
+			    sfb->num_sdbt, sfb->num_sdb);
+
+	return aux;
+
+no_sdbt:
+	/* SDBs (AUX buffer pages) are freed by caller */
+	for (i = 0; i < sfb->num_sdbt; i++)
+		free_page(aux->sdbt_index[i]);
+	kfree(aux->sdb_index);
+no_sdb_index:
+	kfree(aux->sdbt_index);
+no_sdbt_index:
+	kfree(aux);
+no_aux:
+	return NULL;
+}
+
+static void cpumsf_pmu_read(struct perf_event *event)
+{
+	/* Nothing to do ... updates are interrupt-driven */
+}
+
+/* Check if the new sampling period/frequency is appropriate.
+ *
+ * Return non-zero on error and zero on passed checks.
+ */
+static int cpumsf_pmu_check_period(struct perf_event *event, u64 value)
+{
+	struct hws_qsi_info_block si;
+	unsigned long rate;
+	bool do_freq;
+
+	memset(&si, 0, sizeof(si));
+	if (event->cpu == -1) {
+		if (qsi(&si))
+			return -ENODEV;
+	} else {
+		/* Event is pinned to a particular CPU, retrieve the per-CPU
+		 * sampling structure for accessing the CPU-specific QSI.
+		 */
+		struct cpu_hw_sf *cpuhw = &per_cpu(cpu_hw_sf, event->cpu);
+
+		si = cpuhw->qsi;
+	}
+
+	do_freq = !!SAMPLE_FREQ_MODE(&event->hw);
+	rate = getrate(do_freq, value, &si);
+	if (!rate)
+		return -EINVAL;
+
+	event->attr.sample_period = rate;
+	SAMPL_RATE(&event->hw) = rate;
+	hw_init_period(&event->hw, SAMPL_RATE(&event->hw));
+	debug_sprintf_event(sfdbg, 4, "%s:"
+			    " cpu %d value %#llx period %#llx freq %d\n",
+			    __func__, event->cpu, value,
+			    event->attr.sample_period, do_freq);
+	return 0;
+}
+
+/* Activate sampling control.
+ * Next call of pmu_enable() starts sampling.
+ */
+static void cpumsf_pmu_start(struct perf_event *event, int flags)
+{
+	struct cpu_hw_sf *cpuhw = this_cpu_ptr(&cpu_hw_sf);
+
+	if (WARN_ON_ONCE(!(event->hw.state & PERF_HES_STOPPED)))
+		return;
+
+	if (flags & PERF_EF_RELOAD)
+		WARN_ON_ONCE(!(event->hw.state & PERF_HES_UPTODATE));
+
+	perf_pmu_disable(event->pmu);
+	event->hw.state = 0;
+	cpuhw->lsctl.cs = 1;
+	if (SAMPL_DIAG_MODE(&event->hw))
+		cpuhw->lsctl.cd = 1;
+	perf_pmu_enable(event->pmu);
+}
+
+/* Deactivate sampling control.
+ * Next call of pmu_enable() stops sampling.
+ */
+static void cpumsf_pmu_stop(struct perf_event *event, int flags)
+{
+	struct cpu_hw_sf *cpuhw = this_cpu_ptr(&cpu_hw_sf);
+
+	if (event->hw.state & PERF_HES_STOPPED)
+		return;
+
+	perf_pmu_disable(event->pmu);
+	cpuhw->lsctl.cs = 0;
+	cpuhw->lsctl.cd = 0;
+	event->hw.state |= PERF_HES_STOPPED;
+
+	if ((flags & PERF_EF_UPDATE) && !(event->hw.state & PERF_HES_UPTODATE)) {
+		hw_perf_event_update(event, 1);
+		event->hw.state |= PERF_HES_UPTODATE;
+	}
+	perf_pmu_enable(event->pmu);
+}
+
+static int cpumsf_pmu_add(struct perf_event *event, int flags)
+{
+	struct cpu_hw_sf *cpuhw = this_cpu_ptr(&cpu_hw_sf);
+	struct aux_buffer *aux;
+	int err;
+
+	if (cpuhw->flags & PMU_F_IN_USE)
+		return -EAGAIN;
+
+	if (!SAMPL_DIAG_MODE(&event->hw) && !cpuhw->sfb.sdbt)
+		return -EINVAL;
+
+	err = 0;
+	perf_pmu_disable(event->pmu);
+
+	event->hw.state = PERF_HES_UPTODATE | PERF_HES_STOPPED;
+
+	/* Set up sampling controls.  Always program the sampling register
+	 * using the SDB-table start.  Reset TEAR_REG event hardware register
+	 * that is used by hw_perf_event_update() to store the sampling buffer
+	 * position after samples have been flushed.
+	 */
+	cpuhw->lsctl.s = 0;
+	cpuhw->lsctl.h = 1;
+	cpuhw->lsctl.interval = SAMPL_RATE(&event->hw);
+	if (!SAMPL_DIAG_MODE(&event->hw)) {
+		cpuhw->lsctl.tear = virt_to_phys(cpuhw->sfb.sdbt);
+		cpuhw->lsctl.dear = *(unsigned long *)cpuhw->sfb.sdbt;
+		TEAR_REG(&event->hw) = (unsigned long)cpuhw->sfb.sdbt;
+	}
+
+	/* Ensure sampling functions are in the disabled state.  If disabled,
+	 * switch on sampling enable control. */
+	if (WARN_ON_ONCE(cpuhw->lsctl.es == 1 || cpuhw->lsctl.ed == 1)) {
+		err = -EAGAIN;
+		goto out;
+	}
+	if (SAMPL_DIAG_MODE(&event->hw)) {
+		aux = perf_aux_output_begin(&cpuhw->handle, event);
+		if (!aux) {
+			err = -EINVAL;
+			goto out;
+		}
+		err = aux_output_begin(&cpuhw->handle, aux, cpuhw);
+		if (err)
+			goto out;
+		cpuhw->lsctl.ed = 1;
+	}
+	cpuhw->lsctl.es = 1;
+
+	/* Set in_use flag and store event */
+	cpuhw->event = event;
+	cpuhw->flags |= PMU_F_IN_USE;
+
+	if (flags & PERF_EF_START)
+		cpumsf_pmu_start(event, PERF_EF_RELOAD);
+out:
+	perf_event_update_userpage(event);
+	perf_pmu_enable(event->pmu);
+	return err;
+}
+
+static void cpumsf_pmu_del(struct perf_event *event, int flags)
+{
+	struct cpu_hw_sf *cpuhw = this_cpu_ptr(&cpu_hw_sf);
+
+	perf_pmu_disable(event->pmu);
+	cpumsf_pmu_stop(event, PERF_EF_UPDATE);
+
+	cpuhw->lsctl.es = 0;
+	cpuhw->lsctl.ed = 0;
+	cpuhw->flags &= ~PMU_F_IN_USE;
+	cpuhw->event = NULL;
+
+	if (SAMPL_DIAG_MODE(&event->hw))
+		aux_output_end(&cpuhw->handle);
+	perf_event_update_userpage(event);
+	perf_pmu_enable(event->pmu);
+}
+
+CPUMF_EVENT_ATTR(SF, SF_CYCLES_BASIC, PERF_EVENT_CPUM_SF);
+CPUMF_EVENT_ATTR(SF, SF_CYCLES_BASIC_DIAG, PERF_EVENT_CPUM_SF_DIAG);
+
+/* Attribute list for CPU_SF.
+ *
+ * The availablitiy depends on the CPU_MF sampling facility authorization
+ * for basic + diagnositic samples. This is determined at initialization
+ * time by the sampling facility device driver.
+ * If the authorization for basic samples is turned off, it should be
+ * also turned off for diagnostic sampling.
+ *
+ * During initialization of the device driver, check the authorization
+ * level for diagnostic sampling and installs the attribute
+ * file for diagnostic sampling if necessary.
+ *
+ * For now install a placeholder to reference all possible attributes:
+ * SF_CYCLES_BASIC and SF_CYCLES_BASIC_DIAG.
+ * Add another entry for the final NULL pointer.
+ */
+enum {
+	SF_CYCLES_BASIC_ATTR_IDX = 0,
+	SF_CYCLES_BASIC_DIAG_ATTR_IDX,
+	SF_CYCLES_ATTR_MAX
+};
+
+static struct attribute *cpumsf_pmu_events_attr[SF_CYCLES_ATTR_MAX + 1] = {
+	[SF_CYCLES_BASIC_ATTR_IDX] = CPUMF_EVENT_PTR(SF, SF_CYCLES_BASIC)
+};
+
+PMU_FORMAT_ATTR(event, "config:0-63");
+
+static struct attribute *cpumsf_pmu_format_attr[] = {
+	&format_attr_event.attr,
+	NULL,
+};
+
+static struct attribute_group cpumsf_pmu_events_group = {
+	.name = "events",
+	.attrs = cpumsf_pmu_events_attr,
+};
+
+static struct attribute_group cpumsf_pmu_format_group = {
+	.name = "format",
+	.attrs = cpumsf_pmu_format_attr,
+};
+
+static const struct attribute_group *cpumsf_pmu_attr_groups[] = {
+	&cpumsf_pmu_events_group,
+	&cpumsf_pmu_format_group,
+	NULL,
+};
+
+static struct pmu cpumf_sampling = {
+	.pmu_enable   = cpumsf_pmu_enable,
+	.pmu_disable  = cpumsf_pmu_disable,
+
+	.event_init   = cpumsf_pmu_event_init,
+	.add	      = cpumsf_pmu_add,
+	.del	      = cpumsf_pmu_del,
+
+	.start	      = cpumsf_pmu_start,
+	.stop	      = cpumsf_pmu_stop,
+	.read	      = cpumsf_pmu_read,
+
+	.attr_groups  = cpumsf_pmu_attr_groups,
+
+	.setup_aux    = aux_buffer_setup,
+	.free_aux     = aux_buffer_free,
+
+	.check_period = cpumsf_pmu_check_period,
+};
+
+static void cpumf_measurement_alert(struct ext_code ext_code,
+				    unsigned int alert, unsigned long unused)
+{
+	struct cpu_hw_sf *cpuhw;
+
+	if (!(alert & CPU_MF_INT_SF_MASK))
+		return;
+	inc_irq_stat(IRQEXT_CMS);
+	cpuhw = this_cpu_ptr(&cpu_hw_sf);
+
+	/* Measurement alerts are shared and might happen when the PMU
+	 * is not reserved.  Ignore these alerts in this case. */
+	if (!(cpuhw->flags & PMU_F_RESERVED))
+		return;
+
+	/* The processing below must take care of multiple alert events that
+	 * might be indicated concurrently. */
+
+	/* Program alert request */
+	if (alert & CPU_MF_INT_SF_PRA) {
+		if (cpuhw->flags & PMU_F_IN_USE)
+			if (SAMPL_DIAG_MODE(&cpuhw->event->hw))
+				hw_collect_aux(cpuhw);
+			else
+				hw_perf_event_update(cpuhw->event, 0);
+		else
+			WARN_ON_ONCE(!(cpuhw->flags & PMU_F_IN_USE));
+	}
+
+	/* Report measurement alerts only for non-PRA codes */
+	if (alert != CPU_MF_INT_SF_PRA)
+		debug_sprintf_event(sfdbg, 6, "%s: alert %#x\n", __func__,
+				    alert);
+
+	/* Sampling authorization change request */
+	if (alert & CPU_MF_INT_SF_SACA)
+		qsi(&cpuhw->qsi);
+
+	/* Loss of sample data due to high-priority machine activities */
+	if (alert & CPU_MF_INT_SF_LSDA) {
+		pr_err("Sample data was lost\n");
+		cpuhw->flags |= PMU_F_ERR_LSDA;
+		sf_disable();
+	}
+
+	/* Invalid sampling buffer entry */
+	if (alert & (CPU_MF_INT_SF_IAE|CPU_MF_INT_SF_ISE)) {
+		pr_err("A sampling buffer entry is incorrect (alert=0x%x)\n",
+		       alert);
+		cpuhw->flags |= PMU_F_ERR_IBE;
+		sf_disable();
+	}
+}
+
+static int cpusf_pmu_setup(unsigned int cpu, int flags)
+{
+	/* Ignore the notification if no events are scheduled on the PMU.
+	 * This might be racy...
+	 */
+	if (!atomic_read(&num_events))
+		return 0;
+
+	local_irq_disable();
+	setup_pmc_cpu(&flags);
+	local_irq_enable();
+	return 0;
+}
+
+static int s390_pmu_sf_online_cpu(unsigned int cpu)
+{
+	return cpusf_pmu_setup(cpu, PMC_INIT);
+}
+
+static int s390_pmu_sf_offline_cpu(unsigned int cpu)
+{
+	return cpusf_pmu_setup(cpu, PMC_RELEASE);
+}
+
+static int param_get_sfb_size(char *buffer, const struct kernel_param *kp)
+{
+	if (!cpum_sf_avail())
+		return -ENODEV;
+	return sprintf(buffer, "%lu,%lu", CPUM_SF_MIN_SDB, CPUM_SF_MAX_SDB);
+}
+
+static int param_set_sfb_size(const char *val, const struct kernel_param *kp)
+{
+	int rc;
+	unsigned long min, max;
+
+	if (!cpum_sf_avail())
+		return -ENODEV;
+	if (!val || !strlen(val))
+		return -EINVAL;
+
+	/* Valid parameter values: "min,max" or "max" */
+	min = CPUM_SF_MIN_SDB;
+	max = CPUM_SF_MAX_SDB;
+	if (strchr(val, ','))
+		rc = (sscanf(val, "%lu,%lu", &min, &max) == 2) ? 0 : -EINVAL;
+	else
+		rc = kstrtoul(val, 10, &max);
+
+	if (min < 2 || min >= max || max > get_num_physpages())
+		rc = -EINVAL;
+	if (rc)
+		return rc;
+
+	sfb_set_limits(min, max);
+	pr_info("The sampling buffer limits have changed to: "
+		"min %lu max %lu (diag %lu)\n",
+		CPUM_SF_MIN_SDB, CPUM_SF_MAX_SDB, CPUM_SF_SDB_DIAG_FACTOR);
+	return 0;
+}
+
+#define param_check_sfb_size(name, p) __param_check(name, p, void)
+static const struct kernel_param_ops param_ops_sfb_size = {
+	.set = param_set_sfb_size,
+	.get = param_get_sfb_size,
+};
+
+#define RS_INIT_FAILURE_QSI	  0x0001
+#define RS_INIT_FAILURE_BSDES	  0x0002
+#define RS_INIT_FAILURE_ALRT	  0x0003
+#define RS_INIT_FAILURE_PERF	  0x0004
+static void __init pr_cpumsf_err(unsigned int reason)
+{
+	pr_err("Sampling facility support for perf is not available: "
+	       "reason %#x\n", reason);
+}
+
+static int __init init_cpum_sampling_pmu(void)
+{
+	struct hws_qsi_info_block si;
+	int err;
+
+	if (!cpum_sf_avail())
+		return -ENODEV;
+
+	memset(&si, 0, sizeof(si));
+	if (qsi(&si)) {
+		pr_cpumsf_err(RS_INIT_FAILURE_QSI);
+		return -ENODEV;
+	}
+
+	if (!si.as && !si.ad)
+		return -ENODEV;
+
+	if (si.bsdes != sizeof(struct hws_basic_entry)) {
+		pr_cpumsf_err(RS_INIT_FAILURE_BSDES);
+		return -EINVAL;
+	}
+
+	if (si.ad) {
+		sfb_set_limits(CPUM_SF_MIN_SDB, CPUM_SF_MAX_SDB);
+		/* Sampling of diagnostic data authorized,
+		 * install event into attribute list of PMU device.
+		 */
+		cpumsf_pmu_events_attr[SF_CYCLES_BASIC_DIAG_ATTR_IDX] =
+			CPUMF_EVENT_PTR(SF, SF_CYCLES_BASIC_DIAG);
+	}
+
+	sfdbg = debug_register(KMSG_COMPONENT, 2, 1, 80);
+	if (!sfdbg) {
+		pr_err("Registering for s390dbf failed\n");
+		return -ENOMEM;
+	}
+	debug_register_view(sfdbg, &debug_sprintf_view);
+
+	err = register_external_irq(EXT_IRQ_MEASURE_ALERT,
+				    cpumf_measurement_alert);
+	if (err) {
+		pr_cpumsf_err(RS_INIT_FAILURE_ALRT);
+		debug_unregister(sfdbg);
+		goto out;
+	}
+
+	err = perf_pmu_register(&cpumf_sampling, "cpum_sf", PERF_TYPE_RAW);
+	if (err) {
+		pr_cpumsf_err(RS_INIT_FAILURE_PERF);
+		unregister_external_irq(EXT_IRQ_MEASURE_ALERT,
+					cpumf_measurement_alert);
+		debug_unregister(sfdbg);
+		goto out;
+	}
+
+	cpuhp_setup_state(CPUHP_AP_PERF_S390_SF_ONLINE, "perf/s390/sf:online",
+			  s390_pmu_sf_online_cpu, s390_pmu_sf_offline_cpu);
+out:
+	return err;
+}
+
+arch_initcall(init_cpum_sampling_pmu);
+core_param(cpum_sfb_size, CPUM_SF_MAX_SDB, sfb_size, 0644);
diff --git a/arch/s390/kernel/perf_event.c b/arch/s390/kernel/perf_event.c
new file mode 100644
index 0000000000..c27321cb09
--- /dev/null
+++ b/arch/s390/kernel/perf_event.c
@@ -0,0 +1,223 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Performance event support for s390x
+ *
+ *  Copyright IBM Corp. 2012, 2013
+ *  Author(s): Hendrik Brueckner <brueckner@linux.vnet.ibm.com>
+ */
+#define KMSG_COMPONENT	"perf"
+#define pr_fmt(fmt)	KMSG_COMPONENT ": " fmt
+
+#include <linux/kernel.h>
+#include <linux/perf_event.h>
+#include <linux/kvm_host.h>
+#include <linux/percpu.h>
+#include <linux/export.h>
+#include <linux/seq_file.h>
+#include <linux/spinlock.h>
+#include <linux/sysfs.h>
+#include <asm/irq.h>
+#include <asm/cpu_mf.h>
+#include <asm/lowcore.h>
+#include <asm/processor.h>
+#include <asm/sysinfo.h>
+#include <asm/unwind.h>
+
+static struct kvm_s390_sie_block *sie_block(struct pt_regs *regs)
+{
+	struct stack_frame *stack = (struct stack_frame *) regs->gprs[15];
+
+	if (!stack)
+		return NULL;
+
+	return (struct kvm_s390_sie_block *)stack->sie_control_block;
+}
+
+static bool is_in_guest(struct pt_regs *regs)
+{
+	if (user_mode(regs))
+		return false;
+#if IS_ENABLED(CONFIG_KVM)
+	return instruction_pointer(regs) == (unsigned long) &sie_exit;
+#else
+	return false;
+#endif
+}
+
+static unsigned long guest_is_user_mode(struct pt_regs *regs)
+{
+	return sie_block(regs)->gpsw.mask & PSW_MASK_PSTATE;
+}
+
+static unsigned long instruction_pointer_guest(struct pt_regs *regs)
+{
+	return sie_block(regs)->gpsw.addr;
+}
+
+unsigned long perf_instruction_pointer(struct pt_regs *regs)
+{
+	return is_in_guest(regs) ? instruction_pointer_guest(regs)
+				 : instruction_pointer(regs);
+}
+
+static unsigned long perf_misc_guest_flags(struct pt_regs *regs)
+{
+	return guest_is_user_mode(regs) ? PERF_RECORD_MISC_GUEST_USER
+					: PERF_RECORD_MISC_GUEST_KERNEL;
+}
+
+static unsigned long perf_misc_flags_sf(struct pt_regs *regs)
+{
+	struct perf_sf_sde_regs *sde_regs;
+	unsigned long flags;
+
+	sde_regs = (struct perf_sf_sde_regs *) &regs->int_parm_long;
+	if (sde_regs->in_guest)
+		flags = user_mode(regs) ? PERF_RECORD_MISC_GUEST_USER
+					: PERF_RECORD_MISC_GUEST_KERNEL;
+	else
+		flags = user_mode(regs) ? PERF_RECORD_MISC_USER
+					: PERF_RECORD_MISC_KERNEL;
+	return flags;
+}
+
+unsigned long perf_misc_flags(struct pt_regs *regs)
+{
+	/* Check if the cpum_sf PMU has created the pt_regs structure.
+	 * In this case, perf misc flags can be easily extracted.  Otherwise,
+	 * do regular checks on the pt_regs content.
+	 */
+	if (regs->int_code == 0x1407 && regs->int_parm == CPU_MF_INT_SF_PRA)
+		if (!regs->gprs[15])
+			return perf_misc_flags_sf(regs);
+
+	if (is_in_guest(regs))
+		return perf_misc_guest_flags(regs);
+
+	return user_mode(regs) ? PERF_RECORD_MISC_USER
+			       : PERF_RECORD_MISC_KERNEL;
+}
+
+static void print_debug_cf(void)
+{
+	struct cpumf_ctr_info cf_info;
+	int cpu = smp_processor_id();
+
+	memset(&cf_info, 0, sizeof(cf_info));
+	if (!qctri(&cf_info))
+		pr_info("CPU[%i] CPUM_CF: ver=%u.%u A=%04x E=%04x C=%04x\n",
+			cpu, cf_info.cfvn, cf_info.csvn,
+			cf_info.auth_ctl, cf_info.enable_ctl, cf_info.act_ctl);
+}
+
+static void print_debug_sf(void)
+{
+	struct hws_qsi_info_block si;
+	int cpu = smp_processor_id();
+
+	memset(&si, 0, sizeof(si));
+	if (qsi(&si))
+		return;
+
+	pr_info("CPU[%i] CPUM_SF: basic=%i diag=%i min=%lu max=%lu cpu_speed=%u\n",
+		cpu, si.as, si.ad, si.min_sampl_rate, si.max_sampl_rate,
+		si.cpu_speed);
+
+	if (si.as)
+		pr_info("CPU[%i] CPUM_SF: Basic-sampling: a=%i e=%i c=%i"
+			" bsdes=%i tear=%016lx dear=%016lx\n", cpu,
+			si.as, si.es, si.cs, si.bsdes, si.tear, si.dear);
+	if (si.ad)
+		pr_info("CPU[%i] CPUM_SF: Diagnostic-sampling: a=%i e=%i c=%i"
+			" dsdes=%i tear=%016lx dear=%016lx\n", cpu,
+			si.ad, si.ed, si.cd, si.dsdes, si.tear, si.dear);
+}
+
+void perf_event_print_debug(void)
+{
+	unsigned long flags;
+
+	local_irq_save(flags);
+	if (cpum_cf_avail())
+		print_debug_cf();
+	if (cpum_sf_avail())
+		print_debug_sf();
+	local_irq_restore(flags);
+}
+
+/* Service level infrastructure */
+static void sl_print_counter(struct seq_file *m)
+{
+	struct cpumf_ctr_info ci;
+
+	memset(&ci, 0, sizeof(ci));
+	if (qctri(&ci))
+		return;
+
+	seq_printf(m, "CPU-MF: Counter facility: version=%u.%u "
+		   "authorization=%04x\n", ci.cfvn, ci.csvn, ci.auth_ctl);
+}
+
+static void sl_print_sampling(struct seq_file *m)
+{
+	struct hws_qsi_info_block si;
+
+	memset(&si, 0, sizeof(si));
+	if (qsi(&si))
+		return;
+
+	if (!si.as && !si.ad)
+		return;
+
+	seq_printf(m, "CPU-MF: Sampling facility: min_rate=%lu max_rate=%lu"
+		   " cpu_speed=%u\n", si.min_sampl_rate, si.max_sampl_rate,
+		   si.cpu_speed);
+	if (si.as)
+		seq_printf(m, "CPU-MF: Sampling facility: mode=basic"
+			   " sample_size=%u\n", si.bsdes);
+	if (si.ad)
+		seq_printf(m, "CPU-MF: Sampling facility: mode=diagnostic"
+			   " sample_size=%u\n", si.dsdes);
+}
+
+static void service_level_perf_print(struct seq_file *m,
+				     struct service_level *sl)
+{
+	if (cpum_cf_avail())
+		sl_print_counter(m);
+	if (cpum_sf_avail())
+		sl_print_sampling(m);
+}
+
+static struct service_level service_level_perf = {
+	.seq_print = service_level_perf_print,
+};
+
+static int __init service_level_perf_register(void)
+{
+	return register_service_level(&service_level_perf);
+}
+arch_initcall(service_level_perf_register);
+
+void perf_callchain_kernel(struct perf_callchain_entry_ctx *entry,
+			   struct pt_regs *regs)
+{
+	struct unwind_state state;
+	unsigned long addr;
+
+	unwind_for_each_frame(&state, current, regs, 0) {
+		addr = unwind_get_return_address(&state);
+		if (!addr || perf_callchain_store(entry, addr))
+			return;
+	}
+}
+
+/* Perf definitions for PMU event attributes in sysfs */
+ssize_t cpumf_events_sysfs_show(struct device *dev,
+				struct device_attribute *attr, char *page)
+{
+	struct perf_pmu_events_attr *pmu_attr;
+
+	pmu_attr = container_of(attr, struct perf_pmu_events_attr, attr);
+	return sprintf(page, "event=0x%04llx\n", pmu_attr->id);
+}
diff --git a/arch/s390/kernel/perf_pai_crypto.c b/arch/s390/kernel/perf_pai_crypto.c
new file mode 100644
index 0000000000..fe7d1774de
--- /dev/null
+++ b/arch/s390/kernel/perf_pai_crypto.c
@@ -0,0 +1,698 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Performance event support - Processor Activity Instrumentation Facility
+ *
+ *  Copyright IBM Corp. 2022
+ *  Author(s): Thomas Richter <tmricht@linux.ibm.com>
+ */
+#define KMSG_COMPONENT	"pai_crypto"
+#define pr_fmt(fmt)	KMSG_COMPONENT ": " fmt
+
+#include <linux/kernel.h>
+#include <linux/kernel_stat.h>
+#include <linux/percpu.h>
+#include <linux/notifier.h>
+#include <linux/init.h>
+#include <linux/export.h>
+#include <linux/io.h>
+#include <linux/perf_event.h>
+
+#include <asm/ctl_reg.h>
+#include <asm/pai.h>
+#include <asm/debug.h>
+
+static debug_info_t *cfm_dbg;
+static unsigned int paicrypt_cnt;	/* Size of the mapped counter sets */
+					/* extracted with QPACI instruction */
+
+DEFINE_STATIC_KEY_FALSE(pai_key);
+
+struct pai_userdata {
+	u16 num;
+	u64 value;
+} __packed;
+
+struct paicrypt_map {
+	unsigned long *page;		/* Page for CPU to store counters */
+	struct pai_userdata *save;	/* Page to store no-zero counters */
+	unsigned int active_events;	/* # of PAI crypto users */
+	refcount_t refcnt;		/* Reference count mapped buffers */
+	enum paievt_mode mode;		/* Type of event */
+	struct perf_event *event;	/* Perf event for sampling */
+};
+
+static DEFINE_PER_CPU(struct paicrypt_map, paicrypt_map);
+
+/* Release the PMU if event is the last perf event */
+static DEFINE_MUTEX(pai_reserve_mutex);
+
+/* Adjust usage counters and remove allocated memory when all users are
+ * gone.
+ */
+static void paicrypt_event_destroy(struct perf_event *event)
+{
+	struct paicrypt_map *cpump = per_cpu_ptr(&paicrypt_map, event->cpu);
+
+	cpump->event = NULL;
+	static_branch_dec(&pai_key);
+	mutex_lock(&pai_reserve_mutex);
+	debug_sprintf_event(cfm_dbg, 5, "%s event %#llx cpu %d users %d"
+			    " mode %d refcnt %u\n", __func__,
+			    event->attr.config, event->cpu,
+			    cpump->active_events, cpump->mode,
+			    refcount_read(&cpump->refcnt));
+	if (refcount_dec_and_test(&cpump->refcnt)) {
+		debug_sprintf_event(cfm_dbg, 4, "%s page %#lx save %p\n",
+				    __func__, (unsigned long)cpump->page,
+				    cpump->save);
+		free_page((unsigned long)cpump->page);
+		cpump->page = NULL;
+		kvfree(cpump->save);
+		cpump->save = NULL;
+		cpump->mode = PAI_MODE_NONE;
+	}
+	mutex_unlock(&pai_reserve_mutex);
+}
+
+static u64 paicrypt_getctr(struct paicrypt_map *cpump, int nr, bool kernel)
+{
+	if (kernel)
+		nr += PAI_CRYPTO_MAXCTR;
+	return cpump->page[nr];
+}
+
+/* Read the counter values. Return value from location in CMP. For event
+ * CRYPTO_ALL sum up all events.
+ */
+static u64 paicrypt_getdata(struct perf_event *event, bool kernel)
+{
+	struct paicrypt_map *cpump = this_cpu_ptr(&paicrypt_map);
+	u64 sum = 0;
+	int i;
+
+	if (event->attr.config != PAI_CRYPTO_BASE) {
+		return paicrypt_getctr(cpump,
+				       event->attr.config - PAI_CRYPTO_BASE,
+				       kernel);
+	}
+
+	for (i = 1; i <= paicrypt_cnt; i++) {
+		u64 val = paicrypt_getctr(cpump, i, kernel);
+
+		if (!val)
+			continue;
+		sum += val;
+	}
+	return sum;
+}
+
+static u64 paicrypt_getall(struct perf_event *event)
+{
+	u64 sum = 0;
+
+	if (!event->attr.exclude_kernel)
+		sum += paicrypt_getdata(event, true);
+	if (!event->attr.exclude_user)
+		sum += paicrypt_getdata(event, false);
+
+	return sum;
+}
+
+/* Used to avoid races in checking concurrent access of counting and
+ * sampling for crypto events
+ *
+ * Only one instance of event pai_crypto/CRYPTO_ALL/ for sampling is
+ * allowed and when this event is running, no counting event is allowed.
+ * Several counting events are allowed in parallel, but no sampling event
+ * is allowed while one (or more) counting events are running.
+ *
+ * This function is called in process context and it is save to block.
+ * When the event initialization functions fails, no other call back will
+ * be invoked.
+ *
+ * Allocate the memory for the event.
+ */
+static int paicrypt_busy(struct perf_event_attr *a, struct paicrypt_map *cpump)
+{
+	int rc = 0;
+
+	mutex_lock(&pai_reserve_mutex);
+	if (a->sample_period) {		/* Sampling requested */
+		if (cpump->mode != PAI_MODE_NONE)
+			rc = -EBUSY;	/* ... sampling/counting active */
+	} else {			/* Counting requested */
+		if (cpump->mode == PAI_MODE_SAMPLING)
+			rc = -EBUSY;	/* ... and sampling active */
+	}
+	if (rc)
+		goto unlock;
+
+	/* Allocate memory for counter page and counter extraction.
+	 * Only the first counting event has to allocate a page.
+	 */
+	if (cpump->page) {
+		refcount_inc(&cpump->refcnt);
+		goto unlock;
+	}
+
+	rc = -ENOMEM;
+	cpump->page = (unsigned long *)get_zeroed_page(GFP_KERNEL);
+	if (!cpump->page)
+		goto unlock;
+	cpump->save = kvmalloc_array(paicrypt_cnt + 1,
+				     sizeof(struct pai_userdata), GFP_KERNEL);
+	if (!cpump->save) {
+		free_page((unsigned long)cpump->page);
+		cpump->page = NULL;
+		goto unlock;
+	}
+	rc = 0;
+	refcount_set(&cpump->refcnt, 1);
+
+unlock:
+	/* If rc is non-zero, do not set mode and reference count */
+	if (!rc) {
+		cpump->mode = a->sample_period ? PAI_MODE_SAMPLING
+					       : PAI_MODE_COUNTING;
+	}
+	debug_sprintf_event(cfm_dbg, 5, "%s sample_period %#llx users %d"
+			    " mode %d refcnt %u page %#lx save %p rc %d\n",
+			    __func__, a->sample_period, cpump->active_events,
+			    cpump->mode, refcount_read(&cpump->refcnt),
+			    (unsigned long)cpump->page, cpump->save, rc);
+	mutex_unlock(&pai_reserve_mutex);
+	return rc;
+}
+
+/* Might be called on different CPU than the one the event is intended for. */
+static int paicrypt_event_init(struct perf_event *event)
+{
+	struct perf_event_attr *a = &event->attr;
+	struct paicrypt_map *cpump;
+	int rc;
+
+	/* PAI crypto PMU registered as PERF_TYPE_RAW, check event type */
+	if (a->type != PERF_TYPE_RAW && event->pmu->type != a->type)
+		return -ENOENT;
+	/* PAI crypto event must be in valid range */
+	if (a->config < PAI_CRYPTO_BASE ||
+	    a->config > PAI_CRYPTO_BASE + paicrypt_cnt)
+		return -EINVAL;
+	/* Allow only CPU wide operation, no process context for now. */
+	if (event->hw.target || event->cpu == -1)
+		return -ENOENT;
+	/* Allow only CRYPTO_ALL for sampling. */
+	if (a->sample_period && a->config != PAI_CRYPTO_BASE)
+		return -EINVAL;
+
+	cpump = per_cpu_ptr(&paicrypt_map, event->cpu);
+	rc = paicrypt_busy(a, cpump);
+	if (rc)
+		return rc;
+
+	/* Event initialization sets last_tag to 0. When later on the events
+	 * are deleted and re-added, do not reset the event count value to zero.
+	 * Events are added, deleted and re-added when 2 or more events
+	 * are active at the same time.
+	 */
+	event->hw.last_tag = 0;
+	cpump->event = event;
+	event->destroy = paicrypt_event_destroy;
+
+	if (a->sample_period) {
+		a->sample_period = 1;
+		a->freq = 0;
+		/* Register for paicrypt_sched_task() to be called */
+		event->attach_state |= PERF_ATTACH_SCHED_CB;
+		/* Add raw data which contain the memory mapped counters */
+		a->sample_type |= PERF_SAMPLE_RAW;
+		/* Turn off inheritance */
+		a->inherit = 0;
+	}
+
+	static_branch_inc(&pai_key);
+	return 0;
+}
+
+static void paicrypt_read(struct perf_event *event)
+{
+	u64 prev, new, delta;
+
+	prev = local64_read(&event->hw.prev_count);
+	new = paicrypt_getall(event);
+	local64_set(&event->hw.prev_count, new);
+	delta = (prev <= new) ? new - prev
+			      : (-1ULL - prev) + new + 1;	 /* overflow */
+	local64_add(delta, &event->count);
+}
+
+static void paicrypt_start(struct perf_event *event, int flags)
+{
+	u64 sum;
+
+	if (!event->hw.last_tag) {
+		event->hw.last_tag = 1;
+		sum = paicrypt_getall(event);		/* Get current value */
+		local64_set(&event->count, 0);
+		local64_set(&event->hw.prev_count, sum);
+	}
+}
+
+static int paicrypt_add(struct perf_event *event, int flags)
+{
+	struct paicrypt_map *cpump = this_cpu_ptr(&paicrypt_map);
+	unsigned long ccd;
+
+	if (++cpump->active_events == 1) {
+		ccd = virt_to_phys(cpump->page) | PAI_CRYPTO_KERNEL_OFFSET;
+		WRITE_ONCE(S390_lowcore.ccd, ccd);
+		__ctl_set_bit(0, 50);
+	}
+	cpump->event = event;
+	if (flags & PERF_EF_START && !event->attr.sample_period) {
+		/* Only counting needs initial counter value */
+		paicrypt_start(event, PERF_EF_RELOAD);
+	}
+	event->hw.state = 0;
+	if (event->attr.sample_period)
+		perf_sched_cb_inc(event->pmu);
+	return 0;
+}
+
+static void paicrypt_stop(struct perf_event *event, int flags)
+{
+	paicrypt_read(event);
+	event->hw.state = PERF_HES_STOPPED;
+}
+
+static void paicrypt_del(struct perf_event *event, int flags)
+{
+	struct paicrypt_map *cpump = this_cpu_ptr(&paicrypt_map);
+
+	if (event->attr.sample_period)
+		perf_sched_cb_dec(event->pmu);
+	if (!event->attr.sample_period)
+		/* Only counting needs to read counter */
+		paicrypt_stop(event, PERF_EF_UPDATE);
+	if (--cpump->active_events == 0) {
+		__ctl_clear_bit(0, 50);
+		WRITE_ONCE(S390_lowcore.ccd, 0);
+	}
+}
+
+/* Create raw data and save it in buffer. Returns number of bytes copied.
+ * Saves only positive counter entries of the form
+ * 2 bytes: Number of counter
+ * 8 bytes: Value of counter
+ */
+static size_t paicrypt_copy(struct pai_userdata *userdata,
+			    struct paicrypt_map *cpump,
+			    bool exclude_user, bool exclude_kernel)
+{
+	int i, outidx = 0;
+
+	for (i = 1; i <= paicrypt_cnt; i++) {
+		u64 val = 0;
+
+		if (!exclude_kernel)
+			val += paicrypt_getctr(cpump, i, true);
+		if (!exclude_user)
+			val += paicrypt_getctr(cpump, i, false);
+		if (val) {
+			userdata[outidx].num = i;
+			userdata[outidx].value = val;
+			outidx++;
+		}
+	}
+	return outidx * sizeof(struct pai_userdata);
+}
+
+static int paicrypt_push_sample(void)
+{
+	struct paicrypt_map *cpump = this_cpu_ptr(&paicrypt_map);
+	struct perf_event *event = cpump->event;
+	struct perf_sample_data data;
+	struct perf_raw_record raw;
+	struct pt_regs regs;
+	size_t rawsize;
+	int overflow;
+
+	if (!cpump->event)		/* No event active */
+		return 0;
+	rawsize = paicrypt_copy(cpump->save, cpump,
+				cpump->event->attr.exclude_user,
+				cpump->event->attr.exclude_kernel);
+	if (!rawsize)			/* No incremented counters */
+		return 0;
+
+	/* Setup perf sample */
+	memset(&regs, 0, sizeof(regs));
+	memset(&raw, 0, sizeof(raw));
+	memset(&data, 0, sizeof(data));
+	perf_sample_data_init(&data, 0, event->hw.last_period);
+	if (event->attr.sample_type & PERF_SAMPLE_TID) {
+		data.tid_entry.pid = task_tgid_nr(current);
+		data.tid_entry.tid = task_pid_nr(current);
+	}
+	if (event->attr.sample_type & PERF_SAMPLE_TIME)
+		data.time = event->clock();
+	if (event->attr.sample_type & (PERF_SAMPLE_ID | PERF_SAMPLE_IDENTIFIER))
+		data.id = event->id;
+	if (event->attr.sample_type & PERF_SAMPLE_CPU) {
+		data.cpu_entry.cpu = smp_processor_id();
+		data.cpu_entry.reserved = 0;
+	}
+	if (event->attr.sample_type & PERF_SAMPLE_RAW) {
+		raw.frag.size = rawsize;
+		raw.frag.data = cpump->save;
+		perf_sample_save_raw_data(&data, &raw);
+	}
+
+	overflow = perf_event_overflow(event, &data, &regs);
+	perf_event_update_userpage(event);
+	/* Clear lowcore page after read */
+	memset(cpump->page, 0, PAGE_SIZE);
+	return overflow;
+}
+
+/* Called on schedule-in and schedule-out. No access to event structure,
+ * but for sampling only event CRYPTO_ALL is allowed.
+ */
+static void paicrypt_sched_task(struct perf_event_pmu_context *pmu_ctx, bool sched_in)
+{
+	/* We started with a clean page on event installation. So read out
+	 * results on schedule_out and if page was dirty, clear values.
+	 */
+	if (!sched_in)
+		paicrypt_push_sample();
+}
+
+/* Attribute definitions for paicrypt interface. As with other CPU
+ * Measurement Facilities, there is one attribute per mapped counter.
+ * The number of mapped counters may vary per machine generation. Use
+ * the QUERY PROCESSOR ACTIVITY COUNTER INFORMATION (QPACI) instruction
+ * to determine the number of mapped counters. The instructions returns
+ * a positive number, which is the highest number of supported counters.
+ * All counters less than this number are also supported, there are no
+ * holes. A returned number of zero means no support for mapped counters.
+ *
+ * The identification of the counter is a unique number. The chosen range
+ * is 0x1000 + offset in mapped kernel page.
+ * All CPU Measurement Facility counters identifiers must be unique and
+ * the numbers from 0 to 496 are already used for the CPU Measurement
+ * Counter facility. Numbers 0xb0000, 0xbc000 and 0xbd000 are already
+ * used for the CPU Measurement Sampling facility.
+ */
+PMU_FORMAT_ATTR(event, "config:0-63");
+
+static struct attribute *paicrypt_format_attr[] = {
+	&format_attr_event.attr,
+	NULL,
+};
+
+static struct attribute_group paicrypt_events_group = {
+	.name = "events",
+	.attrs = NULL			/* Filled in attr_event_init() */
+};
+
+static struct attribute_group paicrypt_format_group = {
+	.name = "format",
+	.attrs = paicrypt_format_attr,
+};
+
+static const struct attribute_group *paicrypt_attr_groups[] = {
+	&paicrypt_events_group,
+	&paicrypt_format_group,
+	NULL,
+};
+
+/* Performance monitoring unit for mapped counters */
+static struct pmu paicrypt = {
+	.task_ctx_nr  = perf_invalid_context,
+	.event_init   = paicrypt_event_init,
+	.add	      = paicrypt_add,
+	.del	      = paicrypt_del,
+	.start	      = paicrypt_start,
+	.stop	      = paicrypt_stop,
+	.read	      = paicrypt_read,
+	.sched_task   = paicrypt_sched_task,
+	.attr_groups  = paicrypt_attr_groups
+};
+
+/* List of symbolic PAI counter names. */
+static const char * const paicrypt_ctrnames[] = {
+	[0] = "CRYPTO_ALL",
+	[1] = "KM_DEA",
+	[2] = "KM_TDEA_128",
+	[3] = "KM_TDEA_192",
+	[4] = "KM_ENCRYPTED_DEA",
+	[5] = "KM_ENCRYPTED_TDEA_128",
+	[6] = "KM_ENCRYPTED_TDEA_192",
+	[7] = "KM_AES_128",
+	[8] = "KM_AES_192",
+	[9] = "KM_AES_256",
+	[10] = "KM_ENCRYPTED_AES_128",
+	[11] = "KM_ENCRYPTED_AES_192",
+	[12] = "KM_ENCRYPTED_AES_256",
+	[13] = "KM_XTS_AES_128",
+	[14] = "KM_XTS_AES_256",
+	[15] = "KM_XTS_ENCRYPTED_AES_128",
+	[16] = "KM_XTS_ENCRYPTED_AES_256",
+	[17] = "KMC_DEA",
+	[18] = "KMC_TDEA_128",
+	[19] = "KMC_TDEA_192",
+	[20] = "KMC_ENCRYPTED_DEA",
+	[21] = "KMC_ENCRYPTED_TDEA_128",
+	[22] = "KMC_ENCRYPTED_TDEA_192",
+	[23] = "KMC_AES_128",
+	[24] = "KMC_AES_192",
+	[25] = "KMC_AES_256",
+	[26] = "KMC_ENCRYPTED_AES_128",
+	[27] = "KMC_ENCRYPTED_AES_192",
+	[28] = "KMC_ENCRYPTED_AES_256",
+	[29] = "KMC_PRNG",
+	[30] = "KMA_GCM_AES_128",
+	[31] = "KMA_GCM_AES_192",
+	[32] = "KMA_GCM_AES_256",
+	[33] = "KMA_GCM_ENCRYPTED_AES_128",
+	[34] = "KMA_GCM_ENCRYPTED_AES_192",
+	[35] = "KMA_GCM_ENCRYPTED_AES_256",
+	[36] = "KMF_DEA",
+	[37] = "KMF_TDEA_128",
+	[38] = "KMF_TDEA_192",
+	[39] = "KMF_ENCRYPTED_DEA",
+	[40] = "KMF_ENCRYPTED_TDEA_128",
+	[41] = "KMF_ENCRYPTED_TDEA_192",
+	[42] = "KMF_AES_128",
+	[43] = "KMF_AES_192",
+	[44] = "KMF_AES_256",
+	[45] = "KMF_ENCRYPTED_AES_128",
+	[46] = "KMF_ENCRYPTED_AES_192",
+	[47] = "KMF_ENCRYPTED_AES_256",
+	[48] = "KMCTR_DEA",
+	[49] = "KMCTR_TDEA_128",
+	[50] = "KMCTR_TDEA_192",
+	[51] = "KMCTR_ENCRYPTED_DEA",
+	[52] = "KMCTR_ENCRYPTED_TDEA_128",
+	[53] = "KMCTR_ENCRYPTED_TDEA_192",
+	[54] = "KMCTR_AES_128",
+	[55] = "KMCTR_AES_192",
+	[56] = "KMCTR_AES_256",
+	[57] = "KMCTR_ENCRYPTED_AES_128",
+	[58] = "KMCTR_ENCRYPTED_AES_192",
+	[59] = "KMCTR_ENCRYPTED_AES_256",
+	[60] = "KMO_DEA",
+	[61] = "KMO_TDEA_128",
+	[62] = "KMO_TDEA_192",
+	[63] = "KMO_ENCRYPTED_DEA",
+	[64] = "KMO_ENCRYPTED_TDEA_128",
+	[65] = "KMO_ENCRYPTED_TDEA_192",
+	[66] = "KMO_AES_128",
+	[67] = "KMO_AES_192",
+	[68] = "KMO_AES_256",
+	[69] = "KMO_ENCRYPTED_AES_128",
+	[70] = "KMO_ENCRYPTED_AES_192",
+	[71] = "KMO_ENCRYPTED_AES_256",
+	[72] = "KIMD_SHA_1",
+	[73] = "KIMD_SHA_256",
+	[74] = "KIMD_SHA_512",
+	[75] = "KIMD_SHA3_224",
+	[76] = "KIMD_SHA3_256",
+	[77] = "KIMD_SHA3_384",
+	[78] = "KIMD_SHA3_512",
+	[79] = "KIMD_SHAKE_128",
+	[80] = "KIMD_SHAKE_256",
+	[81] = "KIMD_GHASH",
+	[82] = "KLMD_SHA_1",
+	[83] = "KLMD_SHA_256",
+	[84] = "KLMD_SHA_512",
+	[85] = "KLMD_SHA3_224",
+	[86] = "KLMD_SHA3_256",
+	[87] = "KLMD_SHA3_384",
+	[88] = "KLMD_SHA3_512",
+	[89] = "KLMD_SHAKE_128",
+	[90] = "KLMD_SHAKE_256",
+	[91] = "KMAC_DEA",
+	[92] = "KMAC_TDEA_128",
+	[93] = "KMAC_TDEA_192",
+	[94] = "KMAC_ENCRYPTED_DEA",
+	[95] = "KMAC_ENCRYPTED_TDEA_128",
+	[96] = "KMAC_ENCRYPTED_TDEA_192",
+	[97] = "KMAC_AES_128",
+	[98] = "KMAC_AES_192",
+	[99] = "KMAC_AES_256",
+	[100] = "KMAC_ENCRYPTED_AES_128",
+	[101] = "KMAC_ENCRYPTED_AES_192",
+	[102] = "KMAC_ENCRYPTED_AES_256",
+	[103] = "PCC_COMPUTE_LAST_BLOCK_CMAC_USING_DEA",
+	[104] = "PCC_COMPUTE_LAST_BLOCK_CMAC_USING_TDEA_128",
+	[105] = "PCC_COMPUTE_LAST_BLOCK_CMAC_USING_TDEA_192",
+	[106] = "PCC_COMPUTE_LAST_BLOCK_CMAC_USING_ENCRYPTED_DEA",
+	[107] = "PCC_COMPUTE_LAST_BLOCK_CMAC_USING_ENCRYPTED_TDEA_128",
+	[108] = "PCC_COMPUTE_LAST_BLOCK_CMAC_USING_ENCRYPTED_TDEA_192",
+	[109] = "PCC_COMPUTE_LAST_BLOCK_CMAC_USING_AES_128",
+	[110] = "PCC_COMPUTE_LAST_BLOCK_CMAC_USING_AES_192",
+	[111] = "PCC_COMPUTE_LAST_BLOCK_CMAC_USING_AES_256",
+	[112] = "PCC_COMPUTE_LAST_BLOCK_CMAC_USING_ENCRYPTED_AES_128",
+	[113] = "PCC_COMPUTE_LAST_BLOCK_CMAC_USING_ENCRYPTED_AES_192",
+	[114] = "PCC_COMPUTE_LAST_BLOCK_CMAC_USING_ENCRYPTED_AES_256A",
+	[115] = "PCC_COMPUTE_XTS_PARAMETER_USING_AES_128",
+	[116] = "PCC_COMPUTE_XTS_PARAMETER_USING_AES_256",
+	[117] = "PCC_COMPUTE_XTS_PARAMETER_USING_ENCRYPTED_AES_128",
+	[118] = "PCC_COMPUTE_XTS_PARAMETER_USING_ENCRYPTED_AES_256",
+	[119] = "PCC_SCALAR_MULTIPLY_P256",
+	[120] = "PCC_SCALAR_MULTIPLY_P384",
+	[121] = "PCC_SCALAR_MULTIPLY_P521",
+	[122] = "PCC_SCALAR_MULTIPLY_ED25519",
+	[123] = "PCC_SCALAR_MULTIPLY_ED448",
+	[124] = "PCC_SCALAR_MULTIPLY_X25519",
+	[125] = "PCC_SCALAR_MULTIPLY_X448",
+	[126] = "PRNO_SHA_512_DRNG",
+	[127] = "PRNO_TRNG_QUERY_RAW_TO_CONDITIONED_RATIO",
+	[128] = "PRNO_TRNG",
+	[129] = "KDSA_ECDSA_VERIFY_P256",
+	[130] = "KDSA_ECDSA_VERIFY_P384",
+	[131] = "KDSA_ECDSA_VERIFY_P521",
+	[132] = "KDSA_ECDSA_SIGN_P256",
+	[133] = "KDSA_ECDSA_SIGN_P384",
+	[134] = "KDSA_ECDSA_SIGN_P521",
+	[135] = "KDSA_ENCRYPTED_ECDSA_SIGN_P256",
+	[136] = "KDSA_ENCRYPTED_ECDSA_SIGN_P384",
+	[137] = "KDSA_ENCRYPTED_ECDSA_SIGN_P521",
+	[138] = "KDSA_EDDSA_VERIFY_ED25519",
+	[139] = "KDSA_EDDSA_VERIFY_ED448",
+	[140] = "KDSA_EDDSA_SIGN_ED25519",
+	[141] = "KDSA_EDDSA_SIGN_ED448",
+	[142] = "KDSA_ENCRYPTED_EDDSA_SIGN_ED25519",
+	[143] = "KDSA_ENCRYPTED_EDDSA_SIGN_ED448",
+	[144] = "PCKMO_ENCRYPT_DEA_KEY",
+	[145] = "PCKMO_ENCRYPT_TDEA_128_KEY",
+	[146] = "PCKMO_ENCRYPT_TDEA_192_KEY",
+	[147] = "PCKMO_ENCRYPT_AES_128_KEY",
+	[148] = "PCKMO_ENCRYPT_AES_192_KEY",
+	[149] = "PCKMO_ENCRYPT_AES_256_KEY",
+	[150] = "PCKMO_ENCRYPT_ECC_P256_KEY",
+	[151] = "PCKMO_ENCRYPT_ECC_P384_KEY",
+	[152] = "PCKMO_ENCRYPT_ECC_P521_KEY",
+	[153] = "PCKMO_ENCRYPT_ECC_ED25519_KEY",
+	[154] = "PCKMO_ENCRYPT_ECC_ED448_KEY",
+	[155] = "IBM_RESERVED_155",
+	[156] = "IBM_RESERVED_156",
+};
+
+static void __init attr_event_free(struct attribute **attrs, int num)
+{
+	struct perf_pmu_events_attr *pa;
+	int i;
+
+	for (i = 0; i < num; i++) {
+		struct device_attribute *dap;
+
+		dap = container_of(attrs[i], struct device_attribute, attr);
+		pa = container_of(dap, struct perf_pmu_events_attr, attr);
+		kfree(pa);
+	}
+	kfree(attrs);
+}
+
+static int __init attr_event_init_one(struct attribute **attrs, int num)
+{
+	struct perf_pmu_events_attr *pa;
+
+	pa = kzalloc(sizeof(*pa), GFP_KERNEL);
+	if (!pa)
+		return -ENOMEM;
+
+	sysfs_attr_init(&pa->attr.attr);
+	pa->id = PAI_CRYPTO_BASE + num;
+	pa->attr.attr.name = paicrypt_ctrnames[num];
+	pa->attr.attr.mode = 0444;
+	pa->attr.show = cpumf_events_sysfs_show;
+	pa->attr.store = NULL;
+	attrs[num] = &pa->attr.attr;
+	return 0;
+}
+
+/* Create PMU sysfs event attributes on the fly. */
+static int __init attr_event_init(void)
+{
+	struct attribute **attrs;
+	int ret, i;
+
+	attrs = kmalloc_array(ARRAY_SIZE(paicrypt_ctrnames) + 1, sizeof(*attrs),
+			      GFP_KERNEL);
+	if (!attrs)
+		return -ENOMEM;
+	for (i = 0; i < ARRAY_SIZE(paicrypt_ctrnames); i++) {
+		ret = attr_event_init_one(attrs, i);
+		if (ret) {
+			attr_event_free(attrs, i - 1);
+			return ret;
+		}
+	}
+	attrs[i] = NULL;
+	paicrypt_events_group.attrs = attrs;
+	return 0;
+}
+
+static int __init paicrypt_init(void)
+{
+	struct qpaci_info_block ib;
+	int rc;
+
+	if (!test_facility(196))
+		return 0;
+
+	qpaci(&ib);
+	paicrypt_cnt = ib.num_cc;
+	if (paicrypt_cnt == 0)
+		return 0;
+	if (paicrypt_cnt >= PAI_CRYPTO_MAXCTR)
+		paicrypt_cnt = PAI_CRYPTO_MAXCTR - 1;
+
+	rc = attr_event_init();		/* Export known PAI crypto events */
+	if (rc) {
+		pr_err("Creation of PMU pai_crypto /sysfs failed\n");
+		return rc;
+	}
+
+	/* Setup s390dbf facility */
+	cfm_dbg = debug_register(KMSG_COMPONENT, 2, 256, 128);
+	if (!cfm_dbg) {
+		pr_err("Registration of s390dbf pai_crypto failed\n");
+		return -ENOMEM;
+	}
+	debug_register_view(cfm_dbg, &debug_sprintf_view);
+
+	rc = perf_pmu_register(&paicrypt, "pai_crypto", -1);
+	if (rc) {
+		pr_err("Registering the pai_crypto PMU failed with rc=%i\n",
+		       rc);
+		debug_unregister_view(cfm_dbg, &debug_sprintf_view);
+		debug_unregister(cfm_dbg);
+		return rc;
+	}
+	return 0;
+}
+
+device_initcall(paicrypt_init);
diff --git a/arch/s390/kernel/perf_pai_ext.c b/arch/s390/kernel/perf_pai_ext.c
new file mode 100644
index 0000000000..c57c1a2032
--- /dev/null
+++ b/arch/s390/kernel/perf_pai_ext.c
@@ -0,0 +1,667 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Performance event support - Processor Activity Instrumentation Extension
+ * Facility
+ *
+ *  Copyright IBM Corp. 2022
+ *  Author(s): Thomas Richter <tmricht@linux.ibm.com>
+ */
+#define KMSG_COMPONENT	"pai_ext"
+#define pr_fmt(fmt)	KMSG_COMPONENT ": " fmt
+
+#include <linux/kernel.h>
+#include <linux/kernel_stat.h>
+#include <linux/percpu.h>
+#include <linux/notifier.h>
+#include <linux/init.h>
+#include <linux/export.h>
+#include <linux/io.h>
+#include <linux/perf_event.h>
+
+#include <asm/ctl_reg.h>
+#include <asm/pai.h>
+#include <asm/debug.h>
+
+#define	PAIE1_CB_SZ		0x200	/* Size of PAIE1 control block */
+#define	PAIE1_CTRBLOCK_SZ	0x400	/* Size of PAIE1 counter blocks */
+
+static debug_info_t *paiext_dbg;
+static unsigned int paiext_cnt;	/* Extracted with QPACI instruction */
+
+struct pai_userdata {
+	u16 num;
+	u64 value;
+} __packed;
+
+/* Create the PAI extension 1 control block area.
+ * The PAI extension control block 1 is pointed to by lowcore
+ * address 0x1508 for each CPU. This control block is 512 bytes in size
+ * and requires a 512 byte boundary alignment.
+ */
+struct paiext_cb {		/* PAI extension 1 control block */
+	u64 header;		/* Not used */
+	u64 reserved1;
+	u64 acc;		/* Addr to analytics counter control block */
+	u8 reserved2[488];
+} __packed;
+
+struct paiext_map {
+	unsigned long *area;		/* Area for CPU to store counters */
+	struct pai_userdata *save;	/* Area to store non-zero counters */
+	enum paievt_mode mode;		/* Type of event */
+	unsigned int active_events;	/* # of PAI Extension users */
+	refcount_t refcnt;
+	struct perf_event *event;	/* Perf event for sampling */
+	struct paiext_cb *paiext_cb;	/* PAI extension control block area */
+};
+
+struct paiext_mapptr {
+	struct paiext_map *mapptr;
+};
+
+static struct paiext_root {		/* Anchor to per CPU data */
+	refcount_t refcnt;		/* Overall active events */
+	struct paiext_mapptr __percpu *mapptr;
+} paiext_root;
+
+/* Free per CPU data when the last event is removed. */
+static void paiext_root_free(void)
+{
+	if (refcount_dec_and_test(&paiext_root.refcnt)) {
+		free_percpu(paiext_root.mapptr);
+		paiext_root.mapptr = NULL;
+	}
+}
+
+/* On initialization of first event also allocate per CPU data dynamically.
+ * Start with an array of pointers, the array size is the maximum number of
+ * CPUs possible, which might be larger than the number of CPUs currently
+ * online.
+ */
+static int paiext_root_alloc(void)
+{
+	if (!refcount_inc_not_zero(&paiext_root.refcnt)) {
+		/* The memory is already zeroed. */
+		paiext_root.mapptr = alloc_percpu(struct paiext_mapptr);
+		if (!paiext_root.mapptr) {
+			/* Returning without refcnt adjustment is ok. The
+			 * error code is handled by paiext_alloc() which
+			 * decrements refcnt when an event can not be
+			 * created.
+			 */
+			return -ENOMEM;
+		}
+		refcount_set(&paiext_root.refcnt, 1);
+	}
+	return 0;
+}
+
+/* Protects against concurrent increment of sampler and counter member
+ * increments at the same time and prohibits concurrent execution of
+ * counting and sampling events.
+ * Ensures that analytics counter block is deallocated only when the
+ * sampling and counting on that cpu is zero.
+ * For details see paiext_alloc().
+ */
+static DEFINE_MUTEX(paiext_reserve_mutex);
+
+/* Free all memory allocated for event counting/sampling setup */
+static void paiext_free(struct paiext_mapptr *mp)
+{
+	kfree(mp->mapptr->area);
+	kfree(mp->mapptr->paiext_cb);
+	kvfree(mp->mapptr->save);
+	kfree(mp->mapptr);
+	mp->mapptr = NULL;
+}
+
+/* Release the PMU if event is the last perf event */
+static void paiext_event_destroy(struct perf_event *event)
+{
+	struct paiext_mapptr *mp = per_cpu_ptr(paiext_root.mapptr, event->cpu);
+	struct paiext_map *cpump = mp->mapptr;
+
+	mutex_lock(&paiext_reserve_mutex);
+	cpump->event = NULL;
+	if (refcount_dec_and_test(&cpump->refcnt))	/* Last reference gone */
+		paiext_free(mp);
+	paiext_root_free();
+	mutex_unlock(&paiext_reserve_mutex);
+	debug_sprintf_event(paiext_dbg, 4, "%s cpu %d mapptr %p\n", __func__,
+			    event->cpu, mp->mapptr);
+
+}
+
+/* Used to avoid races in checking concurrent access of counting and
+ * sampling for pai_extension events.
+ *
+ * Only one instance of event pai_ext/NNPA_ALL/ for sampling is
+ * allowed and when this event is running, no counting event is allowed.
+ * Several counting events are allowed in parallel, but no sampling event
+ * is allowed while one (or more) counting events are running.
+ *
+ * This function is called in process context and it is safe to block.
+ * When the event initialization functions fails, no other call back will
+ * be invoked.
+ *
+ * Allocate the memory for the event.
+ */
+static int paiext_alloc(struct perf_event_attr *a, struct perf_event *event)
+{
+	struct paiext_mapptr *mp;
+	struct paiext_map *cpump;
+	int rc;
+
+	mutex_lock(&paiext_reserve_mutex);
+
+	rc = paiext_root_alloc();
+	if (rc)
+		goto unlock;
+
+	mp = per_cpu_ptr(paiext_root.mapptr, event->cpu);
+	cpump = mp->mapptr;
+	if (!cpump) {			/* Paiext_map allocated? */
+		rc = -ENOMEM;
+		cpump = kzalloc(sizeof(*cpump), GFP_KERNEL);
+		if (!cpump)
+			goto undo;
+
+		/* Allocate memory for counter area and counter extraction.
+		 * These are
+		 * - a 512 byte block and requires 512 byte boundary alignment.
+		 * - a 1KB byte block and requires 1KB boundary alignment.
+		 * Only the first counting event has to allocate the area.
+		 *
+		 * Note: This works with commit 59bb47985c1d by default.
+		 * Backporting this to kernels without this commit might
+		 * need adjustment.
+		 */
+		mp->mapptr = cpump;
+		cpump->area = kzalloc(PAIE1_CTRBLOCK_SZ, GFP_KERNEL);
+		cpump->paiext_cb = kzalloc(PAIE1_CB_SZ, GFP_KERNEL);
+		cpump->save = kvmalloc_array(paiext_cnt + 1,
+					     sizeof(struct pai_userdata),
+					     GFP_KERNEL);
+		if (!cpump->save || !cpump->area || !cpump->paiext_cb) {
+			paiext_free(mp);
+			goto undo;
+		}
+		refcount_set(&cpump->refcnt, 1);
+		cpump->mode = a->sample_period ? PAI_MODE_SAMPLING
+					       : PAI_MODE_COUNTING;
+	} else {
+		/* Multiple invocation, check what is active.
+		 * Supported are multiple counter events or only one sampling
+		 * event concurrently at any one time.
+		 */
+		if (cpump->mode == PAI_MODE_SAMPLING ||
+		    (cpump->mode == PAI_MODE_COUNTING && a->sample_period)) {
+			rc = -EBUSY;
+			goto undo;
+		}
+		refcount_inc(&cpump->refcnt);
+	}
+
+	rc = 0;
+	cpump->event = event;
+
+undo:
+	if (rc) {
+		/* Error in allocation of event, decrement anchor. Since
+		 * the event in not created, its destroy() function is never
+		 * invoked. Adjust the reference counter for the anchor.
+		 */
+		paiext_root_free();
+	}
+unlock:
+	mutex_unlock(&paiext_reserve_mutex);
+	/* If rc is non-zero, no increment of counter/sampler was done. */
+	return rc;
+}
+
+/* The PAI extension 1 control block supports up to 128 entries. Return
+ * the index within PAIE1_CB given the event number. Also validate event
+ * number.
+ */
+static int paiext_event_valid(struct perf_event *event)
+{
+	u64 cfg = event->attr.config;
+
+	if (cfg >= PAI_NNPA_BASE && cfg <= PAI_NNPA_BASE + paiext_cnt) {
+		/* Offset NNPA in paiext_cb */
+		event->hw.config_base = offsetof(struct paiext_cb, acc);
+		return 0;
+	}
+	return -EINVAL;
+}
+
+/* Might be called on different CPU than the one the event is intended for. */
+static int paiext_event_init(struct perf_event *event)
+{
+	struct perf_event_attr *a = &event->attr;
+	int rc;
+
+	/* PMU pai_ext registered as PERF_TYPE_RAW, check event type */
+	if (a->type != PERF_TYPE_RAW && event->pmu->type != a->type)
+		return -ENOENT;
+	/* PAI extension event must be valid and in supported range */
+	rc = paiext_event_valid(event);
+	if (rc)
+		return rc;
+	/* Allow only CPU wide operation, no process context for now. */
+	if (event->hw.target || event->cpu == -1)
+		return -ENOENT;
+	/* Allow only event NNPA_ALL for sampling. */
+	if (a->sample_period && a->config != PAI_NNPA_BASE)
+		return -EINVAL;
+	/* Prohibit exclude_user event selection */
+	if (a->exclude_user)
+		return -EINVAL;
+
+	rc = paiext_alloc(a, event);
+	if (rc)
+		return rc;
+	event->hw.last_tag = 0;
+	event->destroy = paiext_event_destroy;
+
+	if (a->sample_period) {
+		a->sample_period = 1;
+		a->freq = 0;
+		/* Register for paicrypt_sched_task() to be called */
+		event->attach_state |= PERF_ATTACH_SCHED_CB;
+		/* Add raw data which are the memory mapped counters */
+		a->sample_type |= PERF_SAMPLE_RAW;
+		/* Turn off inheritance */
+		a->inherit = 0;
+	}
+
+	return 0;
+}
+
+static u64 paiext_getctr(struct paiext_map *cpump, int nr)
+{
+	return cpump->area[nr];
+}
+
+/* Read the counter values. Return value from location in buffer. For event
+ * NNPA_ALL sum up all events.
+ */
+static u64 paiext_getdata(struct perf_event *event)
+{
+	struct paiext_mapptr *mp = this_cpu_ptr(paiext_root.mapptr);
+	struct paiext_map *cpump = mp->mapptr;
+	u64 sum = 0;
+	int i;
+
+	if (event->attr.config != PAI_NNPA_BASE)
+		return paiext_getctr(cpump, event->attr.config - PAI_NNPA_BASE);
+
+	for (i = 1; i <= paiext_cnt; i++)
+		sum += paiext_getctr(cpump, i);
+
+	return sum;
+}
+
+static u64 paiext_getall(struct perf_event *event)
+{
+	return paiext_getdata(event);
+}
+
+static void paiext_read(struct perf_event *event)
+{
+	u64 prev, new, delta;
+
+	prev = local64_read(&event->hw.prev_count);
+	new = paiext_getall(event);
+	local64_set(&event->hw.prev_count, new);
+	delta = new - prev;
+	local64_add(delta, &event->count);
+}
+
+static void paiext_start(struct perf_event *event, int flags)
+{
+	u64 sum;
+
+	if (event->hw.last_tag)
+		return;
+	event->hw.last_tag = 1;
+	sum = paiext_getall(event);		/* Get current value */
+	local64_set(&event->hw.prev_count, sum);
+	local64_set(&event->count, 0);
+}
+
+static int paiext_add(struct perf_event *event, int flags)
+{
+	struct paiext_mapptr *mp = this_cpu_ptr(paiext_root.mapptr);
+	struct paiext_map *cpump = mp->mapptr;
+	struct paiext_cb *pcb = cpump->paiext_cb;
+
+	if (++cpump->active_events == 1) {
+		S390_lowcore.aicd = virt_to_phys(cpump->paiext_cb);
+		pcb->acc = virt_to_phys(cpump->area) | 0x1;
+		/* Enable CPU instruction lookup for PAIE1 control block */
+		__ctl_set_bit(0, 49);
+		debug_sprintf_event(paiext_dbg, 4, "%s 1508 %llx acc %llx\n",
+				    __func__, S390_lowcore.aicd, pcb->acc);
+	}
+	if (flags & PERF_EF_START && !event->attr.sample_period) {
+		/* Only counting needs initial counter value */
+		paiext_start(event, PERF_EF_RELOAD);
+	}
+	event->hw.state = 0;
+	if (event->attr.sample_period) {
+		cpump->event = event;
+		perf_sched_cb_inc(event->pmu);
+	}
+	return 0;
+}
+
+static void paiext_stop(struct perf_event *event, int flags)
+{
+	paiext_read(event);
+	event->hw.state = PERF_HES_STOPPED;
+}
+
+static void paiext_del(struct perf_event *event, int flags)
+{
+	struct paiext_mapptr *mp = this_cpu_ptr(paiext_root.mapptr);
+	struct paiext_map *cpump = mp->mapptr;
+	struct paiext_cb *pcb = cpump->paiext_cb;
+
+	if (event->attr.sample_period)
+		perf_sched_cb_dec(event->pmu);
+	if (!event->attr.sample_period) {
+		/* Only counting needs to read counter */
+		paiext_stop(event, PERF_EF_UPDATE);
+	}
+	if (--cpump->active_events == 0) {
+		/* Disable CPU instruction lookup for PAIE1 control block */
+		__ctl_clear_bit(0, 49);
+		pcb->acc = 0;
+		S390_lowcore.aicd = 0;
+		debug_sprintf_event(paiext_dbg, 4, "%s 1508 %llx acc %llx\n",
+				    __func__, S390_lowcore.aicd, pcb->acc);
+	}
+}
+
+/* Create raw data and save it in buffer. Returns number of bytes copied.
+ * Saves only positive counter entries of the form
+ * 2 bytes: Number of counter
+ * 8 bytes: Value of counter
+ */
+static size_t paiext_copy(struct paiext_map *cpump)
+{
+	struct pai_userdata *userdata = cpump->save;
+	int i, outidx = 0;
+
+	for (i = 1; i <= paiext_cnt; i++) {
+		u64 val = paiext_getctr(cpump, i);
+
+		if (val) {
+			userdata[outidx].num = i;
+			userdata[outidx].value = val;
+			outidx++;
+		}
+	}
+	return outidx * sizeof(*userdata);
+}
+
+/* Write sample when one or more counters values are nonzero.
+ *
+ * Note: The function paiext_sched_task() and paiext_push_sample() are not
+ * invoked after function paiext_del() has been called because of function
+ * perf_sched_cb_dec().
+ * The function paiext_sched_task() and paiext_push_sample() are only
+ * called when sampling is active. Function perf_sched_cb_inc()
+ * has been invoked to install function paiext_sched_task() as call back
+ * to run at context switch time (see paiext_add()).
+ *
+ * This causes function perf_event_context_sched_out() and
+ * perf_event_context_sched_in() to check whether the PMU has installed an
+ * sched_task() callback. That callback is not active after paiext_del()
+ * returns and has deleted the event on that CPU.
+ */
+static int paiext_push_sample(void)
+{
+	struct paiext_mapptr *mp = this_cpu_ptr(paiext_root.mapptr);
+	struct paiext_map *cpump = mp->mapptr;
+	struct perf_event *event = cpump->event;
+	struct perf_sample_data data;
+	struct perf_raw_record raw;
+	struct pt_regs regs;
+	size_t rawsize;
+	int overflow;
+
+	rawsize = paiext_copy(cpump);
+	if (!rawsize)			/* No incremented counters */
+		return 0;
+
+	/* Setup perf sample */
+	memset(&regs, 0, sizeof(regs));
+	memset(&raw, 0, sizeof(raw));
+	memset(&data, 0, sizeof(data));
+	perf_sample_data_init(&data, 0, event->hw.last_period);
+	if (event->attr.sample_type & PERF_SAMPLE_TID) {
+		data.tid_entry.pid = task_tgid_nr(current);
+		data.tid_entry.tid = task_pid_nr(current);
+	}
+	if (event->attr.sample_type & PERF_SAMPLE_TIME)
+		data.time = event->clock();
+	if (event->attr.sample_type & (PERF_SAMPLE_ID | PERF_SAMPLE_IDENTIFIER))
+		data.id = event->id;
+	if (event->attr.sample_type & PERF_SAMPLE_CPU)
+		data.cpu_entry.cpu = smp_processor_id();
+	if (event->attr.sample_type & PERF_SAMPLE_RAW) {
+		raw.frag.size = rawsize;
+		raw.frag.data = cpump->save;
+		perf_sample_save_raw_data(&data, &raw);
+	}
+
+	overflow = perf_event_overflow(event, &data, &regs);
+	perf_event_update_userpage(event);
+	/* Clear lowcore area after read */
+	memset(cpump->area, 0, PAIE1_CTRBLOCK_SZ);
+	return overflow;
+}
+
+/* Called on schedule-in and schedule-out. No access to event structure,
+ * but for sampling only event NNPA_ALL is allowed.
+ */
+static void paiext_sched_task(struct perf_event_pmu_context *pmu_ctx, bool sched_in)
+{
+	/* We started with a clean page on event installation. So read out
+	 * results on schedule_out and if page was dirty, clear values.
+	 */
+	if (!sched_in)
+		paiext_push_sample();
+}
+
+/* Attribute definitions for pai extension1 interface. As with other CPU
+ * Measurement Facilities, there is one attribute per mapped counter.
+ * The number of mapped counters may vary per machine generation. Use
+ * the QUERY PROCESSOR ACTIVITY COUNTER INFORMATION (QPACI) instruction
+ * to determine the number of mapped counters. The instructions returns
+ * a positive number, which is the highest number of supported counters.
+ * All counters less than this number are also supported, there are no
+ * holes. A returned number of zero means no support for mapped counters.
+ *
+ * The identification of the counter is a unique number. The chosen range
+ * is 0x1800 + offset in mapped kernel page.
+ * All CPU Measurement Facility counters identifiers must be unique and
+ * the numbers from 0 to 496 are already used for the CPU Measurement
+ * Counter facility. Number 0x1000 to 0x103e are used for PAI cryptography
+ * counters.
+ * Numbers 0xb0000, 0xbc000 and 0xbd000 are already
+ * used for the CPU Measurement Sampling facility.
+ */
+PMU_FORMAT_ATTR(event, "config:0-63");
+
+static struct attribute *paiext_format_attr[] = {
+	&format_attr_event.attr,
+	NULL,
+};
+
+static struct attribute_group paiext_events_group = {
+	.name = "events",
+	.attrs = NULL,			/* Filled in attr_event_init() */
+};
+
+static struct attribute_group paiext_format_group = {
+	.name = "format",
+	.attrs = paiext_format_attr,
+};
+
+static const struct attribute_group *paiext_attr_groups[] = {
+	&paiext_events_group,
+	&paiext_format_group,
+	NULL,
+};
+
+/* Performance monitoring unit for mapped counters */
+static struct pmu paiext = {
+	.task_ctx_nr  = perf_invalid_context,
+	.event_init   = paiext_event_init,
+	.add	      = paiext_add,
+	.del	      = paiext_del,
+	.start	      = paiext_start,
+	.stop	      = paiext_stop,
+	.read	      = paiext_read,
+	.sched_task   = paiext_sched_task,
+	.attr_groups  = paiext_attr_groups,
+};
+
+/* List of symbolic PAI extension 1 NNPA counter names. */
+static const char * const paiext_ctrnames[] = {
+	[0] = "NNPA_ALL",
+	[1] = "NNPA_ADD",
+	[2] = "NNPA_SUB",
+	[3] = "NNPA_MUL",
+	[4] = "NNPA_DIV",
+	[5] = "NNPA_MIN",
+	[6] = "NNPA_MAX",
+	[7] = "NNPA_LOG",
+	[8] = "NNPA_EXP",
+	[9] = "NNPA_IBM_RESERVED_9",
+	[10] = "NNPA_RELU",
+	[11] = "NNPA_TANH",
+	[12] = "NNPA_SIGMOID",
+	[13] = "NNPA_SOFTMAX",
+	[14] = "NNPA_BATCHNORM",
+	[15] = "NNPA_MAXPOOL2D",
+	[16] = "NNPA_AVGPOOL2D",
+	[17] = "NNPA_LSTMACT",
+	[18] = "NNPA_GRUACT",
+	[19] = "NNPA_CONVOLUTION",
+	[20] = "NNPA_MATMUL_OP",
+	[21] = "NNPA_MATMUL_OP_BCAST23",
+	[22] = "NNPA_SMALLBATCH",
+	[23] = "NNPA_LARGEDIM",
+	[24] = "NNPA_SMALLTENSOR",
+	[25] = "NNPA_1MFRAME",
+	[26] = "NNPA_2GFRAME",
+	[27] = "NNPA_ACCESSEXCEPT",
+};
+
+static void __init attr_event_free(struct attribute **attrs, int num)
+{
+	struct perf_pmu_events_attr *pa;
+	struct device_attribute *dap;
+	int i;
+
+	for (i = 0; i < num; i++) {
+		dap = container_of(attrs[i], struct device_attribute, attr);
+		pa = container_of(dap, struct perf_pmu_events_attr, attr);
+		kfree(pa);
+	}
+	kfree(attrs);
+}
+
+static int __init attr_event_init_one(struct attribute **attrs, int num)
+{
+	struct perf_pmu_events_attr *pa;
+
+	pa = kzalloc(sizeof(*pa), GFP_KERNEL);
+	if (!pa)
+		return -ENOMEM;
+
+	sysfs_attr_init(&pa->attr.attr);
+	pa->id = PAI_NNPA_BASE + num;
+	pa->attr.attr.name = paiext_ctrnames[num];
+	pa->attr.attr.mode = 0444;
+	pa->attr.show = cpumf_events_sysfs_show;
+	pa->attr.store = NULL;
+	attrs[num] = &pa->attr.attr;
+	return 0;
+}
+
+/* Create PMU sysfs event attributes on the fly. */
+static int __init attr_event_init(void)
+{
+	struct attribute **attrs;
+	int ret, i;
+
+	attrs = kmalloc_array(ARRAY_SIZE(paiext_ctrnames) + 1, sizeof(*attrs),
+			      GFP_KERNEL);
+	if (!attrs)
+		return -ENOMEM;
+	for (i = 0; i < ARRAY_SIZE(paiext_ctrnames); i++) {
+		ret = attr_event_init_one(attrs, i);
+		if (ret) {
+			attr_event_free(attrs, i - 1);
+			return ret;
+		}
+	}
+	attrs[i] = NULL;
+	paiext_events_group.attrs = attrs;
+	return 0;
+}
+
+static int __init paiext_init(void)
+{
+	struct qpaci_info_block ib;
+	int rc = -ENOMEM;
+
+	if (!test_facility(197))
+		return 0;
+
+	qpaci(&ib);
+	paiext_cnt = ib.num_nnpa;
+	if (paiext_cnt >= PAI_NNPA_MAXCTR)
+		paiext_cnt = PAI_NNPA_MAXCTR;
+	if (!paiext_cnt)
+		return 0;
+
+	rc = attr_event_init();
+	if (rc) {
+		pr_err("Creation of PMU " KMSG_COMPONENT " /sysfs failed\n");
+		return rc;
+	}
+
+	/* Setup s390dbf facility */
+	paiext_dbg = debug_register(KMSG_COMPONENT, 2, 256, 128);
+	if (!paiext_dbg) {
+		pr_err("Registration of s390dbf " KMSG_COMPONENT " failed\n");
+		rc = -ENOMEM;
+		goto out_init;
+	}
+	debug_register_view(paiext_dbg, &debug_sprintf_view);
+
+	rc = perf_pmu_register(&paiext, KMSG_COMPONENT, -1);
+	if (rc) {
+		pr_err("Registration of " KMSG_COMPONENT " PMU failed with "
+		       "rc=%i\n", rc);
+		goto out_pmu;
+	}
+
+	return 0;
+
+out_pmu:
+	debug_unregister_view(paiext_dbg, &debug_sprintf_view);
+	debug_unregister(paiext_dbg);
+out_init:
+	attr_event_free(paiext_events_group.attrs,
+			ARRAY_SIZE(paiext_ctrnames) + 1);
+	return rc;
+}
+
+device_initcall(paiext_init);
diff --git a/arch/s390/kernel/perf_regs.c b/arch/s390/kernel/perf_regs.c
new file mode 100644
index 0000000000..6e9e5d5e92
--- /dev/null
+++ b/arch/s390/kernel/perf_regs.c
@@ -0,0 +1,68 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/perf_event.h>
+#include <linux/perf_regs.h>
+#include <linux/kernel.h>
+#include <linux/errno.h>
+#include <linux/bug.h>
+#include <asm/ptrace.h>
+#include <asm/fpu/api.h>
+#include <asm/fpu/types.h>
+
+u64 perf_reg_value(struct pt_regs *regs, int idx)
+{
+	freg_t fp;
+
+	if (idx >= PERF_REG_S390_R0 && idx <= PERF_REG_S390_R15)
+		return regs->gprs[idx];
+
+	if (idx >= PERF_REG_S390_FP0 && idx <= PERF_REG_S390_FP15) {
+		if (!user_mode(regs))
+			return 0;
+
+		idx -= PERF_REG_S390_FP0;
+		fp = MACHINE_HAS_VX ? *(freg_t *)(current->thread.fpu.vxrs + idx)
+				    : current->thread.fpu.fprs[idx];
+		return fp.ui;
+	}
+
+	if (idx == PERF_REG_S390_MASK)
+		return regs->psw.mask;
+	if (idx == PERF_REG_S390_PC)
+		return regs->psw.addr;
+
+	WARN_ON_ONCE((u32)idx >= PERF_REG_S390_MAX);
+	return 0;
+}
+
+#define REG_RESERVED (~((1UL << PERF_REG_S390_MAX) - 1))
+
+int perf_reg_validate(u64 mask)
+{
+	if (!mask || mask & REG_RESERVED)
+		return -EINVAL;
+
+	return 0;
+}
+
+u64 perf_reg_abi(struct task_struct *task)
+{
+	if (test_tsk_thread_flag(task, TIF_31BIT))
+		return PERF_SAMPLE_REGS_ABI_32;
+
+	return PERF_SAMPLE_REGS_ABI_64;
+}
+
+void perf_get_regs_user(struct perf_regs *regs_user,
+			struct pt_regs *regs)
+{
+	/*
+	 * Use the regs from the first interruption and let
+	 * perf_sample_regs_intr() handle interrupts (regs == get_irq_regs()).
+	 *
+	 * Also save FPU registers for user-space tasks only.
+	 */
+	regs_user->regs = task_pt_regs(current);
+	if (user_mode(regs_user->regs))
+		save_fpu_regs();
+	regs_user->abi = perf_reg_abi(current);
+}
diff --git a/arch/s390/kernel/process.c b/arch/s390/kernel/process.c
new file mode 100644
index 0000000000..2580004177
--- /dev/null
+++ b/arch/s390/kernel/process.c
@@ -0,0 +1,240 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * This file handles the architecture dependent parts of process handling.
+ *
+ *    Copyright IBM Corp. 1999, 2009
+ *    Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>,
+ *		 Hartmut Penner <hp@de.ibm.com>,
+ *		 Denis Joseph Barrow,
+ */
+
+#include <linux/elf-randomize.h>
+#include <linux/compiler.h>
+#include <linux/cpu.h>
+#include <linux/sched.h>
+#include <linux/sched/debug.h>
+#include <linux/sched/task.h>
+#include <linux/sched/task_stack.h>
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <linux/elfcore.h>
+#include <linux/smp.h>
+#include <linux/slab.h>
+#include <linux/interrupt.h>
+#include <linux/tick.h>
+#include <linux/personality.h>
+#include <linux/syscalls.h>
+#include <linux/compat.h>
+#include <linux/kprobes.h>
+#include <linux/random.h>
+#include <linux/export.h>
+#include <linux/init_task.h>
+#include <linux/entry-common.h>
+#include <linux/io.h>
+#include <asm/cpu_mf.h>
+#include <asm/processor.h>
+#include <asm/vtimer.h>
+#include <asm/exec.h>
+#include <asm/irq.h>
+#include <asm/nmi.h>
+#include <asm/smp.h>
+#include <asm/stacktrace.h>
+#include <asm/switch_to.h>
+#include <asm/runtime_instr.h>
+#include <asm/unwind.h>
+#include "entry.h"
+
+void ret_from_fork(void) asm("ret_from_fork");
+
+void __ret_from_fork(struct task_struct *prev, struct pt_regs *regs)
+{
+	void (*func)(void *arg);
+
+	schedule_tail(prev);
+
+	if (!user_mode(regs)) {
+		/* Kernel thread */
+		func = (void *)regs->gprs[9];
+		func((void *)regs->gprs[10]);
+	}
+	clear_pt_regs_flag(regs, PIF_SYSCALL);
+	syscall_exit_to_user_mode(regs);
+}
+
+void flush_thread(void)
+{
+}
+
+void arch_setup_new_exec(void)
+{
+	if (S390_lowcore.current_pid != current->pid) {
+		S390_lowcore.current_pid = current->pid;
+		if (test_facility(40))
+			lpp(&S390_lowcore.lpp);
+	}
+}
+
+void arch_release_task_struct(struct task_struct *tsk)
+{
+	runtime_instr_release(tsk);
+	guarded_storage_release(tsk);
+}
+
+int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src)
+{
+	/*
+	 * Save the floating-point or vector register state of the current
+	 * task and set the CIF_FPU flag to lazy restore the FPU register
+	 * state when returning to user space.
+	 */
+	save_fpu_regs();
+
+	memcpy(dst, src, arch_task_struct_size);
+	dst->thread.fpu.regs = dst->thread.fpu.fprs;
+
+	/*
+	 * Don't transfer over the runtime instrumentation or the guarded
+	 * storage control block pointers. These fields are cleared here instead
+	 * of in copy_thread() to avoid premature freeing of associated memory
+	 * on fork() failure. Wait to clear the RI flag because ->stack still
+	 * refers to the source thread.
+	 */
+	dst->thread.ri_cb = NULL;
+	dst->thread.gs_cb = NULL;
+	dst->thread.gs_bc_cb = NULL;
+
+	return 0;
+}
+
+int copy_thread(struct task_struct *p, const struct kernel_clone_args *args)
+{
+	unsigned long clone_flags = args->flags;
+	unsigned long new_stackp = args->stack;
+	unsigned long tls = args->tls;
+	struct fake_frame
+	{
+		struct stack_frame sf;
+		struct pt_regs childregs;
+	} *frame;
+
+	frame = container_of(task_pt_regs(p), struct fake_frame, childregs);
+	p->thread.ksp = (unsigned long) frame;
+	/* Save access registers to new thread structure. */
+	save_access_regs(&p->thread.acrs[0]);
+	/* start new process with ar4 pointing to the correct address space */
+	/* Don't copy debug registers */
+	memset(&p->thread.per_user, 0, sizeof(p->thread.per_user));
+	memset(&p->thread.per_event, 0, sizeof(p->thread.per_event));
+	clear_tsk_thread_flag(p, TIF_SINGLE_STEP);
+	p->thread.per_flags = 0;
+	/* Initialize per thread user and system timer values */
+	p->thread.user_timer = 0;
+	p->thread.guest_timer = 0;
+	p->thread.system_timer = 0;
+	p->thread.hardirq_timer = 0;
+	p->thread.softirq_timer = 0;
+	p->thread.last_break = 1;
+
+	frame->sf.back_chain = 0;
+	frame->sf.gprs[11 - 6] = (unsigned long)&frame->childregs;
+	frame->sf.gprs[12 - 6] = (unsigned long)p;
+	/* new return point is ret_from_fork */
+	frame->sf.gprs[14 - 6] = (unsigned long)ret_from_fork;
+	/* fake return stack for resume(), don't go back to schedule */
+	frame->sf.gprs[15 - 6] = (unsigned long)frame;
+
+	/* Store access registers to kernel stack of new process. */
+	if (unlikely(args->fn)) {
+		/* kernel thread */
+		memset(&frame->childregs, 0, sizeof(struct pt_regs));
+		frame->childregs.psw.mask = PSW_KERNEL_BITS | PSW_MASK_IO |
+					    PSW_MASK_EXT | PSW_MASK_MCHECK;
+		frame->childregs.gprs[9] = (unsigned long)args->fn;
+		frame->childregs.gprs[10] = (unsigned long)args->fn_arg;
+		frame->childregs.orig_gpr2 = -1;
+		frame->childregs.last_break = 1;
+		return 0;
+	}
+	frame->childregs = *current_pt_regs();
+	frame->childregs.gprs[2] = 0;	/* child returns 0 on fork. */
+	frame->childregs.flags = 0;
+	if (new_stackp)
+		frame->childregs.gprs[15] = new_stackp;
+	/*
+	 * Clear the runtime instrumentation flag after the above childregs
+	 * copy. The CB pointer was already cleared in arch_dup_task_struct().
+	 */
+	frame->childregs.psw.mask &= ~PSW_MASK_RI;
+
+	/* Set a new TLS ?  */
+	if (clone_flags & CLONE_SETTLS) {
+		if (is_compat_task()) {
+			p->thread.acrs[0] = (unsigned int)tls;
+		} else {
+			p->thread.acrs[0] = (unsigned int)(tls >> 32);
+			p->thread.acrs[1] = (unsigned int)tls;
+		}
+	}
+	/*
+	 * s390 stores the svc return address in arch_data when calling
+	 * sigreturn()/restart_syscall() via vdso. 1 means no valid address
+	 * stored.
+	 */
+	p->restart_block.arch_data = 1;
+	return 0;
+}
+
+void execve_tail(void)
+{
+	current->thread.fpu.fpc = 0;
+	asm volatile("sfpc %0" : : "d" (0));
+}
+
+unsigned long __get_wchan(struct task_struct *p)
+{
+	struct unwind_state state;
+	unsigned long ip = 0;
+
+	if (!task_stack_page(p))
+		return 0;
+
+	if (!try_get_task_stack(p))
+		return 0;
+
+	unwind_for_each_frame(&state, p, NULL, 0) {
+		if (state.stack_info.type != STACK_TYPE_TASK) {
+			ip = 0;
+			break;
+		}
+
+		ip = unwind_get_return_address(&state);
+		if (!ip)
+			break;
+
+		if (!in_sched_functions(ip))
+			break;
+	}
+
+	put_task_stack(p);
+	return ip;
+}
+
+unsigned long arch_align_stack(unsigned long sp)
+{
+	if (!(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space)
+		sp -= get_random_u32_below(PAGE_SIZE);
+	return sp & ~0xf;
+}
+
+static inline unsigned long brk_rnd(void)
+{
+	return (get_random_u16() & BRK_RND_MASK) << PAGE_SHIFT;
+}
+
+unsigned long arch_randomize_brk(struct mm_struct *mm)
+{
+	unsigned long ret;
+
+	ret = PAGE_ALIGN(mm->brk + brk_rnd());
+	return (ret > mm->brk) ? ret : mm->brk;
+}
diff --git a/arch/s390/kernel/processor.c b/arch/s390/kernel/processor.c
new file mode 100644
index 0000000000..0a999c8226
--- /dev/null
+++ b/arch/s390/kernel/processor.c
@@ -0,0 +1,366 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ *  Copyright IBM Corp. 2008
+ *  Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com)
+ */
+
+#define KMSG_COMPONENT "cpu"
+#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
+
+#include <linux/stop_machine.h>
+#include <linux/bitops.h>
+#include <linux/kernel.h>
+#include <linux/random.h>
+#include <linux/sched/mm.h>
+#include <linux/init.h>
+#include <linux/seq_file.h>
+#include <linux/mm_types.h>
+#include <linux/delay.h>
+#include <linux/cpu.h>
+
+#include <asm/diag.h>
+#include <asm/facility.h>
+#include <asm/elf.h>
+#include <asm/lowcore.h>
+#include <asm/param.h>
+#include <asm/sclp.h>
+#include <asm/smp.h>
+
+unsigned long __read_mostly elf_hwcap;
+char elf_platform[ELF_PLATFORM_SIZE];
+
+struct cpu_info {
+	unsigned int cpu_mhz_dynamic;
+	unsigned int cpu_mhz_static;
+	struct cpuid cpu_id;
+};
+
+static DEFINE_PER_CPU(struct cpu_info, cpu_info);
+static DEFINE_PER_CPU(int, cpu_relax_retry);
+
+static bool machine_has_cpu_mhz;
+
+void __init cpu_detect_mhz_feature(void)
+{
+	if (test_facility(34) && __ecag(ECAG_CPU_ATTRIBUTE, 0) != -1UL)
+		machine_has_cpu_mhz = true;
+}
+
+static void update_cpu_mhz(void *arg)
+{
+	unsigned long mhz;
+	struct cpu_info *c;
+
+	mhz = __ecag(ECAG_CPU_ATTRIBUTE, 0);
+	c = this_cpu_ptr(&cpu_info);
+	c->cpu_mhz_dynamic = mhz >> 32;
+	c->cpu_mhz_static = mhz & 0xffffffff;
+}
+
+void s390_update_cpu_mhz(void)
+{
+	s390_adjust_jiffies();
+	if (machine_has_cpu_mhz)
+		on_each_cpu(update_cpu_mhz, NULL, 0);
+}
+
+void notrace stop_machine_yield(const struct cpumask *cpumask)
+{
+	int cpu, this_cpu;
+
+	this_cpu = smp_processor_id();
+	if (__this_cpu_inc_return(cpu_relax_retry) >= spin_retry) {
+		__this_cpu_write(cpu_relax_retry, 0);
+		cpu = cpumask_next_wrap(this_cpu, cpumask, this_cpu, false);
+		if (cpu >= nr_cpu_ids)
+			return;
+		if (arch_vcpu_is_preempted(cpu))
+			smp_yield_cpu(cpu);
+	}
+}
+
+/*
+ * cpu_init - initializes state that is per-CPU.
+ */
+void cpu_init(void)
+{
+	struct cpuid *id = this_cpu_ptr(&cpu_info.cpu_id);
+
+	get_cpu_id(id);
+	if (machine_has_cpu_mhz)
+		update_cpu_mhz(NULL);
+	mmgrab(&init_mm);
+	current->active_mm = &init_mm;
+	BUG_ON(current->mm);
+	enter_lazy_tlb(&init_mm, current);
+}
+
+static void show_facilities(struct seq_file *m)
+{
+	unsigned int bit;
+
+	seq_puts(m, "facilities      :");
+	for_each_set_bit_inv(bit, (long *)&stfle_fac_list, MAX_FACILITY_BIT)
+		seq_printf(m, " %d", bit);
+	seq_putc(m, '\n');
+}
+
+static void show_cpu_summary(struct seq_file *m, void *v)
+{
+	static const char *hwcap_str[] = {
+		[HWCAP_NR_ESAN3]	= "esan3",
+		[HWCAP_NR_ZARCH]	= "zarch",
+		[HWCAP_NR_STFLE]	= "stfle",
+		[HWCAP_NR_MSA]		= "msa",
+		[HWCAP_NR_LDISP]	= "ldisp",
+		[HWCAP_NR_EIMM]		= "eimm",
+		[HWCAP_NR_DFP]		= "dfp",
+		[HWCAP_NR_HPAGE]	= "edat",
+		[HWCAP_NR_ETF3EH]	= "etf3eh",
+		[HWCAP_NR_HIGH_GPRS]	= "highgprs",
+		[HWCAP_NR_TE]		= "te",
+		[HWCAP_NR_VXRS]		= "vx",
+		[HWCAP_NR_VXRS_BCD]	= "vxd",
+		[HWCAP_NR_VXRS_EXT]	= "vxe",
+		[HWCAP_NR_GS]		= "gs",
+		[HWCAP_NR_VXRS_EXT2]	= "vxe2",
+		[HWCAP_NR_VXRS_PDE]	= "vxp",
+		[HWCAP_NR_SORT]		= "sort",
+		[HWCAP_NR_DFLT]		= "dflt",
+		[HWCAP_NR_VXRS_PDE2]	= "vxp2",
+		[HWCAP_NR_NNPA]		= "nnpa",
+		[HWCAP_NR_PCI_MIO]	= "pcimio",
+		[HWCAP_NR_SIE]		= "sie",
+	};
+	int i, cpu;
+
+	BUILD_BUG_ON(ARRAY_SIZE(hwcap_str) != HWCAP_NR_MAX);
+	seq_printf(m, "vendor_id       : IBM/S390\n"
+		   "# processors    : %i\n"
+		   "bogomips per cpu: %lu.%02lu\n",
+		   num_online_cpus(), loops_per_jiffy/(500000/HZ),
+		   (loops_per_jiffy/(5000/HZ))%100);
+	seq_printf(m, "max thread id   : %d\n", smp_cpu_mtid);
+	seq_puts(m, "features\t: ");
+	for (i = 0; i < ARRAY_SIZE(hwcap_str); i++)
+		if (hwcap_str[i] && (elf_hwcap & (1UL << i)))
+			seq_printf(m, "%s ", hwcap_str[i]);
+	seq_puts(m, "\n");
+	show_facilities(m);
+	show_cacheinfo(m);
+	for_each_online_cpu(cpu) {
+		struct cpuid *id = &per_cpu(cpu_info.cpu_id, cpu);
+
+		seq_printf(m, "processor %d: "
+			   "version = %02X,  "
+			   "identification = %06X,  "
+			   "machine = %04X\n",
+			   cpu, id->version, id->ident, id->machine);
+	}
+}
+
+static int __init setup_hwcaps(void)
+{
+	/* instructions named N3, "backported" to esa-mode */
+	elf_hwcap |= HWCAP_ESAN3;
+
+	/* z/Architecture mode active */
+	elf_hwcap |= HWCAP_ZARCH;
+
+	/* store-facility-list-extended */
+	if (test_facility(7))
+		elf_hwcap |= HWCAP_STFLE;
+
+	/* message-security assist */
+	if (test_facility(17))
+		elf_hwcap |= HWCAP_MSA;
+
+	/* long-displacement */
+	if (test_facility(19))
+		elf_hwcap |= HWCAP_LDISP;
+
+	/* extended-immediate */
+	elf_hwcap |= HWCAP_EIMM;
+
+	/* extended-translation facility 3 enhancement */
+	if (test_facility(22) && test_facility(30))
+		elf_hwcap |= HWCAP_ETF3EH;
+
+	/* decimal floating point & perform floating point operation */
+	if (test_facility(42) && test_facility(44))
+		elf_hwcap |= HWCAP_DFP;
+
+	/* huge page support */
+	if (MACHINE_HAS_EDAT1)
+		elf_hwcap |= HWCAP_HPAGE;
+
+	/* 64-bit register support for 31-bit processes */
+	elf_hwcap |= HWCAP_HIGH_GPRS;
+
+	/* transactional execution */
+	if (MACHINE_HAS_TE)
+		elf_hwcap |= HWCAP_TE;
+
+	/*
+	 * Vector extension can be disabled with the "novx" parameter.
+	 * Use MACHINE_HAS_VX instead of facility bit 129.
+	 */
+	if (MACHINE_HAS_VX) {
+		elf_hwcap |= HWCAP_VXRS;
+		if (test_facility(134))
+			elf_hwcap |= HWCAP_VXRS_BCD;
+		if (test_facility(135))
+			elf_hwcap |= HWCAP_VXRS_EXT;
+		if (test_facility(148))
+			elf_hwcap |= HWCAP_VXRS_EXT2;
+		if (test_facility(152))
+			elf_hwcap |= HWCAP_VXRS_PDE;
+		if (test_facility(192))
+			elf_hwcap |= HWCAP_VXRS_PDE2;
+	}
+
+	if (test_facility(150))
+		elf_hwcap |= HWCAP_SORT;
+
+	if (test_facility(151))
+		elf_hwcap |= HWCAP_DFLT;
+
+	if (test_facility(165))
+		elf_hwcap |= HWCAP_NNPA;
+
+	/* guarded storage */
+	if (MACHINE_HAS_GS)
+		elf_hwcap |= HWCAP_GS;
+
+	if (MACHINE_HAS_PCI_MIO)
+		elf_hwcap |= HWCAP_PCI_MIO;
+
+	/* virtualization support */
+	if (sclp.has_sief2)
+		elf_hwcap |= HWCAP_SIE;
+
+	return 0;
+}
+arch_initcall(setup_hwcaps);
+
+static int __init setup_elf_platform(void)
+{
+	struct cpuid cpu_id;
+
+	get_cpu_id(&cpu_id);
+	add_device_randomness(&cpu_id, sizeof(cpu_id));
+	switch (cpu_id.machine) {
+	default:	/* Use "z10" as default. */
+		strcpy(elf_platform, "z10");
+		break;
+	case 0x2817:
+	case 0x2818:
+		strcpy(elf_platform, "z196");
+		break;
+	case 0x2827:
+	case 0x2828:
+		strcpy(elf_platform, "zEC12");
+		break;
+	case 0x2964:
+	case 0x2965:
+		strcpy(elf_platform, "z13");
+		break;
+	case 0x3906:
+	case 0x3907:
+		strcpy(elf_platform, "z14");
+		break;
+	case 0x8561:
+	case 0x8562:
+		strcpy(elf_platform, "z15");
+		break;
+	case 0x3931:
+	case 0x3932:
+		strcpy(elf_platform, "z16");
+		break;
+	}
+	return 0;
+}
+arch_initcall(setup_elf_platform);
+
+static void show_cpu_topology(struct seq_file *m, unsigned long n)
+{
+#ifdef CONFIG_SCHED_TOPOLOGY
+	seq_printf(m, "physical id     : %d\n", topology_physical_package_id(n));
+	seq_printf(m, "core id         : %d\n", topology_core_id(n));
+	seq_printf(m, "book id         : %d\n", topology_book_id(n));
+	seq_printf(m, "drawer id       : %d\n", topology_drawer_id(n));
+	seq_printf(m, "dedicated       : %d\n", topology_cpu_dedicated(n));
+	seq_printf(m, "address         : %d\n", smp_cpu_get_cpu_address(n));
+	seq_printf(m, "siblings        : %d\n", cpumask_weight(topology_core_cpumask(n)));
+	seq_printf(m, "cpu cores       : %d\n", topology_booted_cores(n));
+#endif /* CONFIG_SCHED_TOPOLOGY */
+}
+
+static void show_cpu_ids(struct seq_file *m, unsigned long n)
+{
+	struct cpuid *id = &per_cpu(cpu_info.cpu_id, n);
+
+	seq_printf(m, "version         : %02X\n", id->version);
+	seq_printf(m, "identification  : %06X\n", id->ident);
+	seq_printf(m, "machine         : %04X\n", id->machine);
+}
+
+static void show_cpu_mhz(struct seq_file *m, unsigned long n)
+{
+	struct cpu_info *c = per_cpu_ptr(&cpu_info, n);
+
+	if (!machine_has_cpu_mhz)
+		return;
+	seq_printf(m, "cpu MHz dynamic : %d\n", c->cpu_mhz_dynamic);
+	seq_printf(m, "cpu MHz static  : %d\n", c->cpu_mhz_static);
+}
+
+/*
+ * show_cpuinfo - Get information on one CPU for use by procfs.
+ */
+static int show_cpuinfo(struct seq_file *m, void *v)
+{
+	unsigned long n = (unsigned long) v - 1;
+	unsigned long first = cpumask_first(cpu_online_mask);
+
+	if (n == first)
+		show_cpu_summary(m, v);
+	seq_printf(m, "\ncpu number      : %ld\n", n);
+	show_cpu_topology(m, n);
+	show_cpu_ids(m, n);
+	show_cpu_mhz(m, n);
+	return 0;
+}
+
+static inline void *c_update(loff_t *pos)
+{
+	if (*pos)
+		*pos = cpumask_next(*pos - 1, cpu_online_mask);
+	else
+		*pos = cpumask_first(cpu_online_mask);
+	return *pos < nr_cpu_ids ? (void *)*pos + 1 : NULL;
+}
+
+static void *c_start(struct seq_file *m, loff_t *pos)
+{
+	cpus_read_lock();
+	return c_update(pos);
+}
+
+static void *c_next(struct seq_file *m, void *v, loff_t *pos)
+{
+	++*pos;
+	return c_update(pos);
+}
+
+static void c_stop(struct seq_file *m, void *v)
+{
+	cpus_read_unlock();
+}
+
+const struct seq_operations cpuinfo_op = {
+	.start	= c_start,
+	.next	= c_next,
+	.stop	= c_stop,
+	.show	= show_cpuinfo,
+};
diff --git a/arch/s390/kernel/ptrace.c b/arch/s390/kernel/ptrace.c
new file mode 100644
index 0000000000..ea244a73ef
--- /dev/null
+++ b/arch/s390/kernel/ptrace.c
@@ -0,0 +1,1608 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ *  Ptrace user space interface.
+ *
+ *    Copyright IBM Corp. 1999, 2010
+ *    Author(s): Denis Joseph Barrow
+ *               Martin Schwidefsky (schwidefsky@de.ibm.com)
+ */
+
+#include "asm/ptrace.h"
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/sched/task_stack.h>
+#include <linux/mm.h>
+#include <linux/smp.h>
+#include <linux/errno.h>
+#include <linux/ptrace.h>
+#include <linux/user.h>
+#include <linux/security.h>
+#include <linux/audit.h>
+#include <linux/signal.h>
+#include <linux/elf.h>
+#include <linux/regset.h>
+#include <linux/seccomp.h>
+#include <linux/compat.h>
+#include <trace/syscall.h>
+#include <asm/page.h>
+#include <linux/uaccess.h>
+#include <asm/unistd.h>
+#include <asm/switch_to.h>
+#include <asm/runtime_instr.h>
+#include <asm/facility.h>
+
+#include "entry.h"
+
+#ifdef CONFIG_COMPAT
+#include "compat_ptrace.h"
+#endif
+
+void update_cr_regs(struct task_struct *task)
+{
+	struct pt_regs *regs = task_pt_regs(task);
+	struct thread_struct *thread = &task->thread;
+	struct per_regs old, new;
+	union ctlreg0 cr0_old, cr0_new;
+	union ctlreg2 cr2_old, cr2_new;
+	int cr0_changed, cr2_changed;
+
+	__ctl_store(cr0_old.val, 0, 0);
+	__ctl_store(cr2_old.val, 2, 2);
+	cr0_new = cr0_old;
+	cr2_new = cr2_old;
+	/* Take care of the enable/disable of transactional execution. */
+	if (MACHINE_HAS_TE) {
+		/* Set or clear transaction execution TXC bit 8. */
+		cr0_new.tcx = 1;
+		if (task->thread.per_flags & PER_FLAG_NO_TE)
+			cr0_new.tcx = 0;
+		/* Set or clear transaction execution TDC bits 62 and 63. */
+		cr2_new.tdc = 0;
+		if (task->thread.per_flags & PER_FLAG_TE_ABORT_RAND) {
+			if (task->thread.per_flags & PER_FLAG_TE_ABORT_RAND_TEND)
+				cr2_new.tdc = 1;
+			else
+				cr2_new.tdc = 2;
+		}
+	}
+	/* Take care of enable/disable of guarded storage. */
+	if (MACHINE_HAS_GS) {
+		cr2_new.gse = 0;
+		if (task->thread.gs_cb)
+			cr2_new.gse = 1;
+	}
+	/* Load control register 0/2 iff changed */
+	cr0_changed = cr0_new.val != cr0_old.val;
+	cr2_changed = cr2_new.val != cr2_old.val;
+	if (cr0_changed)
+		__ctl_load(cr0_new.val, 0, 0);
+	if (cr2_changed)
+		__ctl_load(cr2_new.val, 2, 2);
+	/* Copy user specified PER registers */
+	new.control = thread->per_user.control;
+	new.start = thread->per_user.start;
+	new.end = thread->per_user.end;
+
+	/* merge TIF_SINGLE_STEP into user specified PER registers. */
+	if (test_tsk_thread_flag(task, TIF_SINGLE_STEP) ||
+	    test_tsk_thread_flag(task, TIF_UPROBE_SINGLESTEP)) {
+		if (test_tsk_thread_flag(task, TIF_BLOCK_STEP))
+			new.control |= PER_EVENT_BRANCH;
+		else
+			new.control |= PER_EVENT_IFETCH;
+		new.control |= PER_CONTROL_SUSPENSION;
+		new.control |= PER_EVENT_TRANSACTION_END;
+		if (test_tsk_thread_flag(task, TIF_UPROBE_SINGLESTEP))
+			new.control |= PER_EVENT_IFETCH;
+		new.start = 0;
+		new.end = -1UL;
+	}
+
+	/* Take care of the PER enablement bit in the PSW. */
+	if (!(new.control & PER_EVENT_MASK)) {
+		regs->psw.mask &= ~PSW_MASK_PER;
+		return;
+	}
+	regs->psw.mask |= PSW_MASK_PER;
+	__ctl_store(old, 9, 11);
+	if (memcmp(&new, &old, sizeof(struct per_regs)) != 0)
+		__ctl_load(new, 9, 11);
+}
+
+void user_enable_single_step(struct task_struct *task)
+{
+	clear_tsk_thread_flag(task, TIF_BLOCK_STEP);
+	set_tsk_thread_flag(task, TIF_SINGLE_STEP);
+}
+
+void user_disable_single_step(struct task_struct *task)
+{
+	clear_tsk_thread_flag(task, TIF_BLOCK_STEP);
+	clear_tsk_thread_flag(task, TIF_SINGLE_STEP);
+}
+
+void user_enable_block_step(struct task_struct *task)
+{
+	set_tsk_thread_flag(task, TIF_SINGLE_STEP);
+	set_tsk_thread_flag(task, TIF_BLOCK_STEP);
+}
+
+/*
+ * Called by kernel/ptrace.c when detaching..
+ *
+ * Clear all debugging related fields.
+ */
+void ptrace_disable(struct task_struct *task)
+{
+	memset(&task->thread.per_user, 0, sizeof(task->thread.per_user));
+	memset(&task->thread.per_event, 0, sizeof(task->thread.per_event));
+	clear_tsk_thread_flag(task, TIF_SINGLE_STEP);
+	clear_tsk_thread_flag(task, TIF_PER_TRAP);
+	task->thread.per_flags = 0;
+}
+
+#define __ADDR_MASK 7
+
+static inline unsigned long __peek_user_per(struct task_struct *child,
+					    addr_t addr)
+{
+	if (addr == offsetof(struct per_struct_kernel, cr9))
+		/* Control bits of the active per set. */
+		return test_thread_flag(TIF_SINGLE_STEP) ?
+			PER_EVENT_IFETCH : child->thread.per_user.control;
+	else if (addr == offsetof(struct per_struct_kernel, cr10))
+		/* Start address of the active per set. */
+		return test_thread_flag(TIF_SINGLE_STEP) ?
+			0 : child->thread.per_user.start;
+	else if (addr == offsetof(struct per_struct_kernel, cr11))
+		/* End address of the active per set. */
+		return test_thread_flag(TIF_SINGLE_STEP) ?
+			-1UL : child->thread.per_user.end;
+	else if (addr == offsetof(struct per_struct_kernel, bits))
+		/* Single-step bit. */
+		return test_thread_flag(TIF_SINGLE_STEP) ?
+			(1UL << (BITS_PER_LONG - 1)) : 0;
+	else if (addr == offsetof(struct per_struct_kernel, starting_addr))
+		/* Start address of the user specified per set. */
+		return child->thread.per_user.start;
+	else if (addr == offsetof(struct per_struct_kernel, ending_addr))
+		/* End address of the user specified per set. */
+		return child->thread.per_user.end;
+	else if (addr == offsetof(struct per_struct_kernel, perc_atmid))
+		/* PER code, ATMID and AI of the last PER trap */
+		return (unsigned long)
+			child->thread.per_event.cause << (BITS_PER_LONG - 16);
+	else if (addr == offsetof(struct per_struct_kernel, address))
+		/* Address of the last PER trap */
+		return child->thread.per_event.address;
+	else if (addr == offsetof(struct per_struct_kernel, access_id))
+		/* Access id of the last PER trap */
+		return (unsigned long)
+			child->thread.per_event.paid << (BITS_PER_LONG - 8);
+	return 0;
+}
+
+/*
+ * Read the word at offset addr from the user area of a process. The
+ * trouble here is that the information is littered over different
+ * locations. The process registers are found on the kernel stack,
+ * the floating point stuff and the trace settings are stored in
+ * the task structure. In addition the different structures in
+ * struct user contain pad bytes that should be read as zeroes.
+ * Lovely...
+ */
+static unsigned long __peek_user(struct task_struct *child, addr_t addr)
+{
+	addr_t offset, tmp;
+
+	if (addr < offsetof(struct user, regs.acrs)) {
+		/*
+		 * psw and gprs are stored on the stack
+		 */
+		tmp = *(addr_t *)((addr_t) &task_pt_regs(child)->psw + addr);
+		if (addr == offsetof(struct user, regs.psw.mask)) {
+			/* Return a clean psw mask. */
+			tmp &= PSW_MASK_USER | PSW_MASK_RI;
+			tmp |= PSW_USER_BITS;
+		}
+
+	} else if (addr < offsetof(struct user, regs.orig_gpr2)) {
+		/*
+		 * access registers are stored in the thread structure
+		 */
+		offset = addr - offsetof(struct user, regs.acrs);
+		/*
+		 * Very special case: old & broken 64 bit gdb reading
+		 * from acrs[15]. Result is a 64 bit value. Read the
+		 * 32 bit acrs[15] value and shift it by 32. Sick...
+		 */
+		if (addr == offsetof(struct user, regs.acrs[15]))
+			tmp = ((unsigned long) child->thread.acrs[15]) << 32;
+		else
+			tmp = *(addr_t *)((addr_t) &child->thread.acrs + offset);
+
+	} else if (addr == offsetof(struct user, regs.orig_gpr2)) {
+		/*
+		 * orig_gpr2 is stored on the kernel stack
+		 */
+		tmp = (addr_t) task_pt_regs(child)->orig_gpr2;
+
+	} else if (addr < offsetof(struct user, regs.fp_regs)) {
+		/*
+		 * prevent reads of padding hole between
+		 * orig_gpr2 and fp_regs on s390.
+		 */
+		tmp = 0;
+
+	} else if (addr == offsetof(struct user, regs.fp_regs.fpc)) {
+		/*
+		 * floating point control reg. is in the thread structure
+		 */
+		tmp = child->thread.fpu.fpc;
+		tmp <<= BITS_PER_LONG - 32;
+
+	} else if (addr < offsetof(struct user, regs.fp_regs) + sizeof(s390_fp_regs)) {
+		/*
+		 * floating point regs. are either in child->thread.fpu
+		 * or the child->thread.fpu.vxrs array
+		 */
+		offset = addr - offsetof(struct user, regs.fp_regs.fprs);
+		if (MACHINE_HAS_VX)
+			tmp = *(addr_t *)
+			       ((addr_t) child->thread.fpu.vxrs + 2*offset);
+		else
+			tmp = *(addr_t *)
+			       ((addr_t) child->thread.fpu.fprs + offset);
+
+	} else if (addr < offsetof(struct user, regs.per_info) + sizeof(per_struct)) {
+		/*
+		 * Handle access to the per_info structure.
+		 */
+		addr -= offsetof(struct user, regs.per_info);
+		tmp = __peek_user_per(child, addr);
+
+	} else
+		tmp = 0;
+
+	return tmp;
+}
+
+static int
+peek_user(struct task_struct *child, addr_t addr, addr_t data)
+{
+	addr_t tmp, mask;
+
+	/*
+	 * Stupid gdb peeks/pokes the access registers in 64 bit with
+	 * an alignment of 4. Programmers from hell...
+	 */
+	mask = __ADDR_MASK;
+	if (addr >= offsetof(struct user, regs.acrs) &&
+	    addr < offsetof(struct user, regs.orig_gpr2))
+		mask = 3;
+	if ((addr & mask) || addr > sizeof(struct user) - __ADDR_MASK)
+		return -EIO;
+
+	tmp = __peek_user(child, addr);
+	return put_user(tmp, (addr_t __user *) data);
+}
+
+static inline void __poke_user_per(struct task_struct *child,
+				   addr_t addr, addr_t data)
+{
+	/*
+	 * There are only three fields in the per_info struct that the
+	 * debugger user can write to.
+	 * 1) cr9: the debugger wants to set a new PER event mask
+	 * 2) starting_addr: the debugger wants to set a new starting
+	 *    address to use with the PER event mask.
+	 * 3) ending_addr: the debugger wants to set a new ending
+	 *    address to use with the PER event mask.
+	 * The user specified PER event mask and the start and end
+	 * addresses are used only if single stepping is not in effect.
+	 * Writes to any other field in per_info are ignored.
+	 */
+	if (addr == offsetof(struct per_struct_kernel, cr9))
+		/* PER event mask of the user specified per set. */
+		child->thread.per_user.control =
+			data & (PER_EVENT_MASK | PER_CONTROL_MASK);
+	else if (addr == offsetof(struct per_struct_kernel, starting_addr))
+		/* Starting address of the user specified per set. */
+		child->thread.per_user.start = data;
+	else if (addr == offsetof(struct per_struct_kernel, ending_addr))
+		/* Ending address of the user specified per set. */
+		child->thread.per_user.end = data;
+}
+
+/*
+ * Write a word to the user area of a process at location addr. This
+ * operation does have an additional problem compared to peek_user.
+ * Stores to the program status word and on the floating point
+ * control register needs to get checked for validity.
+ */
+static int __poke_user(struct task_struct *child, addr_t addr, addr_t data)
+{
+	addr_t offset;
+
+
+	if (addr < offsetof(struct user, regs.acrs)) {
+		struct pt_regs *regs = task_pt_regs(child);
+		/*
+		 * psw and gprs are stored on the stack
+		 */
+		if (addr == offsetof(struct user, regs.psw.mask)) {
+			unsigned long mask = PSW_MASK_USER;
+
+			mask |= is_ri_task(child) ? PSW_MASK_RI : 0;
+			if ((data ^ PSW_USER_BITS) & ~mask)
+				/* Invalid psw mask. */
+				return -EINVAL;
+			if ((data & PSW_MASK_ASC) == PSW_ASC_HOME)
+				/* Invalid address-space-control bits */
+				return -EINVAL;
+			if ((data & PSW_MASK_EA) && !(data & PSW_MASK_BA))
+				/* Invalid addressing mode bits */
+				return -EINVAL;
+		}
+
+		if (test_pt_regs_flag(regs, PIF_SYSCALL) &&
+			addr == offsetof(struct user, regs.gprs[2])) {
+			struct pt_regs *regs = task_pt_regs(child);
+
+			regs->int_code = 0x20000 | (data & 0xffff);
+		}
+		*(addr_t *)((addr_t) &regs->psw + addr) = data;
+	} else if (addr < offsetof(struct user, regs.orig_gpr2)) {
+		/*
+		 * access registers are stored in the thread structure
+		 */
+		offset = addr - offsetof(struct user, regs.acrs);
+		/*
+		 * Very special case: old & broken 64 bit gdb writing
+		 * to acrs[15] with a 64 bit value. Ignore the lower
+		 * half of the value and write the upper 32 bit to
+		 * acrs[15]. Sick...
+		 */
+		if (addr == offsetof(struct user, regs.acrs[15]))
+			child->thread.acrs[15] = (unsigned int) (data >> 32);
+		else
+			*(addr_t *)((addr_t) &child->thread.acrs + offset) = data;
+
+	} else if (addr == offsetof(struct user, regs.orig_gpr2)) {
+		/*
+		 * orig_gpr2 is stored on the kernel stack
+		 */
+		task_pt_regs(child)->orig_gpr2 = data;
+
+	} else if (addr < offsetof(struct user, regs.fp_regs)) {
+		/*
+		 * prevent writes of padding hole between
+		 * orig_gpr2 and fp_regs on s390.
+		 */
+		return 0;
+
+	} else if (addr == offsetof(struct user, regs.fp_regs.fpc)) {
+		/*
+		 * floating point control reg. is in the thread structure
+		 */
+		if ((unsigned int) data != 0 ||
+		    test_fp_ctl(data >> (BITS_PER_LONG - 32)))
+			return -EINVAL;
+		child->thread.fpu.fpc = data >> (BITS_PER_LONG - 32);
+
+	} else if (addr < offsetof(struct user, regs.fp_regs) + sizeof(s390_fp_regs)) {
+		/*
+		 * floating point regs. are either in child->thread.fpu
+		 * or the child->thread.fpu.vxrs array
+		 */
+		offset = addr - offsetof(struct user, regs.fp_regs.fprs);
+		if (MACHINE_HAS_VX)
+			*(addr_t *)((addr_t)
+				child->thread.fpu.vxrs + 2*offset) = data;
+		else
+			*(addr_t *)((addr_t)
+				child->thread.fpu.fprs + offset) = data;
+
+	} else if (addr < offsetof(struct user, regs.per_info) + sizeof(per_struct)) {
+		/*
+		 * Handle access to the per_info structure.
+		 */
+		addr -= offsetof(struct user, regs.per_info);
+		__poke_user_per(child, addr, data);
+
+	}
+
+	return 0;
+}
+
+static int poke_user(struct task_struct *child, addr_t addr, addr_t data)
+{
+	addr_t mask;
+
+	/*
+	 * Stupid gdb peeks/pokes the access registers in 64 bit with
+	 * an alignment of 4. Programmers from hell indeed...
+	 */
+	mask = __ADDR_MASK;
+	if (addr >= offsetof(struct user, regs.acrs) &&
+	    addr < offsetof(struct user, regs.orig_gpr2))
+		mask = 3;
+	if ((addr & mask) || addr > sizeof(struct user) - __ADDR_MASK)
+		return -EIO;
+
+	return __poke_user(child, addr, data);
+}
+
+long arch_ptrace(struct task_struct *child, long request,
+		 unsigned long addr, unsigned long data)
+{
+	ptrace_area parea; 
+	int copied, ret;
+
+	switch (request) {
+	case PTRACE_PEEKUSR:
+		/* read the word at location addr in the USER area. */
+		return peek_user(child, addr, data);
+
+	case PTRACE_POKEUSR:
+		/* write the word at location addr in the USER area */
+		return poke_user(child, addr, data);
+
+	case PTRACE_PEEKUSR_AREA:
+	case PTRACE_POKEUSR_AREA:
+		if (copy_from_user(&parea, (void __force __user *) addr,
+							sizeof(parea)))
+			return -EFAULT;
+		addr = parea.kernel_addr;
+		data = parea.process_addr;
+		copied = 0;
+		while (copied < parea.len) {
+			if (request == PTRACE_PEEKUSR_AREA)
+				ret = peek_user(child, addr, data);
+			else {
+				addr_t utmp;
+				if (get_user(utmp,
+					     (addr_t __force __user *) data))
+					return -EFAULT;
+				ret = poke_user(child, addr, utmp);
+			}
+			if (ret)
+				return ret;
+			addr += sizeof(unsigned long);
+			data += sizeof(unsigned long);
+			copied += sizeof(unsigned long);
+		}
+		return 0;
+	case PTRACE_GET_LAST_BREAK:
+		return put_user(child->thread.last_break, (unsigned long __user *)data);
+	case PTRACE_ENABLE_TE:
+		if (!MACHINE_HAS_TE)
+			return -EIO;
+		child->thread.per_flags &= ~PER_FLAG_NO_TE;
+		return 0;
+	case PTRACE_DISABLE_TE:
+		if (!MACHINE_HAS_TE)
+			return -EIO;
+		child->thread.per_flags |= PER_FLAG_NO_TE;
+		child->thread.per_flags &= ~PER_FLAG_TE_ABORT_RAND;
+		return 0;
+	case PTRACE_TE_ABORT_RAND:
+		if (!MACHINE_HAS_TE || (child->thread.per_flags & PER_FLAG_NO_TE))
+			return -EIO;
+		switch (data) {
+		case 0UL:
+			child->thread.per_flags &= ~PER_FLAG_TE_ABORT_RAND;
+			break;
+		case 1UL:
+			child->thread.per_flags |= PER_FLAG_TE_ABORT_RAND;
+			child->thread.per_flags |= PER_FLAG_TE_ABORT_RAND_TEND;
+			break;
+		case 2UL:
+			child->thread.per_flags |= PER_FLAG_TE_ABORT_RAND;
+			child->thread.per_flags &= ~PER_FLAG_TE_ABORT_RAND_TEND;
+			break;
+		default:
+			return -EINVAL;
+		}
+		return 0;
+	default:
+		return ptrace_request(child, request, addr, data);
+	}
+}
+
+#ifdef CONFIG_COMPAT
+/*
+ * Now the fun part starts... a 31 bit program running in the
+ * 31 bit emulation tracing another program. PTRACE_PEEKTEXT,
+ * PTRACE_PEEKDATA, PTRACE_POKETEXT and PTRACE_POKEDATA are easy
+ * to handle, the difference to the 64 bit versions of the requests
+ * is that the access is done in multiples of 4 byte instead of
+ * 8 bytes (sizeof(unsigned long) on 31/64 bit).
+ * The ugly part are PTRACE_PEEKUSR, PTRACE_PEEKUSR_AREA,
+ * PTRACE_POKEUSR and PTRACE_POKEUSR_AREA. If the traced program
+ * is a 31 bit program too, the content of struct user can be
+ * emulated. A 31 bit program peeking into the struct user of
+ * a 64 bit program is a no-no.
+ */
+
+/*
+ * Same as peek_user_per but for a 31 bit program.
+ */
+static inline __u32 __peek_user_per_compat(struct task_struct *child,
+					   addr_t addr)
+{
+	if (addr == offsetof(struct compat_per_struct_kernel, cr9))
+		/* Control bits of the active per set. */
+		return (__u32) test_thread_flag(TIF_SINGLE_STEP) ?
+			PER_EVENT_IFETCH : child->thread.per_user.control;
+	else if (addr == offsetof(struct compat_per_struct_kernel, cr10))
+		/* Start address of the active per set. */
+		return (__u32) test_thread_flag(TIF_SINGLE_STEP) ?
+			0 : child->thread.per_user.start;
+	else if (addr == offsetof(struct compat_per_struct_kernel, cr11))
+		/* End address of the active per set. */
+		return test_thread_flag(TIF_SINGLE_STEP) ?
+			PSW32_ADDR_INSN : child->thread.per_user.end;
+	else if (addr == offsetof(struct compat_per_struct_kernel, bits))
+		/* Single-step bit. */
+		return (__u32) test_thread_flag(TIF_SINGLE_STEP) ?
+			0x80000000 : 0;
+	else if (addr == offsetof(struct compat_per_struct_kernel, starting_addr))
+		/* Start address of the user specified per set. */
+		return (__u32) child->thread.per_user.start;
+	else if (addr == offsetof(struct compat_per_struct_kernel, ending_addr))
+		/* End address of the user specified per set. */
+		return (__u32) child->thread.per_user.end;
+	else if (addr == offsetof(struct compat_per_struct_kernel, perc_atmid))
+		/* PER code, ATMID and AI of the last PER trap */
+		return (__u32) child->thread.per_event.cause << 16;
+	else if (addr == offsetof(struct compat_per_struct_kernel, address))
+		/* Address of the last PER trap */
+		return (__u32) child->thread.per_event.address;
+	else if (addr == offsetof(struct compat_per_struct_kernel, access_id))
+		/* Access id of the last PER trap */
+		return (__u32) child->thread.per_event.paid << 24;
+	return 0;
+}
+
+/*
+ * Same as peek_user but for a 31 bit program.
+ */
+static u32 __peek_user_compat(struct task_struct *child, addr_t addr)
+{
+	addr_t offset;
+	__u32 tmp;
+
+	if (addr < offsetof(struct compat_user, regs.acrs)) {
+		struct pt_regs *regs = task_pt_regs(child);
+		/*
+		 * psw and gprs are stored on the stack
+		 */
+		if (addr == offsetof(struct compat_user, regs.psw.mask)) {
+			/* Fake a 31 bit psw mask. */
+			tmp = (__u32)(regs->psw.mask >> 32);
+			tmp &= PSW32_MASK_USER | PSW32_MASK_RI;
+			tmp |= PSW32_USER_BITS;
+		} else if (addr == offsetof(struct compat_user, regs.psw.addr)) {
+			/* Fake a 31 bit psw address. */
+			tmp = (__u32) regs->psw.addr |
+				(__u32)(regs->psw.mask & PSW_MASK_BA);
+		} else {
+			/* gpr 0-15 */
+			tmp = *(__u32 *)((addr_t) &regs->psw + addr*2 + 4);
+		}
+	} else if (addr < offsetof(struct compat_user, regs.orig_gpr2)) {
+		/*
+		 * access registers are stored in the thread structure
+		 */
+		offset = addr - offsetof(struct compat_user, regs.acrs);
+		tmp = *(__u32*)((addr_t) &child->thread.acrs + offset);
+
+	} else if (addr == offsetof(struct compat_user, regs.orig_gpr2)) {
+		/*
+		 * orig_gpr2 is stored on the kernel stack
+		 */
+		tmp = *(__u32*)((addr_t) &task_pt_regs(child)->orig_gpr2 + 4);
+
+	} else if (addr < offsetof(struct compat_user, regs.fp_regs)) {
+		/*
+		 * prevent reads of padding hole between
+		 * orig_gpr2 and fp_regs on s390.
+		 */
+		tmp = 0;
+
+	} else if (addr == offsetof(struct compat_user, regs.fp_regs.fpc)) {
+		/*
+		 * floating point control reg. is in the thread structure
+		 */
+		tmp = child->thread.fpu.fpc;
+
+	} else if (addr < offsetof(struct compat_user, regs.fp_regs) + sizeof(s390_fp_regs)) {
+		/*
+		 * floating point regs. are either in child->thread.fpu
+		 * or the child->thread.fpu.vxrs array
+		 */
+		offset = addr - offsetof(struct compat_user, regs.fp_regs.fprs);
+		if (MACHINE_HAS_VX)
+			tmp = *(__u32 *)
+			       ((addr_t) child->thread.fpu.vxrs + 2*offset);
+		else
+			tmp = *(__u32 *)
+			       ((addr_t) child->thread.fpu.fprs + offset);
+
+	} else if (addr < offsetof(struct compat_user, regs.per_info) + sizeof(struct compat_per_struct_kernel)) {
+		/*
+		 * Handle access to the per_info structure.
+		 */
+		addr -= offsetof(struct compat_user, regs.per_info);
+		tmp = __peek_user_per_compat(child, addr);
+
+	} else
+		tmp = 0;
+
+	return tmp;
+}
+
+static int peek_user_compat(struct task_struct *child,
+			    addr_t addr, addr_t data)
+{
+	__u32 tmp;
+
+	if (!is_compat_task() || (addr & 3) || addr > sizeof(struct user) - 3)
+		return -EIO;
+
+	tmp = __peek_user_compat(child, addr);
+	return put_user(tmp, (__u32 __user *) data);
+}
+
+/*
+ * Same as poke_user_per but for a 31 bit program.
+ */
+static inline void __poke_user_per_compat(struct task_struct *child,
+					  addr_t addr, __u32 data)
+{
+	if (addr == offsetof(struct compat_per_struct_kernel, cr9))
+		/* PER event mask of the user specified per set. */
+		child->thread.per_user.control =
+			data & (PER_EVENT_MASK | PER_CONTROL_MASK);
+	else if (addr == offsetof(struct compat_per_struct_kernel, starting_addr))
+		/* Starting address of the user specified per set. */
+		child->thread.per_user.start = data;
+	else if (addr == offsetof(struct compat_per_struct_kernel, ending_addr))
+		/* Ending address of the user specified per set. */
+		child->thread.per_user.end = data;
+}
+
+/*
+ * Same as poke_user but for a 31 bit program.
+ */
+static int __poke_user_compat(struct task_struct *child,
+			      addr_t addr, addr_t data)
+{
+	__u32 tmp = (__u32) data;
+	addr_t offset;
+
+	if (addr < offsetof(struct compat_user, regs.acrs)) {
+		struct pt_regs *regs = task_pt_regs(child);
+		/*
+		 * psw, gprs, acrs and orig_gpr2 are stored on the stack
+		 */
+		if (addr == offsetof(struct compat_user, regs.psw.mask)) {
+			__u32 mask = PSW32_MASK_USER;
+
+			mask |= is_ri_task(child) ? PSW32_MASK_RI : 0;
+			/* Build a 64 bit psw mask from 31 bit mask. */
+			if ((tmp ^ PSW32_USER_BITS) & ~mask)
+				/* Invalid psw mask. */
+				return -EINVAL;
+			if ((data & PSW32_MASK_ASC) == PSW32_ASC_HOME)
+				/* Invalid address-space-control bits */
+				return -EINVAL;
+			regs->psw.mask = (regs->psw.mask & ~PSW_MASK_USER) |
+				(regs->psw.mask & PSW_MASK_BA) |
+				(__u64)(tmp & mask) << 32;
+		} else if (addr == offsetof(struct compat_user, regs.psw.addr)) {
+			/* Build a 64 bit psw address from 31 bit address. */
+			regs->psw.addr = (__u64) tmp & PSW32_ADDR_INSN;
+			/* Transfer 31 bit amode bit to psw mask. */
+			regs->psw.mask = (regs->psw.mask & ~PSW_MASK_BA) |
+				(__u64)(tmp & PSW32_ADDR_AMODE);
+		} else {
+			if (test_pt_regs_flag(regs, PIF_SYSCALL) &&
+				addr == offsetof(struct compat_user, regs.gprs[2])) {
+				struct pt_regs *regs = task_pt_regs(child);
+
+				regs->int_code = 0x20000 | (data & 0xffff);
+			}
+			/* gpr 0-15 */
+			*(__u32*)((addr_t) &regs->psw + addr*2 + 4) = tmp;
+		}
+	} else if (addr < offsetof(struct compat_user, regs.orig_gpr2)) {
+		/*
+		 * access registers are stored in the thread structure
+		 */
+		offset = addr - offsetof(struct compat_user, regs.acrs);
+		*(__u32*)((addr_t) &child->thread.acrs + offset) = tmp;
+
+	} else if (addr == offsetof(struct compat_user, regs.orig_gpr2)) {
+		/*
+		 * orig_gpr2 is stored on the kernel stack
+		 */
+		*(__u32*)((addr_t) &task_pt_regs(child)->orig_gpr2 + 4) = tmp;
+
+	} else if (addr < offsetof(struct compat_user, regs.fp_regs)) {
+		/*
+		 * prevent writess of padding hole between
+		 * orig_gpr2 and fp_regs on s390.
+		 */
+		return 0;
+
+	} else if (addr == offsetof(struct compat_user, regs.fp_regs.fpc)) {
+		/*
+		 * floating point control reg. is in the thread structure
+		 */
+		if (test_fp_ctl(tmp))
+			return -EINVAL;
+		child->thread.fpu.fpc = data;
+
+	} else if (addr < offsetof(struct compat_user, regs.fp_regs) + sizeof(s390_fp_regs)) {
+		/*
+		 * floating point regs. are either in child->thread.fpu
+		 * or the child->thread.fpu.vxrs array
+		 */
+		offset = addr - offsetof(struct compat_user, regs.fp_regs.fprs);
+		if (MACHINE_HAS_VX)
+			*(__u32 *)((addr_t)
+				child->thread.fpu.vxrs + 2*offset) = tmp;
+		else
+			*(__u32 *)((addr_t)
+				child->thread.fpu.fprs + offset) = tmp;
+
+	} else if (addr < offsetof(struct compat_user, regs.per_info) + sizeof(struct compat_per_struct_kernel)) {
+		/*
+		 * Handle access to the per_info structure.
+		 */
+		addr -= offsetof(struct compat_user, regs.per_info);
+		__poke_user_per_compat(child, addr, data);
+	}
+
+	return 0;
+}
+
+static int poke_user_compat(struct task_struct *child,
+			    addr_t addr, addr_t data)
+{
+	if (!is_compat_task() || (addr & 3) ||
+	    addr > sizeof(struct compat_user) - 3)
+		return -EIO;
+
+	return __poke_user_compat(child, addr, data);
+}
+
+long compat_arch_ptrace(struct task_struct *child, compat_long_t request,
+			compat_ulong_t caddr, compat_ulong_t cdata)
+{
+	unsigned long addr = caddr;
+	unsigned long data = cdata;
+	compat_ptrace_area parea;
+	int copied, ret;
+
+	switch (request) {
+	case PTRACE_PEEKUSR:
+		/* read the word at location addr in the USER area. */
+		return peek_user_compat(child, addr, data);
+
+	case PTRACE_POKEUSR:
+		/* write the word at location addr in the USER area */
+		return poke_user_compat(child, addr, data);
+
+	case PTRACE_PEEKUSR_AREA:
+	case PTRACE_POKEUSR_AREA:
+		if (copy_from_user(&parea, (void __force __user *) addr,
+							sizeof(parea)))
+			return -EFAULT;
+		addr = parea.kernel_addr;
+		data = parea.process_addr;
+		copied = 0;
+		while (copied < parea.len) {
+			if (request == PTRACE_PEEKUSR_AREA)
+				ret = peek_user_compat(child, addr, data);
+			else {
+				__u32 utmp;
+				if (get_user(utmp,
+					     (__u32 __force __user *) data))
+					return -EFAULT;
+				ret = poke_user_compat(child, addr, utmp);
+			}
+			if (ret)
+				return ret;
+			addr += sizeof(unsigned int);
+			data += sizeof(unsigned int);
+			copied += sizeof(unsigned int);
+		}
+		return 0;
+	case PTRACE_GET_LAST_BREAK:
+		return put_user(child->thread.last_break, (unsigned int __user *)data);
+	}
+	return compat_ptrace_request(child, request, addr, data);
+}
+#endif
+
+/*
+ * user_regset definitions.
+ */
+
+static int s390_regs_get(struct task_struct *target,
+			 const struct user_regset *regset,
+			 struct membuf to)
+{
+	unsigned pos;
+	if (target == current)
+		save_access_regs(target->thread.acrs);
+
+	for (pos = 0; pos < sizeof(s390_regs); pos += sizeof(long))
+		membuf_store(&to, __peek_user(target, pos));
+	return 0;
+}
+
+static int s390_regs_set(struct task_struct *target,
+			 const struct user_regset *regset,
+			 unsigned int pos, unsigned int count,
+			 const void *kbuf, const void __user *ubuf)
+{
+	int rc = 0;
+
+	if (target == current)
+		save_access_regs(target->thread.acrs);
+
+	if (kbuf) {
+		const unsigned long *k = kbuf;
+		while (count > 0 && !rc) {
+			rc = __poke_user(target, pos, *k++);
+			count -= sizeof(*k);
+			pos += sizeof(*k);
+		}
+	} else {
+		const unsigned long  __user *u = ubuf;
+		while (count > 0 && !rc) {
+			unsigned long word;
+			rc = __get_user(word, u++);
+			if (rc)
+				break;
+			rc = __poke_user(target, pos, word);
+			count -= sizeof(*u);
+			pos += sizeof(*u);
+		}
+	}
+
+	if (rc == 0 && target == current)
+		restore_access_regs(target->thread.acrs);
+
+	return rc;
+}
+
+static int s390_fpregs_get(struct task_struct *target,
+			   const struct user_regset *regset,
+			   struct membuf to)
+{
+	_s390_fp_regs fp_regs;
+
+	if (target == current)
+		save_fpu_regs();
+
+	fp_regs.fpc = target->thread.fpu.fpc;
+	fpregs_store(&fp_regs, &target->thread.fpu);
+
+	return membuf_write(&to, &fp_regs, sizeof(fp_regs));
+}
+
+static int s390_fpregs_set(struct task_struct *target,
+			   const struct user_regset *regset, unsigned int pos,
+			   unsigned int count, const void *kbuf,
+			   const void __user *ubuf)
+{
+	int rc = 0;
+	freg_t fprs[__NUM_FPRS];
+
+	if (target == current)
+		save_fpu_regs();
+
+	if (MACHINE_HAS_VX)
+		convert_vx_to_fp(fprs, target->thread.fpu.vxrs);
+	else
+		memcpy(&fprs, target->thread.fpu.fprs, sizeof(fprs));
+
+	/* If setting FPC, must validate it first. */
+	if (count > 0 && pos < offsetof(s390_fp_regs, fprs)) {
+		u32 ufpc[2] = { target->thread.fpu.fpc, 0 };
+		rc = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &ufpc,
+					0, offsetof(s390_fp_regs, fprs));
+		if (rc)
+			return rc;
+		if (ufpc[1] != 0 || test_fp_ctl(ufpc[0]))
+			return -EINVAL;
+		target->thread.fpu.fpc = ufpc[0];
+	}
+
+	if (rc == 0 && count > 0)
+		rc = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
+					fprs, offsetof(s390_fp_regs, fprs), -1);
+	if (rc)
+		return rc;
+
+	if (MACHINE_HAS_VX)
+		convert_fp_to_vx(target->thread.fpu.vxrs, fprs);
+	else
+		memcpy(target->thread.fpu.fprs, &fprs, sizeof(fprs));
+
+	return rc;
+}
+
+static int s390_last_break_get(struct task_struct *target,
+			       const struct user_regset *regset,
+			       struct membuf to)
+{
+	return membuf_store(&to, target->thread.last_break);
+}
+
+static int s390_last_break_set(struct task_struct *target,
+			       const struct user_regset *regset,
+			       unsigned int pos, unsigned int count,
+			       const void *kbuf, const void __user *ubuf)
+{
+	return 0;
+}
+
+static int s390_tdb_get(struct task_struct *target,
+			const struct user_regset *regset,
+			struct membuf to)
+{
+	struct pt_regs *regs = task_pt_regs(target);
+	size_t size;
+
+	if (!(regs->int_code & 0x200))
+		return -ENODATA;
+	size = sizeof(target->thread.trap_tdb.data);
+	return membuf_write(&to, target->thread.trap_tdb.data, size);
+}
+
+static int s390_tdb_set(struct task_struct *target,
+			const struct user_regset *regset,
+			unsigned int pos, unsigned int count,
+			const void *kbuf, const void __user *ubuf)
+{
+	return 0;
+}
+
+static int s390_vxrs_low_get(struct task_struct *target,
+			     const struct user_regset *regset,
+			     struct membuf to)
+{
+	__u64 vxrs[__NUM_VXRS_LOW];
+	int i;
+
+	if (!MACHINE_HAS_VX)
+		return -ENODEV;
+	if (target == current)
+		save_fpu_regs();
+	for (i = 0; i < __NUM_VXRS_LOW; i++)
+		vxrs[i] = target->thread.fpu.vxrs[i].low;
+	return membuf_write(&to, vxrs, sizeof(vxrs));
+}
+
+static int s390_vxrs_low_set(struct task_struct *target,
+			     const struct user_regset *regset,
+			     unsigned int pos, unsigned int count,
+			     const void *kbuf, const void __user *ubuf)
+{
+	__u64 vxrs[__NUM_VXRS_LOW];
+	int i, rc;
+
+	if (!MACHINE_HAS_VX)
+		return -ENODEV;
+	if (target == current)
+		save_fpu_regs();
+
+	for (i = 0; i < __NUM_VXRS_LOW; i++)
+		vxrs[i] = target->thread.fpu.vxrs[i].low;
+
+	rc = user_regset_copyin(&pos, &count, &kbuf, &ubuf, vxrs, 0, -1);
+	if (rc == 0)
+		for (i = 0; i < __NUM_VXRS_LOW; i++)
+			target->thread.fpu.vxrs[i].low = vxrs[i];
+
+	return rc;
+}
+
+static int s390_vxrs_high_get(struct task_struct *target,
+			      const struct user_regset *regset,
+			      struct membuf to)
+{
+	if (!MACHINE_HAS_VX)
+		return -ENODEV;
+	if (target == current)
+		save_fpu_regs();
+	return membuf_write(&to, target->thread.fpu.vxrs + __NUM_VXRS_LOW,
+			    __NUM_VXRS_HIGH * sizeof(__vector128));
+}
+
+static int s390_vxrs_high_set(struct task_struct *target,
+			      const struct user_regset *regset,
+			      unsigned int pos, unsigned int count,
+			      const void *kbuf, const void __user *ubuf)
+{
+	int rc;
+
+	if (!MACHINE_HAS_VX)
+		return -ENODEV;
+	if (target == current)
+		save_fpu_regs();
+
+	rc = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
+				target->thread.fpu.vxrs + __NUM_VXRS_LOW, 0, -1);
+	return rc;
+}
+
+static int s390_system_call_get(struct task_struct *target,
+				const struct user_regset *regset,
+				struct membuf to)
+{
+	return membuf_store(&to, target->thread.system_call);
+}
+
+static int s390_system_call_set(struct task_struct *target,
+				const struct user_regset *regset,
+				unsigned int pos, unsigned int count,
+				const void *kbuf, const void __user *ubuf)
+{
+	unsigned int *data = &target->thread.system_call;
+	return user_regset_copyin(&pos, &count, &kbuf, &ubuf,
+				  data, 0, sizeof(unsigned int));
+}
+
+static int s390_gs_cb_get(struct task_struct *target,
+			  const struct user_regset *regset,
+			  struct membuf to)
+{
+	struct gs_cb *data = target->thread.gs_cb;
+
+	if (!MACHINE_HAS_GS)
+		return -ENODEV;
+	if (!data)
+		return -ENODATA;
+	if (target == current)
+		save_gs_cb(data);
+	return membuf_write(&to, data, sizeof(struct gs_cb));
+}
+
+static int s390_gs_cb_set(struct task_struct *target,
+			  const struct user_regset *regset,
+			  unsigned int pos, unsigned int count,
+			  const void *kbuf, const void __user *ubuf)
+{
+	struct gs_cb gs_cb = { }, *data = NULL;
+	int rc;
+
+	if (!MACHINE_HAS_GS)
+		return -ENODEV;
+	if (!target->thread.gs_cb) {
+		data = kzalloc(sizeof(*data), GFP_KERNEL);
+		if (!data)
+			return -ENOMEM;
+	}
+	if (!target->thread.gs_cb)
+		gs_cb.gsd = 25;
+	else if (target == current)
+		save_gs_cb(&gs_cb);
+	else
+		gs_cb = *target->thread.gs_cb;
+	rc = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
+				&gs_cb, 0, sizeof(gs_cb));
+	if (rc) {
+		kfree(data);
+		return -EFAULT;
+	}
+	preempt_disable();
+	if (!target->thread.gs_cb)
+		target->thread.gs_cb = data;
+	*target->thread.gs_cb = gs_cb;
+	if (target == current) {
+		__ctl_set_bit(2, 4);
+		restore_gs_cb(target->thread.gs_cb);
+	}
+	preempt_enable();
+	return rc;
+}
+
+static int s390_gs_bc_get(struct task_struct *target,
+			  const struct user_regset *regset,
+			  struct membuf to)
+{
+	struct gs_cb *data = target->thread.gs_bc_cb;
+
+	if (!MACHINE_HAS_GS)
+		return -ENODEV;
+	if (!data)
+		return -ENODATA;
+	return membuf_write(&to, data, sizeof(struct gs_cb));
+}
+
+static int s390_gs_bc_set(struct task_struct *target,
+			  const struct user_regset *regset,
+			  unsigned int pos, unsigned int count,
+			  const void *kbuf, const void __user *ubuf)
+{
+	struct gs_cb *data = target->thread.gs_bc_cb;
+
+	if (!MACHINE_HAS_GS)
+		return -ENODEV;
+	if (!data) {
+		data = kzalloc(sizeof(*data), GFP_KERNEL);
+		if (!data)
+			return -ENOMEM;
+		target->thread.gs_bc_cb = data;
+	}
+	return user_regset_copyin(&pos, &count, &kbuf, &ubuf,
+				  data, 0, sizeof(struct gs_cb));
+}
+
+static bool is_ri_cb_valid(struct runtime_instr_cb *cb)
+{
+	return (cb->rca & 0x1f) == 0 &&
+		(cb->roa & 0xfff) == 0 &&
+		(cb->rla & 0xfff) == 0xfff &&
+		cb->s == 1 &&
+		cb->k == 1 &&
+		cb->h == 0 &&
+		cb->reserved1 == 0 &&
+		cb->ps == 1 &&
+		cb->qs == 0 &&
+		cb->pc == 1 &&
+		cb->qc == 0 &&
+		cb->reserved2 == 0 &&
+		cb->reserved3 == 0 &&
+		cb->reserved4 == 0 &&
+		cb->reserved5 == 0 &&
+		cb->reserved6 == 0 &&
+		cb->reserved7 == 0 &&
+		cb->reserved8 == 0 &&
+		cb->rla >= cb->roa &&
+		cb->rca >= cb->roa &&
+		cb->rca <= cb->rla+1 &&
+		cb->m < 3;
+}
+
+static int s390_runtime_instr_get(struct task_struct *target,
+				const struct user_regset *regset,
+				struct membuf to)
+{
+	struct runtime_instr_cb *data = target->thread.ri_cb;
+
+	if (!test_facility(64))
+		return -ENODEV;
+	if (!data)
+		return -ENODATA;
+
+	return membuf_write(&to, data, sizeof(struct runtime_instr_cb));
+}
+
+static int s390_runtime_instr_set(struct task_struct *target,
+				  const struct user_regset *regset,
+				  unsigned int pos, unsigned int count,
+				  const void *kbuf, const void __user *ubuf)
+{
+	struct runtime_instr_cb ri_cb = { }, *data = NULL;
+	int rc;
+
+	if (!test_facility(64))
+		return -ENODEV;
+
+	if (!target->thread.ri_cb) {
+		data = kzalloc(sizeof(*data), GFP_KERNEL);
+		if (!data)
+			return -ENOMEM;
+	}
+
+	if (target->thread.ri_cb) {
+		if (target == current)
+			store_runtime_instr_cb(&ri_cb);
+		else
+			ri_cb = *target->thread.ri_cb;
+	}
+
+	rc = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
+				&ri_cb, 0, sizeof(struct runtime_instr_cb));
+	if (rc) {
+		kfree(data);
+		return -EFAULT;
+	}
+
+	if (!is_ri_cb_valid(&ri_cb)) {
+		kfree(data);
+		return -EINVAL;
+	}
+	/*
+	 * Override access key in any case, since user space should
+	 * not be able to set it, nor should it care about it.
+	 */
+	ri_cb.key = PAGE_DEFAULT_KEY >> 4;
+	preempt_disable();
+	if (!target->thread.ri_cb)
+		target->thread.ri_cb = data;
+	*target->thread.ri_cb = ri_cb;
+	if (target == current)
+		load_runtime_instr_cb(target->thread.ri_cb);
+	preempt_enable();
+
+	return 0;
+}
+
+static const struct user_regset s390_regsets[] = {
+	{
+		.core_note_type = NT_PRSTATUS,
+		.n = sizeof(s390_regs) / sizeof(long),
+		.size = sizeof(long),
+		.align = sizeof(long),
+		.regset_get = s390_regs_get,
+		.set = s390_regs_set,
+	},
+	{
+		.core_note_type = NT_PRFPREG,
+		.n = sizeof(s390_fp_regs) / sizeof(long),
+		.size = sizeof(long),
+		.align = sizeof(long),
+		.regset_get = s390_fpregs_get,
+		.set = s390_fpregs_set,
+	},
+	{
+		.core_note_type = NT_S390_SYSTEM_CALL,
+		.n = 1,
+		.size = sizeof(unsigned int),
+		.align = sizeof(unsigned int),
+		.regset_get = s390_system_call_get,
+		.set = s390_system_call_set,
+	},
+	{
+		.core_note_type = NT_S390_LAST_BREAK,
+		.n = 1,
+		.size = sizeof(long),
+		.align = sizeof(long),
+		.regset_get = s390_last_break_get,
+		.set = s390_last_break_set,
+	},
+	{
+		.core_note_type = NT_S390_TDB,
+		.n = 1,
+		.size = 256,
+		.align = 1,
+		.regset_get = s390_tdb_get,
+		.set = s390_tdb_set,
+	},
+	{
+		.core_note_type = NT_S390_VXRS_LOW,
+		.n = __NUM_VXRS_LOW,
+		.size = sizeof(__u64),
+		.align = sizeof(__u64),
+		.regset_get = s390_vxrs_low_get,
+		.set = s390_vxrs_low_set,
+	},
+	{
+		.core_note_type = NT_S390_VXRS_HIGH,
+		.n = __NUM_VXRS_HIGH,
+		.size = sizeof(__vector128),
+		.align = sizeof(__vector128),
+		.regset_get = s390_vxrs_high_get,
+		.set = s390_vxrs_high_set,
+	},
+	{
+		.core_note_type = NT_S390_GS_CB,
+		.n = sizeof(struct gs_cb) / sizeof(__u64),
+		.size = sizeof(__u64),
+		.align = sizeof(__u64),
+		.regset_get = s390_gs_cb_get,
+		.set = s390_gs_cb_set,
+	},
+	{
+		.core_note_type = NT_S390_GS_BC,
+		.n = sizeof(struct gs_cb) / sizeof(__u64),
+		.size = sizeof(__u64),
+		.align = sizeof(__u64),
+		.regset_get = s390_gs_bc_get,
+		.set = s390_gs_bc_set,
+	},
+	{
+		.core_note_type = NT_S390_RI_CB,
+		.n = sizeof(struct runtime_instr_cb) / sizeof(__u64),
+		.size = sizeof(__u64),
+		.align = sizeof(__u64),
+		.regset_get = s390_runtime_instr_get,
+		.set = s390_runtime_instr_set,
+	},
+};
+
+static const struct user_regset_view user_s390_view = {
+	.name = "s390x",
+	.e_machine = EM_S390,
+	.regsets = s390_regsets,
+	.n = ARRAY_SIZE(s390_regsets)
+};
+
+#ifdef CONFIG_COMPAT
+static int s390_compat_regs_get(struct task_struct *target,
+				const struct user_regset *regset,
+				struct membuf to)
+{
+	unsigned n;
+
+	if (target == current)
+		save_access_regs(target->thread.acrs);
+
+	for (n = 0; n < sizeof(s390_compat_regs); n += sizeof(compat_ulong_t))
+		membuf_store(&to, __peek_user_compat(target, n));
+	return 0;
+}
+
+static int s390_compat_regs_set(struct task_struct *target,
+				const struct user_regset *regset,
+				unsigned int pos, unsigned int count,
+				const void *kbuf, const void __user *ubuf)
+{
+	int rc = 0;
+
+	if (target == current)
+		save_access_regs(target->thread.acrs);
+
+	if (kbuf) {
+		const compat_ulong_t *k = kbuf;
+		while (count > 0 && !rc) {
+			rc = __poke_user_compat(target, pos, *k++);
+			count -= sizeof(*k);
+			pos += sizeof(*k);
+		}
+	} else {
+		const compat_ulong_t  __user *u = ubuf;
+		while (count > 0 && !rc) {
+			compat_ulong_t word;
+			rc = __get_user(word, u++);
+			if (rc)
+				break;
+			rc = __poke_user_compat(target, pos, word);
+			count -= sizeof(*u);
+			pos += sizeof(*u);
+		}
+	}
+
+	if (rc == 0 && target == current)
+		restore_access_regs(target->thread.acrs);
+
+	return rc;
+}
+
+static int s390_compat_regs_high_get(struct task_struct *target,
+				     const struct user_regset *regset,
+				     struct membuf to)
+{
+	compat_ulong_t *gprs_high;
+	int i;
+
+	gprs_high = (compat_ulong_t *)task_pt_regs(target)->gprs;
+	for (i = 0; i < NUM_GPRS; i++, gprs_high += 2)
+		membuf_store(&to, *gprs_high);
+	return 0;
+}
+
+static int s390_compat_regs_high_set(struct task_struct *target,
+				     const struct user_regset *regset,
+				     unsigned int pos, unsigned int count,
+				     const void *kbuf, const void __user *ubuf)
+{
+	compat_ulong_t *gprs_high;
+	int rc = 0;
+
+	gprs_high = (compat_ulong_t *)
+		&task_pt_regs(target)->gprs[pos / sizeof(compat_ulong_t)];
+	if (kbuf) {
+		const compat_ulong_t *k = kbuf;
+		while (count > 0) {
+			*gprs_high = *k++;
+			*gprs_high += 2;
+			count -= sizeof(*k);
+		}
+	} else {
+		const compat_ulong_t  __user *u = ubuf;
+		while (count > 0 && !rc) {
+			unsigned long word;
+			rc = __get_user(word, u++);
+			if (rc)
+				break;
+			*gprs_high = word;
+			*gprs_high += 2;
+			count -= sizeof(*u);
+		}
+	}
+
+	return rc;
+}
+
+static int s390_compat_last_break_get(struct task_struct *target,
+				      const struct user_regset *regset,
+				      struct membuf to)
+{
+	compat_ulong_t last_break = target->thread.last_break;
+
+	return membuf_store(&to, (unsigned long)last_break);
+}
+
+static int s390_compat_last_break_set(struct task_struct *target,
+				      const struct user_regset *regset,
+				      unsigned int pos, unsigned int count,
+				      const void *kbuf, const void __user *ubuf)
+{
+	return 0;
+}
+
+static const struct user_regset s390_compat_regsets[] = {
+	{
+		.core_note_type = NT_PRSTATUS,
+		.n = sizeof(s390_compat_regs) / sizeof(compat_long_t),
+		.size = sizeof(compat_long_t),
+		.align = sizeof(compat_long_t),
+		.regset_get = s390_compat_regs_get,
+		.set = s390_compat_regs_set,
+	},
+	{
+		.core_note_type = NT_PRFPREG,
+		.n = sizeof(s390_fp_regs) / sizeof(compat_long_t),
+		.size = sizeof(compat_long_t),
+		.align = sizeof(compat_long_t),
+		.regset_get = s390_fpregs_get,
+		.set = s390_fpregs_set,
+	},
+	{
+		.core_note_type = NT_S390_SYSTEM_CALL,
+		.n = 1,
+		.size = sizeof(compat_uint_t),
+		.align = sizeof(compat_uint_t),
+		.regset_get = s390_system_call_get,
+		.set = s390_system_call_set,
+	},
+	{
+		.core_note_type = NT_S390_LAST_BREAK,
+		.n = 1,
+		.size = sizeof(long),
+		.align = sizeof(long),
+		.regset_get = s390_compat_last_break_get,
+		.set = s390_compat_last_break_set,
+	},
+	{
+		.core_note_type = NT_S390_TDB,
+		.n = 1,
+		.size = 256,
+		.align = 1,
+		.regset_get = s390_tdb_get,
+		.set = s390_tdb_set,
+	},
+	{
+		.core_note_type = NT_S390_VXRS_LOW,
+		.n = __NUM_VXRS_LOW,
+		.size = sizeof(__u64),
+		.align = sizeof(__u64),
+		.regset_get = s390_vxrs_low_get,
+		.set = s390_vxrs_low_set,
+	},
+	{
+		.core_note_type = NT_S390_VXRS_HIGH,
+		.n = __NUM_VXRS_HIGH,
+		.size = sizeof(__vector128),
+		.align = sizeof(__vector128),
+		.regset_get = s390_vxrs_high_get,
+		.set = s390_vxrs_high_set,
+	},
+	{
+		.core_note_type = NT_S390_HIGH_GPRS,
+		.n = sizeof(s390_compat_regs_high) / sizeof(compat_long_t),
+		.size = sizeof(compat_long_t),
+		.align = sizeof(compat_long_t),
+		.regset_get = s390_compat_regs_high_get,
+		.set = s390_compat_regs_high_set,
+	},
+	{
+		.core_note_type = NT_S390_GS_CB,
+		.n = sizeof(struct gs_cb) / sizeof(__u64),
+		.size = sizeof(__u64),
+		.align = sizeof(__u64),
+		.regset_get = s390_gs_cb_get,
+		.set = s390_gs_cb_set,
+	},
+	{
+		.core_note_type = NT_S390_GS_BC,
+		.n = sizeof(struct gs_cb) / sizeof(__u64),
+		.size = sizeof(__u64),
+		.align = sizeof(__u64),
+		.regset_get = s390_gs_bc_get,
+		.set = s390_gs_bc_set,
+	},
+	{
+		.core_note_type = NT_S390_RI_CB,
+		.n = sizeof(struct runtime_instr_cb) / sizeof(__u64),
+		.size = sizeof(__u64),
+		.align = sizeof(__u64),
+		.regset_get = s390_runtime_instr_get,
+		.set = s390_runtime_instr_set,
+	},
+};
+
+static const struct user_regset_view user_s390_compat_view = {
+	.name = "s390",
+	.e_machine = EM_S390,
+	.regsets = s390_compat_regsets,
+	.n = ARRAY_SIZE(s390_compat_regsets)
+};
+#endif
+
+const struct user_regset_view *task_user_regset_view(struct task_struct *task)
+{
+#ifdef CONFIG_COMPAT
+	if (test_tsk_thread_flag(task, TIF_31BIT))
+		return &user_s390_compat_view;
+#endif
+	return &user_s390_view;
+}
+
+static const char *gpr_names[NUM_GPRS] = {
+	"r0", "r1",  "r2",  "r3",  "r4",  "r5",  "r6",  "r7",
+	"r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15",
+};
+
+unsigned long regs_get_register(struct pt_regs *regs, unsigned int offset)
+{
+	if (offset >= NUM_GPRS)
+		return 0;
+	return regs->gprs[offset];
+}
+
+int regs_query_register_offset(const char *name)
+{
+	unsigned long offset;
+
+	if (!name || *name != 'r')
+		return -EINVAL;
+	if (kstrtoul(name + 1, 10, &offset))
+		return -EINVAL;
+	if (offset >= NUM_GPRS)
+		return -EINVAL;
+	return offset;
+}
+
+const char *regs_query_register_name(unsigned int offset)
+{
+	if (offset >= NUM_GPRS)
+		return NULL;
+	return gpr_names[offset];
+}
+
+static int regs_within_kernel_stack(struct pt_regs *regs, unsigned long addr)
+{
+	unsigned long ksp = kernel_stack_pointer(regs);
+
+	return (addr & ~(THREAD_SIZE - 1)) == (ksp & ~(THREAD_SIZE - 1));
+}
+
+/**
+ * regs_get_kernel_stack_nth() - get Nth entry of the stack
+ * @regs:pt_regs which contains kernel stack pointer.
+ * @n:stack entry number.
+ *
+ * regs_get_kernel_stack_nth() returns @n th entry of the kernel stack which
+ * is specifined by @regs. If the @n th entry is NOT in the kernel stack,
+ * this returns 0.
+ */
+unsigned long regs_get_kernel_stack_nth(struct pt_regs *regs, unsigned int n)
+{
+	unsigned long addr;
+
+	addr = kernel_stack_pointer(regs) + n * sizeof(long);
+	if (!regs_within_kernel_stack(regs, addr))
+		return 0;
+	return *(unsigned long *)addr;
+}
diff --git a/arch/s390/kernel/reipl.S b/arch/s390/kernel/reipl.S
new file mode 100644
index 0000000000..88087a32eb
--- /dev/null
+++ b/arch/s390/kernel/reipl.S
@@ -0,0 +1,81 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ *    Copyright IBM Corp 2000, 2011
+ *    Author(s): Holger Smolinski <Holger.Smolinski@de.ibm.com>,
+ *		 Denis Joseph Barrow,
+ */
+
+#include <linux/linkage.h>
+#include <asm/asm-offsets.h>
+#include <asm/nospec-insn.h>
+#include <asm/sigp.h>
+
+	GEN_BR_THUNK %r9
+
+#
+# Issue "store status" for the current CPU to its prefix page
+# and call passed function afterwards
+#
+# r2 = Function to be called after store status
+# r3 = Parameter for function
+#
+SYM_CODE_START(store_status)
+	/* Save register one and load save area base */
+	stg	%r1,__LC_SAVE_AREA_RESTART
+	/* General purpose registers */
+	lghi	%r1,__LC_GPREGS_SAVE_AREA
+	stmg	%r0,%r15,0(%r1)
+	mvc	8(8,%r1),__LC_SAVE_AREA_RESTART
+	/* Control registers */
+	lghi	%r1,__LC_CREGS_SAVE_AREA
+	stctg	%c0,%c15,0(%r1)
+	/* Access registers */
+	lghi	%r1,__LC_AREGS_SAVE_AREA
+	stam	%a0,%a15,0(%r1)
+	/* Floating point registers */
+	lghi	%r1,__LC_FPREGS_SAVE_AREA
+	std	%f0, 0x00(%r1)
+	std	%f1, 0x08(%r1)
+	std	%f2, 0x10(%r1)
+	std	%f3, 0x18(%r1)
+	std	%f4, 0x20(%r1)
+	std	%f5, 0x28(%r1)
+	std	%f6, 0x30(%r1)
+	std	%f7, 0x38(%r1)
+	std	%f8, 0x40(%r1)
+	std	%f9, 0x48(%r1)
+	std	%f10,0x50(%r1)
+	std	%f11,0x58(%r1)
+	std	%f12,0x60(%r1)
+	std	%f13,0x68(%r1)
+	std	%f14,0x70(%r1)
+	std	%f15,0x78(%r1)
+	/* Floating point control register */
+	lghi	%r1,__LC_FP_CREG_SAVE_AREA
+	stfpc	0(%r1)
+	/* CPU timer */
+	lghi	%r1,__LC_CPU_TIMER_SAVE_AREA
+	stpt	0(%r1)
+	/* Store prefix register */
+	lghi	%r1,__LC_PREFIX_SAVE_AREA
+	stpx	0(%r1)
+	/* Clock comparator - seven bytes */
+	lghi	%r1,__LC_CLOCK_COMP_SAVE_AREA
+	larl	%r4,clkcmp
+	stckc	0(%r4)
+	mvc	1(7,%r1),1(%r4)
+	/* Program status word */
+	lghi	%r1,__LC_PSW_SAVE_AREA
+	epsw	%r4,%r5
+	st	%r4,0(%r1)
+	st	%r5,4(%r1)
+	stg	%r2,8(%r1)
+	lgr	%r9,%r2
+	lgr	%r2,%r3
+	BR_EX	%r9
+SYM_CODE_END(store_status)
+
+	.section .bss
+	.balign	8
+SYM_DATA_LOCAL(clkcmp,	.quad 0x0000000000000000)
+	.previous
diff --git a/arch/s390/kernel/relocate_kernel.S b/arch/s390/kernel/relocate_kernel.S
new file mode 100644
index 0000000000..0ae297c82a
--- /dev/null
+++ b/arch/s390/kernel/relocate_kernel.S
@@ -0,0 +1,76 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright IBM Corp. 2005
+ *
+ * Author(s): Rolf Adelsberger
+ *
+ */
+
+#include <linux/linkage.h>
+#include <asm/page.h>
+#include <asm/sigp.h>
+
+/*
+ * moves the new kernel to its destination...
+ * %r2 = pointer to first kimage_entry_t
+ * %r3 = start address - where to jump to after the job is done...
+ * %r4 = subcode
+ *
+ * %r5 will be used as temp. storage
+ * %r6 holds the destination address
+ * %r7 = PAGE_SIZE
+ * %r8 holds the source address
+ * %r9 = PAGE_SIZE
+ *
+ * 0xf000 is a page_mask
+ */
+
+	.text
+SYM_CODE_START(relocate_kernel)
+	basr	%r13,0		# base address
+.base:
+	lghi	%r7,PAGE_SIZE	# load PAGE_SIZE in r7
+	lghi	%r9,PAGE_SIZE	# load PAGE_SIZE in r9
+	lg	%r5,0(%r2)	# read another word for indirection page
+	aghi	%r2,8		# increment pointer
+	tml	%r5,0x1		# is it a destination page?
+	je	.indir_check	# NO, goto "indir_check"
+	lgr	%r6,%r5		# r6 = r5
+	nill	%r6,0xf000	# mask it out and...
+	j	.base		# ...next iteration
+.indir_check:
+	tml	%r5,0x2		# is it a indirection page?
+	je	.done_test	# NO, goto "done_test"
+	nill	%r5,0xf000	# YES, mask out,
+	lgr	%r2,%r5		# move it into the right register,
+	j	.base		# and read next...
+.done_test:
+	tml	%r5,0x4		# is it the done indicator?
+	je	.source_test	# NO! Well, then it should be the source indicator...
+	j	.done		# ok, lets finish it here...
+.source_test:
+	tml	%r5,0x8		# it should be a source indicator...
+	je	.base		# NO, ignore it...
+	lgr	%r8,%r5		# r8 = r5
+	nill	%r8,0xf000	# masking
+0:	mvcle	%r6,%r8,0x0	# copy PAGE_SIZE bytes from r8 to r6 - pad with 0
+	jo	0b
+	j	.base
+.done:
+	lgr	%r0,%r4		# subcode
+	cghi	%r3,0
+	je	.diag
+	la	%r4,load_psw-.base(%r13)	# load psw-address into the register
+	o	%r3,4(%r4)	# or load address into psw
+	st	%r3,4(%r4)
+	mvc	0(8,%r0),0(%r4)	# copy psw to absolute address 0
+.diag:
+	diag	%r0,%r0,0x308
+SYM_CODE_END(relocate_kernel)
+
+	.balign	8
+SYM_DATA_START_LOCAL(load_psw)
+	.long	0x00080000,0x80000000
+SYM_DATA_END_LABEL(load_psw, SYM_L_LOCAL, relocate_kernel_end)
+	.balign 8
+SYM_DATA(relocate_kernel_len, .quad relocate_kernel_end - relocate_kernel)
diff --git a/arch/s390/kernel/rethook.c b/arch/s390/kernel/rethook.c
new file mode 100644
index 0000000000..af10e6bdd3
--- /dev/null
+++ b/arch/s390/kernel/rethook.c
@@ -0,0 +1,34 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+#include <linux/rethook.h>
+#include <linux/kprobes.h>
+#include "rethook.h"
+
+void arch_rethook_prepare(struct rethook_node *rh, struct pt_regs *regs, bool mcount)
+{
+	rh->ret_addr = regs->gprs[14];
+	rh->frame = regs->gprs[15];
+
+	/* Replace the return addr with trampoline addr */
+	regs->gprs[14] = (unsigned long)&arch_rethook_trampoline;
+}
+NOKPROBE_SYMBOL(arch_rethook_prepare);
+
+void arch_rethook_fixup_return(struct pt_regs *regs,
+			       unsigned long correct_ret_addr)
+{
+	/* Replace fake return address with real one. */
+	regs->gprs[14] = correct_ret_addr;
+}
+NOKPROBE_SYMBOL(arch_rethook_fixup_return);
+
+/*
+ * Called from arch_rethook_trampoline
+ */
+unsigned long arch_rethook_trampoline_callback(struct pt_regs *regs)
+{
+	return rethook_trampoline_handler(regs, regs->gprs[15]);
+}
+NOKPROBE_SYMBOL(arch_rethook_trampoline_callback);
+
+/* assembler function that handles the rethook must not be probed itself */
+NOKPROBE_SYMBOL(arch_rethook_trampoline);
diff --git a/arch/s390/kernel/rethook.h b/arch/s390/kernel/rethook.h
new file mode 100644
index 0000000000..32f069eed3
--- /dev/null
+++ b/arch/s390/kernel/rethook.h
@@ -0,0 +1,7 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+#ifndef __S390_RETHOOK_H
+#define __S390_RETHOOK_H
+
+unsigned long arch_rethook_trampoline_callback(struct pt_regs *regs);
+
+#endif
diff --git a/arch/s390/kernel/runtime_instr.c b/arch/s390/kernel/runtime_instr.c
new file mode 100644
index 0000000000..1788a5454b
--- /dev/null
+++ b/arch/s390/kernel/runtime_instr.c
@@ -0,0 +1,102 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright IBM Corp. 2012
+ * Author(s): Jan Glauber <jang@linux.vnet.ibm.com>
+ */
+
+#include <linux/kernel.h>
+#include <linux/syscalls.h>
+#include <linux/signal.h>
+#include <linux/mm.h>
+#include <linux/slab.h>
+#include <linux/init.h>
+#include <linux/errno.h>
+#include <linux/kernel_stat.h>
+#include <linux/sched/task_stack.h>
+
+#include <asm/runtime_instr.h>
+#include <asm/cpu_mf.h>
+#include <asm/irq.h>
+
+#include "entry.h"
+
+/* empty control block to disable RI by loading it */
+struct runtime_instr_cb runtime_instr_empty_cb;
+
+void runtime_instr_release(struct task_struct *tsk)
+{
+	kfree(tsk->thread.ri_cb);
+}
+
+static void disable_runtime_instr(void)
+{
+	struct task_struct *task = current;
+	struct pt_regs *regs;
+
+	if (!task->thread.ri_cb)
+		return;
+	regs = task_pt_regs(task);
+	preempt_disable();
+	load_runtime_instr_cb(&runtime_instr_empty_cb);
+	kfree(task->thread.ri_cb);
+	task->thread.ri_cb = NULL;
+	preempt_enable();
+
+	/*
+	 * Make sure the RI bit is deleted from the PSW. If the user did not
+	 * switch off RI before the system call the process will get a
+	 * specification exception otherwise.
+	 */
+	regs->psw.mask &= ~PSW_MASK_RI;
+}
+
+static void init_runtime_instr_cb(struct runtime_instr_cb *cb)
+{
+	cb->rla = 0xfff;
+	cb->s = 1;
+	cb->k = 1;
+	cb->ps = 1;
+	cb->pc = 1;
+	cb->key = PAGE_DEFAULT_KEY >> 4;
+	cb->v = 1;
+}
+
+/*
+ * The signum argument is unused. In older kernels it was used to
+ * specify a real-time signal. For backwards compatibility user space
+ * should pass a valid real-time signal number (the signum argument
+ * was checked in older kernels).
+ */
+SYSCALL_DEFINE2(s390_runtime_instr, int, command, int, signum)
+{
+	struct runtime_instr_cb *cb;
+
+	if (!test_facility(64))
+		return -EOPNOTSUPP;
+
+	if (command == S390_RUNTIME_INSTR_STOP) {
+		disable_runtime_instr();
+		return 0;
+	}
+
+	if (command != S390_RUNTIME_INSTR_START)
+		return -EINVAL;
+
+	if (!current->thread.ri_cb) {
+		cb = kzalloc(sizeof(*cb), GFP_KERNEL);
+		if (!cb)
+			return -ENOMEM;
+	} else {
+		cb = current->thread.ri_cb;
+		memset(cb, 0, sizeof(*cb));
+	}
+
+	init_runtime_instr_cb(cb);
+
+	/* now load the control block to make it available */
+	preempt_disable();
+	current->thread.ri_cb = cb;
+	load_runtime_instr_cb(cb);
+	preempt_enable();
+	return 0;
+}
diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c
new file mode 100644
index 0000000000..de6ad0fb23
--- /dev/null
+++ b/arch/s390/kernel/setup.c
@@ -0,0 +1,1012 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ *  S390 version
+ *    Copyright IBM Corp. 1999, 2012
+ *    Author(s): Hartmut Penner (hp@de.ibm.com),
+ *               Martin Schwidefsky (schwidefsky@de.ibm.com)
+ *
+ *  Derived from "arch/i386/kernel/setup.c"
+ *    Copyright (C) 1995, Linus Torvalds
+ */
+
+/*
+ * This file handles the architecture-dependent parts of initialization
+ */
+
+#define KMSG_COMPONENT "setup"
+#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
+
+#include <linux/errno.h>
+#include <linux/export.h>
+#include <linux/sched.h>
+#include <linux/sched/task.h>
+#include <linux/cpu.h>
+#include <linux/kernel.h>
+#include <linux/memblock.h>
+#include <linux/mm.h>
+#include <linux/stddef.h>
+#include <linux/unistd.h>
+#include <linux/ptrace.h>
+#include <linux/random.h>
+#include <linux/user.h>
+#include <linux/tty.h>
+#include <linux/ioport.h>
+#include <linux/delay.h>
+#include <linux/init.h>
+#include <linux/initrd.h>
+#include <linux/root_dev.h>
+#include <linux/console.h>
+#include <linux/kernel_stat.h>
+#include <linux/dma-map-ops.h>
+#include <linux/device.h>
+#include <linux/notifier.h>
+#include <linux/pfn.h>
+#include <linux/ctype.h>
+#include <linux/reboot.h>
+#include <linux/topology.h>
+#include <linux/kexec.h>
+#include <linux/crash_dump.h>
+#include <linux/memory.h>
+#include <linux/compat.h>
+#include <linux/start_kernel.h>
+#include <linux/hugetlb.h>
+#include <linux/kmemleak.h>
+
+#include <asm/archrandom.h>
+#include <asm/boot_data.h>
+#include <asm/ipl.h>
+#include <asm/facility.h>
+#include <asm/smp.h>
+#include <asm/mmu_context.h>
+#include <asm/cpcmd.h>
+#include <asm/abs_lowcore.h>
+#include <asm/nmi.h>
+#include <asm/irq.h>
+#include <asm/page.h>
+#include <asm/ptrace.h>
+#include <asm/sections.h>
+#include <asm/ebcdic.h>
+#include <asm/diag.h>
+#include <asm/os_info.h>
+#include <asm/sclp.h>
+#include <asm/stacktrace.h>
+#include <asm/sysinfo.h>
+#include <asm/numa.h>
+#include <asm/alternative.h>
+#include <asm/nospec-branch.h>
+#include <asm/physmem_info.h>
+#include <asm/maccess.h>
+#include <asm/uv.h>
+#include <asm/asm-offsets.h>
+#include "entry.h"
+
+/*
+ * Machine setup..
+ */
+unsigned int console_mode = 0;
+EXPORT_SYMBOL(console_mode);
+
+unsigned int console_devno = -1;
+EXPORT_SYMBOL(console_devno);
+
+unsigned int console_irq = -1;
+EXPORT_SYMBOL(console_irq);
+
+/*
+ * Some code and data needs to stay below 2 GB, even when the kernel would be
+ * relocated above 2 GB, because it has to use 31 bit addresses.
+ * Such code and data is part of the .amode31 section.
+ */
+char __amode31_ref *__samode31 = _samode31;
+char __amode31_ref *__eamode31 = _eamode31;
+char __amode31_ref *__stext_amode31 = _stext_amode31;
+char __amode31_ref *__etext_amode31 = _etext_amode31;
+struct exception_table_entry __amode31_ref *__start_amode31_ex_table = _start_amode31_ex_table;
+struct exception_table_entry __amode31_ref *__stop_amode31_ex_table = _stop_amode31_ex_table;
+
+/*
+ * Control registers CR2, CR5 and CR15 are initialized with addresses
+ * of tables that must be placed below 2G which is handled by the AMODE31
+ * sections.
+ * Because the AMODE31 sections are relocated below 2G at startup,
+ * the content of control registers CR2, CR5 and CR15 must be updated
+ * with new addresses after the relocation. The initial initialization of
+ * control registers occurs in head64.S and then gets updated again after AMODE31
+ * relocation. We must access the relevant AMODE31 tables indirectly via
+ * pointers placed in the .amode31.refs linker section. Those pointers get
+ * updated automatically during AMODE31 relocation and always contain a valid
+ * address within AMODE31 sections.
+ */
+
+static __amode31_data u32 __ctl_duct_amode31[16] __aligned(64);
+
+static __amode31_data u64 __ctl_aste_amode31[8] __aligned(64) = {
+	[1] = 0xffffffffffffffff
+};
+
+static __amode31_data u32 __ctl_duald_amode31[32] __aligned(128) = {
+	0x80000000, 0, 0, 0,
+	0x80000000, 0, 0, 0,
+	0x80000000, 0, 0, 0,
+	0x80000000, 0, 0, 0,
+	0x80000000, 0, 0, 0,
+	0x80000000, 0, 0, 0,
+	0x80000000, 0, 0, 0,
+	0x80000000, 0, 0, 0
+};
+
+static __amode31_data u32 __ctl_linkage_stack_amode31[8] __aligned(64) = {
+	0, 0, 0x89000000, 0,
+	0, 0, 0x8a000000, 0
+};
+
+static u64 __amode31_ref *__ctl_aste = __ctl_aste_amode31;
+static u32 __amode31_ref *__ctl_duald = __ctl_duald_amode31;
+static u32 __amode31_ref *__ctl_linkage_stack = __ctl_linkage_stack_amode31;
+static u32 __amode31_ref *__ctl_duct = __ctl_duct_amode31;
+
+unsigned long __bootdata_preserved(max_mappable);
+unsigned long __bootdata(ident_map_size);
+struct physmem_info __bootdata(physmem_info);
+
+unsigned long __bootdata_preserved(__kaslr_offset);
+int __bootdata_preserved(__kaslr_enabled);
+unsigned int __bootdata_preserved(zlib_dfltcc_support);
+EXPORT_SYMBOL(zlib_dfltcc_support);
+u64 __bootdata_preserved(stfle_fac_list[16]);
+EXPORT_SYMBOL(stfle_fac_list);
+u64 __bootdata_preserved(alt_stfle_fac_list[16]);
+struct oldmem_data __bootdata_preserved(oldmem_data);
+
+unsigned long VMALLOC_START;
+EXPORT_SYMBOL(VMALLOC_START);
+
+unsigned long VMALLOC_END;
+EXPORT_SYMBOL(VMALLOC_END);
+
+struct page *vmemmap;
+EXPORT_SYMBOL(vmemmap);
+unsigned long vmemmap_size;
+
+unsigned long MODULES_VADDR;
+unsigned long MODULES_END;
+
+/* An array with a pointer to the lowcore of every CPU. */
+struct lowcore *lowcore_ptr[NR_CPUS];
+EXPORT_SYMBOL(lowcore_ptr);
+
+DEFINE_STATIC_KEY_FALSE(cpu_has_bear);
+
+/*
+ * The Write Back bit position in the physaddr is given by the SLPC PCI.
+ * Leaving the mask zero always uses write through which is safe
+ */
+unsigned long mio_wb_bit_mask __ro_after_init;
+
+/*
+ * This is set up by the setup-routine at boot-time
+ * for S390 need to find out, what we have to setup
+ * using address 0x10400 ...
+ */
+
+#include <asm/setup.h>
+
+/*
+ * condev= and conmode= setup parameter.
+ */
+
+static int __init condev_setup(char *str)
+{
+	int vdev;
+
+	vdev = simple_strtoul(str, &str, 0);
+	if (vdev >= 0 && vdev < 65536) {
+		console_devno = vdev;
+		console_irq = -1;
+	}
+	return 1;
+}
+
+__setup("condev=", condev_setup);
+
+static void __init set_preferred_console(void)
+{
+	if (CONSOLE_IS_3215 || CONSOLE_IS_SCLP)
+		add_preferred_console("ttyS", 0, NULL);
+	else if (CONSOLE_IS_3270)
+		add_preferred_console("tty3270", 0, NULL);
+	else if (CONSOLE_IS_VT220)
+		add_preferred_console("ttysclp", 0, NULL);
+	else if (CONSOLE_IS_HVC)
+		add_preferred_console("hvc", 0, NULL);
+}
+
+static int __init conmode_setup(char *str)
+{
+#if defined(CONFIG_SCLP_CONSOLE) || defined(CONFIG_SCLP_VT220_CONSOLE)
+	if (!strcmp(str, "hwc") || !strcmp(str, "sclp"))
+                SET_CONSOLE_SCLP;
+#endif
+#if defined(CONFIG_TN3215_CONSOLE)
+	if (!strcmp(str, "3215"))
+		SET_CONSOLE_3215;
+#endif
+#if defined(CONFIG_TN3270_CONSOLE)
+	if (!strcmp(str, "3270"))
+		SET_CONSOLE_3270;
+#endif
+	set_preferred_console();
+        return 1;
+}
+
+__setup("conmode=", conmode_setup);
+
+static void __init conmode_default(void)
+{
+	char query_buffer[1024];
+	char *ptr;
+
+        if (MACHINE_IS_VM) {
+		cpcmd("QUERY CONSOLE", query_buffer, 1024, NULL);
+		console_devno = simple_strtoul(query_buffer + 5, NULL, 16);
+		ptr = strstr(query_buffer, "SUBCHANNEL =");
+		console_irq = simple_strtoul(ptr + 13, NULL, 16);
+		cpcmd("QUERY TERM", query_buffer, 1024, NULL);
+		ptr = strstr(query_buffer, "CONMODE");
+		/*
+		 * Set the conmode to 3215 so that the device recognition 
+		 * will set the cu_type of the console to 3215. If the
+		 * conmode is 3270 and we don't set it back then both
+		 * 3215 and the 3270 driver will try to access the console
+		 * device (3215 as console and 3270 as normal tty).
+		 */
+		cpcmd("TERM CONMODE 3215", NULL, 0, NULL);
+		if (ptr == NULL) {
+#if defined(CONFIG_SCLP_CONSOLE) || defined(CONFIG_SCLP_VT220_CONSOLE)
+			SET_CONSOLE_SCLP;
+#endif
+			return;
+		}
+		if (str_has_prefix(ptr + 8, "3270")) {
+#if defined(CONFIG_TN3270_CONSOLE)
+			SET_CONSOLE_3270;
+#elif defined(CONFIG_TN3215_CONSOLE)
+			SET_CONSOLE_3215;
+#elif defined(CONFIG_SCLP_CONSOLE) || defined(CONFIG_SCLP_VT220_CONSOLE)
+			SET_CONSOLE_SCLP;
+#endif
+		} else if (str_has_prefix(ptr + 8, "3215")) {
+#if defined(CONFIG_TN3215_CONSOLE)
+			SET_CONSOLE_3215;
+#elif defined(CONFIG_TN3270_CONSOLE)
+			SET_CONSOLE_3270;
+#elif defined(CONFIG_SCLP_CONSOLE) || defined(CONFIG_SCLP_VT220_CONSOLE)
+			SET_CONSOLE_SCLP;
+#endif
+		}
+	} else if (MACHINE_IS_KVM) {
+		if (sclp.has_vt220 && IS_ENABLED(CONFIG_SCLP_VT220_CONSOLE))
+			SET_CONSOLE_VT220;
+		else if (sclp.has_linemode && IS_ENABLED(CONFIG_SCLP_CONSOLE))
+			SET_CONSOLE_SCLP;
+		else
+			SET_CONSOLE_HVC;
+	} else {
+#if defined(CONFIG_SCLP_CONSOLE) || defined(CONFIG_SCLP_VT220_CONSOLE)
+		SET_CONSOLE_SCLP;
+#endif
+	}
+}
+
+#ifdef CONFIG_CRASH_DUMP
+static void __init setup_zfcpdump(void)
+{
+	if (!is_ipl_type_dump())
+		return;
+	if (oldmem_data.start)
+		return;
+	strcat(boot_command_line, " cio_ignore=all,!ipldev,!condev");
+	console_loglevel = 2;
+}
+#else
+static inline void setup_zfcpdump(void) {}
+#endif /* CONFIG_CRASH_DUMP */
+
+ /*
+ * Reboot, halt and power_off stubs. They just call _machine_restart,
+ * _machine_halt or _machine_power_off. 
+ */
+
+void machine_restart(char *command)
+{
+	if ((!in_interrupt() && !in_atomic()) || oops_in_progress)
+		/*
+		 * Only unblank the console if we are called in enabled
+		 * context or a bust_spinlocks cleared the way for us.
+		 */
+		console_unblank();
+	_machine_restart(command);
+}
+
+void machine_halt(void)
+{
+	if (!in_interrupt() || oops_in_progress)
+		/*
+		 * Only unblank the console if we are called in enabled
+		 * context or a bust_spinlocks cleared the way for us.
+		 */
+		console_unblank();
+	_machine_halt();
+}
+
+void machine_power_off(void)
+{
+	if (!in_interrupt() || oops_in_progress)
+		/*
+		 * Only unblank the console if we are called in enabled
+		 * context or a bust_spinlocks cleared the way for us.
+		 */
+		console_unblank();
+	_machine_power_off();
+}
+
+/*
+ * Dummy power off function.
+ */
+void (*pm_power_off)(void) = machine_power_off;
+EXPORT_SYMBOL_GPL(pm_power_off);
+
+void *restart_stack;
+
+unsigned long stack_alloc(void)
+{
+#ifdef CONFIG_VMAP_STACK
+	void *ret;
+
+	ret = __vmalloc_node(THREAD_SIZE, THREAD_SIZE, THREADINFO_GFP,
+			     NUMA_NO_NODE, __builtin_return_address(0));
+	kmemleak_not_leak(ret);
+	return (unsigned long)ret;
+#else
+	return __get_free_pages(GFP_KERNEL, THREAD_SIZE_ORDER);
+#endif
+}
+
+void stack_free(unsigned long stack)
+{
+#ifdef CONFIG_VMAP_STACK
+	vfree((void *) stack);
+#else
+	free_pages(stack, THREAD_SIZE_ORDER);
+#endif
+}
+
+void __init __noreturn arch_call_rest_init(void)
+{
+	smp_reinit_ipl_cpu();
+	rest_init();
+}
+
+static unsigned long __init stack_alloc_early(void)
+{
+	unsigned long stack;
+
+	stack = (unsigned long)memblock_alloc(THREAD_SIZE, THREAD_SIZE);
+	if (!stack) {
+		panic("%s: Failed to allocate %lu bytes align=0x%lx\n",
+		      __func__, THREAD_SIZE, THREAD_SIZE);
+	}
+	return stack;
+}
+
+static void __init setup_lowcore(void)
+{
+	struct lowcore *lc, *abs_lc;
+
+	/*
+	 * Setup lowcore for boot cpu
+	 */
+	BUILD_BUG_ON(sizeof(struct lowcore) != LC_PAGES * PAGE_SIZE);
+	lc = memblock_alloc_low(sizeof(*lc), sizeof(*lc));
+	if (!lc)
+		panic("%s: Failed to allocate %zu bytes align=%zx\n",
+		      __func__, sizeof(*lc), sizeof(*lc));
+
+	lc->restart_psw.mask = PSW_KERNEL_BITS & ~PSW_MASK_DAT;
+	lc->restart_psw.addr = __pa(restart_int_handler);
+	lc->external_new_psw.mask = PSW_KERNEL_BITS | PSW_MASK_MCHECK;
+	lc->external_new_psw.addr = (unsigned long) ext_int_handler;
+	lc->svc_new_psw.mask = PSW_KERNEL_BITS | PSW_MASK_MCHECK;
+	lc->svc_new_psw.addr = (unsigned long) system_call;
+	lc->program_new_psw.mask = PSW_KERNEL_BITS | PSW_MASK_MCHECK;
+	lc->program_new_psw.addr = (unsigned long) pgm_check_handler;
+	lc->mcck_new_psw.mask = PSW_KERNEL_BITS;
+	lc->mcck_new_psw.addr = (unsigned long) mcck_int_handler;
+	lc->io_new_psw.mask = PSW_KERNEL_BITS | PSW_MASK_MCHECK;
+	lc->io_new_psw.addr = (unsigned long) io_int_handler;
+	lc->clock_comparator = clock_comparator_max;
+	lc->current_task = (unsigned long)&init_task;
+	lc->lpp = LPP_MAGIC;
+	lc->machine_flags = S390_lowcore.machine_flags;
+	lc->preempt_count = S390_lowcore.preempt_count;
+	nmi_alloc_mcesa_early(&lc->mcesad);
+	lc->sys_enter_timer = S390_lowcore.sys_enter_timer;
+	lc->exit_timer = S390_lowcore.exit_timer;
+	lc->user_timer = S390_lowcore.user_timer;
+	lc->system_timer = S390_lowcore.system_timer;
+	lc->steal_timer = S390_lowcore.steal_timer;
+	lc->last_update_timer = S390_lowcore.last_update_timer;
+	lc->last_update_clock = S390_lowcore.last_update_clock;
+	/*
+	 * Allocate the global restart stack which is the same for
+	 * all CPUs in case *one* of them does a PSW restart.
+	 */
+	restart_stack = (void *)(stack_alloc_early() + STACK_INIT_OFFSET);
+	lc->mcck_stack = stack_alloc_early() + STACK_INIT_OFFSET;
+	lc->async_stack = stack_alloc_early() + STACK_INIT_OFFSET;
+	lc->nodat_stack = stack_alloc_early() + STACK_INIT_OFFSET;
+	lc->kernel_stack = S390_lowcore.kernel_stack;
+	/*
+	 * Set up PSW restart to call ipl.c:do_restart(). Copy the relevant
+	 * restart data to the absolute zero lowcore. This is necessary if
+	 * PSW restart is done on an offline CPU that has lowcore zero.
+	 */
+	lc->restart_stack = (unsigned long) restart_stack;
+	lc->restart_fn = (unsigned long) do_restart;
+	lc->restart_data = 0;
+	lc->restart_source = -1U;
+	__ctl_store(lc->cregs_save_area, 0, 15);
+	lc->spinlock_lockval = arch_spin_lockval(0);
+	lc->spinlock_index = 0;
+	arch_spin_lock_setup(0);
+	lc->return_lpswe = gen_lpswe(__LC_RETURN_PSW);
+	lc->return_mcck_lpswe = gen_lpswe(__LC_RETURN_MCCK_PSW);
+	lc->preempt_count = PREEMPT_DISABLED;
+	lc->kernel_asce = S390_lowcore.kernel_asce;
+	lc->user_asce = S390_lowcore.user_asce;
+
+	abs_lc = get_abs_lowcore();
+	abs_lc->restart_stack = lc->restart_stack;
+	abs_lc->restart_fn = lc->restart_fn;
+	abs_lc->restart_data = lc->restart_data;
+	abs_lc->restart_source = lc->restart_source;
+	abs_lc->restart_psw = lc->restart_psw;
+	abs_lc->restart_flags = RESTART_FLAG_CTLREGS;
+	memcpy(abs_lc->cregs_save_area, lc->cregs_save_area, sizeof(abs_lc->cregs_save_area));
+	abs_lc->program_new_psw = lc->program_new_psw;
+	abs_lc->mcesad = lc->mcesad;
+	put_abs_lowcore(abs_lc);
+
+	set_prefix(__pa(lc));
+	lowcore_ptr[0] = lc;
+	if (abs_lowcore_map(0, lowcore_ptr[0], false))
+		panic("Couldn't setup absolute lowcore");
+}
+
+static struct resource code_resource = {
+	.name  = "Kernel code",
+	.flags = IORESOURCE_BUSY | IORESOURCE_SYSTEM_RAM,
+};
+
+static struct resource data_resource = {
+	.name = "Kernel data",
+	.flags = IORESOURCE_BUSY | IORESOURCE_SYSTEM_RAM,
+};
+
+static struct resource bss_resource = {
+	.name = "Kernel bss",
+	.flags = IORESOURCE_BUSY | IORESOURCE_SYSTEM_RAM,
+};
+
+static struct resource __initdata *standard_resources[] = {
+	&code_resource,
+	&data_resource,
+	&bss_resource,
+};
+
+static void __init setup_resources(void)
+{
+	struct resource *res, *std_res, *sub_res;
+	phys_addr_t start, end;
+	int j;
+	u64 i;
+
+	code_resource.start = (unsigned long) _text;
+	code_resource.end = (unsigned long) _etext - 1;
+	data_resource.start = (unsigned long) _etext;
+	data_resource.end = (unsigned long) _edata - 1;
+	bss_resource.start = (unsigned long) __bss_start;
+	bss_resource.end = (unsigned long) __bss_stop - 1;
+
+	for_each_mem_range(i, &start, &end) {
+		res = memblock_alloc(sizeof(*res), 8);
+		if (!res)
+			panic("%s: Failed to allocate %zu bytes align=0x%x\n",
+			      __func__, sizeof(*res), 8);
+		res->flags = IORESOURCE_BUSY | IORESOURCE_SYSTEM_RAM;
+
+		res->name = "System RAM";
+		res->start = start;
+		/*
+		 * In memblock, end points to the first byte after the
+		 * range while in resources, end points to the last byte in
+		 * the range.
+		 */
+		res->end = end - 1;
+		request_resource(&iomem_resource, res);
+
+		for (j = 0; j < ARRAY_SIZE(standard_resources); j++) {
+			std_res = standard_resources[j];
+			if (std_res->start < res->start ||
+			    std_res->start > res->end)
+				continue;
+			if (std_res->end > res->end) {
+				sub_res = memblock_alloc(sizeof(*sub_res), 8);
+				if (!sub_res)
+					panic("%s: Failed to allocate %zu bytes align=0x%x\n",
+					      __func__, sizeof(*sub_res), 8);
+				*sub_res = *std_res;
+				sub_res->end = res->end;
+				std_res->start = res->end + 1;
+				request_resource(res, sub_res);
+			} else {
+				request_resource(res, std_res);
+			}
+		}
+	}
+#ifdef CONFIG_CRASH_DUMP
+	/*
+	 * Re-add removed crash kernel memory as reserved memory. This makes
+	 * sure it will be mapped with the identity mapping and struct pages
+	 * will be created, so it can be resized later on.
+	 * However add it later since the crash kernel resource should not be
+	 * part of the System RAM resource.
+	 */
+	if (crashk_res.end) {
+		memblock_add_node(crashk_res.start, resource_size(&crashk_res),
+				  0, MEMBLOCK_NONE);
+		memblock_reserve(crashk_res.start, resource_size(&crashk_res));
+		insert_resource(&iomem_resource, &crashk_res);
+	}
+#endif
+}
+
+static void __init setup_memory_end(void)
+{
+	max_pfn = max_low_pfn = PFN_DOWN(ident_map_size);
+	pr_notice("The maximum memory size is %luMB\n", ident_map_size >> 20);
+}
+
+#ifdef CONFIG_CRASH_DUMP
+
+/*
+ * When kdump is enabled, we have to ensure that no memory from the area
+ * [0 - crashkernel memory size] is set offline - it will be exchanged with
+ * the crashkernel memory region when kdump is triggered. The crashkernel
+ * memory region can never get offlined (pages are unmovable).
+ */
+static int kdump_mem_notifier(struct notifier_block *nb,
+			      unsigned long action, void *data)
+{
+	struct memory_notify *arg = data;
+
+	if (action != MEM_GOING_OFFLINE)
+		return NOTIFY_OK;
+	if (arg->start_pfn < PFN_DOWN(resource_size(&crashk_res)))
+		return NOTIFY_BAD;
+	return NOTIFY_OK;
+}
+
+static struct notifier_block kdump_mem_nb = {
+	.notifier_call = kdump_mem_notifier,
+};
+
+#endif
+
+/*
+ * Reserve page tables created by decompressor
+ */
+static void __init reserve_pgtables(void)
+{
+	unsigned long start, end;
+	struct reserved_range *range;
+
+	for_each_physmem_reserved_type_range(RR_VMEM, range, &start, &end)
+		memblock_reserve(start, end - start);
+}
+
+/*
+ * Reserve memory for kdump kernel to be loaded with kexec
+ */
+static void __init reserve_crashkernel(void)
+{
+#ifdef CONFIG_CRASH_DUMP
+	unsigned long long crash_base, crash_size;
+	phys_addr_t low, high;
+	int rc;
+
+	rc = parse_crashkernel(boot_command_line, ident_map_size, &crash_size,
+			       &crash_base);
+
+	crash_base = ALIGN(crash_base, KEXEC_CRASH_MEM_ALIGN);
+	crash_size = ALIGN(crash_size, KEXEC_CRASH_MEM_ALIGN);
+	if (rc || crash_size == 0)
+		return;
+
+	if (memblock.memory.regions[0].size < crash_size) {
+		pr_info("crashkernel reservation failed: %s\n",
+			"first memory chunk must be at least crashkernel size");
+		return;
+	}
+
+	low = crash_base ?: oldmem_data.start;
+	high = low + crash_size;
+	if (low >= oldmem_data.start && high <= oldmem_data.start + oldmem_data.size) {
+		/* The crashkernel fits into OLDMEM, reuse OLDMEM */
+		crash_base = low;
+	} else {
+		/* Find suitable area in free memory */
+		low = max_t(unsigned long, crash_size, sclp.hsa_size);
+		high = crash_base ? crash_base + crash_size : ULONG_MAX;
+
+		if (crash_base && crash_base < low) {
+			pr_info("crashkernel reservation failed: %s\n",
+				"crash_base too low");
+			return;
+		}
+		low = crash_base ?: low;
+		crash_base = memblock_phys_alloc_range(crash_size,
+						       KEXEC_CRASH_MEM_ALIGN,
+						       low, high);
+	}
+
+	if (!crash_base) {
+		pr_info("crashkernel reservation failed: %s\n",
+			"no suitable area found");
+		return;
+	}
+
+	if (register_memory_notifier(&kdump_mem_nb)) {
+		memblock_phys_free(crash_base, crash_size);
+		return;
+	}
+
+	if (!oldmem_data.start && MACHINE_IS_VM)
+		diag10_range(PFN_DOWN(crash_base), PFN_DOWN(crash_size));
+	crashk_res.start = crash_base;
+	crashk_res.end = crash_base + crash_size - 1;
+	memblock_remove(crash_base, crash_size);
+	pr_info("Reserving %lluMB of memory at %lluMB "
+		"for crashkernel (System RAM: %luMB)\n",
+		crash_size >> 20, crash_base >> 20,
+		(unsigned long)memblock.memory.total_size >> 20);
+	os_info_crashkernel_add(crash_base, crash_size);
+#endif
+}
+
+/*
+ * Reserve the initrd from being used by memblock
+ */
+static void __init reserve_initrd(void)
+{
+	unsigned long addr, size;
+
+	if (!IS_ENABLED(CONFIG_BLK_DEV_INITRD) || !get_physmem_reserved(RR_INITRD, &addr, &size))
+		return;
+	initrd_start = (unsigned long)__va(addr);
+	initrd_end = initrd_start + size;
+	memblock_reserve(addr, size);
+}
+
+/*
+ * Reserve the memory area used to pass the certificate lists
+ */
+static void __init reserve_certificate_list(void)
+{
+	if (ipl_cert_list_addr)
+		memblock_reserve(ipl_cert_list_addr, ipl_cert_list_size);
+}
+
+static void __init reserve_physmem_info(void)
+{
+	unsigned long addr, size;
+
+	if (get_physmem_reserved(RR_MEM_DETECT_EXTENDED, &addr, &size))
+		memblock_reserve(addr, size);
+}
+
+static void __init free_physmem_info(void)
+{
+	unsigned long addr, size;
+
+	if (get_physmem_reserved(RR_MEM_DETECT_EXTENDED, &addr, &size))
+		memblock_phys_free(addr, size);
+}
+
+static void __init memblock_add_physmem_info(void)
+{
+	unsigned long start, end;
+	int i;
+
+	pr_debug("physmem info source: %s (%hhd)\n",
+		 get_physmem_info_source(), physmem_info.info_source);
+	/* keep memblock lists close to the kernel */
+	memblock_set_bottom_up(true);
+	for_each_physmem_usable_range(i, &start, &end)
+		memblock_add(start, end - start);
+	for_each_physmem_online_range(i, &start, &end)
+		memblock_physmem_add(start, end - start);
+	memblock_set_bottom_up(false);
+	memblock_set_node(0, ULONG_MAX, &memblock.memory, 0);
+}
+
+/*
+ * Reserve memory used for lowcore/command line/kernel image.
+ */
+static void __init reserve_kernel(void)
+{
+	memblock_reserve(0, STARTUP_NORMAL_OFFSET);
+	memblock_reserve(OLDMEM_BASE, sizeof(unsigned long));
+	memblock_reserve(OLDMEM_SIZE, sizeof(unsigned long));
+	memblock_reserve(physmem_info.reserved[RR_AMODE31].start, __eamode31 - __samode31);
+	memblock_reserve(__pa(sclp_early_sccb), EXT_SCCB_READ_SCP);
+	memblock_reserve(__pa(_stext), _end - _stext);
+}
+
+static void __init setup_memory(void)
+{
+	phys_addr_t start, end;
+	u64 i;
+
+	/*
+	 * Init storage key for present memory
+	 */
+	for_each_mem_range(i, &start, &end)
+		storage_key_init_range(start, end);
+
+	psw_set_key(PAGE_DEFAULT_KEY);
+}
+
+static void __init relocate_amode31_section(void)
+{
+	unsigned long amode31_size = __eamode31 - __samode31;
+	long amode31_offset, *ptr;
+
+	amode31_offset = physmem_info.reserved[RR_AMODE31].start - (unsigned long)__samode31;
+	pr_info("Relocating AMODE31 section of size 0x%08lx\n", amode31_size);
+
+	/* Move original AMODE31 section to the new one */
+	memmove((void *)physmem_info.reserved[RR_AMODE31].start, __samode31, amode31_size);
+	/* Zero out the old AMODE31 section to catch invalid accesses within it */
+	memset(__samode31, 0, amode31_size);
+
+	/* Update all AMODE31 region references */
+	for (ptr = _start_amode31_refs; ptr != _end_amode31_refs; ptr++)
+		*ptr += amode31_offset;
+}
+
+/* This must be called after AMODE31 relocation */
+static void __init setup_cr(void)
+{
+	union ctlreg2 cr2;
+	union ctlreg5 cr5;
+	union ctlreg15 cr15;
+
+	__ctl_duct[1] = (unsigned long)__ctl_aste;
+	__ctl_duct[2] = (unsigned long)__ctl_aste;
+	__ctl_duct[4] = (unsigned long)__ctl_duald;
+
+	/* Update control registers CR2, CR5 and CR15 */
+	__ctl_store(cr2.val, 2, 2);
+	__ctl_store(cr5.val, 5, 5);
+	__ctl_store(cr15.val, 15, 15);
+	cr2.ducto = (unsigned long)__ctl_duct >> 6;
+	cr5.pasteo = (unsigned long)__ctl_duct >> 6;
+	cr15.lsea = (unsigned long)__ctl_linkage_stack >> 3;
+	__ctl_load(cr2.val, 2, 2);
+	__ctl_load(cr5.val, 5, 5);
+	__ctl_load(cr15.val, 15, 15);
+}
+
+/*
+ * Add system information as device randomness
+ */
+static void __init setup_randomness(void)
+{
+	struct sysinfo_3_2_2 *vmms;
+
+	vmms = memblock_alloc(PAGE_SIZE, PAGE_SIZE);
+	if (!vmms)
+		panic("Failed to allocate memory for sysinfo structure\n");
+	if (stsi(vmms, 3, 2, 2) == 0 && vmms->count)
+		add_device_randomness(&vmms->vm, sizeof(vmms->vm[0]) * vmms->count);
+	memblock_free(vmms, PAGE_SIZE);
+
+	if (cpacf_query_func(CPACF_PRNO, CPACF_PRNO_TRNG))
+		static_branch_enable(&s390_arch_random_available);
+}
+
+/*
+ * Find the correct size for the task_struct. This depends on
+ * the size of the struct fpu at the end of the thread_struct
+ * which is embedded in the task_struct.
+ */
+static void __init setup_task_size(void)
+{
+	int task_size = sizeof(struct task_struct);
+
+	if (!MACHINE_HAS_VX) {
+		task_size -= sizeof(__vector128) * __NUM_VXRS;
+		task_size += sizeof(freg_t) * __NUM_FPRS;
+	}
+	arch_task_struct_size = task_size;
+}
+
+/*
+ * Issue diagnose 318 to set the control program name and
+ * version codes.
+ */
+static void __init setup_control_program_code(void)
+{
+	union diag318_info diag318_info = {
+		.cpnc = CPNC_LINUX,
+		.cpvc = 0,
+	};
+
+	if (!sclp.has_diag318)
+		return;
+
+	diag_stat_inc(DIAG_STAT_X318);
+	asm volatile("diag %0,0,0x318\n" : : "d" (diag318_info.val));
+}
+
+/*
+ * Print the component list from the IPL report
+ */
+static void __init log_component_list(void)
+{
+	struct ipl_rb_component_entry *ptr, *end;
+	char *str;
+
+	if (!early_ipl_comp_list_addr)
+		return;
+	if (ipl_block.hdr.flags & IPL_PL_FLAG_SIPL)
+		pr_info("Linux is running with Secure-IPL enabled\n");
+	else
+		pr_info("Linux is running with Secure-IPL disabled\n");
+	ptr = __va(early_ipl_comp_list_addr);
+	end = (void *) ptr + early_ipl_comp_list_size;
+	pr_info("The IPL report contains the following components:\n");
+	while (ptr < end) {
+		if (ptr->flags & IPL_RB_COMPONENT_FLAG_SIGNED) {
+			if (ptr->flags & IPL_RB_COMPONENT_FLAG_VERIFIED)
+				str = "signed, verified";
+			else
+				str = "signed, verification failed";
+		} else {
+			str = "not signed";
+		}
+		pr_info("%016llx - %016llx (%s)\n",
+			ptr->addr, ptr->addr + ptr->len, str);
+		ptr++;
+	}
+}
+
+/*
+ * Setup function called from init/main.c just after the banner
+ * was printed.
+ */
+
+void __init setup_arch(char **cmdline_p)
+{
+        /*
+         * print what head.S has found out about the machine
+         */
+	if (MACHINE_IS_VM)
+		pr_info("Linux is running as a z/VM "
+			"guest operating system in 64-bit mode\n");
+	else if (MACHINE_IS_KVM)
+		pr_info("Linux is running under KVM in 64-bit mode\n");
+	else if (MACHINE_IS_LPAR)
+		pr_info("Linux is running natively in 64-bit mode\n");
+	else
+		pr_info("Linux is running as a guest in 64-bit mode\n");
+
+	log_component_list();
+
+	/* Have one command line that is parsed and saved in /proc/cmdline */
+	/* boot_command_line has been already set up in early.c */
+	*cmdline_p = boot_command_line;
+
+        ROOT_DEV = Root_RAM0;
+
+	setup_initial_init_mm(_text, _etext, _edata, _end);
+
+	if (IS_ENABLED(CONFIG_EXPOLINE_AUTO))
+		nospec_auto_detect();
+
+	jump_label_init();
+	parse_early_param();
+#ifdef CONFIG_CRASH_DUMP
+	/* Deactivate elfcorehdr= kernel parameter */
+	elfcorehdr_addr = ELFCORE_ADDR_MAX;
+#endif
+
+	os_info_init();
+	setup_ipl();
+	setup_task_size();
+	setup_control_program_code();
+
+	/* Do some memory reservations *before* memory is added to memblock */
+	reserve_pgtables();
+	reserve_kernel();
+	reserve_initrd();
+	reserve_certificate_list();
+	reserve_physmem_info();
+	memblock_set_current_limit(ident_map_size);
+	memblock_allow_resize();
+
+	/* Get information about *all* installed memory */
+	memblock_add_physmem_info();
+
+	free_physmem_info();
+	setup_memory_end();
+	memblock_dump_all();
+	setup_memory();
+
+	relocate_amode31_section();
+	setup_cr();
+	setup_uv();
+	dma_contiguous_reserve(ident_map_size);
+	vmcp_cma_reserve();
+	if (MACHINE_HAS_EDAT2)
+		hugetlb_cma_reserve(PUD_SHIFT - PAGE_SHIFT);
+
+	reserve_crashkernel();
+#ifdef CONFIG_CRASH_DUMP
+	/*
+	 * Be aware that smp_save_dump_secondary_cpus() triggers a system reset.
+	 * Therefore CPU and device initialization should be done afterwards.
+	 */
+	smp_save_dump_secondary_cpus();
+#endif
+
+	setup_resources();
+	setup_lowcore();
+	smp_fill_possible_mask();
+	cpu_detect_mhz_feature();
+        cpu_init();
+	numa_setup();
+	smp_detect_cpus();
+	topology_init_early();
+
+	if (test_facility(193))
+		static_branch_enable(&cpu_has_bear);
+
+	/*
+	 * Create kernel page tables.
+	 */
+        paging_init();
+
+	/*
+	 * After paging_init created the kernel page table, the new PSWs
+	 * in lowcore can now run with DAT enabled.
+	 */
+#ifdef CONFIG_CRASH_DUMP
+	smp_save_dump_ipl_cpu();
+#endif
+
+        /* Setup default console */
+	conmode_default();
+	set_preferred_console();
+
+	apply_alternative_instructions();
+	if (IS_ENABLED(CONFIG_EXPOLINE))
+		nospec_init_branches();
+
+	/* Setup zfcp/nvme dump support */
+	setup_zfcpdump();
+
+	/* Add system specific data to the random pool */
+	setup_randomness();
+}
diff --git a/arch/s390/kernel/signal.c b/arch/s390/kernel/signal.c
new file mode 100644
index 0000000000..d63557d386
--- /dev/null
+++ b/arch/s390/kernel/signal.c
@@ -0,0 +1,534 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ *    Copyright IBM Corp. 1999, 2006
+ *    Author(s): Denis Joseph Barrow (djbarrow@de.ibm.com,barrow_dj@yahoo.com)
+ *
+ *    Based on Intel version
+ * 
+ *  Copyright (C) 1991, 1992  Linus Torvalds
+ *
+ *  1997-11-28  Modified for POSIX.1b signals by Richard Henderson
+ */
+
+#include <linux/sched.h>
+#include <linux/sched/task_stack.h>
+#include <linux/mm.h>
+#include <linux/smp.h>
+#include <linux/kernel.h>
+#include <linux/signal.h>
+#include <linux/entry-common.h>
+#include <linux/errno.h>
+#include <linux/wait.h>
+#include <linux/ptrace.h>
+#include <linux/unistd.h>
+#include <linux/stddef.h>
+#include <linux/tty.h>
+#include <linux/personality.h>
+#include <linux/binfmts.h>
+#include <linux/syscalls.h>
+#include <linux/compat.h>
+#include <asm/ucontext.h>
+#include <linux/uaccess.h>
+#include <asm/lowcore.h>
+#include <asm/switch_to.h>
+#include <asm/vdso.h>
+#include "entry.h"
+
+/*
+ * Layout of an old-style signal-frame:
+ *	-----------------------------------------
+ *	| save area (_SIGNAL_FRAMESIZE)		|
+ *	-----------------------------------------
+ *	| struct sigcontext			|
+ *	|	oldmask				|
+ *	|	_sigregs *			|
+ *	-----------------------------------------
+ *	| _sigregs with				|
+ *	|	_s390_regs_common		|
+ *	|	_s390_fp_regs			|
+ *	-----------------------------------------
+ *	| int signo				|
+ *	-----------------------------------------
+ *	| _sigregs_ext with			|
+ *	|	gprs_high 64 byte (opt)		|
+ *	|	vxrs_low 128 byte (opt)		|
+ *	|	vxrs_high 256 byte (opt)	|
+ *	|	reserved 128 byte (opt)		|
+ *	-----------------------------------------
+ *	| __u16 svc_insn			|
+ *	-----------------------------------------
+ * The svc_insn entry with the sigreturn system call opcode does not
+ * have a fixed position and moves if gprs_high or vxrs exist.
+ * Future extensions will be added to _sigregs_ext.
+ */
+struct sigframe
+{
+	__u8 callee_used_stack[__SIGNAL_FRAMESIZE];
+	struct sigcontext sc;
+	_sigregs sregs;
+	int signo;
+	_sigregs_ext sregs_ext;
+	__u16 svc_insn;		/* Offset of svc_insn is NOT fixed! */
+};
+
+/*
+ * Layout of an rt signal-frame:
+ *	-----------------------------------------
+ *	| save area (_SIGNAL_FRAMESIZE)		|
+ *	-----------------------------------------
+ *	| svc __NR_rt_sigreturn 2 byte		|
+ *	-----------------------------------------
+ *	| struct siginfo			|
+ *	-----------------------------------------
+ *	| struct ucontext_extended with		|
+ *	|	unsigned long uc_flags		|
+ *	|	struct ucontext *uc_link	|
+ *	|	stack_t uc_stack		|
+ *	|	_sigregs uc_mcontext with	|
+ *	|		_s390_regs_common	|
+ *	|		_s390_fp_regs		|
+ *	|	sigset_t uc_sigmask		|
+ *	|	_sigregs_ext uc_mcontext_ext	|
+ *	|		gprs_high 64 byte (opt)	|
+ *	|		vxrs_low 128 byte (opt)	|
+ *	|		vxrs_high 256 byte (opt)|
+ *	|		reserved 128 byte (opt)	|
+ *	-----------------------------------------
+ * Future extensions will be added to _sigregs_ext.
+ */
+struct rt_sigframe
+{
+	__u8 callee_used_stack[__SIGNAL_FRAMESIZE];
+	__u16 svc_insn;
+	struct siginfo info;
+	struct ucontext_extended uc;
+};
+
+/* Store registers needed to create the signal frame */
+static void store_sigregs(void)
+{
+	save_access_regs(current->thread.acrs);
+	save_fpu_regs();
+}
+
+/* Load registers after signal return */
+static void load_sigregs(void)
+{
+	restore_access_regs(current->thread.acrs);
+}
+
+/* Returns non-zero on fault. */
+static int save_sigregs(struct pt_regs *regs, _sigregs __user *sregs)
+{
+	_sigregs user_sregs;
+
+	/* Copy a 'clean' PSW mask to the user to avoid leaking
+	   information about whether PER is currently on.  */
+	user_sregs.regs.psw.mask = PSW_USER_BITS |
+		(regs->psw.mask & (PSW_MASK_USER | PSW_MASK_RI));
+	user_sregs.regs.psw.addr = regs->psw.addr;
+	memcpy(&user_sregs.regs.gprs, &regs->gprs, sizeof(sregs->regs.gprs));
+	memcpy(&user_sregs.regs.acrs, current->thread.acrs,
+	       sizeof(user_sregs.regs.acrs));
+	fpregs_store(&user_sregs.fpregs, &current->thread.fpu);
+	if (__copy_to_user(sregs, &user_sregs, sizeof(_sigregs)))
+		return -EFAULT;
+	return 0;
+}
+
+static int restore_sigregs(struct pt_regs *regs, _sigregs __user *sregs)
+{
+	_sigregs user_sregs;
+
+	/* Always make any pending restarted system call return -EINTR */
+	current->restart_block.fn = do_no_restart_syscall;
+
+	if (__copy_from_user(&user_sregs, sregs, sizeof(user_sregs)))
+		return -EFAULT;
+
+	if (!is_ri_task(current) && (user_sregs.regs.psw.mask & PSW_MASK_RI))
+		return -EINVAL;
+
+	/* Test the floating-point-control word. */
+	if (test_fp_ctl(user_sregs.fpregs.fpc))
+		return -EINVAL;
+
+	/* Use regs->psw.mask instead of PSW_USER_BITS to preserve PER bit. */
+	regs->psw.mask = (regs->psw.mask & ~(PSW_MASK_USER | PSW_MASK_RI)) |
+		(user_sregs.regs.psw.mask & (PSW_MASK_USER | PSW_MASK_RI));
+	/* Check for invalid user address space control. */
+	if ((regs->psw.mask & PSW_MASK_ASC) == PSW_ASC_HOME)
+		regs->psw.mask = PSW_ASC_PRIMARY |
+			(regs->psw.mask & ~PSW_MASK_ASC);
+	/* Check for invalid amode */
+	if (regs->psw.mask & PSW_MASK_EA)
+		regs->psw.mask |= PSW_MASK_BA;
+	regs->psw.addr = user_sregs.regs.psw.addr;
+	memcpy(&regs->gprs, &user_sregs.regs.gprs, sizeof(sregs->regs.gprs));
+	memcpy(&current->thread.acrs, &user_sregs.regs.acrs,
+	       sizeof(current->thread.acrs));
+
+	fpregs_load(&user_sregs.fpregs, &current->thread.fpu);
+
+	clear_pt_regs_flag(regs, PIF_SYSCALL); /* No longer in a system call */
+	return 0;
+}
+
+/* Returns non-zero on fault. */
+static int save_sigregs_ext(struct pt_regs *regs,
+			    _sigregs_ext __user *sregs_ext)
+{
+	__u64 vxrs[__NUM_VXRS_LOW];
+	int i;
+
+	/* Save vector registers to signal stack */
+	if (MACHINE_HAS_VX) {
+		for (i = 0; i < __NUM_VXRS_LOW; i++)
+			vxrs[i] = current->thread.fpu.vxrs[i].low;
+		if (__copy_to_user(&sregs_ext->vxrs_low, vxrs,
+				   sizeof(sregs_ext->vxrs_low)) ||
+		    __copy_to_user(&sregs_ext->vxrs_high,
+				   current->thread.fpu.vxrs + __NUM_VXRS_LOW,
+				   sizeof(sregs_ext->vxrs_high)))
+			return -EFAULT;
+	}
+	return 0;
+}
+
+static int restore_sigregs_ext(struct pt_regs *regs,
+			       _sigregs_ext __user *sregs_ext)
+{
+	__u64 vxrs[__NUM_VXRS_LOW];
+	int i;
+
+	/* Restore vector registers from signal stack */
+	if (MACHINE_HAS_VX) {
+		if (__copy_from_user(vxrs, &sregs_ext->vxrs_low,
+				     sizeof(sregs_ext->vxrs_low)) ||
+		    __copy_from_user(current->thread.fpu.vxrs + __NUM_VXRS_LOW,
+				     &sregs_ext->vxrs_high,
+				     sizeof(sregs_ext->vxrs_high)))
+			return -EFAULT;
+		for (i = 0; i < __NUM_VXRS_LOW; i++)
+			current->thread.fpu.vxrs[i].low = vxrs[i];
+	}
+	return 0;
+}
+
+SYSCALL_DEFINE0(sigreturn)
+{
+	struct pt_regs *regs = task_pt_regs(current);
+	struct sigframe __user *frame =
+		(struct sigframe __user *) regs->gprs[15];
+	sigset_t set;
+
+	if (__copy_from_user(&set.sig, &frame->sc.oldmask, _SIGMASK_COPY_SIZE))
+		goto badframe;
+	set_current_blocked(&set);
+	save_fpu_regs();
+	if (restore_sigregs(regs, &frame->sregs))
+		goto badframe;
+	if (restore_sigregs_ext(regs, &frame->sregs_ext))
+		goto badframe;
+	load_sigregs();
+	return regs->gprs[2];
+badframe:
+	force_sig(SIGSEGV);
+	return 0;
+}
+
+SYSCALL_DEFINE0(rt_sigreturn)
+{
+	struct pt_regs *regs = task_pt_regs(current);
+	struct rt_sigframe __user *frame =
+		(struct rt_sigframe __user *)regs->gprs[15];
+	sigset_t set;
+
+	if (__copy_from_user(&set.sig, &frame->uc.uc_sigmask, sizeof(set)))
+		goto badframe;
+	set_current_blocked(&set);
+	if (restore_altstack(&frame->uc.uc_stack))
+		goto badframe;
+	save_fpu_regs();
+	if (restore_sigregs(regs, &frame->uc.uc_mcontext))
+		goto badframe;
+	if (restore_sigregs_ext(regs, &frame->uc.uc_mcontext_ext))
+		goto badframe;
+	load_sigregs();
+	return regs->gprs[2];
+badframe:
+	force_sig(SIGSEGV);
+	return 0;
+}
+
+/*
+ * Determine which stack to use..
+ */
+static inline void __user *
+get_sigframe(struct k_sigaction *ka, struct pt_regs * regs, size_t frame_size)
+{
+	unsigned long sp;
+
+	/* Default to using normal stack */
+	sp = regs->gprs[15];
+
+	/* Overflow on alternate signal stack gives SIGSEGV. */
+	if (on_sig_stack(sp) && !on_sig_stack((sp - frame_size) & -8UL))
+		return (void __user *) -1UL;
+
+	/* This is the X/Open sanctioned signal stack switching.  */
+	if (ka->sa.sa_flags & SA_ONSTACK) {
+		if (! sas_ss_flags(sp))
+			sp = current->sas_ss_sp + current->sas_ss_size;
+	}
+
+	return (void __user *)((sp - frame_size) & -8ul);
+}
+
+static int setup_frame(int sig, struct k_sigaction *ka,
+		       sigset_t *set, struct pt_regs * regs)
+{
+	struct sigframe __user *frame;
+	struct sigcontext sc;
+	unsigned long restorer;
+	size_t frame_size;
+
+	/*
+	 * gprs_high are only present for a 31-bit task running on
+	 * a 64-bit kernel (see compat_signal.c) but the space for
+	 * gprs_high need to be allocated if vector registers are
+	 * included in the signal frame on a 31-bit system.
+	 */
+	frame_size = sizeof(*frame) - sizeof(frame->sregs_ext);
+	if (MACHINE_HAS_VX)
+		frame_size += sizeof(frame->sregs_ext);
+	frame = get_sigframe(ka, regs, frame_size);
+	if (frame == (void __user *) -1UL)
+		return -EFAULT;
+
+	/* Set up backchain. */
+	if (__put_user(regs->gprs[15], (addr_t __user *) frame))
+		return -EFAULT;
+
+	/* Create struct sigcontext on the signal stack */
+	memcpy(&sc.oldmask, &set->sig, _SIGMASK_COPY_SIZE);
+	sc.sregs = (_sigregs __user __force *) &frame->sregs;
+	if (__copy_to_user(&frame->sc, &sc, sizeof(frame->sc)))
+		return -EFAULT;
+
+	/* Store registers needed to create the signal frame */
+	store_sigregs();
+
+	/* Create _sigregs on the signal stack */
+	if (save_sigregs(regs, &frame->sregs))
+		return -EFAULT;
+
+	/* Place signal number on stack to allow backtrace from handler.  */
+	if (__put_user(regs->gprs[2], (int __user *) &frame->signo))
+		return -EFAULT;
+
+	/* Create _sigregs_ext on the signal stack */
+	if (save_sigregs_ext(regs, &frame->sregs_ext))
+		return -EFAULT;
+
+	/* Set up to return from userspace.  If provided, use a stub
+	   already in userspace.  */
+	if (ka->sa.sa_flags & SA_RESTORER)
+		restorer = (unsigned long) ka->sa.sa_restorer;
+	else
+		restorer = VDSO64_SYMBOL(current, sigreturn);
+
+	/* Set up registers for signal handler */
+	regs->gprs[14] = restorer;
+	regs->gprs[15] = (unsigned long) frame;
+	/* Force default amode and default user address space control. */
+	regs->psw.mask = PSW_MASK_EA | PSW_MASK_BA |
+		(PSW_USER_BITS & PSW_MASK_ASC) |
+		(regs->psw.mask & ~PSW_MASK_ASC);
+	regs->psw.addr = (unsigned long) ka->sa.sa_handler;
+
+	regs->gprs[2] = sig;
+	regs->gprs[3] = (unsigned long) &frame->sc;
+
+	/* We forgot to include these in the sigcontext.
+	   To avoid breaking binary compatibility, they are passed as args. */
+	if (sig == SIGSEGV || sig == SIGBUS || sig == SIGILL ||
+	    sig == SIGTRAP || sig == SIGFPE) {
+		/* set extra registers only for synchronous signals */
+		regs->gprs[4] = regs->int_code & 127;
+		regs->gprs[5] = regs->int_parm_long;
+		regs->gprs[6] = current->thread.last_break;
+	}
+	return 0;
+}
+
+static int setup_rt_frame(struct ksignal *ksig, sigset_t *set,
+			  struct pt_regs *regs)
+{
+	struct rt_sigframe __user *frame;
+	unsigned long uc_flags, restorer;
+	size_t frame_size;
+
+	frame_size = sizeof(struct rt_sigframe) - sizeof(_sigregs_ext);
+	/*
+	 * gprs_high are only present for a 31-bit task running on
+	 * a 64-bit kernel (see compat_signal.c) but the space for
+	 * gprs_high need to be allocated if vector registers are
+	 * included in the signal frame on a 31-bit system.
+	 */
+	uc_flags = 0;
+	if (MACHINE_HAS_VX) {
+		frame_size += sizeof(_sigregs_ext);
+		uc_flags |= UC_VXRS;
+	}
+	frame = get_sigframe(&ksig->ka, regs, frame_size);
+	if (frame == (void __user *) -1UL)
+		return -EFAULT;
+
+	/* Set up backchain. */
+	if (__put_user(regs->gprs[15], (addr_t __user *) frame))
+		return -EFAULT;
+
+	/* Set up to return from userspace.  If provided, use a stub
+	   already in userspace.  */
+	if (ksig->ka.sa.sa_flags & SA_RESTORER)
+		restorer = (unsigned long) ksig->ka.sa.sa_restorer;
+	else
+		restorer = VDSO64_SYMBOL(current, rt_sigreturn);
+
+	/* Create siginfo on the signal stack */
+	if (copy_siginfo_to_user(&frame->info, &ksig->info))
+		return -EFAULT;
+
+	/* Store registers needed to create the signal frame */
+	store_sigregs();
+
+	/* Create ucontext on the signal stack. */
+	if (__put_user(uc_flags, &frame->uc.uc_flags) ||
+	    __put_user(NULL, &frame->uc.uc_link) ||
+	    __save_altstack(&frame->uc.uc_stack, regs->gprs[15]) ||
+	    save_sigregs(regs, &frame->uc.uc_mcontext) ||
+	    __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set)) ||
+	    save_sigregs_ext(regs, &frame->uc.uc_mcontext_ext))
+		return -EFAULT;
+
+	/* Set up registers for signal handler */
+	regs->gprs[14] = restorer;
+	regs->gprs[15] = (unsigned long) frame;
+	/* Force default amode and default user address space control. */
+	regs->psw.mask = PSW_MASK_EA | PSW_MASK_BA |
+		(PSW_USER_BITS & PSW_MASK_ASC) |
+		(regs->psw.mask & ~PSW_MASK_ASC);
+	regs->psw.addr = (unsigned long) ksig->ka.sa.sa_handler;
+
+	regs->gprs[2] = ksig->sig;
+	regs->gprs[3] = (unsigned long) &frame->info;
+	regs->gprs[4] = (unsigned long) &frame->uc;
+	regs->gprs[5] = current->thread.last_break;
+	return 0;
+}
+
+static void handle_signal(struct ksignal *ksig, sigset_t *oldset,
+			  struct pt_regs *regs)
+{
+	int ret;
+
+	/* Set up the stack frame */
+	if (ksig->ka.sa.sa_flags & SA_SIGINFO)
+		ret = setup_rt_frame(ksig, oldset, regs);
+	else
+		ret = setup_frame(ksig->sig, &ksig->ka, oldset, regs);
+
+	signal_setup_done(ret, ksig, test_thread_flag(TIF_SINGLE_STEP));
+}
+
+/*
+ * Note that 'init' is a special process: it doesn't get signals it doesn't
+ * want to handle. Thus you cannot kill init even with a SIGKILL even by
+ * mistake.
+ *
+ * Note that we go through the signals twice: once to check the signals that
+ * the kernel can handle, and then we build all the user-level signal handling
+ * stack-frames in one go after that.
+ */
+
+void arch_do_signal_or_restart(struct pt_regs *regs)
+{
+	struct ksignal ksig;
+	sigset_t *oldset = sigmask_to_save();
+
+	/*
+	 * Get signal to deliver. When running under ptrace, at this point
+	 * the debugger may change all our registers, including the system
+	 * call information.
+	 */
+	current->thread.system_call =
+		test_pt_regs_flag(regs, PIF_SYSCALL) ? regs->int_code : 0;
+
+	if (get_signal(&ksig)) {
+		/* Whee!  Actually deliver the signal.  */
+		if (current->thread.system_call) {
+			regs->int_code = current->thread.system_call;
+			/* Check for system call restarting. */
+			switch (regs->gprs[2]) {
+			case -ERESTART_RESTARTBLOCK:
+			case -ERESTARTNOHAND:
+				regs->gprs[2] = -EINTR;
+				break;
+			case -ERESTARTSYS:
+				if (!(ksig.ka.sa.sa_flags & SA_RESTART)) {
+					regs->gprs[2] = -EINTR;
+					break;
+				}
+				fallthrough;
+			case -ERESTARTNOINTR:
+				regs->gprs[2] = regs->orig_gpr2;
+				regs->psw.addr =
+					__rewind_psw(regs->psw,
+						     regs->int_code >> 16);
+				break;
+			}
+		}
+		/* No longer in a system call */
+		clear_pt_regs_flag(regs, PIF_SYSCALL);
+
+		rseq_signal_deliver(&ksig, regs);
+		if (is_compat_task())
+			handle_signal32(&ksig, oldset, regs);
+		else
+			handle_signal(&ksig, oldset, regs);
+		return;
+	}
+
+	/* No handlers present - check for system call restart */
+	clear_pt_regs_flag(regs, PIF_SYSCALL);
+	if (current->thread.system_call) {
+		regs->int_code = current->thread.system_call;
+		switch (regs->gprs[2]) {
+		case -ERESTART_RESTARTBLOCK:
+			/* Restart with sys_restart_syscall */
+			regs->gprs[2] = regs->orig_gpr2;
+			current->restart_block.arch_data = regs->psw.addr;
+			if (is_compat_task())
+				regs->psw.addr = VDSO32_SYMBOL(current, restart_syscall);
+			else
+				regs->psw.addr = VDSO64_SYMBOL(current, restart_syscall);
+			if (test_thread_flag(TIF_SINGLE_STEP))
+				clear_thread_flag(TIF_PER_TRAP);
+			break;
+		case -ERESTARTNOHAND:
+		case -ERESTARTSYS:
+		case -ERESTARTNOINTR:
+			regs->gprs[2] = regs->orig_gpr2;
+			regs->psw.addr = __rewind_psw(regs->psw, regs->int_code >> 16);
+			if (test_thread_flag(TIF_SINGLE_STEP))
+				clear_thread_flag(TIF_PER_TRAP);
+			break;
+		}
+	}
+
+	/*
+	 * If there's no signal to deliver, we just put the saved sigmask back.
+	 */
+	restore_saved_sigmask();
+}
diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c
new file mode 100644
index 0000000000..a4edb7ea66
--- /dev/null
+++ b/arch/s390/kernel/smp.c
@@ -0,0 +1,1317 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ *  SMP related functions
+ *
+ *    Copyright IBM Corp. 1999, 2012
+ *    Author(s): Denis Joseph Barrow,
+ *		 Martin Schwidefsky <schwidefsky@de.ibm.com>,
+ *
+ *  based on other smp stuff by
+ *    (c) 1995 Alan Cox, CymruNET Ltd  <alan@cymru.net>
+ *    (c) 1998 Ingo Molnar
+ *
+ * The code outside of smp.c uses logical cpu numbers, only smp.c does
+ * the translation of logical to physical cpu ids. All new code that
+ * operates on physical cpu numbers needs to go into smp.c.
+ */
+
+#define KMSG_COMPONENT "cpu"
+#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
+
+#include <linux/workqueue.h>
+#include <linux/memblock.h>
+#include <linux/export.h>
+#include <linux/init.h>
+#include <linux/mm.h>
+#include <linux/err.h>
+#include <linux/spinlock.h>
+#include <linux/kernel_stat.h>
+#include <linux/delay.h>
+#include <linux/interrupt.h>
+#include <linux/irqflags.h>
+#include <linux/irq_work.h>
+#include <linux/cpu.h>
+#include <linux/slab.h>
+#include <linux/sched/hotplug.h>
+#include <linux/sched/task_stack.h>
+#include <linux/crash_dump.h>
+#include <linux/kprobes.h>
+#include <asm/asm-offsets.h>
+#include <asm/pfault.h>
+#include <asm/diag.h>
+#include <asm/switch_to.h>
+#include <asm/facility.h>
+#include <asm/ipl.h>
+#include <asm/setup.h>
+#include <asm/irq.h>
+#include <asm/tlbflush.h>
+#include <asm/vtimer.h>
+#include <asm/abs_lowcore.h>
+#include <asm/sclp.h>
+#include <asm/debug.h>
+#include <asm/os_info.h>
+#include <asm/sigp.h>
+#include <asm/idle.h>
+#include <asm/nmi.h>
+#include <asm/stacktrace.h>
+#include <asm/topology.h>
+#include <asm/vdso.h>
+#include <asm/maccess.h>
+#include "entry.h"
+
+enum {
+	ec_schedule = 0,
+	ec_call_function_single,
+	ec_stop_cpu,
+	ec_mcck_pending,
+	ec_irq_work,
+};
+
+enum {
+	CPU_STATE_STANDBY,
+	CPU_STATE_CONFIGURED,
+};
+
+static DEFINE_PER_CPU(struct cpu *, cpu_device);
+
+struct pcpu {
+	unsigned long ec_mask;		/* bit mask for ec_xxx functions */
+	unsigned long ec_clk;		/* sigp timestamp for ec_xxx */
+	signed char state;		/* physical cpu state */
+	signed char polarization;	/* physical polarization */
+	u16 address;			/* physical cpu address */
+};
+
+static u8 boot_core_type;
+static struct pcpu pcpu_devices[NR_CPUS];
+
+unsigned int smp_cpu_mt_shift;
+EXPORT_SYMBOL(smp_cpu_mt_shift);
+
+unsigned int smp_cpu_mtid;
+EXPORT_SYMBOL(smp_cpu_mtid);
+
+#ifdef CONFIG_CRASH_DUMP
+__vector128 __initdata boot_cpu_vector_save_area[__NUM_VXRS];
+#endif
+
+static unsigned int smp_max_threads __initdata = -1U;
+cpumask_t cpu_setup_mask;
+
+static int __init early_nosmt(char *s)
+{
+	smp_max_threads = 1;
+	return 0;
+}
+early_param("nosmt", early_nosmt);
+
+static int __init early_smt(char *s)
+{
+	get_option(&s, &smp_max_threads);
+	return 0;
+}
+early_param("smt", early_smt);
+
+/*
+ * The smp_cpu_state_mutex must be held when changing the state or polarization
+ * member of a pcpu data structure within the pcpu_devices array.
+ */
+DEFINE_MUTEX(smp_cpu_state_mutex);
+
+/*
+ * Signal processor helper functions.
+ */
+static inline int __pcpu_sigp_relax(u16 addr, u8 order, unsigned long parm)
+{
+	int cc;
+
+	while (1) {
+		cc = __pcpu_sigp(addr, order, parm, NULL);
+		if (cc != SIGP_CC_BUSY)
+			return cc;
+		cpu_relax();
+	}
+}
+
+static int pcpu_sigp_retry(struct pcpu *pcpu, u8 order, u32 parm)
+{
+	int cc, retry;
+
+	for (retry = 0; ; retry++) {
+		cc = __pcpu_sigp(pcpu->address, order, parm, NULL);
+		if (cc != SIGP_CC_BUSY)
+			break;
+		if (retry >= 3)
+			udelay(10);
+	}
+	return cc;
+}
+
+static inline int pcpu_stopped(struct pcpu *pcpu)
+{
+	u32 status;
+
+	if (__pcpu_sigp(pcpu->address, SIGP_SENSE,
+			0, &status) != SIGP_CC_STATUS_STORED)
+		return 0;
+	return !!(status & (SIGP_STATUS_CHECK_STOP|SIGP_STATUS_STOPPED));
+}
+
+static inline int pcpu_running(struct pcpu *pcpu)
+{
+	if (__pcpu_sigp(pcpu->address, SIGP_SENSE_RUNNING,
+			0, NULL) != SIGP_CC_STATUS_STORED)
+		return 1;
+	/* Status stored condition code is equivalent to cpu not running. */
+	return 0;
+}
+
+/*
+ * Find struct pcpu by cpu address.
+ */
+static struct pcpu *pcpu_find_address(const struct cpumask *mask, u16 address)
+{
+	int cpu;
+
+	for_each_cpu(cpu, mask)
+		if (pcpu_devices[cpu].address == address)
+			return pcpu_devices + cpu;
+	return NULL;
+}
+
+static void pcpu_ec_call(struct pcpu *pcpu, int ec_bit)
+{
+	int order;
+
+	if (test_and_set_bit(ec_bit, &pcpu->ec_mask))
+		return;
+	order = pcpu_running(pcpu) ? SIGP_EXTERNAL_CALL : SIGP_EMERGENCY_SIGNAL;
+	pcpu->ec_clk = get_tod_clock_fast();
+	pcpu_sigp_retry(pcpu, order, 0);
+}
+
+static int pcpu_alloc_lowcore(struct pcpu *pcpu, int cpu)
+{
+	unsigned long async_stack, nodat_stack, mcck_stack;
+	struct lowcore *lc;
+
+	lc = (struct lowcore *) __get_free_pages(GFP_KERNEL | GFP_DMA, LC_ORDER);
+	nodat_stack = __get_free_pages(GFP_KERNEL, THREAD_SIZE_ORDER);
+	async_stack = stack_alloc();
+	mcck_stack = stack_alloc();
+	if (!lc || !nodat_stack || !async_stack || !mcck_stack)
+		goto out;
+	memcpy(lc, &S390_lowcore, 512);
+	memset((char *) lc + 512, 0, sizeof(*lc) - 512);
+	lc->async_stack = async_stack + STACK_INIT_OFFSET;
+	lc->nodat_stack = nodat_stack + STACK_INIT_OFFSET;
+	lc->mcck_stack = mcck_stack + STACK_INIT_OFFSET;
+	lc->cpu_nr = cpu;
+	lc->spinlock_lockval = arch_spin_lockval(cpu);
+	lc->spinlock_index = 0;
+	lc->return_lpswe = gen_lpswe(__LC_RETURN_PSW);
+	lc->return_mcck_lpswe = gen_lpswe(__LC_RETURN_MCCK_PSW);
+	lc->preempt_count = PREEMPT_DISABLED;
+	if (nmi_alloc_mcesa(&lc->mcesad))
+		goto out;
+	if (abs_lowcore_map(cpu, lc, true))
+		goto out_mcesa;
+	lowcore_ptr[cpu] = lc;
+	pcpu_sigp_retry(pcpu, SIGP_SET_PREFIX, __pa(lc));
+	return 0;
+
+out_mcesa:
+	nmi_free_mcesa(&lc->mcesad);
+out:
+	stack_free(mcck_stack);
+	stack_free(async_stack);
+	free_pages(nodat_stack, THREAD_SIZE_ORDER);
+	free_pages((unsigned long) lc, LC_ORDER);
+	return -ENOMEM;
+}
+
+static void pcpu_free_lowcore(struct pcpu *pcpu)
+{
+	unsigned long async_stack, nodat_stack, mcck_stack;
+	struct lowcore *lc;
+	int cpu;
+
+	cpu = pcpu - pcpu_devices;
+	lc = lowcore_ptr[cpu];
+	nodat_stack = lc->nodat_stack - STACK_INIT_OFFSET;
+	async_stack = lc->async_stack - STACK_INIT_OFFSET;
+	mcck_stack = lc->mcck_stack - STACK_INIT_OFFSET;
+	pcpu_sigp_retry(pcpu, SIGP_SET_PREFIX, 0);
+	lowcore_ptr[cpu] = NULL;
+	abs_lowcore_unmap(cpu);
+	nmi_free_mcesa(&lc->mcesad);
+	stack_free(async_stack);
+	stack_free(mcck_stack);
+	free_pages(nodat_stack, THREAD_SIZE_ORDER);
+	free_pages((unsigned long) lc, LC_ORDER);
+}
+
+static void pcpu_prepare_secondary(struct pcpu *pcpu, int cpu)
+{
+	struct lowcore *lc, *abs_lc;
+
+	lc = lowcore_ptr[cpu];
+	cpumask_set_cpu(cpu, &init_mm.context.cpu_attach_mask);
+	cpumask_set_cpu(cpu, mm_cpumask(&init_mm));
+	lc->cpu_nr = cpu;
+	lc->restart_flags = RESTART_FLAG_CTLREGS;
+	lc->spinlock_lockval = arch_spin_lockval(cpu);
+	lc->spinlock_index = 0;
+	lc->percpu_offset = __per_cpu_offset[cpu];
+	lc->kernel_asce = S390_lowcore.kernel_asce;
+	lc->user_asce = s390_invalid_asce;
+	lc->machine_flags = S390_lowcore.machine_flags;
+	lc->user_timer = lc->system_timer =
+		lc->steal_timer = lc->avg_steal_timer = 0;
+	abs_lc = get_abs_lowcore();
+	memcpy(lc->cregs_save_area, abs_lc->cregs_save_area, sizeof(lc->cregs_save_area));
+	put_abs_lowcore(abs_lc);
+	lc->cregs_save_area[1] = lc->kernel_asce;
+	lc->cregs_save_area[7] = lc->user_asce;
+	save_access_regs((unsigned int *) lc->access_regs_save_area);
+	arch_spin_lock_setup(cpu);
+}
+
+static void pcpu_attach_task(struct pcpu *pcpu, struct task_struct *tsk)
+{
+	struct lowcore *lc;
+	int cpu;
+
+	cpu = pcpu - pcpu_devices;
+	lc = lowcore_ptr[cpu];
+	lc->kernel_stack = (unsigned long)task_stack_page(tsk) + STACK_INIT_OFFSET;
+	lc->current_task = (unsigned long)tsk;
+	lc->lpp = LPP_MAGIC;
+	lc->current_pid = tsk->pid;
+	lc->user_timer = tsk->thread.user_timer;
+	lc->guest_timer = tsk->thread.guest_timer;
+	lc->system_timer = tsk->thread.system_timer;
+	lc->hardirq_timer = tsk->thread.hardirq_timer;
+	lc->softirq_timer = tsk->thread.softirq_timer;
+	lc->steal_timer = 0;
+}
+
+static void pcpu_start_fn(struct pcpu *pcpu, void (*func)(void *), void *data)
+{
+	struct lowcore *lc;
+	int cpu;
+
+	cpu = pcpu - pcpu_devices;
+	lc = lowcore_ptr[cpu];
+	lc->restart_stack = lc->kernel_stack;
+	lc->restart_fn = (unsigned long) func;
+	lc->restart_data = (unsigned long) data;
+	lc->restart_source = -1U;
+	pcpu_sigp_retry(pcpu, SIGP_RESTART, 0);
+}
+
+typedef void (pcpu_delegate_fn)(void *);
+
+/*
+ * Call function via PSW restart on pcpu and stop the current cpu.
+ */
+static void __pcpu_delegate(pcpu_delegate_fn *func, void *data)
+{
+	func(data);	/* should not return */
+}
+
+static void pcpu_delegate(struct pcpu *pcpu,
+			  pcpu_delegate_fn *func,
+			  void *data, unsigned long stack)
+{
+	struct lowcore *lc, *abs_lc;
+	unsigned int source_cpu;
+
+	lc = lowcore_ptr[pcpu - pcpu_devices];
+	source_cpu = stap();
+
+	if (pcpu->address == source_cpu) {
+		call_on_stack(2, stack, void, __pcpu_delegate,
+			      pcpu_delegate_fn *, func, void *, data);
+	}
+	/* Stop target cpu (if func returns this stops the current cpu). */
+	pcpu_sigp_retry(pcpu, SIGP_STOP, 0);
+	pcpu_sigp_retry(pcpu, SIGP_CPU_RESET, 0);
+	/* Restart func on the target cpu and stop the current cpu. */
+	if (lc) {
+		lc->restart_stack = stack;
+		lc->restart_fn = (unsigned long)func;
+		lc->restart_data = (unsigned long)data;
+		lc->restart_source = source_cpu;
+	} else {
+		abs_lc = get_abs_lowcore();
+		abs_lc->restart_stack = stack;
+		abs_lc->restart_fn = (unsigned long)func;
+		abs_lc->restart_data = (unsigned long)data;
+		abs_lc->restart_source = source_cpu;
+		put_abs_lowcore(abs_lc);
+	}
+	asm volatile(
+		"0:	sigp	0,%0,%2	# sigp restart to target cpu\n"
+		"	brc	2,0b	# busy, try again\n"
+		"1:	sigp	0,%1,%3	# sigp stop to current cpu\n"
+		"	brc	2,1b	# busy, try again\n"
+		: : "d" (pcpu->address), "d" (source_cpu),
+		    "K" (SIGP_RESTART), "K" (SIGP_STOP)
+		: "0", "1", "cc");
+	for (;;) ;
+}
+
+/*
+ * Enable additional logical cpus for multi-threading.
+ */
+static int pcpu_set_smt(unsigned int mtid)
+{
+	int cc;
+
+	if (smp_cpu_mtid == mtid)
+		return 0;
+	cc = __pcpu_sigp(0, SIGP_SET_MULTI_THREADING, mtid, NULL);
+	if (cc == 0) {
+		smp_cpu_mtid = mtid;
+		smp_cpu_mt_shift = 0;
+		while (smp_cpu_mtid >= (1U << smp_cpu_mt_shift))
+			smp_cpu_mt_shift++;
+		pcpu_devices[0].address = stap();
+	}
+	return cc;
+}
+
+/*
+ * Call function on an online CPU.
+ */
+void smp_call_online_cpu(void (*func)(void *), void *data)
+{
+	struct pcpu *pcpu;
+
+	/* Use the current cpu if it is online. */
+	pcpu = pcpu_find_address(cpu_online_mask, stap());
+	if (!pcpu)
+		/* Use the first online cpu. */
+		pcpu = pcpu_devices + cpumask_first(cpu_online_mask);
+	pcpu_delegate(pcpu, func, data, (unsigned long) restart_stack);
+}
+
+/*
+ * Call function on the ipl CPU.
+ */
+void smp_call_ipl_cpu(void (*func)(void *), void *data)
+{
+	struct lowcore *lc = lowcore_ptr[0];
+
+	if (pcpu_devices[0].address == stap())
+		lc = &S390_lowcore;
+
+	pcpu_delegate(&pcpu_devices[0], func, data,
+		      lc->nodat_stack);
+}
+
+int smp_find_processor_id(u16 address)
+{
+	int cpu;
+
+	for_each_present_cpu(cpu)
+		if (pcpu_devices[cpu].address == address)
+			return cpu;
+	return -1;
+}
+
+void schedule_mcck_handler(void)
+{
+	pcpu_ec_call(pcpu_devices + smp_processor_id(), ec_mcck_pending);
+}
+
+bool notrace arch_vcpu_is_preempted(int cpu)
+{
+	if (test_cpu_flag_of(CIF_ENABLED_WAIT, cpu))
+		return false;
+	if (pcpu_running(pcpu_devices + cpu))
+		return false;
+	return true;
+}
+EXPORT_SYMBOL(arch_vcpu_is_preempted);
+
+void notrace smp_yield_cpu(int cpu)
+{
+	if (!MACHINE_HAS_DIAG9C)
+		return;
+	diag_stat_inc_norecursion(DIAG_STAT_X09C);
+	asm volatile("diag %0,0,0x9c"
+		     : : "d" (pcpu_devices[cpu].address));
+}
+EXPORT_SYMBOL_GPL(smp_yield_cpu);
+
+/*
+ * Send cpus emergency shutdown signal. This gives the cpus the
+ * opportunity to complete outstanding interrupts.
+ */
+void notrace smp_emergency_stop(void)
+{
+	static arch_spinlock_t lock = __ARCH_SPIN_LOCK_UNLOCKED;
+	static cpumask_t cpumask;
+	u64 end;
+	int cpu;
+
+	arch_spin_lock(&lock);
+	cpumask_copy(&cpumask, cpu_online_mask);
+	cpumask_clear_cpu(smp_processor_id(), &cpumask);
+
+	end = get_tod_clock() + (1000000UL << 12);
+	for_each_cpu(cpu, &cpumask) {
+		struct pcpu *pcpu = pcpu_devices + cpu;
+		set_bit(ec_stop_cpu, &pcpu->ec_mask);
+		while (__pcpu_sigp(pcpu->address, SIGP_EMERGENCY_SIGNAL,
+				   0, NULL) == SIGP_CC_BUSY &&
+		       get_tod_clock() < end)
+			cpu_relax();
+	}
+	while (get_tod_clock() < end) {
+		for_each_cpu(cpu, &cpumask)
+			if (pcpu_stopped(pcpu_devices + cpu))
+				cpumask_clear_cpu(cpu, &cpumask);
+		if (cpumask_empty(&cpumask))
+			break;
+		cpu_relax();
+	}
+	arch_spin_unlock(&lock);
+}
+NOKPROBE_SYMBOL(smp_emergency_stop);
+
+/*
+ * Stop all cpus but the current one.
+ */
+void smp_send_stop(void)
+{
+	int cpu;
+
+	/* Disable all interrupts/machine checks */
+	__load_psw_mask(PSW_KERNEL_BITS);
+	trace_hardirqs_off();
+
+	debug_set_critical();
+
+	if (oops_in_progress)
+		smp_emergency_stop();
+
+	/* stop all processors */
+	for_each_online_cpu(cpu) {
+		if (cpu == smp_processor_id())
+			continue;
+		pcpu_sigp_retry(pcpu_devices + cpu, SIGP_STOP, 0);
+		while (!pcpu_stopped(pcpu_devices + cpu))
+			cpu_relax();
+	}
+}
+
+/*
+ * This is the main routine where commands issued by other
+ * cpus are handled.
+ */
+static void smp_handle_ext_call(void)
+{
+	unsigned long bits;
+
+	/* handle bit signal external calls */
+	bits = xchg(&pcpu_devices[smp_processor_id()].ec_mask, 0);
+	if (test_bit(ec_stop_cpu, &bits))
+		smp_stop_cpu();
+	if (test_bit(ec_schedule, &bits))
+		scheduler_ipi();
+	if (test_bit(ec_call_function_single, &bits))
+		generic_smp_call_function_single_interrupt();
+	if (test_bit(ec_mcck_pending, &bits))
+		s390_handle_mcck();
+	if (test_bit(ec_irq_work, &bits))
+		irq_work_run();
+}
+
+static void do_ext_call_interrupt(struct ext_code ext_code,
+				  unsigned int param32, unsigned long param64)
+{
+	inc_irq_stat(ext_code.code == 0x1202 ? IRQEXT_EXC : IRQEXT_EMS);
+	smp_handle_ext_call();
+}
+
+void arch_send_call_function_ipi_mask(const struct cpumask *mask)
+{
+	int cpu;
+
+	for_each_cpu(cpu, mask)
+		pcpu_ec_call(pcpu_devices + cpu, ec_call_function_single);
+}
+
+void arch_send_call_function_single_ipi(int cpu)
+{
+	pcpu_ec_call(pcpu_devices + cpu, ec_call_function_single);
+}
+
+/*
+ * this function sends a 'reschedule' IPI to another CPU.
+ * it goes straight through and wastes no time serializing
+ * anything. Worst case is that we lose a reschedule ...
+ */
+void arch_smp_send_reschedule(int cpu)
+{
+	pcpu_ec_call(pcpu_devices + cpu, ec_schedule);
+}
+
+#ifdef CONFIG_IRQ_WORK
+void arch_irq_work_raise(void)
+{
+	pcpu_ec_call(pcpu_devices + smp_processor_id(), ec_irq_work);
+}
+#endif
+
+/*
+ * parameter area for the set/clear control bit callbacks
+ */
+struct ec_creg_mask_parms {
+	unsigned long orval;
+	unsigned long andval;
+	int cr;
+};
+
+/*
+ * callback for setting/clearing control bits
+ */
+static void smp_ctl_bit_callback(void *info)
+{
+	struct ec_creg_mask_parms *pp = info;
+	unsigned long cregs[16];
+
+	__ctl_store(cregs, 0, 15);
+	cregs[pp->cr] = (cregs[pp->cr] & pp->andval) | pp->orval;
+	__ctl_load(cregs, 0, 15);
+}
+
+static DEFINE_SPINLOCK(ctl_lock);
+
+void smp_ctl_set_clear_bit(int cr, int bit, bool set)
+{
+	struct ec_creg_mask_parms parms = { .cr = cr, };
+	struct lowcore *abs_lc;
+	u64 ctlreg;
+
+	if (set) {
+		parms.orval = 1UL << bit;
+		parms.andval = -1UL;
+	} else {
+		parms.orval = 0;
+		parms.andval = ~(1UL << bit);
+	}
+	spin_lock(&ctl_lock);
+	abs_lc = get_abs_lowcore();
+	ctlreg = abs_lc->cregs_save_area[cr];
+	ctlreg = (ctlreg & parms.andval) | parms.orval;
+	abs_lc->cregs_save_area[cr] = ctlreg;
+	put_abs_lowcore(abs_lc);
+	on_each_cpu(smp_ctl_bit_callback, &parms, 1);
+	spin_unlock(&ctl_lock);
+}
+EXPORT_SYMBOL(smp_ctl_set_clear_bit);
+
+#ifdef CONFIG_CRASH_DUMP
+
+int smp_store_status(int cpu)
+{
+	struct lowcore *lc;
+	struct pcpu *pcpu;
+	unsigned long pa;
+
+	pcpu = pcpu_devices + cpu;
+	lc = lowcore_ptr[cpu];
+	pa = __pa(&lc->floating_pt_save_area);
+	if (__pcpu_sigp_relax(pcpu->address, SIGP_STORE_STATUS_AT_ADDRESS,
+			      pa) != SIGP_CC_ORDER_CODE_ACCEPTED)
+		return -EIO;
+	if (!MACHINE_HAS_VX && !MACHINE_HAS_GS)
+		return 0;
+	pa = lc->mcesad & MCESA_ORIGIN_MASK;
+	if (MACHINE_HAS_GS)
+		pa |= lc->mcesad & MCESA_LC_MASK;
+	if (__pcpu_sigp_relax(pcpu->address, SIGP_STORE_ADDITIONAL_STATUS,
+			      pa) != SIGP_CC_ORDER_CODE_ACCEPTED)
+		return -EIO;
+	return 0;
+}
+
+/*
+ * Collect CPU state of the previous, crashed system.
+ * There are four cases:
+ * 1) standard zfcp/nvme dump
+ *    condition: OLDMEM_BASE == NULL && is_ipl_type_dump() == true
+ *    The state for all CPUs except the boot CPU needs to be collected
+ *    with sigp stop-and-store-status. The boot CPU state is located in
+ *    the absolute lowcore of the memory stored in the HSA. The zcore code
+ *    will copy the boot CPU state from the HSA.
+ * 2) stand-alone kdump for SCSI/NVMe (zfcp/nvme dump with swapped memory)
+ *    condition: OLDMEM_BASE != NULL && is_ipl_type_dump() == true
+ *    The state for all CPUs except the boot CPU needs to be collected
+ *    with sigp stop-and-store-status. The firmware or the boot-loader
+ *    stored the registers of the boot CPU in the absolute lowcore in the
+ *    memory of the old system.
+ * 3) kdump and the old kernel did not store the CPU state,
+ *    or stand-alone kdump for DASD
+ *    condition: OLDMEM_BASE != NULL && !is_kdump_kernel()
+ *    The state for all CPUs except the boot CPU needs to be collected
+ *    with sigp stop-and-store-status. The kexec code or the boot-loader
+ *    stored the registers of the boot CPU in the memory of the old system.
+ * 4) kdump and the old kernel stored the CPU state
+ *    condition: OLDMEM_BASE != NULL && is_kdump_kernel()
+ *    This case does not exist for s390 anymore, setup_arch explicitly
+ *    deactivates the elfcorehdr= kernel parameter
+ */
+static bool dump_available(void)
+{
+	return oldmem_data.start || is_ipl_type_dump();
+}
+
+void __init smp_save_dump_ipl_cpu(void)
+{
+	struct save_area *sa;
+	void *regs;
+
+	if (!dump_available())
+		return;
+	sa = save_area_alloc(true);
+	regs = memblock_alloc(512, 8);
+	if (!sa || !regs)
+		panic("could not allocate memory for boot CPU save area\n");
+	copy_oldmem_kernel(regs, __LC_FPREGS_SAVE_AREA, 512);
+	save_area_add_regs(sa, regs);
+	memblock_free(regs, 512);
+	if (MACHINE_HAS_VX)
+		save_area_add_vxrs(sa, boot_cpu_vector_save_area);
+}
+
+void __init smp_save_dump_secondary_cpus(void)
+{
+	int addr, boot_cpu_addr, max_cpu_addr;
+	struct save_area *sa;
+	void *page;
+
+	if (!dump_available())
+		return;
+	/* Allocate a page as dumping area for the store status sigps */
+	page = memblock_alloc_low(PAGE_SIZE, PAGE_SIZE);
+	if (!page)
+		panic("ERROR: Failed to allocate %lx bytes below %lx\n",
+		      PAGE_SIZE, 1UL << 31);
+
+	/* Set multi-threading state to the previous system. */
+	pcpu_set_smt(sclp.mtid_prev);
+	boot_cpu_addr = stap();
+	max_cpu_addr = SCLP_MAX_CORES << sclp.mtid_prev;
+	for (addr = 0; addr <= max_cpu_addr; addr++) {
+		if (addr == boot_cpu_addr)
+			continue;
+		if (__pcpu_sigp_relax(addr, SIGP_SENSE, 0) ==
+		    SIGP_CC_NOT_OPERATIONAL)
+			continue;
+		sa = save_area_alloc(false);
+		if (!sa)
+			panic("could not allocate memory for save area\n");
+		__pcpu_sigp_relax(addr, SIGP_STORE_STATUS_AT_ADDRESS, __pa(page));
+		save_area_add_regs(sa, page);
+		if (MACHINE_HAS_VX) {
+			__pcpu_sigp_relax(addr, SIGP_STORE_ADDITIONAL_STATUS, __pa(page));
+			save_area_add_vxrs(sa, page);
+		}
+	}
+	memblock_free(page, PAGE_SIZE);
+	diag_amode31_ops.diag308_reset();
+	pcpu_set_smt(0);
+}
+#endif /* CONFIG_CRASH_DUMP */
+
+void smp_cpu_set_polarization(int cpu, int val)
+{
+	pcpu_devices[cpu].polarization = val;
+}
+
+int smp_cpu_get_polarization(int cpu)
+{
+	return pcpu_devices[cpu].polarization;
+}
+
+int smp_cpu_get_cpu_address(int cpu)
+{
+	return pcpu_devices[cpu].address;
+}
+
+static void __ref smp_get_core_info(struct sclp_core_info *info, int early)
+{
+	static int use_sigp_detection;
+	int address;
+
+	if (use_sigp_detection || sclp_get_core_info(info, early)) {
+		use_sigp_detection = 1;
+		for (address = 0;
+		     address < (SCLP_MAX_CORES << smp_cpu_mt_shift);
+		     address += (1U << smp_cpu_mt_shift)) {
+			if (__pcpu_sigp_relax(address, SIGP_SENSE, 0) ==
+			    SIGP_CC_NOT_OPERATIONAL)
+				continue;
+			info->core[info->configured].core_id =
+				address >> smp_cpu_mt_shift;
+			info->configured++;
+		}
+		info->combined = info->configured;
+	}
+}
+
+static int smp_add_present_cpu(int cpu);
+
+static int smp_add_core(struct sclp_core_entry *core, cpumask_t *avail,
+			bool configured, bool early)
+{
+	struct pcpu *pcpu;
+	int cpu, nr, i;
+	u16 address;
+
+	nr = 0;
+	if (sclp.has_core_type && core->type != boot_core_type)
+		return nr;
+	cpu = cpumask_first(avail);
+	address = core->core_id << smp_cpu_mt_shift;
+	for (i = 0; (i <= smp_cpu_mtid) && (cpu < nr_cpu_ids); i++) {
+		if (pcpu_find_address(cpu_present_mask, address + i))
+			continue;
+		pcpu = pcpu_devices + cpu;
+		pcpu->address = address + i;
+		if (configured)
+			pcpu->state = CPU_STATE_CONFIGURED;
+		else
+			pcpu->state = CPU_STATE_STANDBY;
+		smp_cpu_set_polarization(cpu, POLARIZATION_UNKNOWN);
+		set_cpu_present(cpu, true);
+		if (!early && smp_add_present_cpu(cpu) != 0)
+			set_cpu_present(cpu, false);
+		else
+			nr++;
+		cpumask_clear_cpu(cpu, avail);
+		cpu = cpumask_next(cpu, avail);
+	}
+	return nr;
+}
+
+static int __smp_rescan_cpus(struct sclp_core_info *info, bool early)
+{
+	struct sclp_core_entry *core;
+	static cpumask_t avail;
+	bool configured;
+	u16 core_id;
+	int nr, i;
+
+	cpus_read_lock();
+	mutex_lock(&smp_cpu_state_mutex);
+	nr = 0;
+	cpumask_xor(&avail, cpu_possible_mask, cpu_present_mask);
+	/*
+	 * Add IPL core first (which got logical CPU number 0) to make sure
+	 * that all SMT threads get subsequent logical CPU numbers.
+	 */
+	if (early) {
+		core_id = pcpu_devices[0].address >> smp_cpu_mt_shift;
+		for (i = 0; i < info->configured; i++) {
+			core = &info->core[i];
+			if (core->core_id == core_id) {
+				nr += smp_add_core(core, &avail, true, early);
+				break;
+			}
+		}
+	}
+	for (i = 0; i < info->combined; i++) {
+		configured = i < info->configured;
+		nr += smp_add_core(&info->core[i], &avail, configured, early);
+	}
+	mutex_unlock(&smp_cpu_state_mutex);
+	cpus_read_unlock();
+	return nr;
+}
+
+void __init smp_detect_cpus(void)
+{
+	unsigned int cpu, mtid, c_cpus, s_cpus;
+	struct sclp_core_info *info;
+	u16 address;
+
+	/* Get CPU information */
+	info = memblock_alloc(sizeof(*info), 8);
+	if (!info)
+		panic("%s: Failed to allocate %zu bytes align=0x%x\n",
+		      __func__, sizeof(*info), 8);
+	smp_get_core_info(info, 1);
+	/* Find boot CPU type */
+	if (sclp.has_core_type) {
+		address = stap();
+		for (cpu = 0; cpu < info->combined; cpu++)
+			if (info->core[cpu].core_id == address) {
+				/* The boot cpu dictates the cpu type. */
+				boot_core_type = info->core[cpu].type;
+				break;
+			}
+		if (cpu >= info->combined)
+			panic("Could not find boot CPU type");
+	}
+
+	/* Set multi-threading state for the current system */
+	mtid = boot_core_type ? sclp.mtid : sclp.mtid_cp;
+	mtid = (mtid < smp_max_threads) ? mtid : smp_max_threads - 1;
+	pcpu_set_smt(mtid);
+
+	/* Print number of CPUs */
+	c_cpus = s_cpus = 0;
+	for (cpu = 0; cpu < info->combined; cpu++) {
+		if (sclp.has_core_type &&
+		    info->core[cpu].type != boot_core_type)
+			continue;
+		if (cpu < info->configured)
+			c_cpus += smp_cpu_mtid + 1;
+		else
+			s_cpus += smp_cpu_mtid + 1;
+	}
+	pr_info("%d configured CPUs, %d standby CPUs\n", c_cpus, s_cpus);
+
+	/* Add CPUs present at boot */
+	__smp_rescan_cpus(info, true);
+	memblock_free(info, sizeof(*info));
+}
+
+/*
+ *	Activate a secondary processor.
+ */
+static void smp_start_secondary(void *cpuvoid)
+{
+	int cpu = raw_smp_processor_id();
+
+	S390_lowcore.last_update_clock = get_tod_clock();
+	S390_lowcore.restart_stack = (unsigned long)restart_stack;
+	S390_lowcore.restart_fn = (unsigned long)do_restart;
+	S390_lowcore.restart_data = 0;
+	S390_lowcore.restart_source = -1U;
+	S390_lowcore.restart_flags = 0;
+	restore_access_regs(S390_lowcore.access_regs_save_area);
+	cpu_init();
+	rcu_cpu_starting(cpu);
+	init_cpu_timer();
+	vtime_init();
+	vdso_getcpu_init();
+	pfault_init();
+	cpumask_set_cpu(cpu, &cpu_setup_mask);
+	update_cpu_masks();
+	notify_cpu_starting(cpu);
+	if (topology_cpu_dedicated(cpu))
+		set_cpu_flag(CIF_DEDICATED_CPU);
+	else
+		clear_cpu_flag(CIF_DEDICATED_CPU);
+	set_cpu_online(cpu, true);
+	inc_irq_stat(CPU_RST);
+	local_irq_enable();
+	cpu_startup_entry(CPUHP_AP_ONLINE_IDLE);
+}
+
+/* Upping and downing of CPUs */
+int __cpu_up(unsigned int cpu, struct task_struct *tidle)
+{
+	struct pcpu *pcpu = pcpu_devices + cpu;
+	int rc;
+
+	if (pcpu->state != CPU_STATE_CONFIGURED)
+		return -EIO;
+	if (pcpu_sigp_retry(pcpu, SIGP_INITIAL_CPU_RESET, 0) !=
+	    SIGP_CC_ORDER_CODE_ACCEPTED)
+		return -EIO;
+
+	rc = pcpu_alloc_lowcore(pcpu, cpu);
+	if (rc)
+		return rc;
+	/*
+	 * Make sure global control register contents do not change
+	 * until new CPU has initialized control registers.
+	 */
+	spin_lock(&ctl_lock);
+	pcpu_prepare_secondary(pcpu, cpu);
+	pcpu_attach_task(pcpu, tidle);
+	pcpu_start_fn(pcpu, smp_start_secondary, NULL);
+	/* Wait until cpu puts itself in the online & active maps */
+	while (!cpu_online(cpu))
+		cpu_relax();
+	spin_unlock(&ctl_lock);
+	return 0;
+}
+
+static unsigned int setup_possible_cpus __initdata;
+
+static int __init _setup_possible_cpus(char *s)
+{
+	get_option(&s, &setup_possible_cpus);
+	return 0;
+}
+early_param("possible_cpus", _setup_possible_cpus);
+
+int __cpu_disable(void)
+{
+	unsigned long cregs[16];
+	int cpu;
+
+	/* Handle possible pending IPIs */
+	smp_handle_ext_call();
+	cpu = smp_processor_id();
+	set_cpu_online(cpu, false);
+	cpumask_clear_cpu(cpu, &cpu_setup_mask);
+	update_cpu_masks();
+	/* Disable pseudo page faults on this cpu. */
+	pfault_fini();
+	/* Disable interrupt sources via control register. */
+	__ctl_store(cregs, 0, 15);
+	cregs[0]  &= ~0x0000ee70UL;	/* disable all external interrupts */
+	cregs[6]  &= ~0xff000000UL;	/* disable all I/O interrupts */
+	cregs[14] &= ~0x1f000000UL;	/* disable most machine checks */
+	__ctl_load(cregs, 0, 15);
+	clear_cpu_flag(CIF_NOHZ_DELAY);
+	return 0;
+}
+
+void __cpu_die(unsigned int cpu)
+{
+	struct pcpu *pcpu;
+
+	/* Wait until target cpu is down */
+	pcpu = pcpu_devices + cpu;
+	while (!pcpu_stopped(pcpu))
+		cpu_relax();
+	pcpu_free_lowcore(pcpu);
+	cpumask_clear_cpu(cpu, mm_cpumask(&init_mm));
+	cpumask_clear_cpu(cpu, &init_mm.context.cpu_attach_mask);
+}
+
+void __noreturn cpu_die(void)
+{
+	idle_task_exit();
+	pcpu_sigp_retry(pcpu_devices + smp_processor_id(), SIGP_STOP, 0);
+	for (;;) ;
+}
+
+void __init smp_fill_possible_mask(void)
+{
+	unsigned int possible, sclp_max, cpu;
+
+	sclp_max = max(sclp.mtid, sclp.mtid_cp) + 1;
+	sclp_max = min(smp_max_threads, sclp_max);
+	sclp_max = (sclp.max_cores * sclp_max) ?: nr_cpu_ids;
+	possible = setup_possible_cpus ?: nr_cpu_ids;
+	possible = min(possible, sclp_max);
+	for (cpu = 0; cpu < possible && cpu < nr_cpu_ids; cpu++)
+		set_cpu_possible(cpu, true);
+}
+
+void __init smp_prepare_cpus(unsigned int max_cpus)
+{
+	/* request the 0x1201 emergency signal external interrupt */
+	if (register_external_irq(EXT_IRQ_EMERGENCY_SIG, do_ext_call_interrupt))
+		panic("Couldn't request external interrupt 0x1201");
+	/* request the 0x1202 external call external interrupt */
+	if (register_external_irq(EXT_IRQ_EXTERNAL_CALL, do_ext_call_interrupt))
+		panic("Couldn't request external interrupt 0x1202");
+}
+
+void __init smp_prepare_boot_cpu(void)
+{
+	struct pcpu *pcpu = pcpu_devices;
+
+	WARN_ON(!cpu_present(0) || !cpu_online(0));
+	pcpu->state = CPU_STATE_CONFIGURED;
+	S390_lowcore.percpu_offset = __per_cpu_offset[0];
+	smp_cpu_set_polarization(0, POLARIZATION_UNKNOWN);
+}
+
+void __init smp_setup_processor_id(void)
+{
+	pcpu_devices[0].address = stap();
+	S390_lowcore.cpu_nr = 0;
+	S390_lowcore.spinlock_lockval = arch_spin_lockval(0);
+	S390_lowcore.spinlock_index = 0;
+}
+
+/*
+ * the frequency of the profiling timer can be changed
+ * by writing a multiplier value into /proc/profile.
+ *
+ * usually you want to run this on all CPUs ;)
+ */
+int setup_profiling_timer(unsigned int multiplier)
+{
+	return 0;
+}
+
+static ssize_t cpu_configure_show(struct device *dev,
+				  struct device_attribute *attr, char *buf)
+{
+	ssize_t count;
+
+	mutex_lock(&smp_cpu_state_mutex);
+	count = sprintf(buf, "%d\n", pcpu_devices[dev->id].state);
+	mutex_unlock(&smp_cpu_state_mutex);
+	return count;
+}
+
+static ssize_t cpu_configure_store(struct device *dev,
+				   struct device_attribute *attr,
+				   const char *buf, size_t count)
+{
+	struct pcpu *pcpu;
+	int cpu, val, rc, i;
+	char delim;
+
+	if (sscanf(buf, "%d %c", &val, &delim) != 1)
+		return -EINVAL;
+	if (val != 0 && val != 1)
+		return -EINVAL;
+	cpus_read_lock();
+	mutex_lock(&smp_cpu_state_mutex);
+	rc = -EBUSY;
+	/* disallow configuration changes of online cpus and cpu 0 */
+	cpu = dev->id;
+	cpu = smp_get_base_cpu(cpu);
+	if (cpu == 0)
+		goto out;
+	for (i = 0; i <= smp_cpu_mtid; i++)
+		if (cpu_online(cpu + i))
+			goto out;
+	pcpu = pcpu_devices + cpu;
+	rc = 0;
+	switch (val) {
+	case 0:
+		if (pcpu->state != CPU_STATE_CONFIGURED)
+			break;
+		rc = sclp_core_deconfigure(pcpu->address >> smp_cpu_mt_shift);
+		if (rc)
+			break;
+		for (i = 0; i <= smp_cpu_mtid; i++) {
+			if (cpu + i >= nr_cpu_ids || !cpu_present(cpu + i))
+				continue;
+			pcpu[i].state = CPU_STATE_STANDBY;
+			smp_cpu_set_polarization(cpu + i,
+						 POLARIZATION_UNKNOWN);
+		}
+		topology_expect_change();
+		break;
+	case 1:
+		if (pcpu->state != CPU_STATE_STANDBY)
+			break;
+		rc = sclp_core_configure(pcpu->address >> smp_cpu_mt_shift);
+		if (rc)
+			break;
+		for (i = 0; i <= smp_cpu_mtid; i++) {
+			if (cpu + i >= nr_cpu_ids || !cpu_present(cpu + i))
+				continue;
+			pcpu[i].state = CPU_STATE_CONFIGURED;
+			smp_cpu_set_polarization(cpu + i,
+						 POLARIZATION_UNKNOWN);
+		}
+		topology_expect_change();
+		break;
+	default:
+		break;
+	}
+out:
+	mutex_unlock(&smp_cpu_state_mutex);
+	cpus_read_unlock();
+	return rc ? rc : count;
+}
+static DEVICE_ATTR(configure, 0644, cpu_configure_show, cpu_configure_store);
+
+static ssize_t show_cpu_address(struct device *dev,
+				struct device_attribute *attr, char *buf)
+{
+	return sprintf(buf, "%d\n", pcpu_devices[dev->id].address);
+}
+static DEVICE_ATTR(address, 0444, show_cpu_address, NULL);
+
+static struct attribute *cpu_common_attrs[] = {
+	&dev_attr_configure.attr,
+	&dev_attr_address.attr,
+	NULL,
+};
+
+static struct attribute_group cpu_common_attr_group = {
+	.attrs = cpu_common_attrs,
+};
+
+static struct attribute *cpu_online_attrs[] = {
+	&dev_attr_idle_count.attr,
+	&dev_attr_idle_time_us.attr,
+	NULL,
+};
+
+static struct attribute_group cpu_online_attr_group = {
+	.attrs = cpu_online_attrs,
+};
+
+static int smp_cpu_online(unsigned int cpu)
+{
+	struct device *s = &per_cpu(cpu_device, cpu)->dev;
+
+	return sysfs_create_group(&s->kobj, &cpu_online_attr_group);
+}
+
+static int smp_cpu_pre_down(unsigned int cpu)
+{
+	struct device *s = &per_cpu(cpu_device, cpu)->dev;
+
+	sysfs_remove_group(&s->kobj, &cpu_online_attr_group);
+	return 0;
+}
+
+static int smp_add_present_cpu(int cpu)
+{
+	struct device *s;
+	struct cpu *c;
+	int rc;
+
+	c = kzalloc(sizeof(*c), GFP_KERNEL);
+	if (!c)
+		return -ENOMEM;
+	per_cpu(cpu_device, cpu) = c;
+	s = &c->dev;
+	c->hotpluggable = 1;
+	rc = register_cpu(c, cpu);
+	if (rc)
+		goto out;
+	rc = sysfs_create_group(&s->kobj, &cpu_common_attr_group);
+	if (rc)
+		goto out_cpu;
+	rc = topology_cpu_init(c);
+	if (rc)
+		goto out_topology;
+	return 0;
+
+out_topology:
+	sysfs_remove_group(&s->kobj, &cpu_common_attr_group);
+out_cpu:
+	unregister_cpu(c);
+out:
+	return rc;
+}
+
+int __ref smp_rescan_cpus(void)
+{
+	struct sclp_core_info *info;
+	int nr;
+
+	info = kzalloc(sizeof(*info), GFP_KERNEL);
+	if (!info)
+		return -ENOMEM;
+	smp_get_core_info(info, 0);
+	nr = __smp_rescan_cpus(info, false);
+	kfree(info);
+	if (nr)
+		topology_schedule_update();
+	return 0;
+}
+
+static ssize_t __ref rescan_store(struct device *dev,
+				  struct device_attribute *attr,
+				  const char *buf,
+				  size_t count)
+{
+	int rc;
+
+	rc = lock_device_hotplug_sysfs();
+	if (rc)
+		return rc;
+	rc = smp_rescan_cpus();
+	unlock_device_hotplug();
+	return rc ? rc : count;
+}
+static DEVICE_ATTR_WO(rescan);
+
+static int __init s390_smp_init(void)
+{
+	struct device *dev_root;
+	int cpu, rc = 0;
+
+	dev_root = bus_get_dev_root(&cpu_subsys);
+	if (dev_root) {
+		rc = device_create_file(dev_root, &dev_attr_rescan);
+		put_device(dev_root);
+		if (rc)
+			return rc;
+	}
+
+	for_each_present_cpu(cpu) {
+		rc = smp_add_present_cpu(cpu);
+		if (rc)
+			goto out;
+	}
+
+	rc = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "s390/smp:online",
+			       smp_cpu_online, smp_cpu_pre_down);
+	rc = rc <= 0 ? rc : 0;
+out:
+	return rc;
+}
+subsys_initcall(s390_smp_init);
+
+static __always_inline void set_new_lowcore(struct lowcore *lc)
+{
+	union register_pair dst, src;
+	u32 pfx;
+
+	src.even = (unsigned long) &S390_lowcore;
+	src.odd  = sizeof(S390_lowcore);
+	dst.even = (unsigned long) lc;
+	dst.odd  = sizeof(*lc);
+	pfx = __pa(lc);
+
+	asm volatile(
+		"	mvcl	%[dst],%[src]\n"
+		"	spx	%[pfx]\n"
+		: [dst] "+&d" (dst.pair), [src] "+&d" (src.pair)
+		: [pfx] "Q" (pfx)
+		: "memory", "cc");
+}
+
+int __init smp_reinit_ipl_cpu(void)
+{
+	unsigned long async_stack, nodat_stack, mcck_stack;
+	struct lowcore *lc, *lc_ipl;
+	unsigned long flags, cr0;
+	u64 mcesad;
+
+	lc_ipl = lowcore_ptr[0];
+	lc = (struct lowcore *)	__get_free_pages(GFP_KERNEL | GFP_DMA, LC_ORDER);
+	nodat_stack = __get_free_pages(GFP_KERNEL, THREAD_SIZE_ORDER);
+	async_stack = stack_alloc();
+	mcck_stack = stack_alloc();
+	if (!lc || !nodat_stack || !async_stack || !mcck_stack || nmi_alloc_mcesa(&mcesad))
+		panic("Couldn't allocate memory");
+
+	local_irq_save(flags);
+	local_mcck_disable();
+	set_new_lowcore(lc);
+	S390_lowcore.nodat_stack = nodat_stack + STACK_INIT_OFFSET;
+	S390_lowcore.async_stack = async_stack + STACK_INIT_OFFSET;
+	S390_lowcore.mcck_stack = mcck_stack + STACK_INIT_OFFSET;
+	__ctl_store(cr0, 0, 0);
+	__ctl_clear_bit(0, 28); /* disable lowcore protection */
+	S390_lowcore.mcesad = mcesad;
+	__ctl_load(cr0, 0, 0);
+	if (abs_lowcore_map(0, lc, false))
+		panic("Couldn't remap absolute lowcore");
+	lowcore_ptr[0] = lc;
+	local_mcck_enable();
+	local_irq_restore(flags);
+
+	memblock_free_late(__pa(lc_ipl->mcck_stack - STACK_INIT_OFFSET), THREAD_SIZE);
+	memblock_free_late(__pa(lc_ipl->async_stack - STACK_INIT_OFFSET), THREAD_SIZE);
+	memblock_free_late(__pa(lc_ipl->nodat_stack - STACK_INIT_OFFSET), THREAD_SIZE);
+	memblock_free_late(__pa(lc_ipl), sizeof(*lc_ipl));
+	return 0;
+}
diff --git a/arch/s390/kernel/stacktrace.c b/arch/s390/kernel/stacktrace.c
new file mode 100644
index 0000000000..0787010139
--- /dev/null
+++ b/arch/s390/kernel/stacktrace.c
@@ -0,0 +1,60 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Stack trace management functions
+ *
+ *  Copyright IBM Corp. 2006
+ */
+
+#include <linux/stacktrace.h>
+#include <asm/stacktrace.h>
+#include <asm/unwind.h>
+#include <asm/kprobes.h>
+
+void arch_stack_walk(stack_trace_consume_fn consume_entry, void *cookie,
+		     struct task_struct *task, struct pt_regs *regs)
+{
+	struct unwind_state state;
+	unsigned long addr;
+
+	unwind_for_each_frame(&state, task, regs, 0) {
+		addr = unwind_get_return_address(&state);
+		if (!addr || !consume_entry(cookie, addr))
+			break;
+	}
+}
+
+int arch_stack_walk_reliable(stack_trace_consume_fn consume_entry,
+			     void *cookie, struct task_struct *task)
+{
+	struct unwind_state state;
+	unsigned long addr;
+
+	unwind_for_each_frame(&state, task, NULL, 0) {
+		if (state.stack_info.type != STACK_TYPE_TASK)
+			return -EINVAL;
+
+		if (state.regs)
+			return -EINVAL;
+
+		addr = unwind_get_return_address(&state);
+		if (!addr)
+			return -EINVAL;
+
+#ifdef CONFIG_RETHOOK
+		/*
+		 * Mark stacktraces with krethook functions on them
+		 * as unreliable.
+		 */
+		if (state.ip == (unsigned long)arch_rethook_trampoline)
+			return -EINVAL;
+#endif
+
+		if (!consume_entry(cookie, addr))
+			return -EINVAL;
+	}
+
+	/* Check for stack corruption */
+	if (unwind_error(&state))
+		return -EINVAL;
+	return 0;
+}
diff --git a/arch/s390/kernel/sthyi.c b/arch/s390/kernel/sthyi.c
new file mode 100644
index 0000000000..30bb20461d
--- /dev/null
+++ b/arch/s390/kernel/sthyi.c
@@ -0,0 +1,517 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * store hypervisor information instruction emulation functions.
+ *
+ * Copyright IBM Corp. 2016
+ * Author(s): Janosch Frank <frankja@linux.vnet.ibm.com>
+ */
+#include <linux/errno.h>
+#include <linux/pagemap.h>
+#include <linux/vmalloc.h>
+#include <linux/syscalls.h>
+#include <linux/mutex.h>
+#include <asm/asm-offsets.h>
+#include <asm/sclp.h>
+#include <asm/diag.h>
+#include <asm/sysinfo.h>
+#include <asm/ebcdic.h>
+#include <asm/facility.h>
+#include <asm/sthyi.h>
+#include "entry.h"
+
+#define DED_WEIGHT 0xffff
+/*
+ * CP and IFL as EBCDIC strings, SP/0x40 determines the end of string
+ * as they are justified with spaces.
+ */
+#define CP  0xc3d7404040404040UL
+#define IFL 0xc9c6d34040404040UL
+
+enum hdr_flags {
+	HDR_NOT_LPAR   = 0x10,
+	HDR_STACK_INCM = 0x20,
+	HDR_STSI_UNAV  = 0x40,
+	HDR_PERF_UNAV  = 0x80,
+};
+
+enum mac_validity {
+	MAC_NAME_VLD = 0x20,
+	MAC_ID_VLD   = 0x40,
+	MAC_CNT_VLD  = 0x80,
+};
+
+enum par_flag {
+	PAR_MT_EN = 0x80,
+};
+
+enum par_validity {
+	PAR_GRP_VLD  = 0x08,
+	PAR_ID_VLD   = 0x10,
+	PAR_ABS_VLD  = 0x20,
+	PAR_WGHT_VLD = 0x40,
+	PAR_PCNT_VLD  = 0x80,
+};
+
+struct hdr_sctn {
+	u8 infhflg1;
+	u8 infhflg2; /* reserved */
+	u8 infhval1; /* reserved */
+	u8 infhval2; /* reserved */
+	u8 reserved[3];
+	u8 infhygct;
+	u16 infhtotl;
+	u16 infhdln;
+	u16 infmoff;
+	u16 infmlen;
+	u16 infpoff;
+	u16 infplen;
+	u16 infhoff1;
+	u16 infhlen1;
+	u16 infgoff1;
+	u16 infglen1;
+	u16 infhoff2;
+	u16 infhlen2;
+	u16 infgoff2;
+	u16 infglen2;
+	u16 infhoff3;
+	u16 infhlen3;
+	u16 infgoff3;
+	u16 infglen3;
+	u8 reserved2[4];
+} __packed;
+
+struct mac_sctn {
+	u8 infmflg1; /* reserved */
+	u8 infmflg2; /* reserved */
+	u8 infmval1;
+	u8 infmval2; /* reserved */
+	u16 infmscps;
+	u16 infmdcps;
+	u16 infmsifl;
+	u16 infmdifl;
+	char infmname[8];
+	char infmtype[4];
+	char infmmanu[16];
+	char infmseq[16];
+	char infmpman[4];
+	u8 reserved[4];
+} __packed;
+
+struct par_sctn {
+	u8 infpflg1;
+	u8 infpflg2; /* reserved */
+	u8 infpval1;
+	u8 infpval2; /* reserved */
+	u16 infppnum;
+	u16 infpscps;
+	u16 infpdcps;
+	u16 infpsifl;
+	u16 infpdifl;
+	u16 reserved;
+	char infppnam[8];
+	u32 infpwbcp;
+	u32 infpabcp;
+	u32 infpwbif;
+	u32 infpabif;
+	char infplgnm[8];
+	u32 infplgcp;
+	u32 infplgif;
+} __packed;
+
+struct sthyi_sctns {
+	struct hdr_sctn hdr;
+	struct mac_sctn mac;
+	struct par_sctn par;
+} __packed;
+
+struct cpu_inf {
+	u64 lpar_cap;
+	u64 lpar_grp_cap;
+	u64 lpar_weight;
+	u64 all_weight;
+	int cpu_num_ded;
+	int cpu_num_shd;
+};
+
+struct lpar_cpu_inf {
+	struct cpu_inf cp;
+	struct cpu_inf ifl;
+};
+
+/*
+ * STHYI requires extensive locking in the higher hypervisors
+ * and is very computational/memory expensive. Therefore we
+ * cache the retrieved data whose valid period is 1s.
+ */
+#define CACHE_VALID_JIFFIES	HZ
+
+struct sthyi_info {
+	void *info;
+	unsigned long end;
+};
+
+static DEFINE_MUTEX(sthyi_mutex);
+static struct sthyi_info sthyi_cache;
+
+static inline u64 cpu_id(u8 ctidx, void *diag224_buf)
+{
+	return *((u64 *)(diag224_buf + (ctidx + 1) * DIAG204_CPU_NAME_LEN));
+}
+
+/*
+ * Scales the cpu capping from the lpar range to the one expected in
+ * sthyi data.
+ *
+ * diag204 reports a cap in hundredths of processor units.
+ * z/VM's range for one core is 0 - 0x10000.
+ */
+static u32 scale_cap(u32 in)
+{
+	return (0x10000 * in) / 100;
+}
+
+static void fill_hdr(struct sthyi_sctns *sctns)
+{
+	sctns->hdr.infhdln = sizeof(sctns->hdr);
+	sctns->hdr.infmoff = sizeof(sctns->hdr);
+	sctns->hdr.infmlen = sizeof(sctns->mac);
+	sctns->hdr.infplen = sizeof(sctns->par);
+	sctns->hdr.infpoff = sctns->hdr.infhdln + sctns->hdr.infmlen;
+	sctns->hdr.infhtotl = sctns->hdr.infpoff + sctns->hdr.infplen;
+}
+
+static void fill_stsi_mac(struct sthyi_sctns *sctns,
+			  struct sysinfo_1_1_1 *sysinfo)
+{
+	sclp_ocf_cpc_name_copy(sctns->mac.infmname);
+	if (*(u64 *)sctns->mac.infmname != 0)
+		sctns->mac.infmval1 |= MAC_NAME_VLD;
+
+	if (stsi(sysinfo, 1, 1, 1))
+		return;
+
+	memcpy(sctns->mac.infmtype, sysinfo->type, sizeof(sctns->mac.infmtype));
+	memcpy(sctns->mac.infmmanu, sysinfo->manufacturer, sizeof(sctns->mac.infmmanu));
+	memcpy(sctns->mac.infmpman, sysinfo->plant, sizeof(sctns->mac.infmpman));
+	memcpy(sctns->mac.infmseq, sysinfo->sequence, sizeof(sctns->mac.infmseq));
+
+	sctns->mac.infmval1 |= MAC_ID_VLD;
+}
+
+static void fill_stsi_par(struct sthyi_sctns *sctns,
+			  struct sysinfo_2_2_2 *sysinfo)
+{
+	if (stsi(sysinfo, 2, 2, 2))
+		return;
+
+	sctns->par.infppnum = sysinfo->lpar_number;
+	memcpy(sctns->par.infppnam, sysinfo->name, sizeof(sctns->par.infppnam));
+
+	sctns->par.infpval1 |= PAR_ID_VLD;
+}
+
+static void fill_stsi(struct sthyi_sctns *sctns)
+{
+	void *sysinfo;
+
+	/* Errors are handled through the validity bits in the response. */
+	sysinfo = (void *)__get_free_page(GFP_KERNEL);
+	if (!sysinfo)
+		return;
+
+	fill_stsi_mac(sctns, sysinfo);
+	fill_stsi_par(sctns, sysinfo);
+
+	free_pages((unsigned long)sysinfo, 0);
+}
+
+static void fill_diag_mac(struct sthyi_sctns *sctns,
+			  struct diag204_x_phys_block *block,
+			  void *diag224_buf)
+{
+	int i;
+
+	for (i = 0; i < block->hdr.cpus; i++) {
+		switch (cpu_id(block->cpus[i].ctidx, diag224_buf)) {
+		case CP:
+			if (block->cpus[i].weight == DED_WEIGHT)
+				sctns->mac.infmdcps++;
+			else
+				sctns->mac.infmscps++;
+			break;
+		case IFL:
+			if (block->cpus[i].weight == DED_WEIGHT)
+				sctns->mac.infmdifl++;
+			else
+				sctns->mac.infmsifl++;
+			break;
+		}
+	}
+	sctns->mac.infmval1 |= MAC_CNT_VLD;
+}
+
+/* Returns a pointer to the the next partition block. */
+static struct diag204_x_part_block *lpar_cpu_inf(struct lpar_cpu_inf *part_inf,
+						 bool this_lpar,
+						 void *diag224_buf,
+						 struct diag204_x_part_block *block)
+{
+	int i, capped = 0, weight_cp = 0, weight_ifl = 0;
+	struct cpu_inf *cpu_inf;
+
+	for (i = 0; i < block->hdr.rcpus; i++) {
+		if (!(block->cpus[i].cflag & DIAG204_CPU_ONLINE))
+			continue;
+
+		switch (cpu_id(block->cpus[i].ctidx, diag224_buf)) {
+		case CP:
+			cpu_inf = &part_inf->cp;
+			if (block->cpus[i].cur_weight < DED_WEIGHT)
+				weight_cp |= block->cpus[i].cur_weight;
+			break;
+		case IFL:
+			cpu_inf = &part_inf->ifl;
+			if (block->cpus[i].cur_weight < DED_WEIGHT)
+				weight_ifl |= block->cpus[i].cur_weight;
+			break;
+		default:
+			continue;
+		}
+
+		if (!this_lpar)
+			continue;
+
+		capped |= block->cpus[i].cflag & DIAG204_CPU_CAPPED;
+		cpu_inf->lpar_cap |= block->cpus[i].cpu_type_cap;
+		cpu_inf->lpar_grp_cap |= block->cpus[i].group_cpu_type_cap;
+
+		if (block->cpus[i].weight == DED_WEIGHT)
+			cpu_inf->cpu_num_ded += 1;
+		else
+			cpu_inf->cpu_num_shd += 1;
+	}
+
+	if (this_lpar && capped) {
+		part_inf->cp.lpar_weight = weight_cp;
+		part_inf->ifl.lpar_weight = weight_ifl;
+	}
+	part_inf->cp.all_weight += weight_cp;
+	part_inf->ifl.all_weight += weight_ifl;
+	return (struct diag204_x_part_block *)&block->cpus[i];
+}
+
+static void fill_diag(struct sthyi_sctns *sctns)
+{
+	int i, r, pages;
+	bool this_lpar;
+	void *diag204_buf;
+	void *diag224_buf = NULL;
+	struct diag204_x_info_blk_hdr *ti_hdr;
+	struct diag204_x_part_block *part_block;
+	struct diag204_x_phys_block *phys_block;
+	struct lpar_cpu_inf lpar_inf = {};
+
+	/* Errors are handled through the validity bits in the response. */
+	pages = diag204((unsigned long)DIAG204_SUBC_RSI |
+			(unsigned long)DIAG204_INFO_EXT, 0, NULL);
+	if (pages <= 0)
+		return;
+
+	diag204_buf = __vmalloc_node(array_size(pages, PAGE_SIZE),
+				     PAGE_SIZE, GFP_KERNEL, NUMA_NO_NODE,
+				     __builtin_return_address(0));
+	if (!diag204_buf)
+		return;
+
+	r = diag204((unsigned long)DIAG204_SUBC_STIB7 |
+		    (unsigned long)DIAG204_INFO_EXT, pages, diag204_buf);
+	if (r < 0)
+		goto out;
+
+	diag224_buf = (void *)__get_free_page(GFP_KERNEL | GFP_DMA);
+	if (!diag224_buf || diag224(diag224_buf))
+		goto out;
+
+	ti_hdr = diag204_buf;
+	part_block = diag204_buf + sizeof(*ti_hdr);
+
+	for (i = 0; i < ti_hdr->npar; i++) {
+		/*
+		 * For the calling lpar we also need to get the cpu
+		 * caps and weights. The time information block header
+		 * specifies the offset to the partition block of the
+		 * caller lpar, so we know when we process its data.
+		 */
+		this_lpar = (void *)part_block - diag204_buf == ti_hdr->this_part;
+		part_block = lpar_cpu_inf(&lpar_inf, this_lpar, diag224_buf,
+					  part_block);
+	}
+
+	phys_block = (struct diag204_x_phys_block *)part_block;
+	part_block = diag204_buf + ti_hdr->this_part;
+	if (part_block->hdr.mtid)
+		sctns->par.infpflg1 = PAR_MT_EN;
+
+	sctns->par.infpval1 |= PAR_GRP_VLD;
+	sctns->par.infplgcp = scale_cap(lpar_inf.cp.lpar_grp_cap);
+	sctns->par.infplgif = scale_cap(lpar_inf.ifl.lpar_grp_cap);
+	memcpy(sctns->par.infplgnm, part_block->hdr.hardware_group_name,
+	       sizeof(sctns->par.infplgnm));
+
+	sctns->par.infpscps = lpar_inf.cp.cpu_num_shd;
+	sctns->par.infpdcps = lpar_inf.cp.cpu_num_ded;
+	sctns->par.infpsifl = lpar_inf.ifl.cpu_num_shd;
+	sctns->par.infpdifl = lpar_inf.ifl.cpu_num_ded;
+	sctns->par.infpval1 |= PAR_PCNT_VLD;
+
+	sctns->par.infpabcp = scale_cap(lpar_inf.cp.lpar_cap);
+	sctns->par.infpabif = scale_cap(lpar_inf.ifl.lpar_cap);
+	sctns->par.infpval1 |= PAR_ABS_VLD;
+
+	/*
+	 * Everything below needs global performance data to be
+	 * meaningful.
+	 */
+	if (!(ti_hdr->flags & DIAG204_LPAR_PHYS_FLG)) {
+		sctns->hdr.infhflg1 |= HDR_PERF_UNAV;
+		goto out;
+	}
+
+	fill_diag_mac(sctns, phys_block, diag224_buf);
+
+	if (lpar_inf.cp.lpar_weight) {
+		sctns->par.infpwbcp = sctns->mac.infmscps * 0x10000 *
+			lpar_inf.cp.lpar_weight / lpar_inf.cp.all_weight;
+	}
+
+	if (lpar_inf.ifl.lpar_weight) {
+		sctns->par.infpwbif = sctns->mac.infmsifl * 0x10000 *
+			lpar_inf.ifl.lpar_weight / lpar_inf.ifl.all_weight;
+	}
+	sctns->par.infpval1 |= PAR_WGHT_VLD;
+
+out:
+	free_page((unsigned long)diag224_buf);
+	vfree(diag204_buf);
+}
+
+static int sthyi(u64 vaddr, u64 *rc)
+{
+	union register_pair r1 = { .even = 0, }; /* subcode */
+	union register_pair r2 = { .even = vaddr, };
+	int cc;
+
+	asm volatile(
+		".insn   rre,0xB2560000,%[r1],%[r2]\n"
+		"ipm     %[cc]\n"
+		"srl     %[cc],28\n"
+		: [cc] "=&d" (cc), [r2] "+&d" (r2.pair)
+		: [r1] "d" (r1.pair)
+		: "memory", "cc");
+	*rc = r2.odd;
+	return cc;
+}
+
+static int fill_dst(void *dst, u64 *rc)
+{
+	struct sthyi_sctns *sctns = (struct sthyi_sctns *)dst;
+
+	/*
+	 * If the facility is on, we don't want to emulate the instruction.
+	 * We ask the hypervisor to provide the data.
+	 */
+	if (test_facility(74))
+		return sthyi((u64)dst, rc);
+
+	fill_hdr(sctns);
+	fill_stsi(sctns);
+	fill_diag(sctns);
+	*rc = 0;
+	return 0;
+}
+
+static int sthyi_init_cache(void)
+{
+	if (sthyi_cache.info)
+		return 0;
+	sthyi_cache.info = (void *)get_zeroed_page(GFP_KERNEL);
+	if (!sthyi_cache.info)
+		return -ENOMEM;
+	sthyi_cache.end = jiffies - 1; /* expired */
+	return 0;
+}
+
+static int sthyi_update_cache(u64 *rc)
+{
+	int r;
+
+	memset(sthyi_cache.info, 0, PAGE_SIZE);
+	r = fill_dst(sthyi_cache.info, rc);
+	if (r)
+		return r;
+	sthyi_cache.end = jiffies + CACHE_VALID_JIFFIES;
+	return r;
+}
+
+/*
+ * sthyi_fill - Fill page with data returned by the STHYI instruction
+ *
+ * @dst: Pointer to zeroed page
+ * @rc:  Pointer for storing the return code of the instruction
+ *
+ * Fills the destination with system information returned by the STHYI
+ * instruction. The data is generated by emulation or execution of STHYI,
+ * if available. The return value is either a negative error value or
+ * the condition code that would be returned, the rc parameter is the
+ * return code which is passed in register R2 + 1.
+ */
+int sthyi_fill(void *dst, u64 *rc)
+{
+	int r;
+
+	mutex_lock(&sthyi_mutex);
+	r = sthyi_init_cache();
+	if (r)
+		goto out;
+
+	if (time_is_before_jiffies(sthyi_cache.end)) {
+		/* cache expired */
+		r = sthyi_update_cache(rc);
+		if (r)
+			goto out;
+	}
+	*rc = 0;
+	memcpy(dst, sthyi_cache.info, PAGE_SIZE);
+out:
+	mutex_unlock(&sthyi_mutex);
+	return r;
+}
+EXPORT_SYMBOL_GPL(sthyi_fill);
+
+SYSCALL_DEFINE4(s390_sthyi, unsigned long, function_code, void __user *, buffer,
+		u64 __user *, return_code, unsigned long, flags)
+{
+	u64 sthyi_rc;
+	void *info;
+	int r;
+
+	if (flags)
+		return -EINVAL;
+	if (function_code != STHYI_FC_CP_IFL_CAP)
+		return -EOPNOTSUPP;
+	info = (void *)get_zeroed_page(GFP_KERNEL);
+	if (!info)
+		return -ENOMEM;
+	r = sthyi_fill(info, &sthyi_rc);
+	if (r < 0)
+		goto out;
+	if (return_code && put_user(sthyi_rc, return_code)) {
+		r = -EFAULT;
+		goto out;
+	}
+	if (copy_to_user(buffer, info, PAGE_SIZE))
+		r = -EFAULT;
+out:
+	free_page((unsigned long)info);
+	return r;
+}
diff --git a/arch/s390/kernel/syscall.c b/arch/s390/kernel/syscall.c
new file mode 100644
index 0000000000..dc2355c623
--- /dev/null
+++ b/arch/s390/kernel/syscall.c
@@ -0,0 +1,170 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ *  S390 version
+ *    Copyright IBM Corp. 1999, 2000
+ *    Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com),
+ *               Thomas Spatzier (tspat@de.ibm.com)
+ *
+ *  Derived from "arch/i386/kernel/sys_i386.c"
+ *
+ *  This file contains various random system calls that
+ *  have a non-standard calling sequence on the Linux/s390
+ *  platform.
+ */
+
+#include <linux/errno.h>
+#include <linux/sched.h>
+#include <linux/mm.h>
+#include <linux/fs.h>
+#include <linux/smp.h>
+#include <linux/sem.h>
+#include <linux/msg.h>
+#include <linux/shm.h>
+#include <linux/stat.h>
+#include <linux/syscalls.h>
+#include <linux/mman.h>
+#include <linux/file.h>
+#include <linux/utsname.h>
+#include <linux/personality.h>
+#include <linux/unistd.h>
+#include <linux/ipc.h>
+#include <linux/uaccess.h>
+#include <linux/string.h>
+#include <linux/thread_info.h>
+#include <linux/entry-common.h>
+
+#include <asm/ptrace.h>
+#include <asm/vtime.h>
+
+#include "entry.h"
+
+/*
+ * Perform the mmap() system call. Linux for S/390 isn't able to handle more
+ * than 5 system call parameters, so this system call uses a memory block
+ * for parameter passing.
+ */
+
+struct s390_mmap_arg_struct {
+	unsigned long addr;
+	unsigned long len;
+	unsigned long prot;
+	unsigned long flags;
+	unsigned long fd;
+	unsigned long offset;
+};
+
+SYSCALL_DEFINE1(mmap2, struct s390_mmap_arg_struct __user *, arg)
+{
+	struct s390_mmap_arg_struct a;
+	int error = -EFAULT;
+
+	if (copy_from_user(&a, arg, sizeof(a)))
+		goto out;
+	error = ksys_mmap_pgoff(a.addr, a.len, a.prot, a.flags, a.fd, a.offset);
+out:
+	return error;
+}
+
+#ifdef CONFIG_SYSVIPC
+/*
+ * sys_ipc() is the de-multiplexer for the SysV IPC calls.
+ */
+SYSCALL_DEFINE5(s390_ipc, uint, call, int, first, unsigned long, second,
+		unsigned long, third, void __user *, ptr)
+{
+	if (call >> 16)
+		return -EINVAL;
+	/* The s390 sys_ipc variant has only five parameters instead of six
+	 * like the generic variant. The only difference is the handling of
+	 * the SEMTIMEDOP subcall where on s390 the third parameter is used
+	 * as a pointer to a struct timespec where the generic variant uses
+	 * the fifth parameter.
+	 * Therefore we can call the generic variant by simply passing the
+	 * third parameter also as fifth parameter.
+	 */
+	return ksys_ipc(call, first, second, third, ptr, third);
+}
+#endif /* CONFIG_SYSVIPC */
+
+SYSCALL_DEFINE1(s390_personality, unsigned int, personality)
+{
+	unsigned int ret = current->personality;
+
+	if (personality(current->personality) == PER_LINUX32 &&
+	    personality(personality) == PER_LINUX)
+		personality |= PER_LINUX32;
+
+	if (personality != 0xffffffff)
+		set_personality(personality);
+
+	if (personality(ret) == PER_LINUX32)
+		ret &= ~PER_LINUX32;
+
+	return ret;
+}
+
+SYSCALL_DEFINE0(ni_syscall)
+{
+	return -ENOSYS;
+}
+
+static void do_syscall(struct pt_regs *regs)
+{
+	unsigned long nr;
+
+	nr = regs->int_code & 0xffff;
+	if (!nr) {
+		nr = regs->gprs[1] & 0xffff;
+		regs->int_code &= ~0xffffUL;
+		regs->int_code |= nr;
+	}
+
+	regs->gprs[2] = nr;
+
+	if (nr == __NR_restart_syscall && !(current->restart_block.arch_data & 1)) {
+		regs->psw.addr = current->restart_block.arch_data;
+		current->restart_block.arch_data = 1;
+	}
+	nr = syscall_enter_from_user_mode_work(regs, nr);
+
+	/*
+	 * In the s390 ptrace ABI, both the syscall number and the return value
+	 * use gpr2. However, userspace puts the syscall number either in the
+	 * svc instruction itself, or uses gpr1. To make at least skipping syscalls
+	 * work, the ptrace code sets PIF_SYSCALL_RET_SET, which is checked here
+	 * and if set, the syscall will be skipped.
+	 */
+
+	if (unlikely(test_and_clear_pt_regs_flag(regs, PIF_SYSCALL_RET_SET)))
+		goto out;
+	regs->gprs[2] = -ENOSYS;
+	if (likely(nr >= NR_syscalls))
+		goto out;
+	do {
+		regs->gprs[2] = current->thread.sys_call_table[nr](regs);
+	} while (test_and_clear_pt_regs_flag(regs, PIF_EXECVE_PGSTE_RESTART));
+out:
+	syscall_exit_to_user_mode_work(regs);
+}
+
+void noinstr __do_syscall(struct pt_regs *regs, int per_trap)
+{
+	add_random_kstack_offset();
+	enter_from_user_mode(regs);
+	regs->psw = S390_lowcore.svc_old_psw;
+	regs->int_code = S390_lowcore.svc_int_code;
+	update_timer_sys();
+	if (static_branch_likely(&cpu_has_bear))
+		current->thread.last_break = regs->last_break;
+
+	local_irq_enable();
+	regs->orig_gpr2 = regs->gprs[2];
+
+	if (per_trap)
+		set_thread_flag(TIF_PER_TRAP);
+
+	regs->flags = 0;
+	set_pt_regs_flag(regs, PIF_SYSCALL);
+	do_syscall(regs);
+	exit_to_user_mode();
+}
diff --git a/arch/s390/kernel/syscalls/Makefile b/arch/s390/kernel/syscalls/Makefile
new file mode 100644
index 0000000000..fb85e79794
--- /dev/null
+++ b/arch/s390/kernel/syscalls/Makefile
@@ -0,0 +1,45 @@
+# SPDX-License-Identifier: GPL-2.0
+
+gen	:= arch/$(ARCH)/include/generated
+kapi	:= $(gen)/asm
+uapi	:= $(gen)/uapi/asm
+
+syscall	:= $(srctree)/$(src)/syscall.tbl
+systbl	:= $(srctree)/$(src)/syscalltbl
+
+gen-y := $(kapi)/syscall_table.h
+kapi-hdrs-y := $(kapi)/unistd_nr.h
+uapi-hdrs-y := $(uapi)/unistd_32.h
+uapi-hdrs-y += $(uapi)/unistd_64.h
+
+targets += $(addprefix ../../../,$(gen-y) $(kapi-hdrs-y) $(uapi-hdrs-y))
+
+PHONY += kapi uapi
+
+kapi:	$(gen-y) $(kapi-hdrs-y)
+uapi:	$(uapi-hdrs-y)
+
+
+# Create output directory if not already present
+$(shell mkdir -p $(uapi) $(kapi))
+
+filechk_syshdr = $(CONFIG_SHELL) '$(systbl)' -H -a $(syshdr_abi_$(basetarget)) -f "$2" < $<
+
+filechk_sysnr = $(CONFIG_SHELL) '$(systbl)' -N -a $(sysnr_abi_$(basetarget)) < $<
+
+filechk_syscalls = $(CONFIG_SHELL) '$(systbl)' -S < $<
+
+syshdr_abi_unistd_32 := common,32
+$(uapi)/unistd_32.h: $(syscall) FORCE
+	$(call filechk,syshdr,$@)
+
+syshdr_abi_unistd_64 := common,64
+$(uapi)/unistd_64.h: $(syscall) FORCE
+	$(call filechk,syshdr,$@)
+
+$(kapi)/syscall_table.h: $(syscall) FORCE
+	$(call filechk,syscalls)
+
+sysnr_abi_unistd_nr := common,32,64
+$(kapi)/unistd_nr.h: $(syscall) FORCE
+	$(call filechk,sysnr)
diff --git a/arch/s390/kernel/syscalls/syscall.tbl b/arch/s390/kernel/syscalls/syscall.tbl
new file mode 100644
index 0000000000..0122cc1569
--- /dev/null
+++ b/arch/s390/kernel/syscalls/syscall.tbl
@@ -0,0 +1,457 @@
+# SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
+#
+# System call table for s390
+#
+# Format:
+#
+# <nr> <abi> <syscall> <entry-64bit> <compat-entry>
+#
+# where <abi> can be common, 64, or 32
+
+1    common	exit			sys_exit			sys_exit
+2    common	fork			sys_fork			sys_fork
+3    common	read			sys_read			compat_sys_s390_read
+4    common	write			sys_write			compat_sys_s390_write
+5    common	open			sys_open			compat_sys_open
+6    common	close			sys_close			sys_close
+7    common	restart_syscall		sys_restart_syscall		sys_restart_syscall
+8    common	creat			sys_creat			sys_creat
+9    common	link			sys_link			sys_link
+10   common	unlink			sys_unlink			sys_unlink
+11   common	execve			sys_execve			compat_sys_execve
+12   common	chdir			sys_chdir			sys_chdir
+13   32		time			-				sys_time32
+14   common	mknod			sys_mknod			sys_mknod
+15   common	chmod			sys_chmod			sys_chmod
+16   32		lchown			-				sys_lchown16
+19   common	lseek			sys_lseek			compat_sys_lseek
+20   common	getpid			sys_getpid			sys_getpid
+21   common	mount			sys_mount			sys_mount
+22   common	umount			sys_oldumount			sys_oldumount
+23   32		setuid			-				sys_setuid16
+24   32		getuid			-				sys_getuid16
+25   32		stime			-				sys_stime32
+26   common	ptrace			sys_ptrace			compat_sys_ptrace
+27   common	alarm			sys_alarm			sys_alarm
+29   common	pause			sys_pause			sys_pause
+30   common	utime			sys_utime			sys_utime32
+33   common	access			sys_access			sys_access
+34   common	nice			sys_nice			sys_nice
+36   common	sync			sys_sync			sys_sync
+37   common	kill			sys_kill			sys_kill
+38   common	rename			sys_rename			sys_rename
+39   common	mkdir			sys_mkdir			sys_mkdir
+40   common	rmdir			sys_rmdir			sys_rmdir
+41   common	dup			sys_dup				sys_dup
+42   common	pipe			sys_pipe			sys_pipe
+43   common	times			sys_times			compat_sys_times
+45   common	brk			sys_brk				sys_brk
+46   32		setgid			-				sys_setgid16
+47   32		getgid			-				sys_getgid16
+48   common	signal			sys_signal			sys_signal
+49   32		geteuid			-				sys_geteuid16
+50   32		getegid			-				sys_getegid16
+51   common	acct			sys_acct			sys_acct
+52   common	umount2			sys_umount			sys_umount
+54   common	ioctl			sys_ioctl			compat_sys_ioctl
+55   common	fcntl			sys_fcntl			compat_sys_fcntl
+57   common	setpgid			sys_setpgid			sys_setpgid
+60   common	umask			sys_umask			sys_umask
+61   common	chroot			sys_chroot			sys_chroot
+62   common	ustat			sys_ustat			compat_sys_ustat
+63   common	dup2			sys_dup2			sys_dup2
+64   common	getppid			sys_getppid			sys_getppid
+65   common	getpgrp			sys_getpgrp			sys_getpgrp
+66   common	setsid			sys_setsid			sys_setsid
+67   common	sigaction		sys_sigaction			compat_sys_sigaction
+70   32		setreuid		-				sys_setreuid16
+71   32		setregid		-				sys_setregid16
+72   common	sigsuspend		sys_sigsuspend			sys_sigsuspend
+73   common	sigpending		sys_sigpending			compat_sys_sigpending
+74   common	sethostname		sys_sethostname			sys_sethostname
+75   common	setrlimit		sys_setrlimit			compat_sys_setrlimit
+76   32		getrlimit		-				compat_sys_old_getrlimit
+77   common	getrusage		sys_getrusage			compat_sys_getrusage
+78   common	gettimeofday		sys_gettimeofday		compat_sys_gettimeofday
+79   common	settimeofday		sys_settimeofday		compat_sys_settimeofday
+80   32		getgroups		-				sys_getgroups16
+81   32		setgroups		-				sys_setgroups16
+83   common	symlink			sys_symlink			sys_symlink
+85   common	readlink		sys_readlink			sys_readlink
+86   common	uselib			sys_uselib			sys_uselib
+87   common	swapon			sys_swapon			sys_swapon
+88   common	reboot			sys_reboot			sys_reboot
+89   common	readdir			-				compat_sys_old_readdir
+90   common	mmap			sys_old_mmap			compat_sys_s390_old_mmap
+91   common	munmap			sys_munmap			sys_munmap
+92   common	truncate		sys_truncate			compat_sys_truncate
+93   common	ftruncate		sys_ftruncate			compat_sys_ftruncate
+94   common	fchmod			sys_fchmod			sys_fchmod
+95   32		fchown			-				sys_fchown16
+96   common	getpriority		sys_getpriority			sys_getpriority
+97   common	setpriority		sys_setpriority			sys_setpriority
+99   common	statfs			sys_statfs			compat_sys_statfs
+100  common	fstatfs			sys_fstatfs			compat_sys_fstatfs
+101  32		ioperm			-				-
+102  common	socketcall		sys_socketcall			compat_sys_socketcall
+103  common	syslog			sys_syslog			sys_syslog
+104  common	setitimer		sys_setitimer			compat_sys_setitimer
+105  common	getitimer		sys_getitimer			compat_sys_getitimer
+106  common	stat			sys_newstat			compat_sys_newstat
+107  common	lstat			sys_newlstat			compat_sys_newlstat
+108  common	fstat			sys_newfstat			compat_sys_newfstat
+110  common	lookup_dcookie		sys_lookup_dcookie		compat_sys_lookup_dcookie
+111  common	vhangup			sys_vhangup			sys_vhangup
+112  common	idle			-				-
+114  common	wait4			sys_wait4			compat_sys_wait4
+115  common	swapoff			sys_swapoff			sys_swapoff
+116  common	sysinfo			sys_sysinfo			compat_sys_sysinfo
+117  common	ipc			sys_s390_ipc			compat_sys_s390_ipc
+118  common	fsync			sys_fsync			sys_fsync
+119  common	sigreturn		sys_sigreturn			compat_sys_sigreturn
+120  common	clone			sys_clone			sys_clone
+121  common	setdomainname		sys_setdomainname		sys_setdomainname
+122  common	uname			sys_newuname			sys_newuname
+124  common	adjtimex		sys_adjtimex			sys_adjtimex_time32
+125  common	mprotect		sys_mprotect			sys_mprotect
+126  common	sigprocmask		sys_sigprocmask			compat_sys_sigprocmask
+127  common	create_module		-				-
+128  common	init_module		sys_init_module			sys_init_module
+129  common	delete_module		sys_delete_module		sys_delete_module
+130  common	get_kernel_syms		-				-
+131  common	quotactl		sys_quotactl			sys_quotactl
+132  common	getpgid			sys_getpgid			sys_getpgid
+133  common	fchdir			sys_fchdir			sys_fchdir
+134  common	bdflush			sys_ni_syscall			sys_ni_syscall
+135  common	sysfs			sys_sysfs			sys_sysfs
+136  common	personality		sys_s390_personality		sys_s390_personality
+137  common	afs_syscall		-				-
+138  32		setfsuid		-				sys_setfsuid16
+139  32		setfsgid		-				sys_setfsgid16
+140  32		_llseek			-				sys_llseek
+141  common	getdents		sys_getdents			compat_sys_getdents
+142  32		_newselect		-				compat_sys_select
+142  64		select			sys_select			-
+143  common	flock			sys_flock			sys_flock
+144  common	msync			sys_msync			sys_msync
+145  common	readv			sys_readv			sys_readv
+146  common	writev			sys_writev			sys_writev
+147  common	getsid			sys_getsid			sys_getsid
+148  common	fdatasync		sys_fdatasync			sys_fdatasync
+149  common	_sysctl			-				-
+150  common	mlock			sys_mlock			sys_mlock
+151  common	munlock			sys_munlock			sys_munlock
+152  common	mlockall		sys_mlockall			sys_mlockall
+153  common	munlockall		sys_munlockall			sys_munlockall
+154  common	sched_setparam		sys_sched_setparam		sys_sched_setparam
+155  common	sched_getparam		sys_sched_getparam		sys_sched_getparam
+156  common	sched_setscheduler	sys_sched_setscheduler		sys_sched_setscheduler
+157  common	sched_getscheduler	sys_sched_getscheduler		sys_sched_getscheduler
+158  common	sched_yield		sys_sched_yield			sys_sched_yield
+159  common	sched_get_priority_max	sys_sched_get_priority_max	sys_sched_get_priority_max
+160  common	sched_get_priority_min	sys_sched_get_priority_min	sys_sched_get_priority_min
+161  common	sched_rr_get_interval	sys_sched_rr_get_interval	sys_sched_rr_get_interval_time32
+162  common	nanosleep		sys_nanosleep			sys_nanosleep_time32
+163  common	mremap			sys_mremap			sys_mremap
+164  32		setresuid		-				sys_setresuid16
+165  32		getresuid		-				sys_getresuid16
+167  common	query_module		-				-
+168  common	poll			sys_poll			sys_poll
+169  common	nfsservctl		-				-
+170  32		setresgid		-				sys_setresgid16
+171  32		getresgid		-				sys_getresgid16
+172  common	prctl			sys_prctl			sys_prctl
+173  common	rt_sigreturn		sys_rt_sigreturn		compat_sys_rt_sigreturn
+174  common	rt_sigaction		sys_rt_sigaction		compat_sys_rt_sigaction
+175  common	rt_sigprocmask		sys_rt_sigprocmask		compat_sys_rt_sigprocmask
+176  common	rt_sigpending		sys_rt_sigpending		compat_sys_rt_sigpending
+177  common	rt_sigtimedwait		sys_rt_sigtimedwait		compat_sys_rt_sigtimedwait_time32
+178  common	rt_sigqueueinfo		sys_rt_sigqueueinfo		compat_sys_rt_sigqueueinfo
+179  common	rt_sigsuspend		sys_rt_sigsuspend		compat_sys_rt_sigsuspend
+180  common	pread64			sys_pread64			compat_sys_s390_pread64
+181  common	pwrite64		sys_pwrite64			compat_sys_s390_pwrite64
+182  32		chown			-				sys_chown16
+183  common	getcwd			sys_getcwd			sys_getcwd
+184  common	capget			sys_capget			sys_capget
+185  common	capset			sys_capset			sys_capset
+186  common	sigaltstack		sys_sigaltstack			compat_sys_sigaltstack
+187  common	sendfile		sys_sendfile64			compat_sys_sendfile
+188  common	getpmsg			-				-
+189  common	putpmsg			-				-
+190  common	vfork			sys_vfork			sys_vfork
+191  32		ugetrlimit		-				compat_sys_getrlimit
+191  64		getrlimit		sys_getrlimit			-
+192  32		mmap2			-				compat_sys_s390_mmap2
+193  32		truncate64		-				compat_sys_s390_truncate64
+194  32		ftruncate64		-				compat_sys_s390_ftruncate64
+195  32		stat64			-				compat_sys_s390_stat64
+196  32		lstat64			-				compat_sys_s390_lstat64
+197  32		fstat64			-				compat_sys_s390_fstat64
+198  32		lchown32		-				sys_lchown
+198  64		lchown			sys_lchown			-
+199  32		getuid32		-				sys_getuid
+199  64		getuid			sys_getuid			-
+200  32		getgid32		-				sys_getgid
+200  64		getgid			sys_getgid			-
+201  32		geteuid32		-				sys_geteuid
+201  64		geteuid			sys_geteuid			-
+202  32		getegid32		-				sys_getegid
+202  64		getegid			sys_getegid			-
+203  32		setreuid32		-				sys_setreuid
+203  64		setreuid		sys_setreuid			-
+204  32		setregid32		-				sys_setregid
+204  64		setregid		sys_setregid			-
+205  32		getgroups32		-				sys_getgroups
+205  64		getgroups		sys_getgroups			-
+206  32		setgroups32		-				sys_setgroups
+206  64		setgroups		sys_setgroups			-
+207  32		fchown32		-				sys_fchown
+207  64		fchown			sys_fchown			-
+208  32		setresuid32		-				sys_setresuid
+208  64		setresuid		sys_setresuid			-
+209  32		getresuid32		-				sys_getresuid
+209  64		getresuid		sys_getresuid			-
+210  32		setresgid32		-				sys_setresgid
+210  64		setresgid		sys_setresgid			-
+211  32		getresgid32		-				sys_getresgid
+211  64		getresgid		sys_getresgid			-
+212  32		chown32			-				sys_chown
+212  64		chown			sys_chown			-
+213  32		setuid32		-				sys_setuid
+213  64		setuid			sys_setuid			-
+214  32		setgid32		-				sys_setgid
+214  64		setgid			sys_setgid			-
+215  32		setfsuid32		-				sys_setfsuid
+215  64		setfsuid		sys_setfsuid			-
+216  32		setfsgid32		-				sys_setfsgid
+216  64		setfsgid		sys_setfsgid			-
+217  common	pivot_root		sys_pivot_root			sys_pivot_root
+218  common	mincore			sys_mincore			sys_mincore
+219  common	madvise			sys_madvise			sys_madvise
+220  common	getdents64		sys_getdents64			sys_getdents64
+221  32		fcntl64			-				compat_sys_fcntl64
+222  common	readahead		sys_readahead			compat_sys_s390_readahead
+223  32		sendfile64		-				compat_sys_sendfile64
+224  common	setxattr		sys_setxattr			sys_setxattr
+225  common	lsetxattr		sys_lsetxattr			sys_lsetxattr
+226  common	fsetxattr		sys_fsetxattr			sys_fsetxattr
+227  common	getxattr		sys_getxattr			sys_getxattr
+228  common	lgetxattr		sys_lgetxattr			sys_lgetxattr
+229  common	fgetxattr		sys_fgetxattr			sys_fgetxattr
+230  common	listxattr		sys_listxattr			sys_listxattr
+231  common	llistxattr		sys_llistxattr			sys_llistxattr
+232  common	flistxattr		sys_flistxattr			sys_flistxattr
+233  common	removexattr		sys_removexattr			sys_removexattr
+234  common	lremovexattr		sys_lremovexattr		sys_lremovexattr
+235  common	fremovexattr		sys_fremovexattr		sys_fremovexattr
+236  common	gettid			sys_gettid			sys_gettid
+237  common	tkill			sys_tkill			sys_tkill
+238  common	futex			sys_futex			sys_futex_time32
+239  common	sched_setaffinity	sys_sched_setaffinity		compat_sys_sched_setaffinity
+240  common	sched_getaffinity	sys_sched_getaffinity		compat_sys_sched_getaffinity
+241  common	tgkill			sys_tgkill			sys_tgkill
+243  common	io_setup		sys_io_setup			compat_sys_io_setup
+244  common	io_destroy		sys_io_destroy			sys_io_destroy
+245  common	io_getevents		sys_io_getevents		sys_io_getevents_time32
+246  common	io_submit		sys_io_submit			compat_sys_io_submit
+247  common	io_cancel		sys_io_cancel			sys_io_cancel
+248  common	exit_group		sys_exit_group			sys_exit_group
+249  common	epoll_create		sys_epoll_create		sys_epoll_create
+250  common	epoll_ctl		sys_epoll_ctl			sys_epoll_ctl
+251  common	epoll_wait		sys_epoll_wait			sys_epoll_wait
+252  common	set_tid_address		sys_set_tid_address		sys_set_tid_address
+253  common	fadvise64		sys_fadvise64_64		compat_sys_s390_fadvise64
+254  common	timer_create		sys_timer_create		compat_sys_timer_create
+255  common	timer_settime		sys_timer_settime		sys_timer_settime32
+256  common	timer_gettime		sys_timer_gettime		sys_timer_gettime32
+257  common	timer_getoverrun	sys_timer_getoverrun		sys_timer_getoverrun
+258  common	timer_delete		sys_timer_delete		sys_timer_delete
+259  common	clock_settime		sys_clock_settime		sys_clock_settime32
+260  common	clock_gettime		sys_clock_gettime		sys_clock_gettime32
+261  common	clock_getres		sys_clock_getres		sys_clock_getres_time32
+262  common	clock_nanosleep		sys_clock_nanosleep		sys_clock_nanosleep_time32
+264  32		fadvise64_64		-				compat_sys_s390_fadvise64_64
+265  common	statfs64		sys_statfs64			compat_sys_statfs64
+266  common	fstatfs64		sys_fstatfs64			compat_sys_fstatfs64
+267  common	remap_file_pages	sys_remap_file_pages		sys_remap_file_pages
+268  common	mbind			sys_mbind			sys_mbind
+269  common	get_mempolicy		sys_get_mempolicy		sys_get_mempolicy
+270  common	set_mempolicy		sys_set_mempolicy		sys_set_mempolicy
+271  common	mq_open			sys_mq_open			compat_sys_mq_open
+272  common	mq_unlink		sys_mq_unlink			sys_mq_unlink
+273  common	mq_timedsend		sys_mq_timedsend		sys_mq_timedsend_time32
+274  common	mq_timedreceive		sys_mq_timedreceive		sys_mq_timedreceive_time32
+275  common	mq_notify		sys_mq_notify			compat_sys_mq_notify
+276  common	mq_getsetattr		sys_mq_getsetattr		compat_sys_mq_getsetattr
+277  common	kexec_load		sys_kexec_load			compat_sys_kexec_load
+278  common	add_key			sys_add_key			sys_add_key
+279  common	request_key		sys_request_key			sys_request_key
+280  common	keyctl			sys_keyctl			compat_sys_keyctl
+281  common	waitid			sys_waitid			compat_sys_waitid
+282  common	ioprio_set		sys_ioprio_set			sys_ioprio_set
+283  common	ioprio_get		sys_ioprio_get			sys_ioprio_get
+284  common	inotify_init		sys_inotify_init		sys_inotify_init
+285  common	inotify_add_watch	sys_inotify_add_watch		sys_inotify_add_watch
+286  common	inotify_rm_watch	sys_inotify_rm_watch		sys_inotify_rm_watch
+287  common	migrate_pages		sys_migrate_pages		sys_migrate_pages
+288  common	openat			sys_openat			compat_sys_openat
+289  common	mkdirat			sys_mkdirat			sys_mkdirat
+290  common	mknodat			sys_mknodat			sys_mknodat
+291  common	fchownat		sys_fchownat			sys_fchownat
+292  common	futimesat		sys_futimesat			sys_futimesat_time32
+293  32		fstatat64		-				compat_sys_s390_fstatat64
+293  64		newfstatat		sys_newfstatat			-
+294  common	unlinkat		sys_unlinkat			sys_unlinkat
+295  common	renameat		sys_renameat			sys_renameat
+296  common	linkat			sys_linkat			sys_linkat
+297  common	symlinkat		sys_symlinkat			sys_symlinkat
+298  common	readlinkat		sys_readlinkat			sys_readlinkat
+299  common	fchmodat		sys_fchmodat			sys_fchmodat
+300  common	faccessat		sys_faccessat			sys_faccessat
+301  common	pselect6		sys_pselect6			compat_sys_pselect6_time32
+302  common	ppoll			sys_ppoll			compat_sys_ppoll_time32
+303  common	unshare			sys_unshare			sys_unshare
+304  common	set_robust_list		sys_set_robust_list		compat_sys_set_robust_list
+305  common	get_robust_list		sys_get_robust_list		compat_sys_get_robust_list
+306  common	splice			sys_splice			sys_splice
+307  common	sync_file_range		sys_sync_file_range		compat_sys_s390_sync_file_range
+308  common	tee			sys_tee				sys_tee
+309  common	vmsplice		sys_vmsplice			sys_vmsplice
+310  common	move_pages		sys_move_pages			sys_move_pages
+311  common	getcpu			sys_getcpu			sys_getcpu
+312  common	epoll_pwait		sys_epoll_pwait			compat_sys_epoll_pwait
+313  common	utimes			sys_utimes			sys_utimes_time32
+314  common	fallocate		sys_fallocate			compat_sys_s390_fallocate
+315  common	utimensat		sys_utimensat			sys_utimensat_time32
+316  common	signalfd		sys_signalfd			compat_sys_signalfd
+317  common	timerfd			-				-
+318  common	eventfd			sys_eventfd			sys_eventfd
+319  common	timerfd_create		sys_timerfd_create		sys_timerfd_create
+320  common	timerfd_settime		sys_timerfd_settime		sys_timerfd_settime32
+321  common	timerfd_gettime		sys_timerfd_gettime		sys_timerfd_gettime32
+322  common	signalfd4		sys_signalfd4			compat_sys_signalfd4
+323  common	eventfd2		sys_eventfd2			sys_eventfd2
+324  common	inotify_init1		sys_inotify_init1		sys_inotify_init1
+325  common	pipe2			sys_pipe2			sys_pipe2
+326  common	dup3			sys_dup3			sys_dup3
+327  common	epoll_create1		sys_epoll_create1		sys_epoll_create1
+328  common	preadv			sys_preadv			compat_sys_preadv
+329  common	pwritev			sys_pwritev			compat_sys_pwritev
+330  common	rt_tgsigqueueinfo	sys_rt_tgsigqueueinfo		compat_sys_rt_tgsigqueueinfo
+331  common	perf_event_open		sys_perf_event_open		sys_perf_event_open
+332  common	fanotify_init		sys_fanotify_init		sys_fanotify_init
+333  common	fanotify_mark		sys_fanotify_mark		compat_sys_fanotify_mark
+334  common	prlimit64		sys_prlimit64			sys_prlimit64
+335  common	name_to_handle_at	sys_name_to_handle_at		sys_name_to_handle_at
+336  common	open_by_handle_at	sys_open_by_handle_at		compat_sys_open_by_handle_at
+337  common	clock_adjtime		sys_clock_adjtime		sys_clock_adjtime32
+338  common	syncfs			sys_syncfs			sys_syncfs
+339  common	setns			sys_setns			sys_setns
+340  common	process_vm_readv	sys_process_vm_readv		sys_process_vm_readv
+341  common	process_vm_writev	sys_process_vm_writev		sys_process_vm_writev
+342  common	s390_runtime_instr	sys_s390_runtime_instr		sys_s390_runtime_instr
+343  common	kcmp			sys_kcmp			sys_kcmp
+344  common	finit_module		sys_finit_module		sys_finit_module
+345  common	sched_setattr		sys_sched_setattr		sys_sched_setattr
+346  common	sched_getattr		sys_sched_getattr		sys_sched_getattr
+347  common	renameat2		sys_renameat2			sys_renameat2
+348  common	seccomp			sys_seccomp			sys_seccomp
+349  common	getrandom		sys_getrandom			sys_getrandom
+350  common	memfd_create		sys_memfd_create		sys_memfd_create
+351  common	bpf			sys_bpf				sys_bpf
+352  common	s390_pci_mmio_write	sys_s390_pci_mmio_write		sys_s390_pci_mmio_write
+353  common	s390_pci_mmio_read	sys_s390_pci_mmio_read		sys_s390_pci_mmio_read
+354  common	execveat		sys_execveat			compat_sys_execveat
+355  common	userfaultfd		sys_userfaultfd			sys_userfaultfd
+356  common	membarrier		sys_membarrier			sys_membarrier
+357  common	recvmmsg		sys_recvmmsg			compat_sys_recvmmsg_time32
+358  common	sendmmsg		sys_sendmmsg			compat_sys_sendmmsg
+359  common	socket			sys_socket			sys_socket
+360  common	socketpair		sys_socketpair			sys_socketpair
+361  common	bind			sys_bind			sys_bind
+362  common	connect			sys_connect			sys_connect
+363  common	listen			sys_listen			sys_listen
+364  common	accept4			sys_accept4			sys_accept4
+365  common	getsockopt		sys_getsockopt			sys_getsockopt
+366  common	setsockopt		sys_setsockopt			sys_setsockopt
+367  common	getsockname		sys_getsockname			sys_getsockname
+368  common	getpeername		sys_getpeername			sys_getpeername
+369  common	sendto			sys_sendto			sys_sendto
+370  common	sendmsg			sys_sendmsg			compat_sys_sendmsg
+371  common	recvfrom		sys_recvfrom			compat_sys_recvfrom
+372  common	recvmsg			sys_recvmsg			compat_sys_recvmsg
+373  common	shutdown		sys_shutdown			sys_shutdown
+374  common	mlock2			sys_mlock2			sys_mlock2
+375  common	copy_file_range		sys_copy_file_range		sys_copy_file_range
+376  common	preadv2			sys_preadv2			compat_sys_preadv2
+377  common	pwritev2		sys_pwritev2			compat_sys_pwritev2
+378  common	s390_guarded_storage	sys_s390_guarded_storage	sys_s390_guarded_storage
+379  common	statx			sys_statx			sys_statx
+380  common	s390_sthyi		sys_s390_sthyi			sys_s390_sthyi
+381  common	kexec_file_load		sys_kexec_file_load		sys_kexec_file_load
+382  common	io_pgetevents		sys_io_pgetevents		compat_sys_io_pgetevents
+383  common	rseq			sys_rseq			sys_rseq
+384  common	pkey_mprotect		sys_pkey_mprotect		sys_pkey_mprotect
+385  common	pkey_alloc		sys_pkey_alloc			sys_pkey_alloc
+386  common	pkey_free		sys_pkey_free			sys_pkey_free
+# room for arch specific syscalls
+392	64	semtimedop		sys_semtimedop			-
+393  common	semget			sys_semget			sys_semget
+394  common	semctl			sys_semctl			compat_sys_semctl
+395  common	shmget			sys_shmget			sys_shmget
+396  common	shmctl			sys_shmctl			compat_sys_shmctl
+397  common	shmat			sys_shmat			compat_sys_shmat
+398  common	shmdt			sys_shmdt 			sys_shmdt
+399  common	msgget			sys_msgget			sys_msgget
+400  common	msgsnd			sys_msgsnd			compat_sys_msgsnd
+401  common	msgrcv			sys_msgrcv			compat_sys_msgrcv
+402  common	msgctl			sys_msgctl			compat_sys_msgctl
+403	32	clock_gettime64		-				sys_clock_gettime
+404	32	clock_settime64		-				sys_clock_settime
+405	32	clock_adjtime64		-				sys_clock_adjtime
+406	32	clock_getres_time64	-				sys_clock_getres
+407	32	clock_nanosleep_time64	-				sys_clock_nanosleep
+408	32	timer_gettime64		-				sys_timer_gettime
+409	32	timer_settime64		-				sys_timer_settime
+410	32	timerfd_gettime64	-				sys_timerfd_gettime
+411	32	timerfd_settime64	-				sys_timerfd_settime
+412	32	utimensat_time64	-				sys_utimensat
+413	32	pselect6_time64		-				compat_sys_pselect6_time64
+414	32	ppoll_time64		-				compat_sys_ppoll_time64
+416	32	io_pgetevents_time64	-				sys_io_pgetevents
+417	32	recvmmsg_time64		-				compat_sys_recvmmsg_time64
+418	32	mq_timedsend_time64	-				sys_mq_timedsend
+419	32	mq_timedreceive_time64	-				sys_mq_timedreceive
+420	32	semtimedop_time64	-				sys_semtimedop
+421	32	rt_sigtimedwait_time64	-				compat_sys_rt_sigtimedwait_time64
+422	32	futex_time64		-				sys_futex
+423	32	sched_rr_get_interval_time64	-			sys_sched_rr_get_interval
+424  common	pidfd_send_signal	sys_pidfd_send_signal		sys_pidfd_send_signal
+425  common	io_uring_setup		sys_io_uring_setup              sys_io_uring_setup
+426  common	io_uring_enter		sys_io_uring_enter              sys_io_uring_enter
+427  common	io_uring_register	sys_io_uring_register           sys_io_uring_register
+428  common	open_tree		sys_open_tree			sys_open_tree
+429  common	move_mount		sys_move_mount			sys_move_mount
+430  common	fsopen			sys_fsopen			sys_fsopen
+431  common	fsconfig		sys_fsconfig			sys_fsconfig
+432  common	fsmount			sys_fsmount			sys_fsmount
+433  common	fspick			sys_fspick			sys_fspick
+434  common	pidfd_open		sys_pidfd_open			sys_pidfd_open
+435  common	clone3			sys_clone3			sys_clone3
+436  common	close_range		sys_close_range			sys_close_range
+437  common	openat2			sys_openat2			sys_openat2
+438  common	pidfd_getfd		sys_pidfd_getfd			sys_pidfd_getfd
+439  common	faccessat2		sys_faccessat2			sys_faccessat2
+440  common	process_madvise		sys_process_madvise		sys_process_madvise
+441  common	epoll_pwait2		sys_epoll_pwait2		compat_sys_epoll_pwait2
+442  common	mount_setattr		sys_mount_setattr		sys_mount_setattr
+443  common	quotactl_fd		sys_quotactl_fd			sys_quotactl_fd
+444  common	landlock_create_ruleset	sys_landlock_create_ruleset	sys_landlock_create_ruleset
+445  common	landlock_add_rule	sys_landlock_add_rule		sys_landlock_add_rule
+446  common	landlock_restrict_self	sys_landlock_restrict_self	sys_landlock_restrict_self
+447  common	memfd_secret		sys_memfd_secret		sys_memfd_secret
+448  common	process_mrelease	sys_process_mrelease		sys_process_mrelease
+449  common	futex_waitv		sys_futex_waitv			sys_futex_waitv
+450  common	set_mempolicy_home_node	sys_set_mempolicy_home_node	sys_set_mempolicy_home_node
+451  common	cachestat		sys_cachestat			sys_cachestat
+452  common	fchmodat2		sys_fchmodat2			sys_fchmodat2
diff --git a/arch/s390/kernel/syscalls/syscalltbl b/arch/s390/kernel/syscalls/syscalltbl
new file mode 100755
index 0000000000..fbac1732f8
--- /dev/null
+++ b/arch/s390/kernel/syscalls/syscalltbl
@@ -0,0 +1,232 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+#
+# Generate system call table and header files
+#
+# Copyright IBM Corp. 2018
+# Author(s):  Hendrik Brueckner <brueckner@linux.vnet.ibm.com>
+
+#
+# File path to the system call table definition.
+# You can set the path with the -i option.  If omitted,
+# system call table definitions are read from standard input.
+#
+SYSCALL_TBL=""
+
+
+create_syscall_table_entries()
+{
+	local nr abi name entry64 entry32 _ignore
+	local temp=$(mktemp ${TMPDIR:-/tmp}/syscalltbl-common.XXXXXXXXX)
+
+	(
+	#
+	# Initialize with 0 to create an NI_SYSCALL for 0
+	#
+	local prev_nr=0 prev_32=sys_ni_syscall prev_64=sys_ni_syscall
+	while read nr abi name entry64 entry32 _ignore; do
+		test x$entry32 = x- && entry32=sys_ni_syscall
+		test x$entry64 = x- && entry64=sys_ni_syscall
+
+		if test $prev_nr -eq $nr; then
+			#
+			# Same syscall but different ABI, just update
+			# the respective entry point
+			#
+			case $abi in
+			32)
+				prev_32=$entry32
+			;;
+			64)
+				prev_64=$entry64
+			;;
+			esac
+			continue;
+		else
+			printf "%d\t%s\t%s\n" $prev_nr $prev_64 $prev_32
+		fi
+
+		prev_nr=$nr
+		prev_64=$entry64
+		prev_32=$entry32
+	done
+	printf "%d\t%s\t%s\n" $prev_nr $prev_64 $prev_32
+	) >> $temp
+
+	#
+	# Check for duplicate syscall numbers
+	#
+	if ! cat $temp |cut -f1 |uniq -d 2>&1; then
+		echo "Error: generated system call table contains duplicate entries: $temp" >&2
+		exit 1
+	fi
+
+	#
+	# Generate syscall table
+	#
+	prev_nr=0
+	while read nr entry64 entry32; do
+		while test $prev_nr -lt $((nr - 1)); do
+			printf "NI_SYSCALL\n"
+			prev_nr=$((prev_nr + 1))
+		done
+		if test x$entry64 = xsys_ni_syscall &&
+		   test x$entry32 = xsys_ni_syscall; then
+			printf "NI_SYSCALL\n"
+		else
+			printf "SYSCALL(%s,%s)\n" $entry64 $entry32
+		fi
+		prev_nr=$nr
+	done < $temp
+	rm $temp
+}
+
+generate_syscall_table()
+{
+	cat <<-EoHEADER
+	/* SPDX-License-Identifier: GPL-2.0 */
+	/*
+	 * Definitions for sys_call_table, each line represents an
+	 * entry in the table in the form
+	 * SYSCALL(64 bit syscall, 31 bit emulated syscall)
+	 *
+	 * This file is meant to be included from entry.S.
+	 */
+
+	#define NI_SYSCALL SYSCALL(sys_ni_syscall,sys_ni_syscall)
+
+EoHEADER
+	grep -Ev '^(#|[[:blank:]]*$)' $SYSCALL_TBL	\
+		|sort -k1 -n				\
+		|create_syscall_table_entries
+}
+
+create_header_defines()
+{
+	local nr abi name _ignore
+
+	while read nr abi name _ignore; do
+		printf "#define __NR_%s %d\n" $name $nr
+	done
+}
+
+normalize_fileguard()
+{
+	local fileguard="$1"
+
+	echo "$1" |tr '[[:lower:]]' '[[:upper:]]' \
+		  |sed -e 's/[^A-Z0-9_]/_/g' -e 's/__/_/g'
+}
+
+generate_syscall_header()
+{
+	local abis=$(echo "($1)" | tr ',' '|')
+	local filename="$2"
+	local fileguard suffix
+
+	if test "$filename"; then
+		fileguard=$(normalize_fileguard "__UAPI_ASM_S390_$2")
+	else
+		case "$abis" in
+		*64*) suffix=64 ;;
+		*32*) suffix=32 ;;
+		esac
+		fileguard=$(normalize_fileguard "__UAPI_ASM_S390_SYSCALLS_$suffix")
+	fi
+
+	cat <<-EoHEADER
+	/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+	#ifndef ${fileguard}
+	#define ${fileguard}
+
+EoHEADER
+
+	grep -E "^[[:digit:]]+[[:space:]]+${abis}" $SYSCALL_TBL	\
+		|sort -k1 -n					\
+		|create_header_defines
+
+	cat <<-EoFOOTER
+
+	#endif /* ${fileguard} */
+EoFOOTER
+}
+
+__max_syscall_nr()
+{
+	local abis=$(echo "($1)" | tr ',' '|')
+
+	grep -E "^[[:digit:]]+[[:space:]]+${abis}" $SYSCALL_TBL	 \
+		|sed -ne 's/^\([[:digit:]]*\)[[:space:]].*/\1/p' \
+		|sort -n					 \
+		|tail -1
+}
+
+
+generate_syscall_nr()
+{
+	local abis="$1"
+	local max_syscall_nr num_syscalls
+
+	max_syscall_nr=$(__max_syscall_nr "$abis")
+	num_syscalls=$((max_syscall_nr + 1))
+
+	cat <<-EoHEADER
+	/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+	#ifndef __ASM_S390_SYSCALLS_NR
+	#define __ASM_S390_SYSCALLS_NR
+
+	#define NR_syscalls ${num_syscalls}
+
+	#endif /* __ASM_S390_SYSCALLS_NR */
+EoHEADER
+}
+
+
+#
+# Parse command line arguments
+#
+do_syscall_header=""
+do_syscall_table=""
+do_syscall_nr=""
+output_file=""
+abi_list="common,64"
+filename=""
+while getopts ":HNSXi:a:f:" arg; do
+	case $arg in
+	a)
+		abi_list="$OPTARG"
+		;;
+	i)
+		SYSCALL_TBL="$OPTARG"
+		;;
+	f)
+		filename=${OPTARG##*/}
+		;;
+	H)
+		do_syscall_header=1
+		;;
+	N)
+		do_syscall_nr=1
+		;;
+	S)
+		do_syscall_table=1
+		;;
+	X)
+		set -x
+		;;
+	:)
+		echo "Missing argument for -$OPTARG" >&2
+		exit 1
+	;;
+	\?)
+		echo "Invalid option specified" >&2
+		exit 1
+	;;
+	esac
+done
+
+test "$do_syscall_header" && generate_syscall_header "$abi_list" "$filename"
+test "$do_syscall_table" && generate_syscall_table
+test "$do_syscall_nr" && generate_syscall_nr "$abi_list"
+
+exit 0
diff --git a/arch/s390/kernel/sysinfo.c b/arch/s390/kernel/sysinfo.c
new file mode 100644
index 0000000000..b5e364358c
--- /dev/null
+++ b/arch/s390/kernel/sysinfo.c
@@ -0,0 +1,570 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ *  Copyright IBM Corp. 2001, 2009
+ *  Author(s): Ulrich Weigand <Ulrich.Weigand@de.ibm.com>,
+ *	       Martin Schwidefsky <schwidefsky@de.ibm.com>,
+ */
+
+#include <linux/debugfs.h>
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <linux/proc_fs.h>
+#include <linux/seq_file.h>
+#include <linux/init.h>
+#include <linux/delay.h>
+#include <linux/export.h>
+#include <linux/slab.h>
+#include <asm/asm-extable.h>
+#include <asm/ebcdic.h>
+#include <asm/debug.h>
+#include <asm/sysinfo.h>
+#include <asm/cpcmd.h>
+#include <asm/topology.h>
+#include <asm/fpu/api.h>
+
+int topology_max_mnest;
+
+static inline int __stsi(void *sysinfo, int fc, int sel1, int sel2, int *lvl)
+{
+	int r0 = (fc << 28) | sel1;
+	int rc = 0;
+
+	asm volatile(
+		"	lr	0,%[r0]\n"
+		"	lr	1,%[r1]\n"
+		"	stsi	0(%[sysinfo])\n"
+		"0:	jz	2f\n"
+		"1:	lhi	%[rc],%[retval]\n"
+		"2:	lr	%[r0],0\n"
+		EX_TABLE(0b, 1b)
+		: [r0] "+d" (r0), [rc] "+d" (rc)
+		: [r1] "d" (sel2),
+		  [sysinfo] "a" (sysinfo),
+		  [retval] "K" (-EOPNOTSUPP)
+		: "cc", "0", "1", "memory");
+	*lvl = ((unsigned int) r0) >> 28;
+	return rc;
+}
+
+/*
+ * stsi - store system information
+ *
+ * Returns the current configuration level if function code 0 was specified.
+ * Otherwise returns 0 on success or a negative value on error.
+ */
+int stsi(void *sysinfo, int fc, int sel1, int sel2)
+{
+	int lvl, rc;
+
+	rc = __stsi(sysinfo, fc, sel1, sel2, &lvl);
+	if (rc)
+		return rc;
+	return fc ? 0 : lvl;
+}
+EXPORT_SYMBOL(stsi);
+
+#ifdef CONFIG_PROC_FS
+
+static bool convert_ext_name(unsigned char encoding, char *name, size_t len)
+{
+	switch (encoding) {
+	case 1: /* EBCDIC */
+		EBCASC(name, len);
+		break;
+	case 2:	/* UTF-8 */
+		break;
+	default:
+		return false;
+	}
+	return true;
+}
+
+static void stsi_1_1_1(struct seq_file *m, struct sysinfo_1_1_1 *info)
+{
+	int i;
+
+	if (stsi(info, 1, 1, 1))
+		return;
+	EBCASC(info->manufacturer, sizeof(info->manufacturer));
+	EBCASC(info->type, sizeof(info->type));
+	EBCASC(info->model, sizeof(info->model));
+	EBCASC(info->sequence, sizeof(info->sequence));
+	EBCASC(info->plant, sizeof(info->plant));
+	EBCASC(info->model_capacity, sizeof(info->model_capacity));
+	EBCASC(info->model_perm_cap, sizeof(info->model_perm_cap));
+	EBCASC(info->model_temp_cap, sizeof(info->model_temp_cap));
+	seq_printf(m, "Manufacturer:         %-16.16s\n", info->manufacturer);
+	seq_printf(m, "Type:                 %-4.4s\n", info->type);
+	if (info->lic)
+		seq_printf(m, "LIC Identifier:       %016lx\n", info->lic);
+	/*
+	 * Sigh: the model field has been renamed with System z9
+	 * to model_capacity and a new model field has been added
+	 * after the plant field. To avoid confusing older programs
+	 * the "Model:" prints "model_capacity model" or just
+	 * "model_capacity" if the model string is empty .
+	 */
+	seq_printf(m, "Model:                %-16.16s", info->model_capacity);
+	if (info->model[0] != '\0')
+		seq_printf(m, " %-16.16s", info->model);
+	seq_putc(m, '\n');
+	seq_printf(m, "Sequence Code:        %-16.16s\n", info->sequence);
+	seq_printf(m, "Plant:                %-4.4s\n", info->plant);
+	seq_printf(m, "Model Capacity:       %-16.16s %08u\n",
+		   info->model_capacity, info->model_cap_rating);
+	if (info->model_perm_cap_rating)
+		seq_printf(m, "Model Perm. Capacity: %-16.16s %08u\n",
+			   info->model_perm_cap,
+			   info->model_perm_cap_rating);
+	if (info->model_temp_cap_rating)
+		seq_printf(m, "Model Temp. Capacity: %-16.16s %08u\n",
+			   info->model_temp_cap,
+			   info->model_temp_cap_rating);
+	if (info->ncr)
+		seq_printf(m, "Nominal Cap. Rating:  %08u\n", info->ncr);
+	if (info->npr)
+		seq_printf(m, "Nominal Perm. Rating: %08u\n", info->npr);
+	if (info->ntr)
+		seq_printf(m, "Nominal Temp. Rating: %08u\n", info->ntr);
+	if (info->cai) {
+		seq_printf(m, "Capacity Adj. Ind.:   %d\n", info->cai);
+		seq_printf(m, "Capacity Ch. Reason:  %d\n", info->ccr);
+		seq_printf(m, "Capacity Transient:   %d\n", info->t);
+	}
+	if (info->p) {
+		for (i = 1; i <= ARRAY_SIZE(info->typepct); i++) {
+			seq_printf(m, "Type %d Percentage:    %d\n",
+				   i, info->typepct[i - 1]);
+		}
+	}
+}
+
+static void stsi_15_1_x(struct seq_file *m, struct sysinfo_15_1_x *info)
+{
+	int i;
+
+	seq_putc(m, '\n');
+	if (!MACHINE_HAS_TOPOLOGY)
+		return;
+	if (stsi(info, 15, 1, topology_max_mnest))
+		return;
+	seq_printf(m, "CPU Topology HW:     ");
+	for (i = 0; i < TOPOLOGY_NR_MAG; i++)
+		seq_printf(m, " %d", info->mag[i]);
+	seq_putc(m, '\n');
+#ifdef CONFIG_SCHED_TOPOLOGY
+	store_topology(info);
+	seq_printf(m, "CPU Topology SW:     ");
+	for (i = 0; i < TOPOLOGY_NR_MAG; i++)
+		seq_printf(m, " %d", info->mag[i]);
+	seq_putc(m, '\n');
+#endif
+}
+
+static void stsi_1_2_2(struct seq_file *m, struct sysinfo_1_2_2 *info)
+{
+	struct sysinfo_1_2_2_extension *ext;
+	int i;
+
+	if (stsi(info, 1, 2, 2))
+		return;
+	ext = (struct sysinfo_1_2_2_extension *)
+		((unsigned long) info + info->acc_offset);
+	seq_printf(m, "CPUs Total:           %d\n", info->cpus_total);
+	seq_printf(m, "CPUs Configured:      %d\n", info->cpus_configured);
+	seq_printf(m, "CPUs Standby:         %d\n", info->cpus_standby);
+	seq_printf(m, "CPUs Reserved:        %d\n", info->cpus_reserved);
+	if (info->mt_installed) {
+		seq_printf(m, "CPUs G-MTID:          %d\n", info->mt_gtid);
+		seq_printf(m, "CPUs S-MTID:          %d\n", info->mt_stid);
+	}
+	/*
+	 * Sigh 2. According to the specification the alternate
+	 * capability field is a 32 bit floating point number
+	 * if the higher order 8 bits are not zero. Printing
+	 * a floating point number in the kernel is a no-no,
+	 * always print the number as 32 bit unsigned integer.
+	 * The user-space needs to know about the strange
+	 * encoding of the alternate cpu capability.
+	 */
+	seq_printf(m, "Capability:           %u", info->capability);
+	if (info->format == 1)
+		seq_printf(m, " %u", ext->alt_capability);
+	seq_putc(m, '\n');
+	if (info->nominal_cap)
+		seq_printf(m, "Nominal Capability:   %d\n", info->nominal_cap);
+	if (info->secondary_cap)
+		seq_printf(m, "Secondary Capability: %d\n", info->secondary_cap);
+	for (i = 2; i <= info->cpus_total; i++) {
+		seq_printf(m, "Adjustment %02d-way:    %u",
+			   i, info->adjustment[i-2]);
+		if (info->format == 1)
+			seq_printf(m, " %u", ext->alt_adjustment[i-2]);
+		seq_putc(m, '\n');
+	}
+}
+
+static void stsi_2_2_2(struct seq_file *m, struct sysinfo_2_2_2 *info)
+{
+	if (stsi(info, 2, 2, 2))
+		return;
+	EBCASC(info->name, sizeof(info->name));
+	seq_putc(m, '\n');
+	seq_printf(m, "LPAR Number:          %d\n", info->lpar_number);
+	seq_printf(m, "LPAR Characteristics: ");
+	if (info->characteristics & LPAR_CHAR_DEDICATED)
+		seq_printf(m, "Dedicated ");
+	if (info->characteristics & LPAR_CHAR_SHARED)
+		seq_printf(m, "Shared ");
+	if (info->characteristics & LPAR_CHAR_LIMITED)
+		seq_printf(m, "Limited ");
+	seq_putc(m, '\n');
+	seq_printf(m, "LPAR Name:            %-8.8s\n", info->name);
+	seq_printf(m, "LPAR Adjustment:      %d\n", info->caf);
+	seq_printf(m, "LPAR CPUs Total:      %d\n", info->cpus_total);
+	seq_printf(m, "LPAR CPUs Configured: %d\n", info->cpus_configured);
+	seq_printf(m, "LPAR CPUs Standby:    %d\n", info->cpus_standby);
+	seq_printf(m, "LPAR CPUs Reserved:   %d\n", info->cpus_reserved);
+	seq_printf(m, "LPAR CPUs Dedicated:  %d\n", info->cpus_dedicated);
+	seq_printf(m, "LPAR CPUs Shared:     %d\n", info->cpus_shared);
+	if (info->mt_installed) {
+		seq_printf(m, "LPAR CPUs G-MTID:     %d\n", info->mt_gtid);
+		seq_printf(m, "LPAR CPUs S-MTID:     %d\n", info->mt_stid);
+		seq_printf(m, "LPAR CPUs PS-MTID:    %d\n", info->mt_psmtid);
+	}
+	if (convert_ext_name(info->vsne, info->ext_name, sizeof(info->ext_name))) {
+		seq_printf(m, "LPAR Extended Name:   %-.256s\n", info->ext_name);
+		seq_printf(m, "LPAR UUID:            %pUb\n", &info->uuid);
+	}
+}
+
+static void print_ext_name(struct seq_file *m, int lvl,
+			   struct sysinfo_3_2_2 *info)
+{
+	size_t len = sizeof(info->ext_names[lvl]);
+
+	if (!convert_ext_name(info->vm[lvl].evmne, info->ext_names[lvl], len))
+		return;
+	seq_printf(m, "VM%02d Extended Name:   %-.256s\n", lvl,
+		   info->ext_names[lvl]);
+}
+
+static void print_uuid(struct seq_file *m, int i, struct sysinfo_3_2_2 *info)
+{
+	if (uuid_is_null(&info->vm[i].uuid))
+		return;
+	seq_printf(m, "VM%02d UUID:            %pUb\n", i, &info->vm[i].uuid);
+}
+
+static void stsi_3_2_2(struct seq_file *m, struct sysinfo_3_2_2 *info)
+{
+	int i;
+
+	if (stsi(info, 3, 2, 2))
+		return;
+	for (i = 0; i < info->count; i++) {
+		EBCASC(info->vm[i].name, sizeof(info->vm[i].name));
+		EBCASC(info->vm[i].cpi, sizeof(info->vm[i].cpi));
+		seq_putc(m, '\n');
+		seq_printf(m, "VM%02d Name:            %-8.8s\n", i, info->vm[i].name);
+		seq_printf(m, "VM%02d Control Program: %-16.16s\n", i, info->vm[i].cpi);
+		seq_printf(m, "VM%02d Adjustment:      %d\n", i, info->vm[i].caf);
+		seq_printf(m, "VM%02d CPUs Total:      %d\n", i, info->vm[i].cpus_total);
+		seq_printf(m, "VM%02d CPUs Configured: %d\n", i, info->vm[i].cpus_configured);
+		seq_printf(m, "VM%02d CPUs Standby:    %d\n", i, info->vm[i].cpus_standby);
+		seq_printf(m, "VM%02d CPUs Reserved:   %d\n", i, info->vm[i].cpus_reserved);
+		print_ext_name(m, i, info);
+		print_uuid(m, i, info);
+	}
+}
+
+static int sysinfo_show(struct seq_file *m, void *v)
+{
+	void *info = (void *)get_zeroed_page(GFP_KERNEL);
+	int level;
+
+	if (!info)
+		return 0;
+	level = stsi(NULL, 0, 0, 0);
+	if (level >= 1)
+		stsi_1_1_1(m, info);
+	if (level >= 1)
+		stsi_15_1_x(m, info);
+	if (level >= 1)
+		stsi_1_2_2(m, info);
+	if (level >= 2)
+		stsi_2_2_2(m, info);
+	if (level >= 3)
+		stsi_3_2_2(m, info);
+	free_page((unsigned long)info);
+	return 0;
+}
+
+static int __init sysinfo_create_proc(void)
+{
+	proc_create_single("sysinfo", 0444, NULL, sysinfo_show);
+	return 0;
+}
+device_initcall(sysinfo_create_proc);
+
+#endif /* CONFIG_PROC_FS */
+
+/*
+ * Service levels interface.
+ */
+
+static DECLARE_RWSEM(service_level_sem);
+static LIST_HEAD(service_level_list);
+
+int register_service_level(struct service_level *slr)
+{
+	struct service_level *ptr;
+
+	down_write(&service_level_sem);
+	list_for_each_entry(ptr, &service_level_list, list)
+		if (ptr == slr) {
+			up_write(&service_level_sem);
+			return -EEXIST;
+		}
+	list_add_tail(&slr->list, &service_level_list);
+	up_write(&service_level_sem);
+	return 0;
+}
+EXPORT_SYMBOL(register_service_level);
+
+int unregister_service_level(struct service_level *slr)
+{
+	struct service_level *ptr, *next;
+	int rc = -ENOENT;
+
+	down_write(&service_level_sem);
+	list_for_each_entry_safe(ptr, next, &service_level_list, list) {
+		if (ptr != slr)
+			continue;
+		list_del(&ptr->list);
+		rc = 0;
+		break;
+	}
+	up_write(&service_level_sem);
+	return rc;
+}
+EXPORT_SYMBOL(unregister_service_level);
+
+static void *service_level_start(struct seq_file *m, loff_t *pos)
+{
+	down_read(&service_level_sem);
+	return seq_list_start(&service_level_list, *pos);
+}
+
+static void *service_level_next(struct seq_file *m, void *p, loff_t *pos)
+{
+	return seq_list_next(p, &service_level_list, pos);
+}
+
+static void service_level_stop(struct seq_file *m, void *p)
+{
+	up_read(&service_level_sem);
+}
+
+static int service_level_show(struct seq_file *m, void *p)
+{
+	struct service_level *slr;
+
+	slr = list_entry(p, struct service_level, list);
+	slr->seq_print(m, slr);
+	return 0;
+}
+
+static const struct seq_operations service_level_seq_ops = {
+	.start		= service_level_start,
+	.next		= service_level_next,
+	.stop		= service_level_stop,
+	.show		= service_level_show
+};
+
+static void service_level_vm_print(struct seq_file *m,
+				   struct service_level *slr)
+{
+	char *query_buffer, *str;
+
+	query_buffer = kmalloc(1024, GFP_KERNEL | GFP_DMA);
+	if (!query_buffer)
+		return;
+	cpcmd("QUERY CPLEVEL", query_buffer, 1024, NULL);
+	str = strchr(query_buffer, '\n');
+	if (str)
+		*str = 0;
+	seq_printf(m, "VM: %s\n", query_buffer);
+	kfree(query_buffer);
+}
+
+static struct service_level service_level_vm = {
+	.seq_print = service_level_vm_print
+};
+
+static __init int create_proc_service_level(void)
+{
+	proc_create_seq("service_levels", 0, NULL, &service_level_seq_ops);
+	if (MACHINE_IS_VM)
+		register_service_level(&service_level_vm);
+	return 0;
+}
+subsys_initcall(create_proc_service_level);
+
+/*
+ * CPU capability might have changed. Therefore recalculate loops_per_jiffy.
+ */
+void s390_adjust_jiffies(void)
+{
+	struct sysinfo_1_2_2 *info;
+	unsigned long capability;
+	struct kernel_fpu fpu;
+
+	info = (void *) get_zeroed_page(GFP_KERNEL);
+	if (!info)
+		return;
+
+	if (stsi(info, 1, 2, 2) == 0) {
+		/*
+		 * Major sigh. The cpu capability encoding is "special".
+		 * If the first 9 bits of info->capability are 0 then it
+		 * is a 32 bit unsigned integer in the range 0 .. 2^23.
+		 * If the first 9 bits are != 0 then it is a 32 bit float.
+		 * In addition a lower value indicates a proportionally
+		 * higher cpu capacity. Bogomips are the other way round.
+		 * To get to a halfway suitable number we divide 1e7
+		 * by the cpu capability number. Yes, that means a floating
+		 * point division ..
+		 */
+		kernel_fpu_begin(&fpu, KERNEL_FPR);
+		asm volatile(
+			"	sfpc	%3\n"
+			"	l	%0,%1\n"
+			"	tmlh	%0,0xff80\n"
+			"	jnz	0f\n"
+			"	cefbr	%%f2,%0\n"
+			"	j	1f\n"
+			"0:	le	%%f2,%1\n"
+			"1:	cefbr	%%f0,%2\n"
+			"	debr	%%f0,%%f2\n"
+			"	cgebr	%0,5,%%f0\n"
+			: "=&d" (capability)
+			: "Q" (info->capability), "d" (10000000), "d" (0)
+			: "cc"
+			);
+		kernel_fpu_end(&fpu, KERNEL_FPR);
+	} else
+		/*
+		 * Really old machine without stsi block for basic
+		 * cpu information. Report 42.0 bogomips.
+		 */
+		capability = 42;
+	loops_per_jiffy = capability * (500000/HZ);
+	free_page((unsigned long) info);
+}
+
+/*
+ * calibrate the delay loop
+ */
+void calibrate_delay(void)
+{
+	s390_adjust_jiffies();
+	/* Print the good old Bogomips line .. */
+	printk(KERN_DEBUG "Calibrating delay loop (skipped)... "
+	       "%lu.%02lu BogoMIPS preset\n", loops_per_jiffy/(500000/HZ),
+	       (loops_per_jiffy/(5000/HZ)) % 100);
+}
+
+#ifdef CONFIG_DEBUG_FS
+
+#define STSI_FILE(fc, s1, s2)						       \
+static int stsi_open_##fc##_##s1##_##s2(struct inode *inode, struct file *file)\
+{									       \
+	file->private_data = (void *) get_zeroed_page(GFP_KERNEL);	       \
+	if (!file->private_data)					       \
+		return -ENOMEM;						       \
+	if (stsi(file->private_data, fc, s1, s2)) {			       \
+		free_page((unsigned long)file->private_data);		       \
+		file->private_data = NULL;				       \
+		return -EACCES;						       \
+	}								       \
+	return nonseekable_open(inode, file);				       \
+}									       \
+									       \
+static const struct file_operations stsi_##fc##_##s1##_##s2##_fs_ops = {       \
+	.open		= stsi_open_##fc##_##s1##_##s2,			       \
+	.release	= stsi_release,					       \
+	.read		= stsi_read,					       \
+	.llseek		= no_llseek,					       \
+};
+
+static int stsi_release(struct inode *inode, struct file *file)
+{
+	free_page((unsigned long)file->private_data);
+	return 0;
+}
+
+static ssize_t stsi_read(struct file *file, char __user *buf, size_t size, loff_t *ppos)
+{
+	return simple_read_from_buffer(buf, size, ppos, file->private_data, PAGE_SIZE);
+}
+
+STSI_FILE( 1, 1, 1);
+STSI_FILE( 1, 2, 1);
+STSI_FILE( 1, 2, 2);
+STSI_FILE( 2, 2, 1);
+STSI_FILE( 2, 2, 2);
+STSI_FILE( 3, 2, 2);
+STSI_FILE(15, 1, 2);
+STSI_FILE(15, 1, 3);
+STSI_FILE(15, 1, 4);
+STSI_FILE(15, 1, 5);
+STSI_FILE(15, 1, 6);
+
+struct stsi_file {
+	const struct file_operations *fops;
+	char *name;
+};
+
+static struct stsi_file stsi_file[] __initdata = {
+	{.fops = &stsi_1_1_1_fs_ops,  .name =  "1_1_1"},
+	{.fops = &stsi_1_2_1_fs_ops,  .name =  "1_2_1"},
+	{.fops = &stsi_1_2_2_fs_ops,  .name =  "1_2_2"},
+	{.fops = &stsi_2_2_1_fs_ops,  .name =  "2_2_1"},
+	{.fops = &stsi_2_2_2_fs_ops,  .name =  "2_2_2"},
+	{.fops = &stsi_3_2_2_fs_ops,  .name =  "3_2_2"},
+	{.fops = &stsi_15_1_2_fs_ops, .name = "15_1_2"},
+	{.fops = &stsi_15_1_3_fs_ops, .name = "15_1_3"},
+	{.fops = &stsi_15_1_4_fs_ops, .name = "15_1_4"},
+	{.fops = &stsi_15_1_5_fs_ops, .name = "15_1_5"},
+	{.fops = &stsi_15_1_6_fs_ops, .name = "15_1_6"},
+};
+
+static u8 stsi_0_0_0;
+
+static __init int stsi_init_debugfs(void)
+{
+	struct dentry *stsi_root;
+	struct stsi_file *sf;
+	int lvl, i;
+
+	stsi_root = debugfs_create_dir("stsi", arch_debugfs_dir);
+	lvl = stsi(NULL, 0, 0, 0);
+	if (lvl > 0)
+		stsi_0_0_0 = lvl;
+	debugfs_create_u8("0_0_0", 0400, stsi_root, &stsi_0_0_0);
+	for (i = 0; i < ARRAY_SIZE(stsi_file); i++) {
+		sf = &stsi_file[i];
+		debugfs_create_file(sf->name, 0400, stsi_root, NULL, sf->fops);
+	}
+	if (IS_ENABLED(CONFIG_SCHED_TOPOLOGY) && MACHINE_HAS_TOPOLOGY) {
+		char link_to[10];
+
+		sprintf(link_to, "15_1_%d", topology_mnest_limit());
+		debugfs_create_symlink("topology", stsi_root, link_to);
+	}
+	return 0;
+}
+device_initcall(stsi_init_debugfs);
+
+#endif /* CONFIG_DEBUG_FS */
diff --git a/arch/s390/kernel/text_amode31.S b/arch/s390/kernel/text_amode31.S
new file mode 100644
index 0000000000..14c6d25c03
--- /dev/null
+++ b/arch/s390/kernel/text_amode31.S
@@ -0,0 +1,159 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Code that needs to run below 2 GB.
+ *
+ * Copyright IBM Corp. 2019
+ */
+
+#include <linux/linkage.h>
+#include <asm/asm-extable.h>
+#include <asm/errno.h>
+#include <asm/sigp.h>
+
+	.section .amode31.text,"ax"
+/*
+ * Simplified version of expoline thunk. The normal thunks can not be used here,
+ * because they might be more than 2 GB away, and not reachable by the relative
+ * branch. No comdat, exrl, etc. optimizations used here, because it only
+ * affects a few functions that are not performance-relevant.
+ */
+	.macro BR_EX_AMODE31_r14
+	larl	%r1,0f
+	ex	0,0(%r1)
+	j	.
+0:	br	%r14
+	.endm
+
+/*
+ * int _diag14_amode31(unsigned long rx, unsigned long ry1, unsigned long subcode)
+ */
+SYM_FUNC_START(_diag14_amode31)
+	lgr	%r1,%r2
+	lgr	%r2,%r3
+	lgr	%r3,%r4
+	lhi	%r5,-EIO
+	sam31
+	diag	%r1,%r2,0x14
+.Ldiag14_ex:
+	ipm	%r5
+	srl	%r5,28
+.Ldiag14_fault:
+	sam64
+	lgfr	%r2,%r5
+	BR_EX_AMODE31_r14
+	EX_TABLE_AMODE31(.Ldiag14_ex, .Ldiag14_fault)
+SYM_FUNC_END(_diag14_amode31)
+
+/*
+ * int _diag210_amode31(struct diag210 *addr)
+ */
+SYM_FUNC_START(_diag210_amode31)
+	lgr	%r1,%r2
+	lhi	%r2,-1
+	sam31
+	diag	%r1,%r0,0x210
+.Ldiag210_ex:
+	ipm	%r2
+	srl	%r2,28
+.Ldiag210_fault:
+	sam64
+	lgfr	%r2,%r2
+	BR_EX_AMODE31_r14
+	EX_TABLE_AMODE31(.Ldiag210_ex, .Ldiag210_fault)
+SYM_FUNC_END(_diag210_amode31)
+
+/*
+ * int diag8c(struct diag8c *addr, struct ccw_dev_id *devno, size_t len)
+*/
+SYM_FUNC_START(_diag8c_amode31)
+	llgf	%r3,0(%r3)
+	sam31
+	diag	%r2,%r4,0x8c
+.Ldiag8c_ex:
+	sam64
+	lgfr	%r2,%r3
+	BR_EX_AMODE31_r14
+	EX_TABLE_AMODE31(.Ldiag8c_ex, .Ldiag8c_ex)
+SYM_FUNC_END(_diag8c_amode31)
+/*
+ * int _diag26c_amode31(void *req, void *resp, enum diag26c_sc subcode)
+ */
+SYM_FUNC_START(_diag26c_amode31)
+	lghi	%r5,-EOPNOTSUPP
+	sam31
+	diag	%r2,%r4,0x26c
+.Ldiag26c_ex:
+	sam64
+	lgfr	%r2,%r5
+	BR_EX_AMODE31_r14
+	EX_TABLE_AMODE31(.Ldiag26c_ex, .Ldiag26c_ex)
+SYM_FUNC_END(_diag26c_amode31)
+
+/*
+ * void _diag0c_amode31(struct hypfs_diag0c_entry *entry)
+ */
+SYM_FUNC_START(_diag0c_amode31)
+	sam31
+	diag	%r2,%r2,0x0c
+	sam64
+	BR_EX_AMODE31_r14
+SYM_FUNC_END(_diag0c_amode31)
+
+/*
+ * void _diag308_reset_amode31(void)
+ *
+ * Calls diag 308 subcode 1 and continues execution
+ */
+SYM_FUNC_START(_diag308_reset_amode31)
+	larl	%r4,ctlregs		# Save control registers
+	stctg	%c0,%c15,0(%r4)
+	lg	%r2,0(%r4)		# Disable lowcore protection
+	nilh	%r2,0xefff
+	larl	%r4,ctlreg0
+	stg	%r2,0(%r4)
+	lctlg	%c0,%c0,0(%r4)
+	larl	%r4,fpctl		# Floating point control register
+	stfpc	0(%r4)
+	larl	%r4,prefix		# Save prefix register
+	stpx	0(%r4)
+	larl	%r4,prefix_zero	# Set prefix register to 0
+	spx	0(%r4)
+	larl	%r4,continue_psw	# Save PSW flags
+	epsw	%r2,%r3
+	stm	%r2,%r3,0(%r4)
+	larl	%r4,.Lrestart_part2	# Setup restart PSW at absolute 0
+	larl	%r3,restart_diag308_psw
+	og	%r4,0(%r3)		# Save PSW
+	lghi	%r3,0
+	sturg	%r4,%r3			# Use sturg, because of large pages
+	lghi	%r1,1
+	lghi	%r0,0
+	diag	%r0,%r1,0x308
+.Lrestart_part2:
+	lhi	%r0,0			# Load r0 with zero
+	lhi	%r1,2			# Use mode 2 = ESAME (dump)
+	sigp	%r1,%r0,SIGP_SET_ARCHITECTURE	# Switch to ESAME mode
+	sam64				# Switch to 64 bit addressing mode
+	larl	%r4,ctlregs		# Restore control registers
+	lctlg	%c0,%c15,0(%r4)
+	larl	%r4,fpctl		# Restore floating point ctl register
+	lfpc	0(%r4)
+	larl	%r4,prefix		# Restore prefix register
+	spx	0(%r4)
+	larl	%r4,continue_psw	# Restore PSW flags
+	larl	%r2,.Lcontinue
+	stg	%r2,8(%r4)
+	lpswe	0(%r4)
+.Lcontinue:
+	BR_EX_AMODE31_r14
+SYM_FUNC_END(_diag308_reset_amode31)
+
+	.section .amode31.data,"aw",@progbits
+	.balign	8
+SYM_DATA_LOCAL(restart_diag308_psw,	.long 0x00080000,0x80000000)
+SYM_DATA_LOCAL(continue_psw,		.quad 0,0)
+SYM_DATA_LOCAL(ctlreg0,			.quad 0)
+SYM_DATA_LOCAL(ctlregs,			.fill 16,8,0)
+SYM_DATA_LOCAL(fpctl,			.long 0)
+SYM_DATA_LOCAL(prefix,			.long 0)
+SYM_DATA_LOCAL(prefix_zero,		.long 0)
diff --git a/arch/s390/kernel/time.c b/arch/s390/kernel/time.c
new file mode 100644
index 0000000000..d34d3548c0
--- /dev/null
+++ b/arch/s390/kernel/time.c
@@ -0,0 +1,944 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ *    Time of day based timer functions.
+ *
+ *  S390 version
+ *    Copyright IBM Corp. 1999, 2008
+ *    Author(s): Hartmut Penner (hp@de.ibm.com),
+ *               Martin Schwidefsky (schwidefsky@de.ibm.com),
+ *               Denis Joseph Barrow (djbarrow@de.ibm.com,barrow_dj@yahoo.com)
+ *
+ *  Derived from "arch/i386/kernel/time.c"
+ *    Copyright (C) 1991, 1992, 1995  Linus Torvalds
+ */
+
+#define KMSG_COMPONENT "time"
+#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
+
+#include <linux/kernel_stat.h>
+#include <linux/errno.h>
+#include <linux/export.h>
+#include <linux/sched.h>
+#include <linux/sched/clock.h>
+#include <linux/kernel.h>
+#include <linux/param.h>
+#include <linux/string.h>
+#include <linux/mm.h>
+#include <linux/interrupt.h>
+#include <linux/cpu.h>
+#include <linux/stop_machine.h>
+#include <linux/time.h>
+#include <linux/device.h>
+#include <linux/delay.h>
+#include <linux/init.h>
+#include <linux/smp.h>
+#include <linux/types.h>
+#include <linux/profile.h>
+#include <linux/timex.h>
+#include <linux/notifier.h>
+#include <linux/timekeeper_internal.h>
+#include <linux/clockchips.h>
+#include <linux/gfp.h>
+#include <linux/kprobes.h>
+#include <linux/uaccess.h>
+#include <vdso/vsyscall.h>
+#include <vdso/clocksource.h>
+#include <vdso/helpers.h>
+#include <asm/facility.h>
+#include <asm/delay.h>
+#include <asm/div64.h>
+#include <asm/vdso.h>
+#include <asm/irq.h>
+#include <asm/irq_regs.h>
+#include <asm/vtimer.h>
+#include <asm/stp.h>
+#include <asm/cio.h>
+#include "entry.h"
+
+union tod_clock tod_clock_base __section(".data");
+EXPORT_SYMBOL_GPL(tod_clock_base);
+
+u64 clock_comparator_max = -1ULL;
+EXPORT_SYMBOL_GPL(clock_comparator_max);
+
+static DEFINE_PER_CPU(struct clock_event_device, comparators);
+
+ATOMIC_NOTIFIER_HEAD(s390_epoch_delta_notifier);
+EXPORT_SYMBOL(s390_epoch_delta_notifier);
+
+unsigned char ptff_function_mask[16];
+
+static unsigned long lpar_offset;
+static unsigned long initial_leap_seconds;
+static unsigned long tod_steering_end;
+static long tod_steering_delta;
+
+/*
+ * Get time offsets with PTFF
+ */
+void __init time_early_init(void)
+{
+	struct ptff_qto qto;
+	struct ptff_qui qui;
+	int cs;
+
+	/* Initialize TOD steering parameters */
+	tod_steering_end = tod_clock_base.tod;
+	for (cs = 0; cs < CS_BASES; cs++)
+		vdso_data[cs].arch_data.tod_steering_end = tod_steering_end;
+
+	if (!test_facility(28))
+		return;
+
+	ptff(&ptff_function_mask, sizeof(ptff_function_mask), PTFF_QAF);
+
+	/* get LPAR offset */
+	if (ptff_query(PTFF_QTO) && ptff(&qto, sizeof(qto), PTFF_QTO) == 0)
+		lpar_offset = qto.tod_epoch_difference;
+
+	/* get initial leap seconds */
+	if (ptff_query(PTFF_QUI) && ptff(&qui, sizeof(qui), PTFF_QUI) == 0)
+		initial_leap_seconds = (unsigned long)
+			((long) qui.old_leap * 4096000000L);
+}
+
+unsigned long long noinstr sched_clock_noinstr(void)
+{
+	return tod_to_ns(__get_tod_clock_monotonic());
+}
+
+/*
+ * Scheduler clock - returns current time in nanosec units.
+ */
+unsigned long long notrace sched_clock(void)
+{
+	return tod_to_ns(get_tod_clock_monotonic());
+}
+NOKPROBE_SYMBOL(sched_clock);
+
+static void ext_to_timespec64(union tod_clock *clk, struct timespec64 *xt)
+{
+	unsigned long rem, sec, nsec;
+
+	sec = clk->us;
+	rem = do_div(sec, 1000000);
+	nsec = ((clk->sus + (rem << 12)) * 125) >> 9;
+	xt->tv_sec = sec;
+	xt->tv_nsec = nsec;
+}
+
+void clock_comparator_work(void)
+{
+	struct clock_event_device *cd;
+
+	S390_lowcore.clock_comparator = clock_comparator_max;
+	cd = this_cpu_ptr(&comparators);
+	cd->event_handler(cd);
+}
+
+static int s390_next_event(unsigned long delta,
+			   struct clock_event_device *evt)
+{
+	S390_lowcore.clock_comparator = get_tod_clock() + delta;
+	set_clock_comparator(S390_lowcore.clock_comparator);
+	return 0;
+}
+
+/*
+ * Set up lowcore and control register of the current cpu to
+ * enable TOD clock and clock comparator interrupts.
+ */
+void init_cpu_timer(void)
+{
+	struct clock_event_device *cd;
+	int cpu;
+
+	S390_lowcore.clock_comparator = clock_comparator_max;
+	set_clock_comparator(S390_lowcore.clock_comparator);
+
+	cpu = smp_processor_id();
+	cd = &per_cpu(comparators, cpu);
+	cd->name		= "comparator";
+	cd->features		= CLOCK_EVT_FEAT_ONESHOT;
+	cd->mult		= 16777;
+	cd->shift		= 12;
+	cd->min_delta_ns	= 1;
+	cd->min_delta_ticks	= 1;
+	cd->max_delta_ns	= LONG_MAX;
+	cd->max_delta_ticks	= ULONG_MAX;
+	cd->rating		= 400;
+	cd->cpumask		= cpumask_of(cpu);
+	cd->set_next_event	= s390_next_event;
+
+	clockevents_register_device(cd);
+
+	/* Enable clock comparator timer interrupt. */
+	__ctl_set_bit(0,11);
+
+	/* Always allow the timing alert external interrupt. */
+	__ctl_set_bit(0, 4);
+}
+
+static void clock_comparator_interrupt(struct ext_code ext_code,
+				       unsigned int param32,
+				       unsigned long param64)
+{
+	inc_irq_stat(IRQEXT_CLK);
+	if (S390_lowcore.clock_comparator == clock_comparator_max)
+		set_clock_comparator(S390_lowcore.clock_comparator);
+}
+
+static void stp_timing_alert(struct stp_irq_parm *);
+
+static void timing_alert_interrupt(struct ext_code ext_code,
+				   unsigned int param32, unsigned long param64)
+{
+	inc_irq_stat(IRQEXT_TLA);
+	if (param32 & 0x00038000)
+		stp_timing_alert((struct stp_irq_parm *) &param32);
+}
+
+static void stp_reset(void);
+
+void read_persistent_clock64(struct timespec64 *ts)
+{
+	union tod_clock clk;
+	u64 delta;
+
+	delta = initial_leap_seconds + TOD_UNIX_EPOCH;
+	store_tod_clock_ext(&clk);
+	clk.eitod -= delta;
+	ext_to_timespec64(&clk, ts);
+}
+
+void __init read_persistent_wall_and_boot_offset(struct timespec64 *wall_time,
+						 struct timespec64 *boot_offset)
+{
+	struct timespec64 boot_time;
+	union tod_clock clk;
+	u64 delta;
+
+	delta = initial_leap_seconds + TOD_UNIX_EPOCH;
+	clk = tod_clock_base;
+	clk.eitod -= delta;
+	ext_to_timespec64(&clk, &boot_time);
+
+	read_persistent_clock64(wall_time);
+	*boot_offset = timespec64_sub(*wall_time, boot_time);
+}
+
+static u64 read_tod_clock(struct clocksource *cs)
+{
+	unsigned long now, adj;
+
+	preempt_disable(); /* protect from changes to steering parameters */
+	now = get_tod_clock();
+	adj = tod_steering_end - now;
+	if (unlikely((s64) adj > 0))
+		/*
+		 * manually steer by 1 cycle every 2^16 cycles. This
+		 * corresponds to shifting the tod delta by 15. 1s is
+		 * therefore steered in ~9h. The adjust will decrease
+		 * over time, until it finally reaches 0.
+		 */
+		now += (tod_steering_delta < 0) ? (adj >> 15) : -(adj >> 15);
+	preempt_enable();
+	return now;
+}
+
+static struct clocksource clocksource_tod = {
+	.name		= "tod",
+	.rating		= 400,
+	.read		= read_tod_clock,
+	.mask		= CLOCKSOURCE_MASK(64),
+	.mult		= 1000,
+	.shift		= 12,
+	.flags		= CLOCK_SOURCE_IS_CONTINUOUS,
+	.vdso_clock_mode = VDSO_CLOCKMODE_TOD,
+};
+
+struct clocksource * __init clocksource_default_clock(void)
+{
+	return &clocksource_tod;
+}
+
+/*
+ * Initialize the TOD clock and the CPU timer of
+ * the boot cpu.
+ */
+void __init time_init(void)
+{
+	/* Reset time synchronization interfaces. */
+	stp_reset();
+
+	/* request the clock comparator external interrupt */
+	if (register_external_irq(EXT_IRQ_CLK_COMP, clock_comparator_interrupt))
+		panic("Couldn't request external interrupt 0x1004");
+
+	/* request the timing alert external interrupt */
+	if (register_external_irq(EXT_IRQ_TIMING_ALERT, timing_alert_interrupt))
+		panic("Couldn't request external interrupt 0x1406");
+
+	if (__clocksource_register(&clocksource_tod) != 0)
+		panic("Could not register TOD clock source");
+
+	/* Enable TOD clock interrupts on the boot cpu. */
+	init_cpu_timer();
+
+	/* Enable cpu timer interrupts on the boot cpu. */
+	vtime_init();
+}
+
+static DEFINE_PER_CPU(atomic_t, clock_sync_word);
+static DEFINE_MUTEX(stp_mutex);
+static unsigned long clock_sync_flags;
+
+#define CLOCK_SYNC_HAS_STP		0
+#define CLOCK_SYNC_STP			1
+#define CLOCK_SYNC_STPINFO_VALID	2
+
+/*
+ * The get_clock function for the physical clock. It will get the current
+ * TOD clock, subtract the LPAR offset and write the result to *clock.
+ * The function returns 0 if the clock is in sync with the external time
+ * source. If the clock mode is local it will return -EOPNOTSUPP and
+ * -EAGAIN if the clock is not in sync with the external reference.
+ */
+int get_phys_clock(unsigned long *clock)
+{
+	atomic_t *sw_ptr;
+	unsigned int sw0, sw1;
+
+	sw_ptr = &get_cpu_var(clock_sync_word);
+	sw0 = atomic_read(sw_ptr);
+	*clock = get_tod_clock() - lpar_offset;
+	sw1 = atomic_read(sw_ptr);
+	put_cpu_var(clock_sync_word);
+	if (sw0 == sw1 && (sw0 & 0x80000000U))
+		/* Success: time is in sync. */
+		return 0;
+	if (!test_bit(CLOCK_SYNC_HAS_STP, &clock_sync_flags))
+		return -EOPNOTSUPP;
+	if (!test_bit(CLOCK_SYNC_STP, &clock_sync_flags))
+		return -EACCES;
+	return -EAGAIN;
+}
+EXPORT_SYMBOL(get_phys_clock);
+
+/*
+ * Make get_phys_clock() return -EAGAIN.
+ */
+static void disable_sync_clock(void *dummy)
+{
+	atomic_t *sw_ptr = this_cpu_ptr(&clock_sync_word);
+	/*
+	 * Clear the in-sync bit 2^31. All get_phys_clock calls will
+	 * fail until the sync bit is turned back on. In addition
+	 * increase the "sequence" counter to avoid the race of an
+	 * stp event and the complete recovery against get_phys_clock.
+	 */
+	atomic_andnot(0x80000000, sw_ptr);
+	atomic_inc(sw_ptr);
+}
+
+/*
+ * Make get_phys_clock() return 0 again.
+ * Needs to be called from a context disabled for preemption.
+ */
+static void enable_sync_clock(void)
+{
+	atomic_t *sw_ptr = this_cpu_ptr(&clock_sync_word);
+	atomic_or(0x80000000, sw_ptr);
+}
+
+/*
+ * Function to check if the clock is in sync.
+ */
+static inline int check_sync_clock(void)
+{
+	atomic_t *sw_ptr;
+	int rc;
+
+	sw_ptr = &get_cpu_var(clock_sync_word);
+	rc = (atomic_read(sw_ptr) & 0x80000000U) != 0;
+	put_cpu_var(clock_sync_word);
+	return rc;
+}
+
+/*
+ * Apply clock delta to the global data structures.
+ * This is called once on the CPU that performed the clock sync.
+ */
+static void clock_sync_global(long delta)
+{
+	unsigned long now, adj;
+	struct ptff_qto qto;
+	int cs;
+
+	/* Fixup the monotonic sched clock. */
+	tod_clock_base.eitod += delta;
+	/* Adjust TOD steering parameters. */
+	now = get_tod_clock();
+	adj = tod_steering_end - now;
+	if (unlikely((s64) adj >= 0))
+		/* Calculate how much of the old adjustment is left. */
+		tod_steering_delta = (tod_steering_delta < 0) ?
+			-(adj >> 15) : (adj >> 15);
+	tod_steering_delta += delta;
+	if ((abs(tod_steering_delta) >> 48) != 0)
+		panic("TOD clock sync offset %li is too large to drift\n",
+		      tod_steering_delta);
+	tod_steering_end = now + (abs(tod_steering_delta) << 15);
+	for (cs = 0; cs < CS_BASES; cs++) {
+		vdso_data[cs].arch_data.tod_steering_end = tod_steering_end;
+		vdso_data[cs].arch_data.tod_steering_delta = tod_steering_delta;
+	}
+
+	/* Update LPAR offset. */
+	if (ptff_query(PTFF_QTO) && ptff(&qto, sizeof(qto), PTFF_QTO) == 0)
+		lpar_offset = qto.tod_epoch_difference;
+	/* Call the TOD clock change notifier. */
+	atomic_notifier_call_chain(&s390_epoch_delta_notifier, 0, &delta);
+}
+
+/*
+ * Apply clock delta to the per-CPU data structures of this CPU.
+ * This is called for each online CPU after the call to clock_sync_global.
+ */
+static void clock_sync_local(long delta)
+{
+	/* Add the delta to the clock comparator. */
+	if (S390_lowcore.clock_comparator != clock_comparator_max) {
+		S390_lowcore.clock_comparator += delta;
+		set_clock_comparator(S390_lowcore.clock_comparator);
+	}
+	/* Adjust the last_update_clock time-stamp. */
+	S390_lowcore.last_update_clock += delta;
+}
+
+/* Single threaded workqueue used for stp sync events */
+static struct workqueue_struct *time_sync_wq;
+
+static void __init time_init_wq(void)
+{
+	if (time_sync_wq)
+		return;
+	time_sync_wq = create_singlethread_workqueue("timesync");
+}
+
+struct clock_sync_data {
+	atomic_t cpus;
+	int in_sync;
+	long clock_delta;
+};
+
+/*
+ * Server Time Protocol (STP) code.
+ */
+static bool stp_online;
+static struct stp_sstpi stp_info;
+static void *stp_page;
+
+static void stp_work_fn(struct work_struct *work);
+static DECLARE_WORK(stp_work, stp_work_fn);
+static struct timer_list stp_timer;
+
+static int __init early_parse_stp(char *p)
+{
+	return kstrtobool(p, &stp_online);
+}
+early_param("stp", early_parse_stp);
+
+/*
+ * Reset STP attachment.
+ */
+static void __init stp_reset(void)
+{
+	int rc;
+
+	stp_page = (void *) get_zeroed_page(GFP_ATOMIC);
+	rc = chsc_sstpc(stp_page, STP_OP_CTRL, 0x0000, NULL);
+	if (rc == 0)
+		set_bit(CLOCK_SYNC_HAS_STP, &clock_sync_flags);
+	else if (stp_online) {
+		pr_warn("The real or virtual hardware system does not provide an STP interface\n");
+		free_page((unsigned long) stp_page);
+		stp_page = NULL;
+		stp_online = false;
+	}
+}
+
+static void stp_timeout(struct timer_list *unused)
+{
+	queue_work(time_sync_wq, &stp_work);
+}
+
+static int __init stp_init(void)
+{
+	if (!test_bit(CLOCK_SYNC_HAS_STP, &clock_sync_flags))
+		return 0;
+	timer_setup(&stp_timer, stp_timeout, 0);
+	time_init_wq();
+	if (!stp_online)
+		return 0;
+	queue_work(time_sync_wq, &stp_work);
+	return 0;
+}
+
+arch_initcall(stp_init);
+
+/*
+ * STP timing alert. There are three causes:
+ * 1) timing status change
+ * 2) link availability change
+ * 3) time control parameter change
+ * In all three cases we are only interested in the clock source state.
+ * If a STP clock source is now available use it.
+ */
+static void stp_timing_alert(struct stp_irq_parm *intparm)
+{
+	if (intparm->tsc || intparm->lac || intparm->tcpc)
+		queue_work(time_sync_wq, &stp_work);
+}
+
+/*
+ * STP sync check machine check. This is called when the timing state
+ * changes from the synchronized state to the unsynchronized state.
+ * After a STP sync check the clock is not in sync. The machine check
+ * is broadcasted to all cpus at the same time.
+ */
+int stp_sync_check(void)
+{
+	disable_sync_clock(NULL);
+	return 1;
+}
+
+/*
+ * STP island condition machine check. This is called when an attached
+ * server  attempts to communicate over an STP link and the servers
+ * have matching CTN ids and have a valid stratum-1 configuration
+ * but the configurations do not match.
+ */
+int stp_island_check(void)
+{
+	disable_sync_clock(NULL);
+	return 1;
+}
+
+void stp_queue_work(void)
+{
+	queue_work(time_sync_wq, &stp_work);
+}
+
+static int __store_stpinfo(void)
+{
+	int rc = chsc_sstpi(stp_page, &stp_info, sizeof(struct stp_sstpi));
+
+	if (rc)
+		clear_bit(CLOCK_SYNC_STPINFO_VALID, &clock_sync_flags);
+	else
+		set_bit(CLOCK_SYNC_STPINFO_VALID, &clock_sync_flags);
+	return rc;
+}
+
+static int stpinfo_valid(void)
+{
+	return stp_online && test_bit(CLOCK_SYNC_STPINFO_VALID, &clock_sync_flags);
+}
+
+static int stp_sync_clock(void *data)
+{
+	struct clock_sync_data *sync = data;
+	long clock_delta, flags;
+	static int first;
+	int rc;
+
+	enable_sync_clock();
+	if (xchg(&first, 1) == 0) {
+		/* Wait until all other cpus entered the sync function. */
+		while (atomic_read(&sync->cpus) != 0)
+			cpu_relax();
+		rc = 0;
+		if (stp_info.todoff || stp_info.tmd != 2) {
+			flags = vdso_update_begin();
+			rc = chsc_sstpc(stp_page, STP_OP_SYNC, 0,
+					&clock_delta);
+			if (rc == 0) {
+				sync->clock_delta = clock_delta;
+				clock_sync_global(clock_delta);
+				rc = __store_stpinfo();
+				if (rc == 0 && stp_info.tmd != 2)
+					rc = -EAGAIN;
+			}
+			vdso_update_end(flags);
+		}
+		sync->in_sync = rc ? -EAGAIN : 1;
+		xchg(&first, 0);
+	} else {
+		/* Slave */
+		atomic_dec(&sync->cpus);
+		/* Wait for in_sync to be set. */
+		while (READ_ONCE(sync->in_sync) == 0)
+			__udelay(1);
+	}
+	if (sync->in_sync != 1)
+		/* Didn't work. Clear per-cpu in sync bit again. */
+		disable_sync_clock(NULL);
+	/* Apply clock delta to per-CPU fields of this CPU. */
+	clock_sync_local(sync->clock_delta);
+
+	return 0;
+}
+
+static int stp_clear_leap(void)
+{
+	struct __kernel_timex txc;
+	int ret;
+
+	memset(&txc, 0, sizeof(txc));
+
+	ret = do_adjtimex(&txc);
+	if (ret < 0)
+		return ret;
+
+	txc.modes = ADJ_STATUS;
+	txc.status &= ~(STA_INS|STA_DEL);
+	return do_adjtimex(&txc);
+}
+
+static void stp_check_leap(void)
+{
+	struct stp_stzi stzi;
+	struct stp_lsoib *lsoib = &stzi.lsoib;
+	struct __kernel_timex txc;
+	int64_t timediff;
+	int leapdiff, ret;
+
+	if (!stp_info.lu || !check_sync_clock()) {
+		/*
+		 * Either a scheduled leap second was removed by the operator,
+		 * or STP is out of sync. In both cases, clear the leap second
+		 * kernel flags.
+		 */
+		if (stp_clear_leap() < 0)
+			pr_err("failed to clear leap second flags\n");
+		return;
+	}
+
+	if (chsc_stzi(stp_page, &stzi, sizeof(stzi))) {
+		pr_err("stzi failed\n");
+		return;
+	}
+
+	timediff = tod_to_ns(lsoib->nlsout - get_tod_clock()) / NSEC_PER_SEC;
+	leapdiff = lsoib->nlso - lsoib->also;
+
+	if (leapdiff != 1 && leapdiff != -1) {
+		pr_err("Cannot schedule %d leap seconds\n", leapdiff);
+		return;
+	}
+
+	if (timediff < 0) {
+		if (stp_clear_leap() < 0)
+			pr_err("failed to clear leap second flags\n");
+	} else if (timediff < 7200) {
+		memset(&txc, 0, sizeof(txc));
+		ret = do_adjtimex(&txc);
+		if (ret < 0)
+			return;
+
+		txc.modes = ADJ_STATUS;
+		if (leapdiff > 0)
+			txc.status |= STA_INS;
+		else
+			txc.status |= STA_DEL;
+		ret = do_adjtimex(&txc);
+		if (ret < 0)
+			pr_err("failed to set leap second flags\n");
+		/* arm Timer to clear leap second flags */
+		mod_timer(&stp_timer, jiffies + msecs_to_jiffies(14400 * MSEC_PER_SEC));
+	} else {
+		/* The day the leap second is scheduled for hasn't been reached. Retry
+		 * in one hour.
+		 */
+		mod_timer(&stp_timer, jiffies + msecs_to_jiffies(3600 * MSEC_PER_SEC));
+	}
+}
+
+/*
+ * STP work. Check for the STP state and take over the clock
+ * synchronization if the STP clock source is usable.
+ */
+static void stp_work_fn(struct work_struct *work)
+{
+	struct clock_sync_data stp_sync;
+	int rc;
+
+	/* prevent multiple execution. */
+	mutex_lock(&stp_mutex);
+
+	if (!stp_online) {
+		chsc_sstpc(stp_page, STP_OP_CTRL, 0x0000, NULL);
+		del_timer_sync(&stp_timer);
+		goto out_unlock;
+	}
+
+	rc = chsc_sstpc(stp_page, STP_OP_CTRL, 0xf0e0, NULL);
+	if (rc)
+		goto out_unlock;
+
+	rc = __store_stpinfo();
+	if (rc || stp_info.c == 0)
+		goto out_unlock;
+
+	/* Skip synchronization if the clock is already in sync. */
+	if (!check_sync_clock()) {
+		memset(&stp_sync, 0, sizeof(stp_sync));
+		cpus_read_lock();
+		atomic_set(&stp_sync.cpus, num_online_cpus() - 1);
+		stop_machine_cpuslocked(stp_sync_clock, &stp_sync, cpu_online_mask);
+		cpus_read_unlock();
+	}
+
+	if (!check_sync_clock())
+		/*
+		 * There is a usable clock but the synchronization failed.
+		 * Retry after a second.
+		 */
+		mod_timer(&stp_timer, jiffies + msecs_to_jiffies(MSEC_PER_SEC));
+	else if (stp_info.lu)
+		stp_check_leap();
+
+out_unlock:
+	mutex_unlock(&stp_mutex);
+}
+
+/*
+ * STP subsys sysfs interface functions
+ */
+static struct bus_type stp_subsys = {
+	.name		= "stp",
+	.dev_name	= "stp",
+};
+
+static ssize_t ctn_id_show(struct device *dev,
+				struct device_attribute *attr,
+				char *buf)
+{
+	ssize_t ret = -ENODATA;
+
+	mutex_lock(&stp_mutex);
+	if (stpinfo_valid())
+		ret = sprintf(buf, "%016lx\n",
+			      *(unsigned long *) stp_info.ctnid);
+	mutex_unlock(&stp_mutex);
+	return ret;
+}
+
+static DEVICE_ATTR_RO(ctn_id);
+
+static ssize_t ctn_type_show(struct device *dev,
+				struct device_attribute *attr,
+				char *buf)
+{
+	ssize_t ret = -ENODATA;
+
+	mutex_lock(&stp_mutex);
+	if (stpinfo_valid())
+		ret = sprintf(buf, "%i\n", stp_info.ctn);
+	mutex_unlock(&stp_mutex);
+	return ret;
+}
+
+static DEVICE_ATTR_RO(ctn_type);
+
+static ssize_t dst_offset_show(struct device *dev,
+				   struct device_attribute *attr,
+				   char *buf)
+{
+	ssize_t ret = -ENODATA;
+
+	mutex_lock(&stp_mutex);
+	if (stpinfo_valid() && (stp_info.vbits & 0x2000))
+		ret = sprintf(buf, "%i\n", (int)(s16) stp_info.dsto);
+	mutex_unlock(&stp_mutex);
+	return ret;
+}
+
+static DEVICE_ATTR_RO(dst_offset);
+
+static ssize_t leap_seconds_show(struct device *dev,
+					struct device_attribute *attr,
+					char *buf)
+{
+	ssize_t ret = -ENODATA;
+
+	mutex_lock(&stp_mutex);
+	if (stpinfo_valid() && (stp_info.vbits & 0x8000))
+		ret = sprintf(buf, "%i\n", (int)(s16) stp_info.leaps);
+	mutex_unlock(&stp_mutex);
+	return ret;
+}
+
+static DEVICE_ATTR_RO(leap_seconds);
+
+static ssize_t leap_seconds_scheduled_show(struct device *dev,
+						struct device_attribute *attr,
+						char *buf)
+{
+	struct stp_stzi stzi;
+	ssize_t ret;
+
+	mutex_lock(&stp_mutex);
+	if (!stpinfo_valid() || !(stp_info.vbits & 0x8000) || !stp_info.lu) {
+		mutex_unlock(&stp_mutex);
+		return -ENODATA;
+	}
+
+	ret = chsc_stzi(stp_page, &stzi, sizeof(stzi));
+	mutex_unlock(&stp_mutex);
+	if (ret < 0)
+		return ret;
+
+	if (!stzi.lsoib.p)
+		return sprintf(buf, "0,0\n");
+
+	return sprintf(buf, "%lu,%d\n",
+		       tod_to_ns(stzi.lsoib.nlsout - TOD_UNIX_EPOCH) / NSEC_PER_SEC,
+		       stzi.lsoib.nlso - stzi.lsoib.also);
+}
+
+static DEVICE_ATTR_RO(leap_seconds_scheduled);
+
+static ssize_t stratum_show(struct device *dev,
+				struct device_attribute *attr,
+				char *buf)
+{
+	ssize_t ret = -ENODATA;
+
+	mutex_lock(&stp_mutex);
+	if (stpinfo_valid())
+		ret = sprintf(buf, "%i\n", (int)(s16) stp_info.stratum);
+	mutex_unlock(&stp_mutex);
+	return ret;
+}
+
+static DEVICE_ATTR_RO(stratum);
+
+static ssize_t time_offset_show(struct device *dev,
+				struct device_attribute *attr,
+				char *buf)
+{
+	ssize_t ret = -ENODATA;
+
+	mutex_lock(&stp_mutex);
+	if (stpinfo_valid() && (stp_info.vbits & 0x0800))
+		ret = sprintf(buf, "%i\n", (int) stp_info.tto);
+	mutex_unlock(&stp_mutex);
+	return ret;
+}
+
+static DEVICE_ATTR_RO(time_offset);
+
+static ssize_t time_zone_offset_show(struct device *dev,
+				struct device_attribute *attr,
+				char *buf)
+{
+	ssize_t ret = -ENODATA;
+
+	mutex_lock(&stp_mutex);
+	if (stpinfo_valid() && (stp_info.vbits & 0x4000))
+		ret = sprintf(buf, "%i\n", (int)(s16) stp_info.tzo);
+	mutex_unlock(&stp_mutex);
+	return ret;
+}
+
+static DEVICE_ATTR_RO(time_zone_offset);
+
+static ssize_t timing_mode_show(struct device *dev,
+				struct device_attribute *attr,
+				char *buf)
+{
+	ssize_t ret = -ENODATA;
+
+	mutex_lock(&stp_mutex);
+	if (stpinfo_valid())
+		ret = sprintf(buf, "%i\n", stp_info.tmd);
+	mutex_unlock(&stp_mutex);
+	return ret;
+}
+
+static DEVICE_ATTR_RO(timing_mode);
+
+static ssize_t timing_state_show(struct device *dev,
+				struct device_attribute *attr,
+				char *buf)
+{
+	ssize_t ret = -ENODATA;
+
+	mutex_lock(&stp_mutex);
+	if (stpinfo_valid())
+		ret = sprintf(buf, "%i\n", stp_info.tst);
+	mutex_unlock(&stp_mutex);
+	return ret;
+}
+
+static DEVICE_ATTR_RO(timing_state);
+
+static ssize_t online_show(struct device *dev,
+				struct device_attribute *attr,
+				char *buf)
+{
+	return sprintf(buf, "%i\n", stp_online);
+}
+
+static ssize_t online_store(struct device *dev,
+				struct device_attribute *attr,
+				const char *buf, size_t count)
+{
+	unsigned int value;
+
+	value = simple_strtoul(buf, NULL, 0);
+	if (value != 0 && value != 1)
+		return -EINVAL;
+	if (!test_bit(CLOCK_SYNC_HAS_STP, &clock_sync_flags))
+		return -EOPNOTSUPP;
+	mutex_lock(&stp_mutex);
+	stp_online = value;
+	if (stp_online)
+		set_bit(CLOCK_SYNC_STP, &clock_sync_flags);
+	else
+		clear_bit(CLOCK_SYNC_STP, &clock_sync_flags);
+	queue_work(time_sync_wq, &stp_work);
+	mutex_unlock(&stp_mutex);
+	return count;
+}
+
+/*
+ * Can't use DEVICE_ATTR because the attribute should be named
+ * stp/online but dev_attr_online already exists in this file ..
+ */
+static DEVICE_ATTR_RW(online);
+
+static struct attribute *stp_dev_attrs[] = {
+	&dev_attr_ctn_id.attr,
+	&dev_attr_ctn_type.attr,
+	&dev_attr_dst_offset.attr,
+	&dev_attr_leap_seconds.attr,
+	&dev_attr_online.attr,
+	&dev_attr_leap_seconds_scheduled.attr,
+	&dev_attr_stratum.attr,
+	&dev_attr_time_offset.attr,
+	&dev_attr_time_zone_offset.attr,
+	&dev_attr_timing_mode.attr,
+	&dev_attr_timing_state.attr,
+	NULL
+};
+ATTRIBUTE_GROUPS(stp_dev);
+
+static int __init stp_init_sysfs(void)
+{
+	return subsys_system_register(&stp_subsys, stp_dev_groups);
+}
+
+device_initcall(stp_init_sysfs);
diff --git a/arch/s390/kernel/topology.c b/arch/s390/kernel/topology.c
new file mode 100644
index 0000000000..68adf1de88
--- /dev/null
+++ b/arch/s390/kernel/topology.c
@@ -0,0 +1,661 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ *    Copyright IBM Corp. 2007, 2011
+ */
+
+#define KMSG_COMPONENT "cpu"
+#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
+
+#include <linux/workqueue.h>
+#include <linux/memblock.h>
+#include <linux/uaccess.h>
+#include <linux/sysctl.h>
+#include <linux/cpuset.h>
+#include <linux/device.h>
+#include <linux/export.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/sched/topology.h>
+#include <linux/delay.h>
+#include <linux/init.h>
+#include <linux/slab.h>
+#include <linux/cpu.h>
+#include <linux/smp.h>
+#include <linux/mm.h>
+#include <linux/nodemask.h>
+#include <linux/node.h>
+#include <asm/sysinfo.h>
+
+#define PTF_HORIZONTAL	(0UL)
+#define PTF_VERTICAL	(1UL)
+#define PTF_CHECK	(2UL)
+
+enum {
+	TOPOLOGY_MODE_HW,
+	TOPOLOGY_MODE_SINGLE,
+	TOPOLOGY_MODE_PACKAGE,
+	TOPOLOGY_MODE_UNINITIALIZED
+};
+
+struct mask_info {
+	struct mask_info *next;
+	unsigned char id;
+	cpumask_t mask;
+};
+
+static int topology_mode = TOPOLOGY_MODE_UNINITIALIZED;
+static void set_topology_timer(void);
+static void topology_work_fn(struct work_struct *work);
+static struct sysinfo_15_1_x *tl_info;
+
+static DECLARE_WORK(topology_work, topology_work_fn);
+
+/*
+ * Socket/Book linked lists and cpu_topology updates are
+ * protected by "sched_domains_mutex".
+ */
+static struct mask_info socket_info;
+static struct mask_info book_info;
+static struct mask_info drawer_info;
+
+struct cpu_topology_s390 cpu_topology[NR_CPUS];
+EXPORT_SYMBOL_GPL(cpu_topology);
+
+static void cpu_group_map(cpumask_t *dst, struct mask_info *info, unsigned int cpu)
+{
+	static cpumask_t mask;
+
+	cpumask_clear(&mask);
+	if (!cpumask_test_cpu(cpu, &cpu_setup_mask))
+		goto out;
+	cpumask_set_cpu(cpu, &mask);
+	switch (topology_mode) {
+	case TOPOLOGY_MODE_HW:
+		while (info) {
+			if (cpumask_test_cpu(cpu, &info->mask)) {
+				cpumask_copy(&mask, &info->mask);
+				break;
+			}
+			info = info->next;
+		}
+		break;
+	case TOPOLOGY_MODE_PACKAGE:
+		cpumask_copy(&mask, cpu_present_mask);
+		break;
+	default:
+		fallthrough;
+	case TOPOLOGY_MODE_SINGLE:
+		break;
+	}
+	cpumask_and(&mask, &mask, &cpu_setup_mask);
+out:
+	cpumask_copy(dst, &mask);
+}
+
+static void cpu_thread_map(cpumask_t *dst, unsigned int cpu)
+{
+	static cpumask_t mask;
+	unsigned int max_cpu;
+
+	cpumask_clear(&mask);
+	if (!cpumask_test_cpu(cpu, &cpu_setup_mask))
+		goto out;
+	cpumask_set_cpu(cpu, &mask);
+	if (topology_mode != TOPOLOGY_MODE_HW)
+		goto out;
+	cpu -= cpu % (smp_cpu_mtid + 1);
+	max_cpu = min(cpu + smp_cpu_mtid, nr_cpu_ids - 1);
+	for (; cpu <= max_cpu; cpu++) {
+		if (cpumask_test_cpu(cpu, &cpu_setup_mask))
+			cpumask_set_cpu(cpu, &mask);
+	}
+out:
+	cpumask_copy(dst, &mask);
+}
+
+#define TOPOLOGY_CORE_BITS	64
+
+static void add_cpus_to_mask(struct topology_core *tl_core,
+			     struct mask_info *drawer,
+			     struct mask_info *book,
+			     struct mask_info *socket)
+{
+	struct cpu_topology_s390 *topo;
+	unsigned int core;
+
+	for_each_set_bit(core, &tl_core->mask, TOPOLOGY_CORE_BITS) {
+		unsigned int max_cpu, rcore;
+		int cpu;
+
+		rcore = TOPOLOGY_CORE_BITS - 1 - core + tl_core->origin;
+		cpu = smp_find_processor_id(rcore << smp_cpu_mt_shift);
+		if (cpu < 0)
+			continue;
+		max_cpu = min(cpu + smp_cpu_mtid, nr_cpu_ids - 1);
+		for (; cpu <= max_cpu; cpu++) {
+			topo = &cpu_topology[cpu];
+			topo->drawer_id = drawer->id;
+			topo->book_id = book->id;
+			topo->socket_id = socket->id;
+			topo->core_id = rcore;
+			topo->thread_id = cpu;
+			topo->dedicated = tl_core->d;
+			cpumask_set_cpu(cpu, &drawer->mask);
+			cpumask_set_cpu(cpu, &book->mask);
+			cpumask_set_cpu(cpu, &socket->mask);
+			smp_cpu_set_polarization(cpu, tl_core->pp);
+		}
+	}
+}
+
+static void clear_masks(void)
+{
+	struct mask_info *info;
+
+	info = &socket_info;
+	while (info) {
+		cpumask_clear(&info->mask);
+		info = info->next;
+	}
+	info = &book_info;
+	while (info) {
+		cpumask_clear(&info->mask);
+		info = info->next;
+	}
+	info = &drawer_info;
+	while (info) {
+		cpumask_clear(&info->mask);
+		info = info->next;
+	}
+}
+
+static union topology_entry *next_tle(union topology_entry *tle)
+{
+	if (!tle->nl)
+		return (union topology_entry *)((struct topology_core *)tle + 1);
+	return (union topology_entry *)((struct topology_container *)tle + 1);
+}
+
+static void tl_to_masks(struct sysinfo_15_1_x *info)
+{
+	struct mask_info *socket = &socket_info;
+	struct mask_info *book = &book_info;
+	struct mask_info *drawer = &drawer_info;
+	union topology_entry *tle, *end;
+
+	clear_masks();
+	tle = info->tle;
+	end = (union topology_entry *)((unsigned long)info + info->length);
+	while (tle < end) {
+		switch (tle->nl) {
+		case 3:
+			drawer = drawer->next;
+			drawer->id = tle->container.id;
+			break;
+		case 2:
+			book = book->next;
+			book->id = tle->container.id;
+			break;
+		case 1:
+			socket = socket->next;
+			socket->id = tle->container.id;
+			break;
+		case 0:
+			add_cpus_to_mask(&tle->cpu, drawer, book, socket);
+			break;
+		default:
+			clear_masks();
+			return;
+		}
+		tle = next_tle(tle);
+	}
+}
+
+static void topology_update_polarization_simple(void)
+{
+	int cpu;
+
+	for_each_possible_cpu(cpu)
+		smp_cpu_set_polarization(cpu, POLARIZATION_HRZ);
+}
+
+static int ptf(unsigned long fc)
+{
+	int rc;
+
+	asm volatile(
+		"	.insn	rre,0xb9a20000,%1,%1\n"
+		"	ipm	%0\n"
+		"	srl	%0,28\n"
+		: "=d" (rc)
+		: "d" (fc)  : "cc");
+	return rc;
+}
+
+int topology_set_cpu_management(int fc)
+{
+	int cpu, rc;
+
+	if (!MACHINE_HAS_TOPOLOGY)
+		return -EOPNOTSUPP;
+	if (fc)
+		rc = ptf(PTF_VERTICAL);
+	else
+		rc = ptf(PTF_HORIZONTAL);
+	if (rc)
+		return -EBUSY;
+	for_each_possible_cpu(cpu)
+		smp_cpu_set_polarization(cpu, POLARIZATION_UNKNOWN);
+	return rc;
+}
+
+void update_cpu_masks(void)
+{
+	struct cpu_topology_s390 *topo, *topo_package, *topo_sibling;
+	int cpu, sibling, pkg_first, smt_first, id;
+
+	for_each_possible_cpu(cpu) {
+		topo = &cpu_topology[cpu];
+		cpu_thread_map(&topo->thread_mask, cpu);
+		cpu_group_map(&topo->core_mask, &socket_info, cpu);
+		cpu_group_map(&topo->book_mask, &book_info, cpu);
+		cpu_group_map(&topo->drawer_mask, &drawer_info, cpu);
+		topo->booted_cores = 0;
+		if (topology_mode != TOPOLOGY_MODE_HW) {
+			id = topology_mode == TOPOLOGY_MODE_PACKAGE ? 0 : cpu;
+			topo->thread_id = cpu;
+			topo->core_id = cpu;
+			topo->socket_id = id;
+			topo->book_id = id;
+			topo->drawer_id = id;
+		}
+	}
+	for_each_online_cpu(cpu) {
+		topo = &cpu_topology[cpu];
+		pkg_first = cpumask_first(&topo->core_mask);
+		topo_package = &cpu_topology[pkg_first];
+		if (cpu == pkg_first) {
+			for_each_cpu(sibling, &topo->core_mask) {
+				topo_sibling = &cpu_topology[sibling];
+				smt_first = cpumask_first(&topo_sibling->thread_mask);
+				if (sibling == smt_first)
+					topo_package->booted_cores++;
+			}
+		} else {
+			topo->booted_cores = topo_package->booted_cores;
+		}
+	}
+}
+
+void store_topology(struct sysinfo_15_1_x *info)
+{
+	stsi(info, 15, 1, topology_mnest_limit());
+}
+
+static void __arch_update_dedicated_flag(void *arg)
+{
+	if (topology_cpu_dedicated(smp_processor_id()))
+		set_cpu_flag(CIF_DEDICATED_CPU);
+	else
+		clear_cpu_flag(CIF_DEDICATED_CPU);
+}
+
+static int __arch_update_cpu_topology(void)
+{
+	struct sysinfo_15_1_x *info = tl_info;
+	int rc = 0;
+
+	mutex_lock(&smp_cpu_state_mutex);
+	if (MACHINE_HAS_TOPOLOGY) {
+		rc = 1;
+		store_topology(info);
+		tl_to_masks(info);
+	}
+	update_cpu_masks();
+	if (!MACHINE_HAS_TOPOLOGY)
+		topology_update_polarization_simple();
+	mutex_unlock(&smp_cpu_state_mutex);
+	return rc;
+}
+
+int arch_update_cpu_topology(void)
+{
+	struct device *dev;
+	int cpu, rc;
+
+	rc = __arch_update_cpu_topology();
+	on_each_cpu(__arch_update_dedicated_flag, NULL, 0);
+	for_each_online_cpu(cpu) {
+		dev = get_cpu_device(cpu);
+		if (dev)
+			kobject_uevent(&dev->kobj, KOBJ_CHANGE);
+	}
+	return rc;
+}
+
+static void topology_work_fn(struct work_struct *work)
+{
+	rebuild_sched_domains();
+}
+
+void topology_schedule_update(void)
+{
+	schedule_work(&topology_work);
+}
+
+static void topology_flush_work(void)
+{
+	flush_work(&topology_work);
+}
+
+static void topology_timer_fn(struct timer_list *unused)
+{
+	if (ptf(PTF_CHECK))
+		topology_schedule_update();
+	set_topology_timer();
+}
+
+static struct timer_list topology_timer;
+
+static atomic_t topology_poll = ATOMIC_INIT(0);
+
+static void set_topology_timer(void)
+{
+	if (atomic_add_unless(&topology_poll, -1, 0))
+		mod_timer(&topology_timer, jiffies + msecs_to_jiffies(100));
+	else
+		mod_timer(&topology_timer, jiffies + msecs_to_jiffies(60 * MSEC_PER_SEC));
+}
+
+void topology_expect_change(void)
+{
+	if (!MACHINE_HAS_TOPOLOGY)
+		return;
+	/* This is racy, but it doesn't matter since it is just a heuristic.
+	 * Worst case is that we poll in a higher frequency for a bit longer.
+	 */
+	if (atomic_read(&topology_poll) > 60)
+		return;
+	atomic_add(60, &topology_poll);
+	set_topology_timer();
+}
+
+static int cpu_management;
+
+static ssize_t dispatching_show(struct device *dev,
+				struct device_attribute *attr,
+				char *buf)
+{
+	ssize_t count;
+
+	mutex_lock(&smp_cpu_state_mutex);
+	count = sprintf(buf, "%d\n", cpu_management);
+	mutex_unlock(&smp_cpu_state_mutex);
+	return count;
+}
+
+static ssize_t dispatching_store(struct device *dev,
+				 struct device_attribute *attr,
+				 const char *buf,
+				 size_t count)
+{
+	int val, rc;
+	char delim;
+
+	if (sscanf(buf, "%d %c", &val, &delim) != 1)
+		return -EINVAL;
+	if (val != 0 && val != 1)
+		return -EINVAL;
+	rc = 0;
+	cpus_read_lock();
+	mutex_lock(&smp_cpu_state_mutex);
+	if (cpu_management == val)
+		goto out;
+	rc = topology_set_cpu_management(val);
+	if (rc)
+		goto out;
+	cpu_management = val;
+	topology_expect_change();
+out:
+	mutex_unlock(&smp_cpu_state_mutex);
+	cpus_read_unlock();
+	return rc ? rc : count;
+}
+static DEVICE_ATTR_RW(dispatching);
+
+static ssize_t cpu_polarization_show(struct device *dev,
+				     struct device_attribute *attr, char *buf)
+{
+	int cpu = dev->id;
+	ssize_t count;
+
+	mutex_lock(&smp_cpu_state_mutex);
+	switch (smp_cpu_get_polarization(cpu)) {
+	case POLARIZATION_HRZ:
+		count = sprintf(buf, "horizontal\n");
+		break;
+	case POLARIZATION_VL:
+		count = sprintf(buf, "vertical:low\n");
+		break;
+	case POLARIZATION_VM:
+		count = sprintf(buf, "vertical:medium\n");
+		break;
+	case POLARIZATION_VH:
+		count = sprintf(buf, "vertical:high\n");
+		break;
+	default:
+		count = sprintf(buf, "unknown\n");
+		break;
+	}
+	mutex_unlock(&smp_cpu_state_mutex);
+	return count;
+}
+static DEVICE_ATTR(polarization, 0444, cpu_polarization_show, NULL);
+
+static struct attribute *topology_cpu_attrs[] = {
+	&dev_attr_polarization.attr,
+	NULL,
+};
+
+static struct attribute_group topology_cpu_attr_group = {
+	.attrs = topology_cpu_attrs,
+};
+
+static ssize_t cpu_dedicated_show(struct device *dev,
+				  struct device_attribute *attr, char *buf)
+{
+	int cpu = dev->id;
+	ssize_t count;
+
+	mutex_lock(&smp_cpu_state_mutex);
+	count = sprintf(buf, "%d\n", topology_cpu_dedicated(cpu));
+	mutex_unlock(&smp_cpu_state_mutex);
+	return count;
+}
+static DEVICE_ATTR(dedicated, 0444, cpu_dedicated_show, NULL);
+
+static struct attribute *topology_extra_cpu_attrs[] = {
+	&dev_attr_dedicated.attr,
+	NULL,
+};
+
+static struct attribute_group topology_extra_cpu_attr_group = {
+	.attrs = topology_extra_cpu_attrs,
+};
+
+int topology_cpu_init(struct cpu *cpu)
+{
+	int rc;
+
+	rc = sysfs_create_group(&cpu->dev.kobj, &topology_cpu_attr_group);
+	if (rc || !MACHINE_HAS_TOPOLOGY)
+		return rc;
+	rc = sysfs_create_group(&cpu->dev.kobj, &topology_extra_cpu_attr_group);
+	if (rc)
+		sysfs_remove_group(&cpu->dev.kobj, &topology_cpu_attr_group);
+	return rc;
+}
+
+static const struct cpumask *cpu_thread_mask(int cpu)
+{
+	return &cpu_topology[cpu].thread_mask;
+}
+
+
+const struct cpumask *cpu_coregroup_mask(int cpu)
+{
+	return &cpu_topology[cpu].core_mask;
+}
+
+static const struct cpumask *cpu_book_mask(int cpu)
+{
+	return &cpu_topology[cpu].book_mask;
+}
+
+static const struct cpumask *cpu_drawer_mask(int cpu)
+{
+	return &cpu_topology[cpu].drawer_mask;
+}
+
+static struct sched_domain_topology_level s390_topology[] = {
+	{ cpu_thread_mask, cpu_smt_flags, SD_INIT_NAME(SMT) },
+	{ cpu_coregroup_mask, cpu_core_flags, SD_INIT_NAME(MC) },
+	{ cpu_book_mask, SD_INIT_NAME(BOOK) },
+	{ cpu_drawer_mask, SD_INIT_NAME(DRAWER) },
+	{ cpu_cpu_mask, SD_INIT_NAME(DIE) },
+	{ NULL, },
+};
+
+static void __init alloc_masks(struct sysinfo_15_1_x *info,
+			       struct mask_info *mask, int offset)
+{
+	int i, nr_masks;
+
+	nr_masks = info->mag[TOPOLOGY_NR_MAG - offset];
+	for (i = 0; i < info->mnest - offset; i++)
+		nr_masks *= info->mag[TOPOLOGY_NR_MAG - offset - 1 - i];
+	nr_masks = max(nr_masks, 1);
+	for (i = 0; i < nr_masks; i++) {
+		mask->next = memblock_alloc(sizeof(*mask->next), 8);
+		if (!mask->next)
+			panic("%s: Failed to allocate %zu bytes align=0x%x\n",
+			      __func__, sizeof(*mask->next), 8);
+		mask = mask->next;
+	}
+}
+
+void __init topology_init_early(void)
+{
+	struct sysinfo_15_1_x *info;
+
+	set_sched_topology(s390_topology);
+	if (topology_mode == TOPOLOGY_MODE_UNINITIALIZED) {
+		if (MACHINE_HAS_TOPOLOGY)
+			topology_mode = TOPOLOGY_MODE_HW;
+		else
+			topology_mode = TOPOLOGY_MODE_SINGLE;
+	}
+	if (!MACHINE_HAS_TOPOLOGY)
+		goto out;
+	tl_info = memblock_alloc(PAGE_SIZE, PAGE_SIZE);
+	if (!tl_info)
+		panic("%s: Failed to allocate %lu bytes align=0x%lx\n",
+		      __func__, PAGE_SIZE, PAGE_SIZE);
+	info = tl_info;
+	store_topology(info);
+	pr_info("The CPU configuration topology of the machine is: %d %d %d %d %d %d / %d\n",
+		info->mag[0], info->mag[1], info->mag[2], info->mag[3],
+		info->mag[4], info->mag[5], info->mnest);
+	alloc_masks(info, &socket_info, 1);
+	alloc_masks(info, &book_info, 2);
+	alloc_masks(info, &drawer_info, 3);
+out:
+	cpumask_set_cpu(0, &cpu_setup_mask);
+	__arch_update_cpu_topology();
+	__arch_update_dedicated_flag(NULL);
+}
+
+static inline int topology_get_mode(int enabled)
+{
+	if (!enabled)
+		return TOPOLOGY_MODE_SINGLE;
+	return MACHINE_HAS_TOPOLOGY ? TOPOLOGY_MODE_HW : TOPOLOGY_MODE_PACKAGE;
+}
+
+static inline int topology_is_enabled(void)
+{
+	return topology_mode != TOPOLOGY_MODE_SINGLE;
+}
+
+static int __init topology_setup(char *str)
+{
+	bool enabled;
+	int rc;
+
+	rc = kstrtobool(str, &enabled);
+	if (rc)
+		return rc;
+	topology_mode = topology_get_mode(enabled);
+	return 0;
+}
+early_param("topology", topology_setup);
+
+static int topology_ctl_handler(struct ctl_table *ctl, int write,
+				void *buffer, size_t *lenp, loff_t *ppos)
+{
+	int enabled = topology_is_enabled();
+	int new_mode;
+	int rc;
+	struct ctl_table ctl_entry = {
+		.procname	= ctl->procname,
+		.data		= &enabled,
+		.maxlen		= sizeof(int),
+		.extra1		= SYSCTL_ZERO,
+		.extra2		= SYSCTL_ONE,
+	};
+
+	rc = proc_douintvec_minmax(&ctl_entry, write, buffer, lenp, ppos);
+	if (rc < 0 || !write)
+		return rc;
+
+	mutex_lock(&smp_cpu_state_mutex);
+	new_mode = topology_get_mode(enabled);
+	if (topology_mode != new_mode) {
+		topology_mode = new_mode;
+		topology_schedule_update();
+	}
+	mutex_unlock(&smp_cpu_state_mutex);
+	topology_flush_work();
+
+	return rc;
+}
+
+static struct ctl_table topology_ctl_table[] = {
+	{
+		.procname	= "topology",
+		.mode		= 0644,
+		.proc_handler	= topology_ctl_handler,
+	},
+	{ },
+};
+
+static int __init topology_init(void)
+{
+	struct device *dev_root;
+	int rc = 0;
+
+	timer_setup(&topology_timer, topology_timer_fn, TIMER_DEFERRABLE);
+	if (MACHINE_HAS_TOPOLOGY)
+		set_topology_timer();
+	else
+		topology_update_polarization_simple();
+	register_sysctl("s390", topology_ctl_table);
+
+	dev_root = bus_get_dev_root(&cpu_subsys);
+	if (dev_root) {
+		rc = device_create_file(dev_root, &dev_attr_dispatching);
+		put_device(dev_root);
+	}
+	return rc;
+}
+device_initcall(topology_init);
diff --git a/arch/s390/kernel/trace.c b/arch/s390/kernel/trace.c
new file mode 100644
index 0000000000..11a669f3cc
--- /dev/null
+++ b/arch/s390/kernel/trace.c
@@ -0,0 +1,33 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Tracepoint definitions for s390
+ *
+ * Copyright IBM Corp. 2015
+ * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>
+ */
+
+#include <linux/percpu.h>
+#define CREATE_TRACE_POINTS
+#include <asm/trace/diag.h>
+
+EXPORT_TRACEPOINT_SYMBOL(s390_diagnose);
+
+static DEFINE_PER_CPU(unsigned int, diagnose_trace_depth);
+
+void notrace trace_s390_diagnose_norecursion(int diag_nr)
+{
+	unsigned long flags;
+	unsigned int *depth;
+
+	/* Avoid lockdep recursion. */
+	if (IS_ENABLED(CONFIG_LOCKDEP))
+		return;
+	local_irq_save(flags);
+	depth = this_cpu_ptr(&diagnose_trace_depth);
+	if (*depth == 0) {
+		(*depth)++;
+		trace_s390_diagnose(diag_nr);
+		(*depth)--;
+	}
+	local_irq_restore(flags);
+}
diff --git a/arch/s390/kernel/traps.c b/arch/s390/kernel/traps.c
new file mode 100644
index 0000000000..1d2aa448d1
--- /dev/null
+++ b/arch/s390/kernel/traps.c
@@ -0,0 +1,403 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ *  S390 version
+ *    Copyright IBM Corp. 1999, 2000
+ *    Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com),
+ *               Denis Joseph Barrow (djbarrow@de.ibm.com,barrow_dj@yahoo.com),
+ *
+ *  Derived from "arch/i386/kernel/traps.c"
+ *    Copyright (C) 1991, 1992 Linus Torvalds
+ */
+
+/*
+ * 'Traps.c' handles hardware traps and faults after we have saved some
+ * state in 'asm.s'.
+ */
+#include "asm/irqflags.h"
+#include "asm/ptrace.h"
+#include <linux/kprobes.h>
+#include <linux/kdebug.h>
+#include <linux/randomize_kstack.h>
+#include <linux/extable.h>
+#include <linux/ptrace.h>
+#include <linux/sched.h>
+#include <linux/sched/debug.h>
+#include <linux/mm.h>
+#include <linux/slab.h>
+#include <linux/uaccess.h>
+#include <linux/cpu.h>
+#include <linux/entry-common.h>
+#include <asm/asm-extable.h>
+#include <asm/fpu/api.h>
+#include <asm/vtime.h>
+#include "entry.h"
+
+static inline void __user *get_trap_ip(struct pt_regs *regs)
+{
+	unsigned long address;
+
+	if (regs->int_code & 0x200)
+		address = current->thread.trap_tdb.data[3];
+	else
+		address = regs->psw.addr;
+	return (void __user *) (address - (regs->int_code >> 16));
+}
+
+int is_valid_bugaddr(unsigned long addr)
+{
+	return 1;
+}
+
+void do_report_trap(struct pt_regs *regs, int si_signo, int si_code, char *str)
+{
+	if (user_mode(regs)) {
+		force_sig_fault(si_signo, si_code, get_trap_ip(regs));
+		report_user_fault(regs, si_signo, 0);
+        } else {
+		if (!fixup_exception(regs))
+			die(regs, str);
+        }
+}
+
+static void do_trap(struct pt_regs *regs, int si_signo, int si_code, char *str)
+{
+	if (notify_die(DIE_TRAP, str, regs, 0,
+		       regs->int_code, si_signo) == NOTIFY_STOP)
+		return;
+	do_report_trap(regs, si_signo, si_code, str);
+}
+NOKPROBE_SYMBOL(do_trap);
+
+void do_per_trap(struct pt_regs *regs)
+{
+	if (notify_die(DIE_SSTEP, "sstep", regs, 0, 0, SIGTRAP) == NOTIFY_STOP)
+		return;
+	if (!current->ptrace)
+		return;
+	force_sig_fault(SIGTRAP, TRAP_HWBKPT,
+		(void __force __user *) current->thread.per_event.address);
+}
+NOKPROBE_SYMBOL(do_per_trap);
+
+static void default_trap_handler(struct pt_regs *regs)
+{
+	if (user_mode(regs)) {
+		report_user_fault(regs, SIGSEGV, 0);
+		force_exit_sig(SIGSEGV);
+	} else
+		die(regs, "Unknown program exception");
+}
+
+#define DO_ERROR_INFO(name, signr, sicode, str) \
+static void name(struct pt_regs *regs)		\
+{						\
+	do_trap(regs, signr, sicode, str);	\
+}
+
+DO_ERROR_INFO(addressing_exception, SIGILL, ILL_ILLADR,
+	      "addressing exception")
+DO_ERROR_INFO(execute_exception, SIGILL, ILL_ILLOPN,
+	      "execute exception")
+DO_ERROR_INFO(divide_exception, SIGFPE, FPE_INTDIV,
+	      "fixpoint divide exception")
+DO_ERROR_INFO(overflow_exception, SIGFPE, FPE_INTOVF,
+	      "fixpoint overflow exception")
+DO_ERROR_INFO(hfp_overflow_exception, SIGFPE, FPE_FLTOVF,
+	      "HFP overflow exception")
+DO_ERROR_INFO(hfp_underflow_exception, SIGFPE, FPE_FLTUND,
+	      "HFP underflow exception")
+DO_ERROR_INFO(hfp_significance_exception, SIGFPE, FPE_FLTRES,
+	      "HFP significance exception")
+DO_ERROR_INFO(hfp_divide_exception, SIGFPE, FPE_FLTDIV,
+	      "HFP divide exception")
+DO_ERROR_INFO(hfp_sqrt_exception, SIGFPE, FPE_FLTINV,
+	      "HFP square root exception")
+DO_ERROR_INFO(operand_exception, SIGILL, ILL_ILLOPN,
+	      "operand exception")
+DO_ERROR_INFO(privileged_op, SIGILL, ILL_PRVOPC,
+	      "privileged operation")
+DO_ERROR_INFO(special_op_exception, SIGILL, ILL_ILLOPN,
+	      "special operation exception")
+DO_ERROR_INFO(transaction_exception, SIGILL, ILL_ILLOPN,
+	      "transaction constraint exception")
+
+static inline void do_fp_trap(struct pt_regs *regs, __u32 fpc)
+{
+	int si_code = 0;
+	/* FPC[2] is Data Exception Code */
+	if ((fpc & 0x00000300) == 0) {
+		/* bits 6 and 7 of DXC are 0 iff IEEE exception */
+		if (fpc & 0x8000) /* invalid fp operation */
+			si_code = FPE_FLTINV;
+		else if (fpc & 0x4000) /* div by 0 */
+			si_code = FPE_FLTDIV;
+		else if (fpc & 0x2000) /* overflow */
+			si_code = FPE_FLTOVF;
+		else if (fpc & 0x1000) /* underflow */
+			si_code = FPE_FLTUND;
+		else if (fpc & 0x0800) /* inexact */
+			si_code = FPE_FLTRES;
+	}
+	do_trap(regs, SIGFPE, si_code, "floating point exception");
+}
+
+static void translation_specification_exception(struct pt_regs *regs)
+{
+	/* May never happen. */
+	panic("Translation-Specification Exception");
+}
+
+static void illegal_op(struct pt_regs *regs)
+{
+        __u8 opcode[6];
+	__u16 __user *location;
+	int is_uprobe_insn = 0;
+	int signal = 0;
+
+	location = get_trap_ip(regs);
+
+	if (user_mode(regs)) {
+		if (get_user(*((__u16 *) opcode), (__u16 __user *) location))
+			return;
+		if (*((__u16 *) opcode) == S390_BREAKPOINT_U16) {
+			if (current->ptrace)
+				force_sig_fault(SIGTRAP, TRAP_BRKPT, location);
+			else
+				signal = SIGILL;
+#ifdef CONFIG_UPROBES
+		} else if (*((__u16 *) opcode) == UPROBE_SWBP_INSN) {
+			is_uprobe_insn = 1;
+#endif
+		} else
+			signal = SIGILL;
+	}
+	/*
+	 * We got either an illegal op in kernel mode, or user space trapped
+	 * on a uprobes illegal instruction. See if kprobes or uprobes picks
+	 * it up. If not, SIGILL.
+	 */
+	if (is_uprobe_insn || !user_mode(regs)) {
+		if (notify_die(DIE_BPT, "bpt", regs, 0,
+			       3, SIGTRAP) != NOTIFY_STOP)
+			signal = SIGILL;
+	}
+	if (signal)
+		do_trap(regs, signal, ILL_ILLOPC, "illegal operation");
+}
+NOKPROBE_SYMBOL(illegal_op);
+
+DO_ERROR_INFO(specification_exception, SIGILL, ILL_ILLOPN,
+	      "specification exception");
+
+static void vector_exception(struct pt_regs *regs)
+{
+	int si_code, vic;
+
+	if (!MACHINE_HAS_VX) {
+		do_trap(regs, SIGILL, ILL_ILLOPN, "illegal operation");
+		return;
+	}
+
+	/* get vector interrupt code from fpc */
+	save_fpu_regs();
+	vic = (current->thread.fpu.fpc & 0xf00) >> 8;
+	switch (vic) {
+	case 1: /* invalid vector operation */
+		si_code = FPE_FLTINV;
+		break;
+	case 2: /* division by zero */
+		si_code = FPE_FLTDIV;
+		break;
+	case 3: /* overflow */
+		si_code = FPE_FLTOVF;
+		break;
+	case 4: /* underflow */
+		si_code = FPE_FLTUND;
+		break;
+	case 5:	/* inexact */
+		si_code = FPE_FLTRES;
+		break;
+	default: /* unknown cause */
+		si_code = 0;
+	}
+	do_trap(regs, SIGFPE, si_code, "vector exception");
+}
+
+static void data_exception(struct pt_regs *regs)
+{
+	save_fpu_regs();
+	if (current->thread.fpu.fpc & FPC_DXC_MASK)
+		do_fp_trap(regs, current->thread.fpu.fpc);
+	else
+		do_trap(regs, SIGILL, ILL_ILLOPN, "data exception");
+}
+
+static void space_switch_exception(struct pt_regs *regs)
+{
+	/* Set user psw back to home space mode. */
+	if (user_mode(regs))
+		regs->psw.mask |= PSW_ASC_HOME;
+	/* Send SIGILL. */
+	do_trap(regs, SIGILL, ILL_PRVOPC, "space switch event");
+}
+
+static void monitor_event_exception(struct pt_regs *regs)
+{
+	if (user_mode(regs))
+		return;
+
+	switch (report_bug(regs->psw.addr - (regs->int_code >> 16), regs)) {
+	case BUG_TRAP_TYPE_NONE:
+		fixup_exception(regs);
+		break;
+	case BUG_TRAP_TYPE_WARN:
+		break;
+	case BUG_TRAP_TYPE_BUG:
+		die(regs, "monitor event");
+		break;
+	}
+}
+
+void kernel_stack_overflow(struct pt_regs *regs)
+{
+	bust_spinlocks(1);
+	printk("Kernel stack overflow.\n");
+	show_regs(regs);
+	bust_spinlocks(0);
+	panic("Corrupt kernel stack, can't continue.");
+}
+NOKPROBE_SYMBOL(kernel_stack_overflow);
+
+static void __init test_monitor_call(void)
+{
+	int val = 1;
+
+	if (!IS_ENABLED(CONFIG_BUG))
+		return;
+	asm volatile(
+		"	mc	0,0\n"
+		"0:	xgr	%0,%0\n"
+		"1:\n"
+		EX_TABLE(0b,1b)
+		: "+d" (val));
+	if (!val)
+		panic("Monitor call doesn't work!\n");
+}
+
+void __init trap_init(void)
+{
+	local_mcck_enable();
+	test_monitor_call();
+}
+
+static void (*pgm_check_table[128])(struct pt_regs *regs);
+
+void noinstr __do_pgm_check(struct pt_regs *regs)
+{
+	unsigned int trapnr;
+	irqentry_state_t state;
+
+	regs->int_code = S390_lowcore.pgm_int_code;
+	regs->int_parm_long = S390_lowcore.trans_exc_code;
+
+	state = irqentry_enter(regs);
+
+	if (user_mode(regs)) {
+		update_timer_sys();
+		if (!static_branch_likely(&cpu_has_bear)) {
+			if (regs->last_break < 4096)
+				regs->last_break = 1;
+		}
+		current->thread.last_break = regs->last_break;
+	}
+
+	if (S390_lowcore.pgm_code & 0x0200) {
+		/* transaction abort */
+		current->thread.trap_tdb = S390_lowcore.pgm_tdb;
+	}
+
+	if (S390_lowcore.pgm_code & PGM_INT_CODE_PER) {
+		if (user_mode(regs)) {
+			struct per_event *ev = &current->thread.per_event;
+
+			set_thread_flag(TIF_PER_TRAP);
+			ev->address = S390_lowcore.per_address;
+			ev->cause = S390_lowcore.per_code_combined;
+			ev->paid = S390_lowcore.per_access_id;
+		} else {
+			/* PER event in kernel is kprobes */
+			__arch_local_irq_ssm(regs->psw.mask & ~PSW_MASK_PER);
+			do_per_trap(regs);
+			goto out;
+		}
+	}
+
+	if (!irqs_disabled_flags(regs->psw.mask))
+		trace_hardirqs_on();
+	__arch_local_irq_ssm(regs->psw.mask & ~PSW_MASK_PER);
+
+	trapnr = regs->int_code & PGM_INT_CODE_MASK;
+	if (trapnr)
+		pgm_check_table[trapnr](regs);
+out:
+	local_irq_disable();
+	irqentry_exit(regs, state);
+}
+
+/*
+ * The program check table contains exactly 128 (0x00-0x7f) entries. Each
+ * line defines the function to be called corresponding to the program check
+ * interruption code.
+ */
+static void (*pgm_check_table[128])(struct pt_regs *regs) = {
+	[0x00]		= default_trap_handler,
+	[0x01]		= illegal_op,
+	[0x02]		= privileged_op,
+	[0x03]		= execute_exception,
+	[0x04]		= do_protection_exception,
+	[0x05]		= addressing_exception,
+	[0x06]		= specification_exception,
+	[0x07]		= data_exception,
+	[0x08]		= overflow_exception,
+	[0x09]		= divide_exception,
+	[0x0a]		= overflow_exception,
+	[0x0b]		= divide_exception,
+	[0x0c]		= hfp_overflow_exception,
+	[0x0d]		= hfp_underflow_exception,
+	[0x0e]		= hfp_significance_exception,
+	[0x0f]		= hfp_divide_exception,
+	[0x10]		= do_dat_exception,
+	[0x11]		= do_dat_exception,
+	[0x12]		= translation_specification_exception,
+	[0x13]		= special_op_exception,
+	[0x14]		= default_trap_handler,
+	[0x15]		= operand_exception,
+	[0x16]		= default_trap_handler,
+	[0x17]		= default_trap_handler,
+	[0x18]		= transaction_exception,
+	[0x19]		= default_trap_handler,
+	[0x1a]		= default_trap_handler,
+	[0x1b]		= vector_exception,
+	[0x1c]		= space_switch_exception,
+	[0x1d]		= hfp_sqrt_exception,
+	[0x1e ... 0x37] = default_trap_handler,
+	[0x38]		= do_dat_exception,
+	[0x39]		= do_dat_exception,
+	[0x3a]		= do_dat_exception,
+	[0x3b]		= do_dat_exception,
+	[0x3c]		= default_trap_handler,
+	[0x3d]		= do_secure_storage_access,
+	[0x3e]		= do_non_secure_storage_access,
+	[0x3f]		= do_secure_storage_violation,
+	[0x40]		= monitor_event_exception,
+	[0x41 ... 0x7f] = default_trap_handler,
+};
+
+#define COND_TRAP(x) asm(			\
+	".weak " __stringify(x) "\n\t"		\
+	".set  " __stringify(x) ","		\
+	__stringify(default_trap_handler))
+
+COND_TRAP(do_secure_storage_access);
+COND_TRAP(do_non_secure_storage_access);
+COND_TRAP(do_secure_storage_violation);
diff --git a/arch/s390/kernel/unwind_bc.c b/arch/s390/kernel/unwind_bc.c
new file mode 100644
index 0000000000..0ece156fdd
--- /dev/null
+++ b/arch/s390/kernel/unwind_bc.c
@@ -0,0 +1,178 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#include <linux/sched.h>
+#include <linux/sched/task.h>
+#include <linux/sched/task_stack.h>
+#include <linux/interrupt.h>
+#include <asm/sections.h>
+#include <asm/ptrace.h>
+#include <asm/bitops.h>
+#include <asm/stacktrace.h>
+#include <asm/unwind.h>
+
+unsigned long unwind_get_return_address(struct unwind_state *state)
+{
+	if (unwind_done(state))
+		return 0;
+	return __kernel_text_address(state->ip) ? state->ip : 0;
+}
+EXPORT_SYMBOL_GPL(unwind_get_return_address);
+
+static bool outside_of_stack(struct unwind_state *state, unsigned long sp)
+{
+	return (sp <= state->sp) ||
+		(sp > state->stack_info.end - sizeof(struct stack_frame));
+}
+
+static bool update_stack_info(struct unwind_state *state, unsigned long sp)
+{
+	struct stack_info *info = &state->stack_info;
+	unsigned long *mask = &state->stack_mask;
+
+	/* New stack pointer leaves the current stack */
+	if (get_stack_info(sp, state->task, info, mask) != 0 ||
+	    !on_stack(info, sp, sizeof(struct stack_frame)))
+		/* 'sp' does not point to a valid stack */
+		return false;
+	return true;
+}
+
+static inline bool is_final_pt_regs(struct unwind_state *state,
+				    struct pt_regs *regs)
+{
+	/* user mode or kernel thread pt_regs at the bottom of task stack */
+	if (task_pt_regs(state->task) == regs)
+		return true;
+
+	/* user mode pt_regs at the bottom of irq stack */
+	return state->stack_info.type == STACK_TYPE_IRQ &&
+	       state->stack_info.end - sizeof(struct pt_regs) == (unsigned long)regs &&
+	       READ_ONCE_NOCHECK(regs->psw.mask) & PSW_MASK_PSTATE;
+}
+
+bool unwind_next_frame(struct unwind_state *state)
+{
+	struct stack_info *info = &state->stack_info;
+	struct stack_frame *sf;
+	struct pt_regs *regs;
+	unsigned long sp, ip;
+	bool reliable;
+
+	regs = state->regs;
+	if (unlikely(regs)) {
+		sp = state->sp;
+		sf = (struct stack_frame *) sp;
+		ip = READ_ONCE_NOCHECK(sf->gprs[8]);
+		reliable = false;
+		regs = NULL;
+		/* skip bogus %r14 or if is the same as regs->psw.addr */
+		if (!__kernel_text_address(ip) || state->ip == unwind_recover_ret_addr(state, ip)) {
+			state->regs = NULL;
+			return unwind_next_frame(state);
+		}
+	} else {
+		sf = (struct stack_frame *) state->sp;
+		sp = READ_ONCE_NOCHECK(sf->back_chain);
+		if (likely(sp)) {
+			/* Non-zero back-chain points to the previous frame */
+			if (unlikely(outside_of_stack(state, sp))) {
+				if (!update_stack_info(state, sp))
+					goto out_err;
+			}
+			sf = (struct stack_frame *) sp;
+			ip = READ_ONCE_NOCHECK(sf->gprs[8]);
+			reliable = true;
+		} else {
+			/* No back-chain, look for a pt_regs structure */
+			sp = state->sp + STACK_FRAME_OVERHEAD;
+			if (!on_stack(info, sp, sizeof(struct pt_regs)))
+				goto out_err;
+			regs = (struct pt_regs *) sp;
+			if (is_final_pt_regs(state, regs))
+				goto out_stop;
+			ip = READ_ONCE_NOCHECK(regs->psw.addr);
+			sp = READ_ONCE_NOCHECK(regs->gprs[15]);
+			if (unlikely(outside_of_stack(state, sp))) {
+				if (!update_stack_info(state, sp))
+					goto out_err;
+			}
+			reliable = true;
+		}
+	}
+
+	/* Sanity check: ABI requires SP to be aligned 8 bytes. */
+	if (sp & 0x7)
+		goto out_err;
+
+	/* Update unwind state */
+	state->sp = sp;
+	state->regs = regs;
+	state->reliable = reliable;
+	state->ip = unwind_recover_ret_addr(state, ip);
+	return true;
+
+out_err:
+	state->error = true;
+out_stop:
+	state->stack_info.type = STACK_TYPE_UNKNOWN;
+	return false;
+}
+EXPORT_SYMBOL_GPL(unwind_next_frame);
+
+void __unwind_start(struct unwind_state *state, struct task_struct *task,
+		    struct pt_regs *regs, unsigned long first_frame)
+{
+	struct stack_info *info = &state->stack_info;
+	struct stack_frame *sf;
+	unsigned long ip, sp;
+
+	memset(state, 0, sizeof(*state));
+	state->task = task;
+	state->regs = regs;
+
+	/* Don't even attempt to start from user mode regs: */
+	if (regs && user_mode(regs)) {
+		info->type = STACK_TYPE_UNKNOWN;
+		return;
+	}
+
+	/* Get the instruction pointer from pt_regs or the stack frame */
+	if (regs) {
+		ip = regs->psw.addr;
+		sp = regs->gprs[15];
+	} else if (task == current) {
+		sp = current_frame_address();
+	} else {
+		sp = task->thread.ksp;
+	}
+
+	/* Get current stack pointer and initialize stack info */
+	if (!update_stack_info(state, sp)) {
+		/* Something is wrong with the stack pointer */
+		info->type = STACK_TYPE_UNKNOWN;
+		state->error = true;
+		return;
+	}
+
+	if (!regs) {
+		/* Stack frame is within valid stack */
+		sf = (struct stack_frame *)sp;
+		ip = READ_ONCE_NOCHECK(sf->gprs[8]);
+	}
+
+	/* Update unwind state */
+	state->sp = sp;
+	state->reliable = true;
+	state->ip = unwind_recover_ret_addr(state, ip);
+
+	if (!first_frame)
+		return;
+	/* Skip through the call chain to the specified starting frame */
+	while (!unwind_done(state)) {
+		if (on_stack(&state->stack_info, first_frame, sizeof(struct stack_frame))) {
+			if (state->sp >= first_frame)
+				break;
+		}
+		unwind_next_frame(state);
+	}
+}
+EXPORT_SYMBOL_GPL(__unwind_start);
diff --git a/arch/s390/kernel/uprobes.c b/arch/s390/kernel/uprobes.c
new file mode 100644
index 0000000000..b88345ef8b
--- /dev/null
+++ b/arch/s390/kernel/uprobes.c
@@ -0,0 +1,388 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ *  User-space Probes (UProbes) for s390
+ *
+ *    Copyright IBM Corp. 2014
+ *    Author(s): Jan Willeke,
+ */
+
+#include <linux/uaccess.h>
+#include <linux/uprobes.h>
+#include <linux/compat.h>
+#include <linux/kdebug.h>
+#include <linux/sched/task_stack.h>
+
+#include <asm/switch_to.h>
+#include <asm/facility.h>
+#include <asm/kprobes.h>
+#include <asm/dis.h>
+#include "entry.h"
+
+#define	UPROBE_TRAP_NR	UINT_MAX
+
+int arch_uprobe_analyze_insn(struct arch_uprobe *auprobe, struct mm_struct *mm,
+			     unsigned long addr)
+{
+	return probe_is_prohibited_opcode(auprobe->insn);
+}
+
+int arch_uprobe_pre_xol(struct arch_uprobe *auprobe, struct pt_regs *regs)
+{
+	if (psw_bits(regs->psw).eaba == PSW_BITS_AMODE_24BIT)
+		return -EINVAL;
+	if (!is_compat_task() && psw_bits(regs->psw).eaba == PSW_BITS_AMODE_31BIT)
+		return -EINVAL;
+	clear_thread_flag(TIF_PER_TRAP);
+	auprobe->saved_per = psw_bits(regs->psw).per;
+	auprobe->saved_int_code = regs->int_code;
+	regs->int_code = UPROBE_TRAP_NR;
+	regs->psw.addr = current->utask->xol_vaddr;
+	set_tsk_thread_flag(current, TIF_UPROBE_SINGLESTEP);
+	update_cr_regs(current);
+	return 0;
+}
+
+bool arch_uprobe_xol_was_trapped(struct task_struct *tsk)
+{
+	struct pt_regs *regs = task_pt_regs(tsk);
+
+	if (regs->int_code != UPROBE_TRAP_NR)
+		return true;
+	return false;
+}
+
+static int check_per_event(unsigned short cause, unsigned long control,
+			   struct pt_regs *regs)
+{
+	if (!(regs->psw.mask & PSW_MASK_PER))
+		return 0;
+	/* user space single step */
+	if (control == 0)
+		return 1;
+	/* over indication for storage alteration */
+	if ((control & 0x20200000) && (cause & 0x2000))
+		return 1;
+	if (cause & 0x8000) {
+		/* all branches */
+		if ((control & 0x80800000) == 0x80000000)
+			return 1;
+		/* branch into selected range */
+		if (((control & 0x80800000) == 0x80800000) &&
+		    regs->psw.addr >= current->thread.per_user.start &&
+		    regs->psw.addr <= current->thread.per_user.end)
+			return 1;
+	}
+	return 0;
+}
+
+int arch_uprobe_post_xol(struct arch_uprobe *auprobe, struct pt_regs *regs)
+{
+	int fixup = probe_get_fixup_type(auprobe->insn);
+	struct uprobe_task *utask = current->utask;
+
+	clear_tsk_thread_flag(current, TIF_UPROBE_SINGLESTEP);
+	update_cr_regs(current);
+	psw_bits(regs->psw).per = auprobe->saved_per;
+	regs->int_code = auprobe->saved_int_code;
+
+	if (fixup & FIXUP_PSW_NORMAL)
+		regs->psw.addr += utask->vaddr - utask->xol_vaddr;
+	if (fixup & FIXUP_RETURN_REGISTER) {
+		int reg = (auprobe->insn[0] & 0xf0) >> 4;
+
+		regs->gprs[reg] += utask->vaddr - utask->xol_vaddr;
+	}
+	if (fixup & FIXUP_BRANCH_NOT_TAKEN) {
+		int ilen = insn_length(auprobe->insn[0] >> 8);
+
+		if (regs->psw.addr - utask->xol_vaddr == ilen)
+			regs->psw.addr = utask->vaddr + ilen;
+	}
+	if (check_per_event(current->thread.per_event.cause,
+			    current->thread.per_user.control, regs)) {
+		/* fix per address */
+		current->thread.per_event.address = utask->vaddr;
+		/* trigger per event */
+		set_thread_flag(TIF_PER_TRAP);
+	}
+	return 0;
+}
+
+int arch_uprobe_exception_notify(struct notifier_block *self, unsigned long val,
+				 void *data)
+{
+	struct die_args *args = data;
+	struct pt_regs *regs = args->regs;
+
+	if (!user_mode(regs))
+		return NOTIFY_DONE;
+	if (regs->int_code & 0x200) /* Trap during transaction */
+		return NOTIFY_DONE;
+	switch (val) {
+	case DIE_BPT:
+		if (uprobe_pre_sstep_notifier(regs))
+			return NOTIFY_STOP;
+		break;
+	case DIE_SSTEP:
+		if (uprobe_post_sstep_notifier(regs))
+			return NOTIFY_STOP;
+		break;
+	default:
+		break;
+	}
+	return NOTIFY_DONE;
+}
+
+void arch_uprobe_abort_xol(struct arch_uprobe *auprobe, struct pt_regs *regs)
+{
+	clear_thread_flag(TIF_UPROBE_SINGLESTEP);
+	regs->int_code = auprobe->saved_int_code;
+	regs->psw.addr = current->utask->vaddr;
+	current->thread.per_event.address = current->utask->vaddr;
+}
+
+unsigned long arch_uretprobe_hijack_return_addr(unsigned long trampoline,
+						struct pt_regs *regs)
+{
+	unsigned long orig;
+
+	orig = regs->gprs[14];
+	regs->gprs[14] = trampoline;
+	return orig;
+}
+
+bool arch_uretprobe_is_alive(struct return_instance *ret, enum rp_check ctx,
+			     struct pt_regs *regs)
+{
+	if (ctx == RP_CHECK_CHAIN_CALL)
+		return user_stack_pointer(regs) <= ret->stack;
+	else
+		return user_stack_pointer(regs) < ret->stack;
+}
+
+/* Instruction Emulation */
+
+static void adjust_psw_addr(psw_t *psw, unsigned long len)
+{
+	psw->addr = __rewind_psw(*psw, -len);
+}
+
+#define EMU_ILLEGAL_OP		1
+#define EMU_SPECIFICATION	2
+#define EMU_ADDRESSING		3
+
+#define emu_load_ril(ptr, output)			\
+({							\
+	unsigned int mask = sizeof(*(ptr)) - 1;		\
+	__typeof__(*(ptr)) input;			\
+	int __rc = 0;					\
+							\
+	if ((u64 __force)ptr & mask)			\
+		__rc = EMU_SPECIFICATION;		\
+	else if (get_user(input, ptr))			\
+		__rc = EMU_ADDRESSING;			\
+	else						\
+		*(output) = input;			\
+	__rc;						\
+})
+
+#define emu_store_ril(regs, ptr, input)			\
+({							\
+	unsigned int mask = sizeof(*(ptr)) - 1;		\
+	__typeof__(ptr) __ptr = (ptr);			\
+	int __rc = 0;					\
+							\
+	if ((u64 __force)__ptr & mask)			\
+		__rc = EMU_SPECIFICATION;		\
+	else if (put_user(*(input), __ptr))		\
+		__rc = EMU_ADDRESSING;			\
+	if (__rc == 0)					\
+		sim_stor_event(regs,			\
+			       (void __force *)__ptr,	\
+			       mask + 1);		\
+	__rc;						\
+})
+
+#define emu_cmp_ril(regs, ptr, cmp)			\
+({							\
+	unsigned int mask = sizeof(*(ptr)) - 1;		\
+	__typeof__(*(ptr)) input;			\
+	int __rc = 0;					\
+							\
+	if ((u64 __force)ptr & mask)			\
+		__rc = EMU_SPECIFICATION;		\
+	else if (get_user(input, ptr))			\
+		__rc = EMU_ADDRESSING;			\
+	else if (input > *(cmp))			\
+		psw_bits((regs)->psw).cc = 1;		\
+	else if (input < *(cmp))			\
+		psw_bits((regs)->psw).cc = 2;		\
+	else						\
+		psw_bits((regs)->psw).cc = 0;		\
+	__rc;						\
+})
+
+struct insn_ril {
+	u8 opc0;
+	u8 reg	: 4;
+	u8 opc1 : 4;
+	s32 disp;
+} __packed;
+
+union split_register {
+	u64 u64;
+	u32 u32[2];
+	u16 u16[4];
+	s64 s64;
+	s32 s32[2];
+	s16 s16[4];
+};
+
+/*
+ * If user per registers are setup to trace storage alterations and an
+ * emulated store took place on a fitting address a user trap is generated.
+ */
+static void sim_stor_event(struct pt_regs *regs, void *addr, int len)
+{
+	if (!(regs->psw.mask & PSW_MASK_PER))
+		return;
+	if (!(current->thread.per_user.control & PER_EVENT_STORE))
+		return;
+	if ((void *)current->thread.per_user.start > (addr + len))
+		return;
+	if ((void *)current->thread.per_user.end < addr)
+		return;
+	current->thread.per_event.address = regs->psw.addr;
+	current->thread.per_event.cause = PER_EVENT_STORE >> 16;
+	set_thread_flag(TIF_PER_TRAP);
+}
+
+/*
+ * pc relative instructions are emulated, since parameters may not be
+ * accessible from the xol area due to range limitations.
+ */
+static void handle_insn_ril(struct arch_uprobe *auprobe, struct pt_regs *regs)
+{
+	union split_register *rx;
+	struct insn_ril *insn;
+	unsigned int ilen;
+	void *uptr;
+	int rc = 0;
+
+	insn = (struct insn_ril *) &auprobe->insn;
+	rx = (union split_register *) &regs->gprs[insn->reg];
+	uptr = (void *)(regs->psw.addr + (insn->disp * 2));
+	ilen = insn_length(insn->opc0);
+
+	switch (insn->opc0) {
+	case 0xc0:
+		switch (insn->opc1) {
+		case 0x00: /* larl */
+			rx->u64 = (unsigned long)uptr;
+			break;
+		}
+		break;
+	case 0xc4:
+		switch (insn->opc1) {
+		case 0x02: /* llhrl */
+			rc = emu_load_ril((u16 __user *)uptr, &rx->u32[1]);
+			break;
+		case 0x04: /* lghrl */
+			rc = emu_load_ril((s16 __user *)uptr, &rx->u64);
+			break;
+		case 0x05: /* lhrl */
+			rc = emu_load_ril((s16 __user *)uptr, &rx->u32[1]);
+			break;
+		case 0x06: /* llghrl */
+			rc = emu_load_ril((u16 __user *)uptr, &rx->u64);
+			break;
+		case 0x08: /* lgrl */
+			rc = emu_load_ril((u64 __user *)uptr, &rx->u64);
+			break;
+		case 0x0c: /* lgfrl */
+			rc = emu_load_ril((s32 __user *)uptr, &rx->u64);
+			break;
+		case 0x0d: /* lrl */
+			rc = emu_load_ril((u32 __user *)uptr, &rx->u32[1]);
+			break;
+		case 0x0e: /* llgfrl */
+			rc = emu_load_ril((u32 __user *)uptr, &rx->u64);
+			break;
+		case 0x07: /* sthrl */
+			rc = emu_store_ril(regs, (u16 __user *)uptr, &rx->u16[3]);
+			break;
+		case 0x0b: /* stgrl */
+			rc = emu_store_ril(regs, (u64 __user *)uptr, &rx->u64);
+			break;
+		case 0x0f: /* strl */
+			rc = emu_store_ril(regs, (u32 __user *)uptr, &rx->u32[1]);
+			break;
+		}
+		break;
+	case 0xc6:
+		switch (insn->opc1) {
+		case 0x04: /* cghrl */
+			rc = emu_cmp_ril(regs, (s16 __user *)uptr, &rx->s64);
+			break;
+		case 0x05: /* chrl */
+			rc = emu_cmp_ril(regs, (s16 __user *)uptr, &rx->s32[1]);
+			break;
+		case 0x06: /* clghrl */
+			rc = emu_cmp_ril(regs, (u16 __user *)uptr, &rx->u64);
+			break;
+		case 0x07: /* clhrl */
+			rc = emu_cmp_ril(regs, (u16 __user *)uptr, &rx->u32[1]);
+			break;
+		case 0x08: /* cgrl */
+			rc = emu_cmp_ril(regs, (s64 __user *)uptr, &rx->s64);
+			break;
+		case 0x0a: /* clgrl */
+			rc = emu_cmp_ril(regs, (u64 __user *)uptr, &rx->u64);
+			break;
+		case 0x0c: /* cgfrl */
+			rc = emu_cmp_ril(regs, (s32 __user *)uptr, &rx->s64);
+			break;
+		case 0x0d: /* crl */
+			rc = emu_cmp_ril(regs, (s32 __user *)uptr, &rx->s32[1]);
+			break;
+		case 0x0e: /* clgfrl */
+			rc = emu_cmp_ril(regs, (u32 __user *)uptr, &rx->u64);
+			break;
+		case 0x0f: /* clrl */
+			rc = emu_cmp_ril(regs, (u32 __user *)uptr, &rx->u32[1]);
+			break;
+		}
+		break;
+	}
+	adjust_psw_addr(&regs->psw, ilen);
+	switch (rc) {
+	case EMU_ILLEGAL_OP:
+		regs->int_code = ilen << 16 | 0x0001;
+		do_report_trap(regs, SIGILL, ILL_ILLOPC, NULL);
+		break;
+	case EMU_SPECIFICATION:
+		regs->int_code = ilen << 16 | 0x0006;
+		do_report_trap(regs, SIGILL, ILL_ILLOPC , NULL);
+		break;
+	case EMU_ADDRESSING:
+		regs->int_code = ilen << 16 | 0x0005;
+		do_report_trap(regs, SIGSEGV, SEGV_MAPERR, NULL);
+		break;
+	}
+}
+
+bool arch_uprobe_skip_sstep(struct arch_uprobe *auprobe, struct pt_regs *regs)
+{
+	if ((psw_bits(regs->psw).eaba == PSW_BITS_AMODE_24BIT) ||
+	    ((psw_bits(regs->psw).eaba == PSW_BITS_AMODE_31BIT) &&
+	     !is_compat_task())) {
+		regs->psw.addr = __rewind_psw(regs->psw, UPROBE_SWBP_INSN_SIZE);
+		do_report_trap(regs, SIGILL, ILL_ILLADR, NULL);
+		return true;
+	}
+	if (probe_is_insn_relative_long(auprobe->insn)) {
+		handle_insn_ril(auprobe, regs);
+		return true;
+	}
+	return false;
+}
diff --git a/arch/s390/kernel/uv.c b/arch/s390/kernel/uv.c
new file mode 100644
index 0000000000..fc07bc39e6
--- /dev/null
+++ b/arch/s390/kernel/uv.c
@@ -0,0 +1,717 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Common Ultravisor functions and initialization
+ *
+ * Copyright IBM Corp. 2019, 2020
+ */
+#define KMSG_COMPONENT "prot_virt"
+#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
+
+#include <linux/kernel.h>
+#include <linux/types.h>
+#include <linux/sizes.h>
+#include <linux/bitmap.h>
+#include <linux/memblock.h>
+#include <linux/pagemap.h>
+#include <linux/swap.h>
+#include <asm/facility.h>
+#include <asm/sections.h>
+#include <asm/uv.h>
+
+/* the bootdata_preserved fields come from ones in arch/s390/boot/uv.c */
+#ifdef CONFIG_PROTECTED_VIRTUALIZATION_GUEST
+int __bootdata_preserved(prot_virt_guest);
+#endif
+
+/*
+ * uv_info contains both host and guest information but it's currently only
+ * expected to be used within modules if it's the KVM module or for
+ * any PV guest module.
+ *
+ * The kernel itself will write these values once in uv_query_info()
+ * and then make some of them readable via a sysfs interface.
+ */
+struct uv_info __bootdata_preserved(uv_info);
+EXPORT_SYMBOL(uv_info);
+
+#if IS_ENABLED(CONFIG_KVM)
+int __bootdata_preserved(prot_virt_host);
+EXPORT_SYMBOL(prot_virt_host);
+
+static int __init uv_init(phys_addr_t stor_base, unsigned long stor_len)
+{
+	struct uv_cb_init uvcb = {
+		.header.cmd = UVC_CMD_INIT_UV,
+		.header.len = sizeof(uvcb),
+		.stor_origin = stor_base,
+		.stor_len = stor_len,
+	};
+
+	if (uv_call(0, (uint64_t)&uvcb)) {
+		pr_err("Ultravisor init failed with rc: 0x%x rrc: 0%x\n",
+		       uvcb.header.rc, uvcb.header.rrc);
+		return -1;
+	}
+	return 0;
+}
+
+void __init setup_uv(void)
+{
+	void *uv_stor_base;
+
+	if (!is_prot_virt_host())
+		return;
+
+	uv_stor_base = memblock_alloc_try_nid(
+		uv_info.uv_base_stor_len, SZ_1M, SZ_2G,
+		MEMBLOCK_ALLOC_ACCESSIBLE, NUMA_NO_NODE);
+	if (!uv_stor_base) {
+		pr_warn("Failed to reserve %lu bytes for ultravisor base storage\n",
+			uv_info.uv_base_stor_len);
+		goto fail;
+	}
+
+	if (uv_init(__pa(uv_stor_base), uv_info.uv_base_stor_len)) {
+		memblock_free(uv_stor_base, uv_info.uv_base_stor_len);
+		goto fail;
+	}
+
+	pr_info("Reserving %luMB as ultravisor base storage\n",
+		uv_info.uv_base_stor_len >> 20);
+	return;
+fail:
+	pr_info("Disabling support for protected virtualization");
+	prot_virt_host = 0;
+}
+
+/*
+ * Requests the Ultravisor to pin the page in the shared state. This will
+ * cause an intercept when the guest attempts to unshare the pinned page.
+ */
+int uv_pin_shared(unsigned long paddr)
+{
+	struct uv_cb_cfs uvcb = {
+		.header.cmd = UVC_CMD_PIN_PAGE_SHARED,
+		.header.len = sizeof(uvcb),
+		.paddr = paddr,
+	};
+
+	if (uv_call(0, (u64)&uvcb))
+		return -EINVAL;
+	return 0;
+}
+EXPORT_SYMBOL_GPL(uv_pin_shared);
+
+/*
+ * Requests the Ultravisor to destroy a guest page and make it
+ * accessible to the host. The destroy clears the page instead of
+ * exporting.
+ *
+ * @paddr: Absolute host address of page to be destroyed
+ */
+static int uv_destroy_page(unsigned long paddr)
+{
+	struct uv_cb_cfs uvcb = {
+		.header.cmd = UVC_CMD_DESTR_SEC_STOR,
+		.header.len = sizeof(uvcb),
+		.paddr = paddr
+	};
+
+	if (uv_call(0, (u64)&uvcb)) {
+		/*
+		 * Older firmware uses 107/d as an indication of a non secure
+		 * page. Let us emulate the newer variant (no-op).
+		 */
+		if (uvcb.header.rc == 0x107 && uvcb.header.rrc == 0xd)
+			return 0;
+		return -EINVAL;
+	}
+	return 0;
+}
+
+/*
+ * The caller must already hold a reference to the page
+ */
+int uv_destroy_owned_page(unsigned long paddr)
+{
+	struct page *page = phys_to_page(paddr);
+	int rc;
+
+	get_page(page);
+	rc = uv_destroy_page(paddr);
+	if (!rc)
+		clear_bit(PG_arch_1, &page->flags);
+	put_page(page);
+	return rc;
+}
+
+/*
+ * Requests the Ultravisor to encrypt a guest page and make it
+ * accessible to the host for paging (export).
+ *
+ * @paddr: Absolute host address of page to be exported
+ */
+int uv_convert_from_secure(unsigned long paddr)
+{
+	struct uv_cb_cfs uvcb = {
+		.header.cmd = UVC_CMD_CONV_FROM_SEC_STOR,
+		.header.len = sizeof(uvcb),
+		.paddr = paddr
+	};
+
+	if (uv_call(0, (u64)&uvcb))
+		return -EINVAL;
+	return 0;
+}
+
+/*
+ * The caller must already hold a reference to the page
+ */
+int uv_convert_owned_from_secure(unsigned long paddr)
+{
+	struct page *page = phys_to_page(paddr);
+	int rc;
+
+	get_page(page);
+	rc = uv_convert_from_secure(paddr);
+	if (!rc)
+		clear_bit(PG_arch_1, &page->flags);
+	put_page(page);
+	return rc;
+}
+
+/*
+ * Calculate the expected ref_count for a page that would otherwise have no
+ * further pins. This was cribbed from similar functions in other places in
+ * the kernel, but with some slight modifications. We know that a secure
+ * page can not be a huge page for example.
+ */
+static int expected_page_refs(struct page *page)
+{
+	int res;
+
+	res = page_mapcount(page);
+	if (PageSwapCache(page)) {
+		res++;
+	} else if (page_mapping(page)) {
+		res++;
+		if (page_has_private(page))
+			res++;
+	}
+	return res;
+}
+
+static int make_page_secure(struct page *page, struct uv_cb_header *uvcb)
+{
+	int expected, cc = 0;
+
+	if (PageWriteback(page))
+		return -EAGAIN;
+	expected = expected_page_refs(page);
+	if (!page_ref_freeze(page, expected))
+		return -EBUSY;
+	set_bit(PG_arch_1, &page->flags);
+	/*
+	 * If the UVC does not succeed or fail immediately, we don't want to
+	 * loop for long, or we might get stall notifications.
+	 * On the other hand, this is a complex scenario and we are holding a lot of
+	 * locks, so we can't easily sleep and reschedule. We try only once,
+	 * and if the UVC returned busy or partial completion, we return
+	 * -EAGAIN and we let the callers deal with it.
+	 */
+	cc = __uv_call(0, (u64)uvcb);
+	page_ref_unfreeze(page, expected);
+	/*
+	 * Return -ENXIO if the page was not mapped, -EINVAL for other errors.
+	 * If busy or partially completed, return -EAGAIN.
+	 */
+	if (cc == UVC_CC_OK)
+		return 0;
+	else if (cc == UVC_CC_BUSY || cc == UVC_CC_PARTIAL)
+		return -EAGAIN;
+	return uvcb->rc == 0x10a ? -ENXIO : -EINVAL;
+}
+
+/**
+ * should_export_before_import - Determine whether an export is needed
+ * before an import-like operation
+ * @uvcb: the Ultravisor control block of the UVC to be performed
+ * @mm: the mm of the process
+ *
+ * Returns whether an export is needed before every import-like operation.
+ * This is needed for shared pages, which don't trigger a secure storage
+ * exception when accessed from a different guest.
+ *
+ * Although considered as one, the Unpin Page UVC is not an actual import,
+ * so it is not affected.
+ *
+ * No export is needed also when there is only one protected VM, because the
+ * page cannot belong to the wrong VM in that case (there is no "other VM"
+ * it can belong to).
+ *
+ * Return: true if an export is needed before every import, otherwise false.
+ */
+static bool should_export_before_import(struct uv_cb_header *uvcb, struct mm_struct *mm)
+{
+	/*
+	 * The misc feature indicates, among other things, that importing a
+	 * shared page from a different protected VM will automatically also
+	 * transfer its ownership.
+	 */
+	if (uv_has_feature(BIT_UV_FEAT_MISC))
+		return false;
+	if (uvcb->cmd == UVC_CMD_UNPIN_PAGE_SHARED)
+		return false;
+	return atomic_read(&mm->context.protected_count) > 1;
+}
+
+/*
+ * Requests the Ultravisor to make a page accessible to a guest.
+ * If it's brought in the first time, it will be cleared. If
+ * it has been exported before, it will be decrypted and integrity
+ * checked.
+ */
+int gmap_make_secure(struct gmap *gmap, unsigned long gaddr, void *uvcb)
+{
+	struct vm_area_struct *vma;
+	bool local_drain = false;
+	spinlock_t *ptelock;
+	unsigned long uaddr;
+	struct page *page;
+	pte_t *ptep;
+	int rc;
+
+again:
+	rc = -EFAULT;
+	mmap_read_lock(gmap->mm);
+
+	uaddr = __gmap_translate(gmap, gaddr);
+	if (IS_ERR_VALUE(uaddr))
+		goto out;
+	vma = vma_lookup(gmap->mm, uaddr);
+	if (!vma)
+		goto out;
+	/*
+	 * Secure pages cannot be huge and userspace should not combine both.
+	 * In case userspace does it anyway this will result in an -EFAULT for
+	 * the unpack. The guest is thus never reaching secure mode. If
+	 * userspace is playing dirty tricky with mapping huge pages later
+	 * on this will result in a segmentation fault.
+	 */
+	if (is_vm_hugetlb_page(vma))
+		goto out;
+
+	rc = -ENXIO;
+	ptep = get_locked_pte(gmap->mm, uaddr, &ptelock);
+	if (!ptep)
+		goto out;
+	if (pte_present(*ptep) && !(pte_val(*ptep) & _PAGE_INVALID) && pte_write(*ptep)) {
+		page = pte_page(*ptep);
+		rc = -EAGAIN;
+		if (trylock_page(page)) {
+			if (should_export_before_import(uvcb, gmap->mm))
+				uv_convert_from_secure(page_to_phys(page));
+			rc = make_page_secure(page, uvcb);
+			unlock_page(page);
+		}
+	}
+	pte_unmap_unlock(ptep, ptelock);
+out:
+	mmap_read_unlock(gmap->mm);
+
+	if (rc == -EAGAIN) {
+		/*
+		 * If we are here because the UVC returned busy or partial
+		 * completion, this is just a useless check, but it is safe.
+		 */
+		wait_on_page_writeback(page);
+	} else if (rc == -EBUSY) {
+		/*
+		 * If we have tried a local drain and the page refcount
+		 * still does not match our expected safe value, try with a
+		 * system wide drain. This is needed if the pagevecs holding
+		 * the page are on a different CPU.
+		 */
+		if (local_drain) {
+			lru_add_drain_all();
+			/* We give up here, and let the caller try again */
+			return -EAGAIN;
+		}
+		/*
+		 * We are here if the page refcount does not match the
+		 * expected safe value. The main culprits are usually
+		 * pagevecs. With lru_add_drain() we drain the pagevecs
+		 * on the local CPU so that hopefully the refcount will
+		 * reach the expected safe value.
+		 */
+		lru_add_drain();
+		local_drain = true;
+		/* And now we try again immediately after draining */
+		goto again;
+	} else if (rc == -ENXIO) {
+		if (gmap_fault(gmap, gaddr, FAULT_FLAG_WRITE))
+			return -EFAULT;
+		return -EAGAIN;
+	}
+	return rc;
+}
+EXPORT_SYMBOL_GPL(gmap_make_secure);
+
+int gmap_convert_to_secure(struct gmap *gmap, unsigned long gaddr)
+{
+	struct uv_cb_cts uvcb = {
+		.header.cmd = UVC_CMD_CONV_TO_SEC_STOR,
+		.header.len = sizeof(uvcb),
+		.guest_handle = gmap->guest_handle,
+		.gaddr = gaddr,
+	};
+
+	return gmap_make_secure(gmap, gaddr, &uvcb);
+}
+EXPORT_SYMBOL_GPL(gmap_convert_to_secure);
+
+/**
+ * gmap_destroy_page - Destroy a guest page.
+ * @gmap: the gmap of the guest
+ * @gaddr: the guest address to destroy
+ *
+ * An attempt will be made to destroy the given guest page. If the attempt
+ * fails, an attempt is made to export the page. If both attempts fail, an
+ * appropriate error is returned.
+ */
+int gmap_destroy_page(struct gmap *gmap, unsigned long gaddr)
+{
+	struct vm_area_struct *vma;
+	unsigned long uaddr;
+	struct page *page;
+	int rc;
+
+	rc = -EFAULT;
+	mmap_read_lock(gmap->mm);
+
+	uaddr = __gmap_translate(gmap, gaddr);
+	if (IS_ERR_VALUE(uaddr))
+		goto out;
+	vma = vma_lookup(gmap->mm, uaddr);
+	if (!vma)
+		goto out;
+	/*
+	 * Huge pages should not be able to become secure
+	 */
+	if (is_vm_hugetlb_page(vma))
+		goto out;
+
+	rc = 0;
+	/* we take an extra reference here */
+	page = follow_page(vma, uaddr, FOLL_WRITE | FOLL_GET);
+	if (IS_ERR_OR_NULL(page))
+		goto out;
+	rc = uv_destroy_owned_page(page_to_phys(page));
+	/*
+	 * Fault handlers can race; it is possible that two CPUs will fault
+	 * on the same secure page. One CPU can destroy the page, reboot,
+	 * re-enter secure mode and import it, while the second CPU was
+	 * stuck at the beginning of the handler. At some point the second
+	 * CPU will be able to progress, and it will not be able to destroy
+	 * the page. In that case we do not want to terminate the process,
+	 * we instead try to export the page.
+	 */
+	if (rc)
+		rc = uv_convert_owned_from_secure(page_to_phys(page));
+	put_page(page);
+out:
+	mmap_read_unlock(gmap->mm);
+	return rc;
+}
+EXPORT_SYMBOL_GPL(gmap_destroy_page);
+
+/*
+ * To be called with the page locked or with an extra reference! This will
+ * prevent gmap_make_secure from touching the page concurrently. Having 2
+ * parallel make_page_accessible is fine, as the UV calls will become a
+ * no-op if the page is already exported.
+ */
+int arch_make_page_accessible(struct page *page)
+{
+	int rc = 0;
+
+	/* Hugepage cannot be protected, so nothing to do */
+	if (PageHuge(page))
+		return 0;
+
+	/*
+	 * PG_arch_1 is used in 3 places:
+	 * 1. for kernel page tables during early boot
+	 * 2. for storage keys of huge pages and KVM
+	 * 3. As an indication that this page might be secure. This can
+	 *    overindicate, e.g. we set the bit before calling
+	 *    convert_to_secure.
+	 * As secure pages are never huge, all 3 variants can co-exists.
+	 */
+	if (!test_bit(PG_arch_1, &page->flags))
+		return 0;
+
+	rc = uv_pin_shared(page_to_phys(page));
+	if (!rc) {
+		clear_bit(PG_arch_1, &page->flags);
+		return 0;
+	}
+
+	rc = uv_convert_from_secure(page_to_phys(page));
+	if (!rc) {
+		clear_bit(PG_arch_1, &page->flags);
+		return 0;
+	}
+
+	return rc;
+}
+EXPORT_SYMBOL_GPL(arch_make_page_accessible);
+
+#endif
+
+#if defined(CONFIG_PROTECTED_VIRTUALIZATION_GUEST) || IS_ENABLED(CONFIG_KVM)
+static ssize_t uv_query_facilities(struct kobject *kobj,
+				   struct kobj_attribute *attr, char *buf)
+{
+	return sysfs_emit(buf, "%lx\n%lx\n%lx\n%lx\n",
+			  uv_info.inst_calls_list[0],
+			  uv_info.inst_calls_list[1],
+			  uv_info.inst_calls_list[2],
+			  uv_info.inst_calls_list[3]);
+}
+
+static struct kobj_attribute uv_query_facilities_attr =
+	__ATTR(facilities, 0444, uv_query_facilities, NULL);
+
+static ssize_t uv_query_supp_se_hdr_ver(struct kobject *kobj,
+					struct kobj_attribute *attr, char *buf)
+{
+	return sysfs_emit(buf, "%lx\n", uv_info.supp_se_hdr_ver);
+}
+
+static struct kobj_attribute uv_query_supp_se_hdr_ver_attr =
+	__ATTR(supp_se_hdr_ver, 0444, uv_query_supp_se_hdr_ver, NULL);
+
+static ssize_t uv_query_supp_se_hdr_pcf(struct kobject *kobj,
+					struct kobj_attribute *attr, char *buf)
+{
+	return sysfs_emit(buf, "%lx\n", uv_info.supp_se_hdr_pcf);
+}
+
+static struct kobj_attribute uv_query_supp_se_hdr_pcf_attr =
+	__ATTR(supp_se_hdr_pcf, 0444, uv_query_supp_se_hdr_pcf, NULL);
+
+static ssize_t uv_query_dump_cpu_len(struct kobject *kobj,
+				     struct kobj_attribute *attr, char *buf)
+{
+	return sysfs_emit(buf, "%lx\n", uv_info.guest_cpu_stor_len);
+}
+
+static struct kobj_attribute uv_query_dump_cpu_len_attr =
+	__ATTR(uv_query_dump_cpu_len, 0444, uv_query_dump_cpu_len, NULL);
+
+static ssize_t uv_query_dump_storage_state_len(struct kobject *kobj,
+					       struct kobj_attribute *attr, char *buf)
+{
+	return sysfs_emit(buf, "%lx\n", uv_info.conf_dump_storage_state_len);
+}
+
+static struct kobj_attribute uv_query_dump_storage_state_len_attr =
+	__ATTR(dump_storage_state_len, 0444, uv_query_dump_storage_state_len, NULL);
+
+static ssize_t uv_query_dump_finalize_len(struct kobject *kobj,
+					  struct kobj_attribute *attr, char *buf)
+{
+	return sysfs_emit(buf, "%lx\n", uv_info.conf_dump_finalize_len);
+}
+
+static struct kobj_attribute uv_query_dump_finalize_len_attr =
+	__ATTR(dump_finalize_len, 0444, uv_query_dump_finalize_len, NULL);
+
+static ssize_t uv_query_feature_indications(struct kobject *kobj,
+					    struct kobj_attribute *attr, char *buf)
+{
+	return sysfs_emit(buf, "%lx\n", uv_info.uv_feature_indications);
+}
+
+static struct kobj_attribute uv_query_feature_indications_attr =
+	__ATTR(feature_indications, 0444, uv_query_feature_indications, NULL);
+
+static ssize_t uv_query_max_guest_cpus(struct kobject *kobj,
+				       struct kobj_attribute *attr, char *buf)
+{
+	return sysfs_emit(buf, "%d\n", uv_info.max_guest_cpu_id + 1);
+}
+
+static struct kobj_attribute uv_query_max_guest_cpus_attr =
+	__ATTR(max_cpus, 0444, uv_query_max_guest_cpus, NULL);
+
+static ssize_t uv_query_max_guest_vms(struct kobject *kobj,
+				      struct kobj_attribute *attr, char *buf)
+{
+	return sysfs_emit(buf, "%d\n", uv_info.max_num_sec_conf);
+}
+
+static struct kobj_attribute uv_query_max_guest_vms_attr =
+	__ATTR(max_guests, 0444, uv_query_max_guest_vms, NULL);
+
+static ssize_t uv_query_max_guest_addr(struct kobject *kobj,
+				       struct kobj_attribute *attr, char *buf)
+{
+	return sysfs_emit(buf, "%lx\n", uv_info.max_sec_stor_addr);
+}
+
+static struct kobj_attribute uv_query_max_guest_addr_attr =
+	__ATTR(max_address, 0444, uv_query_max_guest_addr, NULL);
+
+static ssize_t uv_query_supp_att_req_hdr_ver(struct kobject *kobj,
+					     struct kobj_attribute *attr, char *buf)
+{
+	return sysfs_emit(buf, "%lx\n", uv_info.supp_att_req_hdr_ver);
+}
+
+static struct kobj_attribute uv_query_supp_att_req_hdr_ver_attr =
+	__ATTR(supp_att_req_hdr_ver, 0444, uv_query_supp_att_req_hdr_ver, NULL);
+
+static ssize_t uv_query_supp_att_pflags(struct kobject *kobj,
+					struct kobj_attribute *attr, char *buf)
+{
+	return sysfs_emit(buf, "%lx\n", uv_info.supp_att_pflags);
+}
+
+static struct kobj_attribute uv_query_supp_att_pflags_attr =
+	__ATTR(supp_att_pflags, 0444, uv_query_supp_att_pflags, NULL);
+
+static ssize_t uv_query_supp_add_secret_req_ver(struct kobject *kobj,
+						struct kobj_attribute *attr, char *buf)
+{
+	return sysfs_emit(buf, "%lx\n", uv_info.supp_add_secret_req_ver);
+}
+
+static struct kobj_attribute uv_query_supp_add_secret_req_ver_attr =
+	__ATTR(supp_add_secret_req_ver, 0444, uv_query_supp_add_secret_req_ver, NULL);
+
+static ssize_t uv_query_supp_add_secret_pcf(struct kobject *kobj,
+					    struct kobj_attribute *attr, char *buf)
+{
+	return sysfs_emit(buf, "%lx\n", uv_info.supp_add_secret_pcf);
+}
+
+static struct kobj_attribute uv_query_supp_add_secret_pcf_attr =
+	__ATTR(supp_add_secret_pcf, 0444, uv_query_supp_add_secret_pcf, NULL);
+
+static ssize_t uv_query_supp_secret_types(struct kobject *kobj,
+					  struct kobj_attribute *attr, char *buf)
+{
+	return sysfs_emit(buf, "%lx\n", uv_info.supp_secret_types);
+}
+
+static struct kobj_attribute uv_query_supp_secret_types_attr =
+	__ATTR(supp_secret_types, 0444, uv_query_supp_secret_types, NULL);
+
+static ssize_t uv_query_max_secrets(struct kobject *kobj,
+				    struct kobj_attribute *attr, char *buf)
+{
+	return sysfs_emit(buf, "%d\n", uv_info.max_secrets);
+}
+
+static struct kobj_attribute uv_query_max_secrets_attr =
+	__ATTR(max_secrets, 0444, uv_query_max_secrets, NULL);
+
+static struct attribute *uv_query_attrs[] = {
+	&uv_query_facilities_attr.attr,
+	&uv_query_feature_indications_attr.attr,
+	&uv_query_max_guest_cpus_attr.attr,
+	&uv_query_max_guest_vms_attr.attr,
+	&uv_query_max_guest_addr_attr.attr,
+	&uv_query_supp_se_hdr_ver_attr.attr,
+	&uv_query_supp_se_hdr_pcf_attr.attr,
+	&uv_query_dump_storage_state_len_attr.attr,
+	&uv_query_dump_finalize_len_attr.attr,
+	&uv_query_dump_cpu_len_attr.attr,
+	&uv_query_supp_att_req_hdr_ver_attr.attr,
+	&uv_query_supp_att_pflags_attr.attr,
+	&uv_query_supp_add_secret_req_ver_attr.attr,
+	&uv_query_supp_add_secret_pcf_attr.attr,
+	&uv_query_supp_secret_types_attr.attr,
+	&uv_query_max_secrets_attr.attr,
+	NULL,
+};
+
+static struct attribute_group uv_query_attr_group = {
+	.attrs = uv_query_attrs,
+};
+
+static ssize_t uv_is_prot_virt_guest(struct kobject *kobj,
+				     struct kobj_attribute *attr, char *buf)
+{
+	int val = 0;
+
+#ifdef CONFIG_PROTECTED_VIRTUALIZATION_GUEST
+	val = prot_virt_guest;
+#endif
+	return sysfs_emit(buf, "%d\n", val);
+}
+
+static ssize_t uv_is_prot_virt_host(struct kobject *kobj,
+				    struct kobj_attribute *attr, char *buf)
+{
+	int val = 0;
+
+#if IS_ENABLED(CONFIG_KVM)
+	val = prot_virt_host;
+#endif
+
+	return sysfs_emit(buf, "%d\n", val);
+}
+
+static struct kobj_attribute uv_prot_virt_guest =
+	__ATTR(prot_virt_guest, 0444, uv_is_prot_virt_guest, NULL);
+
+static struct kobj_attribute uv_prot_virt_host =
+	__ATTR(prot_virt_host, 0444, uv_is_prot_virt_host, NULL);
+
+static const struct attribute *uv_prot_virt_attrs[] = {
+	&uv_prot_virt_guest.attr,
+	&uv_prot_virt_host.attr,
+	NULL,
+};
+
+static struct kset *uv_query_kset;
+static struct kobject *uv_kobj;
+
+static int __init uv_info_init(void)
+{
+	int rc = -ENOMEM;
+
+	if (!test_facility(158))
+		return 0;
+
+	uv_kobj = kobject_create_and_add("uv", firmware_kobj);
+	if (!uv_kobj)
+		return -ENOMEM;
+
+	rc = sysfs_create_files(uv_kobj, uv_prot_virt_attrs);
+	if (rc)
+		goto out_kobj;
+
+	uv_query_kset = kset_create_and_add("query", NULL, uv_kobj);
+	if (!uv_query_kset) {
+		rc = -ENOMEM;
+		goto out_ind_files;
+	}
+
+	rc = sysfs_create_group(&uv_query_kset->kobj, &uv_query_attr_group);
+	if (!rc)
+		return 0;
+
+	kset_unregister(uv_query_kset);
+out_ind_files:
+	sysfs_remove_files(uv_kobj, uv_prot_virt_attrs);
+out_kobj:
+	kobject_del(uv_kobj);
+	kobject_put(uv_kobj);
+	return rc;
+}
+device_initcall(uv_info_init);
+#endif
diff --git a/arch/s390/kernel/vdso.c b/arch/s390/kernel/vdso.c
new file mode 100644
index 0000000000..bbaefd84f1
--- /dev/null
+++ b/arch/s390/kernel/vdso.c
@@ -0,0 +1,258 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * vdso setup for s390
+ *
+ *  Copyright IBM Corp. 2008
+ *  Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com)
+ */
+
+#include <linux/binfmts.h>
+#include <linux/compat.h>
+#include <linux/elf.h>
+#include <linux/errno.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <linux/slab.h>
+#include <linux/smp.h>
+#include <linux/time_namespace.h>
+#include <linux/random.h>
+#include <vdso/datapage.h>
+#include <asm/vdso.h>
+
+extern char vdso64_start[], vdso64_end[];
+extern char vdso32_start[], vdso32_end[];
+
+static struct vm_special_mapping vvar_mapping;
+
+static union {
+	struct vdso_data	data[CS_BASES];
+	u8			page[PAGE_SIZE];
+} vdso_data_store __page_aligned_data;
+
+struct vdso_data *vdso_data = vdso_data_store.data;
+
+enum vvar_pages {
+	VVAR_DATA_PAGE_OFFSET,
+	VVAR_TIMENS_PAGE_OFFSET,
+	VVAR_NR_PAGES,
+};
+
+#ifdef CONFIG_TIME_NS
+struct vdso_data *arch_get_vdso_data(void *vvar_page)
+{
+	return (struct vdso_data *)(vvar_page);
+}
+
+/*
+ * The VVAR page layout depends on whether a task belongs to the root or
+ * non-root time namespace. Whenever a task changes its namespace, the VVAR
+ * page tables are cleared and then they will be re-faulted with a
+ * corresponding layout.
+ * See also the comment near timens_setup_vdso_data() for details.
+ */
+int vdso_join_timens(struct task_struct *task, struct time_namespace *ns)
+{
+	struct mm_struct *mm = task->mm;
+	VMA_ITERATOR(vmi, mm, 0);
+	struct vm_area_struct *vma;
+
+	mmap_read_lock(mm);
+	for_each_vma(vmi, vma) {
+		if (!vma_is_special_mapping(vma, &vvar_mapping))
+			continue;
+		zap_vma_pages(vma);
+		break;
+	}
+	mmap_read_unlock(mm);
+	return 0;
+}
+#endif
+
+static vm_fault_t vvar_fault(const struct vm_special_mapping *sm,
+			     struct vm_area_struct *vma, struct vm_fault *vmf)
+{
+	struct page *timens_page = find_timens_vvar_page(vma);
+	unsigned long addr, pfn;
+	vm_fault_t err;
+
+	switch (vmf->pgoff) {
+	case VVAR_DATA_PAGE_OFFSET:
+		pfn = virt_to_pfn(vdso_data);
+		if (timens_page) {
+			/*
+			 * Fault in VVAR page too, since it will be accessed
+			 * to get clock data anyway.
+			 */
+			addr = vmf->address + VVAR_TIMENS_PAGE_OFFSET * PAGE_SIZE;
+			err = vmf_insert_pfn(vma, addr, pfn);
+			if (unlikely(err & VM_FAULT_ERROR))
+				return err;
+			pfn = page_to_pfn(timens_page);
+		}
+		break;
+#ifdef CONFIG_TIME_NS
+	case VVAR_TIMENS_PAGE_OFFSET:
+		/*
+		 * If a task belongs to a time namespace then a namespace
+		 * specific VVAR is mapped with the VVAR_DATA_PAGE_OFFSET and
+		 * the real VVAR page is mapped with the VVAR_TIMENS_PAGE_OFFSET
+		 * offset.
+		 * See also the comment near timens_setup_vdso_data().
+		 */
+		if (!timens_page)
+			return VM_FAULT_SIGBUS;
+		pfn = virt_to_pfn(vdso_data);
+		break;
+#endif /* CONFIG_TIME_NS */
+	default:
+		return VM_FAULT_SIGBUS;
+	}
+	return vmf_insert_pfn(vma, vmf->address, pfn);
+}
+
+static int vdso_mremap(const struct vm_special_mapping *sm,
+		       struct vm_area_struct *vma)
+{
+	current->mm->context.vdso_base = vma->vm_start;
+	return 0;
+}
+
+static struct vm_special_mapping vvar_mapping = {
+	.name = "[vvar]",
+	.fault = vvar_fault,
+};
+
+static struct vm_special_mapping vdso64_mapping = {
+	.name = "[vdso]",
+	.mremap = vdso_mremap,
+};
+
+static struct vm_special_mapping vdso32_mapping = {
+	.name = "[vdso]",
+	.mremap = vdso_mremap,
+};
+
+int vdso_getcpu_init(void)
+{
+	set_tod_programmable_field(smp_processor_id());
+	return 0;
+}
+early_initcall(vdso_getcpu_init); /* Must be called before SMP init */
+
+static int map_vdso(unsigned long addr, unsigned long vdso_mapping_len)
+{
+	unsigned long vvar_start, vdso_text_start, vdso_text_len;
+	struct vm_special_mapping *vdso_mapping;
+	struct mm_struct *mm = current->mm;
+	struct vm_area_struct *vma;
+	int rc;
+
+	BUILD_BUG_ON(VVAR_NR_PAGES != __VVAR_PAGES);
+	if (mmap_write_lock_killable(mm))
+		return -EINTR;
+
+	if (is_compat_task()) {
+		vdso_text_len = vdso32_end - vdso32_start;
+		vdso_mapping = &vdso32_mapping;
+	} else {
+		vdso_text_len = vdso64_end - vdso64_start;
+		vdso_mapping = &vdso64_mapping;
+	}
+	vvar_start = get_unmapped_area(NULL, addr, vdso_mapping_len, 0, 0);
+	rc = vvar_start;
+	if (IS_ERR_VALUE(vvar_start))
+		goto out;
+	vma = _install_special_mapping(mm, vvar_start, VVAR_NR_PAGES*PAGE_SIZE,
+				       VM_READ|VM_MAYREAD|VM_IO|VM_DONTDUMP|
+				       VM_PFNMAP,
+				       &vvar_mapping);
+	rc = PTR_ERR(vma);
+	if (IS_ERR(vma))
+		goto out;
+	vdso_text_start = vvar_start + VVAR_NR_PAGES * PAGE_SIZE;
+	/* VM_MAYWRITE for COW so gdb can set breakpoints */
+	vma = _install_special_mapping(mm, vdso_text_start, vdso_text_len,
+				       VM_READ|VM_EXEC|
+				       VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC,
+				       vdso_mapping);
+	if (IS_ERR(vma)) {
+		do_munmap(mm, vvar_start, PAGE_SIZE, NULL);
+		rc = PTR_ERR(vma);
+	} else {
+		current->mm->context.vdso_base = vdso_text_start;
+		rc = 0;
+	}
+out:
+	mmap_write_unlock(mm);
+	return rc;
+}
+
+static unsigned long vdso_addr(unsigned long start, unsigned long len)
+{
+	unsigned long addr, end, offset;
+
+	/*
+	 * Round up the start address. It can start out unaligned as a result
+	 * of stack start randomization.
+	 */
+	start = PAGE_ALIGN(start);
+
+	/* Round the lowest possible end address up to a PMD boundary. */
+	end = (start + len + PMD_SIZE - 1) & PMD_MASK;
+	if (end >= VDSO_BASE)
+		end = VDSO_BASE;
+	end -= len;
+
+	if (end > start) {
+		offset = get_random_u32_below(((end - start) >> PAGE_SHIFT) + 1);
+		addr = start + (offset << PAGE_SHIFT);
+	} else {
+		addr = start;
+	}
+	return addr;
+}
+
+unsigned long vdso_size(void)
+{
+	unsigned long size = VVAR_NR_PAGES * PAGE_SIZE;
+
+	if (is_compat_task())
+		size += vdso32_end - vdso32_start;
+	else
+		size += vdso64_end - vdso64_start;
+	return PAGE_ALIGN(size);
+}
+
+int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp)
+{
+	unsigned long addr = VDSO_BASE;
+	unsigned long size = vdso_size();
+
+	if (current->flags & PF_RANDOMIZE)
+		addr = vdso_addr(current->mm->start_stack + PAGE_SIZE, size);
+	return map_vdso(addr, size);
+}
+
+static struct page ** __init vdso_setup_pages(void *start, void *end)
+{
+	int pages = (end - start) >> PAGE_SHIFT;
+	struct page **pagelist;
+	int i;
+
+	pagelist = kcalloc(pages + 1, sizeof(struct page *), GFP_KERNEL);
+	if (!pagelist)
+		panic("%s: Cannot allocate page list for VDSO", __func__);
+	for (i = 0; i < pages; i++)
+		pagelist[i] = virt_to_page(start + i * PAGE_SIZE);
+	return pagelist;
+}
+
+static int __init vdso_init(void)
+{
+	vdso64_mapping.pages = vdso_setup_pages(vdso64_start, vdso64_end);
+	if (IS_ENABLED(CONFIG_COMPAT))
+		vdso32_mapping.pages = vdso_setup_pages(vdso32_start, vdso32_end);
+	return 0;
+}
+arch_initcall(vdso_init);
diff --git a/arch/s390/kernel/vdso32/.gitignore b/arch/s390/kernel/vdso32/.gitignore
new file mode 100644
index 0000000000..5167384843
--- /dev/null
+++ b/arch/s390/kernel/vdso32/.gitignore
@@ -0,0 +1,2 @@
+# SPDX-License-Identifier: GPL-2.0-only
+vdso32.lds
diff --git a/arch/s390/kernel/vdso32/Makefile b/arch/s390/kernel/vdso32/Makefile
new file mode 100644
index 0000000000..23e868b79a
--- /dev/null
+++ b/arch/s390/kernel/vdso32/Makefile
@@ -0,0 +1,80 @@
+# SPDX-License-Identifier: GPL-2.0
+# List of files in the vdso
+
+KCOV_INSTRUMENT := n
+
+# Include the generic Makefile to check the built vdso.
+include $(srctree)/lib/vdso/Makefile
+obj-vdso32 = vdso_user_wrapper-32.o note-32.o
+
+# Build rules
+
+targets := $(obj-vdso32) vdso32.so vdso32.so.dbg
+obj-vdso32 := $(addprefix $(obj)/, $(obj-vdso32))
+
+KBUILD_AFLAGS += -DBUILD_VDSO
+KBUILD_CFLAGS += -DBUILD_VDSO -DDISABLE_BRANCH_PROFILING
+
+KBUILD_AFLAGS_32 := $(filter-out -m64,$(KBUILD_AFLAGS))
+KBUILD_AFLAGS_32 += -m31 -s
+
+KBUILD_CFLAGS_32 := $(filter-out -m64,$(KBUILD_CFLAGS))
+KBUILD_CFLAGS_32 := $(filter-out -mno-pic-data-is-text-relative,$(KBUILD_CFLAGS_32))
+KBUILD_CFLAGS_32 += -m31 -fPIC -shared -fno-common -fno-builtin
+
+LDFLAGS_vdso32.so.dbg += -fPIC -shared -soname=linux-vdso32.so.1 \
+	--hash-style=both --build-id=sha1 -melf_s390 -T
+
+$(targets:%=$(obj)/%.dbg): KBUILD_CFLAGS = $(KBUILD_CFLAGS_32)
+$(targets:%=$(obj)/%.dbg): KBUILD_AFLAGS = $(KBUILD_AFLAGS_32)
+
+obj-y += vdso32_wrapper.o
+targets += vdso32.lds
+CPPFLAGS_vdso32.lds += -P -C -U$(ARCH)
+
+# Disable gcov profiling, ubsan and kasan for VDSO code
+GCOV_PROFILE := n
+UBSAN_SANITIZE := n
+KASAN_SANITIZE := n
+KCSAN_SANITIZE := n
+
+# Force dependency (incbin is bad)
+$(obj)/vdso32_wrapper.o : $(obj)/vdso32.so
+
+quiet_cmd_vdso_and_check = VDSO    $@
+      cmd_vdso_and_check = $(cmd_ld); $(cmd_vdso_check)
+
+$(obj)/vdso32.so.dbg: $(src)/vdso32.lds $(obj-vdso32) FORCE
+	$(call if_changed,vdso_and_check)
+
+# strip rule for the .so file
+$(obj)/%.so: OBJCOPYFLAGS := -S
+$(obj)/%.so: $(obj)/%.so.dbg FORCE
+	$(call if_changed,objcopy)
+
+$(obj-vdso32): %-32.o: %.S FORCE
+	$(call if_changed_dep,vdso32as)
+
+# actual build commands
+quiet_cmd_vdso32as = VDSO32A $@
+      cmd_vdso32as = $(CC) $(a_flags) -c -o $@ $<
+quiet_cmd_vdso32cc = VDSO32C $@
+      cmd_vdso32cc = $(CC) $(c_flags) -c -o $@ $<
+
+# install commands for the unstripped file
+quiet_cmd_vdso_install = INSTALL $@
+      cmd_vdso_install = cp $(obj)/$@.dbg $(MODLIB)/vdso/$@
+
+vdso32.so: $(obj)/vdso32.so.dbg
+	@mkdir -p $(MODLIB)/vdso
+	$(call cmd,vdso_install)
+
+vdso_install: vdso32.so
+
+# Generate VDSO offsets using helper script
+gen-vdsosym := $(srctree)/$(src)/gen_vdso_offsets.sh
+quiet_cmd_vdsosym = VDSOSYM $@
+	cmd_vdsosym = $(NM) $< | $(gen-vdsosym) | LC_ALL=C sort > $@
+
+include/generated/vdso32-offsets.h: $(obj)/vdso32.so.dbg FORCE
+	$(call if_changed,vdsosym)
diff --git a/arch/s390/kernel/vdso32/gen_vdso_offsets.sh b/arch/s390/kernel/vdso32/gen_vdso_offsets.sh
new file mode 100755
index 0000000000..9c4f951e22
--- /dev/null
+++ b/arch/s390/kernel/vdso32/gen_vdso_offsets.sh
@@ -0,0 +1,15 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+
+#
+# Match symbols in the DSO that look like VDSO_*; produce a header file
+# of constant offsets into the shared object.
+#
+# Doing this inside the Makefile will break the $(filter-out) function,
+# causing Kbuild to rebuild the vdso-offsets header file every time.
+#
+# Inspired by arm64 version.
+#
+
+LC_ALL=C
+sed -n 's/\([0-9a-f]*\) . __kernel_compat_\(.*\)/\#define vdso32_offset_\2\t0x\1/p'
diff --git a/arch/s390/kernel/vdso32/note.S b/arch/s390/kernel/vdso32/note.S
new file mode 100644
index 0000000000..db19d0680a
--- /dev/null
+++ b/arch/s390/kernel/vdso32/note.S
@@ -0,0 +1,13 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * This supplies .note.* sections to go into the PT_NOTE inside the vDSO text.
+ * Here we can supply some information useful to userland.
+ */
+
+#include <linux/uts.h>
+#include <linux/version.h>
+#include <linux/elfnote.h>
+
+ELFNOTE_START(Linux, 0, "a")
+	.long LINUX_VERSION_CODE
+ELFNOTE_END
diff --git a/arch/s390/kernel/vdso32/vdso32.lds.S b/arch/s390/kernel/vdso32/vdso32.lds.S
new file mode 100644
index 0000000000..edf5ff1deb
--- /dev/null
+++ b/arch/s390/kernel/vdso32/vdso32.lds.S
@@ -0,0 +1,142 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * This is the infamous ld script for the 64 bits vdso
+ * library
+ */
+
+#include <asm/page.h>
+#include <asm/vdso.h>
+
+OUTPUT_FORMAT("elf32-s390", "elf32-s390", "elf32-s390")
+OUTPUT_ARCH(s390:31-bit)
+ENTRY(_start)
+
+SECTIONS
+{
+	PROVIDE(_vdso_data = . - __VVAR_PAGES * PAGE_SIZE);
+#ifdef CONFIG_TIME_NS
+	PROVIDE(_timens_data = _vdso_data + PAGE_SIZE);
+#endif
+	. = VDSO_LBASE + SIZEOF_HEADERS;
+
+	.hash		: { *(.hash) }			:text
+	.gnu.hash	: { *(.gnu.hash) }
+	.dynsym		: { *(.dynsym) }
+	.dynstr		: { *(.dynstr) }
+	.gnu.version	: { *(.gnu.version) }
+	.gnu.version_d	: { *(.gnu.version_d) }
+	.gnu.version_r	: { *(.gnu.version_r) }
+
+	.note		: { *(.note.*) }		:text	:note
+
+	. = ALIGN(16);
+	.text		: {
+		*(.text .stub .text.* .gnu.linkonce.t.*)
+	} :text
+	PROVIDE(__etext = .);
+	PROVIDE(_etext = .);
+	PROVIDE(etext = .);
+
+	/*
+	 * Other stuff is appended to the text segment:
+	 */
+	.rodata		: { *(.rodata .rodata.* .gnu.linkonce.r.*) }
+	.rodata1	: { *(.rodata1) }
+
+	.dynamic	: { *(.dynamic) }		:text	:dynamic
+
+	.eh_frame_hdr	: { *(.eh_frame_hdr) }		:text	:eh_frame_hdr
+	.eh_frame	: { KEEP (*(.eh_frame)) }	:text
+	.gcc_except_table : { *(.gcc_except_table .gcc_except_table.*) }
+
+	.rela.dyn ALIGN(8) : { *(.rela.dyn) }
+	.got ALIGN(8)	: { *(.got .toc) }
+	.got.plt ALIGN(8) : { *(.got.plt) }
+
+	_end = .;
+	PROVIDE(end = .);
+
+	/*
+	 * Stabs debugging sections are here too.
+	 */
+	.stab	       0 : { *(.stab) }
+	.stabstr       0 : { *(.stabstr) }
+	.stab.excl     0 : { *(.stab.excl) }
+	.stab.exclstr  0 : { *(.stab.exclstr) }
+	.stab.index    0 : { *(.stab.index) }
+	.stab.indexstr 0 : { *(.stab.indexstr) }
+	.comment       0 : { *(.comment) }
+
+	/*
+	 * DWARF debug sections.
+	 * Symbols in the DWARF debugging sections are relative to the
+	 * beginning of the section so we begin them at 0.
+	 */
+	/* DWARF 1 */
+	.debug		0 : { *(.debug) }
+	.line		0 : { *(.line) }
+	/* GNU DWARF 1 extensions */
+	.debug_srcinfo	0 : { *(.debug_srcinfo) }
+	.debug_sfnames	0 : { *(.debug_sfnames) }
+	/* DWARF 1.1 and DWARF 2 */
+	.debug_aranges	0 : { *(.debug_aranges) }
+	.debug_pubnames 0 : { *(.debug_pubnames) }
+	/* DWARF 2 */
+	.debug_info	0 : { *(.debug_info .gnu.linkonce.wi.*) }
+	.debug_abbrev	0 : { *(.debug_abbrev) }
+	.debug_line	0 : { *(.debug_line) }
+	.debug_frame	0 : { *(.debug_frame) }
+	.debug_str	0 : { *(.debug_str) }
+	.debug_loc	0 : { *(.debug_loc) }
+	.debug_macinfo	0 : { *(.debug_macinfo) }
+	/* SGI/MIPS DWARF 2 extensions */
+	.debug_weaknames 0 : { *(.debug_weaknames) }
+	.debug_funcnames 0 : { *(.debug_funcnames) }
+	.debug_typenames 0 : { *(.debug_typenames) }
+	.debug_varnames  0 : { *(.debug_varnames) }
+	/* DWARF 3 */
+	.debug_pubtypes 0 : { *(.debug_pubtypes) }
+	.debug_ranges	0 : { *(.debug_ranges) }
+	.gnu.attributes 0 : { KEEP (*(.gnu.attributes)) }
+
+	/DISCARD/	: {
+		*(.note.GNU-stack)
+		*(.branch_lt)
+		*(.data .data.* .gnu.linkonce.d.* .sdata*)
+		*(.bss .sbss .dynbss .dynsbss)
+	}
+}
+
+/*
+ * Very old versions of ld do not recognize this name token; use the constant.
+ */
+#define PT_GNU_EH_FRAME	0x6474e550
+
+/*
+ * We must supply the ELF program headers explicitly to get just one
+ * PT_LOAD segment, and set the flags explicitly to make segments read-only.
+ */
+PHDRS
+{
+	text		PT_LOAD FILEHDR PHDRS FLAGS(5);	/* PF_R|PF_X */
+	dynamic		PT_DYNAMIC FLAGS(4);		/* PF_R */
+	note		PT_NOTE FLAGS(4);		/* PF_R */
+	eh_frame_hdr	PT_GNU_EH_FRAME;
+}
+
+/*
+ * This controls what symbols we export from the DSO.
+ */
+VERSION
+{
+	VDSO_VERSION_STRING {
+	global:
+		/*
+		 * Has to be there for the kernel to find
+		 */
+		__kernel_compat_restart_syscall;
+		__kernel_compat_rt_sigreturn;
+		__kernel_compat_sigreturn;
+	local: *;
+	};
+}
diff --git a/arch/s390/kernel/vdso32/vdso32_wrapper.S b/arch/s390/kernel/vdso32/vdso32_wrapper.S
new file mode 100644
index 0000000000..de2fb93047
--- /dev/null
+++ b/arch/s390/kernel/vdso32/vdso32_wrapper.S
@@ -0,0 +1,15 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#include <linux/init.h>
+#include <linux/linkage.h>
+#include <asm/page.h>
+
+	__PAGE_ALIGNED_DATA
+
+	.globl vdso32_start, vdso32_end
+	.balign PAGE_SIZE
+vdso32_start:
+	.incbin "arch/s390/kernel/vdso32/vdso32.so"
+	.balign PAGE_SIZE
+vdso32_end:
+
+	.previous
diff --git a/arch/s390/kernel/vdso32/vdso_user_wrapper.S b/arch/s390/kernel/vdso32/vdso_user_wrapper.S
new file mode 100644
index 0000000000..2e645003fd
--- /dev/null
+++ b/arch/s390/kernel/vdso32/vdso_user_wrapper.S
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#include <linux/linkage.h>
+#include <asm/unistd.h>
+#include <asm/dwarf.h>
+
+.macro vdso_syscall func,syscall
+	.globl __kernel_compat_\func
+	.type  __kernel_compat_\func,@function
+	__ALIGN
+__kernel_compat_\func:
+	CFI_STARTPROC
+	svc	\syscall
+	/* Make sure we notice when a syscall returns, which shouldn't happen */
+	.word	0
+	CFI_ENDPROC
+	.size	__kernel_compat_\func,.-__kernel_compat_\func
+.endm
+
+vdso_syscall restart_syscall,__NR_restart_syscall
+vdso_syscall sigreturn,__NR_sigreturn
+vdso_syscall rt_sigreturn,__NR_rt_sigreturn
diff --git a/arch/s390/kernel/vdso64/.gitignore b/arch/s390/kernel/vdso64/.gitignore
new file mode 100644
index 0000000000..4ec80685fe
--- /dev/null
+++ b/arch/s390/kernel/vdso64/.gitignore
@@ -0,0 +1,2 @@
+# SPDX-License-Identifier: GPL-2.0-only
+vdso64.lds
diff --git a/arch/s390/kernel/vdso64/Makefile b/arch/s390/kernel/vdso64/Makefile
new file mode 100644
index 0000000000..fc1c6ff817
--- /dev/null
+++ b/arch/s390/kernel/vdso64/Makefile
@@ -0,0 +1,89 @@
+# SPDX-License-Identifier: GPL-2.0
+# List of files in the vdso
+
+KCOV_INSTRUMENT := n
+
+# Include the generic Makefile to check the built vdso.
+include $(srctree)/lib/vdso/Makefile
+obj-vdso64 = vdso_user_wrapper.o note.o
+obj-cvdso64 = vdso64_generic.o getcpu.o
+VDSO_CFLAGS_REMOVE := -pg $(CC_FLAGS_FTRACE) $(CC_FLAGS_EXPOLINE) $(CC_FLAGS_CHECK_STACK)
+CFLAGS_REMOVE_getcpu.o = $(VDSO_CFLAGS_REMOVE)
+CFLAGS_REMOVE_vdso64_generic.o = $(VDSO_CFLAGS_REMOVE)
+
+# Build rules
+
+targets := $(obj-vdso64) $(obj-cvdso64) vdso64.so vdso64.so.dbg
+obj-vdso64 := $(addprefix $(obj)/, $(obj-vdso64))
+obj-cvdso64 := $(addprefix $(obj)/, $(obj-cvdso64))
+
+KBUILD_AFLAGS += -DBUILD_VDSO
+KBUILD_CFLAGS += -DBUILD_VDSO -DDISABLE_BRANCH_PROFILING
+
+KBUILD_AFLAGS_64 := $(filter-out -m64,$(KBUILD_AFLAGS))
+KBUILD_AFLAGS_64 += -m64
+
+KBUILD_CFLAGS_64 := $(filter-out -m64,$(KBUILD_CFLAGS))
+KBUILD_CFLAGS_64 := $(filter-out -mno-pic-data-is-text-relative,$(KBUILD_CFLAGS_64))
+KBUILD_CFLAGS_64 += -m64 -fPIC -fno-common -fno-builtin
+ldflags-y := -fPIC -shared -soname=linux-vdso64.so.1 \
+	     --hash-style=both --build-id=sha1 -T
+
+$(targets:%=$(obj)/%.dbg): KBUILD_CFLAGS = $(KBUILD_CFLAGS_64)
+$(targets:%=$(obj)/%.dbg): KBUILD_AFLAGS = $(KBUILD_AFLAGS_64)
+
+obj-y += vdso64_wrapper.o
+targets += vdso64.lds
+CPPFLAGS_vdso64.lds += -P -C -U$(ARCH)
+
+# Disable gcov profiling, ubsan and kasan for VDSO code
+GCOV_PROFILE := n
+UBSAN_SANITIZE := n
+KASAN_SANITIZE := n
+KCSAN_SANITIZE := n
+
+# Force dependency (incbin is bad)
+$(obj)/vdso64_wrapper.o : $(obj)/vdso64.so
+
+quiet_cmd_vdso_and_check = VDSO    $@
+      cmd_vdso_and_check = $(cmd_ld); $(cmd_vdso_check)
+
+# link rule for the .so file, .lds has to be first
+$(obj)/vdso64.so.dbg: $(src)/vdso64.lds $(obj-vdso64) $(obj-cvdso64) FORCE
+	$(call if_changed,vdso_and_check)
+
+# strip rule for the .so file
+$(obj)/%.so: OBJCOPYFLAGS := -S
+$(obj)/%.so: $(obj)/%.so.dbg FORCE
+	$(call if_changed,objcopy)
+
+# assembly rules for the .S files
+$(obj-vdso64): %.o: %.S FORCE
+	$(call if_changed_dep,vdso64as)
+
+$(obj-cvdso64): %.o: %.c FORCE
+	$(call if_changed_dep,vdso64cc)
+
+# actual build commands
+quiet_cmd_vdso64as = VDSO64A $@
+      cmd_vdso64as = $(CC) $(a_flags) -c -o $@ $<
+quiet_cmd_vdso64cc = VDSO64C $@
+      cmd_vdso64cc = $(CC) $(c_flags) -c -o $@ $<
+
+# install commands for the unstripped file
+quiet_cmd_vdso_install = INSTALL $@
+      cmd_vdso_install = cp $(obj)/$@.dbg $(MODLIB)/vdso/$@
+
+vdso64.so: $(obj)/vdso64.so.dbg
+	@mkdir -p $(MODLIB)/vdso
+	$(call cmd,vdso_install)
+
+vdso_install: vdso64.so
+
+# Generate VDSO offsets using helper script
+gen-vdsosym := $(srctree)/$(src)/gen_vdso_offsets.sh
+quiet_cmd_vdsosym = VDSOSYM $@
+	cmd_vdsosym = $(NM) $< | $(gen-vdsosym) | LC_ALL=C sort > $@
+
+include/generated/vdso64-offsets.h: $(obj)/vdso64.so.dbg FORCE
+	$(call if_changed,vdsosym)
diff --git a/arch/s390/kernel/vdso64/gen_vdso_offsets.sh b/arch/s390/kernel/vdso64/gen_vdso_offsets.sh
new file mode 100755
index 0000000000..37f05cb38d
--- /dev/null
+++ b/arch/s390/kernel/vdso64/gen_vdso_offsets.sh
@@ -0,0 +1,15 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+
+#
+# Match symbols in the DSO that look like VDSO_*; produce a header file
+# of constant offsets into the shared object.
+#
+# Doing this inside the Makefile will break the $(filter-out) function,
+# causing Kbuild to rebuild the vdso-offsets header file every time.
+#
+# Inspired by arm64 version.
+#
+
+LC_ALL=C
+sed -n 's/\([0-9a-f]*\) . __kernel_\(.*\)/\#define vdso64_offset_\2\t0x\1/p'
diff --git a/arch/s390/kernel/vdso64/getcpu.c b/arch/s390/kernel/vdso64/getcpu.c
new file mode 100644
index 0000000000..5c5d4a848b
--- /dev/null
+++ b/arch/s390/kernel/vdso64/getcpu.c
@@ -0,0 +1,21 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright IBM Corp. 2020 */
+
+#include <linux/compiler.h>
+#include <linux/getcpu.h>
+#include <asm/timex.h>
+#include "vdso.h"
+
+int __s390_vdso_getcpu(unsigned *cpu, unsigned *node, struct getcpu_cache *unused)
+{
+	union tod_clock clk;
+
+	/* CPU number is stored in the programmable field of the TOD clock */
+	store_tod_clock_ext(&clk);
+	if (cpu)
+		*cpu = clk.pf;
+	/* NUMA node is always zero */
+	if (node)
+		*node = 0;
+	return 0;
+}
diff --git a/arch/s390/kernel/vdso64/note.S b/arch/s390/kernel/vdso64/note.S
new file mode 100644
index 0000000000..db19d0680a
--- /dev/null
+++ b/arch/s390/kernel/vdso64/note.S
@@ -0,0 +1,13 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * This supplies .note.* sections to go into the PT_NOTE inside the vDSO text.
+ * Here we can supply some information useful to userland.
+ */
+
+#include <linux/uts.h>
+#include <linux/version.h>
+#include <linux/elfnote.h>
+
+ELFNOTE_START(Linux, 0, "a")
+	.long LINUX_VERSION_CODE
+ELFNOTE_END
diff --git a/arch/s390/kernel/vdso64/vdso.h b/arch/s390/kernel/vdso64/vdso.h
new file mode 100644
index 0000000000..34c7a2312f
--- /dev/null
+++ b/arch/s390/kernel/vdso64/vdso.h
@@ -0,0 +1,14 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __ARCH_S390_KERNEL_VDSO64_VDSO_H
+#define __ARCH_S390_KERNEL_VDSO64_VDSO_H
+
+#include <vdso/datapage.h>
+
+struct getcpu_cache;
+
+int __s390_vdso_getcpu(unsigned *cpu, unsigned *node, struct getcpu_cache *unused);
+int __s390_vdso_gettimeofday(struct __kernel_old_timeval *tv, struct timezone *tz);
+int __s390_vdso_clock_gettime(clockid_t clock, struct __kernel_timespec *ts);
+int __s390_vdso_clock_getres(clockid_t clock, struct __kernel_timespec *ts);
+
+#endif /* __ARCH_S390_KERNEL_VDSO64_VDSO_H */
diff --git a/arch/s390/kernel/vdso64/vdso64.lds.S b/arch/s390/kernel/vdso64/vdso64.lds.S
new file mode 100644
index 0000000000..4461ea151e
--- /dev/null
+++ b/arch/s390/kernel/vdso64/vdso64.lds.S
@@ -0,0 +1,146 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * This is the infamous ld script for the 64 bits vdso
+ * library
+ */
+
+#include <asm/page.h>
+#include <asm/vdso.h>
+
+OUTPUT_FORMAT("elf64-s390", "elf64-s390", "elf64-s390")
+OUTPUT_ARCH(s390:64-bit)
+ENTRY(_start)
+
+SECTIONS
+{
+	PROVIDE(_vdso_data = . - __VVAR_PAGES * PAGE_SIZE);
+#ifdef CONFIG_TIME_NS
+	PROVIDE(_timens_data = _vdso_data + PAGE_SIZE);
+#endif
+	. = VDSO_LBASE + SIZEOF_HEADERS;
+
+	.hash		: { *(.hash) }			:text
+	.gnu.hash	: { *(.gnu.hash) }
+	.dynsym		: { *(.dynsym) }
+	.dynstr		: { *(.dynstr) }
+	.gnu.version	: { *(.gnu.version) }
+	.gnu.version_d	: { *(.gnu.version_d) }
+	.gnu.version_r	: { *(.gnu.version_r) }
+
+	.note		: { *(.note.*) }		:text	:note
+
+	. = ALIGN(16);
+	.text		: {
+		*(.text .stub .text.* .gnu.linkonce.t.*)
+	} :text
+	PROVIDE(__etext = .);
+	PROVIDE(_etext = .);
+	PROVIDE(etext = .);
+
+	/*
+	 * Other stuff is appended to the text segment:
+	 */
+	.rodata		: { *(.rodata .rodata.* .gnu.linkonce.r.*) }
+	.rodata1	: { *(.rodata1) }
+
+	.dynamic	: { *(.dynamic) }		:text	:dynamic
+
+	.eh_frame_hdr	: { *(.eh_frame_hdr) }		:text	:eh_frame_hdr
+	.eh_frame	: { KEEP (*(.eh_frame)) }	:text
+	.gcc_except_table : { *(.gcc_except_table .gcc_except_table.*) }
+
+	.rela.dyn ALIGN(8) : { *(.rela.dyn) }
+	.got ALIGN(8)	: { *(.got .toc) }
+	.got.plt ALIGN(8) : { *(.got.plt) }
+
+	_end = .;
+	PROVIDE(end = .);
+
+	/*
+	 * Stabs debugging sections are here too.
+	 */
+	.stab	       0 : { *(.stab) }
+	.stabstr       0 : { *(.stabstr) }
+	.stab.excl     0 : { *(.stab.excl) }
+	.stab.exclstr  0 : { *(.stab.exclstr) }
+	.stab.index    0 : { *(.stab.index) }
+	.stab.indexstr 0 : { *(.stab.indexstr) }
+	.comment       0 : { *(.comment) }
+
+	/*
+	 * DWARF debug sections.
+	 * Symbols in the DWARF debugging sections are relative to the
+	 * beginning of the section so we begin them at 0.
+	 */
+	/* DWARF 1 */
+	.debug		0 : { *(.debug) }
+	.line		0 : { *(.line) }
+	/* GNU DWARF 1 extensions */
+	.debug_srcinfo	0 : { *(.debug_srcinfo) }
+	.debug_sfnames	0 : { *(.debug_sfnames) }
+	/* DWARF 1.1 and DWARF 2 */
+	.debug_aranges	0 : { *(.debug_aranges) }
+	.debug_pubnames 0 : { *(.debug_pubnames) }
+	/* DWARF 2 */
+	.debug_info	0 : { *(.debug_info .gnu.linkonce.wi.*) }
+	.debug_abbrev	0 : { *(.debug_abbrev) }
+	.debug_line	0 : { *(.debug_line) }
+	.debug_frame	0 : { *(.debug_frame) }
+	.debug_str	0 : { *(.debug_str) }
+	.debug_loc	0 : { *(.debug_loc) }
+	.debug_macinfo	0 : { *(.debug_macinfo) }
+	/* SGI/MIPS DWARF 2 extensions */
+	.debug_weaknames 0 : { *(.debug_weaknames) }
+	.debug_funcnames 0 : { *(.debug_funcnames) }
+	.debug_typenames 0 : { *(.debug_typenames) }
+	.debug_varnames  0 : { *(.debug_varnames) }
+	/* DWARF 3 */
+	.debug_pubtypes 0 : { *(.debug_pubtypes) }
+	.debug_ranges	0 : { *(.debug_ranges) }
+	.gnu.attributes 0 : { KEEP (*(.gnu.attributes)) }
+
+	/DISCARD/	: {
+		*(.note.GNU-stack)
+		*(.branch_lt)
+		*(.data .data.* .gnu.linkonce.d.* .sdata*)
+		*(.bss .sbss .dynbss .dynsbss)
+	}
+}
+
+/*
+ * Very old versions of ld do not recognize this name token; use the constant.
+ */
+#define PT_GNU_EH_FRAME	0x6474e550
+
+/*
+ * We must supply the ELF program headers explicitly to get just one
+ * PT_LOAD segment, and set the flags explicitly to make segments read-only.
+ */
+PHDRS
+{
+	text		PT_LOAD FILEHDR PHDRS FLAGS(5);	/* PF_R|PF_X */
+	dynamic		PT_DYNAMIC FLAGS(4);		/* PF_R */
+	note		PT_NOTE FLAGS(4);		/* PF_R */
+	eh_frame_hdr	PT_GNU_EH_FRAME;
+}
+
+/*
+ * This controls what symbols we export from the DSO.
+ */
+VERSION
+{
+	VDSO_VERSION_STRING {
+	global:
+		/*
+		 * Has to be there for the kernel to find
+		 */
+		__kernel_gettimeofday;
+		__kernel_clock_gettime;
+		__kernel_clock_getres;
+		__kernel_getcpu;
+		__kernel_restart_syscall;
+		__kernel_rt_sigreturn;
+		__kernel_sigreturn;
+	local: *;
+	};
+}
diff --git a/arch/s390/kernel/vdso64/vdso64_generic.c b/arch/s390/kernel/vdso64/vdso64_generic.c
new file mode 100644
index 0000000000..a9aa75643c
--- /dev/null
+++ b/arch/s390/kernel/vdso64/vdso64_generic.c
@@ -0,0 +1,19 @@
+// SPDX-License-Identifier: GPL-2.0
+#include "../../../../lib/vdso/gettimeofday.c"
+#include "vdso.h"
+
+int __s390_vdso_gettimeofday(struct __kernel_old_timeval *tv,
+			     struct timezone *tz)
+{
+	return __cvdso_gettimeofday(tv, tz);
+}
+
+int __s390_vdso_clock_gettime(clockid_t clock, struct __kernel_timespec *ts)
+{
+	return __cvdso_clock_gettime(clock, ts);
+}
+
+int __s390_vdso_clock_getres(clockid_t clock, struct __kernel_timespec *ts)
+{
+	return __cvdso_clock_getres(clock, ts);
+}
diff --git a/arch/s390/kernel/vdso64/vdso64_wrapper.S b/arch/s390/kernel/vdso64/vdso64_wrapper.S
new file mode 100644
index 0000000000..6721849986
--- /dev/null
+++ b/arch/s390/kernel/vdso64/vdso64_wrapper.S
@@ -0,0 +1,15 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#include <linux/init.h>
+#include <linux/linkage.h>
+#include <asm/page.h>
+
+	__PAGE_ALIGNED_DATA
+
+	.globl vdso64_start, vdso64_end
+	.balign PAGE_SIZE
+vdso64_start:
+	.incbin "arch/s390/kernel/vdso64/vdso64.so"
+	.balign PAGE_SIZE
+vdso64_end:
+
+	.previous
diff --git a/arch/s390/kernel/vdso64/vdso_user_wrapper.S b/arch/s390/kernel/vdso64/vdso_user_wrapper.S
new file mode 100644
index 0000000000..57f62596e5
--- /dev/null
+++ b/arch/s390/kernel/vdso64/vdso_user_wrapper.S
@@ -0,0 +1,57 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#include <linux/linkage.h>
+#include <asm/vdso.h>
+#include <asm/unistd.h>
+#include <asm/asm-offsets.h>
+#include <asm/dwarf.h>
+#include <asm/ptrace.h>
+
+#define WRAPPER_FRAME_SIZE (STACK_FRAME_OVERHEAD+8)
+
+/*
+ * Older glibc version called vdso without allocating a stackframe. This wrapper
+ * is just used to allocate a stackframe. See
+ * https://sourceware.org/git/?p=glibc.git;a=commit;h=478593e6374f3818da39332260dc453cb19cfa1e
+ * for details.
+ */
+.macro vdso_func func
+	.globl __kernel_\func
+	.type  __kernel_\func,@function
+	__ALIGN
+__kernel_\func:
+	CFI_STARTPROC
+	aghi	%r15,-WRAPPER_FRAME_SIZE
+	CFI_DEF_CFA_OFFSET (STACK_FRAME_OVERHEAD + WRAPPER_FRAME_SIZE)
+	CFI_VAL_OFFSET 15, -STACK_FRAME_OVERHEAD
+	stg	%r14,STACK_FRAME_OVERHEAD(%r15)
+	brasl	%r14,__s390_vdso_\func
+	lg	%r14,STACK_FRAME_OVERHEAD(%r15)
+	aghi	%r15,WRAPPER_FRAME_SIZE
+	CFI_DEF_CFA_OFFSET STACK_FRAME_OVERHEAD
+	CFI_RESTORE 15
+	br	%r14
+	CFI_ENDPROC
+	.size	__kernel_\func,.-__kernel_\func
+.endm
+
+vdso_func gettimeofday
+vdso_func clock_getres
+vdso_func clock_gettime
+vdso_func getcpu
+
+.macro vdso_syscall func,syscall
+	.globl __kernel_\func
+	.type  __kernel_\func,@function
+	__ALIGN
+__kernel_\func:
+	CFI_STARTPROC
+	svc	\syscall
+	/* Make sure we notice when a syscall returns, which shouldn't happen */
+	.word	0
+	CFI_ENDPROC
+	.size	__kernel_\func,.-__kernel_\func
+.endm
+
+vdso_syscall restart_syscall,__NR_restart_syscall
+vdso_syscall sigreturn,__NR_sigreturn
+vdso_syscall rt_sigreturn,__NR_rt_sigreturn
diff --git a/arch/s390/kernel/vmlinux.lds.S b/arch/s390/kernel/vmlinux.lds.S
new file mode 100644
index 0000000000..2ae201ebf9
--- /dev/null
+++ b/arch/s390/kernel/vmlinux.lds.S
@@ -0,0 +1,243 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* ld script to make s390 Linux kernel
+ * Written by Martin Schwidefsky (schwidefsky@de.ibm.com)
+ */
+
+#include <asm/thread_info.h>
+#include <asm/page.h>
+#include <asm/ftrace.lds.h>
+
+/*
+ * Put .bss..swapper_pg_dir as the first thing in .bss. This will
+ * make sure it has 16k alignment.
+ */
+#define BSS_FIRST_SECTIONS *(.bss..swapper_pg_dir) \
+			   *(.bss..invalid_pg_dir)
+
+#define RO_EXCEPTION_TABLE_ALIGN	16
+
+/* Handle ro_after_init data on our own. */
+#define RO_AFTER_INIT_DATA
+
+#define RUNTIME_DISCARD_EXIT
+
+#define EMITS_PT_NOTE
+
+#include <asm-generic/vmlinux.lds.h>
+#include <asm/vmlinux.lds.h>
+
+OUTPUT_FORMAT("elf64-s390", "elf64-s390", "elf64-s390")
+OUTPUT_ARCH(s390:64-bit)
+ENTRY(startup_continue)
+jiffies = jiffies_64;
+
+PHDRS {
+	text PT_LOAD FLAGS(5);	/* R_E */
+	data PT_LOAD FLAGS(7);	/* RWE */
+	note PT_NOTE FLAGS(0);	/* ___ */
+}
+
+SECTIONS
+{
+	. = 0x100000;
+	.text : {
+		_stext = .;		/* Start of text section */
+		_text = .;		/* Text and read-only data */
+		HEAD_TEXT
+		TEXT_TEXT
+		SCHED_TEXT
+		LOCK_TEXT
+		KPROBES_TEXT
+		IRQENTRY_TEXT
+		SOFTIRQENTRY_TEXT
+		FTRACE_HOTPATCH_TRAMPOLINES_TEXT
+		*(.text.*_indirect_*)
+		*(.gnu.warning)
+		. = ALIGN(PAGE_SIZE);
+		_etext = .;		/* End of text section */
+	} :text = 0x0700
+
+	RO_DATA(PAGE_SIZE)
+
+	. = ALIGN(PAGE_SIZE);
+	_sdata = .;		/* Start of data section */
+
+	. = ALIGN(PAGE_SIZE);
+	__start_ro_after_init = .;
+	.data..ro_after_init : {
+		 *(.data..ro_after_init)
+		JUMP_TABLE_DATA
+	} :data
+	. = ALIGN(PAGE_SIZE);
+	__end_ro_after_init = .;
+
+	RW_DATA(0x100, PAGE_SIZE, THREAD_SIZE)
+	BOOT_DATA_PRESERVED
+
+	. = ALIGN(8);
+	.amode31.refs : {
+		_start_amode31_refs = .;
+		*(.amode31.refs)
+		_end_amode31_refs = .;
+	}
+
+	. = ALIGN(PAGE_SIZE);
+	_edata = .;		/* End of data section */
+
+	/* will be freed after init */
+	. = ALIGN(PAGE_SIZE);	/* Init code and data */
+	__init_begin = .;
+
+	. = ALIGN(PAGE_SIZE);
+	.init.text : AT(ADDR(.init.text) - LOAD_OFFSET) {
+		_sinittext = .;
+		INIT_TEXT
+		. = ALIGN(PAGE_SIZE);
+		_einittext = .;
+	}
+
+	/*
+	 * .exit.text is discarded at runtime, not link time,
+	 * to deal with references from __bug_table
+	*/
+	.exit.text : {
+		EXIT_TEXT
+	}
+
+	.exit.data : {
+		EXIT_DATA
+	}
+
+	/*
+	 * struct alt_inst entries. From the header (alternative.h):
+	 * "Alternative instructions for different CPU types or capabilities"
+	 * Think locking instructions on spinlocks.
+	 * Note, that it is a part of __init region.
+	 */
+	. = ALIGN(8);
+	.altinstructions : {
+		__alt_instructions = .;
+		*(.altinstructions)
+		__alt_instructions_end = .;
+	}
+
+	/*
+	 * And here are the replacement instructions. The linker sticks
+	 * them as binary blobs. The .altinstructions has enough data to
+	 * get the address and the length of them to patch the kernel safely.
+	 * Note, that it is a part of __init region.
+	 */
+	.altinstr_replacement : {
+		*(.altinstr_replacement)
+	}
+
+	/*
+	 * Table with the patch locations to undo expolines
+	*/
+	. = ALIGN(4);
+	.nospec_call_table : {
+		__nospec_call_start = . ;
+		*(.s390_indirect*)
+		__nospec_call_end = . ;
+	}
+	.nospec_return_table : {
+		__nospec_return_start = . ;
+		*(.s390_return*)
+		__nospec_return_end = . ;
+	}
+
+	BOOT_DATA
+
+	/*
+	 * .amode31 section for code, data, ex_table that need to stay
+	 * below 2 GB, even when the kernel is relocated above 2 GB.
+	 */
+	. = ALIGN(PAGE_SIZE);
+	_samode31 = .;
+	.amode31.text : {
+		_stext_amode31 = .;
+		*(.amode31.text)
+		*(.amode31.text.*_indirect_*)
+		. = ALIGN(PAGE_SIZE);
+		_etext_amode31 = .;
+	}
+	. = ALIGN(16);
+	.amode31.ex_table : {
+		_start_amode31_ex_table = .;
+		KEEP(*(.amode31.ex_table))
+		_stop_amode31_ex_table = .;
+	}
+	. = ALIGN(PAGE_SIZE);
+	.amode31.data : {
+		*(.amode31.data)
+	}
+	. = ALIGN(PAGE_SIZE);
+	_eamode31 = .;
+
+	/* early.c uses stsi, which requires page aligned data. */
+	. = ALIGN(PAGE_SIZE);
+	INIT_DATA_SECTION(0x100)
+
+	PERCPU_SECTION(0x100)
+
+	.dynsym ALIGN(8) : {
+		__dynsym_start = .;
+		*(.dynsym)
+		__dynsym_end = .;
+	}
+	.rela.dyn ALIGN(8) : {
+		__rela_dyn_start = .;
+		*(.rela*)
+		__rela_dyn_end = .;
+	}
+
+	. = ALIGN(PAGE_SIZE);
+	__init_end = .;		/* freed after init ends here */
+
+	BSS_SECTION(PAGE_SIZE, 4 * PAGE_SIZE, PAGE_SIZE)
+
+	. = ALIGN(PAGE_SIZE);
+	_end = . ;
+
+	/*
+	 * uncompressed image info used by the decompressor
+	 * it should match struct vmlinux_info
+	 */
+	.vmlinux.info 0 (INFO) : {
+		QUAD(_stext)					/* default_lma */
+		QUAD(startup_continue)				/* entry */
+		QUAD(__bss_start - _stext)			/* image_size */
+		QUAD(__bss_stop - __bss_start)			/* bss_size */
+		QUAD(__boot_data_start)				/* bootdata_off */
+		QUAD(__boot_data_end - __boot_data_start)	/* bootdata_size */
+		QUAD(__boot_data_preserved_start)		/* bootdata_preserved_off */
+		QUAD(__boot_data_preserved_end -
+		     __boot_data_preserved_start)		/* bootdata_preserved_size */
+		QUAD(__dynsym_start)				/* dynsym_start */
+		QUAD(__rela_dyn_start)				/* rela_dyn_start */
+		QUAD(__rela_dyn_end)				/* rela_dyn_end */
+		QUAD(_eamode31 - _samode31)			/* amode31_size */
+		QUAD(init_mm)
+		QUAD(swapper_pg_dir)
+		QUAD(invalid_pg_dir)
+#ifdef CONFIG_KASAN
+		QUAD(kasan_early_shadow_page)
+		QUAD(kasan_early_shadow_pte)
+		QUAD(kasan_early_shadow_pmd)
+		QUAD(kasan_early_shadow_pud)
+		QUAD(kasan_early_shadow_p4d)
+#endif
+	} :NONE
+
+	/* Debugging sections.	*/
+	STABS_DEBUG
+	DWARF_DEBUG
+	ELF_DETAILS
+
+	/* Sections to be discarded */
+	DISCARDS
+	/DISCARD/ : {
+		*(.eh_frame)
+		*(.interp)
+	}
+}
diff --git a/arch/s390/kernel/vtime.c b/arch/s390/kernel/vtime.c
new file mode 100644
index 0000000000..e0a88dcaf5
--- /dev/null
+++ b/arch/s390/kernel/vtime.c
@@ -0,0 +1,454 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ *    Virtual cpu timer based timer functions.
+ *
+ *    Copyright IBM Corp. 2004, 2012
+ *    Author(s): Jan Glauber <jan.glauber@de.ibm.com>
+ */
+
+#include <linux/kernel_stat.h>
+#include <linux/export.h>
+#include <linux/kernel.h>
+#include <linux/timex.h>
+#include <linux/types.h>
+#include <linux/time.h>
+#include <asm/alternative.h>
+#include <asm/cputime.h>
+#include <asm/vtimer.h>
+#include <asm/vtime.h>
+#include <asm/cpu_mf.h>
+#include <asm/smp.h>
+
+#include "entry.h"
+
+static void virt_timer_expire(void);
+
+static LIST_HEAD(virt_timer_list);
+static DEFINE_SPINLOCK(virt_timer_lock);
+static atomic64_t virt_timer_current;
+static atomic64_t virt_timer_elapsed;
+
+DEFINE_PER_CPU(u64, mt_cycles[8]);
+static DEFINE_PER_CPU(u64, mt_scaling_mult) = { 1 };
+static DEFINE_PER_CPU(u64, mt_scaling_div) = { 1 };
+static DEFINE_PER_CPU(u64, mt_scaling_jiffies);
+
+static inline u64 get_vtimer(void)
+{
+	u64 timer;
+
+	asm volatile("stpt %0" : "=Q" (timer));
+	return timer;
+}
+
+static inline void set_vtimer(u64 expires)
+{
+	u64 timer;
+
+	asm volatile(
+		"	stpt	%0\n"	/* Store current cpu timer value */
+		"	spt	%1"	/* Set new value imm. afterwards */
+		: "=Q" (timer) : "Q" (expires));
+	S390_lowcore.system_timer += S390_lowcore.last_update_timer - timer;
+	S390_lowcore.last_update_timer = expires;
+}
+
+static inline int virt_timer_forward(u64 elapsed)
+{
+	BUG_ON(!irqs_disabled());
+
+	if (list_empty(&virt_timer_list))
+		return 0;
+	elapsed = atomic64_add_return(elapsed, &virt_timer_elapsed);
+	return elapsed >= atomic64_read(&virt_timer_current);
+}
+
+static void update_mt_scaling(void)
+{
+	u64 cycles_new[8], *cycles_old;
+	u64 delta, fac, mult, div;
+	int i;
+
+	stcctm(MT_DIAG, smp_cpu_mtid + 1, cycles_new);
+	cycles_old = this_cpu_ptr(mt_cycles);
+	fac = 1;
+	mult = div = 0;
+	for (i = 0; i <= smp_cpu_mtid; i++) {
+		delta = cycles_new[i] - cycles_old[i];
+		div += delta;
+		mult *= i + 1;
+		mult += delta * fac;
+		fac *= i + 1;
+	}
+	div *= fac;
+	if (div > 0) {
+		/* Update scaling factor */
+		__this_cpu_write(mt_scaling_mult, mult);
+		__this_cpu_write(mt_scaling_div, div);
+		memcpy(cycles_old, cycles_new,
+		       sizeof(u64) * (smp_cpu_mtid + 1));
+	}
+	__this_cpu_write(mt_scaling_jiffies, jiffies_64);
+}
+
+static inline u64 update_tsk_timer(unsigned long *tsk_vtime, u64 new)
+{
+	u64 delta;
+
+	delta = new - *tsk_vtime;
+	*tsk_vtime = new;
+	return delta;
+}
+
+
+static inline u64 scale_vtime(u64 vtime)
+{
+	u64 mult = __this_cpu_read(mt_scaling_mult);
+	u64 div = __this_cpu_read(mt_scaling_div);
+
+	if (smp_cpu_mtid)
+		return vtime * mult / div;
+	return vtime;
+}
+
+static void account_system_index_scaled(struct task_struct *p, u64 cputime,
+					enum cpu_usage_stat index)
+{
+	p->stimescaled += cputime_to_nsecs(scale_vtime(cputime));
+	account_system_index_time(p, cputime_to_nsecs(cputime), index);
+}
+
+/*
+ * Update process times based on virtual cpu times stored by entry.S
+ * to the lowcore fields user_timer, system_timer & steal_clock.
+ */
+static int do_account_vtime(struct task_struct *tsk)
+{
+	u64 timer, clock, user, guest, system, hardirq, softirq;
+
+	timer = S390_lowcore.last_update_timer;
+	clock = S390_lowcore.last_update_clock;
+	asm volatile(
+		"	stpt	%0\n"	/* Store current cpu timer value */
+		"	stckf	%1"	/* Store current tod clock value */
+		: "=Q" (S390_lowcore.last_update_timer),
+		  "=Q" (S390_lowcore.last_update_clock)
+		: : "cc");
+	clock = S390_lowcore.last_update_clock - clock;
+	timer -= S390_lowcore.last_update_timer;
+
+	if (hardirq_count())
+		S390_lowcore.hardirq_timer += timer;
+	else
+		S390_lowcore.system_timer += timer;
+
+	/* Update MT utilization calculation */
+	if (smp_cpu_mtid &&
+	    time_after64(jiffies_64, this_cpu_read(mt_scaling_jiffies)))
+		update_mt_scaling();
+
+	/* Calculate cputime delta */
+	user = update_tsk_timer(&tsk->thread.user_timer,
+				READ_ONCE(S390_lowcore.user_timer));
+	guest = update_tsk_timer(&tsk->thread.guest_timer,
+				 READ_ONCE(S390_lowcore.guest_timer));
+	system = update_tsk_timer(&tsk->thread.system_timer,
+				  READ_ONCE(S390_lowcore.system_timer));
+	hardirq = update_tsk_timer(&tsk->thread.hardirq_timer,
+				   READ_ONCE(S390_lowcore.hardirq_timer));
+	softirq = update_tsk_timer(&tsk->thread.softirq_timer,
+				   READ_ONCE(S390_lowcore.softirq_timer));
+	S390_lowcore.steal_timer +=
+		clock - user - guest - system - hardirq - softirq;
+
+	/* Push account value */
+	if (user) {
+		account_user_time(tsk, cputime_to_nsecs(user));
+		tsk->utimescaled += cputime_to_nsecs(scale_vtime(user));
+	}
+
+	if (guest) {
+		account_guest_time(tsk, cputime_to_nsecs(guest));
+		tsk->utimescaled += cputime_to_nsecs(scale_vtime(guest));
+	}
+
+	if (system)
+		account_system_index_scaled(tsk, system, CPUTIME_SYSTEM);
+	if (hardirq)
+		account_system_index_scaled(tsk, hardirq, CPUTIME_IRQ);
+	if (softirq)
+		account_system_index_scaled(tsk, softirq, CPUTIME_SOFTIRQ);
+
+	return virt_timer_forward(user + guest + system + hardirq + softirq);
+}
+
+void vtime_task_switch(struct task_struct *prev)
+{
+	do_account_vtime(prev);
+	prev->thread.user_timer = S390_lowcore.user_timer;
+	prev->thread.guest_timer = S390_lowcore.guest_timer;
+	prev->thread.system_timer = S390_lowcore.system_timer;
+	prev->thread.hardirq_timer = S390_lowcore.hardirq_timer;
+	prev->thread.softirq_timer = S390_lowcore.softirq_timer;
+	S390_lowcore.user_timer = current->thread.user_timer;
+	S390_lowcore.guest_timer = current->thread.guest_timer;
+	S390_lowcore.system_timer = current->thread.system_timer;
+	S390_lowcore.hardirq_timer = current->thread.hardirq_timer;
+	S390_lowcore.softirq_timer = current->thread.softirq_timer;
+}
+
+/*
+ * In s390, accounting pending user time also implies
+ * accounting system time in order to correctly compute
+ * the stolen time accounting.
+ */
+void vtime_flush(struct task_struct *tsk)
+{
+	u64 steal, avg_steal;
+
+	if (do_account_vtime(tsk))
+		virt_timer_expire();
+
+	steal = S390_lowcore.steal_timer;
+	avg_steal = S390_lowcore.avg_steal_timer / 2;
+	if ((s64) steal > 0) {
+		S390_lowcore.steal_timer = 0;
+		account_steal_time(cputime_to_nsecs(steal));
+		avg_steal += steal;
+	}
+	S390_lowcore.avg_steal_timer = avg_steal;
+}
+
+static u64 vtime_delta(void)
+{
+	u64 timer = S390_lowcore.last_update_timer;
+
+	S390_lowcore.last_update_timer = get_vtimer();
+
+	return timer - S390_lowcore.last_update_timer;
+}
+
+/*
+ * Update process times based on virtual cpu times stored by entry.S
+ * to the lowcore fields user_timer, system_timer & steal_clock.
+ */
+void vtime_account_kernel(struct task_struct *tsk)
+{
+	u64 delta = vtime_delta();
+
+	if (tsk->flags & PF_VCPU)
+		S390_lowcore.guest_timer += delta;
+	else
+		S390_lowcore.system_timer += delta;
+
+	virt_timer_forward(delta);
+}
+EXPORT_SYMBOL_GPL(vtime_account_kernel);
+
+void vtime_account_softirq(struct task_struct *tsk)
+{
+	u64 delta = vtime_delta();
+
+	S390_lowcore.softirq_timer += delta;
+
+	virt_timer_forward(delta);
+}
+
+void vtime_account_hardirq(struct task_struct *tsk)
+{
+	u64 delta = vtime_delta();
+
+	S390_lowcore.hardirq_timer += delta;
+
+	virt_timer_forward(delta);
+}
+
+/*
+ * Sorted add to a list. List is linear searched until first bigger
+ * element is found.
+ */
+static void list_add_sorted(struct vtimer_list *timer, struct list_head *head)
+{
+	struct vtimer_list *tmp;
+
+	list_for_each_entry(tmp, head, entry) {
+		if (tmp->expires > timer->expires) {
+			list_add_tail(&timer->entry, &tmp->entry);
+			return;
+		}
+	}
+	list_add_tail(&timer->entry, head);
+}
+
+/*
+ * Handler for expired virtual CPU timer.
+ */
+static void virt_timer_expire(void)
+{
+	struct vtimer_list *timer, *tmp;
+	unsigned long elapsed;
+	LIST_HEAD(cb_list);
+
+	/* walk timer list, fire all expired timers */
+	spin_lock(&virt_timer_lock);
+	elapsed = atomic64_read(&virt_timer_elapsed);
+	list_for_each_entry_safe(timer, tmp, &virt_timer_list, entry) {
+		if (timer->expires < elapsed)
+			/* move expired timer to the callback queue */
+			list_move_tail(&timer->entry, &cb_list);
+		else
+			timer->expires -= elapsed;
+	}
+	if (!list_empty(&virt_timer_list)) {
+		timer = list_first_entry(&virt_timer_list,
+					 struct vtimer_list, entry);
+		atomic64_set(&virt_timer_current, timer->expires);
+	}
+	atomic64_sub(elapsed, &virt_timer_elapsed);
+	spin_unlock(&virt_timer_lock);
+
+	/* Do callbacks and recharge periodic timers */
+	list_for_each_entry_safe(timer, tmp, &cb_list, entry) {
+		list_del_init(&timer->entry);
+		timer->function(timer->data);
+		if (timer->interval) {
+			/* Recharge interval timer */
+			timer->expires = timer->interval +
+				atomic64_read(&virt_timer_elapsed);
+			spin_lock(&virt_timer_lock);
+			list_add_sorted(timer, &virt_timer_list);
+			spin_unlock(&virt_timer_lock);
+		}
+	}
+}
+
+void init_virt_timer(struct vtimer_list *timer)
+{
+	timer->function = NULL;
+	INIT_LIST_HEAD(&timer->entry);
+}
+EXPORT_SYMBOL(init_virt_timer);
+
+static inline int vtimer_pending(struct vtimer_list *timer)
+{
+	return !list_empty(&timer->entry);
+}
+
+static void internal_add_vtimer(struct vtimer_list *timer)
+{
+	if (list_empty(&virt_timer_list)) {
+		/* First timer, just program it. */
+		atomic64_set(&virt_timer_current, timer->expires);
+		atomic64_set(&virt_timer_elapsed, 0);
+		list_add(&timer->entry, &virt_timer_list);
+	} else {
+		/* Update timer against current base. */
+		timer->expires += atomic64_read(&virt_timer_elapsed);
+		if (likely((s64) timer->expires <
+			   (s64) atomic64_read(&virt_timer_current)))
+			/* The new timer expires before the current timer. */
+			atomic64_set(&virt_timer_current, timer->expires);
+		/* Insert new timer into the list. */
+		list_add_sorted(timer, &virt_timer_list);
+	}
+}
+
+static void __add_vtimer(struct vtimer_list *timer, int periodic)
+{
+	unsigned long flags;
+
+	timer->interval = periodic ? timer->expires : 0;
+	spin_lock_irqsave(&virt_timer_lock, flags);
+	internal_add_vtimer(timer);
+	spin_unlock_irqrestore(&virt_timer_lock, flags);
+}
+
+/*
+ * add_virt_timer - add a oneshot virtual CPU timer
+ */
+void add_virt_timer(struct vtimer_list *timer)
+{
+	__add_vtimer(timer, 0);
+}
+EXPORT_SYMBOL(add_virt_timer);
+
+/*
+ * add_virt_timer_int - add an interval virtual CPU timer
+ */
+void add_virt_timer_periodic(struct vtimer_list *timer)
+{
+	__add_vtimer(timer, 1);
+}
+EXPORT_SYMBOL(add_virt_timer_periodic);
+
+static int __mod_vtimer(struct vtimer_list *timer, u64 expires, int periodic)
+{
+	unsigned long flags;
+	int rc;
+
+	BUG_ON(!timer->function);
+
+	if (timer->expires == expires && vtimer_pending(timer))
+		return 1;
+	spin_lock_irqsave(&virt_timer_lock, flags);
+	rc = vtimer_pending(timer);
+	if (rc)
+		list_del_init(&timer->entry);
+	timer->interval = periodic ? expires : 0;
+	timer->expires = expires;
+	internal_add_vtimer(timer);
+	spin_unlock_irqrestore(&virt_timer_lock, flags);
+	return rc;
+}
+
+/*
+ * returns whether it has modified a pending timer (1) or not (0)
+ */
+int mod_virt_timer(struct vtimer_list *timer, u64 expires)
+{
+	return __mod_vtimer(timer, expires, 0);
+}
+EXPORT_SYMBOL(mod_virt_timer);
+
+/*
+ * returns whether it has modified a pending timer (1) or not (0)
+ */
+int mod_virt_timer_periodic(struct vtimer_list *timer, u64 expires)
+{
+	return __mod_vtimer(timer, expires, 1);
+}
+EXPORT_SYMBOL(mod_virt_timer_periodic);
+
+/*
+ * Delete a virtual timer.
+ *
+ * returns whether the deleted timer was pending (1) or not (0)
+ */
+int del_virt_timer(struct vtimer_list *timer)
+{
+	unsigned long flags;
+
+	if (!vtimer_pending(timer))
+		return 0;
+	spin_lock_irqsave(&virt_timer_lock, flags);
+	list_del_init(&timer->entry);
+	spin_unlock_irqrestore(&virt_timer_lock, flags);
+	return 1;
+}
+EXPORT_SYMBOL(del_virt_timer);
+
+/*
+ * Start the virtual CPU timer on the current CPU.
+ */
+void vtime_init(void)
+{
+	/* set initial cpu timer */
+	set_vtimer(VTIMER_MAX_SLICE);
+	/* Setup initial MT scaling values */
+	if (smp_cpu_mtid) {
+		__this_cpu_write(mt_scaling_jiffies, jiffies);
+		__this_cpu_write(mt_scaling_mult, 1);
+		__this_cpu_write(mt_scaling_div, 1);
+		stcctm(MT_DIAG, smp_cpu_mtid + 1, this_cpu_ptr(mt_cycles));
+	}
+}
diff --git a/arch/s390/kvm/Kconfig b/arch/s390/kvm/Kconfig
new file mode 100644
index 0000000000..45fdf2a9b2
--- /dev/null
+++ b/arch/s390/kvm/Kconfig
@@ -0,0 +1,59 @@
+# SPDX-License-Identifier: GPL-2.0
+#
+# KVM configuration
+#
+source "virt/kvm/Kconfig"
+
+menuconfig VIRTUALIZATION
+	def_bool y
+	prompt "KVM"
+	help
+	  Say Y here to get to see options for using your Linux host to run other
+	  operating systems inside virtual machines (guests).
+	  This option alone does not add any kernel code.
+
+	  If you say N, all options in this submenu will be skipped and disabled.
+
+if VIRTUALIZATION
+
+config KVM
+	def_tristate y
+	prompt "Kernel-based Virtual Machine (KVM) support"
+	depends on HAVE_KVM
+	select PREEMPT_NOTIFIERS
+	select HAVE_KVM_CPU_RELAX_INTERCEPT
+	select HAVE_KVM_VCPU_ASYNC_IOCTL
+	select HAVE_KVM_EVENTFD
+	select KVM_ASYNC_PF
+	select KVM_ASYNC_PF_SYNC
+	select HAVE_KVM_IRQCHIP
+	select HAVE_KVM_IRQFD
+	select HAVE_KVM_IRQ_ROUTING
+	select HAVE_KVM_INVALID_WAKEUPS
+	select HAVE_KVM_NO_POLL
+	select KVM_VFIO
+	select INTERVAL_TREE
+	select MMU_NOTIFIER
+	help
+	  Support hosting paravirtualized guest machines using the SIE
+	  virtualization capability on the mainframe. This should work
+	  on any 64bit machine.
+
+	  This module provides access to the hardware capabilities through
+	  a character device node named /dev/kvm.
+
+	  To compile this as a module, choose M here: the module
+	  will be called kvm.
+
+	  If unsure, say N.
+
+config KVM_S390_UCONTROL
+	bool "Userspace controlled virtual machines"
+	depends on KVM
+	help
+	  Allow CAP_SYS_ADMIN users to create KVM virtual machines that are
+	  controlled by userspace.
+
+	  If unsure, say N.
+
+endif # VIRTUALIZATION
diff --git a/arch/s390/kvm/Makefile b/arch/s390/kvm/Makefile
new file mode 100644
index 0000000000..02217fb4ae
--- /dev/null
+++ b/arch/s390/kvm/Makefile
@@ -0,0 +1,14 @@
+# SPDX-License-Identifier: GPL-2.0
+# Makefile for kernel virtual machines on s390
+#
+# Copyright IBM Corp. 2008
+
+include $(srctree)/virt/kvm/Makefile.kvm
+
+ccflags-y := -Ivirt/kvm -Iarch/s390/kvm
+
+kvm-y += kvm-s390.o intercept.o interrupt.o priv.o sigp.o
+kvm-y += diag.o gaccess.o guestdbg.o vsie.o pv.o
+
+kvm-$(CONFIG_VFIO_PCI_ZDEV_KVM) += pci.o
+obj-$(CONFIG_KVM) += kvm.o
diff --git a/arch/s390/kvm/diag.c b/arch/s390/kvm/diag.c
new file mode 100644
index 0000000000..3c65b8258a
--- /dev/null
+++ b/arch/s390/kvm/diag.c
@@ -0,0 +1,307 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * handling diagnose instructions
+ *
+ * Copyright IBM Corp. 2008, 2020
+ *
+ *    Author(s): Carsten Otte <cotte@de.ibm.com>
+ *               Christian Borntraeger <borntraeger@de.ibm.com>
+ */
+
+#include <linux/kvm.h>
+#include <linux/kvm_host.h>
+#include <asm/gmap.h>
+#include <asm/virtio-ccw.h>
+#include "kvm-s390.h"
+#include "trace.h"
+#include "trace-s390.h"
+#include "gaccess.h"
+
+static int diag_release_pages(struct kvm_vcpu *vcpu)
+{
+	unsigned long start, end;
+	unsigned long prefix  = kvm_s390_get_prefix(vcpu);
+
+	start = vcpu->run->s.regs.gprs[(vcpu->arch.sie_block->ipa & 0xf0) >> 4];
+	end = vcpu->run->s.regs.gprs[vcpu->arch.sie_block->ipa & 0xf] + PAGE_SIZE;
+	vcpu->stat.instruction_diagnose_10++;
+
+	if (start & ~PAGE_MASK || end & ~PAGE_MASK || start >= end
+	    || start < 2 * PAGE_SIZE)
+		return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
+
+	VCPU_EVENT(vcpu, 5, "diag release pages %lX %lX", start, end);
+
+	/*
+	 * We checked for start >= end above, so lets check for the
+	 * fast path (no prefix swap page involved)
+	 */
+	if (end <= prefix || start >= prefix + 2 * PAGE_SIZE) {
+		gmap_discard(vcpu->arch.gmap, start, end);
+	} else {
+		/*
+		 * This is slow path.  gmap_discard will check for start
+		 * so lets split this into before prefix, prefix, after
+		 * prefix and let gmap_discard make some of these calls
+		 * NOPs.
+		 */
+		gmap_discard(vcpu->arch.gmap, start, prefix);
+		if (start <= prefix)
+			gmap_discard(vcpu->arch.gmap, 0, PAGE_SIZE);
+		if (end > prefix + PAGE_SIZE)
+			gmap_discard(vcpu->arch.gmap, PAGE_SIZE, 2 * PAGE_SIZE);
+		gmap_discard(vcpu->arch.gmap, prefix + 2 * PAGE_SIZE, end);
+	}
+	return 0;
+}
+
+static int __diag_page_ref_service(struct kvm_vcpu *vcpu)
+{
+	struct prs_parm {
+		u16 code;
+		u16 subcode;
+		u16 parm_len;
+		u16 parm_version;
+		u64 token_addr;
+		u64 select_mask;
+		u64 compare_mask;
+		u64 zarch;
+	};
+	struct prs_parm parm;
+	int rc;
+	u16 rx = (vcpu->arch.sie_block->ipa & 0xf0) >> 4;
+	u16 ry = (vcpu->arch.sie_block->ipa & 0x0f);
+
+	VCPU_EVENT(vcpu, 3, "diag page reference parameter block at 0x%llx",
+		   vcpu->run->s.regs.gprs[rx]);
+	vcpu->stat.instruction_diagnose_258++;
+	if (vcpu->run->s.regs.gprs[rx] & 7)
+		return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
+	rc = read_guest(vcpu, vcpu->run->s.regs.gprs[rx], rx, &parm, sizeof(parm));
+	if (rc)
+		return kvm_s390_inject_prog_cond(vcpu, rc);
+	if (parm.parm_version != 2 || parm.parm_len < 5 || parm.code != 0x258)
+		return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
+
+	switch (parm.subcode) {
+	case 0: /* TOKEN */
+		VCPU_EVENT(vcpu, 3, "pageref token addr 0x%llx "
+			   "select mask 0x%llx compare mask 0x%llx",
+			   parm.token_addr, parm.select_mask, parm.compare_mask);
+		if (vcpu->arch.pfault_token != KVM_S390_PFAULT_TOKEN_INVALID) {
+			/*
+			 * If the pagefault handshake is already activated,
+			 * the token must not be changed.  We have to return
+			 * decimal 8 instead, as mandated in SC24-6084.
+			 */
+			vcpu->run->s.regs.gprs[ry] = 8;
+			return 0;
+		}
+
+		if ((parm.compare_mask & parm.select_mask) != parm.compare_mask ||
+		    parm.token_addr & 7 || parm.zarch != 0x8000000000000000ULL)
+			return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
+
+		if (kvm_is_error_gpa(vcpu->kvm, parm.token_addr))
+			return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
+
+		vcpu->arch.pfault_token = parm.token_addr;
+		vcpu->arch.pfault_select = parm.select_mask;
+		vcpu->arch.pfault_compare = parm.compare_mask;
+		vcpu->run->s.regs.gprs[ry] = 0;
+		rc = 0;
+		break;
+	case 1: /*
+		 * CANCEL
+		 * Specification allows to let already pending tokens survive
+		 * the cancel, therefore to reduce code complexity, we assume
+		 * all outstanding tokens are already pending.
+		 */
+		VCPU_EVENT(vcpu, 3, "pageref cancel addr 0x%llx", parm.token_addr);
+		if (parm.token_addr || parm.select_mask ||
+		    parm.compare_mask || parm.zarch)
+			return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
+
+		vcpu->run->s.regs.gprs[ry] = 0;
+		/*
+		 * If the pfault handling was not established or is already
+		 * canceled SC24-6084 requests to return decimal 4.
+		 */
+		if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
+			vcpu->run->s.regs.gprs[ry] = 4;
+		else
+			vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
+
+		rc = 0;
+		break;
+	default:
+		rc = -EOPNOTSUPP;
+		break;
+	}
+
+	return rc;
+}
+
+static int __diag_time_slice_end(struct kvm_vcpu *vcpu)
+{
+	VCPU_EVENT(vcpu, 5, "%s", "diag time slice end");
+	vcpu->stat.instruction_diagnose_44++;
+	kvm_vcpu_on_spin(vcpu, true);
+	return 0;
+}
+
+static int forward_cnt;
+static unsigned long cur_slice;
+
+static int diag9c_forwarding_overrun(void)
+{
+	/* Reset the count on a new slice */
+	if (time_after(jiffies, cur_slice)) {
+		cur_slice = jiffies;
+		forward_cnt = diag9c_forwarding_hz / HZ;
+	}
+	return forward_cnt-- <= 0 ? 1 : 0;
+}
+
+static int __diag_time_slice_end_directed(struct kvm_vcpu *vcpu)
+{
+	struct kvm_vcpu *tcpu;
+	int tcpu_cpu;
+	int tid;
+
+	tid = vcpu->run->s.regs.gprs[(vcpu->arch.sie_block->ipa & 0xf0) >> 4];
+	vcpu->stat.instruction_diagnose_9c++;
+
+	/* yield to self */
+	if (tid == vcpu->vcpu_id)
+		goto no_yield;
+
+	/* yield to invalid */
+	tcpu = kvm_get_vcpu_by_id(vcpu->kvm, tid);
+	if (!tcpu)
+		goto no_yield;
+
+	/* target guest VCPU already running */
+	tcpu_cpu = READ_ONCE(tcpu->cpu);
+	if (tcpu_cpu >= 0) {
+		if (!diag9c_forwarding_hz || diag9c_forwarding_overrun())
+			goto no_yield;
+
+		/* target host CPU already running */
+		if (!vcpu_is_preempted(tcpu_cpu))
+			goto no_yield;
+		smp_yield_cpu(tcpu_cpu);
+		VCPU_EVENT(vcpu, 5,
+			   "diag time slice end directed to %d: yield forwarded",
+			   tid);
+		vcpu->stat.diag_9c_forward++;
+		return 0;
+	}
+
+	if (kvm_vcpu_yield_to(tcpu) <= 0)
+		goto no_yield;
+
+	VCPU_EVENT(vcpu, 5, "diag time slice end directed to %d: done", tid);
+	return 0;
+no_yield:
+	VCPU_EVENT(vcpu, 5, "diag time slice end directed to %d: ignored", tid);
+	vcpu->stat.diag_9c_ignored++;
+	return 0;
+}
+
+static int __diag_ipl_functions(struct kvm_vcpu *vcpu)
+{
+	unsigned int reg = vcpu->arch.sie_block->ipa & 0xf;
+	unsigned long subcode = vcpu->run->s.regs.gprs[reg] & 0xffff;
+
+	VCPU_EVENT(vcpu, 3, "diag ipl functions, subcode %lx", subcode);
+	vcpu->stat.instruction_diagnose_308++;
+	switch (subcode) {
+	case 3:
+		vcpu->run->s390_reset_flags = KVM_S390_RESET_CLEAR;
+		break;
+	case 4:
+		vcpu->run->s390_reset_flags = 0;
+		break;
+	default:
+		return -EOPNOTSUPP;
+	}
+
+	/*
+	 * no need to check the return value of vcpu_stop as it can only have
+	 * an error for protvirt, but protvirt means user cpu state
+	 */
+	if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm))
+		kvm_s390_vcpu_stop(vcpu);
+	vcpu->run->s390_reset_flags |= KVM_S390_RESET_SUBSYSTEM;
+	vcpu->run->s390_reset_flags |= KVM_S390_RESET_IPL;
+	vcpu->run->s390_reset_flags |= KVM_S390_RESET_CPU_INIT;
+	vcpu->run->exit_reason = KVM_EXIT_S390_RESET;
+	VCPU_EVENT(vcpu, 3, "requesting userspace resets %llx",
+	  vcpu->run->s390_reset_flags);
+	trace_kvm_s390_request_resets(vcpu->run->s390_reset_flags);
+	return -EREMOTE;
+}
+
+static int __diag_virtio_hypercall(struct kvm_vcpu *vcpu)
+{
+	int ret;
+
+	vcpu->stat.instruction_diagnose_500++;
+	/* No virtio-ccw notification? Get out quickly. */
+	if (!vcpu->kvm->arch.css_support ||
+	    (vcpu->run->s.regs.gprs[1] != KVM_S390_VIRTIO_CCW_NOTIFY))
+		return -EOPNOTSUPP;
+
+	VCPU_EVENT(vcpu, 4, "diag 0x500 schid 0x%8.8x queue 0x%x cookie 0x%llx",
+			    (u32) vcpu->run->s.regs.gprs[2],
+			    (u32) vcpu->run->s.regs.gprs[3],
+			    vcpu->run->s.regs.gprs[4]);
+
+	/*
+	 * The layout is as follows:
+	 * - gpr 2 contains the subchannel id (passed as addr)
+	 * - gpr 3 contains the virtqueue index (passed as datamatch)
+	 * - gpr 4 contains the index on the bus (optionally)
+	 */
+	ret = kvm_io_bus_write_cookie(vcpu, KVM_VIRTIO_CCW_NOTIFY_BUS,
+				      vcpu->run->s.regs.gprs[2] & 0xffffffff,
+				      8, &vcpu->run->s.regs.gprs[3],
+				      vcpu->run->s.regs.gprs[4]);
+
+	/*
+	 * Return cookie in gpr 2, but don't overwrite the register if the
+	 * diagnose will be handled by userspace.
+	 */
+	if (ret != -EOPNOTSUPP)
+		vcpu->run->s.regs.gprs[2] = ret;
+	/* kvm_io_bus_write_cookie returns -EOPNOTSUPP if it found no match. */
+	return ret < 0 ? ret : 0;
+}
+
+int kvm_s390_handle_diag(struct kvm_vcpu *vcpu)
+{
+	int code = kvm_s390_get_base_disp_rs(vcpu, NULL) & 0xffff;
+
+	if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
+		return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
+
+	trace_kvm_s390_handle_diag(vcpu, code);
+	switch (code) {
+	case 0x10:
+		return diag_release_pages(vcpu);
+	case 0x44:
+		return __diag_time_slice_end(vcpu);
+	case 0x9c:
+		return __diag_time_slice_end_directed(vcpu);
+	case 0x258:
+		return __diag_page_ref_service(vcpu);
+	case 0x308:
+		return __diag_ipl_functions(vcpu);
+	case 0x500:
+		return __diag_virtio_hypercall(vcpu);
+	default:
+		vcpu->stat.instruction_diagnose_other++;
+		return -EOPNOTSUPP;
+	}
+}
diff --git a/arch/s390/kvm/gaccess.c b/arch/s390/kvm/gaccess.c
new file mode 100644
index 0000000000..6d6bc19b37
--- /dev/null
+++ b/arch/s390/kvm/gaccess.c
@@ -0,0 +1,1624 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * guest access functions
+ *
+ * Copyright IBM Corp. 2014
+ *
+ */
+
+#include <linux/vmalloc.h>
+#include <linux/mm_types.h>
+#include <linux/err.h>
+#include <linux/pgtable.h>
+#include <linux/bitfield.h>
+
+#include <asm/gmap.h>
+#include "kvm-s390.h"
+#include "gaccess.h"
+#include <asm/switch_to.h>
+
+union asce {
+	unsigned long val;
+	struct {
+		unsigned long origin : 52; /* Region- or Segment-Table Origin */
+		unsigned long	 : 2;
+		unsigned long g  : 1; /* Subspace Group Control */
+		unsigned long p  : 1; /* Private Space Control */
+		unsigned long s  : 1; /* Storage-Alteration-Event Control */
+		unsigned long x  : 1; /* Space-Switch-Event Control */
+		unsigned long r  : 1; /* Real-Space Control */
+		unsigned long	 : 1;
+		unsigned long dt : 2; /* Designation-Type Control */
+		unsigned long tl : 2; /* Region- or Segment-Table Length */
+	};
+};
+
+enum {
+	ASCE_TYPE_SEGMENT = 0,
+	ASCE_TYPE_REGION3 = 1,
+	ASCE_TYPE_REGION2 = 2,
+	ASCE_TYPE_REGION1 = 3
+};
+
+union region1_table_entry {
+	unsigned long val;
+	struct {
+		unsigned long rto: 52;/* Region-Table Origin */
+		unsigned long	 : 2;
+		unsigned long p  : 1; /* DAT-Protection Bit */
+		unsigned long	 : 1;
+		unsigned long tf : 2; /* Region-Second-Table Offset */
+		unsigned long i  : 1; /* Region-Invalid Bit */
+		unsigned long	 : 1;
+		unsigned long tt : 2; /* Table-Type Bits */
+		unsigned long tl : 2; /* Region-Second-Table Length */
+	};
+};
+
+union region2_table_entry {
+	unsigned long val;
+	struct {
+		unsigned long rto: 52;/* Region-Table Origin */
+		unsigned long	 : 2;
+		unsigned long p  : 1; /* DAT-Protection Bit */
+		unsigned long	 : 1;
+		unsigned long tf : 2; /* Region-Third-Table Offset */
+		unsigned long i  : 1; /* Region-Invalid Bit */
+		unsigned long	 : 1;
+		unsigned long tt : 2; /* Table-Type Bits */
+		unsigned long tl : 2; /* Region-Third-Table Length */
+	};
+};
+
+struct region3_table_entry_fc0 {
+	unsigned long sto: 52;/* Segment-Table Origin */
+	unsigned long	 : 1;
+	unsigned long fc : 1; /* Format-Control */
+	unsigned long p  : 1; /* DAT-Protection Bit */
+	unsigned long	 : 1;
+	unsigned long tf : 2; /* Segment-Table Offset */
+	unsigned long i  : 1; /* Region-Invalid Bit */
+	unsigned long cr : 1; /* Common-Region Bit */
+	unsigned long tt : 2; /* Table-Type Bits */
+	unsigned long tl : 2; /* Segment-Table Length */
+};
+
+struct region3_table_entry_fc1 {
+	unsigned long rfaa : 33; /* Region-Frame Absolute Address */
+	unsigned long	 : 14;
+	unsigned long av : 1; /* ACCF-Validity Control */
+	unsigned long acc: 4; /* Access-Control Bits */
+	unsigned long f  : 1; /* Fetch-Protection Bit */
+	unsigned long fc : 1; /* Format-Control */
+	unsigned long p  : 1; /* DAT-Protection Bit */
+	unsigned long iep: 1; /* Instruction-Execution-Protection */
+	unsigned long	 : 2;
+	unsigned long i  : 1; /* Region-Invalid Bit */
+	unsigned long cr : 1; /* Common-Region Bit */
+	unsigned long tt : 2; /* Table-Type Bits */
+	unsigned long	 : 2;
+};
+
+union region3_table_entry {
+	unsigned long val;
+	struct region3_table_entry_fc0 fc0;
+	struct region3_table_entry_fc1 fc1;
+	struct {
+		unsigned long	 : 53;
+		unsigned long fc : 1; /* Format-Control */
+		unsigned long	 : 4;
+		unsigned long i  : 1; /* Region-Invalid Bit */
+		unsigned long cr : 1; /* Common-Region Bit */
+		unsigned long tt : 2; /* Table-Type Bits */
+		unsigned long	 : 2;
+	};
+};
+
+struct segment_entry_fc0 {
+	unsigned long pto: 53;/* Page-Table Origin */
+	unsigned long fc : 1; /* Format-Control */
+	unsigned long p  : 1; /* DAT-Protection Bit */
+	unsigned long	 : 3;
+	unsigned long i  : 1; /* Segment-Invalid Bit */
+	unsigned long cs : 1; /* Common-Segment Bit */
+	unsigned long tt : 2; /* Table-Type Bits */
+	unsigned long	 : 2;
+};
+
+struct segment_entry_fc1 {
+	unsigned long sfaa : 44; /* Segment-Frame Absolute Address */
+	unsigned long	 : 3;
+	unsigned long av : 1; /* ACCF-Validity Control */
+	unsigned long acc: 4; /* Access-Control Bits */
+	unsigned long f  : 1; /* Fetch-Protection Bit */
+	unsigned long fc : 1; /* Format-Control */
+	unsigned long p  : 1; /* DAT-Protection Bit */
+	unsigned long iep: 1; /* Instruction-Execution-Protection */
+	unsigned long	 : 2;
+	unsigned long i  : 1; /* Segment-Invalid Bit */
+	unsigned long cs : 1; /* Common-Segment Bit */
+	unsigned long tt : 2; /* Table-Type Bits */
+	unsigned long	 : 2;
+};
+
+union segment_table_entry {
+	unsigned long val;
+	struct segment_entry_fc0 fc0;
+	struct segment_entry_fc1 fc1;
+	struct {
+		unsigned long	 : 53;
+		unsigned long fc : 1; /* Format-Control */
+		unsigned long	 : 4;
+		unsigned long i  : 1; /* Segment-Invalid Bit */
+		unsigned long cs : 1; /* Common-Segment Bit */
+		unsigned long tt : 2; /* Table-Type Bits */
+		unsigned long	 : 2;
+	};
+};
+
+enum {
+	TABLE_TYPE_SEGMENT = 0,
+	TABLE_TYPE_REGION3 = 1,
+	TABLE_TYPE_REGION2 = 2,
+	TABLE_TYPE_REGION1 = 3
+};
+
+union page_table_entry {
+	unsigned long val;
+	struct {
+		unsigned long pfra : 52; /* Page-Frame Real Address */
+		unsigned long z  : 1; /* Zero Bit */
+		unsigned long i  : 1; /* Page-Invalid Bit */
+		unsigned long p  : 1; /* DAT-Protection Bit */
+		unsigned long iep: 1; /* Instruction-Execution-Protection */
+		unsigned long	 : 8;
+	};
+};
+
+/*
+ * vaddress union in order to easily decode a virtual address into its
+ * region first index, region second index etc. parts.
+ */
+union vaddress {
+	unsigned long addr;
+	struct {
+		unsigned long rfx : 11;
+		unsigned long rsx : 11;
+		unsigned long rtx : 11;
+		unsigned long sx  : 11;
+		unsigned long px  : 8;
+		unsigned long bx  : 12;
+	};
+	struct {
+		unsigned long rfx01 : 2;
+		unsigned long	    : 9;
+		unsigned long rsx01 : 2;
+		unsigned long	    : 9;
+		unsigned long rtx01 : 2;
+		unsigned long	    : 9;
+		unsigned long sx01  : 2;
+		unsigned long	    : 29;
+	};
+};
+
+/*
+ * raddress union which will contain the result (real or absolute address)
+ * after a page table walk. The rfaa, sfaa and pfra members are used to
+ * simply assign them the value of a region, segment or page table entry.
+ */
+union raddress {
+	unsigned long addr;
+	unsigned long rfaa : 33; /* Region-Frame Absolute Address */
+	unsigned long sfaa : 44; /* Segment-Frame Absolute Address */
+	unsigned long pfra : 52; /* Page-Frame Real Address */
+};
+
+union alet {
+	u32 val;
+	struct {
+		u32 reserved : 7;
+		u32 p        : 1;
+		u32 alesn    : 8;
+		u32 alen     : 16;
+	};
+};
+
+union ald {
+	u32 val;
+	struct {
+		u32     : 1;
+		u32 alo : 24;
+		u32 all : 7;
+	};
+};
+
+struct ale {
+	unsigned long i      : 1; /* ALEN-Invalid Bit */
+	unsigned long        : 5;
+	unsigned long fo     : 1; /* Fetch-Only Bit */
+	unsigned long p      : 1; /* Private Bit */
+	unsigned long alesn  : 8; /* Access-List-Entry Sequence Number */
+	unsigned long aleax  : 16; /* Access-List-Entry Authorization Index */
+	unsigned long        : 32;
+	unsigned long        : 1;
+	unsigned long asteo  : 25; /* ASN-Second-Table-Entry Origin */
+	unsigned long        : 6;
+	unsigned long astesn : 32; /* ASTE Sequence Number */
+};
+
+struct aste {
+	unsigned long i      : 1; /* ASX-Invalid Bit */
+	unsigned long ato    : 29; /* Authority-Table Origin */
+	unsigned long        : 1;
+	unsigned long b      : 1; /* Base-Space Bit */
+	unsigned long ax     : 16; /* Authorization Index */
+	unsigned long atl    : 12; /* Authority-Table Length */
+	unsigned long        : 2;
+	unsigned long ca     : 1; /* Controlled-ASN Bit */
+	unsigned long ra     : 1; /* Reusable-ASN Bit */
+	unsigned long asce   : 64; /* Address-Space-Control Element */
+	unsigned long ald    : 32;
+	unsigned long astesn : 32;
+	/* .. more fields there */
+};
+
+int ipte_lock_held(struct kvm *kvm)
+{
+	if (sclp.has_siif) {
+		int rc;
+
+		read_lock(&kvm->arch.sca_lock);
+		rc = kvm_s390_get_ipte_control(kvm)->kh != 0;
+		read_unlock(&kvm->arch.sca_lock);
+		return rc;
+	}
+	return kvm->arch.ipte_lock_count != 0;
+}
+
+static void ipte_lock_simple(struct kvm *kvm)
+{
+	union ipte_control old, new, *ic;
+
+	mutex_lock(&kvm->arch.ipte_mutex);
+	kvm->arch.ipte_lock_count++;
+	if (kvm->arch.ipte_lock_count > 1)
+		goto out;
+retry:
+	read_lock(&kvm->arch.sca_lock);
+	ic = kvm_s390_get_ipte_control(kvm);
+	do {
+		old = READ_ONCE(*ic);
+		if (old.k) {
+			read_unlock(&kvm->arch.sca_lock);
+			cond_resched();
+			goto retry;
+		}
+		new = old;
+		new.k = 1;
+	} while (cmpxchg(&ic->val, old.val, new.val) != old.val);
+	read_unlock(&kvm->arch.sca_lock);
+out:
+	mutex_unlock(&kvm->arch.ipte_mutex);
+}
+
+static void ipte_unlock_simple(struct kvm *kvm)
+{
+	union ipte_control old, new, *ic;
+
+	mutex_lock(&kvm->arch.ipte_mutex);
+	kvm->arch.ipte_lock_count--;
+	if (kvm->arch.ipte_lock_count)
+		goto out;
+	read_lock(&kvm->arch.sca_lock);
+	ic = kvm_s390_get_ipte_control(kvm);
+	do {
+		old = READ_ONCE(*ic);
+		new = old;
+		new.k = 0;
+	} while (cmpxchg(&ic->val, old.val, new.val) != old.val);
+	read_unlock(&kvm->arch.sca_lock);
+	wake_up(&kvm->arch.ipte_wq);
+out:
+	mutex_unlock(&kvm->arch.ipte_mutex);
+}
+
+static void ipte_lock_siif(struct kvm *kvm)
+{
+	union ipte_control old, new, *ic;
+
+retry:
+	read_lock(&kvm->arch.sca_lock);
+	ic = kvm_s390_get_ipte_control(kvm);
+	do {
+		old = READ_ONCE(*ic);
+		if (old.kg) {
+			read_unlock(&kvm->arch.sca_lock);
+			cond_resched();
+			goto retry;
+		}
+		new = old;
+		new.k = 1;
+		new.kh++;
+	} while (cmpxchg(&ic->val, old.val, new.val) != old.val);
+	read_unlock(&kvm->arch.sca_lock);
+}
+
+static void ipte_unlock_siif(struct kvm *kvm)
+{
+	union ipte_control old, new, *ic;
+
+	read_lock(&kvm->arch.sca_lock);
+	ic = kvm_s390_get_ipte_control(kvm);
+	do {
+		old = READ_ONCE(*ic);
+		new = old;
+		new.kh--;
+		if (!new.kh)
+			new.k = 0;
+	} while (cmpxchg(&ic->val, old.val, new.val) != old.val);
+	read_unlock(&kvm->arch.sca_lock);
+	if (!new.kh)
+		wake_up(&kvm->arch.ipte_wq);
+}
+
+void ipte_lock(struct kvm *kvm)
+{
+	if (sclp.has_siif)
+		ipte_lock_siif(kvm);
+	else
+		ipte_lock_simple(kvm);
+}
+
+void ipte_unlock(struct kvm *kvm)
+{
+	if (sclp.has_siif)
+		ipte_unlock_siif(kvm);
+	else
+		ipte_unlock_simple(kvm);
+}
+
+static int ar_translation(struct kvm_vcpu *vcpu, union asce *asce, u8 ar,
+			  enum gacc_mode mode)
+{
+	union alet alet;
+	struct ale ale;
+	struct aste aste;
+	unsigned long ald_addr, authority_table_addr;
+	union ald ald;
+	int eax, rc;
+	u8 authority_table;
+
+	if (ar >= NUM_ACRS)
+		return -EINVAL;
+
+	save_access_regs(vcpu->run->s.regs.acrs);
+	alet.val = vcpu->run->s.regs.acrs[ar];
+
+	if (ar == 0 || alet.val == 0) {
+		asce->val = vcpu->arch.sie_block->gcr[1];
+		return 0;
+	} else if (alet.val == 1) {
+		asce->val = vcpu->arch.sie_block->gcr[7];
+		return 0;
+	}
+
+	if (alet.reserved)
+		return PGM_ALET_SPECIFICATION;
+
+	if (alet.p)
+		ald_addr = vcpu->arch.sie_block->gcr[5];
+	else
+		ald_addr = vcpu->arch.sie_block->gcr[2];
+	ald_addr &= 0x7fffffc0;
+
+	rc = read_guest_real(vcpu, ald_addr + 16, &ald.val, sizeof(union ald));
+	if (rc)
+		return rc;
+
+	if (alet.alen / 8 > ald.all)
+		return PGM_ALEN_TRANSLATION;
+
+	if (0x7fffffff - ald.alo * 128 < alet.alen * 16)
+		return PGM_ADDRESSING;
+
+	rc = read_guest_real(vcpu, ald.alo * 128 + alet.alen * 16, &ale,
+			     sizeof(struct ale));
+	if (rc)
+		return rc;
+
+	if (ale.i == 1)
+		return PGM_ALEN_TRANSLATION;
+	if (ale.alesn != alet.alesn)
+		return PGM_ALE_SEQUENCE;
+
+	rc = read_guest_real(vcpu, ale.asteo * 64, &aste, sizeof(struct aste));
+	if (rc)
+		return rc;
+
+	if (aste.i)
+		return PGM_ASTE_VALIDITY;
+	if (aste.astesn != ale.astesn)
+		return PGM_ASTE_SEQUENCE;
+
+	if (ale.p == 1) {
+		eax = (vcpu->arch.sie_block->gcr[8] >> 16) & 0xffff;
+		if (ale.aleax != eax) {
+			if (eax / 16 > aste.atl)
+				return PGM_EXTENDED_AUTHORITY;
+
+			authority_table_addr = aste.ato * 4 + eax / 4;
+
+			rc = read_guest_real(vcpu, authority_table_addr,
+					     &authority_table,
+					     sizeof(u8));
+			if (rc)
+				return rc;
+
+			if ((authority_table & (0x40 >> ((eax & 3) * 2))) == 0)
+				return PGM_EXTENDED_AUTHORITY;
+		}
+	}
+
+	if (ale.fo == 1 && mode == GACC_STORE)
+		return PGM_PROTECTION;
+
+	asce->val = aste.asce;
+	return 0;
+}
+
+struct trans_exc_code_bits {
+	unsigned long addr : 52; /* Translation-exception Address */
+	unsigned long fsi  : 2;  /* Access Exception Fetch/Store Indication */
+	unsigned long	   : 2;
+	unsigned long b56  : 1;
+	unsigned long	   : 3;
+	unsigned long b60  : 1;
+	unsigned long b61  : 1;
+	unsigned long as   : 2;  /* ASCE Identifier */
+};
+
+enum {
+	FSI_UNKNOWN = 0, /* Unknown whether fetch or store */
+	FSI_STORE   = 1, /* Exception was due to store operation */
+	FSI_FETCH   = 2  /* Exception was due to fetch operation */
+};
+
+enum prot_type {
+	PROT_TYPE_LA   = 0,
+	PROT_TYPE_KEYC = 1,
+	PROT_TYPE_ALC  = 2,
+	PROT_TYPE_DAT  = 3,
+	PROT_TYPE_IEP  = 4,
+	/* Dummy value for passing an initialized value when code != PGM_PROTECTION */
+	PROT_NONE,
+};
+
+static int trans_exc_ending(struct kvm_vcpu *vcpu, int code, unsigned long gva, u8 ar,
+			    enum gacc_mode mode, enum prot_type prot, bool terminate)
+{
+	struct kvm_s390_pgm_info *pgm = &vcpu->arch.pgm;
+	struct trans_exc_code_bits *tec;
+
+	memset(pgm, 0, sizeof(*pgm));
+	pgm->code = code;
+	tec = (struct trans_exc_code_bits *)&pgm->trans_exc_code;
+
+	switch (code) {
+	case PGM_PROTECTION:
+		switch (prot) {
+		case PROT_NONE:
+			/* We should never get here, acts like termination */
+			WARN_ON_ONCE(1);
+			break;
+		case PROT_TYPE_IEP:
+			tec->b61 = 1;
+			fallthrough;
+		case PROT_TYPE_LA:
+			tec->b56 = 1;
+			break;
+		case PROT_TYPE_KEYC:
+			tec->b60 = 1;
+			break;
+		case PROT_TYPE_ALC:
+			tec->b60 = 1;
+			fallthrough;
+		case PROT_TYPE_DAT:
+			tec->b61 = 1;
+			break;
+		}
+		if (terminate) {
+			tec->b56 = 0;
+			tec->b60 = 0;
+			tec->b61 = 0;
+		}
+		fallthrough;
+	case PGM_ASCE_TYPE:
+	case PGM_PAGE_TRANSLATION:
+	case PGM_REGION_FIRST_TRANS:
+	case PGM_REGION_SECOND_TRANS:
+	case PGM_REGION_THIRD_TRANS:
+	case PGM_SEGMENT_TRANSLATION:
+		/*
+		 * op_access_id only applies to MOVE_PAGE -> set bit 61
+		 * exc_access_id has to be set to 0 for some instructions. Both
+		 * cases have to be handled by the caller.
+		 */
+		tec->addr = gva >> PAGE_SHIFT;
+		tec->fsi = mode == GACC_STORE ? FSI_STORE : FSI_FETCH;
+		tec->as = psw_bits(vcpu->arch.sie_block->gpsw).as;
+		fallthrough;
+	case PGM_ALEN_TRANSLATION:
+	case PGM_ALE_SEQUENCE:
+	case PGM_ASTE_VALIDITY:
+	case PGM_ASTE_SEQUENCE:
+	case PGM_EXTENDED_AUTHORITY:
+		/*
+		 * We can always store exc_access_id, as it is
+		 * undefined for non-ar cases. It is undefined for
+		 * most DAT protection exceptions.
+		 */
+		pgm->exc_access_id = ar;
+		break;
+	}
+	return code;
+}
+
+static int trans_exc(struct kvm_vcpu *vcpu, int code, unsigned long gva, u8 ar,
+		     enum gacc_mode mode, enum prot_type prot)
+{
+	return trans_exc_ending(vcpu, code, gva, ar, mode, prot, false);
+}
+
+static int get_vcpu_asce(struct kvm_vcpu *vcpu, union asce *asce,
+			 unsigned long ga, u8 ar, enum gacc_mode mode)
+{
+	int rc;
+	struct psw_bits psw = psw_bits(vcpu->arch.sie_block->gpsw);
+
+	if (!psw.dat) {
+		asce->val = 0;
+		asce->r = 1;
+		return 0;
+	}
+
+	if ((mode == GACC_IFETCH) && (psw.as != PSW_BITS_AS_HOME))
+		psw.as = PSW_BITS_AS_PRIMARY;
+
+	switch (psw.as) {
+	case PSW_BITS_AS_PRIMARY:
+		asce->val = vcpu->arch.sie_block->gcr[1];
+		return 0;
+	case PSW_BITS_AS_SECONDARY:
+		asce->val = vcpu->arch.sie_block->gcr[7];
+		return 0;
+	case PSW_BITS_AS_HOME:
+		asce->val = vcpu->arch.sie_block->gcr[13];
+		return 0;
+	case PSW_BITS_AS_ACCREG:
+		rc = ar_translation(vcpu, asce, ar, mode);
+		if (rc > 0)
+			return trans_exc(vcpu, rc, ga, ar, mode, PROT_TYPE_ALC);
+		return rc;
+	}
+	return 0;
+}
+
+static int deref_table(struct kvm *kvm, unsigned long gpa, unsigned long *val)
+{
+	return kvm_read_guest(kvm, gpa, val, sizeof(*val));
+}
+
+/**
+ * guest_translate - translate a guest virtual into a guest absolute address
+ * @vcpu: virtual cpu
+ * @gva: guest virtual address
+ * @gpa: points to where guest physical (absolute) address should be stored
+ * @asce: effective asce
+ * @mode: indicates the access mode to be used
+ * @prot: returns the type for protection exceptions
+ *
+ * Translate a guest virtual address into a guest absolute address by means
+ * of dynamic address translation as specified by the architecture.
+ * If the resulting absolute address is not available in the configuration
+ * an addressing exception is indicated and @gpa will not be changed.
+ *
+ * Returns: - zero on success; @gpa contains the resulting absolute address
+ *	    - a negative value if guest access failed due to e.g. broken
+ *	      guest mapping
+ *	    - a positive value if an access exception happened. In this case
+ *	      the returned value is the program interruption code as defined
+ *	      by the architecture
+ */
+static unsigned long guest_translate(struct kvm_vcpu *vcpu, unsigned long gva,
+				     unsigned long *gpa, const union asce asce,
+				     enum gacc_mode mode, enum prot_type *prot)
+{
+	union vaddress vaddr = {.addr = gva};
+	union raddress raddr = {.addr = gva};
+	union page_table_entry pte;
+	int dat_protection = 0;
+	int iep_protection = 0;
+	union ctlreg0 ctlreg0;
+	unsigned long ptr;
+	int edat1, edat2, iep;
+
+	ctlreg0.val = vcpu->arch.sie_block->gcr[0];
+	edat1 = ctlreg0.edat && test_kvm_facility(vcpu->kvm, 8);
+	edat2 = edat1 && test_kvm_facility(vcpu->kvm, 78);
+	iep = ctlreg0.iep && test_kvm_facility(vcpu->kvm, 130);
+	if (asce.r)
+		goto real_address;
+	ptr = asce.origin * PAGE_SIZE;
+	switch (asce.dt) {
+	case ASCE_TYPE_REGION1:
+		if (vaddr.rfx01 > asce.tl)
+			return PGM_REGION_FIRST_TRANS;
+		ptr += vaddr.rfx * 8;
+		break;
+	case ASCE_TYPE_REGION2:
+		if (vaddr.rfx)
+			return PGM_ASCE_TYPE;
+		if (vaddr.rsx01 > asce.tl)
+			return PGM_REGION_SECOND_TRANS;
+		ptr += vaddr.rsx * 8;
+		break;
+	case ASCE_TYPE_REGION3:
+		if (vaddr.rfx || vaddr.rsx)
+			return PGM_ASCE_TYPE;
+		if (vaddr.rtx01 > asce.tl)
+			return PGM_REGION_THIRD_TRANS;
+		ptr += vaddr.rtx * 8;
+		break;
+	case ASCE_TYPE_SEGMENT:
+		if (vaddr.rfx || vaddr.rsx || vaddr.rtx)
+			return PGM_ASCE_TYPE;
+		if (vaddr.sx01 > asce.tl)
+			return PGM_SEGMENT_TRANSLATION;
+		ptr += vaddr.sx * 8;
+		break;
+	}
+	switch (asce.dt) {
+	case ASCE_TYPE_REGION1:	{
+		union region1_table_entry rfte;
+
+		if (kvm_is_error_gpa(vcpu->kvm, ptr))
+			return PGM_ADDRESSING;
+		if (deref_table(vcpu->kvm, ptr, &rfte.val))
+			return -EFAULT;
+		if (rfte.i)
+			return PGM_REGION_FIRST_TRANS;
+		if (rfte.tt != TABLE_TYPE_REGION1)
+			return PGM_TRANSLATION_SPEC;
+		if (vaddr.rsx01 < rfte.tf || vaddr.rsx01 > rfte.tl)
+			return PGM_REGION_SECOND_TRANS;
+		if (edat1)
+			dat_protection |= rfte.p;
+		ptr = rfte.rto * PAGE_SIZE + vaddr.rsx * 8;
+	}
+		fallthrough;
+	case ASCE_TYPE_REGION2: {
+		union region2_table_entry rste;
+
+		if (kvm_is_error_gpa(vcpu->kvm, ptr))
+			return PGM_ADDRESSING;
+		if (deref_table(vcpu->kvm, ptr, &rste.val))
+			return -EFAULT;
+		if (rste.i)
+			return PGM_REGION_SECOND_TRANS;
+		if (rste.tt != TABLE_TYPE_REGION2)
+			return PGM_TRANSLATION_SPEC;
+		if (vaddr.rtx01 < rste.tf || vaddr.rtx01 > rste.tl)
+			return PGM_REGION_THIRD_TRANS;
+		if (edat1)
+			dat_protection |= rste.p;
+		ptr = rste.rto * PAGE_SIZE + vaddr.rtx * 8;
+	}
+		fallthrough;
+	case ASCE_TYPE_REGION3: {
+		union region3_table_entry rtte;
+
+		if (kvm_is_error_gpa(vcpu->kvm, ptr))
+			return PGM_ADDRESSING;
+		if (deref_table(vcpu->kvm, ptr, &rtte.val))
+			return -EFAULT;
+		if (rtte.i)
+			return PGM_REGION_THIRD_TRANS;
+		if (rtte.tt != TABLE_TYPE_REGION3)
+			return PGM_TRANSLATION_SPEC;
+		if (rtte.cr && asce.p && edat2)
+			return PGM_TRANSLATION_SPEC;
+		if (rtte.fc && edat2) {
+			dat_protection |= rtte.fc1.p;
+			iep_protection = rtte.fc1.iep;
+			raddr.rfaa = rtte.fc1.rfaa;
+			goto absolute_address;
+		}
+		if (vaddr.sx01 < rtte.fc0.tf)
+			return PGM_SEGMENT_TRANSLATION;
+		if (vaddr.sx01 > rtte.fc0.tl)
+			return PGM_SEGMENT_TRANSLATION;
+		if (edat1)
+			dat_protection |= rtte.fc0.p;
+		ptr = rtte.fc0.sto * PAGE_SIZE + vaddr.sx * 8;
+	}
+		fallthrough;
+	case ASCE_TYPE_SEGMENT: {
+		union segment_table_entry ste;
+
+		if (kvm_is_error_gpa(vcpu->kvm, ptr))
+			return PGM_ADDRESSING;
+		if (deref_table(vcpu->kvm, ptr, &ste.val))
+			return -EFAULT;
+		if (ste.i)
+			return PGM_SEGMENT_TRANSLATION;
+		if (ste.tt != TABLE_TYPE_SEGMENT)
+			return PGM_TRANSLATION_SPEC;
+		if (ste.cs && asce.p)
+			return PGM_TRANSLATION_SPEC;
+		if (ste.fc && edat1) {
+			dat_protection |= ste.fc1.p;
+			iep_protection = ste.fc1.iep;
+			raddr.sfaa = ste.fc1.sfaa;
+			goto absolute_address;
+		}
+		dat_protection |= ste.fc0.p;
+		ptr = ste.fc0.pto * (PAGE_SIZE / 2) + vaddr.px * 8;
+	}
+	}
+	if (kvm_is_error_gpa(vcpu->kvm, ptr))
+		return PGM_ADDRESSING;
+	if (deref_table(vcpu->kvm, ptr, &pte.val))
+		return -EFAULT;
+	if (pte.i)
+		return PGM_PAGE_TRANSLATION;
+	if (pte.z)
+		return PGM_TRANSLATION_SPEC;
+	dat_protection |= pte.p;
+	iep_protection = pte.iep;
+	raddr.pfra = pte.pfra;
+real_address:
+	raddr.addr = kvm_s390_real_to_abs(vcpu, raddr.addr);
+absolute_address:
+	if (mode == GACC_STORE && dat_protection) {
+		*prot = PROT_TYPE_DAT;
+		return PGM_PROTECTION;
+	}
+	if (mode == GACC_IFETCH && iep_protection && iep) {
+		*prot = PROT_TYPE_IEP;
+		return PGM_PROTECTION;
+	}
+	if (kvm_is_error_gpa(vcpu->kvm, raddr.addr))
+		return PGM_ADDRESSING;
+	*gpa = raddr.addr;
+	return 0;
+}
+
+static inline int is_low_address(unsigned long ga)
+{
+	/* Check for address ranges 0..511 and 4096..4607 */
+	return (ga & ~0x11fful) == 0;
+}
+
+static int low_address_protection_enabled(struct kvm_vcpu *vcpu,
+					  const union asce asce)
+{
+	union ctlreg0 ctlreg0 = {.val = vcpu->arch.sie_block->gcr[0]};
+	psw_t *psw = &vcpu->arch.sie_block->gpsw;
+
+	if (!ctlreg0.lap)
+		return 0;
+	if (psw_bits(*psw).dat && asce.p)
+		return 0;
+	return 1;
+}
+
+static int vm_check_access_key(struct kvm *kvm, u8 access_key,
+			       enum gacc_mode mode, gpa_t gpa)
+{
+	u8 storage_key, access_control;
+	bool fetch_protected;
+	unsigned long hva;
+	int r;
+
+	if (access_key == 0)
+		return 0;
+
+	hva = gfn_to_hva(kvm, gpa_to_gfn(gpa));
+	if (kvm_is_error_hva(hva))
+		return PGM_ADDRESSING;
+
+	mmap_read_lock(current->mm);
+	r = get_guest_storage_key(current->mm, hva, &storage_key);
+	mmap_read_unlock(current->mm);
+	if (r)
+		return r;
+	access_control = FIELD_GET(_PAGE_ACC_BITS, storage_key);
+	if (access_control == access_key)
+		return 0;
+	fetch_protected = storage_key & _PAGE_FP_BIT;
+	if ((mode == GACC_FETCH || mode == GACC_IFETCH) && !fetch_protected)
+		return 0;
+	return PGM_PROTECTION;
+}
+
+static bool fetch_prot_override_applicable(struct kvm_vcpu *vcpu, enum gacc_mode mode,
+					   union asce asce)
+{
+	psw_t *psw = &vcpu->arch.sie_block->gpsw;
+	unsigned long override;
+
+	if (mode == GACC_FETCH || mode == GACC_IFETCH) {
+		/* check if fetch protection override enabled */
+		override = vcpu->arch.sie_block->gcr[0];
+		override &= CR0_FETCH_PROTECTION_OVERRIDE;
+		/* not applicable if subject to DAT && private space */
+		override = override && !(psw_bits(*psw).dat && asce.p);
+		return override;
+	}
+	return false;
+}
+
+static bool fetch_prot_override_applies(unsigned long ga, unsigned int len)
+{
+	return ga < 2048 && ga + len <= 2048;
+}
+
+static bool storage_prot_override_applicable(struct kvm_vcpu *vcpu)
+{
+	/* check if storage protection override enabled */
+	return vcpu->arch.sie_block->gcr[0] & CR0_STORAGE_PROTECTION_OVERRIDE;
+}
+
+static bool storage_prot_override_applies(u8 access_control)
+{
+	/* matches special storage protection override key (9) -> allow */
+	return access_control == PAGE_SPO_ACC;
+}
+
+static int vcpu_check_access_key(struct kvm_vcpu *vcpu, u8 access_key,
+				 enum gacc_mode mode, union asce asce, gpa_t gpa,
+				 unsigned long ga, unsigned int len)
+{
+	u8 storage_key, access_control;
+	unsigned long hva;
+	int r;
+
+	/* access key 0 matches any storage key -> allow */
+	if (access_key == 0)
+		return 0;
+	/*
+	 * caller needs to ensure that gfn is accessible, so we can
+	 * assume that this cannot fail
+	 */
+	hva = gfn_to_hva(vcpu->kvm, gpa_to_gfn(gpa));
+	mmap_read_lock(current->mm);
+	r = get_guest_storage_key(current->mm, hva, &storage_key);
+	mmap_read_unlock(current->mm);
+	if (r)
+		return r;
+	access_control = FIELD_GET(_PAGE_ACC_BITS, storage_key);
+	/* access key matches storage key -> allow */
+	if (access_control == access_key)
+		return 0;
+	if (mode == GACC_FETCH || mode == GACC_IFETCH) {
+		/* it is a fetch and fetch protection is off -> allow */
+		if (!(storage_key & _PAGE_FP_BIT))
+			return 0;
+		if (fetch_prot_override_applicable(vcpu, mode, asce) &&
+		    fetch_prot_override_applies(ga, len))
+			return 0;
+	}
+	if (storage_prot_override_applicable(vcpu) &&
+	    storage_prot_override_applies(access_control))
+		return 0;
+	return PGM_PROTECTION;
+}
+
+/**
+ * guest_range_to_gpas() - Calculate guest physical addresses of page fragments
+ * covering a logical range
+ * @vcpu: virtual cpu
+ * @ga: guest address, start of range
+ * @ar: access register
+ * @gpas: output argument, may be NULL
+ * @len: length of range in bytes
+ * @asce: address-space-control element to use for translation
+ * @mode: access mode
+ * @access_key: access key to mach the range's storage keys against
+ *
+ * Translate a logical range to a series of guest absolute addresses,
+ * such that the concatenation of page fragments starting at each gpa make up
+ * the whole range.
+ * The translation is performed as if done by the cpu for the given @asce, @ar,
+ * @mode and state of the @vcpu.
+ * If the translation causes an exception, its program interruption code is
+ * returned and the &struct kvm_s390_pgm_info pgm member of @vcpu is modified
+ * such that a subsequent call to kvm_s390_inject_prog_vcpu() will inject
+ * a correct exception into the guest.
+ * The resulting gpas are stored into @gpas, unless it is NULL.
+ *
+ * Note: All fragments except the first one start at the beginning of a page.
+ *	 When deriving the boundaries of a fragment from a gpa, all but the last
+ *	 fragment end at the end of the page.
+ *
+ * Return:
+ * * 0		- success
+ * * <0		- translation could not be performed, for example if  guest
+ *		  memory could not be accessed
+ * * >0		- an access exception occurred. In this case the returned value
+ *		  is the program interruption code and the contents of pgm may
+ *		  be used to inject an exception into the guest.
+ */
+static int guest_range_to_gpas(struct kvm_vcpu *vcpu, unsigned long ga, u8 ar,
+			       unsigned long *gpas, unsigned long len,
+			       const union asce asce, enum gacc_mode mode,
+			       u8 access_key)
+{
+	psw_t *psw = &vcpu->arch.sie_block->gpsw;
+	unsigned int offset = offset_in_page(ga);
+	unsigned int fragment_len;
+	int lap_enabled, rc = 0;
+	enum prot_type prot;
+	unsigned long gpa;
+
+	lap_enabled = low_address_protection_enabled(vcpu, asce);
+	while (min(PAGE_SIZE - offset, len) > 0) {
+		fragment_len = min(PAGE_SIZE - offset, len);
+		ga = kvm_s390_logical_to_effective(vcpu, ga);
+		if (mode == GACC_STORE && lap_enabled && is_low_address(ga))
+			return trans_exc(vcpu, PGM_PROTECTION, ga, ar, mode,
+					 PROT_TYPE_LA);
+		if (psw_bits(*psw).dat) {
+			rc = guest_translate(vcpu, ga, &gpa, asce, mode, &prot);
+			if (rc < 0)
+				return rc;
+		} else {
+			gpa = kvm_s390_real_to_abs(vcpu, ga);
+			if (kvm_is_error_gpa(vcpu->kvm, gpa)) {
+				rc = PGM_ADDRESSING;
+				prot = PROT_NONE;
+			}
+		}
+		if (rc)
+			return trans_exc(vcpu, rc, ga, ar, mode, prot);
+		rc = vcpu_check_access_key(vcpu, access_key, mode, asce, gpa, ga,
+					   fragment_len);
+		if (rc)
+			return trans_exc(vcpu, rc, ga, ar, mode, PROT_TYPE_KEYC);
+		if (gpas)
+			*gpas++ = gpa;
+		offset = 0;
+		ga += fragment_len;
+		len -= fragment_len;
+	}
+	return 0;
+}
+
+static int access_guest_page(struct kvm *kvm, enum gacc_mode mode, gpa_t gpa,
+			     void *data, unsigned int len)
+{
+	const unsigned int offset = offset_in_page(gpa);
+	const gfn_t gfn = gpa_to_gfn(gpa);
+	int rc;
+
+	if (mode == GACC_STORE)
+		rc = kvm_write_guest_page(kvm, gfn, data, offset, len);
+	else
+		rc = kvm_read_guest_page(kvm, gfn, data, offset, len);
+	return rc;
+}
+
+static int
+access_guest_page_with_key(struct kvm *kvm, enum gacc_mode mode, gpa_t gpa,
+			   void *data, unsigned int len, u8 access_key)
+{
+	struct kvm_memory_slot *slot;
+	bool writable;
+	gfn_t gfn;
+	hva_t hva;
+	int rc;
+
+	gfn = gpa >> PAGE_SHIFT;
+	slot = gfn_to_memslot(kvm, gfn);
+	hva = gfn_to_hva_memslot_prot(slot, gfn, &writable);
+
+	if (kvm_is_error_hva(hva))
+		return PGM_ADDRESSING;
+	/*
+	 * Check if it's a ro memslot, even tho that can't occur (they're unsupported).
+	 * Don't try to actually handle that case.
+	 */
+	if (!writable && mode == GACC_STORE)
+		return -EOPNOTSUPP;
+	hva += offset_in_page(gpa);
+	if (mode == GACC_STORE)
+		rc = copy_to_user_key((void __user *)hva, data, len, access_key);
+	else
+		rc = copy_from_user_key(data, (void __user *)hva, len, access_key);
+	if (rc)
+		return PGM_PROTECTION;
+	if (mode == GACC_STORE)
+		mark_page_dirty_in_slot(kvm, slot, gfn);
+	return 0;
+}
+
+int access_guest_abs_with_key(struct kvm *kvm, gpa_t gpa, void *data,
+			      unsigned long len, enum gacc_mode mode, u8 access_key)
+{
+	int offset = offset_in_page(gpa);
+	int fragment_len;
+	int rc;
+
+	while (min(PAGE_SIZE - offset, len) > 0) {
+		fragment_len = min(PAGE_SIZE - offset, len);
+		rc = access_guest_page_with_key(kvm, mode, gpa, data, fragment_len, access_key);
+		if (rc)
+			return rc;
+		offset = 0;
+		len -= fragment_len;
+		data += fragment_len;
+		gpa += fragment_len;
+	}
+	return 0;
+}
+
+int access_guest_with_key(struct kvm_vcpu *vcpu, unsigned long ga, u8 ar,
+			  void *data, unsigned long len, enum gacc_mode mode,
+			  u8 access_key)
+{
+	psw_t *psw = &vcpu->arch.sie_block->gpsw;
+	unsigned long nr_pages, idx;
+	unsigned long gpa_array[2];
+	unsigned int fragment_len;
+	unsigned long *gpas;
+	enum prot_type prot;
+	int need_ipte_lock;
+	union asce asce;
+	bool try_storage_prot_override;
+	bool try_fetch_prot_override;
+	int rc;
+
+	if (!len)
+		return 0;
+	ga = kvm_s390_logical_to_effective(vcpu, ga);
+	rc = get_vcpu_asce(vcpu, &asce, ga, ar, mode);
+	if (rc)
+		return rc;
+	nr_pages = (((ga & ~PAGE_MASK) + len - 1) >> PAGE_SHIFT) + 1;
+	gpas = gpa_array;
+	if (nr_pages > ARRAY_SIZE(gpa_array))
+		gpas = vmalloc(array_size(nr_pages, sizeof(unsigned long)));
+	if (!gpas)
+		return -ENOMEM;
+	try_fetch_prot_override = fetch_prot_override_applicable(vcpu, mode, asce);
+	try_storage_prot_override = storage_prot_override_applicable(vcpu);
+	need_ipte_lock = psw_bits(*psw).dat && !asce.r;
+	if (need_ipte_lock)
+		ipte_lock(vcpu->kvm);
+	/*
+	 * Since we do the access further down ultimately via a move instruction
+	 * that does key checking and returns an error in case of a protection
+	 * violation, we don't need to do the check during address translation.
+	 * Skip it by passing access key 0, which matches any storage key,
+	 * obviating the need for any further checks. As a result the check is
+	 * handled entirely in hardware on access, we only need to take care to
+	 * forego key protection checking if fetch protection override applies or
+	 * retry with the special key 9 in case of storage protection override.
+	 */
+	rc = guest_range_to_gpas(vcpu, ga, ar, gpas, len, asce, mode, 0);
+	if (rc)
+		goto out_unlock;
+	for (idx = 0; idx < nr_pages; idx++) {
+		fragment_len = min(PAGE_SIZE - offset_in_page(gpas[idx]), len);
+		if (try_fetch_prot_override && fetch_prot_override_applies(ga, fragment_len)) {
+			rc = access_guest_page(vcpu->kvm, mode, gpas[idx],
+					       data, fragment_len);
+		} else {
+			rc = access_guest_page_with_key(vcpu->kvm, mode, gpas[idx],
+							data, fragment_len, access_key);
+		}
+		if (rc == PGM_PROTECTION && try_storage_prot_override)
+			rc = access_guest_page_with_key(vcpu->kvm, mode, gpas[idx],
+							data, fragment_len, PAGE_SPO_ACC);
+		if (rc)
+			break;
+		len -= fragment_len;
+		data += fragment_len;
+		ga = kvm_s390_logical_to_effective(vcpu, ga + fragment_len);
+	}
+	if (rc > 0) {
+		bool terminate = (mode == GACC_STORE) && (idx > 0);
+
+		if (rc == PGM_PROTECTION)
+			prot = PROT_TYPE_KEYC;
+		else
+			prot = PROT_NONE;
+		rc = trans_exc_ending(vcpu, rc, ga, ar, mode, prot, terminate);
+	}
+out_unlock:
+	if (need_ipte_lock)
+		ipte_unlock(vcpu->kvm);
+	if (nr_pages > ARRAY_SIZE(gpa_array))
+		vfree(gpas);
+	return rc;
+}
+
+int access_guest_real(struct kvm_vcpu *vcpu, unsigned long gra,
+		      void *data, unsigned long len, enum gacc_mode mode)
+{
+	unsigned int fragment_len;
+	unsigned long gpa;
+	int rc = 0;
+
+	while (len && !rc) {
+		gpa = kvm_s390_real_to_abs(vcpu, gra);
+		fragment_len = min(PAGE_SIZE - offset_in_page(gpa), len);
+		rc = access_guest_page(vcpu->kvm, mode, gpa, data, fragment_len);
+		len -= fragment_len;
+		gra += fragment_len;
+		data += fragment_len;
+	}
+	return rc;
+}
+
+/**
+ * cmpxchg_guest_abs_with_key() - Perform cmpxchg on guest absolute address.
+ * @kvm: Virtual machine instance.
+ * @gpa: Absolute guest address of the location to be changed.
+ * @len: Operand length of the cmpxchg, required: 1 <= len <= 16. Providing a
+ *       non power of two will result in failure.
+ * @old_addr: Pointer to old value. If the location at @gpa contains this value,
+ *            the exchange will succeed. After calling cmpxchg_guest_abs_with_key()
+ *            *@old_addr contains the value at @gpa before the attempt to
+ *            exchange the value.
+ * @new: The value to place at @gpa.
+ * @access_key: The access key to use for the guest access.
+ * @success: output value indicating if an exchange occurred.
+ *
+ * Atomically exchange the value at @gpa by @new, if it contains *@old.
+ * Honors storage keys.
+ *
+ * Return: * 0: successful exchange
+ *         * >0: a program interruption code indicating the reason cmpxchg could
+ *               not be attempted
+ *         * -EINVAL: address misaligned or len not power of two
+ *         * -EAGAIN: transient failure (len 1 or 2)
+ *         * -EOPNOTSUPP: read-only memslot (should never occur)
+ */
+int cmpxchg_guest_abs_with_key(struct kvm *kvm, gpa_t gpa, int len,
+			       __uint128_t *old_addr, __uint128_t new,
+			       u8 access_key, bool *success)
+{
+	gfn_t gfn = gpa_to_gfn(gpa);
+	struct kvm_memory_slot *slot = gfn_to_memslot(kvm, gfn);
+	bool writable;
+	hva_t hva;
+	int ret;
+
+	if (!IS_ALIGNED(gpa, len))
+		return -EINVAL;
+
+	hva = gfn_to_hva_memslot_prot(slot, gfn, &writable);
+	if (kvm_is_error_hva(hva))
+		return PGM_ADDRESSING;
+	/*
+	 * Check if it's a read-only memslot, even though that cannot occur
+	 * since those are unsupported.
+	 * Don't try to actually handle that case.
+	 */
+	if (!writable)
+		return -EOPNOTSUPP;
+
+	hva += offset_in_page(gpa);
+	/*
+	 * The cmpxchg_user_key macro depends on the type of "old", so we need
+	 * a case for each valid length and get some code duplication as long
+	 * as we don't introduce a new macro.
+	 */
+	switch (len) {
+	case 1: {
+		u8 old;
+
+		ret = cmpxchg_user_key((u8 __user *)hva, &old, *old_addr, new, access_key);
+		*success = !ret && old == *old_addr;
+		*old_addr = old;
+		break;
+	}
+	case 2: {
+		u16 old;
+
+		ret = cmpxchg_user_key((u16 __user *)hva, &old, *old_addr, new, access_key);
+		*success = !ret && old == *old_addr;
+		*old_addr = old;
+		break;
+	}
+	case 4: {
+		u32 old;
+
+		ret = cmpxchg_user_key((u32 __user *)hva, &old, *old_addr, new, access_key);
+		*success = !ret && old == *old_addr;
+		*old_addr = old;
+		break;
+	}
+	case 8: {
+		u64 old;
+
+		ret = cmpxchg_user_key((u64 __user *)hva, &old, *old_addr, new, access_key);
+		*success = !ret && old == *old_addr;
+		*old_addr = old;
+		break;
+	}
+	case 16: {
+		__uint128_t old;
+
+		ret = cmpxchg_user_key((__uint128_t __user *)hva, &old, *old_addr, new, access_key);
+		*success = !ret && old == *old_addr;
+		*old_addr = old;
+		break;
+	}
+	default:
+		return -EINVAL;
+	}
+	if (*success)
+		mark_page_dirty_in_slot(kvm, slot, gfn);
+	/*
+	 * Assume that the fault is caused by protection, either key protection
+	 * or user page write protection.
+	 */
+	if (ret == -EFAULT)
+		ret = PGM_PROTECTION;
+	return ret;
+}
+
+/**
+ * guest_translate_address_with_key - translate guest logical into guest absolute address
+ * @vcpu: virtual cpu
+ * @gva: Guest virtual address
+ * @ar: Access register
+ * @gpa: Guest physical address
+ * @mode: Translation access mode
+ * @access_key: access key to mach the storage key with
+ *
+ * Parameter semantics are the same as the ones from guest_translate.
+ * The memory contents at the guest address are not changed.
+ *
+ * Note: The IPTE lock is not taken during this function, so the caller
+ * has to take care of this.
+ */
+int guest_translate_address_with_key(struct kvm_vcpu *vcpu, unsigned long gva, u8 ar,
+				     unsigned long *gpa, enum gacc_mode mode,
+				     u8 access_key)
+{
+	union asce asce;
+	int rc;
+
+	gva = kvm_s390_logical_to_effective(vcpu, gva);
+	rc = get_vcpu_asce(vcpu, &asce, gva, ar, mode);
+	if (rc)
+		return rc;
+	return guest_range_to_gpas(vcpu, gva, ar, gpa, 1, asce, mode,
+				   access_key);
+}
+
+/**
+ * check_gva_range - test a range of guest virtual addresses for accessibility
+ * @vcpu: virtual cpu
+ * @gva: Guest virtual address
+ * @ar: Access register
+ * @length: Length of test range
+ * @mode: Translation access mode
+ * @access_key: access key to mach the storage keys with
+ */
+int check_gva_range(struct kvm_vcpu *vcpu, unsigned long gva, u8 ar,
+		    unsigned long length, enum gacc_mode mode, u8 access_key)
+{
+	union asce asce;
+	int rc = 0;
+
+	rc = get_vcpu_asce(vcpu, &asce, gva, ar, mode);
+	if (rc)
+		return rc;
+	ipte_lock(vcpu->kvm);
+	rc = guest_range_to_gpas(vcpu, gva, ar, NULL, length, asce, mode,
+				 access_key);
+	ipte_unlock(vcpu->kvm);
+
+	return rc;
+}
+
+/**
+ * check_gpa_range - test a range of guest physical addresses for accessibility
+ * @kvm: virtual machine instance
+ * @gpa: guest physical address
+ * @length: length of test range
+ * @mode: access mode to test, relevant for storage keys
+ * @access_key: access key to mach the storage keys with
+ */
+int check_gpa_range(struct kvm *kvm, unsigned long gpa, unsigned long length,
+		    enum gacc_mode mode, u8 access_key)
+{
+	unsigned int fragment_len;
+	int rc = 0;
+
+	while (length && !rc) {
+		fragment_len = min(PAGE_SIZE - offset_in_page(gpa), length);
+		rc = vm_check_access_key(kvm, access_key, mode, gpa);
+		length -= fragment_len;
+		gpa += fragment_len;
+	}
+	return rc;
+}
+
+/**
+ * kvm_s390_check_low_addr_prot_real - check for low-address protection
+ * @vcpu: virtual cpu
+ * @gra: Guest real address
+ *
+ * Checks whether an address is subject to low-address protection and set
+ * up vcpu->arch.pgm accordingly if necessary.
+ *
+ * Return: 0 if no protection exception, or PGM_PROTECTION if protected.
+ */
+int kvm_s390_check_low_addr_prot_real(struct kvm_vcpu *vcpu, unsigned long gra)
+{
+	union ctlreg0 ctlreg0 = {.val = vcpu->arch.sie_block->gcr[0]};
+
+	if (!ctlreg0.lap || !is_low_address(gra))
+		return 0;
+	return trans_exc(vcpu, PGM_PROTECTION, gra, 0, GACC_STORE, PROT_TYPE_LA);
+}
+
+/**
+ * kvm_s390_shadow_tables - walk the guest page table and create shadow tables
+ * @sg: pointer to the shadow guest address space structure
+ * @saddr: faulting address in the shadow gmap
+ * @pgt: pointer to the beginning of the page table for the given address if
+ *	 successful (return value 0), or to the first invalid DAT entry in
+ *	 case of exceptions (return value > 0)
+ * @dat_protection: referenced memory is write protected
+ * @fake: pgt references contiguous guest memory block, not a pgtable
+ */
+static int kvm_s390_shadow_tables(struct gmap *sg, unsigned long saddr,
+				  unsigned long *pgt, int *dat_protection,
+				  int *fake)
+{
+	struct gmap *parent;
+	union asce asce;
+	union vaddress vaddr;
+	unsigned long ptr;
+	int rc;
+
+	*fake = 0;
+	*dat_protection = 0;
+	parent = sg->parent;
+	vaddr.addr = saddr;
+	asce.val = sg->orig_asce;
+	ptr = asce.origin * PAGE_SIZE;
+	if (asce.r) {
+		*fake = 1;
+		ptr = 0;
+		asce.dt = ASCE_TYPE_REGION1;
+	}
+	switch (asce.dt) {
+	case ASCE_TYPE_REGION1:
+		if (vaddr.rfx01 > asce.tl && !*fake)
+			return PGM_REGION_FIRST_TRANS;
+		break;
+	case ASCE_TYPE_REGION2:
+		if (vaddr.rfx)
+			return PGM_ASCE_TYPE;
+		if (vaddr.rsx01 > asce.tl)
+			return PGM_REGION_SECOND_TRANS;
+		break;
+	case ASCE_TYPE_REGION3:
+		if (vaddr.rfx || vaddr.rsx)
+			return PGM_ASCE_TYPE;
+		if (vaddr.rtx01 > asce.tl)
+			return PGM_REGION_THIRD_TRANS;
+		break;
+	case ASCE_TYPE_SEGMENT:
+		if (vaddr.rfx || vaddr.rsx || vaddr.rtx)
+			return PGM_ASCE_TYPE;
+		if (vaddr.sx01 > asce.tl)
+			return PGM_SEGMENT_TRANSLATION;
+		break;
+	}
+
+	switch (asce.dt) {
+	case ASCE_TYPE_REGION1: {
+		union region1_table_entry rfte;
+
+		if (*fake) {
+			ptr += vaddr.rfx * _REGION1_SIZE;
+			rfte.val = ptr;
+			goto shadow_r2t;
+		}
+		*pgt = ptr + vaddr.rfx * 8;
+		rc = gmap_read_table(parent, ptr + vaddr.rfx * 8, &rfte.val);
+		if (rc)
+			return rc;
+		if (rfte.i)
+			return PGM_REGION_FIRST_TRANS;
+		if (rfte.tt != TABLE_TYPE_REGION1)
+			return PGM_TRANSLATION_SPEC;
+		if (vaddr.rsx01 < rfte.tf || vaddr.rsx01 > rfte.tl)
+			return PGM_REGION_SECOND_TRANS;
+		if (sg->edat_level >= 1)
+			*dat_protection |= rfte.p;
+		ptr = rfte.rto * PAGE_SIZE;
+shadow_r2t:
+		rc = gmap_shadow_r2t(sg, saddr, rfte.val, *fake);
+		if (rc)
+			return rc;
+	}
+		fallthrough;
+	case ASCE_TYPE_REGION2: {
+		union region2_table_entry rste;
+
+		if (*fake) {
+			ptr += vaddr.rsx * _REGION2_SIZE;
+			rste.val = ptr;
+			goto shadow_r3t;
+		}
+		*pgt = ptr + vaddr.rsx * 8;
+		rc = gmap_read_table(parent, ptr + vaddr.rsx * 8, &rste.val);
+		if (rc)
+			return rc;
+		if (rste.i)
+			return PGM_REGION_SECOND_TRANS;
+		if (rste.tt != TABLE_TYPE_REGION2)
+			return PGM_TRANSLATION_SPEC;
+		if (vaddr.rtx01 < rste.tf || vaddr.rtx01 > rste.tl)
+			return PGM_REGION_THIRD_TRANS;
+		if (sg->edat_level >= 1)
+			*dat_protection |= rste.p;
+		ptr = rste.rto * PAGE_SIZE;
+shadow_r3t:
+		rste.p |= *dat_protection;
+		rc = gmap_shadow_r3t(sg, saddr, rste.val, *fake);
+		if (rc)
+			return rc;
+	}
+		fallthrough;
+	case ASCE_TYPE_REGION3: {
+		union region3_table_entry rtte;
+
+		if (*fake) {
+			ptr += vaddr.rtx * _REGION3_SIZE;
+			rtte.val = ptr;
+			goto shadow_sgt;
+		}
+		*pgt = ptr + vaddr.rtx * 8;
+		rc = gmap_read_table(parent, ptr + vaddr.rtx * 8, &rtte.val);
+		if (rc)
+			return rc;
+		if (rtte.i)
+			return PGM_REGION_THIRD_TRANS;
+		if (rtte.tt != TABLE_TYPE_REGION3)
+			return PGM_TRANSLATION_SPEC;
+		if (rtte.cr && asce.p && sg->edat_level >= 2)
+			return PGM_TRANSLATION_SPEC;
+		if (rtte.fc && sg->edat_level >= 2) {
+			*dat_protection |= rtte.fc0.p;
+			*fake = 1;
+			ptr = rtte.fc1.rfaa * _REGION3_SIZE;
+			rtte.val = ptr;
+			goto shadow_sgt;
+		}
+		if (vaddr.sx01 < rtte.fc0.tf || vaddr.sx01 > rtte.fc0.tl)
+			return PGM_SEGMENT_TRANSLATION;
+		if (sg->edat_level >= 1)
+			*dat_protection |= rtte.fc0.p;
+		ptr = rtte.fc0.sto * PAGE_SIZE;
+shadow_sgt:
+		rtte.fc0.p |= *dat_protection;
+		rc = gmap_shadow_sgt(sg, saddr, rtte.val, *fake);
+		if (rc)
+			return rc;
+	}
+		fallthrough;
+	case ASCE_TYPE_SEGMENT: {
+		union segment_table_entry ste;
+
+		if (*fake) {
+			ptr += vaddr.sx * _SEGMENT_SIZE;
+			ste.val = ptr;
+			goto shadow_pgt;
+		}
+		*pgt = ptr + vaddr.sx * 8;
+		rc = gmap_read_table(parent, ptr + vaddr.sx * 8, &ste.val);
+		if (rc)
+			return rc;
+		if (ste.i)
+			return PGM_SEGMENT_TRANSLATION;
+		if (ste.tt != TABLE_TYPE_SEGMENT)
+			return PGM_TRANSLATION_SPEC;
+		if (ste.cs && asce.p)
+			return PGM_TRANSLATION_SPEC;
+		*dat_protection |= ste.fc0.p;
+		if (ste.fc && sg->edat_level >= 1) {
+			*fake = 1;
+			ptr = ste.fc1.sfaa * _SEGMENT_SIZE;
+			ste.val = ptr;
+			goto shadow_pgt;
+		}
+		ptr = ste.fc0.pto * (PAGE_SIZE / 2);
+shadow_pgt:
+		ste.fc0.p |= *dat_protection;
+		rc = gmap_shadow_pgt(sg, saddr, ste.val, *fake);
+		if (rc)
+			return rc;
+	}
+	}
+	/* Return the parent address of the page table */
+	*pgt = ptr;
+	return 0;
+}
+
+/**
+ * kvm_s390_shadow_fault - handle fault on a shadow page table
+ * @vcpu: virtual cpu
+ * @sg: pointer to the shadow guest address space structure
+ * @saddr: faulting address in the shadow gmap
+ * @datptr: will contain the address of the faulting DAT table entry, or of
+ *	    the valid leaf, plus some flags
+ *
+ * Returns: - 0 if the shadow fault was successfully resolved
+ *	    - > 0 (pgm exception code) on exceptions while faulting
+ *	    - -EAGAIN if the caller can retry immediately
+ *	    - -EFAULT when accessing invalid guest addresses
+ *	    - -ENOMEM if out of memory
+ */
+int kvm_s390_shadow_fault(struct kvm_vcpu *vcpu, struct gmap *sg,
+			  unsigned long saddr, unsigned long *datptr)
+{
+	union vaddress vaddr;
+	union page_table_entry pte;
+	unsigned long pgt = 0;
+	int dat_protection, fake;
+	int rc;
+
+	mmap_read_lock(sg->mm);
+	/*
+	 * We don't want any guest-2 tables to change - so the parent
+	 * tables/pointers we read stay valid - unshadowing is however
+	 * always possible - only guest_table_lock protects us.
+	 */
+	ipte_lock(vcpu->kvm);
+
+	rc = gmap_shadow_pgt_lookup(sg, saddr, &pgt, &dat_protection, &fake);
+	if (rc)
+		rc = kvm_s390_shadow_tables(sg, saddr, &pgt, &dat_protection,
+					    &fake);
+
+	vaddr.addr = saddr;
+	if (fake) {
+		pte.val = pgt + vaddr.px * PAGE_SIZE;
+		goto shadow_page;
+	}
+
+	switch (rc) {
+	case PGM_SEGMENT_TRANSLATION:
+	case PGM_REGION_THIRD_TRANS:
+	case PGM_REGION_SECOND_TRANS:
+	case PGM_REGION_FIRST_TRANS:
+		pgt |= PEI_NOT_PTE;
+		break;
+	case 0:
+		pgt += vaddr.px * 8;
+		rc = gmap_read_table(sg->parent, pgt, &pte.val);
+	}
+	if (datptr)
+		*datptr = pgt | dat_protection * PEI_DAT_PROT;
+	if (!rc && pte.i)
+		rc = PGM_PAGE_TRANSLATION;
+	if (!rc && pte.z)
+		rc = PGM_TRANSLATION_SPEC;
+shadow_page:
+	pte.p |= dat_protection;
+	if (!rc)
+		rc = gmap_shadow_page(sg, saddr, __pte(pte.val));
+	ipte_unlock(vcpu->kvm);
+	mmap_read_unlock(sg->mm);
+	return rc;
+}
diff --git a/arch/s390/kvm/gaccess.h b/arch/s390/kvm/gaccess.h
new file mode 100644
index 0000000000..b320d12aa0
--- /dev/null
+++ b/arch/s390/kvm/gaccess.h
@@ -0,0 +1,458 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * access guest memory
+ *
+ * Copyright IBM Corp. 2008, 2014
+ *
+ *    Author(s): Carsten Otte <cotte@de.ibm.com>
+ */
+
+#ifndef __KVM_S390_GACCESS_H
+#define __KVM_S390_GACCESS_H
+
+#include <linux/compiler.h>
+#include <linux/kvm_host.h>
+#include <linux/uaccess.h>
+#include <linux/ptrace.h>
+#include "kvm-s390.h"
+
+/**
+ * kvm_s390_real_to_abs - convert guest real address to guest absolute address
+ * @prefix - guest prefix
+ * @gra - guest real address
+ *
+ * Returns the guest absolute address that corresponds to the passed guest real
+ * address @gra of by applying the given prefix.
+ */
+static inline unsigned long _kvm_s390_real_to_abs(u32 prefix, unsigned long gra)
+{
+	if (gra < 2 * PAGE_SIZE)
+		gra += prefix;
+	else if (gra >= prefix && gra < prefix + 2 * PAGE_SIZE)
+		gra -= prefix;
+	return gra;
+}
+
+/**
+ * kvm_s390_real_to_abs - convert guest real address to guest absolute address
+ * @vcpu - guest virtual cpu
+ * @gra - guest real address
+ *
+ * Returns the guest absolute address that corresponds to the passed guest real
+ * address @gra of a virtual guest cpu by applying its prefix.
+ */
+static inline unsigned long kvm_s390_real_to_abs(struct kvm_vcpu *vcpu,
+						 unsigned long gra)
+{
+	return _kvm_s390_real_to_abs(kvm_s390_get_prefix(vcpu), gra);
+}
+
+/**
+ * _kvm_s390_logical_to_effective - convert guest logical to effective address
+ * @psw: psw of the guest
+ * @ga: guest logical address
+ *
+ * Convert a guest logical address to an effective address by applying the
+ * rules of the addressing mode defined by bits 31 and 32 of the given PSW
+ * (extendended/basic addressing mode).
+ *
+ * Depending on the addressing mode, the upper 40 bits (24 bit addressing
+ * mode), 33 bits (31 bit addressing mode) or no bits (64 bit addressing
+ * mode) of @ga will be zeroed and the remaining bits will be returned.
+ */
+static inline unsigned long _kvm_s390_logical_to_effective(psw_t *psw,
+							   unsigned long ga)
+{
+	if (psw_bits(*psw).eaba == PSW_BITS_AMODE_64BIT)
+		return ga;
+	if (psw_bits(*psw).eaba == PSW_BITS_AMODE_31BIT)
+		return ga & ((1UL << 31) - 1);
+	return ga & ((1UL << 24) - 1);
+}
+
+/**
+ * kvm_s390_logical_to_effective - convert guest logical to effective address
+ * @vcpu: guest virtual cpu
+ * @ga: guest logical address
+ *
+ * Convert a guest vcpu logical address to a guest vcpu effective address by
+ * applying the rules of the vcpu's addressing mode defined by PSW bits 31
+ * and 32 (extendended/basic addressing mode).
+ *
+ * Depending on the vcpu's addressing mode the upper 40 bits (24 bit addressing
+ * mode), 33 bits (31 bit addressing mode) or no bits (64 bit addressing mode)
+ * of @ga will be zeroed and the remaining bits will be returned.
+ */
+static inline unsigned long kvm_s390_logical_to_effective(struct kvm_vcpu *vcpu,
+							  unsigned long ga)
+{
+	return _kvm_s390_logical_to_effective(&vcpu->arch.sie_block->gpsw, ga);
+}
+
+/*
+ * put_guest_lc, read_guest_lc and write_guest_lc are guest access functions
+ * which shall only be used to access the lowcore of a vcpu.
+ * These functions should be used for e.g. interrupt handlers where no
+ * guest memory access protection facilities, like key or low address
+ * protection, are applicable.
+ * At a later point guest vcpu lowcore access should happen via pinned
+ * prefix pages, so that these pages can be accessed directly via the
+ * kernel mapping. All of these *_lc functions can be removed then.
+ */
+
+/**
+ * put_guest_lc - write a simple variable to a guest vcpu's lowcore
+ * @vcpu: virtual cpu
+ * @x: value to copy to guest
+ * @gra: vcpu's destination guest real address
+ *
+ * Copies a simple value from kernel space to a guest vcpu's lowcore.
+ * The size of the variable may be 1, 2, 4 or 8 bytes. The destination
+ * must be located in the vcpu's lowcore. Otherwise the result is undefined.
+ *
+ * Returns zero on success or -EFAULT on error.
+ *
+ * Note: an error indicates that either the kernel is out of memory or
+ *	 the guest memory mapping is broken. In any case the best solution
+ *	 would be to terminate the guest.
+ *	 It is wrong to inject a guest exception.
+ */
+#define put_guest_lc(vcpu, x, gra)				\
+({								\
+	struct kvm_vcpu *__vcpu = (vcpu);			\
+	__typeof__(*(gra)) __x = (x);				\
+	unsigned long __gpa;					\
+								\
+	__gpa = (unsigned long)(gra);				\
+	__gpa += kvm_s390_get_prefix(__vcpu);			\
+	kvm_write_guest(__vcpu->kvm, __gpa, &__x, sizeof(__x));	\
+})
+
+/**
+ * write_guest_lc - copy data from kernel space to guest vcpu's lowcore
+ * @vcpu: virtual cpu
+ * @gra: vcpu's source guest real address
+ * @data: source address in kernel space
+ * @len: number of bytes to copy
+ *
+ * Copy data from kernel space to guest vcpu's lowcore. The entire range must
+ * be located within the vcpu's lowcore, otherwise the result is undefined.
+ *
+ * Returns zero on success or -EFAULT on error.
+ *
+ * Note: an error indicates that either the kernel is out of memory or
+ *	 the guest memory mapping is broken. In any case the best solution
+ *	 would be to terminate the guest.
+ *	 It is wrong to inject a guest exception.
+ */
+static inline __must_check
+int write_guest_lc(struct kvm_vcpu *vcpu, unsigned long gra, void *data,
+		   unsigned long len)
+{
+	unsigned long gpa = gra + kvm_s390_get_prefix(vcpu);
+
+	return kvm_write_guest(vcpu->kvm, gpa, data, len);
+}
+
+/**
+ * read_guest_lc - copy data from guest vcpu's lowcore to kernel space
+ * @vcpu: virtual cpu
+ * @gra: vcpu's source guest real address
+ * @data: destination address in kernel space
+ * @len: number of bytes to copy
+ *
+ * Copy data from guest vcpu's lowcore to kernel space. The entire range must
+ * be located within the vcpu's lowcore, otherwise the result is undefined.
+ *
+ * Returns zero on success or -EFAULT on error.
+ *
+ * Note: an error indicates that either the kernel is out of memory or
+ *	 the guest memory mapping is broken. In any case the best solution
+ *	 would be to terminate the guest.
+ *	 It is wrong to inject a guest exception.
+ */
+static inline __must_check
+int read_guest_lc(struct kvm_vcpu *vcpu, unsigned long gra, void *data,
+		  unsigned long len)
+{
+	unsigned long gpa = gra + kvm_s390_get_prefix(vcpu);
+
+	return kvm_read_guest(vcpu->kvm, gpa, data, len);
+}
+
+enum gacc_mode {
+	GACC_FETCH,
+	GACC_STORE,
+	GACC_IFETCH,
+};
+
+int guest_translate_address_with_key(struct kvm_vcpu *vcpu, unsigned long gva, u8 ar,
+				     unsigned long *gpa, enum gacc_mode mode,
+				     u8 access_key);
+
+int check_gva_range(struct kvm_vcpu *vcpu, unsigned long gva, u8 ar,
+		    unsigned long length, enum gacc_mode mode, u8 access_key);
+
+int check_gpa_range(struct kvm *kvm, unsigned long gpa, unsigned long length,
+		    enum gacc_mode mode, u8 access_key);
+
+int access_guest_abs_with_key(struct kvm *kvm, gpa_t gpa, void *data,
+			      unsigned long len, enum gacc_mode mode, u8 access_key);
+
+int access_guest_with_key(struct kvm_vcpu *vcpu, unsigned long ga, u8 ar,
+			  void *data, unsigned long len, enum gacc_mode mode,
+			  u8 access_key);
+
+int access_guest_real(struct kvm_vcpu *vcpu, unsigned long gra,
+		      void *data, unsigned long len, enum gacc_mode mode);
+
+int cmpxchg_guest_abs_with_key(struct kvm *kvm, gpa_t gpa, int len, __uint128_t *old,
+			       __uint128_t new, u8 access_key, bool *success);
+
+/**
+ * write_guest_with_key - copy data from kernel space to guest space
+ * @vcpu: virtual cpu
+ * @ga: guest address
+ * @ar: access register
+ * @data: source address in kernel space
+ * @len: number of bytes to copy
+ * @access_key: access key the storage key needs to match
+ *
+ * Copy @len bytes from @data (kernel space) to @ga (guest address).
+ * In order to copy data to guest space the PSW of the vcpu is inspected:
+ * If DAT is off data will be copied to guest real or absolute memory.
+ * If DAT is on data will be copied to the address space as specified by
+ * the address space bits of the PSW:
+ * Primary, secondary, home space or access register mode.
+ * The addressing mode of the PSW is also inspected, so that address wrap
+ * around is taken into account for 24-, 31- and 64-bit addressing mode,
+ * if the to be copied data crosses page boundaries in guest address space.
+ * In addition low address, DAT and key protection checks are performed before
+ * copying any data.
+ *
+ * This function modifies the 'struct kvm_s390_pgm_info pgm' member of @vcpu.
+ * In case of an access exception (e.g. protection exception) pgm will contain
+ * all data necessary so that a subsequent call to 'kvm_s390_inject_prog_vcpu()'
+ * will inject a correct exception into the guest.
+ * If no access exception happened, the contents of pgm are undefined when
+ * this function returns.
+ *
+ * Returns:  - zero on success
+ *	     - a negative value if e.g. the guest mapping is broken or in
+ *	       case of out-of-memory. In this case the contents of pgm are
+ *	       undefined. Also parts of @data may have been copied to guest
+ *	       space.
+ *	     - a positive value if an access exception happened. In this case
+ *	       the returned value is the program interruption code and the
+ *	       contents of pgm may be used to inject an exception into the
+ *	       guest. No data has been copied to guest space.
+ *
+ * Note: in case an access exception is recognized no data has been copied to
+ *	 guest space (this is also true, if the to be copied data would cross
+ *	 one or more page boundaries in guest space).
+ *	 Therefore this function may be used for nullifying and suppressing
+ *	 instruction emulation.
+ *	 It may also be used for terminating instructions, if it is undefined
+ *	 if data has been changed in guest space in case of an exception.
+ */
+static inline __must_check
+int write_guest_with_key(struct kvm_vcpu *vcpu, unsigned long ga, u8 ar,
+			 void *data, unsigned long len, u8 access_key)
+{
+	return access_guest_with_key(vcpu, ga, ar, data, len, GACC_STORE,
+				     access_key);
+}
+
+/**
+ * write_guest - copy data from kernel space to guest space
+ * @vcpu: virtual cpu
+ * @ga: guest address
+ * @ar: access register
+ * @data: source address in kernel space
+ * @len: number of bytes to copy
+ *
+ * The behaviour of write_guest is identical to write_guest_with_key, except
+ * that the PSW access key is used instead of an explicit argument.
+ */
+static inline __must_check
+int write_guest(struct kvm_vcpu *vcpu, unsigned long ga, u8 ar, void *data,
+		unsigned long len)
+{
+	u8 access_key = psw_bits(vcpu->arch.sie_block->gpsw).key;
+
+	return write_guest_with_key(vcpu, ga, ar, data, len, access_key);
+}
+
+/**
+ * read_guest_with_key - copy data from guest space to kernel space
+ * @vcpu: virtual cpu
+ * @ga: guest address
+ * @ar: access register
+ * @data: destination address in kernel space
+ * @len: number of bytes to copy
+ * @access_key: access key the storage key needs to match
+ *
+ * Copy @len bytes from @ga (guest address) to @data (kernel space).
+ *
+ * The behaviour of read_guest_with_key is identical to write_guest_with_key,
+ * except that data will be copied from guest space to kernel space.
+ */
+static inline __must_check
+int read_guest_with_key(struct kvm_vcpu *vcpu, unsigned long ga, u8 ar,
+			void *data, unsigned long len, u8 access_key)
+{
+	return access_guest_with_key(vcpu, ga, ar, data, len, GACC_FETCH,
+				     access_key);
+}
+
+/**
+ * read_guest - copy data from guest space to kernel space
+ * @vcpu: virtual cpu
+ * @ga: guest address
+ * @ar: access register
+ * @data: destination address in kernel space
+ * @len: number of bytes to copy
+ *
+ * Copy @len bytes from @ga (guest address) to @data (kernel space).
+ *
+ * The behaviour of read_guest is identical to read_guest_with_key, except
+ * that the PSW access key is used instead of an explicit argument.
+ */
+static inline __must_check
+int read_guest(struct kvm_vcpu *vcpu, unsigned long ga, u8 ar, void *data,
+	       unsigned long len)
+{
+	u8 access_key = psw_bits(vcpu->arch.sie_block->gpsw).key;
+
+	return read_guest_with_key(vcpu, ga, ar, data, len, access_key);
+}
+
+/**
+ * read_guest_instr - copy instruction data from guest space to kernel space
+ * @vcpu: virtual cpu
+ * @ga: guest address
+ * @data: destination address in kernel space
+ * @len: number of bytes to copy
+ *
+ * Copy @len bytes from the given address (guest space) to @data (kernel
+ * space).
+ *
+ * The behaviour of read_guest_instr is identical to read_guest, except that
+ * instruction data will be read from primary space when in home-space or
+ * address-space mode.
+ */
+static inline __must_check
+int read_guest_instr(struct kvm_vcpu *vcpu, unsigned long ga, void *data,
+		     unsigned long len)
+{
+	u8 access_key = psw_bits(vcpu->arch.sie_block->gpsw).key;
+
+	return access_guest_with_key(vcpu, ga, 0, data, len, GACC_IFETCH,
+				     access_key);
+}
+
+/**
+ * write_guest_abs - copy data from kernel space to guest space absolute
+ * @vcpu: virtual cpu
+ * @gpa: guest physical (absolute) address
+ * @data: source address in kernel space
+ * @len: number of bytes to copy
+ *
+ * Copy @len bytes from @data (kernel space) to @gpa (guest absolute address).
+ * It is up to the caller to ensure that the entire guest memory range is
+ * valid memory before calling this function.
+ * Guest low address and key protection are not checked.
+ *
+ * Returns zero on success or -EFAULT on error.
+ *
+ * If an error occurs data may have been copied partially to guest memory.
+ */
+static inline __must_check
+int write_guest_abs(struct kvm_vcpu *vcpu, unsigned long gpa, void *data,
+		    unsigned long len)
+{
+	return kvm_write_guest(vcpu->kvm, gpa, data, len);
+}
+
+/**
+ * read_guest_abs - copy data from guest space absolute to kernel space
+ * @vcpu: virtual cpu
+ * @gpa: guest physical (absolute) address
+ * @data: destination address in kernel space
+ * @len: number of bytes to copy
+ *
+ * Copy @len bytes from @gpa (guest absolute address) to @data (kernel space).
+ * It is up to the caller to ensure that the entire guest memory range is
+ * valid memory before calling this function.
+ * Guest key protection is not checked.
+ *
+ * Returns zero on success or -EFAULT on error.
+ *
+ * If an error occurs data may have been copied partially to kernel space.
+ */
+static inline __must_check
+int read_guest_abs(struct kvm_vcpu *vcpu, unsigned long gpa, void *data,
+		   unsigned long len)
+{
+	return kvm_read_guest(vcpu->kvm, gpa, data, len);
+}
+
+/**
+ * write_guest_real - copy data from kernel space to guest space real
+ * @vcpu: virtual cpu
+ * @gra: guest real address
+ * @data: source address in kernel space
+ * @len: number of bytes to copy
+ *
+ * Copy @len bytes from @data (kernel space) to @gra (guest real address).
+ * It is up to the caller to ensure that the entire guest memory range is
+ * valid memory before calling this function.
+ * Guest low address and key protection are not checked.
+ *
+ * Returns zero on success or -EFAULT on error.
+ *
+ * If an error occurs data may have been copied partially to guest memory.
+ */
+static inline __must_check
+int write_guest_real(struct kvm_vcpu *vcpu, unsigned long gra, void *data,
+		     unsigned long len)
+{
+	return access_guest_real(vcpu, gra, data, len, 1);
+}
+
+/**
+ * read_guest_real - copy data from guest space real to kernel space
+ * @vcpu: virtual cpu
+ * @gra: guest real address
+ * @data: destination address in kernel space
+ * @len: number of bytes to copy
+ *
+ * Copy @len bytes from @gra (guest real address) to @data (kernel space).
+ * It is up to the caller to ensure that the entire guest memory range is
+ * valid memory before calling this function.
+ * Guest key protection is not checked.
+ *
+ * Returns zero on success or -EFAULT on error.
+ *
+ * If an error occurs data may have been copied partially to kernel space.
+ */
+static inline __must_check
+int read_guest_real(struct kvm_vcpu *vcpu, unsigned long gra, void *data,
+		    unsigned long len)
+{
+	return access_guest_real(vcpu, gra, data, len, 0);
+}
+
+void ipte_lock(struct kvm *kvm);
+void ipte_unlock(struct kvm *kvm);
+int ipte_lock_held(struct kvm *kvm);
+int kvm_s390_check_low_addr_prot_real(struct kvm_vcpu *vcpu, unsigned long gra);
+
+/* MVPG PEI indication bits */
+#define PEI_DAT_PROT 2
+#define PEI_NOT_PTE 4
+
+int kvm_s390_shadow_fault(struct kvm_vcpu *vcpu, struct gmap *shadow,
+			  unsigned long saddr, unsigned long *datptr);
+
+#endif /* __KVM_S390_GACCESS_H */
diff --git a/arch/s390/kvm/guestdbg.c b/arch/s390/kvm/guestdbg.c
new file mode 100644
index 0000000000..3765c4223b
--- /dev/null
+++ b/arch/s390/kvm/guestdbg.c
@@ -0,0 +1,626 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * kvm guest debug support
+ *
+ * Copyright IBM Corp. 2014
+ *
+ *    Author(s): David Hildenbrand <dahi@linux.vnet.ibm.com>
+ */
+#include <linux/kvm_host.h>
+#include <linux/errno.h>
+#include "kvm-s390.h"
+#include "gaccess.h"
+
+/*
+ * Extends the address range given by *start and *stop to include the address
+ * range starting with estart and the length len. Takes care of overflowing
+ * intervals and tries to minimize the overall interval size.
+ */
+static void extend_address_range(u64 *start, u64 *stop, u64 estart, int len)
+{
+	u64 estop;
+
+	if (len > 0)
+		len--;
+	else
+		len = 0;
+
+	estop = estart + len;
+
+	/* 0-0 range represents "not set" */
+	if ((*start == 0) && (*stop == 0)) {
+		*start = estart;
+		*stop = estop;
+	} else if (*start <= *stop) {
+		/* increase the existing range */
+		if (estart < *start)
+			*start = estart;
+		if (estop > *stop)
+			*stop = estop;
+	} else {
+		/* "overflowing" interval, whereby *stop > *start */
+		if (estart <= *stop) {
+			if (estop > *stop)
+				*stop = estop;
+		} else if (estop > *start) {
+			if (estart < *start)
+				*start = estart;
+		}
+		/* minimize the range */
+		else if ((estop - *stop) < (*start - estart))
+			*stop = estop;
+		else
+			*start = estart;
+	}
+}
+
+#define MAX_INST_SIZE 6
+
+static void enable_all_hw_bp(struct kvm_vcpu *vcpu)
+{
+	unsigned long start, len;
+	u64 *cr9 = &vcpu->arch.sie_block->gcr[9];
+	u64 *cr10 = &vcpu->arch.sie_block->gcr[10];
+	u64 *cr11 = &vcpu->arch.sie_block->gcr[11];
+	int i;
+
+	if (vcpu->arch.guestdbg.nr_hw_bp <= 0 ||
+	    vcpu->arch.guestdbg.hw_bp_info == NULL)
+		return;
+
+	/*
+	 * If the guest is not interested in branching events, we can safely
+	 * limit them to the PER address range.
+	 */
+	if (!(*cr9 & PER_EVENT_BRANCH))
+		*cr9 |= PER_CONTROL_BRANCH_ADDRESS;
+	*cr9 |= PER_EVENT_IFETCH | PER_EVENT_BRANCH;
+
+	for (i = 0; i < vcpu->arch.guestdbg.nr_hw_bp; i++) {
+		start = vcpu->arch.guestdbg.hw_bp_info[i].addr;
+		len = vcpu->arch.guestdbg.hw_bp_info[i].len;
+
+		/*
+		 * The instruction in front of the desired bp has to
+		 * report instruction-fetching events
+		 */
+		if (start < MAX_INST_SIZE) {
+			len += start;
+			start = 0;
+		} else {
+			start -= MAX_INST_SIZE;
+			len += MAX_INST_SIZE;
+		}
+
+		extend_address_range(cr10, cr11, start, len);
+	}
+}
+
+static void enable_all_hw_wp(struct kvm_vcpu *vcpu)
+{
+	unsigned long start, len;
+	u64 *cr9 = &vcpu->arch.sie_block->gcr[9];
+	u64 *cr10 = &vcpu->arch.sie_block->gcr[10];
+	u64 *cr11 = &vcpu->arch.sie_block->gcr[11];
+	int i;
+
+	if (vcpu->arch.guestdbg.nr_hw_wp <= 0 ||
+	    vcpu->arch.guestdbg.hw_wp_info == NULL)
+		return;
+
+	/* if host uses storage alternation for special address
+	 * spaces, enable all events and give all to the guest */
+	if (*cr9 & PER_EVENT_STORE && *cr9 & PER_CONTROL_ALTERATION) {
+		*cr9 &= ~PER_CONTROL_ALTERATION;
+		*cr10 = 0;
+		*cr11 = -1UL;
+	} else {
+		*cr9 &= ~PER_CONTROL_ALTERATION;
+		*cr9 |= PER_EVENT_STORE;
+
+		for (i = 0; i < vcpu->arch.guestdbg.nr_hw_wp; i++) {
+			start = vcpu->arch.guestdbg.hw_wp_info[i].addr;
+			len = vcpu->arch.guestdbg.hw_wp_info[i].len;
+
+			extend_address_range(cr10, cr11, start, len);
+		}
+	}
+}
+
+void kvm_s390_backup_guest_per_regs(struct kvm_vcpu *vcpu)
+{
+	vcpu->arch.guestdbg.cr0 = vcpu->arch.sie_block->gcr[0];
+	vcpu->arch.guestdbg.cr9 = vcpu->arch.sie_block->gcr[9];
+	vcpu->arch.guestdbg.cr10 = vcpu->arch.sie_block->gcr[10];
+	vcpu->arch.guestdbg.cr11 = vcpu->arch.sie_block->gcr[11];
+}
+
+void kvm_s390_restore_guest_per_regs(struct kvm_vcpu *vcpu)
+{
+	vcpu->arch.sie_block->gcr[0] = vcpu->arch.guestdbg.cr0;
+	vcpu->arch.sie_block->gcr[9] = vcpu->arch.guestdbg.cr9;
+	vcpu->arch.sie_block->gcr[10] = vcpu->arch.guestdbg.cr10;
+	vcpu->arch.sie_block->gcr[11] = vcpu->arch.guestdbg.cr11;
+}
+
+void kvm_s390_patch_guest_per_regs(struct kvm_vcpu *vcpu)
+{
+	/*
+	 * TODO: if guest psw has per enabled, otherwise 0s!
+	 * This reduces the amount of reported events.
+	 * Need to intercept all psw changes!
+	 */
+
+	if (guestdbg_sstep_enabled(vcpu)) {
+		/* disable timer (clock-comparator) interrupts */
+		vcpu->arch.sie_block->gcr[0] &= ~CR0_CLOCK_COMPARATOR_SUBMASK;
+		vcpu->arch.sie_block->gcr[9] |= PER_EVENT_IFETCH;
+		vcpu->arch.sie_block->gcr[10] = 0;
+		vcpu->arch.sie_block->gcr[11] = -1UL;
+	}
+
+	if (guestdbg_hw_bp_enabled(vcpu)) {
+		enable_all_hw_bp(vcpu);
+		enable_all_hw_wp(vcpu);
+	}
+
+	/* TODO: Instruction-fetching-nullification not allowed for now */
+	if (vcpu->arch.sie_block->gcr[9] & PER_EVENT_NULLIFICATION)
+		vcpu->arch.sie_block->gcr[9] &= ~PER_EVENT_NULLIFICATION;
+}
+
+#define MAX_WP_SIZE 100
+
+static int __import_wp_info(struct kvm_vcpu *vcpu,
+			    struct kvm_hw_breakpoint *bp_data,
+			    struct kvm_hw_wp_info_arch *wp_info)
+{
+	int ret = 0;
+	wp_info->len = bp_data->len;
+	wp_info->addr = bp_data->addr;
+	wp_info->phys_addr = bp_data->phys_addr;
+	wp_info->old_data = NULL;
+
+	if (wp_info->len < 0 || wp_info->len > MAX_WP_SIZE)
+		return -EINVAL;
+
+	wp_info->old_data = kmalloc(bp_data->len, GFP_KERNEL_ACCOUNT);
+	if (!wp_info->old_data)
+		return -ENOMEM;
+	/* try to backup the original value */
+	ret = read_guest_abs(vcpu, wp_info->phys_addr, wp_info->old_data,
+			     wp_info->len);
+	if (ret) {
+		kfree(wp_info->old_data);
+		wp_info->old_data = NULL;
+	}
+
+	return ret;
+}
+
+#define MAX_BP_COUNT 50
+
+int kvm_s390_import_bp_data(struct kvm_vcpu *vcpu,
+			    struct kvm_guest_debug *dbg)
+{
+	int ret = 0, nr_wp = 0, nr_bp = 0, i;
+	struct kvm_hw_breakpoint *bp_data = NULL;
+	struct kvm_hw_wp_info_arch *wp_info = NULL;
+	struct kvm_hw_bp_info_arch *bp_info = NULL;
+
+	if (dbg->arch.nr_hw_bp <= 0 || !dbg->arch.hw_bp)
+		return 0;
+	else if (dbg->arch.nr_hw_bp > MAX_BP_COUNT)
+		return -EINVAL;
+
+	bp_data = memdup_user(dbg->arch.hw_bp,
+			      sizeof(*bp_data) * dbg->arch.nr_hw_bp);
+	if (IS_ERR(bp_data))
+		return PTR_ERR(bp_data);
+
+	for (i = 0; i < dbg->arch.nr_hw_bp; i++) {
+		switch (bp_data[i].type) {
+		case KVM_HW_WP_WRITE:
+			nr_wp++;
+			break;
+		case KVM_HW_BP:
+			nr_bp++;
+			break;
+		default:
+			break;
+		}
+	}
+
+	if (nr_wp > 0) {
+		wp_info = kmalloc_array(nr_wp,
+					sizeof(*wp_info),
+					GFP_KERNEL_ACCOUNT);
+		if (!wp_info) {
+			ret = -ENOMEM;
+			goto error;
+		}
+	}
+	if (nr_bp > 0) {
+		bp_info = kmalloc_array(nr_bp,
+					sizeof(*bp_info),
+					GFP_KERNEL_ACCOUNT);
+		if (!bp_info) {
+			ret = -ENOMEM;
+			goto error;
+		}
+	}
+
+	for (nr_wp = 0, nr_bp = 0, i = 0; i < dbg->arch.nr_hw_bp; i++) {
+		switch (bp_data[i].type) {
+		case KVM_HW_WP_WRITE:
+			ret = __import_wp_info(vcpu, &bp_data[i],
+					       &wp_info[nr_wp]);
+			if (ret)
+				goto error;
+			nr_wp++;
+			break;
+		case KVM_HW_BP:
+			bp_info[nr_bp].len = bp_data[i].len;
+			bp_info[nr_bp].addr = bp_data[i].addr;
+			nr_bp++;
+			break;
+		}
+	}
+
+	vcpu->arch.guestdbg.nr_hw_bp = nr_bp;
+	vcpu->arch.guestdbg.hw_bp_info = bp_info;
+	vcpu->arch.guestdbg.nr_hw_wp = nr_wp;
+	vcpu->arch.guestdbg.hw_wp_info = wp_info;
+	return 0;
+error:
+	kfree(bp_data);
+	kfree(wp_info);
+	kfree(bp_info);
+	return ret;
+}
+
+void kvm_s390_clear_bp_data(struct kvm_vcpu *vcpu)
+{
+	int i;
+	struct kvm_hw_wp_info_arch *hw_wp_info = NULL;
+
+	for (i = 0; i < vcpu->arch.guestdbg.nr_hw_wp; i++) {
+		hw_wp_info = &vcpu->arch.guestdbg.hw_wp_info[i];
+		kfree(hw_wp_info->old_data);
+		hw_wp_info->old_data = NULL;
+	}
+	kfree(vcpu->arch.guestdbg.hw_wp_info);
+	vcpu->arch.guestdbg.hw_wp_info = NULL;
+
+	kfree(vcpu->arch.guestdbg.hw_bp_info);
+	vcpu->arch.guestdbg.hw_bp_info = NULL;
+
+	vcpu->arch.guestdbg.nr_hw_wp = 0;
+	vcpu->arch.guestdbg.nr_hw_bp = 0;
+}
+
+static inline int in_addr_range(u64 addr, u64 a, u64 b)
+{
+	if (a <= b)
+		return (addr >= a) && (addr <= b);
+	else
+		/* "overflowing" interval */
+		return (addr >= a) || (addr <= b);
+}
+
+#define end_of_range(bp_info) (bp_info->addr + bp_info->len - 1)
+
+static struct kvm_hw_bp_info_arch *find_hw_bp(struct kvm_vcpu *vcpu,
+					      unsigned long addr)
+{
+	struct kvm_hw_bp_info_arch *bp_info = vcpu->arch.guestdbg.hw_bp_info;
+	int i;
+
+	if (vcpu->arch.guestdbg.nr_hw_bp == 0)
+		return NULL;
+
+	for (i = 0; i < vcpu->arch.guestdbg.nr_hw_bp; i++) {
+		/* addr is directly the start or in the range of a bp */
+		if (addr == bp_info->addr)
+			goto found;
+		if (bp_info->len > 0 &&
+		    in_addr_range(addr, bp_info->addr, end_of_range(bp_info)))
+			goto found;
+
+		bp_info++;
+	}
+
+	return NULL;
+found:
+	return bp_info;
+}
+
+static struct kvm_hw_wp_info_arch *any_wp_changed(struct kvm_vcpu *vcpu)
+{
+	int i;
+	struct kvm_hw_wp_info_arch *wp_info = NULL;
+	void *temp = NULL;
+
+	if (vcpu->arch.guestdbg.nr_hw_wp == 0)
+		return NULL;
+
+	for (i = 0; i < vcpu->arch.guestdbg.nr_hw_wp; i++) {
+		wp_info = &vcpu->arch.guestdbg.hw_wp_info[i];
+		if (!wp_info || !wp_info->old_data || wp_info->len <= 0)
+			continue;
+
+		temp = kmalloc(wp_info->len, GFP_KERNEL_ACCOUNT);
+		if (!temp)
+			continue;
+
+		/* refetch the wp data and compare it to the old value */
+		if (!read_guest_abs(vcpu, wp_info->phys_addr, temp,
+				    wp_info->len)) {
+			if (memcmp(temp, wp_info->old_data, wp_info->len)) {
+				kfree(temp);
+				return wp_info;
+			}
+		}
+		kfree(temp);
+		temp = NULL;
+	}
+
+	return NULL;
+}
+
+void kvm_s390_prepare_debug_exit(struct kvm_vcpu *vcpu)
+{
+	vcpu->run->exit_reason = KVM_EXIT_DEBUG;
+	vcpu->guest_debug &= ~KVM_GUESTDBG_EXIT_PENDING;
+}
+
+#define PER_CODE_MASK		(PER_EVENT_MASK >> 24)
+#define PER_CODE_BRANCH		(PER_EVENT_BRANCH >> 24)
+#define PER_CODE_IFETCH		(PER_EVENT_IFETCH >> 24)
+#define PER_CODE_STORE		(PER_EVENT_STORE >> 24)
+#define PER_CODE_STORE_REAL	(PER_EVENT_STORE_REAL >> 24)
+
+#define per_bp_event(code) \
+			(code & (PER_CODE_IFETCH | PER_CODE_BRANCH))
+#define per_write_wp_event(code) \
+			(code & (PER_CODE_STORE | PER_CODE_STORE_REAL))
+
+static int debug_exit_required(struct kvm_vcpu *vcpu, u8 perc,
+			       unsigned long peraddr)
+{
+	struct kvm_debug_exit_arch *debug_exit = &vcpu->run->debug.arch;
+	struct kvm_hw_wp_info_arch *wp_info = NULL;
+	struct kvm_hw_bp_info_arch *bp_info = NULL;
+	unsigned long addr = vcpu->arch.sie_block->gpsw.addr;
+
+	if (guestdbg_hw_bp_enabled(vcpu)) {
+		if (per_write_wp_event(perc) &&
+		    vcpu->arch.guestdbg.nr_hw_wp > 0) {
+			wp_info = any_wp_changed(vcpu);
+			if (wp_info) {
+				debug_exit->addr = wp_info->addr;
+				debug_exit->type = KVM_HW_WP_WRITE;
+				goto exit_required;
+			}
+		}
+		if (per_bp_event(perc) &&
+			 vcpu->arch.guestdbg.nr_hw_bp > 0) {
+			bp_info = find_hw_bp(vcpu, addr);
+			/* remove duplicate events if PC==PER address */
+			if (bp_info && (addr != peraddr)) {
+				debug_exit->addr = addr;
+				debug_exit->type = KVM_HW_BP;
+				vcpu->arch.guestdbg.last_bp = addr;
+				goto exit_required;
+			}
+			/* breakpoint missed */
+			bp_info = find_hw_bp(vcpu, peraddr);
+			if (bp_info && vcpu->arch.guestdbg.last_bp != peraddr) {
+				debug_exit->addr = peraddr;
+				debug_exit->type = KVM_HW_BP;
+				goto exit_required;
+			}
+		}
+	}
+	if (guestdbg_sstep_enabled(vcpu) && per_bp_event(perc)) {
+		debug_exit->addr = addr;
+		debug_exit->type = KVM_SINGLESTEP;
+		goto exit_required;
+	}
+
+	return 0;
+exit_required:
+	return 1;
+}
+
+static int per_fetched_addr(struct kvm_vcpu *vcpu, unsigned long *addr)
+{
+	u8 exec_ilen = 0;
+	u16 opcode[3];
+	int rc;
+
+	if (vcpu->arch.sie_block->icptcode == ICPT_PROGI) {
+		/* PER address references the fetched or the execute instr */
+		*addr = vcpu->arch.sie_block->peraddr;
+		/*
+		 * Manually detect if we have an EXECUTE instruction. As
+		 * instructions are always 2 byte aligned we can read the
+		 * first two bytes unconditionally
+		 */
+		rc = read_guest_instr(vcpu, *addr, &opcode, 2);
+		if (rc)
+			return rc;
+		if (opcode[0] >> 8 == 0x44)
+			exec_ilen = 4;
+		if ((opcode[0] & 0xff0f) == 0xc600)
+			exec_ilen = 6;
+	} else {
+		/* instr was suppressed, calculate the responsible instr */
+		*addr = __rewind_psw(vcpu->arch.sie_block->gpsw,
+				     kvm_s390_get_ilen(vcpu));
+		if (vcpu->arch.sie_block->icptstatus & 0x01) {
+			exec_ilen = (vcpu->arch.sie_block->icptstatus & 0x60) >> 4;
+			if (!exec_ilen)
+				exec_ilen = 4;
+		}
+	}
+
+	if (exec_ilen) {
+		/* read the complete EXECUTE instr to detect the fetched addr */
+		rc = read_guest_instr(vcpu, *addr, &opcode, exec_ilen);
+		if (rc)
+			return rc;
+		if (exec_ilen == 6) {
+			/* EXECUTE RELATIVE LONG - RIL-b format */
+			s32 rl = *((s32 *) (opcode + 1));
+
+			/* rl is a _signed_ 32 bit value specifying halfwords */
+			*addr += (u64)(s64) rl * 2;
+		} else {
+			/* EXECUTE - RX-a format */
+			u32 base = (opcode[1] & 0xf000) >> 12;
+			u32 disp = opcode[1] & 0x0fff;
+			u32 index = opcode[0] & 0x000f;
+
+			*addr = base ? vcpu->run->s.regs.gprs[base] : 0;
+			*addr += index ? vcpu->run->s.regs.gprs[index] : 0;
+			*addr += disp;
+		}
+		*addr = kvm_s390_logical_to_effective(vcpu, *addr);
+	}
+	return 0;
+}
+
+#define guest_per_enabled(vcpu) \
+			     (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PER)
+
+int kvm_s390_handle_per_ifetch_icpt(struct kvm_vcpu *vcpu)
+{
+	const u64 cr10 = vcpu->arch.sie_block->gcr[10];
+	const u64 cr11 = vcpu->arch.sie_block->gcr[11];
+	const u8 ilen = kvm_s390_get_ilen(vcpu);
+	struct kvm_s390_pgm_info pgm_info = {
+		.code = PGM_PER,
+		.per_code = PER_CODE_IFETCH,
+		.per_address = __rewind_psw(vcpu->arch.sie_block->gpsw, ilen),
+	};
+	unsigned long fetched_addr;
+	int rc;
+
+	/*
+	 * The PSW points to the next instruction, therefore the intercepted
+	 * instruction generated a PER i-fetch event. PER address therefore
+	 * points at the previous PSW address (could be an EXECUTE function).
+	 */
+	if (!guestdbg_enabled(vcpu))
+		return kvm_s390_inject_prog_irq(vcpu, &pgm_info);
+
+	if (debug_exit_required(vcpu, pgm_info.per_code, pgm_info.per_address))
+		vcpu->guest_debug |= KVM_GUESTDBG_EXIT_PENDING;
+
+	if (!guest_per_enabled(vcpu) ||
+	    !(vcpu->arch.sie_block->gcr[9] & PER_EVENT_IFETCH))
+		return 0;
+
+	rc = per_fetched_addr(vcpu, &fetched_addr);
+	if (rc < 0)
+		return rc;
+	if (rc)
+		/* instruction-fetching exceptions */
+		return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
+
+	if (in_addr_range(fetched_addr, cr10, cr11))
+		return kvm_s390_inject_prog_irq(vcpu, &pgm_info);
+	return 0;
+}
+
+static int filter_guest_per_event(struct kvm_vcpu *vcpu)
+{
+	const u8 perc = vcpu->arch.sie_block->perc;
+	u64 addr = vcpu->arch.sie_block->gpsw.addr;
+	u64 cr9 = vcpu->arch.sie_block->gcr[9];
+	u64 cr10 = vcpu->arch.sie_block->gcr[10];
+	u64 cr11 = vcpu->arch.sie_block->gcr[11];
+	/* filter all events, demanded by the guest */
+	u8 guest_perc = perc & (cr9 >> 24) & PER_CODE_MASK;
+	unsigned long fetched_addr;
+	int rc;
+
+	if (!guest_per_enabled(vcpu))
+		guest_perc = 0;
+
+	/* filter "successful-branching" events */
+	if (guest_perc & PER_CODE_BRANCH &&
+	    cr9 & PER_CONTROL_BRANCH_ADDRESS &&
+	    !in_addr_range(addr, cr10, cr11))
+		guest_perc &= ~PER_CODE_BRANCH;
+
+	/* filter "instruction-fetching" events */
+	if (guest_perc & PER_CODE_IFETCH) {
+		rc = per_fetched_addr(vcpu, &fetched_addr);
+		if (rc < 0)
+			return rc;
+		/*
+		 * Don't inject an irq on exceptions. This would make handling
+		 * on icpt code 8 very complex (as PSW was already rewound).
+		 */
+		if (rc || !in_addr_range(fetched_addr, cr10, cr11))
+			guest_perc &= ~PER_CODE_IFETCH;
+	}
+
+	/* All other PER events will be given to the guest */
+	/* TODO: Check altered address/address space */
+
+	vcpu->arch.sie_block->perc = guest_perc;
+
+	if (!guest_perc)
+		vcpu->arch.sie_block->iprcc &= ~PGM_PER;
+	return 0;
+}
+
+#define pssec(vcpu) (vcpu->arch.sie_block->gcr[1] & _ASCE_SPACE_SWITCH)
+#define hssec(vcpu) (vcpu->arch.sie_block->gcr[13] & _ASCE_SPACE_SWITCH)
+#define old_ssec(vcpu) ((vcpu->arch.sie_block->tecmc >> 31) & 0x1)
+#define old_as_is_home(vcpu) !(vcpu->arch.sie_block->tecmc & 0xffff)
+
+int kvm_s390_handle_per_event(struct kvm_vcpu *vcpu)
+{
+	int rc, new_as;
+
+	if (debug_exit_required(vcpu, vcpu->arch.sie_block->perc,
+				vcpu->arch.sie_block->peraddr))
+		vcpu->guest_debug |= KVM_GUESTDBG_EXIT_PENDING;
+
+	rc = filter_guest_per_event(vcpu);
+	if (rc)
+		return rc;
+
+	/*
+	 * Only RP, SAC, SACF, PT, PTI, PR, PC instructions can trigger
+	 * a space-switch event. PER events enforce space-switch events
+	 * for these instructions. So if no PER event for the guest is left,
+	 * we might have to filter the space-switch element out, too.
+	 */
+	if (vcpu->arch.sie_block->iprcc == PGM_SPACE_SWITCH) {
+		vcpu->arch.sie_block->iprcc = 0;
+		new_as = psw_bits(vcpu->arch.sie_block->gpsw).as;
+
+		/*
+		 * If the AS changed from / to home, we had RP, SAC or SACF
+		 * instruction. Check primary and home space-switch-event
+		 * controls. (theoretically home -> home produced no event)
+		 */
+		if (((new_as == PSW_BITS_AS_HOME) ^ old_as_is_home(vcpu)) &&
+		    (pssec(vcpu) || hssec(vcpu)))
+			vcpu->arch.sie_block->iprcc = PGM_SPACE_SWITCH;
+
+		/*
+		 * PT, PTI, PR, PC instruction operate on primary AS only. Check
+		 * if the primary-space-switch-event control was or got set.
+		 */
+		if (new_as == PSW_BITS_AS_PRIMARY && !old_as_is_home(vcpu) &&
+		    (pssec(vcpu) || old_ssec(vcpu)))
+			vcpu->arch.sie_block->iprcc = PGM_SPACE_SWITCH;
+	}
+	return 0;
+}
diff --git a/arch/s390/kvm/intercept.c b/arch/s390/kvm/intercept.c
new file mode 100644
index 0000000000..b16352083f
--- /dev/null
+++ b/arch/s390/kvm/intercept.c
@@ -0,0 +1,667 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * in-kernel handling for sie intercepts
+ *
+ * Copyright IBM Corp. 2008, 2020
+ *
+ *    Author(s): Carsten Otte <cotte@de.ibm.com>
+ *               Christian Borntraeger <borntraeger@de.ibm.com>
+ */
+
+#include <linux/kvm_host.h>
+#include <linux/errno.h>
+#include <linux/pagemap.h>
+
+#include <asm/asm-offsets.h>
+#include <asm/irq.h>
+#include <asm/sysinfo.h>
+#include <asm/uv.h>
+
+#include "kvm-s390.h"
+#include "gaccess.h"
+#include "trace.h"
+#include "trace-s390.h"
+
+u8 kvm_s390_get_ilen(struct kvm_vcpu *vcpu)
+{
+	struct kvm_s390_sie_block *sie_block = vcpu->arch.sie_block;
+	u8 ilen = 0;
+
+	switch (vcpu->arch.sie_block->icptcode) {
+	case ICPT_INST:
+	case ICPT_INSTPROGI:
+	case ICPT_OPEREXC:
+	case ICPT_PARTEXEC:
+	case ICPT_IOINST:
+		/* instruction only stored for these icptcodes */
+		ilen = insn_length(vcpu->arch.sie_block->ipa >> 8);
+		/* Use the length of the EXECUTE instruction if necessary */
+		if (sie_block->icptstatus & 1) {
+			ilen = (sie_block->icptstatus >> 4) & 0x6;
+			if (!ilen)
+				ilen = 4;
+		}
+		break;
+	case ICPT_PROGI:
+		/* bit 1+2 of pgmilc are the ilc, so we directly get ilen */
+		ilen = vcpu->arch.sie_block->pgmilc & 0x6;
+		break;
+	}
+	return ilen;
+}
+
+static int handle_stop(struct kvm_vcpu *vcpu)
+{
+	struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
+	int rc = 0;
+	uint8_t flags, stop_pending;
+
+	vcpu->stat.exit_stop_request++;
+
+	/* delay the stop if any non-stop irq is pending */
+	if (kvm_s390_vcpu_has_irq(vcpu, 1))
+		return 0;
+
+	/* avoid races with the injection/SIGP STOP code */
+	spin_lock(&li->lock);
+	flags = li->irq.stop.flags;
+	stop_pending = kvm_s390_is_stop_irq_pending(vcpu);
+	spin_unlock(&li->lock);
+
+	trace_kvm_s390_stop_request(stop_pending, flags);
+	if (!stop_pending)
+		return 0;
+
+	if (flags & KVM_S390_STOP_FLAG_STORE_STATUS) {
+		rc = kvm_s390_vcpu_store_status(vcpu,
+						KVM_S390_STORE_STATUS_NOADDR);
+		if (rc)
+			return rc;
+	}
+
+	/*
+	 * no need to check the return value of vcpu_stop as it can only have
+	 * an error for protvirt, but protvirt means user cpu state
+	 */
+	if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm))
+		kvm_s390_vcpu_stop(vcpu);
+	return -EOPNOTSUPP;
+}
+
+static int handle_validity(struct kvm_vcpu *vcpu)
+{
+	int viwhy = vcpu->arch.sie_block->ipb >> 16;
+
+	vcpu->stat.exit_validity++;
+	trace_kvm_s390_intercept_validity(vcpu, viwhy);
+	KVM_EVENT(3, "validity intercept 0x%x for pid %u (kvm 0x%pK)", viwhy,
+		  current->pid, vcpu->kvm);
+
+	/* do not warn on invalid runtime instrumentation mode */
+	WARN_ONCE(viwhy != 0x44, "kvm: unhandled validity intercept 0x%x\n",
+		  viwhy);
+	return -EINVAL;
+}
+
+static int handle_instruction(struct kvm_vcpu *vcpu)
+{
+	vcpu->stat.exit_instruction++;
+	trace_kvm_s390_intercept_instruction(vcpu,
+					     vcpu->arch.sie_block->ipa,
+					     vcpu->arch.sie_block->ipb);
+
+	switch (vcpu->arch.sie_block->ipa >> 8) {
+	case 0x01:
+		return kvm_s390_handle_01(vcpu);
+	case 0x82:
+		return kvm_s390_handle_lpsw(vcpu);
+	case 0x83:
+		return kvm_s390_handle_diag(vcpu);
+	case 0xaa:
+		return kvm_s390_handle_aa(vcpu);
+	case 0xae:
+		return kvm_s390_handle_sigp(vcpu);
+	case 0xb2:
+		return kvm_s390_handle_b2(vcpu);
+	case 0xb6:
+		return kvm_s390_handle_stctl(vcpu);
+	case 0xb7:
+		return kvm_s390_handle_lctl(vcpu);
+	case 0xb9:
+		return kvm_s390_handle_b9(vcpu);
+	case 0xe3:
+		return kvm_s390_handle_e3(vcpu);
+	case 0xe5:
+		return kvm_s390_handle_e5(vcpu);
+	case 0xeb:
+		return kvm_s390_handle_eb(vcpu);
+	default:
+		return -EOPNOTSUPP;
+	}
+}
+
+static int inject_prog_on_prog_intercept(struct kvm_vcpu *vcpu)
+{
+	struct kvm_s390_pgm_info pgm_info = {
+		.code = vcpu->arch.sie_block->iprcc,
+		/* the PSW has already been rewound */
+		.flags = KVM_S390_PGM_FLAGS_NO_REWIND,
+	};
+
+	switch (vcpu->arch.sie_block->iprcc & ~PGM_PER) {
+	case PGM_AFX_TRANSLATION:
+	case PGM_ASX_TRANSLATION:
+	case PGM_EX_TRANSLATION:
+	case PGM_LFX_TRANSLATION:
+	case PGM_LSTE_SEQUENCE:
+	case PGM_LSX_TRANSLATION:
+	case PGM_LX_TRANSLATION:
+	case PGM_PRIMARY_AUTHORITY:
+	case PGM_SECONDARY_AUTHORITY:
+	case PGM_SPACE_SWITCH:
+		pgm_info.trans_exc_code = vcpu->arch.sie_block->tecmc;
+		break;
+	case PGM_ALEN_TRANSLATION:
+	case PGM_ALE_SEQUENCE:
+	case PGM_ASTE_INSTANCE:
+	case PGM_ASTE_SEQUENCE:
+	case PGM_ASTE_VALIDITY:
+	case PGM_EXTENDED_AUTHORITY:
+		pgm_info.exc_access_id = vcpu->arch.sie_block->eai;
+		break;
+	case PGM_ASCE_TYPE:
+	case PGM_PAGE_TRANSLATION:
+	case PGM_REGION_FIRST_TRANS:
+	case PGM_REGION_SECOND_TRANS:
+	case PGM_REGION_THIRD_TRANS:
+	case PGM_SEGMENT_TRANSLATION:
+		pgm_info.trans_exc_code = vcpu->arch.sie_block->tecmc;
+		pgm_info.exc_access_id  = vcpu->arch.sie_block->eai;
+		pgm_info.op_access_id  = vcpu->arch.sie_block->oai;
+		break;
+	case PGM_MONITOR:
+		pgm_info.mon_class_nr = vcpu->arch.sie_block->mcn;
+		pgm_info.mon_code = vcpu->arch.sie_block->tecmc;
+		break;
+	case PGM_VECTOR_PROCESSING:
+	case PGM_DATA:
+		pgm_info.data_exc_code = vcpu->arch.sie_block->dxc;
+		break;
+	case PGM_PROTECTION:
+		pgm_info.trans_exc_code = vcpu->arch.sie_block->tecmc;
+		pgm_info.exc_access_id  = vcpu->arch.sie_block->eai;
+		break;
+	default:
+		break;
+	}
+
+	if (vcpu->arch.sie_block->iprcc & PGM_PER) {
+		pgm_info.per_code = vcpu->arch.sie_block->perc;
+		pgm_info.per_atmid = vcpu->arch.sie_block->peratmid;
+		pgm_info.per_address = vcpu->arch.sie_block->peraddr;
+		pgm_info.per_access_id = vcpu->arch.sie_block->peraid;
+	}
+	return kvm_s390_inject_prog_irq(vcpu, &pgm_info);
+}
+
+/*
+ * restore ITDB to program-interruption TDB in guest lowcore
+ * and set TX abort indication if required
+*/
+static int handle_itdb(struct kvm_vcpu *vcpu)
+{
+	struct kvm_s390_itdb *itdb;
+	int rc;
+
+	if (!IS_TE_ENABLED(vcpu) || !IS_ITDB_VALID(vcpu))
+		return 0;
+	if (current->thread.per_flags & PER_FLAG_NO_TE)
+		return 0;
+	itdb = phys_to_virt(vcpu->arch.sie_block->itdba);
+	rc = write_guest_lc(vcpu, __LC_PGM_TDB, itdb, sizeof(*itdb));
+	if (rc)
+		return rc;
+	memset(itdb, 0, sizeof(*itdb));
+
+	return 0;
+}
+
+#define per_event(vcpu) (vcpu->arch.sie_block->iprcc & PGM_PER)
+
+static bool should_handle_per_event(const struct kvm_vcpu *vcpu)
+{
+	if (!guestdbg_enabled(vcpu) || !per_event(vcpu))
+		return false;
+	if (guestdbg_sstep_enabled(vcpu) &&
+	    vcpu->arch.sie_block->iprcc != PGM_PER) {
+		/*
+		 * __vcpu_run() will exit after delivering the concurrently
+		 * indicated condition.
+		 */
+		return false;
+	}
+	return true;
+}
+
+static int handle_prog(struct kvm_vcpu *vcpu)
+{
+	psw_t psw;
+	int rc;
+
+	vcpu->stat.exit_program_interruption++;
+
+	/*
+	 * Intercept 8 indicates a loop of specification exceptions
+	 * for protected guests.
+	 */
+	if (kvm_s390_pv_cpu_is_protected(vcpu))
+		return -EOPNOTSUPP;
+
+	if (should_handle_per_event(vcpu)) {
+		rc = kvm_s390_handle_per_event(vcpu);
+		if (rc)
+			return rc;
+		/* the interrupt might have been filtered out completely */
+		if (vcpu->arch.sie_block->iprcc == 0)
+			return 0;
+	}
+
+	trace_kvm_s390_intercept_prog(vcpu, vcpu->arch.sie_block->iprcc);
+	if (vcpu->arch.sie_block->iprcc == PGM_SPECIFICATION) {
+		rc = read_guest_lc(vcpu, __LC_PGM_NEW_PSW, &psw, sizeof(psw_t));
+		if (rc)
+			return rc;
+		/* Avoid endless loops of specification exceptions */
+		if (!is_valid_psw(&psw))
+			return -EOPNOTSUPP;
+	}
+	rc = handle_itdb(vcpu);
+	if (rc)
+		return rc;
+
+	return inject_prog_on_prog_intercept(vcpu);
+}
+
+/**
+ * handle_external_interrupt - used for external interruption interceptions
+ * @vcpu: virtual cpu
+ *
+ * This interception occurs if:
+ * - the CPUSTAT_EXT_INT bit was already set when the external interrupt
+ *   occurred. In this case, the interrupt needs to be injected manually to
+ *   preserve interrupt priority.
+ * - the external new PSW has external interrupts enabled, which will cause an
+ *   interruption loop. We drop to userspace in this case.
+ *
+ * The latter case can be detected by inspecting the external mask bit in the
+ * external new psw.
+ *
+ * Under PV, only the latter case can occur, since interrupt priorities are
+ * handled in the ultravisor.
+ */
+static int handle_external_interrupt(struct kvm_vcpu *vcpu)
+{
+	u16 eic = vcpu->arch.sie_block->eic;
+	struct kvm_s390_irq irq;
+	psw_t newpsw;
+	int rc;
+
+	vcpu->stat.exit_external_interrupt++;
+
+	if (kvm_s390_pv_cpu_is_protected(vcpu)) {
+		newpsw = vcpu->arch.sie_block->gpsw;
+	} else {
+		rc = read_guest_lc(vcpu, __LC_EXT_NEW_PSW, &newpsw, sizeof(psw_t));
+		if (rc)
+			return rc;
+	}
+
+	/*
+	 * Clock comparator or timer interrupt with external interrupt enabled
+	 * will cause interrupt loop. Drop to userspace.
+	 */
+	if ((eic == EXT_IRQ_CLK_COMP || eic == EXT_IRQ_CPU_TIMER) &&
+	    (newpsw.mask & PSW_MASK_EXT))
+		return -EOPNOTSUPP;
+
+	switch (eic) {
+	case EXT_IRQ_CLK_COMP:
+		irq.type = KVM_S390_INT_CLOCK_COMP;
+		break;
+	case EXT_IRQ_CPU_TIMER:
+		irq.type = KVM_S390_INT_CPU_TIMER;
+		break;
+	case EXT_IRQ_EXTERNAL_CALL:
+		irq.type = KVM_S390_INT_EXTERNAL_CALL;
+		irq.u.extcall.code = vcpu->arch.sie_block->extcpuaddr;
+		rc = kvm_s390_inject_vcpu(vcpu, &irq);
+		/* ignore if another external call is already pending */
+		if (rc == -EBUSY)
+			return 0;
+		return rc;
+	default:
+		return -EOPNOTSUPP;
+	}
+
+	return kvm_s390_inject_vcpu(vcpu, &irq);
+}
+
+/**
+ * handle_mvpg_pei - Handle MOVE PAGE partial execution interception.
+ * @vcpu: virtual cpu
+ *
+ * This interception can only happen for guests with DAT disabled and
+ * addresses that are currently not mapped in the host. Thus we try to
+ * set up the mappings for the corresponding user pages here (or throw
+ * addressing exceptions in case of illegal guest addresses).
+ */
+static int handle_mvpg_pei(struct kvm_vcpu *vcpu)
+{
+	unsigned long srcaddr, dstaddr;
+	int reg1, reg2, rc;
+
+	kvm_s390_get_regs_rre(vcpu, &reg1, &reg2);
+
+	/* Ensure that the source is paged-in, no actual access -> no key checking */
+	rc = guest_translate_address_with_key(vcpu, vcpu->run->s.regs.gprs[reg2],
+					      reg2, &srcaddr, GACC_FETCH, 0);
+	if (rc)
+		return kvm_s390_inject_prog_cond(vcpu, rc);
+	rc = kvm_arch_fault_in_page(vcpu, srcaddr, 0);
+	if (rc != 0)
+		return rc;
+
+	/* Ensure that the source is paged-in, no actual access -> no key checking */
+	rc = guest_translate_address_with_key(vcpu, vcpu->run->s.regs.gprs[reg1],
+					      reg1, &dstaddr, GACC_STORE, 0);
+	if (rc)
+		return kvm_s390_inject_prog_cond(vcpu, rc);
+	rc = kvm_arch_fault_in_page(vcpu, dstaddr, 1);
+	if (rc != 0)
+		return rc;
+
+	kvm_s390_retry_instr(vcpu);
+
+	return 0;
+}
+
+static int handle_partial_execution(struct kvm_vcpu *vcpu)
+{
+	vcpu->stat.exit_pei++;
+
+	if (vcpu->arch.sie_block->ipa == 0xb254)	/* MVPG */
+		return handle_mvpg_pei(vcpu);
+	if (vcpu->arch.sie_block->ipa >> 8 == 0xae)	/* SIGP */
+		return kvm_s390_handle_sigp_pei(vcpu);
+
+	return -EOPNOTSUPP;
+}
+
+/*
+ * Handle the sthyi instruction that provides the guest with system
+ * information, like current CPU resources available at each level of
+ * the machine.
+ */
+int handle_sthyi(struct kvm_vcpu *vcpu)
+{
+	int reg1, reg2, cc = 0, r = 0;
+	u64 code, addr, rc = 0;
+	struct sthyi_sctns *sctns = NULL;
+
+	if (!test_kvm_facility(vcpu->kvm, 74))
+		return kvm_s390_inject_program_int(vcpu, PGM_OPERATION);
+
+	kvm_s390_get_regs_rre(vcpu, &reg1, &reg2);
+	code = vcpu->run->s.regs.gprs[reg1];
+	addr = vcpu->run->s.regs.gprs[reg2];
+
+	vcpu->stat.instruction_sthyi++;
+	VCPU_EVENT(vcpu, 3, "STHYI: fc: %llu addr: 0x%016llx", code, addr);
+	trace_kvm_s390_handle_sthyi(vcpu, code, addr);
+
+	if (reg1 == reg2 || reg1 & 1 || reg2 & 1)
+		return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
+
+	if (code & 0xffff) {
+		cc = 3;
+		rc = 4;
+		goto out;
+	}
+
+	if (!kvm_s390_pv_cpu_is_protected(vcpu) && (addr & ~PAGE_MASK))
+		return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
+
+	sctns = (void *)get_zeroed_page(GFP_KERNEL_ACCOUNT);
+	if (!sctns)
+		return -ENOMEM;
+
+	cc = sthyi_fill(sctns, &rc);
+	if (cc < 0) {
+		free_page((unsigned long)sctns);
+		return cc;
+	}
+out:
+	if (!cc) {
+		if (kvm_s390_pv_cpu_is_protected(vcpu)) {
+			memcpy(sida_addr(vcpu->arch.sie_block), sctns, PAGE_SIZE);
+		} else {
+			r = write_guest(vcpu, addr, reg2, sctns, PAGE_SIZE);
+			if (r) {
+				free_page((unsigned long)sctns);
+				return kvm_s390_inject_prog_cond(vcpu, r);
+			}
+		}
+	}
+
+	free_page((unsigned long)sctns);
+	vcpu->run->s.regs.gprs[reg2 + 1] = rc;
+	kvm_s390_set_psw_cc(vcpu, cc);
+	return r;
+}
+
+static int handle_operexc(struct kvm_vcpu *vcpu)
+{
+	psw_t oldpsw, newpsw;
+	int rc;
+
+	vcpu->stat.exit_operation_exception++;
+	trace_kvm_s390_handle_operexc(vcpu, vcpu->arch.sie_block->ipa,
+				      vcpu->arch.sie_block->ipb);
+
+	if (vcpu->arch.sie_block->ipa == 0xb256)
+		return handle_sthyi(vcpu);
+
+	if (vcpu->arch.sie_block->ipa == 0 && vcpu->kvm->arch.user_instr0)
+		return -EOPNOTSUPP;
+	rc = read_guest_lc(vcpu, __LC_PGM_NEW_PSW, &newpsw, sizeof(psw_t));
+	if (rc)
+		return rc;
+	/*
+	 * Avoid endless loops of operation exceptions, if the pgm new
+	 * PSW will cause a new operation exception.
+	 * The heuristic checks if the pgm new psw is within 6 bytes before
+	 * the faulting psw address (with same DAT, AS settings) and the
+	 * new psw is not a wait psw and the fault was not triggered by
+	 * problem state.
+	 */
+	oldpsw = vcpu->arch.sie_block->gpsw;
+	if (oldpsw.addr - newpsw.addr <= 6 &&
+	    !(newpsw.mask & PSW_MASK_WAIT) &&
+	    !(oldpsw.mask & PSW_MASK_PSTATE) &&
+	    (newpsw.mask & PSW_MASK_ASC) == (oldpsw.mask & PSW_MASK_ASC) &&
+	    (newpsw.mask & PSW_MASK_DAT) == (oldpsw.mask & PSW_MASK_DAT))
+		return -EOPNOTSUPP;
+
+	return kvm_s390_inject_program_int(vcpu, PGM_OPERATION);
+}
+
+static int handle_pv_spx(struct kvm_vcpu *vcpu)
+{
+	u32 pref = *(u32 *)sida_addr(vcpu->arch.sie_block);
+
+	kvm_s390_set_prefix(vcpu, pref);
+	trace_kvm_s390_handle_prefix(vcpu, 1, pref);
+	return 0;
+}
+
+static int handle_pv_sclp(struct kvm_vcpu *vcpu)
+{
+	struct kvm_s390_float_interrupt *fi = &vcpu->kvm->arch.float_int;
+
+	spin_lock(&fi->lock);
+	/*
+	 * 2 cases:
+	 * a: an sccb answering interrupt was already pending or in flight.
+	 *    As the sccb value is not known we can simply set some value to
+	 *    trigger delivery of a saved SCCB. UV will then use its saved
+	 *    copy of the SCCB value.
+	 * b: an error SCCB interrupt needs to be injected so we also inject
+	 *    a fake SCCB address. Firmware will use the proper one.
+	 * This makes sure, that both errors and real sccb returns will only
+	 * be delivered after a notification intercept (instruction has
+	 * finished) but not after others.
+	 */
+	fi->srv_signal.ext_params |= 0x43000;
+	set_bit(IRQ_PEND_EXT_SERVICE, &fi->pending_irqs);
+	clear_bit(IRQ_PEND_EXT_SERVICE, &fi->masked_irqs);
+	spin_unlock(&fi->lock);
+	return 0;
+}
+
+static int handle_pv_uvc(struct kvm_vcpu *vcpu)
+{
+	struct uv_cb_share *guest_uvcb = sida_addr(vcpu->arch.sie_block);
+	struct uv_cb_cts uvcb = {
+		.header.cmd	= UVC_CMD_UNPIN_PAGE_SHARED,
+		.header.len	= sizeof(uvcb),
+		.guest_handle	= kvm_s390_pv_get_handle(vcpu->kvm),
+		.gaddr		= guest_uvcb->paddr,
+	};
+	int rc;
+
+	if (guest_uvcb->header.cmd != UVC_CMD_REMOVE_SHARED_ACCESS) {
+		WARN_ONCE(1, "Unexpected notification intercept for UVC 0x%x\n",
+			  guest_uvcb->header.cmd);
+		return 0;
+	}
+	rc = gmap_make_secure(vcpu->arch.gmap, uvcb.gaddr, &uvcb);
+	/*
+	 * If the unpin did not succeed, the guest will exit again for the UVC
+	 * and we will retry the unpin.
+	 */
+	if (rc == -EINVAL)
+		return 0;
+	/*
+	 * If we got -EAGAIN here, we simply return it. It will eventually
+	 * get propagated all the way to userspace, which should then try
+	 * again.
+	 */
+	return rc;
+}
+
+static int handle_pv_notification(struct kvm_vcpu *vcpu)
+{
+	int ret;
+
+	if (vcpu->arch.sie_block->ipa == 0xb210)
+		return handle_pv_spx(vcpu);
+	if (vcpu->arch.sie_block->ipa == 0xb220)
+		return handle_pv_sclp(vcpu);
+	if (vcpu->arch.sie_block->ipa == 0xb9a4)
+		return handle_pv_uvc(vcpu);
+	if (vcpu->arch.sie_block->ipa >> 8 == 0xae) {
+		/*
+		 * Besides external call, other SIGP orders also cause a
+		 * 108 (pv notify) intercept. In contrast to external call,
+		 * these orders need to be emulated and hence the appropriate
+		 * place to handle them is in handle_instruction().
+		 * So first try kvm_s390_handle_sigp_pei() and if that isn't
+		 * successful, go on with handle_instruction().
+		 */
+		ret = kvm_s390_handle_sigp_pei(vcpu);
+		if (!ret)
+			return ret;
+	}
+
+	return handle_instruction(vcpu);
+}
+
+static bool should_handle_per_ifetch(const struct kvm_vcpu *vcpu, int rc)
+{
+	/* Process PER, also if the instruction is processed in user space. */
+	if (!(vcpu->arch.sie_block->icptstatus & 0x02))
+		return false;
+	if (rc != 0 && rc != -EOPNOTSUPP)
+		return false;
+	if (guestdbg_sstep_enabled(vcpu) && vcpu->arch.local_int.pending_irqs)
+		/* __vcpu_run() will exit after delivering the interrupt. */
+		return false;
+	return true;
+}
+
+int kvm_handle_sie_intercept(struct kvm_vcpu *vcpu)
+{
+	int rc, per_rc = 0;
+
+	if (kvm_is_ucontrol(vcpu->kvm))
+		return -EOPNOTSUPP;
+
+	switch (vcpu->arch.sie_block->icptcode) {
+	case ICPT_EXTREQ:
+		vcpu->stat.exit_external_request++;
+		return 0;
+	case ICPT_IOREQ:
+		vcpu->stat.exit_io_request++;
+		return 0;
+	case ICPT_INST:
+		rc = handle_instruction(vcpu);
+		break;
+	case ICPT_PROGI:
+		return handle_prog(vcpu);
+	case ICPT_EXTINT:
+		return handle_external_interrupt(vcpu);
+	case ICPT_WAIT:
+		return kvm_s390_handle_wait(vcpu);
+	case ICPT_VALIDITY:
+		return handle_validity(vcpu);
+	case ICPT_STOP:
+		return handle_stop(vcpu);
+	case ICPT_OPEREXC:
+		rc = handle_operexc(vcpu);
+		break;
+	case ICPT_PARTEXEC:
+		rc = handle_partial_execution(vcpu);
+		break;
+	case ICPT_KSS:
+		/* Instruction will be redriven, skip the PER check. */
+		return kvm_s390_skey_check_enable(vcpu);
+	case ICPT_MCHKREQ:
+	case ICPT_INT_ENABLE:
+		/*
+		 * PSW bit 13 or a CR (0, 6, 14) changed and we might
+		 * now be able to deliver interrupts. The pre-run code
+		 * will take care of this.
+		 */
+		rc = 0;
+		break;
+	case ICPT_PV_INSTR:
+		rc = handle_instruction(vcpu);
+		break;
+	case ICPT_PV_NOTIFY:
+		rc = handle_pv_notification(vcpu);
+		break;
+	case ICPT_PV_PREF:
+		rc = 0;
+		gmap_convert_to_secure(vcpu->arch.gmap,
+				       kvm_s390_get_prefix(vcpu));
+		gmap_convert_to_secure(vcpu->arch.gmap,
+				       kvm_s390_get_prefix(vcpu) + PAGE_SIZE);
+		break;
+	default:
+		return -EOPNOTSUPP;
+	}
+
+	if (should_handle_per_ifetch(vcpu, rc))
+		per_rc = kvm_s390_handle_per_ifetch_icpt(vcpu);
+	return per_rc ? per_rc : rc;
+}
diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c
new file mode 100644
index 0000000000..efaebba5ee
--- /dev/null
+++ b/arch/s390/kvm/interrupt.c
@@ -0,0 +1,3481 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * handling kvm guest interrupts
+ *
+ * Copyright IBM Corp. 2008, 2020
+ *
+ *    Author(s): Carsten Otte <cotte@de.ibm.com>
+ */
+
+#define KMSG_COMPONENT "kvm-s390"
+#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
+
+#include <linux/interrupt.h>
+#include <linux/kvm_host.h>
+#include <linux/hrtimer.h>
+#include <linux/mmu_context.h>
+#include <linux/nospec.h>
+#include <linux/signal.h>
+#include <linux/slab.h>
+#include <linux/bitmap.h>
+#include <linux/vmalloc.h>
+#include <asm/asm-offsets.h>
+#include <asm/dis.h>
+#include <linux/uaccess.h>
+#include <asm/sclp.h>
+#include <asm/isc.h>
+#include <asm/gmap.h>
+#include <asm/switch_to.h>
+#include <asm/nmi.h>
+#include <asm/airq.h>
+#include <asm/tpi.h>
+#include "kvm-s390.h"
+#include "gaccess.h"
+#include "trace-s390.h"
+#include "pci.h"
+
+#define PFAULT_INIT 0x0600
+#define PFAULT_DONE 0x0680
+#define VIRTIO_PARAM 0x0d00
+
+static struct kvm_s390_gib *gib;
+
+/* handle external calls via sigp interpretation facility */
+static int sca_ext_call_pending(struct kvm_vcpu *vcpu, int *src_id)
+{
+	int c, scn;
+
+	if (!kvm_s390_test_cpuflags(vcpu, CPUSTAT_ECALL_PEND))
+		return 0;
+
+	BUG_ON(!kvm_s390_use_sca_entries());
+	read_lock(&vcpu->kvm->arch.sca_lock);
+	if (vcpu->kvm->arch.use_esca) {
+		struct esca_block *sca = vcpu->kvm->arch.sca;
+		union esca_sigp_ctrl sigp_ctrl =
+			sca->cpu[vcpu->vcpu_id].sigp_ctrl;
+
+		c = sigp_ctrl.c;
+		scn = sigp_ctrl.scn;
+	} else {
+		struct bsca_block *sca = vcpu->kvm->arch.sca;
+		union bsca_sigp_ctrl sigp_ctrl =
+			sca->cpu[vcpu->vcpu_id].sigp_ctrl;
+
+		c = sigp_ctrl.c;
+		scn = sigp_ctrl.scn;
+	}
+	read_unlock(&vcpu->kvm->arch.sca_lock);
+
+	if (src_id)
+		*src_id = scn;
+
+	return c;
+}
+
+static int sca_inject_ext_call(struct kvm_vcpu *vcpu, int src_id)
+{
+	int expect, rc;
+
+	BUG_ON(!kvm_s390_use_sca_entries());
+	read_lock(&vcpu->kvm->arch.sca_lock);
+	if (vcpu->kvm->arch.use_esca) {
+		struct esca_block *sca = vcpu->kvm->arch.sca;
+		union esca_sigp_ctrl *sigp_ctrl =
+			&(sca->cpu[vcpu->vcpu_id].sigp_ctrl);
+		union esca_sigp_ctrl new_val = {0}, old_val;
+
+		old_val = READ_ONCE(*sigp_ctrl);
+		new_val.scn = src_id;
+		new_val.c = 1;
+		old_val.c = 0;
+
+		expect = old_val.value;
+		rc = cmpxchg(&sigp_ctrl->value, old_val.value, new_val.value);
+	} else {
+		struct bsca_block *sca = vcpu->kvm->arch.sca;
+		union bsca_sigp_ctrl *sigp_ctrl =
+			&(sca->cpu[vcpu->vcpu_id].sigp_ctrl);
+		union bsca_sigp_ctrl new_val = {0}, old_val;
+
+		old_val = READ_ONCE(*sigp_ctrl);
+		new_val.scn = src_id;
+		new_val.c = 1;
+		old_val.c = 0;
+
+		expect = old_val.value;
+		rc = cmpxchg(&sigp_ctrl->value, old_val.value, new_val.value);
+	}
+	read_unlock(&vcpu->kvm->arch.sca_lock);
+
+	if (rc != expect) {
+		/* another external call is pending */
+		return -EBUSY;
+	}
+	kvm_s390_set_cpuflags(vcpu, CPUSTAT_ECALL_PEND);
+	return 0;
+}
+
+static void sca_clear_ext_call(struct kvm_vcpu *vcpu)
+{
+	int rc, expect;
+
+	if (!kvm_s390_use_sca_entries())
+		return;
+	kvm_s390_clear_cpuflags(vcpu, CPUSTAT_ECALL_PEND);
+	read_lock(&vcpu->kvm->arch.sca_lock);
+	if (vcpu->kvm->arch.use_esca) {
+		struct esca_block *sca = vcpu->kvm->arch.sca;
+		union esca_sigp_ctrl *sigp_ctrl =
+			&(sca->cpu[vcpu->vcpu_id].sigp_ctrl);
+		union esca_sigp_ctrl old;
+
+		old = READ_ONCE(*sigp_ctrl);
+		expect = old.value;
+		rc = cmpxchg(&sigp_ctrl->value, old.value, 0);
+	} else {
+		struct bsca_block *sca = vcpu->kvm->arch.sca;
+		union bsca_sigp_ctrl *sigp_ctrl =
+			&(sca->cpu[vcpu->vcpu_id].sigp_ctrl);
+		union bsca_sigp_ctrl old;
+
+		old = READ_ONCE(*sigp_ctrl);
+		expect = old.value;
+		rc = cmpxchg(&sigp_ctrl->value, old.value, 0);
+	}
+	read_unlock(&vcpu->kvm->arch.sca_lock);
+	WARN_ON(rc != expect); /* cannot clear? */
+}
+
+int psw_extint_disabled(struct kvm_vcpu *vcpu)
+{
+	return !(vcpu->arch.sie_block->gpsw.mask & PSW_MASK_EXT);
+}
+
+static int psw_ioint_disabled(struct kvm_vcpu *vcpu)
+{
+	return !(vcpu->arch.sie_block->gpsw.mask & PSW_MASK_IO);
+}
+
+static int psw_mchk_disabled(struct kvm_vcpu *vcpu)
+{
+	return !(vcpu->arch.sie_block->gpsw.mask & PSW_MASK_MCHECK);
+}
+
+static int psw_interrupts_disabled(struct kvm_vcpu *vcpu)
+{
+	return psw_extint_disabled(vcpu) &&
+	       psw_ioint_disabled(vcpu) &&
+	       psw_mchk_disabled(vcpu);
+}
+
+static int ckc_interrupts_enabled(struct kvm_vcpu *vcpu)
+{
+	if (psw_extint_disabled(vcpu) ||
+	    !(vcpu->arch.sie_block->gcr[0] & CR0_CLOCK_COMPARATOR_SUBMASK))
+		return 0;
+	if (guestdbg_enabled(vcpu) && guestdbg_sstep_enabled(vcpu))
+		/* No timer interrupts when single stepping */
+		return 0;
+	return 1;
+}
+
+static int ckc_irq_pending(struct kvm_vcpu *vcpu)
+{
+	const u64 now = kvm_s390_get_tod_clock_fast(vcpu->kvm);
+	const u64 ckc = vcpu->arch.sie_block->ckc;
+
+	if (vcpu->arch.sie_block->gcr[0] & CR0_CLOCK_COMPARATOR_SIGN) {
+		if ((s64)ckc >= (s64)now)
+			return 0;
+	} else if (ckc >= now) {
+		return 0;
+	}
+	return ckc_interrupts_enabled(vcpu);
+}
+
+static int cpu_timer_interrupts_enabled(struct kvm_vcpu *vcpu)
+{
+	return !psw_extint_disabled(vcpu) &&
+	       (vcpu->arch.sie_block->gcr[0] & CR0_CPU_TIMER_SUBMASK);
+}
+
+static int cpu_timer_irq_pending(struct kvm_vcpu *vcpu)
+{
+	if (!cpu_timer_interrupts_enabled(vcpu))
+		return 0;
+	return kvm_s390_get_cpu_timer(vcpu) >> 63;
+}
+
+static uint64_t isc_to_isc_bits(int isc)
+{
+	return (0x80 >> isc) << 24;
+}
+
+static inline u32 isc_to_int_word(u8 isc)
+{
+	return ((u32)isc << 27) | 0x80000000;
+}
+
+static inline u8 int_word_to_isc(u32 int_word)
+{
+	return (int_word & 0x38000000) >> 27;
+}
+
+/*
+ * To use atomic bitmap functions, we have to provide a bitmap address
+ * that is u64 aligned. However, the ipm might be u32 aligned.
+ * Therefore, we logically start the bitmap at the very beginning of the
+ * struct and fixup the bit number.
+ */
+#define IPM_BIT_OFFSET (offsetof(struct kvm_s390_gisa, ipm) * BITS_PER_BYTE)
+
+/**
+ * gisa_set_iam - change the GISA interruption alert mask
+ *
+ * @gisa: gisa to operate on
+ * @iam: new IAM value to use
+ *
+ * Change the IAM atomically with the next alert address and the IPM
+ * of the GISA if the GISA is not part of the GIB alert list. All three
+ * fields are located in the first long word of the GISA.
+ *
+ * Returns: 0 on success
+ *          -EBUSY in case the gisa is part of the alert list
+ */
+static inline int gisa_set_iam(struct kvm_s390_gisa *gisa, u8 iam)
+{
+	u64 word, _word;
+
+	do {
+		word = READ_ONCE(gisa->u64.word[0]);
+		if ((u64)gisa != word >> 32)
+			return -EBUSY;
+		_word = (word & ~0xffUL) | iam;
+	} while (cmpxchg(&gisa->u64.word[0], word, _word) != word);
+
+	return 0;
+}
+
+/**
+ * gisa_clear_ipm - clear the GISA interruption pending mask
+ *
+ * @gisa: gisa to operate on
+ *
+ * Clear the IPM atomically with the next alert address and the IAM
+ * of the GISA unconditionally. All three fields are located in the
+ * first long word of the GISA.
+ */
+static inline void gisa_clear_ipm(struct kvm_s390_gisa *gisa)
+{
+	u64 word, _word;
+
+	do {
+		word = READ_ONCE(gisa->u64.word[0]);
+		_word = word & ~(0xffUL << 24);
+	} while (cmpxchg(&gisa->u64.word[0], word, _word) != word);
+}
+
+/**
+ * gisa_get_ipm_or_restore_iam - return IPM or restore GISA IAM
+ *
+ * @gi: gisa interrupt struct to work on
+ *
+ * Atomically restores the interruption alert mask if none of the
+ * relevant ISCs are pending and return the IPM.
+ *
+ * Returns: the relevant pending ISCs
+ */
+static inline u8 gisa_get_ipm_or_restore_iam(struct kvm_s390_gisa_interrupt *gi)
+{
+	u8 pending_mask, alert_mask;
+	u64 word, _word;
+
+	do {
+		word = READ_ONCE(gi->origin->u64.word[0]);
+		alert_mask = READ_ONCE(gi->alert.mask);
+		pending_mask = (u8)(word >> 24) & alert_mask;
+		if (pending_mask)
+			return pending_mask;
+		_word = (word & ~0xffUL) | alert_mask;
+	} while (cmpxchg(&gi->origin->u64.word[0], word, _word) != word);
+
+	return 0;
+}
+
+static inline void gisa_set_ipm_gisc(struct kvm_s390_gisa *gisa, u32 gisc)
+{
+	set_bit_inv(IPM_BIT_OFFSET + gisc, (unsigned long *) gisa);
+}
+
+static inline u8 gisa_get_ipm(struct kvm_s390_gisa *gisa)
+{
+	return READ_ONCE(gisa->ipm);
+}
+
+static inline int gisa_tac_ipm_gisc(struct kvm_s390_gisa *gisa, u32 gisc)
+{
+	return test_and_clear_bit_inv(IPM_BIT_OFFSET + gisc, (unsigned long *) gisa);
+}
+
+static inline unsigned long pending_irqs_no_gisa(struct kvm_vcpu *vcpu)
+{
+	unsigned long pending = vcpu->kvm->arch.float_int.pending_irqs |
+				vcpu->arch.local_int.pending_irqs;
+
+	pending &= ~vcpu->kvm->arch.float_int.masked_irqs;
+	return pending;
+}
+
+static inline unsigned long pending_irqs(struct kvm_vcpu *vcpu)
+{
+	struct kvm_s390_gisa_interrupt *gi = &vcpu->kvm->arch.gisa_int;
+	unsigned long pending_mask;
+
+	pending_mask = pending_irqs_no_gisa(vcpu);
+	if (gi->origin)
+		pending_mask |= gisa_get_ipm(gi->origin) << IRQ_PEND_IO_ISC_7;
+	return pending_mask;
+}
+
+static inline int isc_to_irq_type(unsigned long isc)
+{
+	return IRQ_PEND_IO_ISC_0 - isc;
+}
+
+static inline int irq_type_to_isc(unsigned long irq_type)
+{
+	return IRQ_PEND_IO_ISC_0 - irq_type;
+}
+
+static unsigned long disable_iscs(struct kvm_vcpu *vcpu,
+				   unsigned long active_mask)
+{
+	int i;
+
+	for (i = 0; i <= MAX_ISC; i++)
+		if (!(vcpu->arch.sie_block->gcr[6] & isc_to_isc_bits(i)))
+			active_mask &= ~(1UL << (isc_to_irq_type(i)));
+
+	return active_mask;
+}
+
+static unsigned long deliverable_irqs(struct kvm_vcpu *vcpu)
+{
+	unsigned long active_mask;
+
+	active_mask = pending_irqs(vcpu);
+	if (!active_mask)
+		return 0;
+
+	if (psw_extint_disabled(vcpu))
+		active_mask &= ~IRQ_PEND_EXT_MASK;
+	if (psw_ioint_disabled(vcpu))
+		active_mask &= ~IRQ_PEND_IO_MASK;
+	else
+		active_mask = disable_iscs(vcpu, active_mask);
+	if (!(vcpu->arch.sie_block->gcr[0] & CR0_EXTERNAL_CALL_SUBMASK))
+		__clear_bit(IRQ_PEND_EXT_EXTERNAL, &active_mask);
+	if (!(vcpu->arch.sie_block->gcr[0] & CR0_EMERGENCY_SIGNAL_SUBMASK))
+		__clear_bit(IRQ_PEND_EXT_EMERGENCY, &active_mask);
+	if (!(vcpu->arch.sie_block->gcr[0] & CR0_CLOCK_COMPARATOR_SUBMASK))
+		__clear_bit(IRQ_PEND_EXT_CLOCK_COMP, &active_mask);
+	if (!(vcpu->arch.sie_block->gcr[0] & CR0_CPU_TIMER_SUBMASK))
+		__clear_bit(IRQ_PEND_EXT_CPU_TIMER, &active_mask);
+	if (!(vcpu->arch.sie_block->gcr[0] & CR0_SERVICE_SIGNAL_SUBMASK)) {
+		__clear_bit(IRQ_PEND_EXT_SERVICE, &active_mask);
+		__clear_bit(IRQ_PEND_EXT_SERVICE_EV, &active_mask);
+	}
+	if (psw_mchk_disabled(vcpu))
+		active_mask &= ~IRQ_PEND_MCHK_MASK;
+	/* PV guest cpus can have a single interruption injected at a time. */
+	if (kvm_s390_pv_cpu_get_handle(vcpu) &&
+	    vcpu->arch.sie_block->iictl != IICTL_CODE_NONE)
+		active_mask &= ~(IRQ_PEND_EXT_II_MASK |
+				 IRQ_PEND_IO_MASK |
+				 IRQ_PEND_MCHK_MASK);
+	/*
+	 * Check both floating and local interrupt's cr14 because
+	 * bit IRQ_PEND_MCHK_REP could be set in both cases.
+	 */
+	if (!(vcpu->arch.sie_block->gcr[14] &
+	   (vcpu->kvm->arch.float_int.mchk.cr14 |
+	   vcpu->arch.local_int.irq.mchk.cr14)))
+		__clear_bit(IRQ_PEND_MCHK_REP, &active_mask);
+
+	/*
+	 * STOP irqs will never be actively delivered. They are triggered via
+	 * intercept requests and cleared when the stop intercept is performed.
+	 */
+	__clear_bit(IRQ_PEND_SIGP_STOP, &active_mask);
+
+	return active_mask;
+}
+
+static void __set_cpu_idle(struct kvm_vcpu *vcpu)
+{
+	kvm_s390_set_cpuflags(vcpu, CPUSTAT_WAIT);
+	set_bit(vcpu->vcpu_idx, vcpu->kvm->arch.idle_mask);
+}
+
+static void __unset_cpu_idle(struct kvm_vcpu *vcpu)
+{
+	kvm_s390_clear_cpuflags(vcpu, CPUSTAT_WAIT);
+	clear_bit(vcpu->vcpu_idx, vcpu->kvm->arch.idle_mask);
+}
+
+static void __reset_intercept_indicators(struct kvm_vcpu *vcpu)
+{
+	kvm_s390_clear_cpuflags(vcpu, CPUSTAT_IO_INT | CPUSTAT_EXT_INT |
+				      CPUSTAT_STOP_INT);
+	vcpu->arch.sie_block->lctl = 0x0000;
+	vcpu->arch.sie_block->ictl &= ~(ICTL_LPSW | ICTL_STCTL | ICTL_PINT);
+
+	if (guestdbg_enabled(vcpu)) {
+		vcpu->arch.sie_block->lctl |= (LCTL_CR0 | LCTL_CR9 |
+					       LCTL_CR10 | LCTL_CR11);
+		vcpu->arch.sie_block->ictl |= (ICTL_STCTL | ICTL_PINT);
+	}
+}
+
+static void set_intercept_indicators_io(struct kvm_vcpu *vcpu)
+{
+	if (!(pending_irqs_no_gisa(vcpu) & IRQ_PEND_IO_MASK))
+		return;
+	if (psw_ioint_disabled(vcpu))
+		kvm_s390_set_cpuflags(vcpu, CPUSTAT_IO_INT);
+	else
+		vcpu->arch.sie_block->lctl |= LCTL_CR6;
+}
+
+static void set_intercept_indicators_ext(struct kvm_vcpu *vcpu)
+{
+	if (!(pending_irqs_no_gisa(vcpu) & IRQ_PEND_EXT_MASK))
+		return;
+	if (psw_extint_disabled(vcpu))
+		kvm_s390_set_cpuflags(vcpu, CPUSTAT_EXT_INT);
+	else
+		vcpu->arch.sie_block->lctl |= LCTL_CR0;
+}
+
+static void set_intercept_indicators_mchk(struct kvm_vcpu *vcpu)
+{
+	if (!(pending_irqs_no_gisa(vcpu) & IRQ_PEND_MCHK_MASK))
+		return;
+	if (psw_mchk_disabled(vcpu))
+		vcpu->arch.sie_block->ictl |= ICTL_LPSW;
+	else
+		vcpu->arch.sie_block->lctl |= LCTL_CR14;
+}
+
+static void set_intercept_indicators_stop(struct kvm_vcpu *vcpu)
+{
+	if (kvm_s390_is_stop_irq_pending(vcpu))
+		kvm_s390_set_cpuflags(vcpu, CPUSTAT_STOP_INT);
+}
+
+/* Set interception request for non-deliverable interrupts */
+static void set_intercept_indicators(struct kvm_vcpu *vcpu)
+{
+	set_intercept_indicators_io(vcpu);
+	set_intercept_indicators_ext(vcpu);
+	set_intercept_indicators_mchk(vcpu);
+	set_intercept_indicators_stop(vcpu);
+}
+
+static int __must_check __deliver_cpu_timer(struct kvm_vcpu *vcpu)
+{
+	struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
+	int rc = 0;
+
+	vcpu->stat.deliver_cputm++;
+	trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, KVM_S390_INT_CPU_TIMER,
+					 0, 0);
+	if (kvm_s390_pv_cpu_is_protected(vcpu)) {
+		vcpu->arch.sie_block->iictl = IICTL_CODE_EXT;
+		vcpu->arch.sie_block->eic = EXT_IRQ_CPU_TIMER;
+	} else {
+		rc  = put_guest_lc(vcpu, EXT_IRQ_CPU_TIMER,
+				   (u16 *)__LC_EXT_INT_CODE);
+		rc |= put_guest_lc(vcpu, 0, (u16 *)__LC_EXT_CPU_ADDR);
+		rc |= write_guest_lc(vcpu, __LC_EXT_OLD_PSW,
+				     &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
+		rc |= read_guest_lc(vcpu, __LC_EXT_NEW_PSW,
+				    &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
+	}
+	clear_bit(IRQ_PEND_EXT_CPU_TIMER, &li->pending_irqs);
+	return rc ? -EFAULT : 0;
+}
+
+static int __must_check __deliver_ckc(struct kvm_vcpu *vcpu)
+{
+	struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
+	int rc = 0;
+
+	vcpu->stat.deliver_ckc++;
+	trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, KVM_S390_INT_CLOCK_COMP,
+					 0, 0);
+	if (kvm_s390_pv_cpu_is_protected(vcpu)) {
+		vcpu->arch.sie_block->iictl = IICTL_CODE_EXT;
+		vcpu->arch.sie_block->eic = EXT_IRQ_CLK_COMP;
+	} else {
+		rc  = put_guest_lc(vcpu, EXT_IRQ_CLK_COMP,
+				   (u16 __user *)__LC_EXT_INT_CODE);
+		rc |= put_guest_lc(vcpu, 0, (u16 *)__LC_EXT_CPU_ADDR);
+		rc |= write_guest_lc(vcpu, __LC_EXT_OLD_PSW,
+				     &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
+		rc |= read_guest_lc(vcpu, __LC_EXT_NEW_PSW,
+				    &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
+	}
+	clear_bit(IRQ_PEND_EXT_CLOCK_COMP, &li->pending_irqs);
+	return rc ? -EFAULT : 0;
+}
+
+static int __must_check __deliver_pfault_init(struct kvm_vcpu *vcpu)
+{
+	struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
+	struct kvm_s390_ext_info ext;
+	int rc;
+
+	spin_lock(&li->lock);
+	ext = li->irq.ext;
+	clear_bit(IRQ_PEND_PFAULT_INIT, &li->pending_irqs);
+	li->irq.ext.ext_params2 = 0;
+	spin_unlock(&li->lock);
+
+	VCPU_EVENT(vcpu, 4, "deliver: pfault init token 0x%llx",
+		   ext.ext_params2);
+	trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id,
+					 KVM_S390_INT_PFAULT_INIT,
+					 0, ext.ext_params2);
+
+	rc  = put_guest_lc(vcpu, EXT_IRQ_CP_SERVICE, (u16 *) __LC_EXT_INT_CODE);
+	rc |= put_guest_lc(vcpu, PFAULT_INIT, (u16 *) __LC_EXT_CPU_ADDR);
+	rc |= write_guest_lc(vcpu, __LC_EXT_OLD_PSW,
+			     &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
+	rc |= read_guest_lc(vcpu, __LC_EXT_NEW_PSW,
+			    &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
+	rc |= put_guest_lc(vcpu, ext.ext_params2, (u64 *) __LC_EXT_PARAMS2);
+	return rc ? -EFAULT : 0;
+}
+
+static int __write_machine_check(struct kvm_vcpu *vcpu,
+				 struct kvm_s390_mchk_info *mchk)
+{
+	unsigned long ext_sa_addr;
+	unsigned long lc;
+	freg_t fprs[NUM_FPRS];
+	union mci mci;
+	int rc;
+
+	/*
+	 * All other possible payload for a machine check (e.g. the register
+	 * contents in the save area) will be handled by the ultravisor, as
+	 * the hypervisor does not not have the needed information for
+	 * protected guests.
+	 */
+	if (kvm_s390_pv_cpu_is_protected(vcpu)) {
+		vcpu->arch.sie_block->iictl = IICTL_CODE_MCHK;
+		vcpu->arch.sie_block->mcic = mchk->mcic;
+		vcpu->arch.sie_block->faddr = mchk->failing_storage_address;
+		vcpu->arch.sie_block->edc = mchk->ext_damage_code;
+		return 0;
+	}
+
+	mci.val = mchk->mcic;
+	/* take care of lazy register loading */
+	save_fpu_regs();
+	save_access_regs(vcpu->run->s.regs.acrs);
+	if (MACHINE_HAS_GS && vcpu->arch.gs_enabled)
+		save_gs_cb(current->thread.gs_cb);
+
+	/* Extended save area */
+	rc = read_guest_lc(vcpu, __LC_MCESAD, &ext_sa_addr,
+			   sizeof(unsigned long));
+	/* Only bits 0 through 63-LC are used for address formation */
+	lc = ext_sa_addr & MCESA_LC_MASK;
+	if (test_kvm_facility(vcpu->kvm, 133)) {
+		switch (lc) {
+		case 0:
+		case 10:
+			ext_sa_addr &= ~0x3ffUL;
+			break;
+		case 11:
+			ext_sa_addr &= ~0x7ffUL;
+			break;
+		case 12:
+			ext_sa_addr &= ~0xfffUL;
+			break;
+		default:
+			ext_sa_addr = 0;
+			break;
+		}
+	} else {
+		ext_sa_addr &= ~0x3ffUL;
+	}
+
+	if (!rc && mci.vr && ext_sa_addr && test_kvm_facility(vcpu->kvm, 129)) {
+		if (write_guest_abs(vcpu, ext_sa_addr, vcpu->run->s.regs.vrs,
+				    512))
+			mci.vr = 0;
+	} else {
+		mci.vr = 0;
+	}
+	if (!rc && mci.gs && ext_sa_addr && test_kvm_facility(vcpu->kvm, 133)
+	    && (lc == 11 || lc == 12)) {
+		if (write_guest_abs(vcpu, ext_sa_addr + 1024,
+				    &vcpu->run->s.regs.gscb, 32))
+			mci.gs = 0;
+	} else {
+		mci.gs = 0;
+	}
+
+	/* General interruption information */
+	rc |= put_guest_lc(vcpu, 1, (u8 __user *) __LC_AR_MODE_ID);
+	rc |= write_guest_lc(vcpu, __LC_MCK_OLD_PSW,
+			     &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
+	rc |= read_guest_lc(vcpu, __LC_MCK_NEW_PSW,
+			    &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
+	rc |= put_guest_lc(vcpu, mci.val, (u64 __user *) __LC_MCCK_CODE);
+
+	/* Register-save areas */
+	if (MACHINE_HAS_VX) {
+		convert_vx_to_fp(fprs, (__vector128 *) vcpu->run->s.regs.vrs);
+		rc |= write_guest_lc(vcpu, __LC_FPREGS_SAVE_AREA, fprs, 128);
+	} else {
+		rc |= write_guest_lc(vcpu, __LC_FPREGS_SAVE_AREA,
+				     vcpu->run->s.regs.fprs, 128);
+	}
+	rc |= write_guest_lc(vcpu, __LC_GPREGS_SAVE_AREA,
+			     vcpu->run->s.regs.gprs, 128);
+	rc |= put_guest_lc(vcpu, current->thread.fpu.fpc,
+			   (u32 __user *) __LC_FP_CREG_SAVE_AREA);
+	rc |= put_guest_lc(vcpu, vcpu->arch.sie_block->todpr,
+			   (u32 __user *) __LC_TOD_PROGREG_SAVE_AREA);
+	rc |= put_guest_lc(vcpu, kvm_s390_get_cpu_timer(vcpu),
+			   (u64 __user *) __LC_CPU_TIMER_SAVE_AREA);
+	rc |= put_guest_lc(vcpu, vcpu->arch.sie_block->ckc >> 8,
+			   (u64 __user *) __LC_CLOCK_COMP_SAVE_AREA);
+	rc |= write_guest_lc(vcpu, __LC_AREGS_SAVE_AREA,
+			     &vcpu->run->s.regs.acrs, 64);
+	rc |= write_guest_lc(vcpu, __LC_CREGS_SAVE_AREA,
+			     &vcpu->arch.sie_block->gcr, 128);
+
+	/* Extended interruption information */
+	rc |= put_guest_lc(vcpu, mchk->ext_damage_code,
+			   (u32 __user *) __LC_EXT_DAMAGE_CODE);
+	rc |= put_guest_lc(vcpu, mchk->failing_storage_address,
+			   (u64 __user *) __LC_MCCK_FAIL_STOR_ADDR);
+	rc |= write_guest_lc(vcpu, __LC_PSW_SAVE_AREA, &mchk->fixed_logout,
+			     sizeof(mchk->fixed_logout));
+	return rc ? -EFAULT : 0;
+}
+
+static int __must_check __deliver_machine_check(struct kvm_vcpu *vcpu)
+{
+	struct kvm_s390_float_interrupt *fi = &vcpu->kvm->arch.float_int;
+	struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
+	struct kvm_s390_mchk_info mchk = {};
+	int deliver = 0;
+	int rc = 0;
+
+	spin_lock(&fi->lock);
+	spin_lock(&li->lock);
+	if (test_bit(IRQ_PEND_MCHK_EX, &li->pending_irqs) ||
+	    test_bit(IRQ_PEND_MCHK_REP, &li->pending_irqs)) {
+		/*
+		 * If there was an exigent machine check pending, then any
+		 * repressible machine checks that might have been pending
+		 * are indicated along with it, so always clear bits for
+		 * repressible and exigent interrupts
+		 */
+		mchk = li->irq.mchk;
+		clear_bit(IRQ_PEND_MCHK_EX, &li->pending_irqs);
+		clear_bit(IRQ_PEND_MCHK_REP, &li->pending_irqs);
+		memset(&li->irq.mchk, 0, sizeof(mchk));
+		deliver = 1;
+	}
+	/*
+	 * We indicate floating repressible conditions along with
+	 * other pending conditions. Channel Report Pending and Channel
+	 * Subsystem damage are the only two and are indicated by
+	 * bits in mcic and masked in cr14.
+	 */
+	if (test_and_clear_bit(IRQ_PEND_MCHK_REP, &fi->pending_irqs)) {
+		mchk.mcic |= fi->mchk.mcic;
+		mchk.cr14 |= fi->mchk.cr14;
+		memset(&fi->mchk, 0, sizeof(mchk));
+		deliver = 1;
+	}
+	spin_unlock(&li->lock);
+	spin_unlock(&fi->lock);
+
+	if (deliver) {
+		VCPU_EVENT(vcpu, 3, "deliver: machine check mcic 0x%llx",
+			   mchk.mcic);
+		trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id,
+						 KVM_S390_MCHK,
+						 mchk.cr14, mchk.mcic);
+		vcpu->stat.deliver_machine_check++;
+		rc = __write_machine_check(vcpu, &mchk);
+	}
+	return rc;
+}
+
+static int __must_check __deliver_restart(struct kvm_vcpu *vcpu)
+{
+	struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
+	int rc = 0;
+
+	VCPU_EVENT(vcpu, 3, "%s", "deliver: cpu restart");
+	vcpu->stat.deliver_restart_signal++;
+	trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, KVM_S390_RESTART, 0, 0);
+
+	if (kvm_s390_pv_cpu_is_protected(vcpu)) {
+		vcpu->arch.sie_block->iictl = IICTL_CODE_RESTART;
+	} else {
+		rc  = write_guest_lc(vcpu,
+				     offsetof(struct lowcore, restart_old_psw),
+				     &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
+		rc |= read_guest_lc(vcpu, offsetof(struct lowcore, restart_psw),
+				    &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
+	}
+	clear_bit(IRQ_PEND_RESTART, &li->pending_irqs);
+	return rc ? -EFAULT : 0;
+}
+
+static int __must_check __deliver_set_prefix(struct kvm_vcpu *vcpu)
+{
+	struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
+	struct kvm_s390_prefix_info prefix;
+
+	spin_lock(&li->lock);
+	prefix = li->irq.prefix;
+	li->irq.prefix.address = 0;
+	clear_bit(IRQ_PEND_SET_PREFIX, &li->pending_irqs);
+	spin_unlock(&li->lock);
+
+	vcpu->stat.deliver_prefix_signal++;
+	trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id,
+					 KVM_S390_SIGP_SET_PREFIX,
+					 prefix.address, 0);
+
+	kvm_s390_set_prefix(vcpu, prefix.address);
+	return 0;
+}
+
+static int __must_check __deliver_emergency_signal(struct kvm_vcpu *vcpu)
+{
+	struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
+	int rc;
+	int cpu_addr;
+
+	spin_lock(&li->lock);
+	cpu_addr = find_first_bit(li->sigp_emerg_pending, KVM_MAX_VCPUS);
+	clear_bit(cpu_addr, li->sigp_emerg_pending);
+	if (bitmap_empty(li->sigp_emerg_pending, KVM_MAX_VCPUS))
+		clear_bit(IRQ_PEND_EXT_EMERGENCY, &li->pending_irqs);
+	spin_unlock(&li->lock);
+
+	VCPU_EVENT(vcpu, 4, "%s", "deliver: sigp emerg");
+	vcpu->stat.deliver_emergency_signal++;
+	trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, KVM_S390_INT_EMERGENCY,
+					 cpu_addr, 0);
+	if (kvm_s390_pv_cpu_is_protected(vcpu)) {
+		vcpu->arch.sie_block->iictl = IICTL_CODE_EXT;
+		vcpu->arch.sie_block->eic = EXT_IRQ_EMERGENCY_SIG;
+		vcpu->arch.sie_block->extcpuaddr = cpu_addr;
+		return 0;
+	}
+
+	rc  = put_guest_lc(vcpu, EXT_IRQ_EMERGENCY_SIG,
+			   (u16 *)__LC_EXT_INT_CODE);
+	rc |= put_guest_lc(vcpu, cpu_addr, (u16 *)__LC_EXT_CPU_ADDR);
+	rc |= write_guest_lc(vcpu, __LC_EXT_OLD_PSW,
+			     &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
+	rc |= read_guest_lc(vcpu, __LC_EXT_NEW_PSW,
+			    &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
+	return rc ? -EFAULT : 0;
+}
+
+static int __must_check __deliver_external_call(struct kvm_vcpu *vcpu)
+{
+	struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
+	struct kvm_s390_extcall_info extcall;
+	int rc;
+
+	spin_lock(&li->lock);
+	extcall = li->irq.extcall;
+	li->irq.extcall.code = 0;
+	clear_bit(IRQ_PEND_EXT_EXTERNAL, &li->pending_irqs);
+	spin_unlock(&li->lock);
+
+	VCPU_EVENT(vcpu, 4, "%s", "deliver: sigp ext call");
+	vcpu->stat.deliver_external_call++;
+	trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id,
+					 KVM_S390_INT_EXTERNAL_CALL,
+					 extcall.code, 0);
+	if (kvm_s390_pv_cpu_is_protected(vcpu)) {
+		vcpu->arch.sie_block->iictl = IICTL_CODE_EXT;
+		vcpu->arch.sie_block->eic = EXT_IRQ_EXTERNAL_CALL;
+		vcpu->arch.sie_block->extcpuaddr = extcall.code;
+		return 0;
+	}
+
+	rc  = put_guest_lc(vcpu, EXT_IRQ_EXTERNAL_CALL,
+			   (u16 *)__LC_EXT_INT_CODE);
+	rc |= put_guest_lc(vcpu, extcall.code, (u16 *)__LC_EXT_CPU_ADDR);
+	rc |= write_guest_lc(vcpu, __LC_EXT_OLD_PSW,
+			     &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
+	rc |= read_guest_lc(vcpu, __LC_EXT_NEW_PSW, &vcpu->arch.sie_block->gpsw,
+			    sizeof(psw_t));
+	return rc ? -EFAULT : 0;
+}
+
+static int __deliver_prog_pv(struct kvm_vcpu *vcpu, u16 code)
+{
+	switch (code) {
+	case PGM_SPECIFICATION:
+		vcpu->arch.sie_block->iictl = IICTL_CODE_SPECIFICATION;
+		break;
+	case PGM_OPERAND:
+		vcpu->arch.sie_block->iictl = IICTL_CODE_OPERAND;
+		break;
+	default:
+		return -EINVAL;
+	}
+	return 0;
+}
+
+static int __must_check __deliver_prog(struct kvm_vcpu *vcpu)
+{
+	struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
+	struct kvm_s390_pgm_info pgm_info;
+	int rc = 0, nullifying = false;
+	u16 ilen;
+
+	spin_lock(&li->lock);
+	pgm_info = li->irq.pgm;
+	clear_bit(IRQ_PEND_PROG, &li->pending_irqs);
+	memset(&li->irq.pgm, 0, sizeof(pgm_info));
+	spin_unlock(&li->lock);
+
+	ilen = pgm_info.flags & KVM_S390_PGM_FLAGS_ILC_MASK;
+	VCPU_EVENT(vcpu, 3, "deliver: program irq code 0x%x, ilen:%d",
+		   pgm_info.code, ilen);
+	vcpu->stat.deliver_program++;
+	trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, KVM_S390_PROGRAM_INT,
+					 pgm_info.code, 0);
+
+	/* PER is handled by the ultravisor */
+	if (kvm_s390_pv_cpu_is_protected(vcpu))
+		return __deliver_prog_pv(vcpu, pgm_info.code & ~PGM_PER);
+
+	switch (pgm_info.code & ~PGM_PER) {
+	case PGM_AFX_TRANSLATION:
+	case PGM_ASX_TRANSLATION:
+	case PGM_EX_TRANSLATION:
+	case PGM_LFX_TRANSLATION:
+	case PGM_LSTE_SEQUENCE:
+	case PGM_LSX_TRANSLATION:
+	case PGM_LX_TRANSLATION:
+	case PGM_PRIMARY_AUTHORITY:
+	case PGM_SECONDARY_AUTHORITY:
+		nullifying = true;
+		fallthrough;
+	case PGM_SPACE_SWITCH:
+		rc = put_guest_lc(vcpu, pgm_info.trans_exc_code,
+				  (u64 *)__LC_TRANS_EXC_CODE);
+		break;
+	case PGM_ALEN_TRANSLATION:
+	case PGM_ALE_SEQUENCE:
+	case PGM_ASTE_INSTANCE:
+	case PGM_ASTE_SEQUENCE:
+	case PGM_ASTE_VALIDITY:
+	case PGM_EXTENDED_AUTHORITY:
+		rc = put_guest_lc(vcpu, pgm_info.exc_access_id,
+				  (u8 *)__LC_EXC_ACCESS_ID);
+		nullifying = true;
+		break;
+	case PGM_ASCE_TYPE:
+	case PGM_PAGE_TRANSLATION:
+	case PGM_REGION_FIRST_TRANS:
+	case PGM_REGION_SECOND_TRANS:
+	case PGM_REGION_THIRD_TRANS:
+	case PGM_SEGMENT_TRANSLATION:
+		rc = put_guest_lc(vcpu, pgm_info.trans_exc_code,
+				  (u64 *)__LC_TRANS_EXC_CODE);
+		rc |= put_guest_lc(vcpu, pgm_info.exc_access_id,
+				   (u8 *)__LC_EXC_ACCESS_ID);
+		rc |= put_guest_lc(vcpu, pgm_info.op_access_id,
+				   (u8 *)__LC_OP_ACCESS_ID);
+		nullifying = true;
+		break;
+	case PGM_MONITOR:
+		rc = put_guest_lc(vcpu, pgm_info.mon_class_nr,
+				  (u16 *)__LC_MON_CLASS_NR);
+		rc |= put_guest_lc(vcpu, pgm_info.mon_code,
+				   (u64 *)__LC_MON_CODE);
+		break;
+	case PGM_VECTOR_PROCESSING:
+	case PGM_DATA:
+		rc = put_guest_lc(vcpu, pgm_info.data_exc_code,
+				  (u32 *)__LC_DATA_EXC_CODE);
+		break;
+	case PGM_PROTECTION:
+		rc = put_guest_lc(vcpu, pgm_info.trans_exc_code,
+				  (u64 *)__LC_TRANS_EXC_CODE);
+		rc |= put_guest_lc(vcpu, pgm_info.exc_access_id,
+				   (u8 *)__LC_EXC_ACCESS_ID);
+		break;
+	case PGM_STACK_FULL:
+	case PGM_STACK_EMPTY:
+	case PGM_STACK_SPECIFICATION:
+	case PGM_STACK_TYPE:
+	case PGM_STACK_OPERATION:
+	case PGM_TRACE_TABEL:
+	case PGM_CRYPTO_OPERATION:
+		nullifying = true;
+		break;
+	}
+
+	if (pgm_info.code & PGM_PER) {
+		rc |= put_guest_lc(vcpu, pgm_info.per_code,
+				   (u8 *) __LC_PER_CODE);
+		rc |= put_guest_lc(vcpu, pgm_info.per_atmid,
+				   (u8 *)__LC_PER_ATMID);
+		rc |= put_guest_lc(vcpu, pgm_info.per_address,
+				   (u64 *) __LC_PER_ADDRESS);
+		rc |= put_guest_lc(vcpu, pgm_info.per_access_id,
+				   (u8 *) __LC_PER_ACCESS_ID);
+	}
+
+	if (nullifying && !(pgm_info.flags & KVM_S390_PGM_FLAGS_NO_REWIND))
+		kvm_s390_rewind_psw(vcpu, ilen);
+
+	/* bit 1+2 of the target are the ilc, so we can directly use ilen */
+	rc |= put_guest_lc(vcpu, ilen, (u16 *) __LC_PGM_ILC);
+	rc |= put_guest_lc(vcpu, vcpu->arch.sie_block->gbea,
+				 (u64 *) __LC_PGM_LAST_BREAK);
+	rc |= put_guest_lc(vcpu, pgm_info.code,
+			   (u16 *)__LC_PGM_INT_CODE);
+	rc |= write_guest_lc(vcpu, __LC_PGM_OLD_PSW,
+			     &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
+	rc |= read_guest_lc(vcpu, __LC_PGM_NEW_PSW,
+			    &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
+	return rc ? -EFAULT : 0;
+}
+
+#define SCCB_MASK 0xFFFFFFF8
+#define SCCB_EVENT_PENDING 0x3
+
+static int write_sclp(struct kvm_vcpu *vcpu, u32 parm)
+{
+	int rc;
+
+	if (kvm_s390_pv_cpu_get_handle(vcpu)) {
+		vcpu->arch.sie_block->iictl = IICTL_CODE_EXT;
+		vcpu->arch.sie_block->eic = EXT_IRQ_SERVICE_SIG;
+		vcpu->arch.sie_block->eiparams = parm;
+		return 0;
+	}
+
+	rc  = put_guest_lc(vcpu, EXT_IRQ_SERVICE_SIG, (u16 *)__LC_EXT_INT_CODE);
+	rc |= put_guest_lc(vcpu, 0, (u16 *)__LC_EXT_CPU_ADDR);
+	rc |= write_guest_lc(vcpu, __LC_EXT_OLD_PSW,
+			     &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
+	rc |= read_guest_lc(vcpu, __LC_EXT_NEW_PSW,
+			    &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
+	rc |= put_guest_lc(vcpu, parm,
+			   (u32 *)__LC_EXT_PARAMS);
+
+	return rc ? -EFAULT : 0;
+}
+
+static int __must_check __deliver_service(struct kvm_vcpu *vcpu)
+{
+	struct kvm_s390_float_interrupt *fi = &vcpu->kvm->arch.float_int;
+	struct kvm_s390_ext_info ext;
+
+	spin_lock(&fi->lock);
+	if (test_bit(IRQ_PEND_EXT_SERVICE, &fi->masked_irqs) ||
+	    !(test_bit(IRQ_PEND_EXT_SERVICE, &fi->pending_irqs))) {
+		spin_unlock(&fi->lock);
+		return 0;
+	}
+	ext = fi->srv_signal;
+	memset(&fi->srv_signal, 0, sizeof(ext));
+	clear_bit(IRQ_PEND_EXT_SERVICE, &fi->pending_irqs);
+	clear_bit(IRQ_PEND_EXT_SERVICE_EV, &fi->pending_irqs);
+	if (kvm_s390_pv_cpu_is_protected(vcpu))
+		set_bit(IRQ_PEND_EXT_SERVICE, &fi->masked_irqs);
+	spin_unlock(&fi->lock);
+
+	VCPU_EVENT(vcpu, 4, "deliver: sclp parameter 0x%x",
+		   ext.ext_params);
+	vcpu->stat.deliver_service_signal++;
+	trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, KVM_S390_INT_SERVICE,
+					 ext.ext_params, 0);
+
+	return write_sclp(vcpu, ext.ext_params);
+}
+
+static int __must_check __deliver_service_ev(struct kvm_vcpu *vcpu)
+{
+	struct kvm_s390_float_interrupt *fi = &vcpu->kvm->arch.float_int;
+	struct kvm_s390_ext_info ext;
+
+	spin_lock(&fi->lock);
+	if (!(test_bit(IRQ_PEND_EXT_SERVICE_EV, &fi->pending_irqs))) {
+		spin_unlock(&fi->lock);
+		return 0;
+	}
+	ext = fi->srv_signal;
+	/* only clear the event bit */
+	fi->srv_signal.ext_params &= ~SCCB_EVENT_PENDING;
+	clear_bit(IRQ_PEND_EXT_SERVICE_EV, &fi->pending_irqs);
+	spin_unlock(&fi->lock);
+
+	VCPU_EVENT(vcpu, 4, "%s", "deliver: sclp parameter event");
+	vcpu->stat.deliver_service_signal++;
+	trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, KVM_S390_INT_SERVICE,
+					 ext.ext_params, 0);
+
+	return write_sclp(vcpu, SCCB_EVENT_PENDING);
+}
+
+static int __must_check __deliver_pfault_done(struct kvm_vcpu *vcpu)
+{
+	struct kvm_s390_float_interrupt *fi = &vcpu->kvm->arch.float_int;
+	struct kvm_s390_interrupt_info *inti;
+	int rc = 0;
+
+	spin_lock(&fi->lock);
+	inti = list_first_entry_or_null(&fi->lists[FIRQ_LIST_PFAULT],
+					struct kvm_s390_interrupt_info,
+					list);
+	if (inti) {
+		list_del(&inti->list);
+		fi->counters[FIRQ_CNTR_PFAULT] -= 1;
+	}
+	if (list_empty(&fi->lists[FIRQ_LIST_PFAULT]))
+		clear_bit(IRQ_PEND_PFAULT_DONE, &fi->pending_irqs);
+	spin_unlock(&fi->lock);
+
+	if (inti) {
+		trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id,
+						 KVM_S390_INT_PFAULT_DONE, 0,
+						 inti->ext.ext_params2);
+		VCPU_EVENT(vcpu, 4, "deliver: pfault done token 0x%llx",
+			   inti->ext.ext_params2);
+
+		rc  = put_guest_lc(vcpu, EXT_IRQ_CP_SERVICE,
+				(u16 *)__LC_EXT_INT_CODE);
+		rc |= put_guest_lc(vcpu, PFAULT_DONE,
+				(u16 *)__LC_EXT_CPU_ADDR);
+		rc |= write_guest_lc(vcpu, __LC_EXT_OLD_PSW,
+				&vcpu->arch.sie_block->gpsw,
+				sizeof(psw_t));
+		rc |= read_guest_lc(vcpu, __LC_EXT_NEW_PSW,
+				&vcpu->arch.sie_block->gpsw,
+				sizeof(psw_t));
+		rc |= put_guest_lc(vcpu, inti->ext.ext_params2,
+				(u64 *)__LC_EXT_PARAMS2);
+		kfree(inti);
+	}
+	return rc ? -EFAULT : 0;
+}
+
+static int __must_check __deliver_virtio(struct kvm_vcpu *vcpu)
+{
+	struct kvm_s390_float_interrupt *fi = &vcpu->kvm->arch.float_int;
+	struct kvm_s390_interrupt_info *inti;
+	int rc = 0;
+
+	spin_lock(&fi->lock);
+	inti = list_first_entry_or_null(&fi->lists[FIRQ_LIST_VIRTIO],
+					struct kvm_s390_interrupt_info,
+					list);
+	if (inti) {
+		VCPU_EVENT(vcpu, 4,
+			   "deliver: virtio parm: 0x%x,parm64: 0x%llx",
+			   inti->ext.ext_params, inti->ext.ext_params2);
+		vcpu->stat.deliver_virtio++;
+		trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id,
+				inti->type,
+				inti->ext.ext_params,
+				inti->ext.ext_params2);
+		list_del(&inti->list);
+		fi->counters[FIRQ_CNTR_VIRTIO] -= 1;
+	}
+	if (list_empty(&fi->lists[FIRQ_LIST_VIRTIO]))
+		clear_bit(IRQ_PEND_VIRTIO, &fi->pending_irqs);
+	spin_unlock(&fi->lock);
+
+	if (inti) {
+		rc  = put_guest_lc(vcpu, EXT_IRQ_CP_SERVICE,
+				(u16 *)__LC_EXT_INT_CODE);
+		rc |= put_guest_lc(vcpu, VIRTIO_PARAM,
+				(u16 *)__LC_EXT_CPU_ADDR);
+		rc |= write_guest_lc(vcpu, __LC_EXT_OLD_PSW,
+				&vcpu->arch.sie_block->gpsw,
+				sizeof(psw_t));
+		rc |= read_guest_lc(vcpu, __LC_EXT_NEW_PSW,
+				&vcpu->arch.sie_block->gpsw,
+				sizeof(psw_t));
+		rc |= put_guest_lc(vcpu, inti->ext.ext_params,
+				(u32 *)__LC_EXT_PARAMS);
+		rc |= put_guest_lc(vcpu, inti->ext.ext_params2,
+				(u64 *)__LC_EXT_PARAMS2);
+		kfree(inti);
+	}
+	return rc ? -EFAULT : 0;
+}
+
+static int __do_deliver_io(struct kvm_vcpu *vcpu, struct kvm_s390_io_info *io)
+{
+	int rc;
+
+	if (kvm_s390_pv_cpu_is_protected(vcpu)) {
+		vcpu->arch.sie_block->iictl = IICTL_CODE_IO;
+		vcpu->arch.sie_block->subchannel_id = io->subchannel_id;
+		vcpu->arch.sie_block->subchannel_nr = io->subchannel_nr;
+		vcpu->arch.sie_block->io_int_parm = io->io_int_parm;
+		vcpu->arch.sie_block->io_int_word = io->io_int_word;
+		return 0;
+	}
+
+	rc  = put_guest_lc(vcpu, io->subchannel_id, (u16 *)__LC_SUBCHANNEL_ID);
+	rc |= put_guest_lc(vcpu, io->subchannel_nr, (u16 *)__LC_SUBCHANNEL_NR);
+	rc |= put_guest_lc(vcpu, io->io_int_parm, (u32 *)__LC_IO_INT_PARM);
+	rc |= put_guest_lc(vcpu, io->io_int_word, (u32 *)__LC_IO_INT_WORD);
+	rc |= write_guest_lc(vcpu, __LC_IO_OLD_PSW,
+			     &vcpu->arch.sie_block->gpsw,
+			     sizeof(psw_t));
+	rc |= read_guest_lc(vcpu, __LC_IO_NEW_PSW,
+			    &vcpu->arch.sie_block->gpsw,
+			    sizeof(psw_t));
+	return rc ? -EFAULT : 0;
+}
+
+static int __must_check __deliver_io(struct kvm_vcpu *vcpu,
+				     unsigned long irq_type)
+{
+	struct list_head *isc_list;
+	struct kvm_s390_float_interrupt *fi;
+	struct kvm_s390_gisa_interrupt *gi = &vcpu->kvm->arch.gisa_int;
+	struct kvm_s390_interrupt_info *inti = NULL;
+	struct kvm_s390_io_info io;
+	u32 isc;
+	int rc = 0;
+
+	fi = &vcpu->kvm->arch.float_int;
+
+	spin_lock(&fi->lock);
+	isc = irq_type_to_isc(irq_type);
+	isc_list = &fi->lists[isc];
+	inti = list_first_entry_or_null(isc_list,
+					struct kvm_s390_interrupt_info,
+					list);
+	if (inti) {
+		if (inti->type & KVM_S390_INT_IO_AI_MASK)
+			VCPU_EVENT(vcpu, 4, "%s", "deliver: I/O (AI)");
+		else
+			VCPU_EVENT(vcpu, 4, "deliver: I/O %x ss %x schid %04x",
+			inti->io.subchannel_id >> 8,
+			inti->io.subchannel_id >> 1 & 0x3,
+			inti->io.subchannel_nr);
+
+		vcpu->stat.deliver_io++;
+		trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id,
+				inti->type,
+				((__u32)inti->io.subchannel_id << 16) |
+				inti->io.subchannel_nr,
+				((__u64)inti->io.io_int_parm << 32) |
+				inti->io.io_int_word);
+		list_del(&inti->list);
+		fi->counters[FIRQ_CNTR_IO] -= 1;
+	}
+	if (list_empty(isc_list))
+		clear_bit(irq_type, &fi->pending_irqs);
+	spin_unlock(&fi->lock);
+
+	if (inti) {
+		rc = __do_deliver_io(vcpu, &(inti->io));
+		kfree(inti);
+		goto out;
+	}
+
+	if (gi->origin && gisa_tac_ipm_gisc(gi->origin, isc)) {
+		/*
+		 * in case an adapter interrupt was not delivered
+		 * in SIE context KVM will handle the delivery
+		 */
+		VCPU_EVENT(vcpu, 4, "%s isc %u", "deliver: I/O (AI/gisa)", isc);
+		memset(&io, 0, sizeof(io));
+		io.io_int_word = isc_to_int_word(isc);
+		vcpu->stat.deliver_io++;
+		trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id,
+			KVM_S390_INT_IO(1, 0, 0, 0),
+			((__u32)io.subchannel_id << 16) |
+			io.subchannel_nr,
+			((__u64)io.io_int_parm << 32) |
+			io.io_int_word);
+		rc = __do_deliver_io(vcpu, &io);
+	}
+out:
+	return rc;
+}
+
+/* Check whether an external call is pending (deliverable or not) */
+int kvm_s390_ext_call_pending(struct kvm_vcpu *vcpu)
+{
+	struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
+
+	if (!sclp.has_sigpif)
+		return test_bit(IRQ_PEND_EXT_EXTERNAL, &li->pending_irqs);
+
+	return sca_ext_call_pending(vcpu, NULL);
+}
+
+int kvm_s390_vcpu_has_irq(struct kvm_vcpu *vcpu, int exclude_stop)
+{
+	if (deliverable_irqs(vcpu))
+		return 1;
+
+	if (kvm_cpu_has_pending_timer(vcpu))
+		return 1;
+
+	/* external call pending and deliverable */
+	if (kvm_s390_ext_call_pending(vcpu) &&
+	    !psw_extint_disabled(vcpu) &&
+	    (vcpu->arch.sie_block->gcr[0] & CR0_EXTERNAL_CALL_SUBMASK))
+		return 1;
+
+	if (!exclude_stop && kvm_s390_is_stop_irq_pending(vcpu))
+		return 1;
+	return 0;
+}
+
+int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu)
+{
+	return ckc_irq_pending(vcpu) || cpu_timer_irq_pending(vcpu);
+}
+
+static u64 __calculate_sltime(struct kvm_vcpu *vcpu)
+{
+	const u64 now = kvm_s390_get_tod_clock_fast(vcpu->kvm);
+	const u64 ckc = vcpu->arch.sie_block->ckc;
+	u64 cputm, sltime = 0;
+
+	if (ckc_interrupts_enabled(vcpu)) {
+		if (vcpu->arch.sie_block->gcr[0] & CR0_CLOCK_COMPARATOR_SIGN) {
+			if ((s64)now < (s64)ckc)
+				sltime = tod_to_ns((s64)ckc - (s64)now);
+		} else if (now < ckc) {
+			sltime = tod_to_ns(ckc - now);
+		}
+		/* already expired */
+		if (!sltime)
+			return 0;
+		if (cpu_timer_interrupts_enabled(vcpu)) {
+			cputm = kvm_s390_get_cpu_timer(vcpu);
+			/* already expired? */
+			if (cputm >> 63)
+				return 0;
+			return min_t(u64, sltime, tod_to_ns(cputm));
+		}
+	} else if (cpu_timer_interrupts_enabled(vcpu)) {
+		sltime = kvm_s390_get_cpu_timer(vcpu);
+		/* already expired? */
+		if (sltime >> 63)
+			return 0;
+	}
+	return sltime;
+}
+
+int kvm_s390_handle_wait(struct kvm_vcpu *vcpu)
+{
+	struct kvm_s390_gisa_interrupt *gi = &vcpu->kvm->arch.gisa_int;
+	u64 sltime;
+
+	vcpu->stat.exit_wait_state++;
+
+	/* fast path */
+	if (kvm_arch_vcpu_runnable(vcpu))
+		return 0;
+
+	if (psw_interrupts_disabled(vcpu)) {
+		VCPU_EVENT(vcpu, 3, "%s", "disabled wait");
+		return -EOPNOTSUPP; /* disabled wait */
+	}
+
+	if (gi->origin &&
+	    (gisa_get_ipm_or_restore_iam(gi) &
+	     vcpu->arch.sie_block->gcr[6] >> 24))
+		return 0;
+
+	if (!ckc_interrupts_enabled(vcpu) &&
+	    !cpu_timer_interrupts_enabled(vcpu)) {
+		VCPU_EVENT(vcpu, 3, "%s", "enabled wait w/o timer");
+		__set_cpu_idle(vcpu);
+		goto no_timer;
+	}
+
+	sltime = __calculate_sltime(vcpu);
+	if (!sltime)
+		return 0;
+
+	__set_cpu_idle(vcpu);
+	hrtimer_start(&vcpu->arch.ckc_timer, sltime, HRTIMER_MODE_REL);
+	VCPU_EVENT(vcpu, 4, "enabled wait: %llu ns", sltime);
+no_timer:
+	kvm_vcpu_srcu_read_unlock(vcpu);
+	kvm_vcpu_halt(vcpu);
+	vcpu->valid_wakeup = false;
+	__unset_cpu_idle(vcpu);
+	kvm_vcpu_srcu_read_lock(vcpu);
+
+	hrtimer_cancel(&vcpu->arch.ckc_timer);
+	return 0;
+}
+
+void kvm_s390_vcpu_wakeup(struct kvm_vcpu *vcpu)
+{
+	vcpu->valid_wakeup = true;
+	kvm_vcpu_wake_up(vcpu);
+
+	/*
+	 * The VCPU might not be sleeping but rather executing VSIE. Let's
+	 * kick it, so it leaves the SIE to process the request.
+	 */
+	kvm_s390_vsie_kick(vcpu);
+}
+
+enum hrtimer_restart kvm_s390_idle_wakeup(struct hrtimer *timer)
+{
+	struct kvm_vcpu *vcpu;
+	u64 sltime;
+
+	vcpu = container_of(timer, struct kvm_vcpu, arch.ckc_timer);
+	sltime = __calculate_sltime(vcpu);
+
+	/*
+	 * If the monotonic clock runs faster than the tod clock we might be
+	 * woken up too early and have to go back to sleep to avoid deadlocks.
+	 */
+	if (sltime && hrtimer_forward_now(timer, ns_to_ktime(sltime)))
+		return HRTIMER_RESTART;
+	kvm_s390_vcpu_wakeup(vcpu);
+	return HRTIMER_NORESTART;
+}
+
+void kvm_s390_clear_local_irqs(struct kvm_vcpu *vcpu)
+{
+	struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
+
+	spin_lock(&li->lock);
+	li->pending_irqs = 0;
+	bitmap_zero(li->sigp_emerg_pending, KVM_MAX_VCPUS);
+	memset(&li->irq, 0, sizeof(li->irq));
+	spin_unlock(&li->lock);
+
+	sca_clear_ext_call(vcpu);
+}
+
+int __must_check kvm_s390_deliver_pending_interrupts(struct kvm_vcpu *vcpu)
+{
+	struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
+	int rc = 0;
+	bool delivered = false;
+	unsigned long irq_type;
+	unsigned long irqs;
+
+	__reset_intercept_indicators(vcpu);
+
+	/* pending ckc conditions might have been invalidated */
+	clear_bit(IRQ_PEND_EXT_CLOCK_COMP, &li->pending_irqs);
+	if (ckc_irq_pending(vcpu))
+		set_bit(IRQ_PEND_EXT_CLOCK_COMP, &li->pending_irqs);
+
+	/* pending cpu timer conditions might have been invalidated */
+	clear_bit(IRQ_PEND_EXT_CPU_TIMER, &li->pending_irqs);
+	if (cpu_timer_irq_pending(vcpu))
+		set_bit(IRQ_PEND_EXT_CPU_TIMER, &li->pending_irqs);
+
+	while ((irqs = deliverable_irqs(vcpu)) && !rc) {
+		/* bits are in the reverse order of interrupt priority */
+		irq_type = find_last_bit(&irqs, IRQ_PEND_COUNT);
+		switch (irq_type) {
+		case IRQ_PEND_IO_ISC_0:
+		case IRQ_PEND_IO_ISC_1:
+		case IRQ_PEND_IO_ISC_2:
+		case IRQ_PEND_IO_ISC_3:
+		case IRQ_PEND_IO_ISC_4:
+		case IRQ_PEND_IO_ISC_5:
+		case IRQ_PEND_IO_ISC_6:
+		case IRQ_PEND_IO_ISC_7:
+			rc = __deliver_io(vcpu, irq_type);
+			break;
+		case IRQ_PEND_MCHK_EX:
+		case IRQ_PEND_MCHK_REP:
+			rc = __deliver_machine_check(vcpu);
+			break;
+		case IRQ_PEND_PROG:
+			rc = __deliver_prog(vcpu);
+			break;
+		case IRQ_PEND_EXT_EMERGENCY:
+			rc = __deliver_emergency_signal(vcpu);
+			break;
+		case IRQ_PEND_EXT_EXTERNAL:
+			rc = __deliver_external_call(vcpu);
+			break;
+		case IRQ_PEND_EXT_CLOCK_COMP:
+			rc = __deliver_ckc(vcpu);
+			break;
+		case IRQ_PEND_EXT_CPU_TIMER:
+			rc = __deliver_cpu_timer(vcpu);
+			break;
+		case IRQ_PEND_RESTART:
+			rc = __deliver_restart(vcpu);
+			break;
+		case IRQ_PEND_SET_PREFIX:
+			rc = __deliver_set_prefix(vcpu);
+			break;
+		case IRQ_PEND_PFAULT_INIT:
+			rc = __deliver_pfault_init(vcpu);
+			break;
+		case IRQ_PEND_EXT_SERVICE:
+			rc = __deliver_service(vcpu);
+			break;
+		case IRQ_PEND_EXT_SERVICE_EV:
+			rc = __deliver_service_ev(vcpu);
+			break;
+		case IRQ_PEND_PFAULT_DONE:
+			rc = __deliver_pfault_done(vcpu);
+			break;
+		case IRQ_PEND_VIRTIO:
+			rc = __deliver_virtio(vcpu);
+			break;
+		default:
+			WARN_ONCE(1, "Unknown pending irq type %ld", irq_type);
+			clear_bit(irq_type, &li->pending_irqs);
+		}
+		delivered |= !rc;
+	}
+
+	/*
+	 * We delivered at least one interrupt and modified the PC. Force a
+	 * singlestep event now.
+	 */
+	if (delivered && guestdbg_sstep_enabled(vcpu)) {
+		struct kvm_debug_exit_arch *debug_exit = &vcpu->run->debug.arch;
+
+		debug_exit->addr = vcpu->arch.sie_block->gpsw.addr;
+		debug_exit->type = KVM_SINGLESTEP;
+		vcpu->guest_debug |= KVM_GUESTDBG_EXIT_PENDING;
+	}
+
+	set_intercept_indicators(vcpu);
+
+	return rc;
+}
+
+static int __inject_prog(struct kvm_vcpu *vcpu, struct kvm_s390_irq *irq)
+{
+	struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
+
+	vcpu->stat.inject_program++;
+	VCPU_EVENT(vcpu, 3, "inject: program irq code 0x%x", irq->u.pgm.code);
+	trace_kvm_s390_inject_vcpu(vcpu->vcpu_id, KVM_S390_PROGRAM_INT,
+				   irq->u.pgm.code, 0);
+
+	if (!(irq->u.pgm.flags & KVM_S390_PGM_FLAGS_ILC_VALID)) {
+		/* auto detection if no valid ILC was given */
+		irq->u.pgm.flags &= ~KVM_S390_PGM_FLAGS_ILC_MASK;
+		irq->u.pgm.flags |= kvm_s390_get_ilen(vcpu);
+		irq->u.pgm.flags |= KVM_S390_PGM_FLAGS_ILC_VALID;
+	}
+
+	if (irq->u.pgm.code == PGM_PER) {
+		li->irq.pgm.code |= PGM_PER;
+		li->irq.pgm.flags = irq->u.pgm.flags;
+		/* only modify PER related information */
+		li->irq.pgm.per_address = irq->u.pgm.per_address;
+		li->irq.pgm.per_code = irq->u.pgm.per_code;
+		li->irq.pgm.per_atmid = irq->u.pgm.per_atmid;
+		li->irq.pgm.per_access_id = irq->u.pgm.per_access_id;
+	} else if (!(irq->u.pgm.code & PGM_PER)) {
+		li->irq.pgm.code = (li->irq.pgm.code & PGM_PER) |
+				   irq->u.pgm.code;
+		li->irq.pgm.flags = irq->u.pgm.flags;
+		/* only modify non-PER information */
+		li->irq.pgm.trans_exc_code = irq->u.pgm.trans_exc_code;
+		li->irq.pgm.mon_code = irq->u.pgm.mon_code;
+		li->irq.pgm.data_exc_code = irq->u.pgm.data_exc_code;
+		li->irq.pgm.mon_class_nr = irq->u.pgm.mon_class_nr;
+		li->irq.pgm.exc_access_id = irq->u.pgm.exc_access_id;
+		li->irq.pgm.op_access_id = irq->u.pgm.op_access_id;
+	} else {
+		li->irq.pgm = irq->u.pgm;
+	}
+	set_bit(IRQ_PEND_PROG, &li->pending_irqs);
+	return 0;
+}
+
+static int __inject_pfault_init(struct kvm_vcpu *vcpu, struct kvm_s390_irq *irq)
+{
+	struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
+
+	vcpu->stat.inject_pfault_init++;
+	VCPU_EVENT(vcpu, 4, "inject: pfault init parameter block at 0x%llx",
+		   irq->u.ext.ext_params2);
+	trace_kvm_s390_inject_vcpu(vcpu->vcpu_id, KVM_S390_INT_PFAULT_INIT,
+				   irq->u.ext.ext_params,
+				   irq->u.ext.ext_params2);
+
+	li->irq.ext = irq->u.ext;
+	set_bit(IRQ_PEND_PFAULT_INIT, &li->pending_irqs);
+	kvm_s390_set_cpuflags(vcpu, CPUSTAT_EXT_INT);
+	return 0;
+}
+
+static int __inject_extcall(struct kvm_vcpu *vcpu, struct kvm_s390_irq *irq)
+{
+	struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
+	struct kvm_s390_extcall_info *extcall = &li->irq.extcall;
+	uint16_t src_id = irq->u.extcall.code;
+
+	vcpu->stat.inject_external_call++;
+	VCPU_EVENT(vcpu, 4, "inject: external call source-cpu:%u",
+		   src_id);
+	trace_kvm_s390_inject_vcpu(vcpu->vcpu_id, KVM_S390_INT_EXTERNAL_CALL,
+				   src_id, 0);
+
+	/* sending vcpu invalid */
+	if (kvm_get_vcpu_by_id(vcpu->kvm, src_id) == NULL)
+		return -EINVAL;
+
+	if (sclp.has_sigpif && !kvm_s390_pv_cpu_get_handle(vcpu))
+		return sca_inject_ext_call(vcpu, src_id);
+
+	if (test_and_set_bit(IRQ_PEND_EXT_EXTERNAL, &li->pending_irqs))
+		return -EBUSY;
+	*extcall = irq->u.extcall;
+	kvm_s390_set_cpuflags(vcpu, CPUSTAT_EXT_INT);
+	return 0;
+}
+
+static int __inject_set_prefix(struct kvm_vcpu *vcpu, struct kvm_s390_irq *irq)
+{
+	struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
+	struct kvm_s390_prefix_info *prefix = &li->irq.prefix;
+
+	vcpu->stat.inject_set_prefix++;
+	VCPU_EVENT(vcpu, 3, "inject: set prefix to %x",
+		   irq->u.prefix.address);
+	trace_kvm_s390_inject_vcpu(vcpu->vcpu_id, KVM_S390_SIGP_SET_PREFIX,
+				   irq->u.prefix.address, 0);
+
+	if (!is_vcpu_stopped(vcpu))
+		return -EBUSY;
+
+	*prefix = irq->u.prefix;
+	set_bit(IRQ_PEND_SET_PREFIX, &li->pending_irqs);
+	return 0;
+}
+
+#define KVM_S390_STOP_SUPP_FLAGS (KVM_S390_STOP_FLAG_STORE_STATUS)
+static int __inject_sigp_stop(struct kvm_vcpu *vcpu, struct kvm_s390_irq *irq)
+{
+	struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
+	struct kvm_s390_stop_info *stop = &li->irq.stop;
+	int rc = 0;
+
+	vcpu->stat.inject_stop_signal++;
+	trace_kvm_s390_inject_vcpu(vcpu->vcpu_id, KVM_S390_SIGP_STOP, 0, 0);
+
+	if (irq->u.stop.flags & ~KVM_S390_STOP_SUPP_FLAGS)
+		return -EINVAL;
+
+	if (is_vcpu_stopped(vcpu)) {
+		if (irq->u.stop.flags & KVM_S390_STOP_FLAG_STORE_STATUS)
+			rc = kvm_s390_store_status_unloaded(vcpu,
+						KVM_S390_STORE_STATUS_NOADDR);
+		return rc;
+	}
+
+	if (test_and_set_bit(IRQ_PEND_SIGP_STOP, &li->pending_irqs))
+		return -EBUSY;
+	stop->flags = irq->u.stop.flags;
+	kvm_s390_set_cpuflags(vcpu, CPUSTAT_STOP_INT);
+	return 0;
+}
+
+static int __inject_sigp_restart(struct kvm_vcpu *vcpu)
+{
+	struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
+
+	vcpu->stat.inject_restart++;
+	VCPU_EVENT(vcpu, 3, "%s", "inject: restart int");
+	trace_kvm_s390_inject_vcpu(vcpu->vcpu_id, KVM_S390_RESTART, 0, 0);
+
+	set_bit(IRQ_PEND_RESTART, &li->pending_irqs);
+	return 0;
+}
+
+static int __inject_sigp_emergency(struct kvm_vcpu *vcpu,
+				   struct kvm_s390_irq *irq)
+{
+	struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
+
+	vcpu->stat.inject_emergency_signal++;
+	VCPU_EVENT(vcpu, 4, "inject: emergency from cpu %u",
+		   irq->u.emerg.code);
+	trace_kvm_s390_inject_vcpu(vcpu->vcpu_id, KVM_S390_INT_EMERGENCY,
+				   irq->u.emerg.code, 0);
+
+	/* sending vcpu invalid */
+	if (kvm_get_vcpu_by_id(vcpu->kvm, irq->u.emerg.code) == NULL)
+		return -EINVAL;
+
+	set_bit(irq->u.emerg.code, li->sigp_emerg_pending);
+	set_bit(IRQ_PEND_EXT_EMERGENCY, &li->pending_irqs);
+	kvm_s390_set_cpuflags(vcpu, CPUSTAT_EXT_INT);
+	return 0;
+}
+
+static int __inject_mchk(struct kvm_vcpu *vcpu, struct kvm_s390_irq *irq)
+{
+	struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
+	struct kvm_s390_mchk_info *mchk = &li->irq.mchk;
+
+	vcpu->stat.inject_mchk++;
+	VCPU_EVENT(vcpu, 3, "inject: machine check mcic 0x%llx",
+		   irq->u.mchk.mcic);
+	trace_kvm_s390_inject_vcpu(vcpu->vcpu_id, KVM_S390_MCHK, 0,
+				   irq->u.mchk.mcic);
+
+	/*
+	 * Because repressible machine checks can be indicated along with
+	 * exigent machine checks (PoP, Chapter 11, Interruption action)
+	 * we need to combine cr14, mcic and external damage code.
+	 * Failing storage address and the logout area should not be or'ed
+	 * together, we just indicate the last occurrence of the corresponding
+	 * machine check
+	 */
+	mchk->cr14 |= irq->u.mchk.cr14;
+	mchk->mcic |= irq->u.mchk.mcic;
+	mchk->ext_damage_code |= irq->u.mchk.ext_damage_code;
+	mchk->failing_storage_address = irq->u.mchk.failing_storage_address;
+	memcpy(&mchk->fixed_logout, &irq->u.mchk.fixed_logout,
+	       sizeof(mchk->fixed_logout));
+	if (mchk->mcic & MCHK_EX_MASK)
+		set_bit(IRQ_PEND_MCHK_EX, &li->pending_irqs);
+	else if (mchk->mcic & MCHK_REP_MASK)
+		set_bit(IRQ_PEND_MCHK_REP,  &li->pending_irqs);
+	return 0;
+}
+
+static int __inject_ckc(struct kvm_vcpu *vcpu)
+{
+	struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
+
+	vcpu->stat.inject_ckc++;
+	VCPU_EVENT(vcpu, 3, "%s", "inject: clock comparator external");
+	trace_kvm_s390_inject_vcpu(vcpu->vcpu_id, KVM_S390_INT_CLOCK_COMP,
+				   0, 0);
+
+	set_bit(IRQ_PEND_EXT_CLOCK_COMP, &li->pending_irqs);
+	kvm_s390_set_cpuflags(vcpu, CPUSTAT_EXT_INT);
+	return 0;
+}
+
+static int __inject_cpu_timer(struct kvm_vcpu *vcpu)
+{
+	struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
+
+	vcpu->stat.inject_cputm++;
+	VCPU_EVENT(vcpu, 3, "%s", "inject: cpu timer external");
+	trace_kvm_s390_inject_vcpu(vcpu->vcpu_id, KVM_S390_INT_CPU_TIMER,
+				   0, 0);
+
+	set_bit(IRQ_PEND_EXT_CPU_TIMER, &li->pending_irqs);
+	kvm_s390_set_cpuflags(vcpu, CPUSTAT_EXT_INT);
+	return 0;
+}
+
+static struct kvm_s390_interrupt_info *get_io_int(struct kvm *kvm,
+						  int isc, u32 schid)
+{
+	struct kvm_s390_float_interrupt *fi = &kvm->arch.float_int;
+	struct list_head *isc_list = &fi->lists[FIRQ_LIST_IO_ISC_0 + isc];
+	struct kvm_s390_interrupt_info *iter;
+	u16 id = (schid & 0xffff0000U) >> 16;
+	u16 nr = schid & 0x0000ffffU;
+
+	spin_lock(&fi->lock);
+	list_for_each_entry(iter, isc_list, list) {
+		if (schid && (id != iter->io.subchannel_id ||
+			      nr != iter->io.subchannel_nr))
+			continue;
+		/* found an appropriate entry */
+		list_del_init(&iter->list);
+		fi->counters[FIRQ_CNTR_IO] -= 1;
+		if (list_empty(isc_list))
+			clear_bit(isc_to_irq_type(isc), &fi->pending_irqs);
+		spin_unlock(&fi->lock);
+		return iter;
+	}
+	spin_unlock(&fi->lock);
+	return NULL;
+}
+
+static struct kvm_s390_interrupt_info *get_top_io_int(struct kvm *kvm,
+						      u64 isc_mask, u32 schid)
+{
+	struct kvm_s390_interrupt_info *inti = NULL;
+	int isc;
+
+	for (isc = 0; isc <= MAX_ISC && !inti; isc++) {
+		if (isc_mask & isc_to_isc_bits(isc))
+			inti = get_io_int(kvm, isc, schid);
+	}
+	return inti;
+}
+
+static int get_top_gisa_isc(struct kvm *kvm, u64 isc_mask, u32 schid)
+{
+	struct kvm_s390_gisa_interrupt *gi = &kvm->arch.gisa_int;
+	unsigned long active_mask;
+	int isc;
+
+	if (schid)
+		goto out;
+	if (!gi->origin)
+		goto out;
+
+	active_mask = (isc_mask & gisa_get_ipm(gi->origin) << 24) << 32;
+	while (active_mask) {
+		isc = __fls(active_mask) ^ (BITS_PER_LONG - 1);
+		if (gisa_tac_ipm_gisc(gi->origin, isc))
+			return isc;
+		clear_bit_inv(isc, &active_mask);
+	}
+out:
+	return -EINVAL;
+}
+
+/*
+ * Dequeue and return an I/O interrupt matching any of the interruption
+ * subclasses as designated by the isc mask in cr6 and the schid (if != 0).
+ * Take into account the interrupts pending in the interrupt list and in GISA.
+ *
+ * Note that for a guest that does not enable I/O interrupts
+ * but relies on TPI, a flood of classic interrupts may starve
+ * out adapter interrupts on the same isc. Linux does not do
+ * that, and it is possible to work around the issue by configuring
+ * different iscs for classic and adapter interrupts in the guest,
+ * but we may want to revisit this in the future.
+ */
+struct kvm_s390_interrupt_info *kvm_s390_get_io_int(struct kvm *kvm,
+						    u64 isc_mask, u32 schid)
+{
+	struct kvm_s390_gisa_interrupt *gi = &kvm->arch.gisa_int;
+	struct kvm_s390_interrupt_info *inti, *tmp_inti;
+	int isc;
+
+	inti = get_top_io_int(kvm, isc_mask, schid);
+
+	isc = get_top_gisa_isc(kvm, isc_mask, schid);
+	if (isc < 0)
+		/* no AI in GISA */
+		goto out;
+
+	if (!inti)
+		/* AI in GISA but no classical IO int */
+		goto gisa_out;
+
+	/* both types of interrupts present */
+	if (int_word_to_isc(inti->io.io_int_word) <= isc) {
+		/* classical IO int with higher priority */
+		gisa_set_ipm_gisc(gi->origin, isc);
+		goto out;
+	}
+gisa_out:
+	tmp_inti = kzalloc(sizeof(*inti), GFP_KERNEL_ACCOUNT);
+	if (tmp_inti) {
+		tmp_inti->type = KVM_S390_INT_IO(1, 0, 0, 0);
+		tmp_inti->io.io_int_word = isc_to_int_word(isc);
+		if (inti)
+			kvm_s390_reinject_io_int(kvm, inti);
+		inti = tmp_inti;
+	} else
+		gisa_set_ipm_gisc(gi->origin, isc);
+out:
+	return inti;
+}
+
+static int __inject_service(struct kvm *kvm,
+			     struct kvm_s390_interrupt_info *inti)
+{
+	struct kvm_s390_float_interrupt *fi = &kvm->arch.float_int;
+
+	kvm->stat.inject_service_signal++;
+	spin_lock(&fi->lock);
+	fi->srv_signal.ext_params |= inti->ext.ext_params & SCCB_EVENT_PENDING;
+
+	/* We always allow events, track them separately from the sccb ints */
+	if (fi->srv_signal.ext_params & SCCB_EVENT_PENDING)
+		set_bit(IRQ_PEND_EXT_SERVICE_EV, &fi->pending_irqs);
+
+	/*
+	 * Early versions of the QEMU s390 bios will inject several
+	 * service interrupts after another without handling a
+	 * condition code indicating busy.
+	 * We will silently ignore those superfluous sccb values.
+	 * A future version of QEMU will take care of serialization
+	 * of servc requests
+	 */
+	if (fi->srv_signal.ext_params & SCCB_MASK)
+		goto out;
+	fi->srv_signal.ext_params |= inti->ext.ext_params & SCCB_MASK;
+	set_bit(IRQ_PEND_EXT_SERVICE, &fi->pending_irqs);
+out:
+	spin_unlock(&fi->lock);
+	kfree(inti);
+	return 0;
+}
+
+static int __inject_virtio(struct kvm *kvm,
+			    struct kvm_s390_interrupt_info *inti)
+{
+	struct kvm_s390_float_interrupt *fi = &kvm->arch.float_int;
+
+	kvm->stat.inject_virtio++;
+	spin_lock(&fi->lock);
+	if (fi->counters[FIRQ_CNTR_VIRTIO] >= KVM_S390_MAX_VIRTIO_IRQS) {
+		spin_unlock(&fi->lock);
+		return -EBUSY;
+	}
+	fi->counters[FIRQ_CNTR_VIRTIO] += 1;
+	list_add_tail(&inti->list, &fi->lists[FIRQ_LIST_VIRTIO]);
+	set_bit(IRQ_PEND_VIRTIO, &fi->pending_irqs);
+	spin_unlock(&fi->lock);
+	return 0;
+}
+
+static int __inject_pfault_done(struct kvm *kvm,
+				 struct kvm_s390_interrupt_info *inti)
+{
+	struct kvm_s390_float_interrupt *fi = &kvm->arch.float_int;
+
+	kvm->stat.inject_pfault_done++;
+	spin_lock(&fi->lock);
+	if (fi->counters[FIRQ_CNTR_PFAULT] >=
+		(ASYNC_PF_PER_VCPU * KVM_MAX_VCPUS)) {
+		spin_unlock(&fi->lock);
+		return -EBUSY;
+	}
+	fi->counters[FIRQ_CNTR_PFAULT] += 1;
+	list_add_tail(&inti->list, &fi->lists[FIRQ_LIST_PFAULT]);
+	set_bit(IRQ_PEND_PFAULT_DONE, &fi->pending_irqs);
+	spin_unlock(&fi->lock);
+	return 0;
+}
+
+#define CR_PENDING_SUBCLASS 28
+static int __inject_float_mchk(struct kvm *kvm,
+				struct kvm_s390_interrupt_info *inti)
+{
+	struct kvm_s390_float_interrupt *fi = &kvm->arch.float_int;
+
+	kvm->stat.inject_float_mchk++;
+	spin_lock(&fi->lock);
+	fi->mchk.cr14 |= inti->mchk.cr14 & (1UL << CR_PENDING_SUBCLASS);
+	fi->mchk.mcic |= inti->mchk.mcic;
+	set_bit(IRQ_PEND_MCHK_REP, &fi->pending_irqs);
+	spin_unlock(&fi->lock);
+	kfree(inti);
+	return 0;
+}
+
+static int __inject_io(struct kvm *kvm, struct kvm_s390_interrupt_info *inti)
+{
+	struct kvm_s390_gisa_interrupt *gi = &kvm->arch.gisa_int;
+	struct kvm_s390_float_interrupt *fi;
+	struct list_head *list;
+	int isc;
+
+	kvm->stat.inject_io++;
+	isc = int_word_to_isc(inti->io.io_int_word);
+
+	/*
+	 * We do not use the lock checking variant as this is just a
+	 * performance optimization and we do not hold the lock here.
+	 * This is ok as the code will pick interrupts from both "lists"
+	 * for delivery.
+	 */
+	if (gi->origin && inti->type & KVM_S390_INT_IO_AI_MASK) {
+		VM_EVENT(kvm, 4, "%s isc %1u", "inject: I/O (AI/gisa)", isc);
+		gisa_set_ipm_gisc(gi->origin, isc);
+		kfree(inti);
+		return 0;
+	}
+
+	fi = &kvm->arch.float_int;
+	spin_lock(&fi->lock);
+	if (fi->counters[FIRQ_CNTR_IO] >= KVM_S390_MAX_FLOAT_IRQS) {
+		spin_unlock(&fi->lock);
+		return -EBUSY;
+	}
+	fi->counters[FIRQ_CNTR_IO] += 1;
+
+	if (inti->type & KVM_S390_INT_IO_AI_MASK)
+		VM_EVENT(kvm, 4, "%s", "inject: I/O (AI)");
+	else
+		VM_EVENT(kvm, 4, "inject: I/O %x ss %x schid %04x",
+			inti->io.subchannel_id >> 8,
+			inti->io.subchannel_id >> 1 & 0x3,
+			inti->io.subchannel_nr);
+	list = &fi->lists[FIRQ_LIST_IO_ISC_0 + isc];
+	list_add_tail(&inti->list, list);
+	set_bit(isc_to_irq_type(isc), &fi->pending_irqs);
+	spin_unlock(&fi->lock);
+	return 0;
+}
+
+/*
+ * Find a destination VCPU for a floating irq and kick it.
+ */
+static void __floating_irq_kick(struct kvm *kvm, u64 type)
+{
+	struct kvm_vcpu *dst_vcpu;
+	int sigcpu, online_vcpus, nr_tries = 0;
+
+	online_vcpus = atomic_read(&kvm->online_vcpus);
+	if (!online_vcpus)
+		return;
+
+	/* find idle VCPUs first, then round robin */
+	sigcpu = find_first_bit(kvm->arch.idle_mask, online_vcpus);
+	if (sigcpu == online_vcpus) {
+		do {
+			sigcpu = kvm->arch.float_int.next_rr_cpu++;
+			kvm->arch.float_int.next_rr_cpu %= online_vcpus;
+			/* avoid endless loops if all vcpus are stopped */
+			if (nr_tries++ >= online_vcpus)
+				return;
+		} while (is_vcpu_stopped(kvm_get_vcpu(kvm, sigcpu)));
+	}
+	dst_vcpu = kvm_get_vcpu(kvm, sigcpu);
+
+	/* make the VCPU drop out of the SIE, or wake it up if sleeping */
+	switch (type) {
+	case KVM_S390_MCHK:
+		kvm_s390_set_cpuflags(dst_vcpu, CPUSTAT_STOP_INT);
+		break;
+	case KVM_S390_INT_IO_MIN...KVM_S390_INT_IO_MAX:
+		if (!(type & KVM_S390_INT_IO_AI_MASK &&
+		      kvm->arch.gisa_int.origin) ||
+		      kvm_s390_pv_cpu_get_handle(dst_vcpu))
+			kvm_s390_set_cpuflags(dst_vcpu, CPUSTAT_IO_INT);
+		break;
+	default:
+		kvm_s390_set_cpuflags(dst_vcpu, CPUSTAT_EXT_INT);
+		break;
+	}
+	kvm_s390_vcpu_wakeup(dst_vcpu);
+}
+
+static int __inject_vm(struct kvm *kvm, struct kvm_s390_interrupt_info *inti)
+{
+	u64 type = READ_ONCE(inti->type);
+	int rc;
+
+	switch (type) {
+	case KVM_S390_MCHK:
+		rc = __inject_float_mchk(kvm, inti);
+		break;
+	case KVM_S390_INT_VIRTIO:
+		rc = __inject_virtio(kvm, inti);
+		break;
+	case KVM_S390_INT_SERVICE:
+		rc = __inject_service(kvm, inti);
+		break;
+	case KVM_S390_INT_PFAULT_DONE:
+		rc = __inject_pfault_done(kvm, inti);
+		break;
+	case KVM_S390_INT_IO_MIN...KVM_S390_INT_IO_MAX:
+		rc = __inject_io(kvm, inti);
+		break;
+	default:
+		rc = -EINVAL;
+	}
+	if (rc)
+		return rc;
+
+	__floating_irq_kick(kvm, type);
+	return 0;
+}
+
+int kvm_s390_inject_vm(struct kvm *kvm,
+		       struct kvm_s390_interrupt *s390int)
+{
+	struct kvm_s390_interrupt_info *inti;
+	int rc;
+
+	inti = kzalloc(sizeof(*inti), GFP_KERNEL_ACCOUNT);
+	if (!inti)
+		return -ENOMEM;
+
+	inti->type = s390int->type;
+	switch (inti->type) {
+	case KVM_S390_INT_VIRTIO:
+		VM_EVENT(kvm, 5, "inject: virtio parm:%x,parm64:%llx",
+			 s390int->parm, s390int->parm64);
+		inti->ext.ext_params = s390int->parm;
+		inti->ext.ext_params2 = s390int->parm64;
+		break;
+	case KVM_S390_INT_SERVICE:
+		VM_EVENT(kvm, 4, "inject: sclp parm:%x", s390int->parm);
+		inti->ext.ext_params = s390int->parm;
+		break;
+	case KVM_S390_INT_PFAULT_DONE:
+		inti->ext.ext_params2 = s390int->parm64;
+		break;
+	case KVM_S390_MCHK:
+		VM_EVENT(kvm, 3, "inject: machine check mcic 0x%llx",
+			 s390int->parm64);
+		inti->mchk.cr14 = s390int->parm; /* upper bits are not used */
+		inti->mchk.mcic = s390int->parm64;
+		break;
+	case KVM_S390_INT_IO_MIN...KVM_S390_INT_IO_MAX:
+		inti->io.subchannel_id = s390int->parm >> 16;
+		inti->io.subchannel_nr = s390int->parm & 0x0000ffffu;
+		inti->io.io_int_parm = s390int->parm64 >> 32;
+		inti->io.io_int_word = s390int->parm64 & 0x00000000ffffffffull;
+		break;
+	default:
+		kfree(inti);
+		return -EINVAL;
+	}
+	trace_kvm_s390_inject_vm(s390int->type, s390int->parm, s390int->parm64,
+				 2);
+
+	rc = __inject_vm(kvm, inti);
+	if (rc)
+		kfree(inti);
+	return rc;
+}
+
+int kvm_s390_reinject_io_int(struct kvm *kvm,
+			      struct kvm_s390_interrupt_info *inti)
+{
+	return __inject_vm(kvm, inti);
+}
+
+int s390int_to_s390irq(struct kvm_s390_interrupt *s390int,
+		       struct kvm_s390_irq *irq)
+{
+	irq->type = s390int->type;
+	switch (irq->type) {
+	case KVM_S390_PROGRAM_INT:
+		if (s390int->parm & 0xffff0000)
+			return -EINVAL;
+		irq->u.pgm.code = s390int->parm;
+		break;
+	case KVM_S390_SIGP_SET_PREFIX:
+		irq->u.prefix.address = s390int->parm;
+		break;
+	case KVM_S390_SIGP_STOP:
+		irq->u.stop.flags = s390int->parm;
+		break;
+	case KVM_S390_INT_EXTERNAL_CALL:
+		if (s390int->parm & 0xffff0000)
+			return -EINVAL;
+		irq->u.extcall.code = s390int->parm;
+		break;
+	case KVM_S390_INT_EMERGENCY:
+		if (s390int->parm & 0xffff0000)
+			return -EINVAL;
+		irq->u.emerg.code = s390int->parm;
+		break;
+	case KVM_S390_MCHK:
+		irq->u.mchk.mcic = s390int->parm64;
+		break;
+	case KVM_S390_INT_PFAULT_INIT:
+		irq->u.ext.ext_params = s390int->parm;
+		irq->u.ext.ext_params2 = s390int->parm64;
+		break;
+	case KVM_S390_RESTART:
+	case KVM_S390_INT_CLOCK_COMP:
+	case KVM_S390_INT_CPU_TIMER:
+		break;
+	default:
+		return -EINVAL;
+	}
+	return 0;
+}
+
+int kvm_s390_is_stop_irq_pending(struct kvm_vcpu *vcpu)
+{
+	struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
+
+	return test_bit(IRQ_PEND_SIGP_STOP, &li->pending_irqs);
+}
+
+int kvm_s390_is_restart_irq_pending(struct kvm_vcpu *vcpu)
+{
+	struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
+
+	return test_bit(IRQ_PEND_RESTART, &li->pending_irqs);
+}
+
+void kvm_s390_clear_stop_irq(struct kvm_vcpu *vcpu)
+{
+	struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
+
+	spin_lock(&li->lock);
+	li->irq.stop.flags = 0;
+	clear_bit(IRQ_PEND_SIGP_STOP, &li->pending_irqs);
+	spin_unlock(&li->lock);
+}
+
+static int do_inject_vcpu(struct kvm_vcpu *vcpu, struct kvm_s390_irq *irq)
+{
+	int rc;
+
+	switch (irq->type) {
+	case KVM_S390_PROGRAM_INT:
+		rc = __inject_prog(vcpu, irq);
+		break;
+	case KVM_S390_SIGP_SET_PREFIX:
+		rc = __inject_set_prefix(vcpu, irq);
+		break;
+	case KVM_S390_SIGP_STOP:
+		rc = __inject_sigp_stop(vcpu, irq);
+		break;
+	case KVM_S390_RESTART:
+		rc = __inject_sigp_restart(vcpu);
+		break;
+	case KVM_S390_INT_CLOCK_COMP:
+		rc = __inject_ckc(vcpu);
+		break;
+	case KVM_S390_INT_CPU_TIMER:
+		rc = __inject_cpu_timer(vcpu);
+		break;
+	case KVM_S390_INT_EXTERNAL_CALL:
+		rc = __inject_extcall(vcpu, irq);
+		break;
+	case KVM_S390_INT_EMERGENCY:
+		rc = __inject_sigp_emergency(vcpu, irq);
+		break;
+	case KVM_S390_MCHK:
+		rc = __inject_mchk(vcpu, irq);
+		break;
+	case KVM_S390_INT_PFAULT_INIT:
+		rc = __inject_pfault_init(vcpu, irq);
+		break;
+	case KVM_S390_INT_VIRTIO:
+	case KVM_S390_INT_SERVICE:
+	case KVM_S390_INT_IO_MIN...KVM_S390_INT_IO_MAX:
+	default:
+		rc = -EINVAL;
+	}
+
+	return rc;
+}
+
+int kvm_s390_inject_vcpu(struct kvm_vcpu *vcpu, struct kvm_s390_irq *irq)
+{
+	struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
+	int rc;
+
+	spin_lock(&li->lock);
+	rc = do_inject_vcpu(vcpu, irq);
+	spin_unlock(&li->lock);
+	if (!rc)
+		kvm_s390_vcpu_wakeup(vcpu);
+	return rc;
+}
+
+static inline void clear_irq_list(struct list_head *_list)
+{
+	struct kvm_s390_interrupt_info *inti, *n;
+
+	list_for_each_entry_safe(inti, n, _list, list) {
+		list_del(&inti->list);
+		kfree(inti);
+	}
+}
+
+static void inti_to_irq(struct kvm_s390_interrupt_info *inti,
+		       struct kvm_s390_irq *irq)
+{
+	irq->type = inti->type;
+	switch (inti->type) {
+	case KVM_S390_INT_PFAULT_INIT:
+	case KVM_S390_INT_PFAULT_DONE:
+	case KVM_S390_INT_VIRTIO:
+		irq->u.ext = inti->ext;
+		break;
+	case KVM_S390_INT_IO_MIN...KVM_S390_INT_IO_MAX:
+		irq->u.io = inti->io;
+		break;
+	}
+}
+
+void kvm_s390_clear_float_irqs(struct kvm *kvm)
+{
+	struct kvm_s390_float_interrupt *fi = &kvm->arch.float_int;
+	int i;
+
+	mutex_lock(&kvm->lock);
+	if (!kvm_s390_pv_is_protected(kvm))
+		fi->masked_irqs = 0;
+	mutex_unlock(&kvm->lock);
+	spin_lock(&fi->lock);
+	fi->pending_irqs = 0;
+	memset(&fi->srv_signal, 0, sizeof(fi->srv_signal));
+	memset(&fi->mchk, 0, sizeof(fi->mchk));
+	for (i = 0; i < FIRQ_LIST_COUNT; i++)
+		clear_irq_list(&fi->lists[i]);
+	for (i = 0; i < FIRQ_MAX_COUNT; i++)
+		fi->counters[i] = 0;
+	spin_unlock(&fi->lock);
+	kvm_s390_gisa_clear(kvm);
+};
+
+static int get_all_floating_irqs(struct kvm *kvm, u8 __user *usrbuf, u64 len)
+{
+	struct kvm_s390_gisa_interrupt *gi = &kvm->arch.gisa_int;
+	struct kvm_s390_interrupt_info *inti;
+	struct kvm_s390_float_interrupt *fi;
+	struct kvm_s390_irq *buf;
+	struct kvm_s390_irq *irq;
+	int max_irqs;
+	int ret = 0;
+	int n = 0;
+	int i;
+
+	if (len > KVM_S390_FLIC_MAX_BUFFER || len == 0)
+		return -EINVAL;
+
+	/*
+	 * We are already using -ENOMEM to signal
+	 * userspace it may retry with a bigger buffer,
+	 * so we need to use something else for this case
+	 */
+	buf = vzalloc(len);
+	if (!buf)
+		return -ENOBUFS;
+
+	max_irqs = len / sizeof(struct kvm_s390_irq);
+
+	if (gi->origin && gisa_get_ipm(gi->origin)) {
+		for (i = 0; i <= MAX_ISC; i++) {
+			if (n == max_irqs) {
+				/* signal userspace to try again */
+				ret = -ENOMEM;
+				goto out_nolock;
+			}
+			if (gisa_tac_ipm_gisc(gi->origin, i)) {
+				irq = (struct kvm_s390_irq *) &buf[n];
+				irq->type = KVM_S390_INT_IO(1, 0, 0, 0);
+				irq->u.io.io_int_word = isc_to_int_word(i);
+				n++;
+			}
+		}
+	}
+	fi = &kvm->arch.float_int;
+	spin_lock(&fi->lock);
+	for (i = 0; i < FIRQ_LIST_COUNT; i++) {
+		list_for_each_entry(inti, &fi->lists[i], list) {
+			if (n == max_irqs) {
+				/* signal userspace to try again */
+				ret = -ENOMEM;
+				goto out;
+			}
+			inti_to_irq(inti, &buf[n]);
+			n++;
+		}
+	}
+	if (test_bit(IRQ_PEND_EXT_SERVICE, &fi->pending_irqs) ||
+	    test_bit(IRQ_PEND_EXT_SERVICE_EV, &fi->pending_irqs)) {
+		if (n == max_irqs) {
+			/* signal userspace to try again */
+			ret = -ENOMEM;
+			goto out;
+		}
+		irq = (struct kvm_s390_irq *) &buf[n];
+		irq->type = KVM_S390_INT_SERVICE;
+		irq->u.ext = fi->srv_signal;
+		n++;
+	}
+	if (test_bit(IRQ_PEND_MCHK_REP, &fi->pending_irqs)) {
+		if (n == max_irqs) {
+				/* signal userspace to try again */
+				ret = -ENOMEM;
+				goto out;
+		}
+		irq = (struct kvm_s390_irq *) &buf[n];
+		irq->type = KVM_S390_MCHK;
+		irq->u.mchk = fi->mchk;
+		n++;
+}
+
+out:
+	spin_unlock(&fi->lock);
+out_nolock:
+	if (!ret && n > 0) {
+		if (copy_to_user(usrbuf, buf, sizeof(struct kvm_s390_irq) * n))
+			ret = -EFAULT;
+	}
+	vfree(buf);
+
+	return ret < 0 ? ret : n;
+}
+
+static int flic_ais_mode_get_all(struct kvm *kvm, struct kvm_device_attr *attr)
+{
+	struct kvm_s390_float_interrupt *fi = &kvm->arch.float_int;
+	struct kvm_s390_ais_all ais;
+
+	if (attr->attr < sizeof(ais))
+		return -EINVAL;
+
+	if (!test_kvm_facility(kvm, 72))
+		return -EOPNOTSUPP;
+
+	mutex_lock(&fi->ais_lock);
+	ais.simm = fi->simm;
+	ais.nimm = fi->nimm;
+	mutex_unlock(&fi->ais_lock);
+
+	if (copy_to_user((void __user *)attr->addr, &ais, sizeof(ais)))
+		return -EFAULT;
+
+	return 0;
+}
+
+static int flic_get_attr(struct kvm_device *dev, struct kvm_device_attr *attr)
+{
+	int r;
+
+	switch (attr->group) {
+	case KVM_DEV_FLIC_GET_ALL_IRQS:
+		r = get_all_floating_irqs(dev->kvm, (u8 __user *) attr->addr,
+					  attr->attr);
+		break;
+	case KVM_DEV_FLIC_AISM_ALL:
+		r = flic_ais_mode_get_all(dev->kvm, attr);
+		break;
+	default:
+		r = -EINVAL;
+	}
+
+	return r;
+}
+
+static inline int copy_irq_from_user(struct kvm_s390_interrupt_info *inti,
+				     u64 addr)
+{
+	struct kvm_s390_irq __user *uptr = (struct kvm_s390_irq __user *) addr;
+	void *target = NULL;
+	void __user *source;
+	u64 size;
+
+	if (get_user(inti->type, (u64 __user *)addr))
+		return -EFAULT;
+
+	switch (inti->type) {
+	case KVM_S390_INT_PFAULT_INIT:
+	case KVM_S390_INT_PFAULT_DONE:
+	case KVM_S390_INT_VIRTIO:
+	case KVM_S390_INT_SERVICE:
+		target = (void *) &inti->ext;
+		source = &uptr->u.ext;
+		size = sizeof(inti->ext);
+		break;
+	case KVM_S390_INT_IO_MIN...KVM_S390_INT_IO_MAX:
+		target = (void *) &inti->io;
+		source = &uptr->u.io;
+		size = sizeof(inti->io);
+		break;
+	case KVM_S390_MCHK:
+		target = (void *) &inti->mchk;
+		source = &uptr->u.mchk;
+		size = sizeof(inti->mchk);
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	if (copy_from_user(target, source, size))
+		return -EFAULT;
+
+	return 0;
+}
+
+static int enqueue_floating_irq(struct kvm_device *dev,
+				struct kvm_device_attr *attr)
+{
+	struct kvm_s390_interrupt_info *inti = NULL;
+	int r = 0;
+	int len = attr->attr;
+
+	if (len % sizeof(struct kvm_s390_irq) != 0)
+		return -EINVAL;
+	else if (len > KVM_S390_FLIC_MAX_BUFFER)
+		return -EINVAL;
+
+	while (len >= sizeof(struct kvm_s390_irq)) {
+		inti = kzalloc(sizeof(*inti), GFP_KERNEL_ACCOUNT);
+		if (!inti)
+			return -ENOMEM;
+
+		r = copy_irq_from_user(inti, attr->addr);
+		if (r) {
+			kfree(inti);
+			return r;
+		}
+		r = __inject_vm(dev->kvm, inti);
+		if (r) {
+			kfree(inti);
+			return r;
+		}
+		len -= sizeof(struct kvm_s390_irq);
+		attr->addr += sizeof(struct kvm_s390_irq);
+	}
+
+	return r;
+}
+
+static struct s390_io_adapter *get_io_adapter(struct kvm *kvm, unsigned int id)
+{
+	if (id >= MAX_S390_IO_ADAPTERS)
+		return NULL;
+	id = array_index_nospec(id, MAX_S390_IO_ADAPTERS);
+	return kvm->arch.adapters[id];
+}
+
+static int register_io_adapter(struct kvm_device *dev,
+			       struct kvm_device_attr *attr)
+{
+	struct s390_io_adapter *adapter;
+	struct kvm_s390_io_adapter adapter_info;
+
+	if (copy_from_user(&adapter_info,
+			   (void __user *)attr->addr, sizeof(adapter_info)))
+		return -EFAULT;
+
+	if (adapter_info.id >= MAX_S390_IO_ADAPTERS)
+		return -EINVAL;
+
+	adapter_info.id = array_index_nospec(adapter_info.id,
+					     MAX_S390_IO_ADAPTERS);
+
+	if (dev->kvm->arch.adapters[adapter_info.id] != NULL)
+		return -EINVAL;
+
+	adapter = kzalloc(sizeof(*adapter), GFP_KERNEL_ACCOUNT);
+	if (!adapter)
+		return -ENOMEM;
+
+	adapter->id = adapter_info.id;
+	adapter->isc = adapter_info.isc;
+	adapter->maskable = adapter_info.maskable;
+	adapter->masked = false;
+	adapter->swap = adapter_info.swap;
+	adapter->suppressible = (adapter_info.flags) &
+				KVM_S390_ADAPTER_SUPPRESSIBLE;
+	dev->kvm->arch.adapters[adapter->id] = adapter;
+
+	return 0;
+}
+
+int kvm_s390_mask_adapter(struct kvm *kvm, unsigned int id, bool masked)
+{
+	int ret;
+	struct s390_io_adapter *adapter = get_io_adapter(kvm, id);
+
+	if (!adapter || !adapter->maskable)
+		return -EINVAL;
+	ret = adapter->masked;
+	adapter->masked = masked;
+	return ret;
+}
+
+void kvm_s390_destroy_adapters(struct kvm *kvm)
+{
+	int i;
+
+	for (i = 0; i < MAX_S390_IO_ADAPTERS; i++)
+		kfree(kvm->arch.adapters[i]);
+}
+
+static int modify_io_adapter(struct kvm_device *dev,
+			     struct kvm_device_attr *attr)
+{
+	struct kvm_s390_io_adapter_req req;
+	struct s390_io_adapter *adapter;
+	int ret;
+
+	if (copy_from_user(&req, (void __user *)attr->addr, sizeof(req)))
+		return -EFAULT;
+
+	adapter = get_io_adapter(dev->kvm, req.id);
+	if (!adapter)
+		return -EINVAL;
+	switch (req.type) {
+	case KVM_S390_IO_ADAPTER_MASK:
+		ret = kvm_s390_mask_adapter(dev->kvm, req.id, req.mask);
+		if (ret > 0)
+			ret = 0;
+		break;
+	/*
+	 * The following operations are no longer needed and therefore no-ops.
+	 * The gpa to hva translation is done when an IRQ route is set up. The
+	 * set_irq code uses get_user_pages_remote() to do the actual write.
+	 */
+	case KVM_S390_IO_ADAPTER_MAP:
+	case KVM_S390_IO_ADAPTER_UNMAP:
+		ret = 0;
+		break;
+	default:
+		ret = -EINVAL;
+	}
+
+	return ret;
+}
+
+static int clear_io_irq(struct kvm *kvm, struct kvm_device_attr *attr)
+
+{
+	const u64 isc_mask = 0xffUL << 24; /* all iscs set */
+	u32 schid;
+
+	if (attr->flags)
+		return -EINVAL;
+	if (attr->attr != sizeof(schid))
+		return -EINVAL;
+	if (copy_from_user(&schid, (void __user *) attr->addr, sizeof(schid)))
+		return -EFAULT;
+	if (!schid)
+		return -EINVAL;
+	kfree(kvm_s390_get_io_int(kvm, isc_mask, schid));
+	/*
+	 * If userspace is conforming to the architecture, we can have at most
+	 * one pending I/O interrupt per subchannel, so this is effectively a
+	 * clear all.
+	 */
+	return 0;
+}
+
+static int modify_ais_mode(struct kvm *kvm, struct kvm_device_attr *attr)
+{
+	struct kvm_s390_float_interrupt *fi = &kvm->arch.float_int;
+	struct kvm_s390_ais_req req;
+	int ret = 0;
+
+	if (!test_kvm_facility(kvm, 72))
+		return -EOPNOTSUPP;
+
+	if (copy_from_user(&req, (void __user *)attr->addr, sizeof(req)))
+		return -EFAULT;
+
+	if (req.isc > MAX_ISC)
+		return -EINVAL;
+
+	trace_kvm_s390_modify_ais_mode(req.isc,
+				       (fi->simm & AIS_MODE_MASK(req.isc)) ?
+				       (fi->nimm & AIS_MODE_MASK(req.isc)) ?
+				       2 : KVM_S390_AIS_MODE_SINGLE :
+				       KVM_S390_AIS_MODE_ALL, req.mode);
+
+	mutex_lock(&fi->ais_lock);
+	switch (req.mode) {
+	case KVM_S390_AIS_MODE_ALL:
+		fi->simm &= ~AIS_MODE_MASK(req.isc);
+		fi->nimm &= ~AIS_MODE_MASK(req.isc);
+		break;
+	case KVM_S390_AIS_MODE_SINGLE:
+		fi->simm |= AIS_MODE_MASK(req.isc);
+		fi->nimm &= ~AIS_MODE_MASK(req.isc);
+		break;
+	default:
+		ret = -EINVAL;
+	}
+	mutex_unlock(&fi->ais_lock);
+
+	return ret;
+}
+
+static int kvm_s390_inject_airq(struct kvm *kvm,
+				struct s390_io_adapter *adapter)
+{
+	struct kvm_s390_float_interrupt *fi = &kvm->arch.float_int;
+	struct kvm_s390_interrupt s390int = {
+		.type = KVM_S390_INT_IO(1, 0, 0, 0),
+		.parm = 0,
+		.parm64 = isc_to_int_word(adapter->isc),
+	};
+	int ret = 0;
+
+	if (!test_kvm_facility(kvm, 72) || !adapter->suppressible)
+		return kvm_s390_inject_vm(kvm, &s390int);
+
+	mutex_lock(&fi->ais_lock);
+	if (fi->nimm & AIS_MODE_MASK(adapter->isc)) {
+		trace_kvm_s390_airq_suppressed(adapter->id, adapter->isc);
+		goto out;
+	}
+
+	ret = kvm_s390_inject_vm(kvm, &s390int);
+	if (!ret && (fi->simm & AIS_MODE_MASK(adapter->isc))) {
+		fi->nimm |= AIS_MODE_MASK(adapter->isc);
+		trace_kvm_s390_modify_ais_mode(adapter->isc,
+					       KVM_S390_AIS_MODE_SINGLE, 2);
+	}
+out:
+	mutex_unlock(&fi->ais_lock);
+	return ret;
+}
+
+static int flic_inject_airq(struct kvm *kvm, struct kvm_device_attr *attr)
+{
+	unsigned int id = attr->attr;
+	struct s390_io_adapter *adapter = get_io_adapter(kvm, id);
+
+	if (!adapter)
+		return -EINVAL;
+
+	return kvm_s390_inject_airq(kvm, adapter);
+}
+
+static int flic_ais_mode_set_all(struct kvm *kvm, struct kvm_device_attr *attr)
+{
+	struct kvm_s390_float_interrupt *fi = &kvm->arch.float_int;
+	struct kvm_s390_ais_all ais;
+
+	if (!test_kvm_facility(kvm, 72))
+		return -EOPNOTSUPP;
+
+	if (copy_from_user(&ais, (void __user *)attr->addr, sizeof(ais)))
+		return -EFAULT;
+
+	mutex_lock(&fi->ais_lock);
+	fi->simm = ais.simm;
+	fi->nimm = ais.nimm;
+	mutex_unlock(&fi->ais_lock);
+
+	return 0;
+}
+
+static int flic_set_attr(struct kvm_device *dev, struct kvm_device_attr *attr)
+{
+	int r = 0;
+	unsigned long i;
+	struct kvm_vcpu *vcpu;
+
+	switch (attr->group) {
+	case KVM_DEV_FLIC_ENQUEUE:
+		r = enqueue_floating_irq(dev, attr);
+		break;
+	case KVM_DEV_FLIC_CLEAR_IRQS:
+		kvm_s390_clear_float_irqs(dev->kvm);
+		break;
+	case KVM_DEV_FLIC_APF_ENABLE:
+		dev->kvm->arch.gmap->pfault_enabled = 1;
+		break;
+	case KVM_DEV_FLIC_APF_DISABLE_WAIT:
+		dev->kvm->arch.gmap->pfault_enabled = 0;
+		/*
+		 * Make sure no async faults are in transition when
+		 * clearing the queues. So we don't need to worry
+		 * about late coming workers.
+		 */
+		synchronize_srcu(&dev->kvm->srcu);
+		kvm_for_each_vcpu(i, vcpu, dev->kvm)
+			kvm_clear_async_pf_completion_queue(vcpu);
+		break;
+	case KVM_DEV_FLIC_ADAPTER_REGISTER:
+		r = register_io_adapter(dev, attr);
+		break;
+	case KVM_DEV_FLIC_ADAPTER_MODIFY:
+		r = modify_io_adapter(dev, attr);
+		break;
+	case KVM_DEV_FLIC_CLEAR_IO_IRQ:
+		r = clear_io_irq(dev->kvm, attr);
+		break;
+	case KVM_DEV_FLIC_AISM:
+		r = modify_ais_mode(dev->kvm, attr);
+		break;
+	case KVM_DEV_FLIC_AIRQ_INJECT:
+		r = flic_inject_airq(dev->kvm, attr);
+		break;
+	case KVM_DEV_FLIC_AISM_ALL:
+		r = flic_ais_mode_set_all(dev->kvm, attr);
+		break;
+	default:
+		r = -EINVAL;
+	}
+
+	return r;
+}
+
+static int flic_has_attr(struct kvm_device *dev,
+			     struct kvm_device_attr *attr)
+{
+	switch (attr->group) {
+	case KVM_DEV_FLIC_GET_ALL_IRQS:
+	case KVM_DEV_FLIC_ENQUEUE:
+	case KVM_DEV_FLIC_CLEAR_IRQS:
+	case KVM_DEV_FLIC_APF_ENABLE:
+	case KVM_DEV_FLIC_APF_DISABLE_WAIT:
+	case KVM_DEV_FLIC_ADAPTER_REGISTER:
+	case KVM_DEV_FLIC_ADAPTER_MODIFY:
+	case KVM_DEV_FLIC_CLEAR_IO_IRQ:
+	case KVM_DEV_FLIC_AISM:
+	case KVM_DEV_FLIC_AIRQ_INJECT:
+	case KVM_DEV_FLIC_AISM_ALL:
+		return 0;
+	}
+	return -ENXIO;
+}
+
+static int flic_create(struct kvm_device *dev, u32 type)
+{
+	if (!dev)
+		return -EINVAL;
+	if (dev->kvm->arch.flic)
+		return -EINVAL;
+	dev->kvm->arch.flic = dev;
+	return 0;
+}
+
+static void flic_destroy(struct kvm_device *dev)
+{
+	dev->kvm->arch.flic = NULL;
+	kfree(dev);
+}
+
+/* s390 floating irq controller (flic) */
+struct kvm_device_ops kvm_flic_ops = {
+	.name = "kvm-flic",
+	.get_attr = flic_get_attr,
+	.set_attr = flic_set_attr,
+	.has_attr = flic_has_attr,
+	.create = flic_create,
+	.destroy = flic_destroy,
+};
+
+static unsigned long get_ind_bit(__u64 addr, unsigned long bit_nr, bool swap)
+{
+	unsigned long bit;
+
+	bit = bit_nr + (addr % PAGE_SIZE) * 8;
+
+	return swap ? (bit ^ (BITS_PER_LONG - 1)) : bit;
+}
+
+static struct page *get_map_page(struct kvm *kvm, u64 uaddr)
+{
+	struct page *page = NULL;
+
+	mmap_read_lock(kvm->mm);
+	get_user_pages_remote(kvm->mm, uaddr, 1, FOLL_WRITE,
+			      &page, NULL);
+	mmap_read_unlock(kvm->mm);
+	return page;
+}
+
+static int adapter_indicators_set(struct kvm *kvm,
+				  struct s390_io_adapter *adapter,
+				  struct kvm_s390_adapter_int *adapter_int)
+{
+	unsigned long bit;
+	int summary_set, idx;
+	struct page *ind_page, *summary_page;
+	void *map;
+
+	ind_page = get_map_page(kvm, adapter_int->ind_addr);
+	if (!ind_page)
+		return -1;
+	summary_page = get_map_page(kvm, adapter_int->summary_addr);
+	if (!summary_page) {
+		put_page(ind_page);
+		return -1;
+	}
+
+	idx = srcu_read_lock(&kvm->srcu);
+	map = page_address(ind_page);
+	bit = get_ind_bit(adapter_int->ind_addr,
+			  adapter_int->ind_offset, adapter->swap);
+	set_bit(bit, map);
+	mark_page_dirty(kvm, adapter_int->ind_addr >> PAGE_SHIFT);
+	set_page_dirty_lock(ind_page);
+	map = page_address(summary_page);
+	bit = get_ind_bit(adapter_int->summary_addr,
+			  adapter_int->summary_offset, adapter->swap);
+	summary_set = test_and_set_bit(bit, map);
+	mark_page_dirty(kvm, adapter_int->summary_addr >> PAGE_SHIFT);
+	set_page_dirty_lock(summary_page);
+	srcu_read_unlock(&kvm->srcu, idx);
+
+	put_page(ind_page);
+	put_page(summary_page);
+	return summary_set ? 0 : 1;
+}
+
+/*
+ * < 0 - not injected due to error
+ * = 0 - coalesced, summary indicator already active
+ * > 0 - injected interrupt
+ */
+static int set_adapter_int(struct kvm_kernel_irq_routing_entry *e,
+			   struct kvm *kvm, int irq_source_id, int level,
+			   bool line_status)
+{
+	int ret;
+	struct s390_io_adapter *adapter;
+
+	/* We're only interested in the 0->1 transition. */
+	if (!level)
+		return 0;
+	adapter = get_io_adapter(kvm, e->adapter.adapter_id);
+	if (!adapter)
+		return -1;
+	ret = adapter_indicators_set(kvm, adapter, &e->adapter);
+	if ((ret > 0) && !adapter->masked) {
+		ret = kvm_s390_inject_airq(kvm, adapter);
+		if (ret == 0)
+			ret = 1;
+	}
+	return ret;
+}
+
+/*
+ * Inject the machine check to the guest.
+ */
+void kvm_s390_reinject_machine_check(struct kvm_vcpu *vcpu,
+				     struct mcck_volatile_info *mcck_info)
+{
+	struct kvm_s390_interrupt_info inti;
+	struct kvm_s390_irq irq;
+	struct kvm_s390_mchk_info *mchk;
+	union mci mci;
+	__u64 cr14 = 0;         /* upper bits are not used */
+	int rc;
+
+	mci.val = mcck_info->mcic;
+	if (mci.sr)
+		cr14 |= CR14_RECOVERY_SUBMASK;
+	if (mci.dg)
+		cr14 |= CR14_DEGRADATION_SUBMASK;
+	if (mci.w)
+		cr14 |= CR14_WARNING_SUBMASK;
+
+	mchk = mci.ck ? &inti.mchk : &irq.u.mchk;
+	mchk->cr14 = cr14;
+	mchk->mcic = mcck_info->mcic;
+	mchk->ext_damage_code = mcck_info->ext_damage_code;
+	mchk->failing_storage_address = mcck_info->failing_storage_address;
+	if (mci.ck) {
+		/* Inject the floating machine check */
+		inti.type = KVM_S390_MCHK;
+		rc = __inject_vm(vcpu->kvm, &inti);
+	} else {
+		/* Inject the machine check to specified vcpu */
+		irq.type = KVM_S390_MCHK;
+		rc = kvm_s390_inject_vcpu(vcpu, &irq);
+	}
+	WARN_ON_ONCE(rc);
+}
+
+int kvm_set_routing_entry(struct kvm *kvm,
+			  struct kvm_kernel_irq_routing_entry *e,
+			  const struct kvm_irq_routing_entry *ue)
+{
+	u64 uaddr;
+
+	switch (ue->type) {
+	/* we store the userspace addresses instead of the guest addresses */
+	case KVM_IRQ_ROUTING_S390_ADAPTER:
+		e->set = set_adapter_int;
+		uaddr =  gmap_translate(kvm->arch.gmap, ue->u.adapter.summary_addr);
+		if (uaddr == -EFAULT)
+			return -EFAULT;
+		e->adapter.summary_addr = uaddr;
+		uaddr =  gmap_translate(kvm->arch.gmap, ue->u.adapter.ind_addr);
+		if (uaddr == -EFAULT)
+			return -EFAULT;
+		e->adapter.ind_addr = uaddr;
+		e->adapter.summary_offset = ue->u.adapter.summary_offset;
+		e->adapter.ind_offset = ue->u.adapter.ind_offset;
+		e->adapter.adapter_id = ue->u.adapter.adapter_id;
+		return 0;
+	default:
+		return -EINVAL;
+	}
+}
+
+int kvm_set_msi(struct kvm_kernel_irq_routing_entry *e, struct kvm *kvm,
+		int irq_source_id, int level, bool line_status)
+{
+	return -EINVAL;
+}
+
+int kvm_s390_set_irq_state(struct kvm_vcpu *vcpu, void __user *irqstate, int len)
+{
+	struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
+	struct kvm_s390_irq *buf;
+	int r = 0;
+	int n;
+
+	buf = vmalloc(len);
+	if (!buf)
+		return -ENOMEM;
+
+	if (copy_from_user((void *) buf, irqstate, len)) {
+		r = -EFAULT;
+		goto out_free;
+	}
+
+	/*
+	 * Don't allow setting the interrupt state
+	 * when there are already interrupts pending
+	 */
+	spin_lock(&li->lock);
+	if (li->pending_irqs) {
+		r = -EBUSY;
+		goto out_unlock;
+	}
+
+	for (n = 0; n < len / sizeof(*buf); n++) {
+		r = do_inject_vcpu(vcpu, &buf[n]);
+		if (r)
+			break;
+	}
+
+out_unlock:
+	spin_unlock(&li->lock);
+out_free:
+	vfree(buf);
+
+	return r;
+}
+
+static void store_local_irq(struct kvm_s390_local_interrupt *li,
+			    struct kvm_s390_irq *irq,
+			    unsigned long irq_type)
+{
+	switch (irq_type) {
+	case IRQ_PEND_MCHK_EX:
+	case IRQ_PEND_MCHK_REP:
+		irq->type = KVM_S390_MCHK;
+		irq->u.mchk = li->irq.mchk;
+		break;
+	case IRQ_PEND_PROG:
+		irq->type = KVM_S390_PROGRAM_INT;
+		irq->u.pgm = li->irq.pgm;
+		break;
+	case IRQ_PEND_PFAULT_INIT:
+		irq->type = KVM_S390_INT_PFAULT_INIT;
+		irq->u.ext = li->irq.ext;
+		break;
+	case IRQ_PEND_EXT_EXTERNAL:
+		irq->type = KVM_S390_INT_EXTERNAL_CALL;
+		irq->u.extcall = li->irq.extcall;
+		break;
+	case IRQ_PEND_EXT_CLOCK_COMP:
+		irq->type = KVM_S390_INT_CLOCK_COMP;
+		break;
+	case IRQ_PEND_EXT_CPU_TIMER:
+		irq->type = KVM_S390_INT_CPU_TIMER;
+		break;
+	case IRQ_PEND_SIGP_STOP:
+		irq->type = KVM_S390_SIGP_STOP;
+		irq->u.stop = li->irq.stop;
+		break;
+	case IRQ_PEND_RESTART:
+		irq->type = KVM_S390_RESTART;
+		break;
+	case IRQ_PEND_SET_PREFIX:
+		irq->type = KVM_S390_SIGP_SET_PREFIX;
+		irq->u.prefix = li->irq.prefix;
+		break;
+	}
+}
+
+int kvm_s390_get_irq_state(struct kvm_vcpu *vcpu, __u8 __user *buf, int len)
+{
+	int scn;
+	DECLARE_BITMAP(sigp_emerg_pending, KVM_MAX_VCPUS);
+	struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
+	unsigned long pending_irqs;
+	struct kvm_s390_irq irq;
+	unsigned long irq_type;
+	int cpuaddr;
+	int n = 0;
+
+	spin_lock(&li->lock);
+	pending_irqs = li->pending_irqs;
+	memcpy(&sigp_emerg_pending, &li->sigp_emerg_pending,
+	       sizeof(sigp_emerg_pending));
+	spin_unlock(&li->lock);
+
+	for_each_set_bit(irq_type, &pending_irqs, IRQ_PEND_COUNT) {
+		memset(&irq, 0, sizeof(irq));
+		if (irq_type == IRQ_PEND_EXT_EMERGENCY)
+			continue;
+		if (n + sizeof(irq) > len)
+			return -ENOBUFS;
+		store_local_irq(&vcpu->arch.local_int, &irq, irq_type);
+		if (copy_to_user(&buf[n], &irq, sizeof(irq)))
+			return -EFAULT;
+		n += sizeof(irq);
+	}
+
+	if (test_bit(IRQ_PEND_EXT_EMERGENCY, &pending_irqs)) {
+		for_each_set_bit(cpuaddr, sigp_emerg_pending, KVM_MAX_VCPUS) {
+			memset(&irq, 0, sizeof(irq));
+			if (n + sizeof(irq) > len)
+				return -ENOBUFS;
+			irq.type = KVM_S390_INT_EMERGENCY;
+			irq.u.emerg.code = cpuaddr;
+			if (copy_to_user(&buf[n], &irq, sizeof(irq)))
+				return -EFAULT;
+			n += sizeof(irq);
+		}
+	}
+
+	if (sca_ext_call_pending(vcpu, &scn)) {
+		if (n + sizeof(irq) > len)
+			return -ENOBUFS;
+		memset(&irq, 0, sizeof(irq));
+		irq.type = KVM_S390_INT_EXTERNAL_CALL;
+		irq.u.extcall.code = scn;
+		if (copy_to_user(&buf[n], &irq, sizeof(irq)))
+			return -EFAULT;
+		n += sizeof(irq);
+	}
+
+	return n;
+}
+
+static void __airqs_kick_single_vcpu(struct kvm *kvm, u8 deliverable_mask)
+{
+	int vcpu_idx, online_vcpus = atomic_read(&kvm->online_vcpus);
+	struct kvm_s390_gisa_interrupt *gi = &kvm->arch.gisa_int;
+	struct kvm_vcpu *vcpu;
+	u8 vcpu_isc_mask;
+
+	for_each_set_bit(vcpu_idx, kvm->arch.idle_mask, online_vcpus) {
+		vcpu = kvm_get_vcpu(kvm, vcpu_idx);
+		if (psw_ioint_disabled(vcpu))
+			continue;
+		vcpu_isc_mask = (u8)(vcpu->arch.sie_block->gcr[6] >> 24);
+		if (deliverable_mask & vcpu_isc_mask) {
+			/* lately kicked but not yet running */
+			if (test_and_set_bit(vcpu_idx, gi->kicked_mask))
+				return;
+			kvm_s390_vcpu_wakeup(vcpu);
+			return;
+		}
+	}
+}
+
+static enum hrtimer_restart gisa_vcpu_kicker(struct hrtimer *timer)
+{
+	struct kvm_s390_gisa_interrupt *gi =
+		container_of(timer, struct kvm_s390_gisa_interrupt, timer);
+	struct kvm *kvm =
+		container_of(gi->origin, struct sie_page2, gisa)->kvm;
+	u8 pending_mask;
+
+	pending_mask = gisa_get_ipm_or_restore_iam(gi);
+	if (pending_mask) {
+		__airqs_kick_single_vcpu(kvm, pending_mask);
+		hrtimer_forward_now(timer, ns_to_ktime(gi->expires));
+		return HRTIMER_RESTART;
+	}
+
+	return HRTIMER_NORESTART;
+}
+
+#define NULL_GISA_ADDR 0x00000000UL
+#define NONE_GISA_ADDR 0x00000001UL
+#define GISA_ADDR_MASK 0xfffff000UL
+
+static void process_gib_alert_list(void)
+{
+	struct kvm_s390_gisa_interrupt *gi;
+	u32 final, gisa_phys, origin = 0UL;
+	struct kvm_s390_gisa *gisa;
+	struct kvm *kvm;
+
+	do {
+		/*
+		 * If the NONE_GISA_ADDR is still stored in the alert list
+		 * origin, we will leave the outer loop. No further GISA has
+		 * been added to the alert list by millicode while processing
+		 * the current alert list.
+		 */
+		final = (origin & NONE_GISA_ADDR);
+		/*
+		 * Cut off the alert list and store the NONE_GISA_ADDR in the
+		 * alert list origin to avoid further GAL interruptions.
+		 * A new alert list can be build up by millicode in parallel
+		 * for guests not in the yet cut-off alert list. When in the
+		 * final loop, store the NULL_GISA_ADDR instead. This will re-
+		 * enable GAL interruptions on the host again.
+		 */
+		origin = xchg(&gib->alert_list_origin,
+			      (!final) ? NONE_GISA_ADDR : NULL_GISA_ADDR);
+		/*
+		 * Loop through the just cut-off alert list and start the
+		 * gisa timers to kick idle vcpus to consume the pending
+		 * interruptions asap.
+		 */
+		while (origin & GISA_ADDR_MASK) {
+			gisa_phys = origin;
+			gisa = phys_to_virt(gisa_phys);
+			origin = gisa->next_alert;
+			gisa->next_alert = gisa_phys;
+			kvm = container_of(gisa, struct sie_page2, gisa)->kvm;
+			gi = &kvm->arch.gisa_int;
+			if (hrtimer_active(&gi->timer))
+				hrtimer_cancel(&gi->timer);
+			hrtimer_start(&gi->timer, 0, HRTIMER_MODE_REL);
+		}
+	} while (!final);
+
+}
+
+void kvm_s390_gisa_clear(struct kvm *kvm)
+{
+	struct kvm_s390_gisa_interrupt *gi = &kvm->arch.gisa_int;
+
+	if (!gi->origin)
+		return;
+	gisa_clear_ipm(gi->origin);
+	VM_EVENT(kvm, 3, "gisa 0x%pK cleared", gi->origin);
+}
+
+void kvm_s390_gisa_init(struct kvm *kvm)
+{
+	struct kvm_s390_gisa_interrupt *gi = &kvm->arch.gisa_int;
+
+	if (!css_general_characteristics.aiv)
+		return;
+	gi->origin = &kvm->arch.sie_page2->gisa;
+	gi->alert.mask = 0;
+	spin_lock_init(&gi->alert.ref_lock);
+	gi->expires = 50 * 1000; /* 50 usec */
+	hrtimer_init(&gi->timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
+	gi->timer.function = gisa_vcpu_kicker;
+	memset(gi->origin, 0, sizeof(struct kvm_s390_gisa));
+	gi->origin->next_alert = (u32)virt_to_phys(gi->origin);
+	VM_EVENT(kvm, 3, "gisa 0x%pK initialized", gi->origin);
+}
+
+void kvm_s390_gisa_enable(struct kvm *kvm)
+{
+	struct kvm_s390_gisa_interrupt *gi = &kvm->arch.gisa_int;
+	struct kvm_vcpu *vcpu;
+	unsigned long i;
+	u32 gisa_desc;
+
+	if (gi->origin)
+		return;
+	kvm_s390_gisa_init(kvm);
+	gisa_desc = kvm_s390_get_gisa_desc(kvm);
+	if (!gisa_desc)
+		return;
+	kvm_for_each_vcpu(i, vcpu, kvm) {
+		mutex_lock(&vcpu->mutex);
+		vcpu->arch.sie_block->gd = gisa_desc;
+		vcpu->arch.sie_block->eca |= ECA_AIV;
+		VCPU_EVENT(vcpu, 3, "AIV gisa format-%u enabled for cpu %03u",
+			   vcpu->arch.sie_block->gd & 0x3, vcpu->vcpu_id);
+		mutex_unlock(&vcpu->mutex);
+	}
+}
+
+void kvm_s390_gisa_destroy(struct kvm *kvm)
+{
+	struct kvm_s390_gisa_interrupt *gi = &kvm->arch.gisa_int;
+	struct kvm_s390_gisa *gisa = gi->origin;
+
+	if (!gi->origin)
+		return;
+	WARN(gi->alert.mask != 0x00,
+	     "unexpected non zero alert.mask 0x%02x",
+	     gi->alert.mask);
+	gi->alert.mask = 0x00;
+	if (gisa_set_iam(gi->origin, gi->alert.mask))
+		process_gib_alert_list();
+	hrtimer_cancel(&gi->timer);
+	gi->origin = NULL;
+	VM_EVENT(kvm, 3, "gisa 0x%pK destroyed", gisa);
+}
+
+void kvm_s390_gisa_disable(struct kvm *kvm)
+{
+	struct kvm_s390_gisa_interrupt *gi = &kvm->arch.gisa_int;
+	struct kvm_vcpu *vcpu;
+	unsigned long i;
+
+	if (!gi->origin)
+		return;
+	kvm_for_each_vcpu(i, vcpu, kvm) {
+		mutex_lock(&vcpu->mutex);
+		vcpu->arch.sie_block->eca &= ~ECA_AIV;
+		vcpu->arch.sie_block->gd = 0U;
+		mutex_unlock(&vcpu->mutex);
+		VCPU_EVENT(vcpu, 3, "AIV disabled for cpu %03u", vcpu->vcpu_id);
+	}
+	kvm_s390_gisa_destroy(kvm);
+}
+
+/**
+ * kvm_s390_gisc_register - register a guest ISC
+ *
+ * @kvm:  the kernel vm to work with
+ * @gisc: the guest interruption sub class to register
+ *
+ * The function extends the vm specific alert mask to use.
+ * The effective IAM mask in the GISA is updated as well
+ * in case the GISA is not part of the GIB alert list.
+ * It will be updated latest when the IAM gets restored
+ * by gisa_get_ipm_or_restore_iam().
+ *
+ * Returns: the nonspecific ISC (NISC) the gib alert mechanism
+ *          has registered with the channel subsystem.
+ *          -ENODEV in case the vm uses no GISA
+ *          -ERANGE in case the guest ISC is invalid
+ */
+int kvm_s390_gisc_register(struct kvm *kvm, u32 gisc)
+{
+	struct kvm_s390_gisa_interrupt *gi = &kvm->arch.gisa_int;
+
+	if (!gi->origin)
+		return -ENODEV;
+	if (gisc > MAX_ISC)
+		return -ERANGE;
+
+	spin_lock(&gi->alert.ref_lock);
+	gi->alert.ref_count[gisc]++;
+	if (gi->alert.ref_count[gisc] == 1) {
+		gi->alert.mask |= 0x80 >> gisc;
+		gisa_set_iam(gi->origin, gi->alert.mask);
+	}
+	spin_unlock(&gi->alert.ref_lock);
+
+	return gib->nisc;
+}
+EXPORT_SYMBOL_GPL(kvm_s390_gisc_register);
+
+/**
+ * kvm_s390_gisc_unregister - unregister a guest ISC
+ *
+ * @kvm:  the kernel vm to work with
+ * @gisc: the guest interruption sub class to register
+ *
+ * The function reduces the vm specific alert mask to use.
+ * The effective IAM mask in the GISA is updated as well
+ * in case the GISA is not part of the GIB alert list.
+ * It will be updated latest when the IAM gets restored
+ * by gisa_get_ipm_or_restore_iam().
+ *
+ * Returns: the nonspecific ISC (NISC) the gib alert mechanism
+ *          has registered with the channel subsystem.
+ *          -ENODEV in case the vm uses no GISA
+ *          -ERANGE in case the guest ISC is invalid
+ *          -EINVAL in case the guest ISC is not registered
+ */
+int kvm_s390_gisc_unregister(struct kvm *kvm, u32 gisc)
+{
+	struct kvm_s390_gisa_interrupt *gi = &kvm->arch.gisa_int;
+	int rc = 0;
+
+	if (!gi->origin)
+		return -ENODEV;
+	if (gisc > MAX_ISC)
+		return -ERANGE;
+
+	spin_lock(&gi->alert.ref_lock);
+	if (gi->alert.ref_count[gisc] == 0) {
+		rc = -EINVAL;
+		goto out;
+	}
+	gi->alert.ref_count[gisc]--;
+	if (gi->alert.ref_count[gisc] == 0) {
+		gi->alert.mask &= ~(0x80 >> gisc);
+		gisa_set_iam(gi->origin, gi->alert.mask);
+	}
+out:
+	spin_unlock(&gi->alert.ref_lock);
+
+	return rc;
+}
+EXPORT_SYMBOL_GPL(kvm_s390_gisc_unregister);
+
+static void aen_host_forward(unsigned long si)
+{
+	struct kvm_s390_gisa_interrupt *gi;
+	struct zpci_gaite *gaite;
+	struct kvm *kvm;
+
+	gaite = (struct zpci_gaite *)aift->gait +
+		(si * sizeof(struct zpci_gaite));
+	if (gaite->count == 0)
+		return;
+	if (gaite->aisb != 0)
+		set_bit_inv(gaite->aisbo, phys_to_virt(gaite->aisb));
+
+	kvm = kvm_s390_pci_si_to_kvm(aift, si);
+	if (!kvm)
+		return;
+	gi = &kvm->arch.gisa_int;
+
+	if (!(gi->origin->g1.simm & AIS_MODE_MASK(gaite->gisc)) ||
+	    !(gi->origin->g1.nimm & AIS_MODE_MASK(gaite->gisc))) {
+		gisa_set_ipm_gisc(gi->origin, gaite->gisc);
+		if (hrtimer_active(&gi->timer))
+			hrtimer_cancel(&gi->timer);
+		hrtimer_start(&gi->timer, 0, HRTIMER_MODE_REL);
+		kvm->stat.aen_forward++;
+	}
+}
+
+static void aen_process_gait(u8 isc)
+{
+	bool found = false, first = true;
+	union zpci_sic_iib iib = {{0}};
+	unsigned long si, flags;
+
+	spin_lock_irqsave(&aift->gait_lock, flags);
+
+	if (!aift->gait) {
+		spin_unlock_irqrestore(&aift->gait_lock, flags);
+		return;
+	}
+
+	for (si = 0;;) {
+		/* Scan adapter summary indicator bit vector */
+		si = airq_iv_scan(aift->sbv, si, airq_iv_end(aift->sbv));
+		if (si == -1UL) {
+			if (first || found) {
+				/* Re-enable interrupts. */
+				zpci_set_irq_ctrl(SIC_IRQ_MODE_SINGLE, isc,
+						  &iib);
+				first = found = false;
+			} else {
+				/* Interrupts on and all bits processed */
+				break;
+			}
+			found = false;
+			si = 0;
+			/* Scan again after re-enabling interrupts */
+			continue;
+		}
+		found = true;
+		aen_host_forward(si);
+	}
+
+	spin_unlock_irqrestore(&aift->gait_lock, flags);
+}
+
+static void gib_alert_irq_handler(struct airq_struct *airq,
+				  struct tpi_info *tpi_info)
+{
+	struct tpi_adapter_info *info = (struct tpi_adapter_info *)tpi_info;
+
+	inc_irq_stat(IRQIO_GAL);
+
+	if ((info->forward || info->error) &&
+	    IS_ENABLED(CONFIG_VFIO_PCI_ZDEV_KVM)) {
+		aen_process_gait(info->isc);
+		if (info->aism != 0)
+			process_gib_alert_list();
+	} else {
+		process_gib_alert_list();
+	}
+}
+
+static struct airq_struct gib_alert_irq = {
+	.handler = gib_alert_irq_handler,
+};
+
+void kvm_s390_gib_destroy(void)
+{
+	if (!gib)
+		return;
+	if (kvm_s390_pci_interp_allowed() && aift) {
+		mutex_lock(&aift->aift_lock);
+		kvm_s390_pci_aen_exit();
+		mutex_unlock(&aift->aift_lock);
+	}
+	chsc_sgib(0);
+	unregister_adapter_interrupt(&gib_alert_irq);
+	free_page((unsigned long)gib);
+	gib = NULL;
+}
+
+int __init kvm_s390_gib_init(u8 nisc)
+{
+	u32 gib_origin;
+	int rc = 0;
+
+	if (!css_general_characteristics.aiv) {
+		KVM_EVENT(3, "%s", "gib not initialized, no AIV facility");
+		goto out;
+	}
+
+	gib = (struct kvm_s390_gib *)get_zeroed_page(GFP_KERNEL_ACCOUNT | GFP_DMA);
+	if (!gib) {
+		rc = -ENOMEM;
+		goto out;
+	}
+
+	gib_alert_irq.isc = nisc;
+	if (register_adapter_interrupt(&gib_alert_irq)) {
+		pr_err("Registering the GIB alert interruption handler failed\n");
+		rc = -EIO;
+		goto out_free_gib;
+	}
+	/* adapter interrupts used for AP (applicable here) don't use the LSI */
+	*gib_alert_irq.lsi_ptr = 0xff;
+
+	gib->nisc = nisc;
+	gib_origin = virt_to_phys(gib);
+	if (chsc_sgib(gib_origin)) {
+		pr_err("Associating the GIB with the AIV facility failed\n");
+		free_page((unsigned long)gib);
+		gib = NULL;
+		rc = -EIO;
+		goto out_unreg_gal;
+	}
+
+	if (kvm_s390_pci_interp_allowed()) {
+		if (kvm_s390_pci_aen_init(nisc)) {
+			pr_err("Initializing AEN for PCI failed\n");
+			rc = -EIO;
+			goto out_unreg_gal;
+		}
+	}
+
+	KVM_EVENT(3, "gib 0x%pK (nisc=%d) initialized", gib, gib->nisc);
+	goto out;
+
+out_unreg_gal:
+	unregister_adapter_interrupt(&gib_alert_irq);
+out_free_gib:
+	free_page((unsigned long)gib);
+	gib = NULL;
+out:
+	return rc;
+}
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
new file mode 100644
index 0000000000..b3f17e014c
--- /dev/null
+++ b/arch/s390/kvm/kvm-s390.c
@@ -0,0 +1,5895 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * hosting IBM Z kernel virtual machines (s390x)
+ *
+ * Copyright IBM Corp. 2008, 2020
+ *
+ *    Author(s): Carsten Otte <cotte@de.ibm.com>
+ *               Christian Borntraeger <borntraeger@de.ibm.com>
+ *               Christian Ehrhardt <ehrhardt@de.ibm.com>
+ *               Jason J. Herne <jjherne@us.ibm.com>
+ */
+
+#define KMSG_COMPONENT "kvm-s390"
+#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
+
+#include <linux/compiler.h>
+#include <linux/err.h>
+#include <linux/fs.h>
+#include <linux/hrtimer.h>
+#include <linux/init.h>
+#include <linux/kvm.h>
+#include <linux/kvm_host.h>
+#include <linux/mman.h>
+#include <linux/module.h>
+#include <linux/moduleparam.h>
+#include <linux/random.h>
+#include <linux/slab.h>
+#include <linux/timer.h>
+#include <linux/vmalloc.h>
+#include <linux/bitmap.h>
+#include <linux/sched/signal.h>
+#include <linux/string.h>
+#include <linux/pgtable.h>
+#include <linux/mmu_notifier.h>
+
+#include <asm/asm-offsets.h>
+#include <asm/lowcore.h>
+#include <asm/stp.h>
+#include <asm/gmap.h>
+#include <asm/nmi.h>
+#include <asm/switch_to.h>
+#include <asm/isc.h>
+#include <asm/sclp.h>
+#include <asm/cpacf.h>
+#include <asm/timex.h>
+#include <asm/ap.h>
+#include <asm/uv.h>
+#include <asm/fpu/api.h>
+#include "kvm-s390.h"
+#include "gaccess.h"
+#include "pci.h"
+
+#define CREATE_TRACE_POINTS
+#include "trace.h"
+#include "trace-s390.h"
+
+#define MEM_OP_MAX_SIZE 65536	/* Maximum transfer size for KVM_S390_MEM_OP */
+#define LOCAL_IRQS 32
+#define VCPU_IRQS_MAX_BUF (sizeof(struct kvm_s390_irq) * \
+			   (KVM_MAX_VCPUS + LOCAL_IRQS))
+
+const struct _kvm_stats_desc kvm_vm_stats_desc[] = {
+	KVM_GENERIC_VM_STATS(),
+	STATS_DESC_COUNTER(VM, inject_io),
+	STATS_DESC_COUNTER(VM, inject_float_mchk),
+	STATS_DESC_COUNTER(VM, inject_pfault_done),
+	STATS_DESC_COUNTER(VM, inject_service_signal),
+	STATS_DESC_COUNTER(VM, inject_virtio),
+	STATS_DESC_COUNTER(VM, aen_forward)
+};
+
+const struct kvm_stats_header kvm_vm_stats_header = {
+	.name_size = KVM_STATS_NAME_SIZE,
+	.num_desc = ARRAY_SIZE(kvm_vm_stats_desc),
+	.id_offset = sizeof(struct kvm_stats_header),
+	.desc_offset = sizeof(struct kvm_stats_header) + KVM_STATS_NAME_SIZE,
+	.data_offset = sizeof(struct kvm_stats_header) + KVM_STATS_NAME_SIZE +
+		       sizeof(kvm_vm_stats_desc),
+};
+
+const struct _kvm_stats_desc kvm_vcpu_stats_desc[] = {
+	KVM_GENERIC_VCPU_STATS(),
+	STATS_DESC_COUNTER(VCPU, exit_userspace),
+	STATS_DESC_COUNTER(VCPU, exit_null),
+	STATS_DESC_COUNTER(VCPU, exit_external_request),
+	STATS_DESC_COUNTER(VCPU, exit_io_request),
+	STATS_DESC_COUNTER(VCPU, exit_external_interrupt),
+	STATS_DESC_COUNTER(VCPU, exit_stop_request),
+	STATS_DESC_COUNTER(VCPU, exit_validity),
+	STATS_DESC_COUNTER(VCPU, exit_instruction),
+	STATS_DESC_COUNTER(VCPU, exit_pei),
+	STATS_DESC_COUNTER(VCPU, halt_no_poll_steal),
+	STATS_DESC_COUNTER(VCPU, instruction_lctl),
+	STATS_DESC_COUNTER(VCPU, instruction_lctlg),
+	STATS_DESC_COUNTER(VCPU, instruction_stctl),
+	STATS_DESC_COUNTER(VCPU, instruction_stctg),
+	STATS_DESC_COUNTER(VCPU, exit_program_interruption),
+	STATS_DESC_COUNTER(VCPU, exit_instr_and_program),
+	STATS_DESC_COUNTER(VCPU, exit_operation_exception),
+	STATS_DESC_COUNTER(VCPU, deliver_ckc),
+	STATS_DESC_COUNTER(VCPU, deliver_cputm),
+	STATS_DESC_COUNTER(VCPU, deliver_external_call),
+	STATS_DESC_COUNTER(VCPU, deliver_emergency_signal),
+	STATS_DESC_COUNTER(VCPU, deliver_service_signal),
+	STATS_DESC_COUNTER(VCPU, deliver_virtio),
+	STATS_DESC_COUNTER(VCPU, deliver_stop_signal),
+	STATS_DESC_COUNTER(VCPU, deliver_prefix_signal),
+	STATS_DESC_COUNTER(VCPU, deliver_restart_signal),
+	STATS_DESC_COUNTER(VCPU, deliver_program),
+	STATS_DESC_COUNTER(VCPU, deliver_io),
+	STATS_DESC_COUNTER(VCPU, deliver_machine_check),
+	STATS_DESC_COUNTER(VCPU, exit_wait_state),
+	STATS_DESC_COUNTER(VCPU, inject_ckc),
+	STATS_DESC_COUNTER(VCPU, inject_cputm),
+	STATS_DESC_COUNTER(VCPU, inject_external_call),
+	STATS_DESC_COUNTER(VCPU, inject_emergency_signal),
+	STATS_DESC_COUNTER(VCPU, inject_mchk),
+	STATS_DESC_COUNTER(VCPU, inject_pfault_init),
+	STATS_DESC_COUNTER(VCPU, inject_program),
+	STATS_DESC_COUNTER(VCPU, inject_restart),
+	STATS_DESC_COUNTER(VCPU, inject_set_prefix),
+	STATS_DESC_COUNTER(VCPU, inject_stop_signal),
+	STATS_DESC_COUNTER(VCPU, instruction_epsw),
+	STATS_DESC_COUNTER(VCPU, instruction_gs),
+	STATS_DESC_COUNTER(VCPU, instruction_io_other),
+	STATS_DESC_COUNTER(VCPU, instruction_lpsw),
+	STATS_DESC_COUNTER(VCPU, instruction_lpswe),
+	STATS_DESC_COUNTER(VCPU, instruction_pfmf),
+	STATS_DESC_COUNTER(VCPU, instruction_ptff),
+	STATS_DESC_COUNTER(VCPU, instruction_sck),
+	STATS_DESC_COUNTER(VCPU, instruction_sckpf),
+	STATS_DESC_COUNTER(VCPU, instruction_stidp),
+	STATS_DESC_COUNTER(VCPU, instruction_spx),
+	STATS_DESC_COUNTER(VCPU, instruction_stpx),
+	STATS_DESC_COUNTER(VCPU, instruction_stap),
+	STATS_DESC_COUNTER(VCPU, instruction_iske),
+	STATS_DESC_COUNTER(VCPU, instruction_ri),
+	STATS_DESC_COUNTER(VCPU, instruction_rrbe),
+	STATS_DESC_COUNTER(VCPU, instruction_sske),
+	STATS_DESC_COUNTER(VCPU, instruction_ipte_interlock),
+	STATS_DESC_COUNTER(VCPU, instruction_stsi),
+	STATS_DESC_COUNTER(VCPU, instruction_stfl),
+	STATS_DESC_COUNTER(VCPU, instruction_tb),
+	STATS_DESC_COUNTER(VCPU, instruction_tpi),
+	STATS_DESC_COUNTER(VCPU, instruction_tprot),
+	STATS_DESC_COUNTER(VCPU, instruction_tsch),
+	STATS_DESC_COUNTER(VCPU, instruction_sie),
+	STATS_DESC_COUNTER(VCPU, instruction_essa),
+	STATS_DESC_COUNTER(VCPU, instruction_sthyi),
+	STATS_DESC_COUNTER(VCPU, instruction_sigp_sense),
+	STATS_DESC_COUNTER(VCPU, instruction_sigp_sense_running),
+	STATS_DESC_COUNTER(VCPU, instruction_sigp_external_call),
+	STATS_DESC_COUNTER(VCPU, instruction_sigp_emergency),
+	STATS_DESC_COUNTER(VCPU, instruction_sigp_cond_emergency),
+	STATS_DESC_COUNTER(VCPU, instruction_sigp_start),
+	STATS_DESC_COUNTER(VCPU, instruction_sigp_stop),
+	STATS_DESC_COUNTER(VCPU, instruction_sigp_stop_store_status),
+	STATS_DESC_COUNTER(VCPU, instruction_sigp_store_status),
+	STATS_DESC_COUNTER(VCPU, instruction_sigp_store_adtl_status),
+	STATS_DESC_COUNTER(VCPU, instruction_sigp_arch),
+	STATS_DESC_COUNTER(VCPU, instruction_sigp_prefix),
+	STATS_DESC_COUNTER(VCPU, instruction_sigp_restart),
+	STATS_DESC_COUNTER(VCPU, instruction_sigp_init_cpu_reset),
+	STATS_DESC_COUNTER(VCPU, instruction_sigp_cpu_reset),
+	STATS_DESC_COUNTER(VCPU, instruction_sigp_unknown),
+	STATS_DESC_COUNTER(VCPU, instruction_diagnose_10),
+	STATS_DESC_COUNTER(VCPU, instruction_diagnose_44),
+	STATS_DESC_COUNTER(VCPU, instruction_diagnose_9c),
+	STATS_DESC_COUNTER(VCPU, diag_9c_ignored),
+	STATS_DESC_COUNTER(VCPU, diag_9c_forward),
+	STATS_DESC_COUNTER(VCPU, instruction_diagnose_258),
+	STATS_DESC_COUNTER(VCPU, instruction_diagnose_308),
+	STATS_DESC_COUNTER(VCPU, instruction_diagnose_500),
+	STATS_DESC_COUNTER(VCPU, instruction_diagnose_other),
+	STATS_DESC_COUNTER(VCPU, pfault_sync)
+};
+
+const struct kvm_stats_header kvm_vcpu_stats_header = {
+	.name_size = KVM_STATS_NAME_SIZE,
+	.num_desc = ARRAY_SIZE(kvm_vcpu_stats_desc),
+	.id_offset = sizeof(struct kvm_stats_header),
+	.desc_offset = sizeof(struct kvm_stats_header) + KVM_STATS_NAME_SIZE,
+	.data_offset = sizeof(struct kvm_stats_header) + KVM_STATS_NAME_SIZE +
+		       sizeof(kvm_vcpu_stats_desc),
+};
+
+/* allow nested virtualization in KVM (if enabled by user space) */
+static int nested;
+module_param(nested, int, S_IRUGO);
+MODULE_PARM_DESC(nested, "Nested virtualization support");
+
+/* allow 1m huge page guest backing, if !nested */
+static int hpage;
+module_param(hpage, int, 0444);
+MODULE_PARM_DESC(hpage, "1m huge page backing support");
+
+/* maximum percentage of steal time for polling.  >100 is treated like 100 */
+static u8 halt_poll_max_steal = 10;
+module_param(halt_poll_max_steal, byte, 0644);
+MODULE_PARM_DESC(halt_poll_max_steal, "Maximum percentage of steal time to allow polling");
+
+/* if set to true, the GISA will be initialized and used if available */
+static bool use_gisa  = true;
+module_param(use_gisa, bool, 0644);
+MODULE_PARM_DESC(use_gisa, "Use the GISA if the host supports it.");
+
+/* maximum diag9c forwarding per second */
+unsigned int diag9c_forwarding_hz;
+module_param(diag9c_forwarding_hz, uint, 0644);
+MODULE_PARM_DESC(diag9c_forwarding_hz, "Maximum diag9c forwarding per second, 0 to turn off");
+
+/*
+ * allow asynchronous deinit for protected guests; enable by default since
+ * the feature is opt-in anyway
+ */
+static int async_destroy = 1;
+module_param(async_destroy, int, 0444);
+MODULE_PARM_DESC(async_destroy, "Asynchronous destroy for protected guests");
+
+/*
+ * For now we handle at most 16 double words as this is what the s390 base
+ * kernel handles and stores in the prefix page. If we ever need to go beyond
+ * this, this requires changes to code, but the external uapi can stay.
+ */
+#define SIZE_INTERNAL 16
+
+/*
+ * Base feature mask that defines default mask for facilities. Consists of the
+ * defines in FACILITIES_KVM and the non-hypervisor managed bits.
+ */
+static unsigned long kvm_s390_fac_base[SIZE_INTERNAL] = { FACILITIES_KVM };
+/*
+ * Extended feature mask. Consists of the defines in FACILITIES_KVM_CPUMODEL
+ * and defines the facilities that can be enabled via a cpu model.
+ */
+static unsigned long kvm_s390_fac_ext[SIZE_INTERNAL] = { FACILITIES_KVM_CPUMODEL };
+
+static unsigned long kvm_s390_fac_size(void)
+{
+	BUILD_BUG_ON(SIZE_INTERNAL > S390_ARCH_FAC_MASK_SIZE_U64);
+	BUILD_BUG_ON(SIZE_INTERNAL > S390_ARCH_FAC_LIST_SIZE_U64);
+	BUILD_BUG_ON(SIZE_INTERNAL * sizeof(unsigned long) >
+		sizeof(stfle_fac_list));
+
+	return SIZE_INTERNAL;
+}
+
+/* available cpu features supported by kvm */
+static DECLARE_BITMAP(kvm_s390_available_cpu_feat, KVM_S390_VM_CPU_FEAT_NR_BITS);
+/* available subfunctions indicated via query / "test bit" */
+static struct kvm_s390_vm_cpu_subfunc kvm_s390_available_subfunc;
+
+static struct gmap_notifier gmap_notifier;
+static struct gmap_notifier vsie_gmap_notifier;
+debug_info_t *kvm_s390_dbf;
+debug_info_t *kvm_s390_dbf_uv;
+
+/* Section: not file related */
+/* forward declarations */
+static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
+			      unsigned long end);
+static int sca_switch_to_extended(struct kvm *kvm);
+
+static void kvm_clock_sync_scb(struct kvm_s390_sie_block *scb, u64 delta)
+{
+	u8 delta_idx = 0;
+
+	/*
+	 * The TOD jumps by delta, we have to compensate this by adding
+	 * -delta to the epoch.
+	 */
+	delta = -delta;
+
+	/* sign-extension - we're adding to signed values below */
+	if ((s64)delta < 0)
+		delta_idx = -1;
+
+	scb->epoch += delta;
+	if (scb->ecd & ECD_MEF) {
+		scb->epdx += delta_idx;
+		if (scb->epoch < delta)
+			scb->epdx += 1;
+	}
+}
+
+/*
+ * This callback is executed during stop_machine(). All CPUs are therefore
+ * temporarily stopped. In order not to change guest behavior, we have to
+ * disable preemption whenever we touch the epoch of kvm and the VCPUs,
+ * so a CPU won't be stopped while calculating with the epoch.
+ */
+static int kvm_clock_sync(struct notifier_block *notifier, unsigned long val,
+			  void *v)
+{
+	struct kvm *kvm;
+	struct kvm_vcpu *vcpu;
+	unsigned long i;
+	unsigned long long *delta = v;
+
+	list_for_each_entry(kvm, &vm_list, vm_list) {
+		kvm_for_each_vcpu(i, vcpu, kvm) {
+			kvm_clock_sync_scb(vcpu->arch.sie_block, *delta);
+			if (i == 0) {
+				kvm->arch.epoch = vcpu->arch.sie_block->epoch;
+				kvm->arch.epdx = vcpu->arch.sie_block->epdx;
+			}
+			if (vcpu->arch.cputm_enabled)
+				vcpu->arch.cputm_start += *delta;
+			if (vcpu->arch.vsie_block)
+				kvm_clock_sync_scb(vcpu->arch.vsie_block,
+						   *delta);
+		}
+	}
+	return NOTIFY_OK;
+}
+
+static struct notifier_block kvm_clock_notifier = {
+	.notifier_call = kvm_clock_sync,
+};
+
+static void allow_cpu_feat(unsigned long nr)
+{
+	set_bit_inv(nr, kvm_s390_available_cpu_feat);
+}
+
+static inline int plo_test_bit(unsigned char nr)
+{
+	unsigned long function = (unsigned long)nr | 0x100;
+	int cc;
+
+	asm volatile(
+		"	lgr	0,%[function]\n"
+		/* Parameter registers are ignored for "test bit" */
+		"	plo	0,0,0,0(0)\n"
+		"	ipm	%0\n"
+		"	srl	%0,28\n"
+		: "=d" (cc)
+		: [function] "d" (function)
+		: "cc", "0");
+	return cc == 0;
+}
+
+static __always_inline void __insn32_query(unsigned int opcode, u8 *query)
+{
+	asm volatile(
+		"	lghi	0,0\n"
+		"	lgr	1,%[query]\n"
+		/* Parameter registers are ignored */
+		"	.insn	rrf,%[opc] << 16,2,4,6,0\n"
+		:
+		: [query] "d" ((unsigned long)query), [opc] "i" (opcode)
+		: "cc", "memory", "0", "1");
+}
+
+#define INSN_SORTL 0xb938
+#define INSN_DFLTCC 0xb939
+
+static void __init kvm_s390_cpu_feat_init(void)
+{
+	int i;
+
+	for (i = 0; i < 256; ++i) {
+		if (plo_test_bit(i))
+			kvm_s390_available_subfunc.plo[i >> 3] |= 0x80 >> (i & 7);
+	}
+
+	if (test_facility(28)) /* TOD-clock steering */
+		ptff(kvm_s390_available_subfunc.ptff,
+		     sizeof(kvm_s390_available_subfunc.ptff),
+		     PTFF_QAF);
+
+	if (test_facility(17)) { /* MSA */
+		__cpacf_query(CPACF_KMAC, (cpacf_mask_t *)
+			      kvm_s390_available_subfunc.kmac);
+		__cpacf_query(CPACF_KMC, (cpacf_mask_t *)
+			      kvm_s390_available_subfunc.kmc);
+		__cpacf_query(CPACF_KM, (cpacf_mask_t *)
+			      kvm_s390_available_subfunc.km);
+		__cpacf_query(CPACF_KIMD, (cpacf_mask_t *)
+			      kvm_s390_available_subfunc.kimd);
+		__cpacf_query(CPACF_KLMD, (cpacf_mask_t *)
+			      kvm_s390_available_subfunc.klmd);
+	}
+	if (test_facility(76)) /* MSA3 */
+		__cpacf_query(CPACF_PCKMO, (cpacf_mask_t *)
+			      kvm_s390_available_subfunc.pckmo);
+	if (test_facility(77)) { /* MSA4 */
+		__cpacf_query(CPACF_KMCTR, (cpacf_mask_t *)
+			      kvm_s390_available_subfunc.kmctr);
+		__cpacf_query(CPACF_KMF, (cpacf_mask_t *)
+			      kvm_s390_available_subfunc.kmf);
+		__cpacf_query(CPACF_KMO, (cpacf_mask_t *)
+			      kvm_s390_available_subfunc.kmo);
+		__cpacf_query(CPACF_PCC, (cpacf_mask_t *)
+			      kvm_s390_available_subfunc.pcc);
+	}
+	if (test_facility(57)) /* MSA5 */
+		__cpacf_query(CPACF_PRNO, (cpacf_mask_t *)
+			      kvm_s390_available_subfunc.ppno);
+
+	if (test_facility(146)) /* MSA8 */
+		__cpacf_query(CPACF_KMA, (cpacf_mask_t *)
+			      kvm_s390_available_subfunc.kma);
+
+	if (test_facility(155)) /* MSA9 */
+		__cpacf_query(CPACF_KDSA, (cpacf_mask_t *)
+			      kvm_s390_available_subfunc.kdsa);
+
+	if (test_facility(150)) /* SORTL */
+		__insn32_query(INSN_SORTL, kvm_s390_available_subfunc.sortl);
+
+	if (test_facility(151)) /* DFLTCC */
+		__insn32_query(INSN_DFLTCC, kvm_s390_available_subfunc.dfltcc);
+
+	if (MACHINE_HAS_ESOP)
+		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_ESOP);
+	/*
+	 * We need SIE support, ESOP (PROT_READ protection for gmap_shadow),
+	 * 64bit SCAO (SCA passthrough) and IDTE (for gmap_shadow unshadowing).
+	 */
+	if (!sclp.has_sief2 || !MACHINE_HAS_ESOP || !sclp.has_64bscao ||
+	    !test_facility(3) || !nested)
+		return;
+	allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIEF2);
+	if (sclp.has_64bscao)
+		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_64BSCAO);
+	if (sclp.has_siif)
+		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIIF);
+	if (sclp.has_gpere)
+		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GPERE);
+	if (sclp.has_gsls)
+		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GSLS);
+	if (sclp.has_ib)
+		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IB);
+	if (sclp.has_cei)
+		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_CEI);
+	if (sclp.has_ibs)
+		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IBS);
+	if (sclp.has_kss)
+		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_KSS);
+	/*
+	 * KVM_S390_VM_CPU_FEAT_SKEY: Wrong shadow of PTE.I bits will make
+	 * all skey handling functions read/set the skey from the PGSTE
+	 * instead of the real storage key.
+	 *
+	 * KVM_S390_VM_CPU_FEAT_CMMA: Wrong shadow of PTE.I bits will make
+	 * pages being detected as preserved although they are resident.
+	 *
+	 * KVM_S390_VM_CPU_FEAT_PFMFI: Wrong shadow of PTE.I bits will
+	 * have the same effect as for KVM_S390_VM_CPU_FEAT_SKEY.
+	 *
+	 * For KVM_S390_VM_CPU_FEAT_SKEY, KVM_S390_VM_CPU_FEAT_CMMA and
+	 * KVM_S390_VM_CPU_FEAT_PFMFI, all PTE.I and PGSTE bits have to be
+	 * correctly shadowed. We can do that for the PGSTE but not for PTE.I.
+	 *
+	 * KVM_S390_VM_CPU_FEAT_SIGPIF: Wrong SCB addresses in the SCA. We
+	 * cannot easily shadow the SCA because of the ipte lock.
+	 */
+}
+
+static int __init __kvm_s390_init(void)
+{
+	int rc = -ENOMEM;
+
+	kvm_s390_dbf = debug_register("kvm-trace", 32, 1, 7 * sizeof(long));
+	if (!kvm_s390_dbf)
+		return -ENOMEM;
+
+	kvm_s390_dbf_uv = debug_register("kvm-uv", 32, 1, 7 * sizeof(long));
+	if (!kvm_s390_dbf_uv)
+		goto err_kvm_uv;
+
+	if (debug_register_view(kvm_s390_dbf, &debug_sprintf_view) ||
+	    debug_register_view(kvm_s390_dbf_uv, &debug_sprintf_view))
+		goto err_debug_view;
+
+	kvm_s390_cpu_feat_init();
+
+	/* Register floating interrupt controller interface. */
+	rc = kvm_register_device_ops(&kvm_flic_ops, KVM_DEV_TYPE_FLIC);
+	if (rc) {
+		pr_err("A FLIC registration call failed with rc=%d\n", rc);
+		goto err_flic;
+	}
+
+	if (IS_ENABLED(CONFIG_VFIO_PCI_ZDEV_KVM)) {
+		rc = kvm_s390_pci_init();
+		if (rc) {
+			pr_err("Unable to allocate AIFT for PCI\n");
+			goto err_pci;
+		}
+	}
+
+	rc = kvm_s390_gib_init(GAL_ISC);
+	if (rc)
+		goto err_gib;
+
+	gmap_notifier.notifier_call = kvm_gmap_notifier;
+	gmap_register_pte_notifier(&gmap_notifier);
+	vsie_gmap_notifier.notifier_call = kvm_s390_vsie_gmap_notifier;
+	gmap_register_pte_notifier(&vsie_gmap_notifier);
+	atomic_notifier_chain_register(&s390_epoch_delta_notifier,
+				       &kvm_clock_notifier);
+
+	return 0;
+
+err_gib:
+	if (IS_ENABLED(CONFIG_VFIO_PCI_ZDEV_KVM))
+		kvm_s390_pci_exit();
+err_pci:
+err_flic:
+err_debug_view:
+	debug_unregister(kvm_s390_dbf_uv);
+err_kvm_uv:
+	debug_unregister(kvm_s390_dbf);
+	return rc;
+}
+
+static void __kvm_s390_exit(void)
+{
+	gmap_unregister_pte_notifier(&gmap_notifier);
+	gmap_unregister_pte_notifier(&vsie_gmap_notifier);
+	atomic_notifier_chain_unregister(&s390_epoch_delta_notifier,
+					 &kvm_clock_notifier);
+
+	kvm_s390_gib_destroy();
+	if (IS_ENABLED(CONFIG_VFIO_PCI_ZDEV_KVM))
+		kvm_s390_pci_exit();
+	debug_unregister(kvm_s390_dbf);
+	debug_unregister(kvm_s390_dbf_uv);
+}
+
+/* Section: device related */
+long kvm_arch_dev_ioctl(struct file *filp,
+			unsigned int ioctl, unsigned long arg)
+{
+	if (ioctl == KVM_S390_ENABLE_SIE)
+		return s390_enable_sie();
+	return -EINVAL;
+}
+
+int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
+{
+	int r;
+
+	switch (ext) {
+	case KVM_CAP_S390_PSW:
+	case KVM_CAP_S390_GMAP:
+	case KVM_CAP_SYNC_MMU:
+#ifdef CONFIG_KVM_S390_UCONTROL
+	case KVM_CAP_S390_UCONTROL:
+#endif
+	case KVM_CAP_ASYNC_PF:
+	case KVM_CAP_SYNC_REGS:
+	case KVM_CAP_ONE_REG:
+	case KVM_CAP_ENABLE_CAP:
+	case KVM_CAP_S390_CSS_SUPPORT:
+	case KVM_CAP_IOEVENTFD:
+	case KVM_CAP_DEVICE_CTRL:
+	case KVM_CAP_S390_IRQCHIP:
+	case KVM_CAP_VM_ATTRIBUTES:
+	case KVM_CAP_MP_STATE:
+	case KVM_CAP_IMMEDIATE_EXIT:
+	case KVM_CAP_S390_INJECT_IRQ:
+	case KVM_CAP_S390_USER_SIGP:
+	case KVM_CAP_S390_USER_STSI:
+	case KVM_CAP_S390_SKEYS:
+	case KVM_CAP_S390_IRQ_STATE:
+	case KVM_CAP_S390_USER_INSTR0:
+	case KVM_CAP_S390_CMMA_MIGRATION:
+	case KVM_CAP_S390_AIS:
+	case KVM_CAP_S390_AIS_MIGRATION:
+	case KVM_CAP_S390_VCPU_RESETS:
+	case KVM_CAP_SET_GUEST_DEBUG:
+	case KVM_CAP_S390_DIAG318:
+	case KVM_CAP_IRQFD_RESAMPLE:
+		r = 1;
+		break;
+	case KVM_CAP_SET_GUEST_DEBUG2:
+		r = KVM_GUESTDBG_VALID_MASK;
+		break;
+	case KVM_CAP_S390_HPAGE_1M:
+		r = 0;
+		if (hpage && !kvm_is_ucontrol(kvm))
+			r = 1;
+		break;
+	case KVM_CAP_S390_MEM_OP:
+		r = MEM_OP_MAX_SIZE;
+		break;
+	case KVM_CAP_S390_MEM_OP_EXTENSION:
+		/*
+		 * Flag bits indicating which extensions are supported.
+		 * If r > 0, the base extension must also be supported/indicated,
+		 * in order to maintain backwards compatibility.
+		 */
+		r = KVM_S390_MEMOP_EXTENSION_CAP_BASE |
+		    KVM_S390_MEMOP_EXTENSION_CAP_CMPXCHG;
+		break;
+	case KVM_CAP_NR_VCPUS:
+	case KVM_CAP_MAX_VCPUS:
+	case KVM_CAP_MAX_VCPU_ID:
+		r = KVM_S390_BSCA_CPU_SLOTS;
+		if (!kvm_s390_use_sca_entries())
+			r = KVM_MAX_VCPUS;
+		else if (sclp.has_esca && sclp.has_64bscao)
+			r = KVM_S390_ESCA_CPU_SLOTS;
+		if (ext == KVM_CAP_NR_VCPUS)
+			r = min_t(unsigned int, num_online_cpus(), r);
+		break;
+	case KVM_CAP_S390_COW:
+		r = MACHINE_HAS_ESOP;
+		break;
+	case KVM_CAP_S390_VECTOR_REGISTERS:
+		r = MACHINE_HAS_VX;
+		break;
+	case KVM_CAP_S390_RI:
+		r = test_facility(64);
+		break;
+	case KVM_CAP_S390_GS:
+		r = test_facility(133);
+		break;
+	case KVM_CAP_S390_BPB:
+		r = test_facility(82);
+		break;
+	case KVM_CAP_S390_PROTECTED_ASYNC_DISABLE:
+		r = async_destroy && is_prot_virt_host();
+		break;
+	case KVM_CAP_S390_PROTECTED:
+		r = is_prot_virt_host();
+		break;
+	case KVM_CAP_S390_PROTECTED_DUMP: {
+		u64 pv_cmds_dump[] = {
+			BIT_UVC_CMD_DUMP_INIT,
+			BIT_UVC_CMD_DUMP_CONFIG_STOR_STATE,
+			BIT_UVC_CMD_DUMP_CPU,
+			BIT_UVC_CMD_DUMP_COMPLETE,
+		};
+		int i;
+
+		r = is_prot_virt_host();
+
+		for (i = 0; i < ARRAY_SIZE(pv_cmds_dump); i++) {
+			if (!test_bit_inv(pv_cmds_dump[i],
+					  (unsigned long *)&uv_info.inst_calls_list)) {
+				r = 0;
+				break;
+			}
+		}
+		break;
+	}
+	case KVM_CAP_S390_ZPCI_OP:
+		r = kvm_s390_pci_interp_allowed();
+		break;
+	case KVM_CAP_S390_CPU_TOPOLOGY:
+		r = test_facility(11);
+		break;
+	default:
+		r = 0;
+	}
+	return r;
+}
+
+void kvm_arch_sync_dirty_log(struct kvm *kvm, struct kvm_memory_slot *memslot)
+{
+	int i;
+	gfn_t cur_gfn, last_gfn;
+	unsigned long gaddr, vmaddr;
+	struct gmap *gmap = kvm->arch.gmap;
+	DECLARE_BITMAP(bitmap, _PAGE_ENTRIES);
+
+	/* Loop over all guest segments */
+	cur_gfn = memslot->base_gfn;
+	last_gfn = memslot->base_gfn + memslot->npages;
+	for (; cur_gfn <= last_gfn; cur_gfn += _PAGE_ENTRIES) {
+		gaddr = gfn_to_gpa(cur_gfn);
+		vmaddr = gfn_to_hva_memslot(memslot, cur_gfn);
+		if (kvm_is_error_hva(vmaddr))
+			continue;
+
+		bitmap_zero(bitmap, _PAGE_ENTRIES);
+		gmap_sync_dirty_log_pmd(gmap, bitmap, gaddr, vmaddr);
+		for (i = 0; i < _PAGE_ENTRIES; i++) {
+			if (test_bit(i, bitmap))
+				mark_page_dirty(kvm, cur_gfn + i);
+		}
+
+		if (fatal_signal_pending(current))
+			return;
+		cond_resched();
+	}
+}
+
+/* Section: vm related */
+static void sca_del_vcpu(struct kvm_vcpu *vcpu);
+
+/*
+ * Get (and clear) the dirty memory log for a memory slot.
+ */
+int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
+			       struct kvm_dirty_log *log)
+{
+	int r;
+	unsigned long n;
+	struct kvm_memory_slot *memslot;
+	int is_dirty;
+
+	if (kvm_is_ucontrol(kvm))
+		return -EINVAL;
+
+	mutex_lock(&kvm->slots_lock);
+
+	r = -EINVAL;
+	if (log->slot >= KVM_USER_MEM_SLOTS)
+		goto out;
+
+	r = kvm_get_dirty_log(kvm, log, &is_dirty, &memslot);
+	if (r)
+		goto out;
+
+	/* Clear the dirty log */
+	if (is_dirty) {
+		n = kvm_dirty_bitmap_bytes(memslot);
+		memset(memslot->dirty_bitmap, 0, n);
+	}
+	r = 0;
+out:
+	mutex_unlock(&kvm->slots_lock);
+	return r;
+}
+
+static void icpt_operexc_on_all_vcpus(struct kvm *kvm)
+{
+	unsigned long i;
+	struct kvm_vcpu *vcpu;
+
+	kvm_for_each_vcpu(i, vcpu, kvm) {
+		kvm_s390_sync_request(KVM_REQ_ICPT_OPEREXC, vcpu);
+	}
+}
+
+int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap)
+{
+	int r;
+
+	if (cap->flags)
+		return -EINVAL;
+
+	switch (cap->cap) {
+	case KVM_CAP_S390_IRQCHIP:
+		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_IRQCHIP");
+		kvm->arch.use_irqchip = 1;
+		r = 0;
+		break;
+	case KVM_CAP_S390_USER_SIGP:
+		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_SIGP");
+		kvm->arch.user_sigp = 1;
+		r = 0;
+		break;
+	case KVM_CAP_S390_VECTOR_REGISTERS:
+		mutex_lock(&kvm->lock);
+		if (kvm->created_vcpus) {
+			r = -EBUSY;
+		} else if (MACHINE_HAS_VX) {
+			set_kvm_facility(kvm->arch.model.fac_mask, 129);
+			set_kvm_facility(kvm->arch.model.fac_list, 129);
+			if (test_facility(134)) {
+				set_kvm_facility(kvm->arch.model.fac_mask, 134);
+				set_kvm_facility(kvm->arch.model.fac_list, 134);
+			}
+			if (test_facility(135)) {
+				set_kvm_facility(kvm->arch.model.fac_mask, 135);
+				set_kvm_facility(kvm->arch.model.fac_list, 135);
+			}
+			if (test_facility(148)) {
+				set_kvm_facility(kvm->arch.model.fac_mask, 148);
+				set_kvm_facility(kvm->arch.model.fac_list, 148);
+			}
+			if (test_facility(152)) {
+				set_kvm_facility(kvm->arch.model.fac_mask, 152);
+				set_kvm_facility(kvm->arch.model.fac_list, 152);
+			}
+			if (test_facility(192)) {
+				set_kvm_facility(kvm->arch.model.fac_mask, 192);
+				set_kvm_facility(kvm->arch.model.fac_list, 192);
+			}
+			r = 0;
+		} else
+			r = -EINVAL;
+		mutex_unlock(&kvm->lock);
+		VM_EVENT(kvm, 3, "ENABLE: CAP_S390_VECTOR_REGISTERS %s",
+			 r ? "(not available)" : "(success)");
+		break;
+	case KVM_CAP_S390_RI:
+		r = -EINVAL;
+		mutex_lock(&kvm->lock);
+		if (kvm->created_vcpus) {
+			r = -EBUSY;
+		} else if (test_facility(64)) {
+			set_kvm_facility(kvm->arch.model.fac_mask, 64);
+			set_kvm_facility(kvm->arch.model.fac_list, 64);
+			r = 0;
+		}
+		mutex_unlock(&kvm->lock);
+		VM_EVENT(kvm, 3, "ENABLE: CAP_S390_RI %s",
+			 r ? "(not available)" : "(success)");
+		break;
+	case KVM_CAP_S390_AIS:
+		mutex_lock(&kvm->lock);
+		if (kvm->created_vcpus) {
+			r = -EBUSY;
+		} else {
+			set_kvm_facility(kvm->arch.model.fac_mask, 72);
+			set_kvm_facility(kvm->arch.model.fac_list, 72);
+			r = 0;
+		}
+		mutex_unlock(&kvm->lock);
+		VM_EVENT(kvm, 3, "ENABLE: AIS %s",
+			 r ? "(not available)" : "(success)");
+		break;
+	case KVM_CAP_S390_GS:
+		r = -EINVAL;
+		mutex_lock(&kvm->lock);
+		if (kvm->created_vcpus) {
+			r = -EBUSY;
+		} else if (test_facility(133)) {
+			set_kvm_facility(kvm->arch.model.fac_mask, 133);
+			set_kvm_facility(kvm->arch.model.fac_list, 133);
+			r = 0;
+		}
+		mutex_unlock(&kvm->lock);
+		VM_EVENT(kvm, 3, "ENABLE: CAP_S390_GS %s",
+			 r ? "(not available)" : "(success)");
+		break;
+	case KVM_CAP_S390_HPAGE_1M:
+		mutex_lock(&kvm->lock);
+		if (kvm->created_vcpus)
+			r = -EBUSY;
+		else if (!hpage || kvm->arch.use_cmma || kvm_is_ucontrol(kvm))
+			r = -EINVAL;
+		else {
+			r = 0;
+			mmap_write_lock(kvm->mm);
+			kvm->mm->context.allow_gmap_hpage_1m = 1;
+			mmap_write_unlock(kvm->mm);
+			/*
+			 * We might have to create fake 4k page
+			 * tables. To avoid that the hardware works on
+			 * stale PGSTEs, we emulate these instructions.
+			 */
+			kvm->arch.use_skf = 0;
+			kvm->arch.use_pfmfi = 0;
+		}
+		mutex_unlock(&kvm->lock);
+		VM_EVENT(kvm, 3, "ENABLE: CAP_S390_HPAGE %s",
+			 r ? "(not available)" : "(success)");
+		break;
+	case KVM_CAP_S390_USER_STSI:
+		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_STSI");
+		kvm->arch.user_stsi = 1;
+		r = 0;
+		break;
+	case KVM_CAP_S390_USER_INSTR0:
+		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_INSTR0");
+		kvm->arch.user_instr0 = 1;
+		icpt_operexc_on_all_vcpus(kvm);
+		r = 0;
+		break;
+	case KVM_CAP_S390_CPU_TOPOLOGY:
+		r = -EINVAL;
+		mutex_lock(&kvm->lock);
+		if (kvm->created_vcpus) {
+			r = -EBUSY;
+		} else if (test_facility(11)) {
+			set_kvm_facility(kvm->arch.model.fac_mask, 11);
+			set_kvm_facility(kvm->arch.model.fac_list, 11);
+			r = 0;
+		}
+		mutex_unlock(&kvm->lock);
+		VM_EVENT(kvm, 3, "ENABLE: CAP_S390_CPU_TOPOLOGY %s",
+			 r ? "(not available)" : "(success)");
+		break;
+	default:
+		r = -EINVAL;
+		break;
+	}
+	return r;
+}
+
+static int kvm_s390_get_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
+{
+	int ret;
+
+	switch (attr->attr) {
+	case KVM_S390_VM_MEM_LIMIT_SIZE:
+		ret = 0;
+		VM_EVENT(kvm, 3, "QUERY: max guest memory: %lu bytes",
+			 kvm->arch.mem_limit);
+		if (put_user(kvm->arch.mem_limit, (u64 __user *)attr->addr))
+			ret = -EFAULT;
+		break;
+	default:
+		ret = -ENXIO;
+		break;
+	}
+	return ret;
+}
+
+static int kvm_s390_set_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
+{
+	int ret;
+	unsigned int idx;
+	switch (attr->attr) {
+	case KVM_S390_VM_MEM_ENABLE_CMMA:
+		ret = -ENXIO;
+		if (!sclp.has_cmma)
+			break;
+
+		VM_EVENT(kvm, 3, "%s", "ENABLE: CMMA support");
+		mutex_lock(&kvm->lock);
+		if (kvm->created_vcpus)
+			ret = -EBUSY;
+		else if (kvm->mm->context.allow_gmap_hpage_1m)
+			ret = -EINVAL;
+		else {
+			kvm->arch.use_cmma = 1;
+			/* Not compatible with cmma. */
+			kvm->arch.use_pfmfi = 0;
+			ret = 0;
+		}
+		mutex_unlock(&kvm->lock);
+		break;
+	case KVM_S390_VM_MEM_CLR_CMMA:
+		ret = -ENXIO;
+		if (!sclp.has_cmma)
+			break;
+		ret = -EINVAL;
+		if (!kvm->arch.use_cmma)
+			break;
+
+		VM_EVENT(kvm, 3, "%s", "RESET: CMMA states");
+		mutex_lock(&kvm->lock);
+		idx = srcu_read_lock(&kvm->srcu);
+		s390_reset_cmma(kvm->arch.gmap->mm);
+		srcu_read_unlock(&kvm->srcu, idx);
+		mutex_unlock(&kvm->lock);
+		ret = 0;
+		break;
+	case KVM_S390_VM_MEM_LIMIT_SIZE: {
+		unsigned long new_limit;
+
+		if (kvm_is_ucontrol(kvm))
+			return -EINVAL;
+
+		if (get_user(new_limit, (u64 __user *)attr->addr))
+			return -EFAULT;
+
+		if (kvm->arch.mem_limit != KVM_S390_NO_MEM_LIMIT &&
+		    new_limit > kvm->arch.mem_limit)
+			return -E2BIG;
+
+		if (!new_limit)
+			return -EINVAL;
+
+		/* gmap_create takes last usable address */
+		if (new_limit != KVM_S390_NO_MEM_LIMIT)
+			new_limit -= 1;
+
+		ret = -EBUSY;
+		mutex_lock(&kvm->lock);
+		if (!kvm->created_vcpus) {
+			/* gmap_create will round the limit up */
+			struct gmap *new = gmap_create(current->mm, new_limit);
+
+			if (!new) {
+				ret = -ENOMEM;
+			} else {
+				gmap_remove(kvm->arch.gmap);
+				new->private = kvm;
+				kvm->arch.gmap = new;
+				ret = 0;
+			}
+		}
+		mutex_unlock(&kvm->lock);
+		VM_EVENT(kvm, 3, "SET: max guest address: %lu", new_limit);
+		VM_EVENT(kvm, 3, "New guest asce: 0x%pK",
+			 (void *) kvm->arch.gmap->asce);
+		break;
+	}
+	default:
+		ret = -ENXIO;
+		break;
+	}
+	return ret;
+}
+
+static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu);
+
+void kvm_s390_vcpu_crypto_reset_all(struct kvm *kvm)
+{
+	struct kvm_vcpu *vcpu;
+	unsigned long i;
+
+	kvm_s390_vcpu_block_all(kvm);
+
+	kvm_for_each_vcpu(i, vcpu, kvm) {
+		kvm_s390_vcpu_crypto_setup(vcpu);
+		/* recreate the shadow crycb by leaving the VSIE handler */
+		kvm_s390_sync_request(KVM_REQ_VSIE_RESTART, vcpu);
+	}
+
+	kvm_s390_vcpu_unblock_all(kvm);
+}
+
+static int kvm_s390_vm_set_crypto(struct kvm *kvm, struct kvm_device_attr *attr)
+{
+	mutex_lock(&kvm->lock);
+	switch (attr->attr) {
+	case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
+		if (!test_kvm_facility(kvm, 76)) {
+			mutex_unlock(&kvm->lock);
+			return -EINVAL;
+		}
+		get_random_bytes(
+			kvm->arch.crypto.crycb->aes_wrapping_key_mask,
+			sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
+		kvm->arch.crypto.aes_kw = 1;
+		VM_EVENT(kvm, 3, "%s", "ENABLE: AES keywrapping support");
+		break;
+	case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
+		if (!test_kvm_facility(kvm, 76)) {
+			mutex_unlock(&kvm->lock);
+			return -EINVAL;
+		}
+		get_random_bytes(
+			kvm->arch.crypto.crycb->dea_wrapping_key_mask,
+			sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
+		kvm->arch.crypto.dea_kw = 1;
+		VM_EVENT(kvm, 3, "%s", "ENABLE: DEA keywrapping support");
+		break;
+	case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
+		if (!test_kvm_facility(kvm, 76)) {
+			mutex_unlock(&kvm->lock);
+			return -EINVAL;
+		}
+		kvm->arch.crypto.aes_kw = 0;
+		memset(kvm->arch.crypto.crycb->aes_wrapping_key_mask, 0,
+			sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
+		VM_EVENT(kvm, 3, "%s", "DISABLE: AES keywrapping support");
+		break;
+	case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
+		if (!test_kvm_facility(kvm, 76)) {
+			mutex_unlock(&kvm->lock);
+			return -EINVAL;
+		}
+		kvm->arch.crypto.dea_kw = 0;
+		memset(kvm->arch.crypto.crycb->dea_wrapping_key_mask, 0,
+			sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
+		VM_EVENT(kvm, 3, "%s", "DISABLE: DEA keywrapping support");
+		break;
+	case KVM_S390_VM_CRYPTO_ENABLE_APIE:
+		if (!ap_instructions_available()) {
+			mutex_unlock(&kvm->lock);
+			return -EOPNOTSUPP;
+		}
+		kvm->arch.crypto.apie = 1;
+		break;
+	case KVM_S390_VM_CRYPTO_DISABLE_APIE:
+		if (!ap_instructions_available()) {
+			mutex_unlock(&kvm->lock);
+			return -EOPNOTSUPP;
+		}
+		kvm->arch.crypto.apie = 0;
+		break;
+	default:
+		mutex_unlock(&kvm->lock);
+		return -ENXIO;
+	}
+
+	kvm_s390_vcpu_crypto_reset_all(kvm);
+	mutex_unlock(&kvm->lock);
+	return 0;
+}
+
+static void kvm_s390_vcpu_pci_setup(struct kvm_vcpu *vcpu)
+{
+	/* Only set the ECB bits after guest requests zPCI interpretation */
+	if (!vcpu->kvm->arch.use_zpci_interp)
+		return;
+
+	vcpu->arch.sie_block->ecb2 |= ECB2_ZPCI_LSI;
+	vcpu->arch.sie_block->ecb3 |= ECB3_AISII + ECB3_AISI;
+}
+
+void kvm_s390_vcpu_pci_enable_interp(struct kvm *kvm)
+{
+	struct kvm_vcpu *vcpu;
+	unsigned long i;
+
+	lockdep_assert_held(&kvm->lock);
+
+	if (!kvm_s390_pci_interp_allowed())
+		return;
+
+	/*
+	 * If host is configured for PCI and the necessary facilities are
+	 * available, turn on interpretation for the life of this guest
+	 */
+	kvm->arch.use_zpci_interp = 1;
+
+	kvm_s390_vcpu_block_all(kvm);
+
+	kvm_for_each_vcpu(i, vcpu, kvm) {
+		kvm_s390_vcpu_pci_setup(vcpu);
+		kvm_s390_sync_request(KVM_REQ_VSIE_RESTART, vcpu);
+	}
+
+	kvm_s390_vcpu_unblock_all(kvm);
+}
+
+static void kvm_s390_sync_request_broadcast(struct kvm *kvm, int req)
+{
+	unsigned long cx;
+	struct kvm_vcpu *vcpu;
+
+	kvm_for_each_vcpu(cx, vcpu, kvm)
+		kvm_s390_sync_request(req, vcpu);
+}
+
+/*
+ * Must be called with kvm->srcu held to avoid races on memslots, and with
+ * kvm->slots_lock to avoid races with ourselves and kvm_s390_vm_stop_migration.
+ */
+static int kvm_s390_vm_start_migration(struct kvm *kvm)
+{
+	struct kvm_memory_slot *ms;
+	struct kvm_memslots *slots;
+	unsigned long ram_pages = 0;
+	int bkt;
+
+	/* migration mode already enabled */
+	if (kvm->arch.migration_mode)
+		return 0;
+	slots = kvm_memslots(kvm);
+	if (!slots || kvm_memslots_empty(slots))
+		return -EINVAL;
+
+	if (!kvm->arch.use_cmma) {
+		kvm->arch.migration_mode = 1;
+		return 0;
+	}
+	/* mark all the pages in active slots as dirty */
+	kvm_for_each_memslot(ms, bkt, slots) {
+		if (!ms->dirty_bitmap)
+			return -EINVAL;
+		/*
+		 * The second half of the bitmap is only used on x86,
+		 * and would be wasted otherwise, so we put it to good
+		 * use here to keep track of the state of the storage
+		 * attributes.
+		 */
+		memset(kvm_second_dirty_bitmap(ms), 0xff, kvm_dirty_bitmap_bytes(ms));
+		ram_pages += ms->npages;
+	}
+	atomic64_set(&kvm->arch.cmma_dirty_pages, ram_pages);
+	kvm->arch.migration_mode = 1;
+	kvm_s390_sync_request_broadcast(kvm, KVM_REQ_START_MIGRATION);
+	return 0;
+}
+
+/*
+ * Must be called with kvm->slots_lock to avoid races with ourselves and
+ * kvm_s390_vm_start_migration.
+ */
+static int kvm_s390_vm_stop_migration(struct kvm *kvm)
+{
+	/* migration mode already disabled */
+	if (!kvm->arch.migration_mode)
+		return 0;
+	kvm->arch.migration_mode = 0;
+	if (kvm->arch.use_cmma)
+		kvm_s390_sync_request_broadcast(kvm, KVM_REQ_STOP_MIGRATION);
+	return 0;
+}
+
+static int kvm_s390_vm_set_migration(struct kvm *kvm,
+				     struct kvm_device_attr *attr)
+{
+	int res = -ENXIO;
+
+	mutex_lock(&kvm->slots_lock);
+	switch (attr->attr) {
+	case KVM_S390_VM_MIGRATION_START:
+		res = kvm_s390_vm_start_migration(kvm);
+		break;
+	case KVM_S390_VM_MIGRATION_STOP:
+		res = kvm_s390_vm_stop_migration(kvm);
+		break;
+	default:
+		break;
+	}
+	mutex_unlock(&kvm->slots_lock);
+
+	return res;
+}
+
+static int kvm_s390_vm_get_migration(struct kvm *kvm,
+				     struct kvm_device_attr *attr)
+{
+	u64 mig = kvm->arch.migration_mode;
+
+	if (attr->attr != KVM_S390_VM_MIGRATION_STATUS)
+		return -ENXIO;
+
+	if (copy_to_user((void __user *)attr->addr, &mig, sizeof(mig)))
+		return -EFAULT;
+	return 0;
+}
+
+static void __kvm_s390_set_tod_clock(struct kvm *kvm, const struct kvm_s390_vm_tod_clock *gtod);
+
+static int kvm_s390_set_tod_ext(struct kvm *kvm, struct kvm_device_attr *attr)
+{
+	struct kvm_s390_vm_tod_clock gtod;
+
+	if (copy_from_user(&gtod, (void __user *)attr->addr, sizeof(gtod)))
+		return -EFAULT;
+
+	if (!test_kvm_facility(kvm, 139) && gtod.epoch_idx)
+		return -EINVAL;
+	__kvm_s390_set_tod_clock(kvm, &gtod);
+
+	VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x, TOD base: 0x%llx",
+		gtod.epoch_idx, gtod.tod);
+
+	return 0;
+}
+
+static int kvm_s390_set_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
+{
+	u8 gtod_high;
+
+	if (copy_from_user(&gtod_high, (void __user *)attr->addr,
+					   sizeof(gtod_high)))
+		return -EFAULT;
+
+	if (gtod_high != 0)
+		return -EINVAL;
+	VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x", gtod_high);
+
+	return 0;
+}
+
+static int kvm_s390_set_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
+{
+	struct kvm_s390_vm_tod_clock gtod = { 0 };
+
+	if (copy_from_user(&gtod.tod, (void __user *)attr->addr,
+			   sizeof(gtod.tod)))
+		return -EFAULT;
+
+	__kvm_s390_set_tod_clock(kvm, &gtod);
+	VM_EVENT(kvm, 3, "SET: TOD base: 0x%llx", gtod.tod);
+	return 0;
+}
+
+static int kvm_s390_set_tod(struct kvm *kvm, struct kvm_device_attr *attr)
+{
+	int ret;
+
+	if (attr->flags)
+		return -EINVAL;
+
+	mutex_lock(&kvm->lock);
+	/*
+	 * For protected guests, the TOD is managed by the ultravisor, so trying
+	 * to change it will never bring the expected results.
+	 */
+	if (kvm_s390_pv_is_protected(kvm)) {
+		ret = -EOPNOTSUPP;
+		goto out_unlock;
+	}
+
+	switch (attr->attr) {
+	case KVM_S390_VM_TOD_EXT:
+		ret = kvm_s390_set_tod_ext(kvm, attr);
+		break;
+	case KVM_S390_VM_TOD_HIGH:
+		ret = kvm_s390_set_tod_high(kvm, attr);
+		break;
+	case KVM_S390_VM_TOD_LOW:
+		ret = kvm_s390_set_tod_low(kvm, attr);
+		break;
+	default:
+		ret = -ENXIO;
+		break;
+	}
+
+out_unlock:
+	mutex_unlock(&kvm->lock);
+	return ret;
+}
+
+static void kvm_s390_get_tod_clock(struct kvm *kvm,
+				   struct kvm_s390_vm_tod_clock *gtod)
+{
+	union tod_clock clk;
+
+	preempt_disable();
+
+	store_tod_clock_ext(&clk);
+
+	gtod->tod = clk.tod + kvm->arch.epoch;
+	gtod->epoch_idx = 0;
+	if (test_kvm_facility(kvm, 139)) {
+		gtod->epoch_idx = clk.ei + kvm->arch.epdx;
+		if (gtod->tod < clk.tod)
+			gtod->epoch_idx += 1;
+	}
+
+	preempt_enable();
+}
+
+static int kvm_s390_get_tod_ext(struct kvm *kvm, struct kvm_device_attr *attr)
+{
+	struct kvm_s390_vm_tod_clock gtod;
+
+	memset(&gtod, 0, sizeof(gtod));
+	kvm_s390_get_tod_clock(kvm, &gtod);
+	if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
+		return -EFAULT;
+
+	VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x, TOD base: 0x%llx",
+		gtod.epoch_idx, gtod.tod);
+	return 0;
+}
+
+static int kvm_s390_get_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
+{
+	u8 gtod_high = 0;
+
+	if (copy_to_user((void __user *)attr->addr, &gtod_high,
+					 sizeof(gtod_high)))
+		return -EFAULT;
+	VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x", gtod_high);
+
+	return 0;
+}
+
+static int kvm_s390_get_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
+{
+	u64 gtod;
+
+	gtod = kvm_s390_get_tod_clock_fast(kvm);
+	if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
+		return -EFAULT;
+	VM_EVENT(kvm, 3, "QUERY: TOD base: 0x%llx", gtod);
+
+	return 0;
+}
+
+static int kvm_s390_get_tod(struct kvm *kvm, struct kvm_device_attr *attr)
+{
+	int ret;
+
+	if (attr->flags)
+		return -EINVAL;
+
+	switch (attr->attr) {
+	case KVM_S390_VM_TOD_EXT:
+		ret = kvm_s390_get_tod_ext(kvm, attr);
+		break;
+	case KVM_S390_VM_TOD_HIGH:
+		ret = kvm_s390_get_tod_high(kvm, attr);
+		break;
+	case KVM_S390_VM_TOD_LOW:
+		ret = kvm_s390_get_tod_low(kvm, attr);
+		break;
+	default:
+		ret = -ENXIO;
+		break;
+	}
+	return ret;
+}
+
+static int kvm_s390_set_processor(struct kvm *kvm, struct kvm_device_attr *attr)
+{
+	struct kvm_s390_vm_cpu_processor *proc;
+	u16 lowest_ibc, unblocked_ibc;
+	int ret = 0;
+
+	mutex_lock(&kvm->lock);
+	if (kvm->created_vcpus) {
+		ret = -EBUSY;
+		goto out;
+	}
+	proc = kzalloc(sizeof(*proc), GFP_KERNEL_ACCOUNT);
+	if (!proc) {
+		ret = -ENOMEM;
+		goto out;
+	}
+	if (!copy_from_user(proc, (void __user *)attr->addr,
+			    sizeof(*proc))) {
+		kvm->arch.model.cpuid = proc->cpuid;
+		lowest_ibc = sclp.ibc >> 16 & 0xfff;
+		unblocked_ibc = sclp.ibc & 0xfff;
+		if (lowest_ibc && proc->ibc) {
+			if (proc->ibc > unblocked_ibc)
+				kvm->arch.model.ibc = unblocked_ibc;
+			else if (proc->ibc < lowest_ibc)
+				kvm->arch.model.ibc = lowest_ibc;
+			else
+				kvm->arch.model.ibc = proc->ibc;
+		}
+		memcpy(kvm->arch.model.fac_list, proc->fac_list,
+		       S390_ARCH_FAC_LIST_SIZE_BYTE);
+		VM_EVENT(kvm, 3, "SET: guest ibc: 0x%4.4x, guest cpuid: 0x%16.16llx",
+			 kvm->arch.model.ibc,
+			 kvm->arch.model.cpuid);
+		VM_EVENT(kvm, 3, "SET: guest faclist: 0x%16.16llx.%16.16llx.%16.16llx",
+			 kvm->arch.model.fac_list[0],
+			 kvm->arch.model.fac_list[1],
+			 kvm->arch.model.fac_list[2]);
+	} else
+		ret = -EFAULT;
+	kfree(proc);
+out:
+	mutex_unlock(&kvm->lock);
+	return ret;
+}
+
+static int kvm_s390_set_processor_feat(struct kvm *kvm,
+				       struct kvm_device_attr *attr)
+{
+	struct kvm_s390_vm_cpu_feat data;
+
+	if (copy_from_user(&data, (void __user *)attr->addr, sizeof(data)))
+		return -EFAULT;
+	if (!bitmap_subset((unsigned long *) data.feat,
+			   kvm_s390_available_cpu_feat,
+			   KVM_S390_VM_CPU_FEAT_NR_BITS))
+		return -EINVAL;
+
+	mutex_lock(&kvm->lock);
+	if (kvm->created_vcpus) {
+		mutex_unlock(&kvm->lock);
+		return -EBUSY;
+	}
+	bitmap_from_arr64(kvm->arch.cpu_feat, data.feat, KVM_S390_VM_CPU_FEAT_NR_BITS);
+	mutex_unlock(&kvm->lock);
+	VM_EVENT(kvm, 3, "SET: guest feat: 0x%16.16llx.0x%16.16llx.0x%16.16llx",
+			 data.feat[0],
+			 data.feat[1],
+			 data.feat[2]);
+	return 0;
+}
+
+static int kvm_s390_set_processor_subfunc(struct kvm *kvm,
+					  struct kvm_device_attr *attr)
+{
+	mutex_lock(&kvm->lock);
+	if (kvm->created_vcpus) {
+		mutex_unlock(&kvm->lock);
+		return -EBUSY;
+	}
+
+	if (copy_from_user(&kvm->arch.model.subfuncs, (void __user *)attr->addr,
+			   sizeof(struct kvm_s390_vm_cpu_subfunc))) {
+		mutex_unlock(&kvm->lock);
+		return -EFAULT;
+	}
+	mutex_unlock(&kvm->lock);
+
+	VM_EVENT(kvm, 3, "SET: guest PLO    subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
+		 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[0],
+		 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[1],
+		 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[2],
+		 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[3]);
+	VM_EVENT(kvm, 3, "SET: guest PTFF   subfunc 0x%16.16lx.%16.16lx",
+		 ((unsigned long *) &kvm->arch.model.subfuncs.ptff)[0],
+		 ((unsigned long *) &kvm->arch.model.subfuncs.ptff)[1]);
+	VM_EVENT(kvm, 3, "SET: guest KMAC   subfunc 0x%16.16lx.%16.16lx",
+		 ((unsigned long *) &kvm->arch.model.subfuncs.kmac)[0],
+		 ((unsigned long *) &kvm->arch.model.subfuncs.kmac)[1]);
+	VM_EVENT(kvm, 3, "SET: guest KMC    subfunc 0x%16.16lx.%16.16lx",
+		 ((unsigned long *) &kvm->arch.model.subfuncs.kmc)[0],
+		 ((unsigned long *) &kvm->arch.model.subfuncs.kmc)[1]);
+	VM_EVENT(kvm, 3, "SET: guest KM     subfunc 0x%16.16lx.%16.16lx",
+		 ((unsigned long *) &kvm->arch.model.subfuncs.km)[0],
+		 ((unsigned long *) &kvm->arch.model.subfuncs.km)[1]);
+	VM_EVENT(kvm, 3, "SET: guest KIMD   subfunc 0x%16.16lx.%16.16lx",
+		 ((unsigned long *) &kvm->arch.model.subfuncs.kimd)[0],
+		 ((unsigned long *) &kvm->arch.model.subfuncs.kimd)[1]);
+	VM_EVENT(kvm, 3, "SET: guest KLMD   subfunc 0x%16.16lx.%16.16lx",
+		 ((unsigned long *) &kvm->arch.model.subfuncs.klmd)[0],
+		 ((unsigned long *) &kvm->arch.model.subfuncs.klmd)[1]);
+	VM_EVENT(kvm, 3, "SET: guest PCKMO  subfunc 0x%16.16lx.%16.16lx",
+		 ((unsigned long *) &kvm->arch.model.subfuncs.pckmo)[0],
+		 ((unsigned long *) &kvm->arch.model.subfuncs.pckmo)[1]);
+	VM_EVENT(kvm, 3, "SET: guest KMCTR  subfunc 0x%16.16lx.%16.16lx",
+		 ((unsigned long *) &kvm->arch.model.subfuncs.kmctr)[0],
+		 ((unsigned long *) &kvm->arch.model.subfuncs.kmctr)[1]);
+	VM_EVENT(kvm, 3, "SET: guest KMF    subfunc 0x%16.16lx.%16.16lx",
+		 ((unsigned long *) &kvm->arch.model.subfuncs.kmf)[0],
+		 ((unsigned long *) &kvm->arch.model.subfuncs.kmf)[1]);
+	VM_EVENT(kvm, 3, "SET: guest KMO    subfunc 0x%16.16lx.%16.16lx",
+		 ((unsigned long *) &kvm->arch.model.subfuncs.kmo)[0],
+		 ((unsigned long *) &kvm->arch.model.subfuncs.kmo)[1]);
+	VM_EVENT(kvm, 3, "SET: guest PCC    subfunc 0x%16.16lx.%16.16lx",
+		 ((unsigned long *) &kvm->arch.model.subfuncs.pcc)[0],
+		 ((unsigned long *) &kvm->arch.model.subfuncs.pcc)[1]);
+	VM_EVENT(kvm, 3, "SET: guest PPNO   subfunc 0x%16.16lx.%16.16lx",
+		 ((unsigned long *) &kvm->arch.model.subfuncs.ppno)[0],
+		 ((unsigned long *) &kvm->arch.model.subfuncs.ppno)[1]);
+	VM_EVENT(kvm, 3, "SET: guest KMA    subfunc 0x%16.16lx.%16.16lx",
+		 ((unsigned long *) &kvm->arch.model.subfuncs.kma)[0],
+		 ((unsigned long *) &kvm->arch.model.subfuncs.kma)[1]);
+	VM_EVENT(kvm, 3, "SET: guest KDSA   subfunc 0x%16.16lx.%16.16lx",
+		 ((unsigned long *) &kvm->arch.model.subfuncs.kdsa)[0],
+		 ((unsigned long *) &kvm->arch.model.subfuncs.kdsa)[1]);
+	VM_EVENT(kvm, 3, "SET: guest SORTL  subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
+		 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[0],
+		 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[1],
+		 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[2],
+		 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[3]);
+	VM_EVENT(kvm, 3, "SET: guest DFLTCC subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
+		 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[0],
+		 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[1],
+		 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[2],
+		 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[3]);
+
+	return 0;
+}
+
+#define KVM_S390_VM_CPU_UV_FEAT_GUEST_MASK	\
+(						\
+	((struct kvm_s390_vm_cpu_uv_feat){	\
+		.ap = 1,			\
+		.ap_intr = 1,			\
+	})					\
+	.feat					\
+)
+
+static int kvm_s390_set_uv_feat(struct kvm *kvm, struct kvm_device_attr *attr)
+{
+	struct kvm_s390_vm_cpu_uv_feat __user *ptr = (void __user *)attr->addr;
+	unsigned long data, filter;
+
+	filter = uv_info.uv_feature_indications & KVM_S390_VM_CPU_UV_FEAT_GUEST_MASK;
+	if (get_user(data, &ptr->feat))
+		return -EFAULT;
+	if (!bitmap_subset(&data, &filter, KVM_S390_VM_CPU_UV_FEAT_NR_BITS))
+		return -EINVAL;
+
+	mutex_lock(&kvm->lock);
+	if (kvm->created_vcpus) {
+		mutex_unlock(&kvm->lock);
+		return -EBUSY;
+	}
+	kvm->arch.model.uv_feat_guest.feat = data;
+	mutex_unlock(&kvm->lock);
+
+	VM_EVENT(kvm, 3, "SET: guest UV-feat: 0x%16.16lx", data);
+
+	return 0;
+}
+
+static int kvm_s390_set_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
+{
+	int ret = -ENXIO;
+
+	switch (attr->attr) {
+	case KVM_S390_VM_CPU_PROCESSOR:
+		ret = kvm_s390_set_processor(kvm, attr);
+		break;
+	case KVM_S390_VM_CPU_PROCESSOR_FEAT:
+		ret = kvm_s390_set_processor_feat(kvm, attr);
+		break;
+	case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
+		ret = kvm_s390_set_processor_subfunc(kvm, attr);
+		break;
+	case KVM_S390_VM_CPU_PROCESSOR_UV_FEAT_GUEST:
+		ret = kvm_s390_set_uv_feat(kvm, attr);
+		break;
+	}
+	return ret;
+}
+
+static int kvm_s390_get_processor(struct kvm *kvm, struct kvm_device_attr *attr)
+{
+	struct kvm_s390_vm_cpu_processor *proc;
+	int ret = 0;
+
+	proc = kzalloc(sizeof(*proc), GFP_KERNEL_ACCOUNT);
+	if (!proc) {
+		ret = -ENOMEM;
+		goto out;
+	}
+	proc->cpuid = kvm->arch.model.cpuid;
+	proc->ibc = kvm->arch.model.ibc;
+	memcpy(&proc->fac_list, kvm->arch.model.fac_list,
+	       S390_ARCH_FAC_LIST_SIZE_BYTE);
+	VM_EVENT(kvm, 3, "GET: guest ibc: 0x%4.4x, guest cpuid: 0x%16.16llx",
+		 kvm->arch.model.ibc,
+		 kvm->arch.model.cpuid);
+	VM_EVENT(kvm, 3, "GET: guest faclist: 0x%16.16llx.%16.16llx.%16.16llx",
+		 kvm->arch.model.fac_list[0],
+		 kvm->arch.model.fac_list[1],
+		 kvm->arch.model.fac_list[2]);
+	if (copy_to_user((void __user *)attr->addr, proc, sizeof(*proc)))
+		ret = -EFAULT;
+	kfree(proc);
+out:
+	return ret;
+}
+
+static int kvm_s390_get_machine(struct kvm *kvm, struct kvm_device_attr *attr)
+{
+	struct kvm_s390_vm_cpu_machine *mach;
+	int ret = 0;
+
+	mach = kzalloc(sizeof(*mach), GFP_KERNEL_ACCOUNT);
+	if (!mach) {
+		ret = -ENOMEM;
+		goto out;
+	}
+	get_cpu_id((struct cpuid *) &mach->cpuid);
+	mach->ibc = sclp.ibc;
+	memcpy(&mach->fac_mask, kvm->arch.model.fac_mask,
+	       S390_ARCH_FAC_LIST_SIZE_BYTE);
+	memcpy((unsigned long *)&mach->fac_list, stfle_fac_list,
+	       sizeof(stfle_fac_list));
+	VM_EVENT(kvm, 3, "GET: host ibc:  0x%4.4x, host cpuid:  0x%16.16llx",
+		 kvm->arch.model.ibc,
+		 kvm->arch.model.cpuid);
+	VM_EVENT(kvm, 3, "GET: host facmask:  0x%16.16llx.%16.16llx.%16.16llx",
+		 mach->fac_mask[0],
+		 mach->fac_mask[1],
+		 mach->fac_mask[2]);
+	VM_EVENT(kvm, 3, "GET: host faclist:  0x%16.16llx.%16.16llx.%16.16llx",
+		 mach->fac_list[0],
+		 mach->fac_list[1],
+		 mach->fac_list[2]);
+	if (copy_to_user((void __user *)attr->addr, mach, sizeof(*mach)))
+		ret = -EFAULT;
+	kfree(mach);
+out:
+	return ret;
+}
+
+static int kvm_s390_get_processor_feat(struct kvm *kvm,
+				       struct kvm_device_attr *attr)
+{
+	struct kvm_s390_vm_cpu_feat data;
+
+	bitmap_to_arr64(data.feat, kvm->arch.cpu_feat, KVM_S390_VM_CPU_FEAT_NR_BITS);
+	if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
+		return -EFAULT;
+	VM_EVENT(kvm, 3, "GET: guest feat: 0x%16.16llx.0x%16.16llx.0x%16.16llx",
+			 data.feat[0],
+			 data.feat[1],
+			 data.feat[2]);
+	return 0;
+}
+
+static int kvm_s390_get_machine_feat(struct kvm *kvm,
+				     struct kvm_device_attr *attr)
+{
+	struct kvm_s390_vm_cpu_feat data;
+
+	bitmap_to_arr64(data.feat, kvm_s390_available_cpu_feat, KVM_S390_VM_CPU_FEAT_NR_BITS);
+	if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
+		return -EFAULT;
+	VM_EVENT(kvm, 3, "GET: host feat:  0x%16.16llx.0x%16.16llx.0x%16.16llx",
+			 data.feat[0],
+			 data.feat[1],
+			 data.feat[2]);
+	return 0;
+}
+
+static int kvm_s390_get_processor_subfunc(struct kvm *kvm,
+					  struct kvm_device_attr *attr)
+{
+	if (copy_to_user((void __user *)attr->addr, &kvm->arch.model.subfuncs,
+	    sizeof(struct kvm_s390_vm_cpu_subfunc)))
+		return -EFAULT;
+
+	VM_EVENT(kvm, 3, "GET: guest PLO    subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
+		 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[0],
+		 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[1],
+		 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[2],
+		 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[3]);
+	VM_EVENT(kvm, 3, "GET: guest PTFF   subfunc 0x%16.16lx.%16.16lx",
+		 ((unsigned long *) &kvm->arch.model.subfuncs.ptff)[0],
+		 ((unsigned long *) &kvm->arch.model.subfuncs.ptff)[1]);
+	VM_EVENT(kvm, 3, "GET: guest KMAC   subfunc 0x%16.16lx.%16.16lx",
+		 ((unsigned long *) &kvm->arch.model.subfuncs.kmac)[0],
+		 ((unsigned long *) &kvm->arch.model.subfuncs.kmac)[1]);
+	VM_EVENT(kvm, 3, "GET: guest KMC    subfunc 0x%16.16lx.%16.16lx",
+		 ((unsigned long *) &kvm->arch.model.subfuncs.kmc)[0],
+		 ((unsigned long *) &kvm->arch.model.subfuncs.kmc)[1]);
+	VM_EVENT(kvm, 3, "GET: guest KM     subfunc 0x%16.16lx.%16.16lx",
+		 ((unsigned long *) &kvm->arch.model.subfuncs.km)[0],
+		 ((unsigned long *) &kvm->arch.model.subfuncs.km)[1]);
+	VM_EVENT(kvm, 3, "GET: guest KIMD   subfunc 0x%16.16lx.%16.16lx",
+		 ((unsigned long *) &kvm->arch.model.subfuncs.kimd)[0],
+		 ((unsigned long *) &kvm->arch.model.subfuncs.kimd)[1]);
+	VM_EVENT(kvm, 3, "GET: guest KLMD   subfunc 0x%16.16lx.%16.16lx",
+		 ((unsigned long *) &kvm->arch.model.subfuncs.klmd)[0],
+		 ((unsigned long *) &kvm->arch.model.subfuncs.klmd)[1]);
+	VM_EVENT(kvm, 3, "GET: guest PCKMO  subfunc 0x%16.16lx.%16.16lx",
+		 ((unsigned long *) &kvm->arch.model.subfuncs.pckmo)[0],
+		 ((unsigned long *) &kvm->arch.model.subfuncs.pckmo)[1]);
+	VM_EVENT(kvm, 3, "GET: guest KMCTR  subfunc 0x%16.16lx.%16.16lx",
+		 ((unsigned long *) &kvm->arch.model.subfuncs.kmctr)[0],
+		 ((unsigned long *) &kvm->arch.model.subfuncs.kmctr)[1]);
+	VM_EVENT(kvm, 3, "GET: guest KMF    subfunc 0x%16.16lx.%16.16lx",
+		 ((unsigned long *) &kvm->arch.model.subfuncs.kmf)[0],
+		 ((unsigned long *) &kvm->arch.model.subfuncs.kmf)[1]);
+	VM_EVENT(kvm, 3, "GET: guest KMO    subfunc 0x%16.16lx.%16.16lx",
+		 ((unsigned long *) &kvm->arch.model.subfuncs.kmo)[0],
+		 ((unsigned long *) &kvm->arch.model.subfuncs.kmo)[1]);
+	VM_EVENT(kvm, 3, "GET: guest PCC    subfunc 0x%16.16lx.%16.16lx",
+		 ((unsigned long *) &kvm->arch.model.subfuncs.pcc)[0],
+		 ((unsigned long *) &kvm->arch.model.subfuncs.pcc)[1]);
+	VM_EVENT(kvm, 3, "GET: guest PPNO   subfunc 0x%16.16lx.%16.16lx",
+		 ((unsigned long *) &kvm->arch.model.subfuncs.ppno)[0],
+		 ((unsigned long *) &kvm->arch.model.subfuncs.ppno)[1]);
+	VM_EVENT(kvm, 3, "GET: guest KMA    subfunc 0x%16.16lx.%16.16lx",
+		 ((unsigned long *) &kvm->arch.model.subfuncs.kma)[0],
+		 ((unsigned long *) &kvm->arch.model.subfuncs.kma)[1]);
+	VM_EVENT(kvm, 3, "GET: guest KDSA   subfunc 0x%16.16lx.%16.16lx",
+		 ((unsigned long *) &kvm->arch.model.subfuncs.kdsa)[0],
+		 ((unsigned long *) &kvm->arch.model.subfuncs.kdsa)[1]);
+	VM_EVENT(kvm, 3, "GET: guest SORTL  subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
+		 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[0],
+		 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[1],
+		 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[2],
+		 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[3]);
+	VM_EVENT(kvm, 3, "GET: guest DFLTCC subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
+		 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[0],
+		 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[1],
+		 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[2],
+		 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[3]);
+
+	return 0;
+}
+
+static int kvm_s390_get_machine_subfunc(struct kvm *kvm,
+					struct kvm_device_attr *attr)
+{
+	if (copy_to_user((void __user *)attr->addr, &kvm_s390_available_subfunc,
+	    sizeof(struct kvm_s390_vm_cpu_subfunc)))
+		return -EFAULT;
+
+	VM_EVENT(kvm, 3, "GET: host  PLO    subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
+		 ((unsigned long *) &kvm_s390_available_subfunc.plo)[0],
+		 ((unsigned long *) &kvm_s390_available_subfunc.plo)[1],
+		 ((unsigned long *) &kvm_s390_available_subfunc.plo)[2],
+		 ((unsigned long *) &kvm_s390_available_subfunc.plo)[3]);
+	VM_EVENT(kvm, 3, "GET: host  PTFF   subfunc 0x%16.16lx.%16.16lx",
+		 ((unsigned long *) &kvm_s390_available_subfunc.ptff)[0],
+		 ((unsigned long *) &kvm_s390_available_subfunc.ptff)[1]);
+	VM_EVENT(kvm, 3, "GET: host  KMAC   subfunc 0x%16.16lx.%16.16lx",
+		 ((unsigned long *) &kvm_s390_available_subfunc.kmac)[0],
+		 ((unsigned long *) &kvm_s390_available_subfunc.kmac)[1]);
+	VM_EVENT(kvm, 3, "GET: host  KMC    subfunc 0x%16.16lx.%16.16lx",
+		 ((unsigned long *) &kvm_s390_available_subfunc.kmc)[0],
+		 ((unsigned long *) &kvm_s390_available_subfunc.kmc)[1]);
+	VM_EVENT(kvm, 3, "GET: host  KM     subfunc 0x%16.16lx.%16.16lx",
+		 ((unsigned long *) &kvm_s390_available_subfunc.km)[0],
+		 ((unsigned long *) &kvm_s390_available_subfunc.km)[1]);
+	VM_EVENT(kvm, 3, "GET: host  KIMD   subfunc 0x%16.16lx.%16.16lx",
+		 ((unsigned long *) &kvm_s390_available_subfunc.kimd)[0],
+		 ((unsigned long *) &kvm_s390_available_subfunc.kimd)[1]);
+	VM_EVENT(kvm, 3, "GET: host  KLMD   subfunc 0x%16.16lx.%16.16lx",
+		 ((unsigned long *) &kvm_s390_available_subfunc.klmd)[0],
+		 ((unsigned long *) &kvm_s390_available_subfunc.klmd)[1]);
+	VM_EVENT(kvm, 3, "GET: host  PCKMO  subfunc 0x%16.16lx.%16.16lx",
+		 ((unsigned long *) &kvm_s390_available_subfunc.pckmo)[0],
+		 ((unsigned long *) &kvm_s390_available_subfunc.pckmo)[1]);
+	VM_EVENT(kvm, 3, "GET: host  KMCTR  subfunc 0x%16.16lx.%16.16lx",
+		 ((unsigned long *) &kvm_s390_available_subfunc.kmctr)[0],
+		 ((unsigned long *) &kvm_s390_available_subfunc.kmctr)[1]);
+	VM_EVENT(kvm, 3, "GET: host  KMF    subfunc 0x%16.16lx.%16.16lx",
+		 ((unsigned long *) &kvm_s390_available_subfunc.kmf)[0],
+		 ((unsigned long *) &kvm_s390_available_subfunc.kmf)[1]);
+	VM_EVENT(kvm, 3, "GET: host  KMO    subfunc 0x%16.16lx.%16.16lx",
+		 ((unsigned long *) &kvm_s390_available_subfunc.kmo)[0],
+		 ((unsigned long *) &kvm_s390_available_subfunc.kmo)[1]);
+	VM_EVENT(kvm, 3, "GET: host  PCC    subfunc 0x%16.16lx.%16.16lx",
+		 ((unsigned long *) &kvm_s390_available_subfunc.pcc)[0],
+		 ((unsigned long *) &kvm_s390_available_subfunc.pcc)[1]);
+	VM_EVENT(kvm, 3, "GET: host  PPNO   subfunc 0x%16.16lx.%16.16lx",
+		 ((unsigned long *) &kvm_s390_available_subfunc.ppno)[0],
+		 ((unsigned long *) &kvm_s390_available_subfunc.ppno)[1]);
+	VM_EVENT(kvm, 3, "GET: host  KMA    subfunc 0x%16.16lx.%16.16lx",
+		 ((unsigned long *) &kvm_s390_available_subfunc.kma)[0],
+		 ((unsigned long *) &kvm_s390_available_subfunc.kma)[1]);
+	VM_EVENT(kvm, 3, "GET: host  KDSA   subfunc 0x%16.16lx.%16.16lx",
+		 ((unsigned long *) &kvm_s390_available_subfunc.kdsa)[0],
+		 ((unsigned long *) &kvm_s390_available_subfunc.kdsa)[1]);
+	VM_EVENT(kvm, 3, "GET: host  SORTL  subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
+		 ((unsigned long *) &kvm_s390_available_subfunc.sortl)[0],
+		 ((unsigned long *) &kvm_s390_available_subfunc.sortl)[1],
+		 ((unsigned long *) &kvm_s390_available_subfunc.sortl)[2],
+		 ((unsigned long *) &kvm_s390_available_subfunc.sortl)[3]);
+	VM_EVENT(kvm, 3, "GET: host  DFLTCC subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
+		 ((unsigned long *) &kvm_s390_available_subfunc.dfltcc)[0],
+		 ((unsigned long *) &kvm_s390_available_subfunc.dfltcc)[1],
+		 ((unsigned long *) &kvm_s390_available_subfunc.dfltcc)[2],
+		 ((unsigned long *) &kvm_s390_available_subfunc.dfltcc)[3]);
+
+	return 0;
+}
+
+static int kvm_s390_get_processor_uv_feat(struct kvm *kvm, struct kvm_device_attr *attr)
+{
+	struct kvm_s390_vm_cpu_uv_feat __user *dst = (void __user *)attr->addr;
+	unsigned long feat = kvm->arch.model.uv_feat_guest.feat;
+
+	if (put_user(feat, &dst->feat))
+		return -EFAULT;
+	VM_EVENT(kvm, 3, "GET: guest UV-feat: 0x%16.16lx", feat);
+
+	return 0;
+}
+
+static int kvm_s390_get_machine_uv_feat(struct kvm *kvm, struct kvm_device_attr *attr)
+{
+	struct kvm_s390_vm_cpu_uv_feat __user *dst = (void __user *)attr->addr;
+	unsigned long feat;
+
+	BUILD_BUG_ON(sizeof(*dst) != sizeof(uv_info.uv_feature_indications));
+
+	feat = uv_info.uv_feature_indications & KVM_S390_VM_CPU_UV_FEAT_GUEST_MASK;
+	if (put_user(feat, &dst->feat))
+		return -EFAULT;
+	VM_EVENT(kvm, 3, "GET: guest UV-feat: 0x%16.16lx", feat);
+
+	return 0;
+}
+
+static int kvm_s390_get_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
+{
+	int ret = -ENXIO;
+
+	switch (attr->attr) {
+	case KVM_S390_VM_CPU_PROCESSOR:
+		ret = kvm_s390_get_processor(kvm, attr);
+		break;
+	case KVM_S390_VM_CPU_MACHINE:
+		ret = kvm_s390_get_machine(kvm, attr);
+		break;
+	case KVM_S390_VM_CPU_PROCESSOR_FEAT:
+		ret = kvm_s390_get_processor_feat(kvm, attr);
+		break;
+	case KVM_S390_VM_CPU_MACHINE_FEAT:
+		ret = kvm_s390_get_machine_feat(kvm, attr);
+		break;
+	case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
+		ret = kvm_s390_get_processor_subfunc(kvm, attr);
+		break;
+	case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
+		ret = kvm_s390_get_machine_subfunc(kvm, attr);
+		break;
+	case KVM_S390_VM_CPU_PROCESSOR_UV_FEAT_GUEST:
+		ret = kvm_s390_get_processor_uv_feat(kvm, attr);
+		break;
+	case KVM_S390_VM_CPU_MACHINE_UV_FEAT_GUEST:
+		ret = kvm_s390_get_machine_uv_feat(kvm, attr);
+		break;
+	}
+	return ret;
+}
+
+/**
+ * kvm_s390_update_topology_change_report - update CPU topology change report
+ * @kvm: guest KVM description
+ * @val: set or clear the MTCR bit
+ *
+ * Updates the Multiprocessor Topology-Change-Report bit to signal
+ * the guest with a topology change.
+ * This is only relevant if the topology facility is present.
+ *
+ * The SCA version, bsca or esca, doesn't matter as offset is the same.
+ */
+static void kvm_s390_update_topology_change_report(struct kvm *kvm, bool val)
+{
+	union sca_utility new, old;
+	struct bsca_block *sca;
+
+	read_lock(&kvm->arch.sca_lock);
+	sca = kvm->arch.sca;
+	do {
+		old = READ_ONCE(sca->utility);
+		new = old;
+		new.mtcr = val;
+	} while (cmpxchg(&sca->utility.val, old.val, new.val) != old.val);
+	read_unlock(&kvm->arch.sca_lock);
+}
+
+static int kvm_s390_set_topo_change_indication(struct kvm *kvm,
+					       struct kvm_device_attr *attr)
+{
+	if (!test_kvm_facility(kvm, 11))
+		return -ENXIO;
+
+	kvm_s390_update_topology_change_report(kvm, !!attr->attr);
+	return 0;
+}
+
+static int kvm_s390_get_topo_change_indication(struct kvm *kvm,
+					       struct kvm_device_attr *attr)
+{
+	u8 topo;
+
+	if (!test_kvm_facility(kvm, 11))
+		return -ENXIO;
+
+	read_lock(&kvm->arch.sca_lock);
+	topo = ((struct bsca_block *)kvm->arch.sca)->utility.mtcr;
+	read_unlock(&kvm->arch.sca_lock);
+
+	return put_user(topo, (u8 __user *)attr->addr);
+}
+
+static int kvm_s390_vm_set_attr(struct kvm *kvm, struct kvm_device_attr *attr)
+{
+	int ret;
+
+	switch (attr->group) {
+	case KVM_S390_VM_MEM_CTRL:
+		ret = kvm_s390_set_mem_control(kvm, attr);
+		break;
+	case KVM_S390_VM_TOD:
+		ret = kvm_s390_set_tod(kvm, attr);
+		break;
+	case KVM_S390_VM_CPU_MODEL:
+		ret = kvm_s390_set_cpu_model(kvm, attr);
+		break;
+	case KVM_S390_VM_CRYPTO:
+		ret = kvm_s390_vm_set_crypto(kvm, attr);
+		break;
+	case KVM_S390_VM_MIGRATION:
+		ret = kvm_s390_vm_set_migration(kvm, attr);
+		break;
+	case KVM_S390_VM_CPU_TOPOLOGY:
+		ret = kvm_s390_set_topo_change_indication(kvm, attr);
+		break;
+	default:
+		ret = -ENXIO;
+		break;
+	}
+
+	return ret;
+}
+
+static int kvm_s390_vm_get_attr(struct kvm *kvm, struct kvm_device_attr *attr)
+{
+	int ret;
+
+	switch (attr->group) {
+	case KVM_S390_VM_MEM_CTRL:
+		ret = kvm_s390_get_mem_control(kvm, attr);
+		break;
+	case KVM_S390_VM_TOD:
+		ret = kvm_s390_get_tod(kvm, attr);
+		break;
+	case KVM_S390_VM_CPU_MODEL:
+		ret = kvm_s390_get_cpu_model(kvm, attr);
+		break;
+	case KVM_S390_VM_MIGRATION:
+		ret = kvm_s390_vm_get_migration(kvm, attr);
+		break;
+	case KVM_S390_VM_CPU_TOPOLOGY:
+		ret = kvm_s390_get_topo_change_indication(kvm, attr);
+		break;
+	default:
+		ret = -ENXIO;
+		break;
+	}
+
+	return ret;
+}
+
+static int kvm_s390_vm_has_attr(struct kvm *kvm, struct kvm_device_attr *attr)
+{
+	int ret;
+
+	switch (attr->group) {
+	case KVM_S390_VM_MEM_CTRL:
+		switch (attr->attr) {
+		case KVM_S390_VM_MEM_ENABLE_CMMA:
+		case KVM_S390_VM_MEM_CLR_CMMA:
+			ret = sclp.has_cmma ? 0 : -ENXIO;
+			break;
+		case KVM_S390_VM_MEM_LIMIT_SIZE:
+			ret = 0;
+			break;
+		default:
+			ret = -ENXIO;
+			break;
+		}
+		break;
+	case KVM_S390_VM_TOD:
+		switch (attr->attr) {
+		case KVM_S390_VM_TOD_LOW:
+		case KVM_S390_VM_TOD_HIGH:
+			ret = 0;
+			break;
+		default:
+			ret = -ENXIO;
+			break;
+		}
+		break;
+	case KVM_S390_VM_CPU_MODEL:
+		switch (attr->attr) {
+		case KVM_S390_VM_CPU_PROCESSOR:
+		case KVM_S390_VM_CPU_MACHINE:
+		case KVM_S390_VM_CPU_PROCESSOR_FEAT:
+		case KVM_S390_VM_CPU_MACHINE_FEAT:
+		case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
+		case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
+		case KVM_S390_VM_CPU_MACHINE_UV_FEAT_GUEST:
+		case KVM_S390_VM_CPU_PROCESSOR_UV_FEAT_GUEST:
+			ret = 0;
+			break;
+		default:
+			ret = -ENXIO;
+			break;
+		}
+		break;
+	case KVM_S390_VM_CRYPTO:
+		switch (attr->attr) {
+		case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
+		case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
+		case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
+		case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
+			ret = 0;
+			break;
+		case KVM_S390_VM_CRYPTO_ENABLE_APIE:
+		case KVM_S390_VM_CRYPTO_DISABLE_APIE:
+			ret = ap_instructions_available() ? 0 : -ENXIO;
+			break;
+		default:
+			ret = -ENXIO;
+			break;
+		}
+		break;
+	case KVM_S390_VM_MIGRATION:
+		ret = 0;
+		break;
+	case KVM_S390_VM_CPU_TOPOLOGY:
+		ret = test_kvm_facility(kvm, 11) ? 0 : -ENXIO;
+		break;
+	default:
+		ret = -ENXIO;
+		break;
+	}
+
+	return ret;
+}
+
+static int kvm_s390_get_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
+{
+	uint8_t *keys;
+	uint64_t hva;
+	int srcu_idx, i, r = 0;
+
+	if (args->flags != 0)
+		return -EINVAL;
+
+	/* Is this guest using storage keys? */
+	if (!mm_uses_skeys(current->mm))
+		return KVM_S390_GET_SKEYS_NONE;
+
+	/* Enforce sane limit on memory allocation */
+	if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
+		return -EINVAL;
+
+	keys = kvmalloc_array(args->count, sizeof(uint8_t), GFP_KERNEL_ACCOUNT);
+	if (!keys)
+		return -ENOMEM;
+
+	mmap_read_lock(current->mm);
+	srcu_idx = srcu_read_lock(&kvm->srcu);
+	for (i = 0; i < args->count; i++) {
+		hva = gfn_to_hva(kvm, args->start_gfn + i);
+		if (kvm_is_error_hva(hva)) {
+			r = -EFAULT;
+			break;
+		}
+
+		r = get_guest_storage_key(current->mm, hva, &keys[i]);
+		if (r)
+			break;
+	}
+	srcu_read_unlock(&kvm->srcu, srcu_idx);
+	mmap_read_unlock(current->mm);
+
+	if (!r) {
+		r = copy_to_user((uint8_t __user *)args->skeydata_addr, keys,
+				 sizeof(uint8_t) * args->count);
+		if (r)
+			r = -EFAULT;
+	}
+
+	kvfree(keys);
+	return r;
+}
+
+static int kvm_s390_set_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
+{
+	uint8_t *keys;
+	uint64_t hva;
+	int srcu_idx, i, r = 0;
+	bool unlocked;
+
+	if (args->flags != 0)
+		return -EINVAL;
+
+	/* Enforce sane limit on memory allocation */
+	if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
+		return -EINVAL;
+
+	keys = kvmalloc_array(args->count, sizeof(uint8_t), GFP_KERNEL_ACCOUNT);
+	if (!keys)
+		return -ENOMEM;
+
+	r = copy_from_user(keys, (uint8_t __user *)args->skeydata_addr,
+			   sizeof(uint8_t) * args->count);
+	if (r) {
+		r = -EFAULT;
+		goto out;
+	}
+
+	/* Enable storage key handling for the guest */
+	r = s390_enable_skey();
+	if (r)
+		goto out;
+
+	i = 0;
+	mmap_read_lock(current->mm);
+	srcu_idx = srcu_read_lock(&kvm->srcu);
+        while (i < args->count) {
+		unlocked = false;
+		hva = gfn_to_hva(kvm, args->start_gfn + i);
+		if (kvm_is_error_hva(hva)) {
+			r = -EFAULT;
+			break;
+		}
+
+		/* Lowest order bit is reserved */
+		if (keys[i] & 0x01) {
+			r = -EINVAL;
+			break;
+		}
+
+		r = set_guest_storage_key(current->mm, hva, keys[i], 0);
+		if (r) {
+			r = fixup_user_fault(current->mm, hva,
+					     FAULT_FLAG_WRITE, &unlocked);
+			if (r)
+				break;
+		}
+		if (!r)
+			i++;
+	}
+	srcu_read_unlock(&kvm->srcu, srcu_idx);
+	mmap_read_unlock(current->mm);
+out:
+	kvfree(keys);
+	return r;
+}
+
+/*
+ * Base address and length must be sent at the start of each block, therefore
+ * it's cheaper to send some clean data, as long as it's less than the size of
+ * two longs.
+ */
+#define KVM_S390_MAX_BIT_DISTANCE (2 * sizeof(void *))
+/* for consistency */
+#define KVM_S390_CMMA_SIZE_MAX ((u32)KVM_S390_SKEYS_MAX)
+
+static int kvm_s390_peek_cmma(struct kvm *kvm, struct kvm_s390_cmma_log *args,
+			      u8 *res, unsigned long bufsize)
+{
+	unsigned long pgstev, hva, cur_gfn = args->start_gfn;
+
+	args->count = 0;
+	while (args->count < bufsize) {
+		hva = gfn_to_hva(kvm, cur_gfn);
+		/*
+		 * We return an error if the first value was invalid, but we
+		 * return successfully if at least one value was copied.
+		 */
+		if (kvm_is_error_hva(hva))
+			return args->count ? 0 : -EFAULT;
+		if (get_pgste(kvm->mm, hva, &pgstev) < 0)
+			pgstev = 0;
+		res[args->count++] = (pgstev >> 24) & 0x43;
+		cur_gfn++;
+	}
+
+	return 0;
+}
+
+static struct kvm_memory_slot *gfn_to_memslot_approx(struct kvm_memslots *slots,
+						     gfn_t gfn)
+{
+	return ____gfn_to_memslot(slots, gfn, true);
+}
+
+static unsigned long kvm_s390_next_dirty_cmma(struct kvm_memslots *slots,
+					      unsigned long cur_gfn)
+{
+	struct kvm_memory_slot *ms = gfn_to_memslot_approx(slots, cur_gfn);
+	unsigned long ofs = cur_gfn - ms->base_gfn;
+	struct rb_node *mnode = &ms->gfn_node[slots->node_idx];
+
+	if (ms->base_gfn + ms->npages <= cur_gfn) {
+		mnode = rb_next(mnode);
+		/* If we are above the highest slot, wrap around */
+		if (!mnode)
+			mnode = rb_first(&slots->gfn_tree);
+
+		ms = container_of(mnode, struct kvm_memory_slot, gfn_node[slots->node_idx]);
+		ofs = 0;
+	}
+
+	if (cur_gfn < ms->base_gfn)
+		ofs = 0;
+
+	ofs = find_next_bit(kvm_second_dirty_bitmap(ms), ms->npages, ofs);
+	while (ofs >= ms->npages && (mnode = rb_next(mnode))) {
+		ms = container_of(mnode, struct kvm_memory_slot, gfn_node[slots->node_idx]);
+		ofs = find_first_bit(kvm_second_dirty_bitmap(ms), ms->npages);
+	}
+	return ms->base_gfn + ofs;
+}
+
+static int kvm_s390_get_cmma(struct kvm *kvm, struct kvm_s390_cmma_log *args,
+			     u8 *res, unsigned long bufsize)
+{
+	unsigned long mem_end, cur_gfn, next_gfn, hva, pgstev;
+	struct kvm_memslots *slots = kvm_memslots(kvm);
+	struct kvm_memory_slot *ms;
+
+	if (unlikely(kvm_memslots_empty(slots)))
+		return 0;
+
+	cur_gfn = kvm_s390_next_dirty_cmma(slots, args->start_gfn);
+	ms = gfn_to_memslot(kvm, cur_gfn);
+	args->count = 0;
+	args->start_gfn = cur_gfn;
+	if (!ms)
+		return 0;
+	next_gfn = kvm_s390_next_dirty_cmma(slots, cur_gfn + 1);
+	mem_end = kvm_s390_get_gfn_end(slots);
+
+	while (args->count < bufsize) {
+		hva = gfn_to_hva(kvm, cur_gfn);
+		if (kvm_is_error_hva(hva))
+			return 0;
+		/* Decrement only if we actually flipped the bit to 0 */
+		if (test_and_clear_bit(cur_gfn - ms->base_gfn, kvm_second_dirty_bitmap(ms)))
+			atomic64_dec(&kvm->arch.cmma_dirty_pages);
+		if (get_pgste(kvm->mm, hva, &pgstev) < 0)
+			pgstev = 0;
+		/* Save the value */
+		res[args->count++] = (pgstev >> 24) & 0x43;
+		/* If the next bit is too far away, stop. */
+		if (next_gfn > cur_gfn + KVM_S390_MAX_BIT_DISTANCE)
+			return 0;
+		/* If we reached the previous "next", find the next one */
+		if (cur_gfn == next_gfn)
+			next_gfn = kvm_s390_next_dirty_cmma(slots, cur_gfn + 1);
+		/* Reached the end of memory or of the buffer, stop */
+		if ((next_gfn >= mem_end) ||
+		    (next_gfn - args->start_gfn >= bufsize))
+			return 0;
+		cur_gfn++;
+		/* Reached the end of the current memslot, take the next one. */
+		if (cur_gfn - ms->base_gfn >= ms->npages) {
+			ms = gfn_to_memslot(kvm, cur_gfn);
+			if (!ms)
+				return 0;
+		}
+	}
+	return 0;
+}
+
+/*
+ * This function searches for the next page with dirty CMMA attributes, and
+ * saves the attributes in the buffer up to either the end of the buffer or
+ * until a block of at least KVM_S390_MAX_BIT_DISTANCE clean bits is found;
+ * no trailing clean bytes are saved.
+ * In case no dirty bits were found, or if CMMA was not enabled or used, the
+ * output buffer will indicate 0 as length.
+ */
+static int kvm_s390_get_cmma_bits(struct kvm *kvm,
+				  struct kvm_s390_cmma_log *args)
+{
+	unsigned long bufsize;
+	int srcu_idx, peek, ret;
+	u8 *values;
+
+	if (!kvm->arch.use_cmma)
+		return -ENXIO;
+	/* Invalid/unsupported flags were specified */
+	if (args->flags & ~KVM_S390_CMMA_PEEK)
+		return -EINVAL;
+	/* Migration mode query, and we are not doing a migration */
+	peek = !!(args->flags & KVM_S390_CMMA_PEEK);
+	if (!peek && !kvm->arch.migration_mode)
+		return -EINVAL;
+	/* CMMA is disabled or was not used, or the buffer has length zero */
+	bufsize = min(args->count, KVM_S390_CMMA_SIZE_MAX);
+	if (!bufsize || !kvm->mm->context.uses_cmm) {
+		memset(args, 0, sizeof(*args));
+		return 0;
+	}
+	/* We are not peeking, and there are no dirty pages */
+	if (!peek && !atomic64_read(&kvm->arch.cmma_dirty_pages)) {
+		memset(args, 0, sizeof(*args));
+		return 0;
+	}
+
+	values = vmalloc(bufsize);
+	if (!values)
+		return -ENOMEM;
+
+	mmap_read_lock(kvm->mm);
+	srcu_idx = srcu_read_lock(&kvm->srcu);
+	if (peek)
+		ret = kvm_s390_peek_cmma(kvm, args, values, bufsize);
+	else
+		ret = kvm_s390_get_cmma(kvm, args, values, bufsize);
+	srcu_read_unlock(&kvm->srcu, srcu_idx);
+	mmap_read_unlock(kvm->mm);
+
+	if (kvm->arch.migration_mode)
+		args->remaining = atomic64_read(&kvm->arch.cmma_dirty_pages);
+	else
+		args->remaining = 0;
+
+	if (copy_to_user((void __user *)args->values, values, args->count))
+		ret = -EFAULT;
+
+	vfree(values);
+	return ret;
+}
+
+/*
+ * This function sets the CMMA attributes for the given pages. If the input
+ * buffer has zero length, no action is taken, otherwise the attributes are
+ * set and the mm->context.uses_cmm flag is set.
+ */
+static int kvm_s390_set_cmma_bits(struct kvm *kvm,
+				  const struct kvm_s390_cmma_log *args)
+{
+	unsigned long hva, mask, pgstev, i;
+	uint8_t *bits;
+	int srcu_idx, r = 0;
+
+	mask = args->mask;
+
+	if (!kvm->arch.use_cmma)
+		return -ENXIO;
+	/* invalid/unsupported flags */
+	if (args->flags != 0)
+		return -EINVAL;
+	/* Enforce sane limit on memory allocation */
+	if (args->count > KVM_S390_CMMA_SIZE_MAX)
+		return -EINVAL;
+	/* Nothing to do */
+	if (args->count == 0)
+		return 0;
+
+	bits = vmalloc(array_size(sizeof(*bits), args->count));
+	if (!bits)
+		return -ENOMEM;
+
+	r = copy_from_user(bits, (void __user *)args->values, args->count);
+	if (r) {
+		r = -EFAULT;
+		goto out;
+	}
+
+	mmap_read_lock(kvm->mm);
+	srcu_idx = srcu_read_lock(&kvm->srcu);
+	for (i = 0; i < args->count; i++) {
+		hva = gfn_to_hva(kvm, args->start_gfn + i);
+		if (kvm_is_error_hva(hva)) {
+			r = -EFAULT;
+			break;
+		}
+
+		pgstev = bits[i];
+		pgstev = pgstev << 24;
+		mask &= _PGSTE_GPS_USAGE_MASK | _PGSTE_GPS_NODAT;
+		set_pgste_bits(kvm->mm, hva, mask, pgstev);
+	}
+	srcu_read_unlock(&kvm->srcu, srcu_idx);
+	mmap_read_unlock(kvm->mm);
+
+	if (!kvm->mm->context.uses_cmm) {
+		mmap_write_lock(kvm->mm);
+		kvm->mm->context.uses_cmm = 1;
+		mmap_write_unlock(kvm->mm);
+	}
+out:
+	vfree(bits);
+	return r;
+}
+
+/**
+ * kvm_s390_cpus_from_pv - Convert all protected vCPUs in a protected VM to
+ * non protected.
+ * @kvm: the VM whose protected vCPUs are to be converted
+ * @rc: return value for the RC field of the UVC (in case of error)
+ * @rrc: return value for the RRC field of the UVC (in case of error)
+ *
+ * Does not stop in case of error, tries to convert as many
+ * CPUs as possible. In case of error, the RC and RRC of the last error are
+ * returned.
+ *
+ * Return: 0 in case of success, otherwise -EIO
+ */
+int kvm_s390_cpus_from_pv(struct kvm *kvm, u16 *rc, u16 *rrc)
+{
+	struct kvm_vcpu *vcpu;
+	unsigned long i;
+	u16 _rc, _rrc;
+	int ret = 0;
+
+	/*
+	 * We ignore failures and try to destroy as many CPUs as possible.
+	 * At the same time we must not free the assigned resources when
+	 * this fails, as the ultravisor has still access to that memory.
+	 * So kvm_s390_pv_destroy_cpu can leave a "wanted" memory leak
+	 * behind.
+	 * We want to return the first failure rc and rrc, though.
+	 */
+	kvm_for_each_vcpu(i, vcpu, kvm) {
+		mutex_lock(&vcpu->mutex);
+		if (kvm_s390_pv_destroy_cpu(vcpu, &_rc, &_rrc) && !ret) {
+			*rc = _rc;
+			*rrc = _rrc;
+			ret = -EIO;
+		}
+		mutex_unlock(&vcpu->mutex);
+	}
+	/* Ensure that we re-enable gisa if the non-PV guest used it but the PV guest did not. */
+	if (use_gisa)
+		kvm_s390_gisa_enable(kvm);
+	return ret;
+}
+
+/**
+ * kvm_s390_cpus_to_pv - Convert all non-protected vCPUs in a protected VM
+ * to protected.
+ * @kvm: the VM whose protected vCPUs are to be converted
+ * @rc: return value for the RC field of the UVC (in case of error)
+ * @rrc: return value for the RRC field of the UVC (in case of error)
+ *
+ * Tries to undo the conversion in case of error.
+ *
+ * Return: 0 in case of success, otherwise -EIO
+ */
+static int kvm_s390_cpus_to_pv(struct kvm *kvm, u16 *rc, u16 *rrc)
+{
+	unsigned long i;
+	int r = 0;
+	u16 dummy;
+
+	struct kvm_vcpu *vcpu;
+
+	/* Disable the GISA if the ultravisor does not support AIV. */
+	if (!uv_has_feature(BIT_UV_FEAT_AIV))
+		kvm_s390_gisa_disable(kvm);
+
+	kvm_for_each_vcpu(i, vcpu, kvm) {
+		mutex_lock(&vcpu->mutex);
+		r = kvm_s390_pv_create_cpu(vcpu, rc, rrc);
+		mutex_unlock(&vcpu->mutex);
+		if (r)
+			break;
+	}
+	if (r)
+		kvm_s390_cpus_from_pv(kvm, &dummy, &dummy);
+	return r;
+}
+
+/*
+ * Here we provide user space with a direct interface to query UV
+ * related data like UV maxima and available features as well as
+ * feature specific data.
+ *
+ * To facilitate future extension of the data structures we'll try to
+ * write data up to the maximum requested length.
+ */
+static ssize_t kvm_s390_handle_pv_info(struct kvm_s390_pv_info *info)
+{
+	ssize_t len_min;
+
+	switch (info->header.id) {
+	case KVM_PV_INFO_VM: {
+		len_min =  sizeof(info->header) + sizeof(info->vm);
+
+		if (info->header.len_max < len_min)
+			return -EINVAL;
+
+		memcpy(info->vm.inst_calls_list,
+		       uv_info.inst_calls_list,
+		       sizeof(uv_info.inst_calls_list));
+
+		/* It's max cpuid not max cpus, so it's off by one */
+		info->vm.max_cpus = uv_info.max_guest_cpu_id + 1;
+		info->vm.max_guests = uv_info.max_num_sec_conf;
+		info->vm.max_guest_addr = uv_info.max_sec_stor_addr;
+		info->vm.feature_indication = uv_info.uv_feature_indications;
+
+		return len_min;
+	}
+	case KVM_PV_INFO_DUMP: {
+		len_min =  sizeof(info->header) + sizeof(info->dump);
+
+		if (info->header.len_max < len_min)
+			return -EINVAL;
+
+		info->dump.dump_cpu_buffer_len = uv_info.guest_cpu_stor_len;
+		info->dump.dump_config_mem_buffer_per_1m = uv_info.conf_dump_storage_state_len;
+		info->dump.dump_config_finalize_len = uv_info.conf_dump_finalize_len;
+		return len_min;
+	}
+	default:
+		return -EINVAL;
+	}
+}
+
+static int kvm_s390_pv_dmp(struct kvm *kvm, struct kvm_pv_cmd *cmd,
+			   struct kvm_s390_pv_dmp dmp)
+{
+	int r = -EINVAL;
+	void __user *result_buff = (void __user *)dmp.buff_addr;
+
+	switch (dmp.subcmd) {
+	case KVM_PV_DUMP_INIT: {
+		if (kvm->arch.pv.dumping)
+			break;
+
+		/*
+		 * Block SIE entry as concurrent dump UVCs could lead
+		 * to validities.
+		 */
+		kvm_s390_vcpu_block_all(kvm);
+
+		r = uv_cmd_nodata(kvm_s390_pv_get_handle(kvm),
+				  UVC_CMD_DUMP_INIT, &cmd->rc, &cmd->rrc);
+		KVM_UV_EVENT(kvm, 3, "PROTVIRT DUMP INIT: rc %x rrc %x",
+			     cmd->rc, cmd->rrc);
+		if (!r) {
+			kvm->arch.pv.dumping = true;
+		} else {
+			kvm_s390_vcpu_unblock_all(kvm);
+			r = -EINVAL;
+		}
+		break;
+	}
+	case KVM_PV_DUMP_CONFIG_STOR_STATE: {
+		if (!kvm->arch.pv.dumping)
+			break;
+
+		/*
+		 * gaddr is an output parameter since we might stop
+		 * early. As dmp will be copied back in our caller, we
+		 * don't need to do it ourselves.
+		 */
+		r = kvm_s390_pv_dump_stor_state(kvm, result_buff, &dmp.gaddr, dmp.buff_len,
+						&cmd->rc, &cmd->rrc);
+		break;
+	}
+	case KVM_PV_DUMP_COMPLETE: {
+		if (!kvm->arch.pv.dumping)
+			break;
+
+		r = -EINVAL;
+		if (dmp.buff_len < uv_info.conf_dump_finalize_len)
+			break;
+
+		r = kvm_s390_pv_dump_complete(kvm, result_buff,
+					      &cmd->rc, &cmd->rrc);
+		break;
+	}
+	default:
+		r = -ENOTTY;
+		break;
+	}
+
+	return r;
+}
+
+static int kvm_s390_handle_pv(struct kvm *kvm, struct kvm_pv_cmd *cmd)
+{
+	const bool need_lock = (cmd->cmd != KVM_PV_ASYNC_CLEANUP_PERFORM);
+	void __user *argp = (void __user *)cmd->data;
+	int r = 0;
+	u16 dummy;
+
+	if (need_lock)
+		mutex_lock(&kvm->lock);
+
+	switch (cmd->cmd) {
+	case KVM_PV_ENABLE: {
+		r = -EINVAL;
+		if (kvm_s390_pv_is_protected(kvm))
+			break;
+
+		/*
+		 *  FMT 4 SIE needs esca. As we never switch back to bsca from
+		 *  esca, we need no cleanup in the error cases below
+		 */
+		r = sca_switch_to_extended(kvm);
+		if (r)
+			break;
+
+		mmap_write_lock(current->mm);
+		r = gmap_mark_unmergeable();
+		mmap_write_unlock(current->mm);
+		if (r)
+			break;
+
+		r = kvm_s390_pv_init_vm(kvm, &cmd->rc, &cmd->rrc);
+		if (r)
+			break;
+
+		r = kvm_s390_cpus_to_pv(kvm, &cmd->rc, &cmd->rrc);
+		if (r)
+			kvm_s390_pv_deinit_vm(kvm, &dummy, &dummy);
+
+		/* we need to block service interrupts from now on */
+		set_bit(IRQ_PEND_EXT_SERVICE, &kvm->arch.float_int.masked_irqs);
+		break;
+	}
+	case KVM_PV_ASYNC_CLEANUP_PREPARE:
+		r = -EINVAL;
+		if (!kvm_s390_pv_is_protected(kvm) || !async_destroy)
+			break;
+
+		r = kvm_s390_cpus_from_pv(kvm, &cmd->rc, &cmd->rrc);
+		/*
+		 * If a CPU could not be destroyed, destroy VM will also fail.
+		 * There is no point in trying to destroy it. Instead return
+		 * the rc and rrc from the first CPU that failed destroying.
+		 */
+		if (r)
+			break;
+		r = kvm_s390_pv_set_aside(kvm, &cmd->rc, &cmd->rrc);
+
+		/* no need to block service interrupts any more */
+		clear_bit(IRQ_PEND_EXT_SERVICE, &kvm->arch.float_int.masked_irqs);
+		break;
+	case KVM_PV_ASYNC_CLEANUP_PERFORM:
+		r = -EINVAL;
+		if (!async_destroy)
+			break;
+		/* kvm->lock must not be held; this is asserted inside the function. */
+		r = kvm_s390_pv_deinit_aside_vm(kvm, &cmd->rc, &cmd->rrc);
+		break;
+	case KVM_PV_DISABLE: {
+		r = -EINVAL;
+		if (!kvm_s390_pv_is_protected(kvm))
+			break;
+
+		r = kvm_s390_cpus_from_pv(kvm, &cmd->rc, &cmd->rrc);
+		/*
+		 * If a CPU could not be destroyed, destroy VM will also fail.
+		 * There is no point in trying to destroy it. Instead return
+		 * the rc and rrc from the first CPU that failed destroying.
+		 */
+		if (r)
+			break;
+		r = kvm_s390_pv_deinit_cleanup_all(kvm, &cmd->rc, &cmd->rrc);
+
+		/* no need to block service interrupts any more */
+		clear_bit(IRQ_PEND_EXT_SERVICE, &kvm->arch.float_int.masked_irqs);
+		break;
+	}
+	case KVM_PV_SET_SEC_PARMS: {
+		struct kvm_s390_pv_sec_parm parms = {};
+		void *hdr;
+
+		r = -EINVAL;
+		if (!kvm_s390_pv_is_protected(kvm))
+			break;
+
+		r = -EFAULT;
+		if (copy_from_user(&parms, argp, sizeof(parms)))
+			break;
+
+		/* Currently restricted to 8KB */
+		r = -EINVAL;
+		if (parms.length > PAGE_SIZE * 2)
+			break;
+
+		r = -ENOMEM;
+		hdr = vmalloc(parms.length);
+		if (!hdr)
+			break;
+
+		r = -EFAULT;
+		if (!copy_from_user(hdr, (void __user *)parms.origin,
+				    parms.length))
+			r = kvm_s390_pv_set_sec_parms(kvm, hdr, parms.length,
+						      &cmd->rc, &cmd->rrc);
+
+		vfree(hdr);
+		break;
+	}
+	case KVM_PV_UNPACK: {
+		struct kvm_s390_pv_unp unp = {};
+
+		r = -EINVAL;
+		if (!kvm_s390_pv_is_protected(kvm) || !mm_is_protected(kvm->mm))
+			break;
+
+		r = -EFAULT;
+		if (copy_from_user(&unp, argp, sizeof(unp)))
+			break;
+
+		r = kvm_s390_pv_unpack(kvm, unp.addr, unp.size, unp.tweak,
+				       &cmd->rc, &cmd->rrc);
+		break;
+	}
+	case KVM_PV_VERIFY: {
+		r = -EINVAL;
+		if (!kvm_s390_pv_is_protected(kvm))
+			break;
+
+		r = uv_cmd_nodata(kvm_s390_pv_get_handle(kvm),
+				  UVC_CMD_VERIFY_IMG, &cmd->rc, &cmd->rrc);
+		KVM_UV_EVENT(kvm, 3, "PROTVIRT VERIFY: rc %x rrc %x", cmd->rc,
+			     cmd->rrc);
+		break;
+	}
+	case KVM_PV_PREP_RESET: {
+		r = -EINVAL;
+		if (!kvm_s390_pv_is_protected(kvm))
+			break;
+
+		r = uv_cmd_nodata(kvm_s390_pv_get_handle(kvm),
+				  UVC_CMD_PREPARE_RESET, &cmd->rc, &cmd->rrc);
+		KVM_UV_EVENT(kvm, 3, "PROTVIRT PREP RESET: rc %x rrc %x",
+			     cmd->rc, cmd->rrc);
+		break;
+	}
+	case KVM_PV_UNSHARE_ALL: {
+		r = -EINVAL;
+		if (!kvm_s390_pv_is_protected(kvm))
+			break;
+
+		r = uv_cmd_nodata(kvm_s390_pv_get_handle(kvm),
+				  UVC_CMD_SET_UNSHARE_ALL, &cmd->rc, &cmd->rrc);
+		KVM_UV_EVENT(kvm, 3, "PROTVIRT UNSHARE: rc %x rrc %x",
+			     cmd->rc, cmd->rrc);
+		break;
+	}
+	case KVM_PV_INFO: {
+		struct kvm_s390_pv_info info = {};
+		ssize_t data_len;
+
+		/*
+		 * No need to check the VM protection here.
+		 *
+		 * Maybe user space wants to query some of the data
+		 * when the VM is still unprotected. If we see the
+		 * need to fence a new data command we can still
+		 * return an error in the info handler.
+		 */
+
+		r = -EFAULT;
+		if (copy_from_user(&info, argp, sizeof(info.header)))
+			break;
+
+		r = -EINVAL;
+		if (info.header.len_max < sizeof(info.header))
+			break;
+
+		data_len = kvm_s390_handle_pv_info(&info);
+		if (data_len < 0) {
+			r = data_len;
+			break;
+		}
+		/*
+		 * If a data command struct is extended (multiple
+		 * times) this can be used to determine how much of it
+		 * is valid.
+		 */
+		info.header.len_written = data_len;
+
+		r = -EFAULT;
+		if (copy_to_user(argp, &info, data_len))
+			break;
+
+		r = 0;
+		break;
+	}
+	case KVM_PV_DUMP: {
+		struct kvm_s390_pv_dmp dmp;
+
+		r = -EINVAL;
+		if (!kvm_s390_pv_is_protected(kvm))
+			break;
+
+		r = -EFAULT;
+		if (copy_from_user(&dmp, argp, sizeof(dmp)))
+			break;
+
+		r = kvm_s390_pv_dmp(kvm, cmd, dmp);
+		if (r)
+			break;
+
+		if (copy_to_user(argp, &dmp, sizeof(dmp))) {
+			r = -EFAULT;
+			break;
+		}
+
+		break;
+	}
+	default:
+		r = -ENOTTY;
+	}
+	if (need_lock)
+		mutex_unlock(&kvm->lock);
+
+	return r;
+}
+
+static int mem_op_validate_common(struct kvm_s390_mem_op *mop, u64 supported_flags)
+{
+	if (mop->flags & ~supported_flags || !mop->size)
+		return -EINVAL;
+	if (mop->size > MEM_OP_MAX_SIZE)
+		return -E2BIG;
+	if (mop->flags & KVM_S390_MEMOP_F_SKEY_PROTECTION) {
+		if (mop->key > 0xf)
+			return -EINVAL;
+	} else {
+		mop->key = 0;
+	}
+	return 0;
+}
+
+static int kvm_s390_vm_mem_op_abs(struct kvm *kvm, struct kvm_s390_mem_op *mop)
+{
+	void __user *uaddr = (void __user *)mop->buf;
+	enum gacc_mode acc_mode;
+	void *tmpbuf = NULL;
+	int r, srcu_idx;
+
+	r = mem_op_validate_common(mop, KVM_S390_MEMOP_F_SKEY_PROTECTION |
+					KVM_S390_MEMOP_F_CHECK_ONLY);
+	if (r)
+		return r;
+
+	if (!(mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY)) {
+		tmpbuf = vmalloc(mop->size);
+		if (!tmpbuf)
+			return -ENOMEM;
+	}
+
+	srcu_idx = srcu_read_lock(&kvm->srcu);
+
+	if (kvm_is_error_gpa(kvm, mop->gaddr)) {
+		r = PGM_ADDRESSING;
+		goto out_unlock;
+	}
+
+	acc_mode = mop->op == KVM_S390_MEMOP_ABSOLUTE_READ ? GACC_FETCH : GACC_STORE;
+	if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
+		r = check_gpa_range(kvm, mop->gaddr, mop->size, acc_mode, mop->key);
+		goto out_unlock;
+	}
+	if (acc_mode == GACC_FETCH) {
+		r = access_guest_abs_with_key(kvm, mop->gaddr, tmpbuf,
+					      mop->size, GACC_FETCH, mop->key);
+		if (r)
+			goto out_unlock;
+		if (copy_to_user(uaddr, tmpbuf, mop->size))
+			r = -EFAULT;
+	} else {
+		if (copy_from_user(tmpbuf, uaddr, mop->size)) {
+			r = -EFAULT;
+			goto out_unlock;
+		}
+		r = access_guest_abs_with_key(kvm, mop->gaddr, tmpbuf,
+					      mop->size, GACC_STORE, mop->key);
+	}
+
+out_unlock:
+	srcu_read_unlock(&kvm->srcu, srcu_idx);
+
+	vfree(tmpbuf);
+	return r;
+}
+
+static int kvm_s390_vm_mem_op_cmpxchg(struct kvm *kvm, struct kvm_s390_mem_op *mop)
+{
+	void __user *uaddr = (void __user *)mop->buf;
+	void __user *old_addr = (void __user *)mop->old_addr;
+	union {
+		__uint128_t quad;
+		char raw[sizeof(__uint128_t)];
+	} old = { .quad = 0}, new = { .quad = 0 };
+	unsigned int off_in_quad = sizeof(new) - mop->size;
+	int r, srcu_idx;
+	bool success;
+
+	r = mem_op_validate_common(mop, KVM_S390_MEMOP_F_SKEY_PROTECTION);
+	if (r)
+		return r;
+	/*
+	 * This validates off_in_quad. Checking that size is a power
+	 * of two is not necessary, as cmpxchg_guest_abs_with_key
+	 * takes care of that
+	 */
+	if (mop->size > sizeof(new))
+		return -EINVAL;
+	if (copy_from_user(&new.raw[off_in_quad], uaddr, mop->size))
+		return -EFAULT;
+	if (copy_from_user(&old.raw[off_in_quad], old_addr, mop->size))
+		return -EFAULT;
+
+	srcu_idx = srcu_read_lock(&kvm->srcu);
+
+	if (kvm_is_error_gpa(kvm, mop->gaddr)) {
+		r = PGM_ADDRESSING;
+		goto out_unlock;
+	}
+
+	r = cmpxchg_guest_abs_with_key(kvm, mop->gaddr, mop->size, &old.quad,
+				       new.quad, mop->key, &success);
+	if (!success && copy_to_user(old_addr, &old.raw[off_in_quad], mop->size))
+		r = -EFAULT;
+
+out_unlock:
+	srcu_read_unlock(&kvm->srcu, srcu_idx);
+	return r;
+}
+
+static int kvm_s390_vm_mem_op(struct kvm *kvm, struct kvm_s390_mem_op *mop)
+{
+	/*
+	 * This is technically a heuristic only, if the kvm->lock is not
+	 * taken, it is not guaranteed that the vm is/remains non-protected.
+	 * This is ok from a kernel perspective, wrongdoing is detected
+	 * on the access, -EFAULT is returned and the vm may crash the
+	 * next time it accesses the memory in question.
+	 * There is no sane usecase to do switching and a memop on two
+	 * different CPUs at the same time.
+	 */
+	if (kvm_s390_pv_get_handle(kvm))
+		return -EINVAL;
+
+	switch (mop->op) {
+	case KVM_S390_MEMOP_ABSOLUTE_READ:
+	case KVM_S390_MEMOP_ABSOLUTE_WRITE:
+		return kvm_s390_vm_mem_op_abs(kvm, mop);
+	case KVM_S390_MEMOP_ABSOLUTE_CMPXCHG:
+		return kvm_s390_vm_mem_op_cmpxchg(kvm, mop);
+	default:
+		return -EINVAL;
+	}
+}
+
+int kvm_arch_vm_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg)
+{
+	struct kvm *kvm = filp->private_data;
+	void __user *argp = (void __user *)arg;
+	struct kvm_device_attr attr;
+	int r;
+
+	switch (ioctl) {
+	case KVM_S390_INTERRUPT: {
+		struct kvm_s390_interrupt s390int;
+
+		r = -EFAULT;
+		if (copy_from_user(&s390int, argp, sizeof(s390int)))
+			break;
+		r = kvm_s390_inject_vm(kvm, &s390int);
+		break;
+	}
+	case KVM_CREATE_IRQCHIP: {
+		struct kvm_irq_routing_entry routing;
+
+		r = -EINVAL;
+		if (kvm->arch.use_irqchip) {
+			/* Set up dummy routing. */
+			memset(&routing, 0, sizeof(routing));
+			r = kvm_set_irq_routing(kvm, &routing, 0, 0);
+		}
+		break;
+	}
+	case KVM_SET_DEVICE_ATTR: {
+		r = -EFAULT;
+		if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
+			break;
+		r = kvm_s390_vm_set_attr(kvm, &attr);
+		break;
+	}
+	case KVM_GET_DEVICE_ATTR: {
+		r = -EFAULT;
+		if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
+			break;
+		r = kvm_s390_vm_get_attr(kvm, &attr);
+		break;
+	}
+	case KVM_HAS_DEVICE_ATTR: {
+		r = -EFAULT;
+		if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
+			break;
+		r = kvm_s390_vm_has_attr(kvm, &attr);
+		break;
+	}
+	case KVM_S390_GET_SKEYS: {
+		struct kvm_s390_skeys args;
+
+		r = -EFAULT;
+		if (copy_from_user(&args, argp,
+				   sizeof(struct kvm_s390_skeys)))
+			break;
+		r = kvm_s390_get_skeys(kvm, &args);
+		break;
+	}
+	case KVM_S390_SET_SKEYS: {
+		struct kvm_s390_skeys args;
+
+		r = -EFAULT;
+		if (copy_from_user(&args, argp,
+				   sizeof(struct kvm_s390_skeys)))
+			break;
+		r = kvm_s390_set_skeys(kvm, &args);
+		break;
+	}
+	case KVM_S390_GET_CMMA_BITS: {
+		struct kvm_s390_cmma_log args;
+
+		r = -EFAULT;
+		if (copy_from_user(&args, argp, sizeof(args)))
+			break;
+		mutex_lock(&kvm->slots_lock);
+		r = kvm_s390_get_cmma_bits(kvm, &args);
+		mutex_unlock(&kvm->slots_lock);
+		if (!r) {
+			r = copy_to_user(argp, &args, sizeof(args));
+			if (r)
+				r = -EFAULT;
+		}
+		break;
+	}
+	case KVM_S390_SET_CMMA_BITS: {
+		struct kvm_s390_cmma_log args;
+
+		r = -EFAULT;
+		if (copy_from_user(&args, argp, sizeof(args)))
+			break;
+		mutex_lock(&kvm->slots_lock);
+		r = kvm_s390_set_cmma_bits(kvm, &args);
+		mutex_unlock(&kvm->slots_lock);
+		break;
+	}
+	case KVM_S390_PV_COMMAND: {
+		struct kvm_pv_cmd args;
+
+		/* protvirt means user cpu state */
+		kvm_s390_set_user_cpu_state_ctrl(kvm);
+		r = 0;
+		if (!is_prot_virt_host()) {
+			r = -EINVAL;
+			break;
+		}
+		if (copy_from_user(&args, argp, sizeof(args))) {
+			r = -EFAULT;
+			break;
+		}
+		if (args.flags) {
+			r = -EINVAL;
+			break;
+		}
+		/* must be called without kvm->lock */
+		r = kvm_s390_handle_pv(kvm, &args);
+		if (copy_to_user(argp, &args, sizeof(args))) {
+			r = -EFAULT;
+			break;
+		}
+		break;
+	}
+	case KVM_S390_MEM_OP: {
+		struct kvm_s390_mem_op mem_op;
+
+		if (copy_from_user(&mem_op, argp, sizeof(mem_op)) == 0)
+			r = kvm_s390_vm_mem_op(kvm, &mem_op);
+		else
+			r = -EFAULT;
+		break;
+	}
+	case KVM_S390_ZPCI_OP: {
+		struct kvm_s390_zpci_op args;
+
+		r = -EINVAL;
+		if (!IS_ENABLED(CONFIG_VFIO_PCI_ZDEV_KVM))
+			break;
+		if (copy_from_user(&args, argp, sizeof(args))) {
+			r = -EFAULT;
+			break;
+		}
+		r = kvm_s390_pci_zpci_op(kvm, &args);
+		break;
+	}
+	default:
+		r = -ENOTTY;
+	}
+
+	return r;
+}
+
+static int kvm_s390_apxa_installed(void)
+{
+	struct ap_config_info info;
+
+	if (ap_instructions_available()) {
+		if (ap_qci(&info) == 0)
+			return info.apxa;
+	}
+
+	return 0;
+}
+
+/*
+ * The format of the crypto control block (CRYCB) is specified in the 3 low
+ * order bits of the CRYCB designation (CRYCBD) field as follows:
+ * Format 0: Neither the message security assist extension 3 (MSAX3) nor the
+ *	     AP extended addressing (APXA) facility are installed.
+ * Format 1: The APXA facility is not installed but the MSAX3 facility is.
+ * Format 2: Both the APXA and MSAX3 facilities are installed
+ */
+static void kvm_s390_set_crycb_format(struct kvm *kvm)
+{
+	kvm->arch.crypto.crycbd = (__u32)(unsigned long) kvm->arch.crypto.crycb;
+
+	/* Clear the CRYCB format bits - i.e., set format 0 by default */
+	kvm->arch.crypto.crycbd &= ~(CRYCB_FORMAT_MASK);
+
+	/* Check whether MSAX3 is installed */
+	if (!test_kvm_facility(kvm, 76))
+		return;
+
+	if (kvm_s390_apxa_installed())
+		kvm->arch.crypto.crycbd |= CRYCB_FORMAT2;
+	else
+		kvm->arch.crypto.crycbd |= CRYCB_FORMAT1;
+}
+
+/*
+ * kvm_arch_crypto_set_masks
+ *
+ * @kvm: pointer to the target guest's KVM struct containing the crypto masks
+ *	 to be set.
+ * @apm: the mask identifying the accessible AP adapters
+ * @aqm: the mask identifying the accessible AP domains
+ * @adm: the mask identifying the accessible AP control domains
+ *
+ * Set the masks that identify the adapters, domains and control domains to
+ * which the KVM guest is granted access.
+ *
+ * Note: The kvm->lock mutex must be locked by the caller before invoking this
+ *	 function.
+ */
+void kvm_arch_crypto_set_masks(struct kvm *kvm, unsigned long *apm,
+			       unsigned long *aqm, unsigned long *adm)
+{
+	struct kvm_s390_crypto_cb *crycb = kvm->arch.crypto.crycb;
+
+	kvm_s390_vcpu_block_all(kvm);
+
+	switch (kvm->arch.crypto.crycbd & CRYCB_FORMAT_MASK) {
+	case CRYCB_FORMAT2: /* APCB1 use 256 bits */
+		memcpy(crycb->apcb1.apm, apm, 32);
+		VM_EVENT(kvm, 3, "SET CRYCB: apm %016lx %016lx %016lx %016lx",
+			 apm[0], apm[1], apm[2], apm[3]);
+		memcpy(crycb->apcb1.aqm, aqm, 32);
+		VM_EVENT(kvm, 3, "SET CRYCB: aqm %016lx %016lx %016lx %016lx",
+			 aqm[0], aqm[1], aqm[2], aqm[3]);
+		memcpy(crycb->apcb1.adm, adm, 32);
+		VM_EVENT(kvm, 3, "SET CRYCB: adm %016lx %016lx %016lx %016lx",
+			 adm[0], adm[1], adm[2], adm[3]);
+		break;
+	case CRYCB_FORMAT1:
+	case CRYCB_FORMAT0: /* Fall through both use APCB0 */
+		memcpy(crycb->apcb0.apm, apm, 8);
+		memcpy(crycb->apcb0.aqm, aqm, 2);
+		memcpy(crycb->apcb0.adm, adm, 2);
+		VM_EVENT(kvm, 3, "SET CRYCB: apm %016lx aqm %04x adm %04x",
+			 apm[0], *((unsigned short *)aqm),
+			 *((unsigned short *)adm));
+		break;
+	default:	/* Can not happen */
+		break;
+	}
+
+	/* recreate the shadow crycb for each vcpu */
+	kvm_s390_sync_request_broadcast(kvm, KVM_REQ_VSIE_RESTART);
+	kvm_s390_vcpu_unblock_all(kvm);
+}
+EXPORT_SYMBOL_GPL(kvm_arch_crypto_set_masks);
+
+/*
+ * kvm_arch_crypto_clear_masks
+ *
+ * @kvm: pointer to the target guest's KVM struct containing the crypto masks
+ *	 to be cleared.
+ *
+ * Clear the masks that identify the adapters, domains and control domains to
+ * which the KVM guest is granted access.
+ *
+ * Note: The kvm->lock mutex must be locked by the caller before invoking this
+ *	 function.
+ */
+void kvm_arch_crypto_clear_masks(struct kvm *kvm)
+{
+	kvm_s390_vcpu_block_all(kvm);
+
+	memset(&kvm->arch.crypto.crycb->apcb0, 0,
+	       sizeof(kvm->arch.crypto.crycb->apcb0));
+	memset(&kvm->arch.crypto.crycb->apcb1, 0,
+	       sizeof(kvm->arch.crypto.crycb->apcb1));
+
+	VM_EVENT(kvm, 3, "%s", "CLR CRYCB:");
+	/* recreate the shadow crycb for each vcpu */
+	kvm_s390_sync_request_broadcast(kvm, KVM_REQ_VSIE_RESTART);
+	kvm_s390_vcpu_unblock_all(kvm);
+}
+EXPORT_SYMBOL_GPL(kvm_arch_crypto_clear_masks);
+
+static u64 kvm_s390_get_initial_cpuid(void)
+{
+	struct cpuid cpuid;
+
+	get_cpu_id(&cpuid);
+	cpuid.version = 0xff;
+	return *((u64 *) &cpuid);
+}
+
+static void kvm_s390_crypto_init(struct kvm *kvm)
+{
+	kvm->arch.crypto.crycb = &kvm->arch.sie_page2->crycb;
+	kvm_s390_set_crycb_format(kvm);
+	init_rwsem(&kvm->arch.crypto.pqap_hook_rwsem);
+
+	if (!test_kvm_facility(kvm, 76))
+		return;
+
+	/* Enable AES/DEA protected key functions by default */
+	kvm->arch.crypto.aes_kw = 1;
+	kvm->arch.crypto.dea_kw = 1;
+	get_random_bytes(kvm->arch.crypto.crycb->aes_wrapping_key_mask,
+			 sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
+	get_random_bytes(kvm->arch.crypto.crycb->dea_wrapping_key_mask,
+			 sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
+}
+
+static void sca_dispose(struct kvm *kvm)
+{
+	if (kvm->arch.use_esca)
+		free_pages_exact(kvm->arch.sca, sizeof(struct esca_block));
+	else
+		free_page((unsigned long)(kvm->arch.sca));
+	kvm->arch.sca = NULL;
+}
+
+void kvm_arch_free_vm(struct kvm *kvm)
+{
+	if (IS_ENABLED(CONFIG_VFIO_PCI_ZDEV_KVM))
+		kvm_s390_pci_clear_list(kvm);
+
+	__kvm_arch_free_vm(kvm);
+}
+
+int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
+{
+	gfp_t alloc_flags = GFP_KERNEL_ACCOUNT;
+	int i, rc;
+	char debug_name[16];
+	static unsigned long sca_offset;
+
+	rc = -EINVAL;
+#ifdef CONFIG_KVM_S390_UCONTROL
+	if (type & ~KVM_VM_S390_UCONTROL)
+		goto out_err;
+	if ((type & KVM_VM_S390_UCONTROL) && (!capable(CAP_SYS_ADMIN)))
+		goto out_err;
+#else
+	if (type)
+		goto out_err;
+#endif
+
+	rc = s390_enable_sie();
+	if (rc)
+		goto out_err;
+
+	rc = -ENOMEM;
+
+	if (!sclp.has_64bscao)
+		alloc_flags |= GFP_DMA;
+	rwlock_init(&kvm->arch.sca_lock);
+	/* start with basic SCA */
+	kvm->arch.sca = (struct bsca_block *) get_zeroed_page(alloc_flags);
+	if (!kvm->arch.sca)
+		goto out_err;
+	mutex_lock(&kvm_lock);
+	sca_offset += 16;
+	if (sca_offset + sizeof(struct bsca_block) > PAGE_SIZE)
+		sca_offset = 0;
+	kvm->arch.sca = (struct bsca_block *)
+			((char *) kvm->arch.sca + sca_offset);
+	mutex_unlock(&kvm_lock);
+
+	sprintf(debug_name, "kvm-%u", current->pid);
+
+	kvm->arch.dbf = debug_register(debug_name, 32, 1, 7 * sizeof(long));
+	if (!kvm->arch.dbf)
+		goto out_err;
+
+	BUILD_BUG_ON(sizeof(struct sie_page2) != 4096);
+	kvm->arch.sie_page2 =
+	     (struct sie_page2 *) get_zeroed_page(GFP_KERNEL_ACCOUNT | GFP_DMA);
+	if (!kvm->arch.sie_page2)
+		goto out_err;
+
+	kvm->arch.sie_page2->kvm = kvm;
+	kvm->arch.model.fac_list = kvm->arch.sie_page2->fac_list;
+
+	for (i = 0; i < kvm_s390_fac_size(); i++) {
+		kvm->arch.model.fac_mask[i] = stfle_fac_list[i] &
+					      (kvm_s390_fac_base[i] |
+					       kvm_s390_fac_ext[i]);
+		kvm->arch.model.fac_list[i] = stfle_fac_list[i] &
+					      kvm_s390_fac_base[i];
+	}
+	kvm->arch.model.subfuncs = kvm_s390_available_subfunc;
+
+	/* we are always in czam mode - even on pre z14 machines */
+	set_kvm_facility(kvm->arch.model.fac_mask, 138);
+	set_kvm_facility(kvm->arch.model.fac_list, 138);
+	/* we emulate STHYI in kvm */
+	set_kvm_facility(kvm->arch.model.fac_mask, 74);
+	set_kvm_facility(kvm->arch.model.fac_list, 74);
+	if (MACHINE_HAS_TLB_GUEST) {
+		set_kvm_facility(kvm->arch.model.fac_mask, 147);
+		set_kvm_facility(kvm->arch.model.fac_list, 147);
+	}
+
+	if (css_general_characteristics.aiv && test_facility(65))
+		set_kvm_facility(kvm->arch.model.fac_mask, 65);
+
+	kvm->arch.model.cpuid = kvm_s390_get_initial_cpuid();
+	kvm->arch.model.ibc = sclp.ibc & 0x0fff;
+
+	kvm->arch.model.uv_feat_guest.feat = 0;
+
+	kvm_s390_crypto_init(kvm);
+
+	if (IS_ENABLED(CONFIG_VFIO_PCI_ZDEV_KVM)) {
+		mutex_lock(&kvm->lock);
+		kvm_s390_pci_init_list(kvm);
+		kvm_s390_vcpu_pci_enable_interp(kvm);
+		mutex_unlock(&kvm->lock);
+	}
+
+	mutex_init(&kvm->arch.float_int.ais_lock);
+	spin_lock_init(&kvm->arch.float_int.lock);
+	for (i = 0; i < FIRQ_LIST_COUNT; i++)
+		INIT_LIST_HEAD(&kvm->arch.float_int.lists[i]);
+	init_waitqueue_head(&kvm->arch.ipte_wq);
+	mutex_init(&kvm->arch.ipte_mutex);
+
+	debug_register_view(kvm->arch.dbf, &debug_sprintf_view);
+	VM_EVENT(kvm, 3, "vm created with type %lu", type);
+
+	if (type & KVM_VM_S390_UCONTROL) {
+		kvm->arch.gmap = NULL;
+		kvm->arch.mem_limit = KVM_S390_NO_MEM_LIMIT;
+	} else {
+		if (sclp.hamax == U64_MAX)
+			kvm->arch.mem_limit = TASK_SIZE_MAX;
+		else
+			kvm->arch.mem_limit = min_t(unsigned long, TASK_SIZE_MAX,
+						    sclp.hamax + 1);
+		kvm->arch.gmap = gmap_create(current->mm, kvm->arch.mem_limit - 1);
+		if (!kvm->arch.gmap)
+			goto out_err;
+		kvm->arch.gmap->private = kvm;
+		kvm->arch.gmap->pfault_enabled = 0;
+	}
+
+	kvm->arch.use_pfmfi = sclp.has_pfmfi;
+	kvm->arch.use_skf = sclp.has_skey;
+	spin_lock_init(&kvm->arch.start_stop_lock);
+	kvm_s390_vsie_init(kvm);
+	if (use_gisa)
+		kvm_s390_gisa_init(kvm);
+	INIT_LIST_HEAD(&kvm->arch.pv.need_cleanup);
+	kvm->arch.pv.set_aside = NULL;
+	KVM_EVENT(3, "vm 0x%pK created by pid %u", kvm, current->pid);
+
+	return 0;
+out_err:
+	free_page((unsigned long)kvm->arch.sie_page2);
+	debug_unregister(kvm->arch.dbf);
+	sca_dispose(kvm);
+	KVM_EVENT(3, "creation of vm failed: %d", rc);
+	return rc;
+}
+
+void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
+{
+	u16 rc, rrc;
+
+	VCPU_EVENT(vcpu, 3, "%s", "free cpu");
+	trace_kvm_s390_destroy_vcpu(vcpu->vcpu_id);
+	kvm_s390_clear_local_irqs(vcpu);
+	kvm_clear_async_pf_completion_queue(vcpu);
+	if (!kvm_is_ucontrol(vcpu->kvm))
+		sca_del_vcpu(vcpu);
+	kvm_s390_update_topology_change_report(vcpu->kvm, 1);
+
+	if (kvm_is_ucontrol(vcpu->kvm))
+		gmap_remove(vcpu->arch.gmap);
+
+	if (vcpu->kvm->arch.use_cmma)
+		kvm_s390_vcpu_unsetup_cmma(vcpu);
+	/* We can not hold the vcpu mutex here, we are already dying */
+	if (kvm_s390_pv_cpu_get_handle(vcpu))
+		kvm_s390_pv_destroy_cpu(vcpu, &rc, &rrc);
+	free_page((unsigned long)(vcpu->arch.sie_block));
+}
+
+void kvm_arch_destroy_vm(struct kvm *kvm)
+{
+	u16 rc, rrc;
+
+	kvm_destroy_vcpus(kvm);
+	sca_dispose(kvm);
+	kvm_s390_gisa_destroy(kvm);
+	/*
+	 * We are already at the end of life and kvm->lock is not taken.
+	 * This is ok as the file descriptor is closed by now and nobody
+	 * can mess with the pv state.
+	 */
+	kvm_s390_pv_deinit_cleanup_all(kvm, &rc, &rrc);
+	/*
+	 * Remove the mmu notifier only when the whole KVM VM is torn down,
+	 * and only if one was registered to begin with. If the VM is
+	 * currently not protected, but has been previously been protected,
+	 * then it's possible that the notifier is still registered.
+	 */
+	if (kvm->arch.pv.mmu_notifier.ops)
+		mmu_notifier_unregister(&kvm->arch.pv.mmu_notifier, kvm->mm);
+
+	debug_unregister(kvm->arch.dbf);
+	free_page((unsigned long)kvm->arch.sie_page2);
+	if (!kvm_is_ucontrol(kvm))
+		gmap_remove(kvm->arch.gmap);
+	kvm_s390_destroy_adapters(kvm);
+	kvm_s390_clear_float_irqs(kvm);
+	kvm_s390_vsie_destroy(kvm);
+	KVM_EVENT(3, "vm 0x%pK destroyed", kvm);
+}
+
+/* Section: vcpu related */
+static int __kvm_ucontrol_vcpu_init(struct kvm_vcpu *vcpu)
+{
+	vcpu->arch.gmap = gmap_create(current->mm, -1UL);
+	if (!vcpu->arch.gmap)
+		return -ENOMEM;
+	vcpu->arch.gmap->private = vcpu->kvm;
+
+	return 0;
+}
+
+static void sca_del_vcpu(struct kvm_vcpu *vcpu)
+{
+	if (!kvm_s390_use_sca_entries())
+		return;
+	read_lock(&vcpu->kvm->arch.sca_lock);
+	if (vcpu->kvm->arch.use_esca) {
+		struct esca_block *sca = vcpu->kvm->arch.sca;
+
+		clear_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
+		sca->cpu[vcpu->vcpu_id].sda = 0;
+	} else {
+		struct bsca_block *sca = vcpu->kvm->arch.sca;
+
+		clear_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
+		sca->cpu[vcpu->vcpu_id].sda = 0;
+	}
+	read_unlock(&vcpu->kvm->arch.sca_lock);
+}
+
+static void sca_add_vcpu(struct kvm_vcpu *vcpu)
+{
+	if (!kvm_s390_use_sca_entries()) {
+		phys_addr_t sca_phys = virt_to_phys(vcpu->kvm->arch.sca);
+
+		/* we still need the basic sca for the ipte control */
+		vcpu->arch.sie_block->scaoh = sca_phys >> 32;
+		vcpu->arch.sie_block->scaol = sca_phys;
+		return;
+	}
+	read_lock(&vcpu->kvm->arch.sca_lock);
+	if (vcpu->kvm->arch.use_esca) {
+		struct esca_block *sca = vcpu->kvm->arch.sca;
+		phys_addr_t sca_phys = virt_to_phys(sca);
+
+		sca->cpu[vcpu->vcpu_id].sda = virt_to_phys(vcpu->arch.sie_block);
+		vcpu->arch.sie_block->scaoh = sca_phys >> 32;
+		vcpu->arch.sie_block->scaol = sca_phys & ESCA_SCAOL_MASK;
+		vcpu->arch.sie_block->ecb2 |= ECB2_ESCA;
+		set_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
+	} else {
+		struct bsca_block *sca = vcpu->kvm->arch.sca;
+		phys_addr_t sca_phys = virt_to_phys(sca);
+
+		sca->cpu[vcpu->vcpu_id].sda = virt_to_phys(vcpu->arch.sie_block);
+		vcpu->arch.sie_block->scaoh = sca_phys >> 32;
+		vcpu->arch.sie_block->scaol = sca_phys;
+		set_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
+	}
+	read_unlock(&vcpu->kvm->arch.sca_lock);
+}
+
+/* Basic SCA to Extended SCA data copy routines */
+static inline void sca_copy_entry(struct esca_entry *d, struct bsca_entry *s)
+{
+	d->sda = s->sda;
+	d->sigp_ctrl.c = s->sigp_ctrl.c;
+	d->sigp_ctrl.scn = s->sigp_ctrl.scn;
+}
+
+static void sca_copy_b_to_e(struct esca_block *d, struct bsca_block *s)
+{
+	int i;
+
+	d->ipte_control = s->ipte_control;
+	d->mcn[0] = s->mcn;
+	for (i = 0; i < KVM_S390_BSCA_CPU_SLOTS; i++)
+		sca_copy_entry(&d->cpu[i], &s->cpu[i]);
+}
+
+static int sca_switch_to_extended(struct kvm *kvm)
+{
+	struct bsca_block *old_sca = kvm->arch.sca;
+	struct esca_block *new_sca;
+	struct kvm_vcpu *vcpu;
+	unsigned long vcpu_idx;
+	u32 scaol, scaoh;
+	phys_addr_t new_sca_phys;
+
+	if (kvm->arch.use_esca)
+		return 0;
+
+	new_sca = alloc_pages_exact(sizeof(*new_sca), GFP_KERNEL_ACCOUNT | __GFP_ZERO);
+	if (!new_sca)
+		return -ENOMEM;
+
+	new_sca_phys = virt_to_phys(new_sca);
+	scaoh = new_sca_phys >> 32;
+	scaol = new_sca_phys & ESCA_SCAOL_MASK;
+
+	kvm_s390_vcpu_block_all(kvm);
+	write_lock(&kvm->arch.sca_lock);
+
+	sca_copy_b_to_e(new_sca, old_sca);
+
+	kvm_for_each_vcpu(vcpu_idx, vcpu, kvm) {
+		vcpu->arch.sie_block->scaoh = scaoh;
+		vcpu->arch.sie_block->scaol = scaol;
+		vcpu->arch.sie_block->ecb2 |= ECB2_ESCA;
+	}
+	kvm->arch.sca = new_sca;
+	kvm->arch.use_esca = 1;
+
+	write_unlock(&kvm->arch.sca_lock);
+	kvm_s390_vcpu_unblock_all(kvm);
+
+	free_page((unsigned long)old_sca);
+
+	VM_EVENT(kvm, 2, "Switched to ESCA (0x%pK -> 0x%pK)",
+		 old_sca, kvm->arch.sca);
+	return 0;
+}
+
+static int sca_can_add_vcpu(struct kvm *kvm, unsigned int id)
+{
+	int rc;
+
+	if (!kvm_s390_use_sca_entries()) {
+		if (id < KVM_MAX_VCPUS)
+			return true;
+		return false;
+	}
+	if (id < KVM_S390_BSCA_CPU_SLOTS)
+		return true;
+	if (!sclp.has_esca || !sclp.has_64bscao)
+		return false;
+
+	rc = kvm->arch.use_esca ? 0 : sca_switch_to_extended(kvm);
+
+	return rc == 0 && id < KVM_S390_ESCA_CPU_SLOTS;
+}
+
+/* needs disabled preemption to protect from TOD sync and vcpu_load/put */
+static void __start_cpu_timer_accounting(struct kvm_vcpu *vcpu)
+{
+	WARN_ON_ONCE(vcpu->arch.cputm_start != 0);
+	raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
+	vcpu->arch.cputm_start = get_tod_clock_fast();
+	raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
+}
+
+/* needs disabled preemption to protect from TOD sync and vcpu_load/put */
+static void __stop_cpu_timer_accounting(struct kvm_vcpu *vcpu)
+{
+	WARN_ON_ONCE(vcpu->arch.cputm_start == 0);
+	raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
+	vcpu->arch.sie_block->cputm -= get_tod_clock_fast() - vcpu->arch.cputm_start;
+	vcpu->arch.cputm_start = 0;
+	raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
+}
+
+/* needs disabled preemption to protect from TOD sync and vcpu_load/put */
+static void __enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
+{
+	WARN_ON_ONCE(vcpu->arch.cputm_enabled);
+	vcpu->arch.cputm_enabled = true;
+	__start_cpu_timer_accounting(vcpu);
+}
+
+/* needs disabled preemption to protect from TOD sync and vcpu_load/put */
+static void __disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
+{
+	WARN_ON_ONCE(!vcpu->arch.cputm_enabled);
+	__stop_cpu_timer_accounting(vcpu);
+	vcpu->arch.cputm_enabled = false;
+}
+
+static void enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
+{
+	preempt_disable(); /* protect from TOD sync and vcpu_load/put */
+	__enable_cpu_timer_accounting(vcpu);
+	preempt_enable();
+}
+
+static void disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
+{
+	preempt_disable(); /* protect from TOD sync and vcpu_load/put */
+	__disable_cpu_timer_accounting(vcpu);
+	preempt_enable();
+}
+
+/* set the cpu timer - may only be called from the VCPU thread itself */
+void kvm_s390_set_cpu_timer(struct kvm_vcpu *vcpu, __u64 cputm)
+{
+	preempt_disable(); /* protect from TOD sync and vcpu_load/put */
+	raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
+	if (vcpu->arch.cputm_enabled)
+		vcpu->arch.cputm_start = get_tod_clock_fast();
+	vcpu->arch.sie_block->cputm = cputm;
+	raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
+	preempt_enable();
+}
+
+/* update and get the cpu timer - can also be called from other VCPU threads */
+__u64 kvm_s390_get_cpu_timer(struct kvm_vcpu *vcpu)
+{
+	unsigned int seq;
+	__u64 value;
+
+	if (unlikely(!vcpu->arch.cputm_enabled))
+		return vcpu->arch.sie_block->cputm;
+
+	preempt_disable(); /* protect from TOD sync and vcpu_load/put */
+	do {
+		seq = raw_read_seqcount(&vcpu->arch.cputm_seqcount);
+		/*
+		 * If the writer would ever execute a read in the critical
+		 * section, e.g. in irq context, we have a deadlock.
+		 */
+		WARN_ON_ONCE((seq & 1) && smp_processor_id() == vcpu->cpu);
+		value = vcpu->arch.sie_block->cputm;
+		/* if cputm_start is 0, accounting is being started/stopped */
+		if (likely(vcpu->arch.cputm_start))
+			value -= get_tod_clock_fast() - vcpu->arch.cputm_start;
+	} while (read_seqcount_retry(&vcpu->arch.cputm_seqcount, seq & ~1));
+	preempt_enable();
+	return value;
+}
+
+void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
+{
+
+	gmap_enable(vcpu->arch.enabled_gmap);
+	kvm_s390_set_cpuflags(vcpu, CPUSTAT_RUNNING);
+	if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
+		__start_cpu_timer_accounting(vcpu);
+	vcpu->cpu = cpu;
+}
+
+void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
+{
+	vcpu->cpu = -1;
+	if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
+		__stop_cpu_timer_accounting(vcpu);
+	kvm_s390_clear_cpuflags(vcpu, CPUSTAT_RUNNING);
+	vcpu->arch.enabled_gmap = gmap_get_enabled();
+	gmap_disable(vcpu->arch.enabled_gmap);
+
+}
+
+void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
+{
+	mutex_lock(&vcpu->kvm->lock);
+	preempt_disable();
+	vcpu->arch.sie_block->epoch = vcpu->kvm->arch.epoch;
+	vcpu->arch.sie_block->epdx = vcpu->kvm->arch.epdx;
+	preempt_enable();
+	mutex_unlock(&vcpu->kvm->lock);
+	if (!kvm_is_ucontrol(vcpu->kvm)) {
+		vcpu->arch.gmap = vcpu->kvm->arch.gmap;
+		sca_add_vcpu(vcpu);
+	}
+	if (test_kvm_facility(vcpu->kvm, 74) || vcpu->kvm->arch.user_instr0)
+		vcpu->arch.sie_block->ictl |= ICTL_OPEREXC;
+	/* make vcpu_load load the right gmap on the first trigger */
+	vcpu->arch.enabled_gmap = vcpu->arch.gmap;
+}
+
+static bool kvm_has_pckmo_subfunc(struct kvm *kvm, unsigned long nr)
+{
+	if (test_bit_inv(nr, (unsigned long *)&kvm->arch.model.subfuncs.pckmo) &&
+	    test_bit_inv(nr, (unsigned long *)&kvm_s390_available_subfunc.pckmo))
+		return true;
+	return false;
+}
+
+static bool kvm_has_pckmo_ecc(struct kvm *kvm)
+{
+	/* At least one ECC subfunction must be present */
+	return kvm_has_pckmo_subfunc(kvm, 32) ||
+	       kvm_has_pckmo_subfunc(kvm, 33) ||
+	       kvm_has_pckmo_subfunc(kvm, 34) ||
+	       kvm_has_pckmo_subfunc(kvm, 40) ||
+	       kvm_has_pckmo_subfunc(kvm, 41);
+
+}
+
+static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu)
+{
+	/*
+	 * If the AP instructions are not being interpreted and the MSAX3
+	 * facility is not configured for the guest, there is nothing to set up.
+	 */
+	if (!vcpu->kvm->arch.crypto.apie && !test_kvm_facility(vcpu->kvm, 76))
+		return;
+
+	vcpu->arch.sie_block->crycbd = vcpu->kvm->arch.crypto.crycbd;
+	vcpu->arch.sie_block->ecb3 &= ~(ECB3_AES | ECB3_DEA);
+	vcpu->arch.sie_block->eca &= ~ECA_APIE;
+	vcpu->arch.sie_block->ecd &= ~ECD_ECC;
+
+	if (vcpu->kvm->arch.crypto.apie)
+		vcpu->arch.sie_block->eca |= ECA_APIE;
+
+	/* Set up protected key support */
+	if (vcpu->kvm->arch.crypto.aes_kw) {
+		vcpu->arch.sie_block->ecb3 |= ECB3_AES;
+		/* ecc is also wrapped with AES key */
+		if (kvm_has_pckmo_ecc(vcpu->kvm))
+			vcpu->arch.sie_block->ecd |= ECD_ECC;
+	}
+
+	if (vcpu->kvm->arch.crypto.dea_kw)
+		vcpu->arch.sie_block->ecb3 |= ECB3_DEA;
+}
+
+void kvm_s390_vcpu_unsetup_cmma(struct kvm_vcpu *vcpu)
+{
+	free_page((unsigned long)phys_to_virt(vcpu->arch.sie_block->cbrlo));
+	vcpu->arch.sie_block->cbrlo = 0;
+}
+
+int kvm_s390_vcpu_setup_cmma(struct kvm_vcpu *vcpu)
+{
+	void *cbrlo_page = (void *)get_zeroed_page(GFP_KERNEL_ACCOUNT);
+
+	if (!cbrlo_page)
+		return -ENOMEM;
+
+	vcpu->arch.sie_block->cbrlo = virt_to_phys(cbrlo_page);
+	return 0;
+}
+
+static void kvm_s390_vcpu_setup_model(struct kvm_vcpu *vcpu)
+{
+	struct kvm_s390_cpu_model *model = &vcpu->kvm->arch.model;
+
+	vcpu->arch.sie_block->ibc = model->ibc;
+	if (test_kvm_facility(vcpu->kvm, 7))
+		vcpu->arch.sie_block->fac = virt_to_phys(model->fac_list);
+}
+
+static int kvm_s390_vcpu_setup(struct kvm_vcpu *vcpu)
+{
+	int rc = 0;
+	u16 uvrc, uvrrc;
+
+	atomic_set(&vcpu->arch.sie_block->cpuflags, CPUSTAT_ZARCH |
+						    CPUSTAT_SM |
+						    CPUSTAT_STOPPED);
+
+	if (test_kvm_facility(vcpu->kvm, 78))
+		kvm_s390_set_cpuflags(vcpu, CPUSTAT_GED2);
+	else if (test_kvm_facility(vcpu->kvm, 8))
+		kvm_s390_set_cpuflags(vcpu, CPUSTAT_GED);
+
+	kvm_s390_vcpu_setup_model(vcpu);
+
+	/* pgste_set_pte has special handling for !MACHINE_HAS_ESOP */
+	if (MACHINE_HAS_ESOP)
+		vcpu->arch.sie_block->ecb |= ECB_HOSTPROTINT;
+	if (test_kvm_facility(vcpu->kvm, 9))
+		vcpu->arch.sie_block->ecb |= ECB_SRSI;
+	if (test_kvm_facility(vcpu->kvm, 11))
+		vcpu->arch.sie_block->ecb |= ECB_PTF;
+	if (test_kvm_facility(vcpu->kvm, 73))
+		vcpu->arch.sie_block->ecb |= ECB_TE;
+	if (!kvm_is_ucontrol(vcpu->kvm))
+		vcpu->arch.sie_block->ecb |= ECB_SPECI;
+
+	if (test_kvm_facility(vcpu->kvm, 8) && vcpu->kvm->arch.use_pfmfi)
+		vcpu->arch.sie_block->ecb2 |= ECB2_PFMFI;
+	if (test_kvm_facility(vcpu->kvm, 130))
+		vcpu->arch.sie_block->ecb2 |= ECB2_IEP;
+	vcpu->arch.sie_block->eca = ECA_MVPGI | ECA_PROTEXCI;
+	if (sclp.has_cei)
+		vcpu->arch.sie_block->eca |= ECA_CEI;
+	if (sclp.has_ib)
+		vcpu->arch.sie_block->eca |= ECA_IB;
+	if (sclp.has_siif)
+		vcpu->arch.sie_block->eca |= ECA_SII;
+	if (sclp.has_sigpif)
+		vcpu->arch.sie_block->eca |= ECA_SIGPI;
+	if (test_kvm_facility(vcpu->kvm, 129)) {
+		vcpu->arch.sie_block->eca |= ECA_VX;
+		vcpu->arch.sie_block->ecd |= ECD_HOSTREGMGMT;
+	}
+	if (test_kvm_facility(vcpu->kvm, 139))
+		vcpu->arch.sie_block->ecd |= ECD_MEF;
+	if (test_kvm_facility(vcpu->kvm, 156))
+		vcpu->arch.sie_block->ecd |= ECD_ETOKENF;
+	if (vcpu->arch.sie_block->gd) {
+		vcpu->arch.sie_block->eca |= ECA_AIV;
+		VCPU_EVENT(vcpu, 3, "AIV gisa format-%u enabled for cpu %03u",
+			   vcpu->arch.sie_block->gd & 0x3, vcpu->vcpu_id);
+	}
+	vcpu->arch.sie_block->sdnxo = virt_to_phys(&vcpu->run->s.regs.sdnx) | SDNXC;
+	vcpu->arch.sie_block->riccbd = virt_to_phys(&vcpu->run->s.regs.riccb);
+
+	if (sclp.has_kss)
+		kvm_s390_set_cpuflags(vcpu, CPUSTAT_KSS);
+	else
+		vcpu->arch.sie_block->ictl |= ICTL_ISKE | ICTL_SSKE | ICTL_RRBE;
+
+	if (vcpu->kvm->arch.use_cmma) {
+		rc = kvm_s390_vcpu_setup_cmma(vcpu);
+		if (rc)
+			return rc;
+	}
+	hrtimer_init(&vcpu->arch.ckc_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
+	vcpu->arch.ckc_timer.function = kvm_s390_idle_wakeup;
+
+	vcpu->arch.sie_block->hpid = HPID_KVM;
+
+	kvm_s390_vcpu_crypto_setup(vcpu);
+
+	kvm_s390_vcpu_pci_setup(vcpu);
+
+	mutex_lock(&vcpu->kvm->lock);
+	if (kvm_s390_pv_is_protected(vcpu->kvm)) {
+		rc = kvm_s390_pv_create_cpu(vcpu, &uvrc, &uvrrc);
+		if (rc)
+			kvm_s390_vcpu_unsetup_cmma(vcpu);
+	}
+	mutex_unlock(&vcpu->kvm->lock);
+
+	return rc;
+}
+
+int kvm_arch_vcpu_precreate(struct kvm *kvm, unsigned int id)
+{
+	if (!kvm_is_ucontrol(kvm) && !sca_can_add_vcpu(kvm, id))
+		return -EINVAL;
+	return 0;
+}
+
+int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu)
+{
+	struct sie_page *sie_page;
+	int rc;
+
+	BUILD_BUG_ON(sizeof(struct sie_page) != 4096);
+	sie_page = (struct sie_page *) get_zeroed_page(GFP_KERNEL_ACCOUNT);
+	if (!sie_page)
+		return -ENOMEM;
+
+	vcpu->arch.sie_block = &sie_page->sie_block;
+	vcpu->arch.sie_block->itdba = virt_to_phys(&sie_page->itdb);
+
+	/* the real guest size will always be smaller than msl */
+	vcpu->arch.sie_block->mso = 0;
+	vcpu->arch.sie_block->msl = sclp.hamax;
+
+	vcpu->arch.sie_block->icpua = vcpu->vcpu_id;
+	spin_lock_init(&vcpu->arch.local_int.lock);
+	vcpu->arch.sie_block->gd = kvm_s390_get_gisa_desc(vcpu->kvm);
+	seqcount_init(&vcpu->arch.cputm_seqcount);
+
+	vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
+	kvm_clear_async_pf_completion_queue(vcpu);
+	vcpu->run->kvm_valid_regs = KVM_SYNC_PREFIX |
+				    KVM_SYNC_GPRS |
+				    KVM_SYNC_ACRS |
+				    KVM_SYNC_CRS |
+				    KVM_SYNC_ARCH0 |
+				    KVM_SYNC_PFAULT |
+				    KVM_SYNC_DIAG318;
+	kvm_s390_set_prefix(vcpu, 0);
+	if (test_kvm_facility(vcpu->kvm, 64))
+		vcpu->run->kvm_valid_regs |= KVM_SYNC_RICCB;
+	if (test_kvm_facility(vcpu->kvm, 82))
+		vcpu->run->kvm_valid_regs |= KVM_SYNC_BPBC;
+	if (test_kvm_facility(vcpu->kvm, 133))
+		vcpu->run->kvm_valid_regs |= KVM_SYNC_GSCB;
+	if (test_kvm_facility(vcpu->kvm, 156))
+		vcpu->run->kvm_valid_regs |= KVM_SYNC_ETOKEN;
+	/* fprs can be synchronized via vrs, even if the guest has no vx. With
+	 * MACHINE_HAS_VX, (load|store)_fpu_regs() will work with vrs format.
+	 */
+	if (MACHINE_HAS_VX)
+		vcpu->run->kvm_valid_regs |= KVM_SYNC_VRS;
+	else
+		vcpu->run->kvm_valid_regs |= KVM_SYNC_FPRS;
+
+	if (kvm_is_ucontrol(vcpu->kvm)) {
+		rc = __kvm_ucontrol_vcpu_init(vcpu);
+		if (rc)
+			goto out_free_sie_block;
+	}
+
+	VM_EVENT(vcpu->kvm, 3, "create cpu %d at 0x%pK, sie block at 0x%pK",
+		 vcpu->vcpu_id, vcpu, vcpu->arch.sie_block);
+	trace_kvm_s390_create_vcpu(vcpu->vcpu_id, vcpu, vcpu->arch.sie_block);
+
+	rc = kvm_s390_vcpu_setup(vcpu);
+	if (rc)
+		goto out_ucontrol_uninit;
+
+	kvm_s390_update_topology_change_report(vcpu->kvm, 1);
+	return 0;
+
+out_ucontrol_uninit:
+	if (kvm_is_ucontrol(vcpu->kvm))
+		gmap_remove(vcpu->arch.gmap);
+out_free_sie_block:
+	free_page((unsigned long)(vcpu->arch.sie_block));
+	return rc;
+}
+
+int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
+{
+	clear_bit(vcpu->vcpu_idx, vcpu->kvm->arch.gisa_int.kicked_mask);
+	return kvm_s390_vcpu_has_irq(vcpu, 0);
+}
+
+bool kvm_arch_vcpu_in_kernel(struct kvm_vcpu *vcpu)
+{
+	return !(vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE);
+}
+
+void kvm_s390_vcpu_block(struct kvm_vcpu *vcpu)
+{
+	atomic_or(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
+	exit_sie(vcpu);
+}
+
+void kvm_s390_vcpu_unblock(struct kvm_vcpu *vcpu)
+{
+	atomic_andnot(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
+}
+
+static void kvm_s390_vcpu_request(struct kvm_vcpu *vcpu)
+{
+	atomic_or(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
+	exit_sie(vcpu);
+}
+
+bool kvm_s390_vcpu_sie_inhibited(struct kvm_vcpu *vcpu)
+{
+	return atomic_read(&vcpu->arch.sie_block->prog20) &
+	       (PROG_BLOCK_SIE | PROG_REQUEST);
+}
+
+static void kvm_s390_vcpu_request_handled(struct kvm_vcpu *vcpu)
+{
+	atomic_andnot(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
+}
+
+/*
+ * Kick a guest cpu out of (v)SIE and wait until (v)SIE is not running.
+ * If the CPU is not running (e.g. waiting as idle) the function will
+ * return immediately. */
+void exit_sie(struct kvm_vcpu *vcpu)
+{
+	kvm_s390_set_cpuflags(vcpu, CPUSTAT_STOP_INT);
+	kvm_s390_vsie_kick(vcpu);
+	while (vcpu->arch.sie_block->prog0c & PROG_IN_SIE)
+		cpu_relax();
+}
+
+/* Kick a guest cpu out of SIE to process a request synchronously */
+void kvm_s390_sync_request(int req, struct kvm_vcpu *vcpu)
+{
+	__kvm_make_request(req, vcpu);
+	kvm_s390_vcpu_request(vcpu);
+}
+
+static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
+			      unsigned long end)
+{
+	struct kvm *kvm = gmap->private;
+	struct kvm_vcpu *vcpu;
+	unsigned long prefix;
+	unsigned long i;
+
+	if (gmap_is_shadow(gmap))
+		return;
+	if (start >= 1UL << 31)
+		/* We are only interested in prefix pages */
+		return;
+	kvm_for_each_vcpu(i, vcpu, kvm) {
+		/* match against both prefix pages */
+		prefix = kvm_s390_get_prefix(vcpu);
+		if (prefix <= end && start <= prefix + 2*PAGE_SIZE - 1) {
+			VCPU_EVENT(vcpu, 2, "gmap notifier for %lx-%lx",
+				   start, end);
+			kvm_s390_sync_request(KVM_REQ_REFRESH_GUEST_PREFIX, vcpu);
+		}
+	}
+}
+
+bool kvm_arch_no_poll(struct kvm_vcpu *vcpu)
+{
+	/* do not poll with more than halt_poll_max_steal percent of steal time */
+	if (S390_lowcore.avg_steal_timer * 100 / (TICK_USEC << 12) >=
+	    READ_ONCE(halt_poll_max_steal)) {
+		vcpu->stat.halt_no_poll_steal++;
+		return true;
+	}
+	return false;
+}
+
+int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
+{
+	/* kvm common code refers to this, but never calls it */
+	BUG();
+	return 0;
+}
+
+static int kvm_arch_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu,
+					   struct kvm_one_reg *reg)
+{
+	int r = -EINVAL;
+
+	switch (reg->id) {
+	case KVM_REG_S390_TODPR:
+		r = put_user(vcpu->arch.sie_block->todpr,
+			     (u32 __user *)reg->addr);
+		break;
+	case KVM_REG_S390_EPOCHDIFF:
+		r = put_user(vcpu->arch.sie_block->epoch,
+			     (u64 __user *)reg->addr);
+		break;
+	case KVM_REG_S390_CPU_TIMER:
+		r = put_user(kvm_s390_get_cpu_timer(vcpu),
+			     (u64 __user *)reg->addr);
+		break;
+	case KVM_REG_S390_CLOCK_COMP:
+		r = put_user(vcpu->arch.sie_block->ckc,
+			     (u64 __user *)reg->addr);
+		break;
+	case KVM_REG_S390_PFTOKEN:
+		r = put_user(vcpu->arch.pfault_token,
+			     (u64 __user *)reg->addr);
+		break;
+	case KVM_REG_S390_PFCOMPARE:
+		r = put_user(vcpu->arch.pfault_compare,
+			     (u64 __user *)reg->addr);
+		break;
+	case KVM_REG_S390_PFSELECT:
+		r = put_user(vcpu->arch.pfault_select,
+			     (u64 __user *)reg->addr);
+		break;
+	case KVM_REG_S390_PP:
+		r = put_user(vcpu->arch.sie_block->pp,
+			     (u64 __user *)reg->addr);
+		break;
+	case KVM_REG_S390_GBEA:
+		r = put_user(vcpu->arch.sie_block->gbea,
+			     (u64 __user *)reg->addr);
+		break;
+	default:
+		break;
+	}
+
+	return r;
+}
+
+static int kvm_arch_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu,
+					   struct kvm_one_reg *reg)
+{
+	int r = -EINVAL;
+	__u64 val;
+
+	switch (reg->id) {
+	case KVM_REG_S390_TODPR:
+		r = get_user(vcpu->arch.sie_block->todpr,
+			     (u32 __user *)reg->addr);
+		break;
+	case KVM_REG_S390_EPOCHDIFF:
+		r = get_user(vcpu->arch.sie_block->epoch,
+			     (u64 __user *)reg->addr);
+		break;
+	case KVM_REG_S390_CPU_TIMER:
+		r = get_user(val, (u64 __user *)reg->addr);
+		if (!r)
+			kvm_s390_set_cpu_timer(vcpu, val);
+		break;
+	case KVM_REG_S390_CLOCK_COMP:
+		r = get_user(vcpu->arch.sie_block->ckc,
+			     (u64 __user *)reg->addr);
+		break;
+	case KVM_REG_S390_PFTOKEN:
+		r = get_user(vcpu->arch.pfault_token,
+			     (u64 __user *)reg->addr);
+		if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
+			kvm_clear_async_pf_completion_queue(vcpu);
+		break;
+	case KVM_REG_S390_PFCOMPARE:
+		r = get_user(vcpu->arch.pfault_compare,
+			     (u64 __user *)reg->addr);
+		break;
+	case KVM_REG_S390_PFSELECT:
+		r = get_user(vcpu->arch.pfault_select,
+			     (u64 __user *)reg->addr);
+		break;
+	case KVM_REG_S390_PP:
+		r = get_user(vcpu->arch.sie_block->pp,
+			     (u64 __user *)reg->addr);
+		break;
+	case KVM_REG_S390_GBEA:
+		r = get_user(vcpu->arch.sie_block->gbea,
+			     (u64 __user *)reg->addr);
+		break;
+	default:
+		break;
+	}
+
+	return r;
+}
+
+static void kvm_arch_vcpu_ioctl_normal_reset(struct kvm_vcpu *vcpu)
+{
+	vcpu->arch.sie_block->gpsw.mask &= ~PSW_MASK_RI;
+	vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
+	memset(vcpu->run->s.regs.riccb, 0, sizeof(vcpu->run->s.regs.riccb));
+
+	kvm_clear_async_pf_completion_queue(vcpu);
+	if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm))
+		kvm_s390_vcpu_stop(vcpu);
+	kvm_s390_clear_local_irqs(vcpu);
+}
+
+static void kvm_arch_vcpu_ioctl_initial_reset(struct kvm_vcpu *vcpu)
+{
+	/* Initial reset is a superset of the normal reset */
+	kvm_arch_vcpu_ioctl_normal_reset(vcpu);
+
+	/*
+	 * This equals initial cpu reset in pop, but we don't switch to ESA.
+	 * We do not only reset the internal data, but also ...
+	 */
+	vcpu->arch.sie_block->gpsw.mask = 0;
+	vcpu->arch.sie_block->gpsw.addr = 0;
+	kvm_s390_set_prefix(vcpu, 0);
+	kvm_s390_set_cpu_timer(vcpu, 0);
+	vcpu->arch.sie_block->ckc = 0;
+	memset(vcpu->arch.sie_block->gcr, 0, sizeof(vcpu->arch.sie_block->gcr));
+	vcpu->arch.sie_block->gcr[0] = CR0_INITIAL_MASK;
+	vcpu->arch.sie_block->gcr[14] = CR14_INITIAL_MASK;
+
+	/* ... the data in sync regs */
+	memset(vcpu->run->s.regs.crs, 0, sizeof(vcpu->run->s.regs.crs));
+	vcpu->run->s.regs.ckc = 0;
+	vcpu->run->s.regs.crs[0] = CR0_INITIAL_MASK;
+	vcpu->run->s.regs.crs[14] = CR14_INITIAL_MASK;
+	vcpu->run->psw_addr = 0;
+	vcpu->run->psw_mask = 0;
+	vcpu->run->s.regs.todpr = 0;
+	vcpu->run->s.regs.cputm = 0;
+	vcpu->run->s.regs.ckc = 0;
+	vcpu->run->s.regs.pp = 0;
+	vcpu->run->s.regs.gbea = 1;
+	vcpu->run->s.regs.fpc = 0;
+	/*
+	 * Do not reset these registers in the protected case, as some of
+	 * them are overlaid and they are not accessible in this case
+	 * anyway.
+	 */
+	if (!kvm_s390_pv_cpu_is_protected(vcpu)) {
+		vcpu->arch.sie_block->gbea = 1;
+		vcpu->arch.sie_block->pp = 0;
+		vcpu->arch.sie_block->fpf &= ~FPF_BPBC;
+		vcpu->arch.sie_block->todpr = 0;
+	}
+}
+
+static void kvm_arch_vcpu_ioctl_clear_reset(struct kvm_vcpu *vcpu)
+{
+	struct kvm_sync_regs *regs = &vcpu->run->s.regs;
+
+	/* Clear reset is a superset of the initial reset */
+	kvm_arch_vcpu_ioctl_initial_reset(vcpu);
+
+	memset(&regs->gprs, 0, sizeof(regs->gprs));
+	memset(&regs->vrs, 0, sizeof(regs->vrs));
+	memset(&regs->acrs, 0, sizeof(regs->acrs));
+	memset(&regs->gscb, 0, sizeof(regs->gscb));
+
+	regs->etoken = 0;
+	regs->etoken_extension = 0;
+}
+
+int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
+{
+	vcpu_load(vcpu);
+	memcpy(&vcpu->run->s.regs.gprs, &regs->gprs, sizeof(regs->gprs));
+	vcpu_put(vcpu);
+	return 0;
+}
+
+int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
+{
+	vcpu_load(vcpu);
+	memcpy(&regs->gprs, &vcpu->run->s.regs.gprs, sizeof(regs->gprs));
+	vcpu_put(vcpu);
+	return 0;
+}
+
+int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
+				  struct kvm_sregs *sregs)
+{
+	vcpu_load(vcpu);
+
+	memcpy(&vcpu->run->s.regs.acrs, &sregs->acrs, sizeof(sregs->acrs));
+	memcpy(&vcpu->arch.sie_block->gcr, &sregs->crs, sizeof(sregs->crs));
+
+	vcpu_put(vcpu);
+	return 0;
+}
+
+int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
+				  struct kvm_sregs *sregs)
+{
+	vcpu_load(vcpu);
+
+	memcpy(&sregs->acrs, &vcpu->run->s.regs.acrs, sizeof(sregs->acrs));
+	memcpy(&sregs->crs, &vcpu->arch.sie_block->gcr, sizeof(sregs->crs));
+
+	vcpu_put(vcpu);
+	return 0;
+}
+
+int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
+{
+	int ret = 0;
+
+	vcpu_load(vcpu);
+
+	if (test_fp_ctl(fpu->fpc)) {
+		ret = -EINVAL;
+		goto out;
+	}
+	vcpu->run->s.regs.fpc = fpu->fpc;
+	if (MACHINE_HAS_VX)
+		convert_fp_to_vx((__vector128 *) vcpu->run->s.regs.vrs,
+				 (freg_t *) fpu->fprs);
+	else
+		memcpy(vcpu->run->s.regs.fprs, &fpu->fprs, sizeof(fpu->fprs));
+
+out:
+	vcpu_put(vcpu);
+	return ret;
+}
+
+int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
+{
+	vcpu_load(vcpu);
+
+	/* make sure we have the latest values */
+	save_fpu_regs();
+	if (MACHINE_HAS_VX)
+		convert_vx_to_fp((freg_t *) fpu->fprs,
+				 (__vector128 *) vcpu->run->s.regs.vrs);
+	else
+		memcpy(fpu->fprs, vcpu->run->s.regs.fprs, sizeof(fpu->fprs));
+	fpu->fpc = vcpu->run->s.regs.fpc;
+
+	vcpu_put(vcpu);
+	return 0;
+}
+
+static int kvm_arch_vcpu_ioctl_set_initial_psw(struct kvm_vcpu *vcpu, psw_t psw)
+{
+	int rc = 0;
+
+	if (!is_vcpu_stopped(vcpu))
+		rc = -EBUSY;
+	else {
+		vcpu->run->psw_mask = psw.mask;
+		vcpu->run->psw_addr = psw.addr;
+	}
+	return rc;
+}
+
+int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
+				  struct kvm_translation *tr)
+{
+	return -EINVAL; /* not implemented yet */
+}
+
+#define VALID_GUESTDBG_FLAGS (KVM_GUESTDBG_SINGLESTEP | \
+			      KVM_GUESTDBG_USE_HW_BP | \
+			      KVM_GUESTDBG_ENABLE)
+
+int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
+					struct kvm_guest_debug *dbg)
+{
+	int rc = 0;
+
+	vcpu_load(vcpu);
+
+	vcpu->guest_debug = 0;
+	kvm_s390_clear_bp_data(vcpu);
+
+	if (dbg->control & ~VALID_GUESTDBG_FLAGS) {
+		rc = -EINVAL;
+		goto out;
+	}
+	if (!sclp.has_gpere) {
+		rc = -EINVAL;
+		goto out;
+	}
+
+	if (dbg->control & KVM_GUESTDBG_ENABLE) {
+		vcpu->guest_debug = dbg->control;
+		/* enforce guest PER */
+		kvm_s390_set_cpuflags(vcpu, CPUSTAT_P);
+
+		if (dbg->control & KVM_GUESTDBG_USE_HW_BP)
+			rc = kvm_s390_import_bp_data(vcpu, dbg);
+	} else {
+		kvm_s390_clear_cpuflags(vcpu, CPUSTAT_P);
+		vcpu->arch.guestdbg.last_bp = 0;
+	}
+
+	if (rc) {
+		vcpu->guest_debug = 0;
+		kvm_s390_clear_bp_data(vcpu);
+		kvm_s390_clear_cpuflags(vcpu, CPUSTAT_P);
+	}
+
+out:
+	vcpu_put(vcpu);
+	return rc;
+}
+
+int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
+				    struct kvm_mp_state *mp_state)
+{
+	int ret;
+
+	vcpu_load(vcpu);
+
+	/* CHECK_STOP and LOAD are not supported yet */
+	ret = is_vcpu_stopped(vcpu) ? KVM_MP_STATE_STOPPED :
+				      KVM_MP_STATE_OPERATING;
+
+	vcpu_put(vcpu);
+	return ret;
+}
+
+int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
+				    struct kvm_mp_state *mp_state)
+{
+	int rc = 0;
+
+	vcpu_load(vcpu);
+
+	/* user space knows about this interface - let it control the state */
+	kvm_s390_set_user_cpu_state_ctrl(vcpu->kvm);
+
+	switch (mp_state->mp_state) {
+	case KVM_MP_STATE_STOPPED:
+		rc = kvm_s390_vcpu_stop(vcpu);
+		break;
+	case KVM_MP_STATE_OPERATING:
+		rc = kvm_s390_vcpu_start(vcpu);
+		break;
+	case KVM_MP_STATE_LOAD:
+		if (!kvm_s390_pv_cpu_is_protected(vcpu)) {
+			rc = -ENXIO;
+			break;
+		}
+		rc = kvm_s390_pv_set_cpu_state(vcpu, PV_CPU_STATE_OPR_LOAD);
+		break;
+	case KVM_MP_STATE_CHECK_STOP:
+		fallthrough;	/* CHECK_STOP and LOAD are not supported yet */
+	default:
+		rc = -ENXIO;
+	}
+
+	vcpu_put(vcpu);
+	return rc;
+}
+
+static bool ibs_enabled(struct kvm_vcpu *vcpu)
+{
+	return kvm_s390_test_cpuflags(vcpu, CPUSTAT_IBS);
+}
+
+static int kvm_s390_handle_requests(struct kvm_vcpu *vcpu)
+{
+retry:
+	kvm_s390_vcpu_request_handled(vcpu);
+	if (!kvm_request_pending(vcpu))
+		return 0;
+	/*
+	 * If the guest prefix changed, re-arm the ipte notifier for the
+	 * guest prefix page. gmap_mprotect_notify will wait on the ptl lock.
+	 * This ensures that the ipte instruction for this request has
+	 * already finished. We might race against a second unmapper that
+	 * wants to set the blocking bit. Lets just retry the request loop.
+	 */
+	if (kvm_check_request(KVM_REQ_REFRESH_GUEST_PREFIX, vcpu)) {
+		int rc;
+		rc = gmap_mprotect_notify(vcpu->arch.gmap,
+					  kvm_s390_get_prefix(vcpu),
+					  PAGE_SIZE * 2, PROT_WRITE);
+		if (rc) {
+			kvm_make_request(KVM_REQ_REFRESH_GUEST_PREFIX, vcpu);
+			return rc;
+		}
+		goto retry;
+	}
+
+	if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu)) {
+		vcpu->arch.sie_block->ihcpu = 0xffff;
+		goto retry;
+	}
+
+	if (kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu)) {
+		if (!ibs_enabled(vcpu)) {
+			trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 1);
+			kvm_s390_set_cpuflags(vcpu, CPUSTAT_IBS);
+		}
+		goto retry;
+	}
+
+	if (kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu)) {
+		if (ibs_enabled(vcpu)) {
+			trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 0);
+			kvm_s390_clear_cpuflags(vcpu, CPUSTAT_IBS);
+		}
+		goto retry;
+	}
+
+	if (kvm_check_request(KVM_REQ_ICPT_OPEREXC, vcpu)) {
+		vcpu->arch.sie_block->ictl |= ICTL_OPEREXC;
+		goto retry;
+	}
+
+	if (kvm_check_request(KVM_REQ_START_MIGRATION, vcpu)) {
+		/*
+		 * Disable CMM virtualization; we will emulate the ESSA
+		 * instruction manually, in order to provide additional
+		 * functionalities needed for live migration.
+		 */
+		vcpu->arch.sie_block->ecb2 &= ~ECB2_CMMA;
+		goto retry;
+	}
+
+	if (kvm_check_request(KVM_REQ_STOP_MIGRATION, vcpu)) {
+		/*
+		 * Re-enable CMM virtualization if CMMA is available and
+		 * CMM has been used.
+		 */
+		if ((vcpu->kvm->arch.use_cmma) &&
+		    (vcpu->kvm->mm->context.uses_cmm))
+			vcpu->arch.sie_block->ecb2 |= ECB2_CMMA;
+		goto retry;
+	}
+
+	/* we left the vsie handler, nothing to do, just clear the request */
+	kvm_clear_request(KVM_REQ_VSIE_RESTART, vcpu);
+
+	return 0;
+}
+
+static void __kvm_s390_set_tod_clock(struct kvm *kvm, const struct kvm_s390_vm_tod_clock *gtod)
+{
+	struct kvm_vcpu *vcpu;
+	union tod_clock clk;
+	unsigned long i;
+
+	preempt_disable();
+
+	store_tod_clock_ext(&clk);
+
+	kvm->arch.epoch = gtod->tod - clk.tod;
+	kvm->arch.epdx = 0;
+	if (test_kvm_facility(kvm, 139)) {
+		kvm->arch.epdx = gtod->epoch_idx - clk.ei;
+		if (kvm->arch.epoch > gtod->tod)
+			kvm->arch.epdx -= 1;
+	}
+
+	kvm_s390_vcpu_block_all(kvm);
+	kvm_for_each_vcpu(i, vcpu, kvm) {
+		vcpu->arch.sie_block->epoch = kvm->arch.epoch;
+		vcpu->arch.sie_block->epdx  = kvm->arch.epdx;
+	}
+
+	kvm_s390_vcpu_unblock_all(kvm);
+	preempt_enable();
+}
+
+int kvm_s390_try_set_tod_clock(struct kvm *kvm, const struct kvm_s390_vm_tod_clock *gtod)
+{
+	if (!mutex_trylock(&kvm->lock))
+		return 0;
+	__kvm_s390_set_tod_clock(kvm, gtod);
+	mutex_unlock(&kvm->lock);
+	return 1;
+}
+
+/**
+ * kvm_arch_fault_in_page - fault-in guest page if necessary
+ * @vcpu: The corresponding virtual cpu
+ * @gpa: Guest physical address
+ * @writable: Whether the page should be writable or not
+ *
+ * Make sure that a guest page has been faulted-in on the host.
+ *
+ * Return: Zero on success, negative error code otherwise.
+ */
+long kvm_arch_fault_in_page(struct kvm_vcpu *vcpu, gpa_t gpa, int writable)
+{
+	return gmap_fault(vcpu->arch.gmap, gpa,
+			  writable ? FAULT_FLAG_WRITE : 0);
+}
+
+static void __kvm_inject_pfault_token(struct kvm_vcpu *vcpu, bool start_token,
+				      unsigned long token)
+{
+	struct kvm_s390_interrupt inti;
+	struct kvm_s390_irq irq;
+
+	if (start_token) {
+		irq.u.ext.ext_params2 = token;
+		irq.type = KVM_S390_INT_PFAULT_INIT;
+		WARN_ON_ONCE(kvm_s390_inject_vcpu(vcpu, &irq));
+	} else {
+		inti.type = KVM_S390_INT_PFAULT_DONE;
+		inti.parm64 = token;
+		WARN_ON_ONCE(kvm_s390_inject_vm(vcpu->kvm, &inti));
+	}
+}
+
+bool kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu,
+				     struct kvm_async_pf *work)
+{
+	trace_kvm_s390_pfault_init(vcpu, work->arch.pfault_token);
+	__kvm_inject_pfault_token(vcpu, true, work->arch.pfault_token);
+
+	return true;
+}
+
+void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
+				 struct kvm_async_pf *work)
+{
+	trace_kvm_s390_pfault_done(vcpu, work->arch.pfault_token);
+	__kvm_inject_pfault_token(vcpu, false, work->arch.pfault_token);
+}
+
+void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu,
+			       struct kvm_async_pf *work)
+{
+	/* s390 will always inject the page directly */
+}
+
+bool kvm_arch_can_dequeue_async_page_present(struct kvm_vcpu *vcpu)
+{
+	/*
+	 * s390 will always inject the page directly,
+	 * but we still want check_async_completion to cleanup
+	 */
+	return true;
+}
+
+static bool kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu)
+{
+	hva_t hva;
+	struct kvm_arch_async_pf arch;
+
+	if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
+		return false;
+	if ((vcpu->arch.sie_block->gpsw.mask & vcpu->arch.pfault_select) !=
+	    vcpu->arch.pfault_compare)
+		return false;
+	if (psw_extint_disabled(vcpu))
+		return false;
+	if (kvm_s390_vcpu_has_irq(vcpu, 0))
+		return false;
+	if (!(vcpu->arch.sie_block->gcr[0] & CR0_SERVICE_SIGNAL_SUBMASK))
+		return false;
+	if (!vcpu->arch.gmap->pfault_enabled)
+		return false;
+
+	hva = gfn_to_hva(vcpu->kvm, gpa_to_gfn(current->thread.gmap_addr));
+	hva += current->thread.gmap_addr & ~PAGE_MASK;
+	if (read_guest_real(vcpu, vcpu->arch.pfault_token, &arch.pfault_token, 8))
+		return false;
+
+	return kvm_setup_async_pf(vcpu, current->thread.gmap_addr, hva, &arch);
+}
+
+static int vcpu_pre_run(struct kvm_vcpu *vcpu)
+{
+	int rc, cpuflags;
+
+	/*
+	 * On s390 notifications for arriving pages will be delivered directly
+	 * to the guest but the house keeping for completed pfaults is
+	 * handled outside the worker.
+	 */
+	kvm_check_async_pf_completion(vcpu);
+
+	vcpu->arch.sie_block->gg14 = vcpu->run->s.regs.gprs[14];
+	vcpu->arch.sie_block->gg15 = vcpu->run->s.regs.gprs[15];
+
+	if (need_resched())
+		schedule();
+
+	if (!kvm_is_ucontrol(vcpu->kvm)) {
+		rc = kvm_s390_deliver_pending_interrupts(vcpu);
+		if (rc || guestdbg_exit_pending(vcpu))
+			return rc;
+	}
+
+	rc = kvm_s390_handle_requests(vcpu);
+	if (rc)
+		return rc;
+
+	if (guestdbg_enabled(vcpu)) {
+		kvm_s390_backup_guest_per_regs(vcpu);
+		kvm_s390_patch_guest_per_regs(vcpu);
+	}
+
+	clear_bit(vcpu->vcpu_idx, vcpu->kvm->arch.gisa_int.kicked_mask);
+
+	vcpu->arch.sie_block->icptcode = 0;
+	cpuflags = atomic_read(&vcpu->arch.sie_block->cpuflags);
+	VCPU_EVENT(vcpu, 6, "entering sie flags %x", cpuflags);
+	trace_kvm_s390_sie_enter(vcpu, cpuflags);
+
+	return 0;
+}
+
+static int vcpu_post_run_fault_in_sie(struct kvm_vcpu *vcpu)
+{
+	struct kvm_s390_pgm_info pgm_info = {
+		.code = PGM_ADDRESSING,
+	};
+	u8 opcode, ilen;
+	int rc;
+
+	VCPU_EVENT(vcpu, 3, "%s", "fault in sie instruction");
+	trace_kvm_s390_sie_fault(vcpu);
+
+	/*
+	 * We want to inject an addressing exception, which is defined as a
+	 * suppressing or terminating exception. However, since we came here
+	 * by a DAT access exception, the PSW still points to the faulting
+	 * instruction since DAT exceptions are nullifying. So we've got
+	 * to look up the current opcode to get the length of the instruction
+	 * to be able to forward the PSW.
+	 */
+	rc = read_guest_instr(vcpu, vcpu->arch.sie_block->gpsw.addr, &opcode, 1);
+	ilen = insn_length(opcode);
+	if (rc < 0) {
+		return rc;
+	} else if (rc) {
+		/* Instruction-Fetching Exceptions - we can't detect the ilen.
+		 * Forward by arbitrary ilc, injection will take care of
+		 * nullification if necessary.
+		 */
+		pgm_info = vcpu->arch.pgm;
+		ilen = 4;
+	}
+	pgm_info.flags = ilen | KVM_S390_PGM_FLAGS_ILC_VALID;
+	kvm_s390_forward_psw(vcpu, ilen);
+	return kvm_s390_inject_prog_irq(vcpu, &pgm_info);
+}
+
+static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason)
+{
+	struct mcck_volatile_info *mcck_info;
+	struct sie_page *sie_page;
+
+	VCPU_EVENT(vcpu, 6, "exit sie icptcode %d",
+		   vcpu->arch.sie_block->icptcode);
+	trace_kvm_s390_sie_exit(vcpu, vcpu->arch.sie_block->icptcode);
+
+	if (guestdbg_enabled(vcpu))
+		kvm_s390_restore_guest_per_regs(vcpu);
+
+	vcpu->run->s.regs.gprs[14] = vcpu->arch.sie_block->gg14;
+	vcpu->run->s.regs.gprs[15] = vcpu->arch.sie_block->gg15;
+
+	if (exit_reason == -EINTR) {
+		VCPU_EVENT(vcpu, 3, "%s", "machine check");
+		sie_page = container_of(vcpu->arch.sie_block,
+					struct sie_page, sie_block);
+		mcck_info = &sie_page->mcck_info;
+		kvm_s390_reinject_machine_check(vcpu, mcck_info);
+		return 0;
+	}
+
+	if (vcpu->arch.sie_block->icptcode > 0) {
+		int rc = kvm_handle_sie_intercept(vcpu);
+
+		if (rc != -EOPNOTSUPP)
+			return rc;
+		vcpu->run->exit_reason = KVM_EXIT_S390_SIEIC;
+		vcpu->run->s390_sieic.icptcode = vcpu->arch.sie_block->icptcode;
+		vcpu->run->s390_sieic.ipa = vcpu->arch.sie_block->ipa;
+		vcpu->run->s390_sieic.ipb = vcpu->arch.sie_block->ipb;
+		return -EREMOTE;
+	} else if (exit_reason != -EFAULT) {
+		vcpu->stat.exit_null++;
+		return 0;
+	} else if (kvm_is_ucontrol(vcpu->kvm)) {
+		vcpu->run->exit_reason = KVM_EXIT_S390_UCONTROL;
+		vcpu->run->s390_ucontrol.trans_exc_code =
+						current->thread.gmap_addr;
+		vcpu->run->s390_ucontrol.pgm_code = 0x10;
+		return -EREMOTE;
+	} else if (current->thread.gmap_pfault) {
+		trace_kvm_s390_major_guest_pfault(vcpu);
+		current->thread.gmap_pfault = 0;
+		if (kvm_arch_setup_async_pf(vcpu))
+			return 0;
+		vcpu->stat.pfault_sync++;
+		return kvm_arch_fault_in_page(vcpu, current->thread.gmap_addr, 1);
+	}
+	return vcpu_post_run_fault_in_sie(vcpu);
+}
+
+#define PSW_INT_MASK (PSW_MASK_EXT | PSW_MASK_IO | PSW_MASK_MCHECK)
+static int __vcpu_run(struct kvm_vcpu *vcpu)
+{
+	int rc, exit_reason;
+	struct sie_page *sie_page = (struct sie_page *)vcpu->arch.sie_block;
+
+	/*
+	 * We try to hold kvm->srcu during most of vcpu_run (except when run-
+	 * ning the guest), so that memslots (and other stuff) are protected
+	 */
+	kvm_vcpu_srcu_read_lock(vcpu);
+
+	do {
+		rc = vcpu_pre_run(vcpu);
+		if (rc || guestdbg_exit_pending(vcpu))
+			break;
+
+		kvm_vcpu_srcu_read_unlock(vcpu);
+		/*
+		 * As PF_VCPU will be used in fault handler, between
+		 * guest_enter and guest_exit should be no uaccess.
+		 */
+		local_irq_disable();
+		guest_enter_irqoff();
+		__disable_cpu_timer_accounting(vcpu);
+		local_irq_enable();
+		if (kvm_s390_pv_cpu_is_protected(vcpu)) {
+			memcpy(sie_page->pv_grregs,
+			       vcpu->run->s.regs.gprs,
+			       sizeof(sie_page->pv_grregs));
+		}
+		if (test_cpu_flag(CIF_FPU))
+			load_fpu_regs();
+		exit_reason = sie64a(vcpu->arch.sie_block,
+				     vcpu->run->s.regs.gprs);
+		if (kvm_s390_pv_cpu_is_protected(vcpu)) {
+			memcpy(vcpu->run->s.regs.gprs,
+			       sie_page->pv_grregs,
+			       sizeof(sie_page->pv_grregs));
+			/*
+			 * We're not allowed to inject interrupts on intercepts
+			 * that leave the guest state in an "in-between" state
+			 * where the next SIE entry will do a continuation.
+			 * Fence interrupts in our "internal" PSW.
+			 */
+			if (vcpu->arch.sie_block->icptcode == ICPT_PV_INSTR ||
+			    vcpu->arch.sie_block->icptcode == ICPT_PV_PREF) {
+				vcpu->arch.sie_block->gpsw.mask &= ~PSW_INT_MASK;
+			}
+		}
+		local_irq_disable();
+		__enable_cpu_timer_accounting(vcpu);
+		guest_exit_irqoff();
+		local_irq_enable();
+		kvm_vcpu_srcu_read_lock(vcpu);
+
+		rc = vcpu_post_run(vcpu, exit_reason);
+	} while (!signal_pending(current) && !guestdbg_exit_pending(vcpu) && !rc);
+
+	kvm_vcpu_srcu_read_unlock(vcpu);
+	return rc;
+}
+
+static void sync_regs_fmt2(struct kvm_vcpu *vcpu)
+{
+	struct kvm_run *kvm_run = vcpu->run;
+	struct runtime_instr_cb *riccb;
+	struct gs_cb *gscb;
+
+	riccb = (struct runtime_instr_cb *) &kvm_run->s.regs.riccb;
+	gscb = (struct gs_cb *) &kvm_run->s.regs.gscb;
+	vcpu->arch.sie_block->gpsw.mask = kvm_run->psw_mask;
+	vcpu->arch.sie_block->gpsw.addr = kvm_run->psw_addr;
+	if (kvm_run->kvm_dirty_regs & KVM_SYNC_ARCH0) {
+		vcpu->arch.sie_block->todpr = kvm_run->s.regs.todpr;
+		vcpu->arch.sie_block->pp = kvm_run->s.regs.pp;
+		vcpu->arch.sie_block->gbea = kvm_run->s.regs.gbea;
+	}
+	if (kvm_run->kvm_dirty_regs & KVM_SYNC_PFAULT) {
+		vcpu->arch.pfault_token = kvm_run->s.regs.pft;
+		vcpu->arch.pfault_select = kvm_run->s.regs.pfs;
+		vcpu->arch.pfault_compare = kvm_run->s.regs.pfc;
+		if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
+			kvm_clear_async_pf_completion_queue(vcpu);
+	}
+	if (kvm_run->kvm_dirty_regs & KVM_SYNC_DIAG318) {
+		vcpu->arch.diag318_info.val = kvm_run->s.regs.diag318;
+		vcpu->arch.sie_block->cpnc = vcpu->arch.diag318_info.cpnc;
+		VCPU_EVENT(vcpu, 3, "setting cpnc to %d", vcpu->arch.diag318_info.cpnc);
+	}
+	/*
+	 * If userspace sets the riccb (e.g. after migration) to a valid state,
+	 * we should enable RI here instead of doing the lazy enablement.
+	 */
+	if ((kvm_run->kvm_dirty_regs & KVM_SYNC_RICCB) &&
+	    test_kvm_facility(vcpu->kvm, 64) &&
+	    riccb->v &&
+	    !(vcpu->arch.sie_block->ecb3 & ECB3_RI)) {
+		VCPU_EVENT(vcpu, 3, "%s", "ENABLE: RI (sync_regs)");
+		vcpu->arch.sie_block->ecb3 |= ECB3_RI;
+	}
+	/*
+	 * If userspace sets the gscb (e.g. after migration) to non-zero,
+	 * we should enable GS here instead of doing the lazy enablement.
+	 */
+	if ((kvm_run->kvm_dirty_regs & KVM_SYNC_GSCB) &&
+	    test_kvm_facility(vcpu->kvm, 133) &&
+	    gscb->gssm &&
+	    !vcpu->arch.gs_enabled) {
+		VCPU_EVENT(vcpu, 3, "%s", "ENABLE: GS (sync_regs)");
+		vcpu->arch.sie_block->ecb |= ECB_GS;
+		vcpu->arch.sie_block->ecd |= ECD_HOSTREGMGMT;
+		vcpu->arch.gs_enabled = 1;
+	}
+	if ((kvm_run->kvm_dirty_regs & KVM_SYNC_BPBC) &&
+	    test_kvm_facility(vcpu->kvm, 82)) {
+		vcpu->arch.sie_block->fpf &= ~FPF_BPBC;
+		vcpu->arch.sie_block->fpf |= kvm_run->s.regs.bpbc ? FPF_BPBC : 0;
+	}
+	if (MACHINE_HAS_GS) {
+		preempt_disable();
+		__ctl_set_bit(2, 4);
+		if (current->thread.gs_cb) {
+			vcpu->arch.host_gscb = current->thread.gs_cb;
+			save_gs_cb(vcpu->arch.host_gscb);
+		}
+		if (vcpu->arch.gs_enabled) {
+			current->thread.gs_cb = (struct gs_cb *)
+						&vcpu->run->s.regs.gscb;
+			restore_gs_cb(current->thread.gs_cb);
+		}
+		preempt_enable();
+	}
+	/* SIE will load etoken directly from SDNX and therefore kvm_run */
+}
+
+static void sync_regs(struct kvm_vcpu *vcpu)
+{
+	struct kvm_run *kvm_run = vcpu->run;
+
+	if (kvm_run->kvm_dirty_regs & KVM_SYNC_PREFIX)
+		kvm_s390_set_prefix(vcpu, kvm_run->s.regs.prefix);
+	if (kvm_run->kvm_dirty_regs & KVM_SYNC_CRS) {
+		memcpy(&vcpu->arch.sie_block->gcr, &kvm_run->s.regs.crs, 128);
+		/* some control register changes require a tlb flush */
+		kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
+	}
+	if (kvm_run->kvm_dirty_regs & KVM_SYNC_ARCH0) {
+		kvm_s390_set_cpu_timer(vcpu, kvm_run->s.regs.cputm);
+		vcpu->arch.sie_block->ckc = kvm_run->s.regs.ckc;
+	}
+	save_access_regs(vcpu->arch.host_acrs);
+	restore_access_regs(vcpu->run->s.regs.acrs);
+	/* save host (userspace) fprs/vrs */
+	save_fpu_regs();
+	vcpu->arch.host_fpregs.fpc = current->thread.fpu.fpc;
+	vcpu->arch.host_fpregs.regs = current->thread.fpu.regs;
+	if (MACHINE_HAS_VX)
+		current->thread.fpu.regs = vcpu->run->s.regs.vrs;
+	else
+		current->thread.fpu.regs = vcpu->run->s.regs.fprs;
+	current->thread.fpu.fpc = vcpu->run->s.regs.fpc;
+	if (test_fp_ctl(current->thread.fpu.fpc))
+		/* User space provided an invalid FPC, let's clear it */
+		current->thread.fpu.fpc = 0;
+
+	/* Sync fmt2 only data */
+	if (likely(!kvm_s390_pv_cpu_is_protected(vcpu))) {
+		sync_regs_fmt2(vcpu);
+	} else {
+		/*
+		 * In several places we have to modify our internal view to
+		 * not do things that are disallowed by the ultravisor. For
+		 * example we must not inject interrupts after specific exits
+		 * (e.g. 112 prefix page not secure). We do this by turning
+		 * off the machine check, external and I/O interrupt bits
+		 * of our PSW copy. To avoid getting validity intercepts, we
+		 * do only accept the condition code from userspace.
+		 */
+		vcpu->arch.sie_block->gpsw.mask &= ~PSW_MASK_CC;
+		vcpu->arch.sie_block->gpsw.mask |= kvm_run->psw_mask &
+						   PSW_MASK_CC;
+	}
+
+	kvm_run->kvm_dirty_regs = 0;
+}
+
+static void store_regs_fmt2(struct kvm_vcpu *vcpu)
+{
+	struct kvm_run *kvm_run = vcpu->run;
+
+	kvm_run->s.regs.todpr = vcpu->arch.sie_block->todpr;
+	kvm_run->s.regs.pp = vcpu->arch.sie_block->pp;
+	kvm_run->s.regs.gbea = vcpu->arch.sie_block->gbea;
+	kvm_run->s.regs.bpbc = (vcpu->arch.sie_block->fpf & FPF_BPBC) == FPF_BPBC;
+	kvm_run->s.regs.diag318 = vcpu->arch.diag318_info.val;
+	if (MACHINE_HAS_GS) {
+		preempt_disable();
+		__ctl_set_bit(2, 4);
+		if (vcpu->arch.gs_enabled)
+			save_gs_cb(current->thread.gs_cb);
+		current->thread.gs_cb = vcpu->arch.host_gscb;
+		restore_gs_cb(vcpu->arch.host_gscb);
+		if (!vcpu->arch.host_gscb)
+			__ctl_clear_bit(2, 4);
+		vcpu->arch.host_gscb = NULL;
+		preempt_enable();
+	}
+	/* SIE will save etoken directly into SDNX and therefore kvm_run */
+}
+
+static void store_regs(struct kvm_vcpu *vcpu)
+{
+	struct kvm_run *kvm_run = vcpu->run;
+
+	kvm_run->psw_mask = vcpu->arch.sie_block->gpsw.mask;
+	kvm_run->psw_addr = vcpu->arch.sie_block->gpsw.addr;
+	kvm_run->s.regs.prefix = kvm_s390_get_prefix(vcpu);
+	memcpy(&kvm_run->s.regs.crs, &vcpu->arch.sie_block->gcr, 128);
+	kvm_run->s.regs.cputm = kvm_s390_get_cpu_timer(vcpu);
+	kvm_run->s.regs.ckc = vcpu->arch.sie_block->ckc;
+	kvm_run->s.regs.pft = vcpu->arch.pfault_token;
+	kvm_run->s.regs.pfs = vcpu->arch.pfault_select;
+	kvm_run->s.regs.pfc = vcpu->arch.pfault_compare;
+	save_access_regs(vcpu->run->s.regs.acrs);
+	restore_access_regs(vcpu->arch.host_acrs);
+	/* Save guest register state */
+	save_fpu_regs();
+	vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
+	/* Restore will be done lazily at return */
+	current->thread.fpu.fpc = vcpu->arch.host_fpregs.fpc;
+	current->thread.fpu.regs = vcpu->arch.host_fpregs.regs;
+	if (likely(!kvm_s390_pv_cpu_is_protected(vcpu)))
+		store_regs_fmt2(vcpu);
+}
+
+int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
+{
+	struct kvm_run *kvm_run = vcpu->run;
+	int rc;
+
+	/*
+	 * Running a VM while dumping always has the potential to
+	 * produce inconsistent dump data. But for PV vcpus a SIE
+	 * entry while dumping could also lead to a fatal validity
+	 * intercept which we absolutely want to avoid.
+	 */
+	if (vcpu->kvm->arch.pv.dumping)
+		return -EINVAL;
+
+	if (kvm_run->immediate_exit)
+		return -EINTR;
+
+	if (kvm_run->kvm_valid_regs & ~KVM_SYNC_S390_VALID_FIELDS ||
+	    kvm_run->kvm_dirty_regs & ~KVM_SYNC_S390_VALID_FIELDS)
+		return -EINVAL;
+
+	vcpu_load(vcpu);
+
+	if (guestdbg_exit_pending(vcpu)) {
+		kvm_s390_prepare_debug_exit(vcpu);
+		rc = 0;
+		goto out;
+	}
+
+	kvm_sigset_activate(vcpu);
+
+	/*
+	 * no need to check the return value of vcpu_start as it can only have
+	 * an error for protvirt, but protvirt means user cpu state
+	 */
+	if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm)) {
+		kvm_s390_vcpu_start(vcpu);
+	} else if (is_vcpu_stopped(vcpu)) {
+		pr_err_ratelimited("can't run stopped vcpu %d\n",
+				   vcpu->vcpu_id);
+		rc = -EINVAL;
+		goto out;
+	}
+
+	sync_regs(vcpu);
+	enable_cpu_timer_accounting(vcpu);
+
+	might_fault();
+	rc = __vcpu_run(vcpu);
+
+	if (signal_pending(current) && !rc) {
+		kvm_run->exit_reason = KVM_EXIT_INTR;
+		rc = -EINTR;
+	}
+
+	if (guestdbg_exit_pending(vcpu) && !rc)  {
+		kvm_s390_prepare_debug_exit(vcpu);
+		rc = 0;
+	}
+
+	if (rc == -EREMOTE) {
+		/* userspace support is needed, kvm_run has been prepared */
+		rc = 0;
+	}
+
+	disable_cpu_timer_accounting(vcpu);
+	store_regs(vcpu);
+
+	kvm_sigset_deactivate(vcpu);
+
+	vcpu->stat.exit_userspace++;
+out:
+	vcpu_put(vcpu);
+	return rc;
+}
+
+/*
+ * store status at address
+ * we use have two special cases:
+ * KVM_S390_STORE_STATUS_NOADDR: -> 0x1200 on 64 bit
+ * KVM_S390_STORE_STATUS_PREFIXED: -> prefix
+ */
+int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long gpa)
+{
+	unsigned char archmode = 1;
+	freg_t fprs[NUM_FPRS];
+	unsigned int px;
+	u64 clkcomp, cputm;
+	int rc;
+
+	px = kvm_s390_get_prefix(vcpu);
+	if (gpa == KVM_S390_STORE_STATUS_NOADDR) {
+		if (write_guest_abs(vcpu, 163, &archmode, 1))
+			return -EFAULT;
+		gpa = 0;
+	} else if (gpa == KVM_S390_STORE_STATUS_PREFIXED) {
+		if (write_guest_real(vcpu, 163, &archmode, 1))
+			return -EFAULT;
+		gpa = px;
+	} else
+		gpa -= __LC_FPREGS_SAVE_AREA;
+
+	/* manually convert vector registers if necessary */
+	if (MACHINE_HAS_VX) {
+		convert_vx_to_fp(fprs, (__vector128 *) vcpu->run->s.regs.vrs);
+		rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
+				     fprs, 128);
+	} else {
+		rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
+				     vcpu->run->s.regs.fprs, 128);
+	}
+	rc |= write_guest_abs(vcpu, gpa + __LC_GPREGS_SAVE_AREA,
+			      vcpu->run->s.regs.gprs, 128);
+	rc |= write_guest_abs(vcpu, gpa + __LC_PSW_SAVE_AREA,
+			      &vcpu->arch.sie_block->gpsw, 16);
+	rc |= write_guest_abs(vcpu, gpa + __LC_PREFIX_SAVE_AREA,
+			      &px, 4);
+	rc |= write_guest_abs(vcpu, gpa + __LC_FP_CREG_SAVE_AREA,
+			      &vcpu->run->s.regs.fpc, 4);
+	rc |= write_guest_abs(vcpu, gpa + __LC_TOD_PROGREG_SAVE_AREA,
+			      &vcpu->arch.sie_block->todpr, 4);
+	cputm = kvm_s390_get_cpu_timer(vcpu);
+	rc |= write_guest_abs(vcpu, gpa + __LC_CPU_TIMER_SAVE_AREA,
+			      &cputm, 8);
+	clkcomp = vcpu->arch.sie_block->ckc >> 8;
+	rc |= write_guest_abs(vcpu, gpa + __LC_CLOCK_COMP_SAVE_AREA,
+			      &clkcomp, 8);
+	rc |= write_guest_abs(vcpu, gpa + __LC_AREGS_SAVE_AREA,
+			      &vcpu->run->s.regs.acrs, 64);
+	rc |= write_guest_abs(vcpu, gpa + __LC_CREGS_SAVE_AREA,
+			      &vcpu->arch.sie_block->gcr, 128);
+	return rc ? -EFAULT : 0;
+}
+
+int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr)
+{
+	/*
+	 * The guest FPRS and ACRS are in the host FPRS/ACRS due to the lazy
+	 * switch in the run ioctl. Let's update our copies before we save
+	 * it into the save area
+	 */
+	save_fpu_regs();
+	vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
+	save_access_regs(vcpu->run->s.regs.acrs);
+
+	return kvm_s390_store_status_unloaded(vcpu, addr);
+}
+
+static void __disable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
+{
+	kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu);
+	kvm_s390_sync_request(KVM_REQ_DISABLE_IBS, vcpu);
+}
+
+static void __disable_ibs_on_all_vcpus(struct kvm *kvm)
+{
+	unsigned long i;
+	struct kvm_vcpu *vcpu;
+
+	kvm_for_each_vcpu(i, vcpu, kvm) {
+		__disable_ibs_on_vcpu(vcpu);
+	}
+}
+
+static void __enable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
+{
+	if (!sclp.has_ibs)
+		return;
+	kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu);
+	kvm_s390_sync_request(KVM_REQ_ENABLE_IBS, vcpu);
+}
+
+int kvm_s390_vcpu_start(struct kvm_vcpu *vcpu)
+{
+	int i, online_vcpus, r = 0, started_vcpus = 0;
+
+	if (!is_vcpu_stopped(vcpu))
+		return 0;
+
+	trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 1);
+	/* Only one cpu at a time may enter/leave the STOPPED state. */
+	spin_lock(&vcpu->kvm->arch.start_stop_lock);
+	online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
+
+	/* Let's tell the UV that we want to change into the operating state */
+	if (kvm_s390_pv_cpu_is_protected(vcpu)) {
+		r = kvm_s390_pv_set_cpu_state(vcpu, PV_CPU_STATE_OPR);
+		if (r) {
+			spin_unlock(&vcpu->kvm->arch.start_stop_lock);
+			return r;
+		}
+	}
+
+	for (i = 0; i < online_vcpus; i++) {
+		if (!is_vcpu_stopped(kvm_get_vcpu(vcpu->kvm, i)))
+			started_vcpus++;
+	}
+
+	if (started_vcpus == 0) {
+		/* we're the only active VCPU -> speed it up */
+		__enable_ibs_on_vcpu(vcpu);
+	} else if (started_vcpus == 1) {
+		/*
+		 * As we are starting a second VCPU, we have to disable
+		 * the IBS facility on all VCPUs to remove potentially
+		 * outstanding ENABLE requests.
+		 */
+		__disable_ibs_on_all_vcpus(vcpu->kvm);
+	}
+
+	kvm_s390_clear_cpuflags(vcpu, CPUSTAT_STOPPED);
+	/*
+	 * The real PSW might have changed due to a RESTART interpreted by the
+	 * ultravisor. We block all interrupts and let the next sie exit
+	 * refresh our view.
+	 */
+	if (kvm_s390_pv_cpu_is_protected(vcpu))
+		vcpu->arch.sie_block->gpsw.mask &= ~PSW_INT_MASK;
+	/*
+	 * Another VCPU might have used IBS while we were offline.
+	 * Let's play safe and flush the VCPU at startup.
+	 */
+	kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
+	spin_unlock(&vcpu->kvm->arch.start_stop_lock);
+	return 0;
+}
+
+int kvm_s390_vcpu_stop(struct kvm_vcpu *vcpu)
+{
+	int i, online_vcpus, r = 0, started_vcpus = 0;
+	struct kvm_vcpu *started_vcpu = NULL;
+
+	if (is_vcpu_stopped(vcpu))
+		return 0;
+
+	trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 0);
+	/* Only one cpu at a time may enter/leave the STOPPED state. */
+	spin_lock(&vcpu->kvm->arch.start_stop_lock);
+	online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
+
+	/* Let's tell the UV that we want to change into the stopped state */
+	if (kvm_s390_pv_cpu_is_protected(vcpu)) {
+		r = kvm_s390_pv_set_cpu_state(vcpu, PV_CPU_STATE_STP);
+		if (r) {
+			spin_unlock(&vcpu->kvm->arch.start_stop_lock);
+			return r;
+		}
+	}
+
+	/*
+	 * Set the VCPU to STOPPED and THEN clear the interrupt flag,
+	 * now that the SIGP STOP and SIGP STOP AND STORE STATUS orders
+	 * have been fully processed. This will ensure that the VCPU
+	 * is kept BUSY if another VCPU is inquiring with SIGP SENSE.
+	 */
+	kvm_s390_set_cpuflags(vcpu, CPUSTAT_STOPPED);
+	kvm_s390_clear_stop_irq(vcpu);
+
+	__disable_ibs_on_vcpu(vcpu);
+
+	for (i = 0; i < online_vcpus; i++) {
+		struct kvm_vcpu *tmp = kvm_get_vcpu(vcpu->kvm, i);
+
+		if (!is_vcpu_stopped(tmp)) {
+			started_vcpus++;
+			started_vcpu = tmp;
+		}
+	}
+
+	if (started_vcpus == 1) {
+		/*
+		 * As we only have one VCPU left, we want to enable the
+		 * IBS facility for that VCPU to speed it up.
+		 */
+		__enable_ibs_on_vcpu(started_vcpu);
+	}
+
+	spin_unlock(&vcpu->kvm->arch.start_stop_lock);
+	return 0;
+}
+
+static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
+				     struct kvm_enable_cap *cap)
+{
+	int r;
+
+	if (cap->flags)
+		return -EINVAL;
+
+	switch (cap->cap) {
+	case KVM_CAP_S390_CSS_SUPPORT:
+		if (!vcpu->kvm->arch.css_support) {
+			vcpu->kvm->arch.css_support = 1;
+			VM_EVENT(vcpu->kvm, 3, "%s", "ENABLE: CSS support");
+			trace_kvm_s390_enable_css(vcpu->kvm);
+		}
+		r = 0;
+		break;
+	default:
+		r = -EINVAL;
+		break;
+	}
+	return r;
+}
+
+static long kvm_s390_vcpu_sida_op(struct kvm_vcpu *vcpu,
+				  struct kvm_s390_mem_op *mop)
+{
+	void __user *uaddr = (void __user *)mop->buf;
+	void *sida_addr;
+	int r = 0;
+
+	if (mop->flags || !mop->size)
+		return -EINVAL;
+	if (mop->size + mop->sida_offset < mop->size)
+		return -EINVAL;
+	if (mop->size + mop->sida_offset > sida_size(vcpu->arch.sie_block))
+		return -E2BIG;
+	if (!kvm_s390_pv_cpu_is_protected(vcpu))
+		return -EINVAL;
+
+	sida_addr = (char *)sida_addr(vcpu->arch.sie_block) + mop->sida_offset;
+
+	switch (mop->op) {
+	case KVM_S390_MEMOP_SIDA_READ:
+		if (copy_to_user(uaddr, sida_addr, mop->size))
+			r = -EFAULT;
+
+		break;
+	case KVM_S390_MEMOP_SIDA_WRITE:
+		if (copy_from_user(sida_addr, uaddr, mop->size))
+			r = -EFAULT;
+		break;
+	}
+	return r;
+}
+
+static long kvm_s390_vcpu_mem_op(struct kvm_vcpu *vcpu,
+				 struct kvm_s390_mem_op *mop)
+{
+	void __user *uaddr = (void __user *)mop->buf;
+	enum gacc_mode acc_mode;
+	void *tmpbuf = NULL;
+	int r;
+
+	r = mem_op_validate_common(mop, KVM_S390_MEMOP_F_INJECT_EXCEPTION |
+					KVM_S390_MEMOP_F_CHECK_ONLY |
+					KVM_S390_MEMOP_F_SKEY_PROTECTION);
+	if (r)
+		return r;
+	if (mop->ar >= NUM_ACRS)
+		return -EINVAL;
+	if (kvm_s390_pv_cpu_is_protected(vcpu))
+		return -EINVAL;
+	if (!(mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY)) {
+		tmpbuf = vmalloc(mop->size);
+		if (!tmpbuf)
+			return -ENOMEM;
+	}
+
+	acc_mode = mop->op == KVM_S390_MEMOP_LOGICAL_READ ? GACC_FETCH : GACC_STORE;
+	if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
+		r = check_gva_range(vcpu, mop->gaddr, mop->ar, mop->size,
+				    acc_mode, mop->key);
+		goto out_inject;
+	}
+	if (acc_mode == GACC_FETCH) {
+		r = read_guest_with_key(vcpu, mop->gaddr, mop->ar, tmpbuf,
+					mop->size, mop->key);
+		if (r)
+			goto out_inject;
+		if (copy_to_user(uaddr, tmpbuf, mop->size)) {
+			r = -EFAULT;
+			goto out_free;
+		}
+	} else {
+		if (copy_from_user(tmpbuf, uaddr, mop->size)) {
+			r = -EFAULT;
+			goto out_free;
+		}
+		r = write_guest_with_key(vcpu, mop->gaddr, mop->ar, tmpbuf,
+					 mop->size, mop->key);
+	}
+
+out_inject:
+	if (r > 0 && (mop->flags & KVM_S390_MEMOP_F_INJECT_EXCEPTION) != 0)
+		kvm_s390_inject_prog_irq(vcpu, &vcpu->arch.pgm);
+
+out_free:
+	vfree(tmpbuf);
+	return r;
+}
+
+static long kvm_s390_vcpu_memsida_op(struct kvm_vcpu *vcpu,
+				     struct kvm_s390_mem_op *mop)
+{
+	int r, srcu_idx;
+
+	srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
+
+	switch (mop->op) {
+	case KVM_S390_MEMOP_LOGICAL_READ:
+	case KVM_S390_MEMOP_LOGICAL_WRITE:
+		r = kvm_s390_vcpu_mem_op(vcpu, mop);
+		break;
+	case KVM_S390_MEMOP_SIDA_READ:
+	case KVM_S390_MEMOP_SIDA_WRITE:
+		/* we are locked against sida going away by the vcpu->mutex */
+		r = kvm_s390_vcpu_sida_op(vcpu, mop);
+		break;
+	default:
+		r = -EINVAL;
+	}
+
+	srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx);
+	return r;
+}
+
+long kvm_arch_vcpu_async_ioctl(struct file *filp,
+			       unsigned int ioctl, unsigned long arg)
+{
+	struct kvm_vcpu *vcpu = filp->private_data;
+	void __user *argp = (void __user *)arg;
+	int rc;
+
+	switch (ioctl) {
+	case KVM_S390_IRQ: {
+		struct kvm_s390_irq s390irq;
+
+		if (copy_from_user(&s390irq, argp, sizeof(s390irq)))
+			return -EFAULT;
+		rc = kvm_s390_inject_vcpu(vcpu, &s390irq);
+		break;
+	}
+	case KVM_S390_INTERRUPT: {
+		struct kvm_s390_interrupt s390int;
+		struct kvm_s390_irq s390irq = {};
+
+		if (copy_from_user(&s390int, argp, sizeof(s390int)))
+			return -EFAULT;
+		if (s390int_to_s390irq(&s390int, &s390irq))
+			return -EINVAL;
+		rc = kvm_s390_inject_vcpu(vcpu, &s390irq);
+		break;
+	}
+	default:
+		rc = -ENOIOCTLCMD;
+		break;
+	}
+
+	/*
+	 * To simplify single stepping of userspace-emulated instructions,
+	 * KVM_EXIT_S390_SIEIC exit sets KVM_GUESTDBG_EXIT_PENDING (see
+	 * should_handle_per_ifetch()). However, if userspace emulation injects
+	 * an interrupt, it needs to be cleared, so that KVM_EXIT_DEBUG happens
+	 * after (and not before) the interrupt delivery.
+	 */
+	if (!rc)
+		vcpu->guest_debug &= ~KVM_GUESTDBG_EXIT_PENDING;
+
+	return rc;
+}
+
+static int kvm_s390_handle_pv_vcpu_dump(struct kvm_vcpu *vcpu,
+					struct kvm_pv_cmd *cmd)
+{
+	struct kvm_s390_pv_dmp dmp;
+	void *data;
+	int ret;
+
+	/* Dump initialization is a prerequisite */
+	if (!vcpu->kvm->arch.pv.dumping)
+		return -EINVAL;
+
+	if (copy_from_user(&dmp, (__u8 __user *)cmd->data, sizeof(dmp)))
+		return -EFAULT;
+
+	/* We only handle this subcmd right now */
+	if (dmp.subcmd != KVM_PV_DUMP_CPU)
+		return -EINVAL;
+
+	/* CPU dump length is the same as create cpu storage donation. */
+	if (dmp.buff_len != uv_info.guest_cpu_stor_len)
+		return -EINVAL;
+
+	data = kvzalloc(uv_info.guest_cpu_stor_len, GFP_KERNEL);
+	if (!data)
+		return -ENOMEM;
+
+	ret = kvm_s390_pv_dump_cpu(vcpu, data, &cmd->rc, &cmd->rrc);
+
+	VCPU_EVENT(vcpu, 3, "PROTVIRT DUMP CPU %d rc %x rrc %x",
+		   vcpu->vcpu_id, cmd->rc, cmd->rrc);
+
+	if (ret)
+		ret = -EINVAL;
+
+	/* On success copy over the dump data */
+	if (!ret && copy_to_user((__u8 __user *)dmp.buff_addr, data, uv_info.guest_cpu_stor_len))
+		ret = -EFAULT;
+
+	kvfree(data);
+	return ret;
+}
+
+long kvm_arch_vcpu_ioctl(struct file *filp,
+			 unsigned int ioctl, unsigned long arg)
+{
+	struct kvm_vcpu *vcpu = filp->private_data;
+	void __user *argp = (void __user *)arg;
+	int idx;
+	long r;
+	u16 rc, rrc;
+
+	vcpu_load(vcpu);
+
+	switch (ioctl) {
+	case KVM_S390_STORE_STATUS:
+		idx = srcu_read_lock(&vcpu->kvm->srcu);
+		r = kvm_s390_store_status_unloaded(vcpu, arg);
+		srcu_read_unlock(&vcpu->kvm->srcu, idx);
+		break;
+	case KVM_S390_SET_INITIAL_PSW: {
+		psw_t psw;
+
+		r = -EFAULT;
+		if (copy_from_user(&psw, argp, sizeof(psw)))
+			break;
+		r = kvm_arch_vcpu_ioctl_set_initial_psw(vcpu, psw);
+		break;
+	}
+	case KVM_S390_CLEAR_RESET:
+		r = 0;
+		kvm_arch_vcpu_ioctl_clear_reset(vcpu);
+		if (kvm_s390_pv_cpu_is_protected(vcpu)) {
+			r = uv_cmd_nodata(kvm_s390_pv_cpu_get_handle(vcpu),
+					  UVC_CMD_CPU_RESET_CLEAR, &rc, &rrc);
+			VCPU_EVENT(vcpu, 3, "PROTVIRT RESET CLEAR VCPU: rc %x rrc %x",
+				   rc, rrc);
+		}
+		break;
+	case KVM_S390_INITIAL_RESET:
+		r = 0;
+		kvm_arch_vcpu_ioctl_initial_reset(vcpu);
+		if (kvm_s390_pv_cpu_is_protected(vcpu)) {
+			r = uv_cmd_nodata(kvm_s390_pv_cpu_get_handle(vcpu),
+					  UVC_CMD_CPU_RESET_INITIAL,
+					  &rc, &rrc);
+			VCPU_EVENT(vcpu, 3, "PROTVIRT RESET INITIAL VCPU: rc %x rrc %x",
+				   rc, rrc);
+		}
+		break;
+	case KVM_S390_NORMAL_RESET:
+		r = 0;
+		kvm_arch_vcpu_ioctl_normal_reset(vcpu);
+		if (kvm_s390_pv_cpu_is_protected(vcpu)) {
+			r = uv_cmd_nodata(kvm_s390_pv_cpu_get_handle(vcpu),
+					  UVC_CMD_CPU_RESET, &rc, &rrc);
+			VCPU_EVENT(vcpu, 3, "PROTVIRT RESET NORMAL VCPU: rc %x rrc %x",
+				   rc, rrc);
+		}
+		break;
+	case KVM_SET_ONE_REG:
+	case KVM_GET_ONE_REG: {
+		struct kvm_one_reg reg;
+		r = -EINVAL;
+		if (kvm_s390_pv_cpu_is_protected(vcpu))
+			break;
+		r = -EFAULT;
+		if (copy_from_user(&reg, argp, sizeof(reg)))
+			break;
+		if (ioctl == KVM_SET_ONE_REG)
+			r = kvm_arch_vcpu_ioctl_set_one_reg(vcpu, &reg);
+		else
+			r = kvm_arch_vcpu_ioctl_get_one_reg(vcpu, &reg);
+		break;
+	}
+#ifdef CONFIG_KVM_S390_UCONTROL
+	case KVM_S390_UCAS_MAP: {
+		struct kvm_s390_ucas_mapping ucasmap;
+
+		if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
+			r = -EFAULT;
+			break;
+		}
+
+		if (!kvm_is_ucontrol(vcpu->kvm)) {
+			r = -EINVAL;
+			break;
+		}
+
+		r = gmap_map_segment(vcpu->arch.gmap, ucasmap.user_addr,
+				     ucasmap.vcpu_addr, ucasmap.length);
+		break;
+	}
+	case KVM_S390_UCAS_UNMAP: {
+		struct kvm_s390_ucas_mapping ucasmap;
+
+		if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
+			r = -EFAULT;
+			break;
+		}
+
+		if (!kvm_is_ucontrol(vcpu->kvm)) {
+			r = -EINVAL;
+			break;
+		}
+
+		r = gmap_unmap_segment(vcpu->arch.gmap, ucasmap.vcpu_addr,
+			ucasmap.length);
+		break;
+	}
+#endif
+	case KVM_S390_VCPU_FAULT: {
+		r = gmap_fault(vcpu->arch.gmap, arg, 0);
+		break;
+	}
+	case KVM_ENABLE_CAP:
+	{
+		struct kvm_enable_cap cap;
+		r = -EFAULT;
+		if (copy_from_user(&cap, argp, sizeof(cap)))
+			break;
+		r = kvm_vcpu_ioctl_enable_cap(vcpu, &cap);
+		break;
+	}
+	case KVM_S390_MEM_OP: {
+		struct kvm_s390_mem_op mem_op;
+
+		if (copy_from_user(&mem_op, argp, sizeof(mem_op)) == 0)
+			r = kvm_s390_vcpu_memsida_op(vcpu, &mem_op);
+		else
+			r = -EFAULT;
+		break;
+	}
+	case KVM_S390_SET_IRQ_STATE: {
+		struct kvm_s390_irq_state irq_state;
+
+		r = -EFAULT;
+		if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
+			break;
+		if (irq_state.len > VCPU_IRQS_MAX_BUF ||
+		    irq_state.len == 0 ||
+		    irq_state.len % sizeof(struct kvm_s390_irq) > 0) {
+			r = -EINVAL;
+			break;
+		}
+		/* do not use irq_state.flags, it will break old QEMUs */
+		r = kvm_s390_set_irq_state(vcpu,
+					   (void __user *) irq_state.buf,
+					   irq_state.len);
+		break;
+	}
+	case KVM_S390_GET_IRQ_STATE: {
+		struct kvm_s390_irq_state irq_state;
+
+		r = -EFAULT;
+		if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
+			break;
+		if (irq_state.len == 0) {
+			r = -EINVAL;
+			break;
+		}
+		/* do not use irq_state.flags, it will break old QEMUs */
+		r = kvm_s390_get_irq_state(vcpu,
+					   (__u8 __user *)  irq_state.buf,
+					   irq_state.len);
+		break;
+	}
+	case KVM_S390_PV_CPU_COMMAND: {
+		struct kvm_pv_cmd cmd;
+
+		r = -EINVAL;
+		if (!is_prot_virt_host())
+			break;
+
+		r = -EFAULT;
+		if (copy_from_user(&cmd, argp, sizeof(cmd)))
+			break;
+
+		r = -EINVAL;
+		if (cmd.flags)
+			break;
+
+		/* We only handle this cmd right now */
+		if (cmd.cmd != KVM_PV_DUMP)
+			break;
+
+		r = kvm_s390_handle_pv_vcpu_dump(vcpu, &cmd);
+
+		/* Always copy over UV rc / rrc data */
+		if (copy_to_user((__u8 __user *)argp, &cmd.rc,
+				 sizeof(cmd.rc) + sizeof(cmd.rrc)))
+			r = -EFAULT;
+		break;
+	}
+	default:
+		r = -ENOTTY;
+	}
+
+	vcpu_put(vcpu);
+	return r;
+}
+
+vm_fault_t kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
+{
+#ifdef CONFIG_KVM_S390_UCONTROL
+	if ((vmf->pgoff == KVM_S390_SIE_PAGE_OFFSET)
+		 && (kvm_is_ucontrol(vcpu->kvm))) {
+		vmf->page = virt_to_page(vcpu->arch.sie_block);
+		get_page(vmf->page);
+		return 0;
+	}
+#endif
+	return VM_FAULT_SIGBUS;
+}
+
+bool kvm_arch_irqchip_in_kernel(struct kvm *kvm)
+{
+	return true;
+}
+
+/* Section: memory related */
+int kvm_arch_prepare_memory_region(struct kvm *kvm,
+				   const struct kvm_memory_slot *old,
+				   struct kvm_memory_slot *new,
+				   enum kvm_mr_change change)
+{
+	gpa_t size;
+
+	/* When we are protected, we should not change the memory slots */
+	if (kvm_s390_pv_get_handle(kvm))
+		return -EINVAL;
+
+	if (change != KVM_MR_DELETE && change != KVM_MR_FLAGS_ONLY) {
+		/*
+		 * A few sanity checks. We can have memory slots which have to be
+		 * located/ended at a segment boundary (1MB). The memory in userland is
+		 * ok to be fragmented into various different vmas. It is okay to mmap()
+		 * and munmap() stuff in this slot after doing this call at any time
+		 */
+
+		if (new->userspace_addr & 0xffffful)
+			return -EINVAL;
+
+		size = new->npages * PAGE_SIZE;
+		if (size & 0xffffful)
+			return -EINVAL;
+
+		if ((new->base_gfn * PAGE_SIZE) + size > kvm->arch.mem_limit)
+			return -EINVAL;
+	}
+
+	if (!kvm->arch.migration_mode)
+		return 0;
+
+	/*
+	 * Turn off migration mode when:
+	 * - userspace creates a new memslot with dirty logging off,
+	 * - userspace modifies an existing memslot (MOVE or FLAGS_ONLY) and
+	 *   dirty logging is turned off.
+	 * Migration mode expects dirty page logging being enabled to store
+	 * its dirty bitmap.
+	 */
+	if (change != KVM_MR_DELETE &&
+	    !(new->flags & KVM_MEM_LOG_DIRTY_PAGES))
+		WARN(kvm_s390_vm_stop_migration(kvm),
+		     "Failed to stop migration mode");
+
+	return 0;
+}
+
+void kvm_arch_commit_memory_region(struct kvm *kvm,
+				struct kvm_memory_slot *old,
+				const struct kvm_memory_slot *new,
+				enum kvm_mr_change change)
+{
+	int rc = 0;
+
+	switch (change) {
+	case KVM_MR_DELETE:
+		rc = gmap_unmap_segment(kvm->arch.gmap, old->base_gfn * PAGE_SIZE,
+					old->npages * PAGE_SIZE);
+		break;
+	case KVM_MR_MOVE:
+		rc = gmap_unmap_segment(kvm->arch.gmap, old->base_gfn * PAGE_SIZE,
+					old->npages * PAGE_SIZE);
+		if (rc)
+			break;
+		fallthrough;
+	case KVM_MR_CREATE:
+		rc = gmap_map_segment(kvm->arch.gmap, new->userspace_addr,
+				      new->base_gfn * PAGE_SIZE,
+				      new->npages * PAGE_SIZE);
+		break;
+	case KVM_MR_FLAGS_ONLY:
+		break;
+	default:
+		WARN(1, "Unknown KVM MR CHANGE: %d\n", change);
+	}
+	if (rc)
+		pr_warn("failed to commit memory region\n");
+	return;
+}
+
+static inline unsigned long nonhyp_mask(int i)
+{
+	unsigned int nonhyp_fai = (sclp.hmfai << i * 2) >> 30;
+
+	return 0x0000ffffffffffffUL >> (nonhyp_fai << 4);
+}
+
+static int __init kvm_s390_init(void)
+{
+	int i, r;
+
+	if (!sclp.has_sief2) {
+		pr_info("SIE is not available\n");
+		return -ENODEV;
+	}
+
+	if (nested && hpage) {
+		pr_info("A KVM host that supports nesting cannot back its KVM guests with huge pages\n");
+		return -EINVAL;
+	}
+
+	for (i = 0; i < 16; i++)
+		kvm_s390_fac_base[i] |=
+			stfle_fac_list[i] & nonhyp_mask(i);
+
+	r = __kvm_s390_init();
+	if (r)
+		return r;
+
+	r = kvm_init(sizeof(struct kvm_vcpu), 0, THIS_MODULE);
+	if (r) {
+		__kvm_s390_exit();
+		return r;
+	}
+	return 0;
+}
+
+static void __exit kvm_s390_exit(void)
+{
+	kvm_exit();
+
+	__kvm_s390_exit();
+}
+
+module_init(kvm_s390_init);
+module_exit(kvm_s390_exit);
+
+/*
+ * Enable autoloading of the kvm module.
+ * Note that we add the module alias here instead of virt/kvm/kvm_main.c
+ * since x86 takes a different approach.
+ */
+#include <linux/miscdevice.h>
+MODULE_ALIAS_MISCDEV(KVM_MINOR);
+MODULE_ALIAS("devname:kvm");
diff --git a/arch/s390/kvm/kvm-s390.h b/arch/s390/kvm/kvm-s390.h
new file mode 100644
index 0000000000..a7ea80cfa4
--- /dev/null
+++ b/arch/s390/kvm/kvm-s390.h
@@ -0,0 +1,524 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * definition for kvm on s390
+ *
+ * Copyright IBM Corp. 2008, 2020
+ *
+ *    Author(s): Carsten Otte <cotte@de.ibm.com>
+ *               Christian Borntraeger <borntraeger@de.ibm.com>
+ *               Christian Ehrhardt <ehrhardt@de.ibm.com>
+ */
+
+#ifndef ARCH_S390_KVM_S390_H
+#define ARCH_S390_KVM_S390_H
+
+#include <linux/hrtimer.h>
+#include <linux/kvm.h>
+#include <linux/kvm_host.h>
+#include <linux/lockdep.h>
+#include <asm/facility.h>
+#include <asm/processor.h>
+#include <asm/sclp.h>
+
+/* Transactional Memory Execution related macros */
+#define IS_TE_ENABLED(vcpu)	((vcpu->arch.sie_block->ecb & ECB_TE))
+#define TDB_FORMAT1		1
+#define IS_ITDB_VALID(vcpu) \
+	((*(char *)phys_to_virt((vcpu)->arch.sie_block->itdba) == TDB_FORMAT1))
+
+extern debug_info_t *kvm_s390_dbf;
+extern debug_info_t *kvm_s390_dbf_uv;
+
+#define KVM_UV_EVENT(d_kvm, d_loglevel, d_string, d_args...)\
+do { \
+	debug_sprintf_event((d_kvm)->arch.dbf, d_loglevel, d_string "\n", \
+	  d_args); \
+	debug_sprintf_event(kvm_s390_dbf_uv, d_loglevel, \
+			    "%d: " d_string "\n", (d_kvm)->userspace_pid, \
+			    d_args); \
+} while (0)
+
+#define KVM_EVENT(d_loglevel, d_string, d_args...)\
+do { \
+	debug_sprintf_event(kvm_s390_dbf, d_loglevel, d_string "\n", \
+	  d_args); \
+} while (0)
+
+#define VM_EVENT(d_kvm, d_loglevel, d_string, d_args...)\
+do { \
+	debug_sprintf_event(d_kvm->arch.dbf, d_loglevel, d_string "\n", \
+	  d_args); \
+} while (0)
+
+#define VCPU_EVENT(d_vcpu, d_loglevel, d_string, d_args...)\
+do { \
+	debug_sprintf_event(d_vcpu->kvm->arch.dbf, d_loglevel, \
+	  "%02d[%016lx-%016lx]: " d_string "\n", d_vcpu->vcpu_id, \
+	  d_vcpu->arch.sie_block->gpsw.mask, d_vcpu->arch.sie_block->gpsw.addr,\
+	  d_args); \
+} while (0)
+
+static inline void kvm_s390_set_cpuflags(struct kvm_vcpu *vcpu, u32 flags)
+{
+	atomic_or(flags, &vcpu->arch.sie_block->cpuflags);
+}
+
+static inline void kvm_s390_clear_cpuflags(struct kvm_vcpu *vcpu, u32 flags)
+{
+	atomic_andnot(flags, &vcpu->arch.sie_block->cpuflags);
+}
+
+static inline bool kvm_s390_test_cpuflags(struct kvm_vcpu *vcpu, u32 flags)
+{
+	return (atomic_read(&vcpu->arch.sie_block->cpuflags) & flags) == flags;
+}
+
+static inline int is_vcpu_stopped(struct kvm_vcpu *vcpu)
+{
+	return kvm_s390_test_cpuflags(vcpu, CPUSTAT_STOPPED);
+}
+
+static inline int is_vcpu_idle(struct kvm_vcpu *vcpu)
+{
+	return test_bit(vcpu->vcpu_idx, vcpu->kvm->arch.idle_mask);
+}
+
+static inline int kvm_is_ucontrol(struct kvm *kvm)
+{
+#ifdef CONFIG_KVM_S390_UCONTROL
+	if (kvm->arch.gmap)
+		return 0;
+	return 1;
+#else
+	return 0;
+#endif
+}
+
+#define GUEST_PREFIX_SHIFT 13
+static inline u32 kvm_s390_get_prefix(struct kvm_vcpu *vcpu)
+{
+	return vcpu->arch.sie_block->prefix << GUEST_PREFIX_SHIFT;
+}
+
+static inline void kvm_s390_set_prefix(struct kvm_vcpu *vcpu, u32 prefix)
+{
+	VCPU_EVENT(vcpu, 3, "set prefix of cpu %03u to 0x%x", vcpu->vcpu_id,
+		   prefix);
+	vcpu->arch.sie_block->prefix = prefix >> GUEST_PREFIX_SHIFT;
+	kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
+	kvm_make_request(KVM_REQ_REFRESH_GUEST_PREFIX, vcpu);
+}
+
+static inline u64 kvm_s390_get_base_disp_s(struct kvm_vcpu *vcpu, u8 *ar)
+{
+	u32 base2 = vcpu->arch.sie_block->ipb >> 28;
+	u32 disp2 = ((vcpu->arch.sie_block->ipb & 0x0fff0000) >> 16);
+
+	if (ar)
+		*ar = base2;
+
+	return (base2 ? vcpu->run->s.regs.gprs[base2] : 0) + disp2;
+}
+
+static inline void kvm_s390_get_base_disp_sse(struct kvm_vcpu *vcpu,
+					      u64 *address1, u64 *address2,
+					      u8 *ar_b1, u8 *ar_b2)
+{
+	u32 base1 = (vcpu->arch.sie_block->ipb & 0xf0000000) >> 28;
+	u32 disp1 = (vcpu->arch.sie_block->ipb & 0x0fff0000) >> 16;
+	u32 base2 = (vcpu->arch.sie_block->ipb & 0xf000) >> 12;
+	u32 disp2 = vcpu->arch.sie_block->ipb & 0x0fff;
+
+	*address1 = (base1 ? vcpu->run->s.regs.gprs[base1] : 0) + disp1;
+	*address2 = (base2 ? vcpu->run->s.regs.gprs[base2] : 0) + disp2;
+
+	if (ar_b1)
+		*ar_b1 = base1;
+	if (ar_b2)
+		*ar_b2 = base2;
+}
+
+static inline void kvm_s390_get_regs_rre(struct kvm_vcpu *vcpu, int *r1, int *r2)
+{
+	if (r1)
+		*r1 = (vcpu->arch.sie_block->ipb & 0x00f00000) >> 20;
+	if (r2)
+		*r2 = (vcpu->arch.sie_block->ipb & 0x000f0000) >> 16;
+}
+
+static inline u64 kvm_s390_get_base_disp_rsy(struct kvm_vcpu *vcpu, u8 *ar)
+{
+	u32 base2 = vcpu->arch.sie_block->ipb >> 28;
+	u32 disp2 = ((vcpu->arch.sie_block->ipb & 0x0fff0000) >> 16) +
+			((vcpu->arch.sie_block->ipb & 0xff00) << 4);
+	/* The displacement is a 20bit _SIGNED_ value */
+	if (disp2 & 0x80000)
+		disp2+=0xfff00000;
+
+	if (ar)
+		*ar = base2;
+
+	return (base2 ? vcpu->run->s.regs.gprs[base2] : 0) + (long)(int)disp2;
+}
+
+static inline u64 kvm_s390_get_base_disp_rs(struct kvm_vcpu *vcpu, u8 *ar)
+{
+	u32 base2 = vcpu->arch.sie_block->ipb >> 28;
+	u32 disp2 = ((vcpu->arch.sie_block->ipb & 0x0fff0000) >> 16);
+
+	if (ar)
+		*ar = base2;
+
+	return (base2 ? vcpu->run->s.regs.gprs[base2] : 0) + disp2;
+}
+
+/* Set the condition code in the guest program status word */
+static inline void kvm_s390_set_psw_cc(struct kvm_vcpu *vcpu, unsigned long cc)
+{
+	vcpu->arch.sie_block->gpsw.mask &= ~(3UL << 44);
+	vcpu->arch.sie_block->gpsw.mask |= cc << 44;
+}
+
+/* test availability of facility in a kvm instance */
+static inline int test_kvm_facility(struct kvm *kvm, unsigned long nr)
+{
+	return __test_facility(nr, kvm->arch.model.fac_mask) &&
+		__test_facility(nr, kvm->arch.model.fac_list);
+}
+
+static inline int set_kvm_facility(u64 *fac_list, unsigned long nr)
+{
+	unsigned char *ptr;
+
+	if (nr >= MAX_FACILITY_BIT)
+		return -EINVAL;
+	ptr = (unsigned char *) fac_list + (nr >> 3);
+	*ptr |= (0x80UL >> (nr & 7));
+	return 0;
+}
+
+static inline int test_kvm_cpu_feat(struct kvm *kvm, unsigned long nr)
+{
+	WARN_ON_ONCE(nr >= KVM_S390_VM_CPU_FEAT_NR_BITS);
+	return test_bit_inv(nr, kvm->arch.cpu_feat);
+}
+
+/* are cpu states controlled by user space */
+static inline int kvm_s390_user_cpu_state_ctrl(struct kvm *kvm)
+{
+	return kvm->arch.user_cpu_state_ctrl != 0;
+}
+
+static inline void kvm_s390_set_user_cpu_state_ctrl(struct kvm *kvm)
+{
+	if (kvm->arch.user_cpu_state_ctrl)
+		return;
+
+	VM_EVENT(kvm, 3, "%s", "ENABLE: Userspace CPU state control");
+	kvm->arch.user_cpu_state_ctrl = 1;
+}
+
+/* get the end gfn of the last (highest gfn) memslot */
+static inline unsigned long kvm_s390_get_gfn_end(struct kvm_memslots *slots)
+{
+	struct rb_node *node;
+	struct kvm_memory_slot *ms;
+
+	if (WARN_ON(kvm_memslots_empty(slots)))
+		return 0;
+
+	node = rb_last(&slots->gfn_tree);
+	ms = container_of(node, struct kvm_memory_slot, gfn_node[slots->node_idx]);
+	return ms->base_gfn + ms->npages;
+}
+
+static inline u32 kvm_s390_get_gisa_desc(struct kvm *kvm)
+{
+	u32 gd = virt_to_phys(kvm->arch.gisa_int.origin);
+
+	if (gd && sclp.has_gisaf)
+		gd |= GISA_FORMAT1;
+	return gd;
+}
+
+/* implemented in pv.c */
+int kvm_s390_pv_destroy_cpu(struct kvm_vcpu *vcpu, u16 *rc, u16 *rrc);
+int kvm_s390_pv_create_cpu(struct kvm_vcpu *vcpu, u16 *rc, u16 *rrc);
+int kvm_s390_pv_set_aside(struct kvm *kvm, u16 *rc, u16 *rrc);
+int kvm_s390_pv_deinit_aside_vm(struct kvm *kvm, u16 *rc, u16 *rrc);
+int kvm_s390_pv_deinit_cleanup_all(struct kvm *kvm, u16 *rc, u16 *rrc);
+int kvm_s390_pv_deinit_vm(struct kvm *kvm, u16 *rc, u16 *rrc);
+int kvm_s390_pv_init_vm(struct kvm *kvm, u16 *rc, u16 *rrc);
+int kvm_s390_pv_set_sec_parms(struct kvm *kvm, void *hdr, u64 length, u16 *rc,
+			      u16 *rrc);
+int kvm_s390_pv_unpack(struct kvm *kvm, unsigned long addr, unsigned long size,
+		       unsigned long tweak, u16 *rc, u16 *rrc);
+int kvm_s390_pv_set_cpu_state(struct kvm_vcpu *vcpu, u8 state);
+int kvm_s390_pv_dump_cpu(struct kvm_vcpu *vcpu, void *buff, u16 *rc, u16 *rrc);
+int kvm_s390_pv_dump_stor_state(struct kvm *kvm, void __user *buff_user,
+				u64 *gaddr, u64 buff_user_len, u16 *rc, u16 *rrc);
+int kvm_s390_pv_dump_complete(struct kvm *kvm, void __user *buff_user,
+			      u16 *rc, u16 *rrc);
+
+static inline u64 kvm_s390_pv_get_handle(struct kvm *kvm)
+{
+	return kvm->arch.pv.handle;
+}
+
+static inline u64 kvm_s390_pv_cpu_get_handle(struct kvm_vcpu *vcpu)
+{
+	return vcpu->arch.pv.handle;
+}
+
+/* implemented in interrupt.c */
+int kvm_s390_handle_wait(struct kvm_vcpu *vcpu);
+void kvm_s390_vcpu_wakeup(struct kvm_vcpu *vcpu);
+enum hrtimer_restart kvm_s390_idle_wakeup(struct hrtimer *timer);
+int __must_check kvm_s390_deliver_pending_interrupts(struct kvm_vcpu *vcpu);
+void kvm_s390_clear_local_irqs(struct kvm_vcpu *vcpu);
+void kvm_s390_clear_float_irqs(struct kvm *kvm);
+int __must_check kvm_s390_inject_vm(struct kvm *kvm,
+				    struct kvm_s390_interrupt *s390int);
+int __must_check kvm_s390_inject_vcpu(struct kvm_vcpu *vcpu,
+				      struct kvm_s390_irq *irq);
+static inline int kvm_s390_inject_prog_irq(struct kvm_vcpu *vcpu,
+					   struct kvm_s390_pgm_info *pgm_info)
+{
+	struct kvm_s390_irq irq = {
+		.type = KVM_S390_PROGRAM_INT,
+		.u.pgm = *pgm_info,
+	};
+
+	return kvm_s390_inject_vcpu(vcpu, &irq);
+}
+static inline int kvm_s390_inject_program_int(struct kvm_vcpu *vcpu, u16 code)
+{
+	struct kvm_s390_irq irq = {
+		.type = KVM_S390_PROGRAM_INT,
+		.u.pgm.code = code,
+	};
+
+	return kvm_s390_inject_vcpu(vcpu, &irq);
+}
+struct kvm_s390_interrupt_info *kvm_s390_get_io_int(struct kvm *kvm,
+						    u64 isc_mask, u32 schid);
+int kvm_s390_reinject_io_int(struct kvm *kvm,
+			     struct kvm_s390_interrupt_info *inti);
+int kvm_s390_mask_adapter(struct kvm *kvm, unsigned int id, bool masked);
+
+/* implemented in intercept.c */
+u8 kvm_s390_get_ilen(struct kvm_vcpu *vcpu);
+int kvm_handle_sie_intercept(struct kvm_vcpu *vcpu);
+static inline void kvm_s390_rewind_psw(struct kvm_vcpu *vcpu, int ilen)
+{
+	struct kvm_s390_sie_block *sie_block = vcpu->arch.sie_block;
+
+	sie_block->gpsw.addr = __rewind_psw(sie_block->gpsw, ilen);
+}
+static inline void kvm_s390_forward_psw(struct kvm_vcpu *vcpu, int ilen)
+{
+	kvm_s390_rewind_psw(vcpu, -ilen);
+}
+static inline void kvm_s390_retry_instr(struct kvm_vcpu *vcpu)
+{
+	/* don't inject PER events if we re-execute the instruction */
+	vcpu->arch.sie_block->icptstatus &= ~0x02;
+	kvm_s390_rewind_psw(vcpu, kvm_s390_get_ilen(vcpu));
+}
+
+int handle_sthyi(struct kvm_vcpu *vcpu);
+
+/* implemented in priv.c */
+int is_valid_psw(psw_t *psw);
+int kvm_s390_handle_aa(struct kvm_vcpu *vcpu);
+int kvm_s390_handle_b2(struct kvm_vcpu *vcpu);
+int kvm_s390_handle_e3(struct kvm_vcpu *vcpu);
+int kvm_s390_handle_e5(struct kvm_vcpu *vcpu);
+int kvm_s390_handle_01(struct kvm_vcpu *vcpu);
+int kvm_s390_handle_b9(struct kvm_vcpu *vcpu);
+int kvm_s390_handle_lpsw(struct kvm_vcpu *vcpu);
+int kvm_s390_handle_stctl(struct kvm_vcpu *vcpu);
+int kvm_s390_handle_lctl(struct kvm_vcpu *vcpu);
+int kvm_s390_handle_eb(struct kvm_vcpu *vcpu);
+int kvm_s390_skey_check_enable(struct kvm_vcpu *vcpu);
+
+/* implemented in vsie.c */
+int kvm_s390_handle_vsie(struct kvm_vcpu *vcpu);
+void kvm_s390_vsie_kick(struct kvm_vcpu *vcpu);
+void kvm_s390_vsie_gmap_notifier(struct gmap *gmap, unsigned long start,
+				 unsigned long end);
+void kvm_s390_vsie_init(struct kvm *kvm);
+void kvm_s390_vsie_destroy(struct kvm *kvm);
+
+/* implemented in sigp.c */
+int kvm_s390_handle_sigp(struct kvm_vcpu *vcpu);
+int kvm_s390_handle_sigp_pei(struct kvm_vcpu *vcpu);
+
+/* implemented in kvm-s390.c */
+int kvm_s390_try_set_tod_clock(struct kvm *kvm, const struct kvm_s390_vm_tod_clock *gtod);
+long kvm_arch_fault_in_page(struct kvm_vcpu *vcpu, gpa_t gpa, int writable);
+int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long addr);
+int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr);
+int kvm_s390_vcpu_start(struct kvm_vcpu *vcpu);
+int kvm_s390_vcpu_stop(struct kvm_vcpu *vcpu);
+void kvm_s390_vcpu_block(struct kvm_vcpu *vcpu);
+void kvm_s390_vcpu_unblock(struct kvm_vcpu *vcpu);
+bool kvm_s390_vcpu_sie_inhibited(struct kvm_vcpu *vcpu);
+void exit_sie(struct kvm_vcpu *vcpu);
+void kvm_s390_sync_request(int req, struct kvm_vcpu *vcpu);
+int kvm_s390_vcpu_setup_cmma(struct kvm_vcpu *vcpu);
+void kvm_s390_vcpu_unsetup_cmma(struct kvm_vcpu *vcpu);
+void kvm_s390_set_cpu_timer(struct kvm_vcpu *vcpu, __u64 cputm);
+__u64 kvm_s390_get_cpu_timer(struct kvm_vcpu *vcpu);
+int kvm_s390_cpus_from_pv(struct kvm *kvm, u16 *rc, u16 *rrc);
+
+/* implemented in diag.c */
+int kvm_s390_handle_diag(struct kvm_vcpu *vcpu);
+
+static inline void kvm_s390_vcpu_block_all(struct kvm *kvm)
+{
+	unsigned long i;
+	struct kvm_vcpu *vcpu;
+
+	WARN_ON(!mutex_is_locked(&kvm->lock));
+	kvm_for_each_vcpu(i, vcpu, kvm)
+		kvm_s390_vcpu_block(vcpu);
+}
+
+static inline void kvm_s390_vcpu_unblock_all(struct kvm *kvm)
+{
+	unsigned long i;
+	struct kvm_vcpu *vcpu;
+
+	kvm_for_each_vcpu(i, vcpu, kvm)
+		kvm_s390_vcpu_unblock(vcpu);
+}
+
+static inline u64 kvm_s390_get_tod_clock_fast(struct kvm *kvm)
+{
+	u64 rc;
+
+	preempt_disable();
+	rc = get_tod_clock_fast() + kvm->arch.epoch;
+	preempt_enable();
+	return rc;
+}
+
+/**
+ * kvm_s390_inject_prog_cond - conditionally inject a program check
+ * @vcpu: virtual cpu
+ * @rc: original return/error code
+ *
+ * This function is supposed to be used after regular guest access functions
+ * failed, to conditionally inject a program check to a vcpu. The typical
+ * pattern would look like
+ *
+ * rc = write_guest(vcpu, addr, data, len);
+ * if (rc)
+ *	return kvm_s390_inject_prog_cond(vcpu, rc);
+ *
+ * A negative return code from guest access functions implies an internal error
+ * like e.g. out of memory. In these cases no program check should be injected
+ * to the guest.
+ * A positive value implies that an exception happened while accessing a guest's
+ * memory. In this case all data belonging to the corresponding program check
+ * has been stored in vcpu->arch.pgm and can be injected with
+ * kvm_s390_inject_prog_irq().
+ *
+ * Returns: - the original @rc value if @rc was negative (internal error)
+ *	    - zero if @rc was already zero
+ *	    - zero or error code from injecting if @rc was positive
+ *	      (program check injected to @vcpu)
+ */
+static inline int kvm_s390_inject_prog_cond(struct kvm_vcpu *vcpu, int rc)
+{
+	if (rc <= 0)
+		return rc;
+	return kvm_s390_inject_prog_irq(vcpu, &vcpu->arch.pgm);
+}
+
+int s390int_to_s390irq(struct kvm_s390_interrupt *s390int,
+			struct kvm_s390_irq *s390irq);
+
+/* implemented in interrupt.c */
+int kvm_s390_vcpu_has_irq(struct kvm_vcpu *vcpu, int exclude_stop);
+int psw_extint_disabled(struct kvm_vcpu *vcpu);
+void kvm_s390_destroy_adapters(struct kvm *kvm);
+int kvm_s390_ext_call_pending(struct kvm_vcpu *vcpu);
+extern struct kvm_device_ops kvm_flic_ops;
+int kvm_s390_is_stop_irq_pending(struct kvm_vcpu *vcpu);
+int kvm_s390_is_restart_irq_pending(struct kvm_vcpu *vcpu);
+void kvm_s390_clear_stop_irq(struct kvm_vcpu *vcpu);
+int kvm_s390_set_irq_state(struct kvm_vcpu *vcpu,
+			   void __user *buf, int len);
+int kvm_s390_get_irq_state(struct kvm_vcpu *vcpu,
+			   __u8 __user *buf, int len);
+void kvm_s390_gisa_init(struct kvm *kvm);
+void kvm_s390_gisa_clear(struct kvm *kvm);
+void kvm_s390_gisa_destroy(struct kvm *kvm);
+void kvm_s390_gisa_disable(struct kvm *kvm);
+void kvm_s390_gisa_enable(struct kvm *kvm);
+int __init kvm_s390_gib_init(u8 nisc);
+void kvm_s390_gib_destroy(void);
+
+/* implemented in guestdbg.c */
+void kvm_s390_backup_guest_per_regs(struct kvm_vcpu *vcpu);
+void kvm_s390_restore_guest_per_regs(struct kvm_vcpu *vcpu);
+void kvm_s390_patch_guest_per_regs(struct kvm_vcpu *vcpu);
+int kvm_s390_import_bp_data(struct kvm_vcpu *vcpu,
+			    struct kvm_guest_debug *dbg);
+void kvm_s390_clear_bp_data(struct kvm_vcpu *vcpu);
+void kvm_s390_prepare_debug_exit(struct kvm_vcpu *vcpu);
+int kvm_s390_handle_per_ifetch_icpt(struct kvm_vcpu *vcpu);
+int kvm_s390_handle_per_event(struct kvm_vcpu *vcpu);
+
+/* support for Basic/Extended SCA handling */
+static inline union ipte_control *kvm_s390_get_ipte_control(struct kvm *kvm)
+{
+	struct bsca_block *sca = kvm->arch.sca; /* SCA version doesn't matter */
+
+	return &sca->ipte_control;
+}
+static inline int kvm_s390_use_sca_entries(void)
+{
+	/*
+	 * Without SIGP interpretation, only SRS interpretation (if available)
+	 * might use the entries. By not setting the entries and keeping them
+	 * invalid, hardware will not access them but intercept.
+	 */
+	return sclp.has_sigpif;
+}
+void kvm_s390_reinject_machine_check(struct kvm_vcpu *vcpu,
+				     struct mcck_volatile_info *mcck_info);
+
+/**
+ * kvm_s390_vcpu_crypto_reset_all
+ *
+ * Reset the crypto attributes for each vcpu. This can be done while the vcpus
+ * are running as each vcpu will be removed from SIE before resetting the crypt
+ * attributes and restored to SIE afterward.
+ *
+ * Note: The kvm->lock must be held while calling this function
+ *
+ * @kvm: the KVM guest
+ */
+void kvm_s390_vcpu_crypto_reset_all(struct kvm *kvm);
+
+/**
+ * kvm_s390_vcpu_pci_enable_interp
+ *
+ * Set the associated PCI attributes for each vcpu to allow for zPCI Load/Store
+ * interpretation as well as adapter interruption forwarding.
+ *
+ * @kvm: the KVM guest
+ */
+void kvm_s390_vcpu_pci_enable_interp(struct kvm *kvm);
+
+/**
+ * diag9c_forwarding_hz
+ *
+ * Set the maximum number of diag9c forwarding per second
+ */
+extern unsigned int diag9c_forwarding_hz;
+
+#endif
diff --git a/arch/s390/kvm/pci.c b/arch/s390/kvm/pci.c
new file mode 100644
index 0000000000..ffa7739c7a
--- /dev/null
+++ b/arch/s390/kvm/pci.c
@@ -0,0 +1,704 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * s390 kvm PCI passthrough support
+ *
+ * Copyright IBM Corp. 2022
+ *
+ *    Author(s): Matthew Rosato <mjrosato@linux.ibm.com>
+ */
+
+#include <linux/kvm_host.h>
+#include <linux/pci.h>
+#include <asm/pci.h>
+#include <asm/pci_insn.h>
+#include <asm/pci_io.h>
+#include <asm/sclp.h>
+#include "pci.h"
+#include "kvm-s390.h"
+
+struct zpci_aift *aift;
+
+static inline int __set_irq_noiib(u16 ctl, u8 isc)
+{
+	union zpci_sic_iib iib = {{0}};
+
+	return zpci_set_irq_ctrl(ctl, isc, &iib);
+}
+
+void kvm_s390_pci_aen_exit(void)
+{
+	unsigned long flags;
+	struct kvm_zdev **gait_kzdev;
+
+	lockdep_assert_held(&aift->aift_lock);
+
+	/*
+	 * Contents of the aipb remain registered for the life of the host
+	 * kernel, the information preserved in zpci_aipb and zpci_aif_sbv
+	 * in case we insert the KVM module again later.  Clear the AIFT
+	 * information and free anything not registered with underlying
+	 * firmware.
+	 */
+	spin_lock_irqsave(&aift->gait_lock, flags);
+	gait_kzdev = aift->kzdev;
+	aift->gait = NULL;
+	aift->sbv = NULL;
+	aift->kzdev = NULL;
+	spin_unlock_irqrestore(&aift->gait_lock, flags);
+
+	kfree(gait_kzdev);
+}
+
+static int zpci_setup_aipb(u8 nisc)
+{
+	struct page *page;
+	int size, rc;
+
+	zpci_aipb = kzalloc(sizeof(union zpci_sic_iib), GFP_KERNEL);
+	if (!zpci_aipb)
+		return -ENOMEM;
+
+	aift->sbv = airq_iv_create(ZPCI_NR_DEVICES, AIRQ_IV_ALLOC, NULL);
+	if (!aift->sbv) {
+		rc = -ENOMEM;
+		goto free_aipb;
+	}
+	zpci_aif_sbv = aift->sbv;
+	size = get_order(PAGE_ALIGN(ZPCI_NR_DEVICES *
+						sizeof(struct zpci_gaite)));
+	page = alloc_pages(GFP_KERNEL | __GFP_ZERO, size);
+	if (!page) {
+		rc = -ENOMEM;
+		goto free_sbv;
+	}
+	aift->gait = (struct zpci_gaite *)page_to_virt(page);
+
+	zpci_aipb->aipb.faisb = virt_to_phys(aift->sbv->vector);
+	zpci_aipb->aipb.gait = virt_to_phys(aift->gait);
+	zpci_aipb->aipb.afi = nisc;
+	zpci_aipb->aipb.faal = ZPCI_NR_DEVICES;
+
+	/* Setup Adapter Event Notification Interpretation */
+	if (zpci_set_irq_ctrl(SIC_SET_AENI_CONTROLS, 0, zpci_aipb)) {
+		rc = -EIO;
+		goto free_gait;
+	}
+
+	return 0;
+
+free_gait:
+	free_pages((unsigned long)aift->gait, size);
+free_sbv:
+	airq_iv_release(aift->sbv);
+	zpci_aif_sbv = NULL;
+free_aipb:
+	kfree(zpci_aipb);
+	zpci_aipb = NULL;
+
+	return rc;
+}
+
+static int zpci_reset_aipb(u8 nisc)
+{
+	/*
+	 * AEN registration can only happen once per system boot.  If
+	 * an aipb already exists then AEN was already registered and
+	 * we can re-use the aipb contents.  This can only happen if
+	 * the KVM module was removed and re-inserted.  However, we must
+	 * ensure that the same forwarding ISC is used as this is assigned
+	 * during KVM module load.
+	 */
+	if (zpci_aipb->aipb.afi != nisc)
+		return -EINVAL;
+
+	aift->sbv = zpci_aif_sbv;
+	aift->gait = phys_to_virt(zpci_aipb->aipb.gait);
+
+	return 0;
+}
+
+int kvm_s390_pci_aen_init(u8 nisc)
+{
+	int rc = 0;
+
+	/* If already enabled for AEN, bail out now */
+	if (aift->gait || aift->sbv)
+		return -EPERM;
+
+	mutex_lock(&aift->aift_lock);
+	aift->kzdev = kcalloc(ZPCI_NR_DEVICES, sizeof(struct kvm_zdev *),
+			      GFP_KERNEL);
+	if (!aift->kzdev) {
+		rc = -ENOMEM;
+		goto unlock;
+	}
+
+	if (!zpci_aipb)
+		rc = zpci_setup_aipb(nisc);
+	else
+		rc = zpci_reset_aipb(nisc);
+	if (rc)
+		goto free_zdev;
+
+	/* Enable floating IRQs */
+	if (__set_irq_noiib(SIC_IRQ_MODE_SINGLE, nisc)) {
+		rc = -EIO;
+		kvm_s390_pci_aen_exit();
+	}
+
+	goto unlock;
+
+free_zdev:
+	kfree(aift->kzdev);
+unlock:
+	mutex_unlock(&aift->aift_lock);
+	return rc;
+}
+
+/* Modify PCI: Register floating adapter interruption forwarding */
+static int kvm_zpci_set_airq(struct zpci_dev *zdev)
+{
+	u64 req = ZPCI_CREATE_REQ(zdev->fh, 0, ZPCI_MOD_FC_REG_INT);
+	struct zpci_fib fib = {};
+	u8 status;
+
+	fib.fmt0.isc = zdev->kzdev->fib.fmt0.isc;
+	fib.fmt0.sum = 1;       /* enable summary notifications */
+	fib.fmt0.noi = airq_iv_end(zdev->aibv);
+	fib.fmt0.aibv = virt_to_phys(zdev->aibv->vector);
+	fib.fmt0.aibvo = 0;
+	fib.fmt0.aisb = virt_to_phys(aift->sbv->vector + (zdev->aisb / 64) * 8);
+	fib.fmt0.aisbo = zdev->aisb & 63;
+	fib.gd = zdev->gisa;
+
+	return zpci_mod_fc(req, &fib, &status) ? -EIO : 0;
+}
+
+/* Modify PCI: Unregister floating adapter interruption forwarding */
+static int kvm_zpci_clear_airq(struct zpci_dev *zdev)
+{
+	u64 req = ZPCI_CREATE_REQ(zdev->fh, 0, ZPCI_MOD_FC_DEREG_INT);
+	struct zpci_fib fib = {};
+	u8 cc, status;
+
+	fib.gd = zdev->gisa;
+
+	cc = zpci_mod_fc(req, &fib, &status);
+	if (cc == 3 || (cc == 1 && status == 24))
+		/* Function already gone or IRQs already deregistered. */
+		cc = 0;
+
+	return cc ? -EIO : 0;
+}
+
+static inline void unaccount_mem(unsigned long nr_pages)
+{
+	struct user_struct *user = get_uid(current_user());
+
+	if (user)
+		atomic_long_sub(nr_pages, &user->locked_vm);
+	if (current->mm)
+		atomic64_sub(nr_pages, &current->mm->pinned_vm);
+}
+
+static inline int account_mem(unsigned long nr_pages)
+{
+	struct user_struct *user = get_uid(current_user());
+	unsigned long page_limit, cur_pages, new_pages;
+
+	page_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
+
+	do {
+		cur_pages = atomic_long_read(&user->locked_vm);
+		new_pages = cur_pages + nr_pages;
+		if (new_pages > page_limit)
+			return -ENOMEM;
+	} while (atomic_long_cmpxchg(&user->locked_vm, cur_pages,
+					new_pages) != cur_pages);
+
+	atomic64_add(nr_pages, &current->mm->pinned_vm);
+
+	return 0;
+}
+
+static int kvm_s390_pci_aif_enable(struct zpci_dev *zdev, struct zpci_fib *fib,
+				   bool assist)
+{
+	struct page *pages[1], *aibv_page, *aisb_page = NULL;
+	unsigned int msi_vecs, idx;
+	struct zpci_gaite *gaite;
+	unsigned long hva, bit;
+	struct kvm *kvm;
+	phys_addr_t gaddr;
+	int rc = 0, gisc, npages, pcount = 0;
+
+	/*
+	 * Interrupt forwarding is only applicable if the device is already
+	 * enabled for interpretation
+	 */
+	if (zdev->gisa == 0)
+		return -EINVAL;
+
+	kvm = zdev->kzdev->kvm;
+	msi_vecs = min_t(unsigned int, fib->fmt0.noi, zdev->max_msi);
+
+	/* Get the associated forwarding ISC - if invalid, return the error */
+	gisc = kvm_s390_gisc_register(kvm, fib->fmt0.isc);
+	if (gisc < 0)
+		return gisc;
+
+	/* Replace AIBV address */
+	idx = srcu_read_lock(&kvm->srcu);
+	hva = gfn_to_hva(kvm, gpa_to_gfn((gpa_t)fib->fmt0.aibv));
+	npages = pin_user_pages_fast(hva, 1, FOLL_WRITE | FOLL_LONGTERM, pages);
+	srcu_read_unlock(&kvm->srcu, idx);
+	if (npages < 1) {
+		rc = -EIO;
+		goto out;
+	}
+	aibv_page = pages[0];
+	pcount++;
+	gaddr = page_to_phys(aibv_page) + (fib->fmt0.aibv & ~PAGE_MASK);
+	fib->fmt0.aibv = gaddr;
+
+	/* Pin the guest AISB if one was specified */
+	if (fib->fmt0.sum == 1) {
+		idx = srcu_read_lock(&kvm->srcu);
+		hva = gfn_to_hva(kvm, gpa_to_gfn((gpa_t)fib->fmt0.aisb));
+		npages = pin_user_pages_fast(hva, 1, FOLL_WRITE | FOLL_LONGTERM,
+					     pages);
+		srcu_read_unlock(&kvm->srcu, idx);
+		if (npages < 1) {
+			rc = -EIO;
+			goto unpin1;
+		}
+		aisb_page = pages[0];
+		pcount++;
+	}
+
+	/* Account for pinned pages, roll back on failure */
+	if (account_mem(pcount))
+		goto unpin2;
+
+	/* AISB must be allocated before we can fill in GAITE */
+	mutex_lock(&aift->aift_lock);
+	bit = airq_iv_alloc_bit(aift->sbv);
+	if (bit == -1UL)
+		goto unlock;
+	zdev->aisb = bit; /* store the summary bit number */
+	zdev->aibv = airq_iv_create(msi_vecs, AIRQ_IV_DATA |
+				    AIRQ_IV_BITLOCK |
+				    AIRQ_IV_GUESTVEC,
+				    phys_to_virt(fib->fmt0.aibv));
+
+	spin_lock_irq(&aift->gait_lock);
+	gaite = (struct zpci_gaite *)aift->gait + (zdev->aisb *
+						   sizeof(struct zpci_gaite));
+
+	/* If assist not requested, host will get all alerts */
+	if (assist)
+		gaite->gisa = (u32)virt_to_phys(&kvm->arch.sie_page2->gisa);
+	else
+		gaite->gisa = 0;
+
+	gaite->gisc = fib->fmt0.isc;
+	gaite->count++;
+	gaite->aisbo = fib->fmt0.aisbo;
+	gaite->aisb = virt_to_phys(page_address(aisb_page) + (fib->fmt0.aisb &
+							      ~PAGE_MASK));
+	aift->kzdev[zdev->aisb] = zdev->kzdev;
+	spin_unlock_irq(&aift->gait_lock);
+
+	/* Update guest FIB for re-issue */
+	fib->fmt0.aisbo = zdev->aisb & 63;
+	fib->fmt0.aisb = virt_to_phys(aift->sbv->vector + (zdev->aisb / 64) * 8);
+	fib->fmt0.isc = gisc;
+
+	/* Save some guest fib values in the host for later use */
+	zdev->kzdev->fib.fmt0.isc = fib->fmt0.isc;
+	zdev->kzdev->fib.fmt0.aibv = fib->fmt0.aibv;
+	mutex_unlock(&aift->aift_lock);
+
+	/* Issue the clp to setup the irq now */
+	rc = kvm_zpci_set_airq(zdev);
+	return rc;
+
+unlock:
+	mutex_unlock(&aift->aift_lock);
+unpin2:
+	if (fib->fmt0.sum == 1)
+		unpin_user_page(aisb_page);
+unpin1:
+	unpin_user_page(aibv_page);
+out:
+	return rc;
+}
+
+static int kvm_s390_pci_aif_disable(struct zpci_dev *zdev, bool force)
+{
+	struct kvm_zdev *kzdev = zdev->kzdev;
+	struct zpci_gaite *gaite;
+	struct page *vpage = NULL, *spage = NULL;
+	int rc, pcount = 0;
+	u8 isc;
+
+	if (zdev->gisa == 0)
+		return -EINVAL;
+
+	mutex_lock(&aift->aift_lock);
+
+	/*
+	 * If the clear fails due to an error, leave now unless we know this
+	 * device is about to go away (force) -- In that case clear the GAITE
+	 * regardless.
+	 */
+	rc = kvm_zpci_clear_airq(zdev);
+	if (rc && !force)
+		goto out;
+
+	if (zdev->kzdev->fib.fmt0.aibv == 0)
+		goto out;
+	spin_lock_irq(&aift->gait_lock);
+	gaite = (struct zpci_gaite *)aift->gait + (zdev->aisb *
+						   sizeof(struct zpci_gaite));
+	isc = gaite->gisc;
+	gaite->count--;
+	if (gaite->count == 0) {
+		/* Release guest AIBV and AISB */
+		vpage = phys_to_page(kzdev->fib.fmt0.aibv);
+		if (gaite->aisb != 0)
+			spage = phys_to_page(gaite->aisb);
+		/* Clear the GAIT entry */
+		gaite->aisb = 0;
+		gaite->gisc = 0;
+		gaite->aisbo = 0;
+		gaite->gisa = 0;
+		aift->kzdev[zdev->aisb] = NULL;
+		/* Clear zdev info */
+		airq_iv_free_bit(aift->sbv, zdev->aisb);
+		airq_iv_release(zdev->aibv);
+		zdev->aisb = 0;
+		zdev->aibv = NULL;
+	}
+	spin_unlock_irq(&aift->gait_lock);
+	kvm_s390_gisc_unregister(kzdev->kvm, isc);
+	kzdev->fib.fmt0.isc = 0;
+	kzdev->fib.fmt0.aibv = 0;
+
+	if (vpage) {
+		unpin_user_page(vpage);
+		pcount++;
+	}
+	if (spage) {
+		unpin_user_page(spage);
+		pcount++;
+	}
+	if (pcount > 0)
+		unaccount_mem(pcount);
+out:
+	mutex_unlock(&aift->aift_lock);
+
+	return rc;
+}
+
+static int kvm_s390_pci_dev_open(struct zpci_dev *zdev)
+{
+	struct kvm_zdev *kzdev;
+
+	kzdev = kzalloc(sizeof(struct kvm_zdev), GFP_KERNEL);
+	if (!kzdev)
+		return -ENOMEM;
+
+	kzdev->zdev = zdev;
+	zdev->kzdev = kzdev;
+
+	return 0;
+}
+
+static void kvm_s390_pci_dev_release(struct zpci_dev *zdev)
+{
+	struct kvm_zdev *kzdev;
+
+	kzdev = zdev->kzdev;
+	WARN_ON(kzdev->zdev != zdev);
+	zdev->kzdev = NULL;
+	kfree(kzdev);
+}
+
+
+/*
+ * Register device with the specified KVM. If interpretation facilities are
+ * available, enable them and let userspace indicate whether or not they will
+ * be used (specify SHM bit to disable).
+ */
+static int kvm_s390_pci_register_kvm(void *opaque, struct kvm *kvm)
+{
+	struct zpci_dev *zdev = opaque;
+	u8 status;
+	int rc;
+
+	if (!zdev)
+		return -EINVAL;
+
+	mutex_lock(&zdev->kzdev_lock);
+
+	if (zdev->kzdev || zdev->gisa != 0 || !kvm) {
+		mutex_unlock(&zdev->kzdev_lock);
+		return -EINVAL;
+	}
+
+	kvm_get_kvm(kvm);
+
+	mutex_lock(&kvm->lock);
+
+	rc = kvm_s390_pci_dev_open(zdev);
+	if (rc)
+		goto err;
+
+	/*
+	 * If interpretation facilities aren't available, add the device to
+	 * the kzdev list but don't enable for interpretation.
+	 */
+	if (!kvm_s390_pci_interp_allowed())
+		goto out;
+
+	/*
+	 * If this is the first request to use an interpreted device, make the
+	 * necessary vcpu changes
+	 */
+	if (!kvm->arch.use_zpci_interp)
+		kvm_s390_vcpu_pci_enable_interp(kvm);
+
+	if (zdev_enabled(zdev)) {
+		rc = zpci_disable_device(zdev);
+		if (rc)
+			goto err;
+	}
+
+	/*
+	 * Store information about the identity of the kvm guest allowed to
+	 * access this device via interpretation to be used by host CLP
+	 */
+	zdev->gisa = (u32)virt_to_phys(&kvm->arch.sie_page2->gisa);
+
+	rc = zpci_enable_device(zdev);
+	if (rc)
+		goto clear_gisa;
+
+	/* Re-register the IOMMU that was already created */
+	rc = zpci_register_ioat(zdev, 0, zdev->start_dma, zdev->end_dma,
+				virt_to_phys(zdev->dma_table), &status);
+	if (rc)
+		goto clear_gisa;
+
+out:
+	zdev->kzdev->kvm = kvm;
+
+	spin_lock(&kvm->arch.kzdev_list_lock);
+	list_add_tail(&zdev->kzdev->entry, &kvm->arch.kzdev_list);
+	spin_unlock(&kvm->arch.kzdev_list_lock);
+
+	mutex_unlock(&kvm->lock);
+	mutex_unlock(&zdev->kzdev_lock);
+	return 0;
+
+clear_gisa:
+	zdev->gisa = 0;
+err:
+	if (zdev->kzdev)
+		kvm_s390_pci_dev_release(zdev);
+	mutex_unlock(&kvm->lock);
+	mutex_unlock(&zdev->kzdev_lock);
+	kvm_put_kvm(kvm);
+	return rc;
+}
+
+static void kvm_s390_pci_unregister_kvm(void *opaque)
+{
+	struct zpci_dev *zdev = opaque;
+	struct kvm *kvm;
+	u8 status;
+
+	if (!zdev)
+		return;
+
+	mutex_lock(&zdev->kzdev_lock);
+
+	if (WARN_ON(!zdev->kzdev)) {
+		mutex_unlock(&zdev->kzdev_lock);
+		return;
+	}
+
+	kvm = zdev->kzdev->kvm;
+	mutex_lock(&kvm->lock);
+
+	/*
+	 * A 0 gisa means interpretation was never enabled, just remove the
+	 * device from the list.
+	 */
+	if (zdev->gisa == 0)
+		goto out;
+
+	/* Forwarding must be turned off before interpretation */
+	if (zdev->kzdev->fib.fmt0.aibv != 0)
+		kvm_s390_pci_aif_disable(zdev, true);
+
+	/* Remove the host CLP guest designation */
+	zdev->gisa = 0;
+
+	if (zdev_enabled(zdev)) {
+		if (zpci_disable_device(zdev))
+			goto out;
+	}
+
+	if (zpci_enable_device(zdev))
+		goto out;
+
+	/* Re-register the IOMMU that was already created */
+	zpci_register_ioat(zdev, 0, zdev->start_dma, zdev->end_dma,
+			   virt_to_phys(zdev->dma_table), &status);
+
+out:
+	spin_lock(&kvm->arch.kzdev_list_lock);
+	list_del(&zdev->kzdev->entry);
+	spin_unlock(&kvm->arch.kzdev_list_lock);
+	kvm_s390_pci_dev_release(zdev);
+
+	mutex_unlock(&kvm->lock);
+	mutex_unlock(&zdev->kzdev_lock);
+
+	kvm_put_kvm(kvm);
+}
+
+void kvm_s390_pci_init_list(struct kvm *kvm)
+{
+	spin_lock_init(&kvm->arch.kzdev_list_lock);
+	INIT_LIST_HEAD(&kvm->arch.kzdev_list);
+}
+
+void kvm_s390_pci_clear_list(struct kvm *kvm)
+{
+	/*
+	 * This list should already be empty, either via vfio device closures
+	 * or kvm fd cleanup.
+	 */
+	spin_lock(&kvm->arch.kzdev_list_lock);
+	WARN_ON_ONCE(!list_empty(&kvm->arch.kzdev_list));
+	spin_unlock(&kvm->arch.kzdev_list_lock);
+}
+
+static struct zpci_dev *get_zdev_from_kvm_by_fh(struct kvm *kvm, u32 fh)
+{
+	struct zpci_dev *zdev = NULL;
+	struct kvm_zdev *kzdev;
+
+	spin_lock(&kvm->arch.kzdev_list_lock);
+	list_for_each_entry(kzdev, &kvm->arch.kzdev_list, entry) {
+		if (kzdev->zdev->fh == fh) {
+			zdev = kzdev->zdev;
+			break;
+		}
+	}
+	spin_unlock(&kvm->arch.kzdev_list_lock);
+
+	return zdev;
+}
+
+static int kvm_s390_pci_zpci_reg_aen(struct zpci_dev *zdev,
+				     struct kvm_s390_zpci_op *args)
+{
+	struct zpci_fib fib = {};
+	bool hostflag;
+
+	fib.fmt0.aibv = args->u.reg_aen.ibv;
+	fib.fmt0.isc = args->u.reg_aen.isc;
+	fib.fmt0.noi = args->u.reg_aen.noi;
+	if (args->u.reg_aen.sb != 0) {
+		fib.fmt0.aisb = args->u.reg_aen.sb;
+		fib.fmt0.aisbo = args->u.reg_aen.sbo;
+		fib.fmt0.sum = 1;
+	} else {
+		fib.fmt0.aisb = 0;
+		fib.fmt0.aisbo = 0;
+		fib.fmt0.sum = 0;
+	}
+
+	hostflag = !(args->u.reg_aen.flags & KVM_S390_ZPCIOP_REGAEN_HOST);
+	return kvm_s390_pci_aif_enable(zdev, &fib, hostflag);
+}
+
+int kvm_s390_pci_zpci_op(struct kvm *kvm, struct kvm_s390_zpci_op *args)
+{
+	struct kvm_zdev *kzdev;
+	struct zpci_dev *zdev;
+	int r;
+
+	zdev = get_zdev_from_kvm_by_fh(kvm, args->fh);
+	if (!zdev)
+		return -ENODEV;
+
+	mutex_lock(&zdev->kzdev_lock);
+	mutex_lock(&kvm->lock);
+
+	kzdev = zdev->kzdev;
+	if (!kzdev) {
+		r = -ENODEV;
+		goto out;
+	}
+	if (kzdev->kvm != kvm) {
+		r = -EPERM;
+		goto out;
+	}
+
+	switch (args->op) {
+	case KVM_S390_ZPCIOP_REG_AEN:
+		/* Fail on unknown flags */
+		if (args->u.reg_aen.flags & ~KVM_S390_ZPCIOP_REGAEN_HOST) {
+			r = -EINVAL;
+			break;
+		}
+		r = kvm_s390_pci_zpci_reg_aen(zdev, args);
+		break;
+	case KVM_S390_ZPCIOP_DEREG_AEN:
+		r = kvm_s390_pci_aif_disable(zdev, false);
+		break;
+	default:
+		r = -EINVAL;
+	}
+
+out:
+	mutex_unlock(&kvm->lock);
+	mutex_unlock(&zdev->kzdev_lock);
+	return r;
+}
+
+int __init kvm_s390_pci_init(void)
+{
+	zpci_kvm_hook.kvm_register = kvm_s390_pci_register_kvm;
+	zpci_kvm_hook.kvm_unregister = kvm_s390_pci_unregister_kvm;
+
+	if (!kvm_s390_pci_interp_allowed())
+		return 0;
+
+	aift = kzalloc(sizeof(struct zpci_aift), GFP_KERNEL);
+	if (!aift)
+		return -ENOMEM;
+
+	spin_lock_init(&aift->gait_lock);
+	mutex_init(&aift->aift_lock);
+
+	return 0;
+}
+
+void kvm_s390_pci_exit(void)
+{
+	zpci_kvm_hook.kvm_register = NULL;
+	zpci_kvm_hook.kvm_unregister = NULL;
+
+	if (!kvm_s390_pci_interp_allowed())
+		return;
+
+	mutex_destroy(&aift->aift_lock);
+
+	kfree(aift);
+}
diff --git a/arch/s390/kvm/pci.h b/arch/s390/kvm/pci.h
new file mode 100644
index 0000000000..ff0972dd5e
--- /dev/null
+++ b/arch/s390/kvm/pci.h
@@ -0,0 +1,87 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * s390 kvm PCI passthrough support
+ *
+ * Copyright IBM Corp. 2022
+ *
+ *    Author(s): Matthew Rosato <mjrosato@linux.ibm.com>
+ */
+
+#ifndef __KVM_S390_PCI_H
+#define __KVM_S390_PCI_H
+
+#include <linux/kvm.h>
+#include <linux/kvm_host.h>
+#include <linux/mutex.h>
+#include <linux/pci.h>
+#include <asm/airq.h>
+#include <asm/cpu.h>
+
+struct kvm_zdev {
+	struct zpci_dev *zdev;
+	struct kvm *kvm;
+	struct zpci_fib fib;
+	struct list_head entry;
+};
+
+struct zpci_gaite {
+	u32 gisa;
+	u8 gisc;
+	u8 count;
+	u8 reserved;
+	u8 aisbo;
+	u64 aisb;
+};
+
+struct zpci_aift {
+	struct zpci_gaite *gait;
+	struct airq_iv *sbv;
+	struct kvm_zdev **kzdev;
+	spinlock_t gait_lock; /* Protects the gait, used during AEN forward */
+	struct mutex aift_lock; /* Protects the other structures in aift */
+};
+
+extern struct zpci_aift *aift;
+
+static inline struct kvm *kvm_s390_pci_si_to_kvm(struct zpci_aift *aift,
+						 unsigned long si)
+{
+	if (!IS_ENABLED(CONFIG_VFIO_PCI_ZDEV_KVM) || !aift->kzdev ||
+	    !aift->kzdev[si])
+		return NULL;
+	return aift->kzdev[si]->kvm;
+};
+
+int kvm_s390_pci_aen_init(u8 nisc);
+void kvm_s390_pci_aen_exit(void);
+
+void kvm_s390_pci_init_list(struct kvm *kvm);
+void kvm_s390_pci_clear_list(struct kvm *kvm);
+
+int kvm_s390_pci_zpci_op(struct kvm *kvm, struct kvm_s390_zpci_op *args);
+
+int __init kvm_s390_pci_init(void);
+void kvm_s390_pci_exit(void);
+
+static inline bool kvm_s390_pci_interp_allowed(void)
+{
+	struct cpuid cpu_id;
+
+	get_cpu_id(&cpu_id);
+	switch (cpu_id.machine) {
+	case 0x2817:
+	case 0x2818:
+	case 0x2827:
+	case 0x2828:
+	case 0x2964:
+	case 0x2965:
+		/* No SHM on certain machines */
+		return false;
+	default:
+		return (IS_ENABLED(CONFIG_VFIO_PCI_ZDEV_KVM) &&
+			sclp.has_zpci_lsi && sclp.has_aeni && sclp.has_aisi &&
+			sclp.has_aisii);
+	}
+}
+
+#endif /* __KVM_S390_PCI_H */
diff --git a/arch/s390/kvm/priv.c b/arch/s390/kvm/priv.c
new file mode 100644
index 0000000000..dc4cfa8795
--- /dev/null
+++ b/arch/s390/kvm/priv.c
@@ -0,0 +1,1573 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * handling privileged instructions
+ *
+ * Copyright IBM Corp. 2008, 2020
+ *
+ *    Author(s): Carsten Otte <cotte@de.ibm.com>
+ *               Christian Borntraeger <borntraeger@de.ibm.com>
+ */
+
+#include <linux/kvm.h>
+#include <linux/gfp.h>
+#include <linux/errno.h>
+#include <linux/mm_types.h>
+#include <linux/pgtable.h>
+#include <linux/io.h>
+#include <asm/asm-offsets.h>
+#include <asm/facility.h>
+#include <asm/current.h>
+#include <asm/debug.h>
+#include <asm/ebcdic.h>
+#include <asm/sysinfo.h>
+#include <asm/page-states.h>
+#include <asm/gmap.h>
+#include <asm/ptrace.h>
+#include <asm/sclp.h>
+#include <asm/ap.h>
+#include "gaccess.h"
+#include "kvm-s390.h"
+#include "trace.h"
+
+static int handle_ri(struct kvm_vcpu *vcpu)
+{
+	vcpu->stat.instruction_ri++;
+
+	if (test_kvm_facility(vcpu->kvm, 64)) {
+		VCPU_EVENT(vcpu, 3, "%s", "ENABLE: RI (lazy)");
+		vcpu->arch.sie_block->ecb3 |= ECB3_RI;
+		kvm_s390_retry_instr(vcpu);
+		return 0;
+	} else
+		return kvm_s390_inject_program_int(vcpu, PGM_OPERATION);
+}
+
+int kvm_s390_handle_aa(struct kvm_vcpu *vcpu)
+{
+	if ((vcpu->arch.sie_block->ipa & 0xf) <= 4)
+		return handle_ri(vcpu);
+	else
+		return -EOPNOTSUPP;
+}
+
+static int handle_gs(struct kvm_vcpu *vcpu)
+{
+	vcpu->stat.instruction_gs++;
+
+	if (test_kvm_facility(vcpu->kvm, 133)) {
+		VCPU_EVENT(vcpu, 3, "%s", "ENABLE: GS (lazy)");
+		preempt_disable();
+		__ctl_set_bit(2, 4);
+		current->thread.gs_cb = (struct gs_cb *)&vcpu->run->s.regs.gscb;
+		restore_gs_cb(current->thread.gs_cb);
+		preempt_enable();
+		vcpu->arch.sie_block->ecb |= ECB_GS;
+		vcpu->arch.sie_block->ecd |= ECD_HOSTREGMGMT;
+		vcpu->arch.gs_enabled = 1;
+		kvm_s390_retry_instr(vcpu);
+		return 0;
+	} else
+		return kvm_s390_inject_program_int(vcpu, PGM_OPERATION);
+}
+
+int kvm_s390_handle_e3(struct kvm_vcpu *vcpu)
+{
+	int code = vcpu->arch.sie_block->ipb & 0xff;
+
+	if (code == 0x49 || code == 0x4d)
+		return handle_gs(vcpu);
+	else
+		return -EOPNOTSUPP;
+}
+/* Handle SCK (SET CLOCK) interception */
+static int handle_set_clock(struct kvm_vcpu *vcpu)
+{
+	struct kvm_s390_vm_tod_clock gtod = { 0 };
+	int rc;
+	u8 ar;
+	u64 op2;
+
+	vcpu->stat.instruction_sck++;
+
+	if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
+		return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
+
+	op2 = kvm_s390_get_base_disp_s(vcpu, &ar);
+	if (op2 & 7)	/* Operand must be on a doubleword boundary */
+		return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
+	rc = read_guest(vcpu, op2, ar, &gtod.tod, sizeof(gtod.tod));
+	if (rc)
+		return kvm_s390_inject_prog_cond(vcpu, rc);
+
+	VCPU_EVENT(vcpu, 3, "SCK: setting guest TOD to 0x%llx", gtod.tod);
+	/*
+	 * To set the TOD clock the kvm lock must be taken, but the vcpu lock
+	 * is already held in handle_set_clock. The usual lock order is the
+	 * opposite.  As SCK is deprecated and should not be used in several
+	 * cases, for example when the multiple epoch facility or TOD clock
+	 * steering facility is installed (see Principles of Operation),  a
+	 * slow path can be used.  If the lock can not be taken via try_lock,
+	 * the instruction will be retried via -EAGAIN at a later point in
+	 * time.
+	 */
+	if (!kvm_s390_try_set_tod_clock(vcpu->kvm, &gtod)) {
+		kvm_s390_retry_instr(vcpu);
+		return -EAGAIN;
+	}
+
+	kvm_s390_set_psw_cc(vcpu, 0);
+	return 0;
+}
+
+static int handle_set_prefix(struct kvm_vcpu *vcpu)
+{
+	u64 operand2;
+	u32 address;
+	int rc;
+	u8 ar;
+
+	vcpu->stat.instruction_spx++;
+
+	if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
+		return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
+
+	operand2 = kvm_s390_get_base_disp_s(vcpu, &ar);
+
+	/* must be word boundary */
+	if (operand2 & 3)
+		return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
+
+	/* get the value */
+	rc = read_guest(vcpu, operand2, ar, &address, sizeof(address));
+	if (rc)
+		return kvm_s390_inject_prog_cond(vcpu, rc);
+
+	address &= 0x7fffe000u;
+
+	/*
+	 * Make sure the new value is valid memory. We only need to check the
+	 * first page, since address is 8k aligned and memory pieces are always
+	 * at least 1MB aligned and have at least a size of 1MB.
+	 */
+	if (kvm_is_error_gpa(vcpu->kvm, address))
+		return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
+
+	kvm_s390_set_prefix(vcpu, address);
+	trace_kvm_s390_handle_prefix(vcpu, 1, address);
+	return 0;
+}
+
+static int handle_store_prefix(struct kvm_vcpu *vcpu)
+{
+	u64 operand2;
+	u32 address;
+	int rc;
+	u8 ar;
+
+	vcpu->stat.instruction_stpx++;
+
+	if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
+		return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
+
+	operand2 = kvm_s390_get_base_disp_s(vcpu, &ar);
+
+	/* must be word boundary */
+	if (operand2 & 3)
+		return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
+
+	address = kvm_s390_get_prefix(vcpu);
+
+	/* get the value */
+	rc = write_guest(vcpu, operand2, ar, &address, sizeof(address));
+	if (rc)
+		return kvm_s390_inject_prog_cond(vcpu, rc);
+
+	VCPU_EVENT(vcpu, 3, "STPX: storing prefix 0x%x into 0x%llx", address, operand2);
+	trace_kvm_s390_handle_prefix(vcpu, 0, address);
+	return 0;
+}
+
+static int handle_store_cpu_address(struct kvm_vcpu *vcpu)
+{
+	u16 vcpu_id = vcpu->vcpu_id;
+	u64 ga;
+	int rc;
+	u8 ar;
+
+	vcpu->stat.instruction_stap++;
+
+	if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
+		return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
+
+	ga = kvm_s390_get_base_disp_s(vcpu, &ar);
+
+	if (ga & 1)
+		return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
+
+	rc = write_guest(vcpu, ga, ar, &vcpu_id, sizeof(vcpu_id));
+	if (rc)
+		return kvm_s390_inject_prog_cond(vcpu, rc);
+
+	VCPU_EVENT(vcpu, 3, "STAP: storing cpu address (%u) to 0x%llx", vcpu_id, ga);
+	trace_kvm_s390_handle_stap(vcpu, ga);
+	return 0;
+}
+
+int kvm_s390_skey_check_enable(struct kvm_vcpu *vcpu)
+{
+	int rc;
+
+	trace_kvm_s390_skey_related_inst(vcpu);
+	/* Already enabled? */
+	if (vcpu->arch.skey_enabled)
+		return 0;
+
+	rc = s390_enable_skey();
+	VCPU_EVENT(vcpu, 3, "enabling storage keys for guest: %d", rc);
+	if (rc)
+		return rc;
+
+	if (kvm_s390_test_cpuflags(vcpu, CPUSTAT_KSS))
+		kvm_s390_clear_cpuflags(vcpu, CPUSTAT_KSS);
+	if (!vcpu->kvm->arch.use_skf)
+		vcpu->arch.sie_block->ictl |= ICTL_ISKE | ICTL_SSKE | ICTL_RRBE;
+	else
+		vcpu->arch.sie_block->ictl &= ~(ICTL_ISKE | ICTL_SSKE | ICTL_RRBE);
+	vcpu->arch.skey_enabled = true;
+	return 0;
+}
+
+static int try_handle_skey(struct kvm_vcpu *vcpu)
+{
+	int rc;
+
+	rc = kvm_s390_skey_check_enable(vcpu);
+	if (rc)
+		return rc;
+	if (vcpu->kvm->arch.use_skf) {
+		/* with storage-key facility, SIE interprets it for us */
+		kvm_s390_retry_instr(vcpu);
+		VCPU_EVENT(vcpu, 4, "%s", "retrying storage key operation");
+		return -EAGAIN;
+	}
+	return 0;
+}
+
+static int handle_iske(struct kvm_vcpu *vcpu)
+{
+	unsigned long gaddr, vmaddr;
+	unsigned char key;
+	int reg1, reg2;
+	bool unlocked;
+	int rc;
+
+	vcpu->stat.instruction_iske++;
+
+	if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
+		return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
+
+	rc = try_handle_skey(vcpu);
+	if (rc)
+		return rc != -EAGAIN ? rc : 0;
+
+	kvm_s390_get_regs_rre(vcpu, &reg1, &reg2);
+
+	gaddr = vcpu->run->s.regs.gprs[reg2] & PAGE_MASK;
+	gaddr = kvm_s390_logical_to_effective(vcpu, gaddr);
+	gaddr = kvm_s390_real_to_abs(vcpu, gaddr);
+	vmaddr = gfn_to_hva(vcpu->kvm, gpa_to_gfn(gaddr));
+	if (kvm_is_error_hva(vmaddr))
+		return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
+retry:
+	unlocked = false;
+	mmap_read_lock(current->mm);
+	rc = get_guest_storage_key(current->mm, vmaddr, &key);
+
+	if (rc) {
+		rc = fixup_user_fault(current->mm, vmaddr,
+				      FAULT_FLAG_WRITE, &unlocked);
+		if (!rc) {
+			mmap_read_unlock(current->mm);
+			goto retry;
+		}
+	}
+	mmap_read_unlock(current->mm);
+	if (rc == -EFAULT)
+		return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
+	if (rc < 0)
+		return rc;
+	vcpu->run->s.regs.gprs[reg1] &= ~0xff;
+	vcpu->run->s.regs.gprs[reg1] |= key;
+	return 0;
+}
+
+static int handle_rrbe(struct kvm_vcpu *vcpu)
+{
+	unsigned long vmaddr, gaddr;
+	int reg1, reg2;
+	bool unlocked;
+	int rc;
+
+	vcpu->stat.instruction_rrbe++;
+
+	if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
+		return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
+
+	rc = try_handle_skey(vcpu);
+	if (rc)
+		return rc != -EAGAIN ? rc : 0;
+
+	kvm_s390_get_regs_rre(vcpu, &reg1, &reg2);
+
+	gaddr = vcpu->run->s.regs.gprs[reg2] & PAGE_MASK;
+	gaddr = kvm_s390_logical_to_effective(vcpu, gaddr);
+	gaddr = kvm_s390_real_to_abs(vcpu, gaddr);
+	vmaddr = gfn_to_hva(vcpu->kvm, gpa_to_gfn(gaddr));
+	if (kvm_is_error_hva(vmaddr))
+		return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
+retry:
+	unlocked = false;
+	mmap_read_lock(current->mm);
+	rc = reset_guest_reference_bit(current->mm, vmaddr);
+	if (rc < 0) {
+		rc = fixup_user_fault(current->mm, vmaddr,
+				      FAULT_FLAG_WRITE, &unlocked);
+		if (!rc) {
+			mmap_read_unlock(current->mm);
+			goto retry;
+		}
+	}
+	mmap_read_unlock(current->mm);
+	if (rc == -EFAULT)
+		return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
+	if (rc < 0)
+		return rc;
+	kvm_s390_set_psw_cc(vcpu, rc);
+	return 0;
+}
+
+#define SSKE_NQ 0x8
+#define SSKE_MR 0x4
+#define SSKE_MC 0x2
+#define SSKE_MB 0x1
+static int handle_sske(struct kvm_vcpu *vcpu)
+{
+	unsigned char m3 = vcpu->arch.sie_block->ipb >> 28;
+	unsigned long start, end;
+	unsigned char key, oldkey;
+	int reg1, reg2;
+	bool unlocked;
+	int rc;
+
+	vcpu->stat.instruction_sske++;
+
+	if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
+		return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
+
+	rc = try_handle_skey(vcpu);
+	if (rc)
+		return rc != -EAGAIN ? rc : 0;
+
+	if (!test_kvm_facility(vcpu->kvm, 8))
+		m3 &= ~SSKE_MB;
+	if (!test_kvm_facility(vcpu->kvm, 10))
+		m3 &= ~(SSKE_MC | SSKE_MR);
+	if (!test_kvm_facility(vcpu->kvm, 14))
+		m3 &= ~SSKE_NQ;
+
+	kvm_s390_get_regs_rre(vcpu, &reg1, &reg2);
+
+	key = vcpu->run->s.regs.gprs[reg1] & 0xfe;
+	start = vcpu->run->s.regs.gprs[reg2] & PAGE_MASK;
+	start = kvm_s390_logical_to_effective(vcpu, start);
+	if (m3 & SSKE_MB) {
+		/* start already designates an absolute address */
+		end = (start + _SEGMENT_SIZE) & ~(_SEGMENT_SIZE - 1);
+	} else {
+		start = kvm_s390_real_to_abs(vcpu, start);
+		end = start + PAGE_SIZE;
+	}
+
+	while (start != end) {
+		unsigned long vmaddr = gfn_to_hva(vcpu->kvm, gpa_to_gfn(start));
+		unlocked = false;
+
+		if (kvm_is_error_hva(vmaddr))
+			return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
+
+		mmap_read_lock(current->mm);
+		rc = cond_set_guest_storage_key(current->mm, vmaddr, key, &oldkey,
+						m3 & SSKE_NQ, m3 & SSKE_MR,
+						m3 & SSKE_MC);
+
+		if (rc < 0) {
+			rc = fixup_user_fault(current->mm, vmaddr,
+					      FAULT_FLAG_WRITE, &unlocked);
+			rc = !rc ? -EAGAIN : rc;
+		}
+		mmap_read_unlock(current->mm);
+		if (rc == -EFAULT)
+			return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
+		if (rc == -EAGAIN)
+			continue;
+		if (rc < 0)
+			return rc;
+		start += PAGE_SIZE;
+	}
+
+	if (m3 & (SSKE_MC | SSKE_MR)) {
+		if (m3 & SSKE_MB) {
+			/* skey in reg1 is unpredictable */
+			kvm_s390_set_psw_cc(vcpu, 3);
+		} else {
+			kvm_s390_set_psw_cc(vcpu, rc);
+			vcpu->run->s.regs.gprs[reg1] &= ~0xff00UL;
+			vcpu->run->s.regs.gprs[reg1] |= (u64) oldkey << 8;
+		}
+	}
+	if (m3 & SSKE_MB) {
+		if (psw_bits(vcpu->arch.sie_block->gpsw).eaba == PSW_BITS_AMODE_64BIT)
+			vcpu->run->s.regs.gprs[reg2] &= ~PAGE_MASK;
+		else
+			vcpu->run->s.regs.gprs[reg2] &= ~0xfffff000UL;
+		end = kvm_s390_logical_to_effective(vcpu, end);
+		vcpu->run->s.regs.gprs[reg2] |= end;
+	}
+	return 0;
+}
+
+static int handle_ipte_interlock(struct kvm_vcpu *vcpu)
+{
+	vcpu->stat.instruction_ipte_interlock++;
+	if (psw_bits(vcpu->arch.sie_block->gpsw).pstate)
+		return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
+	wait_event(vcpu->kvm->arch.ipte_wq, !ipte_lock_held(vcpu->kvm));
+	kvm_s390_retry_instr(vcpu);
+	VCPU_EVENT(vcpu, 4, "%s", "retrying ipte interlock operation");
+	return 0;
+}
+
+static int handle_test_block(struct kvm_vcpu *vcpu)
+{
+	gpa_t addr;
+	int reg2;
+
+	vcpu->stat.instruction_tb++;
+
+	if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
+		return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
+
+	kvm_s390_get_regs_rre(vcpu, NULL, &reg2);
+	addr = vcpu->run->s.regs.gprs[reg2] & PAGE_MASK;
+	addr = kvm_s390_logical_to_effective(vcpu, addr);
+	if (kvm_s390_check_low_addr_prot_real(vcpu, addr))
+		return kvm_s390_inject_prog_irq(vcpu, &vcpu->arch.pgm);
+	addr = kvm_s390_real_to_abs(vcpu, addr);
+
+	if (kvm_is_error_gpa(vcpu->kvm, addr))
+		return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
+	/*
+	 * We don't expect errors on modern systems, and do not care
+	 * about storage keys (yet), so let's just clear the page.
+	 */
+	if (kvm_clear_guest(vcpu->kvm, addr, PAGE_SIZE))
+		return -EFAULT;
+	kvm_s390_set_psw_cc(vcpu, 0);
+	vcpu->run->s.regs.gprs[0] = 0;
+	return 0;
+}
+
+static int handle_tpi(struct kvm_vcpu *vcpu)
+{
+	struct kvm_s390_interrupt_info *inti;
+	unsigned long len;
+	u32 tpi_data[3];
+	int rc;
+	u64 addr;
+	u8 ar;
+
+	vcpu->stat.instruction_tpi++;
+
+	addr = kvm_s390_get_base_disp_s(vcpu, &ar);
+	if (addr & 3)
+		return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
+
+	inti = kvm_s390_get_io_int(vcpu->kvm, vcpu->arch.sie_block->gcr[6], 0);
+	if (!inti) {
+		kvm_s390_set_psw_cc(vcpu, 0);
+		return 0;
+	}
+
+	tpi_data[0] = inti->io.subchannel_id << 16 | inti->io.subchannel_nr;
+	tpi_data[1] = inti->io.io_int_parm;
+	tpi_data[2] = inti->io.io_int_word;
+	if (addr) {
+		/*
+		 * Store the two-word I/O interruption code into the
+		 * provided area.
+		 */
+		len = sizeof(tpi_data) - 4;
+		rc = write_guest(vcpu, addr, ar, &tpi_data, len);
+		if (rc) {
+			rc = kvm_s390_inject_prog_cond(vcpu, rc);
+			goto reinject_interrupt;
+		}
+	} else {
+		/*
+		 * Store the three-word I/O interruption code into
+		 * the appropriate lowcore area.
+		 */
+		len = sizeof(tpi_data);
+		if (write_guest_lc(vcpu, __LC_SUBCHANNEL_ID, &tpi_data, len)) {
+			/* failed writes to the low core are not recoverable */
+			rc = -EFAULT;
+			goto reinject_interrupt;
+		}
+	}
+
+	/* irq was successfully handed to the guest */
+	kfree(inti);
+	kvm_s390_set_psw_cc(vcpu, 1);
+	return 0;
+reinject_interrupt:
+	/*
+	 * If we encounter a problem storing the interruption code, the
+	 * instruction is suppressed from the guest's view: reinject the
+	 * interrupt.
+	 */
+	if (kvm_s390_reinject_io_int(vcpu->kvm, inti)) {
+		kfree(inti);
+		rc = -EFAULT;
+	}
+	/* don't set the cc, a pgm irq was injected or we drop to user space */
+	return rc ? -EFAULT : 0;
+}
+
+static int handle_tsch(struct kvm_vcpu *vcpu)
+{
+	struct kvm_s390_interrupt_info *inti = NULL;
+	const u64 isc_mask = 0xffUL << 24; /* all iscs set */
+
+	vcpu->stat.instruction_tsch++;
+
+	/* a valid schid has at least one bit set */
+	if (vcpu->run->s.regs.gprs[1])
+		inti = kvm_s390_get_io_int(vcpu->kvm, isc_mask,
+					   vcpu->run->s.regs.gprs[1]);
+
+	/*
+	 * Prepare exit to userspace.
+	 * We indicate whether we dequeued a pending I/O interrupt
+	 * so that userspace can re-inject it if the instruction gets
+	 * a program check. While this may re-order the pending I/O
+	 * interrupts, this is no problem since the priority is kept
+	 * intact.
+	 */
+	vcpu->run->exit_reason = KVM_EXIT_S390_TSCH;
+	vcpu->run->s390_tsch.dequeued = !!inti;
+	if (inti) {
+		vcpu->run->s390_tsch.subchannel_id = inti->io.subchannel_id;
+		vcpu->run->s390_tsch.subchannel_nr = inti->io.subchannel_nr;
+		vcpu->run->s390_tsch.io_int_parm = inti->io.io_int_parm;
+		vcpu->run->s390_tsch.io_int_word = inti->io.io_int_word;
+	}
+	vcpu->run->s390_tsch.ipb = vcpu->arch.sie_block->ipb;
+	kfree(inti);
+	return -EREMOTE;
+}
+
+static int handle_io_inst(struct kvm_vcpu *vcpu)
+{
+	VCPU_EVENT(vcpu, 4, "%s", "I/O instruction");
+
+	if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
+		return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
+
+	if (vcpu->kvm->arch.css_support) {
+		/*
+		 * Most I/O instructions will be handled by userspace.
+		 * Exceptions are tpi and the interrupt portion of tsch.
+		 */
+		if (vcpu->arch.sie_block->ipa == 0xb236)
+			return handle_tpi(vcpu);
+		if (vcpu->arch.sie_block->ipa == 0xb235)
+			return handle_tsch(vcpu);
+		/* Handle in userspace. */
+		vcpu->stat.instruction_io_other++;
+		return -EOPNOTSUPP;
+	} else {
+		/*
+		 * Set condition code 3 to stop the guest from issuing channel
+		 * I/O instructions.
+		 */
+		kvm_s390_set_psw_cc(vcpu, 3);
+		return 0;
+	}
+}
+
+/*
+ * handle_pqap: Handling pqap interception
+ * @vcpu: the vcpu having issue the pqap instruction
+ *
+ * We now support PQAP/AQIC instructions and we need to correctly
+ * answer the guest even if no dedicated driver's hook is available.
+ *
+ * The intercepting code calls a dedicated callback for this instruction
+ * if a driver did register one in the CRYPTO satellite of the
+ * SIE block.
+ *
+ * If no callback is available, the queues are not available, return this
+ * response code to the caller and set CC to 3.
+ * Else return the response code returned by the callback.
+ */
+static int handle_pqap(struct kvm_vcpu *vcpu)
+{
+	struct ap_queue_status status = {};
+	crypto_hook pqap_hook;
+	unsigned long reg0;
+	int ret;
+	uint8_t fc;
+
+	/* Verify that the AP instruction are available */
+	if (!ap_instructions_available())
+		return -EOPNOTSUPP;
+	/* Verify that the guest is allowed to use AP instructions */
+	if (!(vcpu->arch.sie_block->eca & ECA_APIE))
+		return -EOPNOTSUPP;
+	/*
+	 * The only possibly intercepted functions when AP instructions are
+	 * available for the guest are AQIC and TAPQ with the t bit set
+	 * since we do not set IC.3 (FIII) we currently will only intercept
+	 * the AQIC function code.
+	 * Note: running nested under z/VM can result in intercepts for other
+	 * function codes, e.g. PQAP(QCI). We do not support this and bail out.
+	 */
+	reg0 = vcpu->run->s.regs.gprs[0];
+	fc = (reg0 >> 24) & 0xff;
+	if (fc != 0x03)
+		return -EOPNOTSUPP;
+
+	/* PQAP instruction is allowed for guest kernel only */
+	if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
+		return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
+
+	/* Common PQAP instruction specification exceptions */
+	/* bits 41-47 must all be zeros */
+	if (reg0 & 0x007f0000UL)
+		return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
+	/* APFT not install and T bit set */
+	if (!test_kvm_facility(vcpu->kvm, 15) && (reg0 & 0x00800000UL))
+		return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
+	/* APXA not installed and APID greater 64 or APQI greater 16 */
+	if (!(vcpu->kvm->arch.crypto.crycbd & 0x02) && (reg0 & 0x0000c0f0UL))
+		return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
+
+	/* AQIC function code specific exception */
+	/* facility 65 not present for AQIC function code */
+	if (!test_kvm_facility(vcpu->kvm, 65))
+		return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
+
+	/*
+	 * If the hook callback is registered, there will be a pointer to the
+	 * hook function pointer in the kvm_s390_crypto structure. Lock the
+	 * owner, retrieve the hook function pointer and call the hook.
+	 */
+	down_read(&vcpu->kvm->arch.crypto.pqap_hook_rwsem);
+	if (vcpu->kvm->arch.crypto.pqap_hook) {
+		pqap_hook = *vcpu->kvm->arch.crypto.pqap_hook;
+		ret = pqap_hook(vcpu);
+		if (!ret && vcpu->run->s.regs.gprs[1] & 0x00ff0000)
+			kvm_s390_set_psw_cc(vcpu, 3);
+		up_read(&vcpu->kvm->arch.crypto.pqap_hook_rwsem);
+		return ret;
+	}
+	up_read(&vcpu->kvm->arch.crypto.pqap_hook_rwsem);
+	/*
+	 * A vfio_driver must register a hook.
+	 * No hook means no driver to enable the SIE CRYCB and no queues.
+	 * We send this response to the guest.
+	 */
+	status.response_code = 0x01;
+	memcpy(&vcpu->run->s.regs.gprs[1], &status, sizeof(status));
+	kvm_s390_set_psw_cc(vcpu, 3);
+	return 0;
+}
+
+static int handle_stfl(struct kvm_vcpu *vcpu)
+{
+	int rc;
+	unsigned int fac;
+
+	vcpu->stat.instruction_stfl++;
+
+	if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
+		return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
+
+	/*
+	 * We need to shift the lower 32 facility bits (bit 0-31) from a u64
+	 * into a u32 memory representation. They will remain bits 0-31.
+	 */
+	fac = *vcpu->kvm->arch.model.fac_list >> 32;
+	rc = write_guest_lc(vcpu, offsetof(struct lowcore, stfl_fac_list),
+			    &fac, sizeof(fac));
+	if (rc)
+		return rc;
+	VCPU_EVENT(vcpu, 3, "STFL: store facility list 0x%x", fac);
+	trace_kvm_s390_handle_stfl(vcpu, fac);
+	return 0;
+}
+
+#define PSW_MASK_ADDR_MODE (PSW_MASK_EA | PSW_MASK_BA)
+#define PSW_MASK_UNASSIGNED 0xb80800fe7fffffffUL
+#define PSW_ADDR_24 0x0000000000ffffffUL
+#define PSW_ADDR_31 0x000000007fffffffUL
+
+int is_valid_psw(psw_t *psw)
+{
+	if (psw->mask & PSW_MASK_UNASSIGNED)
+		return 0;
+	if ((psw->mask & PSW_MASK_ADDR_MODE) == PSW_MASK_BA) {
+		if (psw->addr & ~PSW_ADDR_31)
+			return 0;
+	}
+	if (!(psw->mask & PSW_MASK_ADDR_MODE) && (psw->addr & ~PSW_ADDR_24))
+		return 0;
+	if ((psw->mask & PSW_MASK_ADDR_MODE) ==  PSW_MASK_EA)
+		return 0;
+	if (psw->addr & 1)
+		return 0;
+	return 1;
+}
+
+int kvm_s390_handle_lpsw(struct kvm_vcpu *vcpu)
+{
+	psw_t *gpsw = &vcpu->arch.sie_block->gpsw;
+	psw_compat_t new_psw;
+	u64 addr;
+	int rc;
+	u8 ar;
+
+	vcpu->stat.instruction_lpsw++;
+
+	if (gpsw->mask & PSW_MASK_PSTATE)
+		return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
+
+	addr = kvm_s390_get_base_disp_s(vcpu, &ar);
+	if (addr & 7)
+		return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
+
+	rc = read_guest(vcpu, addr, ar, &new_psw, sizeof(new_psw));
+	if (rc)
+		return kvm_s390_inject_prog_cond(vcpu, rc);
+	if (!(new_psw.mask & PSW32_MASK_BASE))
+		return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
+	gpsw->mask = (new_psw.mask & ~PSW32_MASK_BASE) << 32;
+	gpsw->mask |= new_psw.addr & PSW32_ADDR_AMODE;
+	gpsw->addr = new_psw.addr & ~PSW32_ADDR_AMODE;
+	if (!is_valid_psw(gpsw))
+		return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
+	return 0;
+}
+
+static int handle_lpswe(struct kvm_vcpu *vcpu)
+{
+	psw_t new_psw;
+	u64 addr;
+	int rc;
+	u8 ar;
+
+	vcpu->stat.instruction_lpswe++;
+
+	if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
+		return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
+
+	addr = kvm_s390_get_base_disp_s(vcpu, &ar);
+	if (addr & 7)
+		return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
+	rc = read_guest(vcpu, addr, ar, &new_psw, sizeof(new_psw));
+	if (rc)
+		return kvm_s390_inject_prog_cond(vcpu, rc);
+	vcpu->arch.sie_block->gpsw = new_psw;
+	if (!is_valid_psw(&vcpu->arch.sie_block->gpsw))
+		return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
+	return 0;
+}
+
+static int handle_stidp(struct kvm_vcpu *vcpu)
+{
+	u64 stidp_data = vcpu->kvm->arch.model.cpuid;
+	u64 operand2;
+	int rc;
+	u8 ar;
+
+	vcpu->stat.instruction_stidp++;
+
+	if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
+		return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
+
+	operand2 = kvm_s390_get_base_disp_s(vcpu, &ar);
+
+	if (operand2 & 7)
+		return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
+
+	rc = write_guest(vcpu, operand2, ar, &stidp_data, sizeof(stidp_data));
+	if (rc)
+		return kvm_s390_inject_prog_cond(vcpu, rc);
+
+	VCPU_EVENT(vcpu, 3, "STIDP: store cpu id 0x%llx", stidp_data);
+	return 0;
+}
+
+static void handle_stsi_3_2_2(struct kvm_vcpu *vcpu, struct sysinfo_3_2_2 *mem)
+{
+	int cpus = 0;
+	int n;
+
+	cpus = atomic_read(&vcpu->kvm->online_vcpus);
+
+	/* deal with other level 3 hypervisors */
+	if (stsi(mem, 3, 2, 2))
+		mem->count = 0;
+	if (mem->count < 8)
+		mem->count++;
+	for (n = mem->count - 1; n > 0 ; n--)
+		memcpy(&mem->vm[n], &mem->vm[n - 1], sizeof(mem->vm[0]));
+
+	memset(&mem->vm[0], 0, sizeof(mem->vm[0]));
+	mem->vm[0].cpus_total = cpus;
+	mem->vm[0].cpus_configured = cpus;
+	mem->vm[0].cpus_standby = 0;
+	mem->vm[0].cpus_reserved = 0;
+	mem->vm[0].caf = 1000;
+	memcpy(mem->vm[0].name, "KVMguest", 8);
+	ASCEBC(mem->vm[0].name, 8);
+	memcpy(mem->vm[0].cpi, "KVM/Linux       ", 16);
+	ASCEBC(mem->vm[0].cpi, 16);
+}
+
+static void insert_stsi_usr_data(struct kvm_vcpu *vcpu, u64 addr, u8 ar,
+				 u8 fc, u8 sel1, u16 sel2)
+{
+	vcpu->run->exit_reason = KVM_EXIT_S390_STSI;
+	vcpu->run->s390_stsi.addr = addr;
+	vcpu->run->s390_stsi.ar = ar;
+	vcpu->run->s390_stsi.fc = fc;
+	vcpu->run->s390_stsi.sel1 = sel1;
+	vcpu->run->s390_stsi.sel2 = sel2;
+}
+
+static int handle_stsi(struct kvm_vcpu *vcpu)
+{
+	int fc = (vcpu->run->s.regs.gprs[0] & 0xf0000000) >> 28;
+	int sel1 = vcpu->run->s.regs.gprs[0] & 0xff;
+	int sel2 = vcpu->run->s.regs.gprs[1] & 0xffff;
+	unsigned long mem = 0;
+	u64 operand2;
+	int rc = 0;
+	u8 ar;
+
+	vcpu->stat.instruction_stsi++;
+	VCPU_EVENT(vcpu, 3, "STSI: fc: %u sel1: %u sel2: %u", fc, sel1, sel2);
+
+	if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
+		return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
+
+	/* Bailout forbidden function codes */
+	if (fc > 3 && fc != 15)
+		goto out_no_data;
+
+	/*
+	 * fc 15 is provided only with
+	 *   - PTF/CPU topology support through facility 15
+	 *   - KVM_CAP_S390_USER_STSI
+	 */
+	if (fc == 15 && (!test_kvm_facility(vcpu->kvm, 11) ||
+			 !vcpu->kvm->arch.user_stsi))
+		goto out_no_data;
+
+	if (vcpu->run->s.regs.gprs[0] & 0x0fffff00
+	    || vcpu->run->s.regs.gprs[1] & 0xffff0000)
+		return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
+
+	if (fc == 0) {
+		vcpu->run->s.regs.gprs[0] = 3 << 28;
+		kvm_s390_set_psw_cc(vcpu, 0);
+		return 0;
+	}
+
+	operand2 = kvm_s390_get_base_disp_s(vcpu, &ar);
+
+	if (!kvm_s390_pv_cpu_is_protected(vcpu) && (operand2 & 0xfff))
+		return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
+
+	switch (fc) {
+	case 1: /* same handling for 1 and 2 */
+	case 2:
+		mem = get_zeroed_page(GFP_KERNEL_ACCOUNT);
+		if (!mem)
+			goto out_no_data;
+		if (stsi((void *) mem, fc, sel1, sel2))
+			goto out_no_data;
+		break;
+	case 3:
+		if (sel1 != 2 || sel2 != 2)
+			goto out_no_data;
+		mem = get_zeroed_page(GFP_KERNEL_ACCOUNT);
+		if (!mem)
+			goto out_no_data;
+		handle_stsi_3_2_2(vcpu, (void *) mem);
+		break;
+	case 15: /* fc 15 is fully handled in userspace */
+		insert_stsi_usr_data(vcpu, operand2, ar, fc, sel1, sel2);
+		trace_kvm_s390_handle_stsi(vcpu, fc, sel1, sel2, operand2);
+		return -EREMOTE;
+	}
+	if (kvm_s390_pv_cpu_is_protected(vcpu)) {
+		memcpy(sida_addr(vcpu->arch.sie_block), (void *)mem, PAGE_SIZE);
+		rc = 0;
+	} else {
+		rc = write_guest(vcpu, operand2, ar, (void *)mem, PAGE_SIZE);
+	}
+	if (rc) {
+		rc = kvm_s390_inject_prog_cond(vcpu, rc);
+		goto out;
+	}
+	if (vcpu->kvm->arch.user_stsi) {
+		insert_stsi_usr_data(vcpu, operand2, ar, fc, sel1, sel2);
+		rc = -EREMOTE;
+	}
+	trace_kvm_s390_handle_stsi(vcpu, fc, sel1, sel2, operand2);
+	free_page(mem);
+	kvm_s390_set_psw_cc(vcpu, 0);
+	vcpu->run->s.regs.gprs[0] = 0;
+	return rc;
+out_no_data:
+	kvm_s390_set_psw_cc(vcpu, 3);
+out:
+	free_page(mem);
+	return rc;
+}
+
+int kvm_s390_handle_b2(struct kvm_vcpu *vcpu)
+{
+	switch (vcpu->arch.sie_block->ipa & 0x00ff) {
+	case 0x02:
+		return handle_stidp(vcpu);
+	case 0x04:
+		return handle_set_clock(vcpu);
+	case 0x10:
+		return handle_set_prefix(vcpu);
+	case 0x11:
+		return handle_store_prefix(vcpu);
+	case 0x12:
+		return handle_store_cpu_address(vcpu);
+	case 0x14:
+		return kvm_s390_handle_vsie(vcpu);
+	case 0x21:
+	case 0x50:
+		return handle_ipte_interlock(vcpu);
+	case 0x29:
+		return handle_iske(vcpu);
+	case 0x2a:
+		return handle_rrbe(vcpu);
+	case 0x2b:
+		return handle_sske(vcpu);
+	case 0x2c:
+		return handle_test_block(vcpu);
+	case 0x30:
+	case 0x31:
+	case 0x32:
+	case 0x33:
+	case 0x34:
+	case 0x35:
+	case 0x36:
+	case 0x37:
+	case 0x38:
+	case 0x39:
+	case 0x3a:
+	case 0x3b:
+	case 0x3c:
+	case 0x5f:
+	case 0x74:
+	case 0x76:
+		return handle_io_inst(vcpu);
+	case 0x56:
+		return handle_sthyi(vcpu);
+	case 0x7d:
+		return handle_stsi(vcpu);
+	case 0xaf:
+		return handle_pqap(vcpu);
+	case 0xb1:
+		return handle_stfl(vcpu);
+	case 0xb2:
+		return handle_lpswe(vcpu);
+	default:
+		return -EOPNOTSUPP;
+	}
+}
+
+static int handle_epsw(struct kvm_vcpu *vcpu)
+{
+	int reg1, reg2;
+
+	vcpu->stat.instruction_epsw++;
+
+	kvm_s390_get_regs_rre(vcpu, &reg1, &reg2);
+
+	/* This basically extracts the mask half of the psw. */
+	vcpu->run->s.regs.gprs[reg1] &= 0xffffffff00000000UL;
+	vcpu->run->s.regs.gprs[reg1] |= vcpu->arch.sie_block->gpsw.mask >> 32;
+	if (reg2) {
+		vcpu->run->s.regs.gprs[reg2] &= 0xffffffff00000000UL;
+		vcpu->run->s.regs.gprs[reg2] |=
+			vcpu->arch.sie_block->gpsw.mask & 0x00000000ffffffffUL;
+	}
+	return 0;
+}
+
+#define PFMF_RESERVED   0xfffc0101UL
+#define PFMF_SK         0x00020000UL
+#define PFMF_CF         0x00010000UL
+#define PFMF_UI         0x00008000UL
+#define PFMF_FSC        0x00007000UL
+#define PFMF_NQ         0x00000800UL
+#define PFMF_MR         0x00000400UL
+#define PFMF_MC         0x00000200UL
+#define PFMF_KEY        0x000000feUL
+
+static int handle_pfmf(struct kvm_vcpu *vcpu)
+{
+	bool mr = false, mc = false, nq;
+	int reg1, reg2;
+	unsigned long start, end;
+	unsigned char key;
+
+	vcpu->stat.instruction_pfmf++;
+
+	kvm_s390_get_regs_rre(vcpu, &reg1, &reg2);
+
+	if (!test_kvm_facility(vcpu->kvm, 8))
+		return kvm_s390_inject_program_int(vcpu, PGM_OPERATION);
+
+	if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
+		return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
+
+	if (vcpu->run->s.regs.gprs[reg1] & PFMF_RESERVED)
+		return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
+
+	/* Only provide non-quiescing support if enabled for the guest */
+	if (vcpu->run->s.regs.gprs[reg1] & PFMF_NQ &&
+	    !test_kvm_facility(vcpu->kvm, 14))
+		return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
+
+	/* Only provide conditional-SSKE support if enabled for the guest */
+	if (vcpu->run->s.regs.gprs[reg1] & PFMF_SK &&
+	    test_kvm_facility(vcpu->kvm, 10)) {
+		mr = vcpu->run->s.regs.gprs[reg1] & PFMF_MR;
+		mc = vcpu->run->s.regs.gprs[reg1] & PFMF_MC;
+	}
+
+	nq = vcpu->run->s.regs.gprs[reg1] & PFMF_NQ;
+	key = vcpu->run->s.regs.gprs[reg1] & PFMF_KEY;
+	start = vcpu->run->s.regs.gprs[reg2] & PAGE_MASK;
+	start = kvm_s390_logical_to_effective(vcpu, start);
+
+	if (vcpu->run->s.regs.gprs[reg1] & PFMF_CF) {
+		if (kvm_s390_check_low_addr_prot_real(vcpu, start))
+			return kvm_s390_inject_prog_irq(vcpu, &vcpu->arch.pgm);
+	}
+
+	switch (vcpu->run->s.regs.gprs[reg1] & PFMF_FSC) {
+	case 0x00000000:
+		/* only 4k frames specify a real address */
+		start = kvm_s390_real_to_abs(vcpu, start);
+		end = (start + PAGE_SIZE) & ~(PAGE_SIZE - 1);
+		break;
+	case 0x00001000:
+		end = (start + _SEGMENT_SIZE) & ~(_SEGMENT_SIZE - 1);
+		break;
+	case 0x00002000:
+		/* only support 2G frame size if EDAT2 is available and we are
+		   not in 24-bit addressing mode */
+		if (!test_kvm_facility(vcpu->kvm, 78) ||
+		    psw_bits(vcpu->arch.sie_block->gpsw).eaba == PSW_BITS_AMODE_24BIT)
+			return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
+		end = (start + _REGION3_SIZE) & ~(_REGION3_SIZE - 1);
+		break;
+	default:
+		return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
+	}
+
+	while (start != end) {
+		unsigned long vmaddr;
+		bool unlocked = false;
+
+		/* Translate guest address to host address */
+		vmaddr = gfn_to_hva(vcpu->kvm, gpa_to_gfn(start));
+		if (kvm_is_error_hva(vmaddr))
+			return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
+
+		if (vcpu->run->s.regs.gprs[reg1] & PFMF_CF) {
+			if (kvm_clear_guest(vcpu->kvm, start, PAGE_SIZE))
+				return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
+		}
+
+		if (vcpu->run->s.regs.gprs[reg1] & PFMF_SK) {
+			int rc = kvm_s390_skey_check_enable(vcpu);
+
+			if (rc)
+				return rc;
+			mmap_read_lock(current->mm);
+			rc = cond_set_guest_storage_key(current->mm, vmaddr,
+							key, NULL, nq, mr, mc);
+			if (rc < 0) {
+				rc = fixup_user_fault(current->mm, vmaddr,
+						      FAULT_FLAG_WRITE, &unlocked);
+				rc = !rc ? -EAGAIN : rc;
+			}
+			mmap_read_unlock(current->mm);
+			if (rc == -EFAULT)
+				return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
+			if (rc == -EAGAIN)
+				continue;
+			if (rc < 0)
+				return rc;
+		}
+		start += PAGE_SIZE;
+	}
+	if (vcpu->run->s.regs.gprs[reg1] & PFMF_FSC) {
+		if (psw_bits(vcpu->arch.sie_block->gpsw).eaba == PSW_BITS_AMODE_64BIT) {
+			vcpu->run->s.regs.gprs[reg2] = end;
+		} else {
+			vcpu->run->s.regs.gprs[reg2] &= ~0xffffffffUL;
+			end = kvm_s390_logical_to_effective(vcpu, end);
+			vcpu->run->s.regs.gprs[reg2] |= end;
+		}
+	}
+	return 0;
+}
+
+/*
+ * Must be called with relevant read locks held (kvm->mm->mmap_lock, kvm->srcu)
+ */
+static inline int __do_essa(struct kvm_vcpu *vcpu, const int orc)
+{
+	int r1, r2, nappended, entries;
+	unsigned long gfn, hva, res, pgstev, ptev;
+	unsigned long *cbrlo;
+
+	/*
+	 * We don't need to set SD.FPF.SK to 1 here, because if we have a
+	 * machine check here we either handle it or crash
+	 */
+
+	kvm_s390_get_regs_rre(vcpu, &r1, &r2);
+	gfn = vcpu->run->s.regs.gprs[r2] >> PAGE_SHIFT;
+	hva = gfn_to_hva(vcpu->kvm, gfn);
+	entries = (vcpu->arch.sie_block->cbrlo & ~PAGE_MASK) >> 3;
+
+	if (kvm_is_error_hva(hva))
+		return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
+
+	nappended = pgste_perform_essa(vcpu->kvm->mm, hva, orc, &ptev, &pgstev);
+	if (nappended < 0) {
+		res = orc ? 0x10 : 0;
+		vcpu->run->s.regs.gprs[r1] = res; /* Exception Indication */
+		return 0;
+	}
+	res = (pgstev & _PGSTE_GPS_USAGE_MASK) >> 22;
+	/*
+	 * Set the block-content state part of the result. 0 means resident, so
+	 * nothing to do if the page is valid. 2 is for preserved pages
+	 * (non-present and non-zero), and 3 for zero pages (non-present and
+	 * zero).
+	 */
+	if (ptev & _PAGE_INVALID) {
+		res |= 2;
+		if (pgstev & _PGSTE_GPS_ZERO)
+			res |= 1;
+	}
+	if (pgstev & _PGSTE_GPS_NODAT)
+		res |= 0x20;
+	vcpu->run->s.regs.gprs[r1] = res;
+	/*
+	 * It is possible that all the normal 511 slots were full, in which case
+	 * we will now write in the 512th slot, which is reserved for host use.
+	 * In both cases we let the normal essa handling code process all the
+	 * slots, including the reserved one, if needed.
+	 */
+	if (nappended > 0) {
+		cbrlo = phys_to_virt(vcpu->arch.sie_block->cbrlo & PAGE_MASK);
+		cbrlo[entries] = gfn << PAGE_SHIFT;
+	}
+
+	if (orc) {
+		struct kvm_memory_slot *ms = gfn_to_memslot(vcpu->kvm, gfn);
+
+		/* Increment only if we are really flipping the bit */
+		if (ms && !test_and_set_bit(gfn - ms->base_gfn, kvm_second_dirty_bitmap(ms)))
+			atomic64_inc(&vcpu->kvm->arch.cmma_dirty_pages);
+	}
+
+	return nappended;
+}
+
+static int handle_essa(struct kvm_vcpu *vcpu)
+{
+	/* entries expected to be 1FF */
+	int entries = (vcpu->arch.sie_block->cbrlo & ~PAGE_MASK) >> 3;
+	unsigned long *cbrlo;
+	struct gmap *gmap;
+	int i, orc;
+
+	VCPU_EVENT(vcpu, 4, "ESSA: release %d pages", entries);
+	gmap = vcpu->arch.gmap;
+	vcpu->stat.instruction_essa++;
+	if (!vcpu->kvm->arch.use_cmma)
+		return kvm_s390_inject_program_int(vcpu, PGM_OPERATION);
+
+	if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
+		return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
+	/* Check for invalid operation request code */
+	orc = (vcpu->arch.sie_block->ipb & 0xf0000000) >> 28;
+	/* ORCs 0-6 are always valid */
+	if (orc > (test_kvm_facility(vcpu->kvm, 147) ? ESSA_SET_STABLE_NODAT
+						: ESSA_SET_STABLE_IF_RESIDENT))
+		return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
+
+	if (!vcpu->kvm->arch.migration_mode) {
+		/*
+		 * CMMA is enabled in the KVM settings, but is disabled in
+		 * the SIE block and in the mm_context, and we are not doing
+		 * a migration. Enable CMMA in the mm_context.
+		 * Since we need to take a write lock to write to the context
+		 * to avoid races with storage keys handling, we check if the
+		 * value really needs to be written to; if the value is
+		 * already correct, we do nothing and avoid the lock.
+		 */
+		if (vcpu->kvm->mm->context.uses_cmm == 0) {
+			mmap_write_lock(vcpu->kvm->mm);
+			vcpu->kvm->mm->context.uses_cmm = 1;
+			mmap_write_unlock(vcpu->kvm->mm);
+		}
+		/*
+		 * If we are here, we are supposed to have CMMA enabled in
+		 * the SIE block. Enabling CMMA works on a per-CPU basis,
+		 * while the context use_cmma flag is per process.
+		 * It's possible that the context flag is enabled and the
+		 * SIE flag is not, so we set the flag always; if it was
+		 * already set, nothing changes, otherwise we enable it
+		 * on this CPU too.
+		 */
+		vcpu->arch.sie_block->ecb2 |= ECB2_CMMA;
+		/* Retry the ESSA instruction */
+		kvm_s390_retry_instr(vcpu);
+	} else {
+		int srcu_idx;
+
+		mmap_read_lock(vcpu->kvm->mm);
+		srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
+		i = __do_essa(vcpu, orc);
+		srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx);
+		mmap_read_unlock(vcpu->kvm->mm);
+		if (i < 0)
+			return i;
+		/* Account for the possible extra cbrl entry */
+		entries += i;
+	}
+	vcpu->arch.sie_block->cbrlo &= PAGE_MASK;	/* reset nceo */
+	cbrlo = phys_to_virt(vcpu->arch.sie_block->cbrlo);
+	mmap_read_lock(gmap->mm);
+	for (i = 0; i < entries; ++i)
+		__gmap_zap(gmap, cbrlo[i]);
+	mmap_read_unlock(gmap->mm);
+	return 0;
+}
+
+int kvm_s390_handle_b9(struct kvm_vcpu *vcpu)
+{
+	switch (vcpu->arch.sie_block->ipa & 0x00ff) {
+	case 0x8a:
+	case 0x8e:
+	case 0x8f:
+		return handle_ipte_interlock(vcpu);
+	case 0x8d:
+		return handle_epsw(vcpu);
+	case 0xab:
+		return handle_essa(vcpu);
+	case 0xaf:
+		return handle_pfmf(vcpu);
+	default:
+		return -EOPNOTSUPP;
+	}
+}
+
+int kvm_s390_handle_lctl(struct kvm_vcpu *vcpu)
+{
+	int reg1 = (vcpu->arch.sie_block->ipa & 0x00f0) >> 4;
+	int reg3 = vcpu->arch.sie_block->ipa & 0x000f;
+	int reg, rc, nr_regs;
+	u32 ctl_array[16];
+	u64 ga;
+	u8 ar;
+
+	vcpu->stat.instruction_lctl++;
+
+	if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
+		return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
+
+	ga = kvm_s390_get_base_disp_rs(vcpu, &ar);
+
+	if (ga & 3)
+		return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
+
+	VCPU_EVENT(vcpu, 4, "LCTL: r1:%d, r3:%d, addr: 0x%llx", reg1, reg3, ga);
+	trace_kvm_s390_handle_lctl(vcpu, 0, reg1, reg3, ga);
+
+	nr_regs = ((reg3 - reg1) & 0xf) + 1;
+	rc = read_guest(vcpu, ga, ar, ctl_array, nr_regs * sizeof(u32));
+	if (rc)
+		return kvm_s390_inject_prog_cond(vcpu, rc);
+	reg = reg1;
+	nr_regs = 0;
+	do {
+		vcpu->arch.sie_block->gcr[reg] &= 0xffffffff00000000ul;
+		vcpu->arch.sie_block->gcr[reg] |= ctl_array[nr_regs++];
+		if (reg == reg3)
+			break;
+		reg = (reg + 1) % 16;
+	} while (1);
+	kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
+	return 0;
+}
+
+int kvm_s390_handle_stctl(struct kvm_vcpu *vcpu)
+{
+	int reg1 = (vcpu->arch.sie_block->ipa & 0x00f0) >> 4;
+	int reg3 = vcpu->arch.sie_block->ipa & 0x000f;
+	int reg, rc, nr_regs;
+	u32 ctl_array[16];
+	u64 ga;
+	u8 ar;
+
+	vcpu->stat.instruction_stctl++;
+
+	if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
+		return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
+
+	ga = kvm_s390_get_base_disp_rs(vcpu, &ar);
+
+	if (ga & 3)
+		return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
+
+	VCPU_EVENT(vcpu, 4, "STCTL r1:%d, r3:%d, addr: 0x%llx", reg1, reg3, ga);
+	trace_kvm_s390_handle_stctl(vcpu, 0, reg1, reg3, ga);
+
+	reg = reg1;
+	nr_regs = 0;
+	do {
+		ctl_array[nr_regs++] = vcpu->arch.sie_block->gcr[reg];
+		if (reg == reg3)
+			break;
+		reg = (reg + 1) % 16;
+	} while (1);
+	rc = write_guest(vcpu, ga, ar, ctl_array, nr_regs * sizeof(u32));
+	return rc ? kvm_s390_inject_prog_cond(vcpu, rc) : 0;
+}
+
+static int handle_lctlg(struct kvm_vcpu *vcpu)
+{
+	int reg1 = (vcpu->arch.sie_block->ipa & 0x00f0) >> 4;
+	int reg3 = vcpu->arch.sie_block->ipa & 0x000f;
+	int reg, rc, nr_regs;
+	u64 ctl_array[16];
+	u64 ga;
+	u8 ar;
+
+	vcpu->stat.instruction_lctlg++;
+
+	if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
+		return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
+
+	ga = kvm_s390_get_base_disp_rsy(vcpu, &ar);
+
+	if (ga & 7)
+		return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
+
+	VCPU_EVENT(vcpu, 4, "LCTLG: r1:%d, r3:%d, addr: 0x%llx", reg1, reg3, ga);
+	trace_kvm_s390_handle_lctl(vcpu, 1, reg1, reg3, ga);
+
+	nr_regs = ((reg3 - reg1) & 0xf) + 1;
+	rc = read_guest(vcpu, ga, ar, ctl_array, nr_regs * sizeof(u64));
+	if (rc)
+		return kvm_s390_inject_prog_cond(vcpu, rc);
+	reg = reg1;
+	nr_regs = 0;
+	do {
+		vcpu->arch.sie_block->gcr[reg] = ctl_array[nr_regs++];
+		if (reg == reg3)
+			break;
+		reg = (reg + 1) % 16;
+	} while (1);
+	kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
+	return 0;
+}
+
+static int handle_stctg(struct kvm_vcpu *vcpu)
+{
+	int reg1 = (vcpu->arch.sie_block->ipa & 0x00f0) >> 4;
+	int reg3 = vcpu->arch.sie_block->ipa & 0x000f;
+	int reg, rc, nr_regs;
+	u64 ctl_array[16];
+	u64 ga;
+	u8 ar;
+
+	vcpu->stat.instruction_stctg++;
+
+	if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
+		return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
+
+	ga = kvm_s390_get_base_disp_rsy(vcpu, &ar);
+
+	if (ga & 7)
+		return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
+
+	VCPU_EVENT(vcpu, 4, "STCTG r1:%d, r3:%d, addr: 0x%llx", reg1, reg3, ga);
+	trace_kvm_s390_handle_stctl(vcpu, 1, reg1, reg3, ga);
+
+	reg = reg1;
+	nr_regs = 0;
+	do {
+		ctl_array[nr_regs++] = vcpu->arch.sie_block->gcr[reg];
+		if (reg == reg3)
+			break;
+		reg = (reg + 1) % 16;
+	} while (1);
+	rc = write_guest(vcpu, ga, ar, ctl_array, nr_regs * sizeof(u64));
+	return rc ? kvm_s390_inject_prog_cond(vcpu, rc) : 0;
+}
+
+int kvm_s390_handle_eb(struct kvm_vcpu *vcpu)
+{
+	switch (vcpu->arch.sie_block->ipb & 0x000000ff) {
+	case 0x25:
+		return handle_stctg(vcpu);
+	case 0x2f:
+		return handle_lctlg(vcpu);
+	case 0x60:
+	case 0x61:
+	case 0x62:
+		return handle_ri(vcpu);
+	default:
+		return -EOPNOTSUPP;
+	}
+}
+
+static int handle_tprot(struct kvm_vcpu *vcpu)
+{
+	u64 address, operand2;
+	unsigned long gpa;
+	u8 access_key;
+	bool writable;
+	int ret, cc;
+	u8 ar;
+
+	vcpu->stat.instruction_tprot++;
+
+	if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
+		return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
+
+	kvm_s390_get_base_disp_sse(vcpu, &address, &operand2, &ar, NULL);
+	access_key = (operand2 & 0xf0) >> 4;
+
+	if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_DAT)
+		ipte_lock(vcpu->kvm);
+
+	ret = guest_translate_address_with_key(vcpu, address, ar, &gpa,
+					       GACC_STORE, access_key);
+	if (ret == 0) {
+		gfn_to_hva_prot(vcpu->kvm, gpa_to_gfn(gpa), &writable);
+	} else if (ret == PGM_PROTECTION) {
+		writable = false;
+		/* Write protected? Try again with read-only... */
+		ret = guest_translate_address_with_key(vcpu, address, ar, &gpa,
+						       GACC_FETCH, access_key);
+	}
+	if (ret >= 0) {
+		cc = -1;
+
+		/* Fetching permitted; storing permitted */
+		if (ret == 0 && writable)
+			cc = 0;
+		/* Fetching permitted; storing not permitted */
+		else if (ret == 0 && !writable)
+			cc = 1;
+		/* Fetching not permitted; storing not permitted */
+		else if (ret == PGM_PROTECTION)
+			cc = 2;
+		/* Translation not available */
+		else if (ret != PGM_ADDRESSING && ret != PGM_TRANSLATION_SPEC)
+			cc = 3;
+
+		if (cc != -1) {
+			kvm_s390_set_psw_cc(vcpu, cc);
+			ret = 0;
+		} else {
+			ret = kvm_s390_inject_program_int(vcpu, ret);
+		}
+	}
+
+	if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_DAT)
+		ipte_unlock(vcpu->kvm);
+	return ret;
+}
+
+int kvm_s390_handle_e5(struct kvm_vcpu *vcpu)
+{
+	switch (vcpu->arch.sie_block->ipa & 0x00ff) {
+	case 0x01:
+		return handle_tprot(vcpu);
+	default:
+		return -EOPNOTSUPP;
+	}
+}
+
+static int handle_sckpf(struct kvm_vcpu *vcpu)
+{
+	u32 value;
+
+	vcpu->stat.instruction_sckpf++;
+
+	if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
+		return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
+
+	if (vcpu->run->s.regs.gprs[0] & 0x00000000ffff0000)
+		return kvm_s390_inject_program_int(vcpu,
+						   PGM_SPECIFICATION);
+
+	value = vcpu->run->s.regs.gprs[0] & 0x000000000000ffff;
+	vcpu->arch.sie_block->todpr = value;
+
+	return 0;
+}
+
+static int handle_ptff(struct kvm_vcpu *vcpu)
+{
+	vcpu->stat.instruction_ptff++;
+
+	/* we don't emulate any control instructions yet */
+	kvm_s390_set_psw_cc(vcpu, 3);
+	return 0;
+}
+
+int kvm_s390_handle_01(struct kvm_vcpu *vcpu)
+{
+	switch (vcpu->arch.sie_block->ipa & 0x00ff) {
+	case 0x04:
+		return handle_ptff(vcpu);
+	case 0x07:
+		return handle_sckpf(vcpu);
+	default:
+		return -EOPNOTSUPP;
+	}
+}
diff --git a/arch/s390/kvm/pv.c b/arch/s390/kvm/pv.c
new file mode 100644
index 0000000000..75e81ba26d
--- /dev/null
+++ b/arch/s390/kvm/pv.c
@@ -0,0 +1,894 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Hosting Protected Virtual Machines
+ *
+ * Copyright IBM Corp. 2019, 2020
+ *    Author(s): Janosch Frank <frankja@linux.ibm.com>
+ */
+#include <linux/kvm.h>
+#include <linux/kvm_host.h>
+#include <linux/minmax.h>
+#include <linux/pagemap.h>
+#include <linux/sched/signal.h>
+#include <asm/gmap.h>
+#include <asm/uv.h>
+#include <asm/mman.h>
+#include <linux/pagewalk.h>
+#include <linux/sched/mm.h>
+#include <linux/mmu_notifier.h>
+#include "kvm-s390.h"
+
+bool kvm_s390_pv_is_protected(struct kvm *kvm)
+{
+	lockdep_assert_held(&kvm->lock);
+	return !!kvm_s390_pv_get_handle(kvm);
+}
+EXPORT_SYMBOL_GPL(kvm_s390_pv_is_protected);
+
+bool kvm_s390_pv_cpu_is_protected(struct kvm_vcpu *vcpu)
+{
+	lockdep_assert_held(&vcpu->mutex);
+	return !!kvm_s390_pv_cpu_get_handle(vcpu);
+}
+EXPORT_SYMBOL_GPL(kvm_s390_pv_cpu_is_protected);
+
+/**
+ * struct pv_vm_to_be_destroyed - Represents a protected VM that needs to
+ * be destroyed
+ *
+ * @list: list head for the list of leftover VMs
+ * @old_gmap_table: the gmap table of the leftover protected VM
+ * @handle: the handle of the leftover protected VM
+ * @stor_var: pointer to the variable storage of the leftover protected VM
+ * @stor_base: address of the base storage of the leftover protected VM
+ *
+ * Represents a protected VM that is still registered with the Ultravisor,
+ * but which does not correspond any longer to an active KVM VM. It should
+ * be destroyed at some point later, either asynchronously or when the
+ * process terminates.
+ */
+struct pv_vm_to_be_destroyed {
+	struct list_head list;
+	unsigned long old_gmap_table;
+	u64 handle;
+	void *stor_var;
+	unsigned long stor_base;
+};
+
+static void kvm_s390_clear_pv_state(struct kvm *kvm)
+{
+	kvm->arch.pv.handle = 0;
+	kvm->arch.pv.guest_len = 0;
+	kvm->arch.pv.stor_base = 0;
+	kvm->arch.pv.stor_var = NULL;
+}
+
+int kvm_s390_pv_destroy_cpu(struct kvm_vcpu *vcpu, u16 *rc, u16 *rrc)
+{
+	int cc;
+
+	if (!kvm_s390_pv_cpu_get_handle(vcpu))
+		return 0;
+
+	cc = uv_cmd_nodata(kvm_s390_pv_cpu_get_handle(vcpu), UVC_CMD_DESTROY_SEC_CPU, rc, rrc);
+
+	KVM_UV_EVENT(vcpu->kvm, 3, "PROTVIRT DESTROY VCPU %d: rc %x rrc %x",
+		     vcpu->vcpu_id, *rc, *rrc);
+	WARN_ONCE(cc, "protvirt destroy cpu failed rc %x rrc %x", *rc, *rrc);
+
+	/* Intended memory leak for something that should never happen. */
+	if (!cc)
+		free_pages(vcpu->arch.pv.stor_base,
+			   get_order(uv_info.guest_cpu_stor_len));
+
+	free_page((unsigned long)sida_addr(vcpu->arch.sie_block));
+	vcpu->arch.sie_block->pv_handle_cpu = 0;
+	vcpu->arch.sie_block->pv_handle_config = 0;
+	memset(&vcpu->arch.pv, 0, sizeof(vcpu->arch.pv));
+	vcpu->arch.sie_block->sdf = 0;
+	/*
+	 * The sidad field (for sdf == 2) is now the gbea field (for sdf == 0).
+	 * Use the reset value of gbea to avoid leaking the kernel pointer of
+	 * the just freed sida.
+	 */
+	vcpu->arch.sie_block->gbea = 1;
+	kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
+
+	return cc ? EIO : 0;
+}
+
+int kvm_s390_pv_create_cpu(struct kvm_vcpu *vcpu, u16 *rc, u16 *rrc)
+{
+	struct uv_cb_csc uvcb = {
+		.header.cmd = UVC_CMD_CREATE_SEC_CPU,
+		.header.len = sizeof(uvcb),
+	};
+	void *sida_addr;
+	int cc;
+
+	if (kvm_s390_pv_cpu_get_handle(vcpu))
+		return -EINVAL;
+
+	vcpu->arch.pv.stor_base = __get_free_pages(GFP_KERNEL_ACCOUNT,
+						   get_order(uv_info.guest_cpu_stor_len));
+	if (!vcpu->arch.pv.stor_base)
+		return -ENOMEM;
+
+	/* Input */
+	uvcb.guest_handle = kvm_s390_pv_get_handle(vcpu->kvm);
+	uvcb.num = vcpu->arch.sie_block->icpua;
+	uvcb.state_origin = virt_to_phys(vcpu->arch.sie_block);
+	uvcb.stor_origin = virt_to_phys((void *)vcpu->arch.pv.stor_base);
+
+	/* Alloc Secure Instruction Data Area Designation */
+	sida_addr = (void *)__get_free_page(GFP_KERNEL_ACCOUNT | __GFP_ZERO);
+	if (!sida_addr) {
+		free_pages(vcpu->arch.pv.stor_base,
+			   get_order(uv_info.guest_cpu_stor_len));
+		return -ENOMEM;
+	}
+	vcpu->arch.sie_block->sidad = virt_to_phys(sida_addr);
+
+	cc = uv_call(0, (u64)&uvcb);
+	*rc = uvcb.header.rc;
+	*rrc = uvcb.header.rrc;
+	KVM_UV_EVENT(vcpu->kvm, 3,
+		     "PROTVIRT CREATE VCPU: cpu %d handle %llx rc %x rrc %x",
+		     vcpu->vcpu_id, uvcb.cpu_handle, uvcb.header.rc,
+		     uvcb.header.rrc);
+
+	if (cc) {
+		u16 dummy;
+
+		kvm_s390_pv_destroy_cpu(vcpu, &dummy, &dummy);
+		return -EIO;
+	}
+
+	/* Output */
+	vcpu->arch.pv.handle = uvcb.cpu_handle;
+	vcpu->arch.sie_block->pv_handle_cpu = uvcb.cpu_handle;
+	vcpu->arch.sie_block->pv_handle_config = kvm_s390_pv_get_handle(vcpu->kvm);
+	vcpu->arch.sie_block->sdf = 2;
+	kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
+	return 0;
+}
+
+/* only free resources when the destroy was successful */
+static void kvm_s390_pv_dealloc_vm(struct kvm *kvm)
+{
+	vfree(kvm->arch.pv.stor_var);
+	free_pages(kvm->arch.pv.stor_base,
+		   get_order(uv_info.guest_base_stor_len));
+	kvm_s390_clear_pv_state(kvm);
+}
+
+static int kvm_s390_pv_alloc_vm(struct kvm *kvm)
+{
+	unsigned long base = uv_info.guest_base_stor_len;
+	unsigned long virt = uv_info.guest_virt_var_stor_len;
+	unsigned long npages = 0, vlen = 0;
+
+	kvm->arch.pv.stor_var = NULL;
+	kvm->arch.pv.stor_base = __get_free_pages(GFP_KERNEL_ACCOUNT, get_order(base));
+	if (!kvm->arch.pv.stor_base)
+		return -ENOMEM;
+
+	/*
+	 * Calculate current guest storage for allocation of the
+	 * variable storage, which is based on the length in MB.
+	 *
+	 * Slots are sorted by GFN
+	 */
+	mutex_lock(&kvm->slots_lock);
+	npages = kvm_s390_get_gfn_end(kvm_memslots(kvm));
+	mutex_unlock(&kvm->slots_lock);
+
+	kvm->arch.pv.guest_len = npages * PAGE_SIZE;
+
+	/* Allocate variable storage */
+	vlen = ALIGN(virt * ((npages * PAGE_SIZE) / HPAGE_SIZE), PAGE_SIZE);
+	vlen += uv_info.guest_virt_base_stor_len;
+	kvm->arch.pv.stor_var = vzalloc(vlen);
+	if (!kvm->arch.pv.stor_var)
+		goto out_err;
+	return 0;
+
+out_err:
+	kvm_s390_pv_dealloc_vm(kvm);
+	return -ENOMEM;
+}
+
+/**
+ * kvm_s390_pv_dispose_one_leftover - Clean up one leftover protected VM.
+ * @kvm: the KVM that was associated with this leftover protected VM
+ * @leftover: details about the leftover protected VM that needs a clean up
+ * @rc: the RC code of the Destroy Secure Configuration UVC
+ * @rrc: the RRC code of the Destroy Secure Configuration UVC
+ *
+ * Destroy one leftover protected VM.
+ * On success, kvm->mm->context.protected_count will be decremented atomically
+ * and all other resources used by the VM will be freed.
+ *
+ * Return: 0 in case of success, otherwise 1
+ */
+static int kvm_s390_pv_dispose_one_leftover(struct kvm *kvm,
+					    struct pv_vm_to_be_destroyed *leftover,
+					    u16 *rc, u16 *rrc)
+{
+	int cc;
+
+	/* It used the destroy-fast UVC, nothing left to do here */
+	if (!leftover->handle)
+		goto done_fast;
+	cc = uv_cmd_nodata(leftover->handle, UVC_CMD_DESTROY_SEC_CONF, rc, rrc);
+	KVM_UV_EVENT(kvm, 3, "PROTVIRT DESTROY LEFTOVER VM: rc %x rrc %x", *rc, *rrc);
+	WARN_ONCE(cc, "protvirt destroy leftover vm failed rc %x rrc %x", *rc, *rrc);
+	if (cc)
+		return cc;
+	/*
+	 * Intentionally leak unusable memory. If the UVC fails, the memory
+	 * used for the VM and its metadata is permanently unusable.
+	 * This can only happen in case of a serious KVM or hardware bug; it
+	 * is not expected to happen in normal operation.
+	 */
+	free_pages(leftover->stor_base, get_order(uv_info.guest_base_stor_len));
+	free_pages(leftover->old_gmap_table, CRST_ALLOC_ORDER);
+	vfree(leftover->stor_var);
+done_fast:
+	atomic_dec(&kvm->mm->context.protected_count);
+	return 0;
+}
+
+/**
+ * kvm_s390_destroy_lower_2g - Destroy the first 2GB of protected guest memory.
+ * @kvm: the VM whose memory is to be cleared.
+ *
+ * Destroy the first 2GB of guest memory, to avoid prefix issues after reboot.
+ * The CPUs of the protected VM need to be destroyed beforehand.
+ */
+static void kvm_s390_destroy_lower_2g(struct kvm *kvm)
+{
+	const unsigned long pages_2g = SZ_2G / PAGE_SIZE;
+	struct kvm_memory_slot *slot;
+	unsigned long len;
+	int srcu_idx;
+
+	srcu_idx = srcu_read_lock(&kvm->srcu);
+
+	/* Take the memslot containing guest absolute address 0 */
+	slot = gfn_to_memslot(kvm, 0);
+	/* Clear all slots or parts thereof that are below 2GB */
+	while (slot && slot->base_gfn < pages_2g) {
+		len = min_t(u64, slot->npages, pages_2g - slot->base_gfn) * PAGE_SIZE;
+		s390_uv_destroy_range(kvm->mm, slot->userspace_addr, slot->userspace_addr + len);
+		/* Take the next memslot */
+		slot = gfn_to_memslot(kvm, slot->base_gfn + slot->npages);
+	}
+
+	srcu_read_unlock(&kvm->srcu, srcu_idx);
+}
+
+static int kvm_s390_pv_deinit_vm_fast(struct kvm *kvm, u16 *rc, u16 *rrc)
+{
+	struct uv_cb_destroy_fast uvcb = {
+		.header.cmd = UVC_CMD_DESTROY_SEC_CONF_FAST,
+		.header.len = sizeof(uvcb),
+		.handle = kvm_s390_pv_get_handle(kvm),
+	};
+	int cc;
+
+	cc = uv_call_sched(0, (u64)&uvcb);
+	if (rc)
+		*rc = uvcb.header.rc;
+	if (rrc)
+		*rrc = uvcb.header.rrc;
+	WRITE_ONCE(kvm->arch.gmap->guest_handle, 0);
+	KVM_UV_EVENT(kvm, 3, "PROTVIRT DESTROY VM FAST: rc %x rrc %x",
+		     uvcb.header.rc, uvcb.header.rrc);
+	WARN_ONCE(cc && uvcb.header.rc != 0x104,
+		  "protvirt destroy vm fast failed handle %llx rc %x rrc %x",
+		  kvm_s390_pv_get_handle(kvm), uvcb.header.rc, uvcb.header.rrc);
+	/* Intended memory leak on "impossible" error */
+	if (!cc)
+		kvm_s390_pv_dealloc_vm(kvm);
+	return cc ? -EIO : 0;
+}
+
+static inline bool is_destroy_fast_available(void)
+{
+	return test_bit_inv(BIT_UVC_CMD_DESTROY_SEC_CONF_FAST, uv_info.inst_calls_list);
+}
+
+/**
+ * kvm_s390_pv_set_aside - Set aside a protected VM for later teardown.
+ * @kvm: the VM
+ * @rc: return value for the RC field of the UVCB
+ * @rrc: return value for the RRC field of the UVCB
+ *
+ * Set aside the protected VM for a subsequent teardown. The VM will be able
+ * to continue immediately as a non-secure VM, and the information needed to
+ * properly tear down the protected VM is set aside. If another protected VM
+ * was already set aside without starting its teardown, this function will
+ * fail.
+ * The CPUs of the protected VM need to be destroyed beforehand.
+ *
+ * Context: kvm->lock needs to be held
+ *
+ * Return: 0 in case of success, -EINVAL if another protected VM was already set
+ * aside, -ENOMEM if the system ran out of memory.
+ */
+int kvm_s390_pv_set_aside(struct kvm *kvm, u16 *rc, u16 *rrc)
+{
+	struct pv_vm_to_be_destroyed *priv;
+	int res = 0;
+
+	lockdep_assert_held(&kvm->lock);
+	/*
+	 * If another protected VM was already prepared for teardown, refuse.
+	 * A normal deinitialization has to be performed instead.
+	 */
+	if (kvm->arch.pv.set_aside)
+		return -EINVAL;
+
+	/* Guest with segment type ASCE, refuse to destroy asynchronously */
+	if ((kvm->arch.gmap->asce & _ASCE_TYPE_MASK) == _ASCE_TYPE_SEGMENT)
+		return -EINVAL;
+
+	priv = kzalloc(sizeof(*priv), GFP_KERNEL);
+	if (!priv)
+		return -ENOMEM;
+
+	if (is_destroy_fast_available()) {
+		res = kvm_s390_pv_deinit_vm_fast(kvm, rc, rrc);
+	} else {
+		priv->stor_var = kvm->arch.pv.stor_var;
+		priv->stor_base = kvm->arch.pv.stor_base;
+		priv->handle = kvm_s390_pv_get_handle(kvm);
+		priv->old_gmap_table = (unsigned long)kvm->arch.gmap->table;
+		WRITE_ONCE(kvm->arch.gmap->guest_handle, 0);
+		if (s390_replace_asce(kvm->arch.gmap))
+			res = -ENOMEM;
+	}
+
+	if (res) {
+		kfree(priv);
+		return res;
+	}
+
+	kvm_s390_destroy_lower_2g(kvm);
+	kvm_s390_clear_pv_state(kvm);
+	kvm->arch.pv.set_aside = priv;
+
+	*rc = UVC_RC_EXECUTED;
+	*rrc = 42;
+	return 0;
+}
+
+/**
+ * kvm_s390_pv_deinit_vm - Deinitialize the current protected VM
+ * @kvm: the KVM whose protected VM needs to be deinitialized
+ * @rc: the RC code of the UVC
+ * @rrc: the RRC code of the UVC
+ *
+ * Deinitialize the current protected VM. This function will destroy and
+ * cleanup the current protected VM, but it will not cleanup the guest
+ * memory. This function should only be called when the protected VM has
+ * just been created and therefore does not have any guest memory, or when
+ * the caller cleans up the guest memory separately.
+ *
+ * This function should not fail, but if it does, the donated memory must
+ * not be freed.
+ *
+ * Context: kvm->lock needs to be held
+ *
+ * Return: 0 in case of success, otherwise -EIO
+ */
+int kvm_s390_pv_deinit_vm(struct kvm *kvm, u16 *rc, u16 *rrc)
+{
+	int cc;
+
+	cc = uv_cmd_nodata(kvm_s390_pv_get_handle(kvm),
+			   UVC_CMD_DESTROY_SEC_CONF, rc, rrc);
+	WRITE_ONCE(kvm->arch.gmap->guest_handle, 0);
+	if (!cc) {
+		atomic_dec(&kvm->mm->context.protected_count);
+		kvm_s390_pv_dealloc_vm(kvm);
+	} else {
+		/* Intended memory leak on "impossible" error */
+		s390_replace_asce(kvm->arch.gmap);
+	}
+	KVM_UV_EVENT(kvm, 3, "PROTVIRT DESTROY VM: rc %x rrc %x", *rc, *rrc);
+	WARN_ONCE(cc, "protvirt destroy vm failed rc %x rrc %x", *rc, *rrc);
+
+	return cc ? -EIO : 0;
+}
+
+/**
+ * kvm_s390_pv_deinit_cleanup_all - Clean up all protected VMs associated
+ * with a specific KVM.
+ * @kvm: the KVM to be cleaned up
+ * @rc: the RC code of the first failing UVC
+ * @rrc: the RRC code of the first failing UVC
+ *
+ * This function will clean up all protected VMs associated with a KVM.
+ * This includes the active one, the one prepared for deinitialization with
+ * kvm_s390_pv_set_aside, and any still pending in the need_cleanup list.
+ *
+ * Context: kvm->lock needs to be held unless being called from
+ * kvm_arch_destroy_vm.
+ *
+ * Return: 0 if all VMs are successfully cleaned up, otherwise -EIO
+ */
+int kvm_s390_pv_deinit_cleanup_all(struct kvm *kvm, u16 *rc, u16 *rrc)
+{
+	struct pv_vm_to_be_destroyed *cur;
+	bool need_zap = false;
+	u16 _rc, _rrc;
+	int cc = 0;
+
+	/*
+	 * Nothing to do if the counter was already 0. Otherwise make sure
+	 * the counter does not reach 0 before calling s390_uv_destroy_range.
+	 */
+	if (!atomic_inc_not_zero(&kvm->mm->context.protected_count))
+		return 0;
+
+	*rc = 1;
+	/* If the current VM is protected, destroy it */
+	if (kvm_s390_pv_get_handle(kvm)) {
+		cc = kvm_s390_pv_deinit_vm(kvm, rc, rrc);
+		need_zap = true;
+	}
+
+	/* If a previous protected VM was set aside, put it in the need_cleanup list */
+	if (kvm->arch.pv.set_aside) {
+		list_add(kvm->arch.pv.set_aside, &kvm->arch.pv.need_cleanup);
+		kvm->arch.pv.set_aside = NULL;
+	}
+
+	/* Cleanup all protected VMs in the need_cleanup list */
+	while (!list_empty(&kvm->arch.pv.need_cleanup)) {
+		cur = list_first_entry(&kvm->arch.pv.need_cleanup, typeof(*cur), list);
+		need_zap = true;
+		if (kvm_s390_pv_dispose_one_leftover(kvm, cur, &_rc, &_rrc)) {
+			cc = 1;
+			/*
+			 * Only return the first error rc and rrc, so make
+			 * sure it is not overwritten. All destroys will
+			 * additionally be reported via KVM_UV_EVENT().
+			 */
+			if (*rc == UVC_RC_EXECUTED) {
+				*rc = _rc;
+				*rrc = _rrc;
+			}
+		}
+		list_del(&cur->list);
+		kfree(cur);
+	}
+
+	/*
+	 * If the mm still has a mapping, try to mark all its pages as
+	 * accessible. The counter should not reach zero before this
+	 * cleanup has been performed.
+	 */
+	if (need_zap && mmget_not_zero(kvm->mm)) {
+		s390_uv_destroy_range(kvm->mm, 0, TASK_SIZE);
+		mmput(kvm->mm);
+	}
+
+	/* Now the counter can safely reach 0 */
+	atomic_dec(&kvm->mm->context.protected_count);
+	return cc ? -EIO : 0;
+}
+
+/**
+ * kvm_s390_pv_deinit_aside_vm - Teardown a previously set aside protected VM.
+ * @kvm: the VM previously associated with the protected VM
+ * @rc: return value for the RC field of the UVCB
+ * @rrc: return value for the RRC field of the UVCB
+ *
+ * Tear down the protected VM that had been previously prepared for teardown
+ * using kvm_s390_pv_set_aside_vm. Ideally this should be called by
+ * userspace asynchronously from a separate thread.
+ *
+ * Context: kvm->lock must not be held.
+ *
+ * Return: 0 in case of success, -EINVAL if no protected VM had been
+ * prepared for asynchronous teardowm, -EIO in case of other errors.
+ */
+int kvm_s390_pv_deinit_aside_vm(struct kvm *kvm, u16 *rc, u16 *rrc)
+{
+	struct pv_vm_to_be_destroyed *p;
+	int ret = 0;
+
+	lockdep_assert_not_held(&kvm->lock);
+	mutex_lock(&kvm->lock);
+	p = kvm->arch.pv.set_aside;
+	kvm->arch.pv.set_aside = NULL;
+	mutex_unlock(&kvm->lock);
+	if (!p)
+		return -EINVAL;
+
+	/* When a fatal signal is received, stop immediately */
+	if (s390_uv_destroy_range_interruptible(kvm->mm, 0, TASK_SIZE_MAX))
+		goto done;
+	if (kvm_s390_pv_dispose_one_leftover(kvm, p, rc, rrc))
+		ret = -EIO;
+	kfree(p);
+	p = NULL;
+done:
+	/*
+	 * p is not NULL if we aborted because of a fatal signal, in which
+	 * case queue the leftover for later cleanup.
+	 */
+	if (p) {
+		mutex_lock(&kvm->lock);
+		list_add(&p->list, &kvm->arch.pv.need_cleanup);
+		mutex_unlock(&kvm->lock);
+		/* Did not finish, but pretend things went well */
+		*rc = UVC_RC_EXECUTED;
+		*rrc = 42;
+	}
+	return ret;
+}
+
+static void kvm_s390_pv_mmu_notifier_release(struct mmu_notifier *subscription,
+					     struct mm_struct *mm)
+{
+	struct kvm *kvm = container_of(subscription, struct kvm, arch.pv.mmu_notifier);
+	u16 dummy;
+	int r;
+
+	/*
+	 * No locking is needed since this is the last thread of the last user of this
+	 * struct mm.
+	 * When the struct kvm gets deinitialized, this notifier is also
+	 * unregistered. This means that if this notifier runs, then the
+	 * struct kvm is still valid.
+	 */
+	r = kvm_s390_cpus_from_pv(kvm, &dummy, &dummy);
+	if (!r && is_destroy_fast_available() && kvm_s390_pv_get_handle(kvm))
+		kvm_s390_pv_deinit_vm_fast(kvm, &dummy, &dummy);
+}
+
+static const struct mmu_notifier_ops kvm_s390_pv_mmu_notifier_ops = {
+	.release = kvm_s390_pv_mmu_notifier_release,
+};
+
+int kvm_s390_pv_init_vm(struct kvm *kvm, u16 *rc, u16 *rrc)
+{
+	struct uv_cb_cgc uvcb = {
+		.header.cmd = UVC_CMD_CREATE_SEC_CONF,
+		.header.len = sizeof(uvcb)
+	};
+	int cc, ret;
+	u16 dummy;
+
+	ret = kvm_s390_pv_alloc_vm(kvm);
+	if (ret)
+		return ret;
+
+	/* Inputs */
+	uvcb.guest_stor_origin = 0; /* MSO is 0 for KVM */
+	uvcb.guest_stor_len = kvm->arch.pv.guest_len;
+	uvcb.guest_asce = kvm->arch.gmap->asce;
+	uvcb.guest_sca = virt_to_phys(kvm->arch.sca);
+	uvcb.conf_base_stor_origin =
+		virt_to_phys((void *)kvm->arch.pv.stor_base);
+	uvcb.conf_virt_stor_origin = (u64)kvm->arch.pv.stor_var;
+	uvcb.flags.ap_allow_instr = kvm->arch.model.uv_feat_guest.ap;
+	uvcb.flags.ap_instr_intr = kvm->arch.model.uv_feat_guest.ap_intr;
+
+	cc = uv_call_sched(0, (u64)&uvcb);
+	*rc = uvcb.header.rc;
+	*rrc = uvcb.header.rrc;
+	KVM_UV_EVENT(kvm, 3, "PROTVIRT CREATE VM: handle %llx len %llx rc %x rrc %x flags %04x",
+		     uvcb.guest_handle, uvcb.guest_stor_len, *rc, *rrc, uvcb.flags.raw);
+
+	/* Outputs */
+	kvm->arch.pv.handle = uvcb.guest_handle;
+
+	atomic_inc(&kvm->mm->context.protected_count);
+	if (cc) {
+		if (uvcb.header.rc & UVC_RC_NEED_DESTROY) {
+			kvm_s390_pv_deinit_vm(kvm, &dummy, &dummy);
+		} else {
+			atomic_dec(&kvm->mm->context.protected_count);
+			kvm_s390_pv_dealloc_vm(kvm);
+		}
+		return -EIO;
+	}
+	kvm->arch.gmap->guest_handle = uvcb.guest_handle;
+	/* Add the notifier only once. No races because we hold kvm->lock */
+	if (kvm->arch.pv.mmu_notifier.ops != &kvm_s390_pv_mmu_notifier_ops) {
+		kvm->arch.pv.mmu_notifier.ops = &kvm_s390_pv_mmu_notifier_ops;
+		mmu_notifier_register(&kvm->arch.pv.mmu_notifier, kvm->mm);
+	}
+	return 0;
+}
+
+int kvm_s390_pv_set_sec_parms(struct kvm *kvm, void *hdr, u64 length, u16 *rc,
+			      u16 *rrc)
+{
+	struct uv_cb_ssc uvcb = {
+		.header.cmd = UVC_CMD_SET_SEC_CONF_PARAMS,
+		.header.len = sizeof(uvcb),
+		.sec_header_origin = (u64)hdr,
+		.sec_header_len = length,
+		.guest_handle = kvm_s390_pv_get_handle(kvm),
+	};
+	int cc = uv_call(0, (u64)&uvcb);
+
+	*rc = uvcb.header.rc;
+	*rrc = uvcb.header.rrc;
+	KVM_UV_EVENT(kvm, 3, "PROTVIRT VM SET PARMS: rc %x rrc %x",
+		     *rc, *rrc);
+	return cc ? -EINVAL : 0;
+}
+
+static int unpack_one(struct kvm *kvm, unsigned long addr, u64 tweak,
+		      u64 offset, u16 *rc, u16 *rrc)
+{
+	struct uv_cb_unp uvcb = {
+		.header.cmd = UVC_CMD_UNPACK_IMG,
+		.header.len = sizeof(uvcb),
+		.guest_handle = kvm_s390_pv_get_handle(kvm),
+		.gaddr = addr,
+		.tweak[0] = tweak,
+		.tweak[1] = offset,
+	};
+	int ret = gmap_make_secure(kvm->arch.gmap, addr, &uvcb);
+
+	*rc = uvcb.header.rc;
+	*rrc = uvcb.header.rrc;
+
+	if (ret && ret != -EAGAIN)
+		KVM_UV_EVENT(kvm, 3, "PROTVIRT VM UNPACK: failed addr %llx with rc %x rrc %x",
+			     uvcb.gaddr, *rc, *rrc);
+	return ret;
+}
+
+int kvm_s390_pv_unpack(struct kvm *kvm, unsigned long addr, unsigned long size,
+		       unsigned long tweak, u16 *rc, u16 *rrc)
+{
+	u64 offset = 0;
+	int ret = 0;
+
+	if (addr & ~PAGE_MASK || !size || size & ~PAGE_MASK)
+		return -EINVAL;
+
+	KVM_UV_EVENT(kvm, 3, "PROTVIRT VM UNPACK: start addr %lx size %lx",
+		     addr, size);
+
+	while (offset < size) {
+		ret = unpack_one(kvm, addr, tweak, offset, rc, rrc);
+		if (ret == -EAGAIN) {
+			cond_resched();
+			if (fatal_signal_pending(current))
+				break;
+			continue;
+		}
+		if (ret)
+			break;
+		addr += PAGE_SIZE;
+		offset += PAGE_SIZE;
+	}
+	if (!ret)
+		KVM_UV_EVENT(kvm, 3, "%s", "PROTVIRT VM UNPACK: successful");
+	return ret;
+}
+
+int kvm_s390_pv_set_cpu_state(struct kvm_vcpu *vcpu, u8 state)
+{
+	struct uv_cb_cpu_set_state uvcb = {
+		.header.cmd	= UVC_CMD_CPU_SET_STATE,
+		.header.len	= sizeof(uvcb),
+		.cpu_handle	= kvm_s390_pv_cpu_get_handle(vcpu),
+		.state		= state,
+	};
+	int cc;
+
+	cc = uv_call(0, (u64)&uvcb);
+	KVM_UV_EVENT(vcpu->kvm, 3, "PROTVIRT SET CPU %d STATE %d rc %x rrc %x",
+		     vcpu->vcpu_id, state, uvcb.header.rc, uvcb.header.rrc);
+	if (cc)
+		return -EINVAL;
+	return 0;
+}
+
+int kvm_s390_pv_dump_cpu(struct kvm_vcpu *vcpu, void *buff, u16 *rc, u16 *rrc)
+{
+	struct uv_cb_dump_cpu uvcb = {
+		.header.cmd = UVC_CMD_DUMP_CPU,
+		.header.len = sizeof(uvcb),
+		.cpu_handle = vcpu->arch.pv.handle,
+		.dump_area_origin = (u64)buff,
+	};
+	int cc;
+
+	cc = uv_call_sched(0, (u64)&uvcb);
+	*rc = uvcb.header.rc;
+	*rrc = uvcb.header.rrc;
+	return cc;
+}
+
+/* Size of the cache for the storage state dump data. 1MB for now */
+#define DUMP_BUFF_LEN HPAGE_SIZE
+
+/**
+ * kvm_s390_pv_dump_stor_state
+ *
+ * @kvm: pointer to the guest's KVM struct
+ * @buff_user: Userspace pointer where we will write the results to
+ * @gaddr: Starting absolute guest address for which the storage state
+ *	   is requested.
+ * @buff_user_len: Length of the buff_user buffer
+ * @rc: Pointer to where the uvcb return code is stored
+ * @rrc: Pointer to where the uvcb return reason code is stored
+ *
+ * Stores buff_len bytes of tweak component values to buff_user
+ * starting with the 1MB block specified by the absolute guest address
+ * (gaddr). The gaddr pointer will be updated with the last address
+ * for which data was written when returning to userspace. buff_user
+ * might be written to even if an error rc is returned. For instance
+ * if we encounter a fault after writing the first page of data.
+ *
+ * Context: kvm->lock needs to be held
+ *
+ * Return:
+ *  0 on success
+ *  -ENOMEM if allocating the cache fails
+ *  -EINVAL if gaddr is not aligned to 1MB
+ *  -EINVAL if buff_user_len is not aligned to uv_info.conf_dump_storage_state_len
+ *  -EINVAL if the UV call fails, rc and rrc will be set in this case
+ *  -EFAULT if copying the result to buff_user failed
+ */
+int kvm_s390_pv_dump_stor_state(struct kvm *kvm, void __user *buff_user,
+				u64 *gaddr, u64 buff_user_len, u16 *rc, u16 *rrc)
+{
+	struct uv_cb_dump_stor_state uvcb = {
+		.header.cmd = UVC_CMD_DUMP_CONF_STOR_STATE,
+		.header.len = sizeof(uvcb),
+		.config_handle = kvm->arch.pv.handle,
+		.gaddr = *gaddr,
+		.dump_area_origin = 0,
+	};
+	const u64 increment_len = uv_info.conf_dump_storage_state_len;
+	size_t buff_kvm_size;
+	size_t size_done = 0;
+	u8 *buff_kvm = NULL;
+	int cc, ret;
+
+	ret = -EINVAL;
+	/* UV call processes 1MB guest storage chunks at a time */
+	if (!IS_ALIGNED(*gaddr, HPAGE_SIZE))
+		goto out;
+
+	/*
+	 * We provide the storage state for 1MB chunks of guest
+	 * storage. The buffer will need to be aligned to
+	 * conf_dump_storage_state_len so we don't end on a partial
+	 * chunk.
+	 */
+	if (!buff_user_len ||
+	    !IS_ALIGNED(buff_user_len, increment_len))
+		goto out;
+
+	/*
+	 * Allocate a buffer from which we will later copy to the user
+	 * process. We don't want userspace to dictate our buffer size
+	 * so we limit it to DUMP_BUFF_LEN.
+	 */
+	ret = -ENOMEM;
+	buff_kvm_size = min_t(u64, buff_user_len, DUMP_BUFF_LEN);
+	buff_kvm = vzalloc(buff_kvm_size);
+	if (!buff_kvm)
+		goto out;
+
+	ret = 0;
+	uvcb.dump_area_origin = (u64)buff_kvm;
+	/* We will loop until the user buffer is filled or an error occurs */
+	do {
+		/* Get 1MB worth of guest storage state data */
+		cc = uv_call_sched(0, (u64)&uvcb);
+
+		/* All or nothing */
+		if (cc) {
+			ret = -EINVAL;
+			break;
+		}
+
+		size_done += increment_len;
+		uvcb.dump_area_origin += increment_len;
+		buff_user_len -= increment_len;
+		uvcb.gaddr += HPAGE_SIZE;
+
+		/* KVM Buffer full, time to copy to the process */
+		if (!buff_user_len || size_done == DUMP_BUFF_LEN) {
+			if (copy_to_user(buff_user, buff_kvm, size_done)) {
+				ret = -EFAULT;
+				break;
+			}
+
+			buff_user += size_done;
+			size_done = 0;
+			uvcb.dump_area_origin = (u64)buff_kvm;
+		}
+	} while (buff_user_len);
+
+	/* Report back where we ended dumping */
+	*gaddr = uvcb.gaddr;
+
+	/* Lets only log errors, we don't want to spam */
+out:
+	if (ret)
+		KVM_UV_EVENT(kvm, 3,
+			     "PROTVIRT DUMP STORAGE STATE: addr %llx ret %d, uvcb rc %x rrc %x",
+			     uvcb.gaddr, ret, uvcb.header.rc, uvcb.header.rrc);
+	*rc = uvcb.header.rc;
+	*rrc = uvcb.header.rrc;
+	vfree(buff_kvm);
+
+	return ret;
+}
+
+/**
+ * kvm_s390_pv_dump_complete
+ *
+ * @kvm: pointer to the guest's KVM struct
+ * @buff_user: Userspace pointer where we will write the results to
+ * @rc: Pointer to where the uvcb return code is stored
+ * @rrc: Pointer to where the uvcb return reason code is stored
+ *
+ * Completes the dumping operation and writes the completion data to
+ * user space.
+ *
+ * Context: kvm->lock needs to be held
+ *
+ * Return:
+ *  0 on success
+ *  -ENOMEM if allocating the completion buffer fails
+ *  -EINVAL if the UV call fails, rc and rrc will be set in this case
+ *  -EFAULT if copying the result to buff_user failed
+ */
+int kvm_s390_pv_dump_complete(struct kvm *kvm, void __user *buff_user,
+			      u16 *rc, u16 *rrc)
+{
+	struct uv_cb_dump_complete complete = {
+		.header.len = sizeof(complete),
+		.header.cmd = UVC_CMD_DUMP_COMPLETE,
+		.config_handle = kvm_s390_pv_get_handle(kvm),
+	};
+	u64 *compl_data;
+	int ret;
+
+	/* Allocate dump area */
+	compl_data = vzalloc(uv_info.conf_dump_finalize_len);
+	if (!compl_data)
+		return -ENOMEM;
+	complete.dump_area_origin = (u64)compl_data;
+
+	ret = uv_call_sched(0, (u64)&complete);
+	*rc = complete.header.rc;
+	*rrc = complete.header.rrc;
+	KVM_UV_EVENT(kvm, 3, "PROTVIRT DUMP COMPLETE: rc %x rrc %x",
+		     complete.header.rc, complete.header.rrc);
+
+	if (!ret) {
+		/*
+		 * kvm_s390_pv_dealloc_vm() will also (mem)set
+		 * this to false on a reboot or other destroy
+		 * operation for this vm.
+		 */
+		kvm->arch.pv.dumping = false;
+		kvm_s390_vcpu_unblock_all(kvm);
+		ret = copy_to_user(buff_user, compl_data, uv_info.conf_dump_finalize_len);
+		if (ret)
+			ret = -EFAULT;
+	}
+	vfree(compl_data);
+	/* If the UVC returned an error, translate it to -EINVAL */
+	if (ret > 0)
+		ret = -EINVAL;
+	return ret;
+}
diff --git a/arch/s390/kvm/sigp.c b/arch/s390/kvm/sigp.c
new file mode 100644
index 0000000000..d9696b5300
--- /dev/null
+++ b/arch/s390/kvm/sigp.c
@@ -0,0 +1,495 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * handling interprocessor communication
+ *
+ * Copyright IBM Corp. 2008, 2013
+ *
+ *    Author(s): Carsten Otte <cotte@de.ibm.com>
+ *               Christian Borntraeger <borntraeger@de.ibm.com>
+ *               Christian Ehrhardt <ehrhardt@de.ibm.com>
+ */
+
+#include <linux/kvm.h>
+#include <linux/kvm_host.h>
+#include <linux/slab.h>
+#include <asm/sigp.h>
+#include "gaccess.h"
+#include "kvm-s390.h"
+#include "trace.h"
+
+static int __sigp_sense(struct kvm_vcpu *vcpu, struct kvm_vcpu *dst_vcpu,
+			u64 *reg)
+{
+	const bool stopped = kvm_s390_test_cpuflags(dst_vcpu, CPUSTAT_STOPPED);
+	int rc;
+	int ext_call_pending;
+
+	ext_call_pending = kvm_s390_ext_call_pending(dst_vcpu);
+	if (!stopped && !ext_call_pending)
+		rc = SIGP_CC_ORDER_CODE_ACCEPTED;
+	else {
+		*reg &= 0xffffffff00000000UL;
+		if (ext_call_pending)
+			*reg |= SIGP_STATUS_EXT_CALL_PENDING;
+		if (stopped)
+			*reg |= SIGP_STATUS_STOPPED;
+		rc = SIGP_CC_STATUS_STORED;
+	}
+
+	VCPU_EVENT(vcpu, 4, "sensed status of cpu %x rc %x", dst_vcpu->vcpu_id,
+		   rc);
+	return rc;
+}
+
+static int __inject_sigp_emergency(struct kvm_vcpu *vcpu,
+				    struct kvm_vcpu *dst_vcpu)
+{
+	struct kvm_s390_irq irq = {
+		.type = KVM_S390_INT_EMERGENCY,
+		.u.emerg.code = vcpu->vcpu_id,
+	};
+	int rc = 0;
+
+	rc = kvm_s390_inject_vcpu(dst_vcpu, &irq);
+	if (!rc)
+		VCPU_EVENT(vcpu, 4, "sent sigp emerg to cpu %x",
+			   dst_vcpu->vcpu_id);
+
+	return rc ? rc : SIGP_CC_ORDER_CODE_ACCEPTED;
+}
+
+static int __sigp_emergency(struct kvm_vcpu *vcpu, struct kvm_vcpu *dst_vcpu)
+{
+	return __inject_sigp_emergency(vcpu, dst_vcpu);
+}
+
+static int __sigp_conditional_emergency(struct kvm_vcpu *vcpu,
+					struct kvm_vcpu *dst_vcpu,
+					u16 asn, u64 *reg)
+{
+	const u64 psw_int_mask = PSW_MASK_IO | PSW_MASK_EXT;
+	u16 p_asn, s_asn;
+	psw_t *psw;
+	bool idle;
+
+	idle = is_vcpu_idle(vcpu);
+	psw = &dst_vcpu->arch.sie_block->gpsw;
+	p_asn = dst_vcpu->arch.sie_block->gcr[4] & 0xffff;  /* Primary ASN */
+	s_asn = dst_vcpu->arch.sie_block->gcr[3] & 0xffff;  /* Secondary ASN */
+
+	/* Inject the emergency signal? */
+	if (!is_vcpu_stopped(vcpu)
+	    || (psw->mask & psw_int_mask) != psw_int_mask
+	    || (idle && psw->addr != 0)
+	    || (!idle && (asn == p_asn || asn == s_asn))) {
+		return __inject_sigp_emergency(vcpu, dst_vcpu);
+	} else {
+		*reg &= 0xffffffff00000000UL;
+		*reg |= SIGP_STATUS_INCORRECT_STATE;
+		return SIGP_CC_STATUS_STORED;
+	}
+}
+
+static int __sigp_external_call(struct kvm_vcpu *vcpu,
+				struct kvm_vcpu *dst_vcpu, u64 *reg)
+{
+	struct kvm_s390_irq irq = {
+		.type = KVM_S390_INT_EXTERNAL_CALL,
+		.u.extcall.code = vcpu->vcpu_id,
+	};
+	int rc;
+
+	rc = kvm_s390_inject_vcpu(dst_vcpu, &irq);
+	if (rc == -EBUSY) {
+		*reg &= 0xffffffff00000000UL;
+		*reg |= SIGP_STATUS_EXT_CALL_PENDING;
+		return SIGP_CC_STATUS_STORED;
+	} else if (rc == 0) {
+		VCPU_EVENT(vcpu, 4, "sent sigp ext call to cpu %x",
+			   dst_vcpu->vcpu_id);
+	}
+
+	return rc ? rc : SIGP_CC_ORDER_CODE_ACCEPTED;
+}
+
+static int __sigp_stop(struct kvm_vcpu *vcpu, struct kvm_vcpu *dst_vcpu)
+{
+	struct kvm_s390_irq irq = {
+		.type = KVM_S390_SIGP_STOP,
+	};
+	int rc;
+
+	rc = kvm_s390_inject_vcpu(dst_vcpu, &irq);
+	if (rc == -EBUSY)
+		rc = SIGP_CC_BUSY;
+	else if (rc == 0)
+		VCPU_EVENT(vcpu, 4, "sent sigp stop to cpu %x",
+			   dst_vcpu->vcpu_id);
+
+	return rc;
+}
+
+static int __sigp_stop_and_store_status(struct kvm_vcpu *vcpu,
+					struct kvm_vcpu *dst_vcpu, u64 *reg)
+{
+	struct kvm_s390_irq irq = {
+		.type = KVM_S390_SIGP_STOP,
+		.u.stop.flags = KVM_S390_STOP_FLAG_STORE_STATUS,
+	};
+	int rc;
+
+	rc = kvm_s390_inject_vcpu(dst_vcpu, &irq);
+	if (rc == -EBUSY)
+		rc = SIGP_CC_BUSY;
+	else if (rc == 0)
+		VCPU_EVENT(vcpu, 4, "sent sigp stop and store status to cpu %x",
+			   dst_vcpu->vcpu_id);
+
+	return rc;
+}
+
+static int __sigp_set_arch(struct kvm_vcpu *vcpu, u32 parameter,
+			   u64 *status_reg)
+{
+	*status_reg &= 0xffffffff00000000UL;
+
+	/* Reject set arch order, with czam we're always in z/Arch mode. */
+	*status_reg |= SIGP_STATUS_INVALID_PARAMETER;
+	return SIGP_CC_STATUS_STORED;
+}
+
+static int __sigp_set_prefix(struct kvm_vcpu *vcpu, struct kvm_vcpu *dst_vcpu,
+			     u32 address, u64 *reg)
+{
+	struct kvm_s390_irq irq = {
+		.type = KVM_S390_SIGP_SET_PREFIX,
+		.u.prefix.address = address & 0x7fffe000u,
+	};
+	int rc;
+
+	/*
+	 * Make sure the new value is valid memory. We only need to check the
+	 * first page, since address is 8k aligned and memory pieces are always
+	 * at least 1MB aligned and have at least a size of 1MB.
+	 */
+	if (kvm_is_error_gpa(vcpu->kvm, irq.u.prefix.address)) {
+		*reg &= 0xffffffff00000000UL;
+		*reg |= SIGP_STATUS_INVALID_PARAMETER;
+		return SIGP_CC_STATUS_STORED;
+	}
+
+	rc = kvm_s390_inject_vcpu(dst_vcpu, &irq);
+	if (rc == -EBUSY) {
+		*reg &= 0xffffffff00000000UL;
+		*reg |= SIGP_STATUS_INCORRECT_STATE;
+		return SIGP_CC_STATUS_STORED;
+	}
+
+	return rc;
+}
+
+static int __sigp_store_status_at_addr(struct kvm_vcpu *vcpu,
+				       struct kvm_vcpu *dst_vcpu,
+				       u32 addr, u64 *reg)
+{
+	int rc;
+
+	if (!kvm_s390_test_cpuflags(dst_vcpu, CPUSTAT_STOPPED)) {
+		*reg &= 0xffffffff00000000UL;
+		*reg |= SIGP_STATUS_INCORRECT_STATE;
+		return SIGP_CC_STATUS_STORED;
+	}
+
+	addr &= 0x7ffffe00;
+	rc = kvm_s390_store_status_unloaded(dst_vcpu, addr);
+	if (rc == -EFAULT) {
+		*reg &= 0xffffffff00000000UL;
+		*reg |= SIGP_STATUS_INVALID_PARAMETER;
+		rc = SIGP_CC_STATUS_STORED;
+	}
+	return rc;
+}
+
+static int __sigp_sense_running(struct kvm_vcpu *vcpu,
+				struct kvm_vcpu *dst_vcpu, u64 *reg)
+{
+	int rc;
+
+	if (!test_kvm_facility(vcpu->kvm, 9)) {
+		*reg &= 0xffffffff00000000UL;
+		*reg |= SIGP_STATUS_INVALID_ORDER;
+		return SIGP_CC_STATUS_STORED;
+	}
+
+	if (kvm_s390_test_cpuflags(dst_vcpu, CPUSTAT_RUNNING)) {
+		/* running */
+		rc = SIGP_CC_ORDER_CODE_ACCEPTED;
+	} else {
+		/* not running */
+		*reg &= 0xffffffff00000000UL;
+		*reg |= SIGP_STATUS_NOT_RUNNING;
+		rc = SIGP_CC_STATUS_STORED;
+	}
+
+	VCPU_EVENT(vcpu, 4, "sensed running status of cpu %x rc %x",
+		   dst_vcpu->vcpu_id, rc);
+
+	return rc;
+}
+
+static int __prepare_sigp_re_start(struct kvm_vcpu *vcpu,
+				   struct kvm_vcpu *dst_vcpu, u8 order_code)
+{
+	struct kvm_s390_local_interrupt *li = &dst_vcpu->arch.local_int;
+	/* handle (RE)START in user space */
+	int rc = -EOPNOTSUPP;
+
+	/* make sure we don't race with STOP irq injection */
+	spin_lock(&li->lock);
+	if (kvm_s390_is_stop_irq_pending(dst_vcpu))
+		rc = SIGP_CC_BUSY;
+	spin_unlock(&li->lock);
+
+	return rc;
+}
+
+static int __prepare_sigp_cpu_reset(struct kvm_vcpu *vcpu,
+				    struct kvm_vcpu *dst_vcpu, u8 order_code)
+{
+	/* handle (INITIAL) CPU RESET in user space */
+	return -EOPNOTSUPP;
+}
+
+static int __prepare_sigp_unknown(struct kvm_vcpu *vcpu,
+				  struct kvm_vcpu *dst_vcpu)
+{
+	/* handle unknown orders in user space */
+	return -EOPNOTSUPP;
+}
+
+static int handle_sigp_dst(struct kvm_vcpu *vcpu, u8 order_code,
+			   u16 cpu_addr, u32 parameter, u64 *status_reg)
+{
+	int rc;
+	struct kvm_vcpu *dst_vcpu = kvm_get_vcpu_by_id(vcpu->kvm, cpu_addr);
+
+	if (!dst_vcpu)
+		return SIGP_CC_NOT_OPERATIONAL;
+
+	/*
+	 * SIGP RESTART, SIGP STOP, and SIGP STOP AND STORE STATUS orders
+	 * are processed asynchronously. Until the affected VCPU finishes
+	 * its work and calls back into KVM to clear the (RESTART or STOP)
+	 * interrupt, we need to return any new non-reset orders "busy".
+	 *
+	 * This is important because a single VCPU could issue:
+	 *  1) SIGP STOP $DESTINATION
+	 *  2) SIGP SENSE $DESTINATION
+	 *
+	 * If the SIGP SENSE would not be rejected as "busy", it could
+	 * return an incorrect answer as to whether the VCPU is STOPPED
+	 * or OPERATING.
+	 */
+	if (order_code != SIGP_INITIAL_CPU_RESET &&
+	    order_code != SIGP_CPU_RESET) {
+		/*
+		 * Lockless check. Both SIGP STOP and SIGP (RE)START
+		 * properly synchronize everything while processing
+		 * their orders, while the guest cannot observe a
+		 * difference when issuing other orders from two
+		 * different VCPUs.
+		 */
+		if (kvm_s390_is_stop_irq_pending(dst_vcpu) ||
+		    kvm_s390_is_restart_irq_pending(dst_vcpu))
+			return SIGP_CC_BUSY;
+	}
+
+	switch (order_code) {
+	case SIGP_SENSE:
+		vcpu->stat.instruction_sigp_sense++;
+		rc = __sigp_sense(vcpu, dst_vcpu, status_reg);
+		break;
+	case SIGP_EXTERNAL_CALL:
+		vcpu->stat.instruction_sigp_external_call++;
+		rc = __sigp_external_call(vcpu, dst_vcpu, status_reg);
+		break;
+	case SIGP_EMERGENCY_SIGNAL:
+		vcpu->stat.instruction_sigp_emergency++;
+		rc = __sigp_emergency(vcpu, dst_vcpu);
+		break;
+	case SIGP_STOP:
+		vcpu->stat.instruction_sigp_stop++;
+		rc = __sigp_stop(vcpu, dst_vcpu);
+		break;
+	case SIGP_STOP_AND_STORE_STATUS:
+		vcpu->stat.instruction_sigp_stop_store_status++;
+		rc = __sigp_stop_and_store_status(vcpu, dst_vcpu, status_reg);
+		break;
+	case SIGP_STORE_STATUS_AT_ADDRESS:
+		vcpu->stat.instruction_sigp_store_status++;
+		rc = __sigp_store_status_at_addr(vcpu, dst_vcpu, parameter,
+						 status_reg);
+		break;
+	case SIGP_SET_PREFIX:
+		vcpu->stat.instruction_sigp_prefix++;
+		rc = __sigp_set_prefix(vcpu, dst_vcpu, parameter, status_reg);
+		break;
+	case SIGP_COND_EMERGENCY_SIGNAL:
+		vcpu->stat.instruction_sigp_cond_emergency++;
+		rc = __sigp_conditional_emergency(vcpu, dst_vcpu, parameter,
+						  status_reg);
+		break;
+	case SIGP_SENSE_RUNNING:
+		vcpu->stat.instruction_sigp_sense_running++;
+		rc = __sigp_sense_running(vcpu, dst_vcpu, status_reg);
+		break;
+	case SIGP_START:
+		vcpu->stat.instruction_sigp_start++;
+		rc = __prepare_sigp_re_start(vcpu, dst_vcpu, order_code);
+		break;
+	case SIGP_RESTART:
+		vcpu->stat.instruction_sigp_restart++;
+		rc = __prepare_sigp_re_start(vcpu, dst_vcpu, order_code);
+		break;
+	case SIGP_INITIAL_CPU_RESET:
+		vcpu->stat.instruction_sigp_init_cpu_reset++;
+		rc = __prepare_sigp_cpu_reset(vcpu, dst_vcpu, order_code);
+		break;
+	case SIGP_CPU_RESET:
+		vcpu->stat.instruction_sigp_cpu_reset++;
+		rc = __prepare_sigp_cpu_reset(vcpu, dst_vcpu, order_code);
+		break;
+	default:
+		vcpu->stat.instruction_sigp_unknown++;
+		rc = __prepare_sigp_unknown(vcpu, dst_vcpu);
+	}
+
+	if (rc == -EOPNOTSUPP)
+		VCPU_EVENT(vcpu, 4,
+			   "sigp order %u -> cpu %x: handled in user space",
+			   order_code, dst_vcpu->vcpu_id);
+
+	return rc;
+}
+
+static int handle_sigp_order_in_user_space(struct kvm_vcpu *vcpu, u8 order_code,
+					   u16 cpu_addr)
+{
+	if (!vcpu->kvm->arch.user_sigp)
+		return 0;
+
+	switch (order_code) {
+	case SIGP_SENSE:
+	case SIGP_EXTERNAL_CALL:
+	case SIGP_EMERGENCY_SIGNAL:
+	case SIGP_COND_EMERGENCY_SIGNAL:
+	case SIGP_SENSE_RUNNING:
+		return 0;
+	/* update counters as we're directly dropping to user space */
+	case SIGP_STOP:
+		vcpu->stat.instruction_sigp_stop++;
+		break;
+	case SIGP_STOP_AND_STORE_STATUS:
+		vcpu->stat.instruction_sigp_stop_store_status++;
+		break;
+	case SIGP_STORE_STATUS_AT_ADDRESS:
+		vcpu->stat.instruction_sigp_store_status++;
+		break;
+	case SIGP_STORE_ADDITIONAL_STATUS:
+		vcpu->stat.instruction_sigp_store_adtl_status++;
+		break;
+	case SIGP_SET_PREFIX:
+		vcpu->stat.instruction_sigp_prefix++;
+		break;
+	case SIGP_START:
+		vcpu->stat.instruction_sigp_start++;
+		break;
+	case SIGP_RESTART:
+		vcpu->stat.instruction_sigp_restart++;
+		break;
+	case SIGP_INITIAL_CPU_RESET:
+		vcpu->stat.instruction_sigp_init_cpu_reset++;
+		break;
+	case SIGP_CPU_RESET:
+		vcpu->stat.instruction_sigp_cpu_reset++;
+		break;
+	default:
+		vcpu->stat.instruction_sigp_unknown++;
+	}
+	VCPU_EVENT(vcpu, 3, "SIGP: order %u for CPU %d handled in userspace",
+		   order_code, cpu_addr);
+
+	return 1;
+}
+
+int kvm_s390_handle_sigp(struct kvm_vcpu *vcpu)
+{
+	int r1 = (vcpu->arch.sie_block->ipa & 0x00f0) >> 4;
+	int r3 = vcpu->arch.sie_block->ipa & 0x000f;
+	u32 parameter;
+	u16 cpu_addr = vcpu->run->s.regs.gprs[r3];
+	u8 order_code;
+	int rc;
+
+	/* sigp in userspace can exit */
+	if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
+		return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
+
+	order_code = kvm_s390_get_base_disp_rs(vcpu, NULL);
+	if (handle_sigp_order_in_user_space(vcpu, order_code, cpu_addr))
+		return -EOPNOTSUPP;
+
+	if (r1 % 2)
+		parameter = vcpu->run->s.regs.gprs[r1];
+	else
+		parameter = vcpu->run->s.regs.gprs[r1 + 1];
+
+	trace_kvm_s390_handle_sigp(vcpu, order_code, cpu_addr, parameter);
+	switch (order_code) {
+	case SIGP_SET_ARCHITECTURE:
+		vcpu->stat.instruction_sigp_arch++;
+		rc = __sigp_set_arch(vcpu, parameter,
+				     &vcpu->run->s.regs.gprs[r1]);
+		break;
+	default:
+		rc = handle_sigp_dst(vcpu, order_code, cpu_addr,
+				     parameter,
+				     &vcpu->run->s.regs.gprs[r1]);
+	}
+
+	if (rc < 0)
+		return rc;
+
+	kvm_s390_set_psw_cc(vcpu, rc);
+	return 0;
+}
+
+/*
+ * Handle SIGP partial execution interception.
+ *
+ * This interception will occur at the source cpu when a source cpu sends an
+ * external call to a target cpu and the target cpu has the WAIT bit set in
+ * its cpuflags. Interception will occur after the interrupt indicator bits at
+ * the target cpu have been set. All error cases will lead to instruction
+ * interception, therefore nothing is to be checked or prepared.
+ */
+int kvm_s390_handle_sigp_pei(struct kvm_vcpu *vcpu)
+{
+	int r3 = vcpu->arch.sie_block->ipa & 0x000f;
+	u16 cpu_addr = vcpu->run->s.regs.gprs[r3];
+	struct kvm_vcpu *dest_vcpu;
+	u8 order_code = kvm_s390_get_base_disp_rs(vcpu, NULL);
+
+	if (order_code == SIGP_EXTERNAL_CALL) {
+		trace_kvm_s390_handle_sigp_pei(vcpu, order_code, cpu_addr);
+
+		dest_vcpu = kvm_get_vcpu_by_id(vcpu->kvm, cpu_addr);
+		BUG_ON(dest_vcpu == NULL);
+
+		kvm_s390_vcpu_wakeup(dest_vcpu);
+		kvm_s390_set_psw_cc(vcpu, SIGP_CC_ORDER_CODE_ACCEPTED);
+		return 0;
+	}
+
+	return -EOPNOTSUPP;
+}
diff --git a/arch/s390/kvm/trace-s390.h b/arch/s390/kvm/trace-s390.h
new file mode 100644
index 0000000000..6f0209d451
--- /dev/null
+++ b/arch/s390/kvm/trace-s390.h
@@ -0,0 +1,340 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#if !defined(_TRACE_KVMS390_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_KVMS390_H
+
+#include <linux/tracepoint.h>
+
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM kvm-s390
+#define TRACE_INCLUDE_PATH .
+#undef TRACE_INCLUDE_FILE
+#define TRACE_INCLUDE_FILE trace-s390
+
+/*
+ * The TRACE_SYSTEM_VAR defaults to TRACE_SYSTEM, but must be a
+ * legitimate C variable. It is not exported to user space.
+ */
+#undef TRACE_SYSTEM_VAR
+#define TRACE_SYSTEM_VAR kvm_s390
+
+/*
+ * Trace point for the creation of the kvm instance.
+ */
+TRACE_EVENT(kvm_s390_create_vm,
+	    TP_PROTO(unsigned long type),
+	    TP_ARGS(type),
+
+	    TP_STRUCT__entry(
+		    __field(unsigned long, type)
+		    ),
+
+	    TP_fast_assign(
+		    __entry->type = type;
+		    ),
+
+	    TP_printk("create vm%s",
+		      __entry->type & KVM_VM_S390_UCONTROL ? " (UCONTROL)" : "")
+	);
+
+/*
+ * Trace points for creation and destruction of vpcus.
+ */
+TRACE_EVENT(kvm_s390_create_vcpu,
+	    TP_PROTO(unsigned int id, struct kvm_vcpu *vcpu,
+		     struct kvm_s390_sie_block *sie_block),
+	    TP_ARGS(id, vcpu, sie_block),
+
+	    TP_STRUCT__entry(
+		    __field(unsigned int, id)
+		    __field(struct kvm_vcpu *, vcpu)
+		    __field(struct kvm_s390_sie_block *, sie_block)
+		    ),
+
+	    TP_fast_assign(
+		    __entry->id = id;
+		    __entry->vcpu = vcpu;
+		    __entry->sie_block = sie_block;
+		    ),
+
+	    TP_printk("create cpu %d at 0x%pK, sie block at 0x%pK",
+		      __entry->id, __entry->vcpu, __entry->sie_block)
+	);
+
+TRACE_EVENT(kvm_s390_destroy_vcpu,
+	    TP_PROTO(unsigned int id),
+	    TP_ARGS(id),
+
+	    TP_STRUCT__entry(
+		    __field(unsigned int, id)
+		    ),
+
+	    TP_fast_assign(
+		    __entry->id = id;
+		    ),
+
+	    TP_printk("destroy cpu %d", __entry->id)
+	);
+
+/*
+ * Trace point for start and stop of vpcus.
+ */
+TRACE_EVENT(kvm_s390_vcpu_start_stop,
+	    TP_PROTO(unsigned int id, int state),
+	    TP_ARGS(id, state),
+
+	    TP_STRUCT__entry(
+		    __field(unsigned int, id)
+		    __field(int, state)
+		    ),
+
+	    TP_fast_assign(
+		    __entry->id = id;
+		    __entry->state = state;
+		    ),
+
+	    TP_printk("%s cpu %d", __entry->state ? "starting" : "stopping",
+		      __entry->id)
+	);
+
+/*
+ * Trace points for injection of interrupts, either per machine or
+ * per vcpu.
+ */
+
+#define kvm_s390_int_type						\
+	{KVM_S390_SIGP_STOP, "sigp stop"},				\
+	{KVM_S390_PROGRAM_INT, "program interrupt"},			\
+	{KVM_S390_SIGP_SET_PREFIX, "sigp set prefix"},			\
+	{KVM_S390_RESTART, "sigp restart"},				\
+	{KVM_S390_INT_PFAULT_INIT, "pfault init"},			\
+	{KVM_S390_INT_PFAULT_DONE, "pfault done"},			\
+	{KVM_S390_MCHK, "machine check"},				\
+	{KVM_S390_INT_CLOCK_COMP, "clock comparator"},			\
+	{KVM_S390_INT_CPU_TIMER, "cpu timer"},				\
+	{KVM_S390_INT_VIRTIO, "virtio interrupt"},			\
+	{KVM_S390_INT_SERVICE, "sclp interrupt"},			\
+	{KVM_S390_INT_EMERGENCY, "sigp emergency"},			\
+	{KVM_S390_INT_EXTERNAL_CALL, "sigp ext call"}
+
+#define get_irq_name(__type) \
+	(__type > KVM_S390_INT_IO_MAX ? \
+	__print_symbolic(__type, kvm_s390_int_type) : \
+		(__type & KVM_S390_INT_IO_AI_MASK ? \
+		 "adapter I/O interrupt" : "subchannel I/O interrupt"))
+
+TRACE_EVENT(kvm_s390_inject_vm,
+	    TP_PROTO(__u64 type, __u32 parm, __u64 parm64, int who),
+	    TP_ARGS(type, parm, parm64, who),
+
+	    TP_STRUCT__entry(
+		    __field(__u32, inttype)
+		    __field(__u32, parm)
+		    __field(__u64, parm64)
+		    __field(int, who)
+		    ),
+
+	    TP_fast_assign(
+		    __entry->inttype = type & 0x00000000ffffffff;
+		    __entry->parm = parm;
+		    __entry->parm64 = parm64;
+		    __entry->who = who;
+		    ),
+
+	    TP_printk("inject%s: type:%x (%s) parm:%x parm64:%llx",
+		      (__entry->who == 1) ? " (from kernel)" :
+		      (__entry->who == 2) ? " (from user)" : "",
+		      __entry->inttype, get_irq_name(__entry->inttype),
+		      __entry->parm, __entry->parm64)
+	);
+
+TRACE_EVENT(kvm_s390_inject_vcpu,
+	    TP_PROTO(unsigned int id, __u64 type, __u32 parm, __u64 parm64),
+	    TP_ARGS(id, type, parm, parm64),
+
+	    TP_STRUCT__entry(
+		    __field(int, id)
+		    __field(__u32, inttype)
+		    __field(__u32, parm)
+		    __field(__u64, parm64)
+		    ),
+
+	    TP_fast_assign(
+		    __entry->id = id;
+		    __entry->inttype = type & 0x00000000ffffffff;
+		    __entry->parm = parm;
+		    __entry->parm64 = parm64;
+		    ),
+
+	    TP_printk("inject (vcpu %d): type:%x (%s) parm:%x parm64:%llx",
+		      __entry->id, __entry->inttype,
+		      get_irq_name(__entry->inttype), __entry->parm,
+		      __entry->parm64)
+	);
+
+/*
+ * Trace point for the actual delivery of interrupts.
+ */
+TRACE_EVENT(kvm_s390_deliver_interrupt,
+	    TP_PROTO(unsigned int id, __u64 type, __u64 data0, __u64 data1),
+	    TP_ARGS(id, type, data0, data1),
+
+	    TP_STRUCT__entry(
+		    __field(int, id)
+		    __field(__u32, inttype)
+		    __field(__u64, data0)
+		    __field(__u64, data1)
+		    ),
+
+	    TP_fast_assign(
+		    __entry->id = id;
+		    __entry->inttype = type & 0x00000000ffffffff;
+		    __entry->data0 = data0;
+		    __entry->data1 = data1;
+		    ),
+
+	    TP_printk("deliver interrupt (vcpu %d): type:%x (%s) "	\
+		      "data:%08llx %016llx",
+		      __entry->id, __entry->inttype,
+		      get_irq_name(__entry->inttype), __entry->data0,
+		      __entry->data1)
+	);
+
+/*
+ * Trace point for resets that may be requested from userspace.
+ */
+TRACE_EVENT(kvm_s390_request_resets,
+	    TP_PROTO(__u64 resets),
+	    TP_ARGS(resets),
+
+	    TP_STRUCT__entry(
+		    __field(__u64, resets)
+		    ),
+
+	    TP_fast_assign(
+		    __entry->resets = resets;
+		    ),
+
+	    TP_printk("requesting userspace resets %llx",
+		      __entry->resets)
+	);
+
+/*
+ * Trace point for a vcpu's stop requests.
+ */
+TRACE_EVENT(kvm_s390_stop_request,
+	    TP_PROTO(unsigned char stop_irq, unsigned char flags),
+	    TP_ARGS(stop_irq, flags),
+
+	    TP_STRUCT__entry(
+		    __field(unsigned char, stop_irq)
+		    __field(unsigned char, flags)
+		    ),
+
+	    TP_fast_assign(
+		    __entry->stop_irq = stop_irq;
+		    __entry->flags = flags;
+		    ),
+
+	    TP_printk("stop request, stop irq = %u, flags = %08x",
+		      __entry->stop_irq, __entry->flags)
+	);
+
+
+/*
+ * Trace point for enabling channel I/O instruction support.
+ */
+TRACE_EVENT(kvm_s390_enable_css,
+	    TP_PROTO(void *kvm),
+	    TP_ARGS(kvm),
+
+	    TP_STRUCT__entry(
+		    __field(void *, kvm)
+		    ),
+
+	    TP_fast_assign(
+		    __entry->kvm = kvm;
+		    ),
+
+	    TP_printk("enabling channel I/O support (kvm @ %pK)\n",
+		      __entry->kvm)
+	);
+
+/*
+ * Trace point for enabling and disabling interlocking-and-broadcasting
+ * suppression.
+ */
+TRACE_EVENT(kvm_s390_enable_disable_ibs,
+	    TP_PROTO(unsigned int id, int state),
+	    TP_ARGS(id, state),
+
+	    TP_STRUCT__entry(
+		    __field(unsigned int, id)
+		    __field(int, state)
+		    ),
+
+	    TP_fast_assign(
+		    __entry->id = id;
+		    __entry->state = state;
+		    ),
+
+	    TP_printk("%s ibs on cpu %d",
+		      __entry->state ? "enabling" : "disabling", __entry->id)
+	);
+
+/*
+ * Trace point for modifying ais mode for a given isc.
+ */
+TRACE_EVENT(kvm_s390_modify_ais_mode,
+	    TP_PROTO(__u8 isc, __u16 from, __u16 to),
+	    TP_ARGS(isc, from, to),
+
+	    TP_STRUCT__entry(
+		    __field(__u8, isc)
+		    __field(__u16, from)
+		    __field(__u16, to)
+		    ),
+
+	    TP_fast_assign(
+		    __entry->isc = isc;
+		    __entry->from = from;
+		    __entry->to = to;
+		    ),
+
+	    TP_printk("for isc %x, modifying interruption mode from %s to %s",
+		      __entry->isc,
+		      (__entry->from == KVM_S390_AIS_MODE_ALL) ?
+		      "ALL-Interruptions Mode" :
+		      (__entry->from == KVM_S390_AIS_MODE_SINGLE) ?
+		      "Single-Interruption Mode" : "No-Interruptions Mode",
+		      (__entry->to == KVM_S390_AIS_MODE_ALL) ?
+		      "ALL-Interruptions Mode" :
+		      (__entry->to == KVM_S390_AIS_MODE_SINGLE) ?
+		      "Single-Interruption Mode" : "No-Interruptions Mode")
+	);
+
+/*
+ * Trace point for suppressed adapter I/O interrupt.
+ */
+TRACE_EVENT(kvm_s390_airq_suppressed,
+	    TP_PROTO(__u32 id, __u8 isc),
+	    TP_ARGS(id, isc),
+
+	    TP_STRUCT__entry(
+		    __field(__u32, id)
+		    __field(__u8, isc)
+		    ),
+
+	    TP_fast_assign(
+		    __entry->id = id;
+		    __entry->isc = isc;
+		    ),
+
+	    TP_printk("adapter I/O interrupt suppressed (id:%x isc:%x)",
+		      __entry->id, __entry->isc)
+	);
+
+
+#endif /* _TRACE_KVMS390_H */
+
+/* This part must be outside protection */
+#include <trace/define_trace.h>
diff --git a/arch/s390/kvm/trace.h b/arch/s390/kvm/trace.h
new file mode 100644
index 0000000000..aa419eb6a0
--- /dev/null
+++ b/arch/s390/kvm/trace.h
@@ -0,0 +1,462 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#if !defined(_TRACE_KVM_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_KVM_H
+
+#include <linux/tracepoint.h>
+#include <asm/sie.h>
+#include <asm/debug.h>
+#include <asm/dis.h>
+
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM kvm
+#define TRACE_INCLUDE_PATH .
+#undef TRACE_INCLUDE_FILE
+#define TRACE_INCLUDE_FILE trace
+
+/*
+ * Helpers for vcpu-specific tracepoints containing the same information
+ * as s390dbf VCPU_EVENTs.
+ */
+#define VCPU_PROTO_COMMON struct kvm_vcpu *vcpu
+#define VCPU_ARGS_COMMON vcpu
+#define VCPU_FIELD_COMMON __field(int, id)			\
+	__field(unsigned long, pswmask)				\
+	__field(unsigned long, pswaddr)
+#define VCPU_ASSIGN_COMMON do {						\
+	__entry->id = vcpu->vcpu_id;					\
+	__entry->pswmask = vcpu->arch.sie_block->gpsw.mask;		\
+	__entry->pswaddr = vcpu->arch.sie_block->gpsw.addr;		\
+	} while (0);
+#define VCPU_TP_PRINTK(p_str, p_args...)				\
+	TP_printk("%02d[%016lx-%016lx]: " p_str, __entry->id,		\
+		  __entry->pswmask, __entry->pswaddr, p_args)
+
+TRACE_EVENT(kvm_s390_skey_related_inst,
+	    TP_PROTO(VCPU_PROTO_COMMON),
+	    TP_ARGS(VCPU_ARGS_COMMON),
+
+	    TP_STRUCT__entry(
+		    VCPU_FIELD_COMMON
+		    ),
+
+	    TP_fast_assign(
+		    VCPU_ASSIGN_COMMON
+		    ),
+	    VCPU_TP_PRINTK("%s", "storage key related instruction")
+	);
+
+TRACE_EVENT(kvm_s390_major_guest_pfault,
+	    TP_PROTO(VCPU_PROTO_COMMON),
+	    TP_ARGS(VCPU_ARGS_COMMON),
+
+	    TP_STRUCT__entry(
+		    VCPU_FIELD_COMMON
+		    ),
+
+	    TP_fast_assign(
+		    VCPU_ASSIGN_COMMON
+		    ),
+	    VCPU_TP_PRINTK("%s", "major fault, maybe applicable for pfault")
+	);
+
+TRACE_EVENT(kvm_s390_pfault_init,
+	    TP_PROTO(VCPU_PROTO_COMMON, long pfault_token),
+	    TP_ARGS(VCPU_ARGS_COMMON, pfault_token),
+
+	    TP_STRUCT__entry(
+		    VCPU_FIELD_COMMON
+		    __field(long, pfault_token)
+		    ),
+
+	    TP_fast_assign(
+		    VCPU_ASSIGN_COMMON
+		    __entry->pfault_token = pfault_token;
+		    ),
+	    VCPU_TP_PRINTK("init pfault token %ld", __entry->pfault_token)
+	);
+
+TRACE_EVENT(kvm_s390_pfault_done,
+	    TP_PROTO(VCPU_PROTO_COMMON, long pfault_token),
+	    TP_ARGS(VCPU_ARGS_COMMON, pfault_token),
+
+	    TP_STRUCT__entry(
+		    VCPU_FIELD_COMMON
+		    __field(long, pfault_token)
+		    ),
+
+	    TP_fast_assign(
+		    VCPU_ASSIGN_COMMON
+		    __entry->pfault_token = pfault_token;
+		    ),
+	    VCPU_TP_PRINTK("done pfault token %ld", __entry->pfault_token)
+	);
+
+/*
+ * Tracepoints for SIE entry and exit.
+ */
+TRACE_EVENT(kvm_s390_sie_enter,
+	    TP_PROTO(VCPU_PROTO_COMMON, int cpuflags),
+	    TP_ARGS(VCPU_ARGS_COMMON, cpuflags),
+
+	    TP_STRUCT__entry(
+		    VCPU_FIELD_COMMON
+		    __field(int, cpuflags)
+		    ),
+
+	    TP_fast_assign(
+		    VCPU_ASSIGN_COMMON
+		    __entry->cpuflags = cpuflags;
+		    ),
+
+	    VCPU_TP_PRINTK("entering sie flags %x", __entry->cpuflags)
+	);
+
+TRACE_EVENT(kvm_s390_sie_fault,
+	    TP_PROTO(VCPU_PROTO_COMMON),
+	    TP_ARGS(VCPU_ARGS_COMMON),
+
+	    TP_STRUCT__entry(
+		    VCPU_FIELD_COMMON
+		    ),
+
+	    TP_fast_assign(
+		    VCPU_ASSIGN_COMMON
+		    ),
+
+	    VCPU_TP_PRINTK("%s", "fault in sie instruction")
+	);
+
+TRACE_EVENT(kvm_s390_sie_exit,
+	    TP_PROTO(VCPU_PROTO_COMMON, u8 icptcode),
+	    TP_ARGS(VCPU_ARGS_COMMON, icptcode),
+
+	    TP_STRUCT__entry(
+		    VCPU_FIELD_COMMON
+		    __field(u8, icptcode)
+		    ),
+
+	    TP_fast_assign(
+		    VCPU_ASSIGN_COMMON
+		    __entry->icptcode = icptcode;
+		    ),
+
+	    VCPU_TP_PRINTK("exit sie icptcode %d (%s)", __entry->icptcode,
+			   __print_symbolic(__entry->icptcode,
+					    sie_intercept_code))
+	);
+
+/*
+ * Trace point for intercepted instructions.
+ */
+TRACE_EVENT(kvm_s390_intercept_instruction,
+	    TP_PROTO(VCPU_PROTO_COMMON, __u16 ipa, __u32 ipb),
+	    TP_ARGS(VCPU_ARGS_COMMON, ipa, ipb),
+
+	    TP_STRUCT__entry(
+		    VCPU_FIELD_COMMON
+		    __field(__u64, instruction)
+		    ),
+
+	    TP_fast_assign(
+		    VCPU_ASSIGN_COMMON
+		    __entry->instruction = ((__u64)ipa << 48) |
+		    ((__u64)ipb << 16);
+		    ),
+
+	    VCPU_TP_PRINTK("intercepted instruction %016llx (%s)",
+			   __entry->instruction,
+			   __print_symbolic(icpt_insn_decoder(__entry->instruction),
+					    icpt_insn_codes))
+	);
+
+/*
+ * Trace point for intercepted program interruptions.
+ */
+TRACE_EVENT(kvm_s390_intercept_prog,
+	    TP_PROTO(VCPU_PROTO_COMMON, __u16 code),
+	    TP_ARGS(VCPU_ARGS_COMMON, code),
+
+	    TP_STRUCT__entry(
+		    VCPU_FIELD_COMMON
+		    __field(__u16, code)
+		    ),
+
+	    TP_fast_assign(
+		    VCPU_ASSIGN_COMMON
+		    __entry->code = code;
+		    ),
+
+	    VCPU_TP_PRINTK("intercepted program interruption %04x (%s)",
+			   __entry->code,
+			   __print_symbolic(__entry->code,
+					    icpt_prog_codes))
+	);
+
+/*
+ * Trace point for validity intercepts.
+ */
+TRACE_EVENT(kvm_s390_intercept_validity,
+	    TP_PROTO(VCPU_PROTO_COMMON, __u16 viwhy),
+	    TP_ARGS(VCPU_ARGS_COMMON, viwhy),
+
+	    TP_STRUCT__entry(
+		    VCPU_FIELD_COMMON
+		    __field(__u16, viwhy)
+		    ),
+
+	    TP_fast_assign(
+		    VCPU_ASSIGN_COMMON
+		    __entry->viwhy = viwhy;
+		    ),
+
+	    VCPU_TP_PRINTK("got validity intercept %04x", __entry->viwhy)
+	);
+
+/*
+ * Trace points for instructions that are of special interest.
+ */
+
+TRACE_EVENT(kvm_s390_handle_sigp,
+	    TP_PROTO(VCPU_PROTO_COMMON, __u8 order_code, __u16 cpu_addr, \
+		     __u32 parameter),
+	    TP_ARGS(VCPU_ARGS_COMMON, order_code, cpu_addr, parameter),
+
+	    TP_STRUCT__entry(
+		    VCPU_FIELD_COMMON
+		    __field(__u8, order_code)
+		    __field(__u16, cpu_addr)
+		    __field(__u32, parameter)
+		    ),
+
+	    TP_fast_assign(
+		    VCPU_ASSIGN_COMMON
+		    __entry->order_code = order_code;
+		    __entry->cpu_addr = cpu_addr;
+		    __entry->parameter = parameter;
+		    ),
+
+	    VCPU_TP_PRINTK("handle sigp order %02x (%s), cpu address %04x, " \
+			   "parameter %08x", __entry->order_code,
+			   __print_symbolic(__entry->order_code,
+					    sigp_order_codes),
+			   __entry->cpu_addr, __entry->parameter)
+	);
+
+TRACE_EVENT(kvm_s390_handle_sigp_pei,
+	    TP_PROTO(VCPU_PROTO_COMMON, __u8 order_code, __u16 cpu_addr),
+	    TP_ARGS(VCPU_ARGS_COMMON, order_code, cpu_addr),
+
+	    TP_STRUCT__entry(
+		    VCPU_FIELD_COMMON
+		    __field(__u8, order_code)
+		    __field(__u16, cpu_addr)
+		    ),
+
+	    TP_fast_assign(
+		    VCPU_ASSIGN_COMMON
+		    __entry->order_code = order_code;
+		    __entry->cpu_addr = cpu_addr;
+		    ),
+
+	    VCPU_TP_PRINTK("handle sigp pei order %02x (%s), cpu address %04x",
+			   __entry->order_code,
+			   __print_symbolic(__entry->order_code,
+					    sigp_order_codes),
+			   __entry->cpu_addr)
+	);
+
+TRACE_EVENT(kvm_s390_handle_diag,
+	    TP_PROTO(VCPU_PROTO_COMMON, __u16 code),
+	    TP_ARGS(VCPU_ARGS_COMMON, code),
+
+	    TP_STRUCT__entry(
+		    VCPU_FIELD_COMMON
+		    __field(__u16, code)
+		    ),
+
+	    TP_fast_assign(
+		    VCPU_ASSIGN_COMMON
+		    __entry->code = code;
+		    ),
+
+	    VCPU_TP_PRINTK("handle diagnose call %04x (%s)", __entry->code,
+			   __print_symbolic(__entry->code, diagnose_codes))
+	);
+
+TRACE_EVENT(kvm_s390_handle_lctl,
+	    TP_PROTO(VCPU_PROTO_COMMON, int g, int reg1, int reg3, u64 addr),
+	    TP_ARGS(VCPU_ARGS_COMMON, g, reg1, reg3, addr),
+
+	    TP_STRUCT__entry(
+		    VCPU_FIELD_COMMON
+		    __field(int, g)
+		    __field(int, reg1)
+		    __field(int, reg3)
+		    __field(u64, addr)
+		    ),
+
+	    TP_fast_assign(
+		    VCPU_ASSIGN_COMMON
+		    __entry->g = g;
+		    __entry->reg1 = reg1;
+		    __entry->reg3 = reg3;
+		    __entry->addr = addr;
+		    ),
+
+	    VCPU_TP_PRINTK("%s: loading cr %x-%x from %016llx",
+			   __entry->g ? "lctlg" : "lctl",
+			   __entry->reg1, __entry->reg3, __entry->addr)
+	);
+
+TRACE_EVENT(kvm_s390_handle_stctl,
+	    TP_PROTO(VCPU_PROTO_COMMON, int g, int reg1, int reg3, u64 addr),
+	    TP_ARGS(VCPU_ARGS_COMMON, g, reg1, reg3, addr),
+
+	    TP_STRUCT__entry(
+		    VCPU_FIELD_COMMON
+		    __field(int, g)
+		    __field(int, reg1)
+		    __field(int, reg3)
+		    __field(u64, addr)
+		    ),
+
+	    TP_fast_assign(
+		    VCPU_ASSIGN_COMMON
+		    __entry->g = g;
+		    __entry->reg1 = reg1;
+		    __entry->reg3 = reg3;
+		    __entry->addr = addr;
+		    ),
+
+	    VCPU_TP_PRINTK("%s: storing cr %x-%x to %016llx",
+			   __entry->g ? "stctg" : "stctl",
+			   __entry->reg1, __entry->reg3, __entry->addr)
+	);
+
+TRACE_EVENT(kvm_s390_handle_prefix,
+	    TP_PROTO(VCPU_PROTO_COMMON, int set, u32 address),
+	    TP_ARGS(VCPU_ARGS_COMMON, set, address),
+
+	    TP_STRUCT__entry(
+		    VCPU_FIELD_COMMON
+		    __field(int, set)
+		    __field(u32, address)
+		    ),
+
+	    TP_fast_assign(
+		    VCPU_ASSIGN_COMMON
+		    __entry->set = set;
+		    __entry->address = address;
+		    ),
+
+	    VCPU_TP_PRINTK("%s prefix to %08x",
+			   __entry->set ? "setting" : "storing",
+			   __entry->address)
+	);
+
+TRACE_EVENT(kvm_s390_handle_stap,
+	    TP_PROTO(VCPU_PROTO_COMMON, u64 address),
+	    TP_ARGS(VCPU_ARGS_COMMON, address),
+
+	    TP_STRUCT__entry(
+		    VCPU_FIELD_COMMON
+		    __field(u64, address)
+		    ),
+
+	    TP_fast_assign(
+		    VCPU_ASSIGN_COMMON
+		    __entry->address = address;
+		    ),
+
+	    VCPU_TP_PRINTK("storing cpu address to %016llx",
+			   __entry->address)
+	);
+
+TRACE_EVENT(kvm_s390_handle_stfl,
+	    TP_PROTO(VCPU_PROTO_COMMON, unsigned int facility_list),
+	    TP_ARGS(VCPU_ARGS_COMMON, facility_list),
+
+	    TP_STRUCT__entry(
+		    VCPU_FIELD_COMMON
+		    __field(unsigned int, facility_list)
+		    ),
+
+	    TP_fast_assign(
+		    VCPU_ASSIGN_COMMON
+		    __entry->facility_list = facility_list;
+		    ),
+
+	    VCPU_TP_PRINTK("store facility list value %08x",
+			   __entry->facility_list)
+	);
+
+TRACE_EVENT(kvm_s390_handle_stsi,
+	    TP_PROTO(VCPU_PROTO_COMMON, int fc, int sel1, int sel2, u64 addr),
+	    TP_ARGS(VCPU_ARGS_COMMON, fc, sel1, sel2, addr),
+
+	    TP_STRUCT__entry(
+		    VCPU_FIELD_COMMON
+		    __field(int, fc)
+		    __field(int, sel1)
+		    __field(int, sel2)
+		    __field(u64, addr)
+		    ),
+
+	    TP_fast_assign(
+		    VCPU_ASSIGN_COMMON
+		    __entry->fc = fc;
+		    __entry->sel1 = sel1;
+		    __entry->sel2 = sel2;
+		    __entry->addr = addr;
+		    ),
+
+	    VCPU_TP_PRINTK("STSI %d.%d.%d information stored to %016llx",
+			   __entry->fc, __entry->sel1, __entry->sel2,
+			   __entry->addr)
+	);
+
+TRACE_EVENT(kvm_s390_handle_operexc,
+	    TP_PROTO(VCPU_PROTO_COMMON, __u16 ipa, __u32 ipb),
+	    TP_ARGS(VCPU_ARGS_COMMON, ipa, ipb),
+
+	    TP_STRUCT__entry(
+		    VCPU_FIELD_COMMON
+		    __field(__u64, instruction)
+		    ),
+
+	    TP_fast_assign(
+		    VCPU_ASSIGN_COMMON
+		    __entry->instruction = ((__u64)ipa << 48) |
+		    ((__u64)ipb << 16);
+		    ),
+
+	    VCPU_TP_PRINTK("operation exception on instruction %016llx (%s)",
+			   __entry->instruction,
+			   __print_symbolic(icpt_insn_decoder(__entry->instruction),
+					    icpt_insn_codes))
+	);
+
+TRACE_EVENT(kvm_s390_handle_sthyi,
+	    TP_PROTO(VCPU_PROTO_COMMON, u64 code, u64 addr),
+	    TP_ARGS(VCPU_ARGS_COMMON, code, addr),
+
+	    TP_STRUCT__entry(
+		    VCPU_FIELD_COMMON
+		    __field(u64, code)
+		    __field(u64, addr)
+		    ),
+
+	    TP_fast_assign(
+		    VCPU_ASSIGN_COMMON
+		    __entry->code = code;
+		    __entry->addr = addr;
+		    ),
+
+	    VCPU_TP_PRINTK("STHYI fc: %llu addr: %016llx",
+			   __entry->code, __entry->addr)
+	);
+
+#endif /* _TRACE_KVM_H */
+
+/* This part must be outside protection */
+#include <trace/define_trace.h>
diff --git a/arch/s390/kvm/vsie.c b/arch/s390/kvm/vsie.c
new file mode 100644
index 0000000000..e55f489e1f
--- /dev/null
+++ b/arch/s390/kvm/vsie.c
@@ -0,0 +1,1488 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * kvm nested virtualization support for s390x
+ *
+ * Copyright IBM Corp. 2016, 2018
+ *
+ *    Author(s): David Hildenbrand <dahi@linux.vnet.ibm.com>
+ */
+#include <linux/vmalloc.h>
+#include <linux/kvm_host.h>
+#include <linux/bug.h>
+#include <linux/list.h>
+#include <linux/bitmap.h>
+#include <linux/sched/signal.h>
+
+#include <asm/gmap.h>
+#include <asm/mmu_context.h>
+#include <asm/sclp.h>
+#include <asm/nmi.h>
+#include <asm/dis.h>
+#include <asm/fpu/api.h>
+#include "kvm-s390.h"
+#include "gaccess.h"
+
+struct vsie_page {
+	struct kvm_s390_sie_block scb_s;	/* 0x0000 */
+	/*
+	 * the backup info for machine check. ensure it's at
+	 * the same offset as that in struct sie_page!
+	 */
+	struct mcck_volatile_info mcck_info;    /* 0x0200 */
+	/*
+	 * The pinned original scb. Be aware that other VCPUs can modify
+	 * it while we read from it. Values that are used for conditions or
+	 * are reused conditionally, should be accessed via READ_ONCE.
+	 */
+	struct kvm_s390_sie_block *scb_o;	/* 0x0218 */
+	/* the shadow gmap in use by the vsie_page */
+	struct gmap *gmap;			/* 0x0220 */
+	/* address of the last reported fault to guest2 */
+	unsigned long fault_addr;		/* 0x0228 */
+	/* calculated guest addresses of satellite control blocks */
+	gpa_t sca_gpa;				/* 0x0230 */
+	gpa_t itdba_gpa;			/* 0x0238 */
+	gpa_t gvrd_gpa;				/* 0x0240 */
+	gpa_t riccbd_gpa;			/* 0x0248 */
+	gpa_t sdnx_gpa;				/* 0x0250 */
+	__u8 reserved[0x0700 - 0x0258];		/* 0x0258 */
+	struct kvm_s390_crypto_cb crycb;	/* 0x0700 */
+	__u8 fac[S390_ARCH_FAC_LIST_SIZE_BYTE];	/* 0x0800 */
+};
+
+/* trigger a validity icpt for the given scb */
+static int set_validity_icpt(struct kvm_s390_sie_block *scb,
+			     __u16 reason_code)
+{
+	scb->ipa = 0x1000;
+	scb->ipb = ((__u32) reason_code) << 16;
+	scb->icptcode = ICPT_VALIDITY;
+	return 1;
+}
+
+/* mark the prefix as unmapped, this will block the VSIE */
+static void prefix_unmapped(struct vsie_page *vsie_page)
+{
+	atomic_or(PROG_REQUEST, &vsie_page->scb_s.prog20);
+}
+
+/* mark the prefix as unmapped and wait until the VSIE has been left */
+static void prefix_unmapped_sync(struct vsie_page *vsie_page)
+{
+	prefix_unmapped(vsie_page);
+	if (vsie_page->scb_s.prog0c & PROG_IN_SIE)
+		atomic_or(CPUSTAT_STOP_INT, &vsie_page->scb_s.cpuflags);
+	while (vsie_page->scb_s.prog0c & PROG_IN_SIE)
+		cpu_relax();
+}
+
+/* mark the prefix as mapped, this will allow the VSIE to run */
+static void prefix_mapped(struct vsie_page *vsie_page)
+{
+	atomic_andnot(PROG_REQUEST, &vsie_page->scb_s.prog20);
+}
+
+/* test if the prefix is mapped into the gmap shadow */
+static int prefix_is_mapped(struct vsie_page *vsie_page)
+{
+	return !(atomic_read(&vsie_page->scb_s.prog20) & PROG_REQUEST);
+}
+
+/* copy the updated intervention request bits into the shadow scb */
+static void update_intervention_requests(struct vsie_page *vsie_page)
+{
+	const int bits = CPUSTAT_STOP_INT | CPUSTAT_IO_INT | CPUSTAT_EXT_INT;
+	int cpuflags;
+
+	cpuflags = atomic_read(&vsie_page->scb_o->cpuflags);
+	atomic_andnot(bits, &vsie_page->scb_s.cpuflags);
+	atomic_or(cpuflags & bits, &vsie_page->scb_s.cpuflags);
+}
+
+/* shadow (filter and validate) the cpuflags  */
+static int prepare_cpuflags(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
+{
+	struct kvm_s390_sie_block *scb_s = &vsie_page->scb_s;
+	struct kvm_s390_sie_block *scb_o = vsie_page->scb_o;
+	int newflags, cpuflags = atomic_read(&scb_o->cpuflags);
+
+	/* we don't allow ESA/390 guests */
+	if (!(cpuflags & CPUSTAT_ZARCH))
+		return set_validity_icpt(scb_s, 0x0001U);
+
+	if (cpuflags & (CPUSTAT_RRF | CPUSTAT_MCDS))
+		return set_validity_icpt(scb_s, 0x0001U);
+	else if (cpuflags & (CPUSTAT_SLSV | CPUSTAT_SLSR))
+		return set_validity_icpt(scb_s, 0x0007U);
+
+	/* intervention requests will be set later */
+	newflags = CPUSTAT_ZARCH;
+	if (cpuflags & CPUSTAT_GED && test_kvm_facility(vcpu->kvm, 8))
+		newflags |= CPUSTAT_GED;
+	if (cpuflags & CPUSTAT_GED2 && test_kvm_facility(vcpu->kvm, 78)) {
+		if (cpuflags & CPUSTAT_GED)
+			return set_validity_icpt(scb_s, 0x0001U);
+		newflags |= CPUSTAT_GED2;
+	}
+	if (test_kvm_cpu_feat(vcpu->kvm, KVM_S390_VM_CPU_FEAT_GPERE))
+		newflags |= cpuflags & CPUSTAT_P;
+	if (test_kvm_cpu_feat(vcpu->kvm, KVM_S390_VM_CPU_FEAT_GSLS))
+		newflags |= cpuflags & CPUSTAT_SM;
+	if (test_kvm_cpu_feat(vcpu->kvm, KVM_S390_VM_CPU_FEAT_IBS))
+		newflags |= cpuflags & CPUSTAT_IBS;
+	if (test_kvm_cpu_feat(vcpu->kvm, KVM_S390_VM_CPU_FEAT_KSS))
+		newflags |= cpuflags & CPUSTAT_KSS;
+
+	atomic_set(&scb_s->cpuflags, newflags);
+	return 0;
+}
+/* Copy to APCB FORMAT1 from APCB FORMAT0 */
+static int setup_apcb10(struct kvm_vcpu *vcpu, struct kvm_s390_apcb1 *apcb_s,
+			unsigned long crycb_gpa, struct kvm_s390_apcb1 *apcb_h)
+{
+	struct kvm_s390_apcb0 tmp;
+	unsigned long apcb_gpa;
+
+	apcb_gpa = crycb_gpa + offsetof(struct kvm_s390_crypto_cb, apcb0);
+
+	if (read_guest_real(vcpu, apcb_gpa, &tmp,
+			    sizeof(struct kvm_s390_apcb0)))
+		return -EFAULT;
+
+	apcb_s->apm[0] = apcb_h->apm[0] & tmp.apm[0];
+	apcb_s->aqm[0] = apcb_h->aqm[0] & tmp.aqm[0] & 0xffff000000000000UL;
+	apcb_s->adm[0] = apcb_h->adm[0] & tmp.adm[0] & 0xffff000000000000UL;
+
+	return 0;
+
+}
+
+/**
+ * setup_apcb00 - Copy to APCB FORMAT0 from APCB FORMAT0
+ * @vcpu: pointer to the virtual CPU
+ * @apcb_s: pointer to start of apcb in the shadow crycb
+ * @crycb_gpa: guest physical address to start of original guest crycb
+ * @apcb_h: pointer to start of apcb in the guest1
+ *
+ * Returns 0 and -EFAULT on error reading guest apcb
+ */
+static int setup_apcb00(struct kvm_vcpu *vcpu, unsigned long *apcb_s,
+			unsigned long crycb_gpa, unsigned long *apcb_h)
+{
+	unsigned long apcb_gpa;
+
+	apcb_gpa = crycb_gpa + offsetof(struct kvm_s390_crypto_cb, apcb0);
+
+	if (read_guest_real(vcpu, apcb_gpa, apcb_s,
+			    sizeof(struct kvm_s390_apcb0)))
+		return -EFAULT;
+
+	bitmap_and(apcb_s, apcb_s, apcb_h,
+		   BITS_PER_BYTE * sizeof(struct kvm_s390_apcb0));
+
+	return 0;
+}
+
+/**
+ * setup_apcb11 - Copy the FORMAT1 APCB from the guest to the shadow CRYCB
+ * @vcpu: pointer to the virtual CPU
+ * @apcb_s: pointer to start of apcb in the shadow crycb
+ * @crycb_gpa: guest physical address to start of original guest crycb
+ * @apcb_h: pointer to start of apcb in the host
+ *
+ * Returns 0 and -EFAULT on error reading guest apcb
+ */
+static int setup_apcb11(struct kvm_vcpu *vcpu, unsigned long *apcb_s,
+			unsigned long crycb_gpa,
+			unsigned long *apcb_h)
+{
+	unsigned long apcb_gpa;
+
+	apcb_gpa = crycb_gpa + offsetof(struct kvm_s390_crypto_cb, apcb1);
+
+	if (read_guest_real(vcpu, apcb_gpa, apcb_s,
+			    sizeof(struct kvm_s390_apcb1)))
+		return -EFAULT;
+
+	bitmap_and(apcb_s, apcb_s, apcb_h,
+		   BITS_PER_BYTE * sizeof(struct kvm_s390_apcb1));
+
+	return 0;
+}
+
+/**
+ * setup_apcb - Create a shadow copy of the apcb.
+ * @vcpu: pointer to the virtual CPU
+ * @crycb_s: pointer to shadow crycb
+ * @crycb_gpa: guest physical address of original guest crycb
+ * @crycb_h: pointer to the host crycb
+ * @fmt_o: format of the original guest crycb.
+ * @fmt_h: format of the host crycb.
+ *
+ * Checks the compatibility between the guest and host crycb and calls the
+ * appropriate copy function.
+ *
+ * Return 0 or an error number if the guest and host crycb are incompatible.
+ */
+static int setup_apcb(struct kvm_vcpu *vcpu, struct kvm_s390_crypto_cb *crycb_s,
+	       const u32 crycb_gpa,
+	       struct kvm_s390_crypto_cb *crycb_h,
+	       int fmt_o, int fmt_h)
+{
+	switch (fmt_o) {
+	case CRYCB_FORMAT2:
+		if ((crycb_gpa & PAGE_MASK) != ((crycb_gpa + 256) & PAGE_MASK))
+			return -EACCES;
+		if (fmt_h != CRYCB_FORMAT2)
+			return -EINVAL;
+		return setup_apcb11(vcpu, (unsigned long *)&crycb_s->apcb1,
+				    crycb_gpa,
+				    (unsigned long *)&crycb_h->apcb1);
+	case CRYCB_FORMAT1:
+		switch (fmt_h) {
+		case CRYCB_FORMAT2:
+			return setup_apcb10(vcpu, &crycb_s->apcb1,
+					    crycb_gpa,
+					    &crycb_h->apcb1);
+		case CRYCB_FORMAT1:
+			return setup_apcb00(vcpu,
+					    (unsigned long *) &crycb_s->apcb0,
+					    crycb_gpa,
+					    (unsigned long *) &crycb_h->apcb0);
+		}
+		break;
+	case CRYCB_FORMAT0:
+		if ((crycb_gpa & PAGE_MASK) != ((crycb_gpa + 32) & PAGE_MASK))
+			return -EACCES;
+
+		switch (fmt_h) {
+		case CRYCB_FORMAT2:
+			return setup_apcb10(vcpu, &crycb_s->apcb1,
+					    crycb_gpa,
+					    &crycb_h->apcb1);
+		case CRYCB_FORMAT1:
+		case CRYCB_FORMAT0:
+			return setup_apcb00(vcpu,
+					    (unsigned long *) &crycb_s->apcb0,
+					    crycb_gpa,
+					    (unsigned long *) &crycb_h->apcb0);
+		}
+	}
+	return -EINVAL;
+}
+
+/**
+ * shadow_crycb - Create a shadow copy of the crycb block
+ * @vcpu: a pointer to the virtual CPU
+ * @vsie_page: a pointer to internal date used for the vSIE
+ *
+ * Create a shadow copy of the crycb block and setup key wrapping, if
+ * requested for guest 3 and enabled for guest 2.
+ *
+ * We accept format-1 or format-2, but we convert format-1 into format-2
+ * in the shadow CRYCB.
+ * Using format-2 enables the firmware to choose the right format when
+ * scheduling the SIE.
+ * There is nothing to do for format-0.
+ *
+ * This function centralize the issuing of set_validity_icpt() for all
+ * the subfunctions working on the crycb.
+ *
+ * Returns: - 0 if shadowed or nothing to do
+ *          - > 0 if control has to be given to guest 2
+ */
+static int shadow_crycb(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
+{
+	struct kvm_s390_sie_block *scb_s = &vsie_page->scb_s;
+	struct kvm_s390_sie_block *scb_o = vsie_page->scb_o;
+	const uint32_t crycbd_o = READ_ONCE(scb_o->crycbd);
+	const u32 crycb_addr = crycbd_o & 0x7ffffff8U;
+	unsigned long *b1, *b2;
+	u8 ecb3_flags;
+	u32 ecd_flags;
+	int apie_h;
+	int apie_s;
+	int key_msk = test_kvm_facility(vcpu->kvm, 76);
+	int fmt_o = crycbd_o & CRYCB_FORMAT_MASK;
+	int fmt_h = vcpu->arch.sie_block->crycbd & CRYCB_FORMAT_MASK;
+	int ret = 0;
+
+	scb_s->crycbd = 0;
+
+	apie_h = vcpu->arch.sie_block->eca & ECA_APIE;
+	apie_s = apie_h & scb_o->eca;
+	if (!apie_s && (!key_msk || (fmt_o == CRYCB_FORMAT0)))
+		return 0;
+
+	if (!crycb_addr)
+		return set_validity_icpt(scb_s, 0x0039U);
+
+	if (fmt_o == CRYCB_FORMAT1)
+		if ((crycb_addr & PAGE_MASK) !=
+		    ((crycb_addr + 128) & PAGE_MASK))
+			return set_validity_icpt(scb_s, 0x003CU);
+
+	if (apie_s) {
+		ret = setup_apcb(vcpu, &vsie_page->crycb, crycb_addr,
+				 vcpu->kvm->arch.crypto.crycb,
+				 fmt_o, fmt_h);
+		if (ret)
+			goto end;
+		scb_s->eca |= scb_o->eca & ECA_APIE;
+	}
+
+	/* we may only allow it if enabled for guest 2 */
+	ecb3_flags = scb_o->ecb3 & vcpu->arch.sie_block->ecb3 &
+		     (ECB3_AES | ECB3_DEA);
+	ecd_flags = scb_o->ecd & vcpu->arch.sie_block->ecd & ECD_ECC;
+	if (!ecb3_flags && !ecd_flags)
+		goto end;
+
+	/* copy only the wrapping keys */
+	if (read_guest_real(vcpu, crycb_addr + 72,
+			    vsie_page->crycb.dea_wrapping_key_mask, 56))
+		return set_validity_icpt(scb_s, 0x0035U);
+
+	scb_s->ecb3 |= ecb3_flags;
+	scb_s->ecd |= ecd_flags;
+
+	/* xor both blocks in one run */
+	b1 = (unsigned long *) vsie_page->crycb.dea_wrapping_key_mask;
+	b2 = (unsigned long *)
+			    vcpu->kvm->arch.crypto.crycb->dea_wrapping_key_mask;
+	/* as 56%8 == 0, bitmap_xor won't overwrite any data */
+	bitmap_xor(b1, b1, b2, BITS_PER_BYTE * 56);
+end:
+	switch (ret) {
+	case -EINVAL:
+		return set_validity_icpt(scb_s, 0x0022U);
+	case -EFAULT:
+		return set_validity_icpt(scb_s, 0x0035U);
+	case -EACCES:
+		return set_validity_icpt(scb_s, 0x003CU);
+	}
+	scb_s->crycbd = ((__u32)(__u64) &vsie_page->crycb) | CRYCB_FORMAT2;
+	return 0;
+}
+
+/* shadow (round up/down) the ibc to avoid validity icpt */
+static void prepare_ibc(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
+{
+	struct kvm_s390_sie_block *scb_s = &vsie_page->scb_s;
+	struct kvm_s390_sie_block *scb_o = vsie_page->scb_o;
+	/* READ_ONCE does not work on bitfields - use a temporary variable */
+	const uint32_t __new_ibc = scb_o->ibc;
+	const uint32_t new_ibc = READ_ONCE(__new_ibc) & 0x0fffU;
+	__u64 min_ibc = (sclp.ibc >> 16) & 0x0fffU;
+
+	scb_s->ibc = 0;
+	/* ibc installed in g2 and requested for g3 */
+	if (vcpu->kvm->arch.model.ibc && new_ibc) {
+		scb_s->ibc = new_ibc;
+		/* takte care of the minimum ibc level of the machine */
+		if (scb_s->ibc < min_ibc)
+			scb_s->ibc = min_ibc;
+		/* take care of the maximum ibc level set for the guest */
+		if (scb_s->ibc > vcpu->kvm->arch.model.ibc)
+			scb_s->ibc = vcpu->kvm->arch.model.ibc;
+	}
+}
+
+/* unshadow the scb, copying parameters back to the real scb */
+static void unshadow_scb(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
+{
+	struct kvm_s390_sie_block *scb_s = &vsie_page->scb_s;
+	struct kvm_s390_sie_block *scb_o = vsie_page->scb_o;
+
+	/* interception */
+	scb_o->icptcode = scb_s->icptcode;
+	scb_o->icptstatus = scb_s->icptstatus;
+	scb_o->ipa = scb_s->ipa;
+	scb_o->ipb = scb_s->ipb;
+	scb_o->gbea = scb_s->gbea;
+
+	/* timer */
+	scb_o->cputm = scb_s->cputm;
+	scb_o->ckc = scb_s->ckc;
+	scb_o->todpr = scb_s->todpr;
+
+	/* guest state */
+	scb_o->gpsw = scb_s->gpsw;
+	scb_o->gg14 = scb_s->gg14;
+	scb_o->gg15 = scb_s->gg15;
+	memcpy(scb_o->gcr, scb_s->gcr, 128);
+	scb_o->pp = scb_s->pp;
+
+	/* branch prediction */
+	if (test_kvm_facility(vcpu->kvm, 82)) {
+		scb_o->fpf &= ~FPF_BPBC;
+		scb_o->fpf |= scb_s->fpf & FPF_BPBC;
+	}
+
+	/* interrupt intercept */
+	switch (scb_s->icptcode) {
+	case ICPT_PROGI:
+	case ICPT_INSTPROGI:
+	case ICPT_EXTINT:
+		memcpy((void *)((u64)scb_o + 0xc0),
+		       (void *)((u64)scb_s + 0xc0), 0xf0 - 0xc0);
+		break;
+	}
+
+	if (scb_s->ihcpu != 0xffffU)
+		scb_o->ihcpu = scb_s->ihcpu;
+}
+
+/*
+ * Setup the shadow scb by copying and checking the relevant parts of the g2
+ * provided scb.
+ *
+ * Returns: - 0 if the scb has been shadowed
+ *          - > 0 if control has to be given to guest 2
+ */
+static int shadow_scb(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
+{
+	struct kvm_s390_sie_block *scb_o = vsie_page->scb_o;
+	struct kvm_s390_sie_block *scb_s = &vsie_page->scb_s;
+	/* READ_ONCE does not work on bitfields - use a temporary variable */
+	const uint32_t __new_prefix = scb_o->prefix;
+	const uint32_t new_prefix = READ_ONCE(__new_prefix);
+	const bool wants_tx = READ_ONCE(scb_o->ecb) & ECB_TE;
+	bool had_tx = scb_s->ecb & ECB_TE;
+	unsigned long new_mso = 0;
+	int rc;
+
+	/* make sure we don't have any leftovers when reusing the scb */
+	scb_s->icptcode = 0;
+	scb_s->eca = 0;
+	scb_s->ecb = 0;
+	scb_s->ecb2 = 0;
+	scb_s->ecb3 = 0;
+	scb_s->ecd = 0;
+	scb_s->fac = 0;
+	scb_s->fpf = 0;
+
+	rc = prepare_cpuflags(vcpu, vsie_page);
+	if (rc)
+		goto out;
+
+	/* timer */
+	scb_s->cputm = scb_o->cputm;
+	scb_s->ckc = scb_o->ckc;
+	scb_s->todpr = scb_o->todpr;
+	scb_s->epoch = scb_o->epoch;
+
+	/* guest state */
+	scb_s->gpsw = scb_o->gpsw;
+	scb_s->gg14 = scb_o->gg14;
+	scb_s->gg15 = scb_o->gg15;
+	memcpy(scb_s->gcr, scb_o->gcr, 128);
+	scb_s->pp = scb_o->pp;
+
+	/* interception / execution handling */
+	scb_s->gbea = scb_o->gbea;
+	scb_s->lctl = scb_o->lctl;
+	scb_s->svcc = scb_o->svcc;
+	scb_s->ictl = scb_o->ictl;
+	/*
+	 * SKEY handling functions can't deal with false setting of PTE invalid
+	 * bits. Therefore we cannot provide interpretation and would later
+	 * have to provide own emulation handlers.
+	 */
+	if (!(atomic_read(&scb_s->cpuflags) & CPUSTAT_KSS))
+		scb_s->ictl |= ICTL_ISKE | ICTL_SSKE | ICTL_RRBE;
+
+	scb_s->icpua = scb_o->icpua;
+
+	if (!(atomic_read(&scb_s->cpuflags) & CPUSTAT_SM))
+		new_mso = READ_ONCE(scb_o->mso) & 0xfffffffffff00000UL;
+	/* if the hva of the prefix changes, we have to remap the prefix */
+	if (scb_s->mso != new_mso || scb_s->prefix != new_prefix)
+		prefix_unmapped(vsie_page);
+	 /* SIE will do mso/msl validity and exception checks for us */
+	scb_s->msl = scb_o->msl & 0xfffffffffff00000UL;
+	scb_s->mso = new_mso;
+	scb_s->prefix = new_prefix;
+
+	/* We have to definitely flush the tlb if this scb never ran */
+	if (scb_s->ihcpu != 0xffffU)
+		scb_s->ihcpu = scb_o->ihcpu;
+
+	/* MVPG and Protection Exception Interpretation are always available */
+	scb_s->eca |= scb_o->eca & (ECA_MVPGI | ECA_PROTEXCI);
+	/* Host-protection-interruption introduced with ESOP */
+	if (test_kvm_cpu_feat(vcpu->kvm, KVM_S390_VM_CPU_FEAT_ESOP))
+		scb_s->ecb |= scb_o->ecb & ECB_HOSTPROTINT;
+	/*
+	 * CPU Topology
+	 * This facility only uses the utility field of the SCA and none of
+	 * the cpu entries that are problematic with the other interpretation
+	 * facilities so we can pass it through
+	 */
+	if (test_kvm_facility(vcpu->kvm, 11))
+		scb_s->ecb |= scb_o->ecb & ECB_PTF;
+	/* transactional execution */
+	if (test_kvm_facility(vcpu->kvm, 73) && wants_tx) {
+		/* remap the prefix is tx is toggled on */
+		if (!had_tx)
+			prefix_unmapped(vsie_page);
+		scb_s->ecb |= ECB_TE;
+	}
+	/* specification exception interpretation */
+	scb_s->ecb |= scb_o->ecb & ECB_SPECI;
+	/* branch prediction */
+	if (test_kvm_facility(vcpu->kvm, 82))
+		scb_s->fpf |= scb_o->fpf & FPF_BPBC;
+	/* SIMD */
+	if (test_kvm_facility(vcpu->kvm, 129)) {
+		scb_s->eca |= scb_o->eca & ECA_VX;
+		scb_s->ecd |= scb_o->ecd & ECD_HOSTREGMGMT;
+	}
+	/* Run-time-Instrumentation */
+	if (test_kvm_facility(vcpu->kvm, 64))
+		scb_s->ecb3 |= scb_o->ecb3 & ECB3_RI;
+	/* Instruction Execution Prevention */
+	if (test_kvm_facility(vcpu->kvm, 130))
+		scb_s->ecb2 |= scb_o->ecb2 & ECB2_IEP;
+	/* Guarded Storage */
+	if (test_kvm_facility(vcpu->kvm, 133)) {
+		scb_s->ecb |= scb_o->ecb & ECB_GS;
+		scb_s->ecd |= scb_o->ecd & ECD_HOSTREGMGMT;
+	}
+	if (test_kvm_cpu_feat(vcpu->kvm, KVM_S390_VM_CPU_FEAT_SIIF))
+		scb_s->eca |= scb_o->eca & ECA_SII;
+	if (test_kvm_cpu_feat(vcpu->kvm, KVM_S390_VM_CPU_FEAT_IB))
+		scb_s->eca |= scb_o->eca & ECA_IB;
+	if (test_kvm_cpu_feat(vcpu->kvm, KVM_S390_VM_CPU_FEAT_CEI))
+		scb_s->eca |= scb_o->eca & ECA_CEI;
+	/* Epoch Extension */
+	if (test_kvm_facility(vcpu->kvm, 139)) {
+		scb_s->ecd |= scb_o->ecd & ECD_MEF;
+		scb_s->epdx = scb_o->epdx;
+	}
+
+	/* etoken */
+	if (test_kvm_facility(vcpu->kvm, 156))
+		scb_s->ecd |= scb_o->ecd & ECD_ETOKENF;
+
+	scb_s->hpid = HPID_VSIE;
+	scb_s->cpnc = scb_o->cpnc;
+
+	prepare_ibc(vcpu, vsie_page);
+	rc = shadow_crycb(vcpu, vsie_page);
+out:
+	if (rc)
+		unshadow_scb(vcpu, vsie_page);
+	return rc;
+}
+
+void kvm_s390_vsie_gmap_notifier(struct gmap *gmap, unsigned long start,
+				 unsigned long end)
+{
+	struct kvm *kvm = gmap->private;
+	struct vsie_page *cur;
+	unsigned long prefix;
+	struct page *page;
+	int i;
+
+	if (!gmap_is_shadow(gmap))
+		return;
+	/*
+	 * Only new shadow blocks are added to the list during runtime,
+	 * therefore we can safely reference them all the time.
+	 */
+	for (i = 0; i < kvm->arch.vsie.page_count; i++) {
+		page = READ_ONCE(kvm->arch.vsie.pages[i]);
+		if (!page)
+			continue;
+		cur = page_to_virt(page);
+		if (READ_ONCE(cur->gmap) != gmap)
+			continue;
+		prefix = cur->scb_s.prefix << GUEST_PREFIX_SHIFT;
+		/* with mso/msl, the prefix lies at an offset */
+		prefix += cur->scb_s.mso;
+		if (prefix <= end && start <= prefix + 2 * PAGE_SIZE - 1)
+			prefix_unmapped_sync(cur);
+	}
+}
+
+/*
+ * Map the first prefix page and if tx is enabled also the second prefix page.
+ *
+ * The prefix will be protected, a gmap notifier will inform about unmaps.
+ * The shadow scb must not be executed until the prefix is remapped, this is
+ * guaranteed by properly handling PROG_REQUEST.
+ *
+ * Returns: - 0 on if successfully mapped or already mapped
+ *          - > 0 if control has to be given to guest 2
+ *          - -EAGAIN if the caller can retry immediately
+ *          - -ENOMEM if out of memory
+ */
+static int map_prefix(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
+{
+	struct kvm_s390_sie_block *scb_s = &vsie_page->scb_s;
+	u64 prefix = scb_s->prefix << GUEST_PREFIX_SHIFT;
+	int rc;
+
+	if (prefix_is_mapped(vsie_page))
+		return 0;
+
+	/* mark it as mapped so we can catch any concurrent unmappers */
+	prefix_mapped(vsie_page);
+
+	/* with mso/msl, the prefix lies at offset *mso* */
+	prefix += scb_s->mso;
+
+	rc = kvm_s390_shadow_fault(vcpu, vsie_page->gmap, prefix, NULL);
+	if (!rc && (scb_s->ecb & ECB_TE))
+		rc = kvm_s390_shadow_fault(vcpu, vsie_page->gmap,
+					   prefix + PAGE_SIZE, NULL);
+	/*
+	 * We don't have to mprotect, we will be called for all unshadows.
+	 * SIE will detect if protection applies and trigger a validity.
+	 */
+	if (rc)
+		prefix_unmapped(vsie_page);
+	if (rc > 0 || rc == -EFAULT)
+		rc = set_validity_icpt(scb_s, 0x0037U);
+	return rc;
+}
+
+/*
+ * Pin the guest page given by gpa and set hpa to the pinned host address.
+ * Will always be pinned writable.
+ *
+ * Returns: - 0 on success
+ *          - -EINVAL if the gpa is not valid guest storage
+ */
+static int pin_guest_page(struct kvm *kvm, gpa_t gpa, hpa_t *hpa)
+{
+	struct page *page;
+
+	page = gfn_to_page(kvm, gpa_to_gfn(gpa));
+	if (is_error_page(page))
+		return -EINVAL;
+	*hpa = (hpa_t)page_to_phys(page) + (gpa & ~PAGE_MASK);
+	return 0;
+}
+
+/* Unpins a page previously pinned via pin_guest_page, marking it as dirty. */
+static void unpin_guest_page(struct kvm *kvm, gpa_t gpa, hpa_t hpa)
+{
+	kvm_release_pfn_dirty(hpa >> PAGE_SHIFT);
+	/* mark the page always as dirty for migration */
+	mark_page_dirty(kvm, gpa_to_gfn(gpa));
+}
+
+/* unpin all blocks previously pinned by pin_blocks(), marking them dirty */
+static void unpin_blocks(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
+{
+	struct kvm_s390_sie_block *scb_s = &vsie_page->scb_s;
+	hpa_t hpa;
+
+	hpa = (u64) scb_s->scaoh << 32 | scb_s->scaol;
+	if (hpa) {
+		unpin_guest_page(vcpu->kvm, vsie_page->sca_gpa, hpa);
+		vsie_page->sca_gpa = 0;
+		scb_s->scaol = 0;
+		scb_s->scaoh = 0;
+	}
+
+	hpa = scb_s->itdba;
+	if (hpa) {
+		unpin_guest_page(vcpu->kvm, vsie_page->itdba_gpa, hpa);
+		vsie_page->itdba_gpa = 0;
+		scb_s->itdba = 0;
+	}
+
+	hpa = scb_s->gvrd;
+	if (hpa) {
+		unpin_guest_page(vcpu->kvm, vsie_page->gvrd_gpa, hpa);
+		vsie_page->gvrd_gpa = 0;
+		scb_s->gvrd = 0;
+	}
+
+	hpa = scb_s->riccbd;
+	if (hpa) {
+		unpin_guest_page(vcpu->kvm, vsie_page->riccbd_gpa, hpa);
+		vsie_page->riccbd_gpa = 0;
+		scb_s->riccbd = 0;
+	}
+
+	hpa = scb_s->sdnxo;
+	if (hpa) {
+		unpin_guest_page(vcpu->kvm, vsie_page->sdnx_gpa, hpa);
+		vsie_page->sdnx_gpa = 0;
+		scb_s->sdnxo = 0;
+	}
+}
+
+/*
+ * Instead of shadowing some blocks, we can simply forward them because the
+ * addresses in the scb are 64 bit long.
+ *
+ * This works as long as the data lies in one page. If blocks ever exceed one
+ * page, we have to fall back to shadowing.
+ *
+ * As we reuse the sca, the vcpu pointers contained in it are invalid. We must
+ * therefore not enable any facilities that access these pointers (e.g. SIGPIF).
+ *
+ * Returns: - 0 if all blocks were pinned.
+ *          - > 0 if control has to be given to guest 2
+ *          - -ENOMEM if out of memory
+ */
+static int pin_blocks(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
+{
+	struct kvm_s390_sie_block *scb_o = vsie_page->scb_o;
+	struct kvm_s390_sie_block *scb_s = &vsie_page->scb_s;
+	hpa_t hpa;
+	gpa_t gpa;
+	int rc = 0;
+
+	gpa = READ_ONCE(scb_o->scaol) & ~0xfUL;
+	if (test_kvm_cpu_feat(vcpu->kvm, KVM_S390_VM_CPU_FEAT_64BSCAO))
+		gpa |= (u64) READ_ONCE(scb_o->scaoh) << 32;
+	if (gpa) {
+		if (gpa < 2 * PAGE_SIZE)
+			rc = set_validity_icpt(scb_s, 0x0038U);
+		else if ((gpa & ~0x1fffUL) == kvm_s390_get_prefix(vcpu))
+			rc = set_validity_icpt(scb_s, 0x0011U);
+		else if ((gpa & PAGE_MASK) !=
+			 ((gpa + sizeof(struct bsca_block) - 1) & PAGE_MASK))
+			rc = set_validity_icpt(scb_s, 0x003bU);
+		if (!rc) {
+			rc = pin_guest_page(vcpu->kvm, gpa, &hpa);
+			if (rc)
+				rc = set_validity_icpt(scb_s, 0x0034U);
+		}
+		if (rc)
+			goto unpin;
+		vsie_page->sca_gpa = gpa;
+		scb_s->scaoh = (u32)((u64)hpa >> 32);
+		scb_s->scaol = (u32)(u64)hpa;
+	}
+
+	gpa = READ_ONCE(scb_o->itdba) & ~0xffUL;
+	if (gpa && (scb_s->ecb & ECB_TE)) {
+		if (gpa < 2 * PAGE_SIZE) {
+			rc = set_validity_icpt(scb_s, 0x0080U);
+			goto unpin;
+		}
+		/* 256 bytes cannot cross page boundaries */
+		rc = pin_guest_page(vcpu->kvm, gpa, &hpa);
+		if (rc) {
+			rc = set_validity_icpt(scb_s, 0x0080U);
+			goto unpin;
+		}
+		vsie_page->itdba_gpa = gpa;
+		scb_s->itdba = hpa;
+	}
+
+	gpa = READ_ONCE(scb_o->gvrd) & ~0x1ffUL;
+	if (gpa && (scb_s->eca & ECA_VX) && !(scb_s->ecd & ECD_HOSTREGMGMT)) {
+		if (gpa < 2 * PAGE_SIZE) {
+			rc = set_validity_icpt(scb_s, 0x1310U);
+			goto unpin;
+		}
+		/*
+		 * 512 bytes vector registers cannot cross page boundaries
+		 * if this block gets bigger, we have to shadow it.
+		 */
+		rc = pin_guest_page(vcpu->kvm, gpa, &hpa);
+		if (rc) {
+			rc = set_validity_icpt(scb_s, 0x1310U);
+			goto unpin;
+		}
+		vsie_page->gvrd_gpa = gpa;
+		scb_s->gvrd = hpa;
+	}
+
+	gpa = READ_ONCE(scb_o->riccbd) & ~0x3fUL;
+	if (gpa && (scb_s->ecb3 & ECB3_RI)) {
+		if (gpa < 2 * PAGE_SIZE) {
+			rc = set_validity_icpt(scb_s, 0x0043U);
+			goto unpin;
+		}
+		/* 64 bytes cannot cross page boundaries */
+		rc = pin_guest_page(vcpu->kvm, gpa, &hpa);
+		if (rc) {
+			rc = set_validity_icpt(scb_s, 0x0043U);
+			goto unpin;
+		}
+		/* Validity 0x0044 will be checked by SIE */
+		vsie_page->riccbd_gpa = gpa;
+		scb_s->riccbd = hpa;
+	}
+	if (((scb_s->ecb & ECB_GS) && !(scb_s->ecd & ECD_HOSTREGMGMT)) ||
+	    (scb_s->ecd & ECD_ETOKENF)) {
+		unsigned long sdnxc;
+
+		gpa = READ_ONCE(scb_o->sdnxo) & ~0xfUL;
+		sdnxc = READ_ONCE(scb_o->sdnxo) & 0xfUL;
+		if (!gpa || gpa < 2 * PAGE_SIZE) {
+			rc = set_validity_icpt(scb_s, 0x10b0U);
+			goto unpin;
+		}
+		if (sdnxc < 6 || sdnxc > 12) {
+			rc = set_validity_icpt(scb_s, 0x10b1U);
+			goto unpin;
+		}
+		if (gpa & ((1 << sdnxc) - 1)) {
+			rc = set_validity_icpt(scb_s, 0x10b2U);
+			goto unpin;
+		}
+		/* Due to alignment rules (checked above) this cannot
+		 * cross page boundaries
+		 */
+		rc = pin_guest_page(vcpu->kvm, gpa, &hpa);
+		if (rc) {
+			rc = set_validity_icpt(scb_s, 0x10b0U);
+			goto unpin;
+		}
+		vsie_page->sdnx_gpa = gpa;
+		scb_s->sdnxo = hpa | sdnxc;
+	}
+	return 0;
+unpin:
+	unpin_blocks(vcpu, vsie_page);
+	return rc;
+}
+
+/* unpin the scb provided by guest 2, marking it as dirty */
+static void unpin_scb(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page,
+		      gpa_t gpa)
+{
+	hpa_t hpa = (hpa_t) vsie_page->scb_o;
+
+	if (hpa)
+		unpin_guest_page(vcpu->kvm, gpa, hpa);
+	vsie_page->scb_o = NULL;
+}
+
+/*
+ * Pin the scb at gpa provided by guest 2 at vsie_page->scb_o.
+ *
+ * Returns: - 0 if the scb was pinned.
+ *          - > 0 if control has to be given to guest 2
+ */
+static int pin_scb(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page,
+		   gpa_t gpa)
+{
+	hpa_t hpa;
+	int rc;
+
+	rc = pin_guest_page(vcpu->kvm, gpa, &hpa);
+	if (rc) {
+		rc = kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
+		WARN_ON_ONCE(rc);
+		return 1;
+	}
+	vsie_page->scb_o = phys_to_virt(hpa);
+	return 0;
+}
+
+/*
+ * Inject a fault into guest 2.
+ *
+ * Returns: - > 0 if control has to be given to guest 2
+ *            < 0 if an error occurred during injection.
+ */
+static int inject_fault(struct kvm_vcpu *vcpu, __u16 code, __u64 vaddr,
+			bool write_flag)
+{
+	struct kvm_s390_pgm_info pgm = {
+		.code = code,
+		.trans_exc_code =
+			/* 0-51: virtual address */
+			(vaddr & 0xfffffffffffff000UL) |
+			/* 52-53: store / fetch */
+			(((unsigned int) !write_flag) + 1) << 10,
+			/* 62-63: asce id (always primary == 0) */
+		.exc_access_id = 0, /* always primary */
+		.op_access_id = 0, /* not MVPG */
+	};
+	int rc;
+
+	if (code == PGM_PROTECTION)
+		pgm.trans_exc_code |= 0x4UL;
+
+	rc = kvm_s390_inject_prog_irq(vcpu, &pgm);
+	return rc ? rc : 1;
+}
+
+/*
+ * Handle a fault during vsie execution on a gmap shadow.
+ *
+ * Returns: - 0 if the fault was resolved
+ *          - > 0 if control has to be given to guest 2
+ *          - < 0 if an error occurred
+ */
+static int handle_fault(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
+{
+	int rc;
+
+	if (current->thread.gmap_int_code == PGM_PROTECTION)
+		/* we can directly forward all protection exceptions */
+		return inject_fault(vcpu, PGM_PROTECTION,
+				    current->thread.gmap_addr, 1);
+
+	rc = kvm_s390_shadow_fault(vcpu, vsie_page->gmap,
+				   current->thread.gmap_addr, NULL);
+	if (rc > 0) {
+		rc = inject_fault(vcpu, rc,
+				  current->thread.gmap_addr,
+				  current->thread.gmap_write_flag);
+		if (rc >= 0)
+			vsie_page->fault_addr = current->thread.gmap_addr;
+	}
+	return rc;
+}
+
+/*
+ * Retry the previous fault that required guest 2 intervention. This avoids
+ * one superfluous SIE re-entry and direct exit.
+ *
+ * Will ignore any errors. The next SIE fault will do proper fault handling.
+ */
+static void handle_last_fault(struct kvm_vcpu *vcpu,
+			      struct vsie_page *vsie_page)
+{
+	if (vsie_page->fault_addr)
+		kvm_s390_shadow_fault(vcpu, vsie_page->gmap,
+				      vsie_page->fault_addr, NULL);
+	vsie_page->fault_addr = 0;
+}
+
+static inline void clear_vsie_icpt(struct vsie_page *vsie_page)
+{
+	vsie_page->scb_s.icptcode = 0;
+}
+
+/* rewind the psw and clear the vsie icpt, so we can retry execution */
+static void retry_vsie_icpt(struct vsie_page *vsie_page)
+{
+	struct kvm_s390_sie_block *scb_s = &vsie_page->scb_s;
+	int ilen = insn_length(scb_s->ipa >> 8);
+
+	/* take care of EXECUTE instructions */
+	if (scb_s->icptstatus & 1) {
+		ilen = (scb_s->icptstatus >> 4) & 0x6;
+		if (!ilen)
+			ilen = 4;
+	}
+	scb_s->gpsw.addr = __rewind_psw(scb_s->gpsw, ilen);
+	clear_vsie_icpt(vsie_page);
+}
+
+/*
+ * Try to shadow + enable the guest 2 provided facility list.
+ * Retry instruction execution if enabled for and provided by guest 2.
+ *
+ * Returns: - 0 if handled (retry or guest 2 icpt)
+ *          - > 0 if control has to be given to guest 2
+ */
+static int handle_stfle(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
+{
+	struct kvm_s390_sie_block *scb_s = &vsie_page->scb_s;
+	__u32 fac = READ_ONCE(vsie_page->scb_o->fac) & 0x7ffffff8U;
+
+	if (fac && test_kvm_facility(vcpu->kvm, 7)) {
+		retry_vsie_icpt(vsie_page);
+		if (read_guest_real(vcpu, fac, &vsie_page->fac,
+				    sizeof(vsie_page->fac)))
+			return set_validity_icpt(scb_s, 0x1090U);
+		scb_s->fac = (__u32)(__u64) &vsie_page->fac;
+	}
+	return 0;
+}
+
+/*
+ * Get a register for a nested guest.
+ * @vcpu the vcpu of the guest
+ * @vsie_page the vsie_page for the nested guest
+ * @reg the register number, the upper 4 bits are ignored.
+ * returns: the value of the register.
+ */
+static u64 vsie_get_register(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page, u8 reg)
+{
+	/* no need to validate the parameter and/or perform error handling */
+	reg &= 0xf;
+	switch (reg) {
+	case 15:
+		return vsie_page->scb_s.gg15;
+	case 14:
+		return vsie_page->scb_s.gg14;
+	default:
+		return vcpu->run->s.regs.gprs[reg];
+	}
+}
+
+static int vsie_handle_mvpg(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
+{
+	struct kvm_s390_sie_block *scb_s = &vsie_page->scb_s;
+	unsigned long pei_dest, pei_src, src, dest, mask, prefix;
+	u64 *pei_block = &vsie_page->scb_o->mcic;
+	int edat, rc_dest, rc_src;
+	union ctlreg0 cr0;
+
+	cr0.val = vcpu->arch.sie_block->gcr[0];
+	edat = cr0.edat && test_kvm_facility(vcpu->kvm, 8);
+	mask = _kvm_s390_logical_to_effective(&scb_s->gpsw, PAGE_MASK);
+	prefix = scb_s->prefix << GUEST_PREFIX_SHIFT;
+
+	dest = vsie_get_register(vcpu, vsie_page, scb_s->ipb >> 20) & mask;
+	dest = _kvm_s390_real_to_abs(prefix, dest) + scb_s->mso;
+	src = vsie_get_register(vcpu, vsie_page, scb_s->ipb >> 16) & mask;
+	src = _kvm_s390_real_to_abs(prefix, src) + scb_s->mso;
+
+	rc_dest = kvm_s390_shadow_fault(vcpu, vsie_page->gmap, dest, &pei_dest);
+	rc_src = kvm_s390_shadow_fault(vcpu, vsie_page->gmap, src, &pei_src);
+	/*
+	 * Either everything went well, or something non-critical went wrong
+	 * e.g. because of a race. In either case, simply retry.
+	 */
+	if (rc_dest == -EAGAIN || rc_src == -EAGAIN || (!rc_dest && !rc_src)) {
+		retry_vsie_icpt(vsie_page);
+		return -EAGAIN;
+	}
+	/* Something more serious went wrong, propagate the error */
+	if (rc_dest < 0)
+		return rc_dest;
+	if (rc_src < 0)
+		return rc_src;
+
+	/* The only possible suppressing exception: just deliver it */
+	if (rc_dest == PGM_TRANSLATION_SPEC || rc_src == PGM_TRANSLATION_SPEC) {
+		clear_vsie_icpt(vsie_page);
+		rc_dest = kvm_s390_inject_program_int(vcpu, PGM_TRANSLATION_SPEC);
+		WARN_ON_ONCE(rc_dest);
+		return 1;
+	}
+
+	/*
+	 * Forward the PEI intercept to the guest if it was a page fault, or
+	 * also for segment and region table faults if EDAT applies.
+	 */
+	if (edat) {
+		rc_dest = rc_dest == PGM_ASCE_TYPE ? rc_dest : 0;
+		rc_src = rc_src == PGM_ASCE_TYPE ? rc_src : 0;
+	} else {
+		rc_dest = rc_dest != PGM_PAGE_TRANSLATION ? rc_dest : 0;
+		rc_src = rc_src != PGM_PAGE_TRANSLATION ? rc_src : 0;
+	}
+	if (!rc_dest && !rc_src) {
+		pei_block[0] = pei_dest;
+		pei_block[1] = pei_src;
+		return 1;
+	}
+
+	retry_vsie_icpt(vsie_page);
+
+	/*
+	 * The host has edat, and the guest does not, or it was an ASCE type
+	 * exception. The host needs to inject the appropriate DAT interrupts
+	 * into the guest.
+	 */
+	if (rc_dest)
+		return inject_fault(vcpu, rc_dest, dest, 1);
+	return inject_fault(vcpu, rc_src, src, 0);
+}
+
+/*
+ * Run the vsie on a shadow scb and a shadow gmap, without any further
+ * sanity checks, handling SIE faults.
+ *
+ * Returns: - 0 everything went fine
+ *          - > 0 if control has to be given to guest 2
+ *          - < 0 if an error occurred
+ */
+static int do_vsie_run(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
+	__releases(vcpu->kvm->srcu)
+	__acquires(vcpu->kvm->srcu)
+{
+	struct kvm_s390_sie_block *scb_s = &vsie_page->scb_s;
+	struct kvm_s390_sie_block *scb_o = vsie_page->scb_o;
+	int guest_bp_isolation;
+	int rc = 0;
+
+	handle_last_fault(vcpu, vsie_page);
+
+	kvm_vcpu_srcu_read_unlock(vcpu);
+
+	/* save current guest state of bp isolation override */
+	guest_bp_isolation = test_thread_flag(TIF_ISOLATE_BP_GUEST);
+
+	/*
+	 * The guest is running with BPBC, so we have to force it on for our
+	 * nested guest. This is done by enabling BPBC globally, so the BPBC
+	 * control in the SCB (which the nested guest can modify) is simply
+	 * ignored.
+	 */
+	if (test_kvm_facility(vcpu->kvm, 82) &&
+	    vcpu->arch.sie_block->fpf & FPF_BPBC)
+		set_thread_flag(TIF_ISOLATE_BP_GUEST);
+
+	local_irq_disable();
+	guest_enter_irqoff();
+	local_irq_enable();
+
+	/*
+	 * Simulate a SIE entry of the VCPU (see sie64a), so VCPU blocking
+	 * and VCPU requests also hinder the vSIE from running and lead
+	 * to an immediate exit. kvm_s390_vsie_kick() has to be used to
+	 * also kick the vSIE.
+	 */
+	vcpu->arch.sie_block->prog0c |= PROG_IN_SIE;
+	barrier();
+	if (test_cpu_flag(CIF_FPU))
+		load_fpu_regs();
+	if (!kvm_s390_vcpu_sie_inhibited(vcpu))
+		rc = sie64a(scb_s, vcpu->run->s.regs.gprs);
+	barrier();
+	vcpu->arch.sie_block->prog0c &= ~PROG_IN_SIE;
+
+	local_irq_disable();
+	guest_exit_irqoff();
+	local_irq_enable();
+
+	/* restore guest state for bp isolation override */
+	if (!guest_bp_isolation)
+		clear_thread_flag(TIF_ISOLATE_BP_GUEST);
+
+	kvm_vcpu_srcu_read_lock(vcpu);
+
+	if (rc == -EINTR) {
+		VCPU_EVENT(vcpu, 3, "%s", "machine check");
+		kvm_s390_reinject_machine_check(vcpu, &vsie_page->mcck_info);
+		return 0;
+	}
+
+	if (rc > 0)
+		rc = 0; /* we could still have an icpt */
+	else if (rc == -EFAULT)
+		return handle_fault(vcpu, vsie_page);
+
+	switch (scb_s->icptcode) {
+	case ICPT_INST:
+		if (scb_s->ipa == 0xb2b0)
+			rc = handle_stfle(vcpu, vsie_page);
+		break;
+	case ICPT_STOP:
+		/* stop not requested by g2 - must have been a kick */
+		if (!(atomic_read(&scb_o->cpuflags) & CPUSTAT_STOP_INT))
+			clear_vsie_icpt(vsie_page);
+		break;
+	case ICPT_VALIDITY:
+		if ((scb_s->ipa & 0xf000) != 0xf000)
+			scb_s->ipa += 0x1000;
+		break;
+	case ICPT_PARTEXEC:
+		if (scb_s->ipa == 0xb254)
+			rc = vsie_handle_mvpg(vcpu, vsie_page);
+		break;
+	}
+	return rc;
+}
+
+static void release_gmap_shadow(struct vsie_page *vsie_page)
+{
+	if (vsie_page->gmap)
+		gmap_put(vsie_page->gmap);
+	WRITE_ONCE(vsie_page->gmap, NULL);
+	prefix_unmapped(vsie_page);
+}
+
+static int acquire_gmap_shadow(struct kvm_vcpu *vcpu,
+			       struct vsie_page *vsie_page)
+{
+	unsigned long asce;
+	union ctlreg0 cr0;
+	struct gmap *gmap;
+	int edat;
+
+	asce = vcpu->arch.sie_block->gcr[1];
+	cr0.val = vcpu->arch.sie_block->gcr[0];
+	edat = cr0.edat && test_kvm_facility(vcpu->kvm, 8);
+	edat += edat && test_kvm_facility(vcpu->kvm, 78);
+
+	/*
+	 * ASCE or EDAT could have changed since last icpt, or the gmap
+	 * we're holding has been unshadowed. If the gmap is still valid,
+	 * we can safely reuse it.
+	 */
+	if (vsie_page->gmap && gmap_shadow_valid(vsie_page->gmap, asce, edat))
+		return 0;
+
+	/* release the old shadow - if any, and mark the prefix as unmapped */
+	release_gmap_shadow(vsie_page);
+	gmap = gmap_shadow(vcpu->arch.gmap, asce, edat);
+	if (IS_ERR(gmap))
+		return PTR_ERR(gmap);
+	gmap->private = vcpu->kvm;
+	WRITE_ONCE(vsie_page->gmap, gmap);
+	return 0;
+}
+
+/*
+ * Register the shadow scb at the VCPU, e.g. for kicking out of vsie.
+ */
+static void register_shadow_scb(struct kvm_vcpu *vcpu,
+				struct vsie_page *vsie_page)
+{
+	struct kvm_s390_sie_block *scb_s = &vsie_page->scb_s;
+
+	WRITE_ONCE(vcpu->arch.vsie_block, &vsie_page->scb_s);
+	/*
+	 * External calls have to lead to a kick of the vcpu and
+	 * therefore the vsie -> Simulate Wait state.
+	 */
+	kvm_s390_set_cpuflags(vcpu, CPUSTAT_WAIT);
+	/*
+	 * We have to adjust the g3 epoch by the g2 epoch. The epoch will
+	 * automatically be adjusted on tod clock changes via kvm_sync_clock.
+	 */
+	preempt_disable();
+	scb_s->epoch += vcpu->kvm->arch.epoch;
+
+	if (scb_s->ecd & ECD_MEF) {
+		scb_s->epdx += vcpu->kvm->arch.epdx;
+		if (scb_s->epoch < vcpu->kvm->arch.epoch)
+			scb_s->epdx += 1;
+	}
+
+	preempt_enable();
+}
+
+/*
+ * Unregister a shadow scb from a VCPU.
+ */
+static void unregister_shadow_scb(struct kvm_vcpu *vcpu)
+{
+	kvm_s390_clear_cpuflags(vcpu, CPUSTAT_WAIT);
+	WRITE_ONCE(vcpu->arch.vsie_block, NULL);
+}
+
+/*
+ * Run the vsie on a shadowed scb, managing the gmap shadow, handling
+ * prefix pages and faults.
+ *
+ * Returns: - 0 if no errors occurred
+ *          - > 0 if control has to be given to guest 2
+ *          - -ENOMEM if out of memory
+ */
+static int vsie_run(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
+{
+	struct kvm_s390_sie_block *scb_s = &vsie_page->scb_s;
+	int rc = 0;
+
+	while (1) {
+		rc = acquire_gmap_shadow(vcpu, vsie_page);
+		if (!rc)
+			rc = map_prefix(vcpu, vsie_page);
+		if (!rc) {
+			gmap_enable(vsie_page->gmap);
+			update_intervention_requests(vsie_page);
+			rc = do_vsie_run(vcpu, vsie_page);
+			gmap_enable(vcpu->arch.gmap);
+		}
+		atomic_andnot(PROG_BLOCK_SIE, &scb_s->prog20);
+
+		if (rc == -EAGAIN)
+			rc = 0;
+		if (rc || scb_s->icptcode || signal_pending(current) ||
+		    kvm_s390_vcpu_has_irq(vcpu, 0) ||
+		    kvm_s390_vcpu_sie_inhibited(vcpu))
+			break;
+		cond_resched();
+	}
+
+	if (rc == -EFAULT) {
+		/*
+		 * Addressing exceptions are always presentes as intercepts.
+		 * As addressing exceptions are suppressing and our guest 3 PSW
+		 * points at the responsible instruction, we have to
+		 * forward the PSW and set the ilc. If we can't read guest 3
+		 * instruction, we can use an arbitrary ilc. Let's always use
+		 * ilen = 4 for now, so we can avoid reading in guest 3 virtual
+		 * memory. (we could also fake the shadow so the hardware
+		 * handles it).
+		 */
+		scb_s->icptcode = ICPT_PROGI;
+		scb_s->iprcc = PGM_ADDRESSING;
+		scb_s->pgmilc = 4;
+		scb_s->gpsw.addr = __rewind_psw(scb_s->gpsw, 4);
+		rc = 1;
+	}
+	return rc;
+}
+
+/*
+ * Get or create a vsie page for a scb address.
+ *
+ * Returns: - address of a vsie page (cached or new one)
+ *          - NULL if the same scb address is already used by another VCPU
+ *          - ERR_PTR(-ENOMEM) if out of memory
+ */
+static struct vsie_page *get_vsie_page(struct kvm *kvm, unsigned long addr)
+{
+	struct vsie_page *vsie_page;
+	struct page *page;
+	int nr_vcpus;
+
+	rcu_read_lock();
+	page = radix_tree_lookup(&kvm->arch.vsie.addr_to_page, addr >> 9);
+	rcu_read_unlock();
+	if (page) {
+		if (page_ref_inc_return(page) == 2)
+			return page_to_virt(page);
+		page_ref_dec(page);
+	}
+
+	/*
+	 * We want at least #online_vcpus shadows, so every VCPU can execute
+	 * the VSIE in parallel.
+	 */
+	nr_vcpus = atomic_read(&kvm->online_vcpus);
+
+	mutex_lock(&kvm->arch.vsie.mutex);
+	if (kvm->arch.vsie.page_count < nr_vcpus) {
+		page = alloc_page(GFP_KERNEL_ACCOUNT | __GFP_ZERO | GFP_DMA);
+		if (!page) {
+			mutex_unlock(&kvm->arch.vsie.mutex);
+			return ERR_PTR(-ENOMEM);
+		}
+		page_ref_inc(page);
+		kvm->arch.vsie.pages[kvm->arch.vsie.page_count] = page;
+		kvm->arch.vsie.page_count++;
+	} else {
+		/* reuse an existing entry that belongs to nobody */
+		while (true) {
+			page = kvm->arch.vsie.pages[kvm->arch.vsie.next];
+			if (page_ref_inc_return(page) == 2)
+				break;
+			page_ref_dec(page);
+			kvm->arch.vsie.next++;
+			kvm->arch.vsie.next %= nr_vcpus;
+		}
+		radix_tree_delete(&kvm->arch.vsie.addr_to_page, page->index >> 9);
+	}
+	page->index = addr;
+	/* double use of the same address */
+	if (radix_tree_insert(&kvm->arch.vsie.addr_to_page, addr >> 9, page)) {
+		page_ref_dec(page);
+		mutex_unlock(&kvm->arch.vsie.mutex);
+		return NULL;
+	}
+	mutex_unlock(&kvm->arch.vsie.mutex);
+
+	vsie_page = page_to_virt(page);
+	memset(&vsie_page->scb_s, 0, sizeof(struct kvm_s390_sie_block));
+	release_gmap_shadow(vsie_page);
+	vsie_page->fault_addr = 0;
+	vsie_page->scb_s.ihcpu = 0xffffU;
+	return vsie_page;
+}
+
+/* put a vsie page acquired via get_vsie_page */
+static void put_vsie_page(struct kvm *kvm, struct vsie_page *vsie_page)
+{
+	struct page *page = pfn_to_page(__pa(vsie_page) >> PAGE_SHIFT);
+
+	page_ref_dec(page);
+}
+
+int kvm_s390_handle_vsie(struct kvm_vcpu *vcpu)
+{
+	struct vsie_page *vsie_page;
+	unsigned long scb_addr;
+	int rc;
+
+	vcpu->stat.instruction_sie++;
+	if (!test_kvm_cpu_feat(vcpu->kvm, KVM_S390_VM_CPU_FEAT_SIEF2))
+		return -EOPNOTSUPP;
+	if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
+		return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
+
+	BUILD_BUG_ON(sizeof(struct vsie_page) != PAGE_SIZE);
+	scb_addr = kvm_s390_get_base_disp_s(vcpu, NULL);
+
+	/* 512 byte alignment */
+	if (unlikely(scb_addr & 0x1ffUL))
+		return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
+
+	if (signal_pending(current) || kvm_s390_vcpu_has_irq(vcpu, 0) ||
+	    kvm_s390_vcpu_sie_inhibited(vcpu))
+		return 0;
+
+	vsie_page = get_vsie_page(vcpu->kvm, scb_addr);
+	if (IS_ERR(vsie_page))
+		return PTR_ERR(vsie_page);
+	else if (!vsie_page)
+		/* double use of sie control block - simply do nothing */
+		return 0;
+
+	rc = pin_scb(vcpu, vsie_page, scb_addr);
+	if (rc)
+		goto out_put;
+	rc = shadow_scb(vcpu, vsie_page);
+	if (rc)
+		goto out_unpin_scb;
+	rc = pin_blocks(vcpu, vsie_page);
+	if (rc)
+		goto out_unshadow;
+	register_shadow_scb(vcpu, vsie_page);
+	rc = vsie_run(vcpu, vsie_page);
+	unregister_shadow_scb(vcpu);
+	unpin_blocks(vcpu, vsie_page);
+out_unshadow:
+	unshadow_scb(vcpu, vsie_page);
+out_unpin_scb:
+	unpin_scb(vcpu, vsie_page, scb_addr);
+out_put:
+	put_vsie_page(vcpu->kvm, vsie_page);
+
+	return rc < 0 ? rc : 0;
+}
+
+/* Init the vsie data structures. To be called when a vm is initialized. */
+void kvm_s390_vsie_init(struct kvm *kvm)
+{
+	mutex_init(&kvm->arch.vsie.mutex);
+	INIT_RADIX_TREE(&kvm->arch.vsie.addr_to_page, GFP_KERNEL_ACCOUNT);
+}
+
+/* Destroy the vsie data structures. To be called when a vm is destroyed. */
+void kvm_s390_vsie_destroy(struct kvm *kvm)
+{
+	struct vsie_page *vsie_page;
+	struct page *page;
+	int i;
+
+	mutex_lock(&kvm->arch.vsie.mutex);
+	for (i = 0; i < kvm->arch.vsie.page_count; i++) {
+		page = kvm->arch.vsie.pages[i];
+		kvm->arch.vsie.pages[i] = NULL;
+		vsie_page = page_to_virt(page);
+		release_gmap_shadow(vsie_page);
+		/* free the radix tree entry */
+		radix_tree_delete(&kvm->arch.vsie.addr_to_page, page->index >> 9);
+		__free_page(page);
+	}
+	kvm->arch.vsie.page_count = 0;
+	mutex_unlock(&kvm->arch.vsie.mutex);
+}
+
+void kvm_s390_vsie_kick(struct kvm_vcpu *vcpu)
+{
+	struct kvm_s390_sie_block *scb = READ_ONCE(vcpu->arch.vsie_block);
+
+	/*
+	 * Even if the VCPU lets go of the shadow sie block reference, it is
+	 * still valid in the cache. So we can safely kick it.
+	 */
+	if (scb) {
+		atomic_or(PROG_BLOCK_SIE, &scb->prog20);
+		if (scb->prog0c & PROG_IN_SIE)
+			atomic_or(CPUSTAT_STOP_INT, &scb->cpuflags);
+	}
+}
diff --git a/arch/s390/lib/Makefile b/arch/s390/lib/Makefile
new file mode 100644
index 0000000000..7c50eca85c
--- /dev/null
+++ b/arch/s390/lib/Makefile
@@ -0,0 +1,25 @@
+# SPDX-License-Identifier: GPL-2.0
+#
+# Makefile for s390-specific library files..
+#
+
+lib-y += delay.o string.o uaccess.o find.o spinlock.o tishift.o
+obj-y += mem.o xor.o
+lib-$(CONFIG_KPROBES) += probes.o
+lib-$(CONFIG_UPROBES) += probes.o
+obj-$(CONFIG_S390_KPROBES_SANITY_TEST) += test_kprobes_s390.o
+test_kprobes_s390-objs += test_kprobes_asm.o test_kprobes.o
+
+# Instrumenting memory accesses to __user data (in different address space)
+# produce false positives
+KASAN_SANITIZE_uaccess.o := n
+
+obj-$(CONFIG_S390_UNWIND_SELFTEST) += test_unwind.o
+CFLAGS_test_unwind.o += -fno-optimize-sibling-calls
+
+obj-$(CONFIG_S390_MODULES_SANITY_TEST) += test_modules.o
+obj-$(CONFIG_S390_MODULES_SANITY_TEST_HELPERS) += test_modules_helpers.o
+
+lib-$(CONFIG_FUNCTION_ERROR_INJECTION) += error-inject.o
+
+obj-$(CONFIG_EXPOLINE_EXTERN) += expoline/
diff --git a/arch/s390/lib/delay.c b/arch/s390/lib/delay.c
new file mode 100644
index 0000000000..be14c58cb9
--- /dev/null
+++ b/arch/s390/lib/delay.c
@@ -0,0 +1,45 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ *    Precise Delay Loops for S390
+ *
+ *    Copyright IBM Corp. 1999, 2008
+ *    Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>,
+ */
+
+#include <linux/processor.h>
+#include <linux/delay.h>
+#include <asm/div64.h>
+#include <asm/timex.h>
+
+void __delay(unsigned long loops)
+{
+	/*
+	 * Loop 'loops' times. Callers must not assume a specific
+	 * amount of time passes before this function returns.
+	 */
+	asm volatile("0: brct %0,0b" : : "d" ((loops/2) + 1));
+}
+EXPORT_SYMBOL(__delay);
+
+static void delay_loop(unsigned long delta)
+{
+	unsigned long end;
+
+	end = get_tod_clock_monotonic() + delta;
+	while (!tod_after(get_tod_clock_monotonic(), end))
+		cpu_relax();
+}
+
+void __udelay(unsigned long usecs)
+{
+	delay_loop(usecs << 12);
+}
+EXPORT_SYMBOL(__udelay);
+
+void __ndelay(unsigned long nsecs)
+{
+	nsecs <<= 9;
+	do_div(nsecs, 125);
+	delay_loop(nsecs);
+}
+EXPORT_SYMBOL(__ndelay);
diff --git a/arch/s390/lib/error-inject.c b/arch/s390/lib/error-inject.c
new file mode 100644
index 0000000000..8c9d4da87e
--- /dev/null
+++ b/arch/s390/lib/error-inject.c
@@ -0,0 +1,14 @@
+// SPDX-License-Identifier: GPL-2.0+
+#include <asm/ptrace.h>
+#include <linux/error-injection.h>
+#include <linux/kprobes.h>
+
+void override_function_with_return(struct pt_regs *regs)
+{
+	/*
+	 * Emulate 'br 14'. 'regs' is captured by kprobes on entry to some
+	 * kernel function.
+	 */
+	regs->psw.addr = regs->gprs[14];
+}
+NOKPROBE_SYMBOL(override_function_with_return);
diff --git a/arch/s390/lib/expoline/Makefile b/arch/s390/lib/expoline/Makefile
new file mode 100644
index 0000000000..854631d9cb
--- /dev/null
+++ b/arch/s390/lib/expoline/Makefile
@@ -0,0 +1,3 @@
+# SPDX-License-Identifier: GPL-2.0
+
+obj-y += expoline.o
diff --git a/arch/s390/lib/expoline/expoline.S b/arch/s390/lib/expoline/expoline.S
new file mode 100644
index 0000000000..92ed8409a7
--- /dev/null
+++ b/arch/s390/lib/expoline/expoline.S
@@ -0,0 +1,12 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#include <asm/nospec-insn.h>
+#include <linux/linkage.h>
+
+.macro GEN_ALL_BR_THUNK_EXTERN
+	.irp r1,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15
+	GEN_BR_THUNK_EXTERN %r\r1
+	.endr
+.endm
+
+GEN_ALL_BR_THUNK_EXTERN
diff --git a/arch/s390/lib/find.c b/arch/s390/lib/find.c
new file mode 100644
index 0000000000..96a8a2e2d0
--- /dev/null
+++ b/arch/s390/lib/find.c
@@ -0,0 +1,76 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * MSB0 numbered special bitops handling.
+ *
+ * The bits are numbered:
+ *   |0..............63|64............127|128...........191|192...........255|
+ *
+ * The reason for this bit numbering is the fact that the hardware sets bits
+ * in a bitmap starting at bit 0 (MSB) and we don't want to scan the bitmap
+ * from the 'wrong end'.
+ */
+
+#include <linux/compiler.h>
+#include <linux/bitops.h>
+#include <linux/export.h>
+
+unsigned long find_first_bit_inv(const unsigned long *addr, unsigned long size)
+{
+	const unsigned long *p = addr;
+	unsigned long result = 0;
+	unsigned long tmp;
+
+	while (size & ~(BITS_PER_LONG - 1)) {
+		if ((tmp = *(p++)))
+			goto found;
+		result += BITS_PER_LONG;
+		size -= BITS_PER_LONG;
+	}
+	if (!size)
+		return result;
+	tmp = (*p) & (~0UL << (BITS_PER_LONG - size));
+	if (!tmp)		/* Are any bits set? */
+		return result + size;	/* Nope. */
+found:
+	return result + (__fls(tmp) ^ (BITS_PER_LONG - 1));
+}
+EXPORT_SYMBOL(find_first_bit_inv);
+
+unsigned long find_next_bit_inv(const unsigned long *addr, unsigned long size,
+				unsigned long offset)
+{
+	const unsigned long *p = addr + (offset / BITS_PER_LONG);
+	unsigned long result = offset & ~(BITS_PER_LONG - 1);
+	unsigned long tmp;
+
+	if (offset >= size)
+		return size;
+	size -= result;
+	offset %= BITS_PER_LONG;
+	if (offset) {
+		tmp = *(p++);
+		tmp &= (~0UL >> offset);
+		if (size < BITS_PER_LONG)
+			goto found_first;
+		if (tmp)
+			goto found_middle;
+		size -= BITS_PER_LONG;
+		result += BITS_PER_LONG;
+	}
+	while (size & ~(BITS_PER_LONG-1)) {
+		if ((tmp = *(p++)))
+			goto found_middle;
+		result += BITS_PER_LONG;
+		size -= BITS_PER_LONG;
+	}
+	if (!size)
+		return result;
+	tmp = *p;
+found_first:
+	tmp &= (~0UL << (BITS_PER_LONG - size));
+	if (!tmp)		/* Are any bits set? */
+		return result + size;	/* Nope. */
+found_middle:
+	return result + (__fls(tmp) ^ (BITS_PER_LONG - 1));
+}
+EXPORT_SYMBOL(find_next_bit_inv);
diff --git a/arch/s390/lib/mem.S b/arch/s390/lib/mem.S
new file mode 100644
index 0000000000..08f60a42b9
--- /dev/null
+++ b/arch/s390/lib/mem.S
@@ -0,0 +1,197 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * String handling functions.
+ *
+ * Copyright IBM Corp. 2012
+ */
+
+#include <linux/export.h>
+#include <linux/linkage.h>
+#include <asm/nospec-insn.h>
+
+	GEN_BR_THUNK %r14
+
+/*
+ * void *memmove(void *dest, const void *src, size_t n)
+ */
+SYM_FUNC_START(__memmove)
+	ltgr	%r4,%r4
+	lgr	%r1,%r2
+	jz	.Lmemmove_exit
+	aghi	%r4,-1
+	clgr	%r2,%r3
+	jnh	.Lmemmove_forward
+	la	%r5,1(%r4,%r3)
+	clgr	%r2,%r5
+	jl	.Lmemmove_reverse
+.Lmemmove_forward:
+	srlg	%r0,%r4,8
+	ltgr	%r0,%r0
+	jz	.Lmemmove_forward_remainder
+.Lmemmove_forward_loop:
+	mvc	0(256,%r1),0(%r3)
+	la	%r1,256(%r1)
+	la	%r3,256(%r3)
+	brctg	%r0,.Lmemmove_forward_loop
+.Lmemmove_forward_remainder:
+	larl	%r5,.Lmemmove_mvc
+	ex	%r4,0(%r5)
+.Lmemmove_exit:
+	BR_EX	%r14
+.Lmemmove_reverse:
+	ic	%r0,0(%r4,%r3)
+	stc	%r0,0(%r4,%r1)
+	brctg	%r4,.Lmemmove_reverse
+	ic	%r0,0(%r4,%r3)
+	stc	%r0,0(%r4,%r1)
+	BR_EX	%r14
+.Lmemmove_mvc:
+	mvc	0(1,%r1),0(%r3)
+SYM_FUNC_END(__memmove)
+EXPORT_SYMBOL(__memmove)
+
+SYM_FUNC_ALIAS(memmove, __memmove)
+EXPORT_SYMBOL(memmove)
+
+/*
+ * memset implementation
+ *
+ * This code corresponds to the C construct below. We do distinguish
+ * between clearing (c == 0) and setting a memory array (c != 0) simply
+ * because nearly all memset invocations in the kernel clear memory and
+ * the xc instruction is preferred in such cases.
+ *
+ * void *memset(void *s, int c, size_t n)
+ * {
+ *	if (likely(c == 0))
+ *		return __builtin_memset(s, 0, n);
+ *	return __builtin_memset(s, c, n);
+ * }
+ */
+SYM_FUNC_START(__memset)
+	ltgr	%r4,%r4
+	jz	.Lmemset_exit
+	ltgr	%r3,%r3
+	jnz	.Lmemset_fill
+	aghi	%r4,-1
+	srlg	%r3,%r4,8
+	ltgr	%r3,%r3
+	lgr	%r1,%r2
+	jz	.Lmemset_clear_remainder
+.Lmemset_clear_loop:
+	xc	0(256,%r1),0(%r1)
+	la	%r1,256(%r1)
+	brctg	%r3,.Lmemset_clear_loop
+.Lmemset_clear_remainder:
+	larl	%r3,.Lmemset_xc
+	ex	%r4,0(%r3)
+.Lmemset_exit:
+	BR_EX	%r14
+.Lmemset_fill:
+	cghi	%r4,1
+	lgr	%r1,%r2
+	je	.Lmemset_fill_exit
+	aghi	%r4,-2
+	srlg	%r5,%r4,8
+	ltgr	%r5,%r5
+	jz	.Lmemset_fill_remainder
+.Lmemset_fill_loop:
+	stc	%r3,0(%r1)
+	mvc	1(255,%r1),0(%r1)
+	la	%r1,256(%r1)
+	brctg	%r5,.Lmemset_fill_loop
+.Lmemset_fill_remainder:
+	stc	%r3,0(%r1)
+	larl	%r5,.Lmemset_mvc
+	ex	%r4,0(%r5)
+	BR_EX	%r14
+.Lmemset_fill_exit:
+	stc	%r3,0(%r1)
+	BR_EX	%r14
+.Lmemset_xc:
+	xc	0(1,%r1),0(%r1)
+.Lmemset_mvc:
+	mvc	1(1,%r1),0(%r1)
+SYM_FUNC_END(__memset)
+EXPORT_SYMBOL(__memset)
+
+SYM_FUNC_ALIAS(memset, __memset)
+EXPORT_SYMBOL(memset)
+
+/*
+ * memcpy implementation
+ *
+ * void *memcpy(void *dest, const void *src, size_t n)
+ */
+SYM_FUNC_START(__memcpy)
+	ltgr	%r4,%r4
+	jz	.Lmemcpy_exit
+	aghi	%r4,-1
+	srlg	%r5,%r4,8
+	ltgr	%r5,%r5
+	lgr	%r1,%r2
+	jnz	.Lmemcpy_loop
+.Lmemcpy_remainder:
+	larl	%r5,.Lmemcpy_mvc
+	ex	%r4,0(%r5)
+.Lmemcpy_exit:
+	BR_EX	%r14
+.Lmemcpy_loop:
+	mvc	0(256,%r1),0(%r3)
+	la	%r1,256(%r1)
+	la	%r3,256(%r3)
+	brctg	%r5,.Lmemcpy_loop
+	j	.Lmemcpy_remainder
+.Lmemcpy_mvc:
+	mvc	0(1,%r1),0(%r3)
+SYM_FUNC_END(__memcpy)
+EXPORT_SYMBOL(__memcpy)
+
+SYM_FUNC_ALIAS(memcpy, __memcpy)
+EXPORT_SYMBOL(memcpy)
+
+/*
+ * __memset16/32/64
+ *
+ * void *__memset16(uint16_t *s, uint16_t v, size_t count)
+ * void *__memset32(uint32_t *s, uint32_t v, size_t count)
+ * void *__memset64(uint64_t *s, uint64_t v, size_t count)
+ */
+.macro __MEMSET bits,bytes,insn
+SYM_FUNC_START(__memset\bits)
+	ltgr	%r4,%r4
+	jz	.L__memset_exit\bits
+	cghi	%r4,\bytes
+	je	.L__memset_store\bits
+	aghi	%r4,-(\bytes+1)
+	srlg	%r5,%r4,8
+	ltgr	%r5,%r5
+	lgr	%r1,%r2
+	jz	.L__memset_remainder\bits
+.L__memset_loop\bits:
+	\insn	%r3,0(%r1)
+	mvc	\bytes(256-\bytes,%r1),0(%r1)
+	la	%r1,256(%r1)
+	brctg	%r5,.L__memset_loop\bits
+.L__memset_remainder\bits:
+	\insn	%r3,0(%r1)
+	larl	%r5,.L__memset_mvc\bits
+	ex	%r4,0(%r5)
+	BR_EX	%r14
+.L__memset_store\bits:
+	\insn	%r3,0(%r2)
+.L__memset_exit\bits:
+	BR_EX	%r14
+.L__memset_mvc\bits:
+	mvc	\bytes(1,%r1),0(%r1)
+SYM_FUNC_END(__memset\bits)
+.endm
+
+__MEMSET 16,2,sth
+EXPORT_SYMBOL(__memset16)
+
+__MEMSET 32,4,st
+EXPORT_SYMBOL(__memset32)
+
+__MEMSET 64,8,stg
+EXPORT_SYMBOL(__memset64)
diff --git a/arch/s390/lib/probes.c b/arch/s390/lib/probes.c
new file mode 100644
index 0000000000..1e184a0344
--- /dev/null
+++ b/arch/s390/lib/probes.c
@@ -0,0 +1,161 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ *    Common helper functions for kprobes and uprobes
+ *
+ *    Copyright IBM Corp. 2014
+ */
+
+#include <linux/errno.h>
+#include <asm/kprobes.h>
+#include <asm/dis.h>
+
+int probe_is_prohibited_opcode(u16 *insn)
+{
+	if (!is_known_insn((unsigned char *)insn))
+		return -EINVAL;
+	switch (insn[0] >> 8) {
+	case 0x0c:	/* bassm */
+	case 0x0b:	/* bsm	 */
+	case 0x83:	/* diag  */
+	case 0x44:	/* ex	 */
+	case 0xac:	/* stnsm */
+	case 0xad:	/* stosm */
+		return -EINVAL;
+	case 0xc6:
+		switch (insn[0] & 0x0f) {
+		case 0x00: /* exrl   */
+			return -EINVAL;
+		}
+	}
+	switch (insn[0]) {
+	case 0x0101:	/* pr	 */
+	case 0xb25a:	/* bsa	 */
+	case 0xb240:	/* bakr  */
+	case 0xb258:	/* bsg	 */
+	case 0xb218:	/* pc	 */
+	case 0xb228:	/* pt	 */
+	case 0xb98d:	/* epsw	 */
+	case 0xe560:	/* tbegin */
+	case 0xe561:	/* tbeginc */
+	case 0xb2f8:	/* tend	 */
+		return -EINVAL;
+	}
+	return 0;
+}
+
+int probe_get_fixup_type(u16 *insn)
+{
+	/* default fixup method */
+	int fixup = FIXUP_PSW_NORMAL;
+
+	switch (insn[0] >> 8) {
+	case 0x05:	/* balr	*/
+	case 0x0d:	/* basr */
+		fixup = FIXUP_RETURN_REGISTER;
+		/* if r2 = 0, no branch will be taken */
+		if ((insn[0] & 0x0f) == 0)
+			fixup |= FIXUP_BRANCH_NOT_TAKEN;
+		break;
+	case 0x06:	/* bctr	*/
+	case 0x07:	/* bcr	*/
+		fixup = FIXUP_BRANCH_NOT_TAKEN;
+		break;
+	case 0x45:	/* bal	*/
+	case 0x4d:	/* bas	*/
+		fixup = FIXUP_RETURN_REGISTER;
+		break;
+	case 0x47:	/* bc	*/
+	case 0x46:	/* bct	*/
+	case 0x86:	/* bxh	*/
+	case 0x87:	/* bxle	*/
+		fixup = FIXUP_BRANCH_NOT_TAKEN;
+		break;
+	case 0x82:	/* lpsw	*/
+		fixup = FIXUP_NOT_REQUIRED;
+		break;
+	case 0xb2:	/* lpswe */
+		if ((insn[0] & 0xff) == 0xb2)
+			fixup = FIXUP_NOT_REQUIRED;
+		break;
+	case 0xa7:	/* bras	*/
+		if ((insn[0] & 0x0f) == 0x05)
+			fixup |= FIXUP_RETURN_REGISTER;
+		break;
+	case 0xc0:
+		if ((insn[0] & 0x0f) == 0x05)	/* brasl */
+			fixup |= FIXUP_RETURN_REGISTER;
+		break;
+	case 0xeb:
+		switch (insn[2] & 0xff) {
+		case 0x44: /* bxhg  */
+		case 0x45: /* bxleg */
+			fixup = FIXUP_BRANCH_NOT_TAKEN;
+			break;
+		}
+		break;
+	case 0xe3:	/* bctg	*/
+		if ((insn[2] & 0xff) == 0x46)
+			fixup = FIXUP_BRANCH_NOT_TAKEN;
+		break;
+	case 0xec:
+		switch (insn[2] & 0xff) {
+		case 0xe5: /* clgrb */
+		case 0xe6: /* cgrb  */
+		case 0xf6: /* crb   */
+		case 0xf7: /* clrb  */
+		case 0xfc: /* cgib  */
+		case 0xfd: /* cglib */
+		case 0xfe: /* cib   */
+		case 0xff: /* clib  */
+			fixup = FIXUP_BRANCH_NOT_TAKEN;
+			break;
+		}
+		break;
+	}
+	return fixup;
+}
+
+int probe_is_insn_relative_long(u16 *insn)
+{
+	/* Check if we have a RIL-b or RIL-c format instruction which
+	 * we need to modify in order to avoid instruction emulation. */
+	switch (insn[0] >> 8) {
+	case 0xc0:
+		if ((insn[0] & 0x0f) == 0x00) /* larl */
+			return true;
+		break;
+	case 0xc4:
+		switch (insn[0] & 0x0f) {
+		case 0x02: /* llhrl  */
+		case 0x04: /* lghrl  */
+		case 0x05: /* lhrl   */
+		case 0x06: /* llghrl */
+		case 0x07: /* sthrl  */
+		case 0x08: /* lgrl   */
+		case 0x0b: /* stgrl  */
+		case 0x0c: /* lgfrl  */
+		case 0x0d: /* lrl    */
+		case 0x0e: /* llgfrl */
+		case 0x0f: /* strl   */
+			return true;
+		}
+		break;
+	case 0xc6:
+		switch (insn[0] & 0x0f) {
+		case 0x02: /* pfdrl  */
+		case 0x04: /* cghrl  */
+		case 0x05: /* chrl   */
+		case 0x06: /* clghrl */
+		case 0x07: /* clhrl  */
+		case 0x08: /* cgrl   */
+		case 0x0a: /* clgrl  */
+		case 0x0c: /* cgfrl  */
+		case 0x0d: /* crl    */
+		case 0x0e: /* clgfrl */
+		case 0x0f: /* clrl   */
+			return true;
+		}
+		break;
+	}
+	return false;
+}
diff --git a/arch/s390/lib/spinlock.c b/arch/s390/lib/spinlock.c
new file mode 100644
index 0000000000..81c53440b3
--- /dev/null
+++ b/arch/s390/lib/spinlock.c
@@ -0,0 +1,324 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ *    Out of line spinlock code.
+ *
+ *    Copyright IBM Corp. 2004, 2006
+ *    Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com)
+ */
+
+#include <linux/types.h>
+#include <linux/export.h>
+#include <linux/spinlock.h>
+#include <linux/jiffies.h>
+#include <linux/init.h>
+#include <linux/smp.h>
+#include <linux/percpu.h>
+#include <linux/io.h>
+#include <asm/alternative.h>
+
+int spin_retry = -1;
+
+static int __init spin_retry_init(void)
+{
+	if (spin_retry < 0)
+		spin_retry = 1000;
+	return 0;
+}
+early_initcall(spin_retry_init);
+
+/*
+ * spin_retry= parameter
+ */
+static int __init spin_retry_setup(char *str)
+{
+	spin_retry = simple_strtoul(str, &str, 0);
+	return 1;
+}
+__setup("spin_retry=", spin_retry_setup);
+
+struct spin_wait {
+	struct spin_wait *next, *prev;
+	int node_id;
+} __aligned(32);
+
+static DEFINE_PER_CPU_ALIGNED(struct spin_wait, spin_wait[4]);
+
+#define _Q_LOCK_CPU_OFFSET	0
+#define _Q_LOCK_STEAL_OFFSET	16
+#define _Q_TAIL_IDX_OFFSET	18
+#define _Q_TAIL_CPU_OFFSET	20
+
+#define _Q_LOCK_CPU_MASK	0x0000ffff
+#define _Q_LOCK_STEAL_ADD	0x00010000
+#define _Q_LOCK_STEAL_MASK	0x00030000
+#define _Q_TAIL_IDX_MASK	0x000c0000
+#define _Q_TAIL_CPU_MASK	0xfff00000
+
+#define _Q_LOCK_MASK		(_Q_LOCK_CPU_MASK | _Q_LOCK_STEAL_MASK)
+#define _Q_TAIL_MASK		(_Q_TAIL_IDX_MASK | _Q_TAIL_CPU_MASK)
+
+void arch_spin_lock_setup(int cpu)
+{
+	struct spin_wait *node;
+	int ix;
+
+	node = per_cpu_ptr(&spin_wait[0], cpu);
+	for (ix = 0; ix < 4; ix++, node++) {
+		memset(node, 0, sizeof(*node));
+		node->node_id = ((cpu + 1) << _Q_TAIL_CPU_OFFSET) +
+			(ix << _Q_TAIL_IDX_OFFSET);
+	}
+}
+
+static inline int arch_load_niai4(int *lock)
+{
+	int owner;
+
+	asm_inline volatile(
+		ALTERNATIVE("nop", ".insn rre,0xb2fa0000,4,0", 49) /* NIAI 4 */
+		"	l	%0,%1\n"
+		: "=d" (owner) : "Q" (*lock) : "memory");
+	return owner;
+}
+
+static inline int arch_cmpxchg_niai8(int *lock, int old, int new)
+{
+	int expected = old;
+
+	asm_inline volatile(
+		ALTERNATIVE("nop", ".insn rre,0xb2fa0000,8,0", 49) /* NIAI 8 */
+		"	cs	%0,%3,%1\n"
+		: "=d" (old), "=Q" (*lock)
+		: "0" (old), "d" (new), "Q" (*lock)
+		: "cc", "memory");
+	return expected == old;
+}
+
+static inline struct spin_wait *arch_spin_decode_tail(int lock)
+{
+	int ix, cpu;
+
+	ix = (lock & _Q_TAIL_IDX_MASK) >> _Q_TAIL_IDX_OFFSET;
+	cpu = (lock & _Q_TAIL_CPU_MASK) >> _Q_TAIL_CPU_OFFSET;
+	return per_cpu_ptr(&spin_wait[ix], cpu - 1);
+}
+
+static inline int arch_spin_yield_target(int lock, struct spin_wait *node)
+{
+	if (lock & _Q_LOCK_CPU_MASK)
+		return lock & _Q_LOCK_CPU_MASK;
+	if (node == NULL || node->prev == NULL)
+		return 0;	/* 0 -> no target cpu */
+	while (node->prev)
+		node = node->prev;
+	return node->node_id >> _Q_TAIL_CPU_OFFSET;
+}
+
+static inline void arch_spin_lock_queued(arch_spinlock_t *lp)
+{
+	struct spin_wait *node, *next;
+	int lockval, ix, node_id, tail_id, old, new, owner, count;
+
+	ix = S390_lowcore.spinlock_index++;
+	barrier();
+	lockval = SPINLOCK_LOCKVAL;	/* cpu + 1 */
+	node = this_cpu_ptr(&spin_wait[ix]);
+	node->prev = node->next = NULL;
+	node_id = node->node_id;
+
+	/* Enqueue the node for this CPU in the spinlock wait queue */
+	while (1) {
+		old = READ_ONCE(lp->lock);
+		if ((old & _Q_LOCK_CPU_MASK) == 0 &&
+		    (old & _Q_LOCK_STEAL_MASK) != _Q_LOCK_STEAL_MASK) {
+			/*
+			 * The lock is free but there may be waiters.
+			 * With no waiters simply take the lock, if there
+			 * are waiters try to steal the lock. The lock may
+			 * be stolen three times before the next queued
+			 * waiter will get the lock.
+			 */
+			new = (old ? (old + _Q_LOCK_STEAL_ADD) : 0) | lockval;
+			if (__atomic_cmpxchg_bool(&lp->lock, old, new))
+				/* Got the lock */
+				goto out;
+			/* lock passing in progress */
+			continue;
+		}
+		/* Make the node of this CPU the new tail. */
+		new = node_id | (old & _Q_LOCK_MASK);
+		if (__atomic_cmpxchg_bool(&lp->lock, old, new))
+			break;
+	}
+	/* Set the 'next' pointer of the tail node in the queue */
+	tail_id = old & _Q_TAIL_MASK;
+	if (tail_id != 0) {
+		node->prev = arch_spin_decode_tail(tail_id);
+		WRITE_ONCE(node->prev->next, node);
+	}
+
+	/* Pass the virtual CPU to the lock holder if it is not running */
+	owner = arch_spin_yield_target(old, node);
+	if (owner && arch_vcpu_is_preempted(owner - 1))
+		smp_yield_cpu(owner - 1);
+
+	/* Spin on the CPU local node->prev pointer */
+	if (tail_id != 0) {
+		count = spin_retry;
+		while (READ_ONCE(node->prev) != NULL) {
+			if (count-- >= 0)
+				continue;
+			count = spin_retry;
+			/* Query running state of lock holder again. */
+			owner = arch_spin_yield_target(old, node);
+			if (owner && arch_vcpu_is_preempted(owner - 1))
+				smp_yield_cpu(owner - 1);
+		}
+	}
+
+	/* Spin on the lock value in the spinlock_t */
+	count = spin_retry;
+	while (1) {
+		old = READ_ONCE(lp->lock);
+		owner = old & _Q_LOCK_CPU_MASK;
+		if (!owner) {
+			tail_id = old & _Q_TAIL_MASK;
+			new = ((tail_id != node_id) ? tail_id : 0) | lockval;
+			if (__atomic_cmpxchg_bool(&lp->lock, old, new))
+				/* Got the lock */
+				break;
+			continue;
+		}
+		if (count-- >= 0)
+			continue;
+		count = spin_retry;
+		if (!MACHINE_IS_LPAR || arch_vcpu_is_preempted(owner - 1))
+			smp_yield_cpu(owner - 1);
+	}
+
+	/* Pass lock_spin job to next CPU in the queue */
+	if (node_id && tail_id != node_id) {
+		/* Wait until the next CPU has set up the 'next' pointer */
+		while ((next = READ_ONCE(node->next)) == NULL)
+			;
+		next->prev = NULL;
+	}
+
+ out:
+	S390_lowcore.spinlock_index--;
+}
+
+static inline void arch_spin_lock_classic(arch_spinlock_t *lp)
+{
+	int lockval, old, new, owner, count;
+
+	lockval = SPINLOCK_LOCKVAL;	/* cpu + 1 */
+
+	/* Pass the virtual CPU to the lock holder if it is not running */
+	owner = arch_spin_yield_target(READ_ONCE(lp->lock), NULL);
+	if (owner && arch_vcpu_is_preempted(owner - 1))
+		smp_yield_cpu(owner - 1);
+
+	count = spin_retry;
+	while (1) {
+		old = arch_load_niai4(&lp->lock);
+		owner = old & _Q_LOCK_CPU_MASK;
+		/* Try to get the lock if it is free. */
+		if (!owner) {
+			new = (old & _Q_TAIL_MASK) | lockval;
+			if (arch_cmpxchg_niai8(&lp->lock, old, new)) {
+				/* Got the lock */
+				return;
+			}
+			continue;
+		}
+		if (count-- >= 0)
+			continue;
+		count = spin_retry;
+		if (!MACHINE_IS_LPAR || arch_vcpu_is_preempted(owner - 1))
+			smp_yield_cpu(owner - 1);
+	}
+}
+
+void arch_spin_lock_wait(arch_spinlock_t *lp)
+{
+	if (test_cpu_flag(CIF_DEDICATED_CPU))
+		arch_spin_lock_queued(lp);
+	else
+		arch_spin_lock_classic(lp);
+}
+EXPORT_SYMBOL(arch_spin_lock_wait);
+
+int arch_spin_trylock_retry(arch_spinlock_t *lp)
+{
+	int cpu = SPINLOCK_LOCKVAL;
+	int owner, count;
+
+	for (count = spin_retry; count > 0; count--) {
+		owner = READ_ONCE(lp->lock);
+		/* Try to get the lock if it is free. */
+		if (!owner) {
+			if (__atomic_cmpxchg_bool(&lp->lock, 0, cpu))
+				return 1;
+		}
+	}
+	return 0;
+}
+EXPORT_SYMBOL(arch_spin_trylock_retry);
+
+void arch_read_lock_wait(arch_rwlock_t *rw)
+{
+	if (unlikely(in_interrupt())) {
+		while (READ_ONCE(rw->cnts) & 0x10000)
+			barrier();
+		return;
+	}
+
+	/* Remove this reader again to allow recursive read locking */
+	__atomic_add_const(-1, &rw->cnts);
+	/* Put the reader into the wait queue */
+	arch_spin_lock(&rw->wait);
+	/* Now add this reader to the count value again */
+	__atomic_add_const(1, &rw->cnts);
+	/* Loop until the writer is done */
+	while (READ_ONCE(rw->cnts) & 0x10000)
+		barrier();
+	arch_spin_unlock(&rw->wait);
+}
+EXPORT_SYMBOL(arch_read_lock_wait);
+
+void arch_write_lock_wait(arch_rwlock_t *rw)
+{
+	int old;
+
+	/* Add this CPU to the write waiters */
+	__atomic_add(0x20000, &rw->cnts);
+
+	/* Put the writer into the wait queue */
+	arch_spin_lock(&rw->wait);
+
+	while (1) {
+		old = READ_ONCE(rw->cnts);
+		if ((old & 0x1ffff) == 0 &&
+		    __atomic_cmpxchg_bool(&rw->cnts, old, old | 0x10000))
+			/* Got the lock */
+			break;
+		barrier();
+	}
+
+	arch_spin_unlock(&rw->wait);
+}
+EXPORT_SYMBOL(arch_write_lock_wait);
+
+void arch_spin_relax(arch_spinlock_t *lp)
+{
+	int cpu;
+
+	cpu = READ_ONCE(lp->lock) & _Q_LOCK_CPU_MASK;
+	if (!cpu)
+		return;
+	if (MACHINE_IS_LPAR && !arch_vcpu_is_preempted(cpu - 1))
+		return;
+	smp_yield_cpu(cpu - 1);
+}
+EXPORT_SYMBOL(arch_spin_relax);
diff --git a/arch/s390/lib/string.c b/arch/s390/lib/string.c
new file mode 100644
index 0000000000..7d87418182
--- /dev/null
+++ b/arch/s390/lib/string.c
@@ -0,0 +1,348 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ *    Optimized string functions
+ *
+ *  S390 version
+ *    Copyright IBM Corp. 2004
+ *    Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com)
+ */
+
+#define IN_ARCH_STRING_C 1
+#ifndef __NO_FORTIFY
+# define __NO_FORTIFY
+#endif
+
+#include <linux/types.h>
+#include <linux/string.h>
+#include <linux/export.h>
+
+/*
+ * Helper functions to find the end of a string
+ */
+static inline char *__strend(const char *s)
+{
+	unsigned long e = 0;
+
+	asm volatile(
+		"	lghi	0,0\n"
+		"0:	srst	%[e],%[s]\n"
+		"	jo	0b\n"
+		: [e] "+&a" (e), [s] "+&a" (s)
+		:
+		: "cc", "memory", "0");
+	return (char *)e;
+}
+
+static inline char *__strnend(const char *s, size_t n)
+{
+	const char *p = s + n;
+
+	asm volatile(
+		"	lghi	0,0\n"
+		"0:	srst	%[p],%[s]\n"
+		"	jo	0b\n"
+		: [p] "+&d" (p), [s] "+&a" (s)
+		:
+		: "cc", "memory", "0");
+	return (char *)p;
+}
+
+/**
+ * strlen - Find the length of a string
+ * @s: The string to be sized
+ *
+ * returns the length of @s
+ */
+#ifdef __HAVE_ARCH_STRLEN
+size_t strlen(const char *s)
+{
+	return __strend(s) - s;
+}
+EXPORT_SYMBOL(strlen);
+#endif
+
+/**
+ * strnlen - Find the length of a length-limited string
+ * @s: The string to be sized
+ * @n: The maximum number of bytes to search
+ *
+ * returns the minimum of the length of @s and @n
+ */
+#ifdef __HAVE_ARCH_STRNLEN
+size_t strnlen(const char *s, size_t n)
+{
+	return __strnend(s, n) - s;
+}
+EXPORT_SYMBOL(strnlen);
+#endif
+
+/**
+ * strcpy - Copy a %NUL terminated string
+ * @dest: Where to copy the string to
+ * @src: Where to copy the string from
+ *
+ * returns a pointer to @dest
+ */
+#ifdef __HAVE_ARCH_STRCPY
+char *strcpy(char *dest, const char *src)
+{
+	char *ret = dest;
+
+	asm volatile(
+		"	lghi	0,0\n"
+		"0:	mvst	%[dest],%[src]\n"
+		"	jo	0b\n"
+		: [dest] "+&a" (dest), [src] "+&a" (src)
+		:
+		: "cc", "memory", "0");
+	return ret;
+}
+EXPORT_SYMBOL(strcpy);
+#endif
+
+/**
+ * strncpy - Copy a length-limited, %NUL-terminated string
+ * @dest: Where to copy the string to
+ * @src: Where to copy the string from
+ * @n: The maximum number of bytes to copy
+ *
+ * The result is not %NUL-terminated if the source exceeds
+ * @n bytes.
+ */
+#ifdef __HAVE_ARCH_STRNCPY
+char *strncpy(char *dest, const char *src, size_t n)
+{
+	size_t len = __strnend(src, n) - src;
+	memset(dest + len, 0, n - len);
+	memcpy(dest, src, len);
+	return dest;
+}
+EXPORT_SYMBOL(strncpy);
+#endif
+
+/**
+ * strcat - Append one %NUL-terminated string to another
+ * @dest: The string to be appended to
+ * @src: The string to append to it
+ *
+ * returns a pointer to @dest
+ */
+#ifdef __HAVE_ARCH_STRCAT
+char *strcat(char *dest, const char *src)
+{
+	unsigned long dummy = 0;
+	char *ret = dest;
+
+	asm volatile(
+		"	lghi	0,0\n"
+		"0:	srst	%[dummy],%[dest]\n"
+		"	jo	0b\n"
+		"1:	mvst	%[dummy],%[src]\n"
+		"	jo	1b\n"
+		: [dummy] "+&a" (dummy), [dest] "+&a" (dest), [src] "+&a" (src)
+		:
+		: "cc", "memory", "0");
+	return ret;
+}
+EXPORT_SYMBOL(strcat);
+#endif
+
+/**
+ * strlcat - Append a length-limited, %NUL-terminated string to another
+ * @dest: The string to be appended to
+ * @src: The string to append to it
+ * @n: The size of the destination buffer.
+ */
+#ifdef __HAVE_ARCH_STRLCAT
+size_t strlcat(char *dest, const char *src, size_t n)
+{
+	size_t dsize = __strend(dest) - dest;
+	size_t len = __strend(src) - src;
+	size_t res = dsize + len;
+
+	if (dsize < n) {
+		dest += dsize;
+		n -= dsize;
+		if (len >= n)
+			len = n - 1;
+		dest[len] = '\0';
+		memcpy(dest, src, len);
+	}
+	return res;
+}
+EXPORT_SYMBOL(strlcat);
+#endif
+
+/**
+ * strncat - Append a length-limited, %NUL-terminated string to another
+ * @dest: The string to be appended to
+ * @src: The string to append to it
+ * @n: The maximum numbers of bytes to copy
+ *
+ * returns a pointer to @dest
+ *
+ * Note that in contrast to strncpy, strncat ensures the result is
+ * terminated.
+ */
+#ifdef __HAVE_ARCH_STRNCAT
+char *strncat(char *dest, const char *src, size_t n)
+{
+	size_t len = __strnend(src, n) - src;
+	char *p = __strend(dest);
+
+	p[len] = '\0';
+	memcpy(p, src, len);
+	return dest;
+}
+EXPORT_SYMBOL(strncat);
+#endif
+
+/**
+ * strcmp - Compare two strings
+ * @s1: One string
+ * @s2: Another string
+ *
+ * returns   0 if @s1 and @s2 are equal,
+ *	   < 0 if @s1 is less than @s2
+ *	   > 0 if @s1 is greater than @s2
+ */
+#ifdef __HAVE_ARCH_STRCMP
+int strcmp(const char *s1, const char *s2)
+{
+	int ret = 0;
+
+	asm volatile(
+		"	lghi	0,0\n"
+		"0:	clst	%[s1],%[s2]\n"
+		"	jo	0b\n"
+		"	je	1f\n"
+		"	ic	%[ret],0(%[s1])\n"
+		"	ic	0,0(%[s2])\n"
+		"	sr	%[ret],0\n"
+		"1:"
+		: [ret] "+&d" (ret), [s1] "+&a" (s1), [s2] "+&a" (s2)
+		:
+		: "cc", "memory", "0");
+	return ret;
+}
+EXPORT_SYMBOL(strcmp);
+#endif
+
+static inline int clcle(const char *s1, unsigned long l1,
+			const char *s2, unsigned long l2)
+{
+	union register_pair r1 = { .even = (unsigned long)s1, .odd = l1, };
+	union register_pair r3 = { .even = (unsigned long)s2, .odd = l2, };
+	int cc;
+
+	asm volatile(
+		"0:	clcle	%[r1],%[r3],0\n"
+		"	jo	0b\n"
+		"	ipm	%[cc]\n"
+		"	srl	%[cc],28\n"
+		: [cc] "=&d" (cc), [r1] "+&d" (r1.pair), [r3] "+&d" (r3.pair)
+		:
+		: "cc", "memory");
+	return cc;
+}
+
+/**
+ * strstr - Find the first substring in a %NUL terminated string
+ * @s1: The string to be searched
+ * @s2: The string to search for
+ */
+#ifdef __HAVE_ARCH_STRSTR
+char *strstr(const char *s1, const char *s2)
+{
+	int l1, l2;
+
+	l2 = __strend(s2) - s2;
+	if (!l2)
+		return (char *) s1;
+	l1 = __strend(s1) - s1;
+	while (l1-- >= l2) {
+		int cc;
+
+		cc = clcle(s1, l2, s2, l2);
+		if (!cc)
+			return (char *) s1;
+		s1++;
+	}
+	return NULL;
+}
+EXPORT_SYMBOL(strstr);
+#endif
+
+/**
+ * memchr - Find a character in an area of memory.
+ * @s: The memory area
+ * @c: The byte to search for
+ * @n: The size of the area.
+ *
+ * returns the address of the first occurrence of @c, or %NULL
+ * if @c is not found
+ */
+#ifdef __HAVE_ARCH_MEMCHR
+void *memchr(const void *s, int c, size_t n)
+{
+	const void *ret = s + n;
+
+	asm volatile(
+		"	lgr	0,%[c]\n"
+		"0:	srst	%[ret],%[s]\n"
+		"	jo	0b\n"
+		"	jl	1f\n"
+		"	la	%[ret],0\n"
+		"1:"
+		: [ret] "+&a" (ret), [s] "+&a" (s)
+		: [c] "d" (c)
+		: "cc", "memory", "0");
+	return (void *) ret;
+}
+EXPORT_SYMBOL(memchr);
+#endif
+
+/**
+ * memcmp - Compare two areas of memory
+ * @s1: One area of memory
+ * @s2: Another area of memory
+ * @n: The size of the area.
+ */
+#ifdef __HAVE_ARCH_MEMCMP
+int memcmp(const void *s1, const void *s2, size_t n)
+{
+	int ret;
+
+	ret = clcle(s1, n, s2, n);
+	if (ret)
+		ret = ret == 1 ? -1 : 1;
+	return ret;
+}
+EXPORT_SYMBOL(memcmp);
+#endif
+
+/**
+ * memscan - Find a character in an area of memory.
+ * @s: The memory area
+ * @c: The byte to search for
+ * @n: The size of the area.
+ *
+ * returns the address of the first occurrence of @c, or 1 byte past
+ * the area if @c is not found
+ */
+#ifdef __HAVE_ARCH_MEMSCAN
+void *memscan(void *s, int c, size_t n)
+{
+	const void *ret = s + n;
+
+	asm volatile(
+		"	lgr	0,%[c]\n"
+		"0:	srst	%[ret],%[s]\n"
+		"	jo	0b\n"
+		: [ret] "+&a" (ret), [s] "+&a" (s)
+		: [c] "d" (c)
+		: "cc", "memory", "0");
+	return (void *)ret;
+}
+EXPORT_SYMBOL(memscan);
+#endif
diff --git a/arch/s390/lib/test_kprobes.c b/arch/s390/lib/test_kprobes.c
new file mode 100644
index 0000000000..9e62d62812
--- /dev/null
+++ b/arch/s390/lib/test_kprobes.c
@@ -0,0 +1,75 @@
+// SPDX-License-Identifier: GPL-2.0+
+
+#include <linux/kernel.h>
+#include <linux/kprobes.h>
+#include <linux/random.h>
+#include <kunit/test.h>
+#include "test_kprobes.h"
+
+static struct kprobe kp;
+
+static void setup_kprobe(struct kunit *test, struct kprobe *kp,
+			 const char *symbol, int offset)
+{
+	kp->offset = offset;
+	kp->addr = NULL;
+	kp->symbol_name = symbol;
+}
+
+static void test_kprobe_offset(struct kunit *test, struct kprobe *kp,
+			       const char *target, int offset)
+{
+	int ret;
+
+	setup_kprobe(test, kp, target, 0);
+	ret = register_kprobe(kp);
+	if (!ret)
+		unregister_kprobe(kp);
+	KUNIT_EXPECT_EQ(test, 0, ret);
+	setup_kprobe(test, kp, target, offset);
+	ret = register_kprobe(kp);
+	KUNIT_EXPECT_EQ(test, -EINVAL, ret);
+	if (!ret)
+		unregister_kprobe(kp);
+}
+
+static void test_kprobe_odd(struct kunit *test)
+{
+	test_kprobe_offset(test, &kp, "kprobes_target_odd",
+			   kprobes_target_odd_offs);
+}
+
+static void test_kprobe_in_insn4(struct kunit *test)
+{
+	test_kprobe_offset(test, &kp, "kprobes_target_in_insn4",
+			   kprobes_target_in_insn4_offs);
+}
+
+static void test_kprobe_in_insn6_lo(struct kunit *test)
+{
+	test_kprobe_offset(test, &kp, "kprobes_target_in_insn6_lo",
+			   kprobes_target_in_insn6_lo_offs);
+}
+
+static void test_kprobe_in_insn6_hi(struct kunit *test)
+{
+	test_kprobe_offset(test, &kp, "kprobes_target_in_insn6_hi",
+			   kprobes_target_in_insn6_hi_offs);
+}
+
+static struct kunit_case kprobes_testcases[] = {
+	KUNIT_CASE(test_kprobe_odd),
+	KUNIT_CASE(test_kprobe_in_insn4),
+	KUNIT_CASE(test_kprobe_in_insn6_lo),
+	KUNIT_CASE(test_kprobe_in_insn6_hi),
+	{}
+};
+
+static struct kunit_suite kprobes_test_suite = {
+	.name = "kprobes_test_s390",
+	.test_cases = kprobes_testcases,
+};
+
+kunit_test_suites(&kprobes_test_suite);
+
+MODULE_LICENSE("GPL");
diff --git a/arch/s390/lib/test_kprobes.h b/arch/s390/lib/test_kprobes.h
new file mode 100644
index 0000000000..2b4c9bc337
--- /dev/null
+++ b/arch/s390/lib/test_kprobes.h
@@ -0,0 +1,10 @@
+/* SPDX-License-Identifier: GPL-2.0+ */
+#ifndef TEST_KPROBES_H
+#define TEST_KPROBES_H
+
+extern unsigned long kprobes_target_odd_offs;
+extern unsigned long kprobes_target_in_insn4_offs;
+extern unsigned long kprobes_target_in_insn6_lo_offs;
+extern unsigned long kprobes_target_in_insn6_hi_offs;
+
+#endif
diff --git a/arch/s390/lib/test_kprobes_asm.S b/arch/s390/lib/test_kprobes_asm.S
new file mode 100644
index 0000000000..ade7a30423
--- /dev/null
+++ b/arch/s390/lib/test_kprobes_asm.S
@@ -0,0 +1,45 @@
+/* SPDX-License-Identifier: GPL-2.0+ */
+
+#include <linux/linkage.h>
+#include <asm/ftrace.h>
+
+#define KPROBES_TARGET_START(name)	\
+	SYM_FUNC_START(name);		\
+	FTRACE_GEN_NOP_ASM(name)
+
+#define KPROBES_TARGET_END(name)	\
+	SYM_FUNC_END(name);		\
+	SYM_DATA(name##_offs, .quad 1b - name)
+
+KPROBES_TARGET_START(kprobes_target_in_insn4)
+	.word 0x4700 // bc 0,0
+1:	.word 0x0000
+	br %r14
+KPROBES_TARGET_END(kprobes_target_in_insn4)
+
+KPROBES_TARGET_START(kprobes_target_in_insn6_lo)
+	.word 0xe310 // ly 1,0
+1:	.word 0x0000
+	.word 0x0058
+	br %r14
+KPROBES_TARGET_END(kprobes_target_in_insn6_lo)
+
+KPROBES_TARGET_START(kprobes_target_in_insn6_hi)
+	.word 0xe310 // ly 1,0
+	.word 0x0000
+1:	.word 0x0058
+	br %r14
+KPROBES_TARGET_END(kprobes_target_in_insn6_hi)
+
+KPROBES_TARGET_START(kprobes_target_bp)
+	nop
+	.word 0x0000
+	nop
+1:	br %r14
+KPROBES_TARGET_END(kprobes_target_bp)
+
+KPROBES_TARGET_START(kprobes_target_odd)
+	.byte 0x07
+1:	.byte 0x07
+	br %r14
+KPROBES_TARGET_END(kprobes_target_odd)
diff --git a/arch/s390/lib/test_modules.c b/arch/s390/lib/test_modules.c
new file mode 100644
index 0000000000..9894009fc1
--- /dev/null
+++ b/arch/s390/lib/test_modules.c
@@ -0,0 +1,32 @@
+// SPDX-License-Identifier: GPL-2.0+
+
+#include <kunit/test.h>
+#include <linux/module.h>
+
+#include "test_modules.h"
+
+/*
+ * Test that modules with many relocations are loaded properly.
+ */
+static void test_modules_many_vmlinux_relocs(struct kunit *test)
+{
+	int result = 0;
+
+#define CALL_RETURN(i) result += test_modules_return_ ## i()
+	REPEAT_10000(CALL_RETURN);
+	KUNIT_ASSERT_EQ(test, result, 49995000);
+}
+
+static struct kunit_case modules_testcases[] = {
+	KUNIT_CASE(test_modules_many_vmlinux_relocs),
+	{}
+};
+
+static struct kunit_suite modules_test_suite = {
+	.name = "modules_test_s390",
+	.test_cases = modules_testcases,
+};
+
+kunit_test_suites(&modules_test_suite);
+
+MODULE_LICENSE("GPL");
diff --git a/arch/s390/lib/test_modules.h b/arch/s390/lib/test_modules.h
new file mode 100644
index 0000000000..6371fcf176
--- /dev/null
+++ b/arch/s390/lib/test_modules.h
@@ -0,0 +1,53 @@
+/* SPDX-License-Identifier: GPL-2.0+ */
+#ifndef TEST_MODULES_H
+#define TEST_MODULES_H
+
+#define __REPEAT_10000_3(f, x) \
+	f(x ## 0); \
+	f(x ## 1); \
+	f(x ## 2); \
+	f(x ## 3); \
+	f(x ## 4); \
+	f(x ## 5); \
+	f(x ## 6); \
+	f(x ## 7); \
+	f(x ## 8); \
+	f(x ## 9)
+#define __REPEAT_10000_2(f, x) \
+	__REPEAT_10000_3(f, x ## 0); \
+	__REPEAT_10000_3(f, x ## 1); \
+	__REPEAT_10000_3(f, x ## 2); \
+	__REPEAT_10000_3(f, x ## 3); \
+	__REPEAT_10000_3(f, x ## 4); \
+	__REPEAT_10000_3(f, x ## 5); \
+	__REPEAT_10000_3(f, x ## 6); \
+	__REPEAT_10000_3(f, x ## 7); \
+	__REPEAT_10000_3(f, x ## 8); \
+	__REPEAT_10000_3(f, x ## 9)
+#define __REPEAT_10000_1(f, x) \
+	__REPEAT_10000_2(f, x ## 0); \
+	__REPEAT_10000_2(f, x ## 1); \
+	__REPEAT_10000_2(f, x ## 2); \
+	__REPEAT_10000_2(f, x ## 3); \
+	__REPEAT_10000_2(f, x ## 4); \
+	__REPEAT_10000_2(f, x ## 5); \
+	__REPEAT_10000_2(f, x ## 6); \
+	__REPEAT_10000_2(f, x ## 7); \
+	__REPEAT_10000_2(f, x ## 8); \
+	__REPEAT_10000_2(f, x ## 9)
+#define REPEAT_10000(f) \
+	__REPEAT_10000_1(f, 0); \
+	__REPEAT_10000_1(f, 1); \
+	__REPEAT_10000_1(f, 2); \
+	__REPEAT_10000_1(f, 3); \
+	__REPEAT_10000_1(f, 4); \
+	__REPEAT_10000_1(f, 5); \
+	__REPEAT_10000_1(f, 6); \
+	__REPEAT_10000_1(f, 7); \
+	__REPEAT_10000_1(f, 8); \
+	__REPEAT_10000_1(f, 9)
+
+#define DECLARE_RETURN(i) int test_modules_return_ ## i(void)
+REPEAT_10000(DECLARE_RETURN);
+
+#endif
diff --git a/arch/s390/lib/test_modules_helpers.c b/arch/s390/lib/test_modules_helpers.c
new file mode 100644
index 0000000000..1670349a03
--- /dev/null
+++ b/arch/s390/lib/test_modules_helpers.c
@@ -0,0 +1,13 @@
+// SPDX-License-Identifier: GPL-2.0+
+
+#include <linux/export.h>
+
+#include "test_modules.h"
+
+#define DEFINE_RETURN(i) \
+	int test_modules_return_ ## i(void) \
+	{ \
+		return 1 ## i - 10000; \
+	} \
+	EXPORT_SYMBOL_GPL(test_modules_return_ ## i)
+REPEAT_10000(DEFINE_RETURN);
diff --git a/arch/s390/lib/test_unwind.c b/arch/s390/lib/test_unwind.c
new file mode 100644
index 0000000000..7231bf97b9
--- /dev/null
+++ b/arch/s390/lib/test_unwind.c
@@ -0,0 +1,522 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Test module for unwind_for_each_frame
+ */
+
+#include <kunit/test.h>
+#include <asm/unwind.h>
+#include <linux/completion.h>
+#include <linux/kallsyms.h>
+#include <linux/kthread.h>
+#include <linux/ftrace.h>
+#include <linux/module.h>
+#include <linux/timer.h>
+#include <linux/slab.h>
+#include <linux/string.h>
+#include <linux/kprobes.h>
+#include <linux/wait.h>
+#include <asm/irq.h>
+
+static struct kunit *current_test;
+
+#define BT_BUF_SIZE (PAGE_SIZE * 4)
+
+static bool force_bt;
+module_param_named(backtrace, force_bt, bool, 0444);
+MODULE_PARM_DESC(backtrace, "print backtraces for all tests");
+
+/*
+ * To avoid printk line limit split backtrace by lines
+ */
+static void print_backtrace(char *bt)
+{
+	char *p;
+
+	while (true) {
+		p = strsep(&bt, "\n");
+		if (!p)
+			break;
+		kunit_err(current_test, "%s\n", p);
+	}
+}
+
+/*
+ * Calls unwind_for_each_frame(task, regs, sp) and verifies that the result
+ * contains unwindme_func2 followed by unwindme_func1.
+ */
+static noinline int test_unwind(struct task_struct *task, struct pt_regs *regs,
+				unsigned long sp)
+{
+	int frame_count, prev_is_func2, seen_func2_func1, seen_arch_rethook_trampoline;
+	const int max_frames = 128;
+	struct unwind_state state;
+	size_t bt_pos = 0;
+	int ret = 0;
+	char *bt;
+
+	bt = kmalloc(BT_BUF_SIZE, GFP_ATOMIC);
+	if (!bt) {
+		kunit_err(current_test, "failed to allocate backtrace buffer\n");
+		return -ENOMEM;
+	}
+	/* Unwind. */
+	frame_count = 0;
+	prev_is_func2 = 0;
+	seen_func2_func1 = 0;
+	seen_arch_rethook_trampoline = 0;
+	unwind_for_each_frame(&state, task, regs, sp) {
+		unsigned long addr = unwind_get_return_address(&state);
+		char sym[KSYM_SYMBOL_LEN];
+
+		if (frame_count++ == max_frames)
+			break;
+		if (state.reliable && !addr) {
+			kunit_err(current_test, "unwind state reliable but addr is 0\n");
+			ret = -EINVAL;
+			break;
+		}
+		sprint_symbol(sym, addr);
+		if (bt_pos < BT_BUF_SIZE) {
+			bt_pos += snprintf(bt + bt_pos, BT_BUF_SIZE - bt_pos,
+					   state.reliable ? " [%-7s%px] %pSR\n" :
+							    "([%-7s%px] %pSR)\n",
+					   stack_type_name(state.stack_info.type),
+					   (void *)state.sp, (void *)state.ip);
+			if (bt_pos >= BT_BUF_SIZE)
+				kunit_err(current_test, "backtrace buffer is too small\n");
+		}
+		frame_count += 1;
+		if (prev_is_func2 && str_has_prefix(sym, "unwindme_func1"))
+			seen_func2_func1 = 1;
+		prev_is_func2 = str_has_prefix(sym, "unwindme_func2");
+		if (str_has_prefix(sym, "arch_rethook_trampoline+0x0/"))
+			seen_arch_rethook_trampoline = 1;
+	}
+
+	/* Check the results. */
+	if (unwind_error(&state)) {
+		kunit_err(current_test, "unwind error\n");
+		ret = -EINVAL;
+	}
+	if (!seen_func2_func1) {
+		kunit_err(current_test, "unwindme_func2 and unwindme_func1 not found\n");
+		ret = -EINVAL;
+	}
+	if (frame_count == max_frames) {
+		kunit_err(current_test, "Maximum number of frames exceeded\n");
+		ret = -EINVAL;
+	}
+	if (seen_arch_rethook_trampoline) {
+		kunit_err(current_test, "arch_rethook_trampoline+0x0 in unwinding results\n");
+		ret = -EINVAL;
+	}
+	if (ret || force_bt)
+		print_backtrace(bt);
+	kfree(bt);
+	return ret;
+}
+
+/* State of the task being unwound. */
+struct unwindme {
+	int flags;
+	int ret;
+	struct task_struct *task;
+	struct completion task_ready;
+	wait_queue_head_t task_wq;
+	unsigned long sp;
+};
+
+static struct unwindme *unwindme;
+
+/* Values of unwindme.flags. */
+#define UWM_DEFAULT		0x0
+#define UWM_THREAD		0x1	/* Unwind a separate task. */
+#define UWM_REGS		0x2	/* Pass regs to test_unwind(). */
+#define UWM_SP			0x4	/* Pass sp to test_unwind(). */
+#define UWM_CALLER		0x8	/* Unwind starting from caller. */
+#define UWM_SWITCH_STACK	0x10	/* Use call_on_stack. */
+#define UWM_IRQ			0x20	/* Unwind from irq context. */
+#define UWM_PGM			0x40	/* Unwind from program check handler */
+#define UWM_KPROBE_ON_FTRACE	0x80	/* Unwind from kprobe handler called via ftrace. */
+#define UWM_FTRACE		0x100	/* Unwind from ftrace handler. */
+#define UWM_KRETPROBE		0x200	/* Unwind through kretprobed function. */
+#define UWM_KRETPROBE_HANDLER	0x400	/* Unwind from kretprobe handler. */
+
+static __always_inline struct pt_regs fake_pt_regs(void)
+{
+	struct pt_regs regs;
+
+	memset(&regs, 0, sizeof(regs));
+	regs.gprs[15] = current_stack_pointer;
+
+	asm volatile(
+		"basr	%[psw_addr],0\n"
+		: [psw_addr] "=d" (regs.psw.addr));
+	return regs;
+}
+
+static int kretprobe_ret_handler(struct kretprobe_instance *ri, struct pt_regs *regs)
+{
+	struct unwindme *u = unwindme;
+
+	if (!(u->flags & UWM_KRETPROBE_HANDLER))
+		return 0;
+
+	u->ret = test_unwind(NULL, (u->flags & UWM_REGS) ? regs : NULL,
+			     (u->flags & UWM_SP) ? u->sp : 0);
+
+	return 0;
+}
+
+static noinline notrace int test_unwind_kretprobed_func(struct unwindme *u)
+{
+	struct pt_regs regs;
+
+	if (!(u->flags & UWM_KRETPROBE))
+		return 0;
+
+	regs = fake_pt_regs();
+	return test_unwind(NULL, (u->flags & UWM_REGS) ? &regs : NULL,
+			   (u->flags & UWM_SP) ? u->sp : 0);
+}
+
+static noinline int test_unwind_kretprobed_func_caller(struct unwindme *u)
+{
+	return test_unwind_kretprobed_func(u);
+}
+
+static int test_unwind_kretprobe(struct unwindme *u)
+{
+	int ret;
+	struct kretprobe my_kretprobe;
+
+	if (!IS_ENABLED(CONFIG_KPROBES))
+		kunit_skip(current_test, "requires CONFIG_KPROBES");
+
+	u->ret = -1; /* make sure kprobe is called */
+	unwindme = u;
+
+	memset(&my_kretprobe, 0, sizeof(my_kretprobe));
+	my_kretprobe.handler = kretprobe_ret_handler;
+	my_kretprobe.maxactive = 1;
+	my_kretprobe.kp.addr = (kprobe_opcode_t *)test_unwind_kretprobed_func;
+
+	ret = register_kretprobe(&my_kretprobe);
+
+	if (ret < 0) {
+		kunit_err(current_test, "register_kretprobe failed %d\n", ret);
+		return -EINVAL;
+	}
+
+	ret = test_unwind_kretprobed_func_caller(u);
+	unregister_kretprobe(&my_kretprobe);
+	unwindme = NULL;
+	if (u->flags & UWM_KRETPROBE_HANDLER)
+		ret = u->ret;
+	return ret;
+}
+
+static int kprobe_pre_handler(struct kprobe *p, struct pt_regs *regs)
+{
+	struct unwindme *u = unwindme;
+
+	u->ret = test_unwind(NULL, (u->flags & UWM_REGS) ? regs : NULL,
+			     (u->flags & UWM_SP) ? u->sp : 0);
+	return 0;
+}
+
+extern const char test_unwind_kprobed_insn[];
+
+static noinline void test_unwind_kprobed_func(void)
+{
+	asm volatile(
+		"	nopr	%%r7\n"
+		"test_unwind_kprobed_insn:\n"
+		"	nopr	%%r7\n"
+		:);
+}
+
+static int test_unwind_kprobe(struct unwindme *u)
+{
+	struct kprobe kp;
+	int ret;
+
+	if (!IS_ENABLED(CONFIG_KPROBES))
+		kunit_skip(current_test, "requires CONFIG_KPROBES");
+	if (!IS_ENABLED(CONFIG_KPROBES_ON_FTRACE) && u->flags & UWM_KPROBE_ON_FTRACE)
+		kunit_skip(current_test, "requires CONFIG_KPROBES_ON_FTRACE");
+
+	u->ret = -1; /* make sure kprobe is called */
+	unwindme = u;
+	memset(&kp, 0, sizeof(kp));
+	kp.pre_handler = kprobe_pre_handler;
+	kp.addr = u->flags & UWM_KPROBE_ON_FTRACE ?
+				(kprobe_opcode_t *)test_unwind_kprobed_func :
+				(kprobe_opcode_t *)test_unwind_kprobed_insn;
+	ret = register_kprobe(&kp);
+	if (ret < 0) {
+		kunit_err(current_test, "register_kprobe failed %d\n", ret);
+		return -EINVAL;
+	}
+
+	test_unwind_kprobed_func();
+	unregister_kprobe(&kp);
+	unwindme = NULL;
+	return u->ret;
+}
+
+static void notrace __used test_unwind_ftrace_handler(unsigned long ip,
+						      unsigned long parent_ip,
+						      struct ftrace_ops *fops,
+						      struct ftrace_regs *fregs)
+{
+	struct unwindme *u = (struct unwindme *)fregs->regs.gprs[2];
+
+	u->ret = test_unwind(NULL, (u->flags & UWM_REGS) ? &fregs->regs : NULL,
+			     (u->flags & UWM_SP) ? u->sp : 0);
+}
+
+static noinline int test_unwind_ftraced_func(struct unwindme *u)
+{
+	return READ_ONCE(u)->ret;
+}
+
+static int test_unwind_ftrace(struct unwindme *u)
+{
+	int ret;
+#ifdef CONFIG_DYNAMIC_FTRACE
+	struct ftrace_ops *fops;
+
+	fops = kunit_kzalloc(current_test, sizeof(*fops), GFP_KERNEL);
+	fops->func = test_unwind_ftrace_handler;
+	fops->flags = FTRACE_OPS_FL_DYNAMIC |
+		     FTRACE_OPS_FL_RECURSION |
+		     FTRACE_OPS_FL_SAVE_REGS |
+		     FTRACE_OPS_FL_PERMANENT;
+#else
+	kunit_skip(current_test, "requires CONFIG_DYNAMIC_FTRACE");
+#endif
+
+	ret = ftrace_set_filter_ip(fops, (unsigned long)test_unwind_ftraced_func, 0, 0);
+	if (ret) {
+		kunit_err(current_test, "failed to set ftrace filter (%d)\n", ret);
+		return -1;
+	}
+
+	ret = register_ftrace_function(fops);
+	if (!ret) {
+		ret = test_unwind_ftraced_func(u);
+		unregister_ftrace_function(fops);
+	} else {
+		kunit_err(current_test, "failed to register ftrace handler (%d)\n", ret);
+	}
+
+	ftrace_set_filter_ip(fops, (unsigned long)test_unwind_ftraced_func, 1, 0);
+	return ret;
+}
+
+/* This function may or may not appear in the backtrace. */
+static noinline int unwindme_func4(struct unwindme *u)
+{
+	if (!(u->flags & UWM_CALLER))
+		u->sp = current_frame_address();
+	if (u->flags & UWM_THREAD) {
+		complete(&u->task_ready);
+		wait_event(u->task_wq, kthread_should_park());
+		kthread_parkme();
+		return 0;
+	} else if (u->flags & (UWM_PGM | UWM_KPROBE_ON_FTRACE)) {
+		return test_unwind_kprobe(u);
+	} else if (u->flags & (UWM_KRETPROBE | UWM_KRETPROBE_HANDLER)) {
+		return test_unwind_kretprobe(u);
+	} else if (u->flags & UWM_FTRACE) {
+		return test_unwind_ftrace(u);
+	} else {
+		struct pt_regs regs = fake_pt_regs();
+
+		return test_unwind(NULL,
+				   (u->flags & UWM_REGS) ? &regs : NULL,
+				   (u->flags & UWM_SP) ? u->sp : 0);
+	}
+}
+
+/* This function may or may not appear in the backtrace. */
+static noinline int unwindme_func3(struct unwindme *u)
+{
+	u->sp = current_frame_address();
+	return unwindme_func4(u);
+}
+
+/* This function must appear in the backtrace. */
+static noinline int unwindme_func2(struct unwindme *u)
+{
+	unsigned long flags;
+	int rc;
+
+	if (u->flags & UWM_SWITCH_STACK) {
+		local_irq_save(flags);
+		local_mcck_disable();
+		rc = call_on_stack(1, S390_lowcore.nodat_stack,
+				   int, unwindme_func3, struct unwindme *, u);
+		local_mcck_enable();
+		local_irq_restore(flags);
+		return rc;
+	} else {
+		return unwindme_func3(u);
+	}
+}
+
+/* This function must follow unwindme_func2 in the backtrace. */
+static noinline int unwindme_func1(void *u)
+{
+	return unwindme_func2((struct unwindme *)u);
+}
+
+static void unwindme_timer_fn(struct timer_list *unused)
+{
+	struct unwindme *u = READ_ONCE(unwindme);
+
+	if (u) {
+		unwindme = NULL;
+		u->task = NULL;
+		u->ret = unwindme_func1(u);
+		complete(&u->task_ready);
+	}
+}
+
+static struct timer_list unwind_timer;
+
+static int test_unwind_irq(struct unwindme *u)
+{
+	unwindme = u;
+	init_completion(&u->task_ready);
+	timer_setup(&unwind_timer, unwindme_timer_fn, 0);
+	mod_timer(&unwind_timer, jiffies + 1);
+	wait_for_completion(&u->task_ready);
+	return u->ret;
+}
+
+/* Spawns a task and passes it to test_unwind(). */
+static int test_unwind_task(struct unwindme *u)
+{
+	struct task_struct *task;
+	int ret;
+
+	/* Initialize thread-related fields. */
+	init_completion(&u->task_ready);
+	init_waitqueue_head(&u->task_wq);
+
+	/*
+	 * Start the task and wait until it reaches unwindme_func4() and sleeps
+	 * in (task_ready, unwind_done] range.
+	 */
+	task = kthread_run(unwindme_func1, u, "%s", __func__);
+	if (IS_ERR(task)) {
+		kunit_err(current_test, "kthread_run() failed\n");
+		return PTR_ERR(task);
+	}
+	/*
+	 * Make sure task reaches unwindme_func4 before parking it,
+	 * we might park it before kthread function has been executed otherwise
+	 */
+	wait_for_completion(&u->task_ready);
+	kthread_park(task);
+	/* Unwind. */
+	ret = test_unwind(task, NULL, (u->flags & UWM_SP) ? u->sp : 0);
+	kthread_stop(task);
+	return ret;
+}
+
+struct test_params {
+	int flags;
+	char *name;
+};
+
+/*
+ * Create required parameter list for tests
+ */
+#define TEST_WITH_FLAGS(f) { .flags = f, .name = #f }
+static const struct test_params param_list[] = {
+	TEST_WITH_FLAGS(UWM_DEFAULT),
+	TEST_WITH_FLAGS(UWM_SP),
+	TEST_WITH_FLAGS(UWM_REGS),
+	TEST_WITH_FLAGS(UWM_SWITCH_STACK),
+	TEST_WITH_FLAGS(UWM_SP | UWM_REGS),
+	TEST_WITH_FLAGS(UWM_CALLER | UWM_SP),
+	TEST_WITH_FLAGS(UWM_CALLER | UWM_SP | UWM_REGS),
+	TEST_WITH_FLAGS(UWM_CALLER | UWM_SP | UWM_REGS | UWM_SWITCH_STACK),
+	TEST_WITH_FLAGS(UWM_THREAD),
+	TEST_WITH_FLAGS(UWM_THREAD | UWM_SP),
+	TEST_WITH_FLAGS(UWM_THREAD | UWM_CALLER | UWM_SP),
+	TEST_WITH_FLAGS(UWM_IRQ),
+	TEST_WITH_FLAGS(UWM_IRQ | UWM_SWITCH_STACK),
+	TEST_WITH_FLAGS(UWM_IRQ | UWM_SP),
+	TEST_WITH_FLAGS(UWM_IRQ | UWM_REGS),
+	TEST_WITH_FLAGS(UWM_IRQ | UWM_SP | UWM_REGS),
+	TEST_WITH_FLAGS(UWM_IRQ | UWM_CALLER | UWM_SP),
+	TEST_WITH_FLAGS(UWM_IRQ | UWM_CALLER | UWM_SP | UWM_REGS),
+	TEST_WITH_FLAGS(UWM_IRQ | UWM_CALLER | UWM_SP | UWM_REGS | UWM_SWITCH_STACK),
+	TEST_WITH_FLAGS(UWM_PGM),
+	TEST_WITH_FLAGS(UWM_PGM | UWM_SP),
+	TEST_WITH_FLAGS(UWM_PGM | UWM_REGS),
+	TEST_WITH_FLAGS(UWM_PGM | UWM_SP | UWM_REGS),
+	TEST_WITH_FLAGS(UWM_KPROBE_ON_FTRACE),
+	TEST_WITH_FLAGS(UWM_KPROBE_ON_FTRACE | UWM_SP),
+	TEST_WITH_FLAGS(UWM_KPROBE_ON_FTRACE | UWM_REGS),
+	TEST_WITH_FLAGS(UWM_KPROBE_ON_FTRACE | UWM_SP | UWM_REGS),
+	TEST_WITH_FLAGS(UWM_FTRACE),
+	TEST_WITH_FLAGS(UWM_FTRACE | UWM_SP),
+	TEST_WITH_FLAGS(UWM_FTRACE | UWM_REGS),
+	TEST_WITH_FLAGS(UWM_FTRACE | UWM_SP | UWM_REGS),
+	TEST_WITH_FLAGS(UWM_KRETPROBE),
+	TEST_WITH_FLAGS(UWM_KRETPROBE | UWM_SP),
+	TEST_WITH_FLAGS(UWM_KRETPROBE | UWM_REGS),
+	TEST_WITH_FLAGS(UWM_KRETPROBE | UWM_SP | UWM_REGS),
+	TEST_WITH_FLAGS(UWM_KRETPROBE_HANDLER),
+	TEST_WITH_FLAGS(UWM_KRETPROBE_HANDLER | UWM_SP),
+	TEST_WITH_FLAGS(UWM_KRETPROBE_HANDLER | UWM_REGS),
+	TEST_WITH_FLAGS(UWM_KRETPROBE_HANDLER | UWM_SP | UWM_REGS),
+};
+
+/*
+ * Parameter description generator: required for KUNIT_ARRAY_PARAM()
+ */
+static void get_desc(const struct test_params *params, char *desc)
+{
+	strscpy(desc, params->name, KUNIT_PARAM_DESC_SIZE);
+}
+
+/*
+ * Create test_unwind_gen_params
+ */
+KUNIT_ARRAY_PARAM(test_unwind, param_list, get_desc);
+
+static void test_unwind_flags(struct kunit *test)
+{
+	struct unwindme u;
+	const struct test_params *params;
+
+	current_test = test;
+	params = (const struct test_params *)test->param_value;
+	u.flags = params->flags;
+	if (u.flags & UWM_THREAD)
+		KUNIT_EXPECT_EQ(test, 0, test_unwind_task(&u));
+	else if (u.flags & UWM_IRQ)
+		KUNIT_EXPECT_EQ(test, 0, test_unwind_irq(&u));
+	else
+		KUNIT_EXPECT_EQ(test, 0, unwindme_func1(&u));
+}
+
+static struct kunit_case unwind_test_cases[] = {
+	KUNIT_CASE_PARAM(test_unwind_flags, test_unwind_gen_params),
+	{}
+};
+
+static struct kunit_suite test_unwind_suite = {
+	.name = "test_unwind",
+	.test_cases = unwind_test_cases,
+};
+
+kunit_test_suites(&test_unwind_suite);
+
+MODULE_LICENSE("GPL");
diff --git a/arch/s390/lib/tishift.S b/arch/s390/lib/tishift.S
new file mode 100644
index 0000000000..96214f51f4
--- /dev/null
+++ b/arch/s390/lib/tishift.S
@@ -0,0 +1,63 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#include <linux/export.h>
+#include <linux/linkage.h>
+#include <asm/nospec-insn.h>
+
+	.section .noinstr.text, "ax"
+
+	GEN_BR_THUNK %r14
+
+SYM_FUNC_START(__ashlti3)
+	lmg	%r0,%r1,0(%r3)
+	cije	%r4,0,1f
+	lhi	%r3,64
+	sr	%r3,%r4
+	jnh	0f
+	srlg	%r3,%r1,0(%r3)
+	sllg	%r0,%r0,0(%r4)
+	sllg	%r1,%r1,0(%r4)
+	ogr	%r0,%r3
+	j	1f
+0:	sllg	%r0,%r1,-64(%r4)
+	lghi	%r1,0
+1:	stmg	%r0,%r1,0(%r2)
+	BR_EX	%r14
+SYM_FUNC_END(__ashlti3)
+EXPORT_SYMBOL(__ashlti3)
+
+SYM_FUNC_START(__ashrti3)
+	lmg	%r0,%r1,0(%r3)
+	cije	%r4,0,1f
+	lhi	%r3,64
+	sr	%r3,%r4
+	jnh	0f
+	sllg	%r3,%r0,0(%r3)
+	srlg	%r1,%r1,0(%r4)
+	srag	%r0,%r0,0(%r4)
+	ogr	%r1,%r3
+	j	1f
+0:	srag	%r1,%r0,-64(%r4)
+	srag	%r0,%r0,63
+1:	stmg	%r0,%r1,0(%r2)
+	BR_EX	%r14
+SYM_FUNC_END(__ashrti3)
+EXPORT_SYMBOL(__ashrti3)
+
+SYM_FUNC_START(__lshrti3)
+	lmg	%r0,%r1,0(%r3)
+	cije	%r4,0,1f
+	lhi	%r3,64
+	sr	%r3,%r4
+	jnh	0f
+	sllg	%r3,%r0,0(%r3)
+	srlg	%r1,%r1,0(%r4)
+	srlg	%r0,%r0,0(%r4)
+	ogr	%r1,%r3
+	j	1f
+0:	srlg	%r1,%r0,-64(%r4)
+	lghi	%r0,0
+1:	stmg	%r0,%r1,0(%r2)
+	BR_EX	%r14
+SYM_FUNC_END(__lshrti3)
+EXPORT_SYMBOL(__lshrti3)
diff --git a/arch/s390/lib/uaccess.c b/arch/s390/lib/uaccess.c
new file mode 100644
index 0000000000..e4a13d7cab
--- /dev/null
+++ b/arch/s390/lib/uaccess.c
@@ -0,0 +1,185 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ *  Standard user space access functions based on mvcp/mvcs and doing
+ *  interesting things in the secondary space mode.
+ *
+ *    Copyright IBM Corp. 2006,2014
+ *    Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com),
+ *		 Gerald Schaefer (gerald.schaefer@de.ibm.com)
+ */
+
+#include <linux/uaccess.h>
+#include <linux/export.h>
+#include <linux/mm.h>
+#include <asm/asm-extable.h>
+
+#ifdef CONFIG_DEBUG_ENTRY
+void debug_user_asce(int exit)
+{
+	unsigned long cr1, cr7;
+
+	__ctl_store(cr1, 1, 1);
+	__ctl_store(cr7, 7, 7);
+	if (cr1 == S390_lowcore.kernel_asce && cr7 == S390_lowcore.user_asce)
+		return;
+	panic("incorrect ASCE on kernel %s\n"
+	      "cr1:    %016lx cr7:  %016lx\n"
+	      "kernel: %016llx user: %016llx\n",
+	      exit ? "exit" : "entry", cr1, cr7,
+	      S390_lowcore.kernel_asce, S390_lowcore.user_asce);
+}
+#endif /*CONFIG_DEBUG_ENTRY */
+
+static unsigned long raw_copy_from_user_key(void *to, const void __user *from,
+					    unsigned long size, unsigned long key)
+{
+	unsigned long rem;
+	union oac spec = {
+		.oac2.key = key,
+		.oac2.as = PSW_BITS_AS_SECONDARY,
+		.oac2.k = 1,
+		.oac2.a = 1,
+	};
+
+	asm volatile(
+		"	lr	0,%[spec]\n"
+		"0:	mvcos	0(%[to]),0(%[from]),%[size]\n"
+		"1:	jz	5f\n"
+		"	algr	%[size],%[val]\n"
+		"	slgr	%[from],%[val]\n"
+		"	slgr	%[to],%[val]\n"
+		"	j	0b\n"
+		"2:	la	%[rem],4095(%[from])\n"	/* rem = from + 4095 */
+		"	nr	%[rem],%[val]\n"	/* rem = (from + 4095) & -4096 */
+		"	slgr	%[rem],%[from]\n"
+		"	clgr	%[size],%[rem]\n"	/* copy crosses next page boundary? */
+		"	jnh	6f\n"
+		"3:	mvcos	0(%[to]),0(%[from]),%[rem]\n"
+		"4:	slgr	%[size],%[rem]\n"
+		"	j	6f\n"
+		"5:	slgr	%[size],%[size]\n"
+		"6:\n"
+		EX_TABLE(0b, 2b)
+		EX_TABLE(1b, 2b)
+		EX_TABLE(3b, 6b)
+		EX_TABLE(4b, 6b)
+		: [size] "+&a" (size), [from] "+&a" (from), [to] "+&a" (to), [rem] "=&a" (rem)
+		: [val] "a" (-4096UL), [spec] "d" (spec.val)
+		: "cc", "memory", "0");
+	return size;
+}
+
+unsigned long raw_copy_from_user(void *to, const void __user *from, unsigned long n)
+{
+	return raw_copy_from_user_key(to, from, n, 0);
+}
+EXPORT_SYMBOL(raw_copy_from_user);
+
+unsigned long _copy_from_user_key(void *to, const void __user *from,
+				  unsigned long n, unsigned long key)
+{
+	unsigned long res = n;
+
+	might_fault();
+	if (!should_fail_usercopy()) {
+		instrument_copy_from_user_before(to, from, n);
+		res = raw_copy_from_user_key(to, from, n, key);
+		instrument_copy_from_user_after(to, from, n, res);
+	}
+	if (unlikely(res))
+		memset(to + (n - res), 0, res);
+	return res;
+}
+EXPORT_SYMBOL(_copy_from_user_key);
+
+static unsigned long raw_copy_to_user_key(void __user *to, const void *from,
+					  unsigned long size, unsigned long key)
+{
+	unsigned long rem;
+	union oac spec = {
+		.oac1.key = key,
+		.oac1.as = PSW_BITS_AS_SECONDARY,
+		.oac1.k = 1,
+		.oac1.a = 1,
+	};
+
+	asm volatile(
+		"	lr	0,%[spec]\n"
+		"0:	mvcos	0(%[to]),0(%[from]),%[size]\n"
+		"1:	jz	5f\n"
+		"	algr	%[size],%[val]\n"
+		"	slgr	%[to],%[val]\n"
+		"	slgr	%[from],%[val]\n"
+		"	j	0b\n"
+		"2:	la	%[rem],4095(%[to])\n"	/* rem = to + 4095 */
+		"	nr	%[rem],%[val]\n"	/* rem = (to + 4095) & -4096 */
+		"	slgr	%[rem],%[to]\n"
+		"	clgr	%[size],%[rem]\n"	/* copy crosses next page boundary? */
+		"	jnh	6f\n"
+		"3:	mvcos	0(%[to]),0(%[from]),%[rem]\n"
+		"4:	slgr	%[size],%[rem]\n"
+		"	j	6f\n"
+		"5:	slgr	%[size],%[size]\n"
+		"6:\n"
+		EX_TABLE(0b, 2b)
+		EX_TABLE(1b, 2b)
+		EX_TABLE(3b, 6b)
+		EX_TABLE(4b, 6b)
+		: [size] "+&a" (size), [to] "+&a" (to), [from] "+&a" (from), [rem] "=&a" (rem)
+		: [val] "a" (-4096UL), [spec] "d" (spec.val)
+		: "cc", "memory", "0");
+	return size;
+}
+
+unsigned long raw_copy_to_user(void __user *to, const void *from, unsigned long n)
+{
+	return raw_copy_to_user_key(to, from, n, 0);
+}
+EXPORT_SYMBOL(raw_copy_to_user);
+
+unsigned long _copy_to_user_key(void __user *to, const void *from,
+				unsigned long n, unsigned long key)
+{
+	might_fault();
+	if (should_fail_usercopy())
+		return n;
+	instrument_copy_to_user(to, from, n);
+	return raw_copy_to_user_key(to, from, n, key);
+}
+EXPORT_SYMBOL(_copy_to_user_key);
+
+unsigned long __clear_user(void __user *to, unsigned long size)
+{
+	unsigned long rem;
+	union oac spec = {
+		.oac1.as = PSW_BITS_AS_SECONDARY,
+		.oac1.a = 1,
+	};
+
+	asm volatile(
+		"	lr	0,%[spec]\n"
+		"0:	mvcos	0(%[to]),0(%[zeropg]),%[size]\n"
+		"1:	jz	5f\n"
+		"	algr	%[size],%[val]\n"
+		"	slgr	%[to],%[val]\n"
+		"	j	0b\n"
+		"2:	la	%[rem],4095(%[to])\n"	/* rem = to + 4095 */
+		"	nr	%[rem],%[val]\n"	/* rem = (to + 4095) & -4096 */
+		"	slgr	%[rem],%[to]\n"
+		"	clgr	%[size],%[rem]\n"	/* copy crosses next page boundary? */
+		"	jnh	6f\n"
+		"3:	mvcos	0(%[to]),0(%[zeropg]),%[rem]\n"
+		"4:	slgr	%[size],%[rem]\n"
+		"	j	6f\n"
+		"5:	slgr	%[size],%[size]\n"
+		"6:\n"
+		EX_TABLE(0b, 2b)
+		EX_TABLE(1b, 2b)
+		EX_TABLE(3b, 6b)
+		EX_TABLE(4b, 6b)
+		: [size] "+&a" (size), [to] "+&a" (to), [rem] "=&a" (rem)
+		: [val] "a" (-4096UL), [zeropg] "a" (empty_zero_page), [spec] "d" (spec.val)
+		: "cc", "memory", "0");
+	return size;
+}
+EXPORT_SYMBOL(__clear_user);
diff --git a/arch/s390/lib/xor.c b/arch/s390/lib/xor.c
new file mode 100644
index 0000000000..fb924a8041
--- /dev/null
+++ b/arch/s390/lib/xor.c
@@ -0,0 +1,140 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Optimized xor_block operation for RAID4/5
+ *
+ * Copyright IBM Corp. 2016
+ * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>
+ */
+
+#include <linux/types.h>
+#include <linux/export.h>
+#include <linux/raid/xor.h>
+#include <asm/xor.h>
+
+static void xor_xc_2(unsigned long bytes, unsigned long * __restrict p1,
+		     const unsigned long * __restrict p2)
+{
+	asm volatile(
+		"	larl	1,2f\n"
+		"	aghi	%0,-1\n"
+		"	jm	3f\n"
+		"	srlg	0,%0,8\n"
+		"	ltgr	0,0\n"
+		"	jz	1f\n"
+		"0:	xc	0(256,%1),0(%2)\n"
+		"	la	%1,256(%1)\n"
+		"	la	%2,256(%2)\n"
+		"	brctg	0,0b\n"
+		"1:	ex	%0,0(1)\n"
+		"	j	3f\n"
+		"2:	xc	0(1,%1),0(%2)\n"
+		"3:\n"
+		: : "d" (bytes), "a" (p1), "a" (p2)
+		: "0", "1", "cc", "memory");
+}
+
+static void xor_xc_3(unsigned long bytes, unsigned long * __restrict p1,
+		     const unsigned long * __restrict p2,
+		     const unsigned long * __restrict p3)
+{
+	asm volatile(
+		"	larl	1,2f\n"
+		"	aghi	%0,-1\n"
+		"	jm	3f\n"
+		"	srlg	0,%0,8\n"
+		"	ltgr	0,0\n"
+		"	jz	1f\n"
+		"0:	xc	0(256,%1),0(%2)\n"
+		"	xc	0(256,%1),0(%3)\n"
+		"	la	%1,256(%1)\n"
+		"	la	%2,256(%2)\n"
+		"	la	%3,256(%3)\n"
+		"	brctg	0,0b\n"
+		"1:	ex	%0,0(1)\n"
+		"	ex	%0,6(1)\n"
+		"	j	3f\n"
+		"2:	xc	0(1,%1),0(%2)\n"
+		"	xc	0(1,%1),0(%3)\n"
+		"3:\n"
+		: "+d" (bytes), "+a" (p1), "+a" (p2), "+a" (p3)
+		: : "0", "1", "cc", "memory");
+}
+
+static void xor_xc_4(unsigned long bytes, unsigned long * __restrict p1,
+		     const unsigned long * __restrict p2,
+		     const unsigned long * __restrict p3,
+		     const unsigned long * __restrict p4)
+{
+	asm volatile(
+		"	larl	1,2f\n"
+		"	aghi	%0,-1\n"
+		"	jm	3f\n"
+		"	srlg	0,%0,8\n"
+		"	ltgr	0,0\n"
+		"	jz	1f\n"
+		"0:	xc	0(256,%1),0(%2)\n"
+		"	xc	0(256,%1),0(%3)\n"
+		"	xc	0(256,%1),0(%4)\n"
+		"	la	%1,256(%1)\n"
+		"	la	%2,256(%2)\n"
+		"	la	%3,256(%3)\n"
+		"	la	%4,256(%4)\n"
+		"	brctg	0,0b\n"
+		"1:	ex	%0,0(1)\n"
+		"	ex	%0,6(1)\n"
+		"	ex	%0,12(1)\n"
+		"	j	3f\n"
+		"2:	xc	0(1,%1),0(%2)\n"
+		"	xc	0(1,%1),0(%3)\n"
+		"	xc	0(1,%1),0(%4)\n"
+		"3:\n"
+		: "+d" (bytes), "+a" (p1), "+a" (p2), "+a" (p3), "+a" (p4)
+		: : "0", "1", "cc", "memory");
+}
+
+static void xor_xc_5(unsigned long bytes, unsigned long * __restrict p1,
+		     const unsigned long * __restrict p2,
+		     const unsigned long * __restrict p3,
+		     const unsigned long * __restrict p4,
+		     const unsigned long * __restrict p5)
+{
+	asm volatile(
+		"	larl	1,2f\n"
+		"	aghi	%0,-1\n"
+		"	jm	3f\n"
+		"	srlg	0,%0,8\n"
+		"	ltgr	0,0\n"
+		"	jz	1f\n"
+		"0:	xc	0(256,%1),0(%2)\n"
+		"	xc	0(256,%1),0(%3)\n"
+		"	xc	0(256,%1),0(%4)\n"
+		"	xc	0(256,%1),0(%5)\n"
+		"	la	%1,256(%1)\n"
+		"	la	%2,256(%2)\n"
+		"	la	%3,256(%3)\n"
+		"	la	%4,256(%4)\n"
+		"	la	%5,256(%5)\n"
+		"	brctg	0,0b\n"
+		"1:	ex	%0,0(1)\n"
+		"	ex	%0,6(1)\n"
+		"	ex	%0,12(1)\n"
+		"	ex	%0,18(1)\n"
+		"	j	3f\n"
+		"2:	xc	0(1,%1),0(%2)\n"
+		"	xc	0(1,%1),0(%3)\n"
+		"	xc	0(1,%1),0(%4)\n"
+		"	xc	0(1,%1),0(%5)\n"
+		"3:\n"
+		: "+d" (bytes), "+a" (p1), "+a" (p2), "+a" (p3), "+a" (p4),
+		  "+a" (p5)
+		: : "0", "1", "cc", "memory");
+}
+
+struct xor_block_template xor_block_xc = {
+	.name = "xc",
+	.do_2 = xor_xc_2,
+	.do_3 = xor_xc_3,
+	.do_4 = xor_xc_4,
+	.do_5 = xor_xc_5,
+};
+EXPORT_SYMBOL(xor_block_xc);
diff --git a/arch/s390/mm/Makefile b/arch/s390/mm/Makefile
new file mode 100644
index 0000000000..352ff520fd
--- /dev/null
+++ b/arch/s390/mm/Makefile
@@ -0,0 +1,13 @@
+# SPDX-License-Identifier: GPL-2.0
+#
+# Makefile for the linux s390-specific parts of the memory manager.
+#
+
+obj-y		:= init.o fault.o extmem.o mmap.o vmem.o maccess.o
+obj-y		+= page-states.o pageattr.o pgtable.o pgalloc.o extable.o
+
+obj-$(CONFIG_CMM)		+= cmm.o
+obj-$(CONFIG_HUGETLB_PAGE)	+= hugetlbpage.o
+obj-$(CONFIG_PTDUMP_CORE)	+= dump_pagetables.o
+obj-$(CONFIG_PGSTE)		+= gmap.o
+obj-$(CONFIG_PFAULT)		+= pfault.o
diff --git a/arch/s390/mm/cmm.c b/arch/s390/mm/cmm.c
new file mode 100644
index 0000000000..f475153132
--- /dev/null
+++ b/arch/s390/mm/cmm.c
@@ -0,0 +1,431 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ *  Collaborative memory management interface.
+ *
+ *    Copyright IBM Corp 2003, 2010
+ *    Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>,
+ *
+ */
+
+#include <linux/errno.h>
+#include <linux/fs.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/moduleparam.h>
+#include <linux/gfp.h>
+#include <linux/sched.h>
+#include <linux/string_helpers.h>
+#include <linux/sysctl.h>
+#include <linux/swap.h>
+#include <linux/kthread.h>
+#include <linux/oom.h>
+#include <linux/uaccess.h>
+
+#include <asm/diag.h>
+
+#ifdef CONFIG_CMM_IUCV
+static char *cmm_default_sender = "VMRMSVM";
+#endif
+static char *sender;
+module_param(sender, charp, 0400);
+MODULE_PARM_DESC(sender,
+		 "Guest name that may send SMSG messages (default VMRMSVM)");
+
+#include "../../../drivers/s390/net/smsgiucv.h"
+
+#define CMM_NR_PAGES ((PAGE_SIZE / sizeof(unsigned long)) - 2)
+
+struct cmm_page_array {
+	struct cmm_page_array *next;
+	unsigned long index;
+	unsigned long pages[CMM_NR_PAGES];
+};
+
+static long cmm_pages;
+static long cmm_timed_pages;
+static volatile long cmm_pages_target;
+static volatile long cmm_timed_pages_target;
+static long cmm_timeout_pages;
+static long cmm_timeout_seconds;
+
+static struct cmm_page_array *cmm_page_list;
+static struct cmm_page_array *cmm_timed_page_list;
+static DEFINE_SPINLOCK(cmm_lock);
+
+static struct task_struct *cmm_thread_ptr;
+static DECLARE_WAIT_QUEUE_HEAD(cmm_thread_wait);
+
+static void cmm_timer_fn(struct timer_list *);
+static void cmm_set_timer(void);
+static DEFINE_TIMER(cmm_timer, cmm_timer_fn);
+
+static long cmm_alloc_pages(long nr, long *counter,
+			    struct cmm_page_array **list)
+{
+	struct cmm_page_array *pa, *npa;
+	unsigned long addr;
+
+	while (nr) {
+		addr = __get_free_page(GFP_NOIO);
+		if (!addr)
+			break;
+		spin_lock(&cmm_lock);
+		pa = *list;
+		if (!pa || pa->index >= CMM_NR_PAGES) {
+			/* Need a new page for the page list. */
+			spin_unlock(&cmm_lock);
+			npa = (struct cmm_page_array *)
+				__get_free_page(GFP_NOIO);
+			if (!npa) {
+				free_page(addr);
+				break;
+			}
+			spin_lock(&cmm_lock);
+			pa = *list;
+			if (!pa || pa->index >= CMM_NR_PAGES) {
+				npa->next = pa;
+				npa->index = 0;
+				pa = npa;
+				*list = pa;
+			} else
+				free_page((unsigned long) npa);
+		}
+		diag10_range(virt_to_pfn((void *)addr), 1);
+		pa->pages[pa->index++] = addr;
+		(*counter)++;
+		spin_unlock(&cmm_lock);
+		nr--;
+	}
+	return nr;
+}
+
+static long cmm_free_pages(long nr, long *counter, struct cmm_page_array **list)
+{
+	struct cmm_page_array *pa;
+	unsigned long addr;
+
+	spin_lock(&cmm_lock);
+	pa = *list;
+	while (nr) {
+		if (!pa || pa->index <= 0)
+			break;
+		addr = pa->pages[--pa->index];
+		if (pa->index == 0) {
+			pa = pa->next;
+			free_page((unsigned long) *list);
+			*list = pa;
+		}
+		free_page(addr);
+		(*counter)--;
+		nr--;
+	}
+	spin_unlock(&cmm_lock);
+	return nr;
+}
+
+static int cmm_oom_notify(struct notifier_block *self,
+			  unsigned long dummy, void *parm)
+{
+	unsigned long *freed = parm;
+	long nr = 256;
+
+	nr = cmm_free_pages(nr, &cmm_timed_pages, &cmm_timed_page_list);
+	if (nr > 0)
+		nr = cmm_free_pages(nr, &cmm_pages, &cmm_page_list);
+	cmm_pages_target = cmm_pages;
+	cmm_timed_pages_target = cmm_timed_pages;
+	*freed += 256 - nr;
+	return NOTIFY_OK;
+}
+
+static struct notifier_block cmm_oom_nb = {
+	.notifier_call = cmm_oom_notify,
+};
+
+static int cmm_thread(void *dummy)
+{
+	int rc;
+
+	while (1) {
+		rc = wait_event_interruptible(cmm_thread_wait,
+			cmm_pages != cmm_pages_target ||
+			cmm_timed_pages != cmm_timed_pages_target ||
+			kthread_should_stop());
+		if (kthread_should_stop() || rc == -ERESTARTSYS) {
+			cmm_pages_target = cmm_pages;
+			cmm_timed_pages_target = cmm_timed_pages;
+			break;
+		}
+		if (cmm_pages_target > cmm_pages) {
+			if (cmm_alloc_pages(1, &cmm_pages, &cmm_page_list))
+				cmm_pages_target = cmm_pages;
+		} else if (cmm_pages_target < cmm_pages) {
+			cmm_free_pages(1, &cmm_pages, &cmm_page_list);
+		}
+		if (cmm_timed_pages_target > cmm_timed_pages) {
+			if (cmm_alloc_pages(1, &cmm_timed_pages,
+					   &cmm_timed_page_list))
+				cmm_timed_pages_target = cmm_timed_pages;
+		} else if (cmm_timed_pages_target < cmm_timed_pages) {
+			cmm_free_pages(1, &cmm_timed_pages,
+				       &cmm_timed_page_list);
+		}
+		if (cmm_timed_pages > 0 && !timer_pending(&cmm_timer))
+			cmm_set_timer();
+	}
+	return 0;
+}
+
+static void cmm_kick_thread(void)
+{
+	wake_up(&cmm_thread_wait);
+}
+
+static void cmm_set_timer(void)
+{
+	if (cmm_timed_pages_target <= 0 || cmm_timeout_seconds <= 0) {
+		if (timer_pending(&cmm_timer))
+			del_timer(&cmm_timer);
+		return;
+	}
+	mod_timer(&cmm_timer, jiffies + msecs_to_jiffies(cmm_timeout_seconds * MSEC_PER_SEC));
+}
+
+static void cmm_timer_fn(struct timer_list *unused)
+{
+	long nr;
+
+	nr = cmm_timed_pages_target - cmm_timeout_pages;
+	if (nr < 0)
+		cmm_timed_pages_target = 0;
+	else
+		cmm_timed_pages_target = nr;
+	cmm_kick_thread();
+	cmm_set_timer();
+}
+
+static void cmm_set_pages(long nr)
+{
+	cmm_pages_target = nr;
+	cmm_kick_thread();
+}
+
+static long cmm_get_pages(void)
+{
+	return cmm_pages;
+}
+
+static void cmm_add_timed_pages(long nr)
+{
+	cmm_timed_pages_target += nr;
+	cmm_kick_thread();
+}
+
+static long cmm_get_timed_pages(void)
+{
+	return cmm_timed_pages;
+}
+
+static void cmm_set_timeout(long nr, long seconds)
+{
+	cmm_timeout_pages = nr;
+	cmm_timeout_seconds = seconds;
+	cmm_set_timer();
+}
+
+static int cmm_skip_blanks(char *cp, char **endp)
+{
+	char *str;
+
+	for (str = cp; *str == ' ' || *str == '\t'; str++)
+		;
+	*endp = str;
+	return str != cp;
+}
+
+static int cmm_pages_handler(struct ctl_table *ctl, int write,
+			     void *buffer, size_t *lenp, loff_t *ppos)
+{
+	long nr = cmm_get_pages();
+	struct ctl_table ctl_entry = {
+		.procname	= ctl->procname,
+		.data		= &nr,
+		.maxlen		= sizeof(long),
+	};
+	int rc;
+
+	rc = proc_doulongvec_minmax(&ctl_entry, write, buffer, lenp, ppos);
+	if (rc < 0 || !write)
+		return rc;
+
+	cmm_set_pages(nr);
+	return 0;
+}
+
+static int cmm_timed_pages_handler(struct ctl_table *ctl, int write,
+				   void *buffer, size_t *lenp,
+				   loff_t *ppos)
+{
+	long nr = cmm_get_timed_pages();
+	struct ctl_table ctl_entry = {
+		.procname	= ctl->procname,
+		.data		= &nr,
+		.maxlen		= sizeof(long),
+	};
+	int rc;
+
+	rc = proc_doulongvec_minmax(&ctl_entry, write, buffer, lenp, ppos);
+	if (rc < 0 || !write)
+		return rc;
+
+	cmm_add_timed_pages(nr);
+	return 0;
+}
+
+static int cmm_timeout_handler(struct ctl_table *ctl, int write,
+			       void *buffer, size_t *lenp, loff_t *ppos)
+{
+	char buf[64], *p;
+	long nr, seconds;
+	unsigned int len;
+
+	if (!*lenp || (*ppos && !write)) {
+		*lenp = 0;
+		return 0;
+	}
+
+	if (write) {
+		len = min(*lenp, sizeof(buf));
+		memcpy(buf, buffer, len);
+		buf[len - 1] = '\0';
+		cmm_skip_blanks(buf, &p);
+		nr = simple_strtoul(p, &p, 0);
+		cmm_skip_blanks(p, &p);
+		seconds = simple_strtoul(p, &p, 0);
+		cmm_set_timeout(nr, seconds);
+		*ppos += *lenp;
+	} else {
+		len = sprintf(buf, "%ld %ld\n",
+			      cmm_timeout_pages, cmm_timeout_seconds);
+		if (len > *lenp)
+			len = *lenp;
+		memcpy(buffer, buf, len);
+		*lenp = len;
+		*ppos += len;
+	}
+	return 0;
+}
+
+static struct ctl_table cmm_table[] = {
+	{
+		.procname	= "cmm_pages",
+		.mode		= 0644,
+		.proc_handler	= cmm_pages_handler,
+	},
+	{
+		.procname	= "cmm_timed_pages",
+		.mode		= 0644,
+		.proc_handler	= cmm_timed_pages_handler,
+	},
+	{
+		.procname	= "cmm_timeout",
+		.mode		= 0644,
+		.proc_handler	= cmm_timeout_handler,
+	},
+	{ }
+};
+
+#ifdef CONFIG_CMM_IUCV
+#define SMSG_PREFIX "CMM"
+static void cmm_smsg_target(const char *from, char *msg)
+{
+	long nr, seconds;
+
+	if (strlen(sender) > 0 && strcmp(from, sender) != 0)
+		return;
+	if (!cmm_skip_blanks(msg + strlen(SMSG_PREFIX), &msg))
+		return;
+	if (strncmp(msg, "SHRINK", 6) == 0) {
+		if (!cmm_skip_blanks(msg + 6, &msg))
+			return;
+		nr = simple_strtoul(msg, &msg, 0);
+		cmm_skip_blanks(msg, &msg);
+		if (*msg == '\0')
+			cmm_set_pages(nr);
+	} else if (strncmp(msg, "RELEASE", 7) == 0) {
+		if (!cmm_skip_blanks(msg + 7, &msg))
+			return;
+		nr = simple_strtoul(msg, &msg, 0);
+		cmm_skip_blanks(msg, &msg);
+		if (*msg == '\0')
+			cmm_add_timed_pages(nr);
+	} else if (strncmp(msg, "REUSE", 5) == 0) {
+		if (!cmm_skip_blanks(msg + 5, &msg))
+			return;
+		nr = simple_strtoul(msg, &msg, 0);
+		if (!cmm_skip_blanks(msg, &msg))
+			return;
+		seconds = simple_strtoul(msg, &msg, 0);
+		cmm_skip_blanks(msg, &msg);
+		if (*msg == '\0')
+			cmm_set_timeout(nr, seconds);
+	}
+}
+#endif
+
+static struct ctl_table_header *cmm_sysctl_header;
+
+static int __init cmm_init(void)
+{
+	int rc = -ENOMEM;
+
+	cmm_sysctl_header = register_sysctl("vm", cmm_table);
+	if (!cmm_sysctl_header)
+		goto out_sysctl;
+#ifdef CONFIG_CMM_IUCV
+	/* convert sender to uppercase characters */
+	if (sender)
+		string_upper(sender, sender);
+	else
+		sender = cmm_default_sender;
+
+	rc = smsg_register_callback(SMSG_PREFIX, cmm_smsg_target);
+	if (rc < 0)
+		goto out_smsg;
+#endif
+	rc = register_oom_notifier(&cmm_oom_nb);
+	if (rc < 0)
+		goto out_oom_notify;
+	cmm_thread_ptr = kthread_run(cmm_thread, NULL, "cmmthread");
+	if (!IS_ERR(cmm_thread_ptr))
+		return 0;
+
+	rc = PTR_ERR(cmm_thread_ptr);
+	unregister_oom_notifier(&cmm_oom_nb);
+out_oom_notify:
+#ifdef CONFIG_CMM_IUCV
+	smsg_unregister_callback(SMSG_PREFIX, cmm_smsg_target);
+out_smsg:
+#endif
+	unregister_sysctl_table(cmm_sysctl_header);
+out_sysctl:
+	del_timer_sync(&cmm_timer);
+	return rc;
+}
+module_init(cmm_init);
+
+static void __exit cmm_exit(void)
+{
+	unregister_sysctl_table(cmm_sysctl_header);
+#ifdef CONFIG_CMM_IUCV
+	smsg_unregister_callback(SMSG_PREFIX, cmm_smsg_target);
+#endif
+	unregister_oom_notifier(&cmm_oom_nb);
+	kthread_stop(cmm_thread_ptr);
+	del_timer_sync(&cmm_timer);
+	cmm_free_pages(cmm_pages, &cmm_pages, &cmm_page_list);
+	cmm_free_pages(cmm_timed_pages, &cmm_timed_pages, &cmm_timed_page_list);
+}
+module_exit(cmm_exit);
+
+MODULE_LICENSE("GPL");
diff --git a/arch/s390/mm/dump_pagetables.c b/arch/s390/mm/dump_pagetables.c
new file mode 100644
index 0000000000..b51666967a
--- /dev/null
+++ b/arch/s390/mm/dump_pagetables.c
@@ -0,0 +1,315 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/set_memory.h>
+#include <linux/ptdump.h>
+#include <linux/seq_file.h>
+#include <linux/debugfs.h>
+#include <linux/mm.h>
+#include <linux/kfence.h>
+#include <linux/kasan.h>
+#include <asm/ptdump.h>
+#include <asm/kasan.h>
+#include <asm/abs_lowcore.h>
+#include <asm/nospec-branch.h>
+#include <asm/sections.h>
+#include <asm/maccess.h>
+
+static unsigned long max_addr;
+
+struct addr_marker {
+	unsigned long start_address;
+	const char *name;
+};
+
+enum address_markers_idx {
+	IDENTITY_BEFORE_NR = 0,
+	IDENTITY_BEFORE_END_NR,
+	AMODE31_START_NR,
+	AMODE31_END_NR,
+	KERNEL_START_NR,
+	KERNEL_END_NR,
+#ifdef CONFIG_KFENCE
+	KFENCE_START_NR,
+	KFENCE_END_NR,
+#endif
+	IDENTITY_AFTER_NR,
+	IDENTITY_AFTER_END_NR,
+	VMEMMAP_NR,
+	VMEMMAP_END_NR,
+	VMALLOC_NR,
+	VMALLOC_END_NR,
+	MODULES_NR,
+	MODULES_END_NR,
+	ABS_LOWCORE_NR,
+	ABS_LOWCORE_END_NR,
+	MEMCPY_REAL_NR,
+	MEMCPY_REAL_END_NR,
+#ifdef CONFIG_KASAN
+	KASAN_SHADOW_START_NR,
+	KASAN_SHADOW_END_NR,
+#endif
+};
+
+static struct addr_marker address_markers[] = {
+	[IDENTITY_BEFORE_NR]	= {0, "Identity Mapping Start"},
+	[IDENTITY_BEFORE_END_NR] = {(unsigned long)_stext, "Identity Mapping End"},
+	[AMODE31_START_NR]	= {0, "Amode31 Area Start"},
+	[AMODE31_END_NR]	= {0, "Amode31 Area End"},
+	[KERNEL_START_NR]	= {(unsigned long)_stext, "Kernel Image Start"},
+	[KERNEL_END_NR]		= {(unsigned long)_end, "Kernel Image End"},
+#ifdef CONFIG_KFENCE
+	[KFENCE_START_NR]	= {0, "KFence Pool Start"},
+	[KFENCE_END_NR]		= {0, "KFence Pool End"},
+#endif
+	[IDENTITY_AFTER_NR]	= {(unsigned long)_end, "Identity Mapping Start"},
+	[IDENTITY_AFTER_END_NR]	= {0, "Identity Mapping End"},
+	[VMEMMAP_NR]		= {0, "vmemmap Area Start"},
+	[VMEMMAP_END_NR]	= {0, "vmemmap Area End"},
+	[VMALLOC_NR]		= {0, "vmalloc Area Start"},
+	[VMALLOC_END_NR]	= {0, "vmalloc Area End"},
+	[MODULES_NR]		= {0, "Modules Area Start"},
+	[MODULES_END_NR]	= {0, "Modules Area End"},
+	[ABS_LOWCORE_NR]	= {0, "Lowcore Area Start"},
+	[ABS_LOWCORE_END_NR]	= {0, "Lowcore Area End"},
+	[MEMCPY_REAL_NR]	= {0, "Real Memory Copy Area Start"},
+	[MEMCPY_REAL_END_NR]	= {0, "Real Memory Copy Area End"},
+#ifdef CONFIG_KASAN
+	[KASAN_SHADOW_START_NR]	= {KASAN_SHADOW_START, "Kasan Shadow Start"},
+	[KASAN_SHADOW_END_NR]	= {KASAN_SHADOW_END, "Kasan Shadow End"},
+#endif
+	{ -1, NULL }
+};
+
+struct pg_state {
+	struct ptdump_state ptdump;
+	struct seq_file *seq;
+	int level;
+	unsigned int current_prot;
+	bool check_wx;
+	unsigned long wx_pages;
+	unsigned long start_address;
+	const struct addr_marker *marker;
+};
+
+#define pt_dump_seq_printf(m, fmt, args...)	\
+({						\
+	struct seq_file *__m = (m);		\
+						\
+	if (__m)				\
+		seq_printf(__m, fmt, ##args);	\
+})
+
+#define pt_dump_seq_puts(m, fmt)		\
+({						\
+	struct seq_file *__m = (m);		\
+						\
+	if (__m)				\
+		seq_printf(__m, fmt);		\
+})
+
+static void print_prot(struct seq_file *m, unsigned int pr, int level)
+{
+	static const char * const level_name[] =
+		{ "ASCE", "PGD", "PUD", "PMD", "PTE" };
+
+	pt_dump_seq_printf(m, "%s ", level_name[level]);
+	if (pr & _PAGE_INVALID) {
+		pt_dump_seq_printf(m, "I\n");
+		return;
+	}
+	pt_dump_seq_puts(m, (pr & _PAGE_PROTECT) ? "RO " : "RW ");
+	pt_dump_seq_puts(m, (pr & _PAGE_NOEXEC) ? "NX\n" : "X\n");
+}
+
+static void note_prot_wx(struct pg_state *st, unsigned long addr)
+{
+#ifdef CONFIG_DEBUG_WX
+	if (!st->check_wx)
+		return;
+	if (st->current_prot & _PAGE_INVALID)
+		return;
+	if (st->current_prot & _PAGE_PROTECT)
+		return;
+	if (st->current_prot & _PAGE_NOEXEC)
+		return;
+	/*
+	 * The first lowcore page is W+X if spectre mitigations are using
+	 * trampolines or the BEAR enhancements facility is not installed,
+	 * in which case we have two lpswe instructions in lowcore that need
+	 * to be executable.
+	 */
+	if (addr == PAGE_SIZE && (nospec_uses_trampoline() || !static_key_enabled(&cpu_has_bear)))
+		return;
+	WARN_ONCE(1, "s390/mm: Found insecure W+X mapping at address %pS\n",
+		  (void *)st->start_address);
+	st->wx_pages += (addr - st->start_address) / PAGE_SIZE;
+#endif /* CONFIG_DEBUG_WX */
+}
+
+static void note_page(struct ptdump_state *pt_st, unsigned long addr, int level, u64 val)
+{
+	int width = sizeof(unsigned long) * 2;
+	static const char units[] = "KMGTPE";
+	const char *unit = units;
+	unsigned long delta;
+	struct pg_state *st;
+	struct seq_file *m;
+	unsigned int prot;
+
+	st = container_of(pt_st, struct pg_state, ptdump);
+	m = st->seq;
+	prot = val & (_PAGE_PROTECT | _PAGE_NOEXEC);
+	if (level == 4 && (val & _PAGE_INVALID))
+		prot = _PAGE_INVALID;
+	/* For pmd_none() & friends val gets passed as zero. */
+	if (level != 4 && !val)
+		prot = _PAGE_INVALID;
+	/* Final flush from generic code. */
+	if (level == -1)
+		addr = max_addr;
+	if (st->level == -1) {
+		pt_dump_seq_printf(m, "---[ %s ]---\n", st->marker->name);
+		st->start_address = addr;
+		st->current_prot = prot;
+		st->level = level;
+	} else if (prot != st->current_prot || level != st->level ||
+		   addr >= st->marker[1].start_address) {
+		note_prot_wx(st, addr);
+		pt_dump_seq_printf(m, "0x%0*lx-0x%0*lx ",
+				   width, st->start_address,
+				   width, addr);
+		delta = (addr - st->start_address) >> 10;
+		while (!(delta & 0x3ff) && unit[1]) {
+			delta >>= 10;
+			unit++;
+		}
+		pt_dump_seq_printf(m, "%9lu%c ", delta, *unit);
+		print_prot(m, st->current_prot, st->level);
+		while (addr >= st->marker[1].start_address) {
+			st->marker++;
+			pt_dump_seq_printf(m, "---[ %s ]---\n", st->marker->name);
+		}
+		st->start_address = addr;
+		st->current_prot = prot;
+		st->level = level;
+	}
+}
+
+#ifdef CONFIG_DEBUG_WX
+void ptdump_check_wx(void)
+{
+	struct pg_state st = {
+		.ptdump = {
+			.note_page = note_page,
+			.range = (struct ptdump_range[]) {
+				{.start = 0, .end = max_addr},
+				{.start = 0, .end = 0},
+			}
+		},
+		.seq = NULL,
+		.level = -1,
+		.current_prot = 0,
+		.check_wx = true,
+		.wx_pages = 0,
+		.start_address = 0,
+		.marker = (struct addr_marker[]) {
+			{ .start_address =  0, .name = NULL},
+			{ .start_address = -1, .name = NULL},
+		},
+	};
+
+	if (!MACHINE_HAS_NX)
+		return;
+	ptdump_walk_pgd(&st.ptdump, &init_mm, NULL);
+	if (st.wx_pages)
+		pr_warn("Checked W+X mappings: FAILED, %lu W+X pages found\n", st.wx_pages);
+	else
+		pr_info("Checked W+X mappings: passed, no %sW+X pages found\n",
+			(nospec_uses_trampoline() || !static_key_enabled(&cpu_has_bear)) ?
+			"unexpected " : "");
+}
+#endif /* CONFIG_DEBUG_WX */
+
+#ifdef CONFIG_PTDUMP_DEBUGFS
+static int ptdump_show(struct seq_file *m, void *v)
+{
+	struct pg_state st = {
+		.ptdump = {
+			.note_page = note_page,
+			.range = (struct ptdump_range[]) {
+				{.start = 0, .end = max_addr},
+				{.start = 0, .end = 0},
+			}
+		},
+		.seq = m,
+		.level = -1,
+		.current_prot = 0,
+		.check_wx = false,
+		.wx_pages = 0,
+		.start_address = 0,
+		.marker = address_markers,
+	};
+
+	get_online_mems();
+	mutex_lock(&cpa_mutex);
+	ptdump_walk_pgd(&st.ptdump, &init_mm, NULL);
+	mutex_unlock(&cpa_mutex);
+	put_online_mems();
+	return 0;
+}
+DEFINE_SHOW_ATTRIBUTE(ptdump);
+#endif /* CONFIG_PTDUMP_DEBUGFS */
+
+/*
+ * Heapsort from lib/sort.c is not a stable sorting algorithm, do a simple
+ * insertion sort to preserve the original order of markers with the same
+ * start address.
+ */
+static void sort_address_markers(void)
+{
+	struct addr_marker tmp;
+	int i, j;
+
+	for (i = 1; i < ARRAY_SIZE(address_markers) - 1; i++) {
+		tmp = address_markers[i];
+		for (j = i - 1; j >= 0 && address_markers[j].start_address > tmp.start_address; j--)
+			address_markers[j + 1] = address_markers[j];
+		address_markers[j + 1] = tmp;
+	}
+}
+
+static int pt_dump_init(void)
+{
+#ifdef CONFIG_KFENCE
+	unsigned long kfence_start = (unsigned long)__kfence_pool;
+#endif
+	/*
+	 * Figure out the maximum virtual address being accessible with the
+	 * kernel ASCE. We need this to keep the page table walker functions
+	 * from accessing non-existent entries.
+	 */
+	max_addr = (S390_lowcore.kernel_asce & _REGION_ENTRY_TYPE_MASK) >> 2;
+	max_addr = 1UL << (max_addr * 11 + 31);
+	address_markers[IDENTITY_AFTER_END_NR].start_address = ident_map_size;
+	address_markers[AMODE31_START_NR].start_address = (unsigned long)__samode31;
+	address_markers[AMODE31_END_NR].start_address = (unsigned long)__eamode31;
+	address_markers[MODULES_NR].start_address = MODULES_VADDR;
+	address_markers[MODULES_END_NR].start_address = MODULES_END;
+	address_markers[ABS_LOWCORE_NR].start_address = __abs_lowcore;
+	address_markers[ABS_LOWCORE_END_NR].start_address = __abs_lowcore + ABS_LOWCORE_MAP_SIZE;
+	address_markers[MEMCPY_REAL_NR].start_address = __memcpy_real_area;
+	address_markers[MEMCPY_REAL_END_NR].start_address = __memcpy_real_area + MEMCPY_REAL_SIZE;
+	address_markers[VMEMMAP_NR].start_address = (unsigned long) vmemmap;
+	address_markers[VMEMMAP_END_NR].start_address = (unsigned long)vmemmap + vmemmap_size;
+	address_markers[VMALLOC_NR].start_address = VMALLOC_START;
+	address_markers[VMALLOC_END_NR].start_address = VMALLOC_END;
+#ifdef CONFIG_KFENCE
+	address_markers[KFENCE_START_NR].start_address = kfence_start;
+	address_markers[KFENCE_END_NR].start_address = kfence_start + KFENCE_POOL_SIZE;
+#endif
+	sort_address_markers();
+#ifdef CONFIG_PTDUMP_DEBUGFS
+	debugfs_create_file("kernel_page_tables", 0400, NULL, NULL, &ptdump_fops);
+#endif /* CONFIG_PTDUMP_DEBUGFS */
+	return 0;
+}
+device_initcall(pt_dump_init);
diff --git a/arch/s390/mm/extable.c b/arch/s390/mm/extable.c
new file mode 100644
index 0000000000..fe87291df9
--- /dev/null
+++ b/arch/s390/mm/extable.c
@@ -0,0 +1,86 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/bitfield.h>
+#include <linux/extable.h>
+#include <linux/string.h>
+#include <linux/errno.h>
+#include <linux/panic.h>
+#include <asm/asm-extable.h>
+#include <asm/extable.h>
+
+const struct exception_table_entry *s390_search_extables(unsigned long addr)
+{
+	const struct exception_table_entry *fixup;
+	size_t num;
+
+	fixup = search_exception_tables(addr);
+	if (fixup)
+		return fixup;
+	num = __stop_amode31_ex_table - __start_amode31_ex_table;
+	return search_extable(__start_amode31_ex_table, num, addr);
+}
+
+static bool ex_handler_fixup(const struct exception_table_entry *ex, struct pt_regs *regs)
+{
+	regs->psw.addr = extable_fixup(ex);
+	return true;
+}
+
+static bool ex_handler_ua_store(const struct exception_table_entry *ex, struct pt_regs *regs)
+{
+	unsigned int reg_err = FIELD_GET(EX_DATA_REG_ERR, ex->data);
+
+	regs->gprs[reg_err] = -EFAULT;
+	regs->psw.addr = extable_fixup(ex);
+	return true;
+}
+
+static bool ex_handler_ua_load_mem(const struct exception_table_entry *ex, struct pt_regs *regs)
+{
+	unsigned int reg_addr = FIELD_GET(EX_DATA_REG_ADDR, ex->data);
+	unsigned int reg_err = FIELD_GET(EX_DATA_REG_ERR, ex->data);
+	size_t len = FIELD_GET(EX_DATA_LEN, ex->data);
+
+	regs->gprs[reg_err] = -EFAULT;
+	memset((void *)regs->gprs[reg_addr], 0, len);
+	regs->psw.addr = extable_fixup(ex);
+	return true;
+}
+
+static bool ex_handler_ua_load_reg(const struct exception_table_entry *ex,
+				   bool pair, struct pt_regs *regs)
+{
+	unsigned int reg_zero = FIELD_GET(EX_DATA_REG_ADDR, ex->data);
+	unsigned int reg_err = FIELD_GET(EX_DATA_REG_ERR, ex->data);
+
+	regs->gprs[reg_err] = -EFAULT;
+	regs->gprs[reg_zero] = 0;
+	if (pair)
+		regs->gprs[reg_zero + 1] = 0;
+	regs->psw.addr = extable_fixup(ex);
+	return true;
+}
+
+bool fixup_exception(struct pt_regs *regs)
+{
+	const struct exception_table_entry *ex;
+
+	ex = s390_search_extables(instruction_pointer(regs));
+	if (!ex)
+		return false;
+	switch (ex->type) {
+	case EX_TYPE_FIXUP:
+		return ex_handler_fixup(ex, regs);
+	case EX_TYPE_BPF:
+		return ex_handler_bpf(ex, regs);
+	case EX_TYPE_UA_STORE:
+		return ex_handler_ua_store(ex, regs);
+	case EX_TYPE_UA_LOAD_MEM:
+		return ex_handler_ua_load_mem(ex, regs);
+	case EX_TYPE_UA_LOAD_REG:
+		return ex_handler_ua_load_reg(ex, false, regs);
+	case EX_TYPE_UA_LOAD_REGPAIR:
+		return ex_handler_ua_load_reg(ex, true, regs);
+	}
+	panic("invalid exception table entry");
+}
diff --git a/arch/s390/mm/extmem.c b/arch/s390/mm/extmem.c
new file mode 100644
index 0000000000..e41869f5cc
--- /dev/null
+++ b/arch/s390/mm/extmem.c
@@ -0,0 +1,660 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Author(s)......: Carsten Otte <cotte@de.ibm.com>
+ * 		    Rob M van der Heij <rvdheij@nl.ibm.com>
+ * 		    Steven Shultz <shultzss@us.ibm.com>
+ * Bugreports.to..: <Linux390@de.ibm.com>
+ * Copyright IBM Corp. 2002, 2004
+ */
+
+#define KMSG_COMPONENT "extmem"
+#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
+
+#include <linux/kernel.h>
+#include <linux/string.h>
+#include <linux/spinlock.h>
+#include <linux/list.h>
+#include <linux/slab.h>
+#include <linux/export.h>
+#include <linux/memblock.h>
+#include <linux/ctype.h>
+#include <linux/ioport.h>
+#include <linux/refcount.h>
+#include <linux/pgtable.h>
+#include <asm/diag.h>
+#include <asm/page.h>
+#include <asm/ebcdic.h>
+#include <asm/errno.h>
+#include <asm/extmem.h>
+#include <asm/cpcmd.h>
+#include <asm/setup.h>
+
+#define DCSS_PURGESEG   0x08
+#define DCSS_LOADSHRX	0x20
+#define DCSS_LOADNSRX	0x24
+#define DCSS_FINDSEGX	0x2c
+#define DCSS_SEGEXTX	0x38
+#define DCSS_FINDSEGA   0x0c
+
+struct qrange {
+	unsigned long  start; /* last byte type */
+	unsigned long  end;   /* last byte reserved */
+};
+
+struct qout64 {
+	unsigned long segstart;
+	unsigned long segend;
+	int segcnt;
+	int segrcnt;
+	struct qrange range[6];
+};
+
+struct qin64 {
+	char qopcode;
+	char rsrv1[3];
+	char qrcode;
+	char rsrv2[3];
+	char qname[8];
+	unsigned int qoutptr;
+	short int qoutlen;
+};
+
+struct dcss_segment {
+	struct list_head list;
+	char dcss_name[8];
+	char res_name[16];
+	unsigned long start_addr;
+	unsigned long end;
+	refcount_t ref_count;
+	int do_nonshared;
+	unsigned int vm_segtype;
+	struct qrange range[6];
+	int segcnt;
+	struct resource *res;
+};
+
+static DEFINE_MUTEX(dcss_lock);
+static LIST_HEAD(dcss_list);
+static char *segtype_string[] = { "SW", "EW", "SR", "ER", "SN", "EN", "SC",
+					"EW/EN-MIXED" };
+static int loadshr_scode = DCSS_LOADSHRX;
+static int loadnsr_scode = DCSS_LOADNSRX;
+static int purgeseg_scode = DCSS_PURGESEG;
+static int segext_scode = DCSS_SEGEXTX;
+
+/*
+ * Create the 8 bytes, ebcdic VM segment name from
+ * an ascii name.
+ */
+static void
+dcss_mkname(char *name, char *dcss_name)
+{
+	int i;
+
+	for (i = 0; i < 8; i++) {
+		if (name[i] == '\0')
+			break;
+		dcss_name[i] = toupper(name[i]);
+	}
+	for (; i < 8; i++)
+		dcss_name[i] = ' ';
+	ASCEBC(dcss_name, 8);
+}
+
+
+/*
+ * search all segments in dcss_list, and return the one
+ * namend *name. If not found, return NULL.
+ */
+static struct dcss_segment *
+segment_by_name (char *name)
+{
+	char dcss_name[9];
+	struct list_head *l;
+	struct dcss_segment *tmp, *retval = NULL;
+
+	BUG_ON(!mutex_is_locked(&dcss_lock));
+	dcss_mkname (name, dcss_name);
+	list_for_each (l, &dcss_list) {
+		tmp = list_entry (l, struct dcss_segment, list);
+		if (memcmp(tmp->dcss_name, dcss_name, 8) == 0) {
+			retval = tmp;
+			break;
+		}
+	}
+	return retval;
+}
+
+
+/*
+ * Perform a function on a dcss segment.
+ */
+static inline int
+dcss_diag(int *func, void *parameter,
+           unsigned long *ret1, unsigned long *ret2)
+{
+	unsigned long rx, ry;
+	int rc;
+
+	rx = (unsigned long) parameter;
+	ry = (unsigned long) *func;
+
+	diag_stat_inc(DIAG_STAT_X064);
+	asm volatile(
+		"	diag	%0,%1,0x64\n"
+		"	ipm	%2\n"
+		"	srl	%2,28\n"
+		: "+d" (rx), "+d" (ry), "=d" (rc) : : "cc");
+	*ret1 = rx;
+	*ret2 = ry;
+	return rc;
+}
+
+static inline int
+dcss_diag_translate_rc (int vm_rc) {
+	if (vm_rc == 44)
+		return -ENOENT;
+	return -EIO;
+}
+
+
+/* do a diag to get info about a segment.
+ * fills start_address, end and vm_segtype fields
+ */
+static int
+query_segment_type (struct dcss_segment *seg)
+{
+	unsigned long dummy, vmrc;
+	int diag_cc, rc, i;
+	struct qout64 *qout;
+	struct qin64 *qin;
+
+	qin = kmalloc(sizeof(*qin), GFP_KERNEL | GFP_DMA);
+	qout = kmalloc(sizeof(*qout), GFP_KERNEL | GFP_DMA);
+	if ((qin == NULL) || (qout == NULL)) {
+		rc = -ENOMEM;
+		goto out_free;
+	}
+
+	/* initialize diag input parameters */
+	qin->qopcode = DCSS_FINDSEGA;
+	qin->qoutptr = (unsigned long) qout;
+	qin->qoutlen = sizeof(struct qout64);
+	memcpy (qin->qname, seg->dcss_name, 8);
+
+	diag_cc = dcss_diag(&segext_scode, qin, &dummy, &vmrc);
+
+	if (diag_cc < 0) {
+		rc = diag_cc;
+		goto out_free;
+	}
+	if (diag_cc > 1) {
+		pr_warn("Querying a DCSS type failed with rc=%ld\n", vmrc);
+		rc = dcss_diag_translate_rc (vmrc);
+		goto out_free;
+	}
+
+	if (qout->segcnt > 6) {
+		rc = -EOPNOTSUPP;
+		goto out_free;
+	}
+
+	if (qout->segcnt == 1) {
+		seg->vm_segtype = qout->range[0].start & 0xff;
+	} else {
+		/* multi-part segment. only one type supported here:
+		    - all parts are contiguous
+		    - all parts are either EW or EN type
+		    - maximum 6 parts allowed */
+		unsigned long start = qout->segstart >> PAGE_SHIFT;
+		for (i=0; i<qout->segcnt; i++) {
+			if (((qout->range[i].start & 0xff) != SEG_TYPE_EW) &&
+			    ((qout->range[i].start & 0xff) != SEG_TYPE_EN)) {
+				rc = -EOPNOTSUPP;
+				goto out_free;
+			}
+			if (start != qout->range[i].start >> PAGE_SHIFT) {
+				rc = -EOPNOTSUPP;
+				goto out_free;
+			}
+			start = (qout->range[i].end >> PAGE_SHIFT) + 1;
+		}
+		seg->vm_segtype = SEG_TYPE_EWEN;
+	}
+
+	/* analyze diag output and update seg */
+	seg->start_addr = qout->segstart;
+	seg->end = qout->segend;
+
+	memcpy (seg->range, qout->range, 6*sizeof(struct qrange));
+	seg->segcnt = qout->segcnt;
+
+	rc = 0;
+
+ out_free:
+	kfree(qin);
+	kfree(qout);
+	return rc;
+}
+
+/*
+ * get info about a segment
+ * possible return values:
+ * -ENOSYS  : we are not running on VM
+ * -EIO     : could not perform query diagnose
+ * -ENOENT  : no such segment
+ * -EOPNOTSUPP: multi-part segment cannot be used with linux
+ * -ENOMEM  : out of memory
+ * 0 .. 6   : type of segment as defined in include/asm-s390/extmem.h
+ */
+int
+segment_type (char* name)
+{
+	int rc;
+	struct dcss_segment seg;
+
+	if (!MACHINE_IS_VM)
+		return -ENOSYS;
+
+	dcss_mkname(name, seg.dcss_name);
+	rc = query_segment_type (&seg);
+	if (rc < 0)
+		return rc;
+	return seg.vm_segtype;
+}
+
+/*
+ * check if segment collides with other segments that are currently loaded
+ * returns 1 if this is the case, 0 if no collision was found
+ */
+static int
+segment_overlaps_others (struct dcss_segment *seg)
+{
+	struct list_head *l;
+	struct dcss_segment *tmp;
+
+	BUG_ON(!mutex_is_locked(&dcss_lock));
+	list_for_each(l, &dcss_list) {
+		tmp = list_entry(l, struct dcss_segment, list);
+		if ((tmp->start_addr >> 20) > (seg->end >> 20))
+			continue;
+		if ((tmp->end >> 20) < (seg->start_addr >> 20))
+			continue;
+		if (seg == tmp)
+			continue;
+		return 1;
+	}
+	return 0;
+}
+
+/*
+ * real segment loading function, called from segment_load
+ * Must return either an error code < 0, or the segment type code >= 0
+ */
+static int
+__segment_load (char *name, int do_nonshared, unsigned long *addr, unsigned long *end)
+{
+	unsigned long start_addr, end_addr, dummy;
+	struct dcss_segment *seg;
+	int rc, diag_cc, segtype;
+
+	start_addr = end_addr = 0;
+	segtype = -1;
+	seg = kmalloc(sizeof(*seg), GFP_KERNEL | GFP_DMA);
+	if (seg == NULL) {
+		rc = -ENOMEM;
+		goto out;
+	}
+	dcss_mkname (name, seg->dcss_name);
+	rc = query_segment_type (seg);
+	if (rc < 0)
+		goto out_free;
+
+	if (segment_overlaps_others(seg)) {
+		rc = -EBUSY;
+		goto out_free;
+	}
+
+	seg->res = kzalloc(sizeof(struct resource), GFP_KERNEL);
+	if (seg->res == NULL) {
+		rc = -ENOMEM;
+		goto out_free;
+	}
+	seg->res->flags = IORESOURCE_BUSY | IORESOURCE_MEM;
+	seg->res->start = seg->start_addr;
+	seg->res->end = seg->end;
+	memcpy(&seg->res_name, seg->dcss_name, 8);
+	EBCASC(seg->res_name, 8);
+	seg->res_name[8] = '\0';
+	strlcat(seg->res_name, " (DCSS)", sizeof(seg->res_name));
+	seg->res->name = seg->res_name;
+	segtype = seg->vm_segtype;
+	if (segtype == SEG_TYPE_SC ||
+	    ((segtype == SEG_TYPE_SR || segtype == SEG_TYPE_ER) && !do_nonshared))
+		seg->res->flags |= IORESOURCE_READONLY;
+
+	/* Check for overlapping resources before adding the mapping. */
+	if (request_resource(&iomem_resource, seg->res)) {
+		rc = -EBUSY;
+		goto out_free_resource;
+	}
+
+	rc = vmem_add_mapping(seg->start_addr, seg->end - seg->start_addr + 1);
+	if (rc)
+		goto out_resource;
+
+	if (do_nonshared)
+		diag_cc = dcss_diag(&loadnsr_scode, seg->dcss_name,
+				&start_addr, &end_addr);
+	else
+		diag_cc = dcss_diag(&loadshr_scode, seg->dcss_name,
+				&start_addr, &end_addr);
+	if (diag_cc < 0) {
+		dcss_diag(&purgeseg_scode, seg->dcss_name,
+				&dummy, &dummy);
+		rc = diag_cc;
+		goto out_mapping;
+	}
+	if (diag_cc > 1) {
+		pr_warn("Loading DCSS %s failed with rc=%ld\n", name, end_addr);
+		rc = dcss_diag_translate_rc(end_addr);
+		dcss_diag(&purgeseg_scode, seg->dcss_name,
+				&dummy, &dummy);
+		goto out_mapping;
+	}
+	seg->start_addr = start_addr;
+	seg->end = end_addr;
+	seg->do_nonshared = do_nonshared;
+	refcount_set(&seg->ref_count, 1);
+	list_add(&seg->list, &dcss_list);
+	*addr = seg->start_addr;
+	*end  = seg->end;
+	if (do_nonshared)
+		pr_info("DCSS %s of range %px to %px and type %s loaded as "
+			"exclusive-writable\n", name, (void*) seg->start_addr,
+			(void*) seg->end, segtype_string[seg->vm_segtype]);
+	else {
+		pr_info("DCSS %s of range %px to %px and type %s loaded in "
+			"shared access mode\n", name, (void*) seg->start_addr,
+			(void*) seg->end, segtype_string[seg->vm_segtype]);
+	}
+	goto out;
+ out_mapping:
+	vmem_remove_mapping(seg->start_addr, seg->end - seg->start_addr + 1);
+ out_resource:
+	release_resource(seg->res);
+ out_free_resource:
+	kfree(seg->res);
+ out_free:
+	kfree(seg);
+ out:
+	return rc < 0 ? rc : segtype;
+}
+
+/*
+ * this function loads a DCSS segment
+ * name         : name of the DCSS
+ * do_nonshared : 0 indicates that the dcss should be shared with other linux images
+ *                1 indicates that the dcss should be exclusive for this linux image
+ * addr         : will be filled with start address of the segment
+ * end          : will be filled with end address of the segment
+ * return values:
+ * -ENOSYS  : we are not running on VM
+ * -EIO     : could not perform query or load diagnose
+ * -ENOENT  : no such segment
+ * -EOPNOTSUPP: multi-part segment cannot be used with linux
+ * -EBUSY   : segment cannot be used (overlaps with dcss or storage)
+ * -ERANGE  : segment cannot be used (exceeds kernel mapping range)
+ * -EPERM   : segment is currently loaded with incompatible permissions
+ * -ENOMEM  : out of memory
+ * 0 .. 6   : type of segment as defined in include/asm-s390/extmem.h
+ */
+int
+segment_load (char *name, int do_nonshared, unsigned long *addr,
+		unsigned long *end)
+{
+	struct dcss_segment *seg;
+	int rc;
+
+	if (!MACHINE_IS_VM)
+		return -ENOSYS;
+
+	mutex_lock(&dcss_lock);
+	seg = segment_by_name (name);
+	if (seg == NULL)
+		rc = __segment_load (name, do_nonshared, addr, end);
+	else {
+		if (do_nonshared == seg->do_nonshared) {
+			refcount_inc(&seg->ref_count);
+			*addr = seg->start_addr;
+			*end  = seg->end;
+			rc    = seg->vm_segtype;
+		} else {
+			*addr = *end = 0;
+			rc    = -EPERM;
+		}
+	}
+	mutex_unlock(&dcss_lock);
+	return rc;
+}
+
+/*
+ * this function modifies the shared state of a DCSS segment. note that
+ * name         : name of the DCSS
+ * do_nonshared : 0 indicates that the dcss should be shared with other linux images
+ *                1 indicates that the dcss should be exclusive for this linux image
+ * return values:
+ * -EIO     : could not perform load diagnose (segment gone!)
+ * -ENOENT  : no such segment (segment gone!)
+ * -EAGAIN  : segment is in use by other exploiters, try later
+ * -EINVAL  : no segment with the given name is currently loaded - name invalid
+ * -EBUSY   : segment can temporarily not be used (overlaps with dcss)
+ * 0	    : operation succeeded
+ */
+int
+segment_modify_shared (char *name, int do_nonshared)
+{
+	struct dcss_segment *seg;
+	unsigned long start_addr, end_addr, dummy;
+	int rc, diag_cc;
+
+	start_addr = end_addr = 0;
+	mutex_lock(&dcss_lock);
+	seg = segment_by_name (name);
+	if (seg == NULL) {
+		rc = -EINVAL;
+		goto out_unlock;
+	}
+	if (do_nonshared == seg->do_nonshared) {
+		pr_info("DCSS %s is already in the requested access "
+			"mode\n", name);
+		rc = 0;
+		goto out_unlock;
+	}
+	if (refcount_read(&seg->ref_count) != 1) {
+		pr_warn("DCSS %s is in use and cannot be reloaded\n", name);
+		rc = -EAGAIN;
+		goto out_unlock;
+	}
+	release_resource(seg->res);
+	if (do_nonshared)
+		seg->res->flags &= ~IORESOURCE_READONLY;
+	else
+		if (seg->vm_segtype == SEG_TYPE_SR ||
+		    seg->vm_segtype == SEG_TYPE_ER)
+			seg->res->flags |= IORESOURCE_READONLY;
+
+	if (request_resource(&iomem_resource, seg->res)) {
+		pr_warn("DCSS %s overlaps with used memory resources and cannot be reloaded\n",
+			name);
+		rc = -EBUSY;
+		kfree(seg->res);
+		goto out_del_mem;
+	}
+
+	dcss_diag(&purgeseg_scode, seg->dcss_name, &dummy, &dummy);
+	if (do_nonshared)
+		diag_cc = dcss_diag(&loadnsr_scode, seg->dcss_name,
+				&start_addr, &end_addr);
+	else
+		diag_cc = dcss_diag(&loadshr_scode, seg->dcss_name,
+				&start_addr, &end_addr);
+	if (diag_cc < 0) {
+		rc = diag_cc;
+		goto out_del_res;
+	}
+	if (diag_cc > 1) {
+		pr_warn("Reloading DCSS %s failed with rc=%ld\n",
+			name, end_addr);
+		rc = dcss_diag_translate_rc(end_addr);
+		goto out_del_res;
+	}
+	seg->start_addr = start_addr;
+	seg->end = end_addr;
+	seg->do_nonshared = do_nonshared;
+	rc = 0;
+	goto out_unlock;
+ out_del_res:
+	release_resource(seg->res);
+	kfree(seg->res);
+ out_del_mem:
+	vmem_remove_mapping(seg->start_addr, seg->end - seg->start_addr + 1);
+	list_del(&seg->list);
+	dcss_diag(&purgeseg_scode, seg->dcss_name, &dummy, &dummy);
+	kfree(seg);
+ out_unlock:
+	mutex_unlock(&dcss_lock);
+	return rc;
+}
+
+/*
+ * Decrease the use count of a DCSS segment and remove
+ * it from the address space if nobody is using it
+ * any longer.
+ */
+void
+segment_unload(char *name)
+{
+	unsigned long dummy;
+	struct dcss_segment *seg;
+
+	if (!MACHINE_IS_VM)
+		return;
+
+	mutex_lock(&dcss_lock);
+	seg = segment_by_name (name);
+	if (seg == NULL) {
+		pr_err("Unloading unknown DCSS %s failed\n", name);
+		goto out_unlock;
+	}
+	if (!refcount_dec_and_test(&seg->ref_count))
+		goto out_unlock;
+	release_resource(seg->res);
+	kfree(seg->res);
+	vmem_remove_mapping(seg->start_addr, seg->end - seg->start_addr + 1);
+	list_del(&seg->list);
+	dcss_diag(&purgeseg_scode, seg->dcss_name, &dummy, &dummy);
+	kfree(seg);
+out_unlock:
+	mutex_unlock(&dcss_lock);
+}
+
+/*
+ * save segment content permanently
+ */
+void
+segment_save(char *name)
+{
+	struct dcss_segment *seg;
+	char cmd1[160];
+	char cmd2[80];
+	int i, response;
+
+	if (!MACHINE_IS_VM)
+		return;
+
+	mutex_lock(&dcss_lock);
+	seg = segment_by_name (name);
+
+	if (seg == NULL) {
+		pr_err("Saving unknown DCSS %s failed\n", name);
+		goto out;
+	}
+
+	sprintf(cmd1, "DEFSEG %s", name);
+	for (i=0; i<seg->segcnt; i++) {
+		sprintf(cmd1+strlen(cmd1), " %lX-%lX %s",
+			seg->range[i].start >> PAGE_SHIFT,
+			seg->range[i].end >> PAGE_SHIFT,
+			segtype_string[seg->range[i].start & 0xff]);
+	}
+	sprintf(cmd2, "SAVESEG %s", name);
+	response = 0;
+	cpcmd(cmd1, NULL, 0, &response);
+	if (response) {
+		pr_err("Saving a DCSS failed with DEFSEG response code "
+		       "%i\n", response);
+		goto out;
+	}
+	cpcmd(cmd2, NULL, 0, &response);
+	if (response) {
+		pr_err("Saving a DCSS failed with SAVESEG response code "
+		       "%i\n", response);
+		goto out;
+	}
+out:
+	mutex_unlock(&dcss_lock);
+}
+
+/*
+ * print appropriate error message for segment_load()/segment_type()
+ * return code
+ */
+void segment_warning(int rc, char *seg_name)
+{
+	switch (rc) {
+	case -ENOENT:
+		pr_err("DCSS %s cannot be loaded or queried\n", seg_name);
+		break;
+	case -ENOSYS:
+		pr_err("DCSS %s cannot be loaded or queried without "
+		       "z/VM\n", seg_name);
+		break;
+	case -EIO:
+		pr_err("Loading or querying DCSS %s resulted in a "
+		       "hardware error\n", seg_name);
+		break;
+	case -EOPNOTSUPP:
+		pr_err("DCSS %s has multiple page ranges and cannot be "
+		       "loaded or queried\n", seg_name);
+		break;
+	case -EBUSY:
+		pr_err("%s needs used memory resources and cannot be "
+		       "loaded or queried\n", seg_name);
+		break;
+	case -EPERM:
+		pr_err("DCSS %s is already loaded in a different access "
+		       "mode\n", seg_name);
+		break;
+	case -ENOMEM:
+		pr_err("There is not enough memory to load or query "
+		       "DCSS %s\n", seg_name);
+		break;
+	case -ERANGE: {
+		struct range mhp_range = arch_get_mappable_range();
+
+		pr_err("DCSS %s exceeds the kernel mapping range (%llu) "
+		       "and cannot be loaded\n", seg_name, mhp_range.end + 1);
+		break;
+	}
+	default:
+		break;
+	}
+}
+
+EXPORT_SYMBOL(segment_load);
+EXPORT_SYMBOL(segment_unload);
+EXPORT_SYMBOL(segment_save);
+EXPORT_SYMBOL(segment_type);
+EXPORT_SYMBOL(segment_modify_shared);
+EXPORT_SYMBOL(segment_warning);
diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c
new file mode 100644
index 0000000000..b678295931
--- /dev/null
+++ b/arch/s390/mm/fault.c
@@ -0,0 +1,708 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ *  S390 version
+ *    Copyright IBM Corp. 1999
+ *    Author(s): Hartmut Penner (hp@de.ibm.com)
+ *               Ulrich Weigand (uweigand@de.ibm.com)
+ *
+ *  Derived from "arch/i386/mm/fault.c"
+ *    Copyright (C) 1995  Linus Torvalds
+ */
+
+#include <linux/kernel_stat.h>
+#include <linux/perf_event.h>
+#include <linux/signal.h>
+#include <linux/sched.h>
+#include <linux/sched/debug.h>
+#include <linux/kernel.h>
+#include <linux/errno.h>
+#include <linux/string.h>
+#include <linux/types.h>
+#include <linux/ptrace.h>
+#include <linux/mman.h>
+#include <linux/mm.h>
+#include <linux/compat.h>
+#include <linux/smp.h>
+#include <linux/kdebug.h>
+#include <linux/init.h>
+#include <linux/console.h>
+#include <linux/extable.h>
+#include <linux/hardirq.h>
+#include <linux/kprobes.h>
+#include <linux/uaccess.h>
+#include <linux/hugetlb.h>
+#include <linux/kfence.h>
+#include <asm/asm-extable.h>
+#include <asm/asm-offsets.h>
+#include <asm/diag.h>
+#include <asm/gmap.h>
+#include <asm/irq.h>
+#include <asm/mmu_context.h>
+#include <asm/facility.h>
+#include <asm/uv.h>
+#include "../kernel/entry.h"
+
+#define __FAIL_ADDR_MASK -4096L
+
+/*
+ * Allocate private vm_fault_reason from top.  Please make sure it won't
+ * collide with vm_fault_reason.
+ */
+#define VM_FAULT_BADCONTEXT	((__force vm_fault_t)0x80000000)
+#define VM_FAULT_BADMAP		((__force vm_fault_t)0x40000000)
+#define VM_FAULT_BADACCESS	((__force vm_fault_t)0x20000000)
+#define VM_FAULT_SIGNAL		((__force vm_fault_t)0x10000000)
+#define VM_FAULT_PFAULT		((__force vm_fault_t)0x8000000)
+
+enum fault_type {
+	KERNEL_FAULT,
+	USER_FAULT,
+	GMAP_FAULT,
+};
+
+static unsigned long store_indication __read_mostly;
+
+static int __init fault_init(void)
+{
+	if (test_facility(75))
+		store_indication = 0xc00;
+	return 0;
+}
+early_initcall(fault_init);
+
+/*
+ * Find out which address space caused the exception.
+ */
+static enum fault_type get_fault_type(struct pt_regs *regs)
+{
+	unsigned long trans_exc_code;
+
+	trans_exc_code = regs->int_parm_long & 3;
+	if (likely(trans_exc_code == 0)) {
+		/* primary space exception */
+		if (user_mode(regs))
+			return USER_FAULT;
+		if (!IS_ENABLED(CONFIG_PGSTE))
+			return KERNEL_FAULT;
+		if (test_pt_regs_flag(regs, PIF_GUEST_FAULT))
+			return GMAP_FAULT;
+		return KERNEL_FAULT;
+	}
+	if (trans_exc_code == 2)
+		return USER_FAULT;
+	if (trans_exc_code == 1) {
+		/* access register mode, not used in the kernel */
+		return USER_FAULT;
+	}
+	/* home space exception -> access via kernel ASCE */
+	return KERNEL_FAULT;
+}
+
+static unsigned long get_fault_address(struct pt_regs *regs)
+{
+	unsigned long trans_exc_code = regs->int_parm_long;
+
+	return trans_exc_code & __FAIL_ADDR_MASK;
+}
+
+static bool fault_is_write(struct pt_regs *regs)
+{
+	unsigned long trans_exc_code = regs->int_parm_long;
+
+	return (trans_exc_code & store_indication) == 0x400;
+}
+
+static int bad_address(void *p)
+{
+	unsigned long dummy;
+
+	return get_kernel_nofault(dummy, (unsigned long *)p);
+}
+
+static void dump_pagetable(unsigned long asce, unsigned long address)
+{
+	unsigned long *table = __va(asce & _ASCE_ORIGIN);
+
+	pr_alert("AS:%016lx ", asce);
+	switch (asce & _ASCE_TYPE_MASK) {
+	case _ASCE_TYPE_REGION1:
+		table += (address & _REGION1_INDEX) >> _REGION1_SHIFT;
+		if (bad_address(table))
+			goto bad;
+		pr_cont("R1:%016lx ", *table);
+		if (*table & _REGION_ENTRY_INVALID)
+			goto out;
+		table = __va(*table & _REGION_ENTRY_ORIGIN);
+		fallthrough;
+	case _ASCE_TYPE_REGION2:
+		table += (address & _REGION2_INDEX) >> _REGION2_SHIFT;
+		if (bad_address(table))
+			goto bad;
+		pr_cont("R2:%016lx ", *table);
+		if (*table & _REGION_ENTRY_INVALID)
+			goto out;
+		table = __va(*table & _REGION_ENTRY_ORIGIN);
+		fallthrough;
+	case _ASCE_TYPE_REGION3:
+		table += (address & _REGION3_INDEX) >> _REGION3_SHIFT;
+		if (bad_address(table))
+			goto bad;
+		pr_cont("R3:%016lx ", *table);
+		if (*table & (_REGION_ENTRY_INVALID | _REGION3_ENTRY_LARGE))
+			goto out;
+		table = __va(*table & _REGION_ENTRY_ORIGIN);
+		fallthrough;
+	case _ASCE_TYPE_SEGMENT:
+		table += (address & _SEGMENT_INDEX) >> _SEGMENT_SHIFT;
+		if (bad_address(table))
+			goto bad;
+		pr_cont("S:%016lx ", *table);
+		if (*table & (_SEGMENT_ENTRY_INVALID | _SEGMENT_ENTRY_LARGE))
+			goto out;
+		table = __va(*table & _SEGMENT_ENTRY_ORIGIN);
+	}
+	table += (address & _PAGE_INDEX) >> _PAGE_SHIFT;
+	if (bad_address(table))
+		goto bad;
+	pr_cont("P:%016lx ", *table);
+out:
+	pr_cont("\n");
+	return;
+bad:
+	pr_cont("BAD\n");
+}
+
+static void dump_fault_info(struct pt_regs *regs)
+{
+	unsigned long asce;
+
+	pr_alert("Failing address: %016lx TEID: %016lx\n",
+		 regs->int_parm_long & __FAIL_ADDR_MASK, regs->int_parm_long);
+	pr_alert("Fault in ");
+	switch (regs->int_parm_long & 3) {
+	case 3:
+		pr_cont("home space ");
+		break;
+	case 2:
+		pr_cont("secondary space ");
+		break;
+	case 1:
+		pr_cont("access register ");
+		break;
+	case 0:
+		pr_cont("primary space ");
+		break;
+	}
+	pr_cont("mode while using ");
+	switch (get_fault_type(regs)) {
+	case USER_FAULT:
+		asce = S390_lowcore.user_asce;
+		pr_cont("user ");
+		break;
+	case GMAP_FAULT:
+		asce = ((struct gmap *) S390_lowcore.gmap)->asce;
+		pr_cont("gmap ");
+		break;
+	case KERNEL_FAULT:
+		asce = S390_lowcore.kernel_asce;
+		pr_cont("kernel ");
+		break;
+	default:
+		unreachable();
+	}
+	pr_cont("ASCE.\n");
+	dump_pagetable(asce, regs->int_parm_long & __FAIL_ADDR_MASK);
+}
+
+int show_unhandled_signals = 1;
+
+void report_user_fault(struct pt_regs *regs, long signr, int is_mm_fault)
+{
+	if ((task_pid_nr(current) > 1) && !show_unhandled_signals)
+		return;
+	if (!unhandled_signal(current, signr))
+		return;
+	if (!printk_ratelimit())
+		return;
+	printk(KERN_ALERT "User process fault: interruption code %04x ilc:%d ",
+	       regs->int_code & 0xffff, regs->int_code >> 17);
+	print_vma_addr(KERN_CONT "in ", regs->psw.addr);
+	printk(KERN_CONT "\n");
+	if (is_mm_fault)
+		dump_fault_info(regs);
+	show_regs(regs);
+}
+
+/*
+ * Send SIGSEGV to task.  This is an external routine
+ * to keep the stack usage of do_page_fault small.
+ */
+static noinline void do_sigsegv(struct pt_regs *regs, int si_code)
+{
+	report_user_fault(regs, SIGSEGV, 1);
+	force_sig_fault(SIGSEGV, si_code,
+			(void __user *)(regs->int_parm_long & __FAIL_ADDR_MASK));
+}
+
+static noinline void do_no_context(struct pt_regs *regs, vm_fault_t fault)
+{
+	enum fault_type fault_type;
+	unsigned long address;
+	bool is_write;
+
+	if (fixup_exception(regs))
+		return;
+	fault_type = get_fault_type(regs);
+	if ((fault_type == KERNEL_FAULT) && (fault == VM_FAULT_BADCONTEXT)) {
+		address = get_fault_address(regs);
+		is_write = fault_is_write(regs);
+		if (kfence_handle_page_fault(address, is_write, regs))
+			return;
+	}
+	/*
+	 * Oops. The kernel tried to access some bad page. We'll have to
+	 * terminate things with extreme prejudice.
+	 */
+	if (fault_type == KERNEL_FAULT)
+		printk(KERN_ALERT "Unable to handle kernel pointer dereference"
+		       " in virtual kernel address space\n");
+	else
+		printk(KERN_ALERT "Unable to handle kernel paging request"
+		       " in virtual user address space\n");
+	dump_fault_info(regs);
+	die(regs, "Oops");
+}
+
+static noinline void do_low_address(struct pt_regs *regs)
+{
+	/* Low-address protection hit in kernel mode means
+	   NULL pointer write access in kernel mode.  */
+	if (regs->psw.mask & PSW_MASK_PSTATE) {
+		/* Low-address protection hit in user mode 'cannot happen'. */
+		die (regs, "Low-address protection");
+	}
+
+	do_no_context(regs, VM_FAULT_BADACCESS);
+}
+
+static noinline void do_sigbus(struct pt_regs *regs)
+{
+	/*
+	 * Send a sigbus, regardless of whether we were in kernel
+	 * or user mode.
+	 */
+	force_sig_fault(SIGBUS, BUS_ADRERR,
+			(void __user *)(regs->int_parm_long & __FAIL_ADDR_MASK));
+}
+
+static noinline void do_fault_error(struct pt_regs *regs, vm_fault_t fault)
+{
+	int si_code;
+
+	switch (fault) {
+	case VM_FAULT_BADACCESS:
+	case VM_FAULT_BADMAP:
+		/* Bad memory access. Check if it is kernel or user space. */
+		if (user_mode(regs)) {
+			/* User mode accesses just cause a SIGSEGV */
+			si_code = (fault == VM_FAULT_BADMAP) ?
+				SEGV_MAPERR : SEGV_ACCERR;
+			do_sigsegv(regs, si_code);
+			break;
+		}
+		fallthrough;
+	case VM_FAULT_BADCONTEXT:
+	case VM_FAULT_PFAULT:
+		do_no_context(regs, fault);
+		break;
+	case VM_FAULT_SIGNAL:
+		if (!user_mode(regs))
+			do_no_context(regs, fault);
+		break;
+	default: /* fault & VM_FAULT_ERROR */
+		if (fault & VM_FAULT_OOM) {
+			if (!user_mode(regs))
+				do_no_context(regs, fault);
+			else
+				pagefault_out_of_memory();
+		} else if (fault & VM_FAULT_SIGSEGV) {
+			/* Kernel mode? Handle exceptions or die */
+			if (!user_mode(regs))
+				do_no_context(regs, fault);
+			else
+				do_sigsegv(regs, SEGV_MAPERR);
+		} else if (fault & VM_FAULT_SIGBUS) {
+			/* Kernel mode? Handle exceptions or die */
+			if (!user_mode(regs))
+				do_no_context(regs, fault);
+			else
+				do_sigbus(regs);
+		} else
+			BUG();
+		break;
+	}
+}
+
+/*
+ * This routine handles page faults.  It determines the address,
+ * and the problem, and then passes it off to one of the appropriate
+ * routines.
+ *
+ * interruption code (int_code):
+ *   04       Protection           ->  Write-Protection  (suppression)
+ *   10       Segment translation  ->  Not present       (nullification)
+ *   11       Page translation     ->  Not present       (nullification)
+ *   3b       Region third trans.  ->  Not present       (nullification)
+ */
+static inline vm_fault_t do_exception(struct pt_regs *regs, int access)
+{
+	struct gmap *gmap;
+	struct task_struct *tsk;
+	struct mm_struct *mm;
+	struct vm_area_struct *vma;
+	enum fault_type type;
+	unsigned long address;
+	unsigned int flags;
+	vm_fault_t fault;
+	bool is_write;
+
+	tsk = current;
+	/*
+	 * The instruction that caused the program check has
+	 * been nullified. Don't signal single step via SIGTRAP.
+	 */
+	clear_thread_flag(TIF_PER_TRAP);
+
+	if (kprobe_page_fault(regs, 14))
+		return 0;
+
+	mm = tsk->mm;
+	address = get_fault_address(regs);
+	is_write = fault_is_write(regs);
+
+	/*
+	 * Verify that the fault happened in user space, that
+	 * we are not in an interrupt and that there is a 
+	 * user context.
+	 */
+	fault = VM_FAULT_BADCONTEXT;
+	type = get_fault_type(regs);
+	switch (type) {
+	case KERNEL_FAULT:
+		goto out;
+	case USER_FAULT:
+	case GMAP_FAULT:
+		if (faulthandler_disabled() || !mm)
+			goto out;
+		break;
+	}
+
+	perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address);
+	flags = FAULT_FLAG_DEFAULT;
+	if (user_mode(regs))
+		flags |= FAULT_FLAG_USER;
+	if (is_write)
+		access = VM_WRITE;
+	if (access == VM_WRITE)
+		flags |= FAULT_FLAG_WRITE;
+	if (!(flags & FAULT_FLAG_USER))
+		goto lock_mmap;
+	vma = lock_vma_under_rcu(mm, address);
+	if (!vma)
+		goto lock_mmap;
+	if (!(vma->vm_flags & access)) {
+		vma_end_read(vma);
+		goto lock_mmap;
+	}
+	fault = handle_mm_fault(vma, address, flags | FAULT_FLAG_VMA_LOCK, regs);
+	if (!(fault & (VM_FAULT_RETRY | VM_FAULT_COMPLETED)))
+		vma_end_read(vma);
+	if (!(fault & VM_FAULT_RETRY)) {
+		count_vm_vma_lock_event(VMA_LOCK_SUCCESS);
+		if (likely(!(fault & VM_FAULT_ERROR)))
+			fault = 0;
+		goto out;
+	}
+	count_vm_vma_lock_event(VMA_LOCK_RETRY);
+	/* Quick path to respond to signals */
+	if (fault_signal_pending(fault, regs)) {
+		fault = VM_FAULT_SIGNAL;
+		goto out;
+	}
+lock_mmap:
+	mmap_read_lock(mm);
+
+	gmap = NULL;
+	if (IS_ENABLED(CONFIG_PGSTE) && type == GMAP_FAULT) {
+		gmap = (struct gmap *) S390_lowcore.gmap;
+		current->thread.gmap_addr = address;
+		current->thread.gmap_write_flag = !!(flags & FAULT_FLAG_WRITE);
+		current->thread.gmap_int_code = regs->int_code & 0xffff;
+		address = __gmap_translate(gmap, address);
+		if (address == -EFAULT) {
+			fault = VM_FAULT_BADMAP;
+			goto out_up;
+		}
+		if (gmap->pfault_enabled)
+			flags |= FAULT_FLAG_RETRY_NOWAIT;
+	}
+
+retry:
+	fault = VM_FAULT_BADMAP;
+	vma = find_vma(mm, address);
+	if (!vma)
+		goto out_up;
+
+	if (unlikely(vma->vm_start > address)) {
+		if (!(vma->vm_flags & VM_GROWSDOWN))
+			goto out_up;
+		vma = expand_stack(mm, address);
+		if (!vma)
+			goto out;
+	}
+
+	/*
+	 * Ok, we have a good vm_area for this memory access, so
+	 * we can handle it..
+	 */
+	fault = VM_FAULT_BADACCESS;
+	if (unlikely(!(vma->vm_flags & access)))
+		goto out_up;
+
+	/*
+	 * If for any reason at all we couldn't handle the fault,
+	 * make sure we exit gracefully rather than endlessly redo
+	 * the fault.
+	 */
+	fault = handle_mm_fault(vma, address, flags, regs);
+	if (fault_signal_pending(fault, regs)) {
+		fault = VM_FAULT_SIGNAL;
+		if (flags & FAULT_FLAG_RETRY_NOWAIT)
+			goto out_up;
+		goto out;
+	}
+
+	/* The fault is fully completed (including releasing mmap lock) */
+	if (fault & VM_FAULT_COMPLETED) {
+		if (gmap) {
+			mmap_read_lock(mm);
+			goto out_gmap;
+		}
+		fault = 0;
+		goto out;
+	}
+
+	if (unlikely(fault & VM_FAULT_ERROR))
+		goto out_up;
+
+	if (fault & VM_FAULT_RETRY) {
+		if (IS_ENABLED(CONFIG_PGSTE) && gmap &&
+			(flags & FAULT_FLAG_RETRY_NOWAIT)) {
+			/*
+			 * FAULT_FLAG_RETRY_NOWAIT has been set, mmap_lock has
+			 * not been released
+			 */
+			current->thread.gmap_pfault = 1;
+			fault = VM_FAULT_PFAULT;
+			goto out_up;
+		}
+		flags &= ~FAULT_FLAG_RETRY_NOWAIT;
+		flags |= FAULT_FLAG_TRIED;
+		mmap_read_lock(mm);
+		goto retry;
+	}
+out_gmap:
+	if (IS_ENABLED(CONFIG_PGSTE) && gmap) {
+		address =  __gmap_link(gmap, current->thread.gmap_addr,
+				       address);
+		if (address == -EFAULT) {
+			fault = VM_FAULT_BADMAP;
+			goto out_up;
+		}
+		if (address == -ENOMEM) {
+			fault = VM_FAULT_OOM;
+			goto out_up;
+		}
+	}
+	fault = 0;
+out_up:
+	mmap_read_unlock(mm);
+out:
+	return fault;
+}
+
+void do_protection_exception(struct pt_regs *regs)
+{
+	unsigned long trans_exc_code;
+	int access;
+	vm_fault_t fault;
+
+	trans_exc_code = regs->int_parm_long;
+	/*
+	 * Protection exceptions are suppressing, decrement psw address.
+	 * The exception to this rule are aborted transactions, for these
+	 * the PSW already points to the correct location.
+	 */
+	if (!(regs->int_code & 0x200))
+		regs->psw.addr = __rewind_psw(regs->psw, regs->int_code >> 16);
+	/*
+	 * Check for low-address protection.  This needs to be treated
+	 * as a special case because the translation exception code
+	 * field is not guaranteed to contain valid data in this case.
+	 */
+	if (unlikely(!(trans_exc_code & 4))) {
+		do_low_address(regs);
+		return;
+	}
+	if (unlikely(MACHINE_HAS_NX && (trans_exc_code & 0x80))) {
+		regs->int_parm_long = (trans_exc_code & ~PAGE_MASK) |
+					(regs->psw.addr & PAGE_MASK);
+		access = VM_EXEC;
+		fault = VM_FAULT_BADACCESS;
+	} else {
+		access = VM_WRITE;
+		fault = do_exception(regs, access);
+	}
+	if (unlikely(fault))
+		do_fault_error(regs, fault);
+}
+NOKPROBE_SYMBOL(do_protection_exception);
+
+void do_dat_exception(struct pt_regs *regs)
+{
+	int access;
+	vm_fault_t fault;
+
+	access = VM_ACCESS_FLAGS;
+	fault = do_exception(regs, access);
+	if (unlikely(fault))
+		do_fault_error(regs, fault);
+}
+NOKPROBE_SYMBOL(do_dat_exception);
+
+#if IS_ENABLED(CONFIG_PGSTE)
+
+void do_secure_storage_access(struct pt_regs *regs)
+{
+	unsigned long addr = regs->int_parm_long & __FAIL_ADDR_MASK;
+	struct vm_area_struct *vma;
+	struct mm_struct *mm;
+	struct page *page;
+	struct gmap *gmap;
+	int rc;
+
+	/*
+	 * bit 61 tells us if the address is valid, if it's not we
+	 * have a major problem and should stop the kernel or send a
+	 * SIGSEGV to the process. Unfortunately bit 61 is not
+	 * reliable without the misc UV feature so we need to check
+	 * for that as well.
+	 */
+	if (uv_has_feature(BIT_UV_FEAT_MISC) &&
+	    !test_bit_inv(61, &regs->int_parm_long)) {
+		/*
+		 * When this happens, userspace did something that it
+		 * was not supposed to do, e.g. branching into secure
+		 * memory. Trigger a segmentation fault.
+		 */
+		if (user_mode(regs)) {
+			send_sig(SIGSEGV, current, 0);
+			return;
+		}
+
+		/*
+		 * The kernel should never run into this case and we
+		 * have no way out of this situation.
+		 */
+		panic("Unexpected PGM 0x3d with TEID bit 61=0");
+	}
+
+	switch (get_fault_type(regs)) {
+	case GMAP_FAULT:
+		mm = current->mm;
+		gmap = (struct gmap *)S390_lowcore.gmap;
+		mmap_read_lock(mm);
+		addr = __gmap_translate(gmap, addr);
+		mmap_read_unlock(mm);
+		if (IS_ERR_VALUE(addr)) {
+			do_fault_error(regs, VM_FAULT_BADMAP);
+			break;
+		}
+		fallthrough;
+	case USER_FAULT:
+		mm = current->mm;
+		mmap_read_lock(mm);
+		vma = find_vma(mm, addr);
+		if (!vma) {
+			mmap_read_unlock(mm);
+			do_fault_error(regs, VM_FAULT_BADMAP);
+			break;
+		}
+		page = follow_page(vma, addr, FOLL_WRITE | FOLL_GET);
+		if (IS_ERR_OR_NULL(page)) {
+			mmap_read_unlock(mm);
+			break;
+		}
+		if (arch_make_page_accessible(page))
+			send_sig(SIGSEGV, current, 0);
+		put_page(page);
+		mmap_read_unlock(mm);
+		break;
+	case KERNEL_FAULT:
+		page = phys_to_page(addr);
+		if (unlikely(!try_get_page(page)))
+			break;
+		rc = arch_make_page_accessible(page);
+		put_page(page);
+		if (rc)
+			BUG();
+		break;
+	default:
+		do_fault_error(regs, VM_FAULT_BADMAP);
+		WARN_ON_ONCE(1);
+	}
+}
+NOKPROBE_SYMBOL(do_secure_storage_access);
+
+void do_non_secure_storage_access(struct pt_regs *regs)
+{
+	unsigned long gaddr = regs->int_parm_long & __FAIL_ADDR_MASK;
+	struct gmap *gmap = (struct gmap *)S390_lowcore.gmap;
+
+	if (get_fault_type(regs) != GMAP_FAULT) {
+		do_fault_error(regs, VM_FAULT_BADMAP);
+		WARN_ON_ONCE(1);
+		return;
+	}
+
+	if (gmap_convert_to_secure(gmap, gaddr) == -EINVAL)
+		send_sig(SIGSEGV, current, 0);
+}
+NOKPROBE_SYMBOL(do_non_secure_storage_access);
+
+void do_secure_storage_violation(struct pt_regs *regs)
+{
+	unsigned long gaddr = regs->int_parm_long & __FAIL_ADDR_MASK;
+	struct gmap *gmap = (struct gmap *)S390_lowcore.gmap;
+
+	/*
+	 * If the VM has been rebooted, its address space might still contain
+	 * secure pages from the previous boot.
+	 * Clear the page so it can be reused.
+	 */
+	if (!gmap_destroy_page(gmap, gaddr))
+		return;
+	/*
+	 * Either KVM messed up the secure guest mapping or the same
+	 * page is mapped into multiple secure guests.
+	 *
+	 * This exception is only triggered when a guest 2 is running
+	 * and can therefore never occur in kernel context.
+	 */
+	printk_ratelimited(KERN_WARNING
+			   "Secure storage violation in task: %s, pid %d\n",
+			   current->comm, current->pid);
+	send_sig(SIGSEGV, current, 0);
+}
+
+#endif /* CONFIG_PGSTE */
diff --git a/arch/s390/mm/gmap.c b/arch/s390/mm/gmap.c
new file mode 100644
index 0000000000..20786f6883
--- /dev/null
+++ b/arch/s390/mm/gmap.c
@@ -0,0 +1,2894 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ *  KVM guest address space mapping code
+ *
+ *    Copyright IBM Corp. 2007, 2020
+ *    Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>
+ *		 David Hildenbrand <david@redhat.com>
+ *		 Janosch Frank <frankja@linux.vnet.ibm.com>
+ */
+
+#include <linux/kernel.h>
+#include <linux/pagewalk.h>
+#include <linux/swap.h>
+#include <linux/smp.h>
+#include <linux/spinlock.h>
+#include <linux/slab.h>
+#include <linux/swapops.h>
+#include <linux/ksm.h>
+#include <linux/mman.h>
+#include <linux/pgtable.h>
+
+#include <asm/pgalloc.h>
+#include <asm/gmap.h>
+#include <asm/page.h>
+#include <asm/tlb.h>
+
+#define GMAP_SHADOW_FAKE_TABLE 1ULL
+
+static struct page *gmap_alloc_crst(void)
+{
+	struct page *page;
+
+	page = alloc_pages(GFP_KERNEL_ACCOUNT, CRST_ALLOC_ORDER);
+	if (!page)
+		return NULL;
+	arch_set_page_dat(page, CRST_ALLOC_ORDER);
+	return page;
+}
+
+/**
+ * gmap_alloc - allocate and initialize a guest address space
+ * @limit: maximum address of the gmap address space
+ *
+ * Returns a guest address space structure.
+ */
+static struct gmap *gmap_alloc(unsigned long limit)
+{
+	struct gmap *gmap;
+	struct page *page;
+	unsigned long *table;
+	unsigned long etype, atype;
+
+	if (limit < _REGION3_SIZE) {
+		limit = _REGION3_SIZE - 1;
+		atype = _ASCE_TYPE_SEGMENT;
+		etype = _SEGMENT_ENTRY_EMPTY;
+	} else if (limit < _REGION2_SIZE) {
+		limit = _REGION2_SIZE - 1;
+		atype = _ASCE_TYPE_REGION3;
+		etype = _REGION3_ENTRY_EMPTY;
+	} else if (limit < _REGION1_SIZE) {
+		limit = _REGION1_SIZE - 1;
+		atype = _ASCE_TYPE_REGION2;
+		etype = _REGION2_ENTRY_EMPTY;
+	} else {
+		limit = -1UL;
+		atype = _ASCE_TYPE_REGION1;
+		etype = _REGION1_ENTRY_EMPTY;
+	}
+	gmap = kzalloc(sizeof(struct gmap), GFP_KERNEL_ACCOUNT);
+	if (!gmap)
+		goto out;
+	INIT_LIST_HEAD(&gmap->crst_list);
+	INIT_LIST_HEAD(&gmap->children);
+	INIT_LIST_HEAD(&gmap->pt_list);
+	INIT_RADIX_TREE(&gmap->guest_to_host, GFP_KERNEL_ACCOUNT);
+	INIT_RADIX_TREE(&gmap->host_to_guest, GFP_ATOMIC | __GFP_ACCOUNT);
+	INIT_RADIX_TREE(&gmap->host_to_rmap, GFP_ATOMIC | __GFP_ACCOUNT);
+	spin_lock_init(&gmap->guest_table_lock);
+	spin_lock_init(&gmap->shadow_lock);
+	refcount_set(&gmap->ref_count, 1);
+	page = gmap_alloc_crst();
+	if (!page)
+		goto out_free;
+	page->index = 0;
+	list_add(&page->lru, &gmap->crst_list);
+	table = page_to_virt(page);
+	crst_table_init(table, etype);
+	gmap->table = table;
+	gmap->asce = atype | _ASCE_TABLE_LENGTH |
+		_ASCE_USER_BITS | __pa(table);
+	gmap->asce_end = limit;
+	return gmap;
+
+out_free:
+	kfree(gmap);
+out:
+	return NULL;
+}
+
+/**
+ * gmap_create - create a guest address space
+ * @mm: pointer to the parent mm_struct
+ * @limit: maximum size of the gmap address space
+ *
+ * Returns a guest address space structure.
+ */
+struct gmap *gmap_create(struct mm_struct *mm, unsigned long limit)
+{
+	struct gmap *gmap;
+	unsigned long gmap_asce;
+
+	gmap = gmap_alloc(limit);
+	if (!gmap)
+		return NULL;
+	gmap->mm = mm;
+	spin_lock(&mm->context.lock);
+	list_add_rcu(&gmap->list, &mm->context.gmap_list);
+	if (list_is_singular(&mm->context.gmap_list))
+		gmap_asce = gmap->asce;
+	else
+		gmap_asce = -1UL;
+	WRITE_ONCE(mm->context.gmap_asce, gmap_asce);
+	spin_unlock(&mm->context.lock);
+	return gmap;
+}
+EXPORT_SYMBOL_GPL(gmap_create);
+
+static void gmap_flush_tlb(struct gmap *gmap)
+{
+	if (MACHINE_HAS_IDTE)
+		__tlb_flush_idte(gmap->asce);
+	else
+		__tlb_flush_global();
+}
+
+static void gmap_radix_tree_free(struct radix_tree_root *root)
+{
+	struct radix_tree_iter iter;
+	unsigned long indices[16];
+	unsigned long index;
+	void __rcu **slot;
+	int i, nr;
+
+	/* A radix tree is freed by deleting all of its entries */
+	index = 0;
+	do {
+		nr = 0;
+		radix_tree_for_each_slot(slot, root, &iter, index) {
+			indices[nr] = iter.index;
+			if (++nr == 16)
+				break;
+		}
+		for (i = 0; i < nr; i++) {
+			index = indices[i];
+			radix_tree_delete(root, index);
+		}
+	} while (nr > 0);
+}
+
+static void gmap_rmap_radix_tree_free(struct radix_tree_root *root)
+{
+	struct gmap_rmap *rmap, *rnext, *head;
+	struct radix_tree_iter iter;
+	unsigned long indices[16];
+	unsigned long index;
+	void __rcu **slot;
+	int i, nr;
+
+	/* A radix tree is freed by deleting all of its entries */
+	index = 0;
+	do {
+		nr = 0;
+		radix_tree_for_each_slot(slot, root, &iter, index) {
+			indices[nr] = iter.index;
+			if (++nr == 16)
+				break;
+		}
+		for (i = 0; i < nr; i++) {
+			index = indices[i];
+			head = radix_tree_delete(root, index);
+			gmap_for_each_rmap_safe(rmap, rnext, head)
+				kfree(rmap);
+		}
+	} while (nr > 0);
+}
+
+/**
+ * gmap_free - free a guest address space
+ * @gmap: pointer to the guest address space structure
+ *
+ * No locks required. There are no references to this gmap anymore.
+ */
+static void gmap_free(struct gmap *gmap)
+{
+	struct page *page, *next;
+
+	/* Flush tlb of all gmaps (if not already done for shadows) */
+	if (!(gmap_is_shadow(gmap) && gmap->removed))
+		gmap_flush_tlb(gmap);
+	/* Free all segment & region tables. */
+	list_for_each_entry_safe(page, next, &gmap->crst_list, lru)
+		__free_pages(page, CRST_ALLOC_ORDER);
+	gmap_radix_tree_free(&gmap->guest_to_host);
+	gmap_radix_tree_free(&gmap->host_to_guest);
+
+	/* Free additional data for a shadow gmap */
+	if (gmap_is_shadow(gmap)) {
+		/* Free all page tables. */
+		list_for_each_entry_safe(page, next, &gmap->pt_list, lru)
+			page_table_free_pgste(page);
+		gmap_rmap_radix_tree_free(&gmap->host_to_rmap);
+		/* Release reference to the parent */
+		gmap_put(gmap->parent);
+	}
+
+	kfree(gmap);
+}
+
+/**
+ * gmap_get - increase reference counter for guest address space
+ * @gmap: pointer to the guest address space structure
+ *
+ * Returns the gmap pointer
+ */
+struct gmap *gmap_get(struct gmap *gmap)
+{
+	refcount_inc(&gmap->ref_count);
+	return gmap;
+}
+EXPORT_SYMBOL_GPL(gmap_get);
+
+/**
+ * gmap_put - decrease reference counter for guest address space
+ * @gmap: pointer to the guest address space structure
+ *
+ * If the reference counter reaches zero the guest address space is freed.
+ */
+void gmap_put(struct gmap *gmap)
+{
+	if (refcount_dec_and_test(&gmap->ref_count))
+		gmap_free(gmap);
+}
+EXPORT_SYMBOL_GPL(gmap_put);
+
+/**
+ * gmap_remove - remove a guest address space but do not free it yet
+ * @gmap: pointer to the guest address space structure
+ */
+void gmap_remove(struct gmap *gmap)
+{
+	struct gmap *sg, *next;
+	unsigned long gmap_asce;
+
+	/* Remove all shadow gmaps linked to this gmap */
+	if (!list_empty(&gmap->children)) {
+		spin_lock(&gmap->shadow_lock);
+		list_for_each_entry_safe(sg, next, &gmap->children, list) {
+			list_del(&sg->list);
+			gmap_put(sg);
+		}
+		spin_unlock(&gmap->shadow_lock);
+	}
+	/* Remove gmap from the pre-mm list */
+	spin_lock(&gmap->mm->context.lock);
+	list_del_rcu(&gmap->list);
+	if (list_empty(&gmap->mm->context.gmap_list))
+		gmap_asce = 0;
+	else if (list_is_singular(&gmap->mm->context.gmap_list))
+		gmap_asce = list_first_entry(&gmap->mm->context.gmap_list,
+					     struct gmap, list)->asce;
+	else
+		gmap_asce = -1UL;
+	WRITE_ONCE(gmap->mm->context.gmap_asce, gmap_asce);
+	spin_unlock(&gmap->mm->context.lock);
+	synchronize_rcu();
+	/* Put reference */
+	gmap_put(gmap);
+}
+EXPORT_SYMBOL_GPL(gmap_remove);
+
+/**
+ * gmap_enable - switch primary space to the guest address space
+ * @gmap: pointer to the guest address space structure
+ */
+void gmap_enable(struct gmap *gmap)
+{
+	S390_lowcore.gmap = (unsigned long) gmap;
+}
+EXPORT_SYMBOL_GPL(gmap_enable);
+
+/**
+ * gmap_disable - switch back to the standard primary address space
+ * @gmap: pointer to the guest address space structure
+ */
+void gmap_disable(struct gmap *gmap)
+{
+	S390_lowcore.gmap = 0UL;
+}
+EXPORT_SYMBOL_GPL(gmap_disable);
+
+/**
+ * gmap_get_enabled - get a pointer to the currently enabled gmap
+ *
+ * Returns a pointer to the currently enabled gmap. 0 if none is enabled.
+ */
+struct gmap *gmap_get_enabled(void)
+{
+	return (struct gmap *) S390_lowcore.gmap;
+}
+EXPORT_SYMBOL_GPL(gmap_get_enabled);
+
+/*
+ * gmap_alloc_table is assumed to be called with mmap_lock held
+ */
+static int gmap_alloc_table(struct gmap *gmap, unsigned long *table,
+			    unsigned long init, unsigned long gaddr)
+{
+	struct page *page;
+	unsigned long *new;
+
+	/* since we dont free the gmap table until gmap_free we can unlock */
+	page = gmap_alloc_crst();
+	if (!page)
+		return -ENOMEM;
+	new = page_to_virt(page);
+	crst_table_init(new, init);
+	spin_lock(&gmap->guest_table_lock);
+	if (*table & _REGION_ENTRY_INVALID) {
+		list_add(&page->lru, &gmap->crst_list);
+		*table = __pa(new) | _REGION_ENTRY_LENGTH |
+			(*table & _REGION_ENTRY_TYPE_MASK);
+		page->index = gaddr;
+		page = NULL;
+	}
+	spin_unlock(&gmap->guest_table_lock);
+	if (page)
+		__free_pages(page, CRST_ALLOC_ORDER);
+	return 0;
+}
+
+/**
+ * __gmap_segment_gaddr - find virtual address from segment pointer
+ * @entry: pointer to a segment table entry in the guest address space
+ *
+ * Returns the virtual address in the guest address space for the segment
+ */
+static unsigned long __gmap_segment_gaddr(unsigned long *entry)
+{
+	struct page *page;
+	unsigned long offset;
+
+	offset = (unsigned long) entry / sizeof(unsigned long);
+	offset = (offset & (PTRS_PER_PMD - 1)) * PMD_SIZE;
+	page = pmd_pgtable_page((pmd_t *) entry);
+	return page->index + offset;
+}
+
+/**
+ * __gmap_unlink_by_vmaddr - unlink a single segment via a host address
+ * @gmap: pointer to the guest address space structure
+ * @vmaddr: address in the host process address space
+ *
+ * Returns 1 if a TLB flush is required
+ */
+static int __gmap_unlink_by_vmaddr(struct gmap *gmap, unsigned long vmaddr)
+{
+	unsigned long *entry;
+	int flush = 0;
+
+	BUG_ON(gmap_is_shadow(gmap));
+	spin_lock(&gmap->guest_table_lock);
+	entry = radix_tree_delete(&gmap->host_to_guest, vmaddr >> PMD_SHIFT);
+	if (entry) {
+		flush = (*entry != _SEGMENT_ENTRY_EMPTY);
+		*entry = _SEGMENT_ENTRY_EMPTY;
+	}
+	spin_unlock(&gmap->guest_table_lock);
+	return flush;
+}
+
+/**
+ * __gmap_unmap_by_gaddr - unmap a single segment via a guest address
+ * @gmap: pointer to the guest address space structure
+ * @gaddr: address in the guest address space
+ *
+ * Returns 1 if a TLB flush is required
+ */
+static int __gmap_unmap_by_gaddr(struct gmap *gmap, unsigned long gaddr)
+{
+	unsigned long vmaddr;
+
+	vmaddr = (unsigned long) radix_tree_delete(&gmap->guest_to_host,
+						   gaddr >> PMD_SHIFT);
+	return vmaddr ? __gmap_unlink_by_vmaddr(gmap, vmaddr) : 0;
+}
+
+/**
+ * gmap_unmap_segment - unmap segment from the guest address space
+ * @gmap: pointer to the guest address space structure
+ * @to: address in the guest address space
+ * @len: length of the memory area to unmap
+ *
+ * Returns 0 if the unmap succeeded, -EINVAL if not.
+ */
+int gmap_unmap_segment(struct gmap *gmap, unsigned long to, unsigned long len)
+{
+	unsigned long off;
+	int flush;
+
+	BUG_ON(gmap_is_shadow(gmap));
+	if ((to | len) & (PMD_SIZE - 1))
+		return -EINVAL;
+	if (len == 0 || to + len < to)
+		return -EINVAL;
+
+	flush = 0;
+	mmap_write_lock(gmap->mm);
+	for (off = 0; off < len; off += PMD_SIZE)
+		flush |= __gmap_unmap_by_gaddr(gmap, to + off);
+	mmap_write_unlock(gmap->mm);
+	if (flush)
+		gmap_flush_tlb(gmap);
+	return 0;
+}
+EXPORT_SYMBOL_GPL(gmap_unmap_segment);
+
+/**
+ * gmap_map_segment - map a segment to the guest address space
+ * @gmap: pointer to the guest address space structure
+ * @from: source address in the parent address space
+ * @to: target address in the guest address space
+ * @len: length of the memory area to map
+ *
+ * Returns 0 if the mmap succeeded, -EINVAL or -ENOMEM if not.
+ */
+int gmap_map_segment(struct gmap *gmap, unsigned long from,
+		     unsigned long to, unsigned long len)
+{
+	unsigned long off;
+	int flush;
+
+	BUG_ON(gmap_is_shadow(gmap));
+	if ((from | to | len) & (PMD_SIZE - 1))
+		return -EINVAL;
+	if (len == 0 || from + len < from || to + len < to ||
+	    from + len - 1 > TASK_SIZE_MAX || to + len - 1 > gmap->asce_end)
+		return -EINVAL;
+
+	flush = 0;
+	mmap_write_lock(gmap->mm);
+	for (off = 0; off < len; off += PMD_SIZE) {
+		/* Remove old translation */
+		flush |= __gmap_unmap_by_gaddr(gmap, to + off);
+		/* Store new translation */
+		if (radix_tree_insert(&gmap->guest_to_host,
+				      (to + off) >> PMD_SHIFT,
+				      (void *) from + off))
+			break;
+	}
+	mmap_write_unlock(gmap->mm);
+	if (flush)
+		gmap_flush_tlb(gmap);
+	if (off >= len)
+		return 0;
+	gmap_unmap_segment(gmap, to, len);
+	return -ENOMEM;
+}
+EXPORT_SYMBOL_GPL(gmap_map_segment);
+
+/**
+ * __gmap_translate - translate a guest address to a user space address
+ * @gmap: pointer to guest mapping meta data structure
+ * @gaddr: guest address
+ *
+ * Returns user space address which corresponds to the guest address or
+ * -EFAULT if no such mapping exists.
+ * This function does not establish potentially missing page table entries.
+ * The mmap_lock of the mm that belongs to the address space must be held
+ * when this function gets called.
+ *
+ * Note: Can also be called for shadow gmaps.
+ */
+unsigned long __gmap_translate(struct gmap *gmap, unsigned long gaddr)
+{
+	unsigned long vmaddr;
+
+	vmaddr = (unsigned long)
+		radix_tree_lookup(&gmap->guest_to_host, gaddr >> PMD_SHIFT);
+	/* Note: guest_to_host is empty for a shadow gmap */
+	return vmaddr ? (vmaddr | (gaddr & ~PMD_MASK)) : -EFAULT;
+}
+EXPORT_SYMBOL_GPL(__gmap_translate);
+
+/**
+ * gmap_translate - translate a guest address to a user space address
+ * @gmap: pointer to guest mapping meta data structure
+ * @gaddr: guest address
+ *
+ * Returns user space address which corresponds to the guest address or
+ * -EFAULT if no such mapping exists.
+ * This function does not establish potentially missing page table entries.
+ */
+unsigned long gmap_translate(struct gmap *gmap, unsigned long gaddr)
+{
+	unsigned long rc;
+
+	mmap_read_lock(gmap->mm);
+	rc = __gmap_translate(gmap, gaddr);
+	mmap_read_unlock(gmap->mm);
+	return rc;
+}
+EXPORT_SYMBOL_GPL(gmap_translate);
+
+/**
+ * gmap_unlink - disconnect a page table from the gmap shadow tables
+ * @mm: pointer to the parent mm_struct
+ * @table: pointer to the host page table
+ * @vmaddr: vm address associated with the host page table
+ */
+void gmap_unlink(struct mm_struct *mm, unsigned long *table,
+		 unsigned long vmaddr)
+{
+	struct gmap *gmap;
+	int flush;
+
+	rcu_read_lock();
+	list_for_each_entry_rcu(gmap, &mm->context.gmap_list, list) {
+		flush = __gmap_unlink_by_vmaddr(gmap, vmaddr);
+		if (flush)
+			gmap_flush_tlb(gmap);
+	}
+	rcu_read_unlock();
+}
+
+static void gmap_pmdp_xchg(struct gmap *gmap, pmd_t *old, pmd_t new,
+			   unsigned long gaddr);
+
+/**
+ * __gmap_link - set up shadow page tables to connect a host to a guest address
+ * @gmap: pointer to guest mapping meta data structure
+ * @gaddr: guest address
+ * @vmaddr: vm address
+ *
+ * Returns 0 on success, -ENOMEM for out of memory conditions, and -EFAULT
+ * if the vm address is already mapped to a different guest segment.
+ * The mmap_lock of the mm that belongs to the address space must be held
+ * when this function gets called.
+ */
+int __gmap_link(struct gmap *gmap, unsigned long gaddr, unsigned long vmaddr)
+{
+	struct mm_struct *mm;
+	unsigned long *table;
+	spinlock_t *ptl;
+	pgd_t *pgd;
+	p4d_t *p4d;
+	pud_t *pud;
+	pmd_t *pmd;
+	u64 unprot;
+	int rc;
+
+	BUG_ON(gmap_is_shadow(gmap));
+	/* Create higher level tables in the gmap page table */
+	table = gmap->table;
+	if ((gmap->asce & _ASCE_TYPE_MASK) >= _ASCE_TYPE_REGION1) {
+		table += (gaddr & _REGION1_INDEX) >> _REGION1_SHIFT;
+		if ((*table & _REGION_ENTRY_INVALID) &&
+		    gmap_alloc_table(gmap, table, _REGION2_ENTRY_EMPTY,
+				     gaddr & _REGION1_MASK))
+			return -ENOMEM;
+		table = __va(*table & _REGION_ENTRY_ORIGIN);
+	}
+	if ((gmap->asce & _ASCE_TYPE_MASK) >= _ASCE_TYPE_REGION2) {
+		table += (gaddr & _REGION2_INDEX) >> _REGION2_SHIFT;
+		if ((*table & _REGION_ENTRY_INVALID) &&
+		    gmap_alloc_table(gmap, table, _REGION3_ENTRY_EMPTY,
+				     gaddr & _REGION2_MASK))
+			return -ENOMEM;
+		table = __va(*table & _REGION_ENTRY_ORIGIN);
+	}
+	if ((gmap->asce & _ASCE_TYPE_MASK) >= _ASCE_TYPE_REGION3) {
+		table += (gaddr & _REGION3_INDEX) >> _REGION3_SHIFT;
+		if ((*table & _REGION_ENTRY_INVALID) &&
+		    gmap_alloc_table(gmap, table, _SEGMENT_ENTRY_EMPTY,
+				     gaddr & _REGION3_MASK))
+			return -ENOMEM;
+		table = __va(*table & _REGION_ENTRY_ORIGIN);
+	}
+	table += (gaddr & _SEGMENT_INDEX) >> _SEGMENT_SHIFT;
+	/* Walk the parent mm page table */
+	mm = gmap->mm;
+	pgd = pgd_offset(mm, vmaddr);
+	VM_BUG_ON(pgd_none(*pgd));
+	p4d = p4d_offset(pgd, vmaddr);
+	VM_BUG_ON(p4d_none(*p4d));
+	pud = pud_offset(p4d, vmaddr);
+	VM_BUG_ON(pud_none(*pud));
+	/* large puds cannot yet be handled */
+	if (pud_large(*pud))
+		return -EFAULT;
+	pmd = pmd_offset(pud, vmaddr);
+	VM_BUG_ON(pmd_none(*pmd));
+	/* Are we allowed to use huge pages? */
+	if (pmd_large(*pmd) && !gmap->mm->context.allow_gmap_hpage_1m)
+		return -EFAULT;
+	/* Link gmap segment table entry location to page table. */
+	rc = radix_tree_preload(GFP_KERNEL_ACCOUNT);
+	if (rc)
+		return rc;
+	ptl = pmd_lock(mm, pmd);
+	spin_lock(&gmap->guest_table_lock);
+	if (*table == _SEGMENT_ENTRY_EMPTY) {
+		rc = radix_tree_insert(&gmap->host_to_guest,
+				       vmaddr >> PMD_SHIFT, table);
+		if (!rc) {
+			if (pmd_large(*pmd)) {
+				*table = (pmd_val(*pmd) &
+					  _SEGMENT_ENTRY_HARDWARE_BITS_LARGE)
+					| _SEGMENT_ENTRY_GMAP_UC;
+			} else
+				*table = pmd_val(*pmd) &
+					_SEGMENT_ENTRY_HARDWARE_BITS;
+		}
+	} else if (*table & _SEGMENT_ENTRY_PROTECT &&
+		   !(pmd_val(*pmd) & _SEGMENT_ENTRY_PROTECT)) {
+		unprot = (u64)*table;
+		unprot &= ~_SEGMENT_ENTRY_PROTECT;
+		unprot |= _SEGMENT_ENTRY_GMAP_UC;
+		gmap_pmdp_xchg(gmap, (pmd_t *)table, __pmd(unprot), gaddr);
+	}
+	spin_unlock(&gmap->guest_table_lock);
+	spin_unlock(ptl);
+	radix_tree_preload_end();
+	return rc;
+}
+
+/**
+ * gmap_fault - resolve a fault on a guest address
+ * @gmap: pointer to guest mapping meta data structure
+ * @gaddr: guest address
+ * @fault_flags: flags to pass down to handle_mm_fault()
+ *
+ * Returns 0 on success, -ENOMEM for out of memory conditions, and -EFAULT
+ * if the vm address is already mapped to a different guest segment.
+ */
+int gmap_fault(struct gmap *gmap, unsigned long gaddr,
+	       unsigned int fault_flags)
+{
+	unsigned long vmaddr;
+	int rc;
+	bool unlocked;
+
+	mmap_read_lock(gmap->mm);
+
+retry:
+	unlocked = false;
+	vmaddr = __gmap_translate(gmap, gaddr);
+	if (IS_ERR_VALUE(vmaddr)) {
+		rc = vmaddr;
+		goto out_up;
+	}
+	if (fixup_user_fault(gmap->mm, vmaddr, fault_flags,
+			     &unlocked)) {
+		rc = -EFAULT;
+		goto out_up;
+	}
+	/*
+	 * In the case that fixup_user_fault unlocked the mmap_lock during
+	 * faultin redo __gmap_translate to not race with a map/unmap_segment.
+	 */
+	if (unlocked)
+		goto retry;
+
+	rc = __gmap_link(gmap, gaddr, vmaddr);
+out_up:
+	mmap_read_unlock(gmap->mm);
+	return rc;
+}
+EXPORT_SYMBOL_GPL(gmap_fault);
+
+/*
+ * this function is assumed to be called with mmap_lock held
+ */
+void __gmap_zap(struct gmap *gmap, unsigned long gaddr)
+{
+	struct vm_area_struct *vma;
+	unsigned long vmaddr;
+	spinlock_t *ptl;
+	pte_t *ptep;
+
+	/* Find the vm address for the guest address */
+	vmaddr = (unsigned long) radix_tree_lookup(&gmap->guest_to_host,
+						   gaddr >> PMD_SHIFT);
+	if (vmaddr) {
+		vmaddr |= gaddr & ~PMD_MASK;
+
+		vma = vma_lookup(gmap->mm, vmaddr);
+		if (!vma || is_vm_hugetlb_page(vma))
+			return;
+
+		/* Get pointer to the page table entry */
+		ptep = get_locked_pte(gmap->mm, vmaddr, &ptl);
+		if (likely(ptep)) {
+			ptep_zap_unused(gmap->mm, vmaddr, ptep, 0);
+			pte_unmap_unlock(ptep, ptl);
+		}
+	}
+}
+EXPORT_SYMBOL_GPL(__gmap_zap);
+
+void gmap_discard(struct gmap *gmap, unsigned long from, unsigned long to)
+{
+	unsigned long gaddr, vmaddr, size;
+	struct vm_area_struct *vma;
+
+	mmap_read_lock(gmap->mm);
+	for (gaddr = from; gaddr < to;
+	     gaddr = (gaddr + PMD_SIZE) & PMD_MASK) {
+		/* Find the vm address for the guest address */
+		vmaddr = (unsigned long)
+			radix_tree_lookup(&gmap->guest_to_host,
+					  gaddr >> PMD_SHIFT);
+		if (!vmaddr)
+			continue;
+		vmaddr |= gaddr & ~PMD_MASK;
+		/* Find vma in the parent mm */
+		vma = find_vma(gmap->mm, vmaddr);
+		if (!vma)
+			continue;
+		/*
+		 * We do not discard pages that are backed by
+		 * hugetlbfs, so we don't have to refault them.
+		 */
+		if (is_vm_hugetlb_page(vma))
+			continue;
+		size = min(to - gaddr, PMD_SIZE - (gaddr & ~PMD_MASK));
+		zap_page_range_single(vma, vmaddr, size, NULL);
+	}
+	mmap_read_unlock(gmap->mm);
+}
+EXPORT_SYMBOL_GPL(gmap_discard);
+
+static LIST_HEAD(gmap_notifier_list);
+static DEFINE_SPINLOCK(gmap_notifier_lock);
+
+/**
+ * gmap_register_pte_notifier - register a pte invalidation callback
+ * @nb: pointer to the gmap notifier block
+ */
+void gmap_register_pte_notifier(struct gmap_notifier *nb)
+{
+	spin_lock(&gmap_notifier_lock);
+	list_add_rcu(&nb->list, &gmap_notifier_list);
+	spin_unlock(&gmap_notifier_lock);
+}
+EXPORT_SYMBOL_GPL(gmap_register_pte_notifier);
+
+/**
+ * gmap_unregister_pte_notifier - remove a pte invalidation callback
+ * @nb: pointer to the gmap notifier block
+ */
+void gmap_unregister_pte_notifier(struct gmap_notifier *nb)
+{
+	spin_lock(&gmap_notifier_lock);
+	list_del_rcu(&nb->list);
+	spin_unlock(&gmap_notifier_lock);
+	synchronize_rcu();
+}
+EXPORT_SYMBOL_GPL(gmap_unregister_pte_notifier);
+
+/**
+ * gmap_call_notifier - call all registered invalidation callbacks
+ * @gmap: pointer to guest mapping meta data structure
+ * @start: start virtual address in the guest address space
+ * @end: end virtual address in the guest address space
+ */
+static void gmap_call_notifier(struct gmap *gmap, unsigned long start,
+			       unsigned long end)
+{
+	struct gmap_notifier *nb;
+
+	list_for_each_entry(nb, &gmap_notifier_list, list)
+		nb->notifier_call(gmap, start, end);
+}
+
+/**
+ * gmap_table_walk - walk the gmap page tables
+ * @gmap: pointer to guest mapping meta data structure
+ * @gaddr: virtual address in the guest address space
+ * @level: page table level to stop at
+ *
+ * Returns a table entry pointer for the given guest address and @level
+ * @level=0 : returns a pointer to a page table table entry (or NULL)
+ * @level=1 : returns a pointer to a segment table entry (or NULL)
+ * @level=2 : returns a pointer to a region-3 table entry (or NULL)
+ * @level=3 : returns a pointer to a region-2 table entry (or NULL)
+ * @level=4 : returns a pointer to a region-1 table entry (or NULL)
+ *
+ * Returns NULL if the gmap page tables could not be walked to the
+ * requested level.
+ *
+ * Note: Can also be called for shadow gmaps.
+ */
+static inline unsigned long *gmap_table_walk(struct gmap *gmap,
+					     unsigned long gaddr, int level)
+{
+	const int asce_type = gmap->asce & _ASCE_TYPE_MASK;
+	unsigned long *table = gmap->table;
+
+	if (gmap_is_shadow(gmap) && gmap->removed)
+		return NULL;
+
+	if (WARN_ON_ONCE(level > (asce_type >> 2) + 1))
+		return NULL;
+
+	if (asce_type != _ASCE_TYPE_REGION1 &&
+	    gaddr & (-1UL << (31 + (asce_type >> 2) * 11)))
+		return NULL;
+
+	switch (asce_type) {
+	case _ASCE_TYPE_REGION1:
+		table += (gaddr & _REGION1_INDEX) >> _REGION1_SHIFT;
+		if (level == 4)
+			break;
+		if (*table & _REGION_ENTRY_INVALID)
+			return NULL;
+		table = __va(*table & _REGION_ENTRY_ORIGIN);
+		fallthrough;
+	case _ASCE_TYPE_REGION2:
+		table += (gaddr & _REGION2_INDEX) >> _REGION2_SHIFT;
+		if (level == 3)
+			break;
+		if (*table & _REGION_ENTRY_INVALID)
+			return NULL;
+		table = __va(*table & _REGION_ENTRY_ORIGIN);
+		fallthrough;
+	case _ASCE_TYPE_REGION3:
+		table += (gaddr & _REGION3_INDEX) >> _REGION3_SHIFT;
+		if (level == 2)
+			break;
+		if (*table & _REGION_ENTRY_INVALID)
+			return NULL;
+		table = __va(*table & _REGION_ENTRY_ORIGIN);
+		fallthrough;
+	case _ASCE_TYPE_SEGMENT:
+		table += (gaddr & _SEGMENT_INDEX) >> _SEGMENT_SHIFT;
+		if (level == 1)
+			break;
+		if (*table & _REGION_ENTRY_INVALID)
+			return NULL;
+		table = __va(*table & _SEGMENT_ENTRY_ORIGIN);
+		table += (gaddr & _PAGE_INDEX) >> _PAGE_SHIFT;
+	}
+	return table;
+}
+
+/**
+ * gmap_pte_op_walk - walk the gmap page table, get the page table lock
+ *		      and return the pte pointer
+ * @gmap: pointer to guest mapping meta data structure
+ * @gaddr: virtual address in the guest address space
+ * @ptl: pointer to the spinlock pointer
+ *
+ * Returns a pointer to the locked pte for a guest address, or NULL
+ */
+static pte_t *gmap_pte_op_walk(struct gmap *gmap, unsigned long gaddr,
+			       spinlock_t **ptl)
+{
+	unsigned long *table;
+
+	BUG_ON(gmap_is_shadow(gmap));
+	/* Walk the gmap page table, lock and get pte pointer */
+	table = gmap_table_walk(gmap, gaddr, 1); /* get segment pointer */
+	if (!table || *table & _SEGMENT_ENTRY_INVALID)
+		return NULL;
+	return pte_alloc_map_lock(gmap->mm, (pmd_t *) table, gaddr, ptl);
+}
+
+/**
+ * gmap_pte_op_fixup - force a page in and connect the gmap page table
+ * @gmap: pointer to guest mapping meta data structure
+ * @gaddr: virtual address in the guest address space
+ * @vmaddr: address in the host process address space
+ * @prot: indicates access rights: PROT_NONE, PROT_READ or PROT_WRITE
+ *
+ * Returns 0 if the caller can retry __gmap_translate (might fail again),
+ * -ENOMEM if out of memory and -EFAULT if anything goes wrong while fixing
+ * up or connecting the gmap page table.
+ */
+static int gmap_pte_op_fixup(struct gmap *gmap, unsigned long gaddr,
+			     unsigned long vmaddr, int prot)
+{
+	struct mm_struct *mm = gmap->mm;
+	unsigned int fault_flags;
+	bool unlocked = false;
+
+	BUG_ON(gmap_is_shadow(gmap));
+	fault_flags = (prot == PROT_WRITE) ? FAULT_FLAG_WRITE : 0;
+	if (fixup_user_fault(mm, vmaddr, fault_flags, &unlocked))
+		return -EFAULT;
+	if (unlocked)
+		/* lost mmap_lock, caller has to retry __gmap_translate */
+		return 0;
+	/* Connect the page tables */
+	return __gmap_link(gmap, gaddr, vmaddr);
+}
+
+/**
+ * gmap_pte_op_end - release the page table lock
+ * @ptep: pointer to the locked pte
+ * @ptl: pointer to the page table spinlock
+ */
+static void gmap_pte_op_end(pte_t *ptep, spinlock_t *ptl)
+{
+	pte_unmap_unlock(ptep, ptl);
+}
+
+/**
+ * gmap_pmd_op_walk - walk the gmap tables, get the guest table lock
+ *		      and return the pmd pointer
+ * @gmap: pointer to guest mapping meta data structure
+ * @gaddr: virtual address in the guest address space
+ *
+ * Returns a pointer to the pmd for a guest address, or NULL
+ */
+static inline pmd_t *gmap_pmd_op_walk(struct gmap *gmap, unsigned long gaddr)
+{
+	pmd_t *pmdp;
+
+	BUG_ON(gmap_is_shadow(gmap));
+	pmdp = (pmd_t *) gmap_table_walk(gmap, gaddr, 1);
+	if (!pmdp)
+		return NULL;
+
+	/* without huge pages, there is no need to take the table lock */
+	if (!gmap->mm->context.allow_gmap_hpage_1m)
+		return pmd_none(*pmdp) ? NULL : pmdp;
+
+	spin_lock(&gmap->guest_table_lock);
+	if (pmd_none(*pmdp)) {
+		spin_unlock(&gmap->guest_table_lock);
+		return NULL;
+	}
+
+	/* 4k page table entries are locked via the pte (pte_alloc_map_lock). */
+	if (!pmd_large(*pmdp))
+		spin_unlock(&gmap->guest_table_lock);
+	return pmdp;
+}
+
+/**
+ * gmap_pmd_op_end - release the guest_table_lock if needed
+ * @gmap: pointer to the guest mapping meta data structure
+ * @pmdp: pointer to the pmd
+ */
+static inline void gmap_pmd_op_end(struct gmap *gmap, pmd_t *pmdp)
+{
+	if (pmd_large(*pmdp))
+		spin_unlock(&gmap->guest_table_lock);
+}
+
+/*
+ * gmap_protect_pmd - remove access rights to memory and set pmd notification bits
+ * @pmdp: pointer to the pmd to be protected
+ * @prot: indicates access rights: PROT_NONE, PROT_READ or PROT_WRITE
+ * @bits: notification bits to set
+ *
+ * Returns:
+ * 0 if successfully protected
+ * -EAGAIN if a fixup is needed
+ * -EINVAL if unsupported notifier bits have been specified
+ *
+ * Expected to be called with sg->mm->mmap_lock in read and
+ * guest_table_lock held.
+ */
+static int gmap_protect_pmd(struct gmap *gmap, unsigned long gaddr,
+			    pmd_t *pmdp, int prot, unsigned long bits)
+{
+	int pmd_i = pmd_val(*pmdp) & _SEGMENT_ENTRY_INVALID;
+	int pmd_p = pmd_val(*pmdp) & _SEGMENT_ENTRY_PROTECT;
+	pmd_t new = *pmdp;
+
+	/* Fixup needed */
+	if ((pmd_i && (prot != PROT_NONE)) || (pmd_p && (prot == PROT_WRITE)))
+		return -EAGAIN;
+
+	if (prot == PROT_NONE && !pmd_i) {
+		new = set_pmd_bit(new, __pgprot(_SEGMENT_ENTRY_INVALID));
+		gmap_pmdp_xchg(gmap, pmdp, new, gaddr);
+	}
+
+	if (prot == PROT_READ && !pmd_p) {
+		new = clear_pmd_bit(new, __pgprot(_SEGMENT_ENTRY_INVALID));
+		new = set_pmd_bit(new, __pgprot(_SEGMENT_ENTRY_PROTECT));
+		gmap_pmdp_xchg(gmap, pmdp, new, gaddr);
+	}
+
+	if (bits & GMAP_NOTIFY_MPROT)
+		set_pmd(pmdp, set_pmd_bit(*pmdp, __pgprot(_SEGMENT_ENTRY_GMAP_IN)));
+
+	/* Shadow GMAP protection needs split PMDs */
+	if (bits & GMAP_NOTIFY_SHADOW)
+		return -EINVAL;
+
+	return 0;
+}
+
+/*
+ * gmap_protect_pte - remove access rights to memory and set pgste bits
+ * @gmap: pointer to guest mapping meta data structure
+ * @gaddr: virtual address in the guest address space
+ * @pmdp: pointer to the pmd associated with the pte
+ * @prot: indicates access rights: PROT_NONE, PROT_READ or PROT_WRITE
+ * @bits: notification bits to set
+ *
+ * Returns 0 if successfully protected, -ENOMEM if out of memory and
+ * -EAGAIN if a fixup is needed.
+ *
+ * Expected to be called with sg->mm->mmap_lock in read
+ */
+static int gmap_protect_pte(struct gmap *gmap, unsigned long gaddr,
+			    pmd_t *pmdp, int prot, unsigned long bits)
+{
+	int rc;
+	pte_t *ptep;
+	spinlock_t *ptl;
+	unsigned long pbits = 0;
+
+	if (pmd_val(*pmdp) & _SEGMENT_ENTRY_INVALID)
+		return -EAGAIN;
+
+	ptep = pte_alloc_map_lock(gmap->mm, pmdp, gaddr, &ptl);
+	if (!ptep)
+		return -ENOMEM;
+
+	pbits |= (bits & GMAP_NOTIFY_MPROT) ? PGSTE_IN_BIT : 0;
+	pbits |= (bits & GMAP_NOTIFY_SHADOW) ? PGSTE_VSIE_BIT : 0;
+	/* Protect and unlock. */
+	rc = ptep_force_prot(gmap->mm, gaddr, ptep, prot, pbits);
+	gmap_pte_op_end(ptep, ptl);
+	return rc;
+}
+
+/*
+ * gmap_protect_range - remove access rights to memory and set pgste bits
+ * @gmap: pointer to guest mapping meta data structure
+ * @gaddr: virtual address in the guest address space
+ * @len: size of area
+ * @prot: indicates access rights: PROT_NONE, PROT_READ or PROT_WRITE
+ * @bits: pgste notification bits to set
+ *
+ * Returns 0 if successfully protected, -ENOMEM if out of memory and
+ * -EFAULT if gaddr is invalid (or mapping for shadows is missing).
+ *
+ * Called with sg->mm->mmap_lock in read.
+ */
+static int gmap_protect_range(struct gmap *gmap, unsigned long gaddr,
+			      unsigned long len, int prot, unsigned long bits)
+{
+	unsigned long vmaddr, dist;
+	pmd_t *pmdp;
+	int rc;
+
+	BUG_ON(gmap_is_shadow(gmap));
+	while (len) {
+		rc = -EAGAIN;
+		pmdp = gmap_pmd_op_walk(gmap, gaddr);
+		if (pmdp) {
+			if (!pmd_large(*pmdp)) {
+				rc = gmap_protect_pte(gmap, gaddr, pmdp, prot,
+						      bits);
+				if (!rc) {
+					len -= PAGE_SIZE;
+					gaddr += PAGE_SIZE;
+				}
+			} else {
+				rc = gmap_protect_pmd(gmap, gaddr, pmdp, prot,
+						      bits);
+				if (!rc) {
+					dist = HPAGE_SIZE - (gaddr & ~HPAGE_MASK);
+					len = len < dist ? 0 : len - dist;
+					gaddr = (gaddr & HPAGE_MASK) + HPAGE_SIZE;
+				}
+			}
+			gmap_pmd_op_end(gmap, pmdp);
+		}
+		if (rc) {
+			if (rc == -EINVAL)
+				return rc;
+
+			/* -EAGAIN, fixup of userspace mm and gmap */
+			vmaddr = __gmap_translate(gmap, gaddr);
+			if (IS_ERR_VALUE(vmaddr))
+				return vmaddr;
+			rc = gmap_pte_op_fixup(gmap, gaddr, vmaddr, prot);
+			if (rc)
+				return rc;
+		}
+	}
+	return 0;
+}
+
+/**
+ * gmap_mprotect_notify - change access rights for a range of ptes and
+ *                        call the notifier if any pte changes again
+ * @gmap: pointer to guest mapping meta data structure
+ * @gaddr: virtual address in the guest address space
+ * @len: size of area
+ * @prot: indicates access rights: PROT_NONE, PROT_READ or PROT_WRITE
+ *
+ * Returns 0 if for each page in the given range a gmap mapping exists,
+ * the new access rights could be set and the notifier could be armed.
+ * If the gmap mapping is missing for one or more pages -EFAULT is
+ * returned. If no memory could be allocated -ENOMEM is returned.
+ * This function establishes missing page table entries.
+ */
+int gmap_mprotect_notify(struct gmap *gmap, unsigned long gaddr,
+			 unsigned long len, int prot)
+{
+	int rc;
+
+	if ((gaddr & ~PAGE_MASK) || (len & ~PAGE_MASK) || gmap_is_shadow(gmap))
+		return -EINVAL;
+	if (!MACHINE_HAS_ESOP && prot == PROT_READ)
+		return -EINVAL;
+	mmap_read_lock(gmap->mm);
+	rc = gmap_protect_range(gmap, gaddr, len, prot, GMAP_NOTIFY_MPROT);
+	mmap_read_unlock(gmap->mm);
+	return rc;
+}
+EXPORT_SYMBOL_GPL(gmap_mprotect_notify);
+
+/**
+ * gmap_read_table - get an unsigned long value from a guest page table using
+ *                   absolute addressing, without marking the page referenced.
+ * @gmap: pointer to guest mapping meta data structure
+ * @gaddr: virtual address in the guest address space
+ * @val: pointer to the unsigned long value to return
+ *
+ * Returns 0 if the value was read, -ENOMEM if out of memory and -EFAULT
+ * if reading using the virtual address failed. -EINVAL if called on a gmap
+ * shadow.
+ *
+ * Called with gmap->mm->mmap_lock in read.
+ */
+int gmap_read_table(struct gmap *gmap, unsigned long gaddr, unsigned long *val)
+{
+	unsigned long address, vmaddr;
+	spinlock_t *ptl;
+	pte_t *ptep, pte;
+	int rc;
+
+	if (gmap_is_shadow(gmap))
+		return -EINVAL;
+
+	while (1) {
+		rc = -EAGAIN;
+		ptep = gmap_pte_op_walk(gmap, gaddr, &ptl);
+		if (ptep) {
+			pte = *ptep;
+			if (pte_present(pte) && (pte_val(pte) & _PAGE_READ)) {
+				address = pte_val(pte) & PAGE_MASK;
+				address += gaddr & ~PAGE_MASK;
+				*val = *(unsigned long *)__va(address);
+				set_pte(ptep, set_pte_bit(*ptep, __pgprot(_PAGE_YOUNG)));
+				/* Do *NOT* clear the _PAGE_INVALID bit! */
+				rc = 0;
+			}
+			gmap_pte_op_end(ptep, ptl);
+		}
+		if (!rc)
+			break;
+		vmaddr = __gmap_translate(gmap, gaddr);
+		if (IS_ERR_VALUE(vmaddr)) {
+			rc = vmaddr;
+			break;
+		}
+		rc = gmap_pte_op_fixup(gmap, gaddr, vmaddr, PROT_READ);
+		if (rc)
+			break;
+	}
+	return rc;
+}
+EXPORT_SYMBOL_GPL(gmap_read_table);
+
+/**
+ * gmap_insert_rmap - add a rmap to the host_to_rmap radix tree
+ * @sg: pointer to the shadow guest address space structure
+ * @vmaddr: vm address associated with the rmap
+ * @rmap: pointer to the rmap structure
+ *
+ * Called with the sg->guest_table_lock
+ */
+static inline void gmap_insert_rmap(struct gmap *sg, unsigned long vmaddr,
+				    struct gmap_rmap *rmap)
+{
+	struct gmap_rmap *temp;
+	void __rcu **slot;
+
+	BUG_ON(!gmap_is_shadow(sg));
+	slot = radix_tree_lookup_slot(&sg->host_to_rmap, vmaddr >> PAGE_SHIFT);
+	if (slot) {
+		rmap->next = radix_tree_deref_slot_protected(slot,
+							&sg->guest_table_lock);
+		for (temp = rmap->next; temp; temp = temp->next) {
+			if (temp->raddr == rmap->raddr) {
+				kfree(rmap);
+				return;
+			}
+		}
+		radix_tree_replace_slot(&sg->host_to_rmap, slot, rmap);
+	} else {
+		rmap->next = NULL;
+		radix_tree_insert(&sg->host_to_rmap, vmaddr >> PAGE_SHIFT,
+				  rmap);
+	}
+}
+
+/**
+ * gmap_protect_rmap - restrict access rights to memory (RO) and create an rmap
+ * @sg: pointer to the shadow guest address space structure
+ * @raddr: rmap address in the shadow gmap
+ * @paddr: address in the parent guest address space
+ * @len: length of the memory area to protect
+ *
+ * Returns 0 if successfully protected and the rmap was created, -ENOMEM
+ * if out of memory and -EFAULT if paddr is invalid.
+ */
+static int gmap_protect_rmap(struct gmap *sg, unsigned long raddr,
+			     unsigned long paddr, unsigned long len)
+{
+	struct gmap *parent;
+	struct gmap_rmap *rmap;
+	unsigned long vmaddr;
+	spinlock_t *ptl;
+	pte_t *ptep;
+	int rc;
+
+	BUG_ON(!gmap_is_shadow(sg));
+	parent = sg->parent;
+	while (len) {
+		vmaddr = __gmap_translate(parent, paddr);
+		if (IS_ERR_VALUE(vmaddr))
+			return vmaddr;
+		rmap = kzalloc(sizeof(*rmap), GFP_KERNEL_ACCOUNT);
+		if (!rmap)
+			return -ENOMEM;
+		rmap->raddr = raddr;
+		rc = radix_tree_preload(GFP_KERNEL_ACCOUNT);
+		if (rc) {
+			kfree(rmap);
+			return rc;
+		}
+		rc = -EAGAIN;
+		ptep = gmap_pte_op_walk(parent, paddr, &ptl);
+		if (ptep) {
+			spin_lock(&sg->guest_table_lock);
+			rc = ptep_force_prot(parent->mm, paddr, ptep, PROT_READ,
+					     PGSTE_VSIE_BIT);
+			if (!rc)
+				gmap_insert_rmap(sg, vmaddr, rmap);
+			spin_unlock(&sg->guest_table_lock);
+			gmap_pte_op_end(ptep, ptl);
+		}
+		radix_tree_preload_end();
+		if (rc) {
+			kfree(rmap);
+			rc = gmap_pte_op_fixup(parent, paddr, vmaddr, PROT_READ);
+			if (rc)
+				return rc;
+			continue;
+		}
+		paddr += PAGE_SIZE;
+		len -= PAGE_SIZE;
+	}
+	return 0;
+}
+
+#define _SHADOW_RMAP_MASK	0x7
+#define _SHADOW_RMAP_REGION1	0x5
+#define _SHADOW_RMAP_REGION2	0x4
+#define _SHADOW_RMAP_REGION3	0x3
+#define _SHADOW_RMAP_SEGMENT	0x2
+#define _SHADOW_RMAP_PGTABLE	0x1
+
+/**
+ * gmap_idte_one - invalidate a single region or segment table entry
+ * @asce: region or segment table *origin* + table-type bits
+ * @vaddr: virtual address to identify the table entry to flush
+ *
+ * The invalid bit of a single region or segment table entry is set
+ * and the associated TLB entries depending on the entry are flushed.
+ * The table-type of the @asce identifies the portion of the @vaddr
+ * that is used as the invalidation index.
+ */
+static inline void gmap_idte_one(unsigned long asce, unsigned long vaddr)
+{
+	asm volatile(
+		"	idte	%0,0,%1"
+		: : "a" (asce), "a" (vaddr) : "cc", "memory");
+}
+
+/**
+ * gmap_unshadow_page - remove a page from a shadow page table
+ * @sg: pointer to the shadow guest address space structure
+ * @raddr: rmap address in the shadow guest address space
+ *
+ * Called with the sg->guest_table_lock
+ */
+static void gmap_unshadow_page(struct gmap *sg, unsigned long raddr)
+{
+	unsigned long *table;
+
+	BUG_ON(!gmap_is_shadow(sg));
+	table = gmap_table_walk(sg, raddr, 0); /* get page table pointer */
+	if (!table || *table & _PAGE_INVALID)
+		return;
+	gmap_call_notifier(sg, raddr, raddr + _PAGE_SIZE - 1);
+	ptep_unshadow_pte(sg->mm, raddr, (pte_t *) table);
+}
+
+/**
+ * __gmap_unshadow_pgt - remove all entries from a shadow page table
+ * @sg: pointer to the shadow guest address space structure
+ * @raddr: rmap address in the shadow guest address space
+ * @pgt: pointer to the start of a shadow page table
+ *
+ * Called with the sg->guest_table_lock
+ */
+static void __gmap_unshadow_pgt(struct gmap *sg, unsigned long raddr,
+				unsigned long *pgt)
+{
+	int i;
+
+	BUG_ON(!gmap_is_shadow(sg));
+	for (i = 0; i < _PAGE_ENTRIES; i++, raddr += _PAGE_SIZE)
+		pgt[i] = _PAGE_INVALID;
+}
+
+/**
+ * gmap_unshadow_pgt - remove a shadow page table from a segment entry
+ * @sg: pointer to the shadow guest address space structure
+ * @raddr: address in the shadow guest address space
+ *
+ * Called with the sg->guest_table_lock
+ */
+static void gmap_unshadow_pgt(struct gmap *sg, unsigned long raddr)
+{
+	unsigned long *ste;
+	phys_addr_t sto, pgt;
+	struct page *page;
+
+	BUG_ON(!gmap_is_shadow(sg));
+	ste = gmap_table_walk(sg, raddr, 1); /* get segment pointer */
+	if (!ste || !(*ste & _SEGMENT_ENTRY_ORIGIN))
+		return;
+	gmap_call_notifier(sg, raddr, raddr + _SEGMENT_SIZE - 1);
+	sto = __pa(ste - ((raddr & _SEGMENT_INDEX) >> _SEGMENT_SHIFT));
+	gmap_idte_one(sto | _ASCE_TYPE_SEGMENT, raddr);
+	pgt = *ste & _SEGMENT_ENTRY_ORIGIN;
+	*ste = _SEGMENT_ENTRY_EMPTY;
+	__gmap_unshadow_pgt(sg, raddr, __va(pgt));
+	/* Free page table */
+	page = phys_to_page(pgt);
+	list_del(&page->lru);
+	page_table_free_pgste(page);
+}
+
+/**
+ * __gmap_unshadow_sgt - remove all entries from a shadow segment table
+ * @sg: pointer to the shadow guest address space structure
+ * @raddr: rmap address in the shadow guest address space
+ * @sgt: pointer to the start of a shadow segment table
+ *
+ * Called with the sg->guest_table_lock
+ */
+static void __gmap_unshadow_sgt(struct gmap *sg, unsigned long raddr,
+				unsigned long *sgt)
+{
+	struct page *page;
+	phys_addr_t pgt;
+	int i;
+
+	BUG_ON(!gmap_is_shadow(sg));
+	for (i = 0; i < _CRST_ENTRIES; i++, raddr += _SEGMENT_SIZE) {
+		if (!(sgt[i] & _SEGMENT_ENTRY_ORIGIN))
+			continue;
+		pgt = sgt[i] & _REGION_ENTRY_ORIGIN;
+		sgt[i] = _SEGMENT_ENTRY_EMPTY;
+		__gmap_unshadow_pgt(sg, raddr, __va(pgt));
+		/* Free page table */
+		page = phys_to_page(pgt);
+		list_del(&page->lru);
+		page_table_free_pgste(page);
+	}
+}
+
+/**
+ * gmap_unshadow_sgt - remove a shadow segment table from a region-3 entry
+ * @sg: pointer to the shadow guest address space structure
+ * @raddr: rmap address in the shadow guest address space
+ *
+ * Called with the shadow->guest_table_lock
+ */
+static void gmap_unshadow_sgt(struct gmap *sg, unsigned long raddr)
+{
+	unsigned long r3o, *r3e;
+	phys_addr_t sgt;
+	struct page *page;
+
+	BUG_ON(!gmap_is_shadow(sg));
+	r3e = gmap_table_walk(sg, raddr, 2); /* get region-3 pointer */
+	if (!r3e || !(*r3e & _REGION_ENTRY_ORIGIN))
+		return;
+	gmap_call_notifier(sg, raddr, raddr + _REGION3_SIZE - 1);
+	r3o = (unsigned long) (r3e - ((raddr & _REGION3_INDEX) >> _REGION3_SHIFT));
+	gmap_idte_one(__pa(r3o) | _ASCE_TYPE_REGION3, raddr);
+	sgt = *r3e & _REGION_ENTRY_ORIGIN;
+	*r3e = _REGION3_ENTRY_EMPTY;
+	__gmap_unshadow_sgt(sg, raddr, __va(sgt));
+	/* Free segment table */
+	page = phys_to_page(sgt);
+	list_del(&page->lru);
+	__free_pages(page, CRST_ALLOC_ORDER);
+}
+
+/**
+ * __gmap_unshadow_r3t - remove all entries from a shadow region-3 table
+ * @sg: pointer to the shadow guest address space structure
+ * @raddr: address in the shadow guest address space
+ * @r3t: pointer to the start of a shadow region-3 table
+ *
+ * Called with the sg->guest_table_lock
+ */
+static void __gmap_unshadow_r3t(struct gmap *sg, unsigned long raddr,
+				unsigned long *r3t)
+{
+	struct page *page;
+	phys_addr_t sgt;
+	int i;
+
+	BUG_ON(!gmap_is_shadow(sg));
+	for (i = 0; i < _CRST_ENTRIES; i++, raddr += _REGION3_SIZE) {
+		if (!(r3t[i] & _REGION_ENTRY_ORIGIN))
+			continue;
+		sgt = r3t[i] & _REGION_ENTRY_ORIGIN;
+		r3t[i] = _REGION3_ENTRY_EMPTY;
+		__gmap_unshadow_sgt(sg, raddr, __va(sgt));
+		/* Free segment table */
+		page = phys_to_page(sgt);
+		list_del(&page->lru);
+		__free_pages(page, CRST_ALLOC_ORDER);
+	}
+}
+
+/**
+ * gmap_unshadow_r3t - remove a shadow region-3 table from a region-2 entry
+ * @sg: pointer to the shadow guest address space structure
+ * @raddr: rmap address in the shadow guest address space
+ *
+ * Called with the sg->guest_table_lock
+ */
+static void gmap_unshadow_r3t(struct gmap *sg, unsigned long raddr)
+{
+	unsigned long r2o, *r2e;
+	phys_addr_t r3t;
+	struct page *page;
+
+	BUG_ON(!gmap_is_shadow(sg));
+	r2e = gmap_table_walk(sg, raddr, 3); /* get region-2 pointer */
+	if (!r2e || !(*r2e & _REGION_ENTRY_ORIGIN))
+		return;
+	gmap_call_notifier(sg, raddr, raddr + _REGION2_SIZE - 1);
+	r2o = (unsigned long) (r2e - ((raddr & _REGION2_INDEX) >> _REGION2_SHIFT));
+	gmap_idte_one(__pa(r2o) | _ASCE_TYPE_REGION2, raddr);
+	r3t = *r2e & _REGION_ENTRY_ORIGIN;
+	*r2e = _REGION2_ENTRY_EMPTY;
+	__gmap_unshadow_r3t(sg, raddr, __va(r3t));
+	/* Free region 3 table */
+	page = phys_to_page(r3t);
+	list_del(&page->lru);
+	__free_pages(page, CRST_ALLOC_ORDER);
+}
+
+/**
+ * __gmap_unshadow_r2t - remove all entries from a shadow region-2 table
+ * @sg: pointer to the shadow guest address space structure
+ * @raddr: rmap address in the shadow guest address space
+ * @r2t: pointer to the start of a shadow region-2 table
+ *
+ * Called with the sg->guest_table_lock
+ */
+static void __gmap_unshadow_r2t(struct gmap *sg, unsigned long raddr,
+				unsigned long *r2t)
+{
+	phys_addr_t r3t;
+	struct page *page;
+	int i;
+
+	BUG_ON(!gmap_is_shadow(sg));
+	for (i = 0; i < _CRST_ENTRIES; i++, raddr += _REGION2_SIZE) {
+		if (!(r2t[i] & _REGION_ENTRY_ORIGIN))
+			continue;
+		r3t = r2t[i] & _REGION_ENTRY_ORIGIN;
+		r2t[i] = _REGION2_ENTRY_EMPTY;
+		__gmap_unshadow_r3t(sg, raddr, __va(r3t));
+		/* Free region 3 table */
+		page = phys_to_page(r3t);
+		list_del(&page->lru);
+		__free_pages(page, CRST_ALLOC_ORDER);
+	}
+}
+
+/**
+ * gmap_unshadow_r2t - remove a shadow region-2 table from a region-1 entry
+ * @sg: pointer to the shadow guest address space structure
+ * @raddr: rmap address in the shadow guest address space
+ *
+ * Called with the sg->guest_table_lock
+ */
+static void gmap_unshadow_r2t(struct gmap *sg, unsigned long raddr)
+{
+	unsigned long r1o, *r1e;
+	struct page *page;
+	phys_addr_t r2t;
+
+	BUG_ON(!gmap_is_shadow(sg));
+	r1e = gmap_table_walk(sg, raddr, 4); /* get region-1 pointer */
+	if (!r1e || !(*r1e & _REGION_ENTRY_ORIGIN))
+		return;
+	gmap_call_notifier(sg, raddr, raddr + _REGION1_SIZE - 1);
+	r1o = (unsigned long) (r1e - ((raddr & _REGION1_INDEX) >> _REGION1_SHIFT));
+	gmap_idte_one(__pa(r1o) | _ASCE_TYPE_REGION1, raddr);
+	r2t = *r1e & _REGION_ENTRY_ORIGIN;
+	*r1e = _REGION1_ENTRY_EMPTY;
+	__gmap_unshadow_r2t(sg, raddr, __va(r2t));
+	/* Free region 2 table */
+	page = phys_to_page(r2t);
+	list_del(&page->lru);
+	__free_pages(page, CRST_ALLOC_ORDER);
+}
+
+/**
+ * __gmap_unshadow_r1t - remove all entries from a shadow region-1 table
+ * @sg: pointer to the shadow guest address space structure
+ * @raddr: rmap address in the shadow guest address space
+ * @r1t: pointer to the start of a shadow region-1 table
+ *
+ * Called with the shadow->guest_table_lock
+ */
+static void __gmap_unshadow_r1t(struct gmap *sg, unsigned long raddr,
+				unsigned long *r1t)
+{
+	unsigned long asce;
+	struct page *page;
+	phys_addr_t r2t;
+	int i;
+
+	BUG_ON(!gmap_is_shadow(sg));
+	asce = __pa(r1t) | _ASCE_TYPE_REGION1;
+	for (i = 0; i < _CRST_ENTRIES; i++, raddr += _REGION1_SIZE) {
+		if (!(r1t[i] & _REGION_ENTRY_ORIGIN))
+			continue;
+		r2t = r1t[i] & _REGION_ENTRY_ORIGIN;
+		__gmap_unshadow_r2t(sg, raddr, __va(r2t));
+		/* Clear entry and flush translation r1t -> r2t */
+		gmap_idte_one(asce, raddr);
+		r1t[i] = _REGION1_ENTRY_EMPTY;
+		/* Free region 2 table */
+		page = phys_to_page(r2t);
+		list_del(&page->lru);
+		__free_pages(page, CRST_ALLOC_ORDER);
+	}
+}
+
+/**
+ * gmap_unshadow - remove a shadow page table completely
+ * @sg: pointer to the shadow guest address space structure
+ *
+ * Called with sg->guest_table_lock
+ */
+static void gmap_unshadow(struct gmap *sg)
+{
+	unsigned long *table;
+
+	BUG_ON(!gmap_is_shadow(sg));
+	if (sg->removed)
+		return;
+	sg->removed = 1;
+	gmap_call_notifier(sg, 0, -1UL);
+	gmap_flush_tlb(sg);
+	table = __va(sg->asce & _ASCE_ORIGIN);
+	switch (sg->asce & _ASCE_TYPE_MASK) {
+	case _ASCE_TYPE_REGION1:
+		__gmap_unshadow_r1t(sg, 0, table);
+		break;
+	case _ASCE_TYPE_REGION2:
+		__gmap_unshadow_r2t(sg, 0, table);
+		break;
+	case _ASCE_TYPE_REGION3:
+		__gmap_unshadow_r3t(sg, 0, table);
+		break;
+	case _ASCE_TYPE_SEGMENT:
+		__gmap_unshadow_sgt(sg, 0, table);
+		break;
+	}
+}
+
+/**
+ * gmap_find_shadow - find a specific asce in the list of shadow tables
+ * @parent: pointer to the parent gmap
+ * @asce: ASCE for which the shadow table is created
+ * @edat_level: edat level to be used for the shadow translation
+ *
+ * Returns the pointer to a gmap if a shadow table with the given asce is
+ * already available, ERR_PTR(-EAGAIN) if another one is just being created,
+ * otherwise NULL
+ */
+static struct gmap *gmap_find_shadow(struct gmap *parent, unsigned long asce,
+				     int edat_level)
+{
+	struct gmap *sg;
+
+	list_for_each_entry(sg, &parent->children, list) {
+		if (sg->orig_asce != asce || sg->edat_level != edat_level ||
+		    sg->removed)
+			continue;
+		if (!sg->initialized)
+			return ERR_PTR(-EAGAIN);
+		refcount_inc(&sg->ref_count);
+		return sg;
+	}
+	return NULL;
+}
+
+/**
+ * gmap_shadow_valid - check if a shadow guest address space matches the
+ *                     given properties and is still valid
+ * @sg: pointer to the shadow guest address space structure
+ * @asce: ASCE for which the shadow table is requested
+ * @edat_level: edat level to be used for the shadow translation
+ *
+ * Returns 1 if the gmap shadow is still valid and matches the given
+ * properties, the caller can continue using it. Returns 0 otherwise, the
+ * caller has to request a new shadow gmap in this case.
+ *
+ */
+int gmap_shadow_valid(struct gmap *sg, unsigned long asce, int edat_level)
+{
+	if (sg->removed)
+		return 0;
+	return sg->orig_asce == asce && sg->edat_level == edat_level;
+}
+EXPORT_SYMBOL_GPL(gmap_shadow_valid);
+
+/**
+ * gmap_shadow - create/find a shadow guest address space
+ * @parent: pointer to the parent gmap
+ * @asce: ASCE for which the shadow table is created
+ * @edat_level: edat level to be used for the shadow translation
+ *
+ * The pages of the top level page table referred by the asce parameter
+ * will be set to read-only and marked in the PGSTEs of the kvm process.
+ * The shadow table will be removed automatically on any change to the
+ * PTE mapping for the source table.
+ *
+ * Returns a guest address space structure, ERR_PTR(-ENOMEM) if out of memory,
+ * ERR_PTR(-EAGAIN) if the caller has to retry and ERR_PTR(-EFAULT) if the
+ * parent gmap table could not be protected.
+ */
+struct gmap *gmap_shadow(struct gmap *parent, unsigned long asce,
+			 int edat_level)
+{
+	struct gmap *sg, *new;
+	unsigned long limit;
+	int rc;
+
+	BUG_ON(parent->mm->context.allow_gmap_hpage_1m);
+	BUG_ON(gmap_is_shadow(parent));
+	spin_lock(&parent->shadow_lock);
+	sg = gmap_find_shadow(parent, asce, edat_level);
+	spin_unlock(&parent->shadow_lock);
+	if (sg)
+		return sg;
+	/* Create a new shadow gmap */
+	limit = -1UL >> (33 - (((asce & _ASCE_TYPE_MASK) >> 2) * 11));
+	if (asce & _ASCE_REAL_SPACE)
+		limit = -1UL;
+	new = gmap_alloc(limit);
+	if (!new)
+		return ERR_PTR(-ENOMEM);
+	new->mm = parent->mm;
+	new->parent = gmap_get(parent);
+	new->orig_asce = asce;
+	new->edat_level = edat_level;
+	new->initialized = false;
+	spin_lock(&parent->shadow_lock);
+	/* Recheck if another CPU created the same shadow */
+	sg = gmap_find_shadow(parent, asce, edat_level);
+	if (sg) {
+		spin_unlock(&parent->shadow_lock);
+		gmap_free(new);
+		return sg;
+	}
+	if (asce & _ASCE_REAL_SPACE) {
+		/* only allow one real-space gmap shadow */
+		list_for_each_entry(sg, &parent->children, list) {
+			if (sg->orig_asce & _ASCE_REAL_SPACE) {
+				spin_lock(&sg->guest_table_lock);
+				gmap_unshadow(sg);
+				spin_unlock(&sg->guest_table_lock);
+				list_del(&sg->list);
+				gmap_put(sg);
+				break;
+			}
+		}
+	}
+	refcount_set(&new->ref_count, 2);
+	list_add(&new->list, &parent->children);
+	if (asce & _ASCE_REAL_SPACE) {
+		/* nothing to protect, return right away */
+		new->initialized = true;
+		spin_unlock(&parent->shadow_lock);
+		return new;
+	}
+	spin_unlock(&parent->shadow_lock);
+	/* protect after insertion, so it will get properly invalidated */
+	mmap_read_lock(parent->mm);
+	rc = gmap_protect_range(parent, asce & _ASCE_ORIGIN,
+				((asce & _ASCE_TABLE_LENGTH) + 1) * PAGE_SIZE,
+				PROT_READ, GMAP_NOTIFY_SHADOW);
+	mmap_read_unlock(parent->mm);
+	spin_lock(&parent->shadow_lock);
+	new->initialized = true;
+	if (rc) {
+		list_del(&new->list);
+		gmap_free(new);
+		new = ERR_PTR(rc);
+	}
+	spin_unlock(&parent->shadow_lock);
+	return new;
+}
+EXPORT_SYMBOL_GPL(gmap_shadow);
+
+/**
+ * gmap_shadow_r2t - create an empty shadow region 2 table
+ * @sg: pointer to the shadow guest address space structure
+ * @saddr: faulting address in the shadow gmap
+ * @r2t: parent gmap address of the region 2 table to get shadowed
+ * @fake: r2t references contiguous guest memory block, not a r2t
+ *
+ * The r2t parameter specifies the address of the source table. The
+ * four pages of the source table are made read-only in the parent gmap
+ * address space. A write to the source table area @r2t will automatically
+ * remove the shadow r2 table and all of its descendants.
+ *
+ * Returns 0 if successfully shadowed or already shadowed, -EAGAIN if the
+ * shadow table structure is incomplete, -ENOMEM if out of memory and
+ * -EFAULT if an address in the parent gmap could not be resolved.
+ *
+ * Called with sg->mm->mmap_lock in read.
+ */
+int gmap_shadow_r2t(struct gmap *sg, unsigned long saddr, unsigned long r2t,
+		    int fake)
+{
+	unsigned long raddr, origin, offset, len;
+	unsigned long *table;
+	phys_addr_t s_r2t;
+	struct page *page;
+	int rc;
+
+	BUG_ON(!gmap_is_shadow(sg));
+	/* Allocate a shadow region second table */
+	page = gmap_alloc_crst();
+	if (!page)
+		return -ENOMEM;
+	page->index = r2t & _REGION_ENTRY_ORIGIN;
+	if (fake)
+		page->index |= GMAP_SHADOW_FAKE_TABLE;
+	s_r2t = page_to_phys(page);
+	/* Install shadow region second table */
+	spin_lock(&sg->guest_table_lock);
+	table = gmap_table_walk(sg, saddr, 4); /* get region-1 pointer */
+	if (!table) {
+		rc = -EAGAIN;		/* Race with unshadow */
+		goto out_free;
+	}
+	if (!(*table & _REGION_ENTRY_INVALID)) {
+		rc = 0;			/* Already established */
+		goto out_free;
+	} else if (*table & _REGION_ENTRY_ORIGIN) {
+		rc = -EAGAIN;		/* Race with shadow */
+		goto out_free;
+	}
+	crst_table_init(__va(s_r2t), _REGION2_ENTRY_EMPTY);
+	/* mark as invalid as long as the parent table is not protected */
+	*table = s_r2t | _REGION_ENTRY_LENGTH |
+		 _REGION_ENTRY_TYPE_R1 | _REGION_ENTRY_INVALID;
+	if (sg->edat_level >= 1)
+		*table |= (r2t & _REGION_ENTRY_PROTECT);
+	list_add(&page->lru, &sg->crst_list);
+	if (fake) {
+		/* nothing to protect for fake tables */
+		*table &= ~_REGION_ENTRY_INVALID;
+		spin_unlock(&sg->guest_table_lock);
+		return 0;
+	}
+	spin_unlock(&sg->guest_table_lock);
+	/* Make r2t read-only in parent gmap page table */
+	raddr = (saddr & _REGION1_MASK) | _SHADOW_RMAP_REGION1;
+	origin = r2t & _REGION_ENTRY_ORIGIN;
+	offset = ((r2t & _REGION_ENTRY_OFFSET) >> 6) * PAGE_SIZE;
+	len = ((r2t & _REGION_ENTRY_LENGTH) + 1) * PAGE_SIZE - offset;
+	rc = gmap_protect_rmap(sg, raddr, origin + offset, len);
+	spin_lock(&sg->guest_table_lock);
+	if (!rc) {
+		table = gmap_table_walk(sg, saddr, 4);
+		if (!table || (*table & _REGION_ENTRY_ORIGIN) != s_r2t)
+			rc = -EAGAIN;		/* Race with unshadow */
+		else
+			*table &= ~_REGION_ENTRY_INVALID;
+	} else {
+		gmap_unshadow_r2t(sg, raddr);
+	}
+	spin_unlock(&sg->guest_table_lock);
+	return rc;
+out_free:
+	spin_unlock(&sg->guest_table_lock);
+	__free_pages(page, CRST_ALLOC_ORDER);
+	return rc;
+}
+EXPORT_SYMBOL_GPL(gmap_shadow_r2t);
+
+/**
+ * gmap_shadow_r3t - create a shadow region 3 table
+ * @sg: pointer to the shadow guest address space structure
+ * @saddr: faulting address in the shadow gmap
+ * @r3t: parent gmap address of the region 3 table to get shadowed
+ * @fake: r3t references contiguous guest memory block, not a r3t
+ *
+ * Returns 0 if successfully shadowed or already shadowed, -EAGAIN if the
+ * shadow table structure is incomplete, -ENOMEM if out of memory and
+ * -EFAULT if an address in the parent gmap could not be resolved.
+ *
+ * Called with sg->mm->mmap_lock in read.
+ */
+int gmap_shadow_r3t(struct gmap *sg, unsigned long saddr, unsigned long r3t,
+		    int fake)
+{
+	unsigned long raddr, origin, offset, len;
+	unsigned long *table;
+	phys_addr_t s_r3t;
+	struct page *page;
+	int rc;
+
+	BUG_ON(!gmap_is_shadow(sg));
+	/* Allocate a shadow region second table */
+	page = gmap_alloc_crst();
+	if (!page)
+		return -ENOMEM;
+	page->index = r3t & _REGION_ENTRY_ORIGIN;
+	if (fake)
+		page->index |= GMAP_SHADOW_FAKE_TABLE;
+	s_r3t = page_to_phys(page);
+	/* Install shadow region second table */
+	spin_lock(&sg->guest_table_lock);
+	table = gmap_table_walk(sg, saddr, 3); /* get region-2 pointer */
+	if (!table) {
+		rc = -EAGAIN;		/* Race with unshadow */
+		goto out_free;
+	}
+	if (!(*table & _REGION_ENTRY_INVALID)) {
+		rc = 0;			/* Already established */
+		goto out_free;
+	} else if (*table & _REGION_ENTRY_ORIGIN) {
+		rc = -EAGAIN;		/* Race with shadow */
+		goto out_free;
+	}
+	crst_table_init(__va(s_r3t), _REGION3_ENTRY_EMPTY);
+	/* mark as invalid as long as the parent table is not protected */
+	*table = s_r3t | _REGION_ENTRY_LENGTH |
+		 _REGION_ENTRY_TYPE_R2 | _REGION_ENTRY_INVALID;
+	if (sg->edat_level >= 1)
+		*table |= (r3t & _REGION_ENTRY_PROTECT);
+	list_add(&page->lru, &sg->crst_list);
+	if (fake) {
+		/* nothing to protect for fake tables */
+		*table &= ~_REGION_ENTRY_INVALID;
+		spin_unlock(&sg->guest_table_lock);
+		return 0;
+	}
+	spin_unlock(&sg->guest_table_lock);
+	/* Make r3t read-only in parent gmap page table */
+	raddr = (saddr & _REGION2_MASK) | _SHADOW_RMAP_REGION2;
+	origin = r3t & _REGION_ENTRY_ORIGIN;
+	offset = ((r3t & _REGION_ENTRY_OFFSET) >> 6) * PAGE_SIZE;
+	len = ((r3t & _REGION_ENTRY_LENGTH) + 1) * PAGE_SIZE - offset;
+	rc = gmap_protect_rmap(sg, raddr, origin + offset, len);
+	spin_lock(&sg->guest_table_lock);
+	if (!rc) {
+		table = gmap_table_walk(sg, saddr, 3);
+		if (!table || (*table & _REGION_ENTRY_ORIGIN) != s_r3t)
+			rc = -EAGAIN;		/* Race with unshadow */
+		else
+			*table &= ~_REGION_ENTRY_INVALID;
+	} else {
+		gmap_unshadow_r3t(sg, raddr);
+	}
+	spin_unlock(&sg->guest_table_lock);
+	return rc;
+out_free:
+	spin_unlock(&sg->guest_table_lock);
+	__free_pages(page, CRST_ALLOC_ORDER);
+	return rc;
+}
+EXPORT_SYMBOL_GPL(gmap_shadow_r3t);
+
+/**
+ * gmap_shadow_sgt - create a shadow segment table
+ * @sg: pointer to the shadow guest address space structure
+ * @saddr: faulting address in the shadow gmap
+ * @sgt: parent gmap address of the segment table to get shadowed
+ * @fake: sgt references contiguous guest memory block, not a sgt
+ *
+ * Returns: 0 if successfully shadowed or already shadowed, -EAGAIN if the
+ * shadow table structure is incomplete, -ENOMEM if out of memory and
+ * -EFAULT if an address in the parent gmap could not be resolved.
+ *
+ * Called with sg->mm->mmap_lock in read.
+ */
+int gmap_shadow_sgt(struct gmap *sg, unsigned long saddr, unsigned long sgt,
+		    int fake)
+{
+	unsigned long raddr, origin, offset, len;
+	unsigned long *table;
+	phys_addr_t s_sgt;
+	struct page *page;
+	int rc;
+
+	BUG_ON(!gmap_is_shadow(sg) || (sgt & _REGION3_ENTRY_LARGE));
+	/* Allocate a shadow segment table */
+	page = gmap_alloc_crst();
+	if (!page)
+		return -ENOMEM;
+	page->index = sgt & _REGION_ENTRY_ORIGIN;
+	if (fake)
+		page->index |= GMAP_SHADOW_FAKE_TABLE;
+	s_sgt = page_to_phys(page);
+	/* Install shadow region second table */
+	spin_lock(&sg->guest_table_lock);
+	table = gmap_table_walk(sg, saddr, 2); /* get region-3 pointer */
+	if (!table) {
+		rc = -EAGAIN;		/* Race with unshadow */
+		goto out_free;
+	}
+	if (!(*table & _REGION_ENTRY_INVALID)) {
+		rc = 0;			/* Already established */
+		goto out_free;
+	} else if (*table & _REGION_ENTRY_ORIGIN) {
+		rc = -EAGAIN;		/* Race with shadow */
+		goto out_free;
+	}
+	crst_table_init(__va(s_sgt), _SEGMENT_ENTRY_EMPTY);
+	/* mark as invalid as long as the parent table is not protected */
+	*table = s_sgt | _REGION_ENTRY_LENGTH |
+		 _REGION_ENTRY_TYPE_R3 | _REGION_ENTRY_INVALID;
+	if (sg->edat_level >= 1)
+		*table |= sgt & _REGION_ENTRY_PROTECT;
+	list_add(&page->lru, &sg->crst_list);
+	if (fake) {
+		/* nothing to protect for fake tables */
+		*table &= ~_REGION_ENTRY_INVALID;
+		spin_unlock(&sg->guest_table_lock);
+		return 0;
+	}
+	spin_unlock(&sg->guest_table_lock);
+	/* Make sgt read-only in parent gmap page table */
+	raddr = (saddr & _REGION3_MASK) | _SHADOW_RMAP_REGION3;
+	origin = sgt & _REGION_ENTRY_ORIGIN;
+	offset = ((sgt & _REGION_ENTRY_OFFSET) >> 6) * PAGE_SIZE;
+	len = ((sgt & _REGION_ENTRY_LENGTH) + 1) * PAGE_SIZE - offset;
+	rc = gmap_protect_rmap(sg, raddr, origin + offset, len);
+	spin_lock(&sg->guest_table_lock);
+	if (!rc) {
+		table = gmap_table_walk(sg, saddr, 2);
+		if (!table || (*table & _REGION_ENTRY_ORIGIN) != s_sgt)
+			rc = -EAGAIN;		/* Race with unshadow */
+		else
+			*table &= ~_REGION_ENTRY_INVALID;
+	} else {
+		gmap_unshadow_sgt(sg, raddr);
+	}
+	spin_unlock(&sg->guest_table_lock);
+	return rc;
+out_free:
+	spin_unlock(&sg->guest_table_lock);
+	__free_pages(page, CRST_ALLOC_ORDER);
+	return rc;
+}
+EXPORT_SYMBOL_GPL(gmap_shadow_sgt);
+
+/**
+ * gmap_shadow_pgt_lookup - find a shadow page table
+ * @sg: pointer to the shadow guest address space structure
+ * @saddr: the address in the shadow aguest address space
+ * @pgt: parent gmap address of the page table to get shadowed
+ * @dat_protection: if the pgtable is marked as protected by dat
+ * @fake: pgt references contiguous guest memory block, not a pgtable
+ *
+ * Returns 0 if the shadow page table was found and -EAGAIN if the page
+ * table was not found.
+ *
+ * Called with sg->mm->mmap_lock in read.
+ */
+int gmap_shadow_pgt_lookup(struct gmap *sg, unsigned long saddr,
+			   unsigned long *pgt, int *dat_protection,
+			   int *fake)
+{
+	unsigned long *table;
+	struct page *page;
+	int rc;
+
+	BUG_ON(!gmap_is_shadow(sg));
+	spin_lock(&sg->guest_table_lock);
+	table = gmap_table_walk(sg, saddr, 1); /* get segment pointer */
+	if (table && !(*table & _SEGMENT_ENTRY_INVALID)) {
+		/* Shadow page tables are full pages (pte+pgste) */
+		page = pfn_to_page(*table >> PAGE_SHIFT);
+		*pgt = page->index & ~GMAP_SHADOW_FAKE_TABLE;
+		*dat_protection = !!(*table & _SEGMENT_ENTRY_PROTECT);
+		*fake = !!(page->index & GMAP_SHADOW_FAKE_TABLE);
+		rc = 0;
+	} else  {
+		rc = -EAGAIN;
+	}
+	spin_unlock(&sg->guest_table_lock);
+	return rc;
+
+}
+EXPORT_SYMBOL_GPL(gmap_shadow_pgt_lookup);
+
+/**
+ * gmap_shadow_pgt - instantiate a shadow page table
+ * @sg: pointer to the shadow guest address space structure
+ * @saddr: faulting address in the shadow gmap
+ * @pgt: parent gmap address of the page table to get shadowed
+ * @fake: pgt references contiguous guest memory block, not a pgtable
+ *
+ * Returns 0 if successfully shadowed or already shadowed, -EAGAIN if the
+ * shadow table structure is incomplete, -ENOMEM if out of memory,
+ * -EFAULT if an address in the parent gmap could not be resolved and
+ *
+ * Called with gmap->mm->mmap_lock in read
+ */
+int gmap_shadow_pgt(struct gmap *sg, unsigned long saddr, unsigned long pgt,
+		    int fake)
+{
+	unsigned long raddr, origin;
+	unsigned long *table;
+	struct page *page;
+	phys_addr_t s_pgt;
+	int rc;
+
+	BUG_ON(!gmap_is_shadow(sg) || (pgt & _SEGMENT_ENTRY_LARGE));
+	/* Allocate a shadow page table */
+	page = page_table_alloc_pgste(sg->mm);
+	if (!page)
+		return -ENOMEM;
+	page->index = pgt & _SEGMENT_ENTRY_ORIGIN;
+	if (fake)
+		page->index |= GMAP_SHADOW_FAKE_TABLE;
+	s_pgt = page_to_phys(page);
+	/* Install shadow page table */
+	spin_lock(&sg->guest_table_lock);
+	table = gmap_table_walk(sg, saddr, 1); /* get segment pointer */
+	if (!table) {
+		rc = -EAGAIN;		/* Race with unshadow */
+		goto out_free;
+	}
+	if (!(*table & _SEGMENT_ENTRY_INVALID)) {
+		rc = 0;			/* Already established */
+		goto out_free;
+	} else if (*table & _SEGMENT_ENTRY_ORIGIN) {
+		rc = -EAGAIN;		/* Race with shadow */
+		goto out_free;
+	}
+	/* mark as invalid as long as the parent table is not protected */
+	*table = (unsigned long) s_pgt | _SEGMENT_ENTRY |
+		 (pgt & _SEGMENT_ENTRY_PROTECT) | _SEGMENT_ENTRY_INVALID;
+	list_add(&page->lru, &sg->pt_list);
+	if (fake) {
+		/* nothing to protect for fake tables */
+		*table &= ~_SEGMENT_ENTRY_INVALID;
+		spin_unlock(&sg->guest_table_lock);
+		return 0;
+	}
+	spin_unlock(&sg->guest_table_lock);
+	/* Make pgt read-only in parent gmap page table (not the pgste) */
+	raddr = (saddr & _SEGMENT_MASK) | _SHADOW_RMAP_SEGMENT;
+	origin = pgt & _SEGMENT_ENTRY_ORIGIN & PAGE_MASK;
+	rc = gmap_protect_rmap(sg, raddr, origin, PAGE_SIZE);
+	spin_lock(&sg->guest_table_lock);
+	if (!rc) {
+		table = gmap_table_walk(sg, saddr, 1);
+		if (!table || (*table & _SEGMENT_ENTRY_ORIGIN) != s_pgt)
+			rc = -EAGAIN;		/* Race with unshadow */
+		else
+			*table &= ~_SEGMENT_ENTRY_INVALID;
+	} else {
+		gmap_unshadow_pgt(sg, raddr);
+	}
+	spin_unlock(&sg->guest_table_lock);
+	return rc;
+out_free:
+	spin_unlock(&sg->guest_table_lock);
+	page_table_free_pgste(page);
+	return rc;
+
+}
+EXPORT_SYMBOL_GPL(gmap_shadow_pgt);
+
+/**
+ * gmap_shadow_page - create a shadow page mapping
+ * @sg: pointer to the shadow guest address space structure
+ * @saddr: faulting address in the shadow gmap
+ * @pte: pte in parent gmap address space to get shadowed
+ *
+ * Returns 0 if successfully shadowed or already shadowed, -EAGAIN if the
+ * shadow table structure is incomplete, -ENOMEM if out of memory and
+ * -EFAULT if an address in the parent gmap could not be resolved.
+ *
+ * Called with sg->mm->mmap_lock in read.
+ */
+int gmap_shadow_page(struct gmap *sg, unsigned long saddr, pte_t pte)
+{
+	struct gmap *parent;
+	struct gmap_rmap *rmap;
+	unsigned long vmaddr, paddr;
+	spinlock_t *ptl;
+	pte_t *sptep, *tptep;
+	int prot;
+	int rc;
+
+	BUG_ON(!gmap_is_shadow(sg));
+	parent = sg->parent;
+	prot = (pte_val(pte) & _PAGE_PROTECT) ? PROT_READ : PROT_WRITE;
+
+	rmap = kzalloc(sizeof(*rmap), GFP_KERNEL_ACCOUNT);
+	if (!rmap)
+		return -ENOMEM;
+	rmap->raddr = (saddr & PAGE_MASK) | _SHADOW_RMAP_PGTABLE;
+
+	while (1) {
+		paddr = pte_val(pte) & PAGE_MASK;
+		vmaddr = __gmap_translate(parent, paddr);
+		if (IS_ERR_VALUE(vmaddr)) {
+			rc = vmaddr;
+			break;
+		}
+		rc = radix_tree_preload(GFP_KERNEL_ACCOUNT);
+		if (rc)
+			break;
+		rc = -EAGAIN;
+		sptep = gmap_pte_op_walk(parent, paddr, &ptl);
+		if (sptep) {
+			spin_lock(&sg->guest_table_lock);
+			/* Get page table pointer */
+			tptep = (pte_t *) gmap_table_walk(sg, saddr, 0);
+			if (!tptep) {
+				spin_unlock(&sg->guest_table_lock);
+				gmap_pte_op_end(sptep, ptl);
+				radix_tree_preload_end();
+				break;
+			}
+			rc = ptep_shadow_pte(sg->mm, saddr, sptep, tptep, pte);
+			if (rc > 0) {
+				/* Success and a new mapping */
+				gmap_insert_rmap(sg, vmaddr, rmap);
+				rmap = NULL;
+				rc = 0;
+			}
+			gmap_pte_op_end(sptep, ptl);
+			spin_unlock(&sg->guest_table_lock);
+		}
+		radix_tree_preload_end();
+		if (!rc)
+			break;
+		rc = gmap_pte_op_fixup(parent, paddr, vmaddr, prot);
+		if (rc)
+			break;
+	}
+	kfree(rmap);
+	return rc;
+}
+EXPORT_SYMBOL_GPL(gmap_shadow_page);
+
+/*
+ * gmap_shadow_notify - handle notifications for shadow gmap
+ *
+ * Called with sg->parent->shadow_lock.
+ */
+static void gmap_shadow_notify(struct gmap *sg, unsigned long vmaddr,
+			       unsigned long gaddr)
+{
+	struct gmap_rmap *rmap, *rnext, *head;
+	unsigned long start, end, bits, raddr;
+
+	BUG_ON(!gmap_is_shadow(sg));
+
+	spin_lock(&sg->guest_table_lock);
+	if (sg->removed) {
+		spin_unlock(&sg->guest_table_lock);
+		return;
+	}
+	/* Check for top level table */
+	start = sg->orig_asce & _ASCE_ORIGIN;
+	end = start + ((sg->orig_asce & _ASCE_TABLE_LENGTH) + 1) * PAGE_SIZE;
+	if (!(sg->orig_asce & _ASCE_REAL_SPACE) && gaddr >= start &&
+	    gaddr < end) {
+		/* The complete shadow table has to go */
+		gmap_unshadow(sg);
+		spin_unlock(&sg->guest_table_lock);
+		list_del(&sg->list);
+		gmap_put(sg);
+		return;
+	}
+	/* Remove the page table tree from on specific entry */
+	head = radix_tree_delete(&sg->host_to_rmap, vmaddr >> PAGE_SHIFT);
+	gmap_for_each_rmap_safe(rmap, rnext, head) {
+		bits = rmap->raddr & _SHADOW_RMAP_MASK;
+		raddr = rmap->raddr ^ bits;
+		switch (bits) {
+		case _SHADOW_RMAP_REGION1:
+			gmap_unshadow_r2t(sg, raddr);
+			break;
+		case _SHADOW_RMAP_REGION2:
+			gmap_unshadow_r3t(sg, raddr);
+			break;
+		case _SHADOW_RMAP_REGION3:
+			gmap_unshadow_sgt(sg, raddr);
+			break;
+		case _SHADOW_RMAP_SEGMENT:
+			gmap_unshadow_pgt(sg, raddr);
+			break;
+		case _SHADOW_RMAP_PGTABLE:
+			gmap_unshadow_page(sg, raddr);
+			break;
+		}
+		kfree(rmap);
+	}
+	spin_unlock(&sg->guest_table_lock);
+}
+
+/**
+ * ptep_notify - call all invalidation callbacks for a specific pte.
+ * @mm: pointer to the process mm_struct
+ * @vmaddr: virtual address in the process address space
+ * @pte: pointer to the page table entry
+ * @bits: bits from the pgste that caused the notify call
+ *
+ * This function is assumed to be called with the page table lock held
+ * for the pte to notify.
+ */
+void ptep_notify(struct mm_struct *mm, unsigned long vmaddr,
+		 pte_t *pte, unsigned long bits)
+{
+	unsigned long offset, gaddr = 0;
+	unsigned long *table;
+	struct gmap *gmap, *sg, *next;
+
+	offset = ((unsigned long) pte) & (255 * sizeof(pte_t));
+	offset = offset * (PAGE_SIZE / sizeof(pte_t));
+	rcu_read_lock();
+	list_for_each_entry_rcu(gmap, &mm->context.gmap_list, list) {
+		spin_lock(&gmap->guest_table_lock);
+		table = radix_tree_lookup(&gmap->host_to_guest,
+					  vmaddr >> PMD_SHIFT);
+		if (table)
+			gaddr = __gmap_segment_gaddr(table) + offset;
+		spin_unlock(&gmap->guest_table_lock);
+		if (!table)
+			continue;
+
+		if (!list_empty(&gmap->children) && (bits & PGSTE_VSIE_BIT)) {
+			spin_lock(&gmap->shadow_lock);
+			list_for_each_entry_safe(sg, next,
+						 &gmap->children, list)
+				gmap_shadow_notify(sg, vmaddr, gaddr);
+			spin_unlock(&gmap->shadow_lock);
+		}
+		if (bits & PGSTE_IN_BIT)
+			gmap_call_notifier(gmap, gaddr, gaddr + PAGE_SIZE - 1);
+	}
+	rcu_read_unlock();
+}
+EXPORT_SYMBOL_GPL(ptep_notify);
+
+static void pmdp_notify_gmap(struct gmap *gmap, pmd_t *pmdp,
+			     unsigned long gaddr)
+{
+	set_pmd(pmdp, clear_pmd_bit(*pmdp, __pgprot(_SEGMENT_ENTRY_GMAP_IN)));
+	gmap_call_notifier(gmap, gaddr, gaddr + HPAGE_SIZE - 1);
+}
+
+/**
+ * gmap_pmdp_xchg - exchange a gmap pmd with another
+ * @gmap: pointer to the guest address space structure
+ * @pmdp: pointer to the pmd entry
+ * @new: replacement entry
+ * @gaddr: the affected guest address
+ *
+ * This function is assumed to be called with the guest_table_lock
+ * held.
+ */
+static void gmap_pmdp_xchg(struct gmap *gmap, pmd_t *pmdp, pmd_t new,
+			   unsigned long gaddr)
+{
+	gaddr &= HPAGE_MASK;
+	pmdp_notify_gmap(gmap, pmdp, gaddr);
+	new = clear_pmd_bit(new, __pgprot(_SEGMENT_ENTRY_GMAP_IN));
+	if (MACHINE_HAS_TLB_GUEST)
+		__pmdp_idte(gaddr, (pmd_t *)pmdp, IDTE_GUEST_ASCE, gmap->asce,
+			    IDTE_GLOBAL);
+	else if (MACHINE_HAS_IDTE)
+		__pmdp_idte(gaddr, (pmd_t *)pmdp, 0, 0, IDTE_GLOBAL);
+	else
+		__pmdp_csp(pmdp);
+	set_pmd(pmdp, new);
+}
+
+static void gmap_pmdp_clear(struct mm_struct *mm, unsigned long vmaddr,
+			    int purge)
+{
+	pmd_t *pmdp;
+	struct gmap *gmap;
+	unsigned long gaddr;
+
+	rcu_read_lock();
+	list_for_each_entry_rcu(gmap, &mm->context.gmap_list, list) {
+		spin_lock(&gmap->guest_table_lock);
+		pmdp = (pmd_t *)radix_tree_delete(&gmap->host_to_guest,
+						  vmaddr >> PMD_SHIFT);
+		if (pmdp) {
+			gaddr = __gmap_segment_gaddr((unsigned long *)pmdp);
+			pmdp_notify_gmap(gmap, pmdp, gaddr);
+			WARN_ON(pmd_val(*pmdp) & ~(_SEGMENT_ENTRY_HARDWARE_BITS_LARGE |
+						   _SEGMENT_ENTRY_GMAP_UC));
+			if (purge)
+				__pmdp_csp(pmdp);
+			set_pmd(pmdp, __pmd(_SEGMENT_ENTRY_EMPTY));
+		}
+		spin_unlock(&gmap->guest_table_lock);
+	}
+	rcu_read_unlock();
+}
+
+/**
+ * gmap_pmdp_invalidate - invalidate all affected guest pmd entries without
+ *                        flushing
+ * @mm: pointer to the process mm_struct
+ * @vmaddr: virtual address in the process address space
+ */
+void gmap_pmdp_invalidate(struct mm_struct *mm, unsigned long vmaddr)
+{
+	gmap_pmdp_clear(mm, vmaddr, 0);
+}
+EXPORT_SYMBOL_GPL(gmap_pmdp_invalidate);
+
+/**
+ * gmap_pmdp_csp - csp all affected guest pmd entries
+ * @mm: pointer to the process mm_struct
+ * @vmaddr: virtual address in the process address space
+ */
+void gmap_pmdp_csp(struct mm_struct *mm, unsigned long vmaddr)
+{
+	gmap_pmdp_clear(mm, vmaddr, 1);
+}
+EXPORT_SYMBOL_GPL(gmap_pmdp_csp);
+
+/**
+ * gmap_pmdp_idte_local - invalidate and clear a guest pmd entry
+ * @mm: pointer to the process mm_struct
+ * @vmaddr: virtual address in the process address space
+ */
+void gmap_pmdp_idte_local(struct mm_struct *mm, unsigned long vmaddr)
+{
+	unsigned long *entry, gaddr;
+	struct gmap *gmap;
+	pmd_t *pmdp;
+
+	rcu_read_lock();
+	list_for_each_entry_rcu(gmap, &mm->context.gmap_list, list) {
+		spin_lock(&gmap->guest_table_lock);
+		entry = radix_tree_delete(&gmap->host_to_guest,
+					  vmaddr >> PMD_SHIFT);
+		if (entry) {
+			pmdp = (pmd_t *)entry;
+			gaddr = __gmap_segment_gaddr(entry);
+			pmdp_notify_gmap(gmap, pmdp, gaddr);
+			WARN_ON(*entry & ~(_SEGMENT_ENTRY_HARDWARE_BITS_LARGE |
+					   _SEGMENT_ENTRY_GMAP_UC));
+			if (MACHINE_HAS_TLB_GUEST)
+				__pmdp_idte(gaddr, pmdp, IDTE_GUEST_ASCE,
+					    gmap->asce, IDTE_LOCAL);
+			else if (MACHINE_HAS_IDTE)
+				__pmdp_idte(gaddr, pmdp, 0, 0, IDTE_LOCAL);
+			*entry = _SEGMENT_ENTRY_EMPTY;
+		}
+		spin_unlock(&gmap->guest_table_lock);
+	}
+	rcu_read_unlock();
+}
+EXPORT_SYMBOL_GPL(gmap_pmdp_idte_local);
+
+/**
+ * gmap_pmdp_idte_global - invalidate and clear a guest pmd entry
+ * @mm: pointer to the process mm_struct
+ * @vmaddr: virtual address in the process address space
+ */
+void gmap_pmdp_idte_global(struct mm_struct *mm, unsigned long vmaddr)
+{
+	unsigned long *entry, gaddr;
+	struct gmap *gmap;
+	pmd_t *pmdp;
+
+	rcu_read_lock();
+	list_for_each_entry_rcu(gmap, &mm->context.gmap_list, list) {
+		spin_lock(&gmap->guest_table_lock);
+		entry = radix_tree_delete(&gmap->host_to_guest,
+					  vmaddr >> PMD_SHIFT);
+		if (entry) {
+			pmdp = (pmd_t *)entry;
+			gaddr = __gmap_segment_gaddr(entry);
+			pmdp_notify_gmap(gmap, pmdp, gaddr);
+			WARN_ON(*entry & ~(_SEGMENT_ENTRY_HARDWARE_BITS_LARGE |
+					   _SEGMENT_ENTRY_GMAP_UC));
+			if (MACHINE_HAS_TLB_GUEST)
+				__pmdp_idte(gaddr, pmdp, IDTE_GUEST_ASCE,
+					    gmap->asce, IDTE_GLOBAL);
+			else if (MACHINE_HAS_IDTE)
+				__pmdp_idte(gaddr, pmdp, 0, 0, IDTE_GLOBAL);
+			else
+				__pmdp_csp(pmdp);
+			*entry = _SEGMENT_ENTRY_EMPTY;
+		}
+		spin_unlock(&gmap->guest_table_lock);
+	}
+	rcu_read_unlock();
+}
+EXPORT_SYMBOL_GPL(gmap_pmdp_idte_global);
+
+/**
+ * gmap_test_and_clear_dirty_pmd - test and reset segment dirty status
+ * @gmap: pointer to guest address space
+ * @pmdp: pointer to the pmd to be tested
+ * @gaddr: virtual address in the guest address space
+ *
+ * This function is assumed to be called with the guest_table_lock
+ * held.
+ */
+static bool gmap_test_and_clear_dirty_pmd(struct gmap *gmap, pmd_t *pmdp,
+					  unsigned long gaddr)
+{
+	if (pmd_val(*pmdp) & _SEGMENT_ENTRY_INVALID)
+		return false;
+
+	/* Already protected memory, which did not change is clean */
+	if (pmd_val(*pmdp) & _SEGMENT_ENTRY_PROTECT &&
+	    !(pmd_val(*pmdp) & _SEGMENT_ENTRY_GMAP_UC))
+		return false;
+
+	/* Clear UC indication and reset protection */
+	set_pmd(pmdp, clear_pmd_bit(*pmdp, __pgprot(_SEGMENT_ENTRY_GMAP_UC)));
+	gmap_protect_pmd(gmap, gaddr, pmdp, PROT_READ, 0);
+	return true;
+}
+
+/**
+ * gmap_sync_dirty_log_pmd - set bitmap based on dirty status of segment
+ * @gmap: pointer to guest address space
+ * @bitmap: dirty bitmap for this pmd
+ * @gaddr: virtual address in the guest address space
+ * @vmaddr: virtual address in the host address space
+ *
+ * This function is assumed to be called with the guest_table_lock
+ * held.
+ */
+void gmap_sync_dirty_log_pmd(struct gmap *gmap, unsigned long bitmap[4],
+			     unsigned long gaddr, unsigned long vmaddr)
+{
+	int i;
+	pmd_t *pmdp;
+	pte_t *ptep;
+	spinlock_t *ptl;
+
+	pmdp = gmap_pmd_op_walk(gmap, gaddr);
+	if (!pmdp)
+		return;
+
+	if (pmd_large(*pmdp)) {
+		if (gmap_test_and_clear_dirty_pmd(gmap, pmdp, gaddr))
+			bitmap_fill(bitmap, _PAGE_ENTRIES);
+	} else {
+		for (i = 0; i < _PAGE_ENTRIES; i++, vmaddr += PAGE_SIZE) {
+			ptep = pte_alloc_map_lock(gmap->mm, pmdp, vmaddr, &ptl);
+			if (!ptep)
+				continue;
+			if (ptep_test_and_clear_uc(gmap->mm, vmaddr, ptep))
+				set_bit(i, bitmap);
+			pte_unmap_unlock(ptep, ptl);
+		}
+	}
+	gmap_pmd_op_end(gmap, pmdp);
+}
+EXPORT_SYMBOL_GPL(gmap_sync_dirty_log_pmd);
+
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+static int thp_split_walk_pmd_entry(pmd_t *pmd, unsigned long addr,
+				    unsigned long end, struct mm_walk *walk)
+{
+	struct vm_area_struct *vma = walk->vma;
+
+	split_huge_pmd(vma, pmd, addr);
+	return 0;
+}
+
+static const struct mm_walk_ops thp_split_walk_ops = {
+	.pmd_entry	= thp_split_walk_pmd_entry,
+	.walk_lock	= PGWALK_WRLOCK_VERIFY,
+};
+
+static inline void thp_split_mm(struct mm_struct *mm)
+{
+	struct vm_area_struct *vma;
+	VMA_ITERATOR(vmi, mm, 0);
+
+	for_each_vma(vmi, vma) {
+		vm_flags_mod(vma, VM_NOHUGEPAGE, VM_HUGEPAGE);
+		walk_page_vma(vma, &thp_split_walk_ops, NULL);
+	}
+	mm->def_flags |= VM_NOHUGEPAGE;
+}
+#else
+static inline void thp_split_mm(struct mm_struct *mm)
+{
+}
+#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
+
+/*
+ * Remove all empty zero pages from the mapping for lazy refaulting
+ * - This must be called after mm->context.has_pgste is set, to avoid
+ *   future creation of zero pages
+ * - This must be called after THP was disabled.
+ *
+ * mm contracts with s390, that even if mm were to remove a page table,
+ * racing with the loop below and so causing pte_offset_map_lock() to fail,
+ * it will never insert a page table containing empty zero pages once
+ * mm_forbids_zeropage(mm) i.e. mm->context.has_pgste is set.
+ */
+static int __zap_zero_pages(pmd_t *pmd, unsigned long start,
+			   unsigned long end, struct mm_walk *walk)
+{
+	unsigned long addr;
+
+	for (addr = start; addr != end; addr += PAGE_SIZE) {
+		pte_t *ptep;
+		spinlock_t *ptl;
+
+		ptep = pte_offset_map_lock(walk->mm, pmd, addr, &ptl);
+		if (!ptep)
+			break;
+		if (is_zero_pfn(pte_pfn(*ptep)))
+			ptep_xchg_direct(walk->mm, addr, ptep, __pte(_PAGE_INVALID));
+		pte_unmap_unlock(ptep, ptl);
+	}
+	return 0;
+}
+
+static const struct mm_walk_ops zap_zero_walk_ops = {
+	.pmd_entry	= __zap_zero_pages,
+	.walk_lock	= PGWALK_WRLOCK,
+};
+
+/*
+ * switch on pgstes for its userspace process (for kvm)
+ */
+int s390_enable_sie(void)
+{
+	struct mm_struct *mm = current->mm;
+
+	/* Do we have pgstes? if yes, we are done */
+	if (mm_has_pgste(mm))
+		return 0;
+	/* Fail if the page tables are 2K */
+	if (!mm_alloc_pgste(mm))
+		return -EINVAL;
+	mmap_write_lock(mm);
+	mm->context.has_pgste = 1;
+	/* split thp mappings and disable thp for future mappings */
+	thp_split_mm(mm);
+	walk_page_range(mm, 0, TASK_SIZE, &zap_zero_walk_ops, NULL);
+	mmap_write_unlock(mm);
+	return 0;
+}
+EXPORT_SYMBOL_GPL(s390_enable_sie);
+
+int gmap_mark_unmergeable(void)
+{
+	/*
+	 * Make sure to disable KSM (if enabled for the whole process or
+	 * individual VMAs). Note that nothing currently hinders user space
+	 * from re-enabling it.
+	 */
+	return ksm_disable(current->mm);
+}
+EXPORT_SYMBOL_GPL(gmap_mark_unmergeable);
+
+/*
+ * Enable storage key handling from now on and initialize the storage
+ * keys with the default key.
+ */
+static int __s390_enable_skey_pte(pte_t *pte, unsigned long addr,
+				  unsigned long next, struct mm_walk *walk)
+{
+	/* Clear storage key */
+	ptep_zap_key(walk->mm, addr, pte);
+	return 0;
+}
+
+/*
+ * Give a chance to schedule after setting a key to 256 pages.
+ * We only hold the mm lock, which is a rwsem and the kvm srcu.
+ * Both can sleep.
+ */
+static int __s390_enable_skey_pmd(pmd_t *pmd, unsigned long addr,
+				  unsigned long next, struct mm_walk *walk)
+{
+	cond_resched();
+	return 0;
+}
+
+static int __s390_enable_skey_hugetlb(pte_t *pte, unsigned long addr,
+				      unsigned long hmask, unsigned long next,
+				      struct mm_walk *walk)
+{
+	pmd_t *pmd = (pmd_t *)pte;
+	unsigned long start, end;
+	struct page *page = pmd_page(*pmd);
+
+	/*
+	 * The write check makes sure we do not set a key on shared
+	 * memory. This is needed as the walker does not differentiate
+	 * between actual guest memory and the process executable or
+	 * shared libraries.
+	 */
+	if (pmd_val(*pmd) & _SEGMENT_ENTRY_INVALID ||
+	    !(pmd_val(*pmd) & _SEGMENT_ENTRY_WRITE))
+		return 0;
+
+	start = pmd_val(*pmd) & HPAGE_MASK;
+	end = start + HPAGE_SIZE - 1;
+	__storage_key_init_range(start, end);
+	set_bit(PG_arch_1, &page->flags);
+	cond_resched();
+	return 0;
+}
+
+static const struct mm_walk_ops enable_skey_walk_ops = {
+	.hugetlb_entry		= __s390_enable_skey_hugetlb,
+	.pte_entry		= __s390_enable_skey_pte,
+	.pmd_entry		= __s390_enable_skey_pmd,
+	.walk_lock		= PGWALK_WRLOCK,
+};
+
+int s390_enable_skey(void)
+{
+	struct mm_struct *mm = current->mm;
+	int rc = 0;
+
+	mmap_write_lock(mm);
+	if (mm_uses_skeys(mm))
+		goto out_up;
+
+	mm->context.uses_skeys = 1;
+	rc = gmap_mark_unmergeable();
+	if (rc) {
+		mm->context.uses_skeys = 0;
+		goto out_up;
+	}
+	walk_page_range(mm, 0, TASK_SIZE, &enable_skey_walk_ops, NULL);
+
+out_up:
+	mmap_write_unlock(mm);
+	return rc;
+}
+EXPORT_SYMBOL_GPL(s390_enable_skey);
+
+/*
+ * Reset CMMA state, make all pages stable again.
+ */
+static int __s390_reset_cmma(pte_t *pte, unsigned long addr,
+			     unsigned long next, struct mm_walk *walk)
+{
+	ptep_zap_unused(walk->mm, addr, pte, 1);
+	return 0;
+}
+
+static const struct mm_walk_ops reset_cmma_walk_ops = {
+	.pte_entry		= __s390_reset_cmma,
+	.walk_lock		= PGWALK_WRLOCK,
+};
+
+void s390_reset_cmma(struct mm_struct *mm)
+{
+	mmap_write_lock(mm);
+	walk_page_range(mm, 0, TASK_SIZE, &reset_cmma_walk_ops, NULL);
+	mmap_write_unlock(mm);
+}
+EXPORT_SYMBOL_GPL(s390_reset_cmma);
+
+#define GATHER_GET_PAGES 32
+
+struct reset_walk_state {
+	unsigned long next;
+	unsigned long count;
+	unsigned long pfns[GATHER_GET_PAGES];
+};
+
+static int s390_gather_pages(pte_t *ptep, unsigned long addr,
+			     unsigned long next, struct mm_walk *walk)
+{
+	struct reset_walk_state *p = walk->private;
+	pte_t pte = READ_ONCE(*ptep);
+
+	if (pte_present(pte)) {
+		/* we have a reference from the mapping, take an extra one */
+		get_page(phys_to_page(pte_val(pte)));
+		p->pfns[p->count] = phys_to_pfn(pte_val(pte));
+		p->next = next;
+		p->count++;
+	}
+	return p->count >= GATHER_GET_PAGES;
+}
+
+static const struct mm_walk_ops gather_pages_ops = {
+	.pte_entry = s390_gather_pages,
+	.walk_lock = PGWALK_RDLOCK,
+};
+
+/*
+ * Call the Destroy secure page UVC on each page in the given array of PFNs.
+ * Each page needs to have an extra reference, which will be released here.
+ */
+void s390_uv_destroy_pfns(unsigned long count, unsigned long *pfns)
+{
+	unsigned long i;
+
+	for (i = 0; i < count; i++) {
+		/* we always have an extra reference */
+		uv_destroy_owned_page(pfn_to_phys(pfns[i]));
+		/* get rid of the extra reference */
+		put_page(pfn_to_page(pfns[i]));
+		cond_resched();
+	}
+}
+EXPORT_SYMBOL_GPL(s390_uv_destroy_pfns);
+
+/**
+ * __s390_uv_destroy_range - Call the destroy secure page UVC on each page
+ * in the given range of the given address space.
+ * @mm: the mm to operate on
+ * @start: the start of the range
+ * @end: the end of the range
+ * @interruptible: if not 0, stop when a fatal signal is received
+ *
+ * Walk the given range of the given address space and call the destroy
+ * secure page UVC on each page. Optionally exit early if a fatal signal is
+ * pending.
+ *
+ * Return: 0 on success, -EINTR if the function stopped before completing
+ */
+int __s390_uv_destroy_range(struct mm_struct *mm, unsigned long start,
+			    unsigned long end, bool interruptible)
+{
+	struct reset_walk_state state = { .next = start };
+	int r = 1;
+
+	while (r > 0) {
+		state.count = 0;
+		mmap_read_lock(mm);
+		r = walk_page_range(mm, state.next, end, &gather_pages_ops, &state);
+		mmap_read_unlock(mm);
+		cond_resched();
+		s390_uv_destroy_pfns(state.count, state.pfns);
+		if (interruptible && fatal_signal_pending(current))
+			return -EINTR;
+	}
+	return 0;
+}
+EXPORT_SYMBOL_GPL(__s390_uv_destroy_range);
+
+/**
+ * s390_unlist_old_asce - Remove the topmost level of page tables from the
+ * list of page tables of the gmap.
+ * @gmap: the gmap whose table is to be removed
+ *
+ * On s390x, KVM keeps a list of all pages containing the page tables of the
+ * gmap (the CRST list). This list is used at tear down time to free all
+ * pages that are now not needed anymore.
+ *
+ * This function removes the topmost page of the tree (the one pointed to by
+ * the ASCE) from the CRST list.
+ *
+ * This means that it will not be freed when the VM is torn down, and needs
+ * to be handled separately by the caller, unless a leak is actually
+ * intended. Notice that this function will only remove the page from the
+ * list, the page will still be used as a top level page table (and ASCE).
+ */
+void s390_unlist_old_asce(struct gmap *gmap)
+{
+	struct page *old;
+
+	old = virt_to_page(gmap->table);
+	spin_lock(&gmap->guest_table_lock);
+	list_del(&old->lru);
+	/*
+	 * Sometimes the topmost page might need to be "removed" multiple
+	 * times, for example if the VM is rebooted into secure mode several
+	 * times concurrently, or if s390_replace_asce fails after calling
+	 * s390_remove_old_asce and is attempted again later. In that case
+	 * the old asce has been removed from the list, and therefore it
+	 * will not be freed when the VM terminates, but the ASCE is still
+	 * in use and still pointed to.
+	 * A subsequent call to replace_asce will follow the pointer and try
+	 * to remove the same page from the list again.
+	 * Therefore it's necessary that the page of the ASCE has valid
+	 * pointers, so list_del can work (and do nothing) without
+	 * dereferencing stale or invalid pointers.
+	 */
+	INIT_LIST_HEAD(&old->lru);
+	spin_unlock(&gmap->guest_table_lock);
+}
+EXPORT_SYMBOL_GPL(s390_unlist_old_asce);
+
+/**
+ * s390_replace_asce - Try to replace the current ASCE of a gmap with a copy
+ * @gmap: the gmap whose ASCE needs to be replaced
+ *
+ * If the ASCE is a SEGMENT type then this function will return -EINVAL,
+ * otherwise the pointers in the host_to_guest radix tree will keep pointing
+ * to the wrong pages, causing use-after-free and memory corruption.
+ * If the allocation of the new top level page table fails, the ASCE is not
+ * replaced.
+ * In any case, the old ASCE is always removed from the gmap CRST list.
+ * Therefore the caller has to make sure to save a pointer to it
+ * beforehand, unless a leak is actually intended.
+ */
+int s390_replace_asce(struct gmap *gmap)
+{
+	unsigned long asce;
+	struct page *page;
+	void *table;
+
+	s390_unlist_old_asce(gmap);
+
+	/* Replacing segment type ASCEs would cause serious issues */
+	if ((gmap->asce & _ASCE_TYPE_MASK) == _ASCE_TYPE_SEGMENT)
+		return -EINVAL;
+
+	page = gmap_alloc_crst();
+	if (!page)
+		return -ENOMEM;
+	page->index = 0;
+	table = page_to_virt(page);
+	memcpy(table, gmap->table, 1UL << (CRST_ALLOC_ORDER + PAGE_SHIFT));
+
+	/*
+	 * The caller has to deal with the old ASCE, but here we make sure
+	 * the new one is properly added to the CRST list, so that
+	 * it will be freed when the VM is torn down.
+	 */
+	spin_lock(&gmap->guest_table_lock);
+	list_add(&page->lru, &gmap->crst_list);
+	spin_unlock(&gmap->guest_table_lock);
+
+	/* Set new table origin while preserving existing ASCE control bits */
+	asce = (gmap->asce & ~_ASCE_ORIGIN) | __pa(table);
+	WRITE_ONCE(gmap->asce, asce);
+	WRITE_ONCE(gmap->mm->context.gmap_asce, asce);
+	WRITE_ONCE(gmap->table, table);
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(s390_replace_asce);
diff --git a/arch/s390/mm/hugetlbpage.c b/arch/s390/mm/hugetlbpage.c
new file mode 100644
index 0000000000..297a6d897d
--- /dev/null
+++ b/arch/s390/mm/hugetlbpage.c
@@ -0,0 +1,342 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ *  IBM System z Huge TLB Page Support for Kernel.
+ *
+ *    Copyright IBM Corp. 2007,2020
+ *    Author(s): Gerald Schaefer <gerald.schaefer@de.ibm.com>
+ */
+
+#define KMSG_COMPONENT "hugetlb"
+#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
+
+#include <asm/pgalloc.h>
+#include <linux/mm.h>
+#include <linux/hugetlb.h>
+#include <linux/mman.h>
+#include <linux/sched/mm.h>
+#include <linux/security.h>
+
+/*
+ * If the bit selected by single-bit bitmask "a" is set within "x", move
+ * it to the position indicated by single-bit bitmask "b".
+ */
+#define move_set_bit(x, a, b)	(((x) & (a)) >> ilog2(a) << ilog2(b))
+
+static inline unsigned long __pte_to_rste(pte_t pte)
+{
+	unsigned long rste;
+
+	/*
+	 * Convert encoding		  pte bits	pmd / pud bits
+	 *				lIR.uswrdy.p	dy..R...I...wr
+	 * empty			010.000000.0 -> 00..0...1...00
+	 * prot-none, clean, old	111.000000.1 -> 00..1...1...00
+	 * prot-none, clean, young	111.000001.1 -> 01..1...1...00
+	 * prot-none, dirty, old	111.000010.1 -> 10..1...1...00
+	 * prot-none, dirty, young	111.000011.1 -> 11..1...1...00
+	 * read-only, clean, old	111.000100.1 -> 00..1...1...01
+	 * read-only, clean, young	101.000101.1 -> 01..1...0...01
+	 * read-only, dirty, old	111.000110.1 -> 10..1...1...01
+	 * read-only, dirty, young	101.000111.1 -> 11..1...0...01
+	 * read-write, clean, old	111.001100.1 -> 00..1...1...11
+	 * read-write, clean, young	101.001101.1 -> 01..1...0...11
+	 * read-write, dirty, old	110.001110.1 -> 10..0...1...11
+	 * read-write, dirty, young	100.001111.1 -> 11..0...0...11
+	 * HW-bits: R read-only, I invalid
+	 * SW-bits: p present, y young, d dirty, r read, w write, s special,
+	 *	    u unused, l large
+	 */
+	if (pte_present(pte)) {
+		rste = pte_val(pte) & PAGE_MASK;
+		rste |= move_set_bit(pte_val(pte), _PAGE_READ,
+				     _SEGMENT_ENTRY_READ);
+		rste |= move_set_bit(pte_val(pte), _PAGE_WRITE,
+				     _SEGMENT_ENTRY_WRITE);
+		rste |= move_set_bit(pte_val(pte), _PAGE_INVALID,
+				     _SEGMENT_ENTRY_INVALID);
+		rste |= move_set_bit(pte_val(pte), _PAGE_PROTECT,
+				     _SEGMENT_ENTRY_PROTECT);
+		rste |= move_set_bit(pte_val(pte), _PAGE_DIRTY,
+				     _SEGMENT_ENTRY_DIRTY);
+		rste |= move_set_bit(pte_val(pte), _PAGE_YOUNG,
+				     _SEGMENT_ENTRY_YOUNG);
+#ifdef CONFIG_MEM_SOFT_DIRTY
+		rste |= move_set_bit(pte_val(pte), _PAGE_SOFT_DIRTY,
+				     _SEGMENT_ENTRY_SOFT_DIRTY);
+#endif
+		rste |= move_set_bit(pte_val(pte), _PAGE_NOEXEC,
+				     _SEGMENT_ENTRY_NOEXEC);
+	} else
+		rste = _SEGMENT_ENTRY_EMPTY;
+	return rste;
+}
+
+static inline pte_t __rste_to_pte(unsigned long rste)
+{
+	unsigned long pteval;
+	int present;
+
+	if ((rste & _REGION_ENTRY_TYPE_MASK) == _REGION_ENTRY_TYPE_R3)
+		present = pud_present(__pud(rste));
+	else
+		present = pmd_present(__pmd(rste));
+
+	/*
+	 * Convert encoding		pmd / pud bits	    pte bits
+	 *				dy..R...I...wr	  lIR.uswrdy.p
+	 * empty			00..0...1...00 -> 010.000000.0
+	 * prot-none, clean, old	00..1...1...00 -> 111.000000.1
+	 * prot-none, clean, young	01..1...1...00 -> 111.000001.1
+	 * prot-none, dirty, old	10..1...1...00 -> 111.000010.1
+	 * prot-none, dirty, young	11..1...1...00 -> 111.000011.1
+	 * read-only, clean, old	00..1...1...01 -> 111.000100.1
+	 * read-only, clean, young	01..1...0...01 -> 101.000101.1
+	 * read-only, dirty, old	10..1...1...01 -> 111.000110.1
+	 * read-only, dirty, young	11..1...0...01 -> 101.000111.1
+	 * read-write, clean, old	00..1...1...11 -> 111.001100.1
+	 * read-write, clean, young	01..1...0...11 -> 101.001101.1
+	 * read-write, dirty, old	10..0...1...11 -> 110.001110.1
+	 * read-write, dirty, young	11..0...0...11 -> 100.001111.1
+	 * HW-bits: R read-only, I invalid
+	 * SW-bits: p present, y young, d dirty, r read, w write, s special,
+	 *	    u unused, l large
+	 */
+	if (present) {
+		pteval = rste & _SEGMENT_ENTRY_ORIGIN_LARGE;
+		pteval |= _PAGE_LARGE | _PAGE_PRESENT;
+		pteval |= move_set_bit(rste, _SEGMENT_ENTRY_READ, _PAGE_READ);
+		pteval |= move_set_bit(rste, _SEGMENT_ENTRY_WRITE, _PAGE_WRITE);
+		pteval |= move_set_bit(rste, _SEGMENT_ENTRY_INVALID, _PAGE_INVALID);
+		pteval |= move_set_bit(rste, _SEGMENT_ENTRY_PROTECT, _PAGE_PROTECT);
+		pteval |= move_set_bit(rste, _SEGMENT_ENTRY_DIRTY, _PAGE_DIRTY);
+		pteval |= move_set_bit(rste, _SEGMENT_ENTRY_YOUNG, _PAGE_YOUNG);
+#ifdef CONFIG_MEM_SOFT_DIRTY
+		pteval |= move_set_bit(rste, _SEGMENT_ENTRY_SOFT_DIRTY, _PAGE_SOFT_DIRTY);
+#endif
+		pteval |= move_set_bit(rste, _SEGMENT_ENTRY_NOEXEC, _PAGE_NOEXEC);
+	} else
+		pteval = _PAGE_INVALID;
+	return __pte(pteval);
+}
+
+static void clear_huge_pte_skeys(struct mm_struct *mm, unsigned long rste)
+{
+	struct page *page;
+	unsigned long size, paddr;
+
+	if (!mm_uses_skeys(mm) ||
+	    rste & _SEGMENT_ENTRY_INVALID)
+		return;
+
+	if ((rste & _REGION_ENTRY_TYPE_MASK) == _REGION_ENTRY_TYPE_R3) {
+		page = pud_page(__pud(rste));
+		size = PUD_SIZE;
+		paddr = rste & PUD_MASK;
+	} else {
+		page = pmd_page(__pmd(rste));
+		size = PMD_SIZE;
+		paddr = rste & PMD_MASK;
+	}
+
+	if (!test_and_set_bit(PG_arch_1, &page->flags))
+		__storage_key_init_range(paddr, paddr + size - 1);
+}
+
+void __set_huge_pte_at(struct mm_struct *mm, unsigned long addr,
+		     pte_t *ptep, pte_t pte)
+{
+	unsigned long rste;
+
+	rste = __pte_to_rste(pte);
+	if (!MACHINE_HAS_NX)
+		rste &= ~_SEGMENT_ENTRY_NOEXEC;
+
+	/* Set correct table type for 2G hugepages */
+	if ((pte_val(*ptep) & _REGION_ENTRY_TYPE_MASK) == _REGION_ENTRY_TYPE_R3) {
+		if (likely(pte_present(pte)))
+			rste |= _REGION3_ENTRY_LARGE;
+		rste |= _REGION_ENTRY_TYPE_R3;
+	} else if (likely(pte_present(pte)))
+		rste |= _SEGMENT_ENTRY_LARGE;
+
+	clear_huge_pte_skeys(mm, rste);
+	set_pte(ptep, __pte(rste));
+}
+
+void set_huge_pte_at(struct mm_struct *mm, unsigned long addr,
+		     pte_t *ptep, pte_t pte, unsigned long sz)
+{
+	__set_huge_pte_at(mm, addr, ptep, pte);
+}
+
+pte_t huge_ptep_get(pte_t *ptep)
+{
+	return __rste_to_pte(pte_val(*ptep));
+}
+
+pte_t huge_ptep_get_and_clear(struct mm_struct *mm,
+			      unsigned long addr, pte_t *ptep)
+{
+	pte_t pte = huge_ptep_get(ptep);
+	pmd_t *pmdp = (pmd_t *) ptep;
+	pud_t *pudp = (pud_t *) ptep;
+
+	if ((pte_val(*ptep) & _REGION_ENTRY_TYPE_MASK) == _REGION_ENTRY_TYPE_R3)
+		pudp_xchg_direct(mm, addr, pudp, __pud(_REGION3_ENTRY_EMPTY));
+	else
+		pmdp_xchg_direct(mm, addr, pmdp, __pmd(_SEGMENT_ENTRY_EMPTY));
+	return pte;
+}
+
+pte_t *huge_pte_alloc(struct mm_struct *mm, struct vm_area_struct *vma,
+			unsigned long addr, unsigned long sz)
+{
+	pgd_t *pgdp;
+	p4d_t *p4dp;
+	pud_t *pudp;
+	pmd_t *pmdp = NULL;
+
+	pgdp = pgd_offset(mm, addr);
+	p4dp = p4d_alloc(mm, pgdp, addr);
+	if (p4dp) {
+		pudp = pud_alloc(mm, p4dp, addr);
+		if (pudp) {
+			if (sz == PUD_SIZE)
+				return (pte_t *) pudp;
+			else if (sz == PMD_SIZE)
+				pmdp = pmd_alloc(mm, pudp, addr);
+		}
+	}
+	return (pte_t *) pmdp;
+}
+
+pte_t *huge_pte_offset(struct mm_struct *mm,
+		       unsigned long addr, unsigned long sz)
+{
+	pgd_t *pgdp;
+	p4d_t *p4dp;
+	pud_t *pudp;
+	pmd_t *pmdp = NULL;
+
+	pgdp = pgd_offset(mm, addr);
+	if (pgd_present(*pgdp)) {
+		p4dp = p4d_offset(pgdp, addr);
+		if (p4d_present(*p4dp)) {
+			pudp = pud_offset(p4dp, addr);
+			if (pud_present(*pudp)) {
+				if (pud_large(*pudp))
+					return (pte_t *) pudp;
+				pmdp = pmd_offset(pudp, addr);
+			}
+		}
+	}
+	return (pte_t *) pmdp;
+}
+
+int pmd_huge(pmd_t pmd)
+{
+	return pmd_large(pmd);
+}
+
+int pud_huge(pud_t pud)
+{
+	return pud_large(pud);
+}
+
+bool __init arch_hugetlb_valid_size(unsigned long size)
+{
+	if (MACHINE_HAS_EDAT1 && size == PMD_SIZE)
+		return true;
+	else if (MACHINE_HAS_EDAT2 && size == PUD_SIZE)
+		return true;
+	else
+		return false;
+}
+
+static unsigned long hugetlb_get_unmapped_area_bottomup(struct file *file,
+		unsigned long addr, unsigned long len,
+		unsigned long pgoff, unsigned long flags)
+{
+	struct hstate *h = hstate_file(file);
+	struct vm_unmapped_area_info info;
+
+	info.flags = 0;
+	info.length = len;
+	info.low_limit = current->mm->mmap_base;
+	info.high_limit = TASK_SIZE;
+	info.align_mask = PAGE_MASK & ~huge_page_mask(h);
+	info.align_offset = 0;
+	return vm_unmapped_area(&info);
+}
+
+static unsigned long hugetlb_get_unmapped_area_topdown(struct file *file,
+		unsigned long addr0, unsigned long len,
+		unsigned long pgoff, unsigned long flags)
+{
+	struct hstate *h = hstate_file(file);
+	struct vm_unmapped_area_info info;
+	unsigned long addr;
+
+	info.flags = VM_UNMAPPED_AREA_TOPDOWN;
+	info.length = len;
+	info.low_limit = PAGE_SIZE;
+	info.high_limit = current->mm->mmap_base;
+	info.align_mask = PAGE_MASK & ~huge_page_mask(h);
+	info.align_offset = 0;
+	addr = vm_unmapped_area(&info);
+
+	/*
+	 * A failed mmap() very likely causes application failure,
+	 * so fall back to the bottom-up function here. This scenario
+	 * can happen with large stack limits and large mmap()
+	 * allocations.
+	 */
+	if (addr & ~PAGE_MASK) {
+		VM_BUG_ON(addr != -ENOMEM);
+		info.flags = 0;
+		info.low_limit = TASK_UNMAPPED_BASE;
+		info.high_limit = TASK_SIZE;
+		addr = vm_unmapped_area(&info);
+	}
+
+	return addr;
+}
+
+unsigned long hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
+		unsigned long len, unsigned long pgoff, unsigned long flags)
+{
+	struct hstate *h = hstate_file(file);
+	struct mm_struct *mm = current->mm;
+	struct vm_area_struct *vma;
+
+	if (len & ~huge_page_mask(h))
+		return -EINVAL;
+	if (len > TASK_SIZE - mmap_min_addr)
+		return -ENOMEM;
+
+	if (flags & MAP_FIXED) {
+		if (prepare_hugepage_range(file, addr, len))
+			return -EINVAL;
+		goto check_asce_limit;
+	}
+
+	if (addr) {
+		addr = ALIGN(addr, huge_page_size(h));
+		vma = find_vma(mm, addr);
+		if (TASK_SIZE - len >= addr && addr >= mmap_min_addr &&
+		    (!vma || addr + len <= vm_start_gap(vma)))
+			goto check_asce_limit;
+	}
+
+	if (mm->get_unmapped_area == arch_get_unmapped_area)
+		addr = hugetlb_get_unmapped_area_bottomup(file, addr, len,
+				pgoff, flags);
+	else
+		addr = hugetlb_get_unmapped_area_topdown(file, addr, len,
+				pgoff, flags);
+	if (offset_in_page(addr))
+		return addr;
+
+check_asce_limit:
+	return check_asce_limit(mm, addr, len);
+}
diff --git a/arch/s390/mm/init.c b/arch/s390/mm/init.c
new file mode 100644
index 0000000000..8b94d2212d
--- /dev/null
+++ b/arch/s390/mm/init.c
@@ -0,0 +1,313 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ *  S390 version
+ *    Copyright IBM Corp. 1999
+ *    Author(s): Hartmut Penner (hp@de.ibm.com)
+ *
+ *  Derived from "arch/i386/mm/init.c"
+ *    Copyright (C) 1995  Linus Torvalds
+ */
+
+#include <linux/signal.h>
+#include <linux/sched.h>
+#include <linux/kernel.h>
+#include <linux/errno.h>
+#include <linux/string.h>
+#include <linux/types.h>
+#include <linux/ptrace.h>
+#include <linux/mman.h>
+#include <linux/mm.h>
+#include <linux/swap.h>
+#include <linux/swiotlb.h>
+#include <linux/smp.h>
+#include <linux/init.h>
+#include <linux/pagemap.h>
+#include <linux/memblock.h>
+#include <linux/memory.h>
+#include <linux/pfn.h>
+#include <linux/poison.h>
+#include <linux/initrd.h>
+#include <linux/export.h>
+#include <linux/cma.h>
+#include <linux/gfp.h>
+#include <linux/dma-direct.h>
+#include <linux/percpu.h>
+#include <asm/processor.h>
+#include <linux/uaccess.h>
+#include <asm/pgalloc.h>
+#include <asm/kfence.h>
+#include <asm/ptdump.h>
+#include <asm/dma.h>
+#include <asm/abs_lowcore.h>
+#include <asm/tlb.h>
+#include <asm/tlbflush.h>
+#include <asm/sections.h>
+#include <asm/ctl_reg.h>
+#include <asm/sclp.h>
+#include <asm/set_memory.h>
+#include <asm/kasan.h>
+#include <asm/dma-mapping.h>
+#include <asm/uv.h>
+#include <linux/virtio_anchor.h>
+#include <linux/virtio_config.h>
+
+pgd_t swapper_pg_dir[PTRS_PER_PGD] __section(".bss..swapper_pg_dir");
+pgd_t invalid_pg_dir[PTRS_PER_PGD] __section(".bss..invalid_pg_dir");
+
+unsigned long __bootdata_preserved(s390_invalid_asce);
+
+unsigned long empty_zero_page, zero_page_mask;
+EXPORT_SYMBOL(empty_zero_page);
+EXPORT_SYMBOL(zero_page_mask);
+
+static void __init setup_zero_pages(void)
+{
+	unsigned int order;
+	struct page *page;
+	int i;
+
+	/* Latest machines require a mapping granularity of 512KB */
+	order = 7;
+
+	/* Limit number of empty zero pages for small memory sizes */
+	while (order > 2 && (totalram_pages() >> 10) < (1UL << order))
+		order--;
+
+	empty_zero_page = __get_free_pages(GFP_KERNEL | __GFP_ZERO, order);
+	if (!empty_zero_page)
+		panic("Out of memory in setup_zero_pages");
+
+	page = virt_to_page((void *) empty_zero_page);
+	split_page(page, order);
+	for (i = 1 << order; i > 0; i--) {
+		mark_page_reserved(page);
+		page++;
+	}
+
+	zero_page_mask = ((PAGE_SIZE << order) - 1) & PAGE_MASK;
+}
+
+/*
+ * paging_init() sets up the page tables
+ */
+void __init paging_init(void)
+{
+	unsigned long max_zone_pfns[MAX_NR_ZONES];
+
+	vmem_map_init();
+	sparse_init();
+	zone_dma_bits = 31;
+	memset(max_zone_pfns, 0, sizeof(max_zone_pfns));
+	max_zone_pfns[ZONE_DMA] = virt_to_pfn(MAX_DMA_ADDRESS);
+	max_zone_pfns[ZONE_NORMAL] = max_low_pfn;
+	free_area_init(max_zone_pfns);
+}
+
+void mark_rodata_ro(void)
+{
+	unsigned long size = __end_ro_after_init - __start_ro_after_init;
+
+	__set_memory_ro(__start_ro_after_init, __end_ro_after_init);
+	pr_info("Write protected read-only-after-init data: %luk\n", size >> 10);
+	debug_checkwx();
+}
+
+int set_memory_encrypted(unsigned long vaddr, int numpages)
+{
+	int i;
+
+	/* make specified pages unshared, (swiotlb, dma_free) */
+	for (i = 0; i < numpages; ++i) {
+		uv_remove_shared(virt_to_phys((void *)vaddr));
+		vaddr += PAGE_SIZE;
+	}
+	return 0;
+}
+
+int set_memory_decrypted(unsigned long vaddr, int numpages)
+{
+	int i;
+	/* make specified pages shared (swiotlb, dma_alloca) */
+	for (i = 0; i < numpages; ++i) {
+		uv_set_shared(virt_to_phys((void *)vaddr));
+		vaddr += PAGE_SIZE;
+	}
+	return 0;
+}
+
+/* are we a protected virtualization guest? */
+bool force_dma_unencrypted(struct device *dev)
+{
+	return is_prot_virt_guest();
+}
+
+/* protected virtualization */
+static void pv_init(void)
+{
+	if (!is_prot_virt_guest())
+		return;
+
+	virtio_set_mem_acc_cb(virtio_require_restricted_mem_acc);
+
+	/* make sure bounce buffers are shared */
+	swiotlb_init(true, SWIOTLB_FORCE | SWIOTLB_VERBOSE);
+	swiotlb_update_mem_attributes();
+}
+
+void __init mem_init(void)
+{
+	cpumask_set_cpu(0, &init_mm.context.cpu_attach_mask);
+	cpumask_set_cpu(0, mm_cpumask(&init_mm));
+
+	set_max_mapnr(max_low_pfn);
+        high_memory = (void *) __va(max_low_pfn * PAGE_SIZE);
+
+	pv_init();
+	kfence_split_mapping();
+	/* Setup guest page hinting */
+	cmma_init();
+
+	/* this will put all low memory onto the freelists */
+	memblock_free_all();
+	setup_zero_pages();	/* Setup zeroed pages. */
+
+	cmma_init_nodat();
+}
+
+void free_initmem(void)
+{
+	set_memory_rwnx((unsigned long)_sinittext,
+			(unsigned long)(_einittext - _sinittext) >> PAGE_SHIFT);
+	free_initmem_default(POISON_FREE_INITMEM);
+}
+
+unsigned long memory_block_size_bytes(void)
+{
+	/*
+	 * Make sure the memory block size is always greater
+	 * or equal than the memory increment size.
+	 */
+	return max_t(unsigned long, MIN_MEMORY_BLOCK_SIZE, sclp.rzm);
+}
+
+unsigned long __per_cpu_offset[NR_CPUS] __read_mostly;
+EXPORT_SYMBOL(__per_cpu_offset);
+
+static int __init pcpu_cpu_distance(unsigned int from, unsigned int to)
+{
+	return LOCAL_DISTANCE;
+}
+
+static int __init pcpu_cpu_to_node(int cpu)
+{
+	return 0;
+}
+
+void __init setup_per_cpu_areas(void)
+{
+	unsigned long delta;
+	unsigned int cpu;
+	int rc;
+
+	/*
+	 * Always reserve area for module percpu variables.  That's
+	 * what the legacy allocator did.
+	 */
+	rc = pcpu_embed_first_chunk(PERCPU_MODULE_RESERVE,
+				    PERCPU_DYNAMIC_RESERVE, PAGE_SIZE,
+				    pcpu_cpu_distance,
+				    pcpu_cpu_to_node);
+	if (rc < 0)
+		panic("Failed to initialize percpu areas.");
+
+	delta = (unsigned long)pcpu_base_addr - (unsigned long)__per_cpu_start;
+	for_each_possible_cpu(cpu)
+		__per_cpu_offset[cpu] = delta + pcpu_unit_offsets[cpu];
+}
+
+#ifdef CONFIG_MEMORY_HOTPLUG
+
+#ifdef CONFIG_CMA
+
+/* Prevent memory blocks which contain cma regions from going offline */
+
+struct s390_cma_mem_data {
+	unsigned long start;
+	unsigned long end;
+};
+
+static int s390_cma_check_range(struct cma *cma, void *data)
+{
+	struct s390_cma_mem_data *mem_data;
+	unsigned long start, end;
+
+	mem_data = data;
+	start = cma_get_base(cma);
+	end = start + cma_get_size(cma);
+	if (end < mem_data->start)
+		return 0;
+	if (start >= mem_data->end)
+		return 0;
+	return -EBUSY;
+}
+
+static int s390_cma_mem_notifier(struct notifier_block *nb,
+				 unsigned long action, void *data)
+{
+	struct s390_cma_mem_data mem_data;
+	struct memory_notify *arg;
+	int rc = 0;
+
+	arg = data;
+	mem_data.start = arg->start_pfn << PAGE_SHIFT;
+	mem_data.end = mem_data.start + (arg->nr_pages << PAGE_SHIFT);
+	if (action == MEM_GOING_OFFLINE)
+		rc = cma_for_each_area(s390_cma_check_range, &mem_data);
+	return notifier_from_errno(rc);
+}
+
+static struct notifier_block s390_cma_mem_nb = {
+	.notifier_call = s390_cma_mem_notifier,
+};
+
+static int __init s390_cma_mem_init(void)
+{
+	return register_memory_notifier(&s390_cma_mem_nb);
+}
+device_initcall(s390_cma_mem_init);
+
+#endif /* CONFIG_CMA */
+
+int arch_add_memory(int nid, u64 start, u64 size,
+		    struct mhp_params *params)
+{
+	unsigned long start_pfn = PFN_DOWN(start);
+	unsigned long size_pages = PFN_DOWN(size);
+	int rc;
+
+	if (WARN_ON_ONCE(params->altmap))
+		return -EINVAL;
+
+	if (WARN_ON_ONCE(params->pgprot.pgprot != PAGE_KERNEL.pgprot))
+		return -EINVAL;
+
+	VM_BUG_ON(!mhp_range_allowed(start, size, true));
+	rc = vmem_add_mapping(start, size);
+	if (rc)
+		return rc;
+
+	rc = __add_pages(nid, start_pfn, size_pages, params);
+	if (rc)
+		vmem_remove_mapping(start, size);
+	return rc;
+}
+
+void arch_remove_memory(u64 start, u64 size, struct vmem_altmap *altmap)
+{
+	unsigned long start_pfn = start >> PAGE_SHIFT;
+	unsigned long nr_pages = size >> PAGE_SHIFT;
+
+	__remove_pages(start_pfn, nr_pages, altmap);
+	vmem_remove_mapping(start, size);
+}
+#endif /* CONFIG_MEMORY_HOTPLUG */
diff --git a/arch/s390/mm/maccess.c b/arch/s390/mm/maccess.c
new file mode 100644
index 0000000000..c805b3e259
--- /dev/null
+++ b/arch/s390/mm/maccess.c
@@ -0,0 +1,193 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Access kernel memory without faulting -- s390 specific implementation.
+ *
+ * Copyright IBM Corp. 2009, 2015
+ *
+ */
+
+#include <linux/uaccess.h>
+#include <linux/kernel.h>
+#include <linux/types.h>
+#include <linux/errno.h>
+#include <linux/gfp.h>
+#include <linux/cpu.h>
+#include <linux/uio.h>
+#include <linux/io.h>
+#include <asm/asm-extable.h>
+#include <asm/ctl_reg.h>
+#include <asm/abs_lowcore.h>
+#include <asm/stacktrace.h>
+#include <asm/maccess.h>
+
+unsigned long __bootdata_preserved(__memcpy_real_area);
+pte_t *__bootdata_preserved(memcpy_real_ptep);
+static DEFINE_MUTEX(memcpy_real_mutex);
+
+static notrace long s390_kernel_write_odd(void *dst, const void *src, size_t size)
+{
+	unsigned long aligned, offset, count;
+	char tmp[8];
+
+	aligned = (unsigned long) dst & ~7UL;
+	offset = (unsigned long) dst & 7UL;
+	size = min(8UL - offset, size);
+	count = size - 1;
+	asm volatile(
+		"	bras	1,0f\n"
+		"	mvc	0(1,%4),0(%5)\n"
+		"0:	mvc	0(8,%3),0(%0)\n"
+		"	ex	%1,0(1)\n"
+		"	lg	%1,0(%3)\n"
+		"	lra	%0,0(%0)\n"
+		"	sturg	%1,%0\n"
+		: "+&a" (aligned), "+&a" (count), "=m" (tmp)
+		: "a" (&tmp), "a" (&tmp[offset]), "a" (src)
+		: "cc", "memory", "1");
+	return size;
+}
+
+/*
+ * s390_kernel_write - write to kernel memory bypassing DAT
+ * @dst: destination address
+ * @src: source address
+ * @size: number of bytes to copy
+ *
+ * This function writes to kernel memory bypassing DAT and possible page table
+ * write protection. It writes to the destination using the sturg instruction.
+ * Therefore we have a read-modify-write sequence: the function reads eight
+ * bytes from destination at an eight byte boundary, modifies the bytes
+ * requested and writes the result back in a loop.
+ */
+static DEFINE_SPINLOCK(s390_kernel_write_lock);
+
+notrace void *s390_kernel_write(void *dst, const void *src, size_t size)
+{
+	void *tmp = dst;
+	unsigned long flags;
+	long copied;
+
+	spin_lock_irqsave(&s390_kernel_write_lock, flags);
+	while (size) {
+		copied = s390_kernel_write_odd(tmp, src, size);
+		tmp += copied;
+		src += copied;
+		size -= copied;
+	}
+	spin_unlock_irqrestore(&s390_kernel_write_lock, flags);
+
+	return dst;
+}
+
+size_t memcpy_real_iter(struct iov_iter *iter, unsigned long src, size_t count)
+{
+	size_t len, copied, res = 0;
+	unsigned long phys, offset;
+	void *chunk;
+	pte_t pte;
+
+	BUILD_BUG_ON(MEMCPY_REAL_SIZE != PAGE_SIZE);
+	while (count) {
+		phys = src & MEMCPY_REAL_MASK;
+		offset = src & ~MEMCPY_REAL_MASK;
+		chunk = (void *)(__memcpy_real_area + offset);
+		len = min(count, MEMCPY_REAL_SIZE - offset);
+		pte = mk_pte_phys(phys, PAGE_KERNEL_RO);
+
+		mutex_lock(&memcpy_real_mutex);
+		if (pte_val(pte) != pte_val(*memcpy_real_ptep)) {
+			__ptep_ipte(__memcpy_real_area, memcpy_real_ptep, 0, 0, IPTE_GLOBAL);
+			set_pte(memcpy_real_ptep, pte);
+		}
+		copied = copy_to_iter(chunk, len, iter);
+		mutex_unlock(&memcpy_real_mutex);
+
+		count -= copied;
+		src += copied;
+		res += copied;
+		if (copied < len)
+			break;
+	}
+	return res;
+}
+
+int memcpy_real(void *dest, unsigned long src, size_t count)
+{
+	struct iov_iter iter;
+	struct kvec kvec;
+
+	kvec.iov_base = dest;
+	kvec.iov_len = count;
+	iov_iter_kvec(&iter, ITER_DEST, &kvec, 1, count);
+	if (memcpy_real_iter(&iter, src, count) < count)
+		return -EFAULT;
+	return 0;
+}
+
+/*
+ * Find CPU that owns swapped prefix page
+ */
+static int get_swapped_owner(phys_addr_t addr)
+{
+	phys_addr_t lc;
+	int cpu;
+
+	for_each_online_cpu(cpu) {
+		lc = virt_to_phys(lowcore_ptr[cpu]);
+		if (addr > lc + sizeof(struct lowcore) - 1 || addr < lc)
+			continue;
+		return cpu;
+	}
+	return -1;
+}
+
+/*
+ * Convert a physical pointer for /dev/mem access
+ *
+ * For swapped prefix pages a new buffer is returned that contains a copy of
+ * the absolute memory. The buffer size is maximum one page large.
+ */
+void *xlate_dev_mem_ptr(phys_addr_t addr)
+{
+	void *ptr = phys_to_virt(addr);
+	void *bounce = ptr;
+	struct lowcore *abs_lc;
+	unsigned long size;
+	int this_cpu, cpu;
+
+	cpus_read_lock();
+	this_cpu = get_cpu();
+	if (addr >= sizeof(struct lowcore)) {
+		cpu = get_swapped_owner(addr);
+		if (cpu < 0)
+			goto out;
+	}
+	bounce = (void *)__get_free_page(GFP_ATOMIC);
+	if (!bounce)
+		goto out;
+	size = PAGE_SIZE - (addr & ~PAGE_MASK);
+	if (addr < sizeof(struct lowcore)) {
+		abs_lc = get_abs_lowcore();
+		ptr = (void *)abs_lc + addr;
+		memcpy(bounce, ptr, size);
+		put_abs_lowcore(abs_lc);
+	} else if (cpu == this_cpu) {
+		ptr = (void *)(addr - virt_to_phys(lowcore_ptr[cpu]));
+		memcpy(bounce, ptr, size);
+	} else {
+		memcpy(bounce, ptr, size);
+	}
+out:
+	put_cpu();
+	cpus_read_unlock();
+	return bounce;
+}
+
+/*
+ * Free converted buffer for /dev/mem access (if necessary)
+ */
+void unxlate_dev_mem_ptr(phys_addr_t addr, void *ptr)
+{
+	if (addr != virt_to_phys(ptr))
+		free_page((unsigned long)ptr);
+}
diff --git a/arch/s390/mm/mmap.c b/arch/s390/mm/mmap.c
new file mode 100644
index 0000000000..fc9a7dc26c
--- /dev/null
+++ b/arch/s390/mm/mmap.c
@@ -0,0 +1,210 @@
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ *  flexible mmap layout support
+ *
+ * Copyright 2003-2004 Red Hat Inc., Durham, North Carolina.
+ * All Rights Reserved.
+ *
+ * Started by Ingo Molnar <mingo@elte.hu>
+ */
+
+#include <linux/elf-randomize.h>
+#include <linux/personality.h>
+#include <linux/mm.h>
+#include <linux/mman.h>
+#include <linux/sched/signal.h>
+#include <linux/sched/mm.h>
+#include <linux/random.h>
+#include <linux/compat.h>
+#include <linux/security.h>
+#include <asm/elf.h>
+
+static unsigned long stack_maxrandom_size(void)
+{
+	if (!(current->flags & PF_RANDOMIZE))
+		return 0;
+	return STACK_RND_MASK << PAGE_SHIFT;
+}
+
+static inline int mmap_is_legacy(struct rlimit *rlim_stack)
+{
+	if (current->personality & ADDR_COMPAT_LAYOUT)
+		return 1;
+	if (rlim_stack->rlim_cur == RLIM_INFINITY)
+		return 1;
+	return sysctl_legacy_va_layout;
+}
+
+unsigned long arch_mmap_rnd(void)
+{
+	return (get_random_u32() & MMAP_RND_MASK) << PAGE_SHIFT;
+}
+
+static unsigned long mmap_base_legacy(unsigned long rnd)
+{
+	return TASK_UNMAPPED_BASE + rnd;
+}
+
+static inline unsigned long mmap_base(unsigned long rnd,
+				      struct rlimit *rlim_stack)
+{
+	unsigned long gap = rlim_stack->rlim_cur;
+	unsigned long pad = stack_maxrandom_size() + stack_guard_gap;
+	unsigned long gap_min, gap_max;
+
+	/* Values close to RLIM_INFINITY can overflow. */
+	if (gap + pad > gap)
+		gap += pad;
+
+	/*
+	 * Top of mmap area (just below the process stack).
+	 * Leave at least a ~128 MB hole.
+	 */
+	gap_min = SZ_128M;
+	gap_max = (STACK_TOP / 6) * 5;
+
+	if (gap < gap_min)
+		gap = gap_min;
+	else if (gap > gap_max)
+		gap = gap_max;
+
+	return PAGE_ALIGN(STACK_TOP - gap - rnd);
+}
+
+unsigned long arch_get_unmapped_area(struct file *filp, unsigned long addr,
+				     unsigned long len, unsigned long pgoff,
+				     unsigned long flags)
+{
+	struct mm_struct *mm = current->mm;
+	struct vm_area_struct *vma;
+	struct vm_unmapped_area_info info;
+
+	if (len > TASK_SIZE - mmap_min_addr)
+		return -ENOMEM;
+
+	if (flags & MAP_FIXED)
+		goto check_asce_limit;
+
+	if (addr) {
+		addr = PAGE_ALIGN(addr);
+		vma = find_vma(mm, addr);
+		if (TASK_SIZE - len >= addr && addr >= mmap_min_addr &&
+		    (!vma || addr + len <= vm_start_gap(vma)))
+			goto check_asce_limit;
+	}
+
+	info.flags = 0;
+	info.length = len;
+	info.low_limit = mm->mmap_base;
+	info.high_limit = TASK_SIZE;
+	if (filp || (flags & MAP_SHARED))
+		info.align_mask = MMAP_ALIGN_MASK << PAGE_SHIFT;
+	else
+		info.align_mask = 0;
+	info.align_offset = pgoff << PAGE_SHIFT;
+	addr = vm_unmapped_area(&info);
+	if (offset_in_page(addr))
+		return addr;
+
+check_asce_limit:
+	return check_asce_limit(mm, addr, len);
+}
+
+unsigned long arch_get_unmapped_area_topdown(struct file *filp, unsigned long addr,
+					     unsigned long len, unsigned long pgoff,
+					     unsigned long flags)
+{
+	struct vm_area_struct *vma;
+	struct mm_struct *mm = current->mm;
+	struct vm_unmapped_area_info info;
+
+	/* requested length too big for entire address space */
+	if (len > TASK_SIZE - mmap_min_addr)
+		return -ENOMEM;
+
+	if (flags & MAP_FIXED)
+		goto check_asce_limit;
+
+	/* requesting a specific address */
+	if (addr) {
+		addr = PAGE_ALIGN(addr);
+		vma = find_vma(mm, addr);
+		if (TASK_SIZE - len >= addr && addr >= mmap_min_addr &&
+				(!vma || addr + len <= vm_start_gap(vma)))
+			goto check_asce_limit;
+	}
+
+	info.flags = VM_UNMAPPED_AREA_TOPDOWN;
+	info.length = len;
+	info.low_limit = PAGE_SIZE;
+	info.high_limit = mm->mmap_base;
+	if (filp || (flags & MAP_SHARED))
+		info.align_mask = MMAP_ALIGN_MASK << PAGE_SHIFT;
+	else
+		info.align_mask = 0;
+	info.align_offset = pgoff << PAGE_SHIFT;
+	addr = vm_unmapped_area(&info);
+
+	/*
+	 * A failed mmap() very likely causes application failure,
+	 * so fall back to the bottom-up function here. This scenario
+	 * can happen with large stack limits and large mmap()
+	 * allocations.
+	 */
+	if (offset_in_page(addr)) {
+		VM_BUG_ON(addr != -ENOMEM);
+		info.flags = 0;
+		info.low_limit = TASK_UNMAPPED_BASE;
+		info.high_limit = TASK_SIZE;
+		addr = vm_unmapped_area(&info);
+		if (offset_in_page(addr))
+			return addr;
+	}
+
+check_asce_limit:
+	return check_asce_limit(mm, addr, len);
+}
+
+/*
+ * This function, called very early during the creation of a new
+ * process VM image, sets up which VM layout function to use:
+ */
+void arch_pick_mmap_layout(struct mm_struct *mm, struct rlimit *rlim_stack)
+{
+	unsigned long random_factor = 0UL;
+
+	if (current->flags & PF_RANDOMIZE)
+		random_factor = arch_mmap_rnd();
+
+	/*
+	 * Fall back to the standard layout if the personality
+	 * bit is set, or if the expected stack growth is unlimited:
+	 */
+	if (mmap_is_legacy(rlim_stack)) {
+		mm->mmap_base = mmap_base_legacy(random_factor);
+		mm->get_unmapped_area = arch_get_unmapped_area;
+	} else {
+		mm->mmap_base = mmap_base(random_factor, rlim_stack);
+		mm->get_unmapped_area = arch_get_unmapped_area_topdown;
+	}
+}
+
+static const pgprot_t protection_map[16] = {
+	[VM_NONE]					= PAGE_NONE,
+	[VM_READ]					= PAGE_RO,
+	[VM_WRITE]					= PAGE_RO,
+	[VM_WRITE | VM_READ]				= PAGE_RO,
+	[VM_EXEC]					= PAGE_RX,
+	[VM_EXEC | VM_READ]				= PAGE_RX,
+	[VM_EXEC | VM_WRITE]				= PAGE_RX,
+	[VM_EXEC | VM_WRITE | VM_READ]			= PAGE_RX,
+	[VM_SHARED]					= PAGE_NONE,
+	[VM_SHARED | VM_READ]				= PAGE_RO,
+	[VM_SHARED | VM_WRITE]				= PAGE_RW,
+	[VM_SHARED | VM_WRITE | VM_READ]		= PAGE_RW,
+	[VM_SHARED | VM_EXEC]				= PAGE_RX,
+	[VM_SHARED | VM_EXEC | VM_READ]			= PAGE_RX,
+	[VM_SHARED | VM_EXEC | VM_WRITE]		= PAGE_RWX,
+	[VM_SHARED | VM_EXEC | VM_WRITE | VM_READ]	= PAGE_RWX
+};
+DECLARE_VM_GET_PAGE_PROT
diff --git a/arch/s390/mm/page-states.c b/arch/s390/mm/page-states.c
new file mode 100644
index 0000000000..79a037f49f
--- /dev/null
+++ b/arch/s390/mm/page-states.c
@@ -0,0 +1,226 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright IBM Corp. 2008
+ *
+ * Guest page hinting for unused pages.
+ *
+ * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>
+ */
+
+#include <linux/kernel.h>
+#include <linux/errno.h>
+#include <linux/types.h>
+#include <linux/mm.h>
+#include <linux/memblock.h>
+#include <linux/gfp.h>
+#include <linux/init.h>
+#include <asm/asm-extable.h>
+#include <asm/facility.h>
+#include <asm/page-states.h>
+
+static int cmma_flag = 1;
+
+static int __init cmma(char *str)
+{
+	bool enabled;
+
+	if (!kstrtobool(str, &enabled))
+		cmma_flag = enabled;
+	return 1;
+}
+__setup("cmma=", cmma);
+
+static inline int cmma_test_essa(void)
+{
+	unsigned long tmp = 0;
+	int rc = -EOPNOTSUPP;
+
+	/* test ESSA_GET_STATE */
+	asm volatile(
+		"	.insn	rrf,0xb9ab0000,%[tmp],%[tmp],%[cmd],0\n"
+		"0:     la      %[rc],0\n"
+		"1:\n"
+		EX_TABLE(0b,1b)
+		: [rc] "+&d" (rc), [tmp] "+&d" (tmp)
+		: [cmd] "i" (ESSA_GET_STATE));
+	return rc;
+}
+
+void __init cmma_init(void)
+{
+	if (!cmma_flag)
+		return;
+	if (cmma_test_essa()) {
+		cmma_flag = 0;
+		return;
+	}
+	if (test_facility(147))
+		cmma_flag = 2;
+}
+
+static inline void set_page_unused(struct page *page, int order)
+{
+	int i, rc;
+
+	for (i = 0; i < (1 << order); i++)
+		asm volatile(".insn rrf,0xb9ab0000,%0,%1,%2,0"
+			     : "=&d" (rc)
+			     : "a" (page_to_phys(page + i)),
+			       "i" (ESSA_SET_UNUSED));
+}
+
+static inline void set_page_stable_dat(struct page *page, int order)
+{
+	int i, rc;
+
+	for (i = 0; i < (1 << order); i++)
+		asm volatile(".insn rrf,0xb9ab0000,%0,%1,%2,0"
+			     : "=&d" (rc)
+			     : "a" (page_to_phys(page + i)),
+			       "i" (ESSA_SET_STABLE));
+}
+
+static inline void set_page_stable_nodat(struct page *page, int order)
+{
+	int i, rc;
+
+	for (i = 0; i < (1 << order); i++)
+		asm volatile(".insn rrf,0xb9ab0000,%0,%1,%2,0"
+			     : "=&d" (rc)
+			     : "a" (page_to_phys(page + i)),
+			       "i" (ESSA_SET_STABLE_NODAT));
+}
+
+static void mark_kernel_pmd(pud_t *pud, unsigned long addr, unsigned long end)
+{
+	unsigned long next;
+	struct page *page;
+	pmd_t *pmd;
+
+	pmd = pmd_offset(pud, addr);
+	do {
+		next = pmd_addr_end(addr, end);
+		if (pmd_none(*pmd) || pmd_large(*pmd))
+			continue;
+		page = phys_to_page(pmd_val(*pmd));
+		set_bit(PG_arch_1, &page->flags);
+	} while (pmd++, addr = next, addr != end);
+}
+
+static void mark_kernel_pud(p4d_t *p4d, unsigned long addr, unsigned long end)
+{
+	unsigned long next;
+	struct page *page;
+	pud_t *pud;
+	int i;
+
+	pud = pud_offset(p4d, addr);
+	do {
+		next = pud_addr_end(addr, end);
+		if (pud_none(*pud) || pud_large(*pud))
+			continue;
+		if (!pud_folded(*pud)) {
+			page = phys_to_page(pud_val(*pud));
+			for (i = 0; i < 4; i++)
+				set_bit(PG_arch_1, &page[i].flags);
+		}
+		mark_kernel_pmd(pud, addr, next);
+	} while (pud++, addr = next, addr != end);
+}
+
+static void mark_kernel_p4d(pgd_t *pgd, unsigned long addr, unsigned long end)
+{
+	unsigned long next;
+	struct page *page;
+	p4d_t *p4d;
+	int i;
+
+	p4d = p4d_offset(pgd, addr);
+	do {
+		next = p4d_addr_end(addr, end);
+		if (p4d_none(*p4d))
+			continue;
+		if (!p4d_folded(*p4d)) {
+			page = phys_to_page(p4d_val(*p4d));
+			for (i = 0; i < 4; i++)
+				set_bit(PG_arch_1, &page[i].flags);
+		}
+		mark_kernel_pud(p4d, addr, next);
+	} while (p4d++, addr = next, addr != end);
+}
+
+static void mark_kernel_pgd(void)
+{
+	unsigned long addr, next;
+	struct page *page;
+	pgd_t *pgd;
+	int i;
+
+	addr = 0;
+	pgd = pgd_offset_k(addr);
+	do {
+		next = pgd_addr_end(addr, MODULES_END);
+		if (pgd_none(*pgd))
+			continue;
+		if (!pgd_folded(*pgd)) {
+			page = phys_to_page(pgd_val(*pgd));
+			for (i = 0; i < 4; i++)
+				set_bit(PG_arch_1, &page[i].flags);
+		}
+		mark_kernel_p4d(pgd, addr, next);
+	} while (pgd++, addr = next, addr != MODULES_END);
+}
+
+void __init cmma_init_nodat(void)
+{
+	struct page *page;
+	unsigned long start, end, ix;
+	int i;
+
+	if (cmma_flag < 2)
+		return;
+	/* Mark pages used in kernel page tables */
+	mark_kernel_pgd();
+	page = virt_to_page(&swapper_pg_dir);
+	for (i = 0; i < 4; i++)
+		set_bit(PG_arch_1, &page[i].flags);
+	page = virt_to_page(&invalid_pg_dir);
+	for (i = 0; i < 4; i++)
+		set_bit(PG_arch_1, &page[i].flags);
+
+	/* Set all kernel pages not used for page tables to stable/no-dat */
+	for_each_mem_pfn_range(i, MAX_NUMNODES, &start, &end, NULL) {
+		page = pfn_to_page(start);
+		for (ix = start; ix < end; ix++, page++) {
+			if (__test_and_clear_bit(PG_arch_1, &page->flags))
+				continue;	/* skip page table pages */
+			if (!list_empty(&page->lru))
+				continue;	/* skip free pages */
+			set_page_stable_nodat(page, 0);
+		}
+	}
+}
+
+void arch_free_page(struct page *page, int order)
+{
+	if (!cmma_flag)
+		return;
+	set_page_unused(page, order);
+}
+
+void arch_alloc_page(struct page *page, int order)
+{
+	if (!cmma_flag)
+		return;
+	if (cmma_flag < 2)
+		set_page_stable_dat(page, order);
+	else
+		set_page_stable_nodat(page, order);
+}
+
+void arch_set_page_dat(struct page *page, int order)
+{
+	if (!cmma_flag)
+		return;
+	set_page_stable_dat(page, order);
+}
diff --git a/arch/s390/mm/pageattr.c b/arch/s390/mm/pageattr.c
new file mode 100644
index 0000000000..b87e96c64b
--- /dev/null
+++ b/arch/s390/mm/pageattr.c
@@ -0,0 +1,452 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright IBM Corp. 2011
+ * Author(s): Jan Glauber <jang@linux.vnet.ibm.com>
+ */
+#include <linux/hugetlb.h>
+#include <linux/proc_fs.h>
+#include <linux/vmalloc.h>
+#include <linux/mm.h>
+#include <asm/cacheflush.h>
+#include <asm/facility.h>
+#include <asm/pgalloc.h>
+#include <asm/kfence.h>
+#include <asm/page.h>
+#include <asm/set_memory.h>
+
+static inline unsigned long sske_frame(unsigned long addr, unsigned char skey)
+{
+	asm volatile(".insn rrf,0xb22b0000,%[skey],%[addr],1,0"
+		     : [addr] "+a" (addr) : [skey] "d" (skey));
+	return addr;
+}
+
+void __storage_key_init_range(unsigned long start, unsigned long end)
+{
+	unsigned long boundary, size;
+
+	while (start < end) {
+		if (MACHINE_HAS_EDAT1) {
+			/* set storage keys for a 1MB frame */
+			size = 1UL << 20;
+			boundary = (start + size) & ~(size - 1);
+			if (boundary <= end) {
+				do {
+					start = sske_frame(start, PAGE_DEFAULT_KEY);
+				} while (start < boundary);
+				continue;
+			}
+		}
+		page_set_storage_key(start, PAGE_DEFAULT_KEY, 1);
+		start += PAGE_SIZE;
+	}
+}
+
+#ifdef CONFIG_PROC_FS
+atomic_long_t __bootdata_preserved(direct_pages_count[PG_DIRECT_MAP_MAX]);
+
+void arch_report_meminfo(struct seq_file *m)
+{
+	seq_printf(m, "DirectMap4k:    %8lu kB\n",
+		   atomic_long_read(&direct_pages_count[PG_DIRECT_MAP_4K]) << 2);
+	seq_printf(m, "DirectMap1M:    %8lu kB\n",
+		   atomic_long_read(&direct_pages_count[PG_DIRECT_MAP_1M]) << 10);
+	seq_printf(m, "DirectMap2G:    %8lu kB\n",
+		   atomic_long_read(&direct_pages_count[PG_DIRECT_MAP_2G]) << 21);
+}
+#endif /* CONFIG_PROC_FS */
+
+static void pgt_set(unsigned long *old, unsigned long new, unsigned long addr,
+		    unsigned long dtt)
+{
+	unsigned long *table, mask;
+
+	mask = 0;
+	if (MACHINE_HAS_EDAT2) {
+		switch (dtt) {
+		case CRDTE_DTT_REGION3:
+			mask = ~(PTRS_PER_PUD * sizeof(pud_t) - 1);
+			break;
+		case CRDTE_DTT_SEGMENT:
+			mask = ~(PTRS_PER_PMD * sizeof(pmd_t) - 1);
+			break;
+		case CRDTE_DTT_PAGE:
+			mask = ~(PTRS_PER_PTE * sizeof(pte_t) - 1);
+			break;
+		}
+		table = (unsigned long *)((unsigned long)old & mask);
+		crdte(*old, new, table, dtt, addr, S390_lowcore.kernel_asce);
+	} else if (MACHINE_HAS_IDTE) {
+		cspg(old, *old, new);
+	} else {
+		csp((unsigned int *)old + 1, *old, new);
+	}
+}
+
+static int walk_pte_level(pmd_t *pmdp, unsigned long addr, unsigned long end,
+			  unsigned long flags)
+{
+	pte_t *ptep, new;
+
+	if (flags == SET_MEMORY_4K)
+		return 0;
+	ptep = pte_offset_kernel(pmdp, addr);
+	do {
+		new = *ptep;
+		if (pte_none(new))
+			return -EINVAL;
+		if (flags & SET_MEMORY_RO)
+			new = pte_wrprotect(new);
+		else if (flags & SET_MEMORY_RW)
+			new = pte_mkwrite_novma(pte_mkdirty(new));
+		if (flags & SET_MEMORY_NX)
+			new = set_pte_bit(new, __pgprot(_PAGE_NOEXEC));
+		else if (flags & SET_MEMORY_X)
+			new = clear_pte_bit(new, __pgprot(_PAGE_NOEXEC));
+		if (flags & SET_MEMORY_INV) {
+			new = set_pte_bit(new, __pgprot(_PAGE_INVALID));
+		} else if (flags & SET_MEMORY_DEF) {
+			new = __pte(pte_val(new) & PAGE_MASK);
+			new = set_pte_bit(new, PAGE_KERNEL);
+			if (!MACHINE_HAS_NX)
+				new = clear_pte_bit(new, __pgprot(_PAGE_NOEXEC));
+		}
+		pgt_set((unsigned long *)ptep, pte_val(new), addr, CRDTE_DTT_PAGE);
+		ptep++;
+		addr += PAGE_SIZE;
+		cond_resched();
+	} while (addr < end);
+	return 0;
+}
+
+static int split_pmd_page(pmd_t *pmdp, unsigned long addr)
+{
+	unsigned long pte_addr, prot;
+	pte_t *pt_dir, *ptep;
+	pmd_t new;
+	int i, ro, nx;
+
+	pt_dir = vmem_pte_alloc();
+	if (!pt_dir)
+		return -ENOMEM;
+	pte_addr = pmd_pfn(*pmdp) << PAGE_SHIFT;
+	ro = !!(pmd_val(*pmdp) & _SEGMENT_ENTRY_PROTECT);
+	nx = !!(pmd_val(*pmdp) & _SEGMENT_ENTRY_NOEXEC);
+	prot = pgprot_val(ro ? PAGE_KERNEL_RO : PAGE_KERNEL);
+	if (!nx)
+		prot &= ~_PAGE_NOEXEC;
+	ptep = pt_dir;
+	for (i = 0; i < PTRS_PER_PTE; i++) {
+		set_pte(ptep, __pte(pte_addr | prot));
+		pte_addr += PAGE_SIZE;
+		ptep++;
+	}
+	new = __pmd(__pa(pt_dir) | _SEGMENT_ENTRY);
+	pgt_set((unsigned long *)pmdp, pmd_val(new), addr, CRDTE_DTT_SEGMENT);
+	update_page_count(PG_DIRECT_MAP_4K, PTRS_PER_PTE);
+	update_page_count(PG_DIRECT_MAP_1M, -1);
+	return 0;
+}
+
+static void modify_pmd_page(pmd_t *pmdp, unsigned long addr,
+			    unsigned long flags)
+{
+	pmd_t new = *pmdp;
+
+	if (flags & SET_MEMORY_RO)
+		new = pmd_wrprotect(new);
+	else if (flags & SET_MEMORY_RW)
+		new = pmd_mkwrite_novma(pmd_mkdirty(new));
+	if (flags & SET_MEMORY_NX)
+		new = set_pmd_bit(new, __pgprot(_SEGMENT_ENTRY_NOEXEC));
+	else if (flags & SET_MEMORY_X)
+		new = clear_pmd_bit(new, __pgprot(_SEGMENT_ENTRY_NOEXEC));
+	if (flags & SET_MEMORY_INV) {
+		new = set_pmd_bit(new, __pgprot(_SEGMENT_ENTRY_INVALID));
+	} else if (flags & SET_MEMORY_DEF) {
+		new = __pmd(pmd_val(new) & PMD_MASK);
+		new = set_pmd_bit(new, SEGMENT_KERNEL);
+		if (!MACHINE_HAS_NX)
+			new = clear_pmd_bit(new, __pgprot(_SEGMENT_ENTRY_NOEXEC));
+	}
+	pgt_set((unsigned long *)pmdp, pmd_val(new), addr, CRDTE_DTT_SEGMENT);
+}
+
+static int walk_pmd_level(pud_t *pudp, unsigned long addr, unsigned long end,
+			  unsigned long flags)
+{
+	unsigned long next;
+	int need_split;
+	pmd_t *pmdp;
+	int rc = 0;
+
+	pmdp = pmd_offset(pudp, addr);
+	do {
+		if (pmd_none(*pmdp))
+			return -EINVAL;
+		next = pmd_addr_end(addr, end);
+		if (pmd_large(*pmdp)) {
+			need_split  = !!(flags & SET_MEMORY_4K);
+			need_split |= !!(addr & ~PMD_MASK);
+			need_split |= !!(addr + PMD_SIZE > next);
+			if (need_split) {
+				rc = split_pmd_page(pmdp, addr);
+				if (rc)
+					return rc;
+				continue;
+			}
+			modify_pmd_page(pmdp, addr, flags);
+		} else {
+			rc = walk_pte_level(pmdp, addr, next, flags);
+			if (rc)
+				return rc;
+		}
+		pmdp++;
+		addr = next;
+		cond_resched();
+	} while (addr < end);
+	return rc;
+}
+
+static int split_pud_page(pud_t *pudp, unsigned long addr)
+{
+	unsigned long pmd_addr, prot;
+	pmd_t *pm_dir, *pmdp;
+	pud_t new;
+	int i, ro, nx;
+
+	pm_dir = vmem_crst_alloc(_SEGMENT_ENTRY_EMPTY);
+	if (!pm_dir)
+		return -ENOMEM;
+	pmd_addr = pud_pfn(*pudp) << PAGE_SHIFT;
+	ro = !!(pud_val(*pudp) & _REGION_ENTRY_PROTECT);
+	nx = !!(pud_val(*pudp) & _REGION_ENTRY_NOEXEC);
+	prot = pgprot_val(ro ? SEGMENT_KERNEL_RO : SEGMENT_KERNEL);
+	if (!nx)
+		prot &= ~_SEGMENT_ENTRY_NOEXEC;
+	pmdp = pm_dir;
+	for (i = 0; i < PTRS_PER_PMD; i++) {
+		set_pmd(pmdp, __pmd(pmd_addr | prot));
+		pmd_addr += PMD_SIZE;
+		pmdp++;
+	}
+	new = __pud(__pa(pm_dir) | _REGION3_ENTRY);
+	pgt_set((unsigned long *)pudp, pud_val(new), addr, CRDTE_DTT_REGION3);
+	update_page_count(PG_DIRECT_MAP_1M, PTRS_PER_PMD);
+	update_page_count(PG_DIRECT_MAP_2G, -1);
+	return 0;
+}
+
+static void modify_pud_page(pud_t *pudp, unsigned long addr,
+			    unsigned long flags)
+{
+	pud_t new = *pudp;
+
+	if (flags & SET_MEMORY_RO)
+		new = pud_wrprotect(new);
+	else if (flags & SET_MEMORY_RW)
+		new = pud_mkwrite(pud_mkdirty(new));
+	if (flags & SET_MEMORY_NX)
+		new = set_pud_bit(new, __pgprot(_REGION_ENTRY_NOEXEC));
+	else if (flags & SET_MEMORY_X)
+		new = clear_pud_bit(new, __pgprot(_REGION_ENTRY_NOEXEC));
+	if (flags & SET_MEMORY_INV) {
+		new = set_pud_bit(new, __pgprot(_REGION_ENTRY_INVALID));
+	} else if (flags & SET_MEMORY_DEF) {
+		new = __pud(pud_val(new) & PUD_MASK);
+		new = set_pud_bit(new, REGION3_KERNEL);
+		if (!MACHINE_HAS_NX)
+			new = clear_pud_bit(new, __pgprot(_REGION_ENTRY_NOEXEC));
+	}
+	pgt_set((unsigned long *)pudp, pud_val(new), addr, CRDTE_DTT_REGION3);
+}
+
+static int walk_pud_level(p4d_t *p4d, unsigned long addr, unsigned long end,
+			  unsigned long flags)
+{
+	unsigned long next;
+	int need_split;
+	pud_t *pudp;
+	int rc = 0;
+
+	pudp = pud_offset(p4d, addr);
+	do {
+		if (pud_none(*pudp))
+			return -EINVAL;
+		next = pud_addr_end(addr, end);
+		if (pud_large(*pudp)) {
+			need_split  = !!(flags & SET_MEMORY_4K);
+			need_split |= !!(addr & ~PUD_MASK);
+			need_split |= !!(addr + PUD_SIZE > next);
+			if (need_split) {
+				rc = split_pud_page(pudp, addr);
+				if (rc)
+					break;
+				continue;
+			}
+			modify_pud_page(pudp, addr, flags);
+		} else {
+			rc = walk_pmd_level(pudp, addr, next, flags);
+		}
+		pudp++;
+		addr = next;
+		cond_resched();
+	} while (addr < end && !rc);
+	return rc;
+}
+
+static int walk_p4d_level(pgd_t *pgd, unsigned long addr, unsigned long end,
+			  unsigned long flags)
+{
+	unsigned long next;
+	p4d_t *p4dp;
+	int rc = 0;
+
+	p4dp = p4d_offset(pgd, addr);
+	do {
+		if (p4d_none(*p4dp))
+			return -EINVAL;
+		next = p4d_addr_end(addr, end);
+		rc = walk_pud_level(p4dp, addr, next, flags);
+		p4dp++;
+		addr = next;
+		cond_resched();
+	} while (addr < end && !rc);
+	return rc;
+}
+
+DEFINE_MUTEX(cpa_mutex);
+
+static int change_page_attr(unsigned long addr, unsigned long end,
+			    unsigned long flags)
+{
+	unsigned long next;
+	int rc = -EINVAL;
+	pgd_t *pgdp;
+
+	pgdp = pgd_offset_k(addr);
+	do {
+		if (pgd_none(*pgdp))
+			break;
+		next = pgd_addr_end(addr, end);
+		rc = walk_p4d_level(pgdp, addr, next, flags);
+		if (rc)
+			break;
+		cond_resched();
+	} while (pgdp++, addr = next, addr < end && !rc);
+	return rc;
+}
+
+static int change_page_attr_alias(unsigned long addr, unsigned long end,
+				  unsigned long flags)
+{
+	unsigned long alias, offset, va_start, va_end;
+	struct vm_struct *area;
+	int rc = 0;
+
+	/*
+	 * Changes to read-only permissions on kernel VA mappings are also
+	 * applied to the kernel direct mapping. Execute permissions are
+	 * intentionally not transferred to keep all allocated pages within
+	 * the direct mapping non-executable.
+	 */
+	flags &= SET_MEMORY_RO | SET_MEMORY_RW;
+	if (!flags)
+		return 0;
+	area = NULL;
+	while (addr < end) {
+		if (!area)
+			area = find_vm_area((void *)addr);
+		if (!area || !(area->flags & VM_ALLOC))
+			return 0;
+		va_start = (unsigned long)area->addr;
+		va_end = va_start + area->nr_pages * PAGE_SIZE;
+		offset = (addr - va_start) >> PAGE_SHIFT;
+		alias = (unsigned long)page_address(area->pages[offset]);
+		rc = change_page_attr(alias, alias + PAGE_SIZE, flags);
+		if (rc)
+			break;
+		addr += PAGE_SIZE;
+		if (addr >= va_end)
+			area = NULL;
+	}
+	return rc;
+}
+
+int __set_memory(unsigned long addr, unsigned long numpages, unsigned long flags)
+{
+	unsigned long end;
+	int rc;
+
+	if (!MACHINE_HAS_NX)
+		flags &= ~(SET_MEMORY_NX | SET_MEMORY_X);
+	if (!flags)
+		return 0;
+	if (!numpages)
+		return 0;
+	addr &= PAGE_MASK;
+	end = addr + numpages * PAGE_SIZE;
+	mutex_lock(&cpa_mutex);
+	rc = change_page_attr(addr, end, flags);
+	if (rc)
+		goto out;
+	rc = change_page_attr_alias(addr, end, flags);
+out:
+	mutex_unlock(&cpa_mutex);
+	return rc;
+}
+
+int set_direct_map_invalid_noflush(struct page *page)
+{
+	return __set_memory((unsigned long)page_to_virt(page), 1, SET_MEMORY_INV);
+}
+
+int set_direct_map_default_noflush(struct page *page)
+{
+	return __set_memory((unsigned long)page_to_virt(page), 1, SET_MEMORY_DEF);
+}
+
+#if defined(CONFIG_DEBUG_PAGEALLOC) || defined(CONFIG_KFENCE)
+
+static void ipte_range(pte_t *pte, unsigned long address, int nr)
+{
+	int i;
+
+	if (test_facility(13)) {
+		__ptep_ipte_range(address, nr - 1, pte, IPTE_GLOBAL);
+		return;
+	}
+	for (i = 0; i < nr; i++) {
+		__ptep_ipte(address, pte, 0, 0, IPTE_GLOBAL);
+		address += PAGE_SIZE;
+		pte++;
+	}
+}
+
+void __kernel_map_pages(struct page *page, int numpages, int enable)
+{
+	unsigned long address;
+	pte_t *ptep, pte;
+	int nr, i, j;
+
+	for (i = 0; i < numpages;) {
+		address = (unsigned long)page_to_virt(page + i);
+		ptep = virt_to_kpte(address);
+		nr = (unsigned long)ptep >> ilog2(sizeof(long));
+		nr = PTRS_PER_PTE - (nr & (PTRS_PER_PTE - 1));
+		nr = min(numpages - i, nr);
+		if (enable) {
+			for (j = 0; j < nr; j++) {
+				pte = clear_pte_bit(*ptep, __pgprot(_PAGE_INVALID));
+				set_pte(ptep, pte);
+				address += PAGE_SIZE;
+				ptep++;
+			}
+		} else {
+			ipte_range(ptep, address, nr);
+		}
+		i += nr;
+	}
+}
+
+#endif /* CONFIG_DEBUG_PAGEALLOC */
diff --git a/arch/s390/mm/pfault.c b/arch/s390/mm/pfault.c
new file mode 100644
index 0000000000..1aac13bb8f
--- /dev/null
+++ b/arch/s390/mm/pfault.c
@@ -0,0 +1,248 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright IBM Corp. 1999, 2023
+ */
+
+#include <linux/cpuhotplug.h>
+#include <linux/sched/task.h>
+#include <linux/errno.h>
+#include <linux/init.h>
+#include <linux/irq.h>
+#include <asm/asm-extable.h>
+#include <asm/pfault.h>
+#include <asm/diag.h>
+
+#define __SUBCODE_MASK 0x0600
+#define __PF_RES_FIELD 0x8000000000000000UL
+
+/*
+ * 'pfault' pseudo page faults routines.
+ */
+static int pfault_disable;
+
+static int __init nopfault(char *str)
+{
+	pfault_disable = 1;
+	return 1;
+}
+early_param("nopfault", nopfault);
+
+struct pfault_refbk {
+	u16 refdiagc;
+	u16 reffcode;
+	u16 refdwlen;
+	u16 refversn;
+	u64 refgaddr;
+	u64 refselmk;
+	u64 refcmpmk;
+	u64 reserved;
+};
+
+static struct pfault_refbk pfault_init_refbk = {
+	.refdiagc = 0x258,
+	.reffcode = 0,
+	.refdwlen = 5,
+	.refversn = 2,
+	.refgaddr = __LC_LPP,
+	.refselmk = 1UL << 48,
+	.refcmpmk = 1UL << 48,
+	.reserved = __PF_RES_FIELD
+};
+
+int __pfault_init(void)
+{
+	int rc = -EOPNOTSUPP;
+
+	if (pfault_disable)
+		return rc;
+	diag_stat_inc(DIAG_STAT_X258);
+	asm volatile(
+		"	diag	%[refbk],%[rc],0x258\n"
+		"0:	nopr	%%r7\n"
+		EX_TABLE(0b, 0b)
+		: [rc] "+d" (rc)
+		: [refbk] "a" (&pfault_init_refbk), "m" (pfault_init_refbk)
+		: "cc");
+	return rc;
+}
+
+static struct pfault_refbk pfault_fini_refbk = {
+	.refdiagc = 0x258,
+	.reffcode = 1,
+	.refdwlen = 5,
+	.refversn = 2,
+};
+
+void __pfault_fini(void)
+{
+	if (pfault_disable)
+		return;
+	diag_stat_inc(DIAG_STAT_X258);
+	asm volatile(
+		"	diag	%[refbk],0,0x258\n"
+		"0:	nopr	%%r7\n"
+		EX_TABLE(0b, 0b)
+		:
+		: [refbk] "a" (&pfault_fini_refbk), "m" (pfault_fini_refbk)
+		: "cc");
+}
+
+static DEFINE_SPINLOCK(pfault_lock);
+static LIST_HEAD(pfault_list);
+
+#define PF_COMPLETE	0x0080
+
+/*
+ * The mechanism of our pfault code: if Linux is running as guest, runs a user
+ * space process and the user space process accesses a page that the host has
+ * paged out we get a pfault interrupt.
+ *
+ * This allows us, within the guest, to schedule a different process. Without
+ * this mechanism the host would have to suspend the whole virtual cpu until
+ * the page has been paged in.
+ *
+ * So when we get such an interrupt then we set the state of the current task
+ * to uninterruptible and also set the need_resched flag. Both happens within
+ * interrupt context(!). If we later on want to return to user space we
+ * recognize the need_resched flag and then call schedule().  It's not very
+ * obvious how this works...
+ *
+ * Of course we have a lot of additional fun with the completion interrupt (->
+ * host signals that a page of a process has been paged in and the process can
+ * continue to run). This interrupt can arrive on any cpu and, since we have
+ * virtual cpus, actually appear before the interrupt that signals that a page
+ * is missing.
+ */
+static void pfault_interrupt(struct ext_code ext_code,
+			     unsigned int param32, unsigned long param64)
+{
+	struct task_struct *tsk;
+	__u16 subcode;
+	pid_t pid;
+
+	/*
+	 * Get the external interruption subcode & pfault initial/completion
+	 * signal bit. VM stores this in the 'cpu address' field associated
+	 * with the external interrupt.
+	 */
+	subcode = ext_code.subcode;
+	if ((subcode & 0xff00) != __SUBCODE_MASK)
+		return;
+	inc_irq_stat(IRQEXT_PFL);
+	/* Get the token (= pid of the affected task). */
+	pid = param64 & LPP_PID_MASK;
+	rcu_read_lock();
+	tsk = find_task_by_pid_ns(pid, &init_pid_ns);
+	if (tsk)
+		get_task_struct(tsk);
+	rcu_read_unlock();
+	if (!tsk)
+		return;
+	spin_lock(&pfault_lock);
+	if (subcode & PF_COMPLETE) {
+		/* signal bit is set -> a page has been swapped in by VM */
+		if (tsk->thread.pfault_wait == 1) {
+			/*
+			 * Initial interrupt was faster than the completion
+			 * interrupt. pfault_wait is valid. Set pfault_wait
+			 * back to zero and wake up the process. This can
+			 * safely be done because the task is still sleeping
+			 * and can't produce new pfaults.
+			 */
+			tsk->thread.pfault_wait = 0;
+			list_del(&tsk->thread.list);
+			wake_up_process(tsk);
+			put_task_struct(tsk);
+		} else {
+			/*
+			 * Completion interrupt was faster than initial
+			 * interrupt. Set pfault_wait to -1 so the initial
+			 * interrupt doesn't put the task to sleep.
+			 * If the task is not running, ignore the completion
+			 * interrupt since it must be a leftover of a PFAULT
+			 * CANCEL operation which didn't remove all pending
+			 * completion interrupts.
+			 */
+			if (task_is_running(tsk))
+				tsk->thread.pfault_wait = -1;
+		}
+	} else {
+		/* signal bit not set -> a real page is missing. */
+		if (WARN_ON_ONCE(tsk != current))
+			goto out;
+		if (tsk->thread.pfault_wait == 1) {
+			/* Already on the list with a reference: put to sleep */
+			goto block;
+		} else if (tsk->thread.pfault_wait == -1) {
+			/*
+			 * Completion interrupt was faster than the initial
+			 * interrupt (pfault_wait == -1). Set pfault_wait
+			 * back to zero and exit.
+			 */
+			tsk->thread.pfault_wait = 0;
+		} else {
+			/*
+			 * Initial interrupt arrived before completion
+			 * interrupt. Let the task sleep.
+			 * An extra task reference is needed since a different
+			 * cpu may set the task state to TASK_RUNNING again
+			 * before the scheduler is reached.
+			 */
+			get_task_struct(tsk);
+			tsk->thread.pfault_wait = 1;
+			list_add(&tsk->thread.list, &pfault_list);
+block:
+			/*
+			 * Since this must be a userspace fault, there
+			 * is no kernel task state to trample. Rely on the
+			 * return to userspace schedule() to block.
+			 */
+			__set_current_state(TASK_UNINTERRUPTIBLE);
+			set_tsk_need_resched(tsk);
+			set_preempt_need_resched();
+		}
+	}
+out:
+	spin_unlock(&pfault_lock);
+	put_task_struct(tsk);
+}
+
+static int pfault_cpu_dead(unsigned int cpu)
+{
+	struct thread_struct *thread, *next;
+	struct task_struct *tsk;
+
+	spin_lock_irq(&pfault_lock);
+	list_for_each_entry_safe(thread, next, &pfault_list, list) {
+		thread->pfault_wait = 0;
+		list_del(&thread->list);
+		tsk = container_of(thread, struct task_struct, thread);
+		wake_up_process(tsk);
+		put_task_struct(tsk);
+	}
+	spin_unlock_irq(&pfault_lock);
+	return 0;
+}
+
+static int __init pfault_irq_init(void)
+{
+	int rc;
+
+	rc = register_external_irq(EXT_IRQ_CP_SERVICE, pfault_interrupt);
+	if (rc)
+		goto out_extint;
+	rc = pfault_init() == 0 ? 0 : -EOPNOTSUPP;
+	if (rc)
+		goto out_pfault;
+	irq_subclass_register(IRQ_SUBCLASS_SERVICE_SIGNAL);
+	cpuhp_setup_state_nocalls(CPUHP_S390_PFAULT_DEAD, "s390/pfault:dead",
+				  NULL, pfault_cpu_dead);
+	return 0;
+
+out_pfault:
+	unregister_external_irq(EXT_IRQ_CP_SERVICE, pfault_interrupt);
+out_extint:
+	pfault_disable = 1;
+	return rc;
+}
+early_initcall(pfault_irq_init);
diff --git a/arch/s390/mm/pgalloc.c b/arch/s390/mm/pgalloc.c
new file mode 100644
index 0000000000..6396d6b06a
--- /dev/null
+++ b/arch/s390/mm/pgalloc.c
@@ -0,0 +1,757 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ *  Page table allocation functions
+ *
+ *    Copyright IBM Corp. 2016
+ *    Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>
+ */
+
+#include <linux/sysctl.h>
+#include <linux/slab.h>
+#include <linux/mm.h>
+#include <asm/mmu_context.h>
+#include <asm/pgalloc.h>
+#include <asm/gmap.h>
+#include <asm/tlb.h>
+#include <asm/tlbflush.h>
+
+#ifdef CONFIG_PGSTE
+
+int page_table_allocate_pgste = 0;
+EXPORT_SYMBOL(page_table_allocate_pgste);
+
+static struct ctl_table page_table_sysctl[] = {
+	{
+		.procname	= "allocate_pgste",
+		.data		= &page_table_allocate_pgste,
+		.maxlen		= sizeof(int),
+		.mode		= S_IRUGO | S_IWUSR,
+		.proc_handler	= proc_dointvec_minmax,
+		.extra1		= SYSCTL_ZERO,
+		.extra2		= SYSCTL_ONE,
+	},
+	{ }
+};
+
+static int __init page_table_register_sysctl(void)
+{
+	return register_sysctl("vm", page_table_sysctl) ? 0 : -ENOMEM;
+}
+__initcall(page_table_register_sysctl);
+
+#endif /* CONFIG_PGSTE */
+
+unsigned long *crst_table_alloc(struct mm_struct *mm)
+{
+	struct ptdesc *ptdesc = pagetable_alloc(GFP_KERNEL, CRST_ALLOC_ORDER);
+
+	if (!ptdesc)
+		return NULL;
+	arch_set_page_dat(ptdesc_page(ptdesc), CRST_ALLOC_ORDER);
+	return (unsigned long *) ptdesc_to_virt(ptdesc);
+}
+
+void crst_table_free(struct mm_struct *mm, unsigned long *table)
+{
+	pagetable_free(virt_to_ptdesc(table));
+}
+
+static void __crst_table_upgrade(void *arg)
+{
+	struct mm_struct *mm = arg;
+
+	/* change all active ASCEs to avoid the creation of new TLBs */
+	if (current->active_mm == mm) {
+		S390_lowcore.user_asce = mm->context.asce;
+		__ctl_load(S390_lowcore.user_asce, 7, 7);
+	}
+	__tlb_flush_local();
+}
+
+int crst_table_upgrade(struct mm_struct *mm, unsigned long end)
+{
+	unsigned long *pgd = NULL, *p4d = NULL, *__pgd;
+	unsigned long asce_limit = mm->context.asce_limit;
+
+	/* upgrade should only happen from 3 to 4, 3 to 5, or 4 to 5 levels */
+	VM_BUG_ON(asce_limit < _REGION2_SIZE);
+
+	if (end <= asce_limit)
+		return 0;
+
+	if (asce_limit == _REGION2_SIZE) {
+		p4d = crst_table_alloc(mm);
+		if (unlikely(!p4d))
+			goto err_p4d;
+		crst_table_init(p4d, _REGION2_ENTRY_EMPTY);
+	}
+	if (end > _REGION1_SIZE) {
+		pgd = crst_table_alloc(mm);
+		if (unlikely(!pgd))
+			goto err_pgd;
+		crst_table_init(pgd, _REGION1_ENTRY_EMPTY);
+	}
+
+	spin_lock_bh(&mm->page_table_lock);
+
+	/*
+	 * This routine gets called with mmap_lock lock held and there is
+	 * no reason to optimize for the case of otherwise. However, if
+	 * that would ever change, the below check will let us know.
+	 */
+	VM_BUG_ON(asce_limit != mm->context.asce_limit);
+
+	if (p4d) {
+		__pgd = (unsigned long *) mm->pgd;
+		p4d_populate(mm, (p4d_t *) p4d, (pud_t *) __pgd);
+		mm->pgd = (pgd_t *) p4d;
+		mm->context.asce_limit = _REGION1_SIZE;
+		mm->context.asce = __pa(mm->pgd) | _ASCE_TABLE_LENGTH |
+			_ASCE_USER_BITS | _ASCE_TYPE_REGION2;
+		mm_inc_nr_puds(mm);
+	}
+	if (pgd) {
+		__pgd = (unsigned long *) mm->pgd;
+		pgd_populate(mm, (pgd_t *) pgd, (p4d_t *) __pgd);
+		mm->pgd = (pgd_t *) pgd;
+		mm->context.asce_limit = TASK_SIZE_MAX;
+		mm->context.asce = __pa(mm->pgd) | _ASCE_TABLE_LENGTH |
+			_ASCE_USER_BITS | _ASCE_TYPE_REGION1;
+	}
+
+	spin_unlock_bh(&mm->page_table_lock);
+
+	on_each_cpu(__crst_table_upgrade, mm, 0);
+
+	return 0;
+
+err_pgd:
+	crst_table_free(mm, p4d);
+err_p4d:
+	return -ENOMEM;
+}
+
+static inline unsigned int atomic_xor_bits(atomic_t *v, unsigned int bits)
+{
+	return atomic_fetch_xor(bits, v) ^ bits;
+}
+
+#ifdef CONFIG_PGSTE
+
+struct page *page_table_alloc_pgste(struct mm_struct *mm)
+{
+	struct ptdesc *ptdesc;
+	u64 *table;
+
+	ptdesc = pagetable_alloc(GFP_KERNEL, 0);
+	if (ptdesc) {
+		table = (u64 *)ptdesc_to_virt(ptdesc);
+		arch_set_page_dat(virt_to_page(table), 0);
+		memset64(table, _PAGE_INVALID, PTRS_PER_PTE);
+		memset64(table + PTRS_PER_PTE, 0, PTRS_PER_PTE);
+	}
+	return ptdesc_page(ptdesc);
+}
+
+void page_table_free_pgste(struct page *page)
+{
+	pagetable_free(page_ptdesc(page));
+}
+
+#endif /* CONFIG_PGSTE */
+
+/*
+ * A 2KB-pgtable is either upper or lower half of a normal page.
+ * The second half of the page may be unused or used as another
+ * 2KB-pgtable.
+ *
+ * Whenever possible the parent page for a new 2KB-pgtable is picked
+ * from the list of partially allocated pages mm_context_t::pgtable_list.
+ * In case the list is empty a new parent page is allocated and added to
+ * the list.
+ *
+ * When a parent page gets fully allocated it contains 2KB-pgtables in both
+ * upper and lower halves and is removed from mm_context_t::pgtable_list.
+ *
+ * When 2KB-pgtable is freed from to fully allocated parent page that
+ * page turns partially allocated and added to mm_context_t::pgtable_list.
+ *
+ * If 2KB-pgtable is freed from the partially allocated parent page that
+ * page turns unused and gets removed from mm_context_t::pgtable_list.
+ * Furthermore, the unused parent page is released.
+ *
+ * As follows from the above, no unallocated or fully allocated parent
+ * pages are contained in mm_context_t::pgtable_list.
+ *
+ * The upper byte (bits 24-31) of the parent page _refcount is used
+ * for tracking contained 2KB-pgtables and has the following format:
+ *
+ *   PP  AA
+ * 01234567    upper byte (bits 24-31) of struct page::_refcount
+ *   ||  ||
+ *   ||  |+--- upper 2KB-pgtable is allocated
+ *   ||  +---- lower 2KB-pgtable is allocated
+ *   |+------- upper 2KB-pgtable is pending for removal
+ *   +-------- lower 2KB-pgtable is pending for removal
+ *
+ * (See commit 620b4e903179 ("s390: use _refcount for pgtables") on why
+ * using _refcount is possible).
+ *
+ * When 2KB-pgtable is allocated the corresponding AA bit is set to 1.
+ * The parent page is either:
+ *   - added to mm_context_t::pgtable_list in case the second half of the
+ *     parent page is still unallocated;
+ *   - removed from mm_context_t::pgtable_list in case both hales of the
+ *     parent page are allocated;
+ * These operations are protected with mm_context_t::lock.
+ *
+ * When 2KB-pgtable is deallocated the corresponding AA bit is set to 0
+ * and the corresponding PP bit is set to 1 in a single atomic operation.
+ * Thus, PP and AA bits corresponding to the same 2KB-pgtable are mutually
+ * exclusive and may never be both set to 1!
+ * The parent page is either:
+ *   - added to mm_context_t::pgtable_list in case the second half of the
+ *     parent page is still allocated;
+ *   - removed from mm_context_t::pgtable_list in case the second half of
+ *     the parent page is unallocated;
+ * These operations are protected with mm_context_t::lock.
+ *
+ * It is important to understand that mm_context_t::lock only protects
+ * mm_context_t::pgtable_list and AA bits, but not the parent page itself
+ * and PP bits.
+ *
+ * Releasing the parent page happens whenever the PP bit turns from 1 to 0,
+ * while both AA bits and the second PP bit are already unset. Then the
+ * parent page does not contain any 2KB-pgtable fragment anymore, and it has
+ * also been removed from mm_context_t::pgtable_list. It is safe to release
+ * the page therefore.
+ *
+ * PGSTE memory spaces use full 4KB-pgtables and do not need most of the
+ * logic described above. Both AA bits are set to 1 to denote a 4KB-pgtable
+ * while the PP bits are never used, nor such a page is added to or removed
+ * from mm_context_t::pgtable_list.
+ *
+ * pte_free_defer() overrides those rules: it takes the page off pgtable_list,
+ * and prevents both 2K fragments from being reused. pte_free_defer() has to
+ * guarantee that its pgtable cannot be reused before the RCU grace period
+ * has elapsed (which page_table_free_rcu() does not actually guarantee).
+ * But for simplicity, because page->rcu_head overlays page->lru, and because
+ * the RCU callback might not be called before the mm_context_t has been freed,
+ * pte_free_defer() in this implementation prevents both fragments from being
+ * reused, and delays making the call to RCU until both fragments are freed.
+ */
+unsigned long *page_table_alloc(struct mm_struct *mm)
+{
+	unsigned long *table;
+	struct ptdesc *ptdesc;
+	unsigned int mask, bit;
+
+	/* Try to get a fragment of a 4K page as a 2K page table */
+	if (!mm_alloc_pgste(mm)) {
+		table = NULL;
+		spin_lock_bh(&mm->context.lock);
+		if (!list_empty(&mm->context.pgtable_list)) {
+			ptdesc = list_first_entry(&mm->context.pgtable_list,
+						struct ptdesc, pt_list);
+			mask = atomic_read(&ptdesc->_refcount) >> 24;
+			/*
+			 * The pending removal bits must also be checked.
+			 * Failure to do so might lead to an impossible
+			 * value of (i.e 0x13 or 0x23) written to _refcount.
+			 * Such values violate the assumption that pending and
+			 * allocation bits are mutually exclusive, and the rest
+			 * of the code unrails as result. That could lead to
+			 * a whole bunch of races and corruptions.
+			 */
+			mask = (mask | (mask >> 4)) & 0x03U;
+			if (mask != 0x03U) {
+				table = (unsigned long *) ptdesc_to_virt(ptdesc);
+				bit = mask & 1;		/* =1 -> second 2K */
+				if (bit)
+					table += PTRS_PER_PTE;
+				atomic_xor_bits(&ptdesc->_refcount,
+							0x01U << (bit + 24));
+				list_del_init(&ptdesc->pt_list);
+			}
+		}
+		spin_unlock_bh(&mm->context.lock);
+		if (table)
+			return table;
+	}
+	/* Allocate a fresh page */
+	ptdesc = pagetable_alloc(GFP_KERNEL, 0);
+	if (!ptdesc)
+		return NULL;
+	if (!pagetable_pte_ctor(ptdesc)) {
+		pagetable_free(ptdesc);
+		return NULL;
+	}
+	arch_set_page_dat(ptdesc_page(ptdesc), 0);
+	/* Initialize page table */
+	table = (unsigned long *) ptdesc_to_virt(ptdesc);
+	if (mm_alloc_pgste(mm)) {
+		/* Return 4K page table with PGSTEs */
+		INIT_LIST_HEAD(&ptdesc->pt_list);
+		atomic_xor_bits(&ptdesc->_refcount, 0x03U << 24);
+		memset64((u64 *)table, _PAGE_INVALID, PTRS_PER_PTE);
+		memset64((u64 *)table + PTRS_PER_PTE, 0, PTRS_PER_PTE);
+	} else {
+		/* Return the first 2K fragment of the page */
+		atomic_xor_bits(&ptdesc->_refcount, 0x01U << 24);
+		memset64((u64 *)table, _PAGE_INVALID, 2 * PTRS_PER_PTE);
+		spin_lock_bh(&mm->context.lock);
+		list_add(&ptdesc->pt_list, &mm->context.pgtable_list);
+		spin_unlock_bh(&mm->context.lock);
+	}
+	return table;
+}
+
+static void page_table_release_check(struct page *page, void *table,
+				     unsigned int half, unsigned int mask)
+{
+	char msg[128];
+
+	if (!IS_ENABLED(CONFIG_DEBUG_VM))
+		return;
+	if (!mask && list_empty(&page->lru))
+		return;
+	snprintf(msg, sizeof(msg),
+		 "Invalid pgtable %p release half 0x%02x mask 0x%02x",
+		 table, half, mask);
+	dump_page(page, msg);
+}
+
+static void pte_free_now(struct rcu_head *head)
+{
+	struct ptdesc *ptdesc;
+
+	ptdesc = container_of(head, struct ptdesc, pt_rcu_head);
+	pagetable_pte_dtor(ptdesc);
+	pagetable_free(ptdesc);
+}
+
+void page_table_free(struct mm_struct *mm, unsigned long *table)
+{
+	unsigned int mask, bit, half;
+	struct ptdesc *ptdesc = virt_to_ptdesc(table);
+
+	if (!mm_alloc_pgste(mm)) {
+		/* Free 2K page table fragment of a 4K page */
+		bit = ((unsigned long) table & ~PAGE_MASK)/(PTRS_PER_PTE*sizeof(pte_t));
+		spin_lock_bh(&mm->context.lock);
+		/*
+		 * Mark the page for delayed release. The actual release
+		 * will happen outside of the critical section from this
+		 * function or from __tlb_remove_table()
+		 */
+		mask = atomic_xor_bits(&ptdesc->_refcount, 0x11U << (bit + 24));
+		mask >>= 24;
+		if ((mask & 0x03U) && !folio_test_active(ptdesc_folio(ptdesc))) {
+			/*
+			 * Other half is allocated, and neither half has had
+			 * its free deferred: add page to head of list, to make
+			 * this freed half available for immediate reuse.
+			 */
+			list_add(&ptdesc->pt_list, &mm->context.pgtable_list);
+		} else {
+			/* If page is on list, now remove it. */
+			list_del_init(&ptdesc->pt_list);
+		}
+		spin_unlock_bh(&mm->context.lock);
+		mask = atomic_xor_bits(&ptdesc->_refcount, 0x10U << (bit + 24));
+		mask >>= 24;
+		if (mask != 0x00U)
+			return;
+		half = 0x01U << bit;
+	} else {
+		half = 0x03U;
+		mask = atomic_xor_bits(&ptdesc->_refcount, 0x03U << 24);
+		mask >>= 24;
+	}
+
+	page_table_release_check(ptdesc_page(ptdesc), table, half, mask);
+	if (folio_test_clear_active(ptdesc_folio(ptdesc)))
+		call_rcu(&ptdesc->pt_rcu_head, pte_free_now);
+	else
+		pte_free_now(&ptdesc->pt_rcu_head);
+}
+
+void page_table_free_rcu(struct mmu_gather *tlb, unsigned long *table,
+			 unsigned long vmaddr)
+{
+	struct mm_struct *mm;
+	unsigned int bit, mask;
+	struct ptdesc *ptdesc = virt_to_ptdesc(table);
+
+	mm = tlb->mm;
+	if (mm_alloc_pgste(mm)) {
+		gmap_unlink(mm, table, vmaddr);
+		table = (unsigned long *) ((unsigned long)table | 0x03U);
+		tlb_remove_ptdesc(tlb, table);
+		return;
+	}
+	bit = ((unsigned long) table & ~PAGE_MASK) / (PTRS_PER_PTE*sizeof(pte_t));
+	spin_lock_bh(&mm->context.lock);
+	/*
+	 * Mark the page for delayed release. The actual release will happen
+	 * outside of the critical section from __tlb_remove_table() or from
+	 * page_table_free()
+	 */
+	mask = atomic_xor_bits(&ptdesc->_refcount, 0x11U << (bit + 24));
+	mask >>= 24;
+	if ((mask & 0x03U) && !folio_test_active(ptdesc_folio(ptdesc))) {
+		/*
+		 * Other half is allocated, and neither half has had
+		 * its free deferred: add page to end of list, to make
+		 * this freed half available for reuse once its pending
+		 * bit has been cleared by __tlb_remove_table().
+		 */
+		list_add_tail(&ptdesc->pt_list, &mm->context.pgtable_list);
+	} else {
+		/* If page is on list, now remove it. */
+		list_del_init(&ptdesc->pt_list);
+	}
+	spin_unlock_bh(&mm->context.lock);
+	table = (unsigned long *) ((unsigned long) table | (0x01U << bit));
+	tlb_remove_table(tlb, table);
+}
+
+void __tlb_remove_table(void *_table)
+{
+	unsigned int mask = (unsigned long) _table & 0x03U, half = mask;
+	void *table = (void *)((unsigned long) _table ^ mask);
+	struct ptdesc *ptdesc = virt_to_ptdesc(table);
+
+	switch (half) {
+	case 0x00U:	/* pmd, pud, or p4d */
+		pagetable_free(ptdesc);
+		return;
+	case 0x01U:	/* lower 2K of a 4K page table */
+	case 0x02U:	/* higher 2K of a 4K page table */
+		mask = atomic_xor_bits(&ptdesc->_refcount, mask << (4 + 24));
+		mask >>= 24;
+		if (mask != 0x00U)
+			return;
+		break;
+	case 0x03U:	/* 4K page table with pgstes */
+		mask = atomic_xor_bits(&ptdesc->_refcount, 0x03U << 24);
+		mask >>= 24;
+		break;
+	}
+
+	page_table_release_check(ptdesc_page(ptdesc), table, half, mask);
+	if (folio_test_clear_active(ptdesc_folio(ptdesc)))
+		call_rcu(&ptdesc->pt_rcu_head, pte_free_now);
+	else
+		pte_free_now(&ptdesc->pt_rcu_head);
+}
+
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+void pte_free_defer(struct mm_struct *mm, pgtable_t pgtable)
+{
+	struct page *page;
+
+	page = virt_to_page(pgtable);
+	SetPageActive(page);
+	page_table_free(mm, (unsigned long *)pgtable);
+	/*
+	 * page_table_free() does not do the pgste gmap_unlink() which
+	 * page_table_free_rcu() does: warn us if pgste ever reaches here.
+	 */
+	WARN_ON_ONCE(mm_has_pgste(mm));
+}
+#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
+
+/*
+ * Base infrastructure required to generate basic asces, region, segment,
+ * and page tables that do not make use of enhanced features like EDAT1.
+ */
+
+static struct kmem_cache *base_pgt_cache;
+
+static unsigned long *base_pgt_alloc(void)
+{
+	unsigned long *table;
+
+	table = kmem_cache_alloc(base_pgt_cache, GFP_KERNEL);
+	if (table)
+		memset64((u64 *)table, _PAGE_INVALID, PTRS_PER_PTE);
+	return table;
+}
+
+static void base_pgt_free(unsigned long *table)
+{
+	kmem_cache_free(base_pgt_cache, table);
+}
+
+static unsigned long *base_crst_alloc(unsigned long val)
+{
+	unsigned long *table;
+	struct ptdesc *ptdesc;
+
+	ptdesc = pagetable_alloc(GFP_KERNEL & ~__GFP_HIGHMEM, CRST_ALLOC_ORDER);
+	if (!ptdesc)
+		return NULL;
+	table = ptdesc_address(ptdesc);
+
+	crst_table_init(table, val);
+	return table;
+}
+
+static void base_crst_free(unsigned long *table)
+{
+	pagetable_free(virt_to_ptdesc(table));
+}
+
+#define BASE_ADDR_END_FUNC(NAME, SIZE)					\
+static inline unsigned long base_##NAME##_addr_end(unsigned long addr,	\
+						   unsigned long end)	\
+{									\
+	unsigned long next = (addr + (SIZE)) & ~((SIZE) - 1);		\
+									\
+	return (next - 1) < (end - 1) ? next : end;			\
+}
+
+BASE_ADDR_END_FUNC(page,    _PAGE_SIZE)
+BASE_ADDR_END_FUNC(segment, _SEGMENT_SIZE)
+BASE_ADDR_END_FUNC(region3, _REGION3_SIZE)
+BASE_ADDR_END_FUNC(region2, _REGION2_SIZE)
+BASE_ADDR_END_FUNC(region1, _REGION1_SIZE)
+
+static inline unsigned long base_lra(unsigned long address)
+{
+	unsigned long real;
+
+	asm volatile(
+		"	lra	%0,0(%1)\n"
+		: "=d" (real) : "a" (address) : "cc");
+	return real;
+}
+
+static int base_page_walk(unsigned long *origin, unsigned long addr,
+			  unsigned long end, int alloc)
+{
+	unsigned long *pte, next;
+
+	if (!alloc)
+		return 0;
+	pte = origin;
+	pte += (addr & _PAGE_INDEX) >> _PAGE_SHIFT;
+	do {
+		next = base_page_addr_end(addr, end);
+		*pte = base_lra(addr);
+	} while (pte++, addr = next, addr < end);
+	return 0;
+}
+
+static int base_segment_walk(unsigned long *origin, unsigned long addr,
+			     unsigned long end, int alloc)
+{
+	unsigned long *ste, next, *table;
+	int rc;
+
+	ste = origin;
+	ste += (addr & _SEGMENT_INDEX) >> _SEGMENT_SHIFT;
+	do {
+		next = base_segment_addr_end(addr, end);
+		if (*ste & _SEGMENT_ENTRY_INVALID) {
+			if (!alloc)
+				continue;
+			table = base_pgt_alloc();
+			if (!table)
+				return -ENOMEM;
+			*ste = __pa(table) | _SEGMENT_ENTRY;
+		}
+		table = __va(*ste & _SEGMENT_ENTRY_ORIGIN);
+		rc = base_page_walk(table, addr, next, alloc);
+		if (rc)
+			return rc;
+		if (!alloc)
+			base_pgt_free(table);
+		cond_resched();
+	} while (ste++, addr = next, addr < end);
+	return 0;
+}
+
+static int base_region3_walk(unsigned long *origin, unsigned long addr,
+			     unsigned long end, int alloc)
+{
+	unsigned long *rtte, next, *table;
+	int rc;
+
+	rtte = origin;
+	rtte += (addr & _REGION3_INDEX) >> _REGION3_SHIFT;
+	do {
+		next = base_region3_addr_end(addr, end);
+		if (*rtte & _REGION_ENTRY_INVALID) {
+			if (!alloc)
+				continue;
+			table = base_crst_alloc(_SEGMENT_ENTRY_EMPTY);
+			if (!table)
+				return -ENOMEM;
+			*rtte = __pa(table) | _REGION3_ENTRY;
+		}
+		table = __va(*rtte & _REGION_ENTRY_ORIGIN);
+		rc = base_segment_walk(table, addr, next, alloc);
+		if (rc)
+			return rc;
+		if (!alloc)
+			base_crst_free(table);
+	} while (rtte++, addr = next, addr < end);
+	return 0;
+}
+
+static int base_region2_walk(unsigned long *origin, unsigned long addr,
+			     unsigned long end, int alloc)
+{
+	unsigned long *rste, next, *table;
+	int rc;
+
+	rste = origin;
+	rste += (addr & _REGION2_INDEX) >> _REGION2_SHIFT;
+	do {
+		next = base_region2_addr_end(addr, end);
+		if (*rste & _REGION_ENTRY_INVALID) {
+			if (!alloc)
+				continue;
+			table = base_crst_alloc(_REGION3_ENTRY_EMPTY);
+			if (!table)
+				return -ENOMEM;
+			*rste = __pa(table) | _REGION2_ENTRY;
+		}
+		table = __va(*rste & _REGION_ENTRY_ORIGIN);
+		rc = base_region3_walk(table, addr, next, alloc);
+		if (rc)
+			return rc;
+		if (!alloc)
+			base_crst_free(table);
+	} while (rste++, addr = next, addr < end);
+	return 0;
+}
+
+static int base_region1_walk(unsigned long *origin, unsigned long addr,
+			     unsigned long end, int alloc)
+{
+	unsigned long *rfte, next, *table;
+	int rc;
+
+	rfte = origin;
+	rfte += (addr & _REGION1_INDEX) >> _REGION1_SHIFT;
+	do {
+		next = base_region1_addr_end(addr, end);
+		if (*rfte & _REGION_ENTRY_INVALID) {
+			if (!alloc)
+				continue;
+			table = base_crst_alloc(_REGION2_ENTRY_EMPTY);
+			if (!table)
+				return -ENOMEM;
+			*rfte = __pa(table) | _REGION1_ENTRY;
+		}
+		table = __va(*rfte & _REGION_ENTRY_ORIGIN);
+		rc = base_region2_walk(table, addr, next, alloc);
+		if (rc)
+			return rc;
+		if (!alloc)
+			base_crst_free(table);
+	} while (rfte++, addr = next, addr < end);
+	return 0;
+}
+
+/**
+ * base_asce_free - free asce and tables returned from base_asce_alloc()
+ * @asce: asce to be freed
+ *
+ * Frees all region, segment, and page tables that were allocated with a
+ * corresponding base_asce_alloc() call.
+ */
+void base_asce_free(unsigned long asce)
+{
+	unsigned long *table = __va(asce & _ASCE_ORIGIN);
+
+	if (!asce)
+		return;
+	switch (asce & _ASCE_TYPE_MASK) {
+	case _ASCE_TYPE_SEGMENT:
+		base_segment_walk(table, 0, _REGION3_SIZE, 0);
+		break;
+	case _ASCE_TYPE_REGION3:
+		base_region3_walk(table, 0, _REGION2_SIZE, 0);
+		break;
+	case _ASCE_TYPE_REGION2:
+		base_region2_walk(table, 0, _REGION1_SIZE, 0);
+		break;
+	case _ASCE_TYPE_REGION1:
+		base_region1_walk(table, 0, TASK_SIZE_MAX, 0);
+		break;
+	}
+	base_crst_free(table);
+}
+
+static int base_pgt_cache_init(void)
+{
+	static DEFINE_MUTEX(base_pgt_cache_mutex);
+	unsigned long sz = _PAGE_TABLE_SIZE;
+
+	if (base_pgt_cache)
+		return 0;
+	mutex_lock(&base_pgt_cache_mutex);
+	if (!base_pgt_cache)
+		base_pgt_cache = kmem_cache_create("base_pgt", sz, sz, 0, NULL);
+	mutex_unlock(&base_pgt_cache_mutex);
+	return base_pgt_cache ? 0 : -ENOMEM;
+}
+
+/**
+ * base_asce_alloc - create kernel mapping without enhanced DAT features
+ * @addr: virtual start address of kernel mapping
+ * @num_pages: number of consecutive pages
+ *
+ * Generate an asce, including all required region, segment and page tables,
+ * that can be used to access the virtual kernel mapping. The difference is
+ * that the returned asce does not make use of any enhanced DAT features like
+ * e.g. large pages. This is required for some I/O functions that pass an
+ * asce, like e.g. some service call requests.
+ *
+ * Note: the returned asce may NEVER be attached to any cpu. It may only be
+ *	 used for I/O requests. tlb entries that might result because the
+ *	 asce was attached to a cpu won't be cleared.
+ */
+unsigned long base_asce_alloc(unsigned long addr, unsigned long num_pages)
+{
+	unsigned long asce, *table, end;
+	int rc;
+
+	if (base_pgt_cache_init())
+		return 0;
+	end = addr + num_pages * PAGE_SIZE;
+	if (end <= _REGION3_SIZE) {
+		table = base_crst_alloc(_SEGMENT_ENTRY_EMPTY);
+		if (!table)
+			return 0;
+		rc = base_segment_walk(table, addr, end, 1);
+		asce = __pa(table) | _ASCE_TYPE_SEGMENT | _ASCE_TABLE_LENGTH;
+	} else if (end <= _REGION2_SIZE) {
+		table = base_crst_alloc(_REGION3_ENTRY_EMPTY);
+		if (!table)
+			return 0;
+		rc = base_region3_walk(table, addr, end, 1);
+		asce = __pa(table) | _ASCE_TYPE_REGION3 | _ASCE_TABLE_LENGTH;
+	} else if (end <= _REGION1_SIZE) {
+		table = base_crst_alloc(_REGION2_ENTRY_EMPTY);
+		if (!table)
+			return 0;
+		rc = base_region2_walk(table, addr, end, 1);
+		asce = __pa(table) | _ASCE_TYPE_REGION2 | _ASCE_TABLE_LENGTH;
+	} else {
+		table = base_crst_alloc(_REGION1_ENTRY_EMPTY);
+		if (!table)
+			return 0;
+		rc = base_region1_walk(table, addr, end, 1);
+		asce = __pa(table) | _ASCE_TYPE_REGION1 | _ASCE_TABLE_LENGTH;
+	}
+	if (rc) {
+		base_asce_free(asce);
+		asce = 0;
+	}
+	return asce;
+}
diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c
new file mode 100644
index 0000000000..5cb9294154
--- /dev/null
+++ b/arch/s390/mm/pgtable.c
@@ -0,0 +1,1207 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ *    Copyright IBM Corp. 2007, 2011
+ *    Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>
+ */
+
+#include <linux/sched.h>
+#include <linux/kernel.h>
+#include <linux/errno.h>
+#include <linux/gfp.h>
+#include <linux/mm.h>
+#include <linux/swap.h>
+#include <linux/smp.h>
+#include <linux/spinlock.h>
+#include <linux/rcupdate.h>
+#include <linux/slab.h>
+#include <linux/swapops.h>
+#include <linux/sysctl.h>
+#include <linux/ksm.h>
+#include <linux/mman.h>
+
+#include <asm/tlb.h>
+#include <asm/tlbflush.h>
+#include <asm/mmu_context.h>
+#include <asm/page-states.h>
+
+pgprot_t pgprot_writecombine(pgprot_t prot)
+{
+	/*
+	 * mio_wb_bit_mask may be set on a different CPU, but it is only set
+	 * once at init and only read afterwards.
+	 */
+	return __pgprot(pgprot_val(prot) | mio_wb_bit_mask);
+}
+EXPORT_SYMBOL_GPL(pgprot_writecombine);
+
+pgprot_t pgprot_writethrough(pgprot_t prot)
+{
+	/*
+	 * mio_wb_bit_mask may be set on a different CPU, but it is only set
+	 * once at init and only read afterwards.
+	 */
+	return __pgprot(pgprot_val(prot) & ~mio_wb_bit_mask);
+}
+EXPORT_SYMBOL_GPL(pgprot_writethrough);
+
+static inline void ptep_ipte_local(struct mm_struct *mm, unsigned long addr,
+				   pte_t *ptep, int nodat)
+{
+	unsigned long opt, asce;
+
+	if (MACHINE_HAS_TLB_GUEST) {
+		opt = 0;
+		asce = READ_ONCE(mm->context.gmap_asce);
+		if (asce == 0UL || nodat)
+			opt |= IPTE_NODAT;
+		if (asce != -1UL) {
+			asce = asce ? : mm->context.asce;
+			opt |= IPTE_GUEST_ASCE;
+		}
+		__ptep_ipte(addr, ptep, opt, asce, IPTE_LOCAL);
+	} else {
+		__ptep_ipte(addr, ptep, 0, 0, IPTE_LOCAL);
+	}
+}
+
+static inline void ptep_ipte_global(struct mm_struct *mm, unsigned long addr,
+				    pte_t *ptep, int nodat)
+{
+	unsigned long opt, asce;
+
+	if (MACHINE_HAS_TLB_GUEST) {
+		opt = 0;
+		asce = READ_ONCE(mm->context.gmap_asce);
+		if (asce == 0UL || nodat)
+			opt |= IPTE_NODAT;
+		if (asce != -1UL) {
+			asce = asce ? : mm->context.asce;
+			opt |= IPTE_GUEST_ASCE;
+		}
+		__ptep_ipte(addr, ptep, opt, asce, IPTE_GLOBAL);
+	} else {
+		__ptep_ipte(addr, ptep, 0, 0, IPTE_GLOBAL);
+	}
+}
+
+static inline pte_t ptep_flush_direct(struct mm_struct *mm,
+				      unsigned long addr, pte_t *ptep,
+				      int nodat)
+{
+	pte_t old;
+
+	old = *ptep;
+	if (unlikely(pte_val(old) & _PAGE_INVALID))
+		return old;
+	atomic_inc(&mm->context.flush_count);
+	if (MACHINE_HAS_TLB_LC &&
+	    cpumask_equal(mm_cpumask(mm), cpumask_of(smp_processor_id())))
+		ptep_ipte_local(mm, addr, ptep, nodat);
+	else
+		ptep_ipte_global(mm, addr, ptep, nodat);
+	atomic_dec(&mm->context.flush_count);
+	return old;
+}
+
+static inline pte_t ptep_flush_lazy(struct mm_struct *mm,
+				    unsigned long addr, pte_t *ptep,
+				    int nodat)
+{
+	pte_t old;
+
+	old = *ptep;
+	if (unlikely(pte_val(old) & _PAGE_INVALID))
+		return old;
+	atomic_inc(&mm->context.flush_count);
+	if (cpumask_equal(&mm->context.cpu_attach_mask,
+			  cpumask_of(smp_processor_id()))) {
+		set_pte(ptep, set_pte_bit(*ptep, __pgprot(_PAGE_INVALID)));
+		mm->context.flush_mm = 1;
+	} else
+		ptep_ipte_global(mm, addr, ptep, nodat);
+	atomic_dec(&mm->context.flush_count);
+	return old;
+}
+
+static inline pgste_t pgste_get_lock(pte_t *ptep)
+{
+	unsigned long new = 0;
+#ifdef CONFIG_PGSTE
+	unsigned long old;
+
+	asm(
+		"	lg	%0,%2\n"
+		"0:	lgr	%1,%0\n"
+		"	nihh	%0,0xff7f\n"	/* clear PCL bit in old */
+		"	oihh	%1,0x0080\n"	/* set PCL bit in new */
+		"	csg	%0,%1,%2\n"
+		"	jl	0b\n"
+		: "=&d" (old), "=&d" (new), "=Q" (ptep[PTRS_PER_PTE])
+		: "Q" (ptep[PTRS_PER_PTE]) : "cc", "memory");
+#endif
+	return __pgste(new);
+}
+
+static inline void pgste_set_unlock(pte_t *ptep, pgste_t pgste)
+{
+#ifdef CONFIG_PGSTE
+	asm(
+		"	nihh	%1,0xff7f\n"	/* clear PCL bit */
+		"	stg	%1,%0\n"
+		: "=Q" (ptep[PTRS_PER_PTE])
+		: "d" (pgste_val(pgste)), "Q" (ptep[PTRS_PER_PTE])
+		: "cc", "memory");
+#endif
+}
+
+static inline pgste_t pgste_get(pte_t *ptep)
+{
+	unsigned long pgste = 0;
+#ifdef CONFIG_PGSTE
+	pgste = *(unsigned long *)(ptep + PTRS_PER_PTE);
+#endif
+	return __pgste(pgste);
+}
+
+static inline void pgste_set(pte_t *ptep, pgste_t pgste)
+{
+#ifdef CONFIG_PGSTE
+	*(pgste_t *)(ptep + PTRS_PER_PTE) = pgste;
+#endif
+}
+
+static inline pgste_t pgste_update_all(pte_t pte, pgste_t pgste,
+				       struct mm_struct *mm)
+{
+#ifdef CONFIG_PGSTE
+	unsigned long address, bits, skey;
+
+	if (!mm_uses_skeys(mm) || pte_val(pte) & _PAGE_INVALID)
+		return pgste;
+	address = pte_val(pte) & PAGE_MASK;
+	skey = (unsigned long) page_get_storage_key(address);
+	bits = skey & (_PAGE_CHANGED | _PAGE_REFERENCED);
+	/* Transfer page changed & referenced bit to guest bits in pgste */
+	pgste_val(pgste) |= bits << 48;		/* GR bit & GC bit */
+	/* Copy page access key and fetch protection bit to pgste */
+	pgste_val(pgste) &= ~(PGSTE_ACC_BITS | PGSTE_FP_BIT);
+	pgste_val(pgste) |= (skey & (_PAGE_ACC_BITS | _PAGE_FP_BIT)) << 56;
+#endif
+	return pgste;
+
+}
+
+static inline void pgste_set_key(pte_t *ptep, pgste_t pgste, pte_t entry,
+				 struct mm_struct *mm)
+{
+#ifdef CONFIG_PGSTE
+	unsigned long address;
+	unsigned long nkey;
+
+	if (!mm_uses_skeys(mm) || pte_val(entry) & _PAGE_INVALID)
+		return;
+	VM_BUG_ON(!(pte_val(*ptep) & _PAGE_INVALID));
+	address = pte_val(entry) & PAGE_MASK;
+	/*
+	 * Set page access key and fetch protection bit from pgste.
+	 * The guest C/R information is still in the PGSTE, set real
+	 * key C/R to 0.
+	 */
+	nkey = (pgste_val(pgste) & (PGSTE_ACC_BITS | PGSTE_FP_BIT)) >> 56;
+	nkey |= (pgste_val(pgste) & (PGSTE_GR_BIT | PGSTE_GC_BIT)) >> 48;
+	page_set_storage_key(address, nkey, 0);
+#endif
+}
+
+static inline pgste_t pgste_set_pte(pte_t *ptep, pgste_t pgste, pte_t entry)
+{
+#ifdef CONFIG_PGSTE
+	if ((pte_val(entry) & _PAGE_PRESENT) &&
+	    (pte_val(entry) & _PAGE_WRITE) &&
+	    !(pte_val(entry) & _PAGE_INVALID)) {
+		if (!MACHINE_HAS_ESOP) {
+			/*
+			 * Without enhanced suppression-on-protection force
+			 * the dirty bit on for all writable ptes.
+			 */
+			entry = set_pte_bit(entry, __pgprot(_PAGE_DIRTY));
+			entry = clear_pte_bit(entry, __pgprot(_PAGE_PROTECT));
+		}
+		if (!(pte_val(entry) & _PAGE_PROTECT))
+			/* This pte allows write access, set user-dirty */
+			pgste_val(pgste) |= PGSTE_UC_BIT;
+	}
+#endif
+	set_pte(ptep, entry);
+	return pgste;
+}
+
+static inline pgste_t pgste_pte_notify(struct mm_struct *mm,
+				       unsigned long addr,
+				       pte_t *ptep, pgste_t pgste)
+{
+#ifdef CONFIG_PGSTE
+	unsigned long bits;
+
+	bits = pgste_val(pgste) & (PGSTE_IN_BIT | PGSTE_VSIE_BIT);
+	if (bits) {
+		pgste_val(pgste) ^= bits;
+		ptep_notify(mm, addr, ptep, bits);
+	}
+#endif
+	return pgste;
+}
+
+static inline pgste_t ptep_xchg_start(struct mm_struct *mm,
+				      unsigned long addr, pte_t *ptep)
+{
+	pgste_t pgste = __pgste(0);
+
+	if (mm_has_pgste(mm)) {
+		pgste = pgste_get_lock(ptep);
+		pgste = pgste_pte_notify(mm, addr, ptep, pgste);
+	}
+	return pgste;
+}
+
+static inline pte_t ptep_xchg_commit(struct mm_struct *mm,
+				    unsigned long addr, pte_t *ptep,
+				    pgste_t pgste, pte_t old, pte_t new)
+{
+	if (mm_has_pgste(mm)) {
+		if (pte_val(old) & _PAGE_INVALID)
+			pgste_set_key(ptep, pgste, new, mm);
+		if (pte_val(new) & _PAGE_INVALID) {
+			pgste = pgste_update_all(old, pgste, mm);
+			if ((pgste_val(pgste) & _PGSTE_GPS_USAGE_MASK) ==
+			    _PGSTE_GPS_USAGE_UNUSED)
+				old = set_pte_bit(old, __pgprot(_PAGE_UNUSED));
+		}
+		pgste = pgste_set_pte(ptep, pgste, new);
+		pgste_set_unlock(ptep, pgste);
+	} else {
+		set_pte(ptep, new);
+	}
+	return old;
+}
+
+pte_t ptep_xchg_direct(struct mm_struct *mm, unsigned long addr,
+		       pte_t *ptep, pte_t new)
+{
+	pgste_t pgste;
+	pte_t old;
+	int nodat;
+
+	preempt_disable();
+	pgste = ptep_xchg_start(mm, addr, ptep);
+	nodat = !!(pgste_val(pgste) & _PGSTE_GPS_NODAT);
+	old = ptep_flush_direct(mm, addr, ptep, nodat);
+	old = ptep_xchg_commit(mm, addr, ptep, pgste, old, new);
+	preempt_enable();
+	return old;
+}
+EXPORT_SYMBOL(ptep_xchg_direct);
+
+/*
+ * Caller must check that new PTE only differs in _PAGE_PROTECT HW bit, so that
+ * RDP can be used instead of IPTE. See also comments at pte_allow_rdp().
+ */
+void ptep_reset_dat_prot(struct mm_struct *mm, unsigned long addr, pte_t *ptep,
+			 pte_t new)
+{
+	preempt_disable();
+	atomic_inc(&mm->context.flush_count);
+	if (cpumask_equal(mm_cpumask(mm), cpumask_of(smp_processor_id())))
+		__ptep_rdp(addr, ptep, 0, 0, 1);
+	else
+		__ptep_rdp(addr, ptep, 0, 0, 0);
+	/*
+	 * PTE is not invalidated by RDP, only _PAGE_PROTECT is cleared. That
+	 * means it is still valid and active, and must not be changed according
+	 * to the architecture. But writing a new value that only differs in SW
+	 * bits is allowed.
+	 */
+	set_pte(ptep, new);
+	atomic_dec(&mm->context.flush_count);
+	preempt_enable();
+}
+EXPORT_SYMBOL(ptep_reset_dat_prot);
+
+pte_t ptep_xchg_lazy(struct mm_struct *mm, unsigned long addr,
+		     pte_t *ptep, pte_t new)
+{
+	pgste_t pgste;
+	pte_t old;
+	int nodat;
+
+	preempt_disable();
+	pgste = ptep_xchg_start(mm, addr, ptep);
+	nodat = !!(pgste_val(pgste) & _PGSTE_GPS_NODAT);
+	old = ptep_flush_lazy(mm, addr, ptep, nodat);
+	old = ptep_xchg_commit(mm, addr, ptep, pgste, old, new);
+	preempt_enable();
+	return old;
+}
+EXPORT_SYMBOL(ptep_xchg_lazy);
+
+pte_t ptep_modify_prot_start(struct vm_area_struct *vma, unsigned long addr,
+			     pte_t *ptep)
+{
+	pgste_t pgste;
+	pte_t old;
+	int nodat;
+	struct mm_struct *mm = vma->vm_mm;
+
+	preempt_disable();
+	pgste = ptep_xchg_start(mm, addr, ptep);
+	nodat = !!(pgste_val(pgste) & _PGSTE_GPS_NODAT);
+	old = ptep_flush_lazy(mm, addr, ptep, nodat);
+	if (mm_has_pgste(mm)) {
+		pgste = pgste_update_all(old, pgste, mm);
+		pgste_set(ptep, pgste);
+	}
+	return old;
+}
+
+void ptep_modify_prot_commit(struct vm_area_struct *vma, unsigned long addr,
+			     pte_t *ptep, pte_t old_pte, pte_t pte)
+{
+	pgste_t pgste;
+	struct mm_struct *mm = vma->vm_mm;
+
+	if (!MACHINE_HAS_NX)
+		pte = clear_pte_bit(pte, __pgprot(_PAGE_NOEXEC));
+	if (mm_has_pgste(mm)) {
+		pgste = pgste_get(ptep);
+		pgste_set_key(ptep, pgste, pte, mm);
+		pgste = pgste_set_pte(ptep, pgste, pte);
+		pgste_set_unlock(ptep, pgste);
+	} else {
+		set_pte(ptep, pte);
+	}
+	preempt_enable();
+}
+
+static inline void pmdp_idte_local(struct mm_struct *mm,
+				   unsigned long addr, pmd_t *pmdp)
+{
+	if (MACHINE_HAS_TLB_GUEST)
+		__pmdp_idte(addr, pmdp, IDTE_NODAT | IDTE_GUEST_ASCE,
+			    mm->context.asce, IDTE_LOCAL);
+	else
+		__pmdp_idte(addr, pmdp, 0, 0, IDTE_LOCAL);
+	if (mm_has_pgste(mm) && mm->context.allow_gmap_hpage_1m)
+		gmap_pmdp_idte_local(mm, addr);
+}
+
+static inline void pmdp_idte_global(struct mm_struct *mm,
+				    unsigned long addr, pmd_t *pmdp)
+{
+	if (MACHINE_HAS_TLB_GUEST) {
+		__pmdp_idte(addr, pmdp, IDTE_NODAT | IDTE_GUEST_ASCE,
+			    mm->context.asce, IDTE_GLOBAL);
+		if (mm_has_pgste(mm) && mm->context.allow_gmap_hpage_1m)
+			gmap_pmdp_idte_global(mm, addr);
+	} else if (MACHINE_HAS_IDTE) {
+		__pmdp_idte(addr, pmdp, 0, 0, IDTE_GLOBAL);
+		if (mm_has_pgste(mm) && mm->context.allow_gmap_hpage_1m)
+			gmap_pmdp_idte_global(mm, addr);
+	} else {
+		__pmdp_csp(pmdp);
+		if (mm_has_pgste(mm) && mm->context.allow_gmap_hpage_1m)
+			gmap_pmdp_csp(mm, addr);
+	}
+}
+
+static inline pmd_t pmdp_flush_direct(struct mm_struct *mm,
+				      unsigned long addr, pmd_t *pmdp)
+{
+	pmd_t old;
+
+	old = *pmdp;
+	if (pmd_val(old) & _SEGMENT_ENTRY_INVALID)
+		return old;
+	atomic_inc(&mm->context.flush_count);
+	if (MACHINE_HAS_TLB_LC &&
+	    cpumask_equal(mm_cpumask(mm), cpumask_of(smp_processor_id())))
+		pmdp_idte_local(mm, addr, pmdp);
+	else
+		pmdp_idte_global(mm, addr, pmdp);
+	atomic_dec(&mm->context.flush_count);
+	return old;
+}
+
+static inline pmd_t pmdp_flush_lazy(struct mm_struct *mm,
+				    unsigned long addr, pmd_t *pmdp)
+{
+	pmd_t old;
+
+	old = *pmdp;
+	if (pmd_val(old) & _SEGMENT_ENTRY_INVALID)
+		return old;
+	atomic_inc(&mm->context.flush_count);
+	if (cpumask_equal(&mm->context.cpu_attach_mask,
+			  cpumask_of(smp_processor_id()))) {
+		set_pmd(pmdp, set_pmd_bit(*pmdp, __pgprot(_SEGMENT_ENTRY_INVALID)));
+		mm->context.flush_mm = 1;
+		if (mm_has_pgste(mm))
+			gmap_pmdp_invalidate(mm, addr);
+	} else {
+		pmdp_idte_global(mm, addr, pmdp);
+	}
+	atomic_dec(&mm->context.flush_count);
+	return old;
+}
+
+#ifdef CONFIG_PGSTE
+static int pmd_lookup(struct mm_struct *mm, unsigned long addr, pmd_t **pmdp)
+{
+	struct vm_area_struct *vma;
+	pgd_t *pgd;
+	p4d_t *p4d;
+	pud_t *pud;
+
+	/* We need a valid VMA, otherwise this is clearly a fault. */
+	vma = vma_lookup(mm, addr);
+	if (!vma)
+		return -EFAULT;
+
+	pgd = pgd_offset(mm, addr);
+	if (!pgd_present(*pgd))
+		return -ENOENT;
+
+	p4d = p4d_offset(pgd, addr);
+	if (!p4d_present(*p4d))
+		return -ENOENT;
+
+	pud = pud_offset(p4d, addr);
+	if (!pud_present(*pud))
+		return -ENOENT;
+
+	/* Large PUDs are not supported yet. */
+	if (pud_large(*pud))
+		return -EFAULT;
+
+	*pmdp = pmd_offset(pud, addr);
+	return 0;
+}
+#endif
+
+pmd_t pmdp_xchg_direct(struct mm_struct *mm, unsigned long addr,
+		       pmd_t *pmdp, pmd_t new)
+{
+	pmd_t old;
+
+	preempt_disable();
+	old = pmdp_flush_direct(mm, addr, pmdp);
+	set_pmd(pmdp, new);
+	preempt_enable();
+	return old;
+}
+EXPORT_SYMBOL(pmdp_xchg_direct);
+
+pmd_t pmdp_xchg_lazy(struct mm_struct *mm, unsigned long addr,
+		     pmd_t *pmdp, pmd_t new)
+{
+	pmd_t old;
+
+	preempt_disable();
+	old = pmdp_flush_lazy(mm, addr, pmdp);
+	set_pmd(pmdp, new);
+	preempt_enable();
+	return old;
+}
+EXPORT_SYMBOL(pmdp_xchg_lazy);
+
+static inline void pudp_idte_local(struct mm_struct *mm,
+				   unsigned long addr, pud_t *pudp)
+{
+	if (MACHINE_HAS_TLB_GUEST)
+		__pudp_idte(addr, pudp, IDTE_NODAT | IDTE_GUEST_ASCE,
+			    mm->context.asce, IDTE_LOCAL);
+	else
+		__pudp_idte(addr, pudp, 0, 0, IDTE_LOCAL);
+}
+
+static inline void pudp_idte_global(struct mm_struct *mm,
+				    unsigned long addr, pud_t *pudp)
+{
+	if (MACHINE_HAS_TLB_GUEST)
+		__pudp_idte(addr, pudp, IDTE_NODAT | IDTE_GUEST_ASCE,
+			    mm->context.asce, IDTE_GLOBAL);
+	else if (MACHINE_HAS_IDTE)
+		__pudp_idte(addr, pudp, 0, 0, IDTE_GLOBAL);
+	else
+		/*
+		 * Invalid bit position is the same for pmd and pud, so we can
+		 * re-use _pmd_csp() here
+		 */
+		__pmdp_csp((pmd_t *) pudp);
+}
+
+static inline pud_t pudp_flush_direct(struct mm_struct *mm,
+				      unsigned long addr, pud_t *pudp)
+{
+	pud_t old;
+
+	old = *pudp;
+	if (pud_val(old) & _REGION_ENTRY_INVALID)
+		return old;
+	atomic_inc(&mm->context.flush_count);
+	if (MACHINE_HAS_TLB_LC &&
+	    cpumask_equal(mm_cpumask(mm), cpumask_of(smp_processor_id())))
+		pudp_idte_local(mm, addr, pudp);
+	else
+		pudp_idte_global(mm, addr, pudp);
+	atomic_dec(&mm->context.flush_count);
+	return old;
+}
+
+pud_t pudp_xchg_direct(struct mm_struct *mm, unsigned long addr,
+		       pud_t *pudp, pud_t new)
+{
+	pud_t old;
+
+	preempt_disable();
+	old = pudp_flush_direct(mm, addr, pudp);
+	set_pud(pudp, new);
+	preempt_enable();
+	return old;
+}
+EXPORT_SYMBOL(pudp_xchg_direct);
+
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+void pgtable_trans_huge_deposit(struct mm_struct *mm, pmd_t *pmdp,
+				pgtable_t pgtable)
+{
+	struct list_head *lh = (struct list_head *) pgtable;
+
+	assert_spin_locked(pmd_lockptr(mm, pmdp));
+
+	/* FIFO */
+	if (!pmd_huge_pte(mm, pmdp))
+		INIT_LIST_HEAD(lh);
+	else
+		list_add(lh, (struct list_head *) pmd_huge_pte(mm, pmdp));
+	pmd_huge_pte(mm, pmdp) = pgtable;
+}
+
+pgtable_t pgtable_trans_huge_withdraw(struct mm_struct *mm, pmd_t *pmdp)
+{
+	struct list_head *lh;
+	pgtable_t pgtable;
+	pte_t *ptep;
+
+	assert_spin_locked(pmd_lockptr(mm, pmdp));
+
+	/* FIFO */
+	pgtable = pmd_huge_pte(mm, pmdp);
+	lh = (struct list_head *) pgtable;
+	if (list_empty(lh))
+		pmd_huge_pte(mm, pmdp) = NULL;
+	else {
+		pmd_huge_pte(mm, pmdp) = (pgtable_t) lh->next;
+		list_del(lh);
+	}
+	ptep = (pte_t *) pgtable;
+	set_pte(ptep, __pte(_PAGE_INVALID));
+	ptep++;
+	set_pte(ptep, __pte(_PAGE_INVALID));
+	return pgtable;
+}
+#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
+
+#ifdef CONFIG_PGSTE
+void ptep_set_pte_at(struct mm_struct *mm, unsigned long addr,
+		     pte_t *ptep, pte_t entry)
+{
+	pgste_t pgste;
+
+	/* the mm_has_pgste() check is done in set_pte_at() */
+	preempt_disable();
+	pgste = pgste_get_lock(ptep);
+	pgste_val(pgste) &= ~_PGSTE_GPS_ZERO;
+	pgste_set_key(ptep, pgste, entry, mm);
+	pgste = pgste_set_pte(ptep, pgste, entry);
+	pgste_set_unlock(ptep, pgste);
+	preempt_enable();
+}
+
+void ptep_set_notify(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
+{
+	pgste_t pgste;
+
+	preempt_disable();
+	pgste = pgste_get_lock(ptep);
+	pgste_val(pgste) |= PGSTE_IN_BIT;
+	pgste_set_unlock(ptep, pgste);
+	preempt_enable();
+}
+
+/**
+ * ptep_force_prot - change access rights of a locked pte
+ * @mm: pointer to the process mm_struct
+ * @addr: virtual address in the guest address space
+ * @ptep: pointer to the page table entry
+ * @prot: indicates guest access rights: PROT_NONE, PROT_READ or PROT_WRITE
+ * @bit: pgste bit to set (e.g. for notification)
+ *
+ * Returns 0 if the access rights were changed and -EAGAIN if the current
+ * and requested access rights are incompatible.
+ */
+int ptep_force_prot(struct mm_struct *mm, unsigned long addr,
+		    pte_t *ptep, int prot, unsigned long bit)
+{
+	pte_t entry;
+	pgste_t pgste;
+	int pte_i, pte_p, nodat;
+
+	pgste = pgste_get_lock(ptep);
+	entry = *ptep;
+	/* Check pte entry after all locks have been acquired */
+	pte_i = pte_val(entry) & _PAGE_INVALID;
+	pte_p = pte_val(entry) & _PAGE_PROTECT;
+	if ((pte_i && (prot != PROT_NONE)) ||
+	    (pte_p && (prot & PROT_WRITE))) {
+		pgste_set_unlock(ptep, pgste);
+		return -EAGAIN;
+	}
+	/* Change access rights and set pgste bit */
+	nodat = !!(pgste_val(pgste) & _PGSTE_GPS_NODAT);
+	if (prot == PROT_NONE && !pte_i) {
+		ptep_flush_direct(mm, addr, ptep, nodat);
+		pgste = pgste_update_all(entry, pgste, mm);
+		entry = set_pte_bit(entry, __pgprot(_PAGE_INVALID));
+	}
+	if (prot == PROT_READ && !pte_p) {
+		ptep_flush_direct(mm, addr, ptep, nodat);
+		entry = clear_pte_bit(entry, __pgprot(_PAGE_INVALID));
+		entry = set_pte_bit(entry, __pgprot(_PAGE_PROTECT));
+	}
+	pgste_val(pgste) |= bit;
+	pgste = pgste_set_pte(ptep, pgste, entry);
+	pgste_set_unlock(ptep, pgste);
+	return 0;
+}
+
+int ptep_shadow_pte(struct mm_struct *mm, unsigned long saddr,
+		    pte_t *sptep, pte_t *tptep, pte_t pte)
+{
+	pgste_t spgste, tpgste;
+	pte_t spte, tpte;
+	int rc = -EAGAIN;
+
+	if (!(pte_val(*tptep) & _PAGE_INVALID))
+		return 0;	/* already shadowed */
+	spgste = pgste_get_lock(sptep);
+	spte = *sptep;
+	if (!(pte_val(spte) & _PAGE_INVALID) &&
+	    !((pte_val(spte) & _PAGE_PROTECT) &&
+	      !(pte_val(pte) & _PAGE_PROTECT))) {
+		pgste_val(spgste) |= PGSTE_VSIE_BIT;
+		tpgste = pgste_get_lock(tptep);
+		tpte = __pte((pte_val(spte) & PAGE_MASK) |
+			     (pte_val(pte) & _PAGE_PROTECT));
+		/* don't touch the storage key - it belongs to parent pgste */
+		tpgste = pgste_set_pte(tptep, tpgste, tpte);
+		pgste_set_unlock(tptep, tpgste);
+		rc = 1;
+	}
+	pgste_set_unlock(sptep, spgste);
+	return rc;
+}
+
+void ptep_unshadow_pte(struct mm_struct *mm, unsigned long saddr, pte_t *ptep)
+{
+	pgste_t pgste;
+	int nodat;
+
+	pgste = pgste_get_lock(ptep);
+	/* notifier is called by the caller */
+	nodat = !!(pgste_val(pgste) & _PGSTE_GPS_NODAT);
+	ptep_flush_direct(mm, saddr, ptep, nodat);
+	/* don't touch the storage key - it belongs to parent pgste */
+	pgste = pgste_set_pte(ptep, pgste, __pte(_PAGE_INVALID));
+	pgste_set_unlock(ptep, pgste);
+}
+
+static void ptep_zap_swap_entry(struct mm_struct *mm, swp_entry_t entry)
+{
+	if (!non_swap_entry(entry))
+		dec_mm_counter(mm, MM_SWAPENTS);
+	else if (is_migration_entry(entry)) {
+		struct page *page = pfn_swap_entry_to_page(entry);
+
+		dec_mm_counter(mm, mm_counter(page));
+	}
+	free_swap_and_cache(entry);
+}
+
+void ptep_zap_unused(struct mm_struct *mm, unsigned long addr,
+		     pte_t *ptep, int reset)
+{
+	unsigned long pgstev;
+	pgste_t pgste;
+	pte_t pte;
+
+	/* Zap unused and logically-zero pages */
+	preempt_disable();
+	pgste = pgste_get_lock(ptep);
+	pgstev = pgste_val(pgste);
+	pte = *ptep;
+	if (!reset && pte_swap(pte) &&
+	    ((pgstev & _PGSTE_GPS_USAGE_MASK) == _PGSTE_GPS_USAGE_UNUSED ||
+	     (pgstev & _PGSTE_GPS_ZERO))) {
+		ptep_zap_swap_entry(mm, pte_to_swp_entry(pte));
+		pte_clear(mm, addr, ptep);
+	}
+	if (reset)
+		pgste_val(pgste) &= ~(_PGSTE_GPS_USAGE_MASK | _PGSTE_GPS_NODAT);
+	pgste_set_unlock(ptep, pgste);
+	preempt_enable();
+}
+
+void ptep_zap_key(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
+{
+	unsigned long ptev;
+	pgste_t pgste;
+
+	/* Clear storage key ACC and F, but set R/C */
+	preempt_disable();
+	pgste = pgste_get_lock(ptep);
+	pgste_val(pgste) &= ~(PGSTE_ACC_BITS | PGSTE_FP_BIT);
+	pgste_val(pgste) |= PGSTE_GR_BIT | PGSTE_GC_BIT;
+	ptev = pte_val(*ptep);
+	if (!(ptev & _PAGE_INVALID) && (ptev & _PAGE_WRITE))
+		page_set_storage_key(ptev & PAGE_MASK, PAGE_DEFAULT_KEY, 0);
+	pgste_set_unlock(ptep, pgste);
+	preempt_enable();
+}
+
+/*
+ * Test and reset if a guest page is dirty
+ */
+bool ptep_test_and_clear_uc(struct mm_struct *mm, unsigned long addr,
+		       pte_t *ptep)
+{
+	pgste_t pgste;
+	pte_t pte;
+	bool dirty;
+	int nodat;
+
+	pgste = pgste_get_lock(ptep);
+	dirty = !!(pgste_val(pgste) & PGSTE_UC_BIT);
+	pgste_val(pgste) &= ~PGSTE_UC_BIT;
+	pte = *ptep;
+	if (dirty && (pte_val(pte) & _PAGE_PRESENT)) {
+		pgste = pgste_pte_notify(mm, addr, ptep, pgste);
+		nodat = !!(pgste_val(pgste) & _PGSTE_GPS_NODAT);
+		ptep_ipte_global(mm, addr, ptep, nodat);
+		if (MACHINE_HAS_ESOP || !(pte_val(pte) & _PAGE_WRITE))
+			pte = set_pte_bit(pte, __pgprot(_PAGE_PROTECT));
+		else
+			pte = set_pte_bit(pte, __pgprot(_PAGE_INVALID));
+		set_pte(ptep, pte);
+	}
+	pgste_set_unlock(ptep, pgste);
+	return dirty;
+}
+EXPORT_SYMBOL_GPL(ptep_test_and_clear_uc);
+
+int set_guest_storage_key(struct mm_struct *mm, unsigned long addr,
+			  unsigned char key, bool nq)
+{
+	unsigned long keyul, paddr;
+	spinlock_t *ptl;
+	pgste_t old, new;
+	pmd_t *pmdp;
+	pte_t *ptep;
+
+	/*
+	 * If we don't have a PTE table and if there is no huge page mapped,
+	 * we can ignore attempts to set the key to 0, because it already is 0.
+	 */
+	switch (pmd_lookup(mm, addr, &pmdp)) {
+	case -ENOENT:
+		return key ? -EFAULT : 0;
+	case 0:
+		break;
+	default:
+		return -EFAULT;
+	}
+again:
+	ptl = pmd_lock(mm, pmdp);
+	if (!pmd_present(*pmdp)) {
+		spin_unlock(ptl);
+		return key ? -EFAULT : 0;
+	}
+
+	if (pmd_large(*pmdp)) {
+		paddr = pmd_val(*pmdp) & HPAGE_MASK;
+		paddr |= addr & ~HPAGE_MASK;
+		/*
+		 * Huge pmds need quiescing operations, they are
+		 * always mapped.
+		 */
+		page_set_storage_key(paddr, key, 1);
+		spin_unlock(ptl);
+		return 0;
+	}
+	spin_unlock(ptl);
+
+	ptep = pte_offset_map_lock(mm, pmdp, addr, &ptl);
+	if (!ptep)
+		goto again;
+	new = old = pgste_get_lock(ptep);
+	pgste_val(new) &= ~(PGSTE_GR_BIT | PGSTE_GC_BIT |
+			    PGSTE_ACC_BITS | PGSTE_FP_BIT);
+	keyul = (unsigned long) key;
+	pgste_val(new) |= (keyul & (_PAGE_CHANGED | _PAGE_REFERENCED)) << 48;
+	pgste_val(new) |= (keyul & (_PAGE_ACC_BITS | _PAGE_FP_BIT)) << 56;
+	if (!(pte_val(*ptep) & _PAGE_INVALID)) {
+		unsigned long bits, skey;
+
+		paddr = pte_val(*ptep) & PAGE_MASK;
+		skey = (unsigned long) page_get_storage_key(paddr);
+		bits = skey & (_PAGE_CHANGED | _PAGE_REFERENCED);
+		skey = key & (_PAGE_ACC_BITS | _PAGE_FP_BIT);
+		/* Set storage key ACC and FP */
+		page_set_storage_key(paddr, skey, !nq);
+		/* Merge host changed & referenced into pgste  */
+		pgste_val(new) |= bits << 52;
+	}
+	/* changing the guest storage key is considered a change of the page */
+	if ((pgste_val(new) ^ pgste_val(old)) &
+	    (PGSTE_ACC_BITS | PGSTE_FP_BIT | PGSTE_GR_BIT | PGSTE_GC_BIT))
+		pgste_val(new) |= PGSTE_UC_BIT;
+
+	pgste_set_unlock(ptep, new);
+	pte_unmap_unlock(ptep, ptl);
+	return 0;
+}
+EXPORT_SYMBOL(set_guest_storage_key);
+
+/*
+ * Conditionally set a guest storage key (handling csske).
+ * oldkey will be updated when either mr or mc is set and a pointer is given.
+ *
+ * Returns 0 if a guests storage key update wasn't necessary, 1 if the guest
+ * storage key was updated and -EFAULT on access errors.
+ */
+int cond_set_guest_storage_key(struct mm_struct *mm, unsigned long addr,
+			       unsigned char key, unsigned char *oldkey,
+			       bool nq, bool mr, bool mc)
+{
+	unsigned char tmp, mask = _PAGE_ACC_BITS | _PAGE_FP_BIT;
+	int rc;
+
+	/* we can drop the pgste lock between getting and setting the key */
+	if (mr | mc) {
+		rc = get_guest_storage_key(current->mm, addr, &tmp);
+		if (rc)
+			return rc;
+		if (oldkey)
+			*oldkey = tmp;
+		if (!mr)
+			mask |= _PAGE_REFERENCED;
+		if (!mc)
+			mask |= _PAGE_CHANGED;
+		if (!((tmp ^ key) & mask))
+			return 0;
+	}
+	rc = set_guest_storage_key(current->mm, addr, key, nq);
+	return rc < 0 ? rc : 1;
+}
+EXPORT_SYMBOL(cond_set_guest_storage_key);
+
+/*
+ * Reset a guest reference bit (rrbe), returning the reference and changed bit.
+ *
+ * Returns < 0 in case of error, otherwise the cc to be reported to the guest.
+ */
+int reset_guest_reference_bit(struct mm_struct *mm, unsigned long addr)
+{
+	spinlock_t *ptl;
+	unsigned long paddr;
+	pgste_t old, new;
+	pmd_t *pmdp;
+	pte_t *ptep;
+	int cc = 0;
+
+	/*
+	 * If we don't have a PTE table and if there is no huge page mapped,
+	 * the storage key is 0 and there is nothing for us to do.
+	 */
+	switch (pmd_lookup(mm, addr, &pmdp)) {
+	case -ENOENT:
+		return 0;
+	case 0:
+		break;
+	default:
+		return -EFAULT;
+	}
+again:
+	ptl = pmd_lock(mm, pmdp);
+	if (!pmd_present(*pmdp)) {
+		spin_unlock(ptl);
+		return 0;
+	}
+
+	if (pmd_large(*pmdp)) {
+		paddr = pmd_val(*pmdp) & HPAGE_MASK;
+		paddr |= addr & ~HPAGE_MASK;
+		cc = page_reset_referenced(paddr);
+		spin_unlock(ptl);
+		return cc;
+	}
+	spin_unlock(ptl);
+
+	ptep = pte_offset_map_lock(mm, pmdp, addr, &ptl);
+	if (!ptep)
+		goto again;
+	new = old = pgste_get_lock(ptep);
+	/* Reset guest reference bit only */
+	pgste_val(new) &= ~PGSTE_GR_BIT;
+
+	if (!(pte_val(*ptep) & _PAGE_INVALID)) {
+		paddr = pte_val(*ptep) & PAGE_MASK;
+		cc = page_reset_referenced(paddr);
+		/* Merge real referenced bit into host-set */
+		pgste_val(new) |= ((unsigned long) cc << 53) & PGSTE_HR_BIT;
+	}
+	/* Reflect guest's logical view, not physical */
+	cc |= (pgste_val(old) & (PGSTE_GR_BIT | PGSTE_GC_BIT)) >> 49;
+	/* Changing the guest storage key is considered a change of the page */
+	if ((pgste_val(new) ^ pgste_val(old)) & PGSTE_GR_BIT)
+		pgste_val(new) |= PGSTE_UC_BIT;
+
+	pgste_set_unlock(ptep, new);
+	pte_unmap_unlock(ptep, ptl);
+	return cc;
+}
+EXPORT_SYMBOL(reset_guest_reference_bit);
+
+int get_guest_storage_key(struct mm_struct *mm, unsigned long addr,
+			  unsigned char *key)
+{
+	unsigned long paddr;
+	spinlock_t *ptl;
+	pgste_t pgste;
+	pmd_t *pmdp;
+	pte_t *ptep;
+
+	/*
+	 * If we don't have a PTE table and if there is no huge page mapped,
+	 * the storage key is 0.
+	 */
+	*key = 0;
+
+	switch (pmd_lookup(mm, addr, &pmdp)) {
+	case -ENOENT:
+		return 0;
+	case 0:
+		break;
+	default:
+		return -EFAULT;
+	}
+again:
+	ptl = pmd_lock(mm, pmdp);
+	if (!pmd_present(*pmdp)) {
+		spin_unlock(ptl);
+		return 0;
+	}
+
+	if (pmd_large(*pmdp)) {
+		paddr = pmd_val(*pmdp) & HPAGE_MASK;
+		paddr |= addr & ~HPAGE_MASK;
+		*key = page_get_storage_key(paddr);
+		spin_unlock(ptl);
+		return 0;
+	}
+	spin_unlock(ptl);
+
+	ptep = pte_offset_map_lock(mm, pmdp, addr, &ptl);
+	if (!ptep)
+		goto again;
+	pgste = pgste_get_lock(ptep);
+	*key = (pgste_val(pgste) & (PGSTE_ACC_BITS | PGSTE_FP_BIT)) >> 56;
+	paddr = pte_val(*ptep) & PAGE_MASK;
+	if (!(pte_val(*ptep) & _PAGE_INVALID))
+		*key = page_get_storage_key(paddr);
+	/* Reflect guest's logical view, not physical */
+	*key |= (pgste_val(pgste) & (PGSTE_GR_BIT | PGSTE_GC_BIT)) >> 48;
+	pgste_set_unlock(ptep, pgste);
+	pte_unmap_unlock(ptep, ptl);
+	return 0;
+}
+EXPORT_SYMBOL(get_guest_storage_key);
+
+/**
+ * pgste_perform_essa - perform ESSA actions on the PGSTE.
+ * @mm: the memory context. It must have PGSTEs, no check is performed here!
+ * @hva: the host virtual address of the page whose PGSTE is to be processed
+ * @orc: the specific action to perform, see the ESSA_SET_* macros.
+ * @oldpte: the PTE will be saved there if the pointer is not NULL.
+ * @oldpgste: the old PGSTE will be saved there if the pointer is not NULL.
+ *
+ * Return: 1 if the page is to be added to the CBRL, otherwise 0,
+ *	   or < 0 in case of error. -EINVAL is returned for invalid values
+ *	   of orc, -EFAULT for invalid addresses.
+ */
+int pgste_perform_essa(struct mm_struct *mm, unsigned long hva, int orc,
+			unsigned long *oldpte, unsigned long *oldpgste)
+{
+	struct vm_area_struct *vma;
+	unsigned long pgstev;
+	spinlock_t *ptl;
+	pgste_t pgste;
+	pte_t *ptep;
+	int res = 0;
+
+	WARN_ON_ONCE(orc > ESSA_MAX);
+	if (unlikely(orc > ESSA_MAX))
+		return -EINVAL;
+
+	vma = vma_lookup(mm, hva);
+	if (!vma || is_vm_hugetlb_page(vma))
+		return -EFAULT;
+	ptep = get_locked_pte(mm, hva, &ptl);
+	if (unlikely(!ptep))
+		return -EFAULT;
+	pgste = pgste_get_lock(ptep);
+	pgstev = pgste_val(pgste);
+	if (oldpte)
+		*oldpte = pte_val(*ptep);
+	if (oldpgste)
+		*oldpgste = pgstev;
+
+	switch (orc) {
+	case ESSA_GET_STATE:
+		break;
+	case ESSA_SET_STABLE:
+		pgstev &= ~(_PGSTE_GPS_USAGE_MASK | _PGSTE_GPS_NODAT);
+		pgstev |= _PGSTE_GPS_USAGE_STABLE;
+		break;
+	case ESSA_SET_UNUSED:
+		pgstev &= ~_PGSTE_GPS_USAGE_MASK;
+		pgstev |= _PGSTE_GPS_USAGE_UNUSED;
+		if (pte_val(*ptep) & _PAGE_INVALID)
+			res = 1;
+		break;
+	case ESSA_SET_VOLATILE:
+		pgstev &= ~_PGSTE_GPS_USAGE_MASK;
+		pgstev |= _PGSTE_GPS_USAGE_VOLATILE;
+		if (pte_val(*ptep) & _PAGE_INVALID)
+			res = 1;
+		break;
+	case ESSA_SET_POT_VOLATILE:
+		pgstev &= ~_PGSTE_GPS_USAGE_MASK;
+		if (!(pte_val(*ptep) & _PAGE_INVALID)) {
+			pgstev |= _PGSTE_GPS_USAGE_POT_VOLATILE;
+			break;
+		}
+		if (pgstev & _PGSTE_GPS_ZERO) {
+			pgstev |= _PGSTE_GPS_USAGE_VOLATILE;
+			break;
+		}
+		if (!(pgstev & PGSTE_GC_BIT)) {
+			pgstev |= _PGSTE_GPS_USAGE_VOLATILE;
+			res = 1;
+			break;
+		}
+		break;
+	case ESSA_SET_STABLE_RESIDENT:
+		pgstev &= ~_PGSTE_GPS_USAGE_MASK;
+		pgstev |= _PGSTE_GPS_USAGE_STABLE;
+		/*
+		 * Since the resident state can go away any time after this
+		 * call, we will not make this page resident. We can revisit
+		 * this decision if a guest will ever start using this.
+		 */
+		break;
+	case ESSA_SET_STABLE_IF_RESIDENT:
+		if (!(pte_val(*ptep) & _PAGE_INVALID)) {
+			pgstev &= ~_PGSTE_GPS_USAGE_MASK;
+			pgstev |= _PGSTE_GPS_USAGE_STABLE;
+		}
+		break;
+	case ESSA_SET_STABLE_NODAT:
+		pgstev &= ~_PGSTE_GPS_USAGE_MASK;
+		pgstev |= _PGSTE_GPS_USAGE_STABLE | _PGSTE_GPS_NODAT;
+		break;
+	default:
+		/* we should never get here! */
+		break;
+	}
+	/* If we are discarding a page, set it to logical zero */
+	if (res)
+		pgstev |= _PGSTE_GPS_ZERO;
+
+	pgste_val(pgste) = pgstev;
+	pgste_set_unlock(ptep, pgste);
+	pte_unmap_unlock(ptep, ptl);
+	return res;
+}
+EXPORT_SYMBOL(pgste_perform_essa);
+
+/**
+ * set_pgste_bits - set specific PGSTE bits.
+ * @mm: the memory context. It must have PGSTEs, no check is performed here!
+ * @hva: the host virtual address of the page whose PGSTE is to be processed
+ * @bits: a bitmask representing the bits that will be touched
+ * @value: the values of the bits to be written. Only the bits in the mask
+ *	   will be written.
+ *
+ * Return: 0 on success, < 0 in case of error.
+ */
+int set_pgste_bits(struct mm_struct *mm, unsigned long hva,
+			unsigned long bits, unsigned long value)
+{
+	struct vm_area_struct *vma;
+	spinlock_t *ptl;
+	pgste_t new;
+	pte_t *ptep;
+
+	vma = vma_lookup(mm, hva);
+	if (!vma || is_vm_hugetlb_page(vma))
+		return -EFAULT;
+	ptep = get_locked_pte(mm, hva, &ptl);
+	if (unlikely(!ptep))
+		return -EFAULT;
+	new = pgste_get_lock(ptep);
+
+	pgste_val(new) &= ~bits;
+	pgste_val(new) |= value & bits;
+
+	pgste_set_unlock(ptep, new);
+	pte_unmap_unlock(ptep, ptl);
+	return 0;
+}
+EXPORT_SYMBOL(set_pgste_bits);
+
+/**
+ * get_pgste - get the current PGSTE for the given address.
+ * @mm: the memory context. It must have PGSTEs, no check is performed here!
+ * @hva: the host virtual address of the page whose PGSTE is to be processed
+ * @pgstep: will be written with the current PGSTE for the given address.
+ *
+ * Return: 0 on success, < 0 in case of error.
+ */
+int get_pgste(struct mm_struct *mm, unsigned long hva, unsigned long *pgstep)
+{
+	struct vm_area_struct *vma;
+	spinlock_t *ptl;
+	pte_t *ptep;
+
+	vma = vma_lookup(mm, hva);
+	if (!vma || is_vm_hugetlb_page(vma))
+		return -EFAULT;
+	ptep = get_locked_pte(mm, hva, &ptl);
+	if (unlikely(!ptep))
+		return -EFAULT;
+	*pgstep = pgste_val(pgste_get(ptep));
+	pte_unmap_unlock(ptep, ptl);
+	return 0;
+}
+EXPORT_SYMBOL(get_pgste);
+#endif
diff --git a/arch/s390/mm/vmem.c b/arch/s390/mm/vmem.c
new file mode 100644
index 0000000000..6d276103c6
--- /dev/null
+++ b/arch/s390/mm/vmem.c
@@ -0,0 +1,669 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ *    Copyright IBM Corp. 2006
+ */
+
+#include <linux/memory_hotplug.h>
+#include <linux/memblock.h>
+#include <linux/pfn.h>
+#include <linux/mm.h>
+#include <linux/init.h>
+#include <linux/list.h>
+#include <linux/hugetlb.h>
+#include <linux/slab.h>
+#include <linux/sort.h>
+#include <asm/page-states.h>
+#include <asm/cacheflush.h>
+#include <asm/nospec-branch.h>
+#include <asm/pgalloc.h>
+#include <asm/setup.h>
+#include <asm/tlbflush.h>
+#include <asm/sections.h>
+#include <asm/set_memory.h>
+
+static DEFINE_MUTEX(vmem_mutex);
+
+static void __ref *vmem_alloc_pages(unsigned int order)
+{
+	unsigned long size = PAGE_SIZE << order;
+
+	if (slab_is_available())
+		return (void *)__get_free_pages(GFP_KERNEL, order);
+	return memblock_alloc(size, size);
+}
+
+static void vmem_free_pages(unsigned long addr, int order)
+{
+	/* We don't expect boot memory to be removed ever. */
+	if (!slab_is_available() ||
+	    WARN_ON_ONCE(PageReserved(virt_to_page((void *)addr))))
+		return;
+	free_pages(addr, order);
+}
+
+void *vmem_crst_alloc(unsigned long val)
+{
+	unsigned long *table;
+
+	table = vmem_alloc_pages(CRST_ALLOC_ORDER);
+	if (!table)
+		return NULL;
+	crst_table_init(table, val);
+	if (slab_is_available())
+		arch_set_page_dat(virt_to_page(table), CRST_ALLOC_ORDER);
+	return table;
+}
+
+pte_t __ref *vmem_pte_alloc(void)
+{
+	unsigned long size = PTRS_PER_PTE * sizeof(pte_t);
+	pte_t *pte;
+
+	if (slab_is_available())
+		pte = (pte_t *) page_table_alloc(&init_mm);
+	else
+		pte = (pte_t *) memblock_alloc(size, size);
+	if (!pte)
+		return NULL;
+	memset64((u64 *)pte, _PAGE_INVALID, PTRS_PER_PTE);
+	return pte;
+}
+
+static void vmem_pte_free(unsigned long *table)
+{
+	/* We don't expect boot memory to be removed ever. */
+	if (!slab_is_available() ||
+	    WARN_ON_ONCE(PageReserved(virt_to_page(table))))
+		return;
+	page_table_free(&init_mm, table);
+}
+
+#define PAGE_UNUSED 0xFD
+
+/*
+ * The unused vmemmap range, which was not yet memset(PAGE_UNUSED) ranges
+ * from unused_sub_pmd_start to next PMD_SIZE boundary.
+ */
+static unsigned long unused_sub_pmd_start;
+
+static void vmemmap_flush_unused_sub_pmd(void)
+{
+	if (!unused_sub_pmd_start)
+		return;
+	memset((void *)unused_sub_pmd_start, PAGE_UNUSED,
+	       ALIGN(unused_sub_pmd_start, PMD_SIZE) - unused_sub_pmd_start);
+	unused_sub_pmd_start = 0;
+}
+
+static void vmemmap_mark_sub_pmd_used(unsigned long start, unsigned long end)
+{
+	/*
+	 * As we expect to add in the same granularity as we remove, it's
+	 * sufficient to mark only some piece used to block the memmap page from
+	 * getting removed (just in case the memmap never gets initialized,
+	 * e.g., because the memory block never gets onlined).
+	 */
+	memset((void *)start, 0, sizeof(struct page));
+}
+
+static void vmemmap_use_sub_pmd(unsigned long start, unsigned long end)
+{
+	/*
+	 * We only optimize if the new used range directly follows the
+	 * previously unused range (esp., when populating consecutive sections).
+	 */
+	if (unused_sub_pmd_start == start) {
+		unused_sub_pmd_start = end;
+		if (likely(IS_ALIGNED(unused_sub_pmd_start, PMD_SIZE)))
+			unused_sub_pmd_start = 0;
+		return;
+	}
+	vmemmap_flush_unused_sub_pmd();
+	vmemmap_mark_sub_pmd_used(start, end);
+}
+
+static void vmemmap_use_new_sub_pmd(unsigned long start, unsigned long end)
+{
+	unsigned long page = ALIGN_DOWN(start, PMD_SIZE);
+
+	vmemmap_flush_unused_sub_pmd();
+
+	/* Could be our memmap page is filled with PAGE_UNUSED already ... */
+	vmemmap_mark_sub_pmd_used(start, end);
+
+	/* Mark the unused parts of the new memmap page PAGE_UNUSED. */
+	if (!IS_ALIGNED(start, PMD_SIZE))
+		memset((void *)page, PAGE_UNUSED, start - page);
+	/*
+	 * We want to avoid memset(PAGE_UNUSED) when populating the vmemmap of
+	 * consecutive sections. Remember for the last added PMD the last
+	 * unused range in the populated PMD.
+	 */
+	if (!IS_ALIGNED(end, PMD_SIZE))
+		unused_sub_pmd_start = end;
+}
+
+/* Returns true if the PMD is completely unused and can be freed. */
+static bool vmemmap_unuse_sub_pmd(unsigned long start, unsigned long end)
+{
+	unsigned long page = ALIGN_DOWN(start, PMD_SIZE);
+
+	vmemmap_flush_unused_sub_pmd();
+	memset((void *)start, PAGE_UNUSED, end - start);
+	return !memchr_inv((void *)page, PAGE_UNUSED, PMD_SIZE);
+}
+
+/* __ref: we'll only call vmemmap_alloc_block() via vmemmap_populate() */
+static int __ref modify_pte_table(pmd_t *pmd, unsigned long addr,
+				  unsigned long end, bool add, bool direct)
+{
+	unsigned long prot, pages = 0;
+	int ret = -ENOMEM;
+	pte_t *pte;
+
+	prot = pgprot_val(PAGE_KERNEL);
+	if (!MACHINE_HAS_NX)
+		prot &= ~_PAGE_NOEXEC;
+
+	pte = pte_offset_kernel(pmd, addr);
+	for (; addr < end; addr += PAGE_SIZE, pte++) {
+		if (!add) {
+			if (pte_none(*pte))
+				continue;
+			if (!direct)
+				vmem_free_pages((unsigned long) pfn_to_virt(pte_pfn(*pte)), 0);
+			pte_clear(&init_mm, addr, pte);
+		} else if (pte_none(*pte)) {
+			if (!direct) {
+				void *new_page = vmemmap_alloc_block(PAGE_SIZE, NUMA_NO_NODE);
+
+				if (!new_page)
+					goto out;
+				set_pte(pte, __pte(__pa(new_page) | prot));
+			} else {
+				set_pte(pte, __pte(__pa(addr) | prot));
+			}
+		} else {
+			continue;
+		}
+		pages++;
+	}
+	ret = 0;
+out:
+	if (direct)
+		update_page_count(PG_DIRECT_MAP_4K, add ? pages : -pages);
+	return ret;
+}
+
+static void try_free_pte_table(pmd_t *pmd, unsigned long start)
+{
+	pte_t *pte;
+	int i;
+
+	/* We can safely assume this is fully in 1:1 mapping & vmemmap area */
+	pte = pte_offset_kernel(pmd, start);
+	for (i = 0; i < PTRS_PER_PTE; i++, pte++) {
+		if (!pte_none(*pte))
+			return;
+	}
+	vmem_pte_free((unsigned long *) pmd_deref(*pmd));
+	pmd_clear(pmd);
+}
+
+/* __ref: we'll only call vmemmap_alloc_block() via vmemmap_populate() */
+static int __ref modify_pmd_table(pud_t *pud, unsigned long addr,
+				  unsigned long end, bool add, bool direct)
+{
+	unsigned long next, prot, pages = 0;
+	int ret = -ENOMEM;
+	pmd_t *pmd;
+	pte_t *pte;
+
+	prot = pgprot_val(SEGMENT_KERNEL);
+	if (!MACHINE_HAS_NX)
+		prot &= ~_SEGMENT_ENTRY_NOEXEC;
+
+	pmd = pmd_offset(pud, addr);
+	for (; addr < end; addr = next, pmd++) {
+		next = pmd_addr_end(addr, end);
+		if (!add) {
+			if (pmd_none(*pmd))
+				continue;
+			if (pmd_large(*pmd)) {
+				if (IS_ALIGNED(addr, PMD_SIZE) &&
+				    IS_ALIGNED(next, PMD_SIZE)) {
+					if (!direct)
+						vmem_free_pages(pmd_deref(*pmd), get_order(PMD_SIZE));
+					pmd_clear(pmd);
+					pages++;
+				} else if (!direct && vmemmap_unuse_sub_pmd(addr, next)) {
+					vmem_free_pages(pmd_deref(*pmd), get_order(PMD_SIZE));
+					pmd_clear(pmd);
+				}
+				continue;
+			}
+		} else if (pmd_none(*pmd)) {
+			if (IS_ALIGNED(addr, PMD_SIZE) &&
+			    IS_ALIGNED(next, PMD_SIZE) &&
+			    MACHINE_HAS_EDAT1 && direct &&
+			    !debug_pagealloc_enabled()) {
+				set_pmd(pmd, __pmd(__pa(addr) | prot));
+				pages++;
+				continue;
+			} else if (!direct && MACHINE_HAS_EDAT1) {
+				void *new_page;
+
+				/*
+				 * Use 1MB frames for vmemmap if available. We
+				 * always use large frames even if they are only
+				 * partially used. Otherwise we would have also
+				 * page tables since vmemmap_populate gets
+				 * called for each section separately.
+				 */
+				new_page = vmemmap_alloc_block(PMD_SIZE, NUMA_NO_NODE);
+				if (new_page) {
+					set_pmd(pmd, __pmd(__pa(new_page) | prot));
+					if (!IS_ALIGNED(addr, PMD_SIZE) ||
+					    !IS_ALIGNED(next, PMD_SIZE)) {
+						vmemmap_use_new_sub_pmd(addr, next);
+					}
+					continue;
+				}
+			}
+			pte = vmem_pte_alloc();
+			if (!pte)
+				goto out;
+			pmd_populate(&init_mm, pmd, pte);
+		} else if (pmd_large(*pmd)) {
+			if (!direct)
+				vmemmap_use_sub_pmd(addr, next);
+			continue;
+		}
+		ret = modify_pte_table(pmd, addr, next, add, direct);
+		if (ret)
+			goto out;
+		if (!add)
+			try_free_pte_table(pmd, addr & PMD_MASK);
+	}
+	ret = 0;
+out:
+	if (direct)
+		update_page_count(PG_DIRECT_MAP_1M, add ? pages : -pages);
+	return ret;
+}
+
+static void try_free_pmd_table(pud_t *pud, unsigned long start)
+{
+	pmd_t *pmd;
+	int i;
+
+	pmd = pmd_offset(pud, start);
+	for (i = 0; i < PTRS_PER_PMD; i++, pmd++)
+		if (!pmd_none(*pmd))
+			return;
+	vmem_free_pages(pud_deref(*pud), CRST_ALLOC_ORDER);
+	pud_clear(pud);
+}
+
+static int modify_pud_table(p4d_t *p4d, unsigned long addr, unsigned long end,
+			    bool add, bool direct)
+{
+	unsigned long next, prot, pages = 0;
+	int ret = -ENOMEM;
+	pud_t *pud;
+	pmd_t *pmd;
+
+	prot = pgprot_val(REGION3_KERNEL);
+	if (!MACHINE_HAS_NX)
+		prot &= ~_REGION_ENTRY_NOEXEC;
+	pud = pud_offset(p4d, addr);
+	for (; addr < end; addr = next, pud++) {
+		next = pud_addr_end(addr, end);
+		if (!add) {
+			if (pud_none(*pud))
+				continue;
+			if (pud_large(*pud)) {
+				if (IS_ALIGNED(addr, PUD_SIZE) &&
+				    IS_ALIGNED(next, PUD_SIZE)) {
+					pud_clear(pud);
+					pages++;
+				}
+				continue;
+			}
+		} else if (pud_none(*pud)) {
+			if (IS_ALIGNED(addr, PUD_SIZE) &&
+			    IS_ALIGNED(next, PUD_SIZE) &&
+			    MACHINE_HAS_EDAT2 && direct &&
+			    !debug_pagealloc_enabled()) {
+				set_pud(pud, __pud(__pa(addr) | prot));
+				pages++;
+				continue;
+			}
+			pmd = vmem_crst_alloc(_SEGMENT_ENTRY_EMPTY);
+			if (!pmd)
+				goto out;
+			pud_populate(&init_mm, pud, pmd);
+		} else if (pud_large(*pud)) {
+			continue;
+		}
+		ret = modify_pmd_table(pud, addr, next, add, direct);
+		if (ret)
+			goto out;
+		if (!add)
+			try_free_pmd_table(pud, addr & PUD_MASK);
+	}
+	ret = 0;
+out:
+	if (direct)
+		update_page_count(PG_DIRECT_MAP_2G, add ? pages : -pages);
+	return ret;
+}
+
+static void try_free_pud_table(p4d_t *p4d, unsigned long start)
+{
+	pud_t *pud;
+	int i;
+
+	pud = pud_offset(p4d, start);
+	for (i = 0; i < PTRS_PER_PUD; i++, pud++) {
+		if (!pud_none(*pud))
+			return;
+	}
+	vmem_free_pages(p4d_deref(*p4d), CRST_ALLOC_ORDER);
+	p4d_clear(p4d);
+}
+
+static int modify_p4d_table(pgd_t *pgd, unsigned long addr, unsigned long end,
+			    bool add, bool direct)
+{
+	unsigned long next;
+	int ret = -ENOMEM;
+	p4d_t *p4d;
+	pud_t *pud;
+
+	p4d = p4d_offset(pgd, addr);
+	for (; addr < end; addr = next, p4d++) {
+		next = p4d_addr_end(addr, end);
+		if (!add) {
+			if (p4d_none(*p4d))
+				continue;
+		} else if (p4d_none(*p4d)) {
+			pud = vmem_crst_alloc(_REGION3_ENTRY_EMPTY);
+			if (!pud)
+				goto out;
+			p4d_populate(&init_mm, p4d, pud);
+		}
+		ret = modify_pud_table(p4d, addr, next, add, direct);
+		if (ret)
+			goto out;
+		if (!add)
+			try_free_pud_table(p4d, addr & P4D_MASK);
+	}
+	ret = 0;
+out:
+	return ret;
+}
+
+static void try_free_p4d_table(pgd_t *pgd, unsigned long start)
+{
+	p4d_t *p4d;
+	int i;
+
+	p4d = p4d_offset(pgd, start);
+	for (i = 0; i < PTRS_PER_P4D; i++, p4d++) {
+		if (!p4d_none(*p4d))
+			return;
+	}
+	vmem_free_pages(pgd_deref(*pgd), CRST_ALLOC_ORDER);
+	pgd_clear(pgd);
+}
+
+static int modify_pagetable(unsigned long start, unsigned long end, bool add,
+			    bool direct)
+{
+	unsigned long addr, next;
+	int ret = -ENOMEM;
+	pgd_t *pgd;
+	p4d_t *p4d;
+
+	if (WARN_ON_ONCE(!PAGE_ALIGNED(start | end)))
+		return -EINVAL;
+	/* Don't mess with any tables not fully in 1:1 mapping & vmemmap area */
+	if (WARN_ON_ONCE(end > VMALLOC_START))
+		return -EINVAL;
+	for (addr = start; addr < end; addr = next) {
+		next = pgd_addr_end(addr, end);
+		pgd = pgd_offset_k(addr);
+
+		if (!add) {
+			if (pgd_none(*pgd))
+				continue;
+		} else if (pgd_none(*pgd)) {
+			p4d = vmem_crst_alloc(_REGION2_ENTRY_EMPTY);
+			if (!p4d)
+				goto out;
+			pgd_populate(&init_mm, pgd, p4d);
+		}
+		ret = modify_p4d_table(pgd, addr, next, add, direct);
+		if (ret)
+			goto out;
+		if (!add)
+			try_free_p4d_table(pgd, addr & PGDIR_MASK);
+	}
+	ret = 0;
+out:
+	if (!add)
+		flush_tlb_kernel_range(start, end);
+	return ret;
+}
+
+static int add_pagetable(unsigned long start, unsigned long end, bool direct)
+{
+	return modify_pagetable(start, end, true, direct);
+}
+
+static int remove_pagetable(unsigned long start, unsigned long end, bool direct)
+{
+	return modify_pagetable(start, end, false, direct);
+}
+
+/*
+ * Add a physical memory range to the 1:1 mapping.
+ */
+static int vmem_add_range(unsigned long start, unsigned long size)
+{
+	start = (unsigned long)__va(start);
+	return add_pagetable(start, start + size, true);
+}
+
+/*
+ * Remove a physical memory range from the 1:1 mapping.
+ */
+static void vmem_remove_range(unsigned long start, unsigned long size)
+{
+	start = (unsigned long)__va(start);
+	remove_pagetable(start, start + size, true);
+}
+
+/*
+ * Add a backed mem_map array to the virtual mem_map array.
+ */
+int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node,
+			       struct vmem_altmap *altmap)
+{
+	int ret;
+
+	mutex_lock(&vmem_mutex);
+	/* We don't care about the node, just use NUMA_NO_NODE on allocations */
+	ret = add_pagetable(start, end, false);
+	if (ret)
+		remove_pagetable(start, end, false);
+	mutex_unlock(&vmem_mutex);
+	return ret;
+}
+
+void vmemmap_free(unsigned long start, unsigned long end,
+		  struct vmem_altmap *altmap)
+{
+	mutex_lock(&vmem_mutex);
+	remove_pagetable(start, end, false);
+	mutex_unlock(&vmem_mutex);
+}
+
+void vmem_remove_mapping(unsigned long start, unsigned long size)
+{
+	mutex_lock(&vmem_mutex);
+	vmem_remove_range(start, size);
+	mutex_unlock(&vmem_mutex);
+}
+
+struct range arch_get_mappable_range(void)
+{
+	struct range mhp_range;
+
+	mhp_range.start = 0;
+	mhp_range.end = max_mappable - 1;
+	return mhp_range;
+}
+
+int vmem_add_mapping(unsigned long start, unsigned long size)
+{
+	struct range range = arch_get_mappable_range();
+	int ret;
+
+	if (start < range.start ||
+	    start + size > range.end + 1 ||
+	    start + size < start)
+		return -ERANGE;
+
+	mutex_lock(&vmem_mutex);
+	ret = vmem_add_range(start, size);
+	if (ret)
+		vmem_remove_range(start, size);
+	mutex_unlock(&vmem_mutex);
+	return ret;
+}
+
+/*
+ * Allocate new or return existing page-table entry, but do not map it
+ * to any physical address. If missing, allocate segment- and region-
+ * table entries along. Meeting a large segment- or region-table entry
+ * while traversing is an error, since the function is expected to be
+ * called against virtual regions reserved for 4KB mappings only.
+ */
+pte_t *vmem_get_alloc_pte(unsigned long addr, bool alloc)
+{
+	pte_t *ptep = NULL;
+	pgd_t *pgd;
+	p4d_t *p4d;
+	pud_t *pud;
+	pmd_t *pmd;
+	pte_t *pte;
+
+	pgd = pgd_offset_k(addr);
+	if (pgd_none(*pgd)) {
+		if (!alloc)
+			goto out;
+		p4d = vmem_crst_alloc(_REGION2_ENTRY_EMPTY);
+		if (!p4d)
+			goto out;
+		pgd_populate(&init_mm, pgd, p4d);
+	}
+	p4d = p4d_offset(pgd, addr);
+	if (p4d_none(*p4d)) {
+		if (!alloc)
+			goto out;
+		pud = vmem_crst_alloc(_REGION3_ENTRY_EMPTY);
+		if (!pud)
+			goto out;
+		p4d_populate(&init_mm, p4d, pud);
+	}
+	pud = pud_offset(p4d, addr);
+	if (pud_none(*pud)) {
+		if (!alloc)
+			goto out;
+		pmd = vmem_crst_alloc(_SEGMENT_ENTRY_EMPTY);
+		if (!pmd)
+			goto out;
+		pud_populate(&init_mm, pud, pmd);
+	} else if (WARN_ON_ONCE(pud_large(*pud))) {
+		goto out;
+	}
+	pmd = pmd_offset(pud, addr);
+	if (pmd_none(*pmd)) {
+		if (!alloc)
+			goto out;
+		pte = vmem_pte_alloc();
+		if (!pte)
+			goto out;
+		pmd_populate(&init_mm, pmd, pte);
+	} else if (WARN_ON_ONCE(pmd_large(*pmd))) {
+		goto out;
+	}
+	ptep = pte_offset_kernel(pmd, addr);
+out:
+	return ptep;
+}
+
+int __vmem_map_4k_page(unsigned long addr, unsigned long phys, pgprot_t prot, bool alloc)
+{
+	pte_t *ptep, pte;
+
+	if (!IS_ALIGNED(addr, PAGE_SIZE))
+		return -EINVAL;
+	ptep = vmem_get_alloc_pte(addr, alloc);
+	if (!ptep)
+		return -ENOMEM;
+	__ptep_ipte(addr, ptep, 0, 0, IPTE_GLOBAL);
+	pte = mk_pte_phys(phys, prot);
+	set_pte(ptep, pte);
+	return 0;
+}
+
+int vmem_map_4k_page(unsigned long addr, unsigned long phys, pgprot_t prot)
+{
+	int rc;
+
+	mutex_lock(&vmem_mutex);
+	rc = __vmem_map_4k_page(addr, phys, prot, true);
+	mutex_unlock(&vmem_mutex);
+	return rc;
+}
+
+void vmem_unmap_4k_page(unsigned long addr)
+{
+	pte_t *ptep;
+
+	mutex_lock(&vmem_mutex);
+	ptep = virt_to_kpte(addr);
+	__ptep_ipte(addr, ptep, 0, 0, IPTE_GLOBAL);
+	pte_clear(&init_mm, addr, ptep);
+	mutex_unlock(&vmem_mutex);
+}
+
+void __init vmem_map_init(void)
+{
+	__set_memory_rox(_stext, _etext);
+	__set_memory_ro(_etext, __end_rodata);
+	__set_memory_rox(_sinittext, _einittext);
+	__set_memory_rox(__stext_amode31, __etext_amode31);
+	/*
+	 * If the BEAR-enhancement facility is not installed the first
+	 * prefix page is used to return to the previous context with
+	 * an LPSWE instruction and therefore must be executable.
+	 */
+	if (!static_key_enabled(&cpu_has_bear))
+		set_memory_x(0, 1);
+	if (debug_pagealloc_enabled()) {
+		/*
+		 * Use RELOC_HIDE() as long as __va(0) translates to NULL,
+		 * since performing pointer arithmetic on a NULL pointer
+		 * has undefined behavior and generates compiler warnings.
+		 */
+		__set_memory_4k(__va(0), RELOC_HIDE(__va(0), ident_map_size));
+	}
+	if (MACHINE_HAS_NX)
+		ctl_set_bit(0, 20);
+	pr_info("Write protected kernel read-only data: %luk\n",
+		(unsigned long)(__end_rodata - _stext) >> 10);
+}
diff --git a/arch/s390/net/Makefile b/arch/s390/net/Makefile
new file mode 100644
index 0000000000..8cab6deb04
--- /dev/null
+++ b/arch/s390/net/Makefile
@@ -0,0 +1,6 @@
+# SPDX-License-Identifier: GPL-2.0
+#
+# Arch-specific network modules
+#
+obj-$(CONFIG_BPF_JIT) += bpf_jit_comp.o
+obj-$(CONFIG_HAVE_PNETID) += pnet.o
diff --git a/arch/s390/net/bpf_jit.h b/arch/s390/net/bpf_jit.h
new file mode 100644
index 0000000000..7822ea92e5
--- /dev/null
+++ b/arch/s390/net/bpf_jit.h
@@ -0,0 +1,55 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * BPF Jit compiler defines
+ *
+ * Copyright IBM Corp. 2012,2015
+ *
+ * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>
+ *	      Michael Holzheu <holzheu@linux.vnet.ibm.com>
+ */
+
+#ifndef __ARCH_S390_NET_BPF_JIT_H
+#define __ARCH_S390_NET_BPF_JIT_H
+
+#ifndef __ASSEMBLY__
+
+#include <linux/filter.h>
+#include <linux/types.h>
+
+#endif /* __ASSEMBLY__ */
+
+/*
+ * Stackframe layout (packed stack):
+ *
+ *				    ^ high
+ *	      +---------------+     |
+ *	      | old backchain |     |
+ *	      +---------------+     |
+ *	      |   r15 - r6    |     |
+ *	      +---------------+     |
+ *	      | 4 byte align  |     |
+ *	      | tail_call_cnt |     |
+ * BFP	   -> +===============+     |
+ *	      |		      |     |
+ *	      |   BPF stack   |     |
+ *	      |		      |     |
+ * R15+160 -> +---------------+     |
+ *	      | new backchain |     |
+ * R15+152 -> +---------------+     |
+ *	      | + 152 byte SA |     |
+ * R15	   -> +---------------+     + low
+ *
+ * We get 160 bytes stack space from calling function, but only use
+ * 12 * 8 byte for old backchain, r15..r6, and tail_call_cnt.
+ *
+ * The stack size used by the BPF program ("BPF stack" above) is passed
+ * via "aux->stack_depth".
+ */
+#define STK_SPACE_ADD	(160)
+#define STK_160_UNUSED	(160 - 12 * 8)
+#define STK_OFF		(STK_SPACE_ADD - STK_160_UNUSED)
+
+#define STK_OFF_R6	(160 - 11 * 8)	/* Offset of r6 on stack */
+#define STK_OFF_TCCNT	(160 - 12 * 8)	/* Offset of tail_call_cnt on stack */
+
+#endif /* __ARCH_S390_NET_BPF_JIT_H */
diff --git a/arch/s390/net/bpf_jit_comp.c b/arch/s390/net/bpf_jit_comp.c
new file mode 100644
index 0000000000..e507692e51
--- /dev/null
+++ b/arch/s390/net/bpf_jit_comp.c
@@ -0,0 +1,2537 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * BPF Jit compiler for s390.
+ *
+ * Minimum build requirements:
+ *
+ *  - HAVE_MARCH_Z196_FEATURES: laal, laalg
+ *  - HAVE_MARCH_Z10_FEATURES: msfi, cgrj, clgrj
+ *  - HAVE_MARCH_Z9_109_FEATURES: alfi, llilf, clfi, oilf, nilf
+ *  - 64BIT
+ *
+ * Copyright IBM Corp. 2012,2015
+ *
+ * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>
+ *	      Michael Holzheu <holzheu@linux.vnet.ibm.com>
+ */
+
+#define KMSG_COMPONENT "bpf_jit"
+#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
+
+#include <linux/netdevice.h>
+#include <linux/filter.h>
+#include <linux/init.h>
+#include <linux/bpf.h>
+#include <linux/mm.h>
+#include <linux/kernel.h>
+#include <asm/cacheflush.h>
+#include <asm/extable.h>
+#include <asm/dis.h>
+#include <asm/facility.h>
+#include <asm/nospec-branch.h>
+#include <asm/set_memory.h>
+#include <asm/text-patching.h>
+#include "bpf_jit.h"
+
+struct bpf_jit {
+	u32 seen;		/* Flags to remember seen eBPF instructions */
+	u32 seen_reg[16];	/* Array to remember which registers are used */
+	u32 *addrs;		/* Array with relative instruction addresses */
+	u8 *prg_buf;		/* Start of program */
+	int size;		/* Size of program and literal pool */
+	int size_prg;		/* Size of program */
+	int prg;		/* Current position in program */
+	int lit32_start;	/* Start of 32-bit literal pool */
+	int lit32;		/* Current position in 32-bit literal pool */
+	int lit64_start;	/* Start of 64-bit literal pool */
+	int lit64;		/* Current position in 64-bit literal pool */
+	int base_ip;		/* Base address for literal pool */
+	int exit_ip;		/* Address of exit */
+	int r1_thunk_ip;	/* Address of expoline thunk for 'br %r1' */
+	int r14_thunk_ip;	/* Address of expoline thunk for 'br %r14' */
+	int tail_call_start;	/* Tail call start offset */
+	int excnt;		/* Number of exception table entries */
+	int prologue_plt_ret;	/* Return address for prologue hotpatch PLT */
+	int prologue_plt;	/* Start of prologue hotpatch PLT */
+};
+
+#define SEEN_MEM	BIT(0)		/* use mem[] for temporary storage */
+#define SEEN_LITERAL	BIT(1)		/* code uses literals */
+#define SEEN_FUNC	BIT(2)		/* calls C functions */
+#define SEEN_STACK	(SEEN_FUNC | SEEN_MEM)
+
+/*
+ * s390 registers
+ */
+#define REG_W0		(MAX_BPF_JIT_REG + 0)	/* Work register 1 (even) */
+#define REG_W1		(MAX_BPF_JIT_REG + 1)	/* Work register 2 (odd) */
+#define REG_L		(MAX_BPF_JIT_REG + 2)	/* Literal pool register */
+#define REG_15		(MAX_BPF_JIT_REG + 3)	/* Register 15 */
+#define REG_0		REG_W0			/* Register 0 */
+#define REG_1		REG_W1			/* Register 1 */
+#define REG_2		BPF_REG_1		/* Register 2 */
+#define REG_3		BPF_REG_2		/* Register 3 */
+#define REG_4		BPF_REG_3		/* Register 4 */
+#define REG_7		BPF_REG_6		/* Register 7 */
+#define REG_8		BPF_REG_7		/* Register 8 */
+#define REG_14		BPF_REG_0		/* Register 14 */
+
+/*
+ * Mapping of BPF registers to s390 registers
+ */
+static const int reg2hex[] = {
+	/* Return code */
+	[BPF_REG_0]	= 14,
+	/* Function parameters */
+	[BPF_REG_1]	= 2,
+	[BPF_REG_2]	= 3,
+	[BPF_REG_3]	= 4,
+	[BPF_REG_4]	= 5,
+	[BPF_REG_5]	= 6,
+	/* Call saved registers */
+	[BPF_REG_6]	= 7,
+	[BPF_REG_7]	= 8,
+	[BPF_REG_8]	= 9,
+	[BPF_REG_9]	= 10,
+	/* BPF stack pointer */
+	[BPF_REG_FP]	= 13,
+	/* Register for blinding */
+	[BPF_REG_AX]	= 12,
+	/* Work registers for s390x backend */
+	[REG_W0]	= 0,
+	[REG_W1]	= 1,
+	[REG_L]		= 11,
+	[REG_15]	= 15,
+};
+
+static inline u32 reg(u32 dst_reg, u32 src_reg)
+{
+	return reg2hex[dst_reg] << 4 | reg2hex[src_reg];
+}
+
+static inline u32 reg_high(u32 reg)
+{
+	return reg2hex[reg] << 4;
+}
+
+static inline void reg_set_seen(struct bpf_jit *jit, u32 b1)
+{
+	u32 r1 = reg2hex[b1];
+
+	if (r1 >= 6 && r1 <= 15 && !jit->seen_reg[r1])
+		jit->seen_reg[r1] = 1;
+}
+
+#define REG_SET_SEEN(b1)					\
+({								\
+	reg_set_seen(jit, b1);					\
+})
+
+#define REG_SEEN(b1) jit->seen_reg[reg2hex[(b1)]]
+
+/*
+ * EMIT macros for code generation
+ */
+
+#define _EMIT2(op)						\
+({								\
+	if (jit->prg_buf)					\
+		*(u16 *) (jit->prg_buf + jit->prg) = (op);	\
+	jit->prg += 2;						\
+})
+
+#define EMIT2(op, b1, b2)					\
+({								\
+	_EMIT2((op) | reg(b1, b2));				\
+	REG_SET_SEEN(b1);					\
+	REG_SET_SEEN(b2);					\
+})
+
+#define _EMIT4(op)						\
+({								\
+	if (jit->prg_buf)					\
+		*(u32 *) (jit->prg_buf + jit->prg) = (op);	\
+	jit->prg += 4;						\
+})
+
+#define EMIT4(op, b1, b2)					\
+({								\
+	_EMIT4((op) | reg(b1, b2));				\
+	REG_SET_SEEN(b1);					\
+	REG_SET_SEEN(b2);					\
+})
+
+#define EMIT4_RRF(op, b1, b2, b3)				\
+({								\
+	_EMIT4((op) | reg_high(b3) << 8 | reg(b1, b2));		\
+	REG_SET_SEEN(b1);					\
+	REG_SET_SEEN(b2);					\
+	REG_SET_SEEN(b3);					\
+})
+
+#define _EMIT4_DISP(op, disp)					\
+({								\
+	unsigned int __disp = (disp) & 0xfff;			\
+	_EMIT4((op) | __disp);					\
+})
+
+#define EMIT4_DISP(op, b1, b2, disp)				\
+({								\
+	_EMIT4_DISP((op) | reg_high(b1) << 16 |			\
+		    reg_high(b2) << 8, (disp));			\
+	REG_SET_SEEN(b1);					\
+	REG_SET_SEEN(b2);					\
+})
+
+#define EMIT4_IMM(op, b1, imm)					\
+({								\
+	unsigned int __imm = (imm) & 0xffff;			\
+	_EMIT4((op) | reg_high(b1) << 16 | __imm);		\
+	REG_SET_SEEN(b1);					\
+})
+
+#define EMIT4_PCREL(op, pcrel)					\
+({								\
+	long __pcrel = ((pcrel) >> 1) & 0xffff;			\
+	_EMIT4((op) | __pcrel);					\
+})
+
+#define EMIT4_PCREL_RIC(op, mask, target)			\
+({								\
+	int __rel = ((target) - jit->prg) / 2;			\
+	_EMIT4((op) | (mask) << 20 | (__rel & 0xffff));		\
+})
+
+#define _EMIT6(op1, op2)					\
+({								\
+	if (jit->prg_buf) {					\
+		*(u32 *) (jit->prg_buf + jit->prg) = (op1);	\
+		*(u16 *) (jit->prg_buf + jit->prg + 4) = (op2);	\
+	}							\
+	jit->prg += 6;						\
+})
+
+#define _EMIT6_DISP(op1, op2, disp)				\
+({								\
+	unsigned int __disp = (disp) & 0xfff;			\
+	_EMIT6((op1) | __disp, op2);				\
+})
+
+#define _EMIT6_DISP_LH(op1, op2, disp)				\
+({								\
+	u32 _disp = (u32) (disp);				\
+	unsigned int __disp_h = _disp & 0xff000;		\
+	unsigned int __disp_l = _disp & 0x00fff;		\
+	_EMIT6((op1) | __disp_l, (op2) | __disp_h >> 4);	\
+})
+
+#define EMIT6_DISP_LH(op1, op2, b1, b2, b3, disp)		\
+({								\
+	_EMIT6_DISP_LH((op1) | reg(b1, b2) << 16 |		\
+		       reg_high(b3) << 8, op2, disp);		\
+	REG_SET_SEEN(b1);					\
+	REG_SET_SEEN(b2);					\
+	REG_SET_SEEN(b3);					\
+})
+
+#define EMIT6_PCREL_RIEB(op1, op2, b1, b2, mask, target)	\
+({								\
+	unsigned int rel = (int)((target) - jit->prg) / 2;	\
+	_EMIT6((op1) | reg(b1, b2) << 16 | (rel & 0xffff),	\
+	       (op2) | (mask) << 12);				\
+	REG_SET_SEEN(b1);					\
+	REG_SET_SEEN(b2);					\
+})
+
+#define EMIT6_PCREL_RIEC(op1, op2, b1, imm, mask, target)	\
+({								\
+	unsigned int rel = (int)((target) - jit->prg) / 2;	\
+	_EMIT6((op1) | (reg_high(b1) | (mask)) << 16 |		\
+		(rel & 0xffff), (op2) | ((imm) & 0xff) << 8);	\
+	REG_SET_SEEN(b1);					\
+	BUILD_BUG_ON(((unsigned long) (imm)) > 0xff);		\
+})
+
+#define EMIT6_PCREL(op1, op2, b1, b2, i, off, mask)		\
+({								\
+	int rel = (addrs[(i) + (off) + 1] - jit->prg) / 2;	\
+	_EMIT6((op1) | reg(b1, b2) << 16 | (rel & 0xffff), (op2) | (mask));\
+	REG_SET_SEEN(b1);					\
+	REG_SET_SEEN(b2);					\
+})
+
+#define EMIT6_PCREL_RILB(op, b, target)				\
+({								\
+	unsigned int rel = (int)((target) - jit->prg) / 2;	\
+	_EMIT6((op) | reg_high(b) << 16 | rel >> 16, rel & 0xffff);\
+	REG_SET_SEEN(b);					\
+})
+
+#define EMIT6_PCREL_RIL(op, target)				\
+({								\
+	unsigned int rel = (int)((target) - jit->prg) / 2;	\
+	_EMIT6((op) | rel >> 16, rel & 0xffff);			\
+})
+
+#define EMIT6_PCREL_RILC(op, mask, target)			\
+({								\
+	EMIT6_PCREL_RIL((op) | (mask) << 20, (target));		\
+})
+
+#define _EMIT6_IMM(op, imm)					\
+({								\
+	unsigned int __imm = (imm);				\
+	_EMIT6((op) | (__imm >> 16), __imm & 0xffff);		\
+})
+
+#define EMIT6_IMM(op, b1, imm)					\
+({								\
+	_EMIT6_IMM((op) | reg_high(b1) << 16, imm);		\
+	REG_SET_SEEN(b1);					\
+})
+
+#define _EMIT_CONST_U32(val)					\
+({								\
+	unsigned int ret;					\
+	ret = jit->lit32;					\
+	if (jit->prg_buf)					\
+		*(u32 *)(jit->prg_buf + jit->lit32) = (u32)(val);\
+	jit->lit32 += 4;					\
+	ret;							\
+})
+
+#define EMIT_CONST_U32(val)					\
+({								\
+	jit->seen |= SEEN_LITERAL;				\
+	_EMIT_CONST_U32(val) - jit->base_ip;			\
+})
+
+#define _EMIT_CONST_U64(val)					\
+({								\
+	unsigned int ret;					\
+	ret = jit->lit64;					\
+	if (jit->prg_buf)					\
+		*(u64 *)(jit->prg_buf + jit->lit64) = (u64)(val);\
+	jit->lit64 += 8;					\
+	ret;							\
+})
+
+#define EMIT_CONST_U64(val)					\
+({								\
+	jit->seen |= SEEN_LITERAL;				\
+	_EMIT_CONST_U64(val) - jit->base_ip;			\
+})
+
+#define EMIT_ZERO(b1)						\
+({								\
+	if (!fp->aux->verifier_zext) {				\
+		/* llgfr %dst,%dst (zero extend to 64 bit) */	\
+		EMIT4(0xb9160000, b1, b1);			\
+		REG_SET_SEEN(b1);				\
+	}							\
+})
+
+/*
+ * Return whether this is the first pass. The first pass is special, since we
+ * don't know any sizes yet, and thus must be conservative.
+ */
+static bool is_first_pass(struct bpf_jit *jit)
+{
+	return jit->size == 0;
+}
+
+/*
+ * Return whether this is the code generation pass. The code generation pass is
+ * special, since we should change as little as possible.
+ */
+static bool is_codegen_pass(struct bpf_jit *jit)
+{
+	return jit->prg_buf;
+}
+
+/*
+ * Return whether "rel" can be encoded as a short PC-relative offset
+ */
+static bool is_valid_rel(int rel)
+{
+	return rel >= -65536 && rel <= 65534;
+}
+
+/*
+ * Return whether "off" can be reached using a short PC-relative offset
+ */
+static bool can_use_rel(struct bpf_jit *jit, int off)
+{
+	return is_valid_rel(off - jit->prg);
+}
+
+/*
+ * Return whether given displacement can be encoded using
+ * Long-Displacement Facility
+ */
+static bool is_valid_ldisp(int disp)
+{
+	return disp >= -524288 && disp <= 524287;
+}
+
+/*
+ * Return whether the next 32-bit literal pool entry can be referenced using
+ * Long-Displacement Facility
+ */
+static bool can_use_ldisp_for_lit32(struct bpf_jit *jit)
+{
+	return is_valid_ldisp(jit->lit32 - jit->base_ip);
+}
+
+/*
+ * Return whether the next 64-bit literal pool entry can be referenced using
+ * Long-Displacement Facility
+ */
+static bool can_use_ldisp_for_lit64(struct bpf_jit *jit)
+{
+	return is_valid_ldisp(jit->lit64 - jit->base_ip);
+}
+
+/*
+ * Fill whole space with illegal instructions
+ */
+static void jit_fill_hole(void *area, unsigned int size)
+{
+	memset(area, 0, size);
+}
+
+/*
+ * Save registers from "rs" (register start) to "re" (register end) on stack
+ */
+static void save_regs(struct bpf_jit *jit, u32 rs, u32 re)
+{
+	u32 off = STK_OFF_R6 + (rs - 6) * 8;
+
+	if (rs == re)
+		/* stg %rs,off(%r15) */
+		_EMIT6(0xe300f000 | rs << 20 | off, 0x0024);
+	else
+		/* stmg %rs,%re,off(%r15) */
+		_EMIT6_DISP(0xeb00f000 | rs << 20 | re << 16, 0x0024, off);
+}
+
+/*
+ * Restore registers from "rs" (register start) to "re" (register end) on stack
+ */
+static void restore_regs(struct bpf_jit *jit, u32 rs, u32 re, u32 stack_depth)
+{
+	u32 off = STK_OFF_R6 + (rs - 6) * 8;
+
+	if (jit->seen & SEEN_STACK)
+		off += STK_OFF + stack_depth;
+
+	if (rs == re)
+		/* lg %rs,off(%r15) */
+		_EMIT6(0xe300f000 | rs << 20 | off, 0x0004);
+	else
+		/* lmg %rs,%re,off(%r15) */
+		_EMIT6_DISP(0xeb00f000 | rs << 20 | re << 16, 0x0004, off);
+}
+
+/*
+ * Return first seen register (from start)
+ */
+static int get_start(struct bpf_jit *jit, int start)
+{
+	int i;
+
+	for (i = start; i <= 15; i++) {
+		if (jit->seen_reg[i])
+			return i;
+	}
+	return 0;
+}
+
+/*
+ * Return last seen register (from start) (gap >= 2)
+ */
+static int get_end(struct bpf_jit *jit, int start)
+{
+	int i;
+
+	for (i = start; i < 15; i++) {
+		if (!jit->seen_reg[i] && !jit->seen_reg[i + 1])
+			return i - 1;
+	}
+	return jit->seen_reg[15] ? 15 : 14;
+}
+
+#define REGS_SAVE	1
+#define REGS_RESTORE	0
+/*
+ * Save and restore clobbered registers (6-15) on stack.
+ * We save/restore registers in chunks with gap >= 2 registers.
+ */
+static void save_restore_regs(struct bpf_jit *jit, int op, u32 stack_depth)
+{
+	const int last = 15, save_restore_size = 6;
+	int re = 6, rs;
+
+	if (is_first_pass(jit)) {
+		/*
+		 * We don't know yet which registers are used. Reserve space
+		 * conservatively.
+		 */
+		jit->prg += (last - re + 1) * save_restore_size;
+		return;
+	}
+
+	do {
+		rs = get_start(jit, re);
+		if (!rs)
+			break;
+		re = get_end(jit, rs + 1);
+		if (op == REGS_SAVE)
+			save_regs(jit, rs, re);
+		else
+			restore_regs(jit, rs, re, stack_depth);
+		re++;
+	} while (re <= last);
+}
+
+static void bpf_skip(struct bpf_jit *jit, int size)
+{
+	if (size >= 6 && !is_valid_rel(size)) {
+		/* brcl 0xf,size */
+		EMIT6_PCREL_RIL(0xc0f4000000, size);
+		size -= 6;
+	} else if (size >= 4 && is_valid_rel(size)) {
+		/* brc 0xf,size */
+		EMIT4_PCREL(0xa7f40000, size);
+		size -= 4;
+	}
+	while (size >= 2) {
+		/* bcr 0,%0 */
+		_EMIT2(0x0700);
+		size -= 2;
+	}
+}
+
+/*
+ * PLT for hotpatchable calls. The calling convention is the same as for the
+ * ftrace hotpatch trampolines: %r0 is return address, %r1 is clobbered.
+ */
+extern const char bpf_plt[];
+extern const char bpf_plt_ret[];
+extern const char bpf_plt_target[];
+extern const char bpf_plt_end[];
+#define BPF_PLT_SIZE 32
+asm(
+	".pushsection .rodata\n"
+	"	.balign 8\n"
+	"bpf_plt:\n"
+	"	lgrl %r0,bpf_plt_ret\n"
+	"	lgrl %r1,bpf_plt_target\n"
+	"	br %r1\n"
+	"	.balign 8\n"
+	"bpf_plt_ret: .quad 0\n"
+	"bpf_plt_target: .quad 0\n"
+	"bpf_plt_end:\n"
+	"	.popsection\n"
+);
+
+static void bpf_jit_plt(void *plt, void *ret, void *target)
+{
+	memcpy(plt, bpf_plt, BPF_PLT_SIZE);
+	*(void **)((char *)plt + (bpf_plt_ret - bpf_plt)) = ret;
+	*(void **)((char *)plt + (bpf_plt_target - bpf_plt)) = target ?: ret;
+}
+
+/*
+ * Emit function prologue
+ *
+ * Save registers and create stack frame if necessary.
+ * See stack frame layout description in "bpf_jit.h"!
+ */
+static void bpf_jit_prologue(struct bpf_jit *jit, struct bpf_prog *fp,
+			     u32 stack_depth)
+{
+	/* No-op for hotpatching */
+	/* brcl 0,prologue_plt */
+	EMIT6_PCREL_RILC(0xc0040000, 0, jit->prologue_plt);
+	jit->prologue_plt_ret = jit->prg;
+
+	if (fp->aux->func_idx == 0) {
+		/* Initialize the tail call counter in the main program. */
+		/* xc STK_OFF_TCCNT(4,%r15),STK_OFF_TCCNT(%r15) */
+		_EMIT6(0xd703f000 | STK_OFF_TCCNT, 0xf000 | STK_OFF_TCCNT);
+	} else {
+		/*
+		 * Skip the tail call counter initialization in subprograms.
+		 * Insert nops in order to have tail_call_start at a
+		 * predictable offset.
+		 */
+		bpf_skip(jit, 6);
+	}
+	/* Tail calls have to skip above initialization */
+	jit->tail_call_start = jit->prg;
+	/* Save registers */
+	save_restore_regs(jit, REGS_SAVE, stack_depth);
+	/* Setup literal pool */
+	if (is_first_pass(jit) || (jit->seen & SEEN_LITERAL)) {
+		if (!is_first_pass(jit) &&
+		    is_valid_ldisp(jit->size - (jit->prg + 2))) {
+			/* basr %l,0 */
+			EMIT2(0x0d00, REG_L, REG_0);
+			jit->base_ip = jit->prg;
+		} else {
+			/* larl %l,lit32_start */
+			EMIT6_PCREL_RILB(0xc0000000, REG_L, jit->lit32_start);
+			jit->base_ip = jit->lit32_start;
+		}
+	}
+	/* Setup stack and backchain */
+	if (is_first_pass(jit) || (jit->seen & SEEN_STACK)) {
+		if (is_first_pass(jit) || (jit->seen & SEEN_FUNC))
+			/* lgr %w1,%r15 (backchain) */
+			EMIT4(0xb9040000, REG_W1, REG_15);
+		/* la %bfp,STK_160_UNUSED(%r15) (BPF frame pointer) */
+		EMIT4_DISP(0x41000000, BPF_REG_FP, REG_15, STK_160_UNUSED);
+		/* aghi %r15,-STK_OFF */
+		EMIT4_IMM(0xa70b0000, REG_15, -(STK_OFF + stack_depth));
+		if (is_first_pass(jit) || (jit->seen & SEEN_FUNC))
+			/* stg %w1,152(%r15) (backchain) */
+			EMIT6_DISP_LH(0xe3000000, 0x0024, REG_W1, REG_0,
+				      REG_15, 152);
+	}
+}
+
+/*
+ * Emit an expoline for a jump that follows
+ */
+static void emit_expoline(struct bpf_jit *jit)
+{
+	/* exrl %r0,.+10 */
+	EMIT6_PCREL_RIL(0xc6000000, jit->prg + 10);
+	/* j . */
+	EMIT4_PCREL(0xa7f40000, 0);
+}
+
+/*
+ * Emit __s390_indirect_jump_r1 thunk if necessary
+ */
+static void emit_r1_thunk(struct bpf_jit *jit)
+{
+	if (nospec_uses_trampoline()) {
+		jit->r1_thunk_ip = jit->prg;
+		emit_expoline(jit);
+		/* br %r1 */
+		_EMIT2(0x07f1);
+	}
+}
+
+/*
+ * Call r1 either directly or via __s390_indirect_jump_r1 thunk
+ */
+static void call_r1(struct bpf_jit *jit)
+{
+	if (nospec_uses_trampoline())
+		/* brasl %r14,__s390_indirect_jump_r1 */
+		EMIT6_PCREL_RILB(0xc0050000, REG_14, jit->r1_thunk_ip);
+	else
+		/* basr %r14,%r1 */
+		EMIT2(0x0d00, REG_14, REG_1);
+}
+
+/*
+ * Function epilogue
+ */
+static void bpf_jit_epilogue(struct bpf_jit *jit, u32 stack_depth)
+{
+	jit->exit_ip = jit->prg;
+	/* Load exit code: lgr %r2,%b0 */
+	EMIT4(0xb9040000, REG_2, BPF_REG_0);
+	/* Restore registers */
+	save_restore_regs(jit, REGS_RESTORE, stack_depth);
+	if (nospec_uses_trampoline()) {
+		jit->r14_thunk_ip = jit->prg;
+		/* Generate __s390_indirect_jump_r14 thunk */
+		emit_expoline(jit);
+	}
+	/* br %r14 */
+	_EMIT2(0x07fe);
+
+	if (is_first_pass(jit) || (jit->seen & SEEN_FUNC))
+		emit_r1_thunk(jit);
+
+	jit->prg = ALIGN(jit->prg, 8);
+	jit->prologue_plt = jit->prg;
+	if (jit->prg_buf)
+		bpf_jit_plt(jit->prg_buf + jit->prg,
+			    jit->prg_buf + jit->prologue_plt_ret, NULL);
+	jit->prg += BPF_PLT_SIZE;
+}
+
+static int get_probe_mem_regno(const u8 *insn)
+{
+	/*
+	 * insn must point to llgc, llgh, llgf or lg, which have destination
+	 * register at the same position.
+	 */
+	if (insn[0] != 0xe3) /* common llgc, llgh, llgf and lg prefix */
+		return -1;
+	if (insn[5] != 0x90 && /* llgc */
+	    insn[5] != 0x91 && /* llgh */
+	    insn[5] != 0x16 && /* llgf */
+	    insn[5] != 0x04) /* lg */
+		return -1;
+	return insn[1] >> 4;
+}
+
+bool ex_handler_bpf(const struct exception_table_entry *x, struct pt_regs *regs)
+{
+	regs->psw.addr = extable_fixup(x);
+	regs->gprs[x->data] = 0;
+	return true;
+}
+
+static int bpf_jit_probe_mem(struct bpf_jit *jit, struct bpf_prog *fp,
+			     int probe_prg, int nop_prg)
+{
+	struct exception_table_entry *ex;
+	int reg, prg;
+	s64 delta;
+	u8 *insn;
+	int i;
+
+	if (!fp->aux->extable)
+		/* Do nothing during early JIT passes. */
+		return 0;
+	insn = jit->prg_buf + probe_prg;
+	reg = get_probe_mem_regno(insn);
+	if (WARN_ON_ONCE(reg < 0))
+		/* JIT bug - unexpected probe instruction. */
+		return -1;
+	if (WARN_ON_ONCE(probe_prg + insn_length(*insn) != nop_prg))
+		/* JIT bug - gap between probe and nop instructions. */
+		return -1;
+	for (i = 0; i < 2; i++) {
+		if (WARN_ON_ONCE(jit->excnt >= fp->aux->num_exentries))
+			/* Verifier bug - not enough entries. */
+			return -1;
+		ex = &fp->aux->extable[jit->excnt];
+		/* Add extable entries for probe and nop instructions. */
+		prg = i == 0 ? probe_prg : nop_prg;
+		delta = jit->prg_buf + prg - (u8 *)&ex->insn;
+		if (WARN_ON_ONCE(delta < INT_MIN || delta > INT_MAX))
+			/* JIT bug - code and extable must be close. */
+			return -1;
+		ex->insn = delta;
+		/*
+		 * Always land on the nop. Note that extable infrastructure
+		 * ignores fixup field, it is handled by ex_handler_bpf().
+		 */
+		delta = jit->prg_buf + nop_prg - (u8 *)&ex->fixup;
+		if (WARN_ON_ONCE(delta < INT_MIN || delta > INT_MAX))
+			/* JIT bug - landing pad and extable must be close. */
+			return -1;
+		ex->fixup = delta;
+		ex->type = EX_TYPE_BPF;
+		ex->data = reg;
+		jit->excnt++;
+	}
+	return 0;
+}
+
+/*
+ * Sign-extend the register if necessary
+ */
+static int sign_extend(struct bpf_jit *jit, int r, u8 size, u8 flags)
+{
+	if (!(flags & BTF_FMODEL_SIGNED_ARG))
+		return 0;
+
+	switch (size) {
+	case 1:
+		/* lgbr %r,%r */
+		EMIT4(0xb9060000, r, r);
+		return 0;
+	case 2:
+		/* lghr %r,%r */
+		EMIT4(0xb9070000, r, r);
+		return 0;
+	case 4:
+		/* lgfr %r,%r */
+		EMIT4(0xb9140000, r, r);
+		return 0;
+	case 8:
+		return 0;
+	default:
+		return -1;
+	}
+}
+
+/*
+ * Compile one eBPF instruction into s390x code
+ *
+ * NOTE: Use noinline because for gcov (-fprofile-arcs) gcc allocates a lot of
+ * stack space for the large switch statement.
+ */
+static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp,
+				 int i, bool extra_pass, u32 stack_depth)
+{
+	struct bpf_insn *insn = &fp->insnsi[i];
+	u32 dst_reg = insn->dst_reg;
+	u32 src_reg = insn->src_reg;
+	int last, insn_count = 1;
+	u32 *addrs = jit->addrs;
+	s32 imm = insn->imm;
+	s16 off = insn->off;
+	int probe_prg = -1;
+	unsigned int mask;
+	int nop_prg;
+	int err;
+
+	if (BPF_CLASS(insn->code) == BPF_LDX &&
+	    BPF_MODE(insn->code) == BPF_PROBE_MEM)
+		probe_prg = jit->prg;
+
+	switch (insn->code) {
+	/*
+	 * BPF_MOV
+	 */
+	case BPF_ALU | BPF_MOV | BPF_X: /* dst = (u32) src */
+		/* llgfr %dst,%src */
+		EMIT4(0xb9160000, dst_reg, src_reg);
+		if (insn_is_zext(&insn[1]))
+			insn_count = 2;
+		break;
+	case BPF_ALU64 | BPF_MOV | BPF_X: /* dst = src */
+		/* lgr %dst,%src */
+		EMIT4(0xb9040000, dst_reg, src_reg);
+		break;
+	case BPF_ALU | BPF_MOV | BPF_K: /* dst = (u32) imm */
+		/* llilf %dst,imm */
+		EMIT6_IMM(0xc00f0000, dst_reg, imm);
+		if (insn_is_zext(&insn[1]))
+			insn_count = 2;
+		break;
+	case BPF_ALU64 | BPF_MOV | BPF_K: /* dst = imm */
+		/* lgfi %dst,imm */
+		EMIT6_IMM(0xc0010000, dst_reg, imm);
+		break;
+	/*
+	 * BPF_LD 64
+	 */
+	case BPF_LD | BPF_IMM | BPF_DW: /* dst = (u64) imm */
+	{
+		/* 16 byte instruction that uses two 'struct bpf_insn' */
+		u64 imm64;
+
+		imm64 = (u64)(u32) insn[0].imm | ((u64)(u32) insn[1].imm) << 32;
+		/* lgrl %dst,imm */
+		EMIT6_PCREL_RILB(0xc4080000, dst_reg, _EMIT_CONST_U64(imm64));
+		insn_count = 2;
+		break;
+	}
+	/*
+	 * BPF_ADD
+	 */
+	case BPF_ALU | BPF_ADD | BPF_X: /* dst = (u32) dst + (u32) src */
+		/* ar %dst,%src */
+		EMIT2(0x1a00, dst_reg, src_reg);
+		EMIT_ZERO(dst_reg);
+		break;
+	case BPF_ALU64 | BPF_ADD | BPF_X: /* dst = dst + src */
+		/* agr %dst,%src */
+		EMIT4(0xb9080000, dst_reg, src_reg);
+		break;
+	case BPF_ALU | BPF_ADD | BPF_K: /* dst = (u32) dst + (u32) imm */
+		if (imm != 0) {
+			/* alfi %dst,imm */
+			EMIT6_IMM(0xc20b0000, dst_reg, imm);
+		}
+		EMIT_ZERO(dst_reg);
+		break;
+	case BPF_ALU64 | BPF_ADD | BPF_K: /* dst = dst + imm */
+		if (!imm)
+			break;
+		/* agfi %dst,imm */
+		EMIT6_IMM(0xc2080000, dst_reg, imm);
+		break;
+	/*
+	 * BPF_SUB
+	 */
+	case BPF_ALU | BPF_SUB | BPF_X: /* dst = (u32) dst - (u32) src */
+		/* sr %dst,%src */
+		EMIT2(0x1b00, dst_reg, src_reg);
+		EMIT_ZERO(dst_reg);
+		break;
+	case BPF_ALU64 | BPF_SUB | BPF_X: /* dst = dst - src */
+		/* sgr %dst,%src */
+		EMIT4(0xb9090000, dst_reg, src_reg);
+		break;
+	case BPF_ALU | BPF_SUB | BPF_K: /* dst = (u32) dst - (u32) imm */
+		if (imm != 0) {
+			/* alfi %dst,-imm */
+			EMIT6_IMM(0xc20b0000, dst_reg, -imm);
+		}
+		EMIT_ZERO(dst_reg);
+		break;
+	case BPF_ALU64 | BPF_SUB | BPF_K: /* dst = dst - imm */
+		if (!imm)
+			break;
+		if (imm == -0x80000000) {
+			/* algfi %dst,0x80000000 */
+			EMIT6_IMM(0xc20a0000, dst_reg, 0x80000000);
+		} else {
+			/* agfi %dst,-imm */
+			EMIT6_IMM(0xc2080000, dst_reg, -imm);
+		}
+		break;
+	/*
+	 * BPF_MUL
+	 */
+	case BPF_ALU | BPF_MUL | BPF_X: /* dst = (u32) dst * (u32) src */
+		/* msr %dst,%src */
+		EMIT4(0xb2520000, dst_reg, src_reg);
+		EMIT_ZERO(dst_reg);
+		break;
+	case BPF_ALU64 | BPF_MUL | BPF_X: /* dst = dst * src */
+		/* msgr %dst,%src */
+		EMIT4(0xb90c0000, dst_reg, src_reg);
+		break;
+	case BPF_ALU | BPF_MUL | BPF_K: /* dst = (u32) dst * (u32) imm */
+		if (imm != 1) {
+			/* msfi %r5,imm */
+			EMIT6_IMM(0xc2010000, dst_reg, imm);
+		}
+		EMIT_ZERO(dst_reg);
+		break;
+	case BPF_ALU64 | BPF_MUL | BPF_K: /* dst = dst * imm */
+		if (imm == 1)
+			break;
+		/* msgfi %dst,imm */
+		EMIT6_IMM(0xc2000000, dst_reg, imm);
+		break;
+	/*
+	 * BPF_DIV / BPF_MOD
+	 */
+	case BPF_ALU | BPF_DIV | BPF_X: /* dst = (u32) dst / (u32) src */
+	case BPF_ALU | BPF_MOD | BPF_X: /* dst = (u32) dst % (u32) src */
+	{
+		int rc_reg = BPF_OP(insn->code) == BPF_DIV ? REG_W1 : REG_W0;
+
+		/* lhi %w0,0 */
+		EMIT4_IMM(0xa7080000, REG_W0, 0);
+		/* lr %w1,%dst */
+		EMIT2(0x1800, REG_W1, dst_reg);
+		/* dlr %w0,%src */
+		EMIT4(0xb9970000, REG_W0, src_reg);
+		/* llgfr %dst,%rc */
+		EMIT4(0xb9160000, dst_reg, rc_reg);
+		if (insn_is_zext(&insn[1]))
+			insn_count = 2;
+		break;
+	}
+	case BPF_ALU64 | BPF_DIV | BPF_X: /* dst = dst / src */
+	case BPF_ALU64 | BPF_MOD | BPF_X: /* dst = dst % src */
+	{
+		int rc_reg = BPF_OP(insn->code) == BPF_DIV ? REG_W1 : REG_W0;
+
+		/* lghi %w0,0 */
+		EMIT4_IMM(0xa7090000, REG_W0, 0);
+		/* lgr %w1,%dst */
+		EMIT4(0xb9040000, REG_W1, dst_reg);
+		/* dlgr %w0,%dst */
+		EMIT4(0xb9870000, REG_W0, src_reg);
+		/* lgr %dst,%rc */
+		EMIT4(0xb9040000, dst_reg, rc_reg);
+		break;
+	}
+	case BPF_ALU | BPF_DIV | BPF_K: /* dst = (u32) dst / (u32) imm */
+	case BPF_ALU | BPF_MOD | BPF_K: /* dst = (u32) dst % (u32) imm */
+	{
+		int rc_reg = BPF_OP(insn->code) == BPF_DIV ? REG_W1 : REG_W0;
+
+		if (imm == 1) {
+			if (BPF_OP(insn->code) == BPF_MOD)
+				/* lhgi %dst,0 */
+				EMIT4_IMM(0xa7090000, dst_reg, 0);
+			else
+				EMIT_ZERO(dst_reg);
+			break;
+		}
+		/* lhi %w0,0 */
+		EMIT4_IMM(0xa7080000, REG_W0, 0);
+		/* lr %w1,%dst */
+		EMIT2(0x1800, REG_W1, dst_reg);
+		if (!is_first_pass(jit) && can_use_ldisp_for_lit32(jit)) {
+			/* dl %w0,<d(imm)>(%l) */
+			EMIT6_DISP_LH(0xe3000000, 0x0097, REG_W0, REG_0, REG_L,
+				      EMIT_CONST_U32(imm));
+		} else {
+			/* lgfrl %dst,imm */
+			EMIT6_PCREL_RILB(0xc40c0000, dst_reg,
+					 _EMIT_CONST_U32(imm));
+			jit->seen |= SEEN_LITERAL;
+			/* dlr %w0,%dst */
+			EMIT4(0xb9970000, REG_W0, dst_reg);
+		}
+		/* llgfr %dst,%rc */
+		EMIT4(0xb9160000, dst_reg, rc_reg);
+		if (insn_is_zext(&insn[1]))
+			insn_count = 2;
+		break;
+	}
+	case BPF_ALU64 | BPF_DIV | BPF_K: /* dst = dst / imm */
+	case BPF_ALU64 | BPF_MOD | BPF_K: /* dst = dst % imm */
+	{
+		int rc_reg = BPF_OP(insn->code) == BPF_DIV ? REG_W1 : REG_W0;
+
+		if (imm == 1) {
+			if (BPF_OP(insn->code) == BPF_MOD)
+				/* lhgi %dst,0 */
+				EMIT4_IMM(0xa7090000, dst_reg, 0);
+			break;
+		}
+		/* lghi %w0,0 */
+		EMIT4_IMM(0xa7090000, REG_W0, 0);
+		/* lgr %w1,%dst */
+		EMIT4(0xb9040000, REG_W1, dst_reg);
+		if (!is_first_pass(jit) && can_use_ldisp_for_lit64(jit)) {
+			/* dlg %w0,<d(imm)>(%l) */
+			EMIT6_DISP_LH(0xe3000000, 0x0087, REG_W0, REG_0, REG_L,
+				      EMIT_CONST_U64(imm));
+		} else {
+			/* lgrl %dst,imm */
+			EMIT6_PCREL_RILB(0xc4080000, dst_reg,
+					 _EMIT_CONST_U64(imm));
+			jit->seen |= SEEN_LITERAL;
+			/* dlgr %w0,%dst */
+			EMIT4(0xb9870000, REG_W0, dst_reg);
+		}
+		/* lgr %dst,%rc */
+		EMIT4(0xb9040000, dst_reg, rc_reg);
+		break;
+	}
+	/*
+	 * BPF_AND
+	 */
+	case BPF_ALU | BPF_AND | BPF_X: /* dst = (u32) dst & (u32) src */
+		/* nr %dst,%src */
+		EMIT2(0x1400, dst_reg, src_reg);
+		EMIT_ZERO(dst_reg);
+		break;
+	case BPF_ALU64 | BPF_AND | BPF_X: /* dst = dst & src */
+		/* ngr %dst,%src */
+		EMIT4(0xb9800000, dst_reg, src_reg);
+		break;
+	case BPF_ALU | BPF_AND | BPF_K: /* dst = (u32) dst & (u32) imm */
+		/* nilf %dst,imm */
+		EMIT6_IMM(0xc00b0000, dst_reg, imm);
+		EMIT_ZERO(dst_reg);
+		break;
+	case BPF_ALU64 | BPF_AND | BPF_K: /* dst = dst & imm */
+		if (!is_first_pass(jit) && can_use_ldisp_for_lit64(jit)) {
+			/* ng %dst,<d(imm)>(%l) */
+			EMIT6_DISP_LH(0xe3000000, 0x0080,
+				      dst_reg, REG_0, REG_L,
+				      EMIT_CONST_U64(imm));
+		} else {
+			/* lgrl %w0,imm */
+			EMIT6_PCREL_RILB(0xc4080000, REG_W0,
+					 _EMIT_CONST_U64(imm));
+			jit->seen |= SEEN_LITERAL;
+			/* ngr %dst,%w0 */
+			EMIT4(0xb9800000, dst_reg, REG_W0);
+		}
+		break;
+	/*
+	 * BPF_OR
+	 */
+	case BPF_ALU | BPF_OR | BPF_X: /* dst = (u32) dst | (u32) src */
+		/* or %dst,%src */
+		EMIT2(0x1600, dst_reg, src_reg);
+		EMIT_ZERO(dst_reg);
+		break;
+	case BPF_ALU64 | BPF_OR | BPF_X: /* dst = dst | src */
+		/* ogr %dst,%src */
+		EMIT4(0xb9810000, dst_reg, src_reg);
+		break;
+	case BPF_ALU | BPF_OR | BPF_K: /* dst = (u32) dst | (u32) imm */
+		/* oilf %dst,imm */
+		EMIT6_IMM(0xc00d0000, dst_reg, imm);
+		EMIT_ZERO(dst_reg);
+		break;
+	case BPF_ALU64 | BPF_OR | BPF_K: /* dst = dst | imm */
+		if (!is_first_pass(jit) && can_use_ldisp_for_lit64(jit)) {
+			/* og %dst,<d(imm)>(%l) */
+			EMIT6_DISP_LH(0xe3000000, 0x0081,
+				      dst_reg, REG_0, REG_L,
+				      EMIT_CONST_U64(imm));
+		} else {
+			/* lgrl %w0,imm */
+			EMIT6_PCREL_RILB(0xc4080000, REG_W0,
+					 _EMIT_CONST_U64(imm));
+			jit->seen |= SEEN_LITERAL;
+			/* ogr %dst,%w0 */
+			EMIT4(0xb9810000, dst_reg, REG_W0);
+		}
+		break;
+	/*
+	 * BPF_XOR
+	 */
+	case BPF_ALU | BPF_XOR | BPF_X: /* dst = (u32) dst ^ (u32) src */
+		/* xr %dst,%src */
+		EMIT2(0x1700, dst_reg, src_reg);
+		EMIT_ZERO(dst_reg);
+		break;
+	case BPF_ALU64 | BPF_XOR | BPF_X: /* dst = dst ^ src */
+		/* xgr %dst,%src */
+		EMIT4(0xb9820000, dst_reg, src_reg);
+		break;
+	case BPF_ALU | BPF_XOR | BPF_K: /* dst = (u32) dst ^ (u32) imm */
+		if (imm != 0) {
+			/* xilf %dst,imm */
+			EMIT6_IMM(0xc0070000, dst_reg, imm);
+		}
+		EMIT_ZERO(dst_reg);
+		break;
+	case BPF_ALU64 | BPF_XOR | BPF_K: /* dst = dst ^ imm */
+		if (!is_first_pass(jit) && can_use_ldisp_for_lit64(jit)) {
+			/* xg %dst,<d(imm)>(%l) */
+			EMIT6_DISP_LH(0xe3000000, 0x0082,
+				      dst_reg, REG_0, REG_L,
+				      EMIT_CONST_U64(imm));
+		} else {
+			/* lgrl %w0,imm */
+			EMIT6_PCREL_RILB(0xc4080000, REG_W0,
+					 _EMIT_CONST_U64(imm));
+			jit->seen |= SEEN_LITERAL;
+			/* xgr %dst,%w0 */
+			EMIT4(0xb9820000, dst_reg, REG_W0);
+		}
+		break;
+	/*
+	 * BPF_LSH
+	 */
+	case BPF_ALU | BPF_LSH | BPF_X: /* dst = (u32) dst << (u32) src */
+		/* sll %dst,0(%src) */
+		EMIT4_DISP(0x89000000, dst_reg, src_reg, 0);
+		EMIT_ZERO(dst_reg);
+		break;
+	case BPF_ALU64 | BPF_LSH | BPF_X: /* dst = dst << src */
+		/* sllg %dst,%dst,0(%src) */
+		EMIT6_DISP_LH(0xeb000000, 0x000d, dst_reg, dst_reg, src_reg, 0);
+		break;
+	case BPF_ALU | BPF_LSH | BPF_K: /* dst = (u32) dst << (u32) imm */
+		if (imm != 0) {
+			/* sll %dst,imm(%r0) */
+			EMIT4_DISP(0x89000000, dst_reg, REG_0, imm);
+		}
+		EMIT_ZERO(dst_reg);
+		break;
+	case BPF_ALU64 | BPF_LSH | BPF_K: /* dst = dst << imm */
+		if (imm == 0)
+			break;
+		/* sllg %dst,%dst,imm(%r0) */
+		EMIT6_DISP_LH(0xeb000000, 0x000d, dst_reg, dst_reg, REG_0, imm);
+		break;
+	/*
+	 * BPF_RSH
+	 */
+	case BPF_ALU | BPF_RSH | BPF_X: /* dst = (u32) dst >> (u32) src */
+		/* srl %dst,0(%src) */
+		EMIT4_DISP(0x88000000, dst_reg, src_reg, 0);
+		EMIT_ZERO(dst_reg);
+		break;
+	case BPF_ALU64 | BPF_RSH | BPF_X: /* dst = dst >> src */
+		/* srlg %dst,%dst,0(%src) */
+		EMIT6_DISP_LH(0xeb000000, 0x000c, dst_reg, dst_reg, src_reg, 0);
+		break;
+	case BPF_ALU | BPF_RSH | BPF_K: /* dst = (u32) dst >> (u32) imm */
+		if (imm != 0) {
+			/* srl %dst,imm(%r0) */
+			EMIT4_DISP(0x88000000, dst_reg, REG_0, imm);
+		}
+		EMIT_ZERO(dst_reg);
+		break;
+	case BPF_ALU64 | BPF_RSH | BPF_K: /* dst = dst >> imm */
+		if (imm == 0)
+			break;
+		/* srlg %dst,%dst,imm(%r0) */
+		EMIT6_DISP_LH(0xeb000000, 0x000c, dst_reg, dst_reg, REG_0, imm);
+		break;
+	/*
+	 * BPF_ARSH
+	 */
+	case BPF_ALU | BPF_ARSH | BPF_X: /* ((s32) dst) >>= src */
+		/* sra %dst,%dst,0(%src) */
+		EMIT4_DISP(0x8a000000, dst_reg, src_reg, 0);
+		EMIT_ZERO(dst_reg);
+		break;
+	case BPF_ALU64 | BPF_ARSH | BPF_X: /* ((s64) dst) >>= src */
+		/* srag %dst,%dst,0(%src) */
+		EMIT6_DISP_LH(0xeb000000, 0x000a, dst_reg, dst_reg, src_reg, 0);
+		break;
+	case BPF_ALU | BPF_ARSH | BPF_K: /* ((s32) dst >> imm */
+		if (imm != 0) {
+			/* sra %dst,imm(%r0) */
+			EMIT4_DISP(0x8a000000, dst_reg, REG_0, imm);
+		}
+		EMIT_ZERO(dst_reg);
+		break;
+	case BPF_ALU64 | BPF_ARSH | BPF_K: /* ((s64) dst) >>= imm */
+		if (imm == 0)
+			break;
+		/* srag %dst,%dst,imm(%r0) */
+		EMIT6_DISP_LH(0xeb000000, 0x000a, dst_reg, dst_reg, REG_0, imm);
+		break;
+	/*
+	 * BPF_NEG
+	 */
+	case BPF_ALU | BPF_NEG: /* dst = (u32) -dst */
+		/* lcr %dst,%dst */
+		EMIT2(0x1300, dst_reg, dst_reg);
+		EMIT_ZERO(dst_reg);
+		break;
+	case BPF_ALU64 | BPF_NEG: /* dst = -dst */
+		/* lcgr %dst,%dst */
+		EMIT4(0xb9030000, dst_reg, dst_reg);
+		break;
+	/*
+	 * BPF_FROM_BE/LE
+	 */
+	case BPF_ALU | BPF_END | BPF_FROM_BE:
+		/* s390 is big endian, therefore only clear high order bytes */
+		switch (imm) {
+		case 16: /* dst = (u16) cpu_to_be16(dst) */
+			/* llghr %dst,%dst */
+			EMIT4(0xb9850000, dst_reg, dst_reg);
+			if (insn_is_zext(&insn[1]))
+				insn_count = 2;
+			break;
+		case 32: /* dst = (u32) cpu_to_be32(dst) */
+			if (!fp->aux->verifier_zext)
+				/* llgfr %dst,%dst */
+				EMIT4(0xb9160000, dst_reg, dst_reg);
+			break;
+		case 64: /* dst = (u64) cpu_to_be64(dst) */
+			break;
+		}
+		break;
+	case BPF_ALU | BPF_END | BPF_FROM_LE:
+		switch (imm) {
+		case 16: /* dst = (u16) cpu_to_le16(dst) */
+			/* lrvr %dst,%dst */
+			EMIT4(0xb91f0000, dst_reg, dst_reg);
+			/* srl %dst,16(%r0) */
+			EMIT4_DISP(0x88000000, dst_reg, REG_0, 16);
+			/* llghr %dst,%dst */
+			EMIT4(0xb9850000, dst_reg, dst_reg);
+			if (insn_is_zext(&insn[1]))
+				insn_count = 2;
+			break;
+		case 32: /* dst = (u32) cpu_to_le32(dst) */
+			/* lrvr %dst,%dst */
+			EMIT4(0xb91f0000, dst_reg, dst_reg);
+			if (!fp->aux->verifier_zext)
+				/* llgfr %dst,%dst */
+				EMIT4(0xb9160000, dst_reg, dst_reg);
+			break;
+		case 64: /* dst = (u64) cpu_to_le64(dst) */
+			/* lrvgr %dst,%dst */
+			EMIT4(0xb90f0000, dst_reg, dst_reg);
+			break;
+		}
+		break;
+	/*
+	 * BPF_NOSPEC (speculation barrier)
+	 */
+	case BPF_ST | BPF_NOSPEC:
+		break;
+	/*
+	 * BPF_ST(X)
+	 */
+	case BPF_STX | BPF_MEM | BPF_B: /* *(u8 *)(dst + off) = src_reg */
+		/* stcy %src,off(%dst) */
+		EMIT6_DISP_LH(0xe3000000, 0x0072, src_reg, dst_reg, REG_0, off);
+		jit->seen |= SEEN_MEM;
+		break;
+	case BPF_STX | BPF_MEM | BPF_H: /* (u16 *)(dst + off) = src */
+		/* sthy %src,off(%dst) */
+		EMIT6_DISP_LH(0xe3000000, 0x0070, src_reg, dst_reg, REG_0, off);
+		jit->seen |= SEEN_MEM;
+		break;
+	case BPF_STX | BPF_MEM | BPF_W: /* *(u32 *)(dst + off) = src */
+		/* sty %src,off(%dst) */
+		EMIT6_DISP_LH(0xe3000000, 0x0050, src_reg, dst_reg, REG_0, off);
+		jit->seen |= SEEN_MEM;
+		break;
+	case BPF_STX | BPF_MEM | BPF_DW: /* (u64 *)(dst + off) = src */
+		/* stg %src,off(%dst) */
+		EMIT6_DISP_LH(0xe3000000, 0x0024, src_reg, dst_reg, REG_0, off);
+		jit->seen |= SEEN_MEM;
+		break;
+	case BPF_ST | BPF_MEM | BPF_B: /* *(u8 *)(dst + off) = imm */
+		/* lhi %w0,imm */
+		EMIT4_IMM(0xa7080000, REG_W0, (u8) imm);
+		/* stcy %w0,off(dst) */
+		EMIT6_DISP_LH(0xe3000000, 0x0072, REG_W0, dst_reg, REG_0, off);
+		jit->seen |= SEEN_MEM;
+		break;
+	case BPF_ST | BPF_MEM | BPF_H: /* (u16 *)(dst + off) = imm */
+		/* lhi %w0,imm */
+		EMIT4_IMM(0xa7080000, REG_W0, (u16) imm);
+		/* sthy %w0,off(dst) */
+		EMIT6_DISP_LH(0xe3000000, 0x0070, REG_W0, dst_reg, REG_0, off);
+		jit->seen |= SEEN_MEM;
+		break;
+	case BPF_ST | BPF_MEM | BPF_W: /* *(u32 *)(dst + off) = imm */
+		/* llilf %w0,imm  */
+		EMIT6_IMM(0xc00f0000, REG_W0, (u32) imm);
+		/* sty %w0,off(%dst) */
+		EMIT6_DISP_LH(0xe3000000, 0x0050, REG_W0, dst_reg, REG_0, off);
+		jit->seen |= SEEN_MEM;
+		break;
+	case BPF_ST | BPF_MEM | BPF_DW: /* *(u64 *)(dst + off) = imm */
+		/* lgfi %w0,imm */
+		EMIT6_IMM(0xc0010000, REG_W0, imm);
+		/* stg %w0,off(%dst) */
+		EMIT6_DISP_LH(0xe3000000, 0x0024, REG_W0, dst_reg, REG_0, off);
+		jit->seen |= SEEN_MEM;
+		break;
+	/*
+	 * BPF_ATOMIC
+	 */
+	case BPF_STX | BPF_ATOMIC | BPF_DW:
+	case BPF_STX | BPF_ATOMIC | BPF_W:
+	{
+		bool is32 = BPF_SIZE(insn->code) == BPF_W;
+
+		switch (insn->imm) {
+/* {op32|op64} {%w0|%src},%src,off(%dst) */
+#define EMIT_ATOMIC(op32, op64) do {					\
+	EMIT6_DISP_LH(0xeb000000, is32 ? (op32) : (op64),		\
+		      (insn->imm & BPF_FETCH) ? src_reg : REG_W0,	\
+		      src_reg, dst_reg, off);				\
+	if (is32 && (insn->imm & BPF_FETCH))				\
+		EMIT_ZERO(src_reg);					\
+} while (0)
+		case BPF_ADD:
+		case BPF_ADD | BPF_FETCH:
+			/* {laal|laalg} */
+			EMIT_ATOMIC(0x00fa, 0x00ea);
+			break;
+		case BPF_AND:
+		case BPF_AND | BPF_FETCH:
+			/* {lan|lang} */
+			EMIT_ATOMIC(0x00f4, 0x00e4);
+			break;
+		case BPF_OR:
+		case BPF_OR | BPF_FETCH:
+			/* {lao|laog} */
+			EMIT_ATOMIC(0x00f6, 0x00e6);
+			break;
+		case BPF_XOR:
+		case BPF_XOR | BPF_FETCH:
+			/* {lax|laxg} */
+			EMIT_ATOMIC(0x00f7, 0x00e7);
+			break;
+#undef EMIT_ATOMIC
+		case BPF_XCHG:
+			/* {ly|lg} %w0,off(%dst) */
+			EMIT6_DISP_LH(0xe3000000,
+				      is32 ? 0x0058 : 0x0004, REG_W0, REG_0,
+				      dst_reg, off);
+			/* 0: {csy|csg} %w0,%src,off(%dst) */
+			EMIT6_DISP_LH(0xeb000000, is32 ? 0x0014 : 0x0030,
+				      REG_W0, src_reg, dst_reg, off);
+			/* brc 4,0b */
+			EMIT4_PCREL_RIC(0xa7040000, 4, jit->prg - 6);
+			/* {llgfr|lgr} %src,%w0 */
+			EMIT4(is32 ? 0xb9160000 : 0xb9040000, src_reg, REG_W0);
+			if (is32 && insn_is_zext(&insn[1]))
+				insn_count = 2;
+			break;
+		case BPF_CMPXCHG:
+			/* 0: {csy|csg} %b0,%src,off(%dst) */
+			EMIT6_DISP_LH(0xeb000000, is32 ? 0x0014 : 0x0030,
+				      BPF_REG_0, src_reg, dst_reg, off);
+			break;
+		default:
+			pr_err("Unknown atomic operation %02x\n", insn->imm);
+			return -1;
+		}
+
+		jit->seen |= SEEN_MEM;
+		break;
+	}
+	/*
+	 * BPF_LDX
+	 */
+	case BPF_LDX | BPF_MEM | BPF_B: /* dst = *(u8 *)(ul) (src + off) */
+	case BPF_LDX | BPF_PROBE_MEM | BPF_B:
+		/* llgc %dst,0(off,%src) */
+		EMIT6_DISP_LH(0xe3000000, 0x0090, dst_reg, src_reg, REG_0, off);
+		jit->seen |= SEEN_MEM;
+		if (insn_is_zext(&insn[1]))
+			insn_count = 2;
+		break;
+	case BPF_LDX | BPF_MEM | BPF_H: /* dst = *(u16 *)(ul) (src + off) */
+	case BPF_LDX | BPF_PROBE_MEM | BPF_H:
+		/* llgh %dst,0(off,%src) */
+		EMIT6_DISP_LH(0xe3000000, 0x0091, dst_reg, src_reg, REG_0, off);
+		jit->seen |= SEEN_MEM;
+		if (insn_is_zext(&insn[1]))
+			insn_count = 2;
+		break;
+	case BPF_LDX | BPF_MEM | BPF_W: /* dst = *(u32 *)(ul) (src + off) */
+	case BPF_LDX | BPF_PROBE_MEM | BPF_W:
+		/* llgf %dst,off(%src) */
+		jit->seen |= SEEN_MEM;
+		EMIT6_DISP_LH(0xe3000000, 0x0016, dst_reg, src_reg, REG_0, off);
+		if (insn_is_zext(&insn[1]))
+			insn_count = 2;
+		break;
+	case BPF_LDX | BPF_MEM | BPF_DW: /* dst = *(u64 *)(ul) (src + off) */
+	case BPF_LDX | BPF_PROBE_MEM | BPF_DW:
+		/* lg %dst,0(off,%src) */
+		jit->seen |= SEEN_MEM;
+		EMIT6_DISP_LH(0xe3000000, 0x0004, dst_reg, src_reg, REG_0, off);
+		break;
+	/*
+	 * BPF_JMP / CALL
+	 */
+	case BPF_JMP | BPF_CALL:
+	{
+		const struct btf_func_model *m;
+		bool func_addr_fixed;
+		int j, ret;
+		u64 func;
+
+		ret = bpf_jit_get_func_addr(fp, insn, extra_pass,
+					    &func, &func_addr_fixed);
+		if (ret < 0)
+			return -1;
+
+		REG_SET_SEEN(BPF_REG_5);
+		jit->seen |= SEEN_FUNC;
+		/*
+		 * Copy the tail call counter to where the callee expects it.
+		 *
+		 * Note 1: The callee can increment the tail call counter, but
+		 * we do not load it back, since the x86 JIT does not do this
+		 * either.
+		 *
+		 * Note 2: We assume that the verifier does not let us call the
+		 * main program, which clears the tail call counter on entry.
+		 */
+		/* mvc STK_OFF_TCCNT(4,%r15),N(%r15) */
+		_EMIT6(0xd203f000 | STK_OFF_TCCNT,
+		       0xf000 | (STK_OFF_TCCNT + STK_OFF + stack_depth));
+
+		/* Sign-extend the kfunc arguments. */
+		if (insn->src_reg == BPF_PSEUDO_KFUNC_CALL) {
+			m = bpf_jit_find_kfunc_model(fp, insn);
+			if (!m)
+				return -1;
+
+			for (j = 0; j < m->nr_args; j++) {
+				if (sign_extend(jit, BPF_REG_1 + j,
+						m->arg_size[j],
+						m->arg_flags[j]))
+					return -1;
+			}
+		}
+
+		/* lgrl %w1,func */
+		EMIT6_PCREL_RILB(0xc4080000, REG_W1, _EMIT_CONST_U64(func));
+		/* %r1() */
+		call_r1(jit);
+		/* lgr %b0,%r2: load return value into %b0 */
+		EMIT4(0xb9040000, BPF_REG_0, REG_2);
+		break;
+	}
+	case BPF_JMP | BPF_TAIL_CALL: {
+		int patch_1_clrj, patch_2_clij, patch_3_brc;
+
+		/*
+		 * Implicit input:
+		 *  B1: pointer to ctx
+		 *  B2: pointer to bpf_array
+		 *  B3: index in bpf_array
+		 *
+		 * if (index >= array->map.max_entries)
+		 *         goto out;
+		 */
+
+		/* llgf %w1,map.max_entries(%b2) */
+		EMIT6_DISP_LH(0xe3000000, 0x0016, REG_W1, REG_0, BPF_REG_2,
+			      offsetof(struct bpf_array, map.max_entries));
+		/* if ((u32)%b3 >= (u32)%w1) goto out; */
+		/* clrj %b3,%w1,0xa,out */
+		patch_1_clrj = jit->prg;
+		EMIT6_PCREL_RIEB(0xec000000, 0x0077, BPF_REG_3, REG_W1, 0xa,
+				 jit->prg);
+
+		/*
+		 * if (tail_call_cnt++ >= MAX_TAIL_CALL_CNT)
+		 *         goto out;
+		 */
+
+		if (jit->seen & SEEN_STACK)
+			off = STK_OFF_TCCNT + STK_OFF + stack_depth;
+		else
+			off = STK_OFF_TCCNT;
+		/* lhi %w0,1 */
+		EMIT4_IMM(0xa7080000, REG_W0, 1);
+		/* laal %w1,%w0,off(%r15) */
+		EMIT6_DISP_LH(0xeb000000, 0x00fa, REG_W1, REG_W0, REG_15, off);
+		/* clij %w1,MAX_TAIL_CALL_CNT-1,0x2,out */
+		patch_2_clij = jit->prg;
+		EMIT6_PCREL_RIEC(0xec000000, 0x007f, REG_W1, MAX_TAIL_CALL_CNT - 1,
+				 2, jit->prg);
+
+		/*
+		 * prog = array->ptrs[index];
+		 * if (prog == NULL)
+		 *         goto out;
+		 */
+
+		/* llgfr %r1,%b3: %r1 = (u32) index */
+		EMIT4(0xb9160000, REG_1, BPF_REG_3);
+		/* sllg %r1,%r1,3: %r1 *= 8 */
+		EMIT6_DISP_LH(0xeb000000, 0x000d, REG_1, REG_1, REG_0, 3);
+		/* ltg %r1,prog(%b2,%r1) */
+		EMIT6_DISP_LH(0xe3000000, 0x0002, REG_1, BPF_REG_2,
+			      REG_1, offsetof(struct bpf_array, ptrs));
+		/* brc 0x8,out */
+		patch_3_brc = jit->prg;
+		EMIT4_PCREL_RIC(0xa7040000, 8, jit->prg);
+
+		/*
+		 * Restore registers before calling function
+		 */
+		save_restore_regs(jit, REGS_RESTORE, stack_depth);
+
+		/*
+		 * goto *(prog->bpf_func + tail_call_start);
+		 */
+
+		/* lg %r1,bpf_func(%r1) */
+		EMIT6_DISP_LH(0xe3000000, 0x0004, REG_1, REG_1, REG_0,
+			      offsetof(struct bpf_prog, bpf_func));
+		if (nospec_uses_trampoline()) {
+			jit->seen |= SEEN_FUNC;
+			/* aghi %r1,tail_call_start */
+			EMIT4_IMM(0xa70b0000, REG_1, jit->tail_call_start);
+			/* brcl 0xf,__s390_indirect_jump_r1 */
+			EMIT6_PCREL_RILC(0xc0040000, 0xf, jit->r1_thunk_ip);
+		} else {
+			/* bc 0xf,tail_call_start(%r1) */
+			_EMIT4(0x47f01000 + jit->tail_call_start);
+		}
+		/* out: */
+		if (jit->prg_buf) {
+			*(u16 *)(jit->prg_buf + patch_1_clrj + 2) =
+				(jit->prg - patch_1_clrj) >> 1;
+			*(u16 *)(jit->prg_buf + patch_2_clij + 2) =
+				(jit->prg - patch_2_clij) >> 1;
+			*(u16 *)(jit->prg_buf + patch_3_brc + 2) =
+				(jit->prg - patch_3_brc) >> 1;
+		}
+		break;
+	}
+	case BPF_JMP | BPF_EXIT: /* return b0 */
+		last = (i == fp->len - 1) ? 1 : 0;
+		if (last)
+			break;
+		if (!is_first_pass(jit) && can_use_rel(jit, jit->exit_ip))
+			/* brc 0xf, <exit> */
+			EMIT4_PCREL_RIC(0xa7040000, 0xf, jit->exit_ip);
+		else
+			/* brcl 0xf, <exit> */
+			EMIT6_PCREL_RILC(0xc0040000, 0xf, jit->exit_ip);
+		break;
+	/*
+	 * Branch relative (number of skipped instructions) to offset on
+	 * condition.
+	 *
+	 * Condition code to mask mapping:
+	 *
+	 * CC | Description	   | Mask
+	 * ------------------------------
+	 * 0  | Operands equal	   |	8
+	 * 1  | First operand low  |	4
+	 * 2  | First operand high |	2
+	 * 3  | Unused		   |	1
+	 *
+	 * For s390x relative branches: ip = ip + off_bytes
+	 * For BPF relative branches:	insn = insn + off_insns + 1
+	 *
+	 * For example for s390x with offset 0 we jump to the branch
+	 * instruction itself (loop) and for BPF with offset 0 we
+	 * branch to the instruction behind the branch.
+	 */
+	case BPF_JMP | BPF_JA: /* if (true) */
+		mask = 0xf000; /* j */
+		goto branch_oc;
+	case BPF_JMP | BPF_JSGT | BPF_K: /* ((s64) dst > (s64) imm) */
+	case BPF_JMP32 | BPF_JSGT | BPF_K: /* ((s32) dst > (s32) imm) */
+		mask = 0x2000; /* jh */
+		goto branch_ks;
+	case BPF_JMP | BPF_JSLT | BPF_K: /* ((s64) dst < (s64) imm) */
+	case BPF_JMP32 | BPF_JSLT | BPF_K: /* ((s32) dst < (s32) imm) */
+		mask = 0x4000; /* jl */
+		goto branch_ks;
+	case BPF_JMP | BPF_JSGE | BPF_K: /* ((s64) dst >= (s64) imm) */
+	case BPF_JMP32 | BPF_JSGE | BPF_K: /* ((s32) dst >= (s32) imm) */
+		mask = 0xa000; /* jhe */
+		goto branch_ks;
+	case BPF_JMP | BPF_JSLE | BPF_K: /* ((s64) dst <= (s64) imm) */
+	case BPF_JMP32 | BPF_JSLE | BPF_K: /* ((s32) dst <= (s32) imm) */
+		mask = 0xc000; /* jle */
+		goto branch_ks;
+	case BPF_JMP | BPF_JGT | BPF_K: /* (dst_reg > imm) */
+	case BPF_JMP32 | BPF_JGT | BPF_K: /* ((u32) dst_reg > (u32) imm) */
+		mask = 0x2000; /* jh */
+		goto branch_ku;
+	case BPF_JMP | BPF_JLT | BPF_K: /* (dst_reg < imm) */
+	case BPF_JMP32 | BPF_JLT | BPF_K: /* ((u32) dst_reg < (u32) imm) */
+		mask = 0x4000; /* jl */
+		goto branch_ku;
+	case BPF_JMP | BPF_JGE | BPF_K: /* (dst_reg >= imm) */
+	case BPF_JMP32 | BPF_JGE | BPF_K: /* ((u32) dst_reg >= (u32) imm) */
+		mask = 0xa000; /* jhe */
+		goto branch_ku;
+	case BPF_JMP | BPF_JLE | BPF_K: /* (dst_reg <= imm) */
+	case BPF_JMP32 | BPF_JLE | BPF_K: /* ((u32) dst_reg <= (u32) imm) */
+		mask = 0xc000; /* jle */
+		goto branch_ku;
+	case BPF_JMP | BPF_JNE | BPF_K: /* (dst_reg != imm) */
+	case BPF_JMP32 | BPF_JNE | BPF_K: /* ((u32) dst_reg != (u32) imm) */
+		mask = 0x7000; /* jne */
+		goto branch_ku;
+	case BPF_JMP | BPF_JEQ | BPF_K: /* (dst_reg == imm) */
+	case BPF_JMP32 | BPF_JEQ | BPF_K: /* ((u32) dst_reg == (u32) imm) */
+		mask = 0x8000; /* je */
+		goto branch_ku;
+	case BPF_JMP | BPF_JSET | BPF_K: /* (dst_reg & imm) */
+	case BPF_JMP32 | BPF_JSET | BPF_K: /* ((u32) dst_reg & (u32) imm) */
+		mask = 0x7000; /* jnz */
+		if (BPF_CLASS(insn->code) == BPF_JMP32) {
+			/* llilf %w1,imm (load zero extend imm) */
+			EMIT6_IMM(0xc00f0000, REG_W1, imm);
+			/* nr %w1,%dst */
+			EMIT2(0x1400, REG_W1, dst_reg);
+		} else {
+			/* lgfi %w1,imm (load sign extend imm) */
+			EMIT6_IMM(0xc0010000, REG_W1, imm);
+			/* ngr %w1,%dst */
+			EMIT4(0xb9800000, REG_W1, dst_reg);
+		}
+		goto branch_oc;
+
+	case BPF_JMP | BPF_JSGT | BPF_X: /* ((s64) dst > (s64) src) */
+	case BPF_JMP32 | BPF_JSGT | BPF_X: /* ((s32) dst > (s32) src) */
+		mask = 0x2000; /* jh */
+		goto branch_xs;
+	case BPF_JMP | BPF_JSLT | BPF_X: /* ((s64) dst < (s64) src) */
+	case BPF_JMP32 | BPF_JSLT | BPF_X: /* ((s32) dst < (s32) src) */
+		mask = 0x4000; /* jl */
+		goto branch_xs;
+	case BPF_JMP | BPF_JSGE | BPF_X: /* ((s64) dst >= (s64) src) */
+	case BPF_JMP32 | BPF_JSGE | BPF_X: /* ((s32) dst >= (s32) src) */
+		mask = 0xa000; /* jhe */
+		goto branch_xs;
+	case BPF_JMP | BPF_JSLE | BPF_X: /* ((s64) dst <= (s64) src) */
+	case BPF_JMP32 | BPF_JSLE | BPF_X: /* ((s32) dst <= (s32) src) */
+		mask = 0xc000; /* jle */
+		goto branch_xs;
+	case BPF_JMP | BPF_JGT | BPF_X: /* (dst > src) */
+	case BPF_JMP32 | BPF_JGT | BPF_X: /* ((u32) dst > (u32) src) */
+		mask = 0x2000; /* jh */
+		goto branch_xu;
+	case BPF_JMP | BPF_JLT | BPF_X: /* (dst < src) */
+	case BPF_JMP32 | BPF_JLT | BPF_X: /* ((u32) dst < (u32) src) */
+		mask = 0x4000; /* jl */
+		goto branch_xu;
+	case BPF_JMP | BPF_JGE | BPF_X: /* (dst >= src) */
+	case BPF_JMP32 | BPF_JGE | BPF_X: /* ((u32) dst >= (u32) src) */
+		mask = 0xa000; /* jhe */
+		goto branch_xu;
+	case BPF_JMP | BPF_JLE | BPF_X: /* (dst <= src) */
+	case BPF_JMP32 | BPF_JLE | BPF_X: /* ((u32) dst <= (u32) src) */
+		mask = 0xc000; /* jle */
+		goto branch_xu;
+	case BPF_JMP | BPF_JNE | BPF_X: /* (dst != src) */
+	case BPF_JMP32 | BPF_JNE | BPF_X: /* ((u32) dst != (u32) src) */
+		mask = 0x7000; /* jne */
+		goto branch_xu;
+	case BPF_JMP | BPF_JEQ | BPF_X: /* (dst == src) */
+	case BPF_JMP32 | BPF_JEQ | BPF_X: /* ((u32) dst == (u32) src) */
+		mask = 0x8000; /* je */
+		goto branch_xu;
+	case BPF_JMP | BPF_JSET | BPF_X: /* (dst & src) */
+	case BPF_JMP32 | BPF_JSET | BPF_X: /* ((u32) dst & (u32) src) */
+	{
+		bool is_jmp32 = BPF_CLASS(insn->code) == BPF_JMP32;
+
+		mask = 0x7000; /* jnz */
+		/* nrk or ngrk %w1,%dst,%src */
+		EMIT4_RRF((is_jmp32 ? 0xb9f40000 : 0xb9e40000),
+			  REG_W1, dst_reg, src_reg);
+		goto branch_oc;
+branch_ks:
+		is_jmp32 = BPF_CLASS(insn->code) == BPF_JMP32;
+		/* cfi or cgfi %dst,imm */
+		EMIT6_IMM(is_jmp32 ? 0xc20d0000 : 0xc20c0000,
+			  dst_reg, imm);
+		if (!is_first_pass(jit) &&
+		    can_use_rel(jit, addrs[i + off + 1])) {
+			/* brc mask,off */
+			EMIT4_PCREL_RIC(0xa7040000,
+					mask >> 12, addrs[i + off + 1]);
+		} else {
+			/* brcl mask,off */
+			EMIT6_PCREL_RILC(0xc0040000,
+					 mask >> 12, addrs[i + off + 1]);
+		}
+		break;
+branch_ku:
+		/* lgfi %w1,imm (load sign extend imm) */
+		src_reg = REG_1;
+		EMIT6_IMM(0xc0010000, src_reg, imm);
+		goto branch_xu;
+branch_xs:
+		is_jmp32 = BPF_CLASS(insn->code) == BPF_JMP32;
+		if (!is_first_pass(jit) &&
+		    can_use_rel(jit, addrs[i + off + 1])) {
+			/* crj or cgrj %dst,%src,mask,off */
+			EMIT6_PCREL(0xec000000, (is_jmp32 ? 0x0076 : 0x0064),
+				    dst_reg, src_reg, i, off, mask);
+		} else {
+			/* cr or cgr %dst,%src */
+			if (is_jmp32)
+				EMIT2(0x1900, dst_reg, src_reg);
+			else
+				EMIT4(0xb9200000, dst_reg, src_reg);
+			/* brcl mask,off */
+			EMIT6_PCREL_RILC(0xc0040000,
+					 mask >> 12, addrs[i + off + 1]);
+		}
+		break;
+branch_xu:
+		is_jmp32 = BPF_CLASS(insn->code) == BPF_JMP32;
+		if (!is_first_pass(jit) &&
+		    can_use_rel(jit, addrs[i + off + 1])) {
+			/* clrj or clgrj %dst,%src,mask,off */
+			EMIT6_PCREL(0xec000000, (is_jmp32 ? 0x0077 : 0x0065),
+				    dst_reg, src_reg, i, off, mask);
+		} else {
+			/* clr or clgr %dst,%src */
+			if (is_jmp32)
+				EMIT2(0x1500, dst_reg, src_reg);
+			else
+				EMIT4(0xb9210000, dst_reg, src_reg);
+			/* brcl mask,off */
+			EMIT6_PCREL_RILC(0xc0040000,
+					 mask >> 12, addrs[i + off + 1]);
+		}
+		break;
+branch_oc:
+		if (!is_first_pass(jit) &&
+		    can_use_rel(jit, addrs[i + off + 1])) {
+			/* brc mask,off */
+			EMIT4_PCREL_RIC(0xa7040000,
+					mask >> 12, addrs[i + off + 1]);
+		} else {
+			/* brcl mask,off */
+			EMIT6_PCREL_RILC(0xc0040000,
+					 mask >> 12, addrs[i + off + 1]);
+		}
+		break;
+	}
+	default: /* too complex, give up */
+		pr_err("Unknown opcode %02x\n", insn->code);
+		return -1;
+	}
+
+	if (probe_prg != -1) {
+		/*
+		 * Handlers of certain exceptions leave psw.addr pointing to
+		 * the instruction directly after the failing one. Therefore,
+		 * create two exception table entries and also add a nop in
+		 * case two probing instructions come directly after each
+		 * other.
+		 */
+		nop_prg = jit->prg;
+		/* bcr 0,%0 */
+		_EMIT2(0x0700);
+		err = bpf_jit_probe_mem(jit, fp, probe_prg, nop_prg);
+		if (err < 0)
+			return err;
+	}
+
+	return insn_count;
+}
+
+/*
+ * Return whether new i-th instruction address does not violate any invariant
+ */
+static bool bpf_is_new_addr_sane(struct bpf_jit *jit, int i)
+{
+	/* On the first pass anything goes */
+	if (is_first_pass(jit))
+		return true;
+
+	/* The codegen pass must not change anything */
+	if (is_codegen_pass(jit))
+		return jit->addrs[i] == jit->prg;
+
+	/* Passes in between must not increase code size */
+	return jit->addrs[i] >= jit->prg;
+}
+
+/*
+ * Update the address of i-th instruction
+ */
+static int bpf_set_addr(struct bpf_jit *jit, int i)
+{
+	int delta;
+
+	if (is_codegen_pass(jit)) {
+		delta = jit->prg - jit->addrs[i];
+		if (delta < 0)
+			bpf_skip(jit, -delta);
+	}
+	if (WARN_ON_ONCE(!bpf_is_new_addr_sane(jit, i)))
+		return -1;
+	jit->addrs[i] = jit->prg;
+	return 0;
+}
+
+/*
+ * Compile eBPF program into s390x code
+ */
+static int bpf_jit_prog(struct bpf_jit *jit, struct bpf_prog *fp,
+			bool extra_pass, u32 stack_depth)
+{
+	int i, insn_count, lit32_size, lit64_size;
+
+	jit->lit32 = jit->lit32_start;
+	jit->lit64 = jit->lit64_start;
+	jit->prg = 0;
+	jit->excnt = 0;
+
+	bpf_jit_prologue(jit, fp, stack_depth);
+	if (bpf_set_addr(jit, 0) < 0)
+		return -1;
+	for (i = 0; i < fp->len; i += insn_count) {
+		insn_count = bpf_jit_insn(jit, fp, i, extra_pass, stack_depth);
+		if (insn_count < 0)
+			return -1;
+		/* Next instruction address */
+		if (bpf_set_addr(jit, i + insn_count) < 0)
+			return -1;
+	}
+	bpf_jit_epilogue(jit, stack_depth);
+
+	lit32_size = jit->lit32 - jit->lit32_start;
+	lit64_size = jit->lit64 - jit->lit64_start;
+	jit->lit32_start = jit->prg;
+	if (lit32_size)
+		jit->lit32_start = ALIGN(jit->lit32_start, 4);
+	jit->lit64_start = jit->lit32_start + lit32_size;
+	if (lit64_size)
+		jit->lit64_start = ALIGN(jit->lit64_start, 8);
+	jit->size = jit->lit64_start + lit64_size;
+	jit->size_prg = jit->prg;
+
+	if (WARN_ON_ONCE(fp->aux->extable &&
+			 jit->excnt != fp->aux->num_exentries))
+		/* Verifier bug - too many entries. */
+		return -1;
+
+	return 0;
+}
+
+bool bpf_jit_needs_zext(void)
+{
+	return true;
+}
+
+struct s390_jit_data {
+	struct bpf_binary_header *header;
+	struct bpf_jit ctx;
+	int pass;
+};
+
+static struct bpf_binary_header *bpf_jit_alloc(struct bpf_jit *jit,
+					       struct bpf_prog *fp)
+{
+	struct bpf_binary_header *header;
+	u32 extable_size;
+	u32 code_size;
+
+	/* We need two entries per insn. */
+	fp->aux->num_exentries *= 2;
+
+	code_size = roundup(jit->size,
+			    __alignof__(struct exception_table_entry));
+	extable_size = fp->aux->num_exentries *
+		sizeof(struct exception_table_entry);
+	header = bpf_jit_binary_alloc(code_size + extable_size, &jit->prg_buf,
+				      8, jit_fill_hole);
+	if (!header)
+		return NULL;
+	fp->aux->extable = (struct exception_table_entry *)
+		(jit->prg_buf + code_size);
+	return header;
+}
+
+/*
+ * Compile eBPF program "fp"
+ */
+struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp)
+{
+	u32 stack_depth = round_up(fp->aux->stack_depth, 8);
+	struct bpf_prog *tmp, *orig_fp = fp;
+	struct bpf_binary_header *header;
+	struct s390_jit_data *jit_data;
+	bool tmp_blinded = false;
+	bool extra_pass = false;
+	struct bpf_jit jit;
+	int pass;
+
+	if (WARN_ON_ONCE(bpf_plt_end - bpf_plt != BPF_PLT_SIZE))
+		return orig_fp;
+
+	if (!fp->jit_requested)
+		return orig_fp;
+
+	tmp = bpf_jit_blind_constants(fp);
+	/*
+	 * If blinding was requested and we failed during blinding,
+	 * we must fall back to the interpreter.
+	 */
+	if (IS_ERR(tmp))
+		return orig_fp;
+	if (tmp != fp) {
+		tmp_blinded = true;
+		fp = tmp;
+	}
+
+	jit_data = fp->aux->jit_data;
+	if (!jit_data) {
+		jit_data = kzalloc(sizeof(*jit_data), GFP_KERNEL);
+		if (!jit_data) {
+			fp = orig_fp;
+			goto out;
+		}
+		fp->aux->jit_data = jit_data;
+	}
+	if (jit_data->ctx.addrs) {
+		jit = jit_data->ctx;
+		header = jit_data->header;
+		extra_pass = true;
+		pass = jit_data->pass + 1;
+		goto skip_init_ctx;
+	}
+
+	memset(&jit, 0, sizeof(jit));
+	jit.addrs = kvcalloc(fp->len + 1, sizeof(*jit.addrs), GFP_KERNEL);
+	if (jit.addrs == NULL) {
+		fp = orig_fp;
+		goto free_addrs;
+	}
+	/*
+	 * Three initial passes:
+	 *   - 1/2: Determine clobbered registers
+	 *   - 3:   Calculate program size and addrs array
+	 */
+	for (pass = 1; pass <= 3; pass++) {
+		if (bpf_jit_prog(&jit, fp, extra_pass, stack_depth)) {
+			fp = orig_fp;
+			goto free_addrs;
+		}
+	}
+	/*
+	 * Final pass: Allocate and generate program
+	 */
+	header = bpf_jit_alloc(&jit, fp);
+	if (!header) {
+		fp = orig_fp;
+		goto free_addrs;
+	}
+skip_init_ctx:
+	if (bpf_jit_prog(&jit, fp, extra_pass, stack_depth)) {
+		bpf_jit_binary_free(header);
+		fp = orig_fp;
+		goto free_addrs;
+	}
+	if (bpf_jit_enable > 1) {
+		bpf_jit_dump(fp->len, jit.size, pass, jit.prg_buf);
+		print_fn_code(jit.prg_buf, jit.size_prg);
+	}
+	if (!fp->is_func || extra_pass) {
+		bpf_jit_binary_lock_ro(header);
+	} else {
+		jit_data->header = header;
+		jit_data->ctx = jit;
+		jit_data->pass = pass;
+	}
+	fp->bpf_func = (void *) jit.prg_buf;
+	fp->jited = 1;
+	fp->jited_len = jit.size;
+
+	if (!fp->is_func || extra_pass) {
+		bpf_prog_fill_jited_linfo(fp, jit.addrs + 1);
+free_addrs:
+		kvfree(jit.addrs);
+		kfree(jit_data);
+		fp->aux->jit_data = NULL;
+	}
+out:
+	if (tmp_blinded)
+		bpf_jit_prog_release_other(fp, fp == orig_fp ?
+					   tmp : orig_fp);
+	return fp;
+}
+
+bool bpf_jit_supports_kfunc_call(void)
+{
+	return true;
+}
+
+bool bpf_jit_supports_far_kfunc_call(void)
+{
+	return true;
+}
+
+int bpf_arch_text_poke(void *ip, enum bpf_text_poke_type t,
+		       void *old_addr, void *new_addr)
+{
+	struct {
+		u16 opc;
+		s32 disp;
+	} __packed insn;
+	char expected_plt[BPF_PLT_SIZE];
+	char current_plt[BPF_PLT_SIZE];
+	char new_plt[BPF_PLT_SIZE];
+	char *plt;
+	char *ret;
+	int err;
+
+	/* Verify the branch to be patched. */
+	err = copy_from_kernel_nofault(&insn, ip, sizeof(insn));
+	if (err < 0)
+		return err;
+	if (insn.opc != (0xc004 | (old_addr ? 0xf0 : 0)))
+		return -EINVAL;
+
+	if (t == BPF_MOD_JUMP &&
+	    insn.disp == ((char *)new_addr - (char *)ip) >> 1) {
+		/*
+		 * The branch already points to the destination,
+		 * there is no PLT.
+		 */
+	} else {
+		/* Verify the PLT. */
+		plt = (char *)ip + (insn.disp << 1);
+		err = copy_from_kernel_nofault(current_plt, plt, BPF_PLT_SIZE);
+		if (err < 0)
+			return err;
+		ret = (char *)ip + 6;
+		bpf_jit_plt(expected_plt, ret, old_addr);
+		if (memcmp(current_plt, expected_plt, BPF_PLT_SIZE))
+			return -EINVAL;
+		/* Adjust the call address. */
+		bpf_jit_plt(new_plt, ret, new_addr);
+		s390_kernel_write(plt + (bpf_plt_target - bpf_plt),
+				  new_plt + (bpf_plt_target - bpf_plt),
+				  sizeof(void *));
+	}
+
+	/* Adjust the mask of the branch. */
+	insn.opc = 0xc004 | (new_addr ? 0xf0 : 0);
+	s390_kernel_write((char *)ip + 1, (char *)&insn.opc + 1, 1);
+
+	/* Make the new code visible to the other CPUs. */
+	text_poke_sync_lock();
+
+	return 0;
+}
+
+struct bpf_tramp_jit {
+	struct bpf_jit common;
+	int orig_stack_args_off;/* Offset of arguments placed on stack by the
+				 * func_addr's original caller
+				 */
+	int stack_size;		/* Trampoline stack size */
+	int backchain_off;	/* Offset of backchain */
+	int stack_args_off;	/* Offset of stack arguments for calling
+				 * func_addr, has to be at the top
+				 */
+	int reg_args_off;	/* Offset of register arguments for calling
+				 * func_addr
+				 */
+	int ip_off;		/* For bpf_get_func_ip(), has to be at
+				 * (ctx - 16)
+				 */
+	int arg_cnt_off;	/* For bpf_get_func_arg_cnt(), has to be at
+				 * (ctx - 8)
+				 */
+	int bpf_args_off;	/* Offset of BPF_PROG context, which consists
+				 * of BPF arguments followed by return value
+				 */
+	int retval_off;		/* Offset of return value (see above) */
+	int r7_r8_off;		/* Offset of saved %r7 and %r8, which are used
+				 * for __bpf_prog_enter() return value and
+				 * func_addr respectively
+				 */
+	int run_ctx_off;	/* Offset of struct bpf_tramp_run_ctx */
+	int tccnt_off;		/* Offset of saved tailcall counter */
+	int r14_off;		/* Offset of saved %r14, has to be at the
+				 * bottom */
+	int do_fexit;		/* do_fexit: label */
+};
+
+static void load_imm64(struct bpf_jit *jit, int dst_reg, u64 val)
+{
+	/* llihf %dst_reg,val_hi */
+	EMIT6_IMM(0xc00e0000, dst_reg, (val >> 32));
+	/* oilf %rdst_reg,val_lo */
+	EMIT6_IMM(0xc00d0000, dst_reg, val);
+}
+
+static int invoke_bpf_prog(struct bpf_tramp_jit *tjit,
+			   const struct btf_func_model *m,
+			   struct bpf_tramp_link *tlink, bool save_ret)
+{
+	struct bpf_jit *jit = &tjit->common;
+	int cookie_off = tjit->run_ctx_off +
+			 offsetof(struct bpf_tramp_run_ctx, bpf_cookie);
+	struct bpf_prog *p = tlink->link.prog;
+	int patch;
+
+	/*
+	 * run_ctx.cookie = tlink->cookie;
+	 */
+
+	/* %r0 = tlink->cookie */
+	load_imm64(jit, REG_W0, tlink->cookie);
+	/* stg %r0,cookie_off(%r15) */
+	EMIT6_DISP_LH(0xe3000000, 0x0024, REG_W0, REG_0, REG_15, cookie_off);
+
+	/*
+	 * if ((start = __bpf_prog_enter(p, &run_ctx)) == 0)
+	 *         goto skip;
+	 */
+
+	/* %r1 = __bpf_prog_enter */
+	load_imm64(jit, REG_1, (u64)bpf_trampoline_enter(p));
+	/* %r2 = p */
+	load_imm64(jit, REG_2, (u64)p);
+	/* la %r3,run_ctx_off(%r15) */
+	EMIT4_DISP(0x41000000, REG_3, REG_15, tjit->run_ctx_off);
+	/* %r1() */
+	call_r1(jit);
+	/* ltgr %r7,%r2 */
+	EMIT4(0xb9020000, REG_7, REG_2);
+	/* brcl 8,skip */
+	patch = jit->prg;
+	EMIT6_PCREL_RILC(0xc0040000, 8, 0);
+
+	/*
+	 * retval = bpf_func(args, p->insnsi);
+	 */
+
+	/* %r1 = p->bpf_func */
+	load_imm64(jit, REG_1, (u64)p->bpf_func);
+	/* la %r2,bpf_args_off(%r15) */
+	EMIT4_DISP(0x41000000, REG_2, REG_15, tjit->bpf_args_off);
+	/* %r3 = p->insnsi */
+	if (!p->jited)
+		load_imm64(jit, REG_3, (u64)p->insnsi);
+	/* %r1() */
+	call_r1(jit);
+	/* stg %r2,retval_off(%r15) */
+	if (save_ret) {
+		if (sign_extend(jit, REG_2, m->ret_size, m->ret_flags))
+			return -1;
+		EMIT6_DISP_LH(0xe3000000, 0x0024, REG_2, REG_0, REG_15,
+			      tjit->retval_off);
+	}
+
+	/* skip: */
+	if (jit->prg_buf)
+		*(u32 *)&jit->prg_buf[patch + 2] = (jit->prg - patch) >> 1;
+
+	/*
+	 * __bpf_prog_exit(p, start, &run_ctx);
+	 */
+
+	/* %r1 = __bpf_prog_exit */
+	load_imm64(jit, REG_1, (u64)bpf_trampoline_exit(p));
+	/* %r2 = p */
+	load_imm64(jit, REG_2, (u64)p);
+	/* lgr %r3,%r7 */
+	EMIT4(0xb9040000, REG_3, REG_7);
+	/* la %r4,run_ctx_off(%r15) */
+	EMIT4_DISP(0x41000000, REG_4, REG_15, tjit->run_ctx_off);
+	/* %r1() */
+	call_r1(jit);
+
+	return 0;
+}
+
+static int alloc_stack(struct bpf_tramp_jit *tjit, size_t size)
+{
+	int stack_offset = tjit->stack_size;
+
+	tjit->stack_size += size;
+	return stack_offset;
+}
+
+/* ABI uses %r2 - %r6 for parameter passing. */
+#define MAX_NR_REG_ARGS 5
+
+/* The "L" field of the "mvc" instruction is 8 bits. */
+#define MAX_MVC_SIZE 256
+#define MAX_NR_STACK_ARGS (MAX_MVC_SIZE / sizeof(u64))
+
+/* -mfentry generates a 6-byte nop on s390x. */
+#define S390X_PATCH_SIZE 6
+
+static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im,
+					 struct bpf_tramp_jit *tjit,
+					 const struct btf_func_model *m,
+					 u32 flags,
+					 struct bpf_tramp_links *tlinks,
+					 void *func_addr)
+{
+	struct bpf_tramp_links *fmod_ret = &tlinks[BPF_TRAMP_MODIFY_RETURN];
+	struct bpf_tramp_links *fentry = &tlinks[BPF_TRAMP_FENTRY];
+	struct bpf_tramp_links *fexit = &tlinks[BPF_TRAMP_FEXIT];
+	int nr_bpf_args, nr_reg_args, nr_stack_args;
+	struct bpf_jit *jit = &tjit->common;
+	int arg, bpf_arg_off;
+	int i, j;
+
+	/* Support as many stack arguments as "mvc" instruction can handle. */
+	nr_reg_args = min_t(int, m->nr_args, MAX_NR_REG_ARGS);
+	nr_stack_args = m->nr_args - nr_reg_args;
+	if (nr_stack_args > MAX_NR_STACK_ARGS)
+		return -ENOTSUPP;
+
+	/* Return to %r14, since func_addr and %r0 are not available. */
+	if (!func_addr && !(flags & BPF_TRAMP_F_ORIG_STACK))
+		flags |= BPF_TRAMP_F_SKIP_FRAME;
+
+	/*
+	 * Compute how many arguments we need to pass to BPF programs.
+	 * BPF ABI mirrors that of x86_64: arguments that are 16 bytes or
+	 * smaller are packed into 1 or 2 registers; larger arguments are
+	 * passed via pointers.
+	 * In s390x ABI, arguments that are 8 bytes or smaller are packed into
+	 * a register; larger arguments are passed via pointers.
+	 * We need to deal with this difference.
+	 */
+	nr_bpf_args = 0;
+	for (i = 0; i < m->nr_args; i++) {
+		if (m->arg_size[i] <= 8)
+			nr_bpf_args += 1;
+		else if (m->arg_size[i] <= 16)
+			nr_bpf_args += 2;
+		else
+			return -ENOTSUPP;
+	}
+
+	/*
+	 * Calculate the stack layout.
+	 */
+
+	/*
+	 * Allocate STACK_FRAME_OVERHEAD bytes for the callees. As the s390x
+	 * ABI requires, put our backchain at the end of the allocated memory.
+	 */
+	tjit->stack_size = STACK_FRAME_OVERHEAD;
+	tjit->backchain_off = tjit->stack_size - sizeof(u64);
+	tjit->stack_args_off = alloc_stack(tjit, nr_stack_args * sizeof(u64));
+	tjit->reg_args_off = alloc_stack(tjit, nr_reg_args * sizeof(u64));
+	tjit->ip_off = alloc_stack(tjit, sizeof(u64));
+	tjit->arg_cnt_off = alloc_stack(tjit, sizeof(u64));
+	tjit->bpf_args_off = alloc_stack(tjit, nr_bpf_args * sizeof(u64));
+	tjit->retval_off = alloc_stack(tjit, sizeof(u64));
+	tjit->r7_r8_off = alloc_stack(tjit, 2 * sizeof(u64));
+	tjit->run_ctx_off = alloc_stack(tjit,
+					sizeof(struct bpf_tramp_run_ctx));
+	tjit->tccnt_off = alloc_stack(tjit, sizeof(u64));
+	tjit->r14_off = alloc_stack(tjit, sizeof(u64) * 2);
+	/*
+	 * In accordance with the s390x ABI, the caller has allocated
+	 * STACK_FRAME_OVERHEAD bytes for us. 8 of them contain the caller's
+	 * backchain, and the rest we can use.
+	 */
+	tjit->stack_size -= STACK_FRAME_OVERHEAD - sizeof(u64);
+	tjit->orig_stack_args_off = tjit->stack_size + STACK_FRAME_OVERHEAD;
+
+	/* lgr %r1,%r15 */
+	EMIT4(0xb9040000, REG_1, REG_15);
+	/* aghi %r15,-stack_size */
+	EMIT4_IMM(0xa70b0000, REG_15, -tjit->stack_size);
+	/* stg %r1,backchain_off(%r15) */
+	EMIT6_DISP_LH(0xe3000000, 0x0024, REG_1, REG_0, REG_15,
+		      tjit->backchain_off);
+	/* mvc tccnt_off(4,%r15),stack_size+STK_OFF_TCCNT(%r15) */
+	_EMIT6(0xd203f000 | tjit->tccnt_off,
+	       0xf000 | (tjit->stack_size + STK_OFF_TCCNT));
+	/* stmg %r2,%rN,fwd_reg_args_off(%r15) */
+	if (nr_reg_args)
+		EMIT6_DISP_LH(0xeb000000, 0x0024, REG_2,
+			      REG_2 + (nr_reg_args - 1), REG_15,
+			      tjit->reg_args_off);
+	for (i = 0, j = 0; i < m->nr_args; i++) {
+		if (i < MAX_NR_REG_ARGS)
+			arg = REG_2 + i;
+		else
+			arg = tjit->orig_stack_args_off +
+			      (i - MAX_NR_REG_ARGS) * sizeof(u64);
+		bpf_arg_off = tjit->bpf_args_off + j * sizeof(u64);
+		if (m->arg_size[i] <= 8) {
+			if (i < MAX_NR_REG_ARGS)
+				/* stg %arg,bpf_arg_off(%r15) */
+				EMIT6_DISP_LH(0xe3000000, 0x0024, arg,
+					      REG_0, REG_15, bpf_arg_off);
+			else
+				/* mvc bpf_arg_off(8,%r15),arg(%r15) */
+				_EMIT6(0xd207f000 | bpf_arg_off,
+				       0xf000 | arg);
+			j += 1;
+		} else {
+			if (i < MAX_NR_REG_ARGS) {
+				/* mvc bpf_arg_off(16,%r15),0(%arg) */
+				_EMIT6(0xd20ff000 | bpf_arg_off,
+				       reg2hex[arg] << 12);
+			} else {
+				/* lg %r1,arg(%r15) */
+				EMIT6_DISP_LH(0xe3000000, 0x0004, REG_1, REG_0,
+					      REG_15, arg);
+				/* mvc bpf_arg_off(16,%r15),0(%r1) */
+				_EMIT6(0xd20ff000 | bpf_arg_off, 0x1000);
+			}
+			j += 2;
+		}
+	}
+	/* stmg %r7,%r8,r7_r8_off(%r15) */
+	EMIT6_DISP_LH(0xeb000000, 0x0024, REG_7, REG_8, REG_15,
+		      tjit->r7_r8_off);
+	/* stg %r14,r14_off(%r15) */
+	EMIT6_DISP_LH(0xe3000000, 0x0024, REG_14, REG_0, REG_15, tjit->r14_off);
+
+	if (flags & BPF_TRAMP_F_ORIG_STACK) {
+		/*
+		 * The ftrace trampoline puts the return address (which is the
+		 * address of the original function + S390X_PATCH_SIZE) into
+		 * %r0; see ftrace_shared_hotpatch_trampoline_br and
+		 * ftrace_init_nop() for details.
+		 */
+
+		/* lgr %r8,%r0 */
+		EMIT4(0xb9040000, REG_8, REG_0);
+	} else {
+		/* %r8 = func_addr + S390X_PATCH_SIZE */
+		load_imm64(jit, REG_8, (u64)func_addr + S390X_PATCH_SIZE);
+	}
+
+	/*
+	 * ip = func_addr;
+	 * arg_cnt = m->nr_args;
+	 */
+
+	if (flags & BPF_TRAMP_F_IP_ARG) {
+		/* %r0 = func_addr */
+		load_imm64(jit, REG_0, (u64)func_addr);
+		/* stg %r0,ip_off(%r15) */
+		EMIT6_DISP_LH(0xe3000000, 0x0024, REG_0, REG_0, REG_15,
+			      tjit->ip_off);
+	}
+	/* lghi %r0,nr_bpf_args */
+	EMIT4_IMM(0xa7090000, REG_0, nr_bpf_args);
+	/* stg %r0,arg_cnt_off(%r15) */
+	EMIT6_DISP_LH(0xe3000000, 0x0024, REG_0, REG_0, REG_15,
+		      tjit->arg_cnt_off);
+
+	if (flags & BPF_TRAMP_F_CALL_ORIG) {
+		/*
+		 * __bpf_tramp_enter(im);
+		 */
+
+		/* %r1 = __bpf_tramp_enter */
+		load_imm64(jit, REG_1, (u64)__bpf_tramp_enter);
+		/* %r2 = im */
+		load_imm64(jit, REG_2, (u64)im);
+		/* %r1() */
+		call_r1(jit);
+	}
+
+	for (i = 0; i < fentry->nr_links; i++)
+		if (invoke_bpf_prog(tjit, m, fentry->links[i],
+				    flags & BPF_TRAMP_F_RET_FENTRY_RET))
+			return -EINVAL;
+
+	if (fmod_ret->nr_links) {
+		/*
+		 * retval = 0;
+		 */
+
+		/* xc retval_off(8,%r15),retval_off(%r15) */
+		_EMIT6(0xd707f000 | tjit->retval_off,
+		       0xf000 | tjit->retval_off);
+
+		for (i = 0; i < fmod_ret->nr_links; i++) {
+			if (invoke_bpf_prog(tjit, m, fmod_ret->links[i], true))
+				return -EINVAL;
+
+			/*
+			 * if (retval)
+			 *         goto do_fexit;
+			 */
+
+			/* ltg %r0,retval_off(%r15) */
+			EMIT6_DISP_LH(0xe3000000, 0x0002, REG_0, REG_0, REG_15,
+				      tjit->retval_off);
+			/* brcl 7,do_fexit */
+			EMIT6_PCREL_RILC(0xc0040000, 7, tjit->do_fexit);
+		}
+	}
+
+	if (flags & BPF_TRAMP_F_CALL_ORIG) {
+		/*
+		 * retval = func_addr(args);
+		 */
+
+		/* lmg %r2,%rN,reg_args_off(%r15) */
+		if (nr_reg_args)
+			EMIT6_DISP_LH(0xeb000000, 0x0004, REG_2,
+				      REG_2 + (nr_reg_args - 1), REG_15,
+				      tjit->reg_args_off);
+		/* mvc stack_args_off(N,%r15),orig_stack_args_off(%r15) */
+		if (nr_stack_args)
+			_EMIT6(0xd200f000 |
+				       (nr_stack_args * sizeof(u64) - 1) << 16 |
+				       tjit->stack_args_off,
+			       0xf000 | tjit->orig_stack_args_off);
+		/* mvc STK_OFF_TCCNT(4,%r15),tccnt_off(%r15) */
+		_EMIT6(0xd203f000 | STK_OFF_TCCNT, 0xf000 | tjit->tccnt_off);
+		/* lgr %r1,%r8 */
+		EMIT4(0xb9040000, REG_1, REG_8);
+		/* %r1() */
+		call_r1(jit);
+		/* stg %r2,retval_off(%r15) */
+		EMIT6_DISP_LH(0xe3000000, 0x0024, REG_2, REG_0, REG_15,
+			      tjit->retval_off);
+
+		im->ip_after_call = jit->prg_buf + jit->prg;
+
+		/*
+		 * The following nop will be patched by bpf_tramp_image_put().
+		 */
+
+		/* brcl 0,im->ip_epilogue */
+		EMIT6_PCREL_RILC(0xc0040000, 0, (u64)im->ip_epilogue);
+	}
+
+	/* do_fexit: */
+	tjit->do_fexit = jit->prg;
+	for (i = 0; i < fexit->nr_links; i++)
+		if (invoke_bpf_prog(tjit, m, fexit->links[i], false))
+			return -EINVAL;
+
+	if (flags & BPF_TRAMP_F_CALL_ORIG) {
+		im->ip_epilogue = jit->prg_buf + jit->prg;
+
+		/*
+		 * __bpf_tramp_exit(im);
+		 */
+
+		/* %r1 = __bpf_tramp_exit */
+		load_imm64(jit, REG_1, (u64)__bpf_tramp_exit);
+		/* %r2 = im */
+		load_imm64(jit, REG_2, (u64)im);
+		/* %r1() */
+		call_r1(jit);
+	}
+
+	/* lmg %r2,%rN,reg_args_off(%r15) */
+	if ((flags & BPF_TRAMP_F_RESTORE_REGS) && nr_reg_args)
+		EMIT6_DISP_LH(0xeb000000, 0x0004, REG_2,
+			      REG_2 + (nr_reg_args - 1), REG_15,
+			      tjit->reg_args_off);
+	/* lgr %r1,%r8 */
+	if (!(flags & BPF_TRAMP_F_SKIP_FRAME))
+		EMIT4(0xb9040000, REG_1, REG_8);
+	/* lmg %r7,%r8,r7_r8_off(%r15) */
+	EMIT6_DISP_LH(0xeb000000, 0x0004, REG_7, REG_8, REG_15,
+		      tjit->r7_r8_off);
+	/* lg %r14,r14_off(%r15) */
+	EMIT6_DISP_LH(0xe3000000, 0x0004, REG_14, REG_0, REG_15, tjit->r14_off);
+	/* lg %r2,retval_off(%r15) */
+	if (flags & (BPF_TRAMP_F_CALL_ORIG | BPF_TRAMP_F_RET_FENTRY_RET))
+		EMIT6_DISP_LH(0xe3000000, 0x0004, REG_2, REG_0, REG_15,
+			      tjit->retval_off);
+	/* mvc stack_size+STK_OFF_TCCNT(4,%r15),tccnt_off(%r15) */
+	_EMIT6(0xd203f000 | (tjit->stack_size + STK_OFF_TCCNT),
+	       0xf000 | tjit->tccnt_off);
+	/* aghi %r15,stack_size */
+	EMIT4_IMM(0xa70b0000, REG_15, tjit->stack_size);
+	/* Emit an expoline for the following indirect jump. */
+	if (nospec_uses_trampoline())
+		emit_expoline(jit);
+	if (flags & BPF_TRAMP_F_SKIP_FRAME)
+		/* br %r14 */
+		_EMIT2(0x07fe);
+	else
+		/* br %r1 */
+		_EMIT2(0x07f1);
+
+	emit_r1_thunk(jit);
+
+	return 0;
+}
+
+int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image,
+				void *image_end, const struct btf_func_model *m,
+				u32 flags, struct bpf_tramp_links *tlinks,
+				void *func_addr)
+{
+	struct bpf_tramp_jit tjit;
+	int ret;
+	int i;
+
+	for (i = 0; i < 2; i++) {
+		if (i == 0) {
+			/* Compute offsets, check whether the code fits. */
+			memset(&tjit, 0, sizeof(tjit));
+		} else {
+			/* Generate the code. */
+			tjit.common.prg = 0;
+			tjit.common.prg_buf = image;
+		}
+		ret = __arch_prepare_bpf_trampoline(im, &tjit, m, flags,
+						    tlinks, func_addr);
+		if (ret < 0)
+			return ret;
+		if (tjit.common.prg > (char *)image_end - (char *)image)
+			/*
+			 * Use the same error code as for exceeding
+			 * BPF_MAX_TRAMP_LINKS.
+			 */
+			return -E2BIG;
+	}
+
+	return tjit.common.prg;
+}
+
+bool bpf_jit_supports_subprog_tailcalls(void)
+{
+	return true;
+}
diff --git a/arch/s390/net/pnet.c b/arch/s390/net/pnet.c
new file mode 100644
index 0000000000..79211bec0f
--- /dev/null
+++ b/arch/s390/net/pnet.c
@@ -0,0 +1,91 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ *  IBM System z PNET ID Support
+ *
+ *    Copyright IBM Corp. 2018
+ */
+
+#include <linux/device.h>
+#include <linux/module.h>
+#include <linux/pci.h>
+#include <linux/types.h>
+#include <asm/ccwgroup.h>
+#include <asm/ccwdev.h>
+#include <asm/pnet.h>
+#include <asm/ebcdic.h>
+
+#define PNETIDS_LEN		64	/* Total utility string length in bytes
+					 * to cover up to 4 PNETIDs of 16 bytes
+					 * for up to 4 device ports
+					 */
+#define MAX_PNETID_LEN		16	/* Max.length of a single port PNETID */
+#define MAX_PNETID_PORTS	(PNETIDS_LEN / MAX_PNETID_LEN)
+					/* Max. # of ports with a PNETID */
+
+/*
+ * Get the PNETIDs from a device.
+ * s390 hardware supports the definition of a so-called Physical Network
+ * Identifier (short PNETID) per network device port. These PNETIDs can be
+ * used to identify network devices that are attached to the same physical
+ * network (broadcast domain).
+ *
+ * The device can be
+ * - a ccwgroup device with all bundled subchannels having the same PNETID
+ * - a PCI attached network device
+ *
+ * Returns:
+ * 0:		PNETIDs extracted from device.
+ * -ENOMEM:	No memory to extract utility string.
+ * -EOPNOTSUPP: Device type without utility string support
+ */
+static int pnet_ids_by_device(struct device *dev, u8 *pnetids)
+{
+	memset(pnetids, 0, PNETIDS_LEN);
+	if (dev_is_ccwgroup(dev)) {
+		struct ccwgroup_device *gdev = to_ccwgroupdev(dev);
+		u8 *util_str;
+
+		util_str = ccw_device_get_util_str(gdev->cdev[0], 0);
+		if (!util_str)
+			return -ENOMEM;
+		memcpy(pnetids, util_str, PNETIDS_LEN);
+		EBCASC(pnetids, PNETIDS_LEN);
+		kfree(util_str);
+		return 0;
+	}
+	if (dev_is_pci(dev)) {
+		struct zpci_dev *zdev = to_zpci(to_pci_dev(dev));
+
+		memcpy(pnetids, zdev->util_str, sizeof(zdev->util_str));
+		EBCASC(pnetids, sizeof(zdev->util_str));
+		return 0;
+	}
+	return -EOPNOTSUPP;
+}
+
+/*
+ * Extract the pnetid for a device port.
+ *
+ * Return 0 if a pnetid is found and -ENOENT otherwise.
+ */
+int pnet_id_by_dev_port(struct device *dev, unsigned short port, u8 *pnetid)
+{
+	u8 pnetids[MAX_PNETID_PORTS][MAX_PNETID_LEN];
+	static const u8 zero[MAX_PNETID_LEN] = { 0 };
+	int rc = 0;
+
+	if (!dev || port >= MAX_PNETID_PORTS)
+		return -ENOENT;
+
+	if (!pnet_ids_by_device(dev, (u8 *)pnetids) &&
+	    memcmp(pnetids[port], zero, MAX_PNETID_LEN))
+		memcpy(pnetid, pnetids[port], MAX_PNETID_LEN);
+	else
+		rc = -ENOENT;
+
+	return rc;
+}
+EXPORT_SYMBOL_GPL(pnet_id_by_dev_port);
+
+MODULE_DESCRIPTION("pnetid determination from utility strings");
+MODULE_LICENSE("GPL");
diff --git a/arch/s390/pci/Makefile b/arch/s390/pci/Makefile
new file mode 100644
index 0000000000..5ae31ca9dd
--- /dev/null
+++ b/arch/s390/pci/Makefile
@@ -0,0 +1,9 @@
+# SPDX-License-Identifier: GPL-2.0
+#
+# Makefile for the s390 PCI subsystem.
+#
+
+obj-$(CONFIG_PCI)	+= pci.o pci_irq.o pci_dma.o pci_clp.o pci_sysfs.o \
+			   pci_event.o pci_debug.o pci_insn.o pci_mmio.o \
+			   pci_bus.o pci_kvm_hook.o
+obj-$(CONFIG_PCI_IOV)	+= pci_iov.o
diff --git a/arch/s390/pci/pci.c b/arch/s390/pci/pci.c
new file mode 100644
index 0000000000..d34d5813d0
--- /dev/null
+++ b/arch/s390/pci/pci.c
@@ -0,0 +1,1135 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright IBM Corp. 2012
+ *
+ * Author(s):
+ *   Jan Glauber <jang@linux.vnet.ibm.com>
+ *
+ * The System z PCI code is a rewrite from a prototype by
+ * the following people (Kudoz!):
+ *   Alexander Schmidt
+ *   Christoph Raisch
+ *   Hannes Hering
+ *   Hoang-Nam Nguyen
+ *   Jan-Bernd Themann
+ *   Stefan Roscher
+ *   Thomas Klein
+ */
+
+#define KMSG_COMPONENT "zpci"
+#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
+
+#include <linux/kernel.h>
+#include <linux/slab.h>
+#include <linux/err.h>
+#include <linux/export.h>
+#include <linux/delay.h>
+#include <linux/seq_file.h>
+#include <linux/jump_label.h>
+#include <linux/pci.h>
+#include <linux/printk.h>
+
+#include <asm/isc.h>
+#include <asm/airq.h>
+#include <asm/facility.h>
+#include <asm/pci_insn.h>
+#include <asm/pci_clp.h>
+#include <asm/pci_dma.h>
+
+#include "pci_bus.h"
+#include "pci_iov.h"
+
+/* list of all detected zpci devices */
+static LIST_HEAD(zpci_list);
+static DEFINE_SPINLOCK(zpci_list_lock);
+
+static DECLARE_BITMAP(zpci_domain, ZPCI_DOMAIN_BITMAP_SIZE);
+static DEFINE_SPINLOCK(zpci_domain_lock);
+
+#define ZPCI_IOMAP_ENTRIES						\
+	min(((unsigned long) ZPCI_NR_DEVICES * PCI_STD_NUM_BARS / 2),	\
+	    ZPCI_IOMAP_MAX_ENTRIES)
+
+unsigned int s390_pci_no_rid;
+
+static DEFINE_SPINLOCK(zpci_iomap_lock);
+static unsigned long *zpci_iomap_bitmap;
+struct zpci_iomap_entry *zpci_iomap_start;
+EXPORT_SYMBOL_GPL(zpci_iomap_start);
+
+DEFINE_STATIC_KEY_FALSE(have_mio);
+
+static struct kmem_cache *zdev_fmb_cache;
+
+/* AEN structures that must be preserved over KVM module re-insertion */
+union zpci_sic_iib *zpci_aipb;
+EXPORT_SYMBOL_GPL(zpci_aipb);
+struct airq_iv *zpci_aif_sbv;
+EXPORT_SYMBOL_GPL(zpci_aif_sbv);
+
+struct zpci_dev *get_zdev_by_fid(u32 fid)
+{
+	struct zpci_dev *tmp, *zdev = NULL;
+
+	spin_lock(&zpci_list_lock);
+	list_for_each_entry(tmp, &zpci_list, entry) {
+		if (tmp->fid == fid) {
+			zdev = tmp;
+			zpci_zdev_get(zdev);
+			break;
+		}
+	}
+	spin_unlock(&zpci_list_lock);
+	return zdev;
+}
+
+void zpci_remove_reserved_devices(void)
+{
+	struct zpci_dev *tmp, *zdev;
+	enum zpci_state state;
+	LIST_HEAD(remove);
+
+	spin_lock(&zpci_list_lock);
+	list_for_each_entry_safe(zdev, tmp, &zpci_list, entry) {
+		if (zdev->state == ZPCI_FN_STATE_STANDBY &&
+		    !clp_get_state(zdev->fid, &state) &&
+		    state == ZPCI_FN_STATE_RESERVED)
+			list_move_tail(&zdev->entry, &remove);
+	}
+	spin_unlock(&zpci_list_lock);
+
+	list_for_each_entry_safe(zdev, tmp, &remove, entry)
+		zpci_device_reserved(zdev);
+}
+
+int pci_domain_nr(struct pci_bus *bus)
+{
+	return ((struct zpci_bus *) bus->sysdata)->domain_nr;
+}
+EXPORT_SYMBOL_GPL(pci_domain_nr);
+
+int pci_proc_domain(struct pci_bus *bus)
+{
+	return pci_domain_nr(bus);
+}
+EXPORT_SYMBOL_GPL(pci_proc_domain);
+
+/* Modify PCI: Register I/O address translation parameters */
+int zpci_register_ioat(struct zpci_dev *zdev, u8 dmaas,
+		       u64 base, u64 limit, u64 iota, u8 *status)
+{
+	u64 req = ZPCI_CREATE_REQ(zdev->fh, dmaas, ZPCI_MOD_FC_REG_IOAT);
+	struct zpci_fib fib = {0};
+	u8 cc;
+
+	WARN_ON_ONCE(iota & 0x3fff);
+	fib.pba = base;
+	fib.pal = limit;
+	fib.iota = iota | ZPCI_IOTA_RTTO_FLAG;
+	fib.gd = zdev->gisa;
+	cc = zpci_mod_fc(req, &fib, status);
+	if (cc)
+		zpci_dbg(3, "reg ioat fid:%x, cc:%d, status:%d\n", zdev->fid, cc, *status);
+	return cc;
+}
+EXPORT_SYMBOL_GPL(zpci_register_ioat);
+
+/* Modify PCI: Unregister I/O address translation parameters */
+int zpci_unregister_ioat(struct zpci_dev *zdev, u8 dmaas)
+{
+	u64 req = ZPCI_CREATE_REQ(zdev->fh, dmaas, ZPCI_MOD_FC_DEREG_IOAT);
+	struct zpci_fib fib = {0};
+	u8 cc, status;
+
+	fib.gd = zdev->gisa;
+
+	cc = zpci_mod_fc(req, &fib, &status);
+	if (cc)
+		zpci_dbg(3, "unreg ioat fid:%x, cc:%d, status:%d\n", zdev->fid, cc, status);
+	return cc;
+}
+
+/* Modify PCI: Set PCI function measurement parameters */
+int zpci_fmb_enable_device(struct zpci_dev *zdev)
+{
+	u64 req = ZPCI_CREATE_REQ(zdev->fh, 0, ZPCI_MOD_FC_SET_MEASURE);
+	struct zpci_fib fib = {0};
+	u8 cc, status;
+
+	if (zdev->fmb || sizeof(*zdev->fmb) < zdev->fmb_length)
+		return -EINVAL;
+
+	zdev->fmb = kmem_cache_zalloc(zdev_fmb_cache, GFP_KERNEL);
+	if (!zdev->fmb)
+		return -ENOMEM;
+	WARN_ON((u64) zdev->fmb & 0xf);
+
+	/* reset software counters */
+	atomic64_set(&zdev->allocated_pages, 0);
+	atomic64_set(&zdev->mapped_pages, 0);
+	atomic64_set(&zdev->unmapped_pages, 0);
+
+	fib.fmb_addr = virt_to_phys(zdev->fmb);
+	fib.gd = zdev->gisa;
+	cc = zpci_mod_fc(req, &fib, &status);
+	if (cc) {
+		kmem_cache_free(zdev_fmb_cache, zdev->fmb);
+		zdev->fmb = NULL;
+	}
+	return cc ? -EIO : 0;
+}
+
+/* Modify PCI: Disable PCI function measurement */
+int zpci_fmb_disable_device(struct zpci_dev *zdev)
+{
+	u64 req = ZPCI_CREATE_REQ(zdev->fh, 0, ZPCI_MOD_FC_SET_MEASURE);
+	struct zpci_fib fib = {0};
+	u8 cc, status;
+
+	if (!zdev->fmb)
+		return -EINVAL;
+
+	fib.gd = zdev->gisa;
+
+	/* Function measurement is disabled if fmb address is zero */
+	cc = zpci_mod_fc(req, &fib, &status);
+	if (cc == 3) /* Function already gone. */
+		cc = 0;
+
+	if (!cc) {
+		kmem_cache_free(zdev_fmb_cache, zdev->fmb);
+		zdev->fmb = NULL;
+	}
+	return cc ? -EIO : 0;
+}
+
+static int zpci_cfg_load(struct zpci_dev *zdev, int offset, u32 *val, u8 len)
+{
+	u64 req = ZPCI_CREATE_REQ(zdev->fh, ZPCI_PCIAS_CFGSPC, len);
+	u64 data;
+	int rc;
+
+	rc = __zpci_load(&data, req, offset);
+	if (!rc) {
+		data = le64_to_cpu((__force __le64) data);
+		data >>= (8 - len) * 8;
+		*val = (u32) data;
+	} else
+		*val = 0xffffffff;
+	return rc;
+}
+
+static int zpci_cfg_store(struct zpci_dev *zdev, int offset, u32 val, u8 len)
+{
+	u64 req = ZPCI_CREATE_REQ(zdev->fh, ZPCI_PCIAS_CFGSPC, len);
+	u64 data = val;
+	int rc;
+
+	data <<= (8 - len) * 8;
+	data = (__force u64) cpu_to_le64(data);
+	rc = __zpci_store(data, req, offset);
+	return rc;
+}
+
+resource_size_t pcibios_align_resource(void *data, const struct resource *res,
+				       resource_size_t size,
+				       resource_size_t align)
+{
+	return 0;
+}
+
+/* combine single writes by using store-block insn */
+void __iowrite64_copy(void __iomem *to, const void *from, size_t count)
+{
+       zpci_memcpy_toio(to, from, count);
+}
+
+void __iomem *ioremap_prot(phys_addr_t phys_addr, size_t size,
+			   unsigned long prot)
+{
+	/*
+	 * When PCI MIO instructions are unavailable the "physical" address
+	 * encodes a hint for accessing the PCI memory space it represents.
+	 * Just pass it unchanged such that ioread/iowrite can decode it.
+	 */
+	if (!static_branch_unlikely(&have_mio))
+		return (void __iomem *)phys_addr;
+
+	return generic_ioremap_prot(phys_addr, size, __pgprot(prot));
+}
+EXPORT_SYMBOL(ioremap_prot);
+
+void iounmap(volatile void __iomem *addr)
+{
+	if (static_branch_likely(&have_mio))
+		generic_iounmap(addr);
+}
+EXPORT_SYMBOL(iounmap);
+
+/* Create a virtual mapping cookie for a PCI BAR */
+static void __iomem *pci_iomap_range_fh(struct pci_dev *pdev, int bar,
+					unsigned long offset, unsigned long max)
+{
+	struct zpci_dev *zdev =	to_zpci(pdev);
+	int idx;
+
+	idx = zdev->bars[bar].map_idx;
+	spin_lock(&zpci_iomap_lock);
+	/* Detect overrun */
+	WARN_ON(!++zpci_iomap_start[idx].count);
+	zpci_iomap_start[idx].fh = zdev->fh;
+	zpci_iomap_start[idx].bar = bar;
+	spin_unlock(&zpci_iomap_lock);
+
+	return (void __iomem *) ZPCI_ADDR(idx) + offset;
+}
+
+static void __iomem *pci_iomap_range_mio(struct pci_dev *pdev, int bar,
+					 unsigned long offset,
+					 unsigned long max)
+{
+	unsigned long barsize = pci_resource_len(pdev, bar);
+	struct zpci_dev *zdev = to_zpci(pdev);
+	void __iomem *iova;
+
+	iova = ioremap((unsigned long) zdev->bars[bar].mio_wt, barsize);
+	return iova ? iova + offset : iova;
+}
+
+void __iomem *pci_iomap_range(struct pci_dev *pdev, int bar,
+			      unsigned long offset, unsigned long max)
+{
+	if (bar >= PCI_STD_NUM_BARS || !pci_resource_len(pdev, bar))
+		return NULL;
+
+	if (static_branch_likely(&have_mio))
+		return pci_iomap_range_mio(pdev, bar, offset, max);
+	else
+		return pci_iomap_range_fh(pdev, bar, offset, max);
+}
+EXPORT_SYMBOL(pci_iomap_range);
+
+void __iomem *pci_iomap(struct pci_dev *dev, int bar, unsigned long maxlen)
+{
+	return pci_iomap_range(dev, bar, 0, maxlen);
+}
+EXPORT_SYMBOL(pci_iomap);
+
+static void __iomem *pci_iomap_wc_range_mio(struct pci_dev *pdev, int bar,
+					    unsigned long offset, unsigned long max)
+{
+	unsigned long barsize = pci_resource_len(pdev, bar);
+	struct zpci_dev *zdev = to_zpci(pdev);
+	void __iomem *iova;
+
+	iova = ioremap((unsigned long) zdev->bars[bar].mio_wb, barsize);
+	return iova ? iova + offset : iova;
+}
+
+void __iomem *pci_iomap_wc_range(struct pci_dev *pdev, int bar,
+				 unsigned long offset, unsigned long max)
+{
+	if (bar >= PCI_STD_NUM_BARS || !pci_resource_len(pdev, bar))
+		return NULL;
+
+	if (static_branch_likely(&have_mio))
+		return pci_iomap_wc_range_mio(pdev, bar, offset, max);
+	else
+		return pci_iomap_range_fh(pdev, bar, offset, max);
+}
+EXPORT_SYMBOL(pci_iomap_wc_range);
+
+void __iomem *pci_iomap_wc(struct pci_dev *dev, int bar, unsigned long maxlen)
+{
+	return pci_iomap_wc_range(dev, bar, 0, maxlen);
+}
+EXPORT_SYMBOL(pci_iomap_wc);
+
+static void pci_iounmap_fh(struct pci_dev *pdev, void __iomem *addr)
+{
+	unsigned int idx = ZPCI_IDX(addr);
+
+	spin_lock(&zpci_iomap_lock);
+	/* Detect underrun */
+	WARN_ON(!zpci_iomap_start[idx].count);
+	if (!--zpci_iomap_start[idx].count) {
+		zpci_iomap_start[idx].fh = 0;
+		zpci_iomap_start[idx].bar = 0;
+	}
+	spin_unlock(&zpci_iomap_lock);
+}
+
+static void pci_iounmap_mio(struct pci_dev *pdev, void __iomem *addr)
+{
+	iounmap(addr);
+}
+
+void pci_iounmap(struct pci_dev *pdev, void __iomem *addr)
+{
+	if (static_branch_likely(&have_mio))
+		pci_iounmap_mio(pdev, addr);
+	else
+		pci_iounmap_fh(pdev, addr);
+}
+EXPORT_SYMBOL(pci_iounmap);
+
+static int pci_read(struct pci_bus *bus, unsigned int devfn, int where,
+		    int size, u32 *val)
+{
+	struct zpci_dev *zdev = zdev_from_bus(bus, devfn);
+
+	return (zdev) ? zpci_cfg_load(zdev, where, val, size) : -ENODEV;
+}
+
+static int pci_write(struct pci_bus *bus, unsigned int devfn, int where,
+		     int size, u32 val)
+{
+	struct zpci_dev *zdev = zdev_from_bus(bus, devfn);
+
+	return (zdev) ? zpci_cfg_store(zdev, where, val, size) : -ENODEV;
+}
+
+static struct pci_ops pci_root_ops = {
+	.read = pci_read,
+	.write = pci_write,
+};
+
+static void zpci_map_resources(struct pci_dev *pdev)
+{
+	struct zpci_dev *zdev = to_zpci(pdev);
+	resource_size_t len;
+	int i;
+
+	for (i = 0; i < PCI_STD_NUM_BARS; i++) {
+		len = pci_resource_len(pdev, i);
+		if (!len)
+			continue;
+
+		if (zpci_use_mio(zdev))
+			pdev->resource[i].start =
+				(resource_size_t __force) zdev->bars[i].mio_wt;
+		else
+			pdev->resource[i].start = (resource_size_t __force)
+				pci_iomap_range_fh(pdev, i, 0, 0);
+		pdev->resource[i].end = pdev->resource[i].start + len - 1;
+	}
+
+	zpci_iov_map_resources(pdev);
+}
+
+static void zpci_unmap_resources(struct pci_dev *pdev)
+{
+	struct zpci_dev *zdev = to_zpci(pdev);
+	resource_size_t len;
+	int i;
+
+	if (zpci_use_mio(zdev))
+		return;
+
+	for (i = 0; i < PCI_STD_NUM_BARS; i++) {
+		len = pci_resource_len(pdev, i);
+		if (!len)
+			continue;
+		pci_iounmap_fh(pdev, (void __iomem __force *)
+			       pdev->resource[i].start);
+	}
+}
+
+static int zpci_alloc_iomap(struct zpci_dev *zdev)
+{
+	unsigned long entry;
+
+	spin_lock(&zpci_iomap_lock);
+	entry = find_first_zero_bit(zpci_iomap_bitmap, ZPCI_IOMAP_ENTRIES);
+	if (entry == ZPCI_IOMAP_ENTRIES) {
+		spin_unlock(&zpci_iomap_lock);
+		return -ENOSPC;
+	}
+	set_bit(entry, zpci_iomap_bitmap);
+	spin_unlock(&zpci_iomap_lock);
+	return entry;
+}
+
+static void zpci_free_iomap(struct zpci_dev *zdev, int entry)
+{
+	spin_lock(&zpci_iomap_lock);
+	memset(&zpci_iomap_start[entry], 0, sizeof(struct zpci_iomap_entry));
+	clear_bit(entry, zpci_iomap_bitmap);
+	spin_unlock(&zpci_iomap_lock);
+}
+
+static void zpci_do_update_iomap_fh(struct zpci_dev *zdev, u32 fh)
+{
+	int bar, idx;
+
+	spin_lock(&zpci_iomap_lock);
+	for (bar = 0; bar < PCI_STD_NUM_BARS; bar++) {
+		if (!zdev->bars[bar].size)
+			continue;
+		idx = zdev->bars[bar].map_idx;
+		if (!zpci_iomap_start[idx].count)
+			continue;
+		WRITE_ONCE(zpci_iomap_start[idx].fh, zdev->fh);
+	}
+	spin_unlock(&zpci_iomap_lock);
+}
+
+void zpci_update_fh(struct zpci_dev *zdev, u32 fh)
+{
+	if (!fh || zdev->fh == fh)
+		return;
+
+	zdev->fh = fh;
+	if (zpci_use_mio(zdev))
+		return;
+	if (zdev->has_resources && zdev_enabled(zdev))
+		zpci_do_update_iomap_fh(zdev, fh);
+}
+
+static struct resource *__alloc_res(struct zpci_dev *zdev, unsigned long start,
+				    unsigned long size, unsigned long flags)
+{
+	struct resource *r;
+
+	r = kzalloc(sizeof(*r), GFP_KERNEL);
+	if (!r)
+		return NULL;
+
+	r->start = start;
+	r->end = r->start + size - 1;
+	r->flags = flags;
+	r->name = zdev->res_name;
+
+	if (request_resource(&iomem_resource, r)) {
+		kfree(r);
+		return NULL;
+	}
+	return r;
+}
+
+int zpci_setup_bus_resources(struct zpci_dev *zdev)
+{
+	unsigned long addr, size, flags;
+	struct resource *res;
+	int i, entry;
+
+	snprintf(zdev->res_name, sizeof(zdev->res_name),
+		 "PCI Bus %04x:%02x", zdev->uid, ZPCI_BUS_NR);
+
+	for (i = 0; i < PCI_STD_NUM_BARS; i++) {
+		if (!zdev->bars[i].size)
+			continue;
+		entry = zpci_alloc_iomap(zdev);
+		if (entry < 0)
+			return entry;
+		zdev->bars[i].map_idx = entry;
+
+		/* only MMIO is supported */
+		flags = IORESOURCE_MEM;
+		if (zdev->bars[i].val & 8)
+			flags |= IORESOURCE_PREFETCH;
+		if (zdev->bars[i].val & 4)
+			flags |= IORESOURCE_MEM_64;
+
+		if (zpci_use_mio(zdev))
+			addr = (unsigned long) zdev->bars[i].mio_wt;
+		else
+			addr = ZPCI_ADDR(entry);
+		size = 1UL << zdev->bars[i].size;
+
+		res = __alloc_res(zdev, addr, size, flags);
+		if (!res) {
+			zpci_free_iomap(zdev, entry);
+			return -ENOMEM;
+		}
+		zdev->bars[i].res = res;
+	}
+	zdev->has_resources = 1;
+
+	return 0;
+}
+
+static void zpci_cleanup_bus_resources(struct zpci_dev *zdev)
+{
+	struct resource *res;
+	int i;
+
+	pci_lock_rescan_remove();
+	for (i = 0; i < PCI_STD_NUM_BARS; i++) {
+		res = zdev->bars[i].res;
+		if (!res)
+			continue;
+
+		release_resource(res);
+		pci_bus_remove_resource(zdev->zbus->bus, res);
+		zpci_free_iomap(zdev, zdev->bars[i].map_idx);
+		zdev->bars[i].res = NULL;
+		kfree(res);
+	}
+	zdev->has_resources = 0;
+	pci_unlock_rescan_remove();
+}
+
+int pcibios_device_add(struct pci_dev *pdev)
+{
+	struct zpci_dev *zdev = to_zpci(pdev);
+	struct resource *res;
+	int i;
+
+	/* The pdev has a reference to the zdev via its bus */
+	zpci_zdev_get(zdev);
+	if (pdev->is_physfn)
+		pdev->no_vf_scan = 1;
+
+	pdev->dev.groups = zpci_attr_groups;
+	pdev->dev.dma_ops = &s390_pci_dma_ops;
+	zpci_map_resources(pdev);
+
+	for (i = 0; i < PCI_STD_NUM_BARS; i++) {
+		res = &pdev->resource[i];
+		if (res->parent || !res->flags)
+			continue;
+		pci_claim_resource(pdev, i);
+	}
+
+	return 0;
+}
+
+void pcibios_release_device(struct pci_dev *pdev)
+{
+	struct zpci_dev *zdev = to_zpci(pdev);
+
+	zpci_unmap_resources(pdev);
+	zpci_zdev_put(zdev);
+}
+
+int pcibios_enable_device(struct pci_dev *pdev, int mask)
+{
+	struct zpci_dev *zdev = to_zpci(pdev);
+
+	zpci_debug_init_device(zdev, dev_name(&pdev->dev));
+	zpci_fmb_enable_device(zdev);
+
+	return pci_enable_resources(pdev, mask);
+}
+
+void pcibios_disable_device(struct pci_dev *pdev)
+{
+	struct zpci_dev *zdev = to_zpci(pdev);
+
+	zpci_fmb_disable_device(zdev);
+	zpci_debug_exit_device(zdev);
+}
+
+static int __zpci_register_domain(int domain)
+{
+	spin_lock(&zpci_domain_lock);
+	if (test_bit(domain, zpci_domain)) {
+		spin_unlock(&zpci_domain_lock);
+		pr_err("Domain %04x is already assigned\n", domain);
+		return -EEXIST;
+	}
+	set_bit(domain, zpci_domain);
+	spin_unlock(&zpci_domain_lock);
+	return domain;
+}
+
+static int __zpci_alloc_domain(void)
+{
+	int domain;
+
+	spin_lock(&zpci_domain_lock);
+	/*
+	 * We can always auto allocate domains below ZPCI_NR_DEVICES.
+	 * There is either a free domain or we have reached the maximum in
+	 * which case we would have bailed earlier.
+	 */
+	domain = find_first_zero_bit(zpci_domain, ZPCI_NR_DEVICES);
+	set_bit(domain, zpci_domain);
+	spin_unlock(&zpci_domain_lock);
+	return domain;
+}
+
+int zpci_alloc_domain(int domain)
+{
+	if (zpci_unique_uid) {
+		if (domain)
+			return __zpci_register_domain(domain);
+		pr_warn("UID checking was active but no UID is provided: switching to automatic domain allocation\n");
+		update_uid_checking(false);
+	}
+	return __zpci_alloc_domain();
+}
+
+void zpci_free_domain(int domain)
+{
+	spin_lock(&zpci_domain_lock);
+	clear_bit(domain, zpci_domain);
+	spin_unlock(&zpci_domain_lock);
+}
+
+
+int zpci_enable_device(struct zpci_dev *zdev)
+{
+	u32 fh = zdev->fh;
+	int rc = 0;
+
+	if (clp_enable_fh(zdev, &fh, ZPCI_NR_DMA_SPACES))
+		rc = -EIO;
+	else
+		zpci_update_fh(zdev, fh);
+	return rc;
+}
+EXPORT_SYMBOL_GPL(zpci_enable_device);
+
+int zpci_disable_device(struct zpci_dev *zdev)
+{
+	u32 fh = zdev->fh;
+	int cc, rc = 0;
+
+	cc = clp_disable_fh(zdev, &fh);
+	if (!cc) {
+		zpci_update_fh(zdev, fh);
+	} else if (cc == CLP_RC_SETPCIFN_ALRDY) {
+		pr_info("Disabling PCI function %08x had no effect as it was already disabled\n",
+			zdev->fid);
+		/* Function is already disabled - update handle */
+		rc = clp_refresh_fh(zdev->fid, &fh);
+		if (!rc) {
+			zpci_update_fh(zdev, fh);
+			rc = -EINVAL;
+		}
+	} else {
+		rc = -EIO;
+	}
+	return rc;
+}
+EXPORT_SYMBOL_GPL(zpci_disable_device);
+
+/**
+ * zpci_hot_reset_device - perform a reset of the given zPCI function
+ * @zdev: the slot which should be reset
+ *
+ * Performs a low level reset of the zPCI function. The reset is low level in
+ * the sense that the zPCI function can be reset without detaching it from the
+ * common PCI subsystem. The reset may be performed while under control of
+ * either DMA or IOMMU APIs in which case the existing DMA/IOMMU translation
+ * table is reinstated at the end of the reset.
+ *
+ * After the reset the functions internal state is reset to an initial state
+ * equivalent to its state during boot when first probing a driver.
+ * Consequently after reset the PCI function requires re-initialization via the
+ * common PCI code including re-enabling IRQs via pci_alloc_irq_vectors()
+ * and enabling the function via e.g.pci_enablde_device_flags().The caller
+ * must guard against concurrent reset attempts.
+ *
+ * In most cases this function should not be called directly but through
+ * pci_reset_function() or pci_reset_bus() which handle the save/restore and
+ * locking.
+ *
+ * Return: 0 on success and an error value otherwise
+ */
+int zpci_hot_reset_device(struct zpci_dev *zdev)
+{
+	u8 status;
+	int rc;
+
+	zpci_dbg(3, "rst fid:%x, fh:%x\n", zdev->fid, zdev->fh);
+	if (zdev_enabled(zdev)) {
+		/* Disables device access, DMAs and IRQs (reset state) */
+		rc = zpci_disable_device(zdev);
+		/*
+		 * Due to a z/VM vs LPAR inconsistency in the error state the
+		 * FH may indicate an enabled device but disable says the
+		 * device is already disabled don't treat it as an error here.
+		 */
+		if (rc == -EINVAL)
+			rc = 0;
+		if (rc)
+			return rc;
+	}
+
+	rc = zpci_enable_device(zdev);
+	if (rc)
+		return rc;
+
+	if (zdev->dma_table)
+		rc = zpci_register_ioat(zdev, 0, zdev->start_dma, zdev->end_dma,
+					virt_to_phys(zdev->dma_table), &status);
+	else
+		rc = zpci_dma_init_device(zdev);
+	if (rc) {
+		zpci_disable_device(zdev);
+		return rc;
+	}
+
+	return 0;
+}
+
+/**
+ * zpci_create_device() - Create a new zpci_dev and add it to the zbus
+ * @fid: Function ID of the device to be created
+ * @fh: Current Function Handle of the device to be created
+ * @state: Initial state after creation either Standby or Configured
+ *
+ * Creates a new zpci device and adds it to its, possibly newly created, zbus
+ * as well as zpci_list.
+ *
+ * Returns: the zdev on success or an error pointer otherwise
+ */
+struct zpci_dev *zpci_create_device(u32 fid, u32 fh, enum zpci_state state)
+{
+	struct zpci_dev *zdev;
+	int rc;
+
+	zpci_dbg(1, "add fid:%x, fh:%x, c:%d\n", fid, fh, state);
+	zdev = kzalloc(sizeof(*zdev), GFP_KERNEL);
+	if (!zdev)
+		return ERR_PTR(-ENOMEM);
+
+	/* FID and Function Handle are the static/dynamic identifiers */
+	zdev->fid = fid;
+	zdev->fh = fh;
+
+	/* Query function properties and update zdev */
+	rc = clp_query_pci_fn(zdev);
+	if (rc)
+		goto error;
+	zdev->state =  state;
+
+	kref_init(&zdev->kref);
+	mutex_init(&zdev->lock);
+	mutex_init(&zdev->kzdev_lock);
+
+	rc = zpci_init_iommu(zdev);
+	if (rc)
+		goto error;
+
+	rc = zpci_bus_device_register(zdev, &pci_root_ops);
+	if (rc)
+		goto error_destroy_iommu;
+
+	spin_lock(&zpci_list_lock);
+	list_add_tail(&zdev->entry, &zpci_list);
+	spin_unlock(&zpci_list_lock);
+
+	return zdev;
+
+error_destroy_iommu:
+	zpci_destroy_iommu(zdev);
+error:
+	zpci_dbg(0, "add fid:%x, rc:%d\n", fid, rc);
+	kfree(zdev);
+	return ERR_PTR(rc);
+}
+
+bool zpci_is_device_configured(struct zpci_dev *zdev)
+{
+	enum zpci_state state = zdev->state;
+
+	return state != ZPCI_FN_STATE_RESERVED &&
+		state != ZPCI_FN_STATE_STANDBY;
+}
+
+/**
+ * zpci_scan_configured_device() - Scan a freshly configured zpci_dev
+ * @zdev: The zpci_dev to be configured
+ * @fh: The general function handle supplied by the platform
+ *
+ * Given a device in the configuration state Configured, enables, scans and
+ * adds it to the common code PCI subsystem if possible. If any failure occurs,
+ * the zpci_dev is left disabled.
+ *
+ * Return: 0 on success, or an error code otherwise
+ */
+int zpci_scan_configured_device(struct zpci_dev *zdev, u32 fh)
+{
+	zpci_update_fh(zdev, fh);
+	return zpci_bus_scan_device(zdev);
+}
+
+/**
+ * zpci_deconfigure_device() - Deconfigure a zpci_dev
+ * @zdev: The zpci_dev to configure
+ *
+ * Deconfigure a zPCI function that is currently configured and possibly known
+ * to the common code PCI subsystem.
+ * If any failure occurs the device is left as is.
+ *
+ * Return: 0 on success, or an error code otherwise
+ */
+int zpci_deconfigure_device(struct zpci_dev *zdev)
+{
+	int rc;
+
+	if (zdev->zbus->bus)
+		zpci_bus_remove_device(zdev, false);
+
+	if (zdev->dma_table) {
+		rc = zpci_dma_exit_device(zdev);
+		if (rc)
+			return rc;
+	}
+	if (zdev_enabled(zdev)) {
+		rc = zpci_disable_device(zdev);
+		if (rc)
+			return rc;
+	}
+
+	rc = sclp_pci_deconfigure(zdev->fid);
+	zpci_dbg(3, "deconf fid:%x, rc:%d\n", zdev->fid, rc);
+	if (rc)
+		return rc;
+	zdev->state = ZPCI_FN_STATE_STANDBY;
+
+	return 0;
+}
+
+/**
+ * zpci_device_reserved() - Mark device as resverved
+ * @zdev: the zpci_dev that was reserved
+ *
+ * Handle the case that a given zPCI function was reserved by another system.
+ * After a call to this function the zpci_dev can not be found via
+ * get_zdev_by_fid() anymore but may still be accessible via existing
+ * references though it will not be functional anymore.
+ */
+void zpci_device_reserved(struct zpci_dev *zdev)
+{
+	if (zdev->has_hp_slot)
+		zpci_exit_slot(zdev);
+	/*
+	 * Remove device from zpci_list as it is going away. This also
+	 * makes sure we ignore subsequent zPCI events for this device.
+	 */
+	spin_lock(&zpci_list_lock);
+	list_del(&zdev->entry);
+	spin_unlock(&zpci_list_lock);
+	zdev->state = ZPCI_FN_STATE_RESERVED;
+	zpci_dbg(3, "rsv fid:%x\n", zdev->fid);
+	zpci_zdev_put(zdev);
+}
+
+void zpci_release_device(struct kref *kref)
+{
+	struct zpci_dev *zdev = container_of(kref, struct zpci_dev, kref);
+	int ret;
+
+	if (zdev->zbus->bus)
+		zpci_bus_remove_device(zdev, false);
+
+	if (zdev->dma_table)
+		zpci_dma_exit_device(zdev);
+	if (zdev_enabled(zdev))
+		zpci_disable_device(zdev);
+
+	switch (zdev->state) {
+	case ZPCI_FN_STATE_CONFIGURED:
+		ret = sclp_pci_deconfigure(zdev->fid);
+		zpci_dbg(3, "deconf fid:%x, rc:%d\n", zdev->fid, ret);
+		fallthrough;
+	case ZPCI_FN_STATE_STANDBY:
+		if (zdev->has_hp_slot)
+			zpci_exit_slot(zdev);
+		spin_lock(&zpci_list_lock);
+		list_del(&zdev->entry);
+		spin_unlock(&zpci_list_lock);
+		zpci_dbg(3, "rsv fid:%x\n", zdev->fid);
+		fallthrough;
+	case ZPCI_FN_STATE_RESERVED:
+		if (zdev->has_resources)
+			zpci_cleanup_bus_resources(zdev);
+		zpci_bus_device_unregister(zdev);
+		zpci_destroy_iommu(zdev);
+		fallthrough;
+	default:
+		break;
+	}
+	zpci_dbg(3, "rem fid:%x\n", zdev->fid);
+	kfree_rcu(zdev, rcu);
+}
+
+int zpci_report_error(struct pci_dev *pdev,
+		      struct zpci_report_error_header *report)
+{
+	struct zpci_dev *zdev = to_zpci(pdev);
+
+	return sclp_pci_report(report, zdev->fh, zdev->fid);
+}
+EXPORT_SYMBOL(zpci_report_error);
+
+/**
+ * zpci_clear_error_state() - Clears the zPCI error state of the device
+ * @zdev: The zdev for which the zPCI error state should be reset
+ *
+ * Clear the zPCI error state of the device. If clearing the zPCI error state
+ * fails the device is left in the error state. In this case it may make sense
+ * to call zpci_io_perm_failure() on the associated pdev if it exists.
+ *
+ * Returns: 0 on success, -EIO otherwise
+ */
+int zpci_clear_error_state(struct zpci_dev *zdev)
+{
+	u64 req = ZPCI_CREATE_REQ(zdev->fh, 0, ZPCI_MOD_FC_RESET_ERROR);
+	struct zpci_fib fib = {0};
+	u8 status;
+	int cc;
+
+	cc = zpci_mod_fc(req, &fib, &status);
+	if (cc) {
+		zpci_dbg(3, "ces fid:%x, cc:%d, status:%x\n", zdev->fid, cc, status);
+		return -EIO;
+	}
+
+	return 0;
+}
+
+/**
+ * zpci_reset_load_store_blocked() - Re-enables L/S from error state
+ * @zdev: The zdev for which to unblock load/store access
+ *
+ * Re-enables load/store access for a PCI function in the error state while
+ * keeping DMA blocked. In this state drivers can poke MMIO space to determine
+ * if error recovery is possible while catching any rogue DMA access from the
+ * device.
+ *
+ * Returns: 0 on success, -EIO otherwise
+ */
+int zpci_reset_load_store_blocked(struct zpci_dev *zdev)
+{
+	u64 req = ZPCI_CREATE_REQ(zdev->fh, 0, ZPCI_MOD_FC_RESET_BLOCK);
+	struct zpci_fib fib = {0};
+	u8 status;
+	int cc;
+
+	cc = zpci_mod_fc(req, &fib, &status);
+	if (cc) {
+		zpci_dbg(3, "rls fid:%x, cc:%d, status:%x\n", zdev->fid, cc, status);
+		return -EIO;
+	}
+
+	return 0;
+}
+
+static int zpci_mem_init(void)
+{
+	BUILD_BUG_ON(!is_power_of_2(__alignof__(struct zpci_fmb)) ||
+		     __alignof__(struct zpci_fmb) < sizeof(struct zpci_fmb));
+
+	zdev_fmb_cache = kmem_cache_create("PCI_FMB_cache", sizeof(struct zpci_fmb),
+					   __alignof__(struct zpci_fmb), 0, NULL);
+	if (!zdev_fmb_cache)
+		goto error_fmb;
+
+	zpci_iomap_start = kcalloc(ZPCI_IOMAP_ENTRIES,
+				   sizeof(*zpci_iomap_start), GFP_KERNEL);
+	if (!zpci_iomap_start)
+		goto error_iomap;
+
+	zpci_iomap_bitmap = kcalloc(BITS_TO_LONGS(ZPCI_IOMAP_ENTRIES),
+				    sizeof(*zpci_iomap_bitmap), GFP_KERNEL);
+	if (!zpci_iomap_bitmap)
+		goto error_iomap_bitmap;
+
+	if (static_branch_likely(&have_mio))
+		clp_setup_writeback_mio();
+
+	return 0;
+error_iomap_bitmap:
+	kfree(zpci_iomap_start);
+error_iomap:
+	kmem_cache_destroy(zdev_fmb_cache);
+error_fmb:
+	return -ENOMEM;
+}
+
+static void zpci_mem_exit(void)
+{
+	kfree(zpci_iomap_bitmap);
+	kfree(zpci_iomap_start);
+	kmem_cache_destroy(zdev_fmb_cache);
+}
+
+static unsigned int s390_pci_probe __initdata = 1;
+unsigned int s390_pci_force_floating __initdata;
+static unsigned int s390_pci_initialized;
+
+char * __init pcibios_setup(char *str)
+{
+	if (!strcmp(str, "off")) {
+		s390_pci_probe = 0;
+		return NULL;
+	}
+	if (!strcmp(str, "nomio")) {
+		S390_lowcore.machine_flags &= ~MACHINE_FLAG_PCI_MIO;
+		return NULL;
+	}
+	if (!strcmp(str, "force_floating")) {
+		s390_pci_force_floating = 1;
+		return NULL;
+	}
+	if (!strcmp(str, "norid")) {
+		s390_pci_no_rid = 1;
+		return NULL;
+	}
+	return str;
+}
+
+bool zpci_is_enabled(void)
+{
+	return s390_pci_initialized;
+}
+
+static int __init pci_base_init(void)
+{
+	int rc;
+
+	if (!s390_pci_probe)
+		return 0;
+
+	if (!test_facility(69) || !test_facility(71)) {
+		pr_info("PCI is not supported because CPU facilities 69 or 71 are not available\n");
+		return 0;
+	}
+
+	if (MACHINE_HAS_PCI_MIO) {
+		static_branch_enable(&have_mio);
+		ctl_set_bit(2, 5);
+	}
+
+	rc = zpci_debug_init();
+	if (rc)
+		goto out;
+
+	rc = zpci_mem_init();
+	if (rc)
+		goto out_mem;
+
+	rc = zpci_irq_init();
+	if (rc)
+		goto out_irq;
+
+	rc = zpci_dma_init();
+	if (rc)
+		goto out_dma;
+
+	rc = clp_scan_pci_devices();
+	if (rc)
+		goto out_find;
+	zpci_bus_scan_busses();
+
+	s390_pci_initialized = 1;
+	return 0;
+
+out_find:
+	zpci_dma_exit();
+out_dma:
+	zpci_irq_exit();
+out_irq:
+	zpci_mem_exit();
+out_mem:
+	zpci_debug_exit();
+out:
+	return rc;
+}
+subsys_initcall_sync(pci_base_init);
diff --git a/arch/s390/pci/pci_bus.c b/arch/s390/pci/pci_bus.c
new file mode 100644
index 0000000000..32245b970a
--- /dev/null
+++ b/arch/s390/pci/pci_bus.c
@@ -0,0 +1,379 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright IBM Corp. 2020
+ *
+ * Author(s):
+ *   Pierre Morel <pmorel@linux.ibm.com>
+ *
+ */
+
+#define KMSG_COMPONENT "zpci"
+#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
+
+#include <linux/kernel.h>
+#include <linux/slab.h>
+#include <linux/err.h>
+#include <linux/export.h>
+#include <linux/delay.h>
+#include <linux/seq_file.h>
+#include <linux/jump_label.h>
+#include <linux/pci.h>
+#include <linux/printk.h>
+
+#include <asm/pci_clp.h>
+#include <asm/pci_dma.h>
+
+#include "pci_bus.h"
+#include "pci_iov.h"
+
+static LIST_HEAD(zbus_list);
+static DEFINE_MUTEX(zbus_list_lock);
+static int zpci_nb_devices;
+
+/* zpci_bus_prepare_device - Prepare a zPCI function for scanning
+ * @zdev: the zPCI function to be prepared
+ *
+ * The PCI resources for the function are set up and added to its zbus and the
+ * function is enabled. The function must be added to a zbus which must have
+ * a PCI bus created. If an error occurs the zPCI function is not enabled.
+ *
+ * Return: 0 on success, an error code otherwise
+ */
+static int zpci_bus_prepare_device(struct zpci_dev *zdev)
+{
+	int rc, i;
+
+	if (!zdev_enabled(zdev)) {
+		rc = zpci_enable_device(zdev);
+		if (rc)
+			return rc;
+		rc = zpci_dma_init_device(zdev);
+		if (rc) {
+			zpci_disable_device(zdev);
+			return rc;
+		}
+	}
+
+	if (!zdev->has_resources) {
+		zpci_setup_bus_resources(zdev);
+		for (i = 0; i < PCI_STD_NUM_BARS; i++) {
+			if (zdev->bars[i].res)
+				pci_bus_add_resource(zdev->zbus->bus, zdev->bars[i].res, 0);
+		}
+	}
+
+	return 0;
+}
+
+/* zpci_bus_scan_device - Scan a single device adding it to the PCI core
+ * @zdev: the zdev to be scanned
+ *
+ * Scans the PCI function making it available to the common PCI code.
+ *
+ * Return: 0 on success, an error value otherwise
+ */
+int zpci_bus_scan_device(struct zpci_dev *zdev)
+{
+	struct pci_dev *pdev;
+	int rc;
+
+	rc = zpci_bus_prepare_device(zdev);
+	if (rc)
+		return rc;
+
+	pdev = pci_scan_single_device(zdev->zbus->bus, zdev->devfn);
+	if (!pdev)
+		return -ENODEV;
+
+	pci_lock_rescan_remove();
+	pci_bus_add_device(pdev);
+	pci_unlock_rescan_remove();
+
+	return 0;
+}
+
+/* zpci_bus_remove_device - Removes the given zdev from the PCI core
+ * @zdev: the zdev to be removed from the PCI core
+ * @set_error: if true the device's error state is set to permanent failure
+ *
+ * Sets a zPCI device to a configured but offline state; the zPCI
+ * device is still accessible through its hotplug slot and the zPCI
+ * API but is removed from the common code PCI bus, making it
+ * no longer available to drivers.
+ */
+void zpci_bus_remove_device(struct zpci_dev *zdev, bool set_error)
+{
+	struct zpci_bus *zbus = zdev->zbus;
+	struct pci_dev *pdev;
+
+	if (!zdev->zbus->bus)
+		return;
+
+	pdev = pci_get_slot(zbus->bus, zdev->devfn);
+	if (pdev) {
+		if (set_error)
+			pdev->error_state = pci_channel_io_perm_failure;
+		if (pdev->is_virtfn) {
+			zpci_iov_remove_virtfn(pdev, zdev->vfn);
+			/* balance pci_get_slot */
+			pci_dev_put(pdev);
+			return;
+		}
+		pci_stop_and_remove_bus_device_locked(pdev);
+		/* balance pci_get_slot */
+		pci_dev_put(pdev);
+	}
+}
+
+/* zpci_bus_scan_bus - Scan all configured zPCI functions on the bus
+ * @zbus: the zbus to be scanned
+ *
+ * Enables and scans all PCI functions on the bus making them available to the
+ * common PCI code. If a PCI function fails to be initialized an error will be
+ * returned but attempts will still be made for all other functions on the bus.
+ *
+ * Return: 0 on success, an error value otherwise
+ */
+int zpci_bus_scan_bus(struct zpci_bus *zbus)
+{
+	struct zpci_dev *zdev;
+	int devfn, rc, ret = 0;
+
+	for (devfn = 0; devfn < ZPCI_FUNCTIONS_PER_BUS; devfn++) {
+		zdev = zbus->function[devfn];
+		if (zdev && zdev->state == ZPCI_FN_STATE_CONFIGURED) {
+			rc = zpci_bus_prepare_device(zdev);
+			if (rc)
+				ret = -EIO;
+		}
+	}
+
+	pci_lock_rescan_remove();
+	pci_scan_child_bus(zbus->bus);
+	pci_bus_add_devices(zbus->bus);
+	pci_unlock_rescan_remove();
+
+	return ret;
+}
+
+/* zpci_bus_scan_busses - Scan all registered busses
+ *
+ * Scan all available zbusses
+ *
+ */
+void zpci_bus_scan_busses(void)
+{
+	struct zpci_bus *zbus = NULL;
+
+	mutex_lock(&zbus_list_lock);
+	list_for_each_entry(zbus, &zbus_list, bus_next) {
+		zpci_bus_scan_bus(zbus);
+		cond_resched();
+	}
+	mutex_unlock(&zbus_list_lock);
+}
+
+/* zpci_bus_create_pci_bus - Create the PCI bus associated with this zbus
+ * @zbus: the zbus holding the zdevices
+ * @fr: PCI root function that will determine the bus's domain, and bus speeed
+ * @ops: the pci operations
+ *
+ * The PCI function @fr determines the domain (its UID), multifunction property
+ * and maximum bus speed of the entire bus.
+ *
+ * Return: 0 on success, an error code otherwise
+ */
+static int zpci_bus_create_pci_bus(struct zpci_bus *zbus, struct zpci_dev *fr, struct pci_ops *ops)
+{
+	struct pci_bus *bus;
+	int domain;
+
+	domain = zpci_alloc_domain((u16)fr->uid);
+	if (domain < 0)
+		return domain;
+
+	zbus->domain_nr = domain;
+	zbus->multifunction = fr->rid_available;
+	zbus->max_bus_speed = fr->max_bus_speed;
+
+	/*
+	 * Note that the zbus->resources are taken over and zbus->resources
+	 * is empty after a successful call
+	 */
+	bus = pci_create_root_bus(NULL, ZPCI_BUS_NR, ops, zbus, &zbus->resources);
+	if (!bus) {
+		zpci_free_domain(zbus->domain_nr);
+		return -EFAULT;
+	}
+
+	zbus->bus = bus;
+
+	return 0;
+}
+
+static void zpci_bus_release(struct kref *kref)
+{
+	struct zpci_bus *zbus = container_of(kref, struct zpci_bus, kref);
+
+	if (zbus->bus) {
+		pci_lock_rescan_remove();
+		pci_stop_root_bus(zbus->bus);
+
+		zpci_free_domain(zbus->domain_nr);
+		pci_free_resource_list(&zbus->resources);
+
+		pci_remove_root_bus(zbus->bus);
+		pci_unlock_rescan_remove();
+	}
+
+	mutex_lock(&zbus_list_lock);
+	list_del(&zbus->bus_next);
+	mutex_unlock(&zbus_list_lock);
+	kfree(zbus);
+}
+
+static void zpci_bus_put(struct zpci_bus *zbus)
+{
+	kref_put(&zbus->kref, zpci_bus_release);
+}
+
+static struct zpci_bus *zpci_bus_get(int pchid)
+{
+	struct zpci_bus *zbus;
+
+	mutex_lock(&zbus_list_lock);
+	list_for_each_entry(zbus, &zbus_list, bus_next) {
+		if (pchid == zbus->pchid) {
+			kref_get(&zbus->kref);
+			goto out_unlock;
+		}
+	}
+	zbus = NULL;
+out_unlock:
+	mutex_unlock(&zbus_list_lock);
+	return zbus;
+}
+
+static struct zpci_bus *zpci_bus_alloc(int pchid)
+{
+	struct zpci_bus *zbus;
+
+	zbus = kzalloc(sizeof(*zbus), GFP_KERNEL);
+	if (!zbus)
+		return NULL;
+
+	zbus->pchid = pchid;
+	INIT_LIST_HEAD(&zbus->bus_next);
+	mutex_lock(&zbus_list_lock);
+	list_add_tail(&zbus->bus_next, &zbus_list);
+	mutex_unlock(&zbus_list_lock);
+
+	kref_init(&zbus->kref);
+	INIT_LIST_HEAD(&zbus->resources);
+
+	zbus->bus_resource.start = 0;
+	zbus->bus_resource.end = ZPCI_BUS_NR;
+	zbus->bus_resource.flags = IORESOURCE_BUS;
+	pci_add_resource(&zbus->resources, &zbus->bus_resource);
+
+	return zbus;
+}
+
+void pcibios_bus_add_device(struct pci_dev *pdev)
+{
+	struct zpci_dev *zdev = to_zpci(pdev);
+
+	/*
+	 * With pdev->no_vf_scan the common PCI probing code does not
+	 * perform PF/VF linking.
+	 */
+	if (zdev->vfn) {
+		zpci_iov_setup_virtfn(zdev->zbus, pdev, zdev->vfn);
+		pdev->no_command_memory = 1;
+	}
+}
+
+static int zpci_bus_add_device(struct zpci_bus *zbus, struct zpci_dev *zdev)
+{
+	int rc = -EINVAL;
+
+	if (zbus->function[zdev->devfn]) {
+		pr_err("devfn %04x is already assigned\n", zdev->devfn);
+		return rc;
+	}
+
+	zdev->zbus = zbus;
+	zbus->function[zdev->devfn] = zdev;
+	zpci_nb_devices++;
+
+	if (zbus->multifunction && !zdev->rid_available) {
+		WARN_ONCE(1, "rid_available not set for multifunction\n");
+		goto error;
+	}
+	rc = zpci_init_slot(zdev);
+	if (rc)
+		goto error;
+	zdev->has_hp_slot = 1;
+
+	return 0;
+
+error:
+	zbus->function[zdev->devfn] = NULL;
+	zdev->zbus = NULL;
+	zpci_nb_devices--;
+	return rc;
+}
+
+int zpci_bus_device_register(struct zpci_dev *zdev, struct pci_ops *ops)
+{
+	struct zpci_bus *zbus = NULL;
+	int rc = -EBADF;
+
+	if (zpci_nb_devices == ZPCI_NR_DEVICES) {
+		pr_warn("Adding PCI function %08x failed because the configured limit of %d is reached\n",
+			zdev->fid, ZPCI_NR_DEVICES);
+		return -ENOSPC;
+	}
+
+	if (zdev->devfn >= ZPCI_FUNCTIONS_PER_BUS)
+		return -EINVAL;
+
+	if (!s390_pci_no_rid && zdev->rid_available)
+		zbus = zpci_bus_get(zdev->pchid);
+
+	if (!zbus) {
+		zbus = zpci_bus_alloc(zdev->pchid);
+		if (!zbus)
+			return -ENOMEM;
+	}
+
+	if (!zbus->bus) {
+		/* The UID of the first PCI function registered with a zpci_bus
+		 * is used as the domain number for that bus. Currently there
+		 * is exactly one zpci_bus per domain.
+		 */
+		rc = zpci_bus_create_pci_bus(zbus, zdev, ops);
+		if (rc)
+			goto error;
+	}
+
+	rc = zpci_bus_add_device(zbus, zdev);
+	if (rc)
+		goto error;
+
+	return 0;
+
+error:
+	pr_err("Adding PCI function %08x failed\n", zdev->fid);
+	zpci_bus_put(zbus);
+	return rc;
+}
+
+void zpci_bus_device_unregister(struct zpci_dev *zdev)
+{
+	struct zpci_bus *zbus = zdev->zbus;
+
+	zpci_nb_devices--;
+	zbus->function[zdev->devfn] = NULL;
+	zpci_bus_put(zbus);
+}
diff --git a/arch/s390/pci/pci_bus.h b/arch/s390/pci/pci_bus.h
new file mode 100644
index 0000000000..af9f0ac79a
--- /dev/null
+++ b/arch/s390/pci/pci_bus.h
@@ -0,0 +1,42 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright IBM Corp. 2020
+ *
+ * Author(s):
+ *   Pierre Morel <pmorel@linux.ibm.com>
+ *
+ */
+
+int zpci_bus_device_register(struct zpci_dev *zdev, struct pci_ops *ops);
+void zpci_bus_device_unregister(struct zpci_dev *zdev);
+
+int zpci_bus_scan_bus(struct zpci_bus *zbus);
+void zpci_bus_scan_busses(void);
+
+int zpci_bus_scan_device(struct zpci_dev *zdev);
+void zpci_bus_remove_device(struct zpci_dev *zdev, bool set_error);
+
+void zpci_release_device(struct kref *kref);
+static inline void zpci_zdev_put(struct zpci_dev *zdev)
+{
+	if (zdev)
+		kref_put(&zdev->kref, zpci_release_device);
+}
+
+static inline void zpci_zdev_get(struct zpci_dev *zdev)
+{
+	kref_get(&zdev->kref);
+}
+
+int zpci_alloc_domain(int domain);
+void zpci_free_domain(int domain);
+int zpci_setup_bus_resources(struct zpci_dev *zdev);
+
+static inline struct zpci_dev *zdev_from_bus(struct pci_bus *bus,
+					     unsigned int devfn)
+{
+	struct zpci_bus *zbus = bus->sysdata;
+
+	return (devfn >= ZPCI_FUNCTIONS_PER_BUS) ? NULL : zbus->function[devfn];
+}
+
diff --git a/arch/s390/pci/pci_clp.c b/arch/s390/pci/pci_clp.c
new file mode 100644
index 0000000000..ee90a91ed8
--- /dev/null
+++ b/arch/s390/pci/pci_clp.c
@@ -0,0 +1,669 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright IBM Corp. 2012
+ *
+ * Author(s):
+ *   Jan Glauber <jang@linux.vnet.ibm.com>
+ */
+
+#define KMSG_COMPONENT "zpci"
+#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
+
+#include <linux/compat.h>
+#include <linux/kernel.h>
+#include <linux/miscdevice.h>
+#include <linux/slab.h>
+#include <linux/err.h>
+#include <linux/delay.h>
+#include <linux/pci.h>
+#include <linux/uaccess.h>
+#include <asm/asm-extable.h>
+#include <asm/pci_debug.h>
+#include <asm/pci_clp.h>
+#include <asm/clp.h>
+#include <uapi/asm/clp.h>
+
+#include "pci_bus.h"
+
+bool zpci_unique_uid;
+
+void update_uid_checking(bool new)
+{
+	if (zpci_unique_uid != new)
+		zpci_dbg(3, "uid checking:%d\n", new);
+
+	zpci_unique_uid = new;
+}
+
+static inline void zpci_err_clp(unsigned int rsp, int rc)
+{
+	struct {
+		unsigned int rsp;
+		int rc;
+	} __packed data = {rsp, rc};
+
+	zpci_err_hex(&data, sizeof(data));
+}
+
+/*
+ * Call Logical Processor with c=1, lps=0 and command 1
+ * to get the bit mask of installed logical processors
+ */
+static inline int clp_get_ilp(unsigned long *ilp)
+{
+	unsigned long mask;
+	int cc = 3;
+
+	asm volatile (
+		"	.insn	rrf,0xb9a00000,%[mask],%[cmd],8,0\n"
+		"0:	ipm	%[cc]\n"
+		"	srl	%[cc],28\n"
+		"1:\n"
+		EX_TABLE(0b, 1b)
+		: [cc] "+d" (cc), [mask] "=d" (mask) : [cmd] "a" (1)
+		: "cc");
+	*ilp = mask;
+	return cc;
+}
+
+/*
+ * Call Logical Processor with c=0, the give constant lps and an lpcb request.
+ */
+static __always_inline int clp_req(void *data, unsigned int lps)
+{
+	struct { u8 _[CLP_BLK_SIZE]; } *req = data;
+	u64 ignored;
+	int cc = 3;
+
+	asm volatile (
+		"	.insn	rrf,0xb9a00000,%[ign],%[req],0,%[lps]\n"
+		"0:	ipm	%[cc]\n"
+		"	srl	%[cc],28\n"
+		"1:\n"
+		EX_TABLE(0b, 1b)
+		: [cc] "+d" (cc), [ign] "=d" (ignored), "+m" (*req)
+		: [req] "a" (req), [lps] "i" (lps)
+		: "cc");
+	return cc;
+}
+
+static void *clp_alloc_block(gfp_t gfp_mask)
+{
+	return (void *) __get_free_pages(gfp_mask, get_order(CLP_BLK_SIZE));
+}
+
+static void clp_free_block(void *ptr)
+{
+	free_pages((unsigned long) ptr, get_order(CLP_BLK_SIZE));
+}
+
+static void clp_store_query_pci_fngrp(struct zpci_dev *zdev,
+				      struct clp_rsp_query_pci_grp *response)
+{
+	zdev->tlb_refresh = response->refresh;
+	zdev->dma_mask = response->dasm;
+	zdev->msi_addr = response->msia;
+	zdev->max_msi = response->noi;
+	zdev->fmb_update = response->mui;
+	zdev->version = response->version;
+	zdev->maxstbl = response->maxstbl;
+	zdev->dtsm = response->dtsm;
+
+	switch (response->version) {
+	case 1:
+		zdev->max_bus_speed = PCIE_SPEED_5_0GT;
+		break;
+	default:
+		zdev->max_bus_speed = PCI_SPEED_UNKNOWN;
+		break;
+	}
+}
+
+static int clp_query_pci_fngrp(struct zpci_dev *zdev, u8 pfgid)
+{
+	struct clp_req_rsp_query_pci_grp *rrb;
+	int rc;
+
+	rrb = clp_alloc_block(GFP_KERNEL);
+	if (!rrb)
+		return -ENOMEM;
+
+	memset(rrb, 0, sizeof(*rrb));
+	rrb->request.hdr.len = sizeof(rrb->request);
+	rrb->request.hdr.cmd = CLP_QUERY_PCI_FNGRP;
+	rrb->response.hdr.len = sizeof(rrb->response);
+	rrb->request.pfgid = pfgid;
+
+	rc = clp_req(rrb, CLP_LPS_PCI);
+	if (!rc && rrb->response.hdr.rsp == CLP_RC_OK)
+		clp_store_query_pci_fngrp(zdev, &rrb->response);
+	else {
+		zpci_err("Q PCI FGRP:\n");
+		zpci_err_clp(rrb->response.hdr.rsp, rc);
+		rc = -EIO;
+	}
+	clp_free_block(rrb);
+	return rc;
+}
+
+static int clp_store_query_pci_fn(struct zpci_dev *zdev,
+				  struct clp_rsp_query_pci *response)
+{
+	int i;
+
+	for (i = 0; i < PCI_STD_NUM_BARS; i++) {
+		zdev->bars[i].val = le32_to_cpu(response->bar[i]);
+		zdev->bars[i].size = response->bar_size[i];
+	}
+	zdev->start_dma = response->sdma;
+	zdev->end_dma = response->edma;
+	zdev->pchid = response->pchid;
+	zdev->pfgid = response->pfgid;
+	zdev->pft = response->pft;
+	zdev->vfn = response->vfn;
+	zdev->port = response->port;
+	zdev->uid = response->uid;
+	zdev->fmb_length = sizeof(u32) * response->fmb_len;
+	zdev->rid_available = response->rid_avail;
+	zdev->is_physfn = response->is_physfn;
+	if (!s390_pci_no_rid && zdev->rid_available)
+		zdev->devfn = response->rid & ZPCI_RID_MASK_DEVFN;
+
+	memcpy(zdev->pfip, response->pfip, sizeof(zdev->pfip));
+	if (response->util_str_avail) {
+		memcpy(zdev->util_str, response->util_str,
+		       sizeof(zdev->util_str));
+		zdev->util_str_avail = 1;
+	}
+	zdev->mio_capable = response->mio_addr_avail;
+	for (i = 0; i < PCI_STD_NUM_BARS; i++) {
+		if (!(response->mio.valid & (1 << (PCI_STD_NUM_BARS - i - 1))))
+			continue;
+
+		zdev->bars[i].mio_wb = (void __iomem *) response->mio.addr[i].wb;
+		zdev->bars[i].mio_wt = (void __iomem *) response->mio.addr[i].wt;
+	}
+	return 0;
+}
+
+int clp_query_pci_fn(struct zpci_dev *zdev)
+{
+	struct clp_req_rsp_query_pci *rrb;
+	int rc;
+
+	rrb = clp_alloc_block(GFP_KERNEL);
+	if (!rrb)
+		return -ENOMEM;
+
+	memset(rrb, 0, sizeof(*rrb));
+	rrb->request.hdr.len = sizeof(rrb->request);
+	rrb->request.hdr.cmd = CLP_QUERY_PCI_FN;
+	rrb->response.hdr.len = sizeof(rrb->response);
+	rrb->request.fh = zdev->fh;
+
+	rc = clp_req(rrb, CLP_LPS_PCI);
+	if (!rc && rrb->response.hdr.rsp == CLP_RC_OK) {
+		rc = clp_store_query_pci_fn(zdev, &rrb->response);
+		if (rc)
+			goto out;
+		rc = clp_query_pci_fngrp(zdev, rrb->response.pfgid);
+	} else {
+		zpci_err("Q PCI FN:\n");
+		zpci_err_clp(rrb->response.hdr.rsp, rc);
+		rc = -EIO;
+	}
+out:
+	clp_free_block(rrb);
+	return rc;
+}
+
+/**
+ * clp_set_pci_fn() - Execute a command on a PCI function
+ * @zdev: Function that will be affected
+ * @fh: Out parameter for updated function handle
+ * @nr_dma_as: DMA address space number
+ * @command: The command code to execute
+ *
+ * Returns: 0 on success, < 0 for Linux errors (e.g. -ENOMEM), and
+ * > 0 for non-success platform responses
+ */
+static int clp_set_pci_fn(struct zpci_dev *zdev, u32 *fh, u8 nr_dma_as, u8 command)
+{
+	struct clp_req_rsp_set_pci *rrb;
+	int rc, retries = 100;
+	u32 gisa = 0;
+
+	*fh = 0;
+	rrb = clp_alloc_block(GFP_KERNEL);
+	if (!rrb)
+		return -ENOMEM;
+
+	if (command != CLP_SET_DISABLE_PCI_FN)
+		gisa = zdev->gisa;
+
+	do {
+		memset(rrb, 0, sizeof(*rrb));
+		rrb->request.hdr.len = sizeof(rrb->request);
+		rrb->request.hdr.cmd = CLP_SET_PCI_FN;
+		rrb->response.hdr.len = sizeof(rrb->response);
+		rrb->request.fh = zdev->fh;
+		rrb->request.oc = command;
+		rrb->request.ndas = nr_dma_as;
+		rrb->request.gisa = gisa;
+
+		rc = clp_req(rrb, CLP_LPS_PCI);
+		if (rrb->response.hdr.rsp == CLP_RC_SETPCIFN_BUSY) {
+			retries--;
+			if (retries < 0)
+				break;
+			msleep(20);
+		}
+	} while (rrb->response.hdr.rsp == CLP_RC_SETPCIFN_BUSY);
+
+	if (!rc && rrb->response.hdr.rsp == CLP_RC_OK) {
+		*fh = rrb->response.fh;
+	} else {
+		zpci_err("Set PCI FN:\n");
+		zpci_err_clp(rrb->response.hdr.rsp, rc);
+		if (!rc)
+			rc = rrb->response.hdr.rsp;
+	}
+	clp_free_block(rrb);
+	return rc;
+}
+
+int clp_setup_writeback_mio(void)
+{
+	struct clp_req_rsp_slpc_pci *rrb;
+	u8  wb_bit_pos;
+	int rc;
+
+	rrb = clp_alloc_block(GFP_KERNEL);
+	if (!rrb)
+		return -ENOMEM;
+
+	memset(rrb, 0, sizeof(*rrb));
+	rrb->request.hdr.len = sizeof(rrb->request);
+	rrb->request.hdr.cmd = CLP_SLPC;
+	rrb->response.hdr.len = sizeof(rrb->response);
+
+	rc = clp_req(rrb, CLP_LPS_PCI);
+	if (!rc && rrb->response.hdr.rsp == CLP_RC_OK) {
+		if (rrb->response.vwb) {
+			wb_bit_pos = rrb->response.mio_wb;
+			set_bit_inv(wb_bit_pos, &mio_wb_bit_mask);
+			zpci_dbg(3, "wb bit: %d\n", wb_bit_pos);
+		} else {
+			zpci_dbg(3, "wb bit: n.a.\n");
+		}
+
+	} else {
+		zpci_err("SLPC PCI:\n");
+		zpci_err_clp(rrb->response.hdr.rsp, rc);
+		rc = -EIO;
+	}
+	clp_free_block(rrb);
+	return rc;
+}
+
+int clp_enable_fh(struct zpci_dev *zdev, u32 *fh, u8 nr_dma_as)
+{
+	int rc;
+
+	rc = clp_set_pci_fn(zdev, fh, nr_dma_as, CLP_SET_ENABLE_PCI_FN);
+	zpci_dbg(3, "ena fid:%x, fh:%x, rc:%d\n", zdev->fid, *fh, rc);
+	if (!rc && zpci_use_mio(zdev)) {
+		rc = clp_set_pci_fn(zdev, fh, nr_dma_as, CLP_SET_ENABLE_MIO);
+		zpci_dbg(3, "ena mio fid:%x, fh:%x, rc:%d\n",
+				zdev->fid, *fh, rc);
+		if (rc)
+			clp_disable_fh(zdev, fh);
+	}
+	return rc;
+}
+
+int clp_disable_fh(struct zpci_dev *zdev, u32 *fh)
+{
+	int rc;
+
+	if (!zdev_enabled(zdev))
+		return 0;
+
+	rc = clp_set_pci_fn(zdev, fh, 0, CLP_SET_DISABLE_PCI_FN);
+	zpci_dbg(3, "dis fid:%x, fh:%x, rc:%d\n", zdev->fid, *fh, rc);
+	return rc;
+}
+
+static int clp_list_pci_req(struct clp_req_rsp_list_pci *rrb,
+			    u64 *resume_token, int *nentries)
+{
+	int rc;
+
+	memset(rrb, 0, sizeof(*rrb));
+	rrb->request.hdr.len = sizeof(rrb->request);
+	rrb->request.hdr.cmd = CLP_LIST_PCI;
+	/* store as many entries as possible */
+	rrb->response.hdr.len = CLP_BLK_SIZE - LIST_PCI_HDR_LEN;
+	rrb->request.resume_token = *resume_token;
+
+	/* Get PCI function handle list */
+	rc = clp_req(rrb, CLP_LPS_PCI);
+	if (rc || rrb->response.hdr.rsp != CLP_RC_OK) {
+		zpci_err("List PCI FN:\n");
+		zpci_err_clp(rrb->response.hdr.rsp, rc);
+		return -EIO;
+	}
+
+	update_uid_checking(rrb->response.uid_checking);
+	WARN_ON_ONCE(rrb->response.entry_size !=
+		sizeof(struct clp_fh_list_entry));
+
+	*nentries = (rrb->response.hdr.len - LIST_PCI_HDR_LEN) /
+		rrb->response.entry_size;
+	*resume_token = rrb->response.resume_token;
+
+	return rc;
+}
+
+static int clp_list_pci(struct clp_req_rsp_list_pci *rrb, void *data,
+			void (*cb)(struct clp_fh_list_entry *, void *))
+{
+	u64 resume_token = 0;
+	int nentries, i, rc;
+
+	do {
+		rc = clp_list_pci_req(rrb, &resume_token, &nentries);
+		if (rc)
+			return rc;
+		for (i = 0; i < nentries; i++)
+			cb(&rrb->response.fh_list[i], data);
+	} while (resume_token);
+
+	return rc;
+}
+
+static int clp_find_pci(struct clp_req_rsp_list_pci *rrb, u32 fid,
+			struct clp_fh_list_entry *entry)
+{
+	struct clp_fh_list_entry *fh_list;
+	u64 resume_token = 0;
+	int nentries, i, rc;
+
+	do {
+		rc = clp_list_pci_req(rrb, &resume_token, &nentries);
+		if (rc)
+			return rc;
+		fh_list = rrb->response.fh_list;
+		for (i = 0; i < nentries; i++) {
+			if (fh_list[i].fid == fid) {
+				*entry = fh_list[i];
+				return 0;
+			}
+		}
+	} while (resume_token);
+
+	return -ENODEV;
+}
+
+static void __clp_add(struct clp_fh_list_entry *entry, void *data)
+{
+	struct zpci_dev *zdev;
+
+	if (!entry->vendor_id)
+		return;
+
+	zdev = get_zdev_by_fid(entry->fid);
+	if (zdev) {
+		zpci_zdev_put(zdev);
+		return;
+	}
+	zpci_create_device(entry->fid, entry->fh, entry->config_state);
+}
+
+int clp_scan_pci_devices(void)
+{
+	struct clp_req_rsp_list_pci *rrb;
+	int rc;
+
+	rrb = clp_alloc_block(GFP_KERNEL);
+	if (!rrb)
+		return -ENOMEM;
+
+	rc = clp_list_pci(rrb, NULL, __clp_add);
+
+	clp_free_block(rrb);
+	return rc;
+}
+
+/*
+ * Get the current function handle of the function matching @fid
+ */
+int clp_refresh_fh(u32 fid, u32 *fh)
+{
+	struct clp_req_rsp_list_pci *rrb;
+	struct clp_fh_list_entry entry;
+	int rc;
+
+	rrb = clp_alloc_block(GFP_NOWAIT);
+	if (!rrb)
+		return -ENOMEM;
+
+	rc = clp_find_pci(rrb, fid, &entry);
+	if (!rc)
+		*fh = entry.fh;
+
+	clp_free_block(rrb);
+	return rc;
+}
+
+int clp_get_state(u32 fid, enum zpci_state *state)
+{
+	struct clp_req_rsp_list_pci *rrb;
+	struct clp_fh_list_entry entry;
+	int rc;
+
+	rrb = clp_alloc_block(GFP_ATOMIC);
+	if (!rrb)
+		return -ENOMEM;
+
+	rc = clp_find_pci(rrb, fid, &entry);
+	if (!rc) {
+		*state = entry.config_state;
+	} else if (rc == -ENODEV) {
+		*state = ZPCI_FN_STATE_RESERVED;
+		rc = 0;
+	}
+
+	clp_free_block(rrb);
+	return rc;
+}
+
+static int clp_base_slpc(struct clp_req *req, struct clp_req_rsp_slpc *lpcb)
+{
+	unsigned long limit = PAGE_SIZE - sizeof(lpcb->request);
+
+	if (lpcb->request.hdr.len != sizeof(lpcb->request) ||
+	    lpcb->response.hdr.len > limit)
+		return -EINVAL;
+	return clp_req(lpcb, CLP_LPS_BASE) ? -EOPNOTSUPP : 0;
+}
+
+static int clp_base_command(struct clp_req *req, struct clp_req_hdr *lpcb)
+{
+	switch (lpcb->cmd) {
+	case 0x0001: /* store logical-processor characteristics */
+		return clp_base_slpc(req, (void *) lpcb);
+	default:
+		return -EINVAL;
+	}
+}
+
+static int clp_pci_slpc(struct clp_req *req, struct clp_req_rsp_slpc_pci *lpcb)
+{
+	unsigned long limit = PAGE_SIZE - sizeof(lpcb->request);
+
+	if (lpcb->request.hdr.len != sizeof(lpcb->request) ||
+	    lpcb->response.hdr.len > limit)
+		return -EINVAL;
+	return clp_req(lpcb, CLP_LPS_PCI) ? -EOPNOTSUPP : 0;
+}
+
+static int clp_pci_list(struct clp_req *req, struct clp_req_rsp_list_pci *lpcb)
+{
+	unsigned long limit = PAGE_SIZE - sizeof(lpcb->request);
+
+	if (lpcb->request.hdr.len != sizeof(lpcb->request) ||
+	    lpcb->response.hdr.len > limit)
+		return -EINVAL;
+	if (lpcb->request.reserved2 != 0)
+		return -EINVAL;
+	return clp_req(lpcb, CLP_LPS_PCI) ? -EOPNOTSUPP : 0;
+}
+
+static int clp_pci_query(struct clp_req *req,
+			 struct clp_req_rsp_query_pci *lpcb)
+{
+	unsigned long limit = PAGE_SIZE - sizeof(lpcb->request);
+
+	if (lpcb->request.hdr.len != sizeof(lpcb->request) ||
+	    lpcb->response.hdr.len > limit)
+		return -EINVAL;
+	if (lpcb->request.reserved2 != 0 || lpcb->request.reserved3 != 0)
+		return -EINVAL;
+	return clp_req(lpcb, CLP_LPS_PCI) ? -EOPNOTSUPP : 0;
+}
+
+static int clp_pci_query_grp(struct clp_req *req,
+			     struct clp_req_rsp_query_pci_grp *lpcb)
+{
+	unsigned long limit = PAGE_SIZE - sizeof(lpcb->request);
+
+	if (lpcb->request.hdr.len != sizeof(lpcb->request) ||
+	    lpcb->response.hdr.len > limit)
+		return -EINVAL;
+	if (lpcb->request.reserved2 != 0 || lpcb->request.reserved3 != 0 ||
+	    lpcb->request.reserved4 != 0)
+		return -EINVAL;
+	return clp_req(lpcb, CLP_LPS_PCI) ? -EOPNOTSUPP : 0;
+}
+
+static int clp_pci_command(struct clp_req *req, struct clp_req_hdr *lpcb)
+{
+	switch (lpcb->cmd) {
+	case 0x0001: /* store logical-processor characteristics */
+		return clp_pci_slpc(req, (void *) lpcb);
+	case 0x0002: /* list PCI functions */
+		return clp_pci_list(req, (void *) lpcb);
+	case 0x0003: /* query PCI function */
+		return clp_pci_query(req, (void *) lpcb);
+	case 0x0004: /* query PCI function group */
+		return clp_pci_query_grp(req, (void *) lpcb);
+	default:
+		return -EINVAL;
+	}
+}
+
+static int clp_normal_command(struct clp_req *req)
+{
+	struct clp_req_hdr *lpcb;
+	void __user *uptr;
+	int rc;
+
+	rc = -EINVAL;
+	if (req->lps != 0 && req->lps != 2)
+		goto out;
+
+	rc = -ENOMEM;
+	lpcb = clp_alloc_block(GFP_KERNEL);
+	if (!lpcb)
+		goto out;
+
+	rc = -EFAULT;
+	uptr = (void __force __user *)(unsigned long) req->data_p;
+	if (copy_from_user(lpcb, uptr, PAGE_SIZE) != 0)
+		goto out_free;
+
+	rc = -EINVAL;
+	if (lpcb->fmt != 0 || lpcb->reserved1 != 0 || lpcb->reserved2 != 0)
+		goto out_free;
+
+	switch (req->lps) {
+	case 0:
+		rc = clp_base_command(req, lpcb);
+		break;
+	case 2:
+		rc = clp_pci_command(req, lpcb);
+		break;
+	}
+	if (rc)
+		goto out_free;
+
+	rc = -EFAULT;
+	if (copy_to_user(uptr, lpcb, PAGE_SIZE) != 0)
+		goto out_free;
+
+	rc = 0;
+
+out_free:
+	clp_free_block(lpcb);
+out:
+	return rc;
+}
+
+static int clp_immediate_command(struct clp_req *req)
+{
+	void __user *uptr;
+	unsigned long ilp;
+	int exists;
+
+	if (req->cmd > 1 || clp_get_ilp(&ilp) != 0)
+		return -EINVAL;
+
+	uptr = (void __force __user *)(unsigned long) req->data_p;
+	if (req->cmd == 0) {
+		/* Command code 0: test for a specific processor */
+		exists = test_bit_inv(req->lps, &ilp);
+		return put_user(exists, (int __user *) uptr);
+	}
+	/* Command code 1: return bit mask of installed processors */
+	return put_user(ilp, (unsigned long __user *) uptr);
+}
+
+static long clp_misc_ioctl(struct file *filp, unsigned int cmd,
+			   unsigned long arg)
+{
+	struct clp_req req;
+	void __user *argp;
+
+	if (cmd != CLP_SYNC)
+		return -EINVAL;
+
+	argp = is_compat_task() ? compat_ptr(arg) : (void __user *) arg;
+	if (copy_from_user(&req, argp, sizeof(req)))
+		return -EFAULT;
+	if (req.r != 0)
+		return -EINVAL;
+	return req.c ? clp_immediate_command(&req) : clp_normal_command(&req);
+}
+
+static int clp_misc_release(struct inode *inode, struct file *filp)
+{
+	return 0;
+}
+
+static const struct file_operations clp_misc_fops = {
+	.owner = THIS_MODULE,
+	.open = nonseekable_open,
+	.release = clp_misc_release,
+	.unlocked_ioctl = clp_misc_ioctl,
+	.compat_ioctl = clp_misc_ioctl,
+	.llseek = no_llseek,
+};
+
+static struct miscdevice clp_misc_device = {
+	.minor = MISC_DYNAMIC_MINOR,
+	.name = "clp",
+	.fops = &clp_misc_fops,
+};
+
+builtin_misc_device(clp_misc_device);
diff --git a/arch/s390/pci/pci_debug.c b/arch/s390/pci/pci_debug.c
new file mode 100644
index 0000000000..ca6bd98eec
--- /dev/null
+++ b/arch/s390/pci/pci_debug.c
@@ -0,0 +1,210 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ *  Copyright IBM Corp. 2012,2015
+ *
+ *  Author(s):
+ *    Jan Glauber <jang@linux.vnet.ibm.com>
+ */
+
+#define KMSG_COMPONENT "zpci"
+#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
+
+#include <linux/kernel.h>
+#include <linux/seq_file.h>
+#include <linux/debugfs.h>
+#include <linux/export.h>
+#include <linux/pci.h>
+#include <asm/debug.h>
+
+#include <asm/pci_dma.h>
+
+static struct dentry *debugfs_root;
+debug_info_t *pci_debug_msg_id;
+EXPORT_SYMBOL_GPL(pci_debug_msg_id);
+debug_info_t *pci_debug_err_id;
+EXPORT_SYMBOL_GPL(pci_debug_err_id);
+
+static char *pci_common_names[] = {
+	"Load operations",
+	"Store operations",
+	"Store block operations",
+	"Refresh operations",
+};
+
+static char *pci_fmt0_names[] = {
+	"DMA read bytes",
+	"DMA write bytes",
+};
+
+static char *pci_fmt1_names[] = {
+	"Received bytes",
+	"Received packets",
+	"Transmitted bytes",
+	"Transmitted packets",
+};
+
+static char *pci_fmt2_names[] = {
+	"Consumed work units",
+	"Maximum work units",
+};
+
+static char *pci_fmt3_names[] = {
+	"Transmitted bytes",
+};
+
+static char *pci_sw_names[] = {
+	"Allocated pages",
+	"Mapped pages",
+	"Unmapped pages",
+};
+
+static void pci_fmb_show(struct seq_file *m, char *name[], int length,
+			 u64 *data)
+{
+	int i;
+
+	for (i = 0; i < length; i++, data++)
+		seq_printf(m, "%26s:\t%llu\n", name[i], *data);
+}
+
+static void pci_sw_counter_show(struct seq_file *m)
+{
+	struct zpci_dev *zdev = m->private;
+	atomic64_t *counter = &zdev->allocated_pages;
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(pci_sw_names); i++, counter++)
+		seq_printf(m, "%26s:\t%llu\n", pci_sw_names[i],
+			   atomic64_read(counter));
+}
+
+static int pci_perf_show(struct seq_file *m, void *v)
+{
+	struct zpci_dev *zdev = m->private;
+
+	if (!zdev)
+		return 0;
+
+	mutex_lock(&zdev->lock);
+	if (!zdev->fmb) {
+		mutex_unlock(&zdev->lock);
+		seq_puts(m, "FMB statistics disabled\n");
+		return 0;
+	}
+
+	/* header */
+	seq_printf(m, "Update interval: %u ms\n", zdev->fmb_update);
+	seq_printf(m, "Samples: %u\n", zdev->fmb->samples);
+	seq_printf(m, "Last update TOD: %Lx\n", zdev->fmb->last_update);
+
+	pci_fmb_show(m, pci_common_names, ARRAY_SIZE(pci_common_names),
+		     &zdev->fmb->ld_ops);
+
+	switch (zdev->fmb->format) {
+	case 0:
+		if (!(zdev->fmb->fmt_ind & ZPCI_FMB_DMA_COUNTER_VALID))
+			break;
+		pci_fmb_show(m, pci_fmt0_names, ARRAY_SIZE(pci_fmt0_names),
+			     &zdev->fmb->fmt0.dma_rbytes);
+		break;
+	case 1:
+		pci_fmb_show(m, pci_fmt1_names, ARRAY_SIZE(pci_fmt1_names),
+			     &zdev->fmb->fmt1.rx_bytes);
+		break;
+	case 2:
+		pci_fmb_show(m, pci_fmt2_names, ARRAY_SIZE(pci_fmt2_names),
+			     &zdev->fmb->fmt2.consumed_work_units);
+		break;
+	case 3:
+		pci_fmb_show(m, pci_fmt3_names, ARRAY_SIZE(pci_fmt3_names),
+			     &zdev->fmb->fmt3.tx_bytes);
+		break;
+	default:
+		seq_puts(m, "Unknown format\n");
+	}
+
+	pci_sw_counter_show(m);
+	mutex_unlock(&zdev->lock);
+	return 0;
+}
+
+static ssize_t pci_perf_seq_write(struct file *file, const char __user *ubuf,
+				  size_t count, loff_t *off)
+{
+	struct zpci_dev *zdev = ((struct seq_file *) file->private_data)->private;
+	unsigned long val;
+	int rc;
+
+	if (!zdev)
+		return 0;
+
+	rc = kstrtoul_from_user(ubuf, count, 10, &val);
+	if (rc)
+		return rc;
+
+	mutex_lock(&zdev->lock);
+	switch (val) {
+	case 0:
+		rc = zpci_fmb_disable_device(zdev);
+		break;
+	case 1:
+		rc = zpci_fmb_enable_device(zdev);
+		break;
+	}
+	mutex_unlock(&zdev->lock);
+	return rc ? rc : count;
+}
+
+static int pci_perf_seq_open(struct inode *inode, struct file *filp)
+{
+	return single_open(filp, pci_perf_show,
+			   file_inode(filp)->i_private);
+}
+
+static const struct file_operations debugfs_pci_perf_fops = {
+	.open	 = pci_perf_seq_open,
+	.read	 = seq_read,
+	.write	 = pci_perf_seq_write,
+	.llseek  = seq_lseek,
+	.release = single_release,
+};
+
+void zpci_debug_init_device(struct zpci_dev *zdev, const char *name)
+{
+	zdev->debugfs_dev = debugfs_create_dir(name, debugfs_root);
+
+	debugfs_create_file("statistics", S_IFREG | S_IRUGO | S_IWUSR,
+			    zdev->debugfs_dev, zdev, &debugfs_pci_perf_fops);
+}
+
+void zpci_debug_exit_device(struct zpci_dev *zdev)
+{
+	debugfs_remove_recursive(zdev->debugfs_dev);
+}
+
+int __init zpci_debug_init(void)
+{
+	/* event trace buffer */
+	pci_debug_msg_id = debug_register("pci_msg", 8, 1, 8 * sizeof(long));
+	if (!pci_debug_msg_id)
+		return -EINVAL;
+	debug_register_view(pci_debug_msg_id, &debug_sprintf_view);
+	debug_set_level(pci_debug_msg_id, 3);
+
+	/* error log */
+	pci_debug_err_id = debug_register("pci_error", 2, 1, 16);
+	if (!pci_debug_err_id)
+		return -EINVAL;
+	debug_register_view(pci_debug_err_id, &debug_hex_ascii_view);
+	debug_set_level(pci_debug_err_id, 3);
+
+	debugfs_root = debugfs_create_dir("pci", NULL);
+	return 0;
+}
+
+void zpci_debug_exit(void)
+{
+	debug_unregister(pci_debug_msg_id);
+	debug_unregister(pci_debug_err_id);
+	debugfs_remove(debugfs_root);
+}
diff --git a/arch/s390/pci/pci_dma.c b/arch/s390/pci/pci_dma.c
new file mode 100644
index 0000000000..99209085c7
--- /dev/null
+++ b/arch/s390/pci/pci_dma.c
@@ -0,0 +1,746 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright IBM Corp. 2012
+ *
+ * Author(s):
+ *   Jan Glauber <jang@linux.vnet.ibm.com>
+ */
+
+#include <linux/kernel.h>
+#include <linux/slab.h>
+#include <linux/export.h>
+#include <linux/iommu-helper.h>
+#include <linux/dma-map-ops.h>
+#include <linux/vmalloc.h>
+#include <linux/pci.h>
+#include <asm/pci_dma.h>
+
+static struct kmem_cache *dma_region_table_cache;
+static struct kmem_cache *dma_page_table_cache;
+static int s390_iommu_strict;
+static u64 s390_iommu_aperture;
+static u32 s390_iommu_aperture_factor = 1;
+
+static int zpci_refresh_global(struct zpci_dev *zdev)
+{
+	return zpci_refresh_trans((u64) zdev->fh << 32, zdev->start_dma,
+				  zdev->iommu_pages * PAGE_SIZE);
+}
+
+unsigned long *dma_alloc_cpu_table(gfp_t gfp)
+{
+	unsigned long *table, *entry;
+
+	table = kmem_cache_alloc(dma_region_table_cache, gfp);
+	if (!table)
+		return NULL;
+
+	for (entry = table; entry < table + ZPCI_TABLE_ENTRIES; entry++)
+		*entry = ZPCI_TABLE_INVALID;
+	return table;
+}
+
+static void dma_free_cpu_table(void *table)
+{
+	kmem_cache_free(dma_region_table_cache, table);
+}
+
+static unsigned long *dma_alloc_page_table(gfp_t gfp)
+{
+	unsigned long *table, *entry;
+
+	table = kmem_cache_alloc(dma_page_table_cache, gfp);
+	if (!table)
+		return NULL;
+
+	for (entry = table; entry < table + ZPCI_PT_ENTRIES; entry++)
+		*entry = ZPCI_PTE_INVALID;
+	return table;
+}
+
+static void dma_free_page_table(void *table)
+{
+	kmem_cache_free(dma_page_table_cache, table);
+}
+
+static unsigned long *dma_get_seg_table_origin(unsigned long *rtep, gfp_t gfp)
+{
+	unsigned long old_rte, rte;
+	unsigned long *sto;
+
+	rte = READ_ONCE(*rtep);
+	if (reg_entry_isvalid(rte)) {
+		sto = get_rt_sto(rte);
+	} else {
+		sto = dma_alloc_cpu_table(gfp);
+		if (!sto)
+			return NULL;
+
+		set_rt_sto(&rte, virt_to_phys(sto));
+		validate_rt_entry(&rte);
+		entry_clr_protected(&rte);
+
+		old_rte = cmpxchg(rtep, ZPCI_TABLE_INVALID, rte);
+		if (old_rte != ZPCI_TABLE_INVALID) {
+			/* Somone else was faster, use theirs */
+			dma_free_cpu_table(sto);
+			sto = get_rt_sto(old_rte);
+		}
+	}
+	return sto;
+}
+
+static unsigned long *dma_get_page_table_origin(unsigned long *step, gfp_t gfp)
+{
+	unsigned long old_ste, ste;
+	unsigned long *pto;
+
+	ste = READ_ONCE(*step);
+	if (reg_entry_isvalid(ste)) {
+		pto = get_st_pto(ste);
+	} else {
+		pto = dma_alloc_page_table(gfp);
+		if (!pto)
+			return NULL;
+		set_st_pto(&ste, virt_to_phys(pto));
+		validate_st_entry(&ste);
+		entry_clr_protected(&ste);
+
+		old_ste = cmpxchg(step, ZPCI_TABLE_INVALID, ste);
+		if (old_ste != ZPCI_TABLE_INVALID) {
+			/* Somone else was faster, use theirs */
+			dma_free_page_table(pto);
+			pto = get_st_pto(old_ste);
+		}
+	}
+	return pto;
+}
+
+unsigned long *dma_walk_cpu_trans(unsigned long *rto, dma_addr_t dma_addr,
+				  gfp_t gfp)
+{
+	unsigned long *sto, *pto;
+	unsigned int rtx, sx, px;
+
+	rtx = calc_rtx(dma_addr);
+	sto = dma_get_seg_table_origin(&rto[rtx], gfp);
+	if (!sto)
+		return NULL;
+
+	sx = calc_sx(dma_addr);
+	pto = dma_get_page_table_origin(&sto[sx], gfp);
+	if (!pto)
+		return NULL;
+
+	px = calc_px(dma_addr);
+	return &pto[px];
+}
+
+void dma_update_cpu_trans(unsigned long *ptep, phys_addr_t page_addr, int flags)
+{
+	unsigned long pte;
+
+	pte = READ_ONCE(*ptep);
+	if (flags & ZPCI_PTE_INVALID) {
+		invalidate_pt_entry(&pte);
+	} else {
+		set_pt_pfaa(&pte, page_addr);
+		validate_pt_entry(&pte);
+	}
+
+	if (flags & ZPCI_TABLE_PROTECTED)
+		entry_set_protected(&pte);
+	else
+		entry_clr_protected(&pte);
+
+	xchg(ptep, pte);
+}
+
+static int __dma_update_trans(struct zpci_dev *zdev, phys_addr_t pa,
+			      dma_addr_t dma_addr, size_t size, int flags)
+{
+	unsigned int nr_pages = PAGE_ALIGN(size) >> PAGE_SHIFT;
+	phys_addr_t page_addr = (pa & PAGE_MASK);
+	unsigned long *entry;
+	int i, rc = 0;
+
+	if (!nr_pages)
+		return -EINVAL;
+
+	if (!zdev->dma_table)
+		return -EINVAL;
+
+	for (i = 0; i < nr_pages; i++) {
+		entry = dma_walk_cpu_trans(zdev->dma_table, dma_addr,
+					   GFP_ATOMIC);
+		if (!entry) {
+			rc = -ENOMEM;
+			goto undo_cpu_trans;
+		}
+		dma_update_cpu_trans(entry, page_addr, flags);
+		page_addr += PAGE_SIZE;
+		dma_addr += PAGE_SIZE;
+	}
+
+undo_cpu_trans:
+	if (rc && ((flags & ZPCI_PTE_VALID_MASK) == ZPCI_PTE_VALID)) {
+		flags = ZPCI_PTE_INVALID;
+		while (i-- > 0) {
+			page_addr -= PAGE_SIZE;
+			dma_addr -= PAGE_SIZE;
+			entry = dma_walk_cpu_trans(zdev->dma_table, dma_addr,
+						   GFP_ATOMIC);
+			if (!entry)
+				break;
+			dma_update_cpu_trans(entry, page_addr, flags);
+		}
+	}
+	return rc;
+}
+
+static int __dma_purge_tlb(struct zpci_dev *zdev, dma_addr_t dma_addr,
+			   size_t size, int flags)
+{
+	unsigned long irqflags;
+	int ret;
+
+	/*
+	 * With zdev->tlb_refresh == 0, rpcit is not required to establish new
+	 * translations when previously invalid translation-table entries are
+	 * validated. With lazy unmap, rpcit is skipped for previously valid
+	 * entries, but a global rpcit is then required before any address can
+	 * be re-used, i.e. after each iommu bitmap wrap-around.
+	 */
+	if ((flags & ZPCI_PTE_VALID_MASK) == ZPCI_PTE_VALID) {
+		if (!zdev->tlb_refresh)
+			return 0;
+	} else {
+		if (!s390_iommu_strict)
+			return 0;
+	}
+
+	ret = zpci_refresh_trans((u64) zdev->fh << 32, dma_addr,
+				 PAGE_ALIGN(size));
+	if (ret == -ENOMEM && !s390_iommu_strict) {
+		/* enable the hypervisor to free some resources */
+		if (zpci_refresh_global(zdev))
+			goto out;
+
+		spin_lock_irqsave(&zdev->iommu_bitmap_lock, irqflags);
+		bitmap_andnot(zdev->iommu_bitmap, zdev->iommu_bitmap,
+			      zdev->lazy_bitmap, zdev->iommu_pages);
+		bitmap_zero(zdev->lazy_bitmap, zdev->iommu_pages);
+		spin_unlock_irqrestore(&zdev->iommu_bitmap_lock, irqflags);
+		ret = 0;
+	}
+out:
+	return ret;
+}
+
+static int dma_update_trans(struct zpci_dev *zdev, phys_addr_t pa,
+			    dma_addr_t dma_addr, size_t size, int flags)
+{
+	int rc;
+
+	rc = __dma_update_trans(zdev, pa, dma_addr, size, flags);
+	if (rc)
+		return rc;
+
+	rc = __dma_purge_tlb(zdev, dma_addr, size, flags);
+	if (rc && ((flags & ZPCI_PTE_VALID_MASK) == ZPCI_PTE_VALID))
+		__dma_update_trans(zdev, pa, dma_addr, size, ZPCI_PTE_INVALID);
+
+	return rc;
+}
+
+void dma_free_seg_table(unsigned long entry)
+{
+	unsigned long *sto = get_rt_sto(entry);
+	int sx;
+
+	for (sx = 0; sx < ZPCI_TABLE_ENTRIES; sx++)
+		if (reg_entry_isvalid(sto[sx]))
+			dma_free_page_table(get_st_pto(sto[sx]));
+
+	dma_free_cpu_table(sto);
+}
+
+void dma_cleanup_tables(unsigned long *table)
+{
+	int rtx;
+
+	if (!table)
+		return;
+
+	for (rtx = 0; rtx < ZPCI_TABLE_ENTRIES; rtx++)
+		if (reg_entry_isvalid(table[rtx]))
+			dma_free_seg_table(table[rtx]);
+
+	dma_free_cpu_table(table);
+}
+
+static unsigned long __dma_alloc_iommu(struct device *dev,
+				       unsigned long start, int size)
+{
+	struct zpci_dev *zdev = to_zpci(to_pci_dev(dev));
+
+	return iommu_area_alloc(zdev->iommu_bitmap, zdev->iommu_pages,
+				start, size, zdev->start_dma >> PAGE_SHIFT,
+				dma_get_seg_boundary_nr_pages(dev, PAGE_SHIFT),
+				0);
+}
+
+static dma_addr_t dma_alloc_address(struct device *dev, int size)
+{
+	struct zpci_dev *zdev = to_zpci(to_pci_dev(dev));
+	unsigned long offset, flags;
+
+	spin_lock_irqsave(&zdev->iommu_bitmap_lock, flags);
+	offset = __dma_alloc_iommu(dev, zdev->next_bit, size);
+	if (offset == -1) {
+		if (!s390_iommu_strict) {
+			/* global flush before DMA addresses are reused */
+			if (zpci_refresh_global(zdev))
+				goto out_error;
+
+			bitmap_andnot(zdev->iommu_bitmap, zdev->iommu_bitmap,
+				      zdev->lazy_bitmap, zdev->iommu_pages);
+			bitmap_zero(zdev->lazy_bitmap, zdev->iommu_pages);
+		}
+		/* wrap-around */
+		offset = __dma_alloc_iommu(dev, 0, size);
+		if (offset == -1)
+			goto out_error;
+	}
+	zdev->next_bit = offset + size;
+	spin_unlock_irqrestore(&zdev->iommu_bitmap_lock, flags);
+
+	return zdev->start_dma + offset * PAGE_SIZE;
+
+out_error:
+	spin_unlock_irqrestore(&zdev->iommu_bitmap_lock, flags);
+	return DMA_MAPPING_ERROR;
+}
+
+static void dma_free_address(struct device *dev, dma_addr_t dma_addr, int size)
+{
+	struct zpci_dev *zdev = to_zpci(to_pci_dev(dev));
+	unsigned long flags, offset;
+
+	offset = (dma_addr - zdev->start_dma) >> PAGE_SHIFT;
+
+	spin_lock_irqsave(&zdev->iommu_bitmap_lock, flags);
+	if (!zdev->iommu_bitmap)
+		goto out;
+
+	if (s390_iommu_strict)
+		bitmap_clear(zdev->iommu_bitmap, offset, size);
+	else
+		bitmap_set(zdev->lazy_bitmap, offset, size);
+
+out:
+	spin_unlock_irqrestore(&zdev->iommu_bitmap_lock, flags);
+}
+
+static inline void zpci_err_dma(unsigned long rc, unsigned long addr)
+{
+	struct {
+		unsigned long rc;
+		unsigned long addr;
+	} __packed data = {rc, addr};
+
+	zpci_err_hex(&data, sizeof(data));
+}
+
+static dma_addr_t s390_dma_map_pages(struct device *dev, struct page *page,
+				     unsigned long offset, size_t size,
+				     enum dma_data_direction direction,
+				     unsigned long attrs)
+{
+	struct zpci_dev *zdev = to_zpci(to_pci_dev(dev));
+	unsigned long pa = page_to_phys(page) + offset;
+	int flags = ZPCI_PTE_VALID;
+	unsigned long nr_pages;
+	dma_addr_t dma_addr;
+	int ret;
+
+	/* This rounds up number of pages based on size and offset */
+	nr_pages = iommu_num_pages(pa, size, PAGE_SIZE);
+	dma_addr = dma_alloc_address(dev, nr_pages);
+	if (dma_addr == DMA_MAPPING_ERROR) {
+		ret = -ENOSPC;
+		goto out_err;
+	}
+
+	/* Use rounded up size */
+	size = nr_pages * PAGE_SIZE;
+
+	if (direction == DMA_NONE || direction == DMA_TO_DEVICE)
+		flags |= ZPCI_TABLE_PROTECTED;
+
+	ret = dma_update_trans(zdev, pa, dma_addr, size, flags);
+	if (ret)
+		goto out_free;
+
+	atomic64_add(nr_pages, &zdev->mapped_pages);
+	return dma_addr + (offset & ~PAGE_MASK);
+
+out_free:
+	dma_free_address(dev, dma_addr, nr_pages);
+out_err:
+	zpci_err("map error:\n");
+	zpci_err_dma(ret, pa);
+	return DMA_MAPPING_ERROR;
+}
+
+static void s390_dma_unmap_pages(struct device *dev, dma_addr_t dma_addr,
+				 size_t size, enum dma_data_direction direction,
+				 unsigned long attrs)
+{
+	struct zpci_dev *zdev = to_zpci(to_pci_dev(dev));
+	int npages, ret;
+
+	npages = iommu_num_pages(dma_addr, size, PAGE_SIZE);
+	dma_addr = dma_addr & PAGE_MASK;
+	ret = dma_update_trans(zdev, 0, dma_addr, npages * PAGE_SIZE,
+			       ZPCI_PTE_INVALID);
+	if (ret) {
+		zpci_err("unmap error:\n");
+		zpci_err_dma(ret, dma_addr);
+		return;
+	}
+
+	atomic64_add(npages, &zdev->unmapped_pages);
+	dma_free_address(dev, dma_addr, npages);
+}
+
+static void *s390_dma_alloc(struct device *dev, size_t size,
+			    dma_addr_t *dma_handle, gfp_t flag,
+			    unsigned long attrs)
+{
+	struct zpci_dev *zdev = to_zpci(to_pci_dev(dev));
+	struct page *page;
+	phys_addr_t pa;
+	dma_addr_t map;
+
+	size = PAGE_ALIGN(size);
+	page = alloc_pages(flag | __GFP_ZERO, get_order(size));
+	if (!page)
+		return NULL;
+
+	pa = page_to_phys(page);
+	map = s390_dma_map_pages(dev, page, 0, size, DMA_BIDIRECTIONAL, 0);
+	if (dma_mapping_error(dev, map)) {
+		__free_pages(page, get_order(size));
+		return NULL;
+	}
+
+	atomic64_add(size / PAGE_SIZE, &zdev->allocated_pages);
+	if (dma_handle)
+		*dma_handle = map;
+	return phys_to_virt(pa);
+}
+
+static void s390_dma_free(struct device *dev, size_t size,
+			  void *vaddr, dma_addr_t dma_handle,
+			  unsigned long attrs)
+{
+	struct zpci_dev *zdev = to_zpci(to_pci_dev(dev));
+
+	size = PAGE_ALIGN(size);
+	atomic64_sub(size / PAGE_SIZE, &zdev->allocated_pages);
+	s390_dma_unmap_pages(dev, dma_handle, size, DMA_BIDIRECTIONAL, 0);
+	free_pages((unsigned long)vaddr, get_order(size));
+}
+
+/* Map a segment into a contiguous dma address area */
+static int __s390_dma_map_sg(struct device *dev, struct scatterlist *sg,
+			     size_t size, dma_addr_t *handle,
+			     enum dma_data_direction dir)
+{
+	unsigned long nr_pages = PAGE_ALIGN(size) >> PAGE_SHIFT;
+	struct zpci_dev *zdev = to_zpci(to_pci_dev(dev));
+	dma_addr_t dma_addr_base, dma_addr;
+	int flags = ZPCI_PTE_VALID;
+	struct scatterlist *s;
+	phys_addr_t pa = 0;
+	int ret;
+
+	dma_addr_base = dma_alloc_address(dev, nr_pages);
+	if (dma_addr_base == DMA_MAPPING_ERROR)
+		return -ENOMEM;
+
+	dma_addr = dma_addr_base;
+	if (dir == DMA_NONE || dir == DMA_TO_DEVICE)
+		flags |= ZPCI_TABLE_PROTECTED;
+
+	for (s = sg; dma_addr < dma_addr_base + size; s = sg_next(s)) {
+		pa = page_to_phys(sg_page(s));
+		ret = __dma_update_trans(zdev, pa, dma_addr,
+					 s->offset + s->length, flags);
+		if (ret)
+			goto unmap;
+
+		dma_addr += s->offset + s->length;
+	}
+	ret = __dma_purge_tlb(zdev, dma_addr_base, size, flags);
+	if (ret)
+		goto unmap;
+
+	*handle = dma_addr_base;
+	atomic64_add(nr_pages, &zdev->mapped_pages);
+
+	return ret;
+
+unmap:
+	dma_update_trans(zdev, 0, dma_addr_base, dma_addr - dma_addr_base,
+			 ZPCI_PTE_INVALID);
+	dma_free_address(dev, dma_addr_base, nr_pages);
+	zpci_err("map error:\n");
+	zpci_err_dma(ret, pa);
+	return ret;
+}
+
+static int s390_dma_map_sg(struct device *dev, struct scatterlist *sg,
+			   int nr_elements, enum dma_data_direction dir,
+			   unsigned long attrs)
+{
+	struct scatterlist *s = sg, *start = sg, *dma = sg;
+	unsigned int max = dma_get_max_seg_size(dev);
+	unsigned int size = s->offset + s->length;
+	unsigned int offset = s->offset;
+	int count = 0, i, ret;
+
+	for (i = 1; i < nr_elements; i++) {
+		s = sg_next(s);
+
+		s->dma_length = 0;
+
+		if (s->offset || (size & ~PAGE_MASK) ||
+		    size + s->length > max) {
+			ret = __s390_dma_map_sg(dev, start, size,
+						&dma->dma_address, dir);
+			if (ret)
+				goto unmap;
+
+			dma->dma_address += offset;
+			dma->dma_length = size - offset;
+
+			size = offset = s->offset;
+			start = s;
+			dma = sg_next(dma);
+			count++;
+		}
+		size += s->length;
+	}
+	ret = __s390_dma_map_sg(dev, start, size, &dma->dma_address, dir);
+	if (ret)
+		goto unmap;
+
+	dma->dma_address += offset;
+	dma->dma_length = size - offset;
+
+	return count + 1;
+unmap:
+	for_each_sg(sg, s, count, i)
+		s390_dma_unmap_pages(dev, sg_dma_address(s), sg_dma_len(s),
+				     dir, attrs);
+
+	return ret;
+}
+
+static void s390_dma_unmap_sg(struct device *dev, struct scatterlist *sg,
+			      int nr_elements, enum dma_data_direction dir,
+			      unsigned long attrs)
+{
+	struct scatterlist *s;
+	int i;
+
+	for_each_sg(sg, s, nr_elements, i) {
+		if (s->dma_length)
+			s390_dma_unmap_pages(dev, s->dma_address, s->dma_length,
+					     dir, attrs);
+		s->dma_address = 0;
+		s->dma_length = 0;
+	}
+}
+
+static unsigned long *bitmap_vzalloc(size_t bits, gfp_t flags)
+{
+	size_t n = BITS_TO_LONGS(bits);
+	size_t bytes;
+
+	if (unlikely(check_mul_overflow(n, sizeof(unsigned long), &bytes)))
+		return NULL;
+
+	return vzalloc(bytes);
+}
+	
+int zpci_dma_init_device(struct zpci_dev *zdev)
+{
+	u8 status;
+	int rc;
+
+	/*
+	 * At this point, if the device is part of an IOMMU domain, this would
+	 * be a strong hint towards a bug in the IOMMU API (common) code and/or
+	 * simultaneous access via IOMMU and DMA API. So let's issue a warning.
+	 */
+	WARN_ON(zdev->s390_domain);
+
+	spin_lock_init(&zdev->iommu_bitmap_lock);
+
+	zdev->dma_table = dma_alloc_cpu_table(GFP_KERNEL);
+	if (!zdev->dma_table) {
+		rc = -ENOMEM;
+		goto out;
+	}
+
+	/*
+	 * Restrict the iommu bitmap size to the minimum of the following:
+	 * - s390_iommu_aperture which defaults to high_memory
+	 * - 3-level pagetable address limit minus start_dma offset
+	 * - DMA address range allowed by the hardware (clp query pci fn)
+	 *
+	 * Also set zdev->end_dma to the actual end address of the usable
+	 * range, instead of the theoretical maximum as reported by hardware.
+	 *
+	 * This limits the number of concurrently usable DMA mappings since
+	 * for each DMA mapped memory address we need a DMA address including
+	 * extra DMA addresses for multiple mappings of the same memory address.
+	 */
+	zdev->start_dma = PAGE_ALIGN(zdev->start_dma);
+	zdev->iommu_size = min3(s390_iommu_aperture,
+				ZPCI_TABLE_SIZE_RT - zdev->start_dma,
+				zdev->end_dma - zdev->start_dma + 1);
+	zdev->end_dma = zdev->start_dma + zdev->iommu_size - 1;
+	zdev->iommu_pages = zdev->iommu_size >> PAGE_SHIFT;
+	zdev->iommu_bitmap = bitmap_vzalloc(zdev->iommu_pages, GFP_KERNEL);
+	if (!zdev->iommu_bitmap) {
+		rc = -ENOMEM;
+		goto free_dma_table;
+	}
+	if (!s390_iommu_strict) {
+		zdev->lazy_bitmap = bitmap_vzalloc(zdev->iommu_pages, GFP_KERNEL);
+		if (!zdev->lazy_bitmap) {
+			rc = -ENOMEM;
+			goto free_bitmap;
+		}
+
+	}
+	if (zpci_register_ioat(zdev, 0, zdev->start_dma, zdev->end_dma,
+			       virt_to_phys(zdev->dma_table), &status)) {
+		rc = -EIO;
+		goto free_bitmap;
+	}
+
+	return 0;
+free_bitmap:
+	vfree(zdev->iommu_bitmap);
+	zdev->iommu_bitmap = NULL;
+	vfree(zdev->lazy_bitmap);
+	zdev->lazy_bitmap = NULL;
+free_dma_table:
+	dma_free_cpu_table(zdev->dma_table);
+	zdev->dma_table = NULL;
+out:
+	return rc;
+}
+
+int zpci_dma_exit_device(struct zpci_dev *zdev)
+{
+	int cc = 0;
+
+	/*
+	 * At this point, if the device is part of an IOMMU domain, this would
+	 * be a strong hint towards a bug in the IOMMU API (common) code and/or
+	 * simultaneous access via IOMMU and DMA API. So let's issue a warning.
+	 */
+	WARN_ON(zdev->s390_domain);
+	if (zdev_enabled(zdev))
+		cc = zpci_unregister_ioat(zdev, 0);
+	/*
+	 * cc == 3 indicates the function is gone already. This can happen
+	 * if the function was deconfigured/disabled suddenly and we have not
+	 * received a new handle yet.
+	 */
+	if (cc && cc != 3)
+		return -EIO;
+
+	dma_cleanup_tables(zdev->dma_table);
+	zdev->dma_table = NULL;
+	vfree(zdev->iommu_bitmap);
+	zdev->iommu_bitmap = NULL;
+	vfree(zdev->lazy_bitmap);
+	zdev->lazy_bitmap = NULL;
+	zdev->next_bit = 0;
+	return 0;
+}
+
+static int __init dma_alloc_cpu_table_caches(void)
+{
+	dma_region_table_cache = kmem_cache_create("PCI_DMA_region_tables",
+					ZPCI_TABLE_SIZE, ZPCI_TABLE_ALIGN,
+					0, NULL);
+	if (!dma_region_table_cache)
+		return -ENOMEM;
+
+	dma_page_table_cache = kmem_cache_create("PCI_DMA_page_tables",
+					ZPCI_PT_SIZE, ZPCI_PT_ALIGN,
+					0, NULL);
+	if (!dma_page_table_cache) {
+		kmem_cache_destroy(dma_region_table_cache);
+		return -ENOMEM;
+	}
+	return 0;
+}
+
+int __init zpci_dma_init(void)
+{
+	s390_iommu_aperture = (u64)virt_to_phys(high_memory);
+	if (!s390_iommu_aperture_factor)
+		s390_iommu_aperture = ULONG_MAX;
+	else
+		s390_iommu_aperture *= s390_iommu_aperture_factor;
+
+	return dma_alloc_cpu_table_caches();
+}
+
+void zpci_dma_exit(void)
+{
+	kmem_cache_destroy(dma_page_table_cache);
+	kmem_cache_destroy(dma_region_table_cache);
+}
+
+const struct dma_map_ops s390_pci_dma_ops = {
+	.alloc		= s390_dma_alloc,
+	.free		= s390_dma_free,
+	.map_sg		= s390_dma_map_sg,
+	.unmap_sg	= s390_dma_unmap_sg,
+	.map_page	= s390_dma_map_pages,
+	.unmap_page	= s390_dma_unmap_pages,
+	.mmap		= dma_common_mmap,
+	.get_sgtable	= dma_common_get_sgtable,
+	.alloc_pages	= dma_common_alloc_pages,
+	.free_pages	= dma_common_free_pages,
+	/* dma_supported is unconditionally true without a callback */
+};
+EXPORT_SYMBOL_GPL(s390_pci_dma_ops);
+
+static int __init s390_iommu_setup(char *str)
+{
+	if (!strcmp(str, "strict"))
+		s390_iommu_strict = 1;
+	return 1;
+}
+
+__setup("s390_iommu=", s390_iommu_setup);
+
+static int __init s390_iommu_aperture_setup(char *str)
+{
+	if (kstrtou32(str, 10, &s390_iommu_aperture_factor))
+		s390_iommu_aperture_factor = 1;
+	return 1;
+}
+
+__setup("s390_iommu_aperture=", s390_iommu_aperture_setup);
diff --git a/arch/s390/pci/pci_event.c b/arch/s390/pci/pci_event.c
new file mode 100644
index 0000000000..b9324ca2eb
--- /dev/null
+++ b/arch/s390/pci/pci_event.c
@@ -0,0 +1,389 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ *  Copyright IBM Corp. 2012
+ *
+ *  Author(s):
+ *    Jan Glauber <jang@linux.vnet.ibm.com>
+ */
+
+#define KMSG_COMPONENT "zpci"
+#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
+
+#include <linux/kernel.h>
+#include <linux/pci.h>
+#include <asm/pci_debug.h>
+#include <asm/pci_dma.h>
+#include <asm/sclp.h>
+
+#include "pci_bus.h"
+
+/* Content Code Description for PCI Function Error */
+struct zpci_ccdf_err {
+	u32 reserved1;
+	u32 fh;				/* function handle */
+	u32 fid;			/* function id */
+	u32 ett		:  4;		/* expected table type */
+	u32 mvn		: 12;		/* MSI vector number */
+	u32 dmaas	:  8;		/* DMA address space */
+	u32		:  6;
+	u32 q		:  1;		/* event qualifier */
+	u32 rw		:  1;		/* read/write */
+	u64 faddr;			/* failing address */
+	u32 reserved3;
+	u16 reserved4;
+	u16 pec;			/* PCI event code */
+} __packed;
+
+/* Content Code Description for PCI Function Availability */
+struct zpci_ccdf_avail {
+	u32 reserved1;
+	u32 fh;				/* function handle */
+	u32 fid;			/* function id */
+	u32 reserved2;
+	u32 reserved3;
+	u32 reserved4;
+	u32 reserved5;
+	u16 reserved6;
+	u16 pec;			/* PCI event code */
+} __packed;
+
+static inline bool ers_result_indicates_abort(pci_ers_result_t ers_res)
+{
+	switch (ers_res) {
+	case PCI_ERS_RESULT_CAN_RECOVER:
+	case PCI_ERS_RESULT_RECOVERED:
+	case PCI_ERS_RESULT_NEED_RESET:
+		return false;
+	default:
+		return true;
+	}
+}
+
+static bool is_passed_through(struct zpci_dev *zdev)
+{
+	return zdev->s390_domain;
+}
+
+static bool is_driver_supported(struct pci_driver *driver)
+{
+	if (!driver || !driver->err_handler)
+		return false;
+	if (!driver->err_handler->error_detected)
+		return false;
+	if (!driver->err_handler->slot_reset)
+		return false;
+	if (!driver->err_handler->resume)
+		return false;
+	return true;
+}
+
+static pci_ers_result_t zpci_event_notify_error_detected(struct pci_dev *pdev,
+							 struct pci_driver *driver)
+{
+	pci_ers_result_t ers_res = PCI_ERS_RESULT_DISCONNECT;
+
+	ers_res = driver->err_handler->error_detected(pdev,  pdev->error_state);
+	if (ers_result_indicates_abort(ers_res))
+		pr_info("%s: Automatic recovery failed after initial reporting\n", pci_name(pdev));
+	else if (ers_res == PCI_ERS_RESULT_NEED_RESET)
+		pr_debug("%s: Driver needs reset to recover\n", pci_name(pdev));
+
+	return ers_res;
+}
+
+static pci_ers_result_t zpci_event_do_error_state_clear(struct pci_dev *pdev,
+							struct pci_driver *driver)
+{
+	pci_ers_result_t ers_res = PCI_ERS_RESULT_DISCONNECT;
+	struct zpci_dev *zdev = to_zpci(pdev);
+	int rc;
+
+	pr_info("%s: Unblocking device access for examination\n", pci_name(pdev));
+	rc = zpci_reset_load_store_blocked(zdev);
+	if (rc) {
+		pr_err("%s: Unblocking device access failed\n", pci_name(pdev));
+		/* Let's try a full reset instead */
+		return PCI_ERS_RESULT_NEED_RESET;
+	}
+
+	if (driver->err_handler->mmio_enabled) {
+		ers_res = driver->err_handler->mmio_enabled(pdev);
+		if (ers_result_indicates_abort(ers_res)) {
+			pr_info("%s: Automatic recovery failed after MMIO re-enable\n",
+				pci_name(pdev));
+			return ers_res;
+		} else if (ers_res == PCI_ERS_RESULT_NEED_RESET) {
+			pr_debug("%s: Driver needs reset to recover\n", pci_name(pdev));
+			return ers_res;
+		}
+	}
+
+	pr_debug("%s: Unblocking DMA\n", pci_name(pdev));
+	rc = zpci_clear_error_state(zdev);
+	if (!rc) {
+		pdev->error_state = pci_channel_io_normal;
+	} else {
+		pr_err("%s: Unblocking DMA failed\n", pci_name(pdev));
+		/* Let's try a full reset instead */
+		return PCI_ERS_RESULT_NEED_RESET;
+	}
+
+	return ers_res;
+}
+
+static pci_ers_result_t zpci_event_do_reset(struct pci_dev *pdev,
+					    struct pci_driver *driver)
+{
+	pci_ers_result_t ers_res = PCI_ERS_RESULT_DISCONNECT;
+
+	pr_info("%s: Initiating reset\n", pci_name(pdev));
+	if (zpci_hot_reset_device(to_zpci(pdev))) {
+		pr_err("%s: The reset request failed\n", pci_name(pdev));
+		return ers_res;
+	}
+	pdev->error_state = pci_channel_io_normal;
+	ers_res = driver->err_handler->slot_reset(pdev);
+	if (ers_result_indicates_abort(ers_res)) {
+		pr_info("%s: Automatic recovery failed after slot reset\n", pci_name(pdev));
+		return ers_res;
+	}
+
+	return ers_res;
+}
+
+/* zpci_event_attempt_error_recovery - Try to recover the given PCI function
+ * @pdev: PCI function to recover currently in the error state
+ *
+ * We follow the scheme outlined in Documentation/PCI/pci-error-recovery.rst.
+ * With the simplification that recovery always happens per function
+ * and the platform determines which functions are affected for
+ * multi-function devices.
+ */
+static pci_ers_result_t zpci_event_attempt_error_recovery(struct pci_dev *pdev)
+{
+	pci_ers_result_t ers_res = PCI_ERS_RESULT_DISCONNECT;
+	struct pci_driver *driver;
+
+	/*
+	 * Ensure that the PCI function is not removed concurrently, no driver
+	 * is unbound or probed and that userspace can't access its
+	 * configuration space while we perform recovery.
+	 */
+	pci_dev_lock(pdev);
+	if (pdev->error_state == pci_channel_io_perm_failure) {
+		ers_res = PCI_ERS_RESULT_DISCONNECT;
+		goto out_unlock;
+	}
+	pdev->error_state = pci_channel_io_frozen;
+
+	if (is_passed_through(to_zpci(pdev))) {
+		pr_info("%s: Cannot be recovered in the host because it is a pass-through device\n",
+			pci_name(pdev));
+		goto out_unlock;
+	}
+
+	driver = to_pci_driver(pdev->dev.driver);
+	if (!is_driver_supported(driver)) {
+		if (!driver)
+			pr_info("%s: Cannot be recovered because no driver is bound to the device\n",
+				pci_name(pdev));
+		else
+			pr_info("%s: The %s driver bound to the device does not support error recovery\n",
+				pci_name(pdev),
+				driver->name);
+		goto out_unlock;
+	}
+
+	ers_res = zpci_event_notify_error_detected(pdev, driver);
+	if (ers_result_indicates_abort(ers_res))
+		goto out_unlock;
+
+	if (ers_res == PCI_ERS_RESULT_CAN_RECOVER) {
+		ers_res = zpci_event_do_error_state_clear(pdev, driver);
+		if (ers_result_indicates_abort(ers_res))
+			goto out_unlock;
+	}
+
+	if (ers_res == PCI_ERS_RESULT_NEED_RESET)
+		ers_res = zpci_event_do_reset(pdev, driver);
+
+	if (ers_res != PCI_ERS_RESULT_RECOVERED) {
+		pr_err("%s: Automatic recovery failed; operator intervention is required\n",
+		       pci_name(pdev));
+		goto out_unlock;
+	}
+
+	pr_info("%s: The device is ready to resume operations\n", pci_name(pdev));
+	if (driver->err_handler->resume)
+		driver->err_handler->resume(pdev);
+out_unlock:
+	pci_dev_unlock(pdev);
+
+	return ers_res;
+}
+
+/* zpci_event_io_failure - Report PCI channel failure state to driver
+ * @pdev: PCI function for which to report
+ * @es: PCI channel failure state to report
+ */
+static void zpci_event_io_failure(struct pci_dev *pdev, pci_channel_state_t es)
+{
+	struct pci_driver *driver;
+
+	pci_dev_lock(pdev);
+	pdev->error_state = es;
+	/**
+	 * While vfio-pci's error_detected callback notifies user-space QEMU
+	 * reacts to this by freezing the guest. In an s390 environment PCI
+	 * errors are rarely fatal so this is overkill. Instead in the future
+	 * we will inject the error event and let the guest recover the device
+	 * itself.
+	 */
+	if (is_passed_through(to_zpci(pdev)))
+		goto out;
+	driver = to_pci_driver(pdev->dev.driver);
+	if (driver && driver->err_handler && driver->err_handler->error_detected)
+		driver->err_handler->error_detected(pdev, pdev->error_state);
+out:
+	pci_dev_unlock(pdev);
+}
+
+static void __zpci_event_error(struct zpci_ccdf_err *ccdf)
+{
+	struct zpci_dev *zdev = get_zdev_by_fid(ccdf->fid);
+	struct pci_dev *pdev = NULL;
+	pci_ers_result_t ers_res;
+
+	zpci_dbg(3, "err fid:%x, fh:%x, pec:%x\n",
+		 ccdf->fid, ccdf->fh, ccdf->pec);
+	zpci_err("error CCDF:\n");
+	zpci_err_hex(ccdf, sizeof(*ccdf));
+
+	if (zdev) {
+		zpci_update_fh(zdev, ccdf->fh);
+		if (zdev->zbus->bus)
+			pdev = pci_get_slot(zdev->zbus->bus, zdev->devfn);
+	}
+
+	pr_err("%s: Event 0x%x reports an error for PCI function 0x%x\n",
+	       pdev ? pci_name(pdev) : "n/a", ccdf->pec, ccdf->fid);
+
+	if (!pdev)
+		goto no_pdev;
+
+	switch (ccdf->pec) {
+	case 0x003a: /* Service Action or Error Recovery Successful */
+		ers_res = zpci_event_attempt_error_recovery(pdev);
+		if (ers_res != PCI_ERS_RESULT_RECOVERED)
+			zpci_event_io_failure(pdev, pci_channel_io_perm_failure);
+		break;
+	default:
+		/*
+		 * Mark as frozen not permanently failed because the device
+		 * could be subsequently recovered by the platform.
+		 */
+		zpci_event_io_failure(pdev, pci_channel_io_frozen);
+		break;
+	}
+	pci_dev_put(pdev);
+no_pdev:
+	zpci_zdev_put(zdev);
+}
+
+void zpci_event_error(void *data)
+{
+	if (zpci_is_enabled())
+		__zpci_event_error(data);
+}
+
+static void zpci_event_hard_deconfigured(struct zpci_dev *zdev, u32 fh)
+{
+	zpci_update_fh(zdev, fh);
+	/* Give the driver a hint that the function is
+	 * already unusable.
+	 */
+	zpci_bus_remove_device(zdev, true);
+	/* Even though the device is already gone we still
+	 * need to free zPCI resources as part of the disable.
+	 */
+	if (zdev->dma_table)
+		zpci_dma_exit_device(zdev);
+	if (zdev_enabled(zdev))
+		zpci_disable_device(zdev);
+	zdev->state = ZPCI_FN_STATE_STANDBY;
+}
+
+static void __zpci_event_availability(struct zpci_ccdf_avail *ccdf)
+{
+	struct zpci_dev *zdev = get_zdev_by_fid(ccdf->fid);
+	bool existing_zdev = !!zdev;
+	enum zpci_state state;
+
+	zpci_dbg(3, "avl fid:%x, fh:%x, pec:%x\n",
+		 ccdf->fid, ccdf->fh, ccdf->pec);
+	switch (ccdf->pec) {
+	case 0x0301: /* Reserved|Standby -> Configured */
+		if (!zdev) {
+			zdev = zpci_create_device(ccdf->fid, ccdf->fh, ZPCI_FN_STATE_CONFIGURED);
+			if (IS_ERR(zdev))
+				break;
+		} else {
+			/* the configuration request may be stale */
+			if (zdev->state != ZPCI_FN_STATE_STANDBY)
+				break;
+			zdev->state = ZPCI_FN_STATE_CONFIGURED;
+		}
+		zpci_scan_configured_device(zdev, ccdf->fh);
+		break;
+	case 0x0302: /* Reserved -> Standby */
+		if (!zdev)
+			zpci_create_device(ccdf->fid, ccdf->fh, ZPCI_FN_STATE_STANDBY);
+		else
+			zpci_update_fh(zdev, ccdf->fh);
+		break;
+	case 0x0303: /* Deconfiguration requested */
+		if (zdev) {
+			/* The event may have been queued before we confirgured
+			 * the device.
+			 */
+			if (zdev->state != ZPCI_FN_STATE_CONFIGURED)
+				break;
+			zpci_update_fh(zdev, ccdf->fh);
+			zpci_deconfigure_device(zdev);
+		}
+		break;
+	case 0x0304: /* Configured -> Standby|Reserved */
+		if (zdev) {
+			/* The event may have been queued before we confirgured
+			 * the device.:
+			 */
+			if (zdev->state == ZPCI_FN_STATE_CONFIGURED)
+				zpci_event_hard_deconfigured(zdev, ccdf->fh);
+			/* The 0x0304 event may immediately reserve the device */
+			if (!clp_get_state(zdev->fid, &state) &&
+			    state == ZPCI_FN_STATE_RESERVED) {
+				zpci_device_reserved(zdev);
+			}
+		}
+		break;
+	case 0x0306: /* 0x308 or 0x302 for multiple devices */
+		zpci_remove_reserved_devices();
+		clp_scan_pci_devices();
+		break;
+	case 0x0308: /* Standby -> Reserved */
+		if (!zdev)
+			break;
+		zpci_device_reserved(zdev);
+		break;
+	default:
+		break;
+	}
+	if (existing_zdev)
+		zpci_zdev_put(zdev);
+}
+
+void zpci_event_availability(void *data)
+{
+	if (zpci_is_enabled())
+		__zpci_event_availability(data);
+}
diff --git a/arch/s390/pci/pci_insn.c b/arch/s390/pci/pci_insn.c
new file mode 100644
index 0000000000..56480be482
--- /dev/null
+++ b/arch/s390/pci/pci_insn.c
@@ -0,0 +1,443 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * s390 specific pci instructions
+ *
+ * Copyright IBM Corp. 2013
+ */
+
+#include <linux/export.h>
+#include <linux/errno.h>
+#include <linux/delay.h>
+#include <linux/jump_label.h>
+#include <asm/asm-extable.h>
+#include <asm/facility.h>
+#include <asm/pci_insn.h>
+#include <asm/pci_debug.h>
+#include <asm/pci_io.h>
+#include <asm/processor.h>
+
+#define ZPCI_INSN_BUSY_DELAY	1	/* 1 microsecond */
+
+struct zpci_err_insn_data {
+	u8 insn;
+	u8 cc;
+	u8 status;
+	union {
+		struct {
+			u64 req;
+			u64 offset;
+		};
+		struct {
+			u64 addr;
+			u64 len;
+		};
+	};
+} __packed;
+
+static inline void zpci_err_insn_req(int lvl, u8 insn, u8 cc, u8 status,
+				     u64 req, u64 offset)
+{
+	struct zpci_err_insn_data data = {
+		.insn = insn, .cc = cc, .status = status,
+		.req = req, .offset = offset};
+
+	zpci_err_hex_level(lvl, &data, sizeof(data));
+}
+
+static inline void zpci_err_insn_addr(int lvl, u8 insn, u8 cc, u8 status,
+				      u64 addr, u64 len)
+{
+	struct zpci_err_insn_data data = {
+		.insn = insn, .cc = cc, .status = status,
+		.addr = addr, .len = len};
+
+	zpci_err_hex_level(lvl, &data, sizeof(data));
+}
+
+/* Modify PCI Function Controls */
+static inline u8 __mpcifc(u64 req, struct zpci_fib *fib, u8 *status)
+{
+	u8 cc;
+
+	asm volatile (
+		"	.insn	rxy,0xe300000000d0,%[req],%[fib]\n"
+		"	ipm	%[cc]\n"
+		"	srl	%[cc],28\n"
+		: [cc] "=d" (cc), [req] "+d" (req), [fib] "+Q" (*fib)
+		: : "cc");
+	*status = req >> 24 & 0xff;
+	return cc;
+}
+
+u8 zpci_mod_fc(u64 req, struct zpci_fib *fib, u8 *status)
+{
+	bool retried = false;
+	u8 cc;
+
+	do {
+		cc = __mpcifc(req, fib, status);
+		if (cc == 2) {
+			msleep(ZPCI_INSN_BUSY_DELAY);
+			if (!retried) {
+				zpci_err_insn_req(1, 'M', cc, *status, req, 0);
+				retried = true;
+			}
+		}
+	} while (cc == 2);
+
+	if (cc)
+		zpci_err_insn_req(0, 'M', cc, *status, req, 0);
+	else if (retried)
+		zpci_err_insn_req(1, 'M', cc, *status, req, 0);
+
+	return cc;
+}
+EXPORT_SYMBOL_GPL(zpci_mod_fc);
+
+/* Refresh PCI Translations */
+static inline u8 __rpcit(u64 fn, u64 addr, u64 range, u8 *status)
+{
+	union register_pair addr_range = {.even = addr, .odd = range};
+	u8 cc;
+
+	asm volatile (
+		"	.insn	rre,0xb9d30000,%[fn],%[addr_range]\n"
+		"	ipm	%[cc]\n"
+		"	srl	%[cc],28\n"
+		: [cc] "=d" (cc), [fn] "+d" (fn)
+		: [addr_range] "d" (addr_range.pair)
+		: "cc");
+	*status = fn >> 24 & 0xff;
+	return cc;
+}
+
+int zpci_refresh_trans(u64 fn, u64 addr, u64 range)
+{
+	bool retried = false;
+	u8 cc, status;
+
+	do {
+		cc = __rpcit(fn, addr, range, &status);
+		if (cc == 2) {
+			udelay(ZPCI_INSN_BUSY_DELAY);
+			if (!retried) {
+				zpci_err_insn_addr(1, 'R', cc, status, addr, range);
+				retried = true;
+			}
+		}
+	} while (cc == 2);
+
+	if (cc)
+		zpci_err_insn_addr(0, 'R', cc, status, addr, range);
+	else if (retried)
+		zpci_err_insn_addr(1, 'R', cc, status, addr, range);
+
+	if (cc == 1 && (status == 4 || status == 16))
+		return -ENOMEM;
+
+	return (cc) ? -EIO : 0;
+}
+
+/* Set Interruption Controls */
+int zpci_set_irq_ctrl(u16 ctl, u8 isc, union zpci_sic_iib *iib)
+{
+	if (!test_facility(72))
+		return -EIO;
+
+	asm volatile(
+		".insn	rsy,0xeb00000000d1,%[ctl],%[isc],%[iib]\n"
+		: : [ctl] "d" (ctl), [isc] "d" (isc << 27), [iib] "Q" (*iib));
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(zpci_set_irq_ctrl);
+
+/* PCI Load */
+static inline int ____pcilg(u64 *data, u64 req, u64 offset, u8 *status)
+{
+	union register_pair req_off = {.even = req, .odd = offset};
+	int cc = -ENXIO;
+	u64 __data;
+
+	asm volatile (
+		"	.insn	rre,0xb9d20000,%[data],%[req_off]\n"
+		"0:	ipm	%[cc]\n"
+		"	srl	%[cc],28\n"
+		"1:\n"
+		EX_TABLE(0b, 1b)
+		: [cc] "+d" (cc), [data] "=d" (__data),
+		  [req_off] "+&d" (req_off.pair) :: "cc");
+	*status = req_off.even >> 24 & 0xff;
+	*data = __data;
+	return cc;
+}
+
+static inline int __pcilg(u64 *data, u64 req, u64 offset, u8 *status)
+{
+	u64 __data;
+	int cc;
+
+	cc = ____pcilg(&__data, req, offset, status);
+	if (!cc)
+		*data = __data;
+
+	return cc;
+}
+
+int __zpci_load(u64 *data, u64 req, u64 offset)
+{
+	bool retried = false;
+	u8 status;
+	int cc;
+
+	do {
+		cc = __pcilg(data, req, offset, &status);
+		if (cc == 2) {
+			udelay(ZPCI_INSN_BUSY_DELAY);
+			if (!retried) {
+				zpci_err_insn_req(1, 'l', cc, status, req, offset);
+				retried = true;
+			}
+		}
+	} while (cc == 2);
+
+	if (cc)
+		zpci_err_insn_req(0, 'l', cc, status, req, offset);
+	else if (retried)
+		zpci_err_insn_req(1, 'l', cc, status, req, offset);
+
+	return (cc > 0) ? -EIO : cc;
+}
+EXPORT_SYMBOL_GPL(__zpci_load);
+
+static inline int zpci_load_fh(u64 *data, const volatile void __iomem *addr,
+			       unsigned long len)
+{
+	struct zpci_iomap_entry *entry = &zpci_iomap_start[ZPCI_IDX(addr)];
+	u64 req = ZPCI_CREATE_REQ(READ_ONCE(entry->fh), entry->bar, len);
+
+	return __zpci_load(data, req, ZPCI_OFFSET(addr));
+}
+
+static inline int __pcilg_mio(u64 *data, u64 ioaddr, u64 len, u8 *status)
+{
+	union register_pair ioaddr_len = {.even = ioaddr, .odd = len};
+	int cc = -ENXIO;
+	u64 __data;
+
+	asm volatile (
+		"       .insn   rre,0xb9d60000,%[data],%[ioaddr_len]\n"
+		"0:     ipm     %[cc]\n"
+		"       srl     %[cc],28\n"
+		"1:\n"
+		EX_TABLE(0b, 1b)
+		: [cc] "+d" (cc), [data] "=d" (__data),
+		  [ioaddr_len] "+&d" (ioaddr_len.pair) :: "cc");
+	*status = ioaddr_len.odd >> 24 & 0xff;
+	*data = __data;
+	return cc;
+}
+
+int zpci_load(u64 *data, const volatile void __iomem *addr, unsigned long len)
+{
+	u8 status;
+	int cc;
+
+	if (!static_branch_unlikely(&have_mio))
+		return zpci_load_fh(data, addr, len);
+
+	cc = __pcilg_mio(data, (__force u64) addr, len, &status);
+	if (cc)
+		zpci_err_insn_addr(0, 'L', cc, status, (__force u64) addr, len);
+
+	return (cc > 0) ? -EIO : cc;
+}
+EXPORT_SYMBOL_GPL(zpci_load);
+
+/* PCI Store */
+static inline int __pcistg(u64 data, u64 req, u64 offset, u8 *status)
+{
+	union register_pair req_off = {.even = req, .odd = offset};
+	int cc = -ENXIO;
+
+	asm volatile (
+		"	.insn	rre,0xb9d00000,%[data],%[req_off]\n"
+		"0:	ipm	%[cc]\n"
+		"	srl	%[cc],28\n"
+		"1:\n"
+		EX_TABLE(0b, 1b)
+		: [cc] "+d" (cc), [req_off] "+&d" (req_off.pair)
+		: [data] "d" (data)
+		: "cc");
+	*status = req_off.even >> 24 & 0xff;
+	return cc;
+}
+
+int __zpci_store(u64 data, u64 req, u64 offset)
+{
+	bool retried = false;
+	u8 status;
+	int cc;
+
+	do {
+		cc = __pcistg(data, req, offset, &status);
+		if (cc == 2) {
+			udelay(ZPCI_INSN_BUSY_DELAY);
+			if (!retried) {
+				zpci_err_insn_req(1, 's', cc, status, req, offset);
+				retried = true;
+			}
+		}
+	} while (cc == 2);
+
+	if (cc)
+		zpci_err_insn_req(0, 's', cc, status, req, offset);
+	else if (retried)
+		zpci_err_insn_req(1, 's', cc, status, req, offset);
+
+	return (cc > 0) ? -EIO : cc;
+}
+EXPORT_SYMBOL_GPL(__zpci_store);
+
+static inline int zpci_store_fh(const volatile void __iomem *addr, u64 data,
+				unsigned long len)
+{
+	struct zpci_iomap_entry *entry = &zpci_iomap_start[ZPCI_IDX(addr)];
+	u64 req = ZPCI_CREATE_REQ(READ_ONCE(entry->fh), entry->bar, len);
+
+	return __zpci_store(data, req, ZPCI_OFFSET(addr));
+}
+
+static inline int __pcistg_mio(u64 data, u64 ioaddr, u64 len, u8 *status)
+{
+	union register_pair ioaddr_len = {.even = ioaddr, .odd = len};
+	int cc = -ENXIO;
+
+	asm volatile (
+		"       .insn   rre,0xb9d40000,%[data],%[ioaddr_len]\n"
+		"0:     ipm     %[cc]\n"
+		"       srl     %[cc],28\n"
+		"1:\n"
+		EX_TABLE(0b, 1b)
+		: [cc] "+d" (cc), [ioaddr_len] "+&d" (ioaddr_len.pair)
+		: [data] "d" (data)
+		: "cc", "memory");
+	*status = ioaddr_len.odd >> 24 & 0xff;
+	return cc;
+}
+
+int zpci_store(const volatile void __iomem *addr, u64 data, unsigned long len)
+{
+	u8 status;
+	int cc;
+
+	if (!static_branch_unlikely(&have_mio))
+		return zpci_store_fh(addr, data, len);
+
+	cc = __pcistg_mio(data, (__force u64) addr, len, &status);
+	if (cc)
+		zpci_err_insn_addr(0, 'S', cc, status, (__force u64) addr, len);
+
+	return (cc > 0) ? -EIO : cc;
+}
+EXPORT_SYMBOL_GPL(zpci_store);
+
+/* PCI Store Block */
+static inline int __pcistb(const u64 *data, u64 req, u64 offset, u8 *status)
+{
+	int cc = -ENXIO;
+
+	asm volatile (
+		"	.insn	rsy,0xeb00000000d0,%[req],%[offset],%[data]\n"
+		"0:	ipm	%[cc]\n"
+		"	srl	%[cc],28\n"
+		"1:\n"
+		EX_TABLE(0b, 1b)
+		: [cc] "+d" (cc), [req] "+d" (req)
+		: [offset] "d" (offset), [data] "Q" (*data)
+		: "cc");
+	*status = req >> 24 & 0xff;
+	return cc;
+}
+
+int __zpci_store_block(const u64 *data, u64 req, u64 offset)
+{
+	bool retried = false;
+	u8 status;
+	int cc;
+
+	do {
+		cc = __pcistb(data, req, offset, &status);
+		if (cc == 2) {
+			udelay(ZPCI_INSN_BUSY_DELAY);
+			if (!retried) {
+				zpci_err_insn_req(0, 'b', cc, status, req, offset);
+				retried = true;
+			}
+		}
+	} while (cc == 2);
+
+	if (cc)
+		zpci_err_insn_req(0, 'b', cc, status, req, offset);
+	else if (retried)
+		zpci_err_insn_req(1, 'b', cc, status, req, offset);
+
+	return (cc > 0) ? -EIO : cc;
+}
+EXPORT_SYMBOL_GPL(__zpci_store_block);
+
+static inline int zpci_write_block_fh(volatile void __iomem *dst,
+				      const void *src, unsigned long len)
+{
+	struct zpci_iomap_entry *entry = &zpci_iomap_start[ZPCI_IDX(dst)];
+	u64 req = ZPCI_CREATE_REQ(entry->fh, entry->bar, len);
+	u64 offset = ZPCI_OFFSET(dst);
+
+	return __zpci_store_block(src, req, offset);
+}
+
+static inline int __pcistb_mio(const u64 *data, u64 ioaddr, u64 len, u8 *status)
+{
+	int cc = -ENXIO;
+
+	asm volatile (
+		"       .insn   rsy,0xeb00000000d4,%[len],%[ioaddr],%[data]\n"
+		"0:     ipm     %[cc]\n"
+		"       srl     %[cc],28\n"
+		"1:\n"
+		EX_TABLE(0b, 1b)
+		: [cc] "+d" (cc), [len] "+d" (len)
+		: [ioaddr] "d" (ioaddr), [data] "Q" (*data)
+		: "cc");
+	*status = len >> 24 & 0xff;
+	return cc;
+}
+
+int zpci_write_block(volatile void __iomem *dst,
+		     const void *src, unsigned long len)
+{
+	u8 status;
+	int cc;
+
+	if (!static_branch_unlikely(&have_mio))
+		return zpci_write_block_fh(dst, src, len);
+
+	cc = __pcistb_mio(src, (__force u64) dst, len, &status);
+	if (cc)
+		zpci_err_insn_addr(0, 'B', cc, status, (__force u64) dst, len);
+
+	return (cc > 0) ? -EIO : cc;
+}
+EXPORT_SYMBOL_GPL(zpci_write_block);
+
+static inline void __pciwb_mio(void)
+{
+	asm volatile (".insn    rre,0xb9d50000,0,0\n");
+}
+
+void zpci_barrier(void)
+{
+	if (static_branch_likely(&have_mio))
+		__pciwb_mio();
+}
+EXPORT_SYMBOL_GPL(zpci_barrier);
diff --git a/arch/s390/pci/pci_iov.c b/arch/s390/pci/pci_iov.c
new file mode 100644
index 0000000000..ead062bf2b
--- /dev/null
+++ b/arch/s390/pci/pci_iov.c
@@ -0,0 +1,99 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright IBM Corp. 2020
+ *
+ * Author(s):
+ *   Niklas Schnelle <schnelle@linux.ibm.com>
+ *
+ */
+
+#define KMSG_COMPONENT "zpci"
+#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
+
+#include <linux/kernel.h>
+#include <linux/pci.h>
+
+#include "pci_iov.h"
+
+static struct resource iov_res = {
+	.name	= "PCI IOV res",
+	.start	= 0,
+	.end	= -1,
+	.flags	= IORESOURCE_MEM,
+};
+
+void zpci_iov_map_resources(struct pci_dev *pdev)
+{
+	resource_size_t len;
+	int i;
+
+	for (i = 0; i < PCI_SRIOV_NUM_BARS; i++) {
+		int bar = i + PCI_IOV_RESOURCES;
+
+		len = pci_resource_len(pdev, bar);
+		if (!len)
+			continue;
+		pdev->resource[bar].parent = &iov_res;
+	}
+}
+
+void zpci_iov_remove_virtfn(struct pci_dev *pdev, int vfn)
+{
+	pci_lock_rescan_remove();
+	/* Linux' vfid's start at 0 vfn at 1 */
+	pci_iov_remove_virtfn(pdev->physfn, vfn - 1);
+	pci_unlock_rescan_remove();
+}
+
+static int zpci_iov_link_virtfn(struct pci_dev *pdev, struct pci_dev *virtfn, int vfid)
+{
+	int rc;
+
+	rc = pci_iov_sysfs_link(pdev, virtfn, vfid);
+	if (rc)
+		return rc;
+
+	virtfn->is_virtfn = 1;
+	virtfn->multifunction = 0;
+	virtfn->physfn = pci_dev_get(pdev);
+
+	return 0;
+}
+
+int zpci_iov_setup_virtfn(struct zpci_bus *zbus, struct pci_dev *virtfn, int vfn)
+{
+	int i, cand_devfn;
+	struct zpci_dev *zdev;
+	struct pci_dev *pdev;
+	int vfid = vfn - 1; /* Linux' vfid's start at 0 vfn at 1*/
+	int rc = 0;
+
+	if (!zbus->multifunction)
+		return 0;
+
+	/* If the parent PF for the given VF is also configured in the
+	 * instance, it must be on the same zbus.
+	 * We can then identify the parent PF by checking what
+	 * devfn the VF would have if it belonged to that PF using the PF's
+	 * stride and offset. Only if this candidate devfn matches the
+	 * actual devfn will we link both functions.
+	 */
+	for (i = 0; i < ZPCI_FUNCTIONS_PER_BUS; i++) {
+		zdev = zbus->function[i];
+		if (zdev && zdev->is_physfn) {
+			pdev = pci_get_slot(zbus->bus, zdev->devfn);
+			if (!pdev)
+				continue;
+			cand_devfn = pci_iov_virtfn_devfn(pdev, vfid);
+			if (cand_devfn == virtfn->devfn) {
+				rc = zpci_iov_link_virtfn(pdev, virtfn, vfid);
+				/* balance pci_get_slot() */
+				pci_dev_put(pdev);
+				break;
+			}
+			/* balance pci_get_slot() */
+			pci_dev_put(pdev);
+		}
+	}
+	return rc;
+}
diff --git a/arch/s390/pci/pci_iov.h b/arch/s390/pci/pci_iov.h
new file mode 100644
index 0000000000..b2c828003b
--- /dev/null
+++ b/arch/s390/pci/pci_iov.h
@@ -0,0 +1,30 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright IBM Corp. 2020
+ *
+ * Author(s):
+ *   Niklas Schnelle <schnelle@linux.ibm.com>
+ *
+ */
+
+#ifndef __S390_PCI_IOV_H
+#define __S390_PCI_IOV_H
+
+#ifdef CONFIG_PCI_IOV
+void zpci_iov_remove_virtfn(struct pci_dev *pdev, int vfn);
+
+void zpci_iov_map_resources(struct pci_dev *pdev);
+
+int zpci_iov_setup_virtfn(struct zpci_bus *zbus, struct pci_dev *virtfn, int vfn);
+
+#else /* CONFIG_PCI_IOV */
+static inline void zpci_iov_remove_virtfn(struct pci_dev *pdev, int vfn) {}
+
+static inline void zpci_iov_map_resources(struct pci_dev *pdev) {}
+
+static inline int zpci_iov_setup_virtfn(struct zpci_bus *zbus, struct pci_dev *virtfn, int vfn)
+{
+	return 0;
+}
+#endif /* CONFIG_PCI_IOV */
+#endif /* __S390_PCI_IOV_h */
diff --git a/arch/s390/pci/pci_irq.c b/arch/s390/pci/pci_irq.c
new file mode 100644
index 0000000000..ff8f24854c
--- /dev/null
+++ b/arch/s390/pci/pci_irq.c
@@ -0,0 +1,530 @@
+// SPDX-License-Identifier: GPL-2.0
+#define KMSG_COMPONENT "zpci"
+#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
+
+#include <linux/kernel.h>
+#include <linux/irq.h>
+#include <linux/kernel_stat.h>
+#include <linux/pci.h>
+#include <linux/msi.h>
+#include <linux/smp.h>
+
+#include <asm/isc.h>
+#include <asm/airq.h>
+#include <asm/tpi.h>
+
+static enum {FLOATING, DIRECTED} irq_delivery;
+
+/*
+ * summary bit vector
+ * FLOATING - summary bit per function
+ * DIRECTED - summary bit per cpu (only used in fallback path)
+ */
+static struct airq_iv *zpci_sbv;
+
+/*
+ * interrupt bit vectors
+ * FLOATING - interrupt bit vector per function
+ * DIRECTED - interrupt bit vector per cpu
+ */
+static struct airq_iv **zpci_ibv;
+
+/* Modify PCI: Register floating adapter interruptions */
+static int zpci_set_airq(struct zpci_dev *zdev)
+{
+	u64 req = ZPCI_CREATE_REQ(zdev->fh, 0, ZPCI_MOD_FC_REG_INT);
+	struct zpci_fib fib = {0};
+	u8 status;
+
+	fib.fmt0.isc = PCI_ISC;
+	fib.fmt0.sum = 1;	/* enable summary notifications */
+	fib.fmt0.noi = airq_iv_end(zdev->aibv);
+	fib.fmt0.aibv = virt_to_phys(zdev->aibv->vector);
+	fib.fmt0.aibvo = 0;	/* each zdev has its own interrupt vector */
+	fib.fmt0.aisb = virt_to_phys(zpci_sbv->vector) + (zdev->aisb / 64) * 8;
+	fib.fmt0.aisbo = zdev->aisb & 63;
+	fib.gd = zdev->gisa;
+
+	return zpci_mod_fc(req, &fib, &status) ? -EIO : 0;
+}
+
+/* Modify PCI: Unregister floating adapter interruptions */
+static int zpci_clear_airq(struct zpci_dev *zdev)
+{
+	u64 req = ZPCI_CREATE_REQ(zdev->fh, 0, ZPCI_MOD_FC_DEREG_INT);
+	struct zpci_fib fib = {0};
+	u8 cc, status;
+
+	fib.gd = zdev->gisa;
+
+	cc = zpci_mod_fc(req, &fib, &status);
+	if (cc == 3 || (cc == 1 && status == 24))
+		/* Function already gone or IRQs already deregistered. */
+		cc = 0;
+
+	return cc ? -EIO : 0;
+}
+
+/* Modify PCI: Register CPU directed interruptions */
+static int zpci_set_directed_irq(struct zpci_dev *zdev)
+{
+	u64 req = ZPCI_CREATE_REQ(zdev->fh, 0, ZPCI_MOD_FC_REG_INT_D);
+	struct zpci_fib fib = {0};
+	u8 status;
+
+	fib.fmt = 1;
+	fib.fmt1.noi = zdev->msi_nr_irqs;
+	fib.fmt1.dibvo = zdev->msi_first_bit;
+	fib.gd = zdev->gisa;
+
+	return zpci_mod_fc(req, &fib, &status) ? -EIO : 0;
+}
+
+/* Modify PCI: Unregister CPU directed interruptions */
+static int zpci_clear_directed_irq(struct zpci_dev *zdev)
+{
+	u64 req = ZPCI_CREATE_REQ(zdev->fh, 0, ZPCI_MOD_FC_DEREG_INT_D);
+	struct zpci_fib fib = {0};
+	u8 cc, status;
+
+	fib.fmt = 1;
+	fib.gd = zdev->gisa;
+	cc = zpci_mod_fc(req, &fib, &status);
+	if (cc == 3 || (cc == 1 && status == 24))
+		/* Function already gone or IRQs already deregistered. */
+		cc = 0;
+
+	return cc ? -EIO : 0;
+}
+
+/* Register adapter interruptions */
+static int zpci_set_irq(struct zpci_dev *zdev)
+{
+	int rc;
+
+	if (irq_delivery == DIRECTED)
+		rc = zpci_set_directed_irq(zdev);
+	else
+		rc = zpci_set_airq(zdev);
+
+	if (!rc)
+		zdev->irqs_registered = 1;
+
+	return rc;
+}
+
+/* Clear adapter interruptions */
+static int zpci_clear_irq(struct zpci_dev *zdev)
+{
+	int rc;
+
+	if (irq_delivery == DIRECTED)
+		rc = zpci_clear_directed_irq(zdev);
+	else
+		rc = zpci_clear_airq(zdev);
+
+	if (!rc)
+		zdev->irqs_registered = 0;
+
+	return rc;
+}
+
+static int zpci_set_irq_affinity(struct irq_data *data, const struct cpumask *dest,
+				 bool force)
+{
+	struct msi_desc *entry = irq_data_get_msi_desc(data);
+	struct msi_msg msg = entry->msg;
+	int cpu_addr = smp_cpu_get_cpu_address(cpumask_first(dest));
+
+	msg.address_lo &= 0xff0000ff;
+	msg.address_lo |= (cpu_addr << 8);
+	pci_write_msi_msg(data->irq, &msg);
+
+	return IRQ_SET_MASK_OK;
+}
+
+static struct irq_chip zpci_irq_chip = {
+	.name = "PCI-MSI",
+	.irq_unmask = pci_msi_unmask_irq,
+	.irq_mask = pci_msi_mask_irq,
+};
+
+static void zpci_handle_cpu_local_irq(bool rescan)
+{
+	struct airq_iv *dibv = zpci_ibv[smp_processor_id()];
+	union zpci_sic_iib iib = {{0}};
+	unsigned long bit;
+	int irqs_on = 0;
+
+	for (bit = 0;;) {
+		/* Scan the directed IRQ bit vector */
+		bit = airq_iv_scan(dibv, bit, airq_iv_end(dibv));
+		if (bit == -1UL) {
+			if (!rescan || irqs_on++)
+				/* End of second scan with interrupts on. */
+				break;
+			/* First scan complete, re-enable interrupts. */
+			if (zpci_set_irq_ctrl(SIC_IRQ_MODE_D_SINGLE, PCI_ISC, &iib))
+				break;
+			bit = 0;
+			continue;
+		}
+		inc_irq_stat(IRQIO_MSI);
+		generic_handle_irq(airq_iv_get_data(dibv, bit));
+	}
+}
+
+struct cpu_irq_data {
+	call_single_data_t csd;
+	atomic_t scheduled;
+};
+static DEFINE_PER_CPU_SHARED_ALIGNED(struct cpu_irq_data, irq_data);
+
+static void zpci_handle_remote_irq(void *data)
+{
+	atomic_t *scheduled = data;
+
+	do {
+		zpci_handle_cpu_local_irq(false);
+	} while (atomic_dec_return(scheduled));
+}
+
+static void zpci_handle_fallback_irq(void)
+{
+	struct cpu_irq_data *cpu_data;
+	union zpci_sic_iib iib = {{0}};
+	unsigned long cpu;
+	int irqs_on = 0;
+
+	for (cpu = 0;;) {
+		cpu = airq_iv_scan(zpci_sbv, cpu, airq_iv_end(zpci_sbv));
+		if (cpu == -1UL) {
+			if (irqs_on++)
+				/* End of second scan with interrupts on. */
+				break;
+			/* First scan complete, re-enable interrupts. */
+			if (zpci_set_irq_ctrl(SIC_IRQ_MODE_SINGLE, PCI_ISC, &iib))
+				break;
+			cpu = 0;
+			continue;
+		}
+		cpu_data = &per_cpu(irq_data, cpu);
+		if (atomic_inc_return(&cpu_data->scheduled) > 1)
+			continue;
+
+		INIT_CSD(&cpu_data->csd, zpci_handle_remote_irq, &cpu_data->scheduled);
+		smp_call_function_single_async(cpu, &cpu_data->csd);
+	}
+}
+
+static void zpci_directed_irq_handler(struct airq_struct *airq,
+				      struct tpi_info *tpi_info)
+{
+	bool floating = !tpi_info->directed_irq;
+
+	if (floating) {
+		inc_irq_stat(IRQIO_PCF);
+		zpci_handle_fallback_irq();
+	} else {
+		inc_irq_stat(IRQIO_PCD);
+		zpci_handle_cpu_local_irq(true);
+	}
+}
+
+static void zpci_floating_irq_handler(struct airq_struct *airq,
+				      struct tpi_info *tpi_info)
+{
+	union zpci_sic_iib iib = {{0}};
+	unsigned long si, ai;
+	struct airq_iv *aibv;
+	int irqs_on = 0;
+
+	inc_irq_stat(IRQIO_PCF);
+	for (si = 0;;) {
+		/* Scan adapter summary indicator bit vector */
+		si = airq_iv_scan(zpci_sbv, si, airq_iv_end(zpci_sbv));
+		if (si == -1UL) {
+			if (irqs_on++)
+				/* End of second scan with interrupts on. */
+				break;
+			/* First scan complete, re-enable interrupts. */
+			if (zpci_set_irq_ctrl(SIC_IRQ_MODE_SINGLE, PCI_ISC, &iib))
+				break;
+			si = 0;
+			continue;
+		}
+
+		/* Scan the adapter interrupt vector for this device. */
+		aibv = zpci_ibv[si];
+		for (ai = 0;;) {
+			ai = airq_iv_scan(aibv, ai, airq_iv_end(aibv));
+			if (ai == -1UL)
+				break;
+			inc_irq_stat(IRQIO_MSI);
+			airq_iv_lock(aibv, ai);
+			generic_handle_irq(airq_iv_get_data(aibv, ai));
+			airq_iv_unlock(aibv, ai);
+		}
+	}
+}
+
+int arch_setup_msi_irqs(struct pci_dev *pdev, int nvec, int type)
+{
+	struct zpci_dev *zdev = to_zpci(pdev);
+	unsigned int hwirq, msi_vecs, cpu;
+	unsigned long bit;
+	struct msi_desc *msi;
+	struct msi_msg msg;
+	int cpu_addr;
+	int rc, irq;
+
+	zdev->aisb = -1UL;
+	zdev->msi_first_bit = -1U;
+	if (type == PCI_CAP_ID_MSI && nvec > 1)
+		return 1;
+	msi_vecs = min_t(unsigned int, nvec, zdev->max_msi);
+
+	if (irq_delivery == DIRECTED) {
+		/* Allocate cpu vector bits */
+		bit = airq_iv_alloc(zpci_ibv[0], msi_vecs);
+		if (bit == -1UL)
+			return -EIO;
+	} else {
+		/* Allocate adapter summary indicator bit */
+		bit = airq_iv_alloc_bit(zpci_sbv);
+		if (bit == -1UL)
+			return -EIO;
+		zdev->aisb = bit;
+
+		/* Create adapter interrupt vector */
+		zdev->aibv = airq_iv_create(msi_vecs, AIRQ_IV_DATA | AIRQ_IV_BITLOCK, NULL);
+		if (!zdev->aibv)
+			return -ENOMEM;
+
+		/* Wire up shortcut pointer */
+		zpci_ibv[bit] = zdev->aibv;
+		/* Each function has its own interrupt vector */
+		bit = 0;
+	}
+
+	/* Request MSI interrupts */
+	hwirq = bit;
+	msi_for_each_desc(msi, &pdev->dev, MSI_DESC_NOTASSOCIATED) {
+		rc = -EIO;
+		if (hwirq - bit >= msi_vecs)
+			break;
+		irq = __irq_alloc_descs(-1, 0, 1, 0, THIS_MODULE,
+				(irq_delivery == DIRECTED) ?
+				msi->affinity : NULL);
+		if (irq < 0)
+			return -ENOMEM;
+		rc = irq_set_msi_desc(irq, msi);
+		if (rc)
+			return rc;
+		irq_set_chip_and_handler(irq, &zpci_irq_chip,
+					 handle_percpu_irq);
+		msg.data = hwirq - bit;
+		if (irq_delivery == DIRECTED) {
+			if (msi->affinity)
+				cpu = cpumask_first(&msi->affinity->mask);
+			else
+				cpu = 0;
+			cpu_addr = smp_cpu_get_cpu_address(cpu);
+
+			msg.address_lo = zdev->msi_addr & 0xff0000ff;
+			msg.address_lo |= (cpu_addr << 8);
+
+			for_each_possible_cpu(cpu) {
+				airq_iv_set_data(zpci_ibv[cpu], hwirq, irq);
+			}
+		} else {
+			msg.address_lo = zdev->msi_addr & 0xffffffff;
+			airq_iv_set_data(zdev->aibv, hwirq, irq);
+		}
+		msg.address_hi = zdev->msi_addr >> 32;
+		pci_write_msi_msg(irq, &msg);
+		hwirq++;
+	}
+
+	zdev->msi_first_bit = bit;
+	zdev->msi_nr_irqs = msi_vecs;
+
+	rc = zpci_set_irq(zdev);
+	if (rc)
+		return rc;
+
+	return (msi_vecs == nvec) ? 0 : msi_vecs;
+}
+
+void arch_teardown_msi_irqs(struct pci_dev *pdev)
+{
+	struct zpci_dev *zdev = to_zpci(pdev);
+	struct msi_desc *msi;
+	int rc;
+
+	/* Disable interrupts */
+	rc = zpci_clear_irq(zdev);
+	if (rc)
+		return;
+
+	/* Release MSI interrupts */
+	msi_for_each_desc(msi, &pdev->dev, MSI_DESC_ASSOCIATED) {
+		irq_set_msi_desc(msi->irq, NULL);
+		irq_free_desc(msi->irq);
+		msi->msg.address_lo = 0;
+		msi->msg.address_hi = 0;
+		msi->msg.data = 0;
+		msi->irq = 0;
+	}
+
+	if (zdev->aisb != -1UL) {
+		zpci_ibv[zdev->aisb] = NULL;
+		airq_iv_free_bit(zpci_sbv, zdev->aisb);
+		zdev->aisb = -1UL;
+	}
+	if (zdev->aibv) {
+		airq_iv_release(zdev->aibv);
+		zdev->aibv = NULL;
+	}
+
+	if ((irq_delivery == DIRECTED) && zdev->msi_first_bit != -1U)
+		airq_iv_free(zpci_ibv[0], zdev->msi_first_bit, zdev->msi_nr_irqs);
+}
+
+bool arch_restore_msi_irqs(struct pci_dev *pdev)
+{
+	struct zpci_dev *zdev = to_zpci(pdev);
+
+	if (!zdev->irqs_registered)
+		zpci_set_irq(zdev);
+	return true;
+}
+
+static struct airq_struct zpci_airq = {
+	.handler = zpci_floating_irq_handler,
+	.isc = PCI_ISC,
+};
+
+static void __init cpu_enable_directed_irq(void *unused)
+{
+	union zpci_sic_iib iib = {{0}};
+	union zpci_sic_iib ziib = {{0}};
+
+	iib.cdiib.dibv_addr = (u64) zpci_ibv[smp_processor_id()]->vector;
+
+	zpci_set_irq_ctrl(SIC_IRQ_MODE_SET_CPU, 0, &iib);
+	zpci_set_irq_ctrl(SIC_IRQ_MODE_D_SINGLE, PCI_ISC, &ziib);
+}
+
+static int __init zpci_directed_irq_init(void)
+{
+	union zpci_sic_iib iib = {{0}};
+	unsigned int cpu;
+
+	zpci_sbv = airq_iv_create(num_possible_cpus(), 0, NULL);
+	if (!zpci_sbv)
+		return -ENOMEM;
+
+	iib.diib.isc = PCI_ISC;
+	iib.diib.nr_cpus = num_possible_cpus();
+	iib.diib.disb_addr = virt_to_phys(zpci_sbv->vector);
+	zpci_set_irq_ctrl(SIC_IRQ_MODE_DIRECT, 0, &iib);
+
+	zpci_ibv = kcalloc(num_possible_cpus(), sizeof(*zpci_ibv),
+			   GFP_KERNEL);
+	if (!zpci_ibv)
+		return -ENOMEM;
+
+	for_each_possible_cpu(cpu) {
+		/*
+		 * Per CPU IRQ vectors look the same but bit-allocation
+		 * is only done on the first vector.
+		 */
+		zpci_ibv[cpu] = airq_iv_create(cache_line_size() * BITS_PER_BYTE,
+					       AIRQ_IV_DATA |
+					       AIRQ_IV_CACHELINE |
+					       (!cpu ? AIRQ_IV_ALLOC : 0), NULL);
+		if (!zpci_ibv[cpu])
+			return -ENOMEM;
+	}
+	on_each_cpu(cpu_enable_directed_irq, NULL, 1);
+
+	zpci_irq_chip.irq_set_affinity = zpci_set_irq_affinity;
+
+	return 0;
+}
+
+static int __init zpci_floating_irq_init(void)
+{
+	zpci_ibv = kcalloc(ZPCI_NR_DEVICES, sizeof(*zpci_ibv), GFP_KERNEL);
+	if (!zpci_ibv)
+		return -ENOMEM;
+
+	zpci_sbv = airq_iv_create(ZPCI_NR_DEVICES, AIRQ_IV_ALLOC, NULL);
+	if (!zpci_sbv)
+		goto out_free;
+
+	return 0;
+
+out_free:
+	kfree(zpci_ibv);
+	return -ENOMEM;
+}
+
+int __init zpci_irq_init(void)
+{
+	union zpci_sic_iib iib = {{0}};
+	int rc;
+
+	irq_delivery = sclp.has_dirq ? DIRECTED : FLOATING;
+	if (s390_pci_force_floating)
+		irq_delivery = FLOATING;
+
+	if (irq_delivery == DIRECTED)
+		zpci_airq.handler = zpci_directed_irq_handler;
+
+	rc = register_adapter_interrupt(&zpci_airq);
+	if (rc)
+		goto out;
+	/* Set summary to 1 to be called every time for the ISC. */
+	*zpci_airq.lsi_ptr = 1;
+
+	switch (irq_delivery) {
+	case FLOATING:
+		rc = zpci_floating_irq_init();
+		break;
+	case DIRECTED:
+		rc = zpci_directed_irq_init();
+		break;
+	}
+
+	if (rc)
+		goto out_airq;
+
+	/*
+	 * Enable floating IRQs (with suppression after one IRQ). When using
+	 * directed IRQs this enables the fallback path.
+	 */
+	zpci_set_irq_ctrl(SIC_IRQ_MODE_SINGLE, PCI_ISC, &iib);
+
+	return 0;
+out_airq:
+	unregister_adapter_interrupt(&zpci_airq);
+out:
+	return rc;
+}
+
+void __init zpci_irq_exit(void)
+{
+	unsigned int cpu;
+
+	if (irq_delivery == DIRECTED) {
+		for_each_possible_cpu(cpu) {
+			airq_iv_release(zpci_ibv[cpu]);
+		}
+	}
+	kfree(zpci_ibv);
+	if (zpci_sbv)
+		airq_iv_release(zpci_sbv);
+	unregister_adapter_interrupt(&zpci_airq);
+}
diff --git a/arch/s390/pci/pci_kvm_hook.c b/arch/s390/pci/pci_kvm_hook.c
new file mode 100644
index 0000000000..ff34baf50a
--- /dev/null
+++ b/arch/s390/pci/pci_kvm_hook.c
@@ -0,0 +1,11 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * VFIO ZPCI devices support
+ *
+ * Copyright (C) IBM Corp. 2022.  All rights reserved.
+ *	Author(s): Pierre Morel <pmorel@linux.ibm.com>
+ */
+#include <linux/kvm_host.h>
+
+struct zpci_kvm_hook zpci_kvm_hook;
+EXPORT_SYMBOL_GPL(zpci_kvm_hook);
diff --git a/arch/s390/pci/pci_mmio.c b/arch/s390/pci/pci_mmio.c
new file mode 100644
index 0000000000..a90499c087
--- /dev/null
+++ b/arch/s390/pci/pci_mmio.c
@@ -0,0 +1,335 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Access to PCI I/O memory from user space programs.
+ *
+ * Copyright IBM Corp. 2014
+ * Author(s): Alexey Ishchuk <aishchuk@linux.vnet.ibm.com>
+ */
+#include <linux/kernel.h>
+#include <linux/syscalls.h>
+#include <linux/init.h>
+#include <linux/mm.h>
+#include <linux/errno.h>
+#include <linux/pci.h>
+#include <asm/asm-extable.h>
+#include <asm/pci_io.h>
+#include <asm/pci_debug.h>
+
+static inline void zpci_err_mmio(u8 cc, u8 status, u64 offset)
+{
+	struct {
+		u64 offset;
+		u8 cc;
+		u8 status;
+	} data = {offset, cc, status};
+
+	zpci_err_hex(&data, sizeof(data));
+}
+
+static inline int __pcistb_mio_inuser(
+		void __iomem *ioaddr, const void __user *src,
+		u64 len, u8 *status)
+{
+	int cc = -ENXIO;
+
+	asm volatile (
+		"       sacf 256\n"
+		"0:     .insn   rsy,0xeb00000000d4,%[len],%[ioaddr],%[src]\n"
+		"1:     ipm     %[cc]\n"
+		"       srl     %[cc],28\n"
+		"2:     sacf 768\n"
+		EX_TABLE(0b, 2b) EX_TABLE(1b, 2b)
+		: [cc] "+d" (cc), [len] "+d" (len)
+		: [ioaddr] "a" (ioaddr), [src] "Q" (*((u8 __force *)src))
+		: "cc", "memory");
+	*status = len >> 24 & 0xff;
+	return cc;
+}
+
+static inline int __pcistg_mio_inuser(
+		void __iomem *ioaddr, const void __user *src,
+		u64 ulen, u8 *status)
+{
+	union register_pair ioaddr_len = {.even = (u64 __force)ioaddr, .odd = ulen};
+	int cc = -ENXIO;
+	u64 val = 0;
+	u64 cnt = ulen;
+	u8 tmp;
+
+	/*
+	 * copy 0 < @len <= 8 bytes from @src into the right most bytes of
+	 * a register, then store it to PCI at @ioaddr while in secondary
+	 * address space. pcistg then uses the user mappings.
+	 */
+	asm volatile (
+		"       sacf    256\n"
+		"0:     llgc    %[tmp],0(%[src])\n"
+		"4:	sllg	%[val],%[val],8\n"
+		"       aghi    %[src],1\n"
+		"       ogr     %[val],%[tmp]\n"
+		"       brctg   %[cnt],0b\n"
+		"1:     .insn   rre,0xb9d40000,%[val],%[ioaddr_len]\n"
+		"2:     ipm     %[cc]\n"
+		"       srl     %[cc],28\n"
+		"3:     sacf    768\n"
+		EX_TABLE(0b, 3b) EX_TABLE(4b, 3b) EX_TABLE(1b, 3b) EX_TABLE(2b, 3b)
+		:
+		[src] "+a" (src), [cnt] "+d" (cnt),
+		[val] "+d" (val), [tmp] "=d" (tmp),
+		[cc] "+d" (cc), [ioaddr_len] "+&d" (ioaddr_len.pair)
+		:: "cc", "memory");
+	*status = ioaddr_len.odd >> 24 & 0xff;
+
+	/* did we read everything from user memory? */
+	if (!cc && cnt != 0)
+		cc = -EFAULT;
+
+	return cc;
+}
+
+static inline int __memcpy_toio_inuser(void __iomem *dst,
+				   const void __user *src, size_t n)
+{
+	int size, rc = 0;
+	u8 status = 0;
+
+	if (!src)
+		return -EINVAL;
+
+	while (n > 0) {
+		size = zpci_get_max_io_size((u64 __force) dst,
+					    (u64 __force) src, n,
+					    ZPCI_MAX_WRITE_SIZE);
+		if (size > 8) /* main path */
+			rc = __pcistb_mio_inuser(dst, src, size, &status);
+		else
+			rc = __pcistg_mio_inuser(dst, src, size, &status);
+		if (rc)
+			break;
+		src += size;
+		dst += size;
+		n -= size;
+	}
+	if (rc)
+		zpci_err_mmio(rc, status, (__force u64) dst);
+	return rc;
+}
+
+SYSCALL_DEFINE3(s390_pci_mmio_write, unsigned long, mmio_addr,
+		const void __user *, user_buffer, size_t, length)
+{
+	u8 local_buf[64];
+	void __iomem *io_addr;
+	void *buf;
+	struct vm_area_struct *vma;
+	pte_t *ptep;
+	spinlock_t *ptl;
+	long ret;
+
+	if (!zpci_is_enabled())
+		return -ENODEV;
+
+	if (length <= 0 || PAGE_SIZE - (mmio_addr & ~PAGE_MASK) < length)
+		return -EINVAL;
+
+	/*
+	 * We only support write access to MIO capable devices if we are on
+	 * a MIO enabled system. Otherwise we would have to check for every
+	 * address if it is a special ZPCI_ADDR and would have to do
+	 * a pfn lookup which we don't need for MIO capable devices.  Currently
+	 * ISM devices are the only devices without MIO support and there is no
+	 * known need for accessing these from userspace.
+	 */
+	if (static_branch_likely(&have_mio)) {
+		ret = __memcpy_toio_inuser((void  __iomem *) mmio_addr,
+					user_buffer,
+					length);
+		return ret;
+	}
+
+	if (length > 64) {
+		buf = kmalloc(length, GFP_KERNEL);
+		if (!buf)
+			return -ENOMEM;
+	} else
+		buf = local_buf;
+
+	ret = -EFAULT;
+	if (copy_from_user(buf, user_buffer, length))
+		goto out_free;
+
+	mmap_read_lock(current->mm);
+	ret = -EINVAL;
+	vma = vma_lookup(current->mm, mmio_addr);
+	if (!vma)
+		goto out_unlock_mmap;
+	if (!(vma->vm_flags & (VM_IO | VM_PFNMAP)))
+		goto out_unlock_mmap;
+	ret = -EACCES;
+	if (!(vma->vm_flags & VM_WRITE))
+		goto out_unlock_mmap;
+
+	ret = follow_pte(vma->vm_mm, mmio_addr, &ptep, &ptl);
+	if (ret)
+		goto out_unlock_mmap;
+
+	io_addr = (void __iomem *)((pte_pfn(*ptep) << PAGE_SHIFT) |
+			(mmio_addr & ~PAGE_MASK));
+
+	if ((unsigned long) io_addr < ZPCI_IOMAP_ADDR_BASE)
+		goto out_unlock_pt;
+
+	ret = zpci_memcpy_toio(io_addr, buf, length);
+out_unlock_pt:
+	pte_unmap_unlock(ptep, ptl);
+out_unlock_mmap:
+	mmap_read_unlock(current->mm);
+out_free:
+	if (buf != local_buf)
+		kfree(buf);
+	return ret;
+}
+
+static inline int __pcilg_mio_inuser(
+		void __user *dst, const void __iomem *ioaddr,
+		u64 ulen, u8 *status)
+{
+	union register_pair ioaddr_len = {.even = (u64 __force)ioaddr, .odd = ulen};
+	u64 cnt = ulen;
+	int shift = ulen * 8;
+	int cc = -ENXIO;
+	u64 val, tmp;
+
+	/*
+	 * read 0 < @len <= 8 bytes from the PCI memory mapped at @ioaddr (in
+	 * user space) into a register using pcilg then store these bytes at
+	 * user address @dst
+	 */
+	asm volatile (
+		"       sacf    256\n"
+		"0:     .insn   rre,0xb9d60000,%[val],%[ioaddr_len]\n"
+		"1:     ipm     %[cc]\n"
+		"       srl     %[cc],28\n"
+		"       ltr     %[cc],%[cc]\n"
+		"       jne     4f\n"
+		"2:     ahi     %[shift],-8\n"
+		"       srlg    %[tmp],%[val],0(%[shift])\n"
+		"3:     stc     %[tmp],0(%[dst])\n"
+		"5:	aghi	%[dst],1\n"
+		"       brctg   %[cnt],2b\n"
+		"4:     sacf    768\n"
+		EX_TABLE(0b, 4b) EX_TABLE(1b, 4b) EX_TABLE(3b, 4b) EX_TABLE(5b, 4b)
+		:
+		[ioaddr_len] "+&d" (ioaddr_len.pair),
+		[cc] "+d" (cc), [val] "=d" (val),
+		[dst] "+a" (dst), [cnt] "+d" (cnt), [tmp] "=d" (tmp),
+		[shift] "+d" (shift)
+		:: "cc", "memory");
+
+	/* did we write everything to the user space buffer? */
+	if (!cc && cnt != 0)
+		cc = -EFAULT;
+
+	*status = ioaddr_len.odd >> 24 & 0xff;
+	return cc;
+}
+
+static inline int __memcpy_fromio_inuser(void __user *dst,
+				     const void __iomem *src,
+				     unsigned long n)
+{
+	int size, rc = 0;
+	u8 status;
+
+	while (n > 0) {
+		size = zpci_get_max_io_size((u64 __force) src,
+					    (u64 __force) dst, n,
+					    ZPCI_MAX_READ_SIZE);
+		rc = __pcilg_mio_inuser(dst, src, size, &status);
+		if (rc)
+			break;
+		src += size;
+		dst += size;
+		n -= size;
+	}
+	if (rc)
+		zpci_err_mmio(rc, status, (__force u64) dst);
+	return rc;
+}
+
+SYSCALL_DEFINE3(s390_pci_mmio_read, unsigned long, mmio_addr,
+		void __user *, user_buffer, size_t, length)
+{
+	u8 local_buf[64];
+	void __iomem *io_addr;
+	void *buf;
+	struct vm_area_struct *vma;
+	pte_t *ptep;
+	spinlock_t *ptl;
+	long ret;
+
+	if (!zpci_is_enabled())
+		return -ENODEV;
+
+	if (length <= 0 || PAGE_SIZE - (mmio_addr & ~PAGE_MASK) < length)
+		return -EINVAL;
+
+	/*
+	 * We only support read access to MIO capable devices if we are on
+	 * a MIO enabled system. Otherwise we would have to check for every
+	 * address if it is a special ZPCI_ADDR and would have to do
+	 * a pfn lookup which we don't need for MIO capable devices.  Currently
+	 * ISM devices are the only devices without MIO support and there is no
+	 * known need for accessing these from userspace.
+	 */
+	if (static_branch_likely(&have_mio)) {
+		ret = __memcpy_fromio_inuser(
+				user_buffer, (const void __iomem *)mmio_addr,
+				length);
+		return ret;
+	}
+
+	if (length > 64) {
+		buf = kmalloc(length, GFP_KERNEL);
+		if (!buf)
+			return -ENOMEM;
+	} else {
+		buf = local_buf;
+	}
+
+	mmap_read_lock(current->mm);
+	ret = -EINVAL;
+	vma = vma_lookup(current->mm, mmio_addr);
+	if (!vma)
+		goto out_unlock_mmap;
+	if (!(vma->vm_flags & (VM_IO | VM_PFNMAP)))
+		goto out_unlock_mmap;
+	ret = -EACCES;
+	if (!(vma->vm_flags & VM_WRITE))
+		goto out_unlock_mmap;
+
+	ret = follow_pte(vma->vm_mm, mmio_addr, &ptep, &ptl);
+	if (ret)
+		goto out_unlock_mmap;
+
+	io_addr = (void __iomem *)((pte_pfn(*ptep) << PAGE_SHIFT) |
+			(mmio_addr & ~PAGE_MASK));
+
+	if ((unsigned long) io_addr < ZPCI_IOMAP_ADDR_BASE) {
+		ret = -EFAULT;
+		goto out_unlock_pt;
+	}
+	ret = zpci_memcpy_fromio(buf, io_addr, length);
+
+out_unlock_pt:
+	pte_unmap_unlock(ptep, ptl);
+out_unlock_mmap:
+	mmap_read_unlock(current->mm);
+
+	if (!ret && copy_to_user(user_buffer, buf, length))
+		ret = -EFAULT;
+
+	if (buf != local_buf)
+		kfree(buf);
+	return ret;
+}
diff --git a/arch/s390/pci/pci_sysfs.c b/arch/s390/pci/pci_sysfs.c
new file mode 100644
index 0000000000..cae280e5c0
--- /dev/null
+++ b/arch/s390/pci/pci_sysfs.c
@@ -0,0 +1,239 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright IBM Corp. 2012
+ *
+ * Author(s):
+ *   Jan Glauber <jang@linux.vnet.ibm.com>
+ */
+
+#define KMSG_COMPONENT "zpci"
+#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
+
+#include <linux/kernel.h>
+#include <linux/stat.h>
+#include <linux/pci.h>
+
+#include "../../../drivers/pci/pci.h"
+
+#include <asm/sclp.h>
+
+#define zpci_attr(name, fmt, member)					\
+static ssize_t name##_show(struct device *dev,				\
+			   struct device_attribute *attr, char *buf)	\
+{									\
+	struct zpci_dev *zdev = to_zpci(to_pci_dev(dev));		\
+									\
+	return sprintf(buf, fmt, zdev->member);				\
+}									\
+static DEVICE_ATTR_RO(name)
+
+zpci_attr(function_id, "0x%08x\n", fid);
+zpci_attr(function_handle, "0x%08x\n", fh);
+zpci_attr(pchid, "0x%04x\n", pchid);
+zpci_attr(pfgid, "0x%02x\n", pfgid);
+zpci_attr(vfn, "0x%04x\n", vfn);
+zpci_attr(pft, "0x%02x\n", pft);
+zpci_attr(port, "%d\n", port);
+zpci_attr(uid, "0x%x\n", uid);
+zpci_attr(segment0, "0x%02x\n", pfip[0]);
+zpci_attr(segment1, "0x%02x\n", pfip[1]);
+zpci_attr(segment2, "0x%02x\n", pfip[2]);
+zpci_attr(segment3, "0x%02x\n", pfip[3]);
+
+static ssize_t mio_enabled_show(struct device *dev,
+				struct device_attribute *attr, char *buf)
+{
+	struct zpci_dev *zdev = to_zpci(to_pci_dev(dev));
+
+	return sprintf(buf, zpci_use_mio(zdev) ? "1\n" : "0\n");
+}
+static DEVICE_ATTR_RO(mio_enabled);
+
+static ssize_t recover_store(struct device *dev, struct device_attribute *attr,
+			     const char *buf, size_t count)
+{
+	struct kernfs_node *kn;
+	struct pci_dev *pdev = to_pci_dev(dev);
+	struct zpci_dev *zdev = to_zpci(pdev);
+	int ret = 0;
+
+	/* Can't use device_remove_self() here as that would lead us to lock
+	 * the pci_rescan_remove_lock while holding the device' kernfs lock.
+	 * This would create a possible deadlock with disable_slot() which is
+	 * not directly protected by the device' kernfs lock but takes it
+	 * during the device removal which happens under
+	 * pci_rescan_remove_lock.
+	 *
+	 * This is analogous to sdev_store_delete() in
+	 * drivers/scsi/scsi_sysfs.c
+	 */
+	kn = sysfs_break_active_protection(&dev->kobj, &attr->attr);
+	WARN_ON_ONCE(!kn);
+	/* device_remove_file() serializes concurrent calls ignoring all but
+	 * the first
+	 */
+	device_remove_file(dev, attr);
+
+	/* A concurrent call to recover_store() may slip between
+	 * sysfs_break_active_protection() and the sysfs file removal.
+	 * Once it unblocks from pci_lock_rescan_remove() the original pdev
+	 * will already be removed.
+	 */
+	pci_lock_rescan_remove();
+	if (pci_dev_is_added(pdev)) {
+		pci_stop_and_remove_bus_device(pdev);
+		if (zdev->dma_table) {
+			ret = zpci_dma_exit_device(zdev);
+			if (ret)
+				goto out;
+		}
+
+		if (zdev_enabled(zdev)) {
+			ret = zpci_disable_device(zdev);
+			/*
+			 * Due to a z/VM vs LPAR inconsistency in the error
+			 * state the FH may indicate an enabled device but
+			 * disable says the device is already disabled don't
+			 * treat it as an error here.
+			 */
+			if (ret == -EINVAL)
+				ret = 0;
+			if (ret)
+				goto out;
+		}
+
+		ret = zpci_enable_device(zdev);
+		if (ret)
+			goto out;
+		ret = zpci_dma_init_device(zdev);
+		if (ret) {
+			zpci_disable_device(zdev);
+			goto out;
+		}
+		pci_rescan_bus(zdev->zbus->bus);
+	}
+out:
+	pci_unlock_rescan_remove();
+	if (kn)
+		sysfs_unbreak_active_protection(kn);
+	return ret ? ret : count;
+}
+static DEVICE_ATTR_WO(recover);
+
+static ssize_t util_string_read(struct file *filp, struct kobject *kobj,
+				struct bin_attribute *attr, char *buf,
+				loff_t off, size_t count)
+{
+	struct device *dev = kobj_to_dev(kobj);
+	struct pci_dev *pdev = to_pci_dev(dev);
+	struct zpci_dev *zdev = to_zpci(pdev);
+
+	return memory_read_from_buffer(buf, count, &off, zdev->util_str,
+				       sizeof(zdev->util_str));
+}
+static BIN_ATTR_RO(util_string, CLP_UTIL_STR_LEN);
+
+static ssize_t report_error_write(struct file *filp, struct kobject *kobj,
+				  struct bin_attribute *attr, char *buf,
+				  loff_t off, size_t count)
+{
+	struct zpci_report_error_header *report = (void *) buf;
+	struct device *dev = kobj_to_dev(kobj);
+	struct pci_dev *pdev = to_pci_dev(dev);
+	struct zpci_dev *zdev = to_zpci(pdev);
+	int ret;
+
+	if (off || (count < sizeof(*report)))
+		return -EINVAL;
+
+	ret = sclp_pci_report(report, zdev->fh, zdev->fid);
+
+	return ret ? ret : count;
+}
+static BIN_ATTR(report_error, S_IWUSR, NULL, report_error_write, PAGE_SIZE);
+
+static ssize_t uid_is_unique_show(struct device *dev,
+				  struct device_attribute *attr, char *buf)
+{
+	return sysfs_emit(buf, "%d\n", zpci_unique_uid ? 1 : 0);
+}
+static DEVICE_ATTR_RO(uid_is_unique);
+
+#ifndef CONFIG_DMI
+/* analogous to smbios index */
+static ssize_t index_show(struct device *dev,
+			  struct device_attribute *attr, char *buf)
+{
+	struct zpci_dev *zdev = to_zpci(to_pci_dev(dev));
+	u32 index = ~0;
+
+	if (zpci_unique_uid)
+		index = zdev->uid;
+
+	return sysfs_emit(buf, "%u\n", index);
+}
+static DEVICE_ATTR_RO(index);
+
+static umode_t zpci_index_is_visible(struct kobject *kobj,
+				     struct attribute *attr, int n)
+{
+	return zpci_unique_uid ? attr->mode : 0;
+}
+
+static struct attribute *zpci_ident_attrs[] = {
+	&dev_attr_index.attr,
+	NULL,
+};
+
+static struct attribute_group zpci_ident_attr_group = {
+	.attrs = zpci_ident_attrs,
+	.is_visible = zpci_index_is_visible,
+};
+#endif
+
+static struct bin_attribute *zpci_bin_attrs[] = {
+	&bin_attr_util_string,
+	&bin_attr_report_error,
+	NULL,
+};
+
+static struct attribute *zpci_dev_attrs[] = {
+	&dev_attr_function_id.attr,
+	&dev_attr_function_handle.attr,
+	&dev_attr_pchid.attr,
+	&dev_attr_pfgid.attr,
+	&dev_attr_pft.attr,
+	&dev_attr_port.attr,
+	&dev_attr_vfn.attr,
+	&dev_attr_uid.attr,
+	&dev_attr_recover.attr,
+	&dev_attr_mio_enabled.attr,
+	&dev_attr_uid_is_unique.attr,
+	NULL,
+};
+
+static struct attribute_group zpci_attr_group = {
+	.attrs = zpci_dev_attrs,
+	.bin_attrs = zpci_bin_attrs,
+};
+
+static struct attribute *pfip_attrs[] = {
+	&dev_attr_segment0.attr,
+	&dev_attr_segment1.attr,
+	&dev_attr_segment2.attr,
+	&dev_attr_segment3.attr,
+	NULL,
+};
+static struct attribute_group pfip_attr_group = {
+	.name = "pfip",
+	.attrs = pfip_attrs,
+};
+
+const struct attribute_group *zpci_attr_groups[] = {
+	&zpci_attr_group,
+	&pfip_attr_group,
+#ifndef CONFIG_DMI
+	&zpci_ident_attr_group,
+#endif
+	NULL,
+};
diff --git a/arch/s390/purgatory/.gitignore b/arch/s390/purgatory/.gitignore
new file mode 100644
index 0000000000..97ca527794
--- /dev/null
+++ b/arch/s390/purgatory/.gitignore
@@ -0,0 +1,5 @@
+# SPDX-License-Identifier: GPL-2.0-only
+purgatory
+purgatory.chk
+purgatory.lds
+purgatory.ro
diff --git a/arch/s390/purgatory/Makefile b/arch/s390/purgatory/Makefile
new file mode 100644
index 0000000000..4e930f5668
--- /dev/null
+++ b/arch/s390/purgatory/Makefile
@@ -0,0 +1,54 @@
+# SPDX-License-Identifier: GPL-2.0
+
+OBJECT_FILES_NON_STANDARD := y
+
+purgatory-y := head.o purgatory.o string.o sha256.o mem.o
+
+targets += $(purgatory-y) purgatory.lds purgatory purgatory.chk purgatory.ro
+PURGATORY_OBJS = $(addprefix $(obj)/,$(purgatory-y))
+
+$(obj)/sha256.o: $(srctree)/lib/crypto/sha256.c FORCE
+	$(call if_changed_rule,cc_o_c)
+
+CFLAGS_sha256.o := -D__DISABLE_EXPORTS -D__NO_FORTIFY
+
+$(obj)/mem.o: $(srctree)/arch/s390/lib/mem.S FORCE
+	$(call if_changed_rule,as_o_S)
+
+KCOV_INSTRUMENT := n
+GCOV_PROFILE := n
+UBSAN_SANITIZE := n
+KASAN_SANITIZE := n
+KCSAN_SANITIZE := n
+
+KBUILD_CFLAGS := -fno-strict-aliasing -Wall -Wstrict-prototypes
+KBUILD_CFLAGS += -Wno-pointer-sign -Wno-sign-compare
+KBUILD_CFLAGS += -fno-zero-initialized-in-bss -fno-builtin -ffreestanding
+KBUILD_CFLAGS += -Os -m64 -msoft-float -fno-common
+KBUILD_CFLAGS += -fno-stack-protector
+KBUILD_CFLAGS += -DDISABLE_BRANCH_PROFILING
+KBUILD_CFLAGS += $(CLANG_FLAGS)
+KBUILD_CFLAGS += $(call cc-option,-fno-PIE)
+KBUILD_AFLAGS := $(filter-out -DCC_USING_EXPOLINE,$(KBUILD_AFLAGS))
+
+# Since we link purgatory with -r unresolved symbols are not checked, so we
+# also link a purgatory.chk binary without -r to check for unresolved symbols.
+PURGATORY_LDFLAGS := -nostdlib -z nodefaultlib
+LDFLAGS_purgatory := -r $(PURGATORY_LDFLAGS) -T
+LDFLAGS_purgatory.chk := -e purgatory_start $(PURGATORY_LDFLAGS)
+$(obj)/purgatory: $(obj)/purgatory.lds $(PURGATORY_OBJS) FORCE
+		$(call if_changed,ld)
+
+$(obj)/purgatory.chk: $(obj)/purgatory FORCE
+		$(call if_changed,ld)
+
+OBJCOPYFLAGS_purgatory.ro := -O elf64-s390
+OBJCOPYFLAGS_purgatory.ro += --remove-section='*debug*'
+OBJCOPYFLAGS_purgatory.ro += --remove-section='.comment'
+OBJCOPYFLAGS_purgatory.ro += --remove-section='.note.*'
+$(obj)/purgatory.ro: $(obj)/purgatory $(obj)/purgatory.chk FORCE
+		$(call if_changed,objcopy)
+
+$(obj)/kexec-purgatory.o: $(obj)/purgatory.ro
+
+obj-y += kexec-purgatory.o
diff --git a/arch/s390/purgatory/head.S b/arch/s390/purgatory/head.S
new file mode 100644
index 0000000000..0f93f2e72e
--- /dev/null
+++ b/arch/s390/purgatory/head.S
@@ -0,0 +1,265 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Purgatory setup code
+ *
+ * Copyright IBM Corp. 2018
+ *
+ * Author(s): Philipp Rudo <prudo@linux.vnet.ibm.com>
+ */
+
+#include <linux/linkage.h>
+#include <asm/asm-offsets.h>
+#include <asm/page.h>
+#include <asm/sigp.h>
+#include <asm/ptrace.h>
+
+/* The purgatory is the code running between two kernels. It's main purpose
+ * is to verify that the next kernel was not corrupted after load and to
+ * start it.
+ *
+ * If the next kernel is a crash kernel there are some peculiarities to
+ * consider:
+ *
+ * First the purgatory is called twice. Once only to verify the
+ * sha digest. So if the crash kernel got corrupted the old kernel can try
+ * to trigger a stand-alone dumper. And once to actually load the crash kernel.
+ *
+ * Second the purgatory also has to swap the crash memory region with its
+ * destination at address 0. As the purgatory is part of crash memory this
+ * requires some finesse. The tactic here is that the purgatory first copies
+ * itself to the end of the destination and then swaps the rest of the
+ * memory running from there.
+ */
+
+#define bufsz purgatory_end-stack
+
+.macro MEMCPY dst,src,len
+	lgr	%r0,\dst
+	lgr	%r1,\len
+	lgr	%r2,\src
+	lgr	%r3,\len
+
+20:	mvcle	%r0,%r2,0
+	jo	20b
+.endm
+
+.macro MEMSWAP dst,src,buf,len
+10:	larl	%r0,purgatory_end
+	larl	%r1,stack
+	slgr	%r0,%r1
+	cgr	\len,%r0
+	jh	11f
+	lgr	%r4,\len
+	j	12f
+11:	lgr	%r4,%r0
+
+12:	MEMCPY	\buf,\dst,%r4
+	MEMCPY	\dst,\src,%r4
+	MEMCPY	\src,\buf,%r4
+
+	agr	\dst,%r4
+	agr	\src,%r4
+	sgr	\len,%r4
+
+	cghi	\len,0
+	jh	10b
+.endm
+
+.macro START_NEXT_KERNEL base subcode
+	lg	%r4,kernel_entry-\base(%r13)
+	lg	%r5,load_psw_mask-\base(%r13)
+	ogr	%r4,%r5
+	stg	%r4,0(%r0)
+
+	xgr	%r0,%r0
+	lghi	%r1,\subcode
+	diag	%r0,%r1,0x308
+.endm
+
+	.text
+	.balign PAGE_SIZE
+SYM_CODE_START(purgatory_start)
+	/* The purgatory might be called after a diag308 so better set
+	 * architecture and addressing mode.
+	 */
+	lhi	%r1,1
+	sigp	%r1,%r0,SIGP_SET_ARCHITECTURE
+	sam64
+
+	larl	%r5,gprregs
+	stmg	%r6,%r15,0(%r5)
+
+	basr	%r13,0
+.base_crash:
+
+	/* Setup stack */
+	larl	%r15,purgatory_end-STACK_FRAME_OVERHEAD
+
+	/* If the next kernel is KEXEC_TYPE_CRASH the purgatory is called
+	 * directly with a flag passed in %r2 whether the purgatory shall do
+	 * checksum verification only (%r2 = 0 -> verification only).
+	 *
+	 * Check now and preserve over C function call by storing in
+	 * %r10 with
+	 *	1 -> checksum verification only
+	 *	0 -> load new kernel
+	 */
+	lghi	%r10,0
+	lg	%r11,kernel_type-.base_crash(%r13)
+	cghi	%r11,1		/* KEXEC_TYPE_CRASH */
+	jne	.do_checksum_verification
+	cghi	%r2,0		/* checksum verification only */
+	jne	.do_checksum_verification
+	lghi	%r10,1
+
+.do_checksum_verification:
+	brasl	%r14,verify_sha256_digest
+
+	cghi	%r10,1		/* checksum verification only */
+	je	.return_old_kernel
+	cghi	%r2,0		/* checksum match */
+	jne	.disabled_wait
+
+	/* If the next kernel is a crash kernel the purgatory has to swap
+	 * the mem regions first.
+	 */
+	cghi	%r11,1 /* KEXEC_TYPE_CRASH */
+	je	.start_crash_kernel
+
+	/* start normal kernel */
+	START_NEXT_KERNEL .base_crash 0
+
+.return_old_kernel:
+	lmg	%r6,%r15,gprregs-.base_crash(%r13)
+	br	%r14
+
+.disabled_wait:
+	lpswe	disabled_wait_psw-.base_crash(%r13)
+
+.start_crash_kernel:
+	/* Location of purgatory_start in crash memory */
+	larl	%r0,.base_crash
+	larl	%r1,purgatory_start
+	slgr	%r0,%r1
+	lgr	%r8,%r13
+	sgr	%r8,%r0
+
+	/* Destination for this code i.e. end of memory to be swapped. */
+	larl	%r0,purgatory_end
+	larl	%r1,purgatory_start
+	slgr	%r0,%r1
+	lg	%r9,crash_size-.base_crash(%r13)
+	sgr	%r9,%r0
+
+	/* Destination in crash memory, i.e. same as r9 but in crash memory. */
+	lg	%r10,crash_start-.base_crash(%r13)
+	agr	%r10,%r9
+
+	/* Buffer location (in crash memory) and size. As the purgatory is
+	 * behind the point of no return it can re-use the stack as buffer.
+	 */
+	larl	%r11,purgatory_end
+	larl	%r12,stack
+	slgr	%r11,%r12
+
+	MEMCPY	%r12,%r9,%r11	/* dst	-> (crash) buf */
+	MEMCPY	%r9,%r8,%r11	/* self -> dst */
+
+	/* Jump to new location. */
+	lgr	%r7,%r9
+	larl	%r0,.jump_to_dst
+	larl	%r1,purgatory_start
+	slgr	%r0,%r1
+	agr	%r7,%r0
+	br	%r7
+
+.jump_to_dst:
+	basr	%r13,0
+.base_dst:
+
+	/* clear buffer */
+	MEMCPY	%r12,%r10,%r11	/* (crash) buf -> (crash) dst */
+
+	/* Load new buffer location after jump */
+	larl	%r7,stack
+	lgr	%r0,%r7
+	larl	%r1,purgatory_start
+	slgr	%r0,%r1
+	agr	%r10,%r0
+	MEMCPY	%r10,%r7,%r11	/* (new) buf -> (crash) buf */
+
+	/* Now the code is set up to run from its designated location. Start
+	 * swapping the rest of crash memory now.
+	 *
+	 * The registers will be used as follow:
+	 *
+	 *	%r0-%r4	reserved for macros defined above
+	 *	%r5-%r6 tmp registers
+	 *	%r7	pointer to current struct sha region
+	 *	%r8	index to iterate over all sha regions
+	 *	%r9	pointer in crash memory
+	 *	%r10	pointer in old kernel
+	 *	%r11	total size (still) to be moved
+	 *	%r12	pointer to buffer
+	 */
+	lgr	%r12,%r7
+	lgr	%r11,%r9
+	lghi	%r10,0
+	lg	%r9,crash_start-.base_dst(%r13)
+	lghi	%r8,16	/* KEXEC_SEGMENTS_MAX */
+	larl	%r7,purgatory_sha_regions
+
+	j .loop_first
+
+	/* Loop over all purgatory_sha_regions. */
+.loop_next:
+	aghi	%r8,-1
+	cghi	%r8,0
+	je	.loop_out
+
+	aghi	%r7,__KEXEC_SHA_REGION_SIZE
+
+.loop_first:
+	lg	%r5,__KEXEC_SHA_REGION_START(%r7)
+	cghi	%r5,0
+	je	.loop_next
+
+	/* Copy [end last sha region, start current sha region) */
+	/* Note: kexec_sha_region->start points in crash memory */
+	sgr	%r5,%r9
+	MEMCPY	%r9,%r10,%r5
+
+	agr	%r9,%r5
+	agr	%r10,%r5
+	sgr	%r11,%r5
+
+	/* Swap sha region */
+	lg	%r6,__KEXEC_SHA_REGION_LEN(%r7)
+	MEMSWAP	%r9,%r10,%r12,%r6
+	sg	%r11,__KEXEC_SHA_REGION_LEN(%r7)
+	j	.loop_next
+
+.loop_out:
+	/* Copy rest of crash memory */
+	MEMCPY	%r9,%r10,%r11
+
+	/* start crash kernel */
+	START_NEXT_KERNEL .base_dst 1
+SYM_CODE_END(purgatory_start)
+
+SYM_DATA_LOCAL(load_psw_mask,		.long 0x00080000,0x80000000)
+	.balign	8
+SYM_DATA_LOCAL(disabled_wait_psw,	.quad 0x0002000180000000,.do_checksum_verification)
+SYM_DATA_LOCAL(gprregs,			.fill 10,8,0)
+SYM_DATA(purgatory_sha256_digest,	.skip 32)
+SYM_DATA(purgatory_sha_regions,		.skip 16*__KEXEC_SHA_REGION_SIZE)
+SYM_DATA(kernel_entry,			.skip 8)
+SYM_DATA(kernel_type,			.skip 8)
+SYM_DATA(crash_start,			.skip 8)
+SYM_DATA(crash_size,			.skip 8)
+	.balign	PAGE_SIZE
+SYM_DATA_START_LOCAL(stack)
+	/* The buffer to move this code must be as big as the code. */
+	.skip	stack-purgatory_start
+	.balign	PAGE_SIZE
+SYM_DATA_END_LABEL(stack, SYM_L_LOCAL, purgatory_end)
diff --git a/arch/s390/purgatory/kexec-purgatory.S b/arch/s390/purgatory/kexec-purgatory.S
new file mode 100644
index 0000000000..25f512b1de
--- /dev/null
+++ b/arch/s390/purgatory/kexec-purgatory.S
@@ -0,0 +1,12 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#include <linux/linkage.h>
+
+	.section .rodata, "a"
+
+	.balign	8
+SYM_DATA_START(kexec_purgatory)
+	.incbin	"arch/s390/purgatory/purgatory.ro"
+SYM_DATA_END_LABEL(kexec_purgatory, SYM_L_LOCAL, kexec_purgatory_end)
+
+	.balign	8
+SYM_DATA(kexec_purgatory_size, .quad kexec_purgatory_end-kexec_purgatory)
diff --git a/arch/s390/purgatory/purgatory.c b/arch/s390/purgatory/purgatory.c
new file mode 100644
index 0000000000..030efda05d
--- /dev/null
+++ b/arch/s390/purgatory/purgatory.c
@@ -0,0 +1,33 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Purgatory code running between two kernels.
+ *
+ * Copyright IBM Corp. 2018
+ *
+ * Author(s): Philipp Rudo <prudo@linux.vnet.ibm.com>
+ */
+
+#include <linux/kexec.h>
+#include <linux/string.h>
+#include <crypto/sha2.h>
+#include <asm/purgatory.h>
+
+int verify_sha256_digest(void)
+{
+	struct kexec_sha_region *ptr, *end;
+	u8 digest[SHA256_DIGEST_SIZE];
+	struct sha256_state sctx;
+
+	sha256_init(&sctx);
+	end = purgatory_sha_regions + ARRAY_SIZE(purgatory_sha_regions);
+
+	for (ptr = purgatory_sha_regions; ptr < end; ptr++)
+		sha256_update(&sctx, (uint8_t *)(ptr->start), ptr->len);
+
+	sha256_final(&sctx, digest);
+
+	if (memcmp(digest, purgatory_sha256_digest, sizeof(digest)))
+		return 1;
+
+	return 0;
+}
diff --git a/arch/s390/purgatory/purgatory.lds.S b/arch/s390/purgatory/purgatory.lds.S
new file mode 100644
index 0000000000..482eb4fbce
--- /dev/null
+++ b/arch/s390/purgatory/purgatory.lds.S
@@ -0,0 +1,54 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#include <asm-generic/vmlinux.lds.h>
+
+OUTPUT_FORMAT("elf64-s390", "elf64-s390", "elf64-s390")
+OUTPUT_ARCH(s390:64-bit)
+
+ENTRY(purgatory_start)
+
+SECTIONS
+{
+	. = 0;
+	.head.text : {
+		_head = . ;
+		HEAD_TEXT
+		_ehead = . ;
+	}
+	.text :	{
+		_text = .;	/* Text */
+		*(.text)
+		*(.text.*)
+		_etext = . ;
+	}
+	.rodata : {
+		_rodata = . ;
+		*(.rodata)	 /* read-only data */
+		*(.rodata.*)
+		_erodata = . ;
+	}
+	.data :	{
+		_data = . ;
+		*(.data)
+		*(.data.*)
+		_edata = . ;
+	}
+
+	. = ALIGN(256);
+	.bss : {
+		_bss = . ;
+		*(.bss)
+		*(.bss.*)
+		*(COMMON)
+		. = ALIGN(8);	/* For convenience during zeroing */
+		_ebss = .;
+	}
+	_end = .;
+
+	/* Sections to be discarded */
+	/DISCARD/ : {
+		*(.eh_frame)
+		*(*__ksymtab*)
+		*(___kcrctab*)
+	}
+}
diff --git a/arch/s390/purgatory/string.c b/arch/s390/purgatory/string.c
new file mode 100644
index 0000000000..c98c22a72d
--- /dev/null
+++ b/arch/s390/purgatory/string.c
@@ -0,0 +1,3 @@
+// SPDX-License-Identifier: GPL-2.0
+#define __HAVE_ARCH_MEMCMP	/* arch function */
+#include "../lib/string.c"
diff --git a/arch/s390/tools/.gitignore b/arch/s390/tools/.gitignore
new file mode 100644
index 0000000000..ea62f37b79
--- /dev/null
+++ b/arch/s390/tools/.gitignore
@@ -0,0 +1,3 @@
+# SPDX-License-Identifier: GPL-2.0-only
+gen_facilities
+gen_opcode_table
diff --git a/arch/s390/tools/Makefile b/arch/s390/tools/Makefile
new file mode 100644
index 0000000000..f9dd47ff9a
--- /dev/null
+++ b/arch/s390/tools/Makefile
@@ -0,0 +1,27 @@
+# SPDX-License-Identifier: GPL-2.0
+#
+# Makefile for s390 specific build tools
+#
+
+kapi := arch/$(ARCH)/include/generated/asm
+kapi-hdrs-y := $(kapi)/facility-defs.h $(kapi)/dis-defs.h
+
+PHONY += kapi
+
+kapi:	$(kapi-hdrs-y)
+
+hostprogs		    += gen_facilities
+hostprogs		    += gen_opcode_table
+
+HOSTCFLAGS_gen_facilities.o += $(LINUXINCLUDE)
+
+filechk_facility-defs.h = $(obj)/gen_facilities
+
+filechk_dis-defs.h = \
+	$(obj)/gen_opcode_table < $(srctree)/arch/$(ARCH)/tools/opcodes.txt
+
+$(kapi)/facility-defs.h: $(obj)/gen_facilities FORCE
+	$(call filechk,facility-defs.h)
+
+$(kapi)/dis-defs.h: $(obj)/gen_opcode_table FORCE
+	$(call filechk,dis-defs.h)
diff --git a/arch/s390/tools/gcc-thunk-extern.sh b/arch/s390/tools/gcc-thunk-extern.sh
new file mode 100755
index 0000000000..20bcbf6dd7
--- /dev/null
+++ b/arch/s390/tools/gcc-thunk-extern.sh
@@ -0,0 +1,24 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+# Borrowed from gcc: gcc/testsuite/gcc.target/s390/nobp-section-type-conflict.c
+# Checks that we don't get error: section type conflict with ‘put_page’.
+
+cat << "END" | $@ -x c - -fno-PIE -march=z10 -mindirect-branch=thunk-extern -mfunction-return=thunk-extern -mindirect-branch-table -O2 -c -o /dev/null
+int a;
+int b (void);
+void c (int);
+
+static void
+put_page (void)
+{
+  if (b ())
+    c (a);
+}
+
+__attribute__ ((__section__ (".init.text"), __cold__)) void
+d (void)
+{
+  put_page ();
+  put_page ();
+}
+END
diff --git a/arch/s390/tools/gen_facilities.c b/arch/s390/tools/gen_facilities.c
new file mode 100644
index 0000000000..cb0aff5c01
--- /dev/null
+++ b/arch/s390/tools/gen_facilities.c
@@ -0,0 +1,168 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Simple program to generate defines out of facility lists that use the bit
+ * numbering scheme from the Princples of Operations: most significant bit
+ * has bit number 0.
+ *
+ *    Copyright IBM Corp. 2015, 2018
+ *
+ */
+
+#include <strings.h>
+#include <string.h>
+#include <stdlib.h>
+#include <stdio.h>
+
+struct facility_def {
+	char *name;
+	int *bits;
+};
+
+static struct facility_def facility_defs[] = {
+	{
+		/*
+		 * FACILITIES_ALS contains the list of facilities that are
+		 * required to run a kernel that is compiled e.g. with
+		 * -march=<machine>.
+		 */
+		.name = "FACILITIES_ALS",
+		.bits = (int[]){
+			0,  /* N3 instructions */
+			1,  /* z/Arch mode installed */
+			18, /* long displacement facility */
+			21, /* extended-immediate facility */
+			25, /* store clock fast */
+			27, /* mvcos */
+			32, /* compare and swap and store */
+			33, /* compare and swap and store 2 */
+			34, /* general instructions extension */
+			35, /* execute extensions */
+#ifdef CONFIG_HAVE_MARCH_Z196_FEATURES
+			45, /* fast-BCR, etc. */
+#endif
+#ifdef CONFIG_HAVE_MARCH_ZEC12_FEATURES
+			49, /* misc-instruction-extensions */
+			52, /* interlocked facility 2 */
+#endif
+#ifdef CONFIG_HAVE_MARCH_Z13_FEATURES
+			53, /* load-and-zero-rightmost-byte, etc. */
+#endif
+#ifdef CONFIG_HAVE_MARCH_Z14_FEATURES
+			58, /* miscellaneous-instruction-extension 2 */
+#endif
+#ifdef CONFIG_HAVE_MARCH_Z15_FEATURES
+			61, /* miscellaneous-instruction-extension 3 */
+#endif
+			-1 /* END */
+		}
+	},
+	{
+		/*
+		 * FACILITIES_KVM contains the list of facilities that are part
+		 * of the default facility mask and list that are passed to the
+		 * initial CPU model. If no CPU model is used, this, together
+		 * with the non-hypervisor managed bits, is the maximum list of
+		 * guest facilities supported by KVM.
+		 */
+		.name = "FACILITIES_KVM",
+		.bits = (int[]){
+			0,  /* N3 instructions */
+			1,  /* z/Arch mode installed */
+			2,  /* z/Arch mode active */
+			3,  /* DAT-enhancement */
+			4,  /* idte segment table */
+			5,  /* idte region table */
+			6,  /* ASN-and-LX reuse */
+			7,  /* stfle */
+			8,  /* enhanced-DAT 1 */
+			9,  /* sense-running-status */
+			10, /* conditional sske */
+			13, /* ipte-range */
+			14, /* nonquiescing key-setting */
+			73, /* transactional execution */
+			75, /* access-exception-fetch/store indication */
+			76, /* msa extension 3 */
+			77, /* msa extension 4 */
+			78, /* enhanced-DAT 2 */
+			130, /* instruction-execution-protection */
+			131, /* enhanced-SOP 2 and side-effect */
+			139, /* multiple epoch facility */
+			146, /* msa extension 8 */
+			150, /* enhanced sort */
+			151, /* deflate conversion */
+			155, /* msa extension 9 */
+			-1  /* END */
+		}
+	},
+	{
+		/*
+		 * FACILITIES_KVM_CPUMODEL contains the list of facilities
+		 * that can be enabled by CPU model code if the host supports
+		 * it. These facilities are not passed to the guest without
+		 * CPU model support.
+		 */
+
+		.name = "FACILITIES_KVM_CPUMODEL",
+		.bits = (int[]){
+			12, /* AP Query Configuration Information */
+			15, /* AP Facilities Test */
+			156, /* etoken facility */
+			165, /* nnpa facility */
+			193, /* bear enhancement facility */
+			194, /* rdp enhancement facility */
+			196, /* processor activity instrumentation facility */
+			197, /* processor activity instrumentation extension 1 */
+			-1  /* END */
+		}
+	},
+};
+
+static void print_facility_list(struct facility_def *def)
+{
+	unsigned int high, bit, dword, i;
+	unsigned long long *array;
+
+	array = calloc(1, 8);
+	if (!array)
+		exit(EXIT_FAILURE);
+	high = 0;
+	for (i = 0; def->bits[i] != -1; i++) {
+		bit = 63 - (def->bits[i] & 63);
+		dword = def->bits[i] / 64;
+		if (dword > high) {
+			array = realloc(array, (dword + 1) * 8);
+			if (!array)
+				exit(EXIT_FAILURE);
+			memset(array + high + 1, 0, (dword - high) * 8);
+			high = dword;
+		}
+		array[dword] |= 1ULL << bit;
+	}
+	printf("#define %s ", def->name);
+	for (i = 0; i <= high; i++)
+		printf("_AC(0x%016llx,UL)%c", array[i], i < high ? ',' : '\n');
+	free(array);
+}
+
+static void print_facility_lists(void)
+{
+	unsigned int i;
+
+	for (i = 0; i < sizeof(facility_defs) / sizeof(facility_defs[0]); i++)
+		print_facility_list(&facility_defs[i]);
+}
+
+int main(int argc, char **argv)
+{
+	printf("#ifndef __ASM_S390_FACILITY_DEFS__\n");
+	printf("#define __ASM_S390_FACILITY_DEFS__\n");
+	printf("/*\n");
+	printf(" * DO NOT MODIFY.\n");
+	printf(" *\n");
+	printf(" * This file was generated by %s\n", __FILE__);
+	printf(" */\n\n");
+	printf("#include <linux/const.h>\n\n");
+	print_facility_lists();
+	printf("\n#endif\n");
+	return 0;
+}
diff --git a/arch/s390/tools/gen_opcode_table.c b/arch/s390/tools/gen_opcode_table.c
new file mode 100644
index 0000000000..a1bc02b29c
--- /dev/null
+++ b/arch/s390/tools/gen_opcode_table.c
@@ -0,0 +1,337 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Generate opcode table initializers for the in-kernel disassembler.
+ *
+ *    Copyright IBM Corp. 2017
+ *
+ */
+
+#include <stdlib.h>
+#include <string.h>
+#include <ctype.h>
+#include <stdio.h>
+
+#define STRING_SIZE_MAX 20
+
+struct insn_type {
+	unsigned char byte;
+	unsigned char mask;
+	char **format;
+};
+
+struct insn {
+	struct insn_type *type;
+	char opcode[STRING_SIZE_MAX];
+	char name[STRING_SIZE_MAX];
+	char upper[STRING_SIZE_MAX];
+	char format[STRING_SIZE_MAX];
+	unsigned int name_len;
+};
+
+struct insn_group {
+	struct insn_type *type;
+	int offset;
+	int count;
+	char opcode[2];
+};
+
+struct insn_format {
+	char *format;
+	int type;
+};
+
+struct gen_opcode {
+	struct insn *insn;
+	int nr;
+	struct insn_group *group;
+	int nr_groups;
+};
+
+/*
+ * Table of instruction format types. Each opcode is defined with at
+ * least one byte (two nibbles), three nibbles, or two bytes (four
+ * nibbles).
+ * The byte member of each instruction format type entry defines
+ * within which byte of an instruction the third (and fourth) nibble
+ * of an opcode can be found. The mask member is the and-mask that
+ * needs to be applied on this byte in order to get the third (and
+ * fourth) nibble of the opcode.
+ * The format array defines all instruction formats (as defined in the
+ * Principles of Operation) which have the same position of the opcode
+ * nibbles.
+ * A special case are instruction formats with 1-byte opcodes. In this
+ * case the byte member always is zero, so that the mask is applied on
+ * the (only) byte that contains the opcode.
+ */
+static struct insn_type insn_type_table[] = {
+	{
+		.byte = 0,
+		.mask = 0xff,
+		.format = (char *[]) {
+			"MII",
+			"RR",
+			"RS",
+			"RSI",
+			"RX",
+			"SI",
+			"SMI",
+			"SS",
+			NULL,
+		},
+	},
+	{
+		.byte = 1,
+		.mask = 0x0f,
+		.format = (char *[]) {
+			"RI",
+			"RIL",
+			"SSF",
+			NULL,
+		},
+	},
+	{
+		.byte = 1,
+		.mask = 0xff,
+		.format = (char *[]) {
+			"E",
+			"IE",
+			"RRE",
+			"RRF",
+			"RRR",
+			"S",
+			"SIL",
+			"SSE",
+			NULL,
+		},
+	},
+	{
+		.byte = 5,
+		.mask = 0xff,
+		.format = (char *[]) {
+			"RIE",
+			"RIS",
+			"RRS",
+			"RSE",
+			"RSL",
+			"RSY",
+			"RXE",
+			"RXF",
+			"RXY",
+			"SIY",
+			"VRI",
+			"VRR",
+			"VRS",
+			"VRV",
+			"VRX",
+			"VSI",
+			NULL,
+		},
+	},
+};
+
+static struct insn_type *insn_format_to_type(char *format)
+{
+	char tmp[STRING_SIZE_MAX];
+	char *base_format, **ptr;
+	int i;
+
+	strcpy(tmp, format);
+	base_format = tmp;
+	base_format = strsep(&base_format, "_");
+	for (i = 0; i < sizeof(insn_type_table) / sizeof(insn_type_table[0]); i++) {
+		ptr = insn_type_table[i].format;
+		while (*ptr) {
+			if (!strcmp(base_format, *ptr))
+				return &insn_type_table[i];
+			ptr++;
+		}
+	}
+	exit(EXIT_FAILURE);
+}
+
+static void read_instructions(struct gen_opcode *desc)
+{
+	struct insn insn;
+	int rc, i;
+
+	while (1) {
+		rc = scanf("%s %s %s", insn.opcode, insn.name, insn.format);
+		if (rc == EOF)
+			break;
+		if (rc != 3)
+			exit(EXIT_FAILURE);
+		insn.type = insn_format_to_type(insn.format);
+		insn.name_len = strlen(insn.name);
+		for (i = 0; i <= insn.name_len; i++)
+			insn.upper[i] = toupper((unsigned char)insn.name[i]);
+		desc->nr++;
+		desc->insn = realloc(desc->insn, desc->nr * sizeof(*desc->insn));
+		if (!desc->insn)
+			exit(EXIT_FAILURE);
+		desc->insn[desc->nr - 1] = insn;
+	}
+}
+
+static int cmpformat(const void *a, const void *b)
+{
+	return strcmp(((struct insn *)a)->format, ((struct insn *)b)->format);
+}
+
+static void print_formats(struct gen_opcode *desc)
+{
+	char *format;
+	int i, count;
+
+	qsort(desc->insn, desc->nr, sizeof(*desc->insn), cmpformat);
+	format = "";
+	count = 0;
+	printf("enum {\n");
+	for (i = 0; i < desc->nr; i++) {
+		if (!strcmp(format, desc->insn[i].format))
+			continue;
+		count++;
+		format = desc->insn[i].format;
+		printf("\tINSTR_%s,\n", format);
+	}
+	printf("}; /* %d */\n\n", count);
+}
+
+static int cmp_long_insn(const void *a, const void *b)
+{
+	return strcmp(((struct insn *)a)->name, ((struct insn *)b)->name);
+}
+
+static void print_long_insn(struct gen_opcode *desc)
+{
+	struct insn *insn;
+	int i, count;
+
+	qsort(desc->insn, desc->nr, sizeof(*desc->insn), cmp_long_insn);
+	count = 0;
+	printf("enum {\n");
+	for (i = 0; i < desc->nr; i++) {
+		insn = &desc->insn[i];
+		if (insn->name_len < 6)
+			continue;
+		printf("\tLONG_INSN_%s,\n", insn->upper);
+		count++;
+	}
+	printf("}; /* %d */\n\n", count);
+
+	printf("#define LONG_INSN_INITIALIZER { \\\n");
+	for (i = 0; i < desc->nr; i++) {
+		insn = &desc->insn[i];
+		if (insn->name_len < 6)
+			continue;
+		printf("\t[LONG_INSN_%s] = \"%s\", \\\n", insn->upper, insn->name);
+	}
+	printf("}\n\n");
+}
+
+static void print_opcode(struct insn *insn, int nr)
+{
+	char *opcode;
+
+	opcode = insn->opcode;
+	if (insn->type->byte != 0)
+		opcode += 2;
+	printf("\t[%4d] = { .opfrag = 0x%s, .format = INSTR_%s, ", nr, opcode, insn->format);
+	if (insn->name_len < 6)
+		printf(".name = \"%s\" ", insn->name);
+	else
+		printf(".offset = LONG_INSN_%s ", insn->upper);
+	printf("}, \\\n");
+}
+
+static void add_to_group(struct gen_opcode *desc, struct insn *insn, int offset)
+{
+	struct insn_group *group;
+
+	group = desc->group ? &desc->group[desc->nr_groups - 1] : NULL;
+	if (group && (!strncmp(group->opcode, insn->opcode, 2) || group->type->byte == 0)) {
+		group->count++;
+		return;
+	}
+	desc->nr_groups++;
+	desc->group = realloc(desc->group, desc->nr_groups * sizeof(*desc->group));
+	if (!desc->group)
+		exit(EXIT_FAILURE);
+	group = &desc->group[desc->nr_groups - 1];
+	memcpy(group->opcode, insn->opcode, 2);
+	group->type = insn->type;
+	group->offset = offset;
+	group->count = 1;
+}
+
+static int cmpopcode(const void *a, const void *b)
+{
+	return strcmp(((struct insn *)a)->opcode, ((struct insn *)b)->opcode);
+}
+
+static void print_opcode_table(struct gen_opcode *desc)
+{
+	char opcode[2] = "";
+	struct insn *insn;
+	int i, offset;
+
+	qsort(desc->insn, desc->nr, sizeof(*desc->insn), cmpopcode);
+	printf("#define OPCODE_TABLE_INITIALIZER { \\\n");
+	offset = 0;
+	for (i = 0; i < desc->nr; i++) {
+		insn = &desc->insn[i];
+		if (insn->type->byte == 0)
+			continue;
+		add_to_group(desc, insn, offset);
+		if (strncmp(opcode, insn->opcode, 2)) {
+			memcpy(opcode, insn->opcode, 2);
+			printf("\t/* %.2s */ \\\n", opcode);
+		}
+		print_opcode(insn, offset);
+		offset++;
+	}
+	printf("\t/* 1-byte opcode instructions */ \\\n");
+	for (i = 0; i < desc->nr; i++) {
+		insn = &desc->insn[i];
+		if (insn->type->byte != 0)
+			continue;
+		add_to_group(desc, insn, offset);
+		print_opcode(insn, offset);
+		offset++;
+	}
+	printf("}\n\n");
+}
+
+static void print_opcode_table_offsets(struct gen_opcode *desc)
+{
+	struct insn_group *group;
+	int i;
+
+	printf("#define OPCODE_OFFSET_INITIALIZER { \\\n");
+	for (i = 0; i < desc->nr_groups; i++) {
+		group = &desc->group[i];
+		printf("\t{ .opcode = 0x%.2s, .mask = 0x%02x, .byte = %d, .offset = %d, .count = %d }, \\\n",
+		       group->opcode, group->type->mask, group->type->byte, group->offset, group->count);
+	}
+	printf("}\n\n");
+}
+
+int main(int argc, char **argv)
+{
+	struct gen_opcode _desc = { 0 };
+	struct gen_opcode *desc = &_desc;
+
+	read_instructions(desc);
+	printf("#ifndef __S390_GENERATED_DIS_DEFS_H__\n");
+	printf("#define __S390_GENERATED_DIS_DEFS_H__\n");
+	printf("/*\n");
+	printf(" * DO NOT MODIFY.\n");
+	printf(" *\n");
+	printf(" * This file was generated by %s\n", __FILE__);
+	printf(" */\n\n");
+	print_formats(desc);
+	print_long_insn(desc);
+	print_opcode_table(desc);
+	print_opcode_table_offsets(desc);
+	printf("#endif\n");
+	exit(EXIT_SUCCESS);
+}
diff --git a/arch/s390/tools/opcodes.txt b/arch/s390/tools/opcodes.txt
new file mode 100644
index 0000000000..5f008e7948
--- /dev/null
+++ b/arch/s390/tools/opcodes.txt
@@ -0,0 +1,1250 @@
+0000	illegal	E
+0002	brkpt	E
+0101	pr	E
+0102	upt	E
+0104	ptff	E
+0107	sckpf	E
+010a	pfpo	E
+010b	tam	E
+010c	sam24	E
+010d	sam31	E
+010e	sam64	E
+01ff	trap2	E
+04	spm	RR_R0
+05	balr	RR_RR
+06	bctr	RR_RR
+07	bcr	RR_UR
+0a	svc	RR_U0
+0b	bsm	RR_RR
+0c	bassm	RR_RR
+0d	basr	RR_RR
+0e	mvcl	RR_RR
+0f	clcl	RR_RR
+10	lpr	RR_RR
+11	lnr	RR_RR
+12	ltr	RR_RR
+13	lcr	RR_RR
+14	nr	RR_RR
+15	clr	RR_RR
+16	or	RR_RR
+17	xr	RR_RR
+18	lr	RR_RR
+19	cr	RR_RR
+1a	ar	RR_RR
+1b	sr	RR_RR
+1c	mr	RR_RR
+1d	dr	RR_RR
+1e	alr	RR_RR
+1f	slr	RR_RR
+20	lpdr	RR_FF
+21	lndr	RR_FF
+22	ltdr	RR_FF
+23	lcdr	RR_FF
+24	hdr	RR_FF
+25	ldxr	RR_FF
+26	mxr	RR_FF
+27	mxdr	RR_FF
+28	ldr	RR_FF
+29	cdr	RR_FF
+2a	adr	RR_FF
+2b	sdr	RR_FF
+2c	mdr	RR_FF
+2d	ddr	RR_FF
+2e	awr	RR_FF
+2f	swr	RR_FF
+30	lper	RR_FF
+31	lner	RR_FF
+32	lter	RR_FF
+33	lcer	RR_FF
+34	her	RR_FF
+35	ledr	RR_FF
+36	axr	RR_FF
+37	sxr	RR_FF
+38	ler	RR_FF
+39	cer	RR_FF
+3a	aer	RR_FF
+3b	ser	RR_FF
+3c	mder	RR_FF
+3d	der	RR_FF
+3e	aur	RR_FF
+3f	sur	RR_FF
+40	sth	RX_RRRD
+41	la	RX_RRRD
+42	stc	RX_RRRD
+43	ic	RX_RRRD
+44	ex	RX_RRRD
+45	bal	RX_RRRD
+46	bct	RX_RRRD
+47	bc	RX_URRD
+48	lh	RX_RRRD
+49	ch	RX_RRRD
+4a	ah	RX_RRRD
+4b	sh	RX_RRRD
+4c	mh	RX_RRRD
+4d	bas	RX_RRRD
+4e	cvd	RX_RRRD
+4f	cvb	RX_RRRD
+50	st	RX_RRRD
+51	lae	RX_RRRD
+54	n	RX_RRRD
+55	cl	RX_RRRD
+56	o	RX_RRRD
+57	x	RX_RRRD
+58	l	RX_RRRD
+59	c	RX_RRRD
+5a	a	RX_RRRD
+5b	s	RX_RRRD
+5c	m	RX_RRRD
+5d	d	RX_RRRD
+5e	al	RX_RRRD
+5f	sl	RX_RRRD
+60	std	RX_FRRD
+67	mxd	RX_FRRD
+68	ld	RX_FRRD
+69	cd	RX_FRRD
+6a	ad	RX_FRRD
+6b	sd	RX_FRRD
+6c	md	RX_FRRD
+6d	dd	RX_FRRD
+6e	aw	RX_FRRD
+6f	sw	RX_FRRD
+70	ste	RX_FRRD
+71	ms	RX_RRRD
+78	le	RX_FRRD
+79	ce	RX_FRRD
+7a	ae	RX_FRRD
+7b	se	RX_FRRD
+7c	mde	RX_FRRD
+7d	de	RX_FRRD
+7e	au	RX_FRRD
+7f	su	RX_FRRD
+80	ssm	SI_RD
+82	lpsw	SI_RD
+83	diag	RS_RRRD
+84	brxh	RSI_RRP
+85	brxle	RSI_RRP
+86	bxh	RS_RRRD
+87	bxle	RS_RRRD
+88	srl	RS_R0RD
+89	sll	RS_R0RD
+8a	sra	RS_R0RD
+8b	sla	RS_R0RD
+8c	srdl	RS_R0RD
+8d	sldl	RS_R0RD
+8e	srda	RS_R0RD
+8f	slda	RS_R0RD
+90	stm	RS_RRRD
+91	tm	SI_URD
+92	mvi	SI_URD
+93	ts	SI_RD
+94	ni	SI_URD
+95	cli	SI_URD
+96	oi	SI_URD
+97	xi	SI_URD
+98	lm	RS_RRRD
+99	trace	RS_RRRD
+9a	lam	RS_AARD
+9b	stam	RS_AARD
+a50	iihh	RI_RU
+a51	iihl	RI_RU
+a52	iilh	RI_RU
+a53	iill	RI_RU
+a54	nihh	RI_RU
+a55	nihl	RI_RU
+a56	nilh	RI_RU
+a57	nill	RI_RU
+a58	oihh	RI_RU
+a59	oihl	RI_RU
+a5a	oilh	RI_RU
+a5b	oill	RI_RU
+a5c	llihh	RI_RU
+a5d	llihl	RI_RU
+a5e	llilh	RI_RU
+a5f	llill	RI_RU
+a70	tmlh	RI_RU
+a71	tmll	RI_RU
+a72	tmhh	RI_RU
+a73	tmhl	RI_RU
+a74	brc	RI_UP
+a75	bras	RI_RP
+a76	brct	RI_RP
+a77	brctg	RI_RP
+a78	lhi	RI_RI
+a79	lghi	RI_RI
+a7a	ahi	RI_RI
+a7b	aghi	RI_RI
+a7c	mhi	RI_RI
+a7d	mghi	RI_RI
+a7e	chi	RI_RI
+a7f	cghi	RI_RI
+a8	mvcle	RS_RRRD
+a9	clcle	RS_RRRD
+aa0	rinext	RI_RI
+aa1	rion	RI_RI
+aa2	tric	RI_RI
+aa3	rioff	RI_RI
+aa4	riemit	RI_RI
+ac	stnsm	SI_URD
+ad	stosm	SI_URD
+ae	sigp	RS_RRRD
+af	mc	SI_URD
+b1	lra	RX_RRRD
+b200	lbear	S_RD
+b201	stbear	S_RD
+b202	stidp	S_RD
+b204	sck	S_RD
+b205	stck	S_RD
+b206	sckc	S_RD
+b207	stckc	S_RD
+b208	spt	S_RD
+b209	stpt	S_RD
+b20a	spka	S_RD
+b20b	ipk	S_00
+b20d	ptlb	S_00
+b210	spx	S_RD
+b211	stpx	S_RD
+b212	stap	S_RD
+b214	sie	S_RD
+b218	pc	S_RD
+b219	sac	S_RD
+b21a	cfc	S_RD
+b220	servc	RRE_RR
+b221	ipte	RRF_RURR
+b222	ipm	RRE_R0
+b223	ivsk	RRE_RR
+b224	iac	RRE_R0
+b225	ssar	RRE_R0
+b226	epar	RRE_R0
+b227	esar	RRE_R0
+b228	pt	RRE_RR
+b229	iske	RRE_RR
+b22a	rrbe	RRE_RR
+b22b	sske	RRF_U0RR
+b22c	tb	RRE_RR
+b22d	dxr	RRE_FF
+b22e	pgin	RRE_RR
+b22f	pgout	RRE_RR
+b230	csch	S_00
+b231	hsch	S_00
+b232	msch	S_RD
+b233	ssch	S_RD
+b234	stsch	S_RD
+b235	tsch	S_RD
+b236	tpi	S_RD
+b237	sal	S_00
+b238	rsch	S_00
+b239	stcrw	S_RD
+b23a	stcps	S_RD
+b23b	rchp	S_00
+b23c	schm	S_00
+b240	bakr	RRE_RR
+b241	cksm	RRE_RR
+b244	sqdr	RRE_FF
+b245	sqer	RRE_FF
+b246	stura	RRE_RR
+b247	msta	RRE_R0
+b248	palb	RRE_00
+b249	ereg	RRE_RR
+b24a	esta	RRE_RR
+b24b	lura	RRE_RR
+b24c	tar	RRE_AR
+b24d	cpya	RRE_AA
+b24e	sar	RRE_AR
+b24f	ear	RRE_RA
+b250	csp	RRE_RR
+b252	msr	RRE_RR
+b254	mvpg	RRE_RR
+b255	mvst	RRE_RR
+b256	sthyi	RRE_RR
+b257	cuse	RRE_RR
+b258	bsg	RRE_RR
+b25a	bsa	RRE_RR
+b25d	clst	RRE_RR
+b25e	srst	RRE_RR
+b25f	chsc	RRE_R0
+b263	cmpsc	RRE_RR
+b274	siga	S_RD
+b276	xsch	S_00
+b277	rp	S_RD
+b278	stcke	S_RD
+b279	sacf	S_RD
+b27c	stckf	S_RD
+b27d	stsi	S_RD
+b280	lpp	S_RD
+b284	lcctl	S_RD
+b285	lpctl	S_RD
+b286	qsi	S_RD
+b287	lsctl	S_RD
+b28e	qctri	S_RD
+b28f	qpaci	S_RD
+b299	srnm	S_RD
+b29c	stfpc	S_RD
+b29d	lfpc	S_RD
+b2a5	tre	RRE_RR
+b2a6	cu21	RRF_U0RR
+b2a7	cu12	RRF_U0RR
+b2ad	nqap	RRE_RR
+b2ae	dqap	RRE_RR
+b2af	pqap	RRE_RR
+b2b0	stfle	S_RD
+b2b1	stfl	S_RD
+b2b2	lpswe	S_RD
+b2b8	srnmb	S_RD
+b2b9	srnmt	S_RD
+b2bd	lfas	S_RD
+b2e0	scctr	RRE_RR
+b2e1	spctr	RRE_RR
+b2e4	ecctr	RRE_RR
+b2e5	epctr	RRE_RR
+b2e8	ppa	RRF_U0RR
+b2ec	etnd	RRE_R0
+b2ed	ecpga	RRE_RR
+b2f0	iucv	RRE_RR
+b2f8	tend	S_00
+b2fa	niai	IE_UU
+b2fc	tabort	S_RD
+b2ff	trap4	S_RD
+b300	lpebr	RRE_FF
+b301	lnebr	RRE_FF
+b302	ltebr	RRE_FF
+b303	lcebr	RRE_FF
+b304	ldebr	RRE_FF
+b305	lxdbr	RRE_FF
+b306	lxebr	RRE_FF
+b307	mxdbr	RRE_FF
+b308	kebr	RRE_FF
+b309	cebr	RRE_FF
+b30a	aebr	RRE_FF
+b30b	sebr	RRE_FF
+b30c	mdebr	RRE_FF
+b30d	debr	RRE_FF
+b30e	maebr	RRF_F0FF
+b30f	msebr	RRF_F0FF
+b310	lpdbr	RRE_FF
+b311	lndbr	RRE_FF
+b312	ltdbr	RRE_FF
+b313	lcdbr	RRE_FF
+b314	sqebr	RRE_FF
+b315	sqdbr	RRE_FF
+b316	sqxbr	RRE_FF
+b317	meebr	RRE_FF
+b318	kdbr	RRE_FF
+b319	cdbr	RRE_FF
+b31a	adbr	RRE_FF
+b31b	sdbr	RRE_FF
+b31c	mdbr	RRE_FF
+b31d	ddbr	RRE_FF
+b31e	madbr	RRF_F0FF
+b31f	msdbr	RRF_F0FF
+b324	lder	RRE_FF
+b325	lxdr	RRE_FF
+b326	lxer	RRE_FF
+b32e	maer	RRF_F0FF
+b32f	mser	RRF_F0FF
+b336	sqxr	RRE_FF
+b337	meer	RRE_FF
+b338	maylr	RRF_F0FF
+b339	mylr	RRF_F0FF
+b33a	mayr	RRF_F0FF
+b33b	myr	RRF_F0FF
+b33c	mayhr	RRF_F0FF
+b33d	myhr	RRF_F0FF
+b33e	madr	RRF_F0FF
+b33f	msdr	RRF_F0FF
+b340	lpxbr	RRE_FF
+b341	lnxbr	RRE_FF
+b342	ltxbr	RRE_FF
+b343	lcxbr	RRE_FF
+b344	ledbra	RRF_UUFF
+b345	ldxbra	RRF_UUFF
+b346	lexbra	RRF_UUFF
+b347	fixbra	RRF_UUFF
+b348	kxbr	RRE_FF
+b349	cxbr	RRE_FF
+b34a	axbr	RRE_FF
+b34b	sxbr	RRE_FF
+b34c	mxbr	RRE_FF
+b34d	dxbr	RRE_FF
+b350	tbedr	RRF_U0FF
+b351	tbdr	RRF_U0FF
+b353	diebr	RRF_FUFF
+b357	fiebra	RRF_UUFF
+b358	thder	RRE_FF
+b359	thdr	RRE_FF
+b35b	didbr	RRF_FUFF
+b35f	fidbra	RRF_UUFF
+b360	lpxr	RRE_FF
+b361	lnxr	RRE_FF
+b362	ltxr	RRE_FF
+b363	lcxr	RRE_FF
+b365	lxr	RRE_FF
+b366	lexr	RRE_FF
+b367	fixr	RRE_FF
+b369	cxr	RRE_FF
+b370	lpdfr	RRE_FF
+b371	lndfr	RRE_FF
+b372	cpsdr	RRF_F0FF2
+b373	lcdfr	RRE_FF
+b374	lzer	RRE_F0
+b375	lzdr	RRE_F0
+b376	lzxr	RRE_F0
+b377	fier	RRE_FF
+b37f	fidr	RRE_FF
+b384	sfpc	RRE_RR
+b385	sfasr	RRE_R0
+b38c	efpc	RRE_RR
+b390	celfbr	RRF_UUFR
+b391	cdlfbr	RRF_UUFR
+b392	cxlfbr	RRF_UUFR
+b394	cefbra	RRF_UUFR
+b395	cdfbra	RRF_UUFR
+b396	cxfbra	RRF_UUFR
+b398	cfebra	RRF_UURF
+b399	cfdbra	RRF_UURF
+b39a	cfxbra	RRF_UURF
+b39c	clfebr	RRF_UURF
+b39d	clfdbr	RRF_UURF
+b39e	clfxbr	RRF_UURF
+b3a0	celgbr	RRF_UUFR
+b3a1	cdlgbr	RRF_UUFR
+b3a2	cxlgbr	RRF_UUFR
+b3a4	cegbra	RRF_UUFR
+b3a5	cdgbra	RRF_UUFR
+b3a6	cxgbra	RRF_UUFR
+b3a8	cgebra	RRF_UURF
+b3a9	cgdbra	RRF_UURF
+b3aa	cgxbra	RRF_UURF
+b3ac	clgebr	RRF_UURF
+b3ad	clgdbr	RRF_UURF
+b3ae	clgxbr	RRF_UURF
+b3b4	cefr	RRE_FR
+b3b5	cdfr	RRE_FR
+b3b6	cxfr	RRE_FR
+b3b8	cfer	RRF_U0RF
+b3b9	cfdr	RRF_U0RF
+b3ba	cfxr	RRF_U0RF
+b3c1	ldgr	RRE_FR
+b3c4	cegr	RRE_FR
+b3c5	cdgr	RRE_FR
+b3c6	cxgr	RRE_FR
+b3c8	cger	RRF_U0RF
+b3c9	cgdr	RRF_U0RF
+b3ca	cgxr	RRF_U0RF
+b3cd	lgdr	RRE_RF
+b3d0	mdtra	RRF_FUFF2
+b3d1	ddtra	RRF_FUFF2
+b3d2	adtra	RRF_FUFF2
+b3d3	sdtra	RRF_FUFF2
+b3d4	ldetr	RRF_0UFF
+b3d5	ledtr	RRF_UUFF
+b3d6	ltdtr	RRE_FF
+b3d7	fidtr	RRF_UUFF
+b3d8	mxtra	RRF_FUFF2
+b3d9	dxtra	RRF_FUFF2
+b3da	axtra	RRF_FUFF2
+b3db	sxtra	RRF_FUFF2
+b3dc	lxdtr	RRF_0UFF
+b3dd	ldxtr	RRF_UUFF
+b3de	ltxtr	RRE_FF
+b3df	fixtr	RRF_UUFF
+b3e0	kdtr	RRE_FF
+b3e1	cgdtra	RRF_UURF
+b3e2	cudtr	RRE_RF
+b3e3	csdtr	RRF_0URF
+b3e4	cdtr	RRE_FF
+b3e5	eedtr	RRE_RF
+b3e7	esdtr	RRE_RF
+b3e8	kxtr	RRE_FF
+b3e9	cgxtra	RRF_UURF
+b3ea	cuxtr	RRE_RF
+b3eb	csxtr	RRF_0URF
+b3ec	cxtr	RRE_FF
+b3ed	eextr	RRE_RF
+b3ef	esxtr	RRE_RF
+b3f1	cdgtra	RRF_UUFR
+b3f2	cdutr	RRE_FR
+b3f3	cdstr	RRE_FR
+b3f4	cedtr	RRE_FF
+b3f5	qadtr	RRF_FUFF
+b3f6	iedtr	RRF_F0FR
+b3f7	rrdtr	RRF_FFRU
+b3f9	cxgtra	RRF_UUFR
+b3fa	cxutr	RRE_FR
+b3fb	cxstr	RRE_FR
+b3fc	cextr	RRE_FF
+b3fd	qaxtr	RRF_FUFF
+b3fe	iextr	RRF_F0FR
+b3ff	rrxtr	RRF_FFRU
+b6	stctl	RS_CCRD
+b7	lctl	RS_CCRD
+b900	lpgr	RRE_RR
+b901	lngr	RRE_RR
+b902	ltgr	RRE_RR
+b903	lcgr	RRE_RR
+b904	lgr	RRE_RR
+b905	lurag	RRE_RR
+b906	lgbr	RRE_RR
+b907	lghr	RRE_RR
+b908	agr	RRE_RR
+b909	sgr	RRE_RR
+b90a	algr	RRE_RR
+b90b	slgr	RRE_RR
+b90c	msgr	RRE_RR
+b90d	dsgr	RRE_RR
+b90e	eregg	RRE_RR
+b90f	lrvgr	RRE_RR
+b910	lpgfr	RRE_RR
+b911	lngfr	RRE_RR
+b912	ltgfr	RRE_RR
+b913	lcgfr	RRE_RR
+b914	lgfr	RRE_RR
+b916	llgfr	RRE_RR
+b917	llgtr	RRE_RR
+b918	agfr	RRE_RR
+b919	sgfr	RRE_RR
+b91a	algfr	RRE_RR
+b91b	slgfr	RRE_RR
+b91c	msgfr	RRE_RR
+b91d	dsgfr	RRE_RR
+b91e	kmac	RRE_RR
+b91f	lrvr	RRE_RR
+b920	cgr	RRE_RR
+b921	clgr	RRE_RR
+b925	sturg	RRE_RR
+b926	lbr	RRE_RR
+b927	lhr	RRE_RR
+b928	pckmo	RRE_00
+b929	kma	RRF_R0RR
+b92a	kmf	RRE_RR
+b92b	kmo	RRE_RR
+b92c	pcc	RRE_00
+b92d	kmctr	RRF_R0RR
+b92e	km	RRE_RR
+b92f	kmc	RRE_RR
+b930	cgfr	RRE_RR
+b931	clgfr	RRE_RR
+b938	sortl	RRE_RR
+b939	dfltcc	RRF_R0RR2
+b93a	kdsa	RRE_RR
+b93b	nnpa	RRE_00
+b93c	ppno	RRE_RR
+b93e	kimd	RRE_RR
+b93f	klmd	RRE_RR
+b941	cfdtr	RRF_UURF
+b942	clgdtr	RRF_UURF
+b943	clfdtr	RRF_UURF
+b946	bctgr	RRE_RR
+b949	cfxtr	RRF_UURF
+b94a	clgxtr	RRF_UURF
+b94b	clfxtr	RRF_UURF
+b951	cdftr	RRF_UUFR
+b952	cdlgtr	RRF_UUFR
+b953	cdlftr	RRF_UUFR
+b959	cxftr	RRF_UUFR
+b95a	cxlgtr	RRF_UUFR
+b95b	cxlftr	RRF_UUFR
+b960	cgrt	RRF_U0RR
+b961	clgrt	RRF_U0RR
+b964	nngrk	RRF_R0RR2
+b965	ocgrk	RRF_R0RR2
+b966	nogrk	RRF_R0RR2
+b967	nxgrk	RRF_R0RR2
+b972	crt	RRF_U0RR
+b973	clrt	RRF_U0RR
+b974	nnrk	RRF_R0RR2
+b975	ocrk	RRF_R0RR2
+b976	nork	RRF_R0RR2
+b977	nxrk	RRF_R0RR2
+b980	ngr	RRE_RR
+b981	ogr	RRE_RR
+b982	xgr	RRE_RR
+b983	flogr	RRE_RR
+b984	llgcr	RRE_RR
+b985	llghr	RRE_RR
+b986	mlgr	RRE_RR
+b987	dlgr	RRE_RR
+b988	alcgr	RRE_RR
+b989	slbgr	RRE_RR
+b98a	cspg	RRE_RR
+b98b	rdp	RRF_RURR2
+b98d	epsw	RRE_RR
+b98e	idte	RRF_RURR2
+b98f	crdte	RRF_RURR2
+b990	trtt	RRF_U0RR
+b991	trto	RRF_U0RR
+b992	trot	RRF_U0RR
+b993	troo	RRF_U0RR
+b994	llcr	RRE_RR
+b995	llhr	RRE_RR
+b996	mlr	RRE_RR
+b997	dlr	RRE_RR
+b998	alcr	RRE_RR
+b999	slbr	RRE_RR
+b99a	epair	RRE_R0
+b99b	esair	RRE_R0
+b99c	eqbs	RRF_U0RR
+b99d	esea	RRE_R0
+b99e	pti	RRE_RR
+b99f	ssair	RRE_R0
+b9a0	clp	RRF_U0RR
+b9a1	tpei	RRE_RR
+b9a2	ptf	RRE_R0
+b9a4	uvc	RRF_URR
+b9aa	lptea	RRF_RURR2
+b9ab	essa	RRF_U0RR
+b9ac	irbm	RRE_RR
+b9ae	rrbm	RRE_RR
+b9af	pfmf	RRE_RR
+b9b0	cu14	RRF_U0RR
+b9b1	cu24	RRF_U0RR
+b9b2	cu41	RRE_RR
+b9b3	cu42	RRE_RR
+b9bd	trtre	RRF_U0RR
+b9be	srstu	RRE_RR
+b9bf	trte	RRF_U0RR
+b9c0	selfhr	RRF_RURR
+b9c8	ahhhr	RRF_R0RR2
+b9c9	shhhr	RRF_R0RR2
+b9ca	alhhhr	RRF_R0RR2
+b9cb	slhhhr	RRF_R0RR2
+b9cd	chhr	RRE_RR
+b9cf	clhhr	RRE_RR
+b9d0	pcistg	RRE_RR
+b9d2	pcilg	RRE_RR
+b9d3	rpcit	RRE_RR
+b9d4	pcistgi	RRE_RR
+b9d5	pciwb	RRE_00
+b9d6	pcilgi	RRE_RR
+b9d8	ahhlr	RRF_R0RR2
+b9d9	shhlr	RRF_R0RR2
+b9da	alhhlr	RRF_R0RR2
+b9db	slhhlr	RRF_R0RR2
+b9dd	chlr	RRE_RR
+b9df	clhlr	RRE_RR
+b9e0	locfhr	RRF_U0RR
+b9e1	popcnt	RRF_U0RR
+b9e2	locgr	RRF_U0RR
+b9e3	selgr	RRF_RURR
+b9e4	ngrk	RRF_R0RR2
+b9e5	ncgrk	RRF_R0RR2
+b9e6	ogrk	RRF_R0RR2
+b9e7	xgrk	RRF_R0RR2
+b9e8	agrk	RRF_R0RR2
+b9e9	sgrk	RRF_R0RR2
+b9ea	algrk	RRF_R0RR2
+b9eb	slgrk	RRF_R0RR2
+b9ec	mgrk	RRF_R0RR2
+b9ed	msgrkc	RRF_R0RR2
+b9f0	selr	RRF_RURR
+b9f2	locr	RRF_U0RR
+b9f4	nrk	RRF_R0RR2
+b9f5	ncrk	RRF_R0RR2
+b9f6	ork	RRF_R0RR2
+b9f7	xrk	RRF_R0RR2
+b9f8	ark	RRF_R0RR2
+b9f9	srk	RRF_R0RR2
+b9fa	alrk	RRF_R0RR2
+b9fb	slrk	RRF_R0RR2
+b9fd	msrkc	RRF_R0RR2
+ba	cs	RS_RRRD
+bb	cds	RS_RRRD
+bd	clm	RS_RURD
+be	stcm	RS_RURD
+bf	icm	RS_RURD
+c00	larl	RIL_RP
+c01	lgfi	RIL_RI
+c04	brcl	RIL_UP
+c05	brasl	RIL_RP
+c06	xihf	RIL_RU
+c07	xilf	RIL_RU
+c08	iihf	RIL_RU
+c09	iilf	RIL_RU
+c0a	nihf	RIL_RU
+c0b	nilf	RIL_RU
+c0c	oihf	RIL_RU
+c0d	oilf	RIL_RU
+c0e	llihf	RIL_RU
+c0f	llilf	RIL_RU
+c20	msgfi	RIL_RI
+c21	msfi	RIL_RI
+c24	slgfi	RIL_RU
+c25	slfi	RIL_RU
+c28	agfi	RIL_RI
+c29	afi	RIL_RI
+c2a	algfi	RIL_RU
+c2b	alfi	RIL_RU
+c2c	cgfi	RIL_RI
+c2d	cfi	RIL_RI
+c2e	clgfi	RIL_RU
+c2f	clfi	RIL_RU
+c42	llhrl	RIL_RP
+c44	lghrl	RIL_RP
+c45	lhrl	RIL_RP
+c46	llghrl	RIL_RP
+c47	sthrl	RIL_RP
+c48	lgrl	RIL_RP
+c4b	stgrl	RIL_RP
+c4c	lgfrl	RIL_RP
+c4d	lrl	RIL_RP
+c4e	llgfrl	RIL_RP
+c4f	strl	RIL_RP
+c5	bprp	MII_UPP
+c60	exrl	RIL_RP
+c62	pfdrl	RIL_UP
+c64	cghrl	RIL_RP
+c65	chrl	RIL_RP
+c66	clghrl	RIL_RP
+c67	clhrl	RIL_RP
+c68	cgrl	RIL_RP
+c6a	clgrl	RIL_RP
+c6c	cgfrl	RIL_RP
+c6d	crl	RIL_RP
+c6e	clgfrl	RIL_RP
+c6f	clrl	RIL_RP
+c7	bpp	SMI_U0RDP
+c80	mvcos	SSF_RRDRD
+c81	ectg	SSF_RRDRD
+c82	csst	SSF_RRDRD
+c84	lpd	SSF_RRDRD2
+c85	lpdg	SSF_RRDRD2
+cc6	brcth	RIL_RP
+cc8	aih	RIL_RI
+cca	alsih	RIL_RI
+ccb	alsihn	RIL_RI
+ccd	cih	RIL_RI
+ccf	clih	RIL_RU
+d0	trtr	SS_L0RDRD
+d1	mvn	SS_L0RDRD
+d2	mvc	SS_L0RDRD
+d3	mvz	SS_L0RDRD
+d4	nc	SS_L0RDRD
+d5	clc	SS_L0RDRD
+d6	oc	SS_L0RDRD
+d7	xc	SS_L0RDRD
+d9	mvck	SS_RRRDRD
+da	mvcp	SS_RRRDRD
+db	mvcs	SS_RRRDRD
+dc	tr	SS_L0RDRD
+dd	trt	SS_L0RDRD
+de	ed	SS_L0RDRD
+df	edmk	SS_L0RDRD
+e1	pku	SS_L2RDRD
+e2	unpku	SS_L0RDRD
+e302	ltg	RXY_RRRD
+e303	lrag	RXY_RRRD
+e304	lg	RXY_RRRD
+e306	cvby	RXY_RRRD
+e308	ag	RXY_RRRD
+e309	sg	RXY_RRRD
+e30a	alg	RXY_RRRD
+e30b	slg	RXY_RRRD
+e30c	msg	RXY_RRRD
+e30d	dsg	RXY_RRRD
+e30e	cvbg	RXY_RRRD
+e30f	lrvg	RXY_RRRD
+e312	lt	RXY_RRRD
+e313	lray	RXY_RRRD
+e314	lgf	RXY_RRRD
+e315	lgh	RXY_RRRD
+e316	llgf	RXY_RRRD
+e317	llgt	RXY_RRRD
+e318	agf	RXY_RRRD
+e319	sgf	RXY_RRRD
+e31a	algf	RXY_RRRD
+e31b	slgf	RXY_RRRD
+e31c	msgf	RXY_RRRD
+e31d	dsgf	RXY_RRRD
+e31e	lrv	RXY_RRRD
+e31f	lrvh	RXY_RRRD
+e320	cg	RXY_RRRD
+e321	clg	RXY_RRRD
+e324	stg	RXY_RRRD
+e325	ntstg	RXY_RRRD
+e326	cvdy	RXY_RRRD
+e32a	lzrg	RXY_RRRD
+e32e	cvdg	RXY_RRRD
+e32f	strvg	RXY_RRRD
+e330	cgf	RXY_RRRD
+e331	clgf	RXY_RRRD
+e332	ltgf	RXY_RRRD
+e334	cgh	RXY_RRRD
+e336	pfd	RXY_URRD
+e338	agh	RXY_RRRD
+e339	sgh	RXY_RRRD
+e33a	llzrgf	RXY_RRRD
+e33b	lzrf	RXY_RRRD
+e33c	mgh	RXY_RRRD
+e33e	strv	RXY_RRRD
+e33f	strvh	RXY_RRRD
+e346	bctg	RXY_RRRD
+e347	bic	RXY_URRD
+e348	llgfsg	RXY_RRRD
+e349	stgsc	RXY_RRRD
+e34c	lgg	RXY_RRRD
+e34d	lgsc	RXY_RRRD
+e350	sty	RXY_RRRD
+e351	msy	RXY_RRRD
+e353	msc	RXY_RRRD
+e354	ny	RXY_RRRD
+e355	cly	RXY_RRRD
+e356	oy	RXY_RRRD
+e357	xy	RXY_RRRD
+e358	ly	RXY_RRRD
+e359	cy	RXY_RRRD
+e35a	ay	RXY_RRRD
+e35b	sy	RXY_RRRD
+e35c	mfy	RXY_RRRD
+e35e	aly	RXY_RRRD
+e35f	sly	RXY_RRRD
+e370	sthy	RXY_RRRD
+e371	lay	RXY_RRRD
+e372	stcy	RXY_RRRD
+e373	icy	RXY_RRRD
+e375	laey	RXY_RRRD
+e376	lb	RXY_RRRD
+e377	lgb	RXY_RRRD
+e378	lhy	RXY_RRRD
+e379	chy	RXY_RRRD
+e37a	ahy	RXY_RRRD
+e37b	shy	RXY_RRRD
+e37c	mhy	RXY_RRRD
+e380	ng	RXY_RRRD
+e381	og	RXY_RRRD
+e382	xg	RXY_RRRD
+e383	msgc	RXY_RRRD
+e384	mg	RXY_RRRD
+e385	lgat	RXY_RRRD
+e386	mlg	RXY_RRRD
+e387	dlg	RXY_RRRD
+e388	alcg	RXY_RRRD
+e389	slbg	RXY_RRRD
+e38e	stpq	RXY_RRRD
+e38f	lpq	RXY_RRRD
+e390	llgc	RXY_RRRD
+e391	llgh	RXY_RRRD
+e394	llc	RXY_RRRD
+e395	llh	RXY_RRRD
+e396	ml	RXY_RRRD
+e397	dl	RXY_RRRD
+e398	alc	RXY_RRRD
+e399	slb	RXY_RRRD
+e39c	llgtat	RXY_RRRD
+e39d	llgfat	RXY_RRRD
+e39f	lat	RXY_RRRD
+e3c0	lbh	RXY_RRRD
+e3c2	llch	RXY_RRRD
+e3c3	stch	RXY_RRRD
+e3c4	lhh	RXY_RRRD
+e3c6	llhh	RXY_RRRD
+e3c7	sthh	RXY_RRRD
+e3c8	lfhat	RXY_RRRD
+e3ca	lfh	RXY_RRRD
+e3cb	stfh	RXY_RRRD
+e3cd	chf	RXY_RRRD
+e3cf	clhf	RXY_RRRD
+e3d0	mpcifc	RXY_RRRD
+e3d4	stpcifc	RXY_RRRD
+e500	lasp	SSE_RDRD
+e501	tprot	SSE_RDRD
+e502	strag	SSE_RDRD
+e50a	mvcrl	SSE_RDRD
+e50e	mvcsk	SSE_RDRD
+e50f	mvcdk	SSE_RDRD
+e544	mvhhi	SIL_RDI
+e548	mvghi	SIL_RDI
+e54c	mvhi	SIL_RDI
+e554	chhsi	SIL_RDI
+e555	clhhsi	SIL_RDU
+e558	cghsi	SIL_RDI
+e559	clghsi	SIL_RDU
+e55c	chsi	SIL_RDI
+e55d	clfhsi	SIL_RDU
+e560	tbegin	SIL_RDU
+e561	tbeginc	SIL_RDU
+e601	vlebrh	VRX_VRRDU
+e602	vlebrg	VRX_VRRDU
+e603	vlebrf	VRX_VRRDU
+e604	vllebrz	VRX_VRRDU
+e605	vlbrrep	VRX_VRRDU
+e606	vlbr	VRX_VRRDU
+e607	vler	VRX_VRRDU
+e609	vstebrh	VRX_VRRDU
+e60a	vstebrg	VRX_VRRDU
+e60b	vstebrf	VRX_VRRDU
+e60e	vstbr	VRX_VRRDU
+e60f	vster	VRX_VRRDU
+e634	vpkz	VSI_URDV
+e635	vlrl	VSI_URDV
+e637	vlrlr	VRS_RRDV
+e63c	vupkz	VSI_URDV
+e63d	vstrl	VSI_URDV
+e63f	vstrlr	VRS_RRDV
+e649	vlip	VRI_V0UU2
+e650	vcvb	VRR_RV0UU
+e651	vclzdp	VRR_VV0U2
+e652	vcvbg	VRR_RV0UU
+e654	vupkzh	VRR_VV0U2
+e655	vcnf	VRR_VV0UU2
+e656	vclfnh	VRR_VV0UU2
+e658	vcvd	VRI_VR0UU
+e659	vsrp	VRI_VVUUU2
+e65a	vcvdg	VRI_VR0UU
+e65b	vpsop	VRI_VVUUU2
+e65c	vupkzl	VRR_VV0U2
+e65d	vcfn	VRR_VV0UU2
+e65e	vclfnl	VRR_VV0UU2
+e65f	vtp	VRR_0V
+e670	vpkzr	VRI_VVV0UU2
+e671	vap	VRI_VVV0UU2
+e672	vsrpr	VRI_VVV0UU2
+e673	vsp	VRI_VVV0UU2
+e674	vschp	VRR_VVV0U0U
+e675	vcrnf	VRR_VVV0UU
+e677	vcp	VRR_0VV0U
+e678	vmp	VRI_VVV0UU2
+e679	vmsp	VRI_VVV0UU2
+e67a	vdp	VRI_VVV0UU2
+e67b	vrp	VRI_VVV0UU2
+e67c	vscshp	VRR_VVV
+e67d	vcsph	VRR_VVV0U0
+e67e	vsdp	VRI_VVV0UU2
+e700	vleb	VRX_VRRDU
+e701	vleh	VRX_VRRDU
+e702	vleg	VRX_VRRDU
+e703	vlef	VRX_VRRDU
+e704	vllez	VRX_VRRDU
+e705	vlrep	VRX_VRRDU
+e706	vl	VRX_VRRDU
+e707	vlbb	VRX_VRRDU
+e708	vsteb	VRX_VRRDU
+e709	vsteh	VRX_VRRDU
+e70a	vsteg	VRX_VRRDU
+e70b	vstef	VRX_VRRDU
+e70e	vst	VRX_VRRDU
+e712	vgeg	VRV_VVXRDU
+e713	vgef	VRV_VVXRDU
+e71a	vsceg	VRV_VVXRDU
+e71b	vscef	VRV_VVXRDU
+e721	vlgv	VRS_RVRDU
+e722	vlvg	VRS_VRRDU
+e727	lcbb	RXE_RRRDU
+e730	vesl	VRS_VVRDU
+e733	verll	VRS_VVRDU
+e736	vlm	VRS_VVRDU
+e737	vll	VRS_VRRD
+e738	vesrl	VRS_VVRDU
+e73a	vesra	VRS_VVRDU
+e73e	vstm	VRS_VVRDU
+e73f	vstl	VRS_VRRD
+e740	vleib	VRI_V0IU
+e741	vleih	VRI_V0IU
+e742	vleig	VRI_V0IU
+e743	vleif	VRI_V0IU
+e744	vgbm	VRI_V0U
+e745	vrepi	VRI_V0IU
+e746	vgm	VRI_V0UUU
+e74a	vftci	VRI_VVUUU
+e74d	vrep	VRI_VVUU
+e750	vpopct	VRR_VV0U
+e752	vctz	VRR_VV0U
+e753	vclz	VRR_VV0U
+e756	vlr	VRX_VV
+e75c	vistr	VRR_VV0U0U
+e75f	vseg	VRR_VV0U
+e760	vmrl	VRR_VVV0U
+e761	vmrh	VRR_VVV0U
+e762	vlvgp	VRR_VRR
+e764	vsum	VRR_VVV0U
+e765	vsumg	VRR_VVV0U
+e766	vcksm	VRR_VVV
+e767	vsumq	VRR_VVV0U
+e768	vn	VRR_VVV
+e769	vnc	VRR_VVV
+e76a	vo	VRR_VVV
+e76b	vno	VRR_VVV
+e76c	vnx	VRR_VVV
+e76d	vx	VRR_VVV
+e76e	vnn	VRR_VVV
+e76f	voc	VRR_VVV
+e770	veslv	VRR_VVV0U
+e772	verim	VRI_VVV0UU
+e773	verllv	VRR_VVV0U
+e774	vsl	VRR_VVV
+e775	vslb	VRR_VVV
+e777	vsldb	VRI_VVV0U
+e778	vesrlv	VRR_VVV0U
+e77a	vesrav	VRR_VVV0U
+e77c	vsrl	VRR_VVV
+e77d	vsrlb	VRR_VVV
+e77e	vsra	VRR_VVV
+e77f	vsrab	VRR_VVV
+e780	vfee	VRR_VVV0U0U
+e781	vfene	VRR_VVV0U0U
+e782	vfae	VRR_VVV0U0U
+e784	vpdi	VRR_VVV0U
+e785	vbperm	VRR_VVV
+e786	vsld	VRI_VVV0U
+e787	vsrd	VRI_VVV0U
+e78a	vstrc	VRR_VVVUU0V
+e78b	vstrs	VRR_VVVUU0V
+e78c	vperm	VRR_VVV0V
+e78d	vsel	VRR_VVV0V
+e78e	vfms	VRR_VVVU0UV
+e78f	vfma	VRR_VVVU0UV
+e794	vpk	VRR_VVV0U
+e795	vpkls	VRR_VVV0U0U
+e797	vpks	VRR_VVV0U0U
+e79e	vfnms	VRR_VVVU0UV
+e79f	vfnma	VRR_VVVU0UV
+e7a1	vmlh	VRR_VVV0U
+e7a2	vml	VRR_VVV0U
+e7a3	vmh	VRR_VVV0U
+e7a4	vmle	VRR_VVV0U
+e7a5	vmlo	VRR_VVV0U
+e7a6	vme	VRR_VVV0U
+e7a7	vmo	VRR_VVV0U
+e7a9	vmalh	VRR_VVVU0V
+e7aa	vmal	VRR_VVVU0V
+e7ab	vmah	VRR_VVVU0V
+e7ac	vmale	VRR_VVVU0V
+e7ad	vmalo	VRR_VVVU0V
+e7ae	vmae	VRR_VVVU0V
+e7af	vmao	VRR_VVVU0V
+e7b4	vgfm	VRR_VVV0U
+e7b8	vmsl	VRR_VVVUU0V
+e7b9	vaccc	VRR_VVVU0V
+e7bb	vac	VRR_VVVU0V
+e7bc	vgfma	VRR_VVVU0V
+e7bd	vsbcbi	VRR_VVVU0V
+e7bf	vsbi	VRR_VVVU0V
+e7c0	vclgd	VRR_VV0UUU
+e7c1	vcdlg	VRR_VV0UUU
+e7c2	vcgd	VRR_VV0UUU
+e7c3	vcdg	VRR_VV0UUU
+e7c4	vlde	VRR_VV0UU2
+e7c5	vled	VRR_VV0UUU
+e7c7	vfi	VRR_VV0UUU
+e7ca	wfk	VRR_VV0UU2
+e7cb	wfc	VRR_VV0UU2
+e7cc	vfpso	VRR_VV0UUU
+e7ce	vfsq	VRR_VV0UU2
+e7d4	vupll	VRR_VV0U
+e7d5	vuplh	VRR_VV0U
+e7d6	vupl	VRR_VV0U
+e7d7	vuph	VRR_VV0U
+e7d8	vtm	VRR_VV
+e7d9	vecl	VRR_VV0U
+e7db	vec	VRR_VV0U
+e7de	vlc	VRR_VV0U
+e7df	vlp	VRR_VV0U
+e7e2	vfs	VRR_VVV0UU
+e7e3	vfa	VRR_VVV0UU
+e7e5	vfd	VRR_VVV0UU
+e7e7	vfm	VRR_VVV0UU
+e7e8	vfce	VRR_VVV0UUU
+e7ea	vfche	VRR_VVV0UUU
+e7eb	vfch	VRR_VVV0UUU
+e7ee	vfmin	VRR_VVV0UUU
+e7ef	vfmax	VRR_VVV0UUU
+e7f0	vavgl	VRR_VVV0U
+e7f1	vacc	VRR_VVV0U
+e7f2	vavg	VRR_VVV0U
+e7f3	va	VRR_VVV0U
+e7f5	vscbi	VRR_VVV0U
+e7f7	vs	VRR_VVV0U
+e7f8	vceq	VRR_VVV0U0U
+e7f9	vchl	VRR_VVV0U0U
+e7fb	vch	VRR_VVV0U0U
+e7fc	vmnl	VRR_VVV0U
+e7fd	vmxl	VRR_VVV0U
+e7fe	vmn	VRR_VVV0U
+e7ff	vmx	VRR_VVV0U
+e8	mvcin	SS_L0RDRD
+e9	pka	SS_L2RDRD
+ea	unpka	SS_L0RDRD
+eb04	lmg	RSY_RRRD
+eb0a	srag	RSY_RRRD
+eb0b	slag	RSY_RRRD
+eb0c	srlg	RSY_RRRD
+eb0d	sllg	RSY_RRRD
+eb0f	tracg	RSY_RRRD
+eb14	csy	RSY_RRRD
+eb17	stcctm	RSY_RURD
+eb1c	rllg	RSY_RRRD
+eb1d	rll	RSY_RRRD
+eb20	clmh	RSY_RURD
+eb21	clmy	RSY_RURD
+eb23	clt	RSY_RURD
+eb24	stmg	RSY_RRRD
+eb25	stctg	RSY_CCRD
+eb26	stmh	RSY_RRRD
+eb2b	clgt	RSY_RURD
+eb2c	stcmh	RSY_RURD
+eb2d	stcmy	RSY_RURD
+eb2f	lctlg	RSY_CCRD
+eb30	csg	RSY_RRRD
+eb31	cdsy	RSY_RRRD
+eb3e	cdsg	RSY_RRRD
+eb44	bxhg	RSY_RRRD
+eb45	bxleg	RSY_RRRD
+eb4c	ecag	RSY_RRRD
+eb51	tmy	SIY_URD
+eb52	mviy	SIY_URD
+eb54	niy	SIY_URD
+eb55	cliy	SIY_URD
+eb56	oiy	SIY_URD
+eb57	xiy	SIY_URD
+eb60	lric	RSY_RDRU
+eb61	stric	RSY_RDRU
+eb62	mric	RSY_RDRU
+eb6a	asi	SIY_IRD
+eb6e	alsi	SIY_IRD
+eb71	lpswey	SIY_RD
+eb7a	agsi	SIY_IRD
+eb7e	algsi	SIY_IRD
+eb80	icmh	RSY_RURD
+eb81	icmy	RSY_RURD
+eb8a	sqbs	RSY_RDRU
+eb8e	mvclu	RSY_RRRD
+eb8f	clclu	RSY_RRRD
+eb90	stmy	RSY_RRRD
+eb96	lmh	RSY_RRRD
+eb98	lmy	RSY_RRRD
+eb9a	lamy	RSY_AARD
+eb9b	stamy	RSY_AARD
+ebc0	tp	RSL_R0RD
+ebd0	pcistb	RSY_RRRD
+ebd1	sic	RSY_RRRD
+ebd4	pcistbi	RSY_RRRD
+ebdc	srak	RSY_RRRD
+ebdd	slak	RSY_RRRD
+ebde	srlk	RSY_RRRD
+ebdf	sllk	RSY_RRRD
+ebe0	locfh	RSY_RURD2
+ebe1	stocfh	RSY_RURD2
+ebe2	locg	RSY_RURD2
+ebe3	stocg	RSY_RURD2
+ebe4	lang	RSY_RRRD
+ebe6	laog	RSY_RRRD
+ebe7	laxg	RSY_RRRD
+ebe8	laag	RSY_RRRD
+ebea	laalg	RSY_RRRD
+ebf2	loc	RSY_RURD2
+ebf3	stoc	RSY_RURD2
+ebf4	lan	RSY_RRRD
+ebf6	lao	RSY_RRRD
+ebf7	lax	RSY_RRRD
+ebf8	laa	RSY_RRRD
+ebfa	laal	RSY_RRRD
+ec42	lochi	RIE_RUI0
+ec44	brxhg	RIE_RRP
+ec45	brxlg	RIE_RRP
+ec46	locghi	RIE_RUI0
+ec4e	lochhi	RIE_RUI0
+ec51	risblg	RIE_RRUUU
+ec54	rnsbg	RIE_RRUUU
+ec55	risbg	RIE_RRUUU
+ec56	rosbg	RIE_RRUUU
+ec57	rxsbg	RIE_RRUUU
+ec59	risbgn	RIE_RRUUU
+ec5d	risbhg	RIE_RRUUU
+ec64	cgrj	RIE_RRPU
+ec65	clgrj	RIE_RRPU
+ec70	cgit	RIE_R0IU
+ec71	clgit	RIE_R0UU
+ec72	cit	RIE_R0IU
+ec73	clfit	RIE_R0UU
+ec76	crj	RIE_RRPU
+ec77	clrj	RIE_RRPU
+ec7c	cgij	RIE_RUPI
+ec7d	clgij	RIE_RUPU
+ec7e	cij	RIE_RUPI
+ec7f	clij	RIE_RUPU
+ecd8	ahik	RIE_RRI0
+ecd9	aghik	RIE_RRI0
+ecda	alhsik	RIE_RRI0
+ecdb	alghsik	RIE_RRI0
+ece4	cgrb	RRS_RRRDU
+ece5	clgrb	RRS_RRRDU
+ecf6	crb	RRS_RRRDU
+ecf7	clrb	RRS_RRRDU
+ecfc	cgib	RIS_RURDI
+ecfd	clgib	RIS_RURDU
+ecfe	cib	RIS_RURDI
+ecff	clib	RIS_RURDU
+ed04	ldeb	RXE_FRRD
+ed05	lxdb	RXE_FRRD
+ed06	lxeb	RXE_FRRD
+ed07	mxdb	RXE_FRRD
+ed08	keb	RXE_FRRD
+ed09	ceb	RXE_FRRD
+ed0a	aeb	RXE_FRRD
+ed0b	seb	RXE_FRRD
+ed0c	mdeb	RXE_FRRD
+ed0d	deb	RXE_FRRD
+ed0e	maeb	RXF_FRRDF
+ed0f	mseb	RXF_FRRDF
+ed10	tceb	RXE_FRRD
+ed11	tcdb	RXE_FRRD
+ed12	tcxb	RXE_FRRD
+ed14	sqeb	RXE_FRRD
+ed15	sqdb	RXE_FRRD
+ed17	meeb	RXE_FRRD
+ed18	kdb	RXE_FRRD
+ed19	cdb	RXE_FRRD
+ed1a	adb	RXE_FRRD
+ed1b	sdb	RXE_FRRD
+ed1c	mdb	RXE_FRRD
+ed1d	ddb	RXE_FRRD
+ed1e	madb	RXF_FRRDF
+ed1f	msdb	RXF_FRRDF
+ed24	lde	RXE_FRRD
+ed25	lxd	RXE_FRRD
+ed26	lxe	RXE_FRRD
+ed2e	mae	RXF_FRRDF
+ed2f	mse	RXF_FRRDF
+ed34	sqe	RXE_FRRD
+ed35	sqd	RXE_FRRD
+ed37	mee	RXE_FRRD
+ed38	mayl	RXF_FRRDF
+ed39	myl	RXF_FRRDF
+ed3a	may	RXF_FRRDF
+ed3b	my	RXF_FRRDF
+ed3c	mayh	RXF_FRRDF
+ed3d	myh	RXF_FRRDF
+ed3e	mad	RXF_FRRDF
+ed3f	msd	RXF_FRRDF
+ed40	sldt	RXF_FRRDF
+ed41	srdt	RXF_FRRDF
+ed48	slxt	RXF_FRRDF
+ed49	srxt	RXF_FRRDF
+ed50	tdcet	RXE_FRRD
+ed51	tdget	RXE_FRRD
+ed54	tdcdt	RXE_FRRD
+ed55	tdgdt	RXE_FRRD
+ed58	tdcxt	RXE_FRRD
+ed59	tdgxt	RXE_FRRD
+ed64	ley	RXY_FRRD
+ed65	ldy	RXY_FRRD
+ed66	stey	RXY_FRRD
+ed67	stdy	RXY_FRRD
+eda8	czdt	RSL_LRDFU
+eda9	czxt	RSL_LRDFU
+edaa	cdzt	RSL_LRDFU
+edab	cxzt	RSL_LRDFU
+edac	cpdt	RSL_LRDFU
+edad	cpxt	RSL_LRDFU
+edae	cdpt	RSL_LRDFU
+edaf	cxpt	RSL_LRDFU
+ee	plo	SS_RRRDRD2
+ef	lmd	SS_RRRDRD3
+f0	srp	SS_LIRDRD
+f1	mvo	SS_LLRDRD
+f2	pack	SS_LLRDRD
+f3	unpk	SS_LLRDRD
+f8	zap	SS_LLRDRD
+f9	cp	SS_LLRDRD
+fa	ap	SS_LLRDRD
+fb	sp	SS_LLRDRD
+fc	mp	SS_LLRDRD
+fd	dp	SS_LLRDRD
-- 
cgit v1.2.3