Diffstat (limited to 'arch/s390')
-rw-r--r--arch/s390/Kbuild11
-rw-r--r--arch/s390/Kconfig955
-rw-r--r--arch/s390/Kconfig.debug19
-rw-r--r--arch/s390/Makefile184
-rw-r--r--arch/s390/appldata/Makefile9
-rw-r--r--arch/s390/appldata/appldata.h48
-rw-r--r--arch/s390/appldata/appldata_base.c544
-rw-r--r--arch/s390/appldata/appldata_mem.c165
-rw-r--r--arch/s390/appldata/appldata_net_sum.c167
-rw-r--r--arch/s390/appldata/appldata_os.c221
-rw-r--r--arch/s390/boot/.gitignore2
-rw-r--r--arch/s390/boot/Makefile55
-rw-r--r--arch/s390/boot/als.c126
-rw-r--r--arch/s390/boot/compressed/.gitignore5
-rw-r--r--arch/s390/boot/compressed/Makefile74
-rw-r--r--arch/s390/boot/compressed/head.S52
-rw-r--r--arch/s390/boot/compressed/misc.c116
-rw-r--r--arch/s390/boot/compressed/vmlinux.lds.S65
-rw-r--r--arch/s390/boot/compressed/vmlinux.scr.lds.S15
-rw-r--r--arch/s390/boot/ebcdic.c2
-rw-r--r--arch/s390/boot/head.S340
-rw-r--r--arch/s390/boot/head_kdump.S101
-rw-r--r--arch/s390/boot/install.sh35
-rw-r--r--arch/s390/boot/mem.S2
-rw-r--r--arch/s390/boot/sclp_early_core.c2
-rw-r--r--arch/s390/configs/debug_defconfig730
-rw-r--r--arch/s390/configs/performance_defconfig669
-rw-r--r--arch/s390/configs/zfcpdump_defconfig71
-rw-r--r--arch/s390/crypto/Makefile17
-rw-r--r--arch/s390/crypto/aes_s390.c1214
-rw-r--r--arch/s390/crypto/arch_random.c127
-rw-r--r--arch/s390/crypto/crc32-vx.c314
-rw-r--r--arch/s390/crypto/crc32be-vx.S211
-rw-r--r--arch/s390/crypto/crc32le-vx.S271
-rw-r--r--arch/s390/crypto/des_s390.c593
-rw-r--r--arch/s390/crypto/ghash_s390.c156
-rw-r--r--arch/s390/crypto/paes_s390.c618
-rw-r--r--arch/s390/crypto/prng.c875
-rw-r--r--arch/s390/crypto/sha.h32
-rw-r--r--arch/s390/crypto/sha1_s390.c103
-rw-r--r--arch/s390/crypto/sha256_s390.c143
-rw-r--r--arch/s390/crypto/sha512_s390.c149
-rw-r--r--arch/s390/crypto/sha_common.c92
-rw-r--r--arch/s390/defconfig249
-rw-r--r--arch/s390/hypfs/Makefile9
-rw-r--r--arch/s390/hypfs/hypfs.h77
-rw-r--r--arch/s390/hypfs/hypfs_dbfs.c105
-rw-r--r--arch/s390/hypfs/hypfs_diag.c617
-rw-r--r--arch/s390/hypfs/hypfs_diag0c.c139
-rw-r--r--arch/s390/hypfs/hypfs_sprp.c148
-rw-r--r--arch/s390/hypfs/hypfs_vm.c292
-rw-r--r--arch/s390/hypfs/inode.c502
-rw-r--r--arch/s390/include/asm/Kbuild27
-rw-r--r--arch/s390/include/asm/airq.h104
-rw-r--r--arch/s390/include/asm/alternative-asm.h108
-rw-r--r--arch/s390/include/asm/alternative.h150
-rw-r--r--arch/s390/include/asm/ap.h363
-rw-r--r--arch/s390/include/asm/appldata.h67
-rw-r--r--arch/s390/include/asm/archrandom.h51
-rw-r--r--arch/s390/include/asm/asm-prototypes.h9
-rw-r--r--arch/s390/include/asm/atomic.h152
-rw-r--r--arch/s390/include/asm/atomic_ops.h143
-rw-r--r--arch/s390/include/asm/barrier.h78
-rw-r--r--arch/s390/include/asm/bitops.h422
-rw-r--r--arch/s390/include/asm/bug.h72
-rw-r--r--arch/s390/include/asm/bugs.h21
-rw-r--r--arch/s390/include/asm/cache.h19
-rw-r--r--arch/s390/include/asm/ccwdev.h235
-rw-r--r--arch/s390/include/asm/ccwgroup.h86
-rw-r--r--arch/s390/include/asm/checksum.h139
-rw-r--r--arch/s390/include/asm/chpid.h51
-rw-r--r--arch/s390/include/asm/cio.h335
-rw-r--r--arch/s390/include/asm/clp.h56
-rw-r--r--arch/s390/include/asm/cmb.h14
-rw-r--r--arch/s390/include/asm/cmpxchg.h66
-rw-r--r--arch/s390/include/asm/compat.h276
-rw-r--r--arch/s390/include/asm/cpacf.h523
-rw-r--r--arch/s390/include/asm/cpcmd.h32
-rw-r--r--arch/s390/include/asm/cpu.h25
-rw-r--r--arch/s390/include/asm/cpu_mf.h314
-rw-r--r--arch/s390/include/asm/cpufeature.h30
-rw-r--r--arch/s390/include/asm/cputime.h38
-rw-r--r--arch/s390/include/asm/crw.h56
-rw-r--r--arch/s390/include/asm/css_chars.h45
-rw-r--r--arch/s390/include/asm/ctl_reg.h124
-rw-r--r--arch/s390/include/asm/current.h19
-rw-r--r--arch/s390/include/asm/debug.h270
-rw-r--r--arch/s390/include/asm/delay.h25
-rw-r--r--arch/s390/include/asm/diag.h299
-rw-r--r--arch/s390/include/asm/dis.h30
-rw-r--r--arch/s390/include/asm/dma.h20
-rw-r--r--arch/s390/include/asm/dwarf.h37
-rw-r--r--arch/s390/include/asm/eadm.h120
-rw-r--r--arch/s390/include/asm/ebcdic.h47
-rw-r--r--arch/s390/include/asm/elf.h280
-rw-r--r--arch/s390/include/asm/exec.h13
-rw-r--r--arch/s390/include/asm/extable.h29
-rw-r--r--arch/s390/include/asm/extmem.h32
-rw-r--r--arch/s390/include/asm/facility.h98
-rw-r--r--arch/s390/include/asm/fcx.h312
-rw-r--r--arch/s390/include/asm/fpu/api.h116
-rw-r--r--arch/s390/include/asm/fpu/internal.h62
-rw-r--r--arch/s390/include/asm/fpu/types.h38
-rw-r--r--arch/s390/include/asm/ftrace.h86
-rw-r--r--arch/s390/include/asm/futex.h87
-rw-r--r--arch/s390/include/asm/gmap.h145
-rw-r--r--arch/s390/include/asm/hardirq.h29
-rw-r--r--arch/s390/include/asm/hugetlb.h122
-rw-r--r--arch/s390/include/asm/hw_irq.h12
-rw-r--r--arch/s390/include/asm/idals.h233
-rw-r--r--arch/s390/include/asm/idle.h30
-rw-r--r--arch/s390/include/asm/io.h81
-rw-r--r--arch/s390/include/asm/ipl.h165
-rw-r--r--arch/s390/include/asm/irq.h116
-rw-r--r--arch/s390/include/asm/irqflags.h78
-rw-r--r--arch/s390/include/asm/isc.h30
-rw-r--r--arch/s390/include/asm/itcw.h31
-rw-r--r--arch/s390/include/asm/jump_label.h60
-rw-r--r--arch/s390/include/asm/kdebug.h28
-rw-r--r--arch/s390/include/asm/kexec.h72
-rw-r--r--arch/s390/include/asm/kprobes.h83
-rw-r--r--arch/s390/include/asm/kvm_host.h877
-rw-r--r--arch/s390/include/asm/kvm_para.h206
-rw-r--r--arch/s390/include/asm/linkage.h32
-rw-r--r--arch/s390/include/asm/livepatch.h26
-rw-r--r--arch/s390/include/asm/lowcore.h207
-rw-r--r--arch/s390/include/asm/mmu.h58
-rw-r--r--arch/s390/include/asm/mmu_context.h136
-rw-r--r--arch/s390/include/asm/mmzone.h17
-rw-r--r--arch/s390/include/asm/module.h35
-rw-r--r--arch/s390/include/asm/nmi.h105
-rw-r--r--arch/s390/include/asm/nospec-branch.h17
-rw-r--r--arch/s390/include/asm/nospec-insn.h196
-rw-r--r--arch/s390/include/asm/numa.h36
-rw-r--r--arch/s390/include/asm/os_info.h50
-rw-r--r--arch/s390/include/asm/page-states.h21
-rw-r--r--arch/s390/include/asm/page.h186
-rw-r--r--arch/s390/include/asm/pci.h253
-rw-r--r--arch/s390/include/asm/pci_clp.h167
-rw-r--r--arch/s390/include/asm/pci_debug.h25
-rw-r--r--arch/s390/include/asm/pci_dma.h207
-rw-r--r--arch/s390/include/asm/pci_insn.h87
-rw-r--r--arch/s390/include/asm/pci_io.h206
-rw-r--r--arch/s390/include/asm/percpu.h186
-rw-r--r--arch/s390/include/asm/perf_event.h74
-rw-r--r--arch/s390/include/asm/pgalloc.h169
-rw-r--r--arch/s390/include/asm/pgtable.h1643
-rw-r--r--arch/s390/include/asm/pkey.h112
-rw-r--r--arch/s390/include/asm/pnet.h23
-rw-r--r--arch/s390/include/asm/preempt.h145
-rw-r--r--arch/s390/include/asm/processor.h386
-rw-r--r--arch/s390/include/asm/ptrace.h188
-rw-r--r--arch/s390/include/asm/purgatory.h17
-rw-r--r--arch/s390/include/asm/qdio.h430
-rw-r--r--arch/s390/include/asm/runtime_instr.h28
-rw-r--r--arch/s390/include/asm/schid.h22
-rw-r--r--arch/s390/include/asm/sclp.h136
-rw-r--r--arch/s390/include/asm/scsw.h993
-rw-r--r--arch/s390/include/asm/seccomp.h19
-rw-r--r--arch/s390/include/asm/sections.h7
-rw-r--r--arch/s390/include/asm/segment.h5
-rw-r--r--arch/s390/include/asm/serial.h7
-rw-r--r--arch/s390/include/asm/set_memory.h32
-rw-r--r--arch/s390/include/asm/setup.h148
-rw-r--r--arch/s390/include/asm/shmparam.h12
-rw-r--r--arch/s390/include/asm/signal.h26
-rw-r--r--arch/s390/include/asm/sigp.h70
-rw-r--r--arch/s390/include/asm/smp.h96
-rw-r--r--arch/s390/include/asm/sparsemem.h8
-rw-r--r--arch/s390/include/asm/spinlock.h159
-rw-r--r--arch/s390/include/asm/spinlock_types.h22
-rw-r--r--arch/s390/include/asm/stp.h52
-rw-r--r--arch/s390/include/asm/string.h166
-rw-r--r--arch/s390/include/asm/switch_to.h49
-rw-r--r--arch/s390/include/asm/syscall.h113
-rw-r--r--arch/s390/include/asm/sysinfo.h201
-rw-r--r--arch/s390/include/asm/termios.h26
-rw-r--r--arch/s390/include/asm/thread_info.h97
-rw-r--r--arch/s390/include/asm/timex.h257
-rw-r--r--arch/s390/include/asm/tlb.h205
-rw-r--r--arch/s390/include/asm/tlbflush.h152
-rw-r--r--arch/s390/include/asm/topology.h100
-rw-r--r--arch/s390/include/asm/trace/diag.h44
-rw-r--r--arch/s390/include/asm/trace/zcrypt.h123
-rw-r--r--arch/s390/include/asm/uaccess.h282
-rw-r--r--arch/s390/include/asm/unistd.h42
-rw-r--r--arch/s390/include/asm/uprobes.h33
-rw-r--r--arch/s390/include/asm/user.h75
-rw-r--r--arch/s390/include/asm/vdso.h57
-rw-r--r--arch/s390/include/asm/vga.h7
-rw-r--r--arch/s390/include/asm/vtime.h8
-rw-r--r--arch/s390/include/asm/vtimer.h32
-rw-r--r--arch/s390/include/asm/vx-insn.h561
-rw-r--r--arch/s390/include/asm/xor.h21
-rw-r--r--arch/s390/include/uapi/asm/Kbuild20
-rw-r--r--arch/s390/include/uapi/asm/auxvec.h9
-rw-r--r--arch/s390/include/uapi/asm/bitsperlong.h14
-rw-r--r--arch/s390/include/uapi/asm/bpf_perf_event.h9
-rw-r--r--arch/s390/include/uapi/asm/byteorder.h7
-rw-r--r--arch/s390/include/uapi/asm/chpid.h23
-rw-r--r--arch/s390/include/uapi/asm/chsc.h144
-rw-r--r--arch/s390/include/uapi/asm/clp.h29
-rw-r--r--arch/s390/include/uapi/asm/cmb.h54
-rw-r--r--arch/s390/include/uapi/asm/dasd.h334
-rw-r--r--arch/s390/include/uapi/asm/debug.h35
-rw-r--r--arch/s390/include/uapi/asm/guarded_storage.h78
-rw-r--r--arch/s390/include/uapi/asm/hypfs.h55
-rw-r--r--arch/s390/include/uapi/asm/ioctls.h9
-rw-r--r--arch/s390/include/uapi/asm/ipcbuf.h32
-rw-r--r--arch/s390/include/uapi/asm/kvm.h277
-rw-r--r--arch/s390/include/uapi/asm/kvm_para.h8
-rw-r--r--arch/s390/include/uapi/asm/kvm_perf.h22
-rw-r--r--arch/s390/include/uapi/asm/monwriter.h32
-rw-r--r--arch/s390/include/uapi/asm/perf_regs.h44
-rw-r--r--arch/s390/include/uapi/asm/pkey.h132
-rw-r--r--arch/s390/include/uapi/asm/posix_types.h52
-rw-r--r--arch/s390/include/uapi/asm/ptrace.h457
-rw-r--r--arch/s390/include/uapi/asm/qeth.h116
-rw-r--r--arch/s390/include/uapi/asm/runtime_instr.h74
-rw-r--r--arch/s390/include/uapi/asm/schid.h17
-rw-r--r--arch/s390/include/uapi/asm/sclp_ctl.h25
-rw-r--r--arch/s390/include/uapi/asm/setup.h14
-rw-r--r--arch/s390/include/uapi/asm/sie.h252
-rw-r--r--arch/s390/include/uapi/asm/sigcontext.h85
-rw-r--r--arch/s390/include/uapi/asm/siginfo.h17
-rw-r--r--arch/s390/include/uapi/asm/signal.h139
-rw-r--r--arch/s390/include/uapi/asm/socket.h117
-rw-r--r--arch/s390/include/uapi/asm/stat.h104
-rw-r--r--arch/s390/include/uapi/asm/statfs.h51
-rw-r--r--arch/s390/include/uapi/asm/sthyi.h7
-rw-r--r--arch/s390/include/uapi/asm/tape390.h103
-rw-r--r--arch/s390/include/uapi/asm/termios.h50
-rw-r--r--arch/s390/include/uapi/asm/types.h27
-rw-r--r--arch/s390/include/uapi/asm/ucontext.h41
-rw-r--r--arch/s390/include/uapi/asm/unistd.h17
-rw-r--r--arch/s390/include/uapi/asm/virtio-ccw.h18
-rw-r--r--arch/s390/include/uapi/asm/vmcp.h25
-rw-r--r--arch/s390/include/uapi/asm/vtoc.h214
-rw-r--r--arch/s390/include/uapi/asm/zcrypt.h358
-rw-r--r--arch/s390/kernel/.gitignore1
-rw-r--r--arch/s390/kernel/Makefile88
-rw-r--r--arch/s390/kernel/alternative.c113
-rw-r--r--arch/s390/kernel/asm-offsets.c206
-rw-r--r--arch/s390/kernel/audit.c79
-rw-r--r--arch/s390/kernel/audit.h16
-rw-r--r--arch/s390/kernel/base.S149
-rw-r--r--arch/s390/kernel/cache.c177
-rw-r--r--arch/s390/kernel/compat_audit.c45
-rw-r--r--arch/s390/kernel/compat_linux.c522
-rw-r--r--arch/s390/kernel/compat_linux.h130
-rw-r--r--arch/s390/kernel/compat_ptrace.h64
-rw-r--r--arch/s390/kernel/compat_signal.c428
-rw-r--r--arch/s390/kernel/compat_wrapper.c186
-rw-r--r--arch/s390/kernel/cpcmd.c112
-rw-r--r--arch/s390/kernel/crash_dump.c711
-rw-r--r--arch/s390/kernel/debug.c1461
-rw-r--r--arch/s390/kernel/diag.c268
-rw-r--r--arch/s390/kernel/dis.c582
-rw-r--r--arch/s390/kernel/dumpstack.c191
-rw-r--r--arch/s390/kernel/early.c366
-rw-r--r--arch/s390/kernel/early_nobss.c65
-rw-r--r--arch/s390/kernel/early_printk.c36
-rw-r--r--arch/s390/kernel/ebcdic.c401
-rw-r--r--arch/s390/kernel/entry.S1510
-rw-r--r--arch/s390/kernel/entry.h89
-rw-r--r--arch/s390/kernel/fpu.c177
-rw-r--r--arch/s390/kernel/ftrace.c244
-rw-r--r--arch/s390/kernel/guarded_storage.c128
-rw-r--r--arch/s390/kernel/head64.S101
-rw-r--r--arch/s390/kernel/idle.c142
-rw-r--r--arch/s390/kernel/ipl.c1787
-rw-r--r--arch/s390/kernel/irq.c311
-rw-r--r--arch/s390/kernel/jump_label.c102
-rw-r--r--arch/s390/kernel/kdebugfs.c16
-rw-r--r--arch/s390/kernel/kexec_elf.c152
-rw-r--r--arch/s390/kernel/kexec_image.c76
-rw-r--r--arch/s390/kernel/kprobes.c658
-rw-r--r--arch/s390/kernel/lgr.c188
-rw-r--r--arch/s390/kernel/machine_kexec.c304
-rw-r--r--arch/s390/kernel/machine_kexec_file.c245
-rw-r--r--arch/s390/kernel/mcount.S104
-rw-r--r--arch/s390/kernel/module.c478
-rw-r--r--arch/s390/kernel/nmi.c476
-rw-r--r--arch/s390/kernel/nospec-branch.c176
-rw-r--r--arch/s390/kernel/nospec-sysfs.c23
-rw-r--r--arch/s390/kernel/os_info.c170
-rw-r--r--arch/s390/kernel/perf_cpum_cf.c724
-rw-r--r--arch/s390/kernel/perf_cpum_cf_events.c598
-rw-r--r--arch/s390/kernel/perf_cpum_sf.c2100
-rw-r--r--arch/s390/kernel/perf_event.c246
-rw-r--r--arch/s390/kernel/perf_regs.c69
-rw-r--r--arch/s390/kernel/pgm_check.S147
-rw-r--r--arch/s390/kernel/process.c243
-rw-r--r--arch/s390/kernel/processor.c220
-rw-r--r--arch/s390/kernel/ptrace.c1776
-rw-r--r--arch/s390/kernel/reipl.S80
-rw-r--r--arch/s390/kernel/relocate_kernel.S74
-rw-r--r--arch/s390/kernel/runtime_instr.c102
-rw-r--r--arch/s390/kernel/setup.c985
-rw-r--r--arch/s390/kernel/signal.c541
-rw-r--r--arch/s390/kernel/smp.c1212
-rw-r--r--arch/s390/kernel/stacktrace.c75
-rw-r--r--arch/s390/kernel/sthyi.c516
-rw-r--r--arch/s390/kernel/suspend.c240
-rw-r--r--arch/s390/kernel/swsusp.S286
-rw-r--r--arch/s390/kernel/sys_s390.c92
-rw-r--r--arch/s390/kernel/syscalls/Makefile52
-rw-r--r--arch/s390/kernel/syscalls/syscall.tbl393
-rwxr-xr-xarch/s390/kernel/syscalls/syscalltbl232
-rw-r--r--arch/s390/kernel/sysinfo.c568
-rw-r--r--arch/s390/kernel/time.c923
-rw-r--r--arch/s390/kernel/topology.c641
-rw-r--r--arch/s390/kernel/trace.c33
-rw-r--r--arch/s390/kernel/traps.c267
-rw-r--r--arch/s390/kernel/uprobes.c397
-rw-r--r--arch/s390/kernel/vdso.c325
-rw-r--r--arch/s390/kernel/vdso32/.gitignore1
-rw-r--r--arch/s390/kernel/vdso32/Makefile66
-rw-r--r--arch/s390/kernel/vdso32/clock_getres.S44
-rw-r--r--arch/s390/kernel/vdso32/clock_gettime.S179
-rw-r--r--arch/s390/kernel/vdso32/getcpu.S33
-rw-r--r--arch/s390/kernel/vdso32/gettimeofday.S103
-rw-r--r--arch/s390/kernel/vdso32/note.S13
-rw-r--r--arch/s390/kernel/vdso32/vdso32.lds.S142
-rw-r--r--arch/s390/kernel/vdso32/vdso32_wrapper.S15
-rw-r--r--arch/s390/kernel/vdso64/.gitignore1
-rw-r--r--arch/s390/kernel/vdso64/Makefile66
-rw-r--r--arch/s390/kernel/vdso64/clock_getres.S50
-rw-r--r--arch/s390/kernel/vdso64/clock_gettime.S163
-rw-r--r--arch/s390/kernel/vdso64/getcpu.S33
-rw-r--r--arch/s390/kernel/vdso64/gettimeofday.S71
-rw-r--r--arch/s390/kernel/vdso64/note.S13
-rw-r--r--arch/s390/kernel/vdso64/vdso64.lds.S142
-rw-r--r--arch/s390/kernel/vdso64/vdso64_wrapper.S15
-rw-r--r--arch/s390/kernel/vmlinux.lds.S156
-rw-r--r--arch/s390/kernel/vtime.c437
-rw-r--r--arch/s390/kvm/Kconfig62
-rw-r--r--arch/s390/kvm/Makefile14
-rw-r--r--arch/s390/kvm/diag.c263
-rw-r--r--arch/s390/kvm/gaccess.c1204
-rw-r--r--arch/s390/kvm/gaccess.h393
-rw-r--r--arch/s390/kvm/guestdbg.c626
-rw-r--r--arch/s390/kvm/intercept.c492
-rw-r--r--arch/s390/kvm/interrupt.c2927
-rw-r--r--arch/s390/kvm/irq.h19
-rw-r--r--arch/s390/kvm/kvm-s390.c4253
-rw-r--r--arch/s390/kvm/kvm-s390.h427
-rw-r--r--arch/s390/kvm/priv.c1452
-rw-r--r--arch/s390/kvm/sigp.c507
-rw-r--r--arch/s390/kvm/trace-s390.h340
-rw-r--r--arch/s390/kvm/trace.h462
-rw-r--r--arch/s390/kvm/vsie.c1200
-rw-r--r--arch/s390/lib/Makefile17
-rw-r--r--arch/s390/lib/delay.c129
-rw-r--r--arch/s390/lib/find.c76
-rw-r--r--arch/s390/lib/mem.S184
-rw-r--r--arch/s390/lib/probes.c161
-rw-r--r--arch/s390/lib/spinlock.c325
-rw-r--r--arch/s390/lib/string.c343
-rw-r--r--arch/s390/lib/uaccess.c448
-rw-r--r--arch/s390/lib/xor.c135
-rw-r--r--arch/s390/mm/Makefile13
-rw-r--r--arch/s390/mm/cmm.c487
-rw-r--r--arch/s390/mm/dump_pagetables.c249
-rw-r--r--arch/s390/mm/extmem.c763
-rw-r--r--arch/s390/mm/fault.c837
-rw-r--r--arch/s390/mm/gmap.c2645
-rw-r--r--arch/s390/mm/gup.c310
-rw-r--r--arch/s390/mm/hugetlbpage.c370
-rw-r--r--arch/s390/mm/init.c251
-rw-r--r--arch/s390/mm/maccess.c213
-rw-r--r--arch/s390/mm/mem_detect.c62
-rw-r--r--arch/s390/mm/mmap.c206
-rw-r--r--arch/s390/mm/page-states.c281
-rw-r--r--arch/s390/mm/pageattr.c386
-rw-r--r--arch/s390/mm/pgalloc.c681
-rw-r--r--arch/s390/mm/pgtable.c1126
-rw-r--r--arch/s390/mm/vmem.c443
-rw-r--r--arch/s390/net/Makefile6
-rw-r--r--arch/s390/net/bpf_jit.h55
-rw-r--r--arch/s390/net/bpf_jit_comp.c1320
-rw-r--r--arch/s390/net/pnet.c80
-rw-r--r--arch/s390/numa/Makefile4
-rw-r--r--arch/s390/numa/mode_emu.c576
-rw-r--r--arch/s390/numa/numa.c178
-rw-r--r--arch/s390/numa/numa_mode.h25
-rw-r--r--arch/s390/numa/toptree.c351
-rw-r--r--arch/s390/numa/toptree.h61
-rw-r--r--arch/s390/oprofile/Makefile10
-rw-r--r--arch/s390/oprofile/init.c43
-rw-r--r--arch/s390/pci/Makefile7
-rw-r--r--arch/s390/pci/pci.c1005
-rw-r--r--arch/s390/pci/pci_clp.c644
-rw-r--r--arch/s390/pci/pci_debug.c217
-rw-r--r--arch/s390/pci/pci_dma.c690
-rw-r--r--arch/s390/pci/pci_event.c164
-rw-r--r--arch/s390/pci/pci_insn.c233
-rw-r--r--arch/s390/pci/pci_mmio.c115
-rw-r--r--arch/s390/pci/pci_sysfs.c162
-rw-r--r--arch/s390/purgatory/.gitignore2
-rw-r--r--arch/s390/purgatory/Makefile39
-rw-r--r--arch/s390/purgatory/head.S273
-rw-r--r--arch/s390/purgatory/purgatory.c33
-rw-r--r--arch/s390/purgatory/string.c3
-rw-r--r--arch/s390/scripts/Makefile.chkbss23
-rw-r--r--arch/s390/tools/.gitignore2
-rw-r--r--arch/s390/tools/Makefile35
-rw-r--r--arch/s390/tools/gen_facilities.c163
-rw-r--r--arch/s390/tools/gen_opcode_table.c337
-rw-r--r--arch/s390/tools/opcodes.txt1183
410 files changed, 97942 insertions, 0 deletions
diff --git a/arch/s390/Kbuild b/arch/s390/Kbuild
new file mode 100644
index 000000000..e63940bb5
--- /dev/null
+++ b/arch/s390/Kbuild
@@ -0,0 +1,11 @@
+# SPDX-License-Identifier: GPL-2.0
+obj-y += kernel/
+obj-y += mm/
+obj-$(CONFIG_KVM) += kvm/
+obj-y += crypto/
+obj-$(CONFIG_S390_HYPFS_FS) += hypfs/
+obj-$(CONFIG_APPLDATA_BASE) += appldata/
+obj-y += net/
+obj-$(CONFIG_PCI) += pci/
+obj-$(CONFIG_NUMA) += numa/
+obj-$(CONFIG_ARCH_HAS_KEXEC_PURGATORY) += purgatory/
diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig
new file mode 100644
index 000000000..ce4c3b659
--- /dev/null
+++ b/arch/s390/Kconfig
@@ -0,0 +1,955 @@
+# SPDX-License-Identifier: GPL-2.0
+config MMU
+ def_bool y
+
+config ZONE_DMA
+ def_bool y
+
+config CPU_BIG_ENDIAN
+ def_bool y
+
+config LOCKDEP_SUPPORT
+ def_bool y
+
+config STACKTRACE_SUPPORT
+ def_bool y
+
+config RWSEM_GENERIC_SPINLOCK
+ bool
+
+config RWSEM_XCHGADD_ALGORITHM
+ def_bool y
+
+config ARCH_HAS_ILOG2_U32
+ def_bool n
+
+config ARCH_HAS_ILOG2_U64
+ def_bool n
+
+config GENERIC_HWEIGHT
+ def_bool y
+
+config GENERIC_BUG
+ def_bool y if BUG
+
+config GENERIC_BUG_RELATIVE_POINTERS
+ def_bool y
+
+config GENERIC_LOCKBREAK
+ def_bool y if SMP && PREEMPT
+
+config PGSTE
+ def_bool y if KVM
+
+config ARCH_SUPPORTS_DEBUG_PAGEALLOC
+ def_bool y
+
+config AUDIT_ARCH
+ def_bool y
+
+config NO_IOPORT_MAP
+ def_bool y
+
+config PCI_QUIRKS
+ def_bool n
+
+config ARCH_SUPPORTS_UPROBES
+ def_bool y
+
+config S390
+ def_bool y
+ select ARCH_BINFMT_ELF_STATE
+ select ARCH_HAS_DEVMEM_IS_ALLOWED
+ select ARCH_HAS_ELF_RANDOMIZE
+ select ARCH_HAS_FORTIFY_SOURCE
+ select ARCH_HAS_GCOV_PROFILE_ALL
+ select ARCH_HAS_GIGANTIC_PAGE if (MEMORY_ISOLATION && COMPACTION) || CMA
+ select ARCH_HAS_KCOV
+ select ARCH_HAS_PTE_SPECIAL
+ select ARCH_HAS_SET_MEMORY
+ select ARCH_HAS_SG_CHAIN
+ select ARCH_HAS_STRICT_KERNEL_RWX
+ select ARCH_HAS_STRICT_MODULE_RWX
+ select ARCH_HAS_UBSAN_SANITIZE_ALL
+ select ARCH_HAVE_NMI_SAFE_CMPXCHG
+ select ARCH_INLINE_READ_LOCK
+ select ARCH_INLINE_READ_LOCK_BH
+ select ARCH_INLINE_READ_LOCK_IRQ
+ select ARCH_INLINE_READ_LOCK_IRQSAVE
+ select ARCH_INLINE_READ_TRYLOCK
+ select ARCH_INLINE_READ_UNLOCK
+ select ARCH_INLINE_READ_UNLOCK_BH
+ select ARCH_INLINE_READ_UNLOCK_IRQ
+ select ARCH_INLINE_READ_UNLOCK_IRQRESTORE
+ select ARCH_INLINE_SPIN_LOCK
+ select ARCH_INLINE_SPIN_LOCK_BH
+ select ARCH_INLINE_SPIN_LOCK_IRQ
+ select ARCH_INLINE_SPIN_LOCK_IRQSAVE
+ select ARCH_INLINE_SPIN_TRYLOCK
+ select ARCH_INLINE_SPIN_TRYLOCK_BH
+ select ARCH_INLINE_SPIN_UNLOCK
+ select ARCH_INLINE_SPIN_UNLOCK_BH
+ select ARCH_INLINE_SPIN_UNLOCK_IRQ
+ select ARCH_INLINE_SPIN_UNLOCK_IRQRESTORE
+ select ARCH_INLINE_WRITE_LOCK
+ select ARCH_INLINE_WRITE_LOCK_BH
+ select ARCH_INLINE_WRITE_LOCK_IRQ
+ select ARCH_INLINE_WRITE_LOCK_IRQSAVE
+ select ARCH_INLINE_WRITE_TRYLOCK
+ select ARCH_INLINE_WRITE_UNLOCK
+ select ARCH_INLINE_WRITE_UNLOCK_BH
+ select ARCH_INLINE_WRITE_UNLOCK_IRQ
+ select ARCH_INLINE_WRITE_UNLOCK_IRQRESTORE
+ select ARCH_SAVE_PAGE_KEYS if HIBERNATION
+ select ARCH_SUPPORTS_ATOMIC_RMW
+ select ARCH_SUPPORTS_NUMA_BALANCING
+ select ARCH_USE_BUILTIN_BSWAP
+ select ARCH_USE_CMPXCHG_LOCKREF
+ select ARCH_WANTS_DYNAMIC_TASK_STRUCT
+ select ARCH_WANT_IPC_PARSE_VERSION
+ select BUILDTIME_EXTABLE_SORT
+ select CLONE_BACKWARDS2
+ select DYNAMIC_FTRACE if FUNCTION_TRACER
+ select GENERIC_CLOCKEVENTS
+ select GENERIC_CPU_AUTOPROBE
+ select GENERIC_CPU_DEVICES if !SMP
+ select GENERIC_CPU_VULNERABILITIES
+ select GENERIC_FIND_FIRST_BIT
+ select GENERIC_SMP_IDLE_THREAD
+ select GENERIC_TIME_VSYSCALL
+ select HAVE_ALIGNED_STRUCT_PAGE if SLUB
+ select HAVE_ARCH_AUDITSYSCALL
+ select HAVE_ARCH_JUMP_LABEL
+ select CPU_NO_EFFICIENT_FFS if !HAVE_MARCH_Z9_109_FEATURES
+ select HAVE_ARCH_SECCOMP_FILTER
+ select HAVE_ARCH_SOFT_DIRTY
+ select HAVE_ARCH_TRACEHOOK
+ select HAVE_ARCH_TRANSPARENT_HUGEPAGE
+ select HAVE_EBPF_JIT if PACK_STACK && HAVE_MARCH_Z196_FEATURES
+ select HAVE_CMPXCHG_DOUBLE
+ select HAVE_CMPXCHG_LOCAL
+ select HAVE_COPY_THREAD_TLS
+ select HAVE_DEBUG_KMEMLEAK
+ select HAVE_DMA_CONTIGUOUS
+ select DMA_DIRECT_OPS
+ select HAVE_DYNAMIC_FTRACE
+ select HAVE_DYNAMIC_FTRACE_WITH_REGS
+ select HAVE_EFFICIENT_UNALIGNED_ACCESS
+ select HAVE_FENTRY
+ select HAVE_FTRACE_MCOUNT_RECORD
+ select HAVE_FUNCTION_GRAPH_TRACER
+ select HAVE_FUNCTION_TRACER
+ select HAVE_FUTEX_CMPXCHG if FUTEX
+ select HAVE_GCC_PLUGINS
+ select HAVE_KERNEL_BZIP2
+ select HAVE_KERNEL_GZIP
+ select HAVE_KERNEL_LZ4
+ select HAVE_KERNEL_LZMA
+ select HAVE_KERNEL_LZO
+ select HAVE_KERNEL_UNCOMPRESSED
+ select HAVE_KERNEL_XZ
+ select HAVE_KPROBES
+ select HAVE_KRETPROBES
+ select HAVE_KVM
+ select HAVE_LIVEPATCH
+ select HAVE_PERF_REGS
+ select HAVE_PERF_USER_STACK_DUMP
+ select HAVE_MEMBLOCK
+ select HAVE_MEMBLOCK_NODE_MAP
+ select HAVE_MEMBLOCK_PHYS_MAP
+ select HAVE_MOD_ARCH_SPECIFIC
+ select HAVE_NOP_MCOUNT
+ select HAVE_OPROFILE
+ select HAVE_PERF_EVENTS
+ select HAVE_REGS_AND_STACK_ACCESS_API
+ select HAVE_RSEQ
+ select HAVE_SYSCALL_TRACEPOINTS
+ select HAVE_VIRT_CPU_ACCOUNTING
+ select MODULES_USE_ELF_RELA
+ select NO_BOOTMEM
+ select OLD_SIGACTION
+ select OLD_SIGSUSPEND3
+ select SPARSE_IRQ
+ select SYSCTL_EXCEPTION_TRACE
+ select THREAD_INFO_IN_TASK
+ select TTY
+ select VIRT_CPU_ACCOUNTING
+ select ARCH_HAS_SCALED_CPUTIME
+ select VIRT_TO_BUS
+ select HAVE_NMI
+
+
+config SCHED_OMIT_FRAME_POINTER
+ def_bool y
+
+config PGTABLE_LEVELS
+ int
+ default 5
+
+source "kernel/livepatch/Kconfig"
+
+menu "Processor type and features"
+
+config HAVE_MARCH_Z900_FEATURES
+ def_bool n
+
+config HAVE_MARCH_Z990_FEATURES
+ def_bool n
+ select HAVE_MARCH_Z900_FEATURES
+
+config HAVE_MARCH_Z9_109_FEATURES
+ def_bool n
+ select HAVE_MARCH_Z990_FEATURES
+
+config HAVE_MARCH_Z10_FEATURES
+ def_bool n
+ select HAVE_MARCH_Z9_109_FEATURES
+
+config HAVE_MARCH_Z196_FEATURES
+ def_bool n
+ select HAVE_MARCH_Z10_FEATURES
+
+config HAVE_MARCH_ZEC12_FEATURES
+ def_bool n
+ select HAVE_MARCH_Z196_FEATURES
+
+config HAVE_MARCH_Z13_FEATURES
+ def_bool n
+ select HAVE_MARCH_ZEC12_FEATURES
+
+config HAVE_MARCH_Z14_FEATURES
+ def_bool n
+ select HAVE_MARCH_Z13_FEATURES
+
+choice
+ prompt "Processor type"
+ default MARCH_Z196
+
+config MARCH_Z900
+ bool "IBM zSeries model z800 and z900"
+ select HAVE_MARCH_Z900_FEATURES
+ help
+ Select this to enable optimizations for model z800/z900 (2064 and
+ 2066 series). This will enable some optimizations that are not
+ available on older ESA/390 (31 Bit) only CPUs.
+
+config MARCH_Z990
+ bool "IBM zSeries model z890 and z990"
+ select HAVE_MARCH_Z990_FEATURES
+ help
+ Select this to enable optimizations for model z890/z990 (2084 and
+ 2086 series). The kernel will be slightly faster but will not work
+ on older machines.
+
+config MARCH_Z9_109
+ bool "IBM System z9"
+ select HAVE_MARCH_Z9_109_FEATURES
+ help
+ Select this to enable optimizations for IBM System z9 (2094 and
+ 2096 series). The kernel will be slightly faster but will not work
+ on older machines.
+
+config MARCH_Z10
+ bool "IBM System z10"
+ select HAVE_MARCH_Z10_FEATURES
+ help
+ Select this to enable optimizations for IBM System z10 (2097 and
+ 2098 series). The kernel will be slightly faster but will not work
+ on older machines.
+
+config MARCH_Z196
+ bool "IBM zEnterprise 114 and 196"
+ select HAVE_MARCH_Z196_FEATURES
+ help
+ Select this to enable optimizations for IBM zEnterprise 114 and 196
+ (2818 and 2817 series). The kernel will be slightly faster but will
+ not work on older machines.
+
+config MARCH_ZEC12
+ bool "IBM zBC12 and zEC12"
+ select HAVE_MARCH_ZEC12_FEATURES
+ help
+ Select this to enable optimizations for IBM zBC12 and zEC12 (2828 and
+ 2827 series). The kernel will be slightly faster but will not work on
+ older machines.
+
+config MARCH_Z13
+ bool "IBM z13s and z13"
+ select HAVE_MARCH_Z13_FEATURES
+ help
+ Select this to enable optimizations for IBM z13s and z13 (2965 and
+ 2964 series). The kernel will be slightly faster but will not work on
+ older machines.
+
+config MARCH_Z14
+ bool "IBM z14 ZR1 and z14"
+ select HAVE_MARCH_Z14_FEATURES
+ help
+ Select this to enable optimizations for IBM z14 ZR1 and z14 (3907
+ and 3906 series). The kernel will be slightly faster but will not
+ work on older machines.
+
+endchoice
+
+config MARCH_Z900_TUNE
+ def_bool TUNE_Z900 || MARCH_Z900 && TUNE_DEFAULT
+
+config MARCH_Z990_TUNE
+ def_bool TUNE_Z990 || MARCH_Z990 && TUNE_DEFAULT
+
+config MARCH_Z9_109_TUNE
+ def_bool TUNE_Z9_109 || MARCH_Z9_109 && TUNE_DEFAULT
+
+config MARCH_Z10_TUNE
+ def_bool TUNE_Z10 || MARCH_Z10 && TUNE_DEFAULT
+
+config MARCH_Z196_TUNE
+ def_bool TUNE_Z196 || MARCH_Z196 && TUNE_DEFAULT
+
+config MARCH_ZEC12_TUNE
+ def_bool TUNE_ZEC12 || MARCH_ZEC12 && TUNE_DEFAULT
+
+config MARCH_Z13_TUNE
+ def_bool TUNE_Z13 || MARCH_Z13 && TUNE_DEFAULT
+
+config MARCH_Z14_TUNE
+ def_bool TUNE_Z14 || MARCH_Z14 && TUNE_DEFAULT
+
+choice
+ prompt "Tune code generation"
+ default TUNE_DEFAULT
+ help
+ Cause the compiler to tune (-mtune) the generated code for a machine.
+ This will make the code run faster on the selected machine but
+ somewhat slower on other machines.
+ This option only changes how the compiler emits instructions, not the
+ selection of instructions itself, so the resulting kernel will run on
+ all other machines.
+
+config TUNE_DEFAULT
+ bool "Default"
+ help
+ Tune the generated code for the target processor for which the kernel
+ will be compiled.
+
+config TUNE_Z900
+ bool "IBM zSeries model z800 and z900"
+
+config TUNE_Z990
+ bool "IBM zSeries model z890 and z990"
+
+config TUNE_Z9_109
+ bool "IBM System z9"
+
+config TUNE_Z10
+ bool "IBM System z10"
+
+config TUNE_Z196
+ bool "IBM zEnterprise 114 and 196"
+
+config TUNE_ZEC12
+ bool "IBM zBC12 and zEC12"
+
+config TUNE_Z13
+ bool "IBM z13"
+
+config TUNE_Z14
+ bool "IBM z14"
+
+endchoice
+
+config 64BIT
+ def_bool y
+
+config COMPAT
+ def_bool y
+ prompt "Kernel support for 31 bit emulation"
+ select COMPAT_BINFMT_ELF if BINFMT_ELF
+ select ARCH_WANT_OLD_COMPAT_IPC
+ select COMPAT_OLD_SIGACTION
+ depends on MULTIUSER
+ help
+ Select this option if you want to enable your system kernel to
+ handle system-calls from ELF binaries for 31 bit ESA. This option
+ (and some other stuff like libraries and such) is needed for
+ executing 31 bit applications. It is safe to say "Y".
+
+config SYSVIPC_COMPAT
+ def_bool y if COMPAT && SYSVIPC
+
+config SMP
+ def_bool y
+ prompt "Symmetric multi-processing support"
+ ---help---
+ This enables support for systems with more than one CPU. If you have
+ a system with only one CPU, like most personal computers, say N. If
+ you have a system with more than one CPU, say Y.
+
+ If you say N here, the kernel will run on uni- and multiprocessor
+ machines, but will use only one CPU of a multiprocessor machine. If
+ you say Y here, the kernel will run on many, but not all,
+ uniprocessor machines. On a uniprocessor machine, the kernel
+ will run faster if you say N here.
+
+ See also the SMP-HOWTO available at
+ <http://www.tldp.org/docs.html#howto>.
+
+ Even if you don't know what to do here, say Y.
+
+config NR_CPUS
+ int "Maximum number of CPUs (2-512)"
+ range 2 512
+ depends on SMP
+ default "64"
+ help
+ This allows you to specify the maximum number of CPUs which this
+ kernel will support. The maximum supported value is 512 and the
+ minimum value which makes sense is 2.
+
+ This is purely to save memory - each supported CPU adds
+ approximately sixteen kilobytes to the kernel image.
+
+config HOTPLUG_CPU
+ def_bool y
+ prompt "Support for hot-pluggable CPUs"
+ depends on SMP
+ help
+ Say Y here to be able to turn CPUs off and on. CPUs
+ can be controlled through /sys/devices/system/cpu/cpu#.
+ Say N if you want to disable CPU hotplug.
+
+# Some NUMA nodes have memory ranges that span
+# other nodes. Even though a pfn is valid and
+# between a node's start and end pfns, it may not
+# reside on that node. See memmap_init_zone()
+# for details. <- They meant memory holes!
+config NODES_SPAN_OTHER_NODES
+ def_bool NUMA
+
+config NUMA
+ bool "NUMA support"
+ depends on SMP && SCHED_TOPOLOGY
+ default n
+ help
+ Enable NUMA support
+
+ This option adds NUMA support to the kernel.
+
+ An operation mode can be selected by appending
+ numa=<method> to the kernel command line.
+
+ The default behaviour is identical to appending numa=plain to
+ the command line. This will create just one node with all
+ available memory and all CPUs in it.
+
+config NODES_SHIFT
+ int "Maximum NUMA nodes (as a power of 2)"
+ range 1 10
+ depends on NUMA
+ default "4"
+ help
+ Specify the maximum number of NUMA nodes available on the target
+ system. Increases memory reserved to accommodate various tables.
+
+menu "Select NUMA modes"
+ depends on NUMA
+
+config NUMA_EMU
+ bool "NUMA emulation"
+ default y
+ help
+ Numa emulation mode will split the available system memory into
+ equal chunks which then are distributed over the configured number
+ of nodes in a round-robin manner.
+
+ The number of fake nodes is limited by the number of available memory
+ chunks (i.e. memory size / fake size) and the number of supported
+ nodes in the kernel.
+
+ The CPUs are assigned to the nodes in a way that partially respects
+ the original machine topology (if supported by the machine).
+ Fair distribution of the CPUs is not guaranteed.
+
+config EMU_SIZE
+ hex "NUMA emulation memory chunk size"
+ default 0x10000000
+ range 0x400000 0x100000000
+ depends on NUMA_EMU
+ help
+ Select the default size by which the memory is chopped and then
+ assigned to emulated NUMA nodes.
+
+ This can be overridden by specifying
+
+ emu_size=<n>
+
+ on the kernel command line, where the suffixes K, M, G, and T are also
+ supported.
+
+endmenu
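
(Editorial aside, not part of the patch: the chunking described in the NUMA_EMU and EMU_SIZE help texts above works out as in the small C sketch below. The memory size and node count are assumed example values, not taken from a real system.)

    #include <stdio.h>

    int main(void)
    {
            /* Assumed example values: */
            unsigned long long mem_size = 64ULL << 30;      /* 64 GiB of usable memory */
            unsigned long long emu_size = 0x10000000ULL;    /* EMU_SIZE default: 256 MiB chunks */
            unsigned int nodes = 4;                         /* emulated node count */

            unsigned long long chunks = mem_size / emu_size;

            /* Chunks are handed out to the nodes round-robin, so each node
             * ends up with roughly chunks / nodes of them. */
            printf("%llu chunks, about %llu per node\n", chunks, chunks / nodes);
            return 0;
    }

With these numbers: 256 chunks, about 64 chunks (16 GiB) per emulated node.
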
+
+config SCHED_SMT
+ def_bool n
+
+config SCHED_MC
+ def_bool n
+
+config SCHED_BOOK
+ def_bool n
+
+config SCHED_DRAWER
+ def_bool n
+
+config SCHED_TOPOLOGY
+ def_bool y
+ prompt "Topology scheduler support"
+ depends on SMP
+ select SCHED_SMT
+ select SCHED_MC
+ select SCHED_BOOK
+ select SCHED_DRAWER
+ help
+ Topology scheduler support improves the CPU scheduler's decision
+ making when dealing with machines that have multi-threading,
+ multiple cores or multiple books.
+
+source kernel/Kconfig.hz
+
+config KEXEC
+ def_bool y
+ select KEXEC_CORE
+
+config KEXEC_FILE
+ bool "kexec file based system call"
+ select KEXEC_CORE
+ select BUILD_BIN2C
+ depends on CRYPTO
+ depends on CRYPTO_SHA256
+ depends on CRYPTO_SHA256_S390
+ help
+ Enable the kexec file based system call. In contrast to the normal
+ kexec system call this system call takes file descriptors for the
+ kernel and initramfs as arguments.
+
+config ARCH_HAS_KEXEC_PURGATORY
+ def_bool y
+ depends on KEXEC_FILE
+
+config ARCH_RANDOM
+ def_bool y
+ prompt "s390 architectural random number generation API"
+ help
+ Enable the s390 architectural random number generation API
+ to provide random data for all consumers within the Linux
+ kernel.
+
+ When enabled the arch_random_* functions declared in linux/random.h
+ are implemented. The implementation is based on the s390 CPACF
+ instruction subfunction TRNG which provides a real true random
+ number generator.
+
+ If unsure, say Y.
+
+config KERNEL_NOBP
+ def_bool n
+ prompt "Enable modified branch prediction for the kernel by default"
+ help
+ If this option is selected the kernel will switch to a modified
+ branch prediction mode if the firmware interface is available.
+ The modified branch prediction mode improves the behaviour in
+ regard to speculative execution.
+
+ With the option enabled the kernel parameter "nobp=0" or "nospec"
+ can be used to run the kernel in the normal branch prediction mode.
+
+ With the option disabled the modified branch prediction mode is
+ enabled with the "nobp=1" kernel parameter.
+
+ If unsure, say N.
+
+config EXPOLINE
+ def_bool n
+ prompt "Avoid speculative indirect branches in the kernel"
+ help
+ Compile the kernel with the expoline compiler options to guard
+ against kernel-to-user data leaks by avoiding speculative indirect
+ branches.
+ Requires a compiler with -mindirect-branch=thunk support for full
+ protection. The kernel may run slower.
+
+ If unsure, say N.
+
+choice
+ prompt "Expoline default"
+ depends on EXPOLINE
+ default EXPOLINE_FULL
+
+config EXPOLINE_OFF
+ bool "spectre_v2=off"
+
+config EXPOLINE_AUTO
+ bool "spectre_v2=auto"
+
+config EXPOLINE_FULL
+ bool "spectre_v2=on"
+
+endchoice
+
+endmenu
+
+menu "Memory setup"
+
+config ARCH_SPARSEMEM_ENABLE
+ def_bool y
+ select SPARSEMEM_VMEMMAP_ENABLE
+ select SPARSEMEM_VMEMMAP
+
+config ARCH_SPARSEMEM_DEFAULT
+ def_bool y
+
+config ARCH_SELECT_MEMORY_MODEL
+ def_bool y
+
+config ARCH_ENABLE_MEMORY_HOTPLUG
+ def_bool y if SPARSEMEM
+
+config ARCH_ENABLE_MEMORY_HOTREMOVE
+ def_bool y
+
+config ARCH_ENABLE_SPLIT_PMD_PTLOCK
+ def_bool y
+
+config FORCE_MAX_ZONEORDER
+ int
+ default "9"
+
+config MAX_PHYSMEM_BITS
+ int "Maximum size of supported physical memory in bits (42-53)"
+ range 42 53
+ default "46"
+ help
+ This option specifies the maximum supported size of physical memory
+ in bits. Supported is any size between 2^42 (4TB) and 2^53 (8PB).
+ Increasing the number of bits also increases the kernel image size.
+ By default 46 bits (64TB) are supported.
+
+config PACK_STACK
+ def_bool y
+ prompt "Pack kernel stack"
+ help
+ This option enables the compiler option -mkernel-backchain if it
+ is available. If the option is available the compiler supports
+ the new stack layout which dramatically reduces the minimum stack
+ frame size. With an old compiler a non-leaf function needs a
+ minimum of 96 bytes on 31 bit and 160 bytes on 64 bit. With
+ -mkernel-backchain the minimum size drops to 16 bytes on 31 bit
+ and 24 bytes on 64 bit.
+
+ Say Y if you are unsure.
+
+config CHECK_STACK
+ def_bool y
+ prompt "Detect kernel stack overflow"
+ help
+ This option enables the compiler option -mstack-guard and
+ -mstack-size if they are available. If the compiler supports them
+ it will emit additional code to each function prolog to trigger
+ an illegal operation if the kernel stack is about to overflow.
+
+ Say N if you are unsure.
+
+config STACK_GUARD
+ int "Size of the guard area (128-1024)"
+ range 128 1024
+ depends on CHECK_STACK
+ default "256"
+ help
+ This allows you to specify the size of the guard area at the lower
+ end of the kernel stack. If the kernel stack points into the guard
+ area on function entry an illegal operation is triggered. The size
+ needs to be a power of 2. Please keep in mind that the size of an
+ interrupt frame is 184 bytes for 31 bit and 328 bytes on 64 bit.
+ The minimum size for the stack guard should be 256 for 31 bit and
+ 512 for 64 bit.
+
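
(Editorial aside, not part of the patch: the "power of 2" requirement above is the usual single-set-bit condition; a minimal C check looks like this.)

    #include <stdio.h>

    /* A value is a power of 2 iff it is non-zero and has exactly one bit set. */
    static int is_power_of_two(unsigned int size)
    {
            return size != 0 && (size & (size - 1)) == 0;
    }

    int main(void)
    {
            printf("256: %d, 384: %d\n", is_power_of_two(256), is_power_of_two(384));
            return 0;
    }
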
+config WARN_DYNAMIC_STACK
+ def_bool n
+ prompt "Emit compiler warnings for function with dynamic stack usage"
+ help
+ This option enables the compiler option -mwarn-dynamicstack. If the
+ compiler supports this option, it generates warnings for functions
+ that dynamically allocate stack space using alloca.
+
+ Say N if you are unsure.
+
+endmenu
+
+menu "I/O subsystem"
+
+config QDIO
+ def_tristate y
+ prompt "QDIO support"
+ ---help---
+ This driver provides the Queued Direct I/O base support for
+ IBM System z.
+
+ To compile this driver as a module, choose M here: the
+ module will be called qdio.
+
+ If unsure, say Y.
+
+menuconfig PCI
+ bool "PCI support"
+ select PCI_MSI
+ select IOMMU_HELPER
+ select IOMMU_SUPPORT
+ select NEED_DMA_MAP_STATE
+ select NEED_SG_DMA_LENGTH
+
+ help
+ Enable PCI support.
+
+if PCI
+
+config PCI_NR_FUNCTIONS
+ int "Maximum number of PCI functions (1-4096)"
+ range 1 4096
+ default "128"
+ help
+ This allows you to specify the maximum number of PCI functions which
+ this kernel will support.
+
+source "drivers/pci/Kconfig"
+
+endif # PCI
+
+config PCI_DOMAINS
+ def_bool PCI
+
+config HAS_IOMEM
+ def_bool PCI
+
+config CHSC_SCH
+ def_tristate m
+ prompt "Support for CHSC subchannels"
+ help
+ This driver allows usage of CHSC subchannels. A CHSC subchannel
+ is usually present on LPAR only.
+ The driver creates a device /dev/chsc, which may be used to
+ obtain I/O configuration information about the machine and
+ to issue asynchronous chsc commands (DANGEROUS).
+ You will usually only want to use this interface on a special
+ LPAR designated for system management.
+
+ To compile this driver as a module, choose M here: the
+ module will be called chsc_sch.
+
+ If unsure, say N.
+
+config SCM_BUS
+ def_bool y
+ prompt "SCM bus driver"
+ help
+ Bus driver for Storage Class Memory.
+
+config EADM_SCH
+ def_tristate m
+ prompt "Support for EADM subchannels"
+ depends on SCM_BUS
+ help
+ This driver allows usage of EADM subchannels. EADM subchannels act
+ as a communication vehicle for SCM increments.
+
+ To compile this driver as a module, choose M here: the
+ module will be called eadm_sch.
+
+config VFIO_CCW
+ def_tristate n
+ prompt "Support for VFIO-CCW subchannels"
+ depends on S390_CCW_IOMMU && VFIO_MDEV
+ help
+ This driver allows usage of I/O subchannels via VFIO-CCW.
+
+ To compile this driver as a module, choose M here: the
+ module will be called vfio_ccw.
+
+endmenu
+
+menu "Dump support"
+
+config CRASH_DUMP
+ bool "kernel crash dumps"
+ depends on SMP
+ select KEXEC
+ help
+ Generate crash dump after being started by kexec.
+ Crash dump kernels are loaded in the main kernel with kexec-tools
+ into a specially reserved region and then later executed after
+ a crash by kdump/kexec.
+ Refer to <file:Documentation/s390/zfcpdump.txt> for more details on this.
+ This option also enables s390 zfcpdump.
+ See also <file:Documentation/s390/zfcpdump.txt>
+
+endmenu
+
+config SECCOMP
+ def_bool y
+ prompt "Enable seccomp to safely compute untrusted bytecode"
+ depends on PROC_FS
+ help
+ This kernel feature is useful for number crunching applications
+ that may need to compute untrusted bytecode during their
+ execution. By using pipes or other transports made available to
+ the process as file descriptors supporting the read/write
+ syscalls, it's possible to isolate those applications in
+ their own address space using seccomp. Once seccomp is
+ enabled via /proc/<pid>/seccomp, it cannot be disabled
+ and the task is only allowed to execute a few safe syscalls
+ defined by each seccomp mode.
+
+ If unsure, say Y.
+
+menu "Power Management"
+
+config ARCH_HIBERNATION_POSSIBLE
+ def_bool y
+
+source "kernel/power/Kconfig"
+
+endmenu
+
+config PCMCIA
+ def_bool n
+
+config CCW
+ def_bool y
+
+config HAVE_PNETID
+ tristate
+ default (SMC || CCWGROUP)
+
+menu "Virtualization"
+
+config PFAULT
+ def_bool y
+ prompt "Pseudo page fault support"
+ help
+ Select this option, if you want to use PFAULT pseudo page fault
+ handling under VM. If running native or in LPAR, this option
+ has no effect. If your VM does not support PFAULT, PAGEEX
+ pseudo page fault handling will be used.
+ Note that VM 4.2 supports PFAULT but has a bug in its
+ implementation that causes some problems.
+ Everybody who wants to run Linux under VM != VM4.2 should select
+ this option.
+
+config CMM
+ def_tristate n
+ prompt "Cooperative memory management"
+ help
+ Select this option, if you want to enable the kernel interface
+ to reduce the memory size of the system. This is accomplished
+ by allocating pages of memory and putting them "on hold". This only
+ makes sense for a system running under VM where the unused pages
+ will be reused by VM for other guest systems. The interface
+ allows an external monitor to balance memory of many systems.
+ Everybody who wants to run Linux under VM should select this
+ option.
+
+config CMM_IUCV
+ def_bool y
+ prompt "IUCV special message interface to cooperative memory management"
+ depends on CMM && (SMSGIUCV=y || CMM=SMSGIUCV)
+ help
+ Select this option to enable the special message interface to
+ the cooperative memory management.
+
+config APPLDATA_BASE
+ def_bool n
+ prompt "Linux - VM Monitor Stream, base infrastructure"
+ depends on PROC_SYSCTL
+ help
+ This provides a kernel interface for creating and updating z/VM APPLDATA
+ monitor records. The monitor records are updated at certain time
+ intervals, once the timer is started.
+ Writing 1 or 0 to /proc/appldata/timer starts(1) or stops(0) the timer,
+ i.e. enables or disables monitoring on the Linux side.
+ A custom interval value (in seconds) can be written to
+ /proc/appldata/interval.
+
+ Defaults are 60 seconds interval and timer off.
+ The /proc entries can also be read from, showing the current settings.
+
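
(Editorial aside, not part of the patch: a minimal userspace C sketch of the /proc interface described above. It only uses the /proc/appldata/timer path named in the help text; error handling is reduced to the bare minimum.)

    #include <stdio.h>

    /* Start the APPLDATA timer by writing "1" to /proc/appldata/timer,
     * then read the current setting back. */
    int main(void)
    {
            FILE *f = fopen("/proc/appldata/timer", "w");
            char buf[16];

            if (!f)
                    return 1;       /* APPLDATA_BASE not enabled or no permission */
            fputs("1\n", f);
            fclose(f);

            f = fopen("/proc/appldata/timer", "r");
            if (f && fgets(buf, sizeof(buf), f))
                    printf("timer: %s", buf);
            if (f)
                    fclose(f);
            return 0;
    }
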
+config APPLDATA_MEM
+ def_tristate m
+ prompt "Monitor memory management statistics"
+ depends on APPLDATA_BASE && VM_EVENT_COUNTERS
+ help
+ This provides memory management related data to the Linux - VM Monitor
+ Stream, like paging/swapping rate, memory utilisation, etc.
+ Writing 1 or 0 to /proc/appldata/memory creates(1) or removes(0) a z/VM
+ APPLDATA monitor record, i.e. enables or disables monitoring this record
+ on the z/VM side.
+
+ Default is disabled.
+ The /proc entry can also be read from, showing the current settings.
+
+ This can also be compiled as a module, which will be called
+ appldata_mem.o.
+
+config APPLDATA_OS
+ def_tristate m
+ prompt "Monitor OS statistics"
+ depends on APPLDATA_BASE
+ help
+ This provides OS related data to the Linux - VM Monitor Stream, like
+ CPU utilisation, etc.
+ Writing 1 or 0 to /proc/appldata/os creates(1) or removes(0) a z/VM
+ APPLDATA monitor record, i.e. enables or disables monitoring this record
+ on the z/VM side.
+
+ Default is disabled.
+ This can also be compiled as a module, which will be called
+ appldata_os.o.
+
+config APPLDATA_NET_SUM
+ def_tristate m
+ prompt "Monitor overall network statistics"
+ depends on APPLDATA_BASE && NET
+ help
+ This provides network related data to the Linux - VM Monitor Stream,
+ currently there is only a total sum of network I/O statistics, no
+ per-interface data.
+ Writing 1 or 0 to /proc/appldata/net_sum creates(1) or removes(0) a z/VM
+ APPLDATA monitor record, i.e. enables or disables monitoring this record
+ on the z/VM side.
+
+ Default is disabled.
+ This can also be compiled as a module, which will be called
+ appldata_net_sum.o.
+
+config S390_HYPFS_FS
+ def_bool y
+ prompt "s390 hypervisor file system support"
+ select SYS_HYPERVISOR
+ help
+ This is a virtual file system intended to provide accounting
+ information in an s390 hypervisor environment.
+
+source "arch/s390/kvm/Kconfig"
+
+config S390_GUEST
+ def_bool y
+ prompt "s390 support for virtio devices"
+ select TTY
+ select VIRTUALIZATION
+ select VIRTIO
+ select VIRTIO_CONSOLE
+ help
+ Enabling this option adds support for virtio based paravirtual device
+ drivers on s390.
+
+ Select this option if you want to run the kernel as a guest under
+ the KVM hypervisor.
+
+endmenu
diff --git a/arch/s390/Kconfig.debug b/arch/s390/Kconfig.debug
new file mode 100644
index 000000000..190527560
--- /dev/null
+++ b/arch/s390/Kconfig.debug
@@ -0,0 +1,19 @@
+# SPDX-License-Identifier: GPL-2.0
+
+config TRACE_IRQFLAGS_SUPPORT
+ def_bool y
+
+config S390_PTDUMP
+ bool "Export kernel pagetable layout to userspace via debugfs"
+ depends on DEBUG_KERNEL
+ select DEBUG_FS
+ ---help---
+ Say Y here if you want to show the kernel pagetable layout in a
+ debugfs file. This information is only useful for kernel developers
+ who are working in architecture specific areas of the kernel.
+ It is probably not a good idea to enable this feature in a production
+ kernel.
+ If in doubt, say "N"
+
+config EARLY_PRINTK
+ def_bool y
diff --git a/arch/s390/Makefile b/arch/s390/Makefile
new file mode 100644
index 000000000..4d0082f3d
--- /dev/null
+++ b/arch/s390/Makefile
@@ -0,0 +1,184 @@
+# SPDX-License-Identifier: GPL-2.0
+#
+# s390/Makefile
+#
+# This file is included by the global makefile so that you can add your own
+# architecture-specific flags and dependencies. Remember to do have actions
+# for "archclean" and "archdep" for cleaning up and making dependencies for
+# this architecture
+#
+# Copyright (C) 1994 by Linus Torvalds
+#
+
+LD_BFD := elf64-s390
+KBUILD_LDFLAGS := -m elf64_s390
+KBUILD_AFLAGS_MODULE += -fPIC
+KBUILD_CFLAGS_MODULE += -fPIC
+KBUILD_AFLAGS += -m64
+KBUILD_CFLAGS += -m64
+aflags_dwarf := -Wa,-gdwarf-2
+KBUILD_AFLAGS_DECOMPRESSOR := -m64 -D__ASSEMBLY__
+KBUILD_AFLAGS_DECOMPRESSOR += $(if $(CONFIG_DEBUG_INFO),$(aflags_dwarf))
+KBUILD_CFLAGS_DECOMPRESSOR := -m64 -O2
+KBUILD_CFLAGS_DECOMPRESSOR += -DDISABLE_BRANCH_PROFILING -D__NO_FORTIFY
+KBUILD_CFLAGS_DECOMPRESSOR += -fno-delete-null-pointer-checks -msoft-float
+KBUILD_CFLAGS_DECOMPRESSOR += -fno-asynchronous-unwind-tables
+KBUILD_CFLAGS_DECOMPRESSOR += $(call cc-option,-ffreestanding)
+KBUILD_CFLAGS_DECOMPRESSOR += $(call cc-disable-warning, address-of-packed-member)
+KBUILD_CFLAGS_DECOMPRESSOR += $(if $(CONFIG_DEBUG_INFO),-g)
+KBUILD_CFLAGS_DECOMPRESSOR += $(if $(CONFIG_DEBUG_INFO_DWARF4), $(call cc-option, -gdwarf-4,))
+
+ifdef CONFIG_CC_IS_GCC
+ ifeq ($(call cc-ifversion, -ge, 1200, y), y)
+ ifeq ($(call cc-ifversion, -lt, 1300, y), y)
+ KBUILD_CFLAGS += $(call cc-disable-warning, array-bounds)
+ KBUILD_CFLAGS_DECOMPRESSOR += $(call cc-disable-warning, array-bounds)
+ endif
+ endif
+endif
+
+UTS_MACHINE := s390x
+STACK_SIZE := 16384
+CHECKFLAGS += -D__s390__ -D__s390x__
+
+export LD_BFD
+
+mflags-$(CONFIG_MARCH_Z900) := -march=z900
+mflags-$(CONFIG_MARCH_Z990) := -march=z990
+mflags-$(CONFIG_MARCH_Z9_109) := -march=z9-109
+mflags-$(CONFIG_MARCH_Z10) := -march=z10
+mflags-$(CONFIG_MARCH_Z196) := -march=z196
+mflags-$(CONFIG_MARCH_ZEC12) := -march=zEC12
+mflags-$(CONFIG_MARCH_Z13) := -march=z13
+mflags-$(CONFIG_MARCH_Z14) := -march=z14
+
+export CC_FLAGS_MARCH := $(mflags-y)
+
+aflags-y += $(mflags-y)
+cflags-y += $(mflags-y)
+
+cflags-$(CONFIG_MARCH_Z900_TUNE) += -mtune=z900
+cflags-$(CONFIG_MARCH_Z990_TUNE) += -mtune=z990
+cflags-$(CONFIG_MARCH_Z9_109_TUNE) += -mtune=z9-109
+cflags-$(CONFIG_MARCH_Z10_TUNE) += -mtune=z10
+cflags-$(CONFIG_MARCH_Z196_TUNE) += -mtune=z196
+cflags-$(CONFIG_MARCH_ZEC12_TUNE) += -mtune=zEC12
+cflags-$(CONFIG_MARCH_Z13_TUNE) += -mtune=z13
+cflags-$(CONFIG_MARCH_Z14_TUNE) += -mtune=z14
+
+cflags-y += -Wa,-I$(srctree)/arch/$(ARCH)/include
+
+#
+# Prevent tail-call optimizations, to get clearer backtraces:
+#
+cflags-$(CONFIG_FRAME_POINTER) += -fno-optimize-sibling-calls
+
+ifeq ($(call cc-option-yn,-mpacked-stack -mbackchain -msoft-float),y)
+cflags-$(CONFIG_PACK_STACK) += -mpacked-stack -D__PACK_STACK
+aflags-$(CONFIG_PACK_STACK) += -D__PACK_STACK
+endif
+
+KBUILD_AFLAGS_DECOMPRESSOR += $(aflags-y)
+KBUILD_CFLAGS_DECOMPRESSOR += $(cflags-y)
+
+ifeq ($(call cc-option-yn,-mstack-size=8192 -mstack-guard=128),y)
+cflags-$(CONFIG_CHECK_STACK) += -mstack-size=$(STACK_SIZE)
+ifneq ($(call cc-option-yn,-mstack-size=8192),y)
+cflags-$(CONFIG_CHECK_STACK) += -mstack-guard=$(CONFIG_STACK_GUARD)
+endif
+endif
+
+ifdef CONFIG_WARN_DYNAMIC_STACK
+ ifeq ($(call cc-option-yn,-mwarn-dynamicstack),y)
+ KBUILD_CFLAGS += -mwarn-dynamicstack
+ KBUILD_CFLAGS_DECOMPRESSOR += -mwarn-dynamicstack
+ endif
+endif
+
+ifdef CONFIG_EXPOLINE
+ ifeq ($(call cc-option-yn,$(CC_FLAGS_MARCH) -mindirect-branch=thunk),y)
+ CC_FLAGS_EXPOLINE := -mindirect-branch=thunk
+ CC_FLAGS_EXPOLINE += -mfunction-return=thunk
+ CC_FLAGS_EXPOLINE += -mindirect-branch-table
+ export CC_FLAGS_EXPOLINE
+ cflags-y += $(CC_FLAGS_EXPOLINE) -DCC_USING_EXPOLINE
+ aflags-y += -DCC_USING_EXPOLINE
+ endif
+endif
+
+ifdef CONFIG_FUNCTION_TRACER
+ ifeq ($(call cc-option-yn,-mfentry -mnop-mcount),n)
+ # make use of hotpatch feature if the compiler supports it
+ cc_hotpatch := -mhotpatch=0,3
+ ifeq ($(call cc-option-yn,$(cc_hotpatch)),y)
+ CC_FLAGS_FTRACE := $(cc_hotpatch)
+ KBUILD_AFLAGS += -DCC_USING_HOTPATCH
+ KBUILD_CFLAGS += -DCC_USING_HOTPATCH
+ endif
+ endif
+endif
+
+# Test CFI features of binutils
+cfi := $(call as-instr,.cfi_startproc\n.cfi_val_offset 15$(comma)-160\n.cfi_endproc,-DCONFIG_AS_CFI_VAL_OFFSET=1)
+
+KBUILD_CFLAGS += -mbackchain -msoft-float $(cflags-y)
+KBUILD_CFLAGS += -pipe -fno-strength-reduce -Wno-sign-compare
+KBUILD_CFLAGS += -fno-asynchronous-unwind-tables $(cfi)
+KBUILD_AFLAGS += $(aflags-y) $(cfi)
+export KBUILD_AFLAGS_DECOMPRESSOR
+export KBUILD_CFLAGS_DECOMPRESSOR
+
+OBJCOPYFLAGS := -O binary
+
+head-y := arch/s390/kernel/head64.o
+
+# See arch/s390/Kbuild for content of core part of the kernel
+core-y += arch/s390/
+
+libs-y += arch/s390/lib/
+drivers-y += drivers/s390/
+
+# must be linked after kernel
+drivers-$(CONFIG_OPROFILE) += arch/s390/oprofile/
+
+boot := arch/s390/boot
+syscalls := arch/s390/kernel/syscalls
+tools := arch/s390/tools
+
+all: bzImage
+
+#KBUILD_IMAGE is necessary for packaging targets like rpm-pkg, deb-pkg...
+KBUILD_IMAGE := $(boot)/bzImage
+
+install:
+ $(Q)$(MAKE) $(build)=$(boot) $@
+
+bzImage: vmlinux
+ $(Q)$(MAKE) $(build)=$(boot) $(boot)/$@
+
+zfcpdump:
+ $(Q)$(MAKE) $(build)=$(boot) $(boot)/$@
+
+vdso_install:
+ $(Q)$(MAKE) $(build)=arch/$(ARCH)/kernel/vdso64 $@
+ $(Q)$(MAKE) $(build)=arch/$(ARCH)/kernel/vdso32 $@
+
+archclean:
+ $(Q)$(MAKE) $(clean)=$(boot)
+ $(Q)$(MAKE) $(clean)=$(tools)
+
+archheaders:
+ $(Q)$(MAKE) $(build)=$(syscalls) uapi
+
+archprepare:
+ $(Q)$(MAKE) $(build)=$(syscalls) kapi
+ $(Q)$(MAKE) $(build)=$(tools) kapi
+
+# Don't use tabs in echo arguments
+define archhelp
+ echo '* bzImage - Kernel image for IPL ($(boot)/bzImage)'
+ echo ' install - Install kernel using'
+ echo ' (your) ~/bin/$(INSTALLKERNEL) or'
+ echo ' (distribution) /sbin/$(INSTALLKERNEL) or'
+ echo ' install to $$(INSTALL_PATH)'
+endef
diff --git a/arch/s390/appldata/Makefile b/arch/s390/appldata/Makefile
new file mode 100644
index 000000000..b06def4a4
--- /dev/null
+++ b/arch/s390/appldata/Makefile
@@ -0,0 +1,9 @@
+# SPDX-License-Identifier: GPL-2.0
+#
+# Makefile for the Linux - z/VM Monitor Stream.
+#
+
+obj-$(CONFIG_APPLDATA_BASE) += appldata_base.o
+obj-$(CONFIG_APPLDATA_MEM) += appldata_mem.o
+obj-$(CONFIG_APPLDATA_OS) += appldata_os.o
+obj-$(CONFIG_APPLDATA_NET_SUM) += appldata_net_sum.o
diff --git a/arch/s390/appldata/appldata.h b/arch/s390/appldata/appldata.h
new file mode 100644
index 000000000..10346d2f3
--- /dev/null
+++ b/arch/s390/appldata/appldata.h
@@ -0,0 +1,48 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Definitions and interface for Linux - z/VM Monitor Stream.
+ *
+ * Copyright IBM Corp. 2003, 2008
+ *
+ * Author: Gerald Schaefer <gerald.schaefer@de.ibm.com>
+ */
+
+#define APPLDATA_MAX_REC_SIZE 4024 /* Maximum size of the */
+ /* data buffer */
+#define APPLDATA_MAX_PROCS 100
+
+#define APPLDATA_PROC_NAME_LENGTH 16 /* Max. length of /proc name */
+
+#define APPLDATA_RECORD_MEM_ID 0x01 /* IDs to identify the */
+#define APPLDATA_RECORD_OS_ID 0x02 /* individual records, */
+#define APPLDATA_RECORD_NET_SUM_ID 0x03 /* must be < 256 ! */
+#define APPLDATA_RECORD_PROC_ID 0x04
+
+#define CTL_APPLDATA_TIMER 2121 /* sysctl IDs, must be unique */
+#define CTL_APPLDATA_INTERVAL 2122
+#define CTL_APPLDATA_MEM 2123
+#define CTL_APPLDATA_OS 2124
+#define CTL_APPLDATA_NET_SUM 2125
+#define CTL_APPLDATA_PROC 2126
+
+struct appldata_ops {
+ struct list_head list;
+ struct ctl_table_header *sysctl_header;
+ struct ctl_table *ctl_table;
+ int active; /* monitoring status */
+
+ /* fill in from here */
+ char name[APPLDATA_PROC_NAME_LENGTH]; /* name of /proc fs node */
+ unsigned char record_nr; /* Record Nr. for Product ID */
+ void (*callback)(void *data); /* callback function */
+ void *data; /* record data */
+ unsigned int size; /* size of record */
+ struct module *owner; /* THIS_MODULE */
+ char mod_lvl[2]; /* modification level, EBCDIC */
+};
+
+extern int appldata_register_ops(struct appldata_ops *ops);
+extern void appldata_unregister_ops(struct appldata_ops *ops);
+extern int appldata_diag(char record_nr, u16 function, unsigned long buffer,
+ u16 length, char *mod_lvl);
+
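
(Editorial aside, not part of the patch: for orientation, a minimal sketch of a data-gathering module built on the interface declared above, in the style of the appldata_* modules in this directory. The "example" names, the record layout and the record number are hypothetical; only the appldata_ops fields and appldata_register_ops()/appldata_unregister_ops() come from this header, and the sketch assumes it is built next to appldata.h.)

    #include <linux/module.h>
    #include <linux/init.h>
    #include "appldata.h"

    /* Hypothetical record layout; real record formats are defined by the
     * z/VM monitor stream. */
    struct example_rec {
            u64 sample_count;
    } __packed;

    static struct example_rec example_data;

    /* Called by appldata_base on every timer tick while monitoring is active. */
    static void example_callback(void *data)
    {
            struct example_rec *rec = data;

            rec->sample_count++;
    }

    static struct appldata_ops example_ops = {
            .name           = "example",            /* creates /proc/appldata/example */
            .record_nr      = 0x7f,                 /* hypothetical ID, must be < 256 (see above) */
            .size           = sizeof(struct example_rec),
            .callback       = example_callback,
            .data           = &example_data,
            .owner          = THIS_MODULE,
            .mod_lvl        = {0xF0, 0xF0},         /* "00" in EBCDIC */
    };

    static int __init example_init(void)
    {
            return appldata_register_ops(&example_ops);
    }

    static void __exit example_exit(void)
    {
            appldata_unregister_ops(&example_ops);
    }

    module_init(example_init);
    module_exit(example_exit);
    MODULE_LICENSE("GPL");
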
diff --git a/arch/s390/appldata/appldata_base.c b/arch/s390/appldata/appldata_base.c
new file mode 100644
index 000000000..9bf8489df
--- /dev/null
+++ b/arch/s390/appldata/appldata_base.c
@@ -0,0 +1,544 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Base infrastructure for Linux-z/VM Monitor Stream, Stage 1.
+ * Exports appldata_register_ops() and appldata_unregister_ops() for the
+ * data gathering modules.
+ *
+ * Copyright IBM Corp. 2003, 2009
+ *
+ * Author: Gerald Schaefer <gerald.schaefer@de.ibm.com>
+ */
+
+#define KMSG_COMPONENT "appldata"
+#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
+
+#include <linux/module.h>
+#include <linux/sched/stat.h>
+#include <linux/init.h>
+#include <linux/slab.h>
+#include <linux/errno.h>
+#include <linux/interrupt.h>
+#include <linux/proc_fs.h>
+#include <linux/mm.h>
+#include <linux/swap.h>
+#include <linux/pagemap.h>
+#include <linux/sysctl.h>
+#include <linux/notifier.h>
+#include <linux/cpu.h>
+#include <linux/workqueue.h>
+#include <linux/suspend.h>
+#include <linux/platform_device.h>
+#include <asm/appldata.h>
+#include <asm/vtimer.h>
+#include <linux/uaccess.h>
+#include <asm/io.h>
+#include <asm/smp.h>
+
+#include "appldata.h"
+
+
+#define APPLDATA_CPU_INTERVAL 10000 /* default (CPU) time for
+ sampling interval in
+ milliseconds */
+
+#define TOD_MICRO 0x01000 /* nr. of TOD clock units
+ for 1 microsecond */
+
+static struct platform_device *appldata_pdev;
+
+/*
+ * /proc entries (sysctl)
+ */
+static const char appldata_proc_name[APPLDATA_PROC_NAME_LENGTH] = "appldata";
+static int appldata_timer_handler(struct ctl_table *ctl, int write,
+ void __user *buffer, size_t *lenp, loff_t *ppos);
+static int appldata_interval_handler(struct ctl_table *ctl, int write,
+ void __user *buffer,
+ size_t *lenp, loff_t *ppos);
+
+static struct ctl_table_header *appldata_sysctl_header;
+static struct ctl_table appldata_table[] = {
+ {
+ .procname = "timer",
+ .mode = S_IRUGO | S_IWUSR,
+ .proc_handler = appldata_timer_handler,
+ },
+ {
+ .procname = "interval",
+ .mode = S_IRUGO | S_IWUSR,
+ .proc_handler = appldata_interval_handler,
+ },
+ { },
+};
+
+static struct ctl_table appldata_dir_table[] = {
+ {
+ .procname = appldata_proc_name,
+ .maxlen = 0,
+ .mode = S_IRUGO | S_IXUGO,
+ .child = appldata_table,
+ },
+ { },
+};
+
+/*
+ * Timer
+ */
+static struct vtimer_list appldata_timer;
+
+static DEFINE_SPINLOCK(appldata_timer_lock);
+static int appldata_interval = APPLDATA_CPU_INTERVAL;
+static int appldata_timer_active;
+static int appldata_timer_suspended = 0;
+
+/*
+ * Work queue
+ */
+static struct workqueue_struct *appldata_wq;
+static void appldata_work_fn(struct work_struct *work);
+static DECLARE_WORK(appldata_work, appldata_work_fn);
+
+
+/*
+ * Ops list
+ */
+static DEFINE_MUTEX(appldata_ops_mutex);
+static LIST_HEAD(appldata_ops_list);
+
+
+/*************************** timer, work, DIAG *******************************/
+/*
+ * appldata_timer_function()
+ *
+ * schedule work and reschedule timer
+ */
+static void appldata_timer_function(unsigned long data)
+{
+ queue_work(appldata_wq, (struct work_struct *) data);
+}
+
+/*
+ * appldata_work_fn()
+ *
+ * call data gathering function for each (active) module
+ */
+static void appldata_work_fn(struct work_struct *work)
+{
+ struct list_head *lh;
+ struct appldata_ops *ops;
+
+ mutex_lock(&appldata_ops_mutex);
+ list_for_each(lh, &appldata_ops_list) {
+ ops = list_entry(lh, struct appldata_ops, list);
+ if (ops->active == 1) {
+ ops->callback(ops->data);
+ }
+ }
+ mutex_unlock(&appldata_ops_mutex);
+}
+
+/*
+ * appldata_diag()
+ *
+ * prepare parameter list, issue DIAG 0xDC
+ */
+int appldata_diag(char record_nr, u16 function, unsigned long buffer,
+ u16 length, char *mod_lvl)
+{
+ struct appldata_product_id id = {
+ .prod_nr = {0xD3, 0xC9, 0xD5, 0xE4,
+ 0xE7, 0xD2, 0xD9}, /* "LINUXKR" */
+ .prod_fn = 0xD5D3, /* "NL" */
+ .version_nr = 0xF2F6, /* "26" */
+ .release_nr = 0xF0F1, /* "01" */
+ };
+
+ id.record_nr = record_nr;
+ id.mod_lvl = (mod_lvl[0]) << 8 | mod_lvl[1];
+ return appldata_asm(&id, function, (void *) buffer, length);
+}
+/************************ timer, work, DIAG <END> ****************************/
+
+
+/****************************** /proc stuff **********************************/
+
+#define APPLDATA_ADD_TIMER 0
+#define APPLDATA_DEL_TIMER 1
+#define APPLDATA_MOD_TIMER 2
+
+/*
+ * __appldata_vtimer_setup()
+ *
+ * Add, delete or modify virtual timers on all online cpus.
+ * The caller needs to get the appldata_timer_lock spinlock.
+ */
+static void __appldata_vtimer_setup(int cmd)
+{
+ u64 timer_interval = (u64) appldata_interval * 1000 * TOD_MICRO;
+
+ switch (cmd) {
+ case APPLDATA_ADD_TIMER:
+ if (appldata_timer_active)
+ break;
+ appldata_timer.expires = timer_interval;
+ add_virt_timer_periodic(&appldata_timer);
+ appldata_timer_active = 1;
+ break;
+ case APPLDATA_DEL_TIMER:
+ del_virt_timer(&appldata_timer);
+ if (!appldata_timer_active)
+ break;
+ appldata_timer_active = 0;
+ break;
+ case APPLDATA_MOD_TIMER:
+ if (!appldata_timer_active)
+ break;
+ mod_virt_timer_periodic(&appldata_timer, timer_interval);
+ }
+}
+
+/*
+ * appldata_timer_handler()
+ *
+ * Start/Stop timer, show status of timer (0 = not active, 1 = active)
+ */
+static int
+appldata_timer_handler(struct ctl_table *ctl, int write,
+ void __user *buffer, size_t *lenp, loff_t *ppos)
+{
+ int timer_active = appldata_timer_active;
+ int zero = 0;
+ int one = 1;
+ int rc;
+ struct ctl_table ctl_entry = {
+ .procname = ctl->procname,
+ .data = &timer_active,
+ .maxlen = sizeof(int),
+ .extra1 = &zero,
+ .extra2 = &one,
+ };
+
+ rc = proc_douintvec_minmax(&ctl_entry, write, buffer, lenp, ppos);
+ if (rc < 0 || !write)
+ return rc;
+
+ spin_lock(&appldata_timer_lock);
+ if (timer_active)
+ __appldata_vtimer_setup(APPLDATA_ADD_TIMER);
+ else
+ __appldata_vtimer_setup(APPLDATA_DEL_TIMER);
+ spin_unlock(&appldata_timer_lock);
+ return 0;
+}
+
+/*
+ * appldata_interval_handler()
+ *
+ * Set (CPU) timer interval for collection of data (in milliseconds), show
+ * current timer interval.
+ */
+static int
+appldata_interval_handler(struct ctl_table *ctl, int write,
+ void __user *buffer, size_t *lenp, loff_t *ppos)
+{
+ int interval = appldata_interval;
+ int one = 1;
+ int rc;
+ struct ctl_table ctl_entry = {
+ .procname = ctl->procname,
+ .data = &interval,
+ .maxlen = sizeof(int),
+ .extra1 = &one,
+ };
+
+ rc = proc_dointvec_minmax(&ctl_entry, write, buffer, lenp, ppos);
+ if (rc < 0 || !write)
+ return rc;
+
+ spin_lock(&appldata_timer_lock);
+ appldata_interval = interval;
+ __appldata_vtimer_setup(APPLDATA_MOD_TIMER);
+ spin_unlock(&appldata_timer_lock);
+ return 0;
+}
+
+/*
+ * appldata_generic_handler()
+ *
+ * Generic start/stop monitoring and DIAG, show status of
+ * monitoring (0 = not in progress, 1 = in progress)
+ */
+static int
+appldata_generic_handler(struct ctl_table *ctl, int write,
+ void __user *buffer, size_t *lenp, loff_t *ppos)
+{
+ struct appldata_ops *ops = NULL, *tmp_ops;
+ struct list_head *lh;
+ int rc, found;
+ int active;
+ int zero = 0;
+ int one = 1;
+ struct ctl_table ctl_entry = {
+ .data = &active,
+ .maxlen = sizeof(int),
+ .extra1 = &zero,
+ .extra2 = &one,
+ };
+
+ found = 0;
+ mutex_lock(&appldata_ops_mutex);
+ list_for_each(lh, &appldata_ops_list) {
+ tmp_ops = list_entry(lh, struct appldata_ops, list);
+ if (&tmp_ops->ctl_table[2] == ctl) {
+ found = 1;
+ }
+ }
+ if (!found) {
+ mutex_unlock(&appldata_ops_mutex);
+ return -ENODEV;
+ }
+ ops = ctl->data;
+ if (!try_module_get(ops->owner)) { // protect this function
+ mutex_unlock(&appldata_ops_mutex);
+ return -ENODEV;
+ }
+ mutex_unlock(&appldata_ops_mutex);
+
+ active = ops->active;
+ rc = proc_douintvec_minmax(&ctl_entry, write, buffer, lenp, ppos);
+ if (rc < 0 || !write) {
+ module_put(ops->owner);
+ return rc;
+ }
+
+ mutex_lock(&appldata_ops_mutex);
+ if (active && (ops->active == 0)) {
+ // protect work queue callback
+ if (!try_module_get(ops->owner)) {
+ mutex_unlock(&appldata_ops_mutex);
+ module_put(ops->owner);
+ return -ENODEV;
+ }
+ ops->callback(ops->data); // init record
+ rc = appldata_diag(ops->record_nr,
+ APPLDATA_START_INTERVAL_REC,
+ (unsigned long) ops->data, ops->size,
+ ops->mod_lvl);
+ if (rc != 0) {
+ pr_err("Starting the data collection for %s "
+ "failed with rc=%d\n", ops->name, rc);
+ module_put(ops->owner);
+ } else
+ ops->active = 1;
+ } else if (!active && (ops->active == 1)) {
+ ops->active = 0;
+ rc = appldata_diag(ops->record_nr, APPLDATA_STOP_REC,
+ (unsigned long) ops->data, ops->size,
+ ops->mod_lvl);
+ if (rc != 0)
+ pr_err("Stopping the data collection for %s "
+ "failed with rc=%d\n", ops->name, rc);
+ module_put(ops->owner);
+ }
+ mutex_unlock(&appldata_ops_mutex);
+ module_put(ops->owner);
+ return 0;
+}
+
+/*************************** /proc stuff <END> *******************************/
+
+
+/************************* module-ops management *****************************/
+/*
+ * appldata_register_ops()
+ *
+ * update ops list, register /proc/sys entries
+ */
+int appldata_register_ops(struct appldata_ops *ops)
+{
+ if (ops->size > APPLDATA_MAX_REC_SIZE)
+ return -EINVAL;
+
+ ops->ctl_table = kcalloc(4, sizeof(struct ctl_table), GFP_KERNEL);
+ if (!ops->ctl_table)
+ return -ENOMEM;
+
+ mutex_lock(&appldata_ops_mutex);
+ list_add(&ops->list, &appldata_ops_list);
+ mutex_unlock(&appldata_ops_mutex);
+
+ ops->ctl_table[0].procname = appldata_proc_name;
+ ops->ctl_table[0].maxlen = 0;
+ ops->ctl_table[0].mode = S_IRUGO | S_IXUGO;
+ ops->ctl_table[0].child = &ops->ctl_table[2];
+
+ ops->ctl_table[2].procname = ops->name;
+ ops->ctl_table[2].mode = S_IRUGO | S_IWUSR;
+ ops->ctl_table[2].proc_handler = appldata_generic_handler;
+ ops->ctl_table[2].data = ops;
+
+ ops->sysctl_header = register_sysctl_table(ops->ctl_table);
+ if (!ops->sysctl_header)
+ goto out;
+ return 0;
+out:
+ mutex_lock(&appldata_ops_mutex);
+ list_del(&ops->list);
+ mutex_unlock(&appldata_ops_mutex);
+ kfree(ops->ctl_table);
+ return -ENOMEM;
+}
+
+/*
+ * appldata_unregister_ops()
+ *
+ * update ops list, unregister /proc entries, stop DIAG if necessary
+ */
+void appldata_unregister_ops(struct appldata_ops *ops)
+{
+ mutex_lock(&appldata_ops_mutex);
+ list_del(&ops->list);
+ mutex_unlock(&appldata_ops_mutex);
+ unregister_sysctl_table(ops->sysctl_header);
+ kfree(ops->ctl_table);
+}
+/********************** module-ops management <END> **************************/
+
+
+/**************************** suspend / resume *******************************/
+static int appldata_freeze(struct device *dev)
+{
+ struct appldata_ops *ops;
+ int rc;
+ struct list_head *lh;
+
+ spin_lock(&appldata_timer_lock);
+ if (appldata_timer_active) {
+ __appldata_vtimer_setup(APPLDATA_DEL_TIMER);
+ appldata_timer_suspended = 1;
+ }
+ spin_unlock(&appldata_timer_lock);
+
+ mutex_lock(&appldata_ops_mutex);
+ list_for_each(lh, &appldata_ops_list) {
+ ops = list_entry(lh, struct appldata_ops, list);
+ if (ops->active == 1) {
+ rc = appldata_diag(ops->record_nr, APPLDATA_STOP_REC,
+ (unsigned long) ops->data, ops->size,
+ ops->mod_lvl);
+ if (rc != 0)
+ pr_err("Stopping the data collection for %s "
+ "failed with rc=%d\n", ops->name, rc);
+ }
+ }
+ mutex_unlock(&appldata_ops_mutex);
+ return 0;
+}
+
+static int appldata_restore(struct device *dev)
+{
+ struct appldata_ops *ops;
+ int rc;
+ struct list_head *lh;
+
+ spin_lock(&appldata_timer_lock);
+ if (appldata_timer_suspended) {
+ __appldata_vtimer_setup(APPLDATA_ADD_TIMER);
+ appldata_timer_suspended = 0;
+ }
+ spin_unlock(&appldata_timer_lock);
+
+ mutex_lock(&appldata_ops_mutex);
+ list_for_each(lh, &appldata_ops_list) {
+ ops = list_entry(lh, struct appldata_ops, list);
+ if (ops->active == 1) {
+ ops->callback(ops->data); // init record
+ rc = appldata_diag(ops->record_nr,
+ APPLDATA_START_INTERVAL_REC,
+ (unsigned long) ops->data, ops->size,
+ ops->mod_lvl);
+ if (rc != 0) {
+ pr_err("Starting the data collection for %s "
+ "failed with rc=%d\n", ops->name, rc);
+ }
+ }
+ }
+ mutex_unlock(&appldata_ops_mutex);
+ return 0;
+}
+
+static int appldata_thaw(struct device *dev)
+{
+ return appldata_restore(dev);
+}
+
+static const struct dev_pm_ops appldata_pm_ops = {
+ .freeze = appldata_freeze,
+ .thaw = appldata_thaw,
+ .restore = appldata_restore,
+};
+
+static struct platform_driver appldata_pdrv = {
+ .driver = {
+ .name = "appldata",
+ .pm = &appldata_pm_ops,
+ },
+};
+/************************* suspend / resume <END> ****************************/
+
+
+/******************************* init / exit *********************************/
+
+/*
+ * appldata_init()
+ *
+ * init timer, register /proc entries
+ */
+static int __init appldata_init(void)
+{
+ int rc;
+
+ init_virt_timer(&appldata_timer);
+ appldata_timer.function = appldata_timer_function;
+ appldata_timer.data = (unsigned long) &appldata_work;
+
+ rc = platform_driver_register(&appldata_pdrv);
+ if (rc)
+ return rc;
+
+ appldata_pdev = platform_device_register_simple("appldata", -1, NULL,
+ 0);
+ if (IS_ERR(appldata_pdev)) {
+ rc = PTR_ERR(appldata_pdev);
+ goto out_driver;
+ }
+ appldata_wq = alloc_ordered_workqueue("appldata", 0);
+ if (!appldata_wq) {
+ rc = -ENOMEM;
+ goto out_device;
+ }
+
+ appldata_sysctl_header = register_sysctl_table(appldata_dir_table);
+ return 0;
+
+out_device:
+ platform_device_unregister(appldata_pdev);
+out_driver:
+ platform_driver_unregister(&appldata_pdrv);
+ return rc;
+}
+
+__initcall(appldata_init);
+
+/**************************** init / exit <END> ******************************/
+
+EXPORT_SYMBOL_GPL(appldata_register_ops);
+EXPORT_SYMBOL_GPL(appldata_unregister_ops);
+EXPORT_SYMBOL_GPL(appldata_diag);
+
+#ifdef CONFIG_SWAP
+EXPORT_SYMBOL_GPL(si_swapinfo);
+#endif
+EXPORT_SYMBOL_GPL(nr_threads);
+EXPORT_SYMBOL_GPL(nr_running);
+EXPORT_SYMBOL_GPL(nr_iowait);
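
The base module above only provides the plumbing: a gathering module fills in a struct appldata_ops, allocates its record, and calls appldata_register_ops(); appldata_base then creates /proc/sys/appldata/<name> and drives the callback from the virtual timer once collection is switched on. The sketch below shows that contract in isolation. It is illustrative only and not part of the patch: the record number 0xff and the record layout are made up, get_tod_clock() is assumed to come from <asm/timex.h>, everything else mirrors the ops usage above.

/* Minimal data gathering module, modelled on the modules that follow. */
#include <linux/module.h>
#include <linux/slab.h>
#include <asm/timex.h>		/* get_tod_clock(), assumed */

#include "appldata.h"

struct example_record {
	u64 timestamp;
	u32 sync_count_1;
	u32 sync_count_2;
	u64 some_counter;
} __packed;

static void example_callback(void *data)
{
	struct example_record *rec = data;

	rec->sync_count_1++;		/* mark record as "update in progress" */
	rec->some_counter++;		/* gather whatever is being monitored */
	rec->timestamp = get_tod_clock();
	rec->sync_count_2++;		/* mark record as consistent again */
}

static struct appldata_ops example_ops = {
	.name		= "example",		/* -> /proc/sys/appldata/example */
	.record_nr	= 0xff,			/* hypothetical record number */
	.size		= sizeof(struct example_record),
	.callback	= &example_callback,
	.owner		= THIS_MODULE,
	.mod_lvl	= {0xF0, 0xF0},		/* EBCDIC "00", packed to 0xF0F0 by appldata_diag() */
};

static int __init example_init(void)
{
	int ret;

	example_ops.data = kzalloc(sizeof(struct example_record), GFP_KERNEL);
	if (!example_ops.data)
		return -ENOMEM;
	ret = appldata_register_ops(&example_ops);
	if (ret)
		kfree(example_ops.data);
	return ret;
}

static void __exit example_exit(void)
{
	appldata_unregister_ops(&example_ops);
	kfree(example_ops.data);
}

module_init(example_init);
module_exit(example_exit);
MODULE_LICENSE("GPL");

Writing 1 to /proc/sys/appldata/example would then go through appldata_generic_handler(), which issues the APPLDATA_START_INTERVAL_REC DIAG and marks the ops active, exactly as shown in the handler above.
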
diff --git a/arch/s390/appldata/appldata_mem.c b/arch/s390/appldata/appldata_mem.c
new file mode 100644
index 000000000..e68136c3c
--- /dev/null
+++ b/arch/s390/appldata/appldata_mem.c
@@ -0,0 +1,165 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Data gathering module for Linux-VM Monitor Stream, Stage 1.
+ * Collects data related to memory management.
+ *
+ * Copyright IBM Corp. 2003, 2006
+ *
+ * Author: Gerald Schaefer <gerald.schaefer@de.ibm.com>
+ */
+
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/errno.h>
+#include <linux/kernel_stat.h>
+#include <linux/pagemap.h>
+#include <linux/swap.h>
+#include <linux/slab.h>
+#include <asm/io.h>
+
+#include "appldata.h"
+
+
+#define P2K(x) ((x) << (PAGE_SHIFT - 10)) /* Converts #Pages to KB */
+
+/*
+ * Memory data
+ *
+ * This is accessed as binary data by z/VM. If changes to it can't be avoided,
+ * the structure version (product ID, see appldata_base.c) needs to be changed
+ * as well and all documentation and z/VM applications using it must be
+ * updated.
+ *
+ * The record layout is documented in the Linux for zSeries Device Drivers
+ * book:
+ * http://oss.software.ibm.com/developerworks/opensource/linux390/index.shtml
+ */
+struct appldata_mem_data {
+ u64 timestamp;
+ u32 sync_count_1; /* after VM collected the record data, */
+ u32 sync_count_2; /* sync_count_1 and sync_count_2 should be the
+ same. If not, the record has been updated on
+ the Linux side while VM was collecting the
+ (possibly corrupt) data */
+
+ u64 pgpgin; /* data read from disk */
+ u64 pgpgout; /* data written to disk */
+ u64 pswpin; /* pages swapped in */
+ u64 pswpout; /* pages swapped out */
+
+ u64 sharedram; /* sharedram is currently set to 0 */
+
+ u64 totalram; /* total main memory size */
+ u64 freeram; /* free main memory size */
+ u64 totalhigh; /* total high memory size */
+ u64 freehigh; /* free high memory size */
+
+ u64 bufferram; /* memory reserved for buffers, free cache */
+ u64 cached; /* size of (used) cache, w/o buffers */
+ u64 totalswap; /* total swap space size */
+ u64 freeswap; /* free swap space */
+
+// New in 2.6 -->
+ u64 pgalloc; /* page allocations */
+ u64 pgfault; /* page faults (major+minor) */
+ u64 pgmajfault; /* page faults (major only) */
+// <-- New in 2.6
+
+} __packed;
+
+
+/*
+ * appldata_get_mem_data()
+ *
+ * gather memory data
+ */
+static void appldata_get_mem_data(void *data)
+{
+ /*
+ * don't put large structures on the stack, we are
+ * serialized through the appldata_ops_mutex and can use static
+ */
+ static struct sysinfo val;
+ unsigned long ev[NR_VM_EVENT_ITEMS];
+ struct appldata_mem_data *mem_data;
+
+ mem_data = data;
+ mem_data->sync_count_1++;
+
+ all_vm_events(ev);
+ mem_data->pgpgin = ev[PGPGIN] >> 1;
+ mem_data->pgpgout = ev[PGPGOUT] >> 1;
+ mem_data->pswpin = ev[PSWPIN];
+ mem_data->pswpout = ev[PSWPOUT];
+ mem_data->pgalloc = ev[PGALLOC_NORMAL];
+ mem_data->pgalloc += ev[PGALLOC_DMA];
+ mem_data->pgfault = ev[PGFAULT];
+ mem_data->pgmajfault = ev[PGMAJFAULT];
+
+ si_meminfo(&val);
+ mem_data->sharedram = val.sharedram;
+ mem_data->totalram = P2K(val.totalram);
+ mem_data->freeram = P2K(val.freeram);
+ mem_data->totalhigh = P2K(val.totalhigh);
+ mem_data->freehigh = P2K(val.freehigh);
+ mem_data->bufferram = P2K(val.bufferram);
+ mem_data->cached = P2K(global_node_page_state(NR_FILE_PAGES)
+ - val.bufferram);
+
+ si_swapinfo(&val);
+ mem_data->totalswap = P2K(val.totalswap);
+ mem_data->freeswap = P2K(val.freeswap);
+
+ mem_data->timestamp = get_tod_clock();
+ mem_data->sync_count_2++;
+}
+
+
+static struct appldata_ops ops = {
+ .name = "mem",
+ .record_nr = APPLDATA_RECORD_MEM_ID,
+ .size = sizeof(struct appldata_mem_data),
+ .callback = &appldata_get_mem_data,
+ .owner = THIS_MODULE,
+ .mod_lvl = {0xF0, 0xF0}, /* EBCDIC "00" */
+};
+
+
+/*
+ * appldata_mem_init()
+ *
+ * init_data, register ops
+ */
+static int __init appldata_mem_init(void)
+{
+ int ret;
+
+ ops.data = kzalloc(sizeof(struct appldata_mem_data), GFP_KERNEL);
+ if (!ops.data)
+ return -ENOMEM;
+
+ ret = appldata_register_ops(&ops);
+ if (ret)
+ kfree(ops.data);
+
+ return ret;
+}
+
+/*
+ * appldata_mem_exit()
+ *
+ * unregister ops
+ */
+static void __exit appldata_mem_exit(void)
+{
+ appldata_unregister_ops(&ops);
+ kfree(ops.data);
+}
+
+
+module_init(appldata_mem_init);
+module_exit(appldata_mem_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Gerald Schaefer");
+MODULE_DESCRIPTION("Linux-VM Monitor Stream, MEMORY statistics");
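
The sync_count_1/sync_count_2 pair described in the record layout above is a lightweight seqlock-style marker: appldata_get_mem_data() bumps sync_count_1 before it touches the record and sync_count_2 once it is done, so a snapshot taken mid-update has unequal counters. A consumer-side check could look like this; it is a sketch of the convention only, how z/VM actually validates the record is not shown in this patch.

/* Illustrative check of the sync counter convention used by the records. */
static int appldata_record_is_consistent(const struct appldata_mem_data *rec)
{
	/* equal counters mean the snapshot was not taken during an update */
	return rec->sync_count_1 == rec->sync_count_2;
}
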
diff --git a/arch/s390/appldata/appldata_net_sum.c b/arch/s390/appldata/appldata_net_sum.c
new file mode 100644
index 000000000..8bc14b0d1
--- /dev/null
+++ b/arch/s390/appldata/appldata_net_sum.c
@@ -0,0 +1,167 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Data gathering module for Linux-VM Monitor Stream, Stage 1.
+ * Collects accumulated network statistics (Packets received/transmitted,
+ * dropped, errors, ...).
+ *
+ * Copyright IBM Corp. 2003, 2006
+ *
+ * Author: Gerald Schaefer <gerald.schaefer@de.ibm.com>
+ */
+
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/errno.h>
+#include <linux/kernel_stat.h>
+#include <linux/netdevice.h>
+#include <net/net_namespace.h>
+
+#include "appldata.h"
+
+
+/*
+ * Network data
+ *
+ * This is accessed as binary data by z/VM. If changes to it can't be avoided,
+ * the structure version (product ID, see appldata_base.c) needs to be changed
+ * as well and all documentation and z/VM applications using it must be updated.
+ *
+ * The record layout is documented in the Linux for zSeries Device Drivers
+ * book:
+ * http://oss.software.ibm.com/developerworks/opensource/linux390/index.shtml
+ */
+struct appldata_net_sum_data {
+ u64 timestamp;
+ u32 sync_count_1; /* after VM collected the record data, */
+ u32 sync_count_2; /* sync_count_1 and sync_count_2 should be the
+ same. If not, the record has been updated on
+ the Linux side while VM was collecting the
+ (possibly corrupt) data */
+
+ u32 nr_interfaces; /* nr. of network interfaces being monitored */
+
+	u32 padding;		/* next value is 64-bit aligned, so these */
+				/* 4 bytes would be padded out by the compiler */
+
+ u64 rx_packets; /* total packets received */
+ u64 tx_packets; /* total packets transmitted */
+ u64 rx_bytes; /* total bytes received */
+ u64 tx_bytes; /* total bytes transmitted */
+ u64 rx_errors; /* bad packets received */
+ u64 tx_errors; /* packet transmit problems */
+ u64 rx_dropped; /* no space in linux buffers */
+ u64 tx_dropped; /* no space available in linux */
+ u64 collisions; /* collisions while transmitting */
+} __packed;
+
+
+/*
+ * appldata_get_net_sum_data()
+ *
+ * gather accumulated network statistics
+ */
+static void appldata_get_net_sum_data(void *data)
+{
+ int i;
+ struct appldata_net_sum_data *net_data;
+ struct net_device *dev;
+ unsigned long rx_packets, tx_packets, rx_bytes, tx_bytes, rx_errors,
+ tx_errors, rx_dropped, tx_dropped, collisions;
+
+ net_data = data;
+ net_data->sync_count_1++;
+
+ i = 0;
+ rx_packets = 0;
+ tx_packets = 0;
+ rx_bytes = 0;
+ tx_bytes = 0;
+ rx_errors = 0;
+ tx_errors = 0;
+ rx_dropped = 0;
+ tx_dropped = 0;
+ collisions = 0;
+
+ rcu_read_lock();
+ for_each_netdev_rcu(&init_net, dev) {
+ const struct rtnl_link_stats64 *stats;
+ struct rtnl_link_stats64 temp;
+
+ stats = dev_get_stats(dev, &temp);
+ rx_packets += stats->rx_packets;
+ tx_packets += stats->tx_packets;
+ rx_bytes += stats->rx_bytes;
+ tx_bytes += stats->tx_bytes;
+ rx_errors += stats->rx_errors;
+ tx_errors += stats->tx_errors;
+ rx_dropped += stats->rx_dropped;
+ tx_dropped += stats->tx_dropped;
+ collisions += stats->collisions;
+ i++;
+ }
+ rcu_read_unlock();
+
+ net_data->nr_interfaces = i;
+ net_data->rx_packets = rx_packets;
+ net_data->tx_packets = tx_packets;
+ net_data->rx_bytes = rx_bytes;
+ net_data->tx_bytes = tx_bytes;
+ net_data->rx_errors = rx_errors;
+ net_data->tx_errors = tx_errors;
+ net_data->rx_dropped = rx_dropped;
+ net_data->tx_dropped = tx_dropped;
+ net_data->collisions = collisions;
+
+ net_data->timestamp = get_tod_clock();
+ net_data->sync_count_2++;
+}
+
+
+static struct appldata_ops ops = {
+ .name = "net_sum",
+ .record_nr = APPLDATA_RECORD_NET_SUM_ID,
+ .size = sizeof(struct appldata_net_sum_data),
+ .callback = &appldata_get_net_sum_data,
+ .owner = THIS_MODULE,
+ .mod_lvl = {0xF0, 0xF0}, /* EBCDIC "00" */
+};
+
+
+/*
+ * appldata_net_init()
+ *
+ * init data, register ops
+ */
+static int __init appldata_net_init(void)
+{
+ int ret;
+
+ ops.data = kzalloc(sizeof(struct appldata_net_sum_data), GFP_KERNEL);
+ if (!ops.data)
+ return -ENOMEM;
+
+ ret = appldata_register_ops(&ops);
+ if (ret)
+ kfree(ops.data);
+
+ return ret;
+}
+
+/*
+ * appldata_net_exit()
+ *
+ * unregister ops
+ */
+static void __exit appldata_net_exit(void)
+{
+ appldata_unregister_ops(&ops);
+ kfree(ops.data);
+}
+
+
+module_init(appldata_net_init);
+module_exit(appldata_net_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Gerald Schaefer");
+MODULE_DESCRIPTION("Linux-VM Monitor Stream, accumulated network statistics");
diff --git a/arch/s390/appldata/appldata_os.c b/arch/s390/appldata/appldata_os.c
new file mode 100644
index 000000000..433a994b1
--- /dev/null
+++ b/arch/s390/appldata/appldata_os.c
@@ -0,0 +1,221 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Data gathering module for Linux-VM Monitor Stream, Stage 1.
+ * Collects misc. OS related data (CPU utilization, running processes).
+ *
+ * Copyright IBM Corp. 2003, 2006
+ *
+ * Author: Gerald Schaefer <gerald.schaefer@de.ibm.com>
+ */
+
+#define KMSG_COMPONENT "appldata"
+#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
+
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/slab.h>
+#include <linux/errno.h>
+#include <linux/kernel_stat.h>
+#include <linux/netdevice.h>
+#include <linux/sched.h>
+#include <linux/sched/loadavg.h>
+#include <linux/sched/stat.h>
+#include <asm/appldata.h>
+#include <asm/smp.h>
+
+#include "appldata.h"
+
+
+#define LOAD_INT(x) ((x) >> FSHIFT)
+#define LOAD_FRAC(x) LOAD_INT(((x) & (FIXED_1-1)) * 100)
+
+/*
+ * OS data
+ *
+ * This is accessed as binary data by z/VM. If changes to it can't be avoided,
+ * the structure version (product ID, see appldata_base.c) needs to be changed
+ * as well and all documentation and z/VM applications using it must be
+ * updated.
+ *
+ * The record layout is documented in the Linux for zSeries Device Drivers
+ * book:
+ * http://oss.software.ibm.com/developerworks/opensource/linux390/index.shtml
+ */
+struct appldata_os_per_cpu {
+ u32 per_cpu_user; /* timer ticks spent in user mode */
+ u32 per_cpu_nice; /* ... spent with modified priority */
+ u32 per_cpu_system; /* ... spent in kernel mode */
+ u32 per_cpu_idle; /* ... spent in idle mode */
+
+ /* New in 2.6 */
+ u32 per_cpu_irq; /* ... spent in interrupts */
+ u32 per_cpu_softirq; /* ... spent in softirqs */
+ u32 per_cpu_iowait; /* ... spent while waiting for I/O */
+
+ /* New in modification level 01 */
+ u32 per_cpu_steal; /* ... stolen by hypervisor */
+ u32 cpu_id; /* number of this CPU */
+} __attribute__((packed));
+
+struct appldata_os_data {
+ u64 timestamp;
+ u32 sync_count_1; /* after VM collected the record data, */
+ u32 sync_count_2; /* sync_count_1 and sync_count_2 should be the
+ same. If not, the record has been updated on
+ the Linux side while VM was collecting the
+ (possibly corrupt) data */
+
+ u32 nr_cpus; /* number of (virtual) CPUs */
+ u32 per_cpu_size; /* size of the per-cpu data struct */
+ u32 cpu_offset; /* offset of the first per-cpu data struct */
+
+ u32 nr_running; /* number of runnable threads */
+ u32 nr_threads; /* number of threads */
+ u32 avenrun[3]; /* average nr. of running processes during */
+ /* the last 1, 5 and 15 minutes */
+
+ /* New in 2.6 */
+ u32 nr_iowait; /* number of blocked threads
+ (waiting for I/O) */
+
+ /* per cpu data */
+ struct appldata_os_per_cpu os_cpu[0];
+} __attribute__((packed));
+
+static struct appldata_os_data *appldata_os_data;
+
+static struct appldata_ops ops = {
+ .name = "os",
+ .record_nr = APPLDATA_RECORD_OS_ID,
+ .owner = THIS_MODULE,
+ .mod_lvl = {0xF0, 0xF1}, /* EBCDIC "01" */
+};
+
+
+/*
+ * appldata_get_os_data()
+ *
+ * gather OS data
+ */
+static void appldata_get_os_data(void *data)
+{
+ int i, j, rc;
+ struct appldata_os_data *os_data;
+ unsigned int new_size;
+
+ os_data = data;
+ os_data->sync_count_1++;
+
+ os_data->nr_threads = nr_threads;
+ os_data->nr_running = nr_running();
+ os_data->nr_iowait = nr_iowait();
+ os_data->avenrun[0] = avenrun[0] + (FIXED_1/200);
+ os_data->avenrun[1] = avenrun[1] + (FIXED_1/200);
+ os_data->avenrun[2] = avenrun[2] + (FIXED_1/200);
+
+ j = 0;
+ for_each_online_cpu(i) {
+ os_data->os_cpu[j].per_cpu_user =
+ nsecs_to_jiffies(kcpustat_cpu(i).cpustat[CPUTIME_USER]);
+ os_data->os_cpu[j].per_cpu_nice =
+ nsecs_to_jiffies(kcpustat_cpu(i).cpustat[CPUTIME_NICE]);
+ os_data->os_cpu[j].per_cpu_system =
+ nsecs_to_jiffies(kcpustat_cpu(i).cpustat[CPUTIME_SYSTEM]);
+ os_data->os_cpu[j].per_cpu_idle =
+ nsecs_to_jiffies(kcpustat_cpu(i).cpustat[CPUTIME_IDLE]);
+ os_data->os_cpu[j].per_cpu_irq =
+ nsecs_to_jiffies(kcpustat_cpu(i).cpustat[CPUTIME_IRQ]);
+ os_data->os_cpu[j].per_cpu_softirq =
+ nsecs_to_jiffies(kcpustat_cpu(i).cpustat[CPUTIME_SOFTIRQ]);
+ os_data->os_cpu[j].per_cpu_iowait =
+ nsecs_to_jiffies(kcpustat_cpu(i).cpustat[CPUTIME_IOWAIT]);
+ os_data->os_cpu[j].per_cpu_steal =
+ nsecs_to_jiffies(kcpustat_cpu(i).cpustat[CPUTIME_STEAL]);
+ os_data->os_cpu[j].cpu_id = i;
+ j++;
+ }
+
+ os_data->nr_cpus = j;
+
+ new_size = sizeof(struct appldata_os_data) +
+ (os_data->nr_cpus * sizeof(struct appldata_os_per_cpu));
+ if (ops.size != new_size) {
+ if (ops.active) {
+ rc = appldata_diag(APPLDATA_RECORD_OS_ID,
+ APPLDATA_START_INTERVAL_REC,
+ (unsigned long) ops.data, new_size,
+ ops.mod_lvl);
+ if (rc != 0)
+ pr_err("Starting a new OS data collection "
+ "failed with rc=%d\n", rc);
+
+ rc = appldata_diag(APPLDATA_RECORD_OS_ID,
+ APPLDATA_STOP_REC,
+ (unsigned long) ops.data, ops.size,
+ ops.mod_lvl);
+ if (rc != 0)
+ pr_err("Stopping a faulty OS data "
+ "collection failed with rc=%d\n", rc);
+ }
+ ops.size = new_size;
+ }
+ os_data->timestamp = get_tod_clock();
+ os_data->sync_count_2++;
+}
+
+
+/*
+ * appldata_os_init()
+ *
+ * init data, register ops
+ */
+static int __init appldata_os_init(void)
+{
+ int rc, max_size;
+
+ max_size = sizeof(struct appldata_os_data) +
+ (num_possible_cpus() * sizeof(struct appldata_os_per_cpu));
+ if (max_size > APPLDATA_MAX_REC_SIZE) {
+ pr_err("Maximum OS record size %i exceeds the maximum "
+ "record size %i\n", max_size, APPLDATA_MAX_REC_SIZE);
+ rc = -ENOMEM;
+ goto out;
+ }
+
+ appldata_os_data = kzalloc(max_size, GFP_KERNEL | GFP_DMA);
+ if (appldata_os_data == NULL) {
+ rc = -ENOMEM;
+ goto out;
+ }
+
+ appldata_os_data->per_cpu_size = sizeof(struct appldata_os_per_cpu);
+ appldata_os_data->cpu_offset = offsetof(struct appldata_os_data,
+ os_cpu);
+
+ ops.data = appldata_os_data;
+ ops.callback = &appldata_get_os_data;
+ rc = appldata_register_ops(&ops);
+ if (rc != 0)
+ kfree(appldata_os_data);
+out:
+ return rc;
+}
+
+/*
+ * appldata_os_exit()
+ *
+ * unregister ops
+ */
+static void __exit appldata_os_exit(void)
+{
+ appldata_unregister_ops(&ops);
+ kfree(appldata_os_data);
+}
+
+
+module_init(appldata_os_init);
+module_exit(appldata_os_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Gerald Schaefer");
+MODULE_DESCRIPTION("Linux-VM Monitor Stream, OS statistics");
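
appldata_get_os_data() stores avenrun[] in the kernel's raw fixed-point format (FSHIFT fractional bits) and adds FIXED_1/200 so that truncation to two decimal places rounds correctly; the LOAD_INT/LOAD_FRAC macros at the top of the file express the reverse mapping. Below is a small sketch of decoding a stored value back into the familiar "X.YY" form. It is illustrative only; u32 and the loadavg constants are assumed from <linux/types.h> and <linux/sched/loadavg.h>.

#include <linux/types.h>
#include <linux/sched/loadavg.h>	/* FSHIFT, FIXED_1 */

/* Same definitions as at the top of appldata_os.c */
#define LOAD_INT(x)  ((x) >> FSHIFT)
#define LOAD_FRAC(x) LOAD_INT(((x) & (FIXED_1 - 1)) * 100)

/* Decode a stored avenrun value, e.g. 1.73 -> whole = 1, frac = 73. */
static void decode_loadavg(u32 stored, unsigned int *whole, unsigned int *frac)
{
	/* 'stored' already contains the + FIXED_1/200 rounding term */
	*whole = LOAD_INT(stored);
	*frac  = LOAD_FRAC(stored);
}
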
diff --git a/arch/s390/boot/.gitignore b/arch/s390/boot/.gitignore
new file mode 100644
index 000000000..017d5912a
--- /dev/null
+++ b/arch/s390/boot/.gitignore
@@ -0,0 +1,2 @@
+image
+bzImage
diff --git a/arch/s390/boot/Makefile b/arch/s390/boot/Makefile
new file mode 100644
index 000000000..45c72d1f9
--- /dev/null
+++ b/arch/s390/boot/Makefile
@@ -0,0 +1,55 @@
+# SPDX-License-Identifier: GPL-2.0
+#
+# Makefile for the linux s390-specific parts of the memory manager.
+#
+
+KCOV_INSTRUMENT := n
+GCOV_PROFILE := n
+UBSAN_SANITIZE := n
+KASAN_SANITIZE := n
+
+KBUILD_AFLAGS := $(KBUILD_AFLAGS_DECOMPRESSOR)
+KBUILD_CFLAGS := $(KBUILD_CFLAGS_DECOMPRESSOR)
+
+#
+# Use -march=z900 for als.c to be able to print an error
+# message if the kernel is started on a machine which is too old
+#
+ifneq ($(CC_FLAGS_MARCH),-march=z900)
+AFLAGS_REMOVE_head.o += $(CC_FLAGS_MARCH)
+AFLAGS_head.o += -march=z900
+AFLAGS_REMOVE_mem.o += $(CC_FLAGS_MARCH)
+AFLAGS_mem.o += -march=z900
+CFLAGS_REMOVE_als.o += $(CC_FLAGS_MARCH)
+CFLAGS_als.o += -march=z900
+CFLAGS_REMOVE_sclp_early_core.o += $(CC_FLAGS_MARCH)
+CFLAGS_sclp_early_core.o += -march=z900
+endif
+
+CFLAGS_sclp_early_core.o += -I$(srctree)/drivers/s390/char
+
+obj-y := head.o als.o ebcdic.o sclp_early_core.o mem.o
+targets := bzImage startup.a $(obj-y)
+subdir- := compressed
+
+OBJECTS := $(addprefix $(obj)/,$(obj-y))
+
+$(obj)/bzImage: $(obj)/compressed/vmlinux FORCE
+ $(call if_changed,objcopy)
+
+$(obj)/compressed/vmlinux: $(obj)/startup.a FORCE
+ $(Q)$(MAKE) $(build)=$(obj)/compressed $@
+
+quiet_cmd_ar = AR $@
+ cmd_ar = rm -f $@; $(AR) rcsTP$(KBUILD_ARFLAGS) $@ $(filter $(OBJECTS), $^)
+
+$(obj)/startup.a: $(OBJECTS) FORCE
+ $(call if_changed,ar)
+
+install:
+ sh -x $(srctree)/$(obj)/install.sh $(KERNELRELEASE) $(obj)/bzImage \
+ System.map "$(INSTALL_PATH)"
+
+chkbss := $(OBJECTS)
+chkbss-target := $(obj)/startup.a
+include $(srctree)/arch/s390/scripts/Makefile.chkbss
diff --git a/arch/s390/boot/als.c b/arch/s390/boot/als.c
new file mode 100644
index 000000000..d592e0d90
--- /dev/null
+++ b/arch/s390/boot/als.c
@@ -0,0 +1,126 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright IBM Corp. 2016
+ */
+#include <linux/kernel.h>
+#include <asm/processor.h>
+#include <asm/facility.h>
+#include <asm/lowcore.h>
+#include <asm/sclp.h>
+
+/*
+ * The code within this file will be called very early. It may _not_
+ * access anything within the bss section, since that is not cleared
+ * yet and may contain data (e.g. initrd) that must be saved by other
+ * code.
+ * For temporary objects the stack (16k) should be used.
+ */
+
+static unsigned long als[] = { FACILITIES_ALS };
+
+static void u16_to_hex(char *str, u16 val)
+{
+ int i, num;
+
+ for (i = 1; i <= 4; i++) {
+ num = (val >> (16 - 4 * i)) & 0xf;
+ if (num >= 10)
+ num += 7;
+ *str++ = '0' + num;
+ }
+ *str = '\0';
+}
+
+static void print_machine_type(void)
+{
+ static char mach_str[80] = "Detected machine-type number: ";
+ char type_str[5];
+ struct cpuid id;
+
+ get_cpu_id(&id);
+ u16_to_hex(type_str, id.machine);
+ strcat(mach_str, type_str);
+ strcat(mach_str, "\n");
+ sclp_early_printk(mach_str);
+}
+
+static void u16_to_decimal(char *str, u16 val)
+{
+ int div = 1;
+
+ while (div * 10 <= val)
+ div *= 10;
+ while (div) {
+ *str++ = '0' + val / div;
+ val %= div;
+ div /= 10;
+ }
+ *str = '\0';
+}
+
+static void print_missing_facilities(void)
+{
+ static char als_str[80] = "Missing facilities: ";
+ unsigned long val;
+ char val_str[6];
+ int i, j, first;
+
+ first = 1;
+ for (i = 0; i < ARRAY_SIZE(als); i++) {
+ val = ~S390_lowcore.stfle_fac_list[i] & als[i];
+ for (j = 0; j < BITS_PER_LONG; j++) {
+ if (!(val & (1UL << (BITS_PER_LONG - 1 - j))))
+ continue;
+ if (!first)
+ strcat(als_str, ",");
+ /*
+ * Make sure we stay within one line. Consider that
+ * each facility bit adds up to five characters and
+ * z/VM adds a four character prefix.
+ */
+ if (strlen(als_str) > 70) {
+ strcat(als_str, "\n");
+ sclp_early_printk(als_str);
+ *als_str = '\0';
+ }
+ u16_to_decimal(val_str, i * BITS_PER_LONG + j);
+ strcat(als_str, val_str);
+ first = 0;
+ }
+ }
+ strcat(als_str, "\n");
+ sclp_early_printk(als_str);
+ sclp_early_printk("See Principles of Operations for facility bits\n");
+}
+
+static void facility_mismatch(void)
+{
+ sclp_early_printk("The Linux kernel requires more recent processor hardware\n");
+ print_machine_type();
+ print_missing_facilities();
+ disabled_wait(0x8badcccc);
+}
+
+void verify_facilities(void)
+{
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(S390_lowcore.stfle_fac_list); i++)
+ S390_lowcore.stfle_fac_list[i] = 0;
+ asm volatile(
+ " stfl 0(0)\n"
+ : "=m" (S390_lowcore.stfl_fac_list));
+ S390_lowcore.stfle_fac_list[0] = (u64)S390_lowcore.stfl_fac_list << 32;
+ if (S390_lowcore.stfl_fac_list & 0x01000000) {
+ register unsigned long reg0 asm("0") = ARRAY_SIZE(als) - 1;
+
+ asm volatile(".insn s,0xb2b00000,0(%1)" /* stfle */
+ : "+d" (reg0)
+ : "a" (&S390_lowcore.stfle_fac_list)
+ : "memory", "cc");
+ }
+ for (i = 0; i < ARRAY_SIZE(als); i++) {
+ if ((S390_lowcore.stfle_fac_list[i] & als[i]) != als[i])
+ facility_mismatch();
+ }
+}
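
print_missing_facilities() converts a (word, bit) position in the stfle facility list into the architected facility number with i * BITS_PER_LONG + j, counting from the most significant bit of each 64-bit word. The inverse, testing a single facility number against a stored list, would look roughly like the sketch below; it is illustrative only, with BITS_PER_LONG assumed from <asm/bitsperlong.h>.

/* Test one architected facility bit in a copy of the stfle facility list. */
static inline int facility_bit_set(const unsigned long *fac_list, unsigned int nr)
{
	unsigned int word = nr / BITS_PER_LONG;
	unsigned int bit = nr % BITS_PER_LONG;

	/* facility 0 is the most significant bit of fac_list[0] */
	return (fac_list[word] >> (BITS_PER_LONG - 1 - bit)) & 1;
}
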
diff --git a/arch/s390/boot/compressed/.gitignore b/arch/s390/boot/compressed/.gitignore
new file mode 100644
index 000000000..45aeb4f08
--- /dev/null
+++ b/arch/s390/boot/compressed/.gitignore
@@ -0,0 +1,5 @@
+sizes.h
+vmlinux
+vmlinux.lds
+vmlinux.scr.lds
+vmlinux.bin.full
diff --git a/arch/s390/boot/compressed/Makefile b/arch/s390/boot/compressed/Makefile
new file mode 100644
index 000000000..9b3d821e5
--- /dev/null
+++ b/arch/s390/boot/compressed/Makefile
@@ -0,0 +1,74 @@
+# SPDX-License-Identifier: GPL-2.0
+#
+# linux/arch/s390/boot/compressed/Makefile
+#
+# create a compressed vmlinux image from the original vmlinux
+#
+
+KCOV_INSTRUMENT := n
+GCOV_PROFILE := n
+UBSAN_SANITIZE := n
+KASAN_SANITIZE := n
+
+obj-y := $(if $(CONFIG_KERNEL_UNCOMPRESSED),,head.o misc.o) piggy.o
+targets := vmlinux.lds vmlinux vmlinux.bin vmlinux.bin.gz vmlinux.bin.bz2
+targets += vmlinux.bin.xz vmlinux.bin.lzma vmlinux.bin.lzo vmlinux.bin.lz4
+targets += vmlinux.scr.lds $(obj-y) $(if $(CONFIG_KERNEL_UNCOMPRESSED),,sizes.h)
+
+KBUILD_AFLAGS := $(KBUILD_AFLAGS_DECOMPRESSOR)
+KBUILD_CFLAGS := $(KBUILD_CFLAGS_DECOMPRESSOR)
+
+OBJECTS := $(addprefix $(obj)/,$(obj-y))
+
+LDFLAGS_vmlinux := --oformat $(LD_BFD) -e startup -T
+$(obj)/vmlinux: $(obj)/vmlinux.lds $(objtree)/arch/s390/boot/startup.a $(OBJECTS) FORCE
+ $(call if_changed,ld)
+
+# extract required uncompressed vmlinux symbols and adjust them to reflect offsets inside vmlinux.bin
+sed-sizes := -e 's/^\([0-9a-fA-F]*\) . \(__bss_start\|_end\)$$/\#define SZ\2 (0x\1 - 0x100000)/p'
+
+quiet_cmd_sizes = GEN $@
+ cmd_sizes = $(NM) $< | sed -n $(sed-sizes) > $@
+
+$(obj)/sizes.h: vmlinux
+ $(call if_changed,sizes)
+
+AFLAGS_head.o += -I$(objtree)/$(obj)
+$(obj)/head.o: $(obj)/sizes.h
+
+CFLAGS_misc.o += -I$(objtree)/$(obj)
+$(obj)/misc.o: $(obj)/sizes.h
+
+OBJCOPYFLAGS_vmlinux.bin := -R .comment -S
+$(obj)/vmlinux.bin: vmlinux
+ $(call if_changed,objcopy)
+
+vmlinux.bin.all-y := $(obj)/vmlinux.bin
+
+suffix-$(CONFIG_KERNEL_GZIP) := .gz
+suffix-$(CONFIG_KERNEL_BZIP2) := .bz2
+suffix-$(CONFIG_KERNEL_LZ4) := .lz4
+suffix-$(CONFIG_KERNEL_LZMA) := .lzma
+suffix-$(CONFIG_KERNEL_LZO) := .lzo
+suffix-$(CONFIG_KERNEL_XZ) := .xz
+
+$(obj)/vmlinux.bin.gz: $(vmlinux.bin.all-y) FORCE
+ $(call if_changed,gzip)
+$(obj)/vmlinux.bin.bz2: $(vmlinux.bin.all-y) FORCE
+ $(call if_changed,bzip2)
+$(obj)/vmlinux.bin.lz4: $(vmlinux.bin.all-y) FORCE
+ $(call if_changed,lz4)
+$(obj)/vmlinux.bin.lzma: $(vmlinux.bin.all-y) FORCE
+ $(call if_changed,lzma)
+$(obj)/vmlinux.bin.lzo: $(vmlinux.bin.all-y) FORCE
+ $(call if_changed,lzo)
+$(obj)/vmlinux.bin.xz: $(vmlinux.bin.all-y) FORCE
+ $(call if_changed,xzkern)
+
+LDFLAGS_piggy.o := -r --format binary --oformat $(LD_BFD) -T
+$(obj)/piggy.o: $(obj)/vmlinux.scr.lds $(obj)/vmlinux.bin$(suffix-y)
+ $(call if_changed,ld)
+
+chkbss := $(filter-out $(obj)/misc.o $(obj)/piggy.o,$(OBJECTS))
+chkbss-target := $(obj)/vmlinux.bin
+include $(srctree)/arch/s390/scripts/Makefile.chkbss
diff --git a/arch/s390/boot/compressed/head.S b/arch/s390/boot/compressed/head.S
new file mode 100644
index 000000000..df8dbbc17
--- /dev/null
+++ b/arch/s390/boot/compressed/head.S
@@ -0,0 +1,52 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Startup glue code to uncompress the kernel
+ *
+ * Copyright IBM Corp. 2010
+ *
+ * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>
+ */
+
+#include <linux/init.h>
+#include <linux/linkage.h>
+#include <asm/asm-offsets.h>
+#include <asm/thread_info.h>
+#include <asm/page.h>
+#include "sizes.h"
+
+__HEAD
+ENTRY(startup_decompressor)
+ basr %r13,0 # get base
+.LPG1:
+ # setup stack
+ lg %r15,.Lstack-.LPG1(%r13)
+ aghi %r15,-160
+ brasl %r14,decompress_kernel
+ # Set up registers for memory mover. We move the decompressed image to
+ # 0x100000, where startup_continue of the decompressed image is supposed
+ # to be.
+ lgr %r4,%r2
+ lg %r2,.Loffset-.LPG1(%r13)
+ lg %r3,.Lmvsize-.LPG1(%r13)
+ lgr %r5,%r3
+ # Move the memory mover someplace safe so it doesn't overwrite itself.
+ la %r1,0x200
+ mvc 0(mover_end-mover,%r1),mover-.LPG1(%r13)
+ # When the memory mover is done we pass control to
+ # arch/s390/kernel/head64.S:startup_continue which lives at 0x100000 in
+ # the decompressed image.
+ lgr %r6,%r2
+ br %r1
+mover:
+ mvcle %r2,%r4,0
+ jo mover
+ br %r6
+mover_end:
+
+ .align 8
+.Lstack:
+ .quad 0x8000 + (1<<(PAGE_SHIFT+THREAD_SIZE_ORDER))
+.Loffset:
+ .quad 0x100000
+.Lmvsize:
+ .quad SZ__bss_start
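
The glue code above calls decompress_kernel(), then uses a small mvcle loop, first copied to address 0x200 so it cannot overwrite itself, to move SZ__bss_start bytes of the decompressed image to 0x100000 and branch to startup_continue there. In rough C terms the mover does something like the following; this is an illustrative equivalent only, not how the early boot code is actually written, with SZ__bss_start taken from the generated sizes.h and memmove() as provided by the boot code.

/* C-level picture of the memory mover sequence in head.S above. */
static void __attribute__((noreturn)) run_decompressed(void *output)
{
	void (*startup_continue)(void) = (void (*)(void)) 0x100000UL;

	memmove((void *) 0x100000UL, output, SZ__bss_start);
	startup_continue();		/* never returns */
	__builtin_unreachable();
}
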
diff --git a/arch/s390/boot/compressed/misc.c b/arch/s390/boot/compressed/misc.c
new file mode 100644
index 000000000..f66ad73c2
--- /dev/null
+++ b/arch/s390/boot/compressed/misc.c
@@ -0,0 +1,116 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Definitions and wrapper functions for kernel decompressor
+ *
+ * Copyright IBM Corp. 2010
+ *
+ * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>
+ */
+
+#include <linux/uaccess.h>
+#include <asm/page.h>
+#include <asm/sclp.h>
+#include <asm/ipl.h>
+#include "sizes.h"
+
+/*
+ * gzip declarations
+ */
+#define STATIC static
+
+#undef memset
+#undef memcpy
+#undef memmove
+#define memmove memmove
+#define memzero(s, n) memset((s), 0, (n))
+
+/* Symbols defined by linker scripts */
+extern char input_data[];
+extern int input_len;
+extern char _end[];
+extern char _bss[], _ebss[];
+
+static void error(char *m);
+
+static unsigned long free_mem_ptr;
+static unsigned long free_mem_end_ptr;
+
+#ifdef CONFIG_HAVE_KERNEL_BZIP2
+#define HEAP_SIZE 0x400000
+#else
+#define HEAP_SIZE 0x10000
+#endif
+
+#ifdef CONFIG_KERNEL_GZIP
+#include "../../../../lib/decompress_inflate.c"
+#endif
+
+#ifdef CONFIG_KERNEL_BZIP2
+#include "../../../../lib/decompress_bunzip2.c"
+#endif
+
+#ifdef CONFIG_KERNEL_LZ4
+#include "../../../../lib/decompress_unlz4.c"
+#endif
+
+#ifdef CONFIG_KERNEL_LZMA
+#include "../../../../lib/decompress_unlzma.c"
+#endif
+
+#ifdef CONFIG_KERNEL_LZO
+#include "../../../../lib/decompress_unlzo.c"
+#endif
+
+#ifdef CONFIG_KERNEL_XZ
+#include "../../../../lib/decompress_unxz.c"
+#endif
+
+static int puts(const char *s)
+{
+ sclp_early_printk(s);
+ return 0;
+}
+
+static void error(char *x)
+{
+ unsigned long long psw = 0x000a0000deadbeefULL;
+
+ puts("\n\n");
+ puts(x);
+ puts("\n\n -- System halted");
+
+ asm volatile("lpsw %0" : : "Q" (psw));
+}
+
+unsigned long decompress_kernel(void)
+{
+ void *output, *kernel_end;
+
+ output = (void *) ALIGN((unsigned long) _end + HEAP_SIZE, PAGE_SIZE);
+ kernel_end = output + SZ__bss_start;
+
+#ifdef CONFIG_BLK_DEV_INITRD
+ /*
+ * Move the initrd right behind the end of the decompressed
+ * kernel image. This also prevents initrd corruption caused by
+ * bss clearing since kernel_end will always be located behind the
+	 * current bss section.
+ */
+ if (INITRD_START && INITRD_SIZE && kernel_end > (void *) INITRD_START) {
+ memmove(kernel_end, (void *) INITRD_START, INITRD_SIZE);
+ INITRD_START = (unsigned long) kernel_end;
+ }
+#endif
+
+ /*
+ * Clear bss section. free_mem_ptr and free_mem_end_ptr need to be
+ * initialized afterwards since they reside in bss.
+ */
+ memset(_bss, 0, _ebss - _bss);
+ free_mem_ptr = (unsigned long) _end;
+ free_mem_end_ptr = free_mem_ptr + HEAP_SIZE;
+
+ __decompress(input_data, input_len, NULL, NULL, output, 0, NULL, error);
+ return (unsigned long) output;
+}
+
diff --git a/arch/s390/boot/compressed/vmlinux.lds.S b/arch/s390/boot/compressed/vmlinux.lds.S
new file mode 100644
index 000000000..b16ac8b3c
--- /dev/null
+++ b/arch/s390/boot/compressed/vmlinux.lds.S
@@ -0,0 +1,65 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#include <asm-generic/vmlinux.lds.h>
+
+OUTPUT_FORMAT("elf64-s390", "elf64-s390", "elf64-s390")
+OUTPUT_ARCH(s390:64-bit)
+
+ENTRY(startup)
+
+SECTIONS
+{
+	/* Be careful: parts of the startup code assume that the entry
+	 * point (startup) is at address 0.
+	 */
+ . = 0;
+ .head.text : {
+ _head = . ;
+ HEAD_TEXT
+ _ehead = . ;
+ }
+ .text : {
+ _text = .; /* Text */
+ *(.text)
+ *(.text.*)
+ _etext = . ;
+ }
+ .rodata : {
+ _rodata = . ;
+ *(.rodata) /* read-only data */
+ *(EXCLUDE_FILE (*piggy.o) .rodata.compressed)
+ _erodata = . ;
+ }
+ .data : {
+ _data = . ;
+ *(.data)
+ *(.data.*)
+ _edata = . ;
+ }
+ startup_continue = 0x100000;
+#ifdef CONFIG_KERNEL_UNCOMPRESSED
+ . = 0x100000;
+#else
+ . = ALIGN(8);
+#endif
+ .rodata.compressed : {
+ *(.rodata.compressed)
+ }
+ . = ALIGN(256);
+ .bss : {
+ _bss = . ;
+ *(.bss)
+ *(.bss.*)
+ *(COMMON)
+ . = ALIGN(8); /* For convenience during zeroing */
+ _ebss = .;
+ }
+ _end = .;
+
+ /* Sections to be discarded */
+ /DISCARD/ : {
+ *(.eh_frame)
+ *(__ex_table)
+ *(*__ksymtab*)
+ *(___kcrctab*)
+ }
+}
diff --git a/arch/s390/boot/compressed/vmlinux.scr.lds.S b/arch/s390/boot/compressed/vmlinux.scr.lds.S
new file mode 100644
index 000000000..ff01d18c9
--- /dev/null
+++ b/arch/s390/boot/compressed/vmlinux.scr.lds.S
@@ -0,0 +1,15 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+SECTIONS
+{
+ .rodata.compressed : {
+#ifndef CONFIG_KERNEL_UNCOMPRESSED
+ input_len = .;
+ LONG(input_data_end - input_data) input_data = .;
+#endif
+ *(.data)
+#ifndef CONFIG_KERNEL_UNCOMPRESSED
+ output_len = . - 4;
+ input_data_end = .;
+#endif
+ }
+}
diff --git a/arch/s390/boot/ebcdic.c b/arch/s390/boot/ebcdic.c
new file mode 100644
index 000000000..7391e7d36
--- /dev/null
+++ b/arch/s390/boot/ebcdic.c
@@ -0,0 +1,2 @@
+// SPDX-License-Identifier: GPL-2.0
+#include "../kernel/ebcdic.c"
diff --git a/arch/s390/boot/head.S b/arch/s390/boot/head.S
new file mode 100644
index 000000000..f721913b7
--- /dev/null
+++ b/arch/s390/boot/head.S
@@ -0,0 +1,340 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright IBM Corp. 1999, 2010
+ *
+ * Author(s): Hartmut Penner <hp@de.ibm.com>
+ * Martin Schwidefsky <schwidefsky@de.ibm.com>
+ * Rob van der Heij <rvdhei@iae.nl>
+ * Heiko Carstens <heiko.carstens@de.ibm.com>
+ *
+ * There are 5 different IPL methods
+ * 1) load the image directly into ram at address 0 and do a PSW restart
+ * 2) linload will load the image from address 0x10000 to memory 0x10000
+ *    and start the code through LPSW 0x0008000080010000 (VM only, deprecated)
+ * 3) generate the tape ipl header, store the generated image on a tape
+ *    and ipl from it
+ *    In case of SL tape you need to IPL 5 times to get past VOL1 etc.
+ * 4) generate the vm reader ipl header, move the generated image to the
+ *    VM reader (use option NOH!) and do an ipl from the reader (VM only)
+ * 5) direct call of start by the SALIPL loader
+ * We use the cpuid to distinguish between VM and native ipl
+ * params for kernel are pushed to 0x10400 (see setup.h)
+ *
+ */
+
+#include <linux/init.h>
+#include <linux/linkage.h>
+#include <asm/asm-offsets.h>
+#include <asm/thread_info.h>
+#include <asm/page.h>
+#include <asm/ptrace.h>
+
+#define ARCH_OFFSET 4
+
+__HEAD
+
+#define IPL_BS 0x730
+ .org 0
+ .long 0x00080000,0x80000000+iplstart # The first 24 bytes are loaded
+ .long 0x02000018,0x60000050 # by ipl to addresses 0-23.
+ .long 0x02000068,0x60000050 # (a PSW and two CCWs).
+ .fill 80-24,1,0x40 # bytes 24-79 are discarded !!
+	.long	0x020000f0,0x60000050	# The next 160 bytes are loaded
+ .long 0x02000140,0x60000050 # to addresses 0x18-0xb7
+ .long 0x02000190,0x60000050 # They form the continuation
+ .long 0x020001e0,0x60000050 # of the CCW program started
+ .long 0x02000230,0x60000050 # by ipl and load the range
+ .long 0x02000280,0x60000050 # 0x0f0-0x730 from the image
+ .long 0x020002d0,0x60000050 # to the range 0x0f0-0x730
+ .long 0x02000320,0x60000050 # in memory. At the end of
+ .long 0x02000370,0x60000050 # the channel program the PSW
+ .long 0x020003c0,0x60000050 # at location 0 is loaded.
+ .long 0x02000410,0x60000050 # Initial processing starts
+ .long 0x02000460,0x60000050 # at 0x200 = iplstart.
+ .long 0x020004b0,0x60000050
+ .long 0x02000500,0x60000050
+ .long 0x02000550,0x60000050
+ .long 0x020005a0,0x60000050
+ .long 0x020005f0,0x60000050
+ .long 0x02000640,0x60000050
+ .long 0x02000690,0x60000050
+ .long 0x020006e0,0x20000050
+
+ .org 0x200
+
+#
+# subroutine to wait for the end of I/O
+#
+.Lirqwait:
+ mvc __LC_IO_NEW_PSW(16),.Lnewpsw # set up IO interrupt psw
+ lpsw .Lwaitpsw
+.Lioint:
+ br %r14
+ .align 8
+.Lnewpsw:
+ .quad 0x0000000080000000,.Lioint
+.Lwaitpsw:
+ .long 0x020a0000,0x80000000+.Lioint
+
+#
+# subroutine for loading cards from the reader
+#
+.Lloader:
+ la %r4,0(%r14)
+	la	%r3,.Lorb		# r3 = address of orb
+	la	%r5,.Lirb		# r5 = address of irb
+ la %r6,.Lccws
+ la %r7,20
+.Linit:
+ st %r2,4(%r6) # initialize CCW data addresses
+ la %r2,0x50(%r2)
+ la %r6,8(%r6)
+ bct 7,.Linit
+
+ lctl %c6,%c6,.Lcr6 # set IO subclass mask
+ slr %r2,%r2
+.Lldlp:
+ ssch 0(%r3) # load chunk of 1600 bytes
+ bnz .Llderr
+.Lwait4irq:
+ bas %r14,.Lirqwait
+ c %r1,__LC_SUBCHANNEL_ID # compare subchannel number
+ bne .Lwait4irq
+ tsch 0(%r5)
+
+ slr %r0,%r0
+ ic %r0,8(%r5) # get device status
+ chi %r0,8 # channel end ?
+ be .Lcont
+ chi %r0,12 # channel end + device end ?
+ be .Lcont
+
+ l %r0,4(%r5)
+ s %r0,8(%r3) # r0/8 = number of ccws executed
+ mhi %r0,10 # *10 = number of bytes in ccws
+ lh %r3,10(%r5) # get residual count
+ sr %r0,%r3 # #ccws*80-residual=#bytes read
+ ar %r2,%r0
+
+ br %r4 # r2 contains the total size
+
+.Lcont:
+ ahi %r2,0x640 # add 0x640 to total size
+ la %r6,.Lccws
+ la %r7,20
+.Lincr:
+ l %r0,4(%r6) # update CCW data addresses
+ ahi %r0,0x640
+ st %r0,4(%r6)
+ ahi %r6,8
+ bct 7,.Lincr
+
+ b .Lldlp
+.Llderr:
+ lpsw .Lcrash
+
+ .align 8
+.Lorb: .long 0x00000000,0x0080ff00,.Lccws
+.Lirb: .long 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
+.Lcr6: .long 0xff000000
+.Lloadp:.long 0,0
+ .align 8
+.Lcrash:.long 0x000a0000,0x00000000
+
+ .align 8
+.Lccws: .rept 19
+ .long 0x02600050,0x00000000
+ .endr
+ .long 0x02200050,0x00000000
+
+iplstart:
+ mvi __LC_AR_MODE_ID,1 # set esame flag
+ slr %r0,%r0 # set cpuid to zero
+ lhi %r1,2 # mode 2 = esame (dump)
+ sigp %r1,%r0,0x12 # switch to esame mode
+ bras %r13,0f
+ .fill 16,4,0x0
+0: lmh %r0,%r15,0(%r13) # clear high-order half of gprs
+ sam31 # switch to 31 bit addressing mode
+ lh %r1,__LC_SUBCHANNEL_ID # test if subchannel number
+ bct %r1,.Lnoload # is valid
+ l %r1,__LC_SUBCHANNEL_ID # load ipl subchannel number
+ la %r2,IPL_BS # load start address
+ bas %r14,.Lloader # load rest of ipl image
+ l %r12,.Lparm # pointer to parameter area
+ st %r1,IPL_DEVICE+ARCH_OFFSET-PARMAREA(%r12) # save ipl device number
+
+#
+# load parameter file from ipl device
+#
+.Lagain1:
+ l %r2,.Linitrd # ramdisk loc. is temp
+ bas %r14,.Lloader # load parameter file
+ ltr %r2,%r2 # got anything ?
+ bz .Lnopf
+ chi %r2,895
+ bnh .Lnotrunc
+ la %r2,895
+.Lnotrunc:
+ l %r4,.Linitrd
+ clc 0(3,%r4),.L_hdr # if it is HDRx
+ bz .Lagain1 # skip dataset header
+ clc 0(3,%r4),.L_eof # if it is EOFx
+	bz	.Lagain1		# skip dataset trailer
+ la %r5,0(%r4,%r2)
+ lr %r3,%r2
+ la %r3,COMMAND_LINE-PARMAREA(%r12) # load adr. of command line
+ mvc 0(256,%r3),0(%r4)
+ mvc 256(256,%r3),256(%r4)
+ mvc 512(256,%r3),512(%r4)
+ mvc 768(122,%r3),768(%r4)
+ slr %r0,%r0
+ b .Lcntlp
+.Ldelspc:
+ ic %r0,0(%r2,%r3)
+ chi %r0,0x20 # is it a space ?
+ be .Lcntlp
+ ahi %r2,1
+ b .Leolp
+.Lcntlp:
+ brct %r2,.Ldelspc
+.Leolp:
+ slr %r0,%r0
+ stc %r0,0(%r2,%r3) # terminate buffer
+.Lnopf:
+
+#
+# load ramdisk from ipl device
+#
+.Lagain2:
+ l %r2,.Linitrd # addr of ramdisk
+ st %r2,INITRD_START+ARCH_OFFSET-PARMAREA(%r12)
+ bas %r14,.Lloader # load ramdisk
+ st %r2,INITRD_SIZE+ARCH_OFFSET-PARMAREA(%r12) # store size of rd
+ ltr %r2,%r2
+ bnz .Lrdcont
+ st %r2,INITRD_START+ARCH_OFFSET-PARMAREA(%r12) # no ramdisk found
+.Lrdcont:
+ l %r2,.Linitrd
+
+ clc 0(3,%r2),.L_hdr # skip HDRx and EOFx
+ bz .Lagain2
+ clc 0(3,%r2),.L_eof
+ bz .Lagain2
+
+#
+# reset files in VM reader
+#
+ stidp .Lcpuid # store cpuid
+ tm .Lcpuid,0xff # running VM ?
+ bno .Lnoreset
+ la %r2,.Lreset
+ lhi %r3,26
+ diag %r2,%r3,8
+ la %r5,.Lirb
+ stsch 0(%r5) # check if irq is pending
+ tm 30(%r5),0x0f # by verifying if any of the
+ bnz .Lwaitforirq # activity or status control
+ tm 31(%r5),0xff # bits is set in the schib
+ bz .Lnoreset
+.Lwaitforirq:
+ bas %r14,.Lirqwait # wait for IO interrupt
+ c %r1,__LC_SUBCHANNEL_ID # compare subchannel number
+ bne .Lwaitforirq
+ la %r5,.Lirb
+ tsch 0(%r5)
+.Lnoreset:
+ b .Lnoload
+
+#
+# everything loaded, go for it
+#
+.Lnoload:
+ l %r1,.Lstartup
+ br %r1
+
+.Linitrd:.long _end # default address of initrd
+.Lparm: .long PARMAREA
+.Lstartup: .long startup
+.Lreset:.byte 0xc3,0xc8,0xc1,0xd5,0xc7,0xc5,0x40,0xd9,0xc4,0xd9,0x40
+ .byte 0xc1,0xd3,0xd3,0x40,0xd2,0xc5,0xc5,0xd7,0x40,0xd5,0xd6
+ .byte 0xc8,0xd6,0xd3,0xc4 # "change rdr all keep nohold"
+.L_eof: .long 0xc5d6c600 /* C'EOF' */
+.L_hdr: .long 0xc8c4d900 /* C'HDR' */
+ .align 8
+.Lcpuid:.fill 8,1,0
+
+#
+# startup-code at 0x10000, running in absolute addressing mode
+# this is called either by the ipl loader or directly by PSW restart
+# or linload or SALIPL
+#
+ .org 0x10000
+ENTRY(startup)
+ j .Lep_startup_normal
+ .org EP_OFFSET
+#
+# This is a list of s390 kernel entry points. At address 0x1000f the number of
+# valid entry points is stored.
+#
+# IMPORTANT: Do not change this table, it is s390 kernel ABI!
+#
+ .ascii EP_STRING
+ .byte 0x00,0x01
+#
+# kdump startup-code at 0x10010, running in 64 bit absolute addressing mode
+#
+ .org 0x10010
+ENTRY(startup_kdump)
+ j .Lep_startup_kdump
+.Lep_startup_normal:
+ mvi __LC_AR_MODE_ID,1 # set esame flag
+ slr %r0,%r0 # set cpuid to zero
+ lhi %r1,2 # mode 2 = esame (dump)
+ sigp %r1,%r0,0x12 # switch to esame mode
+ bras %r13,0f
+ .fill 16,4,0x0
+0: lmh %r0,%r15,0(%r13) # clear high-order half of gprs
+ sam64 # switch to 64 bit addressing mode
+ basr %r13,0 # get base
+.LPG0:
+ xc 0x200(256),0x200 # partially clear lowcore
+ xc 0x300(256),0x300
+ xc 0xe00(256),0xe00
+ xc 0xf00(256),0xf00
+ lctlg %c0,%c15,0x200(%r0) # initialize control registers
+ stcke __LC_BOOT_CLOCK
+ mvc __LC_LAST_UPDATE_CLOCK(8),__LC_BOOT_CLOCK+1
+ spt 6f-.LPG0(%r13)
+ mvc __LC_LAST_UPDATE_TIMER(8),6f-.LPG0(%r13)
+ l %r15,.Lstack-.LPG0(%r13)
+ ahi %r15,-STACK_FRAME_OVERHEAD
+ brasl %r14,verify_facilities
+#ifdef CONFIG_KERNEL_UNCOMPRESSED
+ jg startup_continue
+#else
+ jg startup_decompressor
+#endif
+
+.Lstack:
+ .long 0x8000 + (1<<(PAGE_SHIFT+THREAD_SIZE_ORDER))
+ .align 8
+6: .long 0x7fffffff,0xffffffff
+
+#include "head_kdump.S"
+
+#
+# params at 10400 (setup.h)
+#
+ .org PARMAREA
+ .long 0,0 # IPL_DEVICE
+ .long 0,0 # INITRD_START
+ .long 0,0 # INITRD_SIZE
+ .long 0,0 # OLDMEM_BASE
+ .long 0,0 # OLDMEM_SIZE
+
+ .org COMMAND_LINE
+ .byte "root=/dev/ram0 ro"
+ .byte 0
+
+ .org 0x11000
diff --git a/arch/s390/boot/head_kdump.S b/arch/s390/boot/head_kdump.S
new file mode 100644
index 000000000..174d6959b
--- /dev/null
+++ b/arch/s390/boot/head_kdump.S
@@ -0,0 +1,101 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * S390 kdump lowlevel functions (new kernel)
+ *
+ * Copyright IBM Corp. 2011
+ * Author(s): Michael Holzheu <holzheu@linux.vnet.ibm.com>
+ */
+
+#include <asm/sigp.h>
+
+#define DATAMOVER_ADDR 0x4000
+#define COPY_PAGE_ADDR 0x6000
+
+#ifdef CONFIG_CRASH_DUMP
+
+#
+# kdump entry (new kernel - not yet relocated)
+#
+# Note: This code has to be position independent
+#
+
+.align 2
+.Lep_startup_kdump:
+ lhi %r1,2 # mode 2 = esame (dump)
+ sigp %r1,%r0,SIGP_SET_ARCHITECTURE # Switch to esame mode
+ sam64 # Switch to 64 bit addressing
+ basr %r13,0
+.Lbase:
+ larl %r2,.Lbase_addr # Check, if we have been
+ lg %r2,0(%r2) # already relocated:
+ clgr %r2,%r13 #
+ jne .Lrelocate # No : Start data mover
+ lghi %r2,0 # Yes: Start kdump kernel
+ brasl %r14,startup_kdump_relocated
+
+.Lrelocate:
+ larl %r4,startup
+ lg %r2,0x418(%r4) # Get kdump base
+ lg %r3,0x420(%r4) # Get kdump size
+
+ larl %r10,.Lcopy_start # Source of data mover
+ lghi %r8,DATAMOVER_ADDR # Target of data mover
+ mvc 0(256,%r8),0(%r10) # Copy data mover code
+
+ agr %r8,%r2 # Copy data mover to
+ mvc 0(256,%r8),0(%r10) # reserved mem
+
+ lghi %r14,DATAMOVER_ADDR # Jump to copied data mover
+ basr %r14,%r14
+.Lbase_addr:
+ .quad .Lbase
+
+#
+# kdump data mover code (runs at address DATAMOVER_ADDR)
+#
+# r2: kdump base address
+# r3: kdump size
+#
+.Lcopy_start:
+ basr %r13,0 # Base
+0:
+ lgr %r11,%r2 # Save kdump base address
+ lgr %r12,%r2
+ agr %r12,%r3 # Compute kdump end address
+
+ lghi %r5,0
+ lghi %r10,COPY_PAGE_ADDR # Load copy page address
+1:
+ mvc 0(256,%r10),0(%r5) # Copy old kernel to tmp
+ mvc 0(256,%r5),0(%r11) # Copy new kernel to old
+ mvc 0(256,%r11),0(%r10) # Copy tmp to new
+ aghi %r11,256
+ aghi %r5,256
+ clgr %r11,%r12
+ jl 1b
+
+ lg %r14,.Lstartup_kdump-0b(%r13)
+ basr %r14,%r14 # Start relocated kernel
+.Lstartup_kdump:
+ .long 0x00000000,0x00000000 + startup_kdump_relocated
+.Lcopy_end:
+
+#
+# Startup of kdump (relocated new kernel)
+#
+.align 2
+startup_kdump_relocated:
+ basr %r13,0
+0: lpswe .Lrestart_psw-0b(%r13) # Start new kernel...
+.align 8
+.Lrestart_psw:
+ .quad 0x0000000080000000,0x0000000000000000 + startup
+#else
+.align 2
+.Lep_startup_kdump:
+ larl %r13,startup_kdump_crash
+ lpswe 0(%r13)
+.align 8
+startup_kdump_crash:
+ .quad 0x0002000080000000,0x0000000000000000 + startup_kdump_crash
+#endif /* CONFIG_CRASH_DUMP */
diff --git a/arch/s390/boot/install.sh b/arch/s390/boot/install.sh
new file mode 100644
index 000000000..bed227f26
--- /dev/null
+++ b/arch/s390/boot/install.sh
@@ -0,0 +1,35 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+#
+# arch/s390x/boot/install.sh
+#
+# Copyright (C) 1995 by Linus Torvalds
+#
+# Adapted from code in arch/i386/boot/Makefile by H. Peter Anvin
+#
+# "make install" script for s390 architecture
+#
+# Arguments:
+# $1 - kernel version
+# $2 - kernel image file
+# $3 - kernel map file
+# $4 - default install path (blank if root directory)
+#
+
+# User may have a custom install script
+
+if [ -x ~/bin/${INSTALLKERNEL} ]; then exec ~/bin/${INSTALLKERNEL} "$@"; fi
+if [ -x /sbin/${INSTALLKERNEL} ]; then exec /sbin/${INSTALLKERNEL} "$@"; fi
+
+# Default install - same as make zlilo
+
+if [ -f $4/vmlinuz ]; then
+ mv $4/vmlinuz $4/vmlinuz.old
+fi
+
+if [ -f $4/System.map ]; then
+ mv $4/System.map $4/System.old
+fi
+
+cat $2 > $4/vmlinuz
+cp $3 $4/System.map
diff --git a/arch/s390/boot/mem.S b/arch/s390/boot/mem.S
new file mode 100644
index 000000000..b33463633
--- /dev/null
+++ b/arch/s390/boot/mem.S
@@ -0,0 +1,2 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#include "../lib/mem.S"
diff --git a/arch/s390/boot/sclp_early_core.c b/arch/s390/boot/sclp_early_core.c
new file mode 100644
index 000000000..5a19fd702
--- /dev/null
+++ b/arch/s390/boot/sclp_early_core.c
@@ -0,0 +1,2 @@
+// SPDX-License-Identifier: GPL-2.0
+#include "../../../drivers/s390/char/sclp_early_core.c"
diff --git a/arch/s390/configs/debug_defconfig b/arch/s390/configs/debug_defconfig
new file mode 100644
index 000000000..941d8cc6c
--- /dev/null
+++ b/arch/s390/configs/debug_defconfig
@@ -0,0 +1,730 @@
+CONFIG_SYSVIPC=y
+CONFIG_POSIX_MQUEUE=y
+CONFIG_AUDIT=y
+CONFIG_NO_HZ_IDLE=y
+CONFIG_HIGH_RES_TIMERS=y
+CONFIG_BSD_PROCESS_ACCT=y
+CONFIG_BSD_PROCESS_ACCT_V3=y
+CONFIG_TASKSTATS=y
+CONFIG_TASK_DELAY_ACCT=y
+CONFIG_TASK_XACCT=y
+CONFIG_TASK_IO_ACCOUNTING=y
+CONFIG_IKCONFIG=y
+CONFIG_IKCONFIG_PROC=y
+CONFIG_NUMA_BALANCING=y
+CONFIG_MEMCG=y
+CONFIG_MEMCG_SWAP=y
+CONFIG_BLK_CGROUP=y
+CONFIG_CFS_BANDWIDTH=y
+CONFIG_RT_GROUP_SCHED=y
+CONFIG_CGROUP_PIDS=y
+CONFIG_CGROUP_FREEZER=y
+CONFIG_CGROUP_HUGETLB=y
+CONFIG_CPUSETS=y
+CONFIG_CGROUP_DEVICE=y
+CONFIG_CGROUP_CPUACCT=y
+CONFIG_CGROUP_PERF=y
+CONFIG_NAMESPACES=y
+CONFIG_USER_NS=y
+CONFIG_SCHED_AUTOGROUP=y
+CONFIG_BLK_DEV_INITRD=y
+CONFIG_EXPERT=y
+# CONFIG_SYSFS_SYSCALL is not set
+CONFIG_CHECKPOINT_RESTORE=y
+CONFIG_BPF_SYSCALL=y
+CONFIG_USERFAULTFD=y
+# CONFIG_COMPAT_BRK is not set
+CONFIG_PROFILING=y
+CONFIG_OPROFILE=m
+CONFIG_KPROBES=y
+CONFIG_JUMP_LABEL=y
+CONFIG_STATIC_KEYS_SELFTEST=y
+CONFIG_MODULES=y
+CONFIG_MODULE_FORCE_LOAD=y
+CONFIG_MODULE_UNLOAD=y
+CONFIG_MODULE_FORCE_UNLOAD=y
+CONFIG_MODVERSIONS=y
+CONFIG_MODULE_SRCVERSION_ALL=y
+CONFIG_BLK_DEV_INTEGRITY=y
+CONFIG_BLK_DEV_THROTTLING=y
+CONFIG_BLK_WBT=y
+CONFIG_BLK_WBT_SQ=y
+CONFIG_PARTITION_ADVANCED=y
+CONFIG_IBM_PARTITION=y
+CONFIG_BSD_DISKLABEL=y
+CONFIG_MINIX_SUBPARTITION=y
+CONFIG_SOLARIS_X86_PARTITION=y
+CONFIG_UNIXWARE_DISKLABEL=y
+CONFIG_CFQ_GROUP_IOSCHED=y
+CONFIG_DEFAULT_DEADLINE=y
+CONFIG_LIVEPATCH=y
+CONFIG_TUNE_ZEC12=y
+CONFIG_NR_CPUS=512
+CONFIG_NUMA=y
+CONFIG_PREEMPT=y
+CONFIG_HZ_100=y
+CONFIG_KEXEC_FILE=y
+CONFIG_MEMORY_HOTPLUG=y
+CONFIG_MEMORY_HOTREMOVE=y
+CONFIG_KSM=y
+CONFIG_TRANSPARENT_HUGEPAGE=y
+CONFIG_CLEANCACHE=y
+CONFIG_FRONTSWAP=y
+CONFIG_CMA_DEBUG=y
+CONFIG_CMA_DEBUGFS=y
+CONFIG_MEM_SOFT_DIRTY=y
+CONFIG_ZSWAP=y
+CONFIG_ZBUD=m
+CONFIG_ZSMALLOC=m
+CONFIG_ZSMALLOC_STAT=y
+CONFIG_DEFERRED_STRUCT_PAGE_INIT=y
+CONFIG_IDLE_PAGE_TRACKING=y
+CONFIG_PCI=y
+CONFIG_PCI_DEBUG=y
+CONFIG_HOTPLUG_PCI=y
+CONFIG_HOTPLUG_PCI_S390=y
+CONFIG_CHSC_SCH=y
+CONFIG_CRASH_DUMP=y
+CONFIG_BINFMT_MISC=m
+CONFIG_HIBERNATION=y
+CONFIG_NET=y
+CONFIG_PACKET=y
+CONFIG_PACKET_DIAG=m
+CONFIG_UNIX=y
+CONFIG_UNIX_DIAG=m
+CONFIG_XFRM_USER=m
+CONFIG_NET_KEY=m
+CONFIG_SMC=m
+CONFIG_SMC_DIAG=m
+CONFIG_INET=y
+CONFIG_IP_MULTICAST=y
+CONFIG_IP_ADVANCED_ROUTER=y
+CONFIG_IP_MULTIPLE_TABLES=y
+CONFIG_IP_ROUTE_MULTIPATH=y
+CONFIG_IP_ROUTE_VERBOSE=y
+CONFIG_NET_IPIP=m
+CONFIG_NET_IPGRE_DEMUX=m
+CONFIG_NET_IPGRE=m
+CONFIG_NET_IPGRE_BROADCAST=y
+CONFIG_IP_MROUTE=y
+CONFIG_IP_MROUTE_MULTIPLE_TABLES=y
+CONFIG_IP_PIMSM_V1=y
+CONFIG_IP_PIMSM_V2=y
+CONFIG_SYN_COOKIES=y
+CONFIG_NET_IPVTI=m
+CONFIG_INET_AH=m
+CONFIG_INET_ESP=m
+CONFIG_INET_IPCOMP=m
+CONFIG_INET_XFRM_MODE_TRANSPORT=m
+CONFIG_INET_XFRM_MODE_TUNNEL=m
+CONFIG_INET_XFRM_MODE_BEET=m
+CONFIG_INET_DIAG=m
+CONFIG_INET_UDP_DIAG=m
+CONFIG_TCP_CONG_ADVANCED=y
+CONFIG_TCP_CONG_HSTCP=m
+CONFIG_TCP_CONG_HYBLA=m
+CONFIG_TCP_CONG_SCALABLE=m
+CONFIG_TCP_CONG_LP=m
+CONFIG_TCP_CONG_VENO=m
+CONFIG_TCP_CONG_YEAH=m
+CONFIG_TCP_CONG_ILLINOIS=m
+CONFIG_IPV6_ROUTER_PREF=y
+CONFIG_INET6_AH=m
+CONFIG_INET6_ESP=m
+CONFIG_INET6_IPCOMP=m
+CONFIG_IPV6_MIP6=m
+CONFIG_INET6_XFRM_MODE_TRANSPORT=m
+CONFIG_INET6_XFRM_MODE_TUNNEL=m
+CONFIG_INET6_XFRM_MODE_BEET=m
+CONFIG_INET6_XFRM_MODE_ROUTEOPTIMIZATION=m
+CONFIG_IPV6_VTI=m
+CONFIG_IPV6_SIT=m
+CONFIG_IPV6_GRE=m
+CONFIG_IPV6_MULTIPLE_TABLES=y
+CONFIG_IPV6_SUBTREES=y
+CONFIG_NETFILTER=y
+CONFIG_NF_CONNTRACK=m
+CONFIG_NF_CONNTRACK_SECMARK=y
+CONFIG_NF_CONNTRACK_EVENTS=y
+CONFIG_NF_CONNTRACK_TIMEOUT=y
+CONFIG_NF_CONNTRACK_TIMESTAMP=y
+CONFIG_NF_CONNTRACK_AMANDA=m
+CONFIG_NF_CONNTRACK_FTP=m
+CONFIG_NF_CONNTRACK_H323=m
+CONFIG_NF_CONNTRACK_IRC=m
+CONFIG_NF_CONNTRACK_NETBIOS_NS=m
+CONFIG_NF_CONNTRACK_SNMP=m
+CONFIG_NF_CONNTRACK_PPTP=m
+CONFIG_NF_CONNTRACK_SANE=m
+CONFIG_NF_CONNTRACK_SIP=m
+CONFIG_NF_CONNTRACK_TFTP=m
+CONFIG_NF_CT_NETLINK=m
+CONFIG_NF_CT_NETLINK_TIMEOUT=m
+CONFIG_NF_TABLES=m
+CONFIG_NFT_EXTHDR=m
+CONFIG_NFT_META=m
+CONFIG_NFT_CT=m
+CONFIG_NFT_COUNTER=m
+CONFIG_NFT_LOG=m
+CONFIG_NFT_LIMIT=m
+CONFIG_NFT_NAT=m
+CONFIG_NFT_COMPAT=m
+CONFIG_NFT_HASH=m
+CONFIG_NETFILTER_XT_SET=m
+CONFIG_NETFILTER_XT_TARGET_AUDIT=m
+CONFIG_NETFILTER_XT_TARGET_CHECKSUM=m
+CONFIG_NETFILTER_XT_TARGET_CLASSIFY=m
+CONFIG_NETFILTER_XT_TARGET_CONNMARK=m
+CONFIG_NETFILTER_XT_TARGET_CONNSECMARK=m
+CONFIG_NETFILTER_XT_TARGET_CT=m
+CONFIG_NETFILTER_XT_TARGET_DSCP=m
+CONFIG_NETFILTER_XT_TARGET_HMARK=m
+CONFIG_NETFILTER_XT_TARGET_IDLETIMER=m
+CONFIG_NETFILTER_XT_TARGET_LOG=m
+CONFIG_NETFILTER_XT_TARGET_MARK=m
+CONFIG_NETFILTER_XT_TARGET_NFLOG=m
+CONFIG_NETFILTER_XT_TARGET_NFQUEUE=m
+CONFIG_NETFILTER_XT_TARGET_TEE=m
+CONFIG_NETFILTER_XT_TARGET_TPROXY=m
+CONFIG_NETFILTER_XT_TARGET_TRACE=m
+CONFIG_NETFILTER_XT_TARGET_SECMARK=m
+CONFIG_NETFILTER_XT_TARGET_TCPMSS=m
+CONFIG_NETFILTER_XT_TARGET_TCPOPTSTRIP=m
+CONFIG_NETFILTER_XT_MATCH_ADDRTYPE=m
+CONFIG_NETFILTER_XT_MATCH_BPF=m
+CONFIG_NETFILTER_XT_MATCH_CLUSTER=m
+CONFIG_NETFILTER_XT_MATCH_COMMENT=m
+CONFIG_NETFILTER_XT_MATCH_CONNBYTES=m
+CONFIG_NETFILTER_XT_MATCH_CONNLABEL=m
+CONFIG_NETFILTER_XT_MATCH_CONNLIMIT=m
+CONFIG_NETFILTER_XT_MATCH_CONNMARK=m
+CONFIG_NETFILTER_XT_MATCH_CONNTRACK=m
+CONFIG_NETFILTER_XT_MATCH_CPU=m
+CONFIG_NETFILTER_XT_MATCH_DCCP=m
+CONFIG_NETFILTER_XT_MATCH_DEVGROUP=m
+CONFIG_NETFILTER_XT_MATCH_DSCP=m
+CONFIG_NETFILTER_XT_MATCH_ESP=m
+CONFIG_NETFILTER_XT_MATCH_HASHLIMIT=m
+CONFIG_NETFILTER_XT_MATCH_HELPER=m
+CONFIG_NETFILTER_XT_MATCH_IPRANGE=m
+CONFIG_NETFILTER_XT_MATCH_IPVS=m
+CONFIG_NETFILTER_XT_MATCH_LENGTH=m
+CONFIG_NETFILTER_XT_MATCH_LIMIT=m
+CONFIG_NETFILTER_XT_MATCH_MAC=m
+CONFIG_NETFILTER_XT_MATCH_MARK=m
+CONFIG_NETFILTER_XT_MATCH_MULTIPORT=m
+CONFIG_NETFILTER_XT_MATCH_NFACCT=m
+CONFIG_NETFILTER_XT_MATCH_OSF=m
+CONFIG_NETFILTER_XT_MATCH_OWNER=m
+CONFIG_NETFILTER_XT_MATCH_POLICY=m
+CONFIG_NETFILTER_XT_MATCH_PHYSDEV=m
+CONFIG_NETFILTER_XT_MATCH_PKTTYPE=m
+CONFIG_NETFILTER_XT_MATCH_QUOTA=m
+CONFIG_NETFILTER_XT_MATCH_RATEEST=m
+CONFIG_NETFILTER_XT_MATCH_REALM=m
+CONFIG_NETFILTER_XT_MATCH_RECENT=m
+CONFIG_NETFILTER_XT_MATCH_STATE=m
+CONFIG_NETFILTER_XT_MATCH_STATISTIC=m
+CONFIG_NETFILTER_XT_MATCH_STRING=m
+CONFIG_NETFILTER_XT_MATCH_TCPMSS=m
+CONFIG_NETFILTER_XT_MATCH_TIME=m
+CONFIG_NETFILTER_XT_MATCH_U32=m
+CONFIG_IP_SET=m
+CONFIG_IP_SET_BITMAP_IP=m
+CONFIG_IP_SET_BITMAP_IPMAC=m
+CONFIG_IP_SET_BITMAP_PORT=m
+CONFIG_IP_SET_HASH_IP=m
+CONFIG_IP_SET_HASH_IPPORT=m
+CONFIG_IP_SET_HASH_IPPORTIP=m
+CONFIG_IP_SET_HASH_IPPORTNET=m
+CONFIG_IP_SET_HASH_NETPORTNET=m
+CONFIG_IP_SET_HASH_NET=m
+CONFIG_IP_SET_HASH_NETNET=m
+CONFIG_IP_SET_HASH_NETPORT=m
+CONFIG_IP_SET_HASH_NETIFACE=m
+CONFIG_IP_SET_LIST_SET=m
+CONFIG_IP_VS=m
+CONFIG_IP_VS_PROTO_TCP=y
+CONFIG_IP_VS_PROTO_UDP=y
+CONFIG_IP_VS_PROTO_ESP=y
+CONFIG_IP_VS_PROTO_AH=y
+CONFIG_IP_VS_RR=m
+CONFIG_IP_VS_WRR=m
+CONFIG_IP_VS_LC=m
+CONFIG_IP_VS_WLC=m
+CONFIG_IP_VS_LBLC=m
+CONFIG_IP_VS_LBLCR=m
+CONFIG_IP_VS_DH=m
+CONFIG_IP_VS_SH=m
+CONFIG_IP_VS_SED=m
+CONFIG_IP_VS_NQ=m
+CONFIG_IP_VS_FTP=m
+CONFIG_IP_VS_PE_SIP=m
+CONFIG_NF_CONNTRACK_IPV4=m
+CONFIG_NF_TABLES_IPV4=y
+CONFIG_NFT_CHAIN_ROUTE_IPV4=m
+CONFIG_NF_TABLES_ARP=y
+CONFIG_NFT_CHAIN_NAT_IPV4=m
+CONFIG_IP_NF_IPTABLES=m
+CONFIG_IP_NF_MATCH_AH=m
+CONFIG_IP_NF_MATCH_ECN=m
+CONFIG_IP_NF_MATCH_RPFILTER=m
+CONFIG_IP_NF_MATCH_TTL=m
+CONFIG_IP_NF_FILTER=m
+CONFIG_IP_NF_TARGET_REJECT=m
+CONFIG_IP_NF_NAT=m
+CONFIG_IP_NF_TARGET_MASQUERADE=m
+CONFIG_IP_NF_MANGLE=m
+CONFIG_IP_NF_TARGET_CLUSTERIP=m
+CONFIG_IP_NF_TARGET_ECN=m
+CONFIG_IP_NF_TARGET_TTL=m
+CONFIG_IP_NF_RAW=m
+CONFIG_IP_NF_SECURITY=m
+CONFIG_IP_NF_ARPTABLES=m
+CONFIG_IP_NF_ARPFILTER=m
+CONFIG_IP_NF_ARP_MANGLE=m
+CONFIG_NF_CONNTRACK_IPV6=m
+CONFIG_NF_TABLES_IPV6=y
+CONFIG_NFT_CHAIN_ROUTE_IPV6=m
+CONFIG_NFT_CHAIN_NAT_IPV6=m
+CONFIG_IP6_NF_IPTABLES=m
+CONFIG_IP6_NF_MATCH_AH=m
+CONFIG_IP6_NF_MATCH_EUI64=m
+CONFIG_IP6_NF_MATCH_FRAG=m
+CONFIG_IP6_NF_MATCH_OPTS=m
+CONFIG_IP6_NF_MATCH_HL=m
+CONFIG_IP6_NF_MATCH_IPV6HEADER=m
+CONFIG_IP6_NF_MATCH_MH=m
+CONFIG_IP6_NF_MATCH_RPFILTER=m
+CONFIG_IP6_NF_MATCH_RT=m
+CONFIG_IP6_NF_TARGET_HL=m
+CONFIG_IP6_NF_FILTER=m
+CONFIG_IP6_NF_TARGET_REJECT=m
+CONFIG_IP6_NF_MANGLE=m
+CONFIG_IP6_NF_RAW=m
+CONFIG_IP6_NF_SECURITY=m
+CONFIG_IP6_NF_NAT=m
+CONFIG_IP6_NF_TARGET_MASQUERADE=m
+CONFIG_NF_TABLES_BRIDGE=y
+CONFIG_RDS=m
+CONFIG_RDS_RDMA=m
+CONFIG_RDS_TCP=m
+CONFIG_RDS_DEBUG=y
+CONFIG_L2TP=m
+CONFIG_L2TP_DEBUGFS=m
+CONFIG_L2TP_V3=y
+CONFIG_L2TP_IP=m
+CONFIG_L2TP_ETH=m
+CONFIG_BRIDGE=m
+CONFIG_VLAN_8021Q=m
+CONFIG_VLAN_8021Q_GVRP=y
+CONFIG_NET_SCHED=y
+CONFIG_NET_SCH_CBQ=m
+CONFIG_NET_SCH_HTB=m
+CONFIG_NET_SCH_HFSC=m
+CONFIG_NET_SCH_PRIO=m
+CONFIG_NET_SCH_MULTIQ=m
+CONFIG_NET_SCH_RED=m
+CONFIG_NET_SCH_SFB=m
+CONFIG_NET_SCH_SFQ=m
+CONFIG_NET_SCH_TEQL=m
+CONFIG_NET_SCH_TBF=m
+CONFIG_NET_SCH_GRED=m
+CONFIG_NET_SCH_DSMARK=m
+CONFIG_NET_SCH_NETEM=m
+CONFIG_NET_SCH_DRR=m
+CONFIG_NET_SCH_MQPRIO=m
+CONFIG_NET_SCH_CHOKE=m
+CONFIG_NET_SCH_QFQ=m
+CONFIG_NET_SCH_CODEL=m
+CONFIG_NET_SCH_FQ_CODEL=m
+CONFIG_NET_SCH_INGRESS=m
+CONFIG_NET_SCH_PLUG=m
+CONFIG_NET_CLS_BASIC=m
+CONFIG_NET_CLS_TCINDEX=m
+CONFIG_NET_CLS_ROUTE4=m
+CONFIG_NET_CLS_FW=m
+CONFIG_NET_CLS_U32=m
+CONFIG_CLS_U32_PERF=y
+CONFIG_CLS_U32_MARK=y
+CONFIG_NET_CLS_RSVP=m
+CONFIG_NET_CLS_RSVP6=m
+CONFIG_NET_CLS_FLOW=m
+CONFIG_NET_CLS_CGROUP=y
+CONFIG_NET_CLS_BPF=m
+CONFIG_NET_CLS_ACT=y
+CONFIG_NET_ACT_POLICE=m
+CONFIG_NET_ACT_GACT=m
+CONFIG_GACT_PROB=y
+CONFIG_NET_ACT_MIRRED=m
+CONFIG_NET_ACT_IPT=m
+CONFIG_NET_ACT_NAT=m
+CONFIG_NET_ACT_PEDIT=m
+CONFIG_NET_ACT_SIMP=m
+CONFIG_NET_ACT_SKBEDIT=m
+CONFIG_NET_ACT_CSUM=m
+CONFIG_DNS_RESOLVER=y
+CONFIG_OPENVSWITCH=m
+CONFIG_NETLINK_DIAG=m
+CONFIG_CGROUP_NET_PRIO=y
+CONFIG_BPF_JIT=y
+CONFIG_NET_PKTGEN=m
+CONFIG_DEVTMPFS=y
+CONFIG_DMA_CMA=y
+CONFIG_CMA_SIZE_MBYTES=0
+CONFIG_CONNECTOR=y
+CONFIG_ZRAM=m
+CONFIG_BLK_DEV_LOOP=m
+CONFIG_BLK_DEV_CRYPTOLOOP=m
+CONFIG_BLK_DEV_DRBD=m
+CONFIG_BLK_DEV_NBD=m
+CONFIG_BLK_DEV_RAM=y
+CONFIG_BLK_DEV_RAM_SIZE=32768
+CONFIG_VIRTIO_BLK=y
+CONFIG_BLK_DEV_RBD=m
+CONFIG_BLK_DEV_NVME=m
+CONFIG_ENCLOSURE_SERVICES=m
+CONFIG_GENWQE=m
+CONFIG_RAID_ATTRS=m
+CONFIG_SCSI=y
+CONFIG_BLK_DEV_SD=y
+CONFIG_CHR_DEV_ST=m
+CONFIG_CHR_DEV_OSST=m
+CONFIG_BLK_DEV_SR=m
+CONFIG_CHR_DEV_SG=y
+CONFIG_CHR_DEV_SCH=m
+CONFIG_SCSI_ENCLOSURE=m
+CONFIG_SCSI_CONSTANTS=y
+CONFIG_SCSI_LOGGING=y
+CONFIG_SCSI_SPI_ATTRS=m
+CONFIG_SCSI_FC_ATTRS=y
+CONFIG_SCSI_SAS_LIBSAS=m
+CONFIG_SCSI_SRP_ATTRS=m
+CONFIG_ISCSI_TCP=m
+CONFIG_SCSI_DEBUG=m
+CONFIG_ZFCP=y
+CONFIG_SCSI_VIRTIO=m
+CONFIG_SCSI_DH=y
+CONFIG_SCSI_DH_RDAC=m
+CONFIG_SCSI_DH_HP_SW=m
+CONFIG_SCSI_DH_EMC=m
+CONFIG_SCSI_DH_ALUA=m
+CONFIG_SCSI_OSD_INITIATOR=m
+CONFIG_SCSI_OSD_ULD=m
+CONFIG_MD=y
+CONFIG_BLK_DEV_MD=y
+CONFIG_MD_LINEAR=m
+CONFIG_MD_MULTIPATH=m
+CONFIG_MD_FAULTY=m
+CONFIG_BLK_DEV_DM=m
+CONFIG_DM_CRYPT=m
+CONFIG_DM_SNAPSHOT=m
+CONFIG_DM_THIN_PROVISIONING=m
+CONFIG_DM_MIRROR=m
+CONFIG_DM_LOG_USERSPACE=m
+CONFIG_DM_RAID=m
+CONFIG_DM_ZERO=m
+CONFIG_DM_MULTIPATH=m
+CONFIG_DM_MULTIPATH_QL=m
+CONFIG_DM_MULTIPATH_ST=m
+CONFIG_DM_DELAY=m
+CONFIG_DM_UEVENT=y
+CONFIG_DM_FLAKEY=m
+CONFIG_DM_VERITY=m
+CONFIG_DM_SWITCH=m
+CONFIG_NETDEVICES=y
+CONFIG_BONDING=m
+CONFIG_DUMMY=m
+CONFIG_EQUALIZER=m
+CONFIG_IFB=m
+CONFIG_MACVLAN=m
+CONFIG_MACVTAP=m
+CONFIG_VXLAN=m
+CONFIG_TUN=m
+CONFIG_VETH=m
+CONFIG_VIRTIO_NET=m
+CONFIG_NLMON=m
+# CONFIG_NET_VENDOR_ARC is not set
+# CONFIG_NET_VENDOR_CHELSIO is not set
+# CONFIG_NET_VENDOR_INTEL is not set
+# CONFIG_NET_VENDOR_MARVELL is not set
+CONFIG_MLX4_EN=m
+CONFIG_MLX5_CORE=m
+CONFIG_MLX5_CORE_EN=y
+# CONFIG_NET_VENDOR_NATSEMI is not set
+CONFIG_PPP=m
+CONFIG_PPP_BSDCOMP=m
+CONFIG_PPP_DEFLATE=m
+CONFIG_PPP_MPPE=m
+CONFIG_PPPOE=m
+CONFIG_PPTP=m
+CONFIG_PPPOL2TP=m
+CONFIG_PPP_ASYNC=m
+CONFIG_PPP_SYNC_TTY=m
+CONFIG_INPUT_EVDEV=y
+# CONFIG_INPUT_KEYBOARD is not set
+# CONFIG_INPUT_MOUSE is not set
+# CONFIG_SERIO is not set
+CONFIG_LEGACY_PTY_COUNT=0
+CONFIG_HW_RANDOM_VIRTIO=m
+CONFIG_RAW_DRIVER=m
+CONFIG_HANGCHECK_TIMER=m
+CONFIG_TN3270_FS=y
+# CONFIG_HWMON is not set
+CONFIG_WATCHDOG=y
+CONFIG_WATCHDOG_NOWAYOUT=y
+CONFIG_SOFT_WATCHDOG=m
+CONFIG_DIAG288_WATCHDOG=m
+CONFIG_DRM=y
+CONFIG_DRM_VIRTIO_GPU=y
+CONFIG_FRAMEBUFFER_CONSOLE=y
+# CONFIG_HID is not set
+# CONFIG_USB_SUPPORT is not set
+CONFIG_INFINIBAND=m
+CONFIG_INFINIBAND_USER_ACCESS=m
+CONFIG_MLX4_INFINIBAND=m
+CONFIG_MLX5_INFINIBAND=m
+CONFIG_VFIO=m
+CONFIG_VFIO_PCI=m
+CONFIG_VIRTIO_PCI=m
+CONFIG_VIRTIO_BALLOON=m
+CONFIG_VIRTIO_INPUT=y
+CONFIG_EXT4_FS=y
+CONFIG_EXT4_FS_POSIX_ACL=y
+CONFIG_EXT4_FS_SECURITY=y
+CONFIG_EXT4_ENCRYPTION=y
+CONFIG_JBD2_DEBUG=y
+CONFIG_JFS_FS=m
+CONFIG_JFS_POSIX_ACL=y
+CONFIG_JFS_SECURITY=y
+CONFIG_JFS_STATISTICS=y
+CONFIG_XFS_FS=y
+CONFIG_XFS_QUOTA=y
+CONFIG_XFS_POSIX_ACL=y
+CONFIG_XFS_RT=y
+CONFIG_XFS_DEBUG=y
+CONFIG_GFS2_FS=m
+CONFIG_GFS2_FS_LOCKING_DLM=y
+CONFIG_OCFS2_FS=m
+CONFIG_BTRFS_FS=y
+CONFIG_BTRFS_FS_POSIX_ACL=y
+CONFIG_BTRFS_DEBUG=y
+CONFIG_NILFS2_FS=m
+CONFIG_FS_DAX=y
+CONFIG_EXPORTFS_BLOCK_OPS=y
+CONFIG_FANOTIFY=y
+CONFIG_FANOTIFY_ACCESS_PERMISSIONS=y
+CONFIG_QUOTA_NETLINK_INTERFACE=y
+CONFIG_QUOTA_DEBUG=y
+CONFIG_QFMT_V1=m
+CONFIG_QFMT_V2=m
+CONFIG_AUTOFS4_FS=m
+CONFIG_FUSE_FS=y
+CONFIG_CUSE=m
+CONFIG_OVERLAY_FS=m
+CONFIG_FSCACHE=m
+CONFIG_CACHEFILES=m
+CONFIG_ISO9660_FS=y
+CONFIG_JOLIET=y
+CONFIG_ZISOFS=y
+CONFIG_UDF_FS=m
+CONFIG_MSDOS_FS=m
+CONFIG_VFAT_FS=m
+CONFIG_NTFS_FS=m
+CONFIG_NTFS_RW=y
+CONFIG_PROC_KCORE=y
+CONFIG_TMPFS=y
+CONFIG_TMPFS_POSIX_ACL=y
+CONFIG_HUGETLBFS=y
+CONFIG_CONFIGFS_FS=m
+CONFIG_ECRYPT_FS=m
+CONFIG_CRAMFS=m
+CONFIG_SQUASHFS=m
+CONFIG_SQUASHFS_XATTR=y
+CONFIG_SQUASHFS_LZO=y
+CONFIG_SQUASHFS_XZ=y
+CONFIG_ROMFS_FS=m
+CONFIG_NFS_FS=m
+CONFIG_NFS_V3_ACL=y
+CONFIG_NFS_V4=m
+CONFIG_NFS_SWAP=y
+CONFIG_NFSD=m
+CONFIG_NFSD_V3_ACL=y
+CONFIG_NFSD_V4=y
+CONFIG_NFSD_V4_SECURITY_LABEL=y
+CONFIG_CIFS=m
+CONFIG_CIFS_STATS=y
+CONFIG_CIFS_STATS2=y
+CONFIG_CIFS_WEAK_PW_HASH=y
+CONFIG_CIFS_UPCALL=y
+CONFIG_CIFS_XATTR=y
+CONFIG_CIFS_POSIX=y
+# CONFIG_CIFS_DEBUG is not set
+CONFIG_CIFS_DFS_UPCALL=y
+CONFIG_NLS_DEFAULT="utf8"
+CONFIG_NLS_CODEPAGE_437=m
+CONFIG_NLS_CODEPAGE_850=m
+CONFIG_NLS_ASCII=m
+CONFIG_NLS_ISO8859_1=m
+CONFIG_NLS_ISO8859_15=m
+CONFIG_NLS_UTF8=m
+CONFIG_DLM=m
+CONFIG_PRINTK_TIME=y
+CONFIG_DYNAMIC_DEBUG=y
+CONFIG_DEBUG_INFO=y
+CONFIG_DEBUG_INFO_DWARF4=y
+CONFIG_GDB_SCRIPTS=y
+CONFIG_FRAME_WARN=1024
+CONFIG_READABLE_ASM=y
+CONFIG_UNUSED_SYMBOLS=y
+CONFIG_HEADERS_CHECK=y
+CONFIG_DEBUG_SECTION_MISMATCH=y
+CONFIG_MAGIC_SYSRQ=y
+CONFIG_DEBUG_PAGEALLOC=y
+CONFIG_DEBUG_RODATA_TEST=y
+CONFIG_DEBUG_OBJECTS=y
+CONFIG_DEBUG_OBJECTS_SELFTEST=y
+CONFIG_DEBUG_OBJECTS_FREE=y
+CONFIG_DEBUG_OBJECTS_TIMERS=y
+CONFIG_DEBUG_OBJECTS_WORK=y
+CONFIG_DEBUG_OBJECTS_RCU_HEAD=y
+CONFIG_DEBUG_OBJECTS_PERCPU_COUNTER=y
+CONFIG_SLUB_DEBUG_ON=y
+CONFIG_SLUB_STATS=y
+CONFIG_DEBUG_STACK_USAGE=y
+CONFIG_DEBUG_VM=y
+CONFIG_DEBUG_VM_VMACACHE=y
+CONFIG_DEBUG_VM_RB=y
+CONFIG_DEBUG_VM_PGFLAGS=y
+CONFIG_DEBUG_MEMORY_INIT=y
+CONFIG_MEMORY_NOTIFIER_ERROR_INJECT=m
+CONFIG_DEBUG_PER_CPU_MAPS=y
+CONFIG_DEBUG_SHIRQ=y
+CONFIG_DETECT_HUNG_TASK=y
+CONFIG_WQ_WATCHDOG=y
+CONFIG_PANIC_ON_OOPS=y
+CONFIG_DEBUG_TIMEKEEPING=y
+CONFIG_PROVE_LOCKING=y
+CONFIG_LOCK_STAT=y
+CONFIG_DEBUG_LOCKDEP=y
+CONFIG_DEBUG_ATOMIC_SLEEP=y
+CONFIG_DEBUG_LOCKING_API_SELFTESTS=y
+CONFIG_DEBUG_SG=y
+CONFIG_DEBUG_NOTIFIERS=y
+CONFIG_DEBUG_CREDENTIALS=y
+CONFIG_RCU_TORTURE_TEST=m
+CONFIG_RCU_CPU_STALL_TIMEOUT=300
+CONFIG_NOTIFIER_ERROR_INJECTION=m
+CONFIG_PM_NOTIFIER_ERROR_INJECT=m
+CONFIG_NETDEV_NOTIFIER_ERROR_INJECT=m
+CONFIG_FAULT_INJECTION=y
+CONFIG_FAILSLAB=y
+CONFIG_FAIL_PAGE_ALLOC=y
+CONFIG_FAIL_MAKE_REQUEST=y
+CONFIG_FAIL_IO_TIMEOUT=y
+CONFIG_FAIL_FUTEX=y
+CONFIG_FAULT_INJECTION_DEBUG_FS=y
+CONFIG_FAULT_INJECTION_STACKTRACE_FILTER=y
+CONFIG_LATENCYTOP=y
+CONFIG_IRQSOFF_TRACER=y
+CONFIG_PREEMPT_TRACER=y
+CONFIG_SCHED_TRACER=y
+CONFIG_FTRACE_SYSCALLS=y
+CONFIG_STACK_TRACER=y
+CONFIG_BLK_DEV_IO_TRACE=y
+CONFIG_FUNCTION_PROFILER=y
+CONFIG_HIST_TRIGGERS=y
+CONFIG_DMA_API_DEBUG=y
+CONFIG_LKDTM=m
+CONFIG_TEST_LIST_SORT=y
+CONFIG_TEST_SORT=y
+CONFIG_KPROBES_SANITY_TEST=y
+CONFIG_RBTREE_TEST=y
+CONFIG_INTERVAL_TREE_TEST=m
+CONFIG_PERCPU_TEST=m
+CONFIG_ATOMIC64_SELFTEST=y
+CONFIG_TEST_BPF=m
+CONFIG_BUG_ON_DATA_CORRUPTION=y
+CONFIG_S390_PTDUMP=y
+CONFIG_PERSISTENT_KEYRINGS=y
+CONFIG_BIG_KEYS=y
+CONFIG_ENCRYPTED_KEYS=m
+CONFIG_SECURITY=y
+CONFIG_SECURITY_NETWORK=y
+CONFIG_FORTIFY_SOURCE=y
+CONFIG_SECURITY_SELINUX=y
+CONFIG_SECURITY_SELINUX_BOOTPARAM=y
+CONFIG_SECURITY_SELINUX_BOOTPARAM_VALUE=0
+CONFIG_SECURITY_SELINUX_DISABLE=y
+CONFIG_INTEGRITY_SIGNATURE=y
+CONFIG_INTEGRITY_ASYMMETRIC_KEYS=y
+CONFIG_IMA=y
+CONFIG_IMA_DEFAULT_HASH_SHA256=y
+CONFIG_IMA_WRITE_POLICY=y
+CONFIG_IMA_APPRAISE=y
+CONFIG_CRYPTO_DH=m
+CONFIG_CRYPTO_ECDH=m
+CONFIG_CRYPTO_USER=m
+# CONFIG_CRYPTO_MANAGER_DISABLE_TESTS is not set
+CONFIG_CRYPTO_PCRYPT=m
+CONFIG_CRYPTO_CRYPTD=m
+CONFIG_CRYPTO_MCRYPTD=m
+CONFIG_CRYPTO_TEST=m
+CONFIG_CRYPTO_CHACHA20POLY1305=m
+CONFIG_CRYPTO_LRW=m
+CONFIG_CRYPTO_PCBC=m
+CONFIG_CRYPTO_KEYWRAP=m
+CONFIG_CRYPTO_XCBC=m
+CONFIG_CRYPTO_VMAC=m
+CONFIG_CRYPTO_CRC32=m
+CONFIG_CRYPTO_MICHAEL_MIC=m
+CONFIG_CRYPTO_RMD128=m
+CONFIG_CRYPTO_RMD160=m
+CONFIG_CRYPTO_RMD256=m
+CONFIG_CRYPTO_RMD320=m
+CONFIG_CRYPTO_SHA512=m
+CONFIG_CRYPTO_SHA3=m
+CONFIG_CRYPTO_TGR192=m
+CONFIG_CRYPTO_WP512=m
+CONFIG_CRYPTO_AES_TI=m
+CONFIG_CRYPTO_ANUBIS=m
+CONFIG_CRYPTO_BLOWFISH=m
+CONFIG_CRYPTO_CAMELLIA=m
+CONFIG_CRYPTO_CAST5=m
+CONFIG_CRYPTO_CAST6=m
+CONFIG_CRYPTO_FCRYPT=m
+CONFIG_CRYPTO_KHAZAD=m
+CONFIG_CRYPTO_SALSA20=m
+CONFIG_CRYPTO_SEED=m
+CONFIG_CRYPTO_SERPENT=m
+CONFIG_CRYPTO_TEA=m
+CONFIG_CRYPTO_TWOFISH=m
+CONFIG_CRYPTO_842=m
+CONFIG_CRYPTO_LZ4=m
+CONFIG_CRYPTO_LZ4HC=m
+CONFIG_CRYPTO_ANSI_CPRNG=m
+CONFIG_CRYPTO_USER_API_HASH=m
+CONFIG_CRYPTO_USER_API_SKCIPHER=m
+CONFIG_CRYPTO_USER_API_RNG=m
+CONFIG_CRYPTO_USER_API_AEAD=m
+CONFIG_ZCRYPT=m
+CONFIG_PKEY=m
+CONFIG_CRYPTO_PAES_S390=m
+CONFIG_CRYPTO_SHA1_S390=m
+CONFIG_CRYPTO_SHA256_S390=m
+CONFIG_CRYPTO_SHA512_S390=m
+CONFIG_CRYPTO_DES_S390=m
+CONFIG_CRYPTO_AES_S390=m
+CONFIG_CRYPTO_GHASH_S390=m
+CONFIG_CRYPTO_CRC32_S390=y
+CONFIG_PKCS7_MESSAGE_PARSER=y
+CONFIG_SYSTEM_TRUSTED_KEYRING=y
+CONFIG_CRC7=m
+CONFIG_CRC8=m
+CONFIG_RANDOM32_SELFTEST=y
+CONFIG_CORDIC=m
+CONFIG_CMM=m
+CONFIG_APPLDATA_BASE=y
+CONFIG_KVM=m
+CONFIG_KVM_S390_UCONTROL=y
+CONFIG_VHOST_NET=m
diff --git a/arch/s390/configs/performance_defconfig b/arch/s390/configs/performance_defconfig
new file mode 100644
index 000000000..eb6f75f24
--- /dev/null
+++ b/arch/s390/configs/performance_defconfig
@@ -0,0 +1,669 @@
+CONFIG_SYSVIPC=y
+CONFIG_POSIX_MQUEUE=y
+CONFIG_AUDIT=y
+CONFIG_NO_HZ_IDLE=y
+CONFIG_HIGH_RES_TIMERS=y
+CONFIG_BSD_PROCESS_ACCT=y
+CONFIG_BSD_PROCESS_ACCT_V3=y
+CONFIG_TASKSTATS=y
+CONFIG_TASK_DELAY_ACCT=y
+CONFIG_TASK_XACCT=y
+CONFIG_TASK_IO_ACCOUNTING=y
+CONFIG_IKCONFIG=y
+CONFIG_IKCONFIG_PROC=y
+CONFIG_NUMA_BALANCING=y
+# CONFIG_NUMA_BALANCING_DEFAULT_ENABLED is not set
+CONFIG_MEMCG=y
+CONFIG_MEMCG_SWAP=y
+CONFIG_BLK_CGROUP=y
+CONFIG_CFS_BANDWIDTH=y
+CONFIG_RT_GROUP_SCHED=y
+CONFIG_CGROUP_PIDS=y
+CONFIG_CGROUP_FREEZER=y
+CONFIG_CGROUP_HUGETLB=y
+CONFIG_CPUSETS=y
+CONFIG_CGROUP_DEVICE=y
+CONFIG_CGROUP_CPUACCT=y
+CONFIG_CGROUP_PERF=y
+CONFIG_NAMESPACES=y
+CONFIG_USER_NS=y
+CONFIG_SCHED_AUTOGROUP=y
+CONFIG_BLK_DEV_INITRD=y
+CONFIG_EXPERT=y
+# CONFIG_SYSFS_SYSCALL is not set
+CONFIG_CHECKPOINT_RESTORE=y
+CONFIG_BPF_SYSCALL=y
+CONFIG_USERFAULTFD=y
+# CONFIG_COMPAT_BRK is not set
+CONFIG_PROFILING=y
+CONFIG_OPROFILE=m
+CONFIG_KPROBES=y
+CONFIG_JUMP_LABEL=y
+CONFIG_MODULES=y
+CONFIG_MODULE_FORCE_LOAD=y
+CONFIG_MODULE_UNLOAD=y
+CONFIG_MODULE_FORCE_UNLOAD=y
+CONFIG_MODVERSIONS=y
+CONFIG_MODULE_SRCVERSION_ALL=y
+CONFIG_MODULE_SIG=y
+CONFIG_MODULE_SIG_SHA256=y
+CONFIG_BLK_DEV_INTEGRITY=y
+CONFIG_BLK_DEV_THROTTLING=y
+CONFIG_BLK_WBT=y
+CONFIG_BLK_WBT_SQ=y
+CONFIG_PARTITION_ADVANCED=y
+CONFIG_IBM_PARTITION=y
+CONFIG_BSD_DISKLABEL=y
+CONFIG_MINIX_SUBPARTITION=y
+CONFIG_SOLARIS_X86_PARTITION=y
+CONFIG_UNIXWARE_DISKLABEL=y
+CONFIG_CFQ_GROUP_IOSCHED=y
+CONFIG_DEFAULT_DEADLINE=y
+CONFIG_LIVEPATCH=y
+CONFIG_TUNE_ZEC12=y
+CONFIG_NR_CPUS=512
+CONFIG_NUMA=y
+CONFIG_HZ_100=y
+CONFIG_KEXEC_FILE=y
+CONFIG_MEMORY_HOTPLUG=y
+CONFIG_MEMORY_HOTREMOVE=y
+CONFIG_KSM=y
+CONFIG_TRANSPARENT_HUGEPAGE=y
+CONFIG_CLEANCACHE=y
+CONFIG_FRONTSWAP=y
+CONFIG_MEM_SOFT_DIRTY=y
+CONFIG_ZSWAP=y
+CONFIG_ZBUD=m
+CONFIG_ZSMALLOC=m
+CONFIG_ZSMALLOC_STAT=y
+CONFIG_DEFERRED_STRUCT_PAGE_INIT=y
+CONFIG_IDLE_PAGE_TRACKING=y
+CONFIG_PCI=y
+CONFIG_HOTPLUG_PCI=y
+CONFIG_HOTPLUG_PCI_S390=y
+CONFIG_CHSC_SCH=y
+CONFIG_CRASH_DUMP=y
+CONFIG_BINFMT_MISC=m
+CONFIG_HIBERNATION=y
+CONFIG_NET=y
+CONFIG_PACKET=y
+CONFIG_PACKET_DIAG=m
+CONFIG_UNIX=y
+CONFIG_UNIX_DIAG=m
+CONFIG_XFRM_USER=m
+CONFIG_NET_KEY=m
+CONFIG_SMC=m
+CONFIG_SMC_DIAG=m
+CONFIG_INET=y
+CONFIG_IP_MULTICAST=y
+CONFIG_IP_ADVANCED_ROUTER=y
+CONFIG_IP_MULTIPLE_TABLES=y
+CONFIG_IP_ROUTE_MULTIPATH=y
+CONFIG_IP_ROUTE_VERBOSE=y
+CONFIG_NET_IPIP=m
+CONFIG_NET_IPGRE_DEMUX=m
+CONFIG_NET_IPGRE=m
+CONFIG_NET_IPGRE_BROADCAST=y
+CONFIG_IP_MROUTE=y
+CONFIG_IP_MROUTE_MULTIPLE_TABLES=y
+CONFIG_IP_PIMSM_V1=y
+CONFIG_IP_PIMSM_V2=y
+CONFIG_SYN_COOKIES=y
+CONFIG_NET_IPVTI=m
+CONFIG_INET_AH=m
+CONFIG_INET_ESP=m
+CONFIG_INET_IPCOMP=m
+CONFIG_INET_XFRM_MODE_TRANSPORT=m
+CONFIG_INET_XFRM_MODE_TUNNEL=m
+CONFIG_INET_XFRM_MODE_BEET=m
+CONFIG_INET_DIAG=m
+CONFIG_INET_UDP_DIAG=m
+CONFIG_TCP_CONG_ADVANCED=y
+CONFIG_TCP_CONG_HSTCP=m
+CONFIG_TCP_CONG_HYBLA=m
+CONFIG_TCP_CONG_SCALABLE=m
+CONFIG_TCP_CONG_LP=m
+CONFIG_TCP_CONG_VENO=m
+CONFIG_TCP_CONG_YEAH=m
+CONFIG_TCP_CONG_ILLINOIS=m
+CONFIG_IPV6_ROUTER_PREF=y
+CONFIG_INET6_AH=m
+CONFIG_INET6_ESP=m
+CONFIG_INET6_IPCOMP=m
+CONFIG_IPV6_MIP6=m
+CONFIG_INET6_XFRM_MODE_TRANSPORT=m
+CONFIG_INET6_XFRM_MODE_TUNNEL=m
+CONFIG_INET6_XFRM_MODE_BEET=m
+CONFIG_INET6_XFRM_MODE_ROUTEOPTIMIZATION=m
+CONFIG_IPV6_VTI=m
+CONFIG_IPV6_SIT=m
+CONFIG_IPV6_GRE=m
+CONFIG_IPV6_MULTIPLE_TABLES=y
+CONFIG_IPV6_SUBTREES=y
+CONFIG_NETFILTER=y
+CONFIG_NF_CONNTRACK=m
+CONFIG_NF_CONNTRACK_SECMARK=y
+CONFIG_NF_CONNTRACK_EVENTS=y
+CONFIG_NF_CONNTRACK_TIMEOUT=y
+CONFIG_NF_CONNTRACK_TIMESTAMP=y
+CONFIG_NF_CONNTRACK_AMANDA=m
+CONFIG_NF_CONNTRACK_FTP=m
+CONFIG_NF_CONNTRACK_H323=m
+CONFIG_NF_CONNTRACK_IRC=m
+CONFIG_NF_CONNTRACK_NETBIOS_NS=m
+CONFIG_NF_CONNTRACK_SNMP=m
+CONFIG_NF_CONNTRACK_PPTP=m
+CONFIG_NF_CONNTRACK_SANE=m
+CONFIG_NF_CONNTRACK_SIP=m
+CONFIG_NF_CONNTRACK_TFTP=m
+CONFIG_NF_CT_NETLINK=m
+CONFIG_NF_CT_NETLINK_TIMEOUT=m
+CONFIG_NF_TABLES=m
+CONFIG_NFT_EXTHDR=m
+CONFIG_NFT_META=m
+CONFIG_NFT_CT=m
+CONFIG_NFT_COUNTER=m
+CONFIG_NFT_LOG=m
+CONFIG_NFT_LIMIT=m
+CONFIG_NFT_NAT=m
+CONFIG_NFT_COMPAT=m
+CONFIG_NFT_HASH=m
+CONFIG_NETFILTER_XT_SET=m
+CONFIG_NETFILTER_XT_TARGET_AUDIT=m
+CONFIG_NETFILTER_XT_TARGET_CHECKSUM=m
+CONFIG_NETFILTER_XT_TARGET_CLASSIFY=m
+CONFIG_NETFILTER_XT_TARGET_CONNMARK=m
+CONFIG_NETFILTER_XT_TARGET_CONNSECMARK=m
+CONFIG_NETFILTER_XT_TARGET_CT=m
+CONFIG_NETFILTER_XT_TARGET_DSCP=m
+CONFIG_NETFILTER_XT_TARGET_HMARK=m
+CONFIG_NETFILTER_XT_TARGET_IDLETIMER=m
+CONFIG_NETFILTER_XT_TARGET_LOG=m
+CONFIG_NETFILTER_XT_TARGET_MARK=m
+CONFIG_NETFILTER_XT_TARGET_NFLOG=m
+CONFIG_NETFILTER_XT_TARGET_NFQUEUE=m
+CONFIG_NETFILTER_XT_TARGET_TEE=m
+CONFIG_NETFILTER_XT_TARGET_TPROXY=m
+CONFIG_NETFILTER_XT_TARGET_TRACE=m
+CONFIG_NETFILTER_XT_TARGET_SECMARK=m
+CONFIG_NETFILTER_XT_TARGET_TCPMSS=m
+CONFIG_NETFILTER_XT_TARGET_TCPOPTSTRIP=m
+CONFIG_NETFILTER_XT_MATCH_ADDRTYPE=m
+CONFIG_NETFILTER_XT_MATCH_BPF=m
+CONFIG_NETFILTER_XT_MATCH_CLUSTER=m
+CONFIG_NETFILTER_XT_MATCH_COMMENT=m
+CONFIG_NETFILTER_XT_MATCH_CONNBYTES=m
+CONFIG_NETFILTER_XT_MATCH_CONNLABEL=m
+CONFIG_NETFILTER_XT_MATCH_CONNLIMIT=m
+CONFIG_NETFILTER_XT_MATCH_CONNMARK=m
+CONFIG_NETFILTER_XT_MATCH_CONNTRACK=m
+CONFIG_NETFILTER_XT_MATCH_CPU=m
+CONFIG_NETFILTER_XT_MATCH_DCCP=m
+CONFIG_NETFILTER_XT_MATCH_DEVGROUP=m
+CONFIG_NETFILTER_XT_MATCH_DSCP=m
+CONFIG_NETFILTER_XT_MATCH_ESP=m
+CONFIG_NETFILTER_XT_MATCH_HASHLIMIT=m
+CONFIG_NETFILTER_XT_MATCH_HELPER=m
+CONFIG_NETFILTER_XT_MATCH_IPRANGE=m
+CONFIG_NETFILTER_XT_MATCH_IPVS=m
+CONFIG_NETFILTER_XT_MATCH_LENGTH=m
+CONFIG_NETFILTER_XT_MATCH_LIMIT=m
+CONFIG_NETFILTER_XT_MATCH_MAC=m
+CONFIG_NETFILTER_XT_MATCH_MARK=m
+CONFIG_NETFILTER_XT_MATCH_MULTIPORT=m
+CONFIG_NETFILTER_XT_MATCH_NFACCT=m
+CONFIG_NETFILTER_XT_MATCH_OSF=m
+CONFIG_NETFILTER_XT_MATCH_OWNER=m
+CONFIG_NETFILTER_XT_MATCH_POLICY=m
+CONFIG_NETFILTER_XT_MATCH_PHYSDEV=m
+CONFIG_NETFILTER_XT_MATCH_PKTTYPE=m
+CONFIG_NETFILTER_XT_MATCH_QUOTA=m
+CONFIG_NETFILTER_XT_MATCH_RATEEST=m
+CONFIG_NETFILTER_XT_MATCH_REALM=m
+CONFIG_NETFILTER_XT_MATCH_RECENT=m
+CONFIG_NETFILTER_XT_MATCH_STATE=m
+CONFIG_NETFILTER_XT_MATCH_STATISTIC=m
+CONFIG_NETFILTER_XT_MATCH_STRING=m
+CONFIG_NETFILTER_XT_MATCH_TCPMSS=m
+CONFIG_NETFILTER_XT_MATCH_TIME=m
+CONFIG_NETFILTER_XT_MATCH_U32=m
+CONFIG_IP_SET=m
+CONFIG_IP_SET_BITMAP_IP=m
+CONFIG_IP_SET_BITMAP_IPMAC=m
+CONFIG_IP_SET_BITMAP_PORT=m
+CONFIG_IP_SET_HASH_IP=m
+CONFIG_IP_SET_HASH_IPPORT=m
+CONFIG_IP_SET_HASH_IPPORTIP=m
+CONFIG_IP_SET_HASH_IPPORTNET=m
+CONFIG_IP_SET_HASH_NETPORTNET=m
+CONFIG_IP_SET_HASH_NET=m
+CONFIG_IP_SET_HASH_NETNET=m
+CONFIG_IP_SET_HASH_NETPORT=m
+CONFIG_IP_SET_HASH_NETIFACE=m
+CONFIG_IP_SET_LIST_SET=m
+CONFIG_IP_VS=m
+CONFIG_IP_VS_PROTO_TCP=y
+CONFIG_IP_VS_PROTO_UDP=y
+CONFIG_IP_VS_PROTO_ESP=y
+CONFIG_IP_VS_PROTO_AH=y
+CONFIG_IP_VS_RR=m
+CONFIG_IP_VS_WRR=m
+CONFIG_IP_VS_LC=m
+CONFIG_IP_VS_WLC=m
+CONFIG_IP_VS_LBLC=m
+CONFIG_IP_VS_LBLCR=m
+CONFIG_IP_VS_DH=m
+CONFIG_IP_VS_SH=m
+CONFIG_IP_VS_SED=m
+CONFIG_IP_VS_NQ=m
+CONFIG_IP_VS_FTP=m
+CONFIG_IP_VS_PE_SIP=m
+CONFIG_NF_CONNTRACK_IPV4=m
+CONFIG_NF_TABLES_IPV4=y
+CONFIG_NFT_CHAIN_ROUTE_IPV4=m
+CONFIG_NF_TABLES_ARP=y
+CONFIG_NFT_CHAIN_NAT_IPV4=m
+CONFIG_IP_NF_IPTABLES=m
+CONFIG_IP_NF_MATCH_AH=m
+CONFIG_IP_NF_MATCH_ECN=m
+CONFIG_IP_NF_MATCH_RPFILTER=m
+CONFIG_IP_NF_MATCH_TTL=m
+CONFIG_IP_NF_FILTER=m
+CONFIG_IP_NF_TARGET_REJECT=m
+CONFIG_IP_NF_NAT=m
+CONFIG_IP_NF_TARGET_MASQUERADE=m
+CONFIG_IP_NF_MANGLE=m
+CONFIG_IP_NF_TARGET_CLUSTERIP=m
+CONFIG_IP_NF_TARGET_ECN=m
+CONFIG_IP_NF_TARGET_TTL=m
+CONFIG_IP_NF_RAW=m
+CONFIG_IP_NF_SECURITY=m
+CONFIG_IP_NF_ARPTABLES=m
+CONFIG_IP_NF_ARPFILTER=m
+CONFIG_IP_NF_ARP_MANGLE=m
+CONFIG_NF_CONNTRACK_IPV6=m
+CONFIG_NF_TABLES_IPV6=y
+CONFIG_NFT_CHAIN_ROUTE_IPV6=m
+CONFIG_NFT_CHAIN_NAT_IPV6=m
+CONFIG_IP6_NF_IPTABLES=m
+CONFIG_IP6_NF_MATCH_AH=m
+CONFIG_IP6_NF_MATCH_EUI64=m
+CONFIG_IP6_NF_MATCH_FRAG=m
+CONFIG_IP6_NF_MATCH_OPTS=m
+CONFIG_IP6_NF_MATCH_HL=m
+CONFIG_IP6_NF_MATCH_IPV6HEADER=m
+CONFIG_IP6_NF_MATCH_MH=m
+CONFIG_IP6_NF_MATCH_RPFILTER=m
+CONFIG_IP6_NF_MATCH_RT=m
+CONFIG_IP6_NF_TARGET_HL=m
+CONFIG_IP6_NF_FILTER=m
+CONFIG_IP6_NF_TARGET_REJECT=m
+CONFIG_IP6_NF_MANGLE=m
+CONFIG_IP6_NF_RAW=m
+CONFIG_IP6_NF_SECURITY=m
+CONFIG_IP6_NF_NAT=m
+CONFIG_IP6_NF_TARGET_MASQUERADE=m
+CONFIG_NF_TABLES_BRIDGE=y
+CONFIG_RDS=m
+CONFIG_RDS_RDMA=m
+CONFIG_RDS_TCP=m
+CONFIG_L2TP=m
+CONFIG_L2TP_DEBUGFS=m
+CONFIG_L2TP_V3=y
+CONFIG_L2TP_IP=m
+CONFIG_L2TP_ETH=m
+CONFIG_BRIDGE=m
+CONFIG_VLAN_8021Q=m
+CONFIG_VLAN_8021Q_GVRP=y
+CONFIG_NET_SCHED=y
+CONFIG_NET_SCH_CBQ=m
+CONFIG_NET_SCH_HTB=m
+CONFIG_NET_SCH_HFSC=m
+CONFIG_NET_SCH_PRIO=m
+CONFIG_NET_SCH_MULTIQ=m
+CONFIG_NET_SCH_RED=m
+CONFIG_NET_SCH_SFB=m
+CONFIG_NET_SCH_SFQ=m
+CONFIG_NET_SCH_TEQL=m
+CONFIG_NET_SCH_TBF=m
+CONFIG_NET_SCH_GRED=m
+CONFIG_NET_SCH_DSMARK=m
+CONFIG_NET_SCH_NETEM=m
+CONFIG_NET_SCH_DRR=m
+CONFIG_NET_SCH_MQPRIO=m
+CONFIG_NET_SCH_CHOKE=m
+CONFIG_NET_SCH_QFQ=m
+CONFIG_NET_SCH_CODEL=m
+CONFIG_NET_SCH_FQ_CODEL=m
+CONFIG_NET_SCH_INGRESS=m
+CONFIG_NET_SCH_PLUG=m
+CONFIG_NET_CLS_BASIC=m
+CONFIG_NET_CLS_TCINDEX=m
+CONFIG_NET_CLS_ROUTE4=m
+CONFIG_NET_CLS_FW=m
+CONFIG_NET_CLS_U32=m
+CONFIG_CLS_U32_PERF=y
+CONFIG_CLS_U32_MARK=y
+CONFIG_NET_CLS_RSVP=m
+CONFIG_NET_CLS_RSVP6=m
+CONFIG_NET_CLS_FLOW=m
+CONFIG_NET_CLS_CGROUP=y
+CONFIG_NET_CLS_BPF=m
+CONFIG_NET_CLS_ACT=y
+CONFIG_NET_ACT_POLICE=m
+CONFIG_NET_ACT_GACT=m
+CONFIG_GACT_PROB=y
+CONFIG_NET_ACT_MIRRED=m
+CONFIG_NET_ACT_IPT=m
+CONFIG_NET_ACT_NAT=m
+CONFIG_NET_ACT_PEDIT=m
+CONFIG_NET_ACT_SIMP=m
+CONFIG_NET_ACT_SKBEDIT=m
+CONFIG_NET_ACT_CSUM=m
+CONFIG_DNS_RESOLVER=y
+CONFIG_OPENVSWITCH=m
+CONFIG_NETLINK_DIAG=m
+CONFIG_CGROUP_NET_PRIO=y
+CONFIG_BPF_JIT=y
+CONFIG_NET_PKTGEN=m
+CONFIG_DEVTMPFS=y
+CONFIG_DMA_CMA=y
+CONFIG_CMA_SIZE_MBYTES=0
+CONFIG_CONNECTOR=y
+CONFIG_ZRAM=m
+CONFIG_BLK_DEV_LOOP=m
+CONFIG_BLK_DEV_CRYPTOLOOP=m
+CONFIG_BLK_DEV_DRBD=m
+CONFIG_BLK_DEV_NBD=m
+CONFIG_BLK_DEV_RAM=y
+CONFIG_BLK_DEV_RAM_SIZE=32768
+CONFIG_VIRTIO_BLK=y
+CONFIG_BLK_DEV_RBD=m
+CONFIG_BLK_DEV_NVME=m
+CONFIG_ENCLOSURE_SERVICES=m
+CONFIG_GENWQE=m
+CONFIG_RAID_ATTRS=m
+CONFIG_SCSI=y
+CONFIG_BLK_DEV_SD=y
+CONFIG_CHR_DEV_ST=m
+CONFIG_CHR_DEV_OSST=m
+CONFIG_BLK_DEV_SR=m
+CONFIG_CHR_DEV_SG=y
+CONFIG_CHR_DEV_SCH=m
+CONFIG_SCSI_ENCLOSURE=m
+CONFIG_SCSI_CONSTANTS=y
+CONFIG_SCSI_LOGGING=y
+CONFIG_SCSI_SPI_ATTRS=m
+CONFIG_SCSI_FC_ATTRS=y
+CONFIG_SCSI_SAS_LIBSAS=m
+CONFIG_SCSI_SRP_ATTRS=m
+CONFIG_ISCSI_TCP=m
+CONFIG_SCSI_DEBUG=m
+CONFIG_ZFCP=y
+CONFIG_SCSI_VIRTIO=m
+CONFIG_SCSI_DH=y
+CONFIG_SCSI_DH_RDAC=m
+CONFIG_SCSI_DH_HP_SW=m
+CONFIG_SCSI_DH_EMC=m
+CONFIG_SCSI_DH_ALUA=m
+CONFIG_SCSI_OSD_INITIATOR=m
+CONFIG_SCSI_OSD_ULD=m
+CONFIG_MD=y
+CONFIG_BLK_DEV_MD=y
+CONFIG_MD_LINEAR=m
+CONFIG_MD_MULTIPATH=m
+CONFIG_MD_FAULTY=m
+CONFIG_BLK_DEV_DM=m
+CONFIG_DM_CRYPT=m
+CONFIG_DM_SNAPSHOT=m
+CONFIG_DM_THIN_PROVISIONING=m
+CONFIG_DM_MIRROR=m
+CONFIG_DM_LOG_USERSPACE=m
+CONFIG_DM_RAID=m
+CONFIG_DM_ZERO=m
+CONFIG_DM_MULTIPATH=m
+CONFIG_DM_MULTIPATH_QL=m
+CONFIG_DM_MULTIPATH_ST=m
+CONFIG_DM_DELAY=m
+CONFIG_DM_UEVENT=y
+CONFIG_DM_FLAKEY=m
+CONFIG_DM_VERITY=m
+CONFIG_DM_SWITCH=m
+CONFIG_NETDEVICES=y
+CONFIG_BONDING=m
+CONFIG_DUMMY=m
+CONFIG_EQUALIZER=m
+CONFIG_IFB=m
+CONFIG_MACVLAN=m
+CONFIG_MACVTAP=m
+CONFIG_VXLAN=m
+CONFIG_TUN=m
+CONFIG_VETH=m
+CONFIG_VIRTIO_NET=m
+CONFIG_NLMON=m
+# CONFIG_NET_VENDOR_ARC is not set
+# CONFIG_NET_VENDOR_CHELSIO is not set
+# CONFIG_NET_VENDOR_INTEL is not set
+# CONFIG_NET_VENDOR_MARVELL is not set
+CONFIG_MLX4_EN=m
+CONFIG_MLX5_CORE=m
+CONFIG_MLX5_CORE_EN=y
+# CONFIG_NET_VENDOR_NATSEMI is not set
+CONFIG_PPP=m
+CONFIG_PPP_BSDCOMP=m
+CONFIG_PPP_DEFLATE=m
+CONFIG_PPP_MPPE=m
+CONFIG_PPPOE=m
+CONFIG_PPTP=m
+CONFIG_PPPOL2TP=m
+CONFIG_PPP_ASYNC=m
+CONFIG_PPP_SYNC_TTY=m
+CONFIG_INPUT_EVDEV=y
+# CONFIG_INPUT_KEYBOARD is not set
+# CONFIG_INPUT_MOUSE is not set
+# CONFIG_SERIO is not set
+CONFIG_LEGACY_PTY_COUNT=0
+CONFIG_HW_RANDOM_VIRTIO=m
+CONFIG_RAW_DRIVER=m
+CONFIG_HANGCHECK_TIMER=m
+CONFIG_TN3270_FS=y
+# CONFIG_HWMON is not set
+CONFIG_WATCHDOG=y
+CONFIG_WATCHDOG_NOWAYOUT=y
+CONFIG_SOFT_WATCHDOG=m
+CONFIG_DIAG288_WATCHDOG=m
+CONFIG_DRM=y
+CONFIG_DRM_VIRTIO_GPU=y
+CONFIG_FRAMEBUFFER_CONSOLE=y
+# CONFIG_HID is not set
+# CONFIG_USB_SUPPORT is not set
+CONFIG_INFINIBAND=m
+CONFIG_INFINIBAND_USER_ACCESS=m
+CONFIG_MLX4_INFINIBAND=m
+CONFIG_MLX5_INFINIBAND=m
+CONFIG_VFIO=m
+CONFIG_VFIO_PCI=m
+CONFIG_VIRTIO_PCI=m
+CONFIG_VIRTIO_BALLOON=m
+CONFIG_VIRTIO_INPUT=y
+CONFIG_EXT4_FS=y
+CONFIG_EXT4_FS_POSIX_ACL=y
+CONFIG_EXT4_FS_SECURITY=y
+CONFIG_EXT4_ENCRYPTION=y
+CONFIG_JBD2_DEBUG=y
+CONFIG_JFS_FS=m
+CONFIG_JFS_POSIX_ACL=y
+CONFIG_JFS_SECURITY=y
+CONFIG_JFS_STATISTICS=y
+CONFIG_XFS_FS=y
+CONFIG_XFS_QUOTA=y
+CONFIG_XFS_POSIX_ACL=y
+CONFIG_XFS_RT=y
+CONFIG_GFS2_FS=m
+CONFIG_GFS2_FS_LOCKING_DLM=y
+CONFIG_OCFS2_FS=m
+CONFIG_BTRFS_FS=y
+CONFIG_BTRFS_FS_POSIX_ACL=y
+CONFIG_NILFS2_FS=m
+CONFIG_FS_DAX=y
+CONFIG_EXPORTFS_BLOCK_OPS=y
+CONFIG_FANOTIFY=y
+CONFIG_FANOTIFY_ACCESS_PERMISSIONS=y
+CONFIG_QUOTA_NETLINK_INTERFACE=y
+CONFIG_QFMT_V1=m
+CONFIG_QFMT_V2=m
+CONFIG_AUTOFS4_FS=m
+CONFIG_FUSE_FS=y
+CONFIG_CUSE=m
+CONFIG_OVERLAY_FS=m
+CONFIG_FSCACHE=m
+CONFIG_CACHEFILES=m
+CONFIG_ISO9660_FS=y
+CONFIG_JOLIET=y
+CONFIG_ZISOFS=y
+CONFIG_UDF_FS=m
+CONFIG_MSDOS_FS=m
+CONFIG_VFAT_FS=m
+CONFIG_NTFS_FS=m
+CONFIG_NTFS_RW=y
+CONFIG_PROC_KCORE=y
+CONFIG_TMPFS=y
+CONFIG_TMPFS_POSIX_ACL=y
+CONFIG_HUGETLBFS=y
+CONFIG_CONFIGFS_FS=m
+CONFIG_ECRYPT_FS=m
+CONFIG_CRAMFS=m
+CONFIG_SQUASHFS=m
+CONFIG_SQUASHFS_XATTR=y
+CONFIG_SQUASHFS_LZO=y
+CONFIG_SQUASHFS_XZ=y
+CONFIG_ROMFS_FS=m
+CONFIG_NFS_FS=m
+CONFIG_NFS_V3_ACL=y
+CONFIG_NFS_V4=m
+CONFIG_NFS_SWAP=y
+CONFIG_NFSD=m
+CONFIG_NFSD_V3_ACL=y
+CONFIG_NFSD_V4=y
+CONFIG_NFSD_V4_SECURITY_LABEL=y
+CONFIG_CIFS=m
+CONFIG_CIFS_STATS=y
+CONFIG_CIFS_STATS2=y
+CONFIG_CIFS_WEAK_PW_HASH=y
+CONFIG_CIFS_UPCALL=y
+CONFIG_CIFS_XATTR=y
+CONFIG_CIFS_POSIX=y
+# CONFIG_CIFS_DEBUG is not set
+CONFIG_CIFS_DFS_UPCALL=y
+CONFIG_NLS_DEFAULT="utf8"
+CONFIG_NLS_CODEPAGE_437=m
+CONFIG_NLS_CODEPAGE_850=m
+CONFIG_NLS_ASCII=m
+CONFIG_NLS_ISO8859_1=m
+CONFIG_NLS_ISO8859_15=m
+CONFIG_NLS_UTF8=m
+CONFIG_DLM=m
+CONFIG_PRINTK_TIME=y
+CONFIG_DEBUG_INFO=y
+CONFIG_DEBUG_INFO_DWARF4=y
+CONFIG_GDB_SCRIPTS=y
+# CONFIG_ENABLE_MUST_CHECK is not set
+CONFIG_FRAME_WARN=1024
+CONFIG_UNUSED_SYMBOLS=y
+CONFIG_MAGIC_SYSRQ=y
+CONFIG_DEBUG_MEMORY_INIT=y
+CONFIG_PANIC_ON_OOPS=y
+CONFIG_RCU_TORTURE_TEST=m
+CONFIG_RCU_CPU_STALL_TIMEOUT=60
+CONFIG_LATENCYTOP=y
+CONFIG_SCHED_TRACER=y
+CONFIG_FTRACE_SYSCALLS=y
+CONFIG_STACK_TRACER=y
+CONFIG_BLK_DEV_IO_TRACE=y
+CONFIG_FUNCTION_PROFILER=y
+CONFIG_HIST_TRIGGERS=y
+CONFIG_LKDTM=m
+CONFIG_PERCPU_TEST=m
+CONFIG_ATOMIC64_SELFTEST=y
+CONFIG_TEST_BPF=m
+CONFIG_BUG_ON_DATA_CORRUPTION=y
+CONFIG_S390_PTDUMP=y
+CONFIG_PERSISTENT_KEYRINGS=y
+CONFIG_BIG_KEYS=y
+CONFIG_ENCRYPTED_KEYS=m
+CONFIG_SECURITY=y
+CONFIG_SECURITY_NETWORK=y
+CONFIG_SECURITY_SELINUX=y
+CONFIG_SECURITY_SELINUX_BOOTPARAM=y
+CONFIG_SECURITY_SELINUX_BOOTPARAM_VALUE=0
+CONFIG_SECURITY_SELINUX_DISABLE=y
+CONFIG_INTEGRITY_SIGNATURE=y
+CONFIG_INTEGRITY_ASYMMETRIC_KEYS=y
+CONFIG_IMA=y
+CONFIG_IMA_DEFAULT_HASH_SHA256=y
+CONFIG_IMA_WRITE_POLICY=y
+CONFIG_IMA_APPRAISE=y
+CONFIG_CRYPTO_FIPS=y
+CONFIG_CRYPTO_DH=m
+CONFIG_CRYPTO_ECDH=m
+CONFIG_CRYPTO_USER=m
+# CONFIG_CRYPTO_MANAGER_DISABLE_TESTS is not set
+CONFIG_CRYPTO_PCRYPT=m
+CONFIG_CRYPTO_CRYPTD=m
+CONFIG_CRYPTO_MCRYPTD=m
+CONFIG_CRYPTO_TEST=m
+CONFIG_CRYPTO_CHACHA20POLY1305=m
+CONFIG_CRYPTO_LRW=m
+CONFIG_CRYPTO_PCBC=m
+CONFIG_CRYPTO_KEYWRAP=m
+CONFIG_CRYPTO_XCBC=m
+CONFIG_CRYPTO_VMAC=m
+CONFIG_CRYPTO_CRC32=m
+CONFIG_CRYPTO_MICHAEL_MIC=m
+CONFIG_CRYPTO_RMD128=m
+CONFIG_CRYPTO_RMD160=m
+CONFIG_CRYPTO_RMD256=m
+CONFIG_CRYPTO_RMD320=m
+CONFIG_CRYPTO_SHA512=m
+CONFIG_CRYPTO_SHA3=m
+CONFIG_CRYPTO_TGR192=m
+CONFIG_CRYPTO_WP512=m
+CONFIG_CRYPTO_AES_TI=m
+CONFIG_CRYPTO_ANUBIS=m
+CONFIG_CRYPTO_BLOWFISH=m
+CONFIG_CRYPTO_CAMELLIA=m
+CONFIG_CRYPTO_CAST5=m
+CONFIG_CRYPTO_CAST6=m
+CONFIG_CRYPTO_FCRYPT=m
+CONFIG_CRYPTO_KHAZAD=m
+CONFIG_CRYPTO_SALSA20=m
+CONFIG_CRYPTO_SEED=m
+CONFIG_CRYPTO_SERPENT=m
+CONFIG_CRYPTO_TEA=m
+CONFIG_CRYPTO_TWOFISH=m
+CONFIG_CRYPTO_842=m
+CONFIG_CRYPTO_LZ4=m
+CONFIG_CRYPTO_LZ4HC=m
+CONFIG_CRYPTO_ANSI_CPRNG=m
+CONFIG_CRYPTO_USER_API_HASH=m
+CONFIG_CRYPTO_USER_API_SKCIPHER=m
+CONFIG_CRYPTO_USER_API_RNG=m
+CONFIG_CRYPTO_USER_API_AEAD=m
+CONFIG_ZCRYPT=m
+CONFIG_PKEY=m
+CONFIG_CRYPTO_PAES_S390=m
+CONFIG_CRYPTO_SHA1_S390=m
+CONFIG_CRYPTO_SHA256_S390=m
+CONFIG_CRYPTO_SHA512_S390=m
+CONFIG_CRYPTO_DES_S390=m
+CONFIG_CRYPTO_AES_S390=m
+CONFIG_CRYPTO_GHASH_S390=m
+CONFIG_CRYPTO_CRC32_S390=y
+CONFIG_CRC7=m
+CONFIG_CRC8=m
+CONFIG_CORDIC=m
+CONFIG_CMM=m
+CONFIG_APPLDATA_BASE=y
+CONFIG_KVM=m
+CONFIG_KVM_S390_UCONTROL=y
+CONFIG_VHOST_NET=m
diff --git a/arch/s390/configs/zfcpdump_defconfig b/arch/s390/configs/zfcpdump_defconfig
new file mode 100644
index 000000000..7dc7f58c4
--- /dev/null
+++ b/arch/s390/configs/zfcpdump_defconfig
@@ -0,0 +1,71 @@
+# CONFIG_SWAP is not set
+CONFIG_NO_HZ_IDLE=y
+CONFIG_HIGH_RES_TIMERS=y
+CONFIG_BLK_DEV_INITRD=y
+CONFIG_CC_OPTIMIZE_FOR_SIZE=y
+# CONFIG_COMPAT_BRK is not set
+CONFIG_PARTITION_ADVANCED=y
+CONFIG_IBM_PARTITION=y
+CONFIG_DEFAULT_DEADLINE=y
+CONFIG_TUNE_ZEC12=y
+# CONFIG_COMPAT is not set
+CONFIG_NR_CPUS=2
+# CONFIG_HOTPLUG_CPU is not set
+CONFIG_HZ_100=y
+# CONFIG_ARCH_RANDOM is not set
+# CONFIG_COMPACTION is not set
+# CONFIG_MIGRATION is not set
+# CONFIG_BOUNCE is not set
+# CONFIG_CHECK_STACK is not set
+# CONFIG_CHSC_SCH is not set
+# CONFIG_SCM_BUS is not set
+CONFIG_CRASH_DUMP=y
+# CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set
+# CONFIG_SECCOMP is not set
+CONFIG_NET=y
+# CONFIG_IUCV is not set
+CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
+CONFIG_DEVTMPFS=y
+CONFIG_BLK_DEV_RAM=y
+# CONFIG_BLK_DEV_XPRAM is not set
+# CONFIG_DCSSBLK is not set
+# CONFIG_DASD is not set
+CONFIG_ENCLOSURE_SERVICES=y
+CONFIG_SCSI=y
+CONFIG_BLK_DEV_SD=y
+CONFIG_SCSI_ENCLOSURE=y
+CONFIG_SCSI_CONSTANTS=y
+CONFIG_SCSI_LOGGING=y
+CONFIG_SCSI_FC_ATTRS=y
+CONFIG_ZFCP=y
+# CONFIG_INPUT_KEYBOARD is not set
+# CONFIG_INPUT_MOUSE is not set
+# CONFIG_SERIO is not set
+# CONFIG_HVC_IUCV is not set
+# CONFIG_HW_RANDOM_S390 is not set
+CONFIG_RAW_DRIVER=y
+# CONFIG_SCLP_ASYNC is not set
+# CONFIG_HMC_DRV is not set
+# CONFIG_S390_TAPE is not set
+# CONFIG_VMCP is not set
+# CONFIG_MONWRITER is not set
+# CONFIG_S390_VMUR is not set
+# CONFIG_HID is not set
+# CONFIG_IOMMU_SUPPORT is not set
+# CONFIG_DNOTIFY is not set
+# CONFIG_INOTIFY_USER is not set
+CONFIG_CONFIGFS_FS=y
+# CONFIG_MISC_FILESYSTEMS is not set
+# CONFIG_NETWORK_FILESYSTEMS is not set
+CONFIG_PRINTK_TIME=y
+CONFIG_DEBUG_INFO=y
+CONFIG_DEBUG_FS=y
+CONFIG_DEBUG_KERNEL=y
+CONFIG_PANIC_ON_OOPS=y
+# CONFIG_SCHED_DEBUG is not set
+CONFIG_RCU_CPU_STALL_TIMEOUT=60
+# CONFIG_FTRACE is not set
+# CONFIG_PFAULT is not set
+# CONFIG_S390_HYPFS_FS is not set
+# CONFIG_VIRTUALIZATION is not set
+# CONFIG_S390_GUEST is not set
diff --git a/arch/s390/crypto/Makefile b/arch/s390/crypto/Makefile
new file mode 100644
index 000000000..a51010ea6
--- /dev/null
+++ b/arch/s390/crypto/Makefile
@@ -0,0 +1,17 @@
+# SPDX-License-Identifier: GPL-2.0
+#
+# Cryptographic API
+#
+
+obj-$(CONFIG_CRYPTO_SHA1_S390) += sha1_s390.o sha_common.o
+obj-$(CONFIG_CRYPTO_SHA256_S390) += sha256_s390.o sha_common.o
+obj-$(CONFIG_CRYPTO_SHA512_S390) += sha512_s390.o sha_common.o
+obj-$(CONFIG_CRYPTO_DES_S390) += des_s390.o
+obj-$(CONFIG_CRYPTO_AES_S390) += aes_s390.o
+obj-$(CONFIG_CRYPTO_PAES_S390) += paes_s390.o
+obj-$(CONFIG_S390_PRNG) += prng.o
+obj-$(CONFIG_CRYPTO_GHASH_S390) += ghash_s390.o
+obj-$(CONFIG_CRYPTO_CRC32_S390) += crc32-vx_s390.o
+obj-$(CONFIG_ARCH_RANDOM) += arch_random.o
+
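+# crc32-vx_s390 is composed of the C glue code and the little- and
+# big-endian vector implementations listed below.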
+crc32-vx_s390-y := crc32-vx.o crc32le-vx.o crc32be-vx.o
diff --git a/arch/s390/crypto/aes_s390.c b/arch/s390/crypto/aes_s390.c
new file mode 100644
index 000000000..c663caf37
--- /dev/null
+++ b/arch/s390/crypto/aes_s390.c
@@ -0,0 +1,1214 @@
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * Cryptographic API.
+ *
+ * s390 implementation of the AES Cipher Algorithm.
+ *
+ * s390 Version:
+ * Copyright IBM Corp. 2005, 2017
+ * Author(s): Jan Glauber (jang@de.ibm.com)
+ *		Sebastian Siewior <sebastian@breakpoint.cc> SW-Fallback
+ * Patrick Steuer <patrick.steuer@de.ibm.com>
+ * Harald Freudenberger <freude@de.ibm.com>
+ *
+ * Derived from "crypto/aes_generic.c"
+ */
+
+#define KMSG_COMPONENT "aes_s390"
+#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
+
+#include <crypto/aes.h>
+#include <crypto/algapi.h>
+#include <crypto/ghash.h>
+#include <crypto/internal/aead.h>
+#include <crypto/internal/skcipher.h>
+#include <crypto/scatterwalk.h>
+#include <linux/err.h>
+#include <linux/module.h>
+#include <linux/cpufeature.h>
+#include <linux/init.h>
+#include <linux/mutex.h>
+#include <linux/fips.h>
+#include <linux/string.h>
+#include <crypto/xts.h>
+#include <asm/cpacf.h>
+
+static u8 *ctrblk;
+static DEFINE_MUTEX(ctrblk_lock);
+
+static cpacf_mask_t km_functions, kmc_functions, kmctr_functions,
+ kma_functions;
+
+struct s390_aes_ctx {
+ u8 key[AES_MAX_KEY_SIZE];
+ int key_len;
+ unsigned long fc;
+ union {
+ struct crypto_skcipher *blk;
+ struct crypto_cipher *cip;
+ } fallback;
+};
+
+struct s390_xts_ctx {
+ u8 key[32];
+ u8 pcc_key[32];
+ int key_len;
+ unsigned long fc;
+ struct crypto_skcipher *fallback;
+};
+
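+/*
+ * gcm_sg_walk tracks a scatterlist walk for the GCM implementation.
+ * Data is either addressed directly in the mapped scatterlist entry or
+ * collected in the AES_BLOCK_SIZE sized bounce buffer; ptr/nbytes
+ * describe the chunk handed back to the caller.
+ */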
+struct gcm_sg_walk {
+ struct scatter_walk walk;
+ unsigned int walk_bytes;
+ u8 *walk_ptr;
+ unsigned int walk_bytes_remain;
+ u8 buf[AES_BLOCK_SIZE];
+ unsigned int buf_bytes;
+ u8 *ptr;
+ unsigned int nbytes;
+};
+
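+/*
+ * Set the key on the software fallback cipher. The fallback is used
+ * whenever the machine does not provide the required CPACF function
+ * code; the request flags are forwarded to the fallback tfm and its
+ * result flags are copied back on failure.
+ */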
+static int setkey_fallback_cip(struct crypto_tfm *tfm, const u8 *in_key,
+ unsigned int key_len)
+{
+ struct s390_aes_ctx *sctx = crypto_tfm_ctx(tfm);
+ int ret;
+
+ sctx->fallback.cip->base.crt_flags &= ~CRYPTO_TFM_REQ_MASK;
+ sctx->fallback.cip->base.crt_flags |= (tfm->crt_flags &
+ CRYPTO_TFM_REQ_MASK);
+
+ ret = crypto_cipher_setkey(sctx->fallback.cip, in_key, key_len);
+ if (ret) {
+ tfm->crt_flags &= ~CRYPTO_TFM_RES_MASK;
+ tfm->crt_flags |= (sctx->fallback.cip->base.crt_flags &
+ CRYPTO_TFM_RES_MASK);
+ }
+ return ret;
+}
+
+static int aes_set_key(struct crypto_tfm *tfm, const u8 *in_key,
+ unsigned int key_len)
+{
+ struct s390_aes_ctx *sctx = crypto_tfm_ctx(tfm);
+ unsigned long fc;
+
+ /* Pick the correct function code based on the key length */
+ fc = (key_len == 16) ? CPACF_KM_AES_128 :
+ (key_len == 24) ? CPACF_KM_AES_192 :
+ (key_len == 32) ? CPACF_KM_AES_256 : 0;
+
+ /* Check if the function code is available */
+ sctx->fc = (fc && cpacf_test_func(&km_functions, fc)) ? fc : 0;
+ if (!sctx->fc)
+ return setkey_fallback_cip(tfm, in_key, key_len);
+
+ sctx->key_len = key_len;
+ memcpy(sctx->key, in_key, key_len);
+ return 0;
+}
+
+static void aes_encrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in)
+{
+ struct s390_aes_ctx *sctx = crypto_tfm_ctx(tfm);
+
+ if (unlikely(!sctx->fc)) {
+ crypto_cipher_encrypt_one(sctx->fallback.cip, out, in);
+ return;
+ }
+ cpacf_km(sctx->fc, &sctx->key, out, in, AES_BLOCK_SIZE);
+}
+
+static void aes_decrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in)
+{
+ struct s390_aes_ctx *sctx = crypto_tfm_ctx(tfm);
+
+ if (unlikely(!sctx->fc)) {
+ crypto_cipher_decrypt_one(sctx->fallback.cip, out, in);
+ return;
+ }
+ cpacf_km(sctx->fc | CPACF_DECRYPT,
+ &sctx->key, out, in, AES_BLOCK_SIZE);
+}
+
+static int fallback_init_cip(struct crypto_tfm *tfm)
+{
+ const char *name = tfm->__crt_alg->cra_name;
+ struct s390_aes_ctx *sctx = crypto_tfm_ctx(tfm);
+
+ sctx->fallback.cip = crypto_alloc_cipher(name, 0,
+ CRYPTO_ALG_ASYNC | CRYPTO_ALG_NEED_FALLBACK);
+
+ if (IS_ERR(sctx->fallback.cip)) {
+ pr_err("Allocating AES fallback algorithm %s failed\n",
+ name);
+ return PTR_ERR(sctx->fallback.cip);
+ }
+
+ return 0;
+}
+
+static void fallback_exit_cip(struct crypto_tfm *tfm)
+{
+ struct s390_aes_ctx *sctx = crypto_tfm_ctx(tfm);
+
+ crypto_free_cipher(sctx->fallback.cip);
+ sctx->fallback.cip = NULL;
+}
+
+static struct crypto_alg aes_alg = {
+ .cra_name = "aes",
+ .cra_driver_name = "aes-s390",
+ .cra_priority = 300,
+ .cra_flags = CRYPTO_ALG_TYPE_CIPHER |
+ CRYPTO_ALG_NEED_FALLBACK,
+ .cra_blocksize = AES_BLOCK_SIZE,
+ .cra_ctxsize = sizeof(struct s390_aes_ctx),
+ .cra_module = THIS_MODULE,
+ .cra_init = fallback_init_cip,
+ .cra_exit = fallback_exit_cip,
+ .cra_u = {
+ .cipher = {
+ .cia_min_keysize = AES_MIN_KEY_SIZE,
+ .cia_max_keysize = AES_MAX_KEY_SIZE,
+ .cia_setkey = aes_set_key,
+ .cia_encrypt = aes_encrypt,
+ .cia_decrypt = aes_decrypt,
+ }
+ }
+};
+
+static int setkey_fallback_blk(struct crypto_tfm *tfm, const u8 *key,
+ unsigned int len)
+{
+ struct s390_aes_ctx *sctx = crypto_tfm_ctx(tfm);
+ unsigned int ret;
+
+ crypto_skcipher_clear_flags(sctx->fallback.blk, CRYPTO_TFM_REQ_MASK);
+ crypto_skcipher_set_flags(sctx->fallback.blk, tfm->crt_flags &
+ CRYPTO_TFM_REQ_MASK);
+
+ ret = crypto_skcipher_setkey(sctx->fallback.blk, key, len);
+
+ tfm->crt_flags &= ~CRYPTO_TFM_RES_MASK;
+ tfm->crt_flags |= crypto_skcipher_get_flags(sctx->fallback.blk) &
+ CRYPTO_TFM_RES_MASK;
+
+ return ret;
+}
+
+static int fallback_blk_dec(struct blkcipher_desc *desc,
+ struct scatterlist *dst, struct scatterlist *src,
+ unsigned int nbytes)
+{
+ unsigned int ret;
+ struct crypto_blkcipher *tfm = desc->tfm;
+ struct s390_aes_ctx *sctx = crypto_blkcipher_ctx(tfm);
+ SKCIPHER_REQUEST_ON_STACK(req, sctx->fallback.blk);
+
+ skcipher_request_set_tfm(req, sctx->fallback.blk);
+ skcipher_request_set_callback(req, desc->flags, NULL, NULL);
+ skcipher_request_set_crypt(req, src, dst, nbytes, desc->info);
+
+ ret = crypto_skcipher_decrypt(req);
+
+ skcipher_request_zero(req);
+ return ret;
+}
+
+static int fallback_blk_enc(struct blkcipher_desc *desc,
+ struct scatterlist *dst, struct scatterlist *src,
+ unsigned int nbytes)
+{
+ unsigned int ret;
+ struct crypto_blkcipher *tfm = desc->tfm;
+ struct s390_aes_ctx *sctx = crypto_blkcipher_ctx(tfm);
+ SKCIPHER_REQUEST_ON_STACK(req, sctx->fallback.blk);
+
+ skcipher_request_set_tfm(req, sctx->fallback.blk);
+ skcipher_request_set_callback(req, desc->flags, NULL, NULL);
+ skcipher_request_set_crypt(req, src, dst, nbytes, desc->info);
+
+ ret = crypto_skcipher_encrypt(req);
+ return ret;
+}
+
+static int ecb_aes_set_key(struct crypto_tfm *tfm, const u8 *in_key,
+ unsigned int key_len)
+{
+ struct s390_aes_ctx *sctx = crypto_tfm_ctx(tfm);
+ unsigned long fc;
+
+ /* Pick the correct function code based on the key length */
+ fc = (key_len == 16) ? CPACF_KM_AES_128 :
+ (key_len == 24) ? CPACF_KM_AES_192 :
+ (key_len == 32) ? CPACF_KM_AES_256 : 0;
+
+ /* Check if the function code is available */
+ sctx->fc = (fc && cpacf_test_func(&km_functions, fc)) ? fc : 0;
+ if (!sctx->fc)
+ return setkey_fallback_blk(tfm, in_key, key_len);
+
+ sctx->key_len = key_len;
+ memcpy(sctx->key, in_key, key_len);
+ return 0;
+}
+
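+/*
+ * Process the blkcipher walk for ECB mode: each iteration hands all
+ * complete AES blocks of the current chunk to the KM instruction and
+ * reports the remaining bytes back via blkcipher_walk_done().
+ */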
+static int ecb_aes_crypt(struct blkcipher_desc *desc, unsigned long modifier,
+ struct blkcipher_walk *walk)
+{
+ struct s390_aes_ctx *sctx = crypto_blkcipher_ctx(desc->tfm);
+ unsigned int nbytes, n;
+ int ret;
+
+ ret = blkcipher_walk_virt(desc, walk);
+ while ((nbytes = walk->nbytes) >= AES_BLOCK_SIZE) {
+ /* only use complete blocks */
+ n = nbytes & ~(AES_BLOCK_SIZE - 1);
+ cpacf_km(sctx->fc | modifier, sctx->key,
+ walk->dst.virt.addr, walk->src.virt.addr, n);
+ ret = blkcipher_walk_done(desc, walk, nbytes - n);
+ }
+
+ return ret;
+}
+
+static int ecb_aes_encrypt(struct blkcipher_desc *desc,
+ struct scatterlist *dst, struct scatterlist *src,
+ unsigned int nbytes)
+{
+ struct s390_aes_ctx *sctx = crypto_blkcipher_ctx(desc->tfm);
+ struct blkcipher_walk walk;
+
+ if (unlikely(!sctx->fc))
+ return fallback_blk_enc(desc, dst, src, nbytes);
+
+ blkcipher_walk_init(&walk, dst, src, nbytes);
+ return ecb_aes_crypt(desc, 0, &walk);
+}
+
+static int ecb_aes_decrypt(struct blkcipher_desc *desc,
+ struct scatterlist *dst, struct scatterlist *src,
+ unsigned int nbytes)
+{
+ struct s390_aes_ctx *sctx = crypto_blkcipher_ctx(desc->tfm);
+ struct blkcipher_walk walk;
+
+ if (unlikely(!sctx->fc))
+ return fallback_blk_dec(desc, dst, src, nbytes);
+
+ blkcipher_walk_init(&walk, dst, src, nbytes);
+ return ecb_aes_crypt(desc, CPACF_DECRYPT, &walk);
+}
+
+static int fallback_init_blk(struct crypto_tfm *tfm)
+{
+ const char *name = tfm->__crt_alg->cra_name;
+ struct s390_aes_ctx *sctx = crypto_tfm_ctx(tfm);
+
+ sctx->fallback.blk = crypto_alloc_skcipher(name, 0,
+ CRYPTO_ALG_ASYNC |
+ CRYPTO_ALG_NEED_FALLBACK);
+
+ if (IS_ERR(sctx->fallback.blk)) {
+ pr_err("Allocating AES fallback algorithm %s failed\n",
+ name);
+ return PTR_ERR(sctx->fallback.blk);
+ }
+
+ return 0;
+}
+
+static void fallback_exit_blk(struct crypto_tfm *tfm)
+{
+ struct s390_aes_ctx *sctx = crypto_tfm_ctx(tfm);
+
+ crypto_free_skcipher(sctx->fallback.blk);
+}
+
+static struct crypto_alg ecb_aes_alg = {
+ .cra_name = "ecb(aes)",
+ .cra_driver_name = "ecb-aes-s390",
+ .cra_priority = 401, /* combo: aes + ecb + 1 */
+ .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER |
+ CRYPTO_ALG_NEED_FALLBACK,
+ .cra_blocksize = AES_BLOCK_SIZE,
+ .cra_ctxsize = sizeof(struct s390_aes_ctx),
+ .cra_type = &crypto_blkcipher_type,
+ .cra_module = THIS_MODULE,
+ .cra_init = fallback_init_blk,
+ .cra_exit = fallback_exit_blk,
+ .cra_u = {
+ .blkcipher = {
+ .min_keysize = AES_MIN_KEY_SIZE,
+ .max_keysize = AES_MAX_KEY_SIZE,
+ .setkey = ecb_aes_set_key,
+ .encrypt = ecb_aes_encrypt,
+ .decrypt = ecb_aes_decrypt,
+ }
+ }
+};
+
+static int cbc_aes_set_key(struct crypto_tfm *tfm, const u8 *in_key,
+ unsigned int key_len)
+{
+ struct s390_aes_ctx *sctx = crypto_tfm_ctx(tfm);
+ unsigned long fc;
+
+ /* Pick the correct function code based on the key length */
+ fc = (key_len == 16) ? CPACF_KMC_AES_128 :
+ (key_len == 24) ? CPACF_KMC_AES_192 :
+ (key_len == 32) ? CPACF_KMC_AES_256 : 0;
+
+ /* Check if the function code is available */
+ sctx->fc = (fc && cpacf_test_func(&kmc_functions, fc)) ? fc : 0;
+ if (!sctx->fc)
+ return setkey_fallback_blk(tfm, in_key, key_len);
+
+ sctx->key_len = key_len;
+ memcpy(sctx->key, in_key, key_len);
+ return 0;
+}
+
+static int cbc_aes_crypt(struct blkcipher_desc *desc, unsigned long modifier,
+ struct blkcipher_walk *walk)
+{
+ struct s390_aes_ctx *sctx = crypto_blkcipher_ctx(desc->tfm);
+ unsigned int nbytes, n;
+ int ret;
+ struct {
+ u8 iv[AES_BLOCK_SIZE];
+ u8 key[AES_MAX_KEY_SIZE];
+ } param;
+
+ ret = blkcipher_walk_virt(desc, walk);
+ memcpy(param.iv, walk->iv, AES_BLOCK_SIZE);
+ memcpy(param.key, sctx->key, sctx->key_len);
+ while ((nbytes = walk->nbytes) >= AES_BLOCK_SIZE) {
+ /* only use complete blocks */
+ n = nbytes & ~(AES_BLOCK_SIZE - 1);
+ cpacf_kmc(sctx->fc | modifier, &param,
+ walk->dst.virt.addr, walk->src.virt.addr, n);
+ ret = blkcipher_walk_done(desc, walk, nbytes - n);
+ }
+ memcpy(walk->iv, param.iv, AES_BLOCK_SIZE);
+ return ret;
+}
+
+static int cbc_aes_encrypt(struct blkcipher_desc *desc,
+ struct scatterlist *dst, struct scatterlist *src,
+ unsigned int nbytes)
+{
+ struct s390_aes_ctx *sctx = crypto_blkcipher_ctx(desc->tfm);
+ struct blkcipher_walk walk;
+
+ if (unlikely(!sctx->fc))
+ return fallback_blk_enc(desc, dst, src, nbytes);
+
+ blkcipher_walk_init(&walk, dst, src, nbytes);
+ return cbc_aes_crypt(desc, 0, &walk);
+}
+
+static int cbc_aes_decrypt(struct blkcipher_desc *desc,
+ struct scatterlist *dst, struct scatterlist *src,
+ unsigned int nbytes)
+{
+ struct s390_aes_ctx *sctx = crypto_blkcipher_ctx(desc->tfm);
+ struct blkcipher_walk walk;
+
+ if (unlikely(!sctx->fc))
+ return fallback_blk_dec(desc, dst, src, nbytes);
+
+ blkcipher_walk_init(&walk, dst, src, nbytes);
+ return cbc_aes_crypt(desc, CPACF_DECRYPT, &walk);
+}
+
+static struct crypto_alg cbc_aes_alg = {
+ .cra_name = "cbc(aes)",
+ .cra_driver_name = "cbc-aes-s390",
+ .cra_priority = 402, /* ecb-aes-s390 + 1 */
+ .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER |
+ CRYPTO_ALG_NEED_FALLBACK,
+ .cra_blocksize = AES_BLOCK_SIZE,
+ .cra_ctxsize = sizeof(struct s390_aes_ctx),
+ .cra_type = &crypto_blkcipher_type,
+ .cra_module = THIS_MODULE,
+ .cra_init = fallback_init_blk,
+ .cra_exit = fallback_exit_blk,
+ .cra_u = {
+ .blkcipher = {
+ .min_keysize = AES_MIN_KEY_SIZE,
+ .max_keysize = AES_MAX_KEY_SIZE,
+ .ivsize = AES_BLOCK_SIZE,
+ .setkey = cbc_aes_set_key,
+ .encrypt = cbc_aes_encrypt,
+ .decrypt = cbc_aes_decrypt,
+ }
+ }
+};
+
+static int xts_fallback_setkey(struct crypto_tfm *tfm, const u8 *key,
+ unsigned int len)
+{
+ struct s390_xts_ctx *xts_ctx = crypto_tfm_ctx(tfm);
+ unsigned int ret;
+
+ crypto_skcipher_clear_flags(xts_ctx->fallback, CRYPTO_TFM_REQ_MASK);
+ crypto_skcipher_set_flags(xts_ctx->fallback, tfm->crt_flags &
+ CRYPTO_TFM_REQ_MASK);
+
+ ret = crypto_skcipher_setkey(xts_ctx->fallback, key, len);
+
+ tfm->crt_flags &= ~CRYPTO_TFM_RES_MASK;
+ tfm->crt_flags |= crypto_skcipher_get_flags(xts_ctx->fallback) &
+ CRYPTO_TFM_RES_MASK;
+
+ return ret;
+}
+
+static int xts_fallback_decrypt(struct blkcipher_desc *desc,
+ struct scatterlist *dst, struct scatterlist *src,
+ unsigned int nbytes)
+{
+ struct crypto_blkcipher *tfm = desc->tfm;
+ struct s390_xts_ctx *xts_ctx = crypto_blkcipher_ctx(tfm);
+ SKCIPHER_REQUEST_ON_STACK(req, xts_ctx->fallback);
+ unsigned int ret;
+
+ skcipher_request_set_tfm(req, xts_ctx->fallback);
+ skcipher_request_set_callback(req, desc->flags, NULL, NULL);
+ skcipher_request_set_crypt(req, src, dst, nbytes, desc->info);
+
+ ret = crypto_skcipher_decrypt(req);
+
+ skcipher_request_zero(req);
+ return ret;
+}
+
+static int xts_fallback_encrypt(struct blkcipher_desc *desc,
+ struct scatterlist *dst, struct scatterlist *src,
+ unsigned int nbytes)
+{
+ struct crypto_blkcipher *tfm = desc->tfm;
+ struct s390_xts_ctx *xts_ctx = crypto_blkcipher_ctx(tfm);
+ SKCIPHER_REQUEST_ON_STACK(req, xts_ctx->fallback);
+ unsigned int ret;
+
+ skcipher_request_set_tfm(req, xts_ctx->fallback);
+ skcipher_request_set_callback(req, desc->flags, NULL, NULL);
+ skcipher_request_set_crypt(req, src, dst, nbytes, desc->info);
+
+ ret = crypto_skcipher_encrypt(req);
+
+ skcipher_request_zero(req);
+ return ret;
+}
+
+static int xts_aes_set_key(struct crypto_tfm *tfm, const u8 *in_key,
+ unsigned int key_len)
+{
+ struct s390_xts_ctx *xts_ctx = crypto_tfm_ctx(tfm);
+ unsigned long fc;
+ int err;
+
+ err = xts_check_key(tfm, in_key, key_len);
+ if (err)
+ return err;
+
+ /* In fips mode only 128 bit or 256 bit keys are valid */
+ if (fips_enabled && key_len != 32 && key_len != 64) {
+ tfm->crt_flags |= CRYPTO_TFM_RES_BAD_KEY_LEN;
+ return -EINVAL;
+ }
+
+ /* Pick the correct function code based on the key length */
+ fc = (key_len == 32) ? CPACF_KM_XTS_128 :
+ (key_len == 64) ? CPACF_KM_XTS_256 : 0;
+
+ /* Check if the function code is available */
+ xts_ctx->fc = (fc && cpacf_test_func(&km_functions, fc)) ? fc : 0;
+ if (!xts_ctx->fc)
+ return xts_fallback_setkey(tfm, in_key, key_len);
+
+ /* Split the XTS key into the two subkeys */
+ key_len = key_len / 2;
+ xts_ctx->key_len = key_len;
+ memcpy(xts_ctx->key, in_key, key_len);
+ memcpy(xts_ctx->pcc_key, in_key + key_len, key_len);
+ return 0;
+}
+
+static int xts_aes_crypt(struct blkcipher_desc *desc, unsigned long modifier,
+ struct blkcipher_walk *walk)
+{
+ struct s390_xts_ctx *xts_ctx = crypto_blkcipher_ctx(desc->tfm);
+ unsigned int offset, nbytes, n;
+ int ret;
+ struct {
+ u8 key[32];
+ u8 tweak[16];
+ u8 block[16];
+ u8 bit[16];
+ u8 xts[16];
+ } pcc_param;
+ struct {
+ u8 key[32];
+ u8 init[16];
+ } xts_param;
+
+ ret = blkcipher_walk_virt(desc, walk);
+ offset = xts_ctx->key_len & 0x10;
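+	/*
+	 * Derive the initial XTS parameter block: the IV and the second
+	 * half of the key are passed to the PCC instruction and the
+	 * resulting value in pcc_param.xts becomes the KM starting value.
+	 */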
+ memset(pcc_param.block, 0, sizeof(pcc_param.block));
+ memset(pcc_param.bit, 0, sizeof(pcc_param.bit));
+ memset(pcc_param.xts, 0, sizeof(pcc_param.xts));
+ memcpy(pcc_param.tweak, walk->iv, sizeof(pcc_param.tweak));
+ memcpy(pcc_param.key + offset, xts_ctx->pcc_key, xts_ctx->key_len);
+ cpacf_pcc(xts_ctx->fc, pcc_param.key + offset);
+
+ memcpy(xts_param.key + offset, xts_ctx->key, xts_ctx->key_len);
+ memcpy(xts_param.init, pcc_param.xts, 16);
+
+ while ((nbytes = walk->nbytes) >= AES_BLOCK_SIZE) {
+ /* only use complete blocks */
+ n = nbytes & ~(AES_BLOCK_SIZE - 1);
+ cpacf_km(xts_ctx->fc | modifier, xts_param.key + offset,
+ walk->dst.virt.addr, walk->src.virt.addr, n);
+ ret = blkcipher_walk_done(desc, walk, nbytes - n);
+ }
+ return ret;
+}
+
+static int xts_aes_encrypt(struct blkcipher_desc *desc,
+ struct scatterlist *dst, struct scatterlist *src,
+ unsigned int nbytes)
+{
+ struct s390_xts_ctx *xts_ctx = crypto_blkcipher_ctx(desc->tfm);
+ struct blkcipher_walk walk;
+
+ if (!nbytes)
+ return -EINVAL;
+
+ if (unlikely(!xts_ctx->fc))
+ return xts_fallback_encrypt(desc, dst, src, nbytes);
+
+ blkcipher_walk_init(&walk, dst, src, nbytes);
+ return xts_aes_crypt(desc, 0, &walk);
+}
+
+static int xts_aes_decrypt(struct blkcipher_desc *desc,
+ struct scatterlist *dst, struct scatterlist *src,
+ unsigned int nbytes)
+{
+ struct s390_xts_ctx *xts_ctx = crypto_blkcipher_ctx(desc->tfm);
+ struct blkcipher_walk walk;
+
+ if (!nbytes)
+ return -EINVAL;
+
+ if (unlikely(!xts_ctx->fc))
+ return xts_fallback_decrypt(desc, dst, src, nbytes);
+
+ blkcipher_walk_init(&walk, dst, src, nbytes);
+ return xts_aes_crypt(desc, CPACF_DECRYPT, &walk);
+}
+
+static int xts_fallback_init(struct crypto_tfm *tfm)
+{
+ const char *name = tfm->__crt_alg->cra_name;
+ struct s390_xts_ctx *xts_ctx = crypto_tfm_ctx(tfm);
+
+ xts_ctx->fallback = crypto_alloc_skcipher(name, 0,
+ CRYPTO_ALG_ASYNC |
+ CRYPTO_ALG_NEED_FALLBACK);
+
+ if (IS_ERR(xts_ctx->fallback)) {
+ pr_err("Allocating XTS fallback algorithm %s failed\n",
+ name);
+ return PTR_ERR(xts_ctx->fallback);
+ }
+ return 0;
+}
+
+static void xts_fallback_exit(struct crypto_tfm *tfm)
+{
+ struct s390_xts_ctx *xts_ctx = crypto_tfm_ctx(tfm);
+
+ crypto_free_skcipher(xts_ctx->fallback);
+}
+
+static struct crypto_alg xts_aes_alg = {
+ .cra_name = "xts(aes)",
+ .cra_driver_name = "xts-aes-s390",
+ .cra_priority = 402, /* ecb-aes-s390 + 1 */
+ .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER |
+ CRYPTO_ALG_NEED_FALLBACK,
+ .cra_blocksize = AES_BLOCK_SIZE,
+ .cra_ctxsize = sizeof(struct s390_xts_ctx),
+ .cra_type = &crypto_blkcipher_type,
+ .cra_module = THIS_MODULE,
+ .cra_init = xts_fallback_init,
+ .cra_exit = xts_fallback_exit,
+ .cra_u = {
+ .blkcipher = {
+ .min_keysize = 2 * AES_MIN_KEY_SIZE,
+ .max_keysize = 2 * AES_MAX_KEY_SIZE,
+ .ivsize = AES_BLOCK_SIZE,
+ .setkey = xts_aes_set_key,
+ .encrypt = xts_aes_encrypt,
+ .decrypt = xts_aes_decrypt,
+ }
+ }
+};
+
+static int ctr_aes_set_key(struct crypto_tfm *tfm, const u8 *in_key,
+ unsigned int key_len)
+{
+ struct s390_aes_ctx *sctx = crypto_tfm_ctx(tfm);
+ unsigned long fc;
+
+ /* Pick the correct function code based on the key length */
+ fc = (key_len == 16) ? CPACF_KMCTR_AES_128 :
+ (key_len == 24) ? CPACF_KMCTR_AES_192 :
+ (key_len == 32) ? CPACF_KMCTR_AES_256 : 0;
+
+ /* Check if the function code is available */
+ sctx->fc = (fc && cpacf_test_func(&kmctr_functions, fc)) ? fc : 0;
+ if (!sctx->fc)
+ return setkey_fallback_blk(tfm, in_key, key_len);
+
+ sctx->key_len = key_len;
+ memcpy(sctx->key, in_key, key_len);
+ return 0;
+}
+
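+/*
+ * Fill the shared counter block buffer with consecutive counter values,
+ * starting from *iv and incrementing once per AES block. At most
+ * PAGE_SIZE bytes worth of complete blocks are prepared; the number of
+ * prepared bytes is returned.
+ */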
+static unsigned int __ctrblk_init(u8 *ctrptr, u8 *iv, unsigned int nbytes)
+{
+ unsigned int i, n;
+
+ /* only use complete blocks, max. PAGE_SIZE */
+ memcpy(ctrptr, iv, AES_BLOCK_SIZE);
+ n = (nbytes > PAGE_SIZE) ? PAGE_SIZE : nbytes & ~(AES_BLOCK_SIZE - 1);
+ for (i = (n / AES_BLOCK_SIZE) - 1; i > 0; i--) {
+ memcpy(ctrptr + AES_BLOCK_SIZE, ctrptr, AES_BLOCK_SIZE);
+ crypto_inc(ctrptr + AES_BLOCK_SIZE, AES_BLOCK_SIZE);
+ ctrptr += AES_BLOCK_SIZE;
+ }
+ return n;
+}
+
+static int ctr_aes_crypt(struct blkcipher_desc *desc, unsigned long modifier,
+ struct blkcipher_walk *walk)
+{
+ struct s390_aes_ctx *sctx = crypto_blkcipher_ctx(desc->tfm);
+ u8 buf[AES_BLOCK_SIZE], *ctrptr;
+ unsigned int n, nbytes;
+ int ret, locked;
+
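+	/*
+	 * The shared ctrblk buffer holds up to PAGE_SIZE of precomputed
+	 * counters; if the mutex cannot be taken, fall back to processing
+	 * a single block per iteration using walk->iv as the counter.
+	 */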
+ locked = mutex_trylock(&ctrblk_lock);
+
+ ret = blkcipher_walk_virt_block(desc, walk, AES_BLOCK_SIZE);
+ while ((nbytes = walk->nbytes) >= AES_BLOCK_SIZE) {
+ n = AES_BLOCK_SIZE;
+ if (nbytes >= 2*AES_BLOCK_SIZE && locked)
+ n = __ctrblk_init(ctrblk, walk->iv, nbytes);
+ ctrptr = (n > AES_BLOCK_SIZE) ? ctrblk : walk->iv;
+ cpacf_kmctr(sctx->fc | modifier, sctx->key,
+ walk->dst.virt.addr, walk->src.virt.addr,
+ n, ctrptr);
+ if (ctrptr == ctrblk)
+ memcpy(walk->iv, ctrptr + n - AES_BLOCK_SIZE,
+ AES_BLOCK_SIZE);
+ crypto_inc(walk->iv, AES_BLOCK_SIZE);
+ ret = blkcipher_walk_done(desc, walk, nbytes - n);
+ }
+ if (locked)
+ mutex_unlock(&ctrblk_lock);
+ /*
+ * final block may be < AES_BLOCK_SIZE, copy only nbytes
+ */
+ if (nbytes) {
+ cpacf_kmctr(sctx->fc | modifier, sctx->key,
+ buf, walk->src.virt.addr,
+ AES_BLOCK_SIZE, walk->iv);
+ memcpy(walk->dst.virt.addr, buf, nbytes);
+ crypto_inc(walk->iv, AES_BLOCK_SIZE);
+ ret = blkcipher_walk_done(desc, walk, 0);
+ }
+
+ return ret;
+}
+
+static int ctr_aes_encrypt(struct blkcipher_desc *desc,
+ struct scatterlist *dst, struct scatterlist *src,
+ unsigned int nbytes)
+{
+ struct s390_aes_ctx *sctx = crypto_blkcipher_ctx(desc->tfm);
+ struct blkcipher_walk walk;
+
+ if (unlikely(!sctx->fc))
+ return fallback_blk_enc(desc, dst, src, nbytes);
+
+ blkcipher_walk_init(&walk, dst, src, nbytes);
+ return ctr_aes_crypt(desc, 0, &walk);
+}
+
+static int ctr_aes_decrypt(struct blkcipher_desc *desc,
+ struct scatterlist *dst, struct scatterlist *src,
+ unsigned int nbytes)
+{
+ struct s390_aes_ctx *sctx = crypto_blkcipher_ctx(desc->tfm);
+ struct blkcipher_walk walk;
+
+ if (unlikely(!sctx->fc))
+ return fallback_blk_dec(desc, dst, src, nbytes);
+
+ blkcipher_walk_init(&walk, dst, src, nbytes);
+ return ctr_aes_crypt(desc, CPACF_DECRYPT, &walk);
+}
+
+static struct crypto_alg ctr_aes_alg = {
+ .cra_name = "ctr(aes)",
+ .cra_driver_name = "ctr-aes-s390",
+ .cra_priority = 402, /* ecb-aes-s390 + 1 */
+ .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER |
+ CRYPTO_ALG_NEED_FALLBACK,
+ .cra_blocksize = 1,
+ .cra_ctxsize = sizeof(struct s390_aes_ctx),
+ .cra_type = &crypto_blkcipher_type,
+ .cra_module = THIS_MODULE,
+ .cra_init = fallback_init_blk,
+ .cra_exit = fallback_exit_blk,
+ .cra_u = {
+ .blkcipher = {
+ .min_keysize = AES_MIN_KEY_SIZE,
+ .max_keysize = AES_MAX_KEY_SIZE,
+ .ivsize = AES_BLOCK_SIZE,
+ .setkey = ctr_aes_set_key,
+ .encrypt = ctr_aes_encrypt,
+ .decrypt = ctr_aes_decrypt,
+ }
+ }
+};
+
+static int gcm_aes_setkey(struct crypto_aead *tfm, const u8 *key,
+ unsigned int keylen)
+{
+ struct s390_aes_ctx *ctx = crypto_aead_ctx(tfm);
+
+ switch (keylen) {
+ case AES_KEYSIZE_128:
+ ctx->fc = CPACF_KMA_GCM_AES_128;
+ break;
+ case AES_KEYSIZE_192:
+ ctx->fc = CPACF_KMA_GCM_AES_192;
+ break;
+ case AES_KEYSIZE_256:
+ ctx->fc = CPACF_KMA_GCM_AES_256;
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ memcpy(ctx->key, key, keylen);
+ ctx->key_len = keylen;
+ return 0;
+}
+
+static int gcm_aes_setauthsize(struct crypto_aead *tfm, unsigned int authsize)
+{
+ switch (authsize) {
+ case 4:
+ case 8:
+ case 12:
+ case 13:
+ case 14:
+ case 15:
+ case 16:
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static void gcm_walk_start(struct gcm_sg_walk *gw, struct scatterlist *sg,
+ unsigned int len)
+{
+ memset(gw, 0, sizeof(*gw));
+ gw->walk_bytes_remain = len;
+ scatterwalk_start(&gw->walk, sg);
+}
+
+static inline unsigned int _gcm_sg_clamp_and_map(struct gcm_sg_walk *gw)
+{
+ struct scatterlist *nextsg;
+
+ gw->walk_bytes = scatterwalk_clamp(&gw->walk, gw->walk_bytes_remain);
+ while (!gw->walk_bytes) {
+ nextsg = sg_next(gw->walk.sg);
+ if (!nextsg)
+ return 0;
+ scatterwalk_start(&gw->walk, nextsg);
+ gw->walk_bytes = scatterwalk_clamp(&gw->walk,
+ gw->walk_bytes_remain);
+ }
+ gw->walk_ptr = scatterwalk_map(&gw->walk);
+ return gw->walk_bytes;
+}
+
+static inline void _gcm_sg_unmap_and_advance(struct gcm_sg_walk *gw,
+ unsigned int nbytes)
+{
+ gw->walk_bytes_remain -= nbytes;
+ scatterwalk_unmap(gw->walk_ptr);
+ scatterwalk_advance(&gw->walk, nbytes);
+ scatterwalk_done(&gw->walk, 0, gw->walk_bytes_remain);
+ gw->walk_ptr = NULL;
+}
+
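+/*
+ * Hand the caller a linear chunk of at least minbytesneeded input bytes
+ * (unless the walk is exhausted): either map the current scatterlist entry
+ * directly or, if an entry is too short, collect bytes into the internal
+ * buffer until enough are available.
+ */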
+static int gcm_in_walk_go(struct gcm_sg_walk *gw, unsigned int minbytesneeded)
+{
+ int n;
+
+ if (gw->buf_bytes && gw->buf_bytes >= minbytesneeded) {
+ gw->ptr = gw->buf;
+ gw->nbytes = gw->buf_bytes;
+ goto out;
+ }
+
+ if (gw->walk_bytes_remain == 0) {
+ gw->ptr = NULL;
+ gw->nbytes = 0;
+ goto out;
+ }
+
+ if (!_gcm_sg_clamp_and_map(gw)) {
+ gw->ptr = NULL;
+ gw->nbytes = 0;
+ goto out;
+ }
+
+ if (!gw->buf_bytes && gw->walk_bytes >= minbytesneeded) {
+ gw->ptr = gw->walk_ptr;
+ gw->nbytes = gw->walk_bytes;
+ goto out;
+ }
+
+ while (1) {
+ n = min(gw->walk_bytes, AES_BLOCK_SIZE - gw->buf_bytes);
+ memcpy(gw->buf + gw->buf_bytes, gw->walk_ptr, n);
+ gw->buf_bytes += n;
+ _gcm_sg_unmap_and_advance(gw, n);
+ if (gw->buf_bytes >= minbytesneeded) {
+ gw->ptr = gw->buf;
+ gw->nbytes = gw->buf_bytes;
+ goto out;
+ }
+ if (!_gcm_sg_clamp_and_map(gw)) {
+ gw->ptr = NULL;
+ gw->nbytes = 0;
+ goto out;
+ }
+ }
+
+out:
+ return gw->nbytes;
+}
+
+static int gcm_out_walk_go(struct gcm_sg_walk *gw, unsigned int minbytesneeded)
+{
+ if (gw->walk_bytes_remain == 0) {
+ gw->ptr = NULL;
+ gw->nbytes = 0;
+ goto out;
+ }
+
+ if (!_gcm_sg_clamp_and_map(gw)) {
+ gw->ptr = NULL;
+ gw->nbytes = 0;
+ goto out;
+ }
+
+ if (gw->walk_bytes >= minbytesneeded) {
+ gw->ptr = gw->walk_ptr;
+ gw->nbytes = gw->walk_bytes;
+ goto out;
+ }
+
+ scatterwalk_unmap(gw->walk_ptr);
+ gw->walk_ptr = NULL;
+
+ gw->ptr = gw->buf;
+ gw->nbytes = sizeof(gw->buf);
+
+out:
+ return gw->nbytes;
+}
+
+static int gcm_in_walk_done(struct gcm_sg_walk *gw, unsigned int bytesdone)
+{
+ if (gw->ptr == NULL)
+ return 0;
+
+ if (gw->ptr == gw->buf) {
+ int n = gw->buf_bytes - bytesdone;
+ if (n > 0) {
+ memmove(gw->buf, gw->buf + bytesdone, n);
+ gw->buf_bytes = n;
+ } else
+ gw->buf_bytes = 0;
+ } else
+ _gcm_sg_unmap_and_advance(gw, bytesdone);
+
+ return bytesdone;
+}
+
+static int gcm_out_walk_done(struct gcm_sg_walk *gw, unsigned int bytesdone)
+{
+ int i, n;
+
+ if (gw->ptr == NULL)
+ return 0;
+
+ if (gw->ptr == gw->buf) {
+ for (i = 0; i < bytesdone; i += n) {
+ if (!_gcm_sg_clamp_and_map(gw))
+ return i;
+ n = min(gw->walk_bytes, bytesdone - i);
+ memcpy(gw->walk_ptr, gw->buf + i, n);
+ _gcm_sg_unmap_and_advance(gw, n);
+ }
+ } else
+ _gcm_sg_unmap_and_advance(gw, bytesdone);
+
+ return bytesdone;
+}
+
+static int gcm_aes_crypt(struct aead_request *req, unsigned int flags)
+{
+ struct crypto_aead *tfm = crypto_aead_reqtfm(req);
+ struct s390_aes_ctx *ctx = crypto_aead_ctx(tfm);
+ unsigned int ivsize = crypto_aead_ivsize(tfm);
+ unsigned int taglen = crypto_aead_authsize(tfm);
+ unsigned int aadlen = req->assoclen;
+ unsigned int pclen = req->cryptlen;
+ int ret = 0;
+
+ unsigned int n, len, in_bytes, out_bytes,
+ min_bytes, bytes, aad_bytes, pc_bytes;
+ struct gcm_sg_walk gw_in, gw_out;
+ u8 tag[GHASH_DIGEST_SIZE];
+
+ struct {
+ u32 _[3]; /* reserved */
+ u32 cv; /* Counter Value */
+ u8 t[GHASH_DIGEST_SIZE];/* Tag */
+ u8 h[AES_BLOCK_SIZE]; /* Hash-subkey */
+ u64 taadl; /* Total AAD Length */
+ u64 tpcl; /* Total Plain-/Cipher-text Length */
+ u8 j0[GHASH_BLOCK_SIZE];/* initial counter value */
+ u8 k[AES_MAX_KEY_SIZE]; /* Key */
+ } param;
+
+ /*
+ * encrypt
+ * req->src: aad||plaintext
+ * req->dst: aad||ciphertext||tag
+ * decrypt
+ * req->src: aad||ciphertext||tag
+ * req->dst: aad||plaintext, return 0 or -EBADMSG
+ * aad, plaintext and ciphertext may be empty.
+ */
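+	/*
+	 * Worked example, for illustration only: a decrypt request with
+	 * req->assoclen = 20, req->cryptlen = 36 and a 16 byte tag gives
+	 * aadlen = 20, pclen = 36 - 16 = 20 and len = 40; the tag is read
+	 * from req->src at offset len and checked after the kma loop.
+	 */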
+ if (flags & CPACF_DECRYPT)
+ pclen -= taglen;
+ len = aadlen + pclen;
+
+ memset(&param, 0, sizeof(param));
+ param.cv = 1;
+ param.taadl = aadlen * 8;
+ param.tpcl = pclen * 8;
+ memcpy(param.j0, req->iv, ivsize);
+ *(u32 *)(param.j0 + ivsize) = 1;
+ memcpy(param.k, ctx->key, ctx->key_len);
+
+ gcm_walk_start(&gw_in, req->src, len);
+ gcm_walk_start(&gw_out, req->dst, len);
+
+ do {
+ min_bytes = min_t(unsigned int,
+ aadlen > 0 ? aadlen : pclen, AES_BLOCK_SIZE);
+ in_bytes = gcm_in_walk_go(&gw_in, min_bytes);
+ out_bytes = gcm_out_walk_go(&gw_out, min_bytes);
+ bytes = min(in_bytes, out_bytes);
+
+ if (aadlen + pclen <= bytes) {
+ aad_bytes = aadlen;
+ pc_bytes = pclen;
+ flags |= CPACF_KMA_LAAD | CPACF_KMA_LPC;
+ } else {
+ if (aadlen <= bytes) {
+ aad_bytes = aadlen;
+ pc_bytes = (bytes - aadlen) &
+ ~(AES_BLOCK_SIZE - 1);
+ flags |= CPACF_KMA_LAAD;
+ } else {
+ aad_bytes = bytes & ~(AES_BLOCK_SIZE - 1);
+ pc_bytes = 0;
+ }
+ }
+
+ if (aad_bytes > 0)
+ memcpy(gw_out.ptr, gw_in.ptr, aad_bytes);
+
+ cpacf_kma(ctx->fc | flags, &param,
+ gw_out.ptr + aad_bytes,
+ gw_in.ptr + aad_bytes, pc_bytes,
+ gw_in.ptr, aad_bytes);
+
+ n = aad_bytes + pc_bytes;
+ if (gcm_in_walk_done(&gw_in, n) != n)
+ return -ENOMEM;
+ if (gcm_out_walk_done(&gw_out, n) != n)
+ return -ENOMEM;
+ aadlen -= aad_bytes;
+ pclen -= pc_bytes;
+ } while (aadlen + pclen > 0);
+
+ if (flags & CPACF_DECRYPT) {
+ scatterwalk_map_and_copy(tag, req->src, len, taglen, 0);
+ if (crypto_memneq(tag, param.t, taglen))
+ ret = -EBADMSG;
+ } else
+ scatterwalk_map_and_copy(param.t, req->dst, len, taglen, 1);
+
+ memzero_explicit(&param, sizeof(param));
+ return ret;
+}
+
+static int gcm_aes_encrypt(struct aead_request *req)
+{
+ return gcm_aes_crypt(req, CPACF_ENCRYPT);
+}
+
+static int gcm_aes_decrypt(struct aead_request *req)
+{
+ return gcm_aes_crypt(req, CPACF_DECRYPT);
+}
+
+static struct aead_alg gcm_aes_aead = {
+ .setkey = gcm_aes_setkey,
+ .setauthsize = gcm_aes_setauthsize,
+ .encrypt = gcm_aes_encrypt,
+ .decrypt = gcm_aes_decrypt,
+
+ .ivsize = GHASH_BLOCK_SIZE - sizeof(u32),
+ .maxauthsize = GHASH_DIGEST_SIZE,
+ .chunksize = AES_BLOCK_SIZE,
+
+ .base = {
+ .cra_blocksize = 1,
+ .cra_ctxsize = sizeof(struct s390_aes_ctx),
+ .cra_priority = 900,
+ .cra_name = "gcm(aes)",
+ .cra_driver_name = "gcm-aes-s390",
+ .cra_module = THIS_MODULE,
+ },
+};
+
+static struct crypto_alg *aes_s390_algs_ptr[5];
+static int aes_s390_algs_num;
+static struct aead_alg *aes_s390_aead_alg;
+
+static int aes_s390_register_alg(struct crypto_alg *alg)
+{
+ int ret;
+
+ ret = crypto_register_alg(alg);
+ if (!ret)
+ aes_s390_algs_ptr[aes_s390_algs_num++] = alg;
+ return ret;
+}
+
+static void aes_s390_fini(void)
+{
+ while (aes_s390_algs_num--)
+ crypto_unregister_alg(aes_s390_algs_ptr[aes_s390_algs_num]);
+ if (ctrblk)
+ free_page((unsigned long) ctrblk);
+
+ if (aes_s390_aead_alg)
+ crypto_unregister_aead(aes_s390_aead_alg);
+}
+
+static int __init aes_s390_init(void)
+{
+ int ret;
+
+ /* Query available functions for KM, KMC, KMCTR and KMA */
+ cpacf_query(CPACF_KM, &km_functions);
+ cpacf_query(CPACF_KMC, &kmc_functions);
+ cpacf_query(CPACF_KMCTR, &kmctr_functions);
+ cpacf_query(CPACF_KMA, &kma_functions);
+
+ if (cpacf_test_func(&km_functions, CPACF_KM_AES_128) ||
+ cpacf_test_func(&km_functions, CPACF_KM_AES_192) ||
+ cpacf_test_func(&km_functions, CPACF_KM_AES_256)) {
+ ret = aes_s390_register_alg(&aes_alg);
+ if (ret)
+ goto out_err;
+ ret = aes_s390_register_alg(&ecb_aes_alg);
+ if (ret)
+ goto out_err;
+ }
+
+ if (cpacf_test_func(&kmc_functions, CPACF_KMC_AES_128) ||
+ cpacf_test_func(&kmc_functions, CPACF_KMC_AES_192) ||
+ cpacf_test_func(&kmc_functions, CPACF_KMC_AES_256)) {
+ ret = aes_s390_register_alg(&cbc_aes_alg);
+ if (ret)
+ goto out_err;
+ }
+
+ if (cpacf_test_func(&km_functions, CPACF_KM_XTS_128) ||
+ cpacf_test_func(&km_functions, CPACF_KM_XTS_256)) {
+ ret = aes_s390_register_alg(&xts_aes_alg);
+ if (ret)
+ goto out_err;
+ }
+
+ if (cpacf_test_func(&kmctr_functions, CPACF_KMCTR_AES_128) ||
+ cpacf_test_func(&kmctr_functions, CPACF_KMCTR_AES_192) ||
+ cpacf_test_func(&kmctr_functions, CPACF_KMCTR_AES_256)) {
+ ctrblk = (u8 *) __get_free_page(GFP_KERNEL);
+ if (!ctrblk) {
+ ret = -ENOMEM;
+ goto out_err;
+ }
+ ret = aes_s390_register_alg(&ctr_aes_alg);
+ if (ret)
+ goto out_err;
+ }
+
+ if (cpacf_test_func(&kma_functions, CPACF_KMA_GCM_AES_128) ||
+ cpacf_test_func(&kma_functions, CPACF_KMA_GCM_AES_192) ||
+ cpacf_test_func(&kma_functions, CPACF_KMA_GCM_AES_256)) {
+ ret = crypto_register_aead(&gcm_aes_aead);
+ if (ret)
+ goto out_err;
+ aes_s390_aead_alg = &gcm_aes_aead;
+ }
+
+ return 0;
+out_err:
+ aes_s390_fini();
+ return ret;
+}
+
+module_cpu_feature_match(MSA, aes_s390_init);
+module_exit(aes_s390_fini);
+
+MODULE_ALIAS_CRYPTO("aes-all");
+
+MODULE_DESCRIPTION("Rijndael (AES) Cipher Algorithm");
+MODULE_LICENSE("GPL");
diff --git a/arch/s390/crypto/arch_random.c b/arch/s390/crypto/arch_random.c
new file mode 100644
index 000000000..4cbb4b6d8
--- /dev/null
+++ b/arch/s390/crypto/arch_random.c
@@ -0,0 +1,127 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * s390 arch random implementation.
+ *
+ * Copyright IBM Corp. 2017, 2018
+ * Author(s): Harald Freudenberger
+ *
+ * The s390_arch_random_generate() function may be called from random.c
+ * in interrupt context. So this implementation does its best to be very
+ * fast. There is a buffer of random data which is asynchronously checked
+ * and filled by a workqueue thread.
+ * If there are enough bytes in the buffer, s390_arch_random_generate()
+ * just delivers these bytes. Otherwise false is returned until the
+ * worker thread refills the buffer.
+ * The worker fills the rng buffer by pulling fresh entropy from the
+ * high quality (but slow) true hardware random generator. This entropy
+ * is then spread over the buffer with a pseudo random generator (PRNG).
+ * As arch_get_random_seed_long() fetches 8 bytes and the calling
+ * function add_interrupt_randomness() counts this as 1 bit of entropy,
+ * the distribution needs to make sure there is in fact 1 bit of entropy
+ * contained in every 8 bytes of the buffer. The current values pull 32
+ * bytes of entropy and scatter them over a 2048 byte buffer, so 8 bytes
+ * of the buffer contain 1 bit of entropy.
+ * The worker thread is rescheduled based on the fill level of the
+ * buffer, but at least with a 500 ms delay to avoid too much CPU
+ * consumption. So the maximum amount of rng data delivered via
+ * arch_get_random_seed is limited to 4k bytes per second.
+ */
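+
+/*
+ * For illustration only: a minimal sketch of how a consumer is expected to
+ * pull from this buffer, assuming the arch_get_random_seed_long() wrapper
+ * in asm/archrandom.h looks roughly like this (sketch, not the verbatim
+ * header):
+ *
+ *	static inline bool arch_get_random_seed_long(unsigned long *v)
+ *	{
+ *		if (static_branch_likely(&s390_arch_random_available))
+ *			return s390_arch_random_generate((u8 *)v, sizeof(*v));
+ *		return false;
+ *	}
+ */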
+
+#include <linux/kernel.h>
+#include <linux/atomic.h>
+#include <linux/random.h>
+#include <linux/slab.h>
+#include <linux/static_key.h>
+#include <linux/workqueue.h>
+#include <asm/cpacf.h>
+
+DEFINE_STATIC_KEY_FALSE(s390_arch_random_available);
+
+atomic64_t s390_arch_random_counter = ATOMIC64_INIT(0);
+EXPORT_SYMBOL(s390_arch_random_counter);
+
+#define ARCH_REFILL_TICKS (HZ/2)
+#define ARCH_PRNG_SEED_SIZE 32
+#define ARCH_RNG_BUF_SIZE 2048
+
+static DEFINE_SPINLOCK(arch_rng_lock);
+static u8 *arch_rng_buf;
+static unsigned int arch_rng_buf_idx;
+
+static void arch_rng_refill_buffer(struct work_struct *);
+static DECLARE_DELAYED_WORK(arch_rng_work, arch_rng_refill_buffer);
+
+bool s390_arch_random_generate(u8 *buf, unsigned int nbytes)
+{
+ /* max hunk is ARCH_RNG_BUF_SIZE */
+ if (nbytes > ARCH_RNG_BUF_SIZE)
+ return false;
+
+ /* lock rng buffer */
+ if (!spin_trylock(&arch_rng_lock))
+ return false;
+
+	/* try to satisfy the requested number of bytes from the buffer */
+ arch_rng_buf_idx -= nbytes;
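+	/* on underflow the unsigned index wraps above ARCH_RNG_BUF_SIZE */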
+ if (arch_rng_buf_idx < ARCH_RNG_BUF_SIZE) {
+ memcpy(buf, arch_rng_buf + arch_rng_buf_idx, nbytes);
+ atomic64_add(nbytes, &s390_arch_random_counter);
+ spin_unlock(&arch_rng_lock);
+ return true;
+ }
+
+ /* not enough bytes in rng buffer, refill is done asynchronously */
+ spin_unlock(&arch_rng_lock);
+
+ return false;
+}
+EXPORT_SYMBOL(s390_arch_random_generate);
+
+static void arch_rng_refill_buffer(struct work_struct *unused)
+{
+ unsigned int delay = ARCH_REFILL_TICKS;
+
+ spin_lock(&arch_rng_lock);
+ if (arch_rng_buf_idx > ARCH_RNG_BUF_SIZE) {
+ /* buffer is exhausted and needs refill */
+ u8 seed[ARCH_PRNG_SEED_SIZE];
+ u8 prng_wa[240];
+ /* fetch ARCH_PRNG_SEED_SIZE bytes of entropy */
+ cpacf_trng(NULL, 0, seed, sizeof(seed));
+ /* blow this entropy up to ARCH_RNG_BUF_SIZE with PRNG */
+ memset(prng_wa, 0, sizeof(prng_wa));
+ cpacf_prno(CPACF_PRNO_SHA512_DRNG_SEED,
+ &prng_wa, NULL, 0, seed, sizeof(seed));
+ cpacf_prno(CPACF_PRNO_SHA512_DRNG_GEN,
+ &prng_wa, arch_rng_buf, ARCH_RNG_BUF_SIZE, NULL, 0);
+ arch_rng_buf_idx = ARCH_RNG_BUF_SIZE;
+ }
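+	/* the fuller the buffer, the longer until the next check (0.5 s .. 1 s) */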
+ delay += (ARCH_REFILL_TICKS * arch_rng_buf_idx) / ARCH_RNG_BUF_SIZE;
+ spin_unlock(&arch_rng_lock);
+
+ /* kick next check */
+ queue_delayed_work(system_long_wq, &arch_rng_work, delay);
+}
+
+static int __init s390_arch_random_init(void)
+{
+	/* are all the needed PRNO subfunctions available? */
+ if (cpacf_query_func(CPACF_PRNO, CPACF_PRNO_TRNG) &&
+ cpacf_query_func(CPACF_PRNO, CPACF_PRNO_SHA512_DRNG_GEN)) {
+
+ /* alloc arch random working buffer */
+ arch_rng_buf = kmalloc(ARCH_RNG_BUF_SIZE, GFP_KERNEL);
+ if (!arch_rng_buf)
+ return -ENOMEM;
+
+ /* kick worker queue job to fill the random buffer */
+ queue_delayed_work(system_long_wq,
+ &arch_rng_work, ARCH_REFILL_TICKS);
+
+ /* enable arch random to the outside world */
+ static_branch_enable(&s390_arch_random_available);
+ }
+
+ return 0;
+}
+arch_initcall(s390_arch_random_init);
diff --git a/arch/s390/crypto/crc32-vx.c b/arch/s390/crypto/crc32-vx.c
new file mode 100644
index 000000000..423ee0588
--- /dev/null
+++ b/arch/s390/crypto/crc32-vx.c
@@ -0,0 +1,314 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Crypto-API module for CRC-32 algorithms implemented with the
+ * z/Architecture Vector Extension Facility.
+ *
+ * Copyright IBM Corp. 2015
+ * Author(s): Hendrik Brueckner <brueckner@linux.vnet.ibm.com>
+ */
+#define KMSG_COMPONENT "crc32-vx"
+#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
+
+#include <linux/module.h>
+#include <linux/cpufeature.h>
+#include <linux/crc32.h>
+#include <crypto/internal/hash.h>
+#include <asm/fpu/api.h>
+
+
+#define CRC32_BLOCK_SIZE 1
+#define CRC32_DIGEST_SIZE 4
+
+#define VX_MIN_LEN 64
+#define VX_ALIGNMENT 16L
+#define VX_ALIGN_MASK (VX_ALIGNMENT - 1)
+
+struct crc_ctx {
+ u32 key;
+};
+
+struct crc_desc_ctx {
+ u32 crc;
+};
+
+/* Prototypes for functions in assembly files */
+u32 crc32_le_vgfm_16(u32 crc, unsigned char const *buf, size_t size);
+u32 crc32_be_vgfm_16(u32 crc, unsigned char const *buf, size_t size);
+u32 crc32c_le_vgfm_16(u32 crc, unsigned char const *buf, size_t size);
+
+/*
+ * DEFINE_CRC32_VX() - Define a CRC-32 function using the vector extension
+ *
+ * Creates a function to perform a particular CRC-32 computation. Depending
+ * on the message buffer size and alignment, either the hardware-accelerated
+ * or the software implementation is used. Note that the message buffer is
+ * aligned to improve fetch operations of VECTOR LOAD MULTIPLE instructions.
+ *
+ */
+#define DEFINE_CRC32_VX(___fname, ___crc32_vx, ___crc32_sw) \
+ static u32 __pure ___fname(u32 crc, \
+ unsigned char const *data, size_t datalen) \
+ { \
+ struct kernel_fpu vxstate; \
+ unsigned long prealign, aligned, remaining; \
+ \
+ if (datalen < VX_MIN_LEN + VX_ALIGN_MASK) \
+ return ___crc32_sw(crc, data, datalen); \
+ \
+ if ((unsigned long)data & VX_ALIGN_MASK) { \
+ prealign = VX_ALIGNMENT - \
+ ((unsigned long)data & VX_ALIGN_MASK); \
+ datalen -= prealign; \
+ crc = ___crc32_sw(crc, data, prealign); \
+ data = (void *)((unsigned long)data + prealign); \
+ } \
+ \
+ aligned = datalen & ~VX_ALIGN_MASK; \
+ remaining = datalen & VX_ALIGN_MASK; \
+ \
+ kernel_fpu_begin(&vxstate, KERNEL_VXR_LOW); \
+ crc = ___crc32_vx(crc, data, aligned); \
+ kernel_fpu_end(&vxstate, KERNEL_VXR_LOW); \
+ \
+ if (remaining) \
+ crc = ___crc32_sw(crc, data + aligned, remaining); \
+ \
+ return crc; \
+ }
+
+DEFINE_CRC32_VX(crc32_le_vx, crc32_le_vgfm_16, crc32_le)
+DEFINE_CRC32_VX(crc32_be_vx, crc32_be_vgfm_16, crc32_be)
+DEFINE_CRC32_VX(crc32c_le_vx, crc32c_le_vgfm_16, __crc32c_le)
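+
+/*
+ * Usage sketch (illustrative only): the generated wrappers are drop-in
+ * replacements for the generic helpers, e.g.
+ *
+ *	u32 crc = crc32_le_vx(~0U, buf, len);
+ *
+ * Buffers shorter than VX_MIN_LEN + VX_ALIGN_MASK bytes (64 + 15) are
+ * handled entirely by the software fallback; longer buffers are processed
+ * with the vector facility, with any unaligned head and the sub-16-byte
+ * tail again delegated to the software implementation.
+ */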
+
+
+static int crc32_vx_cra_init_zero(struct crypto_tfm *tfm)
+{
+ struct crc_ctx *mctx = crypto_tfm_ctx(tfm);
+
+ mctx->key = 0;
+ return 0;
+}
+
+static int crc32_vx_cra_init_invert(struct crypto_tfm *tfm)
+{
+ struct crc_ctx *mctx = crypto_tfm_ctx(tfm);
+
+ mctx->key = ~0;
+ return 0;
+}
+
+static int crc32_vx_init(struct shash_desc *desc)
+{
+ struct crc_ctx *mctx = crypto_shash_ctx(desc->tfm);
+ struct crc_desc_ctx *ctx = shash_desc_ctx(desc);
+
+ ctx->crc = mctx->key;
+ return 0;
+}
+
+static int crc32_vx_setkey(struct crypto_shash *tfm, const u8 *newkey,
+ unsigned int newkeylen)
+{
+ struct crc_ctx *mctx = crypto_shash_ctx(tfm);
+
+ if (newkeylen != sizeof(mctx->key)) {
+ crypto_shash_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN);
+ return -EINVAL;
+ }
+ mctx->key = le32_to_cpu(*(__le32 *)newkey);
+ return 0;
+}
+
+static int crc32be_vx_setkey(struct crypto_shash *tfm, const u8 *newkey,
+ unsigned int newkeylen)
+{
+ struct crc_ctx *mctx = crypto_shash_ctx(tfm);
+
+ if (newkeylen != sizeof(mctx->key)) {
+ crypto_shash_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN);
+ return -EINVAL;
+ }
+ mctx->key = be32_to_cpu(*(__be32 *)newkey);
+ return 0;
+}
+
+static int crc32le_vx_final(struct shash_desc *desc, u8 *out)
+{
+ struct crc_desc_ctx *ctx = shash_desc_ctx(desc);
+
+ *(__le32 *)out = cpu_to_le32p(&ctx->crc);
+ return 0;
+}
+
+static int crc32be_vx_final(struct shash_desc *desc, u8 *out)
+{
+ struct crc_desc_ctx *ctx = shash_desc_ctx(desc);
+
+ *(__be32 *)out = cpu_to_be32p(&ctx->crc);
+ return 0;
+}
+
+static int crc32c_vx_final(struct shash_desc *desc, u8 *out)
+{
+ struct crc_desc_ctx *ctx = shash_desc_ctx(desc);
+
+ /*
+ * Perform a final XOR with 0xFFFFFFFF to be in sync
+ * with the generic crc32c shash implementation.
+ */
+ *(__le32 *)out = ~cpu_to_le32p(&ctx->crc);
+ return 0;
+}
+
+static int __crc32le_vx_finup(u32 *crc, const u8 *data, unsigned int len,
+ u8 *out)
+{
+ *(__le32 *)out = cpu_to_le32(crc32_le_vx(*crc, data, len));
+ return 0;
+}
+
+static int __crc32be_vx_finup(u32 *crc, const u8 *data, unsigned int len,
+ u8 *out)
+{
+ *(__be32 *)out = cpu_to_be32(crc32_be_vx(*crc, data, len));
+ return 0;
+}
+
+static int __crc32c_vx_finup(u32 *crc, const u8 *data, unsigned int len,
+ u8 *out)
+{
+ /*
+ * Perform a final XOR with 0xFFFFFFFF to be in sync
+ * with the generic crc32c shash implementation.
+ */
+ *(__le32 *)out = ~cpu_to_le32(crc32c_le_vx(*crc, data, len));
+ return 0;
+}
+
+
+#define CRC32_VX_FINUP(alg, func) \
+ static int alg ## _vx_finup(struct shash_desc *desc, const u8 *data, \
+ unsigned int datalen, u8 *out) \
+ { \
+ return __ ## alg ## _vx_finup(shash_desc_ctx(desc), \
+ data, datalen, out); \
+ }
+
+CRC32_VX_FINUP(crc32le, crc32_le_vx)
+CRC32_VX_FINUP(crc32be, crc32_be_vx)
+CRC32_VX_FINUP(crc32c, crc32c_le_vx)
+
+#define CRC32_VX_DIGEST(alg, func) \
+ static int alg ## _vx_digest(struct shash_desc *desc, const u8 *data, \
+ unsigned int len, u8 *out) \
+ { \
+ return __ ## alg ## _vx_finup(crypto_shash_ctx(desc->tfm), \
+ data, len, out); \
+ }
+
+CRC32_VX_DIGEST(crc32le, crc32_le_vx)
+CRC32_VX_DIGEST(crc32be, crc32_be_vx)
+CRC32_VX_DIGEST(crc32c, crc32c_le_vx)
+
+#define CRC32_VX_UPDATE(alg, func) \
+ static int alg ## _vx_update(struct shash_desc *desc, const u8 *data, \
+ unsigned int datalen) \
+ { \
+ struct crc_desc_ctx *ctx = shash_desc_ctx(desc); \
+ ctx->crc = func(ctx->crc, data, datalen); \
+ return 0; \
+ }
+
+CRC32_VX_UPDATE(crc32le, crc32_le_vx)
+CRC32_VX_UPDATE(crc32be, crc32_be_vx)
+CRC32_VX_UPDATE(crc32c, crc32c_le_vx)
+
+
+static struct shash_alg crc32_vx_algs[] = {
+ /* CRC-32 LE */
+ {
+ .init = crc32_vx_init,
+ .setkey = crc32_vx_setkey,
+ .update = crc32le_vx_update,
+ .final = crc32le_vx_final,
+ .finup = crc32le_vx_finup,
+ .digest = crc32le_vx_digest,
+ .descsize = sizeof(struct crc_desc_ctx),
+ .digestsize = CRC32_DIGEST_SIZE,
+ .base = {
+ .cra_name = "crc32",
+ .cra_driver_name = "crc32-vx",
+ .cra_priority = 200,
+ .cra_flags = CRYPTO_ALG_OPTIONAL_KEY,
+ .cra_blocksize = CRC32_BLOCK_SIZE,
+ .cra_ctxsize = sizeof(struct crc_ctx),
+ .cra_module = THIS_MODULE,
+ .cra_init = crc32_vx_cra_init_zero,
+ },
+ },
+ /* CRC-32 BE */
+ {
+ .init = crc32_vx_init,
+ .setkey = crc32be_vx_setkey,
+ .update = crc32be_vx_update,
+ .final = crc32be_vx_final,
+ .finup = crc32be_vx_finup,
+ .digest = crc32be_vx_digest,
+ .descsize = sizeof(struct crc_desc_ctx),
+ .digestsize = CRC32_DIGEST_SIZE,
+ .base = {
+ .cra_name = "crc32be",
+ .cra_driver_name = "crc32be-vx",
+ .cra_priority = 200,
+ .cra_flags = CRYPTO_ALG_OPTIONAL_KEY,
+ .cra_blocksize = CRC32_BLOCK_SIZE,
+ .cra_ctxsize = sizeof(struct crc_ctx),
+ .cra_module = THIS_MODULE,
+ .cra_init = crc32_vx_cra_init_zero,
+ },
+ },
+ /* CRC-32C LE */
+ {
+ .init = crc32_vx_init,
+ .setkey = crc32_vx_setkey,
+ .update = crc32c_vx_update,
+ .final = crc32c_vx_final,
+ .finup = crc32c_vx_finup,
+ .digest = crc32c_vx_digest,
+ .descsize = sizeof(struct crc_desc_ctx),
+ .digestsize = CRC32_DIGEST_SIZE,
+ .base = {
+ .cra_name = "crc32c",
+ .cra_driver_name = "crc32c-vx",
+ .cra_priority = 200,
+ .cra_flags = CRYPTO_ALG_OPTIONAL_KEY,
+ .cra_blocksize = CRC32_BLOCK_SIZE,
+ .cra_ctxsize = sizeof(struct crc_ctx),
+ .cra_module = THIS_MODULE,
+ .cra_init = crc32_vx_cra_init_invert,
+ },
+ },
+};
+
+
+static int __init crc_vx_mod_init(void)
+{
+ return crypto_register_shashes(crc32_vx_algs,
+ ARRAY_SIZE(crc32_vx_algs));
+}
+
+static void __exit crc_vx_mod_exit(void)
+{
+ crypto_unregister_shashes(crc32_vx_algs, ARRAY_SIZE(crc32_vx_algs));
+}
+
+module_cpu_feature_match(VXRS, crc_vx_mod_init);
+module_exit(crc_vx_mod_exit);
+
+MODULE_AUTHOR("Hendrik Brueckner <brueckner@linux.vnet.ibm.com>");
+MODULE_LICENSE("GPL");
+
+MODULE_ALIAS_CRYPTO("crc32");
+MODULE_ALIAS_CRYPTO("crc32-vx");
+MODULE_ALIAS_CRYPTO("crc32c");
+MODULE_ALIAS_CRYPTO("crc32c-vx");
diff --git a/arch/s390/crypto/crc32be-vx.S b/arch/s390/crypto/crc32be-vx.S
new file mode 100644
index 000000000..2bf01ba44
--- /dev/null
+++ b/arch/s390/crypto/crc32be-vx.S
@@ -0,0 +1,211 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Hardware-accelerated CRC-32 variants for Linux on z Systems
+ *
+ * Use the z/Architecture Vector Extension Facility to accelerate the
+ * computing of CRC-32 checksums.
+ *
+ * This CRC-32 implementation algorithm processes the most-significant
+ * bit first (BE).
+ *
+ * Copyright IBM Corp. 2015
+ * Author(s): Hendrik Brueckner <brueckner@linux.vnet.ibm.com>
+ */
+
+#include <linux/linkage.h>
+#include <asm/nospec-insn.h>
+#include <asm/vx-insn.h>
+
+/* Vector register range containing CRC-32 constants */
+#define CONST_R1R2 %v9
+#define CONST_R3R4 %v10
+#define CONST_R5 %v11
+#define CONST_R6 %v12
+#define CONST_RU_POLY %v13
+#define CONST_CRC_POLY %v14
+
+.data
+.align 8
+
+/*
+ * The CRC-32 constant block contains reduction constants to fold and
+ * process particular chunks of the input data stream in parallel.
+ *
+ * For the CRC-32 variants, the constants are precomputed according to
+ * these definitions:
+ *
+ *	R1 = x^(4*128+64) mod P(x)
+ *	R2 = x^(4*128) mod P(x)
+ *	R3 = x^(128+64) mod P(x)
+ *	R4 = x^128 mod P(x)
+ *	R5 = x^96 mod P(x)
+ *	R6 = x^64 mod P(x)
+ *
+ *	The Barrett reduction constant, u, is defined as floor(x^64 / P(x)).
+ *
+ * where P(x) is the polynomial in the normal domain and P'(x) is the
+ * polynomial in the reversed (bitreflected) domain.
+ *
+ * Note that the constant definitions below are extended in order to compute
+ * intermediate results with a single VECTOR GALOIS FIELD MULTIPLY instruction.
+ * The rightmost doubleword can be 0 to prevent contribution to the result or
+ * can be multiplied by 1 to perform an XOR without the need for a separate
+ * VECTOR EXCLUSIVE OR instruction.
+ *
+ * CRC-32 (IEEE 802.3 Ethernet, ...) polynomials:
+ *
+ * P(x) = 0x04C11DB7
+ * P'(x) = 0xEDB88320
+ */
+
+.Lconstants_CRC_32_BE:
+ .quad 0x08833794c, 0x0e6228b11 # R1, R2
+ .quad 0x0c5b9cd4c, 0x0e8a45605 # R3, R4
+ .quad 0x0f200aa66, 1 << 32 # R5, x32
+ .quad 0x0490d678d, 1 # R6, 1
+ .quad 0x104d101df, 0 # u
+ .quad 0x104C11DB7, 0 # P(x)
+
+.previous
+
+ GEN_BR_THUNK %r14
+
+.text
+/*
+ * The CRC-32 function(s) use these calling conventions:
+ *
+ * Parameters:
+ *
+ * %r2: Initial CRC value, typically ~0; and final CRC (return) value.
+ * %r3: Input buffer pointer, performance might be improved if the
+ * buffer is on a doubleword boundary.
+ * %r4: Length of the buffer, must be 64 bytes or greater.
+ *
+ * Register usage:
+ *
+ * %r5: CRC-32 constant pool base pointer.
+ * V0: Initial CRC value and intermediate constants and results.
+ * V1..V4: Data for CRC computation.
+ * V5..V8: Next data chunks that are fetched from the input buffer.
+ *
+ * V9..V14: CRC-32 constants.
+ */
+ENTRY(crc32_be_vgfm_16)
+ /* Load CRC-32 constants */
+ larl %r5,.Lconstants_CRC_32_BE
+ VLM CONST_R1R2,CONST_CRC_POLY,0,%r5
+
+ /* Load the initial CRC value into the leftmost word of V0. */
+ VZERO %v0
+ VLVGF %v0,%r2,0
+
+ /* Load a 64-byte data chunk and XOR with CRC */
+ VLM %v1,%v4,0,%r3 /* 64-bytes into V1..V4 */
+ VX %v1,%v0,%v1 /* V1 ^= CRC */
+ aghi %r3,64 /* BUF = BUF + 64 */
+ aghi %r4,-64 /* LEN = LEN - 64 */
+
+ /* Check remaining buffer size and jump to proper folding method */
+ cghi %r4,64
+ jl .Lless_than_64bytes
+
+.Lfold_64bytes_loop:
+ /* Load the next 64-byte data chunk into V5 to V8 */
+ VLM %v5,%v8,0,%r3
+
+ /*
+ * Perform a GF(2) multiplication of the doublewords in V1 with
+ * the reduction constants in V0. The intermediate result is
+ * then folded (accumulated) with the next data chunk in V5 and
+ * stored in V1. Repeat this step for the register contents
+ * in V2, V3, and V4 respectively.
+ */
+ VGFMAG %v1,CONST_R1R2,%v1,%v5
+ VGFMAG %v2,CONST_R1R2,%v2,%v6
+ VGFMAG %v3,CONST_R1R2,%v3,%v7
+ VGFMAG %v4,CONST_R1R2,%v4,%v8
+
+ /* Adjust buffer pointer and length for next loop */
+ aghi %r3,64 /* BUF = BUF + 64 */
+ aghi %r4,-64 /* LEN = LEN - 64 */
+
+ cghi %r4,64
+ jnl .Lfold_64bytes_loop
+
+.Lless_than_64bytes:
+ /* Fold V1 to V4 into a single 128-bit value in V1 */
+ VGFMAG %v1,CONST_R3R4,%v1,%v2
+ VGFMAG %v1,CONST_R3R4,%v1,%v3
+ VGFMAG %v1,CONST_R3R4,%v1,%v4
+
+ /* Check whether to continue with 64-bit folding */
+ cghi %r4,16
+ jl .Lfinal_fold
+
+.Lfold_16bytes_loop:
+
+ VL %v2,0,,%r3 /* Load next data chunk */
+ VGFMAG %v1,CONST_R3R4,%v1,%v2 /* Fold next data chunk */
+
+ /* Adjust buffer pointer and size for folding next data chunk */
+ aghi %r3,16
+ aghi %r4,-16
+
+ /* Process remaining data chunks */
+ cghi %r4,16
+ jnl .Lfold_16bytes_loop
+
+.Lfinal_fold:
+ /*
+	 * The R5 constant is used to fold a 128-bit value into a 96-bit value
+ * that is XORed with the next 96-bit input data chunk. To use a single
+ * VGFMG instruction, multiply the rightmost 64-bit with x^32 (1<<32) to
+ * form an intermediate 96-bit value (with appended zeros) which is then
+ * XORed with the intermediate reduction result.
+ */
+ VGFMG %v1,CONST_R5,%v1
+
+ /*
+ * Further reduce the remaining 96-bit value to a 64-bit value using a
+ * single VGFMG, the rightmost doubleword is multiplied with 0x1. The
+ * intermediate result is then XORed with the product of the leftmost
+ * doubleword with R6. The result is a 64-bit value and is subject to
+	 * the Barrett reduction.
+ */
+ VGFMG %v1,CONST_R6,%v1
+
+ /*
+	 * The input values to the Barrett reduction are the degree-63 polynomial
+	 * in V1 (R(x)), the degree-32 generator polynomial, and the reduction
+	 * constant u. The Barrett reduction result is the CRC value of R(x) mod
+	 * P(x).
+	 *
+	 * The Barrett reduction algorithm is defined as:
+	 *
+	 *    1. T1(x) = floor( R(x) / x^32 ) GF2MUL u
+	 *    2. T2(x) = floor( T1(x) / x^32 ) GF2MUL P(x)
+	 *    3. C(x) = R(x) XOR T2(x) mod x^32
+	 *
+	 * Note: To compensate for the division by x^32, use the vector unpack
+	 * instruction to move the leftmost word into the leftmost doubleword
+	 * of the vector register. The rightmost doubleword is multiplied
+	 * with zero to not contribute to the intermediate results.
+ */
+
+ /* T1(x) = floor( R(x) / x^32 ) GF2MUL u */
+ VUPLLF %v2,%v1
+ VGFMG %v2,CONST_RU_POLY,%v2
+
+ /*
+	 * Compute the GF(2) product of the CRC polynomial (CONST_CRC_POLY) with
+	 * T1(x) in V2 and XOR the intermediate result, T2(x), with the value in V1.
+ * The final result is in the rightmost word of V2.
+ */
+ VUPLLF %v2,%v2
+ VGFMAG %v2,CONST_CRC_POLY,%v2,%v1
+
+.Ldone:
+ VLGVF %r2,%v2,3
+ BR_EX %r14
+
+.previous
diff --git a/arch/s390/crypto/crc32le-vx.S b/arch/s390/crypto/crc32le-vx.S
new file mode 100644
index 000000000..7d6f568bd
--- /dev/null
+++ b/arch/s390/crypto/crc32le-vx.S
@@ -0,0 +1,271 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Hardware-accelerated CRC-32 variants for Linux on z Systems
+ *
+ * Use the z/Architecture Vector Extension Facility to accelerate the
+ * computing of bitreflected CRC-32 checksums for IEEE 802.3 Ethernet
+ * and Castagnoli.
+ *
+ * This CRC-32 implementation algorithm is bitreflected and processes
+ * the least-significant bit first (Little-Endian).
+ *
+ * Copyright IBM Corp. 2015
+ * Author(s): Hendrik Brueckner <brueckner@linux.vnet.ibm.com>
+ */
+
+#include <linux/linkage.h>
+#include <asm/nospec-insn.h>
+#include <asm/vx-insn.h>
+
+/* Vector register range containing CRC-32 constants */
+#define CONST_PERM_LE2BE %v9
+#define CONST_R2R1 %v10
+#define CONST_R4R3 %v11
+#define CONST_R5 %v12
+#define CONST_RU_POLY %v13
+#define CONST_CRC_POLY %v14
+
+.data
+.align 8
+
+/*
+ * The CRC-32 constant block contains reduction constants to fold and
+ * process particular chunks of the input data stream in parallel.
+ *
+ * For the CRC-32 variants, the constants are precomputed according to
+ * these definitions:
+ *
+ *	R1 = [(x^(4*128+32) mod P'(x) << 32)]' << 1
+ *	R2 = [(x^(4*128-32) mod P'(x) << 32)]' << 1
+ *	R3 = [(x^(128+32) mod P'(x) << 32)]' << 1
+ *	R4 = [(x^(128-32) mod P'(x) << 32)]' << 1
+ *	R5 = [(x^64 mod P'(x) << 32)]' << 1
+ *	R6 = [(x^32 mod P'(x) << 32)]' << 1
+ *
+ * The bitreflected Barrett reduction constant, u', is defined as
+ * the bit reversal of floor(x^64 / P(x)).
+ *
+ * where P(x) is the polynomial in the normal domain and P'(x) is the
+ * polynomial in the reversed (bitreflected) domain.
+ *
+ * CRC-32 (IEEE 802.3 Ethernet, ...) polynomials:
+ *
+ * P(x) = 0x04C11DB7
+ * P'(x) = 0xEDB88320
+ *
+ * CRC-32C (Castagnoli) polynomials:
+ *
+ * P(x) = 0x1EDC6F41
+ * P'(x) = 0x82F63B78
+ */
+
+.Lconstants_CRC_32_LE:
+ .octa 0x0F0E0D0C0B0A09080706050403020100 # BE->LE mask
+ .quad 0x1c6e41596, 0x154442bd4 # R2, R1
+ .quad 0x0ccaa009e, 0x1751997d0 # R4, R3
+ .octa 0x163cd6124 # R5
+ .octa 0x1F7011641 # u'
+ .octa 0x1DB710641 # P'(x) << 1
+
+.Lconstants_CRC_32C_LE:
+ .octa 0x0F0E0D0C0B0A09080706050403020100 # BE->LE mask
+ .quad 0x09e4addf8, 0x740eef02 # R2, R1
+ .quad 0x14cd00bd6, 0xf20c0dfe # R4, R3
+ .octa 0x0dd45aab8 # R5
+ .octa 0x0dea713f1 # u'
+ .octa 0x105ec76f0 # P'(x) << 1
+
+.previous
+
+ GEN_BR_THUNK %r14
+
+.text
+
+/*
+ * The CRC-32 functions use these calling conventions:
+ *
+ * Parameters:
+ *
+ * %r2: Initial CRC value, typically ~0; and final CRC (return) value.
+ * %r3: Input buffer pointer, performance might be improved if the
+ * buffer is on a doubleword boundary.
+ * %r4: Length of the buffer, must be 64 bytes or greater.
+ *
+ * Register usage:
+ *
+ * %r5: CRC-32 constant pool base pointer.
+ * V0: Initial CRC value and intermediate constants and results.
+ * V1..V4: Data for CRC computation.
+ * V5..V8: Next data chunks that are fetched from the input buffer.
+ * V9: Constant for BE->LE conversion and shift operations
+ *
+ * V10..V14: CRC-32 constants.
+ */
+
+ENTRY(crc32_le_vgfm_16)
+ larl %r5,.Lconstants_CRC_32_LE
+ j crc32_le_vgfm_generic
+
+ENTRY(crc32c_le_vgfm_16)
+ larl %r5,.Lconstants_CRC_32C_LE
+ j crc32_le_vgfm_generic
+
+
+crc32_le_vgfm_generic:
+ /* Load CRC-32 constants */
+ VLM CONST_PERM_LE2BE,CONST_CRC_POLY,0,%r5
+
+ /*
+ * Load the initial CRC value.
+ *
+ * The CRC value is loaded into the rightmost word of the
+ * vector register and is later XORed with the LSB portion
+ * of the loaded input data.
+ */
+ VZERO %v0 /* Clear V0 */
+ VLVGF %v0,%r2,3 /* Load CRC into rightmost word */
+
+ /* Load a 64-byte data chunk and XOR with CRC */
+ VLM %v1,%v4,0,%r3 /* 64-bytes into V1..V4 */
+ VPERM %v1,%v1,%v1,CONST_PERM_LE2BE
+ VPERM %v2,%v2,%v2,CONST_PERM_LE2BE
+ VPERM %v3,%v3,%v3,CONST_PERM_LE2BE
+ VPERM %v4,%v4,%v4,CONST_PERM_LE2BE
+
+ VX %v1,%v0,%v1 /* V1 ^= CRC */
+ aghi %r3,64 /* BUF = BUF + 64 */
+ aghi %r4,-64 /* LEN = LEN - 64 */
+
+ cghi %r4,64
+ jl .Lless_than_64bytes
+
+.Lfold_64bytes_loop:
+ /* Load the next 64-byte data chunk into V5 to V8 */
+ VLM %v5,%v8,0,%r3
+ VPERM %v5,%v5,%v5,CONST_PERM_LE2BE
+ VPERM %v6,%v6,%v6,CONST_PERM_LE2BE
+ VPERM %v7,%v7,%v7,CONST_PERM_LE2BE
+ VPERM %v8,%v8,%v8,CONST_PERM_LE2BE
+
+ /*
+ * Perform a GF(2) multiplication of the doublewords in V1 with
+ * the R1 and R2 reduction constants in V0. The intermediate result
+ * is then folded (accumulated) with the next data chunk in V5 and
+ * stored in V1. Repeat this step for the register contents
+ * in V2, V3, and V4 respectively.
+ */
+ VGFMAG %v1,CONST_R2R1,%v1,%v5
+ VGFMAG %v2,CONST_R2R1,%v2,%v6
+ VGFMAG %v3,CONST_R2R1,%v3,%v7
+ VGFMAG %v4,CONST_R2R1,%v4,%v8
+
+ aghi %r3,64 /* BUF = BUF + 64 */
+ aghi %r4,-64 /* LEN = LEN - 64 */
+
+ cghi %r4,64
+ jnl .Lfold_64bytes_loop
+
+.Lless_than_64bytes:
+ /*
+	 * Fold V1 to V4 into a single 128-bit value in V1. Multiply V1 with
+	 * R3 and R4 and accumulate the next 128-bit chunk until a single
+	 * 128-bit value remains.
+ */
+ VGFMAG %v1,CONST_R4R3,%v1,%v2
+ VGFMAG %v1,CONST_R4R3,%v1,%v3
+ VGFMAG %v1,CONST_R4R3,%v1,%v4
+
+ cghi %r4,16
+ jl .Lfinal_fold
+
+.Lfold_16bytes_loop:
+
+ VL %v2,0,,%r3 /* Load next data chunk */
+ VPERM %v2,%v2,%v2,CONST_PERM_LE2BE
+ VGFMAG %v1,CONST_R4R3,%v1,%v2 /* Fold next data chunk */
+
+ aghi %r3,16
+ aghi %r4,-16
+
+ cghi %r4,16
+ jnl .Lfold_16bytes_loop
+
+.Lfinal_fold:
+ /*
+ * Set up a vector register for byte shifts. The shift value must
+ * be loaded in bits 1-4 in byte element 7 of a vector register.
+ * Shift by 8 bytes: 0x40
+ * Shift by 4 bytes: 0x20
+ */
+ VLEIB %v9,0x40,7
+
+ /*
+	 * Prepare V0 for the next GF(2) multiplication: shift CONST_R4R3 by
+	 * 8 bytes into V0 to move R4 into the rightmost doubleword and set
+	 * the leftmost doubleword to 0x1.
+ */
+ VSRLB %v0,CONST_R4R3,%v9
+ VLEIG %v0,1,0
+
+ /*
+ * Compute GF(2) product of V1 and V0. The rightmost doubleword
+ * of V1 is multiplied with R4. The leftmost doubleword of V1 is
+	 * multiplied by 0x1 and is then XORed with the rightmost product.
+	 * Implicitly, the intermediate leftmost product becomes padded with
+	 * zeros.
+ */
+ VGFMG %v1,%v0,%v1
+
+ /*
+ * Now do the final 32-bit fold by multiplying the rightmost word
+ * in V1 with R5 and XOR the result with the remaining bits in V1.
+ *
+ * To achieve this by a single VGFMAG, right shift V1 by a word
+ * and store the result in V2 which is then accumulated. Use the
+ * vector unpack instruction to load the rightmost half of the
+ * doubleword into the rightmost doubleword element of V1; the other
+ * half is loaded in the leftmost doubleword.
+ * The vector register with CONST_R5 contains the R5 constant in the
+ * rightmost doubleword and the leftmost doubleword is zero to ignore
+ * the leftmost product of V1.
+ */
+ VLEIB %v9,0x20,7 /* Shift by words */
+ VSRLB %v2,%v1,%v9 /* Store remaining bits in V2 */
+ VUPLLF %v1,%v1 /* Split rightmost doubleword */
+ VGFMAG %v1,CONST_R5,%v1,%v2 /* V1 = (V1 * R5) XOR V2 */
+
+ /*
+	 * Apply a Barrett reduction to compute the final 32-bit CRC value.
+	 *
+	 * The input values to the Barrett reduction are the degree-63
+	 * polynomial in V1 (R(x)), the degree-32 generator polynomial, and the
+	 * reduction constant u. The Barrett reduction result is the CRC value
+	 * of R(x) mod P(x).
+	 *
+	 * The Barrett reduction algorithm is defined as:
+ *
+ * 1. T1(x) = floor( R(x) / x^32 ) GF2MUL u
+ * 2. T2(x) = floor( T1(x) / x^32 ) GF2MUL P(x)
+ * 3. C(x) = R(x) XOR T2(x) mod x^32
+ *
+	 * Note: The leftmost doubleword of the vector register containing
+ * CONST_RU_POLY is zero and, thus, the intermediate GF(2) product
+ * is zero and does not contribute to the final result.
+ */
+
+ /* T1(x) = floor( R(x) / x^32 ) GF2MUL u */
+ VUPLLF %v2,%v1
+ VGFMG %v2,CONST_RU_POLY,%v2
+
+ /*
+ * Compute the GF(2) product of the CRC polynomial with T1(x) in
+ * V2 and XOR the intermediate result, T2(x), with the value in V1.
+ * The final result is stored in word element 2 of V2.
+ */
+ VUPLLF %v2,%v2
+ VGFMAG %v2,CONST_CRC_POLY,%v2,%v1
+
+.Ldone:
+ VLGVF %r2,%v2,2
+ BR_EX %r14
+
+.previous
diff --git a/arch/s390/crypto/des_s390.c b/arch/s390/crypto/des_s390.c
new file mode 100644
index 000000000..65bda1178
--- /dev/null
+++ b/arch/s390/crypto/des_s390.c
@@ -0,0 +1,593 @@
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * Cryptographic API.
+ *
+ * s390 implementation of the DES Cipher Algorithm.
+ *
+ * Copyright IBM Corp. 2003, 2011
+ * Author(s): Thomas Spatzier
+ * Jan Glauber (jan.glauber@de.ibm.com)
+ */
+
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/cpufeature.h>
+#include <linux/crypto.h>
+#include <linux/fips.h>
+#include <linux/mutex.h>
+#include <crypto/algapi.h>
+#include <crypto/des.h>
+#include <asm/cpacf.h>
+
+#define DES3_KEY_SIZE (3 * DES_KEY_SIZE)
+
+static u8 *ctrblk;
+static DEFINE_MUTEX(ctrblk_lock);
+
+static cpacf_mask_t km_functions, kmc_functions, kmctr_functions;
+
+struct s390_des_ctx {
+ u8 iv[DES_BLOCK_SIZE];
+ u8 key[DES3_KEY_SIZE];
+};
+
+static int des_setkey(struct crypto_tfm *tfm, const u8 *key,
+ unsigned int key_len)
+{
+ struct s390_des_ctx *ctx = crypto_tfm_ctx(tfm);
+ u32 tmp[DES_EXPKEY_WORDS];
+
+ /* check for weak keys */
+ if (!des_ekey(tmp, key) &&
+ (tfm->crt_flags & CRYPTO_TFM_REQ_WEAK_KEY)) {
+ tfm->crt_flags |= CRYPTO_TFM_RES_WEAK_KEY;
+ return -EINVAL;
+ }
+
+ memcpy(ctx->key, key, key_len);
+ return 0;
+}
+
+static void des_encrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in)
+{
+ struct s390_des_ctx *ctx = crypto_tfm_ctx(tfm);
+
+ cpacf_km(CPACF_KM_DEA, ctx->key, out, in, DES_BLOCK_SIZE);
+}
+
+static void des_decrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in)
+{
+ struct s390_des_ctx *ctx = crypto_tfm_ctx(tfm);
+
+ cpacf_km(CPACF_KM_DEA | CPACF_DECRYPT,
+ ctx->key, out, in, DES_BLOCK_SIZE);
+}
+
+static struct crypto_alg des_alg = {
+ .cra_name = "des",
+ .cra_driver_name = "des-s390",
+ .cra_priority = 300,
+ .cra_flags = CRYPTO_ALG_TYPE_CIPHER,
+ .cra_blocksize = DES_BLOCK_SIZE,
+ .cra_ctxsize = sizeof(struct s390_des_ctx),
+ .cra_module = THIS_MODULE,
+ .cra_u = {
+ .cipher = {
+ .cia_min_keysize = DES_KEY_SIZE,
+ .cia_max_keysize = DES_KEY_SIZE,
+ .cia_setkey = des_setkey,
+ .cia_encrypt = des_encrypt,
+ .cia_decrypt = des_decrypt,
+ }
+ }
+};
+
+static int ecb_desall_crypt(struct blkcipher_desc *desc, unsigned long fc,
+ struct blkcipher_walk *walk)
+{
+ struct s390_des_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
+ unsigned int nbytes, n;
+ int ret;
+
+ ret = blkcipher_walk_virt(desc, walk);
+ while ((nbytes = walk->nbytes) >= DES_BLOCK_SIZE) {
+ /* only use complete blocks */
+ n = nbytes & ~(DES_BLOCK_SIZE - 1);
+ cpacf_km(fc, ctx->key, walk->dst.virt.addr,
+ walk->src.virt.addr, n);
+ ret = blkcipher_walk_done(desc, walk, nbytes - n);
+ }
+ return ret;
+}
+
+static int cbc_desall_crypt(struct blkcipher_desc *desc, unsigned long fc,
+ struct blkcipher_walk *walk)
+{
+ struct s390_des_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
+ unsigned int nbytes, n;
+ int ret;
+ struct {
+ u8 iv[DES_BLOCK_SIZE];
+ u8 key[DES3_KEY_SIZE];
+ } param;
+
+ ret = blkcipher_walk_virt(desc, walk);
+ memcpy(param.iv, walk->iv, DES_BLOCK_SIZE);
+ memcpy(param.key, ctx->key, DES3_KEY_SIZE);
+ while ((nbytes = walk->nbytes) >= DES_BLOCK_SIZE) {
+ /* only use complete blocks */
+ n = nbytes & ~(DES_BLOCK_SIZE - 1);
+ cpacf_kmc(fc, &param, walk->dst.virt.addr,
+ walk->src.virt.addr, n);
+ ret = blkcipher_walk_done(desc, walk, nbytes - n);
+ }
+ memcpy(walk->iv, param.iv, DES_BLOCK_SIZE);
+ return ret;
+}
+
+static int ecb_des_encrypt(struct blkcipher_desc *desc,
+ struct scatterlist *dst, struct scatterlist *src,
+ unsigned int nbytes)
+{
+ struct blkcipher_walk walk;
+
+ blkcipher_walk_init(&walk, dst, src, nbytes);
+ return ecb_desall_crypt(desc, CPACF_KM_DEA, &walk);
+}
+
+static int ecb_des_decrypt(struct blkcipher_desc *desc,
+ struct scatterlist *dst, struct scatterlist *src,
+ unsigned int nbytes)
+{
+ struct blkcipher_walk walk;
+
+ blkcipher_walk_init(&walk, dst, src, nbytes);
+ return ecb_desall_crypt(desc, CPACF_KM_DEA | CPACF_DECRYPT, &walk);
+}
+
+static struct crypto_alg ecb_des_alg = {
+ .cra_name = "ecb(des)",
+ .cra_driver_name = "ecb-des-s390",
+ .cra_priority = 400, /* combo: des + ecb */
+ .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
+ .cra_blocksize = DES_BLOCK_SIZE,
+ .cra_ctxsize = sizeof(struct s390_des_ctx),
+ .cra_type = &crypto_blkcipher_type,
+ .cra_module = THIS_MODULE,
+ .cra_u = {
+ .blkcipher = {
+ .min_keysize = DES_KEY_SIZE,
+ .max_keysize = DES_KEY_SIZE,
+ .setkey = des_setkey,
+ .encrypt = ecb_des_encrypt,
+ .decrypt = ecb_des_decrypt,
+ }
+ }
+};
+
+static int cbc_des_encrypt(struct blkcipher_desc *desc,
+ struct scatterlist *dst, struct scatterlist *src,
+ unsigned int nbytes)
+{
+ struct blkcipher_walk walk;
+
+ blkcipher_walk_init(&walk, dst, src, nbytes);
+ return cbc_desall_crypt(desc, CPACF_KMC_DEA, &walk);
+}
+
+static int cbc_des_decrypt(struct blkcipher_desc *desc,
+ struct scatterlist *dst, struct scatterlist *src,
+ unsigned int nbytes)
+{
+ struct blkcipher_walk walk;
+
+ blkcipher_walk_init(&walk, dst, src, nbytes);
+ return cbc_desall_crypt(desc, CPACF_KMC_DEA | CPACF_DECRYPT, &walk);
+}
+
+static struct crypto_alg cbc_des_alg = {
+ .cra_name = "cbc(des)",
+ .cra_driver_name = "cbc-des-s390",
+ .cra_priority = 400, /* combo: des + cbc */
+ .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
+ .cra_blocksize = DES_BLOCK_SIZE,
+ .cra_ctxsize = sizeof(struct s390_des_ctx),
+ .cra_type = &crypto_blkcipher_type,
+ .cra_module = THIS_MODULE,
+ .cra_u = {
+ .blkcipher = {
+ .min_keysize = DES_KEY_SIZE,
+ .max_keysize = DES_KEY_SIZE,
+ .ivsize = DES_BLOCK_SIZE,
+ .setkey = des_setkey,
+ .encrypt = cbc_des_encrypt,
+ .decrypt = cbc_des_decrypt,
+ }
+ }
+};
+
+/*
+ * RFC2451:
+ *
+ * For DES-EDE3, there is no known need to reject weak or
+ * complementation keys. Any weakness is obviated by the use of
+ * multiple keys.
+ *
+ * However, if the first two or last two independent 64-bit keys are
+ * equal (k1 == k2 or k2 == k3), then the DES3 operation is simply the
+ * same as DES. Implementers MUST reject keys that exhibit this
+ * property.
+ *
+ * In FIPS mode, additionally check that all 3 keys are unique.
+ *
+ */
+static int des3_setkey(struct crypto_tfm *tfm, const u8 *key,
+ unsigned int key_len)
+{
+ struct s390_des_ctx *ctx = crypto_tfm_ctx(tfm);
+
+ if (!(crypto_memneq(key, &key[DES_KEY_SIZE], DES_KEY_SIZE) &&
+ crypto_memneq(&key[DES_KEY_SIZE], &key[DES_KEY_SIZE * 2],
+ DES_KEY_SIZE)) &&
+ (tfm->crt_flags & CRYPTO_TFM_REQ_WEAK_KEY)) {
+ tfm->crt_flags |= CRYPTO_TFM_RES_WEAK_KEY;
+ return -EINVAL;
+ }
+
+ /* in fips mode, ensure k1 != k2 and k2 != k3 and k1 != k3 */
+ if (fips_enabled &&
+ !(crypto_memneq(key, &key[DES_KEY_SIZE], DES_KEY_SIZE) &&
+ crypto_memneq(&key[DES_KEY_SIZE], &key[DES_KEY_SIZE * 2],
+ DES_KEY_SIZE) &&
+ crypto_memneq(key, &key[DES_KEY_SIZE * 2], DES_KEY_SIZE))) {
+ tfm->crt_flags |= CRYPTO_TFM_RES_WEAK_KEY;
+ return -EINVAL;
+ }
+
+ memcpy(ctx->key, key, key_len);
+ return 0;
+}
+
+static void des3_encrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
+{
+ struct s390_des_ctx *ctx = crypto_tfm_ctx(tfm);
+
+ cpacf_km(CPACF_KM_TDEA_192, ctx->key, dst, src, DES_BLOCK_SIZE);
+}
+
+static void des3_decrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
+{
+ struct s390_des_ctx *ctx = crypto_tfm_ctx(tfm);
+
+ cpacf_km(CPACF_KM_TDEA_192 | CPACF_DECRYPT,
+ ctx->key, dst, src, DES_BLOCK_SIZE);
+}
+
+static struct crypto_alg des3_alg = {
+ .cra_name = "des3_ede",
+ .cra_driver_name = "des3_ede-s390",
+ .cra_priority = 300,
+ .cra_flags = CRYPTO_ALG_TYPE_CIPHER,
+ .cra_blocksize = DES_BLOCK_SIZE,
+ .cra_ctxsize = sizeof(struct s390_des_ctx),
+ .cra_module = THIS_MODULE,
+ .cra_u = {
+ .cipher = {
+ .cia_min_keysize = DES3_KEY_SIZE,
+ .cia_max_keysize = DES3_KEY_SIZE,
+ .cia_setkey = des3_setkey,
+ .cia_encrypt = des3_encrypt,
+ .cia_decrypt = des3_decrypt,
+ }
+ }
+};
+
+static int ecb_des3_encrypt(struct blkcipher_desc *desc,
+ struct scatterlist *dst, struct scatterlist *src,
+ unsigned int nbytes)
+{
+ struct blkcipher_walk walk;
+
+ blkcipher_walk_init(&walk, dst, src, nbytes);
+ return ecb_desall_crypt(desc, CPACF_KM_TDEA_192, &walk);
+}
+
+static int ecb_des3_decrypt(struct blkcipher_desc *desc,
+ struct scatterlist *dst, struct scatterlist *src,
+ unsigned int nbytes)
+{
+ struct blkcipher_walk walk;
+
+ blkcipher_walk_init(&walk, dst, src, nbytes);
+ return ecb_desall_crypt(desc, CPACF_KM_TDEA_192 | CPACF_DECRYPT,
+ &walk);
+}
+
+static struct crypto_alg ecb_des3_alg = {
+ .cra_name = "ecb(des3_ede)",
+ .cra_driver_name = "ecb-des3_ede-s390",
+ .cra_priority = 400, /* combo: des3 + ecb */
+ .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
+ .cra_blocksize = DES_BLOCK_SIZE,
+ .cra_ctxsize = sizeof(struct s390_des_ctx),
+ .cra_type = &crypto_blkcipher_type,
+ .cra_module = THIS_MODULE,
+ .cra_u = {
+ .blkcipher = {
+ .min_keysize = DES3_KEY_SIZE,
+ .max_keysize = DES3_KEY_SIZE,
+ .setkey = des3_setkey,
+ .encrypt = ecb_des3_encrypt,
+ .decrypt = ecb_des3_decrypt,
+ }
+ }
+};
+
+static int cbc_des3_encrypt(struct blkcipher_desc *desc,
+ struct scatterlist *dst, struct scatterlist *src,
+ unsigned int nbytes)
+{
+ struct blkcipher_walk walk;
+
+ blkcipher_walk_init(&walk, dst, src, nbytes);
+ return cbc_desall_crypt(desc, CPACF_KMC_TDEA_192, &walk);
+}
+
+static int cbc_des3_decrypt(struct blkcipher_desc *desc,
+ struct scatterlist *dst, struct scatterlist *src,
+ unsigned int nbytes)
+{
+ struct blkcipher_walk walk;
+
+ blkcipher_walk_init(&walk, dst, src, nbytes);
+ return cbc_desall_crypt(desc, CPACF_KMC_TDEA_192 | CPACF_DECRYPT,
+ &walk);
+}
+
+static struct crypto_alg cbc_des3_alg = {
+ .cra_name = "cbc(des3_ede)",
+ .cra_driver_name = "cbc-des3_ede-s390",
+ .cra_priority = 400, /* combo: des3 + cbc */
+ .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
+ .cra_blocksize = DES_BLOCK_SIZE,
+ .cra_ctxsize = sizeof(struct s390_des_ctx),
+ .cra_type = &crypto_blkcipher_type,
+ .cra_module = THIS_MODULE,
+ .cra_u = {
+ .blkcipher = {
+ .min_keysize = DES3_KEY_SIZE,
+ .max_keysize = DES3_KEY_SIZE,
+ .ivsize = DES_BLOCK_SIZE,
+ .setkey = des3_setkey,
+ .encrypt = cbc_des3_encrypt,
+ .decrypt = cbc_des3_decrypt,
+ }
+ }
+};
+
+static unsigned int __ctrblk_init(u8 *ctrptr, u8 *iv, unsigned int nbytes)
+{
+ unsigned int i, n;
+
+ /* align to block size, max. PAGE_SIZE */
+ n = (nbytes > PAGE_SIZE) ? PAGE_SIZE : nbytes & ~(DES_BLOCK_SIZE - 1);
+ memcpy(ctrptr, iv, DES_BLOCK_SIZE);
+ for (i = (n / DES_BLOCK_SIZE) - 1; i > 0; i--) {
+ memcpy(ctrptr + DES_BLOCK_SIZE, ctrptr, DES_BLOCK_SIZE);
+ crypto_inc(ctrptr + DES_BLOCK_SIZE, DES_BLOCK_SIZE);
+ ctrptr += DES_BLOCK_SIZE;
+ }
+ return n;
+}
+
+static int ctr_desall_crypt(struct blkcipher_desc *desc, unsigned long fc,
+ struct blkcipher_walk *walk)
+{
+ struct s390_des_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
+ u8 buf[DES_BLOCK_SIZE], *ctrptr;
+ unsigned int n, nbytes;
+ int ret, locked;
+
+ locked = mutex_trylock(&ctrblk_lock);
+
+ ret = blkcipher_walk_virt_block(desc, walk, DES_BLOCK_SIZE);
+ while ((nbytes = walk->nbytes) >= DES_BLOCK_SIZE) {
+ n = DES_BLOCK_SIZE;
+ if (nbytes >= 2*DES_BLOCK_SIZE && locked)
+ n = __ctrblk_init(ctrblk, walk->iv, nbytes);
+ ctrptr = (n > DES_BLOCK_SIZE) ? ctrblk : walk->iv;
+ cpacf_kmctr(fc, ctx->key, walk->dst.virt.addr,
+ walk->src.virt.addr, n, ctrptr);
+ if (ctrptr == ctrblk)
+ memcpy(walk->iv, ctrptr + n - DES_BLOCK_SIZE,
+ DES_BLOCK_SIZE);
+ crypto_inc(walk->iv, DES_BLOCK_SIZE);
+ ret = blkcipher_walk_done(desc, walk, nbytes - n);
+ }
+ if (locked)
+ mutex_unlock(&ctrblk_lock);
+ /* final block may be < DES_BLOCK_SIZE, copy only nbytes */
+ if (nbytes) {
+ cpacf_kmctr(fc, ctx->key, buf, walk->src.virt.addr,
+ DES_BLOCK_SIZE, walk->iv);
+ memcpy(walk->dst.virt.addr, buf, nbytes);
+ crypto_inc(walk->iv, DES_BLOCK_SIZE);
+ ret = blkcipher_walk_done(desc, walk, 0);
+ }
+ return ret;
+}
+
+static int ctr_des_encrypt(struct blkcipher_desc *desc,
+ struct scatterlist *dst, struct scatterlist *src,
+ unsigned int nbytes)
+{
+ struct blkcipher_walk walk;
+
+ blkcipher_walk_init(&walk, dst, src, nbytes);
+ return ctr_desall_crypt(desc, CPACF_KMCTR_DEA, &walk);
+}
+
+static int ctr_des_decrypt(struct blkcipher_desc *desc,
+ struct scatterlist *dst, struct scatterlist *src,
+ unsigned int nbytes)
+{
+ struct blkcipher_walk walk;
+
+ blkcipher_walk_init(&walk, dst, src, nbytes);
+ return ctr_desall_crypt(desc, CPACF_KMCTR_DEA | CPACF_DECRYPT, &walk);
+}
+
+static struct crypto_alg ctr_des_alg = {
+ .cra_name = "ctr(des)",
+ .cra_driver_name = "ctr-des-s390",
+ .cra_priority = 400, /* combo: des + ctr */
+ .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
+ .cra_blocksize = 1,
+ .cra_ctxsize = sizeof(struct s390_des_ctx),
+ .cra_type = &crypto_blkcipher_type,
+ .cra_module = THIS_MODULE,
+ .cra_u = {
+ .blkcipher = {
+ .min_keysize = DES_KEY_SIZE,
+ .max_keysize = DES_KEY_SIZE,
+ .ivsize = DES_BLOCK_SIZE,
+ .setkey = des_setkey,
+ .encrypt = ctr_des_encrypt,
+ .decrypt = ctr_des_decrypt,
+ }
+ }
+};
+
+static int ctr_des3_encrypt(struct blkcipher_desc *desc,
+ struct scatterlist *dst, struct scatterlist *src,
+ unsigned int nbytes)
+{
+ struct blkcipher_walk walk;
+
+ blkcipher_walk_init(&walk, dst, src, nbytes);
+ return ctr_desall_crypt(desc, CPACF_KMCTR_TDEA_192, &walk);
+}
+
+static int ctr_des3_decrypt(struct blkcipher_desc *desc,
+ struct scatterlist *dst, struct scatterlist *src,
+ unsigned int nbytes)
+{
+ struct blkcipher_walk walk;
+
+ blkcipher_walk_init(&walk, dst, src, nbytes);
+ return ctr_desall_crypt(desc, CPACF_KMCTR_TDEA_192 | CPACF_DECRYPT,
+ &walk);
+}
+
+static struct crypto_alg ctr_des3_alg = {
+ .cra_name = "ctr(des3_ede)",
+ .cra_driver_name = "ctr-des3_ede-s390",
+	.cra_priority		=	400,	/* combo: des3 + ctr */
+ .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
+ .cra_blocksize = 1,
+ .cra_ctxsize = sizeof(struct s390_des_ctx),
+ .cra_type = &crypto_blkcipher_type,
+ .cra_module = THIS_MODULE,
+ .cra_u = {
+ .blkcipher = {
+ .min_keysize = DES3_KEY_SIZE,
+ .max_keysize = DES3_KEY_SIZE,
+ .ivsize = DES_BLOCK_SIZE,
+ .setkey = des3_setkey,
+ .encrypt = ctr_des3_encrypt,
+ .decrypt = ctr_des3_decrypt,
+ }
+ }
+};
+
+static struct crypto_alg *des_s390_algs_ptr[8];
+static int des_s390_algs_num;
+
+static int des_s390_register_alg(struct crypto_alg *alg)
+{
+ int ret;
+
+ ret = crypto_register_alg(alg);
+ if (!ret)
+ des_s390_algs_ptr[des_s390_algs_num++] = alg;
+ return ret;
+}
+
+static void des_s390_exit(void)
+{
+ while (des_s390_algs_num--)
+ crypto_unregister_alg(des_s390_algs_ptr[des_s390_algs_num]);
+ if (ctrblk)
+ free_page((unsigned long) ctrblk);
+}
+
+static int __init des_s390_init(void)
+{
+ int ret;
+
+ /* Query available functions for KM, KMC and KMCTR */
+ cpacf_query(CPACF_KM, &km_functions);
+ cpacf_query(CPACF_KMC, &kmc_functions);
+ cpacf_query(CPACF_KMCTR, &kmctr_functions);
+
+ if (cpacf_test_func(&km_functions, CPACF_KM_DEA)) {
+ ret = des_s390_register_alg(&des_alg);
+ if (ret)
+ goto out_err;
+ ret = des_s390_register_alg(&ecb_des_alg);
+ if (ret)
+ goto out_err;
+ }
+ if (cpacf_test_func(&kmc_functions, CPACF_KMC_DEA)) {
+ ret = des_s390_register_alg(&cbc_des_alg);
+ if (ret)
+ goto out_err;
+ }
+ if (cpacf_test_func(&km_functions, CPACF_KM_TDEA_192)) {
+ ret = des_s390_register_alg(&des3_alg);
+ if (ret)
+ goto out_err;
+ ret = des_s390_register_alg(&ecb_des3_alg);
+ if (ret)
+ goto out_err;
+ }
+ if (cpacf_test_func(&kmc_functions, CPACF_KMC_TDEA_192)) {
+ ret = des_s390_register_alg(&cbc_des3_alg);
+ if (ret)
+ goto out_err;
+ }
+
+ if (cpacf_test_func(&kmctr_functions, CPACF_KMCTR_DEA) ||
+ cpacf_test_func(&kmctr_functions, CPACF_KMCTR_TDEA_192)) {
+ ctrblk = (u8 *) __get_free_page(GFP_KERNEL);
+ if (!ctrblk) {
+ ret = -ENOMEM;
+ goto out_err;
+ }
+ }
+
+ if (cpacf_test_func(&kmctr_functions, CPACF_KMCTR_DEA)) {
+ ret = des_s390_register_alg(&ctr_des_alg);
+ if (ret)
+ goto out_err;
+ }
+ if (cpacf_test_func(&kmctr_functions, CPACF_KMCTR_TDEA_192)) {
+ ret = des_s390_register_alg(&ctr_des3_alg);
+ if (ret)
+ goto out_err;
+ }
+
+ return 0;
+out_err:
+ des_s390_exit();
+ return ret;
+}
+
+module_cpu_feature_match(MSA, des_s390_init);
+module_exit(des_s390_exit);
+
+MODULE_ALIAS_CRYPTO("des");
+MODULE_ALIAS_CRYPTO("des3_ede");
+
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("DES & Triple DES EDE Cipher Algorithms");
diff --git a/arch/s390/crypto/ghash_s390.c b/arch/s390/crypto/ghash_s390.c
new file mode 100644
index 000000000..86aed30fa
--- /dev/null
+++ b/arch/s390/crypto/ghash_s390.c
@@ -0,0 +1,156 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Cryptographic API.
+ *
+ * s390 implementation of the GHASH algorithm for GCM (Galois/Counter Mode).
+ *
+ * Copyright IBM Corp. 2011
+ * Author(s): Gerald Schaefer <gerald.schaefer@de.ibm.com>
+ */
+
+#include <crypto/internal/hash.h>
+#include <linux/module.h>
+#include <linux/cpufeature.h>
+#include <asm/cpacf.h>
+
+#define GHASH_BLOCK_SIZE 16
+#define GHASH_DIGEST_SIZE 16
+
+struct ghash_ctx {
+ u8 key[GHASH_BLOCK_SIZE];
+};
+
+struct ghash_desc_ctx {
+ u8 icv[GHASH_BLOCK_SIZE];
+ u8 key[GHASH_BLOCK_SIZE];
+ u8 buffer[GHASH_BLOCK_SIZE];
+ u32 bytes;
+};
+
+static int ghash_init(struct shash_desc *desc)
+{
+ struct ghash_desc_ctx *dctx = shash_desc_ctx(desc);
+ struct ghash_ctx *ctx = crypto_shash_ctx(desc->tfm);
+
+ memset(dctx, 0, sizeof(*dctx));
+ memcpy(dctx->key, ctx->key, GHASH_BLOCK_SIZE);
+
+ return 0;
+}
+
+static int ghash_setkey(struct crypto_shash *tfm,
+ const u8 *key, unsigned int keylen)
+{
+ struct ghash_ctx *ctx = crypto_shash_ctx(tfm);
+
+ if (keylen != GHASH_BLOCK_SIZE) {
+ crypto_shash_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN);
+ return -EINVAL;
+ }
+
+ memcpy(ctx->key, key, GHASH_BLOCK_SIZE);
+
+ return 0;
+}
+
+static int ghash_update(struct shash_desc *desc,
+ const u8 *src, unsigned int srclen)
+{
+ struct ghash_desc_ctx *dctx = shash_desc_ctx(desc);
+ unsigned int n;
+ u8 *buf = dctx->buffer;
+
+ if (dctx->bytes) {
+ u8 *pos = buf + (GHASH_BLOCK_SIZE - dctx->bytes);
+
+ n = min(srclen, dctx->bytes);
+ dctx->bytes -= n;
+ srclen -= n;
+
+ memcpy(pos, src, n);
+ src += n;
+
+ if (!dctx->bytes) {
+ cpacf_kimd(CPACF_KIMD_GHASH, dctx, buf,
+ GHASH_BLOCK_SIZE);
+ }
+ }
+
+ n = srclen & ~(GHASH_BLOCK_SIZE - 1);
+ if (n) {
+ cpacf_kimd(CPACF_KIMD_GHASH, dctx, src, n);
+ src += n;
+ srclen -= n;
+ }
+
+ if (srclen) {
+ dctx->bytes = GHASH_BLOCK_SIZE - srclen;
+ memcpy(buf, src, srclen);
+ }
+
+ return 0;
+}
+
+static int ghash_flush(struct ghash_desc_ctx *dctx)
+{
+ u8 *buf = dctx->buffer;
+
+ if (dctx->bytes) {
+ u8 *pos = buf + (GHASH_BLOCK_SIZE - dctx->bytes);
+
+ memset(pos, 0, dctx->bytes);
+ cpacf_kimd(CPACF_KIMD_GHASH, dctx, buf, GHASH_BLOCK_SIZE);
+ dctx->bytes = 0;
+ }
+
+ return 0;
+}
+
+static int ghash_final(struct shash_desc *desc, u8 *dst)
+{
+ struct ghash_desc_ctx *dctx = shash_desc_ctx(desc);
+ int ret;
+
+ ret = ghash_flush(dctx);
+ if (!ret)
+ memcpy(dst, dctx->icv, GHASH_BLOCK_SIZE);
+ return ret;
+}
+
+static struct shash_alg ghash_alg = {
+ .digestsize = GHASH_DIGEST_SIZE,
+ .init = ghash_init,
+ .update = ghash_update,
+ .final = ghash_final,
+ .setkey = ghash_setkey,
+ .descsize = sizeof(struct ghash_desc_ctx),
+ .base = {
+ .cra_name = "ghash",
+ .cra_driver_name = "ghash-s390",
+ .cra_priority = 300,
+ .cra_blocksize = GHASH_BLOCK_SIZE,
+ .cra_ctxsize = sizeof(struct ghash_ctx),
+ .cra_module = THIS_MODULE,
+ },
+};
+
+static int __init ghash_mod_init(void)
+{
+ if (!cpacf_query_func(CPACF_KIMD, CPACF_KIMD_GHASH))
+ return -EOPNOTSUPP;
+
+ return crypto_register_shash(&ghash_alg);
+}
+
+static void __exit ghash_mod_exit(void)
+{
+ crypto_unregister_shash(&ghash_alg);
+}
+
+module_cpu_feature_match(MSA, ghash_mod_init);
+module_exit(ghash_mod_exit);
+
+MODULE_ALIAS_CRYPTO("ghash");
+
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("GHASH Message Digest Algorithm, s390 implementation");
diff --git a/arch/s390/crypto/paes_s390.c b/arch/s390/crypto/paes_s390.c
new file mode 100644
index 000000000..ab9a0ebec
--- /dev/null
+++ b/arch/s390/crypto/paes_s390.c
@@ -0,0 +1,618 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Cryptographic API.
+ *
+ * s390 implementation of the AES Cipher Algorithm with protected keys.
+ *
+ * s390 Version:
+ * Copyright IBM Corp. 2017
+ * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>
+ * Harald Freudenberger <freude@de.ibm.com>
+ */
+
+#define KMSG_COMPONENT "paes_s390"
+#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
+
+#include <crypto/aes.h>
+#include <crypto/algapi.h>
+#include <linux/bug.h>
+#include <linux/err.h>
+#include <linux/module.h>
+#include <linux/cpufeature.h>
+#include <linux/init.h>
+#include <linux/spinlock.h>
+#include <crypto/xts.h>
+#include <asm/cpacf.h>
+#include <asm/pkey.h>
+
+static u8 *ctrblk;
+static DEFINE_SPINLOCK(ctrblk_lock);
+
+static cpacf_mask_t km_functions, kmc_functions, kmctr_functions;
+
+struct s390_paes_ctx {
+ struct pkey_seckey sk;
+ struct pkey_protkey pk;
+ unsigned long fc;
+};
+
+struct s390_pxts_ctx {
+ struct pkey_seckey sk[2];
+ struct pkey_protkey pk[2];
+ unsigned long fc;
+};
+
+static inline int __paes_convert_key(struct pkey_seckey *sk,
+ struct pkey_protkey *pk)
+{
+ int i, ret;
+
+ /* try three times in case of failure */
+ for (i = 0; i < 3; i++) {
+ ret = pkey_skey2pkey(sk, pk);
+ if (ret == 0)
+ break;
+ }
+
+ return ret;
+}
+
+static int __paes_set_key(struct s390_paes_ctx *ctx)
+{
+ unsigned long fc;
+
+ if (__paes_convert_key(&ctx->sk, &ctx->pk))
+ return -EINVAL;
+
+ /* Pick the correct function code based on the protected key type */
+ fc = (ctx->pk.type == PKEY_KEYTYPE_AES_128) ? CPACF_KM_PAES_128 :
+ (ctx->pk.type == PKEY_KEYTYPE_AES_192) ? CPACF_KM_PAES_192 :
+ (ctx->pk.type == PKEY_KEYTYPE_AES_256) ? CPACF_KM_PAES_256 : 0;
+
+ /* Check if the function code is available */
+ ctx->fc = (fc && cpacf_test_func(&km_functions, fc)) ? fc : 0;
+
+ return ctx->fc ? 0 : -EINVAL;
+}
+
+static int ecb_paes_set_key(struct crypto_tfm *tfm, const u8 *in_key,
+ unsigned int key_len)
+{
+ struct s390_paes_ctx *ctx = crypto_tfm_ctx(tfm);
+
+ if (key_len != SECKEYBLOBSIZE)
+ return -EINVAL;
+
+ memcpy(ctx->sk.seckey, in_key, SECKEYBLOBSIZE);
+ if (__paes_set_key(ctx)) {
+ tfm->crt_flags |= CRYPTO_TFM_RES_BAD_KEY_LEN;
+ return -EINVAL;
+ }
+ return 0;
+}
+
+static int ecb_paes_crypt(struct blkcipher_desc *desc,
+ unsigned long modifier,
+ struct blkcipher_walk *walk)
+{
+ struct s390_paes_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
+ unsigned int nbytes, n, k;
+ int ret;
+
+ ret = blkcipher_walk_virt(desc, walk);
+ while ((nbytes = walk->nbytes) >= AES_BLOCK_SIZE) {
+ /* only use complete blocks */
+ n = nbytes & ~(AES_BLOCK_SIZE - 1);
+ k = cpacf_km(ctx->fc | modifier, ctx->pk.protkey,
+ walk->dst.virt.addr, walk->src.virt.addr, n);
+ if (k)
+ ret = blkcipher_walk_done(desc, walk, nbytes - k);
+ if (k < n) {
+ if (__paes_set_key(ctx) != 0)
+ return blkcipher_walk_done(desc, walk, -EIO);
+ }
+ }
+ return ret;
+}
+
+static int ecb_paes_encrypt(struct blkcipher_desc *desc,
+ struct scatterlist *dst, struct scatterlist *src,
+ unsigned int nbytes)
+{
+ struct blkcipher_walk walk;
+
+ blkcipher_walk_init(&walk, dst, src, nbytes);
+ return ecb_paes_crypt(desc, CPACF_ENCRYPT, &walk);
+}
+
+static int ecb_paes_decrypt(struct blkcipher_desc *desc,
+ struct scatterlist *dst, struct scatterlist *src,
+ unsigned int nbytes)
+{
+ struct blkcipher_walk walk;
+
+ blkcipher_walk_init(&walk, dst, src, nbytes);
+ return ecb_paes_crypt(desc, CPACF_DECRYPT, &walk);
+}
+
+static struct crypto_alg ecb_paes_alg = {
+ .cra_name = "ecb(paes)",
+ .cra_driver_name = "ecb-paes-s390",
+ .cra_priority = 401, /* combo: aes + ecb + 1 */
+ .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
+ .cra_blocksize = AES_BLOCK_SIZE,
+ .cra_ctxsize = sizeof(struct s390_paes_ctx),
+ .cra_type = &crypto_blkcipher_type,
+ .cra_module = THIS_MODULE,
+ .cra_list = LIST_HEAD_INIT(ecb_paes_alg.cra_list),
+ .cra_u = {
+ .blkcipher = {
+ .min_keysize = SECKEYBLOBSIZE,
+ .max_keysize = SECKEYBLOBSIZE,
+ .setkey = ecb_paes_set_key,
+ .encrypt = ecb_paes_encrypt,
+ .decrypt = ecb_paes_decrypt,
+ }
+ }
+};
+
+static int __cbc_paes_set_key(struct s390_paes_ctx *ctx)
+{
+ unsigned long fc;
+
+ if (__paes_convert_key(&ctx->sk, &ctx->pk))
+ return -EINVAL;
+
+ /* Pick the correct function code based on the protected key type */
+ fc = (ctx->pk.type == PKEY_KEYTYPE_AES_128) ? CPACF_KMC_PAES_128 :
+ (ctx->pk.type == PKEY_KEYTYPE_AES_192) ? CPACF_KMC_PAES_192 :
+ (ctx->pk.type == PKEY_KEYTYPE_AES_256) ? CPACF_KMC_PAES_256 : 0;
+
+ /* Check if the function code is available */
+ ctx->fc = (fc && cpacf_test_func(&kmc_functions, fc)) ? fc : 0;
+
+ return ctx->fc ? 0 : -EINVAL;
+}
+
+static int cbc_paes_set_key(struct crypto_tfm *tfm, const u8 *in_key,
+ unsigned int key_len)
+{
+ struct s390_paes_ctx *ctx = crypto_tfm_ctx(tfm);
+
+ memcpy(ctx->sk.seckey, in_key, SECKEYBLOBSIZE);
+ if (__cbc_paes_set_key(ctx)) {
+ tfm->crt_flags |= CRYPTO_TFM_RES_BAD_KEY_LEN;
+ return -EINVAL;
+ }
+ return 0;
+}
+
+static int cbc_paes_crypt(struct blkcipher_desc *desc, unsigned long modifier,
+ struct blkcipher_walk *walk)
+{
+ struct s390_paes_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
+ unsigned int nbytes, n, k;
+ int ret;
+ struct {
+ u8 iv[AES_BLOCK_SIZE];
+ u8 key[MAXPROTKEYSIZE];
+ } param;
+
+ ret = blkcipher_walk_virt(desc, walk);
+ memcpy(param.iv, walk->iv, AES_BLOCK_SIZE);
+ memcpy(param.key, ctx->pk.protkey, MAXPROTKEYSIZE);
+ while ((nbytes = walk->nbytes) >= AES_BLOCK_SIZE) {
+ /* only use complete blocks */
+ n = nbytes & ~(AES_BLOCK_SIZE - 1);
+ k = cpacf_kmc(ctx->fc | modifier, &param,
+ walk->dst.virt.addr, walk->src.virt.addr, n);
+ if (k)
+ ret = blkcipher_walk_done(desc, walk, nbytes - k);
+ if (k < n) {
+ if (__cbc_paes_set_key(ctx) != 0)
+ return blkcipher_walk_done(desc, walk, -EIO);
+ memcpy(param.key, ctx->pk.protkey, MAXPROTKEYSIZE);
+ }
+ }
+ memcpy(walk->iv, param.iv, AES_BLOCK_SIZE);
+ return ret;
+}
+
+static int cbc_paes_encrypt(struct blkcipher_desc *desc,
+ struct scatterlist *dst, struct scatterlist *src,
+ unsigned int nbytes)
+{
+ struct blkcipher_walk walk;
+
+ blkcipher_walk_init(&walk, dst, src, nbytes);
+ return cbc_paes_crypt(desc, 0, &walk);
+}
+
+static int cbc_paes_decrypt(struct blkcipher_desc *desc,
+ struct scatterlist *dst, struct scatterlist *src,
+ unsigned int nbytes)
+{
+ struct blkcipher_walk walk;
+
+ blkcipher_walk_init(&walk, dst, src, nbytes);
+ return cbc_paes_crypt(desc, CPACF_DECRYPT, &walk);
+}
+
+static struct crypto_alg cbc_paes_alg = {
+ .cra_name = "cbc(paes)",
+ .cra_driver_name = "cbc-paes-s390",
+ .cra_priority = 402, /* ecb-paes-s390 + 1 */
+ .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
+ .cra_blocksize = AES_BLOCK_SIZE,
+ .cra_ctxsize = sizeof(struct s390_paes_ctx),
+ .cra_type = &crypto_blkcipher_type,
+ .cra_module = THIS_MODULE,
+ .cra_list = LIST_HEAD_INIT(cbc_paes_alg.cra_list),
+ .cra_u = {
+ .blkcipher = {
+ .min_keysize = SECKEYBLOBSIZE,
+ .max_keysize = SECKEYBLOBSIZE,
+ .ivsize = AES_BLOCK_SIZE,
+ .setkey = cbc_paes_set_key,
+ .encrypt = cbc_paes_encrypt,
+ .decrypt = cbc_paes_decrypt,
+ }
+ }
+};
+
+static int __xts_paes_set_key(struct s390_pxts_ctx *ctx)
+{
+ unsigned long fc;
+
+ if (__paes_convert_key(&ctx->sk[0], &ctx->pk[0]) ||
+ __paes_convert_key(&ctx->sk[1], &ctx->pk[1]))
+ return -EINVAL;
+
+ if (ctx->pk[0].type != ctx->pk[1].type)
+ return -EINVAL;
+
+ /* Pick the correct function code based on the protected key type */
+ fc = (ctx->pk[0].type == PKEY_KEYTYPE_AES_128) ? CPACF_KM_PXTS_128 :
+ (ctx->pk[0].type == PKEY_KEYTYPE_AES_256) ?
+ CPACF_KM_PXTS_256 : 0;
+
+ /* Check if the function code is available */
+ ctx->fc = (fc && cpacf_test_func(&km_functions, fc)) ? fc : 0;
+
+ return ctx->fc ? 0 : -EINVAL;
+}
+
+static int xts_paes_set_key(struct crypto_tfm *tfm, const u8 *in_key,
+ unsigned int key_len)
+{
+ struct s390_pxts_ctx *ctx = crypto_tfm_ctx(tfm);
+ u8 ckey[2 * AES_MAX_KEY_SIZE];
+ unsigned int ckey_len;
+
+ memcpy(ctx->sk[0].seckey, in_key, SECKEYBLOBSIZE);
+ memcpy(ctx->sk[1].seckey, in_key + SECKEYBLOBSIZE, SECKEYBLOBSIZE);
+ if (__xts_paes_set_key(ctx)) {
+ tfm->crt_flags |= CRYPTO_TFM_RES_BAD_KEY_LEN;
+ return -EINVAL;
+ }
+
+ /*
+ * xts_check_key verifies the key length is not odd and makes
+ * sure that the two keys are not the same. This can be done
+ * on the two protected keys as well
+ */
+ ckey_len = (ctx->pk[0].type == PKEY_KEYTYPE_AES_128) ?
+ AES_KEYSIZE_128 : AES_KEYSIZE_256;
+ memcpy(ckey, ctx->pk[0].protkey, ckey_len);
+ memcpy(ckey + ckey_len, ctx->pk[1].protkey, ckey_len);
+ return xts_check_key(tfm, ckey, 2*ckey_len);
+}
+
+static int xts_paes_crypt(struct blkcipher_desc *desc, unsigned long modifier,
+ struct blkcipher_walk *walk)
+{
+ struct s390_pxts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
+ unsigned int keylen, offset, nbytes, n, k;
+ int ret;
+ struct {
+ u8 key[MAXPROTKEYSIZE]; /* key + verification pattern */
+ u8 tweak[16];
+ u8 block[16];
+ u8 bit[16];
+ u8 xts[16];
+ } pcc_param;
+ struct {
+ u8 key[MAXPROTKEYSIZE]; /* key + verification pattern */
+ u8 init[16];
+ } xts_param;
+
+ ret = blkcipher_walk_virt(desc, walk);
+ keylen = (ctx->pk[0].type == PKEY_KEYTYPE_AES_128) ? 48 : 64;
+ offset = (ctx->pk[0].type == PKEY_KEYTYPE_AES_128) ? 16 : 0;
+retry:
+ memset(&pcc_param, 0, sizeof(pcc_param));
+ memcpy(pcc_param.tweak, walk->iv, sizeof(pcc_param.tweak));
+ memcpy(pcc_param.key + offset, ctx->pk[1].protkey, keylen);
+ cpacf_pcc(ctx->fc, pcc_param.key + offset);
+
+ memcpy(xts_param.key + offset, ctx->pk[0].protkey, keylen);
+ memcpy(xts_param.init, pcc_param.xts, 16);
+
+ while ((nbytes = walk->nbytes) >= AES_BLOCK_SIZE) {
+ /* only use complete blocks */
+ n = nbytes & ~(AES_BLOCK_SIZE - 1);
+ k = cpacf_km(ctx->fc | modifier, xts_param.key + offset,
+ walk->dst.virt.addr, walk->src.virt.addr, n);
+ if (k)
+ ret = blkcipher_walk_done(desc, walk, nbytes - k);
+ if (k < n) {
+ if (__xts_paes_set_key(ctx) != 0)
+ return blkcipher_walk_done(desc, walk, -EIO);
+ goto retry;
+ }
+ }
+ return ret;
+}
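+
+/*
+ * Note: the XTS path above is a two-step CPACF sequence. cpacf_pcc() first
+ * derives the initial tweak (pcc_param.xts) from the IV using the second
+ * protected key, then cpacf_km() processes the data blocks with the first
+ * protected key and that tweak as xts_param.init. If the protected key was
+ * invalidated in the meantime (k < n), the key is converted again and both
+ * steps are redone via the retry label.
+ */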
+
+static int xts_paes_encrypt(struct blkcipher_desc *desc,
+ struct scatterlist *dst, struct scatterlist *src,
+ unsigned int nbytes)
+{
+ struct blkcipher_walk walk;
+
+ blkcipher_walk_init(&walk, dst, src, nbytes);
+ return xts_paes_crypt(desc, 0, &walk);
+}
+
+static int xts_paes_decrypt(struct blkcipher_desc *desc,
+ struct scatterlist *dst, struct scatterlist *src,
+ unsigned int nbytes)
+{
+ struct blkcipher_walk walk;
+
+ blkcipher_walk_init(&walk, dst, src, nbytes);
+ return xts_paes_crypt(desc, CPACF_DECRYPT, &walk);
+}
+
+static struct crypto_alg xts_paes_alg = {
+ .cra_name = "xts(paes)",
+ .cra_driver_name = "xts-paes-s390",
+ .cra_priority = 402, /* ecb-paes-s390 + 1 */
+ .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
+ .cra_blocksize = AES_BLOCK_SIZE,
+ .cra_ctxsize = sizeof(struct s390_pxts_ctx),
+ .cra_type = &crypto_blkcipher_type,
+ .cra_module = THIS_MODULE,
+ .cra_list = LIST_HEAD_INIT(xts_paes_alg.cra_list),
+ .cra_u = {
+ .blkcipher = {
+ .min_keysize = 2 * SECKEYBLOBSIZE,
+ .max_keysize = 2 * SECKEYBLOBSIZE,
+ .ivsize = AES_BLOCK_SIZE,
+ .setkey = xts_paes_set_key,
+ .encrypt = xts_paes_encrypt,
+ .decrypt = xts_paes_decrypt,
+ }
+ }
+};
+
+static int __ctr_paes_set_key(struct s390_paes_ctx *ctx)
+{
+ unsigned long fc;
+
+ if (__paes_convert_key(&ctx->sk, &ctx->pk))
+ return -EINVAL;
+
+ /* Pick the correct function code based on the protected key type */
+ fc = (ctx->pk.type == PKEY_KEYTYPE_AES_128) ? CPACF_KMCTR_PAES_128 :
+ (ctx->pk.type == PKEY_KEYTYPE_AES_192) ? CPACF_KMCTR_PAES_192 :
+ (ctx->pk.type == PKEY_KEYTYPE_AES_256) ?
+ CPACF_KMCTR_PAES_256 : 0;
+
+ /* Check if the function code is available */
+ ctx->fc = (fc && cpacf_test_func(&kmctr_functions, fc)) ? fc : 0;
+
+ return ctx->fc ? 0 : -EINVAL;
+}
+
+static int ctr_paes_set_key(struct crypto_tfm *tfm, const u8 *in_key,
+ unsigned int key_len)
+{
+ struct s390_paes_ctx *ctx = crypto_tfm_ctx(tfm);
+
+ memcpy(ctx->sk.seckey, in_key, key_len);
+ if (__ctr_paes_set_key(ctx)) {
+ tfm->crt_flags |= CRYPTO_TFM_RES_BAD_KEY_LEN;
+ return -EINVAL;
+ }
+ return 0;
+}
+
+static unsigned int __ctrblk_init(u8 *ctrptr, u8 *iv, unsigned int nbytes)
+{
+ unsigned int i, n;
+
+ /* only use complete blocks, max. PAGE_SIZE */
+ memcpy(ctrptr, iv, AES_BLOCK_SIZE);
+ n = (nbytes > PAGE_SIZE) ? PAGE_SIZE : nbytes & ~(AES_BLOCK_SIZE - 1);
+ for (i = (n / AES_BLOCK_SIZE) - 1; i > 0; i--) {
+ memcpy(ctrptr + AES_BLOCK_SIZE, ctrptr, AES_BLOCK_SIZE);
+ crypto_inc(ctrptr + AES_BLOCK_SIZE, AES_BLOCK_SIZE);
+ ctrptr += AES_BLOCK_SIZE;
+ }
+ return n;
+}
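+
+/*
+ * Note: __ctrblk_init() fills the shared ctrblk page with consecutive
+ * counter values. With the 4 KB pages used on s390 and AES_BLOCK_SIZE = 16,
+ * a request of nbytes = 4096 returns n = 4096 and the page then holds
+ * iv, iv+1, ..., iv+255, so one cpacf_kmctr() call can process 256 blocks.
+ */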
+
+static int ctr_paes_crypt(struct blkcipher_desc *desc, unsigned long modifier,
+ struct blkcipher_walk *walk)
+{
+ struct s390_paes_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
+ u8 buf[AES_BLOCK_SIZE], *ctrptr;
+ unsigned int nbytes, n, k;
+ int ret, locked;
+
+ locked = spin_trylock(&ctrblk_lock);
+
+ ret = blkcipher_walk_virt_block(desc, walk, AES_BLOCK_SIZE);
+ while ((nbytes = walk->nbytes) >= AES_BLOCK_SIZE) {
+ n = AES_BLOCK_SIZE;
+ if (nbytes >= 2*AES_BLOCK_SIZE && locked)
+ n = __ctrblk_init(ctrblk, walk->iv, nbytes);
+ ctrptr = (n > AES_BLOCK_SIZE) ? ctrblk : walk->iv;
+ k = cpacf_kmctr(ctx->fc | modifier, ctx->pk.protkey,
+ walk->dst.virt.addr, walk->src.virt.addr,
+ n, ctrptr);
+ if (k) {
+ if (ctrptr == ctrblk)
+ memcpy(walk->iv, ctrptr + k - AES_BLOCK_SIZE,
+ AES_BLOCK_SIZE);
+ crypto_inc(walk->iv, AES_BLOCK_SIZE);
+ ret = blkcipher_walk_done(desc, walk, nbytes - n);
+ }
+ if (k < n) {
+ if (__ctr_paes_set_key(ctx) != 0) {
+ if (locked)
+ spin_unlock(&ctrblk_lock);
+ return blkcipher_walk_done(desc, walk, -EIO);
+ }
+ }
+ }
+ if (locked)
+ spin_unlock(&ctrblk_lock);
+ /*
+ * final block may be < AES_BLOCK_SIZE, copy only nbytes
+ */
+ if (nbytes) {
+ while (1) {
+ if (cpacf_kmctr(ctx->fc | modifier,
+ ctx->pk.protkey, buf,
+ walk->src.virt.addr, AES_BLOCK_SIZE,
+ walk->iv) == AES_BLOCK_SIZE)
+ break;
+ if (__ctr_paes_set_key(ctx) != 0)
+ return blkcipher_walk_done(desc, walk, -EIO);
+ }
+ memcpy(walk->dst.virt.addr, buf, nbytes);
+ crypto_inc(walk->iv, AES_BLOCK_SIZE);
+ ret = blkcipher_walk_done(desc, walk, 0);
+ }
+
+ return ret;
+}
+
+static int ctr_paes_encrypt(struct blkcipher_desc *desc,
+ struct scatterlist *dst, struct scatterlist *src,
+ unsigned int nbytes)
+{
+ struct blkcipher_walk walk;
+
+ blkcipher_walk_init(&walk, dst, src, nbytes);
+ return ctr_paes_crypt(desc, 0, &walk);
+}
+
+static int ctr_paes_decrypt(struct blkcipher_desc *desc,
+ struct scatterlist *dst, struct scatterlist *src,
+ unsigned int nbytes)
+{
+ struct blkcipher_walk walk;
+
+ blkcipher_walk_init(&walk, dst, src, nbytes);
+ return ctr_paes_crypt(desc, CPACF_DECRYPT, &walk);
+}
+
+static struct crypto_alg ctr_paes_alg = {
+ .cra_name = "ctr(paes)",
+ .cra_driver_name = "ctr-paes-s390",
+ .cra_priority = 402, /* ecb-paes-s390 + 1 */
+ .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
+ .cra_blocksize = 1,
+ .cra_ctxsize = sizeof(struct s390_paes_ctx),
+ .cra_type = &crypto_blkcipher_type,
+ .cra_module = THIS_MODULE,
+ .cra_list = LIST_HEAD_INIT(ctr_paes_alg.cra_list),
+ .cra_u = {
+ .blkcipher = {
+ .min_keysize = SECKEYBLOBSIZE,
+ .max_keysize = SECKEYBLOBSIZE,
+ .ivsize = AES_BLOCK_SIZE,
+ .setkey = ctr_paes_set_key,
+ .encrypt = ctr_paes_encrypt,
+ .decrypt = ctr_paes_decrypt,
+ }
+ }
+};
+
+static inline void __crypto_unregister_alg(struct crypto_alg *alg)
+{
+ if (!list_empty(&alg->cra_list))
+ crypto_unregister_alg(alg);
+}
+
+static void paes_s390_fini(void)
+{
+ if (ctrblk)
+ free_page((unsigned long) ctrblk);
+ __crypto_unregister_alg(&ctr_paes_alg);
+ __crypto_unregister_alg(&xts_paes_alg);
+ __crypto_unregister_alg(&cbc_paes_alg);
+ __crypto_unregister_alg(&ecb_paes_alg);
+}
+
+static int __init paes_s390_init(void)
+{
+ int ret;
+
+ /* Query available functions for KM, KMC and KMCTR */
+ cpacf_query(CPACF_KM, &km_functions);
+ cpacf_query(CPACF_KMC, &kmc_functions);
+ cpacf_query(CPACF_KMCTR, &kmctr_functions);
+
+ if (cpacf_test_func(&km_functions, CPACF_KM_PAES_128) ||
+ cpacf_test_func(&km_functions, CPACF_KM_PAES_192) ||
+ cpacf_test_func(&km_functions, CPACF_KM_PAES_256)) {
+ ret = crypto_register_alg(&ecb_paes_alg);
+ if (ret)
+ goto out_err;
+ }
+
+ if (cpacf_test_func(&kmc_functions, CPACF_KMC_PAES_128) ||
+ cpacf_test_func(&kmc_functions, CPACF_KMC_PAES_192) ||
+ cpacf_test_func(&kmc_functions, CPACF_KMC_PAES_256)) {
+ ret = crypto_register_alg(&cbc_paes_alg);
+ if (ret)
+ goto out_err;
+ }
+
+ if (cpacf_test_func(&km_functions, CPACF_KM_PXTS_128) ||
+ cpacf_test_func(&km_functions, CPACF_KM_PXTS_256)) {
+ ret = crypto_register_alg(&xts_paes_alg);
+ if (ret)
+ goto out_err;
+ }
+
+ if (cpacf_test_func(&kmctr_functions, CPACF_KMCTR_PAES_128) ||
+ cpacf_test_func(&kmctr_functions, CPACF_KMCTR_PAES_192) ||
+ cpacf_test_func(&kmctr_functions, CPACF_KMCTR_PAES_256)) {
+ ret = crypto_register_alg(&ctr_paes_alg);
+ if (ret)
+ goto out_err;
+ ctrblk = (u8 *) __get_free_page(GFP_KERNEL);
+ if (!ctrblk) {
+ ret = -ENOMEM;
+ goto out_err;
+ }
+ }
+
+ return 0;
+out_err:
+ paes_s390_fini();
+ return ret;
+}
+
+module_init(paes_s390_init);
+module_exit(paes_s390_fini);
+
+MODULE_ALIAS_CRYPTO("paes");
+
+MODULE_DESCRIPTION("Rijndael (AES) Cipher Algorithm with protected keys");
+MODULE_LICENSE("GPL");
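Unlike the plain AES driver, the paes algorithms take a secure key blob of SECKEYBLOBSIZE bytes (obtained from the pkey layer) as their "key". The following is a minimal sketch of how an in-kernel caller might use "cbc(paes)" through the generic skcipher API; the function name, the already-obtained secure key blob and the synchronous wait handling are illustrative assumptions, not part of this patch.

#include <crypto/skcipher.h>
#include <linux/err.h>
#include <linux/scatterlist.h>
#include <linux/slab.h>

/* illustrative helper; seckey is a pkey secure key blob, not a clear key */
static int cbc_paes_demo(const u8 *seckey, unsigned int seckeylen,
			 u8 *buf, unsigned int len, u8 iv[16])
{
	struct crypto_skcipher *tfm;
	struct skcipher_request *req;
	struct scatterlist sg;
	DECLARE_CRYPTO_WAIT(wait);
	int ret;

	tfm = crypto_alloc_skcipher("cbc(paes)", 0, 0);
	if (IS_ERR(tfm))
		return PTR_ERR(tfm);

	/* the "key" is the secure key blob, never a clear AES key */
	ret = crypto_skcipher_setkey(tfm, seckey, seckeylen);
	if (ret)
		goto out_free_tfm;

	req = skcipher_request_alloc(tfm, GFP_KERNEL);
	if (!req) {
		ret = -ENOMEM;
		goto out_free_tfm;
	}

	/* in-place CBC; buf must be kmalloc'ed memory and len a multiple of AES_BLOCK_SIZE */
	sg_init_one(&sg, buf, len);
	skcipher_request_set_callback(req, CRYPTO_TFM_REQ_MAY_SLEEP,
				      crypto_req_done, &wait);
	skcipher_request_set_crypt(req, &sg, &sg, len, iv);
	ret = crypto_wait_req(crypto_skcipher_encrypt(req), &wait);

	skcipher_request_free(req);
out_free_tfm:
	crypto_free_skcipher(tfm);
	return ret;
}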
diff --git a/arch/s390/crypto/prng.c b/arch/s390/crypto/prng.c
new file mode 100644
index 000000000..a97a1802c
--- /dev/null
+++ b/arch/s390/crypto/prng.c
@@ -0,0 +1,875 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright IBM Corp. 2006, 2015
+ * Author(s): Jan Glauber <jan.glauber@de.ibm.com>
+ * Harald Freudenberger <freude@de.ibm.com>
+ * Driver for the s390 pseudo random number generator
+ */
+
+#define KMSG_COMPONENT "prng"
+#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
+
+#include <linux/fs.h>
+#include <linux/fips.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/device.h>
+#include <linux/miscdevice.h>
+#include <linux/module.h>
+#include <linux/moduleparam.h>
+#include <linux/mutex.h>
+#include <linux/cpufeature.h>
+#include <linux/random.h>
+#include <linux/slab.h>
+#include <linux/sched/signal.h>
+
+#include <asm/debug.h>
+#include <linux/uaccess.h>
+#include <asm/timex.h>
+#include <asm/cpacf.h>
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("IBM Corporation");
+MODULE_DESCRIPTION("s390 PRNG interface");
+
+
+#define PRNG_MODE_AUTO 0
+#define PRNG_MODE_TDES 1
+#define PRNG_MODE_SHA512 2
+
+static unsigned int prng_mode = PRNG_MODE_AUTO;
+module_param_named(mode, prng_mode, int, 0);
+MODULE_PARM_DESC(prng_mode, "PRNG mode: 0 - auto, 1 - TDES, 2 - SHA512");
+
+
+#define PRNG_CHUNKSIZE_TDES_MIN 8
+#define PRNG_CHUNKSIZE_TDES_MAX (64*1024)
+#define PRNG_CHUNKSIZE_SHA512_MIN 64
+#define PRNG_CHUNKSIZE_SHA512_MAX (64*1024)
+
+static unsigned int prng_chunk_size = 256;
+module_param_named(chunksize, prng_chunk_size, int, 0);
+MODULE_PARM_DESC(prng_chunk_size, "PRNG read chunk size in bytes");
+
+
+#define PRNG_RESEED_LIMIT_TDES 4096
+#define PRNG_RESEED_LIMIT_TDES_LOWER 4096
+#define PRNG_RESEED_LIMIT_SHA512 100000
+#define PRNG_RESEED_LIMIT_SHA512_LOWER 10000
+
+static unsigned int prng_reseed_limit;
+module_param_named(reseed_limit, prng_reseed_limit, int, 0);
+MODULE_PARM_DESC(prng_reseed_limit, "PRNG reseed limit");
+
+
+/*
+ * Any one who considers arithmetical methods of producing random digits is,
+ * of course, in a state of sin. -- John von Neumann
+ */
+
+static int prng_errorflag;
+
+#define PRNG_GEN_ENTROPY_FAILED 1
+#define PRNG_SELFTEST_FAILED 2
+#define PRNG_INSTANTIATE_FAILED 3
+#define PRNG_SEED_FAILED 4
+#define PRNG_RESEED_FAILED 5
+#define PRNG_GEN_FAILED 6
+
+struct prng_ws_s {
+ u8 parm_block[32];
+ u32 reseed_counter;
+ u64 byte_counter;
+};
+
+struct prno_ws_s {
+ u32 res;
+ u32 reseed_counter;
+ u64 stream_bytes;
+ u8 V[112];
+ u8 C[112];
+};
+
+struct prng_data_s {
+ struct mutex mutex;
+ union {
+ struct prng_ws_s prngws;
+ struct prno_ws_s prnows;
+ };
+ u8 *buf;
+ u32 rest;
+ u8 *prev;
+};
+
+static struct prng_data_s *prng_data;
+
+/* initial parameter block for tdes mode, copied from libica */
+static const u8 initial_parm_block[32] __initconst = {
+ 0x0F, 0x2B, 0x8E, 0x63, 0x8C, 0x8E, 0xD2, 0x52,
+ 0x64, 0xB7, 0xA0, 0x7B, 0x75, 0x28, 0xB8, 0xF4,
+ 0x75, 0x5F, 0xD2, 0xA6, 0x8D, 0x97, 0x11, 0xFF,
+ 0x49, 0xD8, 0x23, 0xF3, 0x7E, 0x21, 0xEC, 0xA0 };
+
+
+/*** helper functions ***/
+
+/*
+ * generate_entropy:
+ * This algorithm produces 64 bytes of entropy data based on 1024
+ * individual stckf() invocations, assuming that each stckf() value
+ * contributes 0.25 bits of entropy. So the caller gets 256 bits of
+ * entropy per 64 bytes, or 4 bits of entropy per byte.
+ */
+static int generate_entropy(u8 *ebuf, size_t nbytes)
+{
+ int n, ret = 0;
+ u8 *pg, *h, hash[64];
+
+ /* allocate 2 pages */
+ pg = (u8 *) __get_free_pages(GFP_KERNEL, 1);
+ if (!pg) {
+ prng_errorflag = PRNG_GEN_ENTROPY_FAILED;
+ return -ENOMEM;
+ }
+
+ while (nbytes) {
+ /* fill pages with urandom bytes */
+ get_random_bytes(pg, 2*PAGE_SIZE);
+ /* XOR pages with 1024 stckf values */
+ for (n = 0; n < 2 * PAGE_SIZE / sizeof(u64); n++) {
+ u64 *p = ((u64 *)pg) + n;
+ *p ^= get_tod_clock_fast();
+ }
+ n = (nbytes < sizeof(hash)) ? nbytes : sizeof(hash);
+ if (n < sizeof(hash))
+ h = hash;
+ else
+ h = ebuf;
+ /* hash over the filled pages */
+ cpacf_kimd(CPACF_KIMD_SHA_512, h, pg, 2*PAGE_SIZE);
+ if (n < sizeof(hash))
+ memcpy(ebuf, hash, n);
+ ret += n;
+ ebuf += n;
+ nbytes -= n;
+ }
+
+ free_pages((unsigned long)pg, 1);
+ return ret;
+}
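+
+/*
+ * Note: per iteration the two pages hold 2 * 4096 / 8 = 1024 u64 slots
+ * (assuming the 4 KB pages used on s390), each XORed with one stckf()
+ * value. At the assumed 0.25 bits of entropy per value this amounts to
+ * 1024 * 0.25 = 256 bits, which SHA-512 compresses into the 64-byte hash,
+ * i.e. 4 bits of entropy per output byte as stated above.
+ */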
+
+
+/*** tdes functions ***/
+
+static void prng_tdes_add_entropy(void)
+{
+ __u64 entropy[4];
+ unsigned int i;
+
+ for (i = 0; i < 16; i++) {
+ cpacf_kmc(CPACF_KMC_PRNG, prng_data->prngws.parm_block,
+ (char *) entropy, (char *) entropy,
+ sizeof(entropy));
+ memcpy(prng_data->prngws.parm_block, entropy, sizeof(entropy));
+ }
+}
+
+
+static void prng_tdes_seed(int nbytes)
+{
+ char buf[16];
+ int i = 0;
+
+ BUG_ON(nbytes > sizeof(buf));
+
+ get_random_bytes(buf, nbytes);
+
+ /* Add the entropy */
+ while (nbytes >= 8) {
+ *((__u64 *)prng_data->prngws.parm_block) ^= *((__u64 *)(buf+i));
+ prng_tdes_add_entropy();
+ i += 8;
+ nbytes -= 8;
+ }
+ prng_tdes_add_entropy();
+ prng_data->prngws.reseed_counter = 0;
+}
+
+
+static int __init prng_tdes_instantiate(void)
+{
+ int datalen;
+
+ pr_debug("prng runs in TDES mode with "
+ "chunksize=%d and reseed_limit=%u\n",
+ prng_chunk_size, prng_reseed_limit);
+
+ /* memory allocation, prng_data struct init, mutex init */
+ datalen = sizeof(struct prng_data_s) + prng_chunk_size;
+ prng_data = kzalloc(datalen, GFP_KERNEL);
+ if (!prng_data) {
+ prng_errorflag = PRNG_INSTANTIATE_FAILED;
+ return -ENOMEM;
+ }
+ mutex_init(&prng_data->mutex);
+ prng_data->buf = ((u8 *)prng_data) + sizeof(struct prng_data_s);
+ memcpy(prng_data->prngws.parm_block, initial_parm_block, 32);
+
+ /* initialize the PRNG, add 128 bits of entropy */
+ prng_tdes_seed(16);
+
+ return 0;
+}
+
+
+static void prng_tdes_deinstantiate(void)
+{
+ pr_debug("The prng module stopped "
+ "after running in triple DES mode\n");
+ kzfree(prng_data);
+}
+
+
+/*** sha512 functions ***/
+
+static int __init prng_sha512_selftest(void)
+{
+ /* NIST DRBG testvector for Hash Drbg, Sha-512, Count #0 */
+ static const u8 seed[] __initconst = {
+ 0x6b, 0x50, 0xa7, 0xd8, 0xf8, 0xa5, 0x5d, 0x7a,
+ 0x3d, 0xf8, 0xbb, 0x40, 0xbc, 0xc3, 0xb7, 0x22,
+ 0xd8, 0x70, 0x8d, 0xe6, 0x7f, 0xda, 0x01, 0x0b,
+ 0x03, 0xc4, 0xc8, 0x4d, 0x72, 0x09, 0x6f, 0x8c,
+ 0x3e, 0xc6, 0x49, 0xcc, 0x62, 0x56, 0xd9, 0xfa,
+ 0x31, 0xdb, 0x7a, 0x29, 0x04, 0xaa, 0xf0, 0x25 };
+ static const u8 V0[] __initconst = {
+ 0x00, 0xad, 0xe3, 0x6f, 0x9a, 0x01, 0xc7, 0x76,
+ 0x61, 0x34, 0x35, 0xf5, 0x4e, 0x24, 0x74, 0x22,
+ 0x21, 0x9a, 0x29, 0x89, 0xc7, 0x93, 0x2e, 0x60,
+ 0x1e, 0xe8, 0x14, 0x24, 0x8d, 0xd5, 0x03, 0xf1,
+ 0x65, 0x5d, 0x08, 0x22, 0x72, 0xd5, 0xad, 0x95,
+ 0xe1, 0x23, 0x1e, 0x8a, 0xa7, 0x13, 0xd9, 0x2b,
+ 0x5e, 0xbc, 0xbb, 0x80, 0xab, 0x8d, 0xe5, 0x79,
+ 0xab, 0x5b, 0x47, 0x4e, 0xdd, 0xee, 0x6b, 0x03,
+ 0x8f, 0x0f, 0x5c, 0x5e, 0xa9, 0x1a, 0x83, 0xdd,
+ 0xd3, 0x88, 0xb2, 0x75, 0x4b, 0xce, 0x83, 0x36,
+ 0x57, 0x4b, 0xf1, 0x5c, 0xca, 0x7e, 0x09, 0xc0,
+ 0xd3, 0x89, 0xc6, 0xe0, 0xda, 0xc4, 0x81, 0x7e,
+ 0x5b, 0xf9, 0xe1, 0x01, 0xc1, 0x92, 0x05, 0xea,
+ 0xf5, 0x2f, 0xc6, 0xc6, 0xc7, 0x8f, 0xbc, 0xf4 };
+ static const u8 C0[] __initconst = {
+ 0x00, 0xf4, 0xa3, 0xe5, 0xa0, 0x72, 0x63, 0x95,
+ 0xc6, 0x4f, 0x48, 0xd0, 0x8b, 0x5b, 0x5f, 0x8e,
+ 0x6b, 0x96, 0x1f, 0x16, 0xed, 0xbc, 0x66, 0x94,
+ 0x45, 0x31, 0xd7, 0x47, 0x73, 0x22, 0xa5, 0x86,
+ 0xce, 0xc0, 0x4c, 0xac, 0x63, 0xb8, 0x39, 0x50,
+ 0xbf, 0xe6, 0x59, 0x6c, 0x38, 0x58, 0x99, 0x1f,
+ 0x27, 0xa7, 0x9d, 0x71, 0x2a, 0xb3, 0x7b, 0xf9,
+ 0xfb, 0x17, 0x86, 0xaa, 0x99, 0x81, 0xaa, 0x43,
+ 0xe4, 0x37, 0xd3, 0x1e, 0x6e, 0xe5, 0xe6, 0xee,
+ 0xc2, 0xed, 0x95, 0x4f, 0x53, 0x0e, 0x46, 0x8a,
+ 0xcc, 0x45, 0xa5, 0xdb, 0x69, 0x0d, 0x81, 0xc9,
+ 0x32, 0x92, 0xbc, 0x8f, 0x33, 0xe6, 0xf6, 0x09,
+ 0x7c, 0x8e, 0x05, 0x19, 0x0d, 0xf1, 0xb6, 0xcc,
+ 0xf3, 0x02, 0x21, 0x90, 0x25, 0xec, 0xed, 0x0e };
+ static const u8 random[] __initconst = {
+ 0x95, 0xb7, 0xf1, 0x7e, 0x98, 0x02, 0xd3, 0x57,
+ 0x73, 0x92, 0xc6, 0xa9, 0xc0, 0x80, 0x83, 0xb6,
+ 0x7d, 0xd1, 0x29, 0x22, 0x65, 0xb5, 0xf4, 0x2d,
+ 0x23, 0x7f, 0x1c, 0x55, 0xbb, 0x9b, 0x10, 0xbf,
+ 0xcf, 0xd8, 0x2c, 0x77, 0xa3, 0x78, 0xb8, 0x26,
+ 0x6a, 0x00, 0x99, 0x14, 0x3b, 0x3c, 0x2d, 0x64,
+ 0x61, 0x1e, 0xee, 0xb6, 0x9a, 0xcd, 0xc0, 0x55,
+ 0x95, 0x7c, 0x13, 0x9e, 0x8b, 0x19, 0x0c, 0x7a,
+ 0x06, 0x95, 0x5f, 0x2c, 0x79, 0x7c, 0x27, 0x78,
+ 0xde, 0x94, 0x03, 0x96, 0xa5, 0x01, 0xf4, 0x0e,
+ 0x91, 0x39, 0x6a, 0xcf, 0x8d, 0x7e, 0x45, 0xeb,
+ 0xdb, 0xb5, 0x3b, 0xbf, 0x8c, 0x97, 0x52, 0x30,
+ 0xd2, 0xf0, 0xff, 0x91, 0x06, 0xc7, 0x61, 0x19,
+ 0xae, 0x49, 0x8e, 0x7f, 0xbc, 0x03, 0xd9, 0x0f,
+ 0x8e, 0x4c, 0x51, 0x62, 0x7a, 0xed, 0x5c, 0x8d,
+ 0x42, 0x63, 0xd5, 0xd2, 0xb9, 0x78, 0x87, 0x3a,
+ 0x0d, 0xe5, 0x96, 0xee, 0x6d, 0xc7, 0xf7, 0xc2,
+ 0x9e, 0x37, 0xee, 0xe8, 0xb3, 0x4c, 0x90, 0xdd,
+ 0x1c, 0xf6, 0xa9, 0xdd, 0xb2, 0x2b, 0x4c, 0xbd,
+ 0x08, 0x6b, 0x14, 0xb3, 0x5d, 0xe9, 0x3d, 0xa2,
+ 0xd5, 0xcb, 0x18, 0x06, 0x69, 0x8c, 0xbd, 0x7b,
+ 0xbb, 0x67, 0xbf, 0xe3, 0xd3, 0x1f, 0xd2, 0xd1,
+ 0xdb, 0xd2, 0xa1, 0xe0, 0x58, 0xa3, 0xeb, 0x99,
+ 0xd7, 0xe5, 0x1f, 0x1a, 0x93, 0x8e, 0xed, 0x5e,
+ 0x1c, 0x1d, 0xe2, 0x3a, 0x6b, 0x43, 0x45, 0xd3,
+ 0x19, 0x14, 0x09, 0xf9, 0x2f, 0x39, 0xb3, 0x67,
+ 0x0d, 0x8d, 0xbf, 0xb6, 0x35, 0xd8, 0xe6, 0xa3,
+ 0x69, 0x32, 0xd8, 0x10, 0x33, 0xd1, 0x44, 0x8d,
+ 0x63, 0xb4, 0x03, 0xdd, 0xf8, 0x8e, 0x12, 0x1b,
+ 0x6e, 0x81, 0x9a, 0xc3, 0x81, 0x22, 0x6c, 0x13,
+ 0x21, 0xe4, 0xb0, 0x86, 0x44, 0xf6, 0x72, 0x7c,
+ 0x36, 0x8c, 0x5a, 0x9f, 0x7a, 0x4b, 0x3e, 0xe2 };
+
+ u8 buf[sizeof(random)];
+ struct prno_ws_s ws;
+
+ memset(&ws, 0, sizeof(ws));
+
+ /* initial seed */
+ cpacf_prno(CPACF_PRNO_SHA512_DRNG_SEED,
+ &ws, NULL, 0, seed, sizeof(seed));
+
+ /* check working states V and C */
+ if (memcmp(ws.V, V0, sizeof(V0)) != 0
+ || memcmp(ws.C, C0, sizeof(C0)) != 0) {
+ pr_err("The prng self test state test "
+ "for the SHA-512 mode failed\n");
+ prng_errorflag = PRNG_SELFTEST_FAILED;
+ return -EIO;
+ }
+
+ /* generate random bytes */
+ cpacf_prno(CPACF_PRNO_SHA512_DRNG_GEN,
+ &ws, buf, sizeof(buf), NULL, 0);
+ cpacf_prno(CPACF_PRNO_SHA512_DRNG_GEN,
+ &ws, buf, sizeof(buf), NULL, 0);
+
+ /* check against expected data */
+ if (memcmp(buf, random, sizeof(random)) != 0) {
+ pr_err("The prng self test data test "
+ "for the SHA-512 mode failed\n");
+ prng_errorflag = PRNG_SELFTEST_FAILED;
+ return -EIO;
+ }
+
+ return 0;
+}
+
+
+static int __init prng_sha512_instantiate(void)
+{
+ int ret, datalen;
+ u8 seed[64 + 32 + 16];
+
+ pr_debug("prng runs in SHA-512 mode "
+ "with chunksize=%d and reseed_limit=%u\n",
+ prng_chunk_size, prng_reseed_limit);
+
+ /* memory allocation, prng_data struct init, mutex init */
+ datalen = sizeof(struct prng_data_s) + prng_chunk_size;
+ if (fips_enabled)
+ datalen += prng_chunk_size;
+ prng_data = kzalloc(datalen, GFP_KERNEL);
+ if (!prng_data) {
+ prng_errorflag = PRNG_INSTANTIATE_FAILED;
+ return -ENOMEM;
+ }
+ mutex_init(&prng_data->mutex);
+ prng_data->buf = ((u8 *)prng_data) + sizeof(struct prng_data_s);
+
+ /* selftest */
+ ret = prng_sha512_selftest();
+ if (ret)
+ goto outfree;
+
+ /* generate initial seed bytestring, with 256 + 128 bits entropy */
+ ret = generate_entropy(seed, 64 + 32);
+ if (ret != 64 + 32)
+ goto outfree;
+ /* followed by 16 bytes of unique nonce */
+ get_tod_clock_ext(seed + 64 + 32);
+
+ /* initial seed of the prno drng */
+ cpacf_prno(CPACF_PRNO_SHA512_DRNG_SEED,
+ &prng_data->prnows, NULL, 0, seed, sizeof(seed));
+
+ /*
+ * If fips mode is enabled, generate a first block of random
+ * bytes for the FIPS 140-2 Conditional Self Test.
+ */
+ if (fips_enabled) {
+ prng_data->prev = prng_data->buf + prng_chunk_size;
+ cpacf_prno(CPACF_PRNO_SHA512_DRNG_GEN,
+ &prng_data->prnows,
+ prng_data->prev, prng_chunk_size, NULL, 0);
+ }
+
+ return 0;
+
+outfree:
+ kfree(prng_data);
+ return ret;
+}
+
+
+static void prng_sha512_deinstantiate(void)
+{
+ pr_debug("The prng module stopped after running in SHA-512 mode\n");
+ kzfree(prng_data);
+}
+
+
+static int prng_sha512_reseed(void)
+{
+ int ret;
+ u8 seed[64];
+
+ /* fetch 256 bits of fresh entropy */
+ ret = generate_entropy(seed, sizeof(seed));
+ if (ret != sizeof(seed))
+ return ret;
+
+ /* do a reseed of the prno drng with this bytestring */
+ cpacf_prno(CPACF_PRNO_SHA512_DRNG_SEED,
+ &prng_data->prnows, NULL, 0, seed, sizeof(seed));
+
+ return 0;
+}
+
+
+static int prng_sha512_generate(u8 *buf, size_t nbytes)
+{
+ int ret;
+
+ /* reseed needed ? */
+ if (prng_data->prnows.reseed_counter > prng_reseed_limit) {
+ ret = prng_sha512_reseed();
+ if (ret)
+ return ret;
+ }
+
+ /* PRNO generate */
+ cpacf_prno(CPACF_PRNO_SHA512_DRNG_GEN,
+ &prng_data->prnows, buf, nbytes, NULL, 0);
+
+ /* FIPS 140-2 Conditional Self Test */
+ if (fips_enabled) {
+ if (!memcmp(prng_data->prev, buf, nbytes)) {
+ prng_errorflag = PRNG_GEN_FAILED;
+ return -EILSEQ;
+ }
+ memcpy(prng_data->prev, buf, nbytes);
+ }
+
+ return nbytes;
+}
+
+
+/*** file io functions ***/
+
+static int prng_open(struct inode *inode, struct file *file)
+{
+ return nonseekable_open(inode, file);
+}
+
+
+static ssize_t prng_tdes_read(struct file *file, char __user *ubuf,
+ size_t nbytes, loff_t *ppos)
+{
+ int chunk, n, ret = 0;
+
+ /* lock prng_data struct */
+ if (mutex_lock_interruptible(&prng_data->mutex))
+ return -ERESTARTSYS;
+
+ while (nbytes) {
+ if (need_resched()) {
+ if (signal_pending(current)) {
+ if (ret == 0)
+ ret = -ERESTARTSYS;
+ break;
+ }
+ /* give mutex free before calling schedule() */
+ mutex_unlock(&prng_data->mutex);
+ schedule();
+ /* reacquire mutex */
+ if (mutex_lock_interruptible(&prng_data->mutex)) {
+ if (ret == 0)
+ ret = -ERESTARTSYS;
+ return ret;
+ }
+ }
+
+ /*
+ * we lose some random bytes if an attacker issues
+ * reads < 8 bytes, but we don't care
+ */
+ chunk = min_t(int, nbytes, prng_chunk_size);
+
+ /* PRNG only likes multiples of 8 bytes */
+ n = (chunk + 7) & -8;
+
+ if (prng_data->prngws.reseed_counter > prng_reseed_limit)
+ prng_tdes_seed(8);
+
+ /* if the CPU supports PRNG, stckf is present too */
+ *((unsigned long long *)prng_data->buf) = get_tod_clock_fast();
+
+ /*
+ * Besides the STCKF, the input for the TDES-EDE is the output
+ * of the last operation. We differ here from X9.17 since we
+ * only store one timestamp into the buffer. Padding the whole
+ * buffer with timestamps does not improve security, since
+ * successive stckf have nearly constant offsets.
+ * If an attacker knows the first timestamp, it would be
+ * trivial to guess the additional values. One timestamp
+ * is therefore enough and still guarantees unique input values.
+ *
+ * Note: you can still get strict X9.17 conformity by setting
+ * prng_chunk_size to 8 bytes.
+ */
+ cpacf_kmc(CPACF_KMC_PRNG, prng_data->prngws.parm_block,
+ prng_data->buf, prng_data->buf, n);
+
+ prng_data->prngws.byte_counter += n;
+ prng_data->prngws.reseed_counter += n;
+
+ if (copy_to_user(ubuf, prng_data->buf, chunk)) {
+ ret = -EFAULT;
+ break;
+ }
+
+ nbytes -= chunk;
+ ret += chunk;
+ ubuf += chunk;
+ }
+
+ /* unlock prng_data struct */
+ mutex_unlock(&prng_data->mutex);
+
+ return ret;
+}
+
+
+static ssize_t prng_sha512_read(struct file *file, char __user *ubuf,
+ size_t nbytes, loff_t *ppos)
+{
+ int n, ret = 0;
+ u8 *p;
+
+ /* if errorflag is set do nothing and return 'broken pipe' */
+ if (prng_errorflag)
+ return -EPIPE;
+
+ /* lock prng_data struct */
+ if (mutex_lock_interruptible(&prng_data->mutex))
+ return -ERESTARTSYS;
+
+ while (nbytes) {
+ if (need_resched()) {
+ if (signal_pending(current)) {
+ if (ret == 0)
+ ret = -ERESTARTSYS;
+ break;
+ }
+ /* give mutex free before calling schedule() */
+ mutex_unlock(&prng_data->mutex);
+ schedule();
+ /* reacquire mutex */
+ if (mutex_lock_interruptible(&prng_data->mutex)) {
+ if (ret == 0)
+ ret = -ERESTARTSYS;
+ return ret;
+ }
+ }
+ if (prng_data->rest) {
+ /* push left over random bytes from the previous read */
+ p = prng_data->buf + prng_chunk_size - prng_data->rest;
+ n = (nbytes < prng_data->rest) ?
+ nbytes : prng_data->rest;
+ prng_data->rest -= n;
+ } else {
+ /* generate one chunk of random bytes into read buf */
+ p = prng_data->buf;
+ n = prng_sha512_generate(p, prng_chunk_size);
+ if (n < 0) {
+ ret = n;
+ break;
+ }
+ if (nbytes < prng_chunk_size) {
+ n = nbytes;
+ prng_data->rest = prng_chunk_size - n;
+ } else {
+ n = prng_chunk_size;
+ prng_data->rest = 0;
+ }
+ }
+ if (copy_to_user(ubuf, p, n)) {
+ ret = -EFAULT;
+ break;
+ }
+ ubuf += n;
+ nbytes -= n;
+ ret += n;
+ }
+
+ /* unlock prng_data struct */
+ mutex_unlock(&prng_data->mutex);
+
+ return ret;
+}
+
+
+/*** sysfs stuff ***/
+
+static const struct file_operations prng_sha512_fops = {
+ .owner = THIS_MODULE,
+ .open = &prng_open,
+ .release = NULL,
+ .read = &prng_sha512_read,
+ .llseek = noop_llseek,
+};
+static const struct file_operations prng_tdes_fops = {
+ .owner = THIS_MODULE,
+ .open = &prng_open,
+ .release = NULL,
+ .read = &prng_tdes_read,
+ .llseek = noop_llseek,
+};
+
+static struct miscdevice prng_sha512_dev = {
+ .name = "prandom",
+ .minor = MISC_DYNAMIC_MINOR,
+ .mode = 0644,
+ .fops = &prng_sha512_fops,
+};
+static struct miscdevice prng_tdes_dev = {
+ .name = "prandom",
+ .minor = MISC_DYNAMIC_MINOR,
+ .mode = 0644,
+ .fops = &prng_tdes_fops,
+};
+
+
+/* chunksize attribute (ro) */
+static ssize_t prng_chunksize_show(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ return snprintf(buf, PAGE_SIZE, "%u\n", prng_chunk_size);
+}
+static DEVICE_ATTR(chunksize, 0444, prng_chunksize_show, NULL);
+
+/* counter attribute (ro) */
+static ssize_t prng_counter_show(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ u64 counter;
+
+ if (mutex_lock_interruptible(&prng_data->mutex))
+ return -ERESTARTSYS;
+ if (prng_mode == PRNG_MODE_SHA512)
+ counter = prng_data->prnows.stream_bytes;
+ else
+ counter = prng_data->prngws.byte_counter;
+ mutex_unlock(&prng_data->mutex);
+
+ return snprintf(buf, PAGE_SIZE, "%llu\n", counter);
+}
+static DEVICE_ATTR(byte_counter, 0444, prng_counter_show, NULL);
+
+/* errorflag attribute (ro) */
+static ssize_t prng_errorflag_show(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ return snprintf(buf, PAGE_SIZE, "%d\n", prng_errorflag);
+}
+static DEVICE_ATTR(errorflag, 0444, prng_errorflag_show, NULL);
+
+/* mode attribute (ro) */
+static ssize_t prng_mode_show(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ if (prng_mode == PRNG_MODE_TDES)
+ return snprintf(buf, PAGE_SIZE, "TDES\n");
+ else
+ return snprintf(buf, PAGE_SIZE, "SHA512\n");
+}
+static DEVICE_ATTR(mode, 0444, prng_mode_show, NULL);
+
+/* reseed attribute (w) */
+static ssize_t prng_reseed_store(struct device *dev,
+ struct device_attribute *attr,
+ const char *buf, size_t count)
+{
+ if (mutex_lock_interruptible(&prng_data->mutex))
+ return -ERESTARTSYS;
+ prng_sha512_reseed();
+ mutex_unlock(&prng_data->mutex);
+
+ return count;
+}
+static DEVICE_ATTR(reseed, 0200, NULL, prng_reseed_store);
+
+/* reseed limit attribute (rw) */
+static ssize_t prng_reseed_limit_show(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ return snprintf(buf, PAGE_SIZE, "%u\n", prng_reseed_limit);
+}
+static ssize_t prng_reseed_limit_store(struct device *dev,
+ struct device_attribute *attr,
+ const char *buf, size_t count)
+{
+ unsigned int limit;
+
+ if (sscanf(buf, "%u\n", &limit) != 1)
+ return -EINVAL;
+
+ if (prng_mode == PRNG_MODE_SHA512) {
+ if (limit < PRNG_RESEED_LIMIT_SHA512_LOWER)
+ return -EINVAL;
+ } else {
+ if (limit < PRNG_RESEED_LIMIT_TDES_LOWER)
+ return -EINVAL;
+ }
+
+ prng_reseed_limit = limit;
+
+ return count;
+}
+static DEVICE_ATTR(reseed_limit, 0644,
+ prng_reseed_limit_show, prng_reseed_limit_store);
+
+/* strength attribute (ro) */
+static ssize_t prng_strength_show(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ return snprintf(buf, PAGE_SIZE, "256\n");
+}
+static DEVICE_ATTR(strength, 0444, prng_strength_show, NULL);
+
+static struct attribute *prng_sha512_dev_attrs[] = {
+ &dev_attr_errorflag.attr,
+ &dev_attr_chunksize.attr,
+ &dev_attr_byte_counter.attr,
+ &dev_attr_mode.attr,
+ &dev_attr_reseed.attr,
+ &dev_attr_reseed_limit.attr,
+ &dev_attr_strength.attr,
+ NULL
+};
+static struct attribute *prng_tdes_dev_attrs[] = {
+ &dev_attr_chunksize.attr,
+ &dev_attr_byte_counter.attr,
+ &dev_attr_mode.attr,
+ NULL
+};
+
+static struct attribute_group prng_sha512_dev_attr_group = {
+ .attrs = prng_sha512_dev_attrs
+};
+static struct attribute_group prng_tdes_dev_attr_group = {
+ .attrs = prng_tdes_dev_attrs
+};
+
+
+/*** module init and exit ***/
+
+static int __init prng_init(void)
+{
+ int ret;
+
+ /* check if the CPU has a PRNG */
+ if (!cpacf_query_func(CPACF_KMC, CPACF_KMC_PRNG))
+ return -EOPNOTSUPP;
+
+ /* choose prng mode */
+ if (prng_mode != PRNG_MODE_TDES) {
+ /* check for MSA5 support for PRNO operations */
+ if (!cpacf_query_func(CPACF_PRNO, CPACF_PRNO_SHA512_DRNG_GEN)) {
+ if (prng_mode == PRNG_MODE_SHA512) {
+ pr_err("The prng module cannot "
+ "start in SHA-512 mode\n");
+ return -EOPNOTSUPP;
+ }
+ prng_mode = PRNG_MODE_TDES;
+ } else
+ prng_mode = PRNG_MODE_SHA512;
+ }
+
+ if (prng_mode == PRNG_MODE_SHA512) {
+
+ /* SHA512 mode */
+
+ if (prng_chunk_size < PRNG_CHUNKSIZE_SHA512_MIN
+ || prng_chunk_size > PRNG_CHUNKSIZE_SHA512_MAX)
+ return -EINVAL;
+ prng_chunk_size = (prng_chunk_size + 0x3f) & ~0x3f;
+
+ if (prng_reseed_limit == 0)
+ prng_reseed_limit = PRNG_RESEED_LIMIT_SHA512;
+ else if (prng_reseed_limit < PRNG_RESEED_LIMIT_SHA512_LOWER)
+ return -EINVAL;
+
+ ret = prng_sha512_instantiate();
+ if (ret)
+ goto out;
+
+ ret = misc_register(&prng_sha512_dev);
+ if (ret) {
+ prng_sha512_deinstantiate();
+ goto out;
+ }
+ ret = sysfs_create_group(&prng_sha512_dev.this_device->kobj,
+ &prng_sha512_dev_attr_group);
+ if (ret) {
+ misc_deregister(&prng_sha512_dev);
+ prng_sha512_deinstantiate();
+ goto out;
+ }
+
+ } else {
+
+ /* TDES mode */
+
+ if (prng_chunk_size < PRNG_CHUNKSIZE_TDES_MIN
+ || prng_chunk_size > PRNG_CHUNKSIZE_TDES_MAX)
+ return -EINVAL;
+ prng_chunk_size = (prng_chunk_size + 0x07) & ~0x07;
+
+ if (prng_reseed_limit == 0)
+ prng_reseed_limit = PRNG_RESEED_LIMIT_TDES;
+ else if (prng_reseed_limit < PRNG_RESEED_LIMIT_TDES_LOWER)
+ return -EINVAL;
+
+ ret = prng_tdes_instantiate();
+ if (ret)
+ goto out;
+
+ ret = misc_register(&prng_tdes_dev);
+ if (ret) {
+ prng_tdes_deinstantiate();
+ goto out;
+ }
+ ret = sysfs_create_group(&prng_tdes_dev.this_device->kobj,
+ &prng_tdes_dev_attr_group);
+ if (ret) {
+ misc_deregister(&prng_tdes_dev);
+ prng_tdes_deinstantiate();
+ goto out;
+ }
+
+ }
+
+out:
+ return ret;
+}
+
+
+static void __exit prng_exit(void)
+{
+ if (prng_mode == PRNG_MODE_SHA512) {
+ sysfs_remove_group(&prng_sha512_dev.this_device->kobj,
+ &prng_sha512_dev_attr_group);
+ misc_deregister(&prng_sha512_dev);
+ prng_sha512_deinstantiate();
+ } else {
+ sysfs_remove_group(&prng_tdes_dev.this_device->kobj,
+ &prng_tdes_dev_attr_group);
+ misc_deregister(&prng_tdes_dev);
+ prng_tdes_deinstantiate();
+ }
+}
+
+module_cpu_feature_match(MSA, prng_init);
+module_exit(prng_exit);
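From user space the driver is consumed as a character device: the misc device is named "prandom", so with a standard udev setup it typically appears as /dev/prandom, and the sysfs attributes shown above usually live under /sys/devices/virtual/misc/prandom/. A small illustrative reader follows; the device path is an assumption about the local udev configuration.

#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>

int main(void)
{
	unsigned char buf[256];	/* matches the default chunk size */
	ssize_t n;
	int fd;

	fd = open("/dev/prandom", O_RDONLY);	/* assumed udev-created node */
	if (fd < 0) {
		perror("open /dev/prandom");
		return EXIT_FAILURE;
	}

	n = read(fd, buf, sizeof(buf));
	if (n < 0)
		perror("read");
	else
		printf("read %zd pseudo-random bytes\n", n);

	close(fd);
	return n < 0 ? EXIT_FAILURE : EXIT_SUCCESS;
}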
diff --git a/arch/s390/crypto/sha.h b/arch/s390/crypto/sha.h
new file mode 100644
index 000000000..d6f8258b4
--- /dev/null
+++ b/arch/s390/crypto/sha.h
@@ -0,0 +1,32 @@
+/* SPDX-License-Identifier: GPL-2.0+ */
+/*
+ * Cryptographic API.
+ *
+ * s390 generic implementation of the SHA Secure Hash Algorithms.
+ *
+ * Copyright IBM Corp. 2007
+ * Author(s): Jan Glauber (jang@de.ibm.com)
+ */
+#ifndef _CRYPTO_ARCH_S390_SHA_H
+#define _CRYPTO_ARCH_S390_SHA_H
+
+#include <linux/crypto.h>
+#include <crypto/sha.h>
+
+/* must be big enough for the largest SHA variant */
+#define SHA_MAX_STATE_SIZE (SHA512_DIGEST_SIZE / 4)
+#define SHA_MAX_BLOCK_SIZE SHA512_BLOCK_SIZE
+
+struct s390_sha_ctx {
+ u64 count; /* message length in bytes */
+ u32 state[SHA_MAX_STATE_SIZE];
+ u8 buf[2 * SHA_MAX_BLOCK_SIZE];
+ int func; /* KIMD function to use */
+};
+
+struct shash_desc;
+
+int s390_sha_update(struct shash_desc *desc, const u8 *data, unsigned int len);
+int s390_sha_final(struct shash_desc *desc, u8 *out);
+
+#endif
diff --git a/arch/s390/crypto/sha1_s390.c b/arch/s390/crypto/sha1_s390.c
new file mode 100644
index 000000000..009572e82
--- /dev/null
+++ b/arch/s390/crypto/sha1_s390.c
@@ -0,0 +1,103 @@
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * Cryptographic API.
+ *
+ * s390 implementation of the SHA1 Secure Hash Algorithm.
+ *
+ * Derived from cryptoapi implementation, adapted for in-place
+ * scatterlist interface. Originally based on the public domain
+ * implementation written by Steve Reid.
+ *
+ * s390 Version:
+ * Copyright IBM Corp. 2003, 2007
+ * Author(s): Thomas Spatzier
+ * Jan Glauber (jan.glauber@de.ibm.com)
+ *
+ * Derived from "crypto/sha1_generic.c"
+ * Copyright (c) Alan Smithee.
+ * Copyright (c) Andrew McDonald <andrew@mcdonald.org.uk>
+ * Copyright (c) Jean-Francois Dive <jef@linuxbe.org>
+ */
+#include <crypto/internal/hash.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/cpufeature.h>
+#include <crypto/sha.h>
+#include <asm/cpacf.h>
+
+#include "sha.h"
+
+static int sha1_init(struct shash_desc *desc)
+{
+ struct s390_sha_ctx *sctx = shash_desc_ctx(desc);
+
+ sctx->state[0] = SHA1_H0;
+ sctx->state[1] = SHA1_H1;
+ sctx->state[2] = SHA1_H2;
+ sctx->state[3] = SHA1_H3;
+ sctx->state[4] = SHA1_H4;
+ sctx->count = 0;
+ sctx->func = CPACF_KIMD_SHA_1;
+
+ return 0;
+}
+
+static int sha1_export(struct shash_desc *desc, void *out)
+{
+ struct s390_sha_ctx *sctx = shash_desc_ctx(desc);
+ struct sha1_state *octx = out;
+
+ octx->count = sctx->count;
+ memcpy(octx->state, sctx->state, sizeof(octx->state));
+ memcpy(octx->buffer, sctx->buf, sizeof(octx->buffer));
+ return 0;
+}
+
+static int sha1_import(struct shash_desc *desc, const void *in)
+{
+ struct s390_sha_ctx *sctx = shash_desc_ctx(desc);
+ const struct sha1_state *ictx = in;
+
+ sctx->count = ictx->count;
+ memcpy(sctx->state, ictx->state, sizeof(ictx->state));
+ memcpy(sctx->buf, ictx->buffer, sizeof(ictx->buffer));
+ sctx->func = CPACF_KIMD_SHA_1;
+ return 0;
+}
+
+static struct shash_alg alg = {
+ .digestsize = SHA1_DIGEST_SIZE,
+ .init = sha1_init,
+ .update = s390_sha_update,
+ .final = s390_sha_final,
+ .export = sha1_export,
+ .import = sha1_import,
+ .descsize = sizeof(struct s390_sha_ctx),
+ .statesize = sizeof(struct sha1_state),
+ .base = {
+ .cra_name = "sha1",
+ .cra_driver_name= "sha1-s390",
+ .cra_priority = 300,
+ .cra_blocksize = SHA1_BLOCK_SIZE,
+ .cra_module = THIS_MODULE,
+ }
+};
+
+static int __init sha1_s390_init(void)
+{
+ if (!cpacf_query_func(CPACF_KIMD, CPACF_KIMD_SHA_1))
+ return -EOPNOTSUPP;
+ return crypto_register_shash(&alg);
+}
+
+static void __exit sha1_s390_fini(void)
+{
+ crypto_unregister_shash(&alg);
+}
+
+module_cpu_feature_match(MSA, sha1_s390_init);
+module_exit(sha1_s390_fini);
+
+MODULE_ALIAS_CRYPTO("sha1");
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("SHA1 Secure Hash Algorithm");
diff --git a/arch/s390/crypto/sha256_s390.c b/arch/s390/crypto/sha256_s390.c
new file mode 100644
index 000000000..62833a1d8
--- /dev/null
+++ b/arch/s390/crypto/sha256_s390.c
@@ -0,0 +1,143 @@
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * Cryptographic API.
+ *
+ * s390 implementation of the SHA256 and SHA224 Secure Hash Algorithm.
+ *
+ * s390 Version:
+ * Copyright IBM Corp. 2005, 2011
+ * Author(s): Jan Glauber (jang@de.ibm.com)
+ */
+#include <crypto/internal/hash.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/cpufeature.h>
+#include <crypto/sha.h>
+#include <asm/cpacf.h>
+
+#include "sha.h"
+
+static int sha256_init(struct shash_desc *desc)
+{
+ struct s390_sha_ctx *sctx = shash_desc_ctx(desc);
+
+ sctx->state[0] = SHA256_H0;
+ sctx->state[1] = SHA256_H1;
+ sctx->state[2] = SHA256_H2;
+ sctx->state[3] = SHA256_H3;
+ sctx->state[4] = SHA256_H4;
+ sctx->state[5] = SHA256_H5;
+ sctx->state[6] = SHA256_H6;
+ sctx->state[7] = SHA256_H7;
+ sctx->count = 0;
+ sctx->func = CPACF_KIMD_SHA_256;
+
+ return 0;
+}
+
+static int sha256_export(struct shash_desc *desc, void *out)
+{
+ struct s390_sha_ctx *sctx = shash_desc_ctx(desc);
+ struct sha256_state *octx = out;
+
+ octx->count = sctx->count;
+ memcpy(octx->state, sctx->state, sizeof(octx->state));
+ memcpy(octx->buf, sctx->buf, sizeof(octx->buf));
+ return 0;
+}
+
+static int sha256_import(struct shash_desc *desc, const void *in)
+{
+ struct s390_sha_ctx *sctx = shash_desc_ctx(desc);
+ const struct sha256_state *ictx = in;
+
+ sctx->count = ictx->count;
+ memcpy(sctx->state, ictx->state, sizeof(ictx->state));
+ memcpy(sctx->buf, ictx->buf, sizeof(ictx->buf));
+ sctx->func = CPACF_KIMD_SHA_256;
+ return 0;
+}
+
+static struct shash_alg sha256_alg = {
+ .digestsize = SHA256_DIGEST_SIZE,
+ .init = sha256_init,
+ .update = s390_sha_update,
+ .final = s390_sha_final,
+ .export = sha256_export,
+ .import = sha256_import,
+ .descsize = sizeof(struct s390_sha_ctx),
+ .statesize = sizeof(struct sha256_state),
+ .base = {
+ .cra_name = "sha256",
+ .cra_driver_name= "sha256-s390",
+ .cra_priority = 300,
+ .cra_blocksize = SHA256_BLOCK_SIZE,
+ .cra_module = THIS_MODULE,
+ }
+};
+
+static int sha224_init(struct shash_desc *desc)
+{
+ struct s390_sha_ctx *sctx = shash_desc_ctx(desc);
+
+ sctx->state[0] = SHA224_H0;
+ sctx->state[1] = SHA224_H1;
+ sctx->state[2] = SHA224_H2;
+ sctx->state[3] = SHA224_H3;
+ sctx->state[4] = SHA224_H4;
+ sctx->state[5] = SHA224_H5;
+ sctx->state[6] = SHA224_H6;
+ sctx->state[7] = SHA224_H7;
+ sctx->count = 0;
+ sctx->func = CPACF_KIMD_SHA_256;
+
+ return 0;
+}
+
+static struct shash_alg sha224_alg = {
+ .digestsize = SHA224_DIGEST_SIZE,
+ .init = sha224_init,
+ .update = s390_sha_update,
+ .final = s390_sha_final,
+ .export = sha256_export,
+ .import = sha256_import,
+ .descsize = sizeof(struct s390_sha_ctx),
+ .statesize = sizeof(struct sha256_state),
+ .base = {
+ .cra_name = "sha224",
+ .cra_driver_name= "sha224-s390",
+ .cra_priority = 300,
+ .cra_blocksize = SHA224_BLOCK_SIZE,
+ .cra_module = THIS_MODULE,
+ }
+};
+
+static int __init sha256_s390_init(void)
+{
+ int ret;
+
+ if (!cpacf_query_func(CPACF_KIMD, CPACF_KIMD_SHA_256))
+ return -EOPNOTSUPP;
+ ret = crypto_register_shash(&sha256_alg);
+ if (ret < 0)
+ goto out;
+ ret = crypto_register_shash(&sha224_alg);
+ if (ret < 0)
+ crypto_unregister_shash(&sha256_alg);
+out:
+ return ret;
+}
+
+static void __exit sha256_s390_fini(void)
+{
+ crypto_unregister_shash(&sha224_alg);
+ crypto_unregister_shash(&sha256_alg);
+}
+
+module_cpu_feature_match(MSA, sha256_s390_init);
+module_exit(sha256_s390_fini);
+
+MODULE_ALIAS_CRYPTO("sha256");
+MODULE_ALIAS_CRYPTO("sha224");
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("SHA256 and SHA224 Secure Hash Algorithm");
diff --git a/arch/s390/crypto/sha512_s390.c b/arch/s390/crypto/sha512_s390.c
new file mode 100644
index 000000000..be589c340
--- /dev/null
+++ b/arch/s390/crypto/sha512_s390.c
@@ -0,0 +1,149 @@
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * Cryptographic API.
+ *
+ * s390 implementation of the SHA512 and SHA384 Secure Hash Algorithms.
+ *
+ * Copyright IBM Corp. 2007
+ * Author(s): Jan Glauber (jang@de.ibm.com)
+ */
+#include <crypto/internal/hash.h>
+#include <crypto/sha.h>
+#include <linux/errno.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/cpufeature.h>
+#include <asm/cpacf.h>
+
+#include "sha.h"
+
+static int sha512_init(struct shash_desc *desc)
+{
+ struct s390_sha_ctx *ctx = shash_desc_ctx(desc);
+
+ *(__u64 *)&ctx->state[0] = 0x6a09e667f3bcc908ULL;
+ *(__u64 *)&ctx->state[2] = 0xbb67ae8584caa73bULL;
+ *(__u64 *)&ctx->state[4] = 0x3c6ef372fe94f82bULL;
+ *(__u64 *)&ctx->state[6] = 0xa54ff53a5f1d36f1ULL;
+ *(__u64 *)&ctx->state[8] = 0x510e527fade682d1ULL;
+ *(__u64 *)&ctx->state[10] = 0x9b05688c2b3e6c1fULL;
+ *(__u64 *)&ctx->state[12] = 0x1f83d9abfb41bd6bULL;
+ *(__u64 *)&ctx->state[14] = 0x5be0cd19137e2179ULL;
+ ctx->count = 0;
+ ctx->func = CPACF_KIMD_SHA_512;
+
+ return 0;
+}
+
+static int sha512_export(struct shash_desc *desc, void *out)
+{
+ struct s390_sha_ctx *sctx = shash_desc_ctx(desc);
+ struct sha512_state *octx = out;
+
+ octx->count[0] = sctx->count;
+ octx->count[1] = 0;
+ memcpy(octx->state, sctx->state, sizeof(octx->state));
+ memcpy(octx->buf, sctx->buf, sizeof(octx->buf));
+ return 0;
+}
+
+static int sha512_import(struct shash_desc *desc, const void *in)
+{
+ struct s390_sha_ctx *sctx = shash_desc_ctx(desc);
+ const struct sha512_state *ictx = in;
+
+ if (unlikely(ictx->count[1]))
+ return -ERANGE;
+ sctx->count = ictx->count[0];
+
+ memcpy(sctx->state, ictx->state, sizeof(ictx->state));
+ memcpy(sctx->buf, ictx->buf, sizeof(ictx->buf));
+ sctx->func = CPACF_KIMD_SHA_512;
+ return 0;
+}
+
+static struct shash_alg sha512_alg = {
+ .digestsize = SHA512_DIGEST_SIZE,
+ .init = sha512_init,
+ .update = s390_sha_update,
+ .final = s390_sha_final,
+ .export = sha512_export,
+ .import = sha512_import,
+ .descsize = sizeof(struct s390_sha_ctx),
+ .statesize = sizeof(struct sha512_state),
+ .base = {
+ .cra_name = "sha512",
+ .cra_driver_name= "sha512-s390",
+ .cra_priority = 300,
+ .cra_blocksize = SHA512_BLOCK_SIZE,
+ .cra_module = THIS_MODULE,
+ }
+};
+
+MODULE_ALIAS_CRYPTO("sha512");
+
+static int sha384_init(struct shash_desc *desc)
+{
+ struct s390_sha_ctx *ctx = shash_desc_ctx(desc);
+
+ *(__u64 *)&ctx->state[0] = 0xcbbb9d5dc1059ed8ULL;
+ *(__u64 *)&ctx->state[2] = 0x629a292a367cd507ULL;
+ *(__u64 *)&ctx->state[4] = 0x9159015a3070dd17ULL;
+ *(__u64 *)&ctx->state[6] = 0x152fecd8f70e5939ULL;
+ *(__u64 *)&ctx->state[8] = 0x67332667ffc00b31ULL;
+ *(__u64 *)&ctx->state[10] = 0x8eb44a8768581511ULL;
+ *(__u64 *)&ctx->state[12] = 0xdb0c2e0d64f98fa7ULL;
+ *(__u64 *)&ctx->state[14] = 0x47b5481dbefa4fa4ULL;
+ ctx->count = 0;
+ ctx->func = CPACF_KIMD_SHA_512;
+
+ return 0;
+}
+
+static struct shash_alg sha384_alg = {
+ .digestsize = SHA384_DIGEST_SIZE,
+ .init = sha384_init,
+ .update = s390_sha_update,
+ .final = s390_sha_final,
+ .export = sha512_export,
+ .import = sha512_import,
+ .descsize = sizeof(struct s390_sha_ctx),
+ .statesize = sizeof(struct sha512_state),
+ .base = {
+ .cra_name = "sha384",
+ .cra_driver_name= "sha384-s390",
+ .cra_priority = 300,
+ .cra_blocksize = SHA384_BLOCK_SIZE,
+ .cra_ctxsize = sizeof(struct s390_sha_ctx),
+ .cra_module = THIS_MODULE,
+ }
+};
+
+MODULE_ALIAS_CRYPTO("sha384");
+
+static int __init init(void)
+{
+ int ret;
+
+ if (!cpacf_query_func(CPACF_KIMD, CPACF_KIMD_SHA_512))
+ return -EOPNOTSUPP;
+ if ((ret = crypto_register_shash(&sha512_alg)) < 0)
+ goto out;
+ if ((ret = crypto_register_shash(&sha384_alg)) < 0)
+ crypto_unregister_shash(&sha512_alg);
+out:
+ return ret;
+}
+
+static void __exit fini(void)
+{
+ crypto_unregister_shash(&sha512_alg);
+ crypto_unregister_shash(&sha384_alg);
+}
+
+module_cpu_feature_match(MSA, init);
+module_exit(fini);
+
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("SHA512 and SHA-384 Secure Hash Algorithm");
diff --git a/arch/s390/crypto/sha_common.c b/arch/s390/crypto/sha_common.c
new file mode 100644
index 000000000..cf0718d12
--- /dev/null
+++ b/arch/s390/crypto/sha_common.c
@@ -0,0 +1,92 @@
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * Cryptographic API.
+ *
+ * s390 generic implementation of the SHA Secure Hash Algorithms.
+ *
+ * Copyright IBM Corp. 2007
+ * Author(s): Jan Glauber (jang@de.ibm.com)
+ */
+
+#include <crypto/internal/hash.h>
+#include <linux/module.h>
+#include <asm/cpacf.h>
+#include "sha.h"
+
+int s390_sha_update(struct shash_desc *desc, const u8 *data, unsigned int len)
+{
+ struct s390_sha_ctx *ctx = shash_desc_ctx(desc);
+ unsigned int bsize = crypto_shash_blocksize(desc->tfm);
+ unsigned int index, n;
+
+ /* how much is already in the buffer? */
+ index = ctx->count & (bsize - 1);
+ ctx->count += len;
+
+ if ((index + len) < bsize)
+ goto store;
+
+ /* process one stored block */
+ if (index) {
+ memcpy(ctx->buf + index, data, bsize - index);
+ cpacf_kimd(ctx->func, ctx->state, ctx->buf, bsize);
+ data += bsize - index;
+ len -= bsize - index;
+ index = 0;
+ }
+
+ /* process as many blocks as possible */
+ if (len >= bsize) {
+ n = len & ~(bsize - 1);
+ cpacf_kimd(ctx->func, ctx->state, data, n);
+ data += n;
+ len -= n;
+ }
+store:
+ if (len)
+ memcpy(ctx->buf + index, data, len);
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(s390_sha_update);
+
+int s390_sha_final(struct shash_desc *desc, u8 *out)
+{
+ struct s390_sha_ctx *ctx = shash_desc_ctx(desc);
+ unsigned int bsize = crypto_shash_blocksize(desc->tfm);
+ u64 bits;
+ unsigned int index, end, plen;
+
+ /* SHA-512 uses 128 bit padding length */
+ plen = (bsize > SHA256_BLOCK_SIZE) ? 16 : 8;
+
+ /* must perform manual padding */
+ index = ctx->count & (bsize - 1);
+ end = (index < bsize - plen) ? bsize : (2 * bsize);
+
+ /* start pad with 1 */
+ ctx->buf[index] = 0x80;
+ index++;
+
+ /* pad with zeros */
+ memset(ctx->buf + index, 0x00, end - index - 8);
+
+ /*
+ * Append the message length. SHA-512 actually expects a 128-bit length
+ * value, but a u64 is sufficient here; the upper 64 bits stay zero.
+ */
+ bits = ctx->count * 8;
+ memcpy(ctx->buf + end - 8, &bits, sizeof(bits));
+ cpacf_kimd(ctx->func, ctx->state, ctx->buf, end);
+
+ /* copy digest to out */
+ memcpy(out, ctx->state, crypto_shash_digestsize(desc->tfm));
+ /* wipe context */
+ memset(ctx, 0, sizeof *ctx);
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(s390_sha_final);
+
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("s390 SHA cipher common functions");
diff --git a/arch/s390/defconfig b/arch/s390/defconfig
new file mode 100644
index 000000000..5134c71a4
--- /dev/null
+++ b/arch/s390/defconfig
@@ -0,0 +1,249 @@
+CONFIG_SYSVIPC=y
+CONFIG_POSIX_MQUEUE=y
+CONFIG_USELIB=y
+CONFIG_AUDIT=y
+CONFIG_NO_HZ_IDLE=y
+CONFIG_HIGH_RES_TIMERS=y
+CONFIG_TASKSTATS=y
+CONFIG_TASK_DELAY_ACCT=y
+CONFIG_TASK_XACCT=y
+CONFIG_TASK_IO_ACCOUNTING=y
+# CONFIG_CPU_ISOLATION is not set
+CONFIG_IKCONFIG=y
+CONFIG_IKCONFIG_PROC=y
+CONFIG_CGROUPS=y
+CONFIG_MEMCG=y
+CONFIG_MEMCG_SWAP=y
+CONFIG_BLK_CGROUP=y
+CONFIG_CGROUP_SCHED=y
+CONFIG_RT_GROUP_SCHED=y
+CONFIG_CGROUP_PIDS=y
+CONFIG_CGROUP_FREEZER=y
+CONFIG_CGROUP_HUGETLB=y
+CONFIG_CPUSETS=y
+CONFIG_CGROUP_DEVICE=y
+CONFIG_CGROUP_CPUACCT=y
+CONFIG_CGROUP_PERF=y
+CONFIG_NAMESPACES=y
+CONFIG_USER_NS=y
+CONFIG_BLK_DEV_INITRD=y
+CONFIG_EXPERT=y
+# CONFIG_SYSFS_SYSCALL is not set
+CONFIG_CHECKPOINT_RESTORE=y
+CONFIG_BPF_SYSCALL=y
+CONFIG_USERFAULTFD=y
+# CONFIG_COMPAT_BRK is not set
+CONFIG_PROFILING=y
+CONFIG_OPROFILE=y
+CONFIG_KPROBES=y
+CONFIG_JUMP_LABEL=y
+CONFIG_STATIC_KEYS_SELFTEST=y
+CONFIG_MODULES=y
+CONFIG_MODULE_UNLOAD=y
+CONFIG_BLK_DEV_INTEGRITY=y
+CONFIG_PARTITION_ADVANCED=y
+CONFIG_IBM_PARTITION=y
+CONFIG_DEFAULT_DEADLINE=y
+CONFIG_LIVEPATCH=y
+CONFIG_NR_CPUS=256
+CONFIG_NUMA=y
+CONFIG_HZ_100=y
+CONFIG_KEXEC_FILE=y
+CONFIG_MEMORY_HOTPLUG=y
+CONFIG_MEMORY_HOTREMOVE=y
+CONFIG_KSM=y
+CONFIG_TRANSPARENT_HUGEPAGE=y
+CONFIG_CLEANCACHE=y
+CONFIG_FRONTSWAP=y
+CONFIG_ZSWAP=y
+CONFIG_ZBUD=m
+CONFIG_ZSMALLOC=m
+CONFIG_ZSMALLOC_STAT=y
+CONFIG_IDLE_PAGE_TRACKING=y
+CONFIG_CRASH_DUMP=y
+CONFIG_BINFMT_MISC=m
+CONFIG_HIBERNATION=y
+CONFIG_NET=y
+CONFIG_PACKET=y
+CONFIG_UNIX=y
+CONFIG_NET_KEY=y
+CONFIG_INET=y
+CONFIG_IP_MULTICAST=y
+CONFIG_L2TP=m
+CONFIG_L2TP_DEBUGFS=m
+CONFIG_VLAN_8021Q=y
+CONFIG_NET_SCHED=y
+CONFIG_NET_SCH_CBQ=m
+CONFIG_NET_SCH_PRIO=m
+CONFIG_NET_SCH_RED=m
+CONFIG_NET_SCH_SFQ=m
+CONFIG_NET_SCH_TEQL=m
+CONFIG_NET_SCH_TBF=m
+CONFIG_NET_SCH_GRED=m
+CONFIG_NET_SCH_DSMARK=m
+CONFIG_NET_CLS_TCINDEX=m
+CONFIG_NET_CLS_ROUTE4=m
+CONFIG_NET_CLS_FW=m
+CONFIG_NET_CLS_U32=m
+CONFIG_CLS_U32_MARK=y
+CONFIG_NET_CLS_RSVP=m
+CONFIG_NET_CLS_RSVP6=m
+CONFIG_NET_CLS_ACT=y
+CONFIG_NET_ACT_POLICE=y
+CONFIG_BPF_JIT=y
+CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
+CONFIG_DEVTMPFS=y
+CONFIG_BLK_DEV_LOOP=m
+CONFIG_BLK_DEV_NBD=m
+CONFIG_BLK_DEV_RAM=y
+CONFIG_VIRTIO_BLK=y
+CONFIG_SCSI=y
+CONFIG_BLK_DEV_SD=y
+CONFIG_CHR_DEV_ST=y
+CONFIG_BLK_DEV_SR=y
+CONFIG_BLK_DEV_SR_VENDOR=y
+CONFIG_CHR_DEV_SG=y
+CONFIG_SCSI_CONSTANTS=y
+CONFIG_SCSI_LOGGING=y
+CONFIG_SCSI_FC_ATTRS=y
+CONFIG_ZFCP=y
+CONFIG_SCSI_VIRTIO=y
+CONFIG_MD=y
+CONFIG_MD_LINEAR=m
+CONFIG_MD_MULTIPATH=m
+CONFIG_BLK_DEV_DM=y
+CONFIG_DM_CRYPT=m
+CONFIG_DM_SNAPSHOT=m
+CONFIG_DM_MIRROR=m
+CONFIG_DM_LOG_USERSPACE=m
+CONFIG_DM_RAID=m
+CONFIG_DM_ZERO=m
+CONFIG_DM_MULTIPATH=m
+CONFIG_DM_MULTIPATH_QL=m
+CONFIG_DM_MULTIPATH_ST=m
+CONFIG_DM_UEVENT=y
+CONFIG_DM_VERITY=m
+CONFIG_DM_SWITCH=m
+CONFIG_NETDEVICES=y
+CONFIG_BONDING=m
+CONFIG_DUMMY=m
+CONFIG_EQUALIZER=m
+CONFIG_TUN=m
+CONFIG_VIRTIO_NET=y
+# CONFIG_NET_VENDOR_ALACRITECH is not set
+# CONFIG_NET_VENDOR_CORTINA is not set
+# CONFIG_NET_VENDOR_SOLARFLARE is not set
+# CONFIG_NET_VENDOR_SOCIONEXT is not set
+# CONFIG_NET_VENDOR_SYNOPSYS is not set
+# CONFIG_INPUT is not set
+# CONFIG_SERIO is not set
+# CONFIG_VT is not set
+CONFIG_DEVKMEM=y
+CONFIG_RAW_DRIVER=m
+CONFIG_VIRTIO_BALLOON=y
+CONFIG_EXT4_FS=y
+CONFIG_EXT4_FS_POSIX_ACL=y
+CONFIG_EXT4_FS_SECURITY=y
+CONFIG_XFS_FS=y
+CONFIG_XFS_QUOTA=y
+CONFIG_XFS_POSIX_ACL=y
+CONFIG_XFS_RT=y
+CONFIG_BTRFS_FS=y
+CONFIG_BTRFS_FS_POSIX_ACL=y
+CONFIG_FANOTIFY=y
+CONFIG_FUSE_FS=y
+CONFIG_PROC_KCORE=y
+CONFIG_TMPFS=y
+CONFIG_TMPFS_POSIX_ACL=y
+CONFIG_HUGETLBFS=y
+# CONFIG_NETWORK_FILESYSTEMS is not set
+CONFIG_DEBUG_INFO=y
+CONFIG_DEBUG_INFO_DWARF4=y
+CONFIG_GDB_SCRIPTS=y
+CONFIG_UNUSED_SYMBOLS=y
+CONFIG_DEBUG_SECTION_MISMATCH=y
+CONFIG_DEBUG_FORCE_WEAK_PER_CPU=y
+CONFIG_MAGIC_SYSRQ=y
+CONFIG_DEBUG_PAGEALLOC=y
+CONFIG_DETECT_HUNG_TASK=y
+CONFIG_PANIC_ON_OOPS=y
+CONFIG_PROVE_LOCKING=y
+CONFIG_LOCK_STAT=y
+CONFIG_DEBUG_LOCKDEP=y
+CONFIG_DEBUG_ATOMIC_SLEEP=y
+CONFIG_DEBUG_LIST=y
+CONFIG_DEBUG_SG=y
+CONFIG_DEBUG_NOTIFIERS=y
+CONFIG_RCU_CPU_STALL_TIMEOUT=60
+CONFIG_LATENCYTOP=y
+CONFIG_SCHED_TRACER=y
+CONFIG_FTRACE_SYSCALLS=y
+CONFIG_TRACER_SNAPSHOT_PER_CPU_SWAP=y
+CONFIG_STACK_TRACER=y
+CONFIG_BLK_DEV_IO_TRACE=y
+CONFIG_FUNCTION_PROFILER=y
+# CONFIG_RUNTIME_TESTING_MENU is not set
+CONFIG_S390_PTDUMP=y
+CONFIG_CRYPTO_CRYPTD=m
+CONFIG_CRYPTO_AUTHENC=m
+CONFIG_CRYPTO_TEST=m
+CONFIG_CRYPTO_CCM=m
+CONFIG_CRYPTO_GCM=m
+CONFIG_CRYPTO_CBC=y
+CONFIG_CRYPTO_CFB=m
+CONFIG_CRYPTO_CTS=m
+CONFIG_CRYPTO_LRW=m
+CONFIG_CRYPTO_PCBC=m
+CONFIG_CRYPTO_XTS=m
+CONFIG_CRYPTO_CMAC=m
+CONFIG_CRYPTO_XCBC=m
+CONFIG_CRYPTO_VMAC=m
+CONFIG_CRYPTO_CRC32=m
+CONFIG_CRYPTO_MD4=m
+CONFIG_CRYPTO_MICHAEL_MIC=m
+CONFIG_CRYPTO_RMD128=m
+CONFIG_CRYPTO_RMD160=m
+CONFIG_CRYPTO_RMD256=m
+CONFIG_CRYPTO_RMD320=m
+CONFIG_CRYPTO_SHA256=y
+CONFIG_CRYPTO_SHA512=m
+CONFIG_CRYPTO_TGR192=m
+CONFIG_CRYPTO_WP512=m
+CONFIG_CRYPTO_ANUBIS=m
+CONFIG_CRYPTO_ARC4=m
+CONFIG_CRYPTO_BLOWFISH=m
+CONFIG_CRYPTO_CAMELLIA=m
+CONFIG_CRYPTO_CAST5=m
+CONFIG_CRYPTO_CAST6=m
+CONFIG_CRYPTO_FCRYPT=m
+CONFIG_CRYPTO_KHAZAD=m
+CONFIG_CRYPTO_SALSA20=m
+CONFIG_CRYPTO_SEED=m
+CONFIG_CRYPTO_SERPENT=m
+CONFIG_CRYPTO_SM4=m
+CONFIG_CRYPTO_TEA=m
+CONFIG_CRYPTO_TWOFISH=m
+CONFIG_CRYPTO_DEFLATE=m
+CONFIG_CRYPTO_LZ4=m
+CONFIG_CRYPTO_LZ4HC=m
+CONFIG_CRYPTO_ANSI_CPRNG=m
+CONFIG_CRYPTO_USER_API_HASH=m
+CONFIG_CRYPTO_USER_API_SKCIPHER=m
+CONFIG_CRYPTO_USER_API_RNG=m
+CONFIG_ZCRYPT=m
+CONFIG_PKEY=m
+CONFIG_CRYPTO_PAES_S390=m
+CONFIG_CRYPTO_SHA1_S390=m
+CONFIG_CRYPTO_SHA256_S390=m
+CONFIG_CRYPTO_SHA512_S390=m
+CONFIG_CRYPTO_DES_S390=m
+CONFIG_CRYPTO_AES_S390=m
+CONFIG_CRYPTO_CRC32_S390=y
+CONFIG_CRC7=m
+# CONFIG_XZ_DEC_X86 is not set
+# CONFIG_XZ_DEC_POWERPC is not set
+# CONFIG_XZ_DEC_IA64 is not set
+# CONFIG_XZ_DEC_ARM is not set
+# CONFIG_XZ_DEC_ARMTHUMB is not set
+# CONFIG_XZ_DEC_SPARC is not set
+CONFIG_CMM=m
diff --git a/arch/s390/hypfs/Makefile b/arch/s390/hypfs/Makefile
new file mode 100644
index 000000000..06f601509
--- /dev/null
+++ b/arch/s390/hypfs/Makefile
@@ -0,0 +1,9 @@
+# SPDX-License-Identifier: GPL-2.0
+#
+# Makefile for the linux hypfs filesystem routines.
+#
+
+obj-$(CONFIG_S390_HYPFS_FS) += s390_hypfs.o
+
+s390_hypfs-objs := inode.o hypfs_diag.o hypfs_vm.o hypfs_dbfs.o hypfs_sprp.o
+s390_hypfs-objs += hypfs_diag0c.o
diff --git a/arch/s390/hypfs/hypfs.h b/arch/s390/hypfs/hypfs.h
new file mode 100644
index 000000000..52348e0a8
--- /dev/null
+++ b/arch/s390/hypfs/hypfs.h
@@ -0,0 +1,77 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Hypervisor filesystem for Linux on s390.
+ *
+ * Copyright IBM Corp. 2006
+ * Author(s): Michael Holzheu <holzheu@de.ibm.com>
+ */
+
+#ifndef _HYPFS_H_
+#define _HYPFS_H_
+
+#include <linux/fs.h>
+#include <linux/types.h>
+#include <linux/debugfs.h>
+#include <linux/workqueue.h>
+#include <linux/kref.h>
+#include <asm/hypfs.h>
+
+#define REG_FILE_MODE 0440
+#define UPDATE_FILE_MODE 0220
+#define DIR_MODE 0550
+
+extern struct dentry *hypfs_mkdir(struct dentry *parent, const char *name);
+
+extern struct dentry *hypfs_create_u64(struct dentry *dir, const char *name,
+ __u64 value);
+
+extern struct dentry *hypfs_create_str(struct dentry *dir, const char *name,
+ char *string);
+
+/* LPAR Hypervisor */
+extern int hypfs_diag_init(void);
+extern void hypfs_diag_exit(void);
+extern int hypfs_diag_create_files(struct dentry *root);
+
+/* VM Hypervisor */
+extern int hypfs_vm_init(void);
+extern void hypfs_vm_exit(void);
+extern int hypfs_vm_create_files(struct dentry *root);
+
+/* VM diagnose 0c */
+int hypfs_diag0c_init(void);
+void hypfs_diag0c_exit(void);
+
+/* Set Partition-Resource Parameter */
+int hypfs_sprp_init(void);
+void hypfs_sprp_exit(void);
+
+/* debugfs interface */
+struct hypfs_dbfs_file;
+
+struct hypfs_dbfs_data {
+ void *buf;
+ void *buf_free_ptr;
+ size_t size;
+ struct hypfs_dbfs_file *dbfs_file;
+};
+
+struct hypfs_dbfs_file {
+ const char *name;
+ int (*data_create)(void **data, void **data_free_ptr,
+ size_t *size);
+ void (*data_free)(const void *buf_free_ptr);
+ long (*unlocked_ioctl) (struct file *, unsigned int,
+ unsigned long);
+
+ /* Private data for hypfs_dbfs.c */
+ struct mutex lock;
+ struct dentry *dentry;
+};
+
+extern int hypfs_dbfs_init(void);
+extern void hypfs_dbfs_exit(void);
+extern int hypfs_dbfs_create_file(struct hypfs_dbfs_file *df);
+extern void hypfs_dbfs_remove_file(struct hypfs_dbfs_file *df);
+
+#endif /* _HYPFS_H_ */
diff --git a/arch/s390/hypfs/hypfs_dbfs.c b/arch/s390/hypfs/hypfs_dbfs.c
new file mode 100644
index 000000000..b9bdf5c19
--- /dev/null
+++ b/arch/s390/hypfs/hypfs_dbfs.c
@@ -0,0 +1,105 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Hypervisor filesystem for Linux on s390 - debugfs interface
+ *
+ * Copyright IBM Corp. 2010
+ * Author(s): Michael Holzheu <holzheu@linux.vnet.ibm.com>
+ */
+
+#include <linux/slab.h>
+#include "hypfs.h"
+
+static struct dentry *dbfs_dir;
+
+static struct hypfs_dbfs_data *hypfs_dbfs_data_alloc(struct hypfs_dbfs_file *f)
+{
+ struct hypfs_dbfs_data *data;
+
+ data = kmalloc(sizeof(*data), GFP_KERNEL);
+ if (!data)
+ return NULL;
+ data->dbfs_file = f;
+ return data;
+}
+
+static void hypfs_dbfs_data_free(struct hypfs_dbfs_data *data)
+{
+ data->dbfs_file->data_free(data->buf_free_ptr);
+ kfree(data);
+}
+
+static ssize_t dbfs_read(struct file *file, char __user *buf,
+ size_t size, loff_t *ppos)
+{
+ struct hypfs_dbfs_data *data;
+ struct hypfs_dbfs_file *df;
+ ssize_t rc;
+
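+ /* data is rebuilt on each read from offset 0; reads at other offsets return EOF */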
+ if (*ppos != 0)
+ return 0;
+
+ df = file_inode(file)->i_private;
+ mutex_lock(&df->lock);
+ data = hypfs_dbfs_data_alloc(df);
+ if (!data) {
+ mutex_unlock(&df->lock);
+ return -ENOMEM;
+ }
+ rc = df->data_create(&data->buf, &data->buf_free_ptr, &data->size);
+ if (rc) {
+ mutex_unlock(&df->lock);
+ kfree(data);
+ return rc;
+ }
+ mutex_unlock(&df->lock);
+
+ rc = simple_read_from_buffer(buf, size, ppos, data->buf, data->size);
+ hypfs_dbfs_data_free(data);
+ return rc;
+}
+
+static long dbfs_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
+{
+ struct hypfs_dbfs_file *df = file_inode(file)->i_private;
+ long rc;
+
+ mutex_lock(&df->lock);
+ if (df->unlocked_ioctl)
+ rc = df->unlocked_ioctl(file, cmd, arg);
+ else
+ rc = -ENOTTY;
+ mutex_unlock(&df->lock);
+ return rc;
+}
+
+static const struct file_operations dbfs_ops = {
+ .read = dbfs_read,
+ .llseek = no_llseek,
+ .unlocked_ioctl = dbfs_ioctl,
+};
+
+int hypfs_dbfs_create_file(struct hypfs_dbfs_file *df)
+{
+ df->dentry = debugfs_create_file(df->name, 0400, dbfs_dir, df,
+ &dbfs_ops);
+ if (IS_ERR(df->dentry))
+ return PTR_ERR(df->dentry);
+ mutex_init(&df->lock);
+ return 0;
+}
+
+void hypfs_dbfs_remove_file(struct hypfs_dbfs_file *df)
+{
+ debugfs_remove(df->dentry);
+}
+
+int hypfs_dbfs_init(void)
+{
+ dbfs_dir = debugfs_create_dir("s390_hypfs", NULL);
+ return PTR_ERR_OR_ZERO(dbfs_dir);
+}
+
+void hypfs_dbfs_exit(void)
+{
+ debugfs_remove(dbfs_dir);
+}
diff --git a/arch/s390/hypfs/hypfs_diag.c b/arch/s390/hypfs/hypfs_diag.c
new file mode 100644
index 000000000..3452e18bb
--- /dev/null
+++ b/arch/s390/hypfs/hypfs_diag.c
@@ -0,0 +1,617 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Hypervisor filesystem for Linux on s390. Diag 204 and 224
+ * implementation.
+ *
+ * Copyright IBM Corp. 2006, 2008
+ * Author(s): Michael Holzheu <holzheu@de.ibm.com>
+ */
+
+#define KMSG_COMPONENT "hypfs"
+#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
+
+#include <linux/types.h>
+#include <linux/errno.h>
+#include <linux/slab.h>
+#include <linux/string.h>
+#include <linux/vmalloc.h>
+#include <linux/mm.h>
+#include <asm/diag.h>
+#include <asm/ebcdic.h>
+#include "hypfs.h"
+
+#define TMP_SIZE 64 /* size of temporary buffers */
+
+#define DBFS_D204_HDR_VERSION 0
+
+static char *diag224_cpu_names; /* diag 224 name table */
+static enum diag204_sc diag204_store_sc; /* used subcode for store */
+static enum diag204_format diag204_info_type; /* used diag 204 data format */
+
+static void *diag204_buf; /* 4K aligned buffer for diag204 data */
+static void *diag204_buf_vmalloc; /* vmalloc pointer for diag204 data */
+static int diag204_buf_pages; /* number of pages for diag204 data */
+
+static struct dentry *dbfs_d204_file;
+
+/*
+ * DIAG 204 member access functions.
+ *
+ * Since we have two different diag 204 data formats for old and new s390
+ * machines, we do not access the structs directly, but use getter functions for
+ * each struct member instead. This should make the code more readable.
+ */
+
+/* Time information block */
+
+static inline int info_blk_hdr__size(enum diag204_format type)
+{
+ if (type == DIAG204_INFO_SIMPLE)
+ return sizeof(struct diag204_info_blk_hdr);
+ else /* DIAG204_INFO_EXT */
+ return sizeof(struct diag204_x_info_blk_hdr);
+}
+
+static inline __u8 info_blk_hdr__npar(enum diag204_format type, void *hdr)
+{
+ if (type == DIAG204_INFO_SIMPLE)
+ return ((struct diag204_info_blk_hdr *)hdr)->npar;
+ else /* DIAG204_INFO_EXT */
+ return ((struct diag204_x_info_blk_hdr *)hdr)->npar;
+}
+
+static inline __u8 info_blk_hdr__flags(enum diag204_format type, void *hdr)
+{
+ if (type == DIAG204_INFO_SIMPLE)
+ return ((struct diag204_info_blk_hdr *)hdr)->flags;
+ else /* DIAG204_INFO_EXT */
+ return ((struct diag204_x_info_blk_hdr *)hdr)->flags;
+}
+
+static inline __u16 info_blk_hdr__pcpus(enum diag204_format type, void *hdr)
+{
+ if (type == DIAG204_INFO_SIMPLE)
+ return ((struct diag204_info_blk_hdr *)hdr)->phys_cpus;
+ else /* DIAG204_INFO_EXT */
+ return ((struct diag204_x_info_blk_hdr *)hdr)->phys_cpus;
+}
+
+/* Partition header */
+
+static inline int part_hdr__size(enum diag204_format type)
+{
+ if (type == DIAG204_INFO_SIMPLE)
+ return sizeof(struct diag204_part_hdr);
+ else /* DIAG204_INFO_EXT */
+ return sizeof(struct diag204_x_part_hdr);
+}
+
+static inline __u8 part_hdr__rcpus(enum diag204_format type, void *hdr)
+{
+ if (type == DIAG204_INFO_SIMPLE)
+ return ((struct diag204_part_hdr *)hdr)->cpus;
+ else /* DIAG204_INFO_EXT */
+ return ((struct diag204_x_part_hdr *)hdr)->rcpus;
+}
+
+static inline void part_hdr__part_name(enum diag204_format type, void *hdr,
+ char *name)
+{
+ if (type == DIAG204_INFO_SIMPLE)
+ memcpy(name, ((struct diag204_part_hdr *)hdr)->part_name,
+ DIAG204_LPAR_NAME_LEN);
+ else /* DIAG204_INFO_EXT */
+ memcpy(name, ((struct diag204_x_part_hdr *)hdr)->part_name,
+ DIAG204_LPAR_NAME_LEN);
+ EBCASC(name, DIAG204_LPAR_NAME_LEN);
+ name[DIAG204_LPAR_NAME_LEN] = 0;
+ strim(name);
+}
+
+/* CPU info block */
+
+static inline int cpu_info__size(enum diag204_format type)
+{
+ if (type == DIAG204_INFO_SIMPLE)
+ return sizeof(struct diag204_cpu_info);
+ else /* DIAG204_INFO_EXT */
+ return sizeof(struct diag204_x_cpu_info);
+}
+
+static inline __u8 cpu_info__ctidx(enum diag204_format type, void *hdr)
+{
+ if (type == DIAG204_INFO_SIMPLE)
+ return ((struct diag204_cpu_info *)hdr)->ctidx;
+ else /* DIAG204_INFO_EXT */
+ return ((struct diag204_x_cpu_info *)hdr)->ctidx;
+}
+
+static inline __u16 cpu_info__cpu_addr(enum diag204_format type, void *hdr)
+{
+ if (type == DIAG204_INFO_SIMPLE)
+ return ((struct diag204_cpu_info *)hdr)->cpu_addr;
+ else /* DIAG204_INFO_EXT */
+ return ((struct diag204_x_cpu_info *)hdr)->cpu_addr;
+}
+
+static inline __u64 cpu_info__acc_time(enum diag204_format type, void *hdr)
+{
+ if (type == DIAG204_INFO_SIMPLE)
+ return ((struct diag204_cpu_info *)hdr)->acc_time;
+ else /* DIAG204_INFO_EXT */
+ return ((struct diag204_x_cpu_info *)hdr)->acc_time;
+}
+
+static inline __u64 cpu_info__lp_time(enum diag204_format type, void *hdr)
+{
+ if (type == DIAG204_INFO_SIMPLE)
+ return ((struct diag204_cpu_info *)hdr)->lp_time;
+ else /* DIAG204_INFO_EXT */
+ return ((struct diag204_x_cpu_info *)hdr)->lp_time;
+}
+
+static inline __u64 cpu_info__online_time(enum diag204_format type, void *hdr)
+{
+ if (type == DIAG204_INFO_SIMPLE)
+ return 0; /* online_time not available in simple info */
+ else /* DIAG204_INFO_EXT */
+ return ((struct diag204_x_cpu_info *)hdr)->online_time;
+}
+
+/* Physical header */
+
+static inline int phys_hdr__size(enum diag204_format type)
+{
+ if (type == DIAG204_INFO_SIMPLE)
+ return sizeof(struct diag204_phys_hdr);
+ else /* DIAG204_INFO_EXT */
+ return sizeof(struct diag204_x_phys_hdr);
+}
+
+static inline __u8 phys_hdr__cpus(enum diag204_format type, void *hdr)
+{
+ if (type == DIAG204_INFO_SIMPLE)
+ return ((struct diag204_phys_hdr *)hdr)->cpus;
+ else /* DIAG204_INFO_EXT */
+ return ((struct diag204_x_phys_hdr *)hdr)->cpus;
+}
+
+/* Physical CPU info block */
+
+static inline int phys_cpu__size(enum diag204_format type)
+{
+ if (type == DIAG204_INFO_SIMPLE)
+ return sizeof(struct diag204_phys_cpu);
+ else /* DIAG204_INFO_EXT */
+ return sizeof(struct diag204_x_phys_cpu);
+}
+
+static inline __u16 phys_cpu__cpu_addr(enum diag204_format type, void *hdr)
+{
+ if (type == DIAG204_INFO_SIMPLE)
+ return ((struct diag204_phys_cpu *)hdr)->cpu_addr;
+ else /* DIAG204_INFO_EXT */
+ return ((struct diag204_x_phys_cpu *)hdr)->cpu_addr;
+}
+
+static inline __u64 phys_cpu__mgm_time(enum diag204_format type, void *hdr)
+{
+ if (type == DIAG204_INFO_SIMPLE)
+ return ((struct diag204_phys_cpu *)hdr)->mgm_time;
+ else /* DIAG204_INFO_EXT */
+ return ((struct diag204_x_phys_cpu *)hdr)->mgm_time;
+}
+
+static inline __u64 phys_cpu__ctidx(enum diag204_format type, void *hdr)
+{
+ if (type == DIAG204_INFO_SIMPLE)
+ return ((struct diag204_phys_cpu *)hdr)->ctidx;
+ else /* DIAG204_INFO_EXT */
+ return ((struct diag204_x_phys_cpu *)hdr)->ctidx;
+}
+
+/* Diagnose 204 functions */
+/*
+ * For the old diag subcode 4 with simple data format we have to use real
+ * memory. If we use subcode 6 or 7 with extended data format, we can (and
+ * should) use vmalloc, since we need a lot of memory in that case. Currently
+ * up to 93 pages!
+ */
+
+static void diag204_free_buffer(void)
+{
+ if (!diag204_buf)
+ return;
+ if (diag204_buf_vmalloc) {
+ vfree(diag204_buf_vmalloc);
+ diag204_buf_vmalloc = NULL;
+ } else {
+ free_pages((unsigned long) diag204_buf, 0);
+ }
+ diag204_buf = NULL;
+}
+
+static void *page_align_ptr(void *ptr)
+{
+ return (void *) PAGE_ALIGN((unsigned long) ptr);
+}
+
+static void *diag204_alloc_vbuf(int pages)
+{
+ /* The buffer has to be page aligned! */
+ diag204_buf_vmalloc = vmalloc(array_size(PAGE_SIZE, (pages + 1)));
+ if (!diag204_buf_vmalloc)
+ return ERR_PTR(-ENOMEM);
+ diag204_buf = page_align_ptr(diag204_buf_vmalloc);
+ diag204_buf_pages = pages;
+ return diag204_buf;
+}
+
+static void *diag204_alloc_rbuf(void)
+{
+ diag204_buf = (void *)__get_free_pages(GFP_KERNEL, 0);
+ if (!diag204_buf)
+ return ERR_PTR(-ENOMEM);
+ diag204_buf_pages = 1;
+ return diag204_buf;
+}
+
+static void *diag204_get_buffer(enum diag204_format fmt, int *pages)
+{
+ if (diag204_buf) {
+ *pages = diag204_buf_pages;
+ return diag204_buf;
+ }
+ if (fmt == DIAG204_INFO_SIMPLE) {
+ *pages = 1;
+ return diag204_alloc_rbuf();
+ } else { /* DIAG204_INFO_EXT */
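+ /* ask diag 204 how many pages the extended data needs */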
+ *pages = diag204((unsigned long)DIAG204_SUBC_RSI |
+ (unsigned long)DIAG204_INFO_EXT, 0, NULL);
+ if (*pages <= 0)
+ return ERR_PTR(-ENOSYS);
+ else
+ return diag204_alloc_vbuf(*pages);
+ }
+}
+
+/*
+ * diag204_probe() has to find out which type of diagnose 204 implementation
+ * we have on our machine. Currently there are three possible scenarios:
+ * - subcode 4 + simple data format (only one page)
+ * - subcode 4-6 + extended data format
+ * - subcode 4-7 + extended data format
+ *
+ * Subcode 5 is used to retrieve the size of the data provided by subcodes
+ * 6 and 7. Subcode 7 basically has the same function as subcode 6; in
+ * addition it also provides information about secondary CPUs.
+ * In order to get as much information as possible, we first try
+ * subcode 7, then 6, and if both fail, we fall back to subcode 4.
+ */
+
+static int diag204_probe(void)
+{
+ void *buf;
+ int pages, rc;
+
+ buf = diag204_get_buffer(DIAG204_INFO_EXT, &pages);
+ if (!IS_ERR(buf)) {
+ if (diag204((unsigned long)DIAG204_SUBC_STIB7 |
+ (unsigned long)DIAG204_INFO_EXT, pages, buf) >= 0) {
+ diag204_store_sc = DIAG204_SUBC_STIB7;
+ diag204_info_type = DIAG204_INFO_EXT;
+ goto out;
+ }
+ if (diag204((unsigned long)DIAG204_SUBC_STIB6 |
+ (unsigned long)DIAG204_INFO_EXT, pages, buf) >= 0) {
+ diag204_store_sc = DIAG204_SUBC_STIB6;
+ diag204_info_type = DIAG204_INFO_EXT;
+ goto out;
+ }
+ diag204_free_buffer();
+ }
+
+ /* subcodes 6 and 7 failed, now try subcode 4 */
+
+ buf = diag204_get_buffer(DIAG204_INFO_SIMPLE, &pages);
+ if (IS_ERR(buf)) {
+ rc = PTR_ERR(buf);
+ goto fail_alloc;
+ }
+ if (diag204((unsigned long)DIAG204_SUBC_STIB4 |
+ (unsigned long)DIAG204_INFO_SIMPLE, pages, buf) >= 0) {
+ diag204_store_sc = DIAG204_SUBC_STIB4;
+ diag204_info_type = DIAG204_INFO_SIMPLE;
+ goto out;
+ } else {
+ rc = -ENOSYS;
+ goto fail_store;
+ }
+out:
+ rc = 0;
+fail_store:
+ diag204_free_buffer();
+fail_alloc:
+ return rc;
+}
+
+static int diag204_do_store(void *buf, int pages)
+{
+ int rc;
+
+ rc = diag204((unsigned long) diag204_store_sc |
+ (unsigned long) diag204_info_type, pages, buf);
+ return rc < 0 ? -ENOSYS : 0;
+}
+
+static void *diag204_store(void)
+{
+ void *buf;
+ int pages, rc;
+
+ buf = diag204_get_buffer(diag204_info_type, &pages);
+ if (IS_ERR(buf))
+ goto out;
+ rc = diag204_do_store(buf, pages);
+ if (rc)
+ return ERR_PTR(rc);
+out:
+ return buf;
+}
+
+/* Diagnose 224 functions */
+
+static int diag224_get_name_table(void)
+{
+ /* memory must be below 2GB */
+ diag224_cpu_names = (char *) __get_free_page(GFP_KERNEL | GFP_DMA);
+ if (!diag224_cpu_names)
+ return -ENOMEM;
+ if (diag224(diag224_cpu_names)) {
+ free_page((unsigned long) diag224_cpu_names);
+ return -EOPNOTSUPP;
+ }
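+ /* convert the 16 byte EBCDIC name entries, starting at offset 16, to ASCII */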
+ EBCASC(diag224_cpu_names + 16, (*diag224_cpu_names + 1) * 16);
+ return 0;
+}
+
+static void diag224_delete_name_table(void)
+{
+ free_page((unsigned long) diag224_cpu_names);
+}
+
+static int diag224_idx2name(int index, char *name)
+{
+ memcpy(name, diag224_cpu_names + ((index + 1) * DIAG204_CPU_NAME_LEN),
+ DIAG204_CPU_NAME_LEN);
+ name[DIAG204_CPU_NAME_LEN] = 0;
+ strim(name);
+ return 0;
+}
+
+struct dbfs_d204_hdr {
+ u64 len; /* Length of d204 buffer without header */
+ u16 version; /* Version of header */
+ u8 sc; /* Used subcode */
+ char reserved[53];
+} __attribute__ ((packed));
+
+struct dbfs_d204 {
+ struct dbfs_d204_hdr hdr; /* 64 byte header */
+ char buf[]; /* d204 buffer */
+} __attribute__ ((packed));
+
+static int dbfs_d204_create(void **data, void **data_free_ptr, size_t *size)
+{
+ struct dbfs_d204 *d204;
+ int rc, buf_size;
+ void *base;
+
+ buf_size = PAGE_SIZE * (diag204_buf_pages + 1) + sizeof(d204->hdr);
+ base = vzalloc(buf_size);
+ if (!base)
+ return -ENOMEM;
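+ /* place the header directly in front of a page aligned d204->buf */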
+ d204 = page_align_ptr(base + sizeof(d204->hdr)) - sizeof(d204->hdr);
+ rc = diag204_do_store(d204->buf, diag204_buf_pages);
+ if (rc) {
+ vfree(base);
+ return rc;
+ }
+ d204->hdr.version = DBFS_D204_HDR_VERSION;
+ d204->hdr.len = PAGE_SIZE * diag204_buf_pages;
+ d204->hdr.sc = diag204_store_sc;
+ *data = d204;
+ *data_free_ptr = base;
+ *size = d204->hdr.len + sizeof(struct dbfs_d204_hdr);
+ return 0;
+}
+
+static struct hypfs_dbfs_file dbfs_file_d204 = {
+ .name = "diag_204",
+ .data_create = dbfs_d204_create,
+ .data_free = vfree,
+};
+
+__init int hypfs_diag_init(void)
+{
+ int rc;
+
+ if (diag204_probe()) {
+ pr_err("The hardware system does not support hypfs\n");
+ return -ENODATA;
+ }
+ if (diag204_info_type == DIAG204_INFO_EXT) {
+ rc = hypfs_dbfs_create_file(&dbfs_file_d204);
+ if (rc)
+ return rc;
+ }
+ if (MACHINE_IS_LPAR) {
+ rc = diag224_get_name_table();
+ if (rc) {
+ pr_err("The hardware system does not provide all "
+ "functions required by hypfs\n");
+ debugfs_remove(dbfs_d204_file);
+ return rc;
+ }
+ }
+ return 0;
+}
+
+void hypfs_diag_exit(void)
+{
+ debugfs_remove(dbfs_d204_file);
+ diag224_delete_name_table();
+ diag204_free_buffer();
+ hypfs_dbfs_remove_file(&dbfs_file_d204);
+}
+
+/*
+ * Functions to create the directory structure
+ * *******************************************
+ */
+
+static int hypfs_create_cpu_files(struct dentry *cpus_dir, void *cpu_info)
+{
+ struct dentry *cpu_dir;
+ char buffer[TMP_SIZE];
+ void *rc;
+
+ snprintf(buffer, TMP_SIZE, "%d", cpu_info__cpu_addr(diag204_info_type,
+ cpu_info));
+ cpu_dir = hypfs_mkdir(cpus_dir, buffer);
+ if (IS_ERR(cpu_dir))
+ return PTR_ERR(cpu_dir);
+ rc = hypfs_create_u64(cpu_dir, "mgmtime",
+ cpu_info__acc_time(diag204_info_type, cpu_info) -
+ cpu_info__lp_time(diag204_info_type, cpu_info));
+ if (IS_ERR(rc))
+ return PTR_ERR(rc);
+ rc = hypfs_create_u64(cpu_dir, "cputime",
+ cpu_info__lp_time(diag204_info_type, cpu_info));
+ if (IS_ERR(rc))
+ return PTR_ERR(rc);
+ if (diag204_info_type == DIAG204_INFO_EXT) {
+ rc = hypfs_create_u64(cpu_dir, "onlinetime",
+ cpu_info__online_time(diag204_info_type,
+ cpu_info));
+ if (IS_ERR(rc))
+ return PTR_ERR(rc);
+ }
+ diag224_idx2name(cpu_info__ctidx(diag204_info_type, cpu_info), buffer);
+ rc = hypfs_create_str(cpu_dir, "type", buffer);
+ return PTR_ERR_OR_ZERO(rc);
+}
+
+static void *hypfs_create_lpar_files(struct dentry *systems_dir, void *part_hdr)
+{
+ struct dentry *cpus_dir;
+ struct dentry *lpar_dir;
+ char lpar_name[DIAG204_LPAR_NAME_LEN + 1];
+ void *cpu_info;
+ int i;
+
+ part_hdr__part_name(diag204_info_type, part_hdr, lpar_name);
+ lpar_name[DIAG204_LPAR_NAME_LEN] = 0;
+ lpar_dir = hypfs_mkdir(systems_dir, lpar_name);
+ if (IS_ERR(lpar_dir))
+ return lpar_dir;
+ cpus_dir = hypfs_mkdir(lpar_dir, "cpus");
+ if (IS_ERR(cpus_dir))
+ return cpus_dir;
+ cpu_info = part_hdr + part_hdr__size(diag204_info_type);
+ for (i = 0; i < part_hdr__rcpus(diag204_info_type, part_hdr); i++) {
+ int rc;
+ rc = hypfs_create_cpu_files(cpus_dir, cpu_info);
+ if (rc)
+ return ERR_PTR(rc);
+ cpu_info += cpu_info__size(diag204_info_type);
+ }
+ return cpu_info;
+}
+
+static int hypfs_create_phys_cpu_files(struct dentry *cpus_dir, void *cpu_info)
+{
+ struct dentry *cpu_dir;
+ char buffer[TMP_SIZE];
+ void *rc;
+
+ snprintf(buffer, TMP_SIZE, "%i", phys_cpu__cpu_addr(diag204_info_type,
+ cpu_info));
+ cpu_dir = hypfs_mkdir(cpus_dir, buffer);
+ if (IS_ERR(cpu_dir))
+ return PTR_ERR(cpu_dir);
+ rc = hypfs_create_u64(cpu_dir, "mgmtime",
+ phys_cpu__mgm_time(diag204_info_type, cpu_info));
+ if (IS_ERR(rc))
+ return PTR_ERR(rc);
+ diag224_idx2name(phys_cpu__ctidx(diag204_info_type, cpu_info), buffer);
+ rc = hypfs_create_str(cpu_dir, "type", buffer);
+ return PTR_ERR_OR_ZERO(rc);
+}
+
+static void *hypfs_create_phys_files(struct dentry *parent_dir, void *phys_hdr)
+{
+ int i;
+ void *cpu_info;
+ struct dentry *cpus_dir;
+
+ cpus_dir = hypfs_mkdir(parent_dir, "cpus");
+ if (IS_ERR(cpus_dir))
+ return cpus_dir;
+ cpu_info = phys_hdr + phys_hdr__size(diag204_info_type);
+ for (i = 0; i < phys_hdr__cpus(diag204_info_type, phys_hdr); i++) {
+ int rc;
+ rc = hypfs_create_phys_cpu_files(cpus_dir, cpu_info);
+ if (rc)
+ return ERR_PTR(rc);
+ cpu_info += phys_cpu__size(diag204_info_type);
+ }
+ return cpu_info;
+}
+
+int hypfs_diag_create_files(struct dentry *root)
+{
+ struct dentry *systems_dir, *hyp_dir;
+ void *time_hdr, *part_hdr;
+ int i, rc;
+ void *buffer, *ptr;
+
+ buffer = diag204_store();
+ if (IS_ERR(buffer))
+ return PTR_ERR(buffer);
+
+ systems_dir = hypfs_mkdir(root, "systems");
+ if (IS_ERR(systems_dir)) {
+ rc = PTR_ERR(systems_dir);
+ goto err_out;
+ }
+ time_hdr = buffer;
+ part_hdr = time_hdr + info_blk_hdr__size(diag204_info_type);
+ for (i = 0; i < info_blk_hdr__npar(diag204_info_type, time_hdr); i++) {
+ part_hdr = hypfs_create_lpar_files(systems_dir, part_hdr);
+ if (IS_ERR(part_hdr)) {
+ rc = PTR_ERR(part_hdr);
+ goto err_out;
+ }
+ }
+ if (info_blk_hdr__flags(diag204_info_type, time_hdr) &
+ DIAG204_LPAR_PHYS_FLG) {
+ ptr = hypfs_create_phys_files(root, part_hdr);
+ if (IS_ERR(ptr)) {
+ rc = PTR_ERR(ptr);
+ goto err_out;
+ }
+ }
+ hyp_dir = hypfs_mkdir(root, "hyp");
+ if (IS_ERR(hyp_dir)) {
+ rc = PTR_ERR(hyp_dir);
+ goto err_out;
+ }
+ ptr = hypfs_create_str(hyp_dir, "type", "LPAR Hypervisor");
+ if (IS_ERR(ptr)) {
+ rc = PTR_ERR(ptr);
+ goto err_out;
+ }
+ rc = 0;
+
+err_out:
+ return rc;
+}
diff --git a/arch/s390/hypfs/hypfs_diag0c.c b/arch/s390/hypfs/hypfs_diag0c.c
new file mode 100644
index 000000000..cebf05150
--- /dev/null
+++ b/arch/s390/hypfs/hypfs_diag0c.c
@@ -0,0 +1,139 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Hypervisor filesystem for Linux on s390
+ *
+ * Diag 0C implementation
+ *
+ * Copyright IBM Corp. 2014
+ */
+
+#include <linux/slab.h>
+#include <linux/cpu.h>
+#include <asm/diag.h>
+#include <asm/hypfs.h>
+#include "hypfs.h"
+
+#define DBFS_D0C_HDR_VERSION 0
+
+/*
+ * Execute diagnose 0c in 31 bit mode
+ */
+static void diag0c(struct hypfs_diag0c_entry *entry)
+{
+ diag_stat_inc(DIAG_STAT_X00C);
+ asm volatile (
+ " sam31\n"
+ " diag %0,%0,0x0c\n"
+ " sam64\n"
+ : /* no output register */
+ : "a" (entry)
+ : "memory");
+}
+
+/*
+ * Get hypfs_diag0c_entry from CPU vector and store diag0c data
+ */
+static void diag0c_fn(void *data)
+{
+ diag0c(((void **) data)[smp_processor_id()]);
+}
+
+/*
+ * Allocate buffer and store diag 0c data
+ */
+static void *diag0c_store(unsigned int *count)
+{
+ struct hypfs_diag0c_data *diag0c_data;
+ unsigned int cpu_count, cpu, i;
+ void **cpu_vec;
+
+ get_online_cpus();
+ cpu_count = num_online_cpus();
+ cpu_vec = kmalloc_array(num_possible_cpus(), sizeof(*cpu_vec),
+ GFP_KERNEL);
+ if (!cpu_vec)
+ goto fail_put_online_cpus;
+ /* Note: Diag 0c needs 8 byte alignment and real storage */
+ diag0c_data = kzalloc(sizeof(struct hypfs_diag0c_hdr) +
+ cpu_count * sizeof(struct hypfs_diag0c_entry),
+ GFP_KERNEL | GFP_DMA);
+ if (!diag0c_data)
+ goto fail_kfree_cpu_vec;
+ i = 0;
+ /* Fill CPU vector for each online CPU */
+ for_each_online_cpu(cpu) {
+ diag0c_data->entry[i].cpu = cpu;
+ cpu_vec[cpu] = &diag0c_data->entry[i++];
+ }
+ /* Collect data on all CPUs */
+ on_each_cpu(diag0c_fn, cpu_vec, 1);
+ *count = cpu_count;
+ kfree(cpu_vec);
+ put_online_cpus();
+ return diag0c_data;
+
+fail_kfree_cpu_vec:
+ kfree(cpu_vec);
+fail_put_online_cpus:
+ put_online_cpus();
+ return ERR_PTR(-ENOMEM);
+}
+
+/*
+ * Hypfs DBFS callback: Free diag 0c data
+ */
+static void dbfs_diag0c_free(const void *data)
+{
+ kfree(data);
+}
+
+/*
+ * Hypfs DBFS callback: Create diag 0c data
+ */
+static int dbfs_diag0c_create(void **data, void **data_free_ptr, size_t *size)
+{
+ struct hypfs_diag0c_data *diag0c_data;
+ unsigned int count;
+
+ diag0c_data = diag0c_store(&count);
+ if (IS_ERR(diag0c_data))
+ return PTR_ERR(diag0c_data);
+ memset(&diag0c_data->hdr, 0, sizeof(diag0c_data->hdr));
+ get_tod_clock_ext(diag0c_data->hdr.tod_ext);
+ diag0c_data->hdr.len = count * sizeof(struct hypfs_diag0c_entry);
+ diag0c_data->hdr.version = DBFS_D0C_HDR_VERSION;
+ diag0c_data->hdr.count = count;
+ *data = diag0c_data;
+ *data_free_ptr = diag0c_data;
+ *size = diag0c_data->hdr.len + sizeof(struct hypfs_diag0c_hdr);
+ return 0;
+}
+
+/*
+ * Hypfs DBFS file structure
+ */
+static struct hypfs_dbfs_file dbfs_file_0c = {
+ .name = "diag_0c",
+ .data_create = dbfs_diag0c_create,
+ .data_free = dbfs_diag0c_free,
+};
+
+/*
+ * Initialize diag 0c interface for z/VM
+ */
+int __init hypfs_diag0c_init(void)
+{
+ if (!MACHINE_IS_VM)
+ return 0;
+ return hypfs_dbfs_create_file(&dbfs_file_0c);
+}
+
+/*
+ * Shutdown diag 0c interface for z/VM
+ */
+void hypfs_diag0c_exit(void)
+{
+ if (!MACHINE_IS_VM)
+ return;
+ hypfs_dbfs_remove_file(&dbfs_file_0c);
+}
diff --git a/arch/s390/hypfs/hypfs_sprp.c b/arch/s390/hypfs/hypfs_sprp.c
new file mode 100644
index 000000000..5d85a0393
--- /dev/null
+++ b/arch/s390/hypfs/hypfs_sprp.c
@@ -0,0 +1,148 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Hypervisor filesystem for Linux on s390.
+ * Set Partition-Resource Parameter interface.
+ *
+ * Copyright IBM Corp. 2013
+ * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>
+ */
+
+#include <linux/compat.h>
+#include <linux/errno.h>
+#include <linux/gfp.h>
+#include <linux/string.h>
+#include <linux/types.h>
+#include <linux/uaccess.h>
+#include <asm/diag.h>
+#include <asm/sclp.h>
+#include "hypfs.h"
+
+#define DIAG304_SET_WEIGHTS 0
+#define DIAG304_QUERY_PRP 1
+#define DIAG304_SET_CAPPING 2
+
+#define DIAG304_CMD_MAX 2
+
+static inline unsigned long __hypfs_sprp_diag304(void *data, unsigned long cmd)
+{
+ register unsigned long _data asm("2") = (unsigned long) data;
+ register unsigned long _rc asm("3");
+ register unsigned long _cmd asm("4") = cmd;
+
+ asm volatile("diag %1,%2,0x304\n"
+ : "=d" (_rc) : "d" (_data), "d" (_cmd) : "memory");
+
+ return _rc;
+}
+
+static unsigned long hypfs_sprp_diag304(void *data, unsigned long cmd)
+{
+ diag_stat_inc(DIAG_STAT_X304);
+ return __hypfs_sprp_diag304(data, cmd);
+}
+
+static void hypfs_sprp_free(const void *data)
+{
+ free_page((unsigned long) data);
+}
+
+static int hypfs_sprp_create(void **data_ptr, void **free_ptr, size_t *size)
+{
+ unsigned long rc;
+ void *data;
+
+ data = (void *) get_zeroed_page(GFP_KERNEL);
+ if (!data)
+ return -ENOMEM;
+ rc = hypfs_sprp_diag304(data, DIAG304_QUERY_PRP);
+ if (rc != 1) {
+ *data_ptr = *free_ptr = NULL;
+ *size = 0;
+ free_page((unsigned long) data);
+ return -EIO;
+ }
+ *data_ptr = *free_ptr = data;
+ *size = PAGE_SIZE;
+ return 0;
+}
+
+static int __hypfs_sprp_ioctl(void __user *user_area)
+{
+ struct hypfs_diag304 diag304;
+ unsigned long cmd;
+ void __user *udata;
+ void *data;
+ int rc;
+
+ if (copy_from_user(&diag304, user_area, sizeof(diag304)))
+ return -EFAULT;
+ if ((diag304.args[0] >> 8) != 0 || diag304.args[1] > DIAG304_CMD_MAX)
+ return -EINVAL;
+
+ data = (void *) get_zeroed_page(GFP_KERNEL | GFP_DMA);
+ if (!data)
+ return -ENOMEM;
+
+ udata = (void __user *)(unsigned long) diag304.data;
+ if (diag304.args[1] == DIAG304_SET_WEIGHTS ||
+ diag304.args[1] == DIAG304_SET_CAPPING)
+ if (copy_from_user(data, udata, PAGE_SIZE)) {
+ rc = -EFAULT;
+ goto out;
+ }
+
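+ /* args[0] and args[1] together form the 64 bit diag 304 command word */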
+ cmd = *(unsigned long *) &diag304.args[0];
+ diag304.rc = hypfs_sprp_diag304(data, cmd);
+
+ if (diag304.args[1] == DIAG304_QUERY_PRP)
+ if (copy_to_user(udata, data, PAGE_SIZE)) {
+ rc = -EFAULT;
+ goto out;
+ }
+
+ rc = copy_to_user(user_area, &diag304, sizeof(diag304)) ? -EFAULT : 0;
+out:
+ free_page((unsigned long) data);
+ return rc;
+}
+
+static long hypfs_sprp_ioctl(struct file *file, unsigned int cmd,
+ unsigned long arg)
+{
+ void __user *argp;
+
+ if (!capable(CAP_SYS_ADMIN))
+ return -EACCES;
+ if (is_compat_task())
+ argp = compat_ptr(arg);
+ else
+ argp = (void __user *) arg;
+ switch (cmd) {
+ case HYPFS_DIAG304:
+ return __hypfs_sprp_ioctl(argp);
+ default: /* unknown ioctl number */
+ return -ENOTTY;
+ }
+ return 0;
+}
+
+static struct hypfs_dbfs_file hypfs_sprp_file = {
+ .name = "diag_304",
+ .data_create = hypfs_sprp_create,
+ .data_free = hypfs_sprp_free,
+ .unlocked_ioctl = hypfs_sprp_ioctl,
+};
+
+int hypfs_sprp_init(void)
+{
+ if (!sclp.has_sprp)
+ return 0;
+ return hypfs_dbfs_create_file(&hypfs_sprp_file);
+}
+
+void hypfs_sprp_exit(void)
+{
+ if (!sclp.has_sprp)
+ return;
+ hypfs_dbfs_remove_file(&hypfs_sprp_file);
+}
diff --git a/arch/s390/hypfs/hypfs_vm.c b/arch/s390/hypfs/hypfs_vm.c
new file mode 100644
index 000000000..90740be25
--- /dev/null
+++ b/arch/s390/hypfs/hypfs_vm.c
@@ -0,0 +1,292 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Hypervisor filesystem for Linux on s390. z/VM implementation.
+ *
+ * Copyright IBM Corp. 2006
+ * Author(s): Michael Holzheu <holzheu@de.ibm.com>
+ */
+
+#include <linux/types.h>
+#include <linux/errno.h>
+#include <linux/string.h>
+#include <linux/vmalloc.h>
+#include <asm/diag.h>
+#include <asm/ebcdic.h>
+#include <asm/timex.h>
+#include "hypfs.h"
+
+#define NAME_LEN 8
+#define DBFS_D2FC_HDR_VERSION 0
+
+static char local_guest[] = " ";
+static char all_guests[] = "* ";
+static char *all_groups = all_guests;
+static char *guest_query;
+
+struct diag2fc_data {
+ __u32 version;
+ __u32 flags;
+ __u64 used_cpu;
+ __u64 el_time;
+ __u64 mem_min_kb;
+ __u64 mem_max_kb;
+ __u64 mem_share_kb;
+ __u64 mem_used_kb;
+ __u32 pcpus;
+ __u32 lcpus;
+ __u32 vcpus;
+ __u32 ocpus;
+ __u32 cpu_max;
+ __u32 cpu_shares;
+ __u32 cpu_use_samp;
+ __u32 cpu_delay_samp;
+ __u32 page_wait_samp;
+ __u32 idle_samp;
+ __u32 other_samp;
+ __u32 total_samp;
+ char guest_name[NAME_LEN];
+};
+
+struct diag2fc_parm_list {
+ char userid[NAME_LEN];
+ char aci_grp[NAME_LEN];
+ __u64 addr;
+ __u32 size;
+ __u32 fmt;
+};
+
+static int diag2fc(int size, char* query, void *addr)
+{
+ unsigned long residual_cnt;
+ unsigned long rc;
+ struct diag2fc_parm_list parm_list;
+
+ memcpy(parm_list.userid, query, NAME_LEN);
+ ASCEBC(parm_list.userid, NAME_LEN);
+ memcpy(parm_list.aci_grp, all_groups, NAME_LEN);
+ ASCEBC(parm_list.aci_grp, NAME_LEN);
+ parm_list.addr = (unsigned long)addr;
+ parm_list.size = size;
+ parm_list.fmt = 0x02;
+ rc = -1;
+
+ diag_stat_inc(DIAG_STAT_X2FC);
+ asm volatile(
+ " diag %0,%1,0x2fc\n"
+ "0: nopr %%r7\n"
+ EX_TABLE(0b,0b)
+ : "=d" (residual_cnt), "+d" (rc) : "0" (&parm_list) : "memory");
+
+ if ((rc != 0) && (rc != -2))
+ return rc;
+ else
+ return -residual_cnt;
+}
+
+/*
+ * Allocate buffer for "query" and store diag 2fc at "offset"
+ */
+static void *diag2fc_store(char *query, unsigned int *count, int offset)
+{
+ void *data;
+ int size;
+
+ do {
+ size = diag2fc(0, query, NULL);
+ if (size < 0)
+ return ERR_PTR(-EACCES);
+ data = vmalloc(size + offset);
+ if (!data)
+ return ERR_PTR(-ENOMEM);
+ if (diag2fc(size, query, data + offset) == 0)
+ break;
+ vfree(data);
+ } while (1);
+ *count = (size / sizeof(struct diag2fc_data));
+
+ return data;
+}
+
+static void diag2fc_free(const void *data)
+{
+ vfree(data);
+}
+
+#define ATTRIBUTE(dir, name, member) \
+do { \
+ void *rc; \
+ rc = hypfs_create_u64(dir, name, member); \
+ if (IS_ERR(rc)) \
+ return PTR_ERR(rc); \
+} while(0)
+
+static int hypfs_vm_create_guest(struct dentry *systems_dir,
+ struct diag2fc_data *data)
+{
+ char guest_name[NAME_LEN + 1] = {};
+ struct dentry *guest_dir, *cpus_dir, *samples_dir, *mem_dir;
+ int dedicated_flag, capped_value;
+
+ capped_value = (data->flags & 0x00000006) >> 1;
+ dedicated_flag = (data->flags & 0x00000008) >> 3;
+
+ /* guest dir */
+ memcpy(guest_name, data->guest_name, NAME_LEN);
+ EBCASC(guest_name, NAME_LEN);
+ strim(guest_name);
+ guest_dir = hypfs_mkdir(systems_dir, guest_name);
+ if (IS_ERR(guest_dir))
+ return PTR_ERR(guest_dir);
+ ATTRIBUTE(guest_dir, "onlinetime_us", data->el_time);
+
+ /* logical cpu information */
+ cpus_dir = hypfs_mkdir(guest_dir, "cpus");
+ if (IS_ERR(cpus_dir))
+ return PTR_ERR(cpus_dir);
+ ATTRIBUTE(cpus_dir, "cputime_us", data->used_cpu);
+ ATTRIBUTE(cpus_dir, "capped", capped_value);
+ ATTRIBUTE(cpus_dir, "dedicated", dedicated_flag);
+ ATTRIBUTE(cpus_dir, "count", data->vcpus);
+ /*
+ * Note: The "weight_min" attribute got the wrong name.
+ * The value represents the number of non-stopped (operating)
+ * CPUs.
+ */
+ ATTRIBUTE(cpus_dir, "weight_min", data->ocpus);
+ ATTRIBUTE(cpus_dir, "weight_max", data->cpu_max);
+ ATTRIBUTE(cpus_dir, "weight_cur", data->cpu_shares);
+
+ /* memory information */
+ mem_dir = hypfs_mkdir(guest_dir, "mem");
+ if (IS_ERR(mem_dir))
+ return PTR_ERR(mem_dir);
+ ATTRIBUTE(mem_dir, "min_KiB", data->mem_min_kb);
+ ATTRIBUTE(mem_dir, "max_KiB", data->mem_max_kb);
+ ATTRIBUTE(mem_dir, "used_KiB", data->mem_used_kb);
+ ATTRIBUTE(mem_dir, "share_KiB", data->mem_share_kb);
+
+ /* samples */
+ samples_dir = hypfs_mkdir(guest_dir, "samples");
+ if (IS_ERR(samples_dir))
+ return PTR_ERR(samples_dir);
+ ATTRIBUTE(samples_dir, "cpu_using", data->cpu_use_samp);
+ ATTRIBUTE(samples_dir, "cpu_delay", data->cpu_delay_samp);
+ ATTRIBUTE(samples_dir, "mem_delay", data->page_wait_samp);
+ ATTRIBUTE(samples_dir, "idle", data->idle_samp);
+ ATTRIBUTE(samples_dir, "other", data->other_samp);
+ ATTRIBUTE(samples_dir, "total", data->total_samp);
+ return 0;
+}
+
+int hypfs_vm_create_files(struct dentry *root)
+{
+ struct dentry *dir, *file;
+ struct diag2fc_data *data;
+ unsigned int count = 0;
+ int rc, i;
+
+ data = diag2fc_store(guest_query, &count, 0);
+ if (IS_ERR(data))
+ return PTR_ERR(data);
+
+ /* Hypervisor information */
+ dir = hypfs_mkdir(root, "hyp");
+ if (IS_ERR(dir)) {
+ rc = PTR_ERR(dir);
+ goto failed;
+ }
+ file = hypfs_create_str(dir, "type", "z/VM Hypervisor");
+ if (IS_ERR(file)) {
+ rc = PTR_ERR(file);
+ goto failed;
+ }
+
+ /* physical cpus */
+ dir = hypfs_mkdir(root, "cpus");
+ if (IS_ERR(dir)) {
+ rc = PTR_ERR(dir);
+ goto failed;
+ }
+ file = hypfs_create_u64(dir, "count", data->lcpus);
+ if (IS_ERR(file)) {
+ rc = PTR_ERR(file);
+ goto failed;
+ }
+
+ /* guests */
+ dir = hypfs_mkdir(root, "systems");
+ if (IS_ERR(dir)) {
+ rc = PTR_ERR(dir);
+ goto failed;
+ }
+
+ for (i = 0; i < count; i++) {
+ rc = hypfs_vm_create_guest(dir, &(data[i]));
+ if (rc)
+ goto failed;
+ }
+ diag2fc_free(data);
+ return 0;
+
+failed:
+ diag2fc_free(data);
+ return rc;
+}
+
+struct dbfs_d2fc_hdr {
+ u64 len; /* Length of d2fc buffer without header */
+ u16 version; /* Version of header */
+ char tod_ext[STORE_CLOCK_EXT_SIZE]; /* TOD clock for d2fc */
+ u64 count; /* Number of VM guests in d2fc buffer */
+ char reserved[30];
+} __attribute__ ((packed));
+
+struct dbfs_d2fc {
+ struct dbfs_d2fc_hdr hdr; /* 64 byte header */
+ char buf[]; /* d2fc buffer */
+} __attribute__ ((packed));
+
+static int dbfs_diag2fc_create(void **data, void **data_free_ptr, size_t *size)
+{
+ struct dbfs_d2fc *d2fc;
+ unsigned int count;
+
+ d2fc = diag2fc_store(guest_query, &count, sizeof(d2fc->hdr));
+ if (IS_ERR(d2fc))
+ return PTR_ERR(d2fc);
+ get_tod_clock_ext(d2fc->hdr.tod_ext);
+ d2fc->hdr.len = count * sizeof(struct diag2fc_data);
+ d2fc->hdr.version = DBFS_D2FC_HDR_VERSION;
+ d2fc->hdr.count = count;
+ memset(&d2fc->hdr.reserved, 0, sizeof(d2fc->hdr.reserved));
+ *data = d2fc;
+ *data_free_ptr = d2fc;
+ *size = d2fc->hdr.len + sizeof(struct dbfs_d2fc_hdr);
+ return 0;
+}
+
+static struct hypfs_dbfs_file dbfs_file_2fc = {
+ .name = "diag_2fc",
+ .data_create = dbfs_diag2fc_create,
+ .data_free = diag2fc_free,
+};
+
+int hypfs_vm_init(void)
+{
+ if (!MACHINE_IS_VM)
+ return 0;
+ if (diag2fc(0, all_guests, NULL) > 0)
+ guest_query = all_guests;
+ else if (diag2fc(0, local_guest, NULL) > 0)
+ guest_query = local_guest;
+ else
+ return -EACCES;
+ return hypfs_dbfs_create_file(&dbfs_file_2fc);
+}
+
+void hypfs_vm_exit(void)
+{
+ if (!MACHINE_IS_VM)
+ return;
+ hypfs_dbfs_remove_file(&dbfs_file_2fc);
+}
diff --git a/arch/s390/hypfs/inode.c b/arch/s390/hypfs/inode.c
new file mode 100644
index 000000000..e4d17d9ea
--- /dev/null
+++ b/arch/s390/hypfs/inode.c
@@ -0,0 +1,502 @@
+// SPDX-License-Identifier: GPL-1.0+
+/*
+ * Hypervisor filesystem for Linux on s390.
+ *
+ * Copyright IBM Corp. 2006, 2008
+ * Author(s): Michael Holzheu <holzheu@de.ibm.com>
+ */
+
+#define KMSG_COMPONENT "hypfs"
+#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
+
+#include <linux/types.h>
+#include <linux/errno.h>
+#include <linux/fs.h>
+#include <linux/namei.h>
+#include <linux/vfs.h>
+#include <linux/slab.h>
+#include <linux/pagemap.h>
+#include <linux/time.h>
+#include <linux/parser.h>
+#include <linux/sysfs.h>
+#include <linux/init.h>
+#include <linux/kobject.h>
+#include <linux/seq_file.h>
+#include <linux/mount.h>
+#include <linux/uio.h>
+#include <asm/ebcdic.h>
+#include "hypfs.h"
+
+#define HYPFS_MAGIC 0x687970 /* ASCII 'hyp' */
+#define TMP_SIZE 64 /* size of temporary buffers */
+
+static struct dentry *hypfs_create_update_file(struct dentry *dir);
+
+struct hypfs_sb_info {
+ kuid_t uid; /* uid used for files and dirs */
+ kgid_t gid; /* gid used for files and dirs */
+ struct dentry *update_file; /* file to trigger update */
+ time64_t last_update; /* last update, CLOCK_MONOTONIC time */
+ struct mutex lock; /* lock to protect update process */
+};
+
+static const struct file_operations hypfs_file_ops;
+static struct file_system_type hypfs_type;
+static const struct super_operations hypfs_s_ops;
+
+/* start of the list of all dentries that have to be deleted on update */
+static struct dentry *hypfs_last_dentry;
+
+static void hypfs_update_update(struct super_block *sb)
+{
+ struct hypfs_sb_info *sb_info = sb->s_fs_info;
+ struct inode *inode = d_inode(sb_info->update_file);
+
+ sb_info->last_update = ktime_get_seconds();
+ inode->i_atime = inode->i_mtime = inode->i_ctime = current_time(inode);
+}
+
+/* directory tree removal functions */
+
+static void hypfs_add_dentry(struct dentry *dentry)
+{
+ dentry->d_fsdata = hypfs_last_dentry;
+ hypfs_last_dentry = dentry;
+}
+
+static void hypfs_remove(struct dentry *dentry)
+{
+ struct dentry *parent;
+
+ parent = dentry->d_parent;
+ inode_lock(d_inode(parent));
+ if (simple_positive(dentry)) {
+ if (d_is_dir(dentry))
+ simple_rmdir(d_inode(parent), dentry);
+ else
+ simple_unlink(d_inode(parent), dentry);
+ }
+ d_delete(dentry);
+ dput(dentry);
+ inode_unlock(d_inode(parent));
+}
+
+static void hypfs_delete_tree(struct dentry *root)
+{
+ while (hypfs_last_dentry) {
+ struct dentry *next_dentry;
+ next_dentry = hypfs_last_dentry->d_fsdata;
+ hypfs_remove(hypfs_last_dentry);
+ hypfs_last_dentry = next_dentry;
+ }
+}
+
+static struct inode *hypfs_make_inode(struct super_block *sb, umode_t mode)
+{
+ struct inode *ret = new_inode(sb);
+
+ if (ret) {
+ struct hypfs_sb_info *hypfs_info = sb->s_fs_info;
+ ret->i_ino = get_next_ino();
+ ret->i_mode = mode;
+ ret->i_uid = hypfs_info->uid;
+ ret->i_gid = hypfs_info->gid;
+ ret->i_atime = ret->i_mtime = ret->i_ctime = current_time(ret);
+ if (S_ISDIR(mode))
+ set_nlink(ret, 2);
+ }
+ return ret;
+}
+
+static void hypfs_evict_inode(struct inode *inode)
+{
+ clear_inode(inode);
+ kfree(inode->i_private);
+}
+
+static int hypfs_open(struct inode *inode, struct file *filp)
+{
+ char *data = file_inode(filp)->i_private;
+ struct hypfs_sb_info *fs_info;
+
+ if (filp->f_mode & FMODE_WRITE) {
+ if (!(inode->i_mode & S_IWUGO))
+ return -EACCES;
+ }
+ if (filp->f_mode & FMODE_READ) {
+ if (!(inode->i_mode & S_IRUGO))
+ return -EACCES;
+ }
+
+ fs_info = inode->i_sb->s_fs_info;
+ if (data) {
+ mutex_lock(&fs_info->lock);
+ filp->private_data = kstrdup(data, GFP_KERNEL);
+ if (!filp->private_data) {
+ mutex_unlock(&fs_info->lock);
+ return -ENOMEM;
+ }
+ mutex_unlock(&fs_info->lock);
+ }
+ return nonseekable_open(inode, filp);
+}
+
+static ssize_t hypfs_read_iter(struct kiocb *iocb, struct iov_iter *to)
+{
+ struct file *file = iocb->ki_filp;
+ char *data = file->private_data;
+ size_t available = strlen(data);
+ loff_t pos = iocb->ki_pos;
+ size_t count;
+
+ if (pos < 0)
+ return -EINVAL;
+ if (pos >= available || !iov_iter_count(to))
+ return 0;
+ count = copy_to_iter(data + pos, available - pos, to);
+ if (!count)
+ return -EFAULT;
+ iocb->ki_pos = pos + count;
+ file_accessed(file);
+ return count;
+}
+
+static ssize_t hypfs_write_iter(struct kiocb *iocb, struct iov_iter *from)
+{
+ int rc;
+ struct super_block *sb = file_inode(iocb->ki_filp)->i_sb;
+ struct hypfs_sb_info *fs_info = sb->s_fs_info;
+ size_t count = iov_iter_count(from);
+
+ /*
+ * Currently we only allow one update per second for two reasons:
+ * 1. diag 204 is VERY expensive
+ * 2. If several processes do updates in parallel and then read the
+ *    hypfs data, restricting the minimum update interval reduces the
+ *    likelihood of collisions. A collision occurs if, during the
+ *    data gathering of one process, another process triggers an update.
+ * If the first process wants to ensure consistent data, it has
+ * to restart data collection in this case.
+ */
+ mutex_lock(&fs_info->lock);
+ if (fs_info->last_update == ktime_get_seconds()) {
+ rc = -EBUSY;
+ goto out;
+ }
+ hypfs_delete_tree(sb->s_root);
+ if (MACHINE_IS_VM)
+ rc = hypfs_vm_create_files(sb->s_root);
+ else
+ rc = hypfs_diag_create_files(sb->s_root);
+ if (rc) {
+ pr_err("Updating the hypfs tree failed\n");
+ hypfs_delete_tree(sb->s_root);
+ goto out;
+ }
+ hypfs_update_update(sb);
+ rc = count;
+ iov_iter_advance(from, count);
+out:
+ mutex_unlock(&fs_info->lock);
+ return rc;
+}
+
+static int hypfs_release(struct inode *inode, struct file *filp)
+{
+ kfree(filp->private_data);
+ return 0;
+}
+
+enum { opt_uid, opt_gid, opt_err };
+
+static const match_table_t hypfs_tokens = {
+ {opt_uid, "uid=%u"},
+ {opt_gid, "gid=%u"},
+ {opt_err, NULL}
+};
+
+static int hypfs_parse_options(char *options, struct super_block *sb)
+{
+ char *str;
+ substring_t args[MAX_OPT_ARGS];
+ kuid_t uid;
+ kgid_t gid;
+
+ if (!options)
+ return 0;
+ while ((str = strsep(&options, ",")) != NULL) {
+ int token, option;
+ struct hypfs_sb_info *hypfs_info = sb->s_fs_info;
+
+ if (!*str)
+ continue;
+ token = match_token(str, hypfs_tokens, args);
+ switch (token) {
+ case opt_uid:
+ if (match_int(&args[0], &option))
+ return -EINVAL;
+ uid = make_kuid(current_user_ns(), option);
+ if (!uid_valid(uid))
+ return -EINVAL;
+ hypfs_info->uid = uid;
+ break;
+ case opt_gid:
+ if (match_int(&args[0], &option))
+ return -EINVAL;
+ gid = make_kgid(current_user_ns(), option);
+ if (!gid_valid(gid))
+ return -EINVAL;
+ hypfs_info->gid = gid;
+ break;
+ case opt_err:
+ default:
+ pr_err("%s is not a valid mount option\n", str);
+ return -EINVAL;
+ }
+ }
+ return 0;
+}
+
+static int hypfs_show_options(struct seq_file *s, struct dentry *root)
+{
+ struct hypfs_sb_info *hypfs_info = root->d_sb->s_fs_info;
+
+ seq_printf(s, ",uid=%u", from_kuid_munged(&init_user_ns, hypfs_info->uid));
+ seq_printf(s, ",gid=%u", from_kgid_munged(&init_user_ns, hypfs_info->gid));
+ return 0;
+}
+
+static int hypfs_fill_super(struct super_block *sb, void *data, int silent)
+{
+ struct inode *root_inode;
+ struct dentry *root_dentry, *update_file;
+ int rc = 0;
+ struct hypfs_sb_info *sbi;
+
+ sbi = kzalloc(sizeof(struct hypfs_sb_info), GFP_KERNEL);
+ if (!sbi)
+ return -ENOMEM;
+ mutex_init(&sbi->lock);
+ sbi->uid = current_uid();
+ sbi->gid = current_gid();
+ sb->s_fs_info = sbi;
+ sb->s_blocksize = PAGE_SIZE;
+ sb->s_blocksize_bits = PAGE_SHIFT;
+ sb->s_magic = HYPFS_MAGIC;
+ sb->s_op = &hypfs_s_ops;
+ if (hypfs_parse_options(data, sb))
+ return -EINVAL;
+ root_inode = hypfs_make_inode(sb, S_IFDIR | 0755);
+ if (!root_inode)
+ return -ENOMEM;
+ root_inode->i_op = &simple_dir_inode_operations;
+ root_inode->i_fop = &simple_dir_operations;
+ sb->s_root = root_dentry = d_make_root(root_inode);
+ if (!root_dentry)
+ return -ENOMEM;
+ if (MACHINE_IS_VM)
+ rc = hypfs_vm_create_files(root_dentry);
+ else
+ rc = hypfs_diag_create_files(root_dentry);
+ if (rc)
+ return rc;
+ update_file = hypfs_create_update_file(root_dentry);
+ if (IS_ERR(update_file))
+ return PTR_ERR(update_file);
+ sbi->update_file = update_file;
+ hypfs_update_update(sb);
+ pr_info("Hypervisor filesystem mounted\n");
+ return 0;
+}
+
+static struct dentry *hypfs_mount(struct file_system_type *fst, int flags,
+ const char *devname, void *data)
+{
+ return mount_single(fst, flags, data, hypfs_fill_super);
+}
+
+static void hypfs_kill_super(struct super_block *sb)
+{
+ struct hypfs_sb_info *sb_info = sb->s_fs_info;
+
+ if (sb->s_root)
+ hypfs_delete_tree(sb->s_root);
+ if (sb_info && sb_info->update_file)
+ hypfs_remove(sb_info->update_file);
+ kfree(sb->s_fs_info);
+ sb->s_fs_info = NULL;
+ kill_litter_super(sb);
+}
+
+static struct dentry *hypfs_create_file(struct dentry *parent, const char *name,
+ char *data, umode_t mode)
+{
+ struct dentry *dentry;
+ struct inode *inode;
+
+ inode_lock(d_inode(parent));
+ dentry = lookup_one_len(name, parent, strlen(name));
+ if (IS_ERR(dentry)) {
+ dentry = ERR_PTR(-ENOMEM);
+ goto fail;
+ }
+ inode = hypfs_make_inode(parent->d_sb, mode);
+ if (!inode) {
+ dput(dentry);
+ dentry = ERR_PTR(-ENOMEM);
+ goto fail;
+ }
+ if (S_ISREG(mode)) {
+ inode->i_fop = &hypfs_file_ops;
+ if (data)
+ inode->i_size = strlen(data);
+ else
+ inode->i_size = 0;
+ } else if (S_ISDIR(mode)) {
+ inode->i_op = &simple_dir_inode_operations;
+ inode->i_fop = &simple_dir_operations;
+ inc_nlink(d_inode(parent));
+ } else
+ BUG();
+ inode->i_private = data;
+ d_instantiate(dentry, inode);
+ dget(dentry);
+fail:
+ inode_unlock(d_inode(parent));
+ return dentry;
+}
+
+struct dentry *hypfs_mkdir(struct dentry *parent, const char *name)
+{
+ struct dentry *dentry;
+
+ dentry = hypfs_create_file(parent, name, NULL, S_IFDIR | DIR_MODE);
+ if (IS_ERR(dentry))
+ return dentry;
+ hypfs_add_dentry(dentry);
+ return dentry;
+}
+
+static struct dentry *hypfs_create_update_file(struct dentry *dir)
+{
+ struct dentry *dentry;
+
+ dentry = hypfs_create_file(dir, "update", NULL,
+ S_IFREG | UPDATE_FILE_MODE);
+ /*
+ * We do not put the update file on the 'delete' list with
+ * hypfs_add_dentry(), since it should not be removed when the tree
+ * is updated.
+ */
+ return dentry;
+}
+
+struct dentry *hypfs_create_u64(struct dentry *dir,
+ const char *name, __u64 value)
+{
+ char *buffer;
+ char tmp[TMP_SIZE];
+ struct dentry *dentry;
+
+ snprintf(tmp, TMP_SIZE, "%llu\n", (unsigned long long int)value);
+ buffer = kstrdup(tmp, GFP_KERNEL);
+ if (!buffer)
+ return ERR_PTR(-ENOMEM);
+ dentry =
+ hypfs_create_file(dir, name, buffer, S_IFREG | REG_FILE_MODE);
+ if (IS_ERR(dentry)) {
+ kfree(buffer);
+ return ERR_PTR(-ENOMEM);
+ }
+ hypfs_add_dentry(dentry);
+ return dentry;
+}
+
+struct dentry *hypfs_create_str(struct dentry *dir,
+ const char *name, char *string)
+{
+ char *buffer;
+ struct dentry *dentry;
+
+ buffer = kmalloc(strlen(string) + 2, GFP_KERNEL);
+ if (!buffer)
+ return ERR_PTR(-ENOMEM);
+ sprintf(buffer, "%s\n", string);
+ dentry =
+ hypfs_create_file(dir, name, buffer, S_IFREG | REG_FILE_MODE);
+ if (IS_ERR(dentry)) {
+ kfree(buffer);
+ return ERR_PTR(-ENOMEM);
+ }
+ hypfs_add_dentry(dentry);
+ return dentry;
+}
+
+static const struct file_operations hypfs_file_ops = {
+ .open = hypfs_open,
+ .release = hypfs_release,
+ .read_iter = hypfs_read_iter,
+ .write_iter = hypfs_write_iter,
+ .llseek = no_llseek,
+};
+
+static struct file_system_type hypfs_type = {
+ .owner = THIS_MODULE,
+ .name = "s390_hypfs",
+ .mount = hypfs_mount,
+ .kill_sb = hypfs_kill_super
+};
+
+static const struct super_operations hypfs_s_ops = {
+ .statfs = simple_statfs,
+ .evict_inode = hypfs_evict_inode,
+ .show_options = hypfs_show_options,
+};
+
+static int __init hypfs_init(void)
+{
+ int rc;
+
+ rc = hypfs_dbfs_init();
+ if (rc)
+ return rc;
+ if (hypfs_diag_init()) {
+ rc = -ENODATA;
+ goto fail_dbfs_exit;
+ }
+ if (hypfs_vm_init()) {
+ rc = -ENODATA;
+ goto fail_hypfs_diag_exit;
+ }
+ if (hypfs_sprp_init()) {
+ rc = -ENODATA;
+ goto fail_hypfs_vm_exit;
+ }
+ if (hypfs_diag0c_init()) {
+ rc = -ENODATA;
+ goto fail_hypfs_sprp_exit;
+ }
+ rc = sysfs_create_mount_point(hypervisor_kobj, "s390");
+ if (rc)
+ goto fail_hypfs_diag0c_exit;
+ rc = register_filesystem(&hypfs_type);
+ if (rc)
+ goto fail_filesystem;
+ return 0;
+
+fail_filesystem:
+ sysfs_remove_mount_point(hypervisor_kobj, "s390");
+fail_hypfs_diag0c_exit:
+ hypfs_diag0c_exit();
+fail_hypfs_sprp_exit:
+ hypfs_sprp_exit();
+fail_hypfs_vm_exit:
+ hypfs_vm_exit();
+fail_hypfs_diag_exit:
+ hypfs_diag_exit();
+fail_dbfs_exit:
+ hypfs_dbfs_exit();
+ pr_err("Initialization of hypfs failed with rc=%i\n", rc);
+ return rc;
+}
+device_initcall(hypfs_init)
diff --git a/arch/s390/include/asm/Kbuild b/arch/s390/include/asm/Kbuild
new file mode 100644
index 000000000..e32397728
--- /dev/null
+++ b/arch/s390/include/asm/Kbuild
@@ -0,0 +1,27 @@
+# SPDX-License-Identifier: GPL-2.0
+generated-y += dis-defs.h
+generated-y += facility-defs.h
+generated-y += syscall_table.h
+generated-y += unistd_nr.h
+
+generic-y += asm-offsets.h
+generic-y += cacheflush.h
+generic-y += device.h
+generic-y += dma-contiguous.h
+generic-y += dma-mapping.h
+generic-y += div64.h
+generic-y += emergency-restart.h
+generic-y += export.h
+generic-y += fb.h
+generic-y += irq_regs.h
+generic-y += irq_work.h
+generic-y += kmap_types.h
+generic-y += local.h
+generic-y += local64.h
+generic-y += mcs_spinlock.h
+generic-y += mm-arch-hooks.h
+generic-y += preempt.h
+generic-y += rwsem.h
+generic-y += trace_clock.h
+generic-y += unaligned.h
+generic-y += word-at-a-time.h
diff --git a/arch/s390/include/asm/airq.h b/arch/s390/include/asm/airq.h
new file mode 100644
index 000000000..fcf539efb
--- /dev/null
+++ b/arch/s390/include/asm/airq.h
@@ -0,0 +1,104 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright IBM Corp. 2002, 2007
+ * Author(s): Ingo Adlung <adlung@de.ibm.com>
+ * Cornelia Huck <cornelia.huck@de.ibm.com>
+ * Arnd Bergmann <arndb@de.ibm.com>
+ * Peter Oberparleiter <peter.oberparleiter@de.ibm.com>
+ */
+
+#ifndef _ASM_S390_AIRQ_H
+#define _ASM_S390_AIRQ_H
+
+#include <linux/bit_spinlock.h>
+
+struct airq_struct {
+ struct hlist_node list; /* Handler queueing. */
+ void (*handler)(struct airq_struct *); /* Thin-interrupt handler */
+ u8 *lsi_ptr; /* Local-Summary-Indicator pointer */
+ u8 lsi_mask; /* Local-Summary-Indicator mask */
+ u8 isc; /* Interrupt-subclass */
+ u8 flags;
+};
+
+#define AIRQ_PTR_ALLOCATED 0x01
+
+int register_adapter_interrupt(struct airq_struct *airq);
+void unregister_adapter_interrupt(struct airq_struct *airq);
+
+/* Adapter interrupt bit vector */
+struct airq_iv {
+ unsigned long *vector; /* Adapter interrupt bit vector */
+ unsigned long *avail; /* Allocation bit mask for the bit vector */
+ unsigned long *bitlock; /* Lock bit mask for the bit vector */
+ unsigned long *ptr; /* Pointer associated with each bit */
+ unsigned int *data; /* 32 bit value associated with each bit */
+ unsigned long bits; /* Number of bits in the vector */
+ unsigned long end; /* Number of highest allocated bit + 1 */
+ spinlock_t lock; /* Lock to protect alloc & free */
+};
+
+#define AIRQ_IV_ALLOC 1 /* Use an allocation bit mask */
+#define AIRQ_IV_BITLOCK 2 /* Allocate the lock bit mask */
+#define AIRQ_IV_PTR 4 /* Allocate the ptr array */
+#define AIRQ_IV_DATA 8 /* Allocate the data array */
+
+struct airq_iv *airq_iv_create(unsigned long bits, unsigned long flags);
+void airq_iv_release(struct airq_iv *iv);
+unsigned long airq_iv_alloc(struct airq_iv *iv, unsigned long num);
+void airq_iv_free(struct airq_iv *iv, unsigned long bit, unsigned long num);
+unsigned long airq_iv_scan(struct airq_iv *iv, unsigned long start,
+ unsigned long end);
+
+static inline unsigned long airq_iv_alloc_bit(struct airq_iv *iv)
+{
+ return airq_iv_alloc(iv, 1);
+}
+
+static inline void airq_iv_free_bit(struct airq_iv *iv, unsigned long bit)
+{
+ airq_iv_free(iv, bit, 1);
+}
+
+static inline unsigned long airq_iv_end(struct airq_iv *iv)
+{
+ return iv->end;
+}
+
+static inline void airq_iv_lock(struct airq_iv *iv, unsigned long bit)
+{
+ const unsigned long be_to_le = BITS_PER_LONG - 1;
+ bit_spin_lock(bit ^ be_to_le, iv->bitlock);
+}
+
+static inline void airq_iv_unlock(struct airq_iv *iv, unsigned long bit)
+{
+ const unsigned long be_to_le = BITS_PER_LONG - 1;
+ bit_spin_unlock(bit ^ be_to_le, iv->bitlock);
+}
+
+static inline void airq_iv_set_data(struct airq_iv *iv, unsigned long bit,
+ unsigned int data)
+{
+ iv->data[bit] = data;
+}
+
+static inline unsigned int airq_iv_get_data(struct airq_iv *iv,
+ unsigned long bit)
+{
+ return iv->data[bit];
+}
+
+static inline void airq_iv_set_ptr(struct airq_iv *iv, unsigned long bit,
+ unsigned long ptr)
+{
+ iv->ptr[bit] = ptr;
+}
+
+static inline unsigned long airq_iv_get_ptr(struct airq_iv *iv,
+ unsigned long bit)
+{
+ return iv->ptr[bit];
+}
+
+#endif /* _ASM_S390_AIRQ_H */
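A minimal usage sketch for the adapter interrupt bit vector API above (editorial illustration, not part of the patch; the vector size, the data value and the failure value checked for are assumptions):

#include <linux/errno.h>
#include <asm/airq.h>

static struct airq_iv *example_iv;

static int example_setup(void)
{
	unsigned long bit;

	/* 64 summary bits, each with an associated 32-bit data slot. */
	example_iv = airq_iv_create(64, AIRQ_IV_ALLOC | AIRQ_IV_DATA);
	if (!example_iv)
		return -ENOMEM;

	bit = airq_iv_alloc_bit(example_iv);
	if (bit == -1UL) {		/* assumed "no bit available" value */
		airq_iv_release(example_iv);
		return -ENOSPC;
	}
	airq_iv_set_data(example_iv, bit, 0x42);	/* tag the bit */
	return 0;
}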
diff --git a/arch/s390/include/asm/alternative-asm.h b/arch/s390/include/asm/alternative-asm.h
new file mode 100644
index 000000000..955d620db
--- /dev/null
+++ b/arch/s390/include/asm/alternative-asm.h
@@ -0,0 +1,108 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_S390_ALTERNATIVE_ASM_H
+#define _ASM_S390_ALTERNATIVE_ASM_H
+
+#ifdef __ASSEMBLY__
+
+/*
+ * Check the length of an instruction sequence. The length may not be larger
+ * than 254 bytes and it has to be divisible by 2.
+ */
+.macro alt_len_check start,end
+ .if ( \end - \start ) > 254
+ .error "cpu alternatives does not support instructions blocks > 254 bytes\n"
+ .endif
+ .if ( \end - \start ) % 2
+ .error "cpu alternatives instructions length is odd\n"
+ .endif
+.endm
+
+/*
+ * Issue one struct alt_instr descriptor entry (need to put it into
+ * the section .altinstructions, see below). This entry contains
+ * enough information for the alternatives patching code to patch an
+ * instruction. See apply_alternatives().
+ */
+.macro alt_entry orig_start, orig_end, alt_start, alt_end, feature
+ .long \orig_start - .
+ .long \alt_start - .
+ .word \feature
+ .byte \orig_end - \orig_start
+ .byte \alt_end - \alt_start
+.endm
+
+/*
+ * Fill up @bytes with nops. The macro emits 6-byte nop instructions
+ * for the bulk of the area, possibly followed by a 4-byte and/or
+ * a 2-byte nop if the size of the area is not divisible by 6.
+ */
+.macro alt_pad_fill bytes
+ .fill ( \bytes ) / 6, 6, 0xc0040000
+ .fill ( \bytes ) % 6 / 4, 4, 0x47000000
+ .fill ( \bytes ) % 6 % 4 / 2, 2, 0x0700
+.endm
+
+/*
+ * Fill up @bytes with nops. If the number of bytes is larger
+ * than 6, emit a jg instruction to branch over all nops, then
+ * fill an area of size (@bytes - 6) with nop instructions.
+ */
+.macro alt_pad bytes
+ .if ( \bytes > 0 )
+ .if ( \bytes > 6 )
+ jg . + \bytes
+ alt_pad_fill \bytes - 6
+ .else
+ alt_pad_fill \bytes
+ .endif
+ .endif
+.endm
+
+/*
+ * Define an alternative between two instructions. If @feature is
+ * present, early code in apply_alternatives() replaces @oldinstr with
+ * @newinstr. ".skip" directive takes care of proper instruction padding
+ * in case @newinstr is longer than @oldinstr.
+ */
+.macro ALTERNATIVE oldinstr, newinstr, feature
+ .pushsection .altinstr_replacement,"ax"
+770: \newinstr
+771: .popsection
+772: \oldinstr
+773: alt_len_check 770b, 771b
+ alt_len_check 772b, 773b
+ alt_pad ( ( 771b - 770b ) - ( 773b - 772b ) )
+774: .pushsection .altinstructions,"a"
+ alt_entry 772b, 774b, 770b, 771b, \feature
+ .popsection
+.endm
+
+/*
+ * Define an alternative between two instructions. If @feature is
+ * present, early code in apply_alternatives() replaces @oldinstr with
+ * @newinstr. ".skip" directive takes care of proper instruction padding
+ * in case @newinstr is longer than @oldinstr.
+ */
+.macro ALTERNATIVE_2 oldinstr, newinstr1, feature1, newinstr2, feature2
+ .pushsection .altinstr_replacement,"ax"
+770: \newinstr1
+771: \newinstr2
+772: .popsection
+773: \oldinstr
+774: alt_len_check 770b, 771b
+ alt_len_check 771b, 772b
+ alt_len_check 773b, 774b
+ .if ( 771b - 770b > 772b - 771b )
+ alt_pad ( ( 771b - 770b ) - ( 774b - 773b ) )
+ .else
+ alt_pad ( ( 772b - 771b ) - ( 774b - 773b ) )
+ .endif
+775: .pushsection .altinstructions,"a"
+ alt_entry 773b, 775b, 770b, 771b,\feature1
+ alt_entry 773b, 775b, 771b, 772b,\feature2
+ .popsection
+.endm
+
+#endif /* __ASSEMBLY__ */
+
+#endif /* _ASM_S390_ALTERNATIVE_ASM_H */
diff --git a/arch/s390/include/asm/alternative.h b/arch/s390/include/asm/alternative.h
new file mode 100644
index 000000000..c2cf7bcde
--- /dev/null
+++ b/arch/s390/include/asm/alternative.h
@@ -0,0 +1,150 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_S390_ALTERNATIVE_H
+#define _ASM_S390_ALTERNATIVE_H
+
+#ifndef __ASSEMBLY__
+
+#include <linux/types.h>
+#include <linux/stddef.h>
+#include <linux/stringify.h>
+
+struct alt_instr {
+ s32 instr_offset; /* original instruction */
+ s32 repl_offset; /* offset to replacement instruction */
+ u16 facility; /* facility bit set for replacement */
+ u8 instrlen; /* length of original instruction */
+ u8 replacementlen; /* length of new instruction */
+} __packed;
+
+void apply_alternative_instructions(void);
+void apply_alternatives(struct alt_instr *start, struct alt_instr *end);
+
+/*
+ * |661: |662: |6620 |663:
+ * +-----------+---------------------+
+ * | oldinstr | oldinstr_padding |
+ * | +----------+----------+
+ * | | | |
+ * | | >6 bytes |6/4/2 nops|
+ * | |6 bytes jg----------->
+ * +-----------+---------------------+
+ * ^^ static padding ^^
+ *
+ * .altinstr_replacement section
+ * +---------------------+-----------+
+ * |6641: |6651:
+ * | alternative instr 1 |
+ * +-----------+---------+- - - - - -+
+ * |6642: |6652: |
+ * | alternative instr 2 | padding
+ * +---------------------+- - - - - -+
+ * ^ runtime ^
+ *
+ * .altinstructions section
+ * +---------------------------------+
+ * | alt_instr entries for each |
+ * | alternative instr |
+ * +---------------------------------+
+ */
+
+#define b_altinstr(num) "664"#num
+#define e_altinstr(num) "665"#num
+
+#define e_oldinstr_pad_end "663"
+#define oldinstr_len "662b-661b"
+#define oldinstr_total_len e_oldinstr_pad_end"b-661b"
+#define altinstr_len(num) e_altinstr(num)"b-"b_altinstr(num)"b"
+#define oldinstr_pad_len(num) \
+ "-(((" altinstr_len(num) ")-(" oldinstr_len ")) > 0) * " \
+ "((" altinstr_len(num) ")-(" oldinstr_len "))"
+
+#define INSTR_LEN_SANITY_CHECK(len) \
+ ".if " len " > 254\n" \
+ "\t.error \"cpu alternatives does not support instructions " \
+ "blocks > 254 bytes\"\n" \
+ ".endif\n" \
+ ".if (" len ") %% 2\n" \
+ "\t.error \"cpu alternatives instructions length is odd\"\n" \
+ ".endif\n"
+
+#define OLDINSTR_PADDING(oldinstr, num) \
+ ".if " oldinstr_pad_len(num) " > 6\n" \
+ "\tjg " e_oldinstr_pad_end "f\n" \
+ "6620:\n" \
+ "\t.fill (" oldinstr_pad_len(num) " - (6620b-662b)) / 2, 2, 0x0700\n" \
+ ".else\n" \
+ "\t.fill " oldinstr_pad_len(num) " / 6, 6, 0xc0040000\n" \
+ "\t.fill " oldinstr_pad_len(num) " %% 6 / 4, 4, 0x47000000\n" \
+ "\t.fill " oldinstr_pad_len(num) " %% 6 %% 4 / 2, 2, 0x0700\n" \
+ ".endif\n"
+
+#define OLDINSTR(oldinstr, num) \
+ "661:\n\t" oldinstr "\n662:\n" \
+ OLDINSTR_PADDING(oldinstr, num) \
+ e_oldinstr_pad_end ":\n" \
+ INSTR_LEN_SANITY_CHECK(oldinstr_len)
+
+#define OLDINSTR_2(oldinstr, num1, num2) \
+ "661:\n\t" oldinstr "\n662:\n" \
+ ".if " altinstr_len(num1) " < " altinstr_len(num2) "\n" \
+ OLDINSTR_PADDING(oldinstr, num2) \
+ ".else\n" \
+ OLDINSTR_PADDING(oldinstr, num1) \
+ ".endif\n" \
+ e_oldinstr_pad_end ":\n" \
+ INSTR_LEN_SANITY_CHECK(oldinstr_len)
+
+#define ALTINSTR_ENTRY(facility, num) \
+ "\t.long 661b - .\n" /* old instruction */ \
+ "\t.long " b_altinstr(num)"b - .\n" /* alt instruction */ \
+ "\t.word " __stringify(facility) "\n" /* facility bit */ \
+ "\t.byte " oldinstr_total_len "\n" /* source len */ \
+ "\t.byte " altinstr_len(num) "\n" /* alt instruction len */
+
+#define ALTINSTR_REPLACEMENT(altinstr, num) /* replacement */ \
+ b_altinstr(num)":\n\t" altinstr "\n" e_altinstr(num) ":\n" \
+ INSTR_LEN_SANITY_CHECK(altinstr_len(num))
+
+/* alternative assembly primitive: */
+#define ALTERNATIVE(oldinstr, altinstr, facility) \
+ ".pushsection .altinstr_replacement, \"ax\"\n" \
+ ALTINSTR_REPLACEMENT(altinstr, 1) \
+ ".popsection\n" \
+ OLDINSTR(oldinstr, 1) \
+ ".pushsection .altinstructions,\"a\"\n" \
+ ALTINSTR_ENTRY(facility, 1) \
+ ".popsection\n"
+
+#define ALTERNATIVE_2(oldinstr, altinstr1, facility1, altinstr2, facility2)\
+ ".pushsection .altinstr_replacement, \"ax\"\n" \
+ ALTINSTR_REPLACEMENT(altinstr1, 1) \
+ ALTINSTR_REPLACEMENT(altinstr2, 2) \
+ ".popsection\n" \
+ OLDINSTR_2(oldinstr, 1, 2) \
+ ".pushsection .altinstructions,\"a\"\n" \
+ ALTINSTR_ENTRY(facility1, 1) \
+ ALTINSTR_ENTRY(facility2, 2) \
+ ".popsection\n"
+
+/*
+ * Alternative instructions for different CPU types or capabilities.
+ *
+ * This allows optimized instructions to be used even on generic binary
+ * kernels.
+ *
+ * oldinstr is padded with jump and nops at compile time if altinstr is
+ * longer. altinstr is padded with jump and nops at run-time during patching.
+ *
+ * For non-barrier-like inlines please define new variants
+ * without volatile and memory clobber.
+ */
+#define alternative(oldinstr, altinstr, facility) \
+ asm volatile(ALTERNATIVE(oldinstr, altinstr, facility) : : : "memory")
+
+#define alternative_2(oldinstr, altinstr1, facility1, altinstr2, facility2) \
+ asm volatile(ALTERNATIVE_2(oldinstr, altinstr1, facility1, \
+ altinstr2, facility2) ::: "memory")
+
+#endif /* __ASSEMBLY__ */
+
+#endif /* _ASM_S390_ALTERNATIVE_H */
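A small sketch of how the C-level alternative() macro above can be used (editorial illustration; facility bit 45 is assumed here to stand for the fast-BCR facility and is not taken from this patch):

#include <asm/alternative.h>

static inline void example_serialize(void)
{
	/*
	 * Emit "bcr 15,0" by default; if facility 45 (assumed) is
	 * installed, the patching code replaces it with "bcr 14,0".
	 * Both instructions are 2 bytes, so no padding is needed.
	 */
	alternative("bcr 15,0", "bcr 14,0", 45);
}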
diff --git a/arch/s390/include/asm/ap.h b/arch/s390/include/asm/ap.h
new file mode 100644
index 000000000..1a6a7092d
--- /dev/null
+++ b/arch/s390/include/asm/ap.h
@@ -0,0 +1,363 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Adjunct processor (AP) interfaces
+ *
+ * Copyright IBM Corp. 2017
+ *
+ * Author(s): Tony Krowiak <akrowia@linux.vnet.ibm.com>
+ * Martin Schwidefsky <schwidefsky@de.ibm.com>
+ * Harald Freudenberger <freude@de.ibm.com>
+ */
+
+#ifndef _ASM_S390_AP_H_
+#define _ASM_S390_AP_H_
+
+/**
+ * The ap_qid_t identifier of an ap queue.
+ * If the AP facilities test (APFT) facility is available,
+ * card and queue index are 8 bit values, otherwise
+ * the card index is a 6 bit and the queue index a 4 bit value.
+ */
+typedef unsigned int ap_qid_t;
+
+#define AP_MKQID(_card, _queue) (((_card) & 0xff) << 8 | ((_queue) & 0xff))
+#define AP_QID_CARD(_qid) (((_qid) >> 8) & 0xff)
+#define AP_QID_QUEUE(_qid) ((_qid) & 0xff)
+
+/**
+ * struct ap_queue_status - Holds the AP queue status.
+ * @queue_empty: Shows if queue is empty
+ * @replies_waiting: Waiting replies
+ * @queue_full: Is 1 if the queue is full
+ * @irq_enabled: Shows if interrupts are enabled for the AP
+ * @response_code: Holds the 8 bit response code
+ *
+ * The ap queue status word is returned by all three AP functions
+ * (PQAP, NQAP and DQAP). There's a set of flags in the first
+ * byte, followed by a 1 byte response code.
+ */
+struct ap_queue_status {
+ unsigned int queue_empty : 1;
+ unsigned int replies_waiting : 1;
+ unsigned int queue_full : 1;
+ unsigned int _pad1 : 4;
+ unsigned int irq_enabled : 1;
+ unsigned int response_code : 8;
+ unsigned int _pad2 : 16;
+};
+
+/**
+ * ap_instructions_available() - Test if AP instructions are available.
+ *
+ * Returns true if the AP instructions are installed, otherwise false.
+ */
+static inline bool ap_instructions_available(void)
+{
+ register unsigned long reg0 asm ("0") = AP_MKQID(0, 0);
+ register unsigned long reg1 asm ("1") = 0;
+ register unsigned long reg2 asm ("2") = 0;
+
+ asm volatile(
+ " .long 0xb2af0000\n" /* PQAP(TAPQ) */
+ "0: la %0,1\n"
+ "1:\n"
+ EX_TABLE(0b, 1b)
+ : "+d" (reg1), "+d" (reg2)
+ : "d" (reg0)
+ : "cc");
+ return reg1 != 0;
+}
+
+/**
+ * ap_tapq(): Test adjunct processor queue.
+ * @qid: The AP queue number
+ * @info: Pointer to queue descriptor
+ *
+ * Returns AP queue status structure.
+ */
+static inline struct ap_queue_status ap_tapq(ap_qid_t qid, unsigned long *info)
+{
+ register unsigned long reg0 asm ("0") = qid;
+ register struct ap_queue_status reg1 asm ("1");
+ register unsigned long reg2 asm ("2");
+
+ asm volatile(".long 0xb2af0000" /* PQAP(TAPQ) */
+ : "=d" (reg1), "=d" (reg2)
+ : "d" (reg0)
+ : "cc");
+ if (info)
+ *info = reg2;
+ return reg1;
+}
+
+/**
+ * ap_test_queue(): Test adjunct processor queue.
+ * @qid: The AP queue number
+ * @tbit: Test facilities bit
+ * @info: Pointer to queue descriptor
+ *
+ * Returns AP queue status structure.
+ */
+static inline struct ap_queue_status ap_test_queue(ap_qid_t qid,
+ int tbit,
+ unsigned long *info)
+{
+ if (tbit)
+ qid |= 1UL << 23; /* set T bit*/
+ return ap_tapq(qid, info);
+}
+
+/**
+ * ap_rapq(): Reset adjunct processor queue.
+ * @qid: The AP queue number
+ *
+ * Returns AP queue status structure.
+ */
+static inline struct ap_queue_status ap_rapq(ap_qid_t qid)
+{
+ register unsigned long reg0 asm ("0") = qid | (1UL << 24);
+ register struct ap_queue_status reg1 asm ("1");
+
+ asm volatile(
+ ".long 0xb2af0000" /* PQAP(RAPQ) */
+ : "=d" (reg1)
+ : "d" (reg0)
+ : "cc");
+ return reg1;
+}
+
+/**
+ * ap_zapq(): Reset and zeroize adjunct processor queue.
+ * @qid: The AP queue number
+ *
+ * Returns AP queue status structure.
+ */
+static inline struct ap_queue_status ap_zapq(ap_qid_t qid)
+{
+ register unsigned long reg0 asm ("0") = qid | (2UL << 24);
+ register struct ap_queue_status reg1 asm ("1");
+
+ asm volatile(
+ ".long 0xb2af0000" /* PQAP(ZAPQ) */
+ : "=d" (reg1)
+ : "d" (reg0)
+ : "cc");
+ return reg1;
+}
+
+/**
+ * struct ap_config_info - convenience struct for AP crypto
+ * config info as returned by the ap_qci() function.
+ */
+struct ap_config_info {
+ unsigned int apsc : 1; /* S bit */
+ unsigned int apxa : 1; /* N bit */
+ unsigned int qact : 1; /* C bit */
+ unsigned int rc8a : 1; /* R bit */
+ unsigned char _reserved1 : 4;
+ unsigned char _reserved2[3];
+ unsigned char Na; /* max # of APs - 1 */
+ unsigned char Nd; /* max # of Domains - 1 */
+ unsigned char _reserved3[10];
+ unsigned int apm[8]; /* AP ID mask */
+ unsigned int aqm[8]; /* AP queue mask */
+ unsigned int adm[8]; /* AP domain mask */
+ unsigned char _reserved4[16];
+} __aligned(8);
+
+/**
+ * ap_qci(): Get AP configuration data
+ *
+ * Returns 0 on success, or -EOPNOTSUPP.
+ */
+static inline int ap_qci(struct ap_config_info *config)
+{
+ register unsigned long reg0 asm ("0") = 4UL << 24;
+ register unsigned long reg1 asm ("1") = -EOPNOTSUPP;
+ register struct ap_config_info *reg2 asm ("2") = config;
+
+ asm volatile(
+ ".long 0xb2af0000\n" /* PQAP(QCI) */
+ "0: la %0,0\n"
+ "1:\n"
+ EX_TABLE(0b, 1b)
+ : "+d" (reg1)
+ : "d" (reg0), "d" (reg2)
+ : "cc", "memory");
+
+ return reg1;
+}
+
+/*
+ * struct ap_qirq_ctrl - convenient struct for easy invocation
+ * of the ap_aqic() function. This struct is passed as GR1
+ * parameter to the PQAP(AQIC) instruction. For details please
+ * see the AR documentation.
+ */
+struct ap_qirq_ctrl {
+ unsigned int _res1 : 8;
+ unsigned int zone : 8; /* zone info */
+ unsigned int ir : 1; /* ir flag: enable (1) or disable (0) irq */
+ unsigned int _res2 : 4;
+ unsigned int gisc : 3; /* guest isc field */
+ unsigned int _res3 : 6;
+ unsigned int gf : 2; /* gisa format */
+ unsigned int _res4 : 1;
+ unsigned int gisa : 27; /* gisa origin */
+ unsigned int _res5 : 1;
+ unsigned int isc : 3; /* irq sub class */
+};
+
+/**
+ * ap_aqic(): Control interruption for a specific AP.
+ * @qid: The AP queue number
+ * @qirqctrl: struct ap_qirq_ctrl (64 bit value)
+ * @ind: The notification indicator byte
+ *
+ * Returns AP queue status.
+ */
+static inline struct ap_queue_status ap_aqic(ap_qid_t qid,
+ struct ap_qirq_ctrl qirqctrl,
+ void *ind)
+{
+ register unsigned long reg0 asm ("0") = qid | (3UL << 24);
+ register union {
+ unsigned long value;
+ struct ap_qirq_ctrl qirqctrl;
+ struct ap_queue_status status;
+ } reg1 asm ("1");
+ register void *reg2 asm ("2") = ind;
+
+ reg1.qirqctrl = qirqctrl;
+
+ asm volatile(
+ ".long 0xb2af0000" /* PQAP(AQIC) */
+ : "+d" (reg1)
+ : "d" (reg0), "d" (reg2)
+ : "cc");
+
+ return reg1.status;
+}
+
+/*
+ * union ap_qact_ap_info - used together with the
+ * ap_aqic() function to provide a convenient way
+ * to handle the ap info needed by the qact function.
+ */
+union ap_qact_ap_info {
+ unsigned long val;
+ struct {
+ unsigned int : 3;
+ unsigned int mode : 3;
+ unsigned int : 26;
+ unsigned int cat : 8;
+ unsigned int : 8;
+ unsigned char ver[2];
+ };
+};
+
+/**
+ * ap_qact(): Query AP compatibility type.
+ * @qid: The AP queue number
+ * @apinfo: On input the info about the AP queue. On output the
+ * alternate AP queue info provided by the qact function
+ * in GR2 is stored back into *apinfo.
+ *
+ * Returns AP queue status. Check response_code field for failures.
+ */
+static inline struct ap_queue_status ap_qact(ap_qid_t qid, int ifbit,
+ union ap_qact_ap_info *apinfo)
+{
+ register unsigned long reg0 asm ("0") = qid | (5UL << 24)
+ | ((ifbit & 0x01) << 22);
+ register union {
+ unsigned long value;
+ struct ap_queue_status status;
+ } reg1 asm ("1");
+ register unsigned long reg2 asm ("2");
+
+ reg1.value = apinfo->val;
+
+ asm volatile(
+ ".long 0xb2af0000" /* PQAP(QACT) */
+ : "+d" (reg1), "=d" (reg2)
+ : "d" (reg0)
+ : "cc");
+ apinfo->val = reg2;
+ return reg1.status;
+}
+
+/**
+ * ap_nqap(): Send message to adjunct processor queue.
+ * @qid: The AP queue number
+ * @psmid: The program supplied message identifier
+ * @msg: The message text
+ * @length: The message length
+ *
+ * Returns AP queue status structure.
+ * Condition code 1 on NQAP can't happen because the L bit is 1.
+ * Condition code 2 on NQAP also means the send is incomplete,
+ * because a segment boundary was reached. The NQAP is repeated.
+ */
+static inline struct ap_queue_status ap_nqap(ap_qid_t qid,
+ unsigned long long psmid,
+ void *msg, size_t length)
+{
+ register unsigned long reg0 asm ("0") = qid | 0x40000000UL;
+ register struct ap_queue_status reg1 asm ("1");
+ register unsigned long reg2 asm ("2") = (unsigned long) msg;
+ register unsigned long reg3 asm ("3") = (unsigned long) length;
+ register unsigned long reg4 asm ("4") = (unsigned int) (psmid >> 32);
+ register unsigned long reg5 asm ("5") = psmid & 0xffffffff;
+
+ asm volatile (
+ "0: .long 0xb2ad0042\n" /* NQAP */
+ " brc 2,0b"
+ : "+d" (reg0), "=d" (reg1), "+d" (reg2), "+d" (reg3)
+ : "d" (reg4), "d" (reg5)
+ : "cc", "memory");
+ return reg1;
+}
+
+/**
+ * ap_dqap(): Receive message from adjunct processor queue.
+ * @qid: The AP queue number
+ * @psmid: Pointer to program supplied message identifier
+ * @msg: The message text
+ * @length: The message length
+ *
+ * Returns AP queue status structure.
+ * Condition code 1 on DQAP means the receive has taken place
+ * but only partially. The response is incomplete, hence the
+ * DQAP is repeated.
+ * Condition code 2 on DQAP also means the receive is incomplete,
+ * this time because a segment boundary was reached. Again, the
+ * DQAP is repeated.
+ * Note that gpr2 is used by the DQAP instruction to keep track of
+ * any 'residual' length, in case the instruction gets interrupted.
+ * Hence it gets zeroed before the instruction.
+ */
+static inline struct ap_queue_status ap_dqap(ap_qid_t qid,
+ unsigned long long *psmid,
+ void *msg, size_t length)
+{
+ register unsigned long reg0 asm("0") = qid | 0x80000000UL;
+ register struct ap_queue_status reg1 asm ("1");
+ register unsigned long reg2 asm("2") = 0UL;
+ register unsigned long reg4 asm("4") = (unsigned long) msg;
+ register unsigned long reg5 asm("5") = (unsigned long) length;
+ register unsigned long reg6 asm("6") = 0UL;
+ register unsigned long reg7 asm("7") = 0UL;
+
+
+ asm volatile(
+ "0: .long 0xb2ae0064\n" /* DQAP */
+ " brc 6,0b\n"
+ : "+d" (reg0), "=d" (reg1), "+d" (reg2),
+ "+d" (reg4), "+d" (reg5), "+d" (reg6), "+d" (reg7)
+ : : "cc", "memory");
+ *psmid = (((unsigned long long) reg6) << 32) + reg7;
+ return reg1;
+}
+
+#endif /* _ASM_S390_AP_H_ */
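A short sketch of probing an AP queue with the inline wrappers above (editorial illustration; the card and queue numbers are arbitrary and only a zero response code is treated as usable):

#include <linux/types.h>
#include <asm/ap.h>

static bool example_ap_queue_usable(int card, int queue)
{
	struct ap_queue_status status;
	unsigned long info;

	if (!ap_instructions_available())
		return false;

	/* TAPQ with the T bit set to also retrieve facility information. */
	status = ap_test_queue(AP_MKQID(card, queue), 1, &info);
	return status.response_code == 0;
}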
diff --git a/arch/s390/include/asm/appldata.h b/arch/s390/include/asm/appldata.h
new file mode 100644
index 000000000..4afbb5938
--- /dev/null
+++ b/arch/s390/include/asm/appldata.h
@@ -0,0 +1,67 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright IBM Corp. 2006
+ *
+ * Author(s): Melissa Howland <melissah@us.ibm.com>
+ */
+
+#ifndef _ASM_S390_APPLDATA_H
+#define _ASM_S390_APPLDATA_H
+
+#include <asm/diag.h>
+#include <asm/io.h>
+
+#define APPLDATA_START_INTERVAL_REC 0x80
+#define APPLDATA_STOP_REC 0x81
+#define APPLDATA_GEN_EVENT_REC 0x82
+#define APPLDATA_START_CONFIG_REC 0x83
+
+/*
+ * Parameter list for DIAGNOSE X'DC'
+ */
+struct appldata_parameter_list {
+ u16 diag;
+ u8 function;
+ u8 parlist_length;
+ u32 unused01;
+ u16 reserved;
+ u16 buffer_length;
+ u32 unused02;
+ u64 product_id_addr;
+ u64 buffer_addr;
+} __attribute__ ((packed));
+
+struct appldata_product_id {
+ char prod_nr[7]; /* product number */
+ u16 prod_fn; /* product function */
+ u8 record_nr; /* record number */
+ u16 version_nr; /* version */
+ u16 release_nr; /* release */
+ u16 mod_lvl; /* modification level */
+} __attribute__ ((packed));
+
+static inline int appldata_asm(struct appldata_product_id *id,
+ unsigned short fn, void *buffer,
+ unsigned short length)
+{
+ struct appldata_parameter_list parm_list;
+ int ry;
+
+ if (!MACHINE_IS_VM)
+ return -EOPNOTSUPP;
+ parm_list.diag = 0xdc;
+ parm_list.function = fn;
+ parm_list.parlist_length = sizeof(parm_list);
+ parm_list.buffer_length = length;
+ parm_list.product_id_addr = (unsigned long) id;
+ parm_list.buffer_addr = virt_to_phys(buffer);
+ diag_stat_inc(DIAG_STAT_X0DC);
+ asm volatile(
+ " diag %1,%0,0xdc"
+ : "=d" (ry)
+ : "d" (&parm_list), "m" (parm_list), "m" (*id)
+ : "cc");
+ return ry;
+}
+
+#endif /* _ASM_S390_APPLDATA_H */
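An illustrative caller of appldata_asm() (not part of the patch; the record layout and the product id bytes are placeholders):

#include <linux/types.h>
#include <asm/appldata.h>

struct example_record {
	u64 sample;
} __attribute__ ((packed));

static int example_start_record(struct example_record *rec)
{
	/* Placeholder product id, "LINUX  " in EBCDIC; real callers use their own. */
	struct appldata_product_id id = {
		.prod_nr   = { 0xd3, 0xc9, 0xd5, 0xe4, 0xe7, 0x40, 0x40 },
		.prod_fn   = 0x0001,
		.record_nr = 0x01,
	};

	return appldata_asm(&id, APPLDATA_START_INTERVAL_REC,
			    rec, sizeof(*rec));
}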
diff --git a/arch/s390/include/asm/archrandom.h b/arch/s390/include/asm/archrandom.h
new file mode 100644
index 000000000..9a6835137
--- /dev/null
+++ b/arch/s390/include/asm/archrandom.h
@@ -0,0 +1,51 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Kernel interface for the s390 arch_random_* functions
+ *
+ * Copyright IBM Corp. 2017
+ *
+ * Author: Harald Freudenberger <freude@de.ibm.com>
+ *
+ */
+
+#ifndef _ASM_S390_ARCHRANDOM_H
+#define _ASM_S390_ARCHRANDOM_H
+
+#ifdef CONFIG_ARCH_RANDOM
+
+#include <linux/static_key.h>
+#include <linux/atomic.h>
+
+DECLARE_STATIC_KEY_FALSE(s390_arch_random_available);
+extern atomic64_t s390_arch_random_counter;
+
+bool s390_arch_random_generate(u8 *buf, unsigned int nbytes);
+
+static inline bool arch_get_random_long(unsigned long *v)
+{
+ return false;
+}
+
+static inline bool arch_get_random_int(unsigned int *v)
+{
+ return false;
+}
+
+static inline bool arch_get_random_seed_long(unsigned long *v)
+{
+ if (static_branch_likely(&s390_arch_random_available)) {
+ return s390_arch_random_generate((u8 *)v, sizeof(*v));
+ }
+ return false;
+}
+
+static inline bool arch_get_random_seed_int(unsigned int *v)
+{
+ if (static_branch_likely(&s390_arch_random_available)) {
+ return s390_arch_random_generate((u8 *)v, sizeof(*v));
+ }
+ return false;
+}
+
+#endif /* CONFIG_ARCH_RANDOM */
+#endif /* _ASM_S390_ARCHRANDOM_H */
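A sketch of consuming the seed hook above, assuming CONFIG_ARCH_RANDOM=y (editorial illustration; the fallback value is arbitrary):

#include <asm/archrandom.h>

static unsigned long example_get_seed(void)
{
	unsigned long seed;

	/* Returns true only if the CPACF-backed generator produced data. */
	if (arch_get_random_seed_long(&seed))
		return seed;
	return 0;	/* no hardware entropy available */
}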
diff --git a/arch/s390/include/asm/asm-prototypes.h b/arch/s390/include/asm/asm-prototypes.h
new file mode 100644
index 000000000..c37eb921b
--- /dev/null
+++ b/arch/s390/include/asm/asm-prototypes.h
@@ -0,0 +1,9 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_S390_PROTOTYPES_H
+
+#include <linux/kvm_host.h>
+#include <linux/ftrace.h>
+#include <asm/fpu/api.h>
+#include <asm-generic/asm-prototypes.h>
+
+#endif /* _ASM_S390_PROTOTYPES_H */
diff --git a/arch/s390/include/asm/atomic.h b/arch/s390/include/asm/atomic.h
new file mode 100644
index 000000000..fd20ab5d4
--- /dev/null
+++ b/arch/s390/include/asm/atomic.h
@@ -0,0 +1,152 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright IBM Corp. 1999, 2016
+ * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>,
+ * Denis Joseph Barrow,
+ * Arnd Bergmann,
+ */
+
+#ifndef __ARCH_S390_ATOMIC__
+#define __ARCH_S390_ATOMIC__
+
+#include <linux/compiler.h>
+#include <linux/types.h>
+#include <asm/atomic_ops.h>
+#include <asm/barrier.h>
+#include <asm/cmpxchg.h>
+
+#define ATOMIC_INIT(i) { (i) }
+
+static inline int atomic_read(const atomic_t *v)
+{
+ int c;
+
+ asm volatile(
+ " l %0,%1\n"
+ : "=d" (c) : "Q" (v->counter));
+ return c;
+}
+
+static inline void atomic_set(atomic_t *v, int i)
+{
+ asm volatile(
+ " st %1,%0\n"
+ : "=Q" (v->counter) : "d" (i));
+}
+
+static inline int atomic_add_return(int i, atomic_t *v)
+{
+ return __atomic_add_barrier(i, &v->counter) + i;
+}
+
+static inline int atomic_fetch_add(int i, atomic_t *v)
+{
+ return __atomic_add_barrier(i, &v->counter);
+}
+
+static inline void atomic_add(int i, atomic_t *v)
+{
+#ifdef CONFIG_HAVE_MARCH_Z196_FEATURES
+ if (__builtin_constant_p(i) && (i > -129) && (i < 128)) {
+ __atomic_add_const(i, &v->counter);
+ return;
+ }
+#endif
+ __atomic_add(i, &v->counter);
+}
+
+#define atomic_sub(_i, _v) atomic_add(-(int)(_i), _v)
+#define atomic_sub_return(_i, _v) atomic_add_return(-(int)(_i), _v)
+#define atomic_fetch_sub(_i, _v) atomic_fetch_add(-(int)(_i), _v)
+
+#define ATOMIC_OPS(op) \
+static inline void atomic_##op(int i, atomic_t *v) \
+{ \
+ __atomic_##op(i, &v->counter); \
+} \
+static inline int atomic_fetch_##op(int i, atomic_t *v) \
+{ \
+ return __atomic_##op##_barrier(i, &v->counter); \
+}
+
+ATOMIC_OPS(and)
+ATOMIC_OPS(or)
+ATOMIC_OPS(xor)
+
+#undef ATOMIC_OPS
+
+#define atomic_xchg(v, new) (xchg(&((v)->counter), new))
+
+static inline int atomic_cmpxchg(atomic_t *v, int old, int new)
+{
+ return __atomic_cmpxchg(&v->counter, old, new);
+}
+
+#define ATOMIC64_INIT(i) { (i) }
+
+static inline long atomic64_read(const atomic64_t *v)
+{
+ long c;
+
+ asm volatile(
+ " lg %0,%1\n"
+ : "=d" (c) : "Q" (v->counter));
+ return c;
+}
+
+static inline void atomic64_set(atomic64_t *v, long i)
+{
+ asm volatile(
+ " stg %1,%0\n"
+ : "=Q" (v->counter) : "d" (i));
+}
+
+static inline long atomic64_add_return(long i, atomic64_t *v)
+{
+ return __atomic64_add_barrier(i, &v->counter) + i;
+}
+
+static inline long atomic64_fetch_add(long i, atomic64_t *v)
+{
+ return __atomic64_add_barrier(i, &v->counter);
+}
+
+static inline void atomic64_add(long i, atomic64_t *v)
+{
+#ifdef CONFIG_HAVE_MARCH_Z196_FEATURES
+ if (__builtin_constant_p(i) && (i > -129) && (i < 128)) {
+ __atomic64_add_const(i, &v->counter);
+ return;
+ }
+#endif
+ __atomic64_add(i, &v->counter);
+}
+
+#define atomic64_xchg(v, new) (xchg(&((v)->counter), new))
+
+static inline long atomic64_cmpxchg(atomic64_t *v, long old, long new)
+{
+ return __atomic64_cmpxchg(&v->counter, old, new);
+}
+
+#define ATOMIC64_OPS(op) \
+static inline void atomic64_##op(long i, atomic64_t *v) \
+{ \
+ __atomic64_##op(i, &v->counter); \
+} \
+static inline long atomic64_fetch_##op(long i, atomic64_t *v) \
+{ \
+ return __atomic64_##op##_barrier(i, &v->counter); \
+}
+
+ATOMIC64_OPS(and)
+ATOMIC64_OPS(or)
+ATOMIC64_OPS(xor)
+
+#undef ATOMIC64_OPS
+
+#define atomic64_sub_return(_i, _v) atomic64_add_return(-(long)(_i), _v)
+#define atomic64_fetch_sub(_i, _v) atomic64_fetch_add(-(long)(_i), _v)
+#define atomic64_sub(_i, _v) atomic64_add(-(long)(_i), _v)
+
+#endif /* __ARCH_S390_ATOMIC__ */
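A brief sketch of the atomic primitives above in use (editorial illustration; the counter and the limit of 16 are arbitrary):

#include <linux/types.h>
#include <asm/atomic.h>

static atomic_t example_users = ATOMIC_INIT(0);

/* Take a reference unless the limit is already reached. */
static bool example_get(void)
{
	int old, new;

	do {
		old = atomic_read(&example_users);
		if (old >= 16)
			return false;
		new = old + 1;
	} while (atomic_cmpxchg(&example_users, old, new) != old);
	return true;
}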
diff --git a/arch/s390/include/asm/atomic_ops.h b/arch/s390/include/asm/atomic_ops.h
new file mode 100644
index 000000000..d3f09526e
--- /dev/null
+++ b/arch/s390/include/asm/atomic_ops.h
@@ -0,0 +1,143 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Low level function for atomic operations
+ *
+ * Copyright IBM Corp. 1999, 2016
+ */
+
+#ifndef __ARCH_S390_ATOMIC_OPS__
+#define __ARCH_S390_ATOMIC_OPS__
+
+#ifdef CONFIG_HAVE_MARCH_Z196_FEATURES
+
+#define __ATOMIC_OP(op_name, op_type, op_string, op_barrier) \
+static inline op_type op_name(op_type val, op_type *ptr) \
+{ \
+ op_type old; \
+ \
+ asm volatile( \
+ op_string " %[old],%[val],%[ptr]\n" \
+ op_barrier \
+ : [old] "=d" (old), [ptr] "+Q" (*ptr) \
+ : [val] "d" (val) : "cc", "memory"); \
+ return old; \
+} \
+
+#define __ATOMIC_OPS(op_name, op_type, op_string) \
+ __ATOMIC_OP(op_name, op_type, op_string, "\n") \
+ __ATOMIC_OP(op_name##_barrier, op_type, op_string, "bcr 14,0\n")
+
+__ATOMIC_OPS(__atomic_add, int, "laa")
+__ATOMIC_OPS(__atomic_and, int, "lan")
+__ATOMIC_OPS(__atomic_or, int, "lao")
+__ATOMIC_OPS(__atomic_xor, int, "lax")
+
+__ATOMIC_OPS(__atomic64_add, long, "laag")
+__ATOMIC_OPS(__atomic64_and, long, "lang")
+__ATOMIC_OPS(__atomic64_or, long, "laog")
+__ATOMIC_OPS(__atomic64_xor, long, "laxg")
+
+#undef __ATOMIC_OPS
+#undef __ATOMIC_OP
+
+#define __ATOMIC_CONST_OP(op_name, op_type, op_string, op_barrier) \
+static inline void op_name(op_type val, op_type *ptr) \
+{ \
+ asm volatile( \
+ op_string " %[ptr],%[val]\n" \
+ op_barrier \
+ : [ptr] "+Q" (*ptr) : [val] "i" (val) : "cc", "memory");\
+}
+
+#define __ATOMIC_CONST_OPS(op_name, op_type, op_string) \
+ __ATOMIC_CONST_OP(op_name, op_type, op_string, "\n") \
+ __ATOMIC_CONST_OP(op_name##_barrier, op_type, op_string, "bcr 14,0\n")
+
+__ATOMIC_CONST_OPS(__atomic_add_const, int, "asi")
+__ATOMIC_CONST_OPS(__atomic64_add_const, long, "agsi")
+
+#undef __ATOMIC_CONST_OPS
+#undef __ATOMIC_CONST_OP
+
+#else /* CONFIG_HAVE_MARCH_Z196_FEATURES */
+
+#define __ATOMIC_OP(op_name, op_string) \
+static inline int op_name(int val, int *ptr) \
+{ \
+ int old, new; \
+ \
+ asm volatile( \
+ "0: lr %[new],%[old]\n" \
+ op_string " %[new],%[val]\n" \
+ " cs %[old],%[new],%[ptr]\n" \
+ " jl 0b" \
+ : [old] "=d" (old), [new] "=&d" (new), [ptr] "+Q" (*ptr)\
+ : [val] "d" (val), "0" (*ptr) : "cc", "memory"); \
+ return old; \
+}
+
+#define __ATOMIC_OPS(op_name, op_string) \
+ __ATOMIC_OP(op_name, op_string) \
+ __ATOMIC_OP(op_name##_barrier, op_string)
+
+__ATOMIC_OPS(__atomic_add, "ar")
+__ATOMIC_OPS(__atomic_and, "nr")
+__ATOMIC_OPS(__atomic_or, "or")
+__ATOMIC_OPS(__atomic_xor, "xr")
+
+#undef __ATOMIC_OPS
+
+#define __ATOMIC64_OP(op_name, op_string) \
+static inline long op_name(long val, long *ptr) \
+{ \
+ long old, new; \
+ \
+ asm volatile( \
+ "0: lgr %[new],%[old]\n" \
+ op_string " %[new],%[val]\n" \
+ " csg %[old],%[new],%[ptr]\n" \
+ " jl 0b" \
+ : [old] "=d" (old), [new] "=&d" (new), [ptr] "+Q" (*ptr)\
+ : [val] "d" (val), "0" (*ptr) : "cc", "memory"); \
+ return old; \
+}
+
+#define __ATOMIC64_OPS(op_name, op_string) \
+ __ATOMIC64_OP(op_name, op_string) \
+ __ATOMIC64_OP(op_name##_barrier, op_string)
+
+__ATOMIC64_OPS(__atomic64_add, "agr")
+__ATOMIC64_OPS(__atomic64_and, "ngr")
+__ATOMIC64_OPS(__atomic64_or, "ogr")
+__ATOMIC64_OPS(__atomic64_xor, "xgr")
+
+#undef __ATOMIC64_OPS
+
+#define __atomic_add_const(val, ptr) __atomic_add(val, ptr)
+#define __atomic_add_const_barrier(val, ptr) __atomic_add(val, ptr)
+#define __atomic64_add_const(val, ptr) __atomic64_add(val, ptr)
+#define __atomic64_add_const_barrier(val, ptr) __atomic64_add(val, ptr)
+
+#endif /* CONFIG_HAVE_MARCH_Z196_FEATURES */
+
+static inline int __atomic_cmpxchg(int *ptr, int old, int new)
+{
+ return __sync_val_compare_and_swap(ptr, old, new);
+}
+
+static inline int __atomic_cmpxchg_bool(int *ptr, int old, int new)
+{
+ return __sync_bool_compare_and_swap(ptr, old, new);
+}
+
+static inline long __atomic64_cmpxchg(long *ptr, long old, long new)
+{
+ return __sync_val_compare_and_swap(ptr, old, new);
+}
+
+static inline long __atomic64_cmpxchg_bool(long *ptr, long old, long new)
+{
+ return __sync_bool_compare_and_swap(ptr, old, new);
+}
+
+#endif /* __ARCH_S390_ATOMIC_OPS__ */
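The helpers above are building blocks for other arch headers; a minimal sketch of a fetch-and-add style primitive built on __atomic_cmpxchg() (editorial illustration, not a kernel interface):

#include <linux/compiler.h>
#include <asm/atomic_ops.h>

static inline int example_fetch_add(int val, int *ptr)
{
	int old, new;

	do {
		old = READ_ONCE(*ptr);
		new = old + val;
	} while (__atomic_cmpxchg(ptr, old, new) != old);
	return old;
}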
diff --git a/arch/s390/include/asm/barrier.h b/arch/s390/include/asm/barrier.h
new file mode 100644
index 000000000..f9eddbca7
--- /dev/null
+++ b/arch/s390/include/asm/barrier.h
@@ -0,0 +1,78 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright IBM Corp. 1999, 2009
+ *
+ * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>
+ */
+
+#ifndef __ASM_BARRIER_H
+#define __ASM_BARRIER_H
+
+/*
+ * Force strict CPU ordering.
+ * And yes, this is required on UP too when we're talking
+ * to devices.
+ */
+
+#ifdef CONFIG_HAVE_MARCH_Z196_FEATURES
+/* Fast-BCR without checkpoint synchronization */
+#define __ASM_BARRIER "bcr 14,0\n"
+#else
+#define __ASM_BARRIER "bcr 15,0\n"
+#endif
+
+#define mb() do { asm volatile(__ASM_BARRIER : : : "memory"); } while (0)
+
+#define rmb() barrier()
+#define wmb() barrier()
+#define dma_rmb() mb()
+#define dma_wmb() mb()
+#define __smp_mb() mb()
+#define __smp_rmb() rmb()
+#define __smp_wmb() wmb()
+
+#define __smp_store_release(p, v) \
+do { \
+ compiletime_assert_atomic_type(*p); \
+ barrier(); \
+ WRITE_ONCE(*p, v); \
+} while (0)
+
+#define __smp_load_acquire(p) \
+({ \
+ typeof(*p) ___p1 = READ_ONCE(*p); \
+ compiletime_assert_atomic_type(*p); \
+ barrier(); \
+ ___p1; \
+})
+
+#define __smp_mb__before_atomic() barrier()
+#define __smp_mb__after_atomic() barrier()
+
+/**
+ * array_index_mask_nospec - generate a mask for array_index_nospec() that is
+ * ~0UL when the bounds check succeeds and 0 otherwise
+ * @index: array element index
+ * @size: number of elements in array
+ */
+#define array_index_mask_nospec array_index_mask_nospec
+static inline unsigned long array_index_mask_nospec(unsigned long index,
+ unsigned long size)
+{
+ unsigned long mask;
+
+ if (__builtin_constant_p(size) && size > 0) {
+ asm(" clgr %2,%1\n"
+ " slbgr %0,%0\n"
+ :"=d" (mask) : "d" (size-1), "d" (index) :"cc");
+ return mask;
+ }
+ asm(" clgr %1,%2\n"
+ " slbgr %0,%0\n"
+ :"=d" (mask) : "d" (size), "d" (index) :"cc");
+ return ~mask;
+}
+
+#include <asm-generic/barrier.h>
+
+#endif /* __ASM_BARRIER_H */
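A sketch of the intended use of array_index_mask_nospec() above, mirroring the generic array_index_nospec() pattern (the table and index are illustrative):

#include <linux/kernel.h>
#include <asm/barrier.h>

static int example_table[16];

/* Clamp a possibly attacker-controlled index under speculation. */
static int example_lookup(unsigned long idx)
{
	if (idx >= ARRAY_SIZE(example_table))
		return -1;
	idx &= array_index_mask_nospec(idx, ARRAY_SIZE(example_table));
	return example_table[idx];
}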
diff --git a/arch/s390/include/asm/bitops.h b/arch/s390/include/asm/bitops.h
new file mode 100644
index 000000000..86e5b2fde
--- /dev/null
+++ b/arch/s390/include/asm/bitops.h
@@ -0,0 +1,422 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright IBM Corp. 1999,2013
+ *
+ * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>,
+ *
+ * The description below was taken in large parts from the powerpc
+ * bitops header file:
+ * Within a word, bits are numbered LSB first. Lots of places make
+ * this assumption by directly testing bits with (val & (1<<nr)).
+ * This can cause confusion for large (> 1 word) bitmaps on a
+ * big-endian system because, unlike little endian, the number of each
+ * bit depends on the word size.
+ *
+ * The bitop functions are defined to work on unsigned longs, so the bits
+ * end up numbered:
+ * |63..............0|127............64|191...........128|255...........192|
+ *
+ * We also have special functions which work with an MSB0 encoding.
+ * The bits are numbered:
+ * |0..............63|64............127|128...........191|192...........255|
+ *
+ * The main difference is that bit 0-63 in the bit number field needs to be
+ * reversed compared to the LSB0 encoded bit fields. This can be achieved by
+ * XOR with 0x3f.
+ *
+ */
+
+#ifndef _S390_BITOPS_H
+#define _S390_BITOPS_H
+
+#ifndef _LINUX_BITOPS_H
+#error only <linux/bitops.h> can be included directly
+#endif
+
+#include <linux/typecheck.h>
+#include <linux/compiler.h>
+#include <asm/atomic_ops.h>
+#include <asm/barrier.h>
+
+#define __BITOPS_WORDS(bits) (((bits) + BITS_PER_LONG - 1) / BITS_PER_LONG)
+
+static inline unsigned long *
+__bitops_word(unsigned long nr, volatile unsigned long *ptr)
+{
+ unsigned long addr;
+
+ addr = (unsigned long)ptr + ((nr ^ (nr & (BITS_PER_LONG - 1))) >> 3);
+ return (unsigned long *)addr;
+}
+
+static inline unsigned char *
+__bitops_byte(unsigned long nr, volatile unsigned long *ptr)
+{
+ return ((unsigned char *)ptr) + ((nr ^ (BITS_PER_LONG - 8)) >> 3);
+}
+
+static inline void set_bit(unsigned long nr, volatile unsigned long *ptr)
+{
+ unsigned long *addr = __bitops_word(nr, ptr);
+ unsigned long mask;
+
+#ifdef CONFIG_HAVE_MARCH_ZEC12_FEATURES
+ if (__builtin_constant_p(nr)) {
+ unsigned char *caddr = __bitops_byte(nr, ptr);
+
+ asm volatile(
+ "oi %0,%b1\n"
+ : "+Q" (*caddr)
+ : "i" (1 << (nr & 7))
+ : "cc", "memory");
+ return;
+ }
+#endif
+ mask = 1UL << (nr & (BITS_PER_LONG - 1));
+ __atomic64_or(mask, addr);
+}
+
+static inline void clear_bit(unsigned long nr, volatile unsigned long *ptr)
+{
+ unsigned long *addr = __bitops_word(nr, ptr);
+ unsigned long mask;
+
+#ifdef CONFIG_HAVE_MARCH_ZEC12_FEATURES
+ if (__builtin_constant_p(nr)) {
+ unsigned char *caddr = __bitops_byte(nr, ptr);
+
+ asm volatile(
+ "ni %0,%b1\n"
+ : "+Q" (*caddr)
+ : "i" (~(1 << (nr & 7)))
+ : "cc", "memory");
+ return;
+ }
+#endif
+ mask = ~(1UL << (nr & (BITS_PER_LONG - 1)));
+ __atomic64_and(mask, addr);
+}
+
+static inline void change_bit(unsigned long nr, volatile unsigned long *ptr)
+{
+ unsigned long *addr = __bitops_word(nr, ptr);
+ unsigned long mask;
+
+#ifdef CONFIG_HAVE_MARCH_ZEC12_FEATURES
+ if (__builtin_constant_p(nr)) {
+ unsigned char *caddr = __bitops_byte(nr, ptr);
+
+ asm volatile(
+ "xi %0,%b1\n"
+ : "+Q" (*caddr)
+ : "i" (1 << (nr & 7))
+ : "cc", "memory");
+ return;
+ }
+#endif
+ mask = 1UL << (nr & (BITS_PER_LONG - 1));
+ __atomic64_xor(mask, addr);
+}
+
+static inline int
+test_and_set_bit(unsigned long nr, volatile unsigned long *ptr)
+{
+ unsigned long *addr = __bitops_word(nr, ptr);
+ unsigned long old, mask;
+
+ mask = 1UL << (nr & (BITS_PER_LONG - 1));
+ old = __atomic64_or_barrier(mask, addr);
+ return (old & mask) != 0;
+}
+
+static inline int
+test_and_clear_bit(unsigned long nr, volatile unsigned long *ptr)
+{
+ unsigned long *addr = __bitops_word(nr, ptr);
+ unsigned long old, mask;
+
+ mask = ~(1UL << (nr & (BITS_PER_LONG - 1)));
+ old = __atomic64_and_barrier(mask, addr);
+ return (old & ~mask) != 0;
+}
+
+static inline int
+test_and_change_bit(unsigned long nr, volatile unsigned long *ptr)
+{
+ unsigned long *addr = __bitops_word(nr, ptr);
+ unsigned long old, mask;
+
+ mask = 1UL << (nr & (BITS_PER_LONG - 1));
+ old = __atomic64_xor_barrier(mask, addr);
+ return (old & mask) != 0;
+}
+
+static inline void __set_bit(unsigned long nr, volatile unsigned long *ptr)
+{
+ unsigned char *addr = __bitops_byte(nr, ptr);
+
+ *addr |= 1 << (nr & 7);
+}
+
+static inline void
+__clear_bit(unsigned long nr, volatile unsigned long *ptr)
+{
+ unsigned char *addr = __bitops_byte(nr, ptr);
+
+ *addr &= ~(1 << (nr & 7));
+}
+
+static inline void __change_bit(unsigned long nr, volatile unsigned long *ptr)
+{
+ unsigned char *addr = __bitops_byte(nr, ptr);
+
+ *addr ^= 1 << (nr & 7);
+}
+
+static inline int
+__test_and_set_bit(unsigned long nr, volatile unsigned long *ptr)
+{
+ unsigned char *addr = __bitops_byte(nr, ptr);
+ unsigned char ch;
+
+ ch = *addr;
+ *addr |= 1 << (nr & 7);
+ return (ch >> (nr & 7)) & 1;
+}
+
+static inline int
+__test_and_clear_bit(unsigned long nr, volatile unsigned long *ptr)
+{
+ unsigned char *addr = __bitops_byte(nr, ptr);
+ unsigned char ch;
+
+ ch = *addr;
+ *addr &= ~(1 << (nr & 7));
+ return (ch >> (nr & 7)) & 1;
+}
+
+static inline int
+__test_and_change_bit(unsigned long nr, volatile unsigned long *ptr)
+{
+ unsigned char *addr = __bitops_byte(nr, ptr);
+ unsigned char ch;
+
+ ch = *addr;
+ *addr ^= 1 << (nr & 7);
+ return (ch >> (nr & 7)) & 1;
+}
+
+static inline int test_bit(unsigned long nr, const volatile unsigned long *ptr)
+{
+ const volatile unsigned char *addr;
+
+ addr = ((const volatile unsigned char *)ptr);
+ addr += (nr ^ (BITS_PER_LONG - 8)) >> 3;
+ return (*addr >> (nr & 7)) & 1;
+}
+
+static inline int test_and_set_bit_lock(unsigned long nr,
+ volatile unsigned long *ptr)
+{
+ if (test_bit(nr, ptr))
+ return 1;
+ return test_and_set_bit(nr, ptr);
+}
+
+static inline void clear_bit_unlock(unsigned long nr,
+ volatile unsigned long *ptr)
+{
+ smp_mb__before_atomic();
+ clear_bit(nr, ptr);
+}
+
+static inline void __clear_bit_unlock(unsigned long nr,
+ volatile unsigned long *ptr)
+{
+ smp_mb();
+ __clear_bit(nr, ptr);
+}
+
+/*
+ * Functions which use MSB0 bit numbering.
+ * The bits are numbered:
+ * |0..............63|64............127|128...........191|192...........255|
+ */
+unsigned long find_first_bit_inv(const unsigned long *addr, unsigned long size);
+unsigned long find_next_bit_inv(const unsigned long *addr, unsigned long size,
+ unsigned long offset);
+
+#define for_each_set_bit_inv(bit, addr, size) \
+ for ((bit) = find_first_bit_inv((addr), (size)); \
+ (bit) < (size); \
+ (bit) = find_next_bit_inv((addr), (size), (bit) + 1))
+
+static inline void set_bit_inv(unsigned long nr, volatile unsigned long *ptr)
+{
+ return set_bit(nr ^ (BITS_PER_LONG - 1), ptr);
+}
+
+static inline void clear_bit_inv(unsigned long nr, volatile unsigned long *ptr)
+{
+ return clear_bit(nr ^ (BITS_PER_LONG - 1), ptr);
+}
+
+static inline int test_and_clear_bit_inv(unsigned long nr, volatile unsigned long *ptr)
+{
+ return test_and_clear_bit(nr ^ (BITS_PER_LONG - 1), ptr);
+}
+
+static inline void __set_bit_inv(unsigned long nr, volatile unsigned long *ptr)
+{
+ return __set_bit(nr ^ (BITS_PER_LONG - 1), ptr);
+}
+
+static inline void __clear_bit_inv(unsigned long nr, volatile unsigned long *ptr)
+{
+ return __clear_bit(nr ^ (BITS_PER_LONG - 1), ptr);
+}
+
+static inline int test_bit_inv(unsigned long nr,
+ const volatile unsigned long *ptr)
+{
+ return test_bit(nr ^ (BITS_PER_LONG - 1), ptr);
+}
+
+#ifdef CONFIG_HAVE_MARCH_Z9_109_FEATURES
+
+/**
+ * __flogr - find leftmost one
+ * @word: The word to search
+ *
+ * Returns the bit number of the most significant bit set,
+ * where the most significant bit has bit number 0.
+ * If no bit is set this function returns 64.
+ */
+static inline unsigned char __flogr(unsigned long word)
+{
+ if (__builtin_constant_p(word)) {
+ unsigned long bit = 0;
+
+ if (!word)
+ return 64;
+ if (!(word & 0xffffffff00000000UL)) {
+ word <<= 32;
+ bit += 32;
+ }
+ if (!(word & 0xffff000000000000UL)) {
+ word <<= 16;
+ bit += 16;
+ }
+ if (!(word & 0xff00000000000000UL)) {
+ word <<= 8;
+ bit += 8;
+ }
+ if (!(word & 0xf000000000000000UL)) {
+ word <<= 4;
+ bit += 4;
+ }
+ if (!(word & 0xc000000000000000UL)) {
+ word <<= 2;
+ bit += 2;
+ }
+ if (!(word & 0x8000000000000000UL)) {
+ word <<= 1;
+ bit += 1;
+ }
+ return bit;
+ } else {
+ register unsigned long bit asm("4") = word;
+ register unsigned long out asm("5");
+
+ asm volatile(
+ " flogr %[bit],%[bit]\n"
+ : [bit] "+d" (bit), [out] "=d" (out) : : "cc");
+ return bit;
+ }
+}
+
+/**
+ * __ffs - find first bit in word.
+ * @word: The word to search
+ *
+ * Undefined if no bit exists, so code should check against 0 first.
+ */
+static inline unsigned long __ffs(unsigned long word)
+{
+ return __flogr(-word & word) ^ (BITS_PER_LONG - 1);
+}
+
+/**
+ * ffs - find first bit set
+ * @word: the word to search
+ *
+ * This is defined the same way as the libc and
+ * compiler builtin ffs routines (man ffs).
+ */
+static inline int ffs(int word)
+{
+ unsigned long mask = 2 * BITS_PER_LONG - 1;
+ unsigned int val = (unsigned int)word;
+
+ return (1 + (__flogr(-val & val) ^ (BITS_PER_LONG - 1))) & mask;
+}
+
+/**
+ * __fls - find last (most-significant) set bit in a long word
+ * @word: the word to search
+ *
+ * Undefined if no set bit exists, so code should check against 0 first.
+ */
+static inline unsigned long __fls(unsigned long word)
+{
+ return __flogr(word) ^ (BITS_PER_LONG - 1);
+}
+
+/**
+ * fls64 - find last set bit in a 64-bit word
+ * @word: the word to search
+ *
+ * This is defined in a similar way as the libc and compiler builtin
+ * ffsll, but returns the position of the most significant set bit.
+ *
+ * fls64(value) returns 0 if value is 0 or the position of the last
+ * set bit if value is nonzero. The last (most significant) bit is
+ * at position 64.
+ */
+static inline int fls64(unsigned long word)
+{
+ unsigned long mask = 2 * BITS_PER_LONG - 1;
+
+ return (1 + (__flogr(word) ^ (BITS_PER_LONG - 1))) & mask;
+}
+
+/**
+ * fls - find last (most-significant) bit set
+ * @word: the word to search
+ *
+ * This is defined the same way as ffs.
+ * Note fls(0) = 0, fls(1) = 1, fls(0x80000000) = 32.
+ */
+static inline int fls(int word)
+{
+ return fls64((unsigned int)word);
+}
+
+#else /* CONFIG_HAVE_MARCH_Z9_109_FEATURES */
+
+#include <asm-generic/bitops/__ffs.h>
+#include <asm-generic/bitops/ffs.h>
+#include <asm-generic/bitops/__fls.h>
+#include <asm-generic/bitops/fls.h>
+#include <asm-generic/bitops/fls64.h>
+
+#endif /* CONFIG_HAVE_MARCH_Z9_109_FEATURES */
+
+#include <asm-generic/bitops/ffz.h>
+#include <asm-generic/bitops/find.h>
+#include <asm-generic/bitops/hweight.h>
+#include <asm-generic/bitops/sched.h>
+#include <asm-generic/bitops/le.h>
+#include <asm-generic/bitops/ext2-atomic-setbit.h>
+
+#endif /* _S390_BITOPS_H */
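A short sketch of the MSB0 ("inv") helpers declared above, the numbering used for example by adapter interrupt indicators (editorial illustration; the vector size of 256 bits is arbitrary):

#include <linux/bitops.h>

static unsigned long example_indicators[4];	/* 256 bits, MSB0 numbered */

static void example_scan(void)
{
	unsigned long bit;

	/* Bit 0 is the leftmost bit of example_indicators[0]. */
	set_bit_inv(7, example_indicators);

	for_each_set_bit_inv(bit, example_indicators, 256)
		clear_bit_inv(bit, example_indicators);
}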
diff --git a/arch/s390/include/asm/bug.h b/arch/s390/include/asm/bug.h
new file mode 100644
index 000000000..429f43a8a
--- /dev/null
+++ b/arch/s390/include/asm/bug.h
@@ -0,0 +1,72 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_S390_BUG_H
+#define _ASM_S390_BUG_H
+
+#include <linux/kernel.h>
+
+#ifdef CONFIG_BUG
+
+#ifdef CONFIG_DEBUG_BUGVERBOSE
+
+#define __EMIT_BUG(x) do { \
+ asm volatile( \
+ "0: j 0b+2\n" \
+ "1:\n" \
+ ".section .rodata.str,\"aMS\",@progbits,1\n" \
+ "2: .asciz \""__FILE__"\"\n" \
+ ".previous\n" \
+ ".section __bug_table,\"aw\"\n" \
+ "3: .long 1b-3b,2b-3b\n" \
+ " .short %0,%1\n" \
+ " .org 3b+%2\n" \
+ ".previous\n" \
+ : : "i" (__LINE__), \
+ "i" (x), \
+ "i" (sizeof(struct bug_entry))); \
+} while (0)
+
+#else /* CONFIG_DEBUG_BUGVERBOSE */
+
+#define __EMIT_BUG(x) do { \
+ asm volatile( \
+ "0: j 0b+2\n" \
+ "1:\n" \
+ ".section __bug_table,\"aw\"\n" \
+ "2: .long 1b-2b\n" \
+ " .short %0\n" \
+ " .org 2b+%1\n" \
+ ".previous\n" \
+ : : "i" (x), \
+ "i" (sizeof(struct bug_entry))); \
+} while (0)
+
+#endif /* CONFIG_DEBUG_BUGVERBOSE */
+
+#define BUG() do { \
+ __EMIT_BUG(0); \
+ unreachable(); \
+} while (0)
+
+#define __WARN_FLAGS(flags) do { \
+ __EMIT_BUG(BUGFLAG_WARNING|(flags)); \
+} while (0)
+
+#define WARN_ON(x) ({ \
+ int __ret_warn_on = !!(x); \
+ if (__builtin_constant_p(__ret_warn_on)) { \
+ if (__ret_warn_on) \
+ __WARN(); \
+ } else { \
+ if (unlikely(__ret_warn_on)) \
+ __WARN(); \
+ } \
+ unlikely(__ret_warn_on); \
+})
+
+#define HAVE_ARCH_BUG
+#define HAVE_ARCH_WARN_ON
+#endif /* CONFIG_BUG */
+
+#include <asm-generic/bug.h>
+
+#endif /* _ASM_S390_BUG_H */
diff --git a/arch/s390/include/asm/bugs.h b/arch/s390/include/asm/bugs.h
new file mode 100644
index 000000000..aa42a179b
--- /dev/null
+++ b/arch/s390/include/asm/bugs.h
@@ -0,0 +1,21 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * S390 version
+ * Copyright IBM Corp. 1999
+ * Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com)
+ *
+ * Derived from "include/asm-i386/bugs.h"
+ * Copyright (C) 1994 Linus Torvalds
+ */
+
+/*
+ * This is included by init/main.c to check for architecture-dependent bugs.
+ *
+ * Needs:
+ * void check_bugs(void);
+ */
+
+static inline void check_bugs(void)
+{
+ /* s390 has no bugs ... */
+}
diff --git a/arch/s390/include/asm/cache.h b/arch/s390/include/asm/cache.h
new file mode 100644
index 000000000..d5e22e837
--- /dev/null
+++ b/arch/s390/include/asm/cache.h
@@ -0,0 +1,19 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * S390 version
+ * Copyright IBM Corp. 1999
+ *
+ * Derived from "include/asm-i386/cache.h"
+ * Copyright (C) 1992, Linus Torvalds
+ */
+
+#ifndef __ARCH_S390_CACHE_H
+#define __ARCH_S390_CACHE_H
+
+#define L1_CACHE_BYTES 256
+#define L1_CACHE_SHIFT 8
+#define NET_SKB_PAD 32
+
+#define __read_mostly __section(.data..read_mostly)
+
+#endif
diff --git a/arch/s390/include/asm/ccwdev.h b/arch/s390/include/asm/ccwdev.h
new file mode 100644
index 000000000..a29dd430f
--- /dev/null
+++ b/arch/s390/include/asm/ccwdev.h
@@ -0,0 +1,235 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright IBM Corp. 2002, 2009
+ *
+ * Author(s): Arnd Bergmann <arndb@de.ibm.com>
+ *
+ * Interface for CCW device drivers
+ */
+#ifndef _S390_CCWDEV_H_
+#define _S390_CCWDEV_H_
+
+#include <linux/device.h>
+#include <linux/mod_devicetable.h>
+#include <asm/fcx.h>
+#include <asm/irq.h>
+#include <asm/schid.h>
+
+/* structs from asm/cio.h */
+struct irb;
+struct ccw1;
+struct ccw_dev_id;
+
+/* simplified initializers for struct ccw_device:
+ * CCW_DEVICE and CCW_DEVICE_DEVTYPE initialize one
+ * entry in your MODULE_DEVICE_TABLE and set the match_flag correctly */
+#define CCW_DEVICE(cu, cum) \
+ .cu_type=(cu), .cu_model=(cum), \
+ .match_flags=(CCW_DEVICE_ID_MATCH_CU_TYPE \
+ | (cum ? CCW_DEVICE_ID_MATCH_CU_MODEL : 0))
+
+#define CCW_DEVICE_DEVTYPE(cu, cum, dev, devm) \
+ .cu_type=(cu), .cu_model=(cum), .dev_type=(dev), .dev_model=(devm),\
+ .match_flags=CCW_DEVICE_ID_MATCH_CU_TYPE \
+ | ((cum) ? CCW_DEVICE_ID_MATCH_CU_MODEL : 0) \
+ | CCW_DEVICE_ID_MATCH_DEVICE_TYPE \
+ | ((devm) ? CCW_DEVICE_ID_MATCH_DEVICE_MODEL : 0)
+
+/* scan through an array of device ids and return the first
+ * entry that matches the device.
+ *
+ * the array must end with an entry containing zero match_flags
+ */
+static inline const struct ccw_device_id *
+ccw_device_id_match(const struct ccw_device_id *array,
+ const struct ccw_device_id *match)
+{
+ const struct ccw_device_id *id = array;
+
+ for (id = array; id->match_flags; id++) {
+ if ((id->match_flags & CCW_DEVICE_ID_MATCH_CU_TYPE)
+ && (id->cu_type != match->cu_type))
+ continue;
+
+ if ((id->match_flags & CCW_DEVICE_ID_MATCH_CU_MODEL)
+ && (id->cu_model != match->cu_model))
+ continue;
+
+ if ((id->match_flags & CCW_DEVICE_ID_MATCH_DEVICE_TYPE)
+ && (id->dev_type != match->dev_type))
+ continue;
+
+ if ((id->match_flags & CCW_DEVICE_ID_MATCH_DEVICE_MODEL)
+ && (id->dev_model != match->dev_model))
+ continue;
+
+ return id;
+ }
+
+ return NULL;
+}
+
+/**
+ * struct ccw_device - channel attached device
+ * @ccwlock: pointer to device lock
+ * @id: id of this device
+ * @drv: ccw driver for this device
+ * @dev: embedded device structure
+ * @online: online status of device
+ * @handler: interrupt handler
+ *
+ * @handler is a member of the device rather than the driver since a driver
+ * can have different interrupt handlers for different ccw devices
+ * (multi-subchannel drivers).
+ */
+struct ccw_device {
+ spinlock_t *ccwlock;
+/* private: */
+ struct ccw_device_private *private; /* cio private information */
+/* public: */
+ struct ccw_device_id id;
+ struct ccw_driver *drv;
+ struct device dev;
+ int online;
+ void (*handler) (struct ccw_device *, unsigned long, struct irb *);
+};
+
+/*
+ * Possible events used by the path_event notifier.
+ */
+#define PE_NONE 0x0
+#define PE_PATH_GONE 0x1 /* A path is no longer available. */
+#define PE_PATH_AVAILABLE 0x2 /* A path has become available and
+ was successfully verified. */
+#define PE_PATHGROUP_ESTABLISHED 0x4 /* A pathgroup was reset and had
+ to be established again. */
+
+/*
+ * Possible CIO actions triggered by the unit check handler.
+ */
+enum uc_todo {
+ UC_TODO_RETRY,
+ UC_TODO_RETRY_ON_NEW_PATH,
+ UC_TODO_STOP
+};
+
+/**
+ * struct ccw_driver - device driver for channel attached devices
+ * @ids: ids supported by this driver
+ * @probe: function called on probe
+ * @remove: function called on remove
+ * @set_online: called when setting device online
+ * @set_offline: called when setting device offline
+ * @notify: notify driver of device state changes
+ * @path_event: notify driver of channel path events
+ * @shutdown: called at device shutdown
+ * @prepare: prepare for pm state transition
+ * @complete: undo work done in @prepare
+ * @freeze: callback for freezing during hibernation snapshotting
+ * @thaw: undo work done in @freeze
+ * @restore: callback for restoring after hibernation
+ * @uc_handler: callback for unit check handler
+ * @driver: embedded device driver structure
+ * @int_class: interruption class to use for accounting interrupts
+ */
+struct ccw_driver {
+ struct ccw_device_id *ids;
+ int (*probe) (struct ccw_device *);
+ void (*remove) (struct ccw_device *);
+ int (*set_online) (struct ccw_device *);
+ int (*set_offline) (struct ccw_device *);
+ int (*notify) (struct ccw_device *, int);
+ void (*path_event) (struct ccw_device *, int *);
+ void (*shutdown) (struct ccw_device *);
+ int (*prepare) (struct ccw_device *);
+ void (*complete) (struct ccw_device *);
+ int (*freeze)(struct ccw_device *);
+ int (*thaw) (struct ccw_device *);
+ int (*restore)(struct ccw_device *);
+ enum uc_todo (*uc_handler) (struct ccw_device *, struct irb *);
+ struct device_driver driver;
+ enum interruption_class int_class;
+};
+
+extern struct ccw_device *get_ccwdev_by_busid(struct ccw_driver *cdrv,
+ const char *bus_id);
+
+/* device drivers call these during module load and unload.
+ * When a driver is registered, its probe method is called
+ * when new devices for its type pop up */
+extern int ccw_driver_register (struct ccw_driver *driver);
+extern void ccw_driver_unregister (struct ccw_driver *driver);
+
+struct ccw1;
+
+extern int ccw_device_set_options_mask(struct ccw_device *, unsigned long);
+extern int ccw_device_set_options(struct ccw_device *, unsigned long);
+extern void ccw_device_clear_options(struct ccw_device *, unsigned long);
+int ccw_device_is_pathgroup(struct ccw_device *cdev);
+int ccw_device_is_multipath(struct ccw_device *cdev);
+
+/* Allow for i/o completion notification after primary interrupt status. */
+#define CCWDEV_EARLY_NOTIFICATION 0x0001
+/* Report all interrupt conditions. */
+#define CCWDEV_REPORT_ALL 0x0002
+/* Try to perform path grouping. */
+#define CCWDEV_DO_PATHGROUP 0x0004
+/* Allow forced onlining of boxed devices. */
+#define CCWDEV_ALLOW_FORCE 0x0008
+/* Try to use multipath mode. */
+#define CCWDEV_DO_MULTIPATH 0x0010
+
+extern int ccw_device_start(struct ccw_device *, struct ccw1 *,
+ unsigned long, __u8, unsigned long);
+extern int ccw_device_start_timeout(struct ccw_device *, struct ccw1 *,
+ unsigned long, __u8, unsigned long, int);
+extern int ccw_device_start_key(struct ccw_device *, struct ccw1 *,
+ unsigned long, __u8, __u8, unsigned long);
+extern int ccw_device_start_timeout_key(struct ccw_device *, struct ccw1 *,
+ unsigned long, __u8, __u8,
+ unsigned long, int);
+
+
+extern int ccw_device_resume(struct ccw_device *);
+extern int ccw_device_halt(struct ccw_device *, unsigned long);
+extern int ccw_device_clear(struct ccw_device *, unsigned long);
+int ccw_device_tm_start_key(struct ccw_device *cdev, struct tcw *tcw,
+ unsigned long intparm, u8 lpm, u8 key);
+int ccw_device_tm_start_key(struct ccw_device *, struct tcw *,
+ unsigned long, u8, u8);
+int ccw_device_tm_start_timeout_key(struct ccw_device *, struct tcw *,
+ unsigned long, u8, u8, int);
+int ccw_device_tm_start(struct ccw_device *, struct tcw *,
+ unsigned long, u8);
+int ccw_device_tm_start_timeout(struct ccw_device *, struct tcw *,
+ unsigned long, u8, int);
+int ccw_device_tm_intrg(struct ccw_device *cdev);
+
+int ccw_device_get_mdc(struct ccw_device *cdev, u8 mask);
+
+extern int ccw_device_set_online(struct ccw_device *cdev);
+extern int ccw_device_set_offline(struct ccw_device *cdev);
+
+
+extern struct ciw *ccw_device_get_ciw(struct ccw_device *, __u32 cmd);
+extern __u8 ccw_device_get_path_mask(struct ccw_device *);
+extern void ccw_device_get_id(struct ccw_device *, struct ccw_dev_id *);
+
+#define get_ccwdev_lock(x) (x)->ccwlock
+
+#define to_ccwdev(n) container_of(n, struct ccw_device, dev)
+#define to_ccwdrv(n) container_of(n, struct ccw_driver, driver)
+
+extern struct ccw_device *ccw_device_create_console(struct ccw_driver *);
+extern void ccw_device_destroy_console(struct ccw_device *);
+extern int ccw_device_enable_console(struct ccw_device *);
+extern void ccw_device_wait_idle(struct ccw_device *);
+extern int ccw_device_force_console(struct ccw_device *);
+
+int ccw_device_siosl(struct ccw_device *);
+
+extern void ccw_device_get_schid(struct ccw_device *, struct subchannel_id *);
+
+struct channel_path_desc_fmt0 *ccw_device_get_chp_desc(struct ccw_device *, int);
+u8 *ccw_device_get_util_str(struct ccw_device *cdev, int chp_idx);
+#endif /* _S390_CCWDEV_H_ */
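A skeletal CCW driver built on the interface above (editorial illustration; the 3990/3390 device type pair is only an example and the callbacks do nothing useful):

#include <linux/module.h>
#include <asm/ccwdev.h>

static struct ccw_device_id example_ids[] = {
	{ CCW_DEVICE_DEVTYPE(0x3990, 0, 0x3390, 0) },
	{ /* end of list */ },
};
MODULE_DEVICE_TABLE(ccw, example_ids);

static int example_probe(struct ccw_device *cdev)
{
	cdev->handler = NULL;	/* a real driver installs its interrupt handler here */
	return 0;
}

static void example_remove(struct ccw_device *cdev)
{
}

static struct ccw_driver example_driver = {
	.driver = {
		.name	= "example_ccw",
		.owner	= THIS_MODULE,
	},
	.ids	= example_ids,
	.probe	= example_probe,
	.remove	= example_remove,
};

A real driver would then call ccw_driver_register(&example_driver) from its module init function and ccw_driver_unregister() on exit.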
diff --git a/arch/s390/include/asm/ccwgroup.h b/arch/s390/include/asm/ccwgroup.h
new file mode 100644
index 000000000..860cab747
--- /dev/null
+++ b/arch/s390/include/asm/ccwgroup.h
@@ -0,0 +1,86 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef S390_CCWGROUP_H
+#define S390_CCWGROUP_H
+
+struct ccw_device;
+struct ccw_driver;
+
+/**
+ * struct ccwgroup_device - ccw group device
+ * @state: online/offline state
+ * @count: number of attached slave devices
+ * @dev: embedded device structure
+ * @cdev: variable number of slave devices, allocated as needed
+ * @ungroup_work: work to be done when a ccwgroup notifier has action
+ * type %BUS_NOTIFY_UNBIND_DRIVER
+ */
+struct ccwgroup_device {
+ enum {
+ CCWGROUP_OFFLINE,
+ CCWGROUP_ONLINE,
+ } state;
+/* private: */
+ atomic_t onoff;
+ struct mutex reg_mutex;
+/* public: */
+ unsigned int count;
+ struct device dev;
+ struct work_struct ungroup_work;
+ struct ccw_device *cdev[0];
+};
+
+/**
+ * struct ccwgroup_driver - driver for ccw group devices
+ * @setup: function called during device creation to setup the device
+ * @remove: function called on remove
+ * @set_online: function called when device is set online
+ * @set_offline: function called when device is set offline
+ * @shutdown: function called when device is shut down
+ * @prepare: prepare for pm state transition
+ * @complete: undo work done in @prepare
+ * @freeze: callback for freezing during hibernation snapshotting
+ * @thaw: undo work done in @freeze
+ * @restore: callback for restoring after hibernation
+ * @driver: embedded driver structure
+ * @ccw_driver: supported ccw_driver (optional)
+ */
+struct ccwgroup_driver {
+ int (*setup) (struct ccwgroup_device *);
+ void (*remove) (struct ccwgroup_device *);
+ int (*set_online) (struct ccwgroup_device *);
+ int (*set_offline) (struct ccwgroup_device *);
+ void (*shutdown)(struct ccwgroup_device *);
+ int (*prepare) (struct ccwgroup_device *);
+ void (*complete) (struct ccwgroup_device *);
+ int (*freeze)(struct ccwgroup_device *);
+ int (*thaw) (struct ccwgroup_device *);
+ int (*restore)(struct ccwgroup_device *);
+
+ struct device_driver driver;
+ struct ccw_driver *ccw_driver;
+};
+
+extern int ccwgroup_driver_register (struct ccwgroup_driver *cdriver);
+extern void ccwgroup_driver_unregister (struct ccwgroup_driver *cdriver);
+int ccwgroup_create_dev(struct device *root, struct ccwgroup_driver *gdrv,
+ int num_devices, const char *buf);
+
+extern int ccwgroup_set_online(struct ccwgroup_device *gdev);
+extern int ccwgroup_set_offline(struct ccwgroup_device *gdev);
+
+extern int ccwgroup_probe_ccwdev(struct ccw_device *cdev);
+extern void ccwgroup_remove_ccwdev(struct ccw_device *cdev);
+
+#define to_ccwgroupdev(x) container_of((x), struct ccwgroup_device, dev)
+#define to_ccwgroupdrv(x) container_of((x), struct ccwgroup_driver, driver)
+
+#if IS_ENABLED(CONFIG_CCWGROUP)
+bool dev_is_ccwgroup(struct device *dev);
+#else /* CONFIG_CCWGROUP */
+static inline bool dev_is_ccwgroup(struct device *dev)
+{
+ return false;
+}
+#endif /* CONFIG_CCWGROUP */
+
+#endif
diff --git a/arch/s390/include/asm/checksum.h b/arch/s390/include/asm/checksum.h
new file mode 100644
index 000000000..91e376b0d
--- /dev/null
+++ b/arch/s390/include/asm/checksum.h
@@ -0,0 +1,139 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * S390 fast network checksum routines
+ *
+ * S390 version
+ * Copyright IBM Corp. 1999
+ * Author(s): Ulrich Hild (first version)
+ * Martin Schwidefsky (heavily optimized CKSM version)
+ * D.J. Barrow (third attempt)
+ */
+
+#ifndef _S390_CHECKSUM_H
+#define _S390_CHECKSUM_H
+
+#include <linux/uaccess.h>
+
+/*
+ * computes the checksum of a memory block at buff, length len,
+ * and adds in "sum" (32-bit)
+ *
+ * returns a 32-bit number suitable for feeding into itself
+ * or csum_tcpudp_magic
+ *
+ * this function must be called with even lengths, except
+ * for the last fragment, which may be odd
+ *
+ * it's best to have buff aligned on a 32-bit boundary
+ */
+static inline __wsum
+csum_partial(const void *buff, int len, __wsum sum)
+{
+ register unsigned long reg2 asm("2") = (unsigned long) buff;
+ register unsigned long reg3 asm("3") = (unsigned long) len;
+
+ asm volatile(
+ "0: cksm %0,%1\n" /* do checksum on longs */
+ " jo 0b\n"
+ : "+d" (sum), "+d" (reg2), "+d" (reg3) : : "cc", "memory");
+ return sum;
+}
+
+/*
+ * the same as csum_partial_copy, but copies from user space.
+ *
+ * here it is even more important to align src and dst on a 32-bit (or,
+ * even better, a 64-bit) boundary
+ *
+ * Copy from userspace and compute checksum.
+ */
+static inline __wsum
+csum_partial_copy_from_user(const void __user *src, void *dst,
+ int len, __wsum sum,
+ int *err_ptr)
+{
+ if (unlikely(copy_from_user(dst, src, len)))
+ *err_ptr = -EFAULT;
+ return csum_partial(dst, len, sum);
+}
+
+
+static inline __wsum
+csum_partial_copy_nocheck (const void *src, void *dst, int len, __wsum sum)
+{
+ memcpy(dst,src,len);
+ return csum_partial(dst, len, sum);
+}
+
+/*
+ * Fold a partial checksum without adding pseudo headers
+ */
+static inline __sum16 csum_fold(__wsum sum)
+{
+ u32 csum = (__force u32) sum;
+
+ csum += (csum >> 16) + (csum << 16);
+ csum >>= 16;
+ return (__force __sum16) ~csum;
+}
+
+/*
+ * This is a version of ip_compute_csum() optimized for IP headers,
+ * which always checksum on 4 octet boundaries.
+ *
+ */
+static inline __sum16 ip_fast_csum(const void *iph, unsigned int ihl)
+{
+ return csum_fold(csum_partial(iph, ihl*4, 0));
+}
+
+/*
+ * computes the checksum of the TCP/UDP pseudo-header
+ * returns a 32-bit checksum
+ */
+static inline __wsum
+csum_tcpudp_nofold(__be32 saddr, __be32 daddr, __u32 len, __u8 proto,
+ __wsum sum)
+{
+ __u32 csum = (__force __u32)sum;
+
+ csum += (__force __u32)saddr;
+ if (csum < (__force __u32)saddr)
+ csum++;
+
+ csum += (__force __u32)daddr;
+ if (csum < (__force __u32)daddr)
+ csum++;
+
+ csum += len + proto;
+ if (csum < len + proto)
+ csum++;
+
+ return (__force __wsum)csum;
+}
+
+/*
+ * computes the checksum of the TCP/UDP pseudo-header
+ * returns a 16-bit checksum, already complemented
+ */
+
+static inline __sum16
+csum_tcpudp_magic(__be32 saddr, __be32 daddr, __u32 len, __u8 proto,
+ __wsum sum)
+{
+ return csum_fold(csum_tcpudp_nofold(saddr,daddr,len,proto,sum));
+}
+
+/*
+ * this routine is used for miscellaneous IP-like checksums, mainly
+ * in icmp.c
+ */
+
+static inline __sum16 ip_compute_csum(const void *buff, int len)
+{
+ return csum_fold(csum_partial(buff, len, 0));
+}
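+
+/*
+ * Usage sketch (illustration only, not part of the original header):
+ * verifying a received IPv4 header.  A checksum computed over the whole
+ * header, including the checksum field itself, folds to zero when the
+ * header is intact.  struct iphdr is assumed to come from <linux/ip.h>.
+ */
+#if 0   /* example only */
+static inline bool example_iph_csum_ok(const struct iphdr *iph)
+{
+        return ip_fast_csum(iph, iph->ihl) == 0;
+}
+#endif /* example only */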
+
+#endif /* _S390_CHECKSUM_H */
+
+
diff --git a/arch/s390/include/asm/chpid.h b/arch/s390/include/asm/chpid.h
new file mode 100644
index 000000000..20e0d22f2
--- /dev/null
+++ b/arch/s390/include/asm/chpid.h
@@ -0,0 +1,51 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright IBM Corp. 2007, 2012
+ * Author(s): Peter Oberparleiter <peter.oberparleiter@de.ibm.com>
+ */
+#ifndef _ASM_S390_CHPID_H
+#define _ASM_S390_CHPID_H
+
+#include <uapi/asm/chpid.h>
+#include <asm/cio.h>
+
+struct channel_path_desc_fmt0 {
+ u8 flags;
+ u8 lsn;
+ u8 desc;
+ u8 chpid;
+ u8 swla;
+ u8 zeroes;
+ u8 chla;
+ u8 chpp;
+} __packed;
+
+static inline void chp_id_init(struct chp_id *chpid)
+{
+ memset(chpid, 0, sizeof(struct chp_id));
+}
+
+static inline int chp_id_is_equal(struct chp_id *a, struct chp_id *b)
+{
+ return (a->id == b->id) && (a->cssid == b->cssid);
+}
+
+static inline void chp_id_next(struct chp_id *chpid)
+{
+ if (chpid->id < __MAX_CHPID)
+ chpid->id++;
+ else {
+ chpid->id = 0;
+ chpid->cssid++;
+ }
+}
+
+static inline int chp_id_is_valid(struct chp_id *chpid)
+{
+ return (chpid->cssid <= __MAX_CSSID);
+}
+
+
+#define chp_id_for_each(c) \
+ for (chp_id_init(c); chp_id_is_valid(c); chp_id_next(c))
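+
+/*
+ * Usage sketch (illustration only, not part of the original header):
+ * chp_id_for_each() visits every (cssid, id) combination up to
+ * __MAX_CSSID and __MAX_CHPID, so this helper simply counts them.
+ */
+#if 0   /* example only */
+static inline unsigned int example_count_chpids(void)
+{
+        struct chp_id chpid;
+        unsigned int n = 0;
+
+        chp_id_for_each(&chpid)
+                n++;
+        return n;
+}
+#endif /* example only */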
+#endif /* _ASM_S390_CHPID_H */
diff --git a/arch/s390/include/asm/cio.h b/arch/s390/include/asm/cio.h
new file mode 100644
index 000000000..225667652
--- /dev/null
+++ b/arch/s390/include/asm/cio.h
@@ -0,0 +1,335 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Common interface for I/O on S/390
+ */
+#ifndef _ASM_S390_CIO_H_
+#define _ASM_S390_CIO_H_
+
+#include <linux/spinlock.h>
+#include <linux/bitops.h>
+#include <asm/types.h>
+
+#define LPM_ANYPATH 0xff
+#define __MAX_CSSID 0
+#define __MAX_SUBCHANNEL 65535
+#define __MAX_SSID 3
+
+#include <asm/scsw.h>
+
+/**
+ * struct ccw1 - channel command word
+ * @cmd_code: command code
+ * @flags: flags, like IDA addressing, etc.
+ * @count: byte count
+ * @cda: data address
+ *
+ * The ccw is the basic structure to build channel programs that perform
+ * operations with the device or the control unit. Only Format-1 channel
+ * command words are supported.
+ */
+struct ccw1 {
+ __u8 cmd_code;
+ __u8 flags;
+ __u16 count;
+ __u32 cda;
+} __attribute__ ((packed,aligned(8)));
+
+/**
+ * struct ccw0 - channel command word
+ * @cmd_code: command code
+ * @cda: data address
+ * @flags: flags, like IDA addressing, etc.
+ * @reserved: will be ignored
+ * @count: byte count
+ *
+ * The format-0 ccw structure.
+ */
+struct ccw0 {
+ __u8 cmd_code;
+ __u32 cda : 24;
+ __u8 flags;
+ __u8 reserved;
+ __u16 count;
+} __packed __aligned(8);
+
+#define CCW_FLAG_DC 0x80
+#define CCW_FLAG_CC 0x40
+#define CCW_FLAG_SLI 0x20
+#define CCW_FLAG_SKIP 0x10
+#define CCW_FLAG_PCI 0x08
+#define CCW_FLAG_IDA 0x04
+#define CCW_FLAG_SUSPEND 0x02
+
+#define CCW_CMD_READ_IPL 0x02
+#define CCW_CMD_NOOP 0x03
+#define CCW_CMD_BASIC_SENSE 0x04
+#define CCW_CMD_TIC 0x08
+#define CCW_CMD_STLCK 0x14
+#define CCW_CMD_SENSE_PGID 0x34
+#define CCW_CMD_SUSPEND_RECONN 0x5B
+#define CCW_CMD_RDC 0x64
+#define CCW_CMD_RELEASE 0x94
+#define CCW_CMD_SET_PGID 0xAF
+#define CCW_CMD_SENSE_ID 0xE4
+#define CCW_CMD_DCTL 0xF3
+
+#define SENSE_MAX_COUNT 0x20
+
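+/*
+ * Usage sketch (illustration only, not part of the original header):
+ * building a BASIC SENSE CCW, the same shape the common I/O layer uses
+ * to collect sense data.  Setting CCW_FLAG_CC instead would command-chain
+ * to a following CCW.  The sense buffer is assumed to lie below 2G so
+ * its address fits the 31-bit cda field.
+ */
+#if 0   /* example only */
+static inline void example_build_sense_ccw(struct ccw1 *cp, void *sense)
+{
+        cp->cmd_code = CCW_CMD_BASIC_SENSE;
+        cp->flags    = CCW_FLAG_SLI;            /* tolerate short transfer */
+        cp->count    = SENSE_MAX_COUNT;
+        cp->cda      = (__u32)(unsigned long) sense;
+}
+#endif /* example only */
+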
+/**
+ * struct erw - extended report word
+ * @res0: reserved
+ * @auth: authorization check
+ * @pvrf: path-verification-required flag
+ * @cpt: channel-path timeout
+ * @fsavf: failing storage address validity flag
+ * @cons: concurrent sense
+ * @scavf: secondary ccw address validity flag
+ * @fsaf: failing storage address format
+ * @scnt: sense count, if @cons == %1
+ * @res16: reserved
+ */
+struct erw {
+ __u32 res0 : 3;
+ __u32 auth : 1;
+ __u32 pvrf : 1;
+ __u32 cpt : 1;
+ __u32 fsavf : 1;
+ __u32 cons : 1;
+ __u32 scavf : 1;
+ __u32 fsaf : 1;
+ __u32 scnt : 6;
+ __u32 res16 : 16;
+} __attribute__ ((packed));
+
+/**
+ * struct erw_eadm - EADM Subchannel extended report word
+ * @b: aob error
+ * @r: arsb error
+ */
+struct erw_eadm {
+ __u32 : 16;
+ __u32 b : 1;
+ __u32 r : 1;
+ __u32 : 14;
+} __packed;
+
+/**
+ * struct sublog - subchannel logout area
+ * @res0: reserved
+ * @esf: extended status flags
+ * @lpum: last path used mask
+ * @arep: ancillary report
+ * @fvf: field-validity flags
+ * @sacc: storage access code
+ * @termc: termination code
+ * @devsc: device-status check
+ * @serr: secondary error
+ * @ioerr: i/o-error alert
+ * @seqc: sequence code
+ */
+struct sublog {
+ __u32 res0 : 1;
+ __u32 esf : 7;
+ __u32 lpum : 8;
+ __u32 arep : 1;
+ __u32 fvf : 5;
+ __u32 sacc : 2;
+ __u32 termc : 2;
+ __u32 devsc : 1;
+ __u32 serr : 1;
+ __u32 ioerr : 1;
+ __u32 seqc : 3;
+} __attribute__ ((packed));
+
+/**
+ * struct esw0 - Format 0 Extended Status Word (ESW)
+ * @sublog: subchannel logout
+ * @erw: extended report word
+ * @faddr: failing storage address
+ * @saddr: secondary ccw address
+ */
+struct esw0 {
+ struct sublog sublog;
+ struct erw erw;
+ __u32 faddr[2];
+ __u32 saddr;
+} __attribute__ ((packed));
+
+/**
+ * struct esw1 - Format 1 Extended Status Word (ESW)
+ * @zero0: reserved zeros
+ * @lpum: last path used mask
+ * @zero16: reserved zeros
+ * @erw: extended report word
+ * @zeros: three fullwords of zeros
+ */
+struct esw1 {
+ __u8 zero0;
+ __u8 lpum;
+ __u16 zero16;
+ struct erw erw;
+ __u32 zeros[3];
+} __attribute__ ((packed));
+
+/**
+ * struct esw2 - Format 2 Extended Status Word (ESW)
+ * @zero0: reserved zeros
+ * @lpum: last path used mask
+ * @dcti: device-connect-time interval
+ * @erw: extended report word
+ * @zeros: three fullwords of zeros
+ */
+struct esw2 {
+ __u8 zero0;
+ __u8 lpum;
+ __u16 dcti;
+ struct erw erw;
+ __u32 zeros[3];
+} __attribute__ ((packed));
+
+/**
+ * struct esw3 - Format 3 Extended Status Word (ESW)
+ * @zero0: reserved zeros
+ * @lpum: last path used mask
+ * @res: reserved
+ * @erw: extended report word
+ * @zeros: three fullwords of zeros
+ */
+struct esw3 {
+ __u8 zero0;
+ __u8 lpum;
+ __u16 res;
+ struct erw erw;
+ __u32 zeros[3];
+} __attribute__ ((packed));
+
+/**
+ * struct esw_eadm - EADM Subchannel Extended Status Word (ESW)
+ * @sublog: subchannel logout
+ * @erw: extended report word
+ */
+struct esw_eadm {
+ __u32 sublog;
+ struct erw_eadm erw;
+ __u32 : 32;
+ __u32 : 32;
+ __u32 : 32;
+} __packed;
+
+/**
+ * struct irb - interruption response block
+ * @scsw: subchannel status word
+ * @esw: extended status word
+ * @ecw: extended control word
+ *
+ * The irb that is handed to the device driver when an interrupt occurs. For
+ * solicited interrupts, the common I/O layer already performs checks whether
+ * a field is valid; a field not being valid is always passed as %0.
+ * If a unit check occurred, @ecw may contain sense data; this is retrieved
+ * by the common I/O layer itself if the device doesn't support concurrent
+ * sense (so that the device driver never needs to perform basic sense itself).
+ * For unsolicited interrupts, the irb is passed as-is (except for sense data,
+ * if applicable).
+ */
+struct irb {
+ union scsw scsw;
+ union {
+ struct esw0 esw0;
+ struct esw1 esw1;
+ struct esw2 esw2;
+ struct esw3 esw3;
+ struct esw_eadm eadm;
+ } esw;
+ __u8 ecw[32];
+} __attribute__ ((packed,aligned(4)));
+
+/**
+ * struct ciw - command information word (CIW) layout
+ * @et: entry type
+ * @reserved: reserved bits
+ * @ct: command type
+ * @cmd: command code
+ * @count: command count
+ */
+struct ciw {
+ __u32 et : 2;
+ __u32 reserved : 2;
+ __u32 ct : 4;
+ __u32 cmd : 8;
+ __u32 count : 16;
+} __attribute__ ((packed));
+
+#define CIW_TYPE_RCD 0x0 /* read configuration data */
+#define CIW_TYPE_SII 0x1 /* set interface identifier */
+#define CIW_TYPE_RNI 0x2 /* read node identifier */
+
+/*
+ * Flags used as input parameters for do_IO()
+ */
+#define DOIO_ALLOW_SUSPEND 0x0001 /* allow for channel prog. suspend */
+#define DOIO_DENY_PREFETCH 0x0002 /* don't allow for CCW prefetch */
+#define DOIO_SUPPRESS_INTER 0x0004 /* suppress intermediate inter. */
+ /* ... for suspended CCWs */
+/* Device or subchannel gone. */
+#define CIO_GONE 0x0001
+/* No path to device. */
+#define CIO_NO_PATH 0x0002
+/* Device has appeared. */
+#define CIO_OPER 0x0004
+/* Sick revalidation of device. */
+#define CIO_REVALIDATE 0x0008
+/* Device did not respond in time. */
+#define CIO_BOXED 0x0010
+
+/**
+ * struct ccw_dev_id - unique identifier for ccw devices
+ * @ssid: subchannel set id
+ * @devno: device number
+ *
+ * This structure is not directly based on any hardware structure. The
+ * hardware identifies a device by its device number and its subchannel,
+ * which is in turn identified by its id. In order to get a unique identifier
+ * for ccw devices across subchannel sets, @struct ccw_dev_id has been
+ * introduced.
+ */
+struct ccw_dev_id {
+ u8 ssid;
+ u16 devno;
+};
+
+/**
+ * ccw_device_id_is_equal() - compare two ccw_dev_ids
+ * @dev_id1: a ccw_dev_id
+ * @dev_id2: another ccw_dev_id
+ * Returns:
+ * %1 if the two structures are equal field-by-field,
+ * %0 if not.
+ * Context:
+ * any
+ */
+static inline int ccw_dev_id_is_equal(struct ccw_dev_id *dev_id1,
+ struct ccw_dev_id *dev_id2)
+{
+ if ((dev_id1->ssid == dev_id2->ssid) &&
+ (dev_id1->devno == dev_id2->devno))
+ return 1;
+ return 0;
+}
+
+/**
+ * pathmask_to_pos() - find the position of the left-most bit in a pathmask
+ * @mask: pathmask with at least one bit set
+ */
+static inline u8 pathmask_to_pos(u8 mask)
+{
+ return 8 - ffs(mask);
+}
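+
+/*
+ * Usage sketch (illustration only, not part of the original header):
+ * converting the last-path-used mask of a format-1 ESW into a channel
+ * path index.  pathmask_to_pos() requires at least one bit to be set
+ * in the mask.
+ */
+#if 0   /* example only */
+static inline u8 example_last_path_pos(const struct irb *irb)
+{
+        return pathmask_to_pos(irb->esw.esw1.lpum);
+}
+#endif /* example only */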
+
+void channel_subsystem_reinit(void);
+extern void css_schedule_reprobe(void);
+
+/* Function from drivers/s390/cio/chsc.c */
+int chsc_sstpc(void *page, unsigned int op, u16 ctrl, u64 *clock_delta);
+int chsc_sstpi(void *page, void *result, size_t size);
+
+#endif
diff --git a/arch/s390/include/asm/clp.h b/arch/s390/include/asm/clp.h
new file mode 100644
index 000000000..3925b0f08
--- /dev/null
+++ b/arch/s390/include/asm/clp.h
@@ -0,0 +1,56 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_S390_CLP_H
+#define _ASM_S390_CLP_H
+
+/* CLP common request & response block size */
+#define CLP_BLK_SIZE PAGE_SIZE
+
+#define CLP_LPS_BASE 0
+#define CLP_LPS_PCI 2
+
+struct clp_req_hdr {
+ u16 len;
+ u16 cmd;
+ u32 fmt : 4;
+ u32 reserved1 : 28;
+ u64 reserved2;
+} __packed;
+
+struct clp_rsp_hdr {
+ u16 len;
+ u16 rsp;
+ u32 fmt : 4;
+ u32 reserved1 : 28;
+ u64 reserved2;
+} __packed;
+
+/* CLP Response Codes */
+#define CLP_RC_OK 0x0010 /* Command request successful */
+#define CLP_RC_CMD 0x0020 /* Command code not recognized */
+#define CLP_RC_PERM 0x0030 /* Command not authorized */
+#define CLP_RC_FMT 0x0040 /* Invalid command request format */
+#define CLP_RC_LEN 0x0050 /* Invalid command request length */
+#define CLP_RC_8K 0x0060 /* Command requires 8K LPCB */
+#define CLP_RC_RESNOT0 0x0070 /* Reserved field not zero */
+#define CLP_RC_NODATA 0x0080 /* No data available */
+#define CLP_RC_FC_UNKNOWN 0x0100 /* Function code not recognized */
+
+/* Store logical-processor characteristics request */
+struct clp_req_slpc {
+ struct clp_req_hdr hdr;
+} __packed;
+
+struct clp_rsp_slpc {
+ struct clp_rsp_hdr hdr;
+ u32 reserved2[4];
+ u32 lpif[8];
+ u32 reserved3[8];
+ u32 lpic[8];
+} __packed;
+
+struct clp_req_rsp_slpc {
+ struct clp_req_slpc request;
+ struct clp_rsp_slpc response;
+} __packed;
+
+#endif
diff --git a/arch/s390/include/asm/cmb.h b/arch/s390/include/asm/cmb.h
new file mode 100644
index 000000000..599594c37
--- /dev/null
+++ b/arch/s390/include/asm/cmb.h
@@ -0,0 +1,14 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef S390_CMB_H
+#define S390_CMB_H
+
+#include <uapi/asm/cmb.h>
+
+struct ccw_device;
+extern int enable_cmf(struct ccw_device *cdev);
+extern int disable_cmf(struct ccw_device *cdev);
+extern int __disable_cmf(struct ccw_device *cdev);
+extern u64 cmf_read(struct ccw_device *cdev, int index);
+extern int cmf_readall(struct ccw_device *cdev, struct cmbdata *data);
+
+#endif /* S390_CMB_H */
diff --git a/arch/s390/include/asm/cmpxchg.h b/arch/s390/include/asm/cmpxchg.h
new file mode 100644
index 000000000..af99c1f66
--- /dev/null
+++ b/arch/s390/include/asm/cmpxchg.h
@@ -0,0 +1,66 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright IBM Corp. 1999, 2011
+ *
+ * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>,
+ */
+
+#ifndef __ASM_CMPXCHG_H
+#define __ASM_CMPXCHG_H
+
+#include <linux/mmdebug.h>
+#include <linux/types.h>
+#include <linux/bug.h>
+
+#define cmpxchg(ptr, o, n) \
+({ \
+ __typeof__(*(ptr)) __o = (o); \
+ __typeof__(*(ptr)) __n = (n); \
+ (__typeof__(*(ptr))) __sync_val_compare_and_swap((ptr),__o,__n);\
+})
+
+#define cmpxchg64 cmpxchg
+#define cmpxchg_local cmpxchg
+#define cmpxchg64_local cmpxchg
+
+#define xchg(ptr, x) \
+({ \
+ __typeof__(ptr) __ptr = (ptr); \
+ __typeof__(*(ptr)) __old; \
+ do { \
+ __old = *__ptr; \
+ } while (!__sync_bool_compare_and_swap(__ptr, __old, x)); \
+ __old; \
+})
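+
+/*
+ * Usage sketch (illustration only, not part of the original header):
+ * a lock-free bounded increment built on cmpxchg() and an unconditional
+ * reset built on xchg().  The counter variable is hypothetical.
+ */
+#if 0   /* example only */
+static inline unsigned long example_bounded_inc(unsigned long *ctr,
+                                                unsigned long max)
+{
+        unsigned long old;
+
+        do {
+                old = *ctr;
+                if (old >= max)
+                        return old;
+        } while (cmpxchg(ctr, old, old + 1) != old);
+        return old + 1;
+}
+
+static inline unsigned long example_reset(unsigned long *ctr)
+{
+        return xchg(ctr, 0UL);  /* returns the previous value */
+}
+#endif /* example only */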
+
+#define __cmpxchg_double(p1, p2, o1, o2, n1, n2) \
+({ \
+ register __typeof__(*(p1)) __old1 asm("2") = (o1); \
+ register __typeof__(*(p2)) __old2 asm("3") = (o2); \
+ register __typeof__(*(p1)) __new1 asm("4") = (n1); \
+ register __typeof__(*(p2)) __new2 asm("5") = (n2); \
+ int cc; \
+ asm volatile( \
+ " cdsg %[old],%[new],%[ptr]\n" \
+ " ipm %[cc]\n" \
+ " srl %[cc],28" \
+ : [cc] "=d" (cc), [old] "+d" (__old1), "+d" (__old2) \
+ : [new] "d" (__new1), "d" (__new2), \
+ [ptr] "Q" (*(p1)), "Q" (*(p2)) \
+ : "memory", "cc"); \
+ !cc; \
+})
+
+#define cmpxchg_double(p1, p2, o1, o2, n1, n2) \
+({ \
+ __typeof__(p1) __p1 = (p1); \
+ __typeof__(p2) __p2 = (p2); \
+ BUILD_BUG_ON(sizeof(*(p1)) != sizeof(long)); \
+ BUILD_BUG_ON(sizeof(*(p2)) != sizeof(long)); \
+ VM_BUG_ON((unsigned long)((__p1) + 1) != (unsigned long)(__p2));\
+ __cmpxchg_double(__p1, __p2, o1, o2, n1, n2); \
+})
+
+#define system_has_cmpxchg_double() 1
+
+#endif /* __ASM_CMPXCHG_H */
diff --git a/arch/s390/include/asm/compat.h b/arch/s390/include/asm/compat.h
new file mode 100644
index 000000000..97db2fba5
--- /dev/null
+++ b/arch/s390/include/asm/compat.h
@@ -0,0 +1,276 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_S390X_COMPAT_H
+#define _ASM_S390X_COMPAT_H
+/*
+ * Architecture specific compatibility types
+ */
+#include <linux/types.h>
+#include <linux/sched.h>
+#include <linux/sched/task_stack.h>
+#include <linux/thread_info.h>
+
+#define __TYPE_IS_PTR(t) (!__builtin_types_compatible_p( \
+ typeof(0?(__force t)0:0ULL), u64))
+
+#define __SC_DELOUSE(t,v) ({ \
+ BUILD_BUG_ON(sizeof(t) > 4 && !__TYPE_IS_PTR(t)); \
+ (__force t)(__TYPE_IS_PTR(t) ? ((v) & 0x7fffffff) : (v)); \
+})
+
+#define PSW32_MASK_PER 0x40000000UL
+#define PSW32_MASK_DAT 0x04000000UL
+#define PSW32_MASK_IO 0x02000000UL
+#define PSW32_MASK_EXT 0x01000000UL
+#define PSW32_MASK_KEY 0x00F00000UL
+#define PSW32_MASK_BASE 0x00080000UL /* Always one */
+#define PSW32_MASK_MCHECK 0x00040000UL
+#define PSW32_MASK_WAIT 0x00020000UL
+#define PSW32_MASK_PSTATE 0x00010000UL
+#define PSW32_MASK_ASC 0x0000C000UL
+#define PSW32_MASK_CC 0x00003000UL
+#define PSW32_MASK_PM 0x00000f00UL
+#define PSW32_MASK_RI 0x00000080UL
+
+#define PSW32_MASK_USER 0x0000FF00UL
+
+#define PSW32_ADDR_AMODE 0x80000000UL
+#define PSW32_ADDR_INSN 0x7FFFFFFFUL
+
+#define PSW32_DEFAULT_KEY (((u32) PAGE_DEFAULT_ACC) << 20)
+
+#define PSW32_ASC_PRIMARY 0x00000000UL
+#define PSW32_ASC_ACCREG 0x00004000UL
+#define PSW32_ASC_SECONDARY 0x00008000UL
+#define PSW32_ASC_HOME 0x0000C000UL
+
+#define PSW32_USER_BITS (PSW32_MASK_DAT | PSW32_MASK_IO | PSW32_MASK_EXT | \
+ PSW32_DEFAULT_KEY | PSW32_MASK_BASE | \
+ PSW32_MASK_MCHECK | PSW32_MASK_PSTATE | \
+ PSW32_ASC_PRIMARY)
+
+#define COMPAT_USER_HZ 100
+#define COMPAT_UTS_MACHINE "s390\0\0\0\0"
+
+typedef u32 compat_size_t;
+typedef s32 compat_ssize_t;
+typedef s32 compat_clock_t;
+typedef s32 compat_pid_t;
+typedef u16 __compat_uid_t;
+typedef u16 __compat_gid_t;
+typedef u32 __compat_uid32_t;
+typedef u32 __compat_gid32_t;
+typedef u16 compat_mode_t;
+typedef u32 compat_ino_t;
+typedef u16 compat_dev_t;
+typedef s32 compat_off_t;
+typedef s64 compat_loff_t;
+typedef u16 compat_nlink_t;
+typedef u16 compat_ipc_pid_t;
+typedef s32 compat_daddr_t;
+typedef u32 compat_caddr_t;
+typedef __kernel_fsid_t compat_fsid_t;
+typedef s32 compat_key_t;
+typedef s32 compat_timer_t;
+
+typedef s32 compat_int_t;
+typedef s32 compat_long_t;
+typedef s64 compat_s64;
+typedef u32 compat_uint_t;
+typedef u32 compat_ulong_t;
+typedef u64 compat_u64;
+typedef u32 compat_uptr_t;
+
+typedef struct {
+ u32 mask;
+ u32 addr;
+} __aligned(8) psw_compat_t;
+
+typedef struct {
+ psw_compat_t psw;
+ u32 gprs[NUM_GPRS];
+ u32 acrs[NUM_ACRS];
+ u32 orig_gpr2;
+} s390_compat_regs;
+
+typedef struct {
+ u32 gprs_high[NUM_GPRS];
+} s390_compat_regs_high;
+
+struct compat_stat {
+ compat_dev_t st_dev;
+ u16 __pad1;
+ compat_ino_t st_ino;
+ compat_mode_t st_mode;
+ compat_nlink_t st_nlink;
+ __compat_uid_t st_uid;
+ __compat_gid_t st_gid;
+ compat_dev_t st_rdev;
+ u16 __pad2;
+ u32 st_size;
+ u32 st_blksize;
+ u32 st_blocks;
+ u32 st_atime;
+ u32 st_atime_nsec;
+ u32 st_mtime;
+ u32 st_mtime_nsec;
+ u32 st_ctime;
+ u32 st_ctime_nsec;
+ u32 __unused4;
+ u32 __unused5;
+};
+
+struct compat_flock {
+ short l_type;
+ short l_whence;
+ compat_off_t l_start;
+ compat_off_t l_len;
+ compat_pid_t l_pid;
+};
+
+#define F_GETLK64 12
+#define F_SETLK64 13
+#define F_SETLKW64 14
+
+struct compat_flock64 {
+ short l_type;
+ short l_whence;
+ compat_loff_t l_start;
+ compat_loff_t l_len;
+ compat_pid_t l_pid;
+};
+
+struct compat_statfs {
+ u32 f_type;
+ u32 f_bsize;
+ u32 f_blocks;
+ u32 f_bfree;
+ u32 f_bavail;
+ u32 f_files;
+ u32 f_ffree;
+ compat_fsid_t f_fsid;
+ u32 f_namelen;
+ u32 f_frsize;
+ u32 f_flags;
+ u32 f_spare[4];
+};
+
+struct compat_statfs64 {
+ u32 f_type;
+ u32 f_bsize;
+ u64 f_blocks;
+ u64 f_bfree;
+ u64 f_bavail;
+ u64 f_files;
+ u64 f_ffree;
+ compat_fsid_t f_fsid;
+ u32 f_namelen;
+ u32 f_frsize;
+ u32 f_flags;
+ u32 f_spare[4];
+};
+
+#define COMPAT_RLIM_INFINITY 0xffffffff
+
+typedef u32 compat_old_sigset_t; /* at least 32 bits */
+
+#define _COMPAT_NSIG 64
+#define _COMPAT_NSIG_BPW 32
+
+typedef u32 compat_sigset_word;
+
+#define COMPAT_OFF_T_MAX 0x7fffffff
+
+/*
+ * A pointer passed in from user mode. This should not
+ * be used for syscall parameters, just declare them
+ * as pointers because the syscall entry code will have
+ * appropriately converted them already.
+ */
+
+static inline void __user *compat_ptr(compat_uptr_t uptr)
+{
+ return (void __user *)(unsigned long)(uptr & 0x7fffffffUL);
+}
+
+static inline compat_uptr_t ptr_to_compat(void __user *uptr)
+{
+ return (u32)(unsigned long)uptr;
+}
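+
+/*
+ * Usage sketch (illustration only, not part of the original header):
+ * fetching a 32-bit value through a pointer handed in by a 31-bit task,
+ * e.g. from a compat ioctl.  compat_ptr() masks off the high-order bit
+ * before the pointer is dereferenced with get_user().
+ */
+#if 0   /* example only */
+static inline int example_get_compat_u32(compat_uptr_t uarg, u32 *val)
+{
+        u32 __user *uptr = compat_ptr(uarg);
+
+        return get_user(*val, uptr);
+}
+#endif /* example only */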
+
+#ifdef CONFIG_COMPAT
+
+static inline int is_compat_task(void)
+{
+ return test_thread_flag(TIF_31BIT);
+}
+
+static inline void __user *arch_compat_alloc_user_space(long len)
+{
+ unsigned long stack;
+
+ stack = KSTK_ESP(current);
+ if (is_compat_task())
+ stack &= 0x7fffffffUL;
+ return (void __user *) (stack - len);
+}
+
+#endif
+
+struct compat_ipc64_perm {
+ compat_key_t key;
+ __compat_uid32_t uid;
+ __compat_gid32_t gid;
+ __compat_uid32_t cuid;
+ __compat_gid32_t cgid;
+ compat_mode_t mode;
+ unsigned short __pad1;
+ unsigned short seq;
+ unsigned short __pad2;
+ unsigned int __unused1;
+ unsigned int __unused2;
+};
+
+struct compat_semid64_ds {
+ struct compat_ipc64_perm sem_perm;
+ compat_ulong_t sem_otime;
+ compat_ulong_t sem_otime_high;
+ compat_ulong_t sem_ctime;
+ compat_ulong_t sem_ctime_high;
+ compat_ulong_t sem_nsems;
+ compat_ulong_t __unused1;
+ compat_ulong_t __unused2;
+};
+
+struct compat_msqid64_ds {
+ struct compat_ipc64_perm msg_perm;
+ compat_ulong_t msg_stime;
+ compat_ulong_t msg_stime_high;
+ compat_ulong_t msg_rtime;
+ compat_ulong_t msg_rtime_high;
+ compat_ulong_t msg_ctime;
+ compat_ulong_t msg_ctime_high;
+ compat_ulong_t msg_cbytes;
+ compat_ulong_t msg_qnum;
+ compat_ulong_t msg_qbytes;
+ compat_pid_t msg_lspid;
+ compat_pid_t msg_lrpid;
+ compat_ulong_t __unused1;
+ compat_ulong_t __unused2;
+};
+
+struct compat_shmid64_ds {
+ struct compat_ipc64_perm shm_perm;
+ compat_size_t shm_segsz;
+ compat_ulong_t shm_atime;
+ compat_ulong_t shm_atime_high;
+ compat_ulong_t shm_dtime;
+ compat_ulong_t shm_dtime_high;
+ compat_ulong_t shm_ctime;
+ compat_ulong_t shm_ctime_high;
+ compat_pid_t shm_cpid;
+ compat_pid_t shm_lpid;
+ compat_ulong_t shm_nattch;
+ compat_ulong_t __unused1;
+ compat_ulong_t __unused2;
+};
+#endif /* _ASM_S390X_COMPAT_H */
diff --git a/arch/s390/include/asm/cpacf.h b/arch/s390/include/asm/cpacf.h
new file mode 100644
index 000000000..3cc52e37b
--- /dev/null
+++ b/arch/s390/include/asm/cpacf.h
@@ -0,0 +1,523 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * CP Assist for Cryptographic Functions (CPACF)
+ *
+ * Copyright IBM Corp. 2003, 2017
+ * Author(s): Thomas Spatzier
+ * Jan Glauber
+ * Harald Freudenberger (freude@de.ibm.com)
+ * Martin Schwidefsky <schwidefsky@de.ibm.com>
+ */
+#ifndef _ASM_S390_CPACF_H
+#define _ASM_S390_CPACF_H
+
+#include <asm/facility.h>
+
+/*
+ * Instruction opcodes for the CPACF instructions
+ */
+#define CPACF_KMAC 0xb91e /* MSA */
+#define CPACF_KM 0xb92e /* MSA */
+#define CPACF_KMC 0xb92f /* MSA */
+#define CPACF_KIMD 0xb93e /* MSA */
+#define CPACF_KLMD 0xb93f /* MSA */
+#define CPACF_PCKMO 0xb928 /* MSA3 */
+#define CPACF_KMF 0xb92a /* MSA4 */
+#define CPACF_KMO 0xb92b /* MSA4 */
+#define CPACF_PCC 0xb92c /* MSA4 */
+#define CPACF_KMCTR 0xb92d /* MSA4 */
+#define CPACF_PRNO 0xb93c /* MSA5 */
+#define CPACF_KMA 0xb929 /* MSA8 */
+
+/*
+ * En/decryption modifier bits
+ */
+#define CPACF_ENCRYPT 0x00
+#define CPACF_DECRYPT 0x80
+
+/*
+ * Function codes for the KM (CIPHER MESSAGE) instruction
+ */
+#define CPACF_KM_QUERY 0x00
+#define CPACF_KM_DEA 0x01
+#define CPACF_KM_TDEA_128 0x02
+#define CPACF_KM_TDEA_192 0x03
+#define CPACF_KM_AES_128 0x12
+#define CPACF_KM_AES_192 0x13
+#define CPACF_KM_AES_256 0x14
+#define CPACF_KM_PAES_128 0x1a
+#define CPACF_KM_PAES_192 0x1b
+#define CPACF_KM_PAES_256 0x1c
+#define CPACF_KM_XTS_128 0x32
+#define CPACF_KM_XTS_256 0x34
+#define CPACF_KM_PXTS_128 0x3a
+#define CPACF_KM_PXTS_256 0x3c
+
+/*
+ * Function codes for the KMC (CIPHER MESSAGE WITH CHAINING)
+ * instruction
+ */
+#define CPACF_KMC_QUERY 0x00
+#define CPACF_KMC_DEA 0x01
+#define CPACF_KMC_TDEA_128 0x02
+#define CPACF_KMC_TDEA_192 0x03
+#define CPACF_KMC_AES_128 0x12
+#define CPACF_KMC_AES_192 0x13
+#define CPACF_KMC_AES_256 0x14
+#define CPACF_KMC_PAES_128 0x1a
+#define CPACF_KMC_PAES_192 0x1b
+#define CPACF_KMC_PAES_256 0x1c
+#define CPACF_KMC_PRNG 0x43
+
+/*
+ * Function codes for the KMCTR (CIPHER MESSAGE WITH COUNTER)
+ * instruction
+ */
+#define CPACF_KMCTR_QUERY 0x00
+#define CPACF_KMCTR_DEA 0x01
+#define CPACF_KMCTR_TDEA_128 0x02
+#define CPACF_KMCTR_TDEA_192 0x03
+#define CPACF_KMCTR_AES_128 0x12
+#define CPACF_KMCTR_AES_192 0x13
+#define CPACF_KMCTR_AES_256 0x14
+#define CPACF_KMCTR_PAES_128 0x1a
+#define CPACF_KMCTR_PAES_192 0x1b
+#define CPACF_KMCTR_PAES_256 0x1c
+
+/*
+ * Function codes for the KIMD (COMPUTE INTERMEDIATE MESSAGE DIGEST)
+ * instruction
+ */
+#define CPACF_KIMD_QUERY 0x00
+#define CPACF_KIMD_SHA_1 0x01
+#define CPACF_KIMD_SHA_256 0x02
+#define CPACF_KIMD_SHA_512 0x03
+#define CPACF_KIMD_GHASH 0x41
+
+/*
+ * Function codes for the KLMD (COMPUTE LAST MESSAGE DIGEST)
+ * instruction
+ */
+#define CPACF_KLMD_QUERY 0x00
+#define CPACF_KLMD_SHA_1 0x01
+#define CPACF_KLMD_SHA_256 0x02
+#define CPACF_KLMD_SHA_512 0x03
+
+/*
+ * function codes for the KMAC (COMPUTE MESSAGE AUTHENTICATION CODE)
+ * instruction
+ */
+#define CPACF_KMAC_QUERY 0x00
+#define CPACF_KMAC_DEA 0x01
+#define CPACF_KMAC_TDEA_128 0x02
+#define CPACF_KMAC_TDEA_192 0x03
+
+/*
+ * Function codes for the PCKMO (PERFORM CRYPTOGRAPHIC KEY MANAGEMENT)
+ * instruction
+ */
+#define CPACF_PCKMO_QUERY 0x00
+#define CPACF_PCKMO_ENC_DES_KEY 0x01
+#define CPACF_PCKMO_ENC_TDES_128_KEY 0x02
+#define CPACF_PCKMO_ENC_TDES_192_KEY 0x03
+#define CPACF_PCKMO_ENC_AES_128_KEY 0x12
+#define CPACF_PCKMO_ENC_AES_192_KEY 0x13
+#define CPACF_PCKMO_ENC_AES_256_KEY 0x14
+
+/*
+ * Function codes for the PRNO (PERFORM RANDOM NUMBER OPERATION)
+ * instruction
+ */
+#define CPACF_PRNO_QUERY 0x00
+#define CPACF_PRNO_SHA512_DRNG_GEN 0x03
+#define CPACF_PRNO_SHA512_DRNG_SEED 0x83
+#define CPACF_PRNO_TRNG_Q_R2C_RATIO 0x70
+#define CPACF_PRNO_TRNG 0x72
+
+/*
+ * Function codes for the KMA (CIPHER MESSAGE WITH AUTHENTICATION)
+ * instruction
+ */
+#define CPACF_KMA_QUERY 0x00
+#define CPACF_KMA_GCM_AES_128 0x12
+#define CPACF_KMA_GCM_AES_192 0x13
+#define CPACF_KMA_GCM_AES_256 0x14
+
+/*
+ * Flags for the KMA (CIPHER MESSAGE WITH AUTHENTICATION) instruction
+ */
+#define CPACF_KMA_LPC 0x100 /* Last-Plaintext/Ciphertext */
+#define CPACF_KMA_LAAD 0x200 /* Last-AAD */
+#define CPACF_KMA_HS 0x400 /* Hash-subkey Supplied */
+
+typedef struct { unsigned char bytes[16]; } cpacf_mask_t;
+
+/**
+ * cpacf_query() - check if a specific CPACF function is available
+ * @opcode: the opcode of the crypto instruction
+ * @func: the function code to test for
+ *
+ * Executes the query function for the given crypto instruction @opcode
+ * and checks if @func is available
+ *
+ * Returns 1 if @func is available for @opcode, 0 otherwise
+ */
+static inline void __cpacf_query(unsigned int opcode, cpacf_mask_t *mask)
+{
+ register unsigned long r0 asm("0") = 0; /* query function */
+ register unsigned long r1 asm("1") = (unsigned long) mask;
+
+ asm volatile(
+ " spm 0\n" /* pckmo doesn't change the cc */
+ /* Parameter regs are ignored, but must be nonzero and unique */
+ "0: .insn rrf,%[opc] << 16,2,4,6,0\n"
+ " brc 1,0b\n" /* handle partial completion */
+ : "=m" (*mask)
+ : [fc] "d" (r0), [pba] "a" (r1), [opc] "i" (opcode)
+ : "cc");
+}
+
+static inline int __cpacf_check_opcode(unsigned int opcode)
+{
+ switch (opcode) {
+ case CPACF_KMAC:
+ case CPACF_KM:
+ case CPACF_KMC:
+ case CPACF_KIMD:
+ case CPACF_KLMD:
+ return test_facility(17); /* check for MSA */
+ case CPACF_PCKMO:
+ return test_facility(76); /* check for MSA3 */
+ case CPACF_KMF:
+ case CPACF_KMO:
+ case CPACF_PCC:
+ case CPACF_KMCTR:
+ return test_facility(77); /* check for MSA4 */
+ case CPACF_PRNO:
+ return test_facility(57); /* check for MSA5 */
+ case CPACF_KMA:
+ return test_facility(146); /* check for MSA8 */
+ default:
+ BUG();
+ }
+}
+
+static inline int cpacf_query(unsigned int opcode, cpacf_mask_t *mask)
+{
+ if (__cpacf_check_opcode(opcode)) {
+ __cpacf_query(opcode, mask);
+ return 1;
+ }
+ memset(mask, 0, sizeof(*mask));
+ return 0;
+}
+
+static inline int cpacf_test_func(cpacf_mask_t *mask, unsigned int func)
+{
+ return (mask->bytes[func >> 3] & (0x80 >> (func & 7))) != 0;
+}
+
+static inline int cpacf_query_func(unsigned int opcode, unsigned int func)
+{
+ cpacf_mask_t mask;
+
+ if (cpacf_query(opcode, &mask))
+ return cpacf_test_func(&mask, func);
+ return 0;
+}
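+
+/*
+ * Usage sketch (illustration only, not part of the original header):
+ * a module init function that bails out unless the KM instruction
+ * offers the AES-128 function.  The init routine and the registration
+ * step it stands in for are hypothetical.
+ */
+#if 0   /* example only */
+static int __init example_aes_init(void)
+{
+        if (!cpacf_query_func(CPACF_KM, CPACF_KM_AES_128))
+                return -ENODEV;
+        /* ... register the AES-128 cipher implementation here ... */
+        return 0;
+}
+#endif /* example only */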
+
+/**
+ * cpacf_km() - executes the KM (CIPHER MESSAGE) instruction
+ * @func: the function code passed to KM; see CPACF_KM_xxx defines
+ * @param: address of parameter block; see POP for details on each func
+ * @dest: address of destination memory area
+ * @src: address of source memory area
+ * @src_len: length of src operand in bytes
+ *
+ * Returns 0 for the query func, number of processed bytes for
+ * encryption/decryption funcs
+ */
+static inline int cpacf_km(unsigned long func, void *param,
+ u8 *dest, const u8 *src, long src_len)
+{
+ register unsigned long r0 asm("0") = (unsigned long) func;
+ register unsigned long r1 asm("1") = (unsigned long) param;
+ register unsigned long r2 asm("2") = (unsigned long) src;
+ register unsigned long r3 asm("3") = (unsigned long) src_len;
+ register unsigned long r4 asm("4") = (unsigned long) dest;
+
+ asm volatile(
+ "0: .insn rre,%[opc] << 16,%[dst],%[src]\n"
+ " brc 1,0b\n" /* handle partial completion */
+ : [src] "+a" (r2), [len] "+d" (r3), [dst] "+a" (r4)
+ : [fc] "d" (r0), [pba] "a" (r1), [opc] "i" (CPACF_KM)
+ : "cc", "memory");
+
+ return src_len - r3;
+}
+
+/**
+ * cpacf_kmc() - executes the KMC (CIPHER MESSAGE WITH CHAINING) instruction
+ * @func: the function code passed to KMC; see CPACF_KMC_xxx defines
+ * @param: address of parameter block; see POP for details on each func
+ * @dest: address of destination memory area
+ * @src: address of source memory area
+ * @src_len: length of src operand in bytes
+ *
+ * Returns 0 for the query func, number of processed bytes for
+ * encryption/decryption funcs
+ */
+static inline int cpacf_kmc(unsigned long func, void *param,
+ u8 *dest, const u8 *src, long src_len)
+{
+ register unsigned long r0 asm("0") = (unsigned long) func;
+ register unsigned long r1 asm("1") = (unsigned long) param;
+ register unsigned long r2 asm("2") = (unsigned long) src;
+ register unsigned long r3 asm("3") = (unsigned long) src_len;
+ register unsigned long r4 asm("4") = (unsigned long) dest;
+
+ asm volatile(
+ "0: .insn rre,%[opc] << 16,%[dst],%[src]\n"
+ " brc 1,0b\n" /* handle partial completion */
+ : [src] "+a" (r2), [len] "+d" (r3), [dst] "+a" (r4)
+ : [fc] "d" (r0), [pba] "a" (r1), [opc] "i" (CPACF_KMC)
+ : "cc", "memory");
+
+ return src_len - r3;
+}
+
+/**
+ * cpacf_kimd() - executes the KIMD (COMPUTE INTERMEDIATE MESSAGE DIGEST)
+ * instruction
+ * @func: the function code passed to KIMD; see CPACF_KIMD_xxx defines
+ * @param: address of parameter block; see POP for details on each func
+ * @src: address of source memory area
+ * @src_len: length of src operand in bytes
+ */
+static inline void cpacf_kimd(unsigned long func, void *param,
+ const u8 *src, long src_len)
+{
+ register unsigned long r0 asm("0") = (unsigned long) func;
+ register unsigned long r1 asm("1") = (unsigned long) param;
+ register unsigned long r2 asm("2") = (unsigned long) src;
+ register unsigned long r3 asm("3") = (unsigned long) src_len;
+
+ asm volatile(
+ "0: .insn rre,%[opc] << 16,0,%[src]\n"
+ " brc 1,0b\n" /* handle partial completion */
+ : [src] "+a" (r2), [len] "+d" (r3)
+ : [fc] "d" (r0), [pba] "a" (r1), [opc] "i" (CPACF_KIMD)
+ : "cc", "memory");
+}
+
+/**
+ * cpacf_klmd() - executes the KLMD (COMPUTE LAST MESSAGE DIGEST) instruction
+ * @func: the function code passed to KLMD; see CPACF_KLMD_xxx defines
+ * @param: address of parameter block; see POP for details on each func
+ * @src: address of source memory area
+ * @src_len: length of src operand in bytes
+ */
+static inline void cpacf_klmd(unsigned long func, void *param,
+ const u8 *src, long src_len)
+{
+ register unsigned long r0 asm("0") = (unsigned long) func;
+ register unsigned long r1 asm("1") = (unsigned long) param;
+ register unsigned long r2 asm("2") = (unsigned long) src;
+ register unsigned long r3 asm("3") = (unsigned long) src_len;
+
+ asm volatile(
+ "0: .insn rre,%[opc] << 16,0,%[src]\n"
+ " brc 1,0b\n" /* handle partial completion */
+ : [src] "+a" (r2), [len] "+d" (r3)
+ : [fc] "d" (r0), [pba] "a" (r1), [opc] "i" (CPACF_KLMD)
+ : "cc", "memory");
+}
+
+/**
+ * cpacf_kmac() - executes the KMAC (COMPUTE MESSAGE AUTHENTICATION CODE)
+ * instruction
+ * @func: the function code passed to KMAC; see CPACF_KMAC_xxx defines
+ * @param: address of parameter block; see POP for details on each func
+ * @src: address of source memory area
+ * @src_len: length of src operand in bytes
+ *
+ * Returns 0 for the query func, number of processed bytes for digest funcs
+ */
+static inline int cpacf_kmac(unsigned long func, void *param,
+ const u8 *src, long src_len)
+{
+ register unsigned long r0 asm("0") = (unsigned long) func;
+ register unsigned long r1 asm("1") = (unsigned long) param;
+ register unsigned long r2 asm("2") = (unsigned long) src;
+ register unsigned long r3 asm("3") = (unsigned long) src_len;
+
+ asm volatile(
+ "0: .insn rre,%[opc] << 16,0,%[src]\n"
+ " brc 1,0b\n" /* handle partial completion */
+ : [src] "+a" (r2), [len] "+d" (r3)
+ : [fc] "d" (r0), [pba] "a" (r1), [opc] "i" (CPACF_KMAC)
+ : "cc", "memory");
+
+ return src_len - r3;
+}
+
+/**
+ * cpacf_kmctr() - executes the KMCTR (CIPHER MESSAGE WITH COUNTER) instruction
+ * @func: the function code passed to KMCTR; see CPACF_KMCTR_xxx defines
+ * @param: address of parameter block; see POP for details on each func
+ * @dest: address of destination memory area
+ * @src: address of source memory area
+ * @src_len: length of src operand in bytes
+ * @counter: address of counter value
+ *
+ * Returns 0 for the query func, number of processed bytes for
+ * encryption/decryption funcs
+ */
+static inline int cpacf_kmctr(unsigned long func, void *param, u8 *dest,
+ const u8 *src, long src_len, u8 *counter)
+{
+ register unsigned long r0 asm("0") = (unsigned long) func;
+ register unsigned long r1 asm("1") = (unsigned long) param;
+ register unsigned long r2 asm("2") = (unsigned long) src;
+ register unsigned long r3 asm("3") = (unsigned long) src_len;
+ register unsigned long r4 asm("4") = (unsigned long) dest;
+ register unsigned long r6 asm("6") = (unsigned long) counter;
+
+ asm volatile(
+ "0: .insn rrf,%[opc] << 16,%[dst],%[src],%[ctr],0\n"
+ " brc 1,0b\n" /* handle partial completion */
+ : [src] "+a" (r2), [len] "+d" (r3),
+ [dst] "+a" (r4), [ctr] "+a" (r6)
+ : [fc] "d" (r0), [pba] "a" (r1), [opc] "i" (CPACF_KMCTR)
+ : "cc", "memory");
+
+ return src_len - r3;
+}
+
+/**
+ * cpacf_prno() - executes the PRNO (PERFORM RANDOM NUMBER OPERATION)
+ * instruction
+ * @func: the function code passed to PRNO; see CPACF_PRNO_xxx defines
+ * @param: address of parameter block; see POP for details on each func
+ * @dest: address of destination memory area
+ * @dest_len: size of destination memory area in bytes
+ * @seed: address of seed data
+ * @seed_len: size of seed data in bytes
+ */
+static inline void cpacf_prno(unsigned long func, void *param,
+ u8 *dest, unsigned long dest_len,
+ const u8 *seed, unsigned long seed_len)
+{
+ register unsigned long r0 asm("0") = (unsigned long) func;
+ register unsigned long r1 asm("1") = (unsigned long) param;
+ register unsigned long r2 asm("2") = (unsigned long) dest;
+ register unsigned long r3 asm("3") = (unsigned long) dest_len;
+ register unsigned long r4 asm("4") = (unsigned long) seed;
+ register unsigned long r5 asm("5") = (unsigned long) seed_len;
+
+ asm volatile (
+ "0: .insn rre,%[opc] << 16,%[dst],%[seed]\n"
+ " brc 1,0b\n" /* handle partial completion */
+ : [dst] "+a" (r2), [dlen] "+d" (r3)
+ : [fc] "d" (r0), [pba] "a" (r1),
+ [seed] "a" (r4), [slen] "d" (r5), [opc] "i" (CPACF_PRNO)
+ : "cc", "memory");
+}
+
+/**
+ * cpacf_trng() - executes the TRNG subfunction of the PRNO instruction
+ * @ucbuf: buffer for unconditioned data
+ * @ucbuf_len: amount of unconditioned data to fetch in bytes
+ * @cbuf: buffer for conditioned data
+ * @cbuf_len: amount of conditioned data to fetch in bytes
+ */
+static inline void cpacf_trng(u8 *ucbuf, unsigned long ucbuf_len,
+ u8 *cbuf, unsigned long cbuf_len)
+{
+ register unsigned long r0 asm("0") = (unsigned long) CPACF_PRNO_TRNG;
+ register unsigned long r2 asm("2") = (unsigned long) ucbuf;
+ register unsigned long r3 asm("3") = (unsigned long) ucbuf_len;
+ register unsigned long r4 asm("4") = (unsigned long) cbuf;
+ register unsigned long r5 asm("5") = (unsigned long) cbuf_len;
+
+ asm volatile (
+ "0: .insn rre,%[opc] << 16,%[ucbuf],%[cbuf]\n"
+ " brc 1,0b\n" /* handle partial completion */
+ : [ucbuf] "+a" (r2), [ucbuflen] "+d" (r3),
+ [cbuf] "+a" (r4), [cbuflen] "+d" (r5)
+ : [fc] "d" (r0), [opc] "i" (CPACF_PRNO)
+ : "cc", "memory");
+}
+
+/**
+ * cpacf_pcc() - executes the PCC (PERFORM CRYPTOGRAPHIC COMPUTATION)
+ * instruction
+ * @func: the function code passed to PCC; see CPACF_KM_xxx defines
+ * @param: address of parameter block; see POP for details on each func
+ */
+static inline void cpacf_pcc(unsigned long func, void *param)
+{
+ register unsigned long r0 asm("0") = (unsigned long) func;
+ register unsigned long r1 asm("1") = (unsigned long) param;
+
+ asm volatile(
+ "0: .insn rre,%[opc] << 16,0,0\n" /* PCC opcode */
+ " brc 1,0b\n" /* handle partial completion */
+ :
+ : [fc] "d" (r0), [pba] "a" (r1), [opc] "i" (CPACF_PCC)
+ : "cc", "memory");
+}
+
+/**
+ * cpacf_pckmo() - executes the PCKMO (PERFORM CRYPTOGRAPHIC KEY
+ * MANAGEMENT) instruction
+ * @func: the function code passed to PCKMO; see CPACF_PCKMO_xxx defines
+ * @param: address of parameter block; see POP for details on each func
+ *
+ * Returns 0.
+ */
+static inline void cpacf_pckmo(long func, void *param)
+{
+ register unsigned long r0 asm("0") = (unsigned long) func;
+ register unsigned long r1 asm("1") = (unsigned long) param;
+
+ asm volatile(
+ " .insn rre,%[opc] << 16,0,0\n" /* PCKMO opcode */
+ :
+ : [fc] "d" (r0), [pba] "a" (r1), [opc] "i" (CPACF_PCKMO)
+ : "cc", "memory");
+}
+
+/**
+ * cpacf_kma() - executes the KMA (CIPHER MESSAGE WITH AUTHENTICATION)
+ * instruction
+ * @func: the function code passed to KMA; see CPACF_KMA_xxx defines
+ * @param: address of parameter block; see POP for details on each func
+ * @dest: address of destination memory area
+ * @src: address of source memory area
+ * @src_len: length of src operand in bytes
+ * @aad: address of additional authenticated data memory area
+ * @aad_len: length of aad operand in bytes
+ */
+static inline void cpacf_kma(unsigned long func, void *param, u8 *dest,
+ const u8 *src, unsigned long src_len,
+ const u8 *aad, unsigned long aad_len)
+{
+ register unsigned long r0 asm("0") = (unsigned long) func;
+ register unsigned long r1 asm("1") = (unsigned long) param;
+ register unsigned long r2 asm("2") = (unsigned long) src;
+ register unsigned long r3 asm("3") = (unsigned long) src_len;
+ register unsigned long r4 asm("4") = (unsigned long) aad;
+ register unsigned long r5 asm("5") = (unsigned long) aad_len;
+ register unsigned long r6 asm("6") = (unsigned long) dest;
+
+ asm volatile(
+ "0: .insn rrf,%[opc] << 16,%[dst],%[src],%[aad],0\n"
+ " brc 1,0b\n" /* handle partial completion */
+ : [dst] "+a" (r6), [src] "+a" (r2), [slen] "+d" (r3),
+ [aad] "+a" (r4), [alen] "+d" (r5)
+ : [fc] "d" (r0), [pba] "a" (r1), [opc] "i" (CPACF_KMA)
+ : "cc", "memory");
+}
+
+#endif /* _ASM_S390_CPACF_H */
diff --git a/arch/s390/include/asm/cpcmd.h b/arch/s390/include/asm/cpcmd.h
new file mode 100644
index 000000000..c3c993abe
--- /dev/null
+++ b/arch/s390/include/asm/cpcmd.h
@@ -0,0 +1,32 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * S390 version
+ * Copyright IBM Corp. 1999
+ * Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com),
+ * Christian Borntraeger (cborntra@de.ibm.com),
+ */
+
+#ifndef _ASM_S390_CPCMD_H
+#define _ASM_S390_CPCMD_H
+
+/*
+ * the low-level function for cpcmd
+ */
+int __cpcmd(const char *cmd, char *response, int rlen, int *response_code);
+
+/*
+ * cpcmd is the in-kernel interface for issuing CP commands
+ *
+ * cmd: null-terminated command string, max 240 characters
+ * response: response buffer for VM's textual response
+ * rlen: size of the response buffer, cpcmd will not exceed this size
+ * but will cap the output if it's too large. Everything that
+ * did not fit into the buffer will be silently dropped
+ * response_code: return pointer for VM's error code
+ * return value: the size of the response. The caller can check if the buffer
+ * was large enough by comparing the return value and rlen
+ * NOTE: If the response buffer is not in real storage, cpcmd can sleep
+ */
+int cpcmd(const char *cmd, char *response, int rlen, int *response_code);
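+
+/*
+ * Usage sketch (illustration only, not part of the original header):
+ * asking CP for the user id of the z/VM guest.  The returned length can
+ * be compared against buflen to detect truncation; cprc receives CP's
+ * error code.
+ */
+#if 0   /* example only */
+static inline int example_query_userid(char *buf, int buflen)
+{
+        int cprc;
+
+        return cpcmd("QUERY USERID", buf, buflen, &cprc);
+}
+#endif /* example only */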
+
+#endif /* _ASM_S390_CPCMD_H */
diff --git a/arch/s390/include/asm/cpu.h b/arch/s390/include/asm/cpu.h
new file mode 100644
index 000000000..62228a884
--- /dev/null
+++ b/arch/s390/include/asm/cpu.h
@@ -0,0 +1,25 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright IBM Corp. 2000, 2009
+ * Author(s): Hartmut Penner <hp@de.ibm.com>,
+ * Martin Schwidefsky <schwidefsky@de.ibm.com>,
+ * Christian Ehrhardt <ehrhardt@de.ibm.com>,
+ */
+
+#ifndef _ASM_S390_CPU_H
+#define _ASM_S390_CPU_H
+
+#ifndef __ASSEMBLY__
+
+#include <linux/types.h>
+
+struct cpuid
+{
+ unsigned int version : 8;
+ unsigned int ident : 24;
+ unsigned int machine : 16;
+ unsigned int unused : 16;
+} __attribute__ ((packed, aligned(8)));
+
+#endif /* __ASSEMBLY__ */
+#endif /* _ASM_S390_CPU_H */
diff --git a/arch/s390/include/asm/cpu_mf.h b/arch/s390/include/asm/cpu_mf.h
new file mode 100644
index 000000000..bf2cbff92
--- /dev/null
+++ b/arch/s390/include/asm/cpu_mf.h
@@ -0,0 +1,314 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * CPU-measurement facilities
+ *
+ * Copyright IBM Corp. 2012, 2018
+ * Author(s): Hendrik Brueckner <brueckner@linux.vnet.ibm.com>
+ * Jan Glauber <jang@linux.vnet.ibm.com>
+ */
+#ifndef _ASM_S390_CPU_MF_H
+#define _ASM_S390_CPU_MF_H
+
+#include <linux/errno.h>
+#include <asm/facility.h>
+
+#define CPU_MF_INT_SF_IAE (1 << 31) /* invalid entry address */
+#define CPU_MF_INT_SF_ISE (1 << 30) /* incorrect SDBT entry */
+#define CPU_MF_INT_SF_PRA (1 << 29) /* program request alert */
+#define CPU_MF_INT_SF_SACA (1 << 23) /* sampler auth. change alert */
+#define CPU_MF_INT_SF_LSDA (1 << 22) /* loss of sample data alert */
+#define CPU_MF_INT_CF_MTDA (1 << 15) /* loss of MT ctr. data alert */
+#define CPU_MF_INT_CF_CACA (1 << 7) /* counter auth. change alert */
+#define CPU_MF_INT_CF_LCDA (1 << 6) /* loss of counter data alert */
+#define CPU_MF_INT_CF_MASK (CPU_MF_INT_CF_MTDA|CPU_MF_INT_CF_CACA| \
+ CPU_MF_INT_CF_LCDA)
+#define CPU_MF_INT_SF_MASK (CPU_MF_INT_SF_IAE|CPU_MF_INT_SF_ISE| \
+ CPU_MF_INT_SF_PRA|CPU_MF_INT_SF_SACA| \
+ CPU_MF_INT_SF_LSDA)
+
+/* CPU measurement facility support */
+static inline int cpum_cf_avail(void)
+{
+ return test_facility(40) && test_facility(67);
+}
+
+static inline int cpum_sf_avail(void)
+{
+ return test_facility(40) && test_facility(68);
+}
+
+
+struct cpumf_ctr_info {
+ u16 cfvn;
+ u16 auth_ctl;
+ u16 enable_ctl;
+ u16 act_ctl;
+ u16 max_cpu;
+ u16 csvn;
+ u16 max_cg;
+ u16 reserved1;
+ u32 reserved2[12];
+} __packed;
+
+/* QUERY SAMPLING INFORMATION block */
+struct hws_qsi_info_block { /* Bit(s) */
+ unsigned int b0_13:14; /* 0-13: zeros */
+ unsigned int as:1; /* 14: basic-sampling authorization */
+ unsigned int ad:1; /* 15: diag-sampling authorization */
+ unsigned int b16_21:6; /* 16-21: zeros */
+ unsigned int es:1; /* 22: basic-sampling enable control */
+ unsigned int ed:1; /* 23: diag-sampling enable control */
+ unsigned int b24_29:6; /* 24-29: zeros */
+ unsigned int cs:1; /* 30: basic-sampling activation control */
+ unsigned int cd:1; /* 31: diag-sampling activation control */
+ unsigned int bsdes:16; /* 4-5: size of basic sampling entry */
+ unsigned int dsdes:16; /* 6-7: size of diagnostic sampling entry */
+ unsigned long min_sampl_rate; /* 8-15: minimum sampling interval */
+ unsigned long max_sampl_rate; /* 16-23: maximum sampling interval*/
+ unsigned long tear; /* 24-31: TEAR contents */
+ unsigned long dear; /* 32-39: DEAR contents */
+ unsigned int rsvrd0; /* 40-43: reserved */
+ unsigned int cpu_speed; /* 44-47: CPU speed */
+ unsigned long long rsvrd1; /* 48-55: reserved */
+ unsigned long long rsvrd2; /* 56-63: reserved */
+} __packed;
+
+/* SET SAMPLING CONTROLS request block */
+struct hws_lsctl_request_block {
+ unsigned int s:1; /* 0: maximum buffer indicator */
+ unsigned int h:1; /* 1: part. level reserved for VM use*/
+ unsigned long long b2_53:52;/* 2-53: zeros */
+ unsigned int es:1; /* 54: basic-sampling enable control */
+ unsigned int ed:1; /* 55: diag-sampling enable control */
+ unsigned int b56_61:6; /* 56-61: zeros */
+ unsigned int cs:1; /* 62: basic-sampling activation control */
+ unsigned int cd:1; /* 63: diag-sampling activation control */
+ unsigned long interval; /* 8-15: sampling interval */
+ unsigned long tear; /* 16-23: TEAR contents */
+ unsigned long dear; /* 24-31: DEAR contents */
+ /* 32-63: */
+ unsigned long rsvrd1; /* reserved */
+ unsigned long rsvrd2; /* reserved */
+ unsigned long rsvrd3; /* reserved */
+ unsigned long rsvrd4; /* reserved */
+} __packed;
+
+struct hws_basic_entry {
+ unsigned int def:16; /* 0-15 Data Entry Format */
+ unsigned int R:4; /* 16-19 reserved */
+ unsigned int U:4; /* 20-23 Number of unique instruct. */
+ unsigned int z:2; /* zeros */
+ unsigned int T:1; /* 26 PSW DAT mode */
+ unsigned int W:1; /* 27 PSW wait state */
+ unsigned int P:1; /* 28 PSW Problem state */
+ unsigned int AS:2; /* 29-30 PSW address-space control */
+ unsigned int I:1; /* 31 entry valid or invalid */
+ unsigned int CL:2; /* 32-33 Configuration Level */
+ unsigned int:14;
+ unsigned int prim_asn:16; /* primary ASN */
+ unsigned long long ia; /* Instruction Address */
+ unsigned long long gpp; /* Guest Program Parameter */
+ unsigned long long hpp; /* Host Program Parameter */
+} __packed;
+
+struct hws_diag_entry {
+ unsigned int def:16; /* 0-15 Data Entry Format */
+ unsigned int R:15; /* 16-19 and 20-30 reserved */
+ unsigned int I:1; /* 31 entry valid or invalid */
+ u8 data[]; /* Machine-dependent sample data */
+} __packed;
+
+struct hws_combined_entry {
+ struct hws_basic_entry basic; /* Basic-sampling data entry */
+ struct hws_diag_entry diag; /* Diagnostic-sampling data entry */
+} __packed;
+
+struct hws_trailer_entry {
+ union {
+ struct {
+ unsigned int f:1; /* 0 - Block Full Indicator */
+ unsigned int a:1; /* 1 - Alert request control */
+ unsigned int t:1; /* 2 - Timestamp format */
+ unsigned int :29; /* 3 - 31: Reserved */
+ unsigned int bsdes:16; /* 32-47: size of basic SDE */
+ unsigned int dsdes:16; /* 48-63: size of diagnostic SDE */
+ };
+ unsigned long long flags; /* 0 - 63: All indicators */
+ };
+ unsigned long long overflow; /* 64 - sample Overflow count */
+ unsigned char timestamp[16]; /* 16 - 31 timestamp */
+ unsigned long long reserved1; /* 32 -Reserved */
+ unsigned long long reserved2; /* */
+ union { /* 48 - reserved for programming use */
+ struct {
+ unsigned int clock_base:1; /* in progusage2 */
+ unsigned long long progusage1:63;
+ unsigned long long progusage2;
+ };
+ unsigned long long progusage[2];
+ };
+} __packed;
+
+/* Load program parameter */
+static inline void lpp(void *pp)
+{
+ asm volatile(".insn s,0xb2800000,0(%0)\n":: "a" (pp) : "memory");
+}
+
+/* Query counter information */
+static inline int qctri(struct cpumf_ctr_info *info)
+{
+ int rc = -EINVAL;
+
+ asm volatile (
+ "0: .insn s,0xb28e0000,%1\n"
+ "1: lhi %0,0\n"
+ "2:\n"
+ EX_TABLE(1b, 2b)
+ : "+d" (rc), "=Q" (*info));
+ return rc;
+}
+
+/* Load CPU-counter-set controls */
+static inline int lcctl(u64 ctl)
+{
+ int cc;
+
+ asm volatile (
+ " .insn s,0xb2840000,%1\n"
+ " ipm %0\n"
+ " srl %0,28\n"
+ : "=d" (cc) : "Q" (ctl) : "cc");
+ return cc;
+}
+
+/* Extract CPU counter */
+static inline int __ecctr(u64 ctr, u64 *content)
+{
+ u64 _content;
+ int cc;
+
+ asm volatile (
+ " .insn rre,0xb2e40000,%0,%2\n"
+ " ipm %1\n"
+ " srl %1,28\n"
+ : "=d" (_content), "=d" (cc) : "d" (ctr) : "cc");
+ *content = _content;
+ return cc;
+}
+
+/* Extract CPU counter */
+static inline int ecctr(u64 ctr, u64 *val)
+{
+ u64 content;
+ int cc;
+
+ cc = __ecctr(ctr, &content);
+ if (!cc)
+ *val = content;
+ return cc;
+}
+
+/* Store CPU counter multiple for the MT utilization counter set */
+static inline int stcctm5(u64 num, u64 *val)
+{
+ int cc;
+
+ asm volatile (
+ " .insn rsy,0xeb0000000017,%2,5,%1\n"
+ " ipm %0\n"
+ " srl %0,28\n"
+ : "=d" (cc)
+ : "Q" (*val), "d" (num)
+ : "cc", "memory");
+ return cc;
+}
+
+/* Query sampling information */
+static inline int qsi(struct hws_qsi_info_block *info)
+{
+ int cc = 1;
+
+ asm volatile(
+ "0: .insn s,0xb2860000,%1\n"
+ "1: lhi %0,0\n"
+ "2:\n"
+ EX_TABLE(0b, 2b) EX_TABLE(1b, 2b)
+ : "+d" (cc), "+Q" (*info));
+ return cc ? -EINVAL : 0;
+}
+
+/* Load sampling controls */
+static inline int lsctl(struct hws_lsctl_request_block *req)
+{
+ int cc;
+
+ cc = 1;
+ asm volatile(
+ "0: .insn s,0xb2870000,0(%1)\n"
+ "1: ipm %0\n"
+ " srl %0,28\n"
+ "2:\n"
+ EX_TABLE(0b, 2b) EX_TABLE(1b, 2b)
+ : "+d" (cc), "+a" (req)
+ : "m" (*req)
+ : "cc", "memory");
+
+ return cc ? -EINVAL : 0;
+}
+
+/* Sampling control helper functions */
+
+#include <linux/time.h>
+
+static inline unsigned long freq_to_sample_rate(struct hws_qsi_info_block *qsi,
+ unsigned long freq)
+{
+ return (USEC_PER_SEC / freq) * qsi->cpu_speed;
+}
+
+static inline unsigned long sample_rate_to_freq(struct hws_qsi_info_block *qsi,
+ unsigned long rate)
+{
+ return USEC_PER_SEC * qsi->cpu_speed / rate;
+}
+
+#define SDB_TE_ALERT_REQ_MASK 0x4000000000000000UL
+#define SDB_TE_BUFFER_FULL_MASK 0x8000000000000000UL
+
+/* Return TOD timestamp contained in a trailer entry */
+static inline unsigned long long trailer_timestamp(struct hws_trailer_entry *te)
+{
+ /* TOD in STCKE format */
+ if (te->t)
+ return *((unsigned long long *) &te->timestamp[1]);
+
+ /* TOD in STCK format */
+ return *((unsigned long long *) &te->timestamp[0]);
+}
+
+/* Return pointer to trailer entry of an sample data block */
+static inline unsigned long *trailer_entry_ptr(unsigned long v)
+{
+ void *ret;
+
+ ret = (void *) v;
+ ret += PAGE_SIZE;
+ ret -= sizeof(struct hws_trailer_entry);
+
+ return (unsigned long *) ret;
+}
+
+/* Return whether the entry in the sample data block table (sdbt)
+ * is a link to the next sdbt */
+static inline int is_link_entry(unsigned long *s)
+{
+ return *s & 0x1ul ? 1 : 0;
+}
+
+/* Return pointer to the linked sdbt */
+static inline unsigned long *get_next_sdbt(unsigned long *s)
+{
+ return (unsigned long *) (*s & ~0x1ul);
+}
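+
+/*
+ * Usage sketch (illustration only, not part of the original header):
+ * walking the entries of one sample-data-block table and handing the
+ * trailer of every referenced sample data block to a caller-supplied
+ * callback.  The walk stops at the link entry to the next table.
+ */
+#if 0   /* example only */
+static inline void example_walk_sdbt(unsigned long *sdbt,
+                                     void (*handle)(struct hws_trailer_entry *))
+{
+        unsigned long *entry = sdbt;
+
+        while (!is_link_entry(entry)) {
+                struct hws_trailer_entry *te;
+
+                te = (struct hws_trailer_entry *) trailer_entry_ptr(*entry);
+                handle(te);
+                entry++;
+        }
+}
+#endif /* example only */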
+#endif /* _ASM_S390_CPU_MF_H */
diff --git a/arch/s390/include/asm/cpufeature.h b/arch/s390/include/asm/cpufeature.h
new file mode 100644
index 000000000..1d007c6ed
--- /dev/null
+++ b/arch/s390/include/asm/cpufeature.h
@@ -0,0 +1,30 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Module interface for CPU features
+ *
+ * Copyright IBM Corp. 2015
+ * Author(s): Hendrik Brueckner <brueckner@linux.vnet.ibm.com>
+ */
+
+#ifndef __ASM_S390_CPUFEATURE_H
+#define __ASM_S390_CPUFEATURE_H
+
+#include <asm/elf.h>
+
+/* Hardware features on Linux on z Systems are indicated by facility bits that
+ * are mapped to the so-called machine flags. Particular machine flags are
+ * then used to define ELF hardware capabilities; most notably hardware flags
+ * that are essential for user space / glibc.
+ *
+ * Restrict the set of exposed CPU features to ELF hardware capabilities for
+ * now. Additional machine flags can be indicated by values larger than
+ * MAX_ELF_HWCAP_FEATURES.
+ */
+#define MAX_ELF_HWCAP_FEATURES (8 * sizeof(elf_hwcap))
+#define MAX_CPU_FEATURES MAX_ELF_HWCAP_FEATURES
+
+#define cpu_feature(feat) ilog2(HWCAP_S390_ ## feat)
+
+int cpu_have_feature(unsigned int nr);
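+
+/*
+ * Usage sketch (illustration only, not part of the original header):
+ * gating a module on the vector facility through the generic feature
+ * interface.  HWCAP_S390_VXRS is assumed to be provided by asm/elf.h.
+ */
+#if 0   /* example only */
+static int __init example_vx_init(void)
+{
+        if (!cpu_have_feature(cpu_feature(VXRS)))
+                return -ENODEV;
+        return 0;
+}
+#endif /* example only */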
+
+#endif /* __ASM_S390_CPUFEATURE_H */
diff --git a/arch/s390/include/asm/cputime.h b/arch/s390/include/asm/cputime.h
new file mode 100644
index 000000000..cb729d111
--- /dev/null
+++ b/arch/s390/include/asm/cputime.h
@@ -0,0 +1,38 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright IBM Corp. 2004
+ *
+ * Author: Martin Schwidefsky <schwidefsky@de.ibm.com>
+ */
+
+#ifndef _S390_CPUTIME_H
+#define _S390_CPUTIME_H
+
+#include <linux/types.h>
+#include <asm/timex.h>
+
+#define CPUTIME_PER_USEC 4096ULL
+#define CPUTIME_PER_SEC (CPUTIME_PER_USEC * USEC_PER_SEC)
+
+/* We want to use full resolution of the CPU timer: 2**-12 micro-seconds. */
+
+#define cmpxchg_cputime(ptr, old, new) cmpxchg64(ptr, old, new)
+
+/*
+ * Convert cputime to microseconds.
+ */
+static inline u64 cputime_to_usecs(const u64 cputime)
+{
+ return cputime >> 12;
+}
+
+/*
+ * Convert cputime to nanoseconds.
+ */
+#define cputime_to_nsecs(cputime) tod_to_ns(cputime)
+
+u64 arch_cpu_idle_time(int cpu);
+
+#define arch_idle_time(cpu) arch_cpu_idle_time(cpu)
+
+#endif /* _S390_CPUTIME_H */
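A short sketch (not part of the patch) of the conversion: with 4096 cputime units per microsecond, cputime_to_usecs() is simply a right shift by 12. The helper name and the assumption that the delta comes from the down-counting CPU timer are illustrative.

#include <asm/cputime.h>

/* Illustrative sketch: elapsed microseconds between two CPU-timer readings. */
static inline u64 demo_elapsed_usecs(u64 timer_before, u64 timer_after)
{
	/* The s390 CPU timer counts down, so the earlier reading is the larger value. */
	u64 delta = timer_before - timer_after;

	return cputime_to_usecs(delta);	/* same as delta >> 12 */
}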
diff --git a/arch/s390/include/asm/crw.h b/arch/s390/include/asm/crw.h
new file mode 100644
index 000000000..c6ebfd31f
--- /dev/null
+++ b/arch/s390/include/asm/crw.h
@@ -0,0 +1,56 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Data definitions for channel report processing
+ * Copyright IBM Corp. 2000, 2009
+ * Author(s): Ingo Adlung <adlung@de.ibm.com>,
+ * Martin Schwidefsky <schwidefsky@de.ibm.com>,
+ * Cornelia Huck <cornelia.huck@de.ibm.com>,
+ * Heiko Carstens <heiko.carstens@de.ibm.com>,
+ */
+
+#ifndef _ASM_S390_CRW_H
+#define _ASM_S390_CRW_H
+
+#include <linux/types.h>
+
+/*
+ * Channel Report Word
+ */
+struct crw {
+ __u32 res1 : 1; /* reserved zero */
+ __u32 slct : 1; /* solicited */
+ __u32 oflw : 1; /* overflow */
+ __u32 chn : 1; /* chained */
+ __u32 rsc : 4; /* reporting source code */
+ __u32 anc : 1; /* ancillary report */
+ __u32 res2 : 1; /* reserved zero */
+ __u32 erc : 6; /* error-recovery code */
+ __u32 rsid : 16; /* reporting-source ID */
+} __attribute__ ((packed));
+
+typedef void (*crw_handler_t)(struct crw *, struct crw *, int);
+
+extern int crw_register_handler(int rsc, crw_handler_t handler);
+extern void crw_unregister_handler(int rsc);
+extern void crw_handle_channel_report(void);
+void crw_wait_for_channel_report(void);
+
+#define NR_RSCS 16
+
+#define CRW_RSC_MONITOR 0x2 /* monitoring facility */
+#define CRW_RSC_SCH 0x3 /* subchannel */
+#define CRW_RSC_CPATH 0x4 /* channel path */
+#define CRW_RSC_CONFIG 0x9 /* configuration-alert facility */
+#define CRW_RSC_CSS 0xB /* channel subsystem */
+
+#define CRW_ERC_EVENT 0x00 /* event information pending */
+#define CRW_ERC_AVAIL 0x01 /* available */
+#define CRW_ERC_INIT 0x02 /* initialized */
+#define CRW_ERC_TERROR 0x03 /* temporary error */
+#define CRW_ERC_IPARM 0x04 /* installed parm initialized */
+#define CRW_ERC_TERM 0x05 /* terminal */
+#define CRW_ERC_PERRN 0x06 /* perm. error, fac. not init */
+#define CRW_ERC_PERRI 0x07 /* perm. error, facility init */
+#define CRW_ERC_PMOD 0x08 /* installed parameters modified */
+
+#endif /* _ASM_S390_CRW_H */
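A hedged sketch (not part of the patch) of registering a handler for channel-path reports; the handler body and its interpretation of the third argument as an overflow indicator are illustrative assumptions.

#include <linux/init.h>
#include <linux/printk.h>
#include <asm/crw.h>

/* Illustrative sketch: log channel-path CRWs. */
static void demo_crw_handler(struct crw *crw0, struct crw *crw1, int overflow)
{
	/* crw1 is only meaningful for chained reports. */
	pr_info("CRW: rsc=%d erc=%d rsid=%#x overflow=%d\n",
		crw0->rsc, crw0->erc, crw0->rsid, overflow);
}

static int __init demo_init(void)
{
	return crw_register_handler(CRW_RSC_CPATH, demo_crw_handler);
}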
diff --git a/arch/s390/include/asm/css_chars.h b/arch/s390/include/asm/css_chars.h
new file mode 100644
index 000000000..480bb02cc
--- /dev/null
+++ b/arch/s390/include/asm/css_chars.h
@@ -0,0 +1,45 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_CSS_CHARS_H
+#define _ASM_CSS_CHARS_H
+
+#include <linux/types.h>
+
+struct css_general_char {
+ u64 : 12;
+ u64 dynio : 1; /* bit 12 */
+ u64 : 4;
+ u64 eadm : 1; /* bit 17 */
+ u64 : 23;
+ u64 aif : 1; /* bit 41 */
+ u64 : 3;
+ u64 mcss : 1; /* bit 45 */
+ u64 fcs : 1; /* bit 46 */
+ u64 : 1;
+ u64 ext_mb : 1; /* bit 48 */
+ u64 : 7;
+ u64 aif_tdd : 1; /* bit 56 */
+ u64 : 1;
+ u64 qebsm : 1; /* bit 58 */
+ u64 : 2;
+ u64 aiv : 1; /* bit 61 */
+ u64 : 2;
+
+ u64 : 3;
+ u64 aif_osa : 1; /* bit 67 */
+ u64 : 12;
+ u64 eadm_rf : 1; /* bit 80 */
+ u64 : 1;
+ u64 cib : 1; /* bit 82 */
+ u64 : 5;
+ u64 fcx : 1; /* bit 88 */
+ u64 : 19;
+ u64 alt_ssi : 1; /* bit 108 */
+ u64 : 1;
+ u64 narf : 1; /* bit 110 */
+ u64 : 12;
+ u64 util_str : 1;/* bit 123 */
+} __packed;
+
+extern struct css_general_char css_general_characteristics;
+
+#endif
diff --git a/arch/s390/include/asm/ctl_reg.h b/arch/s390/include/asm/ctl_reg.h
new file mode 100644
index 000000000..460045353
--- /dev/null
+++ b/arch/s390/include/asm/ctl_reg.h
@@ -0,0 +1,124 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright IBM Corp. 1999, 2009
+ *
+ * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>
+ */
+
+#ifndef __ASM_CTL_REG_H
+#define __ASM_CTL_REG_H
+
+#include <linux/const.h>
+
+#define CR0_CLOCK_COMPARATOR_SIGN _BITUL(63 - 10)
+#define CR0_EMERGENCY_SIGNAL_SUBMASK _BITUL(63 - 49)
+#define CR0_EXTERNAL_CALL_SUBMASK _BITUL(63 - 50)
+#define CR0_CLOCK_COMPARATOR_SUBMASK _BITUL(63 - 52)
+#define CR0_CPU_TIMER_SUBMASK _BITUL(63 - 53)
+#define CR0_SERVICE_SIGNAL_SUBMASK _BITUL(63 - 54)
+#define CR0_UNUSED_56 _BITUL(63 - 56)
+#define CR0_INTERRUPT_KEY_SUBMASK _BITUL(63 - 57)
+#define CR0_MEASUREMENT_ALERT_SUBMASK _BITUL(63 - 58)
+
+#define CR2_GUARDED_STORAGE _BITUL(63 - 59)
+
+#define CR14_UNUSED_32 _BITUL(63 - 32)
+#define CR14_UNUSED_33 _BITUL(63 - 33)
+#define CR14_CHANNEL_REPORT_SUBMASK _BITUL(63 - 35)
+#define CR14_RECOVERY_SUBMASK _BITUL(63 - 36)
+#define CR14_DEGRADATION_SUBMASK _BITUL(63 - 37)
+#define CR14_EXTERNAL_DAMAGE_SUBMASK _BITUL(63 - 38)
+#define CR14_WARNING_SUBMASK _BITUL(63 - 39)
+
+#ifndef __ASSEMBLY__
+
+#include <linux/bug.h>
+
+#define __ctl_load(array, low, high) do { \
+ typedef struct { char _[sizeof(array)]; } addrtype; \
+ \
+ BUILD_BUG_ON(sizeof(addrtype) != (high - low + 1) * sizeof(long));\
+ asm volatile( \
+ " lctlg %1,%2,%0\n" \
+ : \
+ : "Q" (*(addrtype *)(&array)), "i" (low), "i" (high) \
+ : "memory"); \
+} while (0)
+
+#define __ctl_store(array, low, high) do { \
+ typedef struct { char _[sizeof(array)]; } addrtype; \
+ \
+ BUILD_BUG_ON(sizeof(addrtype) != (high - low + 1) * sizeof(long));\
+ asm volatile( \
+ " stctg %1,%2,%0\n" \
+ : "=Q" (*(addrtype *)(&array)) \
+ : "i" (low), "i" (high)); \
+} while (0)
+
+static inline void __ctl_set_bit(unsigned int cr, unsigned int bit)
+{
+ unsigned long reg;
+
+ __ctl_store(reg, cr, cr);
+ reg |= 1UL << bit;
+ __ctl_load(reg, cr, cr);
+}
+
+static inline void __ctl_clear_bit(unsigned int cr, unsigned int bit)
+{
+ unsigned long reg;
+
+ __ctl_store(reg, cr, cr);
+ reg &= ~(1UL << bit);
+ __ctl_load(reg, cr, cr);
+}
+
+void smp_ctl_set_bit(int cr, int bit);
+void smp_ctl_clear_bit(int cr, int bit);
+
+union ctlreg0 {
+ unsigned long val;
+ struct {
+ unsigned long : 8;
+ unsigned long tcx : 1; /* Transactional-Execution control */
+ unsigned long pifo : 1; /* Transactional-Execution Program-
+ Interruption-Filtering Override */
+ unsigned long : 22;
+ unsigned long : 3;
+ unsigned long lap : 1; /* Low-address-protection control */
+ unsigned long : 4;
+ unsigned long edat : 1; /* Enhanced-DAT-enablement control */
+ unsigned long : 2;
+ unsigned long iep : 1; /* Instruction-Execution-Protection */
+ unsigned long : 1;
+ unsigned long afp : 1; /* AFP-register control */
+ unsigned long vx : 1; /* Vector enablement control */
+ unsigned long : 7;
+ unsigned long sssm : 1; /* Service signal subclass mask */
+ unsigned long : 9;
+ };
+};
+
+union ctlreg2 {
+ unsigned long val;
+ struct {
+ unsigned long : 33;
+ unsigned long ducto : 25;
+ unsigned long : 1;
+ unsigned long gse : 1;
+ unsigned long : 1;
+ unsigned long tds : 1;
+ unsigned long tdc : 2;
+ };
+};
+
+#ifdef CONFIG_SMP
+# define ctl_set_bit(cr, bit) smp_ctl_set_bit(cr, bit)
+# define ctl_clear_bit(cr, bit) smp_ctl_clear_bit(cr, bit)
+#else
+# define ctl_set_bit(cr, bit) __ctl_set_bit(cr, bit)
+# define ctl_clear_bit(cr, bit) __ctl_clear_bit(cr, bit)
+#endif
+
+#endif /* __ASSEMBLY__ */
+#endif /* __ASM_CTL_REG_H */
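A small sketch (not part of the patch) showing how the bit numbering fits together: the CRn_* constants above are defined MSB-first via _BITUL(63 - n), while ctl_set_bit()/ctl_clear_bit() take the position counted from the least-significant bit, so architected bit 35 of CR14 is passed as 63 - 35 = 28.

#include <asm/ctl_reg.h>

/* Illustrative sketch: enable the channel-report subclass mask in CR14. */
static void demo_enable_channel_reports(void)
{
	ctl_set_bit(14, 28);	/* the bit named CR14_CHANNEL_REPORT_SUBMASK above */
}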
diff --git a/arch/s390/include/asm/current.h b/arch/s390/include/asm/current.h
new file mode 100644
index 000000000..68f843152
--- /dev/null
+++ b/arch/s390/include/asm/current.h
@@ -0,0 +1,19 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * S390 version
+ * Copyright IBM Corp. 1999
+ * Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com)
+ *
+ * Derived from "include/asm-i386/current.h"
+ */
+
+#ifndef _S390_CURRENT_H
+#define _S390_CURRENT_H
+
+#include <asm/lowcore.h>
+
+struct task_struct;
+
+#define current ((struct task_struct *const)S390_lowcore.current_task)
+
+#endif /* !(_S390_CURRENT_H) */
diff --git a/arch/s390/include/asm/debug.h b/arch/s390/include/asm/debug.h
new file mode 100644
index 000000000..c305d39f5
--- /dev/null
+++ b/arch/s390/include/asm/debug.h
@@ -0,0 +1,270 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * S/390 debug facility
+ *
+ * Copyright IBM Corp. 1999, 2000
+ */
+#ifndef DEBUG_H
+#define DEBUG_H
+
+#include <linux/string.h>
+#include <linux/spinlock.h>
+#include <linux/kernel.h>
+#include <linux/time.h>
+#include <linux/refcount.h>
+#include <uapi/asm/debug.h>
+
+#define DEBUG_MAX_LEVEL 6 /* debug levels range from 0 to 6 */
+#define DEBUG_OFF_LEVEL -1 /* level where debug is switched off */
+#define DEBUG_FLUSH_ALL -1 /* parameter to flush all areas */
+#define DEBUG_MAX_VIEWS 10 /* max number of views in proc fs */
+#define DEBUG_MAX_NAME_LEN 64 /* max length for a debugfs file name */
+#define DEBUG_DEFAULT_LEVEL 3 /* initial debug level */
+
+#define DEBUG_DIR_ROOT "s390dbf" /* name of debug root directory in proc fs */
+
+#define DEBUG_DATA(entry) (char *)(entry + 1) /* data is stored behind */
+ /* the entry information */
+
+typedef struct __debug_entry debug_entry_t;
+
+struct debug_view;
+
+typedef struct debug_info {
+ struct debug_info *next;
+ struct debug_info *prev;
+ refcount_t ref_count;
+ spinlock_t lock;
+ int level;
+ int nr_areas;
+ int pages_per_area;
+ int buf_size;
+ int entry_size;
+ debug_entry_t ***areas;
+ int active_area;
+ int *active_pages;
+ int *active_entries;
+ struct dentry *debugfs_root_entry;
+ struct dentry *debugfs_entries[DEBUG_MAX_VIEWS];
+ struct debug_view *views[DEBUG_MAX_VIEWS];
+ char name[DEBUG_MAX_NAME_LEN];
+ umode_t mode;
+} debug_info_t;
+
+typedef int (debug_header_proc_t) (debug_info_t *id,
+ struct debug_view *view,
+ int area,
+ debug_entry_t *entry,
+ char *out_buf);
+
+typedef int (debug_format_proc_t) (debug_info_t *id,
+ struct debug_view *view, char *out_buf,
+ const char *in_buf);
+typedef int (debug_prolog_proc_t) (debug_info_t *id,
+ struct debug_view *view,
+ char *out_buf);
+typedef int (debug_input_proc_t) (debug_info_t *id,
+ struct debug_view *view,
+ struct file *file,
+ const char __user *user_buf,
+ size_t in_buf_size, loff_t *offset);
+
+int debug_dflt_header_fn(debug_info_t *id, struct debug_view *view,
+ int area, debug_entry_t *entry, char *out_buf);
+
+struct debug_view {
+ char name[DEBUG_MAX_NAME_LEN];
+ debug_prolog_proc_t *prolog_proc;
+ debug_header_proc_t *header_proc;
+ debug_format_proc_t *format_proc;
+ debug_input_proc_t *input_proc;
+ void *private_data;
+};
+
+extern struct debug_view debug_hex_ascii_view;
+extern struct debug_view debug_raw_view;
+extern struct debug_view debug_sprintf_view;
+
+/* do NOT use the _common functions directly, use the debug_xxx() wrappers below */
+
+debug_entry_t *debug_event_common(debug_info_t *id, int level,
+ const void *data, int length);
+
+debug_entry_t *debug_exception_common(debug_info_t *id, int level,
+ const void *data, int length);
+
+/* Debug Feature API: */
+
+debug_info_t *debug_register(const char *name, int pages, int nr_areas,
+ int buf_size);
+
+debug_info_t *debug_register_mode(const char *name, int pages, int nr_areas,
+ int buf_size, umode_t mode, uid_t uid,
+ gid_t gid);
+
+void debug_unregister(debug_info_t *id);
+
+void debug_set_level(debug_info_t *id, int new_level);
+
+void debug_set_critical(void);
+void debug_stop_all(void);
+
+static inline bool debug_level_enabled(debug_info_t *id, int level)
+{
+ return level <= id->level;
+}
+
+static inline debug_entry_t *debug_event(debug_info_t *id, int level,
+ void *data, int length)
+{
+ if ((!id) || (level > id->level) || (id->pages_per_area == 0))
+ return NULL;
+ return debug_event_common(id, level, data, length);
+}
+
+static inline debug_entry_t *debug_int_event(debug_info_t *id, int level,
+ unsigned int tag)
+{
+ unsigned int t = tag;
+
+ if ((!id) || (level > id->level) || (id->pages_per_area == 0))
+ return NULL;
+ return debug_event_common(id, level, &t, sizeof(unsigned int));
+}
+
+static inline debug_entry_t *debug_long_event(debug_info_t *id, int level,
+ unsigned long tag)
+{
+ unsigned long t = tag;
+
+ if ((!id) || (level > id->level) || (id->pages_per_area == 0))
+ return NULL;
+ return debug_event_common(id, level, &t, sizeof(unsigned long));
+}
+
+static inline debug_entry_t *debug_text_event(debug_info_t *id, int level,
+ const char *txt)
+{
+ if ((!id) || (level > id->level) || (id->pages_per_area == 0))
+ return NULL;
+ return debug_event_common(id, level, txt, strlen(txt));
+}
+
+/*
+ * IMPORTANT: Use "%s" in sprintf format strings with care! Only pointers are
+ * stored in the s390dbf. See Documentation/s390/s390dbf.txt for more details!
+ */
+extern debug_entry_t *
+__debug_sprintf_event(debug_info_t *id, int level, char *string, ...)
+ __attribute__ ((format(printf, 3, 4)));
+
+#define debug_sprintf_event(_id, _level, _fmt, ...) \
+({ \
+ debug_entry_t *__ret; \
+ debug_info_t *__id = _id; \
+ int __level = _level; \
+ \
+ if ((!__id) || (__level > __id->level)) \
+ __ret = NULL; \
+ else \
+ __ret = __debug_sprintf_event(__id, __level, \
+ _fmt, ## __VA_ARGS__); \
+ __ret; \
+})
+
+static inline debug_entry_t *debug_exception(debug_info_t *id, int level,
+ void *data, int length)
+{
+ if ((!id) || (level > id->level) || (id->pages_per_area == 0))
+ return NULL;
+ return debug_exception_common(id, level, data, length);
+}
+
+static inline debug_entry_t *debug_int_exception(debug_info_t *id, int level,
+ unsigned int tag)
+{
+ unsigned int t = tag;
+
+ if ((!id) || (level > id->level) || (id->pages_per_area == 0))
+ return NULL;
+ return debug_exception_common(id, level, &t, sizeof(unsigned int));
+}
+
+static inline debug_entry_t *debug_long_exception (debug_info_t *id, int level,
+ unsigned long tag)
+{
+ unsigned long t = tag;
+
+ if ((!id) || (level > id->level) || (id->pages_per_area == 0))
+ return NULL;
+ return debug_exception_common(id, level, &t, sizeof(unsigned long));
+}
+
+static inline debug_entry_t *debug_text_exception(debug_info_t *id, int level,
+ const char *txt)
+{
+ if ((!id) || (level > id->level) || (id->pages_per_area == 0))
+ return NULL;
+ return debug_exception_common(id, level, txt, strlen(txt));
+}
+
+/*
+ * IMPORTANT: Use "%s" in sprintf format strings with care! Only pointers are
+ * stored in the s390dbf. See Documentation/s390/s390dbf.txt for more details!
+ */
+extern debug_entry_t *
+__debug_sprintf_exception(debug_info_t *id, int level, char *string, ...)
+ __attribute__ ((format(printf, 3, 4)));
+
+#define debug_sprintf_exception(_id, _level, _fmt, ...) \
+({ \
+ debug_entry_t *__ret; \
+ debug_info_t *__id = _id; \
+ int __level = _level; \
+ \
+ if ((!__id) || (__level > __id->level)) \
+ __ret = NULL; \
+ else \
+ __ret = __debug_sprintf_exception(__id, __level, \
+ _fmt, ## __VA_ARGS__);\
+ __ret; \
+})
+
+int debug_register_view(debug_info_t *id, struct debug_view *view);
+int debug_unregister_view(debug_info_t *id, struct debug_view *view);
+
+/*
+ define the debug levels:
+ - 0 No debugging output to console or syslog
+ - 1 Log internal errors to syslog, ignore check conditions
+ - 2 Log internal errors and check conditions to syslog
+ - 3 Log internal errors to console, log check conditions to syslog
+ - 4 Log internal errors and check conditions to console
+ - 5 panic on internal errors, log check conditions to console
+ - 6 panic on both, internal errors and check conditions
+ */
+
+#ifndef DEBUG_LEVEL
+#define DEBUG_LEVEL 4
+#endif
+
+#define INTERNAL_ERRMSG(x,y...) "E" __FILE__ "%d: " x, __LINE__, y
+#define INTERNAL_WRNMSG(x,y...) "W" __FILE__ "%d: " x, __LINE__, y
+#define INTERNAL_INFMSG(x,y...) "I" __FILE__ "%d: " x, __LINE__, y
+#define INTERNAL_DEBMSG(x,y...) "D" __FILE__ "%d: " x, __LINE__, y
+
+#if DEBUG_LEVEL > 0
+#define PRINT_DEBUG(x...) printk(KERN_DEBUG PRINTK_HEADER x)
+#define PRINT_INFO(x...) printk(KERN_INFO PRINTK_HEADER x)
+#define PRINT_WARN(x...) printk(KERN_WARNING PRINTK_HEADER x)
+#define PRINT_ERR(x...) printk(KERN_ERR PRINTK_HEADER x)
+#define PRINT_FATAL(x...) panic(PRINTK_HEADER x)
+#else
+#define PRINT_DEBUG(x...) printk(KERN_DEBUG PRINTK_HEADER x)
+#define PRINT_INFO(x...) printk(KERN_DEBUG PRINTK_HEADER x)
+#define PRINT_WARN(x...) printk(KERN_DEBUG PRINTK_HEADER x)
+#define PRINT_ERR(x...) printk(KERN_DEBUG PRINTK_HEADER x)
+#define PRINT_FATAL(x...) printk(KERN_DEBUG PRINTK_HEADER x)
+#endif /* DEBUG_LEVEL > 0 */
+
+#endif /* DEBUG_H */
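A hedged sketch (not part of the patch) of the usual debug-feature life cycle: register a log, attach a view so it shows up under s390dbf in debugfs, write entries, and unregister on exit. The name "demo" and the buffer sizing are illustrative assumptions.

#include <linux/init.h>
#include <linux/module.h>
#include <asm/debug.h>

static debug_info_t *demo_dbf;

static int __init demo_init(void)
{
	/* 1 page per area, 4 areas, 32 bytes per entry (format pointer plus arguments). */
	demo_dbf = debug_register("demo", 1, 4, 32);
	if (!demo_dbf)
		return -ENOMEM;
	debug_register_view(demo_dbf, &debug_sprintf_view);
	debug_set_level(demo_dbf, DEBUG_DEFAULT_LEVEL);
	debug_sprintf_event(demo_dbf, 3, "initialized at level %d\n", demo_dbf->level);
	return 0;
}

static void __exit demo_exit(void)
{
	debug_unregister(demo_dbf);
}

module_init(demo_init);
module_exit(demo_exit);
MODULE_LICENSE("GPL");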
diff --git a/arch/s390/include/asm/delay.h b/arch/s390/include/asm/delay.h
new file mode 100644
index 000000000..898323fd9
--- /dev/null
+++ b/arch/s390/include/asm/delay.h
@@ -0,0 +1,25 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * S390 version
+ * Copyright IBM Corp. 1999
+ * Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com)
+ *
+ * Derived from "include/asm-i386/delay.h"
+ * Copyright (C) 1993 Linus Torvalds
+ *
+ * Delay routines calling functions in arch/s390/lib/delay.c
+ */
+
+#ifndef _S390_DELAY_H
+#define _S390_DELAY_H
+
+void __ndelay(unsigned long long nsecs);
+void __udelay(unsigned long long usecs);
+void udelay_simple(unsigned long long usecs);
+void __delay(unsigned long loops);
+
+#define ndelay(n) __ndelay((unsigned long long) (n))
+#define udelay(n) __udelay((unsigned long long) (n))
+#define mdelay(n) __udelay((unsigned long long) (n) * 1000)
+
+#endif /* defined(_S390_DELAY_H) */
diff --git a/arch/s390/include/asm/diag.h b/arch/s390/include/asm/diag.h
new file mode 100644
index 000000000..cdbaad50c
--- /dev/null
+++ b/arch/s390/include/asm/diag.h
@@ -0,0 +1,299 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * s390 diagnose functions
+ *
+ * Copyright IBM Corp. 2007
+ * Author(s): Michael Holzheu <holzheu@de.ibm.com>
+ */
+
+#ifndef _ASM_S390_DIAG_H
+#define _ASM_S390_DIAG_H
+
+#include <linux/if_ether.h>
+#include <linux/percpu.h>
+
+enum diag_stat_enum {
+ DIAG_STAT_X008,
+ DIAG_STAT_X00C,
+ DIAG_STAT_X010,
+ DIAG_STAT_X014,
+ DIAG_STAT_X044,
+ DIAG_STAT_X064,
+ DIAG_STAT_X09C,
+ DIAG_STAT_X0DC,
+ DIAG_STAT_X204,
+ DIAG_STAT_X210,
+ DIAG_STAT_X224,
+ DIAG_STAT_X250,
+ DIAG_STAT_X258,
+ DIAG_STAT_X26C,
+ DIAG_STAT_X288,
+ DIAG_STAT_X2C4,
+ DIAG_STAT_X2FC,
+ DIAG_STAT_X304,
+ DIAG_STAT_X308,
+ DIAG_STAT_X500,
+ NR_DIAG_STAT
+};
+
+void diag_stat_inc(enum diag_stat_enum nr);
+void diag_stat_inc_norecursion(enum diag_stat_enum nr);
+
+/*
+ * Diagnose 10: Release page range
+ */
+static inline void diag10_range(unsigned long start_pfn, unsigned long num_pfn)
+{
+ unsigned long start_addr, end_addr;
+
+ start_addr = start_pfn << PAGE_SHIFT;
+ end_addr = (start_pfn + num_pfn - 1) << PAGE_SHIFT;
+
+ diag_stat_inc(DIAG_STAT_X010);
+ asm volatile(
+ "0: diag %0,%1,0x10\n"
+ "1: nopr %%r7\n"
+ EX_TABLE(0b, 1b)
+ EX_TABLE(1b, 1b)
+ : : "a" (start_addr), "a" (end_addr));
+}
+
+/*
+ * Diagnose 14: Input spool file manipulation
+ */
+extern int diag14(unsigned long rx, unsigned long ry1, unsigned long subcode);
+
+/*
+ * Diagnose 210: Get information about a virtual device
+ */
+struct diag210 {
+ u16 vrdcdvno; /* device number (input) */
+ u16 vrdclen; /* data block length (input) */
+ u8 vrdcvcla; /* virtual device class (output) */
+ u8 vrdcvtyp; /* virtual device type (output) */
+ u8 vrdcvsta; /* virtual device status (output) */
+ u8 vrdcvfla; /* virtual device flags (output) */
+ u8 vrdcrccl; /* real device class (output) */
+ u8 vrdccrty; /* real device type (output) */
+ u8 vrdccrmd; /* real device model (output) */
+ u8 vrdccrft; /* real device feature (output) */
+} __attribute__((packed, aligned(4)));
+
+extern int diag210(struct diag210 *addr);
+
+/* Bit is set in flags when physical CPU info is included in diag 204 data */
+#define DIAG204_LPAR_PHYS_FLG 0x80
+#define DIAG204_LPAR_NAME_LEN 8 /* lpar name len in diag 204 data */
+#define DIAG204_CPU_NAME_LEN 16 /* type name len of cpus in diag224 name table */
+
+/* diag 204 subcodes */
+enum diag204_sc {
+ DIAG204_SUBC_STIB4 = 4,
+ DIAG204_SUBC_RSI = 5,
+ DIAG204_SUBC_STIB6 = 6,
+ DIAG204_SUBC_STIB7 = 7
+};
+
+/* The two available diag 204 data formats */
+enum diag204_format {
+ DIAG204_INFO_SIMPLE = 0,
+ DIAG204_INFO_EXT = 0x00010000
+};
+
+enum diag204_cpu_flags {
+ DIAG204_CPU_ONLINE = 0x20,
+ DIAG204_CPU_CAPPED = 0x40,
+};
+
+struct diag204_info_blk_hdr {
+ __u8 npar;
+ __u8 flags;
+ __u16 tslice;
+ __u16 phys_cpus;
+ __u16 this_part;
+ __u64 curtod;
+} __packed;
+
+struct diag204_x_info_blk_hdr {
+ __u8 npar;
+ __u8 flags;
+ __u16 tslice;
+ __u16 phys_cpus;
+ __u16 this_part;
+ __u64 curtod1;
+ __u64 curtod2;
+ char reserved[40];
+} __packed;
+
+struct diag204_part_hdr {
+ __u8 pn;
+ __u8 cpus;
+ char reserved[6];
+ char part_name[DIAG204_LPAR_NAME_LEN];
+} __packed;
+
+struct diag204_x_part_hdr {
+ __u8 pn;
+ __u8 cpus;
+ __u8 rcpus;
+ __u8 pflag;
+ __u32 mlu;
+ char part_name[DIAG204_LPAR_NAME_LEN];
+ char lpc_name[8];
+ char os_name[8];
+ __u64 online_cs;
+ __u64 online_es;
+ __u8 upid;
+ __u8 reserved:3;
+ __u8 mtid:5;
+ char reserved1[2];
+ __u32 group_mlu;
+ char group_name[8];
+ char hardware_group_name[8];
+ char reserved2[24];
+} __packed;
+
+struct diag204_cpu_info {
+ __u16 cpu_addr;
+ char reserved1[2];
+ __u8 ctidx;
+ __u8 cflag;
+ __u16 weight;
+ __u64 acc_time;
+ __u64 lp_time;
+} __packed;
+
+struct diag204_x_cpu_info {
+ __u16 cpu_addr;
+ char reserved1[2];
+ __u8 ctidx;
+ __u8 cflag;
+ __u16 weight;
+ __u64 acc_time;
+ __u64 lp_time;
+ __u16 min_weight;
+ __u16 cur_weight;
+ __u16 max_weight;
+ char reseved2[2];
+ __u64 online_time;
+ __u64 wait_time;
+ __u32 pma_weight;
+ __u32 polar_weight;
+ __u32 cpu_type_cap;
+ __u32 group_cpu_type_cap;
+ char reserved3[32];
+} __packed;
+
+struct diag204_phys_hdr {
+ char reserved1[1];
+ __u8 cpus;
+ char reserved2[6];
+ char mgm_name[8];
+} __packed;
+
+struct diag204_x_phys_hdr {
+ char reserved1[1];
+ __u8 cpus;
+ char reserved2[6];
+ char mgm_name[8];
+ char reserved3[80];
+} __packed;
+
+struct diag204_phys_cpu {
+ __u16 cpu_addr;
+ char reserved1[2];
+ __u8 ctidx;
+ char reserved2[3];
+ __u64 mgm_time;
+ char reserved3[8];
+} __packed;
+
+struct diag204_x_phys_cpu {
+ __u16 cpu_addr;
+ char reserved1[2];
+ __u8 ctidx;
+ char reserved2[1];
+ __u16 weight;
+ __u64 mgm_time;
+ char reserved3[80];
+} __packed;
+
+struct diag204_x_part_block {
+ struct diag204_x_part_hdr hdr;
+ struct diag204_x_cpu_info cpus[];
+} __packed;
+
+struct diag204_x_phys_block {
+ struct diag204_x_phys_hdr hdr;
+ struct diag204_x_phys_cpu cpus[];
+} __packed;
+
+enum diag26c_sc {
+ DIAG26C_PORT_VNIC = 0x00000024,
+ DIAG26C_MAC_SERVICES = 0x00000030
+};
+
+enum diag26c_version {
+ DIAG26C_VERSION2 = 0x00000002, /* z/VM 5.4.0 */
+ DIAG26C_VERSION6_VM65918 = 0x00020006 /* z/VM 6.4.0 + VM65918 */
+};
+
+#define DIAG26C_VNIC_INFO 0x0002
+struct diag26c_vnic_req {
+ u32 resp_buf_len;
+ u32 resp_version;
+ u16 req_format;
+ u16 vlan_id;
+ u64 sys_name;
+ u8 res[2];
+ u16 devno;
+} __packed __aligned(8);
+
+#define VNIC_INFO_PROT_L3 1
+#define VNIC_INFO_PROT_L2 2
+/* Note: this is the bare minimum, use it for uninitialized VNICs only. */
+struct diag26c_vnic_resp {
+ u32 version;
+ u32 entry_cnt;
+ /* VNIC info: */
+ u32 next_entry;
+ u64 owner;
+ u16 devno;
+ u8 status;
+ u8 type;
+ u64 lan_owner;
+ u64 lan_name;
+ u64 port_name;
+ u8 port_type;
+ u8 ext_status:6;
+ u8 protocol:2;
+ u16 base_devno;
+ u32 port_num;
+ u32 ifindex;
+ u32 maxinfo;
+ u32 dev_count;
+ /* 3x device info: */
+ u8 dev_info1[28];
+ u8 dev_info2[28];
+ u8 dev_info3[28];
+} __packed __aligned(8);
+
+#define DIAG26C_GET_MAC 0x0000
+struct diag26c_mac_req {
+ u32 resp_buf_len;
+ u32 resp_version;
+ u16 op_code;
+ u16 devno;
+ u8 res[4];
+};
+
+struct diag26c_mac_resp {
+ u32 version;
+ u8 mac[ETH_ALEN];
+ u8 res[2];
+} __aligned(8);
+
+int diag204(unsigned long subcode, unsigned long size, void *addr);
+int diag224(void *ptr);
+int diag26c(void *req, void *resp, enum diag26c_sc subcode);
+#endif /* _ASM_S390_DIAG_H */
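A hedged sketch (not part of the patch) of a diagnose 0x210 query; the helper name is a placeholder and the negative return value on a failed diagnose is an assumption for illustration.

#include <linux/errno.h>
#include <linux/printk.h>
#include <asm/diag.h>

/* Illustrative sketch: ask z/VM about a virtual device. */
static int demo_query_vdev(u16 devno)
{
	struct diag210 info = {
		.vrdcdvno = devno,
		.vrdclen  = sizeof(info),
	};

	if (diag210(&info))
		return -EOPNOTSUPP;
	pr_info("vdev %04x: class %#x type %#x\n", devno, info.vrdcvcla, info.vrdcvtyp);
	return 0;
}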
diff --git a/arch/s390/include/asm/dis.h b/arch/s390/include/asm/dis.h
new file mode 100644
index 000000000..c18ed6091
--- /dev/null
+++ b/arch/s390/include/asm/dis.h
@@ -0,0 +1,30 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Disassemble s390 instructions.
+ *
+ * Copyright IBM Corp. 2007
+ * Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com),
+ */
+
+#ifndef __ASM_S390_DIS_H__
+#define __ASM_S390_DIS_H__
+
+#include <asm/dis-defs.h>
+
+static inline int insn_length(unsigned char code)
+{
+ return ((((int) code + 64) >> 7) + 1) << 1;
+}
+
+struct pt_regs;
+
+void show_code(struct pt_regs *regs);
+void print_fn_code(unsigned char *code, unsigned long len);
+struct s390_insn *find_insn(unsigned char *code);
+
+static inline int is_known_insn(unsigned char *code)
+{
+ return !!find_insn(code);
+}
+
+#endif /* __ASM_S390_DIS_H__ */
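A short sketch (not part of the patch) of insn_length(): the two most significant bits of the first opcode byte encode the length (00 is 2 bytes, 01 and 10 are 4 bytes, 11 is 6 bytes), which the expression above turns into 2, 4 or 6. The counting helper below is an illustrative assumption.

#include <asm/dis.h>

/* Illustrative sketch: count instructions in a code buffer of known length. */
static unsigned long demo_count_insns(unsigned char *code, unsigned long len)
{
	unsigned long i = 0, n = 0;

	while (i < len) {
		i += insn_length(code[i]);
		n++;
	}
	return n;
}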
diff --git a/arch/s390/include/asm/dma.h b/arch/s390/include/asm/dma.h
new file mode 100644
index 000000000..6f26f35d4
--- /dev/null
+++ b/arch/s390/include/asm/dma.h
@@ -0,0 +1,20 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_S390_DMA_H
+#define _ASM_S390_DMA_H
+
+#include <asm/io.h>
+
+/*
+ * MAX_DMA_ADDRESS is ambiguous because on s390 it's completely unrelated
+ * to DMA. It _is_ used for the s390 memory zone split at 2GB caused
+ * by the 31 bit heritage.
+ */
+#define MAX_DMA_ADDRESS 0x80000000
+
+#ifdef CONFIG_PCI
+extern int isa_dma_bridge_buggy;
+#else
+#define isa_dma_bridge_buggy (0)
+#endif
+
+#endif /* _ASM_S390_DMA_H */
diff --git a/arch/s390/include/asm/dwarf.h b/arch/s390/include/asm/dwarf.h
new file mode 100644
index 000000000..4f21ae561
--- /dev/null
+++ b/arch/s390/include/asm/dwarf.h
@@ -0,0 +1,37 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_S390_DWARF_H
+#define _ASM_S390_DWARF_H
+
+#ifdef __ASSEMBLY__
+
+#define CFI_STARTPROC .cfi_startproc
+#define CFI_ENDPROC .cfi_endproc
+#define CFI_DEF_CFA_OFFSET .cfi_def_cfa_offset
+#define CFI_ADJUST_CFA_OFFSET .cfi_adjust_cfa_offset
+#define CFI_RESTORE .cfi_restore
+
+#ifdef CONFIG_AS_CFI_VAL_OFFSET
+#define CFI_VAL_OFFSET .cfi_val_offset
+#else
+#define CFI_VAL_OFFSET #
+#endif
+
+#ifndef BUILD_VDSO
+ /*
+ * Emit CFI data in .debug_frame sections and not in .eh_frame
+ * sections. The .eh_frame CFI would only provide runtime unwind
+ * information, which the kernel does not use. Hence, vmlinux.lds.S
+ * can discard the .eh_frame sections.
+ */
+ .cfi_sections .debug_frame
+#else
+ /*
+ * For the vDSO, emit CFI data in both .eh_frame and .debug_frame
+ * sections.
+ */
+ .cfi_sections .eh_frame, .debug_frame
+#endif
+
+#endif /* __ASSEMBLY__ */
+
+#endif /* _ASM_S390_DWARF_H */
diff --git a/arch/s390/include/asm/eadm.h b/arch/s390/include/asm/eadm.h
new file mode 100644
index 000000000..bb63b2afd
--- /dev/null
+++ b/arch/s390/include/asm/eadm.h
@@ -0,0 +1,120 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_S390_EADM_H
+#define _ASM_S390_EADM_H
+
+#include <linux/types.h>
+#include <linux/device.h>
+#include <linux/blk_types.h>
+
+struct arqb {
+ u64 data;
+ u16 fmt:4;
+ u16:12;
+ u16 cmd_code;
+ u16:16;
+ u16 msb_count;
+ u32 reserved[12];
+} __packed;
+
+#define ARQB_CMD_MOVE 1
+
+struct arsb {
+ u16 fmt:4;
+ u32:28;
+ u8 ef;
+ u8:8;
+ u8 ecbi;
+ u8:8;
+ u8 fvf;
+ u16:16;
+ u8 eqc;
+ u32:32;
+ u64 fail_msb;
+ u64 fail_aidaw;
+ u64 fail_ms;
+ u64 fail_scm;
+ u32 reserved[4];
+} __packed;
+
+#define EQC_WR_PROHIBIT 22
+
+struct msb {
+ u8 fmt:4;
+ u8 oc:4;
+ u8 flags;
+ u16:12;
+ u16 bs:4;
+ u32 blk_count;
+ u64 data_addr;
+ u64 scm_addr;
+ u64:64;
+} __packed;
+
+struct aidaw {
+ u8 flags;
+ u32 :24;
+ u32 :32;
+ u64 data_addr;
+} __packed;
+
+#define MSB_OC_CLEAR 0
+#define MSB_OC_READ 1
+#define MSB_OC_WRITE 2
+#define MSB_OC_RELEASE 3
+
+#define MSB_FLAG_BNM 0x80
+#define MSB_FLAG_IDA 0x40
+
+#define MSB_BS_4K 0
+#define MSB_BS_1M 1
+
+#define AOB_NR_MSB 124
+
+struct aob {
+ struct arqb request;
+ struct arsb response;
+ struct msb msb[AOB_NR_MSB];
+} __packed __aligned(PAGE_SIZE);
+
+struct aob_rq_header {
+ struct scm_device *scmdev;
+ char data[0];
+};
+
+struct scm_device {
+ u64 address;
+ u64 size;
+ unsigned int nr_max_block;
+ struct device dev;
+ struct {
+ unsigned int persistence:4;
+ unsigned int oper_state:4;
+ unsigned int data_state:4;
+ unsigned int rank:4;
+ unsigned int release:1;
+ unsigned int res_id:8;
+ } __packed attrs;
+};
+
+#define OP_STATE_GOOD 1
+#define OP_STATE_TEMP_ERR 2
+#define OP_STATE_PERM_ERR 3
+
+enum scm_event {SCM_CHANGE, SCM_AVAIL};
+
+struct scm_driver {
+ struct device_driver drv;
+ int (*probe) (struct scm_device *scmdev);
+ int (*remove) (struct scm_device *scmdev);
+ void (*notify) (struct scm_device *scmdev, enum scm_event event);
+ void (*handler) (struct scm_device *scmdev, void *data,
+ blk_status_t error);
+};
+
+int scm_driver_register(struct scm_driver *scmdrv);
+void scm_driver_unregister(struct scm_driver *scmdrv);
+
+int eadm_start_aob(struct aob *aob);
+void scm_irq_handler(struct aob *aob, blk_status_t error);
+
+#endif /* _ASM_S390_EADM_H */
diff --git a/arch/s390/include/asm/ebcdic.h b/arch/s390/include/asm/ebcdic.h
new file mode 100644
index 000000000..29441beb9
--- /dev/null
+++ b/arch/s390/include/asm/ebcdic.h
@@ -0,0 +1,47 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * EBCDIC -> ASCII, ASCII -> EBCDIC conversion routines.
+ *
+ * S390 version
+ * Copyright IBM Corp. 1999
+ * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>
+ */
+
+#ifndef _EBCDIC_H
+#define _EBCDIC_H
+
+#include <linux/types.h>
+
+extern __u8 _ascebc_500[256]; /* ASCII -> EBCDIC 500 conversion table */
+extern __u8 _ebcasc_500[256]; /* EBCDIC 500 -> ASCII conversion table */
+extern __u8 _ascebc[256]; /* ASCII -> EBCDIC conversion table */
+extern __u8 _ebcasc[256]; /* EBCDIC -> ASCII conversion table */
+extern __u8 _ebc_tolower[256]; /* EBCDIC -> lowercase */
+extern __u8 _ebc_toupper[256]; /* EBCDIC -> uppercase */
+
+static inline void
+codepage_convert(const __u8 *codepage, volatile __u8 * addr, unsigned long nr)
+{
+ if (nr-- <= 0)
+ return;
+ asm volatile(
+ " bras 1,1f\n"
+ " tr 0(1,%0),0(%2)\n"
+ "0: tr 0(256,%0),0(%2)\n"
+ " la %0,256(%0)\n"
+ "1: ahi %1,-256\n"
+ " jnm 0b\n"
+ " ex %1,0(1)"
+ : "+&a" (addr), "+&a" (nr)
+ : "a" (codepage) : "cc", "memory", "1");
+}
+
+#define ASCEBC(addr,nr) codepage_convert(_ascebc, addr, nr)
+#define EBCASC(addr,nr) codepage_convert(_ebcasc, addr, nr)
+#define ASCEBC_500(addr,nr) codepage_convert(_ascebc_500, addr, nr)
+#define EBCASC_500(addr,nr) codepage_convert(_ebcasc_500, addr, nr)
+#define EBC_TOLOWER(addr,nr) codepage_convert(_ebc_tolower, addr, nr)
+#define EBC_TOUPPER(addr,nr) codepage_convert(_ebc_toupper, addr, nr)
+
+#endif
+
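A minimal sketch (not part of the patch): the conversion macros translate the buffer in place, byte by byte, using the tables declared above; the helper name is an illustrative assumption.

#include <linux/string.h>
#include <asm/ebcdic.h>

/* Illustrative sketch: prepare an ASCII command string as EBCDIC, in place. */
static void demo_to_ebcdic(char *buf, size_t buflen, const char *cmd)
{
	strlcpy(buf, cmd, buflen);
	ASCEBC(buf, strlen(buf));
}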
diff --git a/arch/s390/include/asm/elf.h b/arch/s390/include/asm/elf.h
new file mode 100644
index 000000000..f74639a05
--- /dev/null
+++ b/arch/s390/include/asm/elf.h
@@ -0,0 +1,280 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * S390 version
+ *
+ * Derived from "include/asm-i386/elf.h"
+ */
+
+#ifndef __ASMS390_ELF_H
+#define __ASMS390_ELF_H
+
+/* s390 relocations defined by the ABIs */
+#define R_390_NONE 0 /* No reloc. */
+#define R_390_8 1 /* Direct 8 bit. */
+#define R_390_12 2 /* Direct 12 bit. */
+#define R_390_16 3 /* Direct 16 bit. */
+#define R_390_32 4 /* Direct 32 bit. */
+#define R_390_PC32 5 /* PC relative 32 bit. */
+#define R_390_GOT12 6 /* 12 bit GOT offset. */
+#define R_390_GOT32 7 /* 32 bit GOT offset. */
+#define R_390_PLT32 8 /* 32 bit PC relative PLT address. */
+#define R_390_COPY 9 /* Copy symbol at runtime. */
+#define R_390_GLOB_DAT 10 /* Create GOT entry. */
+#define R_390_JMP_SLOT 11 /* Create PLT entry. */
+#define R_390_RELATIVE 12 /* Adjust by program base. */
+#define R_390_GOTOFF32 13 /* 32 bit offset to GOT. */
+#define R_390_GOTPC 14 /* 32 bit PC rel. offset to GOT. */
+#define R_390_GOT16 15 /* 16 bit GOT offset. */
+#define R_390_PC16 16 /* PC relative 16 bit. */
+#define R_390_PC16DBL 17 /* PC relative 16 bit shifted by 1. */
+#define R_390_PLT16DBL 18 /* 16 bit PC rel. PLT shifted by 1. */
+#define R_390_PC32DBL 19 /* PC relative 32 bit shifted by 1. */
+#define R_390_PLT32DBL 20 /* 32 bit PC rel. PLT shifted by 1. */
+#define R_390_GOTPCDBL 21 /* 32 bit PC rel. GOT shifted by 1. */
+#define R_390_64 22 /* Direct 64 bit. */
+#define R_390_PC64 23 /* PC relative 64 bit. */
+#define R_390_GOT64 24 /* 64 bit GOT offset. */
+#define R_390_PLT64 25 /* 64 bit PC relative PLT address. */
+#define R_390_GOTENT 26 /* 32 bit PC rel. to GOT entry >> 1. */
+#define R_390_GOTOFF16 27 /* 16 bit offset to GOT. */
+#define R_390_GOTOFF64 28 /* 64 bit offset to GOT. */
+#define R_390_GOTPLT12 29 /* 12 bit offset to jump slot. */
+#define R_390_GOTPLT16 30 /* 16 bit offset to jump slot. */
+#define R_390_GOTPLT32 31 /* 32 bit offset to jump slot. */
+#define R_390_GOTPLT64 32 /* 64 bit offset to jump slot. */
+#define R_390_GOTPLTENT 33 /* 32 bit rel. offset to jump slot. */
+#define R_390_PLTOFF16 34 /* 16 bit offset from GOT to PLT. */
+#define R_390_PLTOFF32 35 /* 32 bit offset from GOT to PLT. */
+#define R_390_PLTOFF64 36 /* 64 bit offset from GOT to PLT. */
+#define R_390_TLS_LOAD 37 /* Tag for load insn in TLS code. */
+#define R_390_TLS_GDCALL 38 /* Tag for function call in general
+ dynamic TLS code. */
+#define R_390_TLS_LDCALL 39 /* Tag for function call in local
+ dynamic TLS code. */
+#define R_390_TLS_GD32 40 /* Direct 32 bit for general dynamic
+ thread local data. */
+#define R_390_TLS_GD64 41 /* Direct 64 bit for general dynamic
+ thread local data. */
+#define R_390_TLS_GOTIE12 42 /* 12 bit GOT offset for static TLS
+ block offset. */
+#define R_390_TLS_GOTIE32 43 /* 32 bit GOT offset for static TLS
+ block offset. */
+#define R_390_TLS_GOTIE64 44 /* 64 bit GOT offset for static TLS
+ block offset. */
+#define R_390_TLS_LDM32 45 /* Direct 32 bit for local dynamic
+ thread local data in LD code. */
+#define R_390_TLS_LDM64 46 /* Direct 64 bit for local dynamic
+ thread local data in LD code. */
+#define R_390_TLS_IE32 47 /* 32 bit address of GOT entry for
+ negated static TLS block offset. */
+#define R_390_TLS_IE64 48 /* 64 bit address of GOT entry for
+ negated static TLS block offset. */
+#define R_390_TLS_IEENT 49 /* 32 bit rel. offset to GOT entry for
+ negated static TLS block offset. */
+#define R_390_TLS_LE32 50 /* 32 bit negated offset relative to
+ static TLS block. */
+#define R_390_TLS_LE64 51 /* 64 bit negated offset relative to
+ static TLS block. */
+#define R_390_TLS_LDO32 52 /* 32 bit offset relative to TLS
+ block. */
+#define R_390_TLS_LDO64 53 /* 64 bit offset relative to TLS
+ block. */
+#define R_390_TLS_DTPMOD 54 /* ID of module containing symbol. */
+#define R_390_TLS_DTPOFF 55 /* Offset in TLS block. */
+#define R_390_TLS_TPOFF 56 /* Negate offset in static TLS
+ block. */
+#define R_390_20 57 /* Direct 20 bit. */
+#define R_390_GOT20 58 /* 20 bit GOT offset. */
+#define R_390_GOTPLT20 59 /* 20 bit offset to jump slot. */
+#define R_390_TLS_GOTIE20 60 /* 20 bit GOT offset for static TLS
+ block offset. */
+/* Keep this the last entry. */
+#define R_390_NUM 61
+
+/* Bits present in AT_HWCAP. */
+#define HWCAP_S390_ESAN3 1
+#define HWCAP_S390_ZARCH 2
+#define HWCAP_S390_STFLE 4
+#define HWCAP_S390_MSA 8
+#define HWCAP_S390_LDISP 16
+#define HWCAP_S390_EIMM 32
+#define HWCAP_S390_DFP 64
+#define HWCAP_S390_HPAGE 128
+#define HWCAP_S390_ETF3EH 256
+#define HWCAP_S390_HIGH_GPRS 512
+#define HWCAP_S390_TE 1024
+#define HWCAP_S390_VXRS 2048
+#define HWCAP_S390_VXRS_BCD 4096
+#define HWCAP_S390_VXRS_EXT 8192
+#define HWCAP_S390_GS 16384
+
+/* Internal bits, not exposed via elf */
+#define HWCAP_INT_SIE 1UL
+
+/*
+ * These are used to set parameters in the core dumps.
+ */
+#define ELF_CLASS ELFCLASS64
+#define ELF_DATA ELFDATA2MSB
+#define ELF_ARCH EM_S390
+
+/* s390 specific phdr types */
+#define PT_S390_PGSTE 0x70000000
+
+/*
+ * ELF register definitions..
+ */
+
+#include <linux/compat.h>
+
+#include <asm/ptrace.h>
+#include <asm/syscall.h>
+#include <asm/user.h>
+
+typedef s390_fp_regs elf_fpregset_t;
+typedef s390_regs elf_gregset_t;
+
+typedef s390_fp_regs compat_elf_fpregset_t;
+typedef s390_compat_regs compat_elf_gregset_t;
+
+#include <linux/sched/mm.h> /* for task_struct */
+#include <asm/mmu_context.h>
+
+#include <asm/vdso.h>
+
+extern unsigned int vdso_enabled;
+
+/*
+ * This is used to ensure we don't load something for the wrong architecture.
+ */
+#define elf_check_arch(x) \
+ (((x)->e_machine == EM_S390 || (x)->e_machine == EM_S390_OLD) \
+ && (x)->e_ident[EI_CLASS] == ELF_CLASS)
+#define compat_elf_check_arch(x) \
+ (((x)->e_machine == EM_S390 || (x)->e_machine == EM_S390_OLD) \
+ && (x)->e_ident[EI_CLASS] == ELF_CLASS)
+#define compat_start_thread start_thread31
+
+struct arch_elf_state {
+ int rc;
+};
+
+#define INIT_ARCH_ELF_STATE { .rc = 0 }
+
+#define arch_check_elf(ehdr, interp, interp_ehdr, state) (0)
+#ifdef CONFIG_PGSTE
+#define arch_elf_pt_proc(ehdr, phdr, elf, interp, state) \
+({ \
+ struct arch_elf_state *_state = state; \
+ if ((phdr)->p_type == PT_S390_PGSTE && \
+ !page_table_allocate_pgste && \
+ !test_thread_flag(TIF_PGSTE) && \
+ !current->mm->context.alloc_pgste) { \
+ set_thread_flag(TIF_PGSTE); \
+ set_pt_regs_flag(task_pt_regs(current), \
+ PIF_SYSCALL_RESTART); \
+ _state->rc = -EAGAIN; \
+ } \
+ _state->rc; \
+})
+#else
+#define arch_elf_pt_proc(ehdr, phdr, elf, interp, state) \
+({ \
+ (state)->rc; \
+})
+#endif
+
+/* For SVR4/S390 the function pointer to be registered with `atexit` is
+ passed in R14. */
+#define ELF_PLAT_INIT(_r, load_addr) \
+ do { \
+ _r->gprs[14] = 0; \
+ } while (0)
+
+#define CORE_DUMP_USE_REGSET
+#define ELF_EXEC_PAGESIZE PAGE_SIZE
+
+/* This is the location that an ET_DYN program is loaded if exec'ed. Typical
+ use of this is to invoke "./ld.so someprog" to test out a new version of
+ the loader. We need to make sure that it is out of the way of the program
+ that it will "exec", and that there is sufficient room for the brk. 64-bit
+ tasks are aligned to 4GB. */
+#define ELF_ET_DYN_BASE (is_compat_task() ? \
+ (STACK_TOP / 3 * 2) : \
+ (STACK_TOP / 3 * 2) & ~((1UL << 32) - 1))
+
+/* This yields a mask that user programs can use to figure out what
+ instruction set this CPU supports. */
+
+extern unsigned long elf_hwcap;
+#define ELF_HWCAP (elf_hwcap)
+
+/* Internal hardware capabilities, not exposed via elf */
+
+extern unsigned long int_hwcap;
+
+/* This yields a string that ld.so will use to load implementation
+ specific libraries for optimization. This is more specific in
+ intent than poking at uname or /proc/cpuinfo.
+
+ On s390 this is the machine type, which ld.so can use to select
+ machine-optimized library variants. */
+
+#define ELF_PLATFORM_SIZE 8
+extern char elf_platform[];
+#define ELF_PLATFORM (elf_platform)
+
+#ifndef CONFIG_COMPAT
+#define SET_PERSONALITY(ex) \
+do { \
+ set_personality(PER_LINUX | \
+ (current->personality & (~PER_MASK))); \
+ current->thread.sys_call_table = \
+ (unsigned long) &sys_call_table; \
+} while (0)
+#else /* CONFIG_COMPAT */
+#define SET_PERSONALITY(ex) \
+do { \
+ if (personality(current->personality) != PER_LINUX32) \
+ set_personality(PER_LINUX | \
+ (current->personality & ~PER_MASK)); \
+ if ((ex).e_ident[EI_CLASS] == ELFCLASS32) { \
+ set_thread_flag(TIF_31BIT); \
+ current->thread.sys_call_table = \
+ (unsigned long) &sys_call_table_emu; \
+ } else { \
+ clear_thread_flag(TIF_31BIT); \
+ current->thread.sys_call_table = \
+ (unsigned long) &sys_call_table; \
+ } \
+} while (0)
+#endif /* CONFIG_COMPAT */
+
+/*
+ * Cache aliasing on the latest machines calls for a mapping granularity
+ * of 512KB for the anonymous mapping base. For 64-bit processes use a
+ * 512KB alignment and a randomization of up to 1GB. For 31-bit processes
+ * the virtual address space is limited, use no alignment and limit the
+ * randomization to 8MB.
+ * For the additional randomization of the program break use 32MB for
+ * 64-bit and 8MB for 31-bit.
+ */
+#define BRK_RND_MASK (is_compat_task() ? 0x7ffUL : 0x1fffUL)
+#define MMAP_RND_MASK (is_compat_task() ? 0x7ffUL : 0x3ff80UL)
+#define MMAP_ALIGN_MASK (is_compat_task() ? 0 : 0x7fUL)
+#define STACK_RND_MASK MMAP_RND_MASK
+
+/* update AT_VECTOR_SIZE_ARCH if the number of NEW_AUX_ENT entries changes */
+#define ARCH_DLINFO \
+do { \
+ if (vdso_enabled) \
+ NEW_AUX_ENT(AT_SYSINFO_EHDR, \
+ (unsigned long)current->mm->context.vdso_base); \
+} while (0)
+
+struct linux_binprm;
+
+#define ARCH_HAS_SETUP_ADDITIONAL_PAGES 1
+int arch_setup_additional_pages(struct linux_binprm *, int);
+
+#endif
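For completeness, the HWCAP_S390_* bits defined above surface in user space through the AT_HWCAP auxiliary vector; a hedged user-space sketch follows (not kernel code, the constant is duplicated only for illustration).

#include <stdio.h>
#include <sys/auxv.h>

#define HWCAP_S390_VXRS 2048	/* mirrors the kernel definition above */

int main(void)
{
	unsigned long hwcap = getauxval(AT_HWCAP);

	printf("vector facility: %s\n", (hwcap & HWCAP_S390_VXRS) ? "yes" : "no");
	return 0;
}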
diff --git a/arch/s390/include/asm/exec.h b/arch/s390/include/asm/exec.h
new file mode 100644
index 000000000..641bfbec9
--- /dev/null
+++ b/arch/s390/include/asm/exec.h
@@ -0,0 +1,13 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright IBM Corp. 1999, 2009
+ *
+ * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>
+ */
+
+#ifndef __ASM_EXEC_H
+#define __ASM_EXEC_H
+
+extern unsigned long arch_align_stack(unsigned long sp);
+
+#endif /* __ASM_EXEC_H */
diff --git a/arch/s390/include/asm/extable.h b/arch/s390/include/asm/extable.h
new file mode 100644
index 000000000..80a4e5a9c
--- /dev/null
+++ b/arch/s390/include/asm/extable.h
@@ -0,0 +1,29 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __S390_EXTABLE_H
+#define __S390_EXTABLE_H
+/*
+ * The exception table consists of pairs of addresses: the first is the
+ * address of an instruction that is allowed to fault, and the second is
+ * the address at which the program should continue. No registers are
+ * modified, so it is entirely up to the continuation code to figure out
+ * what to do.
+ *
+ * All the routines below use bits of fixup code that are out of line
+ * with the main instruction path. This means when everything is well,
+ * we don't even have to jump over them. Further, they do not intrude
+ * on our cache or tlb entries.
+ */
+
+struct exception_table_entry
+{
+ int insn, fixup;
+};
+
+static inline unsigned long extable_fixup(const struct exception_table_entry *x)
+{
+ return (unsigned long)&x->fixup + x->fixup;
+}
+
+#define ARCH_HAS_RELATIVE_EXTABLE
+
+#endif
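Both fields are 32-bit offsets relative to their own location. extable_fixup() above resolves the continuation address; the matching instruction address can be recovered the same way, as in this sketch (the helper name is an illustrative assumption, not part of the patch).

/* Illustrative sketch: resolve the address of the faulting instruction. */
static inline unsigned long demo_extable_insn(const struct exception_table_entry *x)
{
	return (unsigned long)&x->insn + x->insn;
}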
diff --git a/arch/s390/include/asm/extmem.h b/arch/s390/include/asm/extmem.h
new file mode 100644
index 000000000..568fd81bb
--- /dev/null
+++ b/arch/s390/include/asm/extmem.h
@@ -0,0 +1,32 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * definitions for external memory segment support
+ * Copyright IBM Corp. 2003
+ */
+
+#ifndef _ASM_S390X_DCSS_H
+#define _ASM_S390X_DCSS_H
+#ifndef __ASSEMBLY__
+
+/* possible values for segment type as returned by segment_info */
+#define SEG_TYPE_SW 0
+#define SEG_TYPE_EW 1
+#define SEG_TYPE_SR 2
+#define SEG_TYPE_ER 3
+#define SEG_TYPE_SN 4
+#define SEG_TYPE_EN 5
+#define SEG_TYPE_SC 6
+#define SEG_TYPE_EWEN 7
+
+#define SEGMENT_SHARED 0
+#define SEGMENT_EXCLUSIVE 1
+
+int segment_load (char *name, int segtype, unsigned long *addr, unsigned long *length);
+void segment_unload(char *name);
+void segment_save(char *name);
+int segment_type (char* name);
+int segment_modify_shared (char *name, int do_nonshared);
+void segment_warning(int rc, char *seg_name);
+
+#endif
+#endif
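A hedged sketch (not part of the patch) of the DCSS interface: segment_load() returns the segment type (one of the SEG_TYPE_* values) on success and a negative error code otherwise. The segment name "MYDCSS" is a placeholder.

#include <linux/printk.h>
#include <asm/extmem.h>

/* Illustrative sketch: map a shared DCSS, report it, and unmap it again. */
static int demo_map_dcss(void)
{
	unsigned long addr, length;
	int rc;

	rc = segment_load("MYDCSS", SEGMENT_SHARED, &addr, &length);
	if (rc < 0) {
		segment_warning(rc, "MYDCSS");
		return rc;
	}
	pr_info("MYDCSS at %#lx, %lu bytes, type %d\n", addr, length, rc);
	segment_unload("MYDCSS");
	return 0;
}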
diff --git a/arch/s390/include/asm/facility.h b/arch/s390/include/asm/facility.h
new file mode 100644
index 000000000..7ffbc5d7c
--- /dev/null
+++ b/arch/s390/include/asm/facility.h
@@ -0,0 +1,98 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright IBM Corp. 1999, 2009
+ *
+ * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>
+ */
+
+#ifndef __ASM_FACILITY_H
+#define __ASM_FACILITY_H
+
+#include <asm/facility-defs.h>
+#include <linux/string.h>
+#include <linux/preempt.h>
+#include <asm/lowcore.h>
+
+#define MAX_FACILITY_BIT (sizeof(((struct lowcore *)0)->stfle_fac_list) * 8)
+
+static inline void __set_facility(unsigned long nr, void *facilities)
+{
+ unsigned char *ptr = (unsigned char *) facilities;
+
+ if (nr >= MAX_FACILITY_BIT)
+ return;
+ ptr[nr >> 3] |= 0x80 >> (nr & 7);
+}
+
+static inline void __clear_facility(unsigned long nr, void *facilities)
+{
+ unsigned char *ptr = (unsigned char *) facilities;
+
+ if (nr >= MAX_FACILITY_BIT)
+ return;
+ ptr[nr >> 3] &= ~(0x80 >> (nr & 7));
+}
+
+static inline int __test_facility(unsigned long nr, void *facilities)
+{
+ unsigned char *ptr;
+
+ if (nr >= MAX_FACILITY_BIT)
+ return 0;
+ ptr = (unsigned char *) facilities + (nr >> 3);
+ return (*ptr & (0x80 >> (nr & 7))) != 0;
+}
+
+/*
+ * The test_facility function uses the bit ordering where the MSB is bit 0.
+ * That makes it easier to query facility bits with the bit number as
+ * documented in the Principles of Operation.
+ */
+static inline int test_facility(unsigned long nr)
+{
+ unsigned long facilities_als[] = { FACILITIES_ALS };
+
+ if (__builtin_constant_p(nr) && nr < sizeof(facilities_als) * 8) {
+ if (__test_facility(nr, &facilities_als))
+ return 1;
+ }
+ return __test_facility(nr, &S390_lowcore.stfle_fac_list);
+}
+
+static inline unsigned long __stfle_asm(u64 *stfle_fac_list, int size)
+{
+ register unsigned long reg0 asm("0") = size - 1;
+
+ asm volatile(
+ ".insn s,0xb2b00000,0(%1)" /* stfle */
+ : "+d" (reg0)
+ : "a" (stfle_fac_list)
+ : "memory", "cc");
+ return reg0;
+}
+
+/**
+ * stfle - Store facility list extended
+ * @stfle_fac_list: array where facility list can be stored
+ * @size: size of passed in array in double words
+ */
+static inline void stfle(u64 *stfle_fac_list, int size)
+{
+ unsigned long nr;
+
+ preempt_disable();
+ asm volatile(
+ " stfl 0(0)\n"
+ : "=m" (S390_lowcore.stfl_fac_list));
+ nr = 4; /* bytes stored by stfl */
+ memcpy(stfle_fac_list, &S390_lowcore.stfl_fac_list, 4);
+ if (S390_lowcore.stfl_fac_list & 0x01000000) {
+ /* More facility bits available with stfle */
+ nr = __stfle_asm(stfle_fac_list, size);
+ nr = min_t(unsigned long, (nr + 1) * 8, size * 8);
+ }
+ memset((char *) stfle_fac_list + nr, 0, size * 8 - nr);
+ preempt_enable();
+}
+
+#endif /* __ASM_FACILITY_H */
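A small sketch (not part of the patch): callers pass the facility number exactly as documented in the Principles of Operation, MSB-first; facility 129 (vector facility) is used here purely as an example.

#include <linux/types.h>
#include <asm/facility.h>

/* Illustrative sketch: check for the vector facility. */
static bool demo_has_vector_facility(void)
{
	return test_facility(129);
}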
diff --git a/arch/s390/include/asm/fcx.h b/arch/s390/include/asm/fcx.h
new file mode 100644
index 000000000..cff0749e9
--- /dev/null
+++ b/arch/s390/include/asm/fcx.h
@@ -0,0 +1,312 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Functions for assembling fcx enabled I/O control blocks.
+ *
+ * Copyright IBM Corp. 2008
+ * Author(s): Peter Oberparleiter <peter.oberparleiter@de.ibm.com>
+ */
+
+#ifndef _ASM_S390_FCX_H
+#define _ASM_S390_FCX_H
+
+#include <linux/types.h>
+
+#define TCW_FORMAT_DEFAULT 0
+#define TCW_TIDAW_FORMAT_DEFAULT 0
+#define TCW_FLAGS_INPUT_TIDA (1 << (23 - 5))
+#define TCW_FLAGS_TCCB_TIDA (1 << (23 - 6))
+#define TCW_FLAGS_OUTPUT_TIDA (1 << (23 - 7))
+#define TCW_FLAGS_TIDAW_FORMAT(x) ((x) & 3) << (23 - 9)
+#define TCW_FLAGS_GET_TIDAW_FORMAT(x) (((x) >> (23 - 9)) & 3)
+
+/**
+ * struct tcw - Transport Control Word (TCW)
+ * @format: TCW format
+ * @flags: TCW flags
+ * @tccbl: Transport-Command-Control-Block Length
+ * @r: Read Operations
+ * @w: Write Operations
+ * @output: Output-Data Address
+ * @input: Input-Data Address
+ * @tsb: Transport-Status-Block Address
+ * @tccb: Transport-Command-Control-Block Address
+ * @output_count: Output Count
+ * @input_count: Input Count
+ * @intrg: Interrogate TCW Address
+ */
+struct tcw {
+ u32 format:2;
+ u32 :6;
+ u32 flags:24;
+ u32 :8;
+ u32 tccbl:6;
+ u32 r:1;
+ u32 w:1;
+ u32 :16;
+ u64 output;
+ u64 input;
+ u64 tsb;
+ u64 tccb;
+ u32 output_count;
+ u32 input_count;
+ u32 :32;
+ u32 :32;
+ u32 :32;
+ u32 intrg;
+} __attribute__ ((packed, aligned(64)));
+
+#define TIDAW_FLAGS_LAST (1 << (7 - 0))
+#define TIDAW_FLAGS_SKIP (1 << (7 - 1))
+#define TIDAW_FLAGS_DATA_INT (1 << (7 - 2))
+#define TIDAW_FLAGS_TTIC (1 << (7 - 3))
+#define TIDAW_FLAGS_INSERT_CBC (1 << (7 - 4))
+
+/**
+ * struct tidaw - Transport-Indirect-Addressing Word (TIDAW)
+ * @flags: TIDAW flags. Can be an arithmetic OR of the following constants:
+ * %TIDAW_FLAGS_LAST, %TIDAW_FLAGS_SKIP, %TIDAW_FLAGS_DATA_INT,
+ * %TIDAW_FLAGS_TTIC, %TIDAW_FLAGS_INSERT_CBC
+ * @count: Count
+ * @addr: Address
+ */
+struct tidaw {
+ u32 flags:8;
+ u32 :24;
+ u32 count;
+ u64 addr;
+} __attribute__ ((packed, aligned(16)));
+
+/**
+ * struct tsa_iostat - I/O-Status Transport-Status Area (IO-Stat TSA)
+ * @dev_time: Device Time
+ * @def_time: Defer Time
+ * @queue_time: Queue Time
+ * @dev_busy_time: Device-Busy Time
+ * @dev_act_time: Device-Active-Only Time
+ * @sense: Sense Data (if present)
+ */
+struct tsa_iostat {
+ u32 dev_time;
+ u32 def_time;
+ u32 queue_time;
+ u32 dev_busy_time;
+ u32 dev_act_time;
+ u8 sense[32];
+} __attribute__ ((packed));
+
+/**
+ * struct tsa_ddpc - Device-Detected-Program-Check Transport-Status Area (DDPC TSA)
+ * @rc: Reason Code
+ * @rcq: Reason Code Qualifier
+ * @sense: Sense Data (if present)
+ */
+struct tsa_ddpc {
+ u32 :24;
+ u32 rc:8;
+ u8 rcq[16];
+ u8 sense[32];
+} __attribute__ ((packed));
+
+#define TSA_INTRG_FLAGS_CU_STATE_VALID (1 << (7 - 0))
+#define TSA_INTRG_FLAGS_DEV_STATE_VALID (1 << (7 - 1))
+#define TSA_INTRG_FLAGS_OP_STATE_VALID (1 << (7 - 2))
+
+/**
+ * struct tsa_intrg - Interrogate Transport-Status Area (Intrg. TSA)
+ * @format: Format
+ * @flags: Flags. Can be an arithmetic OR of the following constants:
+ * %TSA_INTRG_FLAGS_CU_STATE_VALID, %TSA_INTRG_FLAGS_DEV_STATE_VALID,
+ * %TSA_INTRG_FLAGS_OP_STATE_VALID
+ * @cu_state: Control-Unit State
+ * @dev_state: Device State
+ * @op_state: Operation State
+ * @sd_info: State-Dependent Information
+ * @dl_id: Device-Level Identifier
+ * @dd_data: Device-Dependent Data
+ */
+struct tsa_intrg {
+ u32 format:8;
+ u32 flags:8;
+ u32 cu_state:8;
+ u32 dev_state:8;
+ u32 op_state:8;
+ u32 :24;
+ u8 sd_info[12];
+ u32 dl_id;
+ u8 dd_data[28];
+} __attribute__ ((packed));
+
+#define TSB_FORMAT_NONE 0
+#define TSB_FORMAT_IOSTAT 1
+#define TSB_FORMAT_DDPC 2
+#define TSB_FORMAT_INTRG 3
+
+#define TSB_FLAGS_DCW_OFFSET_VALID (1 << (7 - 0))
+#define TSB_FLAGS_COUNT_VALID (1 << (7 - 1))
+#define TSB_FLAGS_CACHE_MISS (1 << (7 - 2))
+#define TSB_FLAGS_TIME_VALID (1 << (7 - 3))
+#define TSB_FLAGS_FORMAT(x) ((x) & 7)
+#define TSB_FORMAT(t) ((t)->flags & 7)
+
+/**
+ * struct tsb - Transport-Status Block (TSB)
+ * @length: Length
+ * @flags: Flags. Can be an arithmetic OR of the following constants:
+ * %TSB_FLAGS_DCW_OFFSET_VALID, %TSB_FLAGS_COUNT_VALID, %TSB_FLAGS_CACHE_MISS,
+ * %TSB_FLAGS_TIME_VALID
+ * @dcw_offset: DCW Offset
+ * @count: Count
+ * @tsa: Transport-Status-Area
+ */
+struct tsb {
+ u32 length:8;
+ u32 flags:8;
+ u32 dcw_offset:16;
+ u32 count;
+ u32 :32;
+ union {
+ struct tsa_iostat iostat;
+ struct tsa_ddpc ddpc;
+ struct tsa_intrg intrg;
+ } __attribute__ ((packed)) tsa;
+} __attribute__ ((packed, aligned(8)));
+
+#define DCW_INTRG_FORMAT_DEFAULT 0
+
+#define DCW_INTRG_RC_UNSPECIFIED 0
+#define DCW_INTRG_RC_TIMEOUT 1
+
+#define DCW_INTRG_RCQ_UNSPECIFIED 0
+#define DCW_INTRG_RCQ_PRIMARY 1
+#define DCW_INTRG_RCQ_SECONDARY 2
+
+#define DCW_INTRG_FLAGS_MPM (1 << (7 - 0))
+#define DCW_INTRG_FLAGS_PPR (1 << (7 - 1))
+#define DCW_INTRG_FLAGS_CRIT (1 << (7 - 2))
+
+/**
+ * struct dcw_intrg_data - Interrogate DCW data
+ * @format: Format. Should be %DCW_INTRG_FORMAT_DEFAULT
+ * @rc: Reason Code. Can be one of %DCW_INTRG_RC_UNSPECIFIED,
+ * %DCW_INTRG_RC_TIMEOUT
+ * @rcq: Reason Code Qualifier: Can be one of %DCW_INTRG_RCQ_UNSPECIFIED,
+ * %DCW_INTRG_RCQ_PRIMARY, %DCW_INTRG_RCQ_SECONDARY
+ * @lpm: Logical-Path Mask
+ * @pam: Path-Available Mask
+ * @pim: Path-Installed Mask
+ * @timeout: Timeout
+ * @flags: Flags. Can be an arithmetic OR of %DCW_INTRG_FLAGS_MPM,
+ * %DCW_INTRG_FLAGS_PPR, %DCW_INTRG_FLAGS_CRIT
+ * @time: Time
+ * @prog_id: Program Identifier
+ * @prog_data: Program-Dependent Data
+ */
+struct dcw_intrg_data {
+ u32 format:8;
+ u32 rc:8;
+ u32 rcq:8;
+ u32 lpm:8;
+ u32 pam:8;
+ u32 pim:8;
+ u32 timeout:16;
+ u32 flags:8;
+ u32 :24;
+ u32 :32;
+ u64 time;
+ u64 prog_id;
+ u8 prog_data[0];
+} __attribute__ ((packed));
+
+#define DCW_FLAGS_CC (1 << (7 - 1))
+
+#define DCW_CMD_WRITE 0x01
+#define DCW_CMD_READ 0x02
+#define DCW_CMD_CONTROL 0x03
+#define DCW_CMD_SENSE 0x04
+#define DCW_CMD_SENSE_ID 0xe4
+#define DCW_CMD_INTRG 0x40
+
+/**
+ * struct dcw - Device-Command Word (DCW)
+ * @cmd: Command Code. Can be one of %DCW_CMD_WRITE, %DCW_CMD_READ,
+ * %DCW_CMD_CONTROL, %DCW_CMD_SENSE, %DCW_CMD_SENSE_ID, %DCW_CMD_INTRG
+ * @flags: Flags. Can be an arithmetic OR of %DCW_FLAGS_CC
+ * @cd_count: Control-Data Count
+ * @count: Count
+ * @cd: Control Data
+ */
+struct dcw {
+ u32 cmd:8;
+ u32 flags:8;
+ u32 :8;
+ u32 cd_count:8;
+ u32 count;
+ u8 cd[0];
+} __attribute__ ((packed));
+
+#define TCCB_FORMAT_DEFAULT 0x7f
+#define TCCB_MAX_DCW 30
+#define TCCB_MAX_SIZE (sizeof(struct tccb_tcah) + \
+ TCCB_MAX_DCW * sizeof(struct dcw) + \
+ sizeof(struct tccb_tcat))
+#define TCCB_SAC_DEFAULT 0x1ffe
+#define TCCB_SAC_INTRG 0x1fff
+
+/**
+ * struct tccb_tcah - Transport-Command-Area Header (TCAH)
+ * @format: Format. Should be %TCCB_FORMAT_DEFAULT
+ * @tcal: Transport-Command-Area Length
+ * @sac: Service-Action Code. Can be one of %TCCB_SAC_DEFAULT, %TCCB_SAC_INTRG
+ * @prio: Priority
+ */
+struct tccb_tcah {
+ u32 format:8;
+ u32 :24;
+ u32 :24;
+ u32 tcal:8;
+ u32 sac:16;
+ u32 :8;
+ u32 prio:8;
+ u32 :32;
+} __attribute__ ((packed));
+
+/**
+ * struct tccb_tcat - Transport-Command-Area Trailer (TCAT)
+ * @count: Transport Count
+ */
+struct tccb_tcat {
+ u32 :32;
+ u32 count;
+} __attribute__ ((packed));
+
+/**
+ * struct tccb - (partial) Transport-Command-Control Block (TCCB)
+ * @tcah: TCAH
+ * @tca: Transport-Command Area
+ */
+struct tccb {
+ struct tccb_tcah tcah;
+ u8 tca[0];
+} __attribute__ ((packed, aligned(8)));
+
+struct tcw *tcw_get_intrg(struct tcw *tcw);
+void *tcw_get_data(struct tcw *tcw);
+struct tccb *tcw_get_tccb(struct tcw *tcw);
+struct tsb *tcw_get_tsb(struct tcw *tcw);
+
+void tcw_init(struct tcw *tcw, int r, int w);
+void tcw_finalize(struct tcw *tcw, int num_tidaws);
+
+void tcw_set_intrg(struct tcw *tcw, struct tcw *intrg_tcw);
+void tcw_set_data(struct tcw *tcw, void *data, int use_tidal);
+void tcw_set_tccb(struct tcw *tcw, struct tccb *tccb);
+void tcw_set_tsb(struct tcw *tcw, struct tsb *tsb);
+
+void tccb_init(struct tccb *tccb, size_t tccb_size, u32 sac);
+void tsb_init(struct tsb *tsb);
+struct dcw *tccb_add_dcw(struct tccb *tccb, size_t tccb_size, u8 cmd, u8 flags,
+ void *cd, u8 cd_count, u32 count);
+struct tidaw *tcw_add_tidaw(struct tcw *tcw, int num_tidaws, u8 flags,
+ void *addr, u32 count);
+
+#endif /* _ASM_S390_FCX_H */
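A hedged sketch (not part of the patch) that loosely mirrors how the channel-I/O layer assembles a transport command with these helpers: one read DCW plus a single TIDAW describing the data buffer. Buffer allocation and the alignment required by the structure definitions are the caller's responsibility; the function name and the 4096-byte transfer size are illustrative assumptions.

#include <asm/fcx.h>

/* Illustrative sketch: build a TCW that reads 4096 bytes through a TIDA list. */
static void demo_build_read_tcw(struct tcw *tcw, struct tccb *tccb,
				struct tsb *tsb, struct tidaw *tidaw_list,
				void *buf)
{
	tcw_init(tcw, 1, 0);			/* read operation, no write */
	tccb_init(tccb, TCCB_MAX_SIZE, TCCB_SAC_DEFAULT);
	tsb_init(tsb);
	tcw_set_tccb(tcw, tccb);
	tcw_set_tsb(tcw, tsb);
	tcw_set_data(tcw, tidaw_list, 1);	/* input data described by a TIDA list */
	tccb_add_dcw(tccb, TCCB_MAX_SIZE, DCW_CMD_READ, 0, NULL, 0, 4096);
	tcw_add_tidaw(tcw, 0, 0, buf, 4096);
	tcw_finalize(tcw, 1);			/* marks the last TIDAW, fills the counts */
}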
diff --git a/arch/s390/include/asm/fpu/api.h b/arch/s390/include/asm/fpu/api.h
new file mode 100644
index 000000000..34a7ae684
--- /dev/null
+++ b/arch/s390/include/asm/fpu/api.h
@@ -0,0 +1,116 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * In-kernel FPU support functions
+ *
+ *
+ * Consider these guidelines before using in-kernel FPU functions:
+ *
+ * 1. Use kernel_fpu_begin() and kernel_fpu_end() to enclose all in-kernel
+ * use of floating-point or vector registers and instructions.
+ *
+ * 2. For kernel_fpu_begin(), specify the vector register range you want to
+ * use with the KERNEL_VXR_* constants. Consider these usage guidelines:
+ *
+ * a) If your function typically runs in process-context, use the lower
+ * half of the vector registers, for example, specify KERNEL_VXR_LOW.
+ * b) If your function typically runs in soft-irq or hard-irq context,
+ * prefer using the upper half of the vector registers, for example,
+ * specify KERNEL_VXR_HIGH.
+ *
+ * If you adhere to these guidelines, an interrupted process context
+ * does not need to save and restore vector registers because of
+ * disjoint register ranges.
+ *
+ * Also note that the __kernel_fpu_begin()/__kernel_fpu_end() functions
+ * include logic to save and restore up to 16 vector registers at once.
+ *
+ * 3. You can nest kernel_fpu_begin()/kernel_fpu_end() by using different
+ * struct kernel_fpu states. Vector registers that are in use by outer
+ * levels are saved and restored. You can minimize the save and restore
+ * effort by choosing disjoint vector register ranges.
+ *
+ * 4. To use vector floating-point instructions, specify the KERNEL_FPC
+ * flag to save and restore floating-point controls in addition to any
+ * vector register range.
+ *
+ * 5. To use floating-point registers and instructions only, specify the
+ * KERNEL_FPR flag. This flag triggers a save and restore of vector
+ * registers V0 to V15 and floating-point controls.
+ *
+ * Copyright IBM Corp. 2015
+ * Author(s): Hendrik Brueckner <brueckner@linux.vnet.ibm.com>
+ */
+
+#ifndef _ASM_S390_FPU_API_H
+#define _ASM_S390_FPU_API_H
+
+#include <linux/preempt.h>
+
+void save_fpu_regs(void);
+
+static inline int test_fp_ctl(u32 fpc)
+{
+ u32 orig_fpc;
+ int rc;
+
+ asm volatile(
+ " efpc %1\n"
+ " sfpc %2\n"
+ "0: sfpc %1\n"
+ " la %0,0\n"
+ "1:\n"
+ EX_TABLE(0b,1b)
+ : "=d" (rc), "=&d" (orig_fpc)
+ : "d" (fpc), "0" (-EINVAL));
+ return rc;
+}
+
+#define KERNEL_FPC 1
+#define KERNEL_VXR_V0V7 2
+#define KERNEL_VXR_V8V15 4
+#define KERNEL_VXR_V16V23 8
+#define KERNEL_VXR_V24V31 16
+
+#define KERNEL_VXR_LOW (KERNEL_VXR_V0V7|KERNEL_VXR_V8V15)
+#define KERNEL_VXR_MID (KERNEL_VXR_V8V15|KERNEL_VXR_V16V23)
+#define KERNEL_VXR_HIGH (KERNEL_VXR_V16V23|KERNEL_VXR_V24V31)
+
+#define KERNEL_VXR (KERNEL_VXR_LOW|KERNEL_VXR_HIGH)
+#define KERNEL_FPR (KERNEL_FPC|KERNEL_VXR_V0V7)
+
+struct kernel_fpu;
+
+/*
+ * Note that the functions below must be called with preemption disabled.
+ * Do not enable preemption before calling __kernel_fpu_end() to prevent
+ * corruption of an existing kernel FPU state.
+ *
+ * Prefer using the kernel_fpu_begin()/kernel_fpu_end() pair of functions.
+ */
+void __kernel_fpu_begin(struct kernel_fpu *state, u32 flags);
+void __kernel_fpu_end(struct kernel_fpu *state, u32 flags);
+
+
+static inline void kernel_fpu_begin(struct kernel_fpu *state, u32 flags)
+{
+ preempt_disable();
+ state->mask = S390_lowcore.fpu_flags;
+ if (!test_cpu_flag(CIF_FPU))
+ /* Save user space FPU state and register contents */
+ save_fpu_regs();
+ else if (state->mask & flags)
+ /* Save FPU/vector register in-use by the kernel */
+ __kernel_fpu_begin(state, flags);
+ S390_lowcore.fpu_flags |= flags;
+}
+
+static inline void kernel_fpu_end(struct kernel_fpu *state, u32 flags)
+{
+ S390_lowcore.fpu_flags = state->mask;
+ if (state->mask & flags)
+ /* Restore FPU/vector register in-use by the kernel */
+ __kernel_fpu_end(state, flags);
+ preempt_enable();
+}
+
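+/*
+ * Minimal usage sketch (illustrative only; the caller and register range
+ * are hypothetical): a function that runs in process context and only
+ * needs the lower vector register half would do
+ *
+ *	struct kernel_fpu vxstate;
+ *
+ *	kernel_fpu_begin(&vxstate, KERNEL_VXR_LOW);
+ *	... use vector registers V0-V15 ...
+ *	kernel_fpu_end(&vxstate, KERNEL_VXR_LOW);
+ */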
+#endif /* _ASM_S390_FPU_API_H */
diff --git a/arch/s390/include/asm/fpu/internal.h b/arch/s390/include/asm/fpu/internal.h
new file mode 100644
index 000000000..4a71dbbf7
--- /dev/null
+++ b/arch/s390/include/asm/fpu/internal.h
@@ -0,0 +1,62 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * FPU state and register content conversion primitives
+ *
+ * Copyright IBM Corp. 2015
+ * Author(s): Hendrik Brueckner <brueckner@linux.vnet.ibm.com>
+ */
+
+#ifndef _ASM_S390_FPU_INTERNAL_H
+#define _ASM_S390_FPU_INTERNAL_H
+
+#include <linux/string.h>
+#include <asm/ctl_reg.h>
+#include <asm/fpu/types.h>
+
+static inline void save_vx_regs(__vector128 *vxrs)
+{
+ asm volatile(
+ " la 1,%0\n"
+ " .word 0xe70f,0x1000,0x003e\n" /* vstm 0,15,0(1) */
+ " .word 0xe70f,0x1100,0x0c3e\n" /* vstm 16,31,256(1) */
+ : "=Q" (*(struct vx_array *) vxrs) : : "1");
+}
+
+static inline void convert_vx_to_fp(freg_t *fprs, __vector128 *vxrs)
+{
+ int i;
+
+ for (i = 0; i < __NUM_FPRS; i++)
+ fprs[i] = *(freg_t *)(vxrs + i);
+}
+
+static inline void convert_fp_to_vx(__vector128 *vxrs, freg_t *fprs)
+{
+ int i;
+
+ for (i = 0; i < __NUM_FPRS; i++)
+ *(freg_t *)(vxrs + i) = fprs[i];
+}
+
+static inline void fpregs_store(_s390_fp_regs *fpregs, struct fpu *fpu)
+{
+ fpregs->pad = 0;
+ fpregs->fpc = fpu->fpc;
+ if (MACHINE_HAS_VX)
+ convert_vx_to_fp((freg_t *)&fpregs->fprs, fpu->vxrs);
+ else
+ memcpy((freg_t *)&fpregs->fprs, fpu->fprs,
+ sizeof(fpregs->fprs));
+}
+
+static inline void fpregs_load(_s390_fp_regs *fpregs, struct fpu *fpu)
+{
+ fpu->fpc = fpregs->fpc;
+ if (MACHINE_HAS_VX)
+ convert_fp_to_vx(fpu->vxrs, (freg_t *)&fpregs->fprs);
+ else
+ memcpy(fpu->fprs, (freg_t *)&fpregs->fprs,
+ sizeof(fpregs->fprs));
+}
+
+#endif /* _ASM_S390_FPU_INTERNAL_H */
diff --git a/arch/s390/include/asm/fpu/types.h b/arch/s390/include/asm/fpu/types.h
new file mode 100644
index 000000000..d889e9436
--- /dev/null
+++ b/arch/s390/include/asm/fpu/types.h
@@ -0,0 +1,38 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * FPU data structures
+ *
+ * Copyright IBM Corp. 2015
+ * Author(s): Hendrik Brueckner <brueckner@linux.vnet.ibm.com>
+ */
+
+#ifndef _ASM_S390_FPU_TYPES_H
+#define _ASM_S390_FPU_TYPES_H
+
+#include <asm/sigcontext.h>
+
+struct fpu {
+ __u32 fpc; /* Floating-point control */
+ void *regs; /* Pointer to the current save area */
+ union {
+ /* Floating-point register save area */
+ freg_t fprs[__NUM_FPRS];
+ /* Vector register save area */
+ __vector128 vxrs[__NUM_VXRS];
+ };
+};
+
+/* VX array structure for address operand constraints in inline assemblies */
+struct vx_array { __vector128 _[__NUM_VXRS]; };
+
+/* In-kernel FPU state structure */
+struct kernel_fpu {
+ u32 mask;
+ u32 fpc;
+ union {
+ freg_t fprs[__NUM_FPRS];
+ __vector128 vxrs[__NUM_VXRS];
+ };
+};
+
+#endif /* _ASM_S390_FPU_TYPES_H */
diff --git a/arch/s390/include/asm/ftrace.h b/arch/s390/include/asm/ftrace.h
new file mode 100644
index 000000000..ec698fc52
--- /dev/null
+++ b/arch/s390/include/asm/ftrace.h
@@ -0,0 +1,86 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_S390_FTRACE_H
+#define _ASM_S390_FTRACE_H
+
+#define ARCH_SUPPORTS_FTRACE_OPS 1
+
+#if defined(CC_USING_HOTPATCH) || defined(CC_USING_NOP_MCOUNT)
+#define MCOUNT_INSN_SIZE 6
+#else
+#define MCOUNT_INSN_SIZE 24
+#define MCOUNT_RETURN_FIXUP 18
+#endif
+
+#ifndef __ASSEMBLY__
+
+#define ftrace_return_address(n) __builtin_return_address(n)
+
+void _mcount(void);
+void ftrace_caller(void);
+
+extern char ftrace_graph_caller_end;
+extern unsigned long ftrace_plt;
+extern void *ftrace_func;
+
+struct dyn_arch_ftrace { };
+
+#define MCOUNT_ADDR ((unsigned long)_mcount)
+#define FTRACE_ADDR ((unsigned long)ftrace_caller)
+
+#define KPROBE_ON_FTRACE_NOP 0
+#define KPROBE_ON_FTRACE_CALL 1
+
+static inline unsigned long ftrace_call_adjust(unsigned long addr)
+{
+ return addr;
+}
+
+struct ftrace_insn {
+ u16 opc;
+ s32 disp;
+} __packed;
+
+static inline void ftrace_generate_nop_insn(struct ftrace_insn *insn)
+{
+#ifdef CONFIG_FUNCTION_TRACER
+#if defined(CC_USING_HOTPATCH) || defined(CC_USING_NOP_MCOUNT)
+ /* brcl 0,0 */
+ insn->opc = 0xc004;
+ insn->disp = 0;
+#else
+ /* jg .+24 */
+ insn->opc = 0xc0f4;
+ insn->disp = MCOUNT_INSN_SIZE / 2;
+#endif
+#endif
+}
+
+static inline int is_ftrace_nop(struct ftrace_insn *insn)
+{
+#ifdef CONFIG_FUNCTION_TRACER
+#if defined(CC_USING_HOTPATCH) || defined(CC_USING_NOP_MCOUNT)
+ if (insn->disp == 0)
+ return 1;
+#else
+ if (insn->disp == MCOUNT_INSN_SIZE / 2)
+ return 1;
+#endif
+#endif
+ return 0;
+}
+
+static inline void ftrace_generate_call_insn(struct ftrace_insn *insn,
+ unsigned long ip)
+{
+#ifdef CONFIG_FUNCTION_TRACER
+ unsigned long target;
+
+ /* brasl r0,ftrace_caller */
+ target = is_module_addr((void *) ip) ? ftrace_plt : FTRACE_ADDR;
+ insn->opc = 0xc005;
+ insn->disp = (target - ip) / 2;
+#endif
+}
+
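+/*
+ * Encoding sketch (hypothetical addresses): brasl/brcl displacements are
+ * counted in halfwords, so patching a call at ip 0x500 to a target at
+ * 0x1000 yields insn->disp = (0x1000 - 0x500) / 2 = 0x580, while the
+ * non-hotpatch nop variant "jg .+24" uses MCOUNT_INSN_SIZE / 2 = 12
+ * halfwords.
+ */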
+#endif /* __ASSEMBLY__ */
+#endif /* _ASM_S390_FTRACE_H */
diff --git a/arch/s390/include/asm/futex.h b/arch/s390/include/asm/futex.h
new file mode 100644
index 000000000..5e97a4353
--- /dev/null
+++ b/arch/s390/include/asm/futex.h
@@ -0,0 +1,87 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_S390_FUTEX_H
+#define _ASM_S390_FUTEX_H
+
+#include <linux/uaccess.h>
+#include <linux/futex.h>
+#include <asm/mmu_context.h>
+#include <asm/errno.h>
+
+#define __futex_atomic_op(insn, ret, oldval, newval, uaddr, oparg) \
+ asm volatile( \
+ " sacf 256\n" \
+ "0: l %1,0(%6)\n" \
+ "1:"insn \
+ "2: cs %1,%2,0(%6)\n" \
+ "3: jl 1b\n" \
+ " lhi %0,0\n" \
+ "4: sacf 768\n" \
+ EX_TABLE(0b,4b) EX_TABLE(2b,4b) EX_TABLE(3b,4b) \
+ : "=d" (ret), "=&d" (oldval), "=&d" (newval), \
+ "=m" (*uaddr) \
+ : "0" (-EFAULT), "d" (oparg), "a" (uaddr), \
+ "m" (*uaddr) : "cc");
+
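+/*
+ * In effect, the macro above implements the following compare-and-swap
+ * retry loop on the user address (a sketch, not the generated code;
+ * <op> stands for the operation passed in via "insn"):
+ *
+ *	oldval = *uaddr;
+ *	do {
+ *		newval = <op>(oldval, oparg);
+ *	} while (cs of *uaddr from oldval to newval fails,
+ *		 with oldval reloaded by the cs instruction on failure);
+ *	ret = 0;
+ */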
+static inline int arch_futex_atomic_op_inuser(int op, int oparg, int *oval,
+ u32 __user *uaddr)
+{
+ int oldval = 0, newval, ret;
+ mm_segment_t old_fs;
+
+ old_fs = enable_sacf_uaccess();
+ pagefault_disable();
+ switch (op) {
+ case FUTEX_OP_SET:
+ __futex_atomic_op("lr %2,%5\n",
+ ret, oldval, newval, uaddr, oparg);
+ break;
+ case FUTEX_OP_ADD:
+ __futex_atomic_op("lr %2,%1\nar %2,%5\n",
+ ret, oldval, newval, uaddr, oparg);
+ break;
+ case FUTEX_OP_OR:
+ __futex_atomic_op("lr %2,%1\nor %2,%5\n",
+ ret, oldval, newval, uaddr, oparg);
+ break;
+ case FUTEX_OP_ANDN:
+ __futex_atomic_op("lr %2,%1\nnr %2,%5\n",
+ ret, oldval, newval, uaddr, oparg);
+ break;
+ case FUTEX_OP_XOR:
+ __futex_atomic_op("lr %2,%1\nxr %2,%5\n",
+ ret, oldval, newval, uaddr, oparg);
+ break;
+ default:
+ ret = -ENOSYS;
+ }
+ pagefault_enable();
+ disable_sacf_uaccess(old_fs);
+
+ if (!ret)
+ *oval = oldval;
+
+ return ret;
+}
+
+static inline int futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr,
+ u32 oldval, u32 newval)
+{
+ mm_segment_t old_fs;
+ int ret;
+
+ old_fs = enable_sacf_uaccess();
+ asm volatile(
+ " sacf 256\n"
+ "0: cs %1,%4,0(%5)\n"
+ "1: la %0,0\n"
+ "2: sacf 768\n"
+ EX_TABLE(0b,2b) EX_TABLE(1b,2b)
+ : "=d" (ret), "+d" (oldval), "=m" (*uaddr)
+ : "0" (-EFAULT), "d" (newval), "a" (uaddr), "m" (*uaddr)
+ : "cc", "memory");
+ disable_sacf_uaccess(old_fs);
+ *uval = oldval;
+ return ret;
+}
+
+#endif /* _ASM_S390_FUTEX_H */
diff --git a/arch/s390/include/asm/gmap.h b/arch/s390/include/asm/gmap.h
new file mode 100644
index 000000000..fcbd638fb
--- /dev/null
+++ b/arch/s390/include/asm/gmap.h
@@ -0,0 +1,145 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * KVM guest address space mapping code
+ *
+ * Copyright IBM Corp. 2007, 2016
+ * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>
+ */
+
+#ifndef _ASM_S390_GMAP_H
+#define _ASM_S390_GMAP_H
+
+/* Generic bits for GMAP notification on DAT table entry changes. */
+#define GMAP_NOTIFY_SHADOW 0x2
+#define GMAP_NOTIFY_MPROT 0x1
+
+/* Status bits only for huge segment entries */
+#define _SEGMENT_ENTRY_GMAP_IN 0x8000 /* invalidation notify bit */
+#define _SEGMENT_ENTRY_GMAP_UC 0x4000 /* dirty (migration) */
+
+/**
+ * struct gmap_struct - guest address space
+ * @list: list head for the mm->context gmap list
+ * @crst_list: list of all crst tables used in the guest address space
+ * @mm: pointer to the parent mm_struct
+ * @guest_to_host: radix tree with guest to host address translation
+ * @host_to_guest: radix tree with pointer to segment table entries
+ * @guest_table_lock: spinlock to protect all entries in the guest page table
+ * @ref_count: reference counter for the gmap structure
+ * @table: pointer to the page directory
+ * @asce: address space control element for gmap page table
+ * @asce_end: end of the guest address space range handled by this gmap
+ * @private: private data pointer for use by the owner of the gmap (e.g. kvm)
+ * @pfault_enabled: defines if pfaults are applicable for the guest
+ * @host_to_rmap: radix tree with gmap_rmap lists
+ * @children: list of shadow gmap structures
+ * @pt_list: list of all page tables used in the shadow guest address space
+ * @shadow_lock: spinlock to protect the shadow gmap list
+ * @parent: pointer to the parent gmap for shadow guest address spaces
+ * @orig_asce: ASCE for which the shadow page table has been created
+ * @edat_level: edat level to be used for the shadow translation
+ * @removed: flag to indicate if a shadow guest address space has been removed
+ * @initialized: flag to indicate if a shadow guest address space can be used
+ */
+struct gmap {
+ struct list_head list;
+ struct list_head crst_list;
+ struct mm_struct *mm;
+ struct radix_tree_root guest_to_host;
+ struct radix_tree_root host_to_guest;
+ spinlock_t guest_table_lock;
+ atomic_t ref_count;
+ unsigned long *table;
+ unsigned long asce;
+ unsigned long asce_end;
+ void *private;
+ bool pfault_enabled;
+ /* Additional data for shadow guest address spaces */
+ struct radix_tree_root host_to_rmap;
+ struct list_head children;
+ struct list_head pt_list;
+ spinlock_t shadow_lock;
+ struct gmap *parent;
+ unsigned long orig_asce;
+ int edat_level;
+ bool removed;
+ bool initialized;
+};
+
+/**
+ * struct gmap_rmap - reverse mapping for shadow page table entries
+ * @next: pointer to next rmap in the list
+ * @raddr: virtual rmap address in the shadow guest address space
+ */
+struct gmap_rmap {
+ struct gmap_rmap *next;
+ unsigned long raddr;
+};
+
+#define gmap_for_each_rmap(pos, head) \
+ for (pos = (head); pos; pos = pos->next)
+
+#define gmap_for_each_rmap_safe(pos, n, head) \
+ for (pos = (head); n = pos ? pos->next : NULL, pos; pos = n)
+
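+/*
+ * Iteration sketch (illustrative only): freeing an rmap chain that was
+ * looked up in the host_to_rmap radix tree could look like
+ *
+ *	struct gmap_rmap *rmap, *rnext;
+ *
+ *	gmap_for_each_rmap_safe(rmap, rnext, head)
+ *		kfree(rmap);
+ */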
+/**
+ * struct gmap_notifier - notify function block for page invalidation
+ * @notifier_call: address of callback function
+ */
+struct gmap_notifier {
+ struct list_head list;
+ struct rcu_head rcu;
+ void (*notifier_call)(struct gmap *gmap, unsigned long start,
+ unsigned long end);
+};
+
+static inline int gmap_is_shadow(struct gmap *gmap)
+{
+ return !!gmap->parent;
+}
+
+struct gmap *gmap_create(struct mm_struct *mm, unsigned long limit);
+void gmap_remove(struct gmap *gmap);
+struct gmap *gmap_get(struct gmap *gmap);
+void gmap_put(struct gmap *gmap);
+
+void gmap_enable(struct gmap *gmap);
+void gmap_disable(struct gmap *gmap);
+struct gmap *gmap_get_enabled(void);
+int gmap_map_segment(struct gmap *gmap, unsigned long from,
+ unsigned long to, unsigned long len);
+int gmap_unmap_segment(struct gmap *gmap, unsigned long to, unsigned long len);
+unsigned long __gmap_translate(struct gmap *, unsigned long gaddr);
+unsigned long gmap_translate(struct gmap *, unsigned long gaddr);
+int __gmap_link(struct gmap *gmap, unsigned long gaddr, unsigned long vmaddr);
+int gmap_fault(struct gmap *, unsigned long gaddr, unsigned int fault_flags);
+void gmap_discard(struct gmap *, unsigned long from, unsigned long to);
+void __gmap_zap(struct gmap *, unsigned long gaddr);
+void gmap_unlink(struct mm_struct *, unsigned long *table, unsigned long vmaddr);
+
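+/*
+ * Creation sketch (illustrative only; "limit", "from", "to" and "size"
+ * are placeholders): a gmap user such as KVM typically does
+ *
+ *	gmap = gmap_create(current->mm, limit);
+ *	if (!gmap)
+ *		return -ENOMEM;
+ *	rc = gmap_map_segment(gmap, from, to, size);
+ *	...
+ *	gmap_remove(gmap);
+ */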
+int gmap_read_table(struct gmap *gmap, unsigned long gaddr, unsigned long *val);
+
+struct gmap *gmap_shadow(struct gmap *parent, unsigned long asce,
+ int edat_level);
+int gmap_shadow_valid(struct gmap *sg, unsigned long asce, int edat_level);
+int gmap_shadow_r2t(struct gmap *sg, unsigned long saddr, unsigned long r2t,
+ int fake);
+int gmap_shadow_r3t(struct gmap *sg, unsigned long saddr, unsigned long r3t,
+ int fake);
+int gmap_shadow_sgt(struct gmap *sg, unsigned long saddr, unsigned long sgt,
+ int fake);
+int gmap_shadow_pgt(struct gmap *sg, unsigned long saddr, unsigned long pgt,
+ int fake);
+int gmap_shadow_pgt_lookup(struct gmap *sg, unsigned long saddr,
+ unsigned long *pgt, int *dat_protection, int *fake);
+int gmap_shadow_page(struct gmap *sg, unsigned long saddr, pte_t pte);
+
+void gmap_register_pte_notifier(struct gmap_notifier *);
+void gmap_unregister_pte_notifier(struct gmap_notifier *);
+void gmap_pte_notify(struct mm_struct *, unsigned long addr, pte_t *,
+ unsigned long bits);
+
+int gmap_mprotect_notify(struct gmap *, unsigned long start,
+ unsigned long len, int prot);
+
+void gmap_sync_dirty_log_pmd(struct gmap *gmap, unsigned long dirty_bitmap[4],
+ unsigned long gaddr, unsigned long vmaddr);
+#endif /* _ASM_S390_GMAP_H */
diff --git a/arch/s390/include/asm/hardirq.h b/arch/s390/include/asm/hardirq.h
new file mode 100644
index 000000000..dfbc3c6c0
--- /dev/null
+++ b/arch/s390/include/asm/hardirq.h
@@ -0,0 +1,29 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * S390 version
+ * Copyright IBM Corp. 1999, 2000
+ * Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com),
+ * Denis Joseph Barrow (djbarrow@de.ibm.com,barrow_dj@yahoo.com)
+ *
+ * Derived from "include/asm-i386/hardirq.h"
+ */
+
+#ifndef __ASM_HARDIRQ_H
+#define __ASM_HARDIRQ_H
+
+#include <asm/lowcore.h>
+
+#define local_softirq_pending() (S390_lowcore.softirq_pending)
+#define set_softirq_pending(x) (S390_lowcore.softirq_pending = (x))
+#define or_softirq_pending(x) (S390_lowcore.softirq_pending |= (x))
+
+#define __ARCH_IRQ_STAT
+#define __ARCH_HAS_DO_SOFTIRQ
+#define __ARCH_IRQ_EXIT_IRQS_DISABLED
+
+static inline void ack_bad_irq(unsigned int irq)
+{
+ printk(KERN_CRIT "unexpected IRQ trap at vector %02x\n", irq);
+}
+
+#endif /* __ASM_HARDIRQ_H */
diff --git a/arch/s390/include/asm/hugetlb.h b/arch/s390/include/asm/hugetlb.h
new file mode 100644
index 000000000..2d1afa58a
--- /dev/null
+++ b/arch/s390/include/asm/hugetlb.h
@@ -0,0 +1,122 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * IBM System z Huge TLB Page Support for Kernel.
+ *
+ * Copyright IBM Corp. 2008
+ * Author(s): Gerald Schaefer <gerald.schaefer@de.ibm.com>
+ */
+
+#ifndef _ASM_S390_HUGETLB_H
+#define _ASM_S390_HUGETLB_H
+
+#include <asm/page.h>
+#include <asm/pgtable.h>
+
+
+#define is_hugepage_only_range(mm, addr, len) 0
+#define hugetlb_free_pgd_range free_pgd_range
+#define hugepages_supported() (MACHINE_HAS_EDAT1)
+
+void set_huge_pte_at(struct mm_struct *mm, unsigned long addr,
+ pte_t *ptep, pte_t pte);
+pte_t huge_ptep_get(pte_t *ptep);
+pte_t huge_ptep_get_and_clear(struct mm_struct *mm,
+ unsigned long addr, pte_t *ptep);
+
+/*
+ * If the arch doesn't supply something else, assume that hugepage
+ * size aligned regions are ok without further preparation.
+ */
+static inline int prepare_hugepage_range(struct file *file,
+ unsigned long addr, unsigned long len)
+{
+ if (len & ~HPAGE_MASK)
+ return -EINVAL;
+ if (addr & ~HPAGE_MASK)
+ return -EINVAL;
+ return 0;
+}
+
+static inline void arch_clear_hugepage_flags(struct page *page)
+{
+ clear_bit(PG_arch_1, &page->flags);
+}
+
+static inline void huge_pte_clear(struct mm_struct *mm, unsigned long addr,
+ pte_t *ptep, unsigned long sz)
+{
+ if ((pte_val(*ptep) & _REGION_ENTRY_TYPE_MASK) == _REGION_ENTRY_TYPE_R3)
+ pte_val(*ptep) = _REGION3_ENTRY_EMPTY;
+ else
+ pte_val(*ptep) = _SEGMENT_ENTRY_EMPTY;
+}
+
+static inline void huge_ptep_clear_flush(struct vm_area_struct *vma,
+ unsigned long address, pte_t *ptep)
+{
+ huge_ptep_get_and_clear(vma->vm_mm, address, ptep);
+}
+
+static inline int huge_ptep_set_access_flags(struct vm_area_struct *vma,
+ unsigned long addr, pte_t *ptep,
+ pte_t pte, int dirty)
+{
+ int changed = !pte_same(huge_ptep_get(ptep), pte);
+ if (changed) {
+ huge_ptep_get_and_clear(vma->vm_mm, addr, ptep);
+ set_huge_pte_at(vma->vm_mm, addr, ptep, pte);
+ }
+ return changed;
+}
+
+static inline void huge_ptep_set_wrprotect(struct mm_struct *mm,
+ unsigned long addr, pte_t *ptep)
+{
+ pte_t pte = huge_ptep_get_and_clear(mm, addr, ptep);
+ set_huge_pte_at(mm, addr, ptep, pte_wrprotect(pte));
+}
+
+static inline pte_t mk_huge_pte(struct page *page, pgprot_t pgprot)
+{
+ return mk_pte(page, pgprot);
+}
+
+static inline int huge_pte_none(pte_t pte)
+{
+ return pte_none(pte);
+}
+
+static inline int huge_pte_write(pte_t pte)
+{
+ return pte_write(pte);
+}
+
+static inline int huge_pte_dirty(pte_t pte)
+{
+ return pte_dirty(pte);
+}
+
+static inline pte_t huge_pte_mkwrite(pte_t pte)
+{
+ return pte_mkwrite(pte);
+}
+
+static inline pte_t huge_pte_mkdirty(pte_t pte)
+{
+ return pte_mkdirty(pte);
+}
+
+static inline pte_t huge_pte_wrprotect(pte_t pte)
+{
+ return pte_wrprotect(pte);
+}
+
+static inline pte_t huge_pte_modify(pte_t pte, pgprot_t newprot)
+{
+ return pte_modify(pte, newprot);
+}
+
+#ifdef CONFIG_ARCH_HAS_GIGANTIC_PAGE
+static inline bool gigantic_page_supported(void) { return true; }
+#endif
+#endif /* _ASM_S390_HUGETLB_H */
diff --git a/arch/s390/include/asm/hw_irq.h b/arch/s390/include/asm/hw_irq.h
new file mode 100644
index 000000000..adae17675
--- /dev/null
+++ b/arch/s390/include/asm/hw_irq.h
@@ -0,0 +1,12 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _HW_IRQ_H
+#define _HW_IRQ_H
+
+#include <linux/msi.h>
+#include <linux/pci.h>
+
+void __init init_airq_interrupts(void);
+void __init init_cio_interrupts(void);
+void __init init_ext_interrupts(void);
+
+#endif
diff --git a/arch/s390/include/asm/idals.h b/arch/s390/include/asm/idals.h
new file mode 100644
index 000000000..15578fd76
--- /dev/null
+++ b/arch/s390/include/asm/idals.h
@@ -0,0 +1,233 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Author(s)......: Holger Smolinski <Holger.Smolinski@de.ibm.com>
+ * Martin Schwidefsky <schwidefsky@de.ibm.com>
+ * Bugreports.to..: <Linux390@de.ibm.com>
+ * Copyright IBM Corp. 2000
+ *
+ * History of changes
+ * 07/24/00 new file
+ * 05/04/02 code restructuring.
+ */
+
+#ifndef _S390_IDALS_H
+#define _S390_IDALS_H
+
+#include <linux/errno.h>
+#include <linux/err.h>
+#include <linux/types.h>
+#include <linux/slab.h>
+#include <asm/cio.h>
+#include <linux/uaccess.h>
+
+#define IDA_SIZE_LOG 12 /* 11 for 2k, 12 for 4k */
+#define IDA_BLOCK_SIZE (1L<<IDA_SIZE_LOG)
+
+/*
+ * Test if an address/length pair needs an idal list.
+ */
+static inline int
+idal_is_needed(void *vaddr, unsigned int length)
+{
+ return ((__pa(vaddr) + length - 1) >> 31) != 0;
+}
+
+
+/*
+ * Return the number of idal words needed for an address/length pair.
+ */
+static inline unsigned int idal_nr_words(void *vaddr, unsigned int length)
+{
+ return ((__pa(vaddr) & (IDA_BLOCK_SIZE-1)) + length +
+ (IDA_BLOCK_SIZE-1)) >> IDA_SIZE_LOG;
+}
+
+/*
+ * Create the list of idal words for an address/length pair.
+ */
+static inline unsigned long *idal_create_words(unsigned long *idaws,
+ void *vaddr, unsigned int length)
+{
+ unsigned long paddr;
+ unsigned int cidaw;
+
+ paddr = __pa(vaddr);
+ cidaw = ((paddr & (IDA_BLOCK_SIZE-1)) + length +
+ (IDA_BLOCK_SIZE-1)) >> IDA_SIZE_LOG;
+ *idaws++ = paddr;
+ paddr &= -IDA_BLOCK_SIZE;
+ while (--cidaw > 0) {
+ paddr += IDA_BLOCK_SIZE;
+ *idaws++ = paddr;
+ }
+ return idaws;
+}
+
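+/*
+ * Worked example (hypothetical numbers): for a 10000-byte buffer at
+ * physical address 0x12345800, idal_nr_words() returns
+ * ((0x800 + 10000) + 4095) >> 12 = 3, and idal_create_words() stores the
+ * idal words 0x12345800, 0x12346000 and 0x12347000, i.e. the real start
+ * address followed by the 4K-aligned addresses of the following blocks.
+ */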
+/*
+ * Set the data address in the CCW.
+ * If necessary, allocate an IDAL and set the appropriate flags.
+ */
+static inline int
+set_normalized_cda(struct ccw1 * ccw, void *vaddr)
+{
+ unsigned int nridaws;
+ unsigned long *idal;
+
+ if (ccw->flags & CCW_FLAG_IDA)
+ return -EINVAL;
+ nridaws = idal_nr_words(vaddr, ccw->count);
+ if (nridaws > 0) {
+ idal = kmalloc(nridaws * sizeof(unsigned long),
+ GFP_ATOMIC | GFP_DMA );
+ if (idal == NULL)
+ return -ENOMEM;
+ idal_create_words(idal, vaddr, ccw->count);
+ ccw->flags |= CCW_FLAG_IDA;
+ vaddr = idal;
+ }
+ ccw->cda = (__u32)(unsigned long) vaddr;
+ return 0;
+}
+
+/*
+ * Releases any allocated IDAL related to the CCW.
+ */
+static inline void
+clear_normalized_cda(struct ccw1 * ccw)
+{
+ if (ccw->flags & CCW_FLAG_IDA) {
+ kfree((void *)(unsigned long) ccw->cda);
+ ccw->flags &= ~CCW_FLAG_IDA;
+ }
+ ccw->cda = 0;
+}
+
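+/*
+ * Usage sketch (illustrative only; "data" and "datasize" are
+ * placeholders): callers pair the two helpers around the lifetime of
+ * a CCW, e.g.
+ *
+ *	ccw->count = datasize;
+ *	if (set_normalized_cda(ccw, data))
+ *		return -ENOMEM;
+ *	... start the channel program and wait for completion ...
+ *	clear_normalized_cda(ccw);
+ */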
+/*
+ * Idal buffer extension
+ */
+struct idal_buffer {
+ size_t size;
+ size_t page_order;
+ void *data[0];
+};
+
+/*
+ * Allocate an idal buffer
+ */
+static inline struct idal_buffer *
+idal_buffer_alloc(size_t size, int page_order)
+{
+ struct idal_buffer *ib;
+ int nr_chunks, nr_ptrs, i;
+
+ nr_ptrs = (size + IDA_BLOCK_SIZE - 1) >> IDA_SIZE_LOG;
+ nr_chunks = (4096 << page_order) >> IDA_SIZE_LOG;
+ ib = kmalloc(sizeof(struct idal_buffer) + nr_ptrs*sizeof(void *),
+ GFP_DMA | GFP_KERNEL);
+ if (ib == NULL)
+ return ERR_PTR(-ENOMEM);
+ ib->size = size;
+ ib->page_order = page_order;
+ for (i = 0; i < nr_ptrs; i++) {
+ if ((i & (nr_chunks - 1)) != 0) {
+ ib->data[i] = ib->data[i-1] + IDA_BLOCK_SIZE;
+ continue;
+ }
+ ib->data[i] = (void *)
+ __get_free_pages(GFP_KERNEL, page_order);
+ if (ib->data[i] != NULL)
+ continue;
+ // Not enough memory
+ while (i >= nr_chunks) {
+ i -= nr_chunks;
+ free_pages((unsigned long) ib->data[i],
+ ib->page_order);
+ }
+ kfree(ib);
+ return ERR_PTR(-ENOMEM);
+ }
+ return ib;
+}
+
+/*
+ * Free an idal buffer.
+ */
+static inline void
+idal_buffer_free(struct idal_buffer *ib)
+{
+ int nr_chunks, nr_ptrs, i;
+
+ nr_ptrs = (ib->size + IDA_BLOCK_SIZE - 1) >> IDA_SIZE_LOG;
+ nr_chunks = (4096 << ib->page_order) >> IDA_SIZE_LOG;
+ for (i = 0; i < nr_ptrs; i += nr_chunks)
+ free_pages((unsigned long) ib->data[i], ib->page_order);
+ kfree(ib);
+}
+
+/*
+ * Test if an idal list is really needed.
+ */
+static inline int
+__idal_buffer_is_needed(struct idal_buffer *ib)
+{
+ return ib->size > (4096ul << ib->page_order) ||
+ idal_is_needed(ib->data[0], ib->size);
+}
+
+/*
+ * Set channel data address to idal buffer.
+ */
+static inline void
+idal_buffer_set_cda(struct idal_buffer *ib, struct ccw1 *ccw)
+{
+ if (__idal_buffer_is_needed(ib)) {
+ // setup idals;
+ ccw->cda = (u32)(addr_t) ib->data;
+ ccw->flags |= CCW_FLAG_IDA;
+ } else
+ // we do not need idals - use direct addressing
+ ccw->cda = (u32)(addr_t) ib->data[0];
+ ccw->count = ib->size;
+}
+
+/*
+ * Copy count bytes from an idal buffer to user memory
+ */
+static inline size_t
+idal_buffer_to_user(struct idal_buffer *ib, void __user *to, size_t count)
+{
+ size_t left;
+ int i;
+
+ BUG_ON(count > ib->size);
+ for (i = 0; count > IDA_BLOCK_SIZE; i++) {
+ left = copy_to_user(to, ib->data[i], IDA_BLOCK_SIZE);
+ if (left)
+ return left + count - IDA_BLOCK_SIZE;
+ to = (void __user *) to + IDA_BLOCK_SIZE;
+ count -= IDA_BLOCK_SIZE;
+ }
+ return copy_to_user(to, ib->data[i], count);
+}
+
+/*
+ * Copy count bytes from user memory to an idal buffer
+ */
+static inline size_t
+idal_buffer_from_user(struct idal_buffer *ib, const void __user *from, size_t count)
+{
+ size_t left;
+ int i;
+
+ BUG_ON(count > ib->size);
+ for (i = 0; count > IDA_BLOCK_SIZE; i++) {
+ left = copy_from_user(ib->data[i], from, IDA_BLOCK_SIZE);
+ if (left)
+ return left + count - IDA_BLOCK_SIZE;
+ from = (void __user *) from + IDA_BLOCK_SIZE;
+ count -= IDA_BLOCK_SIZE;
+ }
+ return copy_from_user(ib->data[i], from, count);
+}
+
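+/*
+ * Usage sketch (illustrative only; "size", "ccw" and "ubuf" are
+ * placeholders): a driver reading device data into user space could do
+ *
+ *	struct idal_buffer *ib;
+ *
+ *	ib = idal_buffer_alloc(size, 0);
+ *	if (IS_ERR(ib))
+ *		return PTR_ERR(ib);
+ *	idal_buffer_set_cda(ib, ccw);
+ *	... start the channel program and wait for completion ...
+ *	rc = idal_buffer_to_user(ib, ubuf, size) ? -EFAULT : 0;
+ *	idal_buffer_free(ib);
+ */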
+#endif
diff --git a/arch/s390/include/asm/idle.h b/arch/s390/include/asm/idle.h
new file mode 100644
index 000000000..6d4226dcf
--- /dev/null
+++ b/arch/s390/include/asm/idle.h
@@ -0,0 +1,30 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright IBM Corp. 2014
+ *
+ * Author: Martin Schwidefsky <schwidefsky@de.ibm.com>
+ */
+
+#ifndef _S390_IDLE_H
+#define _S390_IDLE_H
+
+#include <linux/types.h>
+#include <linux/device.h>
+#include <linux/seqlock.h>
+
+struct s390_idle_data {
+ seqcount_t seqcount;
+ unsigned long long idle_count;
+ unsigned long long idle_time;
+ unsigned long long clock_idle_enter;
+ unsigned long long clock_idle_exit;
+ unsigned long long timer_idle_enter;
+ unsigned long long timer_idle_exit;
+};
+
+extern struct device_attribute dev_attr_idle_count;
+extern struct device_attribute dev_attr_idle_time_us;
+
+void psw_idle(struct s390_idle_data *, unsigned long);
+
+#endif /* _S390_IDLE_H */
diff --git a/arch/s390/include/asm/io.h b/arch/s390/include/asm/io.h
new file mode 100644
index 000000000..f34d72934
--- /dev/null
+++ b/arch/s390/include/asm/io.h
@@ -0,0 +1,81 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * S390 version
+ * Copyright IBM Corp. 1999
+ * Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com)
+ *
+ * Derived from "include/asm-i386/io.h"
+ */
+
+#ifndef _S390_IO_H
+#define _S390_IO_H
+
+#include <linux/kernel.h>
+#include <asm/page.h>
+#include <asm/pci_io.h>
+
+#define xlate_dev_mem_ptr xlate_dev_mem_ptr
+void *xlate_dev_mem_ptr(phys_addr_t phys);
+#define unxlate_dev_mem_ptr unxlate_dev_mem_ptr
+void unxlate_dev_mem_ptr(phys_addr_t phys, void *addr);
+
+/*
+ * Convert a virtual cached pointer to an uncached pointer
+ */
+#define xlate_dev_kmem_ptr(p) p
+
+#define IO_SPACE_LIMIT 0
+
+#define ioremap_nocache(addr, size) ioremap(addr, size)
+#define ioremap_wc ioremap_nocache
+#define ioremap_wt ioremap_nocache
+
+static inline void __iomem *ioremap(unsigned long offset, unsigned long size)
+{
+ return (void __iomem *) offset;
+}
+
+static inline void iounmap(volatile void __iomem *addr)
+{
+}
+
+static inline void __iomem *ioport_map(unsigned long port, unsigned int nr)
+{
+ return NULL;
+}
+
+static inline void ioport_unmap(void __iomem *p)
+{
+}
+
+#ifdef CONFIG_PCI
+
+/*
+ * s390 needs a private implementation of pci_iomap since ioremap with its
+ * offset parameter isn't sufficient. That's because BAR spaces are not
+ * disjoint on s390, so we need the bar parameter of pci_iomap to find
+ * the corresponding device and create the mapping cookie.
+ */
+#define pci_iomap pci_iomap
+#define pci_iounmap pci_iounmap
+#define pci_iomap_wc pci_iomap
+#define pci_iomap_wc_range pci_iomap_range
+
+#define memcpy_fromio(dst, src, count) zpci_memcpy_fromio(dst, src, count)
+#define memcpy_toio(dst, src, count) zpci_memcpy_toio(dst, src, count)
+#define memset_io(dst, val, count) zpci_memset_io(dst, val, count)
+
+#define __raw_readb zpci_read_u8
+#define __raw_readw zpci_read_u16
+#define __raw_readl zpci_read_u32
+#define __raw_readq zpci_read_u64
+#define __raw_writeb zpci_write_u8
+#define __raw_writew zpci_write_u16
+#define __raw_writel zpci_write_u32
+#define __raw_writeq zpci_write_u64
+
+#endif /* CONFIG_PCI */
+
+#include <asm-generic/io.h>
+
+#endif
diff --git a/arch/s390/include/asm/ipl.h b/arch/s390/include/asm/ipl.h
new file mode 100644
index 000000000..ae5135704
--- /dev/null
+++ b/arch/s390/include/asm/ipl.h
@@ -0,0 +1,165 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * s390 (re)ipl support
+ *
+ * Copyright IBM Corp. 2007
+ */
+
+#ifndef _ASM_S390_IPL_H
+#define _ASM_S390_IPL_H
+
+#include <asm/lowcore.h>
+#include <asm/types.h>
+#include <asm/cio.h>
+#include <asm/setup.h>
+
+#define NSS_NAME_SIZE 8
+
+#define IPL_PARM_BLK_FCP_LEN (sizeof(struct ipl_list_hdr) + \
+ sizeof(struct ipl_block_fcp))
+
+#define IPL_PARM_BLK0_FCP_LEN (sizeof(struct ipl_block_fcp) + 16)
+
+#define IPL_PARM_BLK_CCW_LEN (sizeof(struct ipl_list_hdr) + \
+ sizeof(struct ipl_block_ccw))
+
+#define IPL_PARM_BLK0_CCW_LEN (sizeof(struct ipl_block_ccw) + 16)
+
+#define IPL_MAX_SUPPORTED_VERSION (0)
+
+struct ipl_list_hdr {
+ u32 len;
+ u8 reserved1[3];
+ u8 version;
+ u32 blk0_len;
+ u8 pbt;
+ u8 flags;
+ u16 reserved2;
+ u8 loadparm[8];
+} __attribute__((packed));
+
+struct ipl_block_fcp {
+ u8 reserved1[305-1];
+ u8 opt;
+ u8 reserved2[3];
+ u16 reserved3;
+ u16 devno;
+ u8 reserved4[4];
+ u64 wwpn;
+ u64 lun;
+ u32 bootprog;
+ u8 reserved5[12];
+ u64 br_lba;
+ u32 scp_data_len;
+ u8 reserved6[260];
+ u8 scp_data[];
+} __attribute__((packed));
+
+#define DIAG308_VMPARM_SIZE 64
+#define DIAG308_SCPDATA_SIZE (PAGE_SIZE - (sizeof(struct ipl_list_hdr) + \
+ offsetof(struct ipl_block_fcp, scp_data)))
+
+struct ipl_block_ccw {
+ u8 reserved1[84];
+ u16 reserved2 : 13;
+ u8 ssid : 3;
+ u16 devno;
+ u8 vm_flags;
+ u8 reserved3[3];
+ u32 vm_parm_len;
+ u8 nss_name[8];
+ u8 vm_parm[DIAG308_VMPARM_SIZE];
+ u8 reserved4[8];
+} __attribute__((packed));
+
+struct ipl_parameter_block {
+ struct ipl_list_hdr hdr;
+ union {
+ struct ipl_block_fcp fcp;
+ struct ipl_block_ccw ccw;
+ char raw[PAGE_SIZE - sizeof(struct ipl_list_hdr)];
+ } ipl_info;
+} __packed __aligned(PAGE_SIZE);
+
+struct save_area;
+struct save_area * __init save_area_alloc(bool is_boot_cpu);
+struct save_area * __init save_area_boot_cpu(void);
+void __init save_area_add_regs(struct save_area *, void *regs);
+void __init save_area_add_vxrs(struct save_area *, __vector128 *vxrs);
+
+extern void s390_reset_system(void);
+extern void ipl_store_parameters(void);
+extern size_t append_ipl_vmparm(char *, size_t);
+extern size_t append_ipl_scpdata(char *, size_t);
+
+enum ipl_type {
+ IPL_TYPE_UNKNOWN = 1,
+ IPL_TYPE_CCW = 2,
+ IPL_TYPE_FCP = 4,
+ IPL_TYPE_FCP_DUMP = 8,
+ IPL_TYPE_NSS = 16,
+};
+
+struct ipl_info
+{
+ enum ipl_type type;
+ union {
+ struct {
+ struct ccw_dev_id dev_id;
+ } ccw;
+ struct {
+ struct ccw_dev_id dev_id;
+ u64 wwpn;
+ u64 lun;
+ } fcp;
+ struct {
+ char name[NSS_NAME_SIZE + 1];
+ } nss;
+ } data;
+};
+
+extern struct ipl_info ipl_info;
+extern void setup_ipl(void);
+extern void set_os_info_reipl_block(void);
+
+/*
+ * DIAG 308 support
+ */
+enum diag308_subcode {
+ DIAG308_REL_HSA = 2,
+ DIAG308_LOAD_CLEAR = 3,
+ DIAG308_LOAD_NORMAL_DUMP = 4,
+ DIAG308_SET = 5,
+ DIAG308_STORE = 6,
+};
+
+enum diag308_ipl_type {
+ DIAG308_IPL_TYPE_FCP = 0,
+ DIAG308_IPL_TYPE_CCW = 2,
+};
+
+enum diag308_opt {
+ DIAG308_IPL_OPT_IPL = 0x10,
+ DIAG308_IPL_OPT_DUMP = 0x20,
+};
+
+enum diag308_flags {
+ DIAG308_FLAGS_LP_VALID = 0x80,
+};
+
+enum diag308_vm_flags {
+ DIAG308_VM_FLAGS_NSS_VALID = 0x80,
+ DIAG308_VM_FLAGS_VP_VALID = 0x40,
+};
+
+enum diag308_rc {
+ DIAG308_RC_OK = 0x0001,
+ DIAG308_RC_NOCONFIG = 0x0102,
+};
+
+extern int diag308(unsigned long subcode, void *addr);
+extern void diag308_reset(void);
+extern void store_status(void (*fn)(void *), void *data);
+extern void lgr_info_log(void);
+
+#endif /* _ASM_S390_IPL_H */
diff --git a/arch/s390/include/asm/irq.h b/arch/s390/include/asm/irq.h
new file mode 100644
index 000000000..2f7f27e54
--- /dev/null
+++ b/arch/s390/include/asm/irq.h
@@ -0,0 +1,116 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_IRQ_H
+#define _ASM_IRQ_H
+
+#define EXT_INTERRUPT 0
+#define IO_INTERRUPT 1
+#define THIN_INTERRUPT 2
+
+#define NR_IRQS_BASE 3
+
+#define NR_IRQS NR_IRQS_BASE
+#define NR_IRQS_LEGACY NR_IRQS_BASE
+
+/* External interruption codes */
+#define EXT_IRQ_INTERRUPT_KEY 0x0040
+#define EXT_IRQ_CLK_COMP 0x1004
+#define EXT_IRQ_CPU_TIMER 0x1005
+#define EXT_IRQ_WARNING_TRACK 0x1007
+#define EXT_IRQ_MALFUNC_ALERT 0x1200
+#define EXT_IRQ_EMERGENCY_SIG 0x1201
+#define EXT_IRQ_EXTERNAL_CALL 0x1202
+#define EXT_IRQ_TIMING_ALERT 0x1406
+#define EXT_IRQ_MEASURE_ALERT 0x1407
+#define EXT_IRQ_SERVICE_SIG 0x2401
+#define EXT_IRQ_CP_SERVICE 0x2603
+#define EXT_IRQ_IUCV 0x4000
+
+#ifndef __ASSEMBLY__
+
+#include <linux/hardirq.h>
+#include <linux/percpu.h>
+#include <linux/cache.h>
+#include <linux/types.h>
+
+enum interruption_class {
+ IRQEXT_CLK,
+ IRQEXT_EXC,
+ IRQEXT_EMS,
+ IRQEXT_TMR,
+ IRQEXT_TLA,
+ IRQEXT_PFL,
+ IRQEXT_DSD,
+ IRQEXT_VRT,
+ IRQEXT_SCP,
+ IRQEXT_IUC,
+ IRQEXT_CMS,
+ IRQEXT_CMC,
+ IRQEXT_FTP,
+ IRQIO_CIO,
+ IRQIO_QAI,
+ IRQIO_DAS,
+ IRQIO_C15,
+ IRQIO_C70,
+ IRQIO_TAP,
+ IRQIO_VMR,
+ IRQIO_LCS,
+ IRQIO_CTC,
+ IRQIO_APB,
+ IRQIO_ADM,
+ IRQIO_CSC,
+ IRQIO_PCI,
+ IRQIO_MSI,
+ IRQIO_VIR,
+ IRQIO_VAI,
+ NMI_NMI,
+ CPU_RST,
+ NR_ARCH_IRQS
+};
+
+struct irq_stat {
+ unsigned int irqs[NR_ARCH_IRQS];
+};
+
+DECLARE_PER_CPU_SHARED_ALIGNED(struct irq_stat, irq_stat);
+
+static __always_inline void inc_irq_stat(enum interruption_class irq)
+{
+ __this_cpu_inc(irq_stat.irqs[irq]);
+}
+
+struct ext_code {
+ unsigned short subcode;
+ unsigned short code;
+};
+
+typedef void (*ext_int_handler_t)(struct ext_code, unsigned int, unsigned long);
+
+int register_external_irq(u16 code, ext_int_handler_t handler);
+int unregister_external_irq(u16 code, ext_int_handler_t handler);
+
+enum irq_subclass {
+ IRQ_SUBCLASS_MEASUREMENT_ALERT = 5,
+ IRQ_SUBCLASS_SERVICE_SIGNAL = 9,
+};
+
+#define CR0_IRQ_SUBCLASS_MASK \
+ ((1UL << (63 - 30)) /* Warning Track */ | \
+ (1UL << (63 - 48)) /* Malfunction Alert */ | \
+ (1UL << (63 - 49)) /* Emergency Signal */ | \
+ (1UL << (63 - 50)) /* External Call */ | \
+ (1UL << (63 - 52)) /* Clock Comparator */ | \
+ (1UL << (63 - 53)) /* CPU Timer */ | \
+ (1UL << (63 - 54)) /* Service Signal */ | \
+ (1UL << (63 - 57)) /* Interrupt Key */ | \
+ (1UL << (63 - 58)) /* Measurement Alert */ | \
+ (1UL << (63 - 59)) /* Timing Alert */ | \
+ (1UL << (63 - 62))) /* IUCV */
+
+void irq_subclass_register(enum irq_subclass subclass);
+void irq_subclass_unregister(enum irq_subclass subclass);
+
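+/*
+ * Registration sketch (illustrative only; "my_ext_handler" is a
+ * hypothetical handler): a service-signal consumer would register as
+ *
+ *	static void my_ext_handler(struct ext_code code, unsigned int param32,
+ *				   unsigned long param64)
+ *	{
+ *		inc_irq_stat(IRQEXT_SCP);
+ *		...
+ *	}
+ *
+ *	rc = register_external_irq(EXT_IRQ_SERVICE_SIG, my_ext_handler);
+ *	irq_subclass_register(IRQ_SUBCLASS_SERVICE_SIGNAL);
+ */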
+#define irq_canonicalize(irq) (irq)
+
+#endif /* __ASSEMBLY__ */
+
+#endif /* _ASM_IRQ_H */
diff --git a/arch/s390/include/asm/irqflags.h b/arch/s390/include/asm/irqflags.h
new file mode 100644
index 000000000..586df4c9e
--- /dev/null
+++ b/arch/s390/include/asm/irqflags.h
@@ -0,0 +1,78 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright IBM Corp. 2006, 2010
+ * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>
+ */
+
+#ifndef __ASM_IRQFLAGS_H
+#define __ASM_IRQFLAGS_H
+
+#include <linux/types.h>
+
+#define ARCH_IRQ_ENABLED (3UL << (BITS_PER_LONG - 8))
+
+/* store then OR system mask. */
+#define __arch_local_irq_stosm(__or) \
+({ \
+ unsigned long __mask; \
+ asm volatile( \
+ " stosm %0,%1" \
+ : "=Q" (__mask) : "i" (__or) : "memory"); \
+ __mask; \
+})
+
+/* store then AND system mask. */
+#define __arch_local_irq_stnsm(__and) \
+({ \
+ unsigned long __mask; \
+ asm volatile( \
+ " stnsm %0,%1" \
+ : "=Q" (__mask) : "i" (__and) : "memory"); \
+ __mask; \
+})
+
+/* set system mask. */
+static inline notrace void __arch_local_irq_ssm(unsigned long flags)
+{
+ asm volatile("ssm %0" : : "Q" (flags) : "memory");
+}
+
+static inline notrace unsigned long arch_local_save_flags(void)
+{
+ return __arch_local_irq_stnsm(0xff);
+}
+
+static inline notrace unsigned long arch_local_irq_save(void)
+{
+ return __arch_local_irq_stnsm(0xfc);
+}
+
+static inline notrace void arch_local_irq_disable(void)
+{
+ arch_local_irq_save();
+}
+
+static inline notrace void arch_local_irq_enable(void)
+{
+ __arch_local_irq_stosm(0x03);
+}
+
+/* This only restores external and I/O interrupt state */
+static inline notrace void arch_local_irq_restore(unsigned long flags)
+{
+ /* only disabled->disabled and disabled->enabled is valid */
+ if (flags & ARCH_IRQ_ENABLED)
+ arch_local_irq_enable();
+}
+
+static inline notrace bool arch_irqs_disabled_flags(unsigned long flags)
+{
+ return !(flags & ARCH_IRQ_ENABLED);
+}
+
+static inline notrace bool arch_irqs_disabled(void)
+{
+ return arch_irqs_disabled_flags(arch_local_save_flags());
+}
+
+#endif /* __ASM_IRQFLAGS_H */
diff --git a/arch/s390/include/asm/isc.h b/arch/s390/include/asm/isc.h
new file mode 100644
index 000000000..6cb9e2ed0
--- /dev/null
+++ b/arch/s390/include/asm/isc.h
@@ -0,0 +1,30 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_S390_ISC_H
+#define _ASM_S390_ISC_H
+
+#include <linux/types.h>
+
+/*
+ * I/O interruption subclasses used by drivers.
+ * Please add all used iscs here so that it is possible to distribute
+ * isc usage between drivers.
+ * Reminder: 0 is highest priority, 7 lowest.
+ */
+#define MAX_ISC 7
+
+/* Regular I/O interrupts. */
+#define IO_SCH_ISC 3 /* regular I/O subchannels */
+#define CONSOLE_ISC 1 /* console I/O subchannel */
+#define EADM_SCH_ISC 4 /* EADM subchannels */
+#define CHSC_SCH_ISC 7 /* CHSC subchannels */
+#define VFIO_CCW_ISC IO_SCH_ISC /* VFIO-CCW I/O subchannels */
+/* Adapter interrupts. */
+#define QDIO_AIRQ_ISC IO_SCH_ISC /* I/O subchannel in qdio mode */
+#define PCI_ISC 2 /* PCI I/O subchannels */
+#define AP_ISC 6 /* adjunct processor (crypto) devices */
+
+/* Functions for registration of I/O interruption subclasses */
+void isc_register(unsigned int isc);
+void isc_unregister(unsigned int isc);
+
+#endif /* _ASM_S390_ISC_H */
diff --git a/arch/s390/include/asm/itcw.h b/arch/s390/include/asm/itcw.h
new file mode 100644
index 000000000..59b739613
--- /dev/null
+++ b/arch/s390/include/asm/itcw.h
@@ -0,0 +1,31 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Functions for incremental construction of fcx enabled I/O control blocks.
+ *
+ * Copyright IBM Corp. 2008
+ * Author(s): Peter Oberparleiter <peter.oberparleiter@de.ibm.com>
+ */
+
+#ifndef _ASM_S390_ITCW_H
+#define _ASM_S390_ITCW_H
+
+#include <linux/types.h>
+#include <asm/fcx.h>
+
+#define ITCW_OP_READ 0
+#define ITCW_OP_WRITE 1
+
+struct itcw;
+
+struct tcw *itcw_get_tcw(struct itcw *itcw);
+size_t itcw_calc_size(int intrg, int max_tidaws, int intrg_max_tidaws);
+struct itcw *itcw_init(void *buffer, size_t size, int op, int intrg,
+ int max_tidaws, int intrg_max_tidaws);
+struct dcw *itcw_add_dcw(struct itcw *itcw, u8 cmd, u8 flags, void *cd,
+ u8 cd_count, u32 count);
+struct tidaw *itcw_add_tidaw(struct itcw *itcw, u8 flags, void *addr,
+ u32 count);
+void itcw_set_data(struct itcw *itcw, void *addr, int use_tidal);
+void itcw_finalize(struct itcw *itcw);
+
+#endif /* _ASM_S390_ITCW_H */
diff --git a/arch/s390/include/asm/jump_label.h b/arch/s390/include/asm/jump_label.h
new file mode 100644
index 000000000..9c7dc970e
--- /dev/null
+++ b/arch/s390/include/asm/jump_label.h
@@ -0,0 +1,60 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_S390_JUMP_LABEL_H
+#define _ASM_S390_JUMP_LABEL_H
+
+#ifndef __ASSEMBLY__
+
+#include <linux/types.h>
+#include <linux/stringify.h>
+
+#define JUMP_LABEL_NOP_SIZE 6
+#define JUMP_LABEL_NOP_OFFSET 2
+
+#if __GNUC__ < 9
+#define JUMP_LABEL_STATIC_KEY_CONSTRAINT "X"
+#else
+#define JUMP_LABEL_STATIC_KEY_CONSTRAINT "jdd"
+#endif
+
+/*
+ * We use a brcl 0,2 instruction for jump labels at compile time so it
+ * can be easily distinguished from a hotpatch generated instruction.
+ */
+static __always_inline bool arch_static_branch(struct static_key *key, bool branch)
+{
+ asm_volatile_goto("0: brcl 0,"__stringify(JUMP_LABEL_NOP_OFFSET)"\n"
+ ".pushsection __jump_table, \"aw\"\n"
+ ".balign 8\n"
+ ".quad 0b, %l[label], %0+%1\n"
+ ".popsection\n"
+ : : JUMP_LABEL_STATIC_KEY_CONSTRAINT (key), "i" (branch) : : label);
+
+ return false;
+label:
+ return true;
+}
+
+static __always_inline bool arch_static_branch_jump(struct static_key *key, bool branch)
+{
+ asm_volatile_goto("0: brcl 15, %l[label]\n"
+ ".pushsection __jump_table, \"aw\"\n"
+ ".balign 8\n"
+ ".quad 0b, %l[label], %0+%1\n"
+ ".popsection\n"
+ : : JUMP_LABEL_STATIC_KEY_CONSTRAINT (key), "i" (branch) : : label);
+
+ return false;
+label:
+ return true;
+}
+
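+/*
+ * Usage sketch (illustrative only; "my_key" is a hypothetical static key,
+ * using the generic static key API): the brcl 0,2 emitted by
+ * arch_static_branch() above is patched to an unconditional brcl 15
+ * branch when the key is switched, e.g.
+ *
+ *	static DEFINE_STATIC_KEY_FALSE(my_key);
+ *
+ *	if (static_branch_unlikely(&my_key))
+ *		do_slow_path();
+ */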
+typedef unsigned long jump_label_t;
+
+struct jump_entry {
+ jump_label_t code;
+ jump_label_t target;
+ jump_label_t key;
+};
+
+#endif /* __ASSEMBLY__ */
+#endif
diff --git a/arch/s390/include/asm/kdebug.h b/arch/s390/include/asm/kdebug.h
new file mode 100644
index 000000000..d5327f064
--- /dev/null
+++ b/arch/s390/include/asm/kdebug.h
@@ -0,0 +1,28 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _S390_KDEBUG_H
+#define _S390_KDEBUG_H
+
+/*
+ * Feb 2006 Ported to s390 <grundym@us.ibm.com>
+ */
+
+struct pt_regs;
+
+enum die_val {
+ DIE_OOPS = 1,
+ DIE_BPT,
+ DIE_SSTEP,
+ DIE_PANIC,
+ DIE_NMI,
+ DIE_DIE,
+ DIE_NMIWATCHDOG,
+ DIE_KERNELDEBUG,
+ DIE_TRAP,
+ DIE_GPF,
+ DIE_CALL,
+ DIE_NMI_IPI,
+};
+
+extern void die(struct pt_regs *, const char *);
+
+#endif
diff --git a/arch/s390/include/asm/kexec.h b/arch/s390/include/asm/kexec.h
new file mode 100644
index 000000000..825dd0f7f
--- /dev/null
+++ b/arch/s390/include/asm/kexec.h
@@ -0,0 +1,72 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright IBM Corp. 2005
+ *
+ * Author(s): Rolf Adelsberger <adelsberger@de.ibm.com>
+ *
+ */
+
+#ifndef _S390_KEXEC_H
+#define _S390_KEXEC_H
+
+#include <asm/processor.h>
+#include <asm/page.h>
+/*
+ * KEXEC_SOURCE_MEMORY_LIMIT is the maximum page get_free_page can return,
+ * i.e. the highest page that is mapped directly into kernel memory so
+ * that kmap is not required.
+ */
+
+/* Maximum physical address we can use pages from */
+#define KEXEC_SOURCE_MEMORY_LIMIT (-1UL)
+
+/* Maximum address we can reach in physical address mode */
+#define KEXEC_DESTINATION_MEMORY_LIMIT (-1UL)
+
+/* Maximum address we can use for the control pages */
+/* Not more than 2GB */
+#define KEXEC_CONTROL_MEMORY_LIMIT (1UL<<31)
+
+/* Allocate control page with GFP_DMA */
+#define KEXEC_CONTROL_MEMORY_GFP GFP_DMA
+
+/* Maximum address we can use for the crash control pages */
+#define KEXEC_CRASH_CONTROL_MEMORY_LIMIT (-1UL)
+
+/* Allocate one page for the pdp and the second for the code */
+#define KEXEC_CONTROL_PAGE_SIZE 4096
+
+/* Alignment of crashkernel memory */
+#define KEXEC_CRASH_MEM_ALIGN HPAGE_SIZE
+
+/* The native architecture */
+#define KEXEC_ARCH KEXEC_ARCH_S390
+
+/* Provide a dummy definition to avoid build failures. */
+static inline void crash_setup_regs(struct pt_regs *newregs,
+ struct pt_regs *oldregs) { }
+
+struct kimage;
+struct s390_load_data {
+ /* Pointer to the kernel buffer. Used to register cmdline etc. */
+ void *kernel_buf;
+
+ /* Total size of loaded segments in memory. Used as an offset. */
+ size_t memsz;
+
+ /* Load address of initrd. Used to register INITRD_START in kernel. */
+ unsigned long initrd_load_addr;
+};
+
+int kexec_file_add_purgatory(struct kimage *image,
+ struct s390_load_data *data);
+int kexec_file_add_initrd(struct kimage *image,
+ struct s390_load_data *data,
+ char *initrd, unsigned long initrd_len);
+int *kexec_file_update_kernel(struct kimage *image,
+ struct s390_load_data *data);
+
+extern const struct kexec_file_ops s390_kexec_image_ops;
+extern const struct kexec_file_ops s390_kexec_elf_ops;
+
+#endif /*_S390_KEXEC_H */
diff --git a/arch/s390/include/asm/kprobes.h b/arch/s390/include/asm/kprobes.h
new file mode 100644
index 000000000..b106aa29b
--- /dev/null
+++ b/arch/s390/include/asm/kprobes.h
@@ -0,0 +1,83 @@
+/* SPDX-License-Identifier: GPL-2.0+ */
+#ifndef _ASM_S390_KPROBES_H
+#define _ASM_S390_KPROBES_H
+/*
+ * Kernel Probes (KProbes)
+ *
+ * Copyright IBM Corp. 2002, 2006
+ *
+ * 2002-Oct Created by Vamsi Krishna S <vamsi_krishna@in.ibm.com> Kernel
+ * Probes initial implementation ( includes suggestions from
+ * Rusty Russell).
+ * 2004-Nov Modified for PPC64 by Ananth N Mavinakayanahalli
+ * <ananth@in.ibm.com>
+ * 2005-Dec Used as a template for s390 by Mike Grundy
+ * <grundym@us.ibm.com>
+ */
+#include <linux/types.h>
+#include <asm-generic/kprobes.h>
+
+#define BREAKPOINT_INSTRUCTION 0x0002
+
+#define FIXUP_PSW_NORMAL 0x08
+#define FIXUP_BRANCH_NOT_TAKEN 0x04
+#define FIXUP_RETURN_REGISTER 0x02
+#define FIXUP_NOT_REQUIRED 0x01
+
+int probe_is_prohibited_opcode(u16 *insn);
+int probe_get_fixup_type(u16 *insn);
+int probe_is_insn_relative_long(u16 *insn);
+
+#ifdef CONFIG_KPROBES
+#include <linux/ptrace.h>
+#include <linux/percpu.h>
+#include <linux/sched/task_stack.h>
+
+#define __ARCH_WANT_KPROBES_INSN_SLOT
+
+struct pt_regs;
+struct kprobe;
+
+typedef u16 kprobe_opcode_t;
+
+/* Maximum instruction size is 3 (16bit) halfwords: */
+#define MAX_INSN_SIZE 0x0003
+#define MAX_STACK_SIZE 64
+#define MIN_STACK_SIZE(ADDR) (((MAX_STACK_SIZE) < \
+ (((unsigned long)task_stack_page(current)) + THREAD_SIZE - (ADDR))) \
+ ? (MAX_STACK_SIZE) \
+ : (((unsigned long)task_stack_page(current)) + THREAD_SIZE - (ADDR)))
+
+#define kretprobe_blacklist_size 0
+
+/* Architecture specific copy of original instruction */
+struct arch_specific_insn {
+ /* copy of original instruction */
+ kprobe_opcode_t *insn;
+ unsigned int is_ftrace_insn : 1;
+};
+
+struct prev_kprobe {
+ struct kprobe *kp;
+ unsigned long status;
+};
+
+/* per-cpu kprobe control block */
+struct kprobe_ctlblk {
+ unsigned long kprobe_status;
+ unsigned long kprobe_saved_imask;
+ unsigned long kprobe_saved_ctl[3];
+ struct prev_kprobe prev_kprobe;
+};
+
+void arch_remove_kprobe(struct kprobe *p);
+void kretprobe_trampoline(void);
+
+int kprobe_fault_handler(struct pt_regs *regs, int trapnr);
+int kprobe_exceptions_notify(struct notifier_block *self,
+ unsigned long val, void *data);
+
+#define flush_insn_slot(p) do { } while (0)
+
+#endif /* CONFIG_KPROBES */
+#endif /* _ASM_S390_KPROBES_H */
diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h
new file mode 100644
index 000000000..be8fa30a6
--- /dev/null
+++ b/arch/s390/include/asm/kvm_host.h
@@ -0,0 +1,877 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * definition for kernel virtual machines on s390
+ *
+ * Copyright IBM Corp. 2008, 2018
+ *
+ * Author(s): Carsten Otte <cotte@de.ibm.com>
+ */
+
+
+#ifndef ASM_KVM_HOST_H
+#define ASM_KVM_HOST_H
+
+#include <linux/types.h>
+#include <linux/hrtimer.h>
+#include <linux/interrupt.h>
+#include <linux/kvm_types.h>
+#include <linux/kvm_host.h>
+#include <linux/kvm.h>
+#include <linux/seqlock.h>
+#include <asm/debug.h>
+#include <asm/cpu.h>
+#include <asm/fpu/api.h>
+#include <asm/isc.h>
+#include <asm/guarded_storage.h>
+
+#define KVM_S390_BSCA_CPU_SLOTS 64
+#define KVM_S390_ESCA_CPU_SLOTS 248
+#define KVM_MAX_VCPUS 255
+#define KVM_USER_MEM_SLOTS 32
+
+/*
+ * These seem to be used for allocating ->chip in the routing table, which we
+ * don't use. 1 is as small as we can get to reduce the needed memory. If we
+ * need to look at ->chip later on, we'll need to revisit this.
+ */
+#define KVM_NR_IRQCHIPS 1
+#define KVM_IRQCHIP_NUM_PINS 1
+#define KVM_HALT_POLL_NS_DEFAULT 80000
+
+/* s390-specific vcpu->requests bit members */
+#define KVM_REQ_ENABLE_IBS KVM_ARCH_REQ(0)
+#define KVM_REQ_DISABLE_IBS KVM_ARCH_REQ(1)
+#define KVM_REQ_ICPT_OPEREXC KVM_ARCH_REQ(2)
+#define KVM_REQ_START_MIGRATION KVM_ARCH_REQ(3)
+#define KVM_REQ_STOP_MIGRATION KVM_ARCH_REQ(4)
+
+#define SIGP_CTRL_C 0x80
+#define SIGP_CTRL_SCN_MASK 0x3f
+
+union bsca_sigp_ctrl {
+ __u8 value;
+ struct {
+ __u8 c : 1;
+ __u8 r : 1;
+ __u8 scn : 6;
+ };
+};
+
+union esca_sigp_ctrl {
+ __u16 value;
+ struct {
+ __u8 c : 1;
+ __u8 reserved: 7;
+ __u8 scn;
+ };
+};
+
+struct esca_entry {
+ union esca_sigp_ctrl sigp_ctrl;
+ __u16 reserved1[3];
+ __u64 sda;
+ __u64 reserved2[6];
+};
+
+struct bsca_entry {
+ __u8 reserved0;
+ union bsca_sigp_ctrl sigp_ctrl;
+ __u16 reserved[3];
+ __u64 sda;
+ __u64 reserved2[2];
+};
+
+union ipte_control {
+ unsigned long val;
+ struct {
+ unsigned long k : 1;
+ unsigned long kh : 31;
+ unsigned long kg : 32;
+ };
+};
+
+struct bsca_block {
+ union ipte_control ipte_control;
+ __u64 reserved[5];
+ __u64 mcn;
+ __u64 reserved2;
+ struct bsca_entry cpu[KVM_S390_BSCA_CPU_SLOTS];
+};
+
+struct esca_block {
+ union ipte_control ipte_control;
+ __u64 reserved1[7];
+ __u64 mcn[4];
+ __u64 reserved2[20];
+ struct esca_entry cpu[KVM_S390_ESCA_CPU_SLOTS];
+};
+
+/*
+ * This struct is used to store machine check info from the lowcore
+ * for machine checks that happen while the guest is running.
+ * The info in the host's lowcore might be overwritten by a second machine
+ * check that arrives while the host is still in the high-level handling
+ * of the first one. The size is 24 bytes.
+ */
+struct mcck_volatile_info {
+ __u64 mcic;
+ __u64 failing_storage_address;
+ __u32 ext_damage_code;
+ __u32 reserved;
+};
+
+#define CPUSTAT_STOPPED 0x80000000
+#define CPUSTAT_WAIT 0x10000000
+#define CPUSTAT_ECALL_PEND 0x08000000
+#define CPUSTAT_STOP_INT 0x04000000
+#define CPUSTAT_IO_INT 0x02000000
+#define CPUSTAT_EXT_INT 0x01000000
+#define CPUSTAT_RUNNING 0x00800000
+#define CPUSTAT_RETAINED 0x00400000
+#define CPUSTAT_TIMING_SUB 0x00020000
+#define CPUSTAT_SIE_SUB 0x00010000
+#define CPUSTAT_RRF 0x00008000
+#define CPUSTAT_SLSV 0x00004000
+#define CPUSTAT_SLSR 0x00002000
+#define CPUSTAT_ZARCH 0x00000800
+#define CPUSTAT_MCDS 0x00000100
+#define CPUSTAT_KSS 0x00000200
+#define CPUSTAT_SM 0x00000080
+#define CPUSTAT_IBS 0x00000040
+#define CPUSTAT_GED2 0x00000010
+#define CPUSTAT_G 0x00000008
+#define CPUSTAT_GED 0x00000004
+#define CPUSTAT_J 0x00000002
+#define CPUSTAT_P 0x00000001
+
+struct kvm_s390_sie_block {
+ atomic_t cpuflags; /* 0x0000 */
+ __u32 : 1; /* 0x0004 */
+ __u32 prefix : 18;
+ __u32 : 1;
+ __u32 ibc : 12;
+ __u8 reserved08[4]; /* 0x0008 */
+#define PROG_IN_SIE (1<<0)
+ __u32 prog0c; /* 0x000c */
+ __u8 reserved10[16]; /* 0x0010 */
+#define PROG_BLOCK_SIE (1<<0)
+#define PROG_REQUEST (1<<1)
+ atomic_t prog20; /* 0x0020 */
+ __u8 reserved24[4]; /* 0x0024 */
+ __u64 cputm; /* 0x0028 */
+ __u64 ckc; /* 0x0030 */
+ __u64 epoch; /* 0x0038 */
+ __u32 svcc; /* 0x0040 */
+#define LCTL_CR0 0x8000
+#define LCTL_CR6 0x0200
+#define LCTL_CR9 0x0040
+#define LCTL_CR10 0x0020
+#define LCTL_CR11 0x0010
+#define LCTL_CR14 0x0002
+ __u16 lctl; /* 0x0044 */
+ __s16 icpua; /* 0x0046 */
+#define ICTL_OPEREXC 0x80000000
+#define ICTL_PINT 0x20000000
+#define ICTL_LPSW 0x00400000
+#define ICTL_STCTL 0x00040000
+#define ICTL_ISKE 0x00004000
+#define ICTL_SSKE 0x00002000
+#define ICTL_RRBE 0x00001000
+#define ICTL_TPROT 0x00000200
+ __u32 ictl; /* 0x0048 */
+#define ECA_CEI 0x80000000
+#define ECA_IB 0x40000000
+#define ECA_SIGPI 0x10000000
+#define ECA_MVPGI 0x01000000
+#define ECA_AIV 0x00200000
+#define ECA_VX 0x00020000
+#define ECA_PROTEXCI 0x00002000
+#define ECA_SII 0x00000001
+ __u32 eca; /* 0x004c */
+#define ICPT_INST 0x04
+#define ICPT_PROGI 0x08
+#define ICPT_INSTPROGI 0x0C
+#define ICPT_EXTREQ 0x10
+#define ICPT_EXTINT 0x14
+#define ICPT_IOREQ 0x18
+#define ICPT_WAIT 0x1c
+#define ICPT_VALIDITY 0x20
+#define ICPT_STOP 0x28
+#define ICPT_OPEREXC 0x2C
+#define ICPT_PARTEXEC 0x38
+#define ICPT_IOINST 0x40
+#define ICPT_KSS 0x5c
+ __u8 icptcode; /* 0x0050 */
+ __u8 icptstatus; /* 0x0051 */
+ __u16 ihcpu; /* 0x0052 */
+ __u8 reserved54[2]; /* 0x0054 */
+ __u16 ipa; /* 0x0056 */
+ __u32 ipb; /* 0x0058 */
+ __u32 scaoh; /* 0x005c */
+#define FPF_BPBC 0x20
+ __u8 fpf; /* 0x0060 */
+#define ECB_GS 0x40
+#define ECB_TE 0x10
+#define ECB_SRSI 0x04
+#define ECB_HOSTPROTINT 0x02
+ __u8 ecb; /* 0x0061 */
+#define ECB2_CMMA 0x80
+#define ECB2_IEP 0x20
+#define ECB2_PFMFI 0x08
+#define ECB2_ESCA 0x04
+ __u8 ecb2; /* 0x0062 */
+#define ECB3_DEA 0x08
+#define ECB3_AES 0x04
+#define ECB3_RI 0x01
+ __u8 ecb3; /* 0x0063 */
+ __u32 scaol; /* 0x0064 */
+ __u8 reserved68; /* 0x0068 */
+ __u8 epdx; /* 0x0069 */
+ __u8 reserved6a[2]; /* 0x006a */
+ __u32 todpr; /* 0x006c */
+#define GISA_FORMAT1 0x00000001
+ __u32 gd; /* 0x0070 */
+ __u8 reserved74[12]; /* 0x0074 */
+ __u64 mso; /* 0x0080 */
+ __u64 msl; /* 0x0088 */
+ psw_t gpsw; /* 0x0090 */
+ __u64 gg14; /* 0x00a0 */
+ __u64 gg15; /* 0x00a8 */
+ __u8 reservedb0[20]; /* 0x00b0 */
+ __u16 extcpuaddr; /* 0x00c4 */
+ __u16 eic; /* 0x00c6 */
+ __u32 reservedc8; /* 0x00c8 */
+ __u16 pgmilc; /* 0x00cc */
+ __u16 iprcc; /* 0x00ce */
+ __u32 dxc; /* 0x00d0 */
+ __u16 mcn; /* 0x00d4 */
+ __u8 perc; /* 0x00d6 */
+ __u8 peratmid; /* 0x00d7 */
+ __u64 peraddr; /* 0x00d8 */
+ __u8 eai; /* 0x00e0 */
+ __u8 peraid; /* 0x00e1 */
+ __u8 oai; /* 0x00e2 */
+ __u8 armid; /* 0x00e3 */
+ __u8 reservede4[4]; /* 0x00e4 */
+ __u64 tecmc; /* 0x00e8 */
+ __u8 reservedf0[12]; /* 0x00f0 */
+#define CRYCB_FORMAT1 0x00000001
+#define CRYCB_FORMAT2 0x00000003
+ __u32 crycbd; /* 0x00fc */
+ __u64 gcr[16]; /* 0x0100 */
+ __u64 gbea; /* 0x0180 */
+ __u8 reserved188[8]; /* 0x0188 */
+ __u64 sdnxo; /* 0x0190 */
+ __u8 reserved198[8]; /* 0x0198 */
+ __u32 fac; /* 0x01a0 */
+ __u8 reserved1a4[20]; /* 0x01a4 */
+ __u64 cbrlo; /* 0x01b8 */
+ __u8 reserved1c0[8]; /* 0x01c0 */
+#define ECD_HOSTREGMGMT 0x20000000
+#define ECD_MEF 0x08000000
+#define ECD_ETOKENF 0x02000000
+ __u32 ecd; /* 0x01c8 */
+ __u8 reserved1cc[18]; /* 0x01cc */
+ __u64 pp; /* 0x01de */
+ __u8 reserved1e6[2]; /* 0x01e6 */
+ __u64 itdba; /* 0x01e8 */
+ __u64 riccbd; /* 0x01f0 */
+ __u64 gvrd; /* 0x01f8 */
+} __attribute__((packed));
+
+struct kvm_s390_itdb {
+ __u8 data[256];
+};
+
+struct sie_page {
+ struct kvm_s390_sie_block sie_block;
+ struct mcck_volatile_info mcck_info; /* 0x0200 */
+ __u8 reserved218[1000]; /* 0x0218 */
+ struct kvm_s390_itdb itdb; /* 0x0600 */
+ __u8 reserved700[2304]; /* 0x0700 */
+};
+
+struct kvm_vcpu_stat {
+ u64 exit_userspace;
+ u64 exit_null;
+ u64 exit_external_request;
+ u64 exit_io_request;
+ u64 exit_external_interrupt;
+ u64 exit_stop_request;
+ u64 exit_validity;
+ u64 exit_instruction;
+ u64 exit_pei;
+ u64 halt_successful_poll;
+ u64 halt_attempted_poll;
+ u64 halt_poll_invalid;
+ u64 halt_wakeup;
+ u64 instruction_lctl;
+ u64 instruction_lctlg;
+ u64 instruction_stctl;
+ u64 instruction_stctg;
+ u64 exit_program_interruption;
+ u64 exit_instr_and_program;
+ u64 exit_operation_exception;
+ u64 deliver_ckc;
+ u64 deliver_cputm;
+ u64 deliver_external_call;
+ u64 deliver_emergency_signal;
+ u64 deliver_service_signal;
+ u64 deliver_virtio;
+ u64 deliver_stop_signal;
+ u64 deliver_prefix_signal;
+ u64 deliver_restart_signal;
+ u64 deliver_program;
+ u64 deliver_io;
+ u64 deliver_machine_check;
+ u64 exit_wait_state;
+ u64 inject_ckc;
+ u64 inject_cputm;
+ u64 inject_external_call;
+ u64 inject_emergency_signal;
+ u64 inject_mchk;
+ u64 inject_pfault_init;
+ u64 inject_program;
+ u64 inject_restart;
+ u64 inject_set_prefix;
+ u64 inject_stop_signal;
+ u64 instruction_epsw;
+ u64 instruction_gs;
+ u64 instruction_io_other;
+ u64 instruction_lpsw;
+ u64 instruction_lpswe;
+ u64 instruction_pfmf;
+ u64 instruction_ptff;
+ u64 instruction_sck;
+ u64 instruction_sckpf;
+ u64 instruction_stidp;
+ u64 instruction_spx;
+ u64 instruction_stpx;
+ u64 instruction_stap;
+ u64 instruction_iske;
+ u64 instruction_ri;
+ u64 instruction_rrbe;
+ u64 instruction_sske;
+ u64 instruction_ipte_interlock;
+ u64 instruction_stsi;
+ u64 instruction_stfl;
+ u64 instruction_tb;
+ u64 instruction_tpi;
+ u64 instruction_tprot;
+ u64 instruction_tsch;
+ u64 instruction_sie;
+ u64 instruction_essa;
+ u64 instruction_sthyi;
+ u64 instruction_sigp_sense;
+ u64 instruction_sigp_sense_running;
+ u64 instruction_sigp_external_call;
+ u64 instruction_sigp_emergency;
+ u64 instruction_sigp_cond_emergency;
+ u64 instruction_sigp_start;
+ u64 instruction_sigp_stop;
+ u64 instruction_sigp_stop_store_status;
+ u64 instruction_sigp_store_status;
+ u64 instruction_sigp_store_adtl_status;
+ u64 instruction_sigp_arch;
+ u64 instruction_sigp_prefix;
+ u64 instruction_sigp_restart;
+ u64 instruction_sigp_init_cpu_reset;
+ u64 instruction_sigp_cpu_reset;
+ u64 instruction_sigp_unknown;
+ u64 diagnose_10;
+ u64 diagnose_44;
+ u64 diagnose_9c;
+ u64 diagnose_258;
+ u64 diagnose_308;
+ u64 diagnose_500;
+ u64 diagnose_other;
+};
+
+#define PGM_OPERATION 0x01
+#define PGM_PRIVILEGED_OP 0x02
+#define PGM_EXECUTE 0x03
+#define PGM_PROTECTION 0x04
+#define PGM_ADDRESSING 0x05
+#define PGM_SPECIFICATION 0x06
+#define PGM_DATA 0x07
+#define PGM_FIXED_POINT_OVERFLOW 0x08
+#define PGM_FIXED_POINT_DIVIDE 0x09
+#define PGM_DECIMAL_OVERFLOW 0x0a
+#define PGM_DECIMAL_DIVIDE 0x0b
+#define PGM_HFP_EXPONENT_OVERFLOW 0x0c
+#define PGM_HFP_EXPONENT_UNDERFLOW 0x0d
+#define PGM_HFP_SIGNIFICANCE 0x0e
+#define PGM_HFP_DIVIDE 0x0f
+#define PGM_SEGMENT_TRANSLATION 0x10
+#define PGM_PAGE_TRANSLATION 0x11
+#define PGM_TRANSLATION_SPEC 0x12
+#define PGM_SPECIAL_OPERATION 0x13
+#define PGM_OPERAND 0x15
+#define PGM_TRACE_TABEL 0x16
+#define PGM_VECTOR_PROCESSING 0x1b
+#define PGM_SPACE_SWITCH 0x1c
+#define PGM_HFP_SQUARE_ROOT 0x1d
+#define PGM_PC_TRANSLATION_SPEC 0x1f
+#define PGM_AFX_TRANSLATION 0x20
+#define PGM_ASX_TRANSLATION 0x21
+#define PGM_LX_TRANSLATION 0x22
+#define PGM_EX_TRANSLATION 0x23
+#define PGM_PRIMARY_AUTHORITY 0x24
+#define PGM_SECONDARY_AUTHORITY 0x25
+#define PGM_LFX_TRANSLATION 0x26
+#define PGM_LSX_TRANSLATION 0x27
+#define PGM_ALET_SPECIFICATION 0x28
+#define PGM_ALEN_TRANSLATION 0x29
+#define PGM_ALE_SEQUENCE 0x2a
+#define PGM_ASTE_VALIDITY 0x2b
+#define PGM_ASTE_SEQUENCE 0x2c
+#define PGM_EXTENDED_AUTHORITY 0x2d
+#define PGM_LSTE_SEQUENCE 0x2e
+#define PGM_ASTE_INSTANCE 0x2f
+#define PGM_STACK_FULL 0x30
+#define PGM_STACK_EMPTY 0x31
+#define PGM_STACK_SPECIFICATION 0x32
+#define PGM_STACK_TYPE 0x33
+#define PGM_STACK_OPERATION 0x34
+#define PGM_ASCE_TYPE 0x38
+#define PGM_REGION_FIRST_TRANS 0x39
+#define PGM_REGION_SECOND_TRANS 0x3a
+#define PGM_REGION_THIRD_TRANS 0x3b
+#define PGM_MONITOR 0x40
+#define PGM_PER 0x80
+#define PGM_CRYPTO_OPERATION 0x119
+
+/* irq types in ascending order of priority */
+enum irq_types {
+ IRQ_PEND_SET_PREFIX = 0,
+ IRQ_PEND_RESTART,
+ IRQ_PEND_SIGP_STOP,
+ IRQ_PEND_IO_ISC_7,
+ IRQ_PEND_IO_ISC_6,
+ IRQ_PEND_IO_ISC_5,
+ IRQ_PEND_IO_ISC_4,
+ IRQ_PEND_IO_ISC_3,
+ IRQ_PEND_IO_ISC_2,
+ IRQ_PEND_IO_ISC_1,
+ IRQ_PEND_IO_ISC_0,
+ IRQ_PEND_VIRTIO,
+ IRQ_PEND_PFAULT_DONE,
+ IRQ_PEND_PFAULT_INIT,
+ IRQ_PEND_EXT_HOST,
+ IRQ_PEND_EXT_SERVICE,
+ IRQ_PEND_EXT_TIMING,
+ IRQ_PEND_EXT_CPU_TIMER,
+ IRQ_PEND_EXT_CLOCK_COMP,
+ IRQ_PEND_EXT_EXTERNAL,
+ IRQ_PEND_EXT_EMERGENCY,
+ IRQ_PEND_EXT_MALFUNC,
+ IRQ_PEND_EXT_IRQ_KEY,
+ IRQ_PEND_MCHK_REP,
+ IRQ_PEND_PROG,
+ IRQ_PEND_SVC,
+ IRQ_PEND_MCHK_EX,
+ IRQ_PEND_COUNT
+};
+
+/* We have 2M for virtio device descriptor pages. Smallest amount of
+ * memory per page is 24 bytes (1 queue), so (2048*1024) / 24 = 87381
+ */
+#define KVM_S390_MAX_VIRTIO_IRQS 87381
+
+/*
+ * Repressible (non-floating) machine check interrupts
+ * subclass bits in MCIC
+ */
+#define MCHK_EXTD_BIT 58
+#define MCHK_DEGR_BIT 56
+#define MCHK_WARN_BIT 55
+#define MCHK_REP_MASK ((1UL << MCHK_DEGR_BIT) | \
+ (1UL << MCHK_EXTD_BIT) | \
+ (1UL << MCHK_WARN_BIT))
+
+/* Exigent machine check interrupts subclass bits in MCIC */
+#define MCHK_SD_BIT 63
+#define MCHK_PD_BIT 62
+#define MCHK_EX_MASK ((1UL << MCHK_SD_BIT) | (1UL << MCHK_PD_BIT))
+
+#define IRQ_PEND_EXT_MASK ((1UL << IRQ_PEND_EXT_IRQ_KEY) | \
+ (1UL << IRQ_PEND_EXT_CLOCK_COMP) | \
+ (1UL << IRQ_PEND_EXT_CPU_TIMER) | \
+ (1UL << IRQ_PEND_EXT_MALFUNC) | \
+ (1UL << IRQ_PEND_EXT_EMERGENCY) | \
+ (1UL << IRQ_PEND_EXT_EXTERNAL) | \
+ (1UL << IRQ_PEND_EXT_TIMING) | \
+ (1UL << IRQ_PEND_EXT_HOST) | \
+ (1UL << IRQ_PEND_EXT_SERVICE) | \
+ (1UL << IRQ_PEND_VIRTIO) | \
+ (1UL << IRQ_PEND_PFAULT_INIT) | \
+ (1UL << IRQ_PEND_PFAULT_DONE))
+
+#define IRQ_PEND_IO_MASK ((1UL << IRQ_PEND_IO_ISC_0) | \
+ (1UL << IRQ_PEND_IO_ISC_1) | \
+ (1UL << IRQ_PEND_IO_ISC_2) | \
+ (1UL << IRQ_PEND_IO_ISC_3) | \
+ (1UL << IRQ_PEND_IO_ISC_4) | \
+ (1UL << IRQ_PEND_IO_ISC_5) | \
+ (1UL << IRQ_PEND_IO_ISC_6) | \
+ (1UL << IRQ_PEND_IO_ISC_7))
+
+#define IRQ_PEND_MCHK_MASK ((1UL << IRQ_PEND_MCHK_REP) | \
+ (1UL << IRQ_PEND_MCHK_EX))
+
+struct kvm_s390_interrupt_info {
+ struct list_head list;
+ u64 type;
+ union {
+ struct kvm_s390_io_info io;
+ struct kvm_s390_ext_info ext;
+ struct kvm_s390_pgm_info pgm;
+ struct kvm_s390_emerg_info emerg;
+ struct kvm_s390_extcall_info extcall;
+ struct kvm_s390_prefix_info prefix;
+ struct kvm_s390_stop_info stop;
+ struct kvm_s390_mchk_info mchk;
+ };
+};
+
+struct kvm_s390_irq_payload {
+ struct kvm_s390_io_info io;
+ struct kvm_s390_ext_info ext;
+ struct kvm_s390_pgm_info pgm;
+ struct kvm_s390_emerg_info emerg;
+ struct kvm_s390_extcall_info extcall;
+ struct kvm_s390_prefix_info prefix;
+ struct kvm_s390_stop_info stop;
+ struct kvm_s390_mchk_info mchk;
+};
+
+struct kvm_s390_local_interrupt {
+ spinlock_t lock;
+ DECLARE_BITMAP(sigp_emerg_pending, KVM_MAX_VCPUS);
+ struct kvm_s390_irq_payload irq;
+ unsigned long pending_irqs;
+};
+
+#define FIRQ_LIST_IO_ISC_0 0
+#define FIRQ_LIST_IO_ISC_1 1
+#define FIRQ_LIST_IO_ISC_2 2
+#define FIRQ_LIST_IO_ISC_3 3
+#define FIRQ_LIST_IO_ISC_4 4
+#define FIRQ_LIST_IO_ISC_5 5
+#define FIRQ_LIST_IO_ISC_6 6
+#define FIRQ_LIST_IO_ISC_7 7
+#define FIRQ_LIST_PFAULT 8
+#define FIRQ_LIST_VIRTIO 9
+#define FIRQ_LIST_COUNT 10
+#define FIRQ_CNTR_IO 0
+#define FIRQ_CNTR_SERVICE 1
+#define FIRQ_CNTR_VIRTIO 2
+#define FIRQ_CNTR_PFAULT 3
+#define FIRQ_MAX_COUNT 4
+
+/* mask the AIS mode for a given ISC */
+#define AIS_MODE_MASK(isc) (0x80 >> isc)
+
+#define KVM_S390_AIS_MODE_ALL 0
+#define KVM_S390_AIS_MODE_SINGLE 1
+
+struct kvm_s390_float_interrupt {
+ unsigned long pending_irqs;
+ spinlock_t lock;
+ struct list_head lists[FIRQ_LIST_COUNT];
+ int counters[FIRQ_MAX_COUNT];
+ struct kvm_s390_mchk_info mchk;
+ struct kvm_s390_ext_info srv_signal;
+ int next_rr_cpu;
+ unsigned long idle_mask[BITS_TO_LONGS(KVM_MAX_VCPUS)];
+ struct mutex ais_lock;
+ u8 simm;
+ u8 nimm;
+};
+
+struct kvm_hw_wp_info_arch {
+ unsigned long addr;
+ unsigned long phys_addr;
+ int len;
+ char *old_data;
+};
+
+struct kvm_hw_bp_info_arch {
+ unsigned long addr;
+ int len;
+};
+
+/*
+ * Only the upper 16 bits of kvm_guest_debug->control are arch specific.
+ * Further KVM_GUESTDBG flags which can be used from userspace can be found in
+ * arch/s390/include/uapi/asm/kvm.h
+ */
+#define KVM_GUESTDBG_EXIT_PENDING 0x10000000
+
+#define guestdbg_enabled(vcpu) \
+ (vcpu->guest_debug & KVM_GUESTDBG_ENABLE)
+#define guestdbg_sstep_enabled(vcpu) \
+ (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP)
+#define guestdbg_hw_bp_enabled(vcpu) \
+ (vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP)
+#define guestdbg_exit_pending(vcpu) (guestdbg_enabled(vcpu) && \
+ (vcpu->guest_debug & KVM_GUESTDBG_EXIT_PENDING))
+
+struct kvm_guestdbg_info_arch {
+ unsigned long cr0;
+ unsigned long cr9;
+ unsigned long cr10;
+ unsigned long cr11;
+ struct kvm_hw_bp_info_arch *hw_bp_info;
+ struct kvm_hw_wp_info_arch *hw_wp_info;
+ int nr_hw_bp;
+ int nr_hw_wp;
+ unsigned long last_bp;
+};
+
+struct kvm_vcpu_arch {
+ struct kvm_s390_sie_block *sie_block;
+ /* if vsie is active, currently executed shadow sie control block */
+ struct kvm_s390_sie_block *vsie_block;
+ unsigned int host_acrs[NUM_ACRS];
+ struct gs_cb *host_gscb;
+ struct fpu host_fpregs;
+ struct kvm_s390_local_interrupt local_int;
+ struct hrtimer ckc_timer;
+ struct kvm_s390_pgm_info pgm;
+ struct gmap *gmap;
+ /* backup location for the currently enabled gmap when scheduled out */
+ struct gmap *enabled_gmap;
+ struct kvm_guestdbg_info_arch guestdbg;
+ unsigned long pfault_token;
+ unsigned long pfault_select;
+ unsigned long pfault_compare;
+ bool cputm_enabled;
+ /*
+ * The seqcount protects updates to cputm_start and sie_block.cputm,
+ * this way we can have non-blocking reads with consistent values.
+ * Only the owning VCPU thread (vcpu->cpu) is allowed to change these
+ * values and to start/stop/enable/disable cpu timer accounting.
+ */
+ seqcount_t cputm_seqcount;
+ __u64 cputm_start;
+ bool gs_enabled;
+ bool skey_enabled;
+};
+
+struct kvm_vm_stat {
+ u64 inject_io;
+ u64 inject_float_mchk;
+ u64 inject_pfault_done;
+ u64 inject_service_signal;
+ u64 inject_virtio;
+ u64 remote_tlb_flush;
+};
+
+struct kvm_arch_memory_slot {
+};
+
+struct s390_map_info {
+ struct list_head list;
+ __u64 guest_addr;
+ __u64 addr;
+ struct page *page;
+};
+
+struct s390_io_adapter {
+ unsigned int id;
+ int isc;
+ bool maskable;
+ bool masked;
+ bool swap;
+ bool suppressible;
+ struct rw_semaphore maps_lock;
+ struct list_head maps;
+ atomic_t nr_maps;
+};
+
+#define MAX_S390_IO_ADAPTERS ((MAX_ISC + 1) * 8)
+#define MAX_S390_ADAPTER_MAPS 256
+
+/* maximum size of facilities and facility mask is 2k bytes */
+#define S390_ARCH_FAC_LIST_SIZE_BYTE (1<<11)
+#define S390_ARCH_FAC_LIST_SIZE_U64 \
+ (S390_ARCH_FAC_LIST_SIZE_BYTE / sizeof(u64))
+#define S390_ARCH_FAC_MASK_SIZE_BYTE S390_ARCH_FAC_LIST_SIZE_BYTE
+#define S390_ARCH_FAC_MASK_SIZE_U64 \
+ (S390_ARCH_FAC_MASK_SIZE_BYTE / sizeof(u64))
+
+struct kvm_s390_cpu_model {
+ /* facility mask supported by kvm & hosting machine */
+ __u64 fac_mask[S390_ARCH_FAC_LIST_SIZE_U64];
+ /* facility list requested by guest (in dma page) */
+ __u64 *fac_list;
+ u64 cpuid;
+ unsigned short ibc;
+};
+
+struct kvm_s390_crypto {
+ struct kvm_s390_crypto_cb *crycb;
+ __u32 crycbd;
+ __u8 aes_kw;
+ __u8 dea_kw;
+};
+
+#define APCB0_MASK_SIZE 1
+struct kvm_s390_apcb0 {
+ __u64 apm[APCB0_MASK_SIZE]; /* 0x0000 */
+ __u64 aqm[APCB0_MASK_SIZE]; /* 0x0008 */
+ __u64 adm[APCB0_MASK_SIZE]; /* 0x0010 */
+ __u64 reserved18; /* 0x0018 */
+};
+
+#define APCB1_MASK_SIZE 4
+struct kvm_s390_apcb1 {
+ __u64 apm[APCB1_MASK_SIZE]; /* 0x0000 */
+ __u64 aqm[APCB1_MASK_SIZE]; /* 0x0020 */
+ __u64 adm[APCB1_MASK_SIZE]; /* 0x0040 */
+ __u64 reserved60[4]; /* 0x0060 */
+};
+
+struct kvm_s390_crypto_cb {
+ struct kvm_s390_apcb0 apcb0; /* 0x0000 */
+ __u8 reserved20[0x0048 - 0x0020]; /* 0x0020 */
+ __u8 dea_wrapping_key_mask[24]; /* 0x0048 */
+ __u8 aes_wrapping_key_mask[32]; /* 0x0060 */
+ struct kvm_s390_apcb1 apcb1; /* 0x0080 */
+};
+
+struct kvm_s390_gisa {
+ union {
+ struct { /* common to all formats */
+ u32 next_alert;
+ u8 ipm;
+ u8 reserved01[2];
+ u8 iam;
+ };
+ struct { /* format 0 */
+ u32 next_alert;
+ u8 ipm;
+ u8 reserved01;
+ u8 : 6;
+ u8 g : 1;
+ u8 c : 1;
+ u8 iam;
+ u8 reserved02[4];
+ u32 airq_count;
+ } g0;
+ struct { /* format 1 */
+ u32 next_alert;
+ u8 ipm;
+ u8 simm;
+ u8 nimm;
+ u8 iam;
+ u8 aism[8];
+ u8 : 6;
+ u8 g : 1;
+ u8 c : 1;
+ u8 reserved03[11];
+ u32 airq_count;
+ } g1;
+ };
+};
+
+/*
+ * sie_page2 has to be allocated as DMA because fac_list, crycb and
+ * gisa need 31bit addresses in the sie control block.
+ */
+struct sie_page2 {
+ __u64 fac_list[S390_ARCH_FAC_LIST_SIZE_U64]; /* 0x0000 */
+ struct kvm_s390_crypto_cb crycb; /* 0x0800 */
+ struct kvm_s390_gisa gisa; /* 0x0900 */
+ u8 reserved920[0x1000 - 0x920]; /* 0x0920 */
+};
+
+struct kvm_s390_vsie {
+ struct mutex mutex;
+ struct radix_tree_root addr_to_page;
+ int page_count;
+ int next;
+ struct page *pages[KVM_MAX_VCPUS];
+};
+
+struct kvm_arch {
+ void *sca;
+ int use_esca;
+ rwlock_t sca_lock;
+ debug_info_t *dbf;
+ struct kvm_s390_float_interrupt float_int;
+ struct kvm_device *flic;
+ struct gmap *gmap;
+ unsigned long mem_limit;
+ int css_support;
+ int use_irqchip;
+ int use_cmma;
+ int use_pfmfi;
+ int use_skf;
+ int user_cpu_state_ctrl;
+ int user_sigp;
+ int user_stsi;
+ int user_instr0;
+ struct s390_io_adapter *adapters[MAX_S390_IO_ADAPTERS];
+ wait_queue_head_t ipte_wq;
+ int ipte_lock_count;
+ struct mutex ipte_mutex;
+ spinlock_t start_stop_lock;
+ struct sie_page2 *sie_page2;
+ struct kvm_s390_cpu_model model;
+ struct kvm_s390_crypto crypto;
+ struct kvm_s390_vsie vsie;
+ u8 epdx;
+ u64 epoch;
+ int migration_mode;
+ atomic64_t cmma_dirty_pages;
+ /* subset of available cpu features enabled by user space */
+ DECLARE_BITMAP(cpu_feat, KVM_S390_VM_CPU_FEAT_NR_BITS);
+ struct kvm_s390_gisa *gisa;
+};
+
+#define KVM_HVA_ERR_BAD (-1UL)
+#define KVM_HVA_ERR_RO_BAD (-2UL)
+
+static inline bool kvm_is_error_hva(unsigned long addr)
+{
+ return IS_ERR_VALUE(addr);
+}
+
+#define ASYNC_PF_PER_VCPU 64
+struct kvm_arch_async_pf {
+ unsigned long pfault_token;
+};
+
+bool kvm_arch_can_inject_async_page_present(struct kvm_vcpu *vcpu);
+
+void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu,
+ struct kvm_async_pf *work);
+
+void kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu,
+ struct kvm_async_pf *work);
+
+void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
+ struct kvm_async_pf *work);
+
+extern int sie64a(struct kvm_s390_sie_block *, u64 *);
+extern char sie_exit;
+
+static inline void kvm_arch_hardware_disable(void) {}
+static inline void kvm_arch_check_processor_compat(void *rtn) {}
+static inline void kvm_arch_sync_events(struct kvm *kvm) {}
+static inline void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) {}
+static inline void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu) {}
+static inline void kvm_arch_free_memslot(struct kvm *kvm,
+ struct kvm_memory_slot *free, struct kvm_memory_slot *dont) {}
+static inline void kvm_arch_memslots_updated(struct kvm *kvm, u64 gen) {}
+static inline void kvm_arch_flush_shadow_all(struct kvm *kvm) {}
+static inline void kvm_arch_flush_shadow_memslot(struct kvm *kvm,
+ struct kvm_memory_slot *slot) {}
+static inline void kvm_arch_vcpu_blocking(struct kvm_vcpu *vcpu) {}
+static inline void kvm_arch_vcpu_unblocking(struct kvm_vcpu *vcpu) {}
+
+void kvm_arch_vcpu_block_finish(struct kvm_vcpu *vcpu);
+
+#endif
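
The IRQ_PEND_* enumerators above double as bit positions in the per-vCPU pending_irqs bitmap, and IRQ_PEND_EXT_MASK, IRQ_PEND_IO_MASK and IRQ_PEND_MCHK_MASK group those bits by interrupt class. A minimal sketch of such a test, assuming a valid struct kvm_vcpu pointer as used throughout this header; the helper name is purely illustrative:

    /* Does this vCPU have any local external interrupt pending? */
    static inline bool sketch_ext_irq_pending(struct kvm_vcpu *vcpu)
    {
            unsigned long pending = vcpu->arch.local_int.pending_irqs;

            return (pending & IRQ_PEND_EXT_MASK) != 0;
    }
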
diff --git a/arch/s390/include/asm/kvm_para.h b/arch/s390/include/asm/kvm_para.h
new file mode 100644
index 000000000..cbc7c3a68
--- /dev/null
+++ b/arch/s390/include/asm/kvm_para.h
@@ -0,0 +1,206 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * definition for paravirtual devices on s390
+ *
+ * Copyright IBM Corp. 2008
+ *
+ * Author(s): Christian Borntraeger <borntraeger@de.ibm.com>
+ */
+/*
+ * Hypercalls for KVM on s390. The calling convention is similar to the
+ * s390 ABI, so we use R2-R6 for parameters 1-5. In addition we use R1
+ * as hypercall number and R7 as parameter 6. The return value is
+ * written to R2. We use the diagnose instruction as the hypercall. To avoid
+ * conflicts with existing diagnoses for LPAR and z/VM, we do not use
+ * the number encoded in the instruction, but specify the hypercall
+ * number in R1 and use diagnose 0x500 for KVM.
+ *
+ * Copyright IBM Corp. 2007,2008
+ * Author(s): Christian Borntraeger <borntraeger@de.ibm.com>
+ */
+#ifndef __S390_KVM_PARA_H
+#define __S390_KVM_PARA_H
+
+#include <uapi/asm/kvm_para.h>
+#include <asm/diag.h>
+
+static inline long __kvm_hypercall0(unsigned long nr)
+{
+ register unsigned long __nr asm("1") = nr;
+ register long __rc asm("2");
+
+ asm volatile ("diag 2,4,0x500\n"
+ : "=d" (__rc) : "d" (__nr): "memory", "cc");
+ return __rc;
+}
+
+static inline long kvm_hypercall0(unsigned long nr)
+{
+ diag_stat_inc(DIAG_STAT_X500);
+ return __kvm_hypercall0(nr);
+}
+
+static inline long __kvm_hypercall1(unsigned long nr, unsigned long p1)
+{
+ register unsigned long __nr asm("1") = nr;
+ register unsigned long __p1 asm("2") = p1;
+ register long __rc asm("2");
+
+ asm volatile ("diag 2,4,0x500\n"
+ : "=d" (__rc) : "d" (__nr), "0" (__p1) : "memory", "cc");
+ return __rc;
+}
+
+static inline long kvm_hypercall1(unsigned long nr, unsigned long p1)
+{
+ diag_stat_inc(DIAG_STAT_X500);
+ return __kvm_hypercall1(nr, p1);
+}
+
+static inline long __kvm_hypercall2(unsigned long nr, unsigned long p1,
+ unsigned long p2)
+{
+ register unsigned long __nr asm("1") = nr;
+ register unsigned long __p1 asm("2") = p1;
+ register unsigned long __p2 asm("3") = p2;
+ register long __rc asm("2");
+
+ asm volatile ("diag 2,4,0x500\n"
+ : "=d" (__rc) : "d" (__nr), "0" (__p1), "d" (__p2)
+ : "memory", "cc");
+ return __rc;
+}
+
+static inline long kvm_hypercall2(unsigned long nr, unsigned long p1,
+ unsigned long p2)
+{
+ diag_stat_inc(DIAG_STAT_X500);
+ return __kvm_hypercall2(nr, p1, p2);
+}
+
+static inline long __kvm_hypercall3(unsigned long nr, unsigned long p1,
+ unsigned long p2, unsigned long p3)
+{
+ register unsigned long __nr asm("1") = nr;
+ register unsigned long __p1 asm("2") = p1;
+ register unsigned long __p2 asm("3") = p2;
+ register unsigned long __p3 asm("4") = p3;
+ register long __rc asm("2");
+
+ asm volatile ("diag 2,4,0x500\n"
+ : "=d" (__rc) : "d" (__nr), "0" (__p1), "d" (__p2),
+ "d" (__p3) : "memory", "cc");
+ return __rc;
+}
+
+static inline long kvm_hypercall3(unsigned long nr, unsigned long p1,
+ unsigned long p2, unsigned long p3)
+{
+ diag_stat_inc(DIAG_STAT_X500);
+ return __kvm_hypercall3(nr, p1, p2, p3);
+}
+
+static inline long __kvm_hypercall4(unsigned long nr, unsigned long p1,
+ unsigned long p2, unsigned long p3,
+ unsigned long p4)
+{
+ register unsigned long __nr asm("1") = nr;
+ register unsigned long __p1 asm("2") = p1;
+ register unsigned long __p2 asm("3") = p2;
+ register unsigned long __p3 asm("4") = p3;
+ register unsigned long __p4 asm("5") = p4;
+ register long __rc asm("2");
+
+ asm volatile ("diag 2,4,0x500\n"
+ : "=d" (__rc) : "d" (__nr), "0" (__p1), "d" (__p2),
+ "d" (__p3), "d" (__p4) : "memory", "cc");
+ return __rc;
+}
+
+static inline long kvm_hypercall4(unsigned long nr, unsigned long p1,
+ unsigned long p2, unsigned long p3,
+ unsigned long p4)
+{
+ diag_stat_inc(DIAG_STAT_X500);
+ return __kvm_hypercall4(nr, p1, p2, p3, p4);
+}
+
+static inline long __kvm_hypercall5(unsigned long nr, unsigned long p1,
+ unsigned long p2, unsigned long p3,
+ unsigned long p4, unsigned long p5)
+{
+ register unsigned long __nr asm("1") = nr;
+ register unsigned long __p1 asm("2") = p1;
+ register unsigned long __p2 asm("3") = p2;
+ register unsigned long __p3 asm("4") = p3;
+ register unsigned long __p4 asm("5") = p4;
+ register unsigned long __p5 asm("6") = p5;
+ register long __rc asm("2");
+
+ asm volatile ("diag 2,4,0x500\n"
+ : "=d" (__rc) : "d" (__nr), "0" (__p1), "d" (__p2),
+ "d" (__p3), "d" (__p4), "d" (__p5) : "memory", "cc");
+ return __rc;
+}
+
+static inline long kvm_hypercall5(unsigned long nr, unsigned long p1,
+ unsigned long p2, unsigned long p3,
+ unsigned long p4, unsigned long p5)
+{
+ diag_stat_inc(DIAG_STAT_X500);
+ return __kvm_hypercall5(nr, p1, p2, p3, p4, p5);
+}
+
+static inline long __kvm_hypercall6(unsigned long nr, unsigned long p1,
+ unsigned long p2, unsigned long p3,
+ unsigned long p4, unsigned long p5,
+ unsigned long p6)
+{
+ register unsigned long __nr asm("1") = nr;
+ register unsigned long __p1 asm("2") = p1;
+ register unsigned long __p2 asm("3") = p2;
+ register unsigned long __p3 asm("4") = p3;
+ register unsigned long __p4 asm("5") = p4;
+ register unsigned long __p5 asm("6") = p5;
+ register unsigned long __p6 asm("7") = p6;
+ register long __rc asm("2");
+
+ asm volatile ("diag 2,4,0x500\n"
+ : "=d" (__rc) : "d" (__nr), "0" (__p1), "d" (__p2),
+ "d" (__p3), "d" (__p4), "d" (__p5), "d" (__p6)
+ : "memory", "cc");
+ return __rc;
+}
+
+static inline long kvm_hypercall6(unsigned long nr, unsigned long p1,
+ unsigned long p2, unsigned long p3,
+ unsigned long p4, unsigned long p5,
+ unsigned long p6)
+{
+ diag_stat_inc(DIAG_STAT_X500);
+ return __kvm_hypercall6(nr, p1, p2, p3, p4, p5, p6);
+}
+
+/* kvm on s390 is always paravirtualization enabled */
+static inline int kvm_para_available(void)
+{
+ return 1;
+}
+
+/* No feature bits are currently assigned for kvm on s390 */
+static inline unsigned int kvm_arch_para_features(void)
+{
+ return 0;
+}
+
+static inline unsigned int kvm_arch_para_hints(void)
+{
+ return 0;
+}
+
+static inline bool kvm_check_and_clear_guest_paused(void)
+{
+ return false;
+}
+
+#endif /* __S390_KVM_PARA_H */
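
A guest-side caller only picks the hypercall number and lets these wrappers load the registers as described in the convention above. A minimal sketch, where the hypercall number 1 and the helper name are purely illustrative assumptions:

    /* Issue a two-parameter diag 0x500 hypercall. */
    static long sketch_guest_notify(unsigned long token, unsigned long len)
    {
            return kvm_hypercall2(1, token, len);
    }
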
diff --git a/arch/s390/include/asm/linkage.h b/arch/s390/include/asm/linkage.h
new file mode 100644
index 000000000..1b95da3fd
--- /dev/null
+++ b/arch/s390/include/asm/linkage.h
@@ -0,0 +1,32 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __ASM_LINKAGE_H
+#define __ASM_LINKAGE_H
+
+#include <linux/stringify.h>
+
+#define __ALIGN .align 4, 0x07
+#define __ALIGN_STR __stringify(__ALIGN)
+
+#ifndef __ASSEMBLY__
+
+/*
+ * Helper macro for exception table entries
+ */
+#define EX_TABLE(_fault, _target) \
+ ".section __ex_table,\"a\"\n" \
+ ".align 4\n" \
+ ".long (" #_fault ") - .\n" \
+ ".long (" #_target ") - .\n" \
+ ".previous\n"
+
+#else /* __ASSEMBLY__ */
+
+#define EX_TABLE(_fault, _target) \
+ .section __ex_table,"a" ; \
+ .align 4 ; \
+ .long (_fault) - . ; \
+ .long (_target) - . ; \
+ .previous
+
+#endif /* __ASSEMBLY__ */
+#endif
diff --git a/arch/s390/include/asm/livepatch.h b/arch/s390/include/asm/livepatch.h
new file mode 100644
index 000000000..672f95b12
--- /dev/null
+++ b/arch/s390/include/asm/livepatch.h
@@ -0,0 +1,26 @@
+/* SPDX-License-Identifier: GPL-2.0+ */
+/*
+ * livepatch.h - s390-specific Kernel Live Patching Core
+ *
+ * Copyright (c) 2013-2015 SUSE
+ * Authors: Jiri Kosina
+ * Vojtech Pavlik
+ * Jiri Slaby
+ */
+
+#ifndef ASM_LIVEPATCH_H
+#define ASM_LIVEPATCH_H
+
+#include <asm/ptrace.h>
+
+static inline int klp_check_compiler_support(void)
+{
+ return 0;
+}
+
+static inline void klp_arch_set_pc(struct pt_regs *regs, unsigned long ip)
+{
+ regs->psw.addr = ip;
+}
+
+#endif
diff --git a/arch/s390/include/asm/lowcore.h b/arch/s390/include/asm/lowcore.h
new file mode 100644
index 000000000..406d94017
--- /dev/null
+++ b/arch/s390/include/asm/lowcore.h
@@ -0,0 +1,207 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright IBM Corp. 1999, 2012
+ * Author(s): Hartmut Penner <hp@de.ibm.com>,
+ * Martin Schwidefsky <schwidefsky@de.ibm.com>,
+ * Denis Joseph Barrow,
+ */
+
+#ifndef _ASM_S390_LOWCORE_H
+#define _ASM_S390_LOWCORE_H
+
+#include <linux/types.h>
+#include <asm/ptrace.h>
+#include <asm/cpu.h>
+#include <asm/types.h>
+
+#define LC_ORDER 1
+#define LC_PAGES 2
+
+struct lowcore {
+ __u8 pad_0x0000[0x0014-0x0000]; /* 0x0000 */
+ __u32 ipl_parmblock_ptr; /* 0x0014 */
+ __u8 pad_0x0018[0x0080-0x0018]; /* 0x0018 */
+ __u32 ext_params; /* 0x0080 */
+ __u16 ext_cpu_addr; /* 0x0084 */
+ __u16 ext_int_code; /* 0x0086 */
+ __u16 svc_ilc; /* 0x0088 */
+ __u16 svc_code; /* 0x008a */
+ __u16 pgm_ilc; /* 0x008c */
+ __u16 pgm_code; /* 0x008e */
+ __u32 data_exc_code; /* 0x0090 */
+ __u16 mon_class_num; /* 0x0094 */
+ __u8 per_code; /* 0x0096 */
+ __u8 per_atmid; /* 0x0097 */
+ __u64 per_address; /* 0x0098 */
+ __u8 exc_access_id; /* 0x00a0 */
+ __u8 per_access_id; /* 0x00a1 */
+ __u8 op_access_id; /* 0x00a2 */
+ __u8 ar_mode_id; /* 0x00a3 */
+ __u8 pad_0x00a4[0x00a8-0x00a4]; /* 0x00a4 */
+ __u64 trans_exc_code; /* 0x00a8 */
+ __u64 monitor_code; /* 0x00b0 */
+ __u16 subchannel_id; /* 0x00b8 */
+ __u16 subchannel_nr; /* 0x00ba */
+ __u32 io_int_parm; /* 0x00bc */
+ __u32 io_int_word; /* 0x00c0 */
+ __u8 pad_0x00c4[0x00c8-0x00c4]; /* 0x00c4 */
+ __u32 stfl_fac_list; /* 0x00c8 */
+ __u8 pad_0x00cc[0x00e8-0x00cc]; /* 0x00cc */
+ __u64 mcck_interruption_code; /* 0x00e8 */
+ __u8 pad_0x00f0[0x00f4-0x00f0]; /* 0x00f0 */
+ __u32 external_damage_code; /* 0x00f4 */
+ __u64 failing_storage_address; /* 0x00f8 */
+ __u8 pad_0x0100[0x0110-0x0100]; /* 0x0100 */
+ __u64 breaking_event_addr; /* 0x0110 */
+ __u8 pad_0x0118[0x0120-0x0118]; /* 0x0118 */
+ psw_t restart_old_psw; /* 0x0120 */
+ psw_t external_old_psw; /* 0x0130 */
+ psw_t svc_old_psw; /* 0x0140 */
+ psw_t program_old_psw; /* 0x0150 */
+ psw_t mcck_old_psw; /* 0x0160 */
+ psw_t io_old_psw; /* 0x0170 */
+ __u8 pad_0x0180[0x01a0-0x0180]; /* 0x0180 */
+ psw_t restart_psw; /* 0x01a0 */
+ psw_t external_new_psw; /* 0x01b0 */
+ psw_t svc_new_psw; /* 0x01c0 */
+ psw_t program_new_psw; /* 0x01d0 */
+ psw_t mcck_new_psw; /* 0x01e0 */
+ psw_t io_new_psw; /* 0x01f0 */
+
+ /* Save areas. */
+ __u64 save_area_sync[8]; /* 0x0200 */
+ __u64 save_area_async[8]; /* 0x0240 */
+ __u64 save_area_restart[1]; /* 0x0280 */
+
+ /* CPU flags. */
+ __u64 cpu_flags; /* 0x0288 */
+
+ /* Return psws. */
+ psw_t return_psw; /* 0x0290 */
+ psw_t return_mcck_psw; /* 0x02a0 */
+
+ /* CPU accounting and timing values. */
+ __u64 sync_enter_timer; /* 0x02b0 */
+ __u64 async_enter_timer; /* 0x02b8 */
+ __u64 mcck_enter_timer; /* 0x02c0 */
+ __u64 exit_timer; /* 0x02c8 */
+ __u64 user_timer; /* 0x02d0 */
+ __u64 guest_timer; /* 0x02d8 */
+ __u64 system_timer; /* 0x02e0 */
+ __u64 hardirq_timer; /* 0x02e8 */
+ __u64 softirq_timer; /* 0x02f0 */
+ __u64 steal_timer; /* 0x02f8 */
+ __u64 last_update_timer; /* 0x0300 */
+ __u64 last_update_clock; /* 0x0308 */
+ __u64 int_clock; /* 0x0310 */
+ __u64 mcck_clock; /* 0x0318 */
+ __u64 clock_comparator; /* 0x0320 */
+ __u64 boot_clock[2]; /* 0x0328 */
+
+ /* Current process. */
+ __u64 current_task; /* 0x0338 */
+ __u64 kernel_stack; /* 0x0340 */
+
+ /* Interrupt, panic and restart stack. */
+ __u64 async_stack; /* 0x0348 */
+ __u64 panic_stack; /* 0x0350 */
+ __u64 restart_stack; /* 0x0358 */
+
+ /* Restart function and parameter. */
+ __u64 restart_fn; /* 0x0360 */
+ __u64 restart_data; /* 0x0368 */
+ __u64 restart_source; /* 0x0370 */
+
+ /* Address space pointer. */
+ __u64 kernel_asce; /* 0x0378 */
+ __u64 user_asce; /* 0x0380 */
+ __u64 vdso_asce; /* 0x0388 */
+
+ /*
+ * The lpp and current_pid fields form a
+ * 64-bit value that is set as program
+ * parameter with the LPP instruction.
+ */
+ __u32 lpp; /* 0x0390 */
+ __u32 current_pid; /* 0x0394 */
+
+ /* SMP info area */
+ __u32 cpu_nr; /* 0x0398 */
+ __u32 softirq_pending; /* 0x039c */
+ __u32 preempt_count; /* 0x03a0 */
+ __u32 spinlock_lockval; /* 0x03a4 */
+ __u32 spinlock_index; /* 0x03a8 */
+ __u32 fpu_flags; /* 0x03ac */
+ __u64 percpu_offset; /* 0x03b0 */
+ __u64 vdso_per_cpu_data; /* 0x03b8 */
+ __u64 machine_flags; /* 0x03c0 */
+ __u64 gmap; /* 0x03c8 */
+ __u8 pad_0x03d0[0x0400-0x03d0]; /* 0x03d0 */
+
+ /* br %r1 trampoline */
+ __u16 br_r1_trampoline; /* 0x0400 */
+ __u8 pad_0x0402[0x0e00-0x0402]; /* 0x0402 */
+
+ /*
+ * 0xe00 contains the address of the IPL Parameter Information
+ * block. Dump tools need IPIB for IPL after dump.
+ * Note: do not change the position of any fields in 0x0e00-0x0f00
+ */
+ __u64 ipib; /* 0x0e00 */
+ __u32 ipib_checksum; /* 0x0e08 */
+ __u64 vmcore_info; /* 0x0e0c */
+ __u8 pad_0x0e14[0x0e18-0x0e14]; /* 0x0e14 */
+ __u64 os_info; /* 0x0e18 */
+ __u8 pad_0x0e20[0x0f00-0x0e20]; /* 0x0e20 */
+
+ /* Extended facility list */
+ __u64 stfle_fac_list[16]; /* 0x0f00 */
+ __u64 alt_stfle_fac_list[16]; /* 0x0f80 */
+ __u8 pad_0x1000[0x11b0-0x1000]; /* 0x1000 */
+
+ /* Pointer to the machine check extended save area */
+ __u64 mcesad; /* 0x11b0 */
+
+ /* 64 bit extparam used for pfault/diag 250: defined by architecture */
+ __u64 ext_params2; /* 0x11b8 */
+ __u8 pad_0x11c0[0x1200-0x11c0]; /* 0x11c0 */
+
+ /* CPU register save area: defined by architecture */
+ __u64 floating_pt_save_area[16]; /* 0x1200 */
+ __u64 gpregs_save_area[16]; /* 0x1280 */
+ psw_t psw_save_area; /* 0x1300 */
+ __u8 pad_0x1310[0x1318-0x1310]; /* 0x1310 */
+ __u32 prefixreg_save_area; /* 0x1318 */
+ __u32 fpt_creg_save_area; /* 0x131c */
+ __u8 pad_0x1320[0x1324-0x1320]; /* 0x1320 */
+ __u32 tod_progreg_save_area; /* 0x1324 */
+ __u32 cpu_timer_save_area[2]; /* 0x1328 */
+ __u32 clock_comp_save_area[2]; /* 0x1330 */
+ __u8 pad_0x1338[0x1340-0x1338]; /* 0x1338 */
+ __u32 access_regs_save_area[16]; /* 0x1340 */
+ __u64 cregs_save_area[16]; /* 0x1380 */
+ __u8 pad_0x1400[0x1800-0x1400]; /* 0x1400 */
+
+ /* Transaction abort diagnostic block */
+ __u8 pgm_tdb[256]; /* 0x1800 */
+ __u8 pad_0x1900[0x2000-0x1900]; /* 0x1900 */
+} __packed __aligned(8192);
+
+#define S390_lowcore (*((struct lowcore *) 0))
+
+extern struct lowcore *lowcore_ptr[];
+
+static inline void set_prefix(__u32 address)
+{
+ asm volatile("spx %0" : : "Q" (address) : "memory");
+}
+
+static inline __u32 store_prefix(void)
+{
+ __u32 address;
+
+ asm volatile("stpx %0" : "=Q" (address));
+ return address;
+}
+
+#endif /* _ASM_S390_LOWCORE_H */
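
Because each CPU's lowcore is reached through the prefixed low addresses, its fields can be read via the S390_lowcore accessor defined above, and store_prefix() reports where the lowcore really sits. A minimal sketch with hypothetical helper names:

    /* Read this CPU's logical number straight from its lowcore. */
    static inline unsigned int sketch_this_cpu_nr(void)
    {
            return S390_lowcore.cpu_nr;
    }

    /* The prefix register gives the absolute location of this lowcore. */
    static inline unsigned long sketch_lowcore_base(void)
    {
            return store_prefix();
    }
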
diff --git a/arch/s390/include/asm/mmu.h b/arch/s390/include/asm/mmu.h
new file mode 100644
index 000000000..bcfb63710
--- /dev/null
+++ b/arch/s390/include/asm/mmu.h
@@ -0,0 +1,58 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __MMU_H
+#define __MMU_H
+
+#include <linux/cpumask.h>
+#include <linux/errno.h>
+
+typedef struct {
+ spinlock_t lock;
+ cpumask_t cpu_attach_mask;
+ atomic_t flush_count;
+ unsigned int flush_mm;
+ struct list_head pgtable_list;
+ struct list_head gmap_list;
+ unsigned long gmap_asce;
+ unsigned long asce;
+ unsigned long asce_limit;
+ unsigned long vdso_base;
+ /*
+ * The following bitfields need a down_write on the mm
+ * semaphore when they are written to. As they are only
+ * written once, they can be read without a lock.
+ *
+ * The mmu context allocates 4K page tables.
+ */
+ unsigned int alloc_pgste:1;
+ /* The mmu context uses extended page tables. */
+ unsigned int has_pgste:1;
+ /* The mmu context uses storage keys. */
+ unsigned int uses_skeys:1;
+ /* The mmu context uses CMM. */
+ unsigned int uses_cmm:1;
+ /* The gmaps associated with this context are allowed to use huge pages. */
+ unsigned int allow_gmap_hpage_1m:1;
+ /* The mmu context is for a compat task. */
+ unsigned int compat_mm:1;
+} mm_context_t;
+
+#define INIT_MM_CONTEXT(name) \
+ .context.lock = __SPIN_LOCK_UNLOCKED(name.context.lock), \
+ .context.pgtable_list = LIST_HEAD_INIT(name.context.pgtable_list), \
+ .context.gmap_list = LIST_HEAD_INIT(name.context.gmap_list),
+
+static inline int tprot(unsigned long addr)
+{
+ int rc = -EFAULT;
+
+ asm volatile(
+ " tprot 0(%1),0\n"
+ "0: ipm %0\n"
+ " srl %0,28\n"
+ "1:\n"
+ EX_TABLE(0b,1b)
+ : "+d" (rc) : "a" (addr) : "cc");
+ return rc;
+}
+
+#endif
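
tprot() above returns the TPROT condition code, or -EFAULT if the instruction itself faults; condition codes 0 and 1 mean the tested location is at least fetch-accessible. A minimal sketch of a caller, with a hypothetical helper name:

    /* Condition code 0 or 1: fetching from addr is permitted. */
    static inline bool sketch_addr_fetchable(unsigned long addr)
    {
            int cc = tprot(addr);

            return cc == 0 || cc == 1;
    }
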
diff --git a/arch/s390/include/asm/mmu_context.h b/arch/s390/include/asm/mmu_context.h
new file mode 100644
index 000000000..8d04e6f3f
--- /dev/null
+++ b/arch/s390/include/asm/mmu_context.h
@@ -0,0 +1,136 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * S390 version
+ *
+ * Derived from "include/asm-i386/mmu_context.h"
+ */
+
+#ifndef __S390_MMU_CONTEXT_H
+#define __S390_MMU_CONTEXT_H
+
+#include <asm/pgalloc.h>
+#include <linux/uaccess.h>
+#include <linux/mm_types.h>
+#include <asm/tlbflush.h>
+#include <asm/ctl_reg.h>
+#include <asm-generic/mm_hooks.h>
+
+static inline int init_new_context(struct task_struct *tsk,
+ struct mm_struct *mm)
+{
+ spin_lock_init(&mm->context.lock);
+ INIT_LIST_HEAD(&mm->context.pgtable_list);
+ INIT_LIST_HEAD(&mm->context.gmap_list);
+ cpumask_clear(&mm->context.cpu_attach_mask);
+ atomic_set(&mm->context.flush_count, 0);
+ mm->context.gmap_asce = 0;
+ mm->context.flush_mm = 0;
+ mm->context.compat_mm = test_thread_flag(TIF_31BIT);
+#ifdef CONFIG_PGSTE
+ mm->context.alloc_pgste = page_table_allocate_pgste ||
+ test_thread_flag(TIF_PGSTE) ||
+ (current->mm && current->mm->context.alloc_pgste);
+ mm->context.has_pgste = 0;
+ mm->context.uses_skeys = 0;
+ mm->context.uses_cmm = 0;
+ mm->context.allow_gmap_hpage_1m = 0;
+#endif
+ switch (mm->context.asce_limit) {
+ case _REGION2_SIZE:
+ /*
+ * forked 3-level task, fall through to set new asce with new
+ * mm->pgd
+ */
+ case 0:
+ /* context created by exec, set asce limit to 4TB */
+ mm->context.asce_limit = STACK_TOP_MAX;
+ mm->context.asce = __pa(mm->pgd) | _ASCE_TABLE_LENGTH |
+ _ASCE_USER_BITS | _ASCE_TYPE_REGION3;
+ break;
+ case -PAGE_SIZE:
+ /* forked 5-level task, set new asce with new_mm->pgd */
+ mm->context.asce = __pa(mm->pgd) | _ASCE_TABLE_LENGTH |
+ _ASCE_USER_BITS | _ASCE_TYPE_REGION1;
+ break;
+ case _REGION1_SIZE:
+ /* forked 4-level task, set new asce with new mm->pgd */
+ mm->context.asce = __pa(mm->pgd) | _ASCE_TABLE_LENGTH |
+ _ASCE_USER_BITS | _ASCE_TYPE_REGION2;
+ break;
+ case _REGION3_SIZE:
+ /* forked 2-level compat task, set new asce with new mm->pgd */
+ mm->context.asce = __pa(mm->pgd) | _ASCE_TABLE_LENGTH |
+ _ASCE_USER_BITS | _ASCE_TYPE_SEGMENT;
+ }
+ crst_table_init((unsigned long *) mm->pgd, pgd_entry_type(mm));
+ return 0;
+}
+
+#define destroy_context(mm) do { } while (0)
+
+static inline void set_user_asce(struct mm_struct *mm)
+{
+ S390_lowcore.user_asce = mm->context.asce;
+ __ctl_load(S390_lowcore.user_asce, 1, 1);
+ clear_cpu_flag(CIF_ASCE_PRIMARY);
+}
+
+static inline void clear_user_asce(void)
+{
+ S390_lowcore.user_asce = S390_lowcore.kernel_asce;
+ __ctl_load(S390_lowcore.kernel_asce, 1, 1);
+ set_cpu_flag(CIF_ASCE_PRIMARY);
+}
+
+mm_segment_t enable_sacf_uaccess(void);
+void disable_sacf_uaccess(mm_segment_t old_fs);
+
+static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next,
+ struct task_struct *tsk)
+{
+ int cpu = smp_processor_id();
+
+ S390_lowcore.user_asce = next->context.asce;
+ cpumask_set_cpu(cpu, &next->context.cpu_attach_mask);
+ /* Clear previous user-ASCE from CR1 and CR7 */
+ if (!test_cpu_flag(CIF_ASCE_PRIMARY)) {
+ __ctl_load(S390_lowcore.kernel_asce, 1, 1);
+ set_cpu_flag(CIF_ASCE_PRIMARY);
+ }
+ if (test_cpu_flag(CIF_ASCE_SECONDARY)) {
+ __ctl_load(S390_lowcore.vdso_asce, 7, 7);
+ clear_cpu_flag(CIF_ASCE_SECONDARY);
+ }
+ if (prev != next)
+ cpumask_clear_cpu(cpu, &prev->context.cpu_attach_mask);
+}
+
+#define finish_arch_post_lock_switch finish_arch_post_lock_switch
+static inline void finish_arch_post_lock_switch(void)
+{
+ struct task_struct *tsk = current;
+ struct mm_struct *mm = tsk->mm;
+
+ if (mm) {
+ preempt_disable();
+ while (atomic_read(&mm->context.flush_count))
+ cpu_relax();
+ cpumask_set_cpu(smp_processor_id(), mm_cpumask(mm));
+ __tlb_flush_mm_lazy(mm);
+ preempt_enable();
+ }
+ set_fs(current->thread.mm_segment);
+}
+
+#define enter_lazy_tlb(mm,tsk) do { } while (0)
+#define deactivate_mm(tsk,mm) do { } while (0)
+
+static inline void activate_mm(struct mm_struct *prev,
+ struct mm_struct *next)
+{
+ switch_mm(prev, next, current);
+ cpumask_set_cpu(smp_processor_id(), mm_cpumask(next));
+ set_user_asce(next);
+}
+
+#endif /* __S390_MMU_CONTEXT_H */
diff --git a/arch/s390/include/asm/mmzone.h b/arch/s390/include/asm/mmzone.h
new file mode 100644
index 000000000..73e3e7c69
--- /dev/null
+++ b/arch/s390/include/asm/mmzone.h
@@ -0,0 +1,17 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * NUMA support for s390
+ *
+ * Copyright IBM Corp. 2015
+ */
+
+#ifndef _ASM_S390_MMZONE_H
+#define _ASM_S390_MMZONE_H
+
+#ifdef CONFIG_NUMA
+
+extern struct pglist_data *node_data[];
+#define NODE_DATA(nid) (node_data[nid])
+
+#endif /* CONFIG_NUMA */
+#endif /* _ASM_S390_MMZONE_H */
diff --git a/arch/s390/include/asm/module.h b/arch/s390/include/asm/module.h
new file mode 100644
index 000000000..e0a6d2984
--- /dev/null
+++ b/arch/s390/include/asm/module.h
@@ -0,0 +1,35 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_S390_MODULE_H
+#define _ASM_S390_MODULE_H
+
+#include <asm-generic/module.h>
+
+/*
+ * This file contains the s390 architecture specific module code.
+ */
+
+struct mod_arch_syminfo
+{
+ unsigned long got_offset;
+ unsigned long plt_offset;
+ int got_initialized;
+ int plt_initialized;
+};
+
+struct mod_arch_specific
+{
+ /* Starting offset of got in the module core memory. */
+ unsigned long got_offset;
+ /* Starting offset of plt in the module core memory. */
+ unsigned long plt_offset;
+ /* Size of the got. */
+ unsigned long got_size;
+ /* Size of the plt. */
+ unsigned long plt_size;
+ /* Number of symbols in syminfo. */
+ int nsyms;
+ /* Additional symbol information (got and plt offsets). */
+ struct mod_arch_syminfo *syminfo;
+};
+
+#endif /* _ASM_S390_MODULE_H */
diff --git a/arch/s390/include/asm/nmi.h b/arch/s390/include/asm/nmi.h
new file mode 100644
index 000000000..1e5dc4537
--- /dev/null
+++ b/arch/s390/include/asm/nmi.h
@@ -0,0 +1,105 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Machine check handler definitions
+ *
+ * Copyright IBM Corp. 2000, 2009
+ * Author(s): Ingo Adlung <adlung@de.ibm.com>,
+ * Martin Schwidefsky <schwidefsky@de.ibm.com>,
+ * Cornelia Huck <cornelia.huck@de.ibm.com>,
+ * Heiko Carstens <heiko.carstens@de.ibm.com>,
+ */
+
+#ifndef _ASM_S390_NMI_H
+#define _ASM_S390_NMI_H
+
+#include <linux/const.h>
+#include <linux/types.h>
+
+#define MCIC_SUBCLASS_MASK (1ULL<<63 | 1ULL<<62 | 1ULL<<61 | \
+ 1ULL<<59 | 1ULL<<58 | 1ULL<<56 | \
+ 1ULL<<55 | 1ULL<<54 | 1ULL<<53 | \
+ 1ULL<<52 | 1ULL<<47 | 1ULL<<46 | \
+ 1ULL<<45 | 1ULL<<44)
+#define MCCK_CODE_SYSTEM_DAMAGE _BITUL(63)
+#define MCCK_CODE_EXT_DAMAGE _BITUL(63 - 5)
+#define MCCK_CODE_CP _BITUL(63 - 9)
+#define MCCK_CODE_CPU_TIMER_VALID _BITUL(63 - 46)
+#define MCCK_CODE_PSW_MWP_VALID _BITUL(63 - 20)
+#define MCCK_CODE_PSW_IA_VALID _BITUL(63 - 23)
+#define MCCK_CODE_CR_VALID _BITUL(63 - 29)
+#define MCCK_CODE_GS_VALID _BITUL(63 - 36)
+#define MCCK_CODE_FC_VALID _BITUL(63 - 43)
+
+#ifndef __ASSEMBLY__
+
+union mci {
+ unsigned long val;
+ struct {
+ u64 sd : 1; /* 00 system damage */
+ u64 pd : 1; /* 01 instruction-processing damage */
+ u64 sr : 1; /* 02 system recovery */
+ u64 : 1; /* 03 */
+ u64 cd : 1; /* 04 timing-facility damage */
+ u64 ed : 1; /* 05 external damage */
+ u64 : 1; /* 06 */
+ u64 dg : 1; /* 07 degradation */
+ u64 w : 1; /* 08 warning pending */
+ u64 cp : 1; /* 09 channel-report pending */
+ u64 sp : 1; /* 10 service-processor damage */
+ u64 ck : 1; /* 11 channel-subsystem damage */
+ u64 : 2; /* 12-13 */
+ u64 b : 1; /* 14 backed up */
+ u64 : 1; /* 15 */
+ u64 se : 1; /* 16 storage error uncorrected */
+ u64 sc : 1; /* 17 storage error corrected */
+ u64 ke : 1; /* 18 storage-key error uncorrected */
+ u64 ds : 1; /* 19 storage degradation */
+ u64 wp : 1; /* 20 psw mwp validity */
+ u64 ms : 1; /* 21 psw mask and key validity */
+ u64 pm : 1; /* 22 psw program mask and cc validity */
+ u64 ia : 1; /* 23 psw instruction address validity */
+ u64 fa : 1; /* 24 failing storage address validity */
+ u64 vr : 1; /* 25 vector register validity */
+ u64 ec : 1; /* 26 external damage code validity */
+ u64 fp : 1; /* 27 floating point register validity */
+ u64 gr : 1; /* 28 general register validity */
+ u64 cr : 1; /* 29 control register validity */
+ u64 : 1; /* 30 */
+ u64 st : 1; /* 31 storage logical validity */
+ u64 ie : 1; /* 32 indirect storage error */
+ u64 ar : 1; /* 33 access register validity */
+ u64 da : 1; /* 34 delayed access exception */
+ u64 : 1; /* 35 */
+ u64 gs : 1; /* 36 guarded storage registers validity */
+ u64 : 5; /* 37-41 */
+ u64 pr : 1; /* 42 tod programmable register validity */
+ u64 fc : 1; /* 43 fp control register validity */
+ u64 ap : 1; /* 44 ancillary report */
+ u64 : 1; /* 45 */
+ u64 ct : 1; /* 46 cpu timer validity */
+ u64 cc : 1; /* 47 clock comparator validity */
+ u64 : 16; /* 48-63 */
+ };
+};
+
+#define MCESA_ORIGIN_MASK (~0x3ffUL)
+#define MCESA_LC_MASK (0xfUL)
+#define MCESA_MIN_SIZE (1024)
+#define MCESA_MAX_SIZE (2048)
+
+struct mcesa {
+ u8 vector_save_area[1024];
+ u8 guarded_storage_save_area[32];
+};
+
+struct pt_regs;
+
+void nmi_alloc_boot_cpu(struct lowcore *lc);
+int nmi_alloc_per_cpu(struct lowcore *lc);
+void nmi_free_per_cpu(struct lowcore *lc);
+
+void s390_handle_mcck(void);
+void s390_do_machine_check(struct pt_regs *regs);
+
+#endif /* __ASSEMBLY__ */
+#endif /* _ASM_S390_NMI_H */
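
union mci overlays the 64-bit machine-check interruption code so the subclass and validity bits can be tested by name instead of through the MCCK_CODE_* masks. A minimal sketch, assuming the MCIC value has already been read from the lowcore; the helper name is hypothetical:

    /* The sd field is the same bit as MCCK_CODE_SYSTEM_DAMAGE. */
    static inline bool sketch_mcck_is_system_damage(unsigned long mcic)
    {
            union mci mci = { .val = mcic };

            return mci.sd != 0;
    }
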
diff --git a/arch/s390/include/asm/nospec-branch.h b/arch/s390/include/asm/nospec-branch.h
new file mode 100644
index 000000000..b4bd8c41e
--- /dev/null
+++ b/arch/s390/include/asm/nospec-branch.h
@@ -0,0 +1,17 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_S390_EXPOLINE_H
+#define _ASM_S390_EXPOLINE_H
+
+#ifndef __ASSEMBLY__
+
+#include <linux/types.h>
+
+extern int nospec_disable;
+
+void nospec_init_branches(void);
+void nospec_auto_detect(void);
+void nospec_revert(s32 *start, s32 *end);
+
+#endif /* __ASSEMBLY__ */
+
+#endif /* _ASM_S390_EXPOLINE_H */
diff --git a/arch/s390/include/asm/nospec-insn.h b/arch/s390/include/asm/nospec-insn.h
new file mode 100644
index 000000000..123dac371
--- /dev/null
+++ b/arch/s390/include/asm/nospec-insn.h
@@ -0,0 +1,196 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_S390_NOSPEC_ASM_H
+#define _ASM_S390_NOSPEC_ASM_H
+
+#include <asm/alternative-asm.h>
+#include <asm/asm-offsets.h>
+#include <asm/dwarf.h>
+
+#ifdef __ASSEMBLY__
+
+#ifdef CC_USING_EXPOLINE
+
+_LC_BR_R1 = __LC_BR_R1
+
+/*
+ * The expoline macros are used to create thunks in the same format
+ * as gcc generates them. The 'comdat' section flag makes sure that
+ * the various thunks are merged into a single copy.
+ */
+ .macro __THUNK_PROLOG_NAME name
+ .pushsection .text.\name,"axG",@progbits,\name,comdat
+ .globl \name
+ .hidden \name
+ .type \name,@function
+\name:
+ CFI_STARTPROC
+ .endm
+
+ .macro __THUNK_EPILOG
+ CFI_ENDPROC
+ .popsection
+ .endm
+
+ .macro __THUNK_PROLOG_BR r1,r2
+ __THUNK_PROLOG_NAME __s390x_indirect_jump_r\r2\()use_r\r1
+ .endm
+
+ .macro __THUNK_PROLOG_BC d0,r1,r2
+ __THUNK_PROLOG_NAME __s390x_indirect_branch_\d0\()_\r2\()use_\r1
+ .endm
+
+ .macro __THUNK_BR r1,r2
+ jg __s390x_indirect_jump_r\r2\()use_r\r1
+ .endm
+
+ .macro __THUNK_BC d0,r1,r2
+ jg __s390x_indirect_branch_\d0\()_\r2\()use_\r1
+ .endm
+
+ .macro __THUNK_BRASL r1,r2,r3
+ brasl \r1,__s390x_indirect_jump_r\r3\()use_r\r2
+ .endm
+
+ .macro __DECODE_RR expand,reg,ruse
+ .set __decode_fail,1
+ .irp r1,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15
+ .ifc \reg,%r\r1
+ .irp r2,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15
+ .ifc \ruse,%r\r2
+ \expand \r1,\r2
+ .set __decode_fail,0
+ .endif
+ .endr
+ .endif
+ .endr
+ .if __decode_fail == 1
+ .error "__DECODE_RR failed"
+ .endif
+ .endm
+
+ .macro __DECODE_RRR expand,rsave,rtarget,ruse
+ .set __decode_fail,1
+ .irp r1,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15
+ .ifc \rsave,%r\r1
+ .irp r2,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15
+ .ifc \rtarget,%r\r2
+ .irp r3,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15
+ .ifc \ruse,%r\r3
+ \expand \r1,\r2,\r3
+ .set __decode_fail,0
+ .endif
+ .endr
+ .endif
+ .endr
+ .endif
+ .endr
+ .if __decode_fail == 1
+ .error "__DECODE_RRR failed"
+ .endif
+ .endm
+
+ .macro __DECODE_DRR expand,disp,reg,ruse
+ .set __decode_fail,1
+ .irp r1,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15
+ .ifc \reg,%r\r1
+ .irp r2,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15
+ .ifc \ruse,%r\r2
+ \expand \disp,\r1,\r2
+ .set __decode_fail,0
+ .endif
+ .endr
+ .endif
+ .endr
+ .if __decode_fail == 1
+ .error "__DECODE_DRR failed"
+ .endif
+ .endm
+
+ .macro __THUNK_EX_BR reg,ruse
+ # Be very careful when adding instructions to this macro!
+ # The ALTERNATIVE replacement code has a .+10 which targets
+ # the "br \reg" after the code has been patched.
+#ifdef CONFIG_HAVE_MARCH_Z10_FEATURES
+ exrl 0,555f
+ j .
+#else
+ .ifc \reg,%r1
+ ALTERNATIVE "ex %r0,_LC_BR_R1", ".insn ril,0xc60000000000,0,.+10", 35
+ j .
+ .else
+ larl \ruse,555f
+ ex 0,0(\ruse)
+ j .
+ .endif
+#endif
+555: br \reg
+ .endm
+
+ .macro __THUNK_EX_BC disp,reg,ruse
+#ifdef CONFIG_HAVE_MARCH_Z10_FEATURES
+ exrl 0,556f
+ j .
+#else
+ larl \ruse,556f
+ ex 0,0(\ruse)
+ j .
+#endif
+556: b \disp(\reg)
+ .endm
+
+ .macro GEN_BR_THUNK reg,ruse=%r1
+ __DECODE_RR __THUNK_PROLOG_BR,\reg,\ruse
+ __THUNK_EX_BR \reg,\ruse
+ __THUNK_EPILOG
+ .endm
+
+ .macro GEN_B_THUNK disp,reg,ruse=%r1
+ __DECODE_DRR __THUNK_PROLOG_BC,\disp,\reg,\ruse
+ __THUNK_EX_BC \disp,\reg,\ruse
+ __THUNK_EPILOG
+ .endm
+
+ .macro BR_EX reg,ruse=%r1
+557: __DECODE_RR __THUNK_BR,\reg,\ruse
+ .pushsection .s390_indirect_branches,"a",@progbits
+ .long 557b-.
+ .popsection
+ .endm
+
+ .macro B_EX disp,reg,ruse=%r1
+558: __DECODE_DRR __THUNK_BC,\disp,\reg,\ruse
+ .pushsection .s390_indirect_branches,"a",@progbits
+ .long 558b-.
+ .popsection
+ .endm
+
+ .macro BASR_EX rsave,rtarget,ruse=%r1
+559: __DECODE_RRR __THUNK_BRASL,\rsave,\rtarget,\ruse
+ .pushsection .s390_indirect_branches,"a",@progbits
+ .long 559b-.
+ .popsection
+ .endm
+
+#else
+ .macro GEN_BR_THUNK reg,ruse=%r1
+ .endm
+
+ .macro GEN_B_THUNK disp,reg,ruse=%r1
+ .endm
+
+ .macro BR_EX reg,ruse=%r1
+ br \reg
+ .endm
+
+ .macro B_EX disp,reg,ruse=%r1
+ b \disp(\reg)
+ .endm
+
+ .macro BASR_EX rsave,rtarget,ruse=%r1
+ basr \rsave,\rtarget
+ .endm
+#endif /* CC_USING_EXPOLINE */
+
+#endif /* __ASSEMBLY__ */
+
+#endif /* _ASM_S390_NOSPEC_ASM_H */
diff --git a/arch/s390/include/asm/numa.h b/arch/s390/include/asm/numa.h
new file mode 100644
index 000000000..35f8cbe7e
--- /dev/null
+++ b/arch/s390/include/asm/numa.h
@@ -0,0 +1,36 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * NUMA support for s390
+ *
+ * Declare the NUMA core code structures and functions.
+ *
+ * Copyright IBM Corp. 2015
+ */
+
+#ifndef _ASM_S390_NUMA_H
+#define _ASM_S390_NUMA_H
+
+#ifdef CONFIG_NUMA
+
+#include <linux/numa.h>
+#include <linux/cpumask.h>
+
+void numa_setup(void);
+int numa_pfn_to_nid(unsigned long pfn);
+int __node_distance(int a, int b);
+void numa_update_cpu_topology(void);
+
+extern cpumask_t node_to_cpumask_map[MAX_NUMNODES];
+extern int numa_debug_enabled;
+
+#else
+
+static inline void numa_setup(void) { }
+static inline void numa_update_cpu_topology(void) { }
+static inline int numa_pfn_to_nid(unsigned long pfn)
+{
+ return 0;
+}
+
+#endif /* CONFIG_NUMA */
+#endif /* _ASM_S390_NUMA_H */
diff --git a/arch/s390/include/asm/os_info.h b/arch/s390/include/asm/os_info.h
new file mode 100644
index 000000000..3c89279d2
--- /dev/null
+++ b/arch/s390/include/asm/os_info.h
@@ -0,0 +1,50 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * OS info memory interface
+ *
+ * Copyright IBM Corp. 2012
+ * Author(s): Michael Holzheu <holzheu@linux.vnet.ibm.com>
+ */
+#ifndef _ASM_S390_OS_INFO_H
+#define _ASM_S390_OS_INFO_H
+
+#define OS_INFO_VERSION_MAJOR 1
+#define OS_INFO_VERSION_MINOR 1
+#define OS_INFO_MAGIC 0x4f53494e464f535aULL /* OSINFOSZ */
+
+#define OS_INFO_VMCOREINFO 0
+#define OS_INFO_REIPL_BLOCK 1
+
+struct os_info_entry {
+ u64 addr;
+ u64 size;
+ u32 csum;
+} __packed;
+
+struct os_info {
+ u64 magic;
+ u32 csum;
+ u16 version_major;
+ u16 version_minor;
+ u64 crashkernel_addr;
+ u64 crashkernel_size;
+ struct os_info_entry entry[2];
+ u8 reserved[4024];
+} __packed;
+
+void os_info_init(void);
+void os_info_entry_add(int nr, void *ptr, u64 len);
+void os_info_crashkernel_add(unsigned long base, unsigned long size);
+u32 os_info_csum(struct os_info *os_info);
+
+#ifdef CONFIG_CRASH_DUMP
+void *os_info_old_entry(int nr, unsigned long *size);
+int copy_oldmem_kernel(void *dst, void *src, size_t count);
+#else
+static inline void *os_info_old_entry(int nr, unsigned long *size)
+{
+ return NULL;
+}
+#endif
+
+#endif /* _ASM_S390_OS_INFO_H */
diff --git a/arch/s390/include/asm/page-states.h b/arch/s390/include/asm/page-states.h
new file mode 100644
index 000000000..c33c4deb5
--- /dev/null
+++ b/arch/s390/include/asm/page-states.h
@@ -0,0 +1,21 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright IBM Corp. 2017
+ * Author(s): Claudio Imbrenda <imbrenda@linux.vnet.ibm.com>
+ */
+
+#ifndef PAGE_STATES_H
+#define PAGE_STATES_H
+
+#define ESSA_GET_STATE 0
+#define ESSA_SET_STABLE 1
+#define ESSA_SET_UNUSED 2
+#define ESSA_SET_VOLATILE 3
+#define ESSA_SET_POT_VOLATILE 4
+#define ESSA_SET_STABLE_RESIDENT 5
+#define ESSA_SET_STABLE_IF_RESIDENT 6
+#define ESSA_SET_STABLE_NODAT 7
+
+#define ESSA_MAX ESSA_SET_STABLE_NODAT
+
+#endif
diff --git a/arch/s390/include/asm/page.h b/arch/s390/include/asm/page.h
new file mode 100644
index 000000000..349b1c1ef
--- /dev/null
+++ b/arch/s390/include/asm/page.h
@@ -0,0 +1,186 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * S390 version
+ * Copyright IBM Corp. 1999, 2000
+ * Author(s): Hartmut Penner (hp@de.ibm.com)
+ */
+
+#ifndef _S390_PAGE_H
+#define _S390_PAGE_H
+
+#include <linux/const.h>
+#include <asm/types.h>
+
+#define _PAGE_SHIFT 12
+#define _PAGE_SIZE (_AC(1, UL) << _PAGE_SHIFT)
+#define _PAGE_MASK (~(_PAGE_SIZE - 1))
+
+/* PAGE_SHIFT determines the page size */
+#define PAGE_SHIFT _PAGE_SHIFT
+#define PAGE_SIZE _PAGE_SIZE
+#define PAGE_MASK _PAGE_MASK
+#define PAGE_DEFAULT_ACC 0
+#define PAGE_DEFAULT_KEY (PAGE_DEFAULT_ACC << 4)
+
+#define HPAGE_SHIFT 20
+#define HPAGE_SIZE (1UL << HPAGE_SHIFT)
+#define HPAGE_MASK (~(HPAGE_SIZE - 1))
+#define HUGETLB_PAGE_ORDER (HPAGE_SHIFT - PAGE_SHIFT)
+#define HUGE_MAX_HSTATE 2
+
+#define ARCH_HAS_SETCLEAR_HUGE_PTE
+#define ARCH_HAS_HUGE_PTE_TYPE
+#define ARCH_HAS_PREPARE_HUGEPAGE
+#define ARCH_HAS_HUGEPAGE_CLEAR_FLUSH
+
+#define HAVE_ARCH_HUGETLB_UNMAPPED_AREA
+
+#include <asm/setup.h>
+#ifndef __ASSEMBLY__
+
+void __storage_key_init_range(unsigned long start, unsigned long end);
+
+static inline void storage_key_init_range(unsigned long start, unsigned long end)
+{
+ if (PAGE_DEFAULT_KEY != 0)
+ __storage_key_init_range(start, end);
+}
+
+#define clear_page(page) memset((page), 0, PAGE_SIZE)
+
+/*
+ * copy_page uses the mvcl instruction with 0xb0 padding byte in order to
+ * bypass caches when copying a page. Especially when copying huge pages
+ * this keeps L1 and L2 data caches alive.
+ */
+static inline void copy_page(void *to, void *from)
+{
+ register void *reg2 asm ("2") = to;
+ register unsigned long reg3 asm ("3") = 0x1000;
+ register void *reg4 asm ("4") = from;
+ register unsigned long reg5 asm ("5") = 0xb0001000;
+ asm volatile(
+ " mvcl 2,4"
+ : "+d" (reg2), "+d" (reg3), "+d" (reg4), "+d" (reg5)
+ : : "memory", "cc");
+}
+
+#define clear_user_page(page, vaddr, pg) clear_page(page)
+#define copy_user_page(to, from, vaddr, pg) copy_page(to, from)
+
+#define __alloc_zeroed_user_highpage(movableflags, vma, vaddr) \
+ alloc_page_vma(GFP_HIGHUSER | __GFP_ZERO | movableflags, vma, vaddr)
+#define __HAVE_ARCH_ALLOC_ZEROED_USER_HIGHPAGE
+
+/*
+ * These are used to make use of C type-checking.
+ */
+
+typedef struct { unsigned long pgprot; } pgprot_t;
+typedef struct { unsigned long pgste; } pgste_t;
+typedef struct { unsigned long pte; } pte_t;
+typedef struct { unsigned long pmd; } pmd_t;
+typedef struct { unsigned long pud; } pud_t;
+typedef struct { unsigned long p4d; } p4d_t;
+typedef struct { unsigned long pgd; } pgd_t;
+typedef pte_t *pgtable_t;
+
+#define pgprot_val(x) ((x).pgprot)
+#define pgste_val(x) ((x).pgste)
+#define pte_val(x) ((x).pte)
+#define pmd_val(x) ((x).pmd)
+#define pud_val(x) ((x).pud)
+#define p4d_val(x) ((x).p4d)
+#define pgd_val(x) ((x).pgd)
+
+#define __pgste(x) ((pgste_t) { (x) } )
+#define __pte(x) ((pte_t) { (x) } )
+#define __pmd(x) ((pmd_t) { (x) } )
+#define __pud(x) ((pud_t) { (x) } )
+#define __p4d(x) ((p4d_t) { (x) } )
+#define __pgd(x) ((pgd_t) { (x) } )
+#define __pgprot(x) ((pgprot_t) { (x) } )
+
+static inline void page_set_storage_key(unsigned long addr,
+ unsigned char skey, int mapped)
+{
+ if (!mapped)
+ asm volatile(".insn rrf,0xb22b0000,%0,%1,8,0"
+ : : "d" (skey), "a" (addr));
+ else
+ asm volatile("sske %0,%1" : : "d" (skey), "a" (addr));
+}
+
+static inline unsigned char page_get_storage_key(unsigned long addr)
+{
+ unsigned char skey;
+
+ asm volatile("iske %0,%1" : "=d" (skey) : "a" (addr));
+ return skey;
+}
+
+static inline int page_reset_referenced(unsigned long addr)
+{
+ int cc;
+
+ asm volatile(
+ " rrbe 0,%1\n"
+ " ipm %0\n"
+ " srl %0,28\n"
+ : "=d" (cc) : "a" (addr) : "cc");
+ return cc;
+}
+
+/* Bits in the storage key */
+#define _PAGE_CHANGED 0x02 /* HW changed bit */
+#define _PAGE_REFERENCED 0x04 /* HW referenced bit */
+#define _PAGE_FP_BIT 0x08 /* HW fetch protection bit */
+#define _PAGE_ACC_BITS 0xf0 /* HW access control bits */
+
+struct page;
+void arch_free_page(struct page *page, int order);
+void arch_alloc_page(struct page *page, int order);
+void arch_set_page_dat(struct page *page, int order);
+void arch_set_page_nodat(struct page *page, int order);
+int arch_test_page_nodat(struct page *page);
+void arch_set_page_states(int make_stable);
+
+static inline int devmem_is_allowed(unsigned long pfn)
+{
+ return 0;
+}
+
+#define HAVE_ARCH_FREE_PAGE
+#define HAVE_ARCH_ALLOC_PAGE
+
+#endif /* !__ASSEMBLY__ */
+
+#define __PAGE_OFFSET 0x0UL
+#define PAGE_OFFSET 0x0UL
+
+#define __pa(x) ((unsigned long)(x))
+#define __va(x) ((void *)(unsigned long)(x))
+
+#define virt_to_pfn(kaddr) (__pa(kaddr) >> PAGE_SHIFT)
+#define pfn_to_virt(pfn) __va((pfn) << PAGE_SHIFT)
+
+#define virt_to_page(kaddr) pfn_to_page(virt_to_pfn(kaddr))
+#define page_to_virt(page) pfn_to_virt(page_to_pfn(page))
+
+#define phys_to_pfn(kaddr) ((kaddr) >> PAGE_SHIFT)
+#define pfn_to_phys(pfn) ((pfn) << PAGE_SHIFT)
+
+#define phys_to_page(kaddr) pfn_to_page(phys_to_pfn(kaddr))
+#define page_to_phys(page) (page_to_pfn(page) << PAGE_SHIFT)
+
+#define virt_addr_valid(kaddr) pfn_valid(__pa(kaddr) >> PAGE_SHIFT)
+
+#define VM_DATA_DEFAULT_FLAGS (VM_READ | VM_WRITE | \
+ VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC)
+
+#define ARCH_ZONE_DMA_BITS 31
+
+#include <asm-generic/memory_model.h>
+#include <asm-generic/getorder.h>
+
+#endif /* _S390_PAGE_H */
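
page_set_storage_key() and page_get_storage_key() wrap the SSKE/ISKE instructions; the key byte combines the access-control bits (_PAGE_ACC_BITS) with the fetch-protection, reference and change bits. A minimal sketch, assuming addr is a mapped kernel address; the helper name is hypothetical:

    /* Set access-control key 3 without fetch protection, return the ACC bits. */
    static inline unsigned char sketch_tag_page(unsigned long addr)
    {
            page_set_storage_key(addr, 0x30, 1);
            return page_get_storage_key(addr) & _PAGE_ACC_BITS;
    }
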
diff --git a/arch/s390/include/asm/pci.h b/arch/s390/include/asm/pci.h
new file mode 100644
index 000000000..10fe982f2
--- /dev/null
+++ b/arch/s390/include/asm/pci.h
@@ -0,0 +1,253 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __ASM_S390_PCI_H
+#define __ASM_S390_PCI_H
+
+/* must be set before including pci_clp.h */
+#define PCI_BAR_COUNT 6
+
+#include <linux/pci.h>
+#include <linux/mutex.h>
+#include <linux/iommu.h>
+#include <asm-generic/pci.h>
+#include <asm/pci_clp.h>
+#include <asm/pci_debug.h>
+#include <asm/sclp.h>
+
+#define PCIBIOS_MIN_IO 0x1000
+#define PCIBIOS_MIN_MEM 0x10000000
+
+#define pcibios_assign_all_busses() (0)
+
+void __iomem *pci_iomap(struct pci_dev *, int, unsigned long);
+void pci_iounmap(struct pci_dev *, void __iomem *);
+int pci_domain_nr(struct pci_bus *);
+int pci_proc_domain(struct pci_bus *);
+
+#define ZPCI_BUS_NR 0 /* default bus number */
+#define ZPCI_DEVFN 0 /* default device number */
+
+/* PCI Function Controls */
+#define ZPCI_FC_FN_ENABLED 0x80
+#define ZPCI_FC_ERROR 0x40
+#define ZPCI_FC_BLOCKED 0x20
+#define ZPCI_FC_DMA_ENABLED 0x10
+
+#define ZPCI_FMB_DMA_COUNTER_VALID (1 << 23)
+
+struct zpci_fmb_fmt0 {
+ u64 dma_rbytes;
+ u64 dma_wbytes;
+};
+
+struct zpci_fmb_fmt1 {
+ u64 rx_bytes;
+ u64 rx_packets;
+ u64 tx_bytes;
+ u64 tx_packets;
+};
+
+struct zpci_fmb_fmt2 {
+ u64 consumed_work_units;
+ u64 max_work_units;
+};
+
+struct zpci_fmb_fmt3 {
+ u64 tx_bytes;
+};
+
+struct zpci_fmb {
+ u32 format : 8;
+ u32 fmt_ind : 24;
+ u32 samples;
+ u64 last_update;
+ /* common counters */
+ u64 ld_ops;
+ u64 st_ops;
+ u64 stb_ops;
+ u64 rpcit_ops;
+ /* format specific counters */
+ union {
+ struct zpci_fmb_fmt0 fmt0;
+ struct zpci_fmb_fmt1 fmt1;
+ struct zpci_fmb_fmt2 fmt2;
+ struct zpci_fmb_fmt3 fmt3;
+ };
+} __packed __aligned(128);
+
+enum zpci_state {
+ ZPCI_FN_STATE_STANDBY = 0,
+ ZPCI_FN_STATE_CONFIGURED = 1,
+ ZPCI_FN_STATE_RESERVED = 2,
+ ZPCI_FN_STATE_ONLINE = 3,
+};
+
+struct zpci_bar_struct {
+ struct resource *res; /* bus resource */
+ u32 val; /* bar start & 3 flag bits */
+ u16 map_idx; /* index into bar mapping array */
+ u8 size; /* order 2 exponent */
+};
+
+struct s390_domain;
+
+/* Private data per function */
+struct zpci_dev {
+ struct pci_bus *bus;
+ struct list_head entry; /* list of all zpci_devices, needed for hotplug, etc. */
+
+ enum zpci_state state;
+ u32 fid; /* function ID, used by sclp */
+ u32 fh; /* function handle, used by insn's */
+ u16 vfn; /* virtual function number */
+ u16 pchid; /* physical channel ID */
+ u8 pfgid; /* function group ID */
+ u8 pft; /* pci function type */
+ u16 domain;
+
+ struct mutex lock;
+ u8 pfip[CLP_PFIP_NR_SEGMENTS]; /* pci function internal path */
+ u32 uid; /* user defined id */
+ u8 util_str[CLP_UTIL_STR_LEN]; /* utility string */
+
+ /* IRQ stuff */
+ u64 msi_addr; /* MSI address */
+ unsigned int max_msi; /* maximum number of MSI's */
+ struct airq_iv *aibv; /* adapter interrupt bit vector */
+ unsigned long aisb; /* number of the summary bit */
+
+ /* DMA stuff */
+ unsigned long *dma_table;
+ spinlock_t dma_table_lock;
+ int tlb_refresh;
+
+ spinlock_t iommu_bitmap_lock;
+ unsigned long *iommu_bitmap;
+ unsigned long *lazy_bitmap;
+ unsigned long iommu_size;
+ unsigned long iommu_pages;
+ unsigned int next_bit;
+
+ struct iommu_device iommu_dev; /* IOMMU core handle */
+
+ char res_name[16];
+ struct zpci_bar_struct bars[PCI_BAR_COUNT];
+
+ u64 start_dma; /* Start of available DMA addresses */
+ u64 end_dma; /* End of available DMA addresses */
+ u64 dma_mask; /* DMA address space mask */
+
+ /* Function measurement block */
+ struct zpci_fmb *fmb;
+ u16 fmb_update; /* update interval */
+ u16 fmb_length;
+ /* software counters */
+ atomic64_t allocated_pages;
+ atomic64_t mapped_pages;
+ atomic64_t unmapped_pages;
+
+ enum pci_bus_speed max_bus_speed;
+
+ struct dentry *debugfs_dev;
+ struct dentry *debugfs_perf;
+
+ struct s390_domain *s390_domain; /* s390 IOMMU domain data */
+};
+
+static inline bool zdev_enabled(struct zpci_dev *zdev)
+{
+ return (zdev->fh & (1UL << 31)) ? true : false;
+}
+
+extern const struct attribute_group *zpci_attr_groups[];
+
+/* -----------------------------------------------------------------------------
+ Prototypes
+----------------------------------------------------------------------------- */
+/* Base stuff */
+int zpci_create_device(struct zpci_dev *);
+void zpci_remove_device(struct zpci_dev *zdev);
+int zpci_enable_device(struct zpci_dev *);
+int zpci_disable_device(struct zpci_dev *);
+int zpci_register_ioat(struct zpci_dev *, u8, u64, u64, u64);
+int zpci_unregister_ioat(struct zpci_dev *, u8);
+void zpci_remove_reserved_devices(void);
+
+/* CLP */
+int clp_scan_pci_devices(void);
+int clp_rescan_pci_devices(void);
+int clp_rescan_pci_devices_simple(void);
+int clp_add_pci_device(u32, u32, int);
+int clp_enable_fh(struct zpci_dev *, u8);
+int clp_disable_fh(struct zpci_dev *);
+int clp_get_state(u32 fid, enum zpci_state *state);
+
+/* IOMMU Interface */
+int zpci_init_iommu(struct zpci_dev *zdev);
+void zpci_destroy_iommu(struct zpci_dev *zdev);
+
+#ifdef CONFIG_PCI
+/* Error handling and recovery */
+void zpci_event_error(void *);
+void zpci_event_availability(void *);
+void zpci_rescan(void);
+bool zpci_is_enabled(void);
+#else /* CONFIG_PCI */
+static inline void zpci_event_error(void *e) {}
+static inline void zpci_event_availability(void *e) {}
+static inline void zpci_rescan(void) {}
+#endif /* CONFIG_PCI */
+
+#ifdef CONFIG_HOTPLUG_PCI_S390
+int zpci_init_slot(struct zpci_dev *);
+void zpci_exit_slot(struct zpci_dev *);
+#else /* CONFIG_HOTPLUG_PCI_S390 */
+static inline int zpci_init_slot(struct zpci_dev *zdev)
+{
+ return 0;
+}
+static inline void zpci_exit_slot(struct zpci_dev *zdev) {}
+#endif /* CONFIG_HOTPLUG_PCI_S390 */
+
+/* Helpers */
+static inline struct zpci_dev *to_zpci(struct pci_dev *pdev)
+{
+ return pdev->sysdata;
+}
+
+struct zpci_dev *get_zdev_by_fid(u32);
+
+/* DMA */
+int zpci_dma_init(void);
+void zpci_dma_exit(void);
+
+/* FMB */
+int zpci_fmb_enable_device(struct zpci_dev *);
+int zpci_fmb_disable_device(struct zpci_dev *);
+
+/* Debug */
+int zpci_debug_init(void);
+void zpci_debug_exit(void);
+void zpci_debug_init_device(struct zpci_dev *, const char *);
+void zpci_debug_exit_device(struct zpci_dev *);
+void zpci_debug_info(struct zpci_dev *, struct seq_file *);
+
+/* Error reporting */
+int zpci_report_error(struct pci_dev *, struct zpci_report_error_header *);
+
+#ifdef CONFIG_NUMA
+
+/* Returns the node based on PCI bus */
+static inline int __pcibus_to_node(const struct pci_bus *bus)
+{
+ return NUMA_NO_NODE;
+}
+
+static inline const struct cpumask *
+cpumask_of_pcibus(const struct pci_bus *bus)
+{
+ return cpu_online_mask;
+}
+
+#endif /* CONFIG_NUMA */
+
+#endif
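
An illustrative sketch (not from this patch) of how a driver might use the to_zpci() and zdev_enabled() helpers above; the function name and message are invented for the example, and the usual <linux/pci.h> include is assumed:

    static int example_probe(struct pci_dev *pdev)
    {
            struct zpci_dev *zdev = to_zpci(pdev);  /* sysdata filled in by the s390 PCI core */

            if (!zdev_enabled(zdev))                /* tests bit 31 of the function handle */
                    return -ENODEV;

            dev_info(&pdev->dev, "fid 0x%x fh 0x%x\n", zdev->fid, zdev->fh);
            return 0;
    }
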
diff --git a/arch/s390/include/asm/pci_clp.h b/arch/s390/include/asm/pci_clp.h
new file mode 100644
index 000000000..b3b31b31f
--- /dev/null
+++ b/arch/s390/include/asm/pci_clp.h
@@ -0,0 +1,167 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_S390_PCI_CLP_H
+#define _ASM_S390_PCI_CLP_H
+
+#include <asm/clp.h>
+
+/*
+ * Call Logical Processor - Command Codes
+ */
+#define CLP_LIST_PCI 0x0002
+#define CLP_QUERY_PCI_FN 0x0003
+#define CLP_QUERY_PCI_FNGRP 0x0004
+#define CLP_SET_PCI_FN 0x0005
+
+/* PCI function handle list entry */
+struct clp_fh_list_entry {
+ u16 device_id;
+ u16 vendor_id;
+ u32 config_state : 1;
+ u32 : 31;
+ u32 fid; /* PCI function id */
+ u32 fh; /* PCI function handle */
+} __packed;
+
+#define CLP_RC_SETPCIFN_FH 0x0101 /* Invalid PCI fn handle */
+#define CLP_RC_SETPCIFN_FHOP 0x0102 /* Fn handle not valid for op */
+#define CLP_RC_SETPCIFN_DMAAS 0x0103 /* Invalid DMA addr space */
+#define CLP_RC_SETPCIFN_RES 0x0104 /* Insufficient resources */
+#define CLP_RC_SETPCIFN_ALRDY 0x0105 /* Fn already in requested state */
+#define CLP_RC_SETPCIFN_ERR 0x0106 /* Fn in permanent error state */
+#define CLP_RC_SETPCIFN_RECPND 0x0107 /* Error recovery pending */
+#define CLP_RC_SETPCIFN_BUSY 0x0108 /* Fn busy */
+#define CLP_RC_LISTPCI_BADRT 0x010a /* Resume token not recognized */
+#define CLP_RC_QUERYPCIFG_PFGID 0x010b /* Unrecognized PFGID */
+
+/* request or response block header length */
+#define LIST_PCI_HDR_LEN 32
+
+/* Number of function handles fitting in response block */
+#define CLP_FH_LIST_NR_ENTRIES \
+ ((CLP_BLK_SIZE - 2 * LIST_PCI_HDR_LEN) \
+ / sizeof(struct clp_fh_list_entry))
+
+#define CLP_SET_ENABLE_PCI_FN 0 /* Yes, 0 enables it */
+#define CLP_SET_DISABLE_PCI_FN 1 /* Yes, 1 disables it */
+
+#define CLP_UTIL_STR_LEN 64
+#define CLP_PFIP_NR_SEGMENTS 4
+
+extern bool zpci_unique_uid;
+
+/* List PCI functions request */
+struct clp_req_list_pci {
+ struct clp_req_hdr hdr;
+ u64 resume_token;
+ u64 reserved2;
+} __packed;
+
+/* List PCI functions response */
+struct clp_rsp_list_pci {
+ struct clp_rsp_hdr hdr;
+ u64 resume_token;
+ u32 reserved2;
+ u16 max_fn;
+ u8 : 7;
+ u8 uid_checking : 1;
+ u8 entry_size;
+ struct clp_fh_list_entry fh_list[CLP_FH_LIST_NR_ENTRIES];
+} __packed;
+
+/* Query PCI function request */
+struct clp_req_query_pci {
+ struct clp_req_hdr hdr;
+ u32 fh; /* function handle */
+ u32 reserved2;
+ u64 reserved3;
+} __packed;
+
+/* Query PCI function response */
+struct clp_rsp_query_pci {
+ struct clp_rsp_hdr hdr;
+ u16 vfn; /* virtual fn number */
+ u16 : 7;
+ u16 util_str_avail : 1; /* utility string available? */
+ u16 pfgid : 8; /* pci function group id */
+ u32 fid; /* pci function id */
+ u8 bar_size[PCI_BAR_COUNT];
+ u16 pchid;
+ __le32 bar[PCI_BAR_COUNT];
+ u8 pfip[CLP_PFIP_NR_SEGMENTS]; /* pci function internal path */
+ u32 : 16;
+ u8 fmb_len;
+ u8 pft; /* pci function type */
+ u64 sdma; /* start dma as */
+ u64 edma; /* end dma as */
+ u32 reserved[11];
+ u32 uid; /* user defined id */
+ u8 util_str[CLP_UTIL_STR_LEN]; /* utility string */
+} __packed;
+
+/* Query PCI function group request */
+struct clp_req_query_pci_grp {
+ struct clp_req_hdr hdr;
+ u32 reserved2 : 24;
+ u32 pfgid : 8; /* function group id */
+ u32 reserved3;
+ u64 reserved4;
+} __packed;
+
+/* Query PCI function group response */
+struct clp_rsp_query_pci_grp {
+ struct clp_rsp_hdr hdr;
+ u16 : 4;
+ u16 noi : 12; /* number of interrupts */
+ u8 version;
+ u8 : 6;
+ u8 frame : 1;
+ u8 refresh : 1; /* TLB refresh mode */
+ u16 reserved2;
+ u16 mui;
+ u64 reserved3;
+ u64 dasm; /* dma address space mask */
+ u64 msia; /* MSI address */
+ u64 reserved4;
+ u64 reserved5;
+} __packed;
+
+/* Set PCI function request */
+struct clp_req_set_pci {
+ struct clp_req_hdr hdr;
+ u32 fh; /* function handle */
+ u16 reserved2;
+ u8 oc; /* operation controls */
+ u8 ndas; /* number of dma spaces */
+ u64 reserved3;
+} __packed;
+
+/* Set PCI function response */
+struct clp_rsp_set_pci {
+ struct clp_rsp_hdr hdr;
+ u32 fh; /* function handle */
+ u32 reserved3;
+ u64 reserved4;
+} __packed;
+
+/* Combined request/response block structures used by clp insn */
+struct clp_req_rsp_list_pci {
+ struct clp_req_list_pci request;
+ struct clp_rsp_list_pci response;
+} __packed;
+
+struct clp_req_rsp_set_pci {
+ struct clp_req_set_pci request;
+ struct clp_rsp_set_pci response;
+} __packed;
+
+struct clp_req_rsp_query_pci {
+ struct clp_req_query_pci request;
+ struct clp_rsp_query_pci response;
+} __packed;
+
+struct clp_req_rsp_query_pci_grp {
+ struct clp_req_query_pci_grp request;
+ struct clp_rsp_query_pci_grp response;
+} __packed;
+
+#endif
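
Worked arithmetic behind CLP_FH_LIST_NR_ENTRIES (illustrative; assumes CLP_BLK_SIZE from <asm/clp.h> is one 4 KiB page):

    /* sizeof(struct clp_fh_list_entry) = 2 + 2 + 4 + 4 + 4 = 16 bytes            */
    /* CLP_FH_LIST_NR_ENTRIES = (4096 - 2 * 32) / 16 = 252 entries per response   */
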
diff --git a/arch/s390/include/asm/pci_debug.h b/arch/s390/include/asm/pci_debug.h
new file mode 100644
index 000000000..5dfe47588
--- /dev/null
+++ b/arch/s390/include/asm/pci_debug.h
@@ -0,0 +1,25 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _S390_ASM_PCI_DEBUG_H
+#define _S390_ASM_PCI_DEBUG_H
+
+#include <asm/debug.h>
+
+extern debug_info_t *pci_debug_msg_id;
+extern debug_info_t *pci_debug_err_id;
+
+#define zpci_dbg(imp, fmt, args...) \
+ debug_sprintf_event(pci_debug_msg_id, imp, fmt, ##args)
+
+#define zpci_err(text...) \
+ do { \
+ char debug_buffer[16]; \
+ snprintf(debug_buffer, 16, text); \
+ debug_text_event(pci_debug_err_id, 0, debug_buffer); \
+ } while (0)
+
+static inline void zpci_err_hex(void *addr, int len)
+{
+ debug_event(pci_debug_err_id, 0, addr, len);
+}
+
+#endif
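
A usage sketch for the debug helpers above (illustrative; fid, fh and response are placeholders). Note that zpci_err() formats into a fixed 16-byte buffer, so longer messages are silently truncated:

    zpci_dbg(3, "add fid:%x, fh:%x\n", fid, fh);  /* s390 debug feature, message area      */
    zpci_err("enable failed\n");                  /* text event, at most 15 chars fit      */
    zpci_err_hex(&response, sizeof(response));    /* raw hex dump to the error debug area  */
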
diff --git a/arch/s390/include/asm/pci_dma.h b/arch/s390/include/asm/pci_dma.h
new file mode 100644
index 000000000..419fac7a6
--- /dev/null
+++ b/arch/s390/include/asm/pci_dma.h
@@ -0,0 +1,207 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_S390_PCI_DMA_H
+#define _ASM_S390_PCI_DMA_H
+
+/* I/O Translation Anchor (IOTA) */
+enum zpci_ioat_dtype {
+ ZPCI_IOTA_STO = 0,
+ ZPCI_IOTA_RTTO = 1,
+ ZPCI_IOTA_RSTO = 2,
+ ZPCI_IOTA_RFTO = 3,
+ ZPCI_IOTA_PFAA = 4,
+ ZPCI_IOTA_IOPFAA = 5,
+ ZPCI_IOTA_IOPTO = 7
+};
+
+#define ZPCI_IOTA_IOT_ENABLED 0x800UL
+#define ZPCI_IOTA_DT_ST (ZPCI_IOTA_STO << 2)
+#define ZPCI_IOTA_DT_RT (ZPCI_IOTA_RTTO << 2)
+#define ZPCI_IOTA_DT_RS (ZPCI_IOTA_RSTO << 2)
+#define ZPCI_IOTA_DT_RF (ZPCI_IOTA_RFTO << 2)
+#define ZPCI_IOTA_DT_PF (ZPCI_IOTA_PFAA << 2)
+#define ZPCI_IOTA_FS_4K 0
+#define ZPCI_IOTA_FS_1M 1
+#define ZPCI_IOTA_FS_2G 2
+#define ZPCI_KEY (PAGE_DEFAULT_KEY << 5)
+
+#define ZPCI_TABLE_SIZE_RT (1UL << 42)
+
+#define ZPCI_IOTA_STO_FLAG (ZPCI_IOTA_IOT_ENABLED | ZPCI_KEY | ZPCI_IOTA_DT_ST)
+#define ZPCI_IOTA_RTTO_FLAG (ZPCI_IOTA_IOT_ENABLED | ZPCI_KEY | ZPCI_IOTA_DT_RT)
+#define ZPCI_IOTA_RSTO_FLAG (ZPCI_IOTA_IOT_ENABLED | ZPCI_KEY | ZPCI_IOTA_DT_RS)
+#define ZPCI_IOTA_RFTO_FLAG (ZPCI_IOTA_IOT_ENABLED | ZPCI_KEY | ZPCI_IOTA_DT_RF)
+#define ZPCI_IOTA_RFAA_FLAG (ZPCI_IOTA_IOT_ENABLED | ZPCI_KEY | ZPCI_IOTA_DT_PF | ZPCI_IOTA_FS_2G)
+
+/* I/O Region and segment tables */
+#define ZPCI_INDEX_MASK 0x7ffUL
+
+#define ZPCI_TABLE_TYPE_MASK 0xc
+#define ZPCI_TABLE_TYPE_RFX 0xc
+#define ZPCI_TABLE_TYPE_RSX 0x8
+#define ZPCI_TABLE_TYPE_RTX 0x4
+#define ZPCI_TABLE_TYPE_SX 0x0
+
+#define ZPCI_TABLE_LEN_RFX 0x3
+#define ZPCI_TABLE_LEN_RSX 0x3
+#define ZPCI_TABLE_LEN_RTX 0x3
+
+#define ZPCI_TABLE_OFFSET_MASK 0xc0
+#define ZPCI_TABLE_SIZE 0x4000
+#define ZPCI_TABLE_ALIGN ZPCI_TABLE_SIZE
+#define ZPCI_TABLE_ENTRY_SIZE (sizeof(unsigned long))
+#define ZPCI_TABLE_ENTRIES (ZPCI_TABLE_SIZE / ZPCI_TABLE_ENTRY_SIZE)
+
+#define ZPCI_TABLE_BITS 11
+#define ZPCI_PT_BITS 8
+#define ZPCI_ST_SHIFT (ZPCI_PT_BITS + PAGE_SHIFT)
+#define ZPCI_RT_SHIFT (ZPCI_ST_SHIFT + ZPCI_TABLE_BITS)
+
+#define ZPCI_RTE_FLAG_MASK 0x3fffUL
+#define ZPCI_RTE_ADDR_MASK (~ZPCI_RTE_FLAG_MASK)
+#define ZPCI_STE_FLAG_MASK 0x7ffUL
+#define ZPCI_STE_ADDR_MASK (~ZPCI_STE_FLAG_MASK)
+
+/* I/O Page tables */
+#define ZPCI_PTE_VALID_MASK 0x400
+#define ZPCI_PTE_INVALID 0x400
+#define ZPCI_PTE_VALID 0x000
+#define ZPCI_PT_SIZE 0x800
+#define ZPCI_PT_ALIGN ZPCI_PT_SIZE
+#define ZPCI_PT_ENTRIES (ZPCI_PT_SIZE / ZPCI_TABLE_ENTRY_SIZE)
+#define ZPCI_PT_MASK (ZPCI_PT_ENTRIES - 1)
+
+#define ZPCI_PTE_FLAG_MASK 0xfffUL
+#define ZPCI_PTE_ADDR_MASK (~ZPCI_PTE_FLAG_MASK)
+
+/* Shared bits */
+#define ZPCI_TABLE_VALID 0x00
+#define ZPCI_TABLE_INVALID 0x20
+#define ZPCI_TABLE_PROTECTED 0x200
+#define ZPCI_TABLE_UNPROTECTED 0x000
+
+#define ZPCI_TABLE_VALID_MASK 0x20
+#define ZPCI_TABLE_PROT_MASK 0x200
+
+static inline unsigned int calc_rtx(dma_addr_t ptr)
+{
+ return ((unsigned long) ptr >> ZPCI_RT_SHIFT) & ZPCI_INDEX_MASK;
+}
+
+static inline unsigned int calc_sx(dma_addr_t ptr)
+{
+ return ((unsigned long) ptr >> ZPCI_ST_SHIFT) & ZPCI_INDEX_MASK;
+}
+
+static inline unsigned int calc_px(dma_addr_t ptr)
+{
+ return ((unsigned long) ptr >> PAGE_SHIFT) & ZPCI_PT_MASK;
+}
+
+static inline void set_pt_pfaa(unsigned long *entry, void *pfaa)
+{
+ *entry &= ZPCI_PTE_FLAG_MASK;
+ *entry |= ((unsigned long) pfaa & ZPCI_PTE_ADDR_MASK);
+}
+
+static inline void set_rt_sto(unsigned long *entry, void *sto)
+{
+ *entry &= ZPCI_RTE_FLAG_MASK;
+ *entry |= ((unsigned long) sto & ZPCI_RTE_ADDR_MASK);
+ *entry |= ZPCI_TABLE_TYPE_RTX;
+}
+
+static inline void set_st_pto(unsigned long *entry, void *pto)
+{
+ *entry &= ZPCI_STE_FLAG_MASK;
+ *entry |= ((unsigned long) pto & ZPCI_STE_ADDR_MASK);
+ *entry |= ZPCI_TABLE_TYPE_SX;
+}
+
+static inline void validate_rt_entry(unsigned long *entry)
+{
+ *entry &= ~ZPCI_TABLE_VALID_MASK;
+ *entry &= ~ZPCI_TABLE_OFFSET_MASK;
+ *entry |= ZPCI_TABLE_VALID;
+ *entry |= ZPCI_TABLE_LEN_RTX;
+}
+
+static inline void validate_st_entry(unsigned long *entry)
+{
+ *entry &= ~ZPCI_TABLE_VALID_MASK;
+ *entry |= ZPCI_TABLE_VALID;
+}
+
+static inline void invalidate_table_entry(unsigned long *entry)
+{
+ *entry &= ~ZPCI_TABLE_VALID_MASK;
+ *entry |= ZPCI_TABLE_INVALID;
+}
+
+static inline void invalidate_pt_entry(unsigned long *entry)
+{
+ WARN_ON_ONCE((*entry & ZPCI_PTE_VALID_MASK) == ZPCI_PTE_INVALID);
+ *entry &= ~ZPCI_PTE_VALID_MASK;
+ *entry |= ZPCI_PTE_INVALID;
+}
+
+static inline void validate_pt_entry(unsigned long *entry)
+{
+ WARN_ON_ONCE((*entry & ZPCI_PTE_VALID_MASK) == ZPCI_PTE_VALID);
+ *entry &= ~ZPCI_PTE_VALID_MASK;
+ *entry |= ZPCI_PTE_VALID;
+}
+
+static inline void entry_set_protected(unsigned long *entry)
+{
+ *entry &= ~ZPCI_TABLE_PROT_MASK;
+ *entry |= ZPCI_TABLE_PROTECTED;
+}
+
+static inline void entry_clr_protected(unsigned long *entry)
+{
+ *entry &= ~ZPCI_TABLE_PROT_MASK;
+ *entry |= ZPCI_TABLE_UNPROTECTED;
+}
+
+static inline int reg_entry_isvalid(unsigned long entry)
+{
+ return (entry & ZPCI_TABLE_VALID_MASK) == ZPCI_TABLE_VALID;
+}
+
+static inline int pt_entry_isvalid(unsigned long entry)
+{
+ return (entry & ZPCI_PTE_VALID_MASK) == ZPCI_PTE_VALID;
+}
+
+static inline int entry_isprotected(unsigned long entry)
+{
+ return (entry & ZPCI_TABLE_PROT_MASK) == ZPCI_TABLE_PROTECTED;
+}
+
+static inline unsigned long *get_rt_sto(unsigned long entry)
+{
+ return ((entry & ZPCI_TABLE_TYPE_MASK) == ZPCI_TABLE_TYPE_RTX)
+ ? (unsigned long *) (entry & ZPCI_RTE_ADDR_MASK)
+ : NULL;
+}
+
+static inline unsigned long *get_st_pto(unsigned long entry)
+{
+ return ((entry & ZPCI_TABLE_TYPE_MASK) == ZPCI_TABLE_TYPE_SX)
+ ? (unsigned long *) (entry & ZPCI_STE_ADDR_MASK)
+ : NULL;
+}
+
+/* Prototypes */
+int zpci_dma_init_device(struct zpci_dev *);
+void zpci_dma_exit_device(struct zpci_dev *);
+void dma_free_seg_table(unsigned long);
+unsigned long *dma_alloc_cpu_table(void);
+void dma_cleanup_tables(unsigned long *);
+unsigned long *dma_walk_cpu_trans(unsigned long *rto, dma_addr_t dma_addr);
+void dma_update_cpu_trans(unsigned long *entry, void *page_addr, int flags);
+
+extern const struct dma_map_ops s390_pci_dma_ops;
+
+
+#endif
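
Worked example for the index helpers above (illustrative; assumes 4 KiB pages, i.e. PAGE_SHIFT == 12, so ZPCI_ST_SHIFT == 20 and ZPCI_RT_SHIFT == 31):

    dma_addr_t addr = 0x84123456;        /* arbitrary example address              */
    unsigned int rtx = calc_rtx(addr);   /* (addr >> 31) & 0x7ff = 0x001           */
    unsigned int sx  = calc_sx(addr);    /* (addr >> 20) & 0x7ff = 0x041           */
    unsigned int px  = calc_px(addr);    /* (addr >> 12) & 0xff  = 0x23            */
                                         /* remaining byte offset in page = 0x456  */
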
diff --git a/arch/s390/include/asm/pci_insn.h b/arch/s390/include/asm/pci_insn.h
new file mode 100644
index 000000000..ba22a6ea5
--- /dev/null
+++ b/arch/s390/include/asm/pci_insn.h
@@ -0,0 +1,81 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_S390_PCI_INSN_H
+#define _ASM_S390_PCI_INSN_H
+
+/* Load/Store status codes */
+#define ZPCI_PCI_ST_FUNC_NOT_ENABLED 4
+#define ZPCI_PCI_ST_FUNC_IN_ERR 8
+#define ZPCI_PCI_ST_BLOCKED 12
+#define ZPCI_PCI_ST_INSUF_RES 16
+#define ZPCI_PCI_ST_INVAL_AS 20
+#define ZPCI_PCI_ST_FUNC_ALREADY_ENABLED 24
+#define ZPCI_PCI_ST_DMA_AS_NOT_ENABLED 28
+#define ZPCI_PCI_ST_2ND_OP_IN_INV_AS 36
+#define ZPCI_PCI_ST_FUNC_NOT_AVAIL 40
+#define ZPCI_PCI_ST_ALREADY_IN_RQ_STATE 44
+
+/* Load/Store return codes */
+#define ZPCI_PCI_LS_OK 0
+#define ZPCI_PCI_LS_ERR 1
+#define ZPCI_PCI_LS_BUSY 2
+#define ZPCI_PCI_LS_INVAL_HANDLE 3
+
+/* Load/Store address space identifiers */
+#define ZPCI_PCIAS_MEMIO_0 0
+#define ZPCI_PCIAS_MEMIO_1 1
+#define ZPCI_PCIAS_MEMIO_2 2
+#define ZPCI_PCIAS_MEMIO_3 3
+#define ZPCI_PCIAS_MEMIO_4 4
+#define ZPCI_PCIAS_MEMIO_5 5
+#define ZPCI_PCIAS_CFGSPC 15
+
+/* Modify PCI Function Controls */
+#define ZPCI_MOD_FC_REG_INT 2
+#define ZPCI_MOD_FC_DEREG_INT 3
+#define ZPCI_MOD_FC_REG_IOAT 4
+#define ZPCI_MOD_FC_DEREG_IOAT 5
+#define ZPCI_MOD_FC_REREG_IOAT 6
+#define ZPCI_MOD_FC_RESET_ERROR 7
+#define ZPCI_MOD_FC_RESET_BLOCK 9
+#define ZPCI_MOD_FC_SET_MEASURE 10
+
+/* FIB function controls */
+#define ZPCI_FIB_FC_ENABLED 0x80
+#define ZPCI_FIB_FC_ERROR 0x40
+#define ZPCI_FIB_FC_LS_BLOCKED 0x20
+#define ZPCI_FIB_FC_DMAAS_REG 0x10
+
+/* Function Information Block */
+struct zpci_fib {
+ u32 fmt : 8; /* format */
+ u32 : 24;
+ u32 : 32;
+ u8 fc; /* function controls */
+ u64 : 56;
+ u64 pba; /* PCI base address */
+ u64 pal; /* PCI address limit */
+ u64 iota; /* I/O Translation Anchor */
+ u32 : 1;
+ u32 isc : 3; /* Interrupt subclass */
+ u32 noi : 12; /* Number of interrupts */
+ u32 : 2;
+ u32 aibvo : 6; /* Adapter interrupt bit vector offset */
+ u32 sum : 1; /* Adapter int summary bit enabled */
+ u32 : 1;
+ u32 aisbo : 6; /* Adapter int summary bit offset */
+ u32 : 32;
+ u64 aibv; /* Adapter int bit vector address */
+ u64 aisb; /* Adapter int summary bit address */
+ u64 fmb_addr; /* Function measurement block address and key */
+ u32 : 32;
+ u32 gd;
+} __packed __aligned(8);
+
+u8 zpci_mod_fc(u64 req, struct zpci_fib *fib, u8 *status);
+int zpci_refresh_trans(u64 fn, u64 addr, u64 range);
+int zpci_load(u64 *data, u64 req, u64 offset);
+int zpci_store(u64 data, u64 req, u64 offset);
+int zpci_store_block(const u64 *data, u64 req, u64 offset);
+int zpci_set_irq_ctrl(u16 ctl, char *unused, u8 isc);
+
+#endif
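
A sketch of how a Function Information Block is typically filled to register an I/O translation anchor via zpci_mod_fc(), roughly the way zpci_register_ioat() is expected to use it (illustrative only; the function name is invented, and ZPCI_CREATE_REQ() and ZPCI_IOTA_RTTO_FLAG come from pci_io.h and pci_dma.h in this same patch):

    static int example_register_ioat(struct zpci_dev *zdev, u8 dmaas,
                                     u64 start_dma, u64 end_dma, u64 iota)
    {
            u64 req = ZPCI_CREATE_REQ(zdev->fh, dmaas, ZPCI_MOD_FC_REG_IOAT);
            struct zpci_fib fib = {0};
            u8 status;

            fib.pba  = start_dma;                   /* PCI base address               */
            fib.pal  = end_dma;                     /* PCI address limit              */
            fib.iota = iota | ZPCI_IOTA_RTTO_FLAG;  /* region-third table as anchor   */
            return zpci_mod_fc(req, &fib, &status) ? -EIO : 0;
    }
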
diff --git a/arch/s390/include/asm/pci_io.h b/arch/s390/include/asm/pci_io.h
new file mode 100644
index 000000000..cbb9cb9c6
--- /dev/null
+++ b/arch/s390/include/asm/pci_io.h
@@ -0,0 +1,206 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_S390_PCI_IO_H
+#define _ASM_S390_PCI_IO_H
+
+#ifdef CONFIG_PCI
+
+#include <linux/kernel.h>
+#include <linux/slab.h>
+#include <asm/pci_insn.h>
+
+/* I/O Map */
+#define ZPCI_IOMAP_SHIFT 48
+#define ZPCI_IOMAP_ADDR_BASE 0x8000000000000000UL
+#define ZPCI_IOMAP_ADDR_OFF_MASK ((1UL << ZPCI_IOMAP_SHIFT) - 1)
+#define ZPCI_IOMAP_MAX_ENTRIES \
+ ((ULONG_MAX - ZPCI_IOMAP_ADDR_BASE + 1) / (1UL << ZPCI_IOMAP_SHIFT))
+#define ZPCI_IOMAP_ADDR_IDX_MASK \
+ (~ZPCI_IOMAP_ADDR_OFF_MASK - ZPCI_IOMAP_ADDR_BASE)
+
+struct zpci_iomap_entry {
+ u32 fh;
+ u8 bar;
+ u16 count;
+};
+
+extern struct zpci_iomap_entry *zpci_iomap_start;
+
+#define ZPCI_ADDR(idx) (ZPCI_IOMAP_ADDR_BASE | ((u64) idx << ZPCI_IOMAP_SHIFT))
+#define ZPCI_IDX(addr) \
+ (((__force u64) addr & ZPCI_IOMAP_ADDR_IDX_MASK) >> ZPCI_IOMAP_SHIFT)
+#define ZPCI_OFFSET(addr) \
+ ((__force u64) addr & ZPCI_IOMAP_ADDR_OFF_MASK)
+
+#define ZPCI_CREATE_REQ(handle, space, len) \
+ ((u64) handle << 32 | space << 16 | len)
+
+#define zpci_read(LENGTH, RETTYPE) \
+static inline RETTYPE zpci_read_##RETTYPE(const volatile void __iomem *addr) \
+{ \
+ struct zpci_iomap_entry *entry = &zpci_iomap_start[ZPCI_IDX(addr)]; \
+ u64 req = ZPCI_CREATE_REQ(entry->fh, entry->bar, LENGTH); \
+ u64 data; \
+ int rc; \
+ \
+ rc = zpci_load(&data, req, ZPCI_OFFSET(addr)); \
+ if (rc) \
+ data = -1ULL; \
+ return (RETTYPE) data; \
+}
+
+#define zpci_write(LENGTH, VALTYPE) \
+static inline void zpci_write_##VALTYPE(VALTYPE val, \
+ const volatile void __iomem *addr) \
+{ \
+ struct zpci_iomap_entry *entry = &zpci_iomap_start[ZPCI_IDX(addr)]; \
+ u64 req = ZPCI_CREATE_REQ(entry->fh, entry->bar, LENGTH); \
+ u64 data = (VALTYPE) val; \
+ \
+ zpci_store(data, req, ZPCI_OFFSET(addr)); \
+}
+
+zpci_read(8, u64)
+zpci_read(4, u32)
+zpci_read(2, u16)
+zpci_read(1, u8)
+zpci_write(8, u64)
+zpci_write(4, u32)
+zpci_write(2, u16)
+zpci_write(1, u8)
+
+static inline int zpci_write_single(u64 req, const u64 *data, u64 offset, u8 len)
+{
+ u64 val;
+
+ switch (len) {
+ case 1:
+ val = (u64) *((u8 *) data);
+ break;
+ case 2:
+ val = (u64) *((u16 *) data);
+ break;
+ case 4:
+ val = (u64) *((u32 *) data);
+ break;
+ case 8:
+ val = (u64) *((u64 *) data);
+ break;
+ default:
+ val = 0; /* let FW report error */
+ break;
+ }
+ return zpci_store(val, req, offset);
+}
+
+static inline int zpci_read_single(u64 req, u64 *dst, u64 offset, u8 len)
+{
+ u64 data;
+ int cc;
+
+ cc = zpci_load(&data, req, offset);
+ if (cc)
+ goto out;
+
+ switch (len) {
+ case 1:
+ *((u8 *) dst) = (u8) data;
+ break;
+ case 2:
+ *((u16 *) dst) = (u16) data;
+ break;
+ case 4:
+ *((u32 *) dst) = (u32) data;
+ break;
+ case 8:
+ *((u64 *) dst) = (u64) data;
+ break;
+ }
+out:
+ return cc;
+}
+
+static inline int zpci_write_block(u64 req, const u64 *data, u64 offset)
+{
+ return zpci_store_block(data, req, offset);
+}
+
+static inline u8 zpci_get_max_write_size(u64 src, u64 dst, int len, int max)
+{
+ int count = len > max ? max : len, size = 1;
+
+ while (!(src & 0x1) && !(dst & 0x1) && ((size << 1) <= count)) {
+ dst = dst >> 1;
+ src = src >> 1;
+ size = size << 1;
+ }
+ return size;
+}
+
+static inline int zpci_memcpy_fromio(void *dst,
+ const volatile void __iomem *src,
+ unsigned long n)
+{
+ struct zpci_iomap_entry *entry = &zpci_iomap_start[ZPCI_IDX(src)];
+ u64 req, offset = ZPCI_OFFSET(src);
+ int size, rc = 0;
+
+ while (n > 0) {
+ size = zpci_get_max_write_size((u64 __force) src,
+ (u64) dst, n, 8);
+ req = ZPCI_CREATE_REQ(entry->fh, entry->bar, size);
+ rc = zpci_read_single(req, dst, offset, size);
+ if (rc)
+ break;
+ offset += size;
+ dst += size;
+ n -= size;
+ }
+ return rc;
+}
+
+static inline int zpci_memcpy_toio(volatile void __iomem *dst,
+ const void *src, unsigned long n)
+{
+ struct zpci_iomap_entry *entry = &zpci_iomap_start[ZPCI_IDX(dst)];
+ u64 req, offset = ZPCI_OFFSET(dst);
+ int size, rc = 0;
+
+ if (!src)
+ return -EINVAL;
+
+ while (n > 0) {
+ size = zpci_get_max_write_size((u64 __force) dst,
+ (u64) src, n, 128);
+ req = ZPCI_CREATE_REQ(entry->fh, entry->bar, size);
+
+ if (size > 8) /* main path */
+ rc = zpci_write_block(req, src, offset);
+ else
+ rc = zpci_write_single(req, src, offset, size);
+ if (rc)
+ break;
+ offset += size;
+ src += size;
+ n -= size;
+ }
+ return rc;
+}
+
+static inline int zpci_memset_io(volatile void __iomem *dst,
+ unsigned char val, size_t count)
+{
+ u8 *src = kmalloc(count, GFP_KERNEL);
+ int rc;
+
+ if (src == NULL)
+ return -ENOMEM;
+ memset(src, val, count);
+
+ rc = zpci_memcpy_toio(dst, src, count);
+ kfree(src);
+ return rc;
+}
+
+#endif /* CONFIG_PCI */
+
+#endif /* _ASM_S390_PCI_IO_H */
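
Worked values for zpci_get_max_write_size(), which picks the largest power-of-two access size permitted by the alignment of both addresses (illustrative addresses and lengths):

    zpci_get_max_write_size(0x1000, 0x2000, 20, 8); /* -> 8: both 8-byte aligned  */
    zpci_get_max_write_size(0x1002, 0x2006, 20, 8); /* -> 2: only 2-byte aligned  */
    zpci_get_max_write_size(0x1001, 0x2000, 20, 8); /* -> 1: odd source address   */
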
diff --git a/arch/s390/include/asm/percpu.h b/arch/s390/include/asm/percpu.h
new file mode 100644
index 000000000..50f6661ba
--- /dev/null
+++ b/arch/s390/include/asm/percpu.h
@@ -0,0 +1,186 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __ARCH_S390_PERCPU__
+#define __ARCH_S390_PERCPU__
+
+#include <linux/preempt.h>
+#include <asm/cmpxchg.h>
+
+/*
+ * s390 uses its own implementation for per cpu data: the offset of
+ * the cpu local data area is cached in the cpu's lowcore memory.
+ */
+#define __my_cpu_offset S390_lowcore.percpu_offset
+
+/*
+ * For 64 bit module code, the module may be more than 4G above the
+ * per cpu area, so use weak definitions to force the compiler to
+ * generate external references.
+ */
+#if defined(CONFIG_SMP) && defined(MODULE)
+#define ARCH_NEEDS_WEAK_PER_CPU
+#endif
+
+/*
+ * We use a compare-and-swap loop since that uses fewer cpu cycles than
+ * disabling and enabling interrupts like the generic variant would do.
+ */
+#define arch_this_cpu_to_op_simple(pcp, val, op) \
+({ \
+ typedef typeof(pcp) pcp_op_T__; \
+ pcp_op_T__ old__, new__, prev__; \
+ pcp_op_T__ *ptr__; \
+ preempt_disable_notrace(); \
+ ptr__ = raw_cpu_ptr(&(pcp)); \
+ prev__ = *ptr__; \
+ do { \
+ old__ = prev__; \
+ new__ = old__ op (val); \
+ prev__ = cmpxchg(ptr__, old__, new__); \
+ } while (prev__ != old__); \
+ preempt_enable_notrace(); \
+ new__; \
+})
+
+#define this_cpu_add_1(pcp, val) arch_this_cpu_to_op_simple(pcp, val, +)
+#define this_cpu_add_2(pcp, val) arch_this_cpu_to_op_simple(pcp, val, +)
+#define this_cpu_add_return_1(pcp, val) arch_this_cpu_to_op_simple(pcp, val, +)
+#define this_cpu_add_return_2(pcp, val) arch_this_cpu_to_op_simple(pcp, val, +)
+#define this_cpu_and_1(pcp, val) arch_this_cpu_to_op_simple(pcp, val, &)
+#define this_cpu_and_2(pcp, val) arch_this_cpu_to_op_simple(pcp, val, &)
+#define this_cpu_or_1(pcp, val) arch_this_cpu_to_op_simple(pcp, val, |)
+#define this_cpu_or_2(pcp, val) arch_this_cpu_to_op_simple(pcp, val, |)
+
+#ifndef CONFIG_HAVE_MARCH_Z196_FEATURES
+
+#define this_cpu_add_4(pcp, val) arch_this_cpu_to_op_simple(pcp, val, +)
+#define this_cpu_add_8(pcp, val) arch_this_cpu_to_op_simple(pcp, val, +)
+#define this_cpu_add_return_4(pcp, val) arch_this_cpu_to_op_simple(pcp, val, +)
+#define this_cpu_add_return_8(pcp, val) arch_this_cpu_to_op_simple(pcp, val, +)
+#define this_cpu_and_4(pcp, val) arch_this_cpu_to_op_simple(pcp, val, &)
+#define this_cpu_and_8(pcp, val) arch_this_cpu_to_op_simple(pcp, val, &)
+#define this_cpu_or_4(pcp, val) arch_this_cpu_to_op_simple(pcp, val, |)
+#define this_cpu_or_8(pcp, val) arch_this_cpu_to_op_simple(pcp, val, |)
+
+#else /* CONFIG_HAVE_MARCH_Z196_FEATURES */
+
+#define arch_this_cpu_add(pcp, val, op1, op2, szcast) \
+{ \
+ typedef typeof(pcp) pcp_op_T__; \
+ pcp_op_T__ val__ = (val); \
+ pcp_op_T__ old__, *ptr__; \
+ preempt_disable_notrace(); \
+ ptr__ = raw_cpu_ptr(&(pcp)); \
+ if (__builtin_constant_p(val__) && \
+ ((szcast)val__ > -129) && ((szcast)val__ < 128)) { \
+ asm volatile( \
+ op2 " %[ptr__],%[val__]\n" \
+ : [ptr__] "+Q" (*ptr__) \
+ : [val__] "i" ((szcast)val__) \
+ : "cc"); \
+ } else { \
+ asm volatile( \
+ op1 " %[old__],%[val__],%[ptr__]\n" \
+ : [old__] "=d" (old__), [ptr__] "+Q" (*ptr__) \
+ : [val__] "d" (val__) \
+ : "cc"); \
+ } \
+ preempt_enable_notrace(); \
+}
+
+#define this_cpu_add_4(pcp, val) arch_this_cpu_add(pcp, val, "laa", "asi", int)
+#define this_cpu_add_8(pcp, val) arch_this_cpu_add(pcp, val, "laag", "agsi", long)
+
+#define arch_this_cpu_add_return(pcp, val, op) \
+({ \
+ typedef typeof(pcp) pcp_op_T__; \
+ pcp_op_T__ val__ = (val); \
+ pcp_op_T__ old__, *ptr__; \
+ preempt_disable_notrace(); \
+ ptr__ = raw_cpu_ptr(&(pcp)); \
+ asm volatile( \
+ op " %[old__],%[val__],%[ptr__]\n" \
+ : [old__] "=d" (old__), [ptr__] "+Q" (*ptr__) \
+ : [val__] "d" (val__) \
+ : "cc"); \
+ preempt_enable_notrace(); \
+ old__ + val__; \
+})
+
+#define this_cpu_add_return_4(pcp, val) arch_this_cpu_add_return(pcp, val, "laa")
+#define this_cpu_add_return_8(pcp, val) arch_this_cpu_add_return(pcp, val, "laag")
+
+#define arch_this_cpu_to_op(pcp, val, op) \
+{ \
+ typedef typeof(pcp) pcp_op_T__; \
+ pcp_op_T__ val__ = (val); \
+ pcp_op_T__ old__, *ptr__; \
+ preempt_disable_notrace(); \
+ ptr__ = raw_cpu_ptr(&(pcp)); \
+ asm volatile( \
+ op " %[old__],%[val__],%[ptr__]\n" \
+ : [old__] "=d" (old__), [ptr__] "+Q" (*ptr__) \
+ : [val__] "d" (val__) \
+ : "cc"); \
+ preempt_enable_notrace(); \
+}
+
+#define this_cpu_and_4(pcp, val) arch_this_cpu_to_op(pcp, val, "lan")
+#define this_cpu_and_8(pcp, val) arch_this_cpu_to_op(pcp, val, "lang")
+#define this_cpu_or_4(pcp, val) arch_this_cpu_to_op(pcp, val, "lao")
+#define this_cpu_or_8(pcp, val) arch_this_cpu_to_op(pcp, val, "laog")
+
+#endif /* CONFIG_HAVE_MARCH_Z196_FEATURES */
+
+#define arch_this_cpu_cmpxchg(pcp, oval, nval) \
+({ \
+ typedef typeof(pcp) pcp_op_T__; \
+ pcp_op_T__ ret__; \
+ pcp_op_T__ *ptr__; \
+ preempt_disable_notrace(); \
+ ptr__ = raw_cpu_ptr(&(pcp)); \
+ ret__ = cmpxchg(ptr__, oval, nval); \
+ preempt_enable_notrace(); \
+ ret__; \
+})
+
+#define this_cpu_cmpxchg_1(pcp, oval, nval) arch_this_cpu_cmpxchg(pcp, oval, nval)
+#define this_cpu_cmpxchg_2(pcp, oval, nval) arch_this_cpu_cmpxchg(pcp, oval, nval)
+#define this_cpu_cmpxchg_4(pcp, oval, nval) arch_this_cpu_cmpxchg(pcp, oval, nval)
+#define this_cpu_cmpxchg_8(pcp, oval, nval) arch_this_cpu_cmpxchg(pcp, oval, nval)
+
+#define arch_this_cpu_xchg(pcp, nval) \
+({ \
+ typeof(pcp) *ptr__; \
+ typeof(pcp) ret__; \
+ preempt_disable_notrace(); \
+ ptr__ = raw_cpu_ptr(&(pcp)); \
+ ret__ = xchg(ptr__, nval); \
+ preempt_enable_notrace(); \
+ ret__; \
+})
+
+#define this_cpu_xchg_1(pcp, nval) arch_this_cpu_xchg(pcp, nval)
+#define this_cpu_xchg_2(pcp, nval) arch_this_cpu_xchg(pcp, nval)
+#define this_cpu_xchg_4(pcp, nval) arch_this_cpu_xchg(pcp, nval)
+#define this_cpu_xchg_8(pcp, nval) arch_this_cpu_xchg(pcp, nval)
+
+#define arch_this_cpu_cmpxchg_double(pcp1, pcp2, o1, o2, n1, n2) \
+({ \
+ typeof(pcp1) o1__ = (o1), n1__ = (n1); \
+ typeof(pcp2) o2__ = (o2), n2__ = (n2); \
+ typeof(pcp1) *p1__; \
+ typeof(pcp2) *p2__; \
+ int ret__; \
+ preempt_disable_notrace(); \
+ p1__ = raw_cpu_ptr(&(pcp1)); \
+ p2__ = raw_cpu_ptr(&(pcp2)); \
+ ret__ = __cmpxchg_double(p1__, p2__, o1__, o2__, n1__, n2__); \
+ preempt_enable_notrace(); \
+ ret__; \
+})
+
+#define this_cpu_cmpxchg_double_8 arch_this_cpu_cmpxchg_double
+
+#include <asm-generic/percpu.h>
+
+#endif /* __ARCH_S390_PERCPU__ */
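
A usage sketch for the per-cpu primitives above (illustrative; the counter name is invented). With CONFIG_HAVE_MARCH_Z196_FEATURES a small constant increment like this compiles to a single interlocked add-immediate ("agsi"); otherwise the compare-and-swap loop fallback is used:

    #include <linux/percpu.h>

    static DEFINE_PER_CPU(long, demo_counter);

    static void demo_hit(void)
    {
            this_cpu_add(demo_counter, 1);  /* preemption-safe, no interrupt disabling */
    }
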
diff --git a/arch/s390/include/asm/perf_event.h b/arch/s390/include/asm/perf_event.h
new file mode 100644
index 000000000..b9c0e3617
--- /dev/null
+++ b/arch/s390/include/asm/perf_event.h
@@ -0,0 +1,74 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Performance event support - s390 specific definitions.
+ *
+ * Copyright IBM Corp. 2009, 2017
+ * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>
+ * Hendrik Brueckner <brueckner@linux.vnet.ibm.com>
+ */
+
+#ifndef _ASM_S390_PERF_EVENT_H
+#define _ASM_S390_PERF_EVENT_H
+
+#include <linux/perf_event.h>
+#include <linux/device.h>
+#include <asm/cpu_mf.h>
+
+/* Per-CPU flags for PMU states */
+#define PMU_F_RESERVED 0x1000
+#define PMU_F_ENABLED 0x2000
+#define PMU_F_IN_USE 0x4000
+#define PMU_F_ERR_IBE 0x0100
+#define PMU_F_ERR_LSDA 0x0200
+#define PMU_F_ERR_MASK (PMU_F_ERR_IBE|PMU_F_ERR_LSDA)
+
+/* Perf definitions for PMU event attributes in sysfs */
+extern __init const struct attribute_group **cpumf_cf_event_group(void);
+extern ssize_t cpumf_events_sysfs_show(struct device *dev,
+ struct device_attribute *attr,
+ char *page);
+#define EVENT_VAR(_cat, _name) event_attr_##_cat##_##_name
+#define EVENT_PTR(_cat, _name) (&EVENT_VAR(_cat, _name).attr.attr)
+
+#define CPUMF_EVENT_ATTR(cat, name, id) \
+ PMU_EVENT_ATTR(name, EVENT_VAR(cat, name), id, cpumf_events_sysfs_show)
+#define CPUMF_EVENT_PTR(cat, name) EVENT_PTR(cat, name)
+
+
+/* Perf callbacks */
+struct pt_regs;
+extern unsigned long perf_instruction_pointer(struct pt_regs *regs);
+extern unsigned long perf_misc_flags(struct pt_regs *regs);
+#define perf_misc_flags(regs) perf_misc_flags(regs)
+#define perf_arch_bpf_user_pt_regs(regs) &regs->user_regs
+
+/* Perf pt_regs extension for sample-data-entry indicators */
+struct perf_sf_sde_regs {
+ unsigned char in_guest:1; /* guest sample */
+ unsigned long reserved:63; /* reserved */
+};
+
+/* Perf PMU definitions for the counter facility */
+#define PERF_CPUM_CF_MAX_CTR 0xffffUL /* Max ctr for ECCTR */
+
+/* Perf PMU definitions for the sampling facility */
+#define PERF_CPUM_SF_MAX_CTR 2
+#define PERF_EVENT_CPUM_SF 0xB0000UL /* Event: Basic-sampling */
+#define PERF_EVENT_CPUM_SF_DIAG 0xBD000UL /* Event: Combined-sampling */
+#define PERF_CPUM_SF_BASIC_MODE 0x0001 /* Basic-sampling flag */
+#define PERF_CPUM_SF_DIAG_MODE 0x0002 /* Diagnostic-sampling flag */
+#define PERF_CPUM_SF_MODE_MASK (PERF_CPUM_SF_BASIC_MODE| \
+ PERF_CPUM_SF_DIAG_MODE)
+#define PERF_CPUM_SF_FULL_BLOCKS 0x0004 /* Process full SDBs only */
+
+#define REG_NONE 0
+#define REG_OVERFLOW 1
+#define OVERFLOW_REG(hwc) ((hwc)->extra_reg.config)
+#define SFB_ALLOC_REG(hwc) ((hwc)->extra_reg.alloc)
+#define TEAR_REG(hwc) ((hwc)->last_tag)
+#define SAMPL_RATE(hwc) ((hwc)->event_base)
+#define SAMPL_FLAGS(hwc) ((hwc)->config_base)
+#define SAMPL_DIAG_MODE(hwc) (SAMPL_FLAGS(hwc) & PERF_CPUM_SF_DIAG_MODE)
+#define SDB_FULL_BLOCKS(hwc) (SAMPL_FLAGS(hwc) & PERF_CPUM_SF_FULL_BLOCKS)
+
+#endif /* _ASM_S390_PERF_EVENT_H */
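
A sketch of how the CPUMF_EVENT_ATTR()/CPUMF_EVENT_PTR() pair is meant to be used when exporting counter events through sysfs (illustrative; the category name cf_demo and the attribute array are invented):

    CPUMF_EVENT_ATTR(cf_demo, CPU_CYCLES, 0x0000);  /* defines event_attr_cf_demo_CPU_CYCLES */

    static struct attribute *demo_event_attrs[] __initdata = {
            CPUMF_EVENT_PTR(cf_demo, CPU_CYCLES),   /* &event_attr_cf_demo_CPU_CYCLES.attr.attr */
            NULL,
    };
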
diff --git a/arch/s390/include/asm/pgalloc.h b/arch/s390/include/asm/pgalloc.h
new file mode 100644
index 000000000..67838df3f
--- /dev/null
+++ b/arch/s390/include/asm/pgalloc.h
@@ -0,0 +1,169 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * S390 version
+ * Copyright IBM Corp. 1999, 2000
+ * Author(s): Hartmut Penner (hp@de.ibm.com)
+ * Martin Schwidefsky (schwidefsky@de.ibm.com)
+ *
+ * Derived from "include/asm-i386/pgalloc.h"
+ * Copyright (C) 1994 Linus Torvalds
+ */
+
+#ifndef _S390_PGALLOC_H
+#define _S390_PGALLOC_H
+
+#include <linux/threads.h>
+#include <linux/string.h>
+#include <linux/gfp.h>
+#include <linux/mm.h>
+
+#define CRST_ALLOC_ORDER 2
+
+unsigned long *crst_table_alloc(struct mm_struct *);
+void crst_table_free(struct mm_struct *, unsigned long *);
+
+unsigned long *page_table_alloc(struct mm_struct *);
+struct page *page_table_alloc_pgste(struct mm_struct *mm);
+void page_table_free(struct mm_struct *, unsigned long *);
+void page_table_free_rcu(struct mmu_gather *, unsigned long *, unsigned long);
+void page_table_free_pgste(struct page *page);
+extern int page_table_allocate_pgste;
+
+static inline void crst_table_init(unsigned long *crst, unsigned long entry)
+{
+ memset64((u64 *)crst, entry, _CRST_ENTRIES);
+}
+
+static inline unsigned long pgd_entry_type(struct mm_struct *mm)
+{
+ if (mm_pmd_folded(mm))
+ return _SEGMENT_ENTRY_EMPTY;
+ if (mm_pud_folded(mm))
+ return _REGION3_ENTRY_EMPTY;
+ if (mm_p4d_folded(mm))
+ return _REGION2_ENTRY_EMPTY;
+ return _REGION1_ENTRY_EMPTY;
+}
+
+int crst_table_upgrade(struct mm_struct *mm, unsigned long limit);
+void crst_table_downgrade(struct mm_struct *);
+
+static inline p4d_t *p4d_alloc_one(struct mm_struct *mm, unsigned long address)
+{
+ unsigned long *table = crst_table_alloc(mm);
+
+ if (table)
+ crst_table_init(table, _REGION2_ENTRY_EMPTY);
+ return (p4d_t *) table;
+}
+
+static inline void p4d_free(struct mm_struct *mm, p4d_t *p4d)
+{
+ if (!mm_p4d_folded(mm))
+ crst_table_free(mm, (unsigned long *) p4d);
+}
+
+static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long address)
+{
+ unsigned long *table = crst_table_alloc(mm);
+ if (table)
+ crst_table_init(table, _REGION3_ENTRY_EMPTY);
+ return (pud_t *) table;
+}
+
+static inline void pud_free(struct mm_struct *mm, pud_t *pud)
+{
+ if (!mm_pud_folded(mm))
+ crst_table_free(mm, (unsigned long *) pud);
+}
+
+static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long vmaddr)
+{
+ unsigned long *table = crst_table_alloc(mm);
+
+ if (!table)
+ return NULL;
+ crst_table_init(table, _SEGMENT_ENTRY_EMPTY);
+ if (!pgtable_pmd_page_ctor(virt_to_page(table))) {
+ crst_table_free(mm, table);
+ return NULL;
+ }
+ return (pmd_t *) table;
+}
+
+static inline void pmd_free(struct mm_struct *mm, pmd_t *pmd)
+{
+ if (mm_pmd_folded(mm))
+ return;
+ pgtable_pmd_page_dtor(virt_to_page(pmd));
+ crst_table_free(mm, (unsigned long *) pmd);
+}
+
+static inline void pgd_populate(struct mm_struct *mm, pgd_t *pgd, p4d_t *p4d)
+{
+ pgd_val(*pgd) = _REGION1_ENTRY | __pa(p4d);
+}
+
+static inline void p4d_populate(struct mm_struct *mm, p4d_t *p4d, pud_t *pud)
+{
+ p4d_val(*p4d) = _REGION2_ENTRY | __pa(pud);
+}
+
+static inline void pud_populate(struct mm_struct *mm, pud_t *pud, pmd_t *pmd)
+{
+ pud_val(*pud) = _REGION3_ENTRY | __pa(pmd);
+}
+
+static inline pgd_t *pgd_alloc(struct mm_struct *mm)
+{
+ unsigned long *table = crst_table_alloc(mm);
+
+ if (!table)
+ return NULL;
+ if (mm->context.asce_limit == _REGION3_SIZE) {
+ /* Forking a compat process with 2 page table levels */
+ if (!pgtable_pmd_page_ctor(virt_to_page(table))) {
+ crst_table_free(mm, table);
+ return NULL;
+ }
+ }
+ return (pgd_t *) table;
+}
+
+static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd)
+{
+ if (mm->context.asce_limit == _REGION3_SIZE)
+ pgtable_pmd_page_dtor(virt_to_page(pgd));
+ crst_table_free(mm, (unsigned long *) pgd);
+}
+
+static inline void pmd_populate(struct mm_struct *mm,
+ pmd_t *pmd, pgtable_t pte)
+{
+ pmd_val(*pmd) = _SEGMENT_ENTRY + __pa(pte);
+}
+
+#define pmd_populate_kernel(mm, pmd, pte) pmd_populate(mm, pmd, pte)
+
+#define pmd_pgtable(pmd) \
+ (pgtable_t)(pmd_val(pmd) & -sizeof(pte_t)*PTRS_PER_PTE)
+
+/*
+ * page table entry allocation/free routines.
+ */
+#define pte_alloc_one_kernel(mm, vmaddr) ((pte_t *) page_table_alloc(mm))
+#define pte_alloc_one(mm, vmaddr) ((pte_t *) page_table_alloc(mm))
+
+#define pte_free_kernel(mm, pte) page_table_free(mm, (unsigned long *) pte)
+#define pte_free(mm, pte) page_table_free(mm, (unsigned long *) pte)
+
+extern void rcu_table_freelist_finish(void);
+
+void vmem_map_init(void);
+void *vmem_crst_alloc(unsigned long val);
+pte_t *vmem_pte_alloc(void);
+
+unsigned long base_asce_alloc(unsigned long addr, unsigned long num_pages);
+void base_asce_free(unsigned long asce);
+
+#endif /* _S390_PGALLOC_H */
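
Worked size arithmetic behind CRST_ALLOC_ORDER (illustrative; assumes 4 KiB pages): a region/segment (CRST) table holds _CRST_ENTRIES eight-byte entries, so

    /* _CRST_TABLE_SIZE = 2048 * 8 = 16384 bytes = 4 * PAGE_SIZE               */
    /* => crst_table_alloc() needs an order-2 allocation, CRST_ALLOC_ORDER == 2 */
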
diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h
new file mode 100644
index 000000000..0a326da15
--- /dev/null
+++ b/arch/s390/include/asm/pgtable.h
@@ -0,0 +1,1643 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * S390 version
+ * Copyright IBM Corp. 1999, 2000
+ * Author(s): Hartmut Penner (hp@de.ibm.com)
+ * Ulrich Weigand (weigand@de.ibm.com)
+ * Martin Schwidefsky (schwidefsky@de.ibm.com)
+ *
+ * Derived from "include/asm-i386/pgtable.h"
+ */
+
+#ifndef _ASM_S390_PGTABLE_H
+#define _ASM_S390_PGTABLE_H
+
+#include <linux/sched.h>
+#include <linux/mm_types.h>
+#include <linux/page-flags.h>
+#include <linux/radix-tree.h>
+#include <linux/atomic.h>
+#include <asm/bug.h>
+#include <asm/page.h>
+
+extern pgd_t swapper_pg_dir[];
+extern void paging_init(void);
+
+enum {
+ PG_DIRECT_MAP_4K = 0,
+ PG_DIRECT_MAP_1M,
+ PG_DIRECT_MAP_2G,
+ PG_DIRECT_MAP_MAX
+};
+
+extern atomic_long_t direct_pages_count[PG_DIRECT_MAP_MAX];
+
+static inline void update_page_count(int level, long count)
+{
+ if (IS_ENABLED(CONFIG_PROC_FS))
+ atomic_long_add(count, &direct_pages_count[level]);
+}
+
+struct seq_file;
+void arch_report_meminfo(struct seq_file *m);
+
+/*
+ * The S390 doesn't have any external MMU info: the kernel page
+ * tables contain all the necessary information.
+ */
+#define update_mmu_cache(vma, address, ptep) do { } while (0)
+#define update_mmu_cache_pmd(vma, address, ptep) do { } while (0)
+
+/*
+ * ZERO_PAGE is a global shared page that is always zero; used
+ * for zero-mapped memory areas etc..
+ */
+
+extern unsigned long empty_zero_page;
+extern unsigned long zero_page_mask;
+
+#define ZERO_PAGE(vaddr) \
+ (virt_to_page((void *)(empty_zero_page + \
+ (((unsigned long)(vaddr)) &zero_page_mask))))
+#define __HAVE_COLOR_ZERO_PAGE
+
+/* TODO: s390 cannot support io_remap_pfn_range... */
+
+#define FIRST_USER_ADDRESS 0UL
+
+#define pte_ERROR(e) \
+ printk("%s:%d: bad pte %p.\n", __FILE__, __LINE__, (void *) pte_val(e))
+#define pmd_ERROR(e) \
+ printk("%s:%d: bad pmd %p.\n", __FILE__, __LINE__, (void *) pmd_val(e))
+#define pud_ERROR(e) \
+ printk("%s:%d: bad pud %p.\n", __FILE__, __LINE__, (void *) pud_val(e))
+#define p4d_ERROR(e) \
+ printk("%s:%d: bad p4d %p.\n", __FILE__, __LINE__, (void *) p4d_val(e))
+#define pgd_ERROR(e) \
+ printk("%s:%d: bad pgd %p.\n", __FILE__, __LINE__, (void *) pgd_val(e))
+
+/*
+ * The vmalloc and module area will always be on the topmost area of the
+ * kernel mapping. We reserve 128GB (64bit) for vmalloc and modules.
+ * On 64 bit kernels we have a 2GB area at the top of the vmalloc area where
+ * modules will reside. That makes sure that inter-module branches always
+ * happen without trampolines; in addition, the placement within a 2GB frame
+ * is branch prediction unit friendly.
+ */
+extern unsigned long VMALLOC_START;
+extern unsigned long VMALLOC_END;
+extern struct page *vmemmap;
+
+#define VMEM_MAX_PHYS ((unsigned long) vmemmap)
+
+extern unsigned long MODULES_VADDR;
+extern unsigned long MODULES_END;
+#define MODULES_VADDR MODULES_VADDR
+#define MODULES_END MODULES_END
+#define MODULES_LEN (1UL << 31)
+
+static inline int is_module_addr(void *addr)
+{
+ BUILD_BUG_ON(MODULES_LEN > (1UL << 31));
+ if (addr < (void *)MODULES_VADDR)
+ return 0;
+ if (addr > (void *)MODULES_END)
+ return 0;
+ return 1;
+}
+
+/*
+ * A 64 bit page table entry of S390 has the following format:
+ * | PFRA |0IPC| OS |
+ * 0000000000111111111122222222223333333333444444444455555555556666
+ * 0123456789012345678901234567890123456789012345678901234567890123
+ *
+ * I Page-Invalid Bit: Page is not available for address-translation
+ * P Page-Protection Bit: Store access not possible for page
+ * C Change-bit override: HW is not required to set change bit
+ *
+ * A 64 bit segment table entry of S390 has the following format:
+ * | P-table origin | TT
+ * 0000000000111111111122222222223333333333444444444455555555556666
+ * 0123456789012345678901234567890123456789012345678901234567890123
+ *
+ * I Segment-Invalid Bit: Segment is not available for address-translation
+ * C Common-Segment Bit: Segment is not private (PoP 3-30)
+ * P Page-Protection Bit: Store access not possible for page
+ * TT Type 00
+ *
+ * A 64 bit region table entry of S390 has the following format:
+ * | S-table origin | TF TTTL
+ * 0000000000111111111122222222223333333333444444444455555555556666
+ * 0123456789012345678901234567890123456789012345678901234567890123
+ *
+ * I Segment-Invalid Bit: Segment is not available for address-translation
+ * TT Type 01
+ * TF
+ * TL Table length
+ *
+ * The 64 bit region table origin of S390 has the following format:
+ * | region table origin | DTTL
+ * 0000000000111111111122222222223333333333444444444455555555556666
+ * 0123456789012345678901234567890123456789012345678901234567890123
+ *
+ * X Space-Switch event:
+ * G Segment-Invalid Bit:
+ * P Private-Space Bit:
+ * S Storage-Alteration:
+ * R Real space
+ * TL Table-Length:
+ *
+ * A storage key has the following format:
+ * | ACC |F|R|C|0|
+ * 0 3 4 5 6 7
+ * ACC: access key
+ * F : fetch protection bit
+ * R : referenced bit
+ * C : changed bit
+ */
+
+/* Hardware bits in the page table entry */
+#define _PAGE_NOEXEC 0x100 /* HW no-execute bit */
+#define _PAGE_PROTECT 0x200 /* HW read-only bit */
+#define _PAGE_INVALID 0x400 /* HW invalid bit */
+#define _PAGE_LARGE 0x800 /* Bit to mark a large pte */
+
+/* Software bits in the page table entry */
+#define _PAGE_PRESENT 0x001 /* SW pte present bit */
+#define _PAGE_YOUNG 0x004 /* SW pte young bit */
+#define _PAGE_DIRTY 0x008 /* SW pte dirty bit */
+#define _PAGE_READ 0x010 /* SW pte read bit */
+#define _PAGE_WRITE 0x020 /* SW pte write bit */
+#define _PAGE_SPECIAL 0x040 /* SW associated with special page */
+#define _PAGE_UNUSED 0x080 /* SW bit for pgste usage state */
+
+#ifdef CONFIG_MEM_SOFT_DIRTY
+#define _PAGE_SOFT_DIRTY 0x002 /* SW pte soft dirty bit */
+#else
+#define _PAGE_SOFT_DIRTY 0x000
+#endif
+
+/* Set of bits not changed in pte_modify */
+#define _PAGE_CHG_MASK (PAGE_MASK | _PAGE_SPECIAL | _PAGE_DIRTY | \
+ _PAGE_YOUNG | _PAGE_SOFT_DIRTY)
+
+/*
+ * handle_pte_fault uses pte_present and pte_none to find out the pte type
+ * WITHOUT holding the page table lock. The _PAGE_PRESENT bit is used to
+ * distinguish present from not-present ptes. It is changed only with the page
+ * table lock held.
+ *
+ * The following table gives the different possible bit combinations for
+ * the pte hardware and software bits in the last 12 bits of a pte
+ * (. unassigned bit, x don't care, t swap type):
+ *
+ * 842100000000
+ * 000084210000
+ * 000000008421
+ * .IR.uswrdy.p
+ * empty .10.00000000
+ * swap .11..ttttt.0
+ * prot-none, clean, old .11.xx0000.1
+ * prot-none, clean, young .11.xx0001.1
+ * prot-none, dirty, old .11.xx0010.1
+ * prot-none, dirty, young .11.xx0011.1
+ * read-only, clean, old .11.xx0100.1
+ * read-only, clean, young .01.xx0101.1
+ * read-only, dirty, old .11.xx0110.1
+ * read-only, dirty, young .01.xx0111.1
+ * read-write, clean, old .11.xx1100.1
+ * read-write, clean, young .01.xx1101.1
+ * read-write, dirty, old .10.xx1110.1
+ * read-write, dirty, young .00.xx1111.1
+ * HW-bits: R read-only, I invalid
+ * SW-bits: p present, y young, d dirty, r read, w write, s special,
+ * u unused, l large
+ *
+ * pte_none is true for the bit pattern .10.00000000, pte == 0x400
+ * pte_swap is true for the bit pattern .11..ttttt.0, (pte & 0x201) == 0x200
+ * pte_present is true for the bit pattern .xx.xxxxxx.1, (pte & 0x001) == 0x001
+ */
+
+/* Bits in the segment/region table address-space-control-element */
+#define _ASCE_ORIGIN ~0xfffUL/* region/segment table origin */
+#define _ASCE_PRIVATE_SPACE 0x100 /* private space control */
+#define _ASCE_ALT_EVENT 0x80 /* storage alteration event control */
+#define _ASCE_SPACE_SWITCH 0x40 /* space switch event */
+#define _ASCE_REAL_SPACE 0x20 /* real space control */
+#define _ASCE_TYPE_MASK 0x0c /* asce table type mask */
+#define _ASCE_TYPE_REGION1 0x0c /* region first table type */
+#define _ASCE_TYPE_REGION2 0x08 /* region second table type */
+#define _ASCE_TYPE_REGION3 0x04 /* region third table type */
+#define _ASCE_TYPE_SEGMENT 0x00 /* segment table type */
+#define _ASCE_TABLE_LENGTH 0x03 /* region table length */
+
+/* Bits in the region table entry */
+#define _REGION_ENTRY_ORIGIN ~0xfffUL/* region/segment table origin */
+#define _REGION_ENTRY_PROTECT 0x200 /* region protection bit */
+#define _REGION_ENTRY_NOEXEC 0x100 /* region no-execute bit */
+#define _REGION_ENTRY_OFFSET 0xc0 /* region table offset */
+#define _REGION_ENTRY_INVALID 0x20 /* invalid region table entry */
+#define _REGION_ENTRY_TYPE_MASK 0x0c /* region/segment table type mask */
+#define _REGION_ENTRY_TYPE_R1 0x0c /* region first table type */
+#define _REGION_ENTRY_TYPE_R2 0x08 /* region second table type */
+#define _REGION_ENTRY_TYPE_R3 0x04 /* region third table type */
+#define _REGION_ENTRY_LENGTH 0x03 /* region third length */
+
+#define _REGION1_ENTRY (_REGION_ENTRY_TYPE_R1 | _REGION_ENTRY_LENGTH)
+#define _REGION1_ENTRY_EMPTY (_REGION_ENTRY_TYPE_R1 | _REGION_ENTRY_INVALID)
+#define _REGION2_ENTRY (_REGION_ENTRY_TYPE_R2 | _REGION_ENTRY_LENGTH)
+#define _REGION2_ENTRY_EMPTY (_REGION_ENTRY_TYPE_R2 | _REGION_ENTRY_INVALID)
+#define _REGION3_ENTRY (_REGION_ENTRY_TYPE_R3 | _REGION_ENTRY_LENGTH)
+#define _REGION3_ENTRY_EMPTY (_REGION_ENTRY_TYPE_R3 | _REGION_ENTRY_INVALID)
+
+#define _REGION3_ENTRY_ORIGIN_LARGE ~0x7fffffffUL /* large page address */
+#define _REGION3_ENTRY_DIRTY 0x2000 /* SW region dirty bit */
+#define _REGION3_ENTRY_YOUNG 0x1000 /* SW region young bit */
+#define _REGION3_ENTRY_LARGE 0x0400 /* RTTE-format control, large page */
+#define _REGION3_ENTRY_READ 0x0002 /* SW region read bit */
+#define _REGION3_ENTRY_WRITE 0x0001 /* SW region write bit */
+
+#ifdef CONFIG_MEM_SOFT_DIRTY
+#define _REGION3_ENTRY_SOFT_DIRTY 0x4000 /* SW region soft dirty bit */
+#else
+#define _REGION3_ENTRY_SOFT_DIRTY 0x0000 /* SW region soft dirty bit */
+#endif
+
+#define _REGION_ENTRY_BITS 0xfffffffffffff22fUL
+#define _REGION_ENTRY_BITS_LARGE 0xffffffff8000fe2fUL
+
+/* Bits in the segment table entry */
+#define _SEGMENT_ENTRY_BITS 0xfffffffffffffe33UL
+#define _SEGMENT_ENTRY_BITS_LARGE 0xfffffffffff0ff33UL
+#define _SEGMENT_ENTRY_HARDWARE_BITS 0xfffffffffffffe30UL
+#define _SEGMENT_ENTRY_HARDWARE_BITS_LARGE 0xfffffffffff00730UL
+#define _SEGMENT_ENTRY_ORIGIN_LARGE ~0xfffffUL /* large page address */
+#define _SEGMENT_ENTRY_ORIGIN ~0x7ffUL/* page table origin */
+#define _SEGMENT_ENTRY_PROTECT 0x200 /* segment protection bit */
+#define _SEGMENT_ENTRY_NOEXEC 0x100 /* segment no-execute bit */
+#define _SEGMENT_ENTRY_INVALID 0x20 /* invalid segment table entry */
+
+#define _SEGMENT_ENTRY (0)
+#define _SEGMENT_ENTRY_EMPTY (_SEGMENT_ENTRY_INVALID)
+
+#define _SEGMENT_ENTRY_DIRTY 0x2000 /* SW segment dirty bit */
+#define _SEGMENT_ENTRY_YOUNG 0x1000 /* SW segment young bit */
+#define _SEGMENT_ENTRY_LARGE 0x0400 /* STE-format control, large page */
+#define _SEGMENT_ENTRY_WRITE 0x0002 /* SW segment write bit */
+#define _SEGMENT_ENTRY_READ 0x0001 /* SW segment read bit */
+
+#ifdef CONFIG_MEM_SOFT_DIRTY
+#define _SEGMENT_ENTRY_SOFT_DIRTY 0x4000 /* SW segment soft dirty bit */
+#else
+#define _SEGMENT_ENTRY_SOFT_DIRTY 0x0000 /* SW segment soft dirty bit */
+#endif
+
+#define _CRST_ENTRIES 2048 /* number of region/segment table entries */
+#define _PAGE_ENTRIES 256 /* number of page table entries */
+
+#define _CRST_TABLE_SIZE (_CRST_ENTRIES * 8)
+#define _PAGE_TABLE_SIZE (_PAGE_ENTRIES * 8)
+
+#define _REGION1_SHIFT 53
+#define _REGION2_SHIFT 42
+#define _REGION3_SHIFT 31
+#define _SEGMENT_SHIFT 20
+
+#define _REGION1_INDEX (0x7ffUL << _REGION1_SHIFT)
+#define _REGION2_INDEX (0x7ffUL << _REGION2_SHIFT)
+#define _REGION3_INDEX (0x7ffUL << _REGION3_SHIFT)
+#define _SEGMENT_INDEX (0x7ffUL << _SEGMENT_SHIFT)
+#define _PAGE_INDEX (0xffUL << _PAGE_SHIFT)
+
+#define _REGION1_SIZE (1UL << _REGION1_SHIFT)
+#define _REGION2_SIZE (1UL << _REGION2_SHIFT)
+#define _REGION3_SIZE (1UL << _REGION3_SHIFT)
+#define _SEGMENT_SIZE (1UL << _SEGMENT_SHIFT)
+
+#define _REGION1_MASK (~(_REGION1_SIZE - 1))
+#define _REGION2_MASK (~(_REGION2_SIZE - 1))
+#define _REGION3_MASK (~(_REGION3_SIZE - 1))
+#define _SEGMENT_MASK (~(_SEGMENT_SIZE - 1))
+
+#define PMD_SHIFT _SEGMENT_SHIFT
+#define PUD_SHIFT _REGION3_SHIFT
+#define P4D_SHIFT _REGION2_SHIFT
+#define PGDIR_SHIFT _REGION1_SHIFT
+
+#define PMD_SIZE _SEGMENT_SIZE
+#define PUD_SIZE _REGION3_SIZE
+#define P4D_SIZE _REGION2_SIZE
+#define PGDIR_SIZE _REGION1_SIZE
+
+#define PMD_MASK _SEGMENT_MASK
+#define PUD_MASK _REGION3_MASK
+#define P4D_MASK _REGION2_MASK
+#define PGDIR_MASK _REGION1_MASK
+
+#define PTRS_PER_PTE _PAGE_ENTRIES
+#define PTRS_PER_PMD _CRST_ENTRIES
+#define PTRS_PER_PUD _CRST_ENTRIES
+#define PTRS_PER_P4D _CRST_ENTRIES
+#define PTRS_PER_PGD _CRST_ENTRIES
+
+/*
+ * Segment table and region3 table entry encoding
+ * (R = read-only, I = invalid, y = young bit):
+ * dy..R...I...wr
+ * prot-none, clean, old 00..1...1...00
+ * prot-none, clean, young 01..1...1...00
+ * prot-none, dirty, old 10..1...1...00
+ * prot-none, dirty, young 11..1...1...00
+ * read-only, clean, old 00..1...1...01
+ * read-only, clean, young 01..1...0...01
+ * read-only, dirty, old 10..1...1...01
+ * read-only, dirty, young 11..1...0...01
+ * read-write, clean, old 00..1...1...11
+ * read-write, clean, young 01..1...0...11
+ * read-write, dirty, old 10..0...1...11
+ * read-write, dirty, young 11..0...0...11
+ * The segment table origin is used to distinguish empty (origin==0) from
+ * read-write, old segment table entries (origin!=0)
+ * HW-bits: R read-only, I invalid
+ * SW-bits: y young, d dirty, r read, w write
+ */
+
+/* Page status table bits for virtualization */
+#define PGSTE_ACC_BITS 0xf000000000000000UL
+#define PGSTE_FP_BIT 0x0800000000000000UL
+#define PGSTE_PCL_BIT 0x0080000000000000UL
+#define PGSTE_HR_BIT 0x0040000000000000UL
+#define PGSTE_HC_BIT 0x0020000000000000UL
+#define PGSTE_GR_BIT 0x0004000000000000UL
+#define PGSTE_GC_BIT 0x0002000000000000UL
+#define PGSTE_UC_BIT 0x0000800000000000UL /* user dirty (migration) */
+#define PGSTE_IN_BIT 0x0000400000000000UL /* IPTE notify bit */
+#define PGSTE_VSIE_BIT 0x0000200000000000UL /* ref'd in a shadow table */
+
+/* Guest Page State used for virtualization */
+#define _PGSTE_GPS_ZERO 0x0000000080000000UL
+#define _PGSTE_GPS_NODAT 0x0000000040000000UL
+#define _PGSTE_GPS_USAGE_MASK 0x0000000003000000UL
+#define _PGSTE_GPS_USAGE_STABLE 0x0000000000000000UL
+#define _PGSTE_GPS_USAGE_UNUSED 0x0000000001000000UL
+#define _PGSTE_GPS_USAGE_POT_VOLATILE 0x0000000002000000UL
+#define _PGSTE_GPS_USAGE_VOLATILE _PGSTE_GPS_USAGE_MASK
+
+/*
+ * A user page table pointer has the space-switch-event bit, the
+ * private-space-control bit and the storage-alteration-event-control
+ * bit set. A kernel page table pointer doesn't need them.
+ */
+#define _ASCE_USER_BITS (_ASCE_SPACE_SWITCH | _ASCE_PRIVATE_SPACE | \
+ _ASCE_ALT_EVENT)
+
+/*
+ * Page protection definitions.
+ */
+#define PAGE_NONE __pgprot(_PAGE_PRESENT | _PAGE_INVALID | _PAGE_PROTECT)
+#define PAGE_RO __pgprot(_PAGE_PRESENT | _PAGE_READ | \
+ _PAGE_NOEXEC | _PAGE_INVALID | _PAGE_PROTECT)
+#define PAGE_RX __pgprot(_PAGE_PRESENT | _PAGE_READ | \
+ _PAGE_INVALID | _PAGE_PROTECT)
+#define PAGE_RW __pgprot(_PAGE_PRESENT | _PAGE_READ | _PAGE_WRITE | \
+ _PAGE_NOEXEC | _PAGE_INVALID | _PAGE_PROTECT)
+#define PAGE_RWX __pgprot(_PAGE_PRESENT | _PAGE_READ | _PAGE_WRITE | \
+ _PAGE_INVALID | _PAGE_PROTECT)
+
+#define PAGE_SHARED __pgprot(_PAGE_PRESENT | _PAGE_READ | _PAGE_WRITE | \
+ _PAGE_YOUNG | _PAGE_DIRTY | _PAGE_NOEXEC)
+#define PAGE_KERNEL __pgprot(_PAGE_PRESENT | _PAGE_READ | _PAGE_WRITE | \
+ _PAGE_YOUNG | _PAGE_DIRTY | _PAGE_NOEXEC)
+#define PAGE_KERNEL_RO __pgprot(_PAGE_PRESENT | _PAGE_READ | _PAGE_YOUNG | \
+ _PAGE_PROTECT | _PAGE_NOEXEC)
+#define PAGE_KERNEL_EXEC __pgprot(_PAGE_PRESENT | _PAGE_READ | _PAGE_WRITE | \
+ _PAGE_YOUNG | _PAGE_DIRTY)
+
+/*
+ * On s390 the page table entry has an invalid bit and a read-only bit.
+ * Read permission implies execute permission and write permission
+ * implies read permission.
+ */
+ /*xwr*/
+#define __P000 PAGE_NONE
+#define __P001 PAGE_RO
+#define __P010 PAGE_RO
+#define __P011 PAGE_RO
+#define __P100 PAGE_RX
+#define __P101 PAGE_RX
+#define __P110 PAGE_RX
+#define __P111 PAGE_RX
+
+#define __S000 PAGE_NONE
+#define __S001 PAGE_RO
+#define __S010 PAGE_RW
+#define __S011 PAGE_RW
+#define __S100 PAGE_RX
+#define __S101 PAGE_RX
+#define __S110 PAGE_RWX
+#define __S111 PAGE_RWX
+
+/*
+ * Segment entry (large page) protection definitions.
+ */
+#define SEGMENT_NONE __pgprot(_SEGMENT_ENTRY_INVALID | \
+ _SEGMENT_ENTRY_PROTECT)
+#define SEGMENT_RO __pgprot(_SEGMENT_ENTRY_PROTECT | \
+ _SEGMENT_ENTRY_READ | \
+ _SEGMENT_ENTRY_NOEXEC)
+#define SEGMENT_RX __pgprot(_SEGMENT_ENTRY_PROTECT | \
+ _SEGMENT_ENTRY_READ)
+#define SEGMENT_RW __pgprot(_SEGMENT_ENTRY_READ | \
+ _SEGMENT_ENTRY_WRITE | \
+ _SEGMENT_ENTRY_NOEXEC)
+#define SEGMENT_RWX __pgprot(_SEGMENT_ENTRY_READ | \
+ _SEGMENT_ENTRY_WRITE)
+#define SEGMENT_KERNEL __pgprot(_SEGMENT_ENTRY | \
+ _SEGMENT_ENTRY_LARGE | \
+ _SEGMENT_ENTRY_READ | \
+ _SEGMENT_ENTRY_WRITE | \
+ _SEGMENT_ENTRY_YOUNG | \
+ _SEGMENT_ENTRY_DIRTY | \
+ _SEGMENT_ENTRY_NOEXEC)
+#define SEGMENT_KERNEL_RO __pgprot(_SEGMENT_ENTRY | \
+ _SEGMENT_ENTRY_LARGE | \
+ _SEGMENT_ENTRY_READ | \
+ _SEGMENT_ENTRY_YOUNG | \
+ _SEGMENT_ENTRY_PROTECT | \
+ _SEGMENT_ENTRY_NOEXEC)
+
+/*
+ * Region3 entry (large page) protection definitions.
+ */
+
+#define REGION3_KERNEL __pgprot(_REGION_ENTRY_TYPE_R3 | \
+ _REGION3_ENTRY_LARGE | \
+ _REGION3_ENTRY_READ | \
+ _REGION3_ENTRY_WRITE | \
+ _REGION3_ENTRY_YOUNG | \
+ _REGION3_ENTRY_DIRTY | \
+ _REGION_ENTRY_NOEXEC)
+#define REGION3_KERNEL_RO __pgprot(_REGION_ENTRY_TYPE_R3 | \
+ _REGION3_ENTRY_LARGE | \
+ _REGION3_ENTRY_READ | \
+ _REGION3_ENTRY_YOUNG | \
+ _REGION_ENTRY_PROTECT | \
+ _REGION_ENTRY_NOEXEC)
+
+static inline bool mm_p4d_folded(struct mm_struct *mm)
+{
+ return mm->context.asce_limit <= _REGION1_SIZE;
+}
+#define mm_p4d_folded(mm) mm_p4d_folded(mm)
+
+static inline bool mm_pud_folded(struct mm_struct *mm)
+{
+ return mm->context.asce_limit <= _REGION2_SIZE;
+}
+#define mm_pud_folded(mm) mm_pud_folded(mm)
+
+static inline bool mm_pmd_folded(struct mm_struct *mm)
+{
+ return mm->context.asce_limit <= _REGION3_SIZE;
+}
+#define mm_pmd_folded(mm) mm_pmd_folded(mm)
+
+static inline int mm_has_pgste(struct mm_struct *mm)
+{
+#ifdef CONFIG_PGSTE
+ if (unlikely(mm->context.has_pgste))
+ return 1;
+#endif
+ return 0;
+}
+
+static inline int mm_alloc_pgste(struct mm_struct *mm)
+{
+#ifdef CONFIG_PGSTE
+ if (unlikely(mm->context.alloc_pgste))
+ return 1;
+#endif
+ return 0;
+}
+
+/*
+ * In the case that a guest uses storage keys
+ * faults should no longer be backed by zero pages
+ */
+#define mm_forbids_zeropage mm_has_pgste
+static inline int mm_uses_skeys(struct mm_struct *mm)
+{
+#ifdef CONFIG_PGSTE
+ if (mm->context.uses_skeys)
+ return 1;
+#endif
+ return 0;
+}
+
+static inline void csp(unsigned int *ptr, unsigned int old, unsigned int new)
+{
+ register unsigned long reg2 asm("2") = old;
+ register unsigned long reg3 asm("3") = new;
+ unsigned long address = (unsigned long)ptr | 1;
+
+ asm volatile(
+ " csp %0,%3"
+ : "+d" (reg2), "+m" (*ptr)
+ : "d" (reg3), "d" (address)
+ : "cc");
+}
+
+static inline void cspg(unsigned long *ptr, unsigned long old, unsigned long new)
+{
+ register unsigned long reg2 asm("2") = old;
+ register unsigned long reg3 asm("3") = new;
+ unsigned long address = (unsigned long)ptr | 1;
+
+ asm volatile(
+ " .insn rre,0xb98a0000,%0,%3"
+ : "+d" (reg2), "+m" (*ptr)
+ : "d" (reg3), "d" (address)
+ : "cc");
+}
+
+#define CRDTE_DTT_PAGE 0x00UL
+#define CRDTE_DTT_SEGMENT 0x10UL
+#define CRDTE_DTT_REGION3 0x14UL
+#define CRDTE_DTT_REGION2 0x18UL
+#define CRDTE_DTT_REGION1 0x1cUL
+
+static inline void crdte(unsigned long old, unsigned long new,
+ unsigned long table, unsigned long dtt,
+ unsigned long address, unsigned long asce)
+{
+ register unsigned long reg2 asm("2") = old;
+ register unsigned long reg3 asm("3") = new;
+ register unsigned long reg4 asm("4") = table | dtt;
+ register unsigned long reg5 asm("5") = address;
+
+ asm volatile(".insn rrf,0xb98f0000,%0,%2,%4,0"
+ : "+d" (reg2)
+ : "d" (reg3), "d" (reg4), "d" (reg5), "a" (asce)
+ : "memory", "cc");
+}
+
+/*
+ * pgd/p4d/pud/pmd/pte query functions
+ */
+static inline int pgd_folded(pgd_t pgd)
+{
+ return (pgd_val(pgd) & _REGION_ENTRY_TYPE_MASK) < _REGION_ENTRY_TYPE_R1;
+}
+
+static inline int pgd_present(pgd_t pgd)
+{
+ if (pgd_folded(pgd))
+ return 1;
+ return (pgd_val(pgd) & _REGION_ENTRY_ORIGIN) != 0UL;
+}
+
+static inline int pgd_none(pgd_t pgd)
+{
+ if (pgd_folded(pgd))
+ return 0;
+ return (pgd_val(pgd) & _REGION_ENTRY_INVALID) != 0UL;
+}
+
+static inline int pgd_bad(pgd_t pgd)
+{
+ /*
+ * With dynamic page table levels the pgd can be a region table
+ * entry or a segment table entry. Check for the bits that are
+ * invalid for either table entry.
+ */
+ unsigned long mask =
+ ~_SEGMENT_ENTRY_ORIGIN & ~_REGION_ENTRY_INVALID &
+ ~_REGION_ENTRY_TYPE_MASK & ~_REGION_ENTRY_LENGTH;
+ return (pgd_val(pgd) & mask) != 0;
+}
+
+static inline int p4d_folded(p4d_t p4d)
+{
+ return (p4d_val(p4d) & _REGION_ENTRY_TYPE_MASK) < _REGION_ENTRY_TYPE_R2;
+}
+
+static inline int p4d_present(p4d_t p4d)
+{
+ if (p4d_folded(p4d))
+ return 1;
+ return (p4d_val(p4d) & _REGION_ENTRY_ORIGIN) != 0UL;
+}
+
+static inline int p4d_none(p4d_t p4d)
+{
+ if (p4d_folded(p4d))
+ return 0;
+ return p4d_val(p4d) == _REGION2_ENTRY_EMPTY;
+}
+
+static inline unsigned long p4d_pfn(p4d_t p4d)
+{
+ unsigned long origin_mask;
+
+ origin_mask = _REGION_ENTRY_ORIGIN;
+ return (p4d_val(p4d) & origin_mask) >> PAGE_SHIFT;
+}
+
+static inline int pud_folded(pud_t pud)
+{
+ return (pud_val(pud) & _REGION_ENTRY_TYPE_MASK) < _REGION_ENTRY_TYPE_R3;
+}
+
+static inline int pud_present(pud_t pud)
+{
+ if (pud_folded(pud))
+ return 1;
+ return (pud_val(pud) & _REGION_ENTRY_ORIGIN) != 0UL;
+}
+
+static inline int pud_none(pud_t pud)
+{
+ if (pud_folded(pud))
+ return 0;
+ return pud_val(pud) == _REGION3_ENTRY_EMPTY;
+}
+
+static inline int pud_large(pud_t pud)
+{
+ if ((pud_val(pud) & _REGION_ENTRY_TYPE_MASK) != _REGION_ENTRY_TYPE_R3)
+ return 0;
+ return !!(pud_val(pud) & _REGION3_ENTRY_LARGE);
+}
+
+static inline unsigned long pud_pfn(pud_t pud)
+{
+ unsigned long origin_mask;
+
+ origin_mask = _REGION_ENTRY_ORIGIN;
+ if (pud_large(pud))
+ origin_mask = _REGION3_ENTRY_ORIGIN_LARGE;
+ return (pud_val(pud) & origin_mask) >> PAGE_SHIFT;
+}
+
+static inline int pmd_large(pmd_t pmd)
+{
+ return (pmd_val(pmd) & _SEGMENT_ENTRY_LARGE) != 0;
+}
+
+static inline int pmd_bad(pmd_t pmd)
+{
+ if (pmd_large(pmd))
+ return (pmd_val(pmd) & ~_SEGMENT_ENTRY_BITS_LARGE) != 0;
+ return (pmd_val(pmd) & ~_SEGMENT_ENTRY_BITS) != 0;
+}
+
+static inline int pud_bad(pud_t pud)
+{
+ if ((pud_val(pud) & _REGION_ENTRY_TYPE_MASK) < _REGION_ENTRY_TYPE_R3)
+ return pmd_bad(__pmd(pud_val(pud)));
+ if (pud_large(pud))
+ return (pud_val(pud) & ~_REGION_ENTRY_BITS_LARGE) != 0;
+ return (pud_val(pud) & ~_REGION_ENTRY_BITS) != 0;
+}
+
+static inline int p4d_bad(p4d_t p4d)
+{
+ if ((p4d_val(p4d) & _REGION_ENTRY_TYPE_MASK) < _REGION_ENTRY_TYPE_R2)
+ return pud_bad(__pud(p4d_val(p4d)));
+ return (p4d_val(p4d) & ~_REGION_ENTRY_BITS) != 0;
+}
+
+static inline int pmd_present(pmd_t pmd)
+{
+ return pmd_val(pmd) != _SEGMENT_ENTRY_EMPTY;
+}
+
+static inline int pmd_none(pmd_t pmd)
+{
+ return pmd_val(pmd) == _SEGMENT_ENTRY_EMPTY;
+}
+
+static inline unsigned long pmd_pfn(pmd_t pmd)
+{
+ unsigned long origin_mask;
+
+ origin_mask = _SEGMENT_ENTRY_ORIGIN;
+ if (pmd_large(pmd))
+ origin_mask = _SEGMENT_ENTRY_ORIGIN_LARGE;
+ return (pmd_val(pmd) & origin_mask) >> PAGE_SHIFT;
+}
+
+#define pmd_write pmd_write
+static inline int pmd_write(pmd_t pmd)
+{
+ return (pmd_val(pmd) & _SEGMENT_ENTRY_WRITE) != 0;
+}
+
+static inline int pmd_dirty(pmd_t pmd)
+{
+ int dirty = 1;
+ if (pmd_large(pmd))
+ dirty = (pmd_val(pmd) & _SEGMENT_ENTRY_DIRTY) != 0;
+ return dirty;
+}
+
+static inline int pmd_young(pmd_t pmd)
+{
+ int young = 1;
+ if (pmd_large(pmd))
+ young = (pmd_val(pmd) & _SEGMENT_ENTRY_YOUNG) != 0;
+ return young;
+}
+
+static inline int pte_present(pte_t pte)
+{
+ /* Bit pattern: (pte & 0x001) == 0x001 */
+ return (pte_val(pte) & _PAGE_PRESENT) != 0;
+}
+
+static inline int pte_none(pte_t pte)
+{
+ /* Bit pattern: pte == 0x400 */
+ return pte_val(pte) == _PAGE_INVALID;
+}
+
+static inline int pte_swap(pte_t pte)
+{
+ /* Bit pattern: (pte & 0x201) == 0x200 */
+ return (pte_val(pte) & (_PAGE_PROTECT | _PAGE_PRESENT))
+ == _PAGE_PROTECT;
+}
+
+static inline int pte_special(pte_t pte)
+{
+ return (pte_val(pte) & _PAGE_SPECIAL);
+}
+
+#define __HAVE_ARCH_PTE_SAME
+static inline int pte_same(pte_t a, pte_t b)
+{
+ return pte_val(a) == pte_val(b);
+}
+
+#ifdef CONFIG_NUMA_BALANCING
+static inline int pte_protnone(pte_t pte)
+{
+ return pte_present(pte) && !(pte_val(pte) & _PAGE_READ);
+}
+
+static inline int pmd_protnone(pmd_t pmd)
+{
+ /* pmd_large(pmd) implies pmd_present(pmd) */
+ return pmd_large(pmd) && !(pmd_val(pmd) & _SEGMENT_ENTRY_READ);
+}
+#endif
+
+static inline int pte_soft_dirty(pte_t pte)
+{
+ return pte_val(pte) & _PAGE_SOFT_DIRTY;
+}
+#define pte_swp_soft_dirty pte_soft_dirty
+
+static inline pte_t pte_mksoft_dirty(pte_t pte)
+{
+ pte_val(pte) |= _PAGE_SOFT_DIRTY;
+ return pte;
+}
+#define pte_swp_mksoft_dirty pte_mksoft_dirty
+
+static inline pte_t pte_clear_soft_dirty(pte_t pte)
+{
+ pte_val(pte) &= ~_PAGE_SOFT_DIRTY;
+ return pte;
+}
+#define pte_swp_clear_soft_dirty pte_clear_soft_dirty
+
+static inline int pmd_soft_dirty(pmd_t pmd)
+{
+ return pmd_val(pmd) & _SEGMENT_ENTRY_SOFT_DIRTY;
+}
+
+static inline pmd_t pmd_mksoft_dirty(pmd_t pmd)
+{
+ pmd_val(pmd) |= _SEGMENT_ENTRY_SOFT_DIRTY;
+ return pmd;
+}
+
+static inline pmd_t pmd_clear_soft_dirty(pmd_t pmd)
+{
+ pmd_val(pmd) &= ~_SEGMENT_ENTRY_SOFT_DIRTY;
+ return pmd;
+}
+
+/*
+ * The query functions pte_write/pte_dirty/pte_young only work if
+ * pte_present() is true. Undefined behaviour if not.
+ */
+static inline int pte_write(pte_t pte)
+{
+ return (pte_val(pte) & _PAGE_WRITE) != 0;
+}
+
+static inline int pte_dirty(pte_t pte)
+{
+ return (pte_val(pte) & _PAGE_DIRTY) != 0;
+}
+
+static inline int pte_young(pte_t pte)
+{
+ return (pte_val(pte) & _PAGE_YOUNG) != 0;
+}
+
+#define __HAVE_ARCH_PTE_UNUSED
+static inline int pte_unused(pte_t pte)
+{
+ return pte_val(pte) & _PAGE_UNUSED;
+}
+
+/*
+ * pgd/p4d/pud/pmd/pte modification functions
+ */
+
+static inline void pgd_clear(pgd_t *pgd)
+{
+ if ((pgd_val(*pgd) & _REGION_ENTRY_TYPE_MASK) == _REGION_ENTRY_TYPE_R1)
+ pgd_val(*pgd) = _REGION1_ENTRY_EMPTY;
+}
+
+static inline void p4d_clear(p4d_t *p4d)
+{
+ if ((p4d_val(*p4d) & _REGION_ENTRY_TYPE_MASK) == _REGION_ENTRY_TYPE_R2)
+ p4d_val(*p4d) = _REGION2_ENTRY_EMPTY;
+}
+
+static inline void pud_clear(pud_t *pud)
+{
+ if ((pud_val(*pud) & _REGION_ENTRY_TYPE_MASK) == _REGION_ENTRY_TYPE_R3)
+ pud_val(*pud) = _REGION3_ENTRY_EMPTY;
+}
+
+static inline void pmd_clear(pmd_t *pmdp)
+{
+ pmd_val(*pmdp) = _SEGMENT_ENTRY_EMPTY;
+}
+
+static inline void pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
+{
+ pte_val(*ptep) = _PAGE_INVALID;
+}
+
+/*
+ * The following pte modification functions only work if
+ * pte_present() is true. Undefined behaviour if not.
+ */
+static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
+{
+ pte_val(pte) &= _PAGE_CHG_MASK;
+ pte_val(pte) |= pgprot_val(newprot);
+ /*
+ * newprot for PAGE_NONE, PAGE_RO, PAGE_RX, PAGE_RW and PAGE_RWX
+ * has the invalid bit set, clear it again for readable, young pages
+ */
+ if ((pte_val(pte) & _PAGE_YOUNG) && (pte_val(pte) & _PAGE_READ))
+ pte_val(pte) &= ~_PAGE_INVALID;
+ /*
+ * newprot for PAGE_RO, PAGE_RX, PAGE_RW and PAGE_RWX has the page
+ * protection bit set, clear it again for writable, dirty pages
+ */
+ if ((pte_val(pte) & _PAGE_DIRTY) && (pte_val(pte) & _PAGE_WRITE))
+ pte_val(pte) &= ~_PAGE_PROTECT;
+ return pte;
+}
+
+static inline pte_t pte_wrprotect(pte_t pte)
+{
+ pte_val(pte) &= ~_PAGE_WRITE;
+ pte_val(pte) |= _PAGE_PROTECT;
+ return pte;
+}
+
+static inline pte_t pte_mkwrite(pte_t pte)
+{
+ pte_val(pte) |= _PAGE_WRITE;
+ if (pte_val(pte) & _PAGE_DIRTY)
+ pte_val(pte) &= ~_PAGE_PROTECT;
+ return pte;
+}
+
+static inline pte_t pte_mkclean(pte_t pte)
+{
+ pte_val(pte) &= ~_PAGE_DIRTY;
+ pte_val(pte) |= _PAGE_PROTECT;
+ return pte;
+}
+
+static inline pte_t pte_mkdirty(pte_t pte)
+{
+ pte_val(pte) |= _PAGE_DIRTY | _PAGE_SOFT_DIRTY;
+ if (pte_val(pte) & _PAGE_WRITE)
+ pte_val(pte) &= ~_PAGE_PROTECT;
+ return pte;
+}
+
+static inline pte_t pte_mkold(pte_t pte)
+{
+ pte_val(pte) &= ~_PAGE_YOUNG;
+ pte_val(pte) |= _PAGE_INVALID;
+ return pte;
+}
+
+static inline pte_t pte_mkyoung(pte_t pte)
+{
+ pte_val(pte) |= _PAGE_YOUNG;
+ if (pte_val(pte) & _PAGE_READ)
+ pte_val(pte) &= ~_PAGE_INVALID;
+ return pte;
+}
+
+static inline pte_t pte_mkspecial(pte_t pte)
+{
+ pte_val(pte) |= _PAGE_SPECIAL;
+ return pte;
+}
+
+#ifdef CONFIG_HUGETLB_PAGE
+static inline pte_t pte_mkhuge(pte_t pte)
+{
+ pte_val(pte) |= _PAGE_LARGE;
+ return pte;
+}
+#endif
+
+#define IPTE_GLOBAL 0
+#define IPTE_LOCAL 1
+
+#define IPTE_NODAT 0x400
+#define IPTE_GUEST_ASCE 0x800
+
+static inline void __ptep_ipte(unsigned long address, pte_t *ptep,
+ unsigned long opt, unsigned long asce,
+ int local)
+{
+ unsigned long pto = (unsigned long) ptep;
+
+ if (__builtin_constant_p(opt) && opt == 0) {
+ /* Invalidation + TLB flush for the pte */
+ asm volatile(
+ " .insn rrf,0xb2210000,%[r1],%[r2],0,%[m4]"
+ : "+m" (*ptep) : [r1] "a" (pto), [r2] "a" (address),
+ [m4] "i" (local));
+ return;
+ }
+
+ /* Invalidate ptes with options + TLB flush of the ptes */
+ opt = opt | (asce & _ASCE_ORIGIN);
+ asm volatile(
+ " .insn rrf,0xb2210000,%[r1],%[r2],%[r3],%[m4]"
+ : [r2] "+a" (address), [r3] "+a" (opt)
+ : [r1] "a" (pto), [m4] "i" (local) : "memory");
+}
+
+static inline void __ptep_ipte_range(unsigned long address, int nr,
+ pte_t *ptep, int local)
+{
+ unsigned long pto = (unsigned long) ptep;
+
+ /* Invalidate a range of ptes + TLB flush of the ptes */
+ do {
+ asm volatile(
+ " .insn rrf,0xb2210000,%[r1],%[r2],%[r3],%[m4]"
+ : [r2] "+a" (address), [r3] "+a" (nr)
+ : [r1] "a" (pto), [m4] "i" (local) : "memory");
+ } while (nr != 255);
+}
+
+/*
+ * This is hard to understand. ptep_get_and_clear and ptep_clear_flush
+ * both clear the TLB for the unmapped pte. The reason is that
+ * ptep_get_and_clear is used in common code (e.g. change_pte_range)
+ * to modify an active pte. The sequence is
+ * 1) ptep_get_and_clear
+ * 2) set_pte_at
+ * 3) flush_tlb_range
+ * On s390 the TLB needs to be flushed together with the modification of
+ * the pte if the pte is active. The only way this can be implemented is
+ * to have ptep_get_and_clear do the TLB flush. In exchange, flush_tlb_range
+ * is a nop.
+ */
+pte_t ptep_xchg_direct(struct mm_struct *, unsigned long, pte_t *, pte_t);
+pte_t ptep_xchg_lazy(struct mm_struct *, unsigned long, pte_t *, pte_t);
+
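To make the three-step sequence described in the comment above concrete, here is a purely illustrative sketch in the style of generic mm code; change_one_pte() is a made-up name, error handling is omitted, and this is not part of the patched header.

static void change_one_pte(struct vm_area_struct *vma, unsigned long addr,
			   pte_t *ptep, pgprot_t newprot)
{
	pte_t pte;

	/* 1) clear the pte; on s390 this also flushes the TLB entry */
	pte = ptep_get_and_clear(vma->vm_mm, addr, ptep);
	pte = pte_modify(pte, newprot);
	/* 2) install the modified pte */
	set_pte_at(vma->vm_mm, addr, ptep, pte);
	/* 3) a nop on s390, the flush already happened in step 1 */
	flush_tlb_range(vma, addr, addr + PAGE_SIZE);
}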
+#define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG
+static inline int ptep_test_and_clear_young(struct vm_area_struct *vma,
+ unsigned long addr, pte_t *ptep)
+{
+ pte_t pte = *ptep;
+
+ pte = ptep_xchg_direct(vma->vm_mm, addr, ptep, pte_mkold(pte));
+ return pte_young(pte);
+}
+
+#define __HAVE_ARCH_PTEP_CLEAR_YOUNG_FLUSH
+static inline int ptep_clear_flush_young(struct vm_area_struct *vma,
+ unsigned long address, pte_t *ptep)
+{
+ return ptep_test_and_clear_young(vma, address, ptep);
+}
+
+#define __HAVE_ARCH_PTEP_GET_AND_CLEAR
+static inline pte_t ptep_get_and_clear(struct mm_struct *mm,
+ unsigned long addr, pte_t *ptep)
+{
+ return ptep_xchg_lazy(mm, addr, ptep, __pte(_PAGE_INVALID));
+}
+
+#define __HAVE_ARCH_PTEP_MODIFY_PROT_TRANSACTION
+pte_t ptep_modify_prot_start(struct mm_struct *, unsigned long, pte_t *);
+void ptep_modify_prot_commit(struct mm_struct *, unsigned long, pte_t *, pte_t);
+
+#define __HAVE_ARCH_PTEP_CLEAR_FLUSH
+static inline pte_t ptep_clear_flush(struct vm_area_struct *vma,
+ unsigned long addr, pte_t *ptep)
+{
+ return ptep_xchg_direct(vma->vm_mm, addr, ptep, __pte(_PAGE_INVALID));
+}
+
+/*
+ * The batched pte unmap code uses ptep_get_and_clear_full to clear the
+ * ptes. Here an optimization is possible. tlb_gather_mmu flushes all
+ * tlbs of an mm if it can guarantee that the ptes of the mm_struct
+ * cannot be accessed while the batched unmap is running. In this case
+ * full==1 and a simple pte_clear is enough. See tlb.h.
+ */
+#define __HAVE_ARCH_PTEP_GET_AND_CLEAR_FULL
+static inline pte_t ptep_get_and_clear_full(struct mm_struct *mm,
+ unsigned long addr,
+ pte_t *ptep, int full)
+{
+ if (full) {
+ pte_t pte = *ptep;
+ *ptep = __pte(_PAGE_INVALID);
+ return pte;
+ }
+ return ptep_xchg_lazy(mm, addr, ptep, __pte(_PAGE_INVALID));
+}
+
+#define __HAVE_ARCH_PTEP_SET_WRPROTECT
+static inline void ptep_set_wrprotect(struct mm_struct *mm,
+ unsigned long addr, pte_t *ptep)
+{
+ pte_t pte = *ptep;
+
+ if (pte_write(pte))
+ ptep_xchg_lazy(mm, addr, ptep, pte_wrprotect(pte));
+}
+
+#define __HAVE_ARCH_PTEP_SET_ACCESS_FLAGS
+static inline int ptep_set_access_flags(struct vm_area_struct *vma,
+ unsigned long addr, pte_t *ptep,
+ pte_t entry, int dirty)
+{
+ if (pte_same(*ptep, entry))
+ return 0;
+ ptep_xchg_direct(vma->vm_mm, addr, ptep, entry);
+ return 1;
+}
+
+/*
+ * Additional functions to handle KVM guest page tables
+ */
+void ptep_set_pte_at(struct mm_struct *mm, unsigned long addr,
+ pte_t *ptep, pte_t entry);
+void ptep_set_notify(struct mm_struct *mm, unsigned long addr, pte_t *ptep);
+void ptep_notify(struct mm_struct *mm, unsigned long addr,
+ pte_t *ptep, unsigned long bits);
+int ptep_force_prot(struct mm_struct *mm, unsigned long gaddr,
+ pte_t *ptep, int prot, unsigned long bit);
+void ptep_zap_unused(struct mm_struct *mm, unsigned long addr,
+ pte_t *ptep , int reset);
+void ptep_zap_key(struct mm_struct *mm, unsigned long addr, pte_t *ptep);
+int ptep_shadow_pte(struct mm_struct *mm, unsigned long saddr,
+ pte_t *sptep, pte_t *tptep, pte_t pte);
+void ptep_unshadow_pte(struct mm_struct *mm, unsigned long saddr, pte_t *ptep);
+
+bool ptep_test_and_clear_uc(struct mm_struct *mm, unsigned long address,
+ pte_t *ptep);
+int set_guest_storage_key(struct mm_struct *mm, unsigned long addr,
+ unsigned char key, bool nq);
+int cond_set_guest_storage_key(struct mm_struct *mm, unsigned long addr,
+ unsigned char key, unsigned char *oldkey,
+ bool nq, bool mr, bool mc);
+int reset_guest_reference_bit(struct mm_struct *mm, unsigned long addr);
+int get_guest_storage_key(struct mm_struct *mm, unsigned long addr,
+ unsigned char *key);
+
+int set_pgste_bits(struct mm_struct *mm, unsigned long addr,
+ unsigned long bits, unsigned long value);
+int get_pgste(struct mm_struct *mm, unsigned long hva, unsigned long *pgstep);
+int pgste_perform_essa(struct mm_struct *mm, unsigned long hva, int orc,
+ unsigned long *oldpte, unsigned long *oldpgste);
+void gmap_pmdp_csp(struct mm_struct *mm, unsigned long vmaddr);
+void gmap_pmdp_invalidate(struct mm_struct *mm, unsigned long vmaddr);
+void gmap_pmdp_idte_local(struct mm_struct *mm, unsigned long vmaddr);
+void gmap_pmdp_idte_global(struct mm_struct *mm, unsigned long vmaddr);
+
+/*
+ * Certain architectures need to do special things when PTEs
+ * within a page table are directly modified. Thus, the following
+ * hook is made available.
+ */
+static inline void set_pte_at(struct mm_struct *mm, unsigned long addr,
+ pte_t *ptep, pte_t entry)
+{
+ if (pte_present(entry))
+ pte_val(entry) &= ~_PAGE_UNUSED;
+ if (mm_has_pgste(mm))
+ ptep_set_pte_at(mm, addr, ptep, entry);
+ else
+ *ptep = entry;
+}
+
+/*
+ * Conversion functions: convert a page and protection to a page entry,
+ * and a page entry and page directory to the page they refer to.
+ */
+static inline pte_t mk_pte_phys(unsigned long physpage, pgprot_t pgprot)
+{
+ pte_t __pte;
+ pte_val(__pte) = physpage + pgprot_val(pgprot);
+ if (!MACHINE_HAS_NX)
+ pte_val(__pte) &= ~_PAGE_NOEXEC;
+ return pte_mkyoung(__pte);
+}
+
+static inline pte_t mk_pte(struct page *page, pgprot_t pgprot)
+{
+ unsigned long physpage = page_to_phys(page);
+ pte_t __pte = mk_pte_phys(physpage, pgprot);
+
+ if (pte_write(__pte) && PageDirty(page))
+ __pte = pte_mkdirty(__pte);
+ return __pte;
+}
+
+#define pgd_index(address) (((address) >> PGDIR_SHIFT) & (PTRS_PER_PGD-1))
+#define p4d_index(address) (((address) >> P4D_SHIFT) & (PTRS_PER_P4D-1))
+#define pud_index(address) (((address) >> PUD_SHIFT) & (PTRS_PER_PUD-1))
+#define pmd_index(address) (((address) >> PMD_SHIFT) & (PTRS_PER_PMD-1))
+#define pte_index(address) (((address) >> PAGE_SHIFT) & (PTRS_PER_PTE-1))
+
+#define pgd_offset(mm, address) ((mm)->pgd + pgd_index(address))
+#define pgd_offset_k(address) pgd_offset(&init_mm, address)
+
+#define pmd_deref(pmd) (pmd_val(pmd) & _SEGMENT_ENTRY_ORIGIN)
+#define pud_deref(pud) (pud_val(pud) & _REGION_ENTRY_ORIGIN)
+#define p4d_deref(p4d) (p4d_val(p4d) & _REGION_ENTRY_ORIGIN)
+#define pgd_deref(pgd) (pgd_val(pgd) & _REGION_ENTRY_ORIGIN)
+
+static inline p4d_t *p4d_offset(pgd_t *pgd, unsigned long address)
+{
+ p4d_t *p4d = (p4d_t *) pgd;
+
+ if ((pgd_val(*pgd) & _REGION_ENTRY_TYPE_MASK) == _REGION_ENTRY_TYPE_R1)
+ p4d = (p4d_t *) pgd_deref(*pgd);
+ return p4d + p4d_index(address);
+}
+
+static inline pud_t *pud_offset(p4d_t *p4d, unsigned long address)
+{
+ pud_t *pud = (pud_t *) p4d;
+
+ if ((p4d_val(*p4d) & _REGION_ENTRY_TYPE_MASK) == _REGION_ENTRY_TYPE_R2)
+ pud = (pud_t *) p4d_deref(*p4d);
+ return pud + pud_index(address);
+}
+
+static inline pmd_t *pmd_offset(pud_t *pud, unsigned long address)
+{
+ pmd_t *pmd = (pmd_t *) pud;
+
+ if ((pud_val(*pud) & _REGION_ENTRY_TYPE_MASK) == _REGION_ENTRY_TYPE_R3)
+ pmd = (pmd_t *) pud_deref(*pud);
+ return pmd + pmd_index(address);
+}
+
+#define pfn_pte(pfn,pgprot) mk_pte_phys(__pa((pfn) << PAGE_SHIFT),(pgprot))
+#define pte_pfn(x) (pte_val(x) >> PAGE_SHIFT)
+#define pte_page(x) pfn_to_page(pte_pfn(x))
+
+#define pmd_page(pmd) pfn_to_page(pmd_pfn(pmd))
+#define pud_page(pud) pfn_to_page(pud_pfn(pud))
+#define p4d_page(p4d) pfn_to_page(p4d_pfn(p4d))
+
+/* Find an entry in the lowest level page table.. */
+#define pte_offset(pmd, addr) ((pte_t *) pmd_deref(*(pmd)) + pte_index(addr))
+#define pte_offset_kernel(pmd, address) pte_offset(pmd,address)
+#define pte_offset_map(pmd, address) pte_offset_kernel(pmd, address)
+#define pte_unmap(pte) do { } while (0)
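A minimal sketch of how the index and offset helpers above chain together for a software page-table walk; lookup_pte() is a made-up name and large (huge) entries simply terminate the walk.

static pte_t *lookup_pte(struct mm_struct *mm, unsigned long addr)
{
	pgd_t *pgd = pgd_offset(mm, addr);
	p4d_t *p4d;
	pud_t *pud;
	pmd_t *pmd;

	if (pgd_none(*pgd))
		return NULL;
	p4d = p4d_offset(pgd, addr);
	if (p4d_none(*p4d))
		return NULL;
	pud = pud_offset(p4d, addr);
	if (pud_none(*pud) || pud_large(*pud))
		return NULL;
	pmd = pmd_offset(pud, addr);
	if (pmd_none(*pmd) || pmd_large(*pmd))
		return NULL;
	return pte_offset_kernel(pmd, addr);
}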
+
+static inline pmd_t pmd_wrprotect(pmd_t pmd)
+{
+ pmd_val(pmd) &= ~_SEGMENT_ENTRY_WRITE;
+ pmd_val(pmd) |= _SEGMENT_ENTRY_PROTECT;
+ return pmd;
+}
+
+static inline pmd_t pmd_mkwrite(pmd_t pmd)
+{
+ pmd_val(pmd) |= _SEGMENT_ENTRY_WRITE;
+ if (pmd_large(pmd) && !(pmd_val(pmd) & _SEGMENT_ENTRY_DIRTY))
+ return pmd;
+ pmd_val(pmd) &= ~_SEGMENT_ENTRY_PROTECT;
+ return pmd;
+}
+
+static inline pmd_t pmd_mkclean(pmd_t pmd)
+{
+ if (pmd_large(pmd)) {
+ pmd_val(pmd) &= ~_SEGMENT_ENTRY_DIRTY;
+ pmd_val(pmd) |= _SEGMENT_ENTRY_PROTECT;
+ }
+ return pmd;
+}
+
+static inline pmd_t pmd_mkdirty(pmd_t pmd)
+{
+ if (pmd_large(pmd)) {
+ pmd_val(pmd) |= _SEGMENT_ENTRY_DIRTY |
+ _SEGMENT_ENTRY_SOFT_DIRTY;
+ if (pmd_val(pmd) & _SEGMENT_ENTRY_WRITE)
+ pmd_val(pmd) &= ~_SEGMENT_ENTRY_PROTECT;
+ }
+ return pmd;
+}
+
+static inline pud_t pud_wrprotect(pud_t pud)
+{
+ pud_val(pud) &= ~_REGION3_ENTRY_WRITE;
+ pud_val(pud) |= _REGION_ENTRY_PROTECT;
+ return pud;
+}
+
+static inline pud_t pud_mkwrite(pud_t pud)
+{
+ pud_val(pud) |= _REGION3_ENTRY_WRITE;
+ if (pud_large(pud) && !(pud_val(pud) & _REGION3_ENTRY_DIRTY))
+ return pud;
+ pud_val(pud) &= ~_REGION_ENTRY_PROTECT;
+ return pud;
+}
+
+static inline pud_t pud_mkclean(pud_t pud)
+{
+ if (pud_large(pud)) {
+ pud_val(pud) &= ~_REGION3_ENTRY_DIRTY;
+ pud_val(pud) |= _REGION_ENTRY_PROTECT;
+ }
+ return pud;
+}
+
+static inline pud_t pud_mkdirty(pud_t pud)
+{
+ if (pud_large(pud)) {
+ pud_val(pud) |= _REGION3_ENTRY_DIRTY |
+ _REGION3_ENTRY_SOFT_DIRTY;
+ if (pud_val(pud) & _REGION3_ENTRY_WRITE)
+ pud_val(pud) &= ~_REGION_ENTRY_PROTECT;
+ }
+ return pud;
+}
+
+#if defined(CONFIG_TRANSPARENT_HUGEPAGE) || defined(CONFIG_HUGETLB_PAGE)
+static inline unsigned long massage_pgprot_pmd(pgprot_t pgprot)
+{
+ /*
+ * pgprot is PAGE_NONE, PAGE_RO, PAGE_RX, PAGE_RW or PAGE_RWX
+ * (see __Pxxx / __Sxxx). Convert to segment table entry format.
+ */
+ if (pgprot_val(pgprot) == pgprot_val(PAGE_NONE))
+ return pgprot_val(SEGMENT_NONE);
+ if (pgprot_val(pgprot) == pgprot_val(PAGE_RO))
+ return pgprot_val(SEGMENT_RO);
+ if (pgprot_val(pgprot) == pgprot_val(PAGE_RX))
+ return pgprot_val(SEGMENT_RX);
+ if (pgprot_val(pgprot) == pgprot_val(PAGE_RW))
+ return pgprot_val(SEGMENT_RW);
+ return pgprot_val(SEGMENT_RWX);
+}
+
+static inline pmd_t pmd_mkyoung(pmd_t pmd)
+{
+ if (pmd_large(pmd)) {
+ pmd_val(pmd) |= _SEGMENT_ENTRY_YOUNG;
+ if (pmd_val(pmd) & _SEGMENT_ENTRY_READ)
+ pmd_val(pmd) &= ~_SEGMENT_ENTRY_INVALID;
+ }
+ return pmd;
+}
+
+static inline pmd_t pmd_mkold(pmd_t pmd)
+{
+ if (pmd_large(pmd)) {
+ pmd_val(pmd) &= ~_SEGMENT_ENTRY_YOUNG;
+ pmd_val(pmd) |= _SEGMENT_ENTRY_INVALID;
+ }
+ return pmd;
+}
+
+static inline pmd_t pmd_modify(pmd_t pmd, pgprot_t newprot)
+{
+ if (pmd_large(pmd)) {
+ pmd_val(pmd) &= _SEGMENT_ENTRY_ORIGIN_LARGE |
+ _SEGMENT_ENTRY_DIRTY | _SEGMENT_ENTRY_YOUNG |
+ _SEGMENT_ENTRY_LARGE | _SEGMENT_ENTRY_SOFT_DIRTY;
+ pmd_val(pmd) |= massage_pgprot_pmd(newprot);
+ if (!(pmd_val(pmd) & _SEGMENT_ENTRY_DIRTY))
+ pmd_val(pmd) |= _SEGMENT_ENTRY_PROTECT;
+ if (!(pmd_val(pmd) & _SEGMENT_ENTRY_YOUNG))
+ pmd_val(pmd) |= _SEGMENT_ENTRY_INVALID;
+ return pmd;
+ }
+ pmd_val(pmd) &= _SEGMENT_ENTRY_ORIGIN;
+ pmd_val(pmd) |= massage_pgprot_pmd(newprot);
+ return pmd;
+}
+
+static inline pmd_t mk_pmd_phys(unsigned long physpage, pgprot_t pgprot)
+{
+ pmd_t __pmd;
+ pmd_val(__pmd) = physpage + massage_pgprot_pmd(pgprot);
+ return __pmd;
+}
+
+#endif /* CONFIG_TRANSPARENT_HUGEPAGE || CONFIG_HUGETLB_PAGE */
+
+static inline void __pmdp_csp(pmd_t *pmdp)
+{
+ csp((unsigned int *)pmdp + 1, pmd_val(*pmdp),
+ pmd_val(*pmdp) | _SEGMENT_ENTRY_INVALID);
+}
+
+#define IDTE_GLOBAL 0
+#define IDTE_LOCAL 1
+
+#define IDTE_PTOA 0x0800
+#define IDTE_NODAT 0x1000
+#define IDTE_GUEST_ASCE 0x2000
+
+static inline void __pmdp_idte(unsigned long addr, pmd_t *pmdp,
+ unsigned long opt, unsigned long asce,
+ int local)
+{
+ unsigned long sto;
+
+ sto = (unsigned long) pmdp - pmd_index(addr) * sizeof(pmd_t);
+ if (__builtin_constant_p(opt) && opt == 0) {
+ /* flush without guest asce */
+ asm volatile(
+ " .insn rrf,0xb98e0000,%[r1],%[r2],0,%[m4]"
+ : "+m" (*pmdp)
+ : [r1] "a" (sto), [r2] "a" ((addr & HPAGE_MASK)),
+ [m4] "i" (local)
+ : "cc" );
+ } else {
+ /* flush with guest asce */
+ asm volatile(
+ " .insn rrf,0xb98e0000,%[r1],%[r2],%[r3],%[m4]"
+ : "+m" (*pmdp)
+ : [r1] "a" (sto), [r2] "a" ((addr & HPAGE_MASK) | opt),
+ [r3] "a" (asce), [m4] "i" (local)
+ : "cc" );
+ }
+}
+
+static inline void __pudp_idte(unsigned long addr, pud_t *pudp,
+ unsigned long opt, unsigned long asce,
+ int local)
+{
+ unsigned long r3o;
+
+ r3o = (unsigned long) pudp - pud_index(addr) * sizeof(pud_t);
+ r3o |= _ASCE_TYPE_REGION3;
+ if (__builtin_constant_p(opt) && opt == 0) {
+ /* flush without guest asce */
+ asm volatile(
+ " .insn rrf,0xb98e0000,%[r1],%[r2],0,%[m4]"
+ : "+m" (*pudp)
+ : [r1] "a" (r3o), [r2] "a" ((addr & PUD_MASK)),
+ [m4] "i" (local)
+ : "cc");
+ } else {
+ /* flush with guest asce */
+ asm volatile(
+ " .insn rrf,0xb98e0000,%[r1],%[r2],%[r3],%[m4]"
+ : "+m" (*pudp)
+ : [r1] "a" (r3o), [r2] "a" ((addr & PUD_MASK) | opt),
+ [r3] "a" (asce), [m4] "i" (local)
+ : "cc" );
+ }
+}
+
+pmd_t pmdp_xchg_direct(struct mm_struct *, unsigned long, pmd_t *, pmd_t);
+pmd_t pmdp_xchg_lazy(struct mm_struct *, unsigned long, pmd_t *, pmd_t);
+pud_t pudp_xchg_direct(struct mm_struct *, unsigned long, pud_t *, pud_t);
+
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+
+#define __HAVE_ARCH_PGTABLE_DEPOSIT
+void pgtable_trans_huge_deposit(struct mm_struct *mm, pmd_t *pmdp,
+ pgtable_t pgtable);
+
+#define __HAVE_ARCH_PGTABLE_WITHDRAW
+pgtable_t pgtable_trans_huge_withdraw(struct mm_struct *mm, pmd_t *pmdp);
+
+#define __HAVE_ARCH_PMDP_SET_ACCESS_FLAGS
+static inline int pmdp_set_access_flags(struct vm_area_struct *vma,
+ unsigned long addr, pmd_t *pmdp,
+ pmd_t entry, int dirty)
+{
+ VM_BUG_ON(addr & ~HPAGE_MASK);
+
+ entry = pmd_mkyoung(entry);
+ if (dirty)
+ entry = pmd_mkdirty(entry);
+ if (pmd_val(*pmdp) == pmd_val(entry))
+ return 0;
+ pmdp_xchg_direct(vma->vm_mm, addr, pmdp, entry);
+ return 1;
+}
+
+#define __HAVE_ARCH_PMDP_TEST_AND_CLEAR_YOUNG
+static inline int pmdp_test_and_clear_young(struct vm_area_struct *vma,
+ unsigned long addr, pmd_t *pmdp)
+{
+ pmd_t pmd = *pmdp;
+
+ pmd = pmdp_xchg_direct(vma->vm_mm, addr, pmdp, pmd_mkold(pmd));
+ return pmd_young(pmd);
+}
+
+#define __HAVE_ARCH_PMDP_CLEAR_YOUNG_FLUSH
+static inline int pmdp_clear_flush_young(struct vm_area_struct *vma,
+ unsigned long addr, pmd_t *pmdp)
+{
+ VM_BUG_ON(addr & ~HPAGE_MASK);
+ return pmdp_test_and_clear_young(vma, addr, pmdp);
+}
+
+static inline void set_pmd_at(struct mm_struct *mm, unsigned long addr,
+ pmd_t *pmdp, pmd_t entry)
+{
+ if (!MACHINE_HAS_NX)
+ pmd_val(entry) &= ~_SEGMENT_ENTRY_NOEXEC;
+ *pmdp = entry;
+}
+
+static inline pmd_t pmd_mkhuge(pmd_t pmd)
+{
+ pmd_val(pmd) |= _SEGMENT_ENTRY_LARGE;
+ pmd_val(pmd) |= _SEGMENT_ENTRY_YOUNG;
+ pmd_val(pmd) |= _SEGMENT_ENTRY_PROTECT;
+ return pmd;
+}
+
+#define __HAVE_ARCH_PMDP_HUGE_GET_AND_CLEAR
+static inline pmd_t pmdp_huge_get_and_clear(struct mm_struct *mm,
+ unsigned long addr, pmd_t *pmdp)
+{
+ return pmdp_xchg_direct(mm, addr, pmdp, __pmd(_SEGMENT_ENTRY_EMPTY));
+}
+
+#define __HAVE_ARCH_PMDP_HUGE_GET_AND_CLEAR_FULL
+static inline pmd_t pmdp_huge_get_and_clear_full(struct mm_struct *mm,
+ unsigned long addr,
+ pmd_t *pmdp, int full)
+{
+ if (full) {
+ pmd_t pmd = *pmdp;
+ *pmdp = __pmd(_SEGMENT_ENTRY_EMPTY);
+ return pmd;
+ }
+ return pmdp_xchg_lazy(mm, addr, pmdp, __pmd(_SEGMENT_ENTRY_EMPTY));
+}
+
+#define __HAVE_ARCH_PMDP_HUGE_CLEAR_FLUSH
+static inline pmd_t pmdp_huge_clear_flush(struct vm_area_struct *vma,
+ unsigned long addr, pmd_t *pmdp)
+{
+ return pmdp_huge_get_and_clear(vma->vm_mm, addr, pmdp);
+}
+
+#define __HAVE_ARCH_PMDP_INVALIDATE
+static inline pmd_t pmdp_invalidate(struct vm_area_struct *vma,
+ unsigned long addr, pmd_t *pmdp)
+{
+ pmd_t pmd = __pmd(pmd_val(*pmdp) | _SEGMENT_ENTRY_INVALID);
+
+ return pmdp_xchg_direct(vma->vm_mm, addr, pmdp, pmd);
+}
+
+#define __HAVE_ARCH_PMDP_SET_WRPROTECT
+static inline void pmdp_set_wrprotect(struct mm_struct *mm,
+ unsigned long addr, pmd_t *pmdp)
+{
+ pmd_t pmd = *pmdp;
+
+ if (pmd_write(pmd))
+ pmd = pmdp_xchg_lazy(mm, addr, pmdp, pmd_wrprotect(pmd));
+}
+
+static inline pmd_t pmdp_collapse_flush(struct vm_area_struct *vma,
+ unsigned long address,
+ pmd_t *pmdp)
+{
+ return pmdp_huge_get_and_clear(vma->vm_mm, address, pmdp);
+}
+#define pmdp_collapse_flush pmdp_collapse_flush
+
+#define pfn_pmd(pfn, pgprot) mk_pmd_phys(__pa((pfn) << PAGE_SHIFT), (pgprot))
+#define mk_pmd(page, pgprot) pfn_pmd(page_to_pfn(page), (pgprot))
+
+static inline int pmd_trans_huge(pmd_t pmd)
+{
+ return pmd_val(pmd) & _SEGMENT_ENTRY_LARGE;
+}
+
+#define has_transparent_hugepage has_transparent_hugepage
+static inline int has_transparent_hugepage(void)
+{
+ return MACHINE_HAS_EDAT1 ? 1 : 0;
+}
+#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
+
+/*
+ * 64 bit swap entry format:
+ * A page-table entry has some bits we have to treat in a special way.
+ * Bits 52 and 55 have to be zero, otherwise a specification
+ * exception will occur instead of a page translation exception. The
+ * specification exception has the bad habit not to store necessary
+ * information in the lowcore.
+ * Bits 54 and 63 are used to indicate the page type.
+ * A swap pte is indicated by bit pattern (pte & 0x201) == 0x200
+ * This leaves the bits 0-51 and bits 56-62 to store type and offset.
+ * We use the 5 bits from 57-61 for the type and the 52 bits from 0-51
+ * for the offset.
+ * | offset |01100|type |00|
+ * |0000000000111111111122222222223333333333444444444455|55555|55566|66|
+ * |0123456789012345678901234567890123456789012345678901|23456|78901|23|
+ */
+
+#define __SWP_OFFSET_MASK ((1UL << 52) - 1)
+#define __SWP_OFFSET_SHIFT 12
+#define __SWP_TYPE_MASK ((1UL << 5) - 1)
+#define __SWP_TYPE_SHIFT 2
+
+static inline pte_t mk_swap_pte(unsigned long type, unsigned long offset)
+{
+ pte_t pte;
+
+ pte_val(pte) = _PAGE_INVALID | _PAGE_PROTECT;
+ pte_val(pte) |= (offset & __SWP_OFFSET_MASK) << __SWP_OFFSET_SHIFT;
+ pte_val(pte) |= (type & __SWP_TYPE_MASK) << __SWP_TYPE_SHIFT;
+ return pte;
+}
+
+static inline unsigned long __swp_type(swp_entry_t entry)
+{
+ return (entry.val >> __SWP_TYPE_SHIFT) & __SWP_TYPE_MASK;
+}
+
+static inline unsigned long __swp_offset(swp_entry_t entry)
+{
+ return (entry.val >> __SWP_OFFSET_SHIFT) & __SWP_OFFSET_MASK;
+}
+
+static inline swp_entry_t __swp_entry(unsigned long type, unsigned long offset)
+{
+ return (swp_entry_t) { pte_val(mk_swap_pte(type, offset)) };
+}
+
+#define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val(pte) })
+#define __swp_entry_to_pte(x) ((pte_t) { (x).val })
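A short, purely illustrative round trip through the encode/decode helpers above; the type and offset values are arbitrary and the function name is made up.

static inline void example_swap_pte_roundtrip(void)
{
	swp_entry_t entry = __swp_entry(3, 0x1234);
	pte_t pte = __swp_entry_to_pte(entry);

	/* _PAGE_PROTECT is set and _PAGE_PRESENT is clear, so pte_swap() is true */
	WARN_ON(!pte_swap(pte));
	WARN_ON(__swp_type(entry) != 3);
	WARN_ON(__swp_offset(entry) != 0x1234);
}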
+
+#define kern_addr_valid(addr) (1)
+
+extern int vmem_add_mapping(unsigned long start, unsigned long size);
+extern int vmem_remove_mapping(unsigned long start, unsigned long size);
+extern int s390_enable_sie(void);
+extern int s390_enable_skey(void);
+extern void s390_reset_cmma(struct mm_struct *mm);
+
+/* s390 has a private copy of get unmapped area to deal with cache synonyms */
+#define HAVE_ARCH_UNMAPPED_AREA
+#define HAVE_ARCH_UNMAPPED_AREA_TOPDOWN
+
+/*
+ * No page table caches to initialise
+ */
+static inline void pgtable_cache_init(void) { }
+static inline void check_pgt_cache(void) { }
+
+#include <asm-generic/pgtable.h>
+
+#endif /* _S390_PAGE_H */
diff --git a/arch/s390/include/asm/pkey.h b/arch/s390/include/asm/pkey.h
new file mode 100644
index 000000000..053117ba7
--- /dev/null
+++ b/arch/s390/include/asm/pkey.h
@@ -0,0 +1,112 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Kernelspace interface to the pkey device driver
+ *
+ * Copyright IBM Corp. 2016
+ *
+ * Author: Harald Freudenberger <freude@de.ibm.com>
+ *
+ */
+
+#ifndef _KAPI_PKEY_H
+#define _KAPI_PKEY_H
+
+#include <linux/ioctl.h>
+#include <linux/types.h>
+#include <uapi/asm/pkey.h>
+
+/*
+ * Generate (AES) random secure key.
+ * @param cardnr may be -1 (use default card)
+ * @param domain may be -1 (use default domain)
+ * @param keytype one of the PKEY_KEYTYPE values
+ * @param seckey pointer to buffer receiving the secure key
+ * @return 0 on success, negative errno value on failure
+ */
+int pkey_genseckey(__u16 cardnr, __u16 domain,
+ __u32 keytype, struct pkey_seckey *seckey);
+
+/*
+ * Generate (AES) secure key with given key value.
+ * @param cardnr may be -1 (use default card)
+ * @param domain may be -1 (use default domain)
+ * @param keytype one of the PKEY_KEYTYPE values
+ * @param clrkey pointer to buffer with clear key data
+ * @param seckey pointer to buffer receiving the secure key
+ * @return 0 on success, negative errno value on failure
+ */
+int pkey_clr2seckey(__u16 cardnr, __u16 domain, __u32 keytype,
+ const struct pkey_clrkey *clrkey,
+ struct pkey_seckey *seckey);
+
+/*
+ * Derive (AES) protected key from the (AES) secure key blob.
+ * @param cardnr may be -1 (use default card)
+ * @param domain may be -1 (use default domain)
+ * @param seckey pointer to buffer with the input secure key
+ * @param protkey pointer to buffer receiving the protected key and
+ * additional info (type, length)
+ * @return 0 on success, negative errno value on failure
+ */
+int pkey_sec2protkey(__u16 cardnr, __u16 domain,
+ const struct pkey_seckey *seckey,
+ struct pkey_protkey *protkey);
+
+/*
+ * Derive (AES) protected key from a given clear key value.
+ * @param keytype one of the PKEY_KEYTYPE values
+ * @param clrkey pointer to buffer with clear key data
+ * @param protkey pointer to buffer receiving the protected key and
+ * additional info (type, length)
+ * @return 0 on success, negative errno value on failure
+ */
+int pkey_clr2protkey(__u32 keytype,
+ const struct pkey_clrkey *clrkey,
+ struct pkey_protkey *protkey);
+
+/*
+ * Search for a matching crypto card based on the Master Key
+ * Verification Pattern provided inside a secure key.
+ * @param seckey pointer to buffer with the input secure key
+ * @param cardnr pointer to cardnr, receives the card number on success
+ * @param domain pointer to domain, receives the domain number on success
+ * @param verify if set, always verify by fetching verification pattern
+ * from card
+ * @return 0 on success, negative errno value on failure. If no card could be
+ * found, -ENODEV is returned.
+ */
+int pkey_findcard(const struct pkey_seckey *seckey,
+ __u16 *cardnr, __u16 *domain, int verify);
+
+/*
+ * Find card and transform secure key to protected key.
+ * @param seckey pointer to buffer with the input secure key
+ * @param protkey pointer to buffer receiving the protected key and
+ * additional info (type, length)
+ * @return 0 on success, negative errno value on failure
+ */
+int pkey_skey2pkey(const struct pkey_seckey *seckey,
+ struct pkey_protkey *protkey);
+
+/*
+ * Verify that the given secure key is usable with the pkey module.
+ * Check for the correct key type and check that at least one crypto
+ * card is able to handle this key (the master key or the old master
+ * key verification pattern matches).
+ * Return some info about the key: keysize in bits, keytype (currently
+ * only AES), flag if key is wrapped with an old MKVP.
+ * @param seckey pointer to buffer with the input secure key
+ * @param pcardnr pointer to cardnr, receives the card number on success
+ * @param pdomain pointer to domain, receives the domain number on success
+ * @param pkeysize pointer to keysize, receives the bitsize of the key
+ * @param pattributes pointer to attributes, receives additional info
+ * PKEY_VERIFY_ATTR_AES if the key is an AES key
+ * PKEY_VERIFY_ATTR_OLD_MKVP if the key is wrapped with the old MKVP
+ * @return 0 on success, negative errno value on failure. If no card could
+ * be found which is able to handle this key, -ENODEV is returned.
+ */
+int pkey_verifykey(const struct pkey_seckey *seckey,
+ u16 *pcardnr, u16 *pdomain,
+ u16 *pkeysize, u32 *pattributes);
+
+#endif /* _KAPI_PKEY_H */
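A hedged sketch of how an in-kernel user might chain the helpers declared above: generate a random secure key and turn it into a protected key. PKEY_KEYTYPE_AES_256 comes from uapi/asm/pkey.h; pkey_example() is a made-up name.

static int pkey_example(struct pkey_protkey *protkey)
{
	struct pkey_seckey seckey;
	int rc;

	/* -1 selects the default card and the default domain */
	rc = pkey_genseckey(-1, -1, PKEY_KEYTYPE_AES_256, &seckey);
	if (rc)
		return rc;
	return pkey_sec2protkey(-1, -1, &seckey, protkey);
}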
diff --git a/arch/s390/include/asm/pnet.h b/arch/s390/include/asm/pnet.h
new file mode 100644
index 000000000..6e278584f
--- /dev/null
+++ b/arch/s390/include/asm/pnet.h
@@ -0,0 +1,23 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * IBM System z PNET ID Support
+ *
+ * Copyright IBM Corp. 2018
+ */
+
+#ifndef _ASM_S390_PNET_H
+#define _ASM_S390_PNET_H
+
+#include <linux/device.h>
+#include <linux/types.h>
+
+#define PNETIDS_LEN 64 /* Total utility string length in bytes
+ * to cover up to 4 PNETIDs of 16 bytes
+ * for up to 4 device ports
+ */
+#define MAX_PNETID_LEN 16 /* Max. length of a single port PNETID */
+#define MAX_PNETID_PORTS (PNETIDS_LEN / MAX_PNETID_LEN)
+ /* Max. # of ports with a PNETID */
+
+int pnet_id_by_dev_port(struct device *dev, unsigned short port, u8 *pnetid);
+#endif /* _ASM_S390_PNET_H */
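An illustrative call of the interface above (not part of the header): query the PNETID of port 0 of an arbitrary struct device; the buffer must provide MAX_PNETID_LEN bytes.

static inline int example_get_pnetid(struct device *dev, u8 *pnetid)
{
	return pnet_id_by_dev_port(dev, 0, pnetid);
}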
diff --git a/arch/s390/include/asm/preempt.h b/arch/s390/include/asm/preempt.h
new file mode 100644
index 000000000..1aebf09fb
--- /dev/null
+++ b/arch/s390/include/asm/preempt.h
@@ -0,0 +1,145 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __ASM_PREEMPT_H
+#define __ASM_PREEMPT_H
+
+#include <asm/current.h>
+#include <linux/thread_info.h>
+#include <asm/atomic_ops.h>
+
+#ifdef CONFIG_HAVE_MARCH_Z196_FEATURES
+
+#define PREEMPT_ENABLED (0 + PREEMPT_NEED_RESCHED)
+
+static inline int preempt_count(void)
+{
+ return READ_ONCE(S390_lowcore.preempt_count) & ~PREEMPT_NEED_RESCHED;
+}
+
+static inline void preempt_count_set(int pc)
+{
+ int old, new;
+
+ do {
+ old = READ_ONCE(S390_lowcore.preempt_count);
+ new = (old & PREEMPT_NEED_RESCHED) |
+ (pc & ~PREEMPT_NEED_RESCHED);
+ } while (__atomic_cmpxchg(&S390_lowcore.preempt_count,
+ old, new) != old);
+}
+
+#define init_task_preempt_count(p) do { } while (0)
+
+#define init_idle_preempt_count(p, cpu) do { \
+ S390_lowcore.preempt_count = PREEMPT_ENABLED; \
+} while (0)
+
+static inline void set_preempt_need_resched(void)
+{
+ __atomic_and(~PREEMPT_NEED_RESCHED, &S390_lowcore.preempt_count);
+}
+
+static inline void clear_preempt_need_resched(void)
+{
+ __atomic_or(PREEMPT_NEED_RESCHED, &S390_lowcore.preempt_count);
+}
+
+static inline bool test_preempt_need_resched(void)
+{
+ return !(READ_ONCE(S390_lowcore.preempt_count) & PREEMPT_NEED_RESCHED);
+}
+
+static inline void __preempt_count_add(int val)
+{
+ /*
+ * With some obscure config options and CONFIG_PROFILE_ALL_BRANCHES
+ * enabled, gcc 12 fails to handle __builtin_constant_p().
+ */
+ if (!IS_ENABLED(CONFIG_PROFILE_ALL_BRANCHES)) {
+ if (__builtin_constant_p(val) && (val >= -128) && (val <= 127)) {
+ __atomic_add_const(val, &S390_lowcore.preempt_count);
+ return;
+ }
+ }
+ __atomic_add(val, &S390_lowcore.preempt_count);
+}
+
+static inline void __preempt_count_sub(int val)
+{
+ __preempt_count_add(-val);
+}
+
+static inline bool __preempt_count_dec_and_test(void)
+{
+ return __atomic_add(-1, &S390_lowcore.preempt_count) == 1;
+}
+
+static inline bool should_resched(int preempt_offset)
+{
+ return unlikely(READ_ONCE(S390_lowcore.preempt_count) ==
+ preempt_offset);
+}
+
+#else /* CONFIG_HAVE_MARCH_Z196_FEATURES */
+
+#define PREEMPT_ENABLED (0)
+
+static inline int preempt_count(void)
+{
+ return READ_ONCE(S390_lowcore.preempt_count);
+}
+
+static inline void preempt_count_set(int pc)
+{
+ S390_lowcore.preempt_count = pc;
+}
+
+#define init_task_preempt_count(p) do { } while (0)
+
+#define init_idle_preempt_count(p, cpu) do { \
+ S390_lowcore.preempt_count = PREEMPT_ENABLED; \
+} while (0)
+
+static inline void set_preempt_need_resched(void)
+{
+}
+
+static inline void clear_preempt_need_resched(void)
+{
+}
+
+static inline bool test_preempt_need_resched(void)
+{
+ return false;
+}
+
+static inline void __preempt_count_add(int val)
+{
+ S390_lowcore.preempt_count += val;
+}
+
+static inline void __preempt_count_sub(int val)
+{
+ S390_lowcore.preempt_count -= val;
+}
+
+static inline bool __preempt_count_dec_and_test(void)
+{
+ return !--S390_lowcore.preempt_count && tif_need_resched();
+}
+
+static inline bool should_resched(int preempt_offset)
+{
+ return unlikely(preempt_count() == preempt_offset &&
+ tif_need_resched());
+}
+
+#endif /* CONFIG_HAVE_MARCH_Z196_FEATURES */
+
+#ifdef CONFIG_PREEMPT
+extern asmlinkage void preempt_schedule(void);
+#define __preempt_schedule() preempt_schedule()
+extern asmlinkage void preempt_schedule_notrace(void);
+#define __preempt_schedule_notrace() preempt_schedule_notrace()
+#endif /* CONFIG_PREEMPT */
+
+#endif /* __ASM_PREEMPT_H */
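A rough, illustrative sketch of how the generic preempt_disable()/preempt_enable() fast paths sit on top of the primitives above; simplified, not copied from include/linux/preempt.h, and __preempt_schedule() only exists with CONFIG_PREEMPT.

static inline void example_preempt_disable(void)
{
	__preempt_count_add(1);
	barrier();
}

static inline void example_preempt_enable(void)
{
	barrier();
	if (__preempt_count_dec_and_test())
		__preempt_schedule();
}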
diff --git a/arch/s390/include/asm/processor.h b/arch/s390/include/asm/processor.h
new file mode 100644
index 000000000..7f2953c15
--- /dev/null
+++ b/arch/s390/include/asm/processor.h
@@ -0,0 +1,386 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * S390 version
+ * Copyright IBM Corp. 1999
+ * Author(s): Hartmut Penner (hp@de.ibm.com),
+ * Martin Schwidefsky (schwidefsky@de.ibm.com)
+ *
+ * Derived from "include/asm-i386/processor.h"
+ * Copyright (C) 1994, Linus Torvalds
+ */
+
+#ifndef __ASM_S390_PROCESSOR_H
+#define __ASM_S390_PROCESSOR_H
+
+#include <linux/const.h>
+
+#define CIF_MCCK_PENDING 0 /* machine check handling is pending */
+#define CIF_ASCE_PRIMARY 1 /* primary asce needs fixup / uaccess */
+#define CIF_ASCE_SECONDARY 2 /* secondary asce needs fixup / uaccess */
+#define CIF_NOHZ_DELAY 3 /* delay HZ disable for a tick */
+#define CIF_FPU 4 /* restore FPU registers */
+#define CIF_IGNORE_IRQ 5 /* ignore interrupt (for udelay) */
+#define CIF_ENABLED_WAIT 6 /* in enabled wait state */
+#define CIF_MCCK_GUEST 7 /* machine check happening in guest */
+#define CIF_DEDICATED_CPU 8 /* this CPU is dedicated */
+
+#define _CIF_MCCK_PENDING _BITUL(CIF_MCCK_PENDING)
+#define _CIF_ASCE_PRIMARY _BITUL(CIF_ASCE_PRIMARY)
+#define _CIF_ASCE_SECONDARY _BITUL(CIF_ASCE_SECONDARY)
+#define _CIF_NOHZ_DELAY _BITUL(CIF_NOHZ_DELAY)
+#define _CIF_FPU _BITUL(CIF_FPU)
+#define _CIF_IGNORE_IRQ _BITUL(CIF_IGNORE_IRQ)
+#define _CIF_ENABLED_WAIT _BITUL(CIF_ENABLED_WAIT)
+#define _CIF_MCCK_GUEST _BITUL(CIF_MCCK_GUEST)
+#define _CIF_DEDICATED_CPU _BITUL(CIF_DEDICATED_CPU)
+
+#ifndef __ASSEMBLY__
+
+#include <linux/linkage.h>
+#include <linux/irqflags.h>
+#include <asm/cpu.h>
+#include <asm/page.h>
+#include <asm/ptrace.h>
+#include <asm/setup.h>
+#include <asm/runtime_instr.h>
+#include <asm/fpu/types.h>
+#include <asm/fpu/internal.h>
+
+static inline void set_cpu_flag(int flag)
+{
+ S390_lowcore.cpu_flags |= (1UL << flag);
+}
+
+static inline void clear_cpu_flag(int flag)
+{
+ S390_lowcore.cpu_flags &= ~(1UL << flag);
+}
+
+static inline int test_cpu_flag(int flag)
+{
+ return !!(S390_lowcore.cpu_flags & (1UL << flag));
+}
+
+/*
+ * Test CIF flag of another CPU. The caller needs to ensure that
+ * CPU hotplug can not happen, e.g. by disabling preemption.
+ */
+static inline int test_cpu_flag_of(int flag, int cpu)
+{
+ struct lowcore *lc = lowcore_ptr[cpu];
+ return !!(lc->cpu_flags & (1UL << flag));
+}
+
+#define arch_needs_cpu() test_cpu_flag(CIF_NOHZ_DELAY)
+
+/*
+ * Default implementation of macro that returns current
+ * instruction pointer ("program counter").
+ */
+#define current_text_addr() ({ void *pc; asm("basr %0,0" : "=a" (pc)); pc; })
+
+static inline void get_cpu_id(struct cpuid *ptr)
+{
+ asm volatile("stidp %0" : "=Q" (*ptr));
+}
+
+void s390_adjust_jiffies(void);
+void s390_update_cpu_mhz(void);
+void cpu_detect_mhz_feature(void);
+
+extern const struct seq_operations cpuinfo_op;
+extern int sysctl_ieee_emulation_warnings;
+extern void execve_tail(void);
+extern void __bpon(void);
+
+/*
+ * User space process size: 2GB for 31 bit, 4TB or 8PT for 64 bit.
+ */
+
+#define TASK_SIZE_OF(tsk) (test_tsk_thread_flag(tsk, TIF_31BIT) ? \
+ (1UL << 31) : -PAGE_SIZE)
+#define TASK_UNMAPPED_BASE (test_thread_flag(TIF_31BIT) ? \
+ (1UL << 30) : (1UL << 41))
+#define TASK_SIZE TASK_SIZE_OF(current)
+#define TASK_SIZE_MAX (-PAGE_SIZE)
+
+#define STACK_TOP (test_thread_flag(TIF_31BIT) ? \
+ (1UL << 31) : (1UL << 42))
+#define STACK_TOP_MAX (1UL << 42)
+
+#define HAVE_ARCH_PICK_MMAP_LAYOUT
+
+typedef unsigned int mm_segment_t;
+
+/*
+ * Thread structure
+ */
+struct thread_struct {
+ unsigned int acrs[NUM_ACRS];
+ unsigned long ksp; /* kernel stack pointer */
+ unsigned long user_timer; /* task cputime in user space */
+ unsigned long guest_timer; /* task cputime in kvm guest */
+ unsigned long system_timer; /* task cputime in kernel space */
+ unsigned long hardirq_timer; /* task cputime in hardirq context */
+ unsigned long softirq_timer; /* task cputime in softirq context */
+ unsigned long sys_call_table; /* system call table address */
+ mm_segment_t mm_segment;
+ unsigned long gmap_addr; /* address of last gmap fault. */
+ unsigned int gmap_write_flag; /* gmap fault write indication */
+ unsigned int gmap_int_code; /* int code of last gmap fault */
+ unsigned int gmap_pfault; /* signal of a pending guest pfault */
+ /* Per-thread information related to debugging */
+ struct per_regs per_user; /* User specified PER registers */
+ struct per_event per_event; /* Cause of the last PER trap */
+ unsigned long per_flags; /* Flags to control debug behavior */
+ unsigned int system_call; /* system call number in signal */
+ unsigned long last_break; /* last breaking-event-address. */
+ /* pfault_wait is used to block the process on a pfault event */
+ unsigned long pfault_wait;
+ struct list_head list;
+ /* cpu runtime instrumentation */
+ struct runtime_instr_cb *ri_cb;
+ struct gs_cb *gs_cb; /* Current guarded storage cb */
+ struct gs_cb *gs_bc_cb; /* Broadcast guarded storage cb */
+ unsigned char trap_tdb[256]; /* Transaction abort diagnose block */
+ /*
+ * Warning: 'fpu' is dynamically-sized. It *MUST* be at
+ * the end.
+ */
+ struct fpu fpu; /* FP and VX register save area */
+};
+
+/* Flag to disable transactions. */
+#define PER_FLAG_NO_TE 1UL
+/* Flag to enable random transaction aborts. */
+#define PER_FLAG_TE_ABORT_RAND 2UL
+/* Flag to specify random transaction abort mode:
+ * - abort each transaction at a random instruction before TEND if set.
+ * - abort random transactions at a random instruction if cleared.
+ */
+#define PER_FLAG_TE_ABORT_RAND_TEND 4UL
+
+typedef struct thread_struct thread_struct;
+
+/*
+ * Stack layout of a C stack frame.
+ */
+#ifndef __PACK_STACK
+struct stack_frame {
+ unsigned long back_chain;
+ unsigned long empty1[5];
+ unsigned long gprs[10];
+ unsigned int empty2[8];
+};
+#else
+struct stack_frame {
+ unsigned long empty1[5];
+ unsigned int empty2[8];
+ unsigned long gprs[10];
+ unsigned long back_chain;
+};
+#endif
+
+#define ARCH_MIN_TASKALIGN 8
+
+#define INIT_THREAD { \
+ .ksp = sizeof(init_stack) + (unsigned long) &init_stack, \
+ .fpu.regs = (void *) init_task.thread.fpu.fprs, \
+}
+
+/*
+ * Do necessary setup to start up a new thread.
+ */
+#define start_thread(regs, new_psw, new_stackp) do { \
+ regs->psw.mask = PSW_USER_BITS | PSW_MASK_EA | PSW_MASK_BA; \
+ regs->psw.addr = new_psw; \
+ regs->gprs[15] = new_stackp; \
+ execve_tail(); \
+} while (0)
+
+#define start_thread31(regs, new_psw, new_stackp) do { \
+ regs->psw.mask = PSW_USER_BITS | PSW_MASK_BA; \
+ regs->psw.addr = new_psw; \
+ regs->gprs[15] = new_stackp; \
+ crst_table_downgrade(current->mm); \
+ execve_tail(); \
+} while (0)
+
+/* Forward declaration, a strange C thing */
+struct task_struct;
+struct mm_struct;
+struct seq_file;
+struct pt_regs;
+
+typedef int (*dump_trace_func_t)(void *data, unsigned long address, int reliable);
+void dump_trace(dump_trace_func_t func, void *data,
+ struct task_struct *task, unsigned long sp);
+void show_registers(struct pt_regs *regs);
+
+void show_cacheinfo(struct seq_file *m);
+
+/* Free all resources held by a thread. */
+static inline void release_thread(struct task_struct *tsk) { }
+
+/* Free guarded storage control block */
+void guarded_storage_release(struct task_struct *tsk);
+
+unsigned long get_wchan(struct task_struct *p);
+#define task_pt_regs(tsk) ((struct pt_regs *) \
+ (task_stack_page(tsk) + THREAD_SIZE) - 1)
+#define KSTK_EIP(tsk) (task_pt_regs(tsk)->psw.addr)
+#define KSTK_ESP(tsk) (task_pt_regs(tsk)->gprs[15])
+
+/* Has the task runtime instrumentation enabled? */
+#define is_ri_task(tsk) (!!(tsk)->thread.ri_cb)
+
+static inline unsigned long current_stack_pointer(void)
+{
+ unsigned long sp;
+
+ asm volatile("la %0,0(15)" : "=a" (sp));
+ return sp;
+}
+
+static inline unsigned short stap(void)
+{
+ unsigned short cpu_address;
+
+ asm volatile("stap %0" : "=Q" (cpu_address));
+ return cpu_address;
+}
+
+/*
+ * Give up the time slice of the virtual CPU.
+ */
+#define cpu_relax_yield cpu_relax_yield
+void cpu_relax_yield(void);
+
+#define cpu_relax() barrier()
+
+#define ECAG_CACHE_ATTRIBUTE 0
+#define ECAG_CPU_ATTRIBUTE 1
+
+static inline unsigned long __ecag(unsigned int asi, unsigned char parm)
+{
+ unsigned long val;
+
+ asm volatile(".insn rsy,0xeb000000004c,%0,0,0(%1)" /* ecag */
+ : "=d" (val) : "a" (asi << 8 | parm));
+ return val;
+}
+
+static inline void psw_set_key(unsigned int key)
+{
+ asm volatile("spka 0(%0)" : : "d" (key));
+}
+
+/*
+ * Set PSW to specified value.
+ */
+static inline void __load_psw(psw_t psw)
+{
+ asm volatile("lpswe %0" : : "Q" (psw) : "cc");
+}
+
+/*
+ * Set PSW mask to specified value, while leaving the
+ * PSW addr pointing to the next instruction.
+ */
+static inline void __load_psw_mask(unsigned long mask)
+{
+ unsigned long addr;
+ psw_t psw;
+
+ psw.mask = mask;
+
+ asm volatile(
+ " larl %0,1f\n"
+ " stg %0,%O1+8(%R1)\n"
+ " lpswe %1\n"
+ "1:"
+ : "=&d" (addr), "=Q" (psw) : "Q" (psw) : "memory", "cc");
+}
+
+/*
+ * Extract current PSW mask
+ */
+static inline unsigned long __extract_psw(void)
+{
+ unsigned int reg1, reg2;
+
+ asm volatile("epsw %0,%1" : "=d" (reg1), "=a" (reg2));
+ return (((unsigned long) reg1) << 32) | ((unsigned long) reg2);
+}
+
+static inline void local_mcck_enable(void)
+{
+ __load_psw_mask(__extract_psw() | PSW_MASK_MCHECK);
+}
+
+static inline void local_mcck_disable(void)
+{
+ __load_psw_mask(__extract_psw() & ~PSW_MASK_MCHECK);
+}
+
+/*
+ * Rewind PSW instruction address by specified number of bytes.
+ */
+static inline unsigned long __rewind_psw(psw_t psw, unsigned long ilc)
+{
+ unsigned long mask;
+
+ mask = (psw.mask & PSW_MASK_EA) ? -1UL :
+ (psw.mask & PSW_MASK_BA) ? (1UL << 31) - 1 :
+ (1UL << 24) - 1;
+ return (psw.addr - ilc) & mask;
+}
+
+/*
+ * Function to stop a processor until the next interrupt occurs
+ */
+void enabled_wait(void);
+
+/*
+ * Function to drop a processor into disabled wait state
+ */
+static inline void __noreturn disabled_wait(unsigned long code)
+{
+ psw_t psw;
+
+ psw.mask = PSW_MASK_BASE | PSW_MASK_WAIT | PSW_MASK_BA | PSW_MASK_EA;
+ psw.addr = code;
+ __load_psw(psw);
+ while (1);
+}
+
+/*
+ * Basic Machine Check/Program Check Handler.
+ */
+
+extern void s390_base_mcck_handler(void);
+extern void s390_base_pgm_handler(void);
+extern void s390_base_ext_handler(void);
+
+extern void (*s390_base_mcck_handler_fn)(void);
+extern void (*s390_base_pgm_handler_fn)(void);
+extern void (*s390_base_ext_handler_fn)(void);
+
+#define ARCH_LOW_ADDRESS_LIMIT 0x7fffffffUL
+
+extern int memcpy_real(void *, void *, size_t);
+extern void memcpy_absolute(void *, void *, size_t);
+
+#define mem_assign_absolute(dest, val) do { \
+ __typeof__(dest) __tmp = (val); \
+ \
+ BUILD_BUG_ON(sizeof(__tmp) != sizeof(val)); \
+ memcpy_absolute(&(dest), &__tmp, sizeof(__tmp)); \
+} while (0)
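An illustrative use of mem_assign_absolute() (not part of the header): update a lowcore field in both its normal and its absolute-zero copy; restart_stack is used only as an example of a lowcore member.

static inline void example_set_restart_stack(unsigned long stack)
{
	mem_assign_absolute(S390_lowcore.restart_stack, stack);
}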
+
+extern int s390_isolate_bp(void);
+extern int s390_isolate_bp_guest(void);
+
+#endif /* __ASSEMBLY__ */
+
+#endif /* __ASM_S390_PROCESSOR_H */
diff --git a/arch/s390/include/asm/ptrace.h b/arch/s390/include/asm/ptrace.h
new file mode 100644
index 000000000..6f70d81c4
--- /dev/null
+++ b/arch/s390/include/asm/ptrace.h
@@ -0,0 +1,188 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * S390 version
+ * Copyright IBM Corp. 1999, 2000
+ * Author(s): Denis Joseph Barrow (djbarrow@de.ibm.com,barrow_dj@yahoo.com)
+ */
+#ifndef _S390_PTRACE_H
+#define _S390_PTRACE_H
+
+#include <linux/const.h>
+#include <uapi/asm/ptrace.h>
+
+#define PIF_SYSCALL 0 /* inside a system call */
+#define PIF_PER_TRAP 1 /* deliver sigtrap on return to user */
+#define PIF_SYSCALL_RESTART 2 /* restart the current system call */
+#define PIF_GUEST_FAULT 3 /* indicates program check in sie64a */
+
+#define _PIF_SYSCALL _BITUL(PIF_SYSCALL)
+#define _PIF_PER_TRAP _BITUL(PIF_PER_TRAP)
+#define _PIF_SYSCALL_RESTART _BITUL(PIF_SYSCALL_RESTART)
+#define _PIF_GUEST_FAULT _BITUL(PIF_GUEST_FAULT)
+
+#ifndef __ASSEMBLY__
+
+#define PSW_KERNEL_BITS (PSW_DEFAULT_KEY | PSW_MASK_BASE | PSW_ASC_HOME | \
+ PSW_MASK_EA | PSW_MASK_BA)
+#define PSW_USER_BITS (PSW_MASK_DAT | PSW_MASK_IO | PSW_MASK_EXT | \
+ PSW_DEFAULT_KEY | PSW_MASK_BASE | PSW_MASK_MCHECK | \
+ PSW_MASK_PSTATE | PSW_ASC_PRIMARY)
+
+struct psw_bits {
+ unsigned long : 1;
+ unsigned long per : 1; /* PER-Mask */
+ unsigned long : 3;
+ unsigned long dat : 1; /* DAT Mode */
+ unsigned long io : 1; /* Input/Output Mask */
+ unsigned long ext : 1; /* External Mask */
+ unsigned long key : 4; /* PSW Key */
+ unsigned long : 1;
+ unsigned long mcheck : 1; /* Machine-Check Mask */
+ unsigned long wait : 1; /* Wait State */
+ unsigned long pstate : 1; /* Problem State */
+ unsigned long as : 2; /* Address Space Control */
+ unsigned long cc : 2; /* Condition Code */
+ unsigned long pm : 4; /* Program Mask */
+ unsigned long ri : 1; /* Runtime Instrumentation */
+ unsigned long : 6;
+ unsigned long eaba : 2; /* Addressing Mode */
+ unsigned long : 31;
+ unsigned long ia : 64; /* Instruction Address */
+};
+
+enum {
+ PSW_BITS_AMODE_24BIT = 0,
+ PSW_BITS_AMODE_31BIT = 1,
+ PSW_BITS_AMODE_64BIT = 3
+};
+
+enum {
+ PSW_BITS_AS_PRIMARY = 0,
+ PSW_BITS_AS_ACCREG = 1,
+ PSW_BITS_AS_SECONDARY = 2,
+ PSW_BITS_AS_HOME = 3
+};
+
+#define psw_bits(__psw) (*({ \
+ typecheck(psw_t, __psw); \
+ &(*(struct psw_bits *)(&(__psw))); \
+}))
+
+/*
+ * The pt_regs struct defines the way the registers are stored on
+ * the stack during a system call.
+ */
+struct pt_regs
+{
+ union {
+ user_pt_regs user_regs;
+ struct {
+ unsigned long args[1];
+ psw_t psw;
+ unsigned long gprs[NUM_GPRS];
+ };
+ };
+ unsigned long orig_gpr2;
+ unsigned int int_code;
+ unsigned int int_parm;
+ unsigned long int_parm_long;
+ unsigned long flags;
+};
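The psw_bits() accessor above is typically applied to the psw embedded in pt_regs; a minimal, purely illustrative helper (the function name is made up):

static inline int example_is_64bit_user_psw(struct pt_regs *regs)
{
	return psw_bits(regs->psw).pstate &&
	       psw_bits(regs->psw).eaba == PSW_BITS_AMODE_64BIT;
}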
+
+/*
+ * Program event recording (PER) register set.
+ */
+struct per_regs {
+ unsigned long control; /* PER control bits */
+ unsigned long start; /* PER starting address */
+ unsigned long end; /* PER ending address */
+};
+
+/*
+ * PER event contains information about the cause of the last PER exception.
+ */
+struct per_event {
+ unsigned short cause; /* PER code, ATMID and AI */
+ unsigned long address; /* PER address */
+ unsigned char paid; /* PER access identification */
+};
+
+/*
+ * Simplified per_info structure used to decode the ptrace user space ABI.
+ */
+struct per_struct_kernel {
+ unsigned long cr9; /* PER control bits */
+ unsigned long cr10; /* PER starting address */
+ unsigned long cr11; /* PER ending address */
+ unsigned long bits; /* Obsolete software bits */
+ unsigned long starting_addr; /* User specified start address */
+ unsigned long ending_addr; /* User specified end address */
+ unsigned short perc_atmid; /* PER trap ATMID */
+ unsigned long address; /* PER trap instruction address */
+ unsigned char access_id; /* PER trap access identification */
+};
+
+#define PER_EVENT_MASK 0xEB000000UL
+
+#define PER_EVENT_BRANCH 0x80000000UL
+#define PER_EVENT_IFETCH 0x40000000UL
+#define PER_EVENT_STORE 0x20000000UL
+#define PER_EVENT_STORE_REAL 0x08000000UL
+#define PER_EVENT_TRANSACTION_END 0x02000000UL
+#define PER_EVENT_NULLIFICATION 0x01000000UL
+
+#define PER_CONTROL_MASK 0x00e00000UL
+
+#define PER_CONTROL_BRANCH_ADDRESS 0x00800000UL
+#define PER_CONTROL_SUSPENSION 0x00400000UL
+#define PER_CONTROL_ALTERATION 0x00200000UL
+
+static inline void set_pt_regs_flag(struct pt_regs *regs, int flag)
+{
+ regs->flags |= (1UL << flag);
+}
+
+static inline void clear_pt_regs_flag(struct pt_regs *regs, int flag)
+{
+ regs->flags &= ~(1UL << flag);
+}
+
+static inline int test_pt_regs_flag(struct pt_regs *regs, int flag)
+{
+ return !!(regs->flags & (1UL << flag));
+}
+
+/*
+ * These are defined as per linux/ptrace.h, which see.
+ */
+#define arch_has_single_step() (1)
+#define arch_has_block_step() (1)
+
+#define user_mode(regs) (((regs)->psw.mask & PSW_MASK_PSTATE) != 0)
+#define instruction_pointer(regs) ((regs)->psw.addr)
+#define user_stack_pointer(regs) ((regs)->gprs[15])
+#define profile_pc(regs) instruction_pointer(regs)
+
+static inline long regs_return_value(struct pt_regs *regs)
+{
+ return regs->gprs[2];
+}
+
+static inline void instruction_pointer_set(struct pt_regs *regs,
+ unsigned long val)
+{
+ regs->psw.addr = val;
+}
+
+int regs_query_register_offset(const char *name);
+const char *regs_query_register_name(unsigned int offset);
+unsigned long regs_get_register(struct pt_regs *regs, unsigned int offset);
+unsigned long regs_get_kernel_stack_nth(struct pt_regs *regs, unsigned int n);
+
+static inline unsigned long kernel_stack_pointer(struct pt_regs *regs)
+{
+ return regs->gprs[15];
+}
+
+#endif /* __ASSEMBLY__ */
+#endif /* _S390_PTRACE_H */
diff --git a/arch/s390/include/asm/purgatory.h b/arch/s390/include/asm/purgatory.h
new file mode 100644
index 000000000..e297bcfc4
--- /dev/null
+++ b/arch/s390/include/asm/purgatory.h
@@ -0,0 +1,17 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright IBM Corp. 2018
+ *
+ * Author(s): Philipp Rudo <prudo@linux.vnet.ibm.com>
+ */
+
+#ifndef _S390_PURGATORY_H_
+#define _S390_PURGATORY_H_
+#ifndef __ASSEMBLY__
+
+#include <linux/purgatory.h>
+
+int verify_sha256_digest(void);
+
+#endif /* __ASSEMBLY__ */
+#endif /* _S390_PURGATORY_H_ */
diff --git a/arch/s390/include/asm/qdio.h b/arch/s390/include/asm/qdio.h
new file mode 100644
index 000000000..1f2cd98dc
--- /dev/null
+++ b/arch/s390/include/asm/qdio.h
@@ -0,0 +1,430 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright IBM Corp. 2000, 2008
+ * Author(s): Utz Bacher <utz.bacher@de.ibm.com>
+ * Jan Glauber <jang@linux.vnet.ibm.com>
+ *
+ */
+#ifndef __QDIO_H__
+#define __QDIO_H__
+
+#include <linux/interrupt.h>
+#include <asm/cio.h>
+#include <asm/ccwdev.h>
+
+/* only use 4 queues to save some cachelines */
+#define QDIO_MAX_QUEUES_PER_IRQ 4
+#define QDIO_MAX_BUFFERS_PER_Q 128
+#define QDIO_MAX_BUFFERS_MASK (QDIO_MAX_BUFFERS_PER_Q - 1)
+#define QDIO_MAX_ELEMENTS_PER_BUFFER 16
+#define QDIO_SBAL_SIZE 256
+
+#define QDIO_QETH_QFMT 0
+#define QDIO_ZFCP_QFMT 1
+#define QDIO_IQDIO_QFMT 2
+
+/**
+ * struct qdesfmt0 - queue descriptor, format 0
+ * @sliba: storage list information block address
+ * @sla: storage list address
+ * @slsba: storage list state block address
+ * @akey: access key for DLIB
+ * @bkey: access key for SL
+ * @ckey: access key for SBALs
+ * @dkey: access key for SLSB
+ */
+struct qdesfmt0 {
+ u64 sliba;
+ u64 sla;
+ u64 slsba;
+ u32 : 32;
+ u32 akey : 4;
+ u32 bkey : 4;
+ u32 ckey : 4;
+ u32 dkey : 4;
+ u32 : 16;
+} __attribute__ ((packed));
+
+#define QDR_AC_MULTI_BUFFER_ENABLE 0x01
+
+/**
+ * struct qdr - queue description record (QDR)
+ * @qfmt: queue format
+ * @pfmt: implementation dependent parameter format
+ * @ac: adapter characteristics
+ * @iqdcnt: input queue descriptor count
+ * @oqdcnt: output queue descriptor count
+ * @iqdsz: input queue descriptor size
+ * @oqdsz: output queue descriptor size
+ * @qiba: queue information block address
+ * @qkey: queue information block key
+ * @qdf0: queue descriptions
+ */
+struct qdr {
+ u32 qfmt : 8;
+ u32 pfmt : 8;
+ u32 : 8;
+ u32 ac : 8;
+ u32 : 8;
+ u32 iqdcnt : 8;
+ u32 : 8;
+ u32 oqdcnt : 8;
+ u32 : 8;
+ u32 iqdsz : 8;
+ u32 : 8;
+ u32 oqdsz : 8;
+ /* private: */
+ u32 res[9];
+ /* public: */
+ u64 qiba;
+ u32 : 32;
+ u32 qkey : 4;
+ u32 : 28;
+ struct qdesfmt0 qdf0[126];
+} __packed __aligned(PAGE_SIZE);
+
+#define QIB_AC_OUTBOUND_PCI_SUPPORTED 0x40
+#define QIB_RFLAGS_ENABLE_QEBSM 0x80
+#define QIB_RFLAGS_ENABLE_DATA_DIV 0x02
+
+/**
+ * struct qib - queue information block (QIB)
+ * @qfmt: queue format
+ * @pfmt: implementation dependent parameter format
+ * @rflags: QEBSM
+ * @ac: adapter characteristics
+ * @isliba: absolute address of first input SLIB
+ * @osliba: absolute address of first output SLIB
+ * @ebcnam: adapter identifier in EBCDIC
+ * @parm: implementation dependent parameters
+ */
+struct qib {
+ u32 qfmt : 8;
+ u32 pfmt : 8;
+ u32 rflags : 8;
+ u32 ac : 8;
+ u32 : 32;
+ u64 isliba;
+ u64 osliba;
+ u32 : 32;
+ u32 : 32;
+ u8 ebcnam[8];
+ /* private: */
+ u8 res[88];
+ /* public: */
+ u8 parm[QDIO_MAX_BUFFERS_PER_Q];
+} __attribute__ ((packed, aligned(256)));
+
+/**
+ * struct slibe - storage list information block element (SLIBE)
+ * @parms: implementation dependent parameters
+ */
+struct slibe {
+ u64 parms;
+};
+
+/**
+ * struct qaob - queue asynchronous operation block
+ * @res0: reserved parameters
+ * @res1: reserved parameter
+ * @res2: reserved parameter
+ * @res3: reserved parameter
+ * @aorc: asynchronous operation return code
+ * @flags: internal flags
+ * @cbtbs: control block type
+ * @sb_count: number of storage blocks
+ * @sba: storage block element addresses
+ * @dcount: size of storage block elements
+ * @user0: user definable value
+ * @res4: reserved parameter
+ * @user1: user definable value
+ * @user2: user definable value
+ */
+struct qaob {
+ u64 res0[6];
+ u8 res1;
+ u8 res2;
+ u8 res3;
+ u8 aorc;
+ u8 flags;
+ u16 cbtbs;
+ u8 sb_count;
+ u64 sba[QDIO_MAX_ELEMENTS_PER_BUFFER];
+ u16 dcount[QDIO_MAX_ELEMENTS_PER_BUFFER];
+ u64 user0;
+ u64 res4[2];
+ u64 user1;
+ u64 user2;
+} __attribute__ ((packed, aligned(256)));
+
+/**
+ * struct slib - storage list information block (SLIB)
+ * @nsliba: next SLIB address (if any)
+ * @sla: SL address
+ * @slsba: SLSB address
+ * @slibe: SLIB elements
+ */
+struct slib {
+ u64 nsliba;
+ u64 sla;
+ u64 slsba;
+ /* private: */
+ u8 res[1000];
+ /* public: */
+ struct slibe slibe[QDIO_MAX_BUFFERS_PER_Q];
+} __attribute__ ((packed, aligned(2048)));
+
+#define SBAL_EFLAGS_LAST_ENTRY 0x40
+#define SBAL_EFLAGS_CONTIGUOUS 0x20
+#define SBAL_EFLAGS_FIRST_FRAG 0x04
+#define SBAL_EFLAGS_MIDDLE_FRAG 0x08
+#define SBAL_EFLAGS_LAST_FRAG 0x0c
+#define SBAL_EFLAGS_MASK 0x6f
+
+#define SBAL_SFLAGS0_PCI_REQ 0x40
+#define SBAL_SFLAGS0_DATA_CONTINUATION 0x20
+
+/* Awesome OpenFCP extensions */
+#define SBAL_SFLAGS0_TYPE_STATUS 0x00
+#define SBAL_SFLAGS0_TYPE_WRITE 0x08
+#define SBAL_SFLAGS0_TYPE_READ 0x10
+#define SBAL_SFLAGS0_TYPE_WRITE_READ 0x18
+#define SBAL_SFLAGS0_MORE_SBALS 0x04
+#define SBAL_SFLAGS0_COMMAND 0x02
+#define SBAL_SFLAGS0_LAST_SBAL 0x00
+#define SBAL_SFLAGS0_ONLY_SBAL SBAL_SFLAGS0_COMMAND
+#define SBAL_SFLAGS0_MIDDLE_SBAL SBAL_SFLAGS0_MORE_SBALS
+#define SBAL_SFLAGS0_FIRST_SBAL (SBAL_SFLAGS0_MORE_SBALS | SBAL_SFLAGS0_COMMAND)
+
+/**
+ * struct qdio_buffer_element - SBAL entry
+ * @eflags: SBAL entry flags
+ * @scount: SBAL count
+ * @sflags: whole SBAL flags
+ * @length: length
+ * @addr: address
+ */
+struct qdio_buffer_element {
+ u8 eflags;
+ /* private: */
+ u8 res1;
+ /* public: */
+ u8 scount;
+ u8 sflags;
+ u32 length;
+ void *addr;
+} __attribute__ ((packed, aligned(16)));
+
+/**
+ * struct qdio_buffer - storage block address list (SBAL)
+ * @element: SBAL entries
+ */
+struct qdio_buffer {
+ struct qdio_buffer_element element[QDIO_MAX_ELEMENTS_PER_BUFFER];
+} __attribute__ ((packed, aligned(256)));
+
+/**
+ * struct sl_element - storage list entry
+ * @sbal: absolute SBAL address
+ */
+struct sl_element {
+ u64 sbal;
+} __attribute__ ((packed));
+
+/**
+ * struct sl - storage list (SL)
+ * @element: SL entries
+ */
+struct sl {
+ struct sl_element element[QDIO_MAX_BUFFERS_PER_Q];
+} __attribute__ ((packed, aligned(1024)));
+
+/**
+ * struct slsb - storage list state block (SLSB)
+ * @val: state per buffer
+ */
+struct slsb {
+ u8 val[QDIO_MAX_BUFFERS_PER_Q];
+} __attribute__ ((packed, aligned(256)));
+
+/**
+ * struct qdio_outbuf_state - SBAL related asynchronous operation information
+ * (for communication with upper layer programs)
+ * (only required for use with completion queues)
+ * @flags: flags indicating state of buffer
+ * @aob: pointer to QAOB used for the particular SBAL
+ * @user: pointer to upper layer program's state information related to SBAL
+ * (stored in user1 data of QAOB)
+ */
+struct qdio_outbuf_state {
+ u8 flags;
+ struct qaob *aob;
+ void *user;
+};
+
+#define QDIO_OUTBUF_STATE_FLAG_PENDING 0x01
+
+#define CHSC_AC1_INITIATE_INPUTQ 0x80
+
+
+/* qdio adapter-characteristics-1 flag */
+#define AC1_SIGA_INPUT_NEEDED 0x40 /* process input queues */
+#define AC1_SIGA_OUTPUT_NEEDED 0x20 /* process output queues */
+#define AC1_SIGA_SYNC_NEEDED 0x10 /* ask hypervisor to sync */
+#define AC1_AUTOMATIC_SYNC_ON_THININT 0x08 /* set by hypervisor */
+#define AC1_AUTOMATIC_SYNC_ON_OUT_PCI 0x04 /* set by hypervisor */
+#define AC1_SC_QEBSM_AVAILABLE 0x02 /* available for subchannel */
+#define AC1_SC_QEBSM_ENABLED 0x01 /* enabled for subchannel */
+
+#define CHSC_AC2_MULTI_BUFFER_AVAILABLE 0x0080
+#define CHSC_AC2_MULTI_BUFFER_ENABLED 0x0040
+#define CHSC_AC2_DATA_DIV_AVAILABLE 0x0010
+#define CHSC_AC2_DATA_DIV_ENABLED 0x0002
+
+#define CHSC_AC3_FORMAT2_CQ_AVAILABLE 0x8000
+
+struct qdio_ssqd_desc {
+ u8 flags;
+ u8:8;
+ u16 sch;
+ u8 qfmt;
+ u8 parm;
+ u8 qdioac1;
+ u8 sch_class;
+ u8 pcnt;
+ u8 icnt;
+ u8:8;
+ u8 ocnt;
+ u8:8;
+ u8 mbccnt;
+ u16 qdioac2;
+ u64 sch_token;
+ u8 mro;
+ u8 mri;
+ u16 qdioac3;
+ u16:16;
+ u8:8;
+ u8 mmwc;
+} __attribute__ ((packed));
+
+/* params are: ccw_device, qdio_error, queue_number,
+ first element processed, number of elements processed, int_parm */
+typedef void qdio_handler_t(struct ccw_device *, unsigned int, int,
+ int, int, unsigned long);
+
+/* qdio errors reported to the upper-layer program */
+#define QDIO_ERROR_ACTIVATE 0x0001
+#define QDIO_ERROR_GET_BUF_STATE 0x0002
+#define QDIO_ERROR_SET_BUF_STATE 0x0004
+#define QDIO_ERROR_SLSB_STATE 0x0100
+
+#define QDIO_ERROR_FATAL 0x00ff
+#define QDIO_ERROR_TEMPORARY 0xff00
+
+/* for qdio_cleanup */
+#define QDIO_FLAG_CLEANUP_USING_CLEAR 0x01
+#define QDIO_FLAG_CLEANUP_USING_HALT 0x02
+
+/**
+ * struct qdio_initialize - qdio initialization data
+ * @cdev: associated ccw device
+ * @q_format: queue format
+ * @adapter_name: name for the adapter
+ * @qib_param_field_format: format for qib_param_field
+ * @qib_param_field: pointer to 128 bytes or NULL, if no param field
+ * @qib_rflags: rflags to set
+ * @input_slib_elements: pointer to no_input_qs * 128 words of data or NULL
+ * @output_slib_elements: pointer to no_output_qs * 128 words of data or NULL
+ * @no_input_qs: number of input queues
+ * @no_output_qs: number of output queues
+ * @input_handler: handler to be called for input queues
+ * @output_handler: handler to be called for output queues
+ * @queue_start_poll_array: polling handlers (one per input queue or NULL)
+ * @int_parm: interruption parameter
+ * @input_sbal_addr_array: address of no_input_qs * 128 pointers
+ * @output_sbal_addr_array: address of no_output_qs * 128 pointers
+ * @output_sbal_state_array: no_output_qs * 128 state info (for CQ or NULL)
+ */
+struct qdio_initialize {
+ struct ccw_device *cdev;
+ unsigned char q_format;
+ unsigned char qdr_ac;
+ unsigned char adapter_name[8];
+ unsigned int qib_param_field_format;
+ unsigned char *qib_param_field;
+ unsigned char qib_rflags;
+ unsigned long *input_slib_elements;
+ unsigned long *output_slib_elements;
+ unsigned int no_input_qs;
+ unsigned int no_output_qs;
+ qdio_handler_t *input_handler;
+ qdio_handler_t *output_handler;
+ void (**queue_start_poll_array) (struct ccw_device *, int,
+ unsigned long);
+ int scan_threshold;
+ unsigned long int_parm;
+ void **input_sbal_addr_array;
+ void **output_sbal_addr_array;
+ struct qdio_outbuf_state *output_sbal_state_array;
+};
+
+/**
+ * enum qdio_brinfo_entry_type - type of address entry for qdio_brinfo_desc()
+ * @l3_ipv6_addr: entry contains IPv6 address
+ * @l3_ipv4_addr: entry contains IPv4 address
+ * @l2_addr_lnid: entry contains MAC address and VLAN ID
+ */
+enum qdio_brinfo_entry_type {l3_ipv6_addr, l3_ipv4_addr, l2_addr_lnid};
+
+/**
+ * struct qdio_brinfo_entry_XXX - Address entry for qdio_brinfo_desc()
+ * @nit: Network interface token
+ * @addr: Address of one of the three types
+ *
+ * The struct is passed to the callback function by qdio_brinfo_desc()
+ */
+struct qdio_brinfo_entry_l3_ipv6 {
+ u64 nit;
+ struct { unsigned char _s6_addr[16]; } addr;
+} __packed;
+struct qdio_brinfo_entry_l3_ipv4 {
+ u64 nit;
+ struct { uint32_t _s_addr; } addr;
+} __packed;
+struct qdio_brinfo_entry_l2 {
+ u64 nit;
+ struct { u8 mac[6]; u16 lnid; } addr_lnid;
+} __packed;
+
+#define QDIO_STATE_INACTIVE 0x00000002 /* after qdio_cleanup */
+#define QDIO_STATE_ESTABLISHED 0x00000004 /* after qdio_establish */
+#define QDIO_STATE_ACTIVE 0x00000008 /* after qdio_activate */
+#define QDIO_STATE_STOPPED 0x00000010 /* after queues went down */
+
+#define QDIO_FLAG_SYNC_INPUT 0x01
+#define QDIO_FLAG_SYNC_OUTPUT 0x02
+#define QDIO_FLAG_PCI_OUT 0x10
+
+int qdio_alloc_buffers(struct qdio_buffer **buf, unsigned int count);
+void qdio_free_buffers(struct qdio_buffer **buf, unsigned int count);
+void qdio_reset_buffers(struct qdio_buffer **buf, unsigned int count);
+
+extern int qdio_allocate(struct qdio_initialize *);
+extern int qdio_establish(struct qdio_initialize *);
+extern int qdio_activate(struct ccw_device *);
+extern void qdio_release_aob(struct qaob *);
+extern int do_QDIO(struct ccw_device *, unsigned int, int, unsigned int,
+ unsigned int);
+extern int qdio_start_irq(struct ccw_device *, int);
+extern int qdio_stop_irq(struct ccw_device *, int);
+extern int qdio_get_next_buffers(struct ccw_device *, int, int *, int *);
+extern int qdio_shutdown(struct ccw_device *, int);
+extern int qdio_free(struct ccw_device *);
+extern int qdio_get_ssqd_desc(struct ccw_device *, struct qdio_ssqd_desc *);
+extern int qdio_pnso_brinfo(struct subchannel_id schid,
+ int cnc, u16 *response,
+ void (*cb)(void *priv, enum qdio_brinfo_entry_type type,
+ void *entry),
+ void *priv);
+
+#endif /* __QDIO_H__ */
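
The upper-layer drivers (qeth, zfcp) fill in struct qdio_initialize and then run the allocate/establish/activate sequence declared above. A heavily abbreviated, hedged sketch of that sequence — SBAL setup, the adapter name and most fields are omitted, and the function name is made up:

#include <asm/qdio.h>

/* Sketch only: handlers and SBAL address arrays are supplied by the caller. */
static int example_bring_up_qdio(struct ccw_device *cdev,
                                 qdio_handler_t *in_handler,
                                 qdio_handler_t *out_handler,
                                 void **in_sbals, void **out_sbals)
{
        struct qdio_initialize init = {
                .cdev                   = cdev,
                .q_format               = QDIO_QETH_QFMT,
                .no_input_qs            = 1,
                .no_output_qs           = 1,
                .input_handler          = in_handler,
                .output_handler         = out_handler,
                .input_sbal_addr_array  = in_sbals,
                .output_sbal_addr_array = out_sbals,
        };
        int rc;

        rc = qdio_allocate(&init);
        if (rc)
                return rc;
        rc = qdio_establish(&init);
        if (rc)
                goto out_free;
        rc = qdio_activate(cdev);
        if (rc)
                goto out_shutdown;
        return 0;

out_shutdown:
        qdio_shutdown(cdev, QDIO_FLAG_CLEANUP_USING_CLEAR);
out_free:
        qdio_free(cdev);
        return rc;
}

Teardown mirrors this: qdio_shutdown() followed by qdio_free().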
diff --git a/arch/s390/include/asm/runtime_instr.h b/arch/s390/include/asm/runtime_instr.h
new file mode 100644
index 000000000..0e1605538
--- /dev/null
+++ b/arch/s390/include/asm/runtime_instr.h
@@ -0,0 +1,28 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _RUNTIME_INSTR_H
+#define _RUNTIME_INSTR_H
+
+#include <uapi/asm/runtime_instr.h>
+
+extern struct runtime_instr_cb runtime_instr_empty_cb;
+
+static inline void save_ri_cb(struct runtime_instr_cb *cb_prev)
+{
+ if (cb_prev)
+ store_runtime_instr_cb(cb_prev);
+}
+
+static inline void restore_ri_cb(struct runtime_instr_cb *cb_next,
+ struct runtime_instr_cb *cb_prev)
+{
+ if (cb_next)
+ load_runtime_instr_cb(cb_next);
+ else if (cb_prev)
+ load_runtime_instr_cb(&runtime_instr_empty_cb);
+}
+
+struct task_struct;
+
+void runtime_instr_release(struct task_struct *tsk);
+
+#endif /* _RUNTIME_INSTR_H */
diff --git a/arch/s390/include/asm/schid.h b/arch/s390/include/asm/schid.h
new file mode 100644
index 000000000..3ac405a67
--- /dev/null
+++ b/arch/s390/include/asm/schid.h
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef ASM_SCHID_H
+#define ASM_SCHID_H
+
+#include <linux/string.h>
+#include <uapi/asm/schid.h>
+
+/* Helper function for sane state of pre-allocated subchannel_id. */
+static inline void
+init_subchannel_id(struct subchannel_id *schid)
+{
+ memset(schid, 0, sizeof(struct subchannel_id));
+ schid->one = 1;
+}
+
+static inline int
+schid_equal(struct subchannel_id *schid1, struct subchannel_id *schid2)
+{
+ return !memcmp(schid1, schid2, sizeof(struct subchannel_id));
+}
+
+#endif /* ASM_SCHID_H */
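
A small, hedged example of the helper above — an ID is zeroed and given its fixed one-bit via init_subchannel_id() before individual fields are filled in; the function name and the use of the uapi sch_no field are illustrative:

#include <linux/types.h>
#include <asm/schid.h>

/* Sketch: build a subchannel ID for subchannel number sch_no in set 0. */
static void example_make_schid(struct subchannel_id *schid, u16 sch_no)
{
        init_subchannel_id(schid);      /* zeroes the ID, sets .one = 1 */
        schid->sch_no = sch_no;         /* assumes the uapi field layout */
}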
diff --git a/arch/s390/include/asm/sclp.h b/arch/s390/include/asm/sclp.h
new file mode 100644
index 000000000..e44a8d795
--- /dev/null
+++ b/arch/s390/include/asm/sclp.h
@@ -0,0 +1,136 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright IBM Corp. 2007
+ * Author(s): Heiko Carstens <heiko.carstens@de.ibm.com>
+ */
+
+#ifndef _ASM_S390_SCLP_H
+#define _ASM_S390_SCLP_H
+
+#include <linux/types.h>
+#include <asm/chpid.h>
+#include <asm/cpu.h>
+
+#define SCLP_CHP_INFO_MASK_SIZE 32
+#define SCLP_MAX_CORES 256
+
+struct sclp_chp_info {
+ u8 recognized[SCLP_CHP_INFO_MASK_SIZE];
+ u8 standby[SCLP_CHP_INFO_MASK_SIZE];
+ u8 configured[SCLP_CHP_INFO_MASK_SIZE];
+};
+
+#define LOADPARM_LEN 8
+
+struct sclp_ipl_info {
+ int is_valid;
+ int has_dump;
+ char loadparm[LOADPARM_LEN];
+};
+
+struct sclp_core_entry {
+ u8 core_id;
+ u8 reserved0;
+ u8 : 4;
+ u8 sief2 : 1;
+ u8 skey : 1;
+ u8 : 2;
+ u8 : 2;
+ u8 gpere : 1;
+ u8 siif : 1;
+ u8 sigpif : 1;
+ u8 : 3;
+ u8 reserved2[3];
+ u8 : 2;
+ u8 ib : 1;
+ u8 cei : 1;
+ u8 : 4;
+ u8 reserved3[6];
+ u8 type;
+ u8 reserved1;
+} __attribute__((packed));
+
+struct sclp_core_info {
+ unsigned int configured;
+ unsigned int standby;
+ unsigned int combined;
+ struct sclp_core_entry core[SCLP_MAX_CORES];
+};
+
+struct sclp_info {
+ unsigned char has_linemode : 1;
+ unsigned char has_vt220 : 1;
+ unsigned char has_siif : 1;
+ unsigned char has_sigpif : 1;
+ unsigned char has_core_type : 1;
+ unsigned char has_sprp : 1;
+ unsigned char has_hvs : 1;
+ unsigned char has_esca : 1;
+ unsigned char has_sief2 : 1;
+ unsigned char has_64bscao : 1;
+ unsigned char has_gpere : 1;
+ unsigned char has_cmma : 1;
+ unsigned char has_gsls : 1;
+ unsigned char has_ib : 1;
+ unsigned char has_cei : 1;
+ unsigned char has_pfmfi : 1;
+ unsigned char has_ibs : 1;
+ unsigned char has_skey : 1;
+ unsigned char has_kss : 1;
+ unsigned char has_gisaf : 1;
+ unsigned int ibc;
+ unsigned int mtid;
+ unsigned int mtid_cp;
+ unsigned int mtid_prev;
+ unsigned long rzm;
+ unsigned long rnmax;
+ unsigned long hamax;
+ unsigned int max_cores;
+ unsigned long hsa_size;
+ unsigned long facilities;
+ unsigned int hmfai;
+};
+extern struct sclp_info sclp;
+
+struct zpci_report_error_header {
+ u8 version; /* Interface version byte */
+ u8 action; /* Action qualifier byte
+ * 1: Deconfigure and repair action requested
+ * (OpenCrypto Problem Call Home)
+ * 2: Informational Report
+ * (OpenCrypto Successful Diagnostics Execution)
+ */
+ u16 length; /* Length of Subsequent Data (up to 4K - SCLP header) */
+ u8 data[0]; /* Subsequent Data passed verbatim to SCLP ET 24 */
+} __packed;
+
+int sclp_early_get_core_info(struct sclp_core_info *info);
+void sclp_early_get_ipl_info(struct sclp_ipl_info *info);
+void sclp_early_detect(void);
+void sclp_early_printk(const char *s);
+void sclp_early_printk_force(const char *s);
+void __sclp_early_printk(const char *s, unsigned int len, unsigned int force);
+
+int _sclp_get_core_info(struct sclp_core_info *info);
+int sclp_core_configure(u8 core);
+int sclp_core_deconfigure(u8 core);
+int sclp_sdias_blk_count(void);
+int sclp_sdias_copy(void *dest, int blk_num, int nr_blks);
+int sclp_chp_configure(struct chp_id chpid);
+int sclp_chp_deconfigure(struct chp_id chpid);
+int sclp_chp_read_info(struct sclp_chp_info *info);
+int sclp_pci_configure(u32 fid);
+int sclp_pci_deconfigure(u32 fid);
+int sclp_pci_report(struct zpci_report_error_header *report, u32 fh, u32 fid);
+int memcpy_hsa_kernel(void *dest, unsigned long src, size_t count);
+int memcpy_hsa_user(void __user *dest, unsigned long src, size_t count);
+void sclp_ocf_cpc_name_copy(char *dst);
+
+static inline int sclp_get_core_info(struct sclp_core_info *info, int early)
+{
+ if (early)
+ return sclp_early_get_core_info(info);
+ return _sclp_get_core_info(info);
+}
+
+#endif /* _ASM_S390_SCLP_H */
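
sclp_get_core_info() above selects between the early (pre-memblock) and the regular SCLP code path. struct sclp_core_info is about 4 KiB, so callers allocate it rather than put it on the stack; a hedged sketch with an invented caller name:

#include <linux/slab.h>
#include <linux/errno.h>
#include <asm/sclp.h>

/* Sketch: return the number of configured cores reported by SCLP. */
static int example_count_configured_cores(void)
{
        struct sclp_core_info *info;
        int rc;

        info = kzalloc(sizeof(*info), GFP_KERNEL);
        if (!info)
                return -ENOMEM;
        rc = sclp_get_core_info(info, 0);       /* 0 = not the early variant */
        if (!rc)
                rc = info->configured;
        kfree(info);
        return rc;
}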
diff --git a/arch/s390/include/asm/scsw.h b/arch/s390/include/asm/scsw.h
new file mode 100644
index 000000000..c00f7b031
--- /dev/null
+++ b/arch/s390/include/asm/scsw.h
@@ -0,0 +1,993 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Helper functions for scsw access.
+ *
+ * Copyright IBM Corp. 2008, 2012
+ * Author(s): Peter Oberparleiter <peter.oberparleiter@de.ibm.com>
+ */
+
+#ifndef _ASM_S390_SCSW_H_
+#define _ASM_S390_SCSW_H_
+
+#include <linux/types.h>
+#include <asm/css_chars.h>
+#include <asm/cio.h>
+
+/**
+ * struct cmd_scsw - command-mode subchannel status word
+ * @key: subchannel key
+ * @sctl: suspend control
+ * @eswf: esw format
+ * @cc: deferred condition code
+ * @fmt: format
+ * @pfch: prefetch
+ * @isic: initial-status interruption control
+ * @alcc: address-limit checking control
+ * @ssi: suppress-suspended interruption
+ * @zcc: zero condition code
+ * @ectl: extended control
+ * @pno: path not operational
+ * @res: reserved
+ * @fctl: function control
+ * @actl: activity control
+ * @stctl: status control
+ * @cpa: channel program address
+ * @dstat: device status
+ * @cstat: subchannel status
+ * @count: residual count
+ */
+struct cmd_scsw {
+ __u32 key : 4;
+ __u32 sctl : 1;
+ __u32 eswf : 1;
+ __u32 cc : 2;
+ __u32 fmt : 1;
+ __u32 pfch : 1;
+ __u32 isic : 1;
+ __u32 alcc : 1;
+ __u32 ssi : 1;
+ __u32 zcc : 1;
+ __u32 ectl : 1;
+ __u32 pno : 1;
+ __u32 res : 1;
+ __u32 fctl : 3;
+ __u32 actl : 7;
+ __u32 stctl : 5;
+ __u32 cpa;
+ __u32 dstat : 8;
+ __u32 cstat : 8;
+ __u32 count : 16;
+} __attribute__ ((packed));
+
+/**
+ * struct tm_scsw - transport-mode subchannel status word
+ * @key: subchannel key
+ * @eswf: esw format
+ * @cc: deferred condition code
+ * @fmt: format
+ * @x: IRB-format control
+ * @q: interrogate-complete
+ * @ectl: extended control
+ * @pno: path not operational
+ * @fctl: function control
+ * @actl: activity control
+ * @stctl: status control
+ * @tcw: TCW address
+ * @dstat: device status
+ * @cstat: subchannel status
+ * @fcxs: FCX status
+ * @schxs: subchannel-extended status
+ */
+struct tm_scsw {
+ u32 key:4;
+ u32 :1;
+ u32 eswf:1;
+ u32 cc:2;
+ u32 fmt:3;
+ u32 x:1;
+ u32 q:1;
+ u32 :1;
+ u32 ectl:1;
+ u32 pno:1;
+ u32 :1;
+ u32 fctl:3;
+ u32 actl:7;
+ u32 stctl:5;
+ u32 tcw;
+ u32 dstat:8;
+ u32 cstat:8;
+ u32 fcxs:8;
+ u32 ifob:1;
+ u32 sesq:7;
+} __attribute__ ((packed));
+
+/**
+ * struct eadm_scsw - subchannel status word for eadm subchannels
+ * @key: subchannel key
+ * @eswf: esw format
+ * @cc: deferred condition code
+ * @ectl: extended control
+ * @fctl: function control
+ * @actl: activity control
+ * @stctl: status control
+ * @aob: AOB address
+ * @dstat: device status
+ * @cstat: subchannel status
+ */
+struct eadm_scsw {
+ u32 key:4;
+ u32:1;
+ u32 eswf:1;
+ u32 cc:2;
+ u32:6;
+ u32 ectl:1;
+ u32:2;
+ u32 fctl:3;
+ u32 actl:7;
+ u32 stctl:5;
+ u32 aob;
+ u32 dstat:8;
+ u32 cstat:8;
+ u32:16;
+} __packed;
+
+/**
+ * union scsw - subchannel status word
+ * @cmd: command-mode SCSW
+ * @tm: transport-mode SCSW
+ * @eadm: eadm SCSW
+ */
+union scsw {
+ struct cmd_scsw cmd;
+ struct tm_scsw tm;
+ struct eadm_scsw eadm;
+} __packed;
+
+#define SCSW_FCTL_CLEAR_FUNC 0x1
+#define SCSW_FCTL_HALT_FUNC 0x2
+#define SCSW_FCTL_START_FUNC 0x4
+
+#define SCSW_ACTL_SUSPENDED 0x1
+#define SCSW_ACTL_DEVACT 0x2
+#define SCSW_ACTL_SCHACT 0x4
+#define SCSW_ACTL_CLEAR_PEND 0x8
+#define SCSW_ACTL_HALT_PEND 0x10
+#define SCSW_ACTL_START_PEND 0x20
+#define SCSW_ACTL_RESUME_PEND 0x40
+
+#define SCSW_STCTL_STATUS_PEND 0x1
+#define SCSW_STCTL_SEC_STATUS 0x2
+#define SCSW_STCTL_PRIM_STATUS 0x4
+#define SCSW_STCTL_INTER_STATUS 0x8
+#define SCSW_STCTL_ALERT_STATUS 0x10
+
+#define DEV_STAT_ATTENTION 0x80
+#define DEV_STAT_STAT_MOD 0x40
+#define DEV_STAT_CU_END 0x20
+#define DEV_STAT_BUSY 0x10
+#define DEV_STAT_CHN_END 0x08
+#define DEV_STAT_DEV_END 0x04
+#define DEV_STAT_UNIT_CHECK 0x02
+#define DEV_STAT_UNIT_EXCEP 0x01
+
+#define SCHN_STAT_PCI 0x80
+#define SCHN_STAT_INCORR_LEN 0x40
+#define SCHN_STAT_PROG_CHECK 0x20
+#define SCHN_STAT_PROT_CHECK 0x10
+#define SCHN_STAT_CHN_DATA_CHK 0x08
+#define SCHN_STAT_CHN_CTRL_CHK 0x04
+#define SCHN_STAT_INTF_CTRL_CHK 0x02
+#define SCHN_STAT_CHAIN_CHECK 0x01
+
+#define SCSW_SESQ_DEV_NOFCX 3
+#define SCSW_SESQ_PATH_NOFCX 4
+
+/*
+ * architectured values for first sense byte
+ */
+#define SNS0_CMD_REJECT 0x80
+#define SNS_CMD_REJECT SNS0_CMD_REJECT
+#define SNS0_INTERVENTION_REQ 0x40
+#define SNS0_BUS_OUT_CHECK 0x20
+#define SNS0_EQUIPMENT_CHECK 0x10
+#define SNS0_DATA_CHECK 0x08
+#define SNS0_OVERRUN 0x04
+#define SNS0_INCOMPL_DOMAIN 0x01
+
+/*
+ * architectured values for second sense byte
+ */
+#define SNS1_PERM_ERR 0x80
+#define SNS1_INV_TRACK_FORMAT 0x40
+#define SNS1_EOC 0x20
+#define SNS1_MESSAGE_TO_OPER 0x10
+#define SNS1_NO_REC_FOUND 0x08
+#define SNS1_FILE_PROTECTED 0x04
+#define SNS1_WRITE_INHIBITED 0x02
+#define SNS1_INPRECISE_END 0x01
+
+/*
+ * architectured values for third sense byte
+ */
+#define SNS2_REQ_INH_WRITE 0x80
+#define SNS2_CORRECTABLE 0x40
+#define SNS2_FIRST_LOG_ERR 0x20
+#define SNS2_ENV_DATA_PRESENT 0x10
+#define SNS2_INPRECISE_END 0x04
+
+/**
+ * scsw_is_tm - check for transport mode scsw
+ * @scsw: pointer to scsw
+ *
+ * Return non-zero if the specified scsw is a transport mode scsw, zero
+ * otherwise.
+ */
+static inline int scsw_is_tm(union scsw *scsw)
+{
+ return css_general_characteristics.fcx && (scsw->tm.x == 1);
+}
+
+/**
+ * scsw_key - return scsw key field
+ * @scsw: pointer to scsw
+ *
+ * Return the value of the key field of the specified scsw, regardless of
+ * whether it is a transport mode or command mode scsw.
+ */
+static inline u32 scsw_key(union scsw *scsw)
+{
+ if (scsw_is_tm(scsw))
+ return scsw->tm.key;
+ else
+ return scsw->cmd.key;
+}
+
+/**
+ * scsw_eswf - return scsw eswf field
+ * @scsw: pointer to scsw
+ *
+ * Return the value of the eswf field of the specified scsw, regardless of
+ * whether it is a transport mode or command mode scsw.
+ */
+static inline u32 scsw_eswf(union scsw *scsw)
+{
+ if (scsw_is_tm(scsw))
+ return scsw->tm.eswf;
+ else
+ return scsw->cmd.eswf;
+}
+
+/**
+ * scsw_cc - return scsw cc field
+ * @scsw: pointer to scsw
+ *
+ * Return the value of the cc field of the specified scsw, regardless of
+ * whether it is a transport mode or command mode scsw.
+ */
+static inline u32 scsw_cc(union scsw *scsw)
+{
+ if (scsw_is_tm(scsw))
+ return scsw->tm.cc;
+ else
+ return scsw->cmd.cc;
+}
+
+/**
+ * scsw_ectl - return scsw ectl field
+ * @scsw: pointer to scsw
+ *
+ * Return the value of the ectl field of the specified scsw, regardless of
+ * whether it is a transport mode or command mode scsw.
+ */
+static inline u32 scsw_ectl(union scsw *scsw)
+{
+ if (scsw_is_tm(scsw))
+ return scsw->tm.ectl;
+ else
+ return scsw->cmd.ectl;
+}
+
+/**
+ * scsw_pno - return scsw pno field
+ * @scsw: pointer to scsw
+ *
+ * Return the value of the pno field of the specified scsw, regardless of
+ * whether it is a transport mode or command mode scsw.
+ */
+static inline u32 scsw_pno(union scsw *scsw)
+{
+ if (scsw_is_tm(scsw))
+ return scsw->tm.pno;
+ else
+ return scsw->cmd.pno;
+}
+
+/**
+ * scsw_fctl - return scsw fctl field
+ * @scsw: pointer to scsw
+ *
+ * Return the value of the fctl field of the specified scsw, regardless of
+ * whether it is a transport mode or command mode scsw.
+ */
+static inline u32 scsw_fctl(union scsw *scsw)
+{
+ if (scsw_is_tm(scsw))
+ return scsw->tm.fctl;
+ else
+ return scsw->cmd.fctl;
+}
+
+/**
+ * scsw_actl - return scsw actl field
+ * @scsw: pointer to scsw
+ *
+ * Return the value of the actl field of the specified scsw, regardless of
+ * whether it is a transport mode or command mode scsw.
+ */
+static inline u32 scsw_actl(union scsw *scsw)
+{
+ if (scsw_is_tm(scsw))
+ return scsw->tm.actl;
+ else
+ return scsw->cmd.actl;
+}
+
+/**
+ * scsw_stctl - return scsw stctl field
+ * @scsw: pointer to scsw
+ *
+ * Return the value of the stctl field of the specified scsw, regardless of
+ * whether it is a transport mode or command mode scsw.
+ */
+static inline u32 scsw_stctl(union scsw *scsw)
+{
+ if (scsw_is_tm(scsw))
+ return scsw->tm.stctl;
+ else
+ return scsw->cmd.stctl;
+}
+
+/**
+ * scsw_dstat - return scsw dstat field
+ * @scsw: pointer to scsw
+ *
+ * Return the value of the dstat field of the specified scsw, regardless of
+ * whether it is a transport mode or command mode scsw.
+ */
+static inline u32 scsw_dstat(union scsw *scsw)
+{
+ if (scsw_is_tm(scsw))
+ return scsw->tm.dstat;
+ else
+ return scsw->cmd.dstat;
+}
+
+/**
+ * scsw_cstat - return scsw cstat field
+ * @scsw: pointer to scsw
+ *
+ * Return the value of the cstat field of the specified scsw, regardless of
+ * whether it is a transport mode or command mode scsw.
+ */
+static inline u32 scsw_cstat(union scsw *scsw)
+{
+ if (scsw_is_tm(scsw))
+ return scsw->tm.cstat;
+ else
+ return scsw->cmd.cstat;
+}
+
+/**
+ * scsw_cmd_is_valid_key - check key field validity
+ * @scsw: pointer to scsw
+ *
+ * Return non-zero if the key field of the specified command mode scsw is
+ * valid, zero otherwise.
+ */
+static inline int scsw_cmd_is_valid_key(union scsw *scsw)
+{
+ return (scsw->cmd.fctl & SCSW_FCTL_START_FUNC);
+}
+
+/**
+ * scsw_cmd_is_valid_sctl - check sctl field validity
+ * @scsw: pointer to scsw
+ *
+ * Return non-zero if the sctl field of the specified command mode scsw is
+ * valid, zero otherwise.
+ */
+static inline int scsw_cmd_is_valid_sctl(union scsw *scsw)
+{
+ return (scsw->cmd.fctl & SCSW_FCTL_START_FUNC);
+}
+
+/**
+ * scsw_cmd_is_valid_eswf - check eswf field validity
+ * @scsw: pointer to scsw
+ *
+ * Return non-zero if the eswf field of the specified command mode scsw is
+ * valid, zero otherwise.
+ */
+static inline int scsw_cmd_is_valid_eswf(union scsw *scsw)
+{
+ return (scsw->cmd.stctl & SCSW_STCTL_STATUS_PEND);
+}
+
+/**
+ * scsw_cmd_is_valid_cc - check cc field validity
+ * @scsw: pointer to scsw
+ *
+ * Return non-zero if the cc field of the specified command mode scsw is
+ * valid, zero otherwise.
+ */
+static inline int scsw_cmd_is_valid_cc(union scsw *scsw)
+{
+ return (scsw->cmd.fctl & SCSW_FCTL_START_FUNC) &&
+ (scsw->cmd.stctl & SCSW_STCTL_STATUS_PEND);
+}
+
+/**
+ * scsw_cmd_is_valid_fmt - check fmt field validity
+ * @scsw: pointer to scsw
+ *
+ * Return non-zero if the fmt field of the specified command mode scsw is
+ * valid, zero otherwise.
+ */
+static inline int scsw_cmd_is_valid_fmt(union scsw *scsw)
+{
+ return (scsw->cmd.fctl & SCSW_FCTL_START_FUNC);
+}
+
+/**
+ * scsw_cmd_is_valid_pfch - check pfch field validity
+ * @scsw: pointer to scsw
+ *
+ * Return non-zero if the pfch field of the specified command mode scsw is
+ * valid, zero otherwise.
+ */
+static inline int scsw_cmd_is_valid_pfch(union scsw *scsw)
+{
+ return (scsw->cmd.fctl & SCSW_FCTL_START_FUNC);
+}
+
+/**
+ * scsw_cmd_is_valid_isic - check isic field validity
+ * @scsw: pointer to scsw
+ *
+ * Return non-zero if the isic field of the specified command mode scsw is
+ * valid, zero otherwise.
+ */
+static inline int scsw_cmd_is_valid_isic(union scsw *scsw)
+{
+ return (scsw->cmd.fctl & SCSW_FCTL_START_FUNC);
+}
+
+/**
+ * scsw_cmd_is_valid_alcc - check alcc field validity
+ * @scsw: pointer to scsw
+ *
+ * Return non-zero if the alcc field of the specified command mode scsw is
+ * valid, zero otherwise.
+ */
+static inline int scsw_cmd_is_valid_alcc(union scsw *scsw)
+{
+ return (scsw->cmd.fctl & SCSW_FCTL_START_FUNC);
+}
+
+/**
+ * scsw_cmd_is_valid_ssi - check ssi field validity
+ * @scsw: pointer to scsw
+ *
+ * Return non-zero if the ssi field of the specified command mode scsw is
+ * valid, zero otherwise.
+ */
+static inline int scsw_cmd_is_valid_ssi(union scsw *scsw)
+{
+ return (scsw->cmd.fctl & SCSW_FCTL_START_FUNC);
+}
+
+/**
+ * scsw_cmd_is_valid_zcc - check zcc field validity
+ * @scsw: pointer to scsw
+ *
+ * Return non-zero if the zcc field of the specified command mode scsw is
+ * valid, zero otherwise.
+ */
+static inline int scsw_cmd_is_valid_zcc(union scsw *scsw)
+{
+ return (scsw->cmd.fctl & SCSW_FCTL_START_FUNC) &&
+ (scsw->cmd.stctl & SCSW_STCTL_INTER_STATUS);
+}
+
+/**
+ * scsw_cmd_is_valid_ectl - check ectl field validity
+ * @scsw: pointer to scsw
+ *
+ * Return non-zero if the ectl field of the specified command mode scsw is
+ * valid, zero otherwise.
+ */
+static inline int scsw_cmd_is_valid_ectl(union scsw *scsw)
+{
+ return (scsw->cmd.stctl & SCSW_STCTL_STATUS_PEND) &&
+ !(scsw->cmd.stctl & SCSW_STCTL_INTER_STATUS) &&
+ (scsw->cmd.stctl & SCSW_STCTL_ALERT_STATUS);
+}
+
+/**
+ * scsw_cmd_is_valid_pno - check pno field validity
+ * @scsw: pointer to scsw
+ *
+ * Return non-zero if the pno field of the specified command mode scsw is
+ * valid, zero otherwise.
+ */
+static inline int scsw_cmd_is_valid_pno(union scsw *scsw)
+{
+ return (scsw->cmd.fctl != 0) &&
+ (scsw->cmd.stctl & SCSW_STCTL_STATUS_PEND) &&
+ (!(scsw->cmd.stctl & SCSW_STCTL_INTER_STATUS) ||
+ ((scsw->cmd.stctl & SCSW_STCTL_INTER_STATUS) &&
+ (scsw->cmd.actl & SCSW_ACTL_SUSPENDED)));
+}
+
+/**
+ * scsw_cmd_is_valid_fctl - check fctl field validity
+ * @scsw: pointer to scsw
+ *
+ * Return non-zero if the fctl field of the specified command mode scsw is
+ * valid, zero otherwise.
+ */
+static inline int scsw_cmd_is_valid_fctl(union scsw *scsw)
+{
+ /* Only valid if pmcw.dnv == 1*/
+ return 1;
+}
+
+/**
+ * scsw_cmd_is_valid_actl - check actl field validity
+ * @scsw: pointer to scsw
+ *
+ * Return non-zero if the actl field of the specified command mode scsw is
+ * valid, zero otherwise.
+ */
+static inline int scsw_cmd_is_valid_actl(union scsw *scsw)
+{
+ /* Only valid if pmcw.dnv == 1*/
+ return 1;
+}
+
+/**
+ * scsw_cmd_is_valid_stctl - check stctl field validity
+ * @scsw: pointer to scsw
+ *
+ * Return non-zero if the stctl field of the specified command mode scsw is
+ * valid, zero otherwise.
+ */
+static inline int scsw_cmd_is_valid_stctl(union scsw *scsw)
+{
+ /* Only valid if pmcw.dnv == 1*/
+ return 1;
+}
+
+/**
+ * scsw_cmd_is_valid_dstat - check dstat field validity
+ * @scsw: pointer to scsw
+ *
+ * Return non-zero if the dstat field of the specified command mode scsw is
+ * valid, zero otherwise.
+ */
+static inline int scsw_cmd_is_valid_dstat(union scsw *scsw)
+{
+ return (scsw->cmd.stctl & SCSW_STCTL_STATUS_PEND) &&
+ (scsw->cmd.cc != 3);
+}
+
+/**
+ * scsw_cmd_is_valid_cstat - check cstat field validity
+ * @scsw: pointer to scsw
+ *
+ * Return non-zero if the cstat field of the specified command mode scsw is
+ * valid, zero otherwise.
+ */
+static inline int scsw_cmd_is_valid_cstat(union scsw *scsw)
+{
+ return (scsw->cmd.stctl & SCSW_STCTL_STATUS_PEND) &&
+ (scsw->cmd.cc != 3);
+}
+
+/**
+ * scsw_tm_is_valid_key - check key field validity
+ * @scsw: pointer to scsw
+ *
+ * Return non-zero if the key field of the specified transport mode scsw is
+ * valid, zero otherwise.
+ */
+static inline int scsw_tm_is_valid_key(union scsw *scsw)
+{
+ return (scsw->tm.fctl & SCSW_FCTL_START_FUNC);
+}
+
+/**
+ * scsw_tm_is_valid_eswf - check eswf field validity
+ * @scsw: pointer to scsw
+ *
+ * Return non-zero if the eswf field of the specified transport mode scsw is
+ * valid, zero otherwise.
+ */
+static inline int scsw_tm_is_valid_eswf(union scsw *scsw)
+{
+ return (scsw->tm.stctl & SCSW_STCTL_STATUS_PEND);
+}
+
+/**
+ * scsw_tm_is_valid_cc - check cc field validity
+ * @scsw: pointer to scsw
+ *
+ * Return non-zero if the cc field of the specified transport mode scsw is
+ * valid, zero otherwise.
+ */
+static inline int scsw_tm_is_valid_cc(union scsw *scsw)
+{
+ return (scsw->tm.fctl & SCSW_FCTL_START_FUNC) &&
+ (scsw->tm.stctl & SCSW_STCTL_STATUS_PEND);
+}
+
+/**
+ * scsw_tm_is_valid_fmt - check fmt field validity
+ * @scsw: pointer to scsw
+ *
+ * Return non-zero if the fmt field of the specified transport mode scsw is
+ * valid, zero otherwise.
+ */
+static inline int scsw_tm_is_valid_fmt(union scsw *scsw)
+{
+ return 1;
+}
+
+/**
+ * scsw_tm_is_valid_x - check x field validity
+ * @scsw: pointer to scsw
+ *
+ * Return non-zero if the x field of the specified transport mode scsw is
+ * valid, zero otherwise.
+ */
+static inline int scsw_tm_is_valid_x(union scsw *scsw)
+{
+ return 1;
+}
+
+/**
+ * scsw_tm_is_valid_q - check q field validity
+ * @scsw: pointer to scsw
+ *
+ * Return non-zero if the q field of the specified transport mode scsw is
+ * valid, zero otherwise.
+ */
+static inline int scsw_tm_is_valid_q(union scsw *scsw)
+{
+ return 1;
+}
+
+/**
+ * scsw_tm_is_valid_ectl - check ectl field validity
+ * @scsw: pointer to scsw
+ *
+ * Return non-zero if the ectl field of the specified transport mode scsw is
+ * valid, zero otherwise.
+ */
+static inline int scsw_tm_is_valid_ectl(union scsw *scsw)
+{
+ return (scsw->tm.stctl & SCSW_STCTL_STATUS_PEND) &&
+ !(scsw->tm.stctl & SCSW_STCTL_INTER_STATUS) &&
+ (scsw->tm.stctl & SCSW_STCTL_ALERT_STATUS);
+}
+
+/**
+ * scsw_tm_is_valid_pno - check pno field validity
+ * @scsw: pointer to scsw
+ *
+ * Return non-zero if the pno field of the specified transport mode scsw is
+ * valid, zero otherwise.
+ */
+static inline int scsw_tm_is_valid_pno(union scsw *scsw)
+{
+ return (scsw->tm.fctl != 0) &&
+ (scsw->tm.stctl & SCSW_STCTL_STATUS_PEND) &&
+ (!(scsw->tm.stctl & SCSW_STCTL_INTER_STATUS) ||
+ ((scsw->tm.stctl & SCSW_STCTL_INTER_STATUS) &&
+ (scsw->tm.actl & SCSW_ACTL_SUSPENDED)));
+}
+
+/**
+ * scsw_tm_is_valid_fctl - check fctl field validity
+ * @scsw: pointer to scsw
+ *
+ * Return non-zero if the fctl field of the specified transport mode scsw is
+ * valid, zero otherwise.
+ */
+static inline int scsw_tm_is_valid_fctl(union scsw *scsw)
+{
+ /* Only valid if pmcw.dnv == 1*/
+ return 1;
+}
+
+/**
+ * scsw_tm_is_valid_actl - check actl field validity
+ * @scsw: pointer to scsw
+ *
+ * Return non-zero if the actl field of the specified transport mode scsw is
+ * valid, zero otherwise.
+ */
+static inline int scsw_tm_is_valid_actl(union scsw *scsw)
+{
+ /* Only valid if pmcw.dnv == 1*/
+ return 1;
+}
+
+/**
+ * scsw_tm_is_valid_stctl - check stctl field validity
+ * @scsw: pointer to scsw
+ *
+ * Return non-zero if the stctl field of the specified transport mode scsw is
+ * valid, zero otherwise.
+ */
+static inline int scsw_tm_is_valid_stctl(union scsw *scsw)
+{
+ /* Only valid if pmcw.dnv == 1*/
+ return 1;
+}
+
+/**
+ * scsw_tm_is_valid_dstat - check dstat field validity
+ * @scsw: pointer to scsw
+ *
+ * Return non-zero if the dstat field of the specified transport mode scsw is
+ * valid, zero otherwise.
+ */
+static inline int scsw_tm_is_valid_dstat(union scsw *scsw)
+{
+ return (scsw->tm.stctl & SCSW_STCTL_STATUS_PEND) &&
+ (scsw->tm.cc != 3);
+}
+
+/**
+ * scsw_tm_is_valid_cstat - check cstat field validity
+ * @scsw: pointer to scsw
+ *
+ * Return non-zero if the cstat field of the specified transport mode scsw is
+ * valid, zero otherwise.
+ */
+static inline int scsw_tm_is_valid_cstat(union scsw *scsw)
+{
+ return (scsw->tm.stctl & SCSW_STCTL_STATUS_PEND) &&
+ (scsw->tm.cc != 3);
+}
+
+/**
+ * scsw_tm_is_valid_fcxs - check fcxs field validity
+ * @scsw: pointer to scsw
+ *
+ * Return non-zero if the fcxs field of the specified transport mode scsw is
+ * valid, zero otherwise.
+ */
+static inline int scsw_tm_is_valid_fcxs(union scsw *scsw)
+{
+ return 1;
+}
+
+/**
+ * scsw_tm_is_valid_schxs - check schxs field validity
+ * @scsw: pointer to scsw
+ *
+ * Return non-zero if the schxs field of the specified transport mode scsw is
+ * valid, zero otherwise.
+ */
+static inline int scsw_tm_is_valid_schxs(union scsw *scsw)
+{
+ return (scsw->tm.cstat & (SCHN_STAT_PROG_CHECK |
+ SCHN_STAT_INTF_CTRL_CHK |
+ SCHN_STAT_PROT_CHECK |
+ SCHN_STAT_CHN_DATA_CHK));
+}
+
+/**
+ * scsw_is_valid_actl - check actl field validity
+ * @scsw: pointer to scsw
+ *
+ * Return non-zero if the actl field of the specified scsw is valid,
+ * regardless of whether it is a transport mode or command mode scsw.
+ * Return zero if the field does not contain a valid value.
+ */
+static inline int scsw_is_valid_actl(union scsw *scsw)
+{
+ if (scsw_is_tm(scsw))
+ return scsw_tm_is_valid_actl(scsw);
+ else
+ return scsw_cmd_is_valid_actl(scsw);
+}
+
+/**
+ * scsw_is_valid_cc - check cc field validity
+ * @scsw: pointer to scsw
+ *
+ * Return non-zero if the cc field of the specified scsw is valid,
+ * regardless of whether it is a transport mode or command mode scsw.
+ * Return zero if the field does not contain a valid value.
+ */
+static inline int scsw_is_valid_cc(union scsw *scsw)
+{
+ if (scsw_is_tm(scsw))
+ return scsw_tm_is_valid_cc(scsw);
+ else
+ return scsw_cmd_is_valid_cc(scsw);
+}
+
+/**
+ * scsw_is_valid_cstat - check cstat field validity
+ * @scsw: pointer to scsw
+ *
+ * Return non-zero if the cstat field of the specified scsw is valid,
+ * regardless of whether it is a transport mode or command mode scsw.
+ * Return zero if the field does not contain a valid value.
+ */
+static inline int scsw_is_valid_cstat(union scsw *scsw)
+{
+ if (scsw_is_tm(scsw))
+ return scsw_tm_is_valid_cstat(scsw);
+ else
+ return scsw_cmd_is_valid_cstat(scsw);
+}
+
+/**
+ * scsw_is_valid_dstat - check dstat field validity
+ * @scsw: pointer to scsw
+ *
+ * Return non-zero if the dstat field of the specified scsw is valid,
+ * regardless of whether it is a transport mode or command mode scsw.
+ * Return zero if the field does not contain a valid value.
+ */
+static inline int scsw_is_valid_dstat(union scsw *scsw)
+{
+ if (scsw_is_tm(scsw))
+ return scsw_tm_is_valid_dstat(scsw);
+ else
+ return scsw_cmd_is_valid_dstat(scsw);
+}
+
+/**
+ * scsw_is_valid_ectl - check ectl field validity
+ * @scsw: pointer to scsw
+ *
+ * Return non-zero if the ectl field of the specified scsw is valid,
+ * regardless of whether it is a transport mode or command mode scsw.
+ * Return zero if the field does not contain a valid value.
+ */
+static inline int scsw_is_valid_ectl(union scsw *scsw)
+{
+ if (scsw_is_tm(scsw))
+ return scsw_tm_is_valid_ectl(scsw);
+ else
+ return scsw_cmd_is_valid_ectl(scsw);
+}
+
+/**
+ * scsw_is_valid_eswf - check eswf field validity
+ * @scsw: pointer to scsw
+ *
+ * Return non-zero if the eswf field of the specified scsw is valid,
+ * regardless of whether it is a transport mode or command mode scsw.
+ * Return zero if the field does not contain a valid value.
+ */
+static inline int scsw_is_valid_eswf(union scsw *scsw)
+{
+ if (scsw_is_tm(scsw))
+ return scsw_tm_is_valid_eswf(scsw);
+ else
+ return scsw_cmd_is_valid_eswf(scsw);
+}
+
+/**
+ * scsw_is_valid_fctl - check fctl field validity
+ * @scsw: pointer to scsw
+ *
+ * Return non-zero if the fctl field of the specified scsw is valid,
+ * regardless of whether it is a transport mode or command mode scsw.
+ * Return zero if the field does not contain a valid value.
+ */
+static inline int scsw_is_valid_fctl(union scsw *scsw)
+{
+ if (scsw_is_tm(scsw))
+ return scsw_tm_is_valid_fctl(scsw);
+ else
+ return scsw_cmd_is_valid_fctl(scsw);
+}
+
+/**
+ * scsw_is_valid_key - check key field validity
+ * @scsw: pointer to scsw
+ *
+ * Return non-zero if the key field of the specified scsw is valid,
+ * regardless of whether it is a transport mode or command mode scsw.
+ * Return zero if the field does not contain a valid value.
+ */
+static inline int scsw_is_valid_key(union scsw *scsw)
+{
+ if (scsw_is_tm(scsw))
+ return scsw_tm_is_valid_key(scsw);
+ else
+ return scsw_cmd_is_valid_key(scsw);
+}
+
+/**
+ * scsw_is_valid_pno - check pno field validity
+ * @scsw: pointer to scsw
+ *
+ * Return non-zero if the pno field of the specified scsw is valid,
+ * regardless of whether it is a transport mode or command mode scsw.
+ * Return zero if the field does not contain a valid value.
+ */
+static inline int scsw_is_valid_pno(union scsw *scsw)
+{
+ if (scsw_is_tm(scsw))
+ return scsw_tm_is_valid_pno(scsw);
+ else
+ return scsw_cmd_is_valid_pno(scsw);
+}
+
+/**
+ * scsw_is_valid_stctl - check stctl field validity
+ * @scsw: pointer to scsw
+ *
+ * Return non-zero if the stctl field of the specified scsw is valid,
+ * regardless of whether it is a transport mode or command mode scsw.
+ * Return zero if the field does not contain a valid value.
+ */
+static inline int scsw_is_valid_stctl(union scsw *scsw)
+{
+ if (scsw_is_tm(scsw))
+ return scsw_tm_is_valid_stctl(scsw);
+ else
+ return scsw_cmd_is_valid_stctl(scsw);
+}
+
+/**
+ * scsw_cmd_is_solicited - check for solicited scsw
+ * @scsw: pointer to scsw
+ *
+ * Return non-zero if the command mode scsw indicates that the associated
+ * status condition is solicited, zero if it is unsolicited.
+ */
+static inline int scsw_cmd_is_solicited(union scsw *scsw)
+{
+ return (scsw->cmd.cc != 0) || (scsw->cmd.stctl !=
+ (SCSW_STCTL_STATUS_PEND | SCSW_STCTL_ALERT_STATUS));
+}
+
+/**
+ * scsw_tm_is_solicited - check for solicited scsw
+ * @scsw: pointer to scsw
+ *
+ * Return non-zero if the transport mode scsw indicates that the associated
+ * status condition is solicited, zero if it is unsolicited.
+ */
+static inline int scsw_tm_is_solicited(union scsw *scsw)
+{
+ return (scsw->tm.cc != 0) || (scsw->tm.stctl !=
+ (SCSW_STCTL_STATUS_PEND | SCSW_STCTL_ALERT_STATUS));
+}
+
+/**
+ * scsw_is_solicited - check for solicited scsw
+ * @scsw: pointer to scsw
+ *
+ * Return non-zero if the transport or command mode scsw indicates that the
+ * associated status condition is solicited, zero if it is unsolicited.
+ */
+static inline int scsw_is_solicited(union scsw *scsw)
+{
+ if (scsw_is_tm(scsw))
+ return scsw_tm_is_solicited(scsw);
+ else
+ return scsw_cmd_is_solicited(scsw);
+}
+
+#endif /* _ASM_S390_SCSW_H_ */
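
The mode-agnostic accessors above let an interrupt handler test and read SCSW fields without caring whether the subchannel ran in command or transport mode. A hedged sketch of the usual pattern — validity check first, then the field — with an invented function name:

#include <asm/scsw.h>

/* Sketch: did the I/O end with channel end + device end and no channel error? */
static int example_io_completed_ok(union scsw *scsw)
{
        u32 dstat, cstat;

        if (!scsw_is_valid_dstat(scsw) || !scsw_is_valid_cstat(scsw))
                return 0;
        dstat = scsw_dstat(scsw);
        cstat = scsw_cstat(scsw);
        return cstat == 0 &&
               (dstat & (DEV_STAT_CHN_END | DEV_STAT_DEV_END)) ==
                        (DEV_STAT_CHN_END | DEV_STAT_DEV_END);
}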
diff --git a/arch/s390/include/asm/seccomp.h b/arch/s390/include/asm/seccomp.h
new file mode 100644
index 000000000..795bbe0d7
--- /dev/null
+++ b/arch/s390/include/asm/seccomp.h
@@ -0,0 +1,19 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_S390_SECCOMP_H
+#define _ASM_S390_SECCOMP_H
+
+#include <linux/unistd.h>
+
+#define __NR_seccomp_read __NR_read
+#define __NR_seccomp_write __NR_write
+#define __NR_seccomp_exit __NR_exit
+#define __NR_seccomp_sigreturn __NR_sigreturn
+
+#define __NR_seccomp_read_32 __NR_read
+#define __NR_seccomp_write_32 __NR_write
+#define __NR_seccomp_exit_32 __NR_exit
+#define __NR_seccomp_sigreturn_32 __NR_sigreturn
+
+#include <asm-generic/seccomp.h>
+
+#endif /* _ASM_S390_SECCOMP_H */
diff --git a/arch/s390/include/asm/sections.h b/arch/s390/include/asm/sections.h
new file mode 100644
index 000000000..724faede8
--- /dev/null
+++ b/arch/s390/include/asm/sections.h
@@ -0,0 +1,7 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _S390_SECTIONS_H
+#define _S390_SECTIONS_H
+
+#include <asm-generic/sections.h>
+
+#endif
diff --git a/arch/s390/include/asm/segment.h b/arch/s390/include/asm/segment.h
new file mode 100644
index 000000000..97a0582b8
--- /dev/null
+++ b/arch/s390/include/asm/segment.h
@@ -0,0 +1,5 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_SEGMENT_H
+#define _ASM_SEGMENT_H
+
+#endif
diff --git a/arch/s390/include/asm/serial.h b/arch/s390/include/asm/serial.h
new file mode 100644
index 000000000..aaf85a690
--- /dev/null
+++ b/arch/s390/include/asm/serial.h
@@ -0,0 +1,7 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_S390_SERIAL_H
+#define _ASM_S390_SERIAL_H
+
+#define BASE_BAUD 0
+
+#endif /* _ASM_S390_SERIAL_H */
diff --git a/arch/s390/include/asm/set_memory.h b/arch/s390/include/asm/set_memory.h
new file mode 100644
index 000000000..c59a83536
--- /dev/null
+++ b/arch/s390/include/asm/set_memory.h
@@ -0,0 +1,32 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASMS390_SET_MEMORY_H
+#define _ASMS390_SET_MEMORY_H
+
+#define SET_MEMORY_RO 1UL
+#define SET_MEMORY_RW 2UL
+#define SET_MEMORY_NX 4UL
+#define SET_MEMORY_X 8UL
+
+int __set_memory(unsigned long addr, int numpages, unsigned long flags);
+
+static inline int set_memory_ro(unsigned long addr, int numpages)
+{
+ return __set_memory(addr, numpages, SET_MEMORY_RO);
+}
+
+static inline int set_memory_rw(unsigned long addr, int numpages)
+{
+ return __set_memory(addr, numpages, SET_MEMORY_RW);
+}
+
+static inline int set_memory_nx(unsigned long addr, int numpages)
+{
+ return __set_memory(addr, numpages, SET_MEMORY_NX);
+}
+
+static inline int set_memory_x(unsigned long addr, int numpages)
+{
+ return __set_memory(addr, numpages, SET_MEMORY_X);
+}
+
+#endif
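
Each wrapper above passes a single flag to __set_memory(). A minimal usage sketch with placeholder address and page count:

#include <asm/set_memory.h>

/* Sketch: write-protect npages pages starting at addr (page aligned), later undo it. */
static int example_protect_range(unsigned long addr, int npages)
{
        int rc;

        rc = set_memory_ro(addr, npages);
        if (rc)
                return rc;
        /* ... range is now read-only; make it writable again when done ... */
        return set_memory_rw(addr, npages);
}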
diff --git a/arch/s390/include/asm/setup.h b/arch/s390/include/asm/setup.h
new file mode 100644
index 000000000..1d66016f4
--- /dev/null
+++ b/arch/s390/include/asm/setup.h
@@ -0,0 +1,148 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * S390 version
+ * Copyright IBM Corp. 1999, 2017
+ */
+#ifndef _ASM_S390_SETUP_H
+#define _ASM_S390_SETUP_H
+
+#include <linux/const.h>
+#include <uapi/asm/setup.h>
+
+#define EP_OFFSET 0x10008
+#define EP_STRING "S390EP"
+#define PARMAREA 0x10400
+#define PARMAREA_END 0x11000
+
+/*
+ * Machine features detected in early.c
+ */
+
+#define MACHINE_FLAG_VM _BITUL(0)
+#define MACHINE_FLAG_KVM _BITUL(1)
+#define MACHINE_FLAG_LPAR _BITUL(2)
+#define MACHINE_FLAG_DIAG9C _BITUL(3)
+#define MACHINE_FLAG_ESOP _BITUL(4)
+#define MACHINE_FLAG_IDTE _BITUL(5)
+#define MACHINE_FLAG_DIAG44 _BITUL(6)
+#define MACHINE_FLAG_EDAT1 _BITUL(7)
+#define MACHINE_FLAG_EDAT2 _BITUL(8)
+#define MACHINE_FLAG_TOPOLOGY _BITUL(10)
+#define MACHINE_FLAG_TE _BITUL(11)
+#define MACHINE_FLAG_TLB_LC _BITUL(12)
+#define MACHINE_FLAG_VX _BITUL(13)
+#define MACHINE_FLAG_TLB_GUEST _BITUL(14)
+#define MACHINE_FLAG_NX _BITUL(15)
+#define MACHINE_FLAG_GS _BITUL(16)
+#define MACHINE_FLAG_SCC _BITUL(17)
+
+#define LPP_MAGIC _BITUL(31)
+#define LPP_PID_MASK _AC(0xffffffff, UL)
+
+/* Offsets to entry points in kernel/head.S */
+
+#define STARTUP_NORMAL_OFFSET 0x10000
+#define STARTUP_KDUMP_OFFSET 0x10010
+
+/* Offsets to parameters in kernel/head.S */
+
+#define IPL_DEVICE_OFFSET 0x10400
+#define INITRD_START_OFFSET 0x10408
+#define INITRD_SIZE_OFFSET 0x10410
+#define OLDMEM_BASE_OFFSET 0x10418
+#define OLDMEM_SIZE_OFFSET 0x10420
+#define COMMAND_LINE_OFFSET 0x10480
+
+#ifndef __ASSEMBLY__
+
+#include <asm/lowcore.h>
+#include <asm/types.h>
+
+#define IPL_DEVICE (*(unsigned long *) (IPL_DEVICE_OFFSET))
+#define INITRD_START (*(unsigned long *) (INITRD_START_OFFSET))
+#define INITRD_SIZE (*(unsigned long *) (INITRD_SIZE_OFFSET))
+#define OLDMEM_BASE (*(unsigned long *) (OLDMEM_BASE_OFFSET))
+#define OLDMEM_SIZE (*(unsigned long *) (OLDMEM_SIZE_OFFSET))
+#define COMMAND_LINE ((char *) (COMMAND_LINE_OFFSET))
+
+extern int memory_end_set;
+extern unsigned long memory_end;
+extern unsigned long max_physmem_end;
+
+extern void detect_memory_memblock(void);
+
+#define MACHINE_IS_VM (S390_lowcore.machine_flags & MACHINE_FLAG_VM)
+#define MACHINE_IS_KVM (S390_lowcore.machine_flags & MACHINE_FLAG_KVM)
+#define MACHINE_IS_LPAR (S390_lowcore.machine_flags & MACHINE_FLAG_LPAR)
+
+#define MACHINE_HAS_DIAG9C (S390_lowcore.machine_flags & MACHINE_FLAG_DIAG9C)
+#define MACHINE_HAS_ESOP (S390_lowcore.machine_flags & MACHINE_FLAG_ESOP)
+#define MACHINE_HAS_IDTE (S390_lowcore.machine_flags & MACHINE_FLAG_IDTE)
+#define MACHINE_HAS_DIAG44 (S390_lowcore.machine_flags & MACHINE_FLAG_DIAG44)
+#define MACHINE_HAS_EDAT1 (S390_lowcore.machine_flags & MACHINE_FLAG_EDAT1)
+#define MACHINE_HAS_EDAT2 (S390_lowcore.machine_flags & MACHINE_FLAG_EDAT2)
+#define MACHINE_HAS_TOPOLOGY (S390_lowcore.machine_flags & MACHINE_FLAG_TOPOLOGY)
+#define MACHINE_HAS_TE (S390_lowcore.machine_flags & MACHINE_FLAG_TE)
+#define MACHINE_HAS_TLB_LC (S390_lowcore.machine_flags & MACHINE_FLAG_TLB_LC)
+#define MACHINE_HAS_VX (S390_lowcore.machine_flags & MACHINE_FLAG_VX)
+#define MACHINE_HAS_TLB_GUEST (S390_lowcore.machine_flags & MACHINE_FLAG_TLB_GUEST)
+#define MACHINE_HAS_NX (S390_lowcore.machine_flags & MACHINE_FLAG_NX)
+#define MACHINE_HAS_GS (S390_lowcore.machine_flags & MACHINE_FLAG_GS)
+#define MACHINE_HAS_SCC (S390_lowcore.machine_flags & MACHINE_FLAG_SCC)
+
+/*
+ * Console mode. Override with conmode=
+ */
+extern unsigned int console_mode;
+extern unsigned int console_devno;
+extern unsigned int console_irq;
+
+extern char vmhalt_cmd[];
+extern char vmpoff_cmd[];
+
+#define CONSOLE_IS_UNDEFINED (console_mode == 0)
+#define CONSOLE_IS_SCLP (console_mode == 1)
+#define CONSOLE_IS_3215 (console_mode == 2)
+#define CONSOLE_IS_3270 (console_mode == 3)
+#define CONSOLE_IS_VT220 (console_mode == 4)
+#define CONSOLE_IS_HVC (console_mode == 5)
+#define SET_CONSOLE_SCLP do { console_mode = 1; } while (0)
+#define SET_CONSOLE_3215 do { console_mode = 2; } while (0)
+#define SET_CONSOLE_3270 do { console_mode = 3; } while (0)
+#define SET_CONSOLE_VT220 do { console_mode = 4; } while (0)
+#define SET_CONSOLE_HVC do { console_mode = 5; } while (0)
+
+#ifdef CONFIG_PFAULT
+extern int pfault_init(void);
+extern void pfault_fini(void);
+#else /* CONFIG_PFAULT */
+#define pfault_init() ({-1;})
+#define pfault_fini() do { } while (0)
+#endif /* CONFIG_PFAULT */
+
+#ifdef CONFIG_VMCP
+void vmcp_cma_reserve(void);
+#else
+static inline void vmcp_cma_reserve(void) { }
+#endif
+
+void report_user_fault(struct pt_regs *regs, long signr, int is_mm_fault);
+
+void cmma_init(void);
+void cmma_init_nodat(void);
+
+extern void (*_machine_restart)(char *command);
+extern void (*_machine_halt)(void);
+extern void (*_machine_power_off)(void);
+
+#else /* __ASSEMBLY__ */
+
+#define IPL_DEVICE (IPL_DEVICE_OFFSET)
+#define INITRD_START (INITRD_START_OFFSET)
+#define INITRD_SIZE (INITRD_SIZE_OFFSET)
+#define OLDMEM_BASE (OLDMEM_BASE_OFFSET)
+#define OLDMEM_SIZE (OLDMEM_SIZE_OFFSET)
+#define COMMAND_LINE (COMMAND_LINE_OFFSET)
+
+#endif /* __ASSEMBLY__ */
+#endif /* _ASM_S390_SETUP_H */
diff --git a/arch/s390/include/asm/shmparam.h b/arch/s390/include/asm/shmparam.h
new file mode 100644
index 000000000..e75d45649
--- /dev/null
+++ b/arch/s390/include/asm/shmparam.h
@@ -0,0 +1,12 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * S390 version
+ *
+ * Derived from "include/asm-i386/shmparam.h"
+ */
+#ifndef _ASM_S390_SHMPARAM_H
+#define _ASM_S390_SHMPARAM_H
+
+#define SHMLBA PAGE_SIZE /* attach addr a multiple of this */
+
+#endif /* _ASM_S390_SHMPARAM_H */
diff --git a/arch/s390/include/asm/signal.h b/arch/s390/include/asm/signal.h
new file mode 100644
index 000000000..7daf4d8b5
--- /dev/null
+++ b/arch/s390/include/asm/signal.h
@@ -0,0 +1,26 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * S390 version
+ *
+ * Derived from "include/asm-i386/signal.h"
+ */
+#ifndef _ASMS390_SIGNAL_H
+#define _ASMS390_SIGNAL_H
+
+#include <uapi/asm/signal.h>
+
+/* Most things should be clean enough to redefine this at will, if care
+ is taken to make libc match. */
+#include <asm/sigcontext.h>
+#define _NSIG _SIGCONTEXT_NSIG
+#define _NSIG_BPW _SIGCONTEXT_NSIG_BPW
+#define _NSIG_WORDS _SIGCONTEXT_NSIG_WORDS
+
+typedef unsigned long old_sigset_t; /* at least 32 bits */
+
+typedef struct {
+ unsigned long sig[_NSIG_WORDS];
+} sigset_t;
+
+#define __ARCH_HAS_SA_RESTORER
+#endif
diff --git a/arch/s390/include/asm/sigp.h b/arch/s390/include/asm/sigp.h
new file mode 100644
index 000000000..53ee795cd
--- /dev/null
+++ b/arch/s390/include/asm/sigp.h
@@ -0,0 +1,70 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __S390_ASM_SIGP_H
+#define __S390_ASM_SIGP_H
+
+/* SIGP order codes */
+#define SIGP_SENSE 1
+#define SIGP_EXTERNAL_CALL 2
+#define SIGP_EMERGENCY_SIGNAL 3
+#define SIGP_START 4
+#define SIGP_STOP 5
+#define SIGP_RESTART 6
+#define SIGP_STOP_AND_STORE_STATUS 9
+#define SIGP_INITIAL_CPU_RESET 11
+#define SIGP_CPU_RESET 12
+#define SIGP_SET_PREFIX 13
+#define SIGP_STORE_STATUS_AT_ADDRESS 14
+#define SIGP_SET_ARCHITECTURE 18
+#define SIGP_COND_EMERGENCY_SIGNAL 19
+#define SIGP_SENSE_RUNNING 21
+#define SIGP_SET_MULTI_THREADING 22
+#define SIGP_STORE_ADDITIONAL_STATUS 23
+
+/* SIGP condition codes */
+#define SIGP_CC_ORDER_CODE_ACCEPTED 0
+#define SIGP_CC_STATUS_STORED 1
+#define SIGP_CC_BUSY 2
+#define SIGP_CC_NOT_OPERATIONAL 3
+
+/* SIGP cpu status bits */
+
+#define SIGP_STATUS_INVALID_ORDER 0x00000002UL
+#define SIGP_STATUS_CHECK_STOP 0x00000010UL
+#define SIGP_STATUS_STOPPED 0x00000040UL
+#define SIGP_STATUS_EXT_CALL_PENDING 0x00000080UL
+#define SIGP_STATUS_INVALID_PARAMETER 0x00000100UL
+#define SIGP_STATUS_INCORRECT_STATE 0x00000200UL
+#define SIGP_STATUS_NOT_RUNNING 0x00000400UL
+
+#ifndef __ASSEMBLY__
+
+static inline int ____pcpu_sigp(u16 addr, u8 order, unsigned long parm,
+ u32 *status)
+{
+ register unsigned long reg1 asm ("1") = parm;
+ int cc;
+
+ asm volatile(
+ " sigp %1,%2,0(%3)\n"
+ " ipm %0\n"
+ " srl %0,28\n"
+ : "=d" (cc), "+d" (reg1) : "d" (addr), "a" (order) : "cc");
+ *status = reg1;
+ return cc;
+}
+
+static inline int __pcpu_sigp(u16 addr, u8 order, unsigned long parm,
+ u32 *status)
+{
+ u32 _status;
+ int cc;
+
+ cc = ____pcpu_sigp(addr, order, parm, &_status);
+ if (status && cc == SIGP_CC_STATUS_STORED)
+ *status = _status;
+ return cc;
+}
+
+#endif /* __ASSEMBLY__ */
+
+#endif /* __S390_ASM_SIGP_H */
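
A hedged example of the wrapper just defined — issue SIGP SENSE to a CPU address and look at the status bits only when the condition code says status was actually stored; the function name is invented:

#include <linux/types.h>
#include <asm/sigp.h>

/* Sketch: true if the addressed CPU reports itself as stopped. */
static bool example_cpu_is_stopped(u16 addr)
{
        u32 status = 0;

        if (__pcpu_sigp(addr, SIGP_SENSE, 0, &status) != SIGP_CC_STATUS_STORED)
                return false;
        return (status & SIGP_STATUS_STOPPED) != 0;
}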
diff --git a/arch/s390/include/asm/smp.h b/arch/s390/include/asm/smp.h
new file mode 100644
index 000000000..3907ead27
--- /dev/null
+++ b/arch/s390/include/asm/smp.h
@@ -0,0 +1,96 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright IBM Corp. 1999, 2012
+ * Author(s): Denis Joseph Barrow,
+ * Martin Schwidefsky <schwidefsky@de.ibm.com>,
+ * Heiko Carstens <heiko.carstens@de.ibm.com>,
+ */
+#ifndef __ASM_SMP_H
+#define __ASM_SMP_H
+
+#include <asm/sigp.h>
+
+#ifdef CONFIG_SMP
+
+#include <asm/lowcore.h>
+
+#define raw_smp_processor_id() (S390_lowcore.cpu_nr)
+
+extern struct mutex smp_cpu_state_mutex;
+extern unsigned int smp_cpu_mt_shift;
+extern unsigned int smp_cpu_mtid;
+extern __vector128 __initdata boot_cpu_vector_save_area[__NUM_VXRS];
+
+extern int __cpu_up(unsigned int cpu, struct task_struct *tidle);
+
+extern void arch_send_call_function_single_ipi(int cpu);
+extern void arch_send_call_function_ipi_mask(const struct cpumask *mask);
+
+extern void smp_call_online_cpu(void (*func)(void *), void *);
+extern void smp_call_ipl_cpu(void (*func)(void *), void *);
+extern void smp_emergency_stop(void);
+
+extern int smp_find_processor_id(u16 address);
+extern int smp_store_status(int cpu);
+extern void smp_save_dump_cpus(void);
+extern int smp_vcpu_scheduled(int cpu);
+extern void smp_yield_cpu(int cpu);
+extern void smp_cpu_set_polarization(int cpu, int val);
+extern int smp_cpu_get_polarization(int cpu);
+extern void smp_fill_possible_mask(void);
+extern void smp_detect_cpus(void);
+
+#else /* CONFIG_SMP */
+
+#define smp_cpu_mtid 0
+
+static inline void smp_call_ipl_cpu(void (*func)(void *), void *data)
+{
+ func(data);
+}
+
+static inline void smp_call_online_cpu(void (*func)(void *), void *data)
+{
+ func(data);
+}
+
+static inline void smp_emergency_stop(void)
+{
+}
+
+static inline int smp_find_processor_id(u16 address) { return 0; }
+static inline int smp_store_status(int cpu) { return 0; }
+static inline int smp_vcpu_scheduled(int cpu) { return 1; }
+static inline void smp_yield_cpu(int cpu) { }
+static inline void smp_fill_possible_mask(void) { }
+static inline void smp_detect_cpus(void) { }
+
+#endif /* CONFIG_SMP */
+
+static inline void smp_stop_cpu(void)
+{
+ u16 pcpu = stap();
+
+ for (;;) {
+ __pcpu_sigp(pcpu, SIGP_STOP, 0, NULL);
+ cpu_relax();
+ }
+}
+
+/* Return thread 0 CPU number as base CPU */
+static inline int smp_get_base_cpu(int cpu)
+{
+ return cpu - (cpu % (smp_cpu_mtid + 1));
+}
+
+#ifdef CONFIG_HOTPLUG_CPU
+extern int smp_rescan_cpus(void);
+extern void __noreturn cpu_die(void);
+extern void __cpu_die(unsigned int cpu);
+extern int __cpu_disable(void);
+#else
+static inline int smp_rescan_cpus(void) { return 0; }
+static inline void cpu_die(void) { }
+#endif
+
+#endif /* __ASM_SMP_H */
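
smp_get_base_cpu() above relies on logical CPUs being numbered thread-by-thread within a core: with smp_cpu_mtid == 1 (two threads per core), CPUs 4 and 5 both map to base CPU 4. A small, hedged sketch that visits only the first thread of each online core; the helper name is invented:

#include <linux/cpumask.h>
#include <asm/smp.h>

/* Sketch: call fn once per core, on the core's base (thread 0) CPU number. */
static void example_for_each_base_cpu(void (*fn)(int cpu))
{
        int cpu;

        for_each_online_cpu(cpu)
                if (cpu == smp_get_base_cpu(cpu))
                        fn(cpu);
}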
diff --git a/arch/s390/include/asm/sparsemem.h b/arch/s390/include/asm/sparsemem.h
new file mode 100644
index 000000000..c54989360
--- /dev/null
+++ b/arch/s390/include/asm/sparsemem.h
@@ -0,0 +1,8 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_S390_SPARSEMEM_H
+#define _ASM_S390_SPARSEMEM_H
+
+#define SECTION_SIZE_BITS 28
+#define MAX_PHYSMEM_BITS CONFIG_MAX_PHYSMEM_BITS
+
+#endif /* _ASM_S390_SPARSEMEM_H */
diff --git a/arch/s390/include/asm/spinlock.h b/arch/s390/include/asm/spinlock.h
new file mode 100644
index 000000000..0a29588aa
--- /dev/null
+++ b/arch/s390/include/asm/spinlock.h
@@ -0,0 +1,159 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * S390 version
+ * Copyright IBM Corp. 1999
+ * Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com)
+ *
+ * Derived from "include/asm-i386/spinlock.h"
+ */
+
+#ifndef __ASM_SPINLOCK_H
+#define __ASM_SPINLOCK_H
+
+#include <linux/smp.h>
+#include <asm/atomic_ops.h>
+#include <asm/barrier.h>
+#include <asm/processor.h>
+#include <asm/alternative.h>
+
+#define SPINLOCK_LOCKVAL (S390_lowcore.spinlock_lockval)
+
+extern int spin_retry;
+
+#ifndef CONFIG_SMP
+static inline bool arch_vcpu_is_preempted(int cpu) { return false; }
+#else
+bool arch_vcpu_is_preempted(int cpu);
+#endif
+
+#define vcpu_is_preempted arch_vcpu_is_preempted
+
+/*
+ * Simple spin lock operations. There are two variants, one clears IRQ's
+ * on the local processor, one does not.
+ *
+ * We make no fairness assumptions. They have a cost.
+ *
+ * (the type definitions are in asm/spinlock_types.h)
+ */
+
+void arch_spin_relax(arch_spinlock_t *lock);
+#define arch_spin_relax arch_spin_relax
+
+void arch_spin_lock_wait(arch_spinlock_t *);
+int arch_spin_trylock_retry(arch_spinlock_t *);
+void arch_spin_lock_setup(int cpu);
+
+static inline u32 arch_spin_lockval(int cpu)
+{
+ return cpu + 1;
+}
+
+static inline int arch_spin_value_unlocked(arch_spinlock_t lock)
+{
+ return lock.lock == 0;
+}
+
+static inline int arch_spin_is_locked(arch_spinlock_t *lp)
+{
+ return READ_ONCE(lp->lock) != 0;
+}
+
+static inline int arch_spin_trylock_once(arch_spinlock_t *lp)
+{
+ barrier();
+ return likely(__atomic_cmpxchg_bool(&lp->lock, 0, SPINLOCK_LOCKVAL));
+}
+
+static inline void arch_spin_lock(arch_spinlock_t *lp)
+{
+ if (!arch_spin_trylock_once(lp))
+ arch_spin_lock_wait(lp);
+}
+
+static inline void arch_spin_lock_flags(arch_spinlock_t *lp,
+ unsigned long flags)
+{
+ if (!arch_spin_trylock_once(lp))
+ arch_spin_lock_wait(lp);
+}
+#define arch_spin_lock_flags arch_spin_lock_flags
+
+static inline int arch_spin_trylock(arch_spinlock_t *lp)
+{
+ if (!arch_spin_trylock_once(lp))
+ return arch_spin_trylock_retry(lp);
+ return 1;
+}
+
+static inline void arch_spin_unlock(arch_spinlock_t *lp)
+{
+ typecheck(int, lp->lock);
+ asm volatile(
+ ALTERNATIVE("", ".long 0xb2fa0070", 49) /* NIAI 7 */
+ " sth %1,%0\n"
+ : "=Q" (((unsigned short *) &lp->lock)[1])
+ : "d" (0) : "cc", "memory");
+}
+
+/*
+ * Read-write spinlocks, allowing multiple readers
+ * but only one writer.
+ *
+ * NOTE! it is quite common to have readers in interrupts
+ * but no interrupt writers. For those circumstances we
+ * can "mix" irq-safe locks - any writer needs to get a
+ * irq-safe write-lock, but readers can get non-irqsafe
+ * read-locks.
+ */
+
+#define arch_read_relax(rw) barrier()
+#define arch_write_relax(rw) barrier()
+
+void arch_read_lock_wait(arch_rwlock_t *lp);
+void arch_write_lock_wait(arch_rwlock_t *lp);
+
+static inline void arch_read_lock(arch_rwlock_t *rw)
+{
+ int old;
+
+ old = __atomic_add(1, &rw->cnts);
+ if (old & 0xffff0000)
+ arch_read_lock_wait(rw);
+}
+
+static inline void arch_read_unlock(arch_rwlock_t *rw)
+{
+ __atomic_add_const_barrier(-1, &rw->cnts);
+}
+
+static inline void arch_write_lock(arch_rwlock_t *rw)
+{
+ if (!__atomic_cmpxchg_bool(&rw->cnts, 0, 0x30000))
+ arch_write_lock_wait(rw);
+}
+
+static inline void arch_write_unlock(arch_rwlock_t *rw)
+{
+ __atomic_add_barrier(-0x30000, &rw->cnts);
+}
+
+
+static inline int arch_read_trylock(arch_rwlock_t *rw)
+{
+ int old;
+
+ old = READ_ONCE(rw->cnts);
+ return (!(old & 0xffff0000) &&
+ __atomic_cmpxchg_bool(&rw->cnts, old, old + 1));
+}
+
+static inline int arch_write_trylock(arch_rwlock_t *rw)
+{
+ int old;
+
+ old = READ_ONCE(rw->cnts);
+ return !old && __atomic_cmpxchg_bool(&rw->cnts, 0, 0x30000);
+}
+
+#endif /* __ASM_SPINLOCK_H */
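The lock word layout used above (0 means free, the owner's SPINLOCK_LOCKVAL, i.e. cpu + 1 per arch_spin_lockval(), means taken) can be modelled outside the kernel. The sketch below is illustrative only: it uses C11 atomics instead of the s390 compare-and-swap primitives and ignores the waiter state that arch_spin_lock_wait() keeps in the upper halfword of the lock word.

#include <stdatomic.h>
#include <stdbool.h>

struct model_spinlock { _Atomic int lock; };	/* 0 == free, cpu + 1 == owner */

static bool model_trylock(struct model_spinlock *lp, int cpu)
{
	int expected = 0;

	/* mirrors arch_spin_trylock_once(): one compare-and-swap from 0 */
	return atomic_compare_exchange_strong(&lp->lock, &expected, cpu + 1);
}

static void model_unlock(struct model_spinlock *lp)
{
	/* simplified stand-in for arch_spin_unlock(): the real code stores 0
	   only into the lower halfword and may prepend a NIAI hint */
	atomic_store_explicit(&lp->lock, 0, memory_order_release);
}

int main(void)
{
	struct model_spinlock lp = { 0 };
	bool got = model_trylock(&lp, 3);		/* lock word becomes 4 */
	bool contended = model_trylock(&lp, 5);		/* fails while held */

	model_unlock(&lp);
	return (got && !contended) ? 0 : 1;
}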
diff --git a/arch/s390/include/asm/spinlock_types.h b/arch/s390/include/asm/spinlock_types.h
new file mode 100644
index 000000000..cfed272e4
--- /dev/null
+++ b/arch/s390/include/asm/spinlock_types.h
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __ASM_SPINLOCK_TYPES_H
+#define __ASM_SPINLOCK_TYPES_H
+
+#ifndef __LINUX_SPINLOCK_TYPES_H
+# error "please don't include this file directly"
+#endif
+
+typedef struct {
+ int lock;
+} __attribute__ ((aligned (4))) arch_spinlock_t;
+
+#define __ARCH_SPIN_LOCK_UNLOCKED { .lock = 0, }
+
+typedef struct {
+ int cnts;
+ arch_spinlock_t wait;
+} arch_rwlock_t;
+
+#define __ARCH_RW_LOCK_UNLOCKED { 0 }
+
+#endif
diff --git a/arch/s390/include/asm/stp.h b/arch/s390/include/asm/stp.h
new file mode 100644
index 000000000..f0ddefb06
--- /dev/null
+++ b/arch/s390/include/asm/stp.h
@@ -0,0 +1,52 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright IBM Corp. 2006
+ * Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com)
+ */
+#ifndef __S390_STP_H
+#define __S390_STP_H
+
+/* notifier for syncs */
+extern struct atomic_notifier_head s390_epoch_delta_notifier;
+
+/* STP interruption parameter */
+struct stp_irq_parm {
+ unsigned int _pad0 : 14;
+ unsigned int tsc : 1; /* Timing status change */
+ unsigned int lac : 1; /* Link availability change */
+ unsigned int tcpc : 1; /* Time control parameter change */
+ unsigned int _pad2 : 15;
+} __attribute__ ((packed));
+
+#define STP_OP_SYNC 1
+#define STP_OP_CTRL 3
+
+struct stp_sstpi {
+ unsigned int rsvd0;
+ unsigned int rsvd1 : 8;
+ unsigned int stratum : 8;
+ unsigned int vbits : 16;
+ unsigned int leaps : 16;
+ unsigned int tmd : 4;
+ unsigned int ctn : 4;
+ unsigned int rsvd2 : 3;
+ unsigned int c : 1;
+ unsigned int tst : 4;
+ unsigned int tzo : 16;
+ unsigned int dsto : 16;
+ unsigned int ctrl : 16;
+ unsigned int rsvd3 : 16;
+ unsigned int tto;
+ unsigned int rsvd4;
+ unsigned int ctnid[3];
+ unsigned int rsvd5;
+ unsigned int todoff[4];
+ unsigned int rsvd6[48];
+} __attribute__ ((packed));
+
+/* Functions needed by the machine check handler */
+int stp_sync_check(void);
+int stp_island_check(void);
+void stp_queue_work(void);
+
+#endif /* __S390_STP_H */
diff --git a/arch/s390/include/asm/string.h b/arch/s390/include/asm/string.h
new file mode 100644
index 000000000..50f26fc9a
--- /dev/null
+++ b/arch/s390/include/asm/string.h
@@ -0,0 +1,166 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * S390 version
+ * Copyright IBM Corp. 1999
+ * Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com),
+ */
+
+#ifndef _S390_STRING_H_
+#define _S390_STRING_H_
+
+#ifndef _LINUX_TYPES_H
+#include <linux/types.h>
+#endif
+
+#define __HAVE_ARCH_MEMCHR /* inline & arch function */
+#define __HAVE_ARCH_MEMCMP /* arch function */
+#define __HAVE_ARCH_MEMCPY /* gcc builtin & arch function */
+#define __HAVE_ARCH_MEMMOVE /* gcc builtin & arch function */
+#define __HAVE_ARCH_MEMSCAN /* inline & arch function */
+#define __HAVE_ARCH_MEMSET /* gcc builtin & arch function */
+#define __HAVE_ARCH_MEMSET16 /* arch function */
+#define __HAVE_ARCH_MEMSET32 /* arch function */
+#define __HAVE_ARCH_MEMSET64 /* arch function */
+#define __HAVE_ARCH_STRCAT /* inline & arch function */
+#define __HAVE_ARCH_STRCMP /* arch function */
+#define __HAVE_ARCH_STRCPY /* inline & arch function */
+#define __HAVE_ARCH_STRLCAT /* arch function */
+#define __HAVE_ARCH_STRLCPY /* arch function */
+#define __HAVE_ARCH_STRLEN /* inline & arch function */
+#define __HAVE_ARCH_STRNCAT /* arch function */
+#define __HAVE_ARCH_STRNCPY /* arch function */
+#define __HAVE_ARCH_STRNLEN /* inline & arch function */
+#define __HAVE_ARCH_STRRCHR /* arch function */
+#define __HAVE_ARCH_STRSTR /* arch function */
+
+/* Prototypes for non-inlined arch string functions. */
+int memcmp(const void *s1, const void *s2, size_t n);
+void *memcpy(void *dest, const void *src, size_t n);
+void *memset(void *s, int c, size_t n);
+void *memmove(void *dest, const void *src, size_t n);
+int strcmp(const char *s1, const char *s2);
+size_t strlcat(char *dest, const char *src, size_t n);
+size_t strlcpy(char *dest, const char *src, size_t size);
+char *strncat(char *dest, const char *src, size_t n);
+char *strncpy(char *dest, const char *src, size_t n);
+char *strrchr(const char *s, int c);
+char *strstr(const char *s1, const char *s2);
+
+#undef __HAVE_ARCH_STRCHR
+#undef __HAVE_ARCH_STRNCHR
+#undef __HAVE_ARCH_STRNCMP
+#undef __HAVE_ARCH_STRPBRK
+#undef __HAVE_ARCH_STRSEP
+#undef __HAVE_ARCH_STRSPN
+
+void *__memset16(uint16_t *s, uint16_t v, size_t count);
+void *__memset32(uint32_t *s, uint32_t v, size_t count);
+void *__memset64(uint64_t *s, uint64_t v, size_t count);
+
+static inline void *memset16(uint16_t *s, uint16_t v, size_t count)
+{
+ return __memset16(s, v, count * sizeof(v));
+}
+
+static inline void *memset32(uint32_t *s, uint32_t v, size_t count)
+{
+ return __memset32(s, v, count * sizeof(v));
+}
+
+static inline void *memset64(uint64_t *s, uint64_t v, size_t count)
+{
+ return __memset64(s, v, count * sizeof(v));
+}
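Note that the count argument of memset16/32/64 is a number of elements, not bytes; the wrappers above scale it by sizeof(v) before calling the double-underscore primitives. A portable stand-in (illustrative only) with the same element-count semantics:

#include <assert.h>
#include <stddef.h>
#include <stdint.h>

static void *model_memset32(uint32_t *s, uint32_t v, size_t count)
{
	for (size_t i = 0; i < count; i++)
		s[i] = v;
	return s;
}

int main(void)
{
	uint32_t buf[4];

	model_memset32(buf, 0xdeadbeef, 4);	/* 4 elements == 16 bytes */
	assert(buf[0] == 0xdeadbeef && buf[3] == 0xdeadbeef);
	return 0;
}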
+
+#if !defined(IN_ARCH_STRING_C) && (!defined(CONFIG_FORTIFY_SOURCE) || defined(__NO_FORTIFY))
+
+static inline void *memchr(const void * s, int c, size_t n)
+{
+ register int r0 asm("0") = (char) c;
+ const void *ret = s + n;
+
+ asm volatile(
+ "0: srst %0,%1\n"
+ " jo 0b\n"
+ " jl 1f\n"
+ " la %0,0\n"
+ "1:"
+ : "+a" (ret), "+&a" (s) : "d" (r0) : "cc", "memory");
+ return (void *) ret;
+}
+
+static inline void *memscan(void *s, int c, size_t n)
+{
+ register int r0 asm("0") = (char) c;
+ const void *ret = s + n;
+
+ asm volatile(
+ "0: srst %0,%1\n"
+ " jo 0b\n"
+ : "+a" (ret), "+&a" (s) : "d" (r0) : "cc", "memory");
+ return (void *) ret;
+}
+
+static inline char *strcat(char *dst, const char *src)
+{
+ register int r0 asm("0") = 0;
+ unsigned long dummy;
+ char *ret = dst;
+
+ asm volatile(
+ "0: srst %0,%1\n"
+ " jo 0b\n"
+ "1: mvst %0,%2\n"
+ " jo 1b"
+ : "=&a" (dummy), "+a" (dst), "+a" (src)
+ : "d" (r0), "0" (0) : "cc", "memory" );
+ return ret;
+}
+
+static inline char *strcpy(char *dst, const char *src)
+{
+ register int r0 asm("0") = 0;
+ char *ret = dst;
+
+ asm volatile(
+ "0: mvst %0,%1\n"
+ " jo 0b"
+ : "+&a" (dst), "+&a" (src) : "d" (r0)
+ : "cc", "memory");
+ return ret;
+}
+
+static inline size_t strlen(const char *s)
+{
+ register unsigned long r0 asm("0") = 0;
+ const char *tmp = s;
+
+ asm volatile(
+ "0: srst %0,%1\n"
+ " jo 0b"
+ : "+d" (r0), "+a" (tmp) : : "cc", "memory");
+ return r0 - (unsigned long) s;
+}
+
+static inline size_t strnlen(const char * s, size_t n)
+{
+ register int r0 asm("0") = 0;
+ const char *tmp = s;
+ const char *end = s + n;
+
+ asm volatile(
+ "0: srst %0,%1\n"
+ " jo 0b"
+ : "+a" (end), "+a" (tmp) : "d" (r0) : "cc", "memory");
+ return end - s;
+}
+#else /* IN_ARCH_STRING_C */
+void *memchr(const void * s, int c, size_t n);
+void *memscan(void *s, int c, size_t n);
+char *strcat(char *dst, const char *src);
+char *strcpy(char *dst, const char *src);
+size_t strlen(const char *s);
+size_t strnlen(const char * s, size_t n);
+#endif /* !IN_ARCH_STRING_C */
+
+#endif /* __S390_STRING_H_ */
diff --git a/arch/s390/include/asm/switch_to.h b/arch/s390/include/asm/switch_to.h
new file mode 100644
index 000000000..c61b2cc1a
--- /dev/null
+++ b/arch/s390/include/asm/switch_to.h
@@ -0,0 +1,49 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright IBM Corp. 1999, 2009
+ *
+ * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>
+ */
+
+#ifndef __ASM_SWITCH_TO_H
+#define __ASM_SWITCH_TO_H
+
+#include <linux/thread_info.h>
+#include <asm/fpu/api.h>
+#include <asm/ptrace.h>
+#include <asm/guarded_storage.h>
+
+extern struct task_struct *__switch_to(void *, void *);
+extern void update_cr_regs(struct task_struct *task);
+
+static inline void save_access_regs(unsigned int *acrs)
+{
+ typedef struct { int _[NUM_ACRS]; } acrstype;
+
+ asm volatile("stam 0,15,%0" : "=Q" (*(acrstype *)acrs));
+}
+
+static inline void restore_access_regs(unsigned int *acrs)
+{
+ typedef struct { int _[NUM_ACRS]; } acrstype;
+
+ asm volatile("lam 0,15,%0" : : "Q" (*(acrstype *)acrs));
+}
+
+#define switch_to(prev, next, last) do { \
+ /* save_fpu_regs() sets the CIF_FPU flag, which enforces \
+ * a restore of the floating point / vector registers as \
+ * soon as the next task returns to user space \
+ */ \
+ save_fpu_regs(); \
+ save_access_regs(&prev->thread.acrs[0]); \
+ save_ri_cb(prev->thread.ri_cb); \
+ save_gs_cb(prev->thread.gs_cb); \
+ update_cr_regs(next); \
+ restore_access_regs(&next->thread.acrs[0]); \
+ restore_ri_cb(next->thread.ri_cb, prev->thread.ri_cb); \
+ restore_gs_cb(next->thread.gs_cb); \
+ prev = __switch_to(prev, next); \
+} while (0)
+
+#endif /* __ASM_SWITCH_TO_H */
diff --git a/arch/s390/include/asm/syscall.h b/arch/s390/include/asm/syscall.h
new file mode 100644
index 000000000..2becba800
--- /dev/null
+++ b/arch/s390/include/asm/syscall.h
@@ -0,0 +1,113 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Access to user system call parameters and results
+ *
+ * Copyright IBM Corp. 2008
+ * Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com)
+ */
+
+#ifndef _ASM_SYSCALL_H
+#define _ASM_SYSCALL_H 1
+
+#include <uapi/linux/audit.h>
+#include <linux/sched.h>
+#include <linux/err.h>
+#include <asm/ptrace.h>
+
+/*
+ * The syscall table always contains 32 bit pointers since we know that the
+ * address of the function to be called is (way) below 4GB. So the "int"
+ * type here is what we want [need] for both 32 bit and 64 bit systems.
+ */
+extern const unsigned int sys_call_table[];
+extern const unsigned int sys_call_table_emu[];
+
+static inline long syscall_get_nr(struct task_struct *task,
+ struct pt_regs *regs)
+{
+ return test_pt_regs_flag(regs, PIF_SYSCALL) ?
+ (regs->int_code & 0xffff) : -1;
+}
+
+static inline void syscall_rollback(struct task_struct *task,
+ struct pt_regs *regs)
+{
+ regs->gprs[2] = regs->orig_gpr2;
+}
+
+static inline long syscall_get_error(struct task_struct *task,
+ struct pt_regs *regs)
+{
+ unsigned long error = regs->gprs[2];
+#ifdef CONFIG_COMPAT
+ if (test_tsk_thread_flag(task, TIF_31BIT)) {
+ /*
+ * Sign-extend the value so (int)-EFOO becomes (long)-EFOO
+ * and will match correctly in comparisons.
+ */
+ error = (long)(int)error;
+ }
+#endif
+ return IS_ERR_VALUE(error) ? error : 0;
+}
+
+static inline long syscall_get_return_value(struct task_struct *task,
+ struct pt_regs *regs)
+{
+ return regs->gprs[2];
+}
+
+static inline void syscall_set_return_value(struct task_struct *task,
+ struct pt_regs *regs,
+ int error, long val)
+{
+ regs->gprs[2] = error ? error : val;
+}
+
+static inline void syscall_get_arguments(struct task_struct *task,
+ struct pt_regs *regs,
+ unsigned int i, unsigned int n,
+ unsigned long *args)
+{
+ unsigned long mask = -1UL;
+
+ /*
+ * No arguments for this syscall, there's nothing to do.
+ */
+ if (!n)
+ return;
+
+ BUG_ON(i + n > 6);
+#ifdef CONFIG_COMPAT
+ if (test_tsk_thread_flag(task, TIF_31BIT))
+ mask = 0xffffffff;
+#endif
+ while (n-- > 0)
+ if (i + n > 0)
+ args[n] = regs->gprs[2 + i + n] & mask;
+ if (i == 0)
+ args[0] = regs->orig_gpr2 & mask;
+}
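In register terms the loop above means: argument 0 of a system call is read from orig_gpr2 (gpr 2 as saved at syscall entry), while arguments 1-5 live in gprs 3-7. A hedged expansion of syscall_get_arguments(task, regs, 0, 6, args) for a 64-bit task (mask == -1UL), for illustration only:

	args[0] = regs->orig_gpr2;
	args[1] = regs->gprs[3];
	args[2] = regs->gprs[4];
	args[3] = regs->gprs[5];
	args[4] = regs->gprs[6];
	args[5] = regs->gprs[7];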
+
+static inline void syscall_set_arguments(struct task_struct *task,
+ struct pt_regs *regs,
+ unsigned int i, unsigned int n,
+ const unsigned long *args)
+{
+ BUG_ON(i + n > 6);
+ while (n-- > 0)
+ if (i + n > 0)
+ regs->gprs[2 + i + n] = args[n];
+ if (i == 0)
+ regs->orig_gpr2 = args[0];
+}
+
+static inline int syscall_get_arch(void)
+{
+#ifdef CONFIG_COMPAT
+ if (test_tsk_thread_flag(current, TIF_31BIT))
+ return AUDIT_ARCH_S390;
+#endif
+ return AUDIT_ARCH_S390X;
+}
+#endif /* _ASM_SYSCALL_H */
diff --git a/arch/s390/include/asm/sysinfo.h b/arch/s390/include/asm/sysinfo.h
new file mode 100644
index 000000000..fe7b3f8f0
--- /dev/null
+++ b/arch/s390/include/asm/sysinfo.h
@@ -0,0 +1,201 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * definition for store system information stsi
+ *
+ * Copyright IBM Corp. 2001, 2008
+ *
+ * Author(s): Ulrich Weigand <weigand@de.ibm.com>
+ * Christian Borntraeger <borntraeger@de.ibm.com>
+ */
+
+#ifndef __ASM_S390_SYSINFO_H
+#define __ASM_S390_SYSINFO_H
+
+#include <asm/bitsperlong.h>
+#include <linux/uuid.h>
+
+struct sysinfo_1_1_1 {
+ unsigned char p:1;
+ unsigned char :6;
+ unsigned char t:1;
+ unsigned char :8;
+ unsigned char ccr;
+ unsigned char cai;
+ char reserved_0[20];
+ unsigned long lic;
+ char manufacturer[16];
+ char type[4];
+ char reserved_1[12];
+ char model_capacity[16];
+ char sequence[16];
+ char plant[4];
+ char model[16];
+ char model_perm_cap[16];
+ char model_temp_cap[16];
+ unsigned int model_cap_rating;
+ unsigned int model_perm_cap_rating;
+ unsigned int model_temp_cap_rating;
+ unsigned char typepct[5];
+ unsigned char reserved_2[3];
+ unsigned int ncr;
+ unsigned int npr;
+ unsigned int ntr;
+};
+
+struct sysinfo_1_2_1 {
+ char reserved_0[80];
+ char sequence[16];
+ char plant[4];
+ char reserved_1[2];
+ unsigned short cpu_address;
+};
+
+struct sysinfo_1_2_2 {
+ char format;
+ char reserved_0[1];
+ unsigned short acc_offset;
+ unsigned char mt_installed :1;
+ unsigned char :2;
+ unsigned char mt_stid :5;
+ unsigned char :3;
+ unsigned char mt_gtid :5;
+ char reserved_1[18];
+ unsigned int nominal_cap;
+ unsigned int secondary_cap;
+ unsigned int capability;
+ unsigned short cpus_total;
+ unsigned short cpus_configured;
+ unsigned short cpus_standby;
+ unsigned short cpus_reserved;
+ unsigned short adjustment[0];
+};
+
+struct sysinfo_1_2_2_extension {
+ unsigned int alt_capability;
+ unsigned short alt_adjustment[0];
+};
+
+struct sysinfo_2_2_1 {
+ char reserved_0[80];
+ char sequence[16];
+ char plant[4];
+ unsigned short cpu_id;
+ unsigned short cpu_address;
+};
+
+struct sysinfo_2_2_2 {
+ char reserved_0[32];
+ unsigned short lpar_number;
+ char reserved_1;
+ unsigned char characteristics;
+ unsigned short cpus_total;
+ unsigned short cpus_configured;
+ unsigned short cpus_standby;
+ unsigned short cpus_reserved;
+ char name[8];
+ unsigned int caf;
+ char reserved_2[8];
+ unsigned char mt_installed :1;
+ unsigned char :2;
+ unsigned char mt_stid :5;
+ unsigned char :3;
+ unsigned char mt_gtid :5;
+ unsigned char :3;
+ unsigned char mt_psmtid :5;
+ char reserved_3[5];
+ unsigned short cpus_dedicated;
+ unsigned short cpus_shared;
+ char reserved_4[3];
+ unsigned char vsne;
+ uuid_t uuid;
+ char reserved_5[160];
+ char ext_name[256];
+};
+
+#define LPAR_CHAR_DEDICATED (1 << 7)
+#define LPAR_CHAR_SHARED (1 << 6)
+#define LPAR_CHAR_LIMITED (1 << 5)
+
+struct sysinfo_3_2_2 {
+ char reserved_0[31];
+ unsigned char :4;
+ unsigned char count:4;
+ struct {
+ char reserved_0[4];
+ unsigned short cpus_total;
+ unsigned short cpus_configured;
+ unsigned short cpus_standby;
+ unsigned short cpus_reserved;
+ char name[8];
+ unsigned int caf;
+ char cpi[16];
+ char reserved_1[3];
+ unsigned char evmne;
+ unsigned int reserved_2;
+ uuid_t uuid;
+ } vm[8];
+ char reserved_3[1504];
+ char ext_names[8][256];
+};
+
+extern int topology_max_mnest;
+
+/*
+ * Returns the maximum nesting level supported by the cpu topology code.
+ * The current maximum level is 4 which is the drawer level.
+ */
+static inline unsigned char topology_mnest_limit(void)
+{
+ return min(topology_max_mnest, 4);
+}
+
+#define TOPOLOGY_NR_MAG 6
+
+struct topology_core {
+ unsigned char nl;
+ unsigned char reserved0[3];
+ unsigned char :5;
+ unsigned char d:1;
+ unsigned char pp:2;
+ unsigned char reserved1;
+ unsigned short origin;
+ unsigned long mask;
+};
+
+struct topology_container {
+ unsigned char nl;
+ unsigned char reserved[6];
+ unsigned char id;
+};
+
+union topology_entry {
+ unsigned char nl;
+ struct topology_core cpu;
+ struct topology_container container;
+};
+
+struct sysinfo_15_1_x {
+ unsigned char reserved0[2];
+ unsigned short length;
+ unsigned char mag[TOPOLOGY_NR_MAG];
+ unsigned char reserved1;
+ unsigned char mnest;
+ unsigned char reserved2[4];
+ union topology_entry tle[0];
+};
+
+int stsi(void *sysinfo, int fc, int sel1, int sel2);
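A hedged usage sketch (kernel context assumed; the helper name is made up for illustration) that queries the basic machine block via STSI 1.1.1 and logs a few of the fields defined above:

#include <linux/errno.h>
#include <linux/gfp.h>
#include <linux/printk.h>
#include <asm/sysinfo.h>

static int example_log_machine(void)
{
	struct sysinfo_1_1_1 *info = (void *)get_zeroed_page(GFP_KERNEL);

	if (!info)
		return -ENOMEM;
	if (stsi(info, 1, 1, 1) == 0)
		pr_info("machine type %.4s, model %.16s\n", info->type, info->model);
	free_page((unsigned long)info);
	return 0;
}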
+
+/*
+ * Service level reporting interface.
+ */
+struct service_level {
+ struct list_head list;
+ void (*seq_print)(struct seq_file *, struct service_level *);
+};
+
+int register_service_level(struct service_level *);
+int unregister_service_level(struct service_level *);
+
+int sthyi_fill(void *dst, u64 *rc);
+#endif /* __ASM_S390_SYSINFO_H */
diff --git a/arch/s390/include/asm/termios.h b/arch/s390/include/asm/termios.h
new file mode 100644
index 000000000..46fa3020b
--- /dev/null
+++ b/arch/s390/include/asm/termios.h
@@ -0,0 +1,26 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * S390 version
+ *
+ * Derived from "include/asm-i386/termios.h"
+ */
+#ifndef _S390_TERMIOS_H
+#define _S390_TERMIOS_H
+
+#include <uapi/asm/termios.h>
+
+
+/* intr=^C quit=^\ erase=del kill=^U
+ eof=^D vtime=\0 vmin=\1 sxtc=\0
+ start=^Q stop=^S susp=^Z eol=\0
+ reprint=^R discard=^U werase=^W lnext=^V
+ eol2=\0
+*/
+#define INIT_C_CC "\003\034\177\025\004\0\1\0\021\023\032\0\022\017\027\026\0"
+
+#define user_termios_to_kernel_termios(k, u) copy_from_user(k, u, sizeof(struct termios2))
+#define kernel_termios_to_user_termios(u, k) copy_to_user(u, k, sizeof(struct termios2))
+
+#include <asm-generic/termios-base.h>
+
+#endif /* _S390_TERMIOS_H */
diff --git a/arch/s390/include/asm/thread_info.h b/arch/s390/include/asm/thread_info.h
new file mode 100644
index 000000000..3c883c368
--- /dev/null
+++ b/arch/s390/include/asm/thread_info.h
@@ -0,0 +1,97 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * S390 version
+ * Copyright IBM Corp. 2002, 2006
+ * Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com)
+ */
+
+#ifndef _ASM_THREAD_INFO_H
+#define _ASM_THREAD_INFO_H
+
+#include <linux/const.h>
+
+/*
+ * Size of kernel stack for each process
+ */
+#define THREAD_SIZE_ORDER 2
+#define ASYNC_ORDER 2
+
+#define THREAD_SIZE (PAGE_SIZE << THREAD_SIZE_ORDER)
+#define ASYNC_SIZE (PAGE_SIZE << ASYNC_ORDER)
+
+#ifndef __ASSEMBLY__
+#include <asm/lowcore.h>
+#include <asm/page.h>
+#include <asm/processor.h>
+
+/*
+ * low level task data that entry.S needs immediate access to
+ * - this struct should fit entirely inside of one cache line
+ * - this struct shares the supervisor stack pages
+ * - if the contents of this structure are changed, the assembly constants must also be changed
+ */
+struct thread_info {
+ unsigned long flags; /* low level flags */
+};
+
+/*
+ * macros/functions for gaining access to the thread information structure
+ */
+#define INIT_THREAD_INFO(tsk) \
+{ \
+ .flags = 0, \
+}
+
+void arch_release_task_struct(struct task_struct *tsk);
+int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src);
+
+void arch_setup_new_exec(void);
+#define arch_setup_new_exec arch_setup_new_exec
+
+#endif
+
+/*
+ * thread information flags bit numbers
+ */
+/* _TIF_WORK bits */
+#define TIF_NOTIFY_RESUME 0 /* callback before returning to user */
+#define TIF_SIGPENDING 1 /* signal pending */
+#define TIF_NEED_RESCHED 2 /* rescheduling necessary */
+#define TIF_UPROBE 3 /* breakpointed or single-stepping */
+#define TIF_GUARDED_STORAGE 4 /* load guarded storage control block */
+#define TIF_PATCH_PENDING 5 /* pending live patching update */
+#define TIF_PGSTE 6 /* New mm's will use 4K page tables */
+#define TIF_ISOLATE_BP 8 /* Run process with isolated BP */
+#define TIF_ISOLATE_BP_GUEST 9 /* Run KVM guests with isolated BP */
+
+#define TIF_31BIT 16 /* 32bit process */
+#define TIF_MEMDIE 17 /* is terminating due to OOM killer */
+#define TIF_RESTORE_SIGMASK 18 /* restore signal mask in do_signal() */
+#define TIF_SINGLE_STEP 19 /* This task is single stepped */
+#define TIF_BLOCK_STEP 20 /* This task is block stepped */
+#define TIF_UPROBE_SINGLESTEP 21 /* This task is uprobe single stepped */
+
+/* _TIF_TRACE bits */
+#define TIF_SYSCALL_TRACE 24 /* syscall trace active */
+#define TIF_SYSCALL_AUDIT 25 /* syscall auditing active */
+#define TIF_SECCOMP 26 /* secure computing */
+#define TIF_SYSCALL_TRACEPOINT 27 /* syscall tracepoint instrumentation */
+
+#define _TIF_NOTIFY_RESUME _BITUL(TIF_NOTIFY_RESUME)
+#define _TIF_SIGPENDING _BITUL(TIF_SIGPENDING)
+#define _TIF_NEED_RESCHED _BITUL(TIF_NEED_RESCHED)
+#define _TIF_UPROBE _BITUL(TIF_UPROBE)
+#define _TIF_GUARDED_STORAGE _BITUL(TIF_GUARDED_STORAGE)
+#define _TIF_PATCH_PENDING _BITUL(TIF_PATCH_PENDING)
+#define _TIF_ISOLATE_BP _BITUL(TIF_ISOLATE_BP)
+#define _TIF_ISOLATE_BP_GUEST _BITUL(TIF_ISOLATE_BP_GUEST)
+
+#define _TIF_31BIT _BITUL(TIF_31BIT)
+#define _TIF_SINGLE_STEP _BITUL(TIF_SINGLE_STEP)
+
+#define _TIF_SYSCALL_TRACE _BITUL(TIF_SYSCALL_TRACE)
+#define _TIF_SYSCALL_AUDIT _BITUL(TIF_SYSCALL_AUDIT)
+#define _TIF_SECCOMP _BITUL(TIF_SECCOMP)
+#define _TIF_SYSCALL_TRACEPOINT _BITUL(TIF_SYSCALL_TRACEPOINT)
+
+#endif /* _ASM_THREAD_INFO_H */
diff --git a/arch/s390/include/asm/timex.h b/arch/s390/include/asm/timex.h
new file mode 100644
index 000000000..99a7e0282
--- /dev/null
+++ b/arch/s390/include/asm/timex.h
@@ -0,0 +1,257 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * S390 version
+ * Copyright IBM Corp. 1999
+ *
+ * Derived from "include/asm-i386/timex.h"
+ * Copyright (C) 1992, Linus Torvalds
+ */
+
+#ifndef _ASM_S390_TIMEX_H
+#define _ASM_S390_TIMEX_H
+
+#include <linux/preempt.h>
+#include <linux/time64.h>
+#include <asm/lowcore.h>
+
+/* The value of the TOD clock for 1.1.1970. */
+#define TOD_UNIX_EPOCH 0x7d91048bca000000ULL
+
+extern u64 clock_comparator_max;
+
+/* Inline functions for clock register access. */
+static inline int set_tod_clock(__u64 time)
+{
+ int cc;
+
+ asm volatile(
+ " sck %1\n"
+ " ipm %0\n"
+ " srl %0,28\n"
+ : "=d" (cc) : "Q" (time) : "cc");
+ return cc;
+}
+
+static inline int store_tod_clock(__u64 *time)
+{
+ int cc;
+
+ asm volatile(
+ " stck %1\n"
+ " ipm %0\n"
+ " srl %0,28\n"
+ : "=d" (cc), "=Q" (*time) : : "cc");
+ return cc;
+}
+
+static inline void set_clock_comparator(__u64 time)
+{
+ asm volatile("sckc %0" : : "Q" (time));
+}
+
+static inline void store_clock_comparator(__u64 *time)
+{
+ asm volatile("stckc %0" : "=Q" (*time));
+}
+
+void clock_comparator_work(void);
+
+void __init time_early_init(void);
+
+extern unsigned char ptff_function_mask[16];
+
+/* Function codes for the ptff instruction. */
+#define PTFF_QAF 0x00 /* query available functions */
+#define PTFF_QTO 0x01 /* query tod offset */
+#define PTFF_QSI 0x02 /* query steering information */
+#define PTFF_QUI 0x04 /* query UTC information */
+#define PTFF_ATO 0x40 /* adjust tod offset */
+#define PTFF_STO 0x41 /* set tod offset */
+#define PTFF_SFS 0x42 /* set fine steering rate */
+#define PTFF_SGS 0x43 /* set gross steering rate */
+
+/* Query TOD offset result */
+struct ptff_qto {
+ unsigned long long physical_clock;
+ unsigned long long tod_offset;
+ unsigned long long logical_tod_offset;
+ unsigned long long tod_epoch_difference;
+} __packed;
+
+static inline int ptff_query(unsigned int nr)
+{
+ unsigned char *ptr;
+
+ ptr = ptff_function_mask + (nr >> 3);
+ return (*ptr & (0x80 >> (nr & 7))) != 0;
+}
+
+/* Query UTC information result */
+struct ptff_qui {
+ unsigned int tm : 2;
+ unsigned int ts : 2;
+ unsigned int : 28;
+ unsigned int pad_0x04;
+ unsigned long leap_event;
+ short old_leap;
+ short new_leap;
+ unsigned int pad_0x14;
+ unsigned long prt[5];
+ unsigned long cst[3];
+ unsigned int skew;
+ unsigned int pad_0x5c[41];
+} __packed;
+
+/*
+ * ptff - Perform timing facility function
+ * @ptff_block: Pointer to ptff parameter block
+ * @len: Length of parameter block
+ * @func: Function code
+ * Returns: Condition code (0 on success)
+ */
+#define ptff(ptff_block, len, func) \
+({ \
+ struct addrtype { char _[len]; }; \
+ register unsigned int reg0 asm("0") = func; \
+ register unsigned long reg1 asm("1") = (unsigned long) (ptff_block);\
+ int rc; \
+ \
+ asm volatile( \
+ " .word 0x0104\n" \
+ " ipm %0\n" \
+ " srl %0,28\n" \
+ : "=d" (rc), "+m" (*(struct addrtype *) reg1) \
+ : "d" (reg0), "d" (reg1) : "cc"); \
+ rc; \
+})
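A typical caller checks the PTFF_QAF function mask via ptff_query() before issuing a function. A hedged sketch (kernel context; the helper name is illustrative) that reads the TOD epoch difference when PTFF_QTO is available:

static unsigned long long example_tod_epoch_difference(void)
{
	struct ptff_qto qto;

	if (ptff_query(PTFF_QTO) && ptff(&qto, sizeof(qto), PTFF_QTO) == 0)
		return qto.tod_epoch_difference;
	return 0;
}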
+
+static inline unsigned long long local_tick_disable(void)
+{
+ unsigned long long old;
+
+ old = S390_lowcore.clock_comparator;
+ S390_lowcore.clock_comparator = clock_comparator_max;
+ set_clock_comparator(S390_lowcore.clock_comparator);
+ return old;
+}
+
+static inline void local_tick_enable(unsigned long long comp)
+{
+ S390_lowcore.clock_comparator = comp;
+ set_clock_comparator(S390_lowcore.clock_comparator);
+}
+
+#define CLOCK_TICK_RATE 1193180 /* Underlying HZ */
+#define STORE_CLOCK_EXT_SIZE 16 /* stcke writes 16 bytes */
+
+typedef unsigned long long cycles_t;
+
+static inline void get_tod_clock_ext(char *clk)
+{
+ typedef struct { char _[STORE_CLOCK_EXT_SIZE]; } addrtype;
+
+ asm volatile("stcke %0" : "=Q" (*(addrtype *) clk) : : "cc");
+}
+
+static inline unsigned long long get_tod_clock(void)
+{
+ char clk[STORE_CLOCK_EXT_SIZE];
+
+ get_tod_clock_ext(clk);
+ return *((unsigned long long *)&clk[1]);
+}
+
+static inline unsigned long long get_tod_clock_fast(void)
+{
+#ifdef CONFIG_HAVE_MARCH_Z9_109_FEATURES
+ unsigned long long clk;
+
+ asm volatile("stckf %0" : "=Q" (clk) : : "cc");
+ return clk;
+#else
+ return get_tod_clock();
+#endif
+}
+
+static inline cycles_t get_cycles(void)
+{
+ return (cycles_t) get_tod_clock() >> 2;
+}
+#define get_cycles get_cycles
+
+int get_phys_clock(unsigned long *clock);
+void init_cpu_timer(void);
+unsigned long long monotonic_clock(void);
+
+extern unsigned char tod_clock_base[16] __aligned(8);
+
+/**
+ * get_tod_clock_monotonic - returns current time in clock rate units
+ *
+ * The clock and tod_clock_base get changed via stop_machine.
+ * Therefore preemption must be disabled, otherwise the returned
+ * value is not guaranteed to be monotonic.
+ */
+static inline unsigned long long get_tod_clock_monotonic(void)
+{
+ unsigned long long tod;
+
+ preempt_disable_notrace();
+ tod = get_tod_clock() - *(unsigned long long *) &tod_clock_base[1];
+ preempt_enable_notrace();
+ return tod;
+}
+
+/**
+ * tod_to_ns - convert a TOD format value to nanoseconds
+ * @todval: to be converted TOD format value
+ * Returns: number of nanoseconds that correspond to the TOD format value
+ *
+ * Converting a 64 Bit TOD format value to nanoseconds means that the value
+ * must be divided by 4.096. In order to achieve that we multiply with 125
+ * and divide by 512:
+ *
+ * ns = (todval * 125) >> 9;
+ *
+ * In order to avoid an overflow with the multiplication we can rewrite this.
+ * With a split todval == 2^9 * th + tl (th upper 55 bits, tl lower 9 bits)
+ * we end up with
+ *
+ * ns = ((2^9 * th + tl) * 125 ) >> 9;
+ * -> ns = (th * 125) + ((tl * 125) >> 9);
+ *
+ */
+static inline unsigned long long tod_to_ns(unsigned long long todval)
+{
+ return ((todval >> 9) * 125) + (((todval & 0x1ff) * 125) >> 9);
+}
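The split form above computes exactly the same result as the naive (todval * 125) >> 9 while never overflowing 64 bits. A small stand-alone check (illustrative only) over values where the naive form is still safe:

#include <assert.h>
#include <stdint.h>

static uint64_t split_tod_to_ns(uint64_t tod)	/* same arithmetic as tod_to_ns() */
{
	return ((tod >> 9) * 125) + (((tod & 0x1ff) * 125) >> 9);
}

int main(void)
{
	for (uint64_t tod = 0; tod < (1UL << 20); tod++)
		assert(split_tod_to_ns(tod) == (tod * 125) >> 9);
	return 0;
}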
+
+/**
+ * tod_after - compare two 64 bit TOD values
+ * @a: first 64 bit TOD timestamp
+ * @b: second 64 bit TOD timestamp
+ *
+ * Returns: true if a is later than b
+ */
+static inline int tod_after(unsigned long long a, unsigned long long b)
+{
+ if (MACHINE_HAS_SCC)
+ return (long long) a > (long long) b;
+ return a > b;
+}
+
+/**
+ * tod_after_eq - compare two 64 bit TOD values
+ * @a: first 64 bit TOD timestamp
+ * @b: second 64 bit TOD timestamp
+ *
+ * Returns: true if a is later than or equal to b
+ */
+static inline int tod_after_eq(unsigned long long a, unsigned long long b)
+{
+ if (MACHINE_HAS_SCC)
+ return (long long) a >= (long long) b;
+ return a >= b;
+}
+
+#endif
diff --git a/arch/s390/include/asm/tlb.h b/arch/s390/include/asm/tlb.h
new file mode 100644
index 000000000..1df28a8e2
--- /dev/null
+++ b/arch/s390/include/asm/tlb.h
@@ -0,0 +1,205 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _S390_TLB_H
+#define _S390_TLB_H
+
+/*
+ * TLB flushing on s390 is complicated. The following requirement
+ * from the principles of operation is the most arduous:
+ *
+ * "A valid table entry must not be changed while it is attached
+ * to any CPU and may be used for translation by that CPU except to
+ * (1) invalidate the entry by using INVALIDATE PAGE TABLE ENTRY,
+ * or INVALIDATE DAT TABLE ENTRY, (2) alter bits 56-63 of a page
+ * table entry, or (3) make a change by means of a COMPARE AND SWAP
+ * AND PURGE instruction that purges the TLB."
+ *
+ * The modification of a pte of an active mm struct therefore is
+ * a two step process: i) invalidate the pte, ii) store the new pte.
+ * This is true for the page protection bit as well.
+ * The only possible optimization is to flush at the beginning of
+ * a tlb_gather_mmu cycle if the mm_struct is currently not in use.
+ *
+ * Pages used for the page tables are a different story. FIXME: more
+ */
+
+#include <linux/mm.h>
+#include <linux/pagemap.h>
+#include <linux/swap.h>
+#include <asm/processor.h>
+#include <asm/pgalloc.h>
+#include <asm/tlbflush.h>
+
+struct mmu_gather {
+ struct mm_struct *mm;
+ struct mmu_table_batch *batch;
+ unsigned int fullmm;
+ unsigned long start, end;
+};
+
+struct mmu_table_batch {
+ struct rcu_head rcu;
+ unsigned int nr;
+ void *tables[0];
+};
+
+#define MAX_TABLE_BATCH \
+ ((PAGE_SIZE - sizeof(struct mmu_table_batch)) / sizeof(void *))
+
+extern void tlb_table_flush(struct mmu_gather *tlb);
+extern void tlb_remove_table(struct mmu_gather *tlb, void *table);
+
+static inline void
+arch_tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm,
+ unsigned long start, unsigned long end)
+{
+ tlb->mm = mm;
+ tlb->start = start;
+ tlb->end = end;
+ tlb->fullmm = !(start | (end+1));
+ tlb->batch = NULL;
+}
+
+static inline void tlb_flush_mmu_tlbonly(struct mmu_gather *tlb)
+{
+ __tlb_flush_mm_lazy(tlb->mm);
+}
+
+static inline void tlb_flush_mmu_free(struct mmu_gather *tlb)
+{
+ tlb_table_flush(tlb);
+}
+
+
+static inline void tlb_flush_mmu(struct mmu_gather *tlb)
+{
+ tlb_flush_mmu_tlbonly(tlb);
+ tlb_flush_mmu_free(tlb);
+}
+
+static inline void
+arch_tlb_finish_mmu(struct mmu_gather *tlb,
+ unsigned long start, unsigned long end, bool force)
+{
+ if (force) {
+ tlb->start = start;
+ tlb->end = end;
+ }
+
+ tlb_flush_mmu(tlb);
+}
+
+/*
+ * Release the page cache reference for a pte removed by
+ * tlb_ptep_clear_flush. In both flush modes the tlb for a page cache page
+ * has already been flushed, so just do free_page_and_swap_cache.
+ */
+static inline bool __tlb_remove_page(struct mmu_gather *tlb, struct page *page)
+{
+ free_page_and_swap_cache(page);
+ return false; /* avoid calling tlb_flush_mmu */
+}
+
+static inline void tlb_remove_page(struct mmu_gather *tlb, struct page *page)
+{
+ free_page_and_swap_cache(page);
+}
+
+static inline bool __tlb_remove_page_size(struct mmu_gather *tlb,
+ struct page *page, int page_size)
+{
+ return __tlb_remove_page(tlb, page);
+}
+
+static inline void tlb_remove_page_size(struct mmu_gather *tlb,
+ struct page *page, int page_size)
+{
+ return tlb_remove_page(tlb, page);
+}
+
+static inline void tlb_flush_pmd_range(struct mmu_gather *tlb,
+ unsigned long address, unsigned long size)
+{
+ /*
+ * the range might exceed the original range that was provided to
+ * tlb_gather_mmu(), so we need to update it despite the fact it is
+ * usually not updated.
+ */
+ if (tlb->start > address)
+ tlb->start = address;
+ if (tlb->end < address + size)
+ tlb->end = address + size;
+}
+
+/*
+ * pte_free_tlb frees a pte table and clears the CRSTE for the
+ * page table from the tlb.
+ */
+static inline void pte_free_tlb(struct mmu_gather *tlb, pgtable_t pte,
+ unsigned long address)
+{
+ page_table_free_rcu(tlb, (unsigned long *) pte, address);
+}
+
+/*
+ * pmd_free_tlb frees a pmd table and clears the CRSTE for the
+ * segment table entry from the tlb.
+ * If the mm uses a two level page table the single pmd is freed
+ * as the pgd. pmd_free_tlb checks the asce_limit against 2GB
+ * to avoid the double free of the pmd in this case.
+ */
+static inline void pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmd,
+ unsigned long address)
+{
+ if (mm_pmd_folded(tlb->mm))
+ return;
+ pgtable_pmd_page_dtor(virt_to_page(pmd));
+ tlb_remove_table(tlb, pmd);
+}
+
+/*
+ * p4d_free_tlb frees a pud table and clears the CRSTE for the
+ * region second table entry from the tlb.
+ * If the mm uses a four level page table the single p4d is freed
+ * as the pgd. p4d_free_tlb checks the asce_limit against 8PB
+ * to avoid the double free of the p4d in this case.
+ */
+static inline void p4d_free_tlb(struct mmu_gather *tlb, p4d_t *p4d,
+ unsigned long address)
+{
+ if (mm_p4d_folded(tlb->mm))
+ return;
+ tlb_remove_table(tlb, p4d);
+}
+
+/*
+ * pud_free_tlb frees a pud table and clears the CRSTE for the
+ * region third table entry from the tlb.
+ * If the mm uses a three level page table the single pud is freed
+ * as the pgd. pud_free_tlb checks the asce_limit against 4TB
+ * to avoid the double free of the pud in this case.
+ */
+static inline void pud_free_tlb(struct mmu_gather *tlb, pud_t *pud,
+ unsigned long address)
+{
+ if (mm_pud_folded(tlb->mm))
+ return;
+ tlb_remove_table(tlb, pud);
+}
+
+#define tlb_start_vma(tlb, vma) do { } while (0)
+#define tlb_end_vma(tlb, vma) do { } while (0)
+#define tlb_remove_tlb_entry(tlb, ptep, addr) do { } while (0)
+#define tlb_remove_pmd_tlb_entry(tlb, pmdp, addr) do { } while (0)
+#define tlb_migrate_finish(mm) do { } while (0)
+#define tlb_flush_pmd_range(tlb, addr, sz) do { } while (0)
+
+#define tlb_remove_huge_tlb_entry(h, tlb, ptep, address) \
+ tlb_remove_tlb_entry(tlb, ptep, address)
+
+#define tlb_remove_check_page_size_change tlb_remove_check_page_size_change
+static inline void tlb_remove_check_page_size_change(struct mmu_gather *tlb,
+ unsigned int page_size)
+{
+}
+
+#endif /* _S390_TLB_H */
diff --git a/arch/s390/include/asm/tlbflush.h b/arch/s390/include/asm/tlbflush.h
new file mode 100644
index 000000000..8c840f090
--- /dev/null
+++ b/arch/s390/include/asm/tlbflush.h
@@ -0,0 +1,152 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _S390_TLBFLUSH_H
+#define _S390_TLBFLUSH_H
+
+#include <linux/mm.h>
+#include <linux/sched.h>
+#include <asm/processor.h>
+#include <asm/pgalloc.h>
+#include <asm/pgtable.h>
+
+/*
+ * Flush all TLB entries on the local CPU.
+ */
+static inline void __tlb_flush_local(void)
+{
+ asm volatile("ptlb" : : : "memory");
+}
+
+/*
+ * Flush TLB entries for a specific ASCE on all CPUs
+ */
+static inline void __tlb_flush_idte(unsigned long asce)
+{
+ unsigned long opt;
+
+ opt = IDTE_PTOA;
+ if (MACHINE_HAS_TLB_GUEST)
+ opt |= IDTE_GUEST_ASCE;
+ /* Global TLB flush for the mm */
+ asm volatile(
+ " .insn rrf,0xb98e0000,0,%0,%1,0"
+ : : "a" (opt), "a" (asce) : "cc");
+}
+
+#ifdef CONFIG_SMP
+void smp_ptlb_all(void);
+
+/*
+ * Flush all TLB entries on all CPUs.
+ */
+static inline void __tlb_flush_global(void)
+{
+ unsigned int dummy = 0;
+
+ csp(&dummy, 0, 0);
+}
+
+/*
+ * Flush TLB entries for a specific mm on all CPUs (when gmap is used
+ * this can involve multiple ASCEs!).
+ */
+static inline void __tlb_flush_mm(struct mm_struct *mm)
+{
+ unsigned long gmap_asce;
+
+ /*
+ * If the machine has IDTE we prefer to do a per mm flush
+ * on all cpus instead of doing a local flush if the mm
+ * only ran on the local cpu.
+ */
+ preempt_disable();
+ atomic_inc(&mm->context.flush_count);
+ /* Reset TLB flush mask */
+ cpumask_copy(mm_cpumask(mm), &mm->context.cpu_attach_mask);
+ barrier();
+ gmap_asce = READ_ONCE(mm->context.gmap_asce);
+ if (MACHINE_HAS_IDTE && gmap_asce != -1UL) {
+ if (gmap_asce)
+ __tlb_flush_idte(gmap_asce);
+ __tlb_flush_idte(mm->context.asce);
+ } else {
+ /* Global TLB flush */
+ __tlb_flush_global();
+ }
+ atomic_dec(&mm->context.flush_count);
+ preempt_enable();
+}
+
+static inline void __tlb_flush_kernel(void)
+{
+ if (MACHINE_HAS_IDTE)
+ __tlb_flush_idte(init_mm.context.asce);
+ else
+ __tlb_flush_global();
+}
+#else
+#define __tlb_flush_global() __tlb_flush_local()
+
+/*
+ * Flush TLB entries for a specific mm; without SMP a local flush suffices.
+ */
+static inline void __tlb_flush_mm(struct mm_struct *mm)
+{
+ __tlb_flush_local();
+}
+
+static inline void __tlb_flush_kernel(void)
+{
+ __tlb_flush_local();
+}
+#endif
+
+static inline void __tlb_flush_mm_lazy(struct mm_struct * mm)
+{
+ spin_lock(&mm->context.lock);
+ if (mm->context.flush_mm) {
+ mm->context.flush_mm = 0;
+ __tlb_flush_mm(mm);
+ }
+ spin_unlock(&mm->context.lock);
+}
+
+/*
+ * TLB flushing:
+ * flush_tlb() - flushes the current mm struct TLBs
+ * flush_tlb_all() - flushes all processes TLBs
+ * flush_tlb_mm(mm) - flushes the specified mm context TLB's
+ * flush_tlb_page(vma, vmaddr) - flushes one page
+ * flush_tlb_range(vma, start, end) - flushes a range of pages
+ * flush_tlb_kernel_range(start, end) - flushes a range of kernel pages
+ */
+
+/*
+ * flush_tlb_mm goes together with ptep_set_wrprotect for the
+ * copy_page_range operation and flush_tlb_range is related to
+ * ptep_get_and_clear for change_protection. ptep_set_wrprotect and
+ * ptep_get_and_clear do not flush the TLBs directly if the mm has
+ * only one user. At the end of the update the flush_tlb_mm and
+ * flush_tlb_range functions need to do the flush.
+ */
+#define flush_tlb() do { } while (0)
+#define flush_tlb_all() do { } while (0)
+#define flush_tlb_page(vma, addr) do { } while (0)
+
+static inline void flush_tlb_mm(struct mm_struct *mm)
+{
+ __tlb_flush_mm_lazy(mm);
+}
+
+static inline void flush_tlb_range(struct vm_area_struct *vma,
+ unsigned long start, unsigned long end)
+{
+ __tlb_flush_mm_lazy(vma->vm_mm);
+}
+
+static inline void flush_tlb_kernel_range(unsigned long start,
+ unsigned long end)
+{
+ __tlb_flush_kernel();
+}
+
+#endif /* _S390_TLBFLUSH_H */
diff --git a/arch/s390/include/asm/topology.h b/arch/s390/include/asm/topology.h
new file mode 100644
index 000000000..cca406fdb
--- /dev/null
+++ b/arch/s390/include/asm/topology.h
@@ -0,0 +1,100 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_S390_TOPOLOGY_H
+#define _ASM_S390_TOPOLOGY_H
+
+#include <linux/cpumask.h>
+#include <asm/numa.h>
+
+struct sysinfo_15_1_x;
+struct cpu;
+
+#ifdef CONFIG_SCHED_TOPOLOGY
+
+struct cpu_topology_s390 {
+ unsigned short thread_id;
+ unsigned short core_id;
+ unsigned short socket_id;
+ unsigned short book_id;
+ unsigned short drawer_id;
+ unsigned short node_id;
+ unsigned short dedicated : 1;
+ cpumask_t thread_mask;
+ cpumask_t core_mask;
+ cpumask_t book_mask;
+ cpumask_t drawer_mask;
+};
+
+extern struct cpu_topology_s390 cpu_topology[NR_CPUS];
+extern cpumask_t cpus_with_topology;
+
+#define topology_physical_package_id(cpu) (cpu_topology[cpu].socket_id)
+#define topology_thread_id(cpu) (cpu_topology[cpu].thread_id)
+#define topology_sibling_cpumask(cpu) (&cpu_topology[cpu].thread_mask)
+#define topology_core_id(cpu) (cpu_topology[cpu].core_id)
+#define topology_core_cpumask(cpu) (&cpu_topology[cpu].core_mask)
+#define topology_book_id(cpu) (cpu_topology[cpu].book_id)
+#define topology_book_cpumask(cpu) (&cpu_topology[cpu].book_mask)
+#define topology_drawer_id(cpu) (cpu_topology[cpu].drawer_id)
+#define topology_drawer_cpumask(cpu) (&cpu_topology[cpu].drawer_mask)
+#define topology_cpu_dedicated(cpu) (cpu_topology[cpu].dedicated)
+
+#define mc_capable() 1
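The per-cpu masks exported above are what generic code iterates. A hedged kernel-context sketch (the helper name is illustrative) that logs the SMT siblings of a given cpu:

static void example_print_smt_siblings(int cpu)
{
	int sibling;

	for_each_cpu(sibling, topology_sibling_cpumask(cpu))
		pr_info("cpu %d shares a core with cpu %d\n", cpu, sibling);
}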
+
+void topology_init_early(void);
+int topology_cpu_init(struct cpu *);
+int topology_set_cpu_management(int fc);
+void topology_schedule_update(void);
+void store_topology(struct sysinfo_15_1_x *info);
+void topology_expect_change(void);
+const struct cpumask *cpu_coregroup_mask(int cpu);
+
+#else /* CONFIG_SCHED_TOPOLOGY */
+
+static inline void topology_init_early(void) { }
+static inline void topology_schedule_update(void) { }
+static inline int topology_cpu_init(struct cpu *cpu) { return 0; }
+static inline int topology_cpu_dedicated(int cpu_nr) { return 0; }
+static inline void topology_expect_change(void) { }
+
+#endif /* CONFIG_SCHED_TOPOLOGY */
+
+#define POLARIZATION_UNKNOWN (-1)
+#define POLARIZATION_HRZ (0)
+#define POLARIZATION_VL (1)
+#define POLARIZATION_VM (2)
+#define POLARIZATION_VH (3)
+
+#define SD_BOOK_INIT SD_CPU_INIT
+
+#ifdef CONFIG_NUMA
+
+#define cpu_to_node cpu_to_node
+static inline int cpu_to_node(int cpu)
+{
+ return cpu_topology[cpu].node_id;
+}
+
+/* Returns a pointer to the cpumask of CPUs on node 'node'. */
+#define cpumask_of_node cpumask_of_node
+static inline const struct cpumask *cpumask_of_node(int node)
+{
+ return &node_to_cpumask_map[node];
+}
+
+#define pcibus_to_node(bus) __pcibus_to_node(bus)
+
+#define node_distance(a, b) __node_distance(a, b)
+
+#else /* !CONFIG_NUMA */
+
+#define numa_node_id numa_node_id
+static inline int numa_node_id(void)
+{
+ return 0;
+}
+
+#endif /* CONFIG_NUMA */
+
+#include <asm-generic/topology.h>
+
+#endif /* _ASM_S390_TOPOLOGY_H */
diff --git a/arch/s390/include/asm/trace/diag.h b/arch/s390/include/asm/trace/diag.h
new file mode 100644
index 000000000..22fcac4ff
--- /dev/null
+++ b/arch/s390/include/asm/trace/diag.h
@@ -0,0 +1,44 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Tracepoint header for s390 diagnose calls
+ *
+ * Copyright IBM Corp. 2015
+ * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>
+ */
+
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM s390
+
+#if !defined(_TRACE_S390_DIAG_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_S390_DIAG_H
+
+#include <linux/tracepoint.h>
+
+#undef TRACE_INCLUDE_PATH
+#undef TRACE_INCLUDE_FILE
+
+#define TRACE_INCLUDE_PATH asm/trace
+#define TRACE_INCLUDE_FILE diag
+
+TRACE_EVENT(s390_diagnose,
+ TP_PROTO(unsigned short nr),
+ TP_ARGS(nr),
+ TP_STRUCT__entry(
+ __field(unsigned short, nr)
+ ),
+ TP_fast_assign(
+ __entry->nr = nr;
+ ),
+ TP_printk("nr=0x%x", __entry->nr)
+);
+
+#ifdef CONFIG_TRACEPOINTS
+void trace_s390_diagnose_norecursion(int diag_nr);
+#else
+static inline void trace_s390_diagnose_norecursion(int diag_nr) { }
+#endif
+
+#endif /* _TRACE_S390_DIAG_H */
+
+/* This part must be outside protection */
+#include <trace/define_trace.h>
diff --git a/arch/s390/include/asm/trace/zcrypt.h b/arch/s390/include/asm/trace/zcrypt.h
new file mode 100644
index 000000000..457ddaa99
--- /dev/null
+++ b/arch/s390/include/asm/trace/zcrypt.h
@@ -0,0 +1,123 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Tracepoint definitions for the s390 zcrypt device driver
+ *
+ * Copyright IBM Corp. 2016
+ * Author(s): Harald Freudenberger <freude@de.ibm.com>
+ *
+ * Currently there are two tracepoint events defined here.
+ * An s390_zcrypt_req request event occurs as soon as the request is
+ * recognized by the zcrypt ioctl function. This event may act as some kind
+ * of request-processing-starts-now indication.
+ * As late as possible within the zcrypt ioctl function there occurs the
+ * s390_zcrypt_rep event which may act as the point in time where the
+ * request has been processed by the kernel and the result is about to be
+ * transferred back to userspace.
+ * The glue which binds together request and reply event is the ptr
+ * parameter, which is the local buffer address where the request from
+ * userspace has been stored by the ioctl function.
+ *
+ * The main purpose of this zcrypt tracepoint api is to get some data for
+ * performance measurements together with information about on which card
+ * and queue the request has been processed. It is not an ffdc interface as
+ * there is already code in the zcrypt device driver to serve the s390
+ * debug feature interface.
+ */
+
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM s390
+
+#if !defined(_TRACE_S390_ZCRYPT_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_S390_ZCRYPT_H
+
+#include <linux/tracepoint.h>
+
+#define TP_ICARSAMODEXPO 0x0001
+#define TP_ICARSACRT 0x0002
+#define TB_ZSECSENDCPRB 0x0003
+#define TP_ZSENDEP11CPRB 0x0004
+#define TP_HWRNGCPRB 0x0005
+
+#define show_zcrypt_tp_type(type) \
+ __print_symbolic(type, \
+ { TP_ICARSAMODEXPO, "ICARSAMODEXPO" }, \
+ { TP_ICARSACRT, "ICARSACRT" }, \
+ { TB_ZSECSENDCPRB, "ZSECSENDCPRB" }, \
+ { TP_ZSENDEP11CPRB, "ZSENDEP11CPRB" }, \
+ { TP_HWRNGCPRB, "HWRNGCPRB" })
+
+/**
+ * trace_s390_zcrypt_req - zcrypt request tracepoint function
+ * @ptr: Address of the local buffer where the request from userspace
+ * is stored. Can be used as a unique id to relate together
+ * request and reply.
+ * @type: One of the TP_ defines above.
+ *
+ * Called when a request from userspace is recognised within the ioctl
+ * function of the zcrypt device driver and may act as an entry
+ * timestamp.
+ */
+TRACE_EVENT(s390_zcrypt_req,
+ TP_PROTO(void *ptr, u32 type),
+ TP_ARGS(ptr, type),
+ TP_STRUCT__entry(
+ __field(void *, ptr)
+ __field(u32, type)),
+ TP_fast_assign(
+ __entry->ptr = ptr;
+ __entry->type = type;),
+ TP_printk("ptr=%p type=%s",
+ __entry->ptr,
+ show_zcrypt_tp_type(__entry->type))
+);
+
+/**
+ * trace_s390_zcrypt_rep - zcrypt reply tracepoint function
+ * @ptr: Address of the local buffer where the request from userspace
+ * is stored. Can be used as a unique id to match together
+ * request and reply.
+ * @fc: Function code.
+ * @rc: The bare returncode as returned by the device driver ioctl
+ * function.
+ * @dev: The adapter nr where this request was actually processed.
+ * @dom: Domain id of the device where this request was processed.
+ *
+ * Called upon recognising the reply from the crypto adapter. This
+ * message may act as the exit timestamp for the request but also
+ * carries some info about on which adapter the request was processed
+ * and the returncode from the device driver.
+ */
+TRACE_EVENT(s390_zcrypt_rep,
+ TP_PROTO(void *ptr, u32 fc, u32 rc, u16 dev, u16 dom),
+ TP_ARGS(ptr, fc, rc, dev, dom),
+ TP_STRUCT__entry(
+ __field(void *, ptr)
+ __field(u32, fc)
+ __field(u32, rc)
+ __field(u16, device)
+ __field(u16, domain)),
+ TP_fast_assign(
+ __entry->ptr = ptr;
+ __entry->fc = fc;
+ __entry->rc = rc;
+ __entry->device = dev;
+ __entry->domain = dom;),
+ TP_printk("ptr=%p fc=0x%04x rc=%d dev=0x%02hx domain=0x%04hx",
+ __entry->ptr,
+ (unsigned int) __entry->fc,
+ (int) __entry->rc,
+ (unsigned short) __entry->device,
+ (unsigned short) __entry->domain)
+);
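Because the ptr argument is the correlation key between the req and rep events, a trace post-processor can pair them to measure per-request latency. A purely hypothetical stand-alone sketch (the record layout below is invented for illustration, not a real trace format):

#include <stdint.h>
#include <stdio.h>

struct zcrypt_rec {
	uint64_t ptr;		/* buffer address used as the correlation key */
	uint64_t ts_ns;		/* event timestamp in nanoseconds */
	int is_reply;		/* 0 for s390_zcrypt_req, 1 for s390_zcrypt_rep */
};

static void report_latency(const struct zcrypt_rec *req, const struct zcrypt_rec *rep)
{
	if (!req->is_reply && rep->is_reply && req->ptr == rep->ptr)
		printf("request %#llx took %llu ns\n",
		       (unsigned long long)req->ptr,
		       (unsigned long long)(rep->ts_ns - req->ts_ns));
}

int main(void)
{
	struct zcrypt_rec req = { 0x1000, 100, 0 };
	struct zcrypt_rec rep = { 0x1000, 4100, 1 };

	report_latency(&req, &rep);	/* prints: request 0x1000 took 4000 ns */
	return 0;
}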
+
+#endif /* _TRACE_S390_ZCRYPT_H */
+
+/* This part must be outside protection */
+
+#undef TRACE_INCLUDE_PATH
+#undef TRACE_INCLUDE_FILE
+
+#define TRACE_INCLUDE_PATH asm/trace
+#define TRACE_INCLUDE_FILE zcrypt
+
+#include <trace/define_trace.h>
diff --git a/arch/s390/include/asm/uaccess.h b/arch/s390/include/asm/uaccess.h
new file mode 100644
index 000000000..40194f8c7
--- /dev/null
+++ b/arch/s390/include/asm/uaccess.h
@@ -0,0 +1,282 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * S390 version
+ * Copyright IBM Corp. 1999, 2000
+ * Author(s): Hartmut Penner (hp@de.ibm.com),
+ * Martin Schwidefsky (schwidefsky@de.ibm.com)
+ *
+ * Derived from "include/asm-i386/uaccess.h"
+ */
+#ifndef __S390_UACCESS_H
+#define __S390_UACCESS_H
+
+/*
+ * User space memory access functions
+ */
+#include <asm/processor.h>
+#include <asm/ctl_reg.h>
+#include <asm/extable.h>
+#include <asm/facility.h>
+
+/*
+ * The fs value determines whether argument validity checking should be
+ * performed or not. If get_fs() == USER_DS, checking is performed, with
+ * get_fs() == KERNEL_DS, checking is bypassed.
+ *
+ * For historical reasons, these macros are grossly misnamed.
+ */
+
+#define KERNEL_DS (0)
+#define KERNEL_DS_SACF (1)
+#define USER_DS (2)
+#define USER_DS_SACF (3)
+
+#define get_ds() (KERNEL_DS)
+#define get_fs() (current->thread.mm_segment)
+#define segment_eq(a,b) (((a) & 2) == ((b) & 2))
+
+void set_fs(mm_segment_t fs);
+
+static inline int __range_ok(unsigned long addr, unsigned long size)
+{
+ return 1;
+}
+
+#define __access_ok(addr, size) \
+({ \
+ __chk_user_ptr(addr); \
+ __range_ok((unsigned long)(addr), (size)); \
+})
+
+#define access_ok(type, addr, size) __access_ok(addr, size)
+
+unsigned long __must_check
+raw_copy_from_user(void *to, const void __user *from, unsigned long n);
+
+unsigned long __must_check
+raw_copy_to_user(void __user *to, const void *from, unsigned long n);
+
+#ifndef CONFIG_KASAN
+#define INLINE_COPY_FROM_USER
+#define INLINE_COPY_TO_USER
+#endif
+
+#ifdef CONFIG_HAVE_MARCH_Z10_FEATURES
+
+#define __put_get_user_asm(to, from, size, spec) \
+({ \
+ register unsigned long __reg0 asm("0") = spec; \
+ int __rc; \
+ \
+ asm volatile( \
+ "0: mvcos %1,%3,%2\n" \
+ "1: xr %0,%0\n" \
+ "2:\n" \
+ ".pushsection .fixup, \"ax\"\n" \
+ "3: lhi %0,%5\n" \
+ " jg 2b\n" \
+ ".popsection\n" \
+ EX_TABLE(0b,3b) EX_TABLE(1b,3b) \
+ : "=d" (__rc), "+Q" (*(to)) \
+ : "d" (size), "Q" (*(from)), \
+ "d" (__reg0), "K" (-EFAULT) \
+ : "cc"); \
+ __rc; \
+})
+
+static __always_inline int __put_user_fn(void *x, void __user *ptr, unsigned long size)
+{
+ unsigned long spec = 0x010000UL;
+ int rc;
+
+ switch (size) {
+ case 1:
+ rc = __put_get_user_asm((unsigned char __user *)ptr,
+ (unsigned char *)x,
+ size, spec);
+ break;
+ case 2:
+ rc = __put_get_user_asm((unsigned short __user *)ptr,
+ (unsigned short *)x,
+ size, spec);
+ break;
+ case 4:
+ rc = __put_get_user_asm((unsigned int __user *)ptr,
+ (unsigned int *)x,
+ size, spec);
+ break;
+ case 8:
+ rc = __put_get_user_asm((unsigned long __user *)ptr,
+ (unsigned long *)x,
+ size, spec);
+ break;
+ }
+ return rc;
+}
+
+static __always_inline int __get_user_fn(void *x, const void __user *ptr, unsigned long size)
+{
+ unsigned long spec = 0x01UL;
+ int rc;
+
+ switch (size) {
+ case 1:
+ rc = __put_get_user_asm((unsigned char *)x,
+ (unsigned char __user *)ptr,
+ size, spec);
+ break;
+ case 2:
+ rc = __put_get_user_asm((unsigned short *)x,
+ (unsigned short __user *)ptr,
+ size, spec);
+ break;
+ case 4:
+ rc = __put_get_user_asm((unsigned int *)x,
+ (unsigned int __user *)ptr,
+ size, spec);
+ break;
+ case 8:
+ rc = __put_get_user_asm((unsigned long *)x,
+ (unsigned long __user *)ptr,
+ size, spec);
+ break;
+ }
+ return rc;
+}
+
+#else /* CONFIG_HAVE_MARCH_Z10_FEATURES */
+
+static inline int __put_user_fn(void *x, void __user *ptr, unsigned long size)
+{
+ size = raw_copy_to_user(ptr, x, size);
+ return size ? -EFAULT : 0;
+}
+
+static inline int __get_user_fn(void *x, const void __user *ptr, unsigned long size)
+{
+ size = raw_copy_from_user(x, ptr, size);
+ return size ? -EFAULT : 0;
+}
+
+#endif /* CONFIG_HAVE_MARCH_Z10_FEATURES */
+
+/*
+ * These are the main single-value transfer routines. They automatically
+ * use the right size if we just have the right pointer type.
+ */
+#define __put_user(x, ptr) \
+({ \
+ __typeof__(*(ptr)) __x = (x); \
+ int __pu_err = -EFAULT; \
+ __chk_user_ptr(ptr); \
+ switch (sizeof (*(ptr))) { \
+ case 1: \
+ case 2: \
+ case 4: \
+ case 8: \
+ __pu_err = __put_user_fn(&__x, ptr, \
+ sizeof(*(ptr))); \
+ break; \
+ default: \
+ __put_user_bad(); \
+ break; \
+ } \
+ __builtin_expect(__pu_err, 0); \
+})
+
+#define put_user(x, ptr) \
+({ \
+ might_fault(); \
+ __put_user(x, ptr); \
+})
+
+
+int __put_user_bad(void) __attribute__((noreturn));
+
+#define __get_user(x, ptr) \
+({ \
+ int __gu_err = -EFAULT; \
+ __chk_user_ptr(ptr); \
+ switch (sizeof(*(ptr))) { \
+ case 1: { \
+ unsigned char __x = 0; \
+ __gu_err = __get_user_fn(&__x, ptr, \
+ sizeof(*(ptr))); \
+ (x) = *(__force __typeof__(*(ptr)) *) &__x; \
+ break; \
+ }; \
+ case 2: { \
+ unsigned short __x = 0; \
+ __gu_err = __get_user_fn(&__x, ptr, \
+ sizeof(*(ptr))); \
+ (x) = *(__force __typeof__(*(ptr)) *) &__x; \
+ break; \
+ }; \
+ case 4: { \
+ unsigned int __x = 0; \
+ __gu_err = __get_user_fn(&__x, ptr, \
+ sizeof(*(ptr))); \
+ (x) = *(__force __typeof__(*(ptr)) *) &__x; \
+ break; \
+ }; \
+ case 8: { \
+ unsigned long long __x = 0; \
+ __gu_err = __get_user_fn(&__x, ptr, \
+ sizeof(*(ptr))); \
+ (x) = *(__force __typeof__(*(ptr)) *) &__x; \
+ break; \
+ }; \
+ default: \
+ __get_user_bad(); \
+ break; \
+ } \
+ __builtin_expect(__gu_err, 0); \
+})
+
+#define get_user(x, ptr) \
+({ \
+ might_fault(); \
+ __get_user(x, ptr); \
+})
+
+int __get_user_bad(void) __attribute__((noreturn));
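As the comment before __put_user() notes, the transfer width is picked from the pointer type, so callers only test the return value. A hedged kernel-context usage sketch (the function name is illustrative):

static long example_increment_user_int(int __user *uptr)
{
	int val;

	if (get_user(val, uptr))	/* -EFAULT on an unreadable pointer */
		return -EFAULT;
	val++;
	if (put_user(val, uptr))	/* -EFAULT on an unwritable pointer */
		return -EFAULT;
	return 0;
}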
+
+unsigned long __must_check
+raw_copy_in_user(void __user *to, const void __user *from, unsigned long n);
+
+/*
+ * Copy a null terminated string from userspace.
+ */
+
+long __strncpy_from_user(char *dst, const char __user *src, long count);
+
+static inline long __must_check
+strncpy_from_user(char *dst, const char __user *src, long count)
+{
+ might_fault();
+ return __strncpy_from_user(dst, src, count);
+}
+
+unsigned long __must_check __strnlen_user(const char __user *src, unsigned long count);
+
+static inline unsigned long strnlen_user(const char __user *src, unsigned long n)
+{
+ might_fault();
+ return __strnlen_user(src, n);
+}
+
+/*
+ * Zero Userspace
+ */
+unsigned long __must_check __clear_user(void __user *to, unsigned long size);
+
+static inline unsigned long __must_check clear_user(void __user *to, unsigned long n)
+{
+ might_fault();
+ return __clear_user(to, n);
+}
+
+int copy_to_user_real(void __user *dest, void *src, unsigned long count);
+void s390_kernel_write(void *dst, const void *src, size_t size);
+
+#endif /* __S390_UACCESS_H */
diff --git a/arch/s390/include/asm/unistd.h b/arch/s390/include/asm/unistd.h
new file mode 100644
index 000000000..fd79c0d35
--- /dev/null
+++ b/arch/s390/include/asm/unistd.h
@@ -0,0 +1,42 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * S390 version
+ *
+ * Derived from "include/asm-i386/unistd.h"
+ */
+#ifndef _ASM_S390_UNISTD_H_
+#define _ASM_S390_UNISTD_H_
+
+#include <uapi/asm/unistd.h>
+#include <asm/unistd_nr.h>
+
+#define __IGNORE_time
+#define __IGNORE_pkey_mprotect
+#define __IGNORE_pkey_alloc
+#define __IGNORE_pkey_free
+
+#define __ARCH_WANT_OLD_READDIR
+#define __ARCH_WANT_SYS_ALARM
+#define __ARCH_WANT_SYS_GETHOSTNAME
+#define __ARCH_WANT_SYS_PAUSE
+#define __ARCH_WANT_SYS_SIGNAL
+#define __ARCH_WANT_SYS_UTIME
+#define __ARCH_WANT_SYS_SOCKETCALL
+#define __ARCH_WANT_SYS_IPC
+#define __ARCH_WANT_SYS_FADVISE64
+#define __ARCH_WANT_SYS_GETPGRP
+#define __ARCH_WANT_SYS_LLSEEK
+#define __ARCH_WANT_SYS_NICE
+#define __ARCH_WANT_SYS_OLD_GETRLIMIT
+#define __ARCH_WANT_SYS_OLD_MMAP
+#define __ARCH_WANT_SYS_OLDUMOUNT
+#define __ARCH_WANT_SYS_SIGPENDING
+#define __ARCH_WANT_SYS_SIGPROCMASK
+# ifdef CONFIG_COMPAT
+# define __ARCH_WANT_COMPAT_SYS_TIME
+# endif
+#define __ARCH_WANT_SYS_FORK
+#define __ARCH_WANT_SYS_VFORK
+#define __ARCH_WANT_SYS_CLONE
+
+#endif /* _ASM_S390_UNISTD_H_ */
diff --git a/arch/s390/include/asm/uprobes.h b/arch/s390/include/asm/uprobes.h
new file mode 100644
index 000000000..b60b3c7ef
--- /dev/null
+++ b/arch/s390/include/asm/uprobes.h
@@ -0,0 +1,33 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * User-space Probes (UProbes) for s390
+ *
+ * Copyright IBM Corp. 2014
+ * Author(s): Jan Willeke,
+ */
+
+#ifndef _ASM_UPROBES_H
+#define _ASM_UPROBES_H
+
+#include <linux/notifier.h>
+
+typedef u16 uprobe_opcode_t;
+
+#define UPROBE_XOL_SLOT_BYTES 256 /* cache aligned */
+
+#define UPROBE_SWBP_INSN 0x0002
+#define UPROBE_SWBP_INSN_SIZE 2
+
+struct arch_uprobe {
+ union{
+ uprobe_opcode_t insn[3];
+ uprobe_opcode_t ixol[3];
+ };
+ unsigned int saved_per : 1;
+ unsigned int saved_int_code;
+};
+
+struct arch_uprobe_task {
+};
+
+#endif /* _ASM_UPROBES_H */
diff --git a/arch/s390/include/asm/user.h b/arch/s390/include/asm/user.h
new file mode 100644
index 000000000..0ca572ced
--- /dev/null
+++ b/arch/s390/include/asm/user.h
@@ -0,0 +1,75 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * S390 version
+ *
+ * Derived from "include/asm-i386/user.h"
+ */
+
+#ifndef _S390_USER_H
+#define _S390_USER_H
+
+#include <asm/page.h>
+#include <asm/ptrace.h>
+/* Core file format: The core file is written in such a way that gdb
+ can understand it and provide useful information to the user (under
+ linux we use the 'trad-core' bfd). There are quite a number of
+ obstacles to being able to view the contents of the floating point
+ registers, and until these are solved you will not be able to view the
+ contents of them. Actually, you can read in the core file and look at
+ the contents of the user struct to find out what the floating point
+ registers contain.
+ The actual file contents are as follows:
+ UPAGE: 1 page consisting of a user struct that tells gdb what is present
+ in the file. Directly after this is a copy of the task_struct, which
+ is currently not used by gdb, but it may come in useful at some point.
+ All of the registers are stored as part of the upage. The upage should
+ always be only one page.
+ DATA: The data area is stored. We use current->end_text to
+ current->brk to pick up all of the user variables, plus any memory
+ that may have been malloced. No attempt is made to determine if a page
+ is demand-zero or if a page is totally unused, we just cover the entire
+ range. All of the addresses are rounded in such a way that an integral
+ number of pages is written.
+ STACK: We need the stack information in order to get a meaningful
+ backtrace. We need to write the data from (esp) to
+ current->start_stack, so we round each of these off in order to be able
+ to write an integer number of pages.
+ The minimum core file size is 3 pages, or 12288 bytes.
+*/
+
+
+/*
+ * This is the old layout of "struct pt_regs", and
+ * is still the layout used by user mode (the new
+ * pt_regs doesn't have all registers as the kernel
+ * doesn't use the extra segment registers)
+ */
+
+/* When the kernel dumps core, it starts by dumping the user struct -
+ this will be used by gdb to figure out where the data and stack segments
+ are within the file, and what virtual addresses to use. */
+struct user {
+/* We start with the registers, to mimic the way that "memory" is returned
+ from the ptrace(3,...) function. */
+ struct user_regs_struct regs; /* Where the registers are actually stored */
+/* The rest of this junk is to help gdb figure out what goes where */
+ unsigned long int u_tsize; /* Text segment size (pages). */
+ unsigned long int u_dsize; /* Data segment size (pages). */
+ unsigned long int u_ssize; /* Stack segment size (pages). */
+ unsigned long start_code; /* Starting virtual address of text. */
+ unsigned long start_stack; /* Starting virtual address of stack area.
+ This is actually the bottom of the stack,
+ the top of the stack is always found in the
+ esp register. */
+ long int signal; /* Signal that caused the core dump. */
+ unsigned long u_ar0; /* Used by gdb to help find the values for */
+ /* the registers. */
+ unsigned long magic; /* To uniquely identify a core file */
+ char u_comm[32]; /* User command that was responsible */
+};
+#define NBPG PAGE_SIZE
+#define UPAGES 1
+#define HOST_TEXT_START_ADDR (u.start_code)
+#define HOST_STACK_END_ADDR (u.start_stack + u.u_ssize * NBPG)
+
+#endif /* _S390_USER_H */
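
A hedged, kernel-context sketch of how a core dumper or reader might derive the segment boundaries that the HOST_* macros above encode; the helper below is hypothetical and only mirrors the macro definitions:

	/* Hypothetical helper mirroring HOST_TEXT_START_ADDR / HOST_STACK_END_ADDR. */
	#include <asm/user.h>

	static void locate_segments(const struct user *u,
				    unsigned long *text_start, unsigned long *stack_end)
	{
		*text_start = u->start_code;				/* HOST_TEXT_START_ADDR */
		*stack_end  = u->start_stack + u->u_ssize * NBPG;	/* HOST_STACK_END_ADDR */
	}
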
diff --git a/arch/s390/include/asm/vdso.h b/arch/s390/include/asm/vdso.h
new file mode 100644
index 000000000..f3ba84fa9
--- /dev/null
+++ b/arch/s390/include/asm/vdso.h
@@ -0,0 +1,57 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __S390_VDSO_H__
+#define __S390_VDSO_H__
+
+/* Default link addresses for the vDSOs */
+#define VDSO32_LBASE 0
+#define VDSO64_LBASE 0
+
+#define VDSO_VERSION_STRING LINUX_2.6.29
+
+#ifndef __ASSEMBLY__
+
+/*
+ * Note about the vdso_data and vdso_per_cpu_data structures:
+ *
+ * NEVER USE THEM IN USERSPACE CODE DIRECTLY. The layout of the
+ * structure is supposed to be known only to the function in the vdso
+ * itself and may change without notice.
+ */
+
+struct vdso_data {
+ __u64 tb_update_count; /* Timebase atomicity ctr 0x00 */
+ __u64 xtime_tod_stamp; /* TOD clock for xtime 0x08 */
+ __u64 xtime_clock_sec; /* Kernel time 0x10 */
+ __u64 xtime_clock_nsec; /* 0x18 */
+ __u64 xtime_coarse_sec; /* Coarse kernel time 0x20 */
+ __u64 xtime_coarse_nsec; /* 0x28 */
+ __u64 wtom_clock_sec; /* Wall to monotonic clock 0x30 */
+ __u64 wtom_clock_nsec; /* 0x38 */
+ __u64 wtom_coarse_sec; /* Coarse wall to monotonic 0x40 */
+ __u64 wtom_coarse_nsec; /* 0x48 */
+ __u32 tz_minuteswest; /* Minutes west of Greenwich 0x50 */
+ __u32 tz_dsttime; /* Type of dst correction 0x54 */
+ __u32 ectg_available; /* ECTG instruction present 0x58 */
+ __u32 tk_mult; /* Mult. used for xtime_nsec 0x5c */
+ __u32 tk_shift; /* Shift used for xtime_nsec 0x60 */
+ __u32 ts_dir; /* TOD steering direction 0x64 */
+ __u64 ts_end; /* TOD steering end 0x68 */
+ __u32 hrtimer_res; /* hrtimer resolution 0x70 */
+};
+
+struct vdso_per_cpu_data {
+ __u64 ectg_timer_base;
+ __u64 ectg_user_time;
+ __u32 cpu_nr;
+ __u32 node_id;
+};
+
+extern struct vdso_data *vdso_data;
+extern struct vdso_data boot_vdso_data;
+
+void vdso_alloc_boot_cpu(struct lowcore *lowcore);
+int vdso_alloc_per_cpu(struct lowcore *lowcore);
+void vdso_free_per_cpu(struct lowcore *lowcore);
+
+#endif /* __ASSEMBLY__ */
+#endif /* __S390_VDSO_H__ */
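
The hexadecimal comments give the byte offsets that the vdso code relies on. The checks below are not part of the patch and only illustrate that layout; the kernel itself typically generates such offsets through asm-offsets:

	/* Illustrative C11 compile-time checks of the documented vdso_data offsets. */
	#include <stddef.h>
	#include <asm/vdso.h>

	_Static_assert(offsetof(struct vdso_data, xtime_tod_stamp) == 0x08, "vdso offset");
	_Static_assert(offsetof(struct vdso_data, wtom_clock_sec)  == 0x30, "vdso offset");
	_Static_assert(offsetof(struct vdso_data, ts_end)          == 0x68, "vdso offset");
	_Static_assert(offsetof(struct vdso_data, hrtimer_res)     == 0x70, "vdso offset");
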
diff --git a/arch/s390/include/asm/vga.h b/arch/s390/include/asm/vga.h
new file mode 100644
index 000000000..605dc46ba
--- /dev/null
+++ b/arch/s390/include/asm/vga.h
@@ -0,0 +1,7 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_S390_VGA_H
+#define _ASM_S390_VGA_H
+
+/* Avoid compile errors due to missing asm/vga.h */
+
+#endif /* _ASM_S390_VGA_H */
diff --git a/arch/s390/include/asm/vtime.h b/arch/s390/include/asm/vtime.h
new file mode 100644
index 000000000..3622d4ebc
--- /dev/null
+++ b/arch/s390/include/asm/vtime.h
@@ -0,0 +1,8 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _S390_VTIME_H
+#define _S390_VTIME_H
+
+#define __ARCH_HAS_VTIME_ACCOUNT
+#define __ARCH_HAS_VTIME_TASK_SWITCH
+
+#endif /* _S390_VTIME_H */
diff --git a/arch/s390/include/asm/vtimer.h b/arch/s390/include/asm/vtimer.h
new file mode 100644
index 000000000..42f707d1c
--- /dev/null
+++ b/arch/s390/include/asm/vtimer.h
@@ -0,0 +1,32 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright IBM Corp. 2003, 2012
+ * Virtual CPU timer
+ *
+ * Author(s): Jan Glauber <jan.glauber@de.ibm.com>
+ */
+
+#ifndef _ASM_S390_TIMER_H
+#define _ASM_S390_TIMER_H
+
+#define VTIMER_MAX_SLICE (0x7fffffffffffffffULL)
+
+struct vtimer_list {
+ struct list_head entry;
+ u64 expires;
+ u64 interval;
+ void (*function)(unsigned long);
+ unsigned long data;
+};
+
+extern void init_virt_timer(struct vtimer_list *timer);
+extern void add_virt_timer(struct vtimer_list *timer);
+extern void add_virt_timer_periodic(struct vtimer_list *timer);
+extern int mod_virt_timer(struct vtimer_list *timer, u64 expires);
+extern int mod_virt_timer_periodic(struct vtimer_list *timer, u64 expires);
+extern int del_virt_timer(struct vtimer_list *timer);
+
+extern void init_cpu_vtimer(void);
+extern void vtime_init(void);
+
+#endif /* _ASM_S390_TIMER_H */
diff --git a/arch/s390/include/asm/vx-insn.h b/arch/s390/include/asm/vx-insn.h
new file mode 100644
index 000000000..266a72320
--- /dev/null
+++ b/arch/s390/include/asm/vx-insn.h
@@ -0,0 +1,561 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Support for Vector Instructions
+ *
+ * Assembler macros to generate .byte/.word code for particular
+ * vector instructions that are supported by recent binutils (>= 2.26) only.
+ *
+ * Copyright IBM Corp. 2015
+ * Author(s): Hendrik Brueckner <brueckner@linux.vnet.ibm.com>
+ */
+
+#ifndef __ASM_S390_VX_INSN_H
+#define __ASM_S390_VX_INSN_H
+
+#ifdef __ASSEMBLY__
+
+
+/* Macros to generate vector instruction byte code */
+
+/* GR_NUM - Retrieve general-purpose register number
+ *
+ * @opd: Operand to store register number
+ * @gr: String designating the general purpose register in the format "%rN"
+ */
+.macro GR_NUM opd gr
+ \opd = 255
+ .ifc \gr,%r0
+ \opd = 0
+ .endif
+ .ifc \gr,%r1
+ \opd = 1
+ .endif
+ .ifc \gr,%r2
+ \opd = 2
+ .endif
+ .ifc \gr,%r3
+ \opd = 3
+ .endif
+ .ifc \gr,%r4
+ \opd = 4
+ .endif
+ .ifc \gr,%r5
+ \opd = 5
+ .endif
+ .ifc \gr,%r6
+ \opd = 6
+ .endif
+ .ifc \gr,%r7
+ \opd = 7
+ .endif
+ .ifc \gr,%r8
+ \opd = 8
+ .endif
+ .ifc \gr,%r9
+ \opd = 9
+ .endif
+ .ifc \gr,%r10
+ \opd = 10
+ .endif
+ .ifc \gr,%r11
+ \opd = 11
+ .endif
+ .ifc \gr,%r12
+ \opd = 12
+ .endif
+ .ifc \gr,%r13
+ \opd = 13
+ .endif
+ .ifc \gr,%r14
+ \opd = 14
+ .endif
+ .ifc \gr,%r15
+ \opd = 15
+ .endif
+ .if \opd == 255
+ \opd = \gr
+ .endif
+.endm
+
+/* VX_NUM - Retrieve vector register number
+ *
+ * @opd: Operand to store register number
+ * @vxr: String designating the vector register in the format "%vN"
+ *
+ * The vector register number is used as an input to the
+ * instruction and to compute the RXB field of the
+ * instruction.
+ */
+.macro VX_NUM opd vxr
+ \opd = 255
+ .ifc \vxr,%v0
+ \opd = 0
+ .endif
+ .ifc \vxr,%v1
+ \opd = 1
+ .endif
+ .ifc \vxr,%v2
+ \opd = 2
+ .endif
+ .ifc \vxr,%v3
+ \opd = 3
+ .endif
+ .ifc \vxr,%v4
+ \opd = 4
+ .endif
+ .ifc \vxr,%v5
+ \opd = 5
+ .endif
+ .ifc \vxr,%v6
+ \opd = 6
+ .endif
+ .ifc \vxr,%v7
+ \opd = 7
+ .endif
+ .ifc \vxr,%v8
+ \opd = 8
+ .endif
+ .ifc \vxr,%v9
+ \opd = 9
+ .endif
+ .ifc \vxr,%v10
+ \opd = 10
+ .endif
+ .ifc \vxr,%v11
+ \opd = 11
+ .endif
+ .ifc \vxr,%v12
+ \opd = 12
+ .endif
+ .ifc \vxr,%v13
+ \opd = 13
+ .endif
+ .ifc \vxr,%v14
+ \opd = 14
+ .endif
+ .ifc \vxr,%v15
+ \opd = 15
+ .endif
+ .ifc \vxr,%v16
+ \opd = 16
+ .endif
+ .ifc \vxr,%v17
+ \opd = 17
+ .endif
+ .ifc \vxr,%v18
+ \opd = 18
+ .endif
+ .ifc \vxr,%v19
+ \opd = 19
+ .endif
+ .ifc \vxr,%v20
+ \opd = 20
+ .endif
+ .ifc \vxr,%v21
+ \opd = 21
+ .endif
+ .ifc \vxr,%v22
+ \opd = 22
+ .endif
+ .ifc \vxr,%v23
+ \opd = 23
+ .endif
+ .ifc \vxr,%v24
+ \opd = 24
+ .endif
+ .ifc \vxr,%v25
+ \opd = 25
+ .endif
+ .ifc \vxr,%v26
+ \opd = 26
+ .endif
+ .ifc \vxr,%v27
+ \opd = 27
+ .endif
+ .ifc \vxr,%v28
+ \opd = 28
+ .endif
+ .ifc \vxr,%v29
+ \opd = 29
+ .endif
+ .ifc \vxr,%v30
+ \opd = 30
+ .endif
+ .ifc \vxr,%v31
+ \opd = 31
+ .endif
+ .if \opd == 255
+ \opd = \vxr
+ .endif
+.endm
+
+/* RXB - Compute the RXB field from the most significant bits of the used vector registers
+ *
+ * @rxb: Operand to store computed RXB value
+ * @v1: First vector register designated operand
+ * @v2: Second vector register designated operand
+ * @v3: Third vector register designated operand
+ * @v4: Fourth vector register designated operand
+ */
+.macro RXB rxb v1 v2=0 v3=0 v4=0
+ \rxb = 0
+ .if \v1 & 0x10
+ \rxb = \rxb | 0x08
+ .endif
+ .if \v2 & 0x10
+ \rxb = \rxb | 0x04
+ .endif
+ .if \v3 & 0x10
+ \rxb = \rxb | 0x02
+ .endif
+ .if \v4 & 0x10
+ \rxb = \rxb | 0x01
+ .endif
+.endm
+
+/* MRXB - Generate Element Size Control and RXB value
+ *
+ * @m: Element size control
+ * @v1: First vector register designated operand (for RXB)
+ * @v2: Second vector register designated operand (for RXB)
+ * @v3: Third vector register designated operand (for RXB)
+ * @v4: Fourth vector register designated operand (for RXB)
+ */
+.macro MRXB m v1 v2=0 v3=0 v4=0
+ rxb = 0
+ RXB rxb, \v1, \v2, \v3, \v4
+ .byte (\m << 4) | rxb
+.endm
+
+/* MRXBOPC - Generate Element Size Control, RXB, and final Opcode fields
+ *
+ * @m: Element size control
+ * @opc: Opcode
+ * @v1: First vector register designated operand (for RXB)
+ * @v2: Second vector register designated operand (for RXB)
+ * @v3: Third vector register designated operand (for RXB)
+ * @v4: Fourth vector register designated operand (for RXB)
+ */
+.macro MRXBOPC m opc v1 v2=0 v3=0 v4=0
+ MRXB \m, \v1, \v2, \v3, \v4
+ .byte \opc
+.endm
+
+/* Vector support instructions */
+
+/* VECTOR GENERATE BYTE MASK */
+.macro VGBM vr imm2
+ VX_NUM v1, \vr
+ .word (0xE700 | ((v1&15) << 4))
+ .word \imm2
+ MRXBOPC 0, 0x44, v1
+.endm
+.macro VZERO vxr
+ VGBM \vxr, 0
+.endm
+.macro VONE vxr
+ VGBM \vxr, 0xFFFF
+.endm
+
+/* VECTOR LOAD VR ELEMENT FROM GR */
+.macro VLVG v, gr, disp, m
+ VX_NUM v1, \v
+ GR_NUM b2, "%r0"
+ GR_NUM r3, \gr
+ .word 0xE700 | ((v1&15) << 4) | r3
+ .word (b2 << 12) | (\disp)
+ MRXBOPC \m, 0x22, v1
+.endm
+.macro VLVGB v, gr, index
+ VLVG \v, \gr, \index, 0
+.endm
+.macro VLVGH v, gr, index
+ VLVG \v, \gr, \index, 1
+.endm
+.macro VLVGF v, gr, index
+ VLVG \v, \gr, \index, 2
+.endm
+.macro VLVGG v, gr, index
+ VLVG \v, \gr, \index, 3
+.endm
+
+/* VECTOR LOAD REGISTER */
+.macro VLR v1, v2
+ VX_NUM v1, \v1
+ VX_NUM v2, \v2
+ .word 0xE700 | ((v1&15) << 4) | (v2&15)
+ .word 0
+ MRXBOPC 0, 0x56, v1, v2
+.endm
+
+/* VECTOR LOAD */
+.macro VL v, disp, index="%r0", base
+ VX_NUM v1, \v
+ GR_NUM x2, \index
+ GR_NUM b2, \base
+ .word 0xE700 | ((v1&15) << 4) | x2
+ .word (b2 << 12) | (\disp)
+ MRXBOPC 0, 0x06, v1
+.endm
+
+/* VECTOR LOAD ELEMENT */
+.macro VLEx vr1, disp, index="%r0", base, m3, opc
+ VX_NUM v1, \vr1
+ GR_NUM x2, \index
+ GR_NUM b2, \base
+ .word 0xE700 | ((v1&15) << 4) | x2
+ .word (b2 << 12) | (\disp)
+ MRXBOPC \m3, \opc, v1
+.endm
+.macro VLEB vr1, disp, index="%r0", base, m3
+ VLEx \vr1, \disp, \index, \base, \m3, 0x00
+.endm
+.macro VLEH vr1, disp, index="%r0", base, m3
+ VLEx \vr1, \disp, \index, \base, \m3, 0x01
+.endm
+.macro VLEF vr1, disp, index="%r0", base, m3
+ VLEx \vr1, \disp, \index, \base, \m3, 0x03
+.endm
+.macro VLEG vr1, disp, index="%r0", base, m3
+ VLEx \vr1, \disp, \index, \base, \m3, 0x02
+.endm
+
+/* VECTOR LOAD ELEMENT IMMEDIATE */
+.macro VLEIx vr1, imm2, m3, opc
+ VX_NUM v1, \vr1
+ .word 0xE700 | ((v1&15) << 4)
+ .word \imm2
+ MRXBOPC \m3, \opc, v1
+.endm
+.macro VLEIB vr1, imm2, index
+ VLEIx \vr1, \imm2, \index, 0x40
+.endm
+.macro VLEIH vr1, imm2, index
+ VLEIx \vr1, \imm2, \index, 0x41
+.endm
+.macro VLEIF vr1, imm2, index
+ VLEIx \vr1, \imm2, \index, 0x43
+.endm
+.macro VLEIG vr1, imm2, index
+ VLEIx \vr1, \imm2, \index, 0x42
+.endm
+
+/* VECTOR LOAD GR FROM VR ELEMENT */
+.macro VLGV gr, vr, disp, base="%r0", m
+ GR_NUM r1, \gr
+ GR_NUM b2, \base
+ VX_NUM v3, \vr
+ .word 0xE700 | (r1 << 4) | (v3&15)
+ .word (b2 << 12) | (\disp)
+ MRXBOPC \m, 0x21, v3
+.endm
+.macro VLGVB gr, vr, disp, base="%r0"
+ VLGV \gr, \vr, \disp, \base, 0
+.endm
+.macro VLGVH gr, vr, disp, base="%r0"
+ VLGV \gr, \vr, \disp, \base, 1
+.endm
+.macro VLGVF gr, vr, disp, base="%r0"
+ VLGV \gr, \vr, \disp, \base, 2
+.endm
+.macro VLGVG gr, vr, disp, base="%r0"
+ VLGV \gr, \vr, \disp, \base, 3
+.endm
+
+/* VECTOR LOAD MULTIPLE */
+.macro VLM vfrom, vto, disp, base
+ VX_NUM v1, \vfrom
+ VX_NUM v3, \vto
+ GR_NUM b2, \base /* Base register */
+ .word 0xE700 | ((v1&15) << 4) | (v3&15)
+ .word (b2 << 12) | (\disp)
+ MRXBOPC 0, 0x36, v1, v3
+.endm
+
+/* VECTOR STORE MULTIPLE */
+.macro VSTM vfrom, vto, disp, base
+ VX_NUM v1, \vfrom
+ VX_NUM v3, \vto
+ GR_NUM b2, \base /* Base register */
+ .word 0xE700 | ((v1&15) << 4) | (v3&15)
+ .word (b2 << 12) | (\disp)
+ MRXBOPC 0, 0x3E, v1, v3
+.endm
+
+/* VECTOR PERMUTE */
+.macro VPERM vr1, vr2, vr3, vr4
+ VX_NUM v1, \vr1
+ VX_NUM v2, \vr2
+ VX_NUM v3, \vr3
+ VX_NUM v4, \vr4
+ .word 0xE700 | ((v1&15) << 4) | (v2&15)
+ .word ((v3&15) << 12)
+ MRXBOPC (v4&15), 0x8C, v1, v2, v3, v4
+.endm
+
+/* VECTOR UNPACK LOGICAL LOW */
+.macro VUPLL vr1, vr2, m3
+ VX_NUM v1, \vr1
+ VX_NUM v2, \vr2
+ .word 0xE700 | ((v1&15) << 4) | (v2&15)
+ .word 0x0000
+ MRXBOPC \m3, 0xD4, v1, v2
+.endm
+.macro VUPLLB vr1, vr2
+ VUPLL \vr1, \vr2, 0
+.endm
+.macro VUPLLH vr1, vr2
+ VUPLL \vr1, \vr2, 1
+.endm
+.macro VUPLLF vr1, vr2
+ VUPLL \vr1, \vr2, 2
+.endm
+
+
+/* Vector integer instructions */
+
+/* VECTOR AND */
+.macro VN vr1, vr2, vr3
+ VX_NUM v1, \vr1
+ VX_NUM v2, \vr2
+ VX_NUM v3, \vr3
+ .word 0xE700 | ((v1&15) << 4) | (v2&15)
+ .word ((v3&15) << 12)
+ MRXBOPC 0, 0x68, v1, v2, v3
+.endm
+
+/* VECTOR EXCLUSIVE OR */
+.macro VX vr1, vr2, vr3
+ VX_NUM v1, \vr1
+ VX_NUM v2, \vr2
+ VX_NUM v3, \vr3
+ .word 0xE700 | ((v1&15) << 4) | (v2&15)
+ .word ((v3&15) << 12)
+ MRXBOPC 0, 0x6D, v1, v2, v3
+.endm
+
+/* VECTOR GALOIS FIELD MULTIPLY SUM */
+.macro VGFM vr1, vr2, vr3, m4
+ VX_NUM v1, \vr1
+ VX_NUM v2, \vr2
+ VX_NUM v3, \vr3
+ .word 0xE700 | ((v1&15) << 4) | (v2&15)
+ .word ((v3&15) << 12)
+ MRXBOPC \m4, 0xB4, v1, v2, v3
+.endm
+.macro VGFMB vr1, vr2, vr3
+ VGFM \vr1, \vr2, \vr3, 0
+.endm
+.macro VGFMH vr1, vr2, vr3
+ VGFM \vr1, \vr2, \vr3, 1
+.endm
+.macro VGFMF vr1, vr2, vr3
+ VGFM \vr1, \vr2, \vr3, 2
+.endm
+.macro VGFMG vr1, vr2, vr3
+ VGFM \vr1, \vr2, \vr3, 3
+.endm
+
+/* VECTOR GALOIS FIELD MULTIPLY SUM AND ACCUMULATE */
+.macro VGFMA vr1, vr2, vr3, vr4, m5
+ VX_NUM v1, \vr1
+ VX_NUM v2, \vr2
+ VX_NUM v3, \vr3
+ VX_NUM v4, \vr4
+ .word 0xE700 | ((v1&15) << 4) | (v2&15)
+ .word ((v3&15) << 12) | (\m5 << 8)
+ MRXBOPC (v4&15), 0xBC, v1, v2, v3, v4
+.endm
+.macro VGFMAB vr1, vr2, vr3, vr4
+ VGFMA \vr1, \vr2, \vr3, \vr4, 0
+.endm
+.macro VGFMAH vr1, vr2, vr3, vr4
+ VGFMA \vr1, \vr2, \vr3, \vr4, 1
+.endm
+.macro VGFMAF vr1, vr2, vr3, vr4
+ VGFMA \vr1, \vr2, \vr3, \vr4, 2
+.endm
+.macro VGFMAG vr1, vr2, vr3, vr4
+ VGFMA \vr1, \vr2, \vr3, \vr4, 3
+.endm
+
+/* VECTOR SHIFT RIGHT LOGICAL BY BYTE */
+.macro VSRLB vr1, vr2, vr3
+ VX_NUM v1, \vr1
+ VX_NUM v2, \vr2
+ VX_NUM v3, \vr3
+ .word 0xE700 | ((v1&15) << 4) | (v2&15)
+ .word ((v3&15) << 12)
+ MRXBOPC 0, 0x7D, v1, v2, v3
+.endm
+
+/* VECTOR REPLICATE IMMEDIATE */
+.macro VREPI vr1, imm2, m3
+ VX_NUM v1, \vr1
+ .word 0xE700 | ((v1&15) << 4)
+ .word \imm2
+ MRXBOPC \m3, 0x45, v1
+.endm
+.macro VREPIB vr1, imm2
+ VREPI \vr1, \imm2, 0
+.endm
+.macro VREPIH vr1, imm2
+ VREPI \vr1, \imm2, 1
+.endm
+.macro VREPIF vr1, imm2
+ VREPI \vr1, \imm2, 2
+.endm
+.macro VREPIG vr1, imm2
+ VREPI \vr1, \imm2, 3
+.endm
+
+/* VECTOR ADD */
+.macro VA vr1, vr2, vr3, m4
+ VX_NUM v1, \vr1
+ VX_NUM v2, \vr2
+ VX_NUM v3, \vr3
+ .word 0xE700 | ((v1&15) << 4) | (v2&15)
+ .word ((v3&15) << 12)
+ MRXBOPC \m4, 0xF3, v1, v2, v3
+.endm
+.macro VAB vr1, vr2, vr3
+ VA \vr1, \vr2, \vr3, 0
+.endm
+.macro VAH vr1, vr2, vr3
+ VA \vr1, \vr2, \vr3, 1
+.endm
+.macro VAF vr1, vr2, vr3
+ VA \vr1, \vr2, \vr3, 2
+.endm
+.macro VAG vr1, vr2, vr3
+ VA \vr1, \vr2, \vr3, 3
+.endm
+.macro VAQ vr1, vr2, vr3
+ VA \vr1, \vr2, \vr3, 4
+.endm
+
+/* VECTOR ELEMENT SHIFT RIGHT ARITHMETIC */
+.macro VESRAV vr1, vr2, vr3, m4
+ VX_NUM v1, \vr1
+ VX_NUM v2, \vr2
+ VX_NUM v3, \vr3
+ .word 0xE700 | ((v1&15) << 4) | (v2&15)
+ .word ((v3&15) << 12)
+ MRXBOPC \m4, 0x7A, v1, v2, v3
+.endm
+
+.macro VESRAVB vr1, vr2, vr3
+ VESRAV \vr1, \vr2, \vr3, 0
+.endm
+.macro VESRAVH vr1, vr2, vr3
+ VESRAV \vr1, \vr2, \vr3, 1
+.endm
+.macro VESRAVF vr1, vr2, vr3
+ VESRAV \vr1, \vr2, \vr3, 2
+.endm
+.macro VESRAVG vr1, vr2, vr3
+ VESRAV \vr1, \vr2, \vr3, 3
+.endm
+
+#endif /* __ASSEMBLY__ */
+#endif /* __ASM_S390_VX_INSN_H */
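
Each vector operand only has a 4-bit register field in the instruction itself; the RXB macro collects the fifth (most significant) bit of every operand so that %v16-%v31 can be encoded, and MRXB then merges the result with the element-size control. A C mirror of the RXB computation, given purely as an illustration:

	/* Illustrative C equivalent of the RXB macro: one RXB bit per operand using %v16..%v31. */
	static inline unsigned int rxb_of(unsigned int v1, unsigned int v2,
					  unsigned int v3, unsigned int v4)
	{
		return ((v1 & 0x10) ? 0x08 : 0) |
		       ((v2 & 0x10) ? 0x04 : 0) |
		       ((v3 & 0x10) ? 0x02 : 0) |
		       ((v4 & 0x10) ? 0x01 : 0);
	}
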
diff --git a/arch/s390/include/asm/xor.h b/arch/s390/include/asm/xor.h
new file mode 100644
index 000000000..857d6759b
--- /dev/null
+++ b/arch/s390/include/asm/xor.h
@@ -0,0 +1,21 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Optimized xor routines
+ *
+ * Copyright IBM Corp. 2016
+ * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>
+ */
+#ifndef _ASM_S390_XOR_H
+#define _ASM_S390_XOR_H
+
+extern struct xor_block_template xor_block_xc;
+
+#undef XOR_TRY_TEMPLATES
+#define XOR_TRY_TEMPLATES \
+do { \
+ xor_speed(&xor_block_xc); \
+} while (0)
+
+#define XOR_SELECT_TEMPLATE(FASTEST) (&xor_block_xc)
+
+#endif /* _ASM_S390_XOR_H */
diff --git a/arch/s390/include/uapi/asm/Kbuild b/arch/s390/include/uapi/asm/Kbuild
new file mode 100644
index 000000000..e364873e0
--- /dev/null
+++ b/arch/s390/include/uapi/asm/Kbuild
@@ -0,0 +1,20 @@
+# SPDX-License-Identifier: GPL-2.0
+# UAPI Header export list
+include include/uapi/asm-generic/Kbuild.asm
+
+generated-y += unistd_32.h
+generated-y += unistd_64.h
+
+generic-y += errno.h
+generic-y += fcntl.h
+generic-y += ioctl.h
+generic-y += mman.h
+generic-y += msgbuf.h
+generic-y += param.h
+generic-y += poll.h
+generic-y += resource.h
+generic-y += sembuf.h
+generic-y += shmbuf.h
+generic-y += sockios.h
+generic-y += swab.h
+generic-y += termbits.h
diff --git a/arch/s390/include/uapi/asm/auxvec.h b/arch/s390/include/uapi/asm/auxvec.h
new file mode 100644
index 000000000..a056c4637
--- /dev/null
+++ b/arch/s390/include/uapi/asm/auxvec.h
@@ -0,0 +1,9 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+#ifndef __ASMS390_AUXVEC_H
+#define __ASMS390_AUXVEC_H
+
+#define AT_SYSINFO_EHDR 33
+
+#define AT_VECTOR_SIZE_ARCH 1 /* entries in ARCH_DLINFO */
+
+#endif
diff --git a/arch/s390/include/uapi/asm/bitsperlong.h b/arch/s390/include/uapi/asm/bitsperlong.h
new file mode 100644
index 000000000..cceaf47b0
--- /dev/null
+++ b/arch/s390/include/uapi/asm/bitsperlong.h
@@ -0,0 +1,14 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+#ifndef __ASM_S390_BITSPERLONG_H
+#define __ASM_S390_BITSPERLONG_H
+
+#ifndef __s390x__
+#define __BITS_PER_LONG 32
+#else
+#define __BITS_PER_LONG 64
+#endif
+
+#include <asm-generic/bitsperlong.h>
+
+#endif /* __ASM_S390_BITSPERLONG_H */
+
diff --git a/arch/s390/include/uapi/asm/bpf_perf_event.h b/arch/s390/include/uapi/asm/bpf_perf_event.h
new file mode 100644
index 000000000..cefe7c7cd
--- /dev/null
+++ b/arch/s390/include/uapi/asm/bpf_perf_event.h
@@ -0,0 +1,9 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _UAPI__ASM_BPF_PERF_EVENT_H__
+#define _UAPI__ASM_BPF_PERF_EVENT_H__
+
+#include <asm/ptrace.h>
+
+typedef user_pt_regs bpf_user_pt_regs_t;
+
+#endif /* _UAPI__ASM_BPF_PERF_EVENT_H__ */
diff --git a/arch/s390/include/uapi/asm/byteorder.h b/arch/s390/include/uapi/asm/byteorder.h
new file mode 100644
index 000000000..1442b57dd
--- /dev/null
+++ b/arch/s390/include/uapi/asm/byteorder.h
@@ -0,0 +1,7 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+#ifndef _S390_BYTEORDER_H
+#define _S390_BYTEORDER_H
+
+#include <linux/byteorder/big_endian.h>
+
+#endif /* _S390_BYTEORDER_H */
diff --git a/arch/s390/include/uapi/asm/chpid.h b/arch/s390/include/uapi/asm/chpid.h
new file mode 100644
index 000000000..2ae2ed8c0
--- /dev/null
+++ b/arch/s390/include/uapi/asm/chpid.h
@@ -0,0 +1,23 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/*
+ * Copyright IBM Corp. 2007, 2012
+ * Author(s): Peter Oberparleiter <peter.oberparleiter@de.ibm.com>
+ */
+
+#ifndef _UAPI_ASM_S390_CHPID_H
+#define _UAPI_ASM_S390_CHPID_H
+
+#include <linux/string.h>
+#include <linux/types.h>
+
+#define __MAX_CHPID 255
+
+struct chp_id {
+ __u8 reserved1;
+ __u8 cssid;
+ __u8 reserved2;
+ __u8 id;
+} __attribute__((packed));
+
+
+#endif /* _UAPI_ASM_S390_CHPID_H */
diff --git a/arch/s390/include/uapi/asm/chsc.h b/arch/s390/include/uapi/asm/chsc.h
new file mode 100644
index 000000000..83a574e95
--- /dev/null
+++ b/arch/s390/include/uapi/asm/chsc.h
@@ -0,0 +1,144 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/*
+ * ioctl interface for /dev/chsc
+ *
+ * Copyright IBM Corp. 2008, 2012
+ * Author(s): Cornelia Huck <cornelia.huck@de.ibm.com>
+ */
+
+#ifndef _ASM_CHSC_H
+#define _ASM_CHSC_H
+
+#include <linux/types.h>
+#include <linux/ioctl.h>
+#include <asm/chpid.h>
+#include <asm/schid.h>
+
+#define CHSC_SIZE 0x1000
+
+struct chsc_async_header {
+ __u16 length;
+ __u16 code;
+ __u32 cmd_dependend;
+ __u32 key : 4;
+ __u32 : 28;
+ struct subchannel_id sid;
+};
+
+struct chsc_async_area {
+ struct chsc_async_header header;
+ __u8 data[CHSC_SIZE - sizeof(struct chsc_async_header)];
+};
+
+struct chsc_header {
+ __u16 length;
+ __u16 code;
+};
+
+struct chsc_sync_area {
+ struct chsc_header header;
+ __u8 data[CHSC_SIZE - sizeof(struct chsc_header)];
+};
+
+struct chsc_response_struct {
+ __u16 length;
+ __u16 code;
+ __u32 parms;
+ __u8 data[CHSC_SIZE - 2 * sizeof(__u16) - sizeof(__u32)];
+};
+
+struct chsc_chp_cd {
+ struct chp_id chpid;
+ int m;
+ int fmt;
+ struct chsc_response_struct cpcb;
+};
+
+struct chsc_cu_cd {
+ __u16 cun;
+ __u8 cssid;
+ int m;
+ int fmt;
+ struct chsc_response_struct cucb;
+};
+
+struct chsc_sch_cud {
+ struct subchannel_id schid;
+ int fmt;
+ struct chsc_response_struct scub;
+};
+
+struct conf_id {
+ int m;
+ __u8 cssid;
+ __u8 ssid;
+};
+
+struct chsc_conf_info {
+ struct conf_id id;
+ int fmt;
+ struct chsc_response_struct scid;
+};
+
+struct ccl_parm_chpid {
+ int m;
+ struct chp_id chp;
+};
+
+struct ccl_parm_cssids {
+ __u8 f_cssid;
+ __u8 l_cssid;
+};
+
+struct chsc_comp_list {
+ struct {
+ enum {
+ CCL_CU_ON_CHP = 1,
+ CCL_CHP_TYPE_CAP = 2,
+ CCL_CSS_IMG = 4,
+ CCL_CSS_IMG_CONF_CHAR = 5,
+ CCL_IOP_CHP = 6,
+ } ctype;
+ int fmt;
+ struct ccl_parm_chpid chpid;
+ struct ccl_parm_cssids cssids;
+ } req;
+ struct chsc_response_struct sccl;
+};
+
+struct chsc_dcal {
+ struct {
+ enum {
+ DCAL_CSS_IID_PN = 4,
+ } atype;
+ __u32 list_parm[2];
+ int fmt;
+ } req;
+ struct chsc_response_struct sdcal;
+};
+
+struct chsc_cpd_info {
+ struct chp_id chpid;
+ int m;
+ int fmt;
+ int rfmt;
+ int c;
+ struct chsc_response_struct chpdb;
+};
+
+#define CHSC_IOCTL_MAGIC 'c'
+
+#define CHSC_START _IOWR(CHSC_IOCTL_MAGIC, 0x81, struct chsc_async_area)
+#define CHSC_INFO_CHANNEL_PATH _IOWR(CHSC_IOCTL_MAGIC, 0x82, \
+ struct chsc_chp_cd)
+#define CHSC_INFO_CU _IOWR(CHSC_IOCTL_MAGIC, 0x83, struct chsc_cu_cd)
+#define CHSC_INFO_SCH_CU _IOWR(CHSC_IOCTL_MAGIC, 0x84, struct chsc_sch_cud)
+#define CHSC_INFO_CI _IOWR(CHSC_IOCTL_MAGIC, 0x85, struct chsc_conf_info)
+#define CHSC_INFO_CCL _IOWR(CHSC_IOCTL_MAGIC, 0x86, struct chsc_comp_list)
+#define CHSC_INFO_CPD _IOWR(CHSC_IOCTL_MAGIC, 0x87, struct chsc_cpd_info)
+#define CHSC_INFO_DCAL _IOWR(CHSC_IOCTL_MAGIC, 0x88, struct chsc_dcal)
+#define CHSC_START_SYNC _IOWR(CHSC_IOCTL_MAGIC, 0x89, struct chsc_sync_area)
+#define CHSC_ON_CLOSE_SET _IOWR(CHSC_IOCTL_MAGIC, 0x8a, struct chsc_async_area)
+#define CHSC_ON_CLOSE_REMOVE _IO(CHSC_IOCTL_MAGIC, 0x8b)
+
+#endif
diff --git a/arch/s390/include/uapi/asm/clp.h b/arch/s390/include/uapi/asm/clp.h
new file mode 100644
index 000000000..b36d9e9cd
--- /dev/null
+++ b/arch/s390/include/uapi/asm/clp.h
@@ -0,0 +1,29 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/*
+ * ioctl interface for /dev/clp
+ *
+ * Copyright IBM Corp. 2016
+ * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>
+ */
+
+#ifndef _ASM_CLP_H
+#define _ASM_CLP_H
+
+#include <linux/types.h>
+#include <linux/ioctl.h>
+
+struct clp_req {
+ unsigned int c : 1;
+ unsigned int r : 1;
+ unsigned int lps : 6;
+ unsigned int cmd : 8;
+ unsigned int : 16;
+ unsigned int reserved;
+ __u64 data_p;
+};
+
+#define CLP_IOCTL_MAGIC 'c'
+
+#define CLP_SYNC _IOWR(CLP_IOCTL_MAGIC, 0xC1, struct clp_req)
+
+#endif
diff --git a/arch/s390/include/uapi/asm/cmb.h b/arch/s390/include/uapi/asm/cmb.h
new file mode 100644
index 000000000..ecbe94941
--- /dev/null
+++ b/arch/s390/include/uapi/asm/cmb.h
@@ -0,0 +1,54 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+#ifndef _UAPIS390_CMB_H
+#define _UAPIS390_CMB_H
+
+#include <linux/types.h>
+
+/**
+ * struct cmbdata - channel measurement block data for user space
+ * @size: size of the stored data
+ * @elapsed_time: time since last sampling
+ * @ssch_rsch_count: number of ssch and rsch
+ * @sample_count: number of samples
+ * @device_connect_time: time of device connect
+ * @function_pending_time: time of function pending
+ * @device_disconnect_time: time of device disconnect
+ * @control_unit_queuing_time: time of control unit queuing
+ * @device_active_only_time: time of device active only
+ * @device_busy_time: time of device busy (ext. format)
+ * @initial_command_response_time: initial command response time (ext. format)
+ *
+ * All values are stored as 64 bit for simplicity, especially
+ * in 32 bit emulation mode. All time values are normalized to
+ * nanoseconds.
+ * Currently, two formats are known, which differ by the size of
+ * this structure, i.e. the last two members are only set when
+ * the extended channel measurement facility (first shipped in
+ * z990 machines) is activated.
+ * Potentially, more fields could be added, which would result in a
+ * new ioctl number.
+ */
+struct cmbdata {
+ __u64 size;
+ __u64 elapsed_time;
+ /* basic and extended format: */
+ __u64 ssch_rsch_count;
+ __u64 sample_count;
+ __u64 device_connect_time;
+ __u64 function_pending_time;
+ __u64 device_disconnect_time;
+ __u64 control_unit_queuing_time;
+ __u64 device_active_only_time;
+ /* extended format only: */
+ __u64 device_busy_time;
+ __u64 initial_command_response_time;
+};
+
+/* enable channel measurement */
+#define BIODASDCMFENABLE _IO(DASD_IOCTL_LETTER, 32)
+/* disable channel measurement */
+#define BIODASDCMFDISABLE _IO(DASD_IOCTL_LETTER, 33)
+/* read channel measurement data */
+#define BIODASDREADALLCMB _IOWR(DASD_IOCTL_LETTER, 33, struct cmbdata)
+
+#endif /* _UAPIS390_CMB_H */
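
A hedged userspace sketch of reading these counters through the BIODASDREADALLCMB ioctl defined above; the device path and the format check are illustrative assumptions, not part of the interface:

	/* Hypothetical reader of channel measurement data for one DASD. */
	#include <fcntl.h>
	#include <stdio.h>
	#include <sys/ioctl.h>
	#include <unistd.h>
	#include <asm/dasd.h>	/* DASD_IOCTL_LETTER */
	#include <asm/cmb.h>

	static int read_cmb(const char *dev)	/* e.g. "/dev/dasda" (assumed name) */
	{
		struct cmbdata data;
		int extended, fd = open(dev, O_RDONLY);

		if (fd < 0)
			return -1;
		if (ioctl(fd, BIODASDREADALLCMB, &data) < 0) {
			close(fd);
			return -1;
		}
		close(fd);
		/* the extended fields are only valid if the returned size covers them */
		extended = data.size >= sizeof(struct cmbdata);
		printf("connect time %llu ns%s\n",
		       (unsigned long long) data.device_connect_time,
		       extended ? " (extended format)" : "");
		return 0;
	}
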
diff --git a/arch/s390/include/uapi/asm/dasd.h b/arch/s390/include/uapi/asm/dasd.h
new file mode 100644
index 000000000..832be5c25
--- /dev/null
+++ b/arch/s390/include/uapi/asm/dasd.h
@@ -0,0 +1,334 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/*
+ * Author(s)......: Holger Smolinski <Holger.Smolinski@de.ibm.com>
+ * Bugreports.to..: <Linux390@de.ibm.com>
+ * Copyright IBM Corp. 1999, 2000
+ * EMC Symmetrix ioctl Copyright EMC Corporation, 2008
+ * Author.........: Nigel Hislop <hislop_nigel@emc.com>
+ *
+ * This file is the interface of the DASD device driver, which is exported to user space.
+ * Any future changes to the API will result in a change of the APIVERSION reported
+ * to userspace by the DASDAPIVER ioctl.
+ *
+ */
+
+#ifndef DASD_H
+#define DASD_H
+#include <linux/types.h>
+#include <linux/ioctl.h>
+
+#define DASD_IOCTL_LETTER 'D'
+
+#define DASD_API_VERSION 6
+
+/*
+ * struct dasd_information2_t
+ * represents any data about the device that is visible to userspace,
+ * including format and features.
+ */
+typedef struct dasd_information2_t {
+ unsigned int devno; /* S/390 devno */
+ unsigned int real_devno; /* for aliases */
+ unsigned int schid; /* S/390 subchannel identifier */
+ unsigned int cu_type : 16; /* from SenseID */
+ unsigned int cu_model : 8; /* from SenseID */
+ unsigned int dev_type : 16; /* from SenseID */
+ unsigned int dev_model : 8; /* from SenseID */
+ unsigned int open_count;
+ unsigned int req_queue_len;
+ unsigned int chanq_len; /* length of chanq */
+ char type[4]; /* from discipline.name, 'none' for unknown */
+ unsigned int status; /* current device level */
+ unsigned int label_block; /* where to find the VOLSER */
+ unsigned int FBA_layout; /* fixed block size (like AIXVOL) */
+ unsigned int characteristics_size;
+ unsigned int confdata_size;
+ char characteristics[64]; /* from read_device_characteristics */
+ char configuration_data[256]; /* from read_configuration_data */
+ unsigned int format; /* format info like formatted/cdl/ldl/... */
+ unsigned int features; /* dasd features like 'ro',... */
+ unsigned int reserved0; /* reserved for further use ,... */
+ unsigned int reserved1; /* reserved for further use ,... */
+ unsigned int reserved2; /* reserved for further use ,... */
+ unsigned int reserved3; /* reserved for further use ,... */
+ unsigned int reserved4; /* reserved for further use ,... */
+ unsigned int reserved5; /* reserved for further use ,... */
+ unsigned int reserved6; /* reserved for further use ,... */
+ unsigned int reserved7; /* reserved for further use ,... */
+} dasd_information2_t;
+
+/*
+ * values to be used for dasd_information_t.format
+ * 0x00: NOT formatted
+ * 0x01: Linux disc layout
+ * 0x02: Common disc layout
+ */
+#define DASD_FORMAT_NONE 0
+#define DASD_FORMAT_LDL 1
+#define DASD_FORMAT_CDL 2
+/*
+ * values to be used for dasd_information_t.features
+ * 0x100: default features
+ * 0x001: readonly (ro)
+ * 0x002: use diag discipline (diag)
+ * 0x004: set the device initially online (internal use only)
+ * 0x008: enable ERP related logging
+ * 0x010: allow I/O to fail on lost paths
+ * 0x020: allow I/O to fail when a lock was stolen
+ * 0x040: give access to raw eckd data
+ * 0x080: enable discard support
+ * 0x100: enable autodisable for IFCC errors (default)
+ */
+#define DASD_FEATURE_READONLY 0x001
+#define DASD_FEATURE_USEDIAG 0x002
+#define DASD_FEATURE_INITIAL_ONLINE 0x004
+#define DASD_FEATURE_ERPLOG 0x008
+#define DASD_FEATURE_FAILFAST 0x010
+#define DASD_FEATURE_FAILONSLCK 0x020
+#define DASD_FEATURE_USERAW 0x040
+#define DASD_FEATURE_DISCARD 0x080
+#define DASD_FEATURE_PATH_AUTODISABLE 0x100
+#define DASD_FEATURE_DEFAULT DASD_FEATURE_PATH_AUTODISABLE
+
+#define DASD_PARTN_BITS 2
+
+/*
+ * struct dasd_information_t
+ * represents any data about the device that is visible to userspace
+ */
+typedef struct dasd_information_t {
+ unsigned int devno; /* S/390 devno */
+ unsigned int real_devno; /* for aliases */
+ unsigned int schid; /* S/390 subchannel identifier */
+ unsigned int cu_type : 16; /* from SenseID */
+ unsigned int cu_model : 8; /* from SenseID */
+ unsigned int dev_type : 16; /* from SenseID */
+ unsigned int dev_model : 8; /* from SenseID */
+ unsigned int open_count;
+ unsigned int req_queue_len;
+ unsigned int chanq_len; /* length of chanq */
+ char type[4]; /* from discipline.name, 'none' for unknown */
+ unsigned int status; /* current device level */
+ unsigned int label_block; /* where to find the VOLSER */
+ unsigned int FBA_layout; /* fixed block size (like AIXVOL) */
+ unsigned int characteristics_size;
+ unsigned int confdata_size;
+ char characteristics[64]; /* from read_device_characteristics */
+ char configuration_data[256]; /* from read_configuration_data */
+} dasd_information_t;
+
+/*
+ * Read Subsystem Data - Performance Statistics
+ */
+typedef struct dasd_rssd_perf_stats_t {
+ unsigned char invalid:1;
+ unsigned char format:3;
+ unsigned char data_format:4;
+ unsigned char unit_address;
+ unsigned short device_status;
+ unsigned int nr_read_normal;
+ unsigned int nr_read_normal_hits;
+ unsigned int nr_write_normal;
+ unsigned int nr_write_fast_normal_hits;
+ unsigned int nr_read_seq;
+ unsigned int nr_read_seq_hits;
+ unsigned int nr_write_seq;
+ unsigned int nr_write_fast_seq_hits;
+ unsigned int nr_read_cache;
+ unsigned int nr_read_cache_hits;
+ unsigned int nr_write_cache;
+ unsigned int nr_write_fast_cache_hits;
+ unsigned int nr_inhibit_cache;
+ unsigned int nr_bybass_cache;
+ unsigned int nr_seq_dasd_to_cache;
+ unsigned int nr_dasd_to_cache;
+ unsigned int nr_cache_to_dasd;
+ unsigned int nr_delayed_fast_write;
+ unsigned int nr_normal_fast_write;
+ unsigned int nr_seq_fast_write;
+ unsigned int nr_cache_miss;
+ unsigned char status2;
+ unsigned int nr_quick_write_promotes;
+ unsigned char reserved;
+ unsigned short ssid;
+ unsigned char reseved2[96];
+} __attribute__((packed)) dasd_rssd_perf_stats_t;
+
+/*
+ * struct dasd_profile_info_t
+ * holds the profiling information
+ */
+typedef struct dasd_profile_info_t {
+ unsigned int dasd_io_reqs; /* total number of requests processed */
+ unsigned int dasd_io_sects; /* total number of sectors processed */
+ unsigned int dasd_io_secs[32]; /* histogram of request sizes */
+ unsigned int dasd_io_times[32]; /* histogram of request times */
+ unsigned int dasd_io_timps[32]; /* histogram of request times per sector */
+ unsigned int dasd_io_time1[32]; /* histogram of time from build to start */
+ unsigned int dasd_io_time2[32]; /* histogram of time from start to irq */
+ unsigned int dasd_io_time2ps[32]; /* histogram of time from start to irq */
+ unsigned int dasd_io_time3[32]; /* histogram of time from irq to end */
+ unsigned int dasd_io_nr_req[32]; /* histogram of # of requests in chanq */
+} dasd_profile_info_t;
+
+/*
+ * struct format_data_t
+ * represents all data necessary to format a dasd
+ */
+typedef struct format_data_t {
+ unsigned int start_unit; /* from track */
+ unsigned int stop_unit; /* to track */
+ unsigned int blksize; /* sectorsize */
+ unsigned int intensity;
+} format_data_t;
+
+/*
+ * values to be used for format_data_t.intensity
+ * 0/8: normal format
+ * 1/9: also write record zero
+ * 3/11: also write home address
+ * 4/12: invalidate track
+ */
+#define DASD_FMT_INT_FMT_R0 1 /* write record zero */
+#define DASD_FMT_INT_FMT_HA 2 /* write home address, also set FMT_R0 ! */
+#define DASD_FMT_INT_INVAL 4 /* invalidate tracks */
+#define DASD_FMT_INT_COMPAT 8 /* use OS/390 compatible disk layout */
+
+/*
+ * struct format_check_t
+ * represents all data necessary to evaluate the format of
+ * different tracks of a dasd
+ */
+typedef struct format_check_t {
+ /* Input */
+ struct format_data_t expect;
+
+ /* Output */
+ unsigned int result; /* Error indication (DASD_FMT_ERR_*) */
+ unsigned int unit; /* Track that is in error */
+ unsigned int rec; /* Record that is in error */
+ unsigned int num_records; /* Records in the track in error */
+ unsigned int blksize; /* Blocksize of first record in error */
+ unsigned int key_length; /* Key length of first record in error */
+} format_check_t;
+
+/* Values returned in format_check_t when a format error is detected: */
+/* Too few records were found on a single track */
+#define DASD_FMT_ERR_TOO_FEW_RECORDS 1
+/* Too many records were found on a single track */
+#define DASD_FMT_ERR_TOO_MANY_RECORDS 2
+/* Blocksize/data-length of a record was wrong */
+#define DASD_FMT_ERR_BLKSIZE 3
+/* A record ID is defined by cylinder, head, and record number (CHR). */
+/* On mismatch, this error is set */
+#define DASD_FMT_ERR_RECORD_ID 4
+/* If key-length was != 0 */
+#define DASD_FMT_ERR_KEY_LENGTH 5
+
+/*
+ * struct attrib_data_t
+ * represents the operation (cache) bits for the device.
+ * Used in DE to influence caching of the DASD.
+ */
+typedef struct attrib_data_t {
+ unsigned char operation:3; /* cache operation mode */
+ unsigned char reserved:5; /* reserved */
+ __u16 nr_cyl; /* number of cylinders for read ahead */
+ __u8 reserved2[29]; /* for future use */
+} __attribute__ ((packed)) attrib_data_t;
+
+/* definition of operation (cache) bits within attributes of DE */
+#define DASD_NORMAL_CACHE 0x0
+#define DASD_BYPASS_CACHE 0x1
+#define DASD_INHIBIT_LOAD 0x2
+#define DASD_SEQ_ACCESS 0x3
+#define DASD_SEQ_PRESTAGE 0x4
+#define DASD_REC_ACCESS 0x5
+
+/*
+ * Perform EMC Symmetrix I/O
+ */
+typedef struct dasd_symmio_parms {
+ unsigned char reserved[8]; /* compat with older releases */
+ unsigned long long psf_data; /* char * cast to u64 */
+ unsigned long long rssd_result; /* char * cast to u64 */
+ int psf_data_len;
+ int rssd_result_len;
+} __attribute__ ((packed)) dasd_symmio_parms_t;
+
+/*
+ * Data returned by Sense Path Group ID (SNID)
+ */
+struct dasd_snid_data {
+ struct {
+ __u8 group:2;
+ __u8 reserve:2;
+ __u8 mode:1;
+ __u8 res:3;
+ } __attribute__ ((packed)) path_state;
+ __u8 pgid[11];
+} __attribute__ ((packed));
+
+struct dasd_snid_ioctl_data {
+ struct dasd_snid_data data;
+ __u8 path_mask;
+} __attribute__ ((packed));
+
+
+/********************************************************************************
+ * SECTION: Definition of IOCTLs
+ *
+ * Here is how the ioctl numbers should be used:
+ * 0 - 31 DASD driver itself
+ * 32 - 239 still open
+ * 240 - 255 reserved for EMC
+ *******************************************************************************/
+
+/* Disable the volume (for Linux) */
+#define BIODASDDISABLE _IO(DASD_IOCTL_LETTER,0)
+/* Enable the volume (for Linux) */
+#define BIODASDENABLE _IO(DASD_IOCTL_LETTER,1)
+/* Issue a reserve/release command, respectively. */
+#define BIODASDRSRV _IO(DASD_IOCTL_LETTER,2) /* reserve */
+#define BIODASDRLSE _IO(DASD_IOCTL_LETTER,3) /* release */
+#define BIODASDSLCK _IO(DASD_IOCTL_LETTER,4) /* steal lock */
+/* reset profiling information of a device */
+#define BIODASDPRRST _IO(DASD_IOCTL_LETTER,5)
+/* Quiesce IO on device */
+#define BIODASDQUIESCE _IO(DASD_IOCTL_LETTER,6)
+/* Resume IO on device */
+#define BIODASDRESUME _IO(DASD_IOCTL_LETTER,7)
+/* Abort all I/O on a device */
+#define BIODASDABORTIO _IO(DASD_IOCTL_LETTER, 240)
+/* Allow I/O on a device */
+#define BIODASDALLOWIO _IO(DASD_IOCTL_LETTER, 241)
+
+
+/* retrieve API version number */
+#define DASDAPIVER _IOR(DASD_IOCTL_LETTER,0,int)
+/* Get information on a dasd device */
+#define BIODASDINFO _IOR(DASD_IOCTL_LETTER,1,dasd_information_t)
+/* retrieve profiling information of a device */
+#define BIODASDPRRD _IOR(DASD_IOCTL_LETTER,2,dasd_profile_info_t)
+/* Get information on a dasd device (enhanced) */
+#define BIODASDINFO2 _IOR(DASD_IOCTL_LETTER,3,dasd_information2_t)
+/* Performance Statistics Read */
+#define BIODASDPSRD _IOR(DASD_IOCTL_LETTER,4,dasd_rssd_perf_stats_t)
+/* Get Attributes (cache operations) */
+#define BIODASDGATTR _IOR(DASD_IOCTL_LETTER,5,attrib_data_t)
+
+
+/* #define BIODASDFORMAT _IOW(IOCTL_LETTER,0,format_data_t) , deprecated */
+#define BIODASDFMT _IOW(DASD_IOCTL_LETTER,1,format_data_t)
+/* Set Attributes (cache operations) */
+#define BIODASDSATTR _IOW(DASD_IOCTL_LETTER,2,attrib_data_t)
+
+/* Get Sense Path Group ID (SNID) data */
+#define BIODASDSNID _IOWR(DASD_IOCTL_LETTER, 1, struct dasd_snid_ioctl_data)
+/* Check device format according to format_check_t */
+#define BIODASDCHECKFMT _IOWR(DASD_IOCTL_LETTER, 2, format_check_t)
+
+#define BIODASDSYMMIO _IOWR(DASD_IOCTL_LETTER, 240, dasd_symmio_parms_t)
+
+#endif /* DASD_H */
+
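
As a usage illustration of the ioctl numbers defined above, a hedged userspace query of the enhanced device information; the device path is an assumption and error handling is minimal:

	/* Hypothetical query of enhanced DASD information via BIODASDINFO2. */
	#include <fcntl.h>
	#include <stdio.h>
	#include <sys/ioctl.h>
	#include <unistd.h>
	#include <asm/dasd.h>

	static int print_dasd_info(const char *dev)	/* e.g. "/dev/dasda" (assumed name) */
	{
		dasd_information2_t info;
		int fd = open(dev, O_RDONLY);

		if (fd < 0)
			return -1;
		if (ioctl(fd, BIODASDINFO2, &info) < 0) {
			close(fd);
			return -1;
		}
		close(fd);
		printf("devno %04x type %.4s open_count %u\n",
		       info.devno, info.type, info.open_count);
		return 0;
	}
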
diff --git a/arch/s390/include/uapi/asm/debug.h b/arch/s390/include/uapi/asm/debug.h
new file mode 100644
index 000000000..c7c564d9a
--- /dev/null
+++ b/arch/s390/include/uapi/asm/debug.h
@@ -0,0 +1,35 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/*
+ * S/390 debug facility
+ *
+ * Copyright IBM Corp. 1999, 2000
+ */
+
+#ifndef _UAPIDEBUG_H
+#define _UAPIDEBUG_H
+
+#include <linux/fs.h>
+
+/* Note:
+ * struct __debug_entry must be defined outside of #ifdef __KERNEL__
+ * in order to allow a user program to analyze the 'raw'-view.
+ */
+
+struct __debug_entry{
+ union {
+ struct {
+ unsigned long long clock:52;
+ unsigned long long exception:1;
+ unsigned long long level:3;
+ unsigned long long cpuid:8;
+ } fields;
+
+ unsigned long long stck;
+ } id;
+ void* caller;
+} __attribute__((packed));
+
+
+#define __DEBUG_FEATURE_VERSION 2 /* version of debug feature */
+
+#endif /* _UAPIDEBUG_H */
diff --git a/arch/s390/include/uapi/asm/guarded_storage.h b/arch/s390/include/uapi/asm/guarded_storage.h
new file mode 100644
index 000000000..666af1c33
--- /dev/null
+++ b/arch/s390/include/uapi/asm/guarded_storage.h
@@ -0,0 +1,78 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+#ifndef _GUARDED_STORAGE_H
+#define _GUARDED_STORAGE_H
+
+#include <linux/types.h>
+
+struct gs_cb {
+ __u64 reserved;
+ __u64 gsd;
+ __u64 gssm;
+ __u64 gs_epl_a;
+};
+
+struct gs_epl {
+ __u8 pad1;
+ union {
+ __u8 gs_eam;
+ struct {
+ __u8 : 6;
+ __u8 e : 1;
+ __u8 b : 1;
+ };
+ };
+ union {
+ __u8 gs_eci;
+ struct {
+ __u8 tx : 1;
+ __u8 cx : 1;
+ __u8 : 5;
+ __u8 in : 1;
+ };
+ };
+ union {
+ __u8 gs_eai;
+ struct {
+ __u8 : 1;
+ __u8 t : 1;
+ __u8 as : 2;
+ __u8 ar : 4;
+ };
+ };
+ __u32 pad2;
+ __u64 gs_eha;
+ __u64 gs_eia;
+ __u64 gs_eoa;
+ __u64 gs_eir;
+ __u64 gs_era;
+};
+
+#define GS_ENABLE 0
+#define GS_DISABLE 1
+#define GS_SET_BC_CB 2
+#define GS_CLEAR_BC_CB 3
+#define GS_BROADCAST 4
+
+static inline void load_gs_cb(struct gs_cb *gs_cb)
+{
+ asm volatile(".insn rxy,0xe3000000004d,0,%0" : : "Q" (*gs_cb));
+}
+
+static inline void store_gs_cb(struct gs_cb *gs_cb)
+{
+ asm volatile(".insn rxy,0xe30000000049,0,%0" : : "Q" (*gs_cb));
+}
+
+static inline void save_gs_cb(struct gs_cb *gs_cb)
+{
+ if (gs_cb)
+ store_gs_cb(gs_cb);
+}
+
+static inline void restore_gs_cb(struct gs_cb *gs_cb)
+{
+ if (gs_cb)
+ load_gs_cb(gs_cb);
+}
+
+#endif /* _GUARDED_STORAGE_H */
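
The GS_* command codes are intended for the s390_guarded_storage system call; the sketch below assumes that syscall and its (command, control block) calling convention, neither of which is defined in this header:

	/* Hypothetical: enable guarded storage, then install a broadcast control block. */
	#include <sys/syscall.h>
	#include <unistd.h>
	#include <asm/guarded_storage.h>

	static int gs_setup(struct gs_cb *cb)
	{
		/* availability of __NR_s390_guarded_storage here is an assumption */
		if (syscall(__NR_s390_guarded_storage, GS_ENABLE, NULL))
			return -1;
		return syscall(__NR_s390_guarded_storage, GS_SET_BC_CB, cb) ? -1 : 0;
	}
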
diff --git a/arch/s390/include/uapi/asm/hypfs.h b/arch/s390/include/uapi/asm/hypfs.h
new file mode 100644
index 000000000..fe6174e14
--- /dev/null
+++ b/arch/s390/include/uapi/asm/hypfs.h
@@ -0,0 +1,55 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/*
+ * Structures for hypfs interface
+ *
+ * Copyright IBM Corp. 2013
+ *
+ * Author: Martin Schwidefsky <schwidefsky@de.ibm.com>
+ */
+
+#ifndef _ASM_HYPFS_H
+#define _ASM_HYPFS_H
+
+#include <linux/types.h>
+
+/*
+ * IOCTL for binary interface /sys/kernel/debug/diag_304
+ */
+struct hypfs_diag304 {
+ __u32 args[2];
+ __u64 data;
+ __u64 rc;
+} __attribute__((packed));
+
+#define HYPFS_IOCTL_MAGIC 0x10
+
+#define HYPFS_DIAG304 \
+ _IOWR(HYPFS_IOCTL_MAGIC, 0x20, struct hypfs_diag304)
+
+/*
+ * Structures for binary interface /sys/kernel/debug/diag_0c
+ */
+struct hypfs_diag0c_hdr {
+ __u64 len; /* Length of diag0c buffer without header */
+ __u16 version; /* Version of header */
+ char reserved1[6]; /* Reserved */
+ char tod_ext[16]; /* TOD clock for diag0c */
+ __u64 count; /* Number of entries (CPUs) in diag0c array */
+ char reserved2[24]; /* Reserved */
+};
+
+struct hypfs_diag0c_entry {
+ char date[8]; /* MM/DD/YY in EBCDIC */
+ char time[8]; /* HH:MM:SS in EBCDIC */
+ __u64 virtcpu; /* Virtual time consumed by the virt CPU (us) */
+ __u64 totalproc; /* Total of virtual and simulation time (us) */
+ __u32 cpu; /* Linux logical CPU number */
+ __u32 reserved; /* Align to 8 byte */
+};
+
+struct hypfs_diag0c_data {
+ struct hypfs_diag0c_hdr hdr; /* 64 byte header */
+ struct hypfs_diag0c_entry entry[]; /* diag0c entry array */
+};
+
+#endif
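
A sketch of walking the diag0c entry array once the binary blob has been read from the debugfs interface mentioned above; reading the blob itself is assumed and not shown:

	/* Hypothetical consumer of an already-read hypfs diag0c blob. */
	#include <stdio.h>
	#include <asm/hypfs.h>

	static void print_diag0c(const struct hypfs_diag0c_data *d)
	{
		__u64 i;

		for (i = 0; i < d->hdr.count; i++)	/* one entry per CPU */
			printf("cpu %u: virtcpu %llu us, total %llu us\n",
			       d->entry[i].cpu,
			       (unsigned long long) d->entry[i].virtcpu,
			       (unsigned long long) d->entry[i].totalproc);
	}
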
diff --git a/arch/s390/include/uapi/asm/ioctls.h b/arch/s390/include/uapi/asm/ioctls.h
new file mode 100644
index 000000000..342a3284e
--- /dev/null
+++ b/arch/s390/include/uapi/asm/ioctls.h
@@ -0,0 +1,9 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+#ifndef __ARCH_S390_IOCTLS_H__
+#define __ARCH_S390_IOCTLS_H__
+
+#define FIOQSIZE 0x545E
+
+#include <asm-generic/ioctls.h>
+
+#endif
diff --git a/arch/s390/include/uapi/asm/ipcbuf.h b/arch/s390/include/uapi/asm/ipcbuf.h
new file mode 100644
index 000000000..5b1c4f47c
--- /dev/null
+++ b/arch/s390/include/uapi/asm/ipcbuf.h
@@ -0,0 +1,32 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+#ifndef __S390_IPCBUF_H__
+#define __S390_IPCBUF_H__
+
+/*
+ * The user_ipc_perm structure for S/390 architecture.
+ * Note extra padding because this structure is passed back and forth
+ * between kernel and user space.
+ *
+ * Pad space is left for:
+ * - 32-bit mode_t and seq
+ * - 2 miscellaneous 32-bit values
+ */
+
+struct ipc64_perm
+{
+ __kernel_key_t key;
+ __kernel_uid32_t uid;
+ __kernel_gid32_t gid;
+ __kernel_uid32_t cuid;
+ __kernel_gid32_t cgid;
+ __kernel_mode_t mode;
+ unsigned short __pad1;
+ unsigned short seq;
+#ifndef __s390x__
+ unsigned short __pad2;
+#endif /* ! __s390x__ */
+ unsigned long __unused1;
+ unsigned long __unused2;
+};
+
+#endif /* __S390_IPCBUF_H__ */
diff --git a/arch/s390/include/uapi/asm/kvm.h b/arch/s390/include/uapi/asm/kvm.h
new file mode 100644
index 000000000..9a50f02b9
--- /dev/null
+++ b/arch/s390/include/uapi/asm/kvm.h
@@ -0,0 +1,277 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+#ifndef __LINUX_KVM_S390_H
+#define __LINUX_KVM_S390_H
+/*
+ * KVM s390 specific structures and definitions
+ *
+ * Copyright IBM Corp. 2008, 2018
+ *
+ * Author(s): Carsten Otte <cotte@de.ibm.com>
+ * Christian Borntraeger <borntraeger@de.ibm.com>
+ */
+#include <linux/types.h>
+
+#define __KVM_S390
+#define __KVM_HAVE_GUEST_DEBUG
+
+/* Device control API: s390-specific devices */
+#define KVM_DEV_FLIC_GET_ALL_IRQS 1
+#define KVM_DEV_FLIC_ENQUEUE 2
+#define KVM_DEV_FLIC_CLEAR_IRQS 3
+#define KVM_DEV_FLIC_APF_ENABLE 4
+#define KVM_DEV_FLIC_APF_DISABLE_WAIT 5
+#define KVM_DEV_FLIC_ADAPTER_REGISTER 6
+#define KVM_DEV_FLIC_ADAPTER_MODIFY 7
+#define KVM_DEV_FLIC_CLEAR_IO_IRQ 8
+#define KVM_DEV_FLIC_AISM 9
+#define KVM_DEV_FLIC_AIRQ_INJECT 10
+#define KVM_DEV_FLIC_AISM_ALL 11
+/*
+ * We can have up to 4*64k pending subchannels + 8 adapter interrupts,
+ * as well as up to ASYNC_PF_PER_VCPU*KVM_MAX_VCPUS pfault done interrupts.
+ * There are also sclp and machine checks. This gives us
+ * sizeof(kvm_s390_irq)*(4*65536+8+64*64+1+1) = 72 * 266250 = 19170000
+ * Let's round up to 8192 pages.
+ */
+#define KVM_S390_MAX_FLOAT_IRQS 266250
+#define KVM_S390_FLIC_MAX_BUFFER 0x2000000
+
+struct kvm_s390_io_adapter {
+ __u32 id;
+ __u8 isc;
+ __u8 maskable;
+ __u8 swap;
+ __u8 flags;
+};
+
+#define KVM_S390_ADAPTER_SUPPRESSIBLE 0x01
+
+struct kvm_s390_ais_req {
+ __u8 isc;
+ __u16 mode;
+};
+
+struct kvm_s390_ais_all {
+ __u8 simm;
+ __u8 nimm;
+};
+
+#define KVM_S390_IO_ADAPTER_MASK 1
+#define KVM_S390_IO_ADAPTER_MAP 2
+#define KVM_S390_IO_ADAPTER_UNMAP 3
+
+struct kvm_s390_io_adapter_req {
+ __u32 id;
+ __u8 type;
+ __u8 mask;
+ __u16 pad0;
+ __u64 addr;
+};
+
+/* kvm attr_group on vm fd */
+#define KVM_S390_VM_MEM_CTRL 0
+#define KVM_S390_VM_TOD 1
+#define KVM_S390_VM_CRYPTO 2
+#define KVM_S390_VM_CPU_MODEL 3
+#define KVM_S390_VM_MIGRATION 4
+
+/* kvm attributes for mem_ctrl */
+#define KVM_S390_VM_MEM_ENABLE_CMMA 0
+#define KVM_S390_VM_MEM_CLR_CMMA 1
+#define KVM_S390_VM_MEM_LIMIT_SIZE 2
+
+#define KVM_S390_NO_MEM_LIMIT U64_MAX
+
+/* kvm attributes for KVM_S390_VM_TOD */
+#define KVM_S390_VM_TOD_LOW 0
+#define KVM_S390_VM_TOD_HIGH 1
+#define KVM_S390_VM_TOD_EXT 2
+
+struct kvm_s390_vm_tod_clock {
+ __u8 epoch_idx;
+ __u64 tod;
+};
+
+/* kvm attributes for KVM_S390_VM_CPU_MODEL */
+/* processor related attributes are r/w */
+#define KVM_S390_VM_CPU_PROCESSOR 0
+struct kvm_s390_vm_cpu_processor {
+ __u64 cpuid;
+ __u16 ibc;
+ __u8 pad[6];
+ __u64 fac_list[256];
+};
+
+/* machine related attributes are r/o */
+#define KVM_S390_VM_CPU_MACHINE 1
+struct kvm_s390_vm_cpu_machine {
+ __u64 cpuid;
+ __u32 ibc;
+ __u8 pad[4];
+ __u64 fac_mask[256];
+ __u64 fac_list[256];
+};
+
+#define KVM_S390_VM_CPU_PROCESSOR_FEAT 2
+#define KVM_S390_VM_CPU_MACHINE_FEAT 3
+
+#define KVM_S390_VM_CPU_FEAT_NR_BITS 1024
+#define KVM_S390_VM_CPU_FEAT_ESOP 0
+#define KVM_S390_VM_CPU_FEAT_SIEF2 1
+#define KVM_S390_VM_CPU_FEAT_64BSCAO 2
+#define KVM_S390_VM_CPU_FEAT_SIIF 3
+#define KVM_S390_VM_CPU_FEAT_GPERE 4
+#define KVM_S390_VM_CPU_FEAT_GSLS 5
+#define KVM_S390_VM_CPU_FEAT_IB 6
+#define KVM_S390_VM_CPU_FEAT_CEI 7
+#define KVM_S390_VM_CPU_FEAT_IBS 8
+#define KVM_S390_VM_CPU_FEAT_SKEY 9
+#define KVM_S390_VM_CPU_FEAT_CMMA 10
+#define KVM_S390_VM_CPU_FEAT_PFMFI 11
+#define KVM_S390_VM_CPU_FEAT_SIGPIF 12
+#define KVM_S390_VM_CPU_FEAT_KSS 13
+struct kvm_s390_vm_cpu_feat {
+ __u64 feat[16];
+};
+
+#define KVM_S390_VM_CPU_PROCESSOR_SUBFUNC 4
+#define KVM_S390_VM_CPU_MACHINE_SUBFUNC 5
+/* for "test bit" instructions MSB 0 bit ordering, for "query" raw blocks */
+struct kvm_s390_vm_cpu_subfunc {
+ __u8 plo[32]; /* always */
+ __u8 ptff[16]; /* with TOD-clock steering */
+ __u8 kmac[16]; /* with MSA */
+ __u8 kmc[16]; /* with MSA */
+ __u8 km[16]; /* with MSA */
+ __u8 kimd[16]; /* with MSA */
+ __u8 klmd[16]; /* with MSA */
+ __u8 pckmo[16]; /* with MSA3 */
+ __u8 kmctr[16]; /* with MSA4 */
+ __u8 kmf[16]; /* with MSA4 */
+ __u8 kmo[16]; /* with MSA4 */
+ __u8 pcc[16]; /* with MSA4 */
+ __u8 ppno[16]; /* with MSA5 */
+ __u8 kma[16]; /* with MSA8 */
+ __u8 reserved[1808];
+};
+
+/* kvm attributes for crypto */
+#define KVM_S390_VM_CRYPTO_ENABLE_AES_KW 0
+#define KVM_S390_VM_CRYPTO_ENABLE_DEA_KW 1
+#define KVM_S390_VM_CRYPTO_DISABLE_AES_KW 2
+#define KVM_S390_VM_CRYPTO_DISABLE_DEA_KW 3
+
+/* kvm attributes for migration mode */
+#define KVM_S390_VM_MIGRATION_STOP 0
+#define KVM_S390_VM_MIGRATION_START 1
+#define KVM_S390_VM_MIGRATION_STATUS 2
+
+/* for KVM_GET_REGS and KVM_SET_REGS */
+struct kvm_regs {
+ /* general purpose regs for s390 */
+ __u64 gprs[16];
+};
+
+/* for KVM_GET_SREGS and KVM_SET_SREGS */
+struct kvm_sregs {
+ __u32 acrs[16];
+ __u64 crs[16];
+};
+
+/* for KVM_GET_FPU and KVM_SET_FPU */
+struct kvm_fpu {
+ __u32 fpc;
+ __u64 fprs[16];
+};
+
+#define KVM_GUESTDBG_USE_HW_BP 0x00010000
+
+#define KVM_HW_BP 1
+#define KVM_HW_WP_WRITE 2
+#define KVM_SINGLESTEP 4
+
+struct kvm_debug_exit_arch {
+ __u64 addr;
+ __u8 type;
+ __u8 pad[7]; /* Should be set to 0 */
+};
+
+struct kvm_hw_breakpoint {
+ __u64 addr;
+ __u64 phys_addr;
+ __u64 len;
+ __u8 type;
+ __u8 pad[7]; /* Should be set to 0 */
+};
+
+/* for KVM_SET_GUEST_DEBUG */
+struct kvm_guest_debug_arch {
+ __u32 nr_hw_bp;
+ __u32 pad; /* Should be set to 0 */
+ struct kvm_hw_breakpoint __user *hw_bp;
+};
+
+/* for KVM_SYNC_PFAULT and KVM_REG_S390_PFTOKEN */
+#define KVM_S390_PFAULT_TOKEN_INVALID 0xffffffffffffffffULL
+
+#define KVM_SYNC_PREFIX (1UL << 0)
+#define KVM_SYNC_GPRS (1UL << 1)
+#define KVM_SYNC_ACRS (1UL << 2)
+#define KVM_SYNC_CRS (1UL << 3)
+#define KVM_SYNC_ARCH0 (1UL << 4)
+#define KVM_SYNC_PFAULT (1UL << 5)
+#define KVM_SYNC_VRS (1UL << 6)
+#define KVM_SYNC_RICCB (1UL << 7)
+#define KVM_SYNC_FPRS (1UL << 8)
+#define KVM_SYNC_GSCB (1UL << 9)
+#define KVM_SYNC_BPBC (1UL << 10)
+#define KVM_SYNC_ETOKEN (1UL << 11)
+/* length and alignment of the sdnx as a power of two */
+#define SDNXC 8
+#define SDNXL (1UL << SDNXC)
+/* definition of registers in kvm_run */
+struct kvm_sync_regs {
+ __u64 prefix; /* prefix register */
+ __u64 gprs[16]; /* general purpose registers */
+ __u32 acrs[16]; /* access registers */
+ __u64 crs[16]; /* control registers */
+ __u64 todpr; /* tod programmable register [ARCH0] */
+ __u64 cputm; /* cpu timer [ARCH0] */
+ __u64 ckc; /* clock comparator [ARCH0] */
+ __u64 pp; /* program parameter [ARCH0] */
+ __u64 gbea; /* guest breaking-event address [ARCH0] */
+ __u64 pft; /* pfault token [PFAULT] */
+ __u64 pfs; /* pfault select [PFAULT] */
+ __u64 pfc; /* pfault compare [PFAULT] */
+ union {
+ __u64 vrs[32][2]; /* vector registers (KVM_SYNC_VRS) */
+ __u64 fprs[16]; /* fp registers (KVM_SYNC_FPRS) */
+ };
+ __u8 reserved[512]; /* for future vector expansion */
+ __u32 fpc; /* valid on KVM_SYNC_VRS or KVM_SYNC_FPRS */
+ __u8 bpbc : 1; /* bp mode */
+ __u8 reserved2 : 7;
+ __u8 padding1[51]; /* riccb needs to be 64byte aligned */
+ __u8 riccb[64]; /* runtime instrumentation controls block */
+ __u8 padding2[192]; /* sdnx needs to be 256byte aligned */
+ union {
+ __u8 sdnx[SDNXL]; /* state description annex */
+ struct {
+ __u64 reserved1[2];
+ __u64 gscb[4];
+ __u64 etoken;
+ __u64 etoken_extension;
+ };
+ };
+};
+
+#define KVM_REG_S390_TODPR (KVM_REG_S390 | KVM_REG_SIZE_U32 | 0x1)
+#define KVM_REG_S390_EPOCHDIFF (KVM_REG_S390 | KVM_REG_SIZE_U64 | 0x2)
+#define KVM_REG_S390_CPU_TIMER (KVM_REG_S390 | KVM_REG_SIZE_U64 | 0x3)
+#define KVM_REG_S390_CLOCK_COMP (KVM_REG_S390 | KVM_REG_SIZE_U64 | 0x4)
+#define KVM_REG_S390_PFTOKEN (KVM_REG_S390 | KVM_REG_SIZE_U64 | 0x5)
+#define KVM_REG_S390_PFCOMPARE (KVM_REG_S390 | KVM_REG_SIZE_U64 | 0x6)
+#define KVM_REG_S390_PFSELECT (KVM_REG_S390 | KVM_REG_SIZE_U64 | 0x7)
+#define KVM_REG_S390_PP (KVM_REG_S390 | KVM_REG_SIZE_U64 | 0x8)
+#define KVM_REG_S390_GBEA (KVM_REG_S390 | KVM_REG_SIZE_U64 | 0x9)
+#endif
diff --git a/arch/s390/include/uapi/asm/kvm_para.h b/arch/s390/include/uapi/asm/kvm_para.h
new file mode 100644
index 000000000..b9ab584ad
--- /dev/null
+++ b/arch/s390/include/uapi/asm/kvm_para.h
@@ -0,0 +1,8 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/*
+ * User API definitions for paravirtual devices on s390
+ *
+ * Copyright IBM Corp. 2008
+ *
+ * Author(s): Christian Borntraeger <borntraeger@de.ibm.com>
+ */
diff --git a/arch/s390/include/uapi/asm/kvm_perf.h b/arch/s390/include/uapi/asm/kvm_perf.h
new file mode 100644
index 000000000..84606b8cc
--- /dev/null
+++ b/arch/s390/include/uapi/asm/kvm_perf.h
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/*
+ * Definitions for perf-kvm on s390
+ *
+ * Copyright 2014 IBM Corp.
+ * Author(s): Alexander Yarygin <yarygin@linux.vnet.ibm.com>
+ */
+
+#ifndef __LINUX_KVM_PERF_S390_H
+#define __LINUX_KVM_PERF_S390_H
+
+#include <asm/sie.h>
+
+#define DECODE_STR_LEN 40
+
+#define VCPU_ID "id"
+
+#define KVM_ENTRY_TRACE "kvm:kvm_s390_sie_enter"
+#define KVM_EXIT_TRACE "kvm:kvm_s390_sie_exit"
+#define KVM_EXIT_REASON "icptcode"
+
+#endif
diff --git a/arch/s390/include/uapi/asm/monwriter.h b/arch/s390/include/uapi/asm/monwriter.h
new file mode 100644
index 000000000..03d172f31
--- /dev/null
+++ b/arch/s390/include/uapi/asm/monwriter.h
@@ -0,0 +1,32 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/*
+ * Copyright IBM Corp. 2006
+ * Character device driver for writing z/VM APPLDATA monitor records
+ * Version 1.0
+ * Author(s): Melissa Howland <melissah@us.ibm.com>
+ *
+ */
+
+#ifndef _ASM_390_MONWRITER_H
+#define _ASM_390_MONWRITER_H
+
+/* mon_function values */
+#define MONWRITE_START_INTERVAL 0x00 /* start interval recording */
+#define MONWRITE_STOP_INTERVAL 0x01 /* stop interval or config recording */
+#define MONWRITE_GEN_EVENT 0x02 /* generate event record */
+#define MONWRITE_START_CONFIG 0x03 /* start configuration recording */
+
+/* the header the app uses in its write() data */
+struct monwrite_hdr {
+ unsigned char mon_function;
+ unsigned short applid;
+ unsigned char record_num;
+ unsigned short version;
+ unsigned short release;
+ unsigned short mod_level;
+ unsigned short datalen;
+ unsigned char hdrlen;
+
+} __attribute__((packed));
+
+#endif /* _ASM_390_MONWRITER_H */
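
A minimal userspace sketch of the write() protocol described above: every
record passed to the monwriter device is prefixed with a struct monwrite_hdr.
The device node name (/dev/monwriter) and the application id are assumptions
used for illustration only:

#include <asm/monwriter.h>
#include <fcntl.h>
#include <string.h>
#include <unistd.h>

/* Emit one event record of at most 4 KiB through the monwriter device. */
static int monwrite_event(const void *data, unsigned short len)
{
	struct monwrite_hdr hdr;
	char buf[sizeof(hdr) + 4096];
	int fd, rc;

	if (len > 4096)
		return -1;
	fd = open("/dev/monwriter", O_WRONLY);
	if (fd < 0)
		return -1;
	memset(&hdr, 0, sizeof(hdr));
	hdr.mon_function = MONWRITE_GEN_EVENT;
	hdr.applid = 0x1234;		/* hypothetical application id */
	hdr.datalen = len;
	hdr.hdrlen = sizeof(hdr);
	memcpy(buf, &hdr, sizeof(hdr));
	memcpy(buf + sizeof(hdr), data, len);
	rc = write(fd, buf, sizeof(hdr) + len) < 0 ? -1 : 0;
	close(fd);
	return rc;
}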
diff --git a/arch/s390/include/uapi/asm/perf_regs.h b/arch/s390/include/uapi/asm/perf_regs.h
new file mode 100644
index 000000000..d17dd9e5d
--- /dev/null
+++ b/arch/s390/include/uapi/asm/perf_regs.h
@@ -0,0 +1,44 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+#ifndef _ASM_S390_PERF_REGS_H
+#define _ASM_S390_PERF_REGS_H
+
+enum perf_event_s390_regs {
+ PERF_REG_S390_R0,
+ PERF_REG_S390_R1,
+ PERF_REG_S390_R2,
+ PERF_REG_S390_R3,
+ PERF_REG_S390_R4,
+ PERF_REG_S390_R5,
+ PERF_REG_S390_R6,
+ PERF_REG_S390_R7,
+ PERF_REG_S390_R8,
+ PERF_REG_S390_R9,
+ PERF_REG_S390_R10,
+ PERF_REG_S390_R11,
+ PERF_REG_S390_R12,
+ PERF_REG_S390_R13,
+ PERF_REG_S390_R14,
+ PERF_REG_S390_R15,
+ PERF_REG_S390_FP0,
+ PERF_REG_S390_FP1,
+ PERF_REG_S390_FP2,
+ PERF_REG_S390_FP3,
+ PERF_REG_S390_FP4,
+ PERF_REG_S390_FP5,
+ PERF_REG_S390_FP6,
+ PERF_REG_S390_FP7,
+ PERF_REG_S390_FP8,
+ PERF_REG_S390_FP9,
+ PERF_REG_S390_FP10,
+ PERF_REG_S390_FP11,
+ PERF_REG_S390_FP12,
+ PERF_REG_S390_FP13,
+ PERF_REG_S390_FP14,
+ PERF_REG_S390_FP15,
+ PERF_REG_S390_MASK,
+ PERF_REG_S390_PC,
+
+ PERF_REG_S390_MAX
+};
+
+#endif /* _ASM_S390_PERF_REGS_H */
diff --git a/arch/s390/include/uapi/asm/pkey.h b/arch/s390/include/uapi/asm/pkey.h
new file mode 100644
index 000000000..6f84a53c3
--- /dev/null
+++ b/arch/s390/include/uapi/asm/pkey.h
@@ -0,0 +1,132 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/*
+ * Userspace interface to the pkey device driver
+ *
+ * Copyright IBM Corp. 2017
+ *
+ * Author: Harald Freudenberger <freude@de.ibm.com>
+ *
+ */
+
+#ifndef _UAPI_PKEY_H
+#define _UAPI_PKEY_H
+
+#include <linux/ioctl.h>
+#include <linux/types.h>
+
+/*
+ * Ioctl calls supported by the pkey device driver
+ */
+
+#define PKEY_IOCTL_MAGIC 'p'
+
+#define SECKEYBLOBSIZE 64 /* secure key blob size is always 64 bytes */
+#define MAXPROTKEYSIZE 64 /* a protected key blob may be up to 64 bytes */
+#define MAXCLRKEYSIZE 32 /* a clear key value may be up to 32 bytes */
+
+/* defines for the type field within the pkey_protkey struct */
+#define PKEY_KEYTYPE_AES_128 1
+#define PKEY_KEYTYPE_AES_192 2
+#define PKEY_KEYTYPE_AES_256 3
+
+/* Struct to hold a secure key blob */
+struct pkey_seckey {
+ __u8 seckey[SECKEYBLOBSIZE]; /* the secure key blob */
+};
+
+/* Struct to hold protected key and length info */
+struct pkey_protkey {
+ __u32 type; /* key type, one of the PKEY_KEYTYPE values */
+ __u32 len; /* bytes actually stored in protkey[] */
+ __u8 protkey[MAXPROTKEYSIZE]; /* the protected key blob */
+};
+
+/* Struct to hold a clear key value */
+struct pkey_clrkey {
+ __u8 clrkey[MAXCLRKEYSIZE]; /* 16, 24, or 32 byte clear key value */
+};
+
+/*
+ * Generate secure key
+ */
+struct pkey_genseck {
+ __u16 cardnr; /* in: card to use or FFFF for any */
+ __u16 domain; /* in: domain or FFFF for any */
+ __u32 keytype; /* in: key type to generate */
+ struct pkey_seckey seckey; /* out: the secure key blob */
+};
+#define PKEY_GENSECK _IOWR(PKEY_IOCTL_MAGIC, 0x01, struct pkey_genseck)
+
+/*
+ * Construct secure key from clear key value
+ */
+struct pkey_clr2seck {
+ __u16 cardnr; /* in: card to use or FFFF for any */
+ __u16 domain; /* in: domain or FFFF for any */
+ __u32 keytype; /* in: key type to generate */
+ struct pkey_clrkey clrkey; /* in: the clear key value */
+ struct pkey_seckey seckey; /* out: the secure key blob */
+};
+#define PKEY_CLR2SECK _IOWR(PKEY_IOCTL_MAGIC, 0x02, struct pkey_clr2seck)
+
+/*
+ * Fabricate protected key from a secure key
+ */
+struct pkey_sec2protk {
+ __u16 cardnr; /* in: card to use or FFFF for any */
+ __u16 domain; /* in: domain or FFFF for any */
+ struct pkey_seckey seckey; /* in: the secure key blob */
+ struct pkey_protkey protkey; /* out: the protected key */
+};
+#define PKEY_SEC2PROTK _IOWR(PKEY_IOCTL_MAGIC, 0x03, struct pkey_sec2protk)
+
+/*
+ * Fabricate protected key from a clear key value
+ */
+struct pkey_clr2protk {
+ __u32 keytype; /* in: key type to generate */
+ struct pkey_clrkey clrkey; /* in: the clear key value */
+ struct pkey_protkey protkey; /* out: the protected key */
+};
+#define PKEY_CLR2PROTK _IOWR(PKEY_IOCTL_MAGIC, 0x04, struct pkey_clr2protk)
+
+/*
+ * Search for matching crypto card based on the Master Key
+ * Verification Pattern provided inside a secure key.
+ */
+struct pkey_findcard {
+ struct pkey_seckey seckey; /* in: the secure key blob */
+ __u16 cardnr; /* out: card number */
+ __u16 domain; /* out: domain number */
+};
+#define PKEY_FINDCARD _IOWR(PKEY_IOCTL_MAGIC, 0x05, struct pkey_findcard)
+
+/*
+ * Combined together: findcard + sec2prot
+ */
+struct pkey_skey2pkey {
+ struct pkey_seckey seckey; /* in: the secure key blob */
+ struct pkey_protkey protkey; /* out: the protected key */
+};
+#define PKEY_SKEY2PKEY _IOWR(PKEY_IOCTL_MAGIC, 0x06, struct pkey_skey2pkey)
+
+/*
+ * Verify that the given secure key is usable with the pkey module.
+ * Check for a correct key type and for at least one crypto card able
+ * to handle this key (master key or old master key verification
+ * pattern matches).
+ * Return some info about the key: key size in bits, key type (currently
+ * only AES), and a flag indicating whether the key is wrapped with an
+ * old MKVP.
+ */
+struct pkey_verifykey {
+ struct pkey_seckey seckey; /* in: the secure key blob */
+ __u16 cardnr; /* out: card number */
+ __u16 domain; /* out: domain number */
+ __u16 keysize; /* out: key size in bits */
+ __u32 attributes; /* out: attribute bits */
+};
+#define PKEY_VERIFYKEY _IOWR(PKEY_IOCTL_MAGIC, 0x07, struct pkey_verifykey)
+#define PKEY_VERIFY_ATTR_AES 0x00000001 /* key is an AES key */
+#define PKEY_VERIFY_ATTR_OLD_MKVP 0x00000100 /* key has old MKVP value */
+
+#endif /* _UAPI_PKEY_H */
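
A minimal userspace sketch of the ioctl flow defined above: generate an
AES-256 secure key on any card/domain and derive a protected key from it.
The device node name (/dev/pkey) is an assumption for illustration:

#include <asm/pkey.h>
#include <fcntl.h>
#include <string.h>
#include <sys/ioctl.h>
#include <unistd.h>

static int make_protected_key(struct pkey_protkey *out)
{
	struct pkey_genseck gen;
	struct pkey_sec2protk s2p;
	int fd, rc = -1;

	fd = open("/dev/pkey", O_RDWR);
	if (fd < 0)
		return -1;
	memset(&gen, 0, sizeof(gen));
	gen.cardnr = 0xFFFF;			/* any card */
	gen.domain = 0xFFFF;			/* any domain */
	gen.keytype = PKEY_KEYTYPE_AES_256;
	if (ioctl(fd, PKEY_GENSECK, &gen) < 0)
		goto out;
	memset(&s2p, 0, sizeof(s2p));
	s2p.cardnr = 0xFFFF;
	s2p.domain = 0xFFFF;
	s2p.seckey = gen.seckey;		/* the secure key blob */
	if (ioctl(fd, PKEY_SEC2PROTK, &s2p) < 0)
		goto out;
	*out = s2p.protkey;
	rc = 0;
out:
	close(fd);
	return rc;
}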
diff --git a/arch/s390/include/uapi/asm/posix_types.h b/arch/s390/include/uapi/asm/posix_types.h
new file mode 100644
index 000000000..2a3fc6384
--- /dev/null
+++ b/arch/s390/include/uapi/asm/posix_types.h
@@ -0,0 +1,52 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/*
+ * S390 version
+ *
+ */
+
+#ifndef __ARCH_S390_POSIX_TYPES_H
+#define __ARCH_S390_POSIX_TYPES_H
+
+/*
+ * This file is generally used by user-level software, so you need to
+ * be a little careful about namespace pollution etc. Also, we cannot
+ * assume GCC is being used.
+ */
+
+typedef unsigned long __kernel_size_t;
+typedef long __kernel_ssize_t;
+#define __kernel_size_t __kernel_size_t
+
+typedef unsigned short __kernel_old_dev_t;
+#define __kernel_old_dev_t __kernel_old_dev_t
+
+#ifndef __s390x__
+
+typedef unsigned long __kernel_ino_t;
+typedef unsigned short __kernel_mode_t;
+typedef unsigned short __kernel_ipc_pid_t;
+typedef unsigned short __kernel_uid_t;
+typedef unsigned short __kernel_gid_t;
+typedef int __kernel_ptrdiff_t;
+
+#else /* __s390x__ */
+
+typedef unsigned int __kernel_ino_t;
+typedef unsigned int __kernel_mode_t;
+typedef int __kernel_ipc_pid_t;
+typedef unsigned int __kernel_uid_t;
+typedef unsigned int __kernel_gid_t;
+typedef long __kernel_ptrdiff_t;
+typedef unsigned long __kernel_sigset_t; /* at least 32 bits */
+
+#endif /* __s390x__ */
+
+#define __kernel_ino_t __kernel_ino_t
+#define __kernel_mode_t __kernel_mode_t
+#define __kernel_ipc_pid_t __kernel_ipc_pid_t
+#define __kernel_uid_t __kernel_uid_t
+#define __kernel_gid_t __kernel_gid_t
+
+#include <asm-generic/posix_types.h>
+
+#endif
diff --git a/arch/s390/include/uapi/asm/ptrace.h b/arch/s390/include/uapi/asm/ptrace.h
new file mode 100644
index 000000000..543dd70e1
--- /dev/null
+++ b/arch/s390/include/uapi/asm/ptrace.h
@@ -0,0 +1,457 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/*
+ * S390 version
+ * Copyright IBM Corp. 1999, 2000
+ * Author(s): Denis Joseph Barrow (djbarrow@de.ibm.com,barrow_dj@yahoo.com)
+ */
+
+#ifndef _UAPI_S390_PTRACE_H
+#define _UAPI_S390_PTRACE_H
+
+/*
+ * Offsets in the user_regs_struct. They are used for the ptrace
+ * system call and in entry.S
+ */
+#ifndef __s390x__
+
+#define PT_PSWMASK 0x00
+#define PT_PSWADDR 0x04
+#define PT_GPR0 0x08
+#define PT_GPR1 0x0C
+#define PT_GPR2 0x10
+#define PT_GPR3 0x14
+#define PT_GPR4 0x18
+#define PT_GPR5 0x1C
+#define PT_GPR6 0x20
+#define PT_GPR7 0x24
+#define PT_GPR8 0x28
+#define PT_GPR9 0x2C
+#define PT_GPR10 0x30
+#define PT_GPR11 0x34
+#define PT_GPR12 0x38
+#define PT_GPR13 0x3C
+#define PT_GPR14 0x40
+#define PT_GPR15 0x44
+#define PT_ACR0 0x48
+#define PT_ACR1 0x4C
+#define PT_ACR2 0x50
+#define PT_ACR3 0x54
+#define PT_ACR4 0x58
+#define PT_ACR5 0x5C
+#define PT_ACR6 0x60
+#define PT_ACR7 0x64
+#define PT_ACR8 0x68
+#define PT_ACR9 0x6C
+#define PT_ACR10 0x70
+#define PT_ACR11 0x74
+#define PT_ACR12 0x78
+#define PT_ACR13 0x7C
+#define PT_ACR14 0x80
+#define PT_ACR15 0x84
+#define PT_ORIGGPR2 0x88
+#define PT_FPC 0x90
+/*
+ * A nasty fact of life is that the ptrace API
+ * only supports passing of longs.
+ */
+#define PT_FPR0_HI 0x98
+#define PT_FPR0_LO 0x9C
+#define PT_FPR1_HI 0xA0
+#define PT_FPR1_LO 0xA4
+#define PT_FPR2_HI 0xA8
+#define PT_FPR2_LO 0xAC
+#define PT_FPR3_HI 0xB0
+#define PT_FPR3_LO 0xB4
+#define PT_FPR4_HI 0xB8
+#define PT_FPR4_LO 0xBC
+#define PT_FPR5_HI 0xC0
+#define PT_FPR5_LO 0xC4
+#define PT_FPR6_HI 0xC8
+#define PT_FPR6_LO 0xCC
+#define PT_FPR7_HI 0xD0
+#define PT_FPR7_LO 0xD4
+#define PT_FPR8_HI 0xD8
+#define PT_FPR8_LO 0XDC
+#define PT_FPR9_HI 0xE0
+#define PT_FPR9_LO 0xE4
+#define PT_FPR10_HI 0xE8
+#define PT_FPR10_LO 0xEC
+#define PT_FPR11_HI 0xF0
+#define PT_FPR11_LO 0xF4
+#define PT_FPR12_HI 0xF8
+#define PT_FPR12_LO 0xFC
+#define PT_FPR13_HI 0x100
+#define PT_FPR13_LO 0x104
+#define PT_FPR14_HI 0x108
+#define PT_FPR14_LO 0x10C
+#define PT_FPR15_HI 0x110
+#define PT_FPR15_LO 0x114
+#define PT_CR_9 0x118
+#define PT_CR_10 0x11C
+#define PT_CR_11 0x120
+#define PT_IEEE_IP 0x13C
+#define PT_LASTOFF PT_IEEE_IP
+#define PT_ENDREGS 0x140-1
+
+#define GPR_SIZE 4
+#define CR_SIZE 4
+
+#define STACK_FRAME_OVERHEAD 96 /* size of minimum stack frame */
+
+#else /* __s390x__ */
+
+#define PT_PSWMASK 0x00
+#define PT_PSWADDR 0x08
+#define PT_GPR0 0x10
+#define PT_GPR1 0x18
+#define PT_GPR2 0x20
+#define PT_GPR3 0x28
+#define PT_GPR4 0x30
+#define PT_GPR5 0x38
+#define PT_GPR6 0x40
+#define PT_GPR7 0x48
+#define PT_GPR8 0x50
+#define PT_GPR9 0x58
+#define PT_GPR10 0x60
+#define PT_GPR11 0x68
+#define PT_GPR12 0x70
+#define PT_GPR13 0x78
+#define PT_GPR14 0x80
+#define PT_GPR15 0x88
+#define PT_ACR0 0x90
+#define PT_ACR1 0x94
+#define PT_ACR2 0x98
+#define PT_ACR3 0x9C
+#define PT_ACR4 0xA0
+#define PT_ACR5 0xA4
+#define PT_ACR6 0xA8
+#define PT_ACR7 0xAC
+#define PT_ACR8 0xB0
+#define PT_ACR9 0xB4
+#define PT_ACR10 0xB8
+#define PT_ACR11 0xBC
+#define PT_ACR12 0xC0
+#define PT_ACR13 0xC4
+#define PT_ACR14 0xC8
+#define PT_ACR15 0xCC
+#define PT_ORIGGPR2 0xD0
+#define PT_FPC 0xD8
+#define PT_FPR0 0xE0
+#define PT_FPR1 0xE8
+#define PT_FPR2 0xF0
+#define PT_FPR3 0xF8
+#define PT_FPR4 0x100
+#define PT_FPR5 0x108
+#define PT_FPR6 0x110
+#define PT_FPR7 0x118
+#define PT_FPR8 0x120
+#define PT_FPR9 0x128
+#define PT_FPR10 0x130
+#define PT_FPR11 0x138
+#define PT_FPR12 0x140
+#define PT_FPR13 0x148
+#define PT_FPR14 0x150
+#define PT_FPR15 0x158
+#define PT_CR_9 0x160
+#define PT_CR_10 0x168
+#define PT_CR_11 0x170
+#define PT_IEEE_IP 0x1A8
+#define PT_LASTOFF PT_IEEE_IP
+#define PT_ENDREGS 0x1B0-1
+
+#define GPR_SIZE 8
+#define CR_SIZE 8
+
+#define STACK_FRAME_OVERHEAD 160 /* size of minimum stack frame */
+
+#endif /* __s390x__ */
+
+#define NUM_GPRS 16
+#define NUM_FPRS 16
+#define NUM_CRS 16
+#define NUM_ACRS 16
+
+#define NUM_CR_WORDS 3
+
+#define FPR_SIZE 8
+#define FPC_SIZE 4
+#define FPC_PAD_SIZE 4 /* gcc insists on aligning the fpregs */
+#define ACR_SIZE 4
+
+
+#define PTRACE_OLDSETOPTIONS 21
+
+#ifndef __ASSEMBLY__
+#include <linux/stddef.h>
+#include <linux/types.h>
+
+typedef union {
+ float f;
+ double d;
+ __u64 ui;
+ struct
+ {
+ __u32 hi;
+ __u32 lo;
+ } fp;
+} freg_t;
+
+typedef struct {
+ __u32 fpc;
+ __u32 pad;
+ freg_t fprs[NUM_FPRS];
+} s390_fp_regs;
+
+#define FPC_EXCEPTION_MASK 0xF8000000
+#define FPC_FLAGS_MASK 0x00F80000
+#define FPC_DXC_MASK 0x0000FF00
+#define FPC_RM_MASK 0x00000003
+
+/* this typedef defines what a Program Status Word looks like */
+typedef struct {
+ unsigned long mask;
+ unsigned long addr;
+} __attribute__ ((aligned(8))) psw_t;
+
+#ifndef __s390x__
+
+#define PSW_MASK_PER 0x40000000UL
+#define PSW_MASK_DAT 0x04000000UL
+#define PSW_MASK_IO 0x02000000UL
+#define PSW_MASK_EXT 0x01000000UL
+#define PSW_MASK_KEY 0x00F00000UL
+#define PSW_MASK_BASE 0x00080000UL /* always one */
+#define PSW_MASK_MCHECK 0x00040000UL
+#define PSW_MASK_WAIT 0x00020000UL
+#define PSW_MASK_PSTATE 0x00010000UL
+#define PSW_MASK_ASC 0x0000C000UL
+#define PSW_MASK_CC 0x00003000UL
+#define PSW_MASK_PM 0x00000F00UL
+#define PSW_MASK_RI 0x00000000UL
+#define PSW_MASK_EA 0x00000000UL
+#define PSW_MASK_BA 0x00000000UL
+
+#define PSW_MASK_USER 0x0000FF00UL
+
+#define PSW_ADDR_AMODE 0x80000000UL
+#define PSW_ADDR_INSN 0x7FFFFFFFUL
+
+#define PSW_DEFAULT_KEY (((unsigned long) PAGE_DEFAULT_ACC) << 20)
+
+#define PSW_ASC_PRIMARY 0x00000000UL
+#define PSW_ASC_ACCREG 0x00004000UL
+#define PSW_ASC_SECONDARY 0x00008000UL
+#define PSW_ASC_HOME 0x0000C000UL
+
+#else /* __s390x__ */
+
+#define PSW_MASK_PER 0x4000000000000000UL
+#define PSW_MASK_DAT 0x0400000000000000UL
+#define PSW_MASK_IO 0x0200000000000000UL
+#define PSW_MASK_EXT 0x0100000000000000UL
+#define PSW_MASK_BASE 0x0000000000000000UL
+#define PSW_MASK_KEY 0x00F0000000000000UL
+#define PSW_MASK_MCHECK 0x0004000000000000UL
+#define PSW_MASK_WAIT 0x0002000000000000UL
+#define PSW_MASK_PSTATE 0x0001000000000000UL
+#define PSW_MASK_ASC 0x0000C00000000000UL
+#define PSW_MASK_CC 0x0000300000000000UL
+#define PSW_MASK_PM 0x00000F0000000000UL
+#define PSW_MASK_RI 0x0000008000000000UL
+#define PSW_MASK_EA 0x0000000100000000UL
+#define PSW_MASK_BA 0x0000000080000000UL
+
+#define PSW_MASK_USER 0x0000FF0180000000UL
+
+#define PSW_ADDR_AMODE 0x0000000000000000UL
+#define PSW_ADDR_INSN 0xFFFFFFFFFFFFFFFFUL
+
+#define PSW_DEFAULT_KEY (((unsigned long) PAGE_DEFAULT_ACC) << 52)
+
+#define PSW_ASC_PRIMARY 0x0000000000000000UL
+#define PSW_ASC_ACCREG 0x0000400000000000UL
+#define PSW_ASC_SECONDARY 0x0000800000000000UL
+#define PSW_ASC_HOME 0x0000C00000000000UL
+
+#endif /* __s390x__ */
+
+
+/*
+ * The s390_regs structure is used to define the elf_gregset_t.
+ */
+typedef struct {
+ psw_t psw;
+ unsigned long gprs[NUM_GPRS];
+ unsigned int acrs[NUM_ACRS];
+ unsigned long orig_gpr2;
+} s390_regs;
+
+/*
+ * The user_pt_regs structure exports the beginning of
+ * the in-kernel pt_regs structure to user space.
+ */
+typedef struct {
+ unsigned long args[1];
+ psw_t psw;
+ unsigned long gprs[NUM_GPRS];
+} user_pt_regs;
+
+/*
+ * Now for the user space program event recording (trace) definitions.
+ * The following structures are used only for the ptrace interface; don't
+ * touch or even look at them if you don't want to modify the user-space
+ * ptrace interface. In particular, stay away from them for in-kernel PER.
+ */
+typedef struct {
+ unsigned long cr[NUM_CR_WORDS];
+} per_cr_words;
+
+#define PER_EM_MASK 0xE8000000UL
+
+typedef struct {
+#ifdef __s390x__
+ unsigned : 32;
+#endif /* __s390x__ */
+ unsigned em_branching : 1;
+ unsigned em_instruction_fetch : 1;
+ /*
+ * Switching on storage alteration automatically fixes
+ * the storage alteration event bit in the user's std.
+ */
+ unsigned em_storage_alteration : 1;
+ unsigned em_gpr_alt_unused : 1;
+ unsigned em_store_real_address : 1;
+ unsigned : 3;
+ unsigned branch_addr_ctl : 1;
+ unsigned : 1;
+ unsigned storage_alt_space_ctl : 1;
+ unsigned : 21;
+ unsigned long starting_addr;
+ unsigned long ending_addr;
+} per_cr_bits;
+
+typedef struct {
+ unsigned short perc_atmid;
+ unsigned long address;
+ unsigned char access_id;
+} per_lowcore_words;
+
+typedef struct {
+ unsigned perc_branching : 1;
+ unsigned perc_instruction_fetch : 1;
+ unsigned perc_storage_alteration : 1;
+ unsigned perc_gpr_alt_unused : 1;
+ unsigned perc_store_real_address : 1;
+ unsigned : 3;
+ unsigned atmid_psw_bit_31 : 1;
+ unsigned atmid_validity_bit : 1;
+ unsigned atmid_psw_bit_32 : 1;
+ unsigned atmid_psw_bit_5 : 1;
+ unsigned atmid_psw_bit_16 : 1;
+ unsigned atmid_psw_bit_17 : 1;
+ unsigned si : 2;
+ unsigned long address;
+ unsigned : 4;
+ unsigned access_id : 4;
+} per_lowcore_bits;
+
+typedef struct {
+ union {
+ per_cr_words words;
+ per_cr_bits bits;
+ } control_regs;
+ /*
+ * The single_step and instruction_fetch bits are obsolete,
+ * the kernel always sets them to zero. To enable single
+ * stepping use ptrace(PTRACE_SINGLESTEP) instead.
+ */
+ unsigned single_step : 1;
+ unsigned instruction_fetch : 1;
+ unsigned : 30;
+ /*
+ * These addresses are copied into cr10 & cr11 if single
+ * stepping is switched off
+ */
+ unsigned long starting_addr;
+ unsigned long ending_addr;
+ union {
+ per_lowcore_words words;
+ per_lowcore_bits bits;
+ } lowcore;
+} per_struct;
+
+typedef struct {
+ unsigned int len;
+ unsigned long kernel_addr;
+ unsigned long process_addr;
+} ptrace_area;
+
+/*
+ * S/390 specific non posix ptrace requests. I chose unusual values so
+ * they are unlikely to clash with future ptrace definitions.
+ */
+#define PTRACE_PEEKUSR_AREA 0x5000
+#define PTRACE_POKEUSR_AREA 0x5001
+#define PTRACE_PEEKTEXT_AREA 0x5002
+#define PTRACE_PEEKDATA_AREA 0x5003
+#define PTRACE_POKETEXT_AREA 0x5004
+#define PTRACE_POKEDATA_AREA 0x5005
+#define PTRACE_GET_LAST_BREAK 0x5006
+#define PTRACE_PEEK_SYSTEM_CALL 0x5007
+#define PTRACE_POKE_SYSTEM_CALL 0x5008
+#define PTRACE_ENABLE_TE 0x5009
+#define PTRACE_DISABLE_TE 0x5010
+#define PTRACE_TE_ABORT_RAND 0x5011
+
+/*
+ * The numbers chosen here are somewhat arbitrary but absolutely MUST
+ * not overlap with any of the numbers assigned in <linux/ptrace.h>.
+ */
+#define PTRACE_SINGLEBLOCK 12 /* resume execution until next branch */
+
+/*
+ * PT_PROT definition is loosely based on hppa bsd definition in
+ * gdb/hppab-nat.c
+ */
+#define PTRACE_PROT 21
+
+typedef enum {
+ ptprot_set_access_watchpoint,
+ ptprot_set_write_watchpoint,
+ ptprot_disable_watchpoint
+} ptprot_flags;
+
+typedef struct {
+ unsigned long lowaddr;
+ unsigned long hiaddr;
+ ptprot_flags prot;
+} ptprot_area;
+
+/* Sequence of bytes for breakpoint illegal instruction. */
+#define S390_BREAKPOINT {0x0,0x1}
+#define S390_BREAKPOINT_U16 ((__u16)0x0001)
+#define S390_SYSCALL_OPCODE ((__u16)0x0a00)
+#define S390_SYSCALL_SIZE 2
+
+/*
+ * The user_regs_struct defines the way the user registers are
+ * stored on the stack for signal handling.
+ */
+struct user_regs_struct {
+ psw_t psw;
+ unsigned long gprs[NUM_GPRS];
+ unsigned int acrs[NUM_ACRS];
+ unsigned long orig_gpr2;
+ s390_fp_regs fp_regs;
+ /*
+ * These per registers are in here so that gdb can modify them
+ * itself as there is no "official" ptrace interface for hardware
+ * watchpoints. This is the way intel does it.
+ */
+ per_struct per_info;
+ unsigned long ieee_instruction_pointer; /* obsolete, always 0 */
+};
+
+#endif /* __ASSEMBLY__ */
+
+#endif /* _UAPI_S390_PTRACE_H */
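
A minimal sketch of the area transfer requests defined above: read all general
purpose registers of a stopped tracee in one call. How glibc's <sys/ptrace.h>
and this uapi header interact can vary, so treat this purely as an
illustration of struct ptrace_area:

#include <asm/ptrace.h>
#include <sys/ptrace.h>
#include <sys/types.h>

/* Copy the tracee's 16 GPRs into the caller-provided buffer. */
static long peek_gprs(pid_t pid, unsigned long gprs[NUM_GPRS])
{
	ptrace_area area = {
		.len          = NUM_GPRS * GPR_SIZE,
		.kernel_addr  = PT_GPR0,		/* offset in the user area */
		.process_addr = (unsigned long) gprs,	/* buffer in the tracer */
	};

	return ptrace(PTRACE_PEEKUSR_AREA, pid, &area, NULL);
}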
diff --git a/arch/s390/include/uapi/asm/qeth.h b/arch/s390/include/uapi/asm/qeth.h
new file mode 100644
index 000000000..fac9995df
--- /dev/null
+++ b/arch/s390/include/uapi/asm/qeth.h
@@ -0,0 +1,116 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/*
+ * ioctl definitions for qeth driver
+ *
+ * Copyright IBM Corp. 2004
+ *
+ * Author(s): Thomas Spatzier <tspat@de.ibm.com>
+ *
+ */
+#ifndef __ASM_S390_QETH_IOCTL_H__
+#define __ASM_S390_QETH_IOCTL_H__
+#include <linux/types.h>
+#include <linux/ioctl.h>
+
+#define SIOC_QETH_ARP_SET_NO_ENTRIES (SIOCDEVPRIVATE)
+#define SIOC_QETH_ARP_QUERY_INFO (SIOCDEVPRIVATE + 1)
+#define SIOC_QETH_ARP_ADD_ENTRY (SIOCDEVPRIVATE + 2)
+#define SIOC_QETH_ARP_REMOVE_ENTRY (SIOCDEVPRIVATE + 3)
+#define SIOC_QETH_ARP_FLUSH_CACHE (SIOCDEVPRIVATE + 4)
+#define SIOC_QETH_ADP_SET_SNMP_CONTROL (SIOCDEVPRIVATE + 5)
+#define SIOC_QETH_GET_CARD_TYPE (SIOCDEVPRIVATE + 6)
+#define SIOC_QETH_QUERY_OAT (SIOCDEVPRIVATE + 7)
+
+struct qeth_arp_cache_entry {
+ __u8 macaddr[6];
+ __u8 reserved1[2];
+ __u8 ipaddr[16]; /* for both IPv4 and IPv6 */
+ __u8 reserved2[32];
+} __attribute__ ((packed));
+
+enum qeth_arp_ipaddrtype {
+ QETHARP_IP_ADDR_V4 = 1,
+ QETHARP_IP_ADDR_V6 = 2,
+};
+struct qeth_arp_entrytype {
+ __u8 mac;
+ __u8 ip;
+} __attribute__((packed));
+
+#define QETH_QARP_MEDIASPECIFIC_BYTES 32
+#define QETH_QARP_MACADDRTYPE_BYTES 1
+struct qeth_arp_qi_entry7 {
+ __u8 media_specific[QETH_QARP_MEDIASPECIFIC_BYTES];
+ struct qeth_arp_entrytype type;
+ __u8 macaddr[6];
+ __u8 ipaddr[4];
+} __attribute__((packed));
+
+struct qeth_arp_qi_entry7_ipv6 {
+ __u8 media_specific[QETH_QARP_MEDIASPECIFIC_BYTES];
+ struct qeth_arp_entrytype type;
+ __u8 macaddr[6];
+ __u8 ipaddr[16];
+} __attribute__((packed));
+
+struct qeth_arp_qi_entry7_short {
+ struct qeth_arp_entrytype type;
+ __u8 macaddr[6];
+ __u8 ipaddr[4];
+} __attribute__((packed));
+
+struct qeth_arp_qi_entry7_short_ipv6 {
+ struct qeth_arp_entrytype type;
+ __u8 macaddr[6];
+ __u8 ipaddr[16];
+} __attribute__((packed));
+
+struct qeth_arp_qi_entry5 {
+ __u8 media_specific[QETH_QARP_MEDIASPECIFIC_BYTES];
+ struct qeth_arp_entrytype type;
+ __u8 ipaddr[4];
+} __attribute__((packed));
+
+struct qeth_arp_qi_entry5_ipv6 {
+ __u8 media_specific[QETH_QARP_MEDIASPECIFIC_BYTES];
+ struct qeth_arp_entrytype type;
+ __u8 ipaddr[16];
+} __attribute__((packed));
+
+struct qeth_arp_qi_entry5_short {
+ struct qeth_arp_entrytype type;
+ __u8 ipaddr[4];
+} __attribute__((packed));
+
+struct qeth_arp_qi_entry5_short_ipv6 {
+ struct qeth_arp_entrytype type;
+ __u8 ipaddr[16];
+} __attribute__((packed));
+/*
+ * can be set by the user if no "media specific information" is wanted;
+ * this saves a lot of space in the user space buffer
+ */
+#define QETH_QARP_STRIP_ENTRIES 0x8000
+#define QETH_QARP_WITH_IPV6 0x4000
+#define QETH_QARP_REQUEST_MASK 0x00ff
+
+/* data sent to user space as result of query arp ioctl */
+#define QETH_QARP_USER_DATA_SIZE 20000
+#define QETH_QARP_MASK_OFFSET 4
+#define QETH_QARP_ENTRIES_OFFSET 6
+struct qeth_arp_query_user_data {
+ union {
+ __u32 data_len; /* set by user space program */
+ __u32 no_entries; /* set by kernel */
+ } u;
+ __u16 mask_bits;
+ char *entries;
+} __attribute__((packed));
+
+struct qeth_query_oat_data {
+ __u32 command;
+ __u32 buffer_len;
+ __u32 response_len;
+ __u64 ptr;
+};
+#endif /* __ASM_S390_QETH_IOCTL_H__ */
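
A minimal userspace sketch of the ARP cache query defined above. The buffer
layout (data_len and mask_bits set by the caller) follows the structure
definitions in this header; the interface name is a placeholder and the
request is carried as a private device ioctl in struct ifreq:

#include <asm/qeth.h>
#include <net/if.h>
#include <string.h>
#include <sys/ioctl.h>

/* buf must provide QETH_QARP_USER_DATA_SIZE bytes; sock is any AF_INET socket. */
static int query_arp_cache(int sock, const char *ifname, char *buf)
{
	struct qeth_arp_query_user_data *udata = (void *) buf;
	struct ifreq ifr;

	memset(buf, 0, QETH_QARP_USER_DATA_SIZE);
	udata->u.data_len = QETH_QARP_USER_DATA_SIZE;	/* set by user space */
	udata->mask_bits = QETH_QARP_STRIP_ENTRIES | QETH_QARP_WITH_IPV6;

	memset(&ifr, 0, sizeof(ifr));
	strncpy(ifr.ifr_name, ifname, IFNAMSIZ - 1);
	ifr.ifr_data = buf;
	return ioctl(sock, SIOC_QETH_ARP_QUERY_INFO, &ifr);
}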
diff --git a/arch/s390/include/uapi/asm/runtime_instr.h b/arch/s390/include/uapi/asm/runtime_instr.h
new file mode 100644
index 000000000..45c9ec984
--- /dev/null
+++ b/arch/s390/include/uapi/asm/runtime_instr.h
@@ -0,0 +1,74 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+#ifndef _S390_UAPI_RUNTIME_INSTR_H
+#define _S390_UAPI_RUNTIME_INSTR_H
+
+#include <linux/types.h>
+
+#define S390_RUNTIME_INSTR_START 0x1
+#define S390_RUNTIME_INSTR_STOP 0x2
+
+struct runtime_instr_cb {
+ __u64 rca;
+ __u64 roa;
+ __u64 rla;
+
+ __u32 v : 1;
+ __u32 s : 1;
+ __u32 k : 1;
+ __u32 h : 1;
+ __u32 a : 1;
+ __u32 reserved1 : 3;
+ __u32 ps : 1;
+ __u32 qs : 1;
+ __u32 pc : 1;
+ __u32 qc : 1;
+ __u32 reserved2 : 1;
+ __u32 g : 1;
+ __u32 u : 1;
+ __u32 l : 1;
+ __u32 key : 4;
+ __u32 reserved3 : 8;
+ __u32 t : 1;
+ __u32 rgs : 3;
+
+ __u32 m : 4;
+ __u32 n : 1;
+ __u32 mae : 1;
+ __u32 reserved4 : 2;
+ __u32 c : 1;
+ __u32 r : 1;
+ __u32 b : 1;
+ __u32 j : 1;
+ __u32 e : 1;
+ __u32 x : 1;
+ __u32 reserved5 : 2;
+ __u32 bpxn : 1;
+ __u32 bpxt : 1;
+ __u32 bpti : 1;
+ __u32 bpni : 1;
+ __u32 reserved6 : 2;
+
+ __u32 d : 1;
+ __u32 f : 1;
+ __u32 ic : 4;
+ __u32 dc : 4;
+
+ __u64 reserved7;
+ __u64 sf;
+ __u64 rsic;
+ __u64 reserved8;
+} __packed __aligned(8);
+
+static inline void load_runtime_instr_cb(struct runtime_instr_cb *cb)
+{
+ asm volatile(".insn rsy,0xeb0000000060,0,0,%0" /* LRIC */
+ : : "Q" (*cb));
+}
+
+static inline void store_runtime_instr_cb(struct runtime_instr_cb *cb)
+{
+ asm volatile(".insn rsy,0xeb0000000061,0,0,%0" /* STRIC */
+ : "=Q" (*cb) : : "cc");
+}
+
+#endif /* _S390_UAPI_RUNTIME_INSTR_H */
diff --git a/arch/s390/include/uapi/asm/schid.h b/arch/s390/include/uapi/asm/schid.h
new file mode 100644
index 000000000..58fca6f48
--- /dev/null
+++ b/arch/s390/include/uapi/asm/schid.h
@@ -0,0 +1,17 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+#ifndef _UAPIASM_SCHID_H
+#define _UAPIASM_SCHID_H
+
+#include <linux/types.h>
+
+struct subchannel_id {
+ __u32 cssid : 8;
+ __u32 : 4;
+ __u32 m : 1;
+ __u32 ssid : 2;
+ __u32 one : 1;
+ __u32 sch_no : 16;
+} __attribute__ ((packed, aligned(4)));
+
+
+#endif /* _UAPIASM_SCHID_H */
diff --git a/arch/s390/include/uapi/asm/sclp_ctl.h b/arch/s390/include/uapi/asm/sclp_ctl.h
new file mode 100644
index 000000000..e4e8c4dcd
--- /dev/null
+++ b/arch/s390/include/uapi/asm/sclp_ctl.h
@@ -0,0 +1,25 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/*
+ * IOCTL interface for SCLP
+ *
+ * Copyright IBM Corp. 2012
+ *
+ * Author: Michael Holzheu <holzheu@linux.vnet.ibm.com>
+ */
+
+#ifndef _ASM_SCLP_CTL_H
+#define _ASM_SCLP_CTL_H
+
+#include <linux/types.h>
+
+struct sclp_ctl_sccb {
+ __u32 cmdw;
+ __u64 sccb;
+} __attribute__((packed));
+
+#define SCLP_CTL_IOCTL_MAGIC 0x10
+
+#define SCLP_CTL_SCCB \
+ _IOWR(SCLP_CTL_IOCTL_MAGIC, 0x10, struct sclp_ctl_sccb)
+
+#endif
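
A minimal userspace sketch of the ioctl above: forward a caller-prepared SCCB
to the service element. The device node name (/dev/sclp) is an assumption, and
building a valid SCCB is out of scope here:

#include <asm/sclp_ctl.h>
#include <fcntl.h>
#include <stdint.h>
#include <sys/ioctl.h>
#include <unistd.h>

static int sclp_issue(uint32_t cmdw, void *sccb)
{
	struct sclp_ctl_sccb req = {
		.cmdw = cmdw,
		.sccb = (uint64_t)(unsigned long) sccb,	/* guest-absolute SCCB address */
	};
	int fd, rc;

	fd = open("/dev/sclp", O_RDWR);
	if (fd < 0)
		return -1;
	rc = ioctl(fd, SCLP_CTL_SCCB, &req);
	close(fd);
	return rc;
}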
diff --git a/arch/s390/include/uapi/asm/setup.h b/arch/s390/include/uapi/asm/setup.h
new file mode 100644
index 000000000..1f8803a31
--- /dev/null
+++ b/arch/s390/include/uapi/asm/setup.h
@@ -0,0 +1,14 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/*
+ * S390 version
+ * Copyright IBM Corp. 1999, 2010
+ */
+
+#ifndef _UAPI_ASM_S390_SETUP_H
+#define _UAPI_ASM_S390_SETUP_H
+
+#define COMMAND_LINE_SIZE 4096
+
+#define ARCH_COMMAND_LINE_SIZE 896
+
+#endif /* _UAPI_ASM_S390_SETUP_H */
diff --git a/arch/s390/include/uapi/asm/sie.h b/arch/s390/include/uapi/asm/sie.h
new file mode 100644
index 000000000..6ca1e68d7
--- /dev/null
+++ b/arch/s390/include/uapi/asm/sie.h
@@ -0,0 +1,252 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+#ifndef _UAPI_ASM_S390_SIE_H
+#define _UAPI_ASM_S390_SIE_H
+
+#define diagnose_codes \
+ { 0x10, "DIAG (0x10) release pages" }, \
+ { 0x44, "DIAG (0x44) time slice end" }, \
+ { 0x9c, "DIAG (0x9c) time slice end directed" }, \
+ { 0x204, "DIAG (0x204) logical-cpu utilization" }, \
+ { 0x258, "DIAG (0x258) page-reference services" }, \
+ { 0x288, "DIAG (0x288) watchdog functions" }, \
+ { 0x308, "DIAG (0x308) ipl functions" }, \
+ { 0x500, "DIAG (0x500) KVM virtio functions" }, \
+ { 0x501, "DIAG (0x501) KVM breakpoint" }
+
+#define sigp_order_codes \
+ { 0x01, "SIGP sense" }, \
+ { 0x02, "SIGP external call" }, \
+ { 0x03, "SIGP emergency signal" }, \
+ { 0x04, "SIGP start" }, \
+ { 0x05, "SIGP stop" }, \
+ { 0x06, "SIGP restart" }, \
+ { 0x09, "SIGP stop and store status" }, \
+ { 0x0b, "SIGP initial cpu reset" }, \
+ { 0x0c, "SIGP cpu reset" }, \
+ { 0x0d, "SIGP set prefix" }, \
+ { 0x0e, "SIGP store status at address" }, \
+ { 0x12, "SIGP set architecture" }, \
+ { 0x13, "SIGP conditional emergency signal" }, \
+ { 0x15, "SIGP sense running" }, \
+ { 0x16, "SIGP set multithreading"}, \
+ { 0x17, "SIGP store additional status ait address"}
+
+#define icpt_prog_codes \
+ { 0x0001, "Prog Operation" }, \
+ { 0x0002, "Prog Privileged Operation" }, \
+ { 0x0003, "Prog Execute" }, \
+ { 0x0004, "Prog Protection" }, \
+ { 0x0005, "Prog Addressing" }, \
+ { 0x0006, "Prog Specification" }, \
+ { 0x0007, "Prog Data" }, \
+ { 0x0008, "Prog Fixedpoint overflow" }, \
+ { 0x0009, "Prog Fixedpoint divide" }, \
+ { 0x000A, "Prog Decimal overflow" }, \
+ { 0x000B, "Prog Decimal divide" }, \
+ { 0x000C, "Prog HFP exponent overflow" }, \
+ { 0x000D, "Prog HFP exponent underflow" }, \
+ { 0x000E, "Prog HFP significance" }, \
+ { 0x000F, "Prog HFP divide" }, \
+ { 0x0010, "Prog Segment translation" }, \
+ { 0x0011, "Prog Page translation" }, \
+ { 0x0012, "Prog Translation specification" }, \
+ { 0x0013, "Prog Special operation" }, \
+ { 0x0015, "Prog Operand" }, \
+ { 0x0016, "Prog Trace table" }, \
+ { 0x0017, "Prog ASNtranslation specification" }, \
+ { 0x001C, "Prog Spaceswitch event" }, \
+ { 0x001D, "Prog HFP square root" }, \
+ { 0x001F, "Prog PCtranslation specification" }, \
+ { 0x0020, "Prog AFX translation" }, \
+ { 0x0021, "Prog ASX translation" }, \
+ { 0x0022, "Prog LX translation" }, \
+ { 0x0023, "Prog EX translation" }, \
+ { 0x0024, "Prog Primary authority" }, \
+ { 0x0025, "Prog Secondary authority" }, \
+ { 0x0026, "Prog LFX translation exception" }, \
+ { 0x0027, "Prog LSX translation exception" }, \
+ { 0x0028, "Prog ALET specification" }, \
+ { 0x0029, "Prog ALEN translation" }, \
+ { 0x002A, "Prog ALE sequence" }, \
+ { 0x002B, "Prog ASTE validity" }, \
+ { 0x002C, "Prog ASTE sequence" }, \
+ { 0x002D, "Prog Extended authority" }, \
+ { 0x002E, "Prog LSTE sequence" }, \
+ { 0x002F, "Prog ASTE instance" }, \
+ { 0x0030, "Prog Stack full" }, \
+ { 0x0031, "Prog Stack empty" }, \
+ { 0x0032, "Prog Stack specification" }, \
+ { 0x0033, "Prog Stack type" }, \
+ { 0x0034, "Prog Stack operation" }, \
+ { 0x0039, "Prog Region first translation" }, \
+ { 0x003A, "Prog Region second translation" }, \
+ { 0x003B, "Prog Region third translation" }, \
+ { 0x0040, "Prog Monitor event" }, \
+ { 0x0080, "Prog PER event" }, \
+ { 0x0119, "Prog Crypto operation" }
+
+#define exit_code_ipa0(ipa0, opcode, mnemonic) \
+ { (ipa0 << 8 | opcode), #ipa0 " " mnemonic }
+#define exit_code(opcode, mnemonic) \
+ { opcode, mnemonic }
+
+#define icpt_insn_codes \
+ exit_code_ipa0(0x01, 0x01, "PR"), \
+ exit_code_ipa0(0x01, 0x04, "PTFF"), \
+ exit_code_ipa0(0x01, 0x07, "SCKPF"), \
+ exit_code_ipa0(0xAA, 0x00, "RINEXT"), \
+ exit_code_ipa0(0xAA, 0x01, "RION"), \
+ exit_code_ipa0(0xAA, 0x02, "TRIC"), \
+ exit_code_ipa0(0xAA, 0x03, "RIOFF"), \
+ exit_code_ipa0(0xAA, 0x04, "RIEMIT"), \
+ exit_code_ipa0(0xB2, 0x02, "STIDP"), \
+ exit_code_ipa0(0xB2, 0x04, "SCK"), \
+ exit_code_ipa0(0xB2, 0x05, "STCK"), \
+ exit_code_ipa0(0xB2, 0x06, "SCKC"), \
+ exit_code_ipa0(0xB2, 0x07, "STCKC"), \
+ exit_code_ipa0(0xB2, 0x08, "SPT"), \
+ exit_code_ipa0(0xB2, 0x09, "STPT"), \
+ exit_code_ipa0(0xB2, 0x0d, "PTLB"), \
+ exit_code_ipa0(0xB2, 0x10, "SPX"), \
+ exit_code_ipa0(0xB2, 0x11, "STPX"), \
+ exit_code_ipa0(0xB2, 0x12, "STAP"), \
+ exit_code_ipa0(0xB2, 0x14, "SIE"), \
+ exit_code_ipa0(0xB2, 0x16, "SETR"), \
+ exit_code_ipa0(0xB2, 0x17, "STETR"), \
+ exit_code_ipa0(0xB2, 0x18, "PC"), \
+ exit_code_ipa0(0xB2, 0x20, "SERVC"), \
+ exit_code_ipa0(0xB2, 0x21, "IPTE"), \
+ exit_code_ipa0(0xB2, 0x28, "PT"), \
+ exit_code_ipa0(0xB2, 0x29, "ISKE"), \
+ exit_code_ipa0(0xB2, 0x2a, "RRBE"), \
+ exit_code_ipa0(0xB2, 0x2b, "SSKE"), \
+ exit_code_ipa0(0xB2, 0x2c, "TB"), \
+ exit_code_ipa0(0xB2, 0x2e, "PGIN"), \
+ exit_code_ipa0(0xB2, 0x2f, "PGOUT"), \
+ exit_code_ipa0(0xB2, 0x30, "CSCH"), \
+ exit_code_ipa0(0xB2, 0x31, "HSCH"), \
+ exit_code_ipa0(0xB2, 0x32, "MSCH"), \
+ exit_code_ipa0(0xB2, 0x33, "SSCH"), \
+ exit_code_ipa0(0xB2, 0x34, "STSCH"), \
+ exit_code_ipa0(0xB2, 0x35, "TSCH"), \
+ exit_code_ipa0(0xB2, 0x36, "TPI"), \
+ exit_code_ipa0(0xB2, 0x37, "SAL"), \
+ exit_code_ipa0(0xB2, 0x38, "RSCH"), \
+ exit_code_ipa0(0xB2, 0x39, "STCRW"), \
+ exit_code_ipa0(0xB2, 0x3a, "STCPS"), \
+ exit_code_ipa0(0xB2, 0x3b, "RCHP"), \
+ exit_code_ipa0(0xB2, 0x3c, "SCHM"), \
+ exit_code_ipa0(0xB2, 0x40, "BAKR"), \
+ exit_code_ipa0(0xB2, 0x48, "PALB"), \
+ exit_code_ipa0(0xB2, 0x4c, "TAR"), \
+ exit_code_ipa0(0xB2, 0x50, "CSP"), \
+ exit_code_ipa0(0xB2, 0x54, "MVPG"), \
+ exit_code_ipa0(0xB2, 0x56, "STHYI"), \
+ exit_code_ipa0(0xB2, 0x58, "BSG"), \
+ exit_code_ipa0(0xB2, 0x5a, "BSA"), \
+ exit_code_ipa0(0xB2, 0x5f, "CHSC"), \
+ exit_code_ipa0(0xB2, 0x74, "SIGA"), \
+ exit_code_ipa0(0xB2, 0x76, "XSCH"), \
+ exit_code_ipa0(0xB2, 0x78, "STCKE"), \
+ exit_code_ipa0(0xB2, 0x7c, "STCKF"), \
+ exit_code_ipa0(0xB2, 0x7d, "STSI"), \
+ exit_code_ipa0(0xB2, 0xb0, "STFLE"), \
+ exit_code_ipa0(0xB2, 0xb1, "STFL"), \
+ exit_code_ipa0(0xB2, 0xb2, "LPSWE"), \
+ exit_code_ipa0(0xB2, 0xf8, "TEND"), \
+ exit_code_ipa0(0xB2, 0xfc, "TABORT"), \
+ exit_code_ipa0(0xB9, 0x1e, "KMAC"), \
+ exit_code_ipa0(0xB9, 0x28, "PCKMO"), \
+ exit_code_ipa0(0xB9, 0x2a, "KMF"), \
+ exit_code_ipa0(0xB9, 0x2b, "KMO"), \
+ exit_code_ipa0(0xB9, 0x2d, "KMCTR"), \
+ exit_code_ipa0(0xB9, 0x2e, "KM"), \
+ exit_code_ipa0(0xB9, 0x2f, "KMC"), \
+ exit_code_ipa0(0xB9, 0x3e, "KIMD"), \
+ exit_code_ipa0(0xB9, 0x3f, "KLMD"), \
+ exit_code_ipa0(0xB9, 0x8a, "CSPG"), \
+ exit_code_ipa0(0xB9, 0x8d, "EPSW"), \
+ exit_code_ipa0(0xB9, 0x8e, "IDTE"), \
+ exit_code_ipa0(0xB9, 0x8f, "CRDTE"), \
+ exit_code_ipa0(0xB9, 0x9c, "EQBS"), \
+ exit_code_ipa0(0xB9, 0xa2, "PTF"), \
+ exit_code_ipa0(0xB9, 0xab, "ESSA"), \
+ exit_code_ipa0(0xB9, 0xae, "RRBM"), \
+ exit_code_ipa0(0xB9, 0xaf, "PFMF"), \
+ exit_code_ipa0(0xE3, 0x03, "LRAG"), \
+ exit_code_ipa0(0xE3, 0x13, "LRAY"), \
+ exit_code_ipa0(0xE3, 0x25, "NTSTG"), \
+ exit_code_ipa0(0xE5, 0x00, "LASP"), \
+ exit_code_ipa0(0xE5, 0x01, "TPROT"), \
+ exit_code_ipa0(0xE5, 0x60, "TBEGIN"), \
+ exit_code_ipa0(0xE5, 0x61, "TBEGINC"), \
+ exit_code_ipa0(0xEB, 0x25, "STCTG"), \
+ exit_code_ipa0(0xEB, 0x2f, "LCTLG"), \
+ exit_code_ipa0(0xEB, 0x60, "LRIC"), \
+ exit_code_ipa0(0xEB, 0x61, "STRIC"), \
+ exit_code_ipa0(0xEB, 0x62, "MRIC"), \
+ exit_code_ipa0(0xEB, 0x8a, "SQBS"), \
+ exit_code_ipa0(0xC8, 0x01, "ECTG"), \
+ exit_code(0x0a, "SVC"), \
+ exit_code(0x80, "SSM"), \
+ exit_code(0x82, "LPSW"), \
+ exit_code(0x83, "DIAG"), \
+ exit_code(0xae, "SIGP"), \
+ exit_code(0xac, "STNSM"), \
+ exit_code(0xad, "STOSM"), \
+ exit_code(0xb1, "LRA"), \
+ exit_code(0xb6, "STCTL"), \
+ exit_code(0xb7, "LCTL"), \
+ exit_code(0xee, "PLO")
+
+#define sie_intercept_code \
+ { 0x00, "Host interruption" }, \
+ { 0x04, "Instruction" }, \
+ { 0x08, "Program interruption" }, \
+ { 0x0c, "Instruction and program interruption" }, \
+ { 0x10, "External request" }, \
+ { 0x14, "External interruption" }, \
+ { 0x18, "I/O request" }, \
+ { 0x1c, "Wait state" }, \
+ { 0x20, "Validity" }, \
+ { 0x28, "Stop request" }, \
+ { 0x2c, "Operation exception" }, \
+ { 0x38, "Partial-execution" }, \
+ { 0x3c, "I/O interruption" }, \
+ { 0x40, "I/O instruction" }, \
+ { 0x48, "Timing subset" }
+
+/*
+ * This is the simple interceptable instructions decoder.
+ *
+ * It will be used as a userspace interface and it can be used in places
+ * that do not allow the use of general decoder functions,
+ * such as trace event declarations.
+ *
+ * Some userspace tools may want to parse this code
+ * and would be confused by switch(), if() and other statements,
+ * but they can understand the conditional operator.
+ */
+#define INSN_DECODE_IPA0(ipa0, insn, rshift, mask) \
+ (insn >> 56) == (ipa0) ? \
+ ((ipa0 << 8) | ((insn >> rshift) & mask)) :
+
+#define INSN_DECODE(insn) (insn >> 56)
+
+/*
+ * The macro icpt_insn_decoder() takes an intercepted instruction
+ * and returns a key, which can be used to find a mnemonic name
+ * of the instruction in the icpt_insn_codes table.
+ */
+#define icpt_insn_decoder(insn) ( \
+ INSN_DECODE_IPA0(0x01, insn, 48, 0xff) \
+ INSN_DECODE_IPA0(0xaa, insn, 48, 0x0f) \
+ INSN_DECODE_IPA0(0xb2, insn, 48, 0xff) \
+ INSN_DECODE_IPA0(0xb9, insn, 48, 0xff) \
+ INSN_DECODE_IPA0(0xe3, insn, 48, 0xff) \
+ INSN_DECODE_IPA0(0xe5, insn, 48, 0xff) \
+ INSN_DECODE_IPA0(0xeb, insn, 16, 0xff) \
+ INSN_DECODE_IPA0(0xc8, insn, 48, 0x0f) \
+ INSN_DECODE(insn))
+
+#endif /* _UAPI_ASM_S390_SIE_H */
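
A small sketch of how the decoder is meant to be consumed: the macro yields a
key that indexes the icpt_insn_codes table, assuming the instruction word is
packed the same way KVM packs it for its trace events (IPA in the high bytes):

#include <asm/sie.h>
#include <stddef.h>

static const struct {
	unsigned long long key;
	const char *name;
} insn_codes[] = {
	icpt_insn_codes
};

/* Return the mnemonic for an intercepted instruction, or "unknown". */
static const char *icpt_insn_name(unsigned long long insn)
{
	unsigned long long key = icpt_insn_decoder(insn);
	size_t i;

	for (i = 0; i < sizeof(insn_codes) / sizeof(insn_codes[0]); i++)
		if (insn_codes[i].key == key)
			return insn_codes[i].name;
	return "unknown";
}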
diff --git a/arch/s390/include/uapi/asm/sigcontext.h b/arch/s390/include/uapi/asm/sigcontext.h
new file mode 100644
index 000000000..8b3503333
--- /dev/null
+++ b/arch/s390/include/uapi/asm/sigcontext.h
@@ -0,0 +1,85 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/*
+ * S390 version
+ * Copyright IBM Corp. 1999, 2000
+ */
+
+#ifndef _ASM_S390_SIGCONTEXT_H
+#define _ASM_S390_SIGCONTEXT_H
+
+#include <linux/compiler.h>
+#include <linux/types.h>
+
+#define __NUM_GPRS 16
+#define __NUM_FPRS 16
+#define __NUM_ACRS 16
+#define __NUM_VXRS 32
+#define __NUM_VXRS_LOW 16
+#define __NUM_VXRS_HIGH 16
+
+#ifndef __s390x__
+
+/* Has to be at least _NSIG_WORDS from asm/signal.h */
+#define _SIGCONTEXT_NSIG 64
+#define _SIGCONTEXT_NSIG_BPW 32
+/* Size of stack frame allocated when calling signal handler. */
+#define __SIGNAL_FRAMESIZE 96
+
+#else /* __s390x__ */
+
+/* Has to be at least _NSIG_WORDS from asm/signal.h */
+#define _SIGCONTEXT_NSIG 64
+#define _SIGCONTEXT_NSIG_BPW 64
+/* Size of stack frame allocated when calling signal handler. */
+#define __SIGNAL_FRAMESIZE 160
+
+#endif /* __s390x__ */
+
+#define _SIGCONTEXT_NSIG_WORDS (_SIGCONTEXT_NSIG / _SIGCONTEXT_NSIG_BPW)
+#define _SIGMASK_COPY_SIZE (sizeof(unsigned long)*_SIGCONTEXT_NSIG_WORDS)
+
+typedef struct
+{
+ unsigned long mask;
+ unsigned long addr;
+} __attribute__ ((aligned(8))) _psw_t;
+
+typedef struct
+{
+ _psw_t psw;
+ unsigned long gprs[__NUM_GPRS];
+ unsigned int acrs[__NUM_ACRS];
+} _s390_regs_common;
+
+typedef struct
+{
+ unsigned int fpc;
+ unsigned int pad;
+ double fprs[__NUM_FPRS];
+} _s390_fp_regs;
+
+typedef struct
+{
+ _s390_regs_common regs;
+ _s390_fp_regs fpregs;
+} _sigregs;
+
+typedef struct
+{
+#ifndef __s390x__
+ unsigned long gprs_high[__NUM_GPRS];
+#endif
+ unsigned long long vxrs_low[__NUM_VXRS_LOW];
+ __vector128 vxrs_high[__NUM_VXRS_HIGH];
+ unsigned char __reserved[128];
+} _sigregs_ext;
+
+struct sigcontext
+{
+ unsigned long oldmask[_SIGCONTEXT_NSIG_WORDS];
+ _sigregs __user *sregs;
+};
+
+
+#endif
+
diff --git a/arch/s390/include/uapi/asm/siginfo.h b/arch/s390/include/uapi/asm/siginfo.h
new file mode 100644
index 000000000..6984820f2
--- /dev/null
+++ b/arch/s390/include/uapi/asm/siginfo.h
@@ -0,0 +1,17 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/*
+ * S390 version
+ *
+ * Derived from "include/asm-i386/siginfo.h"
+ */
+
+#ifndef _S390_SIGINFO_H
+#define _S390_SIGINFO_H
+
+#ifdef __s390x__
+#define __ARCH_SI_PREAMBLE_SIZE (4 * sizeof(int))
+#endif
+
+#include <asm-generic/siginfo.h>
+
+#endif
diff --git a/arch/s390/include/uapi/asm/signal.h b/arch/s390/include/uapi/asm/signal.h
new file mode 100644
index 000000000..9a14a611e
--- /dev/null
+++ b/arch/s390/include/uapi/asm/signal.h
@@ -0,0 +1,139 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/*
+ * S390 version
+ *
+ * Derived from "include/asm-i386/signal.h"
+ */
+
+#ifndef _UAPI_ASMS390_SIGNAL_H
+#define _UAPI_ASMS390_SIGNAL_H
+
+#include <linux/types.h>
+#include <linux/time.h>
+
+/* Avoid too many header ordering problems. */
+struct siginfo;
+struct pt_regs;
+
+#ifndef __KERNEL__
+/* Here we must cater to libcs that poke about in kernel headers. */
+
+#define NSIG 32
+typedef unsigned long sigset_t;
+
+#endif /* __KERNEL__ */
+
+#define SIGHUP 1
+#define SIGINT 2
+#define SIGQUIT 3
+#define SIGILL 4
+#define SIGTRAP 5
+#define SIGABRT 6
+#define SIGIOT 6
+#define SIGBUS 7
+#define SIGFPE 8
+#define SIGKILL 9
+#define SIGUSR1 10
+#define SIGSEGV 11
+#define SIGUSR2 12
+#define SIGPIPE 13
+#define SIGALRM 14
+#define SIGTERM 15
+#define SIGSTKFLT 16
+#define SIGCHLD 17
+#define SIGCONT 18
+#define SIGSTOP 19
+#define SIGTSTP 20
+#define SIGTTIN 21
+#define SIGTTOU 22
+#define SIGURG 23
+#define SIGXCPU 24
+#define SIGXFSZ 25
+#define SIGVTALRM 26
+#define SIGPROF 27
+#define SIGWINCH 28
+#define SIGIO 29
+#define SIGPOLL SIGIO
+/*
+#define SIGLOST 29
+*/
+#define SIGPWR 30
+#define SIGSYS 31
+#define SIGUNUSED 31
+
+/* These should not be considered constants from userland. */
+#define SIGRTMIN 32
+#define SIGRTMAX _NSIG
+
+/*
+ * SA_FLAGS values:
+ *
+ * SA_ONSTACK indicates that a registered stack_t will be used.
+ * SA_RESTART flag to get restarting signals (which were the default long ago)
+ * SA_NOCLDSTOP flag to turn off SIGCHLD when children stop.
+ * SA_RESETHAND clears the handler when the signal is delivered.
+ * SA_NOCLDWAIT flag on SIGCHLD to inhibit zombies.
+ * SA_NODEFER prevents the current signal from being masked in the handler.
+ *
+ * SA_ONESHOT and SA_NOMASK are the historical Linux names for the Single
+ * Unix names RESETHAND and NODEFER respectively.
+ */
+#define SA_NOCLDSTOP 0x00000001
+#define SA_NOCLDWAIT 0x00000002
+#define SA_SIGINFO 0x00000004
+#define SA_ONSTACK 0x08000000
+#define SA_RESTART 0x10000000
+#define SA_NODEFER 0x40000000
+#define SA_RESETHAND 0x80000000
+
+#define SA_NOMASK SA_NODEFER
+#define SA_ONESHOT SA_RESETHAND
+
+#define SA_RESTORER 0x04000000
+
+#define MINSIGSTKSZ 2048
+#define SIGSTKSZ 8192
+
+#include <asm-generic/signal-defs.h>
+
+#ifndef __KERNEL__
+
+/*
+ * There are two system calls in regard to sigaction, sys_rt_sigaction
+ * and sys_sigaction. Internally the kernel uses the struct old_sigaction
+ * for the older sys_sigaction system call, and the kernel version of the
+ * struct sigaction for the newer sys_rt_sigaction.
+ *
+ * The uapi definition for struct sigaction has made a strange distinction
+ * between 31-bit and 64-bit in the past. For 64-bit the uapi structure
+ * looks like the kernel struct sigaction, but for 31-bit it used to
+ * look like the kernel struct old_sigaction. That practically made the
+ * structure unusable for either system call. To get around this problem,
+ * glibc has always had its own definitions for the sigaction structures.
+ *
+ * The current struct sigaction uapi definition below is suitable for the
+ * sys_rt_sigaction system call only.
+ */
+struct sigaction {
+ union {
+ __sighandler_t _sa_handler;
+ void (*_sa_sigaction)(int, struct siginfo *, void *);
+ } _u;
+ unsigned long sa_flags;
+ void (*sa_restorer)(void);
+ sigset_t sa_mask;
+};
+
+#define sa_handler _u._sa_handler
+#define sa_sigaction _u._sa_sigaction
+
+#endif /* __KERNEL__ */
+
+typedef struct sigaltstack {
+ void __user *ss_sp;
+ int ss_flags;
+ size_t ss_size;
+} stack_t;
+
+
+#endif /* _UAPI_ASMS390_SIGNAL_H */
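
A minimal sketch that uses the uapi struct sigaction above directly with the
raw rt_sigaction system call (as the comment notes, glibc's sigaction() uses
its own structure). The sigsetsize argument must be the kernel sigset size,
8 bytes here; header interactions with glibc may require care, so this is
illustration only:

#include <asm/signal.h>
#include <string.h>
#include <sys/syscall.h>
#include <unistd.h>

static void on_usr1(int sig)
{
	(void) sig;
}

static long install_usr1_handler(void)
{
	struct sigaction act;

	memset(&act, 0, sizeof(act));
	act.sa_handler = on_usr1;
	act.sa_flags = SA_RESTART;
	/* sizeof(sigset_t) is 8: the kernel sigset is one unsigned long here */
	return syscall(SYS_rt_sigaction, SIGUSR1, &act, NULL, sizeof(sigset_t));
}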
diff --git a/arch/s390/include/uapi/asm/socket.h b/arch/s390/include/uapi/asm/socket.h
new file mode 100644
index 000000000..39d901476
--- /dev/null
+++ b/arch/s390/include/uapi/asm/socket.h
@@ -0,0 +1,117 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/*
+ * S390 version
+ *
+ * Derived from "include/asm-i386/socket.h"
+ */
+
+#ifndef _ASM_SOCKET_H
+#define _ASM_SOCKET_H
+
+#include <asm/sockios.h>
+
+/* For setsockopt(2) */
+#define SOL_SOCKET 1
+
+#define SO_DEBUG 1
+#define SO_REUSEADDR 2
+#define SO_TYPE 3
+#define SO_ERROR 4
+#define SO_DONTROUTE 5
+#define SO_BROADCAST 6
+#define SO_SNDBUF 7
+#define SO_RCVBUF 8
+#define SO_SNDBUFFORCE 32
+#define SO_RCVBUFFORCE 33
+#define SO_KEEPALIVE 9
+#define SO_OOBINLINE 10
+#define SO_NO_CHECK 11
+#define SO_PRIORITY 12
+#define SO_LINGER 13
+#define SO_BSDCOMPAT 14
+#define SO_REUSEPORT 15
+#define SO_PASSCRED 16
+#define SO_PEERCRED 17
+#define SO_RCVLOWAT 18
+#define SO_SNDLOWAT 19
+#define SO_RCVTIMEO 20
+#define SO_SNDTIMEO 21
+
+/* Security levels - as per NRL IPv6 - don't actually do anything */
+#define SO_SECURITY_AUTHENTICATION 22
+#define SO_SECURITY_ENCRYPTION_TRANSPORT 23
+#define SO_SECURITY_ENCRYPTION_NETWORK 24
+
+#define SO_BINDTODEVICE 25
+
+/* Socket filtering */
+#define SO_ATTACH_FILTER 26
+#define SO_DETACH_FILTER 27
+#define SO_GET_FILTER SO_ATTACH_FILTER
+
+#define SO_PEERNAME 28
+#define SO_TIMESTAMP 29
+#define SCM_TIMESTAMP SO_TIMESTAMP
+
+#define SO_ACCEPTCONN 30
+
+#define SO_PEERSEC 31
+#define SO_PASSSEC 34
+#define SO_TIMESTAMPNS 35
+#define SCM_TIMESTAMPNS SO_TIMESTAMPNS
+
+#define SO_MARK 36
+
+#define SO_TIMESTAMPING 37
+#define SCM_TIMESTAMPING SO_TIMESTAMPING
+
+#define SO_PROTOCOL 38
+#define SO_DOMAIN 39
+
+#define SO_RXQ_OVFL 40
+
+#define SO_WIFI_STATUS 41
+#define SCM_WIFI_STATUS SO_WIFI_STATUS
+#define SO_PEEK_OFF 42
+
+/* Instruct lower device to use last 4-bytes of skb data as FCS */
+#define SO_NOFCS 43
+
+#define SO_LOCK_FILTER 44
+
+#define SO_SELECT_ERR_QUEUE 45
+
+#define SO_BUSY_POLL 46
+
+#define SO_MAX_PACING_RATE 47
+
+#define SO_BPF_EXTENSIONS 48
+
+#define SO_INCOMING_CPU 49
+
+#define SO_ATTACH_BPF 50
+#define SO_DETACH_BPF SO_DETACH_FILTER
+
+#define SO_ATTACH_REUSEPORT_CBPF 51
+#define SO_ATTACH_REUSEPORT_EBPF 52
+
+#define SO_CNX_ADVICE 53
+
+#define SCM_TIMESTAMPING_OPT_STATS 54
+
+#define SO_MEMINFO 55
+
+#define SO_INCOMING_NAPI_ID 56
+
+#define SO_COOKIE 57
+
+#define SCM_TIMESTAMPING_PKTINFO 58
+
+#define SO_PEERGROUPS 59
+
+#define SO_ZEROCOPY 60
+
+#define SO_TXTIME 61
+#define SCM_TXTIME SO_TXTIME
+
+#endif /* _ASM_SOCKET_H */
diff --git a/arch/s390/include/uapi/asm/stat.h b/arch/s390/include/uapi/asm/stat.h
new file mode 100644
index 000000000..ac253d236
--- /dev/null
+++ b/arch/s390/include/uapi/asm/stat.h
@@ -0,0 +1,104 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/*
+ * S390 version
+ *
+ * Derived from "include/asm-i386/stat.h"
+ */
+
+#ifndef _S390_STAT_H
+#define _S390_STAT_H
+
+#ifndef __s390x__
+struct __old_kernel_stat {
+ unsigned short st_dev;
+ unsigned short st_ino;
+ unsigned short st_mode;
+ unsigned short st_nlink;
+ unsigned short st_uid;
+ unsigned short st_gid;
+ unsigned short st_rdev;
+ unsigned long st_size;
+ unsigned long st_atime;
+ unsigned long st_mtime;
+ unsigned long st_ctime;
+};
+
+struct stat {
+ unsigned short st_dev;
+ unsigned short __pad1;
+ unsigned long st_ino;
+ unsigned short st_mode;
+ unsigned short st_nlink;
+ unsigned short st_uid;
+ unsigned short st_gid;
+ unsigned short st_rdev;
+ unsigned short __pad2;
+ unsigned long st_size;
+ unsigned long st_blksize;
+ unsigned long st_blocks;
+ unsigned long st_atime;
+ unsigned long st_atime_nsec;
+ unsigned long st_mtime;
+ unsigned long st_mtime_nsec;
+ unsigned long st_ctime;
+ unsigned long st_ctime_nsec;
+ unsigned long __unused4;
+ unsigned long __unused5;
+};
+
+/* This matches struct stat64 in glibc2.1, hence the absolutely
+ * insane amounts of padding around dev_t's.
+ */
+struct stat64 {
+ unsigned long long st_dev;
+ unsigned int __pad1;
+#define STAT64_HAS_BROKEN_ST_INO 1
+ unsigned long __st_ino;
+ unsigned int st_mode;
+ unsigned int st_nlink;
+ unsigned long st_uid;
+ unsigned long st_gid;
+ unsigned long long st_rdev;
+ unsigned int __pad3;
+ long long st_size;
+ unsigned long st_blksize;
+ unsigned char __pad4[4];
+ unsigned long __pad5; /* future possible st_blocks high bits */
+ unsigned long st_blocks; /* Number 512-byte blocks allocated. */
+ unsigned long st_atime;
+ unsigned long st_atime_nsec;
+ unsigned long st_mtime;
+ unsigned long st_mtime_nsec;
+ unsigned long st_ctime;
+ unsigned long st_ctime_nsec; /* will be high 32 bits of ctime someday */
+ unsigned long long st_ino;
+};
+
+#else /* __s390x__ */
+
+struct stat {
+ unsigned long st_dev;
+ unsigned long st_ino;
+ unsigned long st_nlink;
+ unsigned int st_mode;
+ unsigned int st_uid;
+ unsigned int st_gid;
+ unsigned int __pad1;
+ unsigned long st_rdev;
+ unsigned long st_size;
+ unsigned long st_atime;
+ unsigned long st_atime_nsec;
+ unsigned long st_mtime;
+ unsigned long st_mtime_nsec;
+ unsigned long st_ctime;
+ unsigned long st_ctime_nsec;
+ unsigned long st_blksize;
+ long st_blocks;
+ unsigned long __unused[3];
+};
+
+#endif /* __s390x__ */
+
+#define STAT_HAVE_NSEC 1
+
+#endif
diff --git a/arch/s390/include/uapi/asm/statfs.h b/arch/s390/include/uapi/asm/statfs.h
new file mode 100644
index 000000000..72604f779
--- /dev/null
+++ b/arch/s390/include/uapi/asm/statfs.h
@@ -0,0 +1,51 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/*
+ * S390 version
+ *
+ * Derived from "include/asm-i386/statfs.h"
+ */
+
+#ifndef _S390_STATFS_H
+#define _S390_STATFS_H
+
+/*
+ * We can't use <asm-generic/statfs.h> because in 64-bit mode
+ * we mix ints of different sizes in our struct statfs.
+ */
+
+#ifndef __KERNEL_STRICT_NAMES
+#include <linux/types.h>
+typedef __kernel_fsid_t fsid_t;
+#endif
+
+struct statfs {
+ unsigned int f_type;
+ unsigned int f_bsize;
+ unsigned long f_blocks;
+ unsigned long f_bfree;
+ unsigned long f_bavail;
+ unsigned long f_files;
+ unsigned long f_ffree;
+ __kernel_fsid_t f_fsid;
+ unsigned int f_namelen;
+ unsigned int f_frsize;
+ unsigned int f_flags;
+ unsigned int f_spare[4];
+};
+
+struct statfs64 {
+ unsigned int f_type;
+ unsigned int f_bsize;
+ unsigned long long f_blocks;
+ unsigned long long f_bfree;
+ unsigned long long f_bavail;
+ unsigned long long f_files;
+ unsigned long long f_ffree;
+ __kernel_fsid_t f_fsid;
+ unsigned int f_namelen;
+ unsigned int f_frsize;
+ unsigned int f_flags;
+ unsigned int f_spare[4];
+};
+
+#endif
diff --git a/arch/s390/include/uapi/asm/sthyi.h b/arch/s390/include/uapi/asm/sthyi.h
new file mode 100644
index 000000000..b1b022316
--- /dev/null
+++ b/arch/s390/include/uapi/asm/sthyi.h
@@ -0,0 +1,7 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+#ifndef _UAPI_ASM_STHYI_H
+#define _UAPI_ASM_STHYI_H
+
+#define STHYI_FC_CP_IFL_CAP 0
+
+#endif /* _UAPI_ASM_STHYI_H */
diff --git a/arch/s390/include/uapi/asm/tape390.h b/arch/s390/include/uapi/asm/tape390.h
new file mode 100644
index 000000000..90266c696
--- /dev/null
+++ b/arch/s390/include/uapi/asm/tape390.h
@@ -0,0 +1,103 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/*************************************************************************
+ *
+ * enables user programs to display messages and control encryption
+ * on s390 tape devices
+ *
+ * Copyright IBM Corp. 2001, 2006
+ * Author(s): Michael Holzheu <holzheu@de.ibm.com>
+ *
+ *************************************************************************/
+
+#ifndef _TAPE390_H
+#define _TAPE390_H
+
+#define TAPE390_DISPLAY _IOW('d', 1, struct display_struct)
+
+/*
+ * The TAPE390_DISPLAY ioctl calls the Load Display command
+ * which transfers 17 bytes of data from the channel to the subsystem:
+ * - 1 format control byte, and
+ * - two 8-byte messages
+ *
+ * Format control byte:
+ * 0-2: New Message Overlay
+ * 3: Alternate Messages
+ * 4: Blink Message
+ * 5: Display Low/High Message
+ * 6: Reserved
+ * 7: Automatic Load Request
+ *
+ */
+
+typedef struct display_struct {
+ char cntrl;
+ char message1[8];
+ char message2[8];
+} display_struct;
+
+/*
+ * Tape encryption support
+ */
+
+struct tape390_crypt_info {
+ char capability;
+ char status;
+ char medium_status;
+} __attribute__ ((packed));
+
+
+/* Macros for "capable" field */
+#define TAPE390_CRYPT_SUPPORTED_MASK 0x01
+#define TAPE390_CRYPT_SUPPORTED(x) \
+ ((x.capability & TAPE390_CRYPT_SUPPORTED_MASK))
+
+/* Macros for "status" field */
+#define TAPE390_CRYPT_ON_MASK 0x01
+#define TAPE390_CRYPT_ON(x) (((x.status) & TAPE390_CRYPT_ON_MASK))
+
+/* Macros for "medium status" field */
+#define TAPE390_MEDIUM_LOADED_MASK 0x01
+#define TAPE390_MEDIUM_ENCRYPTED_MASK 0x02
+#define TAPE390_MEDIUM_ENCRYPTED(x) \
+ (((x.medium_status) & TAPE390_MEDIUM_ENCRYPTED_MASK))
+#define TAPE390_MEDIUM_LOADED(x) \
+ (((x.medium_status) & TAPE390_MEDIUM_LOADED_MASK))
+
+/*
+ * The TAPE390_CRYPT_SET ioctl is used to switch on/off encryption.
+ * The "encryption_capable" and "tape_status" fields are ignored for this ioctl!
+ */
+#define TAPE390_CRYPT_SET _IOW('d', 2, struct tape390_crypt_info)
+
+/*
+ * The TAPE390_CRYPT_QUERY ioctl is used to query the encryption state.
+ */
+#define TAPE390_CRYPT_QUERY _IOR('d', 3, struct tape390_crypt_info)
+
+/* Values for "kekl1/2_type" and "kekl1/2_type_on_tape" fields */
+#define TAPE390_KEKL_TYPE_NONE 0
+#define TAPE390_KEKL_TYPE_LABEL 1
+#define TAPE390_KEKL_TYPE_HASH 2
+
+struct tape390_kekl {
+ unsigned char type;
+ unsigned char type_on_tape;
+ char label[65];
+} __attribute__ ((packed));
+
+struct tape390_kekl_pair {
+ struct tape390_kekl kekl[2];
+} __attribute__ ((packed));
+
+/*
+ * The TAPE390_KEKL_SET ioctl is used to set Key Encrypting Key labels.
+ */
+#define TAPE390_KEKL_SET _IOW('d', 4, struct tape390_kekl_pair)
+
+/*
+ * The TAPE390_KEKL_QUERY ioctl is used to query Key Encrypting Key labels.
+ */
+#define TAPE390_KEKL_QUERY _IOR('d', 5, struct tape390_kekl_pair)
+
+#endif
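
A minimal userspace sketch of the TAPE390_DISPLAY ioctl described above. The
device node and the format control byte value are placeholders; consult the
layout in the comment when choosing a real control value:

#include <asm/tape390.h>
#include <fcntl.h>
#include <string.h>
#include <sys/ioctl.h>
#include <unistd.h>

static int show_tape_message(const char *dev)
{
	display_struct disp;
	int fd, rc;

	fd = open(dev, O_RDONLY);	/* e.g. a channel-attached tape node */
	if (fd < 0)
		return -1;
	memset(&disp, ' ', sizeof(disp));
	disp.cntrl = 0x00;		/* placeholder format control byte */
	memcpy(disp.message1, "BACKUP  ", 8);
	memcpy(disp.message2, "RUNNING ", 8);
	rc = ioctl(fd, TAPE390_DISPLAY, &disp);
	close(fd);
	return rc;
}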
diff --git a/arch/s390/include/uapi/asm/termios.h b/arch/s390/include/uapi/asm/termios.h
new file mode 100644
index 000000000..54223169c
--- /dev/null
+++ b/arch/s390/include/uapi/asm/termios.h
@@ -0,0 +1,50 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/*
+ * S390 version
+ *
+ * Derived from "include/asm-i386/termios.h"
+ */
+
+#ifndef _UAPI_S390_TERMIOS_H
+#define _UAPI_S390_TERMIOS_H
+
+#include <asm/termbits.h>
+#include <asm/ioctls.h>
+
+struct winsize {
+ unsigned short ws_row;
+ unsigned short ws_col;
+ unsigned short ws_xpixel;
+ unsigned short ws_ypixel;
+};
+
+#define NCC 8
+struct termio {
+ unsigned short c_iflag; /* input mode flags */
+ unsigned short c_oflag; /* output mode flags */
+ unsigned short c_cflag; /* control mode flags */
+ unsigned short c_lflag; /* local mode flags */
+ unsigned char c_line; /* line discipline */
+ unsigned char c_cc[NCC]; /* control characters */
+};
+
+/* modem lines */
+#define TIOCM_LE 0x001
+#define TIOCM_DTR 0x002
+#define TIOCM_RTS 0x004
+#define TIOCM_ST 0x008
+#define TIOCM_SR 0x010
+#define TIOCM_CTS 0x020
+#define TIOCM_CAR 0x040
+#define TIOCM_RNG 0x080
+#define TIOCM_DSR 0x100
+#define TIOCM_CD TIOCM_CAR
+#define TIOCM_RI TIOCM_RNG
+#define TIOCM_OUT1 0x2000
+#define TIOCM_OUT2 0x4000
+#define TIOCM_LOOP 0x8000
+
+/* ioctl (fd, TIOCSERGETLSR, &result) where result may be as below */
+
+
+#endif /* _UAPI_S390_TERMIOS_H */
diff --git a/arch/s390/include/uapi/asm/types.h b/arch/s390/include/uapi/asm/types.h
new file mode 100644
index 000000000..da034c606
--- /dev/null
+++ b/arch/s390/include/uapi/asm/types.h
@@ -0,0 +1,27 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/*
+ * S390 version
+ *
+ * Derived from "include/asm-i386/types.h"
+ */
+
+#ifndef _UAPI_S390_TYPES_H
+#define _UAPI_S390_TYPES_H
+
+#include <asm-generic/int-ll64.h>
+
+#ifndef __ASSEMBLY__
+
+/* An address type so that arithmetic can be done on it & it can be upgraded to
+ 64 bit when necessary
+*/
+typedef unsigned long addr_t;
+typedef __signed__ long saddr_t;
+
+typedef struct {
+ __u32 u[4];
+} __vector128;
+
+#endif /* __ASSEMBLY__ */
+
+#endif /* _UAPI_S390_TYPES_H */
diff --git a/arch/s390/include/uapi/asm/ucontext.h b/arch/s390/include/uapi/asm/ucontext.h
new file mode 100644
index 000000000..c95f42e85
--- /dev/null
+++ b/arch/s390/include/uapi/asm/ucontext.h
@@ -0,0 +1,41 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/*
+ * S390 version
+ *
+ * Derived from "include/asm-i386/ucontext.h"
+ */
+
+#ifndef _ASM_S390_UCONTEXT_H
+#define _ASM_S390_UCONTEXT_H
+
+#define UC_GPRS_HIGH 1 /* uc_mcontext_ext has valid high gprs */
+#define UC_VXRS 2 /* uc_mcontext_ext has valid vector regs */
+
+/*
+ * The struct ucontext_extended describes how the registers are stored
+ * on an rt signal frame. Please note that the structure is not fixed;
+ * if new CPU registers are added to the user state, the size of
+ * struct ucontext_extended will increase.
+ */
+struct ucontext_extended {
+ unsigned long uc_flags;
+ struct ucontext *uc_link;
+ stack_t uc_stack;
+ _sigregs uc_mcontext;
+ sigset_t uc_sigmask;
+ /* Allow for uc_sigmask growth. Glibc uses a 1024-bit sigset_t. */
+ unsigned char __unused[128 - sizeof(sigset_t)];
+ _sigregs_ext uc_mcontext_ext;
+};
+
+struct ucontext {
+ unsigned long uc_flags;
+ struct ucontext *uc_link;
+ stack_t uc_stack;
+ _sigregs uc_mcontext;
+ sigset_t uc_sigmask;
+ /* Allow for uc_sigmask growth. Glibc uses a 1024-bit sigset_t. */
+ unsigned char __unused[128 - sizeof(sigset_t)];
+};
+
+#endif /* !_ASM_S390_UCONTEXT_H */
diff --git a/arch/s390/include/uapi/asm/unistd.h b/arch/s390/include/uapi/asm/unistd.h
new file mode 100644
index 000000000..01b5fe8b9
--- /dev/null
+++ b/arch/s390/include/uapi/asm/unistd.h
@@ -0,0 +1,17 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/*
+ * S390 version
+ *
+ * Derived from "include/asm-i386/unistd.h"
+ */
+
+#ifndef _UAPI_ASM_S390_UNISTD_H_
+#define _UAPI_ASM_S390_UNISTD_H_
+
+#ifdef __s390x__
+#include <asm/unistd_64.h>
+#else
+#include <asm/unistd_32.h>
+#endif
+
+#endif /* _UAPI_ASM_S390_UNISTD_H_ */
diff --git a/arch/s390/include/uapi/asm/virtio-ccw.h b/arch/s390/include/uapi/asm/virtio-ccw.h
new file mode 100644
index 000000000..2b605f7e8
--- /dev/null
+++ b/arch/s390/include/uapi/asm/virtio-ccw.h
@@ -0,0 +1,18 @@
+/* SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-3-Clause) */
+/*
+ * Definitions for virtio-ccw devices.
+ *
+ * Copyright IBM Corp. 2013
+ *
+ * Author(s): Cornelia Huck <cornelia.huck@de.ibm.com>
+ */
+#ifndef __KVM_VIRTIO_CCW_H
+#define __KVM_VIRTIO_CCW_H
+
+/* Alignment of vring buffers. */
+#define KVM_VIRTIO_CCW_RING_ALIGN 4096
+
+/* Subcode for diagnose 500 (virtio hypercall). */
+#define KVM_S390_VIRTIO_CCW_NOTIFY 3
+
+#endif
diff --git a/arch/s390/include/uapi/asm/vmcp.h b/arch/s390/include/uapi/asm/vmcp.h
new file mode 100644
index 000000000..aeaaa0300
--- /dev/null
+++ b/arch/s390/include/uapi/asm/vmcp.h
@@ -0,0 +1,25 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/*
+ * Copyright IBM Corp. 2004, 2005
+ * Interface implementation for communication with the z/VM control program
+ * Version 1.0
+ * Author(s): Christian Borntraeger <cborntra@de.ibm.com>
+ *
+ *
+ * z/VM's CP offers the possibility to issue commands via the diagnose
+ * code 8. This driver implements a character device that issues these
+ * commands and returns the answer of CP.
+ *
+ * The idea of this driver is based on cpint from Neale Ferguson.
+ */
+
+#ifndef _UAPI_ASM_VMCP_H
+#define _UAPI_ASM_VMCP_H
+
+#include <linux/ioctl.h>
+
+#define VMCP_GETCODE _IOR(0x10, 1, int)
+#define VMCP_SETBUF _IOW(0x10, 2, int)
+#define VMCP_GETSIZE _IOR(0x10, 3, int)
+
+#endif /* _UAPI_ASM_VMCP_H */
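
As the header comment describes, the vmcp driver exposes CP commands through a character device: the command is written to the device, the response is read back, and the three ioctls above query the CP return code, set the response buffer size and query the response size. A hedged user-space sketch follows; the /dev/vmcp device node and the exact write/read semantics are assumptions about the driver, not defined by this header:

#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <sys/ioctl.h>
#include <unistd.h>
#include <asm/vmcp.h>

int main(void)
{
	char buf[4096];
	int fd, cp_rc, size;

	fd = open("/dev/vmcp", O_RDWR);	/* device node is an assumption */
	if (fd < 0) {
		perror("open /dev/vmcp");
		return 1;
	}
	/* Issue a CP command, then fetch response size and CP return code. */
	write(fd, "QUERY USERID", strlen("QUERY USERID"));
	ioctl(fd, VMCP_GETSIZE, &size);
	ioctl(fd, VMCP_GETCODE, &cp_rc);
	read(fd, buf, sizeof(buf) - 1);
	printf("CP rc=%d, response of %d bytes\n", cp_rc, size);
	close(fd);
	return 0;
}
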
diff --git a/arch/s390/include/uapi/asm/vtoc.h b/arch/s390/include/uapi/asm/vtoc.h
new file mode 100644
index 000000000..50c1d7b9e
--- /dev/null
+++ b/arch/s390/include/uapi/asm/vtoc.h
@@ -0,0 +1,214 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/*
+ * This file contains volume label definitions for DASD devices.
+ *
+ * Copyright IBM Corp. 2005
+ *
+ * Author(s): Volker Sameske <sameske@de.ibm.com>
+ *
+ */
+
+#ifndef _ASM_S390_VTOC_H
+#define _ASM_S390_VTOC_H
+
+#include <linux/types.h>
+
+struct vtoc_ttr
+{
+ __u16 tt;
+ __u8 r;
+} __attribute__ ((packed));
+
+struct vtoc_cchhb
+{
+ __u16 cc;
+ __u16 hh;
+ __u8 b;
+} __attribute__ ((packed));
+
+struct vtoc_cchh
+{
+ __u16 cc;
+ __u16 hh;
+} __attribute__ ((packed));
+
+struct vtoc_labeldate
+{
+ __u8 year;
+ __u16 day;
+} __attribute__ ((packed));
+
+struct vtoc_volume_label_cdl
+{
+ char volkey[4]; /* volume key = volume label */
+ char vollbl[4]; /* volume label */
+ char volid[6]; /* volume identifier */
+ __u8 security; /* security byte */
+ struct vtoc_cchhb vtoc; /* VTOC address */
+ char res1[5]; /* reserved */
+ char cisize[4]; /* CI-size for FBA,... */
+ /* ...blanks for CKD */
+ char blkperci[4]; /* no of blocks per CI (FBA), blanks for CKD */
+ char labperci[4]; /* no of labels per CI (FBA), blanks for CKD */
+ char res2[4]; /* reserved */
+ char lvtoc[14]; /* owner code for LVTOC */
+ char res3[29]; /* reserved */
+} __attribute__ ((packed));
+
+struct vtoc_volume_label_ldl {
+ char vollbl[4]; /* volume label */
+ char volid[6]; /* volume identifier */
+ char res3[69]; /* reserved */
+ char ldl_version; /* version number, valid for ldl format */
+ __u64 formatted_blocks; /* valid when ldl_version >= f2 */
+} __attribute__ ((packed));
+
+struct vtoc_extent
+{
+ __u8 typeind; /* extent type indicator */
+ __u8 seqno; /* extent sequence number */
+ struct vtoc_cchh llimit; /* starting point of this extent */
+ struct vtoc_cchh ulimit; /* ending point of this extent */
+} __attribute__ ((packed));
+
+struct vtoc_dev_const
+{
+ __u16 DS4DSCYL; /* number of logical cyls */
+ __u16 DS4DSTRK; /* number of tracks in a logical cylinder */
+ __u16 DS4DEVTK; /* device track length */
+ __u8 DS4DEVI; /* non-last keyed record overhead */
+ __u8 DS4DEVL; /* last keyed record overhead */
+ __u8 DS4DEVK; /* non-keyed record overhead differential */
+ __u8 DS4DEVFG; /* flag byte */
+ __u16 DS4DEVTL; /* device tolerance */
+ __u8 DS4DEVDT; /* number of DSCB's per track */
+ __u8 DS4DEVDB; /* number of directory blocks per track */
+} __attribute__ ((packed));
+
+struct vtoc_format1_label
+{
+ char DS1DSNAM[44]; /* data set name */
+ __u8 DS1FMTID; /* format identifier */
+ char DS1DSSN[6]; /* data set serial number */
+ __u16 DS1VOLSQ; /* volume sequence number */
+ struct vtoc_labeldate DS1CREDT; /* creation date: ydd */
+ struct vtoc_labeldate DS1EXPDT; /* expiration date */
+ __u8 DS1NOEPV; /* number of extents on volume */
+ __u8 DS1NOBDB; /* no. of bytes used in last directory blk */
+ __u8 DS1FLAG1; /* flag 1 */
+ char DS1SYSCD[13]; /* system code */
+ struct vtoc_labeldate DS1REFD; /* date last referenced */
+ __u8 DS1SMSFG; /* system managed storage indicators */
+ __u8 DS1SCXTF; /* sec. space extension flag byte */
+ __u16 DS1SCXTV; /* secondary space extension value */
+ __u8 DS1DSRG1; /* data set organisation byte 1 */
+ __u8 DS1DSRG2; /* data set organisation byte 2 */
+ __u8 DS1RECFM; /* record format */
+ __u8 DS1OPTCD; /* option code */
+ __u16 DS1BLKL; /* block length */
+ __u16 DS1LRECL; /* record length */
+ __u8 DS1KEYL; /* key length */
+ __u16 DS1RKP; /* relative key position */
+ __u8 DS1DSIND; /* data set indicators */
+ __u8 DS1SCAL1; /* secondary allocation flag byte */
+ char DS1SCAL3[3]; /* secondary allocation quantity */
+ struct vtoc_ttr DS1LSTAR; /* last used track and block on track */
+ __u16 DS1TRBAL; /* space remaining on last used track */
+ __u16 res1; /* reserved */
+ struct vtoc_extent DS1EXT1; /* first extent description */
+ struct vtoc_extent DS1EXT2; /* second extent description */
+ struct vtoc_extent DS1EXT3; /* third extent description */
+ struct vtoc_cchhb DS1PTRDS; /* possible pointer to f2 or f3 DSCB */
+} __attribute__ ((packed));
+
+struct vtoc_format4_label
+{
+ char DS4KEYCD[44]; /* key code for VTOC labels: 44 times 0x04 */
+ __u8 DS4IDFMT; /* format identifier */
+ struct vtoc_cchhb DS4HPCHR; /* highest address of a format 1 DSCB */
+ __u16 DS4DSREC; /* number of available DSCB's */
+ struct vtoc_cchh DS4HCCHH; /* CCHH of next available alternate track */
+ __u16 DS4NOATK; /* number of remaining alternate tracks */
+ __u8 DS4VTOCI; /* VTOC indicators */
+ __u8 DS4NOEXT; /* number of extents in VTOC */
+ __u8 DS4SMSFG; /* system managed storage indicators */
+ __u8 DS4DEVAC; /* number of alternate cylinders.
+ * Subtract from first two bytes of
+ * DS4DEVSZ to get number of usable
+ * cylinders. Can be zero. Valid
+ * only if DS4DEVAV is on. */
+ struct vtoc_dev_const DS4DEVCT; /* device constants */
+ char DS4AMTIM[8]; /* VSAM time stamp */
+ char DS4AMCAT[3]; /* VSAM catalog indicator */
+ char DS4R2TIM[8]; /* VSAM volume/catalog match time stamp */
+ char res1[5]; /* reserved */
+ char DS4F6PTR[5]; /* pointer to first format 6 DSCB */
+ struct vtoc_extent DS4VTOCE; /* VTOC extent description */
+ char res2[10]; /* reserved */
+ __u8 DS4EFLVL; /* extended free-space management level */
+ struct vtoc_cchhb DS4EFPTR; /* pointer to extended free-space info */
+ char res3; /* reserved */
+ __u32 DS4DCYL; /* number of logical cyls */
+ char res4[2]; /* reserved */
+ __u8 DS4DEVF2; /* device flags */
+ char res5; /* reserved */
+} __attribute__ ((packed));
+
+struct vtoc_ds5ext
+{
+ __u16 t; /* RTA of the first track of free extent */
+ __u16 fc; /* number of whole cylinders in free ext. */
+ __u8 ft; /* number of remaining free tracks */
+} __attribute__ ((packed));
+
+struct vtoc_format5_label
+{
+ char DS5KEYID[4]; /* key identifier */
+ struct vtoc_ds5ext DS5AVEXT; /* first available (free-space) extent. */
+ struct vtoc_ds5ext DS5EXTAV[7]; /* seven available extents */
+ __u8 DS5FMTID; /* format identifier */
+ struct vtoc_ds5ext DS5MAVET[18]; /* eighteen available extents */
+ struct vtoc_cchhb DS5PTRDS; /* pointer to next format5 DSCB */
+} __attribute__ ((packed));
+
+struct vtoc_ds7ext
+{
+ __u32 a; /* starting RTA value */
+ __u32 b; /* ending RTA value + 1 */
+} __attribute__ ((packed));
+
+struct vtoc_format7_label
+{
+ char DS7KEYID[4]; /* key identifier */
+ struct vtoc_ds7ext DS7EXTNT[5]; /* space for 5 extent descriptions */
+ __u8 DS7FMTID; /* format identifier */
+ struct vtoc_ds7ext DS7ADEXT[11]; /* space for 11 extent descriptions */
+ char res1[2]; /* reserved */
+ struct vtoc_cchhb DS7PTRDS; /* pointer to next FMT7 DSCB */
+} __attribute__ ((packed));
+
+struct vtoc_cms_label {
+ __u8 label_id[4]; /* Label identifier */
+ __u8 vol_id[6]; /* Volid */
+ __u16 version_id; /* Version identifier */
+ __u32 block_size; /* Disk block size */
+ __u32 origin_ptr; /* Disk origin pointer */
+ __u32 usable_count; /* Number of usable cylinders/blocks */
+ __u32 formatted_count; /* Maximum number of formatted cylinders/
+ * blocks */
+ __u32 block_count; /* Disk size in CMS blocks */
+ __u32 used_count; /* Number of CMS blocks in use */
+ __u32 fst_size; /* File Status Table (FST) size */
+ __u32 fst_count; /* Number of FSTs per CMS block */
+ __u8 format_date[6]; /* Disk FORMAT date */
+ __u8 reserved1[2];
+ __u32 disk_offset; /* Disk offset when reserved*/
+ __u32 map_block; /* Allocation Map Block with next hole */
+ __u32 hblk_disp; /* Displacement into HBLK data of next hole */
+ __u32 user_disp; /* Displacement into user part of Allocation
+ * map */
+ __u8 reserved2[4];
+ __u8 segment_name[8]; /* Name of shared segment */
+} __attribute__ ((packed));
+
+#endif /* _ASM_S390_VTOC_H */
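
The label keys associated with the structures above ("VOL1" for CDL, "LNX1" for LDL, "CMS1" for CMS disks) are stored on disk in EBCDIC. A small illustrative sketch — the EBCDIC byte values are written out by hand and the classification is simplified compared to the real DASD driver:

#include <string.h>
#include <asm/vtoc.h>

/* EBCDIC encodings of the well-known DASD volume label keys. */
static const unsigned char EBCDIC_VOL1[4] = { 0xe5, 0xd6, 0xd3, 0xf1 };
static const unsigned char EBCDIC_LNX1[4] = { 0xd3, 0xd5, 0xe7, 0xf1 };
static const unsigned char EBCDIC_CMS1[4] = { 0xc3, 0xd4, 0xe2, 0xf1 };

/*
 * Rough classification of a raw label record. For CDL the volume key
 * equals the volume label, so the first four bytes identify the format
 * for all three layouts. Illustrative only.
 */
static const char *classify_label(const void *raw)
{
	if (!memcmp(raw, EBCDIC_VOL1, 4))
		return "CDL volume (struct vtoc_volume_label_cdl)";
	if (!memcmp(raw, EBCDIC_LNX1, 4))
		return "LDL volume (struct vtoc_volume_label_ldl)";
	if (!memcmp(raw, EBCDIC_CMS1, 4))
		return "CMS volume (struct vtoc_cms_label)";
	return "unknown label";
}
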
diff --git a/arch/s390/include/uapi/asm/zcrypt.h b/arch/s390/include/uapi/asm/zcrypt.h
new file mode 100644
index 000000000..48c784f21
--- /dev/null
+++ b/arch/s390/include/uapi/asm/zcrypt.h
@@ -0,0 +1,358 @@
+/* SPDX-License-Identifier: GPL-2.0+ WITH Linux-syscall-note */
+/*
+ * include/asm-s390/zcrypt.h
+ *
+ * zcrypt 2.1.0 (user-visible header)
+ *
+ * Copyright IBM Corp. 2001, 2006
+ * Author(s): Robert Burroughs
+ * Eric Rossman (edrossma@us.ibm.com)
+ *
+ * Hotplug & misc device support: Jochen Roehrig (roehrig@de.ibm.com)
+ */
+
+#ifndef __ASM_S390_ZCRYPT_H
+#define __ASM_S390_ZCRYPT_H
+
+#define ZCRYPT_VERSION 2
+#define ZCRYPT_RELEASE 1
+#define ZCRYPT_VARIANT 1
+
+#include <linux/ioctl.h>
+#include <linux/compiler.h>
+
+/**
+ * struct ica_rsa_modexpo
+ *
+ * Requirements:
+ * - outputdatalength is at least as large as inputdatalength.
+ * - All key parts are right justified in their fields, padded on
+ * the left with zeroes.
+ * - length(b_key) = inputdatalength
+ * - length(n_modulus) = inputdatalength
+ */
+struct ica_rsa_modexpo {
+ char __user *inputdata;
+ unsigned int inputdatalength;
+ char __user *outputdata;
+ unsigned int outputdatalength;
+ char __user *b_key;
+ char __user *n_modulus;
+};
+
+/**
+ * struct ica_rsa_modexpo_crt
+ *
+ * Requirements:
+ * - inputdatalength is even.
+ * - outputdatalength is at least as large as inputdatalength.
+ * - All key parts are right justified in their fields, padded on
+ * the left with zeroes.
+ * - length(bp_key) = inputdatalength/2 + 8
+ * - length(bq_key) = inputdatalength/2
+ * - length(np_key) = inputdatalength/2 + 8
+ * - length(nq_key) = inputdatalength/2
+ * - length(u_mult_inv) = inputdatalength/2 + 8
+ */
+struct ica_rsa_modexpo_crt {
+ char __user *inputdata;
+ unsigned int inputdatalength;
+ char __user *outputdata;
+ unsigned int outputdatalength;
+ char __user *bp_key;
+ char __user *bq_key;
+ char __user *np_prime;
+ char __user *nq_prime;
+ char __user *u_mult_inv;
+};
+
+/**
+ * CPRBX
+ * Note that all shorts and ints are big-endian.
+ * All pointer fields are 16 bytes long, and mean nothing.
+ *
+ * A request CPRB is followed by a request_parameter_block.
+ *
+ * The request (or reply) parameter block is organized thus:
+ * function code
+ * VUD block
+ * key block
+ */
+struct CPRBX {
+ unsigned short cprb_len; /* CPRB length 220 */
+ unsigned char cprb_ver_id; /* CPRB version id. 0x02 */
+ unsigned char pad_000[3]; /* Alignment pad bytes */
+ unsigned char func_id[2]; /* function id 0x5432 */
+ unsigned char cprb_flags[4]; /* Flags */
+ unsigned int req_parml; /* request parameter buffer len */
+ unsigned int req_datal; /* request data buffer len */
+ unsigned int rpl_msgbl; /* reply message block length */
+ unsigned int rpld_parml; /* replied parameter block len */
+ unsigned int rpl_datal; /* reply data block len */
+ unsigned int rpld_datal; /* replied data block len */
+ unsigned int req_extbl; /* request extension block len */
+ unsigned char pad_001[4]; /* reserved */
+ unsigned int rpld_extbl; /* replied extension block len */
+ unsigned char padx000[16 - sizeof(char *)];
+ unsigned char *req_parmb; /* request parm block 'address' */
+ unsigned char padx001[16 - sizeof(char *)];
+ unsigned char *req_datab; /* request data block 'address' */
+ unsigned char padx002[16 - sizeof(char *)];
+ unsigned char *rpl_parmb; /* reply parm block 'address' */
+ unsigned char padx003[16 - sizeof(char *)];
+ unsigned char *rpl_datab; /* reply data block 'address' */
+ unsigned char padx004[16 - sizeof(char *)];
+ unsigned char *req_extb; /* request extension block 'addr'*/
+ unsigned char padx005[16 - sizeof(char *)];
+ unsigned char *rpl_extb; /* reply extension block 'address'*/
+ unsigned short ccp_rtcode; /* server return code */
+ unsigned short ccp_rscode; /* server reason code */
+ unsigned int mac_data_len; /* Mac Data Length */
+ unsigned char logon_id[8]; /* Logon Identifier */
+ unsigned char mac_value[8]; /* Mac Value */
+ unsigned char mac_content_flgs;/* Mac content flag byte */
+ unsigned char pad_002; /* Alignment */
+ unsigned short domain; /* Domain */
+ unsigned char usage_domain[4];/* Usage domain */
+ unsigned char cntrl_domain[4];/* Control domain */
+ unsigned char S390enf_mask[4];/* S/390 enforcement mask */
+ unsigned char pad_004[36]; /* reserved */
+} __attribute__((packed));
+
+/**
+ * xcRB
+ */
+struct ica_xcRB {
+ unsigned short agent_ID;
+ unsigned int user_defined;
+ unsigned short request_ID;
+ unsigned int request_control_blk_length;
+ unsigned char padding1[16 - sizeof(char *)];
+ char __user *request_control_blk_addr;
+ unsigned int request_data_length;
+ char padding2[16 - sizeof(char *)];
+ char __user *request_data_address;
+ unsigned int reply_control_blk_length;
+ char padding3[16 - sizeof(char *)];
+ char __user *reply_control_blk_addr;
+ unsigned int reply_data_length;
+ char padding4[16 - sizeof(char *)];
+ char __user *reply_data_addr;
+ unsigned short priority_window;
+ unsigned int status;
+} __attribute__((packed));
+
+/**
+ * struct ep11_cprb - EP11 connectivity programming request block
+ * @cprb_len: CPRB header length [0x0020]
+ * @cprb_ver_id: CPRB version id. [0x04]
+ * @pad_000: Alignment pad bytes
+ * @flags: Admin bit [0x80], Special bit [0x20]
+ * @func_id: Function id / subtype [0x5434] "T4"
+ * @source_id: Source id [originator id]
+ * @target_id: Target id [usage/ctrl domain id]
+ * @ret_code: Return code
+ * @reserved1: Reserved
+ * @reserved2: Reserved
+ * @payload_len: Payload length
+ */
+struct ep11_cprb {
+ uint16_t cprb_len;
+ unsigned char cprb_ver_id;
+ unsigned char pad_000[2];
+ unsigned char flags;
+ unsigned char func_id[2];
+ uint32_t source_id;
+ uint32_t target_id;
+ uint32_t ret_code;
+ uint32_t reserved1;
+ uint32_t reserved2;
+ uint32_t payload_len;
+} __attribute__((packed));
+
+/**
+ * struct ep11_target_dev - EP11 target device list
+ * @ap_id: AP device id
+ * @dom_id: Usage domain id
+ */
+struct ep11_target_dev {
+ uint16_t ap_id;
+ uint16_t dom_id;
+};
+
+/**
+ * struct ep11_urb - EP11 user request block
+ * @targets_num: Number of target adapters
+ * @targets: Addr to target adapter list
+ * @weight: Level of request priority
+ * @req_no: Request id/number
+ * @req_len: Request length
+ * @req: Addr to request block
+ * @resp_len: Response length
+ * @resp: Addr to response block
+ */
+struct ep11_urb {
+ uint16_t targets_num;
+ uint64_t targets;
+ uint64_t weight;
+ uint64_t req_no;
+ uint64_t req_len;
+ uint64_t req;
+ uint64_t resp_len;
+ uint64_t resp;
+} __attribute__((packed));
+
+/**
+ * struct zcrypt_device_status_ext
+ * @hwtype: raw hardware type
+ * @qid: 8 bit device index, 8 bit domain
+ * @functions: AP device function bit field 'abcdef'
+ * a, b, c = reserved
+ * d = CCA coprocessor
+ * e = Accelerator
+ * f = EP11 coprocessor
+ * @online: online status
+ * @reserved: reserved
+ */
+struct zcrypt_device_status_ext {
+ unsigned int hwtype:8;
+ unsigned int qid:16;
+ unsigned int online:1;
+ unsigned int functions:6;
+ unsigned int reserved:1;
+};
+
+#define MAX_ZDEV_CARDIDS_EXT 256
+#define MAX_ZDEV_DOMAINS_EXT 256
+
+/* Maximum number of zcrypt devices */
+#define MAX_ZDEV_ENTRIES_EXT (MAX_ZDEV_CARDIDS_EXT * MAX_ZDEV_DOMAINS_EXT)
+
+/* Device matrix of all zcrypt devices */
+struct zcrypt_device_matrix_ext {
+ struct zcrypt_device_status_ext device[MAX_ZDEV_ENTRIES_EXT];
+};
+
+#define AUTOSELECT 0xFFFFFFFF
+
+#define ZCRYPT_IOCTL_MAGIC 'z'
+
+/**
+ * Interface notes:
+ *
+ * The ioctl()s which are implemented (along with relevant details)
+ * are:
+ *
+ * ICARSAMODEXPO
+ * Perform an RSA operation using a Modulus-Exponent pair
+ * This takes an ica_rsa_modexpo struct as its arg.
+ *
+ * NOTE: please refer to the comments preceding this structure
+ * for the implementation details for the contents of the
+ * block
+ *
+ * ICARSACRT
+ * Perform an RSA operation using a Chinese-Remainder Theorem key
+ * This takes an ica_rsa_modexpo_crt struct as its arg.
+ *
+ * NOTE: please refer to the comments preceding this structure
+ * for the implementation details for the contents of the
+ * block
+ *
+ * ZSECSENDCPRB
+ * Send an arbitrary CPRB to a crypto card.
+ *
+ * ZSENDEP11CPRB
+ * Send an arbitrary EP11 CPRB to an EP11 coprocessor crypto card.
+ *
+ * ZCRYPT_DEVICE_STATUS
+ * The given struct zcrypt_device_matrix_ext is updated with
+ * status information for each currently known apqn.
+ *
+ * ZCRYPT_STATUS_MASK
+ * Return a MAX_ZDEV_CARDIDS_EXT element array of unsigned chars for the
+ * status of all devices.
+ * 0x01: PCICA
+ * 0x02: PCICC
+ * 0x03: PCIXCC_MCL2
+ * 0x04: PCIXCC_MCL3
+ * 0x05: CEX2C
+ * 0x06: CEX2A
+ * 0x07: CEX3C
+ * 0x08: CEX3A
+ * 0x0a: CEX4
+ * 0x0b: CEX5
+ * 0x0c: CEX6
+ * 0x0d: device is disabled
+ *
+ * ZCRYPT_QDEPTH_MASK
+ * Return a MAX_ZDEV_CARDIDS_EXT element array of unsigned chars for the
+ * queue depth of all devices.
+ *
+ * ZCRYPT_PERDEV_REQCNT
+ * Return a MAX_ZDEV_CARDIDS_EXT element array of unsigned integers for
+ * the number of successfully completed requests per device since the
+ * device was detected and made available.
+ *
+ */
+
+/**
+ * Supported ioctl calls
+ */
+#define ICARSAMODEXPO _IOC(_IOC_READ|_IOC_WRITE, ZCRYPT_IOCTL_MAGIC, 0x05, 0)
+#define ICARSACRT _IOC(_IOC_READ|_IOC_WRITE, ZCRYPT_IOCTL_MAGIC, 0x06, 0)
+#define ZSECSENDCPRB _IOC(_IOC_READ|_IOC_WRITE, ZCRYPT_IOCTL_MAGIC, 0x81, 0)
+#define ZSENDEP11CPRB _IOC(_IOC_READ|_IOC_WRITE, ZCRYPT_IOCTL_MAGIC, 0x04, 0)
+
+#define ZCRYPT_DEVICE_STATUS _IOC(_IOC_READ|_IOC_WRITE, ZCRYPT_IOCTL_MAGIC, 0x5f, 0)
+#define ZCRYPT_STATUS_MASK _IOR(ZCRYPT_IOCTL_MAGIC, 0x58, char[MAX_ZDEV_CARDIDS_EXT])
+#define ZCRYPT_QDEPTH_MASK _IOR(ZCRYPT_IOCTL_MAGIC, 0x59, char[MAX_ZDEV_CARDIDS_EXT])
+#define ZCRYPT_PERDEV_REQCNT _IOR(ZCRYPT_IOCTL_MAGIC, 0x5a, int[MAX_ZDEV_CARDIDS_EXT])
+
+/*
+ * Only deprecated defines, structs and ioctls below this line.
+ */
+
+/* Deprecated: use MAX_ZDEV_CARDIDS_EXT */
+#define MAX_ZDEV_CARDIDS 64
+/* Deprecated: use MAX_ZDEV_DOMAINS_EXT */
+#define MAX_ZDEV_DOMAINS 256
+
+/* Deprecated: use MAX_ZDEV_ENTRIES_EXT */
+#define MAX_ZDEV_ENTRIES (MAX_ZDEV_CARDIDS * MAX_ZDEV_DOMAINS)
+
+/* Deprecated: use struct zcrypt_device_status_ext */
+struct zcrypt_device_status {
+ unsigned int hwtype:8;
+ unsigned int qid:14;
+ unsigned int online:1;
+ unsigned int functions:6;
+ unsigned int reserved:3;
+};
+
+/* Deprecated: use struct zcrypt_device_matrix_ext */
+struct zcrypt_device_matrix {
+ struct zcrypt_device_status device[MAX_ZDEV_ENTRIES];
+};
+
+/* Deprecated: use ZCRYPT_DEVICE_STATUS */
+#define ZDEVICESTATUS _IOC(_IOC_READ|_IOC_WRITE, ZCRYPT_IOCTL_MAGIC, 0x4f, 0)
+/* Deprecated: use ZCRYPT_STATUS_MASK */
+#define Z90STAT_STATUS_MASK _IOR(ZCRYPT_IOCTL_MAGIC, 0x48, char[64])
+/* Deprecated: use ZCRYPT_QDEPTH_MASK */
+#define Z90STAT_QDEPTH_MASK _IOR(ZCRYPT_IOCTL_MAGIC, 0x49, char[64])
+/* Deprecated: use ZCRYPT_PERDEV_REQCNT */
+#define Z90STAT_PERDEV_REQCNT _IOR(ZCRYPT_IOCTL_MAGIC, 0x4a, int[64])
+
+/* Deprecated: use sysfs to query these values */
+#define Z90STAT_REQUESTQ_COUNT _IOR(ZCRYPT_IOCTL_MAGIC, 0x44, int)
+#define Z90STAT_PENDINGQ_COUNT _IOR(ZCRYPT_IOCTL_MAGIC, 0x45, int)
+#define Z90STAT_TOTALOPEN_COUNT _IOR(ZCRYPT_IOCTL_MAGIC, 0x46, int)
+#define Z90STAT_DOMAIN_INDEX _IOR(ZCRYPT_IOCTL_MAGIC, 0x47, int)
+
+/*
+ * The ioctl number ranges 0x40 - 0x42 and 0x4b - 0x4e have been used in
+ * the past; do not assign new ioctls for these.
+ */
+
+#endif /* __ASM_S390_ZCRYPT_H */
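
Putting the pieces together: user space talks to the zcrypt driver by filling one of the request structures above and passing it to the matching ioctl. A hedged sketch of an ICARSAMODEXPO call — /dev/z90crypt as the device node and the helper name are assumptions for illustration; key material must obey the length and padding rules documented for struct ica_rsa_modexpo:

#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <sys/ioctl.h>
#include <unistd.h>
#include <asm/zcrypt.h>

/* Illustrative helper: computes out = in^b_key mod n_modulus on a card. */
static int rsa_mod_expo(int fd, char *in, char *out,
			char *exponent, char *modulus, unsigned int len)
{
	struct ica_rsa_modexpo mex;

	memset(&mex, 0, sizeof(mex));
	mex.inputdata = in;
	mex.inputdatalength = len;
	mex.outputdata = out;
	mex.outputdatalength = len;	/* must be >= inputdatalength */
	mex.b_key = exponent;		/* right justified, zero padded */
	mex.n_modulus = modulus;	/* length equals inputdatalength */
	return ioctl(fd, ICARSAMODEXPO, &mex);
}

int main(void)
{
	int fd = open("/dev/z90crypt", O_RDWR);	/* device node is an assumption */

	if (fd < 0) {
		perror("open /dev/z90crypt");
		return 1;
	}
	/* ... fill in buffers and key material, then call rsa_mod_expo() ... */
	close(fd);
	return 0;
}
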
diff --git a/arch/s390/kernel/.gitignore b/arch/s390/kernel/.gitignore
new file mode 100644
index 000000000..c5f676c3c
--- /dev/null
+++ b/arch/s390/kernel/.gitignore
@@ -0,0 +1 @@
+vmlinux.lds
diff --git a/arch/s390/kernel/Makefile b/arch/s390/kernel/Makefile
new file mode 100644
index 000000000..762fc4537
--- /dev/null
+++ b/arch/s390/kernel/Makefile
@@ -0,0 +1,88 @@
+# SPDX-License-Identifier: GPL-2.0
+#
+# Makefile for the linux kernel.
+#
+
+ifdef CONFIG_FUNCTION_TRACER
+
+# Do not trace tracer code
+CFLAGS_REMOVE_ftrace.o = $(CC_FLAGS_FTRACE)
+
+# Do not trace early setup code
+CFLAGS_REMOVE_early.o = $(CC_FLAGS_FTRACE)
+CFLAGS_REMOVE_early_nobss.o = $(CC_FLAGS_FTRACE)
+
+endif
+
+GCOV_PROFILE_early.o := n
+GCOV_PROFILE_early_nobss.o := n
+
+KCOV_INSTRUMENT_early.o := n
+KCOV_INSTRUMENT_early_nobss.o := n
+
+UBSAN_SANITIZE_early.o := n
+UBSAN_SANITIZE_early_nobss.o := n
+
+KASAN_SANITIZE_early_nobss.o := n
+
+#
+# Passing null pointers is ok for smp code, since we access the lowcore here.
+#
+CFLAGS_smp.o := -Wno-nonnull
+
+#
+# Disable tailcall optimizations for stack / callchain walking functions
+# since this might generate broken code when accessing register 15 and
+# passing its content to other functions.
+#
+CFLAGS_stacktrace.o += -fno-optimize-sibling-calls
+CFLAGS_dumpstack.o += -fno-optimize-sibling-calls
+
+#
+# Pass UTS_MACHINE for user_regset definition
+#
+CFLAGS_ptrace.o += -DUTS_MACHINE='"$(UTS_MACHINE)"'
+
+obj-y := traps.o time.o process.o base.o early.o setup.o idle.o vtime.o
+obj-y += processor.o sys_s390.o ptrace.o signal.o cpcmd.o ebcdic.o nmi.o
+obj-y += debug.o irq.o ipl.o dis.o diag.o vdso.o early_nobss.o
+obj-y += sysinfo.o lgr.o os_info.o machine_kexec.o pgm_check.o
+obj-y += runtime_instr.o cache.o fpu.o dumpstack.o guarded_storage.o sthyi.o
+obj-y += entry.o reipl.o relocate_kernel.o kdebugfs.o alternative.o
+obj-y += nospec-branch.o
+
+extra-y += head64.o vmlinux.lds
+
+obj-$(CONFIG_SYSFS) += nospec-sysfs.o
+CFLAGS_REMOVE_nospec-branch.o += $(CC_FLAGS_EXPOLINE)
+
+obj-$(CONFIG_MODULES) += module.o
+obj-$(CONFIG_SMP) += smp.o
+obj-$(CONFIG_SCHED_TOPOLOGY) += topology.o
+obj-$(CONFIG_HIBERNATION) += suspend.o swsusp.o
+obj-$(CONFIG_AUDIT) += audit.o
+compat-obj-$(CONFIG_AUDIT) += compat_audit.o
+obj-$(CONFIG_COMPAT) += compat_linux.o compat_signal.o
+obj-$(CONFIG_COMPAT) += compat_wrapper.o $(compat-obj-y)
+obj-$(CONFIG_EARLY_PRINTK) += early_printk.o
+obj-$(CONFIG_STACKTRACE) += stacktrace.o
+obj-$(CONFIG_KPROBES) += kprobes.o
+obj-$(CONFIG_FUNCTION_TRACER) += mcount.o ftrace.o
+obj-$(CONFIG_CRASH_DUMP) += crash_dump.o
+obj-$(CONFIG_UPROBES) += uprobes.o
+obj-$(CONFIG_JUMP_LABEL) += jump_label.o
+
+obj-$(CONFIG_KEXEC_FILE) += machine_kexec_file.o kexec_image.o
+obj-$(CONFIG_KEXEC_FILE) += kexec_elf.o
+
+obj-$(CONFIG_PERF_EVENTS) += perf_event.o perf_cpum_cf.o perf_cpum_sf.o
+obj-$(CONFIG_PERF_EVENTS) += perf_cpum_cf_events.o perf_regs.o
+
+obj-$(CONFIG_TRACEPOINTS) += trace.o
+
+# vdso
+obj-y += vdso64/
+obj-$(CONFIG_COMPAT) += vdso32/
+
+chkbss := head64.o early_nobss.o
+include $(srctree)/arch/s390/scripts/Makefile.chkbss
diff --git a/arch/s390/kernel/alternative.c b/arch/s390/kernel/alternative.c
new file mode 100644
index 000000000..8e1f2aee8
--- /dev/null
+++ b/arch/s390/kernel/alternative.c
@@ -0,0 +1,113 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/module.h>
+#include <asm/alternative.h>
+#include <asm/facility.h>
+#include <asm/nospec-branch.h>
+
+#define MAX_PATCH_LEN (255 - 1)
+
+static int __initdata_or_module alt_instr_disabled;
+
+static int __init disable_alternative_instructions(char *str)
+{
+ alt_instr_disabled = 1;
+ return 0;
+}
+
+early_param("noaltinstr", disable_alternative_instructions);
+
+struct brcl_insn {
+ u16 opc;
+ s32 disp;
+} __packed;
+
+static u16 __initdata_or_module nop16 = 0x0700;
+static u32 __initdata_or_module nop32 = 0x47000000;
+static struct brcl_insn __initdata_or_module nop48 = {
+ 0xc004, 0
+};
+
+static const void *nops[] __initdata_or_module = {
+ &nop16,
+ &nop32,
+ &nop48
+};
+
+static void __init_or_module add_jump_padding(void *insns, unsigned int len)
+{
+ struct brcl_insn brcl = {
+ 0xc0f4,
+ len / 2
+ };
+
+ memcpy(insns, &brcl, sizeof(brcl));
+ insns += sizeof(brcl);
+ len -= sizeof(brcl);
+
+ while (len > 0) {
+ memcpy(insns, &nop16, 2);
+ insns += 2;
+ len -= 2;
+ }
+}
+
+static void __init_or_module add_padding(void *insns, unsigned int len)
+{
+ if (len > 6)
+ add_jump_padding(insns, len);
+ else if (len >= 2)
+ memcpy(insns, nops[len / 2 - 1], len);
+}
+
+static void __init_or_module __apply_alternatives(struct alt_instr *start,
+ struct alt_instr *end)
+{
+ struct alt_instr *a;
+ u8 *instr, *replacement;
+ u8 insnbuf[MAX_PATCH_LEN];
+
+ /*
+ * The scan order should be from start to end. Alternative code scanned
+ * later can overwrite alternative code scanned earlier.
+ */
+ for (a = start; a < end; a++) {
+ int insnbuf_sz = 0;
+
+ instr = (u8 *)&a->instr_offset + a->instr_offset;
+ replacement = (u8 *)&a->repl_offset + a->repl_offset;
+
+ if (!__test_facility(a->facility,
+ S390_lowcore.alt_stfle_fac_list))
+ continue;
+
+ if (unlikely(a->instrlen % 2 || a->replacementlen % 2)) {
+ WARN_ONCE(1, "cpu alternatives instructions length is "
+ "odd, skipping patching\n");
+ continue;
+ }
+
+ memcpy(insnbuf, replacement, a->replacementlen);
+ insnbuf_sz = a->replacementlen;
+
+ if (a->instrlen > a->replacementlen) {
+ add_padding(insnbuf + a->replacementlen,
+ a->instrlen - a->replacementlen);
+ insnbuf_sz += a->instrlen - a->replacementlen;
+ }
+
+ s390_kernel_write(instr, insnbuf, insnbuf_sz);
+ }
+}
+
+void __init_or_module apply_alternatives(struct alt_instr *start,
+ struct alt_instr *end)
+{
+ if (!alt_instr_disabled)
+ __apply_alternatives(start, end);
+}
+
+extern struct alt_instr __alt_instructions[], __alt_instructions_end[];
+void __init apply_alternative_instructions(void)
+{
+ apply_alternatives(__alt_instructions, __alt_instructions_end);
+}
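
For context, the entries walked by __apply_alternatives() are emitted by the ALTERNATIVE() macro from asm/alternative.h, which is added elsewhere in this series. A hedged sketch of a typical user, modelled on the s390 memory-barrier code; the facility number 82 (fast-BCR-serialization) and the exact instruction choice are assumptions about that code, not something alternative.c itself dictates:

#include <asm/alternative.h>

/*
 * Sketch: emit "bcr 15,0" by default and let apply_alternatives()
 * patch in the cheaper "bcr 14,0" at boot once facility 82 is known
 * to be installed. Both instructions are 2 bytes long, so the
 * even-length check in __apply_alternatives() is satisfied.
 */
static inline void cpu_serialize(void)
{
	asm volatile(ALTERNATIVE("bcr 15,0", "bcr 14,0", 82) : : : "memory");
}
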
diff --git a/arch/s390/kernel/asm-offsets.c b/arch/s390/kernel/asm-offsets.c
new file mode 100644
index 000000000..e9d09f6e8
--- /dev/null
+++ b/arch/s390/kernel/asm-offsets.c
@@ -0,0 +1,206 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Generate definitions needed by assembly language modules.
+ * This code generates raw asm output which is post-processed to extract
+ * and format the required data.
+ */
+
+#define ASM_OFFSETS_C
+
+#include <linux/kbuild.h>
+#include <linux/kvm_host.h>
+#include <linux/sched.h>
+#include <linux/purgatory.h>
+#include <asm/idle.h>
+#include <asm/vdso.h>
+#include <asm/pgtable.h>
+#include <asm/gmap.h>
+#include <asm/nmi.h>
+
+int main(void)
+{
+ /* task struct offsets */
+ OFFSET(__TASK_stack, task_struct, stack);
+ OFFSET(__TASK_thread, task_struct, thread);
+ OFFSET(__TASK_pid, task_struct, pid);
+ BLANK();
+ /* thread struct offsets */
+ OFFSET(__THREAD_ksp, thread_struct, ksp);
+ OFFSET(__THREAD_sysc_table, thread_struct, sys_call_table);
+ OFFSET(__THREAD_last_break, thread_struct, last_break);
+ OFFSET(__THREAD_FPU_fpc, thread_struct, fpu.fpc);
+ OFFSET(__THREAD_FPU_regs, thread_struct, fpu.regs);
+ OFFSET(__THREAD_per_cause, thread_struct, per_event.cause);
+ OFFSET(__THREAD_per_address, thread_struct, per_event.address);
+ OFFSET(__THREAD_per_paid, thread_struct, per_event.paid);
+ OFFSET(__THREAD_trap_tdb, thread_struct, trap_tdb);
+ BLANK();
+ /* thread info offsets */
+ OFFSET(__TI_flags, task_struct, thread_info.flags);
+ BLANK();
+ /* pt_regs offsets */
+ OFFSET(__PT_ARGS, pt_regs, args);
+ OFFSET(__PT_PSW, pt_regs, psw);
+ OFFSET(__PT_GPRS, pt_regs, gprs);
+ OFFSET(__PT_ORIG_GPR2, pt_regs, orig_gpr2);
+ OFFSET(__PT_INT_CODE, pt_regs, int_code);
+ OFFSET(__PT_INT_PARM, pt_regs, int_parm);
+ OFFSET(__PT_INT_PARM_LONG, pt_regs, int_parm_long);
+ OFFSET(__PT_FLAGS, pt_regs, flags);
+ DEFINE(__PT_SIZE, sizeof(struct pt_regs));
+ BLANK();
+ /* stack_frame offsets */
+ OFFSET(__SF_BACKCHAIN, stack_frame, back_chain);
+ OFFSET(__SF_GPRS, stack_frame, gprs);
+ OFFSET(__SF_EMPTY, stack_frame, empty1);
+ OFFSET(__SF_SIE_CONTROL, stack_frame, empty1[0]);
+ OFFSET(__SF_SIE_SAVEAREA, stack_frame, empty1[1]);
+ OFFSET(__SF_SIE_REASON, stack_frame, empty1[2]);
+ OFFSET(__SF_SIE_FLAGS, stack_frame, empty1[3]);
+ BLANK();
+ /* timeval/timezone offsets for use by vdso */
+ OFFSET(__VDSO_UPD_COUNT, vdso_data, tb_update_count);
+ OFFSET(__VDSO_XTIME_STAMP, vdso_data, xtime_tod_stamp);
+ OFFSET(__VDSO_XTIME_SEC, vdso_data, xtime_clock_sec);
+ OFFSET(__VDSO_XTIME_NSEC, vdso_data, xtime_clock_nsec);
+ OFFSET(__VDSO_XTIME_CRS_SEC, vdso_data, xtime_coarse_sec);
+ OFFSET(__VDSO_XTIME_CRS_NSEC, vdso_data, xtime_coarse_nsec);
+ OFFSET(__VDSO_WTOM_SEC, vdso_data, wtom_clock_sec);
+ OFFSET(__VDSO_WTOM_NSEC, vdso_data, wtom_clock_nsec);
+ OFFSET(__VDSO_WTOM_CRS_SEC, vdso_data, wtom_coarse_sec);
+ OFFSET(__VDSO_WTOM_CRS_NSEC, vdso_data, wtom_coarse_nsec);
+ OFFSET(__VDSO_TIMEZONE, vdso_data, tz_minuteswest);
+ OFFSET(__VDSO_ECTG_OK, vdso_data, ectg_available);
+ OFFSET(__VDSO_TK_MULT, vdso_data, tk_mult);
+ OFFSET(__VDSO_TK_SHIFT, vdso_data, tk_shift);
+ OFFSET(__VDSO_TS_DIR, vdso_data, ts_dir);
+ OFFSET(__VDSO_TS_END, vdso_data, ts_end);
+ OFFSET(__VDSO_CLOCK_REALTIME_RES, vdso_data, hrtimer_res);
+ OFFSET(__VDSO_ECTG_BASE, vdso_per_cpu_data, ectg_timer_base);
+ OFFSET(__VDSO_ECTG_USER, vdso_per_cpu_data, ectg_user_time);
+ OFFSET(__VDSO_CPU_NR, vdso_per_cpu_data, cpu_nr);
+ OFFSET(__VDSO_NODE_ID, vdso_per_cpu_data, node_id);
+ BLANK();
+ /* constants used by the vdso */
+ DEFINE(__CLOCK_REALTIME, CLOCK_REALTIME);
+ DEFINE(__CLOCK_MONOTONIC, CLOCK_MONOTONIC);
+ DEFINE(__CLOCK_REALTIME_COARSE, CLOCK_REALTIME_COARSE);
+ DEFINE(__CLOCK_MONOTONIC_COARSE, CLOCK_MONOTONIC_COARSE);
+ DEFINE(__CLOCK_THREAD_CPUTIME_ID, CLOCK_THREAD_CPUTIME_ID);
+ DEFINE(__CLOCK_COARSE_RES, LOW_RES_NSEC);
+ BLANK();
+ /* idle data offsets */
+ OFFSET(__CLOCK_IDLE_ENTER, s390_idle_data, clock_idle_enter);
+ OFFSET(__CLOCK_IDLE_EXIT, s390_idle_data, clock_idle_exit);
+ OFFSET(__TIMER_IDLE_ENTER, s390_idle_data, timer_idle_enter);
+ OFFSET(__TIMER_IDLE_EXIT, s390_idle_data, timer_idle_exit);
+ BLANK();
+ /* hardware defined lowcore locations 0x000 - 0x1ff */
+ OFFSET(__LC_EXT_PARAMS, lowcore, ext_params);
+ OFFSET(__LC_EXT_CPU_ADDR, lowcore, ext_cpu_addr);
+ OFFSET(__LC_EXT_INT_CODE, lowcore, ext_int_code);
+ OFFSET(__LC_SVC_ILC, lowcore, svc_ilc);
+ OFFSET(__LC_SVC_INT_CODE, lowcore, svc_code);
+ OFFSET(__LC_PGM_ILC, lowcore, pgm_ilc);
+ OFFSET(__LC_PGM_INT_CODE, lowcore, pgm_code);
+ OFFSET(__LC_DATA_EXC_CODE, lowcore, data_exc_code);
+ OFFSET(__LC_MON_CLASS_NR, lowcore, mon_class_num);
+ OFFSET(__LC_PER_CODE, lowcore, per_code);
+ OFFSET(__LC_PER_ATMID, lowcore, per_atmid);
+ OFFSET(__LC_PER_ADDRESS, lowcore, per_address);
+ OFFSET(__LC_EXC_ACCESS_ID, lowcore, exc_access_id);
+ OFFSET(__LC_PER_ACCESS_ID, lowcore, per_access_id);
+ OFFSET(__LC_OP_ACCESS_ID, lowcore, op_access_id);
+ OFFSET(__LC_AR_MODE_ID, lowcore, ar_mode_id);
+ OFFSET(__LC_TRANS_EXC_CODE, lowcore, trans_exc_code);
+ OFFSET(__LC_MON_CODE, lowcore, monitor_code);
+ OFFSET(__LC_SUBCHANNEL_ID, lowcore, subchannel_id);
+ OFFSET(__LC_SUBCHANNEL_NR, lowcore, subchannel_nr);
+ OFFSET(__LC_IO_INT_PARM, lowcore, io_int_parm);
+ OFFSET(__LC_IO_INT_WORD, lowcore, io_int_word);
+ OFFSET(__LC_STFL_FAC_LIST, lowcore, stfl_fac_list);
+ OFFSET(__LC_STFLE_FAC_LIST, lowcore, stfle_fac_list);
+ OFFSET(__LC_MCCK_CODE, lowcore, mcck_interruption_code);
+ OFFSET(__LC_EXT_DAMAGE_CODE, lowcore, external_damage_code);
+ OFFSET(__LC_MCCK_FAIL_STOR_ADDR, lowcore, failing_storage_address);
+ OFFSET(__LC_LAST_BREAK, lowcore, breaking_event_addr);
+ OFFSET(__LC_RST_OLD_PSW, lowcore, restart_old_psw);
+ OFFSET(__LC_EXT_OLD_PSW, lowcore, external_old_psw);
+ OFFSET(__LC_SVC_OLD_PSW, lowcore, svc_old_psw);
+ OFFSET(__LC_PGM_OLD_PSW, lowcore, program_old_psw);
+ OFFSET(__LC_MCK_OLD_PSW, lowcore, mcck_old_psw);
+ OFFSET(__LC_IO_OLD_PSW, lowcore, io_old_psw);
+ OFFSET(__LC_RST_NEW_PSW, lowcore, restart_psw);
+ OFFSET(__LC_EXT_NEW_PSW, lowcore, external_new_psw);
+ OFFSET(__LC_SVC_NEW_PSW, lowcore, svc_new_psw);
+ OFFSET(__LC_PGM_NEW_PSW, lowcore, program_new_psw);
+ OFFSET(__LC_MCK_NEW_PSW, lowcore, mcck_new_psw);
+ OFFSET(__LC_IO_NEW_PSW, lowcore, io_new_psw);
+ /* software defined lowcore locations 0x200 - 0xdff */
+ OFFSET(__LC_SAVE_AREA_SYNC, lowcore, save_area_sync);
+ OFFSET(__LC_SAVE_AREA_ASYNC, lowcore, save_area_async);
+ OFFSET(__LC_SAVE_AREA_RESTART, lowcore, save_area_restart);
+ OFFSET(__LC_CPU_FLAGS, lowcore, cpu_flags);
+ OFFSET(__LC_RETURN_PSW, lowcore, return_psw);
+ OFFSET(__LC_RETURN_MCCK_PSW, lowcore, return_mcck_psw);
+ OFFSET(__LC_SYNC_ENTER_TIMER, lowcore, sync_enter_timer);
+ OFFSET(__LC_ASYNC_ENTER_TIMER, lowcore, async_enter_timer);
+ OFFSET(__LC_MCCK_ENTER_TIMER, lowcore, mcck_enter_timer);
+ OFFSET(__LC_EXIT_TIMER, lowcore, exit_timer);
+ OFFSET(__LC_USER_TIMER, lowcore, user_timer);
+ OFFSET(__LC_SYSTEM_TIMER, lowcore, system_timer);
+ OFFSET(__LC_STEAL_TIMER, lowcore, steal_timer);
+ OFFSET(__LC_LAST_UPDATE_TIMER, lowcore, last_update_timer);
+ OFFSET(__LC_LAST_UPDATE_CLOCK, lowcore, last_update_clock);
+ OFFSET(__LC_INT_CLOCK, lowcore, int_clock);
+ OFFSET(__LC_MCCK_CLOCK, lowcore, mcck_clock);
+ OFFSET(__LC_CLOCK_COMPARATOR, lowcore, clock_comparator);
+ OFFSET(__LC_BOOT_CLOCK, lowcore, boot_clock);
+ OFFSET(__LC_CURRENT, lowcore, current_task);
+ OFFSET(__LC_KERNEL_STACK, lowcore, kernel_stack);
+ OFFSET(__LC_ASYNC_STACK, lowcore, async_stack);
+ OFFSET(__LC_PANIC_STACK, lowcore, panic_stack);
+ OFFSET(__LC_RESTART_STACK, lowcore, restart_stack);
+ OFFSET(__LC_RESTART_FN, lowcore, restart_fn);
+ OFFSET(__LC_RESTART_DATA, lowcore, restart_data);
+ OFFSET(__LC_RESTART_SOURCE, lowcore, restart_source);
+ OFFSET(__LC_USER_ASCE, lowcore, user_asce);
+ OFFSET(__LC_VDSO_ASCE, lowcore, vdso_asce);
+ OFFSET(__LC_LPP, lowcore, lpp);
+ OFFSET(__LC_CURRENT_PID, lowcore, current_pid);
+ OFFSET(__LC_PERCPU_OFFSET, lowcore, percpu_offset);
+ OFFSET(__LC_VDSO_PER_CPU, lowcore, vdso_per_cpu_data);
+ OFFSET(__LC_MACHINE_FLAGS, lowcore, machine_flags);
+ OFFSET(__LC_PREEMPT_COUNT, lowcore, preempt_count);
+ OFFSET(__LC_GMAP, lowcore, gmap);
+ OFFSET(__LC_BR_R1, lowcore, br_r1_trampoline);
+ /* software defined ABI-relevant lowcore locations 0xe00 - 0xe20 */
+ OFFSET(__LC_DUMP_REIPL, lowcore, ipib);
+ /* hardware defined lowcore locations 0x1000 - 0x18ff */
+ OFFSET(__LC_MCESAD, lowcore, mcesad);
+ OFFSET(__LC_EXT_PARAMS2, lowcore, ext_params2);
+ OFFSET(__LC_FPREGS_SAVE_AREA, lowcore, floating_pt_save_area);
+ OFFSET(__LC_GPREGS_SAVE_AREA, lowcore, gpregs_save_area);
+ OFFSET(__LC_PSW_SAVE_AREA, lowcore, psw_save_area);
+ OFFSET(__LC_PREFIX_SAVE_AREA, lowcore, prefixreg_save_area);
+ OFFSET(__LC_FP_CREG_SAVE_AREA, lowcore, fpt_creg_save_area);
+ OFFSET(__LC_TOD_PROGREG_SAVE_AREA, lowcore, tod_progreg_save_area);
+ OFFSET(__LC_CPU_TIMER_SAVE_AREA, lowcore, cpu_timer_save_area);
+ OFFSET(__LC_CLOCK_COMP_SAVE_AREA, lowcore, clock_comp_save_area);
+ OFFSET(__LC_AREGS_SAVE_AREA, lowcore, access_regs_save_area);
+ OFFSET(__LC_CREGS_SAVE_AREA, lowcore, cregs_save_area);
+ OFFSET(__LC_PGM_TDB, lowcore, pgm_tdb);
+ BLANK();
+ /* extended machine check save area */
+ OFFSET(__MCESA_GS_SAVE_AREA, mcesa, guarded_storage_save_area);
+ BLANK();
+ /* gmap/sie offsets */
+ OFFSET(__GMAP_ASCE, gmap, asce);
+ OFFSET(__SIE_PROG0C, kvm_s390_sie_block, prog0c);
+ OFFSET(__SIE_PROG20, kvm_s390_sie_block, prog20);
+ /* kexec_sha_region */
+ OFFSET(__KEXEC_SHA_REGION_START, kexec_sha_region, start);
+ OFFSET(__KEXEC_SHA_REGION_LEN, kexec_sha_region, len);
+ DEFINE(__KEXEC_SHA_REGION_SIZE, sizeof(struct kexec_sha_region));
+ return 0;
+}
diff --git a/arch/s390/kernel/audit.c b/arch/s390/kernel/audit.c
new file mode 100644
index 000000000..d395c6c99
--- /dev/null
+++ b/arch/s390/kernel/audit.c
@@ -0,0 +1,79 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/init.h>
+#include <linux/types.h>
+#include <linux/audit.h>
+#include <asm/unistd.h>
+#include "audit.h"
+
+static unsigned dir_class[] = {
+#include <asm-generic/audit_dir_write.h>
+~0U
+};
+
+static unsigned read_class[] = {
+#include <asm-generic/audit_read.h>
+~0U
+};
+
+static unsigned write_class[] = {
+#include <asm-generic/audit_write.h>
+~0U
+};
+
+static unsigned chattr_class[] = {
+#include <asm-generic/audit_change_attr.h>
+~0U
+};
+
+static unsigned signal_class[] = {
+#include <asm-generic/audit_signal.h>
+~0U
+};
+
+int audit_classify_arch(int arch)
+{
+#ifdef CONFIG_COMPAT
+ if (arch == AUDIT_ARCH_S390)
+ return 1;
+#endif
+ return 0;
+}
+
+int audit_classify_syscall(int abi, unsigned syscall)
+{
+#ifdef CONFIG_COMPAT
+ if (abi == AUDIT_ARCH_S390)
+ return s390_classify_syscall(syscall);
+#endif
+ switch(syscall) {
+ case __NR_open:
+ return 2;
+ case __NR_openat:
+ return 3;
+ case __NR_socketcall:
+ return 4;
+ case __NR_execve:
+ return 5;
+ default:
+ return 0;
+ }
+}
+
+static int __init audit_classes_init(void)
+{
+#ifdef CONFIG_COMPAT
+ audit_register_class(AUDIT_CLASS_WRITE_32, s390_write_class);
+ audit_register_class(AUDIT_CLASS_READ_32, s390_read_class);
+ audit_register_class(AUDIT_CLASS_DIR_WRITE_32, s390_dir_class);
+ audit_register_class(AUDIT_CLASS_CHATTR_32, s390_chattr_class);
+ audit_register_class(AUDIT_CLASS_SIGNAL_32, s390_signal_class);
+#endif
+ audit_register_class(AUDIT_CLASS_WRITE, write_class);
+ audit_register_class(AUDIT_CLASS_READ, read_class);
+ audit_register_class(AUDIT_CLASS_DIR_WRITE, dir_class);
+ audit_register_class(AUDIT_CLASS_CHATTR, chattr_class);
+ audit_register_class(AUDIT_CLASS_SIGNAL, signal_class);
+ return 0;
+}
+
+__initcall(audit_classes_init);
diff --git a/arch/s390/kernel/audit.h b/arch/s390/kernel/audit.h
new file mode 100644
index 000000000..4d4b59641
--- /dev/null
+++ b/arch/s390/kernel/audit.h
@@ -0,0 +1,16 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __ARCH_S390_KERNEL_AUDIT_H
+#define __ARCH_S390_KERNEL_AUDIT_H
+
+#include <linux/types.h>
+
+#ifdef CONFIG_COMPAT
+extern int s390_classify_syscall(unsigned);
+extern __u32 s390_dir_class[];
+extern __u32 s390_write_class[];
+extern __u32 s390_read_class[];
+extern __u32 s390_chattr_class[];
+extern __u32 s390_signal_class[];
+#endif /* CONFIG_COMPAT */
+
+#endif /* __ARCH_S390_KERNEL_AUDIT_H */
diff --git a/arch/s390/kernel/base.S b/arch/s390/kernel/base.S
new file mode 100644
index 000000000..b65874b0b
--- /dev/null
+++ b/arch/s390/kernel/base.S
@@ -0,0 +1,149 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * arch/s390/kernel/base.S
+ *
+ * Copyright IBM Corp. 2006, 2007
+ * Author(s): Heiko Carstens <heiko.carstens@de.ibm.com>
+ * Michael Holzheu <holzheu@de.ibm.com>
+ */
+
+#include <linux/linkage.h>
+#include <asm/asm-offsets.h>
+#include <asm/nospec-insn.h>
+#include <asm/ptrace.h>
+#include <asm/sigp.h>
+
+ GEN_BR_THUNK %r9
+ GEN_BR_THUNK %r14
+
+ENTRY(s390_base_mcck_handler)
+ basr %r13,0
+0: lg %r15,__LC_PANIC_STACK # load panic stack
+ aghi %r15,-STACK_FRAME_OVERHEAD
+ larl %r1,s390_base_mcck_handler_fn
+ lg %r9,0(%r1)
+ ltgr %r9,%r9
+ jz 1f
+ BASR_EX %r14,%r9
+1: la %r1,4095
+ lmg %r0,%r15,__LC_GPREGS_SAVE_AREA-4095(%r1)
+ lpswe __LC_MCK_OLD_PSW
+
+ .section .bss
+ .align 8
+ .globl s390_base_mcck_handler_fn
+s390_base_mcck_handler_fn:
+ .quad 0
+ .previous
+
+ENTRY(s390_base_ext_handler)
+ stmg %r0,%r15,__LC_SAVE_AREA_ASYNC
+ basr %r13,0
+0: aghi %r15,-STACK_FRAME_OVERHEAD
+ larl %r1,s390_base_ext_handler_fn
+ lg %r9,0(%r1)
+ ltgr %r9,%r9
+ jz 1f
+ BASR_EX %r14,%r9
+1: lmg %r0,%r15,__LC_SAVE_AREA_ASYNC
+ ni __LC_EXT_OLD_PSW+1,0xfd # clear wait state bit
+ lpswe __LC_EXT_OLD_PSW
+
+ .section .bss
+ .align 8
+ .globl s390_base_ext_handler_fn
+s390_base_ext_handler_fn:
+ .quad 0
+ .previous
+
+ENTRY(s390_base_pgm_handler)
+ stmg %r0,%r15,__LC_SAVE_AREA_SYNC
+ basr %r13,0
+0: aghi %r15,-STACK_FRAME_OVERHEAD
+ larl %r1,s390_base_pgm_handler_fn
+ lg %r9,0(%r1)
+ ltgr %r9,%r9
+ jz 1f
+ BASR_EX %r14,%r9
+ lmg %r0,%r15,__LC_SAVE_AREA_SYNC
+ lpswe __LC_PGM_OLD_PSW
+1: lpswe disabled_wait_psw-0b(%r13)
+
+ .align 8
+disabled_wait_psw:
+ .quad 0x0002000180000000,0x0000000000000000 + s390_base_pgm_handler
+
+ .section .bss
+ .align 8
+ .globl s390_base_pgm_handler_fn
+s390_base_pgm_handler_fn:
+ .quad 0
+ .previous
+
+#
+# Calls diag 308 subcode 1 and continues execution
+#
+ENTRY(diag308_reset)
+ larl %r4,.Lctlregs # Save control registers
+ stctg %c0,%c15,0(%r4)
+ lg %r2,0(%r4) # Disable lowcore protection
+ nilh %r2,0xefff
+ larl %r4,.Lctlreg0
+ stg %r2,0(%r4)
+ lctlg %c0,%c0,0(%r4)
+ larl %r4,.Lfpctl # Floating point control register
+ stfpc 0(%r4)
+ larl %r4,.Lprefix # Save prefix register
+ stpx 0(%r4)
+ larl %r4,.Lprefix_zero # Set prefix register to 0
+ spx 0(%r4)
+ larl %r4,.Lcontinue_psw # Save PSW flags
+ epsw %r2,%r3
+ stm %r2,%r3,0(%r4)
+ larl %r4,.Lrestart_psw # Setup restart PSW at absolute 0
+ lghi %r3,0
+ lg %r4,0(%r4) # Save PSW
+ sturg %r4,%r3 # Use sturg, because of large pages
+ lghi %r1,1
+ lghi %r0,0
+ diag %r0,%r1,0x308
+.Lrestart_part2:
+ lhi %r0,0 # Load r0 with zero
+ lhi %r1,2 # Use mode 2 = ESAME (dump)
+ sigp %r1,%r0,SIGP_SET_ARCHITECTURE # Switch to ESAME mode
+ sam64 # Switch to 64 bit addressing mode
+ larl %r4,.Lctlregs # Restore control registers
+ lctlg %c0,%c15,0(%r4)
+ larl %r4,.Lfpctl # Restore floating point ctl register
+ lfpc 0(%r4)
+ larl %r4,.Lprefix # Restore prefix register
+ spx 0(%r4)
+ larl %r4,.Lcontinue_psw # Restore PSW flags
+ lpswe 0(%r4)
+.Lcontinue:
+ BR_EX %r14
+.align 16
+.Lrestart_psw:
+ .long 0x00080000,0x80000000 + .Lrestart_part2
+
+ .section .data..nosave,"aw",@progbits
+.align 8
+.Lcontinue_psw:
+ .quad 0,.Lcontinue
+ .previous
+
+ .section .bss
+.align 8
+.Lctlreg0:
+ .quad 0
+.Lctlregs:
+ .rept 16
+ .quad 0
+ .endr
+.Lfpctl:
+ .long 0
+.Lprefix:
+ .long 0
+.Lprefix_zero:
+ .long 0
+ .previous
diff --git a/arch/s390/kernel/cache.c b/arch/s390/kernel/cache.c
new file mode 100644
index 000000000..d66825e53
--- /dev/null
+++ b/arch/s390/kernel/cache.c
@@ -0,0 +1,177 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Extract CPU cache information and expose them via sysfs.
+ *
+ * Copyright IBM Corp. 2012
+ * Author(s): Heiko Carstens <heiko.carstens@de.ibm.com>
+ */
+
+#include <linux/seq_file.h>
+#include <linux/cpu.h>
+#include <linux/cacheinfo.h>
+#include <asm/facility.h>
+
+enum {
+ CACHE_SCOPE_NOTEXISTS,
+ CACHE_SCOPE_PRIVATE,
+ CACHE_SCOPE_SHARED,
+ CACHE_SCOPE_RESERVED,
+};
+
+enum {
+ CTYPE_SEPARATE,
+ CTYPE_DATA,
+ CTYPE_INSTRUCTION,
+ CTYPE_UNIFIED,
+};
+
+enum {
+ EXTRACT_TOPOLOGY,
+ EXTRACT_LINE_SIZE,
+ EXTRACT_SIZE,
+ EXTRACT_ASSOCIATIVITY,
+};
+
+enum {
+ CACHE_TI_UNIFIED = 0,
+ CACHE_TI_DATA = 0,
+ CACHE_TI_INSTRUCTION,
+};
+
+struct cache_info {
+ unsigned char : 4;
+ unsigned char scope : 2;
+ unsigned char type : 2;
+};
+
+#define CACHE_MAX_LEVEL 8
+union cache_topology {
+ struct cache_info ci[CACHE_MAX_LEVEL];
+ unsigned long long raw;
+};
+
+static const char * const cache_type_string[] = {
+ "",
+ "Instruction",
+ "Data",
+ "",
+ "Unified",
+};
+
+static const enum cache_type cache_type_map[] = {
+ [CTYPE_SEPARATE] = CACHE_TYPE_SEPARATE,
+ [CTYPE_DATA] = CACHE_TYPE_DATA,
+ [CTYPE_INSTRUCTION] = CACHE_TYPE_INST,
+ [CTYPE_UNIFIED] = CACHE_TYPE_UNIFIED,
+};
+
+void show_cacheinfo(struct seq_file *m)
+{
+ struct cpu_cacheinfo *this_cpu_ci;
+ struct cacheinfo *cache;
+ int idx;
+
+ if (!test_facility(34))
+ return;
+ this_cpu_ci = get_cpu_cacheinfo(cpumask_any(cpu_online_mask));
+ for (idx = 0; idx < this_cpu_ci->num_leaves; idx++) {
+ cache = this_cpu_ci->info_list + idx;
+ seq_printf(m, "cache%-11d: ", idx);
+ seq_printf(m, "level=%d ", cache->level);
+ seq_printf(m, "type=%s ", cache_type_string[cache->type]);
+ seq_printf(m, "scope=%s ",
+ cache->disable_sysfs ? "Shared" : "Private");
+ seq_printf(m, "size=%dK ", cache->size >> 10);
+ seq_printf(m, "line_size=%u ", cache->coherency_line_size);
+ seq_printf(m, "associativity=%d", cache->ways_of_associativity);
+ seq_puts(m, "\n");
+ }
+}
+
+static inline enum cache_type get_cache_type(struct cache_info *ci, int level)
+{
+ if (level >= CACHE_MAX_LEVEL)
+ return CACHE_TYPE_NOCACHE;
+ ci += level;
+ if (ci->scope != CACHE_SCOPE_SHARED && ci->scope != CACHE_SCOPE_PRIVATE)
+ return CACHE_TYPE_NOCACHE;
+ return cache_type_map[ci->type];
+}
+
+static inline unsigned long ecag(int ai, int li, int ti)
+{
+ return __ecag(ECAG_CACHE_ATTRIBUTE, ai << 4 | li << 1 | ti);
+}
+
+static void ci_leaf_init(struct cacheinfo *this_leaf, int private,
+ enum cache_type type, unsigned int level, int cpu)
+{
+ int ti, num_sets;
+
+ if (type == CACHE_TYPE_INST)
+ ti = CACHE_TI_INSTRUCTION;
+ else
+ ti = CACHE_TI_UNIFIED;
+ this_leaf->level = level + 1;
+ this_leaf->type = type;
+ this_leaf->coherency_line_size = ecag(EXTRACT_LINE_SIZE, level, ti);
+ this_leaf->ways_of_associativity = ecag(EXTRACT_ASSOCIATIVITY, level, ti);
+ this_leaf->size = ecag(EXTRACT_SIZE, level, ti);
+ num_sets = this_leaf->size / this_leaf->coherency_line_size;
+ num_sets /= this_leaf->ways_of_associativity;
+ this_leaf->number_of_sets = num_sets;
+ cpumask_set_cpu(cpu, &this_leaf->shared_cpu_map);
+ if (!private)
+ this_leaf->disable_sysfs = true;
+}
+
+int init_cache_level(unsigned int cpu)
+{
+ struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu);
+ unsigned int level = 0, leaves = 0;
+ union cache_topology ct;
+ enum cache_type ctype;
+
+ if (!test_facility(34))
+ return -EOPNOTSUPP;
+ if (!this_cpu_ci)
+ return -EINVAL;
+ ct.raw = ecag(EXTRACT_TOPOLOGY, 0, 0);
+ do {
+ ctype = get_cache_type(&ct.ci[0], level);
+ if (ctype == CACHE_TYPE_NOCACHE)
+ break;
+ /* Separate instruction and data caches */
+ leaves += (ctype == CACHE_TYPE_SEPARATE) ? 2 : 1;
+ } while (++level < CACHE_MAX_LEVEL);
+ this_cpu_ci->num_levels = level;
+ this_cpu_ci->num_leaves = leaves;
+ return 0;
+}
+
+int populate_cache_leaves(unsigned int cpu)
+{
+ struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu);
+ struct cacheinfo *this_leaf = this_cpu_ci->info_list;
+ unsigned int level, idx, pvt;
+ union cache_topology ct;
+ enum cache_type ctype;
+
+ if (!test_facility(34))
+ return -EOPNOTSUPP;
+ ct.raw = ecag(EXTRACT_TOPOLOGY, 0, 0);
+ for (idx = 0, level = 0; level < this_cpu_ci->num_levels &&
+ idx < this_cpu_ci->num_leaves; idx++, level++) {
+ if (!this_leaf)
+ return -EINVAL;
+ pvt = (ct.ci[level].scope == CACHE_SCOPE_PRIVATE) ? 1 : 0;
+ ctype = get_cache_type(&ct.ci[0], level);
+ if (ctype == CACHE_TYPE_SEPARATE) {
+ ci_leaf_init(this_leaf++, pvt, CACHE_TYPE_DATA, level, cpu);
+ ci_leaf_init(this_leaf++, pvt, CACHE_TYPE_INST, level, cpu);
+ } else {
+ ci_leaf_init(this_leaf++, pvt, ctype, level, cpu);
+ }
+ }
+ return 0;
+}
diff --git a/arch/s390/kernel/compat_audit.c b/arch/s390/kernel/compat_audit.c
new file mode 100644
index 000000000..444fb1f66
--- /dev/null
+++ b/arch/s390/kernel/compat_audit.c
@@ -0,0 +1,45 @@
+// SPDX-License-Identifier: GPL-2.0
+#undef __s390x__
+#include <asm/unistd.h>
+#include "audit.h"
+
+unsigned s390_dir_class[] = {
+#include <asm-generic/audit_dir_write.h>
+~0U
+};
+
+unsigned s390_chattr_class[] = {
+#include <asm-generic/audit_change_attr.h>
+~0U
+};
+
+unsigned s390_write_class[] = {
+#include <asm-generic/audit_write.h>
+~0U
+};
+
+unsigned s390_read_class[] = {
+#include <asm-generic/audit_read.h>
+~0U
+};
+
+unsigned s390_signal_class[] = {
+#include <asm-generic/audit_signal.h>
+~0U
+};
+
+int s390_classify_syscall(unsigned syscall)
+{
+ switch(syscall) {
+ case __NR_open:
+ return 2;
+ case __NR_openat:
+ return 3;
+ case __NR_socketcall:
+ return 4;
+ case __NR_execve:
+ return 5;
+ default:
+ return 1;
+ }
+}
diff --git a/arch/s390/kernel/compat_linux.c b/arch/s390/kernel/compat_linux.c
new file mode 100644
index 000000000..8ac38d51e
--- /dev/null
+++ b/arch/s390/kernel/compat_linux.c
@@ -0,0 +1,522 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * S390 version
+ * Copyright IBM Corp. 2000
+ * Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com),
+ * Gerhard Tonn (ton@de.ibm.com)
+ * Thomas Spatzier (tspat@de.ibm.com)
+ *
+ * Conversion between 31bit and 64bit native syscalls.
+ *
+ * Heavily inspired by the 32-bit Sparc compat code which is
+ * Copyright (C) 1997,1998 Jakub Jelinek (jj@sunsite.mff.cuni.cz)
+ * Copyright (C) 1997 David S. Miller (davem@caip.rutgers.edu)
+ *
+ */
+
+
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/fs.h>
+#include <linux/mm.h>
+#include <linux/file.h>
+#include <linux/signal.h>
+#include <linux/resource.h>
+#include <linux/times.h>
+#include <linux/smp.h>
+#include <linux/sem.h>
+#include <linux/msg.h>
+#include <linux/shm.h>
+#include <linux/uio.h>
+#include <linux/quota.h>
+#include <linux/poll.h>
+#include <linux/personality.h>
+#include <linux/stat.h>
+#include <linux/filter.h>
+#include <linux/highmem.h>
+#include <linux/highuid.h>
+#include <linux/mman.h>
+#include <linux/ipv6.h>
+#include <linux/in.h>
+#include <linux/icmpv6.h>
+#include <linux/syscalls.h>
+#include <linux/sysctl.h>
+#include <linux/binfmts.h>
+#include <linux/capability.h>
+#include <linux/compat.h>
+#include <linux/vfs.h>
+#include <linux/ptrace.h>
+#include <linux/fadvise.h>
+#include <linux/ipc.h>
+#include <linux/slab.h>
+
+#include <asm/types.h>
+#include <linux/uaccess.h>
+
+#include <net/scm.h>
+#include <net/sock.h>
+
+#include "compat_linux.h"
+
+/* For this source file, we want overflow handling. */
+
+#undef high2lowuid
+#undef high2lowgid
+#undef low2highuid
+#undef low2highgid
+#undef SET_UID16
+#undef SET_GID16
+#undef NEW_TO_OLD_UID
+#undef NEW_TO_OLD_GID
+#undef SET_OLDSTAT_UID
+#undef SET_OLDSTAT_GID
+#undef SET_STAT_UID
+#undef SET_STAT_GID
+
+#define high2lowuid(uid) ((uid) > 65535) ? (u16)overflowuid : (u16)(uid)
+#define high2lowgid(gid) ((gid) > 65535) ? (u16)overflowgid : (u16)(gid)
+#define low2highuid(uid) ((uid) == (u16)-1) ? (uid_t)-1 : (uid_t)(uid)
+#define low2highgid(gid) ((gid) == (u16)-1) ? (gid_t)-1 : (gid_t)(gid)
+#define SET_UID16(var, uid) var = high2lowuid(uid)
+#define SET_GID16(var, gid) var = high2lowgid(gid)
+#define NEW_TO_OLD_UID(uid) high2lowuid(uid)
+#define NEW_TO_OLD_GID(gid) high2lowgid(gid)
+#define SET_OLDSTAT_UID(stat, uid) (stat).st_uid = high2lowuid(uid)
+#define SET_OLDSTAT_GID(stat, gid) (stat).st_gid = high2lowgid(gid)
+#define SET_STAT_UID(stat, uid) (stat).st_uid = high2lowuid(uid)
+#define SET_STAT_GID(stat, gid) (stat).st_gid = high2lowgid(gid)
+
+COMPAT_SYSCALL_DEFINE3(s390_chown16, const char __user *, filename,
+ u16, user, u16, group)
+{
+ return ksys_chown(filename, low2highuid(user), low2highgid(group));
+}
+
+COMPAT_SYSCALL_DEFINE3(s390_lchown16, const char __user *,
+ filename, u16, user, u16, group)
+{
+ return ksys_lchown(filename, low2highuid(user), low2highgid(group));
+}
+
+COMPAT_SYSCALL_DEFINE3(s390_fchown16, unsigned int, fd, u16, user, u16, group)
+{
+ return ksys_fchown(fd, low2highuid(user), low2highgid(group));
+}
+
+COMPAT_SYSCALL_DEFINE2(s390_setregid16, u16, rgid, u16, egid)
+{
+ return sys_setregid(low2highgid(rgid), low2highgid(egid));
+}
+
+COMPAT_SYSCALL_DEFINE1(s390_setgid16, u16, gid)
+{
+ return sys_setgid(low2highgid(gid));
+}
+
+COMPAT_SYSCALL_DEFINE2(s390_setreuid16, u16, ruid, u16, euid)
+{
+ return sys_setreuid(low2highuid(ruid), low2highuid(euid));
+}
+
+COMPAT_SYSCALL_DEFINE1(s390_setuid16, u16, uid)
+{
+ return sys_setuid(low2highuid(uid));
+}
+
+COMPAT_SYSCALL_DEFINE3(s390_setresuid16, u16, ruid, u16, euid, u16, suid)
+{
+ return sys_setresuid(low2highuid(ruid), low2highuid(euid),
+ low2highuid(suid));
+}
+
+COMPAT_SYSCALL_DEFINE3(s390_getresuid16, u16 __user *, ruidp,
+ u16 __user *, euidp, u16 __user *, suidp)
+{
+ const struct cred *cred = current_cred();
+ int retval;
+ u16 ruid, euid, suid;
+
+ ruid = high2lowuid(from_kuid_munged(cred->user_ns, cred->uid));
+ euid = high2lowuid(from_kuid_munged(cred->user_ns, cred->euid));
+ suid = high2lowuid(from_kuid_munged(cred->user_ns, cred->suid));
+
+ if (!(retval = put_user(ruid, ruidp)) &&
+ !(retval = put_user(euid, euidp)))
+ retval = put_user(suid, suidp);
+
+ return retval;
+}
+
+COMPAT_SYSCALL_DEFINE3(s390_setresgid16, u16, rgid, u16, egid, u16, sgid)
+{
+ return sys_setresgid(low2highgid(rgid), low2highgid(egid),
+ low2highgid(sgid));
+}
+
+COMPAT_SYSCALL_DEFINE3(s390_getresgid16, u16 __user *, rgidp,
+ u16 __user *, egidp, u16 __user *, sgidp)
+{
+ const struct cred *cred = current_cred();
+ int retval;
+ u16 rgid, egid, sgid;
+
+ rgid = high2lowgid(from_kgid_munged(cred->user_ns, cred->gid));
+ egid = high2lowgid(from_kgid_munged(cred->user_ns, cred->egid));
+ sgid = high2lowgid(from_kgid_munged(cred->user_ns, cred->sgid));
+
+ if (!(retval = put_user(rgid, rgidp)) &&
+ !(retval = put_user(egid, egidp)))
+ retval = put_user(sgid, sgidp);
+
+ return retval;
+}
+
+COMPAT_SYSCALL_DEFINE1(s390_setfsuid16, u16, uid)
+{
+ return sys_setfsuid(low2highuid(uid));
+}
+
+COMPAT_SYSCALL_DEFINE1(s390_setfsgid16, u16, gid)
+{
+ return sys_setfsgid(low2highgid(gid));
+}
+
+static int groups16_to_user(u16 __user *grouplist, struct group_info *group_info)
+{
+ struct user_namespace *user_ns = current_user_ns();
+ int i;
+ u16 group;
+ kgid_t kgid;
+
+ for (i = 0; i < group_info->ngroups; i++) {
+ kgid = group_info->gid[i];
+ group = (u16)from_kgid_munged(user_ns, kgid);
+ if (put_user(group, grouplist+i))
+ return -EFAULT;
+ }
+
+ return 0;
+}
+
+static int groups16_from_user(struct group_info *group_info, u16 __user *grouplist)
+{
+ struct user_namespace *user_ns = current_user_ns();
+ int i;
+ u16 group;
+ kgid_t kgid;
+
+ for (i = 0; i < group_info->ngroups; i++) {
+ if (get_user(group, grouplist+i))
+ return -EFAULT;
+
+ kgid = make_kgid(user_ns, (gid_t)group);
+ if (!gid_valid(kgid))
+ return -EINVAL;
+
+ group_info->gid[i] = kgid;
+ }
+
+ return 0;
+}
+
+COMPAT_SYSCALL_DEFINE2(s390_getgroups16, int, gidsetsize, u16 __user *, grouplist)
+{
+ const struct cred *cred = current_cred();
+ int i;
+
+ if (gidsetsize < 0)
+ return -EINVAL;
+
+ get_group_info(cred->group_info);
+ i = cred->group_info->ngroups;
+ if (gidsetsize) {
+ if (i > gidsetsize) {
+ i = -EINVAL;
+ goto out;
+ }
+ if (groups16_to_user(grouplist, cred->group_info)) {
+ i = -EFAULT;
+ goto out;
+ }
+ }
+out:
+ put_group_info(cred->group_info);
+ return i;
+}
+
+COMPAT_SYSCALL_DEFINE2(s390_setgroups16, int, gidsetsize, u16 __user *, grouplist)
+{
+ struct group_info *group_info;
+ int retval;
+
+ if (!may_setgroups())
+ return -EPERM;
+ if ((unsigned)gidsetsize > NGROUPS_MAX)
+ return -EINVAL;
+
+ group_info = groups_alloc(gidsetsize);
+ if (!group_info)
+ return -ENOMEM;
+ retval = groups16_from_user(group_info, grouplist);
+ if (retval) {
+ put_group_info(group_info);
+ return retval;
+ }
+
+ groups_sort(group_info);
+ retval = set_current_groups(group_info);
+ put_group_info(group_info);
+
+ return retval;
+}
+
+COMPAT_SYSCALL_DEFINE0(s390_getuid16)
+{
+ return high2lowuid(from_kuid_munged(current_user_ns(), current_uid()));
+}
+
+COMPAT_SYSCALL_DEFINE0(s390_geteuid16)
+{
+ return high2lowuid(from_kuid_munged(current_user_ns(), current_euid()));
+}
+
+COMPAT_SYSCALL_DEFINE0(s390_getgid16)
+{
+ return high2lowgid(from_kgid_munged(current_user_ns(), current_gid()));
+}
+
+COMPAT_SYSCALL_DEFINE0(s390_getegid16)
+{
+ return high2lowgid(from_kgid_munged(current_user_ns(), current_egid()));
+}
+
+#ifdef CONFIG_SYSVIPC
+COMPAT_SYSCALL_DEFINE5(s390_ipc, uint, call, int, first, compat_ulong_t, second,
+ compat_ulong_t, third, compat_uptr_t, ptr)
+{
+ if (call >> 16) /* hack for backward compatibility */
+ return -EINVAL;
+ return compat_sys_ipc(call, first, second, third, ptr, third);
+}
+#endif
+
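+/*
+ * 31 bit user space passes 64 bit values (e.g. file offsets) split into
+ * two 32 bit registers; the wrappers below reassemble them with
+ * (unsigned long)high << 32 | low before calling the native code.
+ */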
+COMPAT_SYSCALL_DEFINE3(s390_truncate64, const char __user *, path, u32, high, u32, low)
+{
+ return ksys_truncate(path, (unsigned long)high << 32 | low);
+}
+
+COMPAT_SYSCALL_DEFINE3(s390_ftruncate64, unsigned int, fd, u32, high, u32, low)
+{
+ return ksys_ftruncate(fd, (unsigned long)high << 32 | low);
+}
+
+COMPAT_SYSCALL_DEFINE5(s390_pread64, unsigned int, fd, char __user *, ubuf,
+ compat_size_t, count, u32, high, u32, low)
+{
+ if ((compat_ssize_t) count < 0)
+ return -EINVAL;
+ return ksys_pread64(fd, ubuf, count, (unsigned long)high << 32 | low);
+}
+
+COMPAT_SYSCALL_DEFINE5(s390_pwrite64, unsigned int, fd, const char __user *, ubuf,
+ compat_size_t, count, u32, high, u32, low)
+{
+ if ((compat_ssize_t) count < 0)
+ return -EINVAL;
+ return ksys_pwrite64(fd, ubuf, count, (unsigned long)high << 32 | low);
+}
+
+COMPAT_SYSCALL_DEFINE4(s390_readahead, int, fd, u32, high, u32, low, s32, count)
+{
+ return ksys_readahead(fd, (unsigned long)high << 32 | low, count);
+}
+
+struct stat64_emu31 {
+ unsigned long long st_dev;
+ unsigned int __pad1;
+#define STAT64_HAS_BROKEN_ST_INO 1
+ u32 __st_ino;
+ unsigned int st_mode;
+ unsigned int st_nlink;
+ u32 st_uid;
+ u32 st_gid;
+ unsigned long long st_rdev;
+ unsigned int __pad3;
+ long st_size;
+ u32 st_blksize;
+ unsigned char __pad4[4];
+ u32 __pad5; /* future possible st_blocks high bits */
+ u32 st_blocks; /* Number of 512-byte blocks allocated. */
+ u32 st_atime;
+ u32 __pad6;
+ u32 st_mtime;
+ u32 __pad7;
+ u32 st_ctime;
+ u32 __pad8; /* will be high 32 bits of ctime someday */
+ unsigned long st_ino;
+};
+
+static int cp_stat64(struct stat64_emu31 __user *ubuf, struct kstat *stat)
+{
+ struct stat64_emu31 tmp;
+
+ memset(&tmp, 0, sizeof(tmp));
+
+ tmp.st_dev = huge_encode_dev(stat->dev);
+ tmp.st_ino = stat->ino;
+ tmp.__st_ino = (u32)stat->ino;
+ tmp.st_mode = stat->mode;
+ tmp.st_nlink = (unsigned int)stat->nlink;
+ tmp.st_uid = from_kuid_munged(current_user_ns(), stat->uid);
+ tmp.st_gid = from_kgid_munged(current_user_ns(), stat->gid);
+ tmp.st_rdev = huge_encode_dev(stat->rdev);
+ tmp.st_size = stat->size;
+ tmp.st_blksize = (u32)stat->blksize;
+ tmp.st_blocks = (u32)stat->blocks;
+ tmp.st_atime = (u32)stat->atime.tv_sec;
+ tmp.st_mtime = (u32)stat->mtime.tv_sec;
+ tmp.st_ctime = (u32)stat->ctime.tv_sec;
+
+ return copy_to_user(ubuf, &tmp, sizeof(tmp)) ? -EFAULT : 0;
+}
+
+COMPAT_SYSCALL_DEFINE2(s390_stat64, const char __user *, filename, struct stat64_emu31 __user *, statbuf)
+{
+ struct kstat stat;
+ int ret = vfs_stat(filename, &stat);
+ if (!ret)
+ ret = cp_stat64(statbuf, &stat);
+ return ret;
+}
+
+COMPAT_SYSCALL_DEFINE2(s390_lstat64, const char __user *, filename, struct stat64_emu31 __user *, statbuf)
+{
+ struct kstat stat;
+ int ret = vfs_lstat(filename, &stat);
+ if (!ret)
+ ret = cp_stat64(statbuf, &stat);
+ return ret;
+}
+
+COMPAT_SYSCALL_DEFINE2(s390_fstat64, unsigned int, fd, struct stat64_emu31 __user *, statbuf)
+{
+ struct kstat stat;
+ int ret = vfs_fstat(fd, &stat);
+ if (!ret)
+ ret = cp_stat64(statbuf, &stat);
+ return ret;
+}
+
+COMPAT_SYSCALL_DEFINE4(s390_fstatat64, unsigned int, dfd, const char __user *, filename,
+ struct stat64_emu31 __user *, statbuf, int, flag)
+{
+ struct kstat stat;
+ int error;
+
+ error = vfs_fstatat(dfd, filename, &stat, flag);
+ if (error)
+ return error;
+ return cp_stat64(statbuf, &stat);
+}
+
+/*
+ * Linux/i386 originally couldn't handle more than
+ * 4 system call parameters, so these system calls used a memory
+ * block for parameter passing.
+ */
+
+struct mmap_arg_struct_emu31 {
+ compat_ulong_t addr;
+ compat_ulong_t len;
+ compat_ulong_t prot;
+ compat_ulong_t flags;
+ compat_ulong_t fd;
+ compat_ulong_t offset;
+};
+
+COMPAT_SYSCALL_DEFINE1(s390_old_mmap, struct mmap_arg_struct_emu31 __user *, arg)
+{
+ struct mmap_arg_struct_emu31 a;
+
+ if (copy_from_user(&a, arg, sizeof(a)))
+ return -EFAULT;
+ if (a.offset & ~PAGE_MASK)
+ return -EINVAL;
+ return ksys_mmap_pgoff(a.addr, a.len, a.prot, a.flags, a.fd,
+ a.offset >> PAGE_SHIFT);
+}
+
+COMPAT_SYSCALL_DEFINE1(s390_mmap2, struct mmap_arg_struct_emu31 __user *, arg)
+{
+ struct mmap_arg_struct_emu31 a;
+
+ if (copy_from_user(&a, arg, sizeof(a)))
+ return -EFAULT;
+ return ksys_mmap_pgoff(a.addr, a.len, a.prot, a.flags, a.fd, a.offset);
+}
+
+COMPAT_SYSCALL_DEFINE3(s390_read, unsigned int, fd, char __user *, buf, compat_size_t, count)
+{
+ if ((compat_ssize_t) count < 0)
+ return -EINVAL;
+
+ return ksys_read(fd, buf, count);
+}
+
+COMPAT_SYSCALL_DEFINE3(s390_write, unsigned int, fd, const char __user *, buf, compat_size_t, count)
+{
+ if ((compat_ssize_t) count < 0)
+ return -EINVAL;
+
+ return ksys_write(fd, buf, count);
+}
+
+/*
+ * 31 bit emulation wrapper functions for sys_fadvise64/fadvise64_64.
+ * These need to rewrite the advise values for POSIX_FADV_{DONTNEED,NOREUSE}
+ * because the 31 bit values differ from the 64 bit values.
+ */
+
+COMPAT_SYSCALL_DEFINE5(s390_fadvise64, int, fd, u32, high, u32, low, compat_size_t, len, int, advise)
+{
+ if (advise == 4)
+ advise = POSIX_FADV_DONTNEED;
+ else if (advise == 5)
+ advise = POSIX_FADV_NOREUSE;
+ return ksys_fadvise64_64(fd, (unsigned long)high << 32 | low, len,
+ advise);
+}
+
+struct fadvise64_64_args {
+ int fd;
+ long long offset;
+ long long len;
+ int advice;
+};
+
+COMPAT_SYSCALL_DEFINE1(s390_fadvise64_64, struct fadvise64_64_args __user *, args)
+{
+ struct fadvise64_64_args a;
+
+ if (copy_from_user(&a, args, sizeof(a)))
+ return -EFAULT;
+ if (a.advice == 4)
+ a.advice = POSIX_FADV_DONTNEED;
+ else if (a.advice == 5)
+ a.advice = POSIX_FADV_NOREUSE;
+ return ksys_fadvise64_64(a.fd, a.offset, a.len, a.advice);
+}
+
+COMPAT_SYSCALL_DEFINE6(s390_sync_file_range, int, fd, u32, offhigh, u32, offlow,
+ u32, nhigh, u32, nlow, unsigned int, flags)
+{
+ return ksys_sync_file_range(fd, ((loff_t)offhigh << 32) + offlow,
+ ((u64)nhigh << 32) + nlow, flags);
+}
+
+COMPAT_SYSCALL_DEFINE6(s390_fallocate, int, fd, int, mode, u32, offhigh, u32, offlow,
+ u32, lenhigh, u32, lenlow)
+{
+ return ksys_fallocate(fd, mode, ((loff_t)offhigh << 32) + offlow,
+ ((u64)lenhigh << 32) + lenlow);
+}
diff --git a/arch/s390/kernel/compat_linux.h b/arch/s390/kernel/compat_linux.h
new file mode 100644
index 000000000..64509e7db
--- /dev/null
+++ b/arch/s390/kernel/compat_linux.h
@@ -0,0 +1,130 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_S390X_S390_H
+#define _ASM_S390X_S390_H
+
+#include <linux/compat.h>
+#include <linux/socket.h>
+#include <linux/syscalls.h>
+
+/*
+ * Macro that masks the high order bit of a 32 bit pointer and
+ * converts it to a 64 bit pointer.
+ */
+#define A(__x) ((unsigned long)((__x) & 0x7FFFFFFFUL))
+#define AA(__x) \
+ ((unsigned long)(__x))
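+/*
+ * Illustrative example (not part of the original source): in 31 bit
+ * addressing mode the topmost bit of a user supplied pointer is not an
+ * address bit, so A(0x80001234) yields 0x00001234, while AA() simply
+ * widens the value unchanged.
+ */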
+
+/* Now 32bit compatibility types */
+struct ipc_kludge_32 {
+ __u32 msgp; /* pointer */
+ __s32 msgtyp;
+};
+
+/* asm/sigcontext.h */
+typedef union
+{
+ __u64 d;
+ __u32 f;
+} freg_t32;
+
+typedef struct
+{
+ unsigned int fpc;
+ unsigned int pad;
+ freg_t32 fprs[__NUM_FPRS];
+} _s390_fp_regs32;
+
+typedef struct
+{
+ __u32 mask;
+ __u32 addr;
+} _psw_t32 __attribute__ ((aligned(8)));
+
+typedef struct
+{
+ _psw_t32 psw;
+ __u32 gprs[__NUM_GPRS];
+ __u32 acrs[__NUM_ACRS];
+} _s390_regs_common32;
+
+typedef struct
+{
+ _s390_regs_common32 regs;
+ _s390_fp_regs32 fpregs;
+} _sigregs32;
+
+typedef struct
+{
+ __u32 gprs_high[__NUM_GPRS];
+ __u64 vxrs_low[__NUM_VXRS_LOW];
+ __vector128 vxrs_high[__NUM_VXRS_HIGH];
+ __u8 __reserved[128];
+} _sigregs_ext32;
+
+#define _SIGCONTEXT_NSIG32 64
+#define _SIGCONTEXT_NSIG_BPW32 32
+#define __SIGNAL_FRAMESIZE32 96
+#define _SIGMASK_COPY_SIZE32 (sizeof(u32)*2)
+
+struct sigcontext32
+{
+ __u32 oldmask[_COMPAT_NSIG_WORDS];
+ __u32 sregs; /* pointer */
+};
+
+/* asm/signal.h */
+
+/* asm/ucontext.h */
+struct ucontext32 {
+ __u32 uc_flags;
+ __u32 uc_link; /* pointer */
+ compat_stack_t uc_stack;
+ _sigregs32 uc_mcontext;
+ compat_sigset_t uc_sigmask;
+ /* Allow for uc_sigmask growth. Glibc uses a 1024-bit sigset_t. */
+ unsigned char __unused[128 - sizeof(compat_sigset_t)];
+ _sigregs_ext32 uc_mcontext_ext;
+};
+
+struct stat64_emu31;
+struct mmap_arg_struct_emu31;
+struct fadvise64_64_args;
+
+long compat_sys_s390_chown16(const char __user *filename, u16 user, u16 group);
+long compat_sys_s390_lchown16(const char __user *filename, u16 user, u16 group);
+long compat_sys_s390_fchown16(unsigned int fd, u16 user, u16 group);
+long compat_sys_s390_setregid16(u16 rgid, u16 egid);
+long compat_sys_s390_setgid16(u16 gid);
+long compat_sys_s390_setreuid16(u16 ruid, u16 euid);
+long compat_sys_s390_setuid16(u16 uid);
+long compat_sys_s390_setresuid16(u16 ruid, u16 euid, u16 suid);
+long compat_sys_s390_getresuid16(u16 __user *ruid, u16 __user *euid, u16 __user *suid);
+long compat_sys_s390_setresgid16(u16 rgid, u16 egid, u16 sgid);
+long compat_sys_s390_getresgid16(u16 __user *rgid, u16 __user *egid, u16 __user *sgid);
+long compat_sys_s390_setfsuid16(u16 uid);
+long compat_sys_s390_setfsgid16(u16 gid);
+long compat_sys_s390_getgroups16(int gidsetsize, u16 __user *grouplist);
+long compat_sys_s390_setgroups16(int gidsetsize, u16 __user *grouplist);
+long compat_sys_s390_getuid16(void);
+long compat_sys_s390_geteuid16(void);
+long compat_sys_s390_getgid16(void);
+long compat_sys_s390_getegid16(void);
+long compat_sys_s390_truncate64(const char __user *path, u32 high, u32 low);
+long compat_sys_s390_ftruncate64(unsigned int fd, u32 high, u32 low);
+long compat_sys_s390_pread64(unsigned int fd, char __user *ubuf, compat_size_t count, u32 high, u32 low);
+long compat_sys_s390_pwrite64(unsigned int fd, const char __user *ubuf, compat_size_t count, u32 high, u32 low);
+long compat_sys_s390_readahead(int fd, u32 high, u32 low, s32 count);
+long compat_sys_s390_stat64(const char __user *filename, struct stat64_emu31 __user *statbuf);
+long compat_sys_s390_lstat64(const char __user *filename, struct stat64_emu31 __user *statbuf);
+long compat_sys_s390_fstat64(unsigned int fd, struct stat64_emu31 __user *statbuf);
+long compat_sys_s390_fstatat64(unsigned int dfd, const char __user *filename, struct stat64_emu31 __user *statbuf, int flag);
+long compat_sys_s390_old_mmap(struct mmap_arg_struct_emu31 __user *arg);
+long compat_sys_s390_mmap2(struct mmap_arg_struct_emu31 __user *arg);
+long compat_sys_s390_read(unsigned int fd, char __user * buf, compat_size_t count);
+long compat_sys_s390_write(unsigned int fd, const char __user * buf, compat_size_t count);
+long compat_sys_s390_fadvise64(int fd, u32 high, u32 low, compat_size_t len, int advise);
+long compat_sys_s390_fadvise64_64(struct fadvise64_64_args __user *args);
+long compat_sys_s390_sync_file_range(int fd, u32 offhigh, u32 offlow, u32 nhigh, u32 nlow, unsigned int flags);
+long compat_sys_s390_fallocate(int fd, int mode, u32 offhigh, u32 offlow, u32 lenhigh, u32 lenlow);
+long compat_sys_sigreturn(void);
+long compat_sys_rt_sigreturn(void);
+
+#endif /* _ASM_S390X_S390_H */
diff --git a/arch/s390/kernel/compat_ptrace.h b/arch/s390/kernel/compat_ptrace.h
new file mode 100644
index 000000000..3c400fc7e
--- /dev/null
+++ b/arch/s390/kernel/compat_ptrace.h
@@ -0,0 +1,64 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _PTRACE32_H
+#define _PTRACE32_H
+
+#include <asm/ptrace.h> /* needed for NUM_CR_WORDS */
+#include "compat_linux.h" /* needed for psw_compat_t */
+
+struct compat_per_struct_kernel {
+ __u32 cr9; /* PER control bits */
+ __u32 cr10; /* PER starting address */
+ __u32 cr11; /* PER ending address */
+ __u32 bits; /* Obsolete software bits */
+ __u32 starting_addr; /* User specified start address */
+ __u32 ending_addr; /* User specified end address */
+ __u16 perc_atmid; /* PER trap ATMID */
+ __u32 address; /* PER trap instruction address */
+ __u8 access_id; /* PER trap access identification */
+};
+
+struct compat_user_regs_struct
+{
+ psw_compat_t psw;
+ u32 gprs[NUM_GPRS];
+ u32 acrs[NUM_ACRS];
+ u32 orig_gpr2;
+ /* nb: there's a 4-byte hole here */
+ s390_fp_regs fp_regs;
+ /*
+ * These per registers are in here so that gdb can modify them
+ * itself as there is no "official" ptrace interface for hardware
+ * watchpoints. This is the way Intel does it.
+ */
+ struct compat_per_struct_kernel per_info;
+ u32 ieee_instruction_pointer; /* obsolete, always 0 */
+};
+
+struct compat_user {
+ /* We start with the registers, to mimic the way that "memory"
+ is returned from the ptrace(3,...) function. */
+ struct compat_user_regs_struct regs;
+ /* The rest of this junk is to help gdb figure out what goes where */
+ u32 u_tsize; /* Text segment size (pages). */
+ u32 u_dsize; /* Data segment size (pages). */
+ u32 u_ssize; /* Stack segment size (pages). */
+ u32 start_code; /* Starting virtual address of text. */
+ u32 start_stack; /* Starting virtual address of stack area.
+ This is actually the bottom of the stack,
+ the top of the stack is always found in the
+ esp register. */
+ s32 signal; /* Signal that caused the core dump. */
+ u32 u_ar0; /* Used by gdb to help find the values for */
+ /* the registers. */
+ u32 magic; /* To uniquely identify a core file */
+ char u_comm[32]; /* User command that was responsible */
+};
+
+typedef struct
+{
+ __u32 len;
+ __u32 kernel_addr;
+ __u32 process_addr;
+} compat_ptrace_area;
+
+#endif /* _PTRACE32_H */
diff --git a/arch/s390/kernel/compat_signal.c b/arch/s390/kernel/compat_signal.c
new file mode 100644
index 000000000..6f2a193cc
--- /dev/null
+++ b/arch/s390/kernel/compat_signal.c
@@ -0,0 +1,428 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright IBM Corp. 2000, 2006
+ * Author(s): Denis Joseph Barrow (djbarrow@de.ibm.com,barrow_dj@yahoo.com)
+ * Gerhard Tonn (ton@de.ibm.com)
+ *
+ * Copyright (C) 1991, 1992 Linus Torvalds
+ *
+ * 1997-11-28 Modified for POSIX.1b signals by Richard Henderson
+ */
+
+#include <linux/compat.h>
+#include <linux/sched.h>
+#include <linux/sched/task_stack.h>
+#include <linux/mm.h>
+#include <linux/smp.h>
+#include <linux/kernel.h>
+#include <linux/signal.h>
+#include <linux/errno.h>
+#include <linux/wait.h>
+#include <linux/ptrace.h>
+#include <linux/unistd.h>
+#include <linux/stddef.h>
+#include <linux/tty.h>
+#include <linux/personality.h>
+#include <linux/binfmts.h>
+#include <asm/ucontext.h>
+#include <linux/uaccess.h>
+#include <asm/lowcore.h>
+#include <asm/switch_to.h>
+#include "compat_linux.h"
+#include "compat_ptrace.h"
+#include "entry.h"
+
+typedef struct
+{
+ __u8 callee_used_stack[__SIGNAL_FRAMESIZE32];
+ struct sigcontext32 sc;
+ _sigregs32 sregs;
+ int signo;
+ _sigregs_ext32 sregs_ext;
+ __u16 svc_insn; /* Offset of svc_insn is NOT fixed! */
+} sigframe32;
+
+typedef struct
+{
+ __u8 callee_used_stack[__SIGNAL_FRAMESIZE32];
+ __u16 svc_insn;
+ compat_siginfo_t info;
+ struct ucontext32 uc;
+} rt_sigframe32;
+
+/* Store registers needed to create the signal frame */
+static void store_sigregs(void)
+{
+ save_access_regs(current->thread.acrs);
+ save_fpu_regs();
+}
+
+/* Load registers after signal return */
+static void load_sigregs(void)
+{
+ restore_access_regs(current->thread.acrs);
+}
+
+static int save_sigregs32(struct pt_regs *regs, _sigregs32 __user *sregs)
+{
+ _sigregs32 user_sregs;
+ int i;
+
+ user_sregs.regs.psw.mask = (__u32)(regs->psw.mask >> 32);
+ user_sregs.regs.psw.mask &= PSW32_MASK_USER | PSW32_MASK_RI;
+ user_sregs.regs.psw.mask |= PSW32_USER_BITS;
+ user_sregs.regs.psw.addr = (__u32) regs->psw.addr |
+ (__u32)(regs->psw.mask & PSW_MASK_BA);
+ for (i = 0; i < NUM_GPRS; i++)
+ user_sregs.regs.gprs[i] = (__u32) regs->gprs[i];
+ memcpy(&user_sregs.regs.acrs, current->thread.acrs,
+ sizeof(user_sregs.regs.acrs));
+ fpregs_store((_s390_fp_regs *) &user_sregs.fpregs, &current->thread.fpu);
+ if (__copy_to_user(sregs, &user_sregs, sizeof(_sigregs32)))
+ return -EFAULT;
+ return 0;
+}
+
+static int restore_sigregs32(struct pt_regs *regs, _sigregs32 __user *sregs)
+{
+ _sigregs32 user_sregs;
+ int i;
+
+ /* Always make any pending restarted system call return -EINTR */
+ current->restart_block.fn = do_no_restart_syscall;
+
+ if (__copy_from_user(&user_sregs, &sregs->regs, sizeof(user_sregs)))
+ return -EFAULT;
+
+ if (!is_ri_task(current) && (user_sregs.regs.psw.mask & PSW32_MASK_RI))
+ return -EINVAL;
+
+ /* Test the floating-point-control word. */
+ if (test_fp_ctl(user_sregs.fpregs.fpc))
+ return -EINVAL;
+
+ /* Use regs->psw.mask instead of PSW_USER_BITS to preserve PER bit. */
+ regs->psw.mask = (regs->psw.mask & ~(PSW_MASK_USER | PSW_MASK_RI)) |
+ (__u64)(user_sregs.regs.psw.mask & PSW32_MASK_USER) << 32 |
+ (__u64)(user_sregs.regs.psw.mask & PSW32_MASK_RI) << 32 |
+ (__u64)(user_sregs.regs.psw.addr & PSW32_ADDR_AMODE);
+ /* Check for invalid user address space control. */
+ if ((regs->psw.mask & PSW_MASK_ASC) == PSW_ASC_HOME)
+ regs->psw.mask = PSW_ASC_PRIMARY |
+ (regs->psw.mask & ~PSW_MASK_ASC);
+ regs->psw.addr = (__u64)(user_sregs.regs.psw.addr & PSW32_ADDR_INSN);
+ for (i = 0; i < NUM_GPRS; i++)
+ regs->gprs[i] = (__u64) user_sregs.regs.gprs[i];
+ memcpy(&current->thread.acrs, &user_sregs.regs.acrs,
+ sizeof(current->thread.acrs));
+ fpregs_load((_s390_fp_regs *) &user_sregs.fpregs, &current->thread.fpu);
+
+ clear_pt_regs_flag(regs, PIF_SYSCALL); /* No longer in a system call */
+ return 0;
+}
+
+static int save_sigregs_ext32(struct pt_regs *regs,
+ _sigregs_ext32 __user *sregs_ext)
+{
+ __u32 gprs_high[NUM_GPRS];
+ __u64 vxrs[__NUM_VXRS_LOW];
+ int i;
+
+ /* Save high gprs to signal stack */
+ for (i = 0; i < NUM_GPRS; i++)
+ gprs_high[i] = regs->gprs[i] >> 32;
+ if (__copy_to_user(&sregs_ext->gprs_high, &gprs_high,
+ sizeof(sregs_ext->gprs_high)))
+ return -EFAULT;
+
+ /* Save vector registers to signal stack */
+ if (MACHINE_HAS_VX) {
+ for (i = 0; i < __NUM_VXRS_LOW; i++)
+ vxrs[i] = *((__u64 *)(current->thread.fpu.vxrs + i) + 1);
+ if (__copy_to_user(&sregs_ext->vxrs_low, vxrs,
+ sizeof(sregs_ext->vxrs_low)) ||
+ __copy_to_user(&sregs_ext->vxrs_high,
+ current->thread.fpu.vxrs + __NUM_VXRS_LOW,
+ sizeof(sregs_ext->vxrs_high)))
+ return -EFAULT;
+ }
+ return 0;
+}
+
+static int restore_sigregs_ext32(struct pt_regs *regs,
+ _sigregs_ext32 __user *sregs_ext)
+{
+ __u32 gprs_high[NUM_GPRS];
+ __u64 vxrs[__NUM_VXRS_LOW];
+ int i;
+
+ /* Restore high gprs from signal stack */
+ if (__copy_from_user(&gprs_high, &sregs_ext->gprs_high,
+ sizeof(sregs_ext->gprs_high)))
+ return -EFAULT;
+ for (i = 0; i < NUM_GPRS; i++)
+ *(__u32 *)&regs->gprs[i] = gprs_high[i];
+
+ /* Restore vector registers from signal stack */
+ if (MACHINE_HAS_VX) {
+ if (__copy_from_user(vxrs, &sregs_ext->vxrs_low,
+ sizeof(sregs_ext->vxrs_low)) ||
+ __copy_from_user(current->thread.fpu.vxrs + __NUM_VXRS_LOW,
+ &sregs_ext->vxrs_high,
+ sizeof(sregs_ext->vxrs_high)))
+ return -EFAULT;
+ for (i = 0; i < __NUM_VXRS_LOW; i++)
+ *((__u64 *)(current->thread.fpu.vxrs + i) + 1) = vxrs[i];
+ }
+ return 0;
+}
+
+COMPAT_SYSCALL_DEFINE0(sigreturn)
+{
+ struct pt_regs *regs = task_pt_regs(current);
+ sigframe32 __user *frame = (sigframe32 __user *)regs->gprs[15];
+ sigset_t set;
+
+ if (get_compat_sigset(&set, (compat_sigset_t __user *)frame->sc.oldmask))
+ goto badframe;
+ set_current_blocked(&set);
+ save_fpu_regs();
+ if (restore_sigregs32(regs, &frame->sregs))
+ goto badframe;
+ if (restore_sigregs_ext32(regs, &frame->sregs_ext))
+ goto badframe;
+ load_sigregs();
+ return regs->gprs[2];
+badframe:
+ force_sig(SIGSEGV, current);
+ return 0;
+}
+
+COMPAT_SYSCALL_DEFINE0(rt_sigreturn)
+{
+ struct pt_regs *regs = task_pt_regs(current);
+ rt_sigframe32 __user *frame = (rt_sigframe32 __user *)regs->gprs[15];
+ sigset_t set;
+
+ if (get_compat_sigset(&set, &frame->uc.uc_sigmask))
+ goto badframe;
+ set_current_blocked(&set);
+ if (compat_restore_altstack(&frame->uc.uc_stack))
+ goto badframe;
+ save_fpu_regs();
+ if (restore_sigregs32(regs, &frame->uc.uc_mcontext))
+ goto badframe;
+ if (restore_sigregs_ext32(regs, &frame->uc.uc_mcontext_ext))
+ goto badframe;
+ load_sigregs();
+ return regs->gprs[2];
+badframe:
+ force_sig(SIGSEGV, current);
+ return 0;
+}
+
+/*
+ * Set up a signal frame.
+ */
+
+
+/*
+ * Determine which stack to use.
+ */
+static inline void __user *
+get_sigframe(struct k_sigaction *ka, struct pt_regs * regs, size_t frame_size)
+{
+ unsigned long sp;
+
+ /* Default to using normal stack */
+ sp = (unsigned long) A(regs->gprs[15]);
+
+ /* Overflow on alternate signal stack gives SIGSEGV. */
+ if (on_sig_stack(sp) && !on_sig_stack((sp - frame_size) & -8UL))
+ return (void __user *) -1UL;
+
+ /* This is the X/Open sanctioned signal stack switching. */
+ if (ka->sa.sa_flags & SA_ONSTACK) {
+ if (!sas_ss_flags(sp))
+ sp = current->sas_ss_sp + current->sas_ss_size;
+ }
+
+ return (void __user *)((sp - frame_size) & -8ul);
+}
+
+static int setup_frame32(struct ksignal *ksig, sigset_t *set,
+ struct pt_regs *regs)
+{
+ int sig = ksig->sig;
+ sigframe32 __user *frame;
+ unsigned long restorer;
+ size_t frame_size;
+
+ /*
+ * gprs_high are always present for 31-bit compat tasks.
+ * The space for vector registers is only allocated if
+ * the machine supports it
+ */
+ frame_size = sizeof(*frame) - sizeof(frame->sregs_ext.__reserved);
+ if (!MACHINE_HAS_VX)
+ frame_size -= sizeof(frame->sregs_ext.vxrs_low) +
+ sizeof(frame->sregs_ext.vxrs_high);
+ frame = get_sigframe(&ksig->ka, regs, frame_size);
+ if (frame == (void __user *) -1UL)
+ return -EFAULT;
+
+ /* Set up backchain. */
+ if (__put_user(regs->gprs[15], (unsigned int __user *) frame))
+ return -EFAULT;
+
+ /* Create struct sigcontext32 on the signal stack */
+ if (put_compat_sigset((compat_sigset_t __user *)frame->sc.oldmask,
+ set, sizeof(compat_sigset_t)))
+ return -EFAULT;
+ if (__put_user(ptr_to_compat(&frame->sregs), &frame->sc.sregs))
+ return -EFAULT;
+
+ /* Store registers needed to create the signal frame */
+ store_sigregs();
+
+ /* Create _sigregs32 on the signal stack */
+ if (save_sigregs32(regs, &frame->sregs))
+ return -EFAULT;
+
+ /* Place signal number on stack to allow backtrace from handler. */
+ if (__put_user(regs->gprs[2], (int __force __user *) &frame->signo))
+ return -EFAULT;
+
+ /* Create _sigregs_ext32 on the signal stack */
+ if (save_sigregs_ext32(regs, &frame->sregs_ext))
+ return -EFAULT;
+
+ /* Set up to return from userspace. If provided, use a stub
+ already in userspace. */
+ if (ksig->ka.sa.sa_flags & SA_RESTORER) {
+ restorer = (unsigned long __force)
+ ksig->ka.sa.sa_restorer | PSW32_ADDR_AMODE;
+ } else {
+ /* Signal frames without vector registers are short! */
+ __u16 __user *svc = (void __user *) frame + frame_size - 2;
+ if (__put_user(S390_SYSCALL_OPCODE | __NR_sigreturn, svc))
+ return -EFAULT;
+ restorer = (unsigned long __force) svc | PSW32_ADDR_AMODE;
+ }
+
+ /* Set up registers for signal handler */
+ regs->gprs[14] = restorer;
+ regs->gprs[15] = (__force __u64) frame;
+ /* Force 31 bit amode and default user address space control. */
+ regs->psw.mask = PSW_MASK_BA |
+ (PSW_USER_BITS & PSW_MASK_ASC) |
+ (regs->psw.mask & ~PSW_MASK_ASC);
+ regs->psw.addr = (__force __u64) ksig->ka.sa.sa_handler;
+
+ regs->gprs[2] = sig;
+ regs->gprs[3] = (__force __u64) &frame->sc;
+
+ /* We forgot to include these in the sigcontext.
+ To avoid breaking binary compatibility, they are passed as args. */
+ if (sig == SIGSEGV || sig == SIGBUS || sig == SIGILL ||
+ sig == SIGTRAP || sig == SIGFPE) {
+ /* set extra registers only for synchronous signals */
+ regs->gprs[4] = regs->int_code & 127;
+ regs->gprs[5] = regs->int_parm_long;
+ regs->gprs[6] = current->thread.last_break;
+ }
+
+ return 0;
+}
+
+static int setup_rt_frame32(struct ksignal *ksig, sigset_t *set,
+ struct pt_regs *regs)
+{
+ rt_sigframe32 __user *frame;
+ unsigned long restorer;
+ size_t frame_size;
+ u32 uc_flags;
+
+ frame_size = sizeof(*frame) -
+ sizeof(frame->uc.uc_mcontext_ext.__reserved);
+ /*
+ * gprs_high are always present for 31-bit compat tasks.
+ * The space for vector registers is only allocated if
+ * the machine supports it
+ */
+ uc_flags = UC_GPRS_HIGH;
+ if (MACHINE_HAS_VX) {
+ uc_flags |= UC_VXRS;
+ } else
+ frame_size -= sizeof(frame->uc.uc_mcontext_ext.vxrs_low) +
+ sizeof(frame->uc.uc_mcontext_ext.vxrs_high);
+ frame = get_sigframe(&ksig->ka, regs, frame_size);
+ if (frame == (void __user *) -1UL)
+ return -EFAULT;
+
+ /* Set up backchain. */
+ if (__put_user(regs->gprs[15], (unsigned int __force __user *) frame))
+ return -EFAULT;
+
+ /* Set up to return from userspace. If provided, use a stub
+ already in userspace. */
+ if (ksig->ka.sa.sa_flags & SA_RESTORER) {
+ restorer = (unsigned long __force)
+ ksig->ka.sa.sa_restorer | PSW32_ADDR_AMODE;
+ } else {
+ __u16 __user *svc = &frame->svc_insn;
+ if (__put_user(S390_SYSCALL_OPCODE | __NR_rt_sigreturn, svc))
+ return -EFAULT;
+ restorer = (unsigned long __force) svc | PSW32_ADDR_AMODE;
+ }
+
+ /* Create siginfo on the signal stack */
+ if (copy_siginfo_to_user32(&frame->info, &ksig->info))
+ return -EFAULT;
+
+ /* Store registers needed to create the signal frame */
+ store_sigregs();
+
+ /* Create ucontext on the signal stack. */
+ if (__put_user(uc_flags, &frame->uc.uc_flags) ||
+ __put_user(0, &frame->uc.uc_link) ||
+ __compat_save_altstack(&frame->uc.uc_stack, regs->gprs[15]) ||
+ save_sigregs32(regs, &frame->uc.uc_mcontext) ||
+ put_compat_sigset(&frame->uc.uc_sigmask, set, sizeof(compat_sigset_t)) ||
+ save_sigregs_ext32(regs, &frame->uc.uc_mcontext_ext))
+ return -EFAULT;
+
+ /* Set up registers for signal handler */
+ regs->gprs[14] = restorer;
+ regs->gprs[15] = (__force __u64) frame;
+ /* Force 31 bit amode and default user address space control. */
+ regs->psw.mask = PSW_MASK_BA |
+ (PSW_USER_BITS & PSW_MASK_ASC) |
+ (regs->psw.mask & ~PSW_MASK_ASC);
+ regs->psw.addr = (__u64 __force) ksig->ka.sa.sa_handler;
+
+ regs->gprs[2] = ksig->sig;
+ regs->gprs[3] = (__force __u64) &frame->info;
+ regs->gprs[4] = (__force __u64) &frame->uc;
+ regs->gprs[5] = current->thread.last_break;
+ return 0;
+}
+
+/*
+ * OK, we're invoking a handler
+ */
+
+void handle_signal32(struct ksignal *ksig, sigset_t *oldset,
+ struct pt_regs *regs)
+{
+ int ret;
+
+ /* Set up the stack frame */
+ if (ksig->ka.sa.sa_flags & SA_SIGINFO)
+ ret = setup_rt_frame32(ksig, oldset, regs);
+ else
+ ret = setup_frame32(ksig, oldset, regs);
+
+ signal_setup_done(ret, ksig, test_thread_flag(TIF_SINGLE_STEP));
+}
+
diff --git a/arch/s390/kernel/compat_wrapper.c b/arch/s390/kernel/compat_wrapper.c
new file mode 100644
index 000000000..2ce28bf0c
--- /dev/null
+++ b/arch/s390/kernel/compat_wrapper.c
@@ -0,0 +1,186 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Compat system call wrappers.
+ *
+ * Copyright IBM Corp. 2014
+ */
+
+#include <linux/syscalls.h>
+#include <linux/compat.h>
+#include "entry.h"
+
+#define COMPAT_SYSCALL_WRAP1(name, ...) \
+ COMPAT_SYSCALL_WRAPx(1, _##name, __VA_ARGS__)
+#define COMPAT_SYSCALL_WRAP2(name, ...) \
+ COMPAT_SYSCALL_WRAPx(2, _##name, __VA_ARGS__)
+#define COMPAT_SYSCALL_WRAP3(name, ...) \
+ COMPAT_SYSCALL_WRAPx(3, _##name, __VA_ARGS__)
+#define COMPAT_SYSCALL_WRAP4(name, ...) \
+ COMPAT_SYSCALL_WRAPx(4, _##name, __VA_ARGS__)
+#define COMPAT_SYSCALL_WRAP5(name, ...) \
+ COMPAT_SYSCALL_WRAPx(5, _##name, __VA_ARGS__)
+#define COMPAT_SYSCALL_WRAP6(name, ...) \
+ COMPAT_SYSCALL_WRAPx(6, _##name, __VA_ARGS__)
+
+#define __SC_COMPAT_TYPE(t, a) \
+ __typeof(__builtin_choose_expr(sizeof(t) > 4, 0L, (t)0)) a
+
+#define __SC_COMPAT_CAST(t, a) \
+({ \
+ long __ReS = a; \
+ \
+ BUILD_BUG_ON((sizeof(t) > 4) && !__TYPE_IS_L(t) && \
+ !__TYPE_IS_UL(t) && !__TYPE_IS_PTR(t)); \
+ if (__TYPE_IS_L(t)) \
+ __ReS = (s32)a; \
+ if (__TYPE_IS_UL(t)) \
+ __ReS = (u32)a; \
+ if (__TYPE_IS_PTR(t)) \
+ __ReS = a & 0x7fffffff; \
+ (t)__ReS; \
+})
+
+/*
+ * The COMPAT_SYSCALL_WRAP macro generates system call wrappers to be used by
+ * compat tasks. These wrappers will only be used for system calls where only
+ * the system call arguments need sign or zero extension or zeroing of the upper
+ * 33 bits of pointers.
+ * Note: since the wrapper function will afterwards call a system call which
+ * again performs zero and sign extension for all system call arguments with
+ * a size of less than eight bytes, these compat wrappers only touch those
+ * system call arguments with a size of eight bytes ((unsigned) long and
+ * pointers). Zero and sign extension for e.g. int parameters will be done by
+ * the regular system call wrappers.
+ */
+#define COMPAT_SYSCALL_WRAPx(x, name, ...) \
+asmlinkage long sys##name(__MAP(x,__SC_DECL,__VA_ARGS__)); \
+asmlinkage long notrace compat_sys##name(__MAP(x,__SC_COMPAT_TYPE,__VA_ARGS__));\
+asmlinkage long notrace compat_sys##name(__MAP(x,__SC_COMPAT_TYPE,__VA_ARGS__)) \
+{ \
+ return sys##name(__MAP(x,__SC_COMPAT_CAST,__VA_ARGS__)); \
+}
+
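+/*
+ * Illustrative expansion (a sketch, not generated by the build): for
+ * COMPAT_SYSCALL_WRAP2(munmap, unsigned long, addr, size_t, len)
+ * the macro roughly produces
+ *
+ *	asmlinkage long compat_sys_munmap(long addr, long len)
+ *	{
+ *		return sys_munmap((unsigned long)(u32)addr, (size_t)(u32)len);
+ *	}
+ *
+ * i.e. only the upper half of each eight byte argument is discarded
+ * before the native system call is invoked.
+ */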
+COMPAT_SYSCALL_WRAP2(creat, const char __user *, pathname, umode_t, mode);
+COMPAT_SYSCALL_WRAP2(link, const char __user *, oldname, const char __user *, newname);
+COMPAT_SYSCALL_WRAP1(unlink, const char __user *, pathname);
+COMPAT_SYSCALL_WRAP1(chdir, const char __user *, filename);
+COMPAT_SYSCALL_WRAP3(mknod, const char __user *, filename, umode_t, mode, unsigned, dev);
+COMPAT_SYSCALL_WRAP2(chmod, const char __user *, filename, umode_t, mode);
+COMPAT_SYSCALL_WRAP1(oldumount, char __user *, name);
+COMPAT_SYSCALL_WRAP2(access, const char __user *, filename, int, mode);
+COMPAT_SYSCALL_WRAP2(rename, const char __user *, oldname, const char __user *, newname);
+COMPAT_SYSCALL_WRAP2(mkdir, const char __user *, pathname, umode_t, mode);
+COMPAT_SYSCALL_WRAP1(rmdir, const char __user *, pathname);
+COMPAT_SYSCALL_WRAP1(pipe, int __user *, fildes);
+COMPAT_SYSCALL_WRAP1(brk, unsigned long, brk);
+COMPAT_SYSCALL_WRAP2(signal, int, sig, __sighandler_t, handler);
+COMPAT_SYSCALL_WRAP1(acct, const char __user *, name);
+COMPAT_SYSCALL_WRAP2(umount, char __user *, name, int, flags);
+COMPAT_SYSCALL_WRAP1(chroot, const char __user *, filename);
+COMPAT_SYSCALL_WRAP3(sigsuspend, int, unused1, int, unused2, old_sigset_t, mask);
+COMPAT_SYSCALL_WRAP2(sethostname, char __user *, name, int, len);
+COMPAT_SYSCALL_WRAP2(symlink, const char __user *, old, const char __user *, new);
+COMPAT_SYSCALL_WRAP3(readlink, const char __user *, path, char __user *, buf, int, bufsiz);
+COMPAT_SYSCALL_WRAP1(uselib, const char __user *, library);
+COMPAT_SYSCALL_WRAP2(swapon, const char __user *, specialfile, int, swap_flags);
+COMPAT_SYSCALL_WRAP4(reboot, int, magic1, int, magic2, unsigned int, cmd, void __user *, arg);
+COMPAT_SYSCALL_WRAP2(munmap, unsigned long, addr, size_t, len);
+COMPAT_SYSCALL_WRAP3(syslog, int, type, char __user *, buf, int, len);
+COMPAT_SYSCALL_WRAP1(swapoff, const char __user *, specialfile);
+COMPAT_SYSCALL_WRAP2(setdomainname, char __user *, name, int, len);
+COMPAT_SYSCALL_WRAP1(newuname, struct new_utsname __user *, name);
+COMPAT_SYSCALL_WRAP3(mprotect, unsigned long, start, size_t, len, unsigned long, prot);
+COMPAT_SYSCALL_WRAP3(init_module, void __user *, umod, unsigned long, len, const char __user *, uargs);
+COMPAT_SYSCALL_WRAP2(delete_module, const char __user *, name_user, unsigned int, flags);
+COMPAT_SYSCALL_WRAP4(quotactl, unsigned int, cmd, const char __user *, special, qid_t, id, void __user *, addr);
+COMPAT_SYSCALL_WRAP2(bdflush, int, func, long, data);
+COMPAT_SYSCALL_WRAP3(sysfs, int, option, unsigned long, arg1, unsigned long, arg2);
+COMPAT_SYSCALL_WRAP5(llseek, unsigned int, fd, unsigned long, high, unsigned long, low, loff_t __user *, result, unsigned int, whence);
+COMPAT_SYSCALL_WRAP3(msync, unsigned long, start, size_t, len, int, flags);
+COMPAT_SYSCALL_WRAP2(mlock, unsigned long, start, size_t, len);
+COMPAT_SYSCALL_WRAP2(munlock, unsigned long, start, size_t, len);
+COMPAT_SYSCALL_WRAP2(sched_setparam, pid_t, pid, struct sched_param __user *, param);
+COMPAT_SYSCALL_WRAP2(sched_getparam, pid_t, pid, struct sched_param __user *, param);
+COMPAT_SYSCALL_WRAP3(sched_setscheduler, pid_t, pid, int, policy, struct sched_param __user *, param);
+COMPAT_SYSCALL_WRAP5(mremap, unsigned long, addr, unsigned long, old_len, unsigned long, new_len, unsigned long, flags, unsigned long, new_addr);
+COMPAT_SYSCALL_WRAP3(poll, struct pollfd __user *, ufds, unsigned int, nfds, int, timeout);
+COMPAT_SYSCALL_WRAP5(prctl, int, option, unsigned long, arg2, unsigned long, arg3, unsigned long, arg4, unsigned long, arg5);
+COMPAT_SYSCALL_WRAP2(getcwd, char __user *, buf, unsigned long, size);
+COMPAT_SYSCALL_WRAP2(capget, cap_user_header_t, header, cap_user_data_t, dataptr);
+COMPAT_SYSCALL_WRAP2(capset, cap_user_header_t, header, const cap_user_data_t, data);
+COMPAT_SYSCALL_WRAP3(lchown, const char __user *, filename, uid_t, user, gid_t, group);
+COMPAT_SYSCALL_WRAP2(getgroups, int, gidsetsize, gid_t __user *, grouplist);
+COMPAT_SYSCALL_WRAP2(setgroups, int, gidsetsize, gid_t __user *, grouplist);
+COMPAT_SYSCALL_WRAP3(getresuid, uid_t __user *, ruid, uid_t __user *, euid, uid_t __user *, suid);
+COMPAT_SYSCALL_WRAP3(getresgid, gid_t __user *, rgid, gid_t __user *, egid, gid_t __user *, sgid);
+COMPAT_SYSCALL_WRAP3(chown, const char __user *, filename, uid_t, user, gid_t, group);
+COMPAT_SYSCALL_WRAP2(pivot_root, const char __user *, new_root, const char __user *, put_old);
+COMPAT_SYSCALL_WRAP3(mincore, unsigned long, start, size_t, len, unsigned char __user *, vec);
+COMPAT_SYSCALL_WRAP3(madvise, unsigned long, start, size_t, len, int, behavior);
+COMPAT_SYSCALL_WRAP5(setxattr, const char __user *, path, const char __user *, name, const void __user *, value, size_t, size, int, flags);
+COMPAT_SYSCALL_WRAP5(lsetxattr, const char __user *, path, const char __user *, name, const void __user *, value, size_t, size, int, flags);
+COMPAT_SYSCALL_WRAP5(fsetxattr, int, fd, const char __user *, name, const void __user *, value, size_t, size, int, flags);
+COMPAT_SYSCALL_WRAP3(getdents64, unsigned int, fd, struct linux_dirent64 __user *, dirent, unsigned int, count);
+COMPAT_SYSCALL_WRAP4(getxattr, const char __user *, path, const char __user *, name, void __user *, value, size_t, size);
+COMPAT_SYSCALL_WRAP4(lgetxattr, const char __user *, path, const char __user *, name, void __user *, value, size_t, size);
+COMPAT_SYSCALL_WRAP4(fgetxattr, int, fd, const char __user *, name, void __user *, value, size_t, size);
+COMPAT_SYSCALL_WRAP3(listxattr, const char __user *, path, char __user *, list, size_t, size);
+COMPAT_SYSCALL_WRAP3(llistxattr, const char __user *, path, char __user *, list, size_t, size);
+COMPAT_SYSCALL_WRAP3(flistxattr, int, fd, char __user *, list, size_t, size);
+COMPAT_SYSCALL_WRAP2(removexattr, const char __user *, path, const char __user *, name);
+COMPAT_SYSCALL_WRAP2(lremovexattr, const char __user *, path, const char __user *, name);
+COMPAT_SYSCALL_WRAP2(fremovexattr, int, fd, const char __user *, name);
+COMPAT_SYSCALL_WRAP1(set_tid_address, int __user *, tidptr);
+COMPAT_SYSCALL_WRAP4(epoll_ctl, int, epfd, int, op, int, fd, struct epoll_event __user *, event);
+COMPAT_SYSCALL_WRAP4(epoll_wait, int, epfd, struct epoll_event __user *, events, int, maxevents, int, timeout);
+COMPAT_SYSCALL_WRAP1(io_destroy, aio_context_t, ctx);
+COMPAT_SYSCALL_WRAP3(io_cancel, aio_context_t, ctx_id, struct iocb __user *, iocb, struct io_event __user *, result);
+COMPAT_SYSCALL_WRAP1(mq_unlink, const char __user *, name);
+COMPAT_SYSCALL_WRAP5(add_key, const char __user *, tp, const char __user *, dsc, const void __user *, pld, size_t, len, key_serial_t, id);
+COMPAT_SYSCALL_WRAP4(request_key, const char __user *, tp, const char __user *, dsc, const char __user *, info, key_serial_t, id);
+COMPAT_SYSCALL_WRAP5(remap_file_pages, unsigned long, start, unsigned long, size, unsigned long, prot, unsigned long, pgoff, unsigned long, flags);
+COMPAT_SYSCALL_WRAP3(inotify_add_watch, int, fd, const char __user *, path, u32, mask);
+COMPAT_SYSCALL_WRAP3(mkdirat, int, dfd, const char __user *, pathname, umode_t, mode);
+COMPAT_SYSCALL_WRAP4(mknodat, int, dfd, const char __user *, filename, umode_t, mode, unsigned, dev);
+COMPAT_SYSCALL_WRAP5(fchownat, int, dfd, const char __user *, filename, uid_t, user, gid_t, group, int, flag);
+COMPAT_SYSCALL_WRAP3(unlinkat, int, dfd, const char __user *, pathname, int, flag);
+COMPAT_SYSCALL_WRAP4(renameat, int, olddfd, const char __user *, oldname, int, newdfd, const char __user *, newname);
+COMPAT_SYSCALL_WRAP5(linkat, int, olddfd, const char __user *, oldname, int, newdfd, const char __user *, newname, int, flags);
+COMPAT_SYSCALL_WRAP3(symlinkat, const char __user *, oldname, int, newdfd, const char __user *, newname);
+COMPAT_SYSCALL_WRAP4(readlinkat, int, dfd, const char __user *, path, char __user *, buf, int, bufsiz);
+COMPAT_SYSCALL_WRAP3(fchmodat, int, dfd, const char __user *, filename, umode_t, mode);
+COMPAT_SYSCALL_WRAP3(faccessat, int, dfd, const char __user *, filename, int, mode);
+COMPAT_SYSCALL_WRAP1(unshare, unsigned long, unshare_flags);
+COMPAT_SYSCALL_WRAP6(splice, int, fd_in, loff_t __user *, off_in, int, fd_out, loff_t __user *, off_out, size_t, len, unsigned int, flags);
+COMPAT_SYSCALL_WRAP4(tee, int, fdin, int, fdout, size_t, len, unsigned int, flags);
+COMPAT_SYSCALL_WRAP3(getcpu, unsigned __user *, cpu, unsigned __user *, node, struct getcpu_cache __user *, cache);
+COMPAT_SYSCALL_WRAP2(pipe2, int __user *, fildes, int, flags);
+COMPAT_SYSCALL_WRAP5(perf_event_open, struct perf_event_attr __user *, attr_uptr, pid_t, pid, int, cpu, int, group_fd, unsigned long, flags);
+COMPAT_SYSCALL_WRAP5(clone, unsigned long, newsp, unsigned long, clone_flags, int __user *, parent_tidptr, int __user *, child_tidptr, unsigned long, tls);
+COMPAT_SYSCALL_WRAP4(prlimit64, pid_t, pid, unsigned int, resource, const struct rlimit64 __user *, new_rlim, struct rlimit64 __user *, old_rlim);
+COMPAT_SYSCALL_WRAP5(name_to_handle_at, int, dfd, const char __user *, name, struct file_handle __user *, handle, int __user *, mnt_id, int, flag);
+COMPAT_SYSCALL_WRAP5(kcmp, pid_t, pid1, pid_t, pid2, int, type, unsigned long, idx1, unsigned long, idx2);
+COMPAT_SYSCALL_WRAP3(finit_module, int, fd, const char __user *, uargs, int, flags);
+COMPAT_SYSCALL_WRAP3(sched_setattr, pid_t, pid, struct sched_attr __user *, attr, unsigned int, flags);
+COMPAT_SYSCALL_WRAP4(sched_getattr, pid_t, pid, struct sched_attr __user *, attr, unsigned int, size, unsigned int, flags);
+COMPAT_SYSCALL_WRAP5(renameat2, int, olddfd, const char __user *, oldname, int, newdfd, const char __user *, newname, unsigned int, flags);
+COMPAT_SYSCALL_WRAP3(seccomp, unsigned int, op, unsigned int, flags, const char __user *, uargs)
+COMPAT_SYSCALL_WRAP3(getrandom, char __user *, buf, size_t, count, unsigned int, flags)
+COMPAT_SYSCALL_WRAP2(memfd_create, const char __user *, uname, unsigned int, flags)
+COMPAT_SYSCALL_WRAP3(bpf, int, cmd, union bpf_attr *, attr, unsigned int, size);
+COMPAT_SYSCALL_WRAP3(s390_pci_mmio_write, const unsigned long, mmio_addr, const void __user *, user_buffer, const size_t, length);
+COMPAT_SYSCALL_WRAP3(s390_pci_mmio_read, const unsigned long, mmio_addr, void __user *, user_buffer, const size_t, length);
+COMPAT_SYSCALL_WRAP4(socketpair, int, family, int, type, int, protocol, int __user *, usockvec);
+COMPAT_SYSCALL_WRAP3(bind, int, fd, struct sockaddr __user *, umyaddr, int, addrlen);
+COMPAT_SYSCALL_WRAP3(connect, int, fd, struct sockaddr __user *, uservaddr, int, addrlen);
+COMPAT_SYSCALL_WRAP4(accept4, int, fd, struct sockaddr __user *, upeer_sockaddr, int __user *, upeer_addrlen, int, flags);
+COMPAT_SYSCALL_WRAP3(getsockname, int, fd, struct sockaddr __user *, usockaddr, int __user *, usockaddr_len);
+COMPAT_SYSCALL_WRAP3(getpeername, int, fd, struct sockaddr __user *, usockaddr, int __user *, usockaddr_len);
+COMPAT_SYSCALL_WRAP6(sendto, int, fd, void __user *, buff, size_t, len, unsigned int, flags, struct sockaddr __user *, addr, int, addr_len);
+COMPAT_SYSCALL_WRAP3(mlock2, unsigned long, start, size_t, len, int, flags);
+COMPAT_SYSCALL_WRAP6(copy_file_range, int, fd_in, loff_t __user *, off_in, int, fd_out, loff_t __user *, off_out, size_t, len, unsigned int, flags);
+COMPAT_SYSCALL_WRAP2(s390_guarded_storage, int, command, struct gs_cb *, gs_cb);
+COMPAT_SYSCALL_WRAP5(statx, int, dfd, const char __user *, path, unsigned, flags, unsigned, mask, struct statx __user *, buffer);
+COMPAT_SYSCALL_WRAP4(s390_sthyi, unsigned long, code, void __user *, info, u64 __user *, rc, unsigned long, flags);
+COMPAT_SYSCALL_WRAP5(kexec_file_load, int, kernel_fd, int, initrd_fd, unsigned long, cmdline_len, const char __user *, cmdline_ptr, unsigned long, flags)
+COMPAT_SYSCALL_WRAP4(rseq, struct rseq __user *, rseq, u32, rseq_len, int, flags, u32, sig)
diff --git a/arch/s390/kernel/cpcmd.c b/arch/s390/kernel/cpcmd.c
new file mode 100644
index 000000000..2da027359
--- /dev/null
+++ b/arch/s390/kernel/cpcmd.c
@@ -0,0 +1,112 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * S390 version
+ * Copyright IBM Corp. 1999, 2007
+ * Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com),
+ * Christian Borntraeger (cborntra@de.ibm.com),
+ */
+
+#define KMSG_COMPONENT "cpcmd"
+#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
+
+#include <linux/kernel.h>
+#include <linux/export.h>
+#include <linux/slab.h>
+#include <linux/spinlock.h>
+#include <linux/stddef.h>
+#include <linux/string.h>
+#include <linux/mm.h>
+#include <asm/diag.h>
+#include <asm/ebcdic.h>
+#include <asm/cpcmd.h>
+#include <asm/io.h>
+
+static DEFINE_SPINLOCK(cpcmd_lock);
+static char cpcmd_buf[241];
+
+static int diag8_noresponse(int cmdlen)
+{
+ register unsigned long reg2 asm ("2") = (addr_t) cpcmd_buf;
+ register unsigned long reg3 asm ("3") = cmdlen;
+
+ asm volatile(
+ " diag %1,%0,0x8\n"
+ : "+d" (reg3) : "d" (reg2) : "cc");
+ return reg3;
+}
+
+static int diag8_response(int cmdlen, char *response, int *rlen)
+{
+ unsigned long _cmdlen = cmdlen | 0x40000000L;
+ unsigned long _rlen = *rlen;
+ register unsigned long reg2 asm ("2") = (addr_t) cpcmd_buf;
+ register unsigned long reg3 asm ("3") = (addr_t) response;
+ register unsigned long reg4 asm ("4") = _cmdlen;
+ register unsigned long reg5 asm ("5") = _rlen;
+
+ asm volatile(
+ " diag %2,%0,0x8\n"
+ " brc 8,1f\n"
+ " agr %1,%4\n"
+ "1:\n"
+ : "+d" (reg4), "+d" (reg5)
+ : "d" (reg2), "d" (reg3), "d" (*rlen) : "cc");
+ *rlen = reg5;
+ return reg4;
+}
+
+/*
+ * __cpcmd has some restrictions over cpcmd
+ * - __cpcmd is unlocked and therefore not SMP-safe
+ */
+int __cpcmd(const char *cmd, char *response, int rlen, int *response_code)
+{
+ int cmdlen;
+ int rc;
+ int response_len;
+
+ cmdlen = strlen(cmd);
+ BUG_ON(cmdlen > 240);
+ memcpy(cpcmd_buf, cmd, cmdlen);
+ ASCEBC(cpcmd_buf, cmdlen);
+
+ diag_stat_inc(DIAG_STAT_X008);
+ if (response) {
+ memset(response, 0, rlen);
+ response_len = rlen;
+ rc = diag8_response(cmdlen, response, &rlen);
+ EBCASC(response, response_len);
+ } else {
+ rc = diag8_noresponse(cmdlen);
+ }
+ if (response_code)
+ *response_code = rc;
+ return rlen;
+}
+EXPORT_SYMBOL(__cpcmd);
+
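+/*
+ * cpcmd is the locked, SMP-safe variant of __cpcmd. If the response
+ * buffer is a vmalloc or module address, it is bounced through a
+ * kmalloc'ed buffer first. Illustrative use (not from the original
+ * source):
+ *
+ *	char buf[128];
+ *	int rc;
+ *
+ *	cpcmd("QUERY USERID", buf, sizeof(buf), &rc);
+ */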
+int cpcmd(const char *cmd, char *response, int rlen, int *response_code)
+{
+ unsigned long flags;
+ char *lowbuf;
+ int len;
+
+ if (is_vmalloc_or_module_addr(response)) {
+ lowbuf = kmalloc(rlen, GFP_KERNEL);
+ if (!lowbuf) {
+ pr_warn("The cpcmd kernel function failed to allocate a response buffer\n");
+ return -ENOMEM;
+ }
+ spin_lock_irqsave(&cpcmd_lock, flags);
+ len = __cpcmd(cmd, lowbuf, rlen, response_code);
+ spin_unlock_irqrestore(&cpcmd_lock, flags);
+ memcpy(response, lowbuf, rlen);
+ kfree(lowbuf);
+ } else {
+ spin_lock_irqsave(&cpcmd_lock, flags);
+ len = __cpcmd(cmd, response, rlen, response_code);
+ spin_unlock_irqrestore(&cpcmd_lock, flags);
+ }
+ return len;
+}
+EXPORT_SYMBOL(cpcmd);
diff --git a/arch/s390/kernel/crash_dump.c b/arch/s390/kernel/crash_dump.c
new file mode 100644
index 000000000..376f6b6df
--- /dev/null
+++ b/arch/s390/kernel/crash_dump.c
@@ -0,0 +1,711 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * S390 kdump implementation
+ *
+ * Copyright IBM Corp. 2011
+ * Author(s): Michael Holzheu <holzheu@linux.vnet.ibm.com>
+ */
+
+#include <linux/crash_dump.h>
+#include <asm/lowcore.h>
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/mm.h>
+#include <linux/gfp.h>
+#include <linux/slab.h>
+#include <linux/bootmem.h>
+#include <linux/elf.h>
+#include <asm/asm-offsets.h>
+#include <linux/memblock.h>
+#include <asm/os_info.h>
+#include <asm/elf.h>
+#include <asm/ipl.h>
+#include <asm/sclp.h>
+
+#define PTR_ADD(x, y) (((char *) (x)) + ((unsigned long) (y)))
+#define PTR_SUB(x, y) (((char *) (x)) - ((unsigned long) (y)))
+#define PTR_DIFF(x, y) ((unsigned long)(((char *) (x)) - ((unsigned long) (y))))
+
+static struct memblock_region oldmem_region;
+
+static struct memblock_type oldmem_type = {
+ .cnt = 1,
+ .max = 1,
+ .total_size = 0,
+ .regions = &oldmem_region,
+ .name = "oldmem",
+};
+
+struct save_area {
+ struct list_head list;
+ u64 psw[2];
+ u64 ctrs[16];
+ u64 gprs[16];
+ u32 acrs[16];
+ u64 fprs[16];
+ u32 fpc;
+ u32 prefix;
+ u64 todpreg;
+ u64 timer;
+ u64 todcmp;
+ u64 vxrs_low[16];
+ __vector128 vxrs_high[16];
+};
+
+static LIST_HEAD(dump_save_areas);
+
+/*
+ * Allocate a save area
+ */
+struct save_area * __init save_area_alloc(bool is_boot_cpu)
+{
+ struct save_area *sa;
+
+ sa = (void *) memblock_alloc(sizeof(*sa), 8);
+ if (is_boot_cpu)
+ list_add(&sa->list, &dump_save_areas);
+ else
+ list_add_tail(&sa->list, &dump_save_areas);
+ return sa;
+}
+
+/*
+ * Return the address of the save area for the boot CPU
+ */
+struct save_area * __init save_area_boot_cpu(void)
+{
+ return list_first_entry_or_null(&dump_save_areas, struct save_area, list);
+}
+
+/*
+ * Copy CPU registers into the save area
+ */
+void __init save_area_add_regs(struct save_area *sa, void *regs)
+{
+ struct lowcore *lc;
+
+ lc = (struct lowcore *)(regs - __LC_FPREGS_SAVE_AREA);
+ memcpy(&sa->psw, &lc->psw_save_area, sizeof(sa->psw));
+ memcpy(&sa->ctrs, &lc->cregs_save_area, sizeof(sa->ctrs));
+ memcpy(&sa->gprs, &lc->gpregs_save_area, sizeof(sa->gprs));
+ memcpy(&sa->acrs, &lc->access_regs_save_area, sizeof(sa->acrs));
+ memcpy(&sa->fprs, &lc->floating_pt_save_area, sizeof(sa->fprs));
+ memcpy(&sa->fpc, &lc->fpt_creg_save_area, sizeof(sa->fpc));
+ memcpy(&sa->prefix, &lc->prefixreg_save_area, sizeof(sa->prefix));
+ memcpy(&sa->todpreg, &lc->tod_progreg_save_area, sizeof(sa->todpreg));
+ memcpy(&sa->timer, &lc->cpu_timer_save_area, sizeof(sa->timer));
+ memcpy(&sa->todcmp, &lc->clock_comp_save_area, sizeof(sa->todcmp));
+}
+
+/*
+ * Copy vector registers into the save area
+ */
+void __init save_area_add_vxrs(struct save_area *sa, __vector128 *vxrs)
+{
+ int i;
+
+ /* Copy lower halves of vector registers 0-15 */
+ for (i = 0; i < 16; i++)
+ memcpy(&sa->vxrs_low[i], &vxrs[i].u[2], 8);
+ /* Copy vector registers 16-31 */
+ memcpy(sa->vxrs_high, vxrs + 16, 16 * sizeof(__vector128));
+}
+
+/*
+ * Return physical address for virtual address
+ */
+static inline void *load_real_addr(void *addr)
+{
+ unsigned long real_addr;
+
+ asm volatile(
+ " lra %0,0(%1)\n"
+ " jz 0f\n"
+ " la %0,0\n"
+ "0:"
+ : "=a" (real_addr) : "a" (addr) : "cc");
+ return (void *)real_addr;
+}
+
+/*
+ * Copy memory of the old, dumped system to a kernel space virtual address
+ */
+int copy_oldmem_kernel(void *dst, void *src, size_t count)
+{
+ unsigned long from, len;
+ void *ra;
+ int rc;
+
+ while (count) {
+ from = __pa(src);
+ if (!OLDMEM_BASE && from < sclp.hsa_size) {
+ /* Copy from zfcpdump HSA area */
+ len = min(count, sclp.hsa_size - from);
+ rc = memcpy_hsa_kernel(dst, from, len);
+ if (rc)
+ return rc;
+ } else {
+ /* Check for swapped kdump oldmem areas */
+ if (OLDMEM_BASE && from - OLDMEM_BASE < OLDMEM_SIZE) {
+ from -= OLDMEM_BASE;
+ len = min(count, OLDMEM_SIZE - from);
+ } else if (OLDMEM_BASE && from < OLDMEM_SIZE) {
+ len = min(count, OLDMEM_SIZE - from);
+ from += OLDMEM_BASE;
+ } else {
+ len = count;
+ }
+ if (is_vmalloc_or_module_addr(dst)) {
+ ra = load_real_addr(dst);
+ len = min(PAGE_SIZE - offset_in_page(ra), len);
+ } else {
+ ra = dst;
+ }
+ if (memcpy_real(ra, (void *) from, len))
+ return -EFAULT;
+ }
+ dst += len;
+ src += len;
+ count -= len;
+ }
+ return 0;
+}
+
+/*
+ * Copy memory of the old, dumped system to a user space virtual address
+ */
+static int copy_oldmem_user(void __user *dst, void *src, size_t count)
+{
+ unsigned long from, len;
+ int rc;
+
+ while (count) {
+ from = __pa(src);
+ if (!OLDMEM_BASE && from < sclp.hsa_size) {
+ /* Copy from zfcpdump HSA area */
+ len = min(count, sclp.hsa_size - from);
+ rc = memcpy_hsa_user(dst, from, len);
+ if (rc)
+ return rc;
+ } else {
+ /* Check for swapped kdump oldmem areas */
+ if (OLDMEM_BASE && from - OLDMEM_BASE < OLDMEM_SIZE) {
+ from -= OLDMEM_BASE;
+ len = min(count, OLDMEM_SIZE - from);
+ } else if (OLDMEM_BASE && from < OLDMEM_SIZE) {
+ len = min(count, OLDMEM_SIZE - from);
+ from += OLDMEM_BASE;
+ } else {
+ len = count;
+ }
+ rc = copy_to_user_real(dst, (void *) from, len);
+ if (rc)
+ return rc;
+ }
+ dst += len;
+ src += len;
+ count -= len;
+ }
+ return 0;
+}
+
+/*
+ * Copy one page from "oldmem"
+ */
+ssize_t copy_oldmem_page(unsigned long pfn, char *buf, size_t csize,
+ unsigned long offset, int userbuf)
+{
+ void *src;
+ int rc;
+
+ if (!csize)
+ return 0;
+ src = (void *) (pfn << PAGE_SHIFT) + offset;
+ if (userbuf)
+ rc = copy_oldmem_user((void __force __user *) buf, src, csize);
+ else
+ rc = copy_oldmem_kernel((void *) buf, src, csize);
+ return rc;
+}
+
+/*
+ * Remap "oldmem" for kdump
+ *
+ * For the kdump reserved memory this function performs a swap operation:
+ * [0 - OLDMEM_SIZE] is mapped to [OLDMEM_BASE - OLDMEM_BASE + OLDMEM_SIZE]
+ */
+static int remap_oldmem_pfn_range_kdump(struct vm_area_struct *vma,
+ unsigned long from, unsigned long pfn,
+ unsigned long size, pgprot_t prot)
+{
+ unsigned long size_old;
+ int rc;
+
+ if (pfn < OLDMEM_SIZE >> PAGE_SHIFT) {
+ size_old = min(size, OLDMEM_SIZE - (pfn << PAGE_SHIFT));
+ rc = remap_pfn_range(vma, from,
+ pfn + (OLDMEM_BASE >> PAGE_SHIFT),
+ size_old, prot);
+ if (rc || size == size_old)
+ return rc;
+ size -= size_old;
+ from += size_old;
+ pfn += size_old >> PAGE_SHIFT;
+ }
+ return remap_pfn_range(vma, from, pfn, size, prot);
+}
+
+/*
+ * Remap "oldmem" for zfcpdump
+ *
+ * We only map available memory above HSA size. Memory below HSA size
+ * is read on demand using the copy_oldmem_page() function.
+ */
+static int remap_oldmem_pfn_range_zfcpdump(struct vm_area_struct *vma,
+ unsigned long from,
+ unsigned long pfn,
+ unsigned long size, pgprot_t prot)
+{
+ unsigned long hsa_end = sclp.hsa_size;
+ unsigned long size_hsa;
+
+ if (pfn < hsa_end >> PAGE_SHIFT) {
+ size_hsa = min(size, hsa_end - (pfn << PAGE_SHIFT));
+ if (size == size_hsa)
+ return 0;
+ size -= size_hsa;
+ from += size_hsa;
+ pfn += size_hsa >> PAGE_SHIFT;
+ }
+ return remap_pfn_range(vma, from, pfn, size, prot);
+}
+
+/*
+ * Remap "oldmem" for kdump or zfcpdump
+ */
+int remap_oldmem_pfn_range(struct vm_area_struct *vma, unsigned long from,
+ unsigned long pfn, unsigned long size, pgprot_t prot)
+{
+ if (OLDMEM_BASE)
+ return remap_oldmem_pfn_range_kdump(vma, from, pfn, size, prot);
+ else
+ return remap_oldmem_pfn_range_zfcpdump(vma, from, pfn, size,
+ prot);
+}
+
+static const char *nt_name(Elf64_Word type)
+{
+ const char *name = "LINUX";
+
+ if (type == NT_PRPSINFO || type == NT_PRSTATUS || type == NT_PRFPREG)
+ name = KEXEC_CORE_NOTE_NAME;
+ return name;
+}
+
+/*
+ * Initialize ELF note
+ */
+static void *nt_init_name(void *buf, Elf64_Word type, void *desc, int d_len,
+ const char *name)
+{
+ Elf64_Nhdr *note;
+ u64 len;
+
+ note = (Elf64_Nhdr *)buf;
+ note->n_namesz = strlen(name) + 1;
+ note->n_descsz = d_len;
+ note->n_type = type;
+ len = sizeof(Elf64_Nhdr);
+
+ memcpy(buf + len, name, note->n_namesz);
+ len = roundup(len + note->n_namesz, 4);
+
+ memcpy(buf + len, desc, note->n_descsz);
+ len = roundup(len + note->n_descsz, 4);
+
+ return PTR_ADD(buf, len);
+}
+
+static inline void *nt_init(void *buf, Elf64_Word type, void *desc, int d_len)
+{
+ return nt_init_name(buf, type, desc, d_len, nt_name(type));
+}
+
+/*
+ * Calculate the size of ELF note
+ */
+static size_t nt_size_name(int d_len, const char *name)
+{
+ size_t size;
+
+ size = sizeof(Elf64_Nhdr);
+ size += roundup(strlen(name) + 1, 4);
+ size += roundup(d_len, 4);
+
+ return size;
+}
+
+static inline size_t nt_size(Elf64_Word type, int d_len)
+{
+ return nt_size_name(d_len, nt_name(type));
+}
+
+/*
+ * Fill ELF notes for one CPU with save area registers
+ */
+static void *fill_cpu_elf_notes(void *ptr, int cpu, struct save_area *sa)
+{
+ struct elf_prstatus nt_prstatus;
+ elf_fpregset_t nt_fpregset;
+
+ /* Prepare prstatus note */
+ memset(&nt_prstatus, 0, sizeof(nt_prstatus));
+ memcpy(&nt_prstatus.pr_reg.gprs, sa->gprs, sizeof(sa->gprs));
+ memcpy(&nt_prstatus.pr_reg.psw, sa->psw, sizeof(sa->psw));
+ memcpy(&nt_prstatus.pr_reg.acrs, sa->acrs, sizeof(sa->acrs));
+ nt_prstatus.pr_pid = cpu;
+ /* Prepare fpregset (floating point) note */
+ memset(&nt_fpregset, 0, sizeof(nt_fpregset));
+ memcpy(&nt_fpregset.fpc, &sa->fpc, sizeof(sa->fpc));
+ memcpy(&nt_fpregset.fprs, &sa->fprs, sizeof(sa->fprs));
+ /* Create ELF notes for the CPU */
+ ptr = nt_init(ptr, NT_PRSTATUS, &nt_prstatus, sizeof(nt_prstatus));
+ ptr = nt_init(ptr, NT_PRFPREG, &nt_fpregset, sizeof(nt_fpregset));
+ ptr = nt_init(ptr, NT_S390_TIMER, &sa->timer, sizeof(sa->timer));
+ ptr = nt_init(ptr, NT_S390_TODCMP, &sa->todcmp, sizeof(sa->todcmp));
+ ptr = nt_init(ptr, NT_S390_TODPREG, &sa->todpreg, sizeof(sa->todpreg));
+ ptr = nt_init(ptr, NT_S390_CTRS, &sa->ctrs, sizeof(sa->ctrs));
+ ptr = nt_init(ptr, NT_S390_PREFIX, &sa->prefix, sizeof(sa->prefix));
+ if (MACHINE_HAS_VX) {
+ ptr = nt_init(ptr, NT_S390_VXRS_HIGH,
+ &sa->vxrs_high, sizeof(sa->vxrs_high));
+ ptr = nt_init(ptr, NT_S390_VXRS_LOW,
+ &sa->vxrs_low, sizeof(sa->vxrs_low));
+ }
+ return ptr;
+}
+
+/*
+ * Calculate size of ELF notes per cpu
+ */
+static size_t get_cpu_elf_notes_size(void)
+{
+ struct save_area *sa = NULL;
+ size_t size;
+
+ size = nt_size(NT_PRSTATUS, sizeof(struct elf_prstatus));
+ size += nt_size(NT_PRFPREG, sizeof(elf_fpregset_t));
+ size += nt_size(NT_S390_TIMER, sizeof(sa->timer));
+ size += nt_size(NT_S390_TODCMP, sizeof(sa->todcmp));
+ size += nt_size(NT_S390_TODPREG, sizeof(sa->todpreg));
+ size += nt_size(NT_S390_CTRS, sizeof(sa->ctrs));
+ size += nt_size(NT_S390_PREFIX, sizeof(sa->prefix));
+ if (MACHINE_HAS_VX) {
+ size += nt_size(NT_S390_VXRS_HIGH, sizeof(sa->vxrs_high));
+ size += nt_size(NT_S390_VXRS_LOW, sizeof(sa->vxrs_low));
+ }
+
+ return size;
+}
+
+/*
+ * Initialize prpsinfo note (new kernel)
+ */
+static void *nt_prpsinfo(void *ptr)
+{
+ struct elf_prpsinfo prpsinfo;
+
+ memset(&prpsinfo, 0, sizeof(prpsinfo));
+ prpsinfo.pr_sname = 'R';
+ strcpy(prpsinfo.pr_fname, "vmlinux");
+ return nt_init(ptr, NT_PRPSINFO, &prpsinfo, sizeof(prpsinfo));
+}
+
+/*
+ * Get vmcoreinfo using lowcore->vmcore_info (new kernel)
+ */
+static void *get_vmcoreinfo_old(unsigned long *size)
+{
+ char nt_name[11], *vmcoreinfo;
+ Elf64_Nhdr note;
+ void *addr;
+
+ if (copy_oldmem_kernel(&addr, &S390_lowcore.vmcore_info, sizeof(addr)))
+ return NULL;
+ memset(nt_name, 0, sizeof(nt_name));
+ if (copy_oldmem_kernel(&note, addr, sizeof(note)))
+ return NULL;
+ if (copy_oldmem_kernel(nt_name, addr + sizeof(note),
+ sizeof(nt_name) - 1))
+ return NULL;
+ if (strcmp(nt_name, VMCOREINFO_NOTE_NAME) != 0)
+ return NULL;
+ vmcoreinfo = kzalloc(note.n_descsz, GFP_KERNEL);
+ if (!vmcoreinfo)
+ return NULL;
+ if (copy_oldmem_kernel(vmcoreinfo, addr + 24, note.n_descsz)) {
+ kfree(vmcoreinfo);
+ return NULL;
+ }
+ *size = note.n_descsz;
+ return vmcoreinfo;
+}
+
+/*
+ * Initialize vmcoreinfo note (new kernel)
+ */
+static void *nt_vmcoreinfo(void *ptr)
+{
+ const char *name = VMCOREINFO_NOTE_NAME;
+ unsigned long size;
+ void *vmcoreinfo;
+
+ vmcoreinfo = os_info_old_entry(OS_INFO_VMCOREINFO, &size);
+ if (vmcoreinfo)
+ return nt_init_name(ptr, 0, vmcoreinfo, size, name);
+
+ vmcoreinfo = get_vmcoreinfo_old(&size);
+ if (!vmcoreinfo)
+ return ptr;
+ ptr = nt_init_name(ptr, 0, vmcoreinfo, size, name);
+ kfree(vmcoreinfo);
+ return ptr;
+}
+
+static size_t nt_vmcoreinfo_size(void)
+{
+ const char *name = VMCOREINFO_NOTE_NAME;
+ unsigned long size;
+ void *vmcoreinfo;
+
+ vmcoreinfo = os_info_old_entry(OS_INFO_VMCOREINFO, &size);
+ if (vmcoreinfo)
+ return nt_size_name(size, name);
+
+ vmcoreinfo = get_vmcoreinfo_old(&size);
+ if (!vmcoreinfo)
+ return 0;
+
+ kfree(vmcoreinfo);
+ return nt_size_name(size, name);
+}
+
+/*
+ * Initialize final note (needed for /proc/vmcore code)
+ */
+static void *nt_final(void *ptr)
+{
+ Elf64_Nhdr *note;
+
+ note = (Elf64_Nhdr *) ptr;
+ note->n_namesz = 0;
+ note->n_descsz = 0;
+ note->n_type = 0;
+ return PTR_ADD(ptr, sizeof(Elf64_Nhdr));
+}
+
+/*
+ * Initialize ELF header (new kernel)
+ */
+static void *ehdr_init(Elf64_Ehdr *ehdr, int mem_chunk_cnt)
+{
+ memset(ehdr, 0, sizeof(*ehdr));
+ memcpy(ehdr->e_ident, ELFMAG, SELFMAG);
+ ehdr->e_ident[EI_CLASS] = ELFCLASS64;
+ ehdr->e_ident[EI_DATA] = ELFDATA2MSB;
+ ehdr->e_ident[EI_VERSION] = EV_CURRENT;
+ memset(ehdr->e_ident + EI_PAD, 0, EI_NIDENT - EI_PAD);
+ ehdr->e_type = ET_CORE;
+ ehdr->e_machine = EM_S390;
+ ehdr->e_version = EV_CURRENT;
+ ehdr->e_phoff = sizeof(Elf64_Ehdr);
+ ehdr->e_ehsize = sizeof(Elf64_Ehdr);
+ ehdr->e_phentsize = sizeof(Elf64_Phdr);
+ ehdr->e_phnum = mem_chunk_cnt + 1;
+ return ehdr + 1;
+}
+
+/*
+ * Return CPU count for ELF header (new kernel)
+ */
+static int get_cpu_cnt(void)
+{
+ struct save_area *sa;
+ int cpus = 0;
+
+ list_for_each_entry(sa, &dump_save_areas, list)
+ if (sa->prefix != 0)
+ cpus++;
+ return cpus;
+}
+
+/*
+ * Return memory chunk count for ELF header (new kernel)
+ */
+static int get_mem_chunk_cnt(void)
+{
+ int cnt = 0;
+ u64 idx;
+
+ for_each_mem_range(idx, &memblock.physmem, &oldmem_type, NUMA_NO_NODE,
+ MEMBLOCK_NONE, NULL, NULL, NULL)
+ cnt++;
+ return cnt;
+}
+
+/*
+ * Initialize ELF loads (new kernel)
+ */
+static void loads_init(Elf64_Phdr *phdr, u64 loads_offset)
+{
+ phys_addr_t start, end;
+ u64 idx;
+
+ for_each_mem_range(idx, &memblock.physmem, &oldmem_type, NUMA_NO_NODE,
+ MEMBLOCK_NONE, &start, &end, NULL) {
+ phdr->p_filesz = end - start;
+ phdr->p_type = PT_LOAD;
+ phdr->p_offset = start;
+ phdr->p_vaddr = start;
+ phdr->p_paddr = start;
+ phdr->p_memsz = end - start;
+ phdr->p_flags = PF_R | PF_W | PF_X;
+ phdr->p_align = PAGE_SIZE;
+ phdr++;
+ }
+}
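+
+/*
+ * Note (derived from the loop above): p_offset is set to the physical
+ * start address of each chunk, so a /proc/vmcore read of a PT_LOAD
+ * segment translates directly into a read of old (pre-crash) memory.
+ */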
+
+/*
+ * Initialize notes (new kernel)
+ */
+static void *notes_init(Elf64_Phdr *phdr, void *ptr, u64 notes_offset)
+{
+ struct save_area *sa;
+ void *ptr_start = ptr;
+ int cpu;
+
+ ptr = nt_prpsinfo(ptr);
+
+ cpu = 1;
+ list_for_each_entry(sa, &dump_save_areas, list)
+ if (sa->prefix != 0)
+ ptr = fill_cpu_elf_notes(ptr, cpu++, sa);
+ ptr = nt_vmcoreinfo(ptr);
+ ptr = nt_final(ptr);
+ memset(phdr, 0, sizeof(*phdr));
+ phdr->p_type = PT_NOTE;
+ phdr->p_offset = notes_offset;
+ phdr->p_filesz = (unsigned long) PTR_SUB(ptr, ptr_start);
+ phdr->p_memsz = phdr->p_filesz;
+ return ptr;
+}
+
+static size_t get_elfcorehdr_size(int mem_chunk_cnt)
+{
+ size_t size;
+
+ size = sizeof(Elf64_Ehdr);
+ /* PT_NOTES */
+ size += sizeof(Elf64_Phdr);
+ /* nt_prpsinfo */
+ size += nt_size(NT_PRPSINFO, sizeof(struct elf_prpsinfo));
+ /* regsets */
+ size += get_cpu_cnt() * get_cpu_elf_notes_size();
+ /* nt_vmcoreinfo */
+ size += nt_vmcoreinfo_size();
+ /* nt_final */
+ size += sizeof(Elf64_Nhdr);
+ /* PT_LOADS */
+ size += mem_chunk_cnt * sizeof(Elf64_Phdr);
+
+ return size;
+}
+
+/*
+ * Create ELF core header (new kernel)
+ */
+int elfcorehdr_alloc(unsigned long long *addr, unsigned long long *size)
+{
+ Elf64_Phdr *phdr_notes, *phdr_loads;
+ int mem_chunk_cnt;
+ void *ptr, *hdr;
+ u32 alloc_size;
+ u64 hdr_off;
+
+ /* If we are not in kdump or zfcpdump mode return */
+ if (!OLDMEM_BASE && ipl_info.type != IPL_TYPE_FCP_DUMP)
+ return 0;
+ /* If we cannot get HSA size for zfcpdump return error */
+ if (ipl_info.type == IPL_TYPE_FCP_DUMP && !sclp.hsa_size)
+ return -ENODEV;
+
+ /* For kdump, exclude previous crashkernel memory */
+ if (OLDMEM_BASE) {
+ oldmem_region.base = OLDMEM_BASE;
+ oldmem_region.size = OLDMEM_SIZE;
+ oldmem_type.total_size = OLDMEM_SIZE;
+ }
+
+ mem_chunk_cnt = get_mem_chunk_cnt();
+
+ alloc_size = get_elfcorehdr_size(mem_chunk_cnt);
+
+ hdr = kzalloc(alloc_size, GFP_KERNEL);
+
+ /* Without elfcorehdr /proc/vmcore cannot be created. Thus creating
+ * a dump with this crash kernel will fail. Panic now to allow other
+ * dump mechanisms to take over.
+ */
+ if (!hdr)
+ panic("s390 kdump allocating elfcorehdr failed");
+
+ /* Init elf header */
+ ptr = ehdr_init(hdr, mem_chunk_cnt);
+ /* Init program headers */
+ phdr_notes = ptr;
+ ptr = PTR_ADD(ptr, sizeof(Elf64_Phdr));
+ phdr_loads = ptr;
+ ptr = PTR_ADD(ptr, sizeof(Elf64_Phdr) * mem_chunk_cnt);
+ /* Init notes */
+ hdr_off = PTR_DIFF(ptr, hdr);
+ ptr = notes_init(phdr_notes, ptr, ((unsigned long) hdr) + hdr_off);
+ /* Init loads */
+ hdr_off = PTR_DIFF(ptr, hdr);
+ loads_init(phdr_loads, hdr_off);
+ *addr = (unsigned long long) hdr;
+ *size = (unsigned long long) hdr_off;
+ BUG_ON(elfcorehdr_size > alloc_size);
+ return 0;
+}
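+
+/*
+ * Resulting layout of the allocated header buffer (derived from the
+ * code above):
+ *
+ *   Elf64_Ehdr | PT_NOTE Phdr | PT_LOAD Phdrs (one per chunk) | note data
+ */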
+
+/*
+ * Free ELF core header (new kernel)
+ */
+void elfcorehdr_free(unsigned long long addr)
+{
+ kfree((void *)(unsigned long)addr);
+}
+
+/*
+ * Read from ELF header
+ */
+ssize_t elfcorehdr_read(char *buf, size_t count, u64 *ppos)
+{
+ void *src = (void *)(unsigned long)*ppos;
+
+ memcpy(buf, src, count);
+ *ppos += count;
+ return count;
+}
+
+/*
+ * Read from ELF notes data
+ */
+ssize_t elfcorehdr_read_notes(char *buf, size_t count, u64 *ppos)
+{
+ void *src = (void *)(unsigned long)*ppos;
+
+ memcpy(buf, src, count);
+ *ppos += count;
+ return count;
+}
diff --git a/arch/s390/kernel/debug.c b/arch/s390/kernel/debug.c
new file mode 100644
index 000000000..04bbf7e97
--- /dev/null
+++ b/arch/s390/kernel/debug.c
@@ -0,0 +1,1461 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * S/390 debug facility
+ *
+ * Copyright IBM Corp. 1999, 2012
+ *
+ * Author(s): Michael Holzheu (holzheu@de.ibm.com),
+ * Holger Smolinski (Holger.Smolinski@de.ibm.com)
+ *
+ * Bugreports to: <Linux390@de.ibm.com>
+ */
+
+#define KMSG_COMPONENT "s390dbf"
+#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
+
+#include <linux/stddef.h>
+#include <linux/kernel.h>
+#include <linux/errno.h>
+#include <linux/slab.h>
+#include <linux/ctype.h>
+#include <linux/string.h>
+#include <linux/sysctl.h>
+#include <linux/uaccess.h>
+#include <linux/export.h>
+#include <linux/init.h>
+#include <linux/fs.h>
+#include <linux/debugfs.h>
+
+#include <asm/debug.h>
+
+#define DEBUG_PROLOG_ENTRY -1
+
+#define ALL_AREAS 0 /* copy all debug areas */
+#define NO_AREAS 1 /* copy no debug areas */
+
+/* typedefs */
+
+typedef struct file_private_info {
+ loff_t offset; /* offset of last read in file */
+ int act_area; /* number of last formatted area */
+ int act_page; /* current page in given area */
+ int act_entry; /* last formatted entry (offset */
+ /* relative to beginning of last */
+ /* formatted page) */
+ size_t act_entry_offset; /* up to this offset we copied */
+ /* in last read the last formatted */
+ /* entry to userland */
+ char temp_buf[2048]; /* buffer for output */
+ debug_info_t *debug_info_org; /* original debug information */
+ debug_info_t *debug_info_snap; /* snapshot of debug information */
+ struct debug_view *view; /* used view of debug info */
+} file_private_info_t;
+
+typedef struct {
+ char *string;
+ /*
+ * This assumes that all args are converted into longs.
+ * On s390 this is the case for all types of parameters
+ * except floats and long long (32 bit).
+ */
+ long args[0];
+} debug_sprintf_entry_t;
+
+/* internal function prototypes */
+
+static int debug_init(void);
+static ssize_t debug_output(struct file *file, char __user *user_buf,
+ size_t user_len, loff_t *offset);
+static ssize_t debug_input(struct file *file, const char __user *user_buf,
+ size_t user_len, loff_t *offset);
+static int debug_open(struct inode *inode, struct file *file);
+static int debug_close(struct inode *inode, struct file *file);
+static debug_info_t *debug_info_create(const char *name, int pages_per_area,
+ int nr_areas, int buf_size, umode_t mode);
+static void debug_info_get(debug_info_t *);
+static void debug_info_put(debug_info_t *);
+static int debug_prolog_level_fn(debug_info_t *id,
+ struct debug_view *view, char *out_buf);
+static int debug_input_level_fn(debug_info_t *id, struct debug_view *view,
+ struct file *file, const char __user *user_buf,
+ size_t user_buf_size, loff_t *offset);
+static int debug_prolog_pages_fn(debug_info_t *id,
+ struct debug_view *view, char *out_buf);
+static int debug_input_pages_fn(debug_info_t *id, struct debug_view *view,
+ struct file *file, const char __user *user_buf,
+ size_t user_buf_size, loff_t *offset);
+static int debug_input_flush_fn(debug_info_t *id, struct debug_view *view,
+ struct file *file, const char __user *user_buf,
+ size_t user_buf_size, loff_t *offset);
+static int debug_hex_ascii_format_fn(debug_info_t *id, struct debug_view *view,
+ char *out_buf, const char *in_buf);
+static int debug_raw_format_fn(debug_info_t *id,
+ struct debug_view *view, char *out_buf,
+ const char *in_buf);
+static int debug_raw_header_fn(debug_info_t *id, struct debug_view *view,
+ int area, debug_entry_t *entry, char *out_buf);
+
+static int debug_sprintf_format_fn(debug_info_t *id, struct debug_view *view,
+ char *out_buf, debug_sprintf_entry_t *curr_event);
+
+/* globals */
+
+struct debug_view debug_raw_view = {
+ "raw",
+ NULL,
+ &debug_raw_header_fn,
+ &debug_raw_format_fn,
+ NULL,
+ NULL
+};
+EXPORT_SYMBOL(debug_raw_view);
+
+struct debug_view debug_hex_ascii_view = {
+ "hex_ascii",
+ NULL,
+ &debug_dflt_header_fn,
+ &debug_hex_ascii_format_fn,
+ NULL,
+ NULL
+};
+EXPORT_SYMBOL(debug_hex_ascii_view);
+
+static struct debug_view debug_level_view = {
+ "level",
+ &debug_prolog_level_fn,
+ NULL,
+ NULL,
+ &debug_input_level_fn,
+ NULL
+};
+
+static struct debug_view debug_pages_view = {
+ "pages",
+ &debug_prolog_pages_fn,
+ NULL,
+ NULL,
+ &debug_input_pages_fn,
+ NULL
+};
+
+static struct debug_view debug_flush_view = {
+ "flush",
+ NULL,
+ NULL,
+ NULL,
+ &debug_input_flush_fn,
+ NULL
+};
+
+struct debug_view debug_sprintf_view = {
+ "sprintf",
+ NULL,
+ &debug_dflt_header_fn,
+ (debug_format_proc_t *)&debug_sprintf_format_fn,
+ NULL,
+ NULL
+};
+EXPORT_SYMBOL(debug_sprintf_view);
+
+/* used by dump analysis tools to determine version of debug feature */
+static unsigned int __used debug_feature_version = __DEBUG_FEATURE_VERSION;
+
+/* static globals */
+
+static debug_info_t *debug_area_first;
+static debug_info_t *debug_area_last;
+static DEFINE_MUTEX(debug_mutex);
+
+static int initialized;
+static int debug_critical;
+
+static const struct file_operations debug_file_ops = {
+ .owner = THIS_MODULE,
+ .read = debug_output,
+ .write = debug_input,
+ .open = debug_open,
+ .release = debug_close,
+ .llseek = no_llseek,
+};
+
+static struct dentry *debug_debugfs_root_entry;
+
+/* functions */
+
+/*
+ * debug_areas_alloc
+ * - Debug areas are implemented as a three-dimensional array:
+ * areas[areanumber][pagenumber][pageoffset]
+ */
+
+static debug_entry_t ***debug_areas_alloc(int pages_per_area, int nr_areas)
+{
+ debug_entry_t ***areas;
+ int i, j;
+
+ areas = kmalloc_array(nr_areas, sizeof(debug_entry_t **), GFP_KERNEL);
+ if (!areas)
+ goto fail_malloc_areas;
+ for (i = 0; i < nr_areas; i++) {
+ /* __GFP_NOWARN to avoid a user-triggerable WARN; allocation failures are handled below */
+ areas[i] = kmalloc_array(pages_per_area,
+ sizeof(debug_entry_t *),
+ GFP_KERNEL | __GFP_NOWARN);
+ if (!areas[i])
+ goto fail_malloc_areas2;
+ for (j = 0; j < pages_per_area; j++) {
+ areas[i][j] = kzalloc(PAGE_SIZE, GFP_KERNEL);
+ if (!areas[i][j]) {
+ for (j--; j >= 0 ; j--)
+ kfree(areas[i][j]);
+ kfree(areas[i]);
+ goto fail_malloc_areas2;
+ }
+ }
+ }
+ return areas;
+
+fail_malloc_areas2:
+ for (i--; i >= 0; i--) {
+ for (j = 0; j < pages_per_area; j++)
+ kfree(areas[i][j]);
+ kfree(areas[i]);
+ }
+ kfree(areas);
+fail_malloc_areas:
+ return NULL;
+}
+
+/*
+ * debug_info_alloc
+ * - alloc new debug-info
+ */
+static debug_info_t *debug_info_alloc(const char *name, int pages_per_area,
+ int nr_areas, int buf_size, int level,
+ int mode)
+{
+ debug_info_t *rc;
+
+ /* alloc everything */
+ rc = kmalloc(sizeof(debug_info_t), GFP_KERNEL);
+ if (!rc)
+ goto fail_malloc_rc;
+ rc->active_entries = kcalloc(nr_areas, sizeof(int), GFP_KERNEL);
+ if (!rc->active_entries)
+ goto fail_malloc_active_entries;
+ rc->active_pages = kcalloc(nr_areas, sizeof(int), GFP_KERNEL);
+ if (!rc->active_pages)
+ goto fail_malloc_active_pages;
+ if ((mode == ALL_AREAS) && (pages_per_area != 0)) {
+ rc->areas = debug_areas_alloc(pages_per_area, nr_areas);
+ if (!rc->areas)
+ goto fail_malloc_areas;
+ } else {
+ rc->areas = NULL;
+ }
+
+ /* initialize members */
+ spin_lock_init(&rc->lock);
+ rc->pages_per_area = pages_per_area;
+ rc->nr_areas = nr_areas;
+ rc->active_area = 0;
+ rc->level = level;
+ rc->buf_size = buf_size;
+ rc->entry_size = sizeof(debug_entry_t) + buf_size;
+ strlcpy(rc->name, name, sizeof(rc->name));
+ memset(rc->views, 0, DEBUG_MAX_VIEWS * sizeof(struct debug_view *));
+ memset(rc->debugfs_entries, 0, DEBUG_MAX_VIEWS * sizeof(struct dentry *));
+ refcount_set(&(rc->ref_count), 0);
+
+ return rc;
+
+fail_malloc_areas:
+ kfree(rc->active_pages);
+fail_malloc_active_pages:
+ kfree(rc->active_entries);
+fail_malloc_active_entries:
+ kfree(rc);
+fail_malloc_rc:
+ return NULL;
+}
+
+/*
+ * debug_areas_free
+ * - free all debug areas
+ */
+static void debug_areas_free(debug_info_t *db_info)
+{
+ int i, j;
+
+ if (!db_info->areas)
+ return;
+ for (i = 0; i < db_info->nr_areas; i++) {
+ for (j = 0; j < db_info->pages_per_area; j++)
+ kfree(db_info->areas[i][j]);
+ kfree(db_info->areas[i]);
+ }
+ kfree(db_info->areas);
+ db_info->areas = NULL;
+}
+
+/*
+ * debug_info_free
+ * - free memory debug-info
+ */
+static void debug_info_free(debug_info_t *db_info)
+{
+ debug_areas_free(db_info);
+ kfree(db_info->active_entries);
+ kfree(db_info->active_pages);
+ kfree(db_info);
+}
+
+/*
+ * debug_info_create
+ * - create new debug-info
+ */
+
+static debug_info_t *debug_info_create(const char *name, int pages_per_area,
+ int nr_areas, int buf_size, umode_t mode)
+{
+ debug_info_t *rc;
+
+ rc = debug_info_alloc(name, pages_per_area, nr_areas, buf_size,
+ DEBUG_DEFAULT_LEVEL, ALL_AREAS);
+ if (!rc)
+ goto out;
+
+ rc->mode = mode & ~S_IFMT;
+
+ /* create root directory */
+ rc->debugfs_root_entry = debugfs_create_dir(rc->name,
+ debug_debugfs_root_entry);
+
+ /* append new element to linked list */
+ if (!debug_area_first) {
+ /* first element in list */
+ debug_area_first = rc;
+ rc->prev = NULL;
+ } else {
+ /* append element to end of list */
+ debug_area_last->next = rc;
+ rc->prev = debug_area_last;
+ }
+ debug_area_last = rc;
+ rc->next = NULL;
+
+ refcount_set(&rc->ref_count, 1);
+out:
+ return rc;
+}
+
+/*
+ * debug_info_copy
+ * - copy debug-info
+ */
+static debug_info_t *debug_info_copy(debug_info_t *in, int mode)
+{
+ unsigned long flags;
+ debug_info_t *rc;
+ int i, j;
+
+ /* get a consistent copy of the debug areas */
+ do {
+ rc = debug_info_alloc(in->name, in->pages_per_area,
+ in->nr_areas, in->buf_size, in->level, mode);
+ spin_lock_irqsave(&in->lock, flags);
+ if (!rc)
+ goto out;
+ /* has something changed in the meantime ? */
+ if ((rc->pages_per_area == in->pages_per_area) &&
+ (rc->nr_areas == in->nr_areas)) {
+ break;
+ }
+ spin_unlock_irqrestore(&in->lock, flags);
+ debug_info_free(rc);
+ } while (1);
+
+ if (mode == NO_AREAS)
+ goto out;
+
+ for (i = 0; i < in->nr_areas; i++) {
+ for (j = 0; j < in->pages_per_area; j++)
+ memcpy(rc->areas[i][j], in->areas[i][j], PAGE_SIZE);
+ }
+out:
+ spin_unlock_irqrestore(&in->lock, flags);
+ return rc;
+}
+
+/*
+ * debug_info_get
+ * - increments reference count for debug-info
+ */
+static void debug_info_get(debug_info_t *db_info)
+{
+ if (db_info)
+ refcount_inc(&db_info->ref_count);
+}
+
+/*
+ * debug_info_put:
+ * - decreases reference count for debug-info and frees it if necessary
+ */
+static void debug_info_put(debug_info_t *db_info)
+{
+ int i;
+
+ if (!db_info)
+ return;
+ if (refcount_dec_and_test(&db_info->ref_count)) {
+ for (i = 0; i < DEBUG_MAX_VIEWS; i++) {
+ if (!db_info->views[i])
+ continue;
+ debugfs_remove(db_info->debugfs_entries[i]);
+ }
+ debugfs_remove(db_info->debugfs_root_entry);
+ if (db_info == debug_area_first)
+ debug_area_first = db_info->next;
+ if (db_info == debug_area_last)
+ debug_area_last = db_info->prev;
+ if (db_info->prev)
+ db_info->prev->next = db_info->next;
+ if (db_info->next)
+ db_info->next->prev = db_info->prev;
+ debug_info_free(db_info);
+ }
+}
+
+/*
+ * debug_format_entry:
+ * - format one debug entry and return size of formatted data
+ */
+static int debug_format_entry(file_private_info_t *p_info)
+{
+ debug_info_t *id_snap = p_info->debug_info_snap;
+ struct debug_view *view = p_info->view;
+ debug_entry_t *act_entry;
+ size_t len = 0;
+
+ if (p_info->act_entry == DEBUG_PROLOG_ENTRY) {
+ /* print prolog */
+ if (view->prolog_proc)
+ len += view->prolog_proc(id_snap, view, p_info->temp_buf);
+ goto out;
+ }
+ if (!id_snap->areas) /* this is true if we have a prolog-only view */
+ goto out; /* or if 'pages_per_area' is 0 */
+ act_entry = (debug_entry_t *) ((char *)id_snap->areas[p_info->act_area]
+ [p_info->act_page] + p_info->act_entry);
+
+ if (act_entry->id.stck == 0LL)
+ goto out; /* empty entry */
+ if (view->header_proc)
+ len += view->header_proc(id_snap, view, p_info->act_area,
+ act_entry, p_info->temp_buf + len);
+ if (view->format_proc)
+ len += view->format_proc(id_snap, view, p_info->temp_buf + len,
+ DEBUG_DATA(act_entry));
+out:
+ return len;
+}
+
+/*
+ * debug_next_entry:
+ * - go to the next entry in p_info
+ */
+static inline int debug_next_entry(file_private_info_t *p_info)
+{
+ debug_info_t *id;
+
+ id = p_info->debug_info_snap;
+ if (p_info->act_entry == DEBUG_PROLOG_ENTRY) {
+ p_info->act_entry = 0;
+ p_info->act_page = 0;
+ goto out;
+ }
+ if (!id->areas)
+ return 1;
+ p_info->act_entry += id->entry_size;
+ /* switch to next page, if we reached the end of the page */
+ if (p_info->act_entry > (PAGE_SIZE - id->entry_size)) {
+ /* next page */
+ p_info->act_entry = 0;
+ p_info->act_page += 1;
+ if ((p_info->act_page % id->pages_per_area) == 0) {
+ /* next area */
+ p_info->act_area++;
+ p_info->act_page = 0;
+ }
+ if (p_info->act_area >= id->nr_areas)
+ return 1;
+ }
+out:
+ return 0;
+}
+
+/*
+ * debug_output:
+ * - called for user read()
+ * - copies formatted debug entries to the user buffer
+ */
+static ssize_t debug_output(struct file *file, /* file descriptor */
+ char __user *user_buf, /* user buffer */
+ size_t len, /* length of buffer */
+ loff_t *offset) /* offset in the file */
+{
+ size_t count = 0;
+ size_t entry_offset;
+ file_private_info_t *p_info;
+
+ p_info = (file_private_info_t *) file->private_data;
+ if (*offset != p_info->offset)
+ return -EPIPE;
+ if (p_info->act_area >= p_info->debug_info_snap->nr_areas)
+ return 0;
+ entry_offset = p_info->act_entry_offset;
+ while (count < len) {
+ int formatted_line_residue;
+ int formatted_line_size;
+ int user_buf_residue;
+ size_t copy_size;
+
+ formatted_line_size = debug_format_entry(p_info);
+ formatted_line_residue = formatted_line_size - entry_offset;
+ user_buf_residue = len-count;
+ copy_size = min(user_buf_residue, formatted_line_residue);
+ if (copy_size) {
+ if (copy_to_user(user_buf + count, p_info->temp_buf
+ + entry_offset, copy_size))
+ return -EFAULT;
+ count += copy_size;
+ entry_offset += copy_size;
+ }
+ if (copy_size == formatted_line_residue) {
+ entry_offset = 0;
+ if (debug_next_entry(p_info))
+ goto out;
+ }
+ }
+out:
+ p_info->offset = *offset + count;
+ p_info->act_entry_offset = entry_offset;
+ *offset = p_info->offset;
+ return count;
+}
+
+/*
+ * debug_input:
+ * - called for user write()
+ * - calls input function of view
+ */
+static ssize_t debug_input(struct file *file, const char __user *user_buf,
+ size_t length, loff_t *offset)
+{
+ file_private_info_t *p_info;
+ int rc = 0;
+
+ mutex_lock(&debug_mutex);
+ p_info = ((file_private_info_t *) file->private_data);
+ if (p_info->view->input_proc) {
+ rc = p_info->view->input_proc(p_info->debug_info_org,
+ p_info->view, file, user_buf,
+ length, offset);
+ } else {
+ rc = -EPERM;
+ }
+ mutex_unlock(&debug_mutex);
+ return rc; /* number of input characters */
+}
+
+/*
+ * debug_open:
+ * - called for user open()
+ * - copies formatted output to private_data area of the file
+ * handle
+ */
+static int debug_open(struct inode *inode, struct file *file)
+{
+ debug_info_t *debug_info, *debug_info_snapshot;
+ file_private_info_t *p_info;
+ int i, rc = 0;
+
+ mutex_lock(&debug_mutex);
+ debug_info = file_inode(file)->i_private;
+ /* find debug view */
+ for (i = 0; i < DEBUG_MAX_VIEWS; i++) {
+ if (!debug_info->views[i])
+ continue;
+ else if (debug_info->debugfs_entries[i] == file->f_path.dentry)
+ goto found; /* found view ! */
+ }
+ /* no entry found */
+ rc = -EINVAL;
+ goto out;
+
+found:
+
+ /* Make a snapshot of the current debug areas to get a consistent view. */
+ /* Copying all areas is only needed if we have a view which */
+ /* formats the debug areas. */
+
+ if (!debug_info->views[i]->format_proc && !debug_info->views[i]->header_proc)
+ debug_info_snapshot = debug_info_copy(debug_info, NO_AREAS);
+ else
+ debug_info_snapshot = debug_info_copy(debug_info, ALL_AREAS);
+
+ if (!debug_info_snapshot) {
+ rc = -ENOMEM;
+ goto out;
+ }
+ p_info = kmalloc(sizeof(file_private_info_t), GFP_KERNEL);
+ if (!p_info) {
+ debug_info_free(debug_info_snapshot);
+ rc = -ENOMEM;
+ goto out;
+ }
+ p_info->offset = 0;
+ p_info->debug_info_snap = debug_info_snapshot;
+ p_info->debug_info_org = debug_info;
+ p_info->view = debug_info->views[i];
+ p_info->act_area = 0;
+ p_info->act_page = 0;
+ p_info->act_entry = DEBUG_PROLOG_ENTRY;
+ p_info->act_entry_offset = 0;
+ file->private_data = p_info;
+ debug_info_get(debug_info);
+ nonseekable_open(inode, file);
+out:
+ mutex_unlock(&debug_mutex);
+ return rc;
+}
+
+/*
+ * debug_close:
+ * - called for user close()
+ * - deletes private_data area of the file handle
+ */
+static int debug_close(struct inode *inode, struct file *file)
+{
+ file_private_info_t *p_info;
+
+ p_info = (file_private_info_t *) file->private_data;
+ if (p_info->debug_info_snap)
+ debug_info_free(p_info->debug_info_snap);
+ debug_info_put(p_info->debug_info_org);
+ kfree(file->private_data);
+ return 0; /* success */
+}
+
+/*
+ * debug_register_mode:
+ * - Creates and initializes debug area for the caller
+ * The mode parameter allows specifying access rights for the s390dbf files
+ * - Returns handle for debug area
+ */
+debug_info_t *debug_register_mode(const char *name, int pages_per_area,
+ int nr_areas, int buf_size, umode_t mode,
+ uid_t uid, gid_t gid)
+{
+ debug_info_t *rc = NULL;
+
+ /* Since debugfs currently does not support uid/gid other than root, */
+ /* we do not allow gid/uid != 0 until we get support for that. */
+ if ((uid != 0) || (gid != 0))
+ pr_warn("Root becomes the owner of all s390dbf files in sysfs\n");
+ BUG_ON(!initialized);
+ mutex_lock(&debug_mutex);
+
+ /* create new debug_info */
+ rc = debug_info_create(name, pages_per_area, nr_areas, buf_size, mode);
+ if (!rc)
+ goto out;
+ debug_register_view(rc, &debug_level_view);
+ debug_register_view(rc, &debug_flush_view);
+ debug_register_view(rc, &debug_pages_view);
+out:
+ if (!rc)
+ pr_err("Registering debug feature %s failed\n", name);
+ mutex_unlock(&debug_mutex);
+ return rc;
+}
+EXPORT_SYMBOL(debug_register_mode);
+
+/*
+ * debug_register:
+ * - creates and initializes debug area for the caller
+ * - returns handle for debug area
+ */
+debug_info_t *debug_register(const char *name, int pages_per_area,
+ int nr_areas, int buf_size)
+{
+ return debug_register_mode(name, pages_per_area, nr_areas, buf_size,
+ S_IRUSR | S_IWUSR, 0, 0);
+}
+EXPORT_SYMBOL(debug_register);
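+
+/*
+ * Typical usage sketch (illustrative only; the name, sizes and data
+ * used here are made up, not taken from any caller):
+ *
+ *   static char data[16];
+ *
+ *   debug_info_t *dbf = debug_register("mydrv", 4, 1, 16);
+ *   debug_register_view(dbf, &debug_hex_ascii_view);
+ *   debug_event(dbf, 3, data, 16);
+ *   debug_unregister(dbf);
+ */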
+
+/*
+ * debug_unregister:
+ * - give back debug area
+ */
+void debug_unregister(debug_info_t *id)
+{
+ if (!id)
+ return;
+ mutex_lock(&debug_mutex);
+ debug_info_put(id);
+ mutex_unlock(&debug_mutex);
+}
+EXPORT_SYMBOL(debug_unregister);
+
+/*
+ * debug_set_size:
+ * - set area size (number of pages) and number of areas
+ */
+static int debug_set_size(debug_info_t *id, int nr_areas, int pages_per_area)
+{
+ debug_entry_t ***new_areas;
+ unsigned long flags;
+ int rc = 0;
+
+ if (!id || (nr_areas <= 0) || (pages_per_area < 0))
+ return -EINVAL;
+ if (pages_per_area > 0) {
+ new_areas = debug_areas_alloc(pages_per_area, nr_areas);
+ if (!new_areas) {
+ pr_info("Allocating memory for %i pages failed\n",
+ pages_per_area);
+ rc = -ENOMEM;
+ goto out;
+ }
+ } else {
+ new_areas = NULL;
+ }
+ spin_lock_irqsave(&id->lock, flags);
+ debug_areas_free(id);
+ id->areas = new_areas;
+ id->nr_areas = nr_areas;
+ id->pages_per_area = pages_per_area;
+ id->active_area = 0;
+ memset(id->active_entries, 0, sizeof(int)*id->nr_areas);
+ memset(id->active_pages, 0, sizeof(int)*id->nr_areas);
+ spin_unlock_irqrestore(&id->lock, flags);
+ pr_info("%s: set new size (%i pages)\n", id->name, pages_per_area);
+out:
+ return rc;
+}
+
+/*
+ * debug_set_level:
+ * - set actual debug level
+ */
+void debug_set_level(debug_info_t *id, int new_level)
+{
+ unsigned long flags;
+
+ if (!id)
+ return;
+ spin_lock_irqsave(&id->lock, flags);
+ if (new_level == DEBUG_OFF_LEVEL) {
+ id->level = DEBUG_OFF_LEVEL;
+ pr_info("%s: switched off\n", id->name);
+ } else if ((new_level > DEBUG_MAX_LEVEL) || (new_level < 0)) {
+ pr_info("%s: level %i is out of range (%i - %i)\n",
+ id->name, new_level, 0, DEBUG_MAX_LEVEL);
+ } else {
+ id->level = new_level;
+ }
+ spin_unlock_irqrestore(&id->lock, flags);
+}
+EXPORT_SYMBOL(debug_set_level);
+
+/*
+ * proceed_active_entry:
+ * - set active entry to next in the ring buffer
+ */
+static inline void proceed_active_entry(debug_info_t *id)
+{
+ if ((id->active_entries[id->active_area] += id->entry_size)
+ > (PAGE_SIZE - id->entry_size)) {
+ id->active_entries[id->active_area] = 0;
+ id->active_pages[id->active_area] =
+ (id->active_pages[id->active_area] + 1) %
+ id->pages_per_area;
+ }
+}
+
+/*
+ * proceed_active_area:
+ * - set active area to next in the ring buffer
+ */
+static inline void proceed_active_area(debug_info_t *id)
+{
+ id->active_area++;
+ id->active_area = id->active_area % id->nr_areas;
+}
+
+/*
+ * get_active_entry:
+ */
+static inline debug_entry_t *get_active_entry(debug_info_t *id)
+{
+ return (debug_entry_t *) (((char *) id->areas[id->active_area]
+ [id->active_pages[id->active_area]]) +
+ id->active_entries[id->active_area]);
+}
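+
+/*
+ * Addressing sketch (restating the code above): the next entry to be
+ * written lives at
+ *
+ *   areas[active_area][active_pages[active_area]]
+ *     + active_entries[active_area]
+ *
+ * i.e. area -> current page in that area -> byte offset within the page.
+ * proceed_active_entry()/proceed_active_area() advance these indices.
+ */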
+
+/*
+ * debug_finish_entry:
+ * - set timestamp, caller address, cpu number etc.
+ */
+
+static inline void debug_finish_entry(debug_info_t *id, debug_entry_t *active,
+ int level, int exception)
+{
+ active->id.stck = get_tod_clock_fast() -
+ *(unsigned long long *) &tod_clock_base[1];
+ active->id.fields.cpuid = smp_processor_id();
+ active->caller = __builtin_return_address(0);
+ active->id.fields.exception = exception;
+ active->id.fields.level = level;
+ proceed_active_entry(id);
+ if (exception)
+ proceed_active_area(id);
+}
+
+static int debug_stoppable = 1;
+static int debug_active = 1;
+
+#define CTL_S390DBF_STOPPABLE 5678
+#define CTL_S390DBF_ACTIVE 5679
+
+/*
+ * proc handler for the running debug_active sysctl
+ * always allow read, allow write only if debug_stoppable is set or
+ * if debug_active is already off
+ */
+static int s390dbf_procactive(struct ctl_table *table, int write,
+ void __user *buffer, size_t *lenp, loff_t *ppos)
+{
+ if (!write || debug_stoppable || !debug_active)
+ return proc_dointvec(table, write, buffer, lenp, ppos);
+ else
+ return 0;
+}
+
+static struct ctl_table s390dbf_table[] = {
+ {
+ .procname = "debug_stoppable",
+ .data = &debug_stoppable,
+ .maxlen = sizeof(int),
+ .mode = S_IRUGO | S_IWUSR,
+ .proc_handler = proc_dointvec,
+ },
+ {
+ .procname = "debug_active",
+ .data = &debug_active,
+ .maxlen = sizeof(int),
+ .mode = S_IRUGO | S_IWUSR,
+ .proc_handler = s390dbf_procactive,
+ },
+ { }
+};
+
+static struct ctl_table s390dbf_dir_table[] = {
+ {
+ .procname = "s390dbf",
+ .maxlen = 0,
+ .mode = S_IRUGO | S_IXUGO,
+ .child = s390dbf_table,
+ },
+ { }
+};
+
+static struct ctl_table_header *s390dbf_sysctl_header;
+
+void debug_stop_all(void)
+{
+ if (debug_stoppable)
+ debug_active = 0;
+}
+EXPORT_SYMBOL(debug_stop_all);
+
+void debug_set_critical(void)
+{
+ debug_critical = 1;
+}
+
+/*
+ * debug_event_common:
+ * - write debug entry with given size
+ */
+debug_entry_t *debug_event_common(debug_info_t *id, int level, const void *buf,
+ int len)
+{
+ debug_entry_t *active;
+ unsigned long flags;
+
+ if (!debug_active || !id->areas)
+ return NULL;
+ if (debug_critical) {
+ if (!spin_trylock_irqsave(&id->lock, flags))
+ return NULL;
+ } else {
+ spin_lock_irqsave(&id->lock, flags);
+ }
+ do {
+ active = get_active_entry(id);
+ memcpy(DEBUG_DATA(active), buf, min(len, id->buf_size));
+ if (len < id->buf_size)
+ memset((DEBUG_DATA(active)) + len, 0, id->buf_size - len);
+ debug_finish_entry(id, active, level, 0);
+ len -= id->buf_size;
+ buf += id->buf_size;
+ } while (len > 0);
+
+ spin_unlock_irqrestore(&id->lock, flags);
+ return active;
+}
+EXPORT_SYMBOL(debug_event_common);
+
+/*
+ * debug_exception_common:
+ * - write debug entry with given size and switch to next debug area
+ */
+debug_entry_t *debug_exception_common(debug_info_t *id, int level,
+ const void *buf, int len)
+{
+ debug_entry_t *active;
+ unsigned long flags;
+
+ if (!debug_active || !id->areas)
+ return NULL;
+ if (debug_critical) {
+ if (!spin_trylock_irqsave(&id->lock, flags))
+ return NULL;
+ } else {
+ spin_lock_irqsave(&id->lock, flags);
+ }
+ do {
+ active = get_active_entry(id);
+ memcpy(DEBUG_DATA(active), buf, min(len, id->buf_size));
+ if (len < id->buf_size)
+ memset((DEBUG_DATA(active)) + len, 0, id->buf_size - len);
+ debug_finish_entry(id, active, level, len <= id->buf_size);
+ len -= id->buf_size;
+ buf += id->buf_size;
+ } while (len > 0);
+
+ spin_unlock_irqrestore(&id->lock, flags);
+ return active;
+}
+EXPORT_SYMBOL(debug_exception_common);
+
+/*
+ * counts arguments in format string for sprintf view
+ */
+static inline int debug_count_numargs(char *string)
+{
+ int numargs = 0;
+
+ while (*string) {
+ if (*string++ == '%')
+ numargs++;
+ }
+ return numargs;
+}
+
+/*
+ * debug_sprintf_event:
+ */
+debug_entry_t *__debug_sprintf_event(debug_info_t *id, int level, char *string, ...)
+{
+ debug_sprintf_entry_t *curr_event;
+ debug_entry_t *active;
+ unsigned long flags;
+ int numargs, idx;
+ va_list ap;
+
+ if (!debug_active || !id->areas)
+ return NULL;
+ numargs = debug_count_numargs(string);
+
+ if (debug_critical) {
+ if (!spin_trylock_irqsave(&id->lock, flags))
+ return NULL;
+ } else {
+ spin_lock_irqsave(&id->lock, flags);
+ }
+ active = get_active_entry(id);
+ curr_event = (debug_sprintf_entry_t *) DEBUG_DATA(active);
+ va_start(ap, string);
+ curr_event->string = string;
+ for (idx = 0; idx < min(numargs, (int)(id->buf_size / sizeof(long)) - 1); idx++)
+ curr_event->args[idx] = va_arg(ap, long);
+ va_end(ap);
+ debug_finish_entry(id, active, level, 0);
+ spin_unlock_irqrestore(&id->lock, flags);
+
+ return active;
+}
+EXPORT_SYMBOL(__debug_sprintf_event);
+
+/*
+ * debug_sprintf_exception:
+ */
+debug_entry_t *__debug_sprintf_exception(debug_info_t *id, int level, char *string, ...)
+{
+ debug_sprintf_entry_t *curr_event;
+ debug_entry_t *active;
+ unsigned long flags;
+ int numargs, idx;
+ va_list ap;
+
+ if (!debug_active || !id->areas)
+ return NULL;
+
+ numargs = debug_count_numargs(string);
+
+ if (debug_critical) {
+ if (!spin_trylock_irqsave(&id->lock, flags))
+ return NULL;
+ } else {
+ spin_lock_irqsave(&id->lock, flags);
+ }
+ active = get_active_entry(id);
+ curr_event = (debug_sprintf_entry_t *)DEBUG_DATA(active);
+ va_start(ap, string);
+ curr_event->string = string;
+ for (idx = 0; idx < min(numargs, (int)(id->buf_size / sizeof(long)) - 1); idx++)
+ curr_event->args[idx] = va_arg(ap, long);
+ va_end(ap);
+ debug_finish_entry(id, active, level, 1);
+ spin_unlock_irqrestore(&id->lock, flags);
+
+ return active;
+}
+EXPORT_SYMBOL(__debug_sprintf_exception);
+
+/*
+ * debug_register_view:
+ */
+int debug_register_view(debug_info_t *id, struct debug_view *view)
+{
+ unsigned long flags;
+ struct dentry *pde;
+ umode_t mode;
+ int rc = 0;
+ int i;
+
+ if (!id)
+ goto out;
+ mode = (id->mode | S_IFREG) & ~S_IXUGO;
+ if (!(view->prolog_proc || view->format_proc || view->header_proc))
+ mode &= ~(S_IRUSR | S_IRGRP | S_IROTH);
+ if (!view->input_proc)
+ mode &= ~(S_IWUSR | S_IWGRP | S_IWOTH);
+ pde = debugfs_create_file(view->name, mode, id->debugfs_root_entry,
+ id, &debug_file_ops);
+ if (!pde) {
+ pr_err("Registering view %s/%s failed due to out of "
+ "memory\n", id->name, view->name);
+ rc = -1;
+ goto out;
+ }
+ spin_lock_irqsave(&id->lock, flags);
+ for (i = 0; i < DEBUG_MAX_VIEWS; i++) {
+ if (!id->views[i])
+ break;
+ }
+ if (i == DEBUG_MAX_VIEWS) {
+ pr_err("Registering view %s/%s would exceed the maximum "
+ "number of views %i\n", id->name, view->name, i);
+ rc = -1;
+ } else {
+ id->views[i] = view;
+ id->debugfs_entries[i] = pde;
+ }
+ spin_unlock_irqrestore(&id->lock, flags);
+ if (rc)
+ debugfs_remove(pde);
+out:
+ return rc;
+}
+EXPORT_SYMBOL(debug_register_view);
+
+/*
+ * debug_unregister_view:
+ */
+int debug_unregister_view(debug_info_t *id, struct debug_view *view)
+{
+ struct dentry *dentry = NULL;
+ unsigned long flags;
+ int i, rc = 0;
+
+ if (!id)
+ goto out;
+ spin_lock_irqsave(&id->lock, flags);
+ for (i = 0; i < DEBUG_MAX_VIEWS; i++) {
+ if (id->views[i] == view)
+ break;
+ }
+ if (i == DEBUG_MAX_VIEWS) {
+ rc = -1;
+ } else {
+ dentry = id->debugfs_entries[i];
+ id->views[i] = NULL;
+ id->debugfs_entries[i] = NULL;
+ }
+ spin_unlock_irqrestore(&id->lock, flags);
+ debugfs_remove(dentry);
+out:
+ return rc;
+}
+EXPORT_SYMBOL(debug_unregister_view);
+
+static inline char *debug_get_user_string(const char __user *user_buf,
+ size_t user_len)
+{
+ char *buffer;
+
+ buffer = kmalloc(user_len + 1, GFP_KERNEL);
+ if (!buffer)
+ return ERR_PTR(-ENOMEM);
+ if (copy_from_user(buffer, user_buf, user_len) != 0) {
+ kfree(buffer);
+ return ERR_PTR(-EFAULT);
+ }
+ /* got the string, now strip linefeed. */
+ if (buffer[user_len - 1] == '\n')
+ buffer[user_len - 1] = 0;
+ else
+ buffer[user_len] = 0;
+ return buffer;
+}
+
+static inline int debug_get_uint(char *buf)
+{
+ int rc;
+
+ buf = skip_spaces(buf);
+ rc = simple_strtoul(buf, &buf, 10);
+ if (*buf)
+ rc = -EINVAL;
+ return rc;
+}
+
+/*
+ * functions for debug-views
+ */
+
+/*
+ * prints out the current number of pages per debug area
+ */
+
+static int debug_prolog_pages_fn(debug_info_t *id, struct debug_view *view,
+ char *out_buf)
+{
+ return sprintf(out_buf, "%i\n", id->pages_per_area);
+}
+
+/*
+ * reads new size (number of pages per debug area)
+ */
+
+static int debug_input_pages_fn(debug_info_t *id, struct debug_view *view,
+ struct file *file, const char __user *user_buf,
+ size_t user_len, loff_t *offset)
+{
+ int rc, new_pages;
+ char *str;
+
+ if (user_len > 0x10000)
+ user_len = 0x10000;
+ if (*offset != 0) {
+ rc = -EPIPE;
+ goto out;
+ }
+ str = debug_get_user_string(user_buf, user_len);
+ if (IS_ERR(str)) {
+ rc = PTR_ERR(str);
+ goto out;
+ }
+ new_pages = debug_get_uint(str);
+ if (new_pages < 0) {
+ rc = -EINVAL;
+ goto free_str;
+ }
+ rc = debug_set_size(id, id->nr_areas, new_pages);
+ if (rc != 0) {
+ rc = -EINVAL;
+ goto free_str;
+ }
+ rc = user_len;
+free_str:
+ kfree(str);
+out:
+ *offset += user_len;
+ return rc; /* number of input characters */
+}
+
+/*
+ * prints out actual debug level
+ */
+static int debug_prolog_level_fn(debug_info_t *id, struct debug_view *view,
+ char *out_buf)
+{
+ int rc = 0;
+
+ if (id->level == DEBUG_OFF_LEVEL)
+ rc = sprintf(out_buf, "-\n");
+ else
+ rc = sprintf(out_buf, "%i\n", id->level);
+ return rc;
+}
+
+/*
+ * reads new debug level
+ */
+static int debug_input_level_fn(debug_info_t *id, struct debug_view *view,
+ struct file *file, const char __user *user_buf,
+ size_t user_len, loff_t *offset)
+{
+ int rc, new_level;
+ char *str;
+
+ if (user_len > 0x10000)
+ user_len = 0x10000;
+ if (*offset != 0) {
+ rc = -EPIPE;
+ goto out;
+ }
+ str = debug_get_user_string(user_buf, user_len);
+ if (IS_ERR(str)) {
+ rc = PTR_ERR(str);
+ goto out;
+ }
+ if (str[0] == '-') {
+ debug_set_level(id, DEBUG_OFF_LEVEL);
+ rc = user_len;
+ goto free_str;
+ } else {
+ new_level = debug_get_uint(str);
+ }
+ if (new_level < 0) {
+ pr_warn("%s is not a valid level for a debug feature\n", str);
+ rc = -EINVAL;
+ } else {
+ debug_set_level(id, new_level);
+ rc = user_len;
+ }
+free_str:
+ kfree(str);
+out:
+ *offset += user_len;
+ return rc; /* number of input characters */
+}
+
+/*
+ * flushes debug areas
+ */
+static void debug_flush(debug_info_t *id, int area)
+{
+ unsigned long flags;
+ int i, j;
+
+ if (!id || !id->areas)
+ return;
+ spin_lock_irqsave(&id->lock, flags);
+ if (area == DEBUG_FLUSH_ALL) {
+ id->active_area = 0;
+ memset(id->active_entries, 0, id->nr_areas * sizeof(int));
+ for (i = 0; i < id->nr_areas; i++) {
+ id->active_pages[i] = 0;
+ for (j = 0; j < id->pages_per_area; j++)
+ memset(id->areas[i][j], 0, PAGE_SIZE);
+ }
+ } else if (area >= 0 && area < id->nr_areas) {
+ id->active_entries[area] = 0;
+ id->active_pages[area] = 0;
+ for (i = 0; i < id->pages_per_area; i++)
+ memset(id->areas[area][i], 0, PAGE_SIZE);
+ }
+ spin_unlock_irqrestore(&id->lock, flags);
+}
+
+/*
+ * view function: flushes debug areas
+ */
+static int debug_input_flush_fn(debug_info_t *id, struct debug_view *view,
+ struct file *file, const char __user *user_buf,
+ size_t user_len, loff_t *offset)
+{
+ char input_buf[1];
+ int rc = user_len;
+
+ if (user_len > 0x10000)
+ user_len = 0x10000;
+ if (*offset != 0) {
+ rc = -EPIPE;
+ goto out;
+ }
+ if (copy_from_user(input_buf, user_buf, 1)) {
+ rc = -EFAULT;
+ goto out;
+ }
+ if (input_buf[0] == '-') {
+ debug_flush(id, DEBUG_FLUSH_ALL);
+ goto out;
+ }
+ if (isdigit(input_buf[0])) {
+ int area = ((int) input_buf[0] - (int) '0');
+
+ debug_flush(id, area);
+ goto out;
+ }
+
+ pr_info("Flushing debug data failed because %c is not a valid "
+ "area\n", input_buf[0]);
+
+out:
+ *offset += user_len;
+ return rc; /* number of input characters */
+}
+
+/*
+ * prints debug header in raw format
+ */
+static int debug_raw_header_fn(debug_info_t *id, struct debug_view *view,
+ int area, debug_entry_t *entry, char *out_buf)
+{
+ int rc;
+
+ rc = sizeof(debug_entry_t);
+ memcpy(out_buf, entry, sizeof(debug_entry_t));
+ return rc;
+}
+
+/*
+ * prints debug data in raw format
+ */
+static int debug_raw_format_fn(debug_info_t *id, struct debug_view *view,
+ char *out_buf, const char *in_buf)
+{
+ int rc;
+
+ rc = id->buf_size;
+ memcpy(out_buf, in_buf, id->buf_size);
+ return rc;
+}
+
+/*
+ * prints debug data in hex/ascii format
+ */
+static int debug_hex_ascii_format_fn(debug_info_t *id, struct debug_view *view,
+ char *out_buf, const char *in_buf)
+{
+ int i, rc = 0;
+
+ for (i = 0; i < id->buf_size; i++)
+ rc += sprintf(out_buf + rc, "%02x ", ((unsigned char *) in_buf)[i]);
+ rc += sprintf(out_buf + rc, "| ");
+ for (i = 0; i < id->buf_size; i++) {
+ unsigned char c = in_buf[i];
+
+ if (isascii(c) && isprint(c))
+ rc += sprintf(out_buf + rc, "%c", c);
+ else
+ rc += sprintf(out_buf + rc, ".");
+ }
+ rc += sprintf(out_buf + rc, "\n");
+ return rc;
+}
+
+/*
+ * prints header for debug entry
+ */
+int debug_dflt_header_fn(debug_info_t *id, struct debug_view *view,
+ int area, debug_entry_t *entry, char *out_buf)
+{
+ unsigned long base, sec, usec;
+ unsigned long caller;
+ unsigned int level;
+ char *except_str;
+ int rc = 0;
+
+ level = entry->id.fields.level;
+ base = (*(unsigned long *) &tod_clock_base[0]) >> 4;
+ sec = (entry->id.stck >> 12) + base - (TOD_UNIX_EPOCH >> 12);
+ usec = do_div(sec, USEC_PER_SEC);
+
+ if (entry->id.fields.exception)
+ except_str = "*";
+ else
+ except_str = "-";
+ caller = (unsigned long) entry->caller;
+ rc += sprintf(out_buf, "%02i %011ld:%06lu %1u %1s %02i %pK ",
+ area, sec, usec, level, except_str,
+ entry->id.fields.cpuid, (void *)caller);
+ return rc;
+}
+EXPORT_SYMBOL(debug_dflt_header_fn);
+
+/*
+ * prints debug data sprintf-formatted:
+ * debug_sprintf_event/exception calls must be used together with this view
+ */
+
+#define DEBUG_SPRINTF_MAX_ARGS 10
+
+static int debug_sprintf_format_fn(debug_info_t *id, struct debug_view *view,
+ char *out_buf, debug_sprintf_entry_t *curr_event)
+{
+ int num_longs, num_used_args = 0, i, rc = 0;
+ int index[DEBUG_SPRINTF_MAX_ARGS];
+
+ /* count of longs that fit into one entry */
+ num_longs = id->buf_size / sizeof(long);
+
+ if (num_longs < 1)
+ goto out; /* bufsize of entry too small */
+ if (num_longs == 1) {
+ /* no args, we use only the string */
+ strcpy(out_buf, curr_event->string);
+ rc = strlen(curr_event->string);
+ goto out;
+ }
+
+ /* number of arguments used for sprintf (without the format string) */
+ num_used_args = min(DEBUG_SPRINTF_MAX_ARGS, (num_longs - 1));
+
+ memset(index, 0, DEBUG_SPRINTF_MAX_ARGS * sizeof(int));
+
+ for (i = 0; i < num_used_args; i++)
+ index[i] = i;
+
+ rc = sprintf(out_buf, curr_event->string, curr_event->args[index[0]],
+ curr_event->args[index[1]], curr_event->args[index[2]],
+ curr_event->args[index[3]], curr_event->args[index[4]],
+ curr_event->args[index[5]], curr_event->args[index[6]],
+ curr_event->args[index[7]], curr_event->args[index[8]],
+ curr_event->args[index[9]]);
+out:
+ return rc;
+}
+
+/*
+ * debug_init:
+ * - is called exactly once to initialize the debug feature
+ */
+static int __init debug_init(void)
+{
+ s390dbf_sysctl_header = register_sysctl_table(s390dbf_dir_table);
+ mutex_lock(&debug_mutex);
+ debug_debugfs_root_entry = debugfs_create_dir(DEBUG_DIR_ROOT, NULL);
+ initialized = 1;
+ mutex_unlock(&debug_mutex);
+ return 0;
+}
+postcore_initcall(debug_init);
diff --git a/arch/s390/kernel/diag.c b/arch/s390/kernel/diag.c
new file mode 100644
index 000000000..4c7cf8787
--- /dev/null
+++ b/arch/s390/kernel/diag.c
@@ -0,0 +1,268 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Implementation of s390 diagnose codes
+ *
+ * Copyright IBM Corp. 2007
+ * Author(s): Michael Holzheu <holzheu@de.ibm.com>
+ */
+
+#include <linux/export.h>
+#include <linux/init.h>
+#include <linux/cpu.h>
+#include <linux/seq_file.h>
+#include <linux/debugfs.h>
+#include <asm/diag.h>
+#include <asm/trace/diag.h>
+
+struct diag_stat {
+ unsigned int counter[NR_DIAG_STAT];
+};
+
+static DEFINE_PER_CPU(struct diag_stat, diag_stat);
+
+struct diag_desc {
+ int code;
+ char *name;
+};
+
+static const struct diag_desc diag_map[NR_DIAG_STAT] = {
+ [DIAG_STAT_X008] = { .code = 0x008, .name = "Console Function" },
+ [DIAG_STAT_X00C] = { .code = 0x00c, .name = "Pseudo Timer" },
+ [DIAG_STAT_X010] = { .code = 0x010, .name = "Release Pages" },
+ [DIAG_STAT_X014] = { .code = 0x014, .name = "Spool File Services" },
+ [DIAG_STAT_X044] = { .code = 0x044, .name = "Voluntary Timeslice End" },
+ [DIAG_STAT_X064] = { .code = 0x064, .name = "NSS Manipulation" },
+ [DIAG_STAT_X09C] = { .code = 0x09c, .name = "Relinquish Timeslice" },
+ [DIAG_STAT_X0DC] = { .code = 0x0dc, .name = "Appldata Control" },
+ [DIAG_STAT_X204] = { .code = 0x204, .name = "Logical-CPU Utilization" },
+ [DIAG_STAT_X210] = { .code = 0x210, .name = "Device Information" },
+ [DIAG_STAT_X224] = { .code = 0x224, .name = "EBCDIC-Name Table" },
+ [DIAG_STAT_X250] = { .code = 0x250, .name = "Block I/O" },
+ [DIAG_STAT_X258] = { .code = 0x258, .name = "Page-Reference Services" },
+ [DIAG_STAT_X26C] = { .code = 0x26c, .name = "Certain System Information" },
+ [DIAG_STAT_X288] = { .code = 0x288, .name = "Time Bomb" },
+ [DIAG_STAT_X2C4] = { .code = 0x2c4, .name = "FTP Services" },
+ [DIAG_STAT_X2FC] = { .code = 0x2fc, .name = "Guest Performance Data" },
+ [DIAG_STAT_X304] = { .code = 0x304, .name = "Partition-Resource Service" },
+ [DIAG_STAT_X308] = { .code = 0x308, .name = "List-Directed IPL" },
+ [DIAG_STAT_X500] = { .code = 0x500, .name = "Virtio Service" },
+};
+
+static int show_diag_stat(struct seq_file *m, void *v)
+{
+ struct diag_stat *stat;
+ unsigned long n = (unsigned long) v - 1;
+ int cpu, prec, tmp;
+
+ get_online_cpus();
+ if (n == 0) {
+ seq_puts(m, " ");
+
+ for_each_online_cpu(cpu) {
+ prec = 10;
+ for (tmp = 10; cpu >= tmp; tmp *= 10)
+ prec--;
+ seq_printf(m, "%*s%d", prec, "CPU", cpu);
+ }
+ seq_putc(m, '\n');
+ } else if (n <= NR_DIAG_STAT) {
+ seq_printf(m, "diag %03x:", diag_map[n-1].code);
+ for_each_online_cpu(cpu) {
+ stat = &per_cpu(diag_stat, cpu);
+ seq_printf(m, " %10u", stat->counter[n-1]);
+ }
+ seq_printf(m, " %s\n", diag_map[n-1].name);
+ }
+ put_online_cpus();
+ return 0;
+}
+
+static void *show_diag_stat_start(struct seq_file *m, loff_t *pos)
+{
+ return *pos <= NR_DIAG_STAT ? (void *)((unsigned long) *pos + 1) : NULL;
+}
+
+static void *show_diag_stat_next(struct seq_file *m, void *v, loff_t *pos)
+{
+ ++*pos;
+ return show_diag_stat_start(m, pos);
+}
+
+static void show_diag_stat_stop(struct seq_file *m, void *v)
+{
+}
+
+static const struct seq_operations show_diag_stat_sops = {
+ .start = show_diag_stat_start,
+ .next = show_diag_stat_next,
+ .stop = show_diag_stat_stop,
+ .show = show_diag_stat,
+};
+
+static int show_diag_stat_open(struct inode *inode, struct file *file)
+{
+ return seq_open(file, &show_diag_stat_sops);
+}
+
+static const struct file_operations show_diag_stat_fops = {
+ .open = show_diag_stat_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = seq_release,
+};
+
+
+static int __init show_diag_stat_init(void)
+{
+ debugfs_create_file("diag_stat", 0400, NULL, NULL,
+ &show_diag_stat_fops);
+ return 0;
+}
+
+device_initcall(show_diag_stat_init);
+
+void diag_stat_inc(enum diag_stat_enum nr)
+{
+ this_cpu_inc(diag_stat.counter[nr]);
+ trace_s390_diagnose(diag_map[nr].code);
+}
+EXPORT_SYMBOL(diag_stat_inc);
+
+void notrace diag_stat_inc_norecursion(enum diag_stat_enum nr)
+{
+ this_cpu_inc(diag_stat.counter[nr]);
+ trace_s390_diagnose_norecursion(diag_map[nr].code);
+}
+EXPORT_SYMBOL(diag_stat_inc_norecursion);
+
+/*
+ * Diagnose 14: Input spool file manipulation
+ */
+static inline int __diag14(unsigned long rx, unsigned long ry1,
+ unsigned long subcode)
+{
+ register unsigned long _ry1 asm("2") = ry1;
+ register unsigned long _ry2 asm("3") = subcode;
+ int rc = 0;
+
+ asm volatile(
+ " sam31\n"
+ " diag %2,2,0x14\n"
+ " sam64\n"
+ " ipm %0\n"
+ " srl %0,28\n"
+ : "=d" (rc), "+d" (_ry2)
+ : "d" (rx), "d" (_ry1)
+ : "cc");
+
+ return rc;
+}
+
+int diag14(unsigned long rx, unsigned long ry1, unsigned long subcode)
+{
+ diag_stat_inc(DIAG_STAT_X014);
+ return __diag14(rx, ry1, subcode);
+}
+EXPORT_SYMBOL(diag14);
+
+static inline int __diag204(unsigned long *subcode, unsigned long size, void *addr)
+{
+ register unsigned long _subcode asm("0") = *subcode;
+ register unsigned long _size asm("1") = size;
+
+ asm volatile(
+ " diag %2,%0,0x204\n"
+ "0: nopr %%r7\n"
+ EX_TABLE(0b,0b)
+ : "+d" (_subcode), "+d" (_size) : "d" (addr) : "memory");
+ *subcode = _subcode;
+ return _size;
+}
+
+int diag204(unsigned long subcode, unsigned long size, void *addr)
+{
+ diag_stat_inc(DIAG_STAT_X204);
+ size = __diag204(&subcode, size, addr);
+ if (subcode)
+ return -1;
+ return size;
+}
+EXPORT_SYMBOL(diag204);
+
+/*
+ * Diagnose 210: Get information about a virtual device
+ */
+int diag210(struct diag210 *addr)
+{
+ /*
+ * diag 210 needs its data below the 2GB border, so we
+ * use a static data area to be sure
+ */
+ static struct diag210 diag210_tmp;
+ static DEFINE_SPINLOCK(diag210_lock);
+ unsigned long flags;
+ int ccode;
+
+ spin_lock_irqsave(&diag210_lock, flags);
+ diag210_tmp = *addr;
+
+ diag_stat_inc(DIAG_STAT_X210);
+ asm volatile(
+ " lhi %0,-1\n"
+ " sam31\n"
+ " diag %1,0,0x210\n"
+ "0: ipm %0\n"
+ " srl %0,28\n"
+ "1: sam64\n"
+ EX_TABLE(0b, 1b)
+ : "=&d" (ccode) : "a" (&diag210_tmp) : "cc", "memory");
+
+ *addr = diag210_tmp;
+ spin_unlock_irqrestore(&diag210_lock, flags);
+
+ return ccode;
+}
+EXPORT_SYMBOL(diag210);
+
+int diag224(void *ptr)
+{
+ int rc = -EOPNOTSUPP;
+
+ diag_stat_inc(DIAG_STAT_X224);
+ asm volatile(
+ " diag %1,%2,0x224\n"
+ "0: lhi %0,0x0\n"
+ "1:\n"
+ EX_TABLE(0b,1b)
+ : "+d" (rc) :"d" (0), "d" (ptr) : "memory");
+ return rc;
+}
+EXPORT_SYMBOL(diag224);
+
+/*
+ * Diagnose 26C: Access Certain System Information
+ */
+static inline int __diag26c(void *req, void *resp, enum diag26c_sc subcode)
+{
+ register unsigned long _req asm("2") = (addr_t) req;
+ register unsigned long _resp asm("3") = (addr_t) resp;
+ register unsigned long _subcode asm("4") = subcode;
+ register unsigned long _rc asm("5") = -EOPNOTSUPP;
+
+ asm volatile(
+ " sam31\n"
+ " diag %[rx],%[ry],0x26c\n"
+ "0: sam64\n"
+ EX_TABLE(0b,0b)
+ : "+d" (_rc)
+ : [rx] "d" (_req), "d" (_resp), [ry] "d" (_subcode)
+ : "cc", "memory");
+ return _rc;
+}
+
+int diag26c(void *req, void *resp, enum diag26c_sc subcode)
+{
+ diag_stat_inc(DIAG_STAT_X26C);
+ return __diag26c(req, resp, subcode);
+}
+EXPORT_SYMBOL(diag26c);
diff --git a/arch/s390/kernel/dis.c b/arch/s390/kernel/dis.c
new file mode 100644
index 000000000..01307db4b
--- /dev/null
+++ b/arch/s390/kernel/dis.c
@@ -0,0 +1,582 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Disassemble s390 instructions.
+ *
+ * Copyright IBM Corp. 2007
+ * Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com),
+ */
+
+#include <linux/sched.h>
+#include <linux/kernel.h>
+#include <linux/string.h>
+#include <linux/errno.h>
+#include <linux/ptrace.h>
+#include <linux/timer.h>
+#include <linux/mm.h>
+#include <linux/smp.h>
+#include <linux/init.h>
+#include <linux/interrupt.h>
+#include <linux/delay.h>
+#include <linux/export.h>
+#include <linux/kallsyms.h>
+#include <linux/reboot.h>
+#include <linux/kprobes.h>
+#include <linux/kdebug.h>
+#include <linux/uaccess.h>
+#include <linux/atomic.h>
+#include <asm/dis.h>
+#include <asm/io.h>
+#include <asm/cpcmd.h>
+#include <asm/lowcore.h>
+#include <asm/debug.h>
+#include <asm/irq.h>
+
+/* Type of operand */
+#define OPERAND_GPR 0x1 /* Operand printed as %rx */
+#define OPERAND_FPR 0x2 /* Operand printed as %fx */
+#define OPERAND_AR 0x4 /* Operand printed as %ax */
+#define OPERAND_CR 0x8 /* Operand printed as %cx */
+#define OPERAND_VR 0x10 /* Operand printed as %vx */
+#define OPERAND_DISP 0x20 /* Operand printed as displacement */
+#define OPERAND_BASE 0x40 /* Operand printed as base register */
+#define OPERAND_INDEX 0x80 /* Operand printed as index register */
+#define OPERAND_PCREL 0x100 /* Operand printed as pc-relative symbol */
+#define OPERAND_SIGNED 0x200 /* Operand printed as signed value */
+#define OPERAND_LENGTH 0x400 /* Operand printed as length (+1) */
+
+struct s390_operand {
+ unsigned char bits; /* The number of bits in the operand. */
+ unsigned char shift; /* The number of bits to shift. */
+ unsigned short flags; /* One bit syntax flags. */
+};
+
+struct s390_insn {
+ union {
+ const char name[5];
+ struct {
+ unsigned char zero;
+ unsigned int offset;
+ } __packed;
+ };
+ unsigned char opfrag;
+ unsigned char format;
+};
+
+struct s390_opcode_offset {
+ unsigned char opcode;
+ unsigned char mask;
+ unsigned char byte;
+ unsigned short offset;
+ unsigned short count;
+} __packed;
+
+enum {
+ UNUSED,
+ A_8, /* Access reg. starting at position 8 */
+ A_12, /* Access reg. starting at position 12 */
+ A_24, /* Access reg. starting at position 24 */
+ A_28, /* Access reg. starting at position 28 */
+ B_16, /* Base register starting at position 16 */
+ B_32, /* Base register starting at position 32 */
+ C_8, /* Control reg. starting at position 8 */
+ C_12, /* Control reg. starting at position 12 */
+ D20_20, /* 20 bit displacement starting at 20 */
+ D_20, /* Displacement starting at position 20 */
+ D_36, /* Displacement starting at position 36 */
+ F_8, /* FPR starting at position 8 */
+ F_12, /* FPR starting at position 12 */
+ F_16, /* FPR starting at position 16 */
+ F_24, /* FPR starting at position 24 */
+ F_28, /* FPR starting at position 28 */
+ F_32, /* FPR starting at position 32 */
+ I8_8, /* 8 bit signed value starting at 8 */
+ I8_32, /* 8 bit signed value starting at 32 */
+ I16_16, /* 16 bit signed value starting at 16 */
+ I16_32, /* 16 bit signed value starting at 32 */
+ I32_16, /* 32 bit signed value starting at 16 */
+ J12_12, /* 12 bit PC relative offset at 12 */
+ J16_16, /* 16 bit PC relative offset at 16 */
+ J16_32, /* 16 bit PC relative offset at 32 */
+ J24_24, /* 24 bit PC relative offset at 24 */
+ J32_16, /* 32 bit PC relative offset at 16 */
+ L4_8, /* 4 bit length starting at position 8 */
+ L4_12, /* 4 bit length starting at position 12 */
+ L8_8, /* 8 bit length starting at position 8 */
+ R_8, /* GPR starting at position 8 */
+ R_12, /* GPR starting at position 12 */
+ R_16, /* GPR starting at position 16 */
+ R_24, /* GPR starting at position 24 */
+ R_28, /* GPR starting at position 28 */
+ U4_8, /* 4 bit unsigned value starting at 8 */
+ U4_12, /* 4 bit unsigned value starting at 12 */
+ U4_16, /* 4 bit unsigned value starting at 16 */
+ U4_20, /* 4 bit unsigned value starting at 20 */
+ U4_24, /* 4 bit unsigned value starting at 24 */
+ U4_28, /* 4 bit unsigned value starting at 28 */
+ U4_32, /* 4 bit unsigned value starting at 32 */
+ U4_36, /* 4 bit unsigned value starting at 36 */
+ U8_8, /* 8 bit unsigned value starting at 8 */
+ U8_16, /* 8 bit unsigned value starting at 16 */
+ U8_24, /* 8 bit unsigned value starting at 24 */
+ U8_28, /* 8 bit unsigned value starting at 28 */
+ U8_32, /* 8 bit unsigned value starting at 32 */
+ U12_16, /* 12 bit unsigned value starting at 16 */
+ U16_16, /* 16 bit unsigned value starting at 16 */
+ U16_32, /* 16 bit unsigned value starting at 32 */
+ U32_16, /* 32 bit unsigned value starting at 16 */
+ VX_12, /* Vector index register starting at position 12 */
+ V_8, /* Vector reg. starting at position 8 */
+ V_12, /* Vector reg. starting at position 12 */
+ V_16, /* Vector reg. starting at position 16 */
+ V_32, /* Vector reg. starting at position 32 */
+ X_12, /* Index register starting at position 12 */
+};
+
+static const struct s390_operand operands[] = {
+ [UNUSED] = { 0, 0, 0 },
+ [A_8] = { 4, 8, OPERAND_AR },
+ [A_12] = { 4, 12, OPERAND_AR },
+ [A_24] = { 4, 24, OPERAND_AR },
+ [A_28] = { 4, 28, OPERAND_AR },
+ [B_16] = { 4, 16, OPERAND_BASE | OPERAND_GPR },
+ [B_32] = { 4, 32, OPERAND_BASE | OPERAND_GPR },
+ [C_8] = { 4, 8, OPERAND_CR },
+ [C_12] = { 4, 12, OPERAND_CR },
+ [D20_20] = { 20, 20, OPERAND_DISP | OPERAND_SIGNED },
+ [D_20] = { 12, 20, OPERAND_DISP },
+ [D_36] = { 12, 36, OPERAND_DISP },
+ [F_8] = { 4, 8, OPERAND_FPR },
+ [F_12] = { 4, 12, OPERAND_FPR },
+ [F_16] = { 4, 16, OPERAND_FPR },
+ [F_24] = { 4, 24, OPERAND_FPR },
+ [F_28] = { 4, 28, OPERAND_FPR },
+ [F_32] = { 4, 32, OPERAND_FPR },
+ [I8_8] = { 8, 8, OPERAND_SIGNED },
+ [I8_32] = { 8, 32, OPERAND_SIGNED },
+ [I16_16] = { 16, 16, OPERAND_SIGNED },
+ [I16_32] = { 16, 32, OPERAND_SIGNED },
+ [I32_16] = { 32, 16, OPERAND_SIGNED },
+ [J12_12] = { 12, 12, OPERAND_PCREL },
+ [J16_16] = { 16, 16, OPERAND_PCREL },
+ [J16_32] = { 16, 32, OPERAND_PCREL },
+ [J24_24] = { 24, 24, OPERAND_PCREL },
+ [J32_16] = { 32, 16, OPERAND_PCREL },
+ [L4_8] = { 4, 8, OPERAND_LENGTH },
+ [L4_12] = { 4, 12, OPERAND_LENGTH },
+ [L8_8] = { 8, 8, OPERAND_LENGTH },
+ [R_8] = { 4, 8, OPERAND_GPR },
+ [R_12] = { 4, 12, OPERAND_GPR },
+ [R_16] = { 4, 16, OPERAND_GPR },
+ [R_24] = { 4, 24, OPERAND_GPR },
+ [R_28] = { 4, 28, OPERAND_GPR },
+ [U4_8] = { 4, 8, 0 },
+ [U4_12] = { 4, 12, 0 },
+ [U4_16] = { 4, 16, 0 },
+ [U4_20] = { 4, 20, 0 },
+ [U4_24] = { 4, 24, 0 },
+ [U4_28] = { 4, 28, 0 },
+ [U4_32] = { 4, 32, 0 },
+ [U4_36] = { 4, 36, 0 },
+ [U8_8] = { 8, 8, 0 },
+ [U8_16] = { 8, 16, 0 },
+ [U8_24] = { 8, 24, 0 },
+ [U8_28] = { 8, 28, 0 },
+ [U8_32] = { 8, 32, 0 },
+ [U12_16] = { 12, 16, 0 },
+ [U16_16] = { 16, 16, 0 },
+ [U16_32] = { 16, 32, 0 },
+ [U32_16] = { 32, 16, 0 },
+ [VX_12] = { 4, 12, OPERAND_INDEX | OPERAND_VR },
+ [V_8] = { 4, 8, OPERAND_VR },
+ [V_12] = { 4, 12, OPERAND_VR },
+ [V_16] = { 4, 16, OPERAND_VR },
+ [V_32] = { 4, 32, OPERAND_VR },
+ [X_12] = { 4, 12, OPERAND_INDEX | OPERAND_GPR },
+};
+
+static const unsigned char formats[][6] = {
+ [INSTR_E] = { 0, 0, 0, 0, 0, 0 },
+ [INSTR_IE_UU] = { U4_24, U4_28, 0, 0, 0, 0 },
+ [INSTR_MII_UPP] = { U4_8, J12_12, J24_24 },
+ [INSTR_RIE_R0IU] = { R_8, I16_16, U4_32, 0, 0, 0 },
+ [INSTR_RIE_R0UU] = { R_8, U16_16, U4_32, 0, 0, 0 },
+ [INSTR_RIE_RRI0] = { R_8, R_12, I16_16, 0, 0, 0 },
+ [INSTR_RIE_RRP] = { R_8, R_12, J16_16, 0, 0, 0 },
+ [INSTR_RIE_RRPU] = { R_8, R_12, U4_32, J16_16, 0, 0 },
+ [INSTR_RIE_RRUUU] = { R_8, R_12, U8_16, U8_24, U8_32, 0 },
+ [INSTR_RIE_RUI0] = { R_8, I16_16, U4_12, 0, 0, 0 },
+ [INSTR_RIE_RUPI] = { R_8, I8_32, U4_12, J16_16, 0, 0 },
+ [INSTR_RIE_RUPU] = { R_8, U8_32, U4_12, J16_16, 0, 0 },
+ [INSTR_RIL_RI] = { R_8, I32_16, 0, 0, 0, 0 },
+ [INSTR_RIL_RP] = { R_8, J32_16, 0, 0, 0, 0 },
+ [INSTR_RIL_RU] = { R_8, U32_16, 0, 0, 0, 0 },
+ [INSTR_RIL_UP] = { U4_8, J32_16, 0, 0, 0, 0 },
+ [INSTR_RIS_RURDI] = { R_8, I8_32, U4_12, D_20, B_16, 0 },
+ [INSTR_RIS_RURDU] = { R_8, U8_32, U4_12, D_20, B_16, 0 },
+ [INSTR_RI_RI] = { R_8, I16_16, 0, 0, 0, 0 },
+ [INSTR_RI_RP] = { R_8, J16_16, 0, 0, 0, 0 },
+ [INSTR_RI_RU] = { R_8, U16_16, 0, 0, 0, 0 },
+ [INSTR_RI_UP] = { U4_8, J16_16, 0, 0, 0, 0 },
+ [INSTR_RRE_00] = { 0, 0, 0, 0, 0, 0 },
+ [INSTR_RRE_AA] = { A_24, A_28, 0, 0, 0, 0 },
+ [INSTR_RRE_AR] = { A_24, R_28, 0, 0, 0, 0 },
+ [INSTR_RRE_F0] = { F_24, 0, 0, 0, 0, 0 },
+ [INSTR_RRE_FF] = { F_24, F_28, 0, 0, 0, 0 },
+ [INSTR_RRE_FR] = { F_24, R_28, 0, 0, 0, 0 },
+ [INSTR_RRE_R0] = { R_24, 0, 0, 0, 0, 0 },
+ [INSTR_RRE_RA] = { R_24, A_28, 0, 0, 0, 0 },
+ [INSTR_RRE_RF] = { R_24, F_28, 0, 0, 0, 0 },
+ [INSTR_RRE_RR] = { R_24, R_28, 0, 0, 0, 0 },
+ [INSTR_RRF_0UFF] = { F_24, F_28, U4_20, 0, 0, 0 },
+ [INSTR_RRF_0URF] = { R_24, F_28, U4_20, 0, 0, 0 },
+ [INSTR_RRF_F0FF] = { F_16, F_24, F_28, 0, 0, 0 },
+ [INSTR_RRF_F0FF2] = { F_24, F_16, F_28, 0, 0, 0 },
+ [INSTR_RRF_F0FR] = { F_24, F_16, R_28, 0, 0, 0 },
+ [INSTR_RRF_FFRU] = { F_24, F_16, R_28, U4_20, 0, 0 },
+ [INSTR_RRF_FUFF] = { F_24, F_16, F_28, U4_20, 0, 0 },
+ [INSTR_RRF_FUFF2] = { F_24, F_28, F_16, U4_20, 0, 0 },
+ [INSTR_RRF_R0RR] = { R_24, R_16, R_28, 0, 0, 0 },
+ [INSTR_RRF_R0RR2] = { R_24, R_28, R_16, 0, 0, 0 },
+ [INSTR_RRF_RURR] = { R_24, R_28, R_16, U4_20, 0, 0 },
+ [INSTR_RRF_RURR2] = { R_24, R_16, R_28, U4_20, 0, 0 },
+ [INSTR_RRF_U0FF] = { F_24, U4_16, F_28, 0, 0, 0 },
+ [INSTR_RRF_U0RF] = { R_24, U4_16, F_28, 0, 0, 0 },
+ [INSTR_RRF_U0RR] = { R_24, R_28, U4_16, 0, 0, 0 },
+ [INSTR_RRF_UUFF] = { F_24, U4_16, F_28, U4_20, 0, 0 },
+ [INSTR_RRF_UUFR] = { F_24, U4_16, R_28, U4_20, 0, 0 },
+ [INSTR_RRF_UURF] = { R_24, U4_16, F_28, U4_20, 0, 0 },
+ [INSTR_RRS_RRRDU] = { R_8, R_12, U4_32, D_20, B_16 },
+ [INSTR_RR_FF] = { F_8, F_12, 0, 0, 0, 0 },
+ [INSTR_RR_R0] = { R_8, 0, 0, 0, 0, 0 },
+ [INSTR_RR_RR] = { R_8, R_12, 0, 0, 0, 0 },
+ [INSTR_RR_U0] = { U8_8, 0, 0, 0, 0, 0 },
+ [INSTR_RR_UR] = { U4_8, R_12, 0, 0, 0, 0 },
+ [INSTR_RSI_RRP] = { R_8, R_12, J16_16, 0, 0, 0 },
+ [INSTR_RSL_LRDFU] = { F_32, D_20, L8_8, B_16, U4_36, 0 },
+ [INSTR_RSL_R0RD] = { D_20, L4_8, B_16, 0, 0, 0 },
+ [INSTR_RSY_AARD] = { A_8, A_12, D20_20, B_16, 0, 0 },
+ [INSTR_RSY_CCRD] = { C_8, C_12, D20_20, B_16, 0, 0 },
+ [INSTR_RSY_RDRU] = { R_8, D20_20, B_16, U4_12, 0, 0 },
+ [INSTR_RSY_RRRD] = { R_8, R_12, D20_20, B_16, 0, 0 },
+ [INSTR_RSY_RURD] = { R_8, U4_12, D20_20, B_16, 0, 0 },
+ [INSTR_RSY_RURD2] = { R_8, D20_20, B_16, U4_12, 0, 0 },
+ [INSTR_RS_AARD] = { A_8, A_12, D_20, B_16, 0, 0 },
+ [INSTR_RS_CCRD] = { C_8, C_12, D_20, B_16, 0, 0 },
+ [INSTR_RS_R0RD] = { R_8, D_20, B_16, 0, 0, 0 },
+ [INSTR_RS_RRRD] = { R_8, R_12, D_20, B_16, 0, 0 },
+ [INSTR_RS_RURD] = { R_8, U4_12, D_20, B_16, 0, 0 },
+ [INSTR_RXE_FRRD] = { F_8, D_20, X_12, B_16, 0, 0 },
+ [INSTR_RXE_RRRDU] = { R_8, D_20, X_12, B_16, U4_32, 0 },
+ [INSTR_RXF_FRRDF] = { F_32, F_8, D_20, X_12, B_16, 0 },
+ [INSTR_RXY_FRRD] = { F_8, D20_20, X_12, B_16, 0, 0 },
+ [INSTR_RXY_RRRD] = { R_8, D20_20, X_12, B_16, 0, 0 },
+ [INSTR_RXY_URRD] = { U4_8, D20_20, X_12, B_16, 0, 0 },
+ [INSTR_RX_FRRD] = { F_8, D_20, X_12, B_16, 0, 0 },
+ [INSTR_RX_RRRD] = { R_8, D_20, X_12, B_16, 0, 0 },
+ [INSTR_RX_URRD] = { U4_8, D_20, X_12, B_16, 0, 0 },
+ [INSTR_SIL_RDI] = { D_20, B_16, I16_32, 0, 0, 0 },
+ [INSTR_SIL_RDU] = { D_20, B_16, U16_32, 0, 0, 0 },
+ [INSTR_SIY_IRD] = { D20_20, B_16, I8_8, 0, 0, 0 },
+ [INSTR_SIY_URD] = { D20_20, B_16, U8_8, 0, 0, 0 },
+ [INSTR_SI_RD] = { D_20, B_16, 0, 0, 0, 0 },
+ [INSTR_SI_URD] = { D_20, B_16, U8_8, 0, 0, 0 },
+ [INSTR_SMI_U0RDP] = { U4_8, J16_32, D_20, B_16, 0, 0 },
+ [INSTR_SSE_RDRD] = { D_20, B_16, D_36, B_32, 0, 0 },
+ [INSTR_SSF_RRDRD] = { D_20, B_16, D_36, B_32, R_8, 0 },
+ [INSTR_SSF_RRDRD2] = { R_8, D_20, B_16, D_36, B_32, 0 },
+ [INSTR_SS_L0RDRD] = { D_20, L8_8, B_16, D_36, B_32, 0 },
+ [INSTR_SS_L2RDRD] = { D_20, B_16, D_36, L8_8, B_32, 0 },
+ [INSTR_SS_LIRDRD] = { D_20, L4_8, B_16, D_36, B_32, U4_12 },
+ [INSTR_SS_LLRDRD] = { D_20, L4_8, B_16, D_36, L4_12, B_32 },
+ [INSTR_SS_RRRDRD] = { D_20, R_8, B_16, D_36, B_32, R_12 },
+ [INSTR_SS_RRRDRD2] = { R_8, D_20, B_16, R_12, D_36, B_32 },
+ [INSTR_SS_RRRDRD3] = { R_8, R_12, D_20, B_16, D_36, B_32 },
+ [INSTR_S_00] = { 0, 0, 0, 0, 0, 0 },
+ [INSTR_S_RD] = { D_20, B_16, 0, 0, 0, 0 },
+ [INSTR_VRI_V0IU] = { V_8, I16_16, U4_32, 0, 0, 0 },
+ [INSTR_VRI_V0U] = { V_8, U16_16, 0, 0, 0, 0 },
+ [INSTR_VRI_V0UU2] = { V_8, U16_16, U4_32, 0, 0, 0 },
+ [INSTR_VRI_V0UUU] = { V_8, U8_16, U8_24, U4_32, 0, 0 },
+ [INSTR_VRI_VR0UU] = { V_8, R_12, U8_28, U4_24, 0, 0 },
+ [INSTR_VRI_VVUU] = { V_8, V_12, U16_16, U4_32, 0, 0 },
+ [INSTR_VRI_VVUUU] = { V_8, V_12, U12_16, U4_32, U4_28, 0 },
+ [INSTR_VRI_VVUUU2] = { V_8, V_12, U8_28, U8_16, U4_24, 0 },
+ [INSTR_VRI_VVV0U] = { V_8, V_12, V_16, U8_24, 0, 0 },
+ [INSTR_VRI_VVV0UU] = { V_8, V_12, V_16, U8_24, U4_32, 0 },
+ [INSTR_VRI_VVV0UU2] = { V_8, V_12, V_16, U8_28, U4_24, 0 },
+ [INSTR_VRR_0V] = { V_12, 0, 0, 0, 0, 0 },
+ [INSTR_VRR_0VV0U] = { V_12, V_16, U4_24, 0, 0, 0 },
+ [INSTR_VRR_RV0U] = { R_8, V_12, U4_24, 0, 0, 0 },
+ [INSTR_VRR_VRR] = { V_8, R_12, R_16, 0, 0, 0 },
+ [INSTR_VRR_VV] = { V_8, V_12, 0, 0, 0, 0 },
+ [INSTR_VRR_VV0U] = { V_8, V_12, U4_32, 0, 0, 0 },
+ [INSTR_VRR_VV0U0U] = { V_8, V_12, U4_32, U4_24, 0, 0 },
+ [INSTR_VRR_VV0UU2] = { V_8, V_12, U4_32, U4_28, 0, 0 },
+ [INSTR_VRR_VV0UUU] = { V_8, V_12, U4_32, U4_28, U4_24, 0 },
+ [INSTR_VRR_VVV] = { V_8, V_12, V_16, 0, 0, 0 },
+ [INSTR_VRR_VVV0U] = { V_8, V_12, V_16, U4_32, 0, 0 },
+ [INSTR_VRR_VVV0U0U] = { V_8, V_12, V_16, U4_32, U4_24, 0 },
+ [INSTR_VRR_VVV0UU] = { V_8, V_12, V_16, U4_32, U4_28, 0 },
+ [INSTR_VRR_VVV0UUU] = { V_8, V_12, V_16, U4_32, U4_28, U4_24 },
+ [INSTR_VRR_VVV0V] = { V_8, V_12, V_16, V_32, 0, 0 },
+ [INSTR_VRR_VVVU0UV] = { V_8, V_12, V_16, V_32, U4_28, U4_20 },
+ [INSTR_VRR_VVVU0V] = { V_8, V_12, V_16, V_32, U4_20, 0 },
+ [INSTR_VRR_VVVUU0V] = { V_8, V_12, V_16, V_32, U4_20, U4_24 },
+ [INSTR_VRS_RRDV] = { V_32, R_12, D_20, B_16, 0, 0 },
+ [INSTR_VRS_RVRDU] = { R_8, V_12, D_20, B_16, U4_32, 0 },
+ [INSTR_VRS_VRRD] = { V_8, R_12, D_20, B_16, 0, 0 },
+ [INSTR_VRS_VRRDU] = { V_8, R_12, D_20, B_16, U4_32, 0 },
+ [INSTR_VRS_VVRD] = { V_8, V_12, D_20, B_16, 0, 0 },
+ [INSTR_VRS_VVRDU] = { V_8, V_12, D_20, B_16, U4_32, 0 },
+ [INSTR_VRV_VVXRDU] = { V_8, D_20, VX_12, B_16, U4_32, 0 },
+ [INSTR_VRX_VRRD] = { V_8, D_20, X_12, B_16, 0, 0 },
+ [INSTR_VRX_VRRDU] = { V_8, D_20, X_12, B_16, U4_32, 0 },
+ [INSTR_VRX_VV] = { V_8, V_12, 0, 0, 0, 0 },
+ [INSTR_VSI_URDV] = { V_32, D_20, B_16, U8_8, 0, 0 },
+};
+
+static char long_insn_name[][7] = LONG_INSN_INITIALIZER;
+static struct s390_insn opcode[] = OPCODE_TABLE_INITIALIZER;
+static struct s390_opcode_offset opcode_offset[] = OPCODE_OFFSET_INITIALIZER;
+
+/* Extracts an operand value from an instruction. */
+static unsigned int extract_operand(unsigned char *code,
+ const struct s390_operand *operand)
+{
+ unsigned char *cp;
+ unsigned int val;
+ int bits;
+
+ /* Extract the fragments of the operand byte by byte. */
+ cp = code + operand->shift / 8;
+ bits = (operand->shift & 7) + operand->bits;
+ val = 0;
+ do {
+ val <<= 8;
+ val |= (unsigned int) *cp++;
+ bits -= 8;
+ } while (bits > 0);
+ val >>= -bits;
+ val &= ((1U << (operand->bits - 1)) << 1) - 1;
+
+ /* Check for special long displacement case. */
+ if (operand->bits == 20 && operand->shift == 20)
+ val = (val & 0xff) << 12 | (val & 0xfff00) >> 8;
+
+ /* Check the register extension bits for vector registers. */
+ if (operand->flags & OPERAND_VR) {
+ if (operand->shift == 8)
+ val |= (code[4] & 8) << 1;
+ else if (operand->shift == 12)
+ val |= (code[4] & 4) << 2;
+ else if (operand->shift == 16)
+ val |= (code[4] & 2) << 3;
+ else if (operand->shift == 32)
+ val |= (code[4] & 1) << 4;
+ }
+
+ /* Sign extend value if the operand is signed or pc relative. */
+ if ((operand->flags & (OPERAND_SIGNED | OPERAND_PCREL)) &&
+ (val & (1U << (operand->bits - 1))))
+ val |= (-1U << (operand->bits - 1)) << 1;
+
+ /* Double value if the operand is pc relative. */
+ if (operand->flags & OPERAND_PCREL)
+ val <<= 1;
+
+ /* A length x in an instruction denotes a real length of x + 1. */
+ if (operand->flags & OPERAND_LENGTH)
+ val++;
+ return val;
+}
+
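+/*
+ * Worked example (for illustration only): the RX-format instruction
+ * "l %r1,0(%r2)" is encoded as the bytes 0x58 0x10 0x20 0x00. With the
+ * operand table above, extract_operand() returns 1 for R_8, 0 for X_12,
+ * 2 for B_16 and 0 for D_20. For the 20-bit long displacement (D20_20)
+ * the raw extraction yields DL in the upper twelve bits and DH in the
+ * lower eight; the special case above swaps them so that the architected
+ * DH:DL value is returned.
+ */
+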
+struct s390_insn *find_insn(unsigned char *code)
+{
+ struct s390_opcode_offset *entry;
+ struct s390_insn *insn;
+ unsigned char opfrag;
+ int i;
+
+ /* Search the opcode offset table to find an entry which
+ * matches the beginning of the opcode. If there is no match
+ * the last entry will be used, which is the default entry for
+ * unknown instructions as well as 1-byte opcode instructions.
+ */
+ for (i = 0; i < ARRAY_SIZE(opcode_offset); i++) {
+ entry = &opcode_offset[i];
+ if (entry->opcode == code[0])
+ break;
+ }
+
+ opfrag = *(code + entry->byte) & entry->mask;
+
+ insn = &opcode[entry->offset];
+ for (i = 0; i < entry->count; i++) {
+ if (insn->opfrag == opfrag)
+ return insn;
+ insn++;
+ }
+ return NULL;
+}
+
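+/*
+ * Format one instruction into the buffer. For example, the byte sequence
+ * e3 10 f0 08 00 04 is rendered as "lg\t%r1,8(%r15)". Returns the number
+ * of characters written.
+ */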
+static int print_insn(char *buffer, unsigned char *code, unsigned long addr)
+{
+ struct s390_insn *insn;
+ const unsigned char *ops;
+ const struct s390_operand *operand;
+ unsigned int value;
+ char separator;
+ char *ptr;
+ int i;
+
+ ptr = buffer;
+ insn = find_insn(code);
+ if (insn) {
+ if (insn->zero == 0)
+ ptr += sprintf(ptr, "%.7s\t",
+ long_insn_name[insn->offset]);
+ else
+ ptr += sprintf(ptr, "%.5s\t", insn->name);
+ /* Extract the operands. */
+ separator = 0;
+ for (ops = formats[insn->format], i = 0;
+ *ops != 0 && i < 6; ops++, i++) {
+ operand = operands + *ops;
+ value = extract_operand(code, operand);
+ if ((operand->flags & OPERAND_INDEX) && value == 0)
+ continue;
+ if ((operand->flags & OPERAND_BASE) &&
+ value == 0 && separator == '(') {
+ separator = ',';
+ continue;
+ }
+ if (separator)
+ ptr += sprintf(ptr, "%c", separator);
+ if (operand->flags & OPERAND_GPR)
+ ptr += sprintf(ptr, "%%r%i", value);
+ else if (operand->flags & OPERAND_FPR)
+ ptr += sprintf(ptr, "%%f%i", value);
+ else if (operand->flags & OPERAND_AR)
+ ptr += sprintf(ptr, "%%a%i", value);
+ else if (operand->flags & OPERAND_CR)
+ ptr += sprintf(ptr, "%%c%i", value);
+ else if (operand->flags & OPERAND_VR)
+ ptr += sprintf(ptr, "%%v%i", value);
+ else if (operand->flags & OPERAND_PCREL) {
+ void *pcrel = (void *)((int)value + addr);
+
+ ptr += sprintf(ptr, "%px", pcrel);
+ } else if (operand->flags & OPERAND_SIGNED)
+ ptr += sprintf(ptr, "%i", value);
+ else
+ ptr += sprintf(ptr, "%u", value);
+ if (operand->flags & OPERAND_DISP)
+ separator = '(';
+ else if (operand->flags & OPERAND_BASE) {
+ ptr += sprintf(ptr, ")");
+ separator = ',';
+ } else
+ separator = ',';
+ }
+ } else
+ ptr += sprintf(ptr, "unknown");
+ return (int) (ptr - buffer);
+}
+
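+/*
+ * Print the code surrounding the failing PSW address. The instruction that
+ * ends at the PSW address is marked with '#', the instruction the PSW
+ * points to with '>'.
+ */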
+void show_code(struct pt_regs *regs)
+{
+ char *mode = user_mode(regs) ? "User" : "Krnl";
+ unsigned char code[64];
+ char buffer[128], *ptr;
+ mm_segment_t old_fs;
+ unsigned long addr;
+ int start, end, opsize, hops, i;
+
+ /* Get a snapshot of the 64 bytes surrounding the fault address. */
+ old_fs = get_fs();
+ set_fs(user_mode(regs) ? USER_DS : KERNEL_DS);
+ for (start = 32; start && regs->psw.addr >= 34 - start; start -= 2) {
+ addr = regs->psw.addr - 34 + start;
+ if (__copy_from_user(code + start - 2,
+ (char __user *) addr, 2))
+ break;
+ }
+ for (end = 32; end < 64; end += 2) {
+ addr = regs->psw.addr + end - 32;
+ if (__copy_from_user(code + end,
+ (char __user *) addr, 2))
+ break;
+ }
+ set_fs(old_fs);
+ /* Is the code snapshot usable? */
+ if ((regs->psw.addr & 1) || start >= end) {
+ printk("%s Code: Bad PSW.\n", mode);
+ return;
+ }
+ /* Find a starting point for the disassembly. */
+ while (start < 32) {
+ for (i = 0, hops = 0; start + i < 32 && hops < 3; hops++) {
+ if (!find_insn(code + start + i))
+ break;
+ i += insn_length(code[start + i]);
+ }
+ if (start + i == 32)
+ /* Looks good, sequence ends at PSW. */
+ break;
+ start += 2;
+ }
+ /* Decode the instructions. */
+ ptr = buffer;
+ ptr += sprintf(ptr, "%s Code:", mode);
+ hops = 0;
+ while (start < end && hops < 8) {
+ opsize = insn_length(code[start]);
+ if (start + opsize == 32)
+ *ptr++ = '#';
+ else if (start == 32)
+ *ptr++ = '>';
+ else
+ *ptr++ = ' ';
+ addr = regs->psw.addr + start - 32;
+ ptr += sprintf(ptr, "%px: ", (void *)addr);
+ if (start + opsize >= end)
+ break;
+ for (i = 0; i < opsize; i++)
+ ptr += sprintf(ptr, "%02x", code[start + i]);
+ *ptr++ = '\t';
+ if (i < 6)
+ *ptr++ = '\t';
+ ptr += print_insn(ptr, code + start, addr);
+ start += opsize;
+ pr_cont("%s", buffer);
+ ptr = buffer;
+ ptr += sprintf(ptr, "\n ");
+ hops++;
+ }
+ pr_cont("\n");
+}
+
+void print_fn_code(unsigned char *code, unsigned long len)
+{
+ char buffer[128], *ptr;
+ int opsize, i;
+
+ while (len) {
+ ptr = buffer;
+ opsize = insn_length(*code);
+ if (opsize > len)
+ break;
+ ptr += sprintf(ptr, "%px: ", code);
+ for (i = 0; i < opsize; i++)
+ ptr += sprintf(ptr, "%02x", code[i]);
+ *ptr++ = '\t';
+ if (i < 4)
+ *ptr++ = '\t';
+ ptr += print_insn(ptr, code, (unsigned long) code);
+ *ptr++ = '\n';
+ *ptr++ = 0;
+ printk("%s", buffer);
+ code += opsize;
+ len -= opsize;
+ }
+}
diff --git a/arch/s390/kernel/dumpstack.c b/arch/s390/kernel/dumpstack.c
new file mode 100644
index 000000000..5b23c4f6e
--- /dev/null
+++ b/arch/s390/kernel/dumpstack.c
@@ -0,0 +1,191 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Stack dumping functions
+ *
+ * Copyright IBM Corp. 1999, 2013
+ */
+
+#include <linux/kallsyms.h>
+#include <linux/hardirq.h>
+#include <linux/kprobes.h>
+#include <linux/utsname.h>
+#include <linux/export.h>
+#include <linux/kdebug.h>
+#include <linux/ptrace.h>
+#include <linux/mm.h>
+#include <linux/module.h>
+#include <linux/sched.h>
+#include <linux/sched/debug.h>
+#include <linux/sched/task_stack.h>
+#include <asm/processor.h>
+#include <asm/debug.h>
+#include <asm/dis.h>
+#include <asm/ipl.h>
+
+/*
+ * For dump_trace we have three different stacks to consider:
+ * - the panic stack, which is used if the kernel stack has overflowed
+ * - the asynchronous interrupt stack (cpu related)
+ * - the synchronous kernel stack (process related)
+ * The stack trace can start at any of the three stacks and can potentially
+ * touch all of them. The order is: panic stack, async stack, sync stack.
+ */
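+/*
+ * The dump_trace_func_t callback gets the caller supplied cookie, a code
+ * address and a reliability flag for each frame; returning a non-zero
+ * value stops the stack walk.
+ */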
+static unsigned long
+__dump_trace(dump_trace_func_t func, void *data, unsigned long sp,
+ unsigned long low, unsigned long high)
+{
+ struct stack_frame *sf;
+ struct pt_regs *regs;
+
+ while (1) {
+ if (sp < low || sp > high - sizeof(*sf))
+ return sp;
+ sf = (struct stack_frame *) sp;
+ if (func(data, sf->gprs[8], 0))
+ return sp;
+ /* Follow the backchain. */
+ while (1) {
+ low = sp;
+ sp = sf->back_chain;
+ if (!sp)
+ break;
+ if (sp <= low || sp > high - sizeof(*sf))
+ return sp;
+ sf = (struct stack_frame *) sp;
+ if (func(data, sf->gprs[8], 1))
+ return sp;
+ }
+ /* Zero backchain detected, check for interrupt frame. */
+ sp = (unsigned long) (sf + 1);
+ if (sp <= low || sp > high - sizeof(*regs))
+ return sp;
+ regs = (struct pt_regs *) sp;
+ if (!user_mode(regs)) {
+ if (func(data, regs->psw.addr, 1))
+ return sp;
+ }
+ low = sp;
+ sp = regs->gprs[15];
+ }
+}
+
+void dump_trace(dump_trace_func_t func, void *data, struct task_struct *task,
+ unsigned long sp)
+{
+ unsigned long frame_size;
+
+ frame_size = STACK_FRAME_OVERHEAD + sizeof(struct pt_regs);
+#ifdef CONFIG_CHECK_STACK
+ sp = __dump_trace(func, data, sp,
+ S390_lowcore.panic_stack + frame_size - PAGE_SIZE,
+ S390_lowcore.panic_stack + frame_size);
+#endif
+ sp = __dump_trace(func, data, sp,
+ S390_lowcore.async_stack + frame_size - ASYNC_SIZE,
+ S390_lowcore.async_stack + frame_size);
+ task = task ?: current;
+ __dump_trace(func, data, sp,
+ (unsigned long)task_stack_page(task),
+ (unsigned long)task_stack_page(task) + THREAD_SIZE);
+}
+EXPORT_SYMBOL_GPL(dump_trace);
+
+static int show_address(void *data, unsigned long address, int reliable)
+{
+ if (reliable)
+ printk(" [<%016lx>] %pSR \n", address, (void *)address);
+ else
+ printk("([<%016lx>] %pSR)\n", address, (void *)address);
+ return 0;
+}
+
+void show_stack(struct task_struct *task, unsigned long *stack)
+{
+ unsigned long sp = (unsigned long) stack;
+
+ if (!sp)
+ sp = task ? task->thread.ksp : current_stack_pointer();
+ printk("Call Trace:\n");
+ dump_trace(show_address, NULL, task, sp);
+ if (!task)
+ task = current;
+ debug_show_held_locks(task);
+}
+
+static void show_last_breaking_event(struct pt_regs *regs)
+{
+ printk("Last Breaking-Event-Address:\n");
+ printk(" [<%016lx>] %pSR\n", regs->args[0], (void *)regs->args[0]);
+}
+
+void show_registers(struct pt_regs *regs)
+{
+ struct psw_bits *psw = &psw_bits(regs->psw);
+ char *mode;
+
+ mode = user_mode(regs) ? "User" : "Krnl";
+ printk("%s PSW : %p %p", mode, (void *)regs->psw.mask, (void *)regs->psw.addr);
+ if (!user_mode(regs))
+ pr_cont(" (%pSR)", (void *)regs->psw.addr);
+ pr_cont("\n");
+ printk(" R:%x T:%x IO:%x EX:%x Key:%x M:%x W:%x "
+ "P:%x AS:%x CC:%x PM:%x", psw->per, psw->dat, psw->io, psw->ext,
+ psw->key, psw->mcheck, psw->wait, psw->pstate, psw->as, psw->cc, psw->pm);
+ pr_cont(" RI:%x EA:%x\n", psw->ri, psw->eaba);
+ printk("%s GPRS: %016lx %016lx %016lx %016lx\n", mode,
+ regs->gprs[0], regs->gprs[1], regs->gprs[2], regs->gprs[3]);
+ printk(" %016lx %016lx %016lx %016lx\n",
+ regs->gprs[4], regs->gprs[5], regs->gprs[6], regs->gprs[7]);
+ printk(" %016lx %016lx %016lx %016lx\n",
+ regs->gprs[8], regs->gprs[9], regs->gprs[10], regs->gprs[11]);
+ printk(" %016lx %016lx %016lx %016lx\n",
+ regs->gprs[12], regs->gprs[13], regs->gprs[14], regs->gprs[15]);
+ show_code(regs);
+}
+
+void show_regs(struct pt_regs *regs)
+{
+ show_regs_print_info(KERN_DEFAULT);
+ show_registers(regs);
+ /* Show stack backtrace if pt_regs is from kernel mode */
+ if (!user_mode(regs))
+ show_stack(NULL, (unsigned long *) regs->gprs[15]);
+ show_last_breaking_event(regs);
+}
+
+static DEFINE_SPINLOCK(die_lock);
+
+void die(struct pt_regs *regs, const char *str)
+{
+ static int die_counter;
+
+ oops_enter();
+ lgr_info_log();
+ debug_stop_all();
+ console_verbose();
+ spin_lock_irq(&die_lock);
+ bust_spinlocks(1);
+ printk("%s: %04x ilc:%d [#%d] ", str, regs->int_code & 0xffff,
+ regs->int_code >> 17, ++die_counter);
+#ifdef CONFIG_PREEMPT
+ pr_cont("PREEMPT ");
+#endif
+#ifdef CONFIG_SMP
+ pr_cont("SMP ");
+#endif
+ if (debug_pagealloc_enabled())
+ pr_cont("DEBUG_PAGEALLOC");
+ pr_cont("\n");
+ notify_die(DIE_OOPS, str, regs, 0, regs->int_code & 0xffff, SIGSEGV);
+ print_modules();
+ show_regs(regs);
+ bust_spinlocks(0);
+ add_taint(TAINT_DIE, LOCKDEP_NOW_UNRELIABLE);
+ spin_unlock_irq(&die_lock);
+ if (in_interrupt())
+ panic("Fatal exception in interrupt");
+ if (panic_on_oops)
+ panic("Fatal exception: panic_on_oops");
+ oops_exit();
+ do_exit(SIGSEGV);
+}
diff --git a/arch/s390/kernel/early.c b/arch/s390/kernel/early.c
new file mode 100644
index 000000000..ad88bed74
--- /dev/null
+++ b/arch/s390/kernel/early.c
@@ -0,0 +1,366 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright IBM Corp. 2007, 2009
+ * Author(s): Hongjie Yang <hongjie@us.ibm.com>,
+ * Heiko Carstens <heiko.carstens@de.ibm.com>
+ */
+
+#define KMSG_COMPONENT "setup"
+#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
+
+#include <linux/compiler.h>
+#include <linux/init.h>
+#include <linux/errno.h>
+#include <linux/string.h>
+#include <linux/ctype.h>
+#include <linux/lockdep.h>
+#include <linux/extable.h>
+#include <linux/pfn.h>
+#include <linux/uaccess.h>
+#include <linux/kernel.h>
+#include <asm/diag.h>
+#include <asm/ebcdic.h>
+#include <asm/ipl.h>
+#include <asm/lowcore.h>
+#include <asm/processor.h>
+#include <asm/sections.h>
+#include <asm/setup.h>
+#include <asm/sysinfo.h>
+#include <asm/cpcmd.h>
+#include <asm/sclp.h>
+#include <asm/facility.h>
+#include "entry.h"
+
+static void __init setup_boot_command_line(void);
+
+/*
+ * Initialize storage key for kernel pages
+ */
+static noinline __init void init_kernel_storage_key(void)
+{
+#if PAGE_DEFAULT_KEY
+ unsigned long end_pfn, init_pfn;
+
+ end_pfn = PFN_UP(__pa(_end));
+
+ for (init_pfn = 0 ; init_pfn < end_pfn; init_pfn++)
+ page_set_storage_key(init_pfn << PAGE_SHIFT,
+ PAGE_DEFAULT_KEY, 0);
+#endif
+}
+
+static __initdata char sysinfo_page[PAGE_SIZE] __aligned(PAGE_SIZE);
+
+static noinline __init void detect_machine_type(void)
+{
+ struct sysinfo_3_2_2 *vmms = (struct sysinfo_3_2_2 *)&sysinfo_page;
+
+ /* Check current-configuration-level */
+ if (stsi(NULL, 0, 0, 0) <= 2) {
+ S390_lowcore.machine_flags |= MACHINE_FLAG_LPAR;
+ return;
+ }
+ /* Get virtual-machine cpu information. */
+ if (stsi(vmms, 3, 2, 2) || !vmms->count)
+ return;
+
+ /* Detect known hypervisors */
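+ /* The cpi strings are EBCDIC: "KVM" and "z/VM", respectively. */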
+ if (!memcmp(vmms->vm[0].cpi, "\xd2\xe5\xd4", 3))
+ S390_lowcore.machine_flags |= MACHINE_FLAG_KVM;
+ else if (!memcmp(vmms->vm[0].cpi, "\xa9\x61\xe5\xd4", 4))
+ S390_lowcore.machine_flags |= MACHINE_FLAG_VM;
+}
+
+/* Remove leading, trailing and double whitespace. */
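+/* For example, " a  b   c " becomes "a b c". */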
+static inline void strim_all(char *str)
+{
+ char *s;
+
+ s = strim(str);
+ if (s != str)
+ memmove(str, s, strlen(s));
+ while (*str) {
+ if (!isspace(*str++))
+ continue;
+ if (isspace(*str)) {
+ s = skip_spaces(str);
+ memmove(str, s, strlen(s) + 1);
+ }
+ }
+}
+
+static noinline __init void setup_arch_string(void)
+{
+ struct sysinfo_1_1_1 *mach = (struct sysinfo_1_1_1 *)&sysinfo_page;
+ struct sysinfo_3_2_2 *vm = (struct sysinfo_3_2_2 *)&sysinfo_page;
+ char mstr[80], hvstr[17];
+
+ if (stsi(mach, 1, 1, 1))
+ return;
+ EBCASC(mach->manufacturer, sizeof(mach->manufacturer));
+ EBCASC(mach->type, sizeof(mach->type));
+ EBCASC(mach->model, sizeof(mach->model));
+ EBCASC(mach->model_capacity, sizeof(mach->model_capacity));
+ sprintf(mstr, "%-16.16s %-4.4s %-16.16s %-16.16s",
+ mach->manufacturer, mach->type,
+ mach->model, mach->model_capacity);
+ strim_all(mstr);
+ if (stsi(vm, 3, 2, 2) == 0 && vm->count) {
+ EBCASC(vm->vm[0].cpi, sizeof(vm->vm[0].cpi));
+ sprintf(hvstr, "%-16.16s", vm->vm[0].cpi);
+ strim_all(hvstr);
+ } else {
+ sprintf(hvstr, "%s",
+ MACHINE_IS_LPAR ? "LPAR" :
+ MACHINE_IS_VM ? "z/VM" :
+ MACHINE_IS_KVM ? "KVM" : "unknown");
+ }
+ dump_stack_set_arch_desc("%s (%s)", mstr, hvstr);
+}
+
+static __init void setup_topology(void)
+{
+ int max_mnest;
+
+ if (!test_facility(11))
+ return;
+ S390_lowcore.machine_flags |= MACHINE_FLAG_TOPOLOGY;
+ for (max_mnest = 6; max_mnest > 1; max_mnest--) {
+ if (stsi(&sysinfo_page, 15, 1, max_mnest) == 0)
+ break;
+ }
+ topology_max_mnest = max_mnest;
+}
+
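+/*
+ * Early program check handler: look up the failing address in the exception
+ * table and redirect the old program check PSW to the fixup if one exists;
+ * otherwise stop in a disabled wait. Low address protection is switched off
+ * while the lowcore PSW is rewritten.
+ */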
+static void early_pgm_check_handler(void)
+{
+ const struct exception_table_entry *fixup;
+ unsigned long cr0, cr0_new;
+ unsigned long addr;
+
+ addr = S390_lowcore.program_old_psw.addr;
+ fixup = search_exception_tables(addr);
+ if (!fixup)
+ disabled_wait(0);
+ /* Disable low address protection before storing into lowcore. */
+ __ctl_store(cr0, 0, 0);
+ cr0_new = cr0 & ~(1UL << 28);
+ __ctl_load(cr0_new, 0, 0);
+ S390_lowcore.program_old_psw.addr = extable_fixup(fixup);
+ __ctl_load(cr0, 0, 0);
+}
+
+static noinline __init void setup_lowcore_early(void)
+{
+ psw_t psw;
+
+ psw.mask = PSW_MASK_BASE | PSW_DEFAULT_KEY | PSW_MASK_EA | PSW_MASK_BA;
+ if (IS_ENABLED(CONFIG_KASAN))
+ psw.mask |= PSW_MASK_DAT;
+ psw.addr = (unsigned long) s390_base_ext_handler;
+ S390_lowcore.external_new_psw = psw;
+ psw.addr = (unsigned long) s390_base_pgm_handler;
+ S390_lowcore.program_new_psw = psw;
+ s390_base_pgm_handler_fn = early_pgm_check_handler;
+ S390_lowcore.preempt_count = INIT_PREEMPT_COUNT;
+}
+
+static noinline __init void setup_facility_list(void)
+{
+ stfle(S390_lowcore.stfle_fac_list,
+ ARRAY_SIZE(S390_lowcore.stfle_fac_list));
+ memcpy(S390_lowcore.alt_stfle_fac_list,
+ S390_lowcore.stfle_fac_list,
+ sizeof(S390_lowcore.alt_stfle_fac_list));
+ if (!IS_ENABLED(CONFIG_KERNEL_NOBP))
+ __clear_facility(82, S390_lowcore.alt_stfle_fac_list);
+}
+
+static __init void detect_diag9c(void)
+{
+ unsigned int cpu_address;
+ int rc;
+
+ cpu_address = stap();
+ diag_stat_inc(DIAG_STAT_X09C);
+ asm volatile(
+ " diag %2,0,0x9c\n"
+ "0: la %0,0\n"
+ "1:\n"
+ EX_TABLE(0b,1b)
+ : "=d" (rc) : "0" (-EOPNOTSUPP), "d" (cpu_address) : "cc");
+ if (!rc)
+ S390_lowcore.machine_flags |= MACHINE_FLAG_DIAG9C;
+}
+
+static __init void detect_diag44(void)
+{
+ int rc;
+
+ diag_stat_inc(DIAG_STAT_X044);
+ asm volatile(
+ " diag 0,0,0x44\n"
+ "0: la %0,0\n"
+ "1:\n"
+ EX_TABLE(0b,1b)
+ : "=d" (rc) : "0" (-EOPNOTSUPP) : "cc");
+ if (!rc)
+ S390_lowcore.machine_flags |= MACHINE_FLAG_DIAG44;
+}
+
+static __init void detect_machine_facilities(void)
+{
+ if (test_facility(8)) {
+ S390_lowcore.machine_flags |= MACHINE_FLAG_EDAT1;
+ __ctl_set_bit(0, 23);
+ }
+ if (test_facility(78))
+ S390_lowcore.machine_flags |= MACHINE_FLAG_EDAT2;
+ if (test_facility(3))
+ S390_lowcore.machine_flags |= MACHINE_FLAG_IDTE;
+ if (test_facility(50) && test_facility(73)) {
+ S390_lowcore.machine_flags |= MACHINE_FLAG_TE;
+ __ctl_set_bit(0, 55);
+ }
+ if (test_facility(51))
+ S390_lowcore.machine_flags |= MACHINE_FLAG_TLB_LC;
+ if (test_facility(129)) {
+ S390_lowcore.machine_flags |= MACHINE_FLAG_VX;
+ __ctl_set_bit(0, 17);
+ }
+ if (test_facility(130)) {
+ S390_lowcore.machine_flags |= MACHINE_FLAG_NX;
+ __ctl_set_bit(0, 20);
+ }
+ if (test_facility(133))
+ S390_lowcore.machine_flags |= MACHINE_FLAG_GS;
+ if (test_facility(139) && (tod_clock_base[1] & 0x80)) {
+ /* Enable signed clock comparator comparisons */
+ S390_lowcore.machine_flags |= MACHINE_FLAG_SCC;
+ clock_comparator_max = -1ULL >> 1;
+ __ctl_set_bit(0, 53);
+ }
+}
+
+static inline void save_vector_registers(void)
+{
+#ifdef CONFIG_CRASH_DUMP
+ if (test_facility(129))
+ save_vx_regs(boot_cpu_vector_save_area);
+#endif
+}
+
+static int __init disable_vector_extension(char *str)
+{
+ S390_lowcore.machine_flags &= ~MACHINE_FLAG_VX;
+ __ctl_clear_bit(0, 17);
+ return 0;
+}
+early_param("novx", disable_vector_extension);
+
+static int __init noexec_setup(char *str)
+{
+ bool enabled;
+ int rc;
+
+ rc = kstrtobool(str, &enabled);
+ if (!rc && !enabled) {
+ /* Disable no-execute support */
+ S390_lowcore.machine_flags &= ~MACHINE_FLAG_NX;
+ __ctl_clear_bit(0, 20);
+ }
+ return rc;
+}
+early_param("noexec", noexec_setup);
+
+static int __init cad_setup(char *str)
+{
+ bool enabled;
+ int rc;
+
+ rc = kstrtobool(str, &enabled);
+ if (!rc && enabled && test_facility(128))
+ /* Enable problem state CAD. */
+ __ctl_set_bit(2, 3);
+ return rc;
+}
+early_param("cad", cad_setup);
+
+/* Set up boot command line */
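+/*
+ * append_to_cmdline() places the data returned by the given accessor right
+ * behind the current command line. If the data starts with '=' it replaces
+ * the command line completely, otherwise it is appended after a separating
+ * blank.
+ */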
+static void __init append_to_cmdline(size_t (*ipl_data)(char *, size_t))
+{
+ char *parm, *delim;
+ size_t rc, len;
+
+ len = strlen(boot_command_line);
+
+ delim = boot_command_line + len; /* '\0' character position */
+ parm = boot_command_line + len + 1; /* append right after '\0' */
+
+ rc = ipl_data(parm, COMMAND_LINE_SIZE - len - 1);
+ if (rc) {
+ if (*parm == '=')
+ memmove(boot_command_line, parm + 1, rc);
+ else
+ *delim = ' '; /* replace '\0' with space */
+ }
+}
+
+static inline int has_ebcdic_char(const char *str)
+{
+ int i;
+
+ for (i = 0; str[i]; i++)
+ if (str[i] & 0x80)
+ return 1;
+ return 0;
+}
+
+static void __init setup_boot_command_line(void)
+{
+ COMMAND_LINE[ARCH_COMMAND_LINE_SIZE - 1] = 0;
+ /* convert arch command line to ascii if necessary */
+ if (has_ebcdic_char(COMMAND_LINE))
+ EBCASC(COMMAND_LINE, ARCH_COMMAND_LINE_SIZE);
+ /* copy arch command line */
+ strlcpy(boot_command_line, strstrip(COMMAND_LINE),
+ ARCH_COMMAND_LINE_SIZE);
+
+ /* append IPL PARM data to the boot command line */
+ if (MACHINE_IS_VM)
+ append_to_cmdline(append_ipl_vmparm);
+
+ append_to_cmdline(append_ipl_scpdata);
+}
+
+static void __init check_image_bootable(void)
+{
+ if (!memcmp(EP_STRING, (void *)EP_OFFSET, strlen(EP_STRING)))
+ return;
+
+ sclp_early_printk("Linux kernel boot failure: An attempt to boot a vmlinux ELF image failed.\n");
+ sclp_early_printk("This image does not contain all parts necessary for starting up. Use\n");
+ sclp_early_printk("bzImage or arch/s390/boot/compressed/vmlinux instead.\n");
+ disabled_wait(0xbadb007);
+}
+
+void __init startup_init(void)
+{
+ check_image_bootable();
+ time_early_init();
+ init_kernel_storage_key();
+ lockdep_off();
+ setup_lowcore_early();
+ setup_facility_list();
+ detect_machine_type();
+ setup_arch_string();
+ ipl_store_parameters();
+ setup_boot_command_line();
+ detect_diag9c();
+ detect_diag44();
+ detect_machine_facilities();
+ save_vector_registers();
+ setup_topology();
+ sclp_early_detect();
+ lockdep_on();
+}
diff --git a/arch/s390/kernel/early_nobss.c b/arch/s390/kernel/early_nobss.c
new file mode 100644
index 000000000..2d84fc48d
--- /dev/null
+++ b/arch/s390/kernel/early_nobss.c
@@ -0,0 +1,65 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright IBM Corp. 2007, 2018
+ */
+
+/*
+ * Early setup functions which may not rely on an initialized bss
+ * section. The last thing that is supposed to happen here is
+ * initialization of the bss section.
+ */
+
+#include <linux/processor.h>
+#include <linux/string.h>
+#include <asm/sections.h>
+#include <asm/lowcore.h>
+#include <asm/setup.h>
+#include <asm/timex.h>
+#include "entry.h"
+
+static void __init reset_tod_clock(void)
+{
+ u64 time;
+
+ if (store_tod_clock(&time) == 0)
+ return;
+ /* TOD clock not running. Set the clock to Unix Epoch. */
+ if (set_tod_clock(TOD_UNIX_EPOCH) != 0 || store_tod_clock(&time) != 0)
+ disabled_wait(0);
+
+ memset(tod_clock_base, 0, 16);
+ *(__u64 *) &tod_clock_base[1] = TOD_UNIX_EPOCH;
+ S390_lowcore.last_update_clock = TOD_UNIX_EPOCH;
+}
+
+static void __init rescue_initrd(void)
+{
+ unsigned long min_initrd_addr = (unsigned long) _end + (4UL << 20);
+
+ /*
+ * Just like in the case of IPL from the VM reader, we make sure there is
+ * a gap of 4MB between the end of the kernel and the start of the initrd.
+ * That way we can also be sure that saving an NSS will succeed, although
+ * that only requires the kernel and the initrd to reside in different
+ * segments.
+ */
+ if (!IS_ENABLED(CONFIG_BLK_DEV_INITRD))
+ return;
+ if (!INITRD_START || !INITRD_SIZE)
+ return;
+ if (INITRD_START >= min_initrd_addr)
+ return;
+ memmove((void *) min_initrd_addr, (void *) INITRD_START, INITRD_SIZE);
+ INITRD_START = min_initrd_addr;
+}
+
+static void __init clear_bss_section(void)
+{
+ memset(__bss_start, 0, __bss_stop - __bss_start);
+}
+
+void __init startup_init_nobss(void)
+{
+ reset_tod_clock();
+ rescue_initrd();
+ clear_bss_section();
+}
diff --git a/arch/s390/kernel/early_printk.c b/arch/s390/kernel/early_printk.c
new file mode 100644
index 000000000..40c1dfec9
--- /dev/null
+++ b/arch/s390/kernel/early_printk.c
@@ -0,0 +1,36 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright IBM Corp. 2017
+ */
+
+#include <linux/console.h>
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <asm/sclp.h>
+
+static void sclp_early_write(struct console *con, const char *s, unsigned int len)
+{
+ __sclp_early_printk(s, len, 0);
+}
+
+static struct console sclp_early_console = {
+ .name = "earlysclp",
+ .write = sclp_early_write,
+ .flags = CON_PRINTBUFFER | CON_BOOT,
+ .index = -1,
+};
+
+static int __init setup_early_printk(char *buf)
+{
+ if (early_console)
+ return 0;
+ /* Accept only "earlyprintk" and "earlyprintk=sclp" */
+ if (buf && strncmp(buf, "sclp", 4))
+ return 0;
+ if (!sclp.has_linemode && !sclp.has_vt220)
+ return 0;
+ early_console = &sclp_early_console;
+ register_console(early_console);
+ return 0;
+}
+early_param("earlyprintk", setup_early_printk);
diff --git a/arch/s390/kernel/ebcdic.c b/arch/s390/kernel/ebcdic.c
new file mode 100644
index 000000000..7f8246c9b
--- /dev/null
+++ b/arch/s390/kernel/ebcdic.c
@@ -0,0 +1,401 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * EBCDIC -> ASCII, ASCII -> EBCDIC,
+ * upper to lower case (EBCDIC) conversion tables.
+ *
+ * S390 version
+ * Copyright IBM Corp. 1999
+ * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>
+ * Martin Peschke <peschke@fh-brandenburg.de>
+ */
+
+#include <linux/types.h>
+#include <linux/export.h>
+#include <asm/ebcdic.h>
+
+/*
+ * ASCII (IBM PC 437) -> EBCDIC 037
+ */
+__u8 _ascebc[256] =
+{
+ /*00 NUL SOH STX ETX EOT ENQ ACK BEL */
+ 0x00, 0x01, 0x02, 0x03, 0x37, 0x2D, 0x2E, 0x2F,
+ /*08 BS HT LF VT FF CR SO SI */
+ /* ->NL */
+ 0x16, 0x05, 0x15, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F,
+ /*10 DLE DC1 DC2 DC3 DC4 NAK SYN ETB */
+ 0x10, 0x11, 0x12, 0x13, 0x3C, 0x3D, 0x32, 0x26,
+ /*18 CAN EM SUB ESC FS GS RS US */
+ /* ->IGS ->IRS ->IUS */
+ 0x18, 0x19, 0x3F, 0x27, 0x22, 0x1D, 0x1E, 0x1F,
+ /*20 SP ! " # $ % & ' */
+ 0x40, 0x5A, 0x7F, 0x7B, 0x5B, 0x6C, 0x50, 0x7D,
+ /*28 ( ) * + , - . / */
+ 0x4D, 0x5D, 0x5C, 0x4E, 0x6B, 0x60, 0x4B, 0x61,
+ /*30 0 1 2 3 4 5 6 7 */
+ 0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0xF7,
+ /*38 8 9 : ; < = > ? */
+ 0xF8, 0xF9, 0x7A, 0x5E, 0x4C, 0x7E, 0x6E, 0x6F,
+ /*40 @ A B C D E F G */
+ 0x7C, 0xC1, 0xC2, 0xC3, 0xC4, 0xC5, 0xC6, 0xC7,
+ /*48 H I J K L M N O */
+ 0xC8, 0xC9, 0xD1, 0xD2, 0xD3, 0xD4, 0xD5, 0xD6,
+ /*50 P Q R S T U V W */
+ 0xD7, 0xD8, 0xD9, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6,
+ /*58 X Y Z [ \ ] ^ _ */
+ 0xE7, 0xE8, 0xE9, 0xBA, 0xE0, 0xBB, 0xB0, 0x6D,
+ /*60 ` a b c d e f g */
+ 0x79, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87,
+ /*68 h i j k l m n o */
+ 0x88, 0x89, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96,
+ /*70 p q r s t u v w */
+ 0x97, 0x98, 0x99, 0xA2, 0xA3, 0xA4, 0xA5, 0xA6,
+ /*78 x y z { | } ~ DL */
+ 0xA7, 0xA8, 0xA9, 0xC0, 0x4F, 0xD0, 0xA1, 0x07,
+ /*80*/
+ 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F,
+ /*88*/
+ 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F,
+ /*90*/
+ 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F,
+ /*98*/
+ 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F,
+ /*A0*/
+ 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F,
+ /*A8*/
+ 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F,
+ /*B0*/
+ 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F,
+ /*B8*/
+ 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F,
+ /*C0*/
+ 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F,
+ /*C8*/
+ 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F,
+ /*D0*/
+ 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F,
+ /*D8*/
+ 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F,
+ /*E0 sz */
+ 0x3F, 0x59, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F,
+ /*E8*/
+ 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F,
+ /*F0*/
+ 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F,
+ /*F8*/
+ 0x90, 0x3F, 0x3F, 0x3F, 0x3F, 0xEA, 0x3F, 0xFF
+};
+
+/*
+ * EBCDIC 037 -> ASCII (IBM PC 437)
+ */
+__u8 _ebcasc[256] =
+{
+ /* 0x00 NUL SOH STX ETX *SEL HT *RNL DEL */
+ 0x00, 0x01, 0x02, 0x03, 0x07, 0x09, 0x07, 0x7F,
+ /* 0x08 -GE -SPS -RPT VT FF CR SO SI */
+ 0x07, 0x07, 0x07, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F,
+ /* 0x10 DLE DC1 DC2 DC3 -RES -NL BS -POC
+ -ENP ->LF */
+ 0x10, 0x11, 0x12, 0x13, 0x07, 0x0A, 0x08, 0x07,
+ /* 0x18 CAN EM -UBS -CU1 -IFS -IGS -IRS -ITB
+ -IUS */
+ 0x18, 0x19, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07,
+ /* 0x20 -DS -SOS FS -WUS -BYP LF ETB ESC
+ -INP */
+ 0x07, 0x07, 0x1C, 0x07, 0x07, 0x0A, 0x17, 0x1B,
+ /* 0x28 -SA -SFE -SM -CSP -MFA ENQ ACK BEL
+ -SW */
+ 0x07, 0x07, 0x07, 0x07, 0x07, 0x05, 0x06, 0x07,
+ /* 0x30 ---- ---- SYN -IR -PP -TRN -NBS EOT */
+ 0x07, 0x07, 0x16, 0x07, 0x07, 0x07, 0x07, 0x04,
+ /* 0x38 -SBS -IT -RFF -CU3 DC4 NAK ---- SUB */
+ 0x07, 0x07, 0x07, 0x07, 0x14, 0x15, 0x07, 0x1A,
+ /* 0x40 SP RSP ä ---- */
+ 0x20, 0xFF, 0x83, 0x84, 0x85, 0xA0, 0x07, 0x86,
+ /* 0x48 . < ( + | */
+ 0x87, 0xA4, 0x9B, 0x2E, 0x3C, 0x28, 0x2B, 0x7C,
+ /* 0x50 & ---- */
+ 0x26, 0x82, 0x88, 0x89, 0x8A, 0xA1, 0x8C, 0x07,
+ /* 0x58 ß ! $ * ) ; */
+ 0x8D, 0xE1, 0x21, 0x24, 0x2A, 0x29, 0x3B, 0xAA,
+ /* 0x60 - / ---- Ä ---- ---- ---- */
+ 0x2D, 0x2F, 0x07, 0x8E, 0x07, 0x07, 0x07, 0x8F,
+ /* 0x68 ---- , % _ > ? */
+ 0x80, 0xA5, 0x07, 0x2C, 0x25, 0x5F, 0x3E, 0x3F,
+ /* 0x70 ---- ---- ---- ---- ---- ---- ---- */
+ 0x07, 0x90, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07,
+ /* 0x78 * ` : # @ ' = " */
+ 0x70, 0x60, 0x3A, 0x23, 0x40, 0x27, 0x3D, 0x22,
+ /* 0x80 * a b c d e f g */
+ 0x07, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
+ /* 0x88 h i ---- ---- ---- */
+ 0x68, 0x69, 0xAE, 0xAF, 0x07, 0x07, 0x07, 0xF1,
+ /* 0x90 ° j k l m n o p */
+ 0xF8, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F, 0x70,
+ /* 0x98 q r ---- ---- */
+ 0x71, 0x72, 0xA6, 0xA7, 0x91, 0x07, 0x92, 0x07,
+ /* 0xA0 ~ s t u v w x */
+ 0xE6, 0x7E, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78,
+ /* 0xA8 y z ---- ---- ---- ---- */
+ 0x79, 0x7A, 0xAD, 0xAB, 0x07, 0x07, 0x07, 0x07,
+ /* 0xB0 ^ ---- § ---- */
+ 0x5E, 0x9C, 0x9D, 0xFA, 0x07, 0x07, 0x07, 0xAC,
+ /* 0xB8 ---- [ ] ---- ---- ---- ---- */
+ 0xAB, 0x07, 0x5B, 0x5D, 0x07, 0x07, 0x07, 0x07,
+ /* 0xC0 { A B C D E F G */
+ 0x7B, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47,
+ /* 0xC8 H I ---- ö ---- */
+ 0x48, 0x49, 0x07, 0x93, 0x94, 0x95, 0xA2, 0x07,
+ /* 0xD0 } J K L M N O P */
+ 0x7D, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F, 0x50,
+ /* 0xD8 Q R ---- ü */
+ 0x51, 0x52, 0x07, 0x96, 0x81, 0x97, 0xA3, 0x98,
+ /* 0xE0 \ S T U V W X */
+ 0x5C, 0xF6, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58,
+ /* 0xE8 Y Z ---- Ö ---- ---- ---- */
+ 0x59, 0x5A, 0xFD, 0x07, 0x99, 0x07, 0x07, 0x07,
+ /* 0xF0 0 1 2 3 4 5 6 7 */
+ 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
+ /* 0xF8 8 9 ---- ---- Ü ---- ---- ---- */
+ 0x38, 0x39, 0x07, 0x07, 0x9A, 0x07, 0x07, 0x07
+};
+
+
+/*
+ * ASCII (IBM PC 437) -> EBCDIC 500
+ */
+__u8 _ascebc_500[256] =
+{
+ /*00 NUL SOH STX ETX EOT ENQ ACK BEL */
+ 0x00, 0x01, 0x02, 0x03, 0x37, 0x2D, 0x2E, 0x2F,
+ /*08 BS HT LF VT FF CR SO SI */
+ /* ->NL */
+ 0x16, 0x05, 0x15, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F,
+ /*10 DLE DC1 DC2 DC3 DC4 NAK SYN ETB */
+ 0x10, 0x11, 0x12, 0x13, 0x3C, 0x3D, 0x32, 0x26,
+ /*18 CAN EM SUB ESC FS GS RS US */
+ /* ->IGS ->IRS ->IUS */
+ 0x18, 0x19, 0x3F, 0x27, 0x22, 0x1D, 0x1E, 0x1F,
+ /*20 SP ! " # $ % & ' */
+ 0x40, 0x4F, 0x7F, 0x7B, 0x5B, 0x6C, 0x50, 0x7D,
+ /*28 ( ) * + , - . / */
+ 0x4D, 0x5D, 0x5C, 0x4E, 0x6B, 0x60, 0x4B, 0x61,
+ /*30 0 1 2 3 4 5 6 7 */
+ 0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0xF7,
+ /*38 8 9 : ; < = > ? */
+ 0xF8, 0xF9, 0x7A, 0x5E, 0x4C, 0x7E, 0x6E, 0x6F,
+ /*40 @ A B C D E F G */
+ 0x7C, 0xC1, 0xC2, 0xC3, 0xC4, 0xC5, 0xC6, 0xC7,
+ /*48 H I J K L M N O */
+ 0xC8, 0xC9, 0xD1, 0xD2, 0xD3, 0xD4, 0xD5, 0xD6,
+ /*50 P Q R S T U V W */
+ 0xD7, 0xD8, 0xD9, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6,
+ /*58 X Y Z [ \ ] ^ _ */
+ 0xE7, 0xE8, 0xE9, 0x4A, 0xE0, 0x5A, 0x5F, 0x6D,
+ /*60 ` a b c d e f g */
+ 0x79, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87,
+ /*68 h i j k l m n o */
+ 0x88, 0x89, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96,
+ /*70 p q r s t u v w */
+ 0x97, 0x98, 0x99, 0xA2, 0xA3, 0xA4, 0xA5, 0xA6,
+ /*78 x y z { | } ~ DL */
+ 0xA7, 0xA8, 0xA9, 0xC0, 0xBB, 0xD0, 0xA1, 0x07,
+ /*80*/
+ 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F,
+ /*88*/
+ 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F,
+ /*90*/
+ 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F,
+ /*98*/
+ 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F,
+ /*A0*/
+ 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F,
+ /*A8*/
+ 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F,
+ /*B0*/
+ 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F,
+ /*B8*/
+ 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F,
+ /*C0*/
+ 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F,
+ /*C8*/
+ 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F,
+ /*D0*/
+ 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F,
+ /*D8*/
+ 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F,
+ /*E0 sz */
+ 0x3F, 0x59, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F,
+ /*E8*/
+ 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F,
+ /*F0*/
+ 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F,
+ /*F8*/
+ 0x90, 0x3F, 0x3F, 0x3F, 0x3F, 0xEA, 0x3F, 0xFF
+};
+
+/*
+ * EBCDIC 500 -> ASCII (IBM PC 437)
+ */
+__u8 _ebcasc_500[256] =
+{
+ /* 0x00 NUL SOH STX ETX *SEL HT *RNL DEL */
+ 0x00, 0x01, 0x02, 0x03, 0x07, 0x09, 0x07, 0x7F,
+ /* 0x08 -GE -SPS -RPT VT FF CR SO SI */
+ 0x07, 0x07, 0x07, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F,
+ /* 0x10 DLE DC1 DC2 DC3 -RES -NL BS -POC
+ -ENP ->LF */
+ 0x10, 0x11, 0x12, 0x13, 0x07, 0x0A, 0x08, 0x07,
+ /* 0x18 CAN EM -UBS -CU1 -IFS -IGS -IRS -ITB
+ -IUS */
+ 0x18, 0x19, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07,
+ /* 0x20 -DS -SOS FS -WUS -BYP LF ETB ESC
+ -INP */
+ 0x07, 0x07, 0x1C, 0x07, 0x07, 0x0A, 0x17, 0x1B,
+ /* 0x28 -SA -SFE -SM -CSP -MFA ENQ ACK BEL
+ -SW */
+ 0x07, 0x07, 0x07, 0x07, 0x07, 0x05, 0x06, 0x07,
+ /* 0x30 ---- ---- SYN -IR -PP -TRN -NBS EOT */
+ 0x07, 0x07, 0x16, 0x07, 0x07, 0x07, 0x07, 0x04,
+ /* 0x38 -SBS -IT -RFF -CU3 DC4 NAK ---- SUB */
+ 0x07, 0x07, 0x07, 0x07, 0x14, 0x15, 0x07, 0x1A,
+ /* 0x40 SP RSP ä ---- */
+ 0x20, 0xFF, 0x83, 0x84, 0x85, 0xA0, 0x07, 0x86,
+ /* 0x48 [ . < ( + ! */
+ 0x87, 0xA4, 0x5B, 0x2E, 0x3C, 0x28, 0x2B, 0x21,
+ /* 0x50 & ---- */
+ 0x26, 0x82, 0x88, 0x89, 0x8A, 0xA1, 0x8C, 0x07,
+ /* 0x58 ß ] $ * ) ; ^ */
+ 0x8D, 0xE1, 0x5D, 0x24, 0x2A, 0x29, 0x3B, 0x5E,
+ /* 0x60 - / ---- Ä ---- ---- ---- */
+ 0x2D, 0x2F, 0x07, 0x8E, 0x07, 0x07, 0x07, 0x8F,
+ /* 0x68 ---- , % _ > ? */
+ 0x80, 0xA5, 0x07, 0x2C, 0x25, 0x5F, 0x3E, 0x3F,
+ /* 0x70 ---- ---- ---- ---- ---- ---- ---- */
+ 0x07, 0x90, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07,
+ /* 0x78 * ` : # @ ' = " */
+ 0x70, 0x60, 0x3A, 0x23, 0x40, 0x27, 0x3D, 0x22,
+ /* 0x80 * a b c d e f g */
+ 0x07, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
+ /* 0x88 h i ---- ---- ---- */
+ 0x68, 0x69, 0xAE, 0xAF, 0x07, 0x07, 0x07, 0xF1,
+ /* 0x90 ° j k l m n o p */
+ 0xF8, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F, 0x70,
+ /* 0x98 q r ---- ---- */
+ 0x71, 0x72, 0xA6, 0xA7, 0x91, 0x07, 0x92, 0x07,
+ /* 0xA0 ~ s t u v w x */
+ 0xE6, 0x7E, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78,
+ /* 0xA8 y z ---- ---- ---- ---- */
+ 0x79, 0x7A, 0xAD, 0xAB, 0x07, 0x07, 0x07, 0x07,
+ /* 0xB0 ---- § ---- */
+ 0x9B, 0x9C, 0x9D, 0xFA, 0x07, 0x07, 0x07, 0xAC,
+ /* 0xB8 ---- | ---- ---- ---- ---- */
+ 0xAB, 0x07, 0xAA, 0x7C, 0x07, 0x07, 0x07, 0x07,
+ /* 0xC0 { A B C D E F G */
+ 0x7B, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47,
+ /* 0xC8 H I ---- ö ---- */
+ 0x48, 0x49, 0x07, 0x93, 0x94, 0x95, 0xA2, 0x07,
+ /* 0xD0 } J K L M N O P */
+ 0x7D, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F, 0x50,
+ /* 0xD8 Q R ---- ü */
+ 0x51, 0x52, 0x07, 0x96, 0x81, 0x97, 0xA3, 0x98,
+ /* 0xE0 \ S T U V W X */
+ 0x5C, 0xF6, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58,
+ /* 0xE8 Y Z ---- Ö ---- ---- ---- */
+ 0x59, 0x5A, 0xFD, 0x07, 0x99, 0x07, 0x07, 0x07,
+ /* 0xF0 0 1 2 3 4 5 6 7 */
+ 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
+ /* 0xF8 8 9 ---- ---- Ü ---- ---- ---- */
+ 0x38, 0x39, 0x07, 0x07, 0x9A, 0x07, 0x07, 0x07
+};
+
+
+/*
+ * EBCDIC 037/500 conversion table:
+ * from upper to lower case
+ */
+__u8 _ebc_tolower[256] =
+{
+ 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
+ 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F,
+ 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
+ 0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F,
+ 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27,
+ 0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F,
+ 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
+ 0x38, 0x39, 0x3A, 0x3B, 0x3C, 0x3D, 0x3E, 0x3F,
+ 0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47,
+ 0x48, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F,
+ 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57,
+ 0x58, 0x59, 0x5A, 0x5B, 0x5C, 0x5D, 0x5E, 0x5F,
+ 0x60, 0x61, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47,
+ 0x48, 0x49, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
+ 0x70, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57,
+ 0x58, 0x79, 0x7A, 0x7B, 0x7C, 0x7D, 0x7E, 0x7F,
+ 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87,
+ 0x88, 0x89, 0x8A, 0x8B, 0x8C, 0x8D, 0x8E, 0x8F,
+ 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97,
+ 0x98, 0x99, 0x9A, 0x9B, 0x9C, 0x9D, 0x9C, 0x9F,
+ 0xA0, 0xA1, 0xA2, 0xA3, 0xA4, 0xA5, 0xA6, 0xA7,
+ 0xA8, 0xA9, 0xAA, 0xAB, 0x8C, 0x8D, 0x8E, 0xAF,
+ 0xB0, 0xB1, 0xB2, 0xB3, 0xB4, 0xB5, 0xB6, 0xB7,
+ 0xB8, 0xB9, 0xBA, 0xBB, 0xBC, 0xBD, 0xBE, 0xBF,
+ 0xC0, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87,
+ 0x88, 0x89, 0xCA, 0xCB, 0xCC, 0xCD, 0xCE, 0xCF,
+ 0xD0, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97,
+ 0x98, 0x99, 0xDA, 0xDB, 0xDC, 0xDD, 0xDE, 0xDF,
+ 0xE0, 0xE1, 0xA2, 0xA3, 0xA4, 0xA5, 0xA6, 0xA7,
+ 0xA8, 0xA9, 0xEA, 0xCB, 0xCC, 0xCD, 0xCE, 0xCF,
+ 0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0xF7,
+ 0xF8, 0xF9, 0xFA, 0xDB, 0xDC, 0xDD, 0xDE, 0xFF
+};
+
+
+/*
+ * EBCDIC 037/500 conversion table:
+ * from lower to upper case
+ */
+__u8 _ebc_toupper[256] =
+{
+ 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
+ 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F,
+ 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
+ 0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F,
+ 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27,
+ 0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F,
+ 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
+ 0x38, 0x39, 0x3A, 0x3B, 0x3C, 0x3D, 0x3E, 0x3F,
+ 0x40, 0x41, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
+ 0x68, 0x69, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F,
+ 0x50, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
+ 0x78, 0x59, 0x5A, 0x5B, 0x5C, 0x5D, 0x5E, 0x5F,
+ 0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
+ 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
+ 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
+ 0x78, 0x79, 0x7A, 0x7B, 0x7C, 0x7D, 0x7E, 0x7F,
+ 0x80, 0xC1, 0xC2, 0xC3, 0xC4, 0xC5, 0xC6, 0xC7,
+ 0xC8, 0xC9, 0x8A, 0x8B, 0xAC, 0xAD, 0xAE, 0x8F,
+ 0x90, 0xD1, 0xD2, 0xD3, 0xD4, 0xD5, 0xD6, 0xD7,
+ 0xD8, 0xD9, 0x9A, 0x9B, 0x9E, 0x9D, 0x9E, 0x9F,
+ 0xA0, 0xA1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7,
+ 0xE8, 0xE9, 0xAA, 0xAB, 0xAC, 0xAD, 0xAE, 0xAF,
+ 0xB0, 0xB1, 0xB2, 0xB3, 0xB4, 0xB5, 0xB6, 0xB7,
+ 0xB8, 0xB9, 0xBA, 0xBB, 0xBC, 0xBD, 0xBE, 0xBF,
+ 0xC0, 0xC1, 0xC2, 0xC3, 0xC4, 0xC5, 0xC6, 0xC7,
+ 0xC8, 0xC9, 0xCA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF,
+ 0xD0, 0xD1, 0xD2, 0xD3, 0xD4, 0xD5, 0xD6, 0xD7,
+ 0xD8, 0xD9, 0xDA, 0xFB, 0xFC, 0xFD, 0xFE, 0xDF,
+ 0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7,
+ 0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF,
+ 0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0xF7,
+ 0xF8, 0xF9, 0xFA, 0xFB, 0xFC, 0xFD, 0xFE, 0xFF
+};
+
+EXPORT_SYMBOL(_ascebc_500);
+EXPORT_SYMBOL(_ebcasc_500);
+EXPORT_SYMBOL(_ascebc);
+EXPORT_SYMBOL(_ebcasc);
+EXPORT_SYMBOL(_ebc_tolower);
+EXPORT_SYMBOL(_ebc_toupper);
+
diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S
new file mode 100644
index 000000000..7e6a9cf86
--- /dev/null
+++ b/arch/s390/kernel/entry.S
@@ -0,0 +1,1510 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * S390 low-level entry points.
+ *
+ * Copyright IBM Corp. 1999, 2012
+ * Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com),
+ * Hartmut Penner (hp@de.ibm.com),
+ * Denis Joseph Barrow (djbarrow@de.ibm.com,barrow_dj@yahoo.com),
+ * Heiko Carstens <heiko.carstens@de.ibm.com>
+ */
+
+#include <linux/init.h>
+#include <linux/linkage.h>
+#include <asm/alternative-asm.h>
+#include <asm/processor.h>
+#include <asm/cache.h>
+#include <asm/ctl_reg.h>
+#include <asm/dwarf.h>
+#include <asm/errno.h>
+#include <asm/ptrace.h>
+#include <asm/thread_info.h>
+#include <asm/asm-offsets.h>
+#include <asm/unistd.h>
+#include <asm/page.h>
+#include <asm/sigp.h>
+#include <asm/irq.h>
+#include <asm/vx-insn.h>
+#include <asm/setup.h>
+#include <asm/nmi.h>
+#include <asm/export.h>
+#include <asm/nospec-insn.h>
+
+__PT_R0 = __PT_GPRS
+__PT_R1 = __PT_GPRS + 8
+__PT_R2 = __PT_GPRS + 16
+__PT_R3 = __PT_GPRS + 24
+__PT_R4 = __PT_GPRS + 32
+__PT_R5 = __PT_GPRS + 40
+__PT_R6 = __PT_GPRS + 48
+__PT_R7 = __PT_GPRS + 56
+__PT_R8 = __PT_GPRS + 64
+__PT_R9 = __PT_GPRS + 72
+__PT_R10 = __PT_GPRS + 80
+__PT_R11 = __PT_GPRS + 88
+__PT_R12 = __PT_GPRS + 96
+__PT_R13 = __PT_GPRS + 104
+__PT_R14 = __PT_GPRS + 112
+__PT_R15 = __PT_GPRS + 120
+
+STACK_SHIFT = PAGE_SHIFT + THREAD_SIZE_ORDER
+STACK_SIZE = 1 << STACK_SHIFT
+STACK_INIT = STACK_SIZE - STACK_FRAME_OVERHEAD - __PT_SIZE
+
+_TIF_WORK = (_TIF_SIGPENDING | _TIF_NOTIFY_RESUME | _TIF_NEED_RESCHED | \
+ _TIF_UPROBE | _TIF_GUARDED_STORAGE | _TIF_PATCH_PENDING)
+_TIF_TRACE = (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT | _TIF_SECCOMP | \
+ _TIF_SYSCALL_TRACEPOINT)
+_CIF_WORK = (_CIF_MCCK_PENDING | _CIF_ASCE_PRIMARY | \
+ _CIF_ASCE_SECONDARY | _CIF_FPU)
+_PIF_WORK = (_PIF_PER_TRAP | _PIF_SYSCALL_RESTART)
+
+_LPP_OFFSET = __LC_LPP
+
+#define BASED(name) name-cleanup_critical(%r13)
+
+ .macro TRACE_IRQS_ON
+#ifdef CONFIG_TRACE_IRQFLAGS
+ basr %r2,%r0
+ brasl %r14,trace_hardirqs_on_caller
+#endif
+ .endm
+
+ .macro TRACE_IRQS_OFF
+#ifdef CONFIG_TRACE_IRQFLAGS
+ basr %r2,%r0
+ brasl %r14,trace_hardirqs_off_caller
+#endif
+ .endm
+
+ .macro LOCKDEP_SYS_EXIT
+#ifdef CONFIG_LOCKDEP
+ tm __PT_PSW+1(%r11),0x01 # returning to user ?
+ jz .+10
+ brasl %r14,lockdep_sys_exit
+#endif
+ .endm
+
+ .macro CHECK_STACK stacksize,savearea
+#ifdef CONFIG_CHECK_STACK
+ tml %r15,\stacksize - CONFIG_STACK_GUARD
+ lghi %r14,\savearea
+ jz stack_overflow
+#endif
+ .endm
+
+ .macro SWITCH_ASYNC savearea,timer
+ tmhh %r8,0x0001 # interrupting from user ?
+ jnz 1f
+ lgr %r14,%r9
+ slg %r14,BASED(.Lcritical_start)
+ clg %r14,BASED(.Lcritical_length)
+ jhe 0f
+ lghi %r11,\savearea # inside critical section, do cleanup
+ brasl %r14,cleanup_critical
+ tmhh %r8,0x0001 # retest problem state after cleanup
+ jnz 1f
+0: lg %r14,__LC_ASYNC_STACK # are we already on the async stack?
+ slgr %r14,%r15
+ srag %r14,%r14,STACK_SHIFT
+ jnz 2f
+ CHECK_STACK 1<<STACK_SHIFT,\savearea
+ aghi %r15,-(STACK_FRAME_OVERHEAD + __PT_SIZE)
+ j 3f
+1: UPDATE_VTIME %r14,%r15,\timer
+ BPENTER __TI_flags(%r12),_TIF_ISOLATE_BP
+2: lg %r15,__LC_ASYNC_STACK # load async stack
+3: la %r11,STACK_FRAME_OVERHEAD(%r15)
+ .endm
+
+ .macro UPDATE_VTIME w1,w2,enter_timer
+ lg \w1,__LC_EXIT_TIMER
+ lg \w2,__LC_LAST_UPDATE_TIMER
+ slg \w1,\enter_timer
+ slg \w2,__LC_EXIT_TIMER
+ alg \w1,__LC_USER_TIMER
+ alg \w2,__LC_SYSTEM_TIMER
+ stg \w1,__LC_USER_TIMER
+ stg \w2,__LC_SYSTEM_TIMER
+ mvc __LC_LAST_UPDATE_TIMER(8),\enter_timer
+ .endm
+
+ .macro REENABLE_IRQS
+ stg %r8,__LC_RETURN_PSW
+ ni __LC_RETURN_PSW,0xbf
+ ssm __LC_RETURN_PSW
+ .endm
+
+ .macro STCK savearea
+#ifdef CONFIG_HAVE_MARCH_Z9_109_FEATURES
+ .insn s,0xb27c0000,\savearea # store clock fast
+#else
+ .insn s,0xb2050000,\savearea # store clock
+#endif
+ .endm
+
+ /*
+ * The TSTMSK macro generates a test-under-mask instruction by
+ * calculating the memory offset for the specified mask value.
+ * The mask value can be any constant; the macro shifts the mask
+ * right, byte by byte, until it fits into a single byte and adjusts
+ * the memory offset accordingly.
+ */
+ .macro TSTMSK addr, mask, size=8, bytepos=0
+ .if (\bytepos < \size) && (\mask >> 8)
+ .if (\mask & 0xff)
+ .error "Mask exceeds byte boundary"
+ .endif
+ TSTMSK \addr, "(\mask >> 8)", \size, "(\bytepos + 1)"
+ .exitm
+ .endif
+ .ifeq \mask
+ .error "Mask must not be zero"
+ .endif
+ off = \size - \bytepos - 1
+ tm off+\addr, \mask
+ .endm
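+ /*
+ * For example, with the default 8-byte field "TSTMSK __LC_CPU_FLAGS,0x0100"
+ * recurses once and resolves to "tm 6+__LC_CPU_FLAGS,1", i.e. it tests bit
+ * 0x01 in the second-lowest byte of the big-endian flags field.
+ */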
+
+ .macro BPOFF
+ ALTERNATIVE "", ".long 0xb2e8c000", 82
+ .endm
+
+ .macro BPON
+ ALTERNATIVE "", ".long 0xb2e8d000", 82
+ .endm
+
+ .macro BPENTER tif_ptr,tif_mask
+ ALTERNATIVE "TSTMSK \tif_ptr,\tif_mask; jz .+8; .long 0xb2e8d000", \
+ "", 82
+ .endm
+
+ .macro BPEXIT tif_ptr,tif_mask
+ TSTMSK \tif_ptr,\tif_mask
+ ALTERNATIVE "jz .+8; .long 0xb2e8c000", \
+ "jnz .+8; .long 0xb2e8d000", 82
+ .endm
+
+ GEN_BR_THUNK %r9
+ GEN_BR_THUNK %r14
+ GEN_BR_THUNK %r14,%r11
+
+ .section .kprobes.text, "ax"
+.Ldummy:
+ /*
+ * This nop exists only to avoid having __switch_to start at the
+ * beginning of the kprobes text section. In that case several symbols
+ * would share the same address and objdump, for example, would pick an
+ * arbitrary symbol name when disassembling this code. With the added
+ * nop in between, the __switch_to symbol is unique again.
+ */
+ nop 0
+
+ENTRY(__bpon)
+ .globl __bpon
+ BPON
+ BR_EX %r14
+
+/*
+ * Scheduler resume function, called by switch_to
+ * gpr2 = (task_struct *) prev
+ * gpr3 = (task_struct *) next
+ * Returns:
+ * gpr2 = prev
+ */
+ENTRY(__switch_to)
+ stmg %r6,%r15,__SF_GPRS(%r15) # store gprs of prev task
+ lghi %r4,__TASK_stack
+ lghi %r1,__TASK_thread
+ lg %r5,0(%r4,%r3) # start of kernel stack of next
+ stg %r15,__THREAD_ksp(%r1,%r2) # store kernel stack of prev
+ lgr %r15,%r5
+ aghi %r15,STACK_INIT # end of kernel stack of next
+ stg %r3,__LC_CURRENT # store task struct of next
+ stg %r15,__LC_KERNEL_STACK # store end of kernel stack
+ lg %r15,__THREAD_ksp(%r1,%r3) # load kernel stack of next
+ aghi %r3,__TASK_pid
+ mvc __LC_CURRENT_PID(4,%r0),0(%r3) # store pid of next
+ lmg %r6,%r15,__SF_GPRS(%r15) # load gprs of next task
+ ALTERNATIVE "", ".insn s,0xb2800000,_LPP_OFFSET", 40
+ BR_EX %r14
+
+.L__critical_start:
+
+#if IS_ENABLED(CONFIG_KVM)
+/*
+ * sie64a calling convention:
+ * %r2 pointer to sie control block
+ * %r3 guest register save area
+ */
+ENTRY(sie64a)
+ stmg %r6,%r14,__SF_GPRS(%r15) # save kernel registers
+ lg %r12,__LC_CURRENT
+ stg %r2,__SF_SIE_CONTROL(%r15) # save control block pointer
+ stg %r3,__SF_SIE_SAVEAREA(%r15) # save guest register save area
+ xc __SF_SIE_REASON(8,%r15),__SF_SIE_REASON(%r15) # reason code = 0
+ mvc __SF_SIE_FLAGS(8,%r15),__TI_flags(%r12) # copy thread flags
+ TSTMSK __LC_CPU_FLAGS,_CIF_FPU # load guest fp/vx registers ?
+ jno .Lsie_load_guest_gprs
+ brasl %r14,load_fpu_regs # load guest fp/vx regs
+.Lsie_load_guest_gprs:
+ lmg %r0,%r13,0(%r3) # load guest gprs 0-13
+ lg %r14,__LC_GMAP # get gmap pointer
+ ltgr %r14,%r14
+ jz .Lsie_gmap
+ lctlg %c1,%c1,__GMAP_ASCE(%r14) # load primary asce
+.Lsie_gmap:
+ lg %r14,__SF_SIE_CONTROL(%r15) # get control block pointer
+ oi __SIE_PROG0C+3(%r14),1 # we are going into SIE now
+ tm __SIE_PROG20+3(%r14),3 # last exit...
+ jnz .Lsie_skip
+ TSTMSK __LC_CPU_FLAGS,_CIF_FPU
+ jo .Lsie_skip # exit if fp/vx regs changed
+ BPEXIT __SF_SIE_FLAGS(%r15),(_TIF_ISOLATE_BP|_TIF_ISOLATE_BP_GUEST)
+.Lsie_entry:
+ sie 0(%r14)
+.Lsie_exit:
+ BPOFF
+ BPENTER __SF_SIE_FLAGS(%r15),(_TIF_ISOLATE_BP|_TIF_ISOLATE_BP_GUEST)
+.Lsie_skip:
+ ni __SIE_PROG0C+3(%r14),0xfe # no longer in SIE
+ lctlg %c1,%c1,__LC_USER_ASCE # load primary asce
+.Lsie_done:
+# Some program checks are suppressing, i.e. the old PSW already points past
+# the failing instruction. C code (e.g. do_protection_exception) will rewind
+# the PSW by the ILC, which is often 4 bytes in case of SIE. There are some
+# corner cases (e.g. runtime instrumentation) where the ILC is unpredictable.
+# Other instructions between sie64a and .Lsie_done should not cause program
+# interrupts. So let's use 3 nops as a landing pad for all possible rewinds.
+# See also .Lcleanup_sie
+.Lrewind_pad6:
+ nopr 7
+.Lrewind_pad4:
+ nopr 7
+.Lrewind_pad2:
+ nopr 7
+ .globl sie_exit
+sie_exit:
+ lg %r14,__SF_SIE_SAVEAREA(%r15) # load guest register save area
+ stmg %r0,%r13,0(%r14) # save guest gprs 0-13
+ xgr %r0,%r0 # clear guest registers to
+ xgr %r1,%r1 # prevent speculative use
+ xgr %r2,%r2
+ xgr %r3,%r3
+ xgr %r4,%r4
+ xgr %r5,%r5
+ lmg %r6,%r14,__SF_GPRS(%r15) # restore kernel registers
+ lg %r2,__SF_SIE_REASON(%r15) # return exit reason code
+ BR_EX %r14
+.Lsie_fault:
+ lghi %r14,-EFAULT
+ stg %r14,__SF_SIE_REASON(%r15) # set exit reason code
+ j sie_exit
+
+ EX_TABLE(.Lrewind_pad6,.Lsie_fault)
+ EX_TABLE(.Lrewind_pad4,.Lsie_fault)
+ EX_TABLE(.Lrewind_pad2,.Lsie_fault)
+ EX_TABLE(sie_exit,.Lsie_fault)
+EXPORT_SYMBOL(sie64a)
+EXPORT_SYMBOL(sie_exit)
+#endif
+
+/*
+ * SVC interrupt handler routine. System calls are synchronous events and
+ * are executed with interrupts enabled.
+ */
+
+ENTRY(system_call)
+ stpt __LC_SYNC_ENTER_TIMER
+.Lsysc_stmg:
+ stmg %r8,%r15,__LC_SAVE_AREA_SYNC
+ BPOFF
+ lg %r12,__LC_CURRENT
+ lghi %r13,__TASK_thread
+ lghi %r14,_PIF_SYSCALL
+.Lsysc_per:
+ lg %r15,__LC_KERNEL_STACK
+ la %r11,STACK_FRAME_OVERHEAD(%r15) # pointer to pt_regs
+.Lsysc_vtime:
+ UPDATE_VTIME %r8,%r9,__LC_SYNC_ENTER_TIMER
+ BPENTER __TI_flags(%r12),_TIF_ISOLATE_BP
+ stmg %r0,%r7,__PT_R0(%r11)
+ mvc __PT_R8(64,%r11),__LC_SAVE_AREA_SYNC
+ mvc __PT_PSW(16,%r11),__LC_SVC_OLD_PSW
+ mvc __PT_INT_CODE(4,%r11),__LC_SVC_ILC
+ stg %r14,__PT_FLAGS(%r11)
+.Lsysc_do_svc:
+ # clear user controlled register to prevent speculative use
+ xgr %r0,%r0
+ # load address of system call table
+ lg %r10,__THREAD_sysc_table(%r13,%r12)
+ llgh %r8,__PT_INT_CODE+2(%r11)
+ slag %r8,%r8,2 # shift and test for svc 0
+ jnz .Lsysc_nr_ok
+ # svc 0: system call number in %r1
+ llgfr %r1,%r1 # clear high word in r1
+ cghi %r1,NR_syscalls
+ jnl .Lsysc_nr_ok
+ sth %r1,__PT_INT_CODE+2(%r11)
+ slag %r8,%r1,2
+.Lsysc_nr_ok:
+ xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15)
+ stg %r2,__PT_ORIG_GPR2(%r11)
+ stg %r7,STACK_FRAME_OVERHEAD(%r15)
+ lgf %r9,0(%r8,%r10) # get system call address
+ TSTMSK __TI_flags(%r12),_TIF_TRACE
+ jnz .Lsysc_tracesys
+ BASR_EX %r14,%r9 # call sys_xxxx
+ stg %r2,__PT_R2(%r11) # store return value
+
+.Lsysc_return:
+#ifdef CONFIG_DEBUG_RSEQ
+ lgr %r2,%r11
+ brasl %r14,rseq_syscall
+#endif
+ LOCKDEP_SYS_EXIT
+.Lsysc_tif:
+ TSTMSK __PT_FLAGS(%r11),_PIF_WORK
+ jnz .Lsysc_work
+ TSTMSK __TI_flags(%r12),_TIF_WORK
+ jnz .Lsysc_work # check for work
+ TSTMSK __LC_CPU_FLAGS,_CIF_WORK
+ jnz .Lsysc_work
+ BPEXIT __TI_flags(%r12),_TIF_ISOLATE_BP
+.Lsysc_restore:
+ lg %r14,__LC_VDSO_PER_CPU
+ lmg %r0,%r10,__PT_R0(%r11)
+ mvc __LC_RETURN_PSW(16),__PT_PSW(%r11)
+.Lsysc_exit_timer:
+ stpt __LC_EXIT_TIMER
+ mvc __VDSO_ECTG_BASE(16,%r14),__LC_EXIT_TIMER
+ lmg %r11,%r15,__PT_R11(%r11)
+ lpswe __LC_RETURN_PSW
+.Lsysc_done:
+
+#
+# One of the work bits is on. Find out which one.
+#
+.Lsysc_work:
+ TSTMSK __LC_CPU_FLAGS,_CIF_MCCK_PENDING
+ jo .Lsysc_mcck_pending
+ TSTMSK __TI_flags(%r12),_TIF_NEED_RESCHED
+ jo .Lsysc_reschedule
+ TSTMSK __PT_FLAGS(%r11),_PIF_SYSCALL_RESTART
+ jo .Lsysc_syscall_restart
+#ifdef CONFIG_UPROBES
+ TSTMSK __TI_flags(%r12),_TIF_UPROBE
+ jo .Lsysc_uprobe_notify
+#endif
+ TSTMSK __TI_flags(%r12),_TIF_GUARDED_STORAGE
+ jo .Lsysc_guarded_storage
+ TSTMSK __PT_FLAGS(%r11),_PIF_PER_TRAP
+ jo .Lsysc_singlestep
+#ifdef CONFIG_LIVEPATCH
+ TSTMSK __TI_flags(%r12),_TIF_PATCH_PENDING
+ jo .Lsysc_patch_pending # handle live patching just before
+ # signals and possible syscall restart
+#endif
+ TSTMSK __PT_FLAGS(%r11),_PIF_SYSCALL_RESTART
+ jo .Lsysc_syscall_restart
+ TSTMSK __TI_flags(%r12),_TIF_SIGPENDING
+ jo .Lsysc_sigpending
+ TSTMSK __TI_flags(%r12),_TIF_NOTIFY_RESUME
+ jo .Lsysc_notify_resume
+ TSTMSK __LC_CPU_FLAGS,_CIF_FPU
+ jo .Lsysc_vxrs
+ TSTMSK __LC_CPU_FLAGS,(_CIF_ASCE_PRIMARY|_CIF_ASCE_SECONDARY)
+ jnz .Lsysc_asce
+ j .Lsysc_return # beware of critical section cleanup
+
+#
+# _TIF_NEED_RESCHED is set, call schedule
+#
+.Lsysc_reschedule:
+ larl %r14,.Lsysc_return
+ jg schedule
+
+#
+# _CIF_MCCK_PENDING is set, call handler
+#
+.Lsysc_mcck_pending:
+ larl %r14,.Lsysc_return
+ jg s390_handle_mcck # TIF bit will be cleared by handler
+
+#
+# _CIF_ASCE_PRIMARY and/or _CIF_ASCE_SECONDARY set, load user space asce
+#
+.Lsysc_asce:
+ ni __LC_CPU_FLAGS+7,255-_CIF_ASCE_SECONDARY
+ lctlg %c7,%c7,__LC_VDSO_ASCE # load secondary asce
+ TSTMSK __LC_CPU_FLAGS,_CIF_ASCE_PRIMARY
+ jz .Lsysc_return
+#ifndef CONFIG_HAVE_MARCH_Z10_FEATURES
+ tm __LC_STFLE_FAC_LIST+3,0x10 # has MVCOS ?
+ jnz .Lsysc_set_fs_fixup
+ ni __LC_CPU_FLAGS+7,255-_CIF_ASCE_PRIMARY
+ lctlg %c1,%c1,__LC_USER_ASCE # load primary asce
+ j .Lsysc_return
+.Lsysc_set_fs_fixup:
+#endif
+ larl %r14,.Lsysc_return
+ jg set_fs_fixup
+
+#
+# CIF_FPU is set, restore floating-point controls and floating-point registers.
+#
+.Lsysc_vxrs:
+ larl %r14,.Lsysc_return
+ jg load_fpu_regs
+
+#
+# _TIF_SIGPENDING is set, call do_signal
+#
+.Lsysc_sigpending:
+ lgr %r2,%r11 # pass pointer to pt_regs
+ brasl %r14,do_signal
+ TSTMSK __PT_FLAGS(%r11),_PIF_SYSCALL
+ jno .Lsysc_return
+.Lsysc_do_syscall:
+ lghi %r13,__TASK_thread
+ lmg %r2,%r7,__PT_R2(%r11) # load svc arguments
+ lghi %r1,0 # svc 0 returns -ENOSYS
+ j .Lsysc_do_svc
+
+#
+# _TIF_NOTIFY_RESUME is set, call do_notify_resume
+#
+.Lsysc_notify_resume:
+ lgr %r2,%r11 # pass pointer to pt_regs
+ larl %r14,.Lsysc_return
+ jg do_notify_resume
+
+#
+# _TIF_UPROBE is set, call uprobe_notify_resume
+#
+#ifdef CONFIG_UPROBES
+.Lsysc_uprobe_notify:
+ lgr %r2,%r11 # pass pointer to pt_regs
+ larl %r14,.Lsysc_return
+ jg uprobe_notify_resume
+#endif
+
+#
+# _TIF_GUARDED_STORAGE is set, call guarded_storage_load
+#
+.Lsysc_guarded_storage:
+ lgr %r2,%r11 # pass pointer to pt_regs
+ larl %r14,.Lsysc_return
+ jg gs_load_bc_cb
+#
+# _TIF_PATCH_PENDING is set, call klp_update_patch_state
+#
+#ifdef CONFIG_LIVEPATCH
+.Lsysc_patch_pending:
+ lg %r2,__LC_CURRENT # pass pointer to task struct
+ larl %r14,.Lsysc_return
+ jg klp_update_patch_state
+#endif
+
+#
+# _PIF_PER_TRAP is set, call do_per_trap
+#
+.Lsysc_singlestep:
+ ni __PT_FLAGS+7(%r11),255-_PIF_PER_TRAP
+ lgr %r2,%r11 # pass pointer to pt_regs
+ larl %r14,.Lsysc_return
+ jg do_per_trap
+
+#
+# _PIF_SYSCALL_RESTART is set, repeat the current system call
+#
+.Lsysc_syscall_restart:
+ ni __PT_FLAGS+7(%r11),255-_PIF_SYSCALL_RESTART
+ lmg %r1,%r7,__PT_R1(%r11) # load svc arguments
+ lg %r2,__PT_ORIG_GPR2(%r11)
+ j .Lsysc_do_svc
+
+#
+# call tracehook_report_syscall_entry/tracehook_report_syscall_exit before
+# and after the system call
+#
+.Lsysc_tracesys:
+ lgr %r2,%r11 # pass pointer to pt_regs
+ la %r3,0
+ llgh %r0,__PT_INT_CODE+2(%r11)
+ stg %r0,__PT_R2(%r11)
+ brasl %r14,do_syscall_trace_enter
+ lghi %r0,NR_syscalls
+ clgr %r0,%r2
+ jnh .Lsysc_tracenogo
+ sllg %r8,%r2,2
+ lgf %r9,0(%r8,%r10)
+.Lsysc_tracego:
+ lmg %r3,%r7,__PT_R3(%r11)
+ stg %r7,STACK_FRAME_OVERHEAD(%r15)
+ lg %r2,__PT_ORIG_GPR2(%r11)
+ BASR_EX %r14,%r9 # call sys_xxx
+ stg %r2,__PT_R2(%r11) # store return value
+.Lsysc_tracenogo:
+ TSTMSK __TI_flags(%r12),_TIF_TRACE
+ jz .Lsysc_return
+ lgr %r2,%r11 # pass pointer to pt_regs
+ larl %r14,.Lsysc_return
+ jg do_syscall_trace_exit
+
+#
+# a new process exits the kernel with ret_from_fork
+#
+ENTRY(ret_from_fork)
+ la %r11,STACK_FRAME_OVERHEAD(%r15)
+ lg %r12,__LC_CURRENT
+ brasl %r14,schedule_tail
+ TRACE_IRQS_ON
+ ssm __LC_SVC_NEW_PSW # reenable interrupts
+ tm __PT_PSW+1(%r11),0x01 # forking a kernel thread ?
+ jne .Lsysc_tracenogo
+ # it's a kernel thread
+ lmg %r9,%r10,__PT_R9(%r11) # load gprs
+ENTRY(kernel_thread_starter)
+ la %r2,0(%r10)
+ BASR_EX %r14,%r9
+ j .Lsysc_tracenogo
+
+/*
+ * Program check handler routine
+ */
+
+ENTRY(pgm_check_handler)
+ stpt __LC_SYNC_ENTER_TIMER
+ BPOFF
+ stmg %r8,%r15,__LC_SAVE_AREA_SYNC
+ lg %r10,__LC_LAST_BREAK
+ lg %r12,__LC_CURRENT
+ lghi %r11,0
+ larl %r13,cleanup_critical
+ lmg %r8,%r9,__LC_PGM_OLD_PSW
+ tmhh %r8,0x0001 # test problem state bit
+ jnz 2f # -> fault in user space
+#if IS_ENABLED(CONFIG_KVM)
+ # cleanup critical section for program checks in sie64a
+ lgr %r14,%r9
+ slg %r14,BASED(.Lsie_critical_start)
+ clg %r14,BASED(.Lsie_critical_length)
+ jhe 0f
+ lg %r14,__SF_SIE_CONTROL(%r15) # get control block pointer
+ ni __SIE_PROG0C+3(%r14),0xfe # no longer in SIE
+ lctlg %c1,%c1,__LC_USER_ASCE # load primary asce
+ larl %r9,sie_exit # skip forward to sie_exit
+ lghi %r11,_PIF_GUEST_FAULT
+#endif
+0: tmhh %r8,0x4000 # PER bit set in old PSW ?
+ jnz 1f # -> enabled, can't be a double fault
+ tm __LC_PGM_ILC+3,0x80 # check for per exception
+ jnz .Lpgm_svcper # -> single stepped svc
+1: CHECK_STACK STACK_SIZE,__LC_SAVE_AREA_SYNC
+ aghi %r15,-(STACK_FRAME_OVERHEAD + __PT_SIZE)
+ j 4f
+2: UPDATE_VTIME %r14,%r15,__LC_SYNC_ENTER_TIMER
+ BPENTER __TI_flags(%r12),_TIF_ISOLATE_BP
+ lg %r15,__LC_KERNEL_STACK
+ lgr %r14,%r12
+ aghi %r14,__TASK_thread # pointer to thread_struct
+ lghi %r13,__LC_PGM_TDB
+ tm __LC_PGM_ILC+2,0x02 # check for transaction abort
+ jz 3f
+ mvc __THREAD_trap_tdb(256,%r14),0(%r13)
+3: stg %r10,__THREAD_last_break(%r14)
+4: lgr %r13,%r11
+ la %r11,STACK_FRAME_OVERHEAD(%r15)
+ stmg %r0,%r7,__PT_R0(%r11)
+ # clear user controlled registers to prevent speculative use
+ xgr %r0,%r0
+ xgr %r1,%r1
+ xgr %r2,%r2
+ xgr %r3,%r3
+ xgr %r4,%r4
+ xgr %r5,%r5
+ xgr %r6,%r6
+ xgr %r7,%r7
+ mvc __PT_R8(64,%r11),__LC_SAVE_AREA_SYNC
+ stmg %r8,%r9,__PT_PSW(%r11)
+ mvc __PT_INT_CODE(4,%r11),__LC_PGM_ILC
+ mvc __PT_INT_PARM_LONG(8,%r11),__LC_TRANS_EXC_CODE
+ stg %r13,__PT_FLAGS(%r11)
+ stg %r10,__PT_ARGS(%r11)
+ tm __LC_PGM_ILC+3,0x80 # check for per exception
+ jz 5f
+ tmhh %r8,0x0001 # kernel per event ?
+ jz .Lpgm_kprobe
+ oi __PT_FLAGS+7(%r11),_PIF_PER_TRAP
+ mvc __THREAD_per_address(8,%r14),__LC_PER_ADDRESS
+ mvc __THREAD_per_cause(2,%r14),__LC_PER_CODE
+ mvc __THREAD_per_paid(1,%r14),__LC_PER_ACCESS_ID
+5: REENABLE_IRQS
+ xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15)
+ larl %r1,pgm_check_table
+ llgh %r10,__PT_INT_CODE+2(%r11)
+ nill %r10,0x007f
+ sll %r10,2
+ je .Lpgm_return
+ lgf %r9,0(%r10,%r1) # load address of handler routine
+ lgr %r2,%r11 # pass pointer to pt_regs
+ BASR_EX %r14,%r9 # branch to interrupt-handler
+.Lpgm_return:
+ LOCKDEP_SYS_EXIT
+ tm __PT_PSW+1(%r11),0x01 # returning to user ?
+ jno .Lsysc_restore
+ TSTMSK __PT_FLAGS(%r11),_PIF_SYSCALL
+ jo .Lsysc_do_syscall
+ j .Lsysc_tif
+
+#
+# PER event in supervisor state, must be kprobes
+#
+.Lpgm_kprobe:
+ REENABLE_IRQS
+ xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15)
+ lgr %r2,%r11 # pass pointer to pt_regs
+ brasl %r14,do_per_trap
+ j .Lpgm_return
+
+#
+# single stepped system call
+#
+.Lpgm_svcper:
+ mvc __LC_RETURN_PSW(8),__LC_SVC_NEW_PSW
+ lghi %r13,__TASK_thread
+ larl %r14,.Lsysc_per
+ stg %r14,__LC_RETURN_PSW+8
+ lghi %r14,_PIF_SYSCALL | _PIF_PER_TRAP
+ lpswe __LC_RETURN_PSW # branch to .Lsysc_per and enable irqs
+
+/*
+ * IO interrupt handler routine
+ */
+ENTRY(io_int_handler)
+ STCK __LC_INT_CLOCK
+ stpt __LC_ASYNC_ENTER_TIMER
+ BPOFF
+ stmg %r8,%r15,__LC_SAVE_AREA_ASYNC
+ lg %r12,__LC_CURRENT
+ larl %r13,cleanup_critical
+ lmg %r8,%r9,__LC_IO_OLD_PSW
+ SWITCH_ASYNC __LC_SAVE_AREA_ASYNC,__LC_ASYNC_ENTER_TIMER
+ stmg %r0,%r7,__PT_R0(%r11)
+ # clear user controlled registers to prevent speculative use
+ xgr %r0,%r0
+ xgr %r1,%r1
+ xgr %r2,%r2
+ xgr %r3,%r3
+ xgr %r4,%r4
+ xgr %r5,%r5
+ xgr %r6,%r6
+ xgr %r7,%r7
+ xgr %r10,%r10
+ mvc __PT_R8(64,%r11),__LC_SAVE_AREA_ASYNC
+ stmg %r8,%r9,__PT_PSW(%r11)
+ mvc __PT_INT_CODE(12,%r11),__LC_SUBCHANNEL_ID
+ xc __PT_FLAGS(8,%r11),__PT_FLAGS(%r11)
+ TSTMSK __LC_CPU_FLAGS,_CIF_IGNORE_IRQ
+ jo .Lio_restore
+ TRACE_IRQS_OFF
+ xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15)
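+# On LPAR, further pending I/O interrupts are drained with tpi and handled
+# directly in the loop below instead of taking a fresh interrupt for each.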
+.Lio_loop:
+ lgr %r2,%r11 # pass pointer to pt_regs
+ lghi %r3,IO_INTERRUPT
+ tm __PT_INT_CODE+8(%r11),0x80 # adapter interrupt ?
+ jz .Lio_call
+ lghi %r3,THIN_INTERRUPT
+.Lio_call:
+ brasl %r14,do_IRQ
+ TSTMSK __LC_MACHINE_FLAGS,MACHINE_FLAG_LPAR
+ jz .Lio_return
+ tpi 0
+ jz .Lio_return
+ mvc __PT_INT_CODE(12,%r11),__LC_SUBCHANNEL_ID
+ j .Lio_loop
+.Lio_return:
+ LOCKDEP_SYS_EXIT
+ TRACE_IRQS_ON
+.Lio_tif:
+ TSTMSK __TI_flags(%r12),_TIF_WORK
+ jnz .Lio_work # there is work to do (signals etc.)
+ TSTMSK __LC_CPU_FLAGS,_CIF_WORK
+ jnz .Lio_work
+.Lio_restore:
+ lg %r14,__LC_VDSO_PER_CPU
+ lmg %r0,%r10,__PT_R0(%r11)
+ mvc __LC_RETURN_PSW(16),__PT_PSW(%r11)
+ tm __PT_PSW+1(%r11),0x01 # returning to user ?
+ jno .Lio_exit_kernel
+ BPEXIT __TI_flags(%r12),_TIF_ISOLATE_BP
+.Lio_exit_timer:
+ stpt __LC_EXIT_TIMER
+ mvc __VDSO_ECTG_BASE(16,%r14),__LC_EXIT_TIMER
+.Lio_exit_kernel:
+ lmg %r11,%r15,__PT_R11(%r11)
+ lpswe __LC_RETURN_PSW
+.Lio_done:
+
+#
+# There is work to do, find out in which context we have been interrupted:
+# 1) if we return to user space we can do all _TIF_WORK work
+# 2) if we return to kernel code and kvm is enabled check if we need to
+# modify the psw to leave SIE
+# 3) if we return to kernel code and preemptive scheduling is enabled check
+# the preemption counter and if it is zero call preempt_schedule_irq
+# Before any work can be done, a switch to the kernel stack is required.
+#
+.Lio_work:
+ tm __PT_PSW+1(%r11),0x01 # returning to user ?
+ jo .Lio_work_user # yes -> do resched & signal
+#ifdef CONFIG_PREEMPT
+ # check for preemptive scheduling
+ icm %r0,15,__LC_PREEMPT_COUNT
+ jnz .Lio_restore # preemption is disabled
+ TSTMSK __TI_flags(%r12),_TIF_NEED_RESCHED
+ jno .Lio_restore
+ # switch to kernel stack
+ lg %r1,__PT_R15(%r11)
+ aghi %r1,-(STACK_FRAME_OVERHEAD + __PT_SIZE)
+ mvc STACK_FRAME_OVERHEAD(__PT_SIZE,%r1),0(%r11)
+ xc __SF_BACKCHAIN(8,%r1),__SF_BACKCHAIN(%r1)
+ la %r11,STACK_FRAME_OVERHEAD(%r1)
+ lgr %r15,%r1
+ # TRACE_IRQS_ON already done at .Lio_return, call
+ # TRACE_IRQS_OFF to keep things symmetrical
+ TRACE_IRQS_OFF
+ brasl %r14,preempt_schedule_irq
+ j .Lio_return
+#else
+ j .Lio_restore
+#endif
+
+#
+# Need to do work before returning to userspace, switch to kernel stack
+#
+.Lio_work_user:
+ lg %r1,__LC_KERNEL_STACK
+ mvc STACK_FRAME_OVERHEAD(__PT_SIZE,%r1),0(%r11)
+ xc __SF_BACKCHAIN(8,%r1),__SF_BACKCHAIN(%r1)
+ la %r11,STACK_FRAME_OVERHEAD(%r1)
+ lgr %r15,%r1
+
+#
+# One of the work bits is on. Find out which one.
+#
+.Lio_work_tif:
+ TSTMSK __LC_CPU_FLAGS,_CIF_MCCK_PENDING
+ jo .Lio_mcck_pending
+ TSTMSK __TI_flags(%r12),_TIF_NEED_RESCHED
+ jo .Lio_reschedule
+#ifdef CONFIG_LIVEPATCH
+ TSTMSK __TI_flags(%r12),_TIF_PATCH_PENDING
+ jo .Lio_patch_pending
+#endif
+ TSTMSK __TI_flags(%r12),_TIF_SIGPENDING
+ jo .Lio_sigpending
+ TSTMSK __TI_flags(%r12),_TIF_NOTIFY_RESUME
+ jo .Lio_notify_resume
+ TSTMSK __TI_flags(%r12),_TIF_GUARDED_STORAGE
+ jo .Lio_guarded_storage
+ TSTMSK __LC_CPU_FLAGS,_CIF_FPU
+ jo .Lio_vxrs
+ TSTMSK __LC_CPU_FLAGS,(_CIF_ASCE_PRIMARY|_CIF_ASCE_SECONDARY)
+ jnz .Lio_asce
+ j .Lio_return # beware of critical section cleanup
+
+#
+# _CIF_MCCK_PENDING is set, call handler
+#
+.Lio_mcck_pending:
+ # TRACE_IRQS_ON already done at .Lio_return
+ brasl %r14,s390_handle_mcck # TIF bit will be cleared by handler
+ TRACE_IRQS_OFF
+ j .Lio_return
+
+#
+# _CIF_ASCE_PRIMARY and/or _CIF_ASCE_SECONDARY set, load user space asce
+#
+.Lio_asce:
+ ni __LC_CPU_FLAGS+7,255-_CIF_ASCE_SECONDARY
+ lctlg %c7,%c7,__LC_VDSO_ASCE # load secondary asce
+ TSTMSK __LC_CPU_FLAGS,_CIF_ASCE_PRIMARY
+ jz .Lio_return
+#ifndef CONFIG_HAVE_MARCH_Z10_FEATURES
+ tm __LC_STFLE_FAC_LIST+3,0x10 # has MVCOS ?
+ jnz .Lio_set_fs_fixup
+ ni __LC_CPU_FLAGS+7,255-_CIF_ASCE_PRIMARY
+ lctlg %c1,%c1,__LC_USER_ASCE # load primary asce
+ j .Lio_return
+.Lio_set_fs_fixup:
+#endif
+ larl %r14,.Lio_return
+ jg set_fs_fixup
+
+#
+# CIF_FPU is set, restore floating-point controls and floating-point registers.
+#
+.Lio_vxrs:
+ larl %r14,.Lio_return
+ jg load_fpu_regs
+
+#
+# _TIF_GUARDED_STORAGE is set, call guarded_storage_load
+#
+.Lio_guarded_storage:
+ # TRACE_IRQS_ON already done at .Lio_return
+ ssm __LC_SVC_NEW_PSW # reenable interrupts
+ lgr %r2,%r11 # pass pointer to pt_regs
+ brasl %r14,gs_load_bc_cb
+ ssm __LC_PGM_NEW_PSW # disable I/O and ext. interrupts
+ TRACE_IRQS_OFF
+ j .Lio_return
+
+#
+# _TIF_NEED_RESCHED is set, call schedule
+#
+.Lio_reschedule:
+ # TRACE_IRQS_ON already done at .Lio_return
+ ssm __LC_SVC_NEW_PSW # reenable interrupts
+ brasl %r14,schedule # call scheduler
+ ssm __LC_PGM_NEW_PSW # disable I/O and ext. interrupts
+ TRACE_IRQS_OFF
+ j .Lio_return
+
+#
+# _TIF_PATCH_PENDING is set, call klp_update_patch_state
+#
+#ifdef CONFIG_LIVEPATCH
+.Lio_patch_pending:
+ lg %r2,__LC_CURRENT # pass pointer to task struct
+ larl %r14,.Lio_return
+ jg klp_update_patch_state
+#endif
+
+#
+# _TIF_SIGPENDING is set, call do_signal
+#
+.Lio_sigpending:
+ # TRACE_IRQS_ON already done at .Lio_return
+ ssm __LC_SVC_NEW_PSW # reenable interrupts
+ lgr %r2,%r11 # pass pointer to pt_regs
+ brasl %r14,do_signal
+ ssm __LC_PGM_NEW_PSW # disable I/O and ext. interrupts
+ TRACE_IRQS_OFF
+ j .Lio_return
+
+#
+# _TIF_NOTIFY_RESUME is set, call do_notify_resume
+#
+.Lio_notify_resume:
+ # TRACE_IRQS_ON already done at .Lio_return
+ ssm __LC_SVC_NEW_PSW # reenable interrupts
+ lgr %r2,%r11 # pass pointer to pt_regs
+ brasl %r14,do_notify_resume
+ ssm __LC_PGM_NEW_PSW # disable I/O and ext. interrupts
+ TRACE_IRQS_OFF
+ j .Lio_return
+
+/*
+ * External interrupt handler routine
+ */
+ENTRY(ext_int_handler)
+ STCK __LC_INT_CLOCK
+ stpt __LC_ASYNC_ENTER_TIMER
+ BPOFF
+ stmg %r8,%r15,__LC_SAVE_AREA_ASYNC
+ lg %r12,__LC_CURRENT
+ larl %r13,cleanup_critical
+ lmg %r8,%r9,__LC_EXT_OLD_PSW
+ SWITCH_ASYNC __LC_SAVE_AREA_ASYNC,__LC_ASYNC_ENTER_TIMER
+ stmg %r0,%r7,__PT_R0(%r11)
+ # clear user controlled registers to prevent speculative use
+ xgr %r0,%r0
+ xgr %r1,%r1
+ xgr %r2,%r2
+ xgr %r3,%r3
+ xgr %r4,%r4
+ xgr %r5,%r5
+ xgr %r6,%r6
+ xgr %r7,%r7
+ xgr %r10,%r10
+ mvc __PT_R8(64,%r11),__LC_SAVE_AREA_ASYNC
+ stmg %r8,%r9,__PT_PSW(%r11)
+ lghi %r1,__LC_EXT_PARAMS2
+ mvc __PT_INT_CODE(4,%r11),__LC_EXT_CPU_ADDR
+ mvc __PT_INT_PARM(4,%r11),__LC_EXT_PARAMS
+ mvc __PT_INT_PARM_LONG(8,%r11),0(%r1)
+ xc __PT_FLAGS(8,%r11),__PT_FLAGS(%r11)
+ TSTMSK __LC_CPU_FLAGS,_CIF_IGNORE_IRQ
+ jo .Lio_restore
+ TRACE_IRQS_OFF
+ xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15)
+ lgr %r2,%r11 # pass pointer to pt_regs
+ lghi %r3,EXT_INTERRUPT
+ brasl %r14,do_IRQ
+ j .Lio_return
+
+/*
+ * Load idle PSW. The second "half" of this function is in .Lcleanup_idle.
+ */
+ENTRY(psw_idle)
+ stg %r14,(__SF_GPRS+8*8)(%r15)
+ stg %r3,__SF_EMPTY(%r15)
+ larl %r1,.Lpsw_idle_lpsw+4
+ stg %r1,__SF_EMPTY+8(%r15)
+#ifdef CONFIG_SMP
+ larl %r1,smp_cpu_mtid
+ llgf %r1,0(%r1)
+ ltgr %r1,%r1
+ jz .Lpsw_idle_stcctm
+ .insn rsy,0xeb0000000017,%r1,5,__SF_EMPTY+16(%r15) # store mt cycle counters (stcctm)
+.Lpsw_idle_stcctm:
+#endif
+ oi __LC_CPU_FLAGS+7,_CIF_ENABLED_WAIT
+ BPON
+ STCK __CLOCK_IDLE_ENTER(%r2)
+ stpt __TIMER_IDLE_ENTER(%r2)
+.Lpsw_idle_lpsw:
+ lpswe __SF_EMPTY(%r15)
+ BR_EX %r14
+.Lpsw_idle_end:
+
+/*
+ * Store floating-point controls and floating-point or vector registers,
+ * depending on whether the vector facility is available. A critical section
+ * cleanup assures that the registers are stored even if interrupted for
+ * some other work. The CIF_FPU flag is set to trigger a lazy restore
+ * of the register contents at return from io or a system call.
+ */
+ENTRY(save_fpu_regs)
+ lg %r2,__LC_CURRENT
+ aghi %r2,__TASK_thread
+ TSTMSK __LC_CPU_FLAGS,_CIF_FPU
+ jo .Lsave_fpu_regs_exit
+ stfpc __THREAD_FPU_fpc(%r2)
+ lg %r3,__THREAD_FPU_regs(%r2)
+ TSTMSK __LC_MACHINE_FLAGS,MACHINE_FLAG_VX
+ jz .Lsave_fpu_regs_fp # no -> store FP regs
+ VSTM %v0,%v15,0,%r3 # vstm 0,15,0(3)
+ VSTM %v16,%v31,256,%r3 # vstm 16,31,256(3)
+ j .Lsave_fpu_regs_done # -> set CIF_FPU flag
+.Lsave_fpu_regs_fp:
+ std 0,0(%r3)
+ std 1,8(%r3)
+ std 2,16(%r3)
+ std 3,24(%r3)
+ std 4,32(%r3)
+ std 5,40(%r3)
+ std 6,48(%r3)
+ std 7,56(%r3)
+ std 8,64(%r3)
+ std 9,72(%r3)
+ std 10,80(%r3)
+ std 11,88(%r3)
+ std 12,96(%r3)
+ std 13,104(%r3)
+ std 14,112(%r3)
+ std 15,120(%r3)
+.Lsave_fpu_regs_done:
+ oi __LC_CPU_FLAGS+7,_CIF_FPU
+.Lsave_fpu_regs_exit:
+ BR_EX %r14
+.Lsave_fpu_regs_end:
+EXPORT_SYMBOL(save_fpu_regs)
+
+/*
+ * Load floating-point controls and floating-point or vector registers.
+ * A critical section cleanup assures that the register contents are
+ * loaded even if interrupted for some other work.
+ *
+ * There is a special calling convention to fit into the sysc and io return work:
+ * %r15: <kernel stack>
+ * The function requires %r4 as a scratch register; its contents are clobbered.
+ */
+load_fpu_regs:
+ lg %r4,__LC_CURRENT
+ aghi %r4,__TASK_thread
+ TSTMSK __LC_CPU_FLAGS,_CIF_FPU
+ jno .Lload_fpu_regs_exit
+ lfpc __THREAD_FPU_fpc(%r4)
+ TSTMSK __LC_MACHINE_FLAGS,MACHINE_FLAG_VX
+ lg %r4,__THREAD_FPU_regs(%r4) # %r4 <- reg save area
+ jz .Lload_fpu_regs_fp # -> no VX, load FP regs
+ VLM %v0,%v15,0,%r4
+ VLM %v16,%v31,256,%r4
+ j .Lload_fpu_regs_done
+.Lload_fpu_regs_fp:
+ ld 0,0(%r4)
+ ld 1,8(%r4)
+ ld 2,16(%r4)
+ ld 3,24(%r4)
+ ld 4,32(%r4)
+ ld 5,40(%r4)
+ ld 6,48(%r4)
+ ld 7,56(%r4)
+ ld 8,64(%r4)
+ ld 9,72(%r4)
+ ld 10,80(%r4)
+ ld 11,88(%r4)
+ ld 12,96(%r4)
+ ld 13,104(%r4)
+ ld 14,112(%r4)
+ ld 15,120(%r4)
+.Lload_fpu_regs_done:
+ ni __LC_CPU_FLAGS+7,255-_CIF_FPU
+.Lload_fpu_regs_exit:
+ BR_EX %r14
+.Lload_fpu_regs_end:
+
+.L__critical_end:
+
+/*
+ * Machine check handler routines
+ */
+ENTRY(mcck_int_handler)
+ STCK __LC_MCCK_CLOCK
+ BPOFF
+ la %r1,4095 # validate r1
+ spt __LC_CPU_TIMER_SAVE_AREA-4095(%r1) # validate cpu timer
+ sckc __LC_CLOCK_COMPARATOR # validate comparator
+ lam %a0,%a15,__LC_AREGS_SAVE_AREA-4095(%r1) # validate acrs
+ lmg %r0,%r15,__LC_GPREGS_SAVE_AREA-4095(%r1)# validate gprs
+ lg %r12,__LC_CURRENT
+ larl %r13,cleanup_critical
+ lmg %r8,%r9,__LC_MCK_OLD_PSW
+ TSTMSK __LC_MCCK_CODE,MCCK_CODE_SYSTEM_DAMAGE
+ jo .Lmcck_panic # yes -> rest of mcck code invalid
+ TSTMSK __LC_MCCK_CODE,MCCK_CODE_CR_VALID
+ jno .Lmcck_panic # control registers invalid -> panic
+ la %r14,4095
+ lctlg %c0,%c15,__LC_CREGS_SAVE_AREA-4095(%r14) # validate ctl regs
+ ptlb
+ lg %r11,__LC_MCESAD-4095(%r14) # extended machine check save area
+ nill %r11,0xfc00 # MCESA_ORIGIN_MASK
+ TSTMSK __LC_CREGS_SAVE_AREA+16-4095(%r14),CR2_GUARDED_STORAGE
+ jno 0f
+ TSTMSK __LC_MCCK_CODE,MCCK_CODE_GS_VALID
+ jno 0f
+ .insn rxy,0xe3000000004d,0,__MCESA_GS_SAVE_AREA(%r11) # LGSC
+0: l %r14,__LC_FP_CREG_SAVE_AREA-4095(%r14)
+ TSTMSK __LC_MCCK_CODE,MCCK_CODE_FC_VALID
+ jo 0f
+ sr %r14,%r14
+0: sfpc %r14
+ TSTMSK __LC_MACHINE_FLAGS,MACHINE_FLAG_VX
+ jo 0f
+ lghi %r14,__LC_FPREGS_SAVE_AREA
+ ld %f0,0(%r14)
+ ld %f1,8(%r14)
+ ld %f2,16(%r14)
+ ld %f3,24(%r14)
+ ld %f4,32(%r14)
+ ld %f5,40(%r14)
+ ld %f6,48(%r14)
+ ld %f7,56(%r14)
+ ld %f8,64(%r14)
+ ld %f9,72(%r14)
+ ld %f10,80(%r14)
+ ld %f11,88(%r14)
+ ld %f12,96(%r14)
+ ld %f13,104(%r14)
+ ld %f14,112(%r14)
+ ld %f15,120(%r14)
+ j 1f
+0: VLM %v0,%v15,0,%r11
+ VLM %v16,%v31,256,%r11
+1: lghi %r14,__LC_CPU_TIMER_SAVE_AREA
+ mvc __LC_MCCK_ENTER_TIMER(8),0(%r14)
+ TSTMSK __LC_MCCK_CODE,MCCK_CODE_CPU_TIMER_VALID
+ jo 3f
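+ # The CPU timer content is not valid. Approximate it with the most
+ # recently stored of the timer values compared below (the CPU timer
+ # counts down, so the smallest value is the newest).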
+ la %r14,__LC_SYNC_ENTER_TIMER
+ clc 0(8,%r14),__LC_ASYNC_ENTER_TIMER
+ jl 0f
+ la %r14,__LC_ASYNC_ENTER_TIMER
+0: clc 0(8,%r14),__LC_EXIT_TIMER
+ jl 1f
+ la %r14,__LC_EXIT_TIMER
+1: clc 0(8,%r14),__LC_LAST_UPDATE_TIMER
+ jl 2f
+ la %r14,__LC_LAST_UPDATE_TIMER
+2: spt 0(%r14)
+ mvc __LC_MCCK_ENTER_TIMER(8),0(%r14)
+3: TSTMSK __LC_MCCK_CODE,MCCK_CODE_PSW_MWP_VALID
+ jno .Lmcck_panic
+ tmhh %r8,0x0001 # interrupting from user ?
+ jnz 4f
+ TSTMSK __LC_MCCK_CODE,MCCK_CODE_PSW_IA_VALID
+ jno .Lmcck_panic
+4: SWITCH_ASYNC __LC_GPREGS_SAVE_AREA+64,__LC_MCCK_ENTER_TIMER
+.Lmcck_skip:
+ lghi %r14,__LC_GPREGS_SAVE_AREA+64
+ stmg %r0,%r7,__PT_R0(%r11)
+ # clear user controlled registers to prevent speculative use
+ xgr %r0,%r0
+ xgr %r1,%r1
+ xgr %r2,%r2
+ xgr %r3,%r3
+ xgr %r4,%r4
+ xgr %r5,%r5
+ xgr %r6,%r6
+ xgr %r7,%r7
+ xgr %r10,%r10
+ mvc __PT_R8(64,%r11),0(%r14)
+ stmg %r8,%r9,__PT_PSW(%r11)
+ xc __PT_FLAGS(8,%r11),__PT_FLAGS(%r11)
+ xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15)
+ lgr %r2,%r11 # pass pointer to pt_regs
+ brasl %r14,s390_do_machine_check
+ tm __PT_PSW+1(%r11),0x01 # returning to user ?
+ jno .Lmcck_return
+ lg %r1,__LC_KERNEL_STACK # switch to kernel stack
+ mvc STACK_FRAME_OVERHEAD(__PT_SIZE,%r1),0(%r11)
+ xc __SF_BACKCHAIN(8,%r1),__SF_BACKCHAIN(%r1)
+ la %r11,STACK_FRAME_OVERHEAD(%r1)
+ lgr %r15,%r1
+ ssm __LC_PGM_NEW_PSW # turn dat on, keep irqs off
+ TSTMSK __LC_CPU_FLAGS,_CIF_MCCK_PENDING
+ jno .Lmcck_return
+ TRACE_IRQS_OFF
+ brasl %r14,s390_handle_mcck
+ TRACE_IRQS_ON
+.Lmcck_return:
+ lg %r14,__LC_VDSO_PER_CPU
+ lmg %r0,%r10,__PT_R0(%r11)
+ mvc __LC_RETURN_MCCK_PSW(16),__PT_PSW(%r11) # move return PSW
+ tm __LC_RETURN_MCCK_PSW+1,0x01 # returning to user ?
+ jno 0f
+ BPEXIT __TI_flags(%r12),_TIF_ISOLATE_BP
+ stpt __LC_EXIT_TIMER
+ mvc __VDSO_ECTG_BASE(16,%r14),__LC_EXIT_TIMER
+0: lmg %r11,%r15,__PT_R11(%r11)
+ lpswe __LC_RETURN_MCCK_PSW
+
+.Lmcck_panic:
+ lg %r15,__LC_PANIC_STACK
+ la %r11,STACK_FRAME_OVERHEAD(%r15)
+ j .Lmcck_skip
+
+#
+# PSW restart interrupt handler
+#
+ENTRY(restart_int_handler)
+ ALTERNATIVE "", ".insn s,0xb2800000,_LPP_OFFSET", 40 # load program parameter if facility 40 is installed
+ stg %r15,__LC_SAVE_AREA_RESTART
+ lg %r15,__LC_RESTART_STACK
+ aghi %r15,-__PT_SIZE # create pt_regs on stack
+ xc 0(__PT_SIZE,%r15),0(%r15)
+ stmg %r0,%r14,__PT_R0(%r15)
+ mvc __PT_R15(8,%r15),__LC_SAVE_AREA_RESTART
+ mvc __PT_PSW(16,%r15),__LC_RST_OLD_PSW # store restart old psw
+ aghi %r15,-STACK_FRAME_OVERHEAD # create stack frame on stack
+ xc 0(STACK_FRAME_OVERHEAD,%r15),0(%r15)
+ lg %r1,__LC_RESTART_FN # load fn, parm & source cpu
+ lg %r2,__LC_RESTART_DATA
+ lg %r3,__LC_RESTART_SOURCE
+ ltgr %r3,%r3 # test source cpu address
+ jm 1f # negative -> skip source stop
+0: sigp %r4,%r3,SIGP_SENSE # sigp sense to source cpu
+ brc 10,0b # wait for status stored
+1: basr %r14,%r1 # call function
+ stap __SF_EMPTY(%r15) # store cpu address
+ llgh %r3,__SF_EMPTY(%r15)
+2: sigp %r4,%r3,SIGP_STOP # sigp stop to current cpu
+ brc 2,2b
+3: j 3b
+
+ .section .kprobes.text, "ax"
+
+#ifdef CONFIG_CHECK_STACK
+/*
+ * The synchronous or the asynchronous stack overflowed. We are dead.
+ * No need to properly save the registers, we are going to panic anyway.
+ * Setup a pt_regs so that show_trace can provide a good call trace.
+ */
+stack_overflow:
+ lg %r15,__LC_PANIC_STACK # change to panic stack
+ la %r11,STACK_FRAME_OVERHEAD(%r15)
+ stmg %r0,%r7,__PT_R0(%r11)
+ stmg %r8,%r9,__PT_PSW(%r11)
+ mvc __PT_R8(64,%r11),0(%r14)
+ stg %r10,__PT_ORIG_GPR2(%r11) # store last break to orig_gpr2
+ xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15)
+ lgr %r2,%r11 # pass pointer to pt_regs
+ jg kernel_stack_overflow
+#endif
+
+cleanup_critical:
+#if IS_ENABLED(CONFIG_KVM)
+ clg %r9,BASED(.Lcleanup_table_sie) # .Lsie_gmap
+ jl 0f
+ clg %r9,BASED(.Lcleanup_table_sie+8)# .Lsie_done
+ jl .Lcleanup_sie
+#endif
+ clg %r9,BASED(.Lcleanup_table) # system_call
+ jl 0f
+ clg %r9,BASED(.Lcleanup_table+8) # .Lsysc_do_svc
+ jl .Lcleanup_system_call
+ clg %r9,BASED(.Lcleanup_table+16) # .Lsysc_tif
+ jl 0f
+ clg %r9,BASED(.Lcleanup_table+24) # .Lsysc_restore
+ jl .Lcleanup_sysc_tif
+ clg %r9,BASED(.Lcleanup_table+32) # .Lsysc_done
+ jl .Lcleanup_sysc_restore
+ clg %r9,BASED(.Lcleanup_table+40) # .Lio_tif
+ jl 0f
+ clg %r9,BASED(.Lcleanup_table+48) # .Lio_restore
+ jl .Lcleanup_io_tif
+ clg %r9,BASED(.Lcleanup_table+56) # .Lio_done
+ jl .Lcleanup_io_restore
+ clg %r9,BASED(.Lcleanup_table+64) # psw_idle
+ jl 0f
+ clg %r9,BASED(.Lcleanup_table+72) # .Lpsw_idle_end
+ jl .Lcleanup_idle
+ clg %r9,BASED(.Lcleanup_table+80) # save_fpu_regs
+ jl 0f
+ clg %r9,BASED(.Lcleanup_table+88) # .Lsave_fpu_regs_end
+ jl .Lcleanup_save_fpu_regs
+ clg %r9,BASED(.Lcleanup_table+96) # load_fpu_regs
+ jl 0f
+ clg %r9,BASED(.Lcleanup_table+104) # .Lload_fpu_regs_end
+ jl .Lcleanup_load_fpu_regs
+0: BR_EX %r14,%r11
+
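+# The table below lists the start and end address of each critical code
+# range in pairs; cleanup_critical compares the interrupted address in %r9
+# against consecutive entries to pick the matching .Lcleanup_* routine.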
+ .align 8
+.Lcleanup_table:
+ .quad system_call
+ .quad .Lsysc_do_svc
+ .quad .Lsysc_tif
+ .quad .Lsysc_restore
+ .quad .Lsysc_done
+ .quad .Lio_tif
+ .quad .Lio_restore
+ .quad .Lio_done
+ .quad psw_idle
+ .quad .Lpsw_idle_end
+ .quad save_fpu_regs
+ .quad .Lsave_fpu_regs_end
+ .quad load_fpu_regs
+ .quad .Lload_fpu_regs_end
+
+#if IS_ENABLED(CONFIG_KVM)
+.Lcleanup_table_sie:
+ .quad .Lsie_gmap
+ .quad .Lsie_done
+
+.Lcleanup_sie:
+ cghi %r11,__LC_SAVE_AREA_ASYNC # is this a normal interrupt?
+ je 1f
+ slg %r9,BASED(.Lsie_crit_mcck_start)
+ clg %r9,BASED(.Lsie_crit_mcck_length)
+ jh 1f
+ oi __LC_CPU_FLAGS+7, _CIF_MCCK_GUEST
+1: BPENTER __SF_SIE_FLAGS(%r15),(_TIF_ISOLATE_BP|_TIF_ISOLATE_BP_GUEST)
+ lg %r9,__SF_SIE_CONTROL(%r15) # get control block pointer
+ ni __SIE_PROG0C+3(%r9),0xfe # no longer in SIE
+ lctlg %c1,%c1,__LC_USER_ASCE # load primary asce
+ larl %r9,sie_exit # skip forward to sie_exit
+ BR_EX %r14,%r11
+#endif
+
+.Lcleanup_system_call:
+ # check if stpt has been executed
+ clg %r9,BASED(.Lcleanup_system_call_insn)
+ jh 0f
+ mvc __LC_SYNC_ENTER_TIMER(8),__LC_ASYNC_ENTER_TIMER
+ cghi %r11,__LC_SAVE_AREA_ASYNC
+ je 0f
+ mvc __LC_SYNC_ENTER_TIMER(8),__LC_MCCK_ENTER_TIMER
+0: # check if stmg has been executed
+ clg %r9,BASED(.Lcleanup_system_call_insn+8)
+ jh 0f
+ mvc __LC_SAVE_AREA_SYNC(64),0(%r11)
+0: # check if base register setup + TIF bit load has been done
+ clg %r9,BASED(.Lcleanup_system_call_insn+16)
+ jhe 0f
+ # set up saved register r12 task struct pointer
+ stg %r12,32(%r11)
+ # set up saved register r13 __TASK_thread offset
+ mvc 40(8,%r11),BASED(.Lcleanup_system_call_const)
+0: # check if the user time update has been done
+ clg %r9,BASED(.Lcleanup_system_call_insn+24)
+ jh 0f
+ lg %r15,__LC_EXIT_TIMER
+ slg %r15,__LC_SYNC_ENTER_TIMER
+ alg %r15,__LC_USER_TIMER
+ stg %r15,__LC_USER_TIMER
+0: # check if the system time update has been done
+ clg %r9,BASED(.Lcleanup_system_call_insn+32)
+ jh 0f
+ lg %r15,__LC_LAST_UPDATE_TIMER
+ slg %r15,__LC_EXIT_TIMER
+ alg %r15,__LC_SYSTEM_TIMER
+ stg %r15,__LC_SYSTEM_TIMER
+0: # update accounting time stamp
+ mvc __LC_LAST_UPDATE_TIMER(8),__LC_SYNC_ENTER_TIMER
+ BPENTER __TI_flags(%r12),_TIF_ISOLATE_BP
+ # set up saved register r11
+ lg %r15,__LC_KERNEL_STACK
+ la %r9,STACK_FRAME_OVERHEAD(%r15)
+ stg %r9,24(%r11) # r11 pt_regs pointer
+ # fill pt_regs
+ mvc __PT_R8(64,%r9),__LC_SAVE_AREA_SYNC
+ stmg %r0,%r7,__PT_R0(%r9)
+ mvc __PT_PSW(16,%r9),__LC_SVC_OLD_PSW
+ mvc __PT_INT_CODE(4,%r9),__LC_SVC_ILC
+ xc __PT_FLAGS(8,%r9),__PT_FLAGS(%r9)
+ mvi __PT_FLAGS+7(%r9),_PIF_SYSCALL
+ # setup saved register r15
+ stg %r15,56(%r11) # r15 stack pointer
+ # set new psw address and exit
+ larl %r9,.Lsysc_do_svc
+ BR_EX %r14,%r11
+.Lcleanup_system_call_insn:
+ .quad system_call
+ .quad .Lsysc_stmg
+ .quad .Lsysc_per
+ .quad .Lsysc_vtime+36
+ .quad .Lsysc_vtime+42
+.Lcleanup_system_call_const:
+ .quad __TASK_thread
+
+.Lcleanup_sysc_tif:
+ larl %r9,.Lsysc_tif
+ BR_EX %r14,%r11
+
+.Lcleanup_sysc_restore:
+ # check if stpt has been executed
+ clg %r9,BASED(.Lcleanup_sysc_restore_insn)
+ jh 0f
+ mvc __LC_EXIT_TIMER(8),__LC_ASYNC_ENTER_TIMER
+ cghi %r11,__LC_SAVE_AREA_ASYNC
+ je 0f
+ mvc __LC_EXIT_TIMER(8),__LC_MCCK_ENTER_TIMER
+0: clg %r9,BASED(.Lcleanup_sysc_restore_insn+8)
+ je 1f
+ lg %r9,24(%r11) # get saved pointer to pt_regs
+ mvc __LC_RETURN_PSW(16),__PT_PSW(%r9)
+ mvc 0(64,%r11),__PT_R8(%r9)
+ lmg %r0,%r7,__PT_R0(%r9)
+1: lmg %r8,%r9,__LC_RETURN_PSW
+ BR_EX %r14,%r11
+.Lcleanup_sysc_restore_insn:
+ .quad .Lsysc_exit_timer
+ .quad .Lsysc_done - 4
+
+.Lcleanup_io_tif:
+ larl %r9,.Lio_tif
+ BR_EX %r14,%r11
+
+.Lcleanup_io_restore:
+ # check if stpt has been executed
+ clg %r9,BASED(.Lcleanup_io_restore_insn)
+ jh 0f
+ mvc __LC_EXIT_TIMER(8),__LC_MCCK_ENTER_TIMER
+0: clg %r9,BASED(.Lcleanup_io_restore_insn+8)
+ je 1f
+ lg %r9,24(%r11) # get saved r11 pointer to pt_regs
+ mvc __LC_RETURN_PSW(16),__PT_PSW(%r9)
+ mvc 0(64,%r11),__PT_R8(%r9)
+ lmg %r0,%r7,__PT_R0(%r9)
+1: lmg %r8,%r9,__LC_RETURN_PSW
+ BR_EX %r14,%r11
+.Lcleanup_io_restore_insn:
+ .quad .Lio_exit_timer
+ .quad .Lio_done - 4
+
+.Lcleanup_idle:
+ ni __LC_CPU_FLAGS+7,255-_CIF_ENABLED_WAIT
+ # copy interrupt clock & cpu timer
+ mvc __CLOCK_IDLE_EXIT(8,%r2),__LC_INT_CLOCK
+ mvc __TIMER_IDLE_EXIT(8,%r2),__LC_ASYNC_ENTER_TIMER
+ cghi %r11,__LC_SAVE_AREA_ASYNC
+ je 0f
+ mvc __CLOCK_IDLE_EXIT(8,%r2),__LC_MCCK_CLOCK
+ mvc __TIMER_IDLE_EXIT(8,%r2),__LC_MCCK_ENTER_TIMER
+0: # check if stck & stpt have been executed
+ clg %r9,BASED(.Lcleanup_idle_insn)
+ jhe 1f
+ mvc __CLOCK_IDLE_ENTER(8,%r2),__CLOCK_IDLE_EXIT(%r2)
+ mvc __TIMER_IDLE_ENTER(8,%r2),__TIMER_IDLE_EXIT(%r2)
+1: # calculate idle cycles
+#ifdef CONFIG_SMP
+ clg %r9,BASED(.Lcleanup_idle_insn)
+ jl 3f
+ larl %r1,smp_cpu_mtid
+ llgf %r1,0(%r1)
+ ltgr %r1,%r1
+ jz 3f
+ .insn rsy,0xeb0000000017,%r1,5,__SF_EMPTY+80(%r15) # store mt cycle counters (stcctm)
+ larl %r3,mt_cycles
+ ag %r3,__LC_PERCPU_OFFSET
+ la %r4,__SF_EMPTY+16(%r15)
+2: lg %r0,0(%r3)
+ slg %r0,0(%r4)
+ alg %r0,64(%r4)
+ stg %r0,0(%r3)
+ la %r3,8(%r3)
+ la %r4,8(%r4)
+ brct %r1,2b
+#endif
+3: # account system time going idle
+ lg %r9,__LC_STEAL_TIMER
+ alg %r9,__CLOCK_IDLE_ENTER(%r2)
+ slg %r9,__LC_LAST_UPDATE_CLOCK
+ stg %r9,__LC_STEAL_TIMER
+ mvc __LC_LAST_UPDATE_CLOCK(8),__CLOCK_IDLE_EXIT(%r2)
+ lg %r9,__LC_SYSTEM_TIMER
+ alg %r9,__LC_LAST_UPDATE_TIMER
+ slg %r9,__TIMER_IDLE_ENTER(%r2)
+ stg %r9,__LC_SYSTEM_TIMER
+ mvc __LC_LAST_UPDATE_TIMER(8),__TIMER_IDLE_EXIT(%r2)
+ # prepare return psw
+ nihh %r8,0xfcfd # clear irq & wait state bits
+ lg %r9,48(%r11) # return from psw_idle
+ BR_EX %r14,%r11
+.Lcleanup_idle_insn:
+ .quad .Lpsw_idle_lpsw
+
+.Lcleanup_save_fpu_regs:
+ larl %r9,save_fpu_regs
+ BR_EX %r14,%r11
+
+.Lcleanup_load_fpu_regs:
+ larl %r9,load_fpu_regs
+ BR_EX %r14,%r11
+
+/*
+ * Integer constants
+ */
+ .align 8
+.Lcritical_start:
+ .quad .L__critical_start
+.Lcritical_length:
+ .quad .L__critical_end - .L__critical_start
+#if IS_ENABLED(CONFIG_KVM)
+.Lsie_critical_start:
+ .quad .Lsie_gmap
+.Lsie_critical_length:
+ .quad .Lsie_done - .Lsie_gmap
+.Lsie_crit_mcck_start:
+ .quad .Lsie_entry
+.Lsie_crit_mcck_length:
+ .quad .Lsie_skip - .Lsie_entry
+#endif
+ .section .rodata, "a"
+#define SYSCALL(esame,emu) .long esame
+ .globl sys_call_table
+sys_call_table:
+#include "asm/syscall_table.h"
+#undef SYSCALL
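+# Each table entry is a 32-bit (.long) value, which is why the system call
+# path above indexes the table with a shift by 2 and loads entries with lgf.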
+
+#ifdef CONFIG_COMPAT
+
+#define SYSCALL(esame,emu) .long emu
+ .globl sys_call_table_emu
+sys_call_table_emu:
+#include "asm/syscall_table.h"
+#undef SYSCALL
+#endif
diff --git a/arch/s390/kernel/entry.h b/arch/s390/kernel/entry.h
new file mode 100644
index 000000000..472fa2f1a
--- /dev/null
+++ b/arch/s390/kernel/entry.h
@@ -0,0 +1,89 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ENTRY_H
+#define _ENTRY_H
+
+#include <linux/percpu.h>
+#include <linux/types.h>
+#include <linux/signal.h>
+#include <asm/ptrace.h>
+#include <asm/idle.h>
+
+extern void *restart_stack;
+extern unsigned long suspend_zero_pages;
+
+void system_call(void);
+void pgm_check_handler(void);
+void ext_int_handler(void);
+void io_int_handler(void);
+void mcck_int_handler(void);
+void restart_int_handler(void);
+void restart_call_handler(void);
+
+asmlinkage long do_syscall_trace_enter(struct pt_regs *regs);
+asmlinkage void do_syscall_trace_exit(struct pt_regs *regs);
+
+void do_protection_exception(struct pt_regs *regs);
+void do_dat_exception(struct pt_regs *regs);
+
+void addressing_exception(struct pt_regs *regs);
+void data_exception(struct pt_regs *regs);
+void default_trap_handler(struct pt_regs *regs);
+void divide_exception(struct pt_regs *regs);
+void execute_exception(struct pt_regs *regs);
+void hfp_divide_exception(struct pt_regs *regs);
+void hfp_overflow_exception(struct pt_regs *regs);
+void hfp_significance_exception(struct pt_regs *regs);
+void hfp_sqrt_exception(struct pt_regs *regs);
+void hfp_underflow_exception(struct pt_regs *regs);
+void illegal_op(struct pt_regs *regs);
+void operand_exception(struct pt_regs *regs);
+void overflow_exception(struct pt_regs *regs);
+void privileged_op(struct pt_regs *regs);
+void space_switch_exception(struct pt_regs *regs);
+void special_op_exception(struct pt_regs *regs);
+void specification_exception(struct pt_regs *regs);
+void transaction_exception(struct pt_regs *regs);
+void translation_exception(struct pt_regs *regs);
+void vector_exception(struct pt_regs *regs);
+
+void do_per_trap(struct pt_regs *regs);
+void do_report_trap(struct pt_regs *regs, int si_signo, int si_code, char *str);
+void syscall_trace(struct pt_regs *regs, int entryexit);
+void kernel_stack_overflow(struct pt_regs * regs);
+void do_signal(struct pt_regs *regs);
+void handle_signal32(struct ksignal *ksig, sigset_t *oldset,
+ struct pt_regs *regs);
+void do_notify_resume(struct pt_regs *regs);
+
+void __init init_IRQ(void);
+void do_IRQ(struct pt_regs *regs, int irq);
+void do_restart(void);
+void __init startup_init_nobss(void);
+void __init startup_init(void);
+void die(struct pt_regs *regs, const char *str);
+int setup_profiling_timer(unsigned int multiplier);
+void __init time_init(void);
+int pfn_is_nosave(unsigned long);
+void s390_early_resume(void);
+unsigned long prepare_ftrace_return(unsigned long parent, unsigned long ip);
+
+struct s390_mmap_arg_struct;
+struct fadvise64_64_args;
+struct old_sigaction;
+
+long sys_rt_sigreturn(void);
+long sys_sigreturn(void);
+
+long sys_s390_personality(unsigned int personality);
+long sys_s390_runtime_instr(int command, int signum);
+long sys_s390_guarded_storage(int command, struct gs_cb __user *);
+long sys_s390_pci_mmio_write(unsigned long, const void __user *, size_t);
+long sys_s390_pci_mmio_read(unsigned long, void __user *, size_t);
+long sys_s390_sthyi(unsigned long function_code, void __user *buffer, u64 __user *return_code, unsigned long flags);
+
+DECLARE_PER_CPU(u64, mt_cycles[8]);
+
+void gs_load_bc_cb(struct pt_regs *regs);
+void set_fs_fixup(void);
+
+#endif /* _ENTRY_H */
diff --git a/arch/s390/kernel/fpu.c b/arch/s390/kernel/fpu.c
new file mode 100644
index 000000000..594464f21
--- /dev/null
+++ b/arch/s390/kernel/fpu.c
@@ -0,0 +1,177 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * In-kernel vector facility support functions
+ *
+ * Copyright IBM Corp. 2015
+ * Author(s): Hendrik Brueckner <brueckner@linux.vnet.ibm.com>
+ */
+#include <linux/kernel.h>
+#include <linux/cpu.h>
+#include <linux/sched.h>
+#include <asm/fpu/types.h>
+#include <asm/fpu/api.h>
+
+asm(".include \"asm/vx-insn.h\"\n");
+
+void __kernel_fpu_begin(struct kernel_fpu *state, u32 flags)
+{
+ /*
+ * Limit the save to the FPU/vector registers already
+ * in use by the previous context
+ */
+ flags &= state->mask;
+
+ if (flags & KERNEL_FPC)
+ /* Save floating point control */
+ asm volatile("stfpc %0" : "=m" (state->fpc));
+
+ if (!MACHINE_HAS_VX) {
+ if (flags & KERNEL_VXR_V0V7) {
+ /* Save floating-point registers */
+ asm volatile("std 0,%0" : "=Q" (state->fprs[0]));
+ asm volatile("std 1,%0" : "=Q" (state->fprs[1]));
+ asm volatile("std 2,%0" : "=Q" (state->fprs[2]));
+ asm volatile("std 3,%0" : "=Q" (state->fprs[3]));
+ asm volatile("std 4,%0" : "=Q" (state->fprs[4]));
+ asm volatile("std 5,%0" : "=Q" (state->fprs[5]));
+ asm volatile("std 6,%0" : "=Q" (state->fprs[6]));
+ asm volatile("std 7,%0" : "=Q" (state->fprs[7]));
+ asm volatile("std 8,%0" : "=Q" (state->fprs[8]));
+ asm volatile("std 9,%0" : "=Q" (state->fprs[9]));
+ asm volatile("std 10,%0" : "=Q" (state->fprs[10]));
+ asm volatile("std 11,%0" : "=Q" (state->fprs[11]));
+ asm volatile("std 12,%0" : "=Q" (state->fprs[12]));
+ asm volatile("std 13,%0" : "=Q" (state->fprs[13]));
+ asm volatile("std 14,%0" : "=Q" (state->fprs[14]));
+ asm volatile("std 15,%0" : "=Q" (state->fprs[15]));
+ }
+ return;
+ }
+
+ /* Test and save vector registers */
+ asm volatile (
+ /*
+ * Test if any vector register must be saved and, if so,
+ * test if all registers can be saved.
+ */
+ " la 1,%[vxrs]\n" /* load save area */
+ " tmll %[m],30\n" /* KERNEL_VXR */
+ " jz 7f\n" /* no work -> done */
+ " jo 5f\n" /* -> save V0..V31 */
+ /*
+ * Test for the special case KERNEL_VXR_MID only. In this
+ * case a vstm V8..V23 is the best instruction.
+ */
+ " chi %[m],12\n" /* KERNEL_VXR_MID */
+ " jne 0f\n" /* -> save V8..V23 */
+ " VSTM 8,23,128,1\n" /* vstm %v8,%v23,128(%r1) */
+ " j 7f\n"
+ /* Test and save the first half of 16 vector registers */
+ "0: tmll %[m],6\n" /* KERNEL_VXR_LOW */
+ " jz 3f\n" /* -> KERNEL_VXR_HIGH */
+ " jo 2f\n" /* 11 -> save V0..V15 */
+ " brc 2,1f\n" /* 10 -> save V8..V15 */
+ " VSTM 0,7,0,1\n" /* vstm %v0,%v7,0(%r1) */
+ " j 3f\n"
+ "1: VSTM 8,15,128,1\n" /* vstm %v8,%v15,128(%r1) */
+ " j 3f\n"
+ "2: VSTM 0,15,0,1\n" /* vstm %v0,%v15,0(%r1) */
+ /* Test and save the second half of 16 vector registers */
+ "3: tmll %[m],24\n" /* KERNEL_VXR_HIGH */
+ " jz 7f\n"
+ " jo 6f\n" /* 11 -> save V16..V31 */
+ " brc 2,4f\n" /* 10 -> save V24..V31 */
+ " VSTM 16,23,256,1\n" /* vstm %v16,%v23,256(%r1) */
+ " j 7f\n"
+ "4: VSTM 24,31,384,1\n" /* vstm %v24,%v31,384(%r1) */
+ " j 7f\n"
+ "5: VSTM 0,15,0,1\n" /* vstm %v0,%v15,0(%r1) */
+ "6: VSTM 16,31,256,1\n" /* vstm %v16,%v31,256(%r1) */
+ "7:"
+ : [vxrs] "=Q" (*(struct vx_array *) &state->vxrs)
+ : [m] "d" (flags)
+ : "1", "cc");
+}
+EXPORT_SYMBOL(__kernel_fpu_begin);
+
+void __kernel_fpu_end(struct kernel_fpu *state, u32 flags)
+{
+ /*
+ * Limit the restore to the FPU/vector registers of the
+ * previous context that have been overwritten by the
+ * current context.
+ */
+ flags &= state->mask;
+
+ if (flags & KERNEL_FPC)
+ /* Restore floating-point controls */
+ asm volatile("lfpc %0" : : "Q" (state->fpc));
+
+ if (!MACHINE_HAS_VX) {
+ if (flags & KERNEL_VXR_V0V7) {
+ /* Restore floating-point registers */
+ asm volatile("ld 0,%0" : : "Q" (state->fprs[0]));
+ asm volatile("ld 1,%0" : : "Q" (state->fprs[1]));
+ asm volatile("ld 2,%0" : : "Q" (state->fprs[2]));
+ asm volatile("ld 3,%0" : : "Q" (state->fprs[3]));
+ asm volatile("ld 4,%0" : : "Q" (state->fprs[4]));
+ asm volatile("ld 5,%0" : : "Q" (state->fprs[5]));
+ asm volatile("ld 6,%0" : : "Q" (state->fprs[6]));
+ asm volatile("ld 7,%0" : : "Q" (state->fprs[7]));
+ asm volatile("ld 8,%0" : : "Q" (state->fprs[8]));
+ asm volatile("ld 9,%0" : : "Q" (state->fprs[9]));
+ asm volatile("ld 10,%0" : : "Q" (state->fprs[10]));
+ asm volatile("ld 11,%0" : : "Q" (state->fprs[11]));
+ asm volatile("ld 12,%0" : : "Q" (state->fprs[12]));
+ asm volatile("ld 13,%0" : : "Q" (state->fprs[13]));
+ asm volatile("ld 14,%0" : : "Q" (state->fprs[14]));
+ asm volatile("ld 15,%0" : : "Q" (state->fprs[15]));
+ }
+ return;
+ }
+
+ /* Test and restore (load) vector registers */
+ asm volatile (
+ /*
+ * Test if any vector register must be loaded and, if so,
+ * test if all registers can be loaded at once.
+ */
+ " la 1,%[vxrs]\n" /* load restore area */
+ " tmll %[m],30\n" /* KERNEL_VXR */
+ " jz 7f\n" /* no work -> done */
+ " jo 5f\n" /* -> restore V0..V31 */
+ /*
+ * Test for the special case KERNEL_VXR_MID only. In this
+ * case a vlm V8..V23 is the best instruction.
+ */
+ " chi %[m],12\n" /* KERNEL_VXR_MID */
+ " jne 0f\n" /* -> restore V8..V23 */
+ " VLM 8,23,128,1\n" /* vlm %v8,%v23,128(%r1) */
+ " j 7f\n"
+ /* Test and restore the first half of 16 vector registers */
+ "0: tmll %[m],6\n" /* KERNEL_VXR_LOW */
+ " jz 3f\n" /* -> KERNEL_VXR_HIGH */
+ " jo 2f\n" /* 11 -> restore V0..V15 */
+ " brc 2,1f\n" /* 10 -> restore V8..V15 */
+ " VLM 0,7,0,1\n" /* vlm %v0,%v7,0(%r1) */
+ " j 3f\n"
+ "1: VLM 8,15,128,1\n" /* vlm %v8,%v15,128(%r1) */
+ " j 3f\n"
+ "2: VLM 0,15,0,1\n" /* vlm %v0,%v15,0(%r1) */
+ /* Test and restore the second half of 16 vector registers */
+ "3: tmll %[m],24\n" /* KERNEL_VXR_HIGH */
+ " jz 7f\n"
+ " jo 6f\n" /* 11 -> restore V16..V31 */
+ " brc 2,4f\n" /* 10 -> restore V24..V31 */
+ " VLM 16,23,256,1\n" /* vlm %v16,%v23,256(%r1) */
+ " j 7f\n"
+ "4: VLM 24,31,384,1\n" /* vlm %v24,%v31,384(%r1) */
+ " j 7f\n"
+ "5: VLM 0,15,0,1\n" /* vlm %v0,%v15,0(%r1) */
+ "6: VLM 16,31,256,1\n" /* vlm %v16,%v31,256(%r1) */
+ "7:"
+ : [vxrs] "=Q" (*(struct vx_array *) &state->vxrs)
+ : [m] "d" (flags)
+ : "1", "cc");
+}
+EXPORT_SYMBOL(__kernel_fpu_end);
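+
+/*
+ * Minimal usage sketch (not part of this file): in-kernel users are meant to
+ * go through the kernel_fpu_begin()/kernel_fpu_end() wrappers provided by
+ * <asm/fpu/api.h>, which call the two functions above, roughly:
+ *
+ *	struct kernel_fpu vxstate;
+ *
+ *	kernel_fpu_begin(&vxstate, KERNEL_VXR);
+ *	... use the vector registers %v0-%v31 ...
+ *	kernel_fpu_end(&vxstate, KERNEL_VXR);
+ */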
diff --git a/arch/s390/kernel/ftrace.c b/arch/s390/kernel/ftrace.c
new file mode 100644
index 000000000..463b9e927
--- /dev/null
+++ b/arch/s390/kernel/ftrace.c
@@ -0,0 +1,244 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Dynamic function tracer architecture backend.
+ *
+ * Copyright IBM Corp. 2009,2014
+ *
+ * Author(s): Heiko Carstens <heiko.carstens@de.ibm.com>,
+ * Martin Schwidefsky <schwidefsky@de.ibm.com>
+ */
+
+#include <linux/moduleloader.h>
+#include <linux/hardirq.h>
+#include <linux/uaccess.h>
+#include <linux/ftrace.h>
+#include <linux/kernel.h>
+#include <linux/types.h>
+#include <linux/kprobes.h>
+#include <trace/syscall.h>
+#include <asm/asm-offsets.h>
+#include <asm/cacheflush.h>
+#include <asm/set_memory.h>
+#include "entry.h"
+
+/*
+ * The mcount code looks like this:
+ * stg %r14,8(%r15) # offset 0
+ * larl %r1,<&counter> # offset 6
+ * brasl %r14,_mcount # offset 12
+ * lg %r14,8(%r15) # offset 18
+ * Total length is 24 bytes. Only the first instruction will be patched
+ * by ftrace_make_call / ftrace_make_nop.
+ * The enabled ftrace code block looks like this:
+ * > brasl %r0,ftrace_caller # offset 0
+ * larl %r1,<&counter> # offset 6
+ * brasl %r14,_mcount # offset 12
+ * lg %r14,8(%r15) # offset 18
+ * The ftrace function gets called with a non-standard C function call ABI
+ * where r0 contains the return address. It is also expected that the called
+ * function only clobbers r0 and r1, but restores r2-r15.
+ * For module code we can't directly jump to the ftrace caller, but need a
+ * trampoline (ftrace_plt), which also clobbers r1.
+ * The return point of the ftrace function has offset 24, so execution
+ * continues after the mcount block.
+ * The disabled ftrace code block looks like this:
+ * > jg .+24 # offset 0
+ * larl %r1,<&counter> # offset 6
+ * brasl %r14,_mcount # offset 12
+ * lg %r14,8(%r15) # offset 18
+ * The jg instruction branches to offset 24 to skip as many instructions
+ * as possible.
+ * In case we use gcc's hotpatch feature the original and also the disabled
+ * function prologue contains only a single six byte instruction and looks
+ * like this:
+ * > brcl 0,0 # offset 0
+ * To enable ftrace the code gets patched like above and afterwards looks
+ * like this:
+ * > brasl %r0,ftrace_caller # offset 0
+ */
+
+void *ftrace_func __read_mostly = ftrace_stub;
+unsigned long ftrace_plt;
+
+static inline void ftrace_generate_orig_insn(struct ftrace_insn *insn)
+{
+#if defined(CC_USING_HOTPATCH) || defined(CC_USING_NOP_MCOUNT)
+ /* brcl 0,0 */
+ insn->opc = 0xc004;
+ insn->disp = 0;
+#else
+ /* stg r14,8(r15) */
+ insn->opc = 0xe3e0;
+ insn->disp = 0xf0080024;
+#endif
+}
+
+static inline int is_kprobe_on_ftrace(struct ftrace_insn *insn)
+{
+#ifdef CONFIG_KPROBES
+ if (insn->opc == BREAKPOINT_INSTRUCTION)
+ return 1;
+#endif
+ return 0;
+}
+
+static inline void ftrace_generate_kprobe_nop_insn(struct ftrace_insn *insn)
+{
+#ifdef CONFIG_KPROBES
+ insn->opc = BREAKPOINT_INSTRUCTION;
+ insn->disp = KPROBE_ON_FTRACE_NOP;
+#endif
+}
+
+static inline void ftrace_generate_kprobe_call_insn(struct ftrace_insn *insn)
+{
+#ifdef CONFIG_KPROBES
+ insn->opc = BREAKPOINT_INSTRUCTION;
+ insn->disp = KPROBE_ON_FTRACE_CALL;
+#endif
+}
+
+int ftrace_modify_call(struct dyn_ftrace *rec, unsigned long old_addr,
+ unsigned long addr)
+{
+ return 0;
+}
+
+int ftrace_make_nop(struct module *mod, struct dyn_ftrace *rec,
+ unsigned long addr)
+{
+ struct ftrace_insn orig, new, old;
+
+ if (probe_kernel_read(&old, (void *) rec->ip, sizeof(old)))
+ return -EFAULT;
+ if (addr == MCOUNT_ADDR) {
+ /* Initial code replacement */
+ ftrace_generate_orig_insn(&orig);
+ ftrace_generate_nop_insn(&new);
+ } else if (is_kprobe_on_ftrace(&old)) {
+ /*
+ * If we find a breakpoint instruction, a kprobe has been
+ * placed at the beginning of the function. We write the
+ * constant KPROBE_ON_FTRACE_NOP into the remaining four
+ * bytes of the original instruction so that the kprobes
+ * handler can execute a nop, if it reaches this breakpoint.
+ */
+ ftrace_generate_kprobe_call_insn(&orig);
+ ftrace_generate_kprobe_nop_insn(&new);
+ } else {
+ /* Replace ftrace call with a nop. */
+ ftrace_generate_call_insn(&orig, rec->ip);
+ ftrace_generate_nop_insn(&new);
+ }
+ /* Verify that the code to be replaced matches what we expect. */
+ if (memcmp(&orig, &old, sizeof(old)))
+ return -EINVAL;
+ s390_kernel_write((void *) rec->ip, &new, sizeof(new));
+ return 0;
+}
+
+int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
+{
+ struct ftrace_insn orig, new, old;
+
+ if (probe_kernel_read(&old, (void *) rec->ip, sizeof(old)))
+ return -EFAULT;
+ if (is_kprobe_on_ftrace(&old)) {
+ /*
+ * If we find a breakpoint instruction, a kprobe has been
+ * placed at the beginning of the function. We write the
+ * constant KPROBE_ON_FTRACE_CALL into the remaining four
+ * bytes of the original instruction so that the kprobes
+ * handler can execute a brasl if it reaches this breakpoint.
+ */
+ ftrace_generate_kprobe_nop_insn(&orig);
+ ftrace_generate_kprobe_call_insn(&new);
+ } else {
+ /* Replace nop with an ftrace call. */
+ ftrace_generate_nop_insn(&orig);
+ ftrace_generate_call_insn(&new, rec->ip);
+ }
+ /* Verify that the code to be replaced matches what we expect. */
+ if (memcmp(&orig, &old, sizeof(old)))
+ return -EINVAL;
+ s390_kernel_write((void *) rec->ip, &new, sizeof(new));
+ return 0;
+}
+
+int ftrace_update_ftrace_func(ftrace_func_t func)
+{
+ ftrace_func = func;
+ return 0;
+}
+
+int __init ftrace_dyn_arch_init(void)
+{
+ return 0;
+}
+
+#ifdef CONFIG_MODULES
+
+static int __init ftrace_plt_init(void)
+{
+ unsigned int *ip;
+
+ ftrace_plt = (unsigned long) module_alloc(PAGE_SIZE);
+ if (!ftrace_plt)
+ panic("cannot allocate ftrace plt\n");
+ ip = (unsigned int *) ftrace_plt;
+ ip[0] = 0x0d10e310; /* basr 1,0; lg 1,10(1); br 1 */
+ ip[1] = 0x100a0004;
+ ip[2] = 0x07f10000;
+ ip[3] = FTRACE_ADDR >> 32;
+ ip[4] = FTRACE_ADDR & 0xffffffff;
+ set_memory_ro(ftrace_plt, 1);
+ return 0;
+}
+device_initcall(ftrace_plt_init);
+
+#endif /* CONFIG_MODULES */
+
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+/*
+ * Hook the return address and push it in the stack of return addresses
+ * in current thread info.
+ */
+unsigned long prepare_ftrace_return(unsigned long parent, unsigned long ip)
+{
+ if (unlikely(ftrace_graph_is_dead()))
+ goto out;
+ if (unlikely(atomic_read(&current->tracing_graph_pause)))
+ goto out;
+ ip -= MCOUNT_INSN_SIZE;
+ if (!function_graph_enter(parent, ip, 0, NULL))
+ parent = (unsigned long) return_to_handler;
+out:
+ return parent;
+}
+NOKPROBE_SYMBOL(prepare_ftrace_return);
+
+/*
+ * Patch the kernel code at the ftrace_graph_caller location. The instruction
+ * there is a branch relative on condition. To enable the ftrace graph code
+ * block, we simply patch the mask field of the instruction to zero and
+ * turn the instruction into a nop.
+ * To disable the ftrace graph code the mask field will be patched to
+ * all ones, which turns the instruction into an unconditional branch.
+ */
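+/*
+ * Illustrative encoding, assuming the usual "branch relative on condition"
+ * format: the byte at offset 1 holds the condition mask in its upper nibble,
+ * so 0xf4 means "branch always" and 0x04 means "branch never", i.e. a nop.
+ */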
+int ftrace_enable_ftrace_graph_caller(void)
+{
+ u8 op = 0x04; /* set mask field to zero */
+
+ s390_kernel_write(__va(ftrace_graph_caller)+1, &op, sizeof(op));
+ return 0;
+}
+
+int ftrace_disable_ftrace_graph_caller(void)
+{
+ u8 op = 0xf4; /* set mask field to all ones */
+
+ s390_kernel_write(__va(ftrace_graph_caller)+1, &op, sizeof(op));
+ return 0;
+}
+
+#endif /* CONFIG_FUNCTION_GRAPH_TRACER */
diff --git a/arch/s390/kernel/guarded_storage.c b/arch/s390/kernel/guarded_storage.c
new file mode 100644
index 000000000..d14dd1c2e
--- /dev/null
+++ b/arch/s390/kernel/guarded_storage.c
@@ -0,0 +1,128 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright IBM Corp. 2016
+ * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>
+ */
+
+#include <linux/kernel.h>
+#include <linux/syscalls.h>
+#include <linux/signal.h>
+#include <linux/mm.h>
+#include <linux/slab.h>
+#include <asm/guarded_storage.h>
+#include "entry.h"
+
+void guarded_storage_release(struct task_struct *tsk)
+{
+ kfree(tsk->thread.gs_cb);
+ kfree(tsk->thread.gs_bc_cb);
+}
+
+static int gs_enable(void)
+{
+ struct gs_cb *gs_cb;
+
+ if (!current->thread.gs_cb) {
+ gs_cb = kzalloc(sizeof(*gs_cb), GFP_KERNEL);
+ if (!gs_cb)
+ return -ENOMEM;
+ gs_cb->gsd = 25;
+ preempt_disable();
+ __ctl_set_bit(2, 4);
+ load_gs_cb(gs_cb);
+ current->thread.gs_cb = gs_cb;
+ preempt_enable();
+ }
+ return 0;
+}
+
+static int gs_disable(void)
+{
+ if (current->thread.gs_cb) {
+ preempt_disable();
+ kfree(current->thread.gs_cb);
+ current->thread.gs_cb = NULL;
+ __ctl_clear_bit(2, 4);
+ preempt_enable();
+ }
+ return 0;
+}
+
+static int gs_set_bc_cb(struct gs_cb __user *u_gs_cb)
+{
+ struct gs_cb *gs_cb;
+
+ gs_cb = current->thread.gs_bc_cb;
+ if (!gs_cb) {
+ gs_cb = kzalloc(sizeof(*gs_cb), GFP_KERNEL);
+ if (!gs_cb)
+ return -ENOMEM;
+ current->thread.gs_bc_cb = gs_cb;
+ }
+ if (copy_from_user(gs_cb, u_gs_cb, sizeof(*gs_cb)))
+ return -EFAULT;
+ return 0;
+}
+
+static int gs_clear_bc_cb(void)
+{
+ struct gs_cb *gs_cb;
+
+ gs_cb = current->thread.gs_bc_cb;
+ current->thread.gs_bc_cb = NULL;
+ kfree(gs_cb);
+ return 0;
+}
+
+void gs_load_bc_cb(struct pt_regs *regs)
+{
+ struct gs_cb *gs_cb;
+
+ preempt_disable();
+ clear_thread_flag(TIF_GUARDED_STORAGE);
+ gs_cb = current->thread.gs_bc_cb;
+ if (gs_cb) {
+ kfree(current->thread.gs_cb);
+ current->thread.gs_bc_cb = NULL;
+ __ctl_set_bit(2, 4);
+ load_gs_cb(gs_cb);
+ current->thread.gs_cb = gs_cb;
+ }
+ preempt_enable();
+}
+
+static int gs_broadcast(void)
+{
+ struct task_struct *sibling;
+
+ read_lock(&tasklist_lock);
+ for_each_thread(current, sibling) {
+ if (!sibling->thread.gs_bc_cb)
+ continue;
+ if (test_and_set_tsk_thread_flag(sibling, TIF_GUARDED_STORAGE))
+ kick_process(sibling);
+ }
+ read_unlock(&tasklist_lock);
+ return 0;
+}
+
+SYSCALL_DEFINE2(s390_guarded_storage, int, command,
+ struct gs_cb __user *, gs_cb)
+{
+ if (!MACHINE_HAS_GS)
+ return -EOPNOTSUPP;
+ switch (command) {
+ case GS_ENABLE:
+ return gs_enable();
+ case GS_DISABLE:
+ return gs_disable();
+ case GS_SET_BC_CB:
+ return gs_set_bc_cb(gs_cb);
+ case GS_CLEAR_BC_CB:
+ return gs_clear_bc_cb();
+ case GS_BROADCAST:
+ return gs_broadcast();
+ default:
+ return -EINVAL;
+ }
+}
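+
+/*
+ * Rough userspace sketch, assuming the GS_* command constants and the
+ * __NR_s390_guarded_storage syscall number from the installed uapi headers
+ * (neither is defined in this file):
+ *
+ *	#include <unistd.h>
+ *	#include <sys/syscall.h>
+ *	#include <asm/guarded_storage.h>
+ *
+ *	syscall(__NR_s390_guarded_storage, GS_ENABLE, NULL);
+ *	...
+ *	syscall(__NR_s390_guarded_storage, GS_DISABLE, NULL);
+ */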
diff --git a/arch/s390/kernel/head64.S b/arch/s390/kernel/head64.S
new file mode 100644
index 000000000..6d14ad42b
--- /dev/null
+++ b/arch/s390/kernel/head64.S
@@ -0,0 +1,101 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright IBM Corp. 1999, 2010
+ *
+ * Author(s): Hartmut Penner <hp@de.ibm.com>
+ * Martin Schwidefsky <schwidefsky@de.ibm.com>
+ * Rob van der Heij <rvdhei@iae.nl>
+ * Heiko Carstens <heiko.carstens@de.ibm.com>
+ *
+ */
+
+#include <linux/init.h>
+#include <linux/linkage.h>
+#include <asm/asm-offsets.h>
+#include <asm/thread_info.h>
+#include <asm/page.h>
+
+__HEAD
+ENTRY(startup_continue)
+ tm __LC_STFLE_FAC_LIST+5,0x80 # LPP available ?
+ jz 0f
+ xc __LC_LPP+1(7,0),__LC_LPP+1 # clear lpp and current_pid
+ mvi __LC_LPP,0x80 # and set LPP_MAGIC
+ .insn s,0xb2800000,__LC_LPP # load program parameter
+0: larl %r1,tod_clock_base
+ mvc 0(16,%r1),__LC_BOOT_CLOCK
+ larl %r13,.LPG1 # get base
+ lctlg %c0,%c15,.Lctl-.LPG1(%r13) # load control registers
+ lg %r12,.Lparmaddr-.LPG1(%r13) # pointer to parameter area
+ # set up the boot vdso data pointer in lowcore
+ larl %r0,boot_vdso_data
+ stg %r0,__LC_VDSO_PER_CPU
+#
+# Setup stack
+#
+ larl %r14,init_task
+ stg %r14,__LC_CURRENT
+ larl %r15,init_thread_union
+ aghi %r15,1<<(PAGE_SHIFT+THREAD_SIZE_ORDER) # init_thread_union + THREAD_SIZE
+ stg %r15,__LC_KERNEL_STACK # set end of kernel stack
+ aghi %r15,-160
+#
+# Early setup functions that may not rely on an initialized bss section,
+# like moving the initrd. Returns with an initialized bss section.
+#
+ brasl %r14,startup_init_nobss
+#
+# Early machine initialization and detection functions.
+#
+ brasl %r14,startup_init
+
+# check control registers
+ stctg %c0,%c15,0(%r15)
+ oi 6(%r15),0x60 # enable sigp emergency & external call
+ oi 4(%r15),0x10 # switch on low address protection
+ lctlg %c0,%c15,0(%r15)
+
+ lam 0,15,.Laregs-.LPG1(%r13) # load acrs needed by uaccess
+ brasl %r14,start_kernel # go to C code
+#
+# We returned from start_kernel ?!? PANIC
+#
+ basr %r13,0
+ lpswe .Ldw-.(%r13) # load disabled wait psw
+
+ .align 16
+.LPG1:
+.Lctl: .quad 0x04040000 # cr0: AFP registers & secondary space
+ .quad 0 # cr1: primary space segment table
+ .quad .Lduct # cr2: dispatchable unit control table
+ .quad 0 # cr3: instruction authorization
+ .quad 0xffff # cr4: instruction authorization
+ .quad .Lduct # cr5: primary-aste origin
+ .quad 0 # cr6: I/O interrupts
+ .quad 0 # cr7: secondary space segment table
+ .quad 0 # cr8: access registers translation
+ .quad 0 # cr9: tracing off
+ .quad 0 # cr10: tracing off
+ .quad 0 # cr11: tracing off
+ .quad 0 # cr12: tracing off
+ .quad 0 # cr13: home space segment table
+ .quad 0xc0000000 # cr14: machine check handling off
+ .quad .Llinkage_stack # cr15: linkage stack operations
+.Lpcmsk:.quad 0x0000000180000000
+.L4malign:.quad 0xffffffffffc00000
+.Lscan2g:.quad 0x80000000 + 0x20000 - 8 # 2GB + 128K - 8
+.Lnop: .long 0x07000700
+.Lparmaddr:
+ .quad PARMAREA
+ .align 64
+.Lduct: .long 0,.Laste,.Laste,0,.Lduald,0,0,0
+ .long 0,0,0,0,0,0,0,0
+.Laste: .quad 0,0xffffffffffffffff,0,0,0,0,0,0
+ .align 128
+.Lduald:.rept 8
+ .long 0x80000000,0,0,0 # invalid access-list entries
+ .endr
+.Llinkage_stack:
+ .long 0,0,0x89000000,0,0,0,0x8a000000,0
+.Ldw: .quad 0x0002000180000000,0x0000000000000000
+.Laregs:.long 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
diff --git a/arch/s390/kernel/idle.c b/arch/s390/kernel/idle.c
new file mode 100644
index 000000000..8f8456816
--- /dev/null
+++ b/arch/s390/kernel/idle.c
@@ -0,0 +1,142 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Idle functions for s390.
+ *
+ * Copyright IBM Corp. 2014
+ *
+ * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>
+ */
+
+#include <linux/kernel.h>
+#include <linux/kernel_stat.h>
+#include <linux/kprobes.h>
+#include <linux/notifier.h>
+#include <linux/init.h>
+#include <linux/cpu.h>
+#include <linux/sched/cputime.h>
+#include <asm/nmi.h>
+#include <asm/smp.h>
+#include "entry.h"
+
+static DEFINE_PER_CPU(struct s390_idle_data, s390_idle);
+
+void enabled_wait(void)
+{
+ struct s390_idle_data *idle = this_cpu_ptr(&s390_idle);
+ unsigned long long idle_time;
+ unsigned long psw_mask;
+
+ trace_hardirqs_on();
+
+ /* Wait for external, I/O or machine check interrupt. */
+ psw_mask = PSW_KERNEL_BITS | PSW_MASK_WAIT | PSW_MASK_DAT |
+ PSW_MASK_IO | PSW_MASK_EXT | PSW_MASK_MCHECK;
+ clear_cpu_flag(CIF_NOHZ_DELAY);
+
+ /* Call the assembler magic in entry.S */
+ psw_idle(idle, psw_mask);
+
+ trace_hardirqs_off();
+
+ /* Account time spent with enabled wait psw loaded as idle time. */
+ write_seqcount_begin(&idle->seqcount);
+ idle_time = idle->clock_idle_exit - idle->clock_idle_enter;
+ idle->clock_idle_enter = idle->clock_idle_exit = 0ULL;
+ idle->idle_time += idle_time;
+ idle->idle_count++;
+ account_idle_time(cputime_to_nsecs(idle_time));
+ write_seqcount_end(&idle->seqcount);
+}
+NOKPROBE_SYMBOL(enabled_wait);
+
+static ssize_t show_idle_count(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct s390_idle_data *idle = &per_cpu(s390_idle, dev->id);
+ unsigned long long idle_count;
+ unsigned int seq;
+
+ do {
+ seq = read_seqcount_begin(&idle->seqcount);
+ idle_count = READ_ONCE(idle->idle_count);
+ if (READ_ONCE(idle->clock_idle_enter))
+ idle_count++;
+ } while (read_seqcount_retry(&idle->seqcount, seq));
+ return sprintf(buf, "%llu\n", idle_count);
+}
+DEVICE_ATTR(idle_count, 0444, show_idle_count, NULL);
+
+static ssize_t show_idle_time(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ unsigned long long now, idle_time, idle_enter, idle_exit, in_idle;
+ struct s390_idle_data *idle = &per_cpu(s390_idle, dev->id);
+ unsigned int seq;
+
+ do {
+ seq = read_seqcount_begin(&idle->seqcount);
+ idle_time = READ_ONCE(idle->idle_time);
+ idle_enter = READ_ONCE(idle->clock_idle_enter);
+ idle_exit = READ_ONCE(idle->clock_idle_exit);
+ } while (read_seqcount_retry(&idle->seqcount, seq));
+ in_idle = 0;
+ now = get_tod_clock();
+ if (idle_enter) {
+ if (idle_exit) {
+ in_idle = idle_exit - idle_enter;
+ } else if (now > idle_enter) {
+ in_idle = now - idle_enter;
+ }
+ }
+ idle_time += in_idle;
+ return sprintf(buf, "%llu\n", idle_time >> 12);
+}
+DEVICE_ATTR(idle_time_us, 0444, show_idle_time, NULL);
+
+u64 arch_cpu_idle_time(int cpu)
+{
+ struct s390_idle_data *idle = &per_cpu(s390_idle, cpu);
+ unsigned long long now, idle_enter, idle_exit, in_idle;
+ unsigned int seq;
+
+ do {
+ seq = read_seqcount_begin(&idle->seqcount);
+ idle_enter = READ_ONCE(idle->clock_idle_enter);
+ idle_exit = READ_ONCE(idle->clock_idle_exit);
+ } while (read_seqcount_retry(&idle->seqcount, seq));
+ in_idle = 0;
+ now = get_tod_clock();
+ if (idle_enter) {
+ if (idle_exit) {
+ in_idle = idle_exit - idle_enter;
+ } else if (now > idle_enter) {
+ in_idle = now - idle_enter;
+ }
+ }
+ return cputime_to_nsecs(in_idle);
+}
+
+void arch_cpu_idle_enter(void)
+{
+ local_mcck_disable();
+}
+
+void arch_cpu_idle(void)
+{
+ if (!test_cpu_flag(CIF_MCCK_PENDING))
+ /* Halt the cpu and keep track of cpu time accounting. */
+ enabled_wait();
+ local_irq_enable();
+}
+
+void arch_cpu_idle_exit(void)
+{
+ local_mcck_enable();
+ if (test_cpu_flag(CIF_MCCK_PENDING))
+ s390_handle_mcck();
+}
+
+void arch_cpu_idle_dead(void)
+{
+ cpu_die();
+}
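The idle accounting above relies on a seqcount: enabled_wait() is the only writer, while the sysfs show functions and arch_cpu_idle_time() are lock-free readers that retry whenever they observe a concurrent update. A minimal sketch of that pattern in isolation (struct and function names here are illustrative, not taken from this file):

#include <linux/seqlock.h>

struct example_stats {
	seqcount_t seqcount;
	unsigned long long value;
};

/* Writer: bracket the update so readers can detect it. */
static void example_stats_add(struct example_stats *s, unsigned long long delta)
{
	write_seqcount_begin(&s->seqcount);
	s->value += delta;
	write_seqcount_end(&s->seqcount);
}

/* Reader: retry until a consistent snapshot has been read. */
static unsigned long long example_stats_read(struct example_stats *s)
{
	unsigned long long v;
	unsigned int seq;

	do {
		seq = read_seqcount_begin(&s->seqcount);
		v = READ_ONCE(s->value);
	} while (read_seqcount_retry(&s->seqcount, seq));
	return v;
}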
diff --git a/arch/s390/kernel/ipl.c b/arch/s390/kernel/ipl.c
new file mode 100644
index 000000000..4296d7e61
--- /dev/null
+++ b/arch/s390/kernel/ipl.c
@@ -0,0 +1,1787 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * ipl/reipl/dump support for Linux on s390.
+ *
+ * Copyright IBM Corp. 2005, 2012
+ * Author(s): Michael Holzheu <holzheu@de.ibm.com>
+ * Heiko Carstens <heiko.carstens@de.ibm.com>
+ * Volker Sameske <sameske@de.ibm.com>
+ */
+
+#include <linux/types.h>
+#include <linux/export.h>
+#include <linux/init.h>
+#include <linux/device.h>
+#include <linux/delay.h>
+#include <linux/reboot.h>
+#include <linux/ctype.h>
+#include <linux/fs.h>
+#include <linux/gfp.h>
+#include <linux/crash_dump.h>
+#include <linux/debug_locks.h>
+#include <asm/diag.h>
+#include <asm/ipl.h>
+#include <asm/smp.h>
+#include <asm/setup.h>
+#include <asm/cpcmd.h>
+#include <asm/ebcdic.h>
+#include <asm/sclp.h>
+#include <asm/checksum.h>
+#include <asm/debug.h>
+#include <asm/os_info.h>
+#include "entry.h"
+
+#define IPL_PARM_BLOCK_VERSION 0
+
+#define IPL_UNKNOWN_STR "unknown"
+#define IPL_CCW_STR "ccw"
+#define IPL_FCP_STR "fcp"
+#define IPL_FCP_DUMP_STR "fcp_dump"
+#define IPL_NSS_STR "nss"
+
+#define DUMP_CCW_STR "ccw"
+#define DUMP_FCP_STR "fcp"
+#define DUMP_NONE_STR "none"
+
+/*
+ * Five shutdown trigger types are supported:
+ * - panic
+ * - halt
+ * - power off
+ * - reipl
+ * - restart
+ */
+#define ON_PANIC_STR "on_panic"
+#define ON_HALT_STR "on_halt"
+#define ON_POFF_STR "on_poff"
+#define ON_REIPL_STR "on_reboot"
+#define ON_RESTART_STR "on_restart"
+
+struct shutdown_action;
+struct shutdown_trigger {
+ char *name;
+ struct shutdown_action *action;
+};
+
+/*
+ * The following shutdown action types are supported:
+ */
+#define SHUTDOWN_ACTION_IPL_STR "ipl"
+#define SHUTDOWN_ACTION_REIPL_STR "reipl"
+#define SHUTDOWN_ACTION_DUMP_STR "dump"
+#define SHUTDOWN_ACTION_VMCMD_STR "vmcmd"
+#define SHUTDOWN_ACTION_STOP_STR "stop"
+#define SHUTDOWN_ACTION_DUMP_REIPL_STR "dump_reipl"
+
+struct shutdown_action {
+ char *name;
+ void (*fn) (struct shutdown_trigger *trigger);
+ int (*init) (void);
+ int init_rc;
+};
+
+static char *ipl_type_str(enum ipl_type type)
+{
+ switch (type) {
+ case IPL_TYPE_CCW:
+ return IPL_CCW_STR;
+ case IPL_TYPE_FCP:
+ return IPL_FCP_STR;
+ case IPL_TYPE_FCP_DUMP:
+ return IPL_FCP_DUMP_STR;
+ case IPL_TYPE_NSS:
+ return IPL_NSS_STR;
+ case IPL_TYPE_UNKNOWN:
+ default:
+ return IPL_UNKNOWN_STR;
+ }
+}
+
+enum dump_type {
+ DUMP_TYPE_NONE = 1,
+ DUMP_TYPE_CCW = 2,
+ DUMP_TYPE_FCP = 4,
+};
+
+static char *dump_type_str(enum dump_type type)
+{
+ switch (type) {
+ case DUMP_TYPE_NONE:
+ return DUMP_NONE_STR;
+ case DUMP_TYPE_CCW:
+ return DUMP_CCW_STR;
+ case DUMP_TYPE_FCP:
+ return DUMP_FCP_STR;
+ default:
+ return NULL;
+ }
+}
+
+static int ipl_block_valid;
+static struct ipl_parameter_block ipl_block;
+
+static int reipl_capabilities = IPL_TYPE_UNKNOWN;
+
+static enum ipl_type reipl_type = IPL_TYPE_UNKNOWN;
+static struct ipl_parameter_block *reipl_block_fcp;
+static struct ipl_parameter_block *reipl_block_ccw;
+static struct ipl_parameter_block *reipl_block_nss;
+static struct ipl_parameter_block *reipl_block_actual;
+
+static int dump_capabilities = DUMP_TYPE_NONE;
+static enum dump_type dump_type = DUMP_TYPE_NONE;
+static struct ipl_parameter_block *dump_block_fcp;
+static struct ipl_parameter_block *dump_block_ccw;
+
+static struct sclp_ipl_info sclp_ipl_info;
+
+static inline int __diag308(unsigned long subcode, void *addr)
+{
+ register unsigned long _addr asm("0") = (unsigned long) addr;
+ register unsigned long _rc asm("1") = 0;
+
+ asm volatile(
+ " diag %0,%2,0x308\n"
+ "0: nopr %%r7\n"
+ EX_TABLE(0b,0b)
+ : "+d" (_addr), "+d" (_rc)
+ : "d" (subcode) : "cc", "memory");
+ return _rc;
+}
+
+int diag308(unsigned long subcode, void *addr)
+{
+ diag_stat_inc(DIAG_STAT_X308);
+ return __diag308(subcode, addr);
+}
+EXPORT_SYMBOL_GPL(diag308);
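Everything else in this file goes through this diag308() wrapper; the subcode selects the operation (store the current IPL parameter block, set a new one, load). A hedged caller sketch, modeled on ipl_store_parameters() near the end of the file (the mapping to -EIO is illustrative, not what the kernel does):

static int example_fetch_ipl_block(struct ipl_parameter_block *blk)
{
	int rc;

	/* Ask the firmware to store the current IPL parameter block. */
	rc = diag308(DIAG308_STORE, blk);
	if (rc != DIAG308_RC_OK)
		return -EIO;	/* illustrative error mapping */
	return 0;
}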
+
+/* SYSFS */
+
+#define IPL_ATTR_SHOW_FN(_prefix, _name, _format, args...) \
+static ssize_t sys_##_prefix##_##_name##_show(struct kobject *kobj, \
+ struct kobj_attribute *attr, \
+ char *page) \
+{ \
+ return snprintf(page, PAGE_SIZE, _format, ##args); \
+}
+
+#define IPL_ATTR_CCW_STORE_FN(_prefix, _name, _ipl_blk) \
+static ssize_t sys_##_prefix##_##_name##_store(struct kobject *kobj, \
+ struct kobj_attribute *attr, \
+ const char *buf, size_t len) \
+{ \
+ unsigned long long ssid, devno; \
+ \
+ if (sscanf(buf, "0.%llx.%llx\n", &ssid, &devno) != 2) \
+ return -EINVAL; \
+ \
+ if (ssid > __MAX_SSID || devno > __MAX_SUBCHANNEL) \
+ return -EINVAL; \
+ \
+ _ipl_blk.ssid = ssid; \
+ _ipl_blk.devno = devno; \
+ return len; \
+}
+
+#define DEFINE_IPL_CCW_ATTR_RW(_prefix, _name, _ipl_blk) \
+IPL_ATTR_SHOW_FN(_prefix, _name, "0.%x.%04x\n", \
+ _ipl_blk.ssid, _ipl_blk.devno); \
+IPL_ATTR_CCW_STORE_FN(_prefix, _name, _ipl_blk); \
+static struct kobj_attribute sys_##_prefix##_##_name##_attr = \
+ __ATTR(_name, (S_IRUGO | S_IWUSR), \
+ sys_##_prefix##_##_name##_show, \
+ sys_##_prefix##_##_name##_store) \
+
+#define DEFINE_IPL_ATTR_RO(_prefix, _name, _format, _value) \
+IPL_ATTR_SHOW_FN(_prefix, _name, _format, _value) \
+static struct kobj_attribute sys_##_prefix##_##_name##_attr = \
+ __ATTR(_name, S_IRUGO, sys_##_prefix##_##_name##_show, NULL)
+
+#define DEFINE_IPL_ATTR_RW(_prefix, _name, _fmt_out, _fmt_in, _value) \
+IPL_ATTR_SHOW_FN(_prefix, _name, _fmt_out, (unsigned long long) _value) \
+static ssize_t sys_##_prefix##_##_name##_store(struct kobject *kobj, \
+ struct kobj_attribute *attr, \
+ const char *buf, size_t len) \
+{ \
+ unsigned long long value; \
+ if (sscanf(buf, _fmt_in, &value) != 1) \
+ return -EINVAL; \
+ _value = value; \
+ return len; \
+} \
+static struct kobj_attribute sys_##_prefix##_##_name##_attr = \
+ __ATTR(_name,(S_IRUGO | S_IWUSR), \
+ sys_##_prefix##_##_name##_show, \
+ sys_##_prefix##_##_name##_store)
+
+#define DEFINE_IPL_ATTR_STR_RW(_prefix, _name, _fmt_out, _fmt_in, _value)\
+IPL_ATTR_SHOW_FN(_prefix, _name, _fmt_out, _value) \
+static ssize_t sys_##_prefix##_##_name##_store(struct kobject *kobj, \
+ struct kobj_attribute *attr, \
+ const char *buf, size_t len) \
+{ \
+ strncpy(_value, buf, sizeof(_value) - 1); \
+ strim(_value); \
+ return len; \
+} \
+static struct kobj_attribute sys_##_prefix##_##_name##_attr = \
+ __ATTR(_name,(S_IRUGO | S_IWUSR), \
+ sys_##_prefix##_##_name##_show, \
+ sys_##_prefix##_##_name##_store)
+
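Each of the macros above stamps out a show callback (and, for the RW variants, a store callback) plus a kobj_attribute named sys_<prefix>_<name>_attr. Roughly, a later invocation such as DEFINE_IPL_ATTR_RW(reipl_fcp, bootprog, "%lld\n", "%lld\n", reipl_block_fcp->ipl_info.fcp.bootprog) expands along these lines (schematic, not the literal preprocessor output):

static ssize_t sys_reipl_fcp_bootprog_show(struct kobject *kobj,
					   struct kobj_attribute *attr,
					   char *page)
{
	return snprintf(page, PAGE_SIZE, "%lld\n",
			(unsigned long long) reipl_block_fcp->ipl_info.fcp.bootprog);
}

static ssize_t sys_reipl_fcp_bootprog_store(struct kobject *kobj,
					    struct kobj_attribute *attr,
					    const char *buf, size_t len)
{
	unsigned long long value;

	if (sscanf(buf, "%lld\n", &value) != 1)
		return -EINVAL;
	reipl_block_fcp->ipl_info.fcp.bootprog = value;
	return len;
}

static struct kobj_attribute sys_reipl_fcp_bootprog_attr =
	__ATTR(bootprog, (S_IRUGO | S_IWUSR),
	       sys_reipl_fcp_bootprog_show,
	       sys_reipl_fcp_bootprog_store);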
+/*
+ * ipl section
+ */
+
+static __init enum ipl_type get_ipl_type(void)
+{
+ if (!ipl_block_valid)
+ return IPL_TYPE_UNKNOWN;
+
+ switch (ipl_block.hdr.pbt) {
+ case DIAG308_IPL_TYPE_CCW:
+ return IPL_TYPE_CCW;
+ case DIAG308_IPL_TYPE_FCP:
+ if (ipl_block.ipl_info.fcp.opt == DIAG308_IPL_OPT_DUMP)
+ return IPL_TYPE_FCP_DUMP;
+ else
+ return IPL_TYPE_FCP;
+ }
+ return IPL_TYPE_UNKNOWN;
+}
+
+struct ipl_info ipl_info;
+EXPORT_SYMBOL_GPL(ipl_info);
+
+static ssize_t ipl_type_show(struct kobject *kobj, struct kobj_attribute *attr,
+ char *page)
+{
+ return sprintf(page, "%s\n", ipl_type_str(ipl_info.type));
+}
+
+static struct kobj_attribute sys_ipl_type_attr = __ATTR_RO(ipl_type);
+
+/* VM IPL PARM routines */
+static size_t reipl_get_ascii_vmparm(char *dest, size_t size,
+ const struct ipl_parameter_block *ipb)
+{
+ int i;
+ size_t len;
+ char has_lowercase = 0;
+
+ len = 0;
+ if ((ipb->ipl_info.ccw.vm_flags & DIAG308_VM_FLAGS_VP_VALID) &&
+ (ipb->ipl_info.ccw.vm_parm_len > 0)) {
+
+ len = min_t(size_t, size - 1, ipb->ipl_info.ccw.vm_parm_len);
+ memcpy(dest, ipb->ipl_info.ccw.vm_parm, len);
+ /* If at least one character is lowercase, we assume mixed
+ * case; otherwise we convert everything to lowercase.
+ */
+ for (i = 0; i < len; i++)
+ if ((dest[i] > 0x80 && dest[i] < 0x8a) || /* a-i */
+ (dest[i] > 0x90 && dest[i] < 0x9a) || /* j-r */
+ (dest[i] > 0xa1 && dest[i] < 0xaa)) { /* s-z */
+ has_lowercase = 1;
+ break;
+ }
+ if (!has_lowercase)
+ EBC_TOLOWER(dest, len);
+ EBCASC(dest, len);
+ }
+ dest[len] = 0;
+
+ return len;
+}
+
+size_t append_ipl_vmparm(char *dest, size_t size)
+{
+ size_t rc;
+
+ rc = 0;
+ if (ipl_block_valid && ipl_block.hdr.pbt == DIAG308_IPL_TYPE_CCW)
+ rc = reipl_get_ascii_vmparm(dest, size, &ipl_block);
+ else
+ dest[0] = 0;
+ return rc;
+}
+
+static ssize_t ipl_vm_parm_show(struct kobject *kobj,
+ struct kobj_attribute *attr, char *page)
+{
+ char parm[DIAG308_VMPARM_SIZE + 1] = {};
+
+ append_ipl_vmparm(parm, sizeof(parm));
+ return sprintf(page, "%s\n", parm);
+}
+
+static size_t scpdata_length(const char* buf, size_t count)
+{
+ while (count) {
+ if (buf[count - 1] != '\0' && buf[count - 1] != ' ')
+ break;
+ count--;
+ }
+ return count;
+}
+
+static size_t reipl_append_ascii_scpdata(char *dest, size_t size,
+ const struct ipl_parameter_block *ipb)
+{
+ size_t count;
+ size_t i;
+ int has_lowercase;
+
+ count = min(size - 1, scpdata_length(ipb->ipl_info.fcp.scp_data,
+ ipb->ipl_info.fcp.scp_data_len));
+ if (!count)
+ goto out;
+
+ has_lowercase = 0;
+ for (i = 0; i < count; i++) {
+ if (!isascii(ipb->ipl_info.fcp.scp_data[i])) {
+ count = 0;
+ goto out;
+ }
+ if (!has_lowercase && islower(ipb->ipl_info.fcp.scp_data[i]))
+ has_lowercase = 1;
+ }
+
+ if (has_lowercase)
+ memcpy(dest, ipb->ipl_info.fcp.scp_data, count);
+ else
+ for (i = 0; i < count; i++)
+ dest[i] = tolower(ipb->ipl_info.fcp.scp_data[i]);
+out:
+ dest[count] = '\0';
+ return count;
+}
+
+size_t append_ipl_scpdata(char *dest, size_t len)
+{
+ size_t rc;
+
+ rc = 0;
+ if (ipl_block_valid && ipl_block.hdr.pbt == DIAG308_IPL_TYPE_FCP)
+ rc = reipl_append_ascii_scpdata(dest, len, &ipl_block);
+ else
+ dest[0] = 0;
+ return rc;
+}
+
+
+static struct kobj_attribute sys_ipl_vm_parm_attr =
+ __ATTR(parm, S_IRUGO, ipl_vm_parm_show, NULL);
+
+static ssize_t sys_ipl_device_show(struct kobject *kobj,
+ struct kobj_attribute *attr, char *page)
+{
+ switch (ipl_info.type) {
+ case IPL_TYPE_CCW:
+ return sprintf(page, "0.%x.%04x\n", ipl_block.ipl_info.ccw.ssid,
+ ipl_block.ipl_info.ccw.devno);
+ case IPL_TYPE_FCP:
+ case IPL_TYPE_FCP_DUMP:
+ return sprintf(page, "0.0.%04x\n",
+ ipl_block.ipl_info.fcp.devno);
+ default:
+ return 0;
+ }
+}
+
+static struct kobj_attribute sys_ipl_device_attr =
+ __ATTR(device, S_IRUGO, sys_ipl_device_show, NULL);
+
+static ssize_t ipl_parameter_read(struct file *filp, struct kobject *kobj,
+ struct bin_attribute *attr, char *buf,
+ loff_t off, size_t count)
+{
+ return memory_read_from_buffer(buf, count, &off, &ipl_block,
+ ipl_block.hdr.len);
+}
+static struct bin_attribute ipl_parameter_attr =
+ __BIN_ATTR(binary_parameter, S_IRUGO, ipl_parameter_read, NULL,
+ PAGE_SIZE);
+
+static ssize_t ipl_scp_data_read(struct file *filp, struct kobject *kobj,
+ struct bin_attribute *attr, char *buf,
+ loff_t off, size_t count)
+{
+ unsigned int size = ipl_block.ipl_info.fcp.scp_data_len;
+ void *scp_data = &ipl_block.ipl_info.fcp.scp_data;
+
+ return memory_read_from_buffer(buf, count, &off, scp_data, size);
+}
+static struct bin_attribute ipl_scp_data_attr =
+ __BIN_ATTR(scp_data, S_IRUGO, ipl_scp_data_read, NULL, PAGE_SIZE);
+
+static struct bin_attribute *ipl_fcp_bin_attrs[] = {
+ &ipl_parameter_attr,
+ &ipl_scp_data_attr,
+ NULL,
+};
+
+/* FCP ipl device attributes */
+
+DEFINE_IPL_ATTR_RO(ipl_fcp, wwpn, "0x%016llx\n",
+ (unsigned long long)ipl_block.ipl_info.fcp.wwpn);
+DEFINE_IPL_ATTR_RO(ipl_fcp, lun, "0x%016llx\n",
+ (unsigned long long)ipl_block.ipl_info.fcp.lun);
+DEFINE_IPL_ATTR_RO(ipl_fcp, bootprog, "%lld\n",
+ (unsigned long long)ipl_block.ipl_info.fcp.bootprog);
+DEFINE_IPL_ATTR_RO(ipl_fcp, br_lba, "%lld\n",
+ (unsigned long long)ipl_block.ipl_info.fcp.br_lba);
+
+static ssize_t ipl_ccw_loadparm_show(struct kobject *kobj,
+ struct kobj_attribute *attr, char *page)
+{
+ char loadparm[LOADPARM_LEN + 1] = {};
+
+ if (!sclp_ipl_info.is_valid)
+ return sprintf(page, "#unknown#\n");
+ memcpy(loadparm, &sclp_ipl_info.loadparm, LOADPARM_LEN);
+ EBCASC(loadparm, LOADPARM_LEN);
+ strim(loadparm);
+ return sprintf(page, "%s\n", loadparm);
+}
+
+static struct kobj_attribute sys_ipl_ccw_loadparm_attr =
+ __ATTR(loadparm, 0444, ipl_ccw_loadparm_show, NULL);
+
+static struct attribute *ipl_fcp_attrs[] = {
+ &sys_ipl_type_attr.attr,
+ &sys_ipl_device_attr.attr,
+ &sys_ipl_fcp_wwpn_attr.attr,
+ &sys_ipl_fcp_lun_attr.attr,
+ &sys_ipl_fcp_bootprog_attr.attr,
+ &sys_ipl_fcp_br_lba_attr.attr,
+ &sys_ipl_ccw_loadparm_attr.attr,
+ NULL,
+};
+
+static struct attribute_group ipl_fcp_attr_group = {
+ .attrs = ipl_fcp_attrs,
+ .bin_attrs = ipl_fcp_bin_attrs,
+};
+
+/* CCW ipl device attributes */
+
+static struct attribute *ipl_ccw_attrs_vm[] = {
+ &sys_ipl_type_attr.attr,
+ &sys_ipl_device_attr.attr,
+ &sys_ipl_ccw_loadparm_attr.attr,
+ &sys_ipl_vm_parm_attr.attr,
+ NULL,
+};
+
+static struct attribute *ipl_ccw_attrs_lpar[] = {
+ &sys_ipl_type_attr.attr,
+ &sys_ipl_device_attr.attr,
+ &sys_ipl_ccw_loadparm_attr.attr,
+ NULL,
+};
+
+static struct attribute_group ipl_ccw_attr_group_vm = {
+ .attrs = ipl_ccw_attrs_vm,
+};
+
+static struct attribute_group ipl_ccw_attr_group_lpar = {
+ .attrs = ipl_ccw_attrs_lpar
+};
+
+/* UNKNOWN ipl device attributes */
+
+static struct attribute *ipl_unknown_attrs[] = {
+ &sys_ipl_type_attr.attr,
+ NULL,
+};
+
+static struct attribute_group ipl_unknown_attr_group = {
+ .attrs = ipl_unknown_attrs,
+};
+
+static struct kset *ipl_kset;
+
+static void __ipl_run(void *unused)
+{
+ __bpon();
+ diag308(DIAG308_LOAD_CLEAR, NULL);
+}
+
+static void ipl_run(struct shutdown_trigger *trigger)
+{
+ smp_call_ipl_cpu(__ipl_run, NULL);
+}
+
+static int __init ipl_init(void)
+{
+ int rc;
+
+ ipl_kset = kset_create_and_add("ipl", NULL, firmware_kobj);
+ if (!ipl_kset) {
+ rc = -ENOMEM;
+ goto out;
+ }
+ switch (ipl_info.type) {
+ case IPL_TYPE_CCW:
+ if (MACHINE_IS_VM)
+ rc = sysfs_create_group(&ipl_kset->kobj,
+ &ipl_ccw_attr_group_vm);
+ else
+ rc = sysfs_create_group(&ipl_kset->kobj,
+ &ipl_ccw_attr_group_lpar);
+ break;
+ case IPL_TYPE_FCP:
+ case IPL_TYPE_FCP_DUMP:
+ rc = sysfs_create_group(&ipl_kset->kobj, &ipl_fcp_attr_group);
+ break;
+ default:
+ rc = sysfs_create_group(&ipl_kset->kobj,
+ &ipl_unknown_attr_group);
+ break;
+ }
+out:
+ if (rc)
+ panic("ipl_init failed: rc = %i\n", rc);
+
+ return 0;
+}
+
+static struct shutdown_action __refdata ipl_action = {
+ .name = SHUTDOWN_ACTION_IPL_STR,
+ .fn = ipl_run,
+ .init = ipl_init,
+};
+
+/*
+ * reipl shutdown action: Reboot Linux on shutdown.
+ */
+
+/* VM IPL PARM attributes */
+static ssize_t reipl_generic_vmparm_show(struct ipl_parameter_block *ipb,
+ char *page)
+{
+ char vmparm[DIAG308_VMPARM_SIZE + 1] = {};
+
+ reipl_get_ascii_vmparm(vmparm, sizeof(vmparm), ipb);
+ return sprintf(page, "%s\n", vmparm);
+}
+
+static ssize_t reipl_generic_vmparm_store(struct ipl_parameter_block *ipb,
+ size_t vmparm_max,
+ const char *buf, size_t len)
+{
+ int i, ip_len;
+
+ /* ignore trailing newline */
+ ip_len = len;
+ if ((len > 0) && (buf[len - 1] == '\n'))
+ ip_len--;
+
+ if (ip_len > vmparm_max)
+ return -EINVAL;
+
+ /* parm is used to store kernel options, check for common chars */
+ for (i = 0; i < ip_len; i++)
+ if (!(isalnum(buf[i]) || isascii(buf[i]) || isprint(buf[i])))
+ return -EINVAL;
+
+ memset(ipb->ipl_info.ccw.vm_parm, 0, DIAG308_VMPARM_SIZE);
+ ipb->ipl_info.ccw.vm_parm_len = ip_len;
+ if (ip_len > 0) {
+ ipb->ipl_info.ccw.vm_flags |= DIAG308_VM_FLAGS_VP_VALID;
+ memcpy(ipb->ipl_info.ccw.vm_parm, buf, ip_len);
+ ASCEBC(ipb->ipl_info.ccw.vm_parm, ip_len);
+ } else {
+ ipb->ipl_info.ccw.vm_flags &= ~DIAG308_VM_FLAGS_VP_VALID;
+ }
+
+ return len;
+}
+
+/* NSS wrapper */
+static ssize_t reipl_nss_vmparm_show(struct kobject *kobj,
+ struct kobj_attribute *attr, char *page)
+{
+ return reipl_generic_vmparm_show(reipl_block_nss, page);
+}
+
+static ssize_t reipl_nss_vmparm_store(struct kobject *kobj,
+ struct kobj_attribute *attr,
+ const char *buf, size_t len)
+{
+ return reipl_generic_vmparm_store(reipl_block_nss, 56, buf, len);
+}
+
+/* CCW wrapper */
+static ssize_t reipl_ccw_vmparm_show(struct kobject *kobj,
+ struct kobj_attribute *attr, char *page)
+{
+ return reipl_generic_vmparm_show(reipl_block_ccw, page);
+}
+
+static ssize_t reipl_ccw_vmparm_store(struct kobject *kobj,
+ struct kobj_attribute *attr,
+ const char *buf, size_t len)
+{
+ return reipl_generic_vmparm_store(reipl_block_ccw, 64, buf, len);
+}
+
+static struct kobj_attribute sys_reipl_nss_vmparm_attr =
+ __ATTR(parm, S_IRUGO | S_IWUSR, reipl_nss_vmparm_show,
+ reipl_nss_vmparm_store);
+static struct kobj_attribute sys_reipl_ccw_vmparm_attr =
+ __ATTR(parm, S_IRUGO | S_IWUSR, reipl_ccw_vmparm_show,
+ reipl_ccw_vmparm_store);
+
+/* FCP reipl device attributes */
+
+static ssize_t reipl_fcp_scpdata_read(struct file *filp, struct kobject *kobj,
+ struct bin_attribute *attr,
+ char *buf, loff_t off, size_t count)
+{
+ size_t size = reipl_block_fcp->ipl_info.fcp.scp_data_len;
+ void *scp_data = reipl_block_fcp->ipl_info.fcp.scp_data;
+
+ return memory_read_from_buffer(buf, count, &off, scp_data, size);
+}
+
+static ssize_t reipl_fcp_scpdata_write(struct file *filp, struct kobject *kobj,
+ struct bin_attribute *attr,
+ char *buf, loff_t off, size_t count)
+{
+ size_t scpdata_len = count;
+ size_t padding;
+
+
+ if (off)
+ return -EINVAL;
+
+ memcpy(reipl_block_fcp->ipl_info.fcp.scp_data, buf, count);
+ if (scpdata_len % 8) {
+ padding = 8 - (scpdata_len % 8);
+ memset(reipl_block_fcp->ipl_info.fcp.scp_data + scpdata_len,
+ 0, padding);
+ scpdata_len += padding;
+ }
+
+ reipl_block_fcp->ipl_info.fcp.scp_data_len = scpdata_len;
+ reipl_block_fcp->hdr.len = IPL_PARM_BLK_FCP_LEN + scpdata_len;
+ reipl_block_fcp->hdr.blk0_len = IPL_PARM_BLK0_FCP_LEN + scpdata_len;
+
+ return count;
+}
+static struct bin_attribute sys_reipl_fcp_scp_data_attr =
+ __BIN_ATTR(scp_data, (S_IRUGO | S_IWUSR), reipl_fcp_scpdata_read,
+ reipl_fcp_scpdata_write, DIAG308_SCPDATA_SIZE);
+
+static struct bin_attribute *reipl_fcp_bin_attrs[] = {
+ &sys_reipl_fcp_scp_data_attr,
+ NULL,
+};
+
+DEFINE_IPL_ATTR_RW(reipl_fcp, wwpn, "0x%016llx\n", "%llx\n",
+ reipl_block_fcp->ipl_info.fcp.wwpn);
+DEFINE_IPL_ATTR_RW(reipl_fcp, lun, "0x%016llx\n", "%llx\n",
+ reipl_block_fcp->ipl_info.fcp.lun);
+DEFINE_IPL_ATTR_RW(reipl_fcp, bootprog, "%lld\n", "%lld\n",
+ reipl_block_fcp->ipl_info.fcp.bootprog);
+DEFINE_IPL_ATTR_RW(reipl_fcp, br_lba, "%lld\n", "%lld\n",
+ reipl_block_fcp->ipl_info.fcp.br_lba);
+DEFINE_IPL_ATTR_RW(reipl_fcp, device, "0.0.%04llx\n", "0.0.%llx\n",
+ reipl_block_fcp->ipl_info.fcp.devno);
+
+static void reipl_get_ascii_loadparm(char *loadparm,
+ struct ipl_parameter_block *ibp)
+{
+ memcpy(loadparm, ibp->hdr.loadparm, LOADPARM_LEN);
+ EBCASC(loadparm, LOADPARM_LEN);
+ loadparm[LOADPARM_LEN] = 0;
+ strim(loadparm);
+}
+
+static ssize_t reipl_generic_loadparm_show(struct ipl_parameter_block *ipb,
+ char *page)
+{
+ char buf[LOADPARM_LEN + 1];
+
+ reipl_get_ascii_loadparm(buf, ipb);
+ return sprintf(page, "%s\n", buf);
+}
+
+static ssize_t reipl_generic_loadparm_store(struct ipl_parameter_block *ipb,
+ const char *buf, size_t len)
+{
+ int i, lp_len;
+
+ /* ignore trailing newline */
+ lp_len = len;
+ if ((len > 0) && (buf[len - 1] == '\n'))
+ lp_len--;
+ /* loadparm can have max 8 characters and must not start with a blank */
+ if ((lp_len > LOADPARM_LEN) || ((lp_len > 0) && (buf[0] == ' ')))
+ return -EINVAL;
+ /* loadparm can only contain "a-z,A-Z,0-9,SP,." */
+ for (i = 0; i < lp_len; i++) {
+ if (isalpha(buf[i]) || isdigit(buf[i]) || (buf[i] == ' ') ||
+ (buf[i] == '.'))
+ continue;
+ return -EINVAL;
+ }
+ /* initialize loadparm with blanks */
+ memset(ipb->hdr.loadparm, ' ', LOADPARM_LEN);
+ /* copy and convert to ebcdic */
+ memcpy(ipb->hdr.loadparm, buf, lp_len);
+ ASCEBC(ipb->hdr.loadparm, LOADPARM_LEN);
+ ipb->hdr.flags |= DIAG308_FLAGS_LP_VALID;
+ return len;
+}
+
+/* FCP wrapper */
+static ssize_t reipl_fcp_loadparm_show(struct kobject *kobj,
+ struct kobj_attribute *attr, char *page)
+{
+ return reipl_generic_loadparm_show(reipl_block_fcp, page);
+}
+
+static ssize_t reipl_fcp_loadparm_store(struct kobject *kobj,
+ struct kobj_attribute *attr,
+ const char *buf, size_t len)
+{
+ return reipl_generic_loadparm_store(reipl_block_fcp, buf, len);
+}
+
+static struct kobj_attribute sys_reipl_fcp_loadparm_attr =
+ __ATTR(loadparm, S_IRUGO | S_IWUSR, reipl_fcp_loadparm_show,
+ reipl_fcp_loadparm_store);
+
+static struct attribute *reipl_fcp_attrs[] = {
+ &sys_reipl_fcp_device_attr.attr,
+ &sys_reipl_fcp_wwpn_attr.attr,
+ &sys_reipl_fcp_lun_attr.attr,
+ &sys_reipl_fcp_bootprog_attr.attr,
+ &sys_reipl_fcp_br_lba_attr.attr,
+ &sys_reipl_fcp_loadparm_attr.attr,
+ NULL,
+};
+
+static struct attribute_group reipl_fcp_attr_group = {
+ .attrs = reipl_fcp_attrs,
+ .bin_attrs = reipl_fcp_bin_attrs,
+};
+
+/* CCW reipl device attributes */
+DEFINE_IPL_CCW_ATTR_RW(reipl_ccw, device, reipl_block_ccw->ipl_info.ccw);
+
+/* NSS wrapper */
+static ssize_t reipl_nss_loadparm_show(struct kobject *kobj,
+ struct kobj_attribute *attr, char *page)
+{
+ return reipl_generic_loadparm_show(reipl_block_nss, page);
+}
+
+static ssize_t reipl_nss_loadparm_store(struct kobject *kobj,
+ struct kobj_attribute *attr,
+ const char *buf, size_t len)
+{
+ return reipl_generic_loadparm_store(reipl_block_nss, buf, len);
+}
+
+/* CCW wrapper */
+static ssize_t reipl_ccw_loadparm_show(struct kobject *kobj,
+ struct kobj_attribute *attr, char *page)
+{
+ return reipl_generic_loadparm_show(reipl_block_ccw, page);
+}
+
+static ssize_t reipl_ccw_loadparm_store(struct kobject *kobj,
+ struct kobj_attribute *attr,
+ const char *buf, size_t len)
+{
+ return reipl_generic_loadparm_store(reipl_block_ccw, buf, len);
+}
+
+static struct kobj_attribute sys_reipl_ccw_loadparm_attr =
+ __ATTR(loadparm, S_IRUGO | S_IWUSR, reipl_ccw_loadparm_show,
+ reipl_ccw_loadparm_store);
+
+static struct attribute *reipl_ccw_attrs_vm[] = {
+ &sys_reipl_ccw_device_attr.attr,
+ &sys_reipl_ccw_loadparm_attr.attr,
+ &sys_reipl_ccw_vmparm_attr.attr,
+ NULL,
+};
+
+static struct attribute *reipl_ccw_attrs_lpar[] = {
+ &sys_reipl_ccw_device_attr.attr,
+ &sys_reipl_ccw_loadparm_attr.attr,
+ NULL,
+};
+
+static struct attribute_group reipl_ccw_attr_group_vm = {
+ .name = IPL_CCW_STR,
+ .attrs = reipl_ccw_attrs_vm,
+};
+
+static struct attribute_group reipl_ccw_attr_group_lpar = {
+ .name = IPL_CCW_STR,
+ .attrs = reipl_ccw_attrs_lpar,
+};
+
+
+/* NSS reipl device attributes */
+static void reipl_get_ascii_nss_name(char *dst,
+ struct ipl_parameter_block *ipb)
+{
+ memcpy(dst, ipb->ipl_info.ccw.nss_name, NSS_NAME_SIZE);
+ EBCASC(dst, NSS_NAME_SIZE);
+ dst[NSS_NAME_SIZE] = 0;
+}
+
+static ssize_t reipl_nss_name_show(struct kobject *kobj,
+ struct kobj_attribute *attr, char *page)
+{
+ char nss_name[NSS_NAME_SIZE + 1] = {};
+
+ reipl_get_ascii_nss_name(nss_name, reipl_block_nss);
+ return sprintf(page, "%s\n", nss_name);
+}
+
+static ssize_t reipl_nss_name_store(struct kobject *kobj,
+ struct kobj_attribute *attr,
+ const char *buf, size_t len)
+{
+ int nss_len;
+
+ /* ignore trailing newline */
+ nss_len = len;
+ if ((len > 0) && (buf[len - 1] == '\n'))
+ nss_len--;
+
+ if (nss_len > NSS_NAME_SIZE)
+ return -EINVAL;
+
+ memset(reipl_block_nss->ipl_info.ccw.nss_name, 0x40, NSS_NAME_SIZE);
+ if (nss_len > 0) {
+ reipl_block_nss->ipl_info.ccw.vm_flags |=
+ DIAG308_VM_FLAGS_NSS_VALID;
+ memcpy(reipl_block_nss->ipl_info.ccw.nss_name, buf, nss_len);
+ ASCEBC(reipl_block_nss->ipl_info.ccw.nss_name, nss_len);
+ EBC_TOUPPER(reipl_block_nss->ipl_info.ccw.nss_name, nss_len);
+ } else {
+ reipl_block_nss->ipl_info.ccw.vm_flags &=
+ ~DIAG308_VM_FLAGS_NSS_VALID;
+ }
+
+ return len;
+}
+
+static struct kobj_attribute sys_reipl_nss_name_attr =
+ __ATTR(name, S_IRUGO | S_IWUSR, reipl_nss_name_show,
+ reipl_nss_name_store);
+
+static struct kobj_attribute sys_reipl_nss_loadparm_attr =
+ __ATTR(loadparm, S_IRUGO | S_IWUSR, reipl_nss_loadparm_show,
+ reipl_nss_loadparm_store);
+
+static struct attribute *reipl_nss_attrs[] = {
+ &sys_reipl_nss_name_attr.attr,
+ &sys_reipl_nss_loadparm_attr.attr,
+ &sys_reipl_nss_vmparm_attr.attr,
+ NULL,
+};
+
+static struct attribute_group reipl_nss_attr_group = {
+ .name = IPL_NSS_STR,
+ .attrs = reipl_nss_attrs,
+};
+
+void set_os_info_reipl_block(void)
+{
+ os_info_entry_add(OS_INFO_REIPL_BLOCK, reipl_block_actual,
+ reipl_block_actual->hdr.len);
+}
+
+/* reipl type */
+
+static int reipl_set_type(enum ipl_type type)
+{
+ if (!(reipl_capabilities & type))
+ return -EINVAL;
+
+ switch(type) {
+ case IPL_TYPE_CCW:
+ reipl_block_actual = reipl_block_ccw;
+ break;
+ case IPL_TYPE_FCP:
+ reipl_block_actual = reipl_block_fcp;
+ break;
+ case IPL_TYPE_NSS:
+ reipl_block_actual = reipl_block_nss;
+ break;
+ default:
+ break;
+ }
+ reipl_type = type;
+ return 0;
+}
+
+static ssize_t reipl_type_show(struct kobject *kobj,
+ struct kobj_attribute *attr, char *page)
+{
+ return sprintf(page, "%s\n", ipl_type_str(reipl_type));
+}
+
+static ssize_t reipl_type_store(struct kobject *kobj,
+ struct kobj_attribute *attr,
+ const char *buf, size_t len)
+{
+ int rc = -EINVAL;
+
+ if (strncmp(buf, IPL_CCW_STR, strlen(IPL_CCW_STR)) == 0)
+ rc = reipl_set_type(IPL_TYPE_CCW);
+ else if (strncmp(buf, IPL_FCP_STR, strlen(IPL_FCP_STR)) == 0)
+ rc = reipl_set_type(IPL_TYPE_FCP);
+ else if (strncmp(buf, IPL_NSS_STR, strlen(IPL_NSS_STR)) == 0)
+ rc = reipl_set_type(IPL_TYPE_NSS);
+ return (rc != 0) ? rc : len;
+}
+
+static struct kobj_attribute reipl_type_attr =
+ __ATTR(reipl_type, 0644, reipl_type_show, reipl_type_store);
+
+static struct kset *reipl_kset;
+static struct kset *reipl_fcp_kset;
+
+static void __reipl_run(void *unused)
+{
+ switch (reipl_type) {
+ case IPL_TYPE_CCW:
+ diag308(DIAG308_SET, reipl_block_ccw);
+ diag308(DIAG308_LOAD_CLEAR, NULL);
+ break;
+ case IPL_TYPE_FCP:
+ diag308(DIAG308_SET, reipl_block_fcp);
+ diag308(DIAG308_LOAD_CLEAR, NULL);
+ break;
+ case IPL_TYPE_NSS:
+ diag308(DIAG308_SET, reipl_block_nss);
+ diag308(DIAG308_LOAD_CLEAR, NULL);
+ break;
+ case IPL_TYPE_UNKNOWN:
+ diag308(DIAG308_LOAD_CLEAR, NULL);
+ break;
+ case IPL_TYPE_FCP_DUMP:
+ break;
+ }
+ disabled_wait((unsigned long) __builtin_return_address(0));
+}
+
+static void reipl_run(struct shutdown_trigger *trigger)
+{
+ smp_call_ipl_cpu(__reipl_run, NULL);
+}
+
+static void reipl_block_ccw_init(struct ipl_parameter_block *ipb)
+{
+ ipb->hdr.len = IPL_PARM_BLK_CCW_LEN;
+ ipb->hdr.version = IPL_PARM_BLOCK_VERSION;
+ ipb->hdr.blk0_len = IPL_PARM_BLK0_CCW_LEN;
+ ipb->hdr.pbt = DIAG308_IPL_TYPE_CCW;
+}
+
+static void reipl_block_ccw_fill_parms(struct ipl_parameter_block *ipb)
+{
+ /* LOADPARM */
+ /* check if read scp info worked and set loadparm */
+ if (sclp_ipl_info.is_valid)
+ memcpy(ipb->hdr.loadparm, &sclp_ipl_info.loadparm, LOADPARM_LEN);
+ else
+ /* read scp info failed: set empty loadparm (EBCDIC blanks) */
+ memset(ipb->hdr.loadparm, 0x40, LOADPARM_LEN);
+ ipb->hdr.flags = DIAG308_FLAGS_LP_VALID;
+
+ /* VM PARM */
+ if (MACHINE_IS_VM && ipl_block_valid &&
+ (ipl_block.ipl_info.ccw.vm_flags & DIAG308_VM_FLAGS_VP_VALID)) {
+
+ ipb->ipl_info.ccw.vm_flags |= DIAG308_VM_FLAGS_VP_VALID;
+ ipb->ipl_info.ccw.vm_parm_len =
+ ipl_block.ipl_info.ccw.vm_parm_len;
+ memcpy(ipb->ipl_info.ccw.vm_parm,
+ ipl_block.ipl_info.ccw.vm_parm, DIAG308_VMPARM_SIZE);
+ }
+}
+
+static int __init reipl_nss_init(void)
+{
+ int rc;
+
+ if (!MACHINE_IS_VM)
+ return 0;
+
+ reipl_block_nss = (void *) get_zeroed_page(GFP_KERNEL);
+ if (!reipl_block_nss)
+ return -ENOMEM;
+
+ rc = sysfs_create_group(&reipl_kset->kobj, &reipl_nss_attr_group);
+ if (rc)
+ return rc;
+
+ reipl_block_ccw_init(reipl_block_nss);
+ reipl_capabilities |= IPL_TYPE_NSS;
+ return 0;
+}
+
+static int __init reipl_ccw_init(void)
+{
+ int rc;
+
+ reipl_block_ccw = (void *) get_zeroed_page(GFP_KERNEL);
+ if (!reipl_block_ccw)
+ return -ENOMEM;
+
+ rc = sysfs_create_group(&reipl_kset->kobj,
+ MACHINE_IS_VM ? &reipl_ccw_attr_group_vm
+ : &reipl_ccw_attr_group_lpar);
+ if (rc)
+ return rc;
+
+ reipl_block_ccw_init(reipl_block_ccw);
+ if (ipl_info.type == IPL_TYPE_CCW) {
+ reipl_block_ccw->ipl_info.ccw.ssid = ipl_block.ipl_info.ccw.ssid;
+ reipl_block_ccw->ipl_info.ccw.devno = ipl_block.ipl_info.ccw.devno;
+ reipl_block_ccw_fill_parms(reipl_block_ccw);
+ }
+
+ reipl_capabilities |= IPL_TYPE_CCW;
+ return 0;
+}
+
+static int __init reipl_fcp_init(void)
+{
+ int rc;
+
+ reipl_block_fcp = (void *) get_zeroed_page(GFP_KERNEL);
+ if (!reipl_block_fcp)
+ return -ENOMEM;
+
+ /* sysfs: create fcp kset for mixing attr group and bin attrs */
+ reipl_fcp_kset = kset_create_and_add(IPL_FCP_STR, NULL,
+ &reipl_kset->kobj);
+ if (!reipl_fcp_kset) {
+ free_page((unsigned long) reipl_block_fcp);
+ return -ENOMEM;
+ }
+
+ rc = sysfs_create_group(&reipl_fcp_kset->kobj, &reipl_fcp_attr_group);
+ if (rc) {
+ kset_unregister(reipl_fcp_kset);
+ free_page((unsigned long) reipl_block_fcp);
+ return rc;
+ }
+
+ if (ipl_info.type == IPL_TYPE_FCP) {
+ memcpy(reipl_block_fcp, &ipl_block, sizeof(ipl_block));
+ /*
+ * Fix loadparm: There are systems where the (SCSI) LOADPARM
+ * is invalid in the SCSI IPL parameter block, so always take
+ * it from sclp_ipl_info.
+ */
+ memcpy(reipl_block_fcp->hdr.loadparm, sclp_ipl_info.loadparm,
+ LOADPARM_LEN);
+ } else {
+ reipl_block_fcp->hdr.len = IPL_PARM_BLK_FCP_LEN;
+ reipl_block_fcp->hdr.version = IPL_PARM_BLOCK_VERSION;
+ reipl_block_fcp->hdr.blk0_len = IPL_PARM_BLK0_FCP_LEN;
+ reipl_block_fcp->hdr.pbt = DIAG308_IPL_TYPE_FCP;
+ reipl_block_fcp->ipl_info.fcp.opt = DIAG308_IPL_OPT_IPL;
+ }
+ reipl_capabilities |= IPL_TYPE_FCP;
+ return 0;
+}
+
+static int __init reipl_type_init(void)
+{
+ enum ipl_type reipl_type = ipl_info.type;
+ struct ipl_parameter_block *reipl_block;
+ unsigned long size;
+
+ reipl_block = os_info_old_entry(OS_INFO_REIPL_BLOCK, &size);
+ if (!reipl_block)
+ goto out;
+ /*
+ * If we have an OS info reipl block, this will be used
+ */
+ if (reipl_block->hdr.pbt == DIAG308_IPL_TYPE_FCP) {
+ memcpy(reipl_block_fcp, reipl_block, size);
+ reipl_type = IPL_TYPE_FCP;
+ } else if (reipl_block->hdr.pbt == DIAG308_IPL_TYPE_CCW) {
+ memcpy(reipl_block_ccw, reipl_block, size);
+ reipl_type = IPL_TYPE_CCW;
+ }
+out:
+ return reipl_set_type(reipl_type);
+}
+
+static int __init reipl_init(void)
+{
+ int rc;
+
+ reipl_kset = kset_create_and_add("reipl", NULL, firmware_kobj);
+ if (!reipl_kset)
+ return -ENOMEM;
+ rc = sysfs_create_file(&reipl_kset->kobj, &reipl_type_attr.attr);
+ if (rc) {
+ kset_unregister(reipl_kset);
+ return rc;
+ }
+ rc = reipl_ccw_init();
+ if (rc)
+ return rc;
+ rc = reipl_fcp_init();
+ if (rc)
+ return rc;
+ rc = reipl_nss_init();
+ if (rc)
+ return rc;
+ return reipl_type_init();
+}
+
+static struct shutdown_action __refdata reipl_action = {
+ .name = SHUTDOWN_ACTION_REIPL_STR,
+ .fn = reipl_run,
+ .init = reipl_init,
+};
+
+/*
+ * dump shutdown action: Dump Linux on shutdown.
+ */
+
+/* FCP dump device attributes */
+
+DEFINE_IPL_ATTR_RW(dump_fcp, wwpn, "0x%016llx\n", "%llx\n",
+ dump_block_fcp->ipl_info.fcp.wwpn);
+DEFINE_IPL_ATTR_RW(dump_fcp, lun, "0x%016llx\n", "%llx\n",
+ dump_block_fcp->ipl_info.fcp.lun);
+DEFINE_IPL_ATTR_RW(dump_fcp, bootprog, "%lld\n", "%lld\n",
+ dump_block_fcp->ipl_info.fcp.bootprog);
+DEFINE_IPL_ATTR_RW(dump_fcp, br_lba, "%lld\n", "%lld\n",
+ dump_block_fcp->ipl_info.fcp.br_lba);
+DEFINE_IPL_ATTR_RW(dump_fcp, device, "0.0.%04llx\n", "0.0.%llx\n",
+ dump_block_fcp->ipl_info.fcp.devno);
+
+static struct attribute *dump_fcp_attrs[] = {
+ &sys_dump_fcp_device_attr.attr,
+ &sys_dump_fcp_wwpn_attr.attr,
+ &sys_dump_fcp_lun_attr.attr,
+ &sys_dump_fcp_bootprog_attr.attr,
+ &sys_dump_fcp_br_lba_attr.attr,
+ NULL,
+};
+
+static struct attribute_group dump_fcp_attr_group = {
+ .name = IPL_FCP_STR,
+ .attrs = dump_fcp_attrs,
+};
+
+/* CCW dump device attributes */
+DEFINE_IPL_CCW_ATTR_RW(dump_ccw, device, dump_block_ccw->ipl_info.ccw);
+
+static struct attribute *dump_ccw_attrs[] = {
+ &sys_dump_ccw_device_attr.attr,
+ NULL,
+};
+
+static struct attribute_group dump_ccw_attr_group = {
+ .name = IPL_CCW_STR,
+ .attrs = dump_ccw_attrs,
+};
+
+/* dump type */
+
+static int dump_set_type(enum dump_type type)
+{
+ if (!(dump_capabilities & type))
+ return -EINVAL;
+ dump_type = type;
+ return 0;
+}
+
+static ssize_t dump_type_show(struct kobject *kobj,
+ struct kobj_attribute *attr, char *page)
+{
+ return sprintf(page, "%s\n", dump_type_str(dump_type));
+}
+
+static ssize_t dump_type_store(struct kobject *kobj,
+ struct kobj_attribute *attr,
+ const char *buf, size_t len)
+{
+ int rc = -EINVAL;
+
+ if (strncmp(buf, DUMP_NONE_STR, strlen(DUMP_NONE_STR)) == 0)
+ rc = dump_set_type(DUMP_TYPE_NONE);
+ else if (strncmp(buf, DUMP_CCW_STR, strlen(DUMP_CCW_STR)) == 0)
+ rc = dump_set_type(DUMP_TYPE_CCW);
+ else if (strncmp(buf, DUMP_FCP_STR, strlen(DUMP_FCP_STR)) == 0)
+ rc = dump_set_type(DUMP_TYPE_FCP);
+ return (rc != 0) ? rc : len;
+}
+
+static struct kobj_attribute dump_type_attr =
+ __ATTR(dump_type, 0644, dump_type_show, dump_type_store);
+
+static struct kset *dump_kset;
+
+static void diag308_dump(void *dump_block)
+{
+ diag308(DIAG308_SET, dump_block);
+ while (1) {
+ if (diag308(DIAG308_LOAD_NORMAL_DUMP, NULL) != 0x302)
+ break;
+ udelay_simple(USEC_PER_SEC);
+ }
+}
+
+static void __dump_run(void *unused)
+{
+ switch (dump_type) {
+ case DUMP_TYPE_CCW:
+ diag308_dump(dump_block_ccw);
+ break;
+ case DUMP_TYPE_FCP:
+ diag308_dump(dump_block_fcp);
+ break;
+ default:
+ break;
+ }
+}
+
+static void dump_run(struct shutdown_trigger *trigger)
+{
+ if (dump_type == DUMP_TYPE_NONE)
+ return;
+ smp_send_stop();
+ smp_call_ipl_cpu(__dump_run, NULL);
+}
+
+static int __init dump_ccw_init(void)
+{
+ int rc;
+
+ dump_block_ccw = (void *) get_zeroed_page(GFP_KERNEL);
+ if (!dump_block_ccw)
+ return -ENOMEM;
+ rc = sysfs_create_group(&dump_kset->kobj, &dump_ccw_attr_group);
+ if (rc) {
+ free_page((unsigned long)dump_block_ccw);
+ return rc;
+ }
+ dump_block_ccw->hdr.len = IPL_PARM_BLK_CCW_LEN;
+ dump_block_ccw->hdr.version = IPL_PARM_BLOCK_VERSION;
+ dump_block_ccw->hdr.blk0_len = IPL_PARM_BLK0_CCW_LEN;
+ dump_block_ccw->hdr.pbt = DIAG308_IPL_TYPE_CCW;
+ dump_capabilities |= DUMP_TYPE_CCW;
+ return 0;
+}
+
+static int __init dump_fcp_init(void)
+{
+ int rc;
+
+ if (!sclp_ipl_info.has_dump)
+ return 0; /* LDIPL DUMP is not installed */
+ dump_block_fcp = (void *) get_zeroed_page(GFP_KERNEL);
+ if (!dump_block_fcp)
+ return -ENOMEM;
+ rc = sysfs_create_group(&dump_kset->kobj, &dump_fcp_attr_group);
+ if (rc) {
+ free_page((unsigned long)dump_block_fcp);
+ return rc;
+ }
+ dump_block_fcp->hdr.len = IPL_PARM_BLK_FCP_LEN;
+ dump_block_fcp->hdr.version = IPL_PARM_BLOCK_VERSION;
+ dump_block_fcp->hdr.blk0_len = IPL_PARM_BLK0_FCP_LEN;
+ dump_block_fcp->hdr.pbt = DIAG308_IPL_TYPE_FCP;
+ dump_block_fcp->ipl_info.fcp.opt = DIAG308_IPL_OPT_DUMP;
+ dump_capabilities |= DUMP_TYPE_FCP;
+ return 0;
+}
+
+static int __init dump_init(void)
+{
+ int rc;
+
+ dump_kset = kset_create_and_add("dump", NULL, firmware_kobj);
+ if (!dump_kset)
+ return -ENOMEM;
+ rc = sysfs_create_file(&dump_kset->kobj, &dump_type_attr.attr);
+ if (rc) {
+ kset_unregister(dump_kset);
+ return rc;
+ }
+ rc = dump_ccw_init();
+ if (rc)
+ return rc;
+ rc = dump_fcp_init();
+ if (rc)
+ return rc;
+ dump_set_type(DUMP_TYPE_NONE);
+ return 0;
+}
+
+static struct shutdown_action __refdata dump_action = {
+ .name = SHUTDOWN_ACTION_DUMP_STR,
+ .fn = dump_run,
+ .init = dump_init,
+};
+
+static void dump_reipl_run(struct shutdown_trigger *trigger)
+{
+ unsigned long ipib = (unsigned long) reipl_block_actual;
+ unsigned int csum;
+
+ csum = (__force unsigned int)
+ csum_partial(reipl_block_actual, reipl_block_actual->hdr.len, 0);
+ mem_assign_absolute(S390_lowcore.ipib, ipib);
+ mem_assign_absolute(S390_lowcore.ipib_checksum, csum);
+ dump_run(trigger);
+}
+
+static struct shutdown_action __refdata dump_reipl_action = {
+ .name = SHUTDOWN_ACTION_DUMP_REIPL_STR,
+ .fn = dump_reipl_run,
+};
+
+/*
+ * vmcmd shutdown action: Trigger vm command on shutdown.
+ */
+
+static char vmcmd_on_reboot[128];
+static char vmcmd_on_panic[128];
+static char vmcmd_on_halt[128];
+static char vmcmd_on_poff[128];
+static char vmcmd_on_restart[128];
+
+DEFINE_IPL_ATTR_STR_RW(vmcmd, on_reboot, "%s\n", "%s\n", vmcmd_on_reboot);
+DEFINE_IPL_ATTR_STR_RW(vmcmd, on_panic, "%s\n", "%s\n", vmcmd_on_panic);
+DEFINE_IPL_ATTR_STR_RW(vmcmd, on_halt, "%s\n", "%s\n", vmcmd_on_halt);
+DEFINE_IPL_ATTR_STR_RW(vmcmd, on_poff, "%s\n", "%s\n", vmcmd_on_poff);
+DEFINE_IPL_ATTR_STR_RW(vmcmd, on_restart, "%s\n", "%s\n", vmcmd_on_restart);
+
+static struct attribute *vmcmd_attrs[] = {
+ &sys_vmcmd_on_reboot_attr.attr,
+ &sys_vmcmd_on_panic_attr.attr,
+ &sys_vmcmd_on_halt_attr.attr,
+ &sys_vmcmd_on_poff_attr.attr,
+ &sys_vmcmd_on_restart_attr.attr,
+ NULL,
+};
+
+static struct attribute_group vmcmd_attr_group = {
+ .attrs = vmcmd_attrs,
+};
+
+static struct kset *vmcmd_kset;
+
+static void vmcmd_run(struct shutdown_trigger *trigger)
+{
+ char *cmd;
+
+ if (strcmp(trigger->name, ON_REIPL_STR) == 0)
+ cmd = vmcmd_on_reboot;
+ else if (strcmp(trigger->name, ON_PANIC_STR) == 0)
+ cmd = vmcmd_on_panic;
+ else if (strcmp(trigger->name, ON_HALT_STR) == 0)
+ cmd = vmcmd_on_halt;
+ else if (strcmp(trigger->name, ON_POFF_STR) == 0)
+ cmd = vmcmd_on_poff;
+ else if (strcmp(trigger->name, ON_RESTART_STR) == 0)
+ cmd = vmcmd_on_restart;
+ else
+ return;
+
+ if (strlen(cmd) == 0)
+ return;
+ __cpcmd(cmd, NULL, 0, NULL);
+}
+
+static int vmcmd_init(void)
+{
+ if (!MACHINE_IS_VM)
+ return -EOPNOTSUPP;
+ vmcmd_kset = kset_create_and_add("vmcmd", NULL, firmware_kobj);
+ if (!vmcmd_kset)
+ return -ENOMEM;
+ return sysfs_create_group(&vmcmd_kset->kobj, &vmcmd_attr_group);
+}
+
+static struct shutdown_action vmcmd_action = {SHUTDOWN_ACTION_VMCMD_STR,
+ vmcmd_run, vmcmd_init};
+
+/*
+ * stop shutdown action: Stop Linux on shutdown.
+ */
+
+static void stop_run(struct shutdown_trigger *trigger)
+{
+ if (strcmp(trigger->name, ON_PANIC_STR) == 0 ||
+ strcmp(trigger->name, ON_RESTART_STR) == 0)
+ disabled_wait((unsigned long) __builtin_return_address(0));
+ smp_stop_cpu();
+}
+
+static struct shutdown_action stop_action = {SHUTDOWN_ACTION_STOP_STR,
+ stop_run, NULL};
+
+/* action list */
+
+static struct shutdown_action *shutdown_actions_list[] = {
+ &ipl_action, &reipl_action, &dump_reipl_action, &dump_action,
+ &vmcmd_action, &stop_action};
+#define SHUTDOWN_ACTIONS_COUNT (sizeof(shutdown_actions_list) / sizeof(void *))
+
+/*
+ * Trigger section
+ */
+
+static struct kset *shutdown_actions_kset;
+
+static int set_trigger(const char *buf, struct shutdown_trigger *trigger,
+ size_t len)
+{
+ int i;
+
+ for (i = 0; i < SHUTDOWN_ACTIONS_COUNT; i++) {
+ if (sysfs_streq(buf, shutdown_actions_list[i]->name)) {
+ if (shutdown_actions_list[i]->init_rc) {
+ return shutdown_actions_list[i]->init_rc;
+ } else {
+ trigger->action = shutdown_actions_list[i];
+ return len;
+ }
+ }
+ }
+ return -EINVAL;
+}
+
+/* on reipl */
+
+static struct shutdown_trigger on_reboot_trigger = {ON_REIPL_STR,
+ &reipl_action};
+
+static ssize_t on_reboot_show(struct kobject *kobj,
+ struct kobj_attribute *attr, char *page)
+{
+ return sprintf(page, "%s\n", on_reboot_trigger.action->name);
+}
+
+static ssize_t on_reboot_store(struct kobject *kobj,
+ struct kobj_attribute *attr,
+ const char *buf, size_t len)
+{
+ return set_trigger(buf, &on_reboot_trigger, len);
+}
+static struct kobj_attribute on_reboot_attr = __ATTR_RW(on_reboot);
+
+static void do_machine_restart(char *__unused)
+{
+ smp_send_stop();
+ on_reboot_trigger.action->fn(&on_reboot_trigger);
+ reipl_run(NULL);
+}
+void (*_machine_restart)(char *command) = do_machine_restart;
+
+/* on panic */
+
+static struct shutdown_trigger on_panic_trigger = {ON_PANIC_STR, &stop_action};
+
+static ssize_t on_panic_show(struct kobject *kobj,
+ struct kobj_attribute *attr, char *page)
+{
+ return sprintf(page, "%s\n", on_panic_trigger.action->name);
+}
+
+static ssize_t on_panic_store(struct kobject *kobj,
+ struct kobj_attribute *attr,
+ const char *buf, size_t len)
+{
+ return set_trigger(buf, &on_panic_trigger, len);
+}
+static struct kobj_attribute on_panic_attr = __ATTR_RW(on_panic);
+
+static void do_panic(void)
+{
+ lgr_info_log();
+ on_panic_trigger.action->fn(&on_panic_trigger);
+ stop_run(&on_panic_trigger);
+}
+
+/* on restart */
+
+static struct shutdown_trigger on_restart_trigger = {ON_RESTART_STR,
+ &stop_action};
+
+static ssize_t on_restart_show(struct kobject *kobj,
+ struct kobj_attribute *attr, char *page)
+{
+ return sprintf(page, "%s\n", on_restart_trigger.action->name);
+}
+
+static ssize_t on_restart_store(struct kobject *kobj,
+ struct kobj_attribute *attr,
+ const char *buf, size_t len)
+{
+ return set_trigger(buf, &on_restart_trigger, len);
+}
+static struct kobj_attribute on_restart_attr = __ATTR_RW(on_restart);
+
+static void __do_restart(void *ignore)
+{
+ __arch_local_irq_stosm(0x04); /* enable DAT */
+ smp_send_stop();
+#ifdef CONFIG_CRASH_DUMP
+ crash_kexec(NULL);
+#endif
+ on_restart_trigger.action->fn(&on_restart_trigger);
+ stop_run(&on_restart_trigger);
+}
+
+void do_restart(void)
+{
+ tracing_off();
+ debug_locks_off();
+ lgr_info_log();
+ smp_call_online_cpu(__do_restart, NULL);
+}
+
+/* on halt */
+
+static struct shutdown_trigger on_halt_trigger = {ON_HALT_STR, &stop_action};
+
+static ssize_t on_halt_show(struct kobject *kobj,
+ struct kobj_attribute *attr, char *page)
+{
+ return sprintf(page, "%s\n", on_halt_trigger.action->name);
+}
+
+static ssize_t on_halt_store(struct kobject *kobj,
+ struct kobj_attribute *attr,
+ const char *buf, size_t len)
+{
+ return set_trigger(buf, &on_halt_trigger, len);
+}
+static struct kobj_attribute on_halt_attr = __ATTR_RW(on_halt);
+
+static void do_machine_halt(void)
+{
+ smp_send_stop();
+ on_halt_trigger.action->fn(&on_halt_trigger);
+ stop_run(&on_halt_trigger);
+}
+void (*_machine_halt)(void) = do_machine_halt;
+
+/* on power off */
+
+static struct shutdown_trigger on_poff_trigger = {ON_POFF_STR, &stop_action};
+
+static ssize_t on_poff_show(struct kobject *kobj,
+ struct kobj_attribute *attr, char *page)
+{
+ return sprintf(page, "%s\n", on_poff_trigger.action->name);
+}
+
+static ssize_t on_poff_store(struct kobject *kobj,
+ struct kobj_attribute *attr,
+ const char *buf, size_t len)
+{
+ return set_trigger(buf, &on_poff_trigger, len);
+}
+static struct kobj_attribute on_poff_attr = __ATTR_RW(on_poff);
+
+static void do_machine_power_off(void)
+{
+ smp_send_stop();
+ on_poff_trigger.action->fn(&on_poff_trigger);
+ stop_run(&on_poff_trigger);
+}
+void (*_machine_power_off)(void) = do_machine_power_off;
+
+static struct attribute *shutdown_action_attrs[] = {
+ &on_restart_attr.attr,
+ &on_reboot_attr.attr,
+ &on_panic_attr.attr,
+ &on_halt_attr.attr,
+ &on_poff_attr.attr,
+ NULL,
+};
+
+static struct attribute_group shutdown_action_attr_group = {
+ .attrs = shutdown_action_attrs,
+};
+
+static void __init shutdown_triggers_init(void)
+{
+ shutdown_actions_kset = kset_create_and_add("shutdown_actions", NULL,
+ firmware_kobj);
+ if (!shutdown_actions_kset)
+ goto fail;
+ if (sysfs_create_group(&shutdown_actions_kset->kobj,
+ &shutdown_action_attr_group))
+ goto fail;
+ return;
+fail:
+ panic("shutdown_triggers_init failed\n");
+}
+
+static void __init shutdown_actions_init(void)
+{
+ int i;
+
+ for (i = 0; i < SHUTDOWN_ACTIONS_COUNT; i++) {
+ if (!shutdown_actions_list[i]->init)
+ continue;
+ shutdown_actions_list[i]->init_rc =
+ shutdown_actions_list[i]->init();
+ }
+}
+
+static int __init s390_ipl_init(void)
+{
+ char str[8] = {0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40};
+
+ sclp_early_get_ipl_info(&sclp_ipl_info);
+ /*
+ * Fix loadparm: There are systems where the (SCSI) LOADPARM
+ * returned by read SCP info is invalid (contains EBCDIC blanks)
+ * when the system has been booted via diag308. In that case we use
+ * the value from diag308, if available.
+ *
+ * There are also systems where diag308 store does not work in
+ * case the system is booted from HMC. Fortunately in this case
+ * READ SCP info provides the correct value.
+ */
+ if (memcmp(sclp_ipl_info.loadparm, str, sizeof(str)) == 0 && ipl_block_valid)
+ memcpy(sclp_ipl_info.loadparm, ipl_block.hdr.loadparm, LOADPARM_LEN);
+ shutdown_actions_init();
+ shutdown_triggers_init();
+ return 0;
+}
+
+__initcall(s390_ipl_init);
+
+static void __init strncpy_skip_quote(char *dst, char *src, int n)
+{
+ int sx, dx;
+
+ dx = 0;
+ for (sx = 0; src[sx] != 0; sx++) {
+ if (src[sx] == '"')
+ continue;
+ dst[dx++] = src[sx];
+ if (dx >= n)
+ break;
+ }
+}
+
+static int __init vmcmd_on_reboot_setup(char *str)
+{
+ if (!MACHINE_IS_VM)
+ return 1;
+ strncpy_skip_quote(vmcmd_on_reboot, str, 127);
+ vmcmd_on_reboot[127] = 0;
+ on_reboot_trigger.action = &vmcmd_action;
+ return 1;
+}
+__setup("vmreboot=", vmcmd_on_reboot_setup);
+
+static int __init vmcmd_on_panic_setup(char *str)
+{
+ if (!MACHINE_IS_VM)
+ return 1;
+ strncpy_skip_quote(vmcmd_on_panic, str, 127);
+ vmcmd_on_panic[127] = 0;
+ on_panic_trigger.action = &vmcmd_action;
+ return 1;
+}
+__setup("vmpanic=", vmcmd_on_panic_setup);
+
+static int __init vmcmd_on_halt_setup(char *str)
+{
+ if (!MACHINE_IS_VM)
+ return 1;
+ strncpy_skip_quote(vmcmd_on_halt, str, 127);
+ vmcmd_on_halt[127] = 0;
+ on_halt_trigger.action = &vmcmd_action;
+ return 1;
+}
+__setup("vmhalt=", vmcmd_on_halt_setup);
+
+static int __init vmcmd_on_poff_setup(char *str)
+{
+ if (!MACHINE_IS_VM)
+ return 1;
+ strncpy_skip_quote(vmcmd_on_poff, str, 127);
+ vmcmd_on_poff[127] = 0;
+ on_poff_trigger.action = &vmcmd_action;
+ return 1;
+}
+__setup("vmpoff=", vmcmd_on_poff_setup);
+
+static int on_panic_notify(struct notifier_block *self,
+ unsigned long event, void *data)
+{
+ do_panic();
+ return NOTIFY_OK;
+}
+
+static struct notifier_block on_panic_nb = {
+ .notifier_call = on_panic_notify,
+ .priority = INT_MIN,
+};
+
+void __init setup_ipl(void)
+{
+ BUILD_BUG_ON(sizeof(struct ipl_parameter_block) != PAGE_SIZE);
+
+ ipl_info.type = get_ipl_type();
+ switch (ipl_info.type) {
+ case IPL_TYPE_CCW:
+ ipl_info.data.ccw.dev_id.ssid = ipl_block.ipl_info.ccw.ssid;
+ ipl_info.data.ccw.dev_id.devno = ipl_block.ipl_info.ccw.devno;
+ break;
+ case IPL_TYPE_FCP:
+ case IPL_TYPE_FCP_DUMP:
+ ipl_info.data.fcp.dev_id.ssid = 0;
+ ipl_info.data.fcp.dev_id.devno = ipl_block.ipl_info.fcp.devno;
+ ipl_info.data.fcp.wwpn = ipl_block.ipl_info.fcp.wwpn;
+ ipl_info.data.fcp.lun = ipl_block.ipl_info.fcp.lun;
+ break;
+ case IPL_TYPE_NSS:
+ case IPL_TYPE_UNKNOWN:
+ /* We have no info to copy */
+ break;
+ }
+ atomic_notifier_chain_register(&panic_notifier_list, &on_panic_nb);
+}
+
+void __init ipl_store_parameters(void)
+{
+ int rc;
+
+ rc = diag308(DIAG308_STORE, &ipl_block);
+ if (rc == DIAG308_RC_OK && ipl_block.hdr.version <= IPL_MAX_SUPPORTED_VERSION)
+ ipl_block_valid = 1;
+}
+
+void s390_reset_system(void)
+{
+ /* Disable prefixing */
+ set_prefix(0);
+
+ /* Disable lowcore protection */
+ __ctl_clear_bit(0, 28);
+ diag308_reset();
+}
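Because ipl_info is exported (EXPORT_SYMBOL_GPL above), other kernel code can inspect how the system was IPLed. A small hypothetical consumer, not part of this patch, that checks for a CCW boot device (field types as declared in asm/ipl.h):

#include <asm/ipl.h>

static bool example_booted_from_ccw(u16 *devno)
{
	if (ipl_info.type != IPL_TYPE_CCW)
		return false;
	*devno = ipl_info.data.ccw.dev_id.devno;
	return true;
}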
diff --git a/arch/s390/kernel/irq.c b/arch/s390/kernel/irq.c
new file mode 100644
index 000000000..3d17c4107
--- /dev/null
+++ b/arch/s390/kernel/irq.c
@@ -0,0 +1,311 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright IBM Corp. 2004, 2011
+ * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>,
+ * Holger Smolinski <Holger.Smolinski@de.ibm.com>,
+ * Thomas Spatzier <tspat@de.ibm.com>,
+ *
+ * This file contains interrupt related functions.
+ */
+
+#include <linux/kernel_stat.h>
+#include <linux/interrupt.h>
+#include <linux/seq_file.h>
+#include <linux/proc_fs.h>
+#include <linux/profile.h>
+#include <linux/export.h>
+#include <linux/kernel.h>
+#include <linux/ftrace.h>
+#include <linux/errno.h>
+#include <linux/slab.h>
+#include <linux/init.h>
+#include <linux/cpu.h>
+#include <linux/irq.h>
+#include <asm/irq_regs.h>
+#include <asm/cputime.h>
+#include <asm/lowcore.h>
+#include <asm/irq.h>
+#include <asm/hw_irq.h>
+#include "entry.h"
+
+DEFINE_PER_CPU_SHARED_ALIGNED(struct irq_stat, irq_stat);
+EXPORT_PER_CPU_SYMBOL_GPL(irq_stat);
+
+struct irq_class {
+ int irq;
+ char *name;
+ char *desc;
+};
+
+/*
+ * The list of "main" irq classes on s390. This is the list of interrupts
+ * that appear both in /proc/stat ("intr" line) and /proc/interrupts.
+ * Historically only external and I/O interrupts have been part of /proc/stat.
+ * We can't add the split external and I/O sub classes since the first field
+ * in the "intr" line in /proc/stat is supposed to be the sum of all other
+ * fields.
+ * Since the external and I/O interrupt fields are already sums we would end
+ * up with a sum which accounts each interrupt twice.
+ */
+static const struct irq_class irqclass_main_desc[NR_IRQS_BASE] = {
+ {.irq = EXT_INTERRUPT, .name = "EXT"},
+ {.irq = IO_INTERRUPT, .name = "I/O"},
+ {.irq = THIN_INTERRUPT, .name = "AIO"},
+};
+
+/*
+ * The list of split external and I/O interrupts that appear only in
+ * /proc/interrupts.
+ * In addition this list contains non external / I/O events like NMIs.
+ */
+static const struct irq_class irqclass_sub_desc[] = {
+ {.irq = IRQEXT_CLK, .name = "CLK", .desc = "[EXT] Clock Comparator"},
+ {.irq = IRQEXT_EXC, .name = "EXC", .desc = "[EXT] External Call"},
+ {.irq = IRQEXT_EMS, .name = "EMS", .desc = "[EXT] Emergency Signal"},
+ {.irq = IRQEXT_TMR, .name = "TMR", .desc = "[EXT] CPU Timer"},
+ {.irq = IRQEXT_TLA, .name = "TAL", .desc = "[EXT] Timing Alert"},
+ {.irq = IRQEXT_PFL, .name = "PFL", .desc = "[EXT] Pseudo Page Fault"},
+ {.irq = IRQEXT_DSD, .name = "DSD", .desc = "[EXT] DASD Diag"},
+ {.irq = IRQEXT_VRT, .name = "VRT", .desc = "[EXT] Virtio"},
+ {.irq = IRQEXT_SCP, .name = "SCP", .desc = "[EXT] Service Call"},
+ {.irq = IRQEXT_IUC, .name = "IUC", .desc = "[EXT] IUCV"},
+ {.irq = IRQEXT_CMS, .name = "CMS", .desc = "[EXT] CPU-Measurement: Sampling"},
+ {.irq = IRQEXT_CMC, .name = "CMC", .desc = "[EXT] CPU-Measurement: Counter"},
+ {.irq = IRQEXT_FTP, .name = "FTP", .desc = "[EXT] HMC FTP Service"},
+ {.irq = IRQIO_CIO, .name = "CIO", .desc = "[I/O] Common I/O Layer Interrupt"},
+ {.irq = IRQIO_QAI, .name = "QAI", .desc = "[I/O] QDIO Adapter Interrupt"},
+ {.irq = IRQIO_DAS, .name = "DAS", .desc = "[I/O] DASD"},
+ {.irq = IRQIO_C15, .name = "C15", .desc = "[I/O] 3215"},
+ {.irq = IRQIO_C70, .name = "C70", .desc = "[I/O] 3270"},
+ {.irq = IRQIO_TAP, .name = "TAP", .desc = "[I/O] Tape"},
+ {.irq = IRQIO_VMR, .name = "VMR", .desc = "[I/O] Unit Record Devices"},
+ {.irq = IRQIO_LCS, .name = "LCS", .desc = "[I/O] LCS"},
+ {.irq = IRQIO_CTC, .name = "CTC", .desc = "[I/O] CTC"},
+ {.irq = IRQIO_APB, .name = "APB", .desc = "[I/O] AP Bus"},
+ {.irq = IRQIO_ADM, .name = "ADM", .desc = "[I/O] EADM Subchannel"},
+ {.irq = IRQIO_CSC, .name = "CSC", .desc = "[I/O] CHSC Subchannel"},
+ {.irq = IRQIO_PCI, .name = "PCI", .desc = "[I/O] PCI Interrupt" },
+ {.irq = IRQIO_MSI, .name = "MSI", .desc = "[I/O] MSI Interrupt" },
+ {.irq = IRQIO_VIR, .name = "VIR", .desc = "[I/O] Virtual I/O Devices"},
+ {.irq = IRQIO_VAI, .name = "VAI", .desc = "[I/O] Virtual I/O Devices AI"},
+ {.irq = NMI_NMI, .name = "NMI", .desc = "[NMI] Machine Check"},
+ {.irq = CPU_RST, .name = "RST", .desc = "[CPU] CPU Restart"},
+};
+
+void __init init_IRQ(void)
+{
+ BUILD_BUG_ON(ARRAY_SIZE(irqclass_sub_desc) != NR_ARCH_IRQS);
+ init_cio_interrupts();
+ init_airq_interrupts();
+ init_ext_interrupts();
+}
+
+void do_IRQ(struct pt_regs *regs, int irq)
+{
+ struct pt_regs *old_regs;
+
+ old_regs = set_irq_regs(regs);
+ irq_enter();
+ if (tod_after_eq(S390_lowcore.int_clock,
+ S390_lowcore.clock_comparator))
+ /* Serve timer interrupts first. */
+ clock_comparator_work();
+ generic_handle_irq(irq);
+ irq_exit();
+ set_irq_regs(old_regs);
+}
+
+/*
+ * show_interrupts is needed by /proc/interrupts.
+ */
+int show_interrupts(struct seq_file *p, void *v)
+{
+ int index = *(loff_t *) v;
+ int cpu, irq;
+
+ get_online_cpus();
+ if (index == 0) {
+ seq_puts(p, " ");
+ for_each_online_cpu(cpu)
+ seq_printf(p, "CPU%d ", cpu);
+ seq_putc(p, '\n');
+ }
+ if (index < NR_IRQS_BASE) {
+ seq_printf(p, "%s: ", irqclass_main_desc[index].name);
+ irq = irqclass_main_desc[index].irq;
+ for_each_online_cpu(cpu)
+ seq_printf(p, "%10u ", kstat_irqs_cpu(irq, cpu));
+ seq_putc(p, '\n');
+ goto out;
+ }
+ if (index > NR_IRQS_BASE)
+ goto out;
+
+ for (index = 0; index < NR_ARCH_IRQS; index++) {
+ seq_printf(p, "%s: ", irqclass_sub_desc[index].name);
+ irq = irqclass_sub_desc[index].irq;
+ for_each_online_cpu(cpu)
+ seq_printf(p, "%10u ",
+ per_cpu(irq_stat, cpu).irqs[irq]);
+ if (irqclass_sub_desc[index].desc)
+ seq_printf(p, " %s", irqclass_sub_desc[index].desc);
+ seq_putc(p, '\n');
+ }
+out:
+ put_online_cpus();
+ return 0;
+}
+
+unsigned int arch_dynirq_lower_bound(unsigned int from)
+{
+ return from < NR_IRQS_BASE ? NR_IRQS_BASE : from;
+}
+
+/*
+ * Switch to the asynchronous interrupt stack for softirq execution.
+ */
+void do_softirq_own_stack(void)
+{
+ unsigned long old, new;
+
+ old = current_stack_pointer();
+ /* Check against async. stack address range. */
+ new = S390_lowcore.async_stack;
+ if (((new - old) >> (PAGE_SHIFT + THREAD_SIZE_ORDER)) != 0) {
+ /* Need to switch to the async. stack. */
+ new -= STACK_FRAME_OVERHEAD;
+ ((struct stack_frame *) new)->back_chain = old;
+ asm volatile(" la 15,0(%0)\n"
+ " brasl 14,__do_softirq\n"
+ " la 15,0(%1)\n"
+ : : "a" (new), "a" (old)
+ : "0", "1", "2", "3", "4", "5", "14",
+ "cc", "memory" );
+ } else {
+ /* We are already on the async stack. */
+ __do_softirq();
+ }
+}
+
+/*
+ * ext_int_hash[index] is the list head for all external interrupts that hash
+ * to this index.
+ */
+static struct hlist_head ext_int_hash[32] ____cacheline_aligned;
+
+struct ext_int_info {
+ ext_int_handler_t handler;
+ struct hlist_node entry;
+ struct rcu_head rcu;
+ u16 code;
+};
+
+/* ext_int_hash_lock protects the handler lists for external interrupts */
+static DEFINE_SPINLOCK(ext_int_hash_lock);
+
+static inline int ext_hash(u16 code)
+{
+ BUILD_BUG_ON(!is_power_of_2(ARRAY_SIZE(ext_int_hash)));
+
+ return (code + (code >> 9)) & (ARRAY_SIZE(ext_int_hash) - 1);
+}
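register_external_irq() below adds a handler to the bucket chosen by ext_hash(); do_ext_interrupt() later walks that bucket under RCU and calls every handler whose code matches. A hedged registration sketch (the handler name and the 0x1234 interruption code are made up for illustration):

static void example_ext_handler(struct ext_code ext_code,
				unsigned int param32, unsigned long param64)
{
	/* runs from do_ext_interrupt() whenever code 0x1234 arrives */
}

static int __init example_ext_init(void)
{
	return register_external_irq(0x1234, example_ext_handler);
}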
+
+int register_external_irq(u16 code, ext_int_handler_t handler)
+{
+ struct ext_int_info *p;
+ unsigned long flags;
+ int index;
+
+ p = kmalloc(sizeof(*p), GFP_ATOMIC);
+ if (!p)
+ return -ENOMEM;
+ p->code = code;
+ p->handler = handler;
+ index = ext_hash(code);
+
+ spin_lock_irqsave(&ext_int_hash_lock, flags);
+ hlist_add_head_rcu(&p->entry, &ext_int_hash[index]);
+ spin_unlock_irqrestore(&ext_int_hash_lock, flags);
+ return 0;
+}
+EXPORT_SYMBOL(register_external_irq);
+
+int unregister_external_irq(u16 code, ext_int_handler_t handler)
+{
+ struct ext_int_info *p;
+ unsigned long flags;
+ int index = ext_hash(code);
+
+ spin_lock_irqsave(&ext_int_hash_lock, flags);
+ hlist_for_each_entry_rcu(p, &ext_int_hash[index], entry) {
+ if (p->code == code && p->handler == handler) {
+ hlist_del_rcu(&p->entry);
+ kfree_rcu(p, rcu);
+ }
+ }
+ spin_unlock_irqrestore(&ext_int_hash_lock, flags);
+ return 0;
+}
+EXPORT_SYMBOL(unregister_external_irq);
+
+static irqreturn_t do_ext_interrupt(int irq, void *dummy)
+{
+ struct pt_regs *regs = get_irq_regs();
+ struct ext_code ext_code;
+ struct ext_int_info *p;
+ int index;
+
+ ext_code = *(struct ext_code *) &regs->int_code;
+ if (ext_code.code != EXT_IRQ_CLK_COMP)
+ set_cpu_flag(CIF_NOHZ_DELAY);
+
+ index = ext_hash(ext_code.code);
+ rcu_read_lock();
+ hlist_for_each_entry_rcu(p, &ext_int_hash[index], entry) {
+ if (unlikely(p->code != ext_code.code))
+ continue;
+ p->handler(ext_code, regs->int_parm, regs->int_parm_long);
+ }
+ rcu_read_unlock();
+ return IRQ_HANDLED;
+}
+
+static struct irqaction external_interrupt = {
+ .name = "EXT",
+ .handler = do_ext_interrupt,
+};
+
+void __init init_ext_interrupts(void)
+{
+ int idx;
+
+ for (idx = 0; idx < ARRAY_SIZE(ext_int_hash); idx++)
+ INIT_HLIST_HEAD(&ext_int_hash[idx]);
+
+ irq_set_chip_and_handler(EXT_INTERRUPT,
+ &dummy_irq_chip, handle_percpu_irq);
+ setup_irq(EXT_INTERRUPT, &external_interrupt);
+}
+
+static DEFINE_SPINLOCK(irq_subclass_lock);
+static unsigned char irq_subclass_refcount[64];
+
+void irq_subclass_register(enum irq_subclass subclass)
+{
+ spin_lock(&irq_subclass_lock);
+ if (!irq_subclass_refcount[subclass])
+ ctl_set_bit(0, subclass);
+ irq_subclass_refcount[subclass]++;
+ spin_unlock(&irq_subclass_lock);
+}
+EXPORT_SYMBOL(irq_subclass_register);
+
+void irq_subclass_unregister(enum irq_subclass subclass)
+{
+ spin_lock(&irq_subclass_lock);
+ irq_subclass_refcount[subclass]--;
+ if (!irq_subclass_refcount[subclass])
+ ctl_clear_bit(0, subclass);
+ spin_unlock(&irq_subclass_lock);
+}
+EXPORT_SYMBOL(irq_subclass_unregister);
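The hunk above exposes register_external_irq()/unregister_external_irq(), which add handlers to the RCU-protected ext_int_hash table that do_ext_interrupt() walks. As a rough sketch only (the interrupt code 0x1234 and all names below are invented for illustration, and the declarations are assumed to live in asm/irq.h), an in-kernel user would look roughly like this:

        #include <asm/irq.h>            /* assumed home of ext_int_handler_t and register_external_irq() */

        #define MY_EXT_IRQ_CODE 0x1234  /* invented external-interrupt code, illustration only */

        /* Runs from do_ext_interrupt(); the parameters mirror
         * p->handler(ext_code, regs->int_parm, regs->int_parm_long) above. */
        static void my_ext_handler(struct ext_code ext_code,
                                   unsigned int param32, unsigned long param64)
        {
                /* handle the external interrupt; keep it short, this runs in IRQ context */
        }

        static int __init my_ext_init(void)
        {
                /* adds the handler to ext_int_hash[ext_hash(MY_EXT_IRQ_CODE)] */
                return register_external_irq(MY_EXT_IRQ_CODE, my_ext_handler);
        }

        static void my_ext_exit(void)
        {
                unregister_external_irq(MY_EXT_IRQ_CODE, my_ext_handler);
        }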
diff --git a/arch/s390/kernel/jump_label.c b/arch/s390/kernel/jump_label.c
new file mode 100644
index 000000000..10009a0cd
--- /dev/null
+++ b/arch/s390/kernel/jump_label.c
@@ -0,0 +1,102 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Jump label s390 support
+ *
+ * Copyright IBM Corp. 2011
+ * Author(s): Jan Glauber <jang@linux.vnet.ibm.com>
+ */
+#include <linux/uaccess.h>
+#include <linux/stop_machine.h>
+#include <linux/jump_label.h>
+#include <asm/ipl.h>
+
+struct insn {
+ u16 opcode;
+ s32 offset;
+} __packed;
+
+struct insn_args {
+ struct jump_entry *entry;
+ enum jump_label_type type;
+};
+
+static void jump_label_make_nop(struct jump_entry *entry, struct insn *insn)
+{
+ /* brcl 0,0 */
+ insn->opcode = 0xc004;
+ insn->offset = 0;
+}
+
+static void jump_label_make_branch(struct jump_entry *entry, struct insn *insn)
+{
+ /* brcl 15,offset */
+ insn->opcode = 0xc0f4;
+ insn->offset = (entry->target - entry->code) >> 1;
+}
+
+static void jump_label_bug(struct jump_entry *entry, struct insn *expected,
+ struct insn *new)
+{
+ unsigned char *ipc = (unsigned char *)entry->code;
+ unsigned char *ipe = (unsigned char *)expected;
+ unsigned char *ipn = (unsigned char *)new;
+
+ pr_emerg("Jump label code mismatch at %pS [%px]\n", ipc, ipc);
+ pr_emerg("Found: %6ph\n", ipc);
+ pr_emerg("Expected: %6ph\n", ipe);
+ pr_emerg("New: %6ph\n", ipn);
+ panic("Corrupted kernel text");
+}
+
+static struct insn orignop = {
+ .opcode = 0xc004,
+ .offset = JUMP_LABEL_NOP_OFFSET >> 1,
+};
+
+static void __jump_label_transform(struct jump_entry *entry,
+ enum jump_label_type type,
+ int init)
+{
+ struct insn old, new;
+
+ if (type == JUMP_LABEL_JMP) {
+ jump_label_make_nop(entry, &old);
+ jump_label_make_branch(entry, &new);
+ } else {
+ jump_label_make_branch(entry, &old);
+ jump_label_make_nop(entry, &new);
+ }
+ if (init) {
+ if (memcmp((void *)entry->code, &orignop, sizeof(orignop)))
+ jump_label_bug(entry, &orignop, &new);
+ } else {
+ if (memcmp((void *)entry->code, &old, sizeof(old)))
+ jump_label_bug(entry, &old, &new);
+ }
+ s390_kernel_write((void *)entry->code, &new, sizeof(new));
+}
+
+static int __sm_arch_jump_label_transform(void *data)
+{
+ struct insn_args *args = data;
+
+ __jump_label_transform(args->entry, args->type, 0);
+ return 0;
+}
+
+void arch_jump_label_transform(struct jump_entry *entry,
+ enum jump_label_type type)
+{
+ struct insn_args args;
+
+ args.entry = entry;
+ args.type = type;
+
+ stop_machine_cpuslocked(__sm_arch_jump_label_transform, &args, NULL);
+}
+
+void arch_jump_label_transform_static(struct jump_entry *entry,
+ enum jump_label_type type)
+{
+ __jump_label_transform(entry, type, 1);
+}
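The two helpers above encode the two states of a patched site: brcl 0,0 (0xc004) as the nop and brcl 15,offset (0xc0f4) as the taken branch, written with s390_kernel_write() under stop_machine. Callers reach this code only through the generic static-key API; a minimal sketch of that consumer side (the key name and condition are illustrative, not taken from this patch):

        #include <linux/jump_label.h>

        static DEFINE_STATIC_KEY_FALSE(my_feature_key);        /* illustrative key */

        void my_hot_path(void)
        {
                /* emitted as the nop form ("brcl 0,0") while the key is disabled */
                if (static_branch_unlikely(&my_feature_key)) {
                        /* rarely-enabled work goes here */
                }
        }

        void my_enable_feature(void)
        {
                /* patches every site of the key to "brcl 15,<offset>" via
                 * arch_jump_label_transform() under stop_machine */
                static_branch_enable(&my_feature_key);
        }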
diff --git a/arch/s390/kernel/kdebugfs.c b/arch/s390/kernel/kdebugfs.c
new file mode 100644
index 000000000..2c46bd6c6
--- /dev/null
+++ b/arch/s390/kernel/kdebugfs.c
@@ -0,0 +1,16 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/debugfs.h>
+#include <linux/export.h>
+#include <linux/init.h>
+
+struct dentry *arch_debugfs_dir;
+EXPORT_SYMBOL(arch_debugfs_dir);
+
+static int __init arch_kdebugfs_init(void)
+{
+ arch_debugfs_dir = debugfs_create_dir("s390", NULL);
+ if (IS_ERR(arch_debugfs_dir))
+ arch_debugfs_dir = NULL;
+ return 0;
+}
+postcore_initcall(arch_kdebugfs_init);
diff --git a/arch/s390/kernel/kexec_elf.c b/arch/s390/kernel/kexec_elf.c
new file mode 100644
index 000000000..5cf340b77
--- /dev/null
+++ b/arch/s390/kernel/kexec_elf.c
@@ -0,0 +1,152 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * ELF loader for kexec_file_load system call.
+ *
+ * Copyright IBM Corp. 2018
+ *
+ * Author(s): Philipp Rudo <prudo@linux.vnet.ibm.com>
+ */
+
+#include <linux/errno.h>
+#include <linux/kernel.h>
+#include <linux/kexec.h>
+#include <asm/setup.h>
+
+static int kexec_file_add_elf_kernel(struct kimage *image,
+ struct s390_load_data *data,
+ char *kernel, unsigned long kernel_len)
+{
+ struct kexec_buf buf;
+ const Elf_Ehdr *ehdr;
+ const Elf_Phdr *phdr;
+ Elf_Addr entry;
+ int i, ret;
+
+ ehdr = (Elf_Ehdr *)kernel;
+ buf.image = image;
+ if (image->type == KEXEC_TYPE_CRASH)
+ entry = STARTUP_KDUMP_OFFSET;
+ else
+ entry = ehdr->e_entry;
+
+ phdr = (void *)ehdr + ehdr->e_phoff;
+ for (i = 0; i < ehdr->e_phnum; i++, phdr++) {
+ if (phdr->p_type != PT_LOAD)
+ continue;
+
+ buf.buffer = kernel + phdr->p_offset;
+ buf.bufsz = phdr->p_filesz;
+
+ buf.mem = ALIGN(phdr->p_paddr, phdr->p_align);
+ buf.memsz = phdr->p_memsz;
+
+ if (entry - phdr->p_paddr < phdr->p_memsz) {
+ data->kernel_buf = buf.buffer;
+ data->memsz += STARTUP_NORMAL_OFFSET;
+
+ buf.buffer += STARTUP_NORMAL_OFFSET;
+ buf.bufsz -= STARTUP_NORMAL_OFFSET;
+
+ buf.mem += STARTUP_NORMAL_OFFSET;
+ buf.memsz -= STARTUP_NORMAL_OFFSET;
+ }
+
+ if (image->type == KEXEC_TYPE_CRASH)
+ buf.mem += crashk_res.start;
+
+ ret = kexec_add_buffer(&buf);
+ if (ret)
+ return ret;
+
+ data->memsz = ALIGN(data->memsz, phdr->p_align) + buf.memsz;
+ }
+
+ return 0;
+}
+
+static void *s390_elf_load(struct kimage *image,
+ char *kernel, unsigned long kernel_len,
+ char *initrd, unsigned long initrd_len,
+ char *cmdline, unsigned long cmdline_len)
+{
+ struct s390_load_data data = {0};
+ const Elf_Ehdr *ehdr;
+ const Elf_Phdr *phdr;
+ size_t size;
+ int i, ret;
+
+ /* image->fops->probe already checked for valid ELF magic number. */
+ ehdr = (Elf_Ehdr *)kernel;
+
+ if (ehdr->e_type != ET_EXEC ||
+ ehdr->e_ident[EI_CLASS] != ELFCLASS64 ||
+ !elf_check_arch(ehdr))
+ return ERR_PTR(-EINVAL);
+
+ if (!ehdr->e_phnum || ehdr->e_phentsize != sizeof(Elf_Phdr))
+ return ERR_PTR(-EINVAL);
+
+ size = ehdr->e_ehsize + ehdr->e_phoff;
+ size += ehdr->e_phentsize * ehdr->e_phnum;
+ if (size > kernel_len)
+ return ERR_PTR(-EINVAL);
+
+ phdr = (void *)ehdr + ehdr->e_phoff;
+ size = ALIGN(size, phdr->p_align);
+ for (i = 0; i < ehdr->e_phnum; i++, phdr++) {
+ if (phdr->p_type == PT_INTERP)
+ return ERR_PTR(-EINVAL);
+
+ if (phdr->p_offset > kernel_len)
+ return ERR_PTR(-EINVAL);
+
+ size += ALIGN(phdr->p_filesz, phdr->p_align);
+ }
+
+ if (size > kernel_len)
+ return ERR_PTR(-EINVAL);
+
+ ret = kexec_file_add_elf_kernel(image, &data, kernel, kernel_len);
+ if (ret)
+ return ERR_PTR(ret);
+
+ if (!data.memsz)
+ return ERR_PTR(-EINVAL);
+
+ if (initrd) {
+ ret = kexec_file_add_initrd(image, &data, initrd, initrd_len);
+ if (ret)
+ return ERR_PTR(ret);
+ }
+
+ ret = kexec_file_add_purgatory(image, &data);
+ if (ret)
+ return ERR_PTR(ret);
+
+ return kexec_file_update_kernel(image, &data);
+}
+
+static int s390_elf_probe(const char *buf, unsigned long len)
+{
+ const Elf_Ehdr *ehdr;
+
+ if (len < sizeof(Elf_Ehdr))
+ return -ENOEXEC;
+
+ ehdr = (Elf_Ehdr *)buf;
+
+ /* Only check the ELF magic number here and do the proper validity
+ * checks in the loader. Any check here that fails would send the
+ * erroneous ELF file to the image loader, which does not care what
+ * it gets, most likely causing behavior not intended by the user.
+ */
+ if (memcmp(ehdr->e_ident, ELFMAG, SELFMAG) != 0)
+ return -ENOEXEC;
+
+ return 0;
+}
+
+const struct kexec_file_ops s390_kexec_elf_ops = {
+ .probe = s390_elf_probe,
+ .load = s390_elf_load,
+};
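s390_elf_probe() and s390_elf_load() are only invoked from the kexec_file_load(2) system call, which passes the opened kernel and initrd down to the loaders in kexec_file_loaders[]. A hedged user-space sketch of that entry point (file paths and command line are placeholders, the raw syscall is shown because glibc provides no wrapper, and __NR_kexec_file_load must be supplied by the installed kernel headers):

        #include <fcntl.h>
        #include <stdio.h>
        #include <sys/syscall.h>
        #include <unistd.h>

        int main(void)
        {
                /* placeholder paths and command line */
                const char cmdline[] = "root=/dev/dasda1";
                int kernel_fd = open("/boot/vmlinux", O_RDONLY);
                int initrd_fd = open("/boot/initrd", O_RDONLY);

                if (kernel_fd < 0 || initrd_fd < 0)
                        return 1;

                /* kexec_file_load(kernel_fd, initrd_fd, cmdline_len, cmdline, flags);
                 * cmdline_len includes the terminating NUL byte */
                if (syscall(__NR_kexec_file_load, kernel_fd, initrd_fd,
                            sizeof(cmdline), cmdline, 0UL) != 0) {
                        perror("kexec_file_load");
                        return 1;
                }
                /* the image is now staged; it is started later, e.g. with "kexec -e" */
                return 0;
        }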
diff --git a/arch/s390/kernel/kexec_image.c b/arch/s390/kernel/kexec_image.c
new file mode 100644
index 000000000..380085259
--- /dev/null
+++ b/arch/s390/kernel/kexec_image.c
@@ -0,0 +1,76 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Image loader for kexec_file_load system call.
+ *
+ * Copyright IBM Corp. 2018
+ *
+ * Author(s): Philipp Rudo <prudo@linux.vnet.ibm.com>
+ */
+
+#include <linux/errno.h>
+#include <linux/kernel.h>
+#include <linux/kexec.h>
+#include <asm/setup.h>
+
+static int kexec_file_add_image_kernel(struct kimage *image,
+ struct s390_load_data *data,
+ char *kernel, unsigned long kernel_len)
+{
+ struct kexec_buf buf;
+ int ret;
+
+ buf.image = image;
+
+ buf.buffer = kernel + STARTUP_NORMAL_OFFSET;
+ buf.bufsz = kernel_len - STARTUP_NORMAL_OFFSET;
+
+ buf.mem = STARTUP_NORMAL_OFFSET;
+ if (image->type == KEXEC_TYPE_CRASH)
+ buf.mem += crashk_res.start;
+ buf.memsz = buf.bufsz;
+
+ ret = kexec_add_buffer(&buf);
+
+ data->kernel_buf = kernel;
+ data->memsz += buf.memsz + STARTUP_NORMAL_OFFSET;
+
+ return ret;
+}
+
+static void *s390_image_load(struct kimage *image,
+ char *kernel, unsigned long kernel_len,
+ char *initrd, unsigned long initrd_len,
+ char *cmdline, unsigned long cmdline_len)
+{
+ struct s390_load_data data = {0};
+ int ret;
+
+ ret = kexec_file_add_image_kernel(image, &data, kernel, kernel_len);
+ if (ret)
+ return ERR_PTR(ret);
+
+ if (initrd) {
+ ret = kexec_file_add_initrd(image, &data, initrd, initrd_len);
+ if (ret)
+ return ERR_PTR(ret);
+ }
+
+ ret = kexec_file_add_purgatory(image, &data);
+ if (ret)
+ return ERR_PTR(ret);
+
+ return kexec_file_update_kernel(image, &data);
+}
+
+static int s390_image_probe(const char *buf, unsigned long len)
+{
+ /* Can't reliably tell if an image is valid. Therefore accept
+ * whatever the user passes in.
+ */
+ return 0;
+}
+
+const struct kexec_file_ops s390_kexec_image_ops = {
+ .probe = s390_image_probe,
+ .load = s390_image_load,
+};
diff --git a/arch/s390/kernel/kprobes.c b/arch/s390/kernel/kprobes.c
new file mode 100644
index 000000000..7c0a095e9
--- /dev/null
+++ b/arch/s390/kernel/kprobes.c
@@ -0,0 +1,658 @@
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * Kernel Probes (KProbes)
+ *
+ * Copyright IBM Corp. 2002, 2006
+ *
+ * s390 port, used ppc64 as template. Mike Grundy <grundym@us.ibm.com>
+ */
+
+#include <linux/kprobes.h>
+#include <linux/ptrace.h>
+#include <linux/preempt.h>
+#include <linux/stop_machine.h>
+#include <linux/kdebug.h>
+#include <linux/uaccess.h>
+#include <linux/extable.h>
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <linux/hardirq.h>
+#include <linux/ftrace.h>
+#include <asm/set_memory.h>
+#include <asm/sections.h>
+#include <asm/dis.h>
+
+DEFINE_PER_CPU(struct kprobe *, current_kprobe);
+DEFINE_PER_CPU(struct kprobe_ctlblk, kprobe_ctlblk);
+
+struct kretprobe_blackpoint kretprobe_blacklist[] = { };
+
+DEFINE_INSN_CACHE_OPS(dmainsn);
+
+static void *alloc_dmainsn_page(void)
+{
+ void *page;
+
+ page = (void *) __get_free_page(GFP_KERNEL | GFP_DMA);
+ if (page)
+ set_memory_x((unsigned long) page, 1);
+ return page;
+}
+
+static void free_dmainsn_page(void *page)
+{
+ set_memory_nx((unsigned long) page, 1);
+ free_page((unsigned long)page);
+}
+
+struct kprobe_insn_cache kprobe_dmainsn_slots = {
+ .mutex = __MUTEX_INITIALIZER(kprobe_dmainsn_slots.mutex),
+ .alloc = alloc_dmainsn_page,
+ .free = free_dmainsn_page,
+ .pages = LIST_HEAD_INIT(kprobe_dmainsn_slots.pages),
+ .insn_size = MAX_INSN_SIZE,
+};
+
+static void copy_instruction(struct kprobe *p)
+{
+ unsigned long ip = (unsigned long) p->addr;
+ s64 disp, new_disp;
+ u64 addr, new_addr;
+
+ if (ftrace_location(ip) == ip) {
+ /*
+ * If kprobes patches the instruction that is morphed by
+ * ftrace, make sure that kprobes always sees the branch
+ * "jg .+24" that skips the mcount block or the "brcl 0,0"
+ * in case of hotpatch.
+ */
+ ftrace_generate_nop_insn((struct ftrace_insn *)p->ainsn.insn);
+ p->ainsn.is_ftrace_insn = 1;
+ } else
+ memcpy(p->ainsn.insn, p->addr, insn_length(*p->addr >> 8));
+ p->opcode = p->ainsn.insn[0];
+ if (!probe_is_insn_relative_long(p->ainsn.insn))
+ return;
+ /*
+ * For pc-relative instructions in RIL-b or RIL-c format patch the
+ * RI2 displacement field. We have already made sure that the insn
+ * slot for the patched instruction is within the same 2GB area
+ * as the original instruction (either kernel image or module area).
+ * Therefore the new displacement will always fit.
+ */
+ disp = *(s32 *)&p->ainsn.insn[1];
+ addr = (u64)(unsigned long)p->addr;
+ new_addr = (u64)(unsigned long)p->ainsn.insn;
+ new_disp = ((addr + (disp * 2)) - new_addr) / 2;
+ *(s32 *)&p->ainsn.insn[1] = new_disp;
+}
+NOKPROBE_SYMBOL(copy_instruction);
+
+static inline int is_kernel_addr(void *addr)
+{
+ return addr < (void *)_end;
+}
+
+static int s390_get_insn_slot(struct kprobe *p)
+{
+ /*
+ * Get an insn slot that is within the same 2GB area as the original
+ * instruction. That way instructions with a 32bit signed displacement
+ * field can be patched and executed within the insn slot.
+ */
+ p->ainsn.insn = NULL;
+ if (is_kernel_addr(p->addr))
+ p->ainsn.insn = get_dmainsn_slot();
+ else if (is_module_addr(p->addr))
+ p->ainsn.insn = get_insn_slot();
+ return p->ainsn.insn ? 0 : -ENOMEM;
+}
+NOKPROBE_SYMBOL(s390_get_insn_slot);
+
+static void s390_free_insn_slot(struct kprobe *p)
+{
+ if (!p->ainsn.insn)
+ return;
+ if (is_kernel_addr(p->addr))
+ free_dmainsn_slot(p->ainsn.insn, 0);
+ else
+ free_insn_slot(p->ainsn.insn, 0);
+ p->ainsn.insn = NULL;
+}
+NOKPROBE_SYMBOL(s390_free_insn_slot);
+
+int arch_prepare_kprobe(struct kprobe *p)
+{
+ if ((unsigned long) p->addr & 0x01)
+ return -EINVAL;
+ /* Make sure the probe isn't going on a difficult instruction */
+ if (probe_is_prohibited_opcode(p->addr))
+ return -EINVAL;
+ if (s390_get_insn_slot(p))
+ return -ENOMEM;
+ copy_instruction(p);
+ return 0;
+}
+NOKPROBE_SYMBOL(arch_prepare_kprobe);
+
+int arch_check_ftrace_location(struct kprobe *p)
+{
+ return 0;
+}
+
+struct swap_insn_args {
+ struct kprobe *p;
+ unsigned int arm_kprobe : 1;
+};
+
+static int swap_instruction(void *data)
+{
+ struct swap_insn_args *args = data;
+ struct ftrace_insn new_insn, *insn;
+ struct kprobe *p = args->p;
+ size_t len;
+
+ new_insn.opc = args->arm_kprobe ? BREAKPOINT_INSTRUCTION : p->opcode;
+ len = sizeof(new_insn.opc);
+ if (!p->ainsn.is_ftrace_insn)
+ goto skip_ftrace;
+ len = sizeof(new_insn);
+ insn = (struct ftrace_insn *) p->addr;
+ if (args->arm_kprobe) {
+ if (is_ftrace_nop(insn))
+ new_insn.disp = KPROBE_ON_FTRACE_NOP;
+ else
+ new_insn.disp = KPROBE_ON_FTRACE_CALL;
+ } else {
+ ftrace_generate_call_insn(&new_insn, (unsigned long)p->addr);
+ if (insn->disp == KPROBE_ON_FTRACE_NOP)
+ ftrace_generate_nop_insn(&new_insn);
+ }
+skip_ftrace:
+ s390_kernel_write(p->addr, &new_insn, len);
+ return 0;
+}
+NOKPROBE_SYMBOL(swap_instruction);
+
+void arch_arm_kprobe(struct kprobe *p)
+{
+ struct swap_insn_args args = {.p = p, .arm_kprobe = 1};
+
+ stop_machine_cpuslocked(swap_instruction, &args, NULL);
+}
+NOKPROBE_SYMBOL(arch_arm_kprobe);
+
+void arch_disarm_kprobe(struct kprobe *p)
+{
+ struct swap_insn_args args = {.p = p, .arm_kprobe = 0};
+
+ stop_machine_cpuslocked(swap_instruction, &args, NULL);
+}
+NOKPROBE_SYMBOL(arch_disarm_kprobe);
+
+void arch_remove_kprobe(struct kprobe *p)
+{
+ s390_free_insn_slot(p);
+}
+NOKPROBE_SYMBOL(arch_remove_kprobe);
+
+static void enable_singlestep(struct kprobe_ctlblk *kcb,
+ struct pt_regs *regs,
+ unsigned long ip)
+{
+ struct per_regs per_kprobe;
+
+ /* Set up the PER control registers %cr9-%cr11 */
+ per_kprobe.control = PER_EVENT_IFETCH;
+ per_kprobe.start = ip;
+ per_kprobe.end = ip;
+
+ /* Save control regs and psw mask */
+ __ctl_store(kcb->kprobe_saved_ctl, 9, 11);
+ kcb->kprobe_saved_imask = regs->psw.mask &
+ (PSW_MASK_PER | PSW_MASK_IO | PSW_MASK_EXT);
+
+ /* Set PER control regs, turns on single step for the given address */
+ __ctl_load(per_kprobe, 9, 11);
+ regs->psw.mask |= PSW_MASK_PER;
+ regs->psw.mask &= ~(PSW_MASK_IO | PSW_MASK_EXT);
+ regs->psw.addr = ip;
+}
+NOKPROBE_SYMBOL(enable_singlestep);
+
+static void disable_singlestep(struct kprobe_ctlblk *kcb,
+ struct pt_regs *regs,
+ unsigned long ip)
+{
+ /* Restore control regs and psw mask, set new psw address */
+ __ctl_load(kcb->kprobe_saved_ctl, 9, 11);
+ regs->psw.mask &= ~PSW_MASK_PER;
+ regs->psw.mask |= kcb->kprobe_saved_imask;
+ regs->psw.addr = ip;
+}
+NOKPROBE_SYMBOL(disable_singlestep);
+
+/*
+ * Activate a kprobe by storing its pointer to current_kprobe. The
+ * previous kprobe is stored in kcb->prev_kprobe. A stack of up to
+ * two kprobes can be active, see KPROBE_REENTER.
+ */
+static void push_kprobe(struct kprobe_ctlblk *kcb, struct kprobe *p)
+{
+ kcb->prev_kprobe.kp = __this_cpu_read(current_kprobe);
+ kcb->prev_kprobe.status = kcb->kprobe_status;
+ __this_cpu_write(current_kprobe, p);
+}
+NOKPROBE_SYMBOL(push_kprobe);
+
+/*
+ * Deactivate a kprobe by backing up to the previous state. If the
+ * current state is KPROBE_REENTER prev_kprobe.kp will be non-NULL,
+ * for any other state prev_kprobe.kp will be NULL.
+ */
+static void pop_kprobe(struct kprobe_ctlblk *kcb)
+{
+ __this_cpu_write(current_kprobe, kcb->prev_kprobe.kp);
+ kcb->kprobe_status = kcb->prev_kprobe.status;
+}
+NOKPROBE_SYMBOL(pop_kprobe);
+
+void arch_prepare_kretprobe(struct kretprobe_instance *ri, struct pt_regs *regs)
+{
+ ri->ret_addr = (kprobe_opcode_t *) regs->gprs[14];
+
+ /* Replace the return addr with trampoline addr */
+ regs->gprs[14] = (unsigned long) &kretprobe_trampoline;
+}
+NOKPROBE_SYMBOL(arch_prepare_kretprobe);
+
+static void kprobe_reenter_check(struct kprobe_ctlblk *kcb, struct kprobe *p)
+{
+ switch (kcb->kprobe_status) {
+ case KPROBE_HIT_SSDONE:
+ case KPROBE_HIT_ACTIVE:
+ kprobes_inc_nmissed_count(p);
+ break;
+ case KPROBE_HIT_SS:
+ case KPROBE_REENTER:
+ default:
+ /*
+ * A kprobe on the code path to single step an instruction
+ * is a BUG. The code path resides in the .kprobes.text
+ * section and is executed with interrupts disabled.
+ */
+ pr_err("Invalid kprobe detected.\n");
+ dump_kprobe(p);
+ BUG();
+ }
+}
+NOKPROBE_SYMBOL(kprobe_reenter_check);
+
+static int kprobe_handler(struct pt_regs *regs)
+{
+ struct kprobe_ctlblk *kcb;
+ struct kprobe *p;
+
+ /*
+ * We want to disable preemption for the entire duration of kprobe
+ * processing. That includes the calls to the pre/post handlers
+ * and single stepping the kprobe instruction.
+ */
+ preempt_disable();
+ kcb = get_kprobe_ctlblk();
+ p = get_kprobe((void *)(regs->psw.addr - 2));
+
+ if (p) {
+ if (kprobe_running()) {
+ /*
+ * We have hit a kprobe while another is still
+ * active. This can happen in the pre and post
+ * handler. Single step the instruction of the
+ * new probe but do not call any handler function
+ * of this secondary kprobe.
+ * push_kprobe and pop_kprobe saves and restores
+ * the currently active kprobe.
+ */
+ kprobe_reenter_check(kcb, p);
+ push_kprobe(kcb, p);
+ kcb->kprobe_status = KPROBE_REENTER;
+ } else {
+ /*
+ * If we have no pre-handler or it returned 0, we
+ * continue with single stepping. If we have a
+ * pre-handler and it returned non-zero, it prepped
+ * for changing execution path, so get out doing
+ * nothing more here.
+ */
+ push_kprobe(kcb, p);
+ kcb->kprobe_status = KPROBE_HIT_ACTIVE;
+ if (p->pre_handler && p->pre_handler(p, regs)) {
+ pop_kprobe(kcb);
+ preempt_enable_no_resched();
+ return 1;
+ }
+ kcb->kprobe_status = KPROBE_HIT_SS;
+ }
+ enable_singlestep(kcb, regs, (unsigned long) p->ainsn.insn);
+ return 1;
+ } /* else:
+ * No kprobe at this address and no active kprobe. The trap has
+ * not been caused by a kprobe breakpoint. The race of breakpoint
+ * vs. kprobe remove does not exist because on s390 we use
+ * stop_machine to arm/disarm the breakpoints.
+ */
+ preempt_enable_no_resched();
+ return 0;
+}
+NOKPROBE_SYMBOL(kprobe_handler);
+
+/*
+ * Function return probe trampoline:
+ * - init_kprobes() establishes a probepoint here
+ * - When the probed function returns, this probe
+ * causes the handlers to fire
+ */
+static void __used kretprobe_trampoline_holder(void)
+{
+ asm volatile(".global kretprobe_trampoline\n"
+ "kretprobe_trampoline: bcr 0,0\n");
+}
+
+/*
+ * Called when the probe at kretprobe trampoline is hit
+ */
+static int trampoline_probe_handler(struct kprobe *p, struct pt_regs *regs)
+{
+ struct kretprobe_instance *ri;
+ struct hlist_head *head, empty_rp;
+ struct hlist_node *tmp;
+ unsigned long flags, orig_ret_address;
+ unsigned long trampoline_address;
+ kprobe_opcode_t *correct_ret_addr;
+
+ INIT_HLIST_HEAD(&empty_rp);
+ kretprobe_hash_lock(current, &head, &flags);
+
+ /*
+ * It is possible to have multiple instances associated with a given
+ * task either because multiple functions in the call path
+ * have a return probe installed on them, and/or more than one
+ * return probe was registered for a target function.
+ *
+ * We can handle this because:
+ * - instances are always inserted at the head of the list
+ * - when multiple return probes are registered for the same
+ * function, the first instance's ret_addr will point to the
+ * real return address, and all the rest will point to
+ * kretprobe_trampoline
+ */
+ ri = NULL;
+ orig_ret_address = 0;
+ correct_ret_addr = NULL;
+ trampoline_address = (unsigned long) &kretprobe_trampoline;
+ hlist_for_each_entry_safe(ri, tmp, head, hlist) {
+ if (ri->task != current)
+ /* another task is sharing our hash bucket */
+ continue;
+
+ orig_ret_address = (unsigned long) ri->ret_addr;
+
+ if (orig_ret_address != trampoline_address)
+ /*
+ * This is the real return address. Any other
+ * instances associated with this task are for
+ * other calls deeper on the call stack
+ */
+ break;
+ }
+
+ kretprobe_assert(ri, orig_ret_address, trampoline_address);
+
+ correct_ret_addr = ri->ret_addr;
+ hlist_for_each_entry_safe(ri, tmp, head, hlist) {
+ if (ri->task != current)
+ /* another task is sharing our hash bucket */
+ continue;
+
+ orig_ret_address = (unsigned long) ri->ret_addr;
+
+ if (ri->rp && ri->rp->handler) {
+ ri->ret_addr = correct_ret_addr;
+ ri->rp->handler(ri, regs);
+ }
+
+ recycle_rp_inst(ri, &empty_rp);
+
+ if (orig_ret_address != trampoline_address)
+ /*
+ * This is the real return address. Any other
+ * instances associated with this task are for
+ * other calls deeper on the call stack
+ */
+ break;
+ }
+
+ regs->psw.addr = orig_ret_address;
+
+ kretprobe_hash_unlock(current, &flags);
+
+ hlist_for_each_entry_safe(ri, tmp, &empty_rp, hlist) {
+ hlist_del(&ri->hlist);
+ kfree(ri);
+ }
+ /*
+ * By returning a non-zero value, we are telling
+ * kprobe_handler() that we don't want the post_handler
+ * to run (and have re-enabled preemption)
+ */
+ return 1;
+}
+NOKPROBE_SYMBOL(trampoline_probe_handler);
+
+/*
+ * Called after single-stepping. p->addr is the address of the
+ * instruction whose first byte has been replaced by the "breakpoint"
+ * instruction. To avoid the SMP problems that can occur when we
+ * temporarily put back the original opcode to single-step, we
+ * single-stepped a copy of the instruction. The address of this
+ * copy is p->ainsn.insn.
+ */
+static void resume_execution(struct kprobe *p, struct pt_regs *regs)
+{
+ struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
+ unsigned long ip = regs->psw.addr;
+ int fixup = probe_get_fixup_type(p->ainsn.insn);
+
+ /* Check if the kprobes location is an enabled ftrace caller */
+ if (p->ainsn.is_ftrace_insn) {
+ struct ftrace_insn *insn = (struct ftrace_insn *) p->addr;
+ struct ftrace_insn call_insn;
+
+ ftrace_generate_call_insn(&call_insn, (unsigned long) p->addr);
+ /*
+ * A kprobe on an enabled ftrace call site actually single
+ * stepped an unconditional branch (ftrace nop equivalent).
+ * Now we need to fixup things and pretend that a brasl r0,...
+ * was executed instead.
+ */
+ if (insn->disp == KPROBE_ON_FTRACE_CALL) {
+ ip += call_insn.disp * 2 - MCOUNT_INSN_SIZE;
+ regs->gprs[0] = (unsigned long)p->addr + sizeof(*insn);
+ }
+ }
+
+ if (fixup & FIXUP_PSW_NORMAL)
+ ip += (unsigned long) p->addr - (unsigned long) p->ainsn.insn;
+
+ if (fixup & FIXUP_BRANCH_NOT_TAKEN) {
+ int ilen = insn_length(p->ainsn.insn[0] >> 8);
+ if (ip - (unsigned long) p->ainsn.insn == ilen)
+ ip = (unsigned long) p->addr + ilen;
+ }
+
+ if (fixup & FIXUP_RETURN_REGISTER) {
+ int reg = (p->ainsn.insn[0] & 0xf0) >> 4;
+ regs->gprs[reg] += (unsigned long) p->addr -
+ (unsigned long) p->ainsn.insn;
+ }
+
+ disable_singlestep(kcb, regs, ip);
+}
+NOKPROBE_SYMBOL(resume_execution);
+
+static int post_kprobe_handler(struct pt_regs *regs)
+{
+ struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
+ struct kprobe *p = kprobe_running();
+
+ if (!p)
+ return 0;
+
+ if (kcb->kprobe_status != KPROBE_REENTER && p->post_handler) {
+ kcb->kprobe_status = KPROBE_HIT_SSDONE;
+ p->post_handler(p, regs, 0);
+ }
+
+ resume_execution(p, regs);
+ pop_kprobe(kcb);
+ preempt_enable_no_resched();
+
+ /*
+ * If somebody else is single-stepping across a probe point, the
+ * psw mask will have PER set; in that case, continue the remaining
+ * processing of do_single_step, as if this is not a probe hit.
+ */
+ if (regs->psw.mask & PSW_MASK_PER)
+ return 0;
+
+ return 1;
+}
+NOKPROBE_SYMBOL(post_kprobe_handler);
+
+static int kprobe_trap_handler(struct pt_regs *regs, int trapnr)
+{
+ struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
+ struct kprobe *p = kprobe_running();
+ const struct exception_table_entry *entry;
+
+ switch(kcb->kprobe_status) {
+ case KPROBE_HIT_SS:
+ case KPROBE_REENTER:
+ /*
+ * We are here because the instruction being single
+ * stepped caused a page fault. We reset the current
+ * kprobe and the nip points back to the probe address
+ * and allow the page fault handler to continue as a
+ * normal page fault.
+ */
+ disable_singlestep(kcb, regs, (unsigned long) p->addr);
+ pop_kprobe(kcb);
+ preempt_enable_no_resched();
+ break;
+ case KPROBE_HIT_ACTIVE:
+ case KPROBE_HIT_SSDONE:
+ /*
+ * We increment the nmissed count for accounting;
+ * the npre/npostfault counts could also be used to account
+ * for these specific fault cases.
+ */
+ kprobes_inc_nmissed_count(p);
+
+ /*
+ * We come here because instructions in the pre/post
+ * handler caused the page fault. This could happen
+ * if the handler tries to access user space, e.g. via
+ * copy_from_user() or get_user(). Let the
+ * user-specified handler try to fix it first.
+ */
+ if (p->fault_handler && p->fault_handler(p, regs, trapnr))
+ return 1;
+
+ /*
+ * In case the user-specified fault handler returned
+ * zero, try to fix up.
+ */
+ entry = search_exception_tables(regs->psw.addr);
+ if (entry) {
+ regs->psw.addr = extable_fixup(entry);
+ return 1;
+ }
+
+ /*
+ * fixup_exception() could not handle it,
+ * let do_page_fault() fix it.
+ */
+ break;
+ default:
+ break;
+ }
+ return 0;
+}
+NOKPROBE_SYMBOL(kprobe_trap_handler);
+
+int kprobe_fault_handler(struct pt_regs *regs, int trapnr)
+{
+ int ret;
+
+ if (regs->psw.mask & (PSW_MASK_IO | PSW_MASK_EXT))
+ local_irq_disable();
+ ret = kprobe_trap_handler(regs, trapnr);
+ if (regs->psw.mask & (PSW_MASK_IO | PSW_MASK_EXT))
+ local_irq_restore(regs->psw.mask & ~PSW_MASK_PER);
+ return ret;
+}
+NOKPROBE_SYMBOL(kprobe_fault_handler);
+
+/*
+ * Wrapper routine for handling exceptions.
+ */
+int kprobe_exceptions_notify(struct notifier_block *self,
+ unsigned long val, void *data)
+{
+ struct die_args *args = (struct die_args *) data;
+ struct pt_regs *regs = args->regs;
+ int ret = NOTIFY_DONE;
+
+ if (regs->psw.mask & (PSW_MASK_IO | PSW_MASK_EXT))
+ local_irq_disable();
+
+ switch (val) {
+ case DIE_BPT:
+ if (kprobe_handler(regs))
+ ret = NOTIFY_STOP;
+ break;
+ case DIE_SSTEP:
+ if (post_kprobe_handler(regs))
+ ret = NOTIFY_STOP;
+ break;
+ case DIE_TRAP:
+ if (!preemptible() && kprobe_running() &&
+ kprobe_trap_handler(regs, args->trapnr))
+ ret = NOTIFY_STOP;
+ break;
+ default:
+ break;
+ }
+
+ if (regs->psw.mask & (PSW_MASK_IO | PSW_MASK_EXT))
+ local_irq_restore(regs->psw.mask & ~PSW_MASK_PER);
+
+ return ret;
+}
+NOKPROBE_SYMBOL(kprobe_exceptions_notify);
+
+static struct kprobe trampoline = {
+ .addr = (kprobe_opcode_t *) &kretprobe_trampoline,
+ .pre_handler = trampoline_probe_handler
+};
+
+int __init arch_init_kprobes(void)
+{
+ return register_kprobe(&trampoline);
+}
+
+int arch_trampoline_kprobe(struct kprobe *p)
+{
+ return p->addr == (kprobe_opcode_t *) &kretprobe_trampoline;
+}
+NOKPROBE_SYMBOL(arch_trampoline_kprobe);
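Everything above is reached through the generic kprobes API: register_kprobe() ends up in arch_prepare_kprobe()/arch_arm_kprobe(), and the PER single-step machinery resumes execution afterwards. A small illustrative consumer (the probed symbol is an arbitrary example, not mandated by this patch):

        #include <linux/kprobes.h>
        #include <linux/module.h>

        static int my_pre_handler(struct kprobe *p, struct pt_regs *regs)
        {
                pr_info("kprobe hit at %pS, psw.addr=%lx\n", p->addr, regs->psw.addr);
                return 0;       /* 0: continue with single-stepping the copied instruction */
        }

        static struct kprobe my_probe = {
                .symbol_name    = "do_ext_interrupt",   /* arbitrary example target */
                .pre_handler    = my_pre_handler,
        };

        static int __init my_probe_init(void)
        {
                return register_kprobe(&my_probe);      /* arms the breakpoint via stop_machine */
        }

        static void __exit my_probe_exit(void)
        {
                unregister_kprobe(&my_probe);
        }

        module_init(my_probe_init);
        module_exit(my_probe_exit);
        MODULE_LICENSE("GPL");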
diff --git a/arch/s390/kernel/lgr.c b/arch/s390/kernel/lgr.c
new file mode 100644
index 000000000..452502f9a
--- /dev/null
+++ b/arch/s390/kernel/lgr.c
@@ -0,0 +1,188 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Linux Guest Relocation (LGR) detection
+ *
+ * Copyright IBM Corp. 2012
+ * Author(s): Michael Holzheu <holzheu@linux.vnet.ibm.com>
+ */
+
+#include <linux/init.h>
+#include <linux/export.h>
+#include <linux/timer.h>
+#include <linux/slab.h>
+#include <asm/facility.h>
+#include <asm/sysinfo.h>
+#include <asm/ebcdic.h>
+#include <asm/debug.h>
+#include <asm/ipl.h>
+
+#define LGR_TIMER_INTERVAL_SECS (30 * 60)
+#define VM_LEVEL_MAX 2 /* Maximum is 8, but we only record two levels */
+
+/*
+ * LGR info: Contains stfle and stsi data
+ */
+struct lgr_info {
+ /* Bit field with facility information: 4 DWORDs are stored */
+ u64 stfle_fac_list[4];
+ /* Level of system (1 = CEC, 2 = LPAR, 3 = z/VM) */
+ u32 level;
+ /* Level 1: CEC info (stsi 1.1.1) */
+ char manufacturer[16];
+ char type[4];
+ char sequence[16];
+ char plant[4];
+ char model[16];
+ /* Level 2: LPAR info (stsi 2.2.2) */
+ u16 lpar_number;
+ char name[8];
+ /* Level 3: VM info (stsi 3.2.2) */
+ u8 vm_count;
+ struct {
+ char name[8];
+ char cpi[16];
+ } vm[VM_LEVEL_MAX];
+} __packed __aligned(8);
+
+/*
+ * LGR globals
+ */
+static char lgr_page[PAGE_SIZE] __aligned(PAGE_SIZE);
+static struct lgr_info lgr_info_last;
+static struct lgr_info lgr_info_cur;
+static struct debug_info *lgr_dbf;
+
+/*
+ * Copy buffer and then convert it to ASCII
+ */
+static void cpascii(char *dst, char *src, int size)
+{
+ memcpy(dst, src, size);
+ EBCASC(dst, size);
+}
+
+/*
+ * Fill LGR info with 1.1.1 stsi data
+ */
+static void lgr_stsi_1_1_1(struct lgr_info *lgr_info)
+{
+ struct sysinfo_1_1_1 *si = (void *) lgr_page;
+
+ if (stsi(si, 1, 1, 1))
+ return;
+ cpascii(lgr_info->manufacturer, si->manufacturer,
+ sizeof(si->manufacturer));
+ cpascii(lgr_info->type, si->type, sizeof(si->type));
+ cpascii(lgr_info->model, si->model, sizeof(si->model));
+ cpascii(lgr_info->sequence, si->sequence, sizeof(si->sequence));
+ cpascii(lgr_info->plant, si->plant, sizeof(si->plant));
+}
+
+/*
+ * Fill LGR info with 2.2.2 stsi data
+ */
+static void lgr_stsi_2_2_2(struct lgr_info *lgr_info)
+{
+ struct sysinfo_2_2_2 *si = (void *) lgr_page;
+
+ if (stsi(si, 2, 2, 2))
+ return;
+ cpascii(lgr_info->name, si->name, sizeof(si->name));
+ memcpy(&lgr_info->lpar_number, &si->lpar_number,
+ sizeof(lgr_info->lpar_number));
+}
+
+/*
+ * Fill LGR info with 3.2.2 stsi data
+ */
+static void lgr_stsi_3_2_2(struct lgr_info *lgr_info)
+{
+ struct sysinfo_3_2_2 *si = (void *) lgr_page;
+ int i;
+
+ if (stsi(si, 3, 2, 2))
+ return;
+ for (i = 0; i < min_t(u8, si->count, VM_LEVEL_MAX); i++) {
+ cpascii(lgr_info->vm[i].name, si->vm[i].name,
+ sizeof(si->vm[i].name));
+ cpascii(lgr_info->vm[i].cpi, si->vm[i].cpi,
+ sizeof(si->vm[i].cpi));
+ }
+ lgr_info->vm_count = si->count;
+}
+
+/*
+ * Fill LGR info with current data
+ */
+static void lgr_info_get(struct lgr_info *lgr_info)
+{
+ int level;
+
+ memset(lgr_info, 0, sizeof(*lgr_info));
+ stfle(lgr_info->stfle_fac_list, ARRAY_SIZE(lgr_info->stfle_fac_list));
+ level = stsi(NULL, 0, 0, 0);
+ lgr_info->level = level;
+ if (level >= 1)
+ lgr_stsi_1_1_1(lgr_info);
+ if (level >= 2)
+ lgr_stsi_2_2_2(lgr_info);
+ if (level >= 3)
+ lgr_stsi_3_2_2(lgr_info);
+}
+
+/*
+ * Check if LGR info has changed and if yes log new LGR info to s390dbf
+ */
+void lgr_info_log(void)
+{
+ static DEFINE_SPINLOCK(lgr_info_lock);
+ unsigned long flags;
+
+ if (!spin_trylock_irqsave(&lgr_info_lock, flags))
+ return;
+ lgr_info_get(&lgr_info_cur);
+ if (memcmp(&lgr_info_last, &lgr_info_cur, sizeof(lgr_info_cur)) != 0) {
+ debug_event(lgr_dbf, 1, &lgr_info_cur, sizeof(lgr_info_cur));
+ lgr_info_last = lgr_info_cur;
+ }
+ spin_unlock_irqrestore(&lgr_info_lock, flags);
+}
+EXPORT_SYMBOL_GPL(lgr_info_log);
+
+static void lgr_timer_set(void);
+
+/*
+ * LGR timer callback
+ */
+static void lgr_timer_fn(struct timer_list *unused)
+{
+ lgr_info_log();
+ lgr_timer_set();
+}
+
+static struct timer_list lgr_timer;
+
+/*
+ * Setup next LGR timer
+ */
+static void lgr_timer_set(void)
+{
+ mod_timer(&lgr_timer, jiffies + LGR_TIMER_INTERVAL_SECS * HZ);
+}
+
+/*
+ * Initialize LGR: Add s390dbf, write initial lgr_info and setup timer
+ */
+static int __init lgr_init(void)
+{
+ lgr_dbf = debug_register("lgr", 1, 1, sizeof(struct lgr_info));
+ if (!lgr_dbf)
+ return -ENOMEM;
+ debug_register_view(lgr_dbf, &debug_hex_ascii_view);
+ lgr_info_get(&lgr_info_last);
+ debug_event(lgr_dbf, 1, &lgr_info_last, sizeof(lgr_info_last));
+ timer_setup(&lgr_timer, lgr_timer_fn, TIMER_DEFERRABLE);
+ lgr_timer_set();
+ return 0;
+}
+device_initcall(lgr_init);
diff --git a/arch/s390/kernel/machine_kexec.c b/arch/s390/kernel/machine_kexec.c
new file mode 100644
index 000000000..b7020e721
--- /dev/null
+++ b/arch/s390/kernel/machine_kexec.c
@@ -0,0 +1,304 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright IBM Corp. 2005, 2011
+ *
+ * Author(s): Rolf Adelsberger,
+ * Heiko Carstens <heiko.carstens@de.ibm.com>
+ * Michael Holzheu <holzheu@linux.vnet.ibm.com>
+ */
+
+#include <linux/device.h>
+#include <linux/mm.h>
+#include <linux/kexec.h>
+#include <linux/delay.h>
+#include <linux/reboot.h>
+#include <linux/ftrace.h>
+#include <linux/debug_locks.h>
+#include <linux/suspend.h>
+#include <asm/cio.h>
+#include <asm/setup.h>
+#include <asm/pgtable.h>
+#include <asm/pgalloc.h>
+#include <asm/smp.h>
+#include <asm/ipl.h>
+#include <asm/diag.h>
+#include <asm/elf.h>
+#include <asm/asm-offsets.h>
+#include <asm/cacheflush.h>
+#include <asm/os_info.h>
+#include <asm/set_memory.h>
+#include <asm/switch_to.h>
+#include <asm/nmi.h>
+
+typedef void (*relocate_kernel_t)(kimage_entry_t *, unsigned long);
+
+extern const unsigned char relocate_kernel[];
+extern const unsigned long long relocate_kernel_len;
+
+#ifdef CONFIG_CRASH_DUMP
+
+/*
+ * PM notifier callback for kdump
+ */
+static int machine_kdump_pm_cb(struct notifier_block *nb, unsigned long action,
+ void *ptr)
+{
+ switch (action) {
+ case PM_SUSPEND_PREPARE:
+ case PM_HIBERNATION_PREPARE:
+ if (kexec_crash_image)
+ arch_kexec_unprotect_crashkres();
+ break;
+ case PM_POST_SUSPEND:
+ case PM_POST_HIBERNATION:
+ if (kexec_crash_image)
+ arch_kexec_protect_crashkres();
+ break;
+ default:
+ return NOTIFY_DONE;
+ }
+ return NOTIFY_OK;
+}
+
+static int __init machine_kdump_pm_init(void)
+{
+ pm_notifier(machine_kdump_pm_cb, 0);
+ return 0;
+}
+arch_initcall(machine_kdump_pm_init);
+
+/*
+ * Reset the system, copy boot CPU registers to absolute zero,
+ * and jump to the kdump image
+ */
+static void __do_machine_kdump(void *image)
+{
+ int (*start_kdump)(int);
+ unsigned long prefix;
+
+ /* store_status() saved the prefix register to lowcore */
+ prefix = (unsigned long) S390_lowcore.prefixreg_save_area;
+
+ /* Now do the reset */
+ s390_reset_system();
+
+ /*
+ * Copy dump CPU store status info to absolute zero.
+ * This needs to be done *after* s390_reset_system has set the
+ * prefix register of this CPU to zero.
+ */
+ memcpy((void *) __LC_FPREGS_SAVE_AREA,
+ (void *)(prefix + __LC_FPREGS_SAVE_AREA), 512);
+
+ __load_psw_mask(PSW_MASK_BASE | PSW_DEFAULT_KEY | PSW_MASK_EA | PSW_MASK_BA);
+ start_kdump = (void *)((struct kimage *) image)->start;
+ start_kdump(1);
+
+ /* Die if start_kdump returns */
+ disabled_wait((unsigned long) __builtin_return_address(0));
+}
+
+/*
+ * Start kdump: create a LGR log entry, store status of all CPUs and
+ * branch to __do_machine_kdump.
+ */
+static noinline void __machine_kdump(void *image)
+{
+ struct mcesa *mcesa;
+ union ctlreg2 cr2_old, cr2_new;
+ int this_cpu, cpu;
+
+ lgr_info_log();
+ /* Get status of the other CPUs */
+ this_cpu = smp_find_processor_id(stap());
+ for_each_online_cpu(cpu) {
+ if (cpu == this_cpu)
+ continue;
+ if (smp_store_status(cpu))
+ continue;
+ }
+ /* Store status of the boot CPU */
+ mcesa = (struct mcesa *)(S390_lowcore.mcesad & MCESA_ORIGIN_MASK);
+ if (MACHINE_HAS_VX)
+ save_vx_regs((__vector128 *) mcesa->vector_save_area);
+ if (MACHINE_HAS_GS) {
+ __ctl_store(cr2_old.val, 2, 2);
+ cr2_new = cr2_old;
+ cr2_new.gse = 1;
+ __ctl_load(cr2_new.val, 2, 2);
+ save_gs_cb((struct gs_cb *) mcesa->guarded_storage_save_area);
+ __ctl_load(cr2_old.val, 2, 2);
+ }
+ /*
+ * To create a good backchain for this CPU in the dump store_status
+ * is passed the address of a function. The address is saved into
+ * the PSW save area of the boot CPU and the function is invoked as
+ * a tail call of store_status. The backchain in the dump will look
+ * like this:
+ * restart_int_handler -> __machine_kexec -> __do_machine_kdump
+ * The call to store_status() will not return.
+ */
+ store_status(__do_machine_kdump, image);
+}
+#endif
+
+/*
+ * Check if kdump checksums are valid: We call purgatory with parameter "0"
+ */
+static bool kdump_csum_valid(struct kimage *image)
+{
+#ifdef CONFIG_CRASH_DUMP
+ int (*start_kdump)(int) = (void *)image->start;
+ int rc;
+
+ __arch_local_irq_stnsm(0xfb); /* disable DAT */
+ rc = start_kdump(0);
+ __arch_local_irq_stosm(0x04); /* enable DAT */
+ return rc == 0;
+#else
+ return false;
+#endif
+}
+
+#ifdef CONFIG_CRASH_DUMP
+
+void crash_free_reserved_phys_range(unsigned long begin, unsigned long end)
+{
+ unsigned long addr, size;
+
+ for (addr = begin; addr < end; addr += PAGE_SIZE)
+ free_reserved_page(pfn_to_page(addr >> PAGE_SHIFT));
+ size = begin - crashk_res.start;
+ if (size)
+ os_info_crashkernel_add(crashk_res.start, size);
+ else
+ os_info_crashkernel_add(0, 0);
+}
+
+static void crash_protect_pages(int protect)
+{
+ unsigned long size;
+
+ if (!crashk_res.end)
+ return;
+ size = resource_size(&crashk_res);
+ if (protect)
+ set_memory_ro(crashk_res.start, size >> PAGE_SHIFT);
+ else
+ set_memory_rw(crashk_res.start, size >> PAGE_SHIFT);
+}
+
+void arch_kexec_protect_crashkres(void)
+{
+ crash_protect_pages(1);
+}
+
+void arch_kexec_unprotect_crashkres(void)
+{
+ crash_protect_pages(0);
+}
+
+#endif
+
+/*
+ * Give back memory to the hypervisor before a new kdump kernel is loaded
+ */
+static int machine_kexec_prepare_kdump(void)
+{
+#ifdef CONFIG_CRASH_DUMP
+ if (MACHINE_IS_VM)
+ diag10_range(PFN_DOWN(crashk_res.start),
+ PFN_DOWN(crashk_res.end - crashk_res.start + 1));
+ return 0;
+#else
+ return -EINVAL;
+#endif
+}
+
+int machine_kexec_prepare(struct kimage *image)
+{
+ void *reboot_code_buffer;
+
+ if (image->type == KEXEC_TYPE_CRASH)
+ return machine_kexec_prepare_kdump();
+
+ /* We don't support anything but the default image type for now. */
+ if (image->type != KEXEC_TYPE_DEFAULT)
+ return -EINVAL;
+
+ /* Get the destination where the assembler code should be copied to. */
+ reboot_code_buffer = (void *) page_to_phys(image->control_code_page);
+
+ /* Then copy it */
+ memcpy(reboot_code_buffer, relocate_kernel, relocate_kernel_len);
+ return 0;
+}
+
+void machine_kexec_cleanup(struct kimage *image)
+{
+}
+
+void arch_crash_save_vmcoreinfo(void)
+{
+ VMCOREINFO_SYMBOL(lowcore_ptr);
+ VMCOREINFO_SYMBOL(high_memory);
+ VMCOREINFO_LENGTH(lowcore_ptr, NR_CPUS);
+ mem_assign_absolute(S390_lowcore.vmcore_info, paddr_vmcoreinfo_note());
+}
+
+void machine_shutdown(void)
+{
+}
+
+void machine_crash_shutdown(struct pt_regs *regs)
+{
+ set_os_info_reipl_block();
+}
+
+/*
+ * Do normal kexec
+ */
+static void __do_machine_kexec(void *data)
+{
+ relocate_kernel_t data_mover;
+ struct kimage *image = data;
+
+ s390_reset_system();
+ data_mover = (relocate_kernel_t) page_to_phys(image->control_code_page);
+
+ __arch_local_irq_stnsm(0xfb); /* disable DAT - avoid no-execute */
+ /* Call the moving routine */
+ (*data_mover)(&image->head, image->start);
+
+ /* Die if kexec returns */
+ disabled_wait((unsigned long) __builtin_return_address(0));
+}
+
+/*
+ * Reset system and call either kdump or normal kexec
+ */
+static void __machine_kexec(void *data)
+{
+ __arch_local_irq_stosm(0x04); /* enable DAT */
+ pfault_fini();
+ tracing_off();
+ debug_locks_off();
+#ifdef CONFIG_CRASH_DUMP
+ if (((struct kimage *) data)->type == KEXEC_TYPE_CRASH)
+ __machine_kdump(data);
+#endif
+ __do_machine_kexec(data);
+}
+
+/*
+ * Do either kdump or normal kexec. In case of kdump we first ask
+ * purgatory whether the kdump checksums are valid.
+ */
+void machine_kexec(struct kimage *image)
+{
+ if (image->type == KEXEC_TYPE_CRASH && !kdump_csum_valid(image))
+ return;
+ tracer_disable();
+ smp_send_stop();
+ smp_call_ipl_cpu(__machine_kexec, image);
+}
diff --git a/arch/s390/kernel/machine_kexec_file.c b/arch/s390/kernel/machine_kexec_file.c
new file mode 100644
index 000000000..f413f57f8
--- /dev/null
+++ b/arch/s390/kernel/machine_kexec_file.c
@@ -0,0 +1,245 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * s390 code for kexec_file_load system call
+ *
+ * Copyright IBM Corp. 2018
+ *
+ * Author(s): Philipp Rudo <prudo@linux.vnet.ibm.com>
+ */
+
+#include <linux/elf.h>
+#include <linux/kexec.h>
+#include <asm/setup.h>
+
+const struct kexec_file_ops * const kexec_file_loaders[] = {
+ &s390_kexec_elf_ops,
+ &s390_kexec_image_ops,
+ NULL,
+};
+
+int *kexec_file_update_kernel(struct kimage *image,
+ struct s390_load_data *data)
+{
+ unsigned long *loc;
+
+ if (image->cmdline_buf_len >= ARCH_COMMAND_LINE_SIZE)
+ return ERR_PTR(-EINVAL);
+
+ if (image->cmdline_buf_len)
+ memcpy(data->kernel_buf + COMMAND_LINE_OFFSET,
+ image->cmdline_buf, image->cmdline_buf_len);
+
+ if (image->type == KEXEC_TYPE_CRASH) {
+ loc = (unsigned long *)(data->kernel_buf + OLDMEM_BASE_OFFSET);
+ *loc = crashk_res.start;
+
+ loc = (unsigned long *)(data->kernel_buf + OLDMEM_SIZE_OFFSET);
+ *loc = crashk_res.end - crashk_res.start + 1;
+ }
+
+ if (image->initrd_buf) {
+ loc = (unsigned long *)(data->kernel_buf + INITRD_START_OFFSET);
+ *loc = data->initrd_load_addr;
+
+ loc = (unsigned long *)(data->kernel_buf + INITRD_SIZE_OFFSET);
+ *loc = image->initrd_buf_len;
+ }
+
+ return NULL;
+}
+
+static int kexec_file_update_purgatory(struct kimage *image)
+{
+ u64 entry, type;
+ int ret;
+
+ if (image->type == KEXEC_TYPE_CRASH) {
+ entry = STARTUP_KDUMP_OFFSET;
+ type = KEXEC_TYPE_CRASH;
+ } else {
+ entry = STARTUP_NORMAL_OFFSET;
+ type = KEXEC_TYPE_DEFAULT;
+ }
+
+ ret = kexec_purgatory_get_set_symbol(image, "kernel_entry", &entry,
+ sizeof(entry), false);
+ if (ret)
+ return ret;
+
+ ret = kexec_purgatory_get_set_symbol(image, "kernel_type", &type,
+ sizeof(type), false);
+ if (ret)
+ return ret;
+
+ if (image->type == KEXEC_TYPE_CRASH) {
+ u64 crash_size;
+
+ ret = kexec_purgatory_get_set_symbol(image, "crash_start",
+ &crashk_res.start,
+ sizeof(crashk_res.start),
+ false);
+ if (ret)
+ return ret;
+
+ crash_size = crashk_res.end - crashk_res.start + 1;
+ ret = kexec_purgatory_get_set_symbol(image, "crash_size",
+ &crash_size,
+ sizeof(crash_size),
+ false);
+ }
+ return ret;
+}
+
+int kexec_file_add_purgatory(struct kimage *image, struct s390_load_data *data)
+{
+ struct kexec_buf buf;
+ int ret;
+
+ buf.image = image;
+
+ data->memsz = ALIGN(data->memsz, PAGE_SIZE);
+ buf.mem = data->memsz;
+ if (image->type == KEXEC_TYPE_CRASH)
+ buf.mem += crashk_res.start;
+
+ ret = kexec_load_purgatory(image, &buf);
+ if (ret)
+ return ret;
+
+ ret = kexec_file_update_purgatory(image);
+ return ret;
+}
+
+int kexec_file_add_initrd(struct kimage *image, struct s390_load_data *data,
+ char *initrd, unsigned long initrd_len)
+{
+ struct kexec_buf buf;
+ int ret;
+
+ buf.image = image;
+
+ buf.buffer = initrd;
+ buf.bufsz = initrd_len;
+
+ data->memsz = ALIGN(data->memsz, PAGE_SIZE);
+ buf.mem = data->memsz;
+ if (image->type == KEXEC_TYPE_CRASH)
+ buf.mem += crashk_res.start;
+ buf.memsz = buf.bufsz;
+
+ data->initrd_load_addr = buf.mem;
+ data->memsz += buf.memsz;
+
+ ret = kexec_add_buffer(&buf);
+ return ret;
+}
+
+/*
+ * The kernel is loaded to a fixed location. Turn off kexec_locate_mem_hole
+ * and provide kbuf->mem by hand.
+ */
+int arch_kexec_walk_mem(struct kexec_buf *kbuf,
+ int (*func)(struct resource *, void *))
+{
+ return 1;
+}
+
+int arch_kexec_apply_relocations_add(struct purgatory_info *pi,
+ Elf_Shdr *section,
+ const Elf_Shdr *relsec,
+ const Elf_Shdr *symtab)
+{
+ Elf_Rela *relas;
+ int i;
+
+ relas = (void *)pi->ehdr + relsec->sh_offset;
+
+ for (i = 0; i < relsec->sh_size / sizeof(*relas); i++) {
+ const Elf_Sym *sym; /* symbol to relocate */
+ unsigned long addr; /* final location after relocation */
+ unsigned long val; /* relocated symbol value */
+ void *loc; /* tmp location to modify */
+
+ sym = (void *)pi->ehdr + symtab->sh_offset;
+ sym += ELF64_R_SYM(relas[i].r_info);
+
+ if (sym->st_shndx == SHN_UNDEF)
+ return -ENOEXEC;
+
+ if (sym->st_shndx == SHN_COMMON)
+ return -ENOEXEC;
+
+ if (sym->st_shndx >= pi->ehdr->e_shnum &&
+ sym->st_shndx != SHN_ABS)
+ return -ENOEXEC;
+
+ loc = pi->purgatory_buf;
+ loc += section->sh_offset;
+ loc += relas[i].r_offset;
+
+ val = sym->st_value;
+ if (sym->st_shndx != SHN_ABS)
+ val += pi->sechdrs[sym->st_shndx].sh_addr;
+ val += relas[i].r_addend;
+
+ addr = section->sh_addr + relas[i].r_offset;
+
+ switch (ELF64_R_TYPE(relas[i].r_info)) {
+ case R_390_8: /* Direct 8 bit. */
+ *(u8 *)loc = val;
+ break;
+ case R_390_12: /* Direct 12 bit. */
+ *(u16 *)loc &= 0xf000;
+ *(u16 *)loc |= val & 0xfff;
+ break;
+ case R_390_16: /* Direct 16 bit. */
+ *(u16 *)loc = val;
+ break;
+ case R_390_20: /* Direct 20 bit. */
+ *(u32 *)loc &= 0xf00000ff;
+ *(u32 *)loc |= (val & 0xfff) << 16; /* DL */
+ *(u32 *)loc |= (val & 0xff000) >> 4; /* DH */
+ break;
+ case R_390_32: /* Direct 32 bit. */
+ *(u32 *)loc = val;
+ break;
+ case R_390_64: /* Direct 64 bit. */
+ *(u64 *)loc = val;
+ break;
+ case R_390_PC16: /* PC relative 16 bit. */
+ *(u16 *)loc = (val - addr);
+ break;
+ case R_390_PC16DBL: /* PC relative 16 bit shifted by 1. */
+ *(u16 *)loc = (val - addr) >> 1;
+ break;
+ case R_390_PC32DBL: /* PC relative 32 bit shifted by 1. */
+ *(u32 *)loc = (val - addr) >> 1;
+ break;
+ case R_390_PC32: /* PC relative 32 bit. */
+ *(u32 *)loc = (val - addr);
+ break;
+ case R_390_PC64: /* PC relative 64 bit. */
+ *(u64 *)loc = (val - addr);
+ break;
+ default:
+ break;
+ }
+ }
+ return 0;
+}
+
+int arch_kexec_kernel_image_probe(struct kimage *image, void *buf,
+ unsigned long buf_len)
+{
+ /* A kernel must be at least large enough to contain head.S. During
+ * load, memory in head.S will be accessed, e.g. to register the next
+ * command line. If the next kernel were smaller, the current kernel
+ * would panic at load.
+ *
+ * 0x11000 = sizeof(head.S)
+ */
+ if (buf_len < 0x11000)
+ return -ENOEXEC;
+
+ return kexec_image_probe_default(image, buf, buf_len);
+}
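For the PC-relative "*DBL" relocation types handled above, the stored field is the halfword distance between the relocated location and the target, hence the (val - addr) >> 1 in the switch. A tiny worked example with invented addresses (user-space C, purely illustrative):

        #include <stdint.h>
        #include <stdio.h>

        int main(void)
        {
                uint64_t addr = 0x20000;        /* invented: address of the relocated field */
                uint64_t val  = 0x20400;        /* invented: symbol value + addend */

                /* R_390_PC32DBL: store the signed halfword offset, as in the hunk above */
                uint32_t field = (uint32_t)((val - addr) >> 1);

                printf("byte distance 0x%llx -> stored halfword offset 0x%x\n",
                       (unsigned long long)(val - addr), field);       /* 0x400 -> 0x200 */
                return 0;
        }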
diff --git a/arch/s390/kernel/mcount.S b/arch/s390/kernel/mcount.S
new file mode 100644
index 000000000..a59e6b2b2
--- /dev/null
+++ b/arch/s390/kernel/mcount.S
@@ -0,0 +1,104 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright IBM Corp. 2008, 2009
+ *
+ * Author(s): Heiko Carstens <heiko.carstens@de.ibm.com>,
+ *
+ */
+
+#include <linux/linkage.h>
+#include <asm/asm-offsets.h>
+#include <asm/ftrace.h>
+#include <asm/nospec-insn.h>
+#include <asm/ptrace.h>
+#include <asm/export.h>
+
+ GEN_BR_THUNK %r1
+ GEN_BR_THUNK %r14
+
+ .section .kprobes.text, "ax"
+
+ENTRY(ftrace_stub)
+ BR_EX %r14
+
+#define STACK_FRAME_SIZE (STACK_FRAME_OVERHEAD + __PT_SIZE)
+#define STACK_PTREGS (STACK_FRAME_OVERHEAD)
+#define STACK_PTREGS_GPRS (STACK_PTREGS + __PT_GPRS)
+#define STACK_PTREGS_PSW (STACK_PTREGS + __PT_PSW)
+#ifdef __PACK_STACK
+/* allocate just enough for r14, r15 and backchain */
+#define TRACED_FUNC_FRAME_SIZE 24
+#else
+#define TRACED_FUNC_FRAME_SIZE STACK_FRAME_OVERHEAD
+#endif
+
+ENTRY(_mcount)
+ BR_EX %r14
+
+EXPORT_SYMBOL(_mcount)
+
+ENTRY(ftrace_caller)
+ .globl ftrace_regs_caller
+ .set ftrace_regs_caller,ftrace_caller
+ stg %r14,(__SF_GPRS+8*8)(%r15) # save traced function caller
+ lgr %r1,%r15
+#if !(defined(CC_USING_HOTPATCH) || defined(CC_USING_NOP_MCOUNT))
+ aghi %r0,MCOUNT_RETURN_FIXUP
+#endif
+ # allocate stack frame for ftrace_caller to contain traced function
+ aghi %r15,-TRACED_FUNC_FRAME_SIZE
+ stg %r1,__SF_BACKCHAIN(%r15)
+ stg %r0,(__SF_GPRS+8*8)(%r15)
+ stg %r15,(__SF_GPRS+9*8)(%r15)
+ # allocate pt_regs and stack frame for ftrace_trace_function
+ aghi %r15,-STACK_FRAME_SIZE
+ stg %r1,(STACK_PTREGS_GPRS+15*8)(%r15)
+ aghi %r1,-TRACED_FUNC_FRAME_SIZE
+ stg %r1,__SF_BACKCHAIN(%r15)
+ stg %r0,(STACK_PTREGS_PSW+8)(%r15)
+ stmg %r2,%r14,(STACK_PTREGS_GPRS+2*8)(%r15)
+#ifdef CONFIG_HAVE_MARCH_Z196_FEATURES
+ aghik %r2,%r0,-MCOUNT_INSN_SIZE
+ lgrl %r4,function_trace_op
+ lgrl %r1,ftrace_func
+#else
+ lgr %r2,%r0
+ aghi %r2,-MCOUNT_INSN_SIZE
+ larl %r4,function_trace_op
+ lg %r4,0(%r4)
+ larl %r1,ftrace_func
+ lg %r1,0(%r1)
+#endif
+ lgr %r3,%r14
+ la %r5,STACK_PTREGS(%r15)
+ BASR_EX %r14,%r1
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+# The j instruction gets runtime patched to a nop instruction.
+# See ftrace_enable_ftrace_graph_caller.
+ENTRY(ftrace_graph_caller)
+ j ftrace_graph_caller_end
+ lg %r2,(STACK_PTREGS_GPRS+14*8)(%r15)
+ lg %r3,(STACK_PTREGS_PSW+8)(%r15)
+ brasl %r14,prepare_ftrace_return
+ stg %r2,(STACK_PTREGS_GPRS+14*8)(%r15)
+ftrace_graph_caller_end:
+ .globl ftrace_graph_caller_end
+#endif
+ lg %r1,(STACK_PTREGS_PSW+8)(%r15)
+ lmg %r2,%r15,(STACK_PTREGS_GPRS+2*8)(%r15)
+ BR_EX %r1
+
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+
+ENTRY(return_to_handler)
+ stmg %r2,%r5,32(%r15)
+ lgr %r1,%r15
+ aghi %r15,-STACK_FRAME_OVERHEAD
+ stg %r1,__SF_BACKCHAIN(%r15)
+ brasl %r14,ftrace_return_to_handler
+ aghi %r15,STACK_FRAME_OVERHEAD
+ lgr %r14,%r2
+ lmg %r2,%r5,32(%r15)
+ BR_EX %r14
+
+#endif
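ftrace_caller above saves the registers into a pt_regs frame and calls the function stored in ftrace_func with (ip, parent_ip, function_trace_op, pt_regs). Tracers attach through the generic ftrace_ops interface rather than calling this trampoline directly; a hedged sketch for kernels of this vintage (the callback prototype changed in later releases, and all names here are illustrative):

        #include <linux/ftrace.h>
        #include <linux/module.h>

        static void my_trace_func(unsigned long ip, unsigned long parent_ip,
                                  struct ftrace_ops *op, struct pt_regs *regs)
        {
                /* called from ftrace_caller via the ftrace_func pointer;
                 * regs is populated because FTRACE_OPS_FL_SAVE_REGS is set */
        }

        static struct ftrace_ops my_ops = {
                .func   = my_trace_func,
                .flags  = FTRACE_OPS_FL_SAVE_REGS,
        };

        static int __init my_trace_init(void)
        {
                return register_ftrace_function(&my_ops);       /* traces all functions by default */
        }

        static void __exit my_trace_exit(void)
        {
                unregister_ftrace_function(&my_ops);
        }

        module_init(my_trace_init);
        module_exit(my_trace_exit);
        MODULE_LICENSE("GPL");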
diff --git a/arch/s390/kernel/module.c b/arch/s390/kernel/module.c
new file mode 100644
index 000000000..d298d3cb4
--- /dev/null
+++ b/arch/s390/kernel/module.c
@@ -0,0 +1,478 @@
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * Kernel module help for s390.
+ *
+ * S390 version
+ * Copyright IBM Corp. 2002, 2003
+ * Author(s): Arnd Bergmann (arndb@de.ibm.com)
+ * Martin Schwidefsky (schwidefsky@de.ibm.com)
+ *
+ * based on i386 version
+ * Copyright (C) 2001 Rusty Russell.
+ */
+#include <linux/module.h>
+#include <linux/elf.h>
+#include <linux/vmalloc.h>
+#include <linux/fs.h>
+#include <linux/string.h>
+#include <linux/kernel.h>
+#include <linux/moduleloader.h>
+#include <linux/bug.h>
+#include <asm/alternative.h>
+#include <asm/nospec-branch.h>
+#include <asm/facility.h>
+
+#if 0
+#define DEBUGP printk
+#else
+#define DEBUGP(fmt , ...)
+#endif
+
+#define PLT_ENTRY_SIZE 20
+
+void *module_alloc(unsigned long size)
+{
+ if (PAGE_ALIGN(size) > MODULES_LEN)
+ return NULL;
+ return __vmalloc_node_range(size, 1, MODULES_VADDR, MODULES_END,
+ GFP_KERNEL, PAGE_KERNEL_EXEC,
+ 0, NUMA_NO_NODE,
+ __builtin_return_address(0));
+}
+
+void module_arch_freeing_init(struct module *mod)
+{
+ if (is_livepatch_module(mod) &&
+ mod->state == MODULE_STATE_LIVE)
+ return;
+
+ vfree(mod->arch.syminfo);
+ mod->arch.syminfo = NULL;
+}
+
+static void check_rela(Elf_Rela *rela, struct module *me)
+{
+ struct mod_arch_syminfo *info;
+
+ info = me->arch.syminfo + ELF_R_SYM (rela->r_info);
+ switch (ELF_R_TYPE (rela->r_info)) {
+ case R_390_GOT12: /* 12 bit GOT offset. */
+ case R_390_GOT16: /* 16 bit GOT offset. */
+ case R_390_GOT20: /* 20 bit GOT offset. */
+ case R_390_GOT32: /* 32 bit GOT offset. */
+ case R_390_GOT64: /* 64 bit GOT offset. */
+ case R_390_GOTENT: /* 32 bit PC rel. to GOT entry shifted by 1. */
+ case R_390_GOTPLT12: /* 12 bit offset to jump slot. */
+ case R_390_GOTPLT16: /* 16 bit offset to jump slot. */
+ case R_390_GOTPLT20: /* 20 bit offset to jump slot. */
+ case R_390_GOTPLT32: /* 32 bit offset to jump slot. */
+ case R_390_GOTPLT64: /* 64 bit offset to jump slot. */
+ case R_390_GOTPLTENT: /* 32 bit rel. offset to jump slot >> 1. */
+ if (info->got_offset == -1UL) {
+ info->got_offset = me->arch.got_size;
+ me->arch.got_size += sizeof(void*);
+ }
+ break;
+ case R_390_PLT16DBL: /* 16 bit PC rel. PLT shifted by 1. */
+ case R_390_PLT32DBL: /* 32 bit PC rel. PLT shifted by 1. */
+ case R_390_PLT32: /* 32 bit PC relative PLT address. */
+ case R_390_PLT64: /* 64 bit PC relative PLT address. */
+ case R_390_PLTOFF16: /* 16 bit offset from GOT to PLT. */
+ case R_390_PLTOFF32: /* 32 bit offset from GOT to PLT. */
+ case R_390_PLTOFF64: /* 64 bit offset from GOT to PLT. */
+ if (info->plt_offset == -1UL) {
+ info->plt_offset = me->arch.plt_size;
+ me->arch.plt_size += PLT_ENTRY_SIZE;
+ }
+ break;
+ case R_390_COPY:
+ case R_390_GLOB_DAT:
+ case R_390_JMP_SLOT:
+ case R_390_RELATIVE:
+ /* Only needed if we want to support loading of
+ modules linked with -shared. */
+ break;
+ }
+}
+
+/*
+ * Account for GOT and PLT relocations. We can't add sections for
+ * got and plt but we can increase the core module size.
+ */
+int module_frob_arch_sections(Elf_Ehdr *hdr, Elf_Shdr *sechdrs,
+ char *secstrings, struct module *me)
+{
+ Elf_Shdr *symtab;
+ Elf_Sym *symbols;
+ Elf_Rela *rela;
+ char *strings;
+ int nrela, i, j;
+
+ /* Find symbol table and string table. */
+ symtab = NULL;
+ for (i = 0; i < hdr->e_shnum; i++)
+ switch (sechdrs[i].sh_type) {
+ case SHT_SYMTAB:
+ symtab = sechdrs + i;
+ break;
+ }
+ if (!symtab) {
+ printk(KERN_ERR "module %s: no symbol table\n", me->name);
+ return -ENOEXEC;
+ }
+
+ /* Allocate one syminfo structure per symbol. */
+ me->arch.nsyms = symtab->sh_size / sizeof(Elf_Sym);
+ me->arch.syminfo = vmalloc(array_size(sizeof(struct mod_arch_syminfo),
+ me->arch.nsyms));
+ if (!me->arch.syminfo)
+ return -ENOMEM;
+ symbols = (void *) hdr + symtab->sh_offset;
+ strings = (void *) hdr + sechdrs[symtab->sh_link].sh_offset;
+ for (i = 0; i < me->arch.nsyms; i++) {
+ if (symbols[i].st_shndx == SHN_UNDEF &&
+ strcmp(strings + symbols[i].st_name,
+ "_GLOBAL_OFFSET_TABLE_") == 0)
+ /* "Define" it as absolute. */
+ symbols[i].st_shndx = SHN_ABS;
+ me->arch.syminfo[i].got_offset = -1UL;
+ me->arch.syminfo[i].plt_offset = -1UL;
+ me->arch.syminfo[i].got_initialized = 0;
+ me->arch.syminfo[i].plt_initialized = 0;
+ }
+
+ /* Search for got/plt relocations. */
+ me->arch.got_size = me->arch.plt_size = 0;
+ for (i = 0; i < hdr->e_shnum; i++) {
+ if (sechdrs[i].sh_type != SHT_RELA)
+ continue;
+ nrela = sechdrs[i].sh_size / sizeof(Elf_Rela);
+ rela = (void *) hdr + sechdrs[i].sh_offset;
+ for (j = 0; j < nrela; j++)
+ check_rela(rela + j, me);
+ }
+
+ /* Increase core size by size of got & plt and set start
+ offsets for got and plt. */
+ me->core_layout.size = ALIGN(me->core_layout.size, 4);
+ me->arch.got_offset = me->core_layout.size;
+ me->core_layout.size += me->arch.got_size;
+ me->arch.plt_offset = me->core_layout.size;
+ if (me->arch.plt_size) {
+ if (IS_ENABLED(CONFIG_EXPOLINE) && !nospec_disable)
+ me->arch.plt_size += PLT_ENTRY_SIZE;
+ me->core_layout.size += me->arch.plt_size;
+ }
+ return 0;
+}
+
+static int apply_rela_bits(Elf_Addr loc, Elf_Addr val,
+ int sign, int bits, int shift)
+{
+ unsigned long umax;
+ long min, max;
+
+ if (val & ((1UL << shift) - 1))
+ return -ENOEXEC;
+ if (sign) {
+ val = (Elf_Addr)(((long) val) >> shift);
+ min = -(1L << (bits - 1));
+ max = (1L << (bits - 1)) - 1;
+ if ((long) val < min || (long) val > max)
+ return -ENOEXEC;
+ } else {
+ val >>= shift;
+ umax = ((1UL << (bits - 1)) << 1) - 1;
+ if ((unsigned long) val > umax)
+ return -ENOEXEC;
+ }
+
+ if (bits == 8)
+ *(unsigned char *) loc = val;
+ else if (bits == 12)
+ *(unsigned short *) loc = (val & 0xfff) |
+ (*(unsigned short *) loc & 0xf000);
+ else if (bits == 16)
+ *(unsigned short *) loc = val;
+ else if (bits == 20)
+ *(unsigned int *) loc = (val & 0xfff) << 16 |
+ (val & 0xff000) >> 4 |
+ (*(unsigned int *) loc & 0xf00000ff);
+ else if (bits == 32)
+ *(unsigned int *) loc = val;
+ else if (bits == 64)
+ *(unsigned long *) loc = val;
+ return 0;
+}
+
+static int apply_rela(Elf_Rela *rela, Elf_Addr base, Elf_Sym *symtab,
+ const char *strtab, struct module *me)
+{
+ struct mod_arch_syminfo *info;
+ Elf_Addr loc, val;
+ int r_type, r_sym;
+ int rc = -ENOEXEC;
+
+ /* This is where to make the change */
+ loc = base + rela->r_offset;
+ /* This is the symbol it is referring to. Note that all
+ undefined symbols have been resolved. */
+ r_sym = ELF_R_SYM(rela->r_info);
+ r_type = ELF_R_TYPE(rela->r_info);
+ info = me->arch.syminfo + r_sym;
+ val = symtab[r_sym].st_value;
+
+ switch (r_type) {
+ case R_390_NONE: /* No relocation. */
+ rc = 0;
+ break;
+ case R_390_8: /* Direct 8 bit. */
+ case R_390_12: /* Direct 12 bit. */
+ case R_390_16: /* Direct 16 bit. */
+ case R_390_20: /* Direct 20 bit. */
+ case R_390_32: /* Direct 32 bit. */
+ case R_390_64: /* Direct 64 bit. */
+ val += rela->r_addend;
+ if (r_type == R_390_8)
+ rc = apply_rela_bits(loc, val, 0, 8, 0);
+ else if (r_type == R_390_12)
+ rc = apply_rela_bits(loc, val, 0, 12, 0);
+ else if (r_type == R_390_16)
+ rc = apply_rela_bits(loc, val, 0, 16, 0);
+ else if (r_type == R_390_20)
+ rc = apply_rela_bits(loc, val, 1, 20, 0);
+ else if (r_type == R_390_32)
+ rc = apply_rela_bits(loc, val, 0, 32, 0);
+ else if (r_type == R_390_64)
+ rc = apply_rela_bits(loc, val, 0, 64, 0);
+ break;
+ case R_390_PC16: /* PC relative 16 bit. */
+ case R_390_PC16DBL: /* PC relative 16 bit shifted by 1. */
+ case R_390_PC32DBL: /* PC relative 32 bit shifted by 1. */
+ case R_390_PC32: /* PC relative 32 bit. */
+ case R_390_PC64: /* PC relative 64 bit. */
+ val += rela->r_addend - loc;
+ if (r_type == R_390_PC16)
+ rc = apply_rela_bits(loc, val, 1, 16, 0);
+ else if (r_type == R_390_PC16DBL)
+ rc = apply_rela_bits(loc, val, 1, 16, 1);
+ else if (r_type == R_390_PC32DBL)
+ rc = apply_rela_bits(loc, val, 1, 32, 1);
+ else if (r_type == R_390_PC32)
+ rc = apply_rela_bits(loc, val, 1, 32, 0);
+ else if (r_type == R_390_PC64)
+ rc = apply_rela_bits(loc, val, 1, 64, 0);
+ break;
+ case R_390_GOT12: /* 12 bit GOT offset. */
+ case R_390_GOT16: /* 16 bit GOT offset. */
+ case R_390_GOT20: /* 20 bit GOT offset. */
+ case R_390_GOT32: /* 32 bit GOT offset. */
+ case R_390_GOT64: /* 64 bit GOT offset. */
+ case R_390_GOTENT: /* 32 bit PC rel. to GOT entry shifted by 1. */
+ case R_390_GOTPLT12: /* 12 bit offset to jump slot. */
+ case R_390_GOTPLT20: /* 20 bit offset to jump slot. */
+ case R_390_GOTPLT16: /* 16 bit offset to jump slot. */
+ case R_390_GOTPLT32: /* 32 bit offset to jump slot. */
+ case R_390_GOTPLT64: /* 64 bit offset to jump slot. */
+ case R_390_GOTPLTENT: /* 32 bit rel. offset to jump slot >> 1. */
+ if (info->got_initialized == 0) {
+ Elf_Addr *gotent;
+
+ gotent = me->core_layout.base + me->arch.got_offset +
+ info->got_offset;
+ *gotent = val;
+ info->got_initialized = 1;
+ }
+ val = info->got_offset + rela->r_addend;
+ if (r_type == R_390_GOT12 ||
+ r_type == R_390_GOTPLT12)
+ rc = apply_rela_bits(loc, val, 0, 12, 0);
+ else if (r_type == R_390_GOT16 ||
+ r_type == R_390_GOTPLT16)
+ rc = apply_rela_bits(loc, val, 0, 16, 0);
+ else if (r_type == R_390_GOT20 ||
+ r_type == R_390_GOTPLT20)
+ rc = apply_rela_bits(loc, val, 1, 20, 0);
+ else if (r_type == R_390_GOT32 ||
+ r_type == R_390_GOTPLT32)
+ rc = apply_rela_bits(loc, val, 0, 32, 0);
+ else if (r_type == R_390_GOT64 ||
+ r_type == R_390_GOTPLT64)
+ rc = apply_rela_bits(loc, val, 0, 64, 0);
+ else if (r_type == R_390_GOTENT ||
+ r_type == R_390_GOTPLTENT) {
+ val += (Elf_Addr) me->core_layout.base - loc;
+ rc = apply_rela_bits(loc, val, 1, 32, 1);
+ }
+ break;
+ case R_390_PLT16DBL: /* 16 bit PC rel. PLT shifted by 1. */
+ case R_390_PLT32DBL: /* 32 bit PC rel. PLT shifted by 1. */
+ case R_390_PLT32: /* 32 bit PC relative PLT address. */
+ case R_390_PLT64: /* 64 bit PC relative PLT address. */
+ case R_390_PLTOFF16: /* 16 bit offset from GOT to PLT. */
+ case R_390_PLTOFF32: /* 32 bit offset from GOT to PLT. */
+	case R_390_PLTOFF64:	/* 64 bit offset from GOT to PLT. */
+ if (info->plt_initialized == 0) {
+ unsigned int *ip;
+ ip = me->core_layout.base + me->arch.plt_offset +
+ info->plt_offset;
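+			/* The stub loads the 64-bit target address stored at
+			 * ip[3]/ip[4] into %r1 and branches to it, through the
+			 * expoline thunk if expolines are enabled. */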
+ ip[0] = 0x0d10e310; /* basr 1,0 */
+ ip[1] = 0x100a0004; /* lg 1,10(1) */
+ if (IS_ENABLED(CONFIG_EXPOLINE) && !nospec_disable) {
+ unsigned int *ij;
+ ij = me->core_layout.base +
+ me->arch.plt_offset +
+ me->arch.plt_size - PLT_ENTRY_SIZE;
+ ip[2] = 0xa7f40000 + /* j __jump_r1 */
+ (unsigned int)(u16)
+ (((unsigned long) ij - 8 -
+ (unsigned long) ip) / 2);
+ } else {
+ ip[2] = 0x07f10000; /* br %r1 */
+ }
+ ip[3] = (unsigned int) (val >> 32);
+ ip[4] = (unsigned int) val;
+ info->plt_initialized = 1;
+ }
+ if (r_type == R_390_PLTOFF16 ||
+ r_type == R_390_PLTOFF32 ||
+ r_type == R_390_PLTOFF64)
+ val = me->arch.plt_offset - me->arch.got_offset +
+ info->plt_offset + rela->r_addend;
+ else {
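+			/* If the target of a PLT16DBL/PLT32DBL relocation is
+			 * within direct branch range, branch to it directly;
+			 * otherwise go through the PLT entry built above. */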
+ if (!((r_type == R_390_PLT16DBL &&
+ val - loc + 0xffffUL < 0x1ffffeUL) ||
+ (r_type == R_390_PLT32DBL &&
+ val - loc + 0xffffffffULL < 0x1fffffffeULL)))
+ val = (Elf_Addr) me->core_layout.base +
+ me->arch.plt_offset +
+ info->plt_offset;
+ val += rela->r_addend - loc;
+ }
+ if (r_type == R_390_PLT16DBL)
+ rc = apply_rela_bits(loc, val, 1, 16, 1);
+ else if (r_type == R_390_PLTOFF16)
+ rc = apply_rela_bits(loc, val, 0, 16, 0);
+ else if (r_type == R_390_PLT32DBL)
+ rc = apply_rela_bits(loc, val, 1, 32, 1);
+ else if (r_type == R_390_PLT32 ||
+ r_type == R_390_PLTOFF32)
+ rc = apply_rela_bits(loc, val, 0, 32, 0);
+ else if (r_type == R_390_PLT64 ||
+ r_type == R_390_PLTOFF64)
+ rc = apply_rela_bits(loc, val, 0, 64, 0);
+ break;
+ case R_390_GOTOFF16: /* 16 bit offset to GOT. */
+ case R_390_GOTOFF32: /* 32 bit offset to GOT. */
+ case R_390_GOTOFF64: /* 64 bit offset to GOT. */
+ val = val + rela->r_addend -
+ ((Elf_Addr) me->core_layout.base + me->arch.got_offset);
+ if (r_type == R_390_GOTOFF16)
+ rc = apply_rela_bits(loc, val, 0, 16, 0);
+ else if (r_type == R_390_GOTOFF32)
+ rc = apply_rela_bits(loc, val, 0, 32, 0);
+ else if (r_type == R_390_GOTOFF64)
+ rc = apply_rela_bits(loc, val, 0, 64, 0);
+ break;
+ case R_390_GOTPC: /* 32 bit PC relative offset to GOT. */
+ case R_390_GOTPCDBL: /* 32 bit PC rel. off. to GOT shifted by 1. */
+ val = (Elf_Addr) me->core_layout.base + me->arch.got_offset +
+ rela->r_addend - loc;
+ if (r_type == R_390_GOTPC)
+ rc = apply_rela_bits(loc, val, 1, 32, 0);
+ else if (r_type == R_390_GOTPCDBL)
+ rc = apply_rela_bits(loc, val, 1, 32, 1);
+ break;
+ case R_390_COPY:
+ case R_390_GLOB_DAT: /* Create GOT entry. */
+ case R_390_JMP_SLOT: /* Create PLT entry. */
+ case R_390_RELATIVE: /* Adjust by program base. */
+ /* Only needed if we want to support loading of
+ modules linked with -shared. */
+ return -ENOEXEC;
+ default:
+ printk(KERN_ERR "module %s: unknown relocation: %u\n",
+ me->name, r_type);
+ return -ENOEXEC;
+ }
+ if (rc) {
+ printk(KERN_ERR "module %s: relocation error for symbol %s "
+ "(r_type %i, value 0x%lx)\n",
+ me->name, strtab + symtab[r_sym].st_name,
+ r_type, (unsigned long) val);
+ return rc;
+ }
+ return 0;
+}
+
+int apply_relocate_add(Elf_Shdr *sechdrs, const char *strtab,
+ unsigned int symindex, unsigned int relsec,
+ struct module *me)
+{
+ Elf_Addr base;
+ Elf_Sym *symtab;
+ Elf_Rela *rela;
+ unsigned long i, n;
+ int rc;
+
+ DEBUGP("Applying relocate section %u to %u\n",
+ relsec, sechdrs[relsec].sh_info);
+ base = sechdrs[sechdrs[relsec].sh_info].sh_addr;
+ symtab = (Elf_Sym *) sechdrs[symindex].sh_addr;
+ rela = (Elf_Rela *) sechdrs[relsec].sh_addr;
+ n = sechdrs[relsec].sh_size / sizeof(Elf_Rela);
+
+ for (i = 0; i < n; i++, rela++) {
+ rc = apply_rela(rela, base, symtab, strtab, me);
+ if (rc)
+ return rc;
+ }
+ return 0;
+}
+
+int module_finalize(const Elf_Ehdr *hdr,
+ const Elf_Shdr *sechdrs,
+ struct module *me)
+{
+ const Elf_Shdr *s;
+ char *secstrings, *secname;
+ void *aseg;
+
+ if (IS_ENABLED(CONFIG_EXPOLINE) &&
+ !nospec_disable && me->arch.plt_size) {
+ unsigned int *ij;
+
+ ij = me->core_layout.base + me->arch.plt_offset +
+ me->arch.plt_size - PLT_ENTRY_SIZE;
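+		/* Fill in the shared __jump_r1 expoline thunk at the end of
+		 * the PLT: use exrl if the execute-extensions facility (35)
+		 * is installed, otherwise execute the br %r1 trampoline in
+		 * the lowcore. */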
+ if (test_facility(35)) {
+ ij[0] = 0xc6000000; /* exrl %r0,.+10 */
+ ij[1] = 0x0005a7f4; /* j . */
+ ij[2] = 0x000007f1; /* br %r1 */
+ } else {
+ ij[0] = 0x44000000 | (unsigned int)
+ offsetof(struct lowcore, br_r1_trampoline);
+ ij[1] = 0xa7f40000; /* j . */
+ }
+ }
+
+ secstrings = (void *)hdr + sechdrs[hdr->e_shstrndx].sh_offset;
+ for (s = sechdrs; s < sechdrs + hdr->e_shnum; s++) {
+ aseg = (void *) s->sh_addr;
+ secname = secstrings + s->sh_name;
+
+ if (!strcmp(".altinstructions", secname))
+ /* patch .altinstructions */
+ apply_alternatives(aseg, aseg + s->sh_size);
+
+ if (IS_ENABLED(CONFIG_EXPOLINE) &&
+ (!strncmp(".s390_indirect", secname, 14)))
+ nospec_revert(aseg, aseg + s->sh_size);
+
+ if (IS_ENABLED(CONFIG_EXPOLINE) &&
+ (!strncmp(".s390_return", secname, 12)))
+ nospec_revert(aseg, aseg + s->sh_size);
+ }
+
+ jump_label_apply_nops(me);
+ return 0;
+}
diff --git a/arch/s390/kernel/nmi.c b/arch/s390/kernel/nmi.c
new file mode 100644
index 000000000..8c867b43c
--- /dev/null
+++ b/arch/s390/kernel/nmi.c
@@ -0,0 +1,476 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Machine check handler
+ *
+ * Copyright IBM Corp. 2000, 2009
+ * Author(s): Ingo Adlung <adlung@de.ibm.com>,
+ * Martin Schwidefsky <schwidefsky@de.ibm.com>,
+ * Cornelia Huck <cornelia.huck@de.ibm.com>,
+ * Heiko Carstens <heiko.carstens@de.ibm.com>,
+ */
+
+#include <linux/kernel_stat.h>
+#include <linux/init.h>
+#include <linux/errno.h>
+#include <linux/hardirq.h>
+#include <linux/log2.h>
+#include <linux/kprobes.h>
+#include <linux/kmemleak.h>
+#include <linux/time.h>
+#include <linux/module.h>
+#include <linux/sched/signal.h>
+
+#include <linux/export.h>
+#include <asm/lowcore.h>
+#include <asm/smp.h>
+#include <asm/stp.h>
+#include <asm/cputime.h>
+#include <asm/nmi.h>
+#include <asm/crw.h>
+#include <asm/switch_to.h>
+#include <asm/ctl_reg.h>
+#include <asm/asm-offsets.h>
+#include <linux/kvm_host.h>
+
+struct mcck_struct {
+ unsigned int kill_task : 1;
+ unsigned int channel_report : 1;
+ unsigned int warning : 1;
+ unsigned int stp_queue : 1;
+ unsigned long mcck_code;
+};
+
+static DEFINE_PER_CPU(struct mcck_struct, cpu_mcck);
+static struct kmem_cache *mcesa_cache;
+static unsigned long mcesa_origin_lc;
+
+static inline int nmi_needs_mcesa(void)
+{
+ return MACHINE_HAS_VX || MACHINE_HAS_GS;
+}
+
+static inline unsigned long nmi_get_mcesa_size(void)
+{
+ if (MACHINE_HAS_GS)
+ return MCESA_MAX_SIZE;
+ return MCESA_MIN_SIZE;
+}
+
+/*
+ * The initial machine check extended save area for the boot CPU.
+ * It will be replaced by nmi_init() with an allocated structure.
+ * The structure is required for machine checks happening early in
+ * the boot process.
+ */
+static struct mcesa boot_mcesa __initdata __aligned(MCESA_MAX_SIZE);
+
+void __init nmi_alloc_boot_cpu(struct lowcore *lc)
+{
+ if (!nmi_needs_mcesa())
+ return;
+ lc->mcesad = (unsigned long) &boot_mcesa;
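+	/* With guarded storage the low bits of the MCESA origin encode
+	 * log2 of the save area size. */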
+ if (MACHINE_HAS_GS)
+ lc->mcesad |= ilog2(MCESA_MAX_SIZE);
+}
+
+static int __init nmi_init(void)
+{
+ unsigned long origin, cr0, size;
+
+ if (!nmi_needs_mcesa())
+ return 0;
+ size = nmi_get_mcesa_size();
+ if (size > MCESA_MIN_SIZE)
+ mcesa_origin_lc = ilog2(size);
+ /* create slab cache for the machine-check-extended-save-areas */
+ mcesa_cache = kmem_cache_create("nmi_save_areas", size, size, 0, NULL);
+ if (!mcesa_cache)
+ panic("Couldn't create nmi save area cache");
+ origin = (unsigned long) kmem_cache_alloc(mcesa_cache, GFP_KERNEL);
+ if (!origin)
+ panic("Couldn't allocate nmi save area");
+ /* The pointer is stored with mcesa_bits ORed in */
+ kmemleak_not_leak((void *) origin);
+ __ctl_store(cr0, 0, 0);
+ __ctl_clear_bit(0, 28); /* disable lowcore protection */
+ /* Replace boot_mcesa on the boot CPU */
+ S390_lowcore.mcesad = origin | mcesa_origin_lc;
+ __ctl_load(cr0, 0, 0);
+ return 0;
+}
+early_initcall(nmi_init);
+
+int nmi_alloc_per_cpu(struct lowcore *lc)
+{
+ unsigned long origin;
+
+ if (!nmi_needs_mcesa())
+ return 0;
+ origin = (unsigned long) kmem_cache_alloc(mcesa_cache, GFP_KERNEL);
+ if (!origin)
+ return -ENOMEM;
+ /* The pointer is stored with mcesa_bits ORed in */
+ kmemleak_not_leak((void *) origin);
+ lc->mcesad = origin | mcesa_origin_lc;
+ return 0;
+}
+
+void nmi_free_per_cpu(struct lowcore *lc)
+{
+ if (!nmi_needs_mcesa())
+ return;
+ kmem_cache_free(mcesa_cache, (void *)(lc->mcesad & MCESA_ORIGIN_MASK));
+}
+
+static notrace void s390_handle_damage(void)
+{
+ smp_emergency_stop();
+ disabled_wait((unsigned long) __builtin_return_address(0));
+ while (1);
+}
+NOKPROBE_SYMBOL(s390_handle_damage);
+
+/*
+ * Main machine check handler function. Will be called with interrupts enabled
+ * or disabled and machine checks enabled or disabled.
+ */
+void s390_handle_mcck(void)
+{
+ unsigned long flags;
+ struct mcck_struct mcck;
+
+ /*
+ * Disable machine checks and get the current state of accumulated
+ * machine checks. Afterwards delete the old state and enable machine
+ * checks again.
+ */
+ local_irq_save(flags);
+ local_mcck_disable();
+ mcck = *this_cpu_ptr(&cpu_mcck);
+ memset(this_cpu_ptr(&cpu_mcck), 0, sizeof(mcck));
+ clear_cpu_flag(CIF_MCCK_PENDING);
+ local_mcck_enable();
+ local_irq_restore(flags);
+
+ if (mcck.channel_report)
+ crw_handle_channel_report();
+ /*
+ * A warning may remain for a prolonged period on bare metal
+ * (actually until the machine is powered off or the problem is gone).
+ * So we just stop listening for the WARNING MCH and avoid being
+ * continuously interrupted. One caveat, however, is that we must do
+ * this per processor and cannot use the smp version of ctl_clear_bit().
+ * On VM we only get one interrupt per virtually presented machine check.
+ * Though one suffices, we may get one interrupt per (virtual) cpu.
+ */
+ if (mcck.warning) { /* WARNING pending ? */
+ static int mchchk_wng_posted = 0;
+
+ /* Use single cpu clear, as we cannot handle smp here. */
+ __ctl_clear_bit(14, 24); /* Disable WARNING MCH */
+ if (xchg(&mchchk_wng_posted, 1) == 0)
+ kill_cad_pid(SIGPWR, 1);
+ }
+ if (mcck.stp_queue)
+ stp_queue_work();
+ if (mcck.kill_task) {
+ local_irq_enable();
+ printk(KERN_EMERG "mcck: Terminating task because of machine "
+ "malfunction (code 0x%016lx).\n", mcck.mcck_code);
+ printk(KERN_EMERG "mcck: task: %s, pid: %d.\n",
+ current->comm, current->pid);
+ do_exit(SIGSEGV);
+ }
+}
+EXPORT_SYMBOL_GPL(s390_handle_mcck);
+
+/*
+ * returns 0 if all required registers are available
+ * returns 1 otherwise
+ */
+static int notrace s390_check_registers(union mci mci, int umode)
+{
+ union ctlreg2 cr2;
+ int kill_task;
+
+ kill_task = 0;
+
+ if (!mci.gr) {
+ /*
+ * General purpose registers couldn't be restored and have
+ * unknown contents. Stop system or terminate process.
+ */
+ if (!umode)
+ s390_handle_damage();
+ kill_task = 1;
+ }
+ /* Check control registers */
+ if (!mci.cr) {
+ /*
+ * Control registers have unknown contents.
+ * Can't recover and therefore stopping machine.
+ */
+ s390_handle_damage();
+ }
+ if (!mci.fp) {
+ /*
+ * Floating point registers can't be restored. If the
+ * kernel currently uses floating point registers the
+ * system is stopped. If the process has its floating
+ * point registers loaded it is terminated.
+ */
+ if (S390_lowcore.fpu_flags & KERNEL_VXR_V0V7)
+ s390_handle_damage();
+ if (!test_cpu_flag(CIF_FPU))
+ kill_task = 1;
+ }
+ if (!mci.fc) {
+ /*
+ * Floating point control register can't be restored.
+ * If the kernel currently uses the floating point
+ * registers and needs the FPC register the system is
+ * stopped. If the process has its floating point
+ * registers loaded it is terminated.
+ */
+ if (S390_lowcore.fpu_flags & KERNEL_FPC)
+ s390_handle_damage();
+ if (!test_cpu_flag(CIF_FPU))
+ kill_task = 1;
+ }
+
+ if (MACHINE_HAS_VX) {
+ if (!mci.vr) {
+ /*
+ * Vector registers can't be restored. If the kernel
+ * currently uses vector registers the system is
+ * stopped. If the process has its vector registers
+ * loaded it is terminated.
+ */
+ if (S390_lowcore.fpu_flags & KERNEL_VXR)
+ s390_handle_damage();
+ if (!test_cpu_flag(CIF_FPU))
+ kill_task = 1;
+ }
+ }
+ /* Check if access registers are valid */
+ if (!mci.ar) {
+ /*
+ * Access registers have unknown contents.
+ * Terminating task.
+ */
+ kill_task = 1;
+ }
+ /* Check guarded storage registers */
+ cr2.val = S390_lowcore.cregs_save_area[2];
+ if (cr2.gse) {
+ if (!mci.gs) {
+ /*
+ * The guarded storage registers can't be restored
+ * and the current process uses guarded storage.
+ * It has to be terminated.
+ */
+ kill_task = 1;
+ }
+ }
+ /* Check if old PSW is valid */
+ if (!mci.wp) {
+ /*
+ * Can't tell if we come from user or kernel mode
+ * -> stopping machine.
+ */
+ s390_handle_damage();
+ }
+ /* Check for invalid kernel instruction address */
+ if (!mci.ia && !umode) {
+ /*
+ * The instruction address got lost while running
+ * in the kernel -> stopping machine.
+ */
+ s390_handle_damage();
+ }
+
+ if (!mci.ms || !mci.pm || !mci.ia)
+ kill_task = 1;
+
+ return kill_task;
+}
+NOKPROBE_SYMBOL(s390_check_registers);
+
+/*
+ * Backup the guest's machine check info to its description block
+ */
+static void notrace s390_backup_mcck_info(struct pt_regs *regs)
+{
+ struct mcck_volatile_info *mcck_backup;
+ struct sie_page *sie_page;
+
+ /* r14 contains the sie block, which was set in sie64a */
+ struct kvm_s390_sie_block *sie_block =
+ (struct kvm_s390_sie_block *) regs->gprs[14];
+
+ if (sie_block == NULL)
+ /* Something's seriously wrong, stop system. */
+ s390_handle_damage();
+
+ sie_page = container_of(sie_block, struct sie_page, sie_block);
+ mcck_backup = &sie_page->mcck_info;
+ mcck_backup->mcic = S390_lowcore.mcck_interruption_code &
+ ~(MCCK_CODE_CP | MCCK_CODE_EXT_DAMAGE);
+ mcck_backup->ext_damage_code = S390_lowcore.external_damage_code;
+ mcck_backup->failing_storage_address
+ = S390_lowcore.failing_storage_address;
+}
+NOKPROBE_SYMBOL(s390_backup_mcck_info);
+
+#define MAX_IPD_COUNT 29
+#define MAX_IPD_TIME (5 * 60 * USEC_PER_SEC) /* 5 minutes */
+
+#define ED_STP_ISLAND 6 /* External damage STP island check */
+#define ED_STP_SYNC 7 /* External damage STP sync check */
+
+#define MCCK_CODE_NO_GUEST (MCCK_CODE_CP | MCCK_CODE_EXT_DAMAGE)
+
+/*
+ * machine check handler.
+ */
+void notrace s390_do_machine_check(struct pt_regs *regs)
+{
+ static int ipd_count;
+ static DEFINE_SPINLOCK(ipd_lock);
+ static unsigned long long last_ipd;
+ struct mcck_struct *mcck;
+ unsigned long long tmp;
+ union mci mci;
+ unsigned long mcck_dam_code;
+
+ nmi_enter();
+ inc_irq_stat(NMI_NMI);
+ mci.val = S390_lowcore.mcck_interruption_code;
+ mcck = this_cpu_ptr(&cpu_mcck);
+
+ if (mci.sd) {
+ /* System damage -> stopping machine */
+ s390_handle_damage();
+ }
+
+ /*
+ * Reinject instruction-processing-damage machine checks,
+ * including delayed access exceptions, into the guest
+ * instead of damaging the host if they happen in the guest.
+ */
+ if (mci.pd && !test_cpu_flag(CIF_MCCK_GUEST)) {
+ if (mci.b) {
+ /* Processing backup -> verify if we can survive this */
+ u64 z_mcic, o_mcic, t_mcic;
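+			/* z_mcic: bits that must be zero, o_mcic: bits that
+			 * must all be one for the damage to be survivable */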
+ z_mcic = (1ULL<<63 | 1ULL<<59 | 1ULL<<29);
+ o_mcic = (1ULL<<43 | 1ULL<<42 | 1ULL<<41 | 1ULL<<40 |
+ 1ULL<<36 | 1ULL<<35 | 1ULL<<34 | 1ULL<<32 |
+ 1ULL<<30 | 1ULL<<21 | 1ULL<<20 | 1ULL<<17 |
+ 1ULL<<16);
+ t_mcic = mci.val;
+
+ if (((t_mcic & z_mcic) != 0) ||
+ ((t_mcic & o_mcic) != o_mcic)) {
+ s390_handle_damage();
+ }
+
+ /*
+ * Nullifying exigent condition, therefore we might
+ * retry this instruction.
+ */
+ spin_lock(&ipd_lock);
+ tmp = get_tod_clock();
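+			/* TOD clock units >> 12 gives microseconds */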
+ if (((tmp - last_ipd) >> 12) < MAX_IPD_TIME)
+ ipd_count++;
+ else
+ ipd_count = 1;
+ last_ipd = tmp;
+ if (ipd_count == MAX_IPD_COUNT)
+ s390_handle_damage();
+ spin_unlock(&ipd_lock);
+ } else {
+ /* Processing damage -> stopping machine */
+ s390_handle_damage();
+ }
+ }
+ if (s390_check_registers(mci, user_mode(regs))) {
+ /*
+ * Couldn't restore all register contents for the
+ * user space process -> mark task for termination.
+ */
+ mcck->kill_task = 1;
+ mcck->mcck_code = mci.val;
+ set_cpu_flag(CIF_MCCK_PENDING);
+ }
+
+ /*
+ * Backup the machine check's info if it happens when the guest
+ * is running.
+ */
+ if (test_cpu_flag(CIF_MCCK_GUEST))
+ s390_backup_mcck_info(regs);
+
+ if (mci.cd) {
+ /* Timing facility damage */
+ s390_handle_damage();
+ }
+ if (mci.ed && mci.ec) {
+ /* External damage */
+ if (S390_lowcore.external_damage_code & (1U << ED_STP_SYNC))
+ mcck->stp_queue |= stp_sync_check();
+ if (S390_lowcore.external_damage_code & (1U << ED_STP_ISLAND))
+ mcck->stp_queue |= stp_island_check();
+ if (mcck->stp_queue)
+ set_cpu_flag(CIF_MCCK_PENDING);
+ }
+
+ /*
+ * Reinject storage related machine checks into the guest if they
+ * happen when the guest is running.
+ */
+ if (!test_cpu_flag(CIF_MCCK_GUEST)) {
+ if (mci.se)
+ /* Storage error uncorrected */
+ s390_handle_damage();
+ if (mci.ke)
+ /* Storage key-error uncorrected */
+ s390_handle_damage();
+ if (mci.ds && mci.fa)
+ /* Storage degradation */
+ s390_handle_damage();
+ }
+ if (mci.cp) {
+ /* Channel report word pending */
+ mcck->channel_report = 1;
+ set_cpu_flag(CIF_MCCK_PENDING);
+ }
+ if (mci.w) {
+ /* Warning pending */
+ mcck->warning = 1;
+ set_cpu_flag(CIF_MCCK_PENDING);
+ }
+
+ /*
+ * If there are only Channel Report Pending and External Damage
+ * machine checks, they will not be reinjected into the guest
+ * because they refer to host conditions only.
+ */
+ mcck_dam_code = (mci.val & MCIC_SUBCLASS_MASK);
+ if (test_cpu_flag(CIF_MCCK_GUEST) &&
+ (mcck_dam_code & MCCK_CODE_NO_GUEST) != mcck_dam_code) {
+ /* Set exit reason code for host's later handling */
+ *((long *)(regs->gprs[15] + __SF_SIE_REASON)) = -EINTR;
+ }
+ clear_cpu_flag(CIF_MCCK_GUEST);
+ nmi_exit();
+}
+NOKPROBE_SYMBOL(s390_do_machine_check);
+
+static int __init machine_check_init(void)
+{
+ ctl_set_bit(14, 25); /* enable external damage MCH */
+ ctl_set_bit(14, 27); /* enable system recovery MCH */
+ ctl_set_bit(14, 24); /* enable warning MCH */
+ return 0;
+}
+early_initcall(machine_check_init);
diff --git a/arch/s390/kernel/nospec-branch.c b/arch/s390/kernel/nospec-branch.c
new file mode 100644
index 000000000..649135cbe
--- /dev/null
+++ b/arch/s390/kernel/nospec-branch.c
@@ -0,0 +1,176 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/module.h>
+#include <linux/device.h>
+#include <linux/cpu.h>
+#include <asm/nospec-branch.h>
+
+static int __init nobp_setup_early(char *str)
+{
+ bool enabled;
+ int rc;
+
+ rc = kstrtobool(str, &enabled);
+ if (rc)
+ return rc;
+ if (enabled && test_facility(82)) {
+ /*
+ * The user explicitly requested nobp=1, enable it and
+ * disable the expoline support.
+ */
+ __set_facility(82, S390_lowcore.alt_stfle_fac_list);
+ if (IS_ENABLED(CONFIG_EXPOLINE))
+ nospec_disable = 1;
+ } else {
+ __clear_facility(82, S390_lowcore.alt_stfle_fac_list);
+ }
+ return 0;
+}
+early_param("nobp", nobp_setup_early);
+
+static int __init nospec_setup_early(char *str)
+{
+ __clear_facility(82, S390_lowcore.alt_stfle_fac_list);
+ return 0;
+}
+early_param("nospec", nospec_setup_early);
+
+static int __init nospec_report(void)
+{
+ if (test_facility(156))
+ pr_info("Spectre V2 mitigation: etokens\n");
+ if (IS_ENABLED(CC_USING_EXPOLINE) && !nospec_disable)
+ pr_info("Spectre V2 mitigation: execute trampolines\n");
+ if (__test_facility(82, S390_lowcore.alt_stfle_fac_list))
+ pr_info("Spectre V2 mitigation: limited branch prediction\n");
+ return 0;
+}
+arch_initcall(nospec_report);
+
+#ifdef CONFIG_EXPOLINE
+
+int nospec_disable = IS_ENABLED(CONFIG_EXPOLINE_OFF);
+
+static int __init nospectre_v2_setup_early(char *str)
+{
+ nospec_disable = 1;
+ return 0;
+}
+early_param("nospectre_v2", nospectre_v2_setup_early);
+
+void __init nospec_auto_detect(void)
+{
+ if (test_facility(156) || cpu_mitigations_off()) {
+ /*
+ * The machine supports etokens or mitigations are disabled
+ * on the command line. Disable expolines and disable nobp.
+ */
+ if (IS_ENABLED(CC_USING_EXPOLINE))
+ nospec_disable = 1;
+ __clear_facility(82, S390_lowcore.alt_stfle_fac_list);
+ } else if (IS_ENABLED(CC_USING_EXPOLINE)) {
+ /*
+ * The kernel has been compiled with expolines.
+ * Keep expolines enabled and disable nobp.
+ */
+ nospec_disable = 0;
+ __clear_facility(82, S390_lowcore.alt_stfle_fac_list);
+ }
+ /*
+ * If the kernel has not been compiled with expolines, the
+ * nobp setting decides what is done; this depends on the
+ * CONFIG_KERNEL_NOBP option and the nobp/nospec parameters.
+ */
+}
+
+static int __init spectre_v2_setup_early(char *str)
+{
+ if (str && !strncmp(str, "on", 2)) {
+ nospec_disable = 0;
+ __clear_facility(82, S390_lowcore.alt_stfle_fac_list);
+ }
+ if (str && !strncmp(str, "off", 3))
+ nospec_disable = 1;
+ if (str && !strncmp(str, "auto", 4))
+ nospec_auto_detect();
+ return 0;
+}
+early_param("spectre_v2", spectre_v2_setup_early);
+
+static void __init_or_module __nospec_revert(s32 *start, s32 *end)
+{
+ enum { BRCL_EXPOLINE, BRASL_EXPOLINE } type;
+ u8 *instr, *thunk, *br;
+ u8 insnbuf[6];
+ s32 *epo;
+
+	/* The second part of the replaced instruction is always a nop */
+ for (epo = start; epo < end; epo++) {
+ instr = (u8 *) epo + *epo;
+ if (instr[0] == 0xc0 && (instr[1] & 0x0f) == 0x04)
+ type = BRCL_EXPOLINE; /* brcl instruction */
+ else if (instr[0] == 0xc0 && (instr[1] & 0x0f) == 0x05)
+ type = BRASL_EXPOLINE; /* brasl instruction */
+ else
+ continue;
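+		/* The brcl/brasl immediate at bytes 2..5 is a signed 32-bit
+		 * offset counted in halfwords */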
+ thunk = instr + (*(int *)(instr + 2)) * 2;
+ if (thunk[0] == 0xc6 && thunk[1] == 0x00)
+ /* exrl %r0,<target-br> */
+ br = thunk + (*(int *)(thunk + 2)) * 2;
+ else if (thunk[0] == 0xc0 && (thunk[1] & 0x0f) == 0x00 &&
+ thunk[6] == 0x44 && thunk[7] == 0x00 &&
+ (thunk[8] & 0x0f) == 0x00 && thunk[9] == 0x00 &&
+ (thunk[1] & 0xf0) == (thunk[8] & 0xf0))
+ /* larl %rx,<target br> + ex %r0,0(%rx) */
+ br = thunk + (*(int *)(thunk + 2)) * 2;
+ else
+ continue;
+ /* Check for unconditional branch 0x07f? or 0x47f???? */
+ if ((br[0] & 0xbf) != 0x07 || (br[1] & 0xf0) != 0xf0)
+ continue;
+
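+		/* Bytes 2..5 start out as a 4-byte nop; the replacement
+		 * branch is assembled into insnbuf below */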
+ memcpy(insnbuf + 2, (char[]) { 0x47, 0x00, 0x07, 0x00 }, 4);
+ switch (type) {
+ case BRCL_EXPOLINE:
+ insnbuf[0] = br[0];
+ insnbuf[1] = (instr[1] & 0xf0) | (br[1] & 0x0f);
+ if (br[0] == 0x47) {
+ /* brcl to b, replace with bc + nopr */
+ insnbuf[2] = br[2];
+ insnbuf[3] = br[3];
+ } else {
+ /* brcl to br, replace with bcr + nop */
+ }
+ break;
+ case BRASL_EXPOLINE:
+ insnbuf[1] = (instr[1] & 0xf0) | (br[1] & 0x0f);
+ if (br[0] == 0x47) {
+ /* brasl to b, replace with bas + nopr */
+ insnbuf[0] = 0x4d;
+ insnbuf[2] = br[2];
+ insnbuf[3] = br[3];
+ } else {
+ /* brasl to br, replace with basr + nop */
+ insnbuf[0] = 0x0d;
+ }
+ break;
+ }
+
+ s390_kernel_write(instr, insnbuf, 6);
+ }
+}
+
+void __init_or_module nospec_revert(s32 *start, s32 *end)
+{
+ if (nospec_disable)
+ __nospec_revert(start, end);
+}
+
+extern s32 __nospec_call_start[], __nospec_call_end[];
+extern s32 __nospec_return_start[], __nospec_return_end[];
+void __init nospec_init_branches(void)
+{
+ nospec_revert(__nospec_call_start, __nospec_call_end);
+ nospec_revert(__nospec_return_start, __nospec_return_end);
+}
+
+#endif /* CONFIG_EXPOLINE */
diff --git a/arch/s390/kernel/nospec-sysfs.c b/arch/s390/kernel/nospec-sysfs.c
new file mode 100644
index 000000000..e30e580ae
--- /dev/null
+++ b/arch/s390/kernel/nospec-sysfs.c
@@ -0,0 +1,23 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/device.h>
+#include <linux/cpu.h>
+#include <asm/facility.h>
+#include <asm/nospec-branch.h>
+
+ssize_t cpu_show_spectre_v1(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ return sprintf(buf, "Mitigation: __user pointer sanitization\n");
+}
+
+ssize_t cpu_show_spectre_v2(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ if (test_facility(156))
+ return sprintf(buf, "Mitigation: etokens\n");
+ if (IS_ENABLED(CC_USING_EXPOLINE) && !nospec_disable)
+ return sprintf(buf, "Mitigation: execute trampolines\n");
+ if (__test_facility(82, S390_lowcore.alt_stfle_fac_list))
+ return sprintf(buf, "Mitigation: limited branch prediction\n");
+ return sprintf(buf, "Vulnerable\n");
+}
diff --git a/arch/s390/kernel/os_info.c b/arch/s390/kernel/os_info.c
new file mode 100644
index 000000000..0a5e4bafb
--- /dev/null
+++ b/arch/s390/kernel/os_info.c
@@ -0,0 +1,170 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * OS info memory interface
+ *
+ * Copyright IBM Corp. 2012
+ * Author(s): Michael Holzheu <holzheu@linux.vnet.ibm.com>
+ */
+
+#define KMSG_COMPONENT "os_info"
+#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
+
+#include <linux/crash_dump.h>
+#include <linux/kernel.h>
+#include <linux/slab.h>
+#include <asm/checksum.h>
+#include <asm/lowcore.h>
+#include <asm/os_info.h>
+
+/*
+ * OS info structure has to be page aligned
+ */
+static struct os_info os_info __page_aligned_data;
+
+/*
+ * Compute checksum over OS info structure
+ */
+u32 os_info_csum(struct os_info *os_info)
+{
+ int size = sizeof(*os_info) - offsetof(struct os_info, version_major);
+ return (__force u32)csum_partial(&os_info->version_major, size, 0);
+}
+
+/*
+ * Add crashkernel info to OS info and update checksum
+ */
+void os_info_crashkernel_add(unsigned long base, unsigned long size)
+{
+ os_info.crashkernel_addr = (u64)(unsigned long)base;
+ os_info.crashkernel_size = (u64)(unsigned long)size;
+ os_info.csum = os_info_csum(&os_info);
+}
+
+/*
+ * Add OS info entry and update checksum
+ */
+void os_info_entry_add(int nr, void *ptr, u64 size)
+{
+ os_info.entry[nr].addr = (u64)(unsigned long)ptr;
+ os_info.entry[nr].size = size;
+ os_info.entry[nr].csum = (__force u32)csum_partial(ptr, size, 0);
+ os_info.csum = os_info_csum(&os_info);
+}
+
+/*
+ * Initialize OS info structure and set lowcore pointer
+ */
+void __init os_info_init(void)
+{
+ void *ptr = &os_info;
+
+ os_info.version_major = OS_INFO_VERSION_MAJOR;
+ os_info.version_minor = OS_INFO_VERSION_MINOR;
+ os_info.magic = OS_INFO_MAGIC;
+ os_info.csum = os_info_csum(&os_info);
+ mem_assign_absolute(S390_lowcore.os_info, (unsigned long) ptr);
+}
+
+#ifdef CONFIG_CRASH_DUMP
+
+static struct os_info *os_info_old;
+
+/*
+ * Allocate and copy OS info entry from oldmem
+ */
+static void os_info_old_alloc(int nr, int align)
+{
+ unsigned long addr, size = 0;
+ char *buf, *buf_align, *msg;
+ u32 csum;
+
+ addr = os_info_old->entry[nr].addr;
+ if (!addr) {
+ msg = "not available";
+ goto fail;
+ }
+ size = os_info_old->entry[nr].size;
+ buf = kmalloc(size + align - 1, GFP_KERNEL);
+ if (!buf) {
+ msg = "alloc failed";
+ goto fail;
+ }
+ buf_align = PTR_ALIGN(buf, align);
+ if (copy_oldmem_kernel(buf_align, (void *) addr, size)) {
+ msg = "copy failed";
+ goto fail_free;
+ }
+ csum = (__force u32)csum_partial(buf_align, size, 0);
+ if (csum != os_info_old->entry[nr].csum) {
+ msg = "checksum failed";
+ goto fail_free;
+ }
+ os_info_old->entry[nr].addr = (u64)(unsigned long)buf_align;
+ msg = "copied";
+ goto out;
+fail_free:
+ kfree(buf);
+fail:
+ os_info_old->entry[nr].addr = 0;
+out:
+ pr_info("entry %i: %s (addr=0x%lx size=%lu)\n",
+ nr, msg, addr, size);
+}
+
+/*
+ * Initialize os info and os info entries from oldmem
+ */
+static void os_info_old_init(void)
+{
+ static int os_info_init;
+ unsigned long addr;
+
+ if (os_info_init)
+ return;
+ if (!OLDMEM_BASE)
+ goto fail;
+ if (copy_oldmem_kernel(&addr, &S390_lowcore.os_info, sizeof(addr)))
+ goto fail;
+ if (addr == 0 || addr % PAGE_SIZE)
+ goto fail;
+ os_info_old = kzalloc(sizeof(*os_info_old), GFP_KERNEL);
+ if (!os_info_old)
+ goto fail;
+ if (copy_oldmem_kernel(os_info_old, (void *) addr,
+ sizeof(*os_info_old)))
+ goto fail_free;
+ if (os_info_old->magic != OS_INFO_MAGIC)
+ goto fail_free;
+ if (os_info_old->csum != os_info_csum(os_info_old))
+ goto fail_free;
+ if (os_info_old->version_major > OS_INFO_VERSION_MAJOR)
+ goto fail_free;
+ os_info_old_alloc(OS_INFO_VMCOREINFO, 1);
+ os_info_old_alloc(OS_INFO_REIPL_BLOCK, 1);
+ pr_info("crashkernel: addr=0x%lx size=%lu\n",
+ (unsigned long) os_info_old->crashkernel_addr,
+ (unsigned long) os_info_old->crashkernel_size);
+ os_info_init = 1;
+ return;
+fail_free:
+ kfree(os_info_old);
+fail:
+ os_info_init = 1;
+ os_info_old = NULL;
+}
+
+/*
+ * Return pointer to OS info entry and its size
+ */
+void *os_info_old_entry(int nr, unsigned long *size)
+{
+ os_info_old_init();
+
+ if (!os_info_old)
+ return NULL;
+ if (!os_info_old->entry[nr].addr)
+ return NULL;
+ *size = (unsigned long) os_info_old->entry[nr].size;
+ return (void *)(unsigned long)os_info_old->entry[nr].addr;
+}
+#endif
diff --git a/arch/s390/kernel/perf_cpum_cf.c b/arch/s390/kernel/perf_cpum_cf.c
new file mode 100644
index 000000000..d5523aded
--- /dev/null
+++ b/arch/s390/kernel/perf_cpum_cf.c
@@ -0,0 +1,724 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Performance event support for s390x - CPU-measurement Counter Facility
+ *
+ * Copyright IBM Corp. 2012, 2017
+ * Author(s): Hendrik Brueckner <brueckner@linux.vnet.ibm.com>
+ */
+#define KMSG_COMPONENT "cpum_cf"
+#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
+
+#include <linux/kernel.h>
+#include <linux/kernel_stat.h>
+#include <linux/perf_event.h>
+#include <linux/percpu.h>
+#include <linux/notifier.h>
+#include <linux/init.h>
+#include <linux/export.h>
+#include <asm/ctl_reg.h>
+#include <asm/irq.h>
+#include <asm/cpu_mf.h>
+
+enum cpumf_ctr_set {
+ CPUMF_CTR_SET_BASIC = 0, /* Basic Counter Set */
+ CPUMF_CTR_SET_USER = 1, /* Problem-State Counter Set */
+ CPUMF_CTR_SET_CRYPTO = 2, /* Crypto-Activity Counter Set */
+ CPUMF_CTR_SET_EXT = 3, /* Extended Counter Set */
+ CPUMF_CTR_SET_MT_DIAG = 4, /* MT-diagnostic Counter Set */
+
+ /* Maximum number of counter sets */
+ CPUMF_CTR_SET_MAX,
+};
+
+#define CPUMF_LCCTL_ENABLE_SHIFT 16
+#define CPUMF_LCCTL_ACTCTL_SHIFT 0
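+/* The lcctl() state keeps the counter-set enable controls shifted left by
+ * 16 bits and the activation controls in the low bits. */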
+static const u64 cpumf_state_ctl[CPUMF_CTR_SET_MAX] = {
+ [CPUMF_CTR_SET_BASIC] = 0x02,
+ [CPUMF_CTR_SET_USER] = 0x04,
+ [CPUMF_CTR_SET_CRYPTO] = 0x08,
+ [CPUMF_CTR_SET_EXT] = 0x01,
+ [CPUMF_CTR_SET_MT_DIAG] = 0x20,
+};
+
+static void ctr_set_enable(u64 *state, int ctr_set)
+{
+ *state |= cpumf_state_ctl[ctr_set] << CPUMF_LCCTL_ENABLE_SHIFT;
+}
+static void ctr_set_disable(u64 *state, int ctr_set)
+{
+ *state &= ~(cpumf_state_ctl[ctr_set] << CPUMF_LCCTL_ENABLE_SHIFT);
+}
+static void ctr_set_start(u64 *state, int ctr_set)
+{
+ *state |= cpumf_state_ctl[ctr_set] << CPUMF_LCCTL_ACTCTL_SHIFT;
+}
+static void ctr_set_stop(u64 *state, int ctr_set)
+{
+ *state &= ~(cpumf_state_ctl[ctr_set] << CPUMF_LCCTL_ACTCTL_SHIFT);
+}
+
+/* Local CPUMF event structure */
+struct cpu_hw_events {
+ struct cpumf_ctr_info info;
+ atomic_t ctr_set[CPUMF_CTR_SET_MAX];
+ u64 state, tx_state;
+ unsigned int flags;
+ unsigned int txn_flags;
+};
+static DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events) = {
+ .ctr_set = {
+ [CPUMF_CTR_SET_BASIC] = ATOMIC_INIT(0),
+ [CPUMF_CTR_SET_USER] = ATOMIC_INIT(0),
+ [CPUMF_CTR_SET_CRYPTO] = ATOMIC_INIT(0),
+ [CPUMF_CTR_SET_EXT] = ATOMIC_INIT(0),
+ [CPUMF_CTR_SET_MT_DIAG] = ATOMIC_INIT(0),
+ },
+ .state = 0,
+ .flags = 0,
+ .txn_flags = 0,
+};
+
+static enum cpumf_ctr_set get_counter_set(u64 event)
+{
+ int set = CPUMF_CTR_SET_MAX;
+
+ if (event < 32)
+ set = CPUMF_CTR_SET_BASIC;
+ else if (event < 64)
+ set = CPUMF_CTR_SET_USER;
+ else if (event < 128)
+ set = CPUMF_CTR_SET_CRYPTO;
+ else if (event < 256)
+ set = CPUMF_CTR_SET_EXT;
+ else if (event >= 448 && event < 496)
+ set = CPUMF_CTR_SET_MT_DIAG;
+
+ return set;
+}
+
+static int validate_ctr_version(const struct hw_perf_event *hwc)
+{
+ struct cpu_hw_events *cpuhw;
+ int err = 0;
+ u16 mtdiag_ctl;
+
+ cpuhw = &get_cpu_var(cpu_hw_events);
+
+ /* check required version for counter sets */
+ switch (hwc->config_base) {
+ case CPUMF_CTR_SET_BASIC:
+ case CPUMF_CTR_SET_USER:
+ if (cpuhw->info.cfvn < 1)
+ err = -EOPNOTSUPP;
+ break;
+ case CPUMF_CTR_SET_CRYPTO:
+ case CPUMF_CTR_SET_EXT:
+ if (cpuhw->info.csvn < 1)
+ err = -EOPNOTSUPP;
+ if ((cpuhw->info.csvn == 1 && hwc->config > 159) ||
+ (cpuhw->info.csvn == 2 && hwc->config > 175) ||
+ (cpuhw->info.csvn > 2 && hwc->config > 255))
+ err = -EOPNOTSUPP;
+ break;
+ case CPUMF_CTR_SET_MT_DIAG:
+ if (cpuhw->info.csvn <= 3)
+ err = -EOPNOTSUPP;
+ /*
+ * MT-diagnostic counters are read-only. The counter set
+ * is automatically enabled and activated on all CPUs with
+ * multithreading (SMT). Deactivation of multithreading
+ * also disables the counter set. State changes are ignored
+ * by lcctl(). Because Linux controls SMT enablement through
+ * a kernel parameter only, the counter set is either disabled
+ * or enabled and active.
+ *
+ * Thus, the counters can only be used if SMT is on and the
+ * counter set is enabled and active.
+ */
+ mtdiag_ctl = cpumf_state_ctl[CPUMF_CTR_SET_MT_DIAG];
+ if (!((cpuhw->info.auth_ctl & mtdiag_ctl) &&
+ (cpuhw->info.enable_ctl & mtdiag_ctl) &&
+ (cpuhw->info.act_ctl & mtdiag_ctl)))
+ err = -EOPNOTSUPP;
+ break;
+ }
+
+ put_cpu_var(cpu_hw_events);
+ return err;
+}
+
+static int validate_ctr_auth(const struct hw_perf_event *hwc)
+{
+ struct cpu_hw_events *cpuhw;
+ u64 ctrs_state;
+ int err = 0;
+
+ cpuhw = &get_cpu_var(cpu_hw_events);
+
+ /* Check authorization for cpu counter sets.
+ * If the particular CPU counter set is not authorized,
+ * return with -ENOENT in order to fall back to other
+	 * PMUs that might satisfy the event request.
+ */
+ ctrs_state = cpumf_state_ctl[hwc->config_base];
+ if (!(ctrs_state & cpuhw->info.auth_ctl))
+ err = -ENOENT;
+
+ put_cpu_var(cpu_hw_events);
+ return err;
+}
+
+/*
+ * Change the CPUMF state to active.
+ * Enable and activate the CPU-counter sets according
+ * to the per-cpu control state.
+ */
+static void cpumf_pmu_enable(struct pmu *pmu)
+{
+ struct cpu_hw_events *cpuhw = this_cpu_ptr(&cpu_hw_events);
+ int err;
+
+ if (cpuhw->flags & PMU_F_ENABLED)
+ return;
+
+ err = lcctl(cpuhw->state);
+ if (err) {
+ pr_err("Enabling the performance measuring unit "
+ "failed with rc=%x\n", err);
+ return;
+ }
+
+ cpuhw->flags |= PMU_F_ENABLED;
+}
+
+/*
+ * Change the CPUMF state to inactive.
+ * Deactivate the CPU-counter sets but keep the enable controls
+ * from the per-cpu control state.
+ */
+static void cpumf_pmu_disable(struct pmu *pmu)
+{
+ struct cpu_hw_events *cpuhw = this_cpu_ptr(&cpu_hw_events);
+ int err;
+ u64 inactive;
+
+ if (!(cpuhw->flags & PMU_F_ENABLED))
+ return;
+
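+	/* Keep the enable controls, clear all activation controls */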
+ inactive = cpuhw->state & ~((1 << CPUMF_LCCTL_ENABLE_SHIFT) - 1);
+ err = lcctl(inactive);
+ if (err) {
+ pr_err("Disabling the performance measuring unit "
+ "failed with rc=%x\n", err);
+ return;
+ }
+
+ cpuhw->flags &= ~PMU_F_ENABLED;
+}
+
+
+/* Number of perf events counting hardware events */
+static atomic_t num_events = ATOMIC_INIT(0);
+/* Used to avoid races in calling reserve/release_cpumf_hardware */
+static DEFINE_MUTEX(pmc_reserve_mutex);
+
+/* CPU-measurement alerts for the counter facility */
+static void cpumf_measurement_alert(struct ext_code ext_code,
+ unsigned int alert, unsigned long unused)
+{
+ struct cpu_hw_events *cpuhw;
+
+ if (!(alert & CPU_MF_INT_CF_MASK))
+ return;
+
+ inc_irq_stat(IRQEXT_CMC);
+ cpuhw = this_cpu_ptr(&cpu_hw_events);
+
+ /* Measurement alerts are shared and might happen when the PMU
+ * is not reserved. Ignore these alerts in this case. */
+ if (!(cpuhw->flags & PMU_F_RESERVED))
+ return;
+
+ /* counter authorization change alert */
+ if (alert & CPU_MF_INT_CF_CACA)
+ qctri(&cpuhw->info);
+
+ /* loss of counter data alert */
+ if (alert & CPU_MF_INT_CF_LCDA)
+ pr_err("CPU[%i] Counter data was lost\n", smp_processor_id());
+
+ /* loss of MT counter data alert */
+ if (alert & CPU_MF_INT_CF_MTDA)
+ pr_warn("CPU[%i] MT counter data was lost\n",
+ smp_processor_id());
+}
+
+#define PMC_INIT 0
+#define PMC_RELEASE 1
+static void setup_pmc_cpu(void *flags)
+{
+ struct cpu_hw_events *cpuhw = this_cpu_ptr(&cpu_hw_events);
+
+ switch (*((int *) flags)) {
+ case PMC_INIT:
+ memset(&cpuhw->info, 0, sizeof(cpuhw->info));
+ qctri(&cpuhw->info);
+ cpuhw->flags |= PMU_F_RESERVED;
+ break;
+
+ case PMC_RELEASE:
+ cpuhw->flags &= ~PMU_F_RESERVED;
+ break;
+ }
+
+ /* Disable CPU counter sets */
+ lcctl(0);
+}
+
+/* Initialize the CPU-measurement facility */
+static int reserve_pmc_hardware(void)
+{
+ int flags = PMC_INIT;
+
+ on_each_cpu(setup_pmc_cpu, &flags, 1);
+ irq_subclass_register(IRQ_SUBCLASS_MEASUREMENT_ALERT);
+
+ return 0;
+}
+
+/* Release the CPU-measurement facility */
+static void release_pmc_hardware(void)
+{
+ int flags = PMC_RELEASE;
+
+ on_each_cpu(setup_pmc_cpu, &flags, 1);
+ irq_subclass_unregister(IRQ_SUBCLASS_MEASUREMENT_ALERT);
+}
+
+/* Release the PMU if event is the last perf event */
+static void hw_perf_event_destroy(struct perf_event *event)
+{
+ if (!atomic_add_unless(&num_events, -1, 1)) {
+ mutex_lock(&pmc_reserve_mutex);
+ if (atomic_dec_return(&num_events) == 0)
+ release_pmc_hardware();
+ mutex_unlock(&pmc_reserve_mutex);
+ }
+}
+
+/* CPUMF <-> perf event mappings for kernel+userspace (basic set) */
+static const int cpumf_generic_events_basic[] = {
+ [PERF_COUNT_HW_CPU_CYCLES] = 0,
+ [PERF_COUNT_HW_INSTRUCTIONS] = 1,
+ [PERF_COUNT_HW_CACHE_REFERENCES] = -1,
+ [PERF_COUNT_HW_CACHE_MISSES] = -1,
+ [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = -1,
+ [PERF_COUNT_HW_BRANCH_MISSES] = -1,
+ [PERF_COUNT_HW_BUS_CYCLES] = -1,
+};
+/* CPUMF <-> perf event mappings for userspace (problem-state set) */
+static const int cpumf_generic_events_user[] = {
+ [PERF_COUNT_HW_CPU_CYCLES] = 32,
+ [PERF_COUNT_HW_INSTRUCTIONS] = 33,
+ [PERF_COUNT_HW_CACHE_REFERENCES] = -1,
+ [PERF_COUNT_HW_CACHE_MISSES] = -1,
+ [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = -1,
+ [PERF_COUNT_HW_BRANCH_MISSES] = -1,
+ [PERF_COUNT_HW_BUS_CYCLES] = -1,
+};
+
+static int __hw_perf_event_init(struct perf_event *event)
+{
+ struct perf_event_attr *attr = &event->attr;
+ struct hw_perf_event *hwc = &event->hw;
+ enum cpumf_ctr_set set;
+ int err;
+ u64 ev;
+
+ switch (attr->type) {
+ case PERF_TYPE_RAW:
+ /* Raw events are used to access counters directly,
+ * hence do not permit excludes */
+ if (attr->exclude_kernel || attr->exclude_user ||
+ attr->exclude_hv)
+ return -EOPNOTSUPP;
+ ev = attr->config;
+ break;
+
+ case PERF_TYPE_HARDWARE:
+ if (is_sampling_event(event)) /* No sampling support */
+ return -ENOENT;
+ ev = attr->config;
+ /* Count user space (problem-state) only */
+ if (!attr->exclude_user && attr->exclude_kernel) {
+ if (ev >= ARRAY_SIZE(cpumf_generic_events_user))
+ return -EOPNOTSUPP;
+ ev = cpumf_generic_events_user[ev];
+
+ /* No support for kernel space counters only */
+ } else if (!attr->exclude_kernel && attr->exclude_user) {
+ return -EOPNOTSUPP;
+
+ /* Count user and kernel space */
+ } else {
+ if (ev >= ARRAY_SIZE(cpumf_generic_events_basic))
+ return -EOPNOTSUPP;
+ ev = cpumf_generic_events_basic[ev];
+ }
+ break;
+
+ default:
+ return -ENOENT;
+ }
+
+ if (ev == -1)
+ return -ENOENT;
+
+ if (ev > PERF_CPUM_CF_MAX_CTR)
+ return -ENOENT;
+
+ /* Obtain the counter set to which the specified counter belongs */
+ set = get_counter_set(ev);
+ switch (set) {
+ case CPUMF_CTR_SET_BASIC:
+ case CPUMF_CTR_SET_USER:
+ case CPUMF_CTR_SET_CRYPTO:
+ case CPUMF_CTR_SET_EXT:
+ case CPUMF_CTR_SET_MT_DIAG:
+ /*
+ * Use the hardware perf event structure to store the
+ * counter number in the 'config' member and the counter
+ * set number in the 'config_base'. The counter set number
+ * is then later used to enable/disable the counter(s).
+ */
+ hwc->config = ev;
+ hwc->config_base = set;
+ break;
+ case CPUMF_CTR_SET_MAX:
+		/* The counter could not be associated with a counter set */
+ return -EINVAL;
+	}
+
+ /* Initialize for using the CPU-measurement counter facility */
+ if (!atomic_inc_not_zero(&num_events)) {
+ mutex_lock(&pmc_reserve_mutex);
+ if (atomic_read(&num_events) == 0 && reserve_pmc_hardware())
+ err = -EBUSY;
+ else
+ atomic_inc(&num_events);
+ mutex_unlock(&pmc_reserve_mutex);
+ }
+ event->destroy = hw_perf_event_destroy;
+
+ /* Finally, validate version and authorization of the counter set */
+ err = validate_ctr_auth(hwc);
+ if (!err)
+ err = validate_ctr_version(hwc);
+
+ return err;
+}
+
+static int cpumf_pmu_event_init(struct perf_event *event)
+{
+ int err;
+
+ switch (event->attr.type) {
+ case PERF_TYPE_HARDWARE:
+ case PERF_TYPE_HW_CACHE:
+ case PERF_TYPE_RAW:
+ err = __hw_perf_event_init(event);
+ break;
+ default:
+ return -ENOENT;
+ }
+
+ if (unlikely(err) && event->destroy)
+ event->destroy(event);
+
+ return err;
+}
+
+static int hw_perf_event_reset(struct perf_event *event)
+{
+ u64 prev, new;
+ int err;
+
+ do {
+ prev = local64_read(&event->hw.prev_count);
+ err = ecctr(event->hw.config, &new);
+ if (err) {
+ if (err != 3)
+ break;
+ /* The counter is not (yet) available. This
+ * might happen if the counter set to which
+ * this counter belongs is in the disabled
+ * state.
+ */
+ new = 0;
+ }
+ } while (local64_cmpxchg(&event->hw.prev_count, prev, new) != prev);
+
+ return err;
+}
+
+static void hw_perf_event_update(struct perf_event *event)
+{
+ u64 prev, new, delta;
+ int err;
+
+ do {
+ prev = local64_read(&event->hw.prev_count);
+ err = ecctr(event->hw.config, &new);
+ if (err)
+ return;
+ } while (local64_cmpxchg(&event->hw.prev_count, prev, new) != prev);
+
+ delta = (prev <= new) ? new - prev
+ : (-1ULL - prev) + new + 1; /* overflow */
+ local64_add(delta, &event->count);
+}
+
+static void cpumf_pmu_read(struct perf_event *event)
+{
+ if (event->hw.state & PERF_HES_STOPPED)
+ return;
+
+ hw_perf_event_update(event);
+}
+
+static void cpumf_pmu_start(struct perf_event *event, int flags)
+{
+ struct cpu_hw_events *cpuhw = this_cpu_ptr(&cpu_hw_events);
+ struct hw_perf_event *hwc = &event->hw;
+
+ if (WARN_ON_ONCE(!(hwc->state & PERF_HES_STOPPED)))
+ return;
+
+ if (WARN_ON_ONCE(hwc->config == -1))
+ return;
+
+ if (flags & PERF_EF_RELOAD)
+ WARN_ON_ONCE(!(hwc->state & PERF_HES_UPTODATE));
+
+ hwc->state = 0;
+
+ /* (Re-)enable and activate the counter set */
+ ctr_set_enable(&cpuhw->state, hwc->config_base);
+ ctr_set_start(&cpuhw->state, hwc->config_base);
+
+	/* The counter set to which this counter belongs can already be active.
+ * Because all counters in a set are active, the event->hw.prev_count
+ * needs to be synchronized. At this point, the counter set can be in
+ * the inactive or disabled state.
+ */
+ hw_perf_event_reset(event);
+
+ /* increment refcount for this counter set */
+ atomic_inc(&cpuhw->ctr_set[hwc->config_base]);
+}
+
+static void cpumf_pmu_stop(struct perf_event *event, int flags)
+{
+ struct cpu_hw_events *cpuhw = this_cpu_ptr(&cpu_hw_events);
+ struct hw_perf_event *hwc = &event->hw;
+
+ if (!(hwc->state & PERF_HES_STOPPED)) {
+ /* Decrement reference count for this counter set and if this
+ * is the last used counter in the set, clear activation
+ * control and set the counter set state to inactive.
+ */
+ if (!atomic_dec_return(&cpuhw->ctr_set[hwc->config_base]))
+ ctr_set_stop(&cpuhw->state, hwc->config_base);
+ event->hw.state |= PERF_HES_STOPPED;
+ }
+
+ if ((flags & PERF_EF_UPDATE) && !(hwc->state & PERF_HES_UPTODATE)) {
+ hw_perf_event_update(event);
+ event->hw.state |= PERF_HES_UPTODATE;
+ }
+}
+
+static int cpumf_pmu_add(struct perf_event *event, int flags)
+{
+ struct cpu_hw_events *cpuhw = this_cpu_ptr(&cpu_hw_events);
+
+ /* Check authorization for the counter set to which this
+ * counter belongs.
+ * For group events transaction, the authorization check is
+ * done in cpumf_pmu_commit_txn().
+ */
+ if (!(cpuhw->txn_flags & PERF_PMU_TXN_ADD))
+ if (validate_ctr_auth(&event->hw))
+ return -ENOENT;
+
+ ctr_set_enable(&cpuhw->state, event->hw.config_base);
+ event->hw.state = PERF_HES_UPTODATE | PERF_HES_STOPPED;
+
+ if (flags & PERF_EF_START)
+ cpumf_pmu_start(event, PERF_EF_RELOAD);
+
+ perf_event_update_userpage(event);
+
+ return 0;
+}
+
+static void cpumf_pmu_del(struct perf_event *event, int flags)
+{
+ struct cpu_hw_events *cpuhw = this_cpu_ptr(&cpu_hw_events);
+
+ cpumf_pmu_stop(event, PERF_EF_UPDATE);
+
+ /* Check if any counter in the counter set is still used. If not used,
+ * change the counter set to the disabled state. This also clears the
+ * content of all counters in the set.
+ *
+ * When a new perf event has been added but not yet started, this can
+	 * clear the enable control and reset all counters in a set. Therefore,
+ * cpumf_pmu_start() always has to reenable a counter set.
+ */
+ if (!atomic_read(&cpuhw->ctr_set[event->hw.config_base]))
+ ctr_set_disable(&cpuhw->state, event->hw.config_base);
+
+ perf_event_update_userpage(event);
+}
+
+/*
+ * Start group events scheduling transaction.
+ * Set flags to perform a single test at commit time.
+ *
+ * We only support PERF_PMU_TXN_ADD transactions. Save the
+ * transaction flags but otherwise ignore non-PERF_PMU_TXN_ADD
+ * transactions.
+ */
+static void cpumf_pmu_start_txn(struct pmu *pmu, unsigned int txn_flags)
+{
+ struct cpu_hw_events *cpuhw = this_cpu_ptr(&cpu_hw_events);
+
+ WARN_ON_ONCE(cpuhw->txn_flags); /* txn already in flight */
+
+ cpuhw->txn_flags = txn_flags;
+ if (txn_flags & ~PERF_PMU_TXN_ADD)
+ return;
+
+ perf_pmu_disable(pmu);
+ cpuhw->tx_state = cpuhw->state;
+}
+
+/*
+ * Stop and cancel a group events scheduling transaction.
+ * Assumes cpumf_pmu_del() is called for each successfully added
+ * event (cpumf_pmu_add()) during the transaction.
+ */
+static void cpumf_pmu_cancel_txn(struct pmu *pmu)
+{
+ unsigned int txn_flags;
+ struct cpu_hw_events *cpuhw = this_cpu_ptr(&cpu_hw_events);
+
+ WARN_ON_ONCE(!cpuhw->txn_flags); /* no txn in flight */
+
+ txn_flags = cpuhw->txn_flags;
+ cpuhw->txn_flags = 0;
+ if (txn_flags & ~PERF_PMU_TXN_ADD)
+ return;
+
+ WARN_ON(cpuhw->tx_state != cpuhw->state);
+
+ perf_pmu_enable(pmu);
+}
+
+/*
+ * Commit the group events scheduling transaction. On success, the
+ * transaction is closed. On error, the transaction is kept open
+ * until cpumf_pmu_cancel_txn() is called.
+ */
+static int cpumf_pmu_commit_txn(struct pmu *pmu)
+{
+ struct cpu_hw_events *cpuhw = this_cpu_ptr(&cpu_hw_events);
+ u64 state;
+
+ WARN_ON_ONCE(!cpuhw->txn_flags); /* no txn in flight */
+
+ if (cpuhw->txn_flags & ~PERF_PMU_TXN_ADD) {
+ cpuhw->txn_flags = 0;
+ return 0;
+ }
+
+ /* check if the updated state can be scheduled */
+ state = cpuhw->state & ~((1 << CPUMF_LCCTL_ENABLE_SHIFT) - 1);
+ state >>= CPUMF_LCCTL_ENABLE_SHIFT;
+ if ((state & cpuhw->info.auth_ctl) != state)
+ return -ENOENT;
+
+ cpuhw->txn_flags = 0;
+ perf_pmu_enable(pmu);
+ return 0;
+}
+
+/* Performance monitoring unit for s390x */
+static struct pmu cpumf_pmu = {
+ .task_ctx_nr = perf_sw_context,
+ .capabilities = PERF_PMU_CAP_NO_INTERRUPT,
+ .pmu_enable = cpumf_pmu_enable,
+ .pmu_disable = cpumf_pmu_disable,
+ .event_init = cpumf_pmu_event_init,
+ .add = cpumf_pmu_add,
+ .del = cpumf_pmu_del,
+ .start = cpumf_pmu_start,
+ .stop = cpumf_pmu_stop,
+ .read = cpumf_pmu_read,
+ .start_txn = cpumf_pmu_start_txn,
+ .commit_txn = cpumf_pmu_commit_txn,
+ .cancel_txn = cpumf_pmu_cancel_txn,
+};
+
+static int cpumf_pmf_setup(unsigned int cpu, int flags)
+{
+ local_irq_disable();
+ setup_pmc_cpu(&flags);
+ local_irq_enable();
+ return 0;
+}
+
+static int s390_pmu_online_cpu(unsigned int cpu)
+{
+ return cpumf_pmf_setup(cpu, PMC_INIT);
+}
+
+static int s390_pmu_offline_cpu(unsigned int cpu)
+{
+ return cpumf_pmf_setup(cpu, PMC_RELEASE);
+}
+
+static int __init cpumf_pmu_init(void)
+{
+ int rc;
+
+ if (!cpum_cf_avail())
+ return -ENODEV;
+
+ /* clear bit 15 of cr0 to unauthorize problem-state to
+ * extract measurement counters */
+ ctl_clear_bit(0, 48);
+
+ /* register handler for measurement-alert interruptions */
+ rc = register_external_irq(EXT_IRQ_MEASURE_ALERT,
+ cpumf_measurement_alert);
+ if (rc) {
+ pr_err("Registering for CPU-measurement alerts "
+ "failed with rc=%i\n", rc);
+ return rc;
+ }
+
+ cpumf_pmu.attr_groups = cpumf_cf_event_group();
+ rc = perf_pmu_register(&cpumf_pmu, "cpum_cf", PERF_TYPE_RAW);
+ if (rc) {
+ pr_err("Registering the cpum_cf PMU failed with rc=%i\n", rc);
+ unregister_external_irq(EXT_IRQ_MEASURE_ALERT,
+ cpumf_measurement_alert);
+ return rc;
+ }
+ return cpuhp_setup_state(CPUHP_AP_PERF_S390_CF_ONLINE,
+ "perf/s390/cf:online",
+ s390_pmu_online_cpu, s390_pmu_offline_cpu);
+}
+early_initcall(cpumf_pmu_init);
diff --git a/arch/s390/kernel/perf_cpum_cf_events.c b/arch/s390/kernel/perf_cpum_cf_events.c
new file mode 100644
index 000000000..d63fb3c56
--- /dev/null
+++ b/arch/s390/kernel/perf_cpum_cf_events.c
@@ -0,0 +1,598 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Perf PMU sysfs events attributes for available CPU-measurement counters
+ *
+ */
+
+#include <linux/slab.h>
+#include <linux/perf_event.h>
+
+
+/* BEGIN: CPUM_CF COUNTER DEFINITIONS =================================== */
+
+CPUMF_EVENT_ATTR(cf_fvn1, CPU_CYCLES, 0x0000);
+CPUMF_EVENT_ATTR(cf_fvn1, INSTRUCTIONS, 0x0001);
+CPUMF_EVENT_ATTR(cf_fvn1, L1I_DIR_WRITES, 0x0002);
+CPUMF_EVENT_ATTR(cf_fvn1, L1I_PENALTY_CYCLES, 0x0003);
+CPUMF_EVENT_ATTR(cf_fvn1, PROBLEM_STATE_CPU_CYCLES, 0x0020);
+CPUMF_EVENT_ATTR(cf_fvn1, PROBLEM_STATE_INSTRUCTIONS, 0x0021);
+CPUMF_EVENT_ATTR(cf_fvn1, PROBLEM_STATE_L1I_DIR_WRITES, 0x0022);
+CPUMF_EVENT_ATTR(cf_fvn1, PROBLEM_STATE_L1I_PENALTY_CYCLES, 0x0023);
+CPUMF_EVENT_ATTR(cf_fvn1, PROBLEM_STATE_L1D_DIR_WRITES, 0x0024);
+CPUMF_EVENT_ATTR(cf_fvn1, PROBLEM_STATE_L1D_PENALTY_CYCLES, 0x0025);
+CPUMF_EVENT_ATTR(cf_fvn1, L1D_DIR_WRITES, 0x0004);
+CPUMF_EVENT_ATTR(cf_fvn1, L1D_PENALTY_CYCLES, 0x0005);
+CPUMF_EVENT_ATTR(cf_fvn3, CPU_CYCLES, 0x0000);
+CPUMF_EVENT_ATTR(cf_fvn3, INSTRUCTIONS, 0x0001);
+CPUMF_EVENT_ATTR(cf_fvn3, L1I_DIR_WRITES, 0x0002);
+CPUMF_EVENT_ATTR(cf_fvn3, L1I_PENALTY_CYCLES, 0x0003);
+CPUMF_EVENT_ATTR(cf_fvn3, PROBLEM_STATE_CPU_CYCLES, 0x0020);
+CPUMF_EVENT_ATTR(cf_fvn3, PROBLEM_STATE_INSTRUCTIONS, 0x0021);
+CPUMF_EVENT_ATTR(cf_fvn3, L1D_DIR_WRITES, 0x0004);
+CPUMF_EVENT_ATTR(cf_fvn3, L1D_PENALTY_CYCLES, 0x0005);
+CPUMF_EVENT_ATTR(cf_svn_generic, PRNG_FUNCTIONS, 0x0040);
+CPUMF_EVENT_ATTR(cf_svn_generic, PRNG_CYCLES, 0x0041);
+CPUMF_EVENT_ATTR(cf_svn_generic, PRNG_BLOCKED_FUNCTIONS, 0x0042);
+CPUMF_EVENT_ATTR(cf_svn_generic, PRNG_BLOCKED_CYCLES, 0x0043);
+CPUMF_EVENT_ATTR(cf_svn_generic, SHA_FUNCTIONS, 0x0044);
+CPUMF_EVENT_ATTR(cf_svn_generic, SHA_CYCLES, 0x0045);
+CPUMF_EVENT_ATTR(cf_svn_generic, SHA_BLOCKED_FUNCTIONS, 0x0046);
+CPUMF_EVENT_ATTR(cf_svn_generic, SHA_BLOCKED_CYCLES, 0x0047);
+CPUMF_EVENT_ATTR(cf_svn_generic, DEA_FUNCTIONS, 0x0048);
+CPUMF_EVENT_ATTR(cf_svn_generic, DEA_CYCLES, 0x0049);
+CPUMF_EVENT_ATTR(cf_svn_generic, DEA_BLOCKED_FUNCTIONS, 0x004a);
+CPUMF_EVENT_ATTR(cf_svn_generic, DEA_BLOCKED_CYCLES, 0x004b);
+CPUMF_EVENT_ATTR(cf_svn_generic, AES_FUNCTIONS, 0x004c);
+CPUMF_EVENT_ATTR(cf_svn_generic, AES_CYCLES, 0x004d);
+CPUMF_EVENT_ATTR(cf_svn_generic, AES_BLOCKED_FUNCTIONS, 0x004e);
+CPUMF_EVENT_ATTR(cf_svn_generic, AES_BLOCKED_CYCLES, 0x004f);
+CPUMF_EVENT_ATTR(cf_z10, L1I_L2_SOURCED_WRITES, 0x0080);
+CPUMF_EVENT_ATTR(cf_z10, L1D_L2_SOURCED_WRITES, 0x0081);
+CPUMF_EVENT_ATTR(cf_z10, L1I_L3_LOCAL_WRITES, 0x0082);
+CPUMF_EVENT_ATTR(cf_z10, L1D_L3_LOCAL_WRITES, 0x0083);
+CPUMF_EVENT_ATTR(cf_z10, L1I_L3_REMOTE_WRITES, 0x0084);
+CPUMF_EVENT_ATTR(cf_z10, L1D_L3_REMOTE_WRITES, 0x0085);
+CPUMF_EVENT_ATTR(cf_z10, L1D_LMEM_SOURCED_WRITES, 0x0086);
+CPUMF_EVENT_ATTR(cf_z10, L1I_LMEM_SOURCED_WRITES, 0x0087);
+CPUMF_EVENT_ATTR(cf_z10, L1D_RO_EXCL_WRITES, 0x0088);
+CPUMF_EVENT_ATTR(cf_z10, L1I_CACHELINE_INVALIDATES, 0x0089);
+CPUMF_EVENT_ATTR(cf_z10, ITLB1_WRITES, 0x008a);
+CPUMF_EVENT_ATTR(cf_z10, DTLB1_WRITES, 0x008b);
+CPUMF_EVENT_ATTR(cf_z10, TLB2_PTE_WRITES, 0x008c);
+CPUMF_EVENT_ATTR(cf_z10, TLB2_CRSTE_WRITES, 0x008d);
+CPUMF_EVENT_ATTR(cf_z10, TLB2_CRSTE_HPAGE_WRITES, 0x008e);
+CPUMF_EVENT_ATTR(cf_z10, ITLB1_MISSES, 0x0091);
+CPUMF_EVENT_ATTR(cf_z10, DTLB1_MISSES, 0x0092);
+CPUMF_EVENT_ATTR(cf_z10, L2C_STORES_SENT, 0x0093);
+CPUMF_EVENT_ATTR(cf_z196, L1D_L2_SOURCED_WRITES, 0x0080);
+CPUMF_EVENT_ATTR(cf_z196, L1I_L2_SOURCED_WRITES, 0x0081);
+CPUMF_EVENT_ATTR(cf_z196, DTLB1_MISSES, 0x0082);
+CPUMF_EVENT_ATTR(cf_z196, ITLB1_MISSES, 0x0083);
+CPUMF_EVENT_ATTR(cf_z196, L2C_STORES_SENT, 0x0085);
+CPUMF_EVENT_ATTR(cf_z196, L1D_OFFBOOK_L3_SOURCED_WRITES, 0x0086);
+CPUMF_EVENT_ATTR(cf_z196, L1D_ONBOOK_L4_SOURCED_WRITES, 0x0087);
+CPUMF_EVENT_ATTR(cf_z196, L1I_ONBOOK_L4_SOURCED_WRITES, 0x0088);
+CPUMF_EVENT_ATTR(cf_z196, L1D_RO_EXCL_WRITES, 0x0089);
+CPUMF_EVENT_ATTR(cf_z196, L1D_OFFBOOK_L4_SOURCED_WRITES, 0x008a);
+CPUMF_EVENT_ATTR(cf_z196, L1I_OFFBOOK_L4_SOURCED_WRITES, 0x008b);
+CPUMF_EVENT_ATTR(cf_z196, DTLB1_HPAGE_WRITES, 0x008c);
+CPUMF_EVENT_ATTR(cf_z196, L1D_LMEM_SOURCED_WRITES, 0x008d);
+CPUMF_EVENT_ATTR(cf_z196, L1I_LMEM_SOURCED_WRITES, 0x008e);
+CPUMF_EVENT_ATTR(cf_z196, L1I_OFFBOOK_L3_SOURCED_WRITES, 0x008f);
+CPUMF_EVENT_ATTR(cf_z196, DTLB1_WRITES, 0x0090);
+CPUMF_EVENT_ATTR(cf_z196, ITLB1_WRITES, 0x0091);
+CPUMF_EVENT_ATTR(cf_z196, TLB2_PTE_WRITES, 0x0092);
+CPUMF_EVENT_ATTR(cf_z196, TLB2_CRSTE_HPAGE_WRITES, 0x0093);
+CPUMF_EVENT_ATTR(cf_z196, TLB2_CRSTE_WRITES, 0x0094);
+CPUMF_EVENT_ATTR(cf_z196, L1D_ONCHIP_L3_SOURCED_WRITES, 0x0096);
+CPUMF_EVENT_ATTR(cf_z196, L1D_OFFCHIP_L3_SOURCED_WRITES, 0x0098);
+CPUMF_EVENT_ATTR(cf_z196, L1I_ONCHIP_L3_SOURCED_WRITES, 0x0099);
+CPUMF_EVENT_ATTR(cf_z196, L1I_OFFCHIP_L3_SOURCED_WRITES, 0x009b);
+CPUMF_EVENT_ATTR(cf_zec12, DTLB1_MISSES, 0x0080);
+CPUMF_EVENT_ATTR(cf_zec12, ITLB1_MISSES, 0x0081);
+CPUMF_EVENT_ATTR(cf_zec12, L1D_L2I_SOURCED_WRITES, 0x0082);
+CPUMF_EVENT_ATTR(cf_zec12, L1I_L2I_SOURCED_WRITES, 0x0083);
+CPUMF_EVENT_ATTR(cf_zec12, L1D_L2D_SOURCED_WRITES, 0x0084);
+CPUMF_EVENT_ATTR(cf_zec12, DTLB1_WRITES, 0x0085);
+CPUMF_EVENT_ATTR(cf_zec12, L1D_LMEM_SOURCED_WRITES, 0x0087);
+CPUMF_EVENT_ATTR(cf_zec12, L1I_LMEM_SOURCED_WRITES, 0x0089);
+CPUMF_EVENT_ATTR(cf_zec12, L1D_RO_EXCL_WRITES, 0x008a);
+CPUMF_EVENT_ATTR(cf_zec12, DTLB1_HPAGE_WRITES, 0x008b);
+CPUMF_EVENT_ATTR(cf_zec12, ITLB1_WRITES, 0x008c);
+CPUMF_EVENT_ATTR(cf_zec12, TLB2_PTE_WRITES, 0x008d);
+CPUMF_EVENT_ATTR(cf_zec12, TLB2_CRSTE_HPAGE_WRITES, 0x008e);
+CPUMF_EVENT_ATTR(cf_zec12, TLB2_CRSTE_WRITES, 0x008f);
+CPUMF_EVENT_ATTR(cf_zec12, L1D_ONCHIP_L3_SOURCED_WRITES, 0x0090);
+CPUMF_EVENT_ATTR(cf_zec12, L1D_OFFCHIP_L3_SOURCED_WRITES, 0x0091);
+CPUMF_EVENT_ATTR(cf_zec12, L1D_OFFBOOK_L3_SOURCED_WRITES, 0x0092);
+CPUMF_EVENT_ATTR(cf_zec12, L1D_ONBOOK_L4_SOURCED_WRITES, 0x0093);
+CPUMF_EVENT_ATTR(cf_zec12, L1D_OFFBOOK_L4_SOURCED_WRITES, 0x0094);
+CPUMF_EVENT_ATTR(cf_zec12, TX_NC_TEND, 0x0095);
+CPUMF_EVENT_ATTR(cf_zec12, L1D_ONCHIP_L3_SOURCED_WRITES_IV, 0x0096);
+CPUMF_EVENT_ATTR(cf_zec12, L1D_OFFCHIP_L3_SOURCED_WRITES_IV, 0x0097);
+CPUMF_EVENT_ATTR(cf_zec12, L1D_OFFBOOK_L3_SOURCED_WRITES_IV, 0x0098);
+CPUMF_EVENT_ATTR(cf_zec12, L1I_ONCHIP_L3_SOURCED_WRITES, 0x0099);
+CPUMF_EVENT_ATTR(cf_zec12, L1I_OFFCHIP_L3_SOURCED_WRITES, 0x009a);
+CPUMF_EVENT_ATTR(cf_zec12, L1I_OFFBOOK_L3_SOURCED_WRITES, 0x009b);
+CPUMF_EVENT_ATTR(cf_zec12, L1I_ONBOOK_L4_SOURCED_WRITES, 0x009c);
+CPUMF_EVENT_ATTR(cf_zec12, L1I_OFFBOOK_L4_SOURCED_WRITES, 0x009d);
+CPUMF_EVENT_ATTR(cf_zec12, TX_C_TEND, 0x009e);
+CPUMF_EVENT_ATTR(cf_zec12, L1I_ONCHIP_L3_SOURCED_WRITES_IV, 0x009f);
+CPUMF_EVENT_ATTR(cf_zec12, L1I_OFFCHIP_L3_SOURCED_WRITES_IV, 0x00a0);
+CPUMF_EVENT_ATTR(cf_zec12, L1I_OFFBOOK_L3_SOURCED_WRITES_IV, 0x00a1);
+CPUMF_EVENT_ATTR(cf_zec12, TX_NC_TABORT, 0x00b1);
+CPUMF_EVENT_ATTR(cf_zec12, TX_C_TABORT_NO_SPECIAL, 0x00b2);
+CPUMF_EVENT_ATTR(cf_zec12, TX_C_TABORT_SPECIAL, 0x00b3);
+CPUMF_EVENT_ATTR(cf_z13, L1D_RO_EXCL_WRITES, 0x0080);
+CPUMF_EVENT_ATTR(cf_z13, DTLB1_WRITES, 0x0081);
+CPUMF_EVENT_ATTR(cf_z13, DTLB1_MISSES, 0x0082);
+CPUMF_EVENT_ATTR(cf_z13, DTLB1_HPAGE_WRITES, 0x0083);
+CPUMF_EVENT_ATTR(cf_z13, DTLB1_GPAGE_WRITES, 0x0084);
+CPUMF_EVENT_ATTR(cf_z13, L1D_L2D_SOURCED_WRITES, 0x0085);
+CPUMF_EVENT_ATTR(cf_z13, ITLB1_WRITES, 0x0086);
+CPUMF_EVENT_ATTR(cf_z13, ITLB1_MISSES, 0x0087);
+CPUMF_EVENT_ATTR(cf_z13, L1I_L2I_SOURCED_WRITES, 0x0088);
+CPUMF_EVENT_ATTR(cf_z13, TLB2_PTE_WRITES, 0x0089);
+CPUMF_EVENT_ATTR(cf_z13, TLB2_CRSTE_HPAGE_WRITES, 0x008a);
+CPUMF_EVENT_ATTR(cf_z13, TLB2_CRSTE_WRITES, 0x008b);
+CPUMF_EVENT_ATTR(cf_z13, TX_C_TEND, 0x008c);
+CPUMF_EVENT_ATTR(cf_z13, TX_NC_TEND, 0x008d);
+CPUMF_EVENT_ATTR(cf_z13, L1C_TLB1_MISSES, 0x008f);
+CPUMF_EVENT_ATTR(cf_z13, L1D_ONCHIP_L3_SOURCED_WRITES, 0x0090);
+CPUMF_EVENT_ATTR(cf_z13, L1D_ONCHIP_L3_SOURCED_WRITES_IV, 0x0091);
+CPUMF_EVENT_ATTR(cf_z13, L1D_ONNODE_L4_SOURCED_WRITES, 0x0092);
+CPUMF_EVENT_ATTR(cf_z13, L1D_ONNODE_L3_SOURCED_WRITES_IV, 0x0093);
+CPUMF_EVENT_ATTR(cf_z13, L1D_ONNODE_L3_SOURCED_WRITES, 0x0094);
+CPUMF_EVENT_ATTR(cf_z13, L1D_ONDRAWER_L4_SOURCED_WRITES, 0x0095);
+CPUMF_EVENT_ATTR(cf_z13, L1D_ONDRAWER_L3_SOURCED_WRITES_IV, 0x0096);
+CPUMF_EVENT_ATTR(cf_z13, L1D_ONDRAWER_L3_SOURCED_WRITES, 0x0097);
+CPUMF_EVENT_ATTR(cf_z13, L1D_OFFDRAWER_SCOL_L4_SOURCED_WRITES, 0x0098);
+CPUMF_EVENT_ATTR(cf_z13, L1D_OFFDRAWER_SCOL_L3_SOURCED_WRITES_IV, 0x0099);
+CPUMF_EVENT_ATTR(cf_z13, L1D_OFFDRAWER_SCOL_L3_SOURCED_WRITES, 0x009a);
+CPUMF_EVENT_ATTR(cf_z13, L1D_OFFDRAWER_FCOL_L4_SOURCED_WRITES, 0x009b);
+CPUMF_EVENT_ATTR(cf_z13, L1D_OFFDRAWER_FCOL_L3_SOURCED_WRITES_IV, 0x009c);
+CPUMF_EVENT_ATTR(cf_z13, L1D_OFFDRAWER_FCOL_L3_SOURCED_WRITES, 0x009d);
+CPUMF_EVENT_ATTR(cf_z13, L1D_ONNODE_MEM_SOURCED_WRITES, 0x009e);
+CPUMF_EVENT_ATTR(cf_z13, L1D_ONDRAWER_MEM_SOURCED_WRITES, 0x009f);
+CPUMF_EVENT_ATTR(cf_z13, L1D_OFFDRAWER_MEM_SOURCED_WRITES, 0x00a0);
+CPUMF_EVENT_ATTR(cf_z13, L1D_ONCHIP_MEM_SOURCED_WRITES, 0x00a1);
+CPUMF_EVENT_ATTR(cf_z13, L1I_ONCHIP_L3_SOURCED_WRITES, 0x00a2);
+CPUMF_EVENT_ATTR(cf_z13, L1I_ONCHIP_L3_SOURCED_WRITES_IV, 0x00a3);
+CPUMF_EVENT_ATTR(cf_z13, L1I_ONNODE_L4_SOURCED_WRITES, 0x00a4);
+CPUMF_EVENT_ATTR(cf_z13, L1I_ONNODE_L3_SOURCED_WRITES_IV, 0x00a5);
+CPUMF_EVENT_ATTR(cf_z13, L1I_ONNODE_L3_SOURCED_WRITES, 0x00a6);
+CPUMF_EVENT_ATTR(cf_z13, L1I_ONDRAWER_L4_SOURCED_WRITES, 0x00a7);
+CPUMF_EVENT_ATTR(cf_z13, L1I_ONDRAWER_L3_SOURCED_WRITES_IV, 0x00a8);
+CPUMF_EVENT_ATTR(cf_z13, L1I_ONDRAWER_L3_SOURCED_WRITES, 0x00a9);
+CPUMF_EVENT_ATTR(cf_z13, L1I_OFFDRAWER_SCOL_L4_SOURCED_WRITES, 0x00aa);
+CPUMF_EVENT_ATTR(cf_z13, L1I_OFFDRAWER_SCOL_L3_SOURCED_WRITES_IV, 0x00ab);
+CPUMF_EVENT_ATTR(cf_z13, L1I_OFFDRAWER_SCOL_L3_SOURCED_WRITES, 0x00ac);
+CPUMF_EVENT_ATTR(cf_z13, L1I_OFFDRAWER_FCOL_L4_SOURCED_WRITES, 0x00ad);
+CPUMF_EVENT_ATTR(cf_z13, L1I_OFFDRAWER_FCOL_L3_SOURCED_WRITES_IV, 0x00ae);
+CPUMF_EVENT_ATTR(cf_z13, L1I_OFFDRAWER_FCOL_L3_SOURCED_WRITES, 0x00af);
+CPUMF_EVENT_ATTR(cf_z13, L1I_ONNODE_MEM_SOURCED_WRITES, 0x00b0);
+CPUMF_EVENT_ATTR(cf_z13, L1I_ONDRAWER_MEM_SOURCED_WRITES, 0x00b1);
+CPUMF_EVENT_ATTR(cf_z13, L1I_OFFDRAWER_MEM_SOURCED_WRITES, 0x00b2);
+CPUMF_EVENT_ATTR(cf_z13, L1I_ONCHIP_MEM_SOURCED_WRITES, 0x00b3);
+CPUMF_EVENT_ATTR(cf_z13, TX_NC_TABORT, 0x00da);
+CPUMF_EVENT_ATTR(cf_z13, TX_C_TABORT_NO_SPECIAL, 0x00db);
+CPUMF_EVENT_ATTR(cf_z13, TX_C_TABORT_SPECIAL, 0x00dc);
+CPUMF_EVENT_ATTR(cf_z13, MT_DIAG_CYCLES_ONE_THR_ACTIVE, 0x01c0);
+CPUMF_EVENT_ATTR(cf_z13, MT_DIAG_CYCLES_TWO_THR_ACTIVE, 0x01c1);
+CPUMF_EVENT_ATTR(cf_z14, L1D_RO_EXCL_WRITES, 0x0080);
+CPUMF_EVENT_ATTR(cf_z14, DTLB2_WRITES, 0x0081);
+CPUMF_EVENT_ATTR(cf_z14, DTLB2_MISSES, 0x0082);
+CPUMF_EVENT_ATTR(cf_z14, DTLB2_HPAGE_WRITES, 0x0083);
+CPUMF_EVENT_ATTR(cf_z14, DTLB2_GPAGE_WRITES, 0x0084);
+CPUMF_EVENT_ATTR(cf_z14, L1D_L2D_SOURCED_WRITES, 0x0085);
+CPUMF_EVENT_ATTR(cf_z14, ITLB2_WRITES, 0x0086);
+CPUMF_EVENT_ATTR(cf_z14, ITLB2_MISSES, 0x0087);
+CPUMF_EVENT_ATTR(cf_z14, L1I_L2I_SOURCED_WRITES, 0x0088);
+CPUMF_EVENT_ATTR(cf_z14, TLB2_PTE_WRITES, 0x0089);
+CPUMF_EVENT_ATTR(cf_z14, TLB2_CRSTE_WRITES, 0x008a);
+CPUMF_EVENT_ATTR(cf_z14, TLB2_ENGINES_BUSY, 0x008b);
+CPUMF_EVENT_ATTR(cf_z14, TX_C_TEND, 0x008c);
+CPUMF_EVENT_ATTR(cf_z14, TX_NC_TEND, 0x008d);
+CPUMF_EVENT_ATTR(cf_z14, L1C_TLB2_MISSES, 0x008f);
+CPUMF_EVENT_ATTR(cf_z14, L1D_ONCHIP_L3_SOURCED_WRITES, 0x0090);
+CPUMF_EVENT_ATTR(cf_z14, L1D_ONCHIP_MEMORY_SOURCED_WRITES, 0x0091);
+CPUMF_EVENT_ATTR(cf_z14, L1D_ONCHIP_L3_SOURCED_WRITES_IV, 0x0092);
+CPUMF_EVENT_ATTR(cf_z14, L1D_ONCLUSTER_L3_SOURCED_WRITES, 0x0093);
+CPUMF_EVENT_ATTR(cf_z14, L1D_ONCLUSTER_MEMORY_SOURCED_WRITES, 0x0094);
+CPUMF_EVENT_ATTR(cf_z14, L1D_ONCLUSTER_L3_SOURCED_WRITES_IV, 0x0095);
+CPUMF_EVENT_ATTR(cf_z14, L1D_OFFCLUSTER_L3_SOURCED_WRITES, 0x0096);
+CPUMF_EVENT_ATTR(cf_z14, L1D_OFFCLUSTER_MEMORY_SOURCED_WRITES, 0x0097);
+CPUMF_EVENT_ATTR(cf_z14, L1D_OFFCLUSTER_L3_SOURCED_WRITES_IV, 0x0098);
+CPUMF_EVENT_ATTR(cf_z14, L1D_OFFDRAWER_L3_SOURCED_WRITES, 0x0099);
+CPUMF_EVENT_ATTR(cf_z14, L1D_OFFDRAWER_MEMORY_SOURCED_WRITES, 0x009a);
+CPUMF_EVENT_ATTR(cf_z14, L1D_OFFDRAWER_L3_SOURCED_WRITES_IV, 0x009b);
+CPUMF_EVENT_ATTR(cf_z14, L1D_ONDRAWER_L4_SOURCED_WRITES, 0x009c);
+CPUMF_EVENT_ATTR(cf_z14, L1D_OFFDRAWER_L4_SOURCED_WRITES, 0x009d);
+CPUMF_EVENT_ATTR(cf_z14, L1D_ONCHIP_L3_SOURCED_WRITES_RO, 0x009e);
+CPUMF_EVENT_ATTR(cf_z14, L1I_ONCHIP_L3_SOURCED_WRITES, 0x00a2);
+CPUMF_EVENT_ATTR(cf_z14, L1I_ONCHIP_MEMORY_SOURCED_WRITES, 0x00a3);
+CPUMF_EVENT_ATTR(cf_z14, L1I_ONCHIP_L3_SOURCED_WRITES_IV, 0x00a4);
+CPUMF_EVENT_ATTR(cf_z14, L1I_ONCLUSTER_L3_SOURCED_WRITES, 0x00a5);
+CPUMF_EVENT_ATTR(cf_z14, L1I_ONCLUSTER_MEMORY_SOURCED_WRITES, 0x00a6);
+CPUMF_EVENT_ATTR(cf_z14, L1I_ONCLUSTER_L3_SOURCED_WRITES_IV, 0x00a7);
+CPUMF_EVENT_ATTR(cf_z14, L1I_OFFCLUSTER_L3_SOURCED_WRITES, 0x00a8);
+CPUMF_EVENT_ATTR(cf_z14, L1I_OFFCLUSTER_MEMORY_SOURCED_WRITES, 0x00a9);
+CPUMF_EVENT_ATTR(cf_z14, L1I_OFFCLUSTER_L3_SOURCED_WRITES_IV, 0x00aa);
+CPUMF_EVENT_ATTR(cf_z14, L1I_OFFDRAWER_L3_SOURCED_WRITES, 0x00ab);
+CPUMF_EVENT_ATTR(cf_z14, L1I_OFFDRAWER_MEMORY_SOURCED_WRITES, 0x00ac);
+CPUMF_EVENT_ATTR(cf_z14, L1I_OFFDRAWER_L3_SOURCED_WRITES_IV, 0x00ad);
+CPUMF_EVENT_ATTR(cf_z14, L1I_ONDRAWER_L4_SOURCED_WRITES, 0x00ae);
+CPUMF_EVENT_ATTR(cf_z14, L1I_OFFDRAWER_L4_SOURCED_WRITES, 0x00af);
+CPUMF_EVENT_ATTR(cf_z14, BCD_DFP_EXECUTION_SLOTS, 0x00e0);
+CPUMF_EVENT_ATTR(cf_z14, VX_BCD_EXECUTION_SLOTS, 0x00e1);
+CPUMF_EVENT_ATTR(cf_z14, DECIMAL_INSTRUCTIONS, 0x00e2);
+CPUMF_EVENT_ATTR(cf_z14, LAST_HOST_TRANSLATIONS, 0x00e8);
+CPUMF_EVENT_ATTR(cf_z14, TX_NC_TABORT, 0x00f3);
+CPUMF_EVENT_ATTR(cf_z14, TX_C_TABORT_NO_SPECIAL, 0x00f4);
+CPUMF_EVENT_ATTR(cf_z14, TX_C_TABORT_SPECIAL, 0x00f5);
+CPUMF_EVENT_ATTR(cf_z14, MT_DIAG_CYCLES_ONE_THR_ACTIVE, 0x01c0);
+CPUMF_EVENT_ATTR(cf_z14, MT_DIAG_CYCLES_TWO_THR_ACTIVE, 0x01c1);
+
+static struct attribute *cpumcf_fvn1_pmu_event_attr[] __initdata = {
+ CPUMF_EVENT_PTR(cf_fvn1, CPU_CYCLES),
+ CPUMF_EVENT_PTR(cf_fvn1, INSTRUCTIONS),
+ CPUMF_EVENT_PTR(cf_fvn1, L1I_DIR_WRITES),
+ CPUMF_EVENT_PTR(cf_fvn1, L1I_PENALTY_CYCLES),
+ CPUMF_EVENT_PTR(cf_fvn1, PROBLEM_STATE_CPU_CYCLES),
+ CPUMF_EVENT_PTR(cf_fvn1, PROBLEM_STATE_INSTRUCTIONS),
+ CPUMF_EVENT_PTR(cf_fvn1, PROBLEM_STATE_L1I_DIR_WRITES),
+ CPUMF_EVENT_PTR(cf_fvn1, PROBLEM_STATE_L1I_PENALTY_CYCLES),
+ CPUMF_EVENT_PTR(cf_fvn1, PROBLEM_STATE_L1D_DIR_WRITES),
+ CPUMF_EVENT_PTR(cf_fvn1, PROBLEM_STATE_L1D_PENALTY_CYCLES),
+ CPUMF_EVENT_PTR(cf_fvn1, L1D_DIR_WRITES),
+ CPUMF_EVENT_PTR(cf_fvn1, L1D_PENALTY_CYCLES),
+ NULL,
+};
+
+static struct attribute *cpumcf_fvn3_pmu_event_attr[] __initdata = {
+ CPUMF_EVENT_PTR(cf_fvn3, CPU_CYCLES),
+ CPUMF_EVENT_PTR(cf_fvn3, INSTRUCTIONS),
+ CPUMF_EVENT_PTR(cf_fvn3, L1I_DIR_WRITES),
+ CPUMF_EVENT_PTR(cf_fvn3, L1I_PENALTY_CYCLES),
+ CPUMF_EVENT_PTR(cf_fvn3, PROBLEM_STATE_CPU_CYCLES),
+ CPUMF_EVENT_PTR(cf_fvn3, PROBLEM_STATE_INSTRUCTIONS),
+ CPUMF_EVENT_PTR(cf_fvn3, L1D_DIR_WRITES),
+ CPUMF_EVENT_PTR(cf_fvn3, L1D_PENALTY_CYCLES),
+ NULL,
+};
+
+static struct attribute *cpumcf_svn_generic_pmu_event_attr[] __initdata = {
+ CPUMF_EVENT_PTR(cf_svn_generic, PRNG_FUNCTIONS),
+ CPUMF_EVENT_PTR(cf_svn_generic, PRNG_CYCLES),
+ CPUMF_EVENT_PTR(cf_svn_generic, PRNG_BLOCKED_FUNCTIONS),
+ CPUMF_EVENT_PTR(cf_svn_generic, PRNG_BLOCKED_CYCLES),
+ CPUMF_EVENT_PTR(cf_svn_generic, SHA_FUNCTIONS),
+ CPUMF_EVENT_PTR(cf_svn_generic, SHA_CYCLES),
+ CPUMF_EVENT_PTR(cf_svn_generic, SHA_BLOCKED_FUNCTIONS),
+ CPUMF_EVENT_PTR(cf_svn_generic, SHA_BLOCKED_CYCLES),
+ CPUMF_EVENT_PTR(cf_svn_generic, DEA_FUNCTIONS),
+ CPUMF_EVENT_PTR(cf_svn_generic, DEA_CYCLES),
+ CPUMF_EVENT_PTR(cf_svn_generic, DEA_BLOCKED_FUNCTIONS),
+ CPUMF_EVENT_PTR(cf_svn_generic, DEA_BLOCKED_CYCLES),
+ CPUMF_EVENT_PTR(cf_svn_generic, AES_FUNCTIONS),
+ CPUMF_EVENT_PTR(cf_svn_generic, AES_CYCLES),
+ CPUMF_EVENT_PTR(cf_svn_generic, AES_BLOCKED_FUNCTIONS),
+ CPUMF_EVENT_PTR(cf_svn_generic, AES_BLOCKED_CYCLES),
+ NULL,
+};
+
+static struct attribute *cpumcf_z10_pmu_event_attr[] __initdata = {
+ CPUMF_EVENT_PTR(cf_z10, L1I_L2_SOURCED_WRITES),
+ CPUMF_EVENT_PTR(cf_z10, L1D_L2_SOURCED_WRITES),
+ CPUMF_EVENT_PTR(cf_z10, L1I_L3_LOCAL_WRITES),
+ CPUMF_EVENT_PTR(cf_z10, L1D_L3_LOCAL_WRITES),
+ CPUMF_EVENT_PTR(cf_z10, L1I_L3_REMOTE_WRITES),
+ CPUMF_EVENT_PTR(cf_z10, L1D_L3_REMOTE_WRITES),
+ CPUMF_EVENT_PTR(cf_z10, L1D_LMEM_SOURCED_WRITES),
+ CPUMF_EVENT_PTR(cf_z10, L1I_LMEM_SOURCED_WRITES),
+ CPUMF_EVENT_PTR(cf_z10, L1D_RO_EXCL_WRITES),
+ CPUMF_EVENT_PTR(cf_z10, L1I_CACHELINE_INVALIDATES),
+ CPUMF_EVENT_PTR(cf_z10, ITLB1_WRITES),
+ CPUMF_EVENT_PTR(cf_z10, DTLB1_WRITES),
+ CPUMF_EVENT_PTR(cf_z10, TLB2_PTE_WRITES),
+ CPUMF_EVENT_PTR(cf_z10, TLB2_CRSTE_WRITES),
+ CPUMF_EVENT_PTR(cf_z10, TLB2_CRSTE_HPAGE_WRITES),
+ CPUMF_EVENT_PTR(cf_z10, ITLB1_MISSES),
+ CPUMF_EVENT_PTR(cf_z10, DTLB1_MISSES),
+ CPUMF_EVENT_PTR(cf_z10, L2C_STORES_SENT),
+ NULL,
+};
+
+static struct attribute *cpumcf_z196_pmu_event_attr[] __initdata = {
+ CPUMF_EVENT_PTR(cf_z196, L1D_L2_SOURCED_WRITES),
+ CPUMF_EVENT_PTR(cf_z196, L1I_L2_SOURCED_WRITES),
+ CPUMF_EVENT_PTR(cf_z196, DTLB1_MISSES),
+ CPUMF_EVENT_PTR(cf_z196, ITLB1_MISSES),
+ CPUMF_EVENT_PTR(cf_z196, L2C_STORES_SENT),
+ CPUMF_EVENT_PTR(cf_z196, L1D_OFFBOOK_L3_SOURCED_WRITES),
+ CPUMF_EVENT_PTR(cf_z196, L1D_ONBOOK_L4_SOURCED_WRITES),
+ CPUMF_EVENT_PTR(cf_z196, L1I_ONBOOK_L4_SOURCED_WRITES),
+ CPUMF_EVENT_PTR(cf_z196, L1D_RO_EXCL_WRITES),
+ CPUMF_EVENT_PTR(cf_z196, L1D_OFFBOOK_L4_SOURCED_WRITES),
+ CPUMF_EVENT_PTR(cf_z196, L1I_OFFBOOK_L4_SOURCED_WRITES),
+ CPUMF_EVENT_PTR(cf_z196, DTLB1_HPAGE_WRITES),
+ CPUMF_EVENT_PTR(cf_z196, L1D_LMEM_SOURCED_WRITES),
+ CPUMF_EVENT_PTR(cf_z196, L1I_LMEM_SOURCED_WRITES),
+ CPUMF_EVENT_PTR(cf_z196, L1I_OFFBOOK_L3_SOURCED_WRITES),
+ CPUMF_EVENT_PTR(cf_z196, DTLB1_WRITES),
+ CPUMF_EVENT_PTR(cf_z196, ITLB1_WRITES),
+ CPUMF_EVENT_PTR(cf_z196, TLB2_PTE_WRITES),
+ CPUMF_EVENT_PTR(cf_z196, TLB2_CRSTE_HPAGE_WRITES),
+ CPUMF_EVENT_PTR(cf_z196, TLB2_CRSTE_WRITES),
+ CPUMF_EVENT_PTR(cf_z196, L1D_ONCHIP_L3_SOURCED_WRITES),
+ CPUMF_EVENT_PTR(cf_z196, L1D_OFFCHIP_L3_SOURCED_WRITES),
+ CPUMF_EVENT_PTR(cf_z196, L1I_ONCHIP_L3_SOURCED_WRITES),
+ CPUMF_EVENT_PTR(cf_z196, L1I_OFFCHIP_L3_SOURCED_WRITES),
+ NULL,
+};
+
+static struct attribute *cpumcf_zec12_pmu_event_attr[] __initdata = {
+ CPUMF_EVENT_PTR(cf_zec12, DTLB1_MISSES),
+ CPUMF_EVENT_PTR(cf_zec12, ITLB1_MISSES),
+ CPUMF_EVENT_PTR(cf_zec12, L1D_L2I_SOURCED_WRITES),
+ CPUMF_EVENT_PTR(cf_zec12, L1I_L2I_SOURCED_WRITES),
+ CPUMF_EVENT_PTR(cf_zec12, L1D_L2D_SOURCED_WRITES),
+ CPUMF_EVENT_PTR(cf_zec12, DTLB1_WRITES),
+ CPUMF_EVENT_PTR(cf_zec12, L1D_LMEM_SOURCED_WRITES),
+ CPUMF_EVENT_PTR(cf_zec12, L1I_LMEM_SOURCED_WRITES),
+ CPUMF_EVENT_PTR(cf_zec12, L1D_RO_EXCL_WRITES),
+ CPUMF_EVENT_PTR(cf_zec12, DTLB1_HPAGE_WRITES),
+ CPUMF_EVENT_PTR(cf_zec12, ITLB1_WRITES),
+ CPUMF_EVENT_PTR(cf_zec12, TLB2_PTE_WRITES),
+ CPUMF_EVENT_PTR(cf_zec12, TLB2_CRSTE_HPAGE_WRITES),
+ CPUMF_EVENT_PTR(cf_zec12, TLB2_CRSTE_WRITES),
+ CPUMF_EVENT_PTR(cf_zec12, L1D_ONCHIP_L3_SOURCED_WRITES),
+ CPUMF_EVENT_PTR(cf_zec12, L1D_OFFCHIP_L3_SOURCED_WRITES),
+ CPUMF_EVENT_PTR(cf_zec12, L1D_OFFBOOK_L3_SOURCED_WRITES),
+ CPUMF_EVENT_PTR(cf_zec12, L1D_ONBOOK_L4_SOURCED_WRITES),
+ CPUMF_EVENT_PTR(cf_zec12, L1D_OFFBOOK_L4_SOURCED_WRITES),
+ CPUMF_EVENT_PTR(cf_zec12, TX_NC_TEND),
+ CPUMF_EVENT_PTR(cf_zec12, L1D_ONCHIP_L3_SOURCED_WRITES_IV),
+ CPUMF_EVENT_PTR(cf_zec12, L1D_OFFCHIP_L3_SOURCED_WRITES_IV),
+ CPUMF_EVENT_PTR(cf_zec12, L1D_OFFBOOK_L3_SOURCED_WRITES_IV),
+ CPUMF_EVENT_PTR(cf_zec12, L1I_ONCHIP_L3_SOURCED_WRITES),
+ CPUMF_EVENT_PTR(cf_zec12, L1I_OFFCHIP_L3_SOURCED_WRITES),
+ CPUMF_EVENT_PTR(cf_zec12, L1I_OFFBOOK_L3_SOURCED_WRITES),
+ CPUMF_EVENT_PTR(cf_zec12, L1I_ONBOOK_L4_SOURCED_WRITES),
+ CPUMF_EVENT_PTR(cf_zec12, L1I_OFFBOOK_L4_SOURCED_WRITES),
+ CPUMF_EVENT_PTR(cf_zec12, TX_C_TEND),
+ CPUMF_EVENT_PTR(cf_zec12, L1I_ONCHIP_L3_SOURCED_WRITES_IV),
+ CPUMF_EVENT_PTR(cf_zec12, L1I_OFFCHIP_L3_SOURCED_WRITES_IV),
+ CPUMF_EVENT_PTR(cf_zec12, L1I_OFFBOOK_L3_SOURCED_WRITES_IV),
+ CPUMF_EVENT_PTR(cf_zec12, TX_NC_TABORT),
+ CPUMF_EVENT_PTR(cf_zec12, TX_C_TABORT_NO_SPECIAL),
+ CPUMF_EVENT_PTR(cf_zec12, TX_C_TABORT_SPECIAL),
+ NULL,
+};
+
+static struct attribute *cpumcf_z13_pmu_event_attr[] __initdata = {
+ CPUMF_EVENT_PTR(cf_z13, L1D_RO_EXCL_WRITES),
+ CPUMF_EVENT_PTR(cf_z13, DTLB1_WRITES),
+ CPUMF_EVENT_PTR(cf_z13, DTLB1_MISSES),
+ CPUMF_EVENT_PTR(cf_z13, DTLB1_HPAGE_WRITES),
+ CPUMF_EVENT_PTR(cf_z13, DTLB1_GPAGE_WRITES),
+ CPUMF_EVENT_PTR(cf_z13, L1D_L2D_SOURCED_WRITES),
+ CPUMF_EVENT_PTR(cf_z13, ITLB1_WRITES),
+ CPUMF_EVENT_PTR(cf_z13, ITLB1_MISSES),
+ CPUMF_EVENT_PTR(cf_z13, L1I_L2I_SOURCED_WRITES),
+ CPUMF_EVENT_PTR(cf_z13, TLB2_PTE_WRITES),
+ CPUMF_EVENT_PTR(cf_z13, TLB2_CRSTE_HPAGE_WRITES),
+ CPUMF_EVENT_PTR(cf_z13, TLB2_CRSTE_WRITES),
+ CPUMF_EVENT_PTR(cf_z13, TX_C_TEND),
+ CPUMF_EVENT_PTR(cf_z13, TX_NC_TEND),
+ CPUMF_EVENT_PTR(cf_z13, L1C_TLB1_MISSES),
+ CPUMF_EVENT_PTR(cf_z13, L1D_ONCHIP_L3_SOURCED_WRITES),
+ CPUMF_EVENT_PTR(cf_z13, L1D_ONCHIP_L3_SOURCED_WRITES_IV),
+ CPUMF_EVENT_PTR(cf_z13, L1D_ONNODE_L4_SOURCED_WRITES),
+ CPUMF_EVENT_PTR(cf_z13, L1D_ONNODE_L3_SOURCED_WRITES_IV),
+ CPUMF_EVENT_PTR(cf_z13, L1D_ONNODE_L3_SOURCED_WRITES),
+ CPUMF_EVENT_PTR(cf_z13, L1D_ONDRAWER_L4_SOURCED_WRITES),
+ CPUMF_EVENT_PTR(cf_z13, L1D_ONDRAWER_L3_SOURCED_WRITES_IV),
+ CPUMF_EVENT_PTR(cf_z13, L1D_ONDRAWER_L3_SOURCED_WRITES),
+ CPUMF_EVENT_PTR(cf_z13, L1D_OFFDRAWER_SCOL_L4_SOURCED_WRITES),
+ CPUMF_EVENT_PTR(cf_z13, L1D_OFFDRAWER_SCOL_L3_SOURCED_WRITES_IV),
+ CPUMF_EVENT_PTR(cf_z13, L1D_OFFDRAWER_SCOL_L3_SOURCED_WRITES),
+ CPUMF_EVENT_PTR(cf_z13, L1D_OFFDRAWER_FCOL_L4_SOURCED_WRITES),
+ CPUMF_EVENT_PTR(cf_z13, L1D_OFFDRAWER_FCOL_L3_SOURCED_WRITES_IV),
+ CPUMF_EVENT_PTR(cf_z13, L1D_OFFDRAWER_FCOL_L3_SOURCED_WRITES),
+ CPUMF_EVENT_PTR(cf_z13, L1D_ONNODE_MEM_SOURCED_WRITES),
+ CPUMF_EVENT_PTR(cf_z13, L1D_ONDRAWER_MEM_SOURCED_WRITES),
+ CPUMF_EVENT_PTR(cf_z13, L1D_OFFDRAWER_MEM_SOURCED_WRITES),
+ CPUMF_EVENT_PTR(cf_z13, L1D_ONCHIP_MEM_SOURCED_WRITES),
+ CPUMF_EVENT_PTR(cf_z13, L1I_ONCHIP_L3_SOURCED_WRITES),
+ CPUMF_EVENT_PTR(cf_z13, L1I_ONCHIP_L3_SOURCED_WRITES_IV),
+ CPUMF_EVENT_PTR(cf_z13, L1I_ONNODE_L4_SOURCED_WRITES),
+ CPUMF_EVENT_PTR(cf_z13, L1I_ONNODE_L3_SOURCED_WRITES_IV),
+ CPUMF_EVENT_PTR(cf_z13, L1I_ONNODE_L3_SOURCED_WRITES),
+ CPUMF_EVENT_PTR(cf_z13, L1I_ONDRAWER_L4_SOURCED_WRITES),
+ CPUMF_EVENT_PTR(cf_z13, L1I_ONDRAWER_L3_SOURCED_WRITES_IV),
+ CPUMF_EVENT_PTR(cf_z13, L1I_ONDRAWER_L3_SOURCED_WRITES),
+ CPUMF_EVENT_PTR(cf_z13, L1I_OFFDRAWER_SCOL_L4_SOURCED_WRITES),
+ CPUMF_EVENT_PTR(cf_z13, L1I_OFFDRAWER_SCOL_L3_SOURCED_WRITES_IV),
+ CPUMF_EVENT_PTR(cf_z13, L1I_OFFDRAWER_SCOL_L3_SOURCED_WRITES),
+ CPUMF_EVENT_PTR(cf_z13, L1I_OFFDRAWER_FCOL_L4_SOURCED_WRITES),
+ CPUMF_EVENT_PTR(cf_z13, L1I_OFFDRAWER_FCOL_L3_SOURCED_WRITES_IV),
+ CPUMF_EVENT_PTR(cf_z13, L1I_OFFDRAWER_FCOL_L3_SOURCED_WRITES),
+ CPUMF_EVENT_PTR(cf_z13, L1I_ONNODE_MEM_SOURCED_WRITES),
+ CPUMF_EVENT_PTR(cf_z13, L1I_ONDRAWER_MEM_SOURCED_WRITES),
+ CPUMF_EVENT_PTR(cf_z13, L1I_OFFDRAWER_MEM_SOURCED_WRITES),
+ CPUMF_EVENT_PTR(cf_z13, L1I_ONCHIP_MEM_SOURCED_WRITES),
+ CPUMF_EVENT_PTR(cf_z13, TX_NC_TABORT),
+ CPUMF_EVENT_PTR(cf_z13, TX_C_TABORT_NO_SPECIAL),
+ CPUMF_EVENT_PTR(cf_z13, TX_C_TABORT_SPECIAL),
+ CPUMF_EVENT_PTR(cf_z13, MT_DIAG_CYCLES_ONE_THR_ACTIVE),
+ CPUMF_EVENT_PTR(cf_z13, MT_DIAG_CYCLES_TWO_THR_ACTIVE),
+ NULL,
+};
+
+static struct attribute *cpumcf_z14_pmu_event_attr[] __initdata = {
+ CPUMF_EVENT_PTR(cf_z14, L1D_RO_EXCL_WRITES),
+ CPUMF_EVENT_PTR(cf_z14, DTLB2_WRITES),
+ CPUMF_EVENT_PTR(cf_z14, DTLB2_MISSES),
+ CPUMF_EVENT_PTR(cf_z14, DTLB2_HPAGE_WRITES),
+ CPUMF_EVENT_PTR(cf_z14, DTLB2_GPAGE_WRITES),
+ CPUMF_EVENT_PTR(cf_z14, L1D_L2D_SOURCED_WRITES),
+ CPUMF_EVENT_PTR(cf_z14, ITLB2_WRITES),
+ CPUMF_EVENT_PTR(cf_z14, ITLB2_MISSES),
+ CPUMF_EVENT_PTR(cf_z14, L1I_L2I_SOURCED_WRITES),
+ CPUMF_EVENT_PTR(cf_z14, TLB2_PTE_WRITES),
+ CPUMF_EVENT_PTR(cf_z14, TLB2_CRSTE_WRITES),
+ CPUMF_EVENT_PTR(cf_z14, TLB2_ENGINES_BUSY),
+ CPUMF_EVENT_PTR(cf_z14, TX_C_TEND),
+ CPUMF_EVENT_PTR(cf_z14, TX_NC_TEND),
+ CPUMF_EVENT_PTR(cf_z14, L1C_TLB2_MISSES),
+ CPUMF_EVENT_PTR(cf_z14, L1D_ONCHIP_L3_SOURCED_WRITES),
+ CPUMF_EVENT_PTR(cf_z14, L1D_ONCHIP_MEMORY_SOURCED_WRITES),
+ CPUMF_EVENT_PTR(cf_z14, L1D_ONCHIP_L3_SOURCED_WRITES_IV),
+ CPUMF_EVENT_PTR(cf_z14, L1D_ONCLUSTER_L3_SOURCED_WRITES),
+ CPUMF_EVENT_PTR(cf_z14, L1D_ONCLUSTER_MEMORY_SOURCED_WRITES),
+ CPUMF_EVENT_PTR(cf_z14, L1D_ONCLUSTER_L3_SOURCED_WRITES_IV),
+ CPUMF_EVENT_PTR(cf_z14, L1D_OFFCLUSTER_L3_SOURCED_WRITES),
+ CPUMF_EVENT_PTR(cf_z14, L1D_OFFCLUSTER_MEMORY_SOURCED_WRITES),
+ CPUMF_EVENT_PTR(cf_z14, L1D_OFFCLUSTER_L3_SOURCED_WRITES_IV),
+ CPUMF_EVENT_PTR(cf_z14, L1D_OFFDRAWER_L3_SOURCED_WRITES),
+ CPUMF_EVENT_PTR(cf_z14, L1D_OFFDRAWER_MEMORY_SOURCED_WRITES),
+ CPUMF_EVENT_PTR(cf_z14, L1D_OFFDRAWER_L3_SOURCED_WRITES_IV),
+ CPUMF_EVENT_PTR(cf_z14, L1D_ONDRAWER_L4_SOURCED_WRITES),
+ CPUMF_EVENT_PTR(cf_z14, L1D_OFFDRAWER_L4_SOURCED_WRITES),
+ CPUMF_EVENT_PTR(cf_z14, L1D_ONCHIP_L3_SOURCED_WRITES_RO),
+ CPUMF_EVENT_PTR(cf_z14, L1I_ONCHIP_L3_SOURCED_WRITES),
+ CPUMF_EVENT_PTR(cf_z14, L1I_ONCHIP_MEMORY_SOURCED_WRITES),
+ CPUMF_EVENT_PTR(cf_z14, L1I_ONCHIP_L3_SOURCED_WRITES_IV),
+ CPUMF_EVENT_PTR(cf_z14, L1I_ONCLUSTER_L3_SOURCED_WRITES),
+ CPUMF_EVENT_PTR(cf_z14, L1I_ONCLUSTER_MEMORY_SOURCED_WRITES),
+ CPUMF_EVENT_PTR(cf_z14, L1I_ONCLUSTER_L3_SOURCED_WRITES_IV),
+ CPUMF_EVENT_PTR(cf_z14, L1I_OFFCLUSTER_L3_SOURCED_WRITES),
+ CPUMF_EVENT_PTR(cf_z14, L1I_OFFCLUSTER_MEMORY_SOURCED_WRITES),
+ CPUMF_EVENT_PTR(cf_z14, L1I_OFFCLUSTER_L3_SOURCED_WRITES_IV),
+ CPUMF_EVENT_PTR(cf_z14, L1I_OFFDRAWER_L3_SOURCED_WRITES),
+ CPUMF_EVENT_PTR(cf_z14, L1I_OFFDRAWER_MEMORY_SOURCED_WRITES),
+ CPUMF_EVENT_PTR(cf_z14, L1I_OFFDRAWER_L3_SOURCED_WRITES_IV),
+ CPUMF_EVENT_PTR(cf_z14, L1I_ONDRAWER_L4_SOURCED_WRITES),
+ CPUMF_EVENT_PTR(cf_z14, L1I_OFFDRAWER_L4_SOURCED_WRITES),
+ CPUMF_EVENT_PTR(cf_z14, BCD_DFP_EXECUTION_SLOTS),
+ CPUMF_EVENT_PTR(cf_z14, VX_BCD_EXECUTION_SLOTS),
+ CPUMF_EVENT_PTR(cf_z14, DECIMAL_INSTRUCTIONS),
+ CPUMF_EVENT_PTR(cf_z14, LAST_HOST_TRANSLATIONS),
+ CPUMF_EVENT_PTR(cf_z14, TX_NC_TABORT),
+ CPUMF_EVENT_PTR(cf_z14, TX_C_TABORT_NO_SPECIAL),
+ CPUMF_EVENT_PTR(cf_z14, TX_C_TABORT_SPECIAL),
+ CPUMF_EVENT_PTR(cf_z14, MT_DIAG_CYCLES_ONE_THR_ACTIVE),
+ CPUMF_EVENT_PTR(cf_z14, MT_DIAG_CYCLES_TWO_THR_ACTIVE),
+ NULL,
+};
+
+/* END: CPUM_CF COUNTER DEFINITIONS ===================================== */
+
+static struct attribute_group cpumcf_pmu_events_group = {
+ .name = "events",
+};
+
+PMU_FORMAT_ATTR(event, "config:0-63");
+
+static struct attribute *cpumcf_pmu_format_attr[] = {
+ &format_attr_event.attr,
+ NULL,
+};
+
+static struct attribute_group cpumcf_pmu_format_group = {
+ .name = "format",
+ .attrs = cpumcf_pmu_format_attr,
+};
+
+static const struct attribute_group *cpumcf_pmu_attr_groups[] = {
+ &cpumcf_pmu_events_group,
+ &cpumcf_pmu_format_group,
+ NULL,
+};
+
+
+static __init struct attribute **merge_attr(struct attribute **a,
+ struct attribute **b,
+ struct attribute **c)
+{
+ struct attribute **new;
+ int j, i;
+
+ for (j = 0; a[j]; j++)
+ ;
+ for (i = 0; b[i]; i++)
+ j++;
+ for (i = 0; c[i]; i++)
+ j++;
+ j++;
+
+ new = kmalloc_array(j, sizeof(struct attribute *), GFP_KERNEL);
+ if (!new)
+ return NULL;
+ j = 0;
+ for (i = 0; a[i]; i++)
+ new[j++] = a[i];
+ for (i = 0; b[i]; i++)
+ new[j++] = b[i];
+ for (i = 0; c[i]; i++)
+ new[j++] = c[i];
+ new[j] = NULL;
+
+ return new;
+}
+
+__init const struct attribute_group **cpumf_cf_event_group(void)
+{
+ struct attribute **combined, **model, **cfvn, **csvn;
+ struct attribute *none[] = { NULL };
+ struct cpumf_ctr_info ci;
+ struct cpuid cpu_id;
+
+	/* Determine generic counter set(s) */
+ qctri(&ci);
+ switch (ci.cfvn) {
+ case 1:
+ cfvn = cpumcf_fvn1_pmu_event_attr;
+ break;
+ case 3:
+ cfvn = cpumcf_fvn3_pmu_event_attr;
+ break;
+ default:
+ cfvn = none;
+ }
+ csvn = cpumcf_svn_generic_pmu_event_attr;
+
+ /* Determine model-specific counter set(s) */
+ get_cpu_id(&cpu_id);
+ switch (cpu_id.machine) {
+ case 0x2097:
+ case 0x2098:
+ model = cpumcf_z10_pmu_event_attr;
+ break;
+ case 0x2817:
+ case 0x2818:
+ model = cpumcf_z196_pmu_event_attr;
+ break;
+ case 0x2827:
+ case 0x2828:
+ model = cpumcf_zec12_pmu_event_attr;
+ break;
+ case 0x2964:
+ case 0x2965:
+ model = cpumcf_z13_pmu_event_attr;
+ break;
+ case 0x3906:
+ case 0x3907:
+ model = cpumcf_z14_pmu_event_attr;
+ break;
+ default:
+ model = none;
+ break;
+ }
+
+ combined = merge_attr(cfvn, csvn, model);
+ if (combined)
+ cpumcf_pmu_events_group.attrs = combined;
+ return cpumcf_pmu_attr_groups;
+}
diff --git a/arch/s390/kernel/perf_cpum_sf.c b/arch/s390/kernel/perf_cpum_sf.c
new file mode 100644
index 000000000..c8e1e3252
--- /dev/null
+++ b/arch/s390/kernel/perf_cpum_sf.c
@@ -0,0 +1,2100 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Performance event support for the System z CPU-measurement Sampling Facility
+ *
+ * Copyright IBM Corp. 2013, 2018
+ * Author(s): Hendrik Brueckner <brueckner@linux.vnet.ibm.com>
+ */
+#define KMSG_COMPONENT "cpum_sf"
+#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
+
+#include <linux/kernel.h>
+#include <linux/kernel_stat.h>
+#include <linux/perf_event.h>
+#include <linux/percpu.h>
+#include <linux/pid.h>
+#include <linux/notifier.h>
+#include <linux/export.h>
+#include <linux/slab.h>
+#include <linux/mm.h>
+#include <linux/moduleparam.h>
+#include <asm/cpu_mf.h>
+#include <asm/irq.h>
+#include <asm/debug.h>
+#include <asm/timex.h>
+
+/* Minimum number of sample-data-block-tables:
+ * At least one table is required for the sampling buffer structure.
+ * A single table contains up to 511 pointers to sample-data-blocks.
+ */
+#define CPUM_SF_MIN_SDBT 1
+
+/* Number of sample-data-blocks per sample-data-block-table (SDBT):
+ * A table contains SDB pointers (8 bytes) and one table-link entry
+ * that points to the origin of the next SDBT.
+ */
+#define CPUM_SF_SDB_PER_TABLE ((PAGE_SIZE - 8) / 8)
+
+/* Maximum page offset for an SDBT table-link entry:
+ * If this page offset is reached, a table-link entry to the next SDBT
+ * must be added.
+ */
+#define CPUM_SF_SDBT_TL_OFFSET (CPUM_SF_SDB_PER_TABLE * 8)
+static inline int require_table_link(const void *sdbt)
+{
+ return ((unsigned long) sdbt & ~PAGE_MASK) == CPUM_SF_SDBT_TL_OFFSET;
+}
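+
+/* For illustration only, assuming the 4KB page size used throughout this
+ * file: an SDBT holds (4096 - 8) / 8 = 511 SDB pointers, and the table-link
+ * entry occupies the last 8 bytes of the page, so CPUM_SF_SDBT_TL_OFFSET is
+ * 511 * 8 = 4088. require_table_link() thus reports true once the tail
+ * pointer reaches page offset 4088.
+ */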
+
+/* Minimum and maximum sampling buffer sizes:
+ *
+ * This number represents the maximum size of the sampling buffer taking
+ * the number of sample-data-block-tables into account. Note that these
+ * numbers apply to the basic-sampling function only.
+ * The maximum number of SDBs is increased by CPUM_SF_SDB_DIAG_FACTOR if
+ * the diagnostic-sampling function is active.
+ *
+ * Sampling buffer size Buffer characteristics
+ * ---------------------------------------------------
+ * 64KB == 16 pages (4KB per page)
+ * 1 page for SDB-tables
+ * 15 pages for SDBs
+ *
+ * 32MB == 8192 pages (4KB per page)
+ * 16 pages for SDB-tables
+ * 8176 pages for SDBs
+ */
+static unsigned long __read_mostly CPUM_SF_MIN_SDB = 15;
+static unsigned long __read_mostly CPUM_SF_MAX_SDB = 8176;
+static unsigned long __read_mostly CPUM_SF_SDB_DIAG_FACTOR = 1;
+
+struct sf_buffer {
+ unsigned long *sdbt; /* Sample-data-block-table origin */
+ /* buffer characteristics (required for buffer increments) */
+ unsigned long num_sdb; /* Number of sample-data-blocks */
+ unsigned long num_sdbt; /* Number of sample-data-block-tables */
+ unsigned long *tail; /* last sample-data-block-table */
+};
+
+struct aux_buffer {
+ struct sf_buffer sfb;
+ unsigned long head; /* index of SDB of buffer head */
+ unsigned long alert_mark; /* index of SDB of alert request position */
+ unsigned long empty_mark; /* mark of SDB not marked full */
+ unsigned long *sdb_index; /* SDB address for fast lookup */
+ unsigned long *sdbt_index; /* SDBT address for fast lookup */
+};
+
+struct cpu_hw_sf {
+ /* CPU-measurement sampling information block */
+ struct hws_qsi_info_block qsi;
+ /* CPU-measurement sampling control block */
+ struct hws_lsctl_request_block lsctl;
+ struct sf_buffer sfb; /* Sampling buffer */
+ unsigned int flags; /* Status flags */
+ struct perf_event *event; /* Scheduled perf event */
+ struct perf_output_handle handle; /* AUX buffer output handle */
+};
+static DEFINE_PER_CPU(struct cpu_hw_sf, cpu_hw_sf);
+
+/* Debug feature */
+static debug_info_t *sfdbg;
+
+/*
+ * sf_disable() - Switch off sampling facility
+ */
+static int sf_disable(void)
+{
+ struct hws_lsctl_request_block sreq;
+
+ memset(&sreq, 0, sizeof(sreq));
+ return lsctl(&sreq);
+}
+
+/*
+ * sf_buffer_available() - Check for an allocated sampling buffer
+ */
+static int sf_buffer_available(struct cpu_hw_sf *cpuhw)
+{
+ return !!cpuhw->sfb.sdbt;
+}
+
+/*
+ * deallocate sampling facility buffer
+ */
+static void free_sampling_buffer(struct sf_buffer *sfb)
+{
+ unsigned long *sdbt, *curr;
+
+ if (!sfb->sdbt)
+ return;
+
+ sdbt = sfb->sdbt;
+ curr = sdbt;
+
+ /* Free the SDBT after all SDBs are processed... */
+ while (1) {
+ if (!*curr || !sdbt)
+ break;
+
+ /* Process table-link entries */
+ if (is_link_entry(curr)) {
+ curr = get_next_sdbt(curr);
+ if (sdbt)
+ free_page((unsigned long) sdbt);
+
+			/* If the origin is reached, the sampling buffer is freed */
+ if (curr == sfb->sdbt)
+ break;
+ else
+ sdbt = curr;
+ } else {
+ /* Process SDB pointer */
+ if (*curr) {
+ free_page(*curr);
+ curr++;
+ }
+ }
+ }
+
+ debug_sprintf_event(sfdbg, 5,
+ "free_sampling_buffer: freed sdbt=%p\n", sfb->sdbt);
+ memset(sfb, 0, sizeof(*sfb));
+}
+
+static int alloc_sample_data_block(unsigned long *sdbt, gfp_t gfp_flags)
+{
+ unsigned long sdb, *trailer;
+
+ /* Allocate and initialize sample-data-block */
+ sdb = get_zeroed_page(gfp_flags);
+ if (!sdb)
+ return -ENOMEM;
+ trailer = trailer_entry_ptr(sdb);
+ *trailer = SDB_TE_ALERT_REQ_MASK;
+
+ /* Link SDB into the sample-data-block-table */
+ *sdbt = sdb;
+
+ return 0;
+}
+
+/*
+ * realloc_sampling_buffer() - extend sampler memory
+ *
+ * Allocates new sample-data-blocks and adds them to the specified sampling
+ * buffer memory.
+ *
+ * Important: This modifies the sampling buffer and must be called when the
+ * sampling facility is disabled.
+ *
+ * Returns zero on success, non-zero otherwise.
+ */
+static int realloc_sampling_buffer(struct sf_buffer *sfb,
+ unsigned long num_sdb, gfp_t gfp_flags)
+{
+ int i, rc;
+ unsigned long *new, *tail, *tail_prev = NULL;
+
+ if (!sfb->sdbt || !sfb->tail)
+ return -EINVAL;
+
+ if (!is_link_entry(sfb->tail))
+ return -EINVAL;
+
+ /* Append to the existing sampling buffer, overwriting the table-link
+	 * entry.
+	 * The tail variable always points to the "tail" (last and table-link)
+ * entry in an SDB-table.
+ */
+ tail = sfb->tail;
+
+ /* Do a sanity check whether the table-link entry points to
+ * the sampling buffer origin.
+ */
+ if (sfb->sdbt != get_next_sdbt(tail)) {
+ debug_sprintf_event(sfdbg, 3, "realloc_sampling_buffer: "
+ "sampling buffer is not linked: origin=%p"
+ "tail=%p\n",
+ (void *) sfb->sdbt, (void *) tail);
+ return -EINVAL;
+ }
+
+ /* Allocate remaining SDBs */
+ rc = 0;
+ for (i = 0; i < num_sdb; i++) {
+ /* Allocate a new SDB-table if it is full. */
+ if (require_table_link(tail)) {
+ new = (unsigned long *) get_zeroed_page(gfp_flags);
+ if (!new) {
+ rc = -ENOMEM;
+ break;
+ }
+ sfb->num_sdbt++;
+ /* Link current page to tail of chain */
+ *tail = (unsigned long)(void *) new + 1;
+ tail_prev = tail;
+ tail = new;
+ }
+
+ /* Allocate a new sample-data-block.
+ * If there is not enough memory, stop the realloc process
+ * and simply use what was allocated. If this is a temporary
+ * issue, a new realloc call (if required) might succeed.
+ */
+ rc = alloc_sample_data_block(tail, gfp_flags);
+ if (rc) {
+ /* Undo last SDBT. An SDBT with no SDB at its first
+			 * entry but with an SDBT entry instead cannot be
+ * handled by the interrupt handler code.
+ * Avoid this situation.
+ */
+ if (tail_prev) {
+ sfb->num_sdbt--;
+ free_page((unsigned long) new);
+ tail = tail_prev;
+ }
+ break;
+ }
+ sfb->num_sdb++;
+ tail++;
+		tail_prev = new = NULL;	/* Allocated at least one SDB */
+ }
+
+ /* Link sampling buffer to its origin */
+ *tail = (unsigned long) sfb->sdbt + 1;
+ sfb->tail = tail;
+
+ debug_sprintf_event(sfdbg, 4, "realloc_sampling_buffer: new buffer"
+ " settings: sdbt=%lu sdb=%lu\n",
+ sfb->num_sdbt, sfb->num_sdb);
+ return rc;
+}
+
+/*
+ * allocate_sampling_buffer() - allocate sampler memory
+ *
+ * Allocates and initializes a sampling buffer structure using the
+ * specified number of sample-data-blocks (SDB). For each allocation,
+ * a 4K page is used. The number of sample-data-block-tables (SDBT)
+ * is calculated from the number of SDBs.
+ * Also set the ALERT_REQ mask in each SDB's trailer.
+ *
+ * Returns zero on success, non-zero otherwise.
+ */
+static int alloc_sampling_buffer(struct sf_buffer *sfb, unsigned long num_sdb)
+{
+ int rc;
+
+ if (sfb->sdbt)
+ return -EINVAL;
+
+ /* Allocate the sample-data-block-table origin */
+ sfb->sdbt = (unsigned long *) get_zeroed_page(GFP_KERNEL);
+ if (!sfb->sdbt)
+ return -ENOMEM;
+ sfb->num_sdb = 0;
+ sfb->num_sdbt = 1;
+
+ /* Link the table origin to point to itself to prepare for
+ * realloc_sampling_buffer() invocation.
+ */
+ sfb->tail = sfb->sdbt;
+ *sfb->tail = (unsigned long)(void *) sfb->sdbt + 1;
+
+ /* Allocate requested number of sample-data-blocks */
+ rc = realloc_sampling_buffer(sfb, num_sdb, GFP_KERNEL);
+ if (rc) {
+ free_sampling_buffer(sfb);
+ debug_sprintf_event(sfdbg, 4, "alloc_sampling_buffer: "
+ "realloc_sampling_buffer failed with rc=%i\n", rc);
+ } else
+ debug_sprintf_event(sfdbg, 4,
+ "alloc_sampling_buffer: tear=%p dear=%p\n",
+ sfb->sdbt, (void *) *sfb->sdbt);
+ return rc;
+}
+
+static void sfb_set_limits(unsigned long min, unsigned long max)
+{
+ struct hws_qsi_info_block si;
+
+ CPUM_SF_MIN_SDB = min;
+ CPUM_SF_MAX_SDB = max;
+
+ memset(&si, 0, sizeof(si));
+ if (!qsi(&si))
+ CPUM_SF_SDB_DIAG_FACTOR = DIV_ROUND_UP(si.dsdes, si.bsdes);
+}
+
+static unsigned long sfb_max_limit(struct hw_perf_event *hwc)
+{
+ return SAMPL_DIAG_MODE(hwc) ? CPUM_SF_MAX_SDB * CPUM_SF_SDB_DIAG_FACTOR
+ : CPUM_SF_MAX_SDB;
+}
+
+static unsigned long sfb_pending_allocs(struct sf_buffer *sfb,
+ struct hw_perf_event *hwc)
+{
+ if (!sfb->sdbt)
+ return SFB_ALLOC_REG(hwc);
+ if (SFB_ALLOC_REG(hwc) > sfb->num_sdb)
+ return SFB_ALLOC_REG(hwc) - sfb->num_sdb;
+ return 0;
+}
+
+static int sfb_has_pending_allocs(struct sf_buffer *sfb,
+ struct hw_perf_event *hwc)
+{
+ return sfb_pending_allocs(sfb, hwc) > 0;
+}
+
+static void sfb_account_allocs(unsigned long num, struct hw_perf_event *hwc)
+{
+ /* Limit the number of SDBs to not exceed the maximum */
+ num = min_t(unsigned long, num, sfb_max_limit(hwc) - SFB_ALLOC_REG(hwc));
+ if (num)
+ SFB_ALLOC_REG(hwc) += num;
+}
+
+static void sfb_init_allocs(unsigned long num, struct hw_perf_event *hwc)
+{
+ SFB_ALLOC_REG(hwc) = 0;
+ sfb_account_allocs(num, hwc);
+}
+
+static void deallocate_buffers(struct cpu_hw_sf *cpuhw)
+{
+ if (cpuhw->sfb.sdbt)
+ free_sampling_buffer(&cpuhw->sfb);
+}
+
+static int allocate_buffers(struct cpu_hw_sf *cpuhw, struct hw_perf_event *hwc)
+{
+ unsigned long n_sdb, freq, factor;
+ size_t sample_size;
+
+ /* Calculate sampling buffers using 4K pages
+ *
+ * 1. Determine the sample data size which depends on the used
+ * sampling functions, for example, basic-sampling or
+ * basic-sampling with diagnostic-sampling.
+ *
+	 *    2. Use the sampling frequency as input. The sampling buffer is
+	 *       sized to hold roughly one second of samples. This can be
+	 *       adjusted through the "factor" variable.
+ * In any case, alloc_sampling_buffer() sets the Alert Request
+ * Control indicator to trigger a measurement-alert to harvest
+ * sample-data-blocks (sdb).
+ *
+ * 3. Compute the number of sample-data-blocks and ensure a minimum
+ * of CPUM_SF_MIN_SDB. Also ensure the upper limit does not
+ * exceed a "calculated" maximum. The symbolic maximum is
+ * designed for basic-sampling only and needs to be increased if
+ * diagnostic-sampling is active.
+ * See also the remarks for these symbolic constants.
+ *
+ * 4. Compute the number of sample-data-block-tables (SDBT) and
+ * ensure a minimum of CPUM_SF_MIN_SDBT (one table can manage up
+ * to 511 SDBs).
+ */
+ sample_size = sizeof(struct hws_basic_entry);
+ freq = sample_rate_to_freq(&cpuhw->qsi, SAMPL_RATE(hwc));
+ factor = 1;
+ n_sdb = DIV_ROUND_UP(freq, factor * ((PAGE_SIZE-64) / sample_size));
+ if (n_sdb < CPUM_SF_MIN_SDB)
+ n_sdb = CPUM_SF_MIN_SDB;
+
+ /* If there is already a sampling buffer allocated, it is very likely
+ * that the sampling facility is enabled too. If the event to be
+ * initialized requires a greater sampling buffer, the allocation must
+ * be postponed. Changing the sampling buffer requires the sampling
+ * facility to be in the disabled state. So, account the number of
+ * required SDBs and let cpumsf_pmu_enable() resize the buffer just
+ * before the event is started.
+ */
+ sfb_init_allocs(n_sdb, hwc);
+ if (sf_buffer_available(cpuhw))
+ return 0;
+
+ debug_sprintf_event(sfdbg, 3,
+ "allocate_buffers: rate=%lu f=%lu sdb=%lu/%lu"
+ " sample_size=%lu cpuhw=%p\n",
+ SAMPL_RATE(hwc), freq, n_sdb, sfb_max_limit(hwc),
+ sample_size, cpuhw);
+
+ return alloc_sampling_buffer(&cpuhw->sfb,
+ sfb_pending_allocs(&cpuhw->sfb, hwc));
+}
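+
+/* Worked example for the calculation in allocate_buffers() above, with
+ * purely illustrative numbers: assuming a basic-sampling entry of 32 bytes,
+ * one 4KB SDB holds (4096 - 64) / 32 = 126 samples, so a sampling frequency
+ * of 10000 samples per second yields n_sdb = DIV_ROUND_UP(10000, 126) = 80
+ * SDBs, i.e. roughly 320KB of sampling buffer for about one second of data.
+ */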
+
+static unsigned long min_percent(unsigned int percent, unsigned long base,
+ unsigned long min)
+{
+ return min_t(unsigned long, min, DIV_ROUND_UP(percent * base, 100));
+}
+
+static unsigned long compute_sfb_extent(unsigned long ratio, unsigned long base)
+{
+ /* Use a percentage-based approach to extend the sampling facility
+ * buffer. Accept up to 5% sample data loss.
+	 * Vary the extents between 1% and 5% of the current number of
+ * sample-data-blocks.
+ */
+ if (ratio <= 5)
+ return 0;
+ if (ratio <= 25)
+ return min_percent(1, base, 1);
+ if (ratio <= 50)
+ return min_percent(1, base, 1);
+ if (ratio <= 75)
+ return min_percent(2, base, 2);
+ if (ratio <= 100)
+ return min_percent(3, base, 3);
+ if (ratio <= 250)
+ return min_percent(4, base, 4);
+
+ return min_percent(5, base, 8);
+}
+
+static void sfb_account_overflows(struct cpu_hw_sf *cpuhw,
+ struct hw_perf_event *hwc)
+{
+ unsigned long ratio, num;
+
+ if (!OVERFLOW_REG(hwc))
+ return;
+
+	/* The sample_overflow contains the average number of sample-data
+	 * entries that have been lost because sample-data-blocks were full.
+	 *
+	 * Calculate the total number of sample-data entries that have been
+ * discarded. Then calculate the ratio of lost samples to total samples
+ * per second in percent.
+ */
+ ratio = DIV_ROUND_UP(100 * OVERFLOW_REG(hwc) * cpuhw->sfb.num_sdb,
+ sample_rate_to_freq(&cpuhw->qsi, SAMPL_RATE(hwc)));
+
+ /* Compute number of sample-data-blocks */
+ num = compute_sfb_extent(ratio, cpuhw->sfb.num_sdb);
+ if (num)
+ sfb_account_allocs(num, hwc);
+
+ debug_sprintf_event(sfdbg, 5, "sfb: overflow: overflow=%llu ratio=%lu"
+ " num=%lu\n", OVERFLOW_REG(hwc), ratio, num);
+ OVERFLOW_REG(hwc) = 0;
+}
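+
+/* Worked example with purely illustrative numbers: an average overflow of
+ * 10 lost samples per SDB, 80 allocated SDBs and a sampling frequency of
+ * 10000 samples per second yield ratio = DIV_ROUND_UP(100 * 10 * 80, 10000)
+ * = 8, i.e. 8% loss, for which compute_sfb_extent(8, 80) requests
+ * min_percent(1, 80, 1) = 1 additional SDB.
+ */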
+
+/* extend_sampling_buffer() - Extend sampling buffer
+ * @sfb: Sampling buffer structure (for local CPU)
+ * @hwc: Perf event hardware structure
+ *
+ * Use this function to extend the sampling buffer based on the overflow counter
+ * and postponed allocation extents stored in the specified Perf event hardware.
+ *
+ * Important: This function disables the sampling facility in order to safely
+ * change the sampling buffer structure. Do not call this function
+ * when the PMU is active.
+ */
+static void extend_sampling_buffer(struct sf_buffer *sfb,
+ struct hw_perf_event *hwc)
+{
+ unsigned long num, num_old;
+ int rc;
+
+ num = sfb_pending_allocs(sfb, hwc);
+ if (!num)
+ return;
+ num_old = sfb->num_sdb;
+
+ /* Disable the sampling facility to reset any states and also
+ * clear pending measurement alerts.
+ */
+ sf_disable();
+
+ /* Extend the sampling buffer.
+ * This memory allocation typically happens in an atomic context when
+ * called by perf. Because this is a reallocation, it is fine if the
+ * new SDB-request cannot be satisfied immediately.
+ */
+ rc = realloc_sampling_buffer(sfb, num, GFP_ATOMIC);
+ if (rc)
+ debug_sprintf_event(sfdbg, 5, "sfb: extend: realloc "
+ "failed with rc=%i\n", rc);
+
+ if (sfb_has_pending_allocs(sfb, hwc))
+ debug_sprintf_event(sfdbg, 5, "sfb: extend: "
+ "req=%lu alloc=%lu remaining=%lu\n",
+ num, sfb->num_sdb - num_old,
+ sfb_pending_allocs(sfb, hwc));
+}
+
+
+/* Number of perf events counting hardware events */
+static atomic_t num_events;
+/* Used to avoid races in calling reserve/release_cpumf_hardware */
+static DEFINE_MUTEX(pmc_reserve_mutex);
+
+#define PMC_INIT 0
+#define PMC_RELEASE 1
+#define PMC_FAILURE 2
+static void setup_pmc_cpu(void *flags)
+{
+ int err;
+ struct cpu_hw_sf *cpusf = this_cpu_ptr(&cpu_hw_sf);
+
+ err = 0;
+ switch (*((int *) flags)) {
+ case PMC_INIT:
+ memset(cpusf, 0, sizeof(*cpusf));
+ err = qsi(&cpusf->qsi);
+ if (err)
+ break;
+ cpusf->flags |= PMU_F_RESERVED;
+ err = sf_disable();
+ if (err)
+ pr_err("Switching off the sampling facility failed "
+ "with rc=%i\n", err);
+ debug_sprintf_event(sfdbg, 5,
+ "setup_pmc_cpu: initialized: cpuhw=%p\n", cpusf);
+ break;
+ case PMC_RELEASE:
+ cpusf->flags &= ~PMU_F_RESERVED;
+ err = sf_disable();
+ if (err) {
+ pr_err("Switching off the sampling facility failed "
+ "with rc=%i\n", err);
+ } else
+ deallocate_buffers(cpusf);
+ debug_sprintf_event(sfdbg, 5,
+ "setup_pmc_cpu: released: cpuhw=%p\n", cpusf);
+ break;
+ }
+ if (err)
+ *((int *) flags) |= PMC_FAILURE;
+}
+
+static void release_pmc_hardware(void)
+{
+ int flags = PMC_RELEASE;
+
+ irq_subclass_unregister(IRQ_SUBCLASS_MEASUREMENT_ALERT);
+ on_each_cpu(setup_pmc_cpu, &flags, 1);
+}
+
+static int reserve_pmc_hardware(void)
+{
+ int flags = PMC_INIT;
+
+ on_each_cpu(setup_pmc_cpu, &flags, 1);
+ if (flags & PMC_FAILURE) {
+ release_pmc_hardware();
+ return -ENODEV;
+ }
+ irq_subclass_register(IRQ_SUBCLASS_MEASUREMENT_ALERT);
+
+ return 0;
+}
+
+static void hw_perf_event_destroy(struct perf_event *event)
+{
+ /* Release PMC if this is the last perf event */
+ if (!atomic_add_unless(&num_events, -1, 1)) {
+ mutex_lock(&pmc_reserve_mutex);
+ if (atomic_dec_return(&num_events) == 0)
+ release_pmc_hardware();
+ mutex_unlock(&pmc_reserve_mutex);
+ }
+}
+
+static void hw_init_period(struct hw_perf_event *hwc, u64 period)
+{
+ hwc->sample_period = period;
+ hwc->last_period = hwc->sample_period;
+ local64_set(&hwc->period_left, hwc->sample_period);
+}
+
+static void hw_reset_registers(struct hw_perf_event *hwc,
+ unsigned long *sdbt_origin)
+{
+ /* (Re)set to first sample-data-block-table */
+ TEAR_REG(hwc) = (unsigned long) sdbt_origin;
+}
+
+static unsigned long hw_limit_rate(const struct hws_qsi_info_block *si,
+ unsigned long rate)
+{
+ return clamp_t(unsigned long, rate,
+ si->min_sampl_rate, si->max_sampl_rate);
+}
+
+static u32 cpumsf_pid_type(struct perf_event *event,
+ u32 pid, enum pid_type type)
+{
+ struct task_struct *tsk;
+
+ /* Idle process */
+ if (!pid)
+ goto out;
+
+ tsk = find_task_by_pid_ns(pid, &init_pid_ns);
+ pid = -1;
+ if (tsk) {
+ /*
+ * Only top level events contain the pid namespace in which
+ * they are created.
+ */
+ if (event->parent)
+ event = event->parent;
+ pid = __task_pid_nr_ns(tsk, type, event->ns);
+ /*
+ * See also 1d953111b648
+ * "perf/core: Don't report zero PIDs for exiting tasks".
+ */
+ if (!pid && !pid_alive(tsk))
+ pid = -1;
+ }
+out:
+ return pid;
+}
+
+static void cpumsf_output_event_pid(struct perf_event *event,
+ struct perf_sample_data *data,
+ struct pt_regs *regs)
+{
+ u32 pid;
+ struct perf_event_header header;
+ struct perf_output_handle handle;
+
+ /*
+ * Obtain the PID from the basic-sampling data entry and
+ * correct the data->tid_entry.pid value.
+ */
+ pid = data->tid_entry.pid;
+
+ /* Protect callchain buffers, tasks */
+ rcu_read_lock();
+
+ perf_prepare_sample(&header, data, event, regs);
+ if (perf_output_begin(&handle, event, header.size))
+ goto out;
+
+ /* Update the process ID (see also kernel/events/core.c) */
+ data->tid_entry.pid = cpumsf_pid_type(event, pid, PIDTYPE_TGID);
+ data->tid_entry.tid = cpumsf_pid_type(event, pid, PIDTYPE_PID);
+
+ perf_output_sample(&handle, &header, data, event);
+ perf_output_end(&handle);
+out:
+ rcu_read_unlock();
+}
+
+static int __hw_perf_event_init(struct perf_event *event)
+{
+ struct cpu_hw_sf *cpuhw;
+ struct hws_qsi_info_block si;
+ struct perf_event_attr *attr = &event->attr;
+ struct hw_perf_event *hwc = &event->hw;
+ unsigned long rate;
+ int cpu, err;
+
+ /* Reserve CPU-measurement sampling facility */
+ err = 0;
+ if (!atomic_inc_not_zero(&num_events)) {
+ mutex_lock(&pmc_reserve_mutex);
+ if (atomic_read(&num_events) == 0 && reserve_pmc_hardware())
+ err = -EBUSY;
+ else
+ atomic_inc(&num_events);
+ mutex_unlock(&pmc_reserve_mutex);
+ }
+ event->destroy = hw_perf_event_destroy;
+
+ if (err)
+ goto out;
+
+ /* Access per-CPU sampling information (query sampling info) */
+ /*
+ * The event->cpu value can be -1 to count on every CPU, for example,
+ * when attaching to a task. If this is specified, use the query
+ * sampling info from the current CPU, otherwise use event->cpu to
+ * retrieve the per-CPU information.
+ * Later, cpuhw indicates whether to allocate sampling buffers for a
+	 * particular CPU (cpuhw!=NULL) or each online CPU (cpuhw==NULL).
+ */
+ memset(&si, 0, sizeof(si));
+ cpuhw = NULL;
+ if (event->cpu == -1)
+ qsi(&si);
+ else {
+ /* Event is pinned to a particular CPU, retrieve the per-CPU
+ * sampling structure for accessing the CPU-specific QSI.
+ */
+ cpuhw = &per_cpu(cpu_hw_sf, event->cpu);
+ si = cpuhw->qsi;
+ }
+
+ /* Check sampling facility authorization and, if not authorized,
+ * fall back to other PMUs. It is safe to check any CPU because
+ * the authorization is identical for all configured CPUs.
+ */
+ if (!si.as) {
+ err = -ENOENT;
+ goto out;
+ }
+
+ /* Always enable basic sampling */
+ SAMPL_FLAGS(hwc) = PERF_CPUM_SF_BASIC_MODE;
+
+ /* Check if diagnostic sampling is requested. Deny if the required
+ * sampling authorization is missing.
+ */
+ if (attr->config == PERF_EVENT_CPUM_SF_DIAG) {
+ if (!si.ad) {
+ err = -EPERM;
+ goto out;
+ }
+ SAMPL_FLAGS(hwc) |= PERF_CPUM_SF_DIAG_MODE;
+ }
+
+ /* Check and set other sampling flags */
+ if (attr->config1 & PERF_CPUM_SF_FULL_BLOCKS)
+ SAMPL_FLAGS(hwc) |= PERF_CPUM_SF_FULL_BLOCKS;
+
+	/* The sampling information (si) contains the min/max sampling
+	 * intervals and the CPU speed, so the sampling interval can be
+	 * calculated directly, avoiding the whole period-adjust feedback
+	 * loop.
+ */
+ rate = 0;
+ if (attr->freq) {
+ if (!attr->sample_freq) {
+ err = -EINVAL;
+ goto out;
+ }
+ rate = freq_to_sample_rate(&si, attr->sample_freq);
+ rate = hw_limit_rate(&si, rate);
+ attr->freq = 0;
+ attr->sample_period = rate;
+ } else {
+		/* The min/max sampling rates specify the valid range
+ * of sample periods. If the specified sample period is
+ * out of range, limit the period to the range boundary.
+ */
+ rate = hw_limit_rate(&si, hwc->sample_period);
+
+ /* The perf core maintains a maximum sample rate that is
+ * configurable through the sysctl interface. Ensure the
+ * sampling rate does not exceed this value. This also helps
+ * to avoid throttling when pushing samples with
+ * perf_event_overflow().
+ */
+ if (sample_rate_to_freq(&si, rate) >
+ sysctl_perf_event_sample_rate) {
+ err = -EINVAL;
+ debug_sprintf_event(sfdbg, 1, "Sampling rate exceeds maximum perf sample rate\n");
+ goto out;
+ }
+ }
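+
+	/* For illustration (values invented): a request of attr->sample_freq
+	 * = 4000 would be converted by freq_to_sample_rate() into a hardware
+	 * sampling interval, clamped by hw_limit_rate() to the range
+	 * [si.min_sampl_rate, si.max_sampl_rate], and stored below as a fixed
+	 * sample period, bypassing the period-adjust feedback loop.
+	 */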
+ SAMPL_RATE(hwc) = rate;
+ hw_init_period(hwc, SAMPL_RATE(hwc));
+
+ /* Initialize sample data overflow accounting */
+ hwc->extra_reg.reg = REG_OVERFLOW;
+ OVERFLOW_REG(hwc) = 0;
+
+	/* Use AUX buffer. No need to allocate it by ourselves */
+ if (attr->config == PERF_EVENT_CPUM_SF_DIAG)
+ return 0;
+
+ /* Allocate the per-CPU sampling buffer using the CPU information
+ * from the event. If the event is not pinned to a particular
+ * CPU (event->cpu == -1; or cpuhw == NULL), allocate sampling
+ * buffers for each online CPU.
+ */
+ if (cpuhw)
+ /* Event is pinned to a particular CPU */
+ err = allocate_buffers(cpuhw, hwc);
+ else {
+ /* Event is not pinned, allocate sampling buffer on
+ * each online CPU
+ */
+ for_each_online_cpu(cpu) {
+ cpuhw = &per_cpu(cpu_hw_sf, cpu);
+ err = allocate_buffers(cpuhw, hwc);
+ if (err)
+ break;
+ }
+ }
+
+ /* If PID/TID sampling is active, replace the default overflow
+ * handler to extract and resolve the PIDs from the basic-sampling
+ * data entries.
+ */
+ if (event->attr.sample_type & PERF_SAMPLE_TID)
+ if (is_default_overflow_handler(event))
+ event->overflow_handler = cpumsf_output_event_pid;
+out:
+ return err;
+}
+
+static int cpumsf_pmu_event_init(struct perf_event *event)
+{
+ int err;
+
+ /* No support for taken branch sampling */
+ if (has_branch_stack(event))
+ return -EOPNOTSUPP;
+
+ switch (event->attr.type) {
+ case PERF_TYPE_RAW:
+ if ((event->attr.config != PERF_EVENT_CPUM_SF) &&
+ (event->attr.config != PERF_EVENT_CPUM_SF_DIAG))
+ return -ENOENT;
+ break;
+ case PERF_TYPE_HARDWARE:
+ /* Support sampling of CPU cycles in addition to the
+		 * counter facility. However, the counter facility
+		 * is more precise, so this PMU is restricted to
+		 * sampling events only.
+ */
+ if (event->attr.config != PERF_COUNT_HW_CPU_CYCLES)
+ return -ENOENT;
+ if (!is_sampling_event(event))
+ return -ENOENT;
+ break;
+ default:
+ return -ENOENT;
+ }
+
+ /* Check online status of the CPU to which the event is pinned */
+ if (event->cpu >= 0 && !cpu_online(event->cpu))
+ return -ENODEV;
+
+ /* Force reset of idle/hv excludes regardless of what the
+ * user requested.
+ */
+ if (event->attr.exclude_hv)
+ event->attr.exclude_hv = 0;
+ if (event->attr.exclude_idle)
+ event->attr.exclude_idle = 0;
+
+ err = __hw_perf_event_init(event);
+ if (unlikely(err))
+ if (event->destroy)
+ event->destroy(event);
+ return err;
+}
+
+static void cpumsf_pmu_enable(struct pmu *pmu)
+{
+ struct cpu_hw_sf *cpuhw = this_cpu_ptr(&cpu_hw_sf);
+ struct hw_perf_event *hwc;
+ int err;
+
+ if (cpuhw->flags & PMU_F_ENABLED)
+ return;
+
+ if (cpuhw->flags & PMU_F_ERR_MASK)
+ return;
+
+	/* Check whether to extend the sampling buffer.
+ *
+ * Two conditions trigger an increase of the sampling buffer for a
+ * perf event:
+ * 1. Postponed buffer allocations from the event initialization.
+ * 2. Sampling overflows that contribute to pending allocations.
+ *
+ * Note that the extend_sampling_buffer() function disables the sampling
+ * facility, but it can be fully re-enabled using sampling controls that
+ * have been saved in cpumsf_pmu_disable().
+ */
+ if (cpuhw->event) {
+ hwc = &cpuhw->event->hw;
+ if (!(SAMPL_DIAG_MODE(hwc))) {
+ /*
+ * Account number of overflow-designated
+ * buffer extents
+ */
+ sfb_account_overflows(cpuhw, hwc);
+ if (sfb_has_pending_allocs(&cpuhw->sfb, hwc))
+ extend_sampling_buffer(&cpuhw->sfb, hwc);
+ }
+ }
+
+ /* (Re)enable the PMU and sampling facility */
+ cpuhw->flags |= PMU_F_ENABLED;
+ barrier();
+
+ err = lsctl(&cpuhw->lsctl);
+ if (err) {
+ cpuhw->flags &= ~PMU_F_ENABLED;
+ pr_err("Loading sampling controls failed: op=%i err=%i\n",
+ 1, err);
+ return;
+ }
+
+ /* Load current program parameter */
+ lpp(&S390_lowcore.lpp);
+
+ debug_sprintf_event(sfdbg, 6, "pmu_enable: es=%i cs=%i ed=%i cd=%i "
+ "tear=%p dear=%p\n", cpuhw->lsctl.es, cpuhw->lsctl.cs,
+ cpuhw->lsctl.ed, cpuhw->lsctl.cd,
+ (void *) cpuhw->lsctl.tear, (void *) cpuhw->lsctl.dear);
+}
+
+static void cpumsf_pmu_disable(struct pmu *pmu)
+{
+ struct cpu_hw_sf *cpuhw = this_cpu_ptr(&cpu_hw_sf);
+ struct hws_lsctl_request_block inactive;
+ struct hws_qsi_info_block si;
+ int err;
+
+ if (!(cpuhw->flags & PMU_F_ENABLED))
+ return;
+
+ if (cpuhw->flags & PMU_F_ERR_MASK)
+ return;
+
+ /* Switch off sampling activation control */
+ inactive = cpuhw->lsctl;
+ inactive.cs = 0;
+ inactive.cd = 0;
+
+ err = lsctl(&inactive);
+ if (err) {
+ pr_err("Loading sampling controls failed: op=%i err=%i\n",
+ 2, err);
+ return;
+ }
+
+ /* Save state of TEAR and DEAR register contents */
+ if (!qsi(&si)) {
+ /* TEAR/DEAR values are valid only if the sampling facility is
+ * enabled. Note that cpumsf_pmu_disable() might be called even
+ * for a disabled sampling facility because cpumsf_pmu_enable()
+ * controls the enable/disable state.
+ */
+ if (si.es) {
+ cpuhw->lsctl.tear = si.tear;
+ cpuhw->lsctl.dear = si.dear;
+ }
+ } else
+ debug_sprintf_event(sfdbg, 3, "cpumsf_pmu_disable: "
+ "qsi() failed with err=%i\n", err);
+
+ cpuhw->flags &= ~PMU_F_ENABLED;
+}
+
+/* perf_exclude_event() - Filter event
+ * @event: The perf event
+ * @regs: pt_regs structure
+ * @sde_regs: Sample-data-entry (sde) regs structure
+ *
+ * Filter perf events according to their exclude specification.
+ *
+ * Return non-zero if the event shall be excluded.
+ */
+static int perf_exclude_event(struct perf_event *event, struct pt_regs *regs,
+ struct perf_sf_sde_regs *sde_regs)
+{
+ if (event->attr.exclude_user && user_mode(regs))
+ return 1;
+ if (event->attr.exclude_kernel && !user_mode(regs))
+ return 1;
+ if (event->attr.exclude_guest && sde_regs->in_guest)
+ return 1;
+ if (event->attr.exclude_host && !sde_regs->in_guest)
+ return 1;
+ return 0;
+}
+
+/* perf_push_sample() - Push samples to perf
+ * @event: The perf event
+ * @sample: Hardware sample data
+ *
+ * Use the hardware sample data to create a perf event sample. The sample
+ * is then pushed to the event subsystem and the function checks for
+ * possible event overflows. If an event overflow occurs, the PMU is
+ * stopped.
+ *
+ * Return non-zero if an event overflow occurred.
+ */
+static int perf_push_sample(struct perf_event *event,
+ struct hws_basic_entry *basic)
+{
+ int overflow;
+ struct pt_regs regs;
+ struct perf_sf_sde_regs *sde_regs;
+ struct perf_sample_data data;
+
+ /* Setup perf sample */
+ perf_sample_data_init(&data, 0, event->hw.last_period);
+
+	/* Set up pt_regs to look like a CPU-measurement external interrupt
+	 * using the Program Request Alert code. The otherwise unused
+	 * regs.int_parm_long field carries additional sample-data-entry
+	 * related indicators.
+ */
+ memset(&regs, 0, sizeof(regs));
+ regs.int_code = 0x1407;
+ regs.int_parm = CPU_MF_INT_SF_PRA;
+ sde_regs = (struct perf_sf_sde_regs *) &regs.int_parm_long;
+
+ psw_bits(regs.psw).ia = basic->ia;
+ psw_bits(regs.psw).dat = basic->T;
+ psw_bits(regs.psw).wait = basic->W;
+ psw_bits(regs.psw).pstate = basic->P;
+ psw_bits(regs.psw).as = basic->AS;
+
+ /*
+ * Use the hardware provided configuration level to decide if the
+ * sample belongs to a guest or host. If that is not available,
+ * fall back to the following heuristics:
+ * A non-zero guest program parameter always indicates a guest
+ * sample. Some early samples or samples from guests without
+ * lpp usage would be misaccounted to the host. We use the asn
+	 * value as an additional heuristic to detect most of these guest
+	 * samples. If the value differs from 0xffff (the host value), we
+	 * assume a KVM guest sample.
+ */
+ switch (basic->CL) {
+ case 1: /* logical partition */
+ sde_regs->in_guest = 0;
+ break;
+ case 2: /* virtual machine */
+ sde_regs->in_guest = 1;
+ break;
+ default: /* old machine, use heuristics */
+ if (basic->gpp || basic->prim_asn != 0xffff)
+ sde_regs->in_guest = 1;
+ break;
+ }
+
+ /*
+ * Store the PID value from the sample-data-entry to be
+ * processed and resolved by cpumsf_output_event_pid().
+ */
+ data.tid_entry.pid = basic->hpp & LPP_PID_MASK;
+
+ overflow = 0;
+ if (perf_exclude_event(event, &regs, sde_regs))
+ goto out;
+ if (perf_event_overflow(event, &data, &regs)) {
+ overflow = 1;
+ event->pmu->stop(event, 0);
+ }
+ perf_event_update_userpage(event);
+out:
+ return overflow;
+}
+
+static void perf_event_count_update(struct perf_event *event, u64 count)
+{
+ local64_add(count, &event->count);
+}
+
+static void debug_sample_entry(struct hws_basic_entry *sample,
+ struct hws_trailer_entry *te)
+{
+ debug_sprintf_event(sfdbg, 4, "hw_collect_samples: Found unknown "
+ "sampling data entry: te->f=%i basic.def=%04x (%p)\n",
+ te->f, sample->def, sample);
+}
+
+/* hw_collect_samples() - Walk through a sample-data-block and collect samples
+ * @event: The perf event
+ * @sdbt: Sample-data-block table
+ * @overflow: Event overflow counter
+ *
+ * Walks through a sample-data-block and collects sampling data entries that are
+ * then pushed to the perf event subsystem. Depending on the sampling function,
+ * there can be either basic-sampling or combined-sampling data entries. A
+ * combined-sampling data entry consists of a basic- and a diagnostic-sampling
+ * data entry. The sampling function is determined by the flags in the perf
+ * event hardware structure. The function always works with a combined-sampling
+ * data entry but ignores the diagnostic portion if it is not available.
+ *
+ * Note that the implementation focuses on basic-sampling data entries and, if
+ * such an entry is not valid, the entire combined-sampling data entry is
+ * ignored.
+ *
+ * The overflow variable counts the number of samples that have been discarded
+ * due to a perf event overflow.
+ */
+static void hw_collect_samples(struct perf_event *event, unsigned long *sdbt,
+ unsigned long long *overflow)
+{
+ struct hws_trailer_entry *te;
+ struct hws_basic_entry *sample;
+
+ te = (struct hws_trailer_entry *) trailer_entry_ptr(*sdbt);
+ sample = (struct hws_basic_entry *) *sdbt;
+ while ((unsigned long *) sample < (unsigned long *) te) {
+ /* Check for an empty sample */
+ if (!sample->def)
+ break;
+
+ /* Update perf event period */
+ perf_event_count_update(event, SAMPL_RATE(&event->hw));
+
+ /* Check whether sample is valid */
+ if (sample->def == 0x0001) {
+ /* If an event overflow occurred, the PMU is stopped to
+ * throttle event delivery. Remaining sample data is
+ * discarded.
+ */
+ if (!*overflow) {
+ /* Check whether sample is consistent */
+ if (sample->I == 0 && sample->W == 0) {
+ /* Deliver sample data to perf */
+ *overflow = perf_push_sample(event,
+ sample);
+ }
+ } else
+ /* Count discarded samples */
+ *overflow += 1;
+ } else {
+ debug_sample_entry(sample, te);
+			/* Sample slot is not yet written, or it holds a different record type.
+ *
+ * This condition can occur if the buffer was reused
+ * from a combined basic- and diagnostic-sampling.
+ * If only basic-sampling is then active, entries are
+ * written into the larger diagnostic entries.
+ * This is typically the case for sample-data-blocks
+ * that are not full. Stop processing if the first
+ * invalid format was detected.
+ */
+ if (!te->f)
+ break;
+ }
+
+ /* Reset sample slot and advance to next sample */
+ sample->def = 0;
+ sample++;
+ }
+}
+
+/* hw_perf_event_update() - Process sampling buffer
+ * @event: The perf event
+ * @flush_all: Flag to also flush partially filled sample-data-blocks
+ *
+ * Processes the sampling buffer and creates perf event samples.
+ * The sampling buffer position is retrieved and saved in the TEAR_REG
+ * register of the specified perf event.
+ *
+ * Only full sample-data-blocks are processed. Specify the flush_all flag
+ * to also walk through partially filled sample-data-blocks. It is ignored
+ * if PERF_CPUM_SF_FULL_BLOCKS is set. The PERF_CPUM_SF_FULL_BLOCKS flag
+ * enforces the processing of full sample-data-blocks only (trailer entries
+ * with the block-full-indicator bit set).
+ */
+static void hw_perf_event_update(struct perf_event *event, int flush_all)
+{
+ struct hw_perf_event *hwc = &event->hw;
+ struct hws_trailer_entry *te;
+ unsigned long *sdbt;
+ unsigned long long event_overflow, sampl_overflow, num_sdb, te_flags;
+ int done;
+
+ /*
+ * AUX buffer is used when in diagnostic sampling mode.
+ * No perf events/samples are created.
+ */
+ if (SAMPL_DIAG_MODE(&event->hw))
+ return;
+
+ if (flush_all && SDB_FULL_BLOCKS(hwc))
+ flush_all = 0;
+
+ sdbt = (unsigned long *) TEAR_REG(hwc);
+ done = event_overflow = sampl_overflow = num_sdb = 0;
+ while (!done) {
+ /* Get the trailer entry of the sample-data-block */
+ te = (struct hws_trailer_entry *) trailer_entry_ptr(*sdbt);
+
+ /* Leave loop if no more work to do (block full indicator) */
+ if (!te->f) {
+ done = 1;
+ if (!flush_all)
+ break;
+ }
+
+ /* Check the sample overflow count */
+ if (te->overflow)
+ /* Account sample overflows and, if a particular limit
+ * is reached, extend the sampling buffer.
+ * For details, see sfb_account_overflows().
+ */
+ sampl_overflow += te->overflow;
+
+ /* Timestamps are valid for full sample-data-blocks only */
+ debug_sprintf_event(sfdbg, 6, "hw_perf_event_update: sdbt=%p "
+ "overflow=%llu timestamp=0x%llx\n",
+ sdbt, te->overflow,
+ (te->f) ? trailer_timestamp(te) : 0ULL);
+
+ /* Collect all samples from a single sample-data-block and
+ * flag if an (perf) event overflow happened. If so, the PMU
+ * is stopped and remaining samples will be discarded.
+ */
+ hw_collect_samples(event, sdbt, &event_overflow);
+ num_sdb++;
+
+ /* Reset trailer (using compare-double-and-swap) */
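+ /* The flags and the overflow counter are replaced in one atomic
+ * operation (the overflow count is reset to zero); the loop retries
+ * if the sampling hardware modified either field in the meantime.
+ */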
+ do {
+ te_flags = te->flags & ~SDB_TE_BUFFER_FULL_MASK;
+ te_flags |= SDB_TE_ALERT_REQ_MASK;
+ } while (!cmpxchg_double(&te->flags, &te->overflow,
+ te->flags, te->overflow,
+ te_flags, 0ULL));
+
+ /* Advance to next sample-data-block */
+ sdbt++;
+ if (is_link_entry(sdbt))
+ sdbt = get_next_sdbt(sdbt);
+
+ /* Update event hardware registers */
+ TEAR_REG(hwc) = (unsigned long) sdbt;
+
+ /* Stop processing sample-data if all samples of the current
+ * sample-data-block were flushed even if it was not full.
+ */
+ if (flush_all && done)
+ break;
+ }
+
+ /* Account sample overflows in the event hardware structure */
+ if (sampl_overflow)
+ OVERFLOW_REG(hwc) = DIV_ROUND_UP(OVERFLOW_REG(hwc) +
+ sampl_overflow, 1 + num_sdb);
+
+ /* perf_event_overflow() and perf_event_account_interrupt() limit
+ * the interrupt rate to an upper limit of roughly 1000 samples
+ * per task tick. Hitting this limit results in a large number of
+ * throttled PERF_RECORD_THROTTLE entries and the samples are
+ * dropped. Slightly increase the interval to avoid hitting this
+ * limit.
+ */
+ if (event_overflow) {
+ SAMPL_RATE(hwc) += DIV_ROUND_UP(SAMPL_RATE(hwc), 10);
+ debug_sprintf_event(sfdbg, 1, "%s: rate adjustment %ld\n",
+ __func__,
+ DIV_ROUND_UP(SAMPL_RATE(hwc), 10));
+ }
+
+ if (sampl_overflow || event_overflow)
+ debug_sprintf_event(sfdbg, 4, "hw_perf_event_update: "
+ "overflow stats: sample=%llu event=%llu\n",
+ sampl_overflow, event_overflow);
+}
+
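+/*
+ * Helper macros for the AUX buffer: SDB indices wrap modulo the number
+ * of allocated SDBs, and the head/alert_mark/empty_mark positions are
+ * inclusive, so AUX_SDB_NUM() yields end - start + 1 SDBs.
+ */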
+#define AUX_SDB_INDEX(aux, i) ((i) % aux->sfb.num_sdb)
+#define AUX_SDB_NUM(aux, start, end) (end >= start ? end - start + 1 : 0)
+#define AUX_SDB_NUM_ALERT(aux) AUX_SDB_NUM(aux, aux->head, aux->alert_mark)
+#define AUX_SDB_NUM_EMPTY(aux) AUX_SDB_NUM(aux, aux->head, aux->empty_mark)
+
+/*
+ * Get trailer entry by index of SDB.
+ */
+static struct hws_trailer_entry *aux_sdb_trailer(struct aux_buffer *aux,
+ unsigned long index)
+{
+ unsigned long sdb;
+
+ index = AUX_SDB_INDEX(aux, index);
+ sdb = aux->sdb_index[index];
+ return (struct hws_trailer_entry *)trailer_entry_ptr(sdb);
+}
+
+/*
+ * Finish sampling on the CPU. Called by cpumsf_pmu_del() with the pmu
+ * disabled. Collect the full SDBs in the AUX buffer that have not yet
+ * reached the alert indicator and ignore the SDBs that are not full.
+ *
+ * 1. Scan SDBs to see how much data is there and consume them.
+ * 2. Remove alert indicator in the buffer.
+ */
+static void aux_output_end(struct perf_output_handle *handle)
+{
+ unsigned long i, range_scan, idx;
+ struct aux_buffer *aux;
+ struct hws_trailer_entry *te;
+
+ aux = perf_get_aux(handle);
+ if (!aux)
+ return;
+
+ range_scan = AUX_SDB_NUM_ALERT(aux);
+ for (i = 0, idx = aux->head; i < range_scan; i++, idx++) {
+ te = aux_sdb_trailer(aux, idx);
+ if (!(te->flags & SDB_TE_BUFFER_FULL_MASK))
+ break;
+ }
+ /* i is the number of SDBs which are full */
+ perf_aux_output_end(handle, i << PAGE_SHIFT);
+
+ /* Remove alert indicators in the buffer */
+ te = aux_sdb_trailer(aux, aux->alert_mark);
+ te->flags &= ~SDB_TE_ALERT_REQ_MASK;
+
+ debug_sprintf_event(sfdbg, 6, "aux_output_end: collect %lx SDBs\n", i);
+}
+
+/*
+ * Start sampling on the CPU. Called by cpumsf_pmu_add() when an event
+ * is first added to the CPU or rescheduled to the CPU again. It is
+ * called with the pmu disabled.
+ *
+ * 1. Reset the trailer of the SDBs to get ready for new data.
+ * 2. Tell the hardware where to put the data by resetting the SDB
+ * buffer head (TEAR/DEAR).
+ */
+static int aux_output_begin(struct perf_output_handle *handle,
+ struct aux_buffer *aux,
+ struct cpu_hw_sf *cpuhw)
+{
+ unsigned long range;
+ unsigned long i, range_scan, idx;
+ unsigned long head, base, offset;
+ struct hws_trailer_entry *te;
+
+ if (WARN_ON_ONCE(handle->head & ~PAGE_MASK))
+ return -EINVAL;
+
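+ /* Each SDB occupies one page of the AUX area, so the number of
+ * usable SDBs equals the number of pages covered by the output
+ * handle.
+ */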
+ aux->head = handle->head >> PAGE_SHIFT;
+ range = (handle->size + 1) >> PAGE_SHIFT;
+ if (range <= 1)
+ return -ENOMEM;
+
+ /*
+ * SDBs between aux->head and aux->empty_mark are already ready
+ * for new data. range_scan is the number of SDBs outside that range.
+ */
+ if (range > AUX_SDB_NUM_EMPTY(aux)) {
+ range_scan = range - AUX_SDB_NUM_EMPTY(aux);
+ idx = aux->empty_mark + 1;
+ for (i = 0; i < range_scan; i++, idx++) {
+ te = aux_sdb_trailer(aux, idx);
+ te->flags &= ~(SDB_TE_BUFFER_FULL_MASK |
+ SDB_TE_ALERT_REQ_MASK);
+ te->overflow = 0;
+ }
+ /* Save the position of empty SDBs */
+ aux->empty_mark = aux->head + range - 1;
+ }
+
+ /* Set alert indicator */
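+ /* The alert mark is placed in the middle of the range; the
+ * alert-request bit on that SDB raises a measurement alert once
+ * roughly half of the SDBs have been filled.
+ */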
+ aux->alert_mark = aux->head + range/2 - 1;
+ te = aux_sdb_trailer(aux, aux->alert_mark);
+ te->flags = te->flags | SDB_TE_ALERT_REQ_MASK;
+
+ /* Reset hardware buffer head */
+ head = AUX_SDB_INDEX(aux, aux->head);
+ base = aux->sdbt_index[head / CPUM_SF_SDB_PER_TABLE];
+ offset = head % CPUM_SF_SDB_PER_TABLE;
+ cpuhw->lsctl.tear = base + offset * sizeof(unsigned long);
+ cpuhw->lsctl.dear = aux->sdb_index[head];
+
+ debug_sprintf_event(sfdbg, 6, "aux_output_begin: "
+ "head->alert_mark->empty_mark (num_alert, range)"
+ "[%lx -> %lx -> %lx] (%lx, %lx) "
+ "tear index %lx, tear %lx dear %lx\n",
+ aux->head, aux->alert_mark, aux->empty_mark,
+ AUX_SDB_NUM_ALERT(aux), range,
+ head / CPUM_SF_SDB_PER_TABLE,
+ cpuhw->lsctl.tear,
+ cpuhw->lsctl.dear);
+
+ return 0;
+}
+
+/*
+ * Set alert indicator on SDB at index @alert_index while sampler is running.
+ *
+ * Return true on success.
+ * Return false if the full indicator has already been set by the
+ * hardware sampler.
+ */
+static bool aux_set_alert(struct aux_buffer *aux, unsigned long alert_index,
+ unsigned long long *overflow)
+{
+ unsigned long long orig_overflow, orig_flags, new_flags;
+ struct hws_trailer_entry *te;
+
+ te = aux_sdb_trailer(aux, alert_index);
+ do {
+ orig_flags = te->flags;
+ *overflow = orig_overflow = te->overflow;
+ if (orig_flags & SDB_TE_BUFFER_FULL_MASK) {
+ /*
+ * SDB is already set by hardware.
+ * Abort and try to set somewhere
+ * behind.
+ */
+ return false;
+ }
+ new_flags = orig_flags | SDB_TE_ALERT_REQ_MASK;
+ } while (!cmpxchg_double(&te->flags, &te->overflow,
+ orig_flags, orig_overflow,
+ new_flags, 0ULL));
+ return true;
+}
+
+/*
+ * aux_reset_buffer() - Scan and setup SDBs for new samples
+ * @aux: The AUX buffer to set
+ * @range: The range of SDBs to scan, starting from aux->head
+ * @overflow: Set to overflow count
+ *
+ * Set the alert indicator on the SDB at index aux->alert_mark. If this SDB
+ * is marked as empty, check whether it has already been set full by the
+ * hardware sampler. If so, new data arrived before an alert indicator could
+ * be set and the caller should try to set the alert indicator at some
+ * position behind it.
+ *
+ * Scan the SDBs in the AUX buffer from behind aux->empty_mark. They were
+ * used previously and have already been consumed by user space. Reset these
+ * SDBs (clear the full indicator and the alert indicator) for new data.
+ * If aux->alert_mark falls into this area, just set it. The overflow count
+ * is recorded while scanning.
+ *
+ * SDBs between aux->head and aux->empty_mark were already reset last time
+ * and are ready for new samples, so scanning this area can be skipped.
+ *
+ * Return true if the alert indicator is set successfully and false if not.
+ */
+static bool aux_reset_buffer(struct aux_buffer *aux, unsigned long range,
+ unsigned long long *overflow)
+{
+ unsigned long long orig_overflow, orig_flags, new_flags;
+ unsigned long i, range_scan, idx;
+ struct hws_trailer_entry *te;
+
+ if (range <= AUX_SDB_NUM_EMPTY(aux))
+ /*
+ * No need to scan. All SDBs in range are marked as empty.
+ * Just set alert indicator. Should check race with hardware
+ * sampler.
+ */
+ return aux_set_alert(aux, aux->alert_mark, overflow);
+
+ if (aux->alert_mark <= aux->empty_mark)
+ /*
+ * Set alert indicator on empty SDB. Should check race
+ * with hardware sampler.
+ */
+ if (!aux_set_alert(aux, aux->alert_mark, overflow))
+ return false;
+
+ /*
+ * Scan the SDBs to clear the full and alert indicators used previously.
+ * Start scanning from one SDB behind empty_mark. If the new alert
+ * indicator falls into this range, set it.
+ */
+ range_scan = range - AUX_SDB_NUM_EMPTY(aux);
+ idx = aux->empty_mark + 1;
+ for (i = 0; i < range_scan; i++, idx++) {
+ te = aux_sdb_trailer(aux, idx);
+ do {
+ orig_flags = te->flags;
+ orig_overflow = te->overflow;
+ new_flags = orig_flags & ~SDB_TE_BUFFER_FULL_MASK;
+ if (idx == aux->alert_mark)
+ new_flags |= SDB_TE_ALERT_REQ_MASK;
+ else
+ new_flags &= ~SDB_TE_ALERT_REQ_MASK;
+ } while (!cmpxchg_double(&te->flags, &te->overflow,
+ orig_flags, orig_overflow,
+ new_flags, 0ULL));
+ *overflow += orig_overflow;
+ }
+
+ /* Update empty_mark to new position */
+ aux->empty_mark = aux->head + range - 1;
+
+ return true;
+}
+
+/*
+ * Measurement alert handler for diagnostic mode sampling.
+ */
+static void hw_collect_aux(struct cpu_hw_sf *cpuhw)
+{
+ struct aux_buffer *aux;
+ int done = 0;
+ unsigned long range = 0, size;
+ unsigned long long overflow = 0;
+ struct perf_output_handle *handle = &cpuhw->handle;
+ unsigned long num_sdb;
+
+ aux = perf_get_aux(handle);
+ if (WARN_ON_ONCE(!aux))
+ return;
+
+ /* Inform user space new data arrived */
+ size = AUX_SDB_NUM_ALERT(aux) << PAGE_SHIFT;
+ perf_aux_output_end(handle, size);
+ num_sdb = aux->sfb.num_sdb;
+
+ while (!done) {
+ /* Get an output handle */
+ aux = perf_aux_output_begin(handle, cpuhw->event);
+ if (handle->size == 0) {
+ pr_err("The AUX buffer with %lu pages for the "
+ "diagnostic-sampling mode is full\n",
+ num_sdb);
+ debug_sprintf_event(sfdbg, 1, "AUX buffer used up\n");
+ break;
+ }
+ if (WARN_ON_ONCE(!aux))
+ return;
+
+ /* Update head and alert_mark to new position */
+ aux->head = handle->head >> PAGE_SHIFT;
+ range = (handle->size + 1) >> PAGE_SHIFT;
+ if (range == 1)
+ aux->alert_mark = aux->head;
+ else
+ aux->alert_mark = aux->head + range/2 - 1;
+
+ if (aux_reset_buffer(aux, range, &overflow)) {
+ if (!overflow) {
+ done = 1;
+ break;
+ }
+ size = range << PAGE_SHIFT;
+ perf_aux_output_end(&cpuhw->handle, size);
+ pr_err("Sample data caused the AUX buffer with %lu "
+ "pages to overflow\n", num_sdb);
+ debug_sprintf_event(sfdbg, 1, "head %lx range %lx "
+ "overflow %llx\n",
+ aux->head, range, overflow);
+ } else {
+ size = AUX_SDB_NUM_ALERT(aux) << PAGE_SHIFT;
+ perf_aux_output_end(&cpuhw->handle, size);
+ debug_sprintf_event(sfdbg, 6, "head %lx alert %lx "
+ "already full, try another\n",
+ aux->head, aux->alert_mark);
+ }
+ }
+
+ if (done)
+ debug_sprintf_event(sfdbg, 6, "aux_reset_buffer: "
+ "[%lx -> %lx -> %lx] (%lx, %lx)\n",
+ aux->head, aux->alert_mark, aux->empty_mark,
+ AUX_SDB_NUM_ALERT(aux), range);
+}
+
+/*
+ * Callback when freeing AUX buffers.
+ */
+static void aux_buffer_free(void *data)
+{
+ struct aux_buffer *aux = data;
+ unsigned long i, num_sdbt;
+
+ if (!aux)
+ return;
+
+ /* Free SDBT. SDB is freed by the caller */
+ num_sdbt = aux->sfb.num_sdbt;
+ for (i = 0; i < num_sdbt; i++)
+ free_page(aux->sdbt_index[i]);
+
+ kfree(aux->sdbt_index);
+ kfree(aux->sdb_index);
+ kfree(aux);
+
+ debug_sprintf_event(sfdbg, 4, "aux_buffer_free: free "
+ "%lu SDBTs\n", num_sdbt);
+}
+
+static void aux_sdb_init(unsigned long sdb)
+{
+ struct hws_trailer_entry *te;
+
+ te = (struct hws_trailer_entry *)trailer_entry_ptr(sdb);
+
+ /* Save clock base */
+ te->clock_base = 1;
+ memcpy(&te->progusage2, &tod_clock_base[1], 8);
+}
+
+/*
+ * aux_buffer_setup() - Setup AUX buffer for diagnostic mode sampling
+ * @event: Event the buffer is setup for, event->cpu == -1 means current
+ * @pages: Array of pointers to buffer pages passed from perf core
+ * @nr_pages: Total pages
+ * @snapshot: Flag for snapshot mode
+ *
+ * This is the callback when an event is set up to use an AUX buffer. The
+ * perf tool can trigger this with an additional mmap() call on the event.
+ * Unlike the buffer for basic samples, the AUX buffer belongs to the event.
+ * It is scheduled with the task among online CPUs when it is a per-thread
+ * event.
+ *
+ * Return the private AUX buffer structure on success or NULL on failure.
+ */
+static void *aux_buffer_setup(struct perf_event *event, void **pages,
+ int nr_pages, bool snapshot)
+{
+ struct sf_buffer *sfb;
+ struct aux_buffer *aux;
+ unsigned long *new, *tail;
+ int i, n_sdbt;
+
+ if (!nr_pages || !pages)
+ return NULL;
+
+ if (nr_pages > CPUM_SF_MAX_SDB * CPUM_SF_SDB_DIAG_FACTOR) {
+ pr_err("AUX buffer size (%i pages) is larger than the "
+ "maximum sampling buffer limit\n",
+ nr_pages);
+ return NULL;
+ } else if (nr_pages < CPUM_SF_MIN_SDB * CPUM_SF_SDB_DIAG_FACTOR) {
+ pr_err("AUX buffer size (%i pages) is less than the "
+ "minimum sampling buffer limit\n",
+ nr_pages);
+ return NULL;
+ }
+
+ /* Allocate aux_buffer struct for the event */
+ aux = kzalloc(sizeof(struct aux_buffer), GFP_KERNEL);
+ if (!aux)
+ goto no_aux;
+ sfb = &aux->sfb;
+
+ /* Allocate sdbt_index for fast reference */
+ n_sdbt = (nr_pages + CPUM_SF_SDB_PER_TABLE - 1) / CPUM_SF_SDB_PER_TABLE;
+ aux->sdbt_index = kmalloc_array(n_sdbt, sizeof(void *), GFP_KERNEL);
+ if (!aux->sdbt_index)
+ goto no_sdbt_index;
+
+ /* Allocate sdb_index for fast reference */
+ aux->sdb_index = kmalloc_array(nr_pages, sizeof(void *), GFP_KERNEL);
+ if (!aux->sdb_index)
+ goto no_sdb_index;
+
+ /* Allocate the first SDBT */
+ sfb->num_sdbt = 0;
+ sfb->sdbt = (unsigned long *) get_zeroed_page(GFP_KERNEL);
+ if (!sfb->sdbt)
+ goto no_sdbt;
+ aux->sdbt_index[sfb->num_sdbt++] = (unsigned long)sfb->sdbt;
+ tail = sfb->tail = sfb->sdbt;
+
+ /*
+ * Link the provided pages of AUX buffer to SDBT.
+ * Allocate SDBT if needed.
+ */
+ for (i = 0; i < nr_pages; i++, tail++) {
+ if (require_table_link(tail)) {
+ new = (unsigned long *) get_zeroed_page(GFP_KERNEL);
+ if (!new)
+ goto no_sdbt;
+ aux->sdbt_index[sfb->num_sdbt++] = (unsigned long)new;
+ /* Link current page to tail of chain */
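+ /* The low-order bit marks the entry as a table link (see is_link_entry()) */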
+ *tail = (unsigned long)(void *) new + 1;
+ tail = new;
+ }
+ /* Tail is the entry in a SDBT */
+ *tail = (unsigned long)pages[i];
+ aux->sdb_index[i] = (unsigned long)pages[i];
+ aux_sdb_init((unsigned long)pages[i]);
+ }
+ sfb->num_sdb = nr_pages;
+
+ /* Link the last entry in the SDBT to the first SDBT */
+ *tail = (unsigned long) sfb->sdbt + 1;
+ sfb->tail = tail;
+
+ /*
+ * Initially, all SDBs are zeroed. Mark them as empty so there is
+ * no need to clear the full indicator when this event is first
+ * added.
+ */
+ aux->empty_mark = sfb->num_sdb - 1;
+
+ debug_sprintf_event(sfdbg, 4, "aux_buffer_setup: setup %lu SDBTs"
+ " and %lu SDBs\n",
+ sfb->num_sdbt, sfb->num_sdb);
+
+ return aux;
+
+no_sdbt:
+ /* SDBs (AUX buffer pages) are freed by caller */
+ for (i = 0; i < sfb->num_sdbt; i++)
+ free_page(aux->sdbt_index[i]);
+ kfree(aux->sdb_index);
+no_sdb_index:
+ kfree(aux->sdbt_index);
+no_sdbt_index:
+ kfree(aux);
+no_aux:
+ return NULL;
+}
+
+static void cpumsf_pmu_read(struct perf_event *event)
+{
+ /* Nothing to do ... updates are interrupt-driven */
+}
+
+/* Activate sampling control.
+ * Next call of pmu_enable() starts sampling.
+ */
+static void cpumsf_pmu_start(struct perf_event *event, int flags)
+{
+ struct cpu_hw_sf *cpuhw = this_cpu_ptr(&cpu_hw_sf);
+
+ if (WARN_ON_ONCE(!(event->hw.state & PERF_HES_STOPPED)))
+ return;
+
+ if (flags & PERF_EF_RELOAD)
+ WARN_ON_ONCE(!(event->hw.state & PERF_HES_UPTODATE));
+
+ perf_pmu_disable(event->pmu);
+ event->hw.state = 0;
+ cpuhw->lsctl.cs = 1;
+ if (SAMPL_DIAG_MODE(&event->hw))
+ cpuhw->lsctl.cd = 1;
+ perf_pmu_enable(event->pmu);
+}
+
+/* Deactivate sampling control.
+ * Next call of pmu_enable() stops sampling.
+ */
+static void cpumsf_pmu_stop(struct perf_event *event, int flags)
+{
+ struct cpu_hw_sf *cpuhw = this_cpu_ptr(&cpu_hw_sf);
+
+ if (event->hw.state & PERF_HES_STOPPED)
+ return;
+
+ perf_pmu_disable(event->pmu);
+ cpuhw->lsctl.cs = 0;
+ cpuhw->lsctl.cd = 0;
+ event->hw.state |= PERF_HES_STOPPED;
+
+ if ((flags & PERF_EF_UPDATE) && !(event->hw.state & PERF_HES_UPTODATE)) {
+ hw_perf_event_update(event, 1);
+ event->hw.state |= PERF_HES_UPTODATE;
+ }
+ perf_pmu_enable(event->pmu);
+}
+
+static int cpumsf_pmu_add(struct perf_event *event, int flags)
+{
+ struct cpu_hw_sf *cpuhw = this_cpu_ptr(&cpu_hw_sf);
+ struct aux_buffer *aux;
+ int err;
+
+ if (cpuhw->flags & PMU_F_IN_USE)
+ return -EAGAIN;
+
+ if (!SAMPL_DIAG_MODE(&event->hw) && !cpuhw->sfb.sdbt)
+ return -EINVAL;
+
+ err = 0;
+ perf_pmu_disable(event->pmu);
+
+ event->hw.state = PERF_HES_UPTODATE | PERF_HES_STOPPED;
+
+ /* Set up sampling controls. Always program the sampling register
+ * using the SDB-table start. Reset TEAR_REG event hardware register
+ * that is used by hw_perf_event_update() to store the sampling buffer
+ * position after samples have been flushed.
+ */
+ cpuhw->lsctl.s = 0;
+ cpuhw->lsctl.h = 1;
+ cpuhw->lsctl.interval = SAMPL_RATE(&event->hw);
+ if (!SAMPL_DIAG_MODE(&event->hw)) {
+ cpuhw->lsctl.tear = (unsigned long) cpuhw->sfb.sdbt;
+ cpuhw->lsctl.dear = *(unsigned long *) cpuhw->sfb.sdbt;
+ hw_reset_registers(&event->hw, cpuhw->sfb.sdbt);
+ }
+
+ /* Ensure sampling functions are in the disabled state. If disabled,
+ * switch on sampling enable control. */
+ if (WARN_ON_ONCE(cpuhw->lsctl.es == 1 || cpuhw->lsctl.ed == 1)) {
+ err = -EAGAIN;
+ goto out;
+ }
+ if (SAMPL_DIAG_MODE(&event->hw)) {
+ aux = perf_aux_output_begin(&cpuhw->handle, event);
+ if (!aux) {
+ err = -EINVAL;
+ goto out;
+ }
+ err = aux_output_begin(&cpuhw->handle, aux, cpuhw);
+ if (err)
+ goto out;
+ cpuhw->lsctl.ed = 1;
+ }
+ cpuhw->lsctl.es = 1;
+
+ /* Set in_use flag and store event */
+ cpuhw->event = event;
+ cpuhw->flags |= PMU_F_IN_USE;
+
+ if (flags & PERF_EF_START)
+ cpumsf_pmu_start(event, PERF_EF_RELOAD);
+out:
+ perf_event_update_userpage(event);
+ perf_pmu_enable(event->pmu);
+ return err;
+}
+
+static void cpumsf_pmu_del(struct perf_event *event, int flags)
+{
+ struct cpu_hw_sf *cpuhw = this_cpu_ptr(&cpu_hw_sf);
+
+ perf_pmu_disable(event->pmu);
+ cpumsf_pmu_stop(event, PERF_EF_UPDATE);
+
+ cpuhw->lsctl.es = 0;
+ cpuhw->lsctl.ed = 0;
+ cpuhw->flags &= ~PMU_F_IN_USE;
+ cpuhw->event = NULL;
+
+ if (SAMPL_DIAG_MODE(&event->hw))
+ aux_output_end(&cpuhw->handle);
+ perf_event_update_userpage(event);
+ perf_pmu_enable(event->pmu);
+}
+
+CPUMF_EVENT_ATTR(SF, SF_CYCLES_BASIC, PERF_EVENT_CPUM_SF);
+CPUMF_EVENT_ATTR(SF, SF_CYCLES_BASIC_DIAG, PERF_EVENT_CPUM_SF_DIAG);
+
+static struct attribute *cpumsf_pmu_events_attr[] = {
+ CPUMF_EVENT_PTR(SF, SF_CYCLES_BASIC),
+ NULL,
+ NULL,
+};
+
+PMU_FORMAT_ATTR(event, "config:0-63");
+
+static struct attribute *cpumsf_pmu_format_attr[] = {
+ &format_attr_event.attr,
+ NULL,
+};
+
+static struct attribute_group cpumsf_pmu_events_group = {
+ .name = "events",
+ .attrs = cpumsf_pmu_events_attr,
+};
+static struct attribute_group cpumsf_pmu_format_group = {
+ .name = "format",
+ .attrs = cpumsf_pmu_format_attr,
+};
+static const struct attribute_group *cpumsf_pmu_attr_groups[] = {
+ &cpumsf_pmu_events_group,
+ &cpumsf_pmu_format_group,
+ NULL,
+};
+
+static struct pmu cpumf_sampling = {
+ .pmu_enable = cpumsf_pmu_enable,
+ .pmu_disable = cpumsf_pmu_disable,
+
+ .event_init = cpumsf_pmu_event_init,
+ .add = cpumsf_pmu_add,
+ .del = cpumsf_pmu_del,
+
+ .start = cpumsf_pmu_start,
+ .stop = cpumsf_pmu_stop,
+ .read = cpumsf_pmu_read,
+
+ .attr_groups = cpumsf_pmu_attr_groups,
+
+ .setup_aux = aux_buffer_setup,
+ .free_aux = aux_buffer_free,
+};
+
+static void cpumf_measurement_alert(struct ext_code ext_code,
+ unsigned int alert, unsigned long unused)
+{
+ struct cpu_hw_sf *cpuhw;
+
+ if (!(alert & CPU_MF_INT_SF_MASK))
+ return;
+ inc_irq_stat(IRQEXT_CMS);
+ cpuhw = this_cpu_ptr(&cpu_hw_sf);
+
+ /* Measurement alerts are shared and might happen when the PMU
+ * is not reserved. Ignore these alerts in this case. */
+ if (!(cpuhw->flags & PMU_F_RESERVED))
+ return;
+
+ /* The processing below must take care of multiple alert events that
+ * might be indicated concurrently. */
+
+ /* Program alert request */
+ if (alert & CPU_MF_INT_SF_PRA) {
+ if (cpuhw->flags & PMU_F_IN_USE) {
+ if (SAMPL_DIAG_MODE(&cpuhw->event->hw))
+ hw_collect_aux(cpuhw);
+ else
+ hw_perf_event_update(cpuhw->event, 0);
+ } else {
+ WARN_ON_ONCE(!(cpuhw->flags & PMU_F_IN_USE));
+ }
+ }
+
+ /* Report measurement alerts only for non-PRA codes */
+ if (alert != CPU_MF_INT_SF_PRA)
+ debug_sprintf_event(sfdbg, 6, "measurement alert: 0x%x\n", alert);
+
+ /* Sampling authorization change request */
+ if (alert & CPU_MF_INT_SF_SACA)
+ qsi(&cpuhw->qsi);
+
+ /* Loss of sample data due to high-priority machine activities */
+ if (alert & CPU_MF_INT_SF_LSDA) {
+ pr_err("Sample data was lost\n");
+ cpuhw->flags |= PMU_F_ERR_LSDA;
+ sf_disable();
+ }
+
+ /* Invalid sampling buffer entry */
+ if (alert & (CPU_MF_INT_SF_IAE|CPU_MF_INT_SF_ISE)) {
+ pr_err("A sampling buffer entry is incorrect (alert=0x%x)\n",
+ alert);
+ cpuhw->flags |= PMU_F_ERR_IBE;
+ sf_disable();
+ }
+}
+static int cpusf_pmu_setup(unsigned int cpu, int flags)
+{
+ /* Ignore the notification if no events are scheduled on the PMU.
+ * This might be racy...
+ */
+ if (!atomic_read(&num_events))
+ return 0;
+
+ local_irq_disable();
+ setup_pmc_cpu(&flags);
+ local_irq_enable();
+ return 0;
+}
+
+static int s390_pmu_sf_online_cpu(unsigned int cpu)
+{
+ return cpusf_pmu_setup(cpu, PMC_INIT);
+}
+
+static int s390_pmu_sf_offline_cpu(unsigned int cpu)
+{
+ return cpusf_pmu_setup(cpu, PMC_RELEASE);
+}
+
+static int param_get_sfb_size(char *buffer, const struct kernel_param *kp)
+{
+ if (!cpum_sf_avail())
+ return -ENODEV;
+ return sprintf(buffer, "%lu,%lu", CPUM_SF_MIN_SDB, CPUM_SF_MAX_SDB);
+}
+
+static int param_set_sfb_size(const char *val, const struct kernel_param *kp)
+{
+ int rc;
+ unsigned long min, max;
+
+ if (!cpum_sf_avail())
+ return -ENODEV;
+ if (!val || !strlen(val))
+ return -EINVAL;
+
+ /* Valid parameter values: "min,max" or "max" */
+ min = CPUM_SF_MIN_SDB;
+ max = CPUM_SF_MAX_SDB;
+ if (strchr(val, ','))
+ rc = (sscanf(val, "%lu,%lu", &min, &max) == 2) ? 0 : -EINVAL;
+ else
+ rc = kstrtoul(val, 10, &max);
+
+ if (min < 2 || min >= max || max > get_num_physpages())
+ rc = -EINVAL;
+ if (rc)
+ return rc;
+
+ sfb_set_limits(min, max);
+ pr_info("The sampling buffer limits have changed to: "
+ "min=%lu max=%lu (diag=x%lu)\n",
+ CPUM_SF_MIN_SDB, CPUM_SF_MAX_SDB, CPUM_SF_SDB_DIAG_FACTOR);
+ return 0;
+}
+
+#define param_check_sfb_size(name, p) __param_check(name, p, void)
+static const struct kernel_param_ops param_ops_sfb_size = {
+ .set = param_set_sfb_size,
+ .get = param_get_sfb_size,
+};
+
+#define RS_INIT_FAILURE_QSI 0x0001
+#define RS_INIT_FAILURE_BSDES 0x0002
+#define RS_INIT_FAILURE_ALRT 0x0003
+#define RS_INIT_FAILURE_PERF 0x0004
+static void __init pr_cpumsf_err(unsigned int reason)
+{
+ pr_err("Sampling facility support for perf is not available: "
+ "reason=%04x\n", reason);
+}
+
+static int __init init_cpum_sampling_pmu(void)
+{
+ struct hws_qsi_info_block si;
+ int err;
+
+ if (!cpum_sf_avail())
+ return -ENODEV;
+
+ memset(&si, 0, sizeof(si));
+ if (qsi(&si)) {
+ pr_cpumsf_err(RS_INIT_FAILURE_QSI);
+ return -ENODEV;
+ }
+
+ if (!si.as && !si.ad)
+ return -ENODEV;
+
+ if (si.bsdes != sizeof(struct hws_basic_entry)) {
+ pr_cpumsf_err(RS_INIT_FAILURE_BSDES);
+ return -EINVAL;
+ }
+
+ if (si.ad) {
+ sfb_set_limits(CPUM_SF_MIN_SDB, CPUM_SF_MAX_SDB);
+ cpumsf_pmu_events_attr[1] =
+ CPUMF_EVENT_PTR(SF, SF_CYCLES_BASIC_DIAG);
+ }
+
+ sfdbg = debug_register(KMSG_COMPONENT, 2, 1, 80);
+ if (!sfdbg) {
+ pr_err("Registering for s390dbf failed\n");
+ return -ENOMEM;
+ }
+ debug_register_view(sfdbg, &debug_sprintf_view);
+
+ err = register_external_irq(EXT_IRQ_MEASURE_ALERT,
+ cpumf_measurement_alert);
+ if (err) {
+ pr_cpumsf_err(RS_INIT_FAILURE_ALRT);
+ debug_unregister(sfdbg);
+ goto out;
+ }
+
+ err = perf_pmu_register(&cpumf_sampling, "cpum_sf", PERF_TYPE_RAW);
+ if (err) {
+ pr_cpumsf_err(RS_INIT_FAILURE_PERF);
+ unregister_external_irq(EXT_IRQ_MEASURE_ALERT,
+ cpumf_measurement_alert);
+ debug_unregister(sfdbg);
+ goto out;
+ }
+
+ cpuhp_setup_state(CPUHP_AP_PERF_S390_SF_ONLINE, "perf/s390/sf:online",
+ s390_pmu_sf_online_cpu, s390_pmu_sf_offline_cpu);
+out:
+ return err;
+}
+arch_initcall(init_cpum_sampling_pmu);
+core_param(cpum_sfb_size, CPUM_SF_MAX_SDB, sfb_size, 0644);
diff --git a/arch/s390/kernel/perf_event.c b/arch/s390/kernel/perf_event.c
new file mode 100644
index 000000000..0d770e513
--- /dev/null
+++ b/arch/s390/kernel/perf_event.c
@@ -0,0 +1,246 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Performance event support for s390x
+ *
+ * Copyright IBM Corp. 2012, 2013
+ * Author(s): Hendrik Brueckner <brueckner@linux.vnet.ibm.com>
+ */
+#define KMSG_COMPONENT "perf"
+#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
+
+#include <linux/kernel.h>
+#include <linux/perf_event.h>
+#include <linux/kvm_host.h>
+#include <linux/percpu.h>
+#include <linux/export.h>
+#include <linux/seq_file.h>
+#include <linux/spinlock.h>
+#include <linux/sysfs.h>
+#include <asm/irq.h>
+#include <asm/cpu_mf.h>
+#include <asm/lowcore.h>
+#include <asm/processor.h>
+#include <asm/sysinfo.h>
+
+const char *perf_pmu_name(void)
+{
+ if (cpum_cf_avail() || cpum_sf_avail())
+ return "CPU-Measurement Facilities (CPU-MF)";
+ return "pmu";
+}
+EXPORT_SYMBOL(perf_pmu_name);
+
+int perf_num_counters(void)
+{
+ int num = 0;
+
+ if (cpum_cf_avail())
+ num += PERF_CPUM_CF_MAX_CTR;
+ if (cpum_sf_avail())
+ num += PERF_CPUM_SF_MAX_CTR;
+
+ return num;
+}
+EXPORT_SYMBOL(perf_num_counters);
+
+static struct kvm_s390_sie_block *sie_block(struct pt_regs *regs)
+{
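+ /* The SIE entry code saves the SIE control block pointer in the
+ * empty1 slot of the current stack frame; fetch it from there.
+ */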
+ struct stack_frame *stack = (struct stack_frame *) regs->gprs[15];
+
+ if (!stack)
+ return NULL;
+
+ return (struct kvm_s390_sie_block *) stack->empty1[0];
+}
+
+static bool is_in_guest(struct pt_regs *regs)
+{
+ if (user_mode(regs))
+ return false;
+#if IS_ENABLED(CONFIG_KVM)
+ return instruction_pointer(regs) == (unsigned long) &sie_exit;
+#else
+ return false;
+#endif
+}
+
+static unsigned long guest_is_user_mode(struct pt_regs *regs)
+{
+ return sie_block(regs)->gpsw.mask & PSW_MASK_PSTATE;
+}
+
+static unsigned long instruction_pointer_guest(struct pt_regs *regs)
+{
+ return sie_block(regs)->gpsw.addr;
+}
+
+unsigned long perf_instruction_pointer(struct pt_regs *regs)
+{
+ return is_in_guest(regs) ? instruction_pointer_guest(regs)
+ : instruction_pointer(regs);
+}
+
+static unsigned long perf_misc_guest_flags(struct pt_regs *regs)
+{
+ return guest_is_user_mode(regs) ? PERF_RECORD_MISC_GUEST_USER
+ : PERF_RECORD_MISC_GUEST_KERNEL;
+}
+
+static unsigned long perf_misc_flags_sf(struct pt_regs *regs)
+{
+ struct perf_sf_sde_regs *sde_regs;
+ unsigned long flags;
+
+ sde_regs = (struct perf_sf_sde_regs *) &regs->int_parm_long;
+ if (sde_regs->in_guest)
+ flags = user_mode(regs) ? PERF_RECORD_MISC_GUEST_USER
+ : PERF_RECORD_MISC_GUEST_KERNEL;
+ else
+ flags = user_mode(regs) ? PERF_RECORD_MISC_USER
+ : PERF_RECORD_MISC_KERNEL;
+ return flags;
+}
+
+unsigned long perf_misc_flags(struct pt_regs *regs)
+{
+ /* Check if the cpum_sf PMU has created the pt_regs structure.
+ * In this case, perf misc flags can be easily extracted. Otherwise,
+ * do regular checks on the pt_regs content.
+ */
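+ /* int_code 0x1407 is the measurement-alert external interrupt;
+ * CPU_MF_INT_SF_PRA indicates a program-request-alert from the
+ * sampling facility.
+ */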
+ if (regs->int_code == 0x1407 && regs->int_parm == CPU_MF_INT_SF_PRA)
+ if (!regs->gprs[15])
+ return perf_misc_flags_sf(regs);
+
+ if (is_in_guest(regs))
+ return perf_misc_guest_flags(regs);
+
+ return user_mode(regs) ? PERF_RECORD_MISC_USER
+ : PERF_RECORD_MISC_KERNEL;
+}
+
+static void print_debug_cf(void)
+{
+ struct cpumf_ctr_info cf_info;
+ int cpu = smp_processor_id();
+
+ memset(&cf_info, 0, sizeof(cf_info));
+ if (!qctri(&cf_info))
+ pr_info("CPU[%i] CPUM_CF: ver=%u.%u A=%04x E=%04x C=%04x\n",
+ cpu, cf_info.cfvn, cf_info.csvn,
+ cf_info.auth_ctl, cf_info.enable_ctl, cf_info.act_ctl);
+}
+
+static void print_debug_sf(void)
+{
+ struct hws_qsi_info_block si;
+ int cpu = smp_processor_id();
+
+ memset(&si, 0, sizeof(si));
+ if (qsi(&si))
+ return;
+
+ pr_info("CPU[%i] CPUM_SF: basic=%i diag=%i min=%lu max=%lu cpu_speed=%u\n",
+ cpu, si.as, si.ad, si.min_sampl_rate, si.max_sampl_rate,
+ si.cpu_speed);
+
+ if (si.as)
+ pr_info("CPU[%i] CPUM_SF: Basic-sampling: a=%i e=%i c=%i"
+ " bsdes=%i tear=%016lx dear=%016lx\n", cpu,
+ si.as, si.es, si.cs, si.bsdes, si.tear, si.dear);
+ if (si.ad)
+ pr_info("CPU[%i] CPUM_SF: Diagnostic-sampling: a=%i e=%i c=%i"
+ " dsdes=%i tear=%016lx dear=%016lx\n", cpu,
+ si.ad, si.ed, si.cd, si.dsdes, si.tear, si.dear);
+}
+
+void perf_event_print_debug(void)
+{
+ unsigned long flags;
+
+ local_irq_save(flags);
+ if (cpum_cf_avail())
+ print_debug_cf();
+ if (cpum_sf_avail())
+ print_debug_sf();
+ local_irq_restore(flags);
+}
+
+/* Service level infrastructure */
+static void sl_print_counter(struct seq_file *m)
+{
+ struct cpumf_ctr_info ci;
+
+ memset(&ci, 0, sizeof(ci));
+ if (qctri(&ci))
+ return;
+
+ seq_printf(m, "CPU-MF: Counter facility: version=%u.%u "
+ "authorization=%04x\n", ci.cfvn, ci.csvn, ci.auth_ctl);
+}
+
+static void sl_print_sampling(struct seq_file *m)
+{
+ struct hws_qsi_info_block si;
+
+ memset(&si, 0, sizeof(si));
+ if (qsi(&si))
+ return;
+
+ if (!si.as && !si.ad)
+ return;
+
+ seq_printf(m, "CPU-MF: Sampling facility: min_rate=%lu max_rate=%lu"
+ " cpu_speed=%u\n", si.min_sampl_rate, si.max_sampl_rate,
+ si.cpu_speed);
+ if (si.as)
+ seq_printf(m, "CPU-MF: Sampling facility: mode=basic"
+ " sample_size=%u\n", si.bsdes);
+ if (si.ad)
+ seq_printf(m, "CPU-MF: Sampling facility: mode=diagnostic"
+ " sample_size=%u\n", si.dsdes);
+}
+
+static void service_level_perf_print(struct seq_file *m,
+ struct service_level *sl)
+{
+ if (cpum_cf_avail())
+ sl_print_counter(m);
+ if (cpum_sf_avail())
+ sl_print_sampling(m);
+}
+
+static struct service_level service_level_perf = {
+ .seq_print = service_level_perf_print,
+};
+
+static int __init service_level_perf_register(void)
+{
+ return register_service_level(&service_level_perf);
+}
+arch_initcall(service_level_perf_register);
+
+static int __perf_callchain_kernel(void *data, unsigned long address, int reliable)
+{
+ struct perf_callchain_entry_ctx *entry = data;
+
+ perf_callchain_store(entry, address);
+ return 0;
+}
+
+void perf_callchain_kernel(struct perf_callchain_entry_ctx *entry,
+ struct pt_regs *regs)
+{
+ if (user_mode(regs))
+ return;
+ dump_trace(__perf_callchain_kernel, entry, NULL, regs->gprs[15]);
+}
+
+/* Perf definitions for PMU event attributes in sysfs */
+ssize_t cpumf_events_sysfs_show(struct device *dev,
+ struct device_attribute *attr, char *page)
+{
+ struct perf_pmu_events_attr *pmu_attr;
+
+ pmu_attr = container_of(attr, struct perf_pmu_events_attr, attr);
+ return sprintf(page, "event=0x%04llx\n", pmu_attr->id);
+}
diff --git a/arch/s390/kernel/perf_regs.c b/arch/s390/kernel/perf_regs.c
new file mode 100644
index 000000000..4352a504f
--- /dev/null
+++ b/arch/s390/kernel/perf_regs.c
@@ -0,0 +1,69 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/perf_event.h>
+#include <linux/perf_regs.h>
+#include <linux/kernel.h>
+#include <linux/errno.h>
+#include <linux/bug.h>
+#include <asm/ptrace.h>
+#include <asm/fpu/api.h>
+#include <asm/fpu/types.h>
+
+u64 perf_reg_value(struct pt_regs *regs, int idx)
+{
+ freg_t fp;
+
+ if (idx >= PERF_REG_S390_R0 && idx <= PERF_REG_S390_R15)
+ return regs->gprs[idx];
+
+ if (idx >= PERF_REG_S390_FP0 && idx <= PERF_REG_S390_FP15) {
+ if (!user_mode(regs))
+ return 0;
+
+ idx -= PERF_REG_S390_FP0;
+ fp = MACHINE_HAS_VX ? *(freg_t *)(current->thread.fpu.vxrs + idx)
+ : current->thread.fpu.fprs[idx];
+ return fp.ui;
+ }
+
+ if (idx == PERF_REG_S390_MASK)
+ return regs->psw.mask;
+ if (idx == PERF_REG_S390_PC)
+ return regs->psw.addr;
+
+ WARN_ON_ONCE((u32)idx >= PERF_REG_S390_MAX);
+ return 0;
+}
+
+#define REG_RESERVED (~((1UL << PERF_REG_S390_MAX) - 1))
+
+int perf_reg_validate(u64 mask)
+{
+ if (!mask || mask & REG_RESERVED)
+ return -EINVAL;
+
+ return 0;
+}
+
+u64 perf_reg_abi(struct task_struct *task)
+{
+ if (test_tsk_thread_flag(task, TIF_31BIT))
+ return PERF_SAMPLE_REGS_ABI_32;
+
+ return PERF_SAMPLE_REGS_ABI_64;
+}
+
+void perf_get_regs_user(struct perf_regs *regs_user,
+ struct pt_regs *regs,
+ struct pt_regs *regs_user_copy)
+{
+ /*
+ * Use the regs from the first interruption and let
+ * perf_sample_regs_intr() handle interrupts (regs == get_irq_regs()).
+ *
+ * Also save FPU registers for user-space tasks only.
+ */
+ regs_user->regs = task_pt_regs(current);
+ if (user_mode(regs_user->regs))
+ save_fpu_regs();
+ regs_user->abi = perf_reg_abi(current);
+}
diff --git a/arch/s390/kernel/pgm_check.S b/arch/s390/kernel/pgm_check.S
new file mode 100644
index 000000000..3e62aae34
--- /dev/null
+++ b/arch/s390/kernel/pgm_check.S
@@ -0,0 +1,147 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Program check table.
+ *
+ * Copyright IBM Corp. 2012
+ */
+
+#include <linux/linkage.h>
+
+#define PGM_CHECK(handler) .long handler
+#define PGM_CHECK_DEFAULT PGM_CHECK(default_trap_handler)
+
+/*
+ * The program check table contains exactly 128 (0x00-0x7f) entries. Each
+ * line defines the function to be called corresponding to the program check
+ * interruption code.
+ */
+.section .rodata, "a"
+ENTRY(pgm_check_table)
+PGM_CHECK_DEFAULT /* 00 */
+PGM_CHECK(illegal_op) /* 01 */
+PGM_CHECK(privileged_op) /* 02 */
+PGM_CHECK(execute_exception) /* 03 */
+PGM_CHECK(do_protection_exception) /* 04 */
+PGM_CHECK(addressing_exception) /* 05 */
+PGM_CHECK(specification_exception) /* 06 */
+PGM_CHECK(data_exception) /* 07 */
+PGM_CHECK(overflow_exception) /* 08 */
+PGM_CHECK(divide_exception) /* 09 */
+PGM_CHECK(overflow_exception) /* 0a */
+PGM_CHECK(divide_exception) /* 0b */
+PGM_CHECK(hfp_overflow_exception) /* 0c */
+PGM_CHECK(hfp_underflow_exception) /* 0d */
+PGM_CHECK(hfp_significance_exception) /* 0e */
+PGM_CHECK(hfp_divide_exception) /* 0f */
+PGM_CHECK(do_dat_exception) /* 10 */
+PGM_CHECK(do_dat_exception) /* 11 */
+PGM_CHECK(translation_exception) /* 12 */
+PGM_CHECK(special_op_exception) /* 13 */
+PGM_CHECK_DEFAULT /* 14 */
+PGM_CHECK(operand_exception) /* 15 */
+PGM_CHECK_DEFAULT /* 16 */
+PGM_CHECK_DEFAULT /* 17 */
+PGM_CHECK(transaction_exception) /* 18 */
+PGM_CHECK_DEFAULT /* 19 */
+PGM_CHECK_DEFAULT /* 1a */
+PGM_CHECK(vector_exception) /* 1b */
+PGM_CHECK(space_switch_exception) /* 1c */
+PGM_CHECK(hfp_sqrt_exception) /* 1d */
+PGM_CHECK_DEFAULT /* 1e */
+PGM_CHECK_DEFAULT /* 1f */
+PGM_CHECK_DEFAULT /* 20 */
+PGM_CHECK_DEFAULT /* 21 */
+PGM_CHECK_DEFAULT /* 22 */
+PGM_CHECK_DEFAULT /* 23 */
+PGM_CHECK_DEFAULT /* 24 */
+PGM_CHECK_DEFAULT /* 25 */
+PGM_CHECK_DEFAULT /* 26 */
+PGM_CHECK_DEFAULT /* 27 */
+PGM_CHECK_DEFAULT /* 28 */
+PGM_CHECK_DEFAULT /* 29 */
+PGM_CHECK_DEFAULT /* 2a */
+PGM_CHECK_DEFAULT /* 2b */
+PGM_CHECK_DEFAULT /* 2c */
+PGM_CHECK_DEFAULT /* 2d */
+PGM_CHECK_DEFAULT /* 2e */
+PGM_CHECK_DEFAULT /* 2f */
+PGM_CHECK_DEFAULT /* 30 */
+PGM_CHECK_DEFAULT /* 31 */
+PGM_CHECK_DEFAULT /* 32 */
+PGM_CHECK_DEFAULT /* 33 */
+PGM_CHECK_DEFAULT /* 34 */
+PGM_CHECK_DEFAULT /* 35 */
+PGM_CHECK_DEFAULT /* 36 */
+PGM_CHECK_DEFAULT /* 37 */
+PGM_CHECK(do_dat_exception) /* 38 */
+PGM_CHECK(do_dat_exception) /* 39 */
+PGM_CHECK(do_dat_exception) /* 3a */
+PGM_CHECK(do_dat_exception) /* 3b */
+PGM_CHECK_DEFAULT /* 3c */
+PGM_CHECK_DEFAULT /* 3d */
+PGM_CHECK_DEFAULT /* 3e */
+PGM_CHECK_DEFAULT /* 3f */
+PGM_CHECK_DEFAULT /* 40 */
+PGM_CHECK_DEFAULT /* 41 */
+PGM_CHECK_DEFAULT /* 42 */
+PGM_CHECK_DEFAULT /* 43 */
+PGM_CHECK_DEFAULT /* 44 */
+PGM_CHECK_DEFAULT /* 45 */
+PGM_CHECK_DEFAULT /* 46 */
+PGM_CHECK_DEFAULT /* 47 */
+PGM_CHECK_DEFAULT /* 48 */
+PGM_CHECK_DEFAULT /* 49 */
+PGM_CHECK_DEFAULT /* 4a */
+PGM_CHECK_DEFAULT /* 4b */
+PGM_CHECK_DEFAULT /* 4c */
+PGM_CHECK_DEFAULT /* 4d */
+PGM_CHECK_DEFAULT /* 4e */
+PGM_CHECK_DEFAULT /* 4f */
+PGM_CHECK_DEFAULT /* 50 */
+PGM_CHECK_DEFAULT /* 51 */
+PGM_CHECK_DEFAULT /* 52 */
+PGM_CHECK_DEFAULT /* 53 */
+PGM_CHECK_DEFAULT /* 54 */
+PGM_CHECK_DEFAULT /* 55 */
+PGM_CHECK_DEFAULT /* 56 */
+PGM_CHECK_DEFAULT /* 57 */
+PGM_CHECK_DEFAULT /* 58 */
+PGM_CHECK_DEFAULT /* 59 */
+PGM_CHECK_DEFAULT /* 5a */
+PGM_CHECK_DEFAULT /* 5b */
+PGM_CHECK_DEFAULT /* 5c */
+PGM_CHECK_DEFAULT /* 5d */
+PGM_CHECK_DEFAULT /* 5e */
+PGM_CHECK_DEFAULT /* 5f */
+PGM_CHECK_DEFAULT /* 60 */
+PGM_CHECK_DEFAULT /* 61 */
+PGM_CHECK_DEFAULT /* 62 */
+PGM_CHECK_DEFAULT /* 63 */
+PGM_CHECK_DEFAULT /* 64 */
+PGM_CHECK_DEFAULT /* 65 */
+PGM_CHECK_DEFAULT /* 66 */
+PGM_CHECK_DEFAULT /* 67 */
+PGM_CHECK_DEFAULT /* 68 */
+PGM_CHECK_DEFAULT /* 69 */
+PGM_CHECK_DEFAULT /* 6a */
+PGM_CHECK_DEFAULT /* 6b */
+PGM_CHECK_DEFAULT /* 6c */
+PGM_CHECK_DEFAULT /* 6d */
+PGM_CHECK_DEFAULT /* 6e */
+PGM_CHECK_DEFAULT /* 6f */
+PGM_CHECK_DEFAULT /* 70 */
+PGM_CHECK_DEFAULT /* 71 */
+PGM_CHECK_DEFAULT /* 72 */
+PGM_CHECK_DEFAULT /* 73 */
+PGM_CHECK_DEFAULT /* 74 */
+PGM_CHECK_DEFAULT /* 75 */
+PGM_CHECK_DEFAULT /* 76 */
+PGM_CHECK_DEFAULT /* 77 */
+PGM_CHECK_DEFAULT /* 78 */
+PGM_CHECK_DEFAULT /* 79 */
+PGM_CHECK_DEFAULT /* 7a */
+PGM_CHECK_DEFAULT /* 7b */
+PGM_CHECK_DEFAULT /* 7c */
+PGM_CHECK_DEFAULT /* 7d */
+PGM_CHECK_DEFAULT /* 7e */
+PGM_CHECK_DEFAULT /* 7f */
diff --git a/arch/s390/kernel/process.c b/arch/s390/kernel/process.c
new file mode 100644
index 000000000..99ef537e5
--- /dev/null
+++ b/arch/s390/kernel/process.c
@@ -0,0 +1,243 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * This file handles the architecture dependent parts of process handling.
+ *
+ * Copyright IBM Corp. 1999, 2009
+ * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>,
+ * Hartmut Penner <hp@de.ibm.com>,
+ * Denis Joseph Barrow,
+ */
+
+#include <linux/elf-randomize.h>
+#include <linux/compiler.h>
+#include <linux/cpu.h>
+#include <linux/sched.h>
+#include <linux/sched/debug.h>
+#include <linux/sched/task.h>
+#include <linux/sched/task_stack.h>
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <linux/elfcore.h>
+#include <linux/smp.h>
+#include <linux/slab.h>
+#include <linux/interrupt.h>
+#include <linux/tick.h>
+#include <linux/personality.h>
+#include <linux/syscalls.h>
+#include <linux/compat.h>
+#include <linux/kprobes.h>
+#include <linux/random.h>
+#include <linux/export.h>
+#include <linux/init_task.h>
+#include <asm/cpu_mf.h>
+#include <asm/io.h>
+#include <asm/processor.h>
+#include <asm/vtimer.h>
+#include <asm/exec.h>
+#include <asm/irq.h>
+#include <asm/nmi.h>
+#include <asm/smp.h>
+#include <asm/switch_to.h>
+#include <asm/runtime_instr.h>
+#include "entry.h"
+
+asmlinkage void ret_from_fork(void) asm ("ret_from_fork");
+
+extern void kernel_thread_starter(void);
+
+void flush_thread(void)
+{
+}
+
+void arch_setup_new_exec(void)
+{
+ if (S390_lowcore.current_pid != current->pid) {
+ S390_lowcore.current_pid = current->pid;
+ if (test_facility(40))
+ lpp(&S390_lowcore.lpp);
+ }
+}
+
+void arch_release_task_struct(struct task_struct *tsk)
+{
+ runtime_instr_release(tsk);
+ guarded_storage_release(tsk);
+}
+
+int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src)
+{
+ /*
+ * Save the floating-point or vector register state of the current
+ * task and set the CIF_FPU flag to lazy restore the FPU register
+ * state when returning to user space.
+ */
+ save_fpu_regs();
+
+ memcpy(dst, src, arch_task_struct_size);
+ dst->thread.fpu.regs = dst->thread.fpu.fprs;
+ return 0;
+}
+
+int copy_thread_tls(unsigned long clone_flags, unsigned long new_stackp,
+ unsigned long arg, struct task_struct *p, unsigned long tls)
+{
+ struct fake_frame
+ {
+ struct stack_frame sf;
+ struct pt_regs childregs;
+ } *frame;
+
+ frame = container_of(task_pt_regs(p), struct fake_frame, childregs);
+ p->thread.ksp = (unsigned long) frame;
+ /* Save access registers to new thread structure. */
+ save_access_regs(&p->thread.acrs[0]);
+ /* start new process with ar4 pointing to the correct address space */
+ p->thread.mm_segment = get_fs();
+ /* Don't copy debug registers */
+ memset(&p->thread.per_user, 0, sizeof(p->thread.per_user));
+ memset(&p->thread.per_event, 0, sizeof(p->thread.per_event));
+ clear_tsk_thread_flag(p, TIF_SINGLE_STEP);
+ p->thread.per_flags = 0;
+ /* Initialize per thread user and system timer values */
+ p->thread.user_timer = 0;
+ p->thread.guest_timer = 0;
+ p->thread.system_timer = 0;
+ p->thread.hardirq_timer = 0;
+ p->thread.softirq_timer = 0;
+
+ frame->sf.back_chain = 0;
+ /* new return point is ret_from_fork */
+ frame->sf.gprs[8] = (unsigned long) ret_from_fork;
+ /* fake return stack for resume(), don't go back to schedule */
+ frame->sf.gprs[9] = (unsigned long) frame;
+
+ /* Store access registers to kernel stack of new process. */
+ if (unlikely(p->flags & PF_KTHREAD)) {
+ /* kernel thread */
+ memset(&frame->childregs, 0, sizeof(struct pt_regs));
+ frame->childregs.psw.mask = PSW_KERNEL_BITS | PSW_MASK_DAT |
+ PSW_MASK_IO | PSW_MASK_EXT | PSW_MASK_MCHECK;
+ frame->childregs.psw.addr =
+ (unsigned long) kernel_thread_starter;
+ frame->childregs.gprs[9] = new_stackp; /* function */
+ frame->childregs.gprs[10] = arg;
+ frame->childregs.gprs[11] = (unsigned long) do_exit;
+ frame->childregs.orig_gpr2 = -1;
+
+ return 0;
+ }
+ frame->childregs = *current_pt_regs();
+ frame->childregs.gprs[2] = 0; /* child returns 0 on fork. */
+ frame->childregs.flags = 0;
+ if (new_stackp)
+ frame->childregs.gprs[15] = new_stackp;
+
+ /* Don't copy runtime instrumentation info */
+ p->thread.ri_cb = NULL;
+ frame->childregs.psw.mask &= ~PSW_MASK_RI;
+ /* Don't copy guarded storage control block */
+ p->thread.gs_cb = NULL;
+ p->thread.gs_bc_cb = NULL;
+
+ /* Set a new TLS ? */
+ if (clone_flags & CLONE_SETTLS) {
+ if (is_compat_task()) {
+ p->thread.acrs[0] = (unsigned int)tls;
+ } else {
+ p->thread.acrs[0] = (unsigned int)(tls >> 32);
+ p->thread.acrs[1] = (unsigned int)tls;
+ }
+ }
+ return 0;
+}
+
+asmlinkage void execve_tail(void)
+{
+ current->thread.fpu.fpc = 0;
+ asm volatile("sfpc %0" : : "d" (0));
+}
+
+/*
+ * fill in the FPU structure for a core dump.
+ */
+int dump_fpu (struct pt_regs * regs, s390_fp_regs *fpregs)
+{
+ save_fpu_regs();
+ fpregs->fpc = current->thread.fpu.fpc;
+ fpregs->pad = 0;
+ if (MACHINE_HAS_VX)
+ convert_vx_to_fp((freg_t *)&fpregs->fprs,
+ current->thread.fpu.vxrs);
+ else
+ memcpy(&fpregs->fprs, current->thread.fpu.fprs,
+ sizeof(fpregs->fprs));
+ return 1;
+}
+EXPORT_SYMBOL(dump_fpu);
+
+unsigned long get_wchan(struct task_struct *p)
+{
+ struct stack_frame *sf, *low, *high;
+ unsigned long return_address;
+ int count;
+
+ if (!p || p == current || p->state == TASK_RUNNING || !task_stack_page(p))
+ return 0;
+
+ if (!try_get_task_stack(p))
+ return 0;
+
+ low = task_stack_page(p);
+ high = (struct stack_frame *) task_pt_regs(p);
+ sf = (struct stack_frame *) p->thread.ksp;
+ if (sf <= low || sf > high) {
+ return_address = 0;
+ goto out;
+ }
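+ /* Walk at most 16 back-chain frames when searching for a return
+ * address outside the scheduler functions.
+ */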
+ for (count = 0; count < 16; count++) {
+ sf = (struct stack_frame *) sf->back_chain;
+ if (sf <= low || sf > high) {
+ return_address = 0;
+ goto out;
+ }
+ return_address = sf->gprs[8];
+ if (!in_sched_functions(return_address))
+ goto out;
+ }
+out:
+ put_task_stack(p);
+ return return_address;
+}
+
+unsigned long arch_align_stack(unsigned long sp)
+{
+ if (!(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space)
+ sp -= get_random_int() & ~PAGE_MASK;
+ return sp & ~0xf;
+}
+
+static inline unsigned long brk_rnd(void)
+{
+ return (get_random_int() & BRK_RND_MASK) << PAGE_SHIFT;
+}
+
+unsigned long arch_randomize_brk(struct mm_struct *mm)
+{
+ unsigned long ret;
+
+ ret = PAGE_ALIGN(mm->brk + brk_rnd());
+ return (ret > mm->brk) ? ret : mm->brk;
+}
+
+void set_fs_fixup(void)
+{
+ struct pt_regs *regs = current_pt_regs();
+ static bool warned;
+
+ set_fs(USER_DS);
+ if (warned)
+ return;
+ WARN(1, "Unbalanced set_fs - int code: 0x%x\n", regs->int_code);
+ show_registers(regs);
+ warned = true;
+}
diff --git a/arch/s390/kernel/processor.c b/arch/s390/kernel/processor.c
new file mode 100644
index 000000000..675d4be0c
--- /dev/null
+++ b/arch/s390/kernel/processor.c
@@ -0,0 +1,220 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright IBM Corp. 2008
+ * Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com)
+ */
+
+#define KMSG_COMPONENT "cpu"
+#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
+
+#include <linux/cpufeature.h>
+#include <linux/bitops.h>
+#include <linux/kernel.h>
+#include <linux/sched/mm.h>
+#include <linux/init.h>
+#include <linux/seq_file.h>
+#include <linux/mm_types.h>
+#include <linux/delay.h>
+#include <linux/cpu.h>
+
+#include <asm/diag.h>
+#include <asm/facility.h>
+#include <asm/elf.h>
+#include <asm/lowcore.h>
+#include <asm/param.h>
+#include <asm/smp.h>
+
+struct cpu_info {
+ unsigned int cpu_mhz_dynamic;
+ unsigned int cpu_mhz_static;
+ struct cpuid cpu_id;
+};
+
+static DEFINE_PER_CPU(struct cpu_info, cpu_info);
+
+static bool machine_has_cpu_mhz;
+
+void __init cpu_detect_mhz_feature(void)
+{
+ if (test_facility(34) && __ecag(ECAG_CPU_ATTRIBUTE, 0) != -1UL)
+ machine_has_cpu_mhz = true;
+}
+
+static void update_cpu_mhz(void *arg)
+{
+ unsigned long mhz;
+ struct cpu_info *c;
+
+ mhz = __ecag(ECAG_CPU_ATTRIBUTE, 0);
+ c = this_cpu_ptr(&cpu_info);
+ c->cpu_mhz_dynamic = mhz >> 32;
+ c->cpu_mhz_static = mhz & 0xffffffff;
+}
+
+void s390_update_cpu_mhz(void)
+{
+ s390_adjust_jiffies();
+ if (machine_has_cpu_mhz)
+ on_each_cpu(update_cpu_mhz, NULL, 0);
+}
+
+void notrace cpu_relax_yield(void)
+{
+ if (!smp_cpu_mtid && MACHINE_HAS_DIAG44) {
+ diag_stat_inc(DIAG_STAT_X044);
+ asm volatile("diag 0,0,0x44");
+ }
+ barrier();
+}
+EXPORT_SYMBOL(cpu_relax_yield);
+
+/*
+ * cpu_init - initializes state that is per-CPU.
+ */
+void cpu_init(void)
+{
+ struct cpuid *id = this_cpu_ptr(&cpu_info.cpu_id);
+
+ get_cpu_id(id);
+ if (machine_has_cpu_mhz)
+ update_cpu_mhz(NULL);
+ mmgrab(&init_mm);
+ current->active_mm = &init_mm;
+ BUG_ON(current->mm);
+ enter_lazy_tlb(&init_mm, current);
+}
+
+/*
+ * cpu_have_feature - Test CPU features on module initialization
+ */
+int cpu_have_feature(unsigned int num)
+{
+ return elf_hwcap & (1UL << num);
+}
+EXPORT_SYMBOL(cpu_have_feature);
+
+static void show_facilities(struct seq_file *m)
+{
+ unsigned int bit;
+ long *facilities;
+
+ facilities = (long *)&S390_lowcore.stfle_fac_list;
+ seq_puts(m, "facilities :");
+ for_each_set_bit_inv(bit, facilities, MAX_FACILITY_BIT)
+ seq_printf(m, " %d", bit);
+ seq_putc(m, '\n');
+}
+
+static void show_cpu_summary(struct seq_file *m, void *v)
+{
+ static const char *hwcap_str[] = {
+ "esan3", "zarch", "stfle", "msa", "ldisp", "eimm", "dfp",
+ "edat", "etf3eh", "highgprs", "te", "vx", "vxd", "vxe", "gs"
+ };
+ static const char * const int_hwcap_str[] = {
+ "sie"
+ };
+ int i, cpu;
+
+ seq_printf(m, "vendor_id : IBM/S390\n"
+ "# processors : %i\n"
+ "bogomips per cpu: %lu.%02lu\n",
+ num_online_cpus(), loops_per_jiffy/(500000/HZ),
+ (loops_per_jiffy/(5000/HZ))%100);
+ seq_printf(m, "max thread id : %d\n", smp_cpu_mtid);
+ seq_puts(m, "features\t: ");
+ for (i = 0; i < ARRAY_SIZE(hwcap_str); i++)
+ if (hwcap_str[i] && (elf_hwcap & (1UL << i)))
+ seq_printf(m, "%s ", hwcap_str[i]);
+ for (i = 0; i < ARRAY_SIZE(int_hwcap_str); i++)
+ if (int_hwcap_str[i] && (int_hwcap & (1UL << i)))
+ seq_printf(m, "%s ", int_hwcap_str[i]);
+ seq_puts(m, "\n");
+ show_facilities(m);
+ show_cacheinfo(m);
+ for_each_online_cpu(cpu) {
+ struct cpuid *id = &per_cpu(cpu_info.cpu_id, cpu);
+
+ seq_printf(m, "processor %d: "
+ "version = %02X, "
+ "identification = %06X, "
+ "machine = %04X\n",
+ cpu, id->version, id->ident, id->machine);
+ }
+}
+
+static void show_cpu_mhz(struct seq_file *m, unsigned long n)
+{
+ struct cpu_info *c = per_cpu_ptr(&cpu_info, n);
+
+ seq_printf(m, "cpu MHz dynamic : %d\n", c->cpu_mhz_dynamic);
+ seq_printf(m, "cpu MHz static : %d\n", c->cpu_mhz_static);
+}
+
+/*
+ * show_cpuinfo - Get information on one CPU for use by procfs.
+ */
+static int show_cpuinfo(struct seq_file *m, void *v)
+{
+ unsigned long n = (unsigned long) v - 1;
+ unsigned long first = cpumask_first(cpu_online_mask);
+
+ if (n == first)
+ show_cpu_summary(m, v);
+ if (!machine_has_cpu_mhz)
+ return 0;
+ seq_printf(m, "\ncpu number : %ld\n", n);
+ show_cpu_mhz(m, n);
+ return 0;
+}
+
+static inline void *c_update(loff_t *pos)
+{
+ if (*pos)
+ *pos = cpumask_next(*pos - 1, cpu_online_mask);
+ else
+ *pos = cpumask_first(cpu_online_mask);
+ return *pos < nr_cpu_ids ? (void *)*pos + 1 : NULL;
+}
+
+static void *c_start(struct seq_file *m, loff_t *pos)
+{
+ get_online_cpus();
+ return c_update(pos);
+}
+
+static void *c_next(struct seq_file *m, void *v, loff_t *pos)
+{
+ ++*pos;
+ return c_update(pos);
+}
+
+static void c_stop(struct seq_file *m, void *v)
+{
+ put_online_cpus();
+}
+
+const struct seq_operations cpuinfo_op = {
+ .start = c_start,
+ .next = c_next,
+ .stop = c_stop,
+ .show = show_cpuinfo,
+};
+
+int s390_isolate_bp(void)
+{
+ if (!test_facility(82))
+ return -EOPNOTSUPP;
+ set_thread_flag(TIF_ISOLATE_BP);
+ return 0;
+}
+EXPORT_SYMBOL(s390_isolate_bp);
+
+int s390_isolate_bp_guest(void)
+{
+ if (!test_facility(82))
+ return -EOPNOTSUPP;
+ set_thread_flag(TIF_ISOLATE_BP_GUEST);
+ return 0;
+}
+EXPORT_SYMBOL(s390_isolate_bp_guest);
diff --git a/arch/s390/kernel/ptrace.c b/arch/s390/kernel/ptrace.c
new file mode 100644
index 000000000..3ffa2847c
--- /dev/null
+++ b/arch/s390/kernel/ptrace.c
@@ -0,0 +1,1776 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Ptrace user space interface.
+ *
+ * Copyright IBM Corp. 1999, 2010
+ * Author(s): Denis Joseph Barrow
+ * Martin Schwidefsky (schwidefsky@de.ibm.com)
+ */
+
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/sched/task_stack.h>
+#include <linux/mm.h>
+#include <linux/smp.h>
+#include <linux/errno.h>
+#include <linux/ptrace.h>
+#include <linux/user.h>
+#include <linux/security.h>
+#include <linux/audit.h>
+#include <linux/signal.h>
+#include <linux/elf.h>
+#include <linux/regset.h>
+#include <linux/tracehook.h>
+#include <linux/seccomp.h>
+#include <linux/compat.h>
+#include <trace/syscall.h>
+#include <asm/segment.h>
+#include <asm/page.h>
+#include <asm/pgtable.h>
+#include <asm/pgalloc.h>
+#include <linux/uaccess.h>
+#include <asm/unistd.h>
+#include <asm/switch_to.h>
+#include <asm/runtime_instr.h>
+#include <asm/facility.h>
+
+#include "entry.h"
+
+#ifdef CONFIG_COMPAT
+#include "compat_ptrace.h"
+#endif
+
+#define CREATE_TRACE_POINTS
+#include <trace/events/syscalls.h>
+
+void update_cr_regs(struct task_struct *task)
+{
+ struct pt_regs *regs = task_pt_regs(task);
+ struct thread_struct *thread = &task->thread;
+ struct per_regs old, new;
+ union ctlreg0 cr0_old, cr0_new;
+ union ctlreg2 cr2_old, cr2_new;
+ int cr0_changed, cr2_changed;
+
+ __ctl_store(cr0_old.val, 0, 0);
+ __ctl_store(cr2_old.val, 2, 2);
+ cr0_new = cr0_old;
+ cr2_new = cr2_old;
+ /* Take care of the enable/disable of transactional execution. */
+ if (MACHINE_HAS_TE) {
+ /* Set or clear transaction execution TXC bit 8. */
+ cr0_new.tcx = 1;
+ if (task->thread.per_flags & PER_FLAG_NO_TE)
+ cr0_new.tcx = 0;
+ /* Set or clear transaction execution TDC bits 62 and 63. */
+ cr2_new.tdc = 0;
+ if (task->thread.per_flags & PER_FLAG_TE_ABORT_RAND) {
+ if (task->thread.per_flags & PER_FLAG_TE_ABORT_RAND_TEND)
+ cr2_new.tdc = 1;
+ else
+ cr2_new.tdc = 2;
+ }
+ }
+ /* Take care of enable/disable of guarded storage. */
+ if (MACHINE_HAS_GS) {
+ cr2_new.gse = 0;
+ if (task->thread.gs_cb)
+ cr2_new.gse = 1;
+ }
+ /* Load control register 0/2 iff changed */
+ cr0_changed = cr0_new.val != cr0_old.val;
+ cr2_changed = cr2_new.val != cr2_old.val;
+ if (cr0_changed)
+ __ctl_load(cr0_new.val, 0, 0);
+ if (cr2_changed)
+ __ctl_load(cr2_new.val, 2, 2);
+ /* Copy user specified PER registers */
+ new.control = thread->per_user.control;
+ new.start = thread->per_user.start;
+ new.end = thread->per_user.end;
+
+ /* merge TIF_SINGLE_STEP into user specified PER registers. */
+ if (test_tsk_thread_flag(task, TIF_SINGLE_STEP) ||
+ test_tsk_thread_flag(task, TIF_UPROBE_SINGLESTEP)) {
+ if (test_tsk_thread_flag(task, TIF_BLOCK_STEP))
+ new.control |= PER_EVENT_BRANCH;
+ else
+ new.control |= PER_EVENT_IFETCH;
+ new.control |= PER_CONTROL_SUSPENSION;
+ new.control |= PER_EVENT_TRANSACTION_END;
+ if (test_tsk_thread_flag(task, TIF_UPROBE_SINGLESTEP))
+ new.control |= PER_EVENT_IFETCH;
+ new.start = 0;
+ new.end = -1UL;
+ }
+
+ /* Take care of the PER enablement bit in the PSW. */
+ if (!(new.control & PER_EVENT_MASK)) {
+ regs->psw.mask &= ~PSW_MASK_PER;
+ return;
+ }
+ regs->psw.mask |= PSW_MASK_PER;
+ __ctl_store(old, 9, 11);
+ if (memcmp(&new, &old, sizeof(struct per_regs)) != 0)
+ __ctl_load(new, 9, 11);
+}
+
+void user_enable_single_step(struct task_struct *task)
+{
+ clear_tsk_thread_flag(task, TIF_BLOCK_STEP);
+ set_tsk_thread_flag(task, TIF_SINGLE_STEP);
+}
+
+void user_disable_single_step(struct task_struct *task)
+{
+ clear_tsk_thread_flag(task, TIF_BLOCK_STEP);
+ clear_tsk_thread_flag(task, TIF_SINGLE_STEP);
+}
+
+void user_enable_block_step(struct task_struct *task)
+{
+ set_tsk_thread_flag(task, TIF_SINGLE_STEP);
+ set_tsk_thread_flag(task, TIF_BLOCK_STEP);
+}
+
+/*
+ * Called by kernel/ptrace.c when detaching..
+ *
+ * Clear all debugging related fields.
+ */
+void ptrace_disable(struct task_struct *task)
+{
+ memset(&task->thread.per_user, 0, sizeof(task->thread.per_user));
+ memset(&task->thread.per_event, 0, sizeof(task->thread.per_event));
+ clear_tsk_thread_flag(task, TIF_SINGLE_STEP);
+ clear_pt_regs_flag(task_pt_regs(task), PIF_PER_TRAP);
+ task->thread.per_flags = 0;
+}
+
+#define __ADDR_MASK 7
+
+static inline unsigned long __peek_user_per(struct task_struct *child,
+ addr_t addr)
+{
+ struct per_struct_kernel *dummy = NULL;
+
+ if (addr == (addr_t) &dummy->cr9)
+ /* Control bits of the active per set. */
+ return test_thread_flag(TIF_SINGLE_STEP) ?
+ PER_EVENT_IFETCH : child->thread.per_user.control;
+ else if (addr == (addr_t) &dummy->cr10)
+ /* Start address of the active per set. */
+ return test_thread_flag(TIF_SINGLE_STEP) ?
+ 0 : child->thread.per_user.start;
+ else if (addr == (addr_t) &dummy->cr11)
+ /* End address of the active per set. */
+ return test_thread_flag(TIF_SINGLE_STEP) ?
+ -1UL : child->thread.per_user.end;
+ else if (addr == (addr_t) &dummy->bits)
+ /* Single-step bit. */
+ return test_thread_flag(TIF_SINGLE_STEP) ?
+ (1UL << (BITS_PER_LONG - 1)) : 0;
+ else if (addr == (addr_t) &dummy->starting_addr)
+ /* Start address of the user specified per set. */
+ return child->thread.per_user.start;
+ else if (addr == (addr_t) &dummy->ending_addr)
+ /* End address of the user specified per set. */
+ return child->thread.per_user.end;
+ else if (addr == (addr_t) &dummy->perc_atmid)
+ /* PER code, ATMID and AI of the last PER trap */
+ return (unsigned long)
+ child->thread.per_event.cause << (BITS_PER_LONG - 16);
+ else if (addr == (addr_t) &dummy->address)
+ /* Address of the last PER trap */
+ return child->thread.per_event.address;
+ else if (addr == (addr_t) &dummy->access_id)
+ /* Access id of the last PER trap */
+ return (unsigned long)
+ child->thread.per_event.paid << (BITS_PER_LONG - 8);
+ return 0;
+}
+
+/*
+ * Read the word at offset addr from the user area of a process. The
+ * trouble here is that the information is littered over different
+ * locations. The process registers are found on the kernel stack,
+ * the floating point stuff and the trace settings are stored in
+ * the task structure. In addition the different structures in
+ * struct user contain pad bytes that should be read as zeroes.
+ * Lovely...
+ */
+static unsigned long __peek_user(struct task_struct *child, addr_t addr)
+{
+ struct user *dummy = NULL;
+ addr_t offset, tmp;
+
+ if (addr < (addr_t) &dummy->regs.acrs) {
+ /*
+ * psw and gprs are stored on the stack
+ */
+ tmp = *(addr_t *)((addr_t) &task_pt_regs(child)->psw + addr);
+ if (addr == (addr_t) &dummy->regs.psw.mask) {
+ /* Return a clean psw mask. */
+ tmp &= PSW_MASK_USER | PSW_MASK_RI;
+ tmp |= PSW_USER_BITS;
+ }
+
+ } else if (addr < (addr_t) &dummy->regs.orig_gpr2) {
+ /*
+ * access registers are stored in the thread structure
+ */
+ offset = addr - (addr_t) &dummy->regs.acrs;
+ /*
+ * Very special case: old & broken 64 bit gdb reading
+ * from acrs[15]. Result is a 64 bit value. Read the
+ * 32 bit acrs[15] value and shift it by 32. Sick...
+ */
+ if (addr == (addr_t) &dummy->regs.acrs[15])
+ tmp = ((unsigned long) child->thread.acrs[15]) << 32;
+ else
+ tmp = *(addr_t *)((addr_t) &child->thread.acrs + offset);
+
+ } else if (addr == (addr_t) &dummy->regs.orig_gpr2) {
+ /*
+ * orig_gpr2 is stored on the kernel stack
+ */
+ tmp = (addr_t) task_pt_regs(child)->orig_gpr2;
+
+ } else if (addr < (addr_t) &dummy->regs.fp_regs) {
+ /*
+ * prevent reads of padding hole between
+ * orig_gpr2 and fp_regs on s390.
+ */
+ tmp = 0;
+
+ } else if (addr == (addr_t) &dummy->regs.fp_regs.fpc) {
+ /*
+ * floating point control reg. is in the thread structure
+ */
+ tmp = child->thread.fpu.fpc;
+ tmp <<= BITS_PER_LONG - 32;
+
+ } else if (addr < (addr_t) (&dummy->regs.fp_regs + 1)) {
+ /*
+ * floating point regs. are either in child->thread.fpu
+ * or the child->thread.fpu.vxrs array
+ */
+ offset = addr - (addr_t) &dummy->regs.fp_regs.fprs;
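+		/*
+		 * An fpr maps to the leftmost doubleword of the corresponding
+		 * 16 byte vector register, hence the doubled offset.
+		 */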
+ if (MACHINE_HAS_VX)
+ tmp = *(addr_t *)
+ ((addr_t) child->thread.fpu.vxrs + 2*offset);
+ else
+ tmp = *(addr_t *)
+ ((addr_t) child->thread.fpu.fprs + offset);
+
+ } else if (addr < (addr_t) (&dummy->regs.per_info + 1)) {
+ /*
+ * Handle access to the per_info structure.
+ */
+ addr -= (addr_t) &dummy->regs.per_info;
+ tmp = __peek_user_per(child, addr);
+
+ } else
+ tmp = 0;
+
+ return tmp;
+}
+
+static int
+peek_user(struct task_struct *child, addr_t addr, addr_t data)
+{
+ addr_t tmp, mask;
+
+ /*
+ * Stupid gdb peeks/pokes the access registers in 64 bit with
+ * an alignment of 4. Programmers from hell...
+ */
+ mask = __ADDR_MASK;
+ if (addr >= (addr_t) &((struct user *) NULL)->regs.acrs &&
+ addr < (addr_t) &((struct user *) NULL)->regs.orig_gpr2)
+ mask = 3;
+ if ((addr & mask) || addr > sizeof(struct user) - __ADDR_MASK)
+ return -EIO;
+
+ tmp = __peek_user(child, addr);
+ return put_user(tmp, (addr_t __user *) data);
+}
+
+static inline void __poke_user_per(struct task_struct *child,
+ addr_t addr, addr_t data)
+{
+ struct per_struct_kernel *dummy = NULL;
+
+ /*
+ * There are only three fields in the per_info struct that the
+ * debugger user can write to.
+ * 1) cr9: the debugger wants to set a new PER event mask
+ * 2) starting_addr: the debugger wants to set a new starting
+ * address to use with the PER event mask.
+ * 3) ending_addr: the debugger wants to set a new ending
+ * address to use with the PER event mask.
+ * The user specified PER event mask and the start and end
+ * addresses are used only if single stepping is not in effect.
+ * Writes to any other field in per_info are ignored.
+ */
+ if (addr == (addr_t) &dummy->cr9)
+ /* PER event mask of the user specified per set. */
+ child->thread.per_user.control =
+ data & (PER_EVENT_MASK | PER_CONTROL_MASK);
+ else if (addr == (addr_t) &dummy->starting_addr)
+ /* Starting address of the user specified per set. */
+ child->thread.per_user.start = data;
+ else if (addr == (addr_t) &dummy->ending_addr)
+ /* Ending address of the user specified per set. */
+ child->thread.per_user.end = data;
+}
+
+static void fixup_int_code(struct task_struct *child, addr_t data)
+{
+ struct pt_regs *regs = task_pt_regs(child);
+ int ilc = regs->int_code >> 16;
+ u16 insn;
+
+ if (ilc > 6)
+ return;
+
+ if (ptrace_access_vm(child, regs->psw.addr - (regs->int_code >> 16),
+ &insn, sizeof(insn), FOLL_FORCE) != sizeof(insn))
+ return;
+
+ /* double check that tracee stopped on svc instruction */
+ if ((insn >> 8) != 0xa)
+ return;
+
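+	/*
+	 * svc is a two byte instruction, so keep ilc 2 in the upper
+	 * halfword and put the new system call number into the lower
+	 * halfword of the interruption code.
+	 */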
+ regs->int_code = 0x20000 | (data & 0xffff);
+}
+
+/*
+ * Write a word to the user area of a process at location addr. This
+ * operation does have an additional problem compared to peek_user.
+ * Stores to the program status word and to the floating point
+ * control register need to be checked for validity.
+ */
+static int __poke_user(struct task_struct *child, addr_t addr, addr_t data)
+{
+ struct user *dummy = NULL;
+ addr_t offset;
+
+
+ if (addr < (addr_t) &dummy->regs.acrs) {
+ struct pt_regs *regs = task_pt_regs(child);
+ /*
+ * psw and gprs are stored on the stack
+ */
+ if (addr == (addr_t) &dummy->regs.psw.mask) {
+ unsigned long mask = PSW_MASK_USER;
+
+ mask |= is_ri_task(child) ? PSW_MASK_RI : 0;
+ if ((data ^ PSW_USER_BITS) & ~mask)
+ /* Invalid psw mask. */
+ return -EINVAL;
+ if ((data & PSW_MASK_ASC) == PSW_ASC_HOME)
+ /* Invalid address-space-control bits */
+ return -EINVAL;
+ if ((data & PSW_MASK_EA) && !(data & PSW_MASK_BA))
+ /* Invalid addressing mode bits */
+ return -EINVAL;
+ }
+
+ if (test_pt_regs_flag(regs, PIF_SYSCALL) &&
+ addr == offsetof(struct user, regs.gprs[2]))
+ fixup_int_code(child, data);
+ *(addr_t *)((addr_t) &regs->psw + addr) = data;
+
+ } else if (addr < (addr_t) (&dummy->regs.orig_gpr2)) {
+ /*
+ * access registers are stored in the thread structure
+ */
+ offset = addr - (addr_t) &dummy->regs.acrs;
+ /*
+ * Very special case: old & broken 64 bit gdb writing
+ * to acrs[15] with a 64 bit value. Ignore the lower
+ * half of the value and write the upper 32 bit to
+ * acrs[15]. Sick...
+ */
+ if (addr == (addr_t) &dummy->regs.acrs[15])
+ child->thread.acrs[15] = (unsigned int) (data >> 32);
+ else
+ *(addr_t *)((addr_t) &child->thread.acrs + offset) = data;
+
+ } else if (addr == (addr_t) &dummy->regs.orig_gpr2) {
+ /*
+ * orig_gpr2 is stored on the kernel stack
+ */
+ task_pt_regs(child)->orig_gpr2 = data;
+
+ } else if (addr < (addr_t) &dummy->regs.fp_regs) {
+ /*
+ * prevent writes of padding hole between
+ * orig_gpr2 and fp_regs on s390.
+ */
+ return 0;
+
+ } else if (addr == (addr_t) &dummy->regs.fp_regs.fpc) {
+ /*
+ * floating point control reg. is in the thread structure
+ */
+ if ((unsigned int) data != 0 ||
+ test_fp_ctl(data >> (BITS_PER_LONG - 32)))
+ return -EINVAL;
+ child->thread.fpu.fpc = data >> (BITS_PER_LONG - 32);
+
+ } else if (addr < (addr_t) (&dummy->regs.fp_regs + 1)) {
+ /*
+ * floating point regs. are either in child->thread.fpu
+ * or the child->thread.fpu.vxrs array
+ */
+ offset = addr - (addr_t) &dummy->regs.fp_regs.fprs;
+ if (MACHINE_HAS_VX)
+ *(addr_t *)((addr_t)
+ child->thread.fpu.vxrs + 2*offset) = data;
+ else
+ *(addr_t *)((addr_t)
+ child->thread.fpu.fprs + offset) = data;
+
+ } else if (addr < (addr_t) (&dummy->regs.per_info + 1)) {
+ /*
+ * Handle access to the per_info structure.
+ */
+ addr -= (addr_t) &dummy->regs.per_info;
+ __poke_user_per(child, addr, data);
+
+ }
+
+ return 0;
+}
+
+static int poke_user(struct task_struct *child, addr_t addr, addr_t data)
+{
+ addr_t mask;
+
+ /*
+ * Stupid gdb peeks/pokes the access registers in 64 bit with
+ * an alignment of 4. Programmers from hell indeed...
+ */
+ mask = __ADDR_MASK;
+ if (addr >= (addr_t) &((struct user *) NULL)->regs.acrs &&
+ addr < (addr_t) &((struct user *) NULL)->regs.orig_gpr2)
+ mask = 3;
+ if ((addr & mask) || addr > sizeof(struct user) - __ADDR_MASK)
+ return -EIO;
+
+ return __poke_user(child, addr, data);
+}
+
+long arch_ptrace(struct task_struct *child, long request,
+ unsigned long addr, unsigned long data)
+{
+ ptrace_area parea;
+ int copied, ret;
+
+ switch (request) {
+ case PTRACE_PEEKUSR:
+ /* read the word at location addr in the USER area. */
+ return peek_user(child, addr, data);
+
+ case PTRACE_POKEUSR:
+ /* write the word at location addr in the USER area */
+ return poke_user(child, addr, data);
+
+ case PTRACE_PEEKUSR_AREA:
+ case PTRACE_POKEUSR_AREA:
+ if (copy_from_user(&parea, (void __force __user *) addr,
+ sizeof(parea)))
+ return -EFAULT;
+ addr = parea.kernel_addr;
+ data = parea.process_addr;
+ copied = 0;
+ while (copied < parea.len) {
+ if (request == PTRACE_PEEKUSR_AREA)
+ ret = peek_user(child, addr, data);
+ else {
+ addr_t utmp;
+ if (get_user(utmp,
+ (addr_t __force __user *) data))
+ return -EFAULT;
+ ret = poke_user(child, addr, utmp);
+ }
+ if (ret)
+ return ret;
+ addr += sizeof(unsigned long);
+ data += sizeof(unsigned long);
+ copied += sizeof(unsigned long);
+ }
+ return 0;
+ case PTRACE_GET_LAST_BREAK:
+ put_user(child->thread.last_break,
+ (unsigned long __user *) data);
+ return 0;
+ case PTRACE_ENABLE_TE:
+ if (!MACHINE_HAS_TE)
+ return -EIO;
+ child->thread.per_flags &= ~PER_FLAG_NO_TE;
+ return 0;
+ case PTRACE_DISABLE_TE:
+ if (!MACHINE_HAS_TE)
+ return -EIO;
+ child->thread.per_flags |= PER_FLAG_NO_TE;
+ child->thread.per_flags &= ~PER_FLAG_TE_ABORT_RAND;
+ return 0;
+ case PTRACE_TE_ABORT_RAND:
+ if (!MACHINE_HAS_TE || (child->thread.per_flags & PER_FLAG_NO_TE))
+ return -EIO;
+ switch (data) {
+ case 0UL:
+ child->thread.per_flags &= ~PER_FLAG_TE_ABORT_RAND;
+ break;
+ case 1UL:
+ child->thread.per_flags |= PER_FLAG_TE_ABORT_RAND;
+ child->thread.per_flags |= PER_FLAG_TE_ABORT_RAND_TEND;
+ break;
+ case 2UL:
+ child->thread.per_flags |= PER_FLAG_TE_ABORT_RAND;
+ child->thread.per_flags &= ~PER_FLAG_TE_ABORT_RAND_TEND;
+ break;
+ default:
+ return -EINVAL;
+ }
+ return 0;
+ default:
+ return ptrace_request(child, request, addr, data);
+ }
+}
+
+#ifdef CONFIG_COMPAT
+/*
+ * Now the fun part starts... a 31 bit program running in the
+ * 31 bit emulation tracing another program. PTRACE_PEEKTEXT,
+ * PTRACE_PEEKDATA, PTRACE_POKETEXT and PTRACE_POKEDATA are easy
+ * to handle, the difference to the 64 bit versions of the requests
+ * is that the access is done in multiples of 4 bytes instead of
+ * 8 bytes (sizeof(unsigned long) on 31/64 bit).
+ * The ugly part are PTRACE_PEEKUSR, PTRACE_PEEKUSR_AREA,
+ * PTRACE_POKEUSR and PTRACE_POKEUSR_AREA. If the traced program
+ * is a 31 bit program too, the content of struct user can be
+ * emulated. A 31 bit program peeking into the struct user of
+ * a 64 bit program is a no-no.
+ */
+
+/*
+ * Same as peek_user_per but for a 31 bit program.
+ */
+static inline __u32 __peek_user_per_compat(struct task_struct *child,
+ addr_t addr)
+{
+ struct compat_per_struct_kernel *dummy32 = NULL;
+
+ if (addr == (addr_t) &dummy32->cr9)
+ /* Control bits of the active per set. */
+ return (__u32) test_thread_flag(TIF_SINGLE_STEP) ?
+ PER_EVENT_IFETCH : child->thread.per_user.control;
+ else if (addr == (addr_t) &dummy32->cr10)
+ /* Start address of the active per set. */
+ return (__u32) test_thread_flag(TIF_SINGLE_STEP) ?
+ 0 : child->thread.per_user.start;
+ else if (addr == (addr_t) &dummy32->cr11)
+ /* End address of the active per set. */
+ return test_thread_flag(TIF_SINGLE_STEP) ?
+ PSW32_ADDR_INSN : child->thread.per_user.end;
+ else if (addr == (addr_t) &dummy32->bits)
+ /* Single-step bit. */
+ return (__u32) test_thread_flag(TIF_SINGLE_STEP) ?
+ 0x80000000 : 0;
+ else if (addr == (addr_t) &dummy32->starting_addr)
+ /* Start address of the user specified per set. */
+ return (__u32) child->thread.per_user.start;
+ else if (addr == (addr_t) &dummy32->ending_addr)
+ /* End address of the user specified per set. */
+ return (__u32) child->thread.per_user.end;
+ else if (addr == (addr_t) &dummy32->perc_atmid)
+ /* PER code, ATMID and AI of the last PER trap */
+ return (__u32) child->thread.per_event.cause << 16;
+ else if (addr == (addr_t) &dummy32->address)
+ /* Address of the last PER trap */
+ return (__u32) child->thread.per_event.address;
+ else if (addr == (addr_t) &dummy32->access_id)
+ /* Access id of the last PER trap */
+ return (__u32) child->thread.per_event.paid << 24;
+ return 0;
+}
+
+/*
+ * Same as peek_user but for a 31 bit program.
+ */
+static u32 __peek_user_compat(struct task_struct *child, addr_t addr)
+{
+ struct compat_user *dummy32 = NULL;
+ addr_t offset;
+ __u32 tmp;
+
+ if (addr < (addr_t) &dummy32->regs.acrs) {
+ struct pt_regs *regs = task_pt_regs(child);
+ /*
+ * psw and gprs are stored on the stack
+ */
+ if (addr == (addr_t) &dummy32->regs.psw.mask) {
+ /* Fake a 31 bit psw mask. */
+ tmp = (__u32)(regs->psw.mask >> 32);
+ tmp &= PSW32_MASK_USER | PSW32_MASK_RI;
+ tmp |= PSW32_USER_BITS;
+ } else if (addr == (addr_t) &dummy32->regs.psw.addr) {
+ /* Fake a 31 bit psw address. */
+ tmp = (__u32) regs->psw.addr |
+ (__u32)(regs->psw.mask & PSW_MASK_BA);
+ } else {
+ /* gpr 0-15 */
+ tmp = *(__u32 *)((addr_t) &regs->psw + addr*2 + 4);
+ }
+ } else if (addr < (addr_t) (&dummy32->regs.orig_gpr2)) {
+ /*
+ * access registers are stored in the thread structure
+ */
+ offset = addr - (addr_t) &dummy32->regs.acrs;
+ tmp = *(__u32*)((addr_t) &child->thread.acrs + offset);
+
+ } else if (addr == (addr_t) (&dummy32->regs.orig_gpr2)) {
+ /*
+ * orig_gpr2 is stored on the kernel stack
+ */
+ tmp = *(__u32*)((addr_t) &task_pt_regs(child)->orig_gpr2 + 4);
+
+ } else if (addr < (addr_t) &dummy32->regs.fp_regs) {
+ /*
+ * prevent reads of padding hole between
+ * orig_gpr2 and fp_regs on s390.
+ */
+ tmp = 0;
+
+ } else if (addr == (addr_t) &dummy32->regs.fp_regs.fpc) {
+ /*
+ * floating point control reg. is in the thread structure
+ */
+ tmp = child->thread.fpu.fpc;
+
+ } else if (addr < (addr_t) (&dummy32->regs.fp_regs + 1)) {
+ /*
+ * floating point regs. are either in child->thread.fpu
+ * or the child->thread.fpu.vxrs array
+ */
+ offset = addr - (addr_t) &dummy32->regs.fp_regs.fprs;
+ if (MACHINE_HAS_VX)
+ tmp = *(__u32 *)
+ ((addr_t) child->thread.fpu.vxrs + 2*offset);
+ else
+ tmp = *(__u32 *)
+ ((addr_t) child->thread.fpu.fprs + offset);
+
+ } else if (addr < (addr_t) (&dummy32->regs.per_info + 1)) {
+ /*
+ * Handle access to the per_info structure.
+ */
+ addr -= (addr_t) &dummy32->regs.per_info;
+ tmp = __peek_user_per_compat(child, addr);
+
+ } else
+ tmp = 0;
+
+ return tmp;
+}
+
+static int peek_user_compat(struct task_struct *child,
+ addr_t addr, addr_t data)
+{
+ __u32 tmp;
+
+ if (!is_compat_task() || (addr & 3) || addr > sizeof(struct user) - 3)
+ return -EIO;
+
+ tmp = __peek_user_compat(child, addr);
+ return put_user(tmp, (__u32 __user *) data);
+}
+
+/*
+ * Same as poke_user_per but for a 31 bit program.
+ */
+static inline void __poke_user_per_compat(struct task_struct *child,
+ addr_t addr, __u32 data)
+{
+ struct compat_per_struct_kernel *dummy32 = NULL;
+
+ if (addr == (addr_t) &dummy32->cr9)
+ /* PER event mask of the user specified per set. */
+ child->thread.per_user.control =
+ data & (PER_EVENT_MASK | PER_CONTROL_MASK);
+ else if (addr == (addr_t) &dummy32->starting_addr)
+ /* Starting address of the user specified per set. */
+ child->thread.per_user.start = data;
+ else if (addr == (addr_t) &dummy32->ending_addr)
+ /* Ending address of the user specified per set. */
+ child->thread.per_user.end = data;
+}
+
+/*
+ * Same as poke_user but for a 31 bit program.
+ */
+static int __poke_user_compat(struct task_struct *child,
+ addr_t addr, addr_t data)
+{
+ struct compat_user *dummy32 = NULL;
+ __u32 tmp = (__u32) data;
+ addr_t offset;
+
+ if (addr < (addr_t) &dummy32->regs.acrs) {
+ struct pt_regs *regs = task_pt_regs(child);
+ /*
+		 * psw and gprs are stored on the stack
+ */
+ if (addr == (addr_t) &dummy32->regs.psw.mask) {
+ __u32 mask = PSW32_MASK_USER;
+
+ mask |= is_ri_task(child) ? PSW32_MASK_RI : 0;
+ /* Build a 64 bit psw mask from 31 bit mask. */
+ if ((tmp ^ PSW32_USER_BITS) & ~mask)
+ /* Invalid psw mask. */
+ return -EINVAL;
+ if ((data & PSW32_MASK_ASC) == PSW32_ASC_HOME)
+ /* Invalid address-space-control bits */
+ return -EINVAL;
+ regs->psw.mask = (regs->psw.mask & ~PSW_MASK_USER) |
+ (regs->psw.mask & PSW_MASK_BA) |
+ (__u64)(tmp & mask) << 32;
+ } else if (addr == (addr_t) &dummy32->regs.psw.addr) {
+ /* Build a 64 bit psw address from 31 bit address. */
+ regs->psw.addr = (__u64) tmp & PSW32_ADDR_INSN;
+ /* Transfer 31 bit amode bit to psw mask. */
+ regs->psw.mask = (regs->psw.mask & ~PSW_MASK_BA) |
+ (__u64)(tmp & PSW32_ADDR_AMODE);
+ } else {
+
+ if (test_pt_regs_flag(regs, PIF_SYSCALL) &&
+ addr == offsetof(struct compat_user, regs.gprs[2]))
+ fixup_int_code(child, data);
+ /* gpr 0-15 */
+ *(__u32*)((addr_t) &regs->psw + addr*2 + 4) = tmp;
+ }
+ } else if (addr < (addr_t) (&dummy32->regs.orig_gpr2)) {
+ /*
+ * access registers are stored in the thread structure
+ */
+ offset = addr - (addr_t) &dummy32->regs.acrs;
+ *(__u32*)((addr_t) &child->thread.acrs + offset) = tmp;
+
+ } else if (addr == (addr_t) (&dummy32->regs.orig_gpr2)) {
+ /*
+ * orig_gpr2 is stored on the kernel stack
+ */
+ *(__u32*)((addr_t) &task_pt_regs(child)->orig_gpr2 + 4) = tmp;
+
+ } else if (addr < (addr_t) &dummy32->regs.fp_regs) {
+ /*
+		 * prevent writes of padding hole between
+ * orig_gpr2 and fp_regs on s390.
+ */
+ return 0;
+
+ } else if (addr == (addr_t) &dummy32->regs.fp_regs.fpc) {
+ /*
+ * floating point control reg. is in the thread structure
+ */
+ if (test_fp_ctl(tmp))
+ return -EINVAL;
+ child->thread.fpu.fpc = data;
+
+ } else if (addr < (addr_t) (&dummy32->regs.fp_regs + 1)) {
+ /*
+ * floating point regs. are either in child->thread.fpu
+ * or the child->thread.fpu.vxrs array
+ */
+ offset = addr - (addr_t) &dummy32->regs.fp_regs.fprs;
+ if (MACHINE_HAS_VX)
+ *(__u32 *)((addr_t)
+ child->thread.fpu.vxrs + 2*offset) = tmp;
+ else
+ *(__u32 *)((addr_t)
+ child->thread.fpu.fprs + offset) = tmp;
+
+ } else if (addr < (addr_t) (&dummy32->regs.per_info + 1)) {
+ /*
+ * Handle access to the per_info structure.
+ */
+ addr -= (addr_t) &dummy32->regs.per_info;
+ __poke_user_per_compat(child, addr, data);
+ }
+
+ return 0;
+}
+
+static int poke_user_compat(struct task_struct *child,
+ addr_t addr, addr_t data)
+{
+ if (!is_compat_task() || (addr & 3) ||
+ addr > sizeof(struct compat_user) - 3)
+ return -EIO;
+
+ return __poke_user_compat(child, addr, data);
+}
+
+long compat_arch_ptrace(struct task_struct *child, compat_long_t request,
+ compat_ulong_t caddr, compat_ulong_t cdata)
+{
+ unsigned long addr = caddr;
+ unsigned long data = cdata;
+ compat_ptrace_area parea;
+ int copied, ret;
+
+ switch (request) {
+ case PTRACE_PEEKUSR:
+ /* read the word at location addr in the USER area. */
+ return peek_user_compat(child, addr, data);
+
+ case PTRACE_POKEUSR:
+ /* write the word at location addr in the USER area */
+ return poke_user_compat(child, addr, data);
+
+ case PTRACE_PEEKUSR_AREA:
+ case PTRACE_POKEUSR_AREA:
+ if (copy_from_user(&parea, (void __force __user *) addr,
+ sizeof(parea)))
+ return -EFAULT;
+ addr = parea.kernel_addr;
+ data = parea.process_addr;
+ copied = 0;
+ while (copied < parea.len) {
+ if (request == PTRACE_PEEKUSR_AREA)
+ ret = peek_user_compat(child, addr, data);
+ else {
+ __u32 utmp;
+ if (get_user(utmp,
+ (__u32 __force __user *) data))
+ return -EFAULT;
+ ret = poke_user_compat(child, addr, utmp);
+ }
+ if (ret)
+ return ret;
+ addr += sizeof(unsigned int);
+ data += sizeof(unsigned int);
+ copied += sizeof(unsigned int);
+ }
+ return 0;
+ case PTRACE_GET_LAST_BREAK:
+ put_user(child->thread.last_break,
+ (unsigned int __user *) data);
+ return 0;
+ }
+ return compat_ptrace_request(child, request, addr, data);
+}
+#endif
+
+asmlinkage long do_syscall_trace_enter(struct pt_regs *regs)
+{
+ unsigned long mask = -1UL;
+
+ /*
+ * The sysc_tracesys code in entry.S stored the system
+ * call number to gprs[2].
+ */
+ if (test_thread_flag(TIF_SYSCALL_TRACE) &&
+ (tracehook_report_syscall_entry(regs) ||
+ regs->gprs[2] >= NR_syscalls)) {
+ /*
+ * Tracing decided this syscall should not happen or the
+ * debugger stored an invalid system call number. Skip
+ * the system call and the system call restart handling.
+ */
+ clear_pt_regs_flag(regs, PIF_SYSCALL);
+ return -1;
+ }
+
+ /* Do the secure computing check after ptrace. */
+ if (secure_computing(NULL)) {
+ /* seccomp failures shouldn't expose any additional code. */
+ return -1;
+ }
+
+ if (unlikely(test_thread_flag(TIF_SYSCALL_TRACEPOINT)))
+ trace_sys_enter(regs, regs->gprs[2]);
+
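+	/* Compat tasks pass 32 bit arguments, audit only the lower halves. */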
+ if (is_compat_task())
+ mask = 0xffffffff;
+
+ audit_syscall_entry(regs->gprs[2], regs->orig_gpr2 & mask,
+			    regs->gprs[3] & mask, regs->gprs[4] & mask,
+			    regs->gprs[5] & mask);
+
+ return regs->gprs[2];
+}
+
+asmlinkage void do_syscall_trace_exit(struct pt_regs *regs)
+{
+ audit_syscall_exit(regs);
+
+ if (unlikely(test_thread_flag(TIF_SYSCALL_TRACEPOINT)))
+ trace_sys_exit(regs, regs->gprs[2]);
+
+ if (test_thread_flag(TIF_SYSCALL_TRACE))
+ tracehook_report_syscall_exit(regs, 0);
+}
+
+/*
+ * user_regset definitions.
+ */
+
+static int s390_regs_get(struct task_struct *target,
+ const struct user_regset *regset,
+ unsigned int pos, unsigned int count,
+ void *kbuf, void __user *ubuf)
+{
+ if (target == current)
+ save_access_regs(target->thread.acrs);
+
+ if (kbuf) {
+ unsigned long *k = kbuf;
+ while (count > 0) {
+ *k++ = __peek_user(target, pos);
+ count -= sizeof(*k);
+ pos += sizeof(*k);
+ }
+ } else {
+ unsigned long __user *u = ubuf;
+ while (count > 0) {
+ if (__put_user(__peek_user(target, pos), u++))
+ return -EFAULT;
+ count -= sizeof(*u);
+ pos += sizeof(*u);
+ }
+ }
+ return 0;
+}
+
+static int s390_regs_set(struct task_struct *target,
+ const struct user_regset *regset,
+ unsigned int pos, unsigned int count,
+ const void *kbuf, const void __user *ubuf)
+{
+ int rc = 0;
+
+ if (target == current)
+ save_access_regs(target->thread.acrs);
+
+ if (kbuf) {
+ const unsigned long *k = kbuf;
+ while (count > 0 && !rc) {
+ rc = __poke_user(target, pos, *k++);
+ count -= sizeof(*k);
+ pos += sizeof(*k);
+ }
+ } else {
+ const unsigned long __user *u = ubuf;
+ while (count > 0 && !rc) {
+ unsigned long word;
+ rc = __get_user(word, u++);
+ if (rc)
+ break;
+ rc = __poke_user(target, pos, word);
+ count -= sizeof(*u);
+ pos += sizeof(*u);
+ }
+ }
+
+ if (rc == 0 && target == current)
+ restore_access_regs(target->thread.acrs);
+
+ return rc;
+}
+
+static int s390_fpregs_get(struct task_struct *target,
+ const struct user_regset *regset, unsigned int pos,
+ unsigned int count, void *kbuf, void __user *ubuf)
+{
+ _s390_fp_regs fp_regs;
+
+ if (target == current)
+ save_fpu_regs();
+
+ fp_regs.fpc = target->thread.fpu.fpc;
+ fpregs_store(&fp_regs, &target->thread.fpu);
+
+ return user_regset_copyout(&pos, &count, &kbuf, &ubuf,
+ &fp_regs, 0, -1);
+}
+
+static int s390_fpregs_set(struct task_struct *target,
+ const struct user_regset *regset, unsigned int pos,
+ unsigned int count, const void *kbuf,
+ const void __user *ubuf)
+{
+ int rc = 0;
+ freg_t fprs[__NUM_FPRS];
+
+ if (target == current)
+ save_fpu_regs();
+
+ if (MACHINE_HAS_VX)
+ convert_vx_to_fp(fprs, target->thread.fpu.vxrs);
+ else
+ memcpy(&fprs, target->thread.fpu.fprs, sizeof(fprs));
+
+ /* If setting FPC, must validate it first. */
+ if (count > 0 && pos < offsetof(s390_fp_regs, fprs)) {
+ u32 ufpc[2] = { target->thread.fpu.fpc, 0 };
+ rc = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &ufpc,
+ 0, offsetof(s390_fp_regs, fprs));
+ if (rc)
+ return rc;
+ if (ufpc[1] != 0 || test_fp_ctl(ufpc[0]))
+ return -EINVAL;
+ target->thread.fpu.fpc = ufpc[0];
+ }
+
+ if (rc == 0 && count > 0)
+ rc = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
+ fprs, offsetof(s390_fp_regs, fprs), -1);
+ if (rc)
+ return rc;
+
+ if (MACHINE_HAS_VX)
+ convert_fp_to_vx(target->thread.fpu.vxrs, fprs);
+ else
+ memcpy(target->thread.fpu.fprs, &fprs, sizeof(fprs));
+
+ return rc;
+}
+
+static int s390_last_break_get(struct task_struct *target,
+ const struct user_regset *regset,
+ unsigned int pos, unsigned int count,
+ void *kbuf, void __user *ubuf)
+{
+ if (count > 0) {
+ if (kbuf) {
+ unsigned long *k = kbuf;
+ *k = target->thread.last_break;
+ } else {
+ unsigned long __user *u = ubuf;
+ if (__put_user(target->thread.last_break, u))
+ return -EFAULT;
+ }
+ }
+ return 0;
+}
+
+static int s390_last_break_set(struct task_struct *target,
+ const struct user_regset *regset,
+ unsigned int pos, unsigned int count,
+ const void *kbuf, const void __user *ubuf)
+{
+ return 0;
+}
+
+static int s390_tdb_get(struct task_struct *target,
+ const struct user_regset *regset,
+ unsigned int pos, unsigned int count,
+ void *kbuf, void __user *ubuf)
+{
+ struct pt_regs *regs = task_pt_regs(target);
+ unsigned char *data;
+
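+	/*
+	 * A transaction diagnostic block was only stored if the 0x200
+	 * bit is set in the program interruption code.
+	 */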
+ if (!(regs->int_code & 0x200))
+ return -ENODATA;
+ data = target->thread.trap_tdb;
+ return user_regset_copyout(&pos, &count, &kbuf, &ubuf, data, 0, 256);
+}
+
+static int s390_tdb_set(struct task_struct *target,
+ const struct user_regset *regset,
+ unsigned int pos, unsigned int count,
+ const void *kbuf, const void __user *ubuf)
+{
+ return 0;
+}
+
+static int s390_vxrs_low_get(struct task_struct *target,
+ const struct user_regset *regset,
+ unsigned int pos, unsigned int count,
+ void *kbuf, void __user *ubuf)
+{
+ __u64 vxrs[__NUM_VXRS_LOW];
+ int i;
+
+ if (!MACHINE_HAS_VX)
+ return -ENODEV;
+ if (target == current)
+ save_fpu_regs();
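+	/* Copy the rightmost doubleword of vector registers 0-15. */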
+ for (i = 0; i < __NUM_VXRS_LOW; i++)
+ vxrs[i] = *((__u64 *)(target->thread.fpu.vxrs + i) + 1);
+ return user_regset_copyout(&pos, &count, &kbuf, &ubuf, vxrs, 0, -1);
+}
+
+static int s390_vxrs_low_set(struct task_struct *target,
+ const struct user_regset *regset,
+ unsigned int pos, unsigned int count,
+ const void *kbuf, const void __user *ubuf)
+{
+ __u64 vxrs[__NUM_VXRS_LOW];
+ int i, rc;
+
+ if (!MACHINE_HAS_VX)
+ return -ENODEV;
+ if (target == current)
+ save_fpu_regs();
+
+ for (i = 0; i < __NUM_VXRS_LOW; i++)
+ vxrs[i] = *((__u64 *)(target->thread.fpu.vxrs + i) + 1);
+
+ rc = user_regset_copyin(&pos, &count, &kbuf, &ubuf, vxrs, 0, -1);
+ if (rc == 0)
+ for (i = 0; i < __NUM_VXRS_LOW; i++)
+ *((__u64 *)(target->thread.fpu.vxrs + i) + 1) = vxrs[i];
+
+ return rc;
+}
+
+static int s390_vxrs_high_get(struct task_struct *target,
+ const struct user_regset *regset,
+ unsigned int pos, unsigned int count,
+ void *kbuf, void __user *ubuf)
+{
+ __vector128 vxrs[__NUM_VXRS_HIGH];
+
+ if (!MACHINE_HAS_VX)
+ return -ENODEV;
+ if (target == current)
+ save_fpu_regs();
+ memcpy(vxrs, target->thread.fpu.vxrs + __NUM_VXRS_LOW, sizeof(vxrs));
+
+ return user_regset_copyout(&pos, &count, &kbuf, &ubuf, vxrs, 0, -1);
+}
+
+static int s390_vxrs_high_set(struct task_struct *target,
+ const struct user_regset *regset,
+ unsigned int pos, unsigned int count,
+ const void *kbuf, const void __user *ubuf)
+{
+ int rc;
+
+ if (!MACHINE_HAS_VX)
+ return -ENODEV;
+ if (target == current)
+ save_fpu_regs();
+
+ rc = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
+ target->thread.fpu.vxrs + __NUM_VXRS_LOW, 0, -1);
+ return rc;
+}
+
+static int s390_system_call_get(struct task_struct *target,
+ const struct user_regset *regset,
+ unsigned int pos, unsigned int count,
+ void *kbuf, void __user *ubuf)
+{
+ unsigned int *data = &target->thread.system_call;
+ return user_regset_copyout(&pos, &count, &kbuf, &ubuf,
+ data, 0, sizeof(unsigned int));
+}
+
+static int s390_system_call_set(struct task_struct *target,
+ const struct user_regset *regset,
+ unsigned int pos, unsigned int count,
+ const void *kbuf, const void __user *ubuf)
+{
+ unsigned int *data = &target->thread.system_call;
+ return user_regset_copyin(&pos, &count, &kbuf, &ubuf,
+ data, 0, sizeof(unsigned int));
+}
+
+static int s390_gs_cb_get(struct task_struct *target,
+ const struct user_regset *regset,
+ unsigned int pos, unsigned int count,
+ void *kbuf, void __user *ubuf)
+{
+ struct gs_cb *data = target->thread.gs_cb;
+
+ if (!MACHINE_HAS_GS)
+ return -ENODEV;
+ if (!data)
+ return -ENODATA;
+ if (target == current)
+ save_gs_cb(data);
+ return user_regset_copyout(&pos, &count, &kbuf, &ubuf,
+ data, 0, sizeof(struct gs_cb));
+}
+
+static int s390_gs_cb_set(struct task_struct *target,
+ const struct user_regset *regset,
+ unsigned int pos, unsigned int count,
+ const void *kbuf, const void __user *ubuf)
+{
+ struct gs_cb gs_cb = { }, *data = NULL;
+ int rc;
+
+ if (!MACHINE_HAS_GS)
+ return -ENODEV;
+ if (!target->thread.gs_cb) {
+ data = kzalloc(sizeof(*data), GFP_KERNEL);
+ if (!data)
+ return -ENOMEM;
+ }
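+	/* GFP_KERNEL may sleep, so allocate before disabling preemption. */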
+ if (!target->thread.gs_cb)
+ gs_cb.gsd = 25;
+ else if (target == current)
+ save_gs_cb(&gs_cb);
+ else
+ gs_cb = *target->thread.gs_cb;
+ rc = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
+ &gs_cb, 0, sizeof(gs_cb));
+ if (rc) {
+ kfree(data);
+ return -EFAULT;
+ }
+ preempt_disable();
+ if (!target->thread.gs_cb)
+ target->thread.gs_cb = data;
+ *target->thread.gs_cb = gs_cb;
+ if (target == current) {
+ __ctl_set_bit(2, 4);
+ restore_gs_cb(target->thread.gs_cb);
+ }
+ preempt_enable();
+ return rc;
+}
+
+static int s390_gs_bc_get(struct task_struct *target,
+ const struct user_regset *regset,
+ unsigned int pos, unsigned int count,
+ void *kbuf, void __user *ubuf)
+{
+ struct gs_cb *data = target->thread.gs_bc_cb;
+
+ if (!MACHINE_HAS_GS)
+ return -ENODEV;
+ if (!data)
+ return -ENODATA;
+ return user_regset_copyout(&pos, &count, &kbuf, &ubuf,
+ data, 0, sizeof(struct gs_cb));
+}
+
+static int s390_gs_bc_set(struct task_struct *target,
+ const struct user_regset *regset,
+ unsigned int pos, unsigned int count,
+ const void *kbuf, const void __user *ubuf)
+{
+ struct gs_cb *data = target->thread.gs_bc_cb;
+
+ if (!MACHINE_HAS_GS)
+ return -ENODEV;
+ if (!data) {
+ data = kzalloc(sizeof(*data), GFP_KERNEL);
+ if (!data)
+ return -ENOMEM;
+ target->thread.gs_bc_cb = data;
+ }
+ return user_regset_copyin(&pos, &count, &kbuf, &ubuf,
+ data, 0, sizeof(struct gs_cb));
+}
+
+static bool is_ri_cb_valid(struct runtime_instr_cb *cb)
+{
+ return (cb->rca & 0x1f) == 0 &&
+ (cb->roa & 0xfff) == 0 &&
+ (cb->rla & 0xfff) == 0xfff &&
+ cb->s == 1 &&
+ cb->k == 1 &&
+ cb->h == 0 &&
+ cb->reserved1 == 0 &&
+ cb->ps == 1 &&
+ cb->qs == 0 &&
+ cb->pc == 1 &&
+ cb->qc == 0 &&
+ cb->reserved2 == 0 &&
+ cb->reserved3 == 0 &&
+ cb->reserved4 == 0 &&
+ cb->reserved5 == 0 &&
+ cb->reserved6 == 0 &&
+ cb->reserved7 == 0 &&
+ cb->reserved8 == 0 &&
+ cb->rla >= cb->roa &&
+ cb->rca >= cb->roa &&
+ cb->rca <= cb->rla+1 &&
+ cb->m < 3;
+}
+
+static int s390_runtime_instr_get(struct task_struct *target,
+ const struct user_regset *regset,
+ unsigned int pos, unsigned int count,
+ void *kbuf, void __user *ubuf)
+{
+ struct runtime_instr_cb *data = target->thread.ri_cb;
+
+ if (!test_facility(64))
+ return -ENODEV;
+ if (!data)
+ return -ENODATA;
+
+ return user_regset_copyout(&pos, &count, &kbuf, &ubuf,
+ data, 0, sizeof(struct runtime_instr_cb));
+}
+
+static int s390_runtime_instr_set(struct task_struct *target,
+ const struct user_regset *regset,
+ unsigned int pos, unsigned int count,
+ const void *kbuf, const void __user *ubuf)
+{
+ struct runtime_instr_cb ri_cb = { }, *data = NULL;
+ int rc;
+
+ if (!test_facility(64))
+ return -ENODEV;
+
+ if (!target->thread.ri_cb) {
+ data = kzalloc(sizeof(*data), GFP_KERNEL);
+ if (!data)
+ return -ENOMEM;
+ }
+
+ if (target->thread.ri_cb) {
+ if (target == current)
+ store_runtime_instr_cb(&ri_cb);
+ else
+ ri_cb = *target->thread.ri_cb;
+ }
+
+ rc = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
+ &ri_cb, 0, sizeof(struct runtime_instr_cb));
+ if (rc) {
+ kfree(data);
+ return -EFAULT;
+ }
+
+ if (!is_ri_cb_valid(&ri_cb)) {
+ kfree(data);
+ return -EINVAL;
+ }
+ /*
+ * Override access key in any case, since user space should
+ * not be able to set it, nor should it care about it.
+ */
+ ri_cb.key = PAGE_DEFAULT_KEY >> 4;
+ preempt_disable();
+ if (!target->thread.ri_cb)
+ target->thread.ri_cb = data;
+ *target->thread.ri_cb = ri_cb;
+ if (target == current)
+ load_runtime_instr_cb(target->thread.ri_cb);
+ preempt_enable();
+
+ return 0;
+}
+
+static const struct user_regset s390_regsets[] = {
+ {
+ .core_note_type = NT_PRSTATUS,
+ .n = sizeof(s390_regs) / sizeof(long),
+ .size = sizeof(long),
+ .align = sizeof(long),
+ .get = s390_regs_get,
+ .set = s390_regs_set,
+ },
+ {
+ .core_note_type = NT_PRFPREG,
+ .n = sizeof(s390_fp_regs) / sizeof(long),
+ .size = sizeof(long),
+ .align = sizeof(long),
+ .get = s390_fpregs_get,
+ .set = s390_fpregs_set,
+ },
+ {
+ .core_note_type = NT_S390_SYSTEM_CALL,
+ .n = 1,
+ .size = sizeof(unsigned int),
+ .align = sizeof(unsigned int),
+ .get = s390_system_call_get,
+ .set = s390_system_call_set,
+ },
+ {
+ .core_note_type = NT_S390_LAST_BREAK,
+ .n = 1,
+ .size = sizeof(long),
+ .align = sizeof(long),
+ .get = s390_last_break_get,
+ .set = s390_last_break_set,
+ },
+ {
+ .core_note_type = NT_S390_TDB,
+ .n = 1,
+ .size = 256,
+ .align = 1,
+ .get = s390_tdb_get,
+ .set = s390_tdb_set,
+ },
+ {
+ .core_note_type = NT_S390_VXRS_LOW,
+ .n = __NUM_VXRS_LOW,
+ .size = sizeof(__u64),
+ .align = sizeof(__u64),
+ .get = s390_vxrs_low_get,
+ .set = s390_vxrs_low_set,
+ },
+ {
+ .core_note_type = NT_S390_VXRS_HIGH,
+ .n = __NUM_VXRS_HIGH,
+ .size = sizeof(__vector128),
+ .align = sizeof(__vector128),
+ .get = s390_vxrs_high_get,
+ .set = s390_vxrs_high_set,
+ },
+ {
+ .core_note_type = NT_S390_GS_CB,
+ .n = sizeof(struct gs_cb) / sizeof(__u64),
+ .size = sizeof(__u64),
+ .align = sizeof(__u64),
+ .get = s390_gs_cb_get,
+ .set = s390_gs_cb_set,
+ },
+ {
+ .core_note_type = NT_S390_GS_BC,
+ .n = sizeof(struct gs_cb) / sizeof(__u64),
+ .size = sizeof(__u64),
+ .align = sizeof(__u64),
+ .get = s390_gs_bc_get,
+ .set = s390_gs_bc_set,
+ },
+ {
+ .core_note_type = NT_S390_RI_CB,
+ .n = sizeof(struct runtime_instr_cb) / sizeof(__u64),
+ .size = sizeof(__u64),
+ .align = sizeof(__u64),
+ .get = s390_runtime_instr_get,
+ .set = s390_runtime_instr_set,
+ },
+};
+
+static const struct user_regset_view user_s390_view = {
+ .name = UTS_MACHINE,
+ .e_machine = EM_S390,
+ .regsets = s390_regsets,
+ .n = ARRAY_SIZE(s390_regsets)
+};
+
+#ifdef CONFIG_COMPAT
+static int s390_compat_regs_get(struct task_struct *target,
+ const struct user_regset *regset,
+ unsigned int pos, unsigned int count,
+ void *kbuf, void __user *ubuf)
+{
+ if (target == current)
+ save_access_regs(target->thread.acrs);
+
+ if (kbuf) {
+ compat_ulong_t *k = kbuf;
+ while (count > 0) {
+ *k++ = __peek_user_compat(target, pos);
+ count -= sizeof(*k);
+ pos += sizeof(*k);
+ }
+ } else {
+ compat_ulong_t __user *u = ubuf;
+ while (count > 0) {
+ if (__put_user(__peek_user_compat(target, pos), u++))
+ return -EFAULT;
+ count -= sizeof(*u);
+ pos += sizeof(*u);
+ }
+ }
+ return 0;
+}
+
+static int s390_compat_regs_set(struct task_struct *target,
+ const struct user_regset *regset,
+ unsigned int pos, unsigned int count,
+ const void *kbuf, const void __user *ubuf)
+{
+ int rc = 0;
+
+ if (target == current)
+ save_access_regs(target->thread.acrs);
+
+ if (kbuf) {
+ const compat_ulong_t *k = kbuf;
+ while (count > 0 && !rc) {
+ rc = __poke_user_compat(target, pos, *k++);
+ count -= sizeof(*k);
+ pos += sizeof(*k);
+ }
+ } else {
+ const compat_ulong_t __user *u = ubuf;
+ while (count > 0 && !rc) {
+ compat_ulong_t word;
+ rc = __get_user(word, u++);
+ if (rc)
+ break;
+ rc = __poke_user_compat(target, pos, word);
+ count -= sizeof(*u);
+ pos += sizeof(*u);
+ }
+ }
+
+ if (rc == 0 && target == current)
+ restore_access_regs(target->thread.acrs);
+
+ return rc;
+}
+
+static int s390_compat_regs_high_get(struct task_struct *target,
+ const struct user_regset *regset,
+ unsigned int pos, unsigned int count,
+ void *kbuf, void __user *ubuf)
+{
+ compat_ulong_t *gprs_high;
+
+ gprs_high = (compat_ulong_t *)
+ &task_pt_regs(target)->gprs[pos / sizeof(compat_ulong_t)];
+ if (kbuf) {
+ compat_ulong_t *k = kbuf;
+ while (count > 0) {
+ *k++ = *gprs_high;
+ gprs_high += 2;
+ count -= sizeof(*k);
+ }
+ } else {
+ compat_ulong_t __user *u = ubuf;
+ while (count > 0) {
+ if (__put_user(*gprs_high, u++))
+ return -EFAULT;
+ gprs_high += 2;
+ count -= sizeof(*u);
+ }
+ }
+ return 0;
+}
+
+static int s390_compat_regs_high_set(struct task_struct *target,
+ const struct user_regset *regset,
+ unsigned int pos, unsigned int count,
+ const void *kbuf, const void __user *ubuf)
+{
+ compat_ulong_t *gprs_high;
+ int rc = 0;
+
+ gprs_high = (compat_ulong_t *)
+ &task_pt_regs(target)->gprs[pos / sizeof(compat_ulong_t)];
+ if (kbuf) {
+ const compat_ulong_t *k = kbuf;
+ while (count > 0) {
+ *gprs_high = *k++;
+			gprs_high += 2;
+ count -= sizeof(*k);
+ }
+ } else {
+ const compat_ulong_t __user *u = ubuf;
+ while (count > 0 && !rc) {
+ unsigned long word;
+ rc = __get_user(word, u++);
+ if (rc)
+ break;
+ *gprs_high = word;
+			gprs_high += 2;
+ count -= sizeof(*u);
+ }
+ }
+
+ return rc;
+}
+
+static int s390_compat_last_break_get(struct task_struct *target,
+ const struct user_regset *regset,
+ unsigned int pos, unsigned int count,
+ void *kbuf, void __user *ubuf)
+{
+ compat_ulong_t last_break;
+
+ if (count > 0) {
+ last_break = target->thread.last_break;
+ if (kbuf) {
+ unsigned long *k = kbuf;
+ *k = last_break;
+ } else {
+ unsigned long __user *u = ubuf;
+ if (__put_user(last_break, u))
+ return -EFAULT;
+ }
+ }
+ return 0;
+}
+
+static int s390_compat_last_break_set(struct task_struct *target,
+ const struct user_regset *regset,
+ unsigned int pos, unsigned int count,
+ const void *kbuf, const void __user *ubuf)
+{
+ return 0;
+}
+
+static const struct user_regset s390_compat_regsets[] = {
+ {
+ .core_note_type = NT_PRSTATUS,
+ .n = sizeof(s390_compat_regs) / sizeof(compat_long_t),
+ .size = sizeof(compat_long_t),
+ .align = sizeof(compat_long_t),
+ .get = s390_compat_regs_get,
+ .set = s390_compat_regs_set,
+ },
+ {
+ .core_note_type = NT_PRFPREG,
+ .n = sizeof(s390_fp_regs) / sizeof(compat_long_t),
+ .size = sizeof(compat_long_t),
+ .align = sizeof(compat_long_t),
+ .get = s390_fpregs_get,
+ .set = s390_fpregs_set,
+ },
+ {
+ .core_note_type = NT_S390_SYSTEM_CALL,
+ .n = 1,
+ .size = sizeof(compat_uint_t),
+ .align = sizeof(compat_uint_t),
+ .get = s390_system_call_get,
+ .set = s390_system_call_set,
+ },
+ {
+ .core_note_type = NT_S390_LAST_BREAK,
+ .n = 1,
+ .size = sizeof(long),
+ .align = sizeof(long),
+ .get = s390_compat_last_break_get,
+ .set = s390_compat_last_break_set,
+ },
+ {
+ .core_note_type = NT_S390_TDB,
+ .n = 1,
+ .size = 256,
+ .align = 1,
+ .get = s390_tdb_get,
+ .set = s390_tdb_set,
+ },
+ {
+ .core_note_type = NT_S390_VXRS_LOW,
+ .n = __NUM_VXRS_LOW,
+ .size = sizeof(__u64),
+ .align = sizeof(__u64),
+ .get = s390_vxrs_low_get,
+ .set = s390_vxrs_low_set,
+ },
+ {
+ .core_note_type = NT_S390_VXRS_HIGH,
+ .n = __NUM_VXRS_HIGH,
+ .size = sizeof(__vector128),
+ .align = sizeof(__vector128),
+ .get = s390_vxrs_high_get,
+ .set = s390_vxrs_high_set,
+ },
+ {
+ .core_note_type = NT_S390_HIGH_GPRS,
+ .n = sizeof(s390_compat_regs_high) / sizeof(compat_long_t),
+ .size = sizeof(compat_long_t),
+ .align = sizeof(compat_long_t),
+ .get = s390_compat_regs_high_get,
+ .set = s390_compat_regs_high_set,
+ },
+ {
+ .core_note_type = NT_S390_GS_CB,
+ .n = sizeof(struct gs_cb) / sizeof(__u64),
+ .size = sizeof(__u64),
+ .align = sizeof(__u64),
+ .get = s390_gs_cb_get,
+ .set = s390_gs_cb_set,
+ },
+ {
+ .core_note_type = NT_S390_GS_BC,
+ .n = sizeof(struct gs_cb) / sizeof(__u64),
+ .size = sizeof(__u64),
+ .align = sizeof(__u64),
+ .get = s390_gs_bc_get,
+ .set = s390_gs_bc_set,
+ },
+ {
+ .core_note_type = NT_S390_RI_CB,
+ .n = sizeof(struct runtime_instr_cb) / sizeof(__u64),
+ .size = sizeof(__u64),
+ .align = sizeof(__u64),
+ .get = s390_runtime_instr_get,
+ .set = s390_runtime_instr_set,
+ },
+};
+
+static const struct user_regset_view user_s390_compat_view = {
+ .name = "s390",
+ .e_machine = EM_S390,
+ .regsets = s390_compat_regsets,
+ .n = ARRAY_SIZE(s390_compat_regsets)
+};
+#endif
+
+const struct user_regset_view *task_user_regset_view(struct task_struct *task)
+{
+#ifdef CONFIG_COMPAT
+ if (test_tsk_thread_flag(task, TIF_31BIT))
+ return &user_s390_compat_view;
+#endif
+ return &user_s390_view;
+}
+
+static const char *gpr_names[NUM_GPRS] = {
+ "r0", "r1", "r2", "r3", "r4", "r5", "r6", "r7",
+ "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15",
+};
+
+unsigned long regs_get_register(struct pt_regs *regs, unsigned int offset)
+{
+ if (offset >= NUM_GPRS)
+ return 0;
+ return regs->gprs[offset];
+}
+
+int regs_query_register_offset(const char *name)
+{
+ unsigned long offset;
+
+ if (!name || *name != 'r')
+ return -EINVAL;
+ if (kstrtoul(name + 1, 10, &offset))
+ return -EINVAL;
+ if (offset >= NUM_GPRS)
+ return -EINVAL;
+ return offset;
+}
+
+const char *regs_query_register_name(unsigned int offset)
+{
+ if (offset >= NUM_GPRS)
+ return NULL;
+ return gpr_names[offset];
+}
+
+static int regs_within_kernel_stack(struct pt_regs *regs, unsigned long addr)
+{
+ unsigned long ksp = kernel_stack_pointer(regs);
+
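+	/*
+	 * addr is on the kernel stack if it lies within the same
+	 * THREAD_SIZE aligned block as the kernel stack pointer.
+	 */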
+ return (addr & ~(THREAD_SIZE - 1)) == (ksp & ~(THREAD_SIZE - 1));
+}
+
+/**
+ * regs_get_kernel_stack_nth() - get Nth entry of the stack
+ * @regs: pt_regs which contains kernel stack pointer.
+ * @n: stack entry number.
+ *
+ * regs_get_kernel_stack_nth() returns the @n th entry of the kernel stack which
+ * is specified by @regs. If the @n th entry is NOT in the kernel stack,
+ * this returns 0.
+ */
+unsigned long regs_get_kernel_stack_nth(struct pt_regs *regs, unsigned int n)
+{
+ unsigned long addr;
+
+ addr = kernel_stack_pointer(regs) + n * sizeof(long);
+ if (!regs_within_kernel_stack(regs, addr))
+ return 0;
+ return *(unsigned long *)addr;
+}
diff --git a/arch/s390/kernel/reipl.S b/arch/s390/kernel/reipl.S
new file mode 100644
index 000000000..7f14adf51
--- /dev/null
+++ b/arch/s390/kernel/reipl.S
@@ -0,0 +1,80 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright IBM Corp 2000, 2011
+ * Author(s): Holger Smolinski <Holger.Smolinski@de.ibm.com>,
+ * Denis Joseph Barrow,
+ */
+
+#include <linux/linkage.h>
+#include <asm/asm-offsets.h>
+#include <asm/nospec-insn.h>
+#include <asm/sigp.h>
+
+ GEN_BR_THUNK %r9
+
+#
+# Issue "store status" for the current CPU to its prefix page
+# and call passed function afterwards
+#
+# r2 = Function to be called after store status
+# r3 = Parameter for function
+#
+ENTRY(store_status)
+ /* Save register one and load save area base */
+ stg %r1,__LC_SAVE_AREA_RESTART
+ /* General purpose registers */
+ lghi %r1,__LC_GPREGS_SAVE_AREA
+ stmg %r0,%r15,0(%r1)
+ mvc 8(8,%r1),__LC_SAVE_AREA_RESTART
+ /* Control registers */
+ lghi %r1,__LC_CREGS_SAVE_AREA
+ stctg %c0,%c15,0(%r1)
+ /* Access registers */
+ lghi %r1,__LC_AREGS_SAVE_AREA
+ stam %a0,%a15,0(%r1)
+ /* Floating point registers */
+ lghi %r1,__LC_FPREGS_SAVE_AREA
+ std %f0, 0x00(%r1)
+ std %f1, 0x08(%r1)
+ std %f2, 0x10(%r1)
+ std %f3, 0x18(%r1)
+ std %f4, 0x20(%r1)
+ std %f5, 0x28(%r1)
+ std %f6, 0x30(%r1)
+ std %f7, 0x38(%r1)
+ std %f8, 0x40(%r1)
+ std %f9, 0x48(%r1)
+ std %f10,0x50(%r1)
+ std %f11,0x58(%r1)
+ std %f12,0x60(%r1)
+ std %f13,0x68(%r1)
+ std %f14,0x70(%r1)
+ std %f15,0x78(%r1)
+ /* Floating point control register */
+ lghi %r1,__LC_FP_CREG_SAVE_AREA
+ stfpc 0(%r1)
+ /* CPU timer */
+ lghi %r1,__LC_CPU_TIMER_SAVE_AREA
+ stpt 0(%r1)
+ /* Store prefix register */
+ lghi %r1,__LC_PREFIX_SAVE_AREA
+ stpx 0(%r1)
+ /* Clock comparator - seven bytes */
+ lghi %r1,__LC_CLOCK_COMP_SAVE_AREA
+ larl %r4,.Lclkcmp
+ stckc 0(%r4)
+ mvc 1(7,%r1),1(%r4)
+ /* Program status word */
+ lghi %r1,__LC_PSW_SAVE_AREA
+ epsw %r4,%r5
+ st %r4,0(%r1)
+ st %r5,4(%r1)
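+	/* Save the branch target in the psw address field, then call it
+	   with the function parameter in %r2 */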
+ stg %r2,8(%r1)
+ lgr %r9,%r2
+ lgr %r2,%r3
+ BR_EX %r9
+
+ .section .bss
+ .align 8
+.Lclkcmp: .quad 0x0000000000000000
+ .previous
diff --git a/arch/s390/kernel/relocate_kernel.S b/arch/s390/kernel/relocate_kernel.S
new file mode 100644
index 000000000..c97c2d40f
--- /dev/null
+++ b/arch/s390/kernel/relocate_kernel.S
@@ -0,0 +1,74 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright IBM Corp. 2005
+ *
+ * Author(s): Rolf Adelsberger,
+ * Heiko Carstens <heiko.carstens@de.ibm.com>
+ *
+ */
+
+#include <linux/linkage.h>
+#include <asm/page.h>
+#include <asm/sigp.h>
+
+/*
+ * moves the new kernel to its destination...
+ * %r2 = pointer to first kimage_entry_t
+ * %r3 = start address - where to jump to after the job is done...
+ *
+ * %r5 will be used as temp. storage
+ * %r6 holds the destination address
+ * %r7 = PAGE_SIZE
+ * %r8 holds the source address
+ * %r9 = PAGE_SIZE
+ *
+ * 0xf000 is a page_mask
+ */
+
+ .text
+ENTRY(relocate_kernel)
+ basr %r13,0 # base address
+ .base:
+ lghi %r7,PAGE_SIZE # load PAGE_SIZE in r7
+ lghi %r9,PAGE_SIZE # load PAGE_SIZE in r9
+ lg %r5,0(%r2) # read another word for indirection page
+ aghi %r2,8 # increment pointer
+ tml %r5,0x1 # is it a destination page?
+ je .indir_check # NO, goto "indir_check"
+ lgr %r6,%r5 # r6 = r5
+ nill %r6,0xf000 # mask it out and...
+ j .base # ...next iteration
+ .indir_check:
+	tml	%r5,0x2		# is it an indirection page?
+ je .done_test # NO, goto "done_test"
+ nill %r5,0xf000 # YES, mask out,
+ lgr %r2,%r5 # move it into the right register,
+ j .base # and read next...
+ .done_test:
+ tml %r5,0x4 # is it the done indicator?
+ je .source_test # NO! Well, then it should be the source indicator...
+ j .done # ok, lets finish it here...
+ .source_test:
+ tml %r5,0x8 # it should be a source indicator...
+ je .base # NO, ignore it...
+ lgr %r8,%r5 # r8 = r5
+ nill %r8,0xf000 # masking
+ 0: mvcle %r6,%r8,0x0 # copy PAGE_SIZE bytes from r8 to r6 - pad with 0
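+		# mvcle sets cc 3 while the move is incomplete, retry in that case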
+ jo 0b
+ j .base
+ .done:
+ sgr %r0,%r0 # clear register r0
+ la %r4,load_psw-.base(%r13) # load psw-address into the register
+ o %r3,4(%r4) # or load address into psw
+ st %r3,4(%r4)
+ mvc 0(8,%r0),0(%r4) # copy psw to absolute address 0
+ diag %r0,%r0,0x308
+
+ .align 8
+ load_psw:
+ .long 0x00080000,0x80000000
+ relocate_kernel_end:
+ .align 8
+ .globl relocate_kernel_len
+ relocate_kernel_len:
+ .quad relocate_kernel_end - relocate_kernel
diff --git a/arch/s390/kernel/runtime_instr.c b/arch/s390/kernel/runtime_instr.c
new file mode 100644
index 000000000..1788a5454
--- /dev/null
+++ b/arch/s390/kernel/runtime_instr.c
@@ -0,0 +1,102 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright IBM Corp. 2012
+ * Author(s): Jan Glauber <jang@linux.vnet.ibm.com>
+ */
+
+#include <linux/kernel.h>
+#include <linux/syscalls.h>
+#include <linux/signal.h>
+#include <linux/mm.h>
+#include <linux/slab.h>
+#include <linux/init.h>
+#include <linux/errno.h>
+#include <linux/kernel_stat.h>
+#include <linux/sched/task_stack.h>
+
+#include <asm/runtime_instr.h>
+#include <asm/cpu_mf.h>
+#include <asm/irq.h>
+
+#include "entry.h"
+
+/* empty control block to disable RI by loading it */
+struct runtime_instr_cb runtime_instr_empty_cb;
+
+void runtime_instr_release(struct task_struct *tsk)
+{
+ kfree(tsk->thread.ri_cb);
+}
+
+static void disable_runtime_instr(void)
+{
+ struct task_struct *task = current;
+ struct pt_regs *regs;
+
+ if (!task->thread.ri_cb)
+ return;
+ regs = task_pt_regs(task);
+ preempt_disable();
+ load_runtime_instr_cb(&runtime_instr_empty_cb);
+ kfree(task->thread.ri_cb);
+ task->thread.ri_cb = NULL;
+ preempt_enable();
+
+ /*
+ * Make sure the RI bit is deleted from the PSW. If the user did not
+ * switch off RI before the system call the process will get a
+ * specification exception otherwise.
+ */
+ regs->psw.mask &= ~PSW_MASK_RI;
+}
+
+static void init_runtime_instr_cb(struct runtime_instr_cb *cb)
+{
+ cb->rla = 0xfff;
+ cb->s = 1;
+ cb->k = 1;
+ cb->ps = 1;
+ cb->pc = 1;
+ cb->key = PAGE_DEFAULT_KEY >> 4;
+ cb->v = 1;
+}
+
+/*
+ * The signum argument is unused. In older kernels it was used to
+ * specify a real-time signal. For backwards compatibility user space
+ * should pass a valid real-time signal number (the signum argument
+ * was checked in older kernels).
+ */
+SYSCALL_DEFINE2(s390_runtime_instr, int, command, int, signum)
+{
+ struct runtime_instr_cb *cb;
+
+ if (!test_facility(64))
+ return -EOPNOTSUPP;
+
+ if (command == S390_RUNTIME_INSTR_STOP) {
+ disable_runtime_instr();
+ return 0;
+ }
+
+ if (command != S390_RUNTIME_INSTR_START)
+ return -EINVAL;
+
+ if (!current->thread.ri_cb) {
+ cb = kzalloc(sizeof(*cb), GFP_KERNEL);
+ if (!cb)
+ return -ENOMEM;
+ } else {
+ cb = current->thread.ri_cb;
+ memset(cb, 0, sizeof(*cb));
+ }
+
+ init_runtime_instr_cb(cb);
+
+ /* now load the control block to make it available */
+ preempt_disable();
+ current->thread.ri_cb = cb;
+ load_runtime_instr_cb(cb);
+ preempt_enable();
+ return 0;
+}
diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c
new file mode 100644
index 000000000..098794fc5
--- /dev/null
+++ b/arch/s390/kernel/setup.c
@@ -0,0 +1,985 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * S390 version
+ * Copyright IBM Corp. 1999, 2012
+ * Author(s): Hartmut Penner (hp@de.ibm.com),
+ * Martin Schwidefsky (schwidefsky@de.ibm.com)
+ *
+ * Derived from "arch/i386/kernel/setup.c"
+ * Copyright (C) 1995, Linus Torvalds
+ */
+
+/*
+ * This file handles the architecture-dependent parts of initialization
+ */
+
+#define KMSG_COMPONENT "setup"
+#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
+
+#include <linux/errno.h>
+#include <linux/export.h>
+#include <linux/sched.h>
+#include <linux/sched/task.h>
+#include <linux/cpu.h>
+#include <linux/kernel.h>
+#include <linux/memblock.h>
+#include <linux/mm.h>
+#include <linux/stddef.h>
+#include <linux/unistd.h>
+#include <linux/ptrace.h>
+#include <linux/random.h>
+#include <linux/user.h>
+#include <linux/tty.h>
+#include <linux/ioport.h>
+#include <linux/delay.h>
+#include <linux/init.h>
+#include <linux/initrd.h>
+#include <linux/bootmem.h>
+#include <linux/root_dev.h>
+#include <linux/console.h>
+#include <linux/kernel_stat.h>
+#include <linux/dma-contiguous.h>
+#include <linux/device.h>
+#include <linux/notifier.h>
+#include <linux/pfn.h>
+#include <linux/ctype.h>
+#include <linux/reboot.h>
+#include <linux/topology.h>
+#include <linux/kexec.h>
+#include <linux/crash_dump.h>
+#include <linux/memory.h>
+#include <linux/compat.h>
+
+#include <asm/ipl.h>
+#include <asm/facility.h>
+#include <asm/smp.h>
+#include <asm/mmu_context.h>
+#include <asm/cpcmd.h>
+#include <asm/lowcore.h>
+#include <asm/nmi.h>
+#include <asm/irq.h>
+#include <asm/page.h>
+#include <asm/ptrace.h>
+#include <asm/sections.h>
+#include <asm/ebcdic.h>
+#include <asm/diag.h>
+#include <asm/os_info.h>
+#include <asm/sclp.h>
+#include <asm/sysinfo.h>
+#include <asm/numa.h>
+#include <asm/alternative.h>
+#include <asm/nospec-branch.h>
+#include "entry.h"
+
+/*
+ * Machine setup..
+ */
+unsigned int console_mode = 0;
+EXPORT_SYMBOL(console_mode);
+
+unsigned int console_devno = -1;
+EXPORT_SYMBOL(console_devno);
+
+unsigned int console_irq = -1;
+EXPORT_SYMBOL(console_irq);
+
+unsigned long elf_hwcap __read_mostly = 0;
+char elf_platform[ELF_PLATFORM_SIZE];
+
+unsigned long int_hwcap = 0;
+
+int __initdata memory_end_set;
+unsigned long __initdata memory_end;
+unsigned long __initdata max_physmem_end;
+
+unsigned long VMALLOC_START;
+EXPORT_SYMBOL(VMALLOC_START);
+
+unsigned long VMALLOC_END;
+EXPORT_SYMBOL(VMALLOC_END);
+
+struct page *vmemmap;
+EXPORT_SYMBOL(vmemmap);
+
+unsigned long MODULES_VADDR;
+unsigned long MODULES_END;
+
+/* An array with a pointer to the lowcore of every CPU. */
+struct lowcore *lowcore_ptr[NR_CPUS];
+EXPORT_SYMBOL(lowcore_ptr);
+
+/*
+ * This is set up by the setup-routine at boot-time
+ * for S390 we need to find out what we have to set up
+ * using address 0x10400 ...
+ */
+
+#include <asm/setup.h>
+
+/*
+ * condev= and conmode= setup parameter.
+ */
+
+static int __init condev_setup(char *str)
+{
+ int vdev;
+
+ vdev = simple_strtoul(str, &str, 0);
+ if (vdev >= 0 && vdev < 65536) {
+ console_devno = vdev;
+ console_irq = -1;
+ }
+ return 1;
+}
+
+__setup("condev=", condev_setup);
+
+static void __init set_preferred_console(void)
+{
+ if (CONSOLE_IS_3215 || CONSOLE_IS_SCLP)
+ add_preferred_console("ttyS", 0, NULL);
+ else if (CONSOLE_IS_3270)
+ add_preferred_console("tty3270", 0, NULL);
+ else if (CONSOLE_IS_VT220)
+ add_preferred_console("ttysclp", 0, NULL);
+ else if (CONSOLE_IS_HVC)
+ add_preferred_console("hvc", 0, NULL);
+}
+
+static int __init conmode_setup(char *str)
+{
+#if defined(CONFIG_SCLP_CONSOLE) || defined(CONFIG_SCLP_VT220_CONSOLE)
+ if (strncmp(str, "hwc", 4) == 0 || strncmp(str, "sclp", 5) == 0)
+ SET_CONSOLE_SCLP;
+#endif
+#if defined(CONFIG_TN3215_CONSOLE)
+ if (strncmp(str, "3215", 5) == 0)
+ SET_CONSOLE_3215;
+#endif
+#if defined(CONFIG_TN3270_CONSOLE)
+ if (strncmp(str, "3270", 5) == 0)
+ SET_CONSOLE_3270;
+#endif
+ set_preferred_console();
+ return 1;
+}
+
+__setup("conmode=", conmode_setup);
+
+static void __init conmode_default(void)
+{
+ char query_buffer[1024];
+ char *ptr;
+
+ if (MACHINE_IS_VM) {
+ cpcmd("QUERY CONSOLE", query_buffer, 1024, NULL);
+ console_devno = simple_strtoul(query_buffer + 5, NULL, 16);
+ ptr = strstr(query_buffer, "SUBCHANNEL =");
+ console_irq = simple_strtoul(ptr + 13, NULL, 16);
+ cpcmd("QUERY TERM", query_buffer, 1024, NULL);
+ ptr = strstr(query_buffer, "CONMODE");
+ /*
+ * Set the conmode to 3215 so that the device recognition
+ * will set the cu_type of the console to 3215. If the
+ * conmode is 3270 and we don't set it back then both
+ * 3215 and the 3270 driver will try to access the console
+ * device (3215 as console and 3270 as normal tty).
+ */
+ cpcmd("TERM CONMODE 3215", NULL, 0, NULL);
+ if (ptr == NULL) {
+#if defined(CONFIG_SCLP_CONSOLE) || defined(CONFIG_SCLP_VT220_CONSOLE)
+ SET_CONSOLE_SCLP;
+#endif
+ return;
+ }
+ if (strncmp(ptr + 8, "3270", 4) == 0) {
+#if defined(CONFIG_TN3270_CONSOLE)
+ SET_CONSOLE_3270;
+#elif defined(CONFIG_TN3215_CONSOLE)
+ SET_CONSOLE_3215;
+#elif defined(CONFIG_SCLP_CONSOLE) || defined(CONFIG_SCLP_VT220_CONSOLE)
+ SET_CONSOLE_SCLP;
+#endif
+ } else if (strncmp(ptr + 8, "3215", 4) == 0) {
+#if defined(CONFIG_TN3215_CONSOLE)
+ SET_CONSOLE_3215;
+#elif defined(CONFIG_TN3270_CONSOLE)
+ SET_CONSOLE_3270;
+#elif defined(CONFIG_SCLP_CONSOLE) || defined(CONFIG_SCLP_VT220_CONSOLE)
+ SET_CONSOLE_SCLP;
+#endif
+ }
+ } else if (MACHINE_IS_KVM) {
+ if (sclp.has_vt220 && IS_ENABLED(CONFIG_SCLP_VT220_CONSOLE))
+ SET_CONSOLE_VT220;
+ else if (sclp.has_linemode && IS_ENABLED(CONFIG_SCLP_CONSOLE))
+ SET_CONSOLE_SCLP;
+ else
+ SET_CONSOLE_HVC;
+ } else {
+#if defined(CONFIG_SCLP_CONSOLE) || defined(CONFIG_SCLP_VT220_CONSOLE)
+ SET_CONSOLE_SCLP;
+#endif
+ }
+ if (IS_ENABLED(CONFIG_VT) && IS_ENABLED(CONFIG_DUMMY_CONSOLE))
+ conswitchp = &dummy_con;
+}
+
+#ifdef CONFIG_CRASH_DUMP
+static void __init setup_zfcpdump(void)
+{
+ if (ipl_info.type != IPL_TYPE_FCP_DUMP)
+ return;
+ if (OLDMEM_BASE)
+ return;
+ strcat(boot_command_line, " cio_ignore=all,!ipldev,!condev");
+ console_loglevel = 2;
+}
+#else
+static inline void setup_zfcpdump(void) {}
+#endif /* CONFIG_CRASH_DUMP */
+
+ /*
+ * Reboot, halt and power_off stubs. They just call _machine_restart,
+ * _machine_halt or _machine_power_off.
+ */
+
+void machine_restart(char *command)
+{
+ if ((!in_interrupt() && !in_atomic()) || oops_in_progress)
+ /*
+ * Only unblank the console if we are called in enabled
+ * context or a bust_spinlocks cleared the way for us.
+ */
+ console_unblank();
+ _machine_restart(command);
+}
+
+void machine_halt(void)
+{
+ if (!in_interrupt() || oops_in_progress)
+ /*
+ * Only unblank the console if we are called in enabled
+ * context or a bust_spinlocks cleared the way for us.
+ */
+ console_unblank();
+ _machine_halt();
+}
+
+void machine_power_off(void)
+{
+ if (!in_interrupt() || oops_in_progress)
+ /*
+ * Only unblank the console if we are called in enabled
+ * context or a bust_spinlocks cleared the way for us.
+ */
+ console_unblank();
+ _machine_power_off();
+}
+
+/*
+ * Dummy power off function.
+ */
+void (*pm_power_off)(void) = machine_power_off;
+EXPORT_SYMBOL_GPL(pm_power_off);
+
+static int __init early_parse_mem(char *p)
+{
+ memory_end = memparse(p, &p);
+ memory_end &= PAGE_MASK;
+ memory_end_set = 1;
+ return 0;
+}
+early_param("mem", early_parse_mem);
+
+static int __init parse_vmalloc(char *arg)
+{
+ if (!arg)
+ return -EINVAL;
+ VMALLOC_END = (memparse(arg, &arg) + PAGE_SIZE - 1) & PAGE_MASK;
+ return 0;
+}
+early_param("vmalloc", parse_vmalloc);
+
+void *restart_stack __section(.data);
+
+static void __init setup_lowcore_dat_off(void)
+{
+ struct lowcore *lc;
+
+ /*
+ * Setup lowcore for boot cpu
+ */
+ BUILD_BUG_ON(sizeof(struct lowcore) != LC_PAGES * PAGE_SIZE);
+ lc = memblock_virt_alloc_low(sizeof(*lc), sizeof(*lc));
+ lc->restart_psw.mask = PSW_KERNEL_BITS;
+ lc->restart_psw.addr = (unsigned long) restart_int_handler;
+ lc->external_new_psw.mask = PSW_KERNEL_BITS | PSW_MASK_MCHECK;
+ lc->external_new_psw.addr = (unsigned long) ext_int_handler;
+ lc->svc_new_psw.mask = PSW_KERNEL_BITS |
+ PSW_MASK_IO | PSW_MASK_EXT | PSW_MASK_MCHECK;
+ lc->svc_new_psw.addr = (unsigned long) system_call;
+ lc->program_new_psw.mask = PSW_KERNEL_BITS | PSW_MASK_MCHECK;
+ lc->program_new_psw.addr = (unsigned long) pgm_check_handler;
+ lc->mcck_new_psw.mask = PSW_KERNEL_BITS;
+ lc->mcck_new_psw.addr = (unsigned long) mcck_int_handler;
+ lc->io_new_psw.mask = PSW_KERNEL_BITS | PSW_MASK_MCHECK;
+ lc->io_new_psw.addr = (unsigned long) io_int_handler;
+ lc->clock_comparator = clock_comparator_max;
+ lc->kernel_stack = ((unsigned long) &init_thread_union)
+ + THREAD_SIZE - STACK_FRAME_OVERHEAD - sizeof(struct pt_regs);
+ lc->async_stack = (unsigned long)
+ memblock_virt_alloc(ASYNC_SIZE, ASYNC_SIZE)
+ + ASYNC_SIZE - STACK_FRAME_OVERHEAD - sizeof(struct pt_regs);
+ lc->panic_stack = (unsigned long)
+ memblock_virt_alloc(PAGE_SIZE, PAGE_SIZE)
+ + PAGE_SIZE - STACK_FRAME_OVERHEAD - sizeof(struct pt_regs);
+ lc->current_task = (unsigned long)&init_task;
+ lc->lpp = LPP_MAGIC;
+ lc->machine_flags = S390_lowcore.machine_flags;
+ lc->preempt_count = S390_lowcore.preempt_count;
+ lc->stfl_fac_list = S390_lowcore.stfl_fac_list;
+ memcpy(lc->stfle_fac_list, S390_lowcore.stfle_fac_list,
+ sizeof(lc->stfle_fac_list));
+ memcpy(lc->alt_stfle_fac_list, S390_lowcore.alt_stfle_fac_list,
+ sizeof(lc->alt_stfle_fac_list));
+ nmi_alloc_boot_cpu(lc);
+ vdso_alloc_boot_cpu(lc);
+ lc->sync_enter_timer = S390_lowcore.sync_enter_timer;
+ lc->async_enter_timer = S390_lowcore.async_enter_timer;
+ lc->exit_timer = S390_lowcore.exit_timer;
+ lc->user_timer = S390_lowcore.user_timer;
+ lc->system_timer = S390_lowcore.system_timer;
+ lc->steal_timer = S390_lowcore.steal_timer;
+ lc->last_update_timer = S390_lowcore.last_update_timer;
+ lc->last_update_clock = S390_lowcore.last_update_clock;
+
+ restart_stack = memblock_virt_alloc(ASYNC_SIZE, ASYNC_SIZE);
+ restart_stack += ASYNC_SIZE;
+
+ /*
+ * Set up PSW restart to call ipl.c:do_restart(). Copy the relevant
+ * restart data to the absolute zero lowcore. This is necessary if
+ * PSW restart is done on an offline CPU that has lowcore zero.
+ */
+ lc->restart_stack = (unsigned long) restart_stack;
+ lc->restart_fn = (unsigned long) do_restart;
+ lc->restart_data = 0;
+ lc->restart_source = -1UL;
+
+ /* Setup absolute zero lowcore */
+ mem_assign_absolute(S390_lowcore.restart_stack, lc->restart_stack);
+ mem_assign_absolute(S390_lowcore.restart_fn, lc->restart_fn);
+ mem_assign_absolute(S390_lowcore.restart_data, lc->restart_data);
+ mem_assign_absolute(S390_lowcore.restart_source, lc->restart_source);
+ mem_assign_absolute(S390_lowcore.restart_psw, lc->restart_psw);
+
+#ifdef CONFIG_SMP
+ lc->spinlock_lockval = arch_spin_lockval(0);
+ lc->spinlock_index = 0;
+ arch_spin_lock_setup(0);
+#endif
+ lc->br_r1_trampoline = 0x07f1; /* br %r1 */
+
+ set_prefix((u32)(unsigned long) lc);
+ lowcore_ptr[0] = lc;
+}
+
+static void __init setup_lowcore_dat_on(void)
+{
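+	/*
+	 * Temporarily switch off low-address protection (control register 0
+	 * bit 28) so the new PSWs in the lowcore can be updated, then switch
+	 * it back on.
+	 */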
+ __ctl_clear_bit(0, 28);
+ S390_lowcore.external_new_psw.mask |= PSW_MASK_DAT;
+ S390_lowcore.svc_new_psw.mask |= PSW_MASK_DAT;
+ S390_lowcore.program_new_psw.mask |= PSW_MASK_DAT;
+ S390_lowcore.io_new_psw.mask |= PSW_MASK_DAT;
+ __ctl_set_bit(0, 28);
+}
+
+static struct resource code_resource = {
+ .name = "Kernel code",
+ .flags = IORESOURCE_BUSY | IORESOURCE_SYSTEM_RAM,
+};
+
+static struct resource data_resource = {
+ .name = "Kernel data",
+ .flags = IORESOURCE_BUSY | IORESOURCE_SYSTEM_RAM,
+};
+
+static struct resource bss_resource = {
+ .name = "Kernel bss",
+ .flags = IORESOURCE_BUSY | IORESOURCE_SYSTEM_RAM,
+};
+
+static struct resource __initdata *standard_resources[] = {
+ &code_resource,
+ &data_resource,
+ &bss_resource,
+};
+
+static void __init setup_resources(void)
+{
+ struct resource *res, *std_res, *sub_res;
+ struct memblock_region *reg;
+ int j;
+
+ code_resource.start = (unsigned long) _text;
+ code_resource.end = (unsigned long) _etext - 1;
+ data_resource.start = (unsigned long) _etext;
+ data_resource.end = (unsigned long) _edata - 1;
+ bss_resource.start = (unsigned long) __bss_start;
+ bss_resource.end = (unsigned long) __bss_stop - 1;
+
+ for_each_memblock(memory, reg) {
+ res = memblock_virt_alloc(sizeof(*res), 8);
+ res->flags = IORESOURCE_BUSY | IORESOURCE_SYSTEM_RAM;
+
+ res->name = "System RAM";
+ res->start = reg->base;
+ res->end = reg->base + reg->size - 1;
+ request_resource(&iomem_resource, res);
+
+ for (j = 0; j < ARRAY_SIZE(standard_resources); j++) {
+ std_res = standard_resources[j];
+ if (std_res->start < res->start ||
+ std_res->start > res->end)
+ continue;
+ if (std_res->end > res->end) {
+ sub_res = memblock_virt_alloc(sizeof(*sub_res), 8);
+ *sub_res = *std_res;
+ sub_res->end = res->end;
+ std_res->start = res->end + 1;
+ request_resource(res, sub_res);
+ } else {
+ request_resource(res, std_res);
+ }
+ }
+ }
+#ifdef CONFIG_CRASH_DUMP
+ /*
+ * Re-add removed crash kernel memory as reserved memory. This makes
+ * sure it will be mapped with the identity mapping and struct pages
+ * will be created, so it can be resized later on.
+ * However add it later since the crash kernel resource should not be
+ * part of the System RAM resource.
+ */
+ if (crashk_res.end) {
+ memblock_add_node(crashk_res.start, resource_size(&crashk_res), 0);
+ memblock_reserve(crashk_res.start, resource_size(&crashk_res));
+ insert_resource(&iomem_resource, &crashk_res);
+ }
+#endif
+}
+
+static void __init setup_memory_end(void)
+{
+ unsigned long vmax, vmalloc_size, tmp;
+
+ /* Choose kernel address space layout: 2, 3, or 4 levels. */
+ vmalloc_size = VMALLOC_END ?: (128UL << 30) - MODULES_LEN;
+ tmp = (memory_end ?: max_physmem_end) / PAGE_SIZE;
+ tmp = tmp * (sizeof(struct page) + PAGE_SIZE);
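+	/*
+	 * tmp estimates the virtual space needed for the identity mapping
+	 * plus the vmemmap array: PAGE_SIZE + sizeof(struct page) for every
+	 * page of physical memory. If this fits below _REGION2_SIZE together
+	 * with the vmalloc and module areas, three page table levels suffice.
+	 */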
+ if (tmp + vmalloc_size + MODULES_LEN <= _REGION2_SIZE)
+ vmax = _REGION2_SIZE; /* 3-level kernel page table */
+ else
+ vmax = _REGION1_SIZE; /* 4-level kernel page table */
+ /* module area is at the end of the kernel address space. */
+ MODULES_END = vmax;
+ MODULES_VADDR = MODULES_END - MODULES_LEN;
+ VMALLOC_END = MODULES_VADDR;
+ VMALLOC_START = vmax - vmalloc_size;
+
+ /* Split remaining virtual space between 1:1 mapping & vmemmap array */
+ tmp = VMALLOC_START / (PAGE_SIZE + sizeof(struct page));
+ /* vmemmap contains a multiple of PAGES_PER_SECTION struct pages */
+ tmp = SECTION_ALIGN_UP(tmp);
+ tmp = VMALLOC_START - tmp * sizeof(struct page);
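+	/*
+	 * vmax >> 11 is the address range covered by one entry of the top
+	 * page table level (the tables have 2048 entries), so the vmemmap
+	 * start is aligned down to such an entry boundary.
+	 */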
+ tmp &= ~((vmax >> 11) - 1); /* align to page table level */
+ tmp = min(tmp, 1UL << MAX_PHYSMEM_BITS);
+ vmemmap = (struct page *) tmp;
+
+ /* Take care that memory_end is set and <= vmemmap */
+ memory_end = min(memory_end ?: max_physmem_end, tmp);
+ max_pfn = max_low_pfn = PFN_DOWN(memory_end);
+ memblock_remove(memory_end, ULONG_MAX);
+
+ pr_notice("The maximum memory size is %luMB\n", memory_end >> 20);
+}
+
+#ifdef CONFIG_CRASH_DUMP
+
+/*
+ * When kdump is enabled, we have to ensure that no memory from
+ * the area [0 - crashkernel memory size] and
+ * [crashk_res.start - crashk_res.end] is set offline.
+ */
+static int kdump_mem_notifier(struct notifier_block *nb,
+ unsigned long action, void *data)
+{
+ struct memory_notify *arg = data;
+
+ if (action != MEM_GOING_OFFLINE)
+ return NOTIFY_OK;
+ if (arg->start_pfn < PFN_DOWN(resource_size(&crashk_res)))
+ return NOTIFY_BAD;
+ if (arg->start_pfn > PFN_DOWN(crashk_res.end))
+ return NOTIFY_OK;
+ if (arg->start_pfn + arg->nr_pages - 1 < PFN_DOWN(crashk_res.start))
+ return NOTIFY_OK;
+ return NOTIFY_BAD;
+}
+
+static struct notifier_block kdump_mem_nb = {
+ .notifier_call = kdump_mem_notifier,
+};
+
+#endif
+
+/*
+ * Make sure that the area behind memory_end is protected
+ */
+static void __init reserve_memory_end(void)
+{
+#ifdef CONFIG_CRASH_DUMP
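+	/*
+	 * For a zfcp dump without a preloaded kdump kernel only the memory
+	 * saved in the HSA is accessible, so limit memory_end to hsa_size.
+	 */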
+ if (ipl_info.type == IPL_TYPE_FCP_DUMP &&
+ !OLDMEM_BASE && sclp.hsa_size) {
+ memory_end = sclp.hsa_size;
+ memory_end &= PAGE_MASK;
+ memory_end_set = 1;
+ }
+#endif
+ if (!memory_end_set)
+ return;
+ memblock_reserve(memory_end, ULONG_MAX);
+}
+
+/*
+ * Make sure that oldmem, where the dump is stored, is protected
+ */
+static void __init reserve_oldmem(void)
+{
+#ifdef CONFIG_CRASH_DUMP
+ if (OLDMEM_BASE)
+ /* Forget all memory above the running kdump system */
+ memblock_reserve(OLDMEM_SIZE, (phys_addr_t)ULONG_MAX);
+#endif
+}
+
+/*
+ * Remove oldmem, where the dump is stored, from the memblock memory map
+ */
+static void __init remove_oldmem(void)
+{
+#ifdef CONFIG_CRASH_DUMP
+ if (OLDMEM_BASE)
+ /* Forget all memory above the running kdump system */
+ memblock_remove(OLDMEM_SIZE, (phys_addr_t)ULONG_MAX);
+#endif
+}
+
+/*
+ * Reserve memory for kdump kernel to be loaded with kexec
+ */
+static void __init reserve_crashkernel(void)
+{
+#ifdef CONFIG_CRASH_DUMP
+ unsigned long long crash_base, crash_size;
+ phys_addr_t low, high;
+ int rc;
+
+ rc = parse_crashkernel(boot_command_line, memory_end, &crash_size,
+ &crash_base);
+
+ crash_base = ALIGN(crash_base, KEXEC_CRASH_MEM_ALIGN);
+ crash_size = ALIGN(crash_size, KEXEC_CRASH_MEM_ALIGN);
+ if (rc || crash_size == 0)
+ return;
+
+ if (memblock.memory.regions[0].size < crash_size) {
+ pr_info("crashkernel reservation failed: %s\n",
+ "first memory chunk must be at least crashkernel size");
+ return;
+ }
+
+ low = crash_base ?: OLDMEM_BASE;
+ high = low + crash_size;
+ if (low >= OLDMEM_BASE && high <= OLDMEM_BASE + OLDMEM_SIZE) {
+ /* The crashkernel fits into OLDMEM, reuse OLDMEM */
+ crash_base = low;
+ } else {
+ /* Find suitable area in free memory */
+ low = max_t(unsigned long, crash_size, sclp.hsa_size);
+ high = crash_base ? crash_base + crash_size : ULONG_MAX;
+
+ if (crash_base && crash_base < low) {
+ pr_info("crashkernel reservation failed: %s\n",
+ "crash_base too low");
+ return;
+ }
+ low = crash_base ?: low;
+ crash_base = memblock_find_in_range(low, high, crash_size,
+ KEXEC_CRASH_MEM_ALIGN);
+ }
+
+ if (!crash_base) {
+ pr_info("crashkernel reservation failed: %s\n",
+ "no suitable area found");
+ return;
+ }
+
+ if (register_memory_notifier(&kdump_mem_nb))
+ return;
+
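+	/* On z/VM, release the crashkernel pages to the hypervisor (diag 0x10) */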
+ if (!OLDMEM_BASE && MACHINE_IS_VM)
+ diag10_range(PFN_DOWN(crash_base), PFN_DOWN(crash_size));
+ crashk_res.start = crash_base;
+ crashk_res.end = crash_base + crash_size - 1;
+ memblock_remove(crash_base, crash_size);
+ pr_info("Reserving %lluMB of memory at %lluMB "
+ "for crashkernel (System RAM: %luMB)\n",
+ crash_size >> 20, crash_base >> 20,
+ (unsigned long)memblock.memory.total_size >> 20);
+ os_info_crashkernel_add(crash_base, crash_size);
+#endif
+}
+
+/*
+ * Reserve the initrd from being used by memblock
+ */
+static void __init reserve_initrd(void)
+{
+#ifdef CONFIG_BLK_DEV_INITRD
+ if (!INITRD_START || !INITRD_SIZE)
+ return;
+ initrd_start = INITRD_START;
+ initrd_end = initrd_start + INITRD_SIZE;
+ memblock_reserve(INITRD_START, INITRD_SIZE);
+#endif
+}
+
+/*
+ * Check for initrd being in usable memory
+ */
+static void __init check_initrd(void)
+{
+#ifdef CONFIG_BLK_DEV_INITRD
+ if (INITRD_START && INITRD_SIZE &&
+ !memblock_is_region_memory(INITRD_START, INITRD_SIZE)) {
+ pr_err("The initial RAM disk does not fit into the memory\n");
+ memblock_free(INITRD_START, INITRD_SIZE);
+ initrd_start = initrd_end = 0;
+ }
+#endif
+}
+
+/*
+ * Reserve memory used for lowcore/command line/kernel image.
+ */
+static void __init reserve_kernel(void)
+{
+ unsigned long start_pfn = PFN_UP(__pa(_end));
+
+#ifdef CONFIG_DMA_API_DEBUG
+ /*
+ * DMA_API_DEBUG code stumbles over addresses from the
+ * range [PARMAREA_END, _stext]. Mark the memory as reserved
+ * so it is not used for CONFIG_DMA_API_DEBUG=y.
+ */
+ memblock_reserve(0, PFN_PHYS(start_pfn));
+#else
+ memblock_reserve(0, PARMAREA_END);
+ memblock_reserve((unsigned long)_stext, PFN_PHYS(start_pfn)
+ - (unsigned long)_stext);
+#endif
+}
+
+static void __init setup_memory(void)
+{
+ struct memblock_region *reg;
+
+ /*
+ * Init storage key for present memory
+ */
+ for_each_memblock(memory, reg) {
+ storage_key_init_range(reg->base, reg->base + reg->size);
+ }
+ psw_set_key(PAGE_DEFAULT_KEY);
+}
+
+/*
+ * Setup hardware capabilities.
+ */
+static int __init setup_hwcaps(void)
+{
+ static const int stfl_bits[6] = { 0, 2, 7, 17, 19, 21 };
+ struct cpuid cpu_id;
+ int i;
+
+ /*
+	 * The store facility list bits, as found in the Principles of
+	 * Operation, are numbered with bit 1UL<<31 as number 0 down to
+	 * bit 1UL<<0 as number 31.
+ * Bit 0: instructions named N3, "backported" to esa-mode
+ * Bit 2: z/Architecture mode is active
+ * Bit 7: the store-facility-list-extended facility is installed
+ * Bit 17: the message-security assist is installed
+ * Bit 19: the long-displacement facility is installed
+ * Bit 21: the extended-immediate facility is installed
+ * Bit 22: extended-translation facility 3 is installed
+ * Bit 30: extended-translation facility 3 enhancement facility
+ * These get translated to:
+ * HWCAP_S390_ESAN3 bit 0, HWCAP_S390_ZARCH bit 1,
+ * HWCAP_S390_STFLE bit 2, HWCAP_S390_MSA bit 3,
+ * HWCAP_S390_LDISP bit 4, HWCAP_S390_EIMM bit 5 and
+ * HWCAP_S390_ETF3EH bit 8 (22 && 30).
+ */
+ for (i = 0; i < 6; i++)
+ if (test_facility(stfl_bits[i]))
+ elf_hwcap |= 1UL << i;
+
+ if (test_facility(22) && test_facility(30))
+ elf_hwcap |= HWCAP_S390_ETF3EH;
+
+ /*
+ * Check for additional facilities with store-facility-list-extended.
+ * stfle stores doublewords (8 byte) with bit 1ULL<<63 as bit 0
+ * and 1ULL<<0 as bit 63. Bits 0-31 contain the same information
+ * as stored by stfl, bits 32-xxx contain additional facilities.
+ * How many facility words are stored depends on the number of
+ * doublewords passed to the instruction. The additional facilities
+ * are:
+ * Bit 42: decimal floating point facility is installed
+ * Bit 44: perform floating point operation facility is installed
+ * translated to:
+ * HWCAP_S390_DFP bit 6 (42 && 44).
+ */
+ if ((elf_hwcap & (1UL << 2)) && test_facility(42) && test_facility(44))
+ elf_hwcap |= HWCAP_S390_DFP;
+
+ /*
+ * Huge page support HWCAP_S390_HPAGE is bit 7.
+ */
+ if (MACHINE_HAS_EDAT1)
+ elf_hwcap |= HWCAP_S390_HPAGE;
+
+ /*
+ * 64-bit register support for 31-bit processes
+ * HWCAP_S390_HIGH_GPRS is bit 9.
+ */
+ elf_hwcap |= HWCAP_S390_HIGH_GPRS;
+
+ /*
+ * Transactional execution support HWCAP_S390_TE is bit 10.
+ */
+ if (MACHINE_HAS_TE)
+ elf_hwcap |= HWCAP_S390_TE;
+
+ /*
+ * Vector extension HWCAP_S390_VXRS is bit 11. The Vector extension
+ * can be disabled with the "novx" parameter. Use MACHINE_HAS_VX
+ * instead of facility bit 129.
+ */
+ if (MACHINE_HAS_VX) {
+ elf_hwcap |= HWCAP_S390_VXRS;
+ if (test_facility(134))
+ elf_hwcap |= HWCAP_S390_VXRS_EXT;
+ if (test_facility(135))
+ elf_hwcap |= HWCAP_S390_VXRS_BCD;
+ }
+
+ /*
+ * Guarded storage support HWCAP_S390_GS is bit 12.
+ */
+ if (MACHINE_HAS_GS)
+ elf_hwcap |= HWCAP_S390_GS;
+
+ get_cpu_id(&cpu_id);
+ add_device_randomness(&cpu_id, sizeof(cpu_id));
+ switch (cpu_id.machine) {
+ case 0x2064:
+ case 0x2066:
+ default: /* Use "z900" as default for 64 bit kernels. */
+ strcpy(elf_platform, "z900");
+ break;
+ case 0x2084:
+ case 0x2086:
+ strcpy(elf_platform, "z990");
+ break;
+ case 0x2094:
+ case 0x2096:
+ strcpy(elf_platform, "z9-109");
+ break;
+ case 0x2097:
+ case 0x2098:
+ strcpy(elf_platform, "z10");
+ break;
+ case 0x2817:
+ case 0x2818:
+ strcpy(elf_platform, "z196");
+ break;
+ case 0x2827:
+ case 0x2828:
+ strcpy(elf_platform, "zEC12");
+ break;
+ case 0x2964:
+ case 0x2965:
+ strcpy(elf_platform, "z13");
+ break;
+ case 0x3906:
+ case 0x3907:
+ strcpy(elf_platform, "z14");
+ break;
+ }
+
+ /*
+ * Virtualization support HWCAP_INT_SIE is bit 0.
+ */
+ if (sclp.has_sief2)
+ int_hwcap |= HWCAP_INT_SIE;
+
+ return 0;
+}
+arch_initcall(setup_hwcaps);
+
+/*
+ * Add system information as device randomness
+ */
+static void __init setup_randomness(void)
+{
+ struct sysinfo_3_2_2 *vmms;
+
+ vmms = (struct sysinfo_3_2_2 *) memblock_alloc(PAGE_SIZE, PAGE_SIZE);
+ if (stsi(vmms, 3, 2, 2) == 0 && vmms->count)
+ add_device_randomness(&vmms->vm, sizeof(vmms->vm[0]) * vmms->count);
+ memblock_free((unsigned long) vmms, PAGE_SIZE);
+}
+
+/*
+ * Find the correct size for the task_struct. This depends on
+ * the size of the struct fpu at the end of the thread_struct
+ * which is embedded in the task_struct.
+ */
+static void __init setup_task_size(void)
+{
+ int task_size = sizeof(struct task_struct);
+
+ if (!MACHINE_HAS_VX) {
+ task_size -= sizeof(__vector128) * __NUM_VXRS;
+ task_size += sizeof(freg_t) * __NUM_FPRS;
+ }
+ arch_task_struct_size = task_size;
+}
+
+/*
+ * Setup function called from init/main.c just after the banner
+ * was printed.
+ */
+
+void __init setup_arch(char **cmdline_p)
+{
+ /*
+ * print what head.S has found out about the machine
+ */
+ if (MACHINE_IS_VM)
+ pr_info("Linux is running as a z/VM "
+ "guest operating system in 64-bit mode\n");
+ else if (MACHINE_IS_KVM)
+ pr_info("Linux is running under KVM in 64-bit mode\n");
+ else if (MACHINE_IS_LPAR)
+ pr_info("Linux is running natively in 64-bit mode\n");
+ else
+ pr_info("Linux is running as a guest in 64-bit mode\n");
+
+ /* Have one command line that is parsed and saved in /proc/cmdline */
+ /* boot_command_line has been already set up in early.c */
+ *cmdline_p = boot_command_line;
+
+ ROOT_DEV = Root_RAM0;
+
+ /* Is init_mm really needed? */
+ init_mm.start_code = PAGE_OFFSET;
+ init_mm.end_code = (unsigned long) _etext;
+ init_mm.end_data = (unsigned long) _edata;
+ init_mm.brk = (unsigned long) _end;
+
+ if (IS_ENABLED(CONFIG_EXPOLINE_AUTO))
+ nospec_auto_detect();
+
+ parse_early_param();
+#ifdef CONFIG_CRASH_DUMP
+ /* Deactivate elfcorehdr= kernel parameter */
+ elfcorehdr_addr = ELFCORE_ADDR_MAX;
+#endif
+
+ os_info_init();
+ setup_ipl();
+ setup_task_size();
+
+ /* Do some memory reservations *before* memory is added to memblock */
+ reserve_memory_end();
+ reserve_oldmem();
+ reserve_kernel();
+ reserve_initrd();
+ memblock_allow_resize();
+
+ /* Get information about *all* installed memory */
+ detect_memory_memblock();
+
+ remove_oldmem();
+
+ /*
+ * Make sure all chunks are MAX_ORDER aligned so we don't need the
+ * extra checks that HOLES_IN_ZONE would require.
+ *
+ * Is this still required?
+ */
+ memblock_trim_memory(1UL << (MAX_ORDER - 1 + PAGE_SHIFT));
+
+ setup_memory_end();
+ setup_memory();
+ dma_contiguous_reserve(memory_end);
+ vmcp_cma_reserve();
+
+ check_initrd();
+ reserve_crashkernel();
+#ifdef CONFIG_CRASH_DUMP
+ /*
+ * Be aware that smp_save_dump_cpus() triggers a system reset.
+ * Therefore CPU and device initialization should be done afterwards.
+ */
+ smp_save_dump_cpus();
+#endif
+
+ setup_resources();
+ setup_lowcore_dat_off();
+ smp_fill_possible_mask();
+ cpu_detect_mhz_feature();
+ cpu_init();
+ numa_setup();
+ smp_detect_cpus();
+ topology_init_early();
+
+ /*
+ * Create kernel page tables and switch to virtual addressing.
+ */
+ paging_init();
+
+ /*
+ * After paging_init created the kernel page table, the new PSWs
+ * in lowcore can now run with DAT enabled.
+ */
+ setup_lowcore_dat_on();
+
+ /* Setup default console */
+ conmode_default();
+ set_preferred_console();
+
+ apply_alternative_instructions();
+ if (IS_ENABLED(CONFIG_EXPOLINE))
+ nospec_init_branches();
+
+ /* Setup zfcpdump support */
+ setup_zfcpdump();
+
+ /* Add system specific data to the random pool */
+ setup_randomness();
+}
diff --git a/arch/s390/kernel/signal.c b/arch/s390/kernel/signal.c
new file mode 100644
index 000000000..22f08245a
--- /dev/null
+++ b/arch/s390/kernel/signal.c
@@ -0,0 +1,541 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright IBM Corp. 1999, 2006
+ * Author(s): Denis Joseph Barrow (djbarrow@de.ibm.com,barrow_dj@yahoo.com)
+ *
+ * Based on Intel version
+ *
+ * Copyright (C) 1991, 1992 Linus Torvalds
+ *
+ * 1997-11-28 Modified for POSIX.1b signals by Richard Henderson
+ */
+
+#include <linux/sched.h>
+#include <linux/sched/task_stack.h>
+#include <linux/mm.h>
+#include <linux/smp.h>
+#include <linux/kernel.h>
+#include <linux/signal.h>
+#include <linux/errno.h>
+#include <linux/wait.h>
+#include <linux/ptrace.h>
+#include <linux/unistd.h>
+#include <linux/stddef.h>
+#include <linux/tty.h>
+#include <linux/personality.h>
+#include <linux/binfmts.h>
+#include <linux/tracehook.h>
+#include <linux/syscalls.h>
+#include <linux/compat.h>
+#include <asm/ucontext.h>
+#include <linux/uaccess.h>
+#include <asm/lowcore.h>
+#include <asm/switch_to.h>
+#include "entry.h"
+
+/*
+ * Layout of an old-style signal-frame:
+ * -----------------------------------------
+ * | save area (_SIGNAL_FRAMESIZE) |
+ * -----------------------------------------
+ * | struct sigcontext |
+ * | oldmask |
+ * | _sigregs * |
+ * -----------------------------------------
+ * | _sigregs with |
+ * | _s390_regs_common |
+ * | _s390_fp_regs |
+ * -----------------------------------------
+ * | int signo |
+ * -----------------------------------------
+ * | _sigregs_ext with |
+ * | gprs_high 64 byte (opt) |
+ * | vxrs_low 128 byte (opt) |
+ * | vxrs_high 256 byte (opt) |
+ * | reserved 128 byte (opt) |
+ * -----------------------------------------
+ * | __u16 svc_insn |
+ * -----------------------------------------
+ * The svc_insn entry with the sigreturn system call opcode does not
+ * have a fixed position and moves if gprs_high or vxrs exist.
+ * Future extensions will be added to _sigregs_ext.
+ */
+struct sigframe
+{
+ __u8 callee_used_stack[__SIGNAL_FRAMESIZE];
+ struct sigcontext sc;
+ _sigregs sregs;
+ int signo;
+ _sigregs_ext sregs_ext;
+ __u16 svc_insn; /* Offset of svc_insn is NOT fixed! */
+};
+
+/*
+ * Layout of an rt signal-frame:
+ * -----------------------------------------
+ * | save area (_SIGNAL_FRAMESIZE) |
+ * -----------------------------------------
+ * | svc __NR_rt_sigreturn 2 byte |
+ * -----------------------------------------
+ * | struct siginfo |
+ * -----------------------------------------
+ * | struct ucontext_extended with |
+ * | unsigned long uc_flags |
+ * | struct ucontext *uc_link |
+ * | stack_t uc_stack |
+ * | _sigregs uc_mcontext with |
+ * | _s390_regs_common |
+ * | _s390_fp_regs |
+ * | sigset_t uc_sigmask |
+ * | _sigregs_ext uc_mcontext_ext |
+ * | gprs_high 64 byte (opt) |
+ * | vxrs_low 128 byte (opt) |
+ * | vxrs_high 256 byte (opt)|
+ * | reserved 128 byte (opt) |
+ * -----------------------------------------
+ * Future extensions will be added to _sigregs_ext.
+ */
+struct rt_sigframe
+{
+ __u8 callee_used_stack[__SIGNAL_FRAMESIZE];
+ __u16 svc_insn;
+ struct siginfo info;
+ struct ucontext_extended uc;
+};
+
+/* Store registers needed to create the signal frame */
+static void store_sigregs(void)
+{
+ save_access_regs(current->thread.acrs);
+ save_fpu_regs();
+}
+
+/* Load registers after signal return */
+static void load_sigregs(void)
+{
+ restore_access_regs(current->thread.acrs);
+}
+
+/* Returns non-zero on fault. */
+static int save_sigregs(struct pt_regs *regs, _sigregs __user *sregs)
+{
+ _sigregs user_sregs;
+
+ /* Copy a 'clean' PSW mask to the user to avoid leaking
+ information about whether PER is currently on. */
+ user_sregs.regs.psw.mask = PSW_USER_BITS |
+ (regs->psw.mask & (PSW_MASK_USER | PSW_MASK_RI));
+ user_sregs.regs.psw.addr = regs->psw.addr;
+ memcpy(&user_sregs.regs.gprs, &regs->gprs, sizeof(sregs->regs.gprs));
+ memcpy(&user_sregs.regs.acrs, current->thread.acrs,
+ sizeof(user_sregs.regs.acrs));
+ fpregs_store(&user_sregs.fpregs, &current->thread.fpu);
+ if (__copy_to_user(sregs, &user_sregs, sizeof(_sigregs)))
+ return -EFAULT;
+ return 0;
+}
+
+static int restore_sigregs(struct pt_regs *regs, _sigregs __user *sregs)
+{
+ _sigregs user_sregs;
+
+	/* Always make any pending restarted system call return -EINTR */
+ current->restart_block.fn = do_no_restart_syscall;
+
+ if (__copy_from_user(&user_sregs, sregs, sizeof(user_sregs)))
+ return -EFAULT;
+
+ if (!is_ri_task(current) && (user_sregs.regs.psw.mask & PSW_MASK_RI))
+ return -EINVAL;
+
+ /* Test the floating-point-control word. */
+ if (test_fp_ctl(user_sregs.fpregs.fpc))
+ return -EINVAL;
+
+ /* Use regs->psw.mask instead of PSW_USER_BITS to preserve PER bit. */
+ regs->psw.mask = (regs->psw.mask & ~(PSW_MASK_USER | PSW_MASK_RI)) |
+ (user_sregs.regs.psw.mask & (PSW_MASK_USER | PSW_MASK_RI));
+ /* Check for invalid user address space control. */
+ if ((regs->psw.mask & PSW_MASK_ASC) == PSW_ASC_HOME)
+ regs->psw.mask = PSW_ASC_PRIMARY |
+ (regs->psw.mask & ~PSW_MASK_ASC);
+ /* Check for invalid amode */
+ if (regs->psw.mask & PSW_MASK_EA)
+ regs->psw.mask |= PSW_MASK_BA;
+ regs->psw.addr = user_sregs.regs.psw.addr;
+ memcpy(&regs->gprs, &user_sregs.regs.gprs, sizeof(sregs->regs.gprs));
+ memcpy(&current->thread.acrs, &user_sregs.regs.acrs,
+ sizeof(current->thread.acrs));
+
+ fpregs_load(&user_sregs.fpregs, &current->thread.fpu);
+
+ clear_pt_regs_flag(regs, PIF_SYSCALL); /* No longer in a system call */
+ return 0;
+}
+
+/* Returns non-zero on fault. */
+static int save_sigregs_ext(struct pt_regs *regs,
+ _sigregs_ext __user *sregs_ext)
+{
+ __u64 vxrs[__NUM_VXRS_LOW];
+ int i;
+
+ /* Save vector registers to signal stack */
+ if (MACHINE_HAS_VX) {
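+		/*
+		 * The leftmost doublewords of V0-V15 overlap the floating
+		 * point registers already saved by save_sigregs(), so only
+		 * the rightmost doublewords go into vxrs_low here.
+		 */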
+ for (i = 0; i < __NUM_VXRS_LOW; i++)
+ vxrs[i] = *((__u64 *)(current->thread.fpu.vxrs + i) + 1);
+ if (__copy_to_user(&sregs_ext->vxrs_low, vxrs,
+ sizeof(sregs_ext->vxrs_low)) ||
+ __copy_to_user(&sregs_ext->vxrs_high,
+ current->thread.fpu.vxrs + __NUM_VXRS_LOW,
+ sizeof(sregs_ext->vxrs_high)))
+ return -EFAULT;
+ }
+ return 0;
+}
+
+static int restore_sigregs_ext(struct pt_regs *regs,
+ _sigregs_ext __user *sregs_ext)
+{
+ __u64 vxrs[__NUM_VXRS_LOW];
+ int i;
+
+ /* Restore vector registers from signal stack */
+ if (MACHINE_HAS_VX) {
+ if (__copy_from_user(vxrs, &sregs_ext->vxrs_low,
+ sizeof(sregs_ext->vxrs_low)) ||
+ __copy_from_user(current->thread.fpu.vxrs + __NUM_VXRS_LOW,
+ &sregs_ext->vxrs_high,
+ sizeof(sregs_ext->vxrs_high)))
+ return -EFAULT;
+ for (i = 0; i < __NUM_VXRS_LOW; i++)
+ *((__u64 *)(current->thread.fpu.vxrs + i) + 1) = vxrs[i];
+ }
+ return 0;
+}
+
+SYSCALL_DEFINE0(sigreturn)
+{
+ struct pt_regs *regs = task_pt_regs(current);
+ struct sigframe __user *frame =
+ (struct sigframe __user *) regs->gprs[15];
+ sigset_t set;
+
+ if (__copy_from_user(&set.sig, &frame->sc.oldmask, _SIGMASK_COPY_SIZE))
+ goto badframe;
+ set_current_blocked(&set);
+ save_fpu_regs();
+ if (restore_sigregs(regs, &frame->sregs))
+ goto badframe;
+ if (restore_sigregs_ext(regs, &frame->sregs_ext))
+ goto badframe;
+ load_sigregs();
+ return regs->gprs[2];
+badframe:
+ force_sig(SIGSEGV, current);
+ return 0;
+}
+
+SYSCALL_DEFINE0(rt_sigreturn)
+{
+ struct pt_regs *regs = task_pt_regs(current);
+ struct rt_sigframe __user *frame =
+ (struct rt_sigframe __user *)regs->gprs[15];
+ sigset_t set;
+
+ if (__copy_from_user(&set.sig, &frame->uc.uc_sigmask, sizeof(set)))
+ goto badframe;
+ set_current_blocked(&set);
+ if (restore_altstack(&frame->uc.uc_stack))
+ goto badframe;
+ save_fpu_regs();
+ if (restore_sigregs(regs, &frame->uc.uc_mcontext))
+ goto badframe;
+ if (restore_sigregs_ext(regs, &frame->uc.uc_mcontext_ext))
+ goto badframe;
+ load_sigregs();
+ return regs->gprs[2];
+badframe:
+ force_sig(SIGSEGV, current);
+ return 0;
+}
+
+/*
+ * Determine which stack to use..
+ */
+static inline void __user *
+get_sigframe(struct k_sigaction *ka, struct pt_regs * regs, size_t frame_size)
+{
+ unsigned long sp;
+
+ /* Default to using normal stack */
+ sp = regs->gprs[15];
+
+ /* Overflow on alternate signal stack gives SIGSEGV. */
+ if (on_sig_stack(sp) && !on_sig_stack((sp - frame_size) & -8UL))
+ return (void __user *) -1UL;
+
+ /* This is the X/Open sanctioned signal stack switching. */
+ if (ka->sa.sa_flags & SA_ONSTACK) {
+ if (! sas_ss_flags(sp))
+ sp = current->sas_ss_sp + current->sas_ss_size;
+ }
+
+ return (void __user *)((sp - frame_size) & -8ul);
+}
+
+static int setup_frame(int sig, struct k_sigaction *ka,
+ sigset_t *set, struct pt_regs * regs)
+{
+ struct sigframe __user *frame;
+ struct sigcontext sc;
+ unsigned long restorer;
+ size_t frame_size;
+
+ /*
+ * gprs_high are only present for a 31-bit task running on
+ * a 64-bit kernel (see compat_signal.c) but the space for
+	 * gprs_high needs to be allocated if vector registers are
+ * included in the signal frame on a 31-bit system.
+ */
+ frame_size = sizeof(*frame) - sizeof(frame->sregs_ext);
+ if (MACHINE_HAS_VX)
+ frame_size += sizeof(frame->sregs_ext);
+ frame = get_sigframe(ka, regs, frame_size);
+ if (frame == (void __user *) -1UL)
+ return -EFAULT;
+
+ /* Set up backchain. */
+ if (__put_user(regs->gprs[15], (addr_t __user *) frame))
+ return -EFAULT;
+
+ /* Create struct sigcontext on the signal stack */
+ memcpy(&sc.oldmask, &set->sig, _SIGMASK_COPY_SIZE);
+ sc.sregs = (_sigregs __user __force *) &frame->sregs;
+ if (__copy_to_user(&frame->sc, &sc, sizeof(frame->sc)))
+ return -EFAULT;
+
+ /* Store registers needed to create the signal frame */
+ store_sigregs();
+
+ /* Create _sigregs on the signal stack */
+ if (save_sigregs(regs, &frame->sregs))
+ return -EFAULT;
+
+ /* Place signal number on stack to allow backtrace from handler. */
+ if (__put_user(regs->gprs[2], (int __user *) &frame->signo))
+ return -EFAULT;
+
+ /* Create _sigregs_ext on the signal stack */
+ if (save_sigregs_ext(regs, &frame->sregs_ext))
+ return -EFAULT;
+
+ /* Set up to return from userspace. If provided, use a stub
+ already in userspace. */
+ if (ka->sa.sa_flags & SA_RESTORER) {
+ restorer = (unsigned long) ka->sa.sa_restorer;
+ } else {
+		/* Signal frames without vector registers are short! */
+ __u16 __user *svc = (void __user *) frame + frame_size - 2;
+ if (__put_user(S390_SYSCALL_OPCODE | __NR_sigreturn, svc))
+ return -EFAULT;
+ restorer = (unsigned long) svc;
+ }
+
+ /* Set up registers for signal handler */
+ regs->gprs[14] = restorer;
+ regs->gprs[15] = (unsigned long) frame;
+ /* Force default amode and default user address space control. */
+ regs->psw.mask = PSW_MASK_EA | PSW_MASK_BA |
+ (PSW_USER_BITS & PSW_MASK_ASC) |
+ (regs->psw.mask & ~PSW_MASK_ASC);
+ regs->psw.addr = (unsigned long) ka->sa.sa_handler;
+
+ regs->gprs[2] = sig;
+ regs->gprs[3] = (unsigned long) &frame->sc;
+
+ /* We forgot to include these in the sigcontext.
+ To avoid breaking binary compatibility, they are passed as args. */
+ if (sig == SIGSEGV || sig == SIGBUS || sig == SIGILL ||
+ sig == SIGTRAP || sig == SIGFPE) {
+ /* set extra registers only for synchronous signals */
+ regs->gprs[4] = regs->int_code & 127;
+ regs->gprs[5] = regs->int_parm_long;
+ regs->gprs[6] = current->thread.last_break;
+ }
+ return 0;
+}
+
+static int setup_rt_frame(struct ksignal *ksig, sigset_t *set,
+ struct pt_regs *regs)
+{
+ struct rt_sigframe __user *frame;
+ unsigned long uc_flags, restorer;
+ size_t frame_size;
+
+ frame_size = sizeof(struct rt_sigframe) - sizeof(_sigregs_ext);
+ /*
+ * gprs_high are only present for a 31-bit task running on
+ * a 64-bit kernel (see compat_signal.c) but the space for
+	 * gprs_high needs to be allocated if vector registers are
+ * included in the signal frame on a 31-bit system.
+ */
+ uc_flags = 0;
+ if (MACHINE_HAS_VX) {
+ frame_size += sizeof(_sigregs_ext);
+ uc_flags |= UC_VXRS;
+ }
+ frame = get_sigframe(&ksig->ka, regs, frame_size);
+ if (frame == (void __user *) -1UL)
+ return -EFAULT;
+
+ /* Set up backchain. */
+ if (__put_user(regs->gprs[15], (addr_t __user *) frame))
+ return -EFAULT;
+
+ /* Set up to return from userspace. If provided, use a stub
+ already in userspace. */
+ if (ksig->ka.sa.sa_flags & SA_RESTORER) {
+ restorer = (unsigned long) ksig->ka.sa.sa_restorer;
+ } else {
+ __u16 __user *svc = &frame->svc_insn;
+ if (__put_user(S390_SYSCALL_OPCODE | __NR_rt_sigreturn, svc))
+ return -EFAULT;
+ restorer = (unsigned long) svc;
+ }
+
+ /* Create siginfo on the signal stack */
+ if (copy_siginfo_to_user(&frame->info, &ksig->info))
+ return -EFAULT;
+
+ /* Store registers needed to create the signal frame */
+ store_sigregs();
+
+ /* Create ucontext on the signal stack. */
+ if (__put_user(uc_flags, &frame->uc.uc_flags) ||
+ __put_user(NULL, &frame->uc.uc_link) ||
+ __save_altstack(&frame->uc.uc_stack, regs->gprs[15]) ||
+ save_sigregs(regs, &frame->uc.uc_mcontext) ||
+ __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set)) ||
+ save_sigregs_ext(regs, &frame->uc.uc_mcontext_ext))
+ return -EFAULT;
+
+ /* Set up registers for signal handler */
+ regs->gprs[14] = restorer;
+ regs->gprs[15] = (unsigned long) frame;
+ /* Force default amode and default user address space control. */
+ regs->psw.mask = PSW_MASK_EA | PSW_MASK_BA |
+ (PSW_USER_BITS & PSW_MASK_ASC) |
+ (regs->psw.mask & ~PSW_MASK_ASC);
+ regs->psw.addr = (unsigned long) ksig->ka.sa.sa_handler;
+
+ regs->gprs[2] = ksig->sig;
+ regs->gprs[3] = (unsigned long) &frame->info;
+ regs->gprs[4] = (unsigned long) &frame->uc;
+ regs->gprs[5] = current->thread.last_break;
+ return 0;
+}
+
+static void handle_signal(struct ksignal *ksig, sigset_t *oldset,
+ struct pt_regs *regs)
+{
+ int ret;
+
+ /* Set up the stack frame */
+ if (ksig->ka.sa.sa_flags & SA_SIGINFO)
+ ret = setup_rt_frame(ksig, oldset, regs);
+ else
+ ret = setup_frame(ksig->sig, &ksig->ka, oldset, regs);
+
+ signal_setup_done(ret, ksig, test_thread_flag(TIF_SINGLE_STEP));
+}
+
+/*
+ * Note that 'init' is a special process: it doesn't get signals it doesn't
+ * want to handle. Thus you cannot kill init even with a SIGKILL even by
+ * mistake.
+ *
+ * Note that we go through the signals twice: once to check the signals that
+ * the kernel can handle, and then we build all the user-level signal handling
+ * stack-frames in one go after that.
+ */
+void do_signal(struct pt_regs *regs)
+{
+ struct ksignal ksig;
+ sigset_t *oldset = sigmask_to_save();
+
+ /*
+ * Get signal to deliver. When running under ptrace, at this point
+ * the debugger may change all our registers, including the system
+ * call information.
+ */
+ current->thread.system_call =
+ test_pt_regs_flag(regs, PIF_SYSCALL) ? regs->int_code : 0;
+
+ if (get_signal(&ksig)) {
+ /* Whee! Actually deliver the signal. */
+ if (current->thread.system_call) {
+ regs->int_code = current->thread.system_call;
+ /* Check for system call restarting. */
+ switch (regs->gprs[2]) {
+ case -ERESTART_RESTARTBLOCK:
+ case -ERESTARTNOHAND:
+ regs->gprs[2] = -EINTR;
+ break;
+ case -ERESTARTSYS:
+ if (!(ksig.ka.sa.sa_flags & SA_RESTART)) {
+ regs->gprs[2] = -EINTR;
+ break;
+ }
+ /* fallthrough */
+ case -ERESTARTNOINTR:
+ regs->gprs[2] = regs->orig_gpr2;
+ regs->psw.addr =
+ __rewind_psw(regs->psw,
+ regs->int_code >> 16);
+ break;
+ }
+ }
+ /* No longer in a system call */
+ clear_pt_regs_flag(regs, PIF_SYSCALL);
+ rseq_signal_deliver(&ksig, regs);
+ if (is_compat_task())
+ handle_signal32(&ksig, oldset, regs);
+ else
+ handle_signal(&ksig, oldset, regs);
+ return;
+ }
+
+ /* No handlers present - check for system call restart */
+ clear_pt_regs_flag(regs, PIF_SYSCALL);
+ if (current->thread.system_call) {
+ regs->int_code = current->thread.system_call;
+ switch (regs->gprs[2]) {
+ case -ERESTART_RESTARTBLOCK:
+ /* Restart with sys_restart_syscall */
+ regs->int_code = __NR_restart_syscall;
+ /* fallthrough */
+ case -ERESTARTNOHAND:
+ case -ERESTARTSYS:
+ case -ERESTARTNOINTR:
+ /* Restart system call with magic TIF bit. */
+ regs->gprs[2] = regs->orig_gpr2;
+ set_pt_regs_flag(regs, PIF_SYSCALL);
+ if (test_thread_flag(TIF_SINGLE_STEP))
+ clear_pt_regs_flag(regs, PIF_PER_TRAP);
+ break;
+ }
+ }
+
+ /*
+ * If there's no signal to deliver, we just put the saved sigmask back.
+ */
+ restore_saved_sigmask();
+}
+
+void do_notify_resume(struct pt_regs *regs)
+{
+ clear_thread_flag(TIF_NOTIFY_RESUME);
+ tracehook_notify_resume(regs);
+ rseq_handle_notify_resume(NULL, regs);
+}
diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c
new file mode 100644
index 000000000..bce678c71
--- /dev/null
+++ b/arch/s390/kernel/smp.c
@@ -0,0 +1,1212 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * SMP related functions
+ *
+ * Copyright IBM Corp. 1999, 2012
+ * Author(s): Denis Joseph Barrow,
+ * Martin Schwidefsky <schwidefsky@de.ibm.com>,
+ * Heiko Carstens <heiko.carstens@de.ibm.com>,
+ *
+ * based on other smp stuff by
+ * (c) 1995 Alan Cox, CymruNET Ltd <alan@cymru.net>
+ * (c) 1998 Ingo Molnar
+ *
+ * The code outside of smp.c uses logical cpu numbers, only smp.c does
+ * the translation of logical to physical cpu ids. All new code that
+ * operates on physical cpu numbers needs to go into smp.c.
+ */
+
+#define KMSG_COMPONENT "cpu"
+#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
+
+#include <linux/workqueue.h>
+#include <linux/bootmem.h>
+#include <linux/export.h>
+#include <linux/init.h>
+#include <linux/mm.h>
+#include <linux/err.h>
+#include <linux/spinlock.h>
+#include <linux/kernel_stat.h>
+#include <linux/delay.h>
+#include <linux/interrupt.h>
+#include <linux/irqflags.h>
+#include <linux/cpu.h>
+#include <linux/slab.h>
+#include <linux/sched/hotplug.h>
+#include <linux/sched/task_stack.h>
+#include <linux/crash_dump.h>
+#include <linux/memblock.h>
+#include <linux/kprobes.h>
+#include <asm/asm-offsets.h>
+#include <asm/diag.h>
+#include <asm/switch_to.h>
+#include <asm/facility.h>
+#include <asm/ipl.h>
+#include <asm/setup.h>
+#include <asm/irq.h>
+#include <asm/tlbflush.h>
+#include <asm/vtimer.h>
+#include <asm/lowcore.h>
+#include <asm/sclp.h>
+#include <asm/vdso.h>
+#include <asm/debug.h>
+#include <asm/os_info.h>
+#include <asm/sigp.h>
+#include <asm/idle.h>
+#include <asm/nmi.h>
+#include <asm/topology.h>
+#include "entry.h"
+
+enum {
+ ec_schedule = 0,
+ ec_call_function_single,
+ ec_stop_cpu,
+};
+
+enum {
+ CPU_STATE_STANDBY,
+ CPU_STATE_CONFIGURED,
+};
+
+static DEFINE_PER_CPU(struct cpu *, cpu_device);
+
+struct pcpu {
+ struct lowcore *lowcore; /* lowcore page(s) for the cpu */
+ unsigned long ec_mask; /* bit mask for ec_xxx functions */
+ unsigned long ec_clk; /* sigp timestamp for ec_xxx */
+ signed char state; /* physical cpu state */
+ signed char polarization; /* physical polarization */
+ u16 address; /* physical cpu address */
+};
+
+static u8 boot_core_type;
+static struct pcpu pcpu_devices[NR_CPUS];
+
+unsigned int smp_cpu_mt_shift;
+EXPORT_SYMBOL(smp_cpu_mt_shift);
+
+unsigned int smp_cpu_mtid;
+EXPORT_SYMBOL(smp_cpu_mtid);
+
+#ifdef CONFIG_CRASH_DUMP
+__vector128 __initdata boot_cpu_vector_save_area[__NUM_VXRS];
+#endif
+
+static unsigned int smp_max_threads __initdata = -1U;
+
+static int __init early_nosmt(char *s)
+{
+ smp_max_threads = 1;
+ return 0;
+}
+early_param("nosmt", early_nosmt);
+
+static int __init early_smt(char *s)
+{
+ get_option(&s, &smp_max_threads);
+ return 0;
+}
+early_param("smt", early_smt);
+
+/*
+ * The smp_cpu_state_mutex must be held when changing the state or polarization
+ * member of a pcpu data structure within the pcpu_devices array.
+ */
+DEFINE_MUTEX(smp_cpu_state_mutex);
+
+/*
+ * Signal processor helper functions.
+ */
+static inline int __pcpu_sigp_relax(u16 addr, u8 order, unsigned long parm)
+{
+ int cc;
+
+ while (1) {
+ cc = __pcpu_sigp(addr, order, parm, NULL);
+ if (cc != SIGP_CC_BUSY)
+ return cc;
+ cpu_relax();
+ }
+}
+
+static int pcpu_sigp_retry(struct pcpu *pcpu, u8 order, u32 parm)
+{
+ int cc, retry;
+
+ for (retry = 0; ; retry++) {
+ cc = __pcpu_sigp(pcpu->address, order, parm, NULL);
+ if (cc != SIGP_CC_BUSY)
+ break;
+ if (retry >= 3)
+ udelay(10);
+ }
+ return cc;
+}
+
+static inline int pcpu_stopped(struct pcpu *pcpu)
+{
+ u32 uninitialized_var(status);
+
+ if (__pcpu_sigp(pcpu->address, SIGP_SENSE,
+ 0, &status) != SIGP_CC_STATUS_STORED)
+ return 0;
+ return !!(status & (SIGP_STATUS_CHECK_STOP|SIGP_STATUS_STOPPED));
+}
+
+static inline int pcpu_running(struct pcpu *pcpu)
+{
+ if (__pcpu_sigp(pcpu->address, SIGP_SENSE_RUNNING,
+ 0, NULL) != SIGP_CC_STATUS_STORED)
+ return 1;
+ /* Status stored condition code is equivalent to cpu not running. */
+ return 0;
+}
+
+/*
+ * Find struct pcpu by cpu address.
+ */
+static struct pcpu *pcpu_find_address(const struct cpumask *mask, u16 address)
+{
+ int cpu;
+
+ for_each_cpu(cpu, mask)
+ if (pcpu_devices[cpu].address == address)
+ return pcpu_devices + cpu;
+ return NULL;
+}
+
+static void pcpu_ec_call(struct pcpu *pcpu, int ec_bit)
+{
+ int order;
+
+ if (test_and_set_bit(ec_bit, &pcpu->ec_mask))
+ return;
+ order = pcpu_running(pcpu) ? SIGP_EXTERNAL_CALL : SIGP_EMERGENCY_SIGNAL;
+ pcpu->ec_clk = get_tod_clock_fast();
+ pcpu_sigp_retry(pcpu, order, 0);
+}
+
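+/*
+ * Offsets of the topmost usable stack frame within the async and panic
+ * stack areas: top of the area minus the standard frame overhead and a
+ * struct pt_regs.
+ */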
+#define ASYNC_FRAME_OFFSET (ASYNC_SIZE - STACK_FRAME_OVERHEAD - __PT_SIZE)
+#define PANIC_FRAME_OFFSET (PAGE_SIZE - STACK_FRAME_OVERHEAD - __PT_SIZE)
+
+static int pcpu_alloc_lowcore(struct pcpu *pcpu, int cpu)
+{
+ unsigned long async_stack, panic_stack;
+ struct lowcore *lc;
+
+ if (pcpu != &pcpu_devices[0]) {
+ pcpu->lowcore = (struct lowcore *)
+ __get_free_pages(GFP_KERNEL | GFP_DMA, LC_ORDER);
+ async_stack = __get_free_pages(GFP_KERNEL, ASYNC_ORDER);
+ panic_stack = __get_free_page(GFP_KERNEL);
+ if (!pcpu->lowcore || !panic_stack || !async_stack)
+ goto out;
+ } else {
+ async_stack = pcpu->lowcore->async_stack - ASYNC_FRAME_OFFSET;
+ panic_stack = pcpu->lowcore->panic_stack - PANIC_FRAME_OFFSET;
+ }
+ lc = pcpu->lowcore;
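+	/*
+	 * Initialize the new lowcore: copy the first 512 bytes, which hold
+	 * the restart and interruption new PSWs, from the running cpu's
+	 * lowcore and clear the remainder.
+	 */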
+ memcpy(lc, &S390_lowcore, 512);
+ memset((char *) lc + 512, 0, sizeof(*lc) - 512);
+ lc->async_stack = async_stack + ASYNC_FRAME_OFFSET;
+ lc->panic_stack = panic_stack + PANIC_FRAME_OFFSET;
+ lc->cpu_nr = cpu;
+ lc->spinlock_lockval = arch_spin_lockval(cpu);
+ lc->spinlock_index = 0;
+ lc->br_r1_trampoline = 0x07f1; /* br %r1 */
+ if (nmi_alloc_per_cpu(lc))
+ goto out;
+ if (vdso_alloc_per_cpu(lc))
+ goto out_mcesa;
+ lowcore_ptr[cpu] = lc;
+ pcpu_sigp_retry(pcpu, SIGP_SET_PREFIX, (u32)(unsigned long) lc);
+ return 0;
+
+out_mcesa:
+ nmi_free_per_cpu(lc);
+out:
+ if (pcpu != &pcpu_devices[0]) {
+ free_page(panic_stack);
+ free_pages(async_stack, ASYNC_ORDER);
+ free_pages((unsigned long) pcpu->lowcore, LC_ORDER);
+ }
+ return -ENOMEM;
+}
+
+#ifdef CONFIG_HOTPLUG_CPU
+
+static void pcpu_free_lowcore(struct pcpu *pcpu)
+{
+ pcpu_sigp_retry(pcpu, SIGP_SET_PREFIX, 0);
+ lowcore_ptr[pcpu - pcpu_devices] = NULL;
+ vdso_free_per_cpu(pcpu->lowcore);
+ nmi_free_per_cpu(pcpu->lowcore);
+ if (pcpu == &pcpu_devices[0])
+ return;
+ free_page(pcpu->lowcore->panic_stack-PANIC_FRAME_OFFSET);
+ free_pages(pcpu->lowcore->async_stack-ASYNC_FRAME_OFFSET, ASYNC_ORDER);
+ free_pages((unsigned long) pcpu->lowcore, LC_ORDER);
+}
+
+#endif /* CONFIG_HOTPLUG_CPU */
+
+static void pcpu_prepare_secondary(struct pcpu *pcpu, int cpu)
+{
+ struct lowcore *lc = pcpu->lowcore;
+
+ cpumask_set_cpu(cpu, &init_mm.context.cpu_attach_mask);
+ cpumask_set_cpu(cpu, mm_cpumask(&init_mm));
+ lc->cpu_nr = cpu;
+ lc->spinlock_lockval = arch_spin_lockval(cpu);
+ lc->spinlock_index = 0;
+ lc->percpu_offset = __per_cpu_offset[cpu];
+ lc->kernel_asce = S390_lowcore.kernel_asce;
+ lc->user_asce = S390_lowcore.kernel_asce;
+ lc->machine_flags = S390_lowcore.machine_flags;
+ lc->user_timer = lc->system_timer = lc->steal_timer = 0;
+ __ctl_store(lc->cregs_save_area, 0, 15);
+ lc->cregs_save_area[1] = lc->kernel_asce;
+ lc->cregs_save_area[7] = lc->vdso_asce;
+ save_access_regs((unsigned int *) lc->access_regs_save_area);
+ memcpy(lc->stfle_fac_list, S390_lowcore.stfle_fac_list,
+ sizeof(lc->stfle_fac_list));
+ memcpy(lc->alt_stfle_fac_list, S390_lowcore.alt_stfle_fac_list,
+ sizeof(lc->alt_stfle_fac_list));
+ arch_spin_lock_setup(cpu);
+}
+
+static void pcpu_attach_task(struct pcpu *pcpu, struct task_struct *tsk)
+{
+ struct lowcore *lc = pcpu->lowcore;
+
+ lc->kernel_stack = (unsigned long) task_stack_page(tsk)
+ + THREAD_SIZE - STACK_FRAME_OVERHEAD - sizeof(struct pt_regs);
+ lc->current_task = (unsigned long) tsk;
+ lc->lpp = LPP_MAGIC;
+ lc->current_pid = tsk->pid;
+ lc->user_timer = tsk->thread.user_timer;
+ lc->guest_timer = tsk->thread.guest_timer;
+ lc->system_timer = tsk->thread.system_timer;
+ lc->hardirq_timer = tsk->thread.hardirq_timer;
+ lc->softirq_timer = tsk->thread.softirq_timer;
+ lc->steal_timer = 0;
+}
+
+static void pcpu_start_fn(struct pcpu *pcpu, void (*func)(void *), void *data)
+{
+ struct lowcore *lc = pcpu->lowcore;
+
+ lc->restart_stack = lc->kernel_stack;
+ lc->restart_fn = (unsigned long) func;
+ lc->restart_data = (unsigned long) data;
+ lc->restart_source = -1UL;
+ pcpu_sigp_retry(pcpu, SIGP_RESTART, 0);
+}
+
+/*
+ * Call function via PSW restart on pcpu and stop the current cpu.
+ */
+static void pcpu_delegate(struct pcpu *pcpu, void (*func)(void *),
+ void *data, unsigned long stack)
+{
+ struct lowcore *lc = lowcore_ptr[pcpu - pcpu_devices];
+ unsigned long source_cpu = stap();
+
+ __load_psw_mask(PSW_KERNEL_BITS);
+ if (pcpu->address == source_cpu)
+ func(data); /* should not return */
+ /* Stop target cpu (if func returns this stops the current cpu). */
+ pcpu_sigp_retry(pcpu, SIGP_STOP, 0);
+ /* Restart func on the target cpu and stop the current cpu. */
+ mem_assign_absolute(lc->restart_stack, stack);
+ mem_assign_absolute(lc->restart_fn, (unsigned long) func);
+ mem_assign_absolute(lc->restart_data, (unsigned long) data);
+ mem_assign_absolute(lc->restart_source, source_cpu);
+ __bpon();
+ asm volatile(
+ "0: sigp 0,%0,%2 # sigp restart to target cpu\n"
+ " brc 2,0b # busy, try again\n"
+ "1: sigp 0,%1,%3 # sigp stop to current cpu\n"
+ " brc 2,1b # busy, try again\n"
+ : : "d" (pcpu->address), "d" (source_cpu),
+ "K" (SIGP_RESTART), "K" (SIGP_STOP)
+ : "0", "1", "cc");
+ for (;;) ;
+}
+
+/*
+ * Enable additional logical cpus for multi-threading.
+ */
+static int pcpu_set_smt(unsigned int mtid)
+{
+ int cc;
+
+ if (smp_cpu_mtid == mtid)
+ return 0;
+ cc = __pcpu_sigp(0, SIGP_SET_MULTI_THREADING, mtid, NULL);
+ if (cc == 0) {
+ smp_cpu_mtid = mtid;
+ smp_cpu_mt_shift = 0;
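+		/*
+		 * smp_cpu_mt_shift is the number of address bits used for
+		 * the thread id: the smallest shift with 1 << shift > mtid.
+		 */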
+ while (smp_cpu_mtid >= (1U << smp_cpu_mt_shift))
+ smp_cpu_mt_shift++;
+ pcpu_devices[0].address = stap();
+ }
+ return cc;
+}
+
+/*
+ * Call function on an online CPU.
+ */
+void smp_call_online_cpu(void (*func)(void *), void *data)
+{
+ struct pcpu *pcpu;
+
+ /* Use the current cpu if it is online. */
+ pcpu = pcpu_find_address(cpu_online_mask, stap());
+ if (!pcpu)
+ /* Use the first online cpu. */
+ pcpu = pcpu_devices + cpumask_first(cpu_online_mask);
+ pcpu_delegate(pcpu, func, data, (unsigned long) restart_stack);
+}
+
+/*
+ * Call function on the ipl CPU.
+ */
+void smp_call_ipl_cpu(void (*func)(void *), void *data)
+{
+ struct lowcore *lc = pcpu_devices->lowcore;
+
+ if (pcpu_devices[0].address == stap())
+ lc = &S390_lowcore;
+
+ pcpu_delegate(&pcpu_devices[0], func, data,
+ lc->panic_stack - PANIC_FRAME_OFFSET + PAGE_SIZE);
+}
+
+int smp_find_processor_id(u16 address)
+{
+ int cpu;
+
+ for_each_present_cpu(cpu)
+ if (pcpu_devices[cpu].address == address)
+ return cpu;
+ return -1;
+}
+
+bool notrace arch_vcpu_is_preempted(int cpu)
+{
+ if (test_cpu_flag_of(CIF_ENABLED_WAIT, cpu))
+ return false;
+ if (pcpu_running(pcpu_devices + cpu))
+ return false;
+ return true;
+}
+EXPORT_SYMBOL(arch_vcpu_is_preempted);
+
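+/*
+ * Yield the cpu: diag 0x9c asks the hypervisor to give the remaining time
+ * slice to the target cpu, diag 0x44 simply ends the current time slice.
+ */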
+void notrace smp_yield_cpu(int cpu)
+{
+ if (MACHINE_HAS_DIAG9C) {
+ diag_stat_inc_norecursion(DIAG_STAT_X09C);
+ asm volatile("diag %0,0,0x9c"
+ : : "d" (pcpu_devices[cpu].address));
+ } else if (MACHINE_HAS_DIAG44) {
+ diag_stat_inc_norecursion(DIAG_STAT_X044);
+ asm volatile("diag 0,0,0x44");
+ }
+}
+
+/*
+ * Send all cpus an emergency shutdown signal. This gives the cpus the
+ * opportunity to complete outstanding interrupts.
+ */
+void notrace smp_emergency_stop(void)
+{
+ cpumask_t cpumask;
+ u64 end;
+ int cpu;
+
+ cpumask_copy(&cpumask, cpu_online_mask);
+ cpumask_clear_cpu(smp_processor_id(), &cpumask);
+
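+	/*
+	 * Allow roughly one second for the cpus to stop: the TOD clock
+	 * advances 4096 (1 << 12) units per microsecond.
+	 */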
+ end = get_tod_clock() + (1000000UL << 12);
+ for_each_cpu(cpu, &cpumask) {
+ struct pcpu *pcpu = pcpu_devices + cpu;
+ set_bit(ec_stop_cpu, &pcpu->ec_mask);
+ while (__pcpu_sigp(pcpu->address, SIGP_EMERGENCY_SIGNAL,
+ 0, NULL) == SIGP_CC_BUSY &&
+ get_tod_clock() < end)
+ cpu_relax();
+ }
+ while (get_tod_clock() < end) {
+ for_each_cpu(cpu, &cpumask)
+ if (pcpu_stopped(pcpu_devices + cpu))
+ cpumask_clear_cpu(cpu, &cpumask);
+ if (cpumask_empty(&cpumask))
+ break;
+ cpu_relax();
+ }
+}
+NOKPROBE_SYMBOL(smp_emergency_stop);
+
+/*
+ * Stop all cpus but the current one.
+ */
+void smp_send_stop(void)
+{
+ int cpu;
+
+ /* Disable all interrupts/machine checks */
+ __load_psw_mask(PSW_KERNEL_BITS | PSW_MASK_DAT);
+ trace_hardirqs_off();
+
+ debug_set_critical();
+
+ if (oops_in_progress)
+ smp_emergency_stop();
+
+ /* stop all processors */
+ for_each_online_cpu(cpu) {
+ if (cpu == smp_processor_id())
+ continue;
+ pcpu_sigp_retry(pcpu_devices + cpu, SIGP_STOP, 0);
+ while (!pcpu_stopped(pcpu_devices + cpu))
+ cpu_relax();
+ }
+}
+
+/*
+ * This is the main routine where commands issued by other
+ * cpus are handled.
+ */
+static void smp_handle_ext_call(void)
+{
+ unsigned long bits;
+
+ /* handle bit signal external calls */
+ bits = xchg(&pcpu_devices[smp_processor_id()].ec_mask, 0);
+ if (test_bit(ec_stop_cpu, &bits))
+ smp_stop_cpu();
+ if (test_bit(ec_schedule, &bits))
+ scheduler_ipi();
+ if (test_bit(ec_call_function_single, &bits))
+ generic_smp_call_function_single_interrupt();
+}
+
+static void do_ext_call_interrupt(struct ext_code ext_code,
+ unsigned int param32, unsigned long param64)
+{
+ inc_irq_stat(ext_code.code == 0x1202 ? IRQEXT_EXC : IRQEXT_EMS);
+ smp_handle_ext_call();
+}
+
+void arch_send_call_function_ipi_mask(const struct cpumask *mask)
+{
+ int cpu;
+
+ for_each_cpu(cpu, mask)
+ pcpu_ec_call(pcpu_devices + cpu, ec_call_function_single);
+}
+
+void arch_send_call_function_single_ipi(int cpu)
+{
+ pcpu_ec_call(pcpu_devices + cpu, ec_call_function_single);
+}
+
+/*
+ * This function sends a 'reschedule' IPI to another CPU.
+ * It goes straight through and wastes no time serializing
+ * anything. Worst case is that we lose a reschedule ...
+ */
+void smp_send_reschedule(int cpu)
+{
+ pcpu_ec_call(pcpu_devices + cpu, ec_schedule);
+}
+
+/*
+ * parameter area for the set/clear control bit callbacks
+ */
+struct ec_creg_mask_parms {
+ unsigned long orval;
+ unsigned long andval;
+ int cr;
+};
+
+/*
+ * callback for setting/clearing control bits
+ */
+static void smp_ctl_bit_callback(void *info)
+{
+ struct ec_creg_mask_parms *pp = info;
+ unsigned long cregs[16];
+
+ __ctl_store(cregs, 0, 15);
+ cregs[pp->cr] = (cregs[pp->cr] & pp->andval) | pp->orval;
+ __ctl_load(cregs, 0, 15);
+}
+
+/*
+ * Set a bit in a control register of all cpus
+ */
+void smp_ctl_set_bit(int cr, int bit)
+{
+ struct ec_creg_mask_parms parms = { 1UL << bit, -1UL, cr };
+
+ on_each_cpu(smp_ctl_bit_callback, &parms, 1);
+}
+EXPORT_SYMBOL(smp_ctl_set_bit);
+
+/*
+ * Clear a bit in a control register of all cpus
+ */
+void smp_ctl_clear_bit(int cr, int bit)
+{
+ struct ec_creg_mask_parms parms = { 0, ~(1UL << bit), cr };
+
+ on_each_cpu(smp_ctl_bit_callback, &parms, 1);
+}
+EXPORT_SYMBOL(smp_ctl_clear_bit);
+
+#ifdef CONFIG_CRASH_DUMP
+
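+/*
+ * Store the register state of a remote (stopped) cpu into the save areas
+ * of its lowcore so that it is available for the dump.
+ */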
+int smp_store_status(int cpu)
+{
+ struct pcpu *pcpu = pcpu_devices + cpu;
+ unsigned long pa;
+
+ pa = __pa(&pcpu->lowcore->floating_pt_save_area);
+ if (__pcpu_sigp_relax(pcpu->address, SIGP_STORE_STATUS_AT_ADDRESS,
+ pa) != SIGP_CC_ORDER_CODE_ACCEPTED)
+ return -EIO;
+ if (!MACHINE_HAS_VX && !MACHINE_HAS_GS)
+ return 0;
+ pa = __pa(pcpu->lowcore->mcesad & MCESA_ORIGIN_MASK);
+ if (MACHINE_HAS_GS)
+ pa |= pcpu->lowcore->mcesad & MCESA_LC_MASK;
+ if (__pcpu_sigp_relax(pcpu->address, SIGP_STORE_ADDITIONAL_STATUS,
+ pa) != SIGP_CC_ORDER_CODE_ACCEPTED)
+ return -EIO;
+ return 0;
+}
+
+/*
+ * Collect CPU state of the previous, crashed system.
+ * There are four cases:
+ * 1) standard zfcp dump
+ * condition: OLDMEM_BASE == NULL && ipl_info.type == IPL_TYPE_FCP_DUMP
+ * The state for all CPUs except the boot CPU needs to be collected
+ * with sigp stop-and-store-status. The boot CPU state is located in
+ * the absolute lowcore of the memory stored in the HSA. The zcore code
+ * will copy the boot CPU state from the HSA.
+ * 2) stand-alone kdump for SCSI (zfcp dump with swapped memory)
+ * condition: OLDMEM_BASE != NULL && ipl_info.type == IPL_TYPE_FCP_DUMP
+ * The state for all CPUs except the boot CPU needs to be collected
+ * with sigp stop-and-store-status. The firmware or the boot-loader
+ * stored the registers of the boot CPU in the absolute lowcore in the
+ * memory of the old system.
+ * 3) kdump and the old kernel did not store the CPU state,
+ * or stand-alone kdump for DASD
+ * condition: OLDMEM_BASE != NULL && !is_kdump_kernel()
+ * The state for all CPUs except the boot CPU needs to be collected
+ * with sigp stop-and-store-status. The kexec code or the boot-loader
+ * stored the registers of the boot CPU in the memory of the old system.
+ * 4) kdump and the old kernel stored the CPU state
+ * condition: OLDMEM_BASE != NULL && is_kdump_kernel()
+ * This case does not exist for s390 anymore, setup_arch explicitly
+ * deactivates the elfcorehdr= kernel parameter
+ */
+static __init void smp_save_cpu_vxrs(struct save_area *sa, u16 addr,
+ bool is_boot_cpu, unsigned long page)
+{
+ __vector128 *vxrs = (__vector128 *) page;
+
+ if (is_boot_cpu)
+ vxrs = boot_cpu_vector_save_area;
+ else
+ __pcpu_sigp_relax(addr, SIGP_STORE_ADDITIONAL_STATUS, page);
+ save_area_add_vxrs(sa, vxrs);
+}
+
+static __init void smp_save_cpu_regs(struct save_area *sa, u16 addr,
+ bool is_boot_cpu, unsigned long page)
+{
+ void *regs = (void *) page;
+
+ if (is_boot_cpu)
+ copy_oldmem_kernel(regs, (void *) __LC_FPREGS_SAVE_AREA, 512);
+ else
+ __pcpu_sigp_relax(addr, SIGP_STORE_STATUS_AT_ADDRESS, page);
+ save_area_add_regs(sa, regs);
+}
+
+void __init smp_save_dump_cpus(void)
+{
+ int addr, boot_cpu_addr, max_cpu_addr;
+ struct save_area *sa;
+ unsigned long page;
+ bool is_boot_cpu;
+
+ if (!(OLDMEM_BASE || ipl_info.type == IPL_TYPE_FCP_DUMP))
+ /* No previous system present, normal boot. */
+ return;
+ /* Allocate a page as dumping area for the store status sigps */
+ page = memblock_alloc_base(PAGE_SIZE, PAGE_SIZE, 1UL << 31);
+ /* Set multi-threading state to the previous system. */
+ pcpu_set_smt(sclp.mtid_prev);
+ boot_cpu_addr = stap();
+ max_cpu_addr = SCLP_MAX_CORES << sclp.mtid_prev;
+ for (addr = 0; addr <= max_cpu_addr; addr++) {
+ if (__pcpu_sigp_relax(addr, SIGP_SENSE, 0) ==
+ SIGP_CC_NOT_OPERATIONAL)
+ continue;
+ is_boot_cpu = (addr == boot_cpu_addr);
+ /* Allocate save area */
+ sa = save_area_alloc(is_boot_cpu);
+ if (!sa)
+ panic("could not allocate memory for save area\n");
+ if (MACHINE_HAS_VX)
+ /* Get the vector registers */
+ smp_save_cpu_vxrs(sa, addr, is_boot_cpu, page);
+ /*
+ * For a zfcp dump OLDMEM_BASE == NULL and the registers
+ * of the boot CPU are stored in the HSA. To retrieve
+ * these registers an SCLP request is required which is
+ * done by drivers/s390/char/zcore.c:init_cpu_info()
+ */
+ if (!is_boot_cpu || OLDMEM_BASE)
+ /* Get the CPU registers */
+ smp_save_cpu_regs(sa, addr, is_boot_cpu, page);
+ }
+ memblock_free(page, PAGE_SIZE);
+ diag308_reset();
+ pcpu_set_smt(0);
+}
+#endif /* CONFIG_CRASH_DUMP */
+
+void smp_cpu_set_polarization(int cpu, int val)
+{
+ pcpu_devices[cpu].polarization = val;
+}
+
+int smp_cpu_get_polarization(int cpu)
+{
+ return pcpu_devices[cpu].polarization;
+}
+
+static void __ref smp_get_core_info(struct sclp_core_info *info, int early)
+{
+ static int use_sigp_detection;
+ int address;
+
+ if (use_sigp_detection || sclp_get_core_info(info, early)) {
+ use_sigp_detection = 1;
+ for (address = 0;
+ address < (SCLP_MAX_CORES << smp_cpu_mt_shift);
+ address += (1U << smp_cpu_mt_shift)) {
+ if (__pcpu_sigp_relax(address, SIGP_SENSE, 0) ==
+ SIGP_CC_NOT_OPERATIONAL)
+ continue;
+ info->core[info->configured].core_id =
+ address >> smp_cpu_mt_shift;
+ info->configured++;
+ }
+ info->combined = info->configured;
+ }
+}
+
+static int smp_add_present_cpu(int cpu);
+
+static int smp_add_core(struct sclp_core_entry *core, cpumask_t *avail,
+ bool configured, bool early)
+{
+ struct pcpu *pcpu;
+ int cpu, nr, i;
+ u16 address;
+
+ nr = 0;
+ if (sclp.has_core_type && core->type != boot_core_type)
+ return nr;
+ cpu = cpumask_first(avail);
+ address = core->core_id << smp_cpu_mt_shift;
+ for (i = 0; (i <= smp_cpu_mtid) && (cpu < nr_cpu_ids); i++) {
+ if (pcpu_find_address(cpu_present_mask, address + i))
+ continue;
+ pcpu = pcpu_devices + cpu;
+ pcpu->address = address + i;
+ if (configured)
+ pcpu->state = CPU_STATE_CONFIGURED;
+ else
+ pcpu->state = CPU_STATE_STANDBY;
+ smp_cpu_set_polarization(cpu, POLARIZATION_UNKNOWN);
+ set_cpu_present(cpu, true);
+ if (!early && smp_add_present_cpu(cpu) != 0)
+ set_cpu_present(cpu, false);
+ else
+ nr++;
+ cpumask_clear_cpu(cpu, avail);
+ cpu = cpumask_next(cpu, avail);
+ }
+ return nr;
+}
+
+static int __smp_rescan_cpus(struct sclp_core_info *info, bool early)
+{
+ struct sclp_core_entry *core;
+ static cpumask_t avail;
+ bool configured;
+ u16 core_id;
+ int nr, i;
+
+ nr = 0;
+ cpumask_xor(&avail, cpu_possible_mask, cpu_present_mask);
+ /*
+ * Add IPL core first (which got logical CPU number 0) to make sure
+ * that all SMT threads get subsequent logical CPU numbers.
+ */
+ if (early) {
+ core_id = pcpu_devices[0].address >> smp_cpu_mt_shift;
+ for (i = 0; i < info->configured; i++) {
+ core = &info->core[i];
+ if (core->core_id == core_id) {
+ nr += smp_add_core(core, &avail, true, early);
+ break;
+ }
+ }
+ }
+ for (i = 0; i < info->combined; i++) {
+ configured = i < info->configured;
+ nr += smp_add_core(&info->core[i], &avail, configured, early);
+ }
+ return nr;
+}
+
+void __init smp_detect_cpus(void)
+{
+ unsigned int cpu, mtid, c_cpus, s_cpus;
+ struct sclp_core_info *info;
+ u16 address;
+
+ /* Get CPU information */
+ info = memblock_virt_alloc(sizeof(*info), 8);
+ smp_get_core_info(info, 1);
+ /* Find boot CPU type */
+ if (sclp.has_core_type) {
+ address = stap();
+ for (cpu = 0; cpu < info->combined; cpu++)
+ if (info->core[cpu].core_id == address) {
+ /* The boot cpu dictates the cpu type. */
+ boot_core_type = info->core[cpu].type;
+ break;
+ }
+ if (cpu >= info->combined)
+ panic("Could not find boot CPU type");
+ }
+
+ /* Set multi-threading state for the current system */
+ mtid = boot_core_type ? sclp.mtid : sclp.mtid_cp;
+ mtid = (mtid < smp_max_threads) ? mtid : smp_max_threads - 1;
+ pcpu_set_smt(mtid);
+
+ /* Print number of CPUs */
+ c_cpus = s_cpus = 0;
+ for (cpu = 0; cpu < info->combined; cpu++) {
+ if (sclp.has_core_type &&
+ info->core[cpu].type != boot_core_type)
+ continue;
+ if (cpu < info->configured)
+ c_cpus += smp_cpu_mtid + 1;
+ else
+ s_cpus += smp_cpu_mtid + 1;
+ }
+ pr_info("%d configured CPUs, %d standby CPUs\n", c_cpus, s_cpus);
+
+ /* Add CPUs present at boot */
+ get_online_cpus();
+ __smp_rescan_cpus(info, true);
+ put_online_cpus();
+ memblock_free_early((unsigned long)info, sizeof(*info));
+}
+
+/*
+ * Activate a secondary processor.
+ */
+static void smp_start_secondary(void *cpuvoid)
+{
+ int cpu = raw_smp_processor_id();
+
+ S390_lowcore.last_update_clock = get_tod_clock();
+ S390_lowcore.restart_stack = (unsigned long) restart_stack;
+ S390_lowcore.restart_fn = (unsigned long) do_restart;
+ S390_lowcore.restart_data = 0;
+ S390_lowcore.restart_source = -1UL;
+ restore_access_regs(S390_lowcore.access_regs_save_area);
+ __ctl_load(S390_lowcore.cregs_save_area, 0, 15);
+ __load_psw_mask(PSW_KERNEL_BITS | PSW_MASK_DAT);
+ set_cpu_flag(CIF_ASCE_PRIMARY);
+ set_cpu_flag(CIF_ASCE_SECONDARY);
+ cpu_init();
+ rcu_cpu_starting(cpu);
+ preempt_disable();
+ init_cpu_timer();
+ vtime_init();
+ pfault_init();
+ notify_cpu_starting(cpu);
+ if (topology_cpu_dedicated(cpu))
+ set_cpu_flag(CIF_DEDICATED_CPU);
+ else
+ clear_cpu_flag(CIF_DEDICATED_CPU);
+ set_cpu_online(cpu, true);
+ inc_irq_stat(CPU_RST);
+ local_irq_enable();
+ cpu_startup_entry(CPUHP_AP_ONLINE_IDLE);
+}
+
+/* Upping and downing of CPUs */
+int __cpu_up(unsigned int cpu, struct task_struct *tidle)
+{
+ struct pcpu *pcpu = pcpu_devices + cpu;
+ int rc;
+
+ if (pcpu->state != CPU_STATE_CONFIGURED)
+ return -EIO;
+ if (pcpu_sigp_retry(pcpu, SIGP_INITIAL_CPU_RESET, 0) !=
+ SIGP_CC_ORDER_CODE_ACCEPTED)
+ return -EIO;
+
+ rc = pcpu_alloc_lowcore(pcpu, cpu);
+ if (rc)
+ return rc;
+ pcpu_prepare_secondary(pcpu, cpu);
+ pcpu_attach_task(pcpu, tidle);
+ pcpu_start_fn(pcpu, smp_start_secondary, NULL);
+ /* Wait until cpu puts itself in the online & active maps */
+ while (!cpu_online(cpu))
+ cpu_relax();
+ return 0;
+}
+
+static unsigned int setup_possible_cpus __initdata;
+
+static int __init _setup_possible_cpus(char *s)
+{
+ get_option(&s, &setup_possible_cpus);
+ return 0;
+}
+early_param("possible_cpus", _setup_possible_cpus);
+
+#ifdef CONFIG_HOTPLUG_CPU
+
+int __cpu_disable(void)
+{
+ unsigned long cregs[16];
+
+ /* Handle possible pending IPIs */
+ smp_handle_ext_call();
+ set_cpu_online(smp_processor_id(), false);
+ /* Disable pseudo page faults on this cpu. */
+ pfault_fini();
+ /* Disable interrupt sources via control register. */
+ __ctl_store(cregs, 0, 15);
+ cregs[0] &= ~0x0000ee70UL; /* disable all external interrupts */
+ cregs[6] &= ~0xff000000UL; /* disable all I/O interrupts */
+ cregs[14] &= ~0x1f000000UL; /* disable most machine checks */
+ __ctl_load(cregs, 0, 15);
+ clear_cpu_flag(CIF_NOHZ_DELAY);
+ return 0;
+}
+
+void __cpu_die(unsigned int cpu)
+{
+ struct pcpu *pcpu;
+
+ /* Wait until target cpu is down */
+ pcpu = pcpu_devices + cpu;
+ while (!pcpu_stopped(pcpu))
+ cpu_relax();
+ pcpu_free_lowcore(pcpu);
+ cpumask_clear_cpu(cpu, mm_cpumask(&init_mm));
+ cpumask_clear_cpu(cpu, &init_mm.context.cpu_attach_mask);
+}
+
+void __noreturn cpu_die(void)
+{
+ idle_task_exit();
+ __bpon();
+ pcpu_sigp_retry(pcpu_devices + smp_processor_id(), SIGP_STOP, 0);
+ for (;;) ;
+}
+
+#endif /* CONFIG_HOTPLUG_CPU */
+
+void __init smp_fill_possible_mask(void)
+{
+ unsigned int possible, sclp_max, cpu;
+
+ sclp_max = max(sclp.mtid, sclp.mtid_cp) + 1;
+ sclp_max = min(smp_max_threads, sclp_max);
+ sclp_max = (sclp.max_cores * sclp_max) ?: nr_cpu_ids;
+ possible = setup_possible_cpus ?: nr_cpu_ids;
+ possible = min(possible, sclp_max);
+ for (cpu = 0; cpu < possible && cpu < nr_cpu_ids; cpu++)
+ set_cpu_possible(cpu, true);
+}
+
+void __init smp_prepare_cpus(unsigned int max_cpus)
+{
+ /* request the 0x1201 emergency signal external interrupt */
+ if (register_external_irq(EXT_IRQ_EMERGENCY_SIG, do_ext_call_interrupt))
+ panic("Couldn't request external interrupt 0x1201");
+ /* request the 0x1202 external call external interrupt */
+ if (register_external_irq(EXT_IRQ_EXTERNAL_CALL, do_ext_call_interrupt))
+ panic("Couldn't request external interrupt 0x1202");
+}
+
+void __init smp_prepare_boot_cpu(void)
+{
+ struct pcpu *pcpu = pcpu_devices;
+
+ WARN_ON(!cpu_present(0) || !cpu_online(0));
+ pcpu->state = CPU_STATE_CONFIGURED;
+ pcpu->lowcore = (struct lowcore *)(unsigned long) store_prefix();
+ S390_lowcore.percpu_offset = __per_cpu_offset[0];
+ smp_cpu_set_polarization(0, POLARIZATION_UNKNOWN);
+}
+
+void __init smp_cpus_done(unsigned int max_cpus)
+{
+}
+
+void __init smp_setup_processor_id(void)
+{
+ pcpu_devices[0].address = stap();
+ S390_lowcore.cpu_nr = 0;
+ S390_lowcore.spinlock_lockval = arch_spin_lockval(0);
+ S390_lowcore.spinlock_index = 0;
+}
+
+/*
+ * the frequency of the profiling timer can be changed
+ * by writing a multiplier value into /proc/profile.
+ *
+ * usually you want to run this on all CPUs ;)
+ */
+int setup_profiling_timer(unsigned int multiplier)
+{
+ return 0;
+}
+
+#ifdef CONFIG_HOTPLUG_CPU
+static ssize_t cpu_configure_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ ssize_t count;
+
+ mutex_lock(&smp_cpu_state_mutex);
+ count = sprintf(buf, "%d\n", pcpu_devices[dev->id].state);
+ mutex_unlock(&smp_cpu_state_mutex);
+ return count;
+}
+
+static ssize_t cpu_configure_store(struct device *dev,
+ struct device_attribute *attr,
+ const char *buf, size_t count)
+{
+ struct pcpu *pcpu;
+ int cpu, val, rc, i;
+ char delim;
+
+ if (sscanf(buf, "%d %c", &val, &delim) != 1)
+ return -EINVAL;
+ if (val != 0 && val != 1)
+ return -EINVAL;
+ get_online_cpus();
+ mutex_lock(&smp_cpu_state_mutex);
+ rc = -EBUSY;
+ /* disallow configuration changes of online cpus and cpu 0 */
+ cpu = dev->id;
+ cpu = smp_get_base_cpu(cpu);
+ if (cpu == 0)
+ goto out;
+ for (i = 0; i <= smp_cpu_mtid; i++)
+ if (cpu_online(cpu + i))
+ goto out;
+ pcpu = pcpu_devices + cpu;
+ rc = 0;
+ switch (val) {
+ case 0:
+ if (pcpu->state != CPU_STATE_CONFIGURED)
+ break;
+ rc = sclp_core_deconfigure(pcpu->address >> smp_cpu_mt_shift);
+ if (rc)
+ break;
+ for (i = 0; i <= smp_cpu_mtid; i++) {
+ if (cpu + i >= nr_cpu_ids || !cpu_present(cpu + i))
+ continue;
+ pcpu[i].state = CPU_STATE_STANDBY;
+ smp_cpu_set_polarization(cpu + i,
+ POLARIZATION_UNKNOWN);
+ }
+ topology_expect_change();
+ break;
+ case 1:
+ if (pcpu->state != CPU_STATE_STANDBY)
+ break;
+ rc = sclp_core_configure(pcpu->address >> smp_cpu_mt_shift);
+ if (rc)
+ break;
+ for (i = 0; i <= smp_cpu_mtid; i++) {
+ if (cpu + i >= nr_cpu_ids || !cpu_present(cpu + i))
+ continue;
+ pcpu[i].state = CPU_STATE_CONFIGURED;
+ smp_cpu_set_polarization(cpu + i,
+ POLARIZATION_UNKNOWN);
+ }
+ topology_expect_change();
+ break;
+ default:
+ break;
+ }
+out:
+ mutex_unlock(&smp_cpu_state_mutex);
+ put_online_cpus();
+ return rc ? rc : count;
+}
+static DEVICE_ATTR(configure, 0644, cpu_configure_show, cpu_configure_store);
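+/*
+ * The attribute above is exposed per CPU by the generic cpu subsystem,
+ * so it can typically be driven from user space, e.g.:
+ *	echo 0 > /sys/devices/system/cpu/cpu<N>/configure   (deconfigure)
+ *	echo 1 > /sys/devices/system/cpu/cpu<N>/configure   (configure)
+ */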
+#endif /* CONFIG_HOTPLUG_CPU */
+
+static ssize_t show_cpu_address(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ return sprintf(buf, "%d\n", pcpu_devices[dev->id].address);
+}
+static DEVICE_ATTR(address, 0444, show_cpu_address, NULL);
+
+static struct attribute *cpu_common_attrs[] = {
+#ifdef CONFIG_HOTPLUG_CPU
+ &dev_attr_configure.attr,
+#endif
+ &dev_attr_address.attr,
+ NULL,
+};
+
+static struct attribute_group cpu_common_attr_group = {
+ .attrs = cpu_common_attrs,
+};
+
+static struct attribute *cpu_online_attrs[] = {
+ &dev_attr_idle_count.attr,
+ &dev_attr_idle_time_us.attr,
+ NULL,
+};
+
+static struct attribute_group cpu_online_attr_group = {
+ .attrs = cpu_online_attrs,
+};
+
+static int smp_cpu_online(unsigned int cpu)
+{
+ struct device *s = &per_cpu(cpu_device, cpu)->dev;
+
+ return sysfs_create_group(&s->kobj, &cpu_online_attr_group);
+}
+static int smp_cpu_pre_down(unsigned int cpu)
+{
+ struct device *s = &per_cpu(cpu_device, cpu)->dev;
+
+ sysfs_remove_group(&s->kobj, &cpu_online_attr_group);
+ return 0;
+}
+
+static int smp_add_present_cpu(int cpu)
+{
+ struct device *s;
+ struct cpu *c;
+ int rc;
+
+ c = kzalloc(sizeof(*c), GFP_KERNEL);
+ if (!c)
+ return -ENOMEM;
+ per_cpu(cpu_device, cpu) = c;
+ s = &c->dev;
+ c->hotpluggable = 1;
+ rc = register_cpu(c, cpu);
+ if (rc)
+ goto out;
+ rc = sysfs_create_group(&s->kobj, &cpu_common_attr_group);
+ if (rc)
+ goto out_cpu;
+ rc = topology_cpu_init(c);
+ if (rc)
+ goto out_topology;
+ return 0;
+
+out_topology:
+ sysfs_remove_group(&s->kobj, &cpu_common_attr_group);
+out_cpu:
+#ifdef CONFIG_HOTPLUG_CPU
+ unregister_cpu(c);
+#endif
+out:
+ return rc;
+}
+
+#ifdef CONFIG_HOTPLUG_CPU
+
+int __ref smp_rescan_cpus(void)
+{
+ struct sclp_core_info *info;
+ int nr;
+
+ info = kzalloc(sizeof(*info), GFP_KERNEL);
+ if (!info)
+ return -ENOMEM;
+ smp_get_core_info(info, 0);
+ get_online_cpus();
+ mutex_lock(&smp_cpu_state_mutex);
+ nr = __smp_rescan_cpus(info, false);
+ mutex_unlock(&smp_cpu_state_mutex);
+ put_online_cpus();
+ kfree(info);
+ if (nr)
+ topology_schedule_update();
+ return 0;
+}
+
+static ssize_t __ref rescan_store(struct device *dev,
+ struct device_attribute *attr,
+ const char *buf,
+ size_t count)
+{
+ int rc;
+
+ rc = lock_device_hotplug_sysfs();
+ if (rc)
+ return rc;
+ rc = smp_rescan_cpus();
+ unlock_device_hotplug();
+ return rc ? rc : count;
+}
+static DEVICE_ATTR_WO(rescan);
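+/*
+ * Writing any value to this attribute triggers a rescan, e.g.:
+ *	echo 1 > /sys/devices/system/cpu/rescan
+ * (the file is created on cpu_subsys.dev_root by s390_smp_init() below)
+ */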
+#endif /* CONFIG_HOTPLUG_CPU */
+
+static int __init s390_smp_init(void)
+{
+ int cpu, rc = 0;
+
+#ifdef CONFIG_HOTPLUG_CPU
+ rc = device_create_file(cpu_subsys.dev_root, &dev_attr_rescan);
+ if (rc)
+ return rc;
+#endif
+ for_each_present_cpu(cpu) {
+ rc = smp_add_present_cpu(cpu);
+ if (rc)
+ goto out;
+ }
+
+ rc = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "s390/smp:online",
+ smp_cpu_online, smp_cpu_pre_down);
+ rc = rc <= 0 ? rc : 0;
+out:
+ return rc;
+}
+subsys_initcall(s390_smp_init);
diff --git a/arch/s390/kernel/stacktrace.c b/arch/s390/kernel/stacktrace.c
new file mode 100644
index 000000000..460dcfba7
--- /dev/null
+++ b/arch/s390/kernel/stacktrace.c
@@ -0,0 +1,75 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Stack trace management functions
+ *
+ * Copyright IBM Corp. 2006
+ * Author(s): Heiko Carstens <heiko.carstens@de.ibm.com>
+ */
+
+#include <linux/sched.h>
+#include <linux/sched/debug.h>
+#include <linux/stacktrace.h>
+#include <linux/kallsyms.h>
+#include <linux/export.h>
+
+static int __save_address(void *data, unsigned long address, int nosched)
+{
+ struct stack_trace *trace = data;
+
+ if (nosched && in_sched_functions(address))
+ return 0;
+ if (trace->skip > 0) {
+ trace->skip--;
+ return 0;
+ }
+ if (trace->nr_entries < trace->max_entries) {
+ trace->entries[trace->nr_entries++] = address;
+ return 0;
+ }
+ return 1;
+}
+
+static int save_address(void *data, unsigned long address, int reliable)
+{
+ return __save_address(data, address, 0);
+}
+
+static int save_address_nosched(void *data, unsigned long address, int reliable)
+{
+ return __save_address(data, address, 1);
+}
+
+void save_stack_trace(struct stack_trace *trace)
+{
+ unsigned long sp;
+
+ sp = current_stack_pointer();
+ dump_trace(save_address, trace, NULL, sp);
+ if (trace->nr_entries < trace->max_entries)
+ trace->entries[trace->nr_entries++] = ULONG_MAX;
+}
+EXPORT_SYMBOL_GPL(save_stack_trace);
+
+void save_stack_trace_tsk(struct task_struct *tsk, struct stack_trace *trace)
+{
+ unsigned long sp;
+
+ sp = tsk->thread.ksp;
+ if (tsk == current)
+ sp = current_stack_pointer();
+ dump_trace(save_address_nosched, trace, tsk, sp);
+ if (trace->nr_entries < trace->max_entries)
+ trace->entries[trace->nr_entries++] = ULONG_MAX;
+}
+EXPORT_SYMBOL_GPL(save_stack_trace_tsk);
+
+void save_stack_trace_regs(struct pt_regs *regs, struct stack_trace *trace)
+{
+ unsigned long sp;
+
+ sp = kernel_stack_pointer(regs);
+ dump_trace(save_address, trace, NULL, sp);
+ if (trace->nr_entries < trace->max_entries)
+ trace->entries[trace->nr_entries++] = ULONG_MAX;
+}
+EXPORT_SYMBOL_GPL(save_stack_trace_regs);
diff --git a/arch/s390/kernel/sthyi.c b/arch/s390/kernel/sthyi.c
new file mode 100644
index 000000000..888cc2f16
--- /dev/null
+++ b/arch/s390/kernel/sthyi.c
@@ -0,0 +1,516 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * store hypervisor information instruction emulation functions.
+ *
+ * Copyright IBM Corp. 2016
+ * Author(s): Janosch Frank <frankja@linux.vnet.ibm.com>
+ */
+#include <linux/errno.h>
+#include <linux/pagemap.h>
+#include <linux/vmalloc.h>
+#include <linux/syscalls.h>
+#include <linux/mutex.h>
+#include <asm/asm-offsets.h>
+#include <asm/sclp.h>
+#include <asm/diag.h>
+#include <asm/sysinfo.h>
+#include <asm/ebcdic.h>
+#include <asm/facility.h>
+#include <asm/sthyi.h>
+#include "entry.h"
+
+#define DED_WEIGHT 0xffff
+/*
+ * CP and IFL as EBCDIC strings; SP (0x40) marks the end of the string,
+ * as the names are padded with spaces.
+ */
+#define CP 0xc3d7404040404040UL
+#define IFL 0xc9c6d34040404040UL
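+/*
+ * Decoded from the EBCDIC bytes above: 0xc3='C', 0xd7='P', 0xc9='I',
+ * 0xc6='F', 0xd3='L', 0x40=' ' (space padding).
+ */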
+
+enum hdr_flags {
+ HDR_NOT_LPAR = 0x10,
+ HDR_STACK_INCM = 0x20,
+ HDR_STSI_UNAV = 0x40,
+ HDR_PERF_UNAV = 0x80,
+};
+
+enum mac_validity {
+ MAC_NAME_VLD = 0x20,
+ MAC_ID_VLD = 0x40,
+ MAC_CNT_VLD = 0x80,
+};
+
+enum par_flag {
+ PAR_MT_EN = 0x80,
+};
+
+enum par_validity {
+ PAR_GRP_VLD = 0x08,
+ PAR_ID_VLD = 0x10,
+ PAR_ABS_VLD = 0x20,
+ PAR_WGHT_VLD = 0x40,
+ PAR_PCNT_VLD = 0x80,
+};
+
+struct hdr_sctn {
+ u8 infhflg1;
+ u8 infhflg2; /* reserved */
+ u8 infhval1; /* reserved */
+ u8 infhval2; /* reserved */
+ u8 reserved[3];
+ u8 infhygct;
+ u16 infhtotl;
+ u16 infhdln;
+ u16 infmoff;
+ u16 infmlen;
+ u16 infpoff;
+ u16 infplen;
+ u16 infhoff1;
+ u16 infhlen1;
+ u16 infgoff1;
+ u16 infglen1;
+ u16 infhoff2;
+ u16 infhlen2;
+ u16 infgoff2;
+ u16 infglen2;
+ u16 infhoff3;
+ u16 infhlen3;
+ u16 infgoff3;
+ u16 infglen3;
+ u8 reserved2[4];
+} __packed;
+
+struct mac_sctn {
+ u8 infmflg1; /* reserved */
+ u8 infmflg2; /* reserved */
+ u8 infmval1;
+ u8 infmval2; /* reserved */
+ u16 infmscps;
+ u16 infmdcps;
+ u16 infmsifl;
+ u16 infmdifl;
+ char infmname[8];
+ char infmtype[4];
+ char infmmanu[16];
+ char infmseq[16];
+ char infmpman[4];
+ u8 reserved[4];
+} __packed;
+
+struct par_sctn {
+ u8 infpflg1;
+ u8 infpflg2; /* reserved */
+ u8 infpval1;
+ u8 infpval2; /* reserved */
+ u16 infppnum;
+ u16 infpscps;
+ u16 infpdcps;
+ u16 infpsifl;
+ u16 infpdifl;
+ u16 reserved;
+ char infppnam[8];
+ u32 infpwbcp;
+ u32 infpabcp;
+ u32 infpwbif;
+ u32 infpabif;
+ char infplgnm[8];
+ u32 infplgcp;
+ u32 infplgif;
+} __packed;
+
+struct sthyi_sctns {
+ struct hdr_sctn hdr;
+ struct mac_sctn mac;
+ struct par_sctn par;
+} __packed;
+
+struct cpu_inf {
+ u64 lpar_cap;
+ u64 lpar_grp_cap;
+ u64 lpar_weight;
+ u64 all_weight;
+ int cpu_num_ded;
+ int cpu_num_shd;
+};
+
+struct lpar_cpu_inf {
+ struct cpu_inf cp;
+ struct cpu_inf ifl;
+};
+
+/*
+ * STHYI requires extensive locking in the higher hypervisors
+ * and is expensive in terms of CPU and memory. Therefore we
+ * cache the retrieved data, which stays valid for one second.
+ */
+#define CACHE_VALID_JIFFIES HZ
+
+struct sthyi_info {
+ void *info;
+ unsigned long end;
+};
+
+static DEFINE_MUTEX(sthyi_mutex);
+static struct sthyi_info sthyi_cache;
+
+static inline u64 cpu_id(u8 ctidx, void *diag224_buf)
+{
+ return *((u64 *)(diag224_buf + (ctidx + 1) * DIAG204_CPU_NAME_LEN));
+}
+
+/*
+ * Scales the cpu capping from the lpar range to the one expected in
+ * sthyi data.
+ *
+ * diag204 reports a cap in hundredths of processor units.
+ * z/VM's range for one core is 0 - 0x10000.
+ */
+static u32 scale_cap(u32 in)
+{
+ return (0x10000 * in) / 100;
+}
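+/*
+ * Worked example for scale_cap(): a diag204 cap of 100 (one full core)
+ * scales to 0x10000, a cap of 150 (1.5 cores) to 0x18000.
+ */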
+
+static void fill_hdr(struct sthyi_sctns *sctns)
+{
+ sctns->hdr.infhdln = sizeof(sctns->hdr);
+ sctns->hdr.infmoff = sizeof(sctns->hdr);
+ sctns->hdr.infmlen = sizeof(sctns->mac);
+ sctns->hdr.infplen = sizeof(sctns->par);
+ sctns->hdr.infpoff = sctns->hdr.infhdln + sctns->hdr.infmlen;
+ sctns->hdr.infhtotl = sctns->hdr.infpoff + sctns->hdr.infplen;
+}
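+/*
+ * The offsets above describe a packed layout: the header is followed
+ * immediately by the machine section and then the partition section;
+ * infhtotl is the combined length of all three.
+ */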
+
+static void fill_stsi_mac(struct sthyi_sctns *sctns,
+ struct sysinfo_1_1_1 *sysinfo)
+{
+ sclp_ocf_cpc_name_copy(sctns->mac.infmname);
+ if (*(u64 *)sctns->mac.infmname != 0)
+ sctns->mac.infmval1 |= MAC_NAME_VLD;
+
+ if (stsi(sysinfo, 1, 1, 1))
+ return;
+
+ memcpy(sctns->mac.infmtype, sysinfo->type, sizeof(sctns->mac.infmtype));
+ memcpy(sctns->mac.infmmanu, sysinfo->manufacturer, sizeof(sctns->mac.infmmanu));
+ memcpy(sctns->mac.infmpman, sysinfo->plant, sizeof(sctns->mac.infmpman));
+ memcpy(sctns->mac.infmseq, sysinfo->sequence, sizeof(sctns->mac.infmseq));
+
+ sctns->mac.infmval1 |= MAC_ID_VLD;
+}
+
+static void fill_stsi_par(struct sthyi_sctns *sctns,
+ struct sysinfo_2_2_2 *sysinfo)
+{
+ if (stsi(sysinfo, 2, 2, 2))
+ return;
+
+ sctns->par.infppnum = sysinfo->lpar_number;
+ memcpy(sctns->par.infppnam, sysinfo->name, sizeof(sctns->par.infppnam));
+
+ sctns->par.infpval1 |= PAR_ID_VLD;
+}
+
+static void fill_stsi(struct sthyi_sctns *sctns)
+{
+ void *sysinfo;
+
+ /* Errors are handled through the validity bits in the response. */
+ sysinfo = (void *)__get_free_page(GFP_KERNEL);
+ if (!sysinfo)
+ return;
+
+ fill_stsi_mac(sctns, sysinfo);
+ fill_stsi_par(sctns, sysinfo);
+
+ free_pages((unsigned long)sysinfo, 0);
+}
+
+static void fill_diag_mac(struct sthyi_sctns *sctns,
+ struct diag204_x_phys_block *block,
+ void *diag224_buf)
+{
+ int i;
+
+ for (i = 0; i < block->hdr.cpus; i++) {
+ switch (cpu_id(block->cpus[i].ctidx, diag224_buf)) {
+ case CP:
+ if (block->cpus[i].weight == DED_WEIGHT)
+ sctns->mac.infmdcps++;
+ else
+ sctns->mac.infmscps++;
+ break;
+ case IFL:
+ if (block->cpus[i].weight == DED_WEIGHT)
+ sctns->mac.infmdifl++;
+ else
+ sctns->mac.infmsifl++;
+ break;
+ }
+ }
+ sctns->mac.infmval1 |= MAC_CNT_VLD;
+}
+
+/* Returns a pointer to the next partition block. */
+static struct diag204_x_part_block *lpar_cpu_inf(struct lpar_cpu_inf *part_inf,
+ bool this_lpar,
+ void *diag224_buf,
+ struct diag204_x_part_block *block)
+{
+ int i, capped = 0, weight_cp = 0, weight_ifl = 0;
+ struct cpu_inf *cpu_inf;
+
+ for (i = 0; i < block->hdr.rcpus; i++) {
+ if (!(block->cpus[i].cflag & DIAG204_CPU_ONLINE))
+ continue;
+
+ switch (cpu_id(block->cpus[i].ctidx, diag224_buf)) {
+ case CP:
+ cpu_inf = &part_inf->cp;
+ if (block->cpus[i].cur_weight < DED_WEIGHT)
+ weight_cp |= block->cpus[i].cur_weight;
+ break;
+ case IFL:
+ cpu_inf = &part_inf->ifl;
+ if (block->cpus[i].cur_weight < DED_WEIGHT)
+ weight_ifl |= block->cpus[i].cur_weight;
+ break;
+ default:
+ continue;
+ }
+
+ if (!this_lpar)
+ continue;
+
+ capped |= block->cpus[i].cflag & DIAG204_CPU_CAPPED;
+ cpu_inf->lpar_cap |= block->cpus[i].cpu_type_cap;
+ cpu_inf->lpar_grp_cap |= block->cpus[i].group_cpu_type_cap;
+
+ if (block->cpus[i].weight == DED_WEIGHT)
+ cpu_inf->cpu_num_ded += 1;
+ else
+ cpu_inf->cpu_num_shd += 1;
+ }
+
+ if (this_lpar && capped) {
+ part_inf->cp.lpar_weight = weight_cp;
+ part_inf->ifl.lpar_weight = weight_ifl;
+ }
+ part_inf->cp.all_weight += weight_cp;
+ part_inf->ifl.all_weight += weight_ifl;
+ return (struct diag204_x_part_block *)&block->cpus[i];
+}
+
+static void fill_diag(struct sthyi_sctns *sctns)
+{
+ int i, r, pages;
+ bool this_lpar;
+ void *diag204_buf;
+ void *diag224_buf = NULL;
+ struct diag204_x_info_blk_hdr *ti_hdr;
+ struct diag204_x_part_block *part_block;
+ struct diag204_x_phys_block *phys_block;
+ struct lpar_cpu_inf lpar_inf = {};
+
+ /* Errors are handled through the validity bits in the response. */
+ pages = diag204((unsigned long)DIAG204_SUBC_RSI |
+ (unsigned long)DIAG204_INFO_EXT, 0, NULL);
+ if (pages <= 0)
+ return;
+
+ diag204_buf = vmalloc(array_size(pages, PAGE_SIZE));
+ if (!diag204_buf)
+ return;
+
+ r = diag204((unsigned long)DIAG204_SUBC_STIB7 |
+ (unsigned long)DIAG204_INFO_EXT, pages, diag204_buf);
+ if (r < 0)
+ goto out;
+
+ diag224_buf = (void *)__get_free_page(GFP_KERNEL | GFP_DMA);
+ if (!diag224_buf || diag224(diag224_buf))
+ goto out;
+
+ ti_hdr = diag204_buf;
+ part_block = diag204_buf + sizeof(*ti_hdr);
+
+ for (i = 0; i < ti_hdr->npar; i++) {
+ /*
+ * For the calling lpar we also need to get the cpu
+ * caps and weights. The time information block header
+ * specifies the offset to the partition block of the
+ * caller lpar, so we know when we process its data.
+ */
+ this_lpar = (void *)part_block - diag204_buf == ti_hdr->this_part;
+ part_block = lpar_cpu_inf(&lpar_inf, this_lpar, diag224_buf,
+ part_block);
+ }
+
+ phys_block = (struct diag204_x_phys_block *)part_block;
+ part_block = diag204_buf + ti_hdr->this_part;
+ if (part_block->hdr.mtid)
+ sctns->par.infpflg1 = PAR_MT_EN;
+
+ sctns->par.infpval1 |= PAR_GRP_VLD;
+ sctns->par.infplgcp = scale_cap(lpar_inf.cp.lpar_grp_cap);
+ sctns->par.infplgif = scale_cap(lpar_inf.ifl.lpar_grp_cap);
+ memcpy(sctns->par.infplgnm, part_block->hdr.hardware_group_name,
+ sizeof(sctns->par.infplgnm));
+
+ sctns->par.infpscps = lpar_inf.cp.cpu_num_shd;
+ sctns->par.infpdcps = lpar_inf.cp.cpu_num_ded;
+ sctns->par.infpsifl = lpar_inf.ifl.cpu_num_shd;
+ sctns->par.infpdifl = lpar_inf.ifl.cpu_num_ded;
+ sctns->par.infpval1 |= PAR_PCNT_VLD;
+
+ sctns->par.infpabcp = scale_cap(lpar_inf.cp.lpar_cap);
+ sctns->par.infpabif = scale_cap(lpar_inf.ifl.lpar_cap);
+ sctns->par.infpval1 |= PAR_ABS_VLD;
+
+ /*
+ * Everything below needs global performance data to be
+ * meaningful.
+ */
+ if (!(ti_hdr->flags & DIAG204_LPAR_PHYS_FLG)) {
+ sctns->hdr.infhflg1 |= HDR_PERF_UNAV;
+ goto out;
+ }
+
+ fill_diag_mac(sctns, phys_block, diag224_buf);
+
+ if (lpar_inf.cp.lpar_weight) {
+ sctns->par.infpwbcp = sctns->mac.infmscps * 0x10000 *
+ lpar_inf.cp.lpar_weight / lpar_inf.cp.all_weight;
+ }
+
+ if (lpar_inf.ifl.lpar_weight) {
+ sctns->par.infpwbif = sctns->mac.infmsifl * 0x10000 *
+ lpar_inf.ifl.lpar_weight / lpar_inf.ifl.all_weight;
+ }
+ sctns->par.infpval1 |= PAR_WGHT_VLD;
+
+out:
+ free_page((unsigned long)diag224_buf);
+ vfree(diag204_buf);
+}
+
+static int sthyi(u64 vaddr, u64 *rc)
+{
+ register u64 code asm("0") = 0;
+ register u64 addr asm("2") = vaddr;
+ register u64 rcode asm("3");
+ int cc;
+
+ asm volatile(
+ ".insn rre,0xB2560000,%[code],%[addr]\n"
+ "ipm %[cc]\n"
+ "srl %[cc],28\n"
+ : [cc] "=d" (cc), "=d" (rcode)
+ : [code] "d" (code), [addr] "a" (addr)
+ : "memory", "cc");
+ *rc = rcode;
+ return cc;
+}
+
+static int fill_dst(void *dst, u64 *rc)
+{
+ struct sthyi_sctns *sctns = (struct sthyi_sctns *)dst;
+
+ /*
+ * If the facility is on, we don't want to emulate the instruction.
+ * We ask the hypervisor to provide the data.
+ */
+ if (test_facility(74))
+ return sthyi((u64)dst, rc);
+
+ fill_hdr(sctns);
+ fill_stsi(sctns);
+ fill_diag(sctns);
+ *rc = 0;
+ return 0;
+}
+
+static int sthyi_init_cache(void)
+{
+ if (sthyi_cache.info)
+ return 0;
+ sthyi_cache.info = (void *)get_zeroed_page(GFP_KERNEL);
+ if (!sthyi_cache.info)
+ return -ENOMEM;
+ sthyi_cache.end = jiffies - 1; /* expired */
+ return 0;
+}
+
+static int sthyi_update_cache(u64 *rc)
+{
+ int r;
+
+ memset(sthyi_cache.info, 0, PAGE_SIZE);
+ r = fill_dst(sthyi_cache.info, rc);
+ if (r)
+ return r;
+ sthyi_cache.end = jiffies + CACHE_VALID_JIFFIES;
+ return r;
+}
+
+/*
+ * sthyi_fill - Fill page with data returned by the STHYI instruction
+ *
+ * @dst: Pointer to zeroed page
+ * @rc: Pointer for storing the return code of the instruction
+ *
+ * Fills the destination with system information returned by the STHYI
+ * instruction. The data is generated by emulation or execution of STHYI,
+ * if available. The return value is the condition code that would be
+ * returned, the rc parameter is the return code which is passed in
+ * register R2 + 1.
+ */
+int sthyi_fill(void *dst, u64 *rc)
+{
+ int r;
+
+ mutex_lock(&sthyi_mutex);
+ r = sthyi_init_cache();
+ if (r)
+ goto out;
+
+ if (time_is_before_jiffies(sthyi_cache.end)) {
+ /* cache expired */
+ r = sthyi_update_cache(rc);
+ if (r)
+ goto out;
+ }
+ *rc = 0;
+ memcpy(dst, sthyi_cache.info, PAGE_SIZE);
+out:
+ mutex_unlock(&sthyi_mutex);
+ return r;
+}
+EXPORT_SYMBOL_GPL(sthyi_fill);
+
+SYSCALL_DEFINE4(s390_sthyi, unsigned long, function_code, void __user *, buffer,
+ u64 __user *, return_code, unsigned long, flags)
+{
+ u64 sthyi_rc;
+ void *info;
+ int r;
+
+ if (flags)
+ return -EINVAL;
+ if (function_code != STHYI_FC_CP_IFL_CAP)
+ return -EOPNOTSUPP;
+ info = (void *)get_zeroed_page(GFP_KERNEL);
+ if (!info)
+ return -ENOMEM;
+ r = sthyi_fill(info, &sthyi_rc);
+ if (r < 0)
+ goto out;
+ if (return_code && put_user(sthyi_rc, return_code)) {
+ r = -EFAULT;
+ goto out;
+ }
+ if (copy_to_user(buffer, info, PAGE_SIZE))
+ r = -EFAULT;
+out:
+ free_page((unsigned long)info);
+ return r;
+}
diff --git a/arch/s390/kernel/suspend.c b/arch/s390/kernel/suspend.c
new file mode 100644
index 000000000..75b7b3079
--- /dev/null
+++ b/arch/s390/kernel/suspend.c
@@ -0,0 +1,240 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Suspend support specific for s390.
+ *
+ * Copyright IBM Corp. 2009
+ *
+ * Author(s): Hans-Joachim Picht <hans@linux.vnet.ibm.com>
+ */
+
+#include <linux/pfn.h>
+#include <linux/suspend.h>
+#include <linux/mm.h>
+#include <linux/pci.h>
+#include <asm/ctl_reg.h>
+#include <asm/ipl.h>
+#include <asm/cio.h>
+#include <asm/sections.h>
+#include "entry.h"
+
+/*
+ * The restore of the saved pages in a hibernation image will set
+ * the change and referenced bits in the storage key for each page.
+ * Overindication of the referenced bits after a hibernation cycle
+ * does not cause any harm but the overindication of the change bits
+ * would cause trouble.
+ * Use the ARCH_SAVE_PAGE_KEYS hooks to save the storage key of each
+ * page to the most significant byte of the associated page frame
+ * number in the hibernation image.
+ */
+
+/*
+ * Key storage is allocated as a linked list of pages.
+ * The size of the keys array is (PAGE_SIZE - sizeof(struct page_key_data *)).
+ */
+struct page_key_data {
+ struct page_key_data *next;
+ unsigned char data[];
+};
+
+#define PAGE_KEY_DATA_SIZE (PAGE_SIZE - sizeof(struct page_key_data *))
+
+static struct page_key_data *page_key_data;
+static struct page_key_data *page_key_rp, *page_key_wp;
+static unsigned long page_key_rx, page_key_wx;
+unsigned long suspend_zero_pages;
+
+/*
+ * For each page in the hibernation image one additional byte is
+ * stored in the most significant byte of the page frame number.
+ * On suspend no additional memory is required but on resume the
+ * keys need to be memorized until the page data has been restored.
+ * Only then can the storage keys be set to their old state.
+ */
+unsigned long page_key_additional_pages(unsigned long pages)
+{
+ return DIV_ROUND_UP(pages, PAGE_KEY_DATA_SIZE);
+}
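+/*
+ * Worked example: with 4KB pages and 8 byte pointers each page_key_data
+ * page holds PAGE_SIZE - 8 = 4088 key bytes, so an image of 1000000
+ * pages needs DIV_ROUND_UP(1000000, 4088) = 245 additional pages.
+ */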
+
+/*
+ * Free page_key_data list of arrays.
+ */
+void page_key_free(void)
+{
+ struct page_key_data *pkd;
+
+ while (page_key_data) {
+ pkd = page_key_data;
+ page_key_data = pkd->next;
+ free_page((unsigned long) pkd);
+ }
+}
+
+/*
+ * Allocate page_key_data list of arrays with enough room to store
+ * one byte for each page in the hibernation image.
+ */
+int page_key_alloc(unsigned long pages)
+{
+ struct page_key_data *pk;
+ unsigned long size;
+
+ size = DIV_ROUND_UP(pages, PAGE_KEY_DATA_SIZE);
+ while (size--) {
+ pk = (struct page_key_data *) get_zeroed_page(GFP_KERNEL);
+ if (!pk) {
+ page_key_free();
+ return -ENOMEM;
+ }
+ pk->next = page_key_data;
+ page_key_data = pk;
+ }
+ page_key_rp = page_key_wp = page_key_data;
+ page_key_rx = page_key_wx = 0;
+ return 0;
+}
+
+/*
+ * Save the storage key into the upper 8 bits of the page frame number.
+ */
+void page_key_read(unsigned long *pfn)
+{
+ struct page *page;
+ unsigned long addr;
+ unsigned char key;
+
+ page = pfn_to_page(*pfn);
+ addr = (unsigned long) page_address(page);
+ key = (unsigned char) page_get_storage_key(addr) & 0x7f;
+ if (arch_test_page_nodat(page))
+ key |= 0x80;
+ *(unsigned char *) pfn = key;
+}
+
+/*
+ * Extract the storage key from the upper 8 bits of the page frame number
+ * and store it in the page_key_data list of arrays.
+ */
+void page_key_memorize(unsigned long *pfn)
+{
+ page_key_wp->data[page_key_wx] = *(unsigned char *) pfn;
+ *(unsigned char *) pfn = 0;
+ if (++page_key_wx < PAGE_KEY_DATA_SIZE)
+ return;
+ page_key_wp = page_key_wp->next;
+ page_key_wx = 0;
+}
+
+/*
+ * Get the next key from the page_key_data list of arrays and set the
+ * storage key of the page referred by @address. If @address refers to
+ * a "safe" page the swsusp_arch_resume code will transfer the storage
+ * key from the buffer page to the original page.
+ */
+void page_key_write(void *address)
+{
+ struct page *page;
+ unsigned char key;
+
+ key = page_key_rp->data[page_key_rx];
+ page_set_storage_key((unsigned long) address, key & 0x7f, 0);
+ page = virt_to_page(address);
+ if (key & 0x80)
+ arch_set_page_nodat(page, 0);
+ else
+ arch_set_page_dat(page, 0);
+ if (++page_key_rx >= PAGE_KEY_DATA_SIZE)
+ return;
+ page_key_rp = page_key_rp->next;
+ page_key_rx = 0;
+}
+
+int pfn_is_nosave(unsigned long pfn)
+{
+ unsigned long nosave_begin_pfn = PFN_DOWN(__pa(&__nosave_begin));
+ unsigned long nosave_end_pfn = PFN_DOWN(__pa(&__nosave_end));
+ unsigned long end_rodata_pfn = PFN_DOWN(__pa(__end_rodata)) - 1;
+ unsigned long stext_pfn = PFN_DOWN(__pa(_stext));
+
+ /* Always save lowcore pages (LC protection might be enabled). */
+ if (pfn <= LC_PAGES)
+ return 0;
+ if (pfn >= nosave_begin_pfn && pfn < nosave_end_pfn)
+ return 1;
+ /* Skip memory holes and read-only pages (DCSS, ...). */
+ if (pfn >= stext_pfn && pfn <= end_rodata_pfn)
+ return 0;
+ if (tprot(PFN_PHYS(pfn)))
+ return 1;
+ return 0;
+}
+
+/*
+ * PM notifier callback for suspend
+ */
+static int suspend_pm_cb(struct notifier_block *nb, unsigned long action,
+ void *ptr)
+{
+ switch (action) {
+ case PM_SUSPEND_PREPARE:
+ case PM_HIBERNATION_PREPARE:
+ suspend_zero_pages = __get_free_pages(GFP_KERNEL, LC_ORDER);
+ if (!suspend_zero_pages)
+ return NOTIFY_BAD;
+ break;
+ case PM_POST_SUSPEND:
+ case PM_POST_HIBERNATION:
+ free_pages(suspend_zero_pages, LC_ORDER);
+ break;
+ default:
+ return NOTIFY_DONE;
+ }
+ return NOTIFY_OK;
+}
+
+static int __init suspend_pm_init(void)
+{
+ pm_notifier(suspend_pm_cb, 0);
+ return 0;
+}
+arch_initcall(suspend_pm_init);
+
+void save_processor_state(void)
+{
+ /* swsusp_arch_suspend() actually saves all cpu register contents.
+ * Machine checks must be disabled since swsusp_arch_suspend() stores
+ * register contents to their lowcore save areas. That's the same
+ * place where register contents on machine checks would be saved.
+ * To avoid register corruption disable machine checks.
+ * We must also disable machine checks in the new psw mask for
+ * program checks, since swsusp_arch_suspend() may generate program
+ * checks. Disabling machine checks for all other new psw masks is
+ * just paranoia.
+ */
+ local_mcck_disable();
+ /* Disable lowcore protection */
+ __ctl_clear_bit(0,28);
+ S390_lowcore.external_new_psw.mask &= ~PSW_MASK_MCHECK;
+ S390_lowcore.svc_new_psw.mask &= ~PSW_MASK_MCHECK;
+ S390_lowcore.io_new_psw.mask &= ~PSW_MASK_MCHECK;
+ S390_lowcore.program_new_psw.mask &= ~PSW_MASK_MCHECK;
+}
+
+void restore_processor_state(void)
+{
+ S390_lowcore.external_new_psw.mask |= PSW_MASK_MCHECK;
+ S390_lowcore.svc_new_psw.mask |= PSW_MASK_MCHECK;
+ S390_lowcore.io_new_psw.mask |= PSW_MASK_MCHECK;
+ S390_lowcore.program_new_psw.mask |= PSW_MASK_MCHECK;
+ /* Enable lowcore protection */
+ __ctl_set_bit(0,28);
+ local_mcck_enable();
+}
+
+/* Called at the end of swsusp_arch_resume */
+void s390_early_resume(void)
+{
+ lgr_info_log();
+ channel_subsystem_reinit();
+ zpci_rescan();
+}
diff --git a/arch/s390/kernel/swsusp.S b/arch/s390/kernel/swsusp.S
new file mode 100644
index 000000000..c1a080b11
--- /dev/null
+++ b/arch/s390/kernel/swsusp.S
@@ -0,0 +1,286 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * S390 64-bit swsusp implementation
+ *
+ * Copyright IBM Corp. 2009
+ *
+ * Author(s): Hans-Joachim Picht <hans@linux.vnet.ibm.com>
+ * Michael Holzheu <holzheu@linux.vnet.ibm.com>
+ */
+
+#include <linux/linkage.h>
+#include <asm/page.h>
+#include <asm/ptrace.h>
+#include <asm/thread_info.h>
+#include <asm/asm-offsets.h>
+#include <asm/nospec-insn.h>
+#include <asm/sigp.h>
+
+/*
+ * Save register context in absolute 0 lowcore and call swsusp_save() to
+ * create in-memory kernel image. The context is saved in the designated
+ * "store status" memory locations (see POP).
+ * We return from this function twice. The first time during the suspend to
+ * disk process. The second time via the swsusp_arch_resume() function
+ * (see below) in the resume process.
+ * This function runs with disabled interrupts.
+ */
+ GEN_BR_THUNK %r14
+
+ .section .text
+ENTRY(swsusp_arch_suspend)
+ stmg %r6,%r15,__SF_GPRS(%r15)
+ lgr %r1,%r15
+ aghi %r15,-STACK_FRAME_OVERHEAD
+ stg %r1,__SF_BACKCHAIN(%r15)
+
+ /* Store FPU registers */
+ brasl %r14,save_fpu_regs
+
+ /* Deactivate DAT */
+ stnsm __SF_EMPTY(%r15),0xfb
+
+ /* Store prefix register on stack */
+ stpx __SF_EMPTY(%r15)
+
+ /* Save prefix register contents for lowcore copy */
+ llgf %r10,__SF_EMPTY(%r15)
+
+ /* Get pointer to save area */
+ lghi %r1,0x1000
+
+ /* Save CPU address */
+ stap __LC_EXT_CPU_ADDR(%r0)
+
+ /* Store registers */
+ mvc 0x318(4,%r1),__SF_EMPTY(%r15) /* move prefix to lowcore */
+ stam %a0,%a15,0x340(%r1) /* store access registers */
+ stctg %c0,%c15,0x380(%r1) /* store control registers */
+ stmg %r0,%r15,0x280(%r1) /* store general registers */
+
+ stpt 0x328(%r1) /* store timer */
+ stck __SF_EMPTY(%r15) /* store clock */
+ stckc 0x330(%r1) /* store clock comparator */
+
+ /* Update cputime accounting before going to sleep */
+ lg %r0,__LC_LAST_UPDATE_TIMER
+ slg %r0,0x328(%r1)
+ alg %r0,__LC_SYSTEM_TIMER
+ stg %r0,__LC_SYSTEM_TIMER
+ mvc __LC_LAST_UPDATE_TIMER(8),0x328(%r1)
+ lg %r0,__LC_LAST_UPDATE_CLOCK
+ slg %r0,__SF_EMPTY(%r15)
+ alg %r0,__LC_STEAL_TIMER
+ stg %r0,__LC_STEAL_TIMER
+ mvc __LC_LAST_UPDATE_CLOCK(8),__SF_EMPTY(%r15)
+
+ /* Activate DAT */
+ stosm __SF_EMPTY(%r15),0x04
+
+ /* Set prefix page to zero */
+ xc __SF_EMPTY(4,%r15),__SF_EMPTY(%r15)
+ spx __SF_EMPTY(%r15)
+
+ /* Save absolute zero pages */
+ larl %r2,suspend_zero_pages
+ lg %r2,0(%r2)
+ lghi %r4,0
+ lghi %r3,2*PAGE_SIZE
+ lghi %r5,2*PAGE_SIZE
+1: mvcle %r2,%r4,0
+ jo 1b
+
+ /* Copy lowcore to absolute zero lowcore */
+ lghi %r2,0
+ lgr %r4,%r10
+ lghi %r3,2*PAGE_SIZE
+ lghi %r5,2*PAGE_SIZE
+1: mvcle %r2,%r4,0
+ jo 1b
+
+ /* Save image */
+ brasl %r14,swsusp_save
+
+ /* Restore prefix register and return */
+ lghi %r1,0x1000
+ spx 0x318(%r1)
+ lmg %r6,%r15,STACK_FRAME_OVERHEAD + __SF_GPRS(%r15)
+ lghi %r2,0
+ BR_EX %r14
+
+/*
+ * Restore saved memory image to correct place and restore register context.
+ * Then we return to the function that called swsusp_arch_suspend().
+ * swsusp_arch_resume() runs with disabled interrupts.
+ */
+ENTRY(swsusp_arch_resume)
+ stmg %r6,%r15,__SF_GPRS(%r15)
+ lgr %r1,%r15
+ aghi %r15,-STACK_FRAME_OVERHEAD
+ stg %r1,__SF_BACKCHAIN(%r15)
+
+ /* Make all free pages stable */
+ lghi %r2,1
+ brasl %r14,arch_set_page_states
+
+ /* Deactivate DAT */
+ stnsm __SF_EMPTY(%r15),0xfb
+
+ /* Set prefix page to zero */
+ xc __SF_EMPTY(4,%r15),__SF_EMPTY(%r15)
+ spx __SF_EMPTY(%r15)
+
+ /* Restore saved image */
+ larl %r1,restore_pblist
+ lg %r1,0(%r1)
+ ltgr %r1,%r1
+ jz 2f
+0:
+ lg %r2,8(%r1)
+ lg %r4,0(%r1)
+ iske %r0,%r4
+ lghi %r3,PAGE_SIZE
+ lghi %r5,PAGE_SIZE
+1:
+ mvcle %r2,%r4,0
+ jo 1b
+ lg %r2,8(%r1)
+ sske %r0,%r2
+ lg %r1,16(%r1)
+ ltgr %r1,%r1
+ jnz 0b
+2:
+ ptlb /* flush tlb */
+
+ /* Reset System */
+ larl %r1,restart_entry
+ larl %r2,.Lrestart_diag308_psw
+ og %r1,0(%r2)
+ stg %r1,0(%r0)
+ larl %r1,.Lnew_pgm_check_psw
+ epsw %r2,%r3
+ stm %r2,%r3,0(%r1)
+ mvc __LC_PGM_NEW_PSW(16,%r0),0(%r1)
+ lghi %r0,0
+ diag %r0,%r0,0x308
+restart_entry:
+ lhi %r1,1
+ sigp %r1,%r0,SIGP_SET_ARCHITECTURE
+ sam64
+#ifdef CONFIG_SMP
+ larl %r1,smp_cpu_mt_shift
+ icm %r1,15,0(%r1)
+ jz smt_done
+ llgfr %r1,%r1
+smt_loop:
+ sigp %r1,%r0,SIGP_SET_MULTI_THREADING
+ brc 8,smt_done /* accepted */
+ brc 2,smt_loop /* busy, try again */
+smt_done:
+#endif
+ larl %r1,.Lnew_pgm_check_psw
+ lpswe 0(%r1)
+pgm_check_entry:
+
+ /* Switch to original suspend CPU */
+ larl %r1,.Lresume_cpu /* Resume CPU address: r2 */
+ stap 0(%r1)
+ llgh %r2,0(%r1)
+ llgh %r1,__LC_EXT_CPU_ADDR(%r0) /* Suspend CPU address: r1 */
+ cgr %r1,%r2
+ je restore_registers /* r1 = r2 -> nothing to do */
+ larl %r4,.Lrestart_suspend_psw /* Set new restart PSW */
+ mvc __LC_RST_NEW_PSW(16,%r0),0(%r4)
+3:
+ sigp %r9,%r1,SIGP_INITIAL_CPU_RESET /* sigp initial cpu reset */
+ brc 8,4f /* accepted */
+ brc 2,3b /* busy, try again */
+
+ /* Suspend CPU not available -> panic */
+ larl %r15,init_thread_union
+ aghi %r15,1<<(PAGE_SHIFT+THREAD_SIZE_ORDER)
+ aghi %r15,-STACK_FRAME_OVERHEAD
+ larl %r2,.Lpanic_string
+ brasl %r14,sclp_early_printk_force
+ larl %r3,.Ldisabled_wait_31
+ lpsw 0(%r3)
+4:
+ /* Switch to suspend CPU */
+ sigp %r9,%r1,SIGP_RESTART /* sigp restart to suspend CPU */
+ brc 2,4b /* busy, try again */
+5:
+ sigp %r9,%r2,SIGP_STOP /* sigp stop to current resume CPU */
+ brc 2,5b /* busy, try again */
+6: j 6b
+
+restart_suspend:
+ larl %r1,.Lresume_cpu
+ llgh %r2,0(%r1)
+7:
+ sigp %r9,%r2,SIGP_SENSE /* sigp sense, wait for resume CPU */
+ brc 8,7b /* accepted, status 0, still running */
+ brc 2,7b /* busy, try again */
+ tmll %r9,0x40 /* Test if resume CPU is stopped */
+ jz 7b
+
+restore_registers:
+ /* Restore registers */
+ lghi %r13,0x1000 /* %r1 = pointer to save area */
+
+ /* Ignore time spent in suspended state. */
+ llgf %r1,0x318(%r13)
+ stck __LC_LAST_UPDATE_CLOCK(%r1)
+ spt 0x328(%r13) /* reprogram timer */
+ //sckc 0x330(%r13) /* set clock comparator */
+
+ lctlg %c0,%c15,0x380(%r13) /* load control registers */
+ lam %a0,%a15,0x340(%r13) /* load access registers */
+
+ /* Load old stack */
+ lg %r15,0x2f8(%r13)
+
+ /* Save prefix register */
+ mvc __SF_EMPTY(4,%r15),0x318(%r13)
+
+ /* Restore absolute zero pages */
+ lghi %r2,0
+ larl %r4,suspend_zero_pages
+ lg %r4,0(%r4)
+ lghi %r3,2*PAGE_SIZE
+ lghi %r5,2*PAGE_SIZE
+1: mvcle %r2,%r4,0
+ jo 1b
+
+ /* Restore prefix register */
+ spx __SF_EMPTY(%r15)
+
+ /* Activate DAT */
+ stosm __SF_EMPTY(%r15),0x04
+
+ /* Make all free pages unstable */
+ lghi %r2,0
+ brasl %r14,arch_set_page_states
+
+ /* Call arch specific early resume code */
+ brasl %r14,s390_early_resume
+
+ /* Return 0 */
+ lmg %r6,%r15,STACK_FRAME_OVERHEAD + __SF_GPRS(%r15)
+ lghi %r2,0
+ BR_EX %r14
+
+ .section .data..nosave,"aw",@progbits
+ .align 8
+.Ldisabled_wait_31:
+ .long 0x000a0000,0x00000000
+.Lpanic_string:
+ .asciz "Resume not possible because suspend CPU is no longer available\n"
+ .align 8
+.Lrestart_diag308_psw:
+ .long 0x00080000,0x80000000
+.Lrestart_suspend_psw:
+ .quad 0x0000000180000000,restart_suspend
+.Lnew_pgm_check_psw:
+ .quad 0,pgm_check_entry
+.Lresume_cpu:
+ .byte 0,0
diff --git a/arch/s390/kernel/sys_s390.c b/arch/s390/kernel/sys_s390.c
new file mode 100644
index 000000000..31cefe0c2
--- /dev/null
+++ b/arch/s390/kernel/sys_s390.c
@@ -0,0 +1,92 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * S390 version
+ * Copyright IBM Corp. 1999, 2000
+ * Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com),
+ * Thomas Spatzier (tspat@de.ibm.com)
+ *
+ * Derived from "arch/i386/kernel/sys_i386.c"
+ *
+ * This file contains various random system calls that
+ * have a non-standard calling sequence on the Linux/s390
+ * platform.
+ */
+
+#include <linux/errno.h>
+#include <linux/sched.h>
+#include <linux/mm.h>
+#include <linux/fs.h>
+#include <linux/smp.h>
+#include <linux/sem.h>
+#include <linux/msg.h>
+#include <linux/shm.h>
+#include <linux/stat.h>
+#include <linux/syscalls.h>
+#include <linux/mman.h>
+#include <linux/file.h>
+#include <linux/utsname.h>
+#include <linux/personality.h>
+#include <linux/unistd.h>
+#include <linux/ipc.h>
+#include <linux/uaccess.h>
+#include "entry.h"
+
+/*
+ * Perform the mmap() system call. Linux for S/390 isn't able to handle more
+ * than 5 system call parameters, so this system call uses a memory block
+ * for parameter passing.
+ */
+
+struct s390_mmap_arg_struct {
+ unsigned long addr;
+ unsigned long len;
+ unsigned long prot;
+ unsigned long flags;
+ unsigned long fd;
+ unsigned long offset;
+};
+
+SYSCALL_DEFINE1(mmap2, struct s390_mmap_arg_struct __user *, arg)
+{
+ struct s390_mmap_arg_struct a;
+ int error = -EFAULT;
+
+ if (copy_from_user(&a, arg, sizeof(a)))
+ goto out;
+ error = ksys_mmap_pgoff(a.addr, a.len, a.prot, a.flags, a.fd, a.offset);
+out:
+ return error;
+}
+
+/*
+ * sys_ipc() is the de-multiplexer for the SysV IPC calls.
+ */
+SYSCALL_DEFINE5(s390_ipc, uint, call, int, first, unsigned long, second,
+ unsigned long, third, void __user *, ptr)
+{
+ if (call >> 16)
+ return -EINVAL;
+ /* The s390 sys_ipc variant has only five parameters instead of six
+ * like the generic variant. The only difference is the handling of
+ * the SEMTIMEDOP subcall where on s390 the third parameter is used
+ * as a pointer to a struct timespec where the generic variant uses
+ * the fifth parameter.
+ * Therefore we can call the generic variant by simply passing the
+ * third parameter also as fifth parameter.
+ */
+ return sys_ipc(call, first, second, third, ptr, third);
+}
+
+SYSCALL_DEFINE1(s390_personality, unsigned int, personality)
+{
+ unsigned int ret;
+
+ if (personality(current->personality) == PER_LINUX32 &&
+ personality(personality) == PER_LINUX)
+ personality |= PER_LINUX32;
+ ret = sys_personality(personality);
+ if (personality(ret) == PER_LINUX32)
+ ret &= ~PER_LINUX32;
+
+ return ret;
+}
diff --git a/arch/s390/kernel/syscalls/Makefile b/arch/s390/kernel/syscalls/Makefile
new file mode 100644
index 000000000..4d929edc8
--- /dev/null
+++ b/arch/s390/kernel/syscalls/Makefile
@@ -0,0 +1,52 @@
+# SPDX-License-Identifier: GPL-2.0
+
+gen := arch/$(ARCH)/include/generated
+kapi := $(gen)/asm
+uapi := $(gen)/uapi/asm
+
+syscall := $(srctree)/$(src)/syscall.tbl
+systbl := $(srctree)/$(src)/syscalltbl
+
+gen-y := $(kapi)/syscall_table.h
+kapi-hdrs-y := $(kapi)/unistd_nr.h
+uapi-hdrs-y := $(uapi)/unistd_32.h
+uapi-hdrs-y += $(uapi)/unistd_64.h
+
+targets += $(addprefix ../../../,$(gen-y) $(kapi-hdrs-y) $(uapi-hdrs-y))
+
+PHONY += kapi uapi
+
+kapi: $(gen-y) $(kapi-hdrs-y)
+uapi: $(uapi-hdrs-y)
+
+
+# Create output directory if not already present
+_dummy := $(shell [ -d '$(uapi)' ] || mkdir -p '$(uapi)') \
+ $(shell [ -d '$(kapi)' ] || mkdir -p '$(kapi)')
+
+define filechk_syshdr
+ $(CONFIG_SHELL) '$(systbl)' -H -a $(syshdr_abi_$(basetarget)) -f "$2" < $<
+endef
+
+define filechk_sysnr
+ $(CONFIG_SHELL) '$(systbl)' -N -a $(sysnr_abi_$(basetarget)) < $<
+endef
+
+define filechk_syscalls
+ $(CONFIG_SHELL) '$(systbl)' -S < $<
+endef
+
+syshdr_abi_unistd_32 := common,32
+$(uapi)/unistd_32.h: $(syscall) FORCE
+ $(call filechk,syshdr,$@)
+
+syshdr_abi_unistd_64 := common,64
+$(uapi)/unistd_64.h: $(syscall) FORCE
+ $(call filechk,syshdr,$@)
+
+$(kapi)/syscall_table.h: $(syscall) FORCE
+ $(call filechk,syscalls)
+
+sysnr_abi_unistd_nr := common,32,64
+$(kapi)/unistd_nr.h: $(syscall) FORCE
+ $(call filechk,sysnr)
diff --git a/arch/s390/kernel/syscalls/syscall.tbl b/arch/s390/kernel/syscalls/syscall.tbl
new file mode 100644
index 000000000..022fc099b
--- /dev/null
+++ b/arch/s390/kernel/syscalls/syscall.tbl
@@ -0,0 +1,393 @@
+# SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
+#
+# System call table for s390
+#
+# Format:
+#
+# <nr> <abi> <syscall> <entry-64bit> <compat-entry>
+#
+# where <abi> can be common, 64, or 32
+
+1 common exit sys_exit sys_exit
+2 common fork sys_fork sys_fork
+3 common read sys_read compat_sys_s390_read
+4 common write sys_write compat_sys_s390_write
+5 common open sys_open compat_sys_open
+6 common close sys_close sys_close
+7 common restart_syscall sys_restart_syscall sys_restart_syscall
+8 common creat sys_creat compat_sys_creat
+9 common link sys_link compat_sys_link
+10 common unlink sys_unlink compat_sys_unlink
+11 common execve sys_execve compat_sys_execve
+12 common chdir sys_chdir compat_sys_chdir
+13 32 time - compat_sys_time
+14 common mknod sys_mknod compat_sys_mknod
+15 common chmod sys_chmod compat_sys_chmod
+16 32 lchown - compat_sys_s390_lchown16
+19 common lseek sys_lseek compat_sys_lseek
+20 common getpid sys_getpid sys_getpid
+21 common mount sys_mount compat_sys_mount
+22 common umount sys_oldumount compat_sys_oldumount
+23 32 setuid - compat_sys_s390_setuid16
+24 32 getuid - compat_sys_s390_getuid16
+25 32 stime - compat_sys_stime
+26 common ptrace sys_ptrace compat_sys_ptrace
+27 common alarm sys_alarm sys_alarm
+29 common pause sys_pause sys_pause
+30 common utime sys_utime compat_sys_utime
+33 common access sys_access compat_sys_access
+34 common nice sys_nice sys_nice
+36 common sync sys_sync sys_sync
+37 common kill sys_kill sys_kill
+38 common rename sys_rename compat_sys_rename
+39 common mkdir sys_mkdir compat_sys_mkdir
+40 common rmdir sys_rmdir compat_sys_rmdir
+41 common dup sys_dup sys_dup
+42 common pipe sys_pipe compat_sys_pipe
+43 common times sys_times compat_sys_times
+45 common brk sys_brk compat_sys_brk
+46 32 setgid - compat_sys_s390_setgid16
+47 32 getgid - compat_sys_s390_getgid16
+48 common signal sys_signal compat_sys_signal
+49 32 geteuid - compat_sys_s390_geteuid16
+50 32 getegid - compat_sys_s390_getegid16
+51 common acct sys_acct compat_sys_acct
+52 common umount2 sys_umount compat_sys_umount
+54 common ioctl sys_ioctl compat_sys_ioctl
+55 common fcntl sys_fcntl compat_sys_fcntl
+57 common setpgid sys_setpgid sys_setpgid
+60 common umask sys_umask sys_umask
+61 common chroot sys_chroot compat_sys_chroot
+62 common ustat sys_ustat compat_sys_ustat
+63 common dup2 sys_dup2 sys_dup2
+64 common getppid sys_getppid sys_getppid
+65 common getpgrp sys_getpgrp sys_getpgrp
+66 common setsid sys_setsid sys_setsid
+67 common sigaction sys_sigaction compat_sys_sigaction
+70 32 setreuid - compat_sys_s390_setreuid16
+71 32 setregid - compat_sys_s390_setregid16
+72 common sigsuspend sys_sigsuspend compat_sys_sigsuspend
+73 common sigpending sys_sigpending compat_sys_sigpending
+74 common sethostname sys_sethostname compat_sys_sethostname
+75 common setrlimit sys_setrlimit compat_sys_setrlimit
+76 32 getrlimit - compat_sys_old_getrlimit
+77 common getrusage sys_getrusage compat_sys_getrusage
+78 common gettimeofday sys_gettimeofday compat_sys_gettimeofday
+79 common settimeofday sys_settimeofday compat_sys_settimeofday
+80 32 getgroups - compat_sys_s390_getgroups16
+81 32 setgroups - compat_sys_s390_setgroups16
+83 common symlink sys_symlink compat_sys_symlink
+85 common readlink sys_readlink compat_sys_readlink
+86 common uselib sys_uselib compat_sys_uselib
+87 common swapon sys_swapon compat_sys_swapon
+88 common reboot sys_reboot compat_sys_reboot
+89 common readdir - compat_sys_old_readdir
+90 common mmap sys_old_mmap compat_sys_s390_old_mmap
+91 common munmap sys_munmap compat_sys_munmap
+92 common truncate sys_truncate compat_sys_truncate
+93 common ftruncate sys_ftruncate compat_sys_ftruncate
+94 common fchmod sys_fchmod sys_fchmod
+95 32 fchown - compat_sys_s390_fchown16
+96 common getpriority sys_getpriority sys_getpriority
+97 common setpriority sys_setpriority sys_setpriority
+99 common statfs sys_statfs compat_sys_statfs
+100 common fstatfs sys_fstatfs compat_sys_fstatfs
+101 32 ioperm - -
+102 common socketcall sys_socketcall compat_sys_socketcall
+103 common syslog sys_syslog compat_sys_syslog
+104 common setitimer sys_setitimer compat_sys_setitimer
+105 common getitimer sys_getitimer compat_sys_getitimer
+106 common stat sys_newstat compat_sys_newstat
+107 common lstat sys_newlstat compat_sys_newlstat
+108 common fstat sys_newfstat compat_sys_newfstat
+110 common lookup_dcookie sys_lookup_dcookie compat_sys_lookup_dcookie
+111 common vhangup sys_vhangup sys_vhangup
+112 common idle - -
+114 common wait4 sys_wait4 compat_sys_wait4
+115 common swapoff sys_swapoff compat_sys_swapoff
+116 common sysinfo sys_sysinfo compat_sys_sysinfo
+117 common ipc sys_s390_ipc compat_sys_s390_ipc
+118 common fsync sys_fsync sys_fsync
+119 common sigreturn sys_sigreturn compat_sys_sigreturn
+120 common clone sys_clone compat_sys_clone
+121 common setdomainname sys_setdomainname compat_sys_setdomainname
+122 common uname sys_newuname compat_sys_newuname
+124 common adjtimex sys_adjtimex compat_sys_adjtimex
+125 common mprotect sys_mprotect compat_sys_mprotect
+126 common sigprocmask sys_sigprocmask compat_sys_sigprocmask
+127 common create_module - -
+128 common init_module sys_init_module compat_sys_init_module
+129 common delete_module sys_delete_module compat_sys_delete_module
+130 common get_kernel_syms - -
+131 common quotactl sys_quotactl compat_sys_quotactl
+132 common getpgid sys_getpgid sys_getpgid
+133 common fchdir sys_fchdir sys_fchdir
+134 common bdflush sys_bdflush compat_sys_bdflush
+135 common sysfs sys_sysfs compat_sys_sysfs
+136 common personality sys_s390_personality sys_s390_personality
+137 common afs_syscall - -
+138 32 setfsuid - compat_sys_s390_setfsuid16
+139 32 setfsgid - compat_sys_s390_setfsgid16
+140 32 _llseek - compat_sys_llseek
+141 common getdents sys_getdents compat_sys_getdents
+142 32 _newselect - compat_sys_select
+142 64 select sys_select -
+143 common flock sys_flock sys_flock
+144 common msync sys_msync compat_sys_msync
+145 common readv sys_readv compat_sys_readv
+146 common writev sys_writev compat_sys_writev
+147 common getsid sys_getsid sys_getsid
+148 common fdatasync sys_fdatasync sys_fdatasync
+149 common _sysctl sys_sysctl compat_sys_sysctl
+150 common mlock sys_mlock compat_sys_mlock
+151 common munlock sys_munlock compat_sys_munlock
+152 common mlockall sys_mlockall sys_mlockall
+153 common munlockall sys_munlockall sys_munlockall
+154 common sched_setparam sys_sched_setparam compat_sys_sched_setparam
+155 common sched_getparam sys_sched_getparam compat_sys_sched_getparam
+156 common sched_setscheduler sys_sched_setscheduler compat_sys_sched_setscheduler
+157 common sched_getscheduler sys_sched_getscheduler sys_sched_getscheduler
+158 common sched_yield sys_sched_yield sys_sched_yield
+159 common sched_get_priority_max sys_sched_get_priority_max sys_sched_get_priority_max
+160 common sched_get_priority_min sys_sched_get_priority_min sys_sched_get_priority_min
+161 common sched_rr_get_interval sys_sched_rr_get_interval compat_sys_sched_rr_get_interval
+162 common nanosleep sys_nanosleep compat_sys_nanosleep
+163 common mremap sys_mremap compat_sys_mremap
+164 32 setresuid - compat_sys_s390_setresuid16
+165 32 getresuid - compat_sys_s390_getresuid16
+167 common query_module - -
+168 common poll sys_poll compat_sys_poll
+169 common nfsservctl - -
+170 32 setresgid - compat_sys_s390_setresgid16
+171 32 getresgid - compat_sys_s390_getresgid16
+172 common prctl sys_prctl compat_sys_prctl
+173 common rt_sigreturn sys_rt_sigreturn compat_sys_rt_sigreturn
+174 common rt_sigaction sys_rt_sigaction compat_sys_rt_sigaction
+175 common rt_sigprocmask sys_rt_sigprocmask compat_sys_rt_sigprocmask
+176 common rt_sigpending sys_rt_sigpending compat_sys_rt_sigpending
+177 common rt_sigtimedwait sys_rt_sigtimedwait compat_sys_rt_sigtimedwait
+178 common rt_sigqueueinfo sys_rt_sigqueueinfo compat_sys_rt_sigqueueinfo
+179 common rt_sigsuspend sys_rt_sigsuspend compat_sys_rt_sigsuspend
+180 common pread64 sys_pread64 compat_sys_s390_pread64
+181 common pwrite64 sys_pwrite64 compat_sys_s390_pwrite64
+182 32 chown - compat_sys_s390_chown16
+183 common getcwd sys_getcwd compat_sys_getcwd
+184 common capget sys_capget compat_sys_capget
+185 common capset sys_capset compat_sys_capset
+186 common sigaltstack sys_sigaltstack compat_sys_sigaltstack
+187 common sendfile sys_sendfile64 compat_sys_sendfile
+188 common getpmsg - -
+189 common putpmsg - -
+190 common vfork sys_vfork sys_vfork
+191 32 ugetrlimit - compat_sys_getrlimit
+191 64 getrlimit sys_getrlimit -
+192 32 mmap2 - compat_sys_s390_mmap2
+193 32 truncate64 - compat_sys_s390_truncate64
+194 32 ftruncate64 - compat_sys_s390_ftruncate64
+195 32 stat64 - compat_sys_s390_stat64
+196 32 lstat64 - compat_sys_s390_lstat64
+197 32 fstat64 - compat_sys_s390_fstat64
+198 32 lchown32 - compat_sys_lchown
+198 64 lchown sys_lchown -
+199 32 getuid32 - sys_getuid
+199 64 getuid sys_getuid -
+200 32 getgid32 - sys_getgid
+200 64 getgid sys_getgid -
+201 32 geteuid32 - sys_geteuid
+201 64 geteuid sys_geteuid -
+202 32 getegid32 - sys_getegid
+202 64 getegid sys_getegid -
+203 32 setreuid32 - sys_setreuid
+203 64 setreuid sys_setreuid -
+204 32 setregid32 - sys_setregid
+204 64 setregid sys_setregid -
+205 32 getgroups32 - compat_sys_getgroups
+205 64 getgroups sys_getgroups -
+206 32 setgroups32 - compat_sys_setgroups
+206 64 setgroups sys_setgroups -
+207 32 fchown32 - sys_fchown
+207 64 fchown sys_fchown -
+208 32 setresuid32 - sys_setresuid
+208 64 setresuid sys_setresuid -
+209 32 getresuid32 - compat_sys_getresuid
+209 64 getresuid sys_getresuid -
+210 32 setresgid32 - sys_setresgid
+210 64 setresgid sys_setresgid -
+211 32 getresgid32 - compat_sys_getresgid
+211 64 getresgid sys_getresgid -
+212 32 chown32 - compat_sys_chown
+212 64 chown sys_chown -
+213 32 setuid32 - sys_setuid
+213 64 setuid sys_setuid -
+214 32 setgid32 - sys_setgid
+214 64 setgid sys_setgid -
+215 32 setfsuid32 - sys_setfsuid
+215 64 setfsuid sys_setfsuid -
+216 32 setfsgid32 - sys_setfsgid
+216 64 setfsgid sys_setfsgid -
+217 common pivot_root sys_pivot_root compat_sys_pivot_root
+218 common mincore sys_mincore compat_sys_mincore
+219 common madvise sys_madvise compat_sys_madvise
+220 common getdents64 sys_getdents64 compat_sys_getdents64
+221 32 fcntl64 - compat_sys_fcntl64
+222 common readahead sys_readahead compat_sys_s390_readahead
+223 32 sendfile64 - compat_sys_sendfile64
+224 common setxattr sys_setxattr compat_sys_setxattr
+225 common lsetxattr sys_lsetxattr compat_sys_lsetxattr
+226 common fsetxattr sys_fsetxattr compat_sys_fsetxattr
+227 common getxattr sys_getxattr compat_sys_getxattr
+228 common lgetxattr sys_lgetxattr compat_sys_lgetxattr
+229 common fgetxattr sys_fgetxattr compat_sys_fgetxattr
+230 common listxattr sys_listxattr compat_sys_listxattr
+231 common llistxattr sys_llistxattr compat_sys_llistxattr
+232 common flistxattr sys_flistxattr compat_sys_flistxattr
+233 common removexattr sys_removexattr compat_sys_removexattr
+234 common lremovexattr sys_lremovexattr compat_sys_lremovexattr
+235 common fremovexattr sys_fremovexattr compat_sys_fremovexattr
+236 common gettid sys_gettid sys_gettid
+237 common tkill sys_tkill sys_tkill
+238 common futex sys_futex compat_sys_futex
+239 common sched_setaffinity sys_sched_setaffinity compat_sys_sched_setaffinity
+240 common sched_getaffinity sys_sched_getaffinity compat_sys_sched_getaffinity
+241 common tgkill sys_tgkill sys_tgkill
+243 common io_setup sys_io_setup compat_sys_io_setup
+244 common io_destroy sys_io_destroy compat_sys_io_destroy
+245 common io_getevents sys_io_getevents compat_sys_io_getevents
+246 common io_submit sys_io_submit compat_sys_io_submit
+247 common io_cancel sys_io_cancel compat_sys_io_cancel
+248 common exit_group sys_exit_group sys_exit_group
+249 common epoll_create sys_epoll_create sys_epoll_create
+250 common epoll_ctl sys_epoll_ctl compat_sys_epoll_ctl
+251 common epoll_wait sys_epoll_wait compat_sys_epoll_wait
+252 common set_tid_address sys_set_tid_address compat_sys_set_tid_address
+253 common fadvise64 sys_fadvise64_64 compat_sys_s390_fadvise64
+254 common timer_create sys_timer_create compat_sys_timer_create
+255 common timer_settime sys_timer_settime compat_sys_timer_settime
+256 common timer_gettime sys_timer_gettime compat_sys_timer_gettime
+257 common timer_getoverrun sys_timer_getoverrun sys_timer_getoverrun
+258 common timer_delete sys_timer_delete sys_timer_delete
+259 common clock_settime sys_clock_settime compat_sys_clock_settime
+260 common clock_gettime sys_clock_gettime compat_sys_clock_gettime
+261 common clock_getres sys_clock_getres compat_sys_clock_getres
+262 common clock_nanosleep sys_clock_nanosleep compat_sys_clock_nanosleep
+264 32 fadvise64_64 - compat_sys_s390_fadvise64_64
+265 common statfs64 sys_statfs64 compat_sys_statfs64
+266 common fstatfs64 sys_fstatfs64 compat_sys_fstatfs64
+267 common remap_file_pages sys_remap_file_pages compat_sys_remap_file_pages
+268 common mbind sys_mbind compat_sys_mbind
+269 common get_mempolicy sys_get_mempolicy compat_sys_get_mempolicy
+270 common set_mempolicy sys_set_mempolicy compat_sys_set_mempolicy
+271 common mq_open sys_mq_open compat_sys_mq_open
+272 common mq_unlink sys_mq_unlink compat_sys_mq_unlink
+273 common mq_timedsend sys_mq_timedsend compat_sys_mq_timedsend
+274 common mq_timedreceive sys_mq_timedreceive compat_sys_mq_timedreceive
+275 common mq_notify sys_mq_notify compat_sys_mq_notify
+276 common mq_getsetattr sys_mq_getsetattr compat_sys_mq_getsetattr
+277 common kexec_load sys_kexec_load compat_sys_kexec_load
+278 common add_key sys_add_key compat_sys_add_key
+279 common request_key sys_request_key compat_sys_request_key
+280 common keyctl sys_keyctl compat_sys_keyctl
+281 common waitid sys_waitid compat_sys_waitid
+282 common ioprio_set sys_ioprio_set sys_ioprio_set
+283 common ioprio_get sys_ioprio_get sys_ioprio_get
+284 common inotify_init sys_inotify_init sys_inotify_init
+285 common inotify_add_watch sys_inotify_add_watch compat_sys_inotify_add_watch
+286 common inotify_rm_watch sys_inotify_rm_watch sys_inotify_rm_watch
+287 common migrate_pages sys_migrate_pages compat_sys_migrate_pages
+288 common openat sys_openat compat_sys_openat
+289 common mkdirat sys_mkdirat compat_sys_mkdirat
+290 common mknodat sys_mknodat compat_sys_mknodat
+291 common fchownat sys_fchownat compat_sys_fchownat
+292 common futimesat sys_futimesat compat_sys_futimesat
+293 32 fstatat64 - compat_sys_s390_fstatat64
+293 64 newfstatat sys_newfstatat -
+294 common unlinkat sys_unlinkat compat_sys_unlinkat
+295 common renameat sys_renameat compat_sys_renameat
+296 common linkat sys_linkat compat_sys_linkat
+297 common symlinkat sys_symlinkat compat_sys_symlinkat
+298 common readlinkat sys_readlinkat compat_sys_readlinkat
+299 common fchmodat sys_fchmodat compat_sys_fchmodat
+300 common faccessat sys_faccessat compat_sys_faccessat
+301 common pselect6 sys_pselect6 compat_sys_pselect6
+302 common ppoll sys_ppoll compat_sys_ppoll
+303 common unshare sys_unshare compat_sys_unshare
+304 common set_robust_list sys_set_robust_list compat_sys_set_robust_list
+305 common get_robust_list sys_get_robust_list compat_sys_get_robust_list
+306 common splice sys_splice compat_sys_splice
+307 common sync_file_range sys_sync_file_range compat_sys_s390_sync_file_range
+308 common tee sys_tee compat_sys_tee
+309 common vmsplice sys_vmsplice compat_sys_vmsplice
+310 common move_pages sys_move_pages compat_sys_move_pages
+311 common getcpu sys_getcpu compat_sys_getcpu
+312 common epoll_pwait sys_epoll_pwait compat_sys_epoll_pwait
+313 common utimes sys_utimes compat_sys_utimes
+314 common fallocate sys_fallocate compat_sys_s390_fallocate
+315 common utimensat sys_utimensat compat_sys_utimensat
+316 common signalfd sys_signalfd compat_sys_signalfd
+317 common timerfd - -
+318 common eventfd sys_eventfd sys_eventfd
+319 common timerfd_create sys_timerfd_create sys_timerfd_create
+320 common timerfd_settime sys_timerfd_settime compat_sys_timerfd_settime
+321 common timerfd_gettime sys_timerfd_gettime compat_sys_timerfd_gettime
+322 common signalfd4 sys_signalfd4 compat_sys_signalfd4
+323 common eventfd2 sys_eventfd2 sys_eventfd2
+324 common inotify_init1 sys_inotify_init1 sys_inotify_init1
+325 common pipe2 sys_pipe2 compat_sys_pipe2
+326 common dup3 sys_dup3 sys_dup3
+327 common epoll_create1 sys_epoll_create1 sys_epoll_create1
+328 common preadv sys_preadv compat_sys_preadv
+329 common pwritev sys_pwritev compat_sys_pwritev
+330 common rt_tgsigqueueinfo sys_rt_tgsigqueueinfo compat_sys_rt_tgsigqueueinfo
+331 common perf_event_open sys_perf_event_open compat_sys_perf_event_open
+332 common fanotify_init sys_fanotify_init sys_fanotify_init
+333 common fanotify_mark sys_fanotify_mark compat_sys_fanotify_mark
+334 common prlimit64 sys_prlimit64 compat_sys_prlimit64
+335 common name_to_handle_at sys_name_to_handle_at compat_sys_name_to_handle_at
+336 common open_by_handle_at sys_open_by_handle_at compat_sys_open_by_handle_at
+337 common clock_adjtime sys_clock_adjtime compat_sys_clock_adjtime
+338 common syncfs sys_syncfs sys_syncfs
+339 common setns sys_setns sys_setns
+340 common process_vm_readv sys_process_vm_readv compat_sys_process_vm_readv
+341 common process_vm_writev sys_process_vm_writev compat_sys_process_vm_writev
+342 common s390_runtime_instr sys_s390_runtime_instr sys_s390_runtime_instr
+343 common kcmp sys_kcmp compat_sys_kcmp
+344 common finit_module sys_finit_module compat_sys_finit_module
+345 common sched_setattr sys_sched_setattr compat_sys_sched_setattr
+346 common sched_getattr sys_sched_getattr compat_sys_sched_getattr
+347 common renameat2 sys_renameat2 compat_sys_renameat2
+348 common seccomp sys_seccomp compat_sys_seccomp
+349 common getrandom sys_getrandom compat_sys_getrandom
+350 common memfd_create sys_memfd_create compat_sys_memfd_create
+351 common bpf sys_bpf compat_sys_bpf
+352 common s390_pci_mmio_write sys_s390_pci_mmio_write compat_sys_s390_pci_mmio_write
+353 common s390_pci_mmio_read sys_s390_pci_mmio_read compat_sys_s390_pci_mmio_read
+354 common execveat sys_execveat compat_sys_execveat
+355 common userfaultfd sys_userfaultfd sys_userfaultfd
+356 common membarrier sys_membarrier sys_membarrier
+357 common recvmmsg sys_recvmmsg compat_sys_recvmmsg
+358 common sendmmsg sys_sendmmsg compat_sys_sendmmsg
+359 common socket sys_socket sys_socket
+360 common socketpair sys_socketpair compat_sys_socketpair
+361 common bind sys_bind compat_sys_bind
+362 common connect sys_connect compat_sys_connect
+363 common listen sys_listen sys_listen
+364 common accept4 sys_accept4 compat_sys_accept4
+365 common getsockopt sys_getsockopt compat_sys_getsockopt
+366 common setsockopt sys_setsockopt compat_sys_setsockopt
+367 common getsockname sys_getsockname compat_sys_getsockname
+368 common getpeername sys_getpeername compat_sys_getpeername
+369 common sendto sys_sendto compat_sys_sendto
+370 common sendmsg sys_sendmsg compat_sys_sendmsg
+371 common recvfrom sys_recvfrom compat_sys_recvfrom
+372 common recvmsg sys_recvmsg compat_sys_recvmsg
+373 common shutdown sys_shutdown sys_shutdown
+374 common mlock2 sys_mlock2 compat_sys_mlock2
+375 common copy_file_range sys_copy_file_range compat_sys_copy_file_range
+376 common preadv2 sys_preadv2 compat_sys_preadv2
+377 common pwritev2 sys_pwritev2 compat_sys_pwritev2
+378 common s390_guarded_storage sys_s390_guarded_storage compat_sys_s390_guarded_storage
+379 common statx sys_statx compat_sys_statx
+380 common s390_sthyi sys_s390_sthyi compat_sys_s390_sthyi
+381 common kexec_file_load sys_kexec_file_load compat_sys_kexec_file_load
+382 common io_pgetevents sys_io_pgetevents compat_sys_io_pgetevents
+383 common rseq sys_rseq compat_sys_rseq
diff --git a/arch/s390/kernel/syscalls/syscalltbl b/arch/s390/kernel/syscalls/syscalltbl
new file mode 100755
index 000000000..fbac1732f
--- /dev/null
+++ b/arch/s390/kernel/syscalls/syscalltbl
@@ -0,0 +1,232 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+#
+# Generate system call table and header files
+#
+# Copyright IBM Corp. 2018
+# Author(s): Hendrik Brueckner <brueckner@linux.vnet.ibm.com>
+
+#
+# File path to the system call table definition.
+# You can set the path with the -i option. If omitted,
+# system call table definitions are read from standard input.
+#
+SYSCALL_TBL=""
+
+
+create_syscall_table_entries()
+{
+ local nr abi name entry64 entry32 _ignore
+ local temp=$(mktemp ${TMPDIR:-/tmp}/syscalltbl-common.XXXXXXXXX)
+
+ (
+ #
+ # Initialize with 0 to create an NI_SYSCALL for 0
+ #
+ local prev_nr=0 prev_32=sys_ni_syscall prev_64=sys_ni_syscall
+ while read nr abi name entry64 entry32 _ignore; do
+ test x$entry32 = x- && entry32=sys_ni_syscall
+ test x$entry64 = x- && entry64=sys_ni_syscall
+
+ if test $prev_nr -eq $nr; then
+ #
+ # Same syscall but different ABI, just update
+ # the respective entry point
+ #
+ case $abi in
+ 32)
+ prev_32=$entry32
+ ;;
+ 64)
+ prev_64=$entry64
+ ;;
+ esac
+ continue;
+ else
+ printf "%d\t%s\t%s\n" $prev_nr $prev_64 $prev_32
+ fi
+
+ prev_nr=$nr
+ prev_64=$entry64
+ prev_32=$entry32
+ done
+ printf "%d\t%s\t%s\n" $prev_nr $prev_64 $prev_32
+ ) >> $temp
+
+ #
+ # Check for duplicate syscall numbers
+ #
+ if [ -n "$(cut -f1 $temp |uniq -d)" ]; then
+ echo "Error: generated system call table contains duplicate entries: $temp" >&2
+ exit 1
+ fi
+
+ #
+ # Generate syscall table
+ #
+ prev_nr=0
+ while read nr entry64 entry32; do
+ while test $prev_nr -lt $((nr - 1)); do
+ printf "NI_SYSCALL\n"
+ prev_nr=$((prev_nr + 1))
+ done
+ if test x$entry64 = xsys_ni_syscall &&
+ test x$entry32 = xsys_ni_syscall; then
+ printf "NI_SYSCALL\n"
+ else
+ printf "SYSCALL(%s,%s)\n" $entry64 $entry32
+ fi
+ prev_nr=$nr
+ done < $temp
+ rm $temp
+}
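A worked illustration, using rows visible in the syscall table above: the two
ABI-specific rows for syscall 191,

    191  32  ugetrlimit  -              compat_sys_getrlimit
    191  64  getrlimit   sys_getrlimit  -

are merged by the first pass into the single intermediate line
"191 sys_getrlimit compat_sys_getrlimit", for which the second pass emits
"SYSCALL(sys_getrlimit,compat_sys_getrlimit)". An unused number such as 242
(the table jumps from 241 tgkill to 243 io_setup) is filled in as "NI_SYSCALL".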
+
+generate_syscall_table()
+{
+ cat <<-EoHEADER
+ /* SPDX-License-Identifier: GPL-2.0 */
+ /*
+ * Definitions for sys_call_table, each line represents an
+ * entry in the table in the form
+ * SYSCALL(64 bit syscall, 31 bit emulated syscall)
+ *
+ * This file is meant to be included from entry.S.
+ */
+
+ #define NI_SYSCALL SYSCALL(sys_ni_syscall,sys_ni_syscall)
+
+EoHEADER
+ grep -Ev '^(#|[[:blank:]]*$)' $SYSCALL_TBL \
+ |sort -k1 -n \
+ |create_syscall_table_entries
+}
+
+create_header_defines()
+{
+ local nr abi name _ignore
+
+ while read nr abi name _ignore; do
+ printf "#define __NR_%s %d\n" $name $nr
+ done
+}
+
+normalize_fileguard()
+{
+ local fileguard="$1"
+
+ echo "$1" |tr '[[:lower:]]' '[[:upper:]]' \
+ |sed -e 's/[^A-Z0-9_]/_/g' -e 's/__/_/g'
+}
+
+generate_syscall_header()
+{
+ local abis=$(echo "($1)" | tr ',' '|')
+ local filename="$2"
+ local fileguard suffix
+
+ if test "$filename"; then
+ fileguard=$(normalize_fileguard "__UAPI_ASM_S390_$2")
+ else
+ case "$abis" in
+ *64*) suffix=64 ;;
+ *32*) suffix=32 ;;
+ esac
+ fileguard=$(normalize_fileguard "__UAPI_ASM_S390_SYSCALLS_$suffix")
+ fi
+
+ cat <<-EoHEADER
+ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+ #ifndef ${fileguard}
+ #define ${fileguard}
+
+EoHEADER
+
+ grep -E "^[[:digit:]]+[[:space:]]+${abis}" $SYSCALL_TBL \
+ |sort -k1 -n \
+ |create_header_defines
+
+ cat <<-EoFOOTER
+
+ #endif /* ${fileguard} */
+EoFOOTER
+}
+
+__max_syscall_nr()
+{
+ local abis=$(echo "($1)" | tr ',' '|')
+
+ grep -E "^[[:digit:]]+[[:space:]]+${abis}" $SYSCALL_TBL \
+ |sed -ne 's/^\([[:digit:]]*\)[[:space:]].*/\1/p' \
+ |sort -n \
+ |tail -1
+}
+
+
+generate_syscall_nr()
+{
+ local abis="$1"
+ local max_syscall_nr num_syscalls
+
+ max_syscall_nr=$(__max_syscall_nr "$abis")
+ num_syscalls=$((max_syscall_nr + 1))
+
+ cat <<-EoHEADER
+ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+ #ifndef __ASM_S390_SYSCALLS_NR
+ #define __ASM_S390_SYSCALLS_NR
+
+ #define NR_syscalls ${num_syscalls}
+
+ #endif /* __ASM_S390_SYSCALLS_NR */
+EoHEADER
+}
+
+
+#
+# Parse command line arguments
+#
+do_syscall_header=""
+do_syscall_table=""
+do_syscall_nr=""
+output_file=""
+abi_list="common,64"
+filename=""
+while getopts ":HNSXi:a:f:" arg; do
+ case $arg in
+ a)
+ abi_list="$OPTARG"
+ ;;
+ i)
+ SYSCALL_TBL="$OPTARG"
+ ;;
+ f)
+ filename=${OPTARG##*/}
+ ;;
+ H)
+ do_syscall_header=1
+ ;;
+ N)
+ do_syscall_nr=1
+ ;;
+ S)
+ do_syscall_table=1
+ ;;
+ X)
+ set -x
+ ;;
+ :)
+ echo "Missing argument for -$OPTARG" >&2
+ exit 1
+ ;;
+ \?)
+ echo "Invalid option specified" >&2
+ exit 1
+ ;;
+ esac
+done
+
+test "$do_syscall_header" && generate_syscall_header "$abi_list" "$filename"
+test "$do_syscall_table" && generate_syscall_table
+test "$do_syscall_nr" && generate_syscall_nr "$abi_list"
+
+exit 0
diff --git a/arch/s390/kernel/sysinfo.c b/arch/s390/kernel/sysinfo.c
new file mode 100644
index 000000000..12f80d1f0
--- /dev/null
+++ b/arch/s390/kernel/sysinfo.c
@@ -0,0 +1,568 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright IBM Corp. 2001, 2009
+ * Author(s): Ulrich Weigand <Ulrich.Weigand@de.ibm.com>,
+ * Martin Schwidefsky <schwidefsky@de.ibm.com>,
+ */
+
+#include <linux/debugfs.h>
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <linux/proc_fs.h>
+#include <linux/seq_file.h>
+#include <linux/init.h>
+#include <linux/delay.h>
+#include <linux/export.h>
+#include <linux/slab.h>
+#include <asm/ebcdic.h>
+#include <asm/debug.h>
+#include <asm/sysinfo.h>
+#include <asm/cpcmd.h>
+#include <asm/topology.h>
+#include <asm/fpu/api.h>
+
+int topology_max_mnest;
+
+static inline int __stsi(void *sysinfo, int fc, int sel1, int sel2, int *lvl)
+{
+ register int r0 asm("0") = (fc << 28) | sel1;
+ register int r1 asm("1") = sel2;
+ int rc = 0;
+
+ asm volatile(
+ " stsi 0(%3)\n"
+ "0: jz 2f\n"
+ "1: lhi %1,%4\n"
+ "2:\n"
+ EX_TABLE(0b, 1b)
+ : "+d" (r0), "+d" (rc)
+ : "d" (r1), "a" (sysinfo), "K" (-EOPNOTSUPP)
+ : "cc", "memory");
+ *lvl = ((unsigned int) r0) >> 28;
+ return rc;
+}
+
+/*
+ * stsi - store system information
+ *
+ * Returns the current configuration level if function code 0 was specified.
+ * Otherwise returns 0 on success or a negative value on error.
+ */
+int stsi(void *sysinfo, int fc, int sel1, int sel2)
+{
+ int lvl, rc;
+
+ rc = __stsi(sysinfo, fc, sel1, sel2, &lvl);
+ if (rc)
+ return rc;
+ return fc ? 0 : lvl;
+}
+EXPORT_SYMBOL(stsi);
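A minimal usage sketch of the interface above (it mirrors sysinfo_show() further
down in this file; STSI stores up to 4K, hence the zeroed page):

	/* Illustrative sketch, not part of the patch. */
	void *buf = (void *)get_zeroed_page(GFP_KERNEL);
	int level = stsi(NULL, 0, 0, 0);	/* fc == 0: query the configuration level */

	if (buf && level >= 1 && stsi(buf, 1, 1, 1) == 0) {
		/* buf now holds a struct sysinfo_1_1_1 with the basic machine data */
	}
	free_page((unsigned long)buf);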
+
+#ifdef CONFIG_PROC_FS
+
+static bool convert_ext_name(unsigned char encoding, char *name, size_t len)
+{
+ switch (encoding) {
+ case 1: /* EBCDIC */
+ EBCASC(name, len);
+ break;
+ case 2: /* UTF-8 */
+ break;
+ default:
+ return false;
+ }
+ return true;
+}
+
+static void stsi_1_1_1(struct seq_file *m, struct sysinfo_1_1_1 *info)
+{
+ int i;
+
+ if (stsi(info, 1, 1, 1))
+ return;
+ EBCASC(info->manufacturer, sizeof(info->manufacturer));
+ EBCASC(info->type, sizeof(info->type));
+ EBCASC(info->model, sizeof(info->model));
+ EBCASC(info->sequence, sizeof(info->sequence));
+ EBCASC(info->plant, sizeof(info->plant));
+ EBCASC(info->model_capacity, sizeof(info->model_capacity));
+ EBCASC(info->model_perm_cap, sizeof(info->model_perm_cap));
+ EBCASC(info->model_temp_cap, sizeof(info->model_temp_cap));
+ seq_printf(m, "Manufacturer: %-16.16s\n", info->manufacturer);
+ seq_printf(m, "Type: %-4.4s\n", info->type);
+ if (info->lic)
+ seq_printf(m, "LIC Identifier: %016lx\n", info->lic);
+ /*
+ * Sigh: the model field has been renamed with System z9
+ * to model_capacity and a new model field has been added
+ * after the plant field. To avoid confusing older programs
+ * the "Model:" prints "model_capacity model" or just
+ * "model_capacity" if the model string is empty .
+ */
+ seq_printf(m, "Model: %-16.16s", info->model_capacity);
+ if (info->model[0] != '\0')
+ seq_printf(m, " %-16.16s", info->model);
+ seq_putc(m, '\n');
+ seq_printf(m, "Sequence Code: %-16.16s\n", info->sequence);
+ seq_printf(m, "Plant: %-4.4s\n", info->plant);
+ seq_printf(m, "Model Capacity: %-16.16s %08u\n",
+ info->model_capacity, info->model_cap_rating);
+ if (info->model_perm_cap_rating)
+ seq_printf(m, "Model Perm. Capacity: %-16.16s %08u\n",
+ info->model_perm_cap,
+ info->model_perm_cap_rating);
+ if (info->model_temp_cap_rating)
+ seq_printf(m, "Model Temp. Capacity: %-16.16s %08u\n",
+ info->model_temp_cap,
+ info->model_temp_cap_rating);
+ if (info->ncr)
+ seq_printf(m, "Nominal Cap. Rating: %08u\n", info->ncr);
+ if (info->npr)
+ seq_printf(m, "Nominal Perm. Rating: %08u\n", info->npr);
+ if (info->ntr)
+ seq_printf(m, "Nominal Temp. Rating: %08u\n", info->ntr);
+ if (info->cai) {
+ seq_printf(m, "Capacity Adj. Ind.: %d\n", info->cai);
+ seq_printf(m, "Capacity Ch. Reason: %d\n", info->ccr);
+ seq_printf(m, "Capacity Transient: %d\n", info->t);
+ }
+ if (info->p) {
+ for (i = 1; i <= ARRAY_SIZE(info->typepct); i++) {
+ seq_printf(m, "Type %d Percentage: %d\n",
+ i, info->typepct[i - 1]);
+ }
+ }
+}
+
+static void stsi_15_1_x(struct seq_file *m, struct sysinfo_15_1_x *info)
+{
+ int i;
+
+ seq_putc(m, '\n');
+ if (!MACHINE_HAS_TOPOLOGY)
+ return;
+ if (stsi(info, 15, 1, topology_max_mnest))
+ return;
+ seq_printf(m, "CPU Topology HW: ");
+ for (i = 0; i < TOPOLOGY_NR_MAG; i++)
+ seq_printf(m, " %d", info->mag[i]);
+ seq_putc(m, '\n');
+#ifdef CONFIG_SCHED_TOPOLOGY
+ store_topology(info);
+ seq_printf(m, "CPU Topology SW: ");
+ for (i = 0; i < TOPOLOGY_NR_MAG; i++)
+ seq_printf(m, " %d", info->mag[i]);
+ seq_putc(m, '\n');
+#endif
+}
+
+static void stsi_1_2_2(struct seq_file *m, struct sysinfo_1_2_2 *info)
+{
+ struct sysinfo_1_2_2_extension *ext;
+ int i;
+
+ if (stsi(info, 1, 2, 2))
+ return;
+ ext = (struct sysinfo_1_2_2_extension *)
+ ((unsigned long) info + info->acc_offset);
+ seq_printf(m, "CPUs Total: %d\n", info->cpus_total);
+ seq_printf(m, "CPUs Configured: %d\n", info->cpus_configured);
+ seq_printf(m, "CPUs Standby: %d\n", info->cpus_standby);
+ seq_printf(m, "CPUs Reserved: %d\n", info->cpus_reserved);
+ if (info->mt_installed) {
+ seq_printf(m, "CPUs G-MTID: %d\n", info->mt_gtid);
+ seq_printf(m, "CPUs S-MTID: %d\n", info->mt_stid);
+ }
+ /*
+ * Sigh 2. According to the specification the alternate
+ * capability field is a 32 bit floating point number
+ * if the higher order 8 bits are not zero. Printing
+ * a floating point number in the kernel is a no-no, so the
+ * number is always printed as a 32 bit unsigned integer.
+ * User space needs to know about this strange encoding of
+ * the alternate cpu capability.
+ */
+ seq_printf(m, "Capability: %u", info->capability);
+ if (info->format == 1)
+ seq_printf(m, " %u", ext->alt_capability);
+ seq_putc(m, '\n');
+ if (info->nominal_cap)
+ seq_printf(m, "Nominal Capability: %d\n", info->nominal_cap);
+ if (info->secondary_cap)
+ seq_printf(m, "Secondary Capability: %d\n", info->secondary_cap);
+ for (i = 2; i <= info->cpus_total; i++) {
+ seq_printf(m, "Adjustment %02d-way: %u",
+ i, info->adjustment[i-2]);
+ if (info->format == 1)
+ seq_printf(m, " %u", ext->alt_adjustment[i-2]);
+ seq_putc(m, '\n');
+ }
+}
+
+static void stsi_2_2_2(struct seq_file *m, struct sysinfo_2_2_2 *info)
+{
+ if (stsi(info, 2, 2, 2))
+ return;
+ EBCASC(info->name, sizeof(info->name));
+ seq_putc(m, '\n');
+ seq_printf(m, "LPAR Number: %d\n", info->lpar_number);
+ seq_printf(m, "LPAR Characteristics: ");
+ if (info->characteristics & LPAR_CHAR_DEDICATED)
+ seq_printf(m, "Dedicated ");
+ if (info->characteristics & LPAR_CHAR_SHARED)
+ seq_printf(m, "Shared ");
+ if (info->characteristics & LPAR_CHAR_LIMITED)
+ seq_printf(m, "Limited ");
+ seq_putc(m, '\n');
+ seq_printf(m, "LPAR Name: %-8.8s\n", info->name);
+ seq_printf(m, "LPAR Adjustment: %d\n", info->caf);
+ seq_printf(m, "LPAR CPUs Total: %d\n", info->cpus_total);
+ seq_printf(m, "LPAR CPUs Configured: %d\n", info->cpus_configured);
+ seq_printf(m, "LPAR CPUs Standby: %d\n", info->cpus_standby);
+ seq_printf(m, "LPAR CPUs Reserved: %d\n", info->cpus_reserved);
+ seq_printf(m, "LPAR CPUs Dedicated: %d\n", info->cpus_dedicated);
+ seq_printf(m, "LPAR CPUs Shared: %d\n", info->cpus_shared);
+ if (info->mt_installed) {
+ seq_printf(m, "LPAR CPUs G-MTID: %d\n", info->mt_gtid);
+ seq_printf(m, "LPAR CPUs S-MTID: %d\n", info->mt_stid);
+ seq_printf(m, "LPAR CPUs PS-MTID: %d\n", info->mt_psmtid);
+ }
+ if (convert_ext_name(info->vsne, info->ext_name, sizeof(info->ext_name))) {
+ seq_printf(m, "LPAR Extended Name: %-.256s\n", info->ext_name);
+ seq_printf(m, "LPAR UUID: %pUb\n", &info->uuid);
+ }
+}
+
+static void print_ext_name(struct seq_file *m, int lvl,
+ struct sysinfo_3_2_2 *info)
+{
+ size_t len = sizeof(info->ext_names[lvl]);
+
+ if (!convert_ext_name(info->vm[lvl].evmne, info->ext_names[lvl], len))
+ return;
+ seq_printf(m, "VM%02d Extended Name: %-.256s\n", lvl,
+ info->ext_names[lvl]);
+}
+
+static void print_uuid(struct seq_file *m, int i, struct sysinfo_3_2_2 *info)
+{
+ if (uuid_is_null(&info->vm[i].uuid))
+ return;
+ seq_printf(m, "VM%02d UUID: %pUb\n", i, &info->vm[i].uuid);
+}
+
+static void stsi_3_2_2(struct seq_file *m, struct sysinfo_3_2_2 *info)
+{
+ int i;
+
+ if (stsi(info, 3, 2, 2))
+ return;
+ for (i = 0; i < info->count; i++) {
+ EBCASC(info->vm[i].name, sizeof(info->vm[i].name));
+ EBCASC(info->vm[i].cpi, sizeof(info->vm[i].cpi));
+ seq_putc(m, '\n');
+ seq_printf(m, "VM%02d Name: %-8.8s\n", i, info->vm[i].name);
+ seq_printf(m, "VM%02d Control Program: %-16.16s\n", i, info->vm[i].cpi);
+ seq_printf(m, "VM%02d Adjustment: %d\n", i, info->vm[i].caf);
+ seq_printf(m, "VM%02d CPUs Total: %d\n", i, info->vm[i].cpus_total);
+ seq_printf(m, "VM%02d CPUs Configured: %d\n", i, info->vm[i].cpus_configured);
+ seq_printf(m, "VM%02d CPUs Standby: %d\n", i, info->vm[i].cpus_standby);
+ seq_printf(m, "VM%02d CPUs Reserved: %d\n", i, info->vm[i].cpus_reserved);
+ print_ext_name(m, i, info);
+ print_uuid(m, i, info);
+ }
+}
+
+static int sysinfo_show(struct seq_file *m, void *v)
+{
+ void *info = (void *)get_zeroed_page(GFP_KERNEL);
+ int level;
+
+ if (!info)
+ return 0;
+ level = stsi(NULL, 0, 0, 0);
+ if (level >= 1)
+ stsi_1_1_1(m, info);
+ if (level >= 1)
+ stsi_15_1_x(m, info);
+ if (level >= 1)
+ stsi_1_2_2(m, info);
+ if (level >= 2)
+ stsi_2_2_2(m, info);
+ if (level >= 3)
+ stsi_3_2_2(m, info);
+ free_page((unsigned long)info);
+ return 0;
+}
+
+static int __init sysinfo_create_proc(void)
+{
+ proc_create_single("sysinfo", 0444, NULL, sysinfo_show);
+ return 0;
+}
+device_initcall(sysinfo_create_proc);
+
+#endif /* CONFIG_PROC_FS */
+
+/*
+ * Service levels interface.
+ */
+
+static DECLARE_RWSEM(service_level_sem);
+static LIST_HEAD(service_level_list);
+
+int register_service_level(struct service_level *slr)
+{
+ struct service_level *ptr;
+
+ down_write(&service_level_sem);
+ list_for_each_entry(ptr, &service_level_list, list)
+ if (ptr == slr) {
+ up_write(&service_level_sem);
+ return -EEXIST;
+ }
+ list_add_tail(&slr->list, &service_level_list);
+ up_write(&service_level_sem);
+ return 0;
+}
+EXPORT_SYMBOL(register_service_level);
+
+int unregister_service_level(struct service_level *slr)
+{
+ struct service_level *ptr, *next;
+ int rc = -ENOENT;
+
+ down_write(&service_level_sem);
+ list_for_each_entry_safe(ptr, next, &service_level_list, list) {
+ if (ptr != slr)
+ continue;
+ list_del(&ptr->list);
+ rc = 0;
+ break;
+ }
+ up_write(&service_level_sem);
+ return rc;
+}
+EXPORT_SYMBOL(unregister_service_level);
+
+static void *service_level_start(struct seq_file *m, loff_t *pos)
+{
+ down_read(&service_level_sem);
+ return seq_list_start(&service_level_list, *pos);
+}
+
+static void *service_level_next(struct seq_file *m, void *p, loff_t *pos)
+{
+ return seq_list_next(p, &service_level_list, pos);
+}
+
+static void service_level_stop(struct seq_file *m, void *p)
+{
+ up_read(&service_level_sem);
+}
+
+static int service_level_show(struct seq_file *m, void *p)
+{
+ struct service_level *slr;
+
+ slr = list_entry(p, struct service_level, list);
+ slr->seq_print(m, slr);
+ return 0;
+}
+
+static const struct seq_operations service_level_seq_ops = {
+ .start = service_level_start,
+ .next = service_level_next,
+ .stop = service_level_stop,
+ .show = service_level_show
+};
+
+static void service_level_vm_print(struct seq_file *m,
+ struct service_level *slr)
+{
+ char *query_buffer, *str;
+
+ query_buffer = kmalloc(1024, GFP_KERNEL | GFP_DMA);
+ if (!query_buffer)
+ return;
+ cpcmd("QUERY CPLEVEL", query_buffer, 1024, NULL);
+ str = strchr(query_buffer, '\n');
+ if (str)
+ *str = 0;
+ seq_printf(m, "VM: %s\n", query_buffer);
+ kfree(query_buffer);
+}
+
+static struct service_level service_level_vm = {
+ .seq_print = service_level_vm_print
+};
+
+static __init int create_proc_service_level(void)
+{
+ proc_create_seq("service_levels", 0, NULL, &service_level_seq_ops);
+ if (MACHINE_IS_VM)
+ register_service_level(&service_level_vm);
+ return 0;
+}
+subsys_initcall(create_proc_service_level);
+
+/*
+ * CPU capability might have changed. Therefore recalculate loops_per_jiffy.
+ */
+void s390_adjust_jiffies(void)
+{
+ struct sysinfo_1_2_2 *info;
+ unsigned long capability;
+ struct kernel_fpu fpu;
+
+ info = (void *) get_zeroed_page(GFP_KERNEL);
+ if (!info)
+ return;
+
+ if (stsi(info, 1, 2, 2) == 0) {
+ /*
+ * Major sigh. The cpu capability encoding is "special".
+ * If the first 9 bits of info->capability are 0 then it
+ * is a 32 bit unsigned integer in the range 0 .. 2^23.
+ * If the first 9 bits are != 0 then it is a 32 bit float.
+ * In addition a lower value indicates a proportionally
+ * higher cpu capacity. Bogomips are the other way round.
+ * To get to a halfway suitable number we divide 1e7
+ * by the cpu capability number. Yes, that means a floating
+ * point division ..
+ */
+ kernel_fpu_begin(&fpu, KERNEL_FPR);
+ asm volatile(
+ " sfpc %3\n"
+ " l %0,%1\n"
+ " tmlh %0,0xff80\n"
+ " jnz 0f\n"
+ " cefbr %%f2,%0\n"
+ " j 1f\n"
+ "0: le %%f2,%1\n"
+ "1: cefbr %%f0,%2\n"
+ " debr %%f0,%%f2\n"
+ " cgebr %0,5,%%f0\n"
+ : "=&d" (capability)
+ : "Q" (info->capability), "d" (10000000), "d" (0)
+ : "cc"
+ );
+ kernel_fpu_end(&fpu, KERNEL_FPR);
+ } else
+ /*
+ * Really old machine without stsi block for basic
+ * cpu information. Report 42.0 bogomips.
+ */
+ capability = 42;
+ loops_per_jiffy = capability * (500000/HZ);
+ free_page((unsigned long) info);
+}
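A quick sanity check with a hypothetical capability value: for info->capability = 2000
the division yields 10000000 / 2000 = 5000, so loops_per_jiffy becomes
5000 * (500000/HZ) and calibrate_delay() below prints "5000.00 BogoMIPS preset";
the fallback value of 42 likewise prints as 42.00.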
+
+/*
+ * calibrate the delay loop
+ */
+void calibrate_delay(void)
+{
+ s390_adjust_jiffies();
+ /* Print the good old Bogomips line .. */
+ printk(KERN_DEBUG "Calibrating delay loop (skipped)... "
+ "%lu.%02lu BogoMIPS preset\n", loops_per_jiffy/(500000/HZ),
+ (loops_per_jiffy/(5000/HZ)) % 100);
+}
+
+#ifdef CONFIG_DEBUG_FS
+
+#define STSI_FILE(fc, s1, s2) \
+static int stsi_open_##fc##_##s1##_##s2(struct inode *inode, struct file *file)\
+{ \
+ file->private_data = (void *) get_zeroed_page(GFP_KERNEL); \
+ if (!file->private_data) \
+ return -ENOMEM; \
+ if (stsi(file->private_data, fc, s1, s2)) { \
+ free_page((unsigned long)file->private_data); \
+ file->private_data = NULL; \
+ return -EACCES; \
+ } \
+ return nonseekable_open(inode, file); \
+} \
+ \
+static const struct file_operations stsi_##fc##_##s1##_##s2##_fs_ops = { \
+ .open = stsi_open_##fc##_##s1##_##s2, \
+ .release = stsi_release, \
+ .read = stsi_read, \
+ .llseek = no_llseek, \
+};
+
+static int stsi_release(struct inode *inode, struct file *file)
+{
+ free_page((unsigned long)file->private_data);
+ return 0;
+}
+
+static ssize_t stsi_read(struct file *file, char __user *buf, size_t size, loff_t *ppos)
+{
+ return simple_read_from_buffer(buf, size, ppos, file->private_data, PAGE_SIZE);
+}
+
+STSI_FILE( 1, 1, 1);
+STSI_FILE( 1, 2, 1);
+STSI_FILE( 1, 2, 2);
+STSI_FILE( 2, 2, 1);
+STSI_FILE( 2, 2, 2);
+STSI_FILE( 3, 2, 2);
+STSI_FILE(15, 1, 2);
+STSI_FILE(15, 1, 3);
+STSI_FILE(15, 1, 4);
+STSI_FILE(15, 1, 5);
+STSI_FILE(15, 1, 6);
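For illustration, the first invocation above, STSI_FILE(1, 1, 1), expands to roughly
the following (whitespace aside):

	static int stsi_open_1_1_1(struct inode *inode, struct file *file)
	{
		file->private_data = (void *) get_zeroed_page(GFP_KERNEL);
		if (!file->private_data)
			return -ENOMEM;
		if (stsi(file->private_data, 1, 1, 1)) {	/* fetch the 1.1.1 block */
			free_page((unsigned long)file->private_data);
			file->private_data = NULL;
			return -EACCES;
		}
		return nonseekable_open(inode, file);
	}

	static const struct file_operations stsi_1_1_1_fs_ops = {
		.open	 = stsi_open_1_1_1,
		.release = stsi_release,
		.read	 = stsi_read,
		.llseek	 = no_llseek,
	};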
+
+struct stsi_file {
+ const struct file_operations *fops;
+ char *name;
+};
+
+static struct stsi_file stsi_file[] __initdata = {
+ {.fops = &stsi_1_1_1_fs_ops, .name = "1_1_1"},
+ {.fops = &stsi_1_2_1_fs_ops, .name = "1_2_1"},
+ {.fops = &stsi_1_2_2_fs_ops, .name = "1_2_2"},
+ {.fops = &stsi_2_2_1_fs_ops, .name = "2_2_1"},
+ {.fops = &stsi_2_2_2_fs_ops, .name = "2_2_2"},
+ {.fops = &stsi_3_2_2_fs_ops, .name = "3_2_2"},
+ {.fops = &stsi_15_1_2_fs_ops, .name = "15_1_2"},
+ {.fops = &stsi_15_1_3_fs_ops, .name = "15_1_3"},
+ {.fops = &stsi_15_1_4_fs_ops, .name = "15_1_4"},
+ {.fops = &stsi_15_1_5_fs_ops, .name = "15_1_5"},
+ {.fops = &stsi_15_1_6_fs_ops, .name = "15_1_6"},
+};
+
+static u8 stsi_0_0_0;
+
+static __init int stsi_init_debugfs(void)
+{
+ struct dentry *stsi_root;
+ struct stsi_file *sf;
+ int lvl, i;
+
+ stsi_root = debugfs_create_dir("stsi", arch_debugfs_dir);
+ if (IS_ERR_OR_NULL(stsi_root))
+ return 0;
+ lvl = stsi(NULL, 0, 0, 0);
+ if (lvl > 0)
+ stsi_0_0_0 = lvl;
+ debugfs_create_u8("0_0_0", 0400, stsi_root, &stsi_0_0_0);
+ for (i = 0; i < ARRAY_SIZE(stsi_file); i++) {
+ sf = &stsi_file[i];
+ debugfs_create_file(sf->name, 0400, stsi_root, NULL, sf->fops);
+ }
+ if (IS_ENABLED(CONFIG_SCHED_TOPOLOGY) && MACHINE_HAS_TOPOLOGY) {
+ char link_to[10];
+
+ sprintf(link_to, "15_1_%d", topology_mnest_limit());
+ debugfs_create_symlink("topology", stsi_root, link_to);
+ }
+ return 0;
+}
+device_initcall(stsi_init_debugfs);
+
+#endif /* CONFIG_DEBUG_FS */
diff --git a/arch/s390/kernel/time.c b/arch/s390/kernel/time.c
new file mode 100644
index 000000000..11c32b228
--- /dev/null
+++ b/arch/s390/kernel/time.c
@@ -0,0 +1,923 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Time of day based timer functions.
+ *
+ * S390 version
+ * Copyright IBM Corp. 1999, 2008
+ * Author(s): Hartmut Penner (hp@de.ibm.com),
+ * Martin Schwidefsky (schwidefsky@de.ibm.com),
+ * Denis Joseph Barrow (djbarrow@de.ibm.com,barrow_dj@yahoo.com)
+ *
+ * Derived from "arch/i386/kernel/time.c"
+ * Copyright (C) 1991, 1992, 1995 Linus Torvalds
+ */
+
+#define KMSG_COMPONENT "time"
+#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
+
+#include <linux/kernel_stat.h>
+#include <linux/errno.h>
+#include <linux/export.h>
+#include <linux/sched.h>
+#include <linux/sched/clock.h>
+#include <linux/kernel.h>
+#include <linux/param.h>
+#include <linux/string.h>
+#include <linux/mm.h>
+#include <linux/interrupt.h>
+#include <linux/cpu.h>
+#include <linux/stop_machine.h>
+#include <linux/time.h>
+#include <linux/device.h>
+#include <linux/delay.h>
+#include <linux/init.h>
+#include <linux/smp.h>
+#include <linux/types.h>
+#include <linux/profile.h>
+#include <linux/timex.h>
+#include <linux/notifier.h>
+#include <linux/timekeeper_internal.h>
+#include <linux/clockchips.h>
+#include <linux/gfp.h>
+#include <linux/kprobes.h>
+#include <linux/uaccess.h>
+#include <asm/facility.h>
+#include <asm/delay.h>
+#include <asm/div64.h>
+#include <asm/vdso.h>
+#include <asm/irq.h>
+#include <asm/irq_regs.h>
+#include <asm/vtimer.h>
+#include <asm/stp.h>
+#include <asm/cio.h>
+#include "entry.h"
+
+unsigned char tod_clock_base[16] __aligned(8) = {
+ /* Force to data section. */
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
+};
+EXPORT_SYMBOL_GPL(tod_clock_base);
+
+u64 clock_comparator_max = -1ULL;
+EXPORT_SYMBOL_GPL(clock_comparator_max);
+
+static DEFINE_PER_CPU(struct clock_event_device, comparators);
+
+ATOMIC_NOTIFIER_HEAD(s390_epoch_delta_notifier);
+EXPORT_SYMBOL(s390_epoch_delta_notifier);
+
+unsigned char ptff_function_mask[16];
+
+static unsigned long long lpar_offset;
+static unsigned long long initial_leap_seconds;
+static unsigned long long tod_steering_end;
+static long long tod_steering_delta;
+
+/*
+ * Get time offsets with PTFF
+ */
+void __init time_early_init(void)
+{
+ struct ptff_qto qto;
+ struct ptff_qui qui;
+
+ /* Initialize TOD steering parameters */
+ tod_steering_end = *(unsigned long long *) &tod_clock_base[1];
+ vdso_data->ts_end = tod_steering_end;
+
+ if (!test_facility(28))
+ return;
+
+ ptff(&ptff_function_mask, sizeof(ptff_function_mask), PTFF_QAF);
+
+ /* get LPAR offset */
+ if (ptff_query(PTFF_QTO) && ptff(&qto, sizeof(qto), PTFF_QTO) == 0)
+ lpar_offset = qto.tod_epoch_difference;
+
+ /* get initial leap seconds */
+ if (ptff_query(PTFF_QUI) && ptff(&qui, sizeof(qui), PTFF_QUI) == 0)
+ initial_leap_seconds = (unsigned long long)
+ ((long) qui.old_leap * 4096000000L);
+}
+
+/*
+ * Scheduler clock - returns current time in nanosec units.
+ */
+unsigned long long notrace sched_clock(void)
+{
+ return tod_to_ns(get_tod_clock_monotonic());
+}
+NOKPROBE_SYMBOL(sched_clock);
+
+/*
+ * Monotonic_clock - returns # of nanoseconds passed since time_init()
+ */
+unsigned long long monotonic_clock(void)
+{
+ return sched_clock();
+}
+EXPORT_SYMBOL(monotonic_clock);
+
+static void ext_to_timespec64(unsigned char *clk, struct timespec64 *xt)
+{
+ unsigned long long high, low, rem, sec, nsec;
+
+ /* Split extended TOD clock into micro-seconds and sub-micro-seconds */
+ high = (*(unsigned long long *) clk) >> 4;
+ low = (*(unsigned long long *)&clk[7]) << 4;
+ /* Calculate seconds and nano-seconds */
+ sec = high;
+ rem = do_div(sec, 1000000);
+ nsec = (((low >> 32) + (rem << 32)) * 1000) >> 32;
+
+ xt->tv_sec = sec;
+ xt->tv_nsec = nsec;
+}
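To make the arithmetic above concrete (assuming the usual z/Architecture TOD format,
where bit 51 of the basic 64-bit clock represents one microsecond): shifting the first
doubleword of the extended clock right by 4 yields a count of whole microseconds (the
epoch index sits in the high-order bits, already adjusted by the callers), while "low"
holds the sub-microsecond fraction scaled to 2^64. The nanosecond line then computes,
in 32.32 fixed point,

    nsec = (rem + low / 2^64) * 1000

since (low >> 32) and (rem << 32) are both microsecond values scaled by 2^32 and the
final >> 32 removes that scale again.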
+
+void clock_comparator_work(void)
+{
+ struct clock_event_device *cd;
+
+ S390_lowcore.clock_comparator = clock_comparator_max;
+ cd = this_cpu_ptr(&comparators);
+ cd->event_handler(cd);
+}
+
+static int s390_next_event(unsigned long delta,
+ struct clock_event_device *evt)
+{
+ S390_lowcore.clock_comparator = get_tod_clock() + delta;
+ set_clock_comparator(S390_lowcore.clock_comparator);
+ return 0;
+}
+
+/*
+ * Set up lowcore and control register of the current cpu to
+ * enable TOD clock and clock comparator interrupts.
+ */
+void init_cpu_timer(void)
+{
+ struct clock_event_device *cd;
+ int cpu;
+
+ S390_lowcore.clock_comparator = clock_comparator_max;
+ set_clock_comparator(S390_lowcore.clock_comparator);
+
+ cpu = smp_processor_id();
+ cd = &per_cpu(comparators, cpu);
+ cd->name = "comparator";
+ cd->features = CLOCK_EVT_FEAT_ONESHOT;
+ cd->mult = 16777;
+ cd->shift = 12;
+ cd->min_delta_ns = 1;
+ cd->min_delta_ticks = 1;
+ cd->max_delta_ns = LONG_MAX;
+ cd->max_delta_ticks = ULONG_MAX;
+ cd->rating = 400;
+ cd->cpumask = cpumask_of(cpu);
+ cd->set_next_event = s390_next_event;
+
+ clockevents_register_device(cd);
+
+ /* Enable clock comparator timer interrupt. */
+ __ctl_set_bit(0,11);
+
+ /* Always allow the timing alert external interrupt. */
+ __ctl_set_bit(0, 4);
+}
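The mult/shift pair above encodes the nanosecond-to-TOD-unit conversion used by the
clockevents core (ticks = ns * mult >> shift): one TOD clock unit is 1/4096 of a
microsecond, so 1 ns corresponds to 4.096 units, and 4.096 * 2^12 rounds to 16777
with shift = 12.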
+
+static void clock_comparator_interrupt(struct ext_code ext_code,
+ unsigned int param32,
+ unsigned long param64)
+{
+ inc_irq_stat(IRQEXT_CLK);
+ if (S390_lowcore.clock_comparator == clock_comparator_max)
+ set_clock_comparator(S390_lowcore.clock_comparator);
+}
+
+static void stp_timing_alert(struct stp_irq_parm *);
+
+static void timing_alert_interrupt(struct ext_code ext_code,
+ unsigned int param32, unsigned long param64)
+{
+ inc_irq_stat(IRQEXT_TLA);
+ if (param32 & 0x00038000)
+ stp_timing_alert((struct stp_irq_parm *) &param32);
+}
+
+static void stp_reset(void);
+
+void read_persistent_clock64(struct timespec64 *ts)
+{
+ unsigned char clk[STORE_CLOCK_EXT_SIZE];
+ __u64 delta;
+
+ delta = initial_leap_seconds + TOD_UNIX_EPOCH;
+ get_tod_clock_ext(clk);
+ *(__u64 *) &clk[1] -= delta;
+ if (*(__u64 *) &clk[1] > delta)
+ clk[0]--;
+ ext_to_timespec64(clk, ts);
+}
+
+void __init read_persistent_wall_and_boot_offset(struct timespec64 *wall_time,
+ struct timespec64 *boot_offset)
+{
+ unsigned char clk[STORE_CLOCK_EXT_SIZE];
+ struct timespec64 boot_time;
+ __u64 delta;
+
+ delta = initial_leap_seconds + TOD_UNIX_EPOCH;
+ memcpy(clk, tod_clock_base, STORE_CLOCK_EXT_SIZE);
+ *(__u64 *)&clk[1] -= delta;
+ if (*(__u64 *)&clk[1] > delta)
+ clk[0]--;
+ ext_to_timespec64(clk, &boot_time);
+
+ read_persistent_clock64(wall_time);
+ *boot_offset = timespec64_sub(*wall_time, boot_time);
+}
+
+static u64 read_tod_clock(struct clocksource *cs)
+{
+ unsigned long long now, adj;
+
+ preempt_disable(); /* protect from changes to steering parameters */
+ now = get_tod_clock();
+ adj = tod_steering_end - now;
+ if (unlikely((s64) adj >= 0))
+ /*
+ * manually steer by 1 cycle every 2^15 cycles. This
+ * corresponds to shifting the tod delta by 15. 1s is
+ * therefore steered in ~9h. The adjust will decrease
+ * over time, until it finally reaches 0.
+ */
+ now += (tod_steering_delta < 0) ? (adj >> 15) : -(adj >> 15);
+ preempt_enable();
+ return now;
+}
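Plugging numbers into the comment above: clock_sync_global() below sizes the steering
window as |tod_steering_delta| << 15 TOD units while read_tod_clock() removes
adj >> 15 of the remaining window, so the visible clock rate is offset by one part in
2^15. A one second offset (4,096,000,000 TOD units) is therefore smoothed out over
roughly 2^15 seconds, i.e. the ~9 hours mentioned above.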
+
+static struct clocksource clocksource_tod = {
+ .name = "tod",
+ .rating = 400,
+ .read = read_tod_clock,
+ .mask = -1ULL,
+ .mult = 1000,
+ .shift = 12,
+ .flags = CLOCK_SOURCE_IS_CONTINUOUS,
+};
+
+struct clocksource * __init clocksource_default_clock(void)
+{
+ return &clocksource_tod;
+}
+
+void update_vsyscall(struct timekeeper *tk)
+{
+ u64 nsecps;
+
+ if (tk->tkr_mono.clock != &clocksource_tod)
+ return;
+
+ /* Make userspace gettimeofday spin until we're done. */
+ ++vdso_data->tb_update_count;
+ smp_wmb();
+ vdso_data->xtime_tod_stamp = tk->tkr_mono.cycle_last;
+ vdso_data->xtime_clock_sec = tk->xtime_sec;
+ vdso_data->xtime_clock_nsec = tk->tkr_mono.xtime_nsec;
+ vdso_data->wtom_clock_sec =
+ tk->xtime_sec + tk->wall_to_monotonic.tv_sec;
+ vdso_data->wtom_clock_nsec = tk->tkr_mono.xtime_nsec +
+ + ((u64) tk->wall_to_monotonic.tv_nsec << tk->tkr_mono.shift);
+ nsecps = (u64) NSEC_PER_SEC << tk->tkr_mono.shift;
+ while (vdso_data->wtom_clock_nsec >= nsecps) {
+ vdso_data->wtom_clock_nsec -= nsecps;
+ vdso_data->wtom_clock_sec++;
+ }
+
+ vdso_data->xtime_coarse_sec = tk->xtime_sec;
+ vdso_data->xtime_coarse_nsec =
+ (long)(tk->tkr_mono.xtime_nsec >> tk->tkr_mono.shift);
+ vdso_data->wtom_coarse_sec =
+ vdso_data->xtime_coarse_sec + tk->wall_to_monotonic.tv_sec;
+ vdso_data->wtom_coarse_nsec =
+ vdso_data->xtime_coarse_nsec + tk->wall_to_monotonic.tv_nsec;
+ while (vdso_data->wtom_coarse_nsec >= NSEC_PER_SEC) {
+ vdso_data->wtom_coarse_nsec -= NSEC_PER_SEC;
+ vdso_data->wtom_coarse_sec++;
+ }
+
+ vdso_data->tk_mult = tk->tkr_mono.mult;
+ vdso_data->tk_shift = tk->tkr_mono.shift;
+ vdso_data->hrtimer_res = hrtimer_resolution;
+ smp_wmb();
+ ++vdso_data->tb_update_count;
+}
+
+extern struct timezone sys_tz;
+
+void update_vsyscall_tz(void)
+{
+ vdso_data->tz_minuteswest = sys_tz.tz_minuteswest;
+ vdso_data->tz_dsttime = sys_tz.tz_dsttime;
+}
+
+/*
+ * Initialize the TOD clock and the CPU timer of
+ * the boot cpu.
+ */
+void __init time_init(void)
+{
+ /* Reset time synchronization interfaces. */
+ stp_reset();
+
+ /* request the clock comparator external interrupt */
+ if (register_external_irq(EXT_IRQ_CLK_COMP, clock_comparator_interrupt))
+ panic("Couldn't request external interrupt 0x1004");
+
+ /* request the timing alert external interrupt */
+ if (register_external_irq(EXT_IRQ_TIMING_ALERT, timing_alert_interrupt))
+ panic("Couldn't request external interrupt 0x1406");
+
+ if (__clocksource_register(&clocksource_tod) != 0)
+ panic("Could not register TOD clock source");
+
+ /* Enable TOD clock interrupts on the boot cpu. */
+ init_cpu_timer();
+
+ /* Enable cpu timer interrupts on the boot cpu. */
+ vtime_init();
+}
+
+static DEFINE_PER_CPU(atomic_t, clock_sync_word);
+static DEFINE_MUTEX(clock_sync_mutex);
+static unsigned long clock_sync_flags;
+
+#define CLOCK_SYNC_HAS_STP 0
+#define CLOCK_SYNC_STP 1
+#define CLOCK_SYNC_STPINFO_VALID 2
+
+/*
+ * The get_clock function for the physical clock. It will get the current
+ * TOD clock, subtract the LPAR offset and write the result to *clock.
+ * The function returns 0 if the clock is in sync with the external time
+ * source. It returns -EOPNOTSUPP if no STP facility is available,
+ * -EACCES if STP is present but not enabled, and -EAGAIN if the
+ * clock is not (yet) in sync with the external reference.
+ */
+int get_phys_clock(unsigned long *clock)
+{
+ atomic_t *sw_ptr;
+ unsigned int sw0, sw1;
+
+ sw_ptr = &get_cpu_var(clock_sync_word);
+ sw0 = atomic_read(sw_ptr);
+ *clock = get_tod_clock() - lpar_offset;
+ sw1 = atomic_read(sw_ptr);
+ put_cpu_var(clock_sync_word);
+ if (sw0 == sw1 && (sw0 & 0x80000000U))
+ /* Success: time is in sync. */
+ return 0;
+ if (!test_bit(CLOCK_SYNC_HAS_STP, &clock_sync_flags))
+ return -EOPNOTSUPP;
+ if (!test_bit(CLOCK_SYNC_STP, &clock_sync_flags))
+ return -EACCES;
+ return -EAGAIN;
+}
+EXPORT_SYMBOL(get_phys_clock);
+
+/*
+ * Make get_phys_clock() return -EAGAIN.
+ */
+static void disable_sync_clock(void *dummy)
+{
+ atomic_t *sw_ptr = this_cpu_ptr(&clock_sync_word);
+ /*
+ * Clear the in-sync bit 2^31. All get_phys_clock calls will
+ * fail until the sync bit is turned back on. In addition,
+ * increase the "sequence" counter so that a get_phys_clock call
+ * racing with an stp event and the subsequent recovery does
+ * not see a stale in-sync indication.
+ */
+ atomic_andnot(0x80000000, sw_ptr);
+ atomic_inc(sw_ptr);
+}
+
+/*
+ * Make get_phys_clock() return 0 again.
+ * Needs to be called from a context disabled for preemption.
+ */
+static void enable_sync_clock(void)
+{
+ atomic_t *sw_ptr = this_cpu_ptr(&clock_sync_word);
+ atomic_or(0x80000000, sw_ptr);
+}
+
+/*
+ * Function to check if the clock is in sync.
+ */
+static inline int check_sync_clock(void)
+{
+ atomic_t *sw_ptr;
+ int rc;
+
+ sw_ptr = &get_cpu_var(clock_sync_word);
+ rc = (atomic_read(sw_ptr) & 0x80000000U) != 0;
+ put_cpu_var(clock_sync_word);
+ return rc;
+}
+
+/*
+ * Apply clock delta to the global data structures.
+ * This is called once on the CPU that performed the clock sync.
+ */
+static void clock_sync_global(unsigned long long delta)
+{
+ unsigned long now, adj;
+ struct ptff_qto qto;
+
+ /* Fixup the monotonic sched clock. */
+ *(unsigned long long *) &tod_clock_base[1] += delta;
+ if (*(unsigned long long *) &tod_clock_base[1] < delta)
+ /* Epoch overflow */
+ tod_clock_base[0]++;
+ /* Adjust TOD steering parameters. */
+ vdso_data->tb_update_count++;
+ now = get_tod_clock();
+ adj = tod_steering_end - now;
+ if (unlikely((s64) adj >= 0))
+ /* Calculate how much of the old adjustment is left. */
+ tod_steering_delta = (tod_steering_delta < 0) ?
+ -(adj >> 15) : (adj >> 15);
+ tod_steering_delta += delta;
+ if ((abs(tod_steering_delta) >> 48) != 0)
+ panic("TOD clock sync offset %lli is too large to drift\n",
+ tod_steering_delta);
+ tod_steering_end = now + (abs(tod_steering_delta) << 15);
+ vdso_data->ts_dir = (tod_steering_delta < 0) ? 0 : 1;
+ vdso_data->ts_end = tod_steering_end;
+ vdso_data->tb_update_count++;
+ /* Update LPAR offset. */
+ if (ptff_query(PTFF_QTO) && ptff(&qto, sizeof(qto), PTFF_QTO) == 0)
+ lpar_offset = qto.tod_epoch_difference;
+ /* Call the TOD clock change notifier. */
+ atomic_notifier_call_chain(&s390_epoch_delta_notifier, 0, &delta);
+}
+
+/*
+ * Apply clock delta to the per-CPU data structures of this CPU.
+ * This is called for each online CPU after the call to clock_sync_global.
+ */
+static void clock_sync_local(unsigned long long delta)
+{
+ /* Add the delta to the clock comparator. */
+ if (S390_lowcore.clock_comparator != clock_comparator_max) {
+ S390_lowcore.clock_comparator += delta;
+ set_clock_comparator(S390_lowcore.clock_comparator);
+ }
+ /* Adjust the last_update_clock time-stamp. */
+ S390_lowcore.last_update_clock += delta;
+}
+
+/* Single threaded workqueue used for stp sync events */
+static struct workqueue_struct *time_sync_wq;
+
+static void __init time_init_wq(void)
+{
+ if (time_sync_wq)
+ return;
+ time_sync_wq = create_singlethread_workqueue("timesync");
+}
+
+struct clock_sync_data {
+ atomic_t cpus;
+ int in_sync;
+ unsigned long long clock_delta;
+};
+
+/*
+ * Server Time Protocol (STP) code.
+ */
+static bool stp_online;
+static struct stp_sstpi stp_info;
+static void *stp_page;
+
+static void stp_work_fn(struct work_struct *work);
+static DEFINE_MUTEX(stp_work_mutex);
+static DECLARE_WORK(stp_work, stp_work_fn);
+static struct timer_list stp_timer;
+
+static int __init early_parse_stp(char *p)
+{
+ return kstrtobool(p, &stp_online);
+}
+early_param("stp", early_parse_stp);
+
+/*
+ * Reset STP attachment.
+ */
+static void __init stp_reset(void)
+{
+ int rc;
+
+ stp_page = (void *) get_zeroed_page(GFP_ATOMIC);
+ rc = chsc_sstpc(stp_page, STP_OP_CTRL, 0x0000, NULL);
+ if (rc == 0)
+ set_bit(CLOCK_SYNC_HAS_STP, &clock_sync_flags);
+ else if (stp_online) {
+ pr_warn("The real or virtual hardware system does not provide an STP interface\n");
+ free_page((unsigned long) stp_page);
+ stp_page = NULL;
+ stp_online = false;
+ }
+}
+
+static void stp_timeout(struct timer_list *unused)
+{
+ queue_work(time_sync_wq, &stp_work);
+}
+
+static int __init stp_init(void)
+{
+ if (!test_bit(CLOCK_SYNC_HAS_STP, &clock_sync_flags))
+ return 0;
+ timer_setup(&stp_timer, stp_timeout, 0);
+ time_init_wq();
+ if (!stp_online)
+ return 0;
+ queue_work(time_sync_wq, &stp_work);
+ return 0;
+}
+
+arch_initcall(stp_init);
+
+/*
+ * STP timing alert. There are three causes:
+ * 1) timing status change
+ * 2) link availability change
+ * 3) time control parameter change
+ * In all three cases we are only interested in the clock source state.
+ * If an STP clock source is now available, use it.
+ */
+static void stp_timing_alert(struct stp_irq_parm *intparm)
+{
+ if (intparm->tsc || intparm->lac || intparm->tcpc)
+ queue_work(time_sync_wq, &stp_work);
+}
+
+/*
+ * STP sync check machine check. This is called when the timing state
+ * changes from the synchronized state to the unsynchronized state.
+ * After an STP sync check the clock is not in sync. The machine check
+ * is broadcast to all cpus at the same time.
+ */
+int stp_sync_check(void)
+{
+ disable_sync_clock(NULL);
+ return 1;
+}
+
+/*
+ * STP island condition machine check. This is called when an attached
+ * server attempts to communicate over an STP link and the servers
+ * have matching CTN ids and have a valid stratum-1 configuration
+ * but the configurations do not match.
+ */
+int stp_island_check(void)
+{
+ disable_sync_clock(NULL);
+ return 1;
+}
+
+void stp_queue_work(void)
+{
+ queue_work(time_sync_wq, &stp_work);
+}
+
+static int __store_stpinfo(void)
+{
+ int rc = chsc_sstpi(stp_page, &stp_info, sizeof(struct stp_sstpi));
+
+ if (rc)
+ clear_bit(CLOCK_SYNC_STPINFO_VALID, &clock_sync_flags);
+ else
+ set_bit(CLOCK_SYNC_STPINFO_VALID, &clock_sync_flags);
+ return rc;
+}
+
+static int stpinfo_valid(void)
+{
+ return stp_online && test_bit(CLOCK_SYNC_STPINFO_VALID, &clock_sync_flags);
+}
+
+static int stp_sync_clock(void *data)
+{
+ struct clock_sync_data *sync = data;
+ unsigned long long clock_delta;
+ static int first;
+ int rc;
+
+ enable_sync_clock();
+ if (xchg(&first, 1) == 0) {
+ /* Wait until all other cpus entered the sync function. */
+ while (atomic_read(&sync->cpus) != 0)
+ cpu_relax();
+ rc = 0;
+ if (stp_info.todoff[0] || stp_info.todoff[1] ||
+ stp_info.todoff[2] || stp_info.todoff[3] ||
+ stp_info.tmd != 2) {
+ rc = chsc_sstpc(stp_page, STP_OP_SYNC, 0,
+ &clock_delta);
+ if (rc == 0) {
+ sync->clock_delta = clock_delta;
+ clock_sync_global(clock_delta);
+ rc = __store_stpinfo();
+ if (rc == 0 && stp_info.tmd != 2)
+ rc = -EAGAIN;
+ }
+ }
+ sync->in_sync = rc ? -EAGAIN : 1;
+ xchg(&first, 0);
+ } else {
+ /* Slave */
+ atomic_dec(&sync->cpus);
+ /* Wait for in_sync to be set. */
+ while (READ_ONCE(sync->in_sync) == 0)
+ __udelay(1);
+ }
+ if (sync->in_sync != 1)
+ /* Didn't work. Clear per-cpu in sync bit again. */
+ disable_sync_clock(NULL);
+ /* Apply clock delta to per-CPU fields of this CPU. */
+ clock_sync_local(sync->clock_delta);
+
+ return 0;
+}
+
+/*
+ * STP work. Check for the STP state and take over the clock
+ * synchronization if the STP clock source is usable.
+ */
+static void stp_work_fn(struct work_struct *work)
+{
+ struct clock_sync_data stp_sync;
+ int rc;
+
+ /* prevent multiple execution. */
+ mutex_lock(&stp_work_mutex);
+
+ if (!stp_online) {
+ chsc_sstpc(stp_page, STP_OP_CTRL, 0x0000, NULL);
+ del_timer_sync(&stp_timer);
+ goto out_unlock;
+ }
+
+ rc = chsc_sstpc(stp_page, STP_OP_CTRL, 0xb0e0, NULL);
+ if (rc)
+ goto out_unlock;
+
+ rc = __store_stpinfo();
+ if (rc || stp_info.c == 0)
+ goto out_unlock;
+
+ /* Skip synchronization if the clock is already in sync. */
+ if (check_sync_clock())
+ goto out_unlock;
+
+ memset(&stp_sync, 0, sizeof(stp_sync));
+ cpus_read_lock();
+ atomic_set(&stp_sync.cpus, num_online_cpus() - 1);
+ stop_machine_cpuslocked(stp_sync_clock, &stp_sync, cpu_online_mask);
+ cpus_read_unlock();
+
+ if (!check_sync_clock())
+ /*
+ * There is a usable clock but the synchronization failed.
+ * Retry after a second.
+ */
+ mod_timer(&stp_timer, jiffies + HZ);
+
+out_unlock:
+ mutex_unlock(&stp_work_mutex);
+}
+
+/*
+ * STP subsys sysfs interface functions
+ */
+static struct bus_type stp_subsys = {
+ .name = "stp",
+ .dev_name = "stp",
+};
+
+static ssize_t stp_ctn_id_show(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ ssize_t ret = -ENODATA;
+
+ mutex_lock(&stp_work_mutex);
+ if (stpinfo_valid())
+ ret = sprintf(buf, "%016llx\n",
+ *(unsigned long long *) stp_info.ctnid);
+ mutex_unlock(&stp_work_mutex);
+ return ret;
+}
+
+static DEVICE_ATTR(ctn_id, 0400, stp_ctn_id_show, NULL);
+
+static ssize_t stp_ctn_type_show(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ ssize_t ret = -ENODATA;
+
+ mutex_lock(&stp_work_mutex);
+ if (stpinfo_valid())
+ ret = sprintf(buf, "%i\n", stp_info.ctn);
+ mutex_unlock(&stp_work_mutex);
+ return ret;
+}
+
+static DEVICE_ATTR(ctn_type, 0400, stp_ctn_type_show, NULL);
+
+static ssize_t stp_dst_offset_show(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ ssize_t ret = -ENODATA;
+
+ mutex_lock(&stp_work_mutex);
+ if (stpinfo_valid() && (stp_info.vbits & 0x2000))
+ ret = sprintf(buf, "%i\n", (int)(s16) stp_info.dsto);
+ mutex_unlock(&stp_work_mutex);
+ return ret;
+}
+
+static DEVICE_ATTR(dst_offset, 0400, stp_dst_offset_show, NULL);
+
+static ssize_t stp_leap_seconds_show(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ ssize_t ret = -ENODATA;
+
+ mutex_lock(&stp_work_mutex);
+ if (stpinfo_valid() && (stp_info.vbits & 0x8000))
+ ret = sprintf(buf, "%i\n", (int)(s16) stp_info.leaps);
+ mutex_unlock(&stp_work_mutex);
+ return ret;
+}
+
+static DEVICE_ATTR(leap_seconds, 0400, stp_leap_seconds_show, NULL);
+
+static ssize_t stp_stratum_show(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ ssize_t ret = -ENODATA;
+
+ mutex_lock(&stp_work_mutex);
+ if (stpinfo_valid())
+ ret = sprintf(buf, "%i\n", (int)(s16) stp_info.stratum);
+ mutex_unlock(&stp_work_mutex);
+ return ret;
+}
+
+static DEVICE_ATTR(stratum, 0400, stp_stratum_show, NULL);
+
+static ssize_t stp_time_offset_show(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ ssize_t ret = -ENODATA;
+
+ mutex_lock(&stp_work_mutex);
+ if (stpinfo_valid() && (stp_info.vbits & 0x0800))
+ ret = sprintf(buf, "%i\n", (int) stp_info.tto);
+ mutex_unlock(&stp_work_mutex);
+ return ret;
+}
+
+static DEVICE_ATTR(time_offset, 0400, stp_time_offset_show, NULL);
+
+static ssize_t stp_time_zone_offset_show(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ ssize_t ret = -ENODATA;
+
+ mutex_lock(&stp_work_mutex);
+ if (stpinfo_valid() && (stp_info.vbits & 0x4000))
+ ret = sprintf(buf, "%i\n", (int)(s16) stp_info.tzo);
+ mutex_unlock(&stp_work_mutex);
+ return ret;
+}
+
+static DEVICE_ATTR(time_zone_offset, 0400,
+ stp_time_zone_offset_show, NULL);
+
+static ssize_t stp_timing_mode_show(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ ssize_t ret = -ENODATA;
+
+ mutex_lock(&stp_work_mutex);
+ if (stpinfo_valid())
+ ret = sprintf(buf, "%i\n", stp_info.tmd);
+ mutex_unlock(&stp_work_mutex);
+ return ret;
+}
+
+static DEVICE_ATTR(timing_mode, 0400, stp_timing_mode_show, NULL);
+
+static ssize_t stp_timing_state_show(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ ssize_t ret = -ENODATA;
+
+ mutex_lock(&stp_work_mutex);
+ if (stpinfo_valid())
+ ret = sprintf(buf, "%i\n", stp_info.tst);
+ mutex_unlock(&stp_work_mutex);
+ return ret;
+}
+
+static DEVICE_ATTR(timing_state, 0400, stp_timing_state_show, NULL);
+
+static ssize_t stp_online_show(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ return sprintf(buf, "%i\n", stp_online);
+}
+
+static ssize_t stp_online_store(struct device *dev,
+ struct device_attribute *attr,
+ const char *buf, size_t count)
+{
+ unsigned int value;
+
+ value = simple_strtoul(buf, NULL, 0);
+ if (value != 0 && value != 1)
+ return -EINVAL;
+ if (!test_bit(CLOCK_SYNC_HAS_STP, &clock_sync_flags))
+ return -EOPNOTSUPP;
+ mutex_lock(&clock_sync_mutex);
+ stp_online = value;
+ if (stp_online)
+ set_bit(CLOCK_SYNC_STP, &clock_sync_flags);
+ else
+ clear_bit(CLOCK_SYNC_STP, &clock_sync_flags);
+ queue_work(time_sync_wq, &stp_work);
+ mutex_unlock(&clock_sync_mutex);
+ return count;
+}
+
+/*
+ * Can't use DEVICE_ATTR because the attribute should be named
+ * stp/online but dev_attr_online already exists in this file ..
+ */
+static struct device_attribute dev_attr_stp_online = {
+ .attr = { .name = "online", .mode = 0600 },
+ .show = stp_online_show,
+ .store = stp_online_store,
+};
+
+static struct device_attribute *stp_attributes[] = {
+ &dev_attr_ctn_id,
+ &dev_attr_ctn_type,
+ &dev_attr_dst_offset,
+ &dev_attr_leap_seconds,
+ &dev_attr_stp_online,
+ &dev_attr_stratum,
+ &dev_attr_time_offset,
+ &dev_attr_time_zone_offset,
+ &dev_attr_timing_mode,
+ &dev_attr_timing_state,
+ NULL
+};
+
+static int __init stp_init_sysfs(void)
+{
+ struct device_attribute **attr;
+ int rc;
+
+ rc = subsys_system_register(&stp_subsys, NULL);
+ if (rc)
+ goto out;
+ for (attr = stp_attributes; *attr; attr++) {
+ rc = device_create_file(stp_subsys.dev_root, *attr);
+ if (rc)
+ goto out_unreg;
+ }
+ return 0;
+out_unreg:
+ for (; attr >= stp_attributes; attr--)
+ device_remove_file(stp_subsys.dev_root, *attr);
+ bus_unregister(&stp_subsys);
+out:
+ return rc;
+}
+
+device_initcall(stp_init_sysfs);
diff --git a/arch/s390/kernel/topology.c b/arch/s390/kernel/topology.c
new file mode 100644
index 000000000..7b9688897
--- /dev/null
+++ b/arch/s390/kernel/topology.c
@@ -0,0 +1,641 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright IBM Corp. 2007, 2011
+ * Author(s): Heiko Carstens <heiko.carstens@de.ibm.com>
+ */
+
+#define KMSG_COMPONENT "cpu"
+#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
+
+#include <linux/workqueue.h>
+#include <linux/bootmem.h>
+#include <linux/uaccess.h>
+#include <linux/sysctl.h>
+#include <linux/cpuset.h>
+#include <linux/device.h>
+#include <linux/export.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/sched/topology.h>
+#include <linux/delay.h>
+#include <linux/init.h>
+#include <linux/slab.h>
+#include <linux/cpu.h>
+#include <linux/smp.h>
+#include <linux/mm.h>
+#include <linux/nodemask.h>
+#include <linux/node.h>
+#include <asm/sysinfo.h>
+#include <asm/numa.h>
+
+#define PTF_HORIZONTAL (0UL)
+#define PTF_VERTICAL (1UL)
+#define PTF_CHECK (2UL)
+
+enum {
+ TOPOLOGY_MODE_HW,
+ TOPOLOGY_MODE_SINGLE,
+ TOPOLOGY_MODE_PACKAGE,
+ TOPOLOGY_MODE_UNINITIALIZED
+};
+
+struct mask_info {
+ struct mask_info *next;
+ unsigned char id;
+ cpumask_t mask;
+};
+
+static int topology_mode = TOPOLOGY_MODE_UNINITIALIZED;
+static void set_topology_timer(void);
+static void topology_work_fn(struct work_struct *work);
+static struct sysinfo_15_1_x *tl_info;
+
+static DECLARE_WORK(topology_work, topology_work_fn);
+
+/*
+ * Socket/Book linked lists and cpu_topology updates are
+ * protected by "sched_domains_mutex".
+ */
+static struct mask_info socket_info;
+static struct mask_info book_info;
+static struct mask_info drawer_info;
+
+struct cpu_topology_s390 cpu_topology[NR_CPUS];
+EXPORT_SYMBOL_GPL(cpu_topology);
+
+cpumask_t cpus_with_topology;
+
+static cpumask_t cpu_group_map(struct mask_info *info, unsigned int cpu)
+{
+ cpumask_t mask;
+
+ cpumask_copy(&mask, cpumask_of(cpu));
+ switch (topology_mode) {
+ case TOPOLOGY_MODE_HW:
+ while (info) {
+ if (cpumask_test_cpu(cpu, &info->mask)) {
+ mask = info->mask;
+ break;
+ }
+ info = info->next;
+ }
+ if (cpumask_empty(&mask))
+ cpumask_copy(&mask, cpumask_of(cpu));
+ break;
+ case TOPOLOGY_MODE_PACKAGE:
+ cpumask_copy(&mask, cpu_present_mask);
+ break;
+ default:
+ /* fallthrough */
+ case TOPOLOGY_MODE_SINGLE:
+ cpumask_copy(&mask, cpumask_of(cpu));
+ break;
+ }
+ return mask;
+}
+
+static cpumask_t cpu_thread_map(unsigned int cpu)
+{
+ cpumask_t mask;
+ int i;
+
+ cpumask_copy(&mask, cpumask_of(cpu));
+ if (topology_mode != TOPOLOGY_MODE_HW)
+ return mask;
+ cpu -= cpu % (smp_cpu_mtid + 1);
+ for (i = 0; i <= smp_cpu_mtid; i++)
+ if (cpu_present(cpu + i))
+ cpumask_set_cpu(cpu + i, &mask);
+ return mask;
+}
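As an example of the rounding above: with smp_cpu_mtid = 1 (two threads per core),
cpu 5 is rounded down to 4 by "cpu -= cpu % (smp_cpu_mtid + 1)", and the resulting
thread mask contains cpus 4 and 5 (provided both are present), i.e. the hardware
threads of the same core.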
+
+#define TOPOLOGY_CORE_BITS 64
+
+static void add_cpus_to_mask(struct topology_core *tl_core,
+ struct mask_info *drawer,
+ struct mask_info *book,
+ struct mask_info *socket)
+{
+ struct cpu_topology_s390 *topo;
+ unsigned int core;
+
+ for_each_set_bit(core, &tl_core->mask, TOPOLOGY_CORE_BITS) {
+ unsigned int rcore;
+ int lcpu, i;
+
+ rcore = TOPOLOGY_CORE_BITS - 1 - core + tl_core->origin;
+ lcpu = smp_find_processor_id(rcore << smp_cpu_mt_shift);
+ if (lcpu < 0)
+ continue;
+ for (i = 0; i <= smp_cpu_mtid; i++) {
+ topo = &cpu_topology[lcpu + i];
+ topo->drawer_id = drawer->id;
+ topo->book_id = book->id;
+ topo->socket_id = socket->id;
+ topo->core_id = rcore;
+ topo->thread_id = lcpu + i;
+ topo->dedicated = tl_core->d;
+ cpumask_set_cpu(lcpu + i, &drawer->mask);
+ cpumask_set_cpu(lcpu + i, &book->mask);
+ cpumask_set_cpu(lcpu + i, &socket->mask);
+ cpumask_set_cpu(lcpu + i, &cpus_with_topology);
+ smp_cpu_set_polarization(lcpu + i, tl_core->pp);
+ }
+ }
+}
+
+static void clear_masks(void)
+{
+ struct mask_info *info;
+
+ info = &socket_info;
+ while (info) {
+ cpumask_clear(&info->mask);
+ info = info->next;
+ }
+ info = &book_info;
+ while (info) {
+ cpumask_clear(&info->mask);
+ info = info->next;
+ }
+ info = &drawer_info;
+ while (info) {
+ cpumask_clear(&info->mask);
+ info = info->next;
+ }
+}
+
+static union topology_entry *next_tle(union topology_entry *tle)
+{
+ if (!tle->nl)
+ return (union topology_entry *)((struct topology_core *)tle + 1);
+ return (union topology_entry *)((struct topology_container *)tle + 1);
+}
+
+static void tl_to_masks(struct sysinfo_15_1_x *info)
+{
+ struct mask_info *socket = &socket_info;
+ struct mask_info *book = &book_info;
+ struct mask_info *drawer = &drawer_info;
+ union topology_entry *tle, *end;
+
+ clear_masks();
+ tle = info->tle;
+ end = (union topology_entry *)((unsigned long)info + info->length);
+ while (tle < end) {
+ switch (tle->nl) {
+ case 3:
+ drawer = drawer->next;
+ drawer->id = tle->container.id;
+ break;
+ case 2:
+ book = book->next;
+ book->id = tle->container.id;
+ break;
+ case 1:
+ socket = socket->next;
+ socket->id = tle->container.id;
+ break;
+ case 0:
+ add_cpus_to_mask(&tle->cpu, drawer, book, socket);
+ break;
+ default:
+ clear_masks();
+ return;
+ }
+ tle = next_tle(tle);
+ }
+}
+
+static void topology_update_polarization_simple(void)
+{
+ int cpu;
+
+ for_each_possible_cpu(cpu)
+ smp_cpu_set_polarization(cpu, POLARIZATION_HRZ);
+}
+
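+/*
+ * Perform Topology Function (PTF). The function code selects horizontal
+ * or vertical polarization or checks whether a topology-change report is
+ * pending; the condition code of the instruction is returned.
+ */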
+static int ptf(unsigned long fc)
+{
+ int rc;
+
+ asm volatile(
+ " .insn rre,0xb9a20000,%1,%1\n"
+ " ipm %0\n"
+ " srl %0,28\n"
+ : "=d" (rc)
+ : "d" (fc) : "cc");
+ return rc;
+}
+
+int topology_set_cpu_management(int fc)
+{
+ int cpu, rc;
+
+ if (!MACHINE_HAS_TOPOLOGY)
+ return -EOPNOTSUPP;
+ if (fc)
+ rc = ptf(PTF_VERTICAL);
+ else
+ rc = ptf(PTF_HORIZONTAL);
+ if (rc)
+ return -EBUSY;
+ for_each_possible_cpu(cpu)
+ smp_cpu_set_polarization(cpu, POLARIZATION_UNKNOWN);
+ return rc;
+}
+
+static void update_cpu_masks(void)
+{
+ struct cpu_topology_s390 *topo;
+ int cpu, id;
+
+ for_each_possible_cpu(cpu) {
+ topo = &cpu_topology[cpu];
+ topo->thread_mask = cpu_thread_map(cpu);
+ topo->core_mask = cpu_group_map(&socket_info, cpu);
+ topo->book_mask = cpu_group_map(&book_info, cpu);
+ topo->drawer_mask = cpu_group_map(&drawer_info, cpu);
+ if (topology_mode != TOPOLOGY_MODE_HW) {
+ id = topology_mode == TOPOLOGY_MODE_PACKAGE ? 0 : cpu;
+ topo->thread_id = cpu;
+ topo->core_id = cpu;
+ topo->socket_id = id;
+ topo->book_id = id;
+ topo->drawer_id = id;
+ if (cpu_present(cpu))
+ cpumask_set_cpu(cpu, &cpus_with_topology);
+ }
+ }
+ numa_update_cpu_topology();
+}
+
+void store_topology(struct sysinfo_15_1_x *info)
+{
+ stsi(info, 15, 1, topology_mnest_limit());
+}
+
+static void __arch_update_dedicated_flag(void *arg)
+{
+ if (topology_cpu_dedicated(smp_processor_id()))
+ set_cpu_flag(CIF_DEDICATED_CPU);
+ else
+ clear_cpu_flag(CIF_DEDICATED_CPU);
+}
+
+static int __arch_update_cpu_topology(void)
+{
+ struct sysinfo_15_1_x *info = tl_info;
+ int rc = 0;
+
+ mutex_lock(&smp_cpu_state_mutex);
+ cpumask_clear(&cpus_with_topology);
+ if (MACHINE_HAS_TOPOLOGY) {
+ rc = 1;
+ store_topology(info);
+ tl_to_masks(info);
+ }
+ update_cpu_masks();
+ if (!MACHINE_HAS_TOPOLOGY)
+ topology_update_polarization_simple();
+ mutex_unlock(&smp_cpu_state_mutex);
+ return rc;
+}
+
+int arch_update_cpu_topology(void)
+{
+ struct device *dev;
+ int cpu, rc;
+
+ rc = __arch_update_cpu_topology();
+ on_each_cpu(__arch_update_dedicated_flag, NULL, 0);
+ for_each_online_cpu(cpu) {
+ dev = get_cpu_device(cpu);
+ if (dev)
+ kobject_uevent(&dev->kobj, KOBJ_CHANGE);
+ }
+ return rc;
+}
+
+static void topology_work_fn(struct work_struct *work)
+{
+ rebuild_sched_domains();
+}
+
+void topology_schedule_update(void)
+{
+ schedule_work(&topology_work);
+}
+
+static void topology_flush_work(void)
+{
+ flush_work(&topology_work);
+}
+
+static void topology_timer_fn(struct timer_list *unused)
+{
+ if (ptf(PTF_CHECK))
+ topology_schedule_update();
+ set_topology_timer();
+}
+
+static struct timer_list topology_timer;
+
+static atomic_t topology_poll = ATOMIC_INIT(0);
+
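+/*
+ * Poll for topology changes every 100ms while a change is expected
+ * (topology_poll > 0), otherwise fall back to a 60 second interval.
+ */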
+static void set_topology_timer(void)
+{
+ if (atomic_add_unless(&topology_poll, -1, 0))
+ mod_timer(&topology_timer, jiffies + HZ / 10);
+ else
+ mod_timer(&topology_timer, jiffies + HZ * 60);
+}
+
+void topology_expect_change(void)
+{
+ if (!MACHINE_HAS_TOPOLOGY)
+ return;
+ /* This is racy, but it doesn't matter since it is just a heuristic.
+ * Worst case is that we poll at a higher frequency for a bit longer.
+ */
+ if (atomic_read(&topology_poll) > 60)
+ return;
+ atomic_add(60, &topology_poll);
+ set_topology_timer();
+}
+
+static int cpu_management;
+
+static ssize_t dispatching_show(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ ssize_t count;
+
+ mutex_lock(&smp_cpu_state_mutex);
+ count = sprintf(buf, "%d\n", cpu_management);
+ mutex_unlock(&smp_cpu_state_mutex);
+ return count;
+}
+
+static ssize_t dispatching_store(struct device *dev,
+ struct device_attribute *attr,
+ const char *buf,
+ size_t count)
+{
+ int val, rc;
+ char delim;
+
+ if (sscanf(buf, "%d %c", &val, &delim) != 1)
+ return -EINVAL;
+ if (val != 0 && val != 1)
+ return -EINVAL;
+ rc = 0;
+ get_online_cpus();
+ mutex_lock(&smp_cpu_state_mutex);
+ if (cpu_management == val)
+ goto out;
+ rc = topology_set_cpu_management(val);
+ if (rc)
+ goto out;
+ cpu_management = val;
+ topology_expect_change();
+out:
+ mutex_unlock(&smp_cpu_state_mutex);
+ put_online_cpus();
+ return rc ? rc : count;
+}
+static DEVICE_ATTR_RW(dispatching);
+
+static ssize_t cpu_polarization_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ int cpu = dev->id;
+ ssize_t count;
+
+ mutex_lock(&smp_cpu_state_mutex);
+ switch (smp_cpu_get_polarization(cpu)) {
+ case POLARIZATION_HRZ:
+ count = sprintf(buf, "horizontal\n");
+ break;
+ case POLARIZATION_VL:
+ count = sprintf(buf, "vertical:low\n");
+ break;
+ case POLARIZATION_VM:
+ count = sprintf(buf, "vertical:medium\n");
+ break;
+ case POLARIZATION_VH:
+ count = sprintf(buf, "vertical:high\n");
+ break;
+ default:
+ count = sprintf(buf, "unknown\n");
+ break;
+ }
+ mutex_unlock(&smp_cpu_state_mutex);
+ return count;
+}
+static DEVICE_ATTR(polarization, 0444, cpu_polarization_show, NULL);
+
+static struct attribute *topology_cpu_attrs[] = {
+ &dev_attr_polarization.attr,
+ NULL,
+};
+
+static struct attribute_group topology_cpu_attr_group = {
+ .attrs = topology_cpu_attrs,
+};
+
+static ssize_t cpu_dedicated_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ int cpu = dev->id;
+ ssize_t count;
+
+ mutex_lock(&smp_cpu_state_mutex);
+ count = sprintf(buf, "%d\n", topology_cpu_dedicated(cpu));
+ mutex_unlock(&smp_cpu_state_mutex);
+ return count;
+}
+static DEVICE_ATTR(dedicated, 0444, cpu_dedicated_show, NULL);
+
+static struct attribute *topology_extra_cpu_attrs[] = {
+ &dev_attr_dedicated.attr,
+ NULL,
+};
+
+static struct attribute_group topology_extra_cpu_attr_group = {
+ .attrs = topology_extra_cpu_attrs,
+};
+
+int topology_cpu_init(struct cpu *cpu)
+{
+ int rc;
+
+ rc = sysfs_create_group(&cpu->dev.kobj, &topology_cpu_attr_group);
+ if (rc || !MACHINE_HAS_TOPOLOGY)
+ return rc;
+ rc = sysfs_create_group(&cpu->dev.kobj, &topology_extra_cpu_attr_group);
+ if (rc)
+ sysfs_remove_group(&cpu->dev.kobj, &topology_cpu_attr_group);
+ return rc;
+}
+
+static const struct cpumask *cpu_thread_mask(int cpu)
+{
+ return &cpu_topology[cpu].thread_mask;
+}
+
+const struct cpumask *cpu_coregroup_mask(int cpu)
+{
+ return &cpu_topology[cpu].core_mask;
+}
+
+static const struct cpumask *cpu_book_mask(int cpu)
+{
+ return &cpu_topology[cpu].book_mask;
+}
+
+static const struct cpumask *cpu_drawer_mask(int cpu)
+{
+ return &cpu_topology[cpu].drawer_mask;
+}
+
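+/*
+ * Scheduling domain hierarchy: SMT siblings, cores within a socket,
+ * books, drawers and finally the whole machine (DIE level).
+ */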
+static struct sched_domain_topology_level s390_topology[] = {
+ { cpu_thread_mask, cpu_smt_flags, SD_INIT_NAME(SMT) },
+ { cpu_coregroup_mask, cpu_core_flags, SD_INIT_NAME(MC) },
+ { cpu_book_mask, SD_INIT_NAME(BOOK) },
+ { cpu_drawer_mask, SD_INIT_NAME(DRAWER) },
+ { cpu_cpu_mask, SD_INIT_NAME(DIE) },
+ { NULL, },
+};
+
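+/*
+ * Allocate one mask_info per container that can exist at the requested
+ * nesting level, i.e. the product of the magnitudes of all levels above
+ * it as reported by SYSIB 15.1.x.
+ */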
+static void __init alloc_masks(struct sysinfo_15_1_x *info,
+ struct mask_info *mask, int offset)
+{
+ int i, nr_masks;
+
+ nr_masks = info->mag[TOPOLOGY_NR_MAG - offset];
+ for (i = 0; i < info->mnest - offset; i++)
+ nr_masks *= info->mag[TOPOLOGY_NR_MAG - offset - 1 - i];
+ nr_masks = max(nr_masks, 1);
+ for (i = 0; i < nr_masks; i++) {
+ mask->next = memblock_virt_alloc(sizeof(*mask->next), 8);
+ mask = mask->next;
+ }
+}
+
+void __init topology_init_early(void)
+{
+ struct sysinfo_15_1_x *info;
+
+ set_sched_topology(s390_topology);
+ if (topology_mode == TOPOLOGY_MODE_UNINITIALIZED) {
+ if (MACHINE_HAS_TOPOLOGY)
+ topology_mode = TOPOLOGY_MODE_HW;
+ else
+ topology_mode = TOPOLOGY_MODE_SINGLE;
+ }
+ if (!MACHINE_HAS_TOPOLOGY)
+ goto out;
+ tl_info = memblock_virt_alloc(PAGE_SIZE, PAGE_SIZE);
+ info = tl_info;
+ store_topology(info);
+ pr_info("The CPU configuration topology of the machine is: %d %d %d %d %d %d / %d\n",
+ info->mag[0], info->mag[1], info->mag[2], info->mag[3],
+ info->mag[4], info->mag[5], info->mnest);
+ alloc_masks(info, &socket_info, 1);
+ alloc_masks(info, &book_info, 2);
+ alloc_masks(info, &drawer_info, 3);
+out:
+ __arch_update_cpu_topology();
+ __arch_update_dedicated_flag(NULL);
+}
+
+static inline int topology_get_mode(int enabled)
+{
+ if (!enabled)
+ return TOPOLOGY_MODE_SINGLE;
+ return MACHINE_HAS_TOPOLOGY ? TOPOLOGY_MODE_HW : TOPOLOGY_MODE_PACKAGE;
+}
+
+static inline int topology_is_enabled(void)
+{
+ return topology_mode != TOPOLOGY_MODE_SINGLE;
+}
+
+static int __init topology_setup(char *str)
+{
+ bool enabled;
+ int rc;
+
+ rc = kstrtobool(str, &enabled);
+ if (rc)
+ return rc;
+ topology_mode = topology_get_mode(enabled);
+ return 0;
+}
+early_param("topology", topology_setup);
+
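+/*
+ * Handler for /proc/sys/s390/topology. A local ctl_table entry is used so
+ * that the 0/1 range check applies to a temporary "enabled" flag; the
+ * topology mode itself is only changed under smp_cpu_state_mutex.
+ */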
+static int topology_ctl_handler(struct ctl_table *ctl, int write,
+ void __user *buffer, size_t *lenp, loff_t *ppos)
+{
+ int enabled = topology_is_enabled();
+ int new_mode;
+ int zero = 0;
+ int one = 1;
+ int rc;
+ struct ctl_table ctl_entry = {
+ .procname = ctl->procname,
+ .data = &enabled,
+ .maxlen = sizeof(int),
+ .extra1 = &zero,
+ .extra2 = &one,
+ };
+
+ rc = proc_douintvec_minmax(&ctl_entry, write, buffer, lenp, ppos);
+ if (rc < 0 || !write)
+ return rc;
+
+ mutex_lock(&smp_cpu_state_mutex);
+ new_mode = topology_get_mode(enabled);
+ if (topology_mode != new_mode) {
+ topology_mode = new_mode;
+ topology_schedule_update();
+ }
+ mutex_unlock(&smp_cpu_state_mutex);
+ topology_flush_work();
+
+ return rc;
+}
+
+static struct ctl_table topology_ctl_table[] = {
+ {
+ .procname = "topology",
+ .mode = 0644,
+ .proc_handler = topology_ctl_handler,
+ },
+ { },
+};
+
+static struct ctl_table topology_dir_table[] = {
+ {
+ .procname = "s390",
+ .maxlen = 0,
+ .mode = 0555,
+ .child = topology_ctl_table,
+ },
+ { },
+};
+
+static int __init topology_init(void)
+{
+ timer_setup(&topology_timer, topology_timer_fn, TIMER_DEFERRABLE);
+ if (MACHINE_HAS_TOPOLOGY)
+ set_topology_timer();
+ else
+ topology_update_polarization_simple();
+ register_sysctl_table(topology_dir_table);
+ return device_create_file(cpu_subsys.dev_root, &dev_attr_dispatching);
+}
+device_initcall(topology_init);
diff --git a/arch/s390/kernel/trace.c b/arch/s390/kernel/trace.c
new file mode 100644
index 000000000..11a669f3c
--- /dev/null
+++ b/arch/s390/kernel/trace.c
@@ -0,0 +1,33 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Tracepoint definitions for s390
+ *
+ * Copyright IBM Corp. 2015
+ * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>
+ */
+
+#include <linux/percpu.h>
+#define CREATE_TRACE_POINTS
+#include <asm/trace/diag.h>
+
+EXPORT_TRACEPOINT_SYMBOL(s390_diagnose);
+
+static DEFINE_PER_CPU(unsigned int, diagnose_trace_depth);
+
+void notrace trace_s390_diagnose_norecursion(int diag_nr)
+{
+ unsigned long flags;
+ unsigned int *depth;
+
+ /* Avoid lockdep recursion. */
+ if (IS_ENABLED(CONFIG_LOCKDEP))
+ return;
+ local_irq_save(flags);
+ depth = this_cpu_ptr(&diagnose_trace_depth);
+ if (*depth == 0) {
+ (*depth)++;
+ trace_s390_diagnose(diag_nr);
+ (*depth)--;
+ }
+ local_irq_restore(flags);
+}
diff --git a/arch/s390/kernel/traps.c b/arch/s390/kernel/traps.c
new file mode 100644
index 000000000..8003b38c1
--- /dev/null
+++ b/arch/s390/kernel/traps.c
@@ -0,0 +1,267 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * S390 version
+ * Copyright IBM Corp. 1999, 2000
+ * Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com),
+ * Denis Joseph Barrow (djbarrow@de.ibm.com,barrow_dj@yahoo.com),
+ *
+ * Derived from "arch/i386/kernel/traps.c"
+ * Copyright (C) 1991, 1992 Linus Torvalds
+ */
+
+/*
+ * 'Traps.c' handles hardware traps and faults after we have saved some
+ * state in the entry code ('entry.S').
+ */
+#include <linux/kprobes.h>
+#include <linux/kdebug.h>
+#include <linux/extable.h>
+#include <linux/ptrace.h>
+#include <linux/sched.h>
+#include <linux/sched/debug.h>
+#include <linux/mm.h>
+#include <linux/slab.h>
+#include <linux/uaccess.h>
+#include <linux/cpu.h>
+#include <asm/fpu/api.h>
+#include "entry.h"
+
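+/*
+ * Return the address of the instruction that caused the trap. If the
+ * program check occurred inside a transaction (bit 0x200 of the
+ * interruption code) the address is taken from the aborted-transaction
+ * instruction address in the transaction diagnostic block; otherwise the
+ * PSW address is rewound by the instruction length stored in the upper
+ * half of int_code.
+ */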
+static inline void __user *get_trap_ip(struct pt_regs *regs)
+{
+ unsigned long address;
+
+ if (regs->int_code & 0x200)
+ address = *(unsigned long *)(current->thread.trap_tdb + 24);
+ else
+ address = regs->psw.addr;
+ return (void __user *) (address - (regs->int_code >> 16));
+}
+
+int is_valid_bugaddr(unsigned long addr)
+{
+ return 1;
+}
+
+void do_report_trap(struct pt_regs *regs, int si_signo, int si_code, char *str)
+{
+ if (user_mode(regs)) {
+ force_sig_fault(si_signo, si_code, get_trap_ip(regs), current);
+ report_user_fault(regs, si_signo, 0);
+ } else {
+ const struct exception_table_entry *fixup;
+ fixup = search_exception_tables(regs->psw.addr);
+ if (fixup)
+ regs->psw.addr = extable_fixup(fixup);
+ else {
+ enum bug_trap_type btt;
+
+ btt = report_bug(regs->psw.addr, regs);
+ if (btt == BUG_TRAP_TYPE_WARN)
+ return;
+ die(regs, str);
+ }
+ }
+}
+
+static void do_trap(struct pt_regs *regs, int si_signo, int si_code, char *str)
+{
+ if (notify_die(DIE_TRAP, str, regs, 0,
+ regs->int_code, si_signo) == NOTIFY_STOP)
+ return;
+ do_report_trap(regs, si_signo, si_code, str);
+}
+NOKPROBE_SYMBOL(do_trap);
+
+void do_per_trap(struct pt_regs *regs)
+{
+ if (notify_die(DIE_SSTEP, "sstep", regs, 0, 0, SIGTRAP) == NOTIFY_STOP)
+ return;
+ if (!current->ptrace)
+ return;
+ force_sig_fault(SIGTRAP, TRAP_HWBKPT,
+ (void __force __user *) current->thread.per_event.address, current);
+}
+NOKPROBE_SYMBOL(do_per_trap);
+
+void default_trap_handler(struct pt_regs *regs)
+{
+ if (user_mode(regs)) {
+ report_user_fault(regs, SIGSEGV, 0);
+ do_exit(SIGSEGV);
+ } else
+ die(regs, "Unknown program exception");
+}
+
+#define DO_ERROR_INFO(name, signr, sicode, str) \
+void name(struct pt_regs *regs) \
+{ \
+ do_trap(regs, signr, sicode, str); \
+}
+
+DO_ERROR_INFO(addressing_exception, SIGILL, ILL_ILLADR,
+ "addressing exception")
+DO_ERROR_INFO(execute_exception, SIGILL, ILL_ILLOPN,
+ "execute exception")
+DO_ERROR_INFO(divide_exception, SIGFPE, FPE_INTDIV,
+ "fixpoint divide exception")
+DO_ERROR_INFO(overflow_exception, SIGFPE, FPE_INTOVF,
+ "fixpoint overflow exception")
+DO_ERROR_INFO(hfp_overflow_exception, SIGFPE, FPE_FLTOVF,
+ "HFP overflow exception")
+DO_ERROR_INFO(hfp_underflow_exception, SIGFPE, FPE_FLTUND,
+ "HFP underflow exception")
+DO_ERROR_INFO(hfp_significance_exception, SIGFPE, FPE_FLTRES,
+ "HFP significance exception")
+DO_ERROR_INFO(hfp_divide_exception, SIGFPE, FPE_FLTDIV,
+ "HFP divide exception")
+DO_ERROR_INFO(hfp_sqrt_exception, SIGFPE, FPE_FLTINV,
+ "HFP square root exception")
+DO_ERROR_INFO(operand_exception, SIGILL, ILL_ILLOPN,
+ "operand exception")
+DO_ERROR_INFO(privileged_op, SIGILL, ILL_PRVOPC,
+ "privileged operation")
+DO_ERROR_INFO(special_op_exception, SIGILL, ILL_ILLOPN,
+ "special operation exception")
+DO_ERROR_INFO(transaction_exception, SIGILL, ILL_ILLOPN,
+ "transaction constraint exception")
+
+static inline void do_fp_trap(struct pt_regs *regs, __u32 fpc)
+{
+ int si_code = 0;
+ /* FPC[2] is Data Exception Code */
+ if ((fpc & 0x00000300) == 0) {
+ /* bits 6 and 7 of DXC are 0 iff IEEE exception */
+ if (fpc & 0x8000) /* invalid fp operation */
+ si_code = FPE_FLTINV;
+ else if (fpc & 0x4000) /* div by 0 */
+ si_code = FPE_FLTDIV;
+ else if (fpc & 0x2000) /* overflow */
+ si_code = FPE_FLTOVF;
+ else if (fpc & 0x1000) /* underflow */
+ si_code = FPE_FLTUND;
+ else if (fpc & 0x0800) /* inexact */
+ si_code = FPE_FLTRES;
+ }
+ do_trap(regs, SIGFPE, si_code, "floating point exception");
+}
+
+void translation_exception(struct pt_regs *regs)
+{
+ /* May never happen. */
+ panic("Translation exception");
+}
+
+void illegal_op(struct pt_regs *regs)
+{
+ __u8 opcode[6];
+ __u16 __user *location;
+ int is_uprobe_insn = 0;
+ int signal = 0;
+
+ location = get_trap_ip(regs);
+
+ if (user_mode(regs)) {
+ if (get_user(*((__u16 *) opcode), (__u16 __user *) location))
+ return;
+ if (*((__u16 *) opcode) == S390_BREAKPOINT_U16) {
+ if (current->ptrace)
+ force_sig_fault(SIGTRAP, TRAP_BRKPT, location, current);
+ else
+ signal = SIGILL;
+#ifdef CONFIG_UPROBES
+ } else if (*((__u16 *) opcode) == UPROBE_SWBP_INSN) {
+ is_uprobe_insn = 1;
+#endif
+ } else
+ signal = SIGILL;
+ }
+ /*
+ * We got either an illegal op in kernel mode, or user space trapped
+ * on a uprobes illegal instruction. See if kprobes or uprobes picks
+ * it up. If not, SIGILL.
+ */
+ if (is_uprobe_insn || !user_mode(regs)) {
+ if (notify_die(DIE_BPT, "bpt", regs, 0,
+ 3, SIGTRAP) != NOTIFY_STOP)
+ signal = SIGILL;
+ }
+ if (signal)
+ do_trap(regs, signal, ILL_ILLOPC, "illegal operation");
+}
+NOKPROBE_SYMBOL(illegal_op);
+
+DO_ERROR_INFO(specification_exception, SIGILL, ILL_ILLOPN,
+ "specification exception");
+
+void vector_exception(struct pt_regs *regs)
+{
+ int si_code, vic;
+
+ if (!MACHINE_HAS_VX) {
+ do_trap(regs, SIGILL, ILL_ILLOPN, "illegal operation");
+ return;
+ }
+
+ /* get vector interrupt code from fpc */
+ save_fpu_regs();
+ vic = (current->thread.fpu.fpc & 0xf00) >> 8;
+ switch (vic) {
+ case 1: /* invalid vector operation */
+ si_code = FPE_FLTINV;
+ break;
+ case 2: /* division by zero */
+ si_code = FPE_FLTDIV;
+ break;
+ case 3: /* overflow */
+ si_code = FPE_FLTOVF;
+ break;
+ case 4: /* underflow */
+ si_code = FPE_FLTUND;
+ break;
+ case 5: /* inexact */
+ si_code = FPE_FLTRES;
+ break;
+ default: /* unknown cause */
+ si_code = 0;
+ }
+ do_trap(regs, SIGFPE, si_code, "vector exception");
+}
+
+void data_exception(struct pt_regs *regs)
+{
+ int signal = 0;
+
+ save_fpu_regs();
+ if (current->thread.fpu.fpc & FPC_DXC_MASK)
+ signal = SIGFPE;
+ else
+ signal = SIGILL;
+ if (signal == SIGFPE)
+ do_fp_trap(regs, current->thread.fpu.fpc);
+ else if (signal)
+ do_trap(regs, signal, ILL_ILLOPN, "data exception");
+}
+
+void space_switch_exception(struct pt_regs *regs)
+{
+ /* Set user psw back to home space mode. */
+ if (user_mode(regs))
+ regs->psw.mask |= PSW_ASC_HOME;
+ /* Send SIGILL. */
+ do_trap(regs, SIGILL, ILL_PRVOPC, "space switch event");
+}
+
+void kernel_stack_overflow(struct pt_regs *regs)
+{
+ bust_spinlocks(1);
+ printk("Kernel stack overflow.\n");
+ show_regs(regs);
+ bust_spinlocks(0);
+ panic("Corrupt kernel stack, can't continue.");
+}
+NOKPROBE_SYMBOL(kernel_stack_overflow);
+
+void __init trap_init(void)
+{
+ local_mcck_enable();
+}
diff --git a/arch/s390/kernel/uprobes.c b/arch/s390/kernel/uprobes.c
new file mode 100644
index 000000000..5007fac01
--- /dev/null
+++ b/arch/s390/kernel/uprobes.c
@@ -0,0 +1,397 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * User-space Probes (UProbes) for s390
+ *
+ * Copyright IBM Corp. 2014
+ * Author(s): Jan Willeke,
+ */
+
+#include <linux/uaccess.h>
+#include <linux/uprobes.h>
+#include <linux/compat.h>
+#include <linux/kdebug.h>
+#include <linux/sched/task_stack.h>
+
+#include <asm/switch_to.h>
+#include <asm/facility.h>
+#include <asm/kprobes.h>
+#include <asm/dis.h>
+#include "entry.h"
+
+#define UPROBE_TRAP_NR UINT_MAX
+
+int arch_uprobe_analyze_insn(struct arch_uprobe *auprobe, struct mm_struct *mm,
+ unsigned long addr)
+{
+ return probe_is_prohibited_opcode(auprobe->insn);
+}
+
+int arch_uprobe_pre_xol(struct arch_uprobe *auprobe, struct pt_regs *regs)
+{
+ if (psw_bits(regs->psw).eaba == PSW_BITS_AMODE_24BIT)
+ return -EINVAL;
+ if (!is_compat_task() && psw_bits(regs->psw).eaba == PSW_BITS_AMODE_31BIT)
+ return -EINVAL;
+ clear_pt_regs_flag(regs, PIF_PER_TRAP);
+ auprobe->saved_per = psw_bits(regs->psw).per;
+ auprobe->saved_int_code = regs->int_code;
+ regs->int_code = UPROBE_TRAP_NR;
+ regs->psw.addr = current->utask->xol_vaddr;
+ set_tsk_thread_flag(current, TIF_UPROBE_SINGLESTEP);
+ update_cr_regs(current);
+ return 0;
+}
+
+bool arch_uprobe_xol_was_trapped(struct task_struct *tsk)
+{
+ struct pt_regs *regs = task_pt_regs(tsk);
+
+ if (regs->int_code != UPROBE_TRAP_NR)
+ return true;
+ return false;
+}
+
+static int check_per_event(unsigned short cause, unsigned long control,
+ struct pt_regs *regs)
+{
+ if (!(regs->psw.mask & PSW_MASK_PER))
+ return 0;
+ /* user space single step */
+ if (control == 0)
+ return 1;
+ /* over indication for storage alteration */
+ if ((control & 0x20200000) && (cause & 0x2000))
+ return 1;
+ if (cause & 0x8000) {
+ /* all branches */
+ if ((control & 0x80800000) == 0x80000000)
+ return 1;
+ /* branch into selected range */
+ if (((control & 0x80800000) == 0x80800000) &&
+ regs->psw.addr >= current->thread.per_user.start &&
+ regs->psw.addr <= current->thread.per_user.end)
+ return 1;
+ }
+ return 0;
+}
+
+int arch_uprobe_post_xol(struct arch_uprobe *auprobe, struct pt_regs *regs)
+{
+ int fixup = probe_get_fixup_type(auprobe->insn);
+ struct uprobe_task *utask = current->utask;
+
+ clear_tsk_thread_flag(current, TIF_UPROBE_SINGLESTEP);
+ update_cr_regs(current);
+ psw_bits(regs->psw).per = auprobe->saved_per;
+ regs->int_code = auprobe->saved_int_code;
+
+ if (fixup & FIXUP_PSW_NORMAL)
+ regs->psw.addr += utask->vaddr - utask->xol_vaddr;
+ if (fixup & FIXUP_RETURN_REGISTER) {
+ int reg = (auprobe->insn[0] & 0xf0) >> 4;
+
+ regs->gprs[reg] += utask->vaddr - utask->xol_vaddr;
+ }
+ if (fixup & FIXUP_BRANCH_NOT_TAKEN) {
+ int ilen = insn_length(auprobe->insn[0] >> 8);
+
+ if (regs->psw.addr - utask->xol_vaddr == ilen)
+ regs->psw.addr = utask->vaddr + ilen;
+ }
+ if (check_per_event(current->thread.per_event.cause,
+ current->thread.per_user.control, regs)) {
+ /* fix per address */
+ current->thread.per_event.address = utask->vaddr;
+ /* trigger per event */
+ set_pt_regs_flag(regs, PIF_PER_TRAP);
+ }
+ return 0;
+}
+
+int arch_uprobe_exception_notify(struct notifier_block *self, unsigned long val,
+ void *data)
+{
+ struct die_args *args = data;
+ struct pt_regs *regs = args->regs;
+
+ if (!user_mode(regs))
+ return NOTIFY_DONE;
+ if (regs->int_code & 0x200) /* Trap during transaction */
+ return NOTIFY_DONE;
+ switch (val) {
+ case DIE_BPT:
+ if (uprobe_pre_sstep_notifier(regs))
+ return NOTIFY_STOP;
+ break;
+ case DIE_SSTEP:
+ if (uprobe_post_sstep_notifier(regs))
+ return NOTIFY_STOP;
+ default:
+ break;
+ }
+ return NOTIFY_DONE;
+}
+
+void arch_uprobe_abort_xol(struct arch_uprobe *auprobe, struct pt_regs *regs)
+{
+ clear_thread_flag(TIF_UPROBE_SINGLESTEP);
+ regs->int_code = auprobe->saved_int_code;
+ regs->psw.addr = current->utask->vaddr;
+ current->thread.per_event.address = current->utask->vaddr;
+}
+
+unsigned long arch_uretprobe_hijack_return_addr(unsigned long trampoline,
+ struct pt_regs *regs)
+{
+ unsigned long orig;
+
+ orig = regs->gprs[14];
+ regs->gprs[14] = trampoline;
+ return orig;
+}
+
+bool arch_uretprobe_is_alive(struct return_instance *ret, enum rp_check ctx,
+ struct pt_regs *regs)
+{
+ if (ctx == RP_CHECK_CHAIN_CALL)
+ return user_stack_pointer(regs) <= ret->stack;
+ else
+ return user_stack_pointer(regs) < ret->stack;
+}
+
+/* Instruction Emulation */
+
+static void adjust_psw_addr(psw_t *psw, unsigned long len)
+{
+ psw->addr = __rewind_psw(*psw, -len);
+}
+
+#define EMU_ILLEGAL_OP 1
+#define EMU_SPECIFICATION 2
+#define EMU_ADDRESSING 3
+
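+/*
+ * Helpers to emulate the relative-long load, store and compare
+ * instructions. test_facility(34) checks for the general-instructions-
+ * extension facility; an unaligned operand yields a specification
+ * exception, an inaccessible user address an addressing exception.
+ */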
+#define emu_load_ril(ptr, output) \
+({ \
+ unsigned int mask = sizeof(*(ptr)) - 1; \
+ __typeof__(*(ptr)) input; \
+ int __rc = 0; \
+ \
+ if (!test_facility(34)) \
+ __rc = EMU_ILLEGAL_OP; \
+ else if ((u64 __force)ptr & mask) \
+ __rc = EMU_SPECIFICATION; \
+ else if (get_user(input, ptr)) \
+ __rc = EMU_ADDRESSING; \
+ else \
+ *(output) = input; \
+ __rc; \
+})
+
+#define emu_store_ril(regs, ptr, input) \
+({ \
+ unsigned int mask = sizeof(*(ptr)) - 1; \
+ __typeof__(ptr) __ptr = (ptr); \
+ int __rc = 0; \
+ \
+ if (!test_facility(34)) \
+ __rc = EMU_ILLEGAL_OP; \
+ else if ((u64 __force)__ptr & mask) \
+ __rc = EMU_SPECIFICATION; \
+ else if (put_user(*(input), __ptr)) \
+ __rc = EMU_ADDRESSING; \
+ if (__rc == 0) \
+ sim_stor_event(regs, \
+ (void __force *)__ptr, \
+ mask + 1); \
+ __rc; \
+})
+
+#define emu_cmp_ril(regs, ptr, cmp) \
+({ \
+ unsigned int mask = sizeof(*(ptr)) - 1; \
+ __typeof__(*(ptr)) input; \
+ int __rc = 0; \
+ \
+ if (!test_facility(34)) \
+ __rc = EMU_ILLEGAL_OP; \
+ else if ((u64 __force)ptr & mask) \
+ __rc = EMU_SPECIFICATION; \
+ else if (get_user(input, ptr)) \
+ __rc = EMU_ADDRESSING; \
+ else if (input > *(cmp)) \
+ psw_bits((regs)->psw).cc = 1; \
+ else if (input < *(cmp)) \
+ psw_bits((regs)->psw).cc = 2; \
+ else \
+ psw_bits((regs)->psw).cc = 0; \
+ __rc; \
+})
+
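+/*
+ * RIL instruction format: 8-bit opcode, 4-bit register, 4-bit opcode
+ * extension and a signed 32-bit displacement counted in halfwords
+ * relative to the instruction address.
+ */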
+struct insn_ril {
+ u8 opc0;
+ u8 reg : 4;
+ u8 opc1 : 4;
+ s32 disp;
+} __packed;
+
+union split_register {
+ u64 u64;
+ u32 u32[2];
+ u16 u16[4];
+ s64 s64;
+ s32 s32[2];
+ s16 s16[4];
+};
+
+/*
+ * If the user's PER registers are set up to trace storage alterations and
+ * an emulated store touches an address within the traced range, a PER
+ * event is generated.
+ */
+static void sim_stor_event(struct pt_regs *regs, void *addr, int len)
+{
+ if (!(regs->psw.mask & PSW_MASK_PER))
+ return;
+ if (!(current->thread.per_user.control & PER_EVENT_STORE))
+ return;
+ if ((void *)current->thread.per_user.start > (addr + len))
+ return;
+ if ((void *)current->thread.per_user.end < addr)
+ return;
+ current->thread.per_event.address = regs->psw.addr;
+ current->thread.per_event.cause = PER_EVENT_STORE >> 16;
+ set_pt_regs_flag(regs, PIF_PER_TRAP);
+}
+
+/*
+ * pc relative instructions are emulated, since parameters may not be
+ * accessible from the xol area due to range limitations.
+ */
+static void handle_insn_ril(struct arch_uprobe *auprobe, struct pt_regs *regs)
+{
+ union split_register *rx;
+ struct insn_ril *insn;
+ unsigned int ilen;
+ void *uptr;
+ int rc = 0;
+
+ insn = (struct insn_ril *) &auprobe->insn;
+ rx = (union split_register *) &regs->gprs[insn->reg];
+ uptr = (void *)(regs->psw.addr + (insn->disp * 2));
+ ilen = insn_length(insn->opc0);
+
+ switch (insn->opc0) {
+ case 0xc0:
+ switch (insn->opc1) {
+ case 0x00: /* larl */
+ rx->u64 = (unsigned long)uptr;
+ break;
+ }
+ break;
+ case 0xc4:
+ switch (insn->opc1) {
+ case 0x02: /* llhrl */
+ rc = emu_load_ril((u16 __user *)uptr, &rx->u32[1]);
+ break;
+ case 0x04: /* lghrl */
+ rc = emu_load_ril((s16 __user *)uptr, &rx->u64);
+ break;
+ case 0x05: /* lhrl */
+ rc = emu_load_ril((s16 __user *)uptr, &rx->u32[1]);
+ break;
+ case 0x06: /* llghrl */
+ rc = emu_load_ril((u16 __user *)uptr, &rx->u64);
+ break;
+ case 0x08: /* lgrl */
+ rc = emu_load_ril((u64 __user *)uptr, &rx->u64);
+ break;
+ case 0x0c: /* lgfrl */
+ rc = emu_load_ril((s32 __user *)uptr, &rx->u64);
+ break;
+ case 0x0d: /* lrl */
+ rc = emu_load_ril((u32 __user *)uptr, &rx->u32[1]);
+ break;
+ case 0x0e: /* llgfrl */
+ rc = emu_load_ril((u32 __user *)uptr, &rx->u64);
+ break;
+ case 0x07: /* sthrl */
+ rc = emu_store_ril(regs, (u16 __user *)uptr, &rx->u16[3]);
+ break;
+ case 0x0b: /* stgrl */
+ rc = emu_store_ril(regs, (u64 __user *)uptr, &rx->u64);
+ break;
+ case 0x0f: /* strl */
+ rc = emu_store_ril(regs, (u32 __user *)uptr, &rx->u32[1]);
+ break;
+ }
+ break;
+ case 0xc6:
+ switch (insn->opc1) {
+ case 0x02: /* pfdrl */
+ if (!test_facility(34))
+ rc = EMU_ILLEGAL_OP;
+ break;
+ case 0x04: /* cghrl */
+ rc = emu_cmp_ril(regs, (s16 __user *)uptr, &rx->s64);
+ break;
+ case 0x05: /* chrl */
+ rc = emu_cmp_ril(regs, (s16 __user *)uptr, &rx->s32[1]);
+ break;
+ case 0x06: /* clghrl */
+ rc = emu_cmp_ril(regs, (u16 __user *)uptr, &rx->u64);
+ break;
+ case 0x07: /* clhrl */
+ rc = emu_cmp_ril(regs, (u16 __user *)uptr, &rx->u32[1]);
+ break;
+ case 0x08: /* cgrl */
+ rc = emu_cmp_ril(regs, (s64 __user *)uptr, &rx->s64);
+ break;
+ case 0x0a: /* clgrl */
+ rc = emu_cmp_ril(regs, (u64 __user *)uptr, &rx->u64);
+ break;
+ case 0x0c: /* cgfrl */
+ rc = emu_cmp_ril(regs, (s32 __user *)uptr, &rx->s64);
+ break;
+ case 0x0d: /* crl */
+ rc = emu_cmp_ril(regs, (s32 __user *)uptr, &rx->s32[1]);
+ break;
+ case 0x0e: /* clgfrl */
+ rc = emu_cmp_ril(regs, (u32 __user *)uptr, &rx->u64);
+ break;
+ case 0x0f: /* clrl */
+ rc = emu_cmp_ril(regs, (u32 __user *)uptr, &rx->u32[1]);
+ break;
+ }
+ break;
+ }
+ adjust_psw_addr(&regs->psw, ilen);
+ switch (rc) {
+ case EMU_ILLEGAL_OP:
+ regs->int_code = ilen << 16 | 0x0001;
+ do_report_trap(regs, SIGILL, ILL_ILLOPC, NULL);
+ break;
+ case EMU_SPECIFICATION:
+ regs->int_code = ilen << 16 | 0x0006;
+ do_report_trap(regs, SIGILL, ILL_ILLOPC, NULL);
+ break;
+ case EMU_ADDRESSING:
+ regs->int_code = ilen << 16 | 0x0005;
+ do_report_trap(regs, SIGSEGV, SEGV_MAPERR, NULL);
+ break;
+ }
+}
+
+bool arch_uprobe_skip_sstep(struct arch_uprobe *auprobe, struct pt_regs *regs)
+{
+ if ((psw_bits(regs->psw).eaba == PSW_BITS_AMODE_24BIT) ||
+ ((psw_bits(regs->psw).eaba == PSW_BITS_AMODE_31BIT) &&
+ !is_compat_task())) {
+ regs->psw.addr = __rewind_psw(regs->psw, UPROBE_SWBP_INSN_SIZE);
+ do_report_trap(regs, SIGILL, ILL_ILLADR, NULL);
+ return true;
+ }
+ if (probe_is_insn_relative_long(auprobe->insn)) {
+ handle_insn_ril(auprobe, regs);
+ return true;
+ }
+ return false;
+}
diff --git a/arch/s390/kernel/vdso.c b/arch/s390/kernel/vdso.c
new file mode 100644
index 000000000..7ab7d256d
--- /dev/null
+++ b/arch/s390/kernel/vdso.c
@@ -0,0 +1,325 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * vdso setup for s390
+ *
+ * Copyright IBM Corp. 2008
+ * Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com)
+ */
+
+#include <linux/init.h>
+#include <linux/errno.h>
+#include <linux/sched.h>
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <linux/smp.h>
+#include <linux/stddef.h>
+#include <linux/unistd.h>
+#include <linux/slab.h>
+#include <linux/user.h>
+#include <linux/elf.h>
+#include <linux/security.h>
+#include <linux/bootmem.h>
+#include <linux/compat.h>
+#include <asm/asm-offsets.h>
+#include <asm/pgtable.h>
+#include <asm/processor.h>
+#include <asm/mmu.h>
+#include <asm/mmu_context.h>
+#include <asm/sections.h>
+#include <asm/vdso.h>
+#include <asm/facility.h>
+
+#ifdef CONFIG_COMPAT
+extern char vdso32_start, vdso32_end;
+static void *vdso32_kbase = &vdso32_start;
+static unsigned int vdso32_pages;
+static struct page **vdso32_pagelist;
+#endif
+
+extern char vdso64_start, vdso64_end;
+static void *vdso64_kbase = &vdso64_start;
+static unsigned int vdso64_pages;
+static struct page **vdso64_pagelist;
+
+/*
+ * Should the kernel map a VDSO page into processes and pass its
+ * address down to glibc upon exec()?
+ */
+unsigned int __read_mostly vdso_enabled = 1;
+
+static vm_fault_t vdso_fault(const struct vm_special_mapping *sm,
+ struct vm_area_struct *vma, struct vm_fault *vmf)
+{
+ struct page **vdso_pagelist;
+ unsigned long vdso_pages;
+
+ vdso_pagelist = vdso64_pagelist;
+ vdso_pages = vdso64_pages;
+#ifdef CONFIG_COMPAT
+ if (vma->vm_mm->context.compat_mm) {
+ vdso_pagelist = vdso32_pagelist;
+ vdso_pages = vdso32_pages;
+ }
+#endif
+
+ if (vmf->pgoff >= vdso_pages)
+ return VM_FAULT_SIGBUS;
+
+ vmf->page = vdso_pagelist[vmf->pgoff];
+ get_page(vmf->page);
+ return 0;
+}
+
+static int vdso_mremap(const struct vm_special_mapping *sm,
+ struct vm_area_struct *vma)
+{
+ unsigned long vdso_pages;
+
+ vdso_pages = vdso64_pages;
+#ifdef CONFIG_COMPAT
+ if (vma->vm_mm->context.compat_mm)
+ vdso_pages = vdso32_pages;
+#endif
+
+ if ((vdso_pages << PAGE_SHIFT) != vma->vm_end - vma->vm_start)
+ return -EINVAL;
+
+ if (WARN_ON_ONCE(current->mm != vma->vm_mm))
+ return -EFAULT;
+
+ current->mm->context.vdso_base = vma->vm_start;
+ return 0;
+}
+
+static const struct vm_special_mapping vdso_mapping = {
+ .name = "[vdso]",
+ .fault = vdso_fault,
+ .mremap = vdso_mremap,
+};
+
+static int __init vdso_setup(char *s)
+{
+ unsigned long val;
+ int rc;
+
+ rc = 0;
+ if (strncmp(s, "on", 3) == 0)
+ vdso_enabled = 1;
+ else if (strncmp(s, "off", 4) == 0)
+ vdso_enabled = 0;
+ else {
+ rc = kstrtoul(s, 0, &val);
+ vdso_enabled = rc ? 0 : !!val;
+ }
+ return !rc;
+}
+__setup("vdso=", vdso_setup);
+
+/*
+ * The vdso data page
+ */
+static union {
+ struct vdso_data data;
+ u8 page[PAGE_SIZE];
+} vdso_data_store __page_aligned_data;
+struct vdso_data *vdso_data = &vdso_data_store.data;
+
+/*
+ * Setup vdso data page.
+ */
+static void __init vdso_init_data(struct vdso_data *vd)
+{
+ vd->ectg_available = test_facility(31);
+}
+
+/*
+ * Allocate/free per cpu vdso data.
+ */
+#define SEGMENT_ORDER 2
+
+/*
+ * The initial vdso_data structure for the boot CPU. Eventually
+ * it is replaced with a properly allocated structure in vdso_init.
+ * This is necessary because a valid S390_lowcore.vdso_per_cpu_data
+ * pointer is required to be able to return from an interrupt or
+ * program check. See the exit paths in entry.S.
+ */
+struct vdso_data boot_vdso_data __initdata;
+
+void __init vdso_alloc_boot_cpu(struct lowcore *lowcore)
+{
+ lowcore->vdso_per_cpu_data = (unsigned long) &boot_vdso_data;
+}
+
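+/*
+ * Each CPU gets a minimal address space of its own: a segment table with
+ * a single page table mapping one read-only data page. The resulting ASCE
+ * and the data page address are stored in the lowcore so the vdso can
+ * read the per-CPU data (CPU and node number) from user space.
+ */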
+int vdso_alloc_per_cpu(struct lowcore *lowcore)
+{
+ unsigned long segment_table, page_table, page_frame;
+ struct vdso_per_cpu_data *vd;
+
+ segment_table = __get_free_pages(GFP_KERNEL, SEGMENT_ORDER);
+ page_table = get_zeroed_page(GFP_KERNEL);
+ page_frame = get_zeroed_page(GFP_KERNEL);
+ if (!segment_table || !page_table || !page_frame)
+ goto out;
+ arch_set_page_dat(virt_to_page(segment_table), SEGMENT_ORDER);
+ arch_set_page_dat(virt_to_page(page_table), 0);
+
+ /* Initialize per-cpu vdso data page */
+ vd = (struct vdso_per_cpu_data *) page_frame;
+ vd->cpu_nr = lowcore->cpu_nr;
+ vd->node_id = cpu_to_node(vd->cpu_nr);
+
+ /* Set up page table for the vdso address space */
+ memset64((u64 *)segment_table, _SEGMENT_ENTRY_EMPTY, _CRST_ENTRIES);
+ memset64((u64 *)page_table, _PAGE_INVALID, PTRS_PER_PTE);
+
+ *(unsigned long *) segment_table = _SEGMENT_ENTRY + page_table;
+ *(unsigned long *) page_table = _PAGE_PROTECT + page_frame;
+
+ lowcore->vdso_asce = segment_table +
+ _ASCE_TABLE_LENGTH + _ASCE_USER_BITS + _ASCE_TYPE_SEGMENT;
+ lowcore->vdso_per_cpu_data = page_frame;
+
+ return 0;
+
+out:
+ free_page(page_frame);
+ free_page(page_table);
+ free_pages(segment_table, SEGMENT_ORDER);
+ return -ENOMEM;
+}
+
+void vdso_free_per_cpu(struct lowcore *lowcore)
+{
+ unsigned long segment_table, page_table, page_frame;
+
+ segment_table = lowcore->vdso_asce & PAGE_MASK;
+ page_table = *(unsigned long *) segment_table;
+ page_frame = *(unsigned long *) page_table;
+
+ free_page(page_frame);
+ free_page(page_table);
+ free_pages(segment_table, SEGMENT_ORDER);
+}
+
+/*
+ * This is called from binfmt_elf; we create the special vma for the
+ * vDSO and insert it into the mm struct tree.
+ */
+int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp)
+{
+ struct mm_struct *mm = current->mm;
+ struct vm_area_struct *vma;
+ unsigned long vdso_pages;
+ unsigned long vdso_base;
+ int rc;
+
+ if (!vdso_enabled)
+ return 0;
+ /*
+ * Only map the vdso for dynamically linked elf binaries.
+ */
+ if (!uses_interp)
+ return 0;
+
+ vdso_pages = vdso64_pages;
+#ifdef CONFIG_COMPAT
+ mm->context.compat_mm = is_compat_task();
+ if (mm->context.compat_mm)
+ vdso_pages = vdso32_pages;
+#endif
+ /*
+ * The vDSO has a problem and was disabled; just don't "enable" it for
+ * the process.
+ */
+ if (vdso_pages == 0)
+ return 0;
+
+ /*
+ * pick a base address for the vDSO in process space. We try to put
+ * it at vdso_base which is the "natural" base for it, but we might
+ * fail and end up putting it elsewhere.
+ */
+ if (down_write_killable(&mm->mmap_sem))
+ return -EINTR;
+ vdso_base = get_unmapped_area(NULL, 0, vdso_pages << PAGE_SHIFT, 0, 0);
+ if (IS_ERR_VALUE(vdso_base)) {
+ rc = vdso_base;
+ goto out_up;
+ }
+
+ /*
+ * our vma flags don't have VM_WRITE so by default, the process
+ * isn't allowed to write those pages.
+ * gdb can break that with the ptrace interface, and thus trigger COW
+ * on those pages, but it's then your responsibility to never do that
+ * on the "data" page of the vDSO or you'll stop getting kernel
+ * updates and your nice userland gettimeofday will be totally dead.
+ * It's fine to use that for setting breakpoints in the vDSO code
+ * pages though.
+ */
+ vma = _install_special_mapping(mm, vdso_base, vdso_pages << PAGE_SHIFT,
+ VM_READ|VM_EXEC|
+ VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC,
+ &vdso_mapping);
+ if (IS_ERR(vma)) {
+ rc = PTR_ERR(vma);
+ goto out_up;
+ }
+
+ current->mm->context.vdso_base = vdso_base;
+ rc = 0;
+
+out_up:
+ up_write(&mm->mmap_sem);
+ return rc;
+}
+
+static int __init vdso_init(void)
+{
+ int i;
+
+ vdso_init_data(vdso_data);
+#ifdef CONFIG_COMPAT
+ /* Calculate the size of the 32 bit vDSO */
+ vdso32_pages = ((&vdso32_end - &vdso32_start
+ + PAGE_SIZE - 1) >> PAGE_SHIFT) + 1;
+
+ /* Make sure pages are in the correct state */
+ vdso32_pagelist = kcalloc(vdso32_pages + 1, sizeof(struct page *),
+ GFP_KERNEL);
+ BUG_ON(vdso32_pagelist == NULL);
+ for (i = 0; i < vdso32_pages - 1; i++) {
+ struct page *pg = virt_to_page(vdso32_kbase + i*PAGE_SIZE);
+ ClearPageReserved(pg);
+ get_page(pg);
+ vdso32_pagelist[i] = pg;
+ }
+ vdso32_pagelist[vdso32_pages - 1] = virt_to_page(vdso_data);
+ vdso32_pagelist[vdso32_pages] = NULL;
+#endif
+
+ /* Calculate the size of the 64 bit vDSO */
+ vdso64_pages = ((&vdso64_end - &vdso64_start
+ + PAGE_SIZE - 1) >> PAGE_SHIFT) + 1;
+
+ /* Make sure pages are in the correct state */
+ vdso64_pagelist = kcalloc(vdso64_pages + 1, sizeof(struct page *),
+ GFP_KERNEL);
+ BUG_ON(vdso64_pagelist == NULL);
+ for (i = 0; i < vdso64_pages - 1; i++) {
+ struct page *pg = virt_to_page(vdso64_kbase + i*PAGE_SIZE);
+ ClearPageReserved(pg);
+ get_page(pg);
+ vdso64_pagelist[i] = pg;
+ }
+ vdso64_pagelist[vdso64_pages - 1] = virt_to_page(vdso_data);
+ vdso64_pagelist[vdso64_pages] = NULL;
+ if (vdso_alloc_per_cpu(&S390_lowcore))
+ BUG();
+
+ get_page(virt_to_page(vdso_data));
+
+ return 0;
+}
+early_initcall(vdso_init);
diff --git a/arch/s390/kernel/vdso32/.gitignore b/arch/s390/kernel/vdso32/.gitignore
new file mode 100644
index 000000000..e45fba9d0
--- /dev/null
+++ b/arch/s390/kernel/vdso32/.gitignore
@@ -0,0 +1 @@
+vdso32.lds
diff --git a/arch/s390/kernel/vdso32/Makefile b/arch/s390/kernel/vdso32/Makefile
new file mode 100644
index 000000000..e76309fbb
--- /dev/null
+++ b/arch/s390/kernel/vdso32/Makefile
@@ -0,0 +1,66 @@
+# SPDX-License-Identifier: GPL-2.0
+# List of files in the vdso, has to be asm only for now
+
+KCOV_INSTRUMENT := n
+
+obj-vdso32 = gettimeofday.o clock_getres.o clock_gettime.o note.o getcpu.o
+
+# Build rules
+
+targets := $(obj-vdso32) vdso32.so vdso32.so.dbg
+obj-vdso32 := $(addprefix $(obj)/, $(obj-vdso32))
+
+KBUILD_AFLAGS += -DBUILD_VDSO
+KBUILD_CFLAGS += -DBUILD_VDSO
+
+KBUILD_AFLAGS_31 := $(filter-out -m64,$(KBUILD_AFLAGS))
+KBUILD_AFLAGS_31 += -m31 -s
+
+KBUILD_CFLAGS_31 := $(filter-out -m64,$(KBUILD_CFLAGS))
+KBUILD_CFLAGS_31 += -m31 -fPIC -shared -fno-common -fno-builtin
+KBUILD_CFLAGS_31 += -nostdlib -Wl,-soname=linux-vdso32.so.1 \
+ $(call cc-ldoption, -Wl$(comma)--hash-style=both)
+
+$(targets:%=$(obj)/%.dbg): KBUILD_CFLAGS = $(KBUILD_CFLAGS_31)
+$(targets:%=$(obj)/%.dbg): KBUILD_AFLAGS = $(KBUILD_AFLAGS_31)
+
+obj-y += vdso32_wrapper.o
+extra-y += vdso32.lds
+CPPFLAGS_vdso32.lds += -P -C -U$(ARCH)
+
+# Disable gcov profiling, ubsan and kasan for VDSO code
+GCOV_PROFILE := n
+UBSAN_SANITIZE := n
+KASAN_SANITIZE := n
+
+# Force dependency (incbin is bad)
+$(obj)/vdso32_wrapper.o : $(obj)/vdso32.so
+
+# link rule for the .so file, .lds has to be first
+$(obj)/vdso32.so.dbg: $(src)/vdso32.lds $(obj-vdso32) FORCE
+ $(call if_changed,vdso32ld)
+
+# strip rule for the .so file
+$(obj)/%.so: OBJCOPYFLAGS := -S
+$(obj)/%.so: $(obj)/%.so.dbg FORCE
+ $(call if_changed,objcopy)
+
+# assembly rules for the .S files
+$(obj-vdso32): %.o: %.S FORCE
+ $(call if_changed_dep,vdso32as)
+
+# actual build commands
+quiet_cmd_vdso32ld = VDSO32L $@
+ cmd_vdso32ld = $(CC) $(c_flags) -Wl,-T $(filter %.lds %.o,$^) -o $@
+quiet_cmd_vdso32as = VDSO32A $@
+ cmd_vdso32as = $(CC) $(a_flags) -c -o $@ $<
+
+# install commands for the unstripped file
+quiet_cmd_vdso_install = INSTALL $@
+ cmd_vdso_install = cp $(obj)/$@.dbg $(MODLIB)/vdso/$@
+
+vdso32.so: $(obj)/vdso32.so.dbg
+ @mkdir -p $(MODLIB)/vdso
+ $(call cmd,vdso_install)
+
+vdso_install: vdso32.so
diff --git a/arch/s390/kernel/vdso32/clock_getres.S b/arch/s390/kernel/vdso32/clock_getres.S
new file mode 100644
index 000000000..eaf9cf141
--- /dev/null
+++ b/arch/s390/kernel/vdso32/clock_getres.S
@@ -0,0 +1,44 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Userland implementation of clock_getres() for 32-bit processes in a
+ * s390 kernel for use in the vDSO
+ *
+ * Copyright IBM Corp. 2008
+ * Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com)
+ */
+#include <asm/vdso.h>
+#include <asm/asm-offsets.h>
+#include <asm/unistd.h>
+#include <asm/dwarf.h>
+
+ .text
+ .align 4
+ .globl __kernel_clock_getres
+ .type __kernel_clock_getres,@function
+__kernel_clock_getres:
+ CFI_STARTPROC
+ basr %r1,0
+ la %r1,4f-.(%r1)
+ chi %r2,__CLOCK_REALTIME
+ je 0f
+ chi %r2,__CLOCK_MONOTONIC
+ je 0f
+ la %r1,5f-4f(%r1)
+ chi %r2,__CLOCK_REALTIME_COARSE
+ je 0f
+ chi %r2,__CLOCK_MONOTONIC_COARSE
+ jne 3f
+0: ltr %r3,%r3
+ jz 2f /* res == NULL */
+1: l %r0,0(%r1)
+ xc 0(4,%r3),0(%r3) /* set tp->tv_sec to zero */
+ st %r0,4(%r3) /* store tp->tv_nsec */
+2: lhi %r2,0
+ br %r14
+3: lhi %r1,__NR_clock_getres /* fallback to svc */
+ svc 0
+ br %r14
+ CFI_ENDPROC
+4: .long __CLOCK_REALTIME_RES
+5: .long __CLOCK_COARSE_RES
+ .size __kernel_clock_getres,.-__kernel_clock_getres
diff --git a/arch/s390/kernel/vdso32/clock_gettime.S b/arch/s390/kernel/vdso32/clock_gettime.S
new file mode 100644
index 000000000..ada5c11a1
--- /dev/null
+++ b/arch/s390/kernel/vdso32/clock_gettime.S
@@ -0,0 +1,179 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Userland implementation of clock_gettime() for 32-bit processes in a
+ * s390 kernel for use in the vDSO
+ *
+ * Copyright IBM Corp. 2008
+ * Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com)
+ */
+#include <asm/vdso.h>
+#include <asm/asm-offsets.h>
+#include <asm/unistd.h>
+#include <asm/dwarf.h>
+#include <asm/ptrace.h>
+
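+/*
+ * All reads of _vdso_data are guarded by the update counter: wait while
+ * the low bit is set (an update is in progress) and restart the
+ * calculation if the counter changed in the meantime.
+ */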
+ .text
+ .align 4
+ .globl __kernel_clock_gettime
+ .type __kernel_clock_gettime,@function
+__kernel_clock_gettime:
+ CFI_STARTPROC
+ ahi %r15,-16
+ CFI_DEF_CFA_OFFSET STACK_FRAME_OVERHEAD+16
+ CFI_VAL_OFFSET 15, -STACK_FRAME_OVERHEAD
+ basr %r5,0
+0: al %r5,21f-0b(%r5) /* get &_vdso_data */
+ chi %r2,__CLOCK_REALTIME_COARSE
+ je 10f
+ chi %r2,__CLOCK_REALTIME
+ je 11f
+ chi %r2,__CLOCK_MONOTONIC_COARSE
+ je 9f
+ chi %r2,__CLOCK_MONOTONIC
+ jne 19f
+
+ /* CLOCK_MONOTONIC */
+1: l %r4,__VDSO_UPD_COUNT+4(%r5) /* load update counter */
+ tml %r4,0x0001 /* pending update ? loop */
+ jnz 1b
+ stcke 0(%r15) /* Store TOD clock */
+ lm %r0,%r1,1(%r15)
+ s %r0,__VDSO_XTIME_STAMP(%r5) /* TOD - cycle_last */
+ sl %r1,__VDSO_XTIME_STAMP+4(%r5)
+ brc 3,2f
+ ahi %r0,-1
+2: ms %r0,__VDSO_TK_MULT(%r5) /* * tk->mult */
+ lr %r2,%r0
+ l %r0,__VDSO_TK_MULT(%r5)
+ ltr %r1,%r1
+ mr %r0,%r0
+ jnm 3f
+ a %r0,__VDSO_TK_MULT(%r5)
+3: alr %r0,%r2
+ al %r0,__VDSO_WTOM_NSEC(%r5)
+ al %r1,__VDSO_WTOM_NSEC+4(%r5)
+ brc 12,5f
+ ahi %r0,1
+5: l %r2,__VDSO_TK_SHIFT(%r5) /* Timekeeper shift */
+ srdl %r0,0(%r2) /* >> tk->shift */
+ l %r2,__VDSO_WTOM_SEC+4(%r5)
+ cl %r4,__VDSO_UPD_COUNT+4(%r5) /* check update counter */
+ jne 1b
+ basr %r5,0
+6: ltr %r0,%r0
+ jnz 7f
+ cl %r1,20f-6b(%r5)
+ jl 8f
+7: ahi %r2,1
+ sl %r1,20f-6b(%r5)
+ brc 3,6b
+ ahi %r0,-1
+ j 6b
+8: st %r2,0(%r3) /* store tp->tv_sec */
+ st %r1,4(%r3) /* store tp->tv_nsec */
+ lhi %r2,0
+ ahi %r15,16
+ CFI_DEF_CFA_OFFSET STACK_FRAME_OVERHEAD
+ CFI_RESTORE 15
+ br %r14
+
+ /* CLOCK_MONOTONIC_COARSE */
+ CFI_DEF_CFA_OFFSET STACK_FRAME_OVERHEAD+16
+ CFI_VAL_OFFSET 15, -STACK_FRAME_OVERHEAD
+9: l %r4,__VDSO_UPD_COUNT+4(%r5) /* load update counter */
+ tml %r4,0x0001 /* pending update ? loop */
+ jnz 9b
+ l %r2,__VDSO_WTOM_CRS_SEC+4(%r5)
+ l %r1,__VDSO_WTOM_CRS_NSEC+4(%r5)
+ cl %r4,__VDSO_UPD_COUNT+4(%r5) /* check update counter */
+ jne 9b
+ j 8b
+
+ /* CLOCK_REALTIME_COARSE */
+10: l %r4,__VDSO_UPD_COUNT+4(%r5) /* load update counter */
+ tml %r4,0x0001 /* pending update ? loop */
+ jnz 10b
+ l %r2,__VDSO_XTIME_CRS_SEC+4(%r5)
+ l %r1,__VDSO_XTIME_CRS_NSEC+4(%r5)
+ cl %r4,__VDSO_UPD_COUNT+4(%r5) /* check update counter */
+ jne 10b
+ j 17f
+
+ /* CLOCK_REALTIME */
+11: l %r4,__VDSO_UPD_COUNT+4(%r5) /* load update counter */
+ tml %r4,0x0001 /* pending update ? loop */
+ jnz 11b
+ stcke 0(%r15) /* Store TOD clock */
+ lm %r0,%r1,__VDSO_TS_END(%r5) /* TOD steering end time */
+ s %r0,1(%r15) /* ts_steering_end - now */
+ sl %r1,5(%r15)
+ brc 3,22f
+ ahi %r0,-1
+22: ltr %r0,%r0 /* past end of steering? */
+ jm 24f
+ srdl %r0,15 /* 1 per 2^16 */
+ tm __VDSO_TS_DIR+3(%r5),0x01 /* steering direction? */
+ jz 23f
+ lcr %r0,%r0 /* negative TOD offset */
+ lcr %r1,%r1
+ je 23f
+ ahi %r0,-1
+23: a %r0,1(%r15) /* add TOD timestamp */
+ al %r1,5(%r15)
+ brc 12,25f
+ ahi %r0,1
+ j 25f
+24: lm %r0,%r1,1(%r15) /* load TOD timestamp */
+25: s %r0,__VDSO_XTIME_STAMP(%r5) /* TOD - cycle_last */
+ sl %r1,__VDSO_XTIME_STAMP+4(%r5)
+ brc 3,12f
+ ahi %r0,-1
+12: ms %r0,__VDSO_TK_MULT(%r5) /* * tk->mult */
+ lr %r2,%r0
+ l %r0,__VDSO_TK_MULT(%r5)
+ ltr %r1,%r1
+ mr %r0,%r0
+ jnm 13f
+ a %r0,__VDSO_TK_MULT(%r5)
+13: alr %r0,%r2
+ al %r0,__VDSO_XTIME_NSEC(%r5) /* + tk->xtime_nsec */
+ al %r1,__VDSO_XTIME_NSEC+4(%r5)
+ brc 12,14f
+ ahi %r0,1
+14: l %r2,__VDSO_TK_SHIFT(%r5) /* Timekeeper shift */
+ srdl %r0,0(%r2) /* >> tk->shift */
+ l %r2,__VDSO_XTIME_SEC+4(%r5)
+ cl %r4,__VDSO_UPD_COUNT+4(%r5) /* check update counter */
+ jne 11b
+ basr %r5,0
+15: ltr %r0,%r0
+ jnz 16f
+ cl %r1,20f-15b(%r5)
+ jl 17f
+16: ahi %r2,1
+ sl %r1,20f-15b(%r5)
+ brc 3,15b
+ ahi %r0,-1
+ j 15b
+17: st %r2,0(%r3) /* store tp->tv_sec */
+ st %r1,4(%r3) /* store tp->tv_nsec */
+ lhi %r2,0
+ ahi %r15,16
+ CFI_DEF_CFA_OFFSET STACK_FRAME_OVERHEAD
+ CFI_RESTORE 15
+ br %r14
+
+ /* Fallback to system call */
+ CFI_DEF_CFA_OFFSET STACK_FRAME_OVERHEAD+16
+ CFI_VAL_OFFSET 15, -STACK_FRAME_OVERHEAD
+19: lhi %r1,__NR_clock_gettime
+ svc 0
+ ahi %r15,16
+ CFI_DEF_CFA_OFFSET STACK_FRAME_OVERHEAD
+ CFI_RESTORE 15
+ br %r14
+ CFI_ENDPROC
+
+20: .long 1000000000
+21: .long _vdso_data - 0b
+ .size __kernel_clock_gettime,.-__kernel_clock_gettime
diff --git a/arch/s390/kernel/vdso32/getcpu.S b/arch/s390/kernel/vdso32/getcpu.S
new file mode 100644
index 000000000..25515f3fb
--- /dev/null
+++ b/arch/s390/kernel/vdso32/getcpu.S
@@ -0,0 +1,33 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Userland implementation of getcpu() for 32-bit processes in a
+ * s390 kernel for use in the vDSO
+ *
+ * Copyright IBM Corp. 2016
+ * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>
+ */
+#include <asm/vdso.h>
+#include <asm/asm-offsets.h>
+#include <asm/dwarf.h>
+
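+/*
+ * Note: sacf 256 switches to secondary-space mode, where the secondary
+ * ASCE is expected to be the vdso ASCE mapping the per-CPU vdso data
+ * page at address 0, so CPU and node number can be read directly.
+ */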
+ .text
+ .align 4
+ .globl __kernel_getcpu
+ .type __kernel_getcpu,@function
+__kernel_getcpu:
+ CFI_STARTPROC
+ la %r4,0
+ sacf 256
+ l %r5,__VDSO_CPU_NR(%r4)
+ l %r4,__VDSO_NODE_ID(%r4)
+ sacf 0
+ ltr %r2,%r2
+ jz 2f
+ st %r5,0(%r2)
+2: ltr %r3,%r3
+ jz 3f
+ st %r4,0(%r3)
+3: lhi %r2,0
+ br %r14
+ CFI_ENDPROC
+ .size __kernel_getcpu,.-__kernel_getcpu
diff --git a/arch/s390/kernel/vdso32/gettimeofday.S b/arch/s390/kernel/vdso32/gettimeofday.S
new file mode 100644
index 000000000..b23063fbc
--- /dev/null
+++ b/arch/s390/kernel/vdso32/gettimeofday.S
@@ -0,0 +1,103 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Userland implementation of gettimeofday() for 32-bit processes in a
+ * s390 kernel for use in the vDSO
+ *
+ * Copyright IBM Corp. 2008
+ * Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com)
+ */
+#include <asm/vdso.h>
+#include <asm/asm-offsets.h>
+#include <asm/unistd.h>
+#include <asm/dwarf.h>
+#include <asm/ptrace.h>
+
+ .text
+ .align 4
+ .globl __kernel_gettimeofday
+ .type __kernel_gettimeofday,@function
+__kernel_gettimeofday:
+ CFI_STARTPROC
+ ahi %r15,-16
+ CFI_ADJUST_CFA_OFFSET 16
+ CFI_VAL_OFFSET 15, -STACK_FRAME_OVERHEAD
+ basr %r5,0
+0: al %r5,13f-0b(%r5) /* get &_vdso_data */
+1: ltr %r3,%r3 /* check if tz is NULL */
+ je 2f
+ mvc 0(8,%r3),__VDSO_TIMEZONE(%r5)
+2: ltr %r2,%r2 /* check if tv is NULL */
+ je 10f
+ l %r4,__VDSO_UPD_COUNT+4(%r5) /* load update counter */
+ tml %r4,0x0001 /* pending update ? loop */
+ jnz 1b
+ stcke 0(%r15) /* Store TOD clock */
+ lm %r0,%r1,__VDSO_TS_END(%r5) /* TOD steering end time */
+ s %r0,1(%r15)
+ sl %r1,5(%r15)
+ brc 3,14f
+ ahi %r0,-1
+14: ltr %r0,%r0 /* past end of steering? */
+ jm 16f
+ srdl %r0,15 /* 1 per 2^16 */
+ tm __VDSO_TS_DIR+3(%r5),0x01 /* steering direction? */
+ jz 15f
+ lcr %r0,%r0 /* negative TOD offset */
+ lcr %r1,%r1
+ je 15f
+ ahi %r0,-1
+15: a %r0,1(%r15) /* add TOD timestamp */
+ al %r1,5(%r15)
+ brc 12,17f
+ ahi %r0,1
+ j 17f
+16: lm %r0,%r1,1(%r15) /* load TOD timestamp */
+17: s %r0,__VDSO_XTIME_STAMP(%r5) /* TOD - cycle_last */
+ sl %r1,__VDSO_XTIME_STAMP+4(%r5)
+ brc 3,3f
+ ahi %r0,-1
+3: ms %r0,__VDSO_TK_MULT(%r5) /* * tk->mult */
+ st %r0,0(%r15)
+ l %r0,__VDSO_TK_MULT(%r5)
+ ltr %r1,%r1
+ mr %r0,%r0
+ jnm 4f
+ a %r0,__VDSO_TK_MULT(%r5)
+4: al %r0,0(%r15)
+ al %r0,__VDSO_XTIME_NSEC(%r5) /* + xtime */
+ al %r1,__VDSO_XTIME_NSEC+4(%r5)
+ brc 12,5f
+ ahi %r0,1
+5: mvc 0(4,%r15),__VDSO_XTIME_SEC+4(%r5)
+ cl %r4,__VDSO_UPD_COUNT+4(%r5) /* check update counter */
+ jne 1b
+ l %r4,__VDSO_TK_SHIFT(%r5) /* Timekeeper shift */
+ srdl %r0,0(%r4) /* >> tk->shift */
+ l %r4,0(%r15) /* get tv_sec from stack */
+ basr %r5,0
+6: ltr %r0,%r0
+ jnz 7f
+ cl %r1,11f-6b(%r5)
+ jl 8f
+7: ahi %r4,1
+ sl %r1,11f-6b(%r5)
+ brc 3,6b
+ ahi %r0,-1
+ j 6b
+8: st %r4,0(%r2) /* store tv->tv_sec */
+ ltr %r1,%r1
+ m %r0,12f-6b(%r5)
+ jnm 9f
+ al %r0,12f-6b(%r5)
+9: srl %r0,6
+ st %r0,4(%r2) /* store tv->tv_usec */
+10: slr %r2,%r2
+ ahi %r15,16
+ CFI_ADJUST_CFA_OFFSET -16
+ CFI_RESTORE 15
+ br %r14
+ CFI_ENDPROC
+11: .long 1000000000
+12: .long 274877907
+13: .long _vdso_data - 0b
+ .size __kernel_gettimeofday,.-__kernel_gettimeofday
diff --git a/arch/s390/kernel/vdso32/note.S b/arch/s390/kernel/vdso32/note.S
new file mode 100644
index 000000000..db19d0680
--- /dev/null
+++ b/arch/s390/kernel/vdso32/note.S
@@ -0,0 +1,13 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * This supplies .note.* sections to go into the PT_NOTE inside the vDSO text.
+ * Here we can supply some information useful to userland.
+ */
+
+#include <linux/uts.h>
+#include <linux/version.h>
+#include <linux/elfnote.h>
+
+ELFNOTE_START(Linux, 0, "a")
+ .long LINUX_VERSION_CODE
+ELFNOTE_END
diff --git a/arch/s390/kernel/vdso32/vdso32.lds.S b/arch/s390/kernel/vdso32/vdso32.lds.S
new file mode 100644
index 000000000..721c4954c
--- /dev/null
+++ b/arch/s390/kernel/vdso32/vdso32.lds.S
@@ -0,0 +1,142 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * This is the infamous ld script for the 32-bit vdso
+ * library
+ */
+
+#include <asm/page.h>
+#include <asm/vdso.h>
+
+OUTPUT_FORMAT("elf32-s390", "elf32-s390", "elf32-s390")
+OUTPUT_ARCH(s390:31-bit)
+ENTRY(_start)
+
+SECTIONS
+{
+ . = VDSO32_LBASE + SIZEOF_HEADERS;
+
+ .hash : { *(.hash) } :text
+ .gnu.hash : { *(.gnu.hash) }
+ .dynsym : { *(.dynsym) }
+ .dynstr : { *(.dynstr) }
+ .gnu.version : { *(.gnu.version) }
+ .gnu.version_d : { *(.gnu.version_d) }
+ .gnu.version_r : { *(.gnu.version_r) }
+
+ .note : { *(.note.*) } :text :note
+
+ . = ALIGN(16);
+ .text : {
+ *(.text .stub .text.* .gnu.linkonce.t.*)
+ } :text
+ PROVIDE(__etext = .);
+ PROVIDE(_etext = .);
+ PROVIDE(etext = .);
+
+ /*
+ * Other stuff is appended to the text segment:
+ */
+ .rodata : { *(.rodata .rodata.* .gnu.linkonce.r.*) }
+ .rodata1 : { *(.rodata1) }
+
+ .dynamic : { *(.dynamic) } :text :dynamic
+
+ .eh_frame_hdr : { *(.eh_frame_hdr) } :text :eh_frame_hdr
+ .eh_frame : { KEEP (*(.eh_frame)) } :text
+ .gcc_except_table : { *(.gcc_except_table .gcc_except_table.*) }
+
+ .rela.dyn ALIGN(8) : { *(.rela.dyn) }
+ .got ALIGN(8) : { *(.got .toc) }
+
+ _end = .;
+ PROVIDE(end = .);
+
+ /*
+ * Stabs debugging sections are here too.
+ */
+ .stab 0 : { *(.stab) }
+ .stabstr 0 : { *(.stabstr) }
+ .stab.excl 0 : { *(.stab.excl) }
+ .stab.exclstr 0 : { *(.stab.exclstr) }
+ .stab.index 0 : { *(.stab.index) }
+ .stab.indexstr 0 : { *(.stab.indexstr) }
+ .comment 0 : { *(.comment) }
+
+ /*
+ * DWARF debug sections.
+ * Symbols in the DWARF debugging sections are relative to the
+ * beginning of the section so we begin them at 0.
+ */
+ /* DWARF 1 */
+ .debug 0 : { *(.debug) }
+ .line 0 : { *(.line) }
+ /* GNU DWARF 1 extensions */
+ .debug_srcinfo 0 : { *(.debug_srcinfo) }
+ .debug_sfnames 0 : { *(.debug_sfnames) }
+ /* DWARF 1.1 and DWARF 2 */
+ .debug_aranges 0 : { *(.debug_aranges) }
+ .debug_pubnames 0 : { *(.debug_pubnames) }
+ /* DWARF 2 */
+ .debug_info 0 : { *(.debug_info .gnu.linkonce.wi.*) }
+ .debug_abbrev 0 : { *(.debug_abbrev) }
+ .debug_line 0 : { *(.debug_line) }
+ .debug_frame 0 : { *(.debug_frame) }
+ .debug_str 0 : { *(.debug_str) }
+ .debug_loc 0 : { *(.debug_loc) }
+ .debug_macinfo 0 : { *(.debug_macinfo) }
+ /* SGI/MIPS DWARF 2 extensions */
+ .debug_weaknames 0 : { *(.debug_weaknames) }
+ .debug_funcnames 0 : { *(.debug_funcnames) }
+ .debug_typenames 0 : { *(.debug_typenames) }
+ .debug_varnames 0 : { *(.debug_varnames) }
+ /* DWARF 3 */
+ .debug_pubtypes 0 : { *(.debug_pubtypes) }
+ .debug_ranges 0 : { *(.debug_ranges) }
+ .gnu.attributes 0 : { KEEP (*(.gnu.attributes)) }
+
+ . = ALIGN(PAGE_SIZE);
+ PROVIDE(_vdso_data = .);
+
+ /DISCARD/ : {
+ *(.note.GNU-stack)
+ *(.branch_lt)
+ *(.data .data.* .gnu.linkonce.d.* .sdata*)
+ *(.bss .sbss .dynbss .dynsbss)
+ }
+}
+
+/*
+ * Very old versions of ld do not recognize this name token; use the constant.
+ */
+#define PT_GNU_EH_FRAME 0x6474e550
+
+/*
+ * We must supply the ELF program headers explicitly to get just one
+ * PT_LOAD segment, and set the flags explicitly to make segments read-only.
+ */
+PHDRS
+{
+ text PT_LOAD FILEHDR PHDRS FLAGS(5); /* PF_R|PF_X */
+ dynamic PT_DYNAMIC FLAGS(4); /* PF_R */
+ note PT_NOTE FLAGS(4); /* PF_R */
+ eh_frame_hdr PT_GNU_EH_FRAME;
+}
+
+/*
+ * This controls what symbols we export from the DSO.
+ */
+VERSION
+{
+ VDSO_VERSION_STRING {
+ global:
+ /*
+ * Has to be there for the kernel to find
+ */
+ __kernel_gettimeofday;
+ __kernel_clock_gettime;
+ __kernel_clock_getres;
+ __kernel_getcpu;
+
+ local: *;
+ };
+}
diff --git a/arch/s390/kernel/vdso32/vdso32_wrapper.S b/arch/s390/kernel/vdso32/vdso32_wrapper.S
new file mode 100644
index 000000000..de2fb9304
--- /dev/null
+++ b/arch/s390/kernel/vdso32/vdso32_wrapper.S
@@ -0,0 +1,15 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#include <linux/init.h>
+#include <linux/linkage.h>
+#include <asm/page.h>
+
+ __PAGE_ALIGNED_DATA
+
+ .globl vdso32_start, vdso32_end
+ .balign PAGE_SIZE
+vdso32_start:
+ .incbin "arch/s390/kernel/vdso32/vdso32.so"
+ .balign PAGE_SIZE
+vdso32_end:
+
+ .previous
diff --git a/arch/s390/kernel/vdso64/.gitignore b/arch/s390/kernel/vdso64/.gitignore
new file mode 100644
index 000000000..3fd18cf9f
--- /dev/null
+++ b/arch/s390/kernel/vdso64/.gitignore
@@ -0,0 +1 @@
+vdso64.lds
diff --git a/arch/s390/kernel/vdso64/Makefile b/arch/s390/kernel/vdso64/Makefile
new file mode 100644
index 000000000..f849ac61c
--- /dev/null
+++ b/arch/s390/kernel/vdso64/Makefile
@@ -0,0 +1,66 @@
+# SPDX-License-Identifier: GPL-2.0
+# List of files in the vdso, has to be asm only for now
+
+KCOV_INSTRUMENT := n
+
+obj-vdso64 = gettimeofday.o clock_getres.o clock_gettime.o note.o getcpu.o
+
+# Build rules
+
+targets := $(obj-vdso64) vdso64.so vdso64.so.dbg
+obj-vdso64 := $(addprefix $(obj)/, $(obj-vdso64))
+
+KBUILD_AFLAGS += -DBUILD_VDSO
+KBUILD_CFLAGS += -DBUILD_VDSO
+
+KBUILD_AFLAGS_64 := $(filter-out -m64,$(KBUILD_AFLAGS))
+KBUILD_AFLAGS_64 += -m64 -s
+
+KBUILD_CFLAGS_64 := $(filter-out -m64,$(KBUILD_CFLAGS))
+KBUILD_CFLAGS_64 += -m64 -fPIC -shared -fno-common -fno-builtin
+KBUILD_CFLAGS_64 += -nostdlib -Wl,-soname=linux-vdso64.so.1 \
+ $(call cc-ldoption, -Wl$(comma)--hash-style=both)
+
+$(targets:%=$(obj)/%.dbg): KBUILD_CFLAGS = $(KBUILD_CFLAGS_64)
+$(targets:%=$(obj)/%.dbg): KBUILD_AFLAGS = $(KBUILD_AFLAGS_64)
+
+obj-y += vdso64_wrapper.o
+extra-y += vdso64.lds
+CPPFLAGS_vdso64.lds += -P -C -U$(ARCH)
+
+# Disable gcov profiling, ubsan and kasan for VDSO code
+GCOV_PROFILE := n
+UBSAN_SANITIZE := n
+KASAN_SANITIZE := n
+
+# Force dependency (incbin is bad)
+$(obj)/vdso64_wrapper.o : $(obj)/vdso64.so
+
+# link rule for the .so file, .lds has to be first
+$(obj)/vdso64.so.dbg: $(src)/vdso64.lds $(obj-vdso64) FORCE
+ $(call if_changed,vdso64ld)
+
+# strip rule for the .so file
+$(obj)/%.so: OBJCOPYFLAGS := -S
+$(obj)/%.so: $(obj)/%.so.dbg FORCE
+ $(call if_changed,objcopy)
+
+# assembly rules for the .S files
+$(obj-vdso64): %.o: %.S FORCE
+ $(call if_changed_dep,vdso64as)
+
+# actual build commands
+quiet_cmd_vdso64ld = VDSO64L $@
+ cmd_vdso64ld = $(CC) $(c_flags) -Wl,-T $(filter %.lds %.o,$^) -o $@
+quiet_cmd_vdso64as = VDSO64A $@
+ cmd_vdso64as = $(CC) $(a_flags) -c -o $@ $<
+
+# install commands for the unstripped file
+quiet_cmd_vdso_install = INSTALL $@
+ cmd_vdso_install = cp $(obj)/$@.dbg $(MODLIB)/vdso/$@
+
+vdso64.so: $(obj)/vdso64.so.dbg
+ @mkdir -p $(MODLIB)/vdso
+ $(call cmd,vdso_install)
+
+vdso_install: vdso64.so
diff --git a/arch/s390/kernel/vdso64/clock_getres.S b/arch/s390/kernel/vdso64/clock_getres.S
new file mode 100644
index 000000000..0c79caa32
--- /dev/null
+++ b/arch/s390/kernel/vdso64/clock_getres.S
@@ -0,0 +1,50 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Userland implementation of clock_getres() for 64-bit processes in an
+ * s390 kernel, for use in the vDSO
+ *
+ * Copyright IBM Corp. 2008
+ * Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com)
+ */
+#include <asm/vdso.h>
+#include <asm/asm-offsets.h>
+#include <asm/unistd.h>
+#include <asm/dwarf.h>
+
+ .text
+ .align 4
+ .globl __kernel_clock_getres
+ .type __kernel_clock_getres,@function
+__kernel_clock_getres:
+ CFI_STARTPROC
+ larl %r1,3f
+ lg %r0,0(%r1)
+ cghi %r2,__CLOCK_REALTIME_COARSE
+ je 0f
+ cghi %r2,__CLOCK_MONOTONIC_COARSE
+ je 0f
+ larl %r1,_vdso_data
+ llgf %r0,__VDSO_CLOCK_REALTIME_RES(%r1)
+ cghi %r2,__CLOCK_REALTIME
+ je 0f
+ cghi %r2,__CLOCK_MONOTONIC
+ je 0f
+ cghi %r2,__CLOCK_THREAD_CPUTIME_ID
+ je 0f
+ cghi %r2,-2 /* Per-thread CPUCLOCK with PID=0, VIRT=1 */
+ jne 2f
+ larl %r5,_vdso_data
+ icm %r0,15,__LC_ECTG_OK(%r5)
+ jz 2f
+0: ltgr %r3,%r3
+ jz 1f /* res == NULL */
+ xc 0(8,%r3),0(%r3) /* set tp->tv_sec to zero */
+ stg %r0,8(%r3) /* store tp->tv_nsec */
+1: lghi %r2,0
+ br %r14
+2: lghi %r1,__NR_clock_getres /* fallback to svc */
+ svc 0
+ br %r14
+ CFI_ENDPROC
+3: .quad __CLOCK_COARSE_RES
+ .size __kernel_clock_getres,.-__kernel_clock_getres
diff --git a/arch/s390/kernel/vdso64/clock_gettime.S b/arch/s390/kernel/vdso64/clock_gettime.S
new file mode 100644
index 000000000..9d2ee79b9
--- /dev/null
+++ b/arch/s390/kernel/vdso64/clock_gettime.S
@@ -0,0 +1,163 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Userland implementation of clock_gettime() for 64-bit processes in an
+ * s390 kernel, for use in the vDSO
+ *
+ * Copyright IBM Corp. 2008
+ * Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com)
+ */
+#include <asm/vdso.h>
+#include <asm/asm-offsets.h>
+#include <asm/unistd.h>
+#include <asm/dwarf.h>
+#include <asm/ptrace.h>
+
+ .text
+ .align 4
+ .globl __kernel_clock_gettime
+ .type __kernel_clock_gettime,@function
+__kernel_clock_gettime:
+ CFI_STARTPROC
+ aghi %r15,-16
+ CFI_DEF_CFA_OFFSET STACK_FRAME_OVERHEAD+16
+ CFI_VAL_OFFSET 15, -STACK_FRAME_OVERHEAD
+ larl %r5,_vdso_data
+ cghi %r2,__CLOCK_REALTIME_COARSE
+ je 4f
+ cghi %r2,__CLOCK_REALTIME
+ je 5f
+ cghi %r2,-3 /* Per-thread CPUCLOCK with PID=0, VIRT=1 */
+ je 9f
+ cghi %r2,__CLOCK_MONOTONIC_COARSE
+ je 3f
+ cghi %r2,__CLOCK_MONOTONIC
+ jne 12f
+
+ /* CLOCK_MONOTONIC */
+0: lg %r4,__VDSO_UPD_COUNT(%r5) /* load update counter */
+ tmll %r4,0x0001 /* pending update ? loop */
+ jnz 0b
+ stcke 0(%r15) /* Store TOD clock */
+ lgf %r2,__VDSO_TK_SHIFT(%r5) /* Timekeeper shift */
+ lg %r0,__VDSO_WTOM_SEC(%r5)
+ lg %r1,1(%r15)
+ sg %r1,__VDSO_XTIME_STAMP(%r5) /* TOD - cycle_last */
+ msgf %r1,__VDSO_TK_MULT(%r5) /* * tk->mult */
+ alg %r1,__VDSO_WTOM_NSEC(%r5)
+ srlg %r1,%r1,0(%r2) /* >> tk->shift */
+ clg %r4,__VDSO_UPD_COUNT(%r5) /* check update counter */
+ jne 0b
+ larl %r5,13f
+1: clg %r1,0(%r5)
+ jl 2f
+ slg %r1,0(%r5)
+ aghi %r0,1
+ j 1b
+2: stg %r0,0(%r3) /* store tp->tv_sec */
+ stg %r1,8(%r3) /* store tp->tv_nsec */
+ lghi %r2,0
+ aghi %r15,16
+ CFI_DEF_CFA_OFFSET STACK_FRAME_OVERHEAD
+ CFI_RESTORE 15
+ br %r14
+
+ /* CLOCK_MONOTONIC_COARSE */
+ CFI_DEF_CFA_OFFSET STACK_FRAME_OVERHEAD+16
+ CFI_VAL_OFFSET 15, -STACK_FRAME_OVERHEAD
+3: lg %r4,__VDSO_UPD_COUNT(%r5) /* load update counter */
+ tmll %r4,0x0001 /* pending update ? loop */
+ jnz 3b
+ lg %r0,__VDSO_WTOM_CRS_SEC(%r5)
+ lg %r1,__VDSO_WTOM_CRS_NSEC(%r5)
+ clg %r4,__VDSO_UPD_COUNT(%r5) /* check update counter */
+ jne 3b
+ j 2b
+
+ /* CLOCK_REALTIME_COARSE */
+4: lg %r4,__VDSO_UPD_COUNT(%r5) /* load update counter */
+ tmll %r4,0x0001 /* pending update ? loop */
+ jnz 4b
+ lg %r0,__VDSO_XTIME_CRS_SEC(%r5)
+ lg %r1,__VDSO_XTIME_CRS_NSEC(%r5)
+ clg %r4,__VDSO_UPD_COUNT(%r5) /* check update counter */
+ jne 4b
+ j 7f
+
+ /* CLOCK_REALTIME */
+5: lg %r4,__VDSO_UPD_COUNT(%r5) /* load update counter */
+ tmll %r4,0x0001 /* pending update ? loop */
+ jnz 5b
+ stcke 0(%r15) /* Store TOD clock */
+ lg %r1,1(%r15)
+ lg %r0,__VDSO_TS_END(%r5) /* TOD steering end time */
+ slgr %r0,%r1 /* now - ts_steering_end */
+ ltgr %r0,%r0 /* past end of steering ? */
+ jm 17f
+ srlg %r0,%r0,15 /* 1 per 2^16 */
+ tm __VDSO_TS_DIR+3(%r5),0x01 /* steering direction? */
+ jz 18f
+ lcgr %r0,%r0 /* negative TOD offset */
+18: algr %r1,%r0 /* add steering offset */
+17: lgf %r2,__VDSO_TK_SHIFT(%r5) /* Timekeeper shift */
+ sg %r1,__VDSO_XTIME_STAMP(%r5) /* TOD - cycle_last */
+ msgf %r1,__VDSO_TK_MULT(%r5) /* * tk->mult */
+ alg %r1,__VDSO_XTIME_NSEC(%r5) /* + tk->xtime_nsec */
+ srlg %r1,%r1,0(%r2) /* >> tk->shift */
+ lg %r0,__VDSO_XTIME_SEC(%r5) /* tk->xtime_sec */
+ clg %r4,__VDSO_UPD_COUNT(%r5) /* check update counter */
+ jne 5b
+ larl %r5,13f
+6: clg %r1,0(%r5)
+ jl 7f
+ slg %r1,0(%r5)
+ aghi %r0,1
+ j 6b
+7: stg %r0,0(%r3) /* store tp->tv_sec */
+ stg %r1,8(%r3) /* store tp->tv_nsec */
+ lghi %r2,0
+ aghi %r15,16
+ CFI_DEF_CFA_OFFSET STACK_FRAME_OVERHEAD
+ CFI_RESTORE 15
+ br %r14
+
+ /* CPUCLOCK_VIRT for this thread */
+ CFI_DEF_CFA_OFFSET STACK_FRAME_OVERHEAD+16
+ CFI_VAL_OFFSET 15, -STACK_FRAME_OVERHEAD
+9: lghi %r4,0
+ icm %r0,15,__VDSO_ECTG_OK(%r5)
+ jz 12f
+ sacf 256 /* Magic ectg instruction */
+ .insn ssf,0xc80100000000,__VDSO_ECTG_BASE(4),__VDSO_ECTG_USER(4),4
+ sacf 0
+ algr %r1,%r0 /* r1 = cputime as TOD value */
+ mghi %r1,1000 /* convert to nanoseconds */
+ srlg %r1,%r1,12 /* r1 = cputime in nanosec */
+ lgr %r4,%r1
+ larl %r5,13f
+ srlg %r1,%r1,9 /* divide by 1000000000 */
+ mlg %r0,8(%r5)
+ srlg %r0,%r0,11 /* r0 = tv_sec */
+ stg %r0,0(%r3)
+ msg %r0,0(%r5) /* calculate tv_nsec */
+ slgr %r4,%r0 /* r4 = tv_nsec */
+ stg %r4,8(%r3)
+ lghi %r2,0
+ aghi %r15,16
+ CFI_DEF_CFA_OFFSET STACK_FRAME_OVERHEAD
+ CFI_RESTORE 15
+ br %r14
+
+ /* Fallback to system call */
+ CFI_DEF_CFA_OFFSET STACK_FRAME_OVERHEAD+16
+ CFI_VAL_OFFSET 15, -STACK_FRAME_OVERHEAD
+12: lghi %r1,__NR_clock_gettime
+ svc 0
+ aghi %r15,16
+ CFI_DEF_CFA_OFFSET STACK_FRAME_OVERHEAD
+ CFI_RESTORE 15
+ br %r14
+ CFI_ENDPROC
+
+13: .quad 1000000000
+14: .quad 19342813113834067
+ .size __kernel_clock_gettime,.-__kernel_clock_gettime
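
Every branch above follows the same lock-free pattern against the vdso data
page: load the update counter, spin while its low bit indicates an update in
progress, snapshot the timekeeper fields, and start over if the counter has
changed by the time the snapshot is complete. A C sketch of that retry loop,
against an illustrative struct vdso_data (the field names are placeholders,
and the ordering guarantees the real assembly relies on are not modeled):

#include <stdint.h>

struct vdso_data {			/* illustrative subset only */
	uint64_t tb_update_count;	/* odd while the kernel updates the page */
	uint64_t xtime_sec;
	uint64_t xtime_nsec;
};

static void vdso_read_time(const volatile struct vdso_data *vd,
			   uint64_t *sec, uint64_t *nsec)
{
	uint64_t count;

	for (;;) {
		count = vd->tb_update_count;
		if (count & 1)			/* update pending: try again */
			continue;
		*sec  = vd->xtime_sec;		/* snapshot the timekeeper fields */
		*nsec = vd->xtime_nsec;
		if (count == vd->tb_update_count)
			break;			/* no update raced with the read */
	}
}
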
diff --git a/arch/s390/kernel/vdso64/getcpu.S b/arch/s390/kernel/vdso64/getcpu.S
new file mode 100644
index 000000000..2446e9dac
--- /dev/null
+++ b/arch/s390/kernel/vdso64/getcpu.S
@@ -0,0 +1,33 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Userland implementation of getcpu() for 64-bit processes in an
+ * s390 kernel, for use in the vDSO
+ *
+ * Copyright IBM Corp. 2016
+ * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>
+ */
+#include <asm/vdso.h>
+#include <asm/asm-offsets.h>
+#include <asm/dwarf.h>
+
+ .text
+ .align 4
+ .globl __kernel_getcpu
+ .type __kernel_getcpu,@function
+__kernel_getcpu:
+ CFI_STARTPROC
+ la %r4,0
+ sacf 256
+ l %r5,__VDSO_CPU_NR(%r4)
+ l %r4,__VDSO_NODE_ID(%r4)
+ sacf 0
+ ltgr %r2,%r2
+ jz 2f
+ st %r5,0(%r2)
+2: ltgr %r3,%r3
+ jz 3f
+ st %r4,0(%r3)
+3: lghi %r2,0
+ br %r14
+ CFI_ENDPROC
+ .size __kernel_getcpu,.-__kernel_getcpu
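
__kernel_getcpu reads the CPU number and the node id that the kernel keeps in
the per-CPU vdso data area (reached through the sacf 256 / sacf 0 bracket
around the two loads), stores each value only if the corresponding pointer is
non-NULL, and always returns 0. The observable behaviour, sketched in C with
two stand-in variables for the fields read under sacf:

static unsigned int vdso_cpu_nr;	/* stand-in for __VDSO_CPU_NR */
static unsigned int vdso_node_id;	/* stand-in for __VDSO_NODE_ID */

static int vdso_getcpu(unsigned int *cpu, unsigned int *node)
{
	unsigned int c = vdso_cpu_nr;	/* read both fields first ... */
	unsigned int n = vdso_node_id;

	if (cpu)			/* ... then store only what was asked for */
		*cpu = c;
	if (node)
		*node = n;
	return 0;			/* never fails */
}
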
diff --git a/arch/s390/kernel/vdso64/gettimeofday.S b/arch/s390/kernel/vdso64/gettimeofday.S
new file mode 100644
index 000000000..aebe10dc7
--- /dev/null
+++ b/arch/s390/kernel/vdso64/gettimeofday.S
@@ -0,0 +1,71 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Userland implementation of gettimeofday() for 64-bit processes in an
+ * s390 kernel, for use in the vDSO
+ *
+ * Copyright IBM Corp. 2008
+ * Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com)
+ */
+#include <asm/vdso.h>
+#include <asm/asm-offsets.h>
+#include <asm/unistd.h>
+#include <asm/dwarf.h>
+#include <asm/ptrace.h>
+
+ .text
+ .align 4
+ .globl __kernel_gettimeofday
+ .type __kernel_gettimeofday,@function
+__kernel_gettimeofday:
+ CFI_STARTPROC
+ aghi %r15,-16
+ CFI_ADJUST_CFA_OFFSET 16
+ CFI_VAL_OFFSET 15, -STACK_FRAME_OVERHEAD
+ larl %r5,_vdso_data
+0: ltgr %r3,%r3 /* check if tz is NULL */
+ je 1f
+ mvc 0(8,%r3),__VDSO_TIMEZONE(%r5)
+1: ltgr %r2,%r2 /* check if tv is NULL */
+ je 4f
+ lg %r4,__VDSO_UPD_COUNT(%r5) /* load update counter */
+ tmll %r4,0x0001 /* pending update ? loop */
+ jnz 0b
+ stcke 0(%r15) /* Store TOD clock */
+ lg %r1,1(%r15)
+ lg %r0,__VDSO_TS_END(%r5) /* TOD steering end time */
+ slgr %r0,%r1 /* now - ts_steering_end */
+ ltgr %r0,%r0 /* past end of steering ? */
+ jm 6f
+ srlg %r0,%r0,15 /* 1 per 2^16 */
+ tm __VDSO_TS_DIR+3(%r5),0x01 /* steering direction? */
+ jz 7f
+ lcgr %r0,%r0 /* negative TOD offset */
+7: algr %r1,%r0 /* add steering offset */
+6: sg %r1,__VDSO_XTIME_STAMP(%r5) /* TOD - cycle_last */
+ msgf %r1,__VDSO_TK_MULT(%r5) /* * tk->mult */
+ alg %r1,__VDSO_XTIME_NSEC(%r5) /* + tk->xtime_nsec */
+ lg %r0,__VDSO_XTIME_SEC(%r5) /* tk->xtime_sec */
+ clg %r4,__VDSO_UPD_COUNT(%r5) /* check update counter */
+ jne 0b
+ lgf %r5,__VDSO_TK_SHIFT(%r5) /* Timekeeper shift */
+ srlg %r1,%r1,0(%r5) /* >> tk->shift */
+ larl %r5,5f
+2: clg %r1,0(%r5)
+ jl 3f
+ slg %r1,0(%r5)
+ aghi %r0,1
+ j 2b
+3: stg %r0,0(%r2) /* store tv->tv_sec */
+ slgr %r0,%r0 /* tv_nsec -> tv_usec */
+ ml %r0,8(%r5)
+ srlg %r0,%r0,6
+ stg %r0,8(%r2) /* store tv->tv_usec */
+4: lghi %r2,0
+ aghi %r15,16
+ CFI_ADJUST_CFA_OFFSET -16
+ CFI_RESTORE 15
+ br %r14
+ CFI_ENDPROC
+5: .quad 1000000000
+ .long 274877907
+ .size __kernel_gettimeofday,.-__kernel_gettimeofday
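
The tail of the routine converts the nanosecond remainder to microseconds
without a division: after zeroing %r0, "ml %r0,8(%r5)" multiplies the
remainder by the 32-bit constant 274877907 (the .long following the .quad at
label 5, roughly 2^38/1000), ml leaves the high word of the product in %r0,
and "srlg %r0,%r0,6" completes an effective shift by 38. The stored value is
therefore tv_nsec * 274877907 >> 38, which equals tv_nsec / 1000 for every
value below 10^9. A self-contained check of that reciprocal-multiplication
trick:

#include <assert.h>
#include <stdint.h>

/* tv_usec = tv_nsec * round(2^38 / 1000) >> 38, as the ml/srlg pair does. */
static uint32_t nsec_to_usec(uint32_t nsec)
{
	uint64_t prod = (uint64_t)nsec * 274877907u;
	return (uint32_t)(prod >> 38);		/* >>32 from ml, >>6 from srlg */
}

int main(void)
{
	uint32_t ns;

	for (ns = 0; ns < 1000000000u; ns += 333)
		assert(nsec_to_usec(ns) == ns / 1000);
	return 0;
}
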
diff --git a/arch/s390/kernel/vdso64/note.S b/arch/s390/kernel/vdso64/note.S
new file mode 100644
index 000000000..db19d0680
--- /dev/null
+++ b/arch/s390/kernel/vdso64/note.S
@@ -0,0 +1,13 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * This supplies .note.* sections to go into the PT_NOTE inside the vDSO text.
+ * Here we can supply some information useful to userland.
+ */
+
+#include <linux/uts.h>
+#include <linux/version.h>
+#include <linux/elfnote.h>
+
+ELFNOTE_START(Linux, 0, "a")
+ .long LINUX_VERSION_CODE
+ELFNOTE_END
diff --git a/arch/s390/kernel/vdso64/vdso64.lds.S b/arch/s390/kernel/vdso64/vdso64.lds.S
new file mode 100644
index 000000000..7ddb116b5
--- /dev/null
+++ b/arch/s390/kernel/vdso64/vdso64.lds.S
@@ -0,0 +1,142 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * This is the infamous ld script for the 64-bit vDSO
+ * library
+ */
+
+#include <asm/page.h>
+#include <asm/vdso.h>
+
+OUTPUT_FORMAT("elf64-s390", "elf64-s390", "elf64-s390")
+OUTPUT_ARCH(s390:64-bit)
+ENTRY(_start)
+
+SECTIONS
+{
+ . = VDSO64_LBASE + SIZEOF_HEADERS;
+
+ .hash : { *(.hash) } :text
+ .gnu.hash : { *(.gnu.hash) }
+ .dynsym : { *(.dynsym) }
+ .dynstr : { *(.dynstr) }
+ .gnu.version : { *(.gnu.version) }
+ .gnu.version_d : { *(.gnu.version_d) }
+ .gnu.version_r : { *(.gnu.version_r) }
+
+ .note : { *(.note.*) } :text :note
+
+ . = ALIGN(16);
+ .text : {
+ *(.text .stub .text.* .gnu.linkonce.t.*)
+ } :text
+ PROVIDE(__etext = .);
+ PROVIDE(_etext = .);
+ PROVIDE(etext = .);
+
+ /*
+ * Other stuff is appended to the text segment:
+ */
+ .rodata : { *(.rodata .rodata.* .gnu.linkonce.r.*) }
+ .rodata1 : { *(.rodata1) }
+
+ .dynamic : { *(.dynamic) } :text :dynamic
+
+ .eh_frame_hdr : { *(.eh_frame_hdr) } :text :eh_frame_hdr
+ .eh_frame : { KEEP (*(.eh_frame)) } :text
+ .gcc_except_table : { *(.gcc_except_table .gcc_except_table.*) }
+
+ .rela.dyn ALIGN(8) : { *(.rela.dyn) }
+ .got ALIGN(8) : { *(.got .toc) }
+
+ _end = .;
+ PROVIDE(end = .);
+
+ /*
+ * Stabs debugging sections are here too.
+ */
+ .stab 0 : { *(.stab) }
+ .stabstr 0 : { *(.stabstr) }
+ .stab.excl 0 : { *(.stab.excl) }
+ .stab.exclstr 0 : { *(.stab.exclstr) }
+ .stab.index 0 : { *(.stab.index) }
+ .stab.indexstr 0 : { *(.stab.indexstr) }
+ .comment 0 : { *(.comment) }
+
+ /*
+ * DWARF debug sections.
+ * Symbols in the DWARF debugging sections are relative to the
+ * beginning of the section so we begin them at 0.
+ */
+ /* DWARF 1 */
+ .debug 0 : { *(.debug) }
+ .line 0 : { *(.line) }
+ /* GNU DWARF 1 extensions */
+ .debug_srcinfo 0 : { *(.debug_srcinfo) }
+ .debug_sfnames 0 : { *(.debug_sfnames) }
+ /* DWARF 1.1 and DWARF 2 */
+ .debug_aranges 0 : { *(.debug_aranges) }
+ .debug_pubnames 0 : { *(.debug_pubnames) }
+ /* DWARF 2 */
+ .debug_info 0 : { *(.debug_info .gnu.linkonce.wi.*) }
+ .debug_abbrev 0 : { *(.debug_abbrev) }
+ .debug_line 0 : { *(.debug_line) }
+ .debug_frame 0 : { *(.debug_frame) }
+ .debug_str 0 : { *(.debug_str) }
+ .debug_loc 0 : { *(.debug_loc) }
+ .debug_macinfo 0 : { *(.debug_macinfo) }
+ /* SGI/MIPS DWARF 2 extensions */
+ .debug_weaknames 0 : { *(.debug_weaknames) }
+ .debug_funcnames 0 : { *(.debug_funcnames) }
+ .debug_typenames 0 : { *(.debug_typenames) }
+ .debug_varnames 0 : { *(.debug_varnames) }
+ /* DWARF 3 */
+ .debug_pubtypes 0 : { *(.debug_pubtypes) }
+ .debug_ranges 0 : { *(.debug_ranges) }
+ .gnu.attributes 0 : { KEEP (*(.gnu.attributes)) }
+
+ . = ALIGN(PAGE_SIZE);
+ PROVIDE(_vdso_data = .);
+
+ /DISCARD/ : {
+ *(.note.GNU-stack)
+ *(.branch_lt)
+ *(.data .data.* .gnu.linkonce.d.* .sdata*)
+ *(.bss .sbss .dynbss .dynsbss)
+ }
+}
+
+/*
+ * Very old versions of ld do not recognize this name token; use the constant.
+ */
+#define PT_GNU_EH_FRAME 0x6474e550
+
+/*
+ * We must supply the ELF program headers explicitly to get just one
+ * PT_LOAD segment, and set the flags explicitly to make segments read-only.
+ */
+PHDRS
+{
+ text PT_LOAD FILEHDR PHDRS FLAGS(5); /* PF_R|PF_X */
+ dynamic PT_DYNAMIC FLAGS(4); /* PF_R */
+ note PT_NOTE FLAGS(4); /* PF_R */
+ eh_frame_hdr PT_GNU_EH_FRAME;
+}
+
+/*
+ * This controls what symbols we export from the DSO.
+ */
+VERSION
+{
+ VDSO_VERSION_STRING {
+ global:
+ /*
+ * Has to be there for the kernel to find
+ */
+ __kernel_gettimeofday;
+ __kernel_clock_gettime;
+ __kernel_clock_getres;
+ __kernel_getcpu;
+
+ local: *;
+ };
+}
diff --git a/arch/s390/kernel/vdso64/vdso64_wrapper.S b/arch/s390/kernel/vdso64/vdso64_wrapper.S
new file mode 100644
index 000000000..672184998
--- /dev/null
+++ b/arch/s390/kernel/vdso64/vdso64_wrapper.S
@@ -0,0 +1,15 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#include <linux/init.h>
+#include <linux/linkage.h>
+#include <asm/page.h>
+
+ __PAGE_ALIGNED_DATA
+
+ .globl vdso64_start, vdso64_end
+ .balign PAGE_SIZE
+vdso64_start:
+ .incbin "arch/s390/kernel/vdso64/vdso64.so"
+ .balign PAGE_SIZE
+vdso64_end:
+
+ .previous
diff --git a/arch/s390/kernel/vmlinux.lds.S b/arch/s390/kernel/vmlinux.lds.S
new file mode 100644
index 000000000..18ede6e80
--- /dev/null
+++ b/arch/s390/kernel/vmlinux.lds.S
@@ -0,0 +1,156 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* ld script to make s390 Linux kernel
+ * Written by Martin Schwidefsky (schwidefsky@de.ibm.com)
+ */
+
+#include <asm/thread_info.h>
+#include <asm/page.h>
+
+/*
+ * Put .bss..swapper_pg_dir as the first thing in .bss. This will
+ * make sure it has 16k alignment.
+ */
+#define BSS_FIRST_SECTIONS *(.bss..swapper_pg_dir)
+
+/* Handle ro_after_init data on our own. */
+#define RO_AFTER_INIT_DATA
+
+#include <asm-generic/vmlinux.lds.h>
+
+OUTPUT_FORMAT("elf64-s390", "elf64-s390", "elf64-s390")
+OUTPUT_ARCH(s390:64-bit)
+ENTRY(startup_continue)
+jiffies = jiffies_64;
+
+PHDRS {
+ text PT_LOAD FLAGS(5); /* R_E */
+ data PT_LOAD FLAGS(7); /* RWE */
+ note PT_NOTE FLAGS(0); /* ___ */
+}
+
+SECTIONS
+{
+ . = 0x100000;
+ .text : {
+ _stext = .; /* Start of text section */
+ _text = .; /* Text and read-only data */
+ HEAD_TEXT
+ TEXT_TEXT
+ SCHED_TEXT
+ CPUIDLE_TEXT
+ LOCK_TEXT
+ KPROBES_TEXT
+ IRQENTRY_TEXT
+ SOFTIRQENTRY_TEXT
+ *(.text.*_indirect_*)
+ *(.fixup)
+ *(.gnu.warning)
+ . = ALIGN(PAGE_SIZE);
+ _etext = .; /* End of text section */
+ } :text = 0x0700
+
+ NOTES :text :note
+
+ .dummy : { *(.dummy) } :data
+
+ RO_DATA_SECTION(PAGE_SIZE)
+
+ . = ALIGN(PAGE_SIZE);
+ _sdata = .; /* Start of data section */
+
+ . = ALIGN(PAGE_SIZE);
+ __start_ro_after_init = .;
+ .data..ro_after_init : {
+ *(.data..ro_after_init)
+ }
+ EXCEPTION_TABLE(16)
+ . = ALIGN(PAGE_SIZE);
+ __end_ro_after_init = .;
+
+ RW_DATA_SECTION(0x100, PAGE_SIZE, THREAD_SIZE)
+
+ _edata = .; /* End of data section */
+
+ /* will be freed after init */
+ . = ALIGN(PAGE_SIZE); /* Init code and data */
+ __init_begin = .;
+
+ . = ALIGN(PAGE_SIZE);
+ .init.text : AT(ADDR(.init.text) - LOAD_OFFSET) {
+ _sinittext = .;
+ INIT_TEXT
+ . = ALIGN(PAGE_SIZE);
+ _einittext = .;
+ }
+
+ /*
+ * .exit.text is discarded at runtime, not link time,
+ * to deal with references from __bug_table
+ */
+ .exit.text : {
+ EXIT_TEXT
+ }
+
+ .exit.data : {
+ EXIT_DATA
+ }
+
+ /*
+ * struct alt_instr entries. From the header (alternative.h):
+ * "Alternative instructions for different CPU types or capabilities"
+ * Think locking instructions on spinlocks.
+ * Note that it is part of the __init region.
+ */
+ . = ALIGN(8);
+ .altinstructions : {
+ __alt_instructions = .;
+ *(.altinstructions)
+ __alt_instructions_end = .;
+ }
+
+ /*
+ * And here are the replacement instructions. The linker sticks them
+ * in as binary blobs. The .altinstructions section has enough data to
+ * get their address and length so that the kernel can be patched safely.
+ * Note that it is part of the __init region.
+ */
+ .altinstr_replacement : {
+ *(.altinstr_replacement)
+ }
+
+ /*
+ * Table with the patch locations to undo expolines
+ */
+ .nospec_call_table : {
+ __nospec_call_start = . ;
+ *(.s390_indirect*)
+ __nospec_call_end = . ;
+ }
+ .nospec_return_table : {
+ __nospec_return_start = . ;
+ *(.s390_return*)
+ __nospec_return_end = . ;
+ }
+
+ /* early.c uses stsi, which requires page aligned data. */
+ . = ALIGN(PAGE_SIZE);
+ INIT_DATA_SECTION(0x100)
+
+ PERCPU_SECTION(0x100)
+ . = ALIGN(PAGE_SIZE);
+ __init_end = .; /* freed after init ends here */
+
+ BSS_SECTION(PAGE_SIZE, 4 * PAGE_SIZE, PAGE_SIZE)
+
+ _end = . ;
+
+ /* Debugging sections. */
+ STABS_DEBUG
+ DWARF_DEBUG
+
+ /* Sections to be discarded */
+ DISCARDS
+ /DISCARD/ : {
+ *(.eh_frame)
+ }
+}
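
The __alt_instructions/__alt_instructions_end markers delimit an array of
alternative-instruction records, and .altinstr_replacement carries the bytes
they refer to; early in boot the kernel walks the array and, where the
required facility is installed, overwrites the original instruction sequence
with its replacement. A schematic of that walk (the record layout and the
test_facility_stub() helper are illustrative, not the exact s390 definitions):

#include <string.h>

struct alt_instr {			/* illustrative layout */
	int   instr_offset;		/* original code, relative to this field */
	int   repl_offset;		/* replacement, relative to this field */
	unsigned short facility;	/* facility bit enabling the replacement */
	unsigned char  instrlen;	/* length of the original sequence */
	unsigned char  replacementlen;	/* length of the replacement */
};

/* Array bounds provided by the linker script via *(.altinstructions). */
extern struct alt_instr __alt_instructions[], __alt_instructions_end[];
extern int test_facility_stub(unsigned short nr);	/* hypothetical helper */

static void apply_alternatives_sketch(void)
{
	struct alt_instr *a;
	unsigned char *instr, *repl;

	for (a = __alt_instructions; a < __alt_instructions_end; a++) {
		if (!test_facility_stub(a->facility))
			continue;		/* keep the original code */
		instr = (unsigned char *)&a->instr_offset + a->instr_offset;
		repl  = (unsigned char *)&a->repl_offset + a->repl_offset;
		memcpy(instr, repl, a->replacementlen);
		/* the real code also pads with nops when the lengths differ */
	}
}
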
diff --git a/arch/s390/kernel/vtime.c b/arch/s390/kernel/vtime.c
new file mode 100644
index 000000000..f24395a01
--- /dev/null
+++ b/arch/s390/kernel/vtime.c
@@ -0,0 +1,437 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Virtual cpu timer based timer functions.
+ *
+ * Copyright IBM Corp. 2004, 2012
+ * Author(s): Jan Glauber <jan.glauber@de.ibm.com>
+ */
+
+#include <linux/kernel_stat.h>
+#include <linux/sched/cputime.h>
+#include <linux/export.h>
+#include <linux/kernel.h>
+#include <linux/timex.h>
+#include <linux/types.h>
+#include <linux/time.h>
+
+#include <asm/vtimer.h>
+#include <asm/vtime.h>
+#include <asm/cpu_mf.h>
+#include <asm/smp.h>
+
+#include "entry.h"
+
+static void virt_timer_expire(void);
+
+static LIST_HEAD(virt_timer_list);
+static DEFINE_SPINLOCK(virt_timer_lock);
+static atomic64_t virt_timer_current;
+static atomic64_t virt_timer_elapsed;
+
+DEFINE_PER_CPU(u64, mt_cycles[8]);
+static DEFINE_PER_CPU(u64, mt_scaling_mult) = { 1 };
+static DEFINE_PER_CPU(u64, mt_scaling_div) = { 1 };
+static DEFINE_PER_CPU(u64, mt_scaling_jiffies);
+
+static inline u64 get_vtimer(void)
+{
+ u64 timer;
+
+ asm volatile("stpt %0" : "=m" (timer));
+ return timer;
+}
+
+static inline void set_vtimer(u64 expires)
+{
+ u64 timer;
+
+ asm volatile(
+ " stpt %0\n" /* Store current cpu timer value */
+ " spt %1" /* Set new value imm. afterwards */
+ : "=m" (timer) : "m" (expires));
+ S390_lowcore.system_timer += S390_lowcore.last_update_timer - timer;
+ S390_lowcore.last_update_timer = expires;
+}
+
+static inline int virt_timer_forward(u64 elapsed)
+{
+ BUG_ON(!irqs_disabled());
+
+ if (list_empty(&virt_timer_list))
+ return 0;
+ elapsed = atomic64_add_return(elapsed, &virt_timer_elapsed);
+ return elapsed >= atomic64_read(&virt_timer_current);
+}
+
+static void update_mt_scaling(void)
+{
+ u64 cycles_new[8], *cycles_old;
+ u64 delta, fac, mult, div;
+ int i;
+
+ stcctm5(smp_cpu_mtid + 1, cycles_new);
+ cycles_old = this_cpu_ptr(mt_cycles);
+ fac = 1;
+ mult = div = 0;
+ for (i = 0; i <= smp_cpu_mtid; i++) {
+ delta = cycles_new[i] - cycles_old[i];
+ div += delta;
+ mult *= i + 1;
+ mult += delta * fac;
+ fac *= i + 1;
+ }
+ div *= fac;
+ if (div > 0) {
+ /* Update scaling factor */
+ __this_cpu_write(mt_scaling_mult, mult);
+ __this_cpu_write(mt_scaling_div, div);
+ memcpy(cycles_old, cycles_new,
+ sizeof(u64) * (smp_cpu_mtid + 1));
+ }
+ __this_cpu_write(mt_scaling_jiffies, jiffies_64);
+}
+
+static inline u64 update_tsk_timer(unsigned long *tsk_vtime, u64 new)
+{
+ u64 delta;
+
+ delta = new - *tsk_vtime;
+ *tsk_vtime = new;
+ return delta;
+}
+
+
+static inline u64 scale_vtime(u64 vtime)
+{
+ u64 mult = __this_cpu_read(mt_scaling_mult);
+ u64 div = __this_cpu_read(mt_scaling_div);
+
+ if (smp_cpu_mtid)
+ return vtime * mult / div;
+ return vtime;
+}
+
+static void account_system_index_scaled(struct task_struct *p, u64 cputime,
+ enum cpu_usage_stat index)
+{
+ p->stimescaled += cputime_to_nsecs(scale_vtime(cputime));
+ account_system_index_time(p, cputime_to_nsecs(cputime), index);
+}
+
+/*
+ * Update process times based on virtual CPU times stored by entry.S
+ * in the lowcore fields user_timer, system_timer & steal_timer.
+ */
+static int do_account_vtime(struct task_struct *tsk)
+{
+ u64 timer, clock, user, guest, system, hardirq, softirq, steal;
+
+ timer = S390_lowcore.last_update_timer;
+ clock = S390_lowcore.last_update_clock;
+ asm volatile(
+ " stpt %0\n" /* Store current cpu timer value */
+#ifdef CONFIG_HAVE_MARCH_Z9_109_FEATURES
+ " stckf %1" /* Store current tod clock value */
+#else
+ " stck %1" /* Store current tod clock value */
+#endif
+ : "=m" (S390_lowcore.last_update_timer),
+ "=m" (S390_lowcore.last_update_clock));
+ clock = S390_lowcore.last_update_clock - clock;
+ timer -= S390_lowcore.last_update_timer;
+
+ if (hardirq_count())
+ S390_lowcore.hardirq_timer += timer;
+ else
+ S390_lowcore.system_timer += timer;
+
+ /* Update MT utilization calculation */
+ if (smp_cpu_mtid &&
+ time_after64(jiffies_64, this_cpu_read(mt_scaling_jiffies)))
+ update_mt_scaling();
+
+ /* Calculate cputime delta */
+ user = update_tsk_timer(&tsk->thread.user_timer,
+ READ_ONCE(S390_lowcore.user_timer));
+ guest = update_tsk_timer(&tsk->thread.guest_timer,
+ READ_ONCE(S390_lowcore.guest_timer));
+ system = update_tsk_timer(&tsk->thread.system_timer,
+ READ_ONCE(S390_lowcore.system_timer));
+ hardirq = update_tsk_timer(&tsk->thread.hardirq_timer,
+ READ_ONCE(S390_lowcore.hardirq_timer));
+ softirq = update_tsk_timer(&tsk->thread.softirq_timer,
+ READ_ONCE(S390_lowcore.softirq_timer));
+ S390_lowcore.steal_timer +=
+ clock - user - guest - system - hardirq - softirq;
+
+ /* Push account value */
+ if (user) {
+ account_user_time(tsk, cputime_to_nsecs(user));
+ tsk->utimescaled += cputime_to_nsecs(scale_vtime(user));
+ }
+
+ if (guest) {
+ account_guest_time(tsk, cputime_to_nsecs(guest));
+ tsk->utimescaled += cputime_to_nsecs(scale_vtime(guest));
+ }
+
+ if (system)
+ account_system_index_scaled(tsk, system, CPUTIME_SYSTEM);
+ if (hardirq)
+ account_system_index_scaled(tsk, hardirq, CPUTIME_IRQ);
+ if (softirq)
+ account_system_index_scaled(tsk, softirq, CPUTIME_SOFTIRQ);
+
+ steal = S390_lowcore.steal_timer;
+ if ((s64) steal > 0) {
+ S390_lowcore.steal_timer = 0;
+ account_steal_time(cputime_to_nsecs(steal));
+ }
+
+ return virt_timer_forward(user + guest + system + hardirq + softirq);
+}
+
+void vtime_task_switch(struct task_struct *prev)
+{
+ do_account_vtime(prev);
+ prev->thread.user_timer = S390_lowcore.user_timer;
+ prev->thread.guest_timer = S390_lowcore.guest_timer;
+ prev->thread.system_timer = S390_lowcore.system_timer;
+ prev->thread.hardirq_timer = S390_lowcore.hardirq_timer;
+ prev->thread.softirq_timer = S390_lowcore.softirq_timer;
+ S390_lowcore.user_timer = current->thread.user_timer;
+ S390_lowcore.guest_timer = current->thread.guest_timer;
+ S390_lowcore.system_timer = current->thread.system_timer;
+ S390_lowcore.hardirq_timer = current->thread.hardirq_timer;
+ S390_lowcore.softirq_timer = current->thread.softirq_timer;
+}
+
+/*
+ * On s390, accounting pending user time also implies
+ * accounting system time in order to correctly compute
+ * the stolen time.
+ */
+void vtime_flush(struct task_struct *tsk)
+{
+ if (do_account_vtime(tsk))
+ virt_timer_expire();
+}
+
+/*
+ * Update process times based on virtual CPU times stored by entry.S
+ * in the lowcore fields user_timer, system_timer & steal_timer.
+ */
+void vtime_account_irq_enter(struct task_struct *tsk)
+{
+ u64 timer;
+
+ timer = S390_lowcore.last_update_timer;
+ S390_lowcore.last_update_timer = get_vtimer();
+ timer -= S390_lowcore.last_update_timer;
+
+ if ((tsk->flags & PF_VCPU) && (irq_count() == 0))
+ S390_lowcore.guest_timer += timer;
+ else if (hardirq_count())
+ S390_lowcore.hardirq_timer += timer;
+ else if (in_serving_softirq())
+ S390_lowcore.softirq_timer += timer;
+ else
+ S390_lowcore.system_timer += timer;
+
+ virt_timer_forward(timer);
+}
+EXPORT_SYMBOL_GPL(vtime_account_irq_enter);
+
+void vtime_account_system(struct task_struct *tsk)
+__attribute__((alias("vtime_account_irq_enter")));
+EXPORT_SYMBOL_GPL(vtime_account_system);
+
+/*
+ * Sorted add to a list. The list is searched linearly until the first
+ * bigger element is found.
+ */
+static void list_add_sorted(struct vtimer_list *timer, struct list_head *head)
+{
+ struct vtimer_list *tmp;
+
+ list_for_each_entry(tmp, head, entry) {
+ if (tmp->expires > timer->expires) {
+ list_add_tail(&timer->entry, &tmp->entry);
+ return;
+ }
+ }
+ list_add_tail(&timer->entry, head);
+}
+
+/*
+ * Handler for expired virtual CPU timer.
+ */
+static void virt_timer_expire(void)
+{
+ struct vtimer_list *timer, *tmp;
+ unsigned long elapsed;
+ LIST_HEAD(cb_list);
+
+ /* walk timer list, fire all expired timers */
+ spin_lock(&virt_timer_lock);
+ elapsed = atomic64_read(&virt_timer_elapsed);
+ list_for_each_entry_safe(timer, tmp, &virt_timer_list, entry) {
+ if (timer->expires < elapsed)
+ /* move expired timer to the callback queue */
+ list_move_tail(&timer->entry, &cb_list);
+ else
+ timer->expires -= elapsed;
+ }
+ if (!list_empty(&virt_timer_list)) {
+ timer = list_first_entry(&virt_timer_list,
+ struct vtimer_list, entry);
+ atomic64_set(&virt_timer_current, timer->expires);
+ }
+ atomic64_sub(elapsed, &virt_timer_elapsed);
+ spin_unlock(&virt_timer_lock);
+
+ /* Do callbacks and recharge periodic timers */
+ list_for_each_entry_safe(timer, tmp, &cb_list, entry) {
+ list_del_init(&timer->entry);
+ timer->function(timer->data);
+ if (timer->interval) {
+ /* Recharge interval timer */
+ timer->expires = timer->interval +
+ atomic64_read(&virt_timer_elapsed);
+ spin_lock(&virt_timer_lock);
+ list_add_sorted(timer, &virt_timer_list);
+ spin_unlock(&virt_timer_lock);
+ }
+ }
+}
+
+void init_virt_timer(struct vtimer_list *timer)
+{
+ timer->function = NULL;
+ INIT_LIST_HEAD(&timer->entry);
+}
+EXPORT_SYMBOL(init_virt_timer);
+
+static inline int vtimer_pending(struct vtimer_list *timer)
+{
+ return !list_empty(&timer->entry);
+}
+
+static void internal_add_vtimer(struct vtimer_list *timer)
+{
+ if (list_empty(&virt_timer_list)) {
+ /* First timer, just program it. */
+ atomic64_set(&virt_timer_current, timer->expires);
+ atomic64_set(&virt_timer_elapsed, 0);
+ list_add(&timer->entry, &virt_timer_list);
+ } else {
+ /* Update timer against current base. */
+ timer->expires += atomic64_read(&virt_timer_elapsed);
+ if (likely((s64) timer->expires <
+ (s64) atomic64_read(&virt_timer_current)))
+ /* The new timer expires before the current timer. */
+ atomic64_set(&virt_timer_current, timer->expires);
+ /* Insert new timer into the list. */
+ list_add_sorted(timer, &virt_timer_list);
+ }
+}
+
+static void __add_vtimer(struct vtimer_list *timer, int periodic)
+{
+ unsigned long flags;
+
+ timer->interval = periodic ? timer->expires : 0;
+ spin_lock_irqsave(&virt_timer_lock, flags);
+ internal_add_vtimer(timer);
+ spin_unlock_irqrestore(&virt_timer_lock, flags);
+}
+
+/*
+ * add_virt_timer - add a one-shot virtual CPU timer
+ */
+void add_virt_timer(struct vtimer_list *timer)
+{
+ __add_vtimer(timer, 0);
+}
+EXPORT_SYMBOL(add_virt_timer);
+
+/*
+ * add_virt_timer_periodic - add an interval virtual CPU timer
+ */
+void add_virt_timer_periodic(struct vtimer_list *timer)
+{
+ __add_vtimer(timer, 1);
+}
+EXPORT_SYMBOL(add_virt_timer_periodic);
+
+static int __mod_vtimer(struct vtimer_list *timer, u64 expires, int periodic)
+{
+ unsigned long flags;
+ int rc;
+
+ BUG_ON(!timer->function);
+
+ if (timer->expires == expires && vtimer_pending(timer))
+ return 1;
+ spin_lock_irqsave(&virt_timer_lock, flags);
+ rc = vtimer_pending(timer);
+ if (rc)
+ list_del_init(&timer->entry);
+ timer->interval = periodic ? expires : 0;
+ timer->expires = expires;
+ internal_add_vtimer(timer);
+ spin_unlock_irqrestore(&virt_timer_lock, flags);
+ return rc;
+}
+
+/*
+ * returns whether it has modified a pending timer (1) or not (0)
+ */
+int mod_virt_timer(struct vtimer_list *timer, u64 expires)
+{
+ return __mod_vtimer(timer, expires, 0);
+}
+EXPORT_SYMBOL(mod_virt_timer);
+
+/*
+ * returns whether it has modified a pending timer (1) or not (0)
+ */
+int mod_virt_timer_periodic(struct vtimer_list *timer, u64 expires)
+{
+ return __mod_vtimer(timer, expires, 1);
+}
+EXPORT_SYMBOL(mod_virt_timer_periodic);
+
+/*
+ * Delete a virtual timer.
+ *
+ * returns whether the deleted timer was pending (1) or not (0)
+ */
+int del_virt_timer(struct vtimer_list *timer)
+{
+ unsigned long flags;
+
+ if (!vtimer_pending(timer))
+ return 0;
+ spin_lock_irqsave(&virt_timer_lock, flags);
+ list_del_init(&timer->entry);
+ spin_unlock_irqrestore(&virt_timer_lock, flags);
+ return 1;
+}
+EXPORT_SYMBOL(del_virt_timer);
+
+/*
+ * Start the virtual CPU timer on the current CPU.
+ */
+void vtime_init(void)
+{
+ /* set initial cpu timer */
+ set_vtimer(VTIMER_MAX_SLICE);
+ /* Setup initial MT scaling values */
+ if (smp_cpu_mtid) {
+ __this_cpu_write(mt_scaling_jiffies, jiffies);
+ __this_cpu_write(mt_scaling_mult, 1);
+ __this_cpu_write(mt_scaling_div, 1);
+ stcctm5(smp_cpu_mtid + 1, this_cpu_ptr(mt_cycles));
+ }
+}
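
update_mt_scaling() above maintains a single mult/div pair so that
scale_vtime(t) returns t * mult / div. Unwinding the accumulation loop shows
that this ratio equals (sum over i of delta_i/(i+1)) divided by (sum of
delta_i), where delta_i is the cycle delta of SMT thread i: thread 0 counts
at full weight, every additional thread at 1/(i+1). A standalone check of
that identity with example deltas, using the same integer accumulation as the
kernel loop:

#include <assert.h>
#include <stdint.h>

int main(void)
{
	uint64_t delta[3] = { 700, 200, 100 };	/* example per-thread cycle deltas */
	uint64_t mult = 0, div = 0, fac = 1;
	int i;

	for (i = 0; i < 3; i++) {		/* same steps as update_mt_scaling() */
		div += delta[i];
		mult *= i + 1;
		mult += delta[i] * fac;
		fac *= i + 1;
	}
	div *= fac;

	/*
	 * mult/div == (700/1 + 200/2 + 100/3) / (700 + 200 + 100) == 5/6.
	 * Cross-multiplied to stay in integer arithmetic:
	 */
	assert(mult * 6 * (700 + 200 + 100) == div * (6*700 + 3*200 + 2*100));
	return 0;
}
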
diff --git a/arch/s390/kvm/Kconfig b/arch/s390/kvm/Kconfig
new file mode 100644
index 000000000..a3dbd459c
--- /dev/null
+++ b/arch/s390/kvm/Kconfig
@@ -0,0 +1,62 @@
+# SPDX-License-Identifier: GPL-2.0
+#
+# KVM configuration
+#
+source "virt/kvm/Kconfig"
+
+menuconfig VIRTUALIZATION
+ def_bool y
+ prompt "KVM"
+ ---help---
+ Say Y here to get to see options for using your Linux host to run other
+ operating systems inside virtual machines (guests).
+ This option alone does not add any kernel code.
+
+ If you say N, all options in this submenu will be skipped and disabled.
+
+if VIRTUALIZATION
+
+config KVM
+ def_tristate y
+ prompt "Kernel-based Virtual Machine (KVM) support"
+ depends on HAVE_KVM
+ select PREEMPT_NOTIFIERS
+ select ANON_INODES
+ select HAVE_KVM_CPU_RELAX_INTERCEPT
+ select HAVE_KVM_VCPU_ASYNC_IOCTL
+ select HAVE_KVM_EVENTFD
+ select KVM_ASYNC_PF
+ select KVM_ASYNC_PF_SYNC
+ select HAVE_KVM_IRQCHIP
+ select HAVE_KVM_IRQFD
+ select HAVE_KVM_IRQ_ROUTING
+ select HAVE_KVM_INVALID_WAKEUPS
+ select SRCU
+ select KVM_VFIO
+ ---help---
+ Support hosting paravirtualized guest machines using the SIE
+ virtualization capability on the mainframe. This should work
+ on any 64-bit machine.
+
+ This module provides access to the hardware capabilities through
+ a character device node named /dev/kvm.
+
+ To compile this as a module, choose M here: the module
+ will be called kvm.
+
+ If unsure, say N.
+
+config KVM_S390_UCONTROL
+ bool "Userspace controlled virtual machines"
+ depends on KVM
+ ---help---
+ Allow CAP_SYS_ADMIN users to create KVM virtual machines that are
+ controlled by userspace.
+
+ If unsure, say N.
+
+# OK, it's a little counter-intuitive to do this, but it puts it neatly under
+# the virtualization menu.
+source drivers/vhost/Kconfig
+
+endif # VIRTUALIZATION
diff --git a/arch/s390/kvm/Makefile b/arch/s390/kvm/Makefile
new file mode 100644
index 000000000..05ee90a5e
--- /dev/null
+++ b/arch/s390/kvm/Makefile
@@ -0,0 +1,14 @@
+# SPDX-License-Identifier: GPL-2.0
+# Makefile for kernel virtual machines on s390
+#
+# Copyright IBM Corp. 2008
+
+KVM := ../../../virt/kvm
+common-objs = $(KVM)/kvm_main.o $(KVM)/eventfd.o $(KVM)/async_pf.o $(KVM)/irqchip.o $(KVM)/vfio.o
+
+ccflags-y := -Ivirt/kvm -Iarch/s390/kvm
+
+kvm-objs := $(common-objs) kvm-s390.o intercept.o interrupt.o priv.o sigp.o
+kvm-objs += diag.o gaccess.o guestdbg.o vsie.o
+
+obj-$(CONFIG_KVM) += kvm.o
diff --git a/arch/s390/kvm/diag.c b/arch/s390/kvm/diag.c
new file mode 100644
index 000000000..45634b3d2
--- /dev/null
+++ b/arch/s390/kvm/diag.c
@@ -0,0 +1,263 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * handling diagnose instructions
+ *
+ * Copyright IBM Corp. 2008, 2011
+ *
+ * Author(s): Carsten Otte <cotte@de.ibm.com>
+ * Christian Borntraeger <borntraeger@de.ibm.com>
+ */
+
+#include <linux/kvm.h>
+#include <linux/kvm_host.h>
+#include <asm/pgalloc.h>
+#include <asm/gmap.h>
+#include <asm/virtio-ccw.h>
+#include "kvm-s390.h"
+#include "trace.h"
+#include "trace-s390.h"
+#include "gaccess.h"
+
+static int diag_release_pages(struct kvm_vcpu *vcpu)
+{
+ unsigned long start, end;
+ unsigned long prefix = kvm_s390_get_prefix(vcpu);
+
+ start = vcpu->run->s.regs.gprs[(vcpu->arch.sie_block->ipa & 0xf0) >> 4];
+ end = vcpu->run->s.regs.gprs[vcpu->arch.sie_block->ipa & 0xf] + PAGE_SIZE;
+ vcpu->stat.diagnose_10++;
+
+ if (start & ~PAGE_MASK || end & ~PAGE_MASK || start >= end
+ || start < 2 * PAGE_SIZE)
+ return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
+
+ VCPU_EVENT(vcpu, 5, "diag release pages %lX %lX", start, end);
+
+ /*
+ * We checked for start >= end above, so let's check for the
+ * fast path (no prefix swap page involved)
+ */
+ if (end <= prefix || start >= prefix + 2 * PAGE_SIZE) {
+ gmap_discard(vcpu->arch.gmap, start, end);
+ } else {
+ /*
+ * This is the slow path. gmap_discard will check for start,
+ * so let's split this into before prefix, prefix, after
+ * prefix and let gmap_discard make some of these calls
+ * NOPs.
+ */
+ gmap_discard(vcpu->arch.gmap, start, prefix);
+ if (start <= prefix)
+ gmap_discard(vcpu->arch.gmap, 0, PAGE_SIZE);
+ if (end > prefix + PAGE_SIZE)
+ gmap_discard(vcpu->arch.gmap, PAGE_SIZE, 2 * PAGE_SIZE);
+ gmap_discard(vcpu->arch.gmap, prefix + 2 * PAGE_SIZE, end);
+ }
+ return 0;
+}
+
+static int __diag_page_ref_service(struct kvm_vcpu *vcpu)
+{
+ struct prs_parm {
+ u16 code;
+ u16 subcode;
+ u16 parm_len;
+ u16 parm_version;
+ u64 token_addr;
+ u64 select_mask;
+ u64 compare_mask;
+ u64 zarch;
+ };
+ struct prs_parm parm;
+ int rc;
+ u16 rx = (vcpu->arch.sie_block->ipa & 0xf0) >> 4;
+ u16 ry = (vcpu->arch.sie_block->ipa & 0x0f);
+
+ VCPU_EVENT(vcpu, 3, "diag page reference parameter block at 0x%llx",
+ vcpu->run->s.regs.gprs[rx]);
+ vcpu->stat.diagnose_258++;
+ if (vcpu->run->s.regs.gprs[rx] & 7)
+ return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
+ rc = read_guest(vcpu, vcpu->run->s.regs.gprs[rx], rx, &parm, sizeof(parm));
+ if (rc)
+ return kvm_s390_inject_prog_cond(vcpu, rc);
+ if (parm.parm_version != 2 || parm.parm_len < 5 || parm.code != 0x258)
+ return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
+
+ switch (parm.subcode) {
+ case 0: /* TOKEN */
+ VCPU_EVENT(vcpu, 3, "pageref token addr 0x%llx "
+ "select mask 0x%llx compare mask 0x%llx",
+ parm.token_addr, parm.select_mask, parm.compare_mask);
+ if (vcpu->arch.pfault_token != KVM_S390_PFAULT_TOKEN_INVALID) {
+ /*
+ * If the pagefault handshake is already activated,
+ * the token must not be changed. We have to return
+ * decimal 8 instead, as mandated in SC24-6084.
+ */
+ vcpu->run->s.regs.gprs[ry] = 8;
+ return 0;
+ }
+
+ if ((parm.compare_mask & parm.select_mask) != parm.compare_mask ||
+ parm.token_addr & 7 || parm.zarch != 0x8000000000000000ULL)
+ return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
+
+ if (kvm_is_error_gpa(vcpu->kvm, parm.token_addr))
+ return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
+
+ vcpu->arch.pfault_token = parm.token_addr;
+ vcpu->arch.pfault_select = parm.select_mask;
+ vcpu->arch.pfault_compare = parm.compare_mask;
+ vcpu->run->s.regs.gprs[ry] = 0;
+ rc = 0;
+ break;
+ case 1: /*
+ * CANCEL
+ * The specification allows already pending tokens to survive
+ * the cancel; therefore, to reduce code complexity, we assume
+ * all outstanding tokens are already pending.
+ */
+ VCPU_EVENT(vcpu, 3, "pageref cancel addr 0x%llx", parm.token_addr);
+ if (parm.token_addr || parm.select_mask ||
+ parm.compare_mask || parm.zarch)
+ return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
+
+ vcpu->run->s.regs.gprs[ry] = 0;
+ /*
+ * If the pfault handling was not established or is already
+ * canceled SC24-6084 requests to return decimal 4.
+ */
+ if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
+ vcpu->run->s.regs.gprs[ry] = 4;
+ else
+ vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
+
+ rc = 0;
+ break;
+ default:
+ rc = -EOPNOTSUPP;
+ break;
+ }
+
+ return rc;
+}
+
+static int __diag_time_slice_end(struct kvm_vcpu *vcpu)
+{
+ VCPU_EVENT(vcpu, 5, "%s", "diag time slice end");
+ vcpu->stat.diagnose_44++;
+ kvm_vcpu_on_spin(vcpu, true);
+ return 0;
+}
+
+static int __diag_time_slice_end_directed(struct kvm_vcpu *vcpu)
+{
+ struct kvm_vcpu *tcpu;
+ int tid;
+
+ tid = vcpu->run->s.regs.gprs[(vcpu->arch.sie_block->ipa & 0xf0) >> 4];
+ vcpu->stat.diagnose_9c++;
+ VCPU_EVENT(vcpu, 5, "diag time slice end directed to %d", tid);
+
+ if (tid == vcpu->vcpu_id)
+ return 0;
+
+ tcpu = kvm_get_vcpu_by_id(vcpu->kvm, tid);
+ if (tcpu)
+ kvm_vcpu_yield_to(tcpu);
+ return 0;
+}
+
+static int __diag_ipl_functions(struct kvm_vcpu *vcpu)
+{
+ unsigned int reg = vcpu->arch.sie_block->ipa & 0xf;
+ unsigned long subcode = vcpu->run->s.regs.gprs[reg] & 0xffff;
+
+ VCPU_EVENT(vcpu, 3, "diag ipl functions, subcode %lx", subcode);
+ vcpu->stat.diagnose_308++;
+ switch (subcode) {
+ case 3:
+ vcpu->run->s390_reset_flags = KVM_S390_RESET_CLEAR;
+ break;
+ case 4:
+ vcpu->run->s390_reset_flags = 0;
+ break;
+ default:
+ return -EOPNOTSUPP;
+ }
+
+ if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm))
+ kvm_s390_vcpu_stop(vcpu);
+ vcpu->run->s390_reset_flags |= KVM_S390_RESET_SUBSYSTEM;
+ vcpu->run->s390_reset_flags |= KVM_S390_RESET_IPL;
+ vcpu->run->s390_reset_flags |= KVM_S390_RESET_CPU_INIT;
+ vcpu->run->exit_reason = KVM_EXIT_S390_RESET;
+ VCPU_EVENT(vcpu, 3, "requesting userspace resets %llx",
+ vcpu->run->s390_reset_flags);
+ trace_kvm_s390_request_resets(vcpu->run->s390_reset_flags);
+ return -EREMOTE;
+}
+
+static int __diag_virtio_hypercall(struct kvm_vcpu *vcpu)
+{
+ int ret;
+
+ vcpu->stat.diagnose_500++;
+ /* No virtio-ccw notification? Get out quickly. */
+ if (!vcpu->kvm->arch.css_support ||
+ (vcpu->run->s.regs.gprs[1] != KVM_S390_VIRTIO_CCW_NOTIFY))
+ return -EOPNOTSUPP;
+
+ VCPU_EVENT(vcpu, 4, "diag 0x500 schid 0x%8.8x queue 0x%x cookie 0x%llx",
+ (u32) vcpu->run->s.regs.gprs[2],
+ (u32) vcpu->run->s.regs.gprs[3],
+ vcpu->run->s.regs.gprs[4]);
+
+ /*
+ * The layout is as follows:
+ * - gpr 2 contains the subchannel id (passed as addr)
+ * - gpr 3 contains the virtqueue index (passed as datamatch)
+ * - gpr 4 contains the index on the bus (optionally)
+ */
+ ret = kvm_io_bus_write_cookie(vcpu, KVM_VIRTIO_CCW_NOTIFY_BUS,
+ vcpu->run->s.regs.gprs[2] & 0xffffffff,
+ 8, &vcpu->run->s.regs.gprs[3],
+ vcpu->run->s.regs.gprs[4]);
+
+ /*
+ * Return cookie in gpr 2, but don't overwrite the register if the
+ * diagnose will be handled by userspace.
+ */
+ if (ret != -EOPNOTSUPP)
+ vcpu->run->s.regs.gprs[2] = ret;
+ /* kvm_io_bus_write_cookie returns -EOPNOTSUPP if it found no match. */
+ return ret < 0 ? ret : 0;
+}
+
+int kvm_s390_handle_diag(struct kvm_vcpu *vcpu)
+{
+ int code = kvm_s390_get_base_disp_rs(vcpu, NULL) & 0xffff;
+
+ if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
+ return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
+
+ trace_kvm_s390_handle_diag(vcpu, code);
+ switch (code) {
+ case 0x10:
+ return diag_release_pages(vcpu);
+ case 0x44:
+ return __diag_time_slice_end(vcpu);
+ case 0x9c:
+ return __diag_time_slice_end_directed(vcpu);
+ case 0x258:
+ return __diag_page_ref_service(vcpu);
+ case 0x308:
+ return __diag_ipl_functions(vcpu);
+ case 0x500:
+ return __diag_virtio_hypercall(vcpu);
+ default:
+ vcpu->stat.diagnose_other++;
+ return -EOPNOTSUPP;
+ }
+}
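
diag_release_pages() has to work around low-address prefixing: the two guest
pages addressed at the prefix location are backed by absolute pages 0 and
PAGE_SIZE, so a release range that overlaps the prefix area is split and
those two pages are discarded at their real locations. A standalone sketch of
that splitting, where discard() merely prints the ranges the kernel would
hand to gmap_discard() (which itself turns empty ranges into NOPs):

#include <stdio.h>

#define PAGE_SIZE 4096UL

static void discard(unsigned long from, unsigned long to)
{
	if (from < to)
		printf("discard [%#lx, %#lx)\n", from, to);
}

static void release_pages(unsigned long start, unsigned long end,
			  unsigned long prefix)
{
	if (end <= prefix || start >= prefix + 2 * PAGE_SIZE) {
		discard(start, end);	/* fast path: prefix pages not in range */
		return;
	}
	discard(start, prefix);		/* part below the prefix area */
	if (start <= prefix)
		discard(0, PAGE_SIZE);	/* first prefix page lives at absolute 0 */
	if (end > prefix + PAGE_SIZE)
		discard(PAGE_SIZE, 2 * PAGE_SIZE);	/* second prefix page */
	discard(prefix + 2 * PAGE_SIZE, end);	/* part above the prefix area */
}

int main(void)
{
	release_pages(0x10000, 0x30000, 0x20000);	/* range straddling prefix */
	return 0;
}
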
diff --git a/arch/s390/kvm/gaccess.c b/arch/s390/kvm/gaccess.c
new file mode 100644
index 000000000..07d30ffcf
--- /dev/null
+++ b/arch/s390/kvm/gaccess.c
@@ -0,0 +1,1204 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * guest access functions
+ *
+ * Copyright IBM Corp. 2014
+ *
+ */
+
+#include <linux/vmalloc.h>
+#include <linux/mm_types.h>
+#include <linux/err.h>
+
+#include <asm/pgtable.h>
+#include <asm/gmap.h>
+#include "kvm-s390.h"
+#include "gaccess.h"
+#include <asm/switch_to.h>
+
+union asce {
+ unsigned long val;
+ struct {
+ unsigned long origin : 52; /* Region- or Segment-Table Origin */
+ unsigned long : 2;
+ unsigned long g : 1; /* Subspace Group Control */
+ unsigned long p : 1; /* Private Space Control */
+ unsigned long s : 1; /* Storage-Alteration-Event Control */
+ unsigned long x : 1; /* Space-Switch-Event Control */
+ unsigned long r : 1; /* Real-Space Control */
+ unsigned long : 1;
+ unsigned long dt : 2; /* Designation-Type Control */
+ unsigned long tl : 2; /* Region- or Segment-Table Length */
+ };
+};
+
+enum {
+ ASCE_TYPE_SEGMENT = 0,
+ ASCE_TYPE_REGION3 = 1,
+ ASCE_TYPE_REGION2 = 2,
+ ASCE_TYPE_REGION1 = 3
+};
+
+union region1_table_entry {
+ unsigned long val;
+ struct {
+ unsigned long rto: 52;/* Region-Table Origin */
+ unsigned long : 2;
+ unsigned long p : 1; /* DAT-Protection Bit */
+ unsigned long : 1;
+ unsigned long tf : 2; /* Region-Second-Table Offset */
+ unsigned long i : 1; /* Region-Invalid Bit */
+ unsigned long : 1;
+ unsigned long tt : 2; /* Table-Type Bits */
+ unsigned long tl : 2; /* Region-Second-Table Length */
+ };
+};
+
+union region2_table_entry {
+ unsigned long val;
+ struct {
+ unsigned long rto: 52;/* Region-Table Origin */
+ unsigned long : 2;
+ unsigned long p : 1; /* DAT-Protection Bit */
+ unsigned long : 1;
+ unsigned long tf : 2; /* Region-Third-Table Offset */
+ unsigned long i : 1; /* Region-Invalid Bit */
+ unsigned long : 1;
+ unsigned long tt : 2; /* Table-Type Bits */
+ unsigned long tl : 2; /* Region-Third-Table Length */
+ };
+};
+
+struct region3_table_entry_fc0 {
+ unsigned long sto: 52;/* Segment-Table Origin */
+ unsigned long : 1;
+ unsigned long fc : 1; /* Format-Control */
+ unsigned long p : 1; /* DAT-Protection Bit */
+ unsigned long : 1;
+ unsigned long tf : 2; /* Segment-Table Offset */
+ unsigned long i : 1; /* Region-Invalid Bit */
+ unsigned long cr : 1; /* Common-Region Bit */
+ unsigned long tt : 2; /* Table-Type Bits */
+ unsigned long tl : 2; /* Segment-Table Length */
+};
+
+struct region3_table_entry_fc1 {
+ unsigned long rfaa : 33; /* Region-Frame Absolute Address */
+ unsigned long : 14;
+ unsigned long av : 1; /* ACCF-Validity Control */
+ unsigned long acc: 4; /* Access-Control Bits */
+ unsigned long f : 1; /* Fetch-Protection Bit */
+ unsigned long fc : 1; /* Format-Control */
+ unsigned long p : 1; /* DAT-Protection Bit */
+ unsigned long iep: 1; /* Instruction-Execution-Protection */
+ unsigned long : 2;
+ unsigned long i : 1; /* Region-Invalid Bit */
+ unsigned long cr : 1; /* Common-Region Bit */
+ unsigned long tt : 2; /* Table-Type Bits */
+ unsigned long : 2;
+};
+
+union region3_table_entry {
+ unsigned long val;
+ struct region3_table_entry_fc0 fc0;
+ struct region3_table_entry_fc1 fc1;
+ struct {
+ unsigned long : 53;
+ unsigned long fc : 1; /* Format-Control */
+ unsigned long : 4;
+ unsigned long i : 1; /* Region-Invalid Bit */
+ unsigned long cr : 1; /* Common-Region Bit */
+ unsigned long tt : 2; /* Table-Type Bits */
+ unsigned long : 2;
+ };
+};
+
+struct segment_entry_fc0 {
+ unsigned long pto: 53;/* Page-Table Origin */
+ unsigned long fc : 1; /* Format-Control */
+ unsigned long p : 1; /* DAT-Protection Bit */
+ unsigned long : 3;
+ unsigned long i : 1; /* Segment-Invalid Bit */
+ unsigned long cs : 1; /* Common-Segment Bit */
+ unsigned long tt : 2; /* Table-Type Bits */
+ unsigned long : 2;
+};
+
+struct segment_entry_fc1 {
+ unsigned long sfaa : 44; /* Segment-Frame Absolute Address */
+ unsigned long : 3;
+ unsigned long av : 1; /* ACCF-Validity Control */
+ unsigned long acc: 4; /* Access-Control Bits */
+ unsigned long f : 1; /* Fetch-Protection Bit */
+ unsigned long fc : 1; /* Format-Control */
+ unsigned long p : 1; /* DAT-Protection Bit */
+ unsigned long iep: 1; /* Instruction-Execution-Protection */
+ unsigned long : 2;
+ unsigned long i : 1; /* Segment-Invalid Bit */
+ unsigned long cs : 1; /* Common-Segment Bit */
+ unsigned long tt : 2; /* Table-Type Bits */
+ unsigned long : 2;
+};
+
+union segment_table_entry {
+ unsigned long val;
+ struct segment_entry_fc0 fc0;
+ struct segment_entry_fc1 fc1;
+ struct {
+ unsigned long : 53;
+ unsigned long fc : 1; /* Format-Control */
+ unsigned long : 4;
+ unsigned long i : 1; /* Segment-Invalid Bit */
+ unsigned long cs : 1; /* Common-Segment Bit */
+ unsigned long tt : 2; /* Table-Type Bits */
+ unsigned long : 2;
+ };
+};
+
+enum {
+ TABLE_TYPE_SEGMENT = 0,
+ TABLE_TYPE_REGION3 = 1,
+ TABLE_TYPE_REGION2 = 2,
+ TABLE_TYPE_REGION1 = 3
+};
+
+union page_table_entry {
+ unsigned long val;
+ struct {
+ unsigned long pfra : 52; /* Page-Frame Real Address */
+ unsigned long z : 1; /* Zero Bit */
+ unsigned long i : 1; /* Page-Invalid Bit */
+ unsigned long p : 1; /* DAT-Protection Bit */
+ unsigned long iep: 1; /* Instruction-Execution-Protection */
+ unsigned long : 8;
+ };
+};
+
+/*
+ * vaddress union used to easily decode a virtual address into its
+ * region-first index, region-second index, etc. parts.
+ */
+union vaddress {
+ unsigned long addr;
+ struct {
+ unsigned long rfx : 11;
+ unsigned long rsx : 11;
+ unsigned long rtx : 11;
+ unsigned long sx : 11;
+ unsigned long px : 8;
+ unsigned long bx : 12;
+ };
+ struct {
+ unsigned long rfx01 : 2;
+ unsigned long : 9;
+ unsigned long rsx01 : 2;
+ unsigned long : 9;
+ unsigned long rtx01 : 2;
+ unsigned long : 9;
+ unsigned long sx01 : 2;
+ unsigned long : 29;
+ };
+};
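
/*
 * As a sketch of the decode the union above performs, the
 * 11+11+11+11+8+12 bit fields correspond to plain shifts and masks
 * on the 64-bit address:
 *
 *	rfx = gva >> 53;		region-first index, 11 bits
 *	rsx = (gva >> 42) & 0x7ff;	region-second index, 11 bits
 *	rtx = (gva >> 31) & 0x7ff;	region-third index, 11 bits
 *	sx  = (gva >> 20) & 0x7ff;	segment index, 11 bits
 *	px  = (gva >> 12) & 0xff;	page index, 8 bits
 *	bx  = gva & 0xfff;		byte index, 12 bits
 */
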
+
+/*
+ * raddress union which will contain the result (real or absolute address)
+ * of a page table walk. The rfaa, sfaa and pfra members make it easy to
+ * assign it the value of a region, segment or page table entry.
+ */
+union raddress {
+ unsigned long addr;
+ unsigned long rfaa : 33; /* Region-Frame Absolute Address */
+ unsigned long sfaa : 44; /* Segment-Frame Absolute Address */
+ unsigned long pfra : 52; /* Page-Frame Real Address */
+};
+
+union alet {
+ u32 val;
+ struct {
+ u32 reserved : 7;
+ u32 p : 1;
+ u32 alesn : 8;
+ u32 alen : 16;
+ };
+};
+
+union ald {
+ u32 val;
+ struct {
+ u32 : 1;
+ u32 alo : 24;
+ u32 all : 7;
+ };
+};
+
+struct ale {
+ unsigned long i : 1; /* ALEN-Invalid Bit */
+ unsigned long : 5;
+ unsigned long fo : 1; /* Fetch-Only Bit */
+ unsigned long p : 1; /* Private Bit */
+ unsigned long alesn : 8; /* Access-List-Entry Sequence Number */
+ unsigned long aleax : 16; /* Access-List-Entry Authorization Index */
+ unsigned long : 32;
+ unsigned long : 1;
+ unsigned long asteo : 25; /* ASN-Second-Table-Entry Origin */
+ unsigned long : 6;
+ unsigned long astesn : 32; /* ASTE Sequence Number */
+};
+
+struct aste {
+ unsigned long i : 1; /* ASX-Invalid Bit */
+ unsigned long ato : 29; /* Authority-Table Origin */
+ unsigned long : 1;
+ unsigned long b : 1; /* Base-Space Bit */
+ unsigned long ax : 16; /* Authorization Index */
+ unsigned long atl : 12; /* Authority-Table Length */
+ unsigned long : 2;
+ unsigned long ca : 1; /* Controlled-ASN Bit */
+ unsigned long ra : 1; /* Reusable-ASN Bit */
+ unsigned long asce : 64; /* Address-Space-Control Element */
+ unsigned long ald : 32;
+ unsigned long astesn : 32;
+ /* .. more fields there */
+};
+
+int ipte_lock_held(struct kvm_vcpu *vcpu)
+{
+ if (vcpu->arch.sie_block->eca & ECA_SII) {
+ int rc;
+
+ read_lock(&vcpu->kvm->arch.sca_lock);
+ rc = kvm_s390_get_ipte_control(vcpu->kvm)->kh != 0;
+ read_unlock(&vcpu->kvm->arch.sca_lock);
+ return rc;
+ }
+ return vcpu->kvm->arch.ipte_lock_count != 0;
+}
+
+static void ipte_lock_simple(struct kvm_vcpu *vcpu)
+{
+ union ipte_control old, new, *ic;
+
+ mutex_lock(&vcpu->kvm->arch.ipte_mutex);
+ vcpu->kvm->arch.ipte_lock_count++;
+ if (vcpu->kvm->arch.ipte_lock_count > 1)
+ goto out;
+retry:
+ read_lock(&vcpu->kvm->arch.sca_lock);
+ ic = kvm_s390_get_ipte_control(vcpu->kvm);
+ do {
+ old = READ_ONCE(*ic);
+ if (old.k) {
+ read_unlock(&vcpu->kvm->arch.sca_lock);
+ cond_resched();
+ goto retry;
+ }
+ new = old;
+ new.k = 1;
+ } while (cmpxchg(&ic->val, old.val, new.val) != old.val);
+ read_unlock(&vcpu->kvm->arch.sca_lock);
+out:
+ mutex_unlock(&vcpu->kvm->arch.ipte_mutex);
+}
+
+static void ipte_unlock_simple(struct kvm_vcpu *vcpu)
+{
+ union ipte_control old, new, *ic;
+
+ mutex_lock(&vcpu->kvm->arch.ipte_mutex);
+ vcpu->kvm->arch.ipte_lock_count--;
+ if (vcpu->kvm->arch.ipte_lock_count)
+ goto out;
+ read_lock(&vcpu->kvm->arch.sca_lock);
+ ic = kvm_s390_get_ipte_control(vcpu->kvm);
+ do {
+ old = READ_ONCE(*ic);
+ new = old;
+ new.k = 0;
+ } while (cmpxchg(&ic->val, old.val, new.val) != old.val);
+ read_unlock(&vcpu->kvm->arch.sca_lock);
+ wake_up(&vcpu->kvm->arch.ipte_wq);
+out:
+ mutex_unlock(&vcpu->kvm->arch.ipte_mutex);
+}
+
+static void ipte_lock_siif(struct kvm_vcpu *vcpu)
+{
+ union ipte_control old, new, *ic;
+
+retry:
+ read_lock(&vcpu->kvm->arch.sca_lock);
+ ic = kvm_s390_get_ipte_control(vcpu->kvm);
+ do {
+ old = READ_ONCE(*ic);
+ if (old.kg) {
+ read_unlock(&vcpu->kvm->arch.sca_lock);
+ cond_resched();
+ goto retry;
+ }
+ new = old;
+ new.k = 1;
+ new.kh++;
+ } while (cmpxchg(&ic->val, old.val, new.val) != old.val);
+ read_unlock(&vcpu->kvm->arch.sca_lock);
+}
+
+static void ipte_unlock_siif(struct kvm_vcpu *vcpu)
+{
+ union ipte_control old, new, *ic;
+
+ read_lock(&vcpu->kvm->arch.sca_lock);
+ ic = kvm_s390_get_ipte_control(vcpu->kvm);
+ do {
+ old = READ_ONCE(*ic);
+ new = old;
+ new.kh--;
+ if (!new.kh)
+ new.k = 0;
+ } while (cmpxchg(&ic->val, old.val, new.val) != old.val);
+ read_unlock(&vcpu->kvm->arch.sca_lock);
+ if (!new.kh)
+ wake_up(&vcpu->kvm->arch.ipte_wq);
+}
+
+void ipte_lock(struct kvm_vcpu *vcpu)
+{
+ if (vcpu->arch.sie_block->eca & ECA_SII)
+ ipte_lock_siif(vcpu);
+ else
+ ipte_lock_simple(vcpu);
+}
+
+void ipte_unlock(struct kvm_vcpu *vcpu)
+{
+ if (vcpu->arch.sie_block->eca & ECA_SII)
+ ipte_unlock_siif(vcpu);
+ else
+ ipte_unlock_simple(vcpu);
+}
+
+static int ar_translation(struct kvm_vcpu *vcpu, union asce *asce, u8 ar,
+ enum gacc_mode mode)
+{
+ union alet alet;
+ struct ale ale;
+ struct aste aste;
+ unsigned long ald_addr, authority_table_addr;
+ union ald ald;
+ int eax, rc;
+ u8 authority_table;
+
+ if (ar >= NUM_ACRS)
+ return -EINVAL;
+
+ save_access_regs(vcpu->run->s.regs.acrs);
+ alet.val = vcpu->run->s.regs.acrs[ar];
+
+ if (ar == 0 || alet.val == 0) {
+ asce->val = vcpu->arch.sie_block->gcr[1];
+ return 0;
+ } else if (alet.val == 1) {
+ asce->val = vcpu->arch.sie_block->gcr[7];
+ return 0;
+ }
+
+ if (alet.reserved)
+ return PGM_ALET_SPECIFICATION;
+
+ if (alet.p)
+ ald_addr = vcpu->arch.sie_block->gcr[5];
+ else
+ ald_addr = vcpu->arch.sie_block->gcr[2];
+ ald_addr &= 0x7fffffc0;
+
+ rc = read_guest_real(vcpu, ald_addr + 16, &ald.val, sizeof(union ald));
+ if (rc)
+ return rc;
+
+ if (alet.alen / 8 > ald.all)
+ return PGM_ALEN_TRANSLATION;
+
+ if (0x7fffffff - ald.alo * 128 < alet.alen * 16)
+ return PGM_ADDRESSING;
+
+ rc = read_guest_real(vcpu, ald.alo * 128 + alet.alen * 16, &ale,
+ sizeof(struct ale));
+ if (rc)
+ return rc;
+
+ if (ale.i == 1)
+ return PGM_ALEN_TRANSLATION;
+ if (ale.alesn != alet.alesn)
+ return PGM_ALE_SEQUENCE;
+
+ rc = read_guest_real(vcpu, ale.asteo * 64, &aste, sizeof(struct aste));
+ if (rc)
+ return rc;
+
+ if (aste.i)
+ return PGM_ASTE_VALIDITY;
+ if (aste.astesn != ale.astesn)
+ return PGM_ASTE_SEQUENCE;
+
+ if (ale.p == 1) {
+ eax = (vcpu->arch.sie_block->gcr[8] >> 16) & 0xffff;
+ if (ale.aleax != eax) {
+ if (eax / 16 > aste.atl)
+ return PGM_EXTENDED_AUTHORITY;
+
+ authority_table_addr = aste.ato * 4 + eax / 4;
+
+ rc = read_guest_real(vcpu, authority_table_addr,
+ &authority_table,
+ sizeof(u8));
+ if (rc)
+ return rc;
+
+ if ((authority_table & (0x40 >> ((eax & 3) * 2))) == 0)
+ return PGM_EXTENDED_AUTHORITY;
+ }
+ }
+
+ if (ale.fo == 1 && mode == GACC_STORE)
+ return PGM_PROTECTION;
+
+ asce->val = aste.asce;
+ return 0;
+}
+
+struct trans_exc_code_bits {
+ unsigned long addr : 52; /* Translation-exception Address */
+ unsigned long fsi : 2; /* Access Exception Fetch/Store Indication */
+ unsigned long : 2;
+ unsigned long b56 : 1;
+ unsigned long : 3;
+ unsigned long b60 : 1;
+ unsigned long b61 : 1;
+ unsigned long as : 2; /* ASCE Identifier */
+};
+
+enum {
+ FSI_UNKNOWN = 0, /* Unknown whether fetch or store */
+ FSI_STORE = 1, /* Exception was due to store operation */
+ FSI_FETCH = 2 /* Exception was due to fetch operation */
+};
+
+enum prot_type {
+ PROT_TYPE_LA = 0,
+ PROT_TYPE_KEYC = 1,
+ PROT_TYPE_ALC = 2,
+ PROT_TYPE_DAT = 3,
+ PROT_TYPE_IEP = 4,
+};
+
+static int trans_exc(struct kvm_vcpu *vcpu, int code, unsigned long gva,
+ u8 ar, enum gacc_mode mode, enum prot_type prot)
+{
+ struct kvm_s390_pgm_info *pgm = &vcpu->arch.pgm;
+ struct trans_exc_code_bits *tec;
+
+ memset(pgm, 0, sizeof(*pgm));
+ pgm->code = code;
+ tec = (struct trans_exc_code_bits *)&pgm->trans_exc_code;
+
+ switch (code) {
+ case PGM_PROTECTION:
+ switch (prot) {
+ case PROT_TYPE_IEP:
+ tec->b61 = 1;
+ /* FALL THROUGH */
+ case PROT_TYPE_LA:
+ tec->b56 = 1;
+ break;
+ case PROT_TYPE_KEYC:
+ tec->b60 = 1;
+ break;
+ case PROT_TYPE_ALC:
+ tec->b60 = 1;
+ /* FALL THROUGH */
+ case PROT_TYPE_DAT:
+ tec->b61 = 1;
+ break;
+ }
+ /* FALL THROUGH */
+ case PGM_ASCE_TYPE:
+ case PGM_PAGE_TRANSLATION:
+ case PGM_REGION_FIRST_TRANS:
+ case PGM_REGION_SECOND_TRANS:
+ case PGM_REGION_THIRD_TRANS:
+ case PGM_SEGMENT_TRANSLATION:
+ /*
+ * op_access_id only applies to MOVE_PAGE -> set bit 61
+ * exc_access_id has to be set to 0 for some instructions. Both
+ * cases have to be handled by the caller.
+ */
+ tec->addr = gva >> PAGE_SHIFT;
+ tec->fsi = mode == GACC_STORE ? FSI_STORE : FSI_FETCH;
+ tec->as = psw_bits(vcpu->arch.sie_block->gpsw).as;
+ /* FALL THROUGH */
+ case PGM_ALEN_TRANSLATION:
+ case PGM_ALE_SEQUENCE:
+ case PGM_ASTE_VALIDITY:
+ case PGM_ASTE_SEQUENCE:
+ case PGM_EXTENDED_AUTHORITY:
+ /*
+ * We can always store exc_access_id, as it is
+ * undefined for non-ar cases. It is undefined for
+ * most DAT protection exceptions.
+ */
+ pgm->exc_access_id = ar;
+ break;
+ }
+ return code;
+}
+
+static int get_vcpu_asce(struct kvm_vcpu *vcpu, union asce *asce,
+ unsigned long ga, u8 ar, enum gacc_mode mode)
+{
+ int rc;
+ struct psw_bits psw = psw_bits(vcpu->arch.sie_block->gpsw);
+
+ if (!psw.dat) {
+ asce->val = 0;
+ asce->r = 1;
+ return 0;
+ }
+
+ if ((mode == GACC_IFETCH) && (psw.as != PSW_BITS_AS_HOME))
+ psw.as = PSW_BITS_AS_PRIMARY;
+
+ switch (psw.as) {
+ case PSW_BITS_AS_PRIMARY:
+ asce->val = vcpu->arch.sie_block->gcr[1];
+ return 0;
+ case PSW_BITS_AS_SECONDARY:
+ asce->val = vcpu->arch.sie_block->gcr[7];
+ return 0;
+ case PSW_BITS_AS_HOME:
+ asce->val = vcpu->arch.sie_block->gcr[13];
+ return 0;
+ case PSW_BITS_AS_ACCREG:
+ rc = ar_translation(vcpu, asce, ar, mode);
+ if (rc > 0)
+ return trans_exc(vcpu, rc, ga, ar, mode, PROT_TYPE_ALC);
+ return rc;
+ }
+ return 0;
+}
+
+static int deref_table(struct kvm *kvm, unsigned long gpa, unsigned long *val)
+{
+ return kvm_read_guest(kvm, gpa, val, sizeof(*val));
+}
+
+/**
+ * guest_translate - translate a guest virtual into a guest absolute address
+ * @vcpu: virtual cpu
+ * @gva: guest virtual address
+ * @gpa: points to where guest physical (absolute) address should be stored
+ * @asce: effective asce
+ * @mode: indicates the access mode to be used
+ * @prot: returns the type for protection exceptions
+ *
+ * Translate a guest virtual address into a guest absolute address by means
+ * of dynamic address translation as specified by the architecture.
+ * If the resulting absolute address is not available in the configuration
+ * an addressing exception is indicated and @gpa will not be changed.
+ *
+ * Returns: - zero on success; @gpa contains the resulting absolute address
+ * - a negative value if guest access failed due to e.g. broken
+ * guest mapping
+ *          - a positive value if an access exception happened. In this case
+ * the returned value is the program interruption code as defined
+ * by the architecture
+ */
+static unsigned long guest_translate(struct kvm_vcpu *vcpu, unsigned long gva,
+ unsigned long *gpa, const union asce asce,
+ enum gacc_mode mode, enum prot_type *prot)
+{
+ union vaddress vaddr = {.addr = gva};
+ union raddress raddr = {.addr = gva};
+ union page_table_entry pte;
+ int dat_protection = 0;
+ int iep_protection = 0;
+ union ctlreg0 ctlreg0;
+ unsigned long ptr;
+ int edat1, edat2, iep;
+
+ ctlreg0.val = vcpu->arch.sie_block->gcr[0];
+ edat1 = ctlreg0.edat && test_kvm_facility(vcpu->kvm, 8);
+ edat2 = edat1 && test_kvm_facility(vcpu->kvm, 78);
+ iep = ctlreg0.iep && test_kvm_facility(vcpu->kvm, 130);
+ if (asce.r)
+ goto real_address;
+ ptr = asce.origin * PAGE_SIZE;
+ switch (asce.dt) {
+ case ASCE_TYPE_REGION1:
+ if (vaddr.rfx01 > asce.tl)
+ return PGM_REGION_FIRST_TRANS;
+ ptr += vaddr.rfx * 8;
+ break;
+ case ASCE_TYPE_REGION2:
+ if (vaddr.rfx)
+ return PGM_ASCE_TYPE;
+ if (vaddr.rsx01 > asce.tl)
+ return PGM_REGION_SECOND_TRANS;
+ ptr += vaddr.rsx * 8;
+ break;
+ case ASCE_TYPE_REGION3:
+ if (vaddr.rfx || vaddr.rsx)
+ return PGM_ASCE_TYPE;
+ if (vaddr.rtx01 > asce.tl)
+ return PGM_REGION_THIRD_TRANS;
+ ptr += vaddr.rtx * 8;
+ break;
+ case ASCE_TYPE_SEGMENT:
+ if (vaddr.rfx || vaddr.rsx || vaddr.rtx)
+ return PGM_ASCE_TYPE;
+ if (vaddr.sx01 > asce.tl)
+ return PGM_SEGMENT_TRANSLATION;
+ ptr += vaddr.sx * 8;
+ break;
+ }
+ switch (asce.dt) {
+ case ASCE_TYPE_REGION1: {
+ union region1_table_entry rfte;
+
+ if (kvm_is_error_gpa(vcpu->kvm, ptr))
+ return PGM_ADDRESSING;
+ if (deref_table(vcpu->kvm, ptr, &rfte.val))
+ return -EFAULT;
+ if (rfte.i)
+ return PGM_REGION_FIRST_TRANS;
+ if (rfte.tt != TABLE_TYPE_REGION1)
+ return PGM_TRANSLATION_SPEC;
+ if (vaddr.rsx01 < rfte.tf || vaddr.rsx01 > rfte.tl)
+ return PGM_REGION_SECOND_TRANS;
+ if (edat1)
+ dat_protection |= rfte.p;
+ ptr = rfte.rto * PAGE_SIZE + vaddr.rsx * 8;
+ }
+ /* fallthrough */
+ case ASCE_TYPE_REGION2: {
+ union region2_table_entry rste;
+
+ if (kvm_is_error_gpa(vcpu->kvm, ptr))
+ return PGM_ADDRESSING;
+ if (deref_table(vcpu->kvm, ptr, &rste.val))
+ return -EFAULT;
+ if (rste.i)
+ return PGM_REGION_SECOND_TRANS;
+ if (rste.tt != TABLE_TYPE_REGION2)
+ return PGM_TRANSLATION_SPEC;
+ if (vaddr.rtx01 < rste.tf || vaddr.rtx01 > rste.tl)
+ return PGM_REGION_THIRD_TRANS;
+ if (edat1)
+ dat_protection |= rste.p;
+ ptr = rste.rto * PAGE_SIZE + vaddr.rtx * 8;
+ }
+ /* fallthrough */
+ case ASCE_TYPE_REGION3: {
+ union region3_table_entry rtte;
+
+ if (kvm_is_error_gpa(vcpu->kvm, ptr))
+ return PGM_ADDRESSING;
+ if (deref_table(vcpu->kvm, ptr, &rtte.val))
+ return -EFAULT;
+ if (rtte.i)
+ return PGM_REGION_THIRD_TRANS;
+ if (rtte.tt != TABLE_TYPE_REGION3)
+ return PGM_TRANSLATION_SPEC;
+ if (rtte.cr && asce.p && edat2)
+ return PGM_TRANSLATION_SPEC;
+ if (rtte.fc && edat2) {
+ dat_protection |= rtte.fc1.p;
+ iep_protection = rtte.fc1.iep;
+ raddr.rfaa = rtte.fc1.rfaa;
+ goto absolute_address;
+ }
+ if (vaddr.sx01 < rtte.fc0.tf)
+ return PGM_SEGMENT_TRANSLATION;
+ if (vaddr.sx01 > rtte.fc0.tl)
+ return PGM_SEGMENT_TRANSLATION;
+ if (edat1)
+ dat_protection |= rtte.fc0.p;
+ ptr = rtte.fc0.sto * PAGE_SIZE + vaddr.sx * 8;
+ }
+ /* fallthrough */
+ case ASCE_TYPE_SEGMENT: {
+ union segment_table_entry ste;
+
+ if (kvm_is_error_gpa(vcpu->kvm, ptr))
+ return PGM_ADDRESSING;
+ if (deref_table(vcpu->kvm, ptr, &ste.val))
+ return -EFAULT;
+ if (ste.i)
+ return PGM_SEGMENT_TRANSLATION;
+ if (ste.tt != TABLE_TYPE_SEGMENT)
+ return PGM_TRANSLATION_SPEC;
+ if (ste.cs && asce.p)
+ return PGM_TRANSLATION_SPEC;
+ if (ste.fc && edat1) {
+ dat_protection |= ste.fc1.p;
+ iep_protection = ste.fc1.iep;
+ raddr.sfaa = ste.fc1.sfaa;
+ goto absolute_address;
+ }
+ dat_protection |= ste.fc0.p;
+ ptr = ste.fc0.pto * (PAGE_SIZE / 2) + vaddr.px * 8;
+ }
+ }
+ if (kvm_is_error_gpa(vcpu->kvm, ptr))
+ return PGM_ADDRESSING;
+ if (deref_table(vcpu->kvm, ptr, &pte.val))
+ return -EFAULT;
+ if (pte.i)
+ return PGM_PAGE_TRANSLATION;
+ if (pte.z)
+ return PGM_TRANSLATION_SPEC;
+ dat_protection |= pte.p;
+ iep_protection = pte.iep;
+ raddr.pfra = pte.pfra;
+real_address:
+ raddr.addr = kvm_s390_real_to_abs(vcpu, raddr.addr);
+absolute_address:
+ if (mode == GACC_STORE && dat_protection) {
+ *prot = PROT_TYPE_DAT;
+ return PGM_PROTECTION;
+ }
+ if (mode == GACC_IFETCH && iep_protection && iep) {
+ *prot = PROT_TYPE_IEP;
+ return PGM_PROTECTION;
+ }
+ if (kvm_is_error_gpa(vcpu->kvm, raddr.addr))
+ return PGM_ADDRESSING;
+ *gpa = raddr.addr;
+ return 0;
+}
+
+static inline int is_low_address(unsigned long ga)
+{
+ /* Check for address ranges 0..511 and 4096..4607 */
+ return (ga & ~0x11fful) == 0;
+}
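+
+/*
+ * Example values for is_low_address() (illustrative): 0x000, 0x1ff, 0x1000
+ * and 0x11ff are low addresses, while 0x200 and 0x1200 are not.
+ */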
+
+static int low_address_protection_enabled(struct kvm_vcpu *vcpu,
+ const union asce asce)
+{
+ union ctlreg0 ctlreg0 = {.val = vcpu->arch.sie_block->gcr[0]};
+ psw_t *psw = &vcpu->arch.sie_block->gpsw;
+
+ if (!ctlreg0.lap)
+ return 0;
+ if (psw_bits(*psw).dat && asce.p)
+ return 0;
+ return 1;
+}
+
+static int guest_page_range(struct kvm_vcpu *vcpu, unsigned long ga, u8 ar,
+ unsigned long *pages, unsigned long nr_pages,
+ const union asce asce, enum gacc_mode mode)
+{
+ psw_t *psw = &vcpu->arch.sie_block->gpsw;
+ int lap_enabled, rc = 0;
+ enum prot_type prot;
+
+ lap_enabled = low_address_protection_enabled(vcpu, asce);
+ while (nr_pages) {
+ ga = kvm_s390_logical_to_effective(vcpu, ga);
+ if (mode == GACC_STORE && lap_enabled && is_low_address(ga))
+ return trans_exc(vcpu, PGM_PROTECTION, ga, ar, mode,
+ PROT_TYPE_LA);
+ ga &= PAGE_MASK;
+ if (psw_bits(*psw).dat) {
+ rc = guest_translate(vcpu, ga, pages, asce, mode, &prot);
+ if (rc < 0)
+ return rc;
+ } else {
+ *pages = kvm_s390_real_to_abs(vcpu, ga);
+ if (kvm_is_error_gpa(vcpu->kvm, *pages))
+ rc = PGM_ADDRESSING;
+ }
+ if (rc)
+ return trans_exc(vcpu, rc, ga, ar, mode, prot);
+ ga += PAGE_SIZE;
+ pages++;
+ nr_pages--;
+ }
+ return 0;
+}
+
+int access_guest(struct kvm_vcpu *vcpu, unsigned long ga, u8 ar, void *data,
+ unsigned long len, enum gacc_mode mode)
+{
+ psw_t *psw = &vcpu->arch.sie_block->gpsw;
+ unsigned long _len, nr_pages, gpa, idx;
+ unsigned long pages_array[2];
+ unsigned long *pages;
+ int need_ipte_lock;
+ union asce asce;
+ int rc;
+
+ if (!len)
+ return 0;
+ ga = kvm_s390_logical_to_effective(vcpu, ga);
+ rc = get_vcpu_asce(vcpu, &asce, ga, ar, mode);
+ if (rc)
+ return rc;
+ nr_pages = (((ga & ~PAGE_MASK) + len - 1) >> PAGE_SHIFT) + 1;
+ pages = pages_array;
+ if (nr_pages > ARRAY_SIZE(pages_array))
+ pages = vmalloc(array_size(nr_pages, sizeof(unsigned long)));
+ if (!pages)
+ return -ENOMEM;
+ need_ipte_lock = psw_bits(*psw).dat && !asce.r;
+ if (need_ipte_lock)
+ ipte_lock(vcpu);
+ rc = guest_page_range(vcpu, ga, ar, pages, nr_pages, asce, mode);
+ for (idx = 0; idx < nr_pages && !rc; idx++) {
+ gpa = *(pages + idx) + (ga & ~PAGE_MASK);
+ _len = min(PAGE_SIZE - (gpa & ~PAGE_MASK), len);
+ if (mode == GACC_STORE)
+ rc = kvm_write_guest(vcpu->kvm, gpa, data, _len);
+ else
+ rc = kvm_read_guest(vcpu->kvm, gpa, data, _len);
+ len -= _len;
+ ga += _len;
+ data += _len;
+ }
+ if (need_ipte_lock)
+ ipte_unlock(vcpu);
+ if (nr_pages > ARRAY_SIZE(pages_array))
+ vfree(pages);
+ return rc;
+}
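+
+/*
+ * Worked example for the copy loop above (illustrative, 4K pages): a
+ * 16 byte access at ga = 0x1ff8 spans two pages, so nr_pages = 2. The
+ * first iteration copies 8 bytes up to the page boundary at 0x2000, the
+ * second iteration copies the remaining 8 bytes from offset 0 of the
+ * second translated page.
+ */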
+
+int access_guest_real(struct kvm_vcpu *vcpu, unsigned long gra,
+ void *data, unsigned long len, enum gacc_mode mode)
+{
+ unsigned long _len, gpa;
+ int rc = 0;
+
+ while (len && !rc) {
+ gpa = kvm_s390_real_to_abs(vcpu, gra);
+ _len = min(PAGE_SIZE - (gpa & ~PAGE_MASK), len);
+ if (mode)
+ rc = write_guest_abs(vcpu, gpa, data, _len);
+ else
+ rc = read_guest_abs(vcpu, gpa, data, _len);
+ len -= _len;
+ gra += _len;
+ data += _len;
+ }
+ return rc;
+}
+
+/**
+ * guest_translate_address - translate guest logical into guest absolute address
+ *
+ * Parameter semantics are the same as the ones from guest_translate.
+ * The memory contents at the guest address are not changed.
+ *
+ * Note: The IPTE lock is not taken during this function, so the caller
+ * has to take care of this.
+ */
+int guest_translate_address(struct kvm_vcpu *vcpu, unsigned long gva, u8 ar,
+ unsigned long *gpa, enum gacc_mode mode)
+{
+ psw_t *psw = &vcpu->arch.sie_block->gpsw;
+ enum prot_type prot;
+ union asce asce;
+ int rc;
+
+ gva = kvm_s390_logical_to_effective(vcpu, gva);
+ rc = get_vcpu_asce(vcpu, &asce, gva, ar, mode);
+ if (rc)
+ return rc;
+ if (is_low_address(gva) && low_address_protection_enabled(vcpu, asce)) {
+ if (mode == GACC_STORE)
+ return trans_exc(vcpu, PGM_PROTECTION, gva, 0,
+ mode, PROT_TYPE_LA);
+ }
+
+ if (psw_bits(*psw).dat && !asce.r) { /* Use DAT? */
+ rc = guest_translate(vcpu, gva, gpa, asce, mode, &prot);
+ if (rc > 0)
+ return trans_exc(vcpu, rc, gva, 0, mode, prot);
+ } else {
+ *gpa = kvm_s390_real_to_abs(vcpu, gva);
+ if (kvm_is_error_gpa(vcpu->kvm, *gpa))
+ return trans_exc(vcpu, PGM_ADDRESSING, gva, 0, mode, 0);
+ }
+
+ return rc;
+}
+
+/**
+ * check_gva_range - test a range of guest virtual addresses for accessibility
+ */
+int check_gva_range(struct kvm_vcpu *vcpu, unsigned long gva, u8 ar,
+ unsigned long length, enum gacc_mode mode)
+{
+ unsigned long gpa;
+ unsigned long currlen;
+ int rc = 0;
+
+ ipte_lock(vcpu);
+ while (length > 0 && !rc) {
+ currlen = min(length, PAGE_SIZE - (gva % PAGE_SIZE));
+ rc = guest_translate_address(vcpu, gva, ar, &gpa, mode);
+ gva += currlen;
+ length -= currlen;
+ }
+ ipte_unlock(vcpu);
+
+ return rc;
+}
+
+/**
+ * kvm_s390_check_low_addr_prot_real - check for low-address protection
+ * @vcpu: virtual cpu
+ * @gra: Guest real address
+ *
+ * Checks whether an address is subject to low-address protection and sets
+ * up vcpu->arch.pgm accordingly if necessary.
+ *
+ * Return: 0 if no protection exception, or PGM_PROTECTION if protected.
+ */
+int kvm_s390_check_low_addr_prot_real(struct kvm_vcpu *vcpu, unsigned long gra)
+{
+ union ctlreg0 ctlreg0 = {.val = vcpu->arch.sie_block->gcr[0]};
+
+ if (!ctlreg0.lap || !is_low_address(gra))
+ return 0;
+ return trans_exc(vcpu, PGM_PROTECTION, gra, 0, GACC_STORE, PROT_TYPE_LA);
+}
+
+/**
+ * kvm_s390_shadow_tables - walk the guest page table and create shadow tables
+ * @sg: pointer to the shadow guest address space structure
+ * @saddr: faulting address in the shadow gmap
+ * @pgt: pointer to the page table address result
+ * @dat_protection: set to 1 if the guest tables mark the translated range as write protected
+ * @fake: pgt references contiguous guest memory block, not a pgtable
+ */
+static int kvm_s390_shadow_tables(struct gmap *sg, unsigned long saddr,
+ unsigned long *pgt, int *dat_protection,
+ int *fake)
+{
+ struct gmap *parent;
+ union asce asce;
+ union vaddress vaddr;
+ unsigned long ptr;
+ int rc;
+
+ *fake = 0;
+ *dat_protection = 0;
+ parent = sg->parent;
+ vaddr.addr = saddr;
+ asce.val = sg->orig_asce;
+ ptr = asce.origin * PAGE_SIZE;
+ if (asce.r) {
+ *fake = 1;
+ ptr = 0;
+ asce.dt = ASCE_TYPE_REGION1;
+ }
+ switch (asce.dt) {
+ case ASCE_TYPE_REGION1:
+ if (vaddr.rfx01 > asce.tl && !*fake)
+ return PGM_REGION_FIRST_TRANS;
+ break;
+ case ASCE_TYPE_REGION2:
+ if (vaddr.rfx)
+ return PGM_ASCE_TYPE;
+ if (vaddr.rsx01 > asce.tl)
+ return PGM_REGION_SECOND_TRANS;
+ break;
+ case ASCE_TYPE_REGION3:
+ if (vaddr.rfx || vaddr.rsx)
+ return PGM_ASCE_TYPE;
+ if (vaddr.rtx01 > asce.tl)
+ return PGM_REGION_THIRD_TRANS;
+ break;
+ case ASCE_TYPE_SEGMENT:
+ if (vaddr.rfx || vaddr.rsx || vaddr.rtx)
+ return PGM_ASCE_TYPE;
+ if (vaddr.sx01 > asce.tl)
+ return PGM_SEGMENT_TRANSLATION;
+ break;
+ }
+
+ switch (asce.dt) {
+ case ASCE_TYPE_REGION1: {
+ union region1_table_entry rfte;
+
+ if (*fake) {
+ ptr += vaddr.rfx * _REGION1_SIZE;
+ rfte.val = ptr;
+ goto shadow_r2t;
+ }
+ rc = gmap_read_table(parent, ptr + vaddr.rfx * 8, &rfte.val);
+ if (rc)
+ return rc;
+ if (rfte.i)
+ return PGM_REGION_FIRST_TRANS;
+ if (rfte.tt != TABLE_TYPE_REGION1)
+ return PGM_TRANSLATION_SPEC;
+ if (vaddr.rsx01 < rfte.tf || vaddr.rsx01 > rfte.tl)
+ return PGM_REGION_SECOND_TRANS;
+ if (sg->edat_level >= 1)
+ *dat_protection |= rfte.p;
+ ptr = rfte.rto * PAGE_SIZE;
+shadow_r2t:
+ rc = gmap_shadow_r2t(sg, saddr, rfte.val, *fake);
+ if (rc)
+ return rc;
+ } /* fallthrough */
+ case ASCE_TYPE_REGION2: {
+ union region2_table_entry rste;
+
+ if (*fake) {
+ ptr += vaddr.rsx * _REGION2_SIZE;
+ rste.val = ptr;
+ goto shadow_r3t;
+ }
+ rc = gmap_read_table(parent, ptr + vaddr.rsx * 8, &rste.val);
+ if (rc)
+ return rc;
+ if (rste.i)
+ return PGM_REGION_SECOND_TRANS;
+ if (rste.tt != TABLE_TYPE_REGION2)
+ return PGM_TRANSLATION_SPEC;
+ if (vaddr.rtx01 < rste.tf || vaddr.rtx01 > rste.tl)
+ return PGM_REGION_THIRD_TRANS;
+ if (sg->edat_level >= 1)
+ *dat_protection |= rste.p;
+ ptr = rste.rto * PAGE_SIZE;
+shadow_r3t:
+ rste.p |= *dat_protection;
+ rc = gmap_shadow_r3t(sg, saddr, rste.val, *fake);
+ if (rc)
+ return rc;
+ } /* fallthrough */
+ case ASCE_TYPE_REGION3: {
+ union region3_table_entry rtte;
+
+ if (*fake) {
+ ptr += vaddr.rtx * _REGION3_SIZE;
+ rtte.val = ptr;
+ goto shadow_sgt;
+ }
+ rc = gmap_read_table(parent, ptr + vaddr.rtx * 8, &rtte.val);
+ if (rc)
+ return rc;
+ if (rtte.i)
+ return PGM_REGION_THIRD_TRANS;
+ if (rtte.tt != TABLE_TYPE_REGION3)
+ return PGM_TRANSLATION_SPEC;
+ if (rtte.cr && asce.p && sg->edat_level >= 2)
+ return PGM_TRANSLATION_SPEC;
+ if (rtte.fc && sg->edat_level >= 2) {
+ *dat_protection |= rtte.fc0.p;
+ *fake = 1;
+ ptr = rtte.fc1.rfaa * _REGION3_SIZE;
+ rtte.val = ptr;
+ goto shadow_sgt;
+ }
+ if (vaddr.sx01 < rtte.fc0.tf || vaddr.sx01 > rtte.fc0.tl)
+ return PGM_SEGMENT_TRANSLATION;
+ if (sg->edat_level >= 1)
+ *dat_protection |= rtte.fc0.p;
+ ptr = rtte.fc0.sto * PAGE_SIZE;
+shadow_sgt:
+ rtte.fc0.p |= *dat_protection;
+ rc = gmap_shadow_sgt(sg, saddr, rtte.val, *fake);
+ if (rc)
+ return rc;
+ } /* fallthrough */
+ case ASCE_TYPE_SEGMENT: {
+ union segment_table_entry ste;
+
+ if (*fake) {
+ ptr += vaddr.sx * _SEGMENT_SIZE;
+ ste.val = ptr;
+ goto shadow_pgt;
+ }
+ rc = gmap_read_table(parent, ptr + vaddr.sx * 8, &ste.val);
+ if (rc)
+ return rc;
+ if (ste.i)
+ return PGM_SEGMENT_TRANSLATION;
+ if (ste.tt != TABLE_TYPE_SEGMENT)
+ return PGM_TRANSLATION_SPEC;
+ if (ste.cs && asce.p)
+ return PGM_TRANSLATION_SPEC;
+ *dat_protection |= ste.fc0.p;
+ if (ste.fc && sg->edat_level >= 1) {
+ *fake = 1;
+ ptr = ste.fc1.sfaa * _SEGMENT_SIZE;
+ ste.val = ptr;
+ goto shadow_pgt;
+ }
+ ptr = ste.fc0.pto * (PAGE_SIZE / 2);
+shadow_pgt:
+ ste.fc0.p |= *dat_protection;
+ rc = gmap_shadow_pgt(sg, saddr, ste.val, *fake);
+ if (rc)
+ return rc;
+ }
+ }
+ /* Return the parent address of the page table */
+ *pgt = ptr;
+ return 0;
+}
+
+/**
+ * kvm_s390_shadow_fault - handle fault on a shadow page table
+ * @vcpu: virtual cpu
+ * @sg: pointer to the shadow guest address space structure
+ * @saddr: faulting address in the shadow gmap
+ *
+ * Returns: - 0 if the shadow fault was successfully resolved
+ * - > 0 (pgm exception code) on exceptions while faulting
+ * - -EAGAIN if the caller can retry immediately
+ * - -EFAULT when accessing invalid guest addresses
+ * - -ENOMEM if out of memory
+ */
+int kvm_s390_shadow_fault(struct kvm_vcpu *vcpu, struct gmap *sg,
+ unsigned long saddr)
+{
+ union vaddress vaddr;
+ union page_table_entry pte;
+ unsigned long pgt;
+ int dat_protection, fake;
+ int rc;
+
+ down_read(&sg->mm->mmap_sem);
+ /*
+ * We don't want any guest-2 tables to change - so the parent
+ * tables/pointers we read stay valid - unshadowing is however
+ * always possible - only guest_table_lock protects us.
+ */
+ ipte_lock(vcpu);
+
+ rc = gmap_shadow_pgt_lookup(sg, saddr, &pgt, &dat_protection, &fake);
+ if (rc)
+ rc = kvm_s390_shadow_tables(sg, saddr, &pgt, &dat_protection,
+ &fake);
+
+ vaddr.addr = saddr;
+ if (fake) {
+ pte.val = pgt + vaddr.px * PAGE_SIZE;
+ goto shadow_page;
+ }
+ if (!rc)
+ rc = gmap_read_table(sg->parent, pgt + vaddr.px * 8, &pte.val);
+ if (!rc && pte.i)
+ rc = PGM_PAGE_TRANSLATION;
+ if (!rc && pte.z)
+ rc = PGM_TRANSLATION_SPEC;
+shadow_page:
+ pte.p |= dat_protection;
+ if (!rc)
+ rc = gmap_shadow_page(sg, saddr, __pte(pte.val));
+ ipte_unlock(vcpu);
+ up_read(&sg->mm->mmap_sem);
+ return rc;
+}
diff --git a/arch/s390/kvm/gaccess.h b/arch/s390/kvm/gaccess.h
new file mode 100644
index 000000000..4c56de542
--- /dev/null
+++ b/arch/s390/kvm/gaccess.h
@@ -0,0 +1,393 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * access guest memory
+ *
+ * Copyright IBM Corp. 2008, 2014
+ *
+ * Author(s): Carsten Otte <cotte@de.ibm.com>
+ */
+
+#ifndef __KVM_S390_GACCESS_H
+#define __KVM_S390_GACCESS_H
+
+#include <linux/compiler.h>
+#include <linux/kvm_host.h>
+#include <linux/uaccess.h>
+#include <linux/ptrace.h>
+#include "kvm-s390.h"
+
+/**
+ * _kvm_s390_real_to_abs - convert guest real address to guest absolute address
+ * @prefix: guest prefix
+ * @gra: guest real address
+ *
+ * Returns the guest absolute address that corresponds to the passed guest real
+ * address @gra by applying the given prefix.
+ */
+static inline unsigned long _kvm_s390_real_to_abs(u32 prefix, unsigned long gra)
+{
+ if (gra < 2 * PAGE_SIZE)
+ gra += prefix;
+ else if (gra >= prefix && gra < prefix + 2 * PAGE_SIZE)
+ gra -= prefix;
+ return gra;
+}
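+
+/*
+ * Example (illustrative, with an arbitrary prefix of 0x20000): real 0x0
+ * maps to absolute 0x20000, real 0x1000 to 0x21000, real 0x20000 to 0x0,
+ * real 0x21000 to 0x1000, and any other address (e.g. 0x50000) is left
+ * unchanged.
+ */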
+
+/**
+ * kvm_s390_real_to_abs - convert guest real address to guest absolute address
+ * @vcpu: guest virtual cpu
+ * @gra: guest real address
+ *
+ * Returns the guest absolute address that corresponds to the passed guest real
+ * address @gra of a virtual guest cpu by applying its prefix.
+ */
+static inline unsigned long kvm_s390_real_to_abs(struct kvm_vcpu *vcpu,
+ unsigned long gra)
+{
+ return _kvm_s390_real_to_abs(kvm_s390_get_prefix(vcpu), gra);
+}
+
+/**
+ * _kvm_s390_logical_to_effective - convert guest logical to effective address
+ * @psw: psw of the guest
+ * @ga: guest logical address
+ *
+ * Convert a guest logical address to an effective address by applying the
+ * rules of the addressing mode defined by bits 31 and 32 of the given PSW
+ * (extended/basic addressing mode).
+ *
+ * Depending on the addressing mode, the upper 40 bits (24 bit addressing
+ * mode), 33 bits (31 bit addressing mode) or no bits (64 bit addressing
+ * mode) of @ga will be zeroed and the remaining bits will be returned.
+ */
+static inline unsigned long _kvm_s390_logical_to_effective(psw_t *psw,
+ unsigned long ga)
+{
+ if (psw_bits(*psw).eaba == PSW_BITS_AMODE_64BIT)
+ return ga;
+ if (psw_bits(*psw).eaba == PSW_BITS_AMODE_31BIT)
+ return ga & ((1UL << 31) - 1);
+ return ga & ((1UL << 24) - 1);
+}
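+
+/*
+ * Example (illustrative): for ga = 0x0000000081234567 this returns
+ * 0x81234567 in the 64-bit addressing mode, 0x01234567 in the 31-bit
+ * addressing mode and 0x00234567 in the 24-bit addressing mode.
+ */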
+
+/**
+ * kvm_s390_logical_to_effective - convert guest logical to effective address
+ * @vcpu: guest virtual cpu
+ * @ga: guest logical address
+ *
+ * Convert a guest vcpu logical address to a guest vcpu effective address by
+ * applying the rules of the vcpu's addressing mode defined by PSW bits 31
+ * and 32 (extended/basic addressing mode).
+ *
+ * Depending on the vcpu's addressing mode the upper 40 bits (24 bit addressing
+ * mode), 33 bits (31 bit addressing mode) or no bits (64 bit addressing mode)
+ * of @ga will be zeroed and the remaining bits will be returned.
+ */
+static inline unsigned long kvm_s390_logical_to_effective(struct kvm_vcpu *vcpu,
+ unsigned long ga)
+{
+ return _kvm_s390_logical_to_effective(&vcpu->arch.sie_block->gpsw, ga);
+}
+
+/*
+ * put_guest_lc, read_guest_lc and write_guest_lc are guest access functions
+ * which shall only be used to access the lowcore of a vcpu.
+ * These functions should be used for e.g. interrupt handlers where no
+ * guest memory access protection facilities, like key or low address
+ * protection, are applicable.
+ * At a later point guest vcpu lowcore access should happen via pinned
+ * prefix pages, so that these pages can be accessed directly via the
+ * kernel mapping. All of these *_lc functions can be removed then.
+ */
+
+/**
+ * put_guest_lc - write a simple variable to a guest vcpu's lowcore
+ * @vcpu: virtual cpu
+ * @x: value to copy to guest
+ * @gra: vcpu's destination guest real address
+ *
+ * Copies a simple value from kernel space to a guest vcpu's lowcore.
+ * The size of the variable may be 1, 2, 4 or 8 bytes. The destination
+ * must be located in the vcpu's lowcore. Otherwise the result is undefined.
+ *
+ * Returns zero on success or -EFAULT on error.
+ *
+ * Note: an error indicates that either the kernel is out of memory or
+ * the guest memory mapping is broken. In any case the best solution
+ * would be to terminate the guest.
+ * It is wrong to inject a guest exception.
+ */
+#define put_guest_lc(vcpu, x, gra) \
+({ \
+ struct kvm_vcpu *__vcpu = (vcpu); \
+ __typeof__(*(gra)) __x = (x); \
+ unsigned long __gpa; \
+ \
+ __gpa = (unsigned long)(gra); \
+ __gpa += kvm_s390_get_prefix(__vcpu); \
+ kvm_write_guest(__vcpu->kvm, __gpa, &__x, sizeof(__x)); \
+})
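+
+/*
+ * Typical use (illustrative, mirroring the external interrupt delivery code):
+ *
+ *	rc = put_guest_lc(vcpu, EXT_IRQ_CPU_TIMER, (u16 *)__LC_EXT_INT_CODE);
+ *
+ * The pointer type of the third argument only selects the store size (here
+ * two bytes); its value is the guest real address of the lowcore field.
+ */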
+
+/**
+ * write_guest_lc - copy data from kernel space to guest vcpu's lowcore
+ * @vcpu: virtual cpu
+ * @gra: vcpu's source guest real address
+ * @data: source address in kernel space
+ * @len: number of bytes to copy
+ *
+ * Copy data from kernel space to guest vcpu's lowcore. The entire range must
+ * be located within the vcpu's lowcore, otherwise the result is undefined.
+ *
+ * Returns zero on success or -EFAULT on error.
+ *
+ * Note: an error indicates that either the kernel is out of memory or
+ * the guest memory mapping is broken. In any case the best solution
+ * would be to terminate the guest.
+ * It is wrong to inject a guest exception.
+ */
+static inline __must_check
+int write_guest_lc(struct kvm_vcpu *vcpu, unsigned long gra, void *data,
+ unsigned long len)
+{
+ unsigned long gpa = gra + kvm_s390_get_prefix(vcpu);
+
+ return kvm_write_guest(vcpu->kvm, gpa, data, len);
+}
+
+/**
+ * read_guest_lc - copy data from guest vcpu's lowcore to kernel space
+ * @vcpu: virtual cpu
+ * @gra: vcpu's source guest real address
+ * @data: destination address in kernel space
+ * @len: number of bytes to copy
+ *
+ * Copy data from guest vcpu's lowcore to kernel space. The entire range must
+ * be located within the vcpu's lowcore, otherwise the result is undefined.
+ *
+ * Returns zero on success or -EFAULT on error.
+ *
+ * Note: an error indicates that either the kernel is out of memory or
+ * the guest memory mapping is broken. In any case the best solution
+ * would be to terminate the guest.
+ * It is wrong to inject a guest exception.
+ */
+static inline __must_check
+int read_guest_lc(struct kvm_vcpu *vcpu, unsigned long gra, void *data,
+ unsigned long len)
+{
+ unsigned long gpa = gra + kvm_s390_get_prefix(vcpu);
+
+ return kvm_read_guest(vcpu->kvm, gpa, data, len);
+}
+
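+/*
+ * Summary (added for clarity): GACC_FETCH is a data read, GACC_STORE is a
+ * data write and GACC_IFETCH is an instruction fetch, which may resolve to
+ * a different address space than a data access (see read_guest_instr()).
+ */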
+enum gacc_mode {
+ GACC_FETCH,
+ GACC_STORE,
+ GACC_IFETCH,
+};
+
+int guest_translate_address(struct kvm_vcpu *vcpu, unsigned long gva,
+ u8 ar, unsigned long *gpa, enum gacc_mode mode);
+int check_gva_range(struct kvm_vcpu *vcpu, unsigned long gva, u8 ar,
+ unsigned long length, enum gacc_mode mode);
+
+int access_guest(struct kvm_vcpu *vcpu, unsigned long ga, u8 ar, void *data,
+ unsigned long len, enum gacc_mode mode);
+
+int access_guest_real(struct kvm_vcpu *vcpu, unsigned long gra,
+ void *data, unsigned long len, enum gacc_mode mode);
+
+/**
+ * write_guest - copy data from kernel space to guest space
+ * @vcpu: virtual cpu
+ * @ga: guest address
+ * @ar: access register
+ * @data: source address in kernel space
+ * @len: number of bytes to copy
+ *
+ * Copy @len bytes from @data (kernel space) to @ga (guest address).
+ * In order to copy data to guest space the PSW of the vcpu is inspected:
+ * If DAT is off data will be copied to guest real or absolute memory.
+ * If DAT is on data will be copied to the address space as specified by
+ * the address space bits of the PSW:
+ * Primary, secondary, home space or access register mode.
+ * The addressing mode of the PSW is also inspected, so that address wrap
+ * around is taken into account for 24-, 31- and 64-bit addressing mode,
+ * if the data to be copied crosses page boundaries in guest address space.
+ * In addition also low address and DAT protection are inspected before
+ * copying any data (key protection is currently not implemented).
+ *
+ * This function modifies the 'struct kvm_s390_pgm_info pgm' member of @vcpu.
+ * In case of an access exception (e.g. protection exception) pgm will contain
+ * all data necessary so that a subsequent call to 'kvm_s390_inject_prog_vcpu()'
+ * will inject a correct exception into the guest.
+ * If no access exception happened, the contents of pgm are undefined when
+ * this function returns.
+ *
+ * Returns: - zero on success
+ * - a negative value if e.g. the guest mapping is broken or in
+ * case of out-of-memory. In this case the contents of pgm are
+ * undefined. Also parts of @data may have been copied to guest
+ * space.
+ * - a positive value if an access exception happened. In this case
+ * the returned value is the program interruption code and the
+ * contents of pgm may be used to inject an exception into the
+ * guest. No data has been copied to guest space.
+ *
+ * Note: in case an access exception is recognized no data has been copied to
+ * guest space (this is also true if the data to be copied would cross
+ * one or more page boundaries in guest space).
+ * Therefore this function may be used for nullifying and suppressing
+ * instruction emulation.
+ * It may also be used for terminating instructions, if it is undefined
+ * if data has been changed in guest space in case of an exception.
+ */
+static inline __must_check
+int write_guest(struct kvm_vcpu *vcpu, unsigned long ga, u8 ar, void *data,
+ unsigned long len)
+{
+ return access_guest(vcpu, ga, ar, data, len, GACC_STORE);
+}
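+
+/*
+ * Typical use from an instruction handler (illustrative sketch; operand2,
+ * ar and value stand in for the handler's own variables):
+ *
+ *	rc = write_guest(vcpu, operand2, ar, &value, sizeof(value));
+ *	if (rc)
+ *		return kvm_s390_inject_prog_cond(vcpu, rc);
+ *
+ * kvm_s390_inject_prog_cond() injects a program interrupt for positive
+ * return values and passes negative errors back to the caller.
+ */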
+
+/**
+ * read_guest - copy data from guest space to kernel space
+ * @vcpu: virtual cpu
+ * @ga: guest address
+ * @ar: access register
+ * @data: destination address in kernel space
+ * @len: number of bytes to copy
+ *
+ * Copy @len bytes from @ga (guest address) to @data (kernel space).
+ *
+ * The behaviour of read_guest is identical to write_guest, except that
+ * data will be copied from guest space to kernel space.
+ */
+static inline __must_check
+int read_guest(struct kvm_vcpu *vcpu, unsigned long ga, u8 ar, void *data,
+ unsigned long len)
+{
+ return access_guest(vcpu, ga, ar, data, len, GACC_FETCH);
+}
+
+/**
+ * read_guest_instr - copy instruction data from guest space to kernel space
+ * @vcpu: virtual cpu
+ * @ga: guest address
+ * @data: destination address in kernel space
+ * @len: number of bytes to copy
+ *
+ * Copy @len bytes from the given address (guest space) to @data (kernel
+ * space).
+ *
+ * The behaviour of read_guest_instr is identical to read_guest, except that
+ * instruction data will be read from primary space even when the vcpu is in
+ * secondary-space or access-register mode (home-space mode is unchanged).
+ */
+static inline __must_check
+int read_guest_instr(struct kvm_vcpu *vcpu, unsigned long ga, void *data,
+ unsigned long len)
+{
+ return access_guest(vcpu, ga, 0, data, len, GACC_IFETCH);
+}
+
+/**
+ * write_guest_abs - copy data from kernel space to guest space absolute
+ * @vcpu: virtual cpu
+ * @gpa: guest physical (absolute) address
+ * @data: source address in kernel space
+ * @len: number of bytes to copy
+ *
+ * Copy @len bytes from @data (kernel space) to @gpa (guest absolute address).
+ * It is up to the caller to ensure that the entire guest memory range is
+ * valid memory before calling this function.
+ * Guest low address and key protection are not checked.
+ *
+ * Returns zero on success or -EFAULT on error.
+ *
+ * If an error occurs data may have been copied partially to guest memory.
+ */
+static inline __must_check
+int write_guest_abs(struct kvm_vcpu *vcpu, unsigned long gpa, void *data,
+ unsigned long len)
+{
+ return kvm_write_guest(vcpu->kvm, gpa, data, len);
+}
+
+/**
+ * read_guest_abs - copy data from guest space absolute to kernel space
+ * @vcpu: virtual cpu
+ * @gpa: guest physical (absolute) address
+ * @data: destination address in kernel space
+ * @len: number of bytes to copy
+ *
+ * Copy @len bytes from @gpa (guest absolute address) to @data (kernel space).
+ * It is up to the caller to ensure that the entire guest memory range is
+ * valid memory before calling this function.
+ * Guest key protection is not checked.
+ *
+ * Returns zero on success or -EFAULT on error.
+ *
+ * If an error occurs data may have been copied partially to kernel space.
+ */
+static inline __must_check
+int read_guest_abs(struct kvm_vcpu *vcpu, unsigned long gpa, void *data,
+ unsigned long len)
+{
+ return kvm_read_guest(vcpu->kvm, gpa, data, len);
+}
+
+/**
+ * write_guest_real - copy data from kernel space to guest space real
+ * @vcpu: virtual cpu
+ * @gra: guest real address
+ * @data: source address in kernel space
+ * @len: number of bytes to copy
+ *
+ * Copy @len bytes from @data (kernel space) to @gra (guest real address).
+ * It is up to the caller to ensure that the entire guest memory range is
+ * valid memory before calling this function.
+ * Guest low address and key protection are not checked.
+ *
+ * Returns zero on success or -EFAULT on error.
+ *
+ * If an error occurs data may have been copied partially to guest memory.
+ */
+static inline __must_check
+int write_guest_real(struct kvm_vcpu *vcpu, unsigned long gra, void *data,
+ unsigned long len)
+{
+ return access_guest_real(vcpu, gra, data, len, 1);
+}
+
+/**
+ * read_guest_real - copy data from guest space real to kernel space
+ * @vcpu: virtual cpu
+ * @gra: guest real address
+ * @data: destination address in kernel space
+ * @len: number of bytes to copy
+ *
+ * Copy @len bytes from @gra (guest real address) to @data (kernel space).
+ * It is up to the caller to ensure that the entire guest memory range is
+ * valid memory before calling this function.
+ * Guest key protection is not checked.
+ *
+ * Returns zero on success or -EFAULT on error.
+ *
+ * If an error occurs data may have been copied partially to kernel space.
+ */
+static inline __must_check
+int read_guest_real(struct kvm_vcpu *vcpu, unsigned long gra, void *data,
+ unsigned long len)
+{
+ return access_guest_real(vcpu, gra, data, len, 0);
+}
+
+void ipte_lock(struct kvm_vcpu *vcpu);
+void ipte_unlock(struct kvm_vcpu *vcpu);
+int ipte_lock_held(struct kvm_vcpu *vcpu);
+int kvm_s390_check_low_addr_prot_real(struct kvm_vcpu *vcpu, unsigned long gra);
+
+int kvm_s390_shadow_fault(struct kvm_vcpu *vcpu, struct gmap *shadow,
+ unsigned long saddr);
+
+#endif /* __KVM_S390_GACCESS_H */
diff --git a/arch/s390/kvm/guestdbg.c b/arch/s390/kvm/guestdbg.c
new file mode 100644
index 000000000..394a5f538
--- /dev/null
+++ b/arch/s390/kvm/guestdbg.c
@@ -0,0 +1,626 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * kvm guest debug support
+ *
+ * Copyright IBM Corp. 2014
+ *
+ * Author(s): David Hildenbrand <dahi@linux.vnet.ibm.com>
+ */
+#include <linux/kvm_host.h>
+#include <linux/errno.h>
+#include "kvm-s390.h"
+#include "gaccess.h"
+
+/*
+ * Extends the address range given by *start and *stop to include the address
+ * range starting at estart with length len. Takes care of overflowing
+ * intervals and tries to minimize the overall interval size.
+ */
+static void extend_address_range(u64 *start, u64 *stop, u64 estart, int len)
+{
+ u64 estop;
+
+ if (len > 0)
+ len--;
+ else
+ len = 0;
+
+ estop = estart + len;
+
+ /* 0-0 range represents "not set" */
+ if ((*start == 0) && (*stop == 0)) {
+ *start = estart;
+ *stop = estop;
+ } else if (*start <= *stop) {
+ /* increase the existing range */
+ if (estart < *start)
+ *start = estart;
+ if (estop > *stop)
+ *stop = estop;
+ } else {
+ /* "overflowing" interval, whereby *stop > *start */
+ if (estart <= *stop) {
+ if (estop > *stop)
+ *stop = estop;
+ } else if (estop > *start) {
+ if (estart < *start)
+ *start = estart;
+ }
+ /* minimize the range */
+ else if ((estop - *stop) < (*start - estart))
+ *stop = estop;
+ else
+ *start = estart;
+ }
+}
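+
+/*
+ * Example (illustrative): starting from the "not set" pair (0, 0), adding
+ * estart = 0x1000, len = 16 yields the range 0x1000..0x100f; adding
+ * estart = 0x500, len = 8 afterwards extends it to 0x500..0x100f. A pair
+ * with *start > *stop denotes a range that wraps past the end of the
+ * address space.
+ */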
+
+#define MAX_INST_SIZE 6
+
+static void enable_all_hw_bp(struct kvm_vcpu *vcpu)
+{
+ unsigned long start, len;
+ u64 *cr9 = &vcpu->arch.sie_block->gcr[9];
+ u64 *cr10 = &vcpu->arch.sie_block->gcr[10];
+ u64 *cr11 = &vcpu->arch.sie_block->gcr[11];
+ int i;
+
+ if (vcpu->arch.guestdbg.nr_hw_bp <= 0 ||
+ vcpu->arch.guestdbg.hw_bp_info == NULL)
+ return;
+
+ /*
+ * If the guest is not interested in branching events, we can safely
+ * limit them to the PER address range.
+ */
+ if (!(*cr9 & PER_EVENT_BRANCH))
+ *cr9 |= PER_CONTROL_BRANCH_ADDRESS;
+ *cr9 |= PER_EVENT_IFETCH | PER_EVENT_BRANCH;
+
+ for (i = 0; i < vcpu->arch.guestdbg.nr_hw_bp; i++) {
+ start = vcpu->arch.guestdbg.hw_bp_info[i].addr;
+ len = vcpu->arch.guestdbg.hw_bp_info[i].len;
+
+ /*
+ * The instruction in front of the desired bp has to
+ * report instruction-fetching events
+ */
+ if (start < MAX_INST_SIZE) {
+ len += start;
+ start = 0;
+ } else {
+ start -= MAX_INST_SIZE;
+ len += MAX_INST_SIZE;
+ }
+
+ extend_address_range(cr10, cr11, start, len);
+ }
+}
+
+static void enable_all_hw_wp(struct kvm_vcpu *vcpu)
+{
+ unsigned long start, len;
+ u64 *cr9 = &vcpu->arch.sie_block->gcr[9];
+ u64 *cr10 = &vcpu->arch.sie_block->gcr[10];
+ u64 *cr11 = &vcpu->arch.sie_block->gcr[11];
+ int i;
+
+ if (vcpu->arch.guestdbg.nr_hw_wp <= 0 ||
+ vcpu->arch.guestdbg.hw_wp_info == NULL)
+ return;
+
+ /* if host uses storage alteration for special address
+ * spaces, enable all events and give all to the guest */
+ if (*cr9 & PER_EVENT_STORE && *cr9 & PER_CONTROL_ALTERATION) {
+ *cr9 &= ~PER_CONTROL_ALTERATION;
+ *cr10 = 0;
+ *cr11 = -1UL;
+ } else {
+ *cr9 &= ~PER_CONTROL_ALTERATION;
+ *cr9 |= PER_EVENT_STORE;
+
+ for (i = 0; i < vcpu->arch.guestdbg.nr_hw_wp; i++) {
+ start = vcpu->arch.guestdbg.hw_wp_info[i].addr;
+ len = vcpu->arch.guestdbg.hw_wp_info[i].len;
+
+ extend_address_range(cr10, cr11, start, len);
+ }
+ }
+}
+
+void kvm_s390_backup_guest_per_regs(struct kvm_vcpu *vcpu)
+{
+ vcpu->arch.guestdbg.cr0 = vcpu->arch.sie_block->gcr[0];
+ vcpu->arch.guestdbg.cr9 = vcpu->arch.sie_block->gcr[9];
+ vcpu->arch.guestdbg.cr10 = vcpu->arch.sie_block->gcr[10];
+ vcpu->arch.guestdbg.cr11 = vcpu->arch.sie_block->gcr[11];
+}
+
+void kvm_s390_restore_guest_per_regs(struct kvm_vcpu *vcpu)
+{
+ vcpu->arch.sie_block->gcr[0] = vcpu->arch.guestdbg.cr0;
+ vcpu->arch.sie_block->gcr[9] = vcpu->arch.guestdbg.cr9;
+ vcpu->arch.sie_block->gcr[10] = vcpu->arch.guestdbg.cr10;
+ vcpu->arch.sie_block->gcr[11] = vcpu->arch.guestdbg.cr11;
+}
+
+void kvm_s390_patch_guest_per_regs(struct kvm_vcpu *vcpu)
+{
+ /*
+ * TODO: if guest psw has per enabled, otherwise 0s!
+ * This reduces the amount of reported events.
+ * Need to intercept all psw changes!
+ */
+
+ if (guestdbg_sstep_enabled(vcpu)) {
+ /* disable timer (clock-comparator) interrupts */
+ vcpu->arch.sie_block->gcr[0] &= ~CR0_CLOCK_COMPARATOR_SUBMASK;
+ vcpu->arch.sie_block->gcr[9] |= PER_EVENT_IFETCH;
+ vcpu->arch.sie_block->gcr[10] = 0;
+ vcpu->arch.sie_block->gcr[11] = -1UL;
+ }
+
+ if (guestdbg_hw_bp_enabled(vcpu)) {
+ enable_all_hw_bp(vcpu);
+ enable_all_hw_wp(vcpu);
+ }
+
+ /* TODO: Instruction-fetching-nullification not allowed for now */
+ if (vcpu->arch.sie_block->gcr[9] & PER_EVENT_NULLIFICATION)
+ vcpu->arch.sie_block->gcr[9] &= ~PER_EVENT_NULLIFICATION;
+}
+
+#define MAX_WP_SIZE 100
+
+static int __import_wp_info(struct kvm_vcpu *vcpu,
+ struct kvm_hw_breakpoint *bp_data,
+ struct kvm_hw_wp_info_arch *wp_info)
+{
+ int ret = 0;
+ wp_info->len = bp_data->len;
+ wp_info->addr = bp_data->addr;
+ wp_info->phys_addr = bp_data->phys_addr;
+ wp_info->old_data = NULL;
+
+ if (wp_info->len < 0 || wp_info->len > MAX_WP_SIZE)
+ return -EINVAL;
+
+ wp_info->old_data = kmalloc(bp_data->len, GFP_KERNEL);
+ if (!wp_info->old_data)
+ return -ENOMEM;
+ /* try to backup the original value */
+ ret = read_guest_abs(vcpu, wp_info->phys_addr, wp_info->old_data,
+ wp_info->len);
+ if (ret) {
+ kfree(wp_info->old_data);
+ wp_info->old_data = NULL;
+ }
+
+ return ret;
+}
+
+#define MAX_BP_COUNT 50
+
+int kvm_s390_import_bp_data(struct kvm_vcpu *vcpu,
+ struct kvm_guest_debug *dbg)
+{
+ int ret = 0, nr_wp = 0, nr_bp = 0, i;
+ struct kvm_hw_breakpoint *bp_data = NULL;
+ struct kvm_hw_wp_info_arch *wp_info = NULL;
+ struct kvm_hw_bp_info_arch *bp_info = NULL;
+
+ if (dbg->arch.nr_hw_bp <= 0 || !dbg->arch.hw_bp)
+ return 0;
+ else if (dbg->arch.nr_hw_bp > MAX_BP_COUNT)
+ return -EINVAL;
+
+ bp_data = memdup_user(dbg->arch.hw_bp,
+ sizeof(*bp_data) * dbg->arch.nr_hw_bp);
+ if (IS_ERR(bp_data))
+ return PTR_ERR(bp_data);
+
+ for (i = 0; i < dbg->arch.nr_hw_bp; i++) {
+ switch (bp_data[i].type) {
+ case KVM_HW_WP_WRITE:
+ nr_wp++;
+ break;
+ case KVM_HW_BP:
+ nr_bp++;
+ break;
+ default:
+ break;
+ }
+ }
+
+ if (nr_wp > 0) {
+ wp_info = kmalloc_array(nr_wp,
+ sizeof(*wp_info),
+ GFP_KERNEL);
+ if (!wp_info) {
+ ret = -ENOMEM;
+ goto error;
+ }
+ }
+ if (nr_bp > 0) {
+ bp_info = kmalloc_array(nr_bp,
+ sizeof(*bp_info),
+ GFP_KERNEL);
+ if (!bp_info) {
+ ret = -ENOMEM;
+ goto error;
+ }
+ }
+
+ for (nr_wp = 0, nr_bp = 0, i = 0; i < dbg->arch.nr_hw_bp; i++) {
+ switch (bp_data[i].type) {
+ case KVM_HW_WP_WRITE:
+ ret = __import_wp_info(vcpu, &bp_data[i],
+ &wp_info[nr_wp]);
+ if (ret)
+ goto error;
+ nr_wp++;
+ break;
+ case KVM_HW_BP:
+ bp_info[nr_bp].len = bp_data[i].len;
+ bp_info[nr_bp].addr = bp_data[i].addr;
+ nr_bp++;
+ break;
+ }
+ }
+
+ vcpu->arch.guestdbg.nr_hw_bp = nr_bp;
+ vcpu->arch.guestdbg.hw_bp_info = bp_info;
+ vcpu->arch.guestdbg.nr_hw_wp = nr_wp;
+ vcpu->arch.guestdbg.hw_wp_info = wp_info;
+ kfree(bp_data);
+ return 0;
+error:
+ kfree(bp_data);
+ kfree(wp_info);
+ kfree(bp_info);
+ return ret;
+}
+
+void kvm_s390_clear_bp_data(struct kvm_vcpu *vcpu)
+{
+ int i;
+ struct kvm_hw_wp_info_arch *hw_wp_info = NULL;
+
+ for (i = 0; i < vcpu->arch.guestdbg.nr_hw_wp; i++) {
+ hw_wp_info = &vcpu->arch.guestdbg.hw_wp_info[i];
+ kfree(hw_wp_info->old_data);
+ hw_wp_info->old_data = NULL;
+ }
+ kfree(vcpu->arch.guestdbg.hw_wp_info);
+ vcpu->arch.guestdbg.hw_wp_info = NULL;
+
+ kfree(vcpu->arch.guestdbg.hw_bp_info);
+ vcpu->arch.guestdbg.hw_bp_info = NULL;
+
+ vcpu->arch.guestdbg.nr_hw_wp = 0;
+ vcpu->arch.guestdbg.nr_hw_bp = 0;
+}
+
+static inline int in_addr_range(u64 addr, u64 a, u64 b)
+{
+ if (a <= b)
+ return (addr >= a) && (addr <= b);
+ else
+ /* "overflowing" interval */
+ return (addr >= a) || (addr <= b);
+}
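+
+/*
+ * Example (illustrative): for the wrapping pair a = 0xff00, b = 0x00ff,
+ * both in_addr_range(0xffff, a, b) and in_addr_range(0x0010, a, b) are
+ * true, while in_addr_range(0x8000, a, b) is false.
+ */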
+
+#define end_of_range(bp_info) (bp_info->addr + bp_info->len - 1)
+
+static struct kvm_hw_bp_info_arch *find_hw_bp(struct kvm_vcpu *vcpu,
+ unsigned long addr)
+{
+ struct kvm_hw_bp_info_arch *bp_info = vcpu->arch.guestdbg.hw_bp_info;
+ int i;
+
+ if (vcpu->arch.guestdbg.nr_hw_bp == 0)
+ return NULL;
+
+ for (i = 0; i < vcpu->arch.guestdbg.nr_hw_bp; i++) {
+ /* addr is directly the start or in the range of a bp */
+ if (addr == bp_info->addr)
+ goto found;
+ if (bp_info->len > 0 &&
+ in_addr_range(addr, bp_info->addr, end_of_range(bp_info)))
+ goto found;
+
+ bp_info++;
+ }
+
+ return NULL;
+found:
+ return bp_info;
+}
+
+static struct kvm_hw_wp_info_arch *any_wp_changed(struct kvm_vcpu *vcpu)
+{
+ int i;
+ struct kvm_hw_wp_info_arch *wp_info = NULL;
+ void *temp = NULL;
+
+ if (vcpu->arch.guestdbg.nr_hw_wp == 0)
+ return NULL;
+
+ for (i = 0; i < vcpu->arch.guestdbg.nr_hw_wp; i++) {
+ wp_info = &vcpu->arch.guestdbg.hw_wp_info[i];
+ if (!wp_info || !wp_info->old_data || wp_info->len <= 0)
+ continue;
+
+ temp = kmalloc(wp_info->len, GFP_KERNEL);
+ if (!temp)
+ continue;
+
+ /* refetch the wp data and compare it to the old value */
+ if (!read_guest_abs(vcpu, wp_info->phys_addr, temp,
+ wp_info->len)) {
+ if (memcmp(temp, wp_info->old_data, wp_info->len)) {
+ kfree(temp);
+ return wp_info;
+ }
+ }
+ kfree(temp);
+ temp = NULL;
+ }
+
+ return NULL;
+}
+
+void kvm_s390_prepare_debug_exit(struct kvm_vcpu *vcpu)
+{
+ vcpu->run->exit_reason = KVM_EXIT_DEBUG;
+ vcpu->guest_debug &= ~KVM_GUESTDBG_EXIT_PENDING;
+}
+
+#define PER_CODE_MASK (PER_EVENT_MASK >> 24)
+#define PER_CODE_BRANCH (PER_EVENT_BRANCH >> 24)
+#define PER_CODE_IFETCH (PER_EVENT_IFETCH >> 24)
+#define PER_CODE_STORE (PER_EVENT_STORE >> 24)
+#define PER_CODE_STORE_REAL (PER_EVENT_STORE_REAL >> 24)
+
+#define per_bp_event(code) \
+ (code & (PER_CODE_IFETCH | PER_CODE_BRANCH))
+#define per_write_wp_event(code) \
+ (code & (PER_CODE_STORE | PER_CODE_STORE_REAL))
+
+static int debug_exit_required(struct kvm_vcpu *vcpu, u8 perc,
+ unsigned long peraddr)
+{
+ struct kvm_debug_exit_arch *debug_exit = &vcpu->run->debug.arch;
+ struct kvm_hw_wp_info_arch *wp_info = NULL;
+ struct kvm_hw_bp_info_arch *bp_info = NULL;
+ unsigned long addr = vcpu->arch.sie_block->gpsw.addr;
+
+ if (guestdbg_hw_bp_enabled(vcpu)) {
+ if (per_write_wp_event(perc) &&
+ vcpu->arch.guestdbg.nr_hw_wp > 0) {
+ wp_info = any_wp_changed(vcpu);
+ if (wp_info) {
+ debug_exit->addr = wp_info->addr;
+ debug_exit->type = KVM_HW_WP_WRITE;
+ goto exit_required;
+ }
+ }
+ if (per_bp_event(perc) &&
+ vcpu->arch.guestdbg.nr_hw_bp > 0) {
+ bp_info = find_hw_bp(vcpu, addr);
+ /* remove duplicate events if PC==PER address */
+ if (bp_info && (addr != peraddr)) {
+ debug_exit->addr = addr;
+ debug_exit->type = KVM_HW_BP;
+ vcpu->arch.guestdbg.last_bp = addr;
+ goto exit_required;
+ }
+ /* breakpoint missed */
+ bp_info = find_hw_bp(vcpu, peraddr);
+ if (bp_info && vcpu->arch.guestdbg.last_bp != peraddr) {
+ debug_exit->addr = peraddr;
+ debug_exit->type = KVM_HW_BP;
+ goto exit_required;
+ }
+ }
+ }
+ if (guestdbg_sstep_enabled(vcpu) && per_bp_event(perc)) {
+ debug_exit->addr = addr;
+ debug_exit->type = KVM_SINGLESTEP;
+ goto exit_required;
+ }
+
+ return 0;
+exit_required:
+ return 1;
+}
+
+static int per_fetched_addr(struct kvm_vcpu *vcpu, unsigned long *addr)
+{
+ u8 exec_ilen = 0;
+ u16 opcode[3];
+ int rc;
+
+ if (vcpu->arch.sie_block->icptcode == ICPT_PROGI) {
+ /* PER address references the fetched or the execute instr */
+ *addr = vcpu->arch.sie_block->peraddr;
+ /*
+ * Manually detect if we have an EXECUTE instruction. As
+ * instructions are always 2 byte aligned we can read the
+ * first two bytes unconditionally
+ */
+ rc = read_guest_instr(vcpu, *addr, &opcode, 2);
+ if (rc)
+ return rc;
+ if (opcode[0] >> 8 == 0x44)
+ exec_ilen = 4;
+ if ((opcode[0] & 0xff0f) == 0xc600)
+ exec_ilen = 6;
+ } else {
+ /* instr was suppressed, calculate the responsible instr */
+ *addr = __rewind_psw(vcpu->arch.sie_block->gpsw,
+ kvm_s390_get_ilen(vcpu));
+ if (vcpu->arch.sie_block->icptstatus & 0x01) {
+ exec_ilen = (vcpu->arch.sie_block->icptstatus & 0x60) >> 4;
+ if (!exec_ilen)
+ exec_ilen = 4;
+ }
+ }
+
+ if (exec_ilen) {
+ /* read the complete EXECUTE instr to detect the fetched addr */
+ rc = read_guest_instr(vcpu, *addr, &opcode, exec_ilen);
+ if (rc)
+ return rc;
+ if (exec_ilen == 6) {
+ /* EXECUTE RELATIVE LONG - RIL-b format */
+ s32 rl = *((s32 *) (opcode + 1));
+
+ /* rl is a _signed_ 32 bit value specifying halfwords */
+ *addr += (u64)(s64) rl * 2;
+ } else {
+ /* EXECUTE - RX-a format */
+ u32 base = (opcode[1] & 0xf000) >> 12;
+ u32 disp = opcode[1] & 0x0fff;
+ u32 index = opcode[0] & 0x000f;
+
+ *addr = base ? vcpu->run->s.regs.gprs[base] : 0;
+ *addr += index ? vcpu->run->s.regs.gprs[index] : 0;
+ *addr += disp;
+ }
+ *addr = kvm_s390_logical_to_effective(vcpu, *addr);
+ }
+ return 0;
+}
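+
+/*
+ * Example for the EXECUTE RELATIVE LONG case above (illustrative): for an
+ * EXRL at guest address 0x10000 with a relative immediate of -4 halfwords,
+ * the fetched instruction address is 0x10000 - 8 = 0xfff8.
+ */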
+
+#define guest_per_enabled(vcpu) \
+ (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PER)
+
+int kvm_s390_handle_per_ifetch_icpt(struct kvm_vcpu *vcpu)
+{
+ const u64 cr10 = vcpu->arch.sie_block->gcr[10];
+ const u64 cr11 = vcpu->arch.sie_block->gcr[11];
+ const u8 ilen = kvm_s390_get_ilen(vcpu);
+ struct kvm_s390_pgm_info pgm_info = {
+ .code = PGM_PER,
+ .per_code = PER_CODE_IFETCH,
+ .per_address = __rewind_psw(vcpu->arch.sie_block->gpsw, ilen),
+ };
+ unsigned long fetched_addr;
+ int rc;
+
+ /*
+ * The PSW points to the next instruction, therefore the intercepted
+ * instruction generated a PER i-fetch event. The PER address therefore
+ * points at the previous PSW address (which could be an EXECUTE instruction).
+ */
+ if (!guestdbg_enabled(vcpu))
+ return kvm_s390_inject_prog_irq(vcpu, &pgm_info);
+
+ if (debug_exit_required(vcpu, pgm_info.per_code, pgm_info.per_address))
+ vcpu->guest_debug |= KVM_GUESTDBG_EXIT_PENDING;
+
+ if (!guest_per_enabled(vcpu) ||
+ !(vcpu->arch.sie_block->gcr[9] & PER_EVENT_IFETCH))
+ return 0;
+
+ rc = per_fetched_addr(vcpu, &fetched_addr);
+ if (rc < 0)
+ return rc;
+ if (rc)
+ /* instruction-fetching exceptions */
+ return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
+
+ if (in_addr_range(fetched_addr, cr10, cr11))
+ return kvm_s390_inject_prog_irq(vcpu, &pgm_info);
+ return 0;
+}
+
+static int filter_guest_per_event(struct kvm_vcpu *vcpu)
+{
+ const u8 perc = vcpu->arch.sie_block->perc;
+ u64 addr = vcpu->arch.sie_block->gpsw.addr;
+ u64 cr9 = vcpu->arch.sie_block->gcr[9];
+ u64 cr10 = vcpu->arch.sie_block->gcr[10];
+ u64 cr11 = vcpu->arch.sie_block->gcr[11];
+ /* filter all events, demanded by the guest */
+ u8 guest_perc = perc & (cr9 >> 24) & PER_CODE_MASK;
+ unsigned long fetched_addr;
+ int rc;
+
+ if (!guest_per_enabled(vcpu))
+ guest_perc = 0;
+
+ /* filter "successful-branching" events */
+ if (guest_perc & PER_CODE_BRANCH &&
+ cr9 & PER_CONTROL_BRANCH_ADDRESS &&
+ !in_addr_range(addr, cr10, cr11))
+ guest_perc &= ~PER_CODE_BRANCH;
+
+ /* filter "instruction-fetching" events */
+ if (guest_perc & PER_CODE_IFETCH) {
+ rc = per_fetched_addr(vcpu, &fetched_addr);
+ if (rc < 0)
+ return rc;
+ /*
+ * Don't inject an irq on exceptions. This would make handling
+ * on icpt code 8 very complex (as PSW was already rewound).
+ */
+ if (rc || !in_addr_range(fetched_addr, cr10, cr11))
+ guest_perc &= ~PER_CODE_IFETCH;
+ }
+
+ /* All other PER events will be given to the guest */
+ /* TODO: Check altered address/address space */
+
+ vcpu->arch.sie_block->perc = guest_perc;
+
+ if (!guest_perc)
+ vcpu->arch.sie_block->iprcc &= ~PGM_PER;
+ return 0;
+}
+
+#define pssec(vcpu) (vcpu->arch.sie_block->gcr[1] & _ASCE_SPACE_SWITCH)
+#define hssec(vcpu) (vcpu->arch.sie_block->gcr[13] & _ASCE_SPACE_SWITCH)
+#define old_ssec(vcpu) ((vcpu->arch.sie_block->tecmc >> 31) & 0x1)
+#define old_as_is_home(vcpu) !(vcpu->arch.sie_block->tecmc & 0xffff)
+
+int kvm_s390_handle_per_event(struct kvm_vcpu *vcpu)
+{
+ int rc, new_as;
+
+ if (debug_exit_required(vcpu, vcpu->arch.sie_block->perc,
+ vcpu->arch.sie_block->peraddr))
+ vcpu->guest_debug |= KVM_GUESTDBG_EXIT_PENDING;
+
+ rc = filter_guest_per_event(vcpu);
+ if (rc)
+ return rc;
+
+ /*
+ * Only RP, SAC, SACF, PT, PTI, PR, PC instructions can trigger
+ * a space-switch event. PER events enforce space-switch events
+ * for these instructions. So if no PER event for the guest is left,
+ * we might have to filter the space-switch element out, too.
+ */
+ if (vcpu->arch.sie_block->iprcc == PGM_SPACE_SWITCH) {
+ vcpu->arch.sie_block->iprcc = 0;
+ new_as = psw_bits(vcpu->arch.sie_block->gpsw).as;
+
+ /*
+ * If the AS changed from / to home, we had RP, SAC or SACF
+ * instruction. Check primary and home space-switch-event
+ * controls. (theoretically home -> home produced no event)
+ */
+ if (((new_as == PSW_BITS_AS_HOME) ^ old_as_is_home(vcpu)) &&
+ (pssec(vcpu) || hssec(vcpu)))
+ vcpu->arch.sie_block->iprcc = PGM_SPACE_SWITCH;
+
+ /*
+ * PT, PTI, PR, PC instruction operate on primary AS only. Check
+ * if the primary-space-switch-event control was or got set.
+ */
+ if (new_as == PSW_BITS_AS_PRIMARY && !old_as_is_home(vcpu) &&
+ (pssec(vcpu) || old_ssec(vcpu)))
+ vcpu->arch.sie_block->iprcc = PGM_SPACE_SWITCH;
+ }
+ return 0;
+}
diff --git a/arch/s390/kvm/intercept.c b/arch/s390/kvm/intercept.c
new file mode 100644
index 000000000..a389fa85c
--- /dev/null
+++ b/arch/s390/kvm/intercept.c
@@ -0,0 +1,492 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * in-kernel handling for sie intercepts
+ *
+ * Copyright IBM Corp. 2008, 2014
+ *
+ * Author(s): Carsten Otte <cotte@de.ibm.com>
+ * Christian Borntraeger <borntraeger@de.ibm.com>
+ */
+
+#include <linux/kvm_host.h>
+#include <linux/errno.h>
+#include <linux/pagemap.h>
+
+#include <asm/kvm_host.h>
+#include <asm/asm-offsets.h>
+#include <asm/irq.h>
+#include <asm/sysinfo.h>
+
+#include "kvm-s390.h"
+#include "gaccess.h"
+#include "trace.h"
+#include "trace-s390.h"
+
+u8 kvm_s390_get_ilen(struct kvm_vcpu *vcpu)
+{
+ struct kvm_s390_sie_block *sie_block = vcpu->arch.sie_block;
+ u8 ilen = 0;
+
+ switch (vcpu->arch.sie_block->icptcode) {
+ case ICPT_INST:
+ case ICPT_INSTPROGI:
+ case ICPT_OPEREXC:
+ case ICPT_PARTEXEC:
+ case ICPT_IOINST:
+ /* instruction only stored for these icptcodes */
+ ilen = insn_length(vcpu->arch.sie_block->ipa >> 8);
+ /* Use the length of the EXECUTE instruction if necessary */
+ if (sie_block->icptstatus & 1) {
+ ilen = (sie_block->icptstatus >> 4) & 0x6;
+ if (!ilen)
+ ilen = 4;
+ }
+ break;
+ case ICPT_PROGI:
+ /* bit 1+2 of pgmilc are the ilc, so we directly get ilen */
+ ilen = vcpu->arch.sie_block->pgmilc & 0x6;
+ break;
+ }
+ return ilen;
+}
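+
+/*
+ * Example (illustrative): for an intercepted MVPG (ipa 0xb254) the opcode
+ * byte is 0xb2, so insn_length() reports an ilen of 4. If bit 0x01 of
+ * icptstatus is set, the instruction was the target of an EXECUTE and the
+ * length of the EXECUTE instruction is reported instead.
+ */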
+
+static int handle_stop(struct kvm_vcpu *vcpu)
+{
+ struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
+ int rc = 0;
+ uint8_t flags, stop_pending;
+
+ vcpu->stat.exit_stop_request++;
+
+ /* delay the stop if any non-stop irq is pending */
+ if (kvm_s390_vcpu_has_irq(vcpu, 1))
+ return 0;
+
+ /* avoid races with the injection/SIGP STOP code */
+ spin_lock(&li->lock);
+ flags = li->irq.stop.flags;
+ stop_pending = kvm_s390_is_stop_irq_pending(vcpu);
+ spin_unlock(&li->lock);
+
+ trace_kvm_s390_stop_request(stop_pending, flags);
+ if (!stop_pending)
+ return 0;
+
+ if (flags & KVM_S390_STOP_FLAG_STORE_STATUS) {
+ rc = kvm_s390_vcpu_store_status(vcpu,
+ KVM_S390_STORE_STATUS_NOADDR);
+ if (rc)
+ return rc;
+ }
+
+ if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm))
+ kvm_s390_vcpu_stop(vcpu);
+ return -EOPNOTSUPP;
+}
+
+static int handle_validity(struct kvm_vcpu *vcpu)
+{
+ int viwhy = vcpu->arch.sie_block->ipb >> 16;
+
+ vcpu->stat.exit_validity++;
+ trace_kvm_s390_intercept_validity(vcpu, viwhy);
+ KVM_EVENT(3, "validity intercept 0x%x for pid %u (kvm 0x%pK)", viwhy,
+ current->pid, vcpu->kvm);
+
+ /* do not warn on invalid runtime instrumentation mode */
+ WARN_ONCE(viwhy != 0x44, "kvm: unhandled validity intercept 0x%x\n",
+ viwhy);
+ return -EINVAL;
+}
+
+static int handle_instruction(struct kvm_vcpu *vcpu)
+{
+ vcpu->stat.exit_instruction++;
+ trace_kvm_s390_intercept_instruction(vcpu,
+ vcpu->arch.sie_block->ipa,
+ vcpu->arch.sie_block->ipb);
+
+ switch (vcpu->arch.sie_block->ipa >> 8) {
+ case 0x01:
+ return kvm_s390_handle_01(vcpu);
+ case 0x82:
+ return kvm_s390_handle_lpsw(vcpu);
+ case 0x83:
+ return kvm_s390_handle_diag(vcpu);
+ case 0xaa:
+ return kvm_s390_handle_aa(vcpu);
+ case 0xae:
+ return kvm_s390_handle_sigp(vcpu);
+ case 0xb2:
+ return kvm_s390_handle_b2(vcpu);
+ case 0xb6:
+ return kvm_s390_handle_stctl(vcpu);
+ case 0xb7:
+ return kvm_s390_handle_lctl(vcpu);
+ case 0xb9:
+ return kvm_s390_handle_b9(vcpu);
+ case 0xe3:
+ return kvm_s390_handle_e3(vcpu);
+ case 0xe5:
+ return kvm_s390_handle_e5(vcpu);
+ case 0xeb:
+ return kvm_s390_handle_eb(vcpu);
+ default:
+ return -EOPNOTSUPP;
+ }
+}
+
+static int inject_prog_on_prog_intercept(struct kvm_vcpu *vcpu)
+{
+ struct kvm_s390_pgm_info pgm_info = {
+ .code = vcpu->arch.sie_block->iprcc,
+ /* the PSW has already been rewound */
+ .flags = KVM_S390_PGM_FLAGS_NO_REWIND,
+ };
+
+ switch (vcpu->arch.sie_block->iprcc & ~PGM_PER) {
+ case PGM_AFX_TRANSLATION:
+ case PGM_ASX_TRANSLATION:
+ case PGM_EX_TRANSLATION:
+ case PGM_LFX_TRANSLATION:
+ case PGM_LSTE_SEQUENCE:
+ case PGM_LSX_TRANSLATION:
+ case PGM_LX_TRANSLATION:
+ case PGM_PRIMARY_AUTHORITY:
+ case PGM_SECONDARY_AUTHORITY:
+ case PGM_SPACE_SWITCH:
+ pgm_info.trans_exc_code = vcpu->arch.sie_block->tecmc;
+ break;
+ case PGM_ALEN_TRANSLATION:
+ case PGM_ALE_SEQUENCE:
+ case PGM_ASTE_INSTANCE:
+ case PGM_ASTE_SEQUENCE:
+ case PGM_ASTE_VALIDITY:
+ case PGM_EXTENDED_AUTHORITY:
+ pgm_info.exc_access_id = vcpu->arch.sie_block->eai;
+ break;
+ case PGM_ASCE_TYPE:
+ case PGM_PAGE_TRANSLATION:
+ case PGM_REGION_FIRST_TRANS:
+ case PGM_REGION_SECOND_TRANS:
+ case PGM_REGION_THIRD_TRANS:
+ case PGM_SEGMENT_TRANSLATION:
+ pgm_info.trans_exc_code = vcpu->arch.sie_block->tecmc;
+ pgm_info.exc_access_id = vcpu->arch.sie_block->eai;
+ pgm_info.op_access_id = vcpu->arch.sie_block->oai;
+ break;
+ case PGM_MONITOR:
+ pgm_info.mon_class_nr = vcpu->arch.sie_block->mcn;
+ pgm_info.mon_code = vcpu->arch.sie_block->tecmc;
+ break;
+ case PGM_VECTOR_PROCESSING:
+ case PGM_DATA:
+ pgm_info.data_exc_code = vcpu->arch.sie_block->dxc;
+ break;
+ case PGM_PROTECTION:
+ pgm_info.trans_exc_code = vcpu->arch.sie_block->tecmc;
+ pgm_info.exc_access_id = vcpu->arch.sie_block->eai;
+ break;
+ default:
+ break;
+ }
+
+ if (vcpu->arch.sie_block->iprcc & PGM_PER) {
+ pgm_info.per_code = vcpu->arch.sie_block->perc;
+ pgm_info.per_atmid = vcpu->arch.sie_block->peratmid;
+ pgm_info.per_address = vcpu->arch.sie_block->peraddr;
+ pgm_info.per_access_id = vcpu->arch.sie_block->peraid;
+ }
+ return kvm_s390_inject_prog_irq(vcpu, &pgm_info);
+}
+
+/*
+ * restore ITDB to program-interruption TDB in guest lowcore
+ * and set TX abort indication if required
+ */
+static int handle_itdb(struct kvm_vcpu *vcpu)
+{
+ struct kvm_s390_itdb *itdb;
+ int rc;
+
+ if (!IS_TE_ENABLED(vcpu) || !IS_ITDB_VALID(vcpu))
+ return 0;
+ if (current->thread.per_flags & PER_FLAG_NO_TE)
+ return 0;
+ itdb = (struct kvm_s390_itdb *)vcpu->arch.sie_block->itdba;
+ rc = write_guest_lc(vcpu, __LC_PGM_TDB, itdb, sizeof(*itdb));
+ if (rc)
+ return rc;
+ memset(itdb, 0, sizeof(*itdb));
+
+ return 0;
+}
+
+#define per_event(vcpu) (vcpu->arch.sie_block->iprcc & PGM_PER)
+
+static int handle_prog(struct kvm_vcpu *vcpu)
+{
+ psw_t psw;
+ int rc;
+
+ vcpu->stat.exit_program_interruption++;
+
+ if (guestdbg_enabled(vcpu) && per_event(vcpu)) {
+ rc = kvm_s390_handle_per_event(vcpu);
+ if (rc)
+ return rc;
+ /* the interrupt might have been filtered out completely */
+ if (vcpu->arch.sie_block->iprcc == 0)
+ return 0;
+ }
+
+ trace_kvm_s390_intercept_prog(vcpu, vcpu->arch.sie_block->iprcc);
+ if (vcpu->arch.sie_block->iprcc == PGM_SPECIFICATION) {
+ rc = read_guest_lc(vcpu, __LC_PGM_NEW_PSW, &psw, sizeof(psw_t));
+ if (rc)
+ return rc;
+ /* Avoid endless loops of specification exceptions */
+ if (!is_valid_psw(&psw))
+ return -EOPNOTSUPP;
+ }
+ rc = handle_itdb(vcpu);
+ if (rc)
+ return rc;
+
+ return inject_prog_on_prog_intercept(vcpu);
+}
+
+/**
+ * handle_external_interrupt - used for external interruption interceptions
+ *
+ * This interception only occurs if the CPUSTAT_EXT_INT bit was set, or if
+ * the new PSW does not have external interrupts disabled. In the first case,
+ * we've got to deliver the interrupt manually, and in the second case, we
+ * drop to userspace to handle the situation there.
+ */
+static int handle_external_interrupt(struct kvm_vcpu *vcpu)
+{
+ u16 eic = vcpu->arch.sie_block->eic;
+ struct kvm_s390_irq irq;
+ psw_t newpsw;
+ int rc;
+
+ vcpu->stat.exit_external_interrupt++;
+
+ rc = read_guest_lc(vcpu, __LC_EXT_NEW_PSW, &newpsw, sizeof(psw_t));
+ if (rc)
+ return rc;
+ /* We can not handle clock comparator or timer interrupt with bad PSW */
+ if ((eic == EXT_IRQ_CLK_COMP || eic == EXT_IRQ_CPU_TIMER) &&
+ (newpsw.mask & PSW_MASK_EXT))
+ return -EOPNOTSUPP;
+
+ switch (eic) {
+ case EXT_IRQ_CLK_COMP:
+ irq.type = KVM_S390_INT_CLOCK_COMP;
+ break;
+ case EXT_IRQ_CPU_TIMER:
+ irq.type = KVM_S390_INT_CPU_TIMER;
+ break;
+ case EXT_IRQ_EXTERNAL_CALL:
+ irq.type = KVM_S390_INT_EXTERNAL_CALL;
+ irq.u.extcall.code = vcpu->arch.sie_block->extcpuaddr;
+ rc = kvm_s390_inject_vcpu(vcpu, &irq);
+ /* ignore if another external call is already pending */
+ if (rc == -EBUSY)
+ return 0;
+ return rc;
+ default:
+ return -EOPNOTSUPP;
+ }
+
+ return kvm_s390_inject_vcpu(vcpu, &irq);
+}
+
+/**
+ * handle_mvpg_pei - Handle MOVE PAGE partial execution interception.
+ *
+ * This interception can only happen for guests with DAT disabled and
+ * addresses that are currently not mapped in the host. Thus we try to
+ * set up the mappings for the corresponding user pages here (or throw
+ * addressing exceptions in case of illegal guest addresses).
+ */
+static int handle_mvpg_pei(struct kvm_vcpu *vcpu)
+{
+ unsigned long srcaddr, dstaddr;
+ int reg1, reg2, rc;
+
+ kvm_s390_get_regs_rre(vcpu, &reg1, &reg2);
+
+ /* Make sure that the source is paged-in */
+ rc = guest_translate_address(vcpu, vcpu->run->s.regs.gprs[reg2],
+ reg2, &srcaddr, GACC_FETCH);
+ if (rc)
+ return kvm_s390_inject_prog_cond(vcpu, rc);
+ rc = kvm_arch_fault_in_page(vcpu, srcaddr, 0);
+ if (rc != 0)
+ return rc;
+
+ /* Make sure that the destination is paged-in */
+ rc = guest_translate_address(vcpu, vcpu->run->s.regs.gprs[reg1],
+ reg1, &dstaddr, GACC_STORE);
+ if (rc)
+ return kvm_s390_inject_prog_cond(vcpu, rc);
+ rc = kvm_arch_fault_in_page(vcpu, dstaddr, 1);
+ if (rc != 0)
+ return rc;
+
+ kvm_s390_retry_instr(vcpu);
+
+ return 0;
+}
+
+static int handle_partial_execution(struct kvm_vcpu *vcpu)
+{
+ vcpu->stat.exit_pei++;
+
+ if (vcpu->arch.sie_block->ipa == 0xb254) /* MVPG */
+ return handle_mvpg_pei(vcpu);
+ if (vcpu->arch.sie_block->ipa >> 8 == 0xae) /* SIGP */
+ return kvm_s390_handle_sigp_pei(vcpu);
+
+ return -EOPNOTSUPP;
+}
+
+/*
+ * Handle the sthyi instruction that provides the guest with system
+ * information, like current CPU resources available at each level of
+ * the machine.
+ */
+int handle_sthyi(struct kvm_vcpu *vcpu)
+{
+ int reg1, reg2, r = 0;
+ u64 code, addr, cc = 0, rc = 0;
+ struct sthyi_sctns *sctns = NULL;
+
+ if (!test_kvm_facility(vcpu->kvm, 74))
+ return kvm_s390_inject_program_int(vcpu, PGM_OPERATION);
+
+ kvm_s390_get_regs_rre(vcpu, &reg1, &reg2);
+ code = vcpu->run->s.regs.gprs[reg1];
+ addr = vcpu->run->s.regs.gprs[reg2];
+
+ vcpu->stat.instruction_sthyi++;
+ VCPU_EVENT(vcpu, 3, "STHYI: fc: %llu addr: 0x%016llx", code, addr);
+ trace_kvm_s390_handle_sthyi(vcpu, code, addr);
+
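+	/* R1 and R2 must designate distinct, even-numbered registers. */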
+ if (reg1 == reg2 || reg1 & 1 || reg2 & 1)
+ return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
+
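+	/* Only function code 0 is handled; any other code is reported with cc 3 and return code 4. */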
+ if (code & 0xffff) {
+ cc = 3;
+ rc = 4;
+ goto out;
+ }
+
+ if (addr & ~PAGE_MASK)
+ return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
+
+ sctns = (void *)get_zeroed_page(GFP_KERNEL);
+ if (!sctns)
+ return -ENOMEM;
+
+ cc = sthyi_fill(sctns, &rc);
+
+out:
+ if (!cc) {
+ r = write_guest(vcpu, addr, reg2, sctns, PAGE_SIZE);
+ if (r) {
+ free_page((unsigned long)sctns);
+ return kvm_s390_inject_prog_cond(vcpu, r);
+ }
+ }
+
+ free_page((unsigned long)sctns);
+ vcpu->run->s.regs.gprs[reg2 + 1] = rc;
+ kvm_s390_set_psw_cc(vcpu, cc);
+ return r;
+}
+
+static int handle_operexc(struct kvm_vcpu *vcpu)
+{
+ psw_t oldpsw, newpsw;
+ int rc;
+
+ vcpu->stat.exit_operation_exception++;
+ trace_kvm_s390_handle_operexc(vcpu, vcpu->arch.sie_block->ipa,
+ vcpu->arch.sie_block->ipb);
+
+ if (vcpu->arch.sie_block->ipa == 0xb256)
+ return handle_sthyi(vcpu);
+
+ if (vcpu->arch.sie_block->ipa == 0 && vcpu->kvm->arch.user_instr0)
+ return -EOPNOTSUPP;
+ rc = read_guest_lc(vcpu, __LC_PGM_NEW_PSW, &newpsw, sizeof(psw_t));
+ if (rc)
+ return rc;
+ /*
+ * Avoid endless loops of operation exceptions, if the pgm new
+ * PSW will cause a new operation exception.
+ * The heuristic checks if the pgm new psw is within 6 bytes before
+ * the faulting psw address (with same DAT, AS settings) and the
+ * new psw is not a wait psw and the fault was not triggered by
+ * problem state.
+ */
+ oldpsw = vcpu->arch.sie_block->gpsw;
+ if (oldpsw.addr - newpsw.addr <= 6 &&
+ !(newpsw.mask & PSW_MASK_WAIT) &&
+ !(oldpsw.mask & PSW_MASK_PSTATE) &&
+ (newpsw.mask & PSW_MASK_ASC) == (oldpsw.mask & PSW_MASK_ASC) &&
+ (newpsw.mask & PSW_MASK_DAT) == (oldpsw.mask & PSW_MASK_DAT))
+ return -EOPNOTSUPP;
+
+ return kvm_s390_inject_program_int(vcpu, PGM_OPERATION);
+}
+
+int kvm_handle_sie_intercept(struct kvm_vcpu *vcpu)
+{
+ int rc, per_rc = 0;
+
+ if (kvm_is_ucontrol(vcpu->kvm))
+ return -EOPNOTSUPP;
+
+ switch (vcpu->arch.sie_block->icptcode) {
+ case ICPT_EXTREQ:
+ vcpu->stat.exit_external_request++;
+ return 0;
+ case ICPT_IOREQ:
+ vcpu->stat.exit_io_request++;
+ return 0;
+ case ICPT_INST:
+ rc = handle_instruction(vcpu);
+ break;
+ case ICPT_PROGI:
+ return handle_prog(vcpu);
+ case ICPT_EXTINT:
+ return handle_external_interrupt(vcpu);
+ case ICPT_WAIT:
+ return kvm_s390_handle_wait(vcpu);
+ case ICPT_VALIDITY:
+ return handle_validity(vcpu);
+ case ICPT_STOP:
+ return handle_stop(vcpu);
+ case ICPT_OPEREXC:
+ rc = handle_operexc(vcpu);
+ break;
+ case ICPT_PARTEXEC:
+ rc = handle_partial_execution(vcpu);
+ break;
+ case ICPT_KSS:
+ rc = kvm_s390_skey_check_enable(vcpu);
+ break;
+ default:
+ return -EOPNOTSUPP;
+ }
+
+	/* process PER, also if the instruction is processed in user space */
+ if (vcpu->arch.sie_block->icptstatus & 0x02 &&
+ (!rc || rc == -EOPNOTSUPP))
+ per_rc = kvm_s390_handle_per_ifetch_icpt(vcpu);
+ return per_rc ? per_rc : rc;
+}
diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c
new file mode 100644
index 000000000..3dcfe98f7
--- /dev/null
+++ b/arch/s390/kvm/interrupt.c
@@ -0,0 +1,2927 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * handling kvm guest interrupts
+ *
+ * Copyright IBM Corp. 2008, 2015
+ *
+ * Author(s): Carsten Otte <cotte@de.ibm.com>
+ */
+
+#include <linux/interrupt.h>
+#include <linux/kvm_host.h>
+#include <linux/hrtimer.h>
+#include <linux/mmu_context.h>
+#include <linux/signal.h>
+#include <linux/slab.h>
+#include <linux/bitmap.h>
+#include <linux/vmalloc.h>
+#include <asm/asm-offsets.h>
+#include <asm/dis.h>
+#include <linux/uaccess.h>
+#include <asm/sclp.h>
+#include <asm/isc.h>
+#include <asm/gmap.h>
+#include <asm/switch_to.h>
+#include <asm/nmi.h>
+#include "kvm-s390.h"
+#include "gaccess.h"
+#include "trace-s390.h"
+
+#define PFAULT_INIT 0x0600
+#define PFAULT_DONE 0x0680
+#define VIRTIO_PARAM 0x0d00
+
+/* handle external calls via sigp interpretation facility */
+static int sca_ext_call_pending(struct kvm_vcpu *vcpu, int *src_id)
+{
+ int c, scn;
+
+ if (!kvm_s390_test_cpuflags(vcpu, CPUSTAT_ECALL_PEND))
+ return 0;
+
+ BUG_ON(!kvm_s390_use_sca_entries());
+ read_lock(&vcpu->kvm->arch.sca_lock);
+ if (vcpu->kvm->arch.use_esca) {
+ struct esca_block *sca = vcpu->kvm->arch.sca;
+ union esca_sigp_ctrl sigp_ctrl =
+ sca->cpu[vcpu->vcpu_id].sigp_ctrl;
+
+ c = sigp_ctrl.c;
+ scn = sigp_ctrl.scn;
+ } else {
+ struct bsca_block *sca = vcpu->kvm->arch.sca;
+ union bsca_sigp_ctrl sigp_ctrl =
+ sca->cpu[vcpu->vcpu_id].sigp_ctrl;
+
+ c = sigp_ctrl.c;
+ scn = sigp_ctrl.scn;
+ }
+ read_unlock(&vcpu->kvm->arch.sca_lock);
+
+ if (src_id)
+ *src_id = scn;
+
+ return c;
+}
+
+static int sca_inject_ext_call(struct kvm_vcpu *vcpu, int src_id)
+{
+ int expect, rc;
+
+ BUG_ON(!kvm_s390_use_sca_entries());
+ read_lock(&vcpu->kvm->arch.sca_lock);
+ if (vcpu->kvm->arch.use_esca) {
+ struct esca_block *sca = vcpu->kvm->arch.sca;
+ union esca_sigp_ctrl *sigp_ctrl =
+ &(sca->cpu[vcpu->vcpu_id].sigp_ctrl);
+ union esca_sigp_ctrl new_val = {0}, old_val = *sigp_ctrl;
+
+ new_val.scn = src_id;
+ new_val.c = 1;
+ old_val.c = 0;
+
+ expect = old_val.value;
+ rc = cmpxchg(&sigp_ctrl->value, old_val.value, new_val.value);
+ } else {
+ struct bsca_block *sca = vcpu->kvm->arch.sca;
+ union bsca_sigp_ctrl *sigp_ctrl =
+ &(sca->cpu[vcpu->vcpu_id].sigp_ctrl);
+ union bsca_sigp_ctrl new_val = {0}, old_val = *sigp_ctrl;
+
+ new_val.scn = src_id;
+ new_val.c = 1;
+ old_val.c = 0;
+
+ expect = old_val.value;
+ rc = cmpxchg(&sigp_ctrl->value, old_val.value, new_val.value);
+ }
+ read_unlock(&vcpu->kvm->arch.sca_lock);
+
+ if (rc != expect) {
+ /* another external call is pending */
+ return -EBUSY;
+ }
+ kvm_s390_set_cpuflags(vcpu, CPUSTAT_ECALL_PEND);
+ return 0;
+}
+
+static void sca_clear_ext_call(struct kvm_vcpu *vcpu)
+{
+ int rc, expect;
+
+ if (!kvm_s390_use_sca_entries())
+ return;
+ kvm_s390_clear_cpuflags(vcpu, CPUSTAT_ECALL_PEND);
+ read_lock(&vcpu->kvm->arch.sca_lock);
+ if (vcpu->kvm->arch.use_esca) {
+ struct esca_block *sca = vcpu->kvm->arch.sca;
+ union esca_sigp_ctrl *sigp_ctrl =
+ &(sca->cpu[vcpu->vcpu_id].sigp_ctrl);
+ union esca_sigp_ctrl old = *sigp_ctrl;
+
+ expect = old.value;
+ rc = cmpxchg(&sigp_ctrl->value, old.value, 0);
+ } else {
+ struct bsca_block *sca = vcpu->kvm->arch.sca;
+ union bsca_sigp_ctrl *sigp_ctrl =
+ &(sca->cpu[vcpu->vcpu_id].sigp_ctrl);
+ union bsca_sigp_ctrl old = *sigp_ctrl;
+
+ expect = old.value;
+ rc = cmpxchg(&sigp_ctrl->value, old.value, 0);
+ }
+ read_unlock(&vcpu->kvm->arch.sca_lock);
+ WARN_ON(rc != expect); /* cannot clear? */
+}
+
+int psw_extint_disabled(struct kvm_vcpu *vcpu)
+{
+ return !(vcpu->arch.sie_block->gpsw.mask & PSW_MASK_EXT);
+}
+
+static int psw_ioint_disabled(struct kvm_vcpu *vcpu)
+{
+ return !(vcpu->arch.sie_block->gpsw.mask & PSW_MASK_IO);
+}
+
+static int psw_mchk_disabled(struct kvm_vcpu *vcpu)
+{
+ return !(vcpu->arch.sie_block->gpsw.mask & PSW_MASK_MCHECK);
+}
+
+static int psw_interrupts_disabled(struct kvm_vcpu *vcpu)
+{
+ return psw_extint_disabled(vcpu) &&
+ psw_ioint_disabled(vcpu) &&
+ psw_mchk_disabled(vcpu);
+}
+
+static int ckc_interrupts_enabled(struct kvm_vcpu *vcpu)
+{
+ if (psw_extint_disabled(vcpu) ||
+ !(vcpu->arch.sie_block->gcr[0] & CR0_CLOCK_COMPARATOR_SUBMASK))
+ return 0;
+ if (guestdbg_enabled(vcpu) && guestdbg_sstep_enabled(vcpu))
+ /* No timer interrupts when single stepping */
+ return 0;
+ return 1;
+}
+
+static int ckc_irq_pending(struct kvm_vcpu *vcpu)
+{
+ const u64 now = kvm_s390_get_tod_clock_fast(vcpu->kvm);
+ const u64 ckc = vcpu->arch.sie_block->ckc;
+
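+	/* With the clock-comparator sign control set in CR0, compare as signed values. */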
+ if (vcpu->arch.sie_block->gcr[0] & CR0_CLOCK_COMPARATOR_SIGN) {
+ if ((s64)ckc >= (s64)now)
+ return 0;
+ } else if (ckc >= now) {
+ return 0;
+ }
+ return ckc_interrupts_enabled(vcpu);
+}
+
+static int cpu_timer_interrupts_enabled(struct kvm_vcpu *vcpu)
+{
+ return !psw_extint_disabled(vcpu) &&
+ (vcpu->arch.sie_block->gcr[0] & CR0_CPU_TIMER_SUBMASK);
+}
+
+static int cpu_timer_irq_pending(struct kvm_vcpu *vcpu)
+{
+ if (!cpu_timer_interrupts_enabled(vcpu))
+ return 0;
+ return kvm_s390_get_cpu_timer(vcpu) >> 63;
+}
+
+static uint64_t isc_to_isc_bits(int isc)
+{
+ return (0x80 >> isc) << 24;
+}
+
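+/* An I/O interruption word for adapter interrupts: bit 0 set, ISC in bits 2-4. */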
+static inline u32 isc_to_int_word(u8 isc)
+{
+ return ((u32)isc << 27) | 0x80000000;
+}
+
+static inline u8 int_word_to_isc(u32 int_word)
+{
+ return (int_word & 0x38000000) >> 27;
+}
+
+/*
+ * To use atomic bitmap functions, we have to provide a bitmap address
+ * that is u64 aligned. However, the ipm might be u32 aligned.
+ * Therefore, we logically start the bitmap at the very beginning of the
+ * struct and fixup the bit number.
+ */
+#define IPM_BIT_OFFSET (offsetof(struct kvm_s390_gisa, ipm) * BITS_PER_BYTE)
+
+static inline void kvm_s390_gisa_set_ipm_gisc(struct kvm_s390_gisa *gisa, u32 gisc)
+{
+ set_bit_inv(IPM_BIT_OFFSET + gisc, (unsigned long *) gisa);
+}
+
+static inline u8 kvm_s390_gisa_get_ipm(struct kvm_s390_gisa *gisa)
+{
+ return READ_ONCE(gisa->ipm);
+}
+
+static inline void kvm_s390_gisa_clear_ipm_gisc(struct kvm_s390_gisa *gisa, u32 gisc)
+{
+ clear_bit_inv(IPM_BIT_OFFSET + gisc, (unsigned long *) gisa);
+}
+
+static inline int kvm_s390_gisa_tac_ipm_gisc(struct kvm_s390_gisa *gisa, u32 gisc)
+{
+ return test_and_clear_bit_inv(IPM_BIT_OFFSET + gisc, (unsigned long *) gisa);
+}
+
+static inline unsigned long pending_irqs_no_gisa(struct kvm_vcpu *vcpu)
+{
+ return vcpu->kvm->arch.float_int.pending_irqs |
+ vcpu->arch.local_int.pending_irqs;
+}
+
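+/*
+ * The GISA IPM keeps ISC 0 in its most significant bit, so shifting it by
+ * IRQ_PEND_IO_ISC_7 lines each ISC bit up with its IRQ_PEND_IO_ISC_* position.
+ */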
+static inline unsigned long pending_irqs(struct kvm_vcpu *vcpu)
+{
+ return pending_irqs_no_gisa(vcpu) |
+ kvm_s390_gisa_get_ipm(vcpu->kvm->arch.gisa) << IRQ_PEND_IO_ISC_7;
+}
+
+static inline int isc_to_irq_type(unsigned long isc)
+{
+ return IRQ_PEND_IO_ISC_0 - isc;
+}
+
+static inline int irq_type_to_isc(unsigned long irq_type)
+{
+ return IRQ_PEND_IO_ISC_0 - irq_type;
+}
+
+static unsigned long disable_iscs(struct kvm_vcpu *vcpu,
+ unsigned long active_mask)
+{
+ int i;
+
+ for (i = 0; i <= MAX_ISC; i++)
+ if (!(vcpu->arch.sie_block->gcr[6] & isc_to_isc_bits(i)))
+ active_mask &= ~(1UL << (isc_to_irq_type(i)));
+
+ return active_mask;
+}
+
+static unsigned long deliverable_irqs(struct kvm_vcpu *vcpu)
+{
+ unsigned long active_mask;
+
+ active_mask = pending_irqs(vcpu);
+ if (!active_mask)
+ return 0;
+
+ if (psw_extint_disabled(vcpu))
+ active_mask &= ~IRQ_PEND_EXT_MASK;
+ if (psw_ioint_disabled(vcpu))
+ active_mask &= ~IRQ_PEND_IO_MASK;
+ else
+ active_mask = disable_iscs(vcpu, active_mask);
+ if (!(vcpu->arch.sie_block->gcr[0] & CR0_EXTERNAL_CALL_SUBMASK))
+ __clear_bit(IRQ_PEND_EXT_EXTERNAL, &active_mask);
+ if (!(vcpu->arch.sie_block->gcr[0] & CR0_EMERGENCY_SIGNAL_SUBMASK))
+ __clear_bit(IRQ_PEND_EXT_EMERGENCY, &active_mask);
+ if (!(vcpu->arch.sie_block->gcr[0] & CR0_CLOCK_COMPARATOR_SUBMASK))
+ __clear_bit(IRQ_PEND_EXT_CLOCK_COMP, &active_mask);
+ if (!(vcpu->arch.sie_block->gcr[0] & CR0_CPU_TIMER_SUBMASK))
+ __clear_bit(IRQ_PEND_EXT_CPU_TIMER, &active_mask);
+ if (!(vcpu->arch.sie_block->gcr[0] & CR0_SERVICE_SIGNAL_SUBMASK))
+ __clear_bit(IRQ_PEND_EXT_SERVICE, &active_mask);
+ if (psw_mchk_disabled(vcpu))
+ active_mask &= ~IRQ_PEND_MCHK_MASK;
+ /*
+	 * Check both floating and local interrupts' cr14 because
+ * bit IRQ_PEND_MCHK_REP could be set in both cases.
+ */
+ if (!(vcpu->arch.sie_block->gcr[14] &
+ (vcpu->kvm->arch.float_int.mchk.cr14 |
+ vcpu->arch.local_int.irq.mchk.cr14)))
+ __clear_bit(IRQ_PEND_MCHK_REP, &active_mask);
+
+ /*
+ * STOP irqs will never be actively delivered. They are triggered via
+ * intercept requests and cleared when the stop intercept is performed.
+ */
+ __clear_bit(IRQ_PEND_SIGP_STOP, &active_mask);
+
+ return active_mask;
+}
+
+static void __set_cpu_idle(struct kvm_vcpu *vcpu)
+{
+ kvm_s390_set_cpuflags(vcpu, CPUSTAT_WAIT);
+ set_bit(kvm_vcpu_get_idx(vcpu), vcpu->kvm->arch.float_int.idle_mask);
+}
+
+static void __unset_cpu_idle(struct kvm_vcpu *vcpu)
+{
+ kvm_s390_clear_cpuflags(vcpu, CPUSTAT_WAIT);
+ clear_bit(kvm_vcpu_get_idx(vcpu), vcpu->kvm->arch.float_int.idle_mask);
+}
+
+static void __reset_intercept_indicators(struct kvm_vcpu *vcpu)
+{
+ kvm_s390_clear_cpuflags(vcpu, CPUSTAT_IO_INT | CPUSTAT_EXT_INT |
+ CPUSTAT_STOP_INT);
+ vcpu->arch.sie_block->lctl = 0x0000;
+ vcpu->arch.sie_block->ictl &= ~(ICTL_LPSW | ICTL_STCTL | ICTL_PINT);
+
+ if (guestdbg_enabled(vcpu)) {
+ vcpu->arch.sie_block->lctl |= (LCTL_CR0 | LCTL_CR9 |
+ LCTL_CR10 | LCTL_CR11);
+ vcpu->arch.sie_block->ictl |= (ICTL_STCTL | ICTL_PINT);
+ }
+}
+
+static void set_intercept_indicators_io(struct kvm_vcpu *vcpu)
+{
+ if (!(pending_irqs_no_gisa(vcpu) & IRQ_PEND_IO_MASK))
+ return;
+ else if (psw_ioint_disabled(vcpu))
+ kvm_s390_set_cpuflags(vcpu, CPUSTAT_IO_INT);
+ else
+ vcpu->arch.sie_block->lctl |= LCTL_CR6;
+}
+
+static void set_intercept_indicators_ext(struct kvm_vcpu *vcpu)
+{
+ if (!(pending_irqs(vcpu) & IRQ_PEND_EXT_MASK))
+ return;
+ if (psw_extint_disabled(vcpu))
+ kvm_s390_set_cpuflags(vcpu, CPUSTAT_EXT_INT);
+ else
+ vcpu->arch.sie_block->lctl |= LCTL_CR0;
+}
+
+static void set_intercept_indicators_mchk(struct kvm_vcpu *vcpu)
+{
+ if (!(pending_irqs(vcpu) & IRQ_PEND_MCHK_MASK))
+ return;
+ if (psw_mchk_disabled(vcpu))
+ vcpu->arch.sie_block->ictl |= ICTL_LPSW;
+ else
+ vcpu->arch.sie_block->lctl |= LCTL_CR14;
+}
+
+static void set_intercept_indicators_stop(struct kvm_vcpu *vcpu)
+{
+ if (kvm_s390_is_stop_irq_pending(vcpu))
+ kvm_s390_set_cpuflags(vcpu, CPUSTAT_STOP_INT);
+}
+
+/* Set interception request for non-deliverable interrupts */
+static void set_intercept_indicators(struct kvm_vcpu *vcpu)
+{
+ set_intercept_indicators_io(vcpu);
+ set_intercept_indicators_ext(vcpu);
+ set_intercept_indicators_mchk(vcpu);
+ set_intercept_indicators_stop(vcpu);
+}
+
+static int __must_check __deliver_cpu_timer(struct kvm_vcpu *vcpu)
+{
+ struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
+ int rc;
+
+ vcpu->stat.deliver_cputm++;
+ trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, KVM_S390_INT_CPU_TIMER,
+ 0, 0);
+
+ rc = put_guest_lc(vcpu, EXT_IRQ_CPU_TIMER,
+ (u16 *)__LC_EXT_INT_CODE);
+ rc |= put_guest_lc(vcpu, 0, (u16 *)__LC_EXT_CPU_ADDR);
+ rc |= write_guest_lc(vcpu, __LC_EXT_OLD_PSW,
+ &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
+ rc |= read_guest_lc(vcpu, __LC_EXT_NEW_PSW,
+ &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
+ clear_bit(IRQ_PEND_EXT_CPU_TIMER, &li->pending_irqs);
+ return rc ? -EFAULT : 0;
+}
+
+static int __must_check __deliver_ckc(struct kvm_vcpu *vcpu)
+{
+ struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
+ int rc;
+
+ vcpu->stat.deliver_ckc++;
+ trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, KVM_S390_INT_CLOCK_COMP,
+ 0, 0);
+
+ rc = put_guest_lc(vcpu, EXT_IRQ_CLK_COMP,
+ (u16 __user *)__LC_EXT_INT_CODE);
+ rc |= put_guest_lc(vcpu, 0, (u16 *)__LC_EXT_CPU_ADDR);
+ rc |= write_guest_lc(vcpu, __LC_EXT_OLD_PSW,
+ &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
+ rc |= read_guest_lc(vcpu, __LC_EXT_NEW_PSW,
+ &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
+ clear_bit(IRQ_PEND_EXT_CLOCK_COMP, &li->pending_irqs);
+ return rc ? -EFAULT : 0;
+}
+
+static int __must_check __deliver_pfault_init(struct kvm_vcpu *vcpu)
+{
+ struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
+ struct kvm_s390_ext_info ext;
+ int rc;
+
+ spin_lock(&li->lock);
+ ext = li->irq.ext;
+ clear_bit(IRQ_PEND_PFAULT_INIT, &li->pending_irqs);
+ li->irq.ext.ext_params2 = 0;
+ spin_unlock(&li->lock);
+
+ VCPU_EVENT(vcpu, 4, "deliver: pfault init token 0x%llx",
+ ext.ext_params2);
+ trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id,
+ KVM_S390_INT_PFAULT_INIT,
+ 0, ext.ext_params2);
+
+ rc = put_guest_lc(vcpu, EXT_IRQ_CP_SERVICE, (u16 *) __LC_EXT_INT_CODE);
+ rc |= put_guest_lc(vcpu, PFAULT_INIT, (u16 *) __LC_EXT_CPU_ADDR);
+ rc |= write_guest_lc(vcpu, __LC_EXT_OLD_PSW,
+ &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
+ rc |= read_guest_lc(vcpu, __LC_EXT_NEW_PSW,
+ &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
+ rc |= put_guest_lc(vcpu, ext.ext_params2, (u64 *) __LC_EXT_PARAMS2);
+ return rc ? -EFAULT : 0;
+}
+
+static int __write_machine_check(struct kvm_vcpu *vcpu,
+ struct kvm_s390_mchk_info *mchk)
+{
+ unsigned long ext_sa_addr;
+ unsigned long lc;
+ freg_t fprs[NUM_FPRS];
+ union mci mci;
+ int rc;
+
+ mci.val = mchk->mcic;
+ /* take care of lazy register loading */
+ save_fpu_regs();
+ save_access_regs(vcpu->run->s.regs.acrs);
+ if (MACHINE_HAS_GS && vcpu->arch.gs_enabled)
+ save_gs_cb(current->thread.gs_cb);
+
+ /* Extended save area */
+ rc = read_guest_lc(vcpu, __LC_MCESAD, &ext_sa_addr,
+ sizeof(unsigned long));
+ /* Only bits 0 through 63-LC are used for address formation */
+ lc = ext_sa_addr & MCESA_LC_MASK;
+ if (test_kvm_facility(vcpu->kvm, 133)) {
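+		/*
+		 * LC selects the alignment of the extended save area: 1 KB
+		 * for 0 and 10, 2 KB for 11, 4 KB for 12; any other value
+		 * disables the extended save area.
+		 */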
+ switch (lc) {
+ case 0:
+ case 10:
+ ext_sa_addr &= ~0x3ffUL;
+ break;
+ case 11:
+ ext_sa_addr &= ~0x7ffUL;
+ break;
+ case 12:
+ ext_sa_addr &= ~0xfffUL;
+ break;
+ default:
+ ext_sa_addr = 0;
+ break;
+ }
+ } else {
+ ext_sa_addr &= ~0x3ffUL;
+ }
+
+ if (!rc && mci.vr && ext_sa_addr && test_kvm_facility(vcpu->kvm, 129)) {
+ if (write_guest_abs(vcpu, ext_sa_addr, vcpu->run->s.regs.vrs,
+ 512))
+ mci.vr = 0;
+ } else {
+ mci.vr = 0;
+ }
+ if (!rc && mci.gs && ext_sa_addr && test_kvm_facility(vcpu->kvm, 133)
+ && (lc == 11 || lc == 12)) {
+ if (write_guest_abs(vcpu, ext_sa_addr + 1024,
+ &vcpu->run->s.regs.gscb, 32))
+ mci.gs = 0;
+ } else {
+ mci.gs = 0;
+ }
+
+ /* General interruption information */
+ rc |= put_guest_lc(vcpu, 1, (u8 __user *) __LC_AR_MODE_ID);
+ rc |= write_guest_lc(vcpu, __LC_MCK_OLD_PSW,
+ &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
+ rc |= read_guest_lc(vcpu, __LC_MCK_NEW_PSW,
+ &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
+ rc |= put_guest_lc(vcpu, mci.val, (u64 __user *) __LC_MCCK_CODE);
+
+ /* Register-save areas */
+ if (MACHINE_HAS_VX) {
+ convert_vx_to_fp(fprs, (__vector128 *) vcpu->run->s.regs.vrs);
+ rc |= write_guest_lc(vcpu, __LC_FPREGS_SAVE_AREA, fprs, 128);
+ } else {
+ rc |= write_guest_lc(vcpu, __LC_FPREGS_SAVE_AREA,
+ vcpu->run->s.regs.fprs, 128);
+ }
+ rc |= write_guest_lc(vcpu, __LC_GPREGS_SAVE_AREA,
+ vcpu->run->s.regs.gprs, 128);
+ rc |= put_guest_lc(vcpu, current->thread.fpu.fpc,
+ (u32 __user *) __LC_FP_CREG_SAVE_AREA);
+ rc |= put_guest_lc(vcpu, vcpu->arch.sie_block->todpr,
+ (u32 __user *) __LC_TOD_PROGREG_SAVE_AREA);
+ rc |= put_guest_lc(vcpu, kvm_s390_get_cpu_timer(vcpu),
+ (u64 __user *) __LC_CPU_TIMER_SAVE_AREA);
+ rc |= put_guest_lc(vcpu, vcpu->arch.sie_block->ckc >> 8,
+ (u64 __user *) __LC_CLOCK_COMP_SAVE_AREA);
+ rc |= write_guest_lc(vcpu, __LC_AREGS_SAVE_AREA,
+ &vcpu->run->s.regs.acrs, 64);
+ rc |= write_guest_lc(vcpu, __LC_CREGS_SAVE_AREA,
+ &vcpu->arch.sie_block->gcr, 128);
+
+ /* Extended interruption information */
+ rc |= put_guest_lc(vcpu, mchk->ext_damage_code,
+ (u32 __user *) __LC_EXT_DAMAGE_CODE);
+ rc |= put_guest_lc(vcpu, mchk->failing_storage_address,
+ (u64 __user *) __LC_MCCK_FAIL_STOR_ADDR);
+ rc |= write_guest_lc(vcpu, __LC_PSW_SAVE_AREA, &mchk->fixed_logout,
+ sizeof(mchk->fixed_logout));
+ return rc ? -EFAULT : 0;
+}
+
+static int __must_check __deliver_machine_check(struct kvm_vcpu *vcpu)
+{
+ struct kvm_s390_float_interrupt *fi = &vcpu->kvm->arch.float_int;
+ struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
+ struct kvm_s390_mchk_info mchk = {};
+ int deliver = 0;
+ int rc = 0;
+
+ spin_lock(&fi->lock);
+ spin_lock(&li->lock);
+ if (test_bit(IRQ_PEND_MCHK_EX, &li->pending_irqs) ||
+ test_bit(IRQ_PEND_MCHK_REP, &li->pending_irqs)) {
+ /*
+ * If there was an exigent machine check pending, then any
+ * repressible machine checks that might have been pending
+ * are indicated along with it, so always clear bits for
+ * repressible and exigent interrupts
+ */
+ mchk = li->irq.mchk;
+ clear_bit(IRQ_PEND_MCHK_EX, &li->pending_irqs);
+ clear_bit(IRQ_PEND_MCHK_REP, &li->pending_irqs);
+ memset(&li->irq.mchk, 0, sizeof(mchk));
+ deliver = 1;
+ }
+ /*
+ * We indicate floating repressible conditions along with
+ * other pending conditions. Channel Report Pending and Channel
+	 * Subsystem damage are the only two and are indicated by
+ * bits in mcic and masked in cr14.
+ */
+ if (test_and_clear_bit(IRQ_PEND_MCHK_REP, &fi->pending_irqs)) {
+ mchk.mcic |= fi->mchk.mcic;
+ mchk.cr14 |= fi->mchk.cr14;
+ memset(&fi->mchk, 0, sizeof(mchk));
+ deliver = 1;
+ }
+ spin_unlock(&li->lock);
+ spin_unlock(&fi->lock);
+
+ if (deliver) {
+ VCPU_EVENT(vcpu, 3, "deliver: machine check mcic 0x%llx",
+ mchk.mcic);
+ trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id,
+ KVM_S390_MCHK,
+ mchk.cr14, mchk.mcic);
+ vcpu->stat.deliver_machine_check++;
+ rc = __write_machine_check(vcpu, &mchk);
+ }
+ return rc;
+}
+
+static int __must_check __deliver_restart(struct kvm_vcpu *vcpu)
+{
+ struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
+ int rc;
+
+ VCPU_EVENT(vcpu, 3, "%s", "deliver: cpu restart");
+ vcpu->stat.deliver_restart_signal++;
+ trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, KVM_S390_RESTART, 0, 0);
+
+ rc = write_guest_lc(vcpu,
+ offsetof(struct lowcore, restart_old_psw),
+ &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
+ rc |= read_guest_lc(vcpu, offsetof(struct lowcore, restart_psw),
+ &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
+ clear_bit(IRQ_PEND_RESTART, &li->pending_irqs);
+ return rc ? -EFAULT : 0;
+}
+
+static int __must_check __deliver_set_prefix(struct kvm_vcpu *vcpu)
+{
+ struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
+ struct kvm_s390_prefix_info prefix;
+
+ spin_lock(&li->lock);
+ prefix = li->irq.prefix;
+ li->irq.prefix.address = 0;
+ clear_bit(IRQ_PEND_SET_PREFIX, &li->pending_irqs);
+ spin_unlock(&li->lock);
+
+ vcpu->stat.deliver_prefix_signal++;
+ trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id,
+ KVM_S390_SIGP_SET_PREFIX,
+ prefix.address, 0);
+
+ kvm_s390_set_prefix(vcpu, prefix.address);
+ return 0;
+}
+
+static int __must_check __deliver_emergency_signal(struct kvm_vcpu *vcpu)
+{
+ struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
+ int rc;
+ int cpu_addr;
+
+ spin_lock(&li->lock);
+ cpu_addr = find_first_bit(li->sigp_emerg_pending, KVM_MAX_VCPUS);
+ clear_bit(cpu_addr, li->sigp_emerg_pending);
+ if (bitmap_empty(li->sigp_emerg_pending, KVM_MAX_VCPUS))
+ clear_bit(IRQ_PEND_EXT_EMERGENCY, &li->pending_irqs);
+ spin_unlock(&li->lock);
+
+ VCPU_EVENT(vcpu, 4, "%s", "deliver: sigp emerg");
+ vcpu->stat.deliver_emergency_signal++;
+ trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, KVM_S390_INT_EMERGENCY,
+ cpu_addr, 0);
+
+ rc = put_guest_lc(vcpu, EXT_IRQ_EMERGENCY_SIG,
+ (u16 *)__LC_EXT_INT_CODE);
+ rc |= put_guest_lc(vcpu, cpu_addr, (u16 *)__LC_EXT_CPU_ADDR);
+ rc |= write_guest_lc(vcpu, __LC_EXT_OLD_PSW,
+ &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
+ rc |= read_guest_lc(vcpu, __LC_EXT_NEW_PSW,
+ &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
+ return rc ? -EFAULT : 0;
+}
+
+static int __must_check __deliver_external_call(struct kvm_vcpu *vcpu)
+{
+ struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
+ struct kvm_s390_extcall_info extcall;
+ int rc;
+
+ spin_lock(&li->lock);
+ extcall = li->irq.extcall;
+ li->irq.extcall.code = 0;
+ clear_bit(IRQ_PEND_EXT_EXTERNAL, &li->pending_irqs);
+ spin_unlock(&li->lock);
+
+ VCPU_EVENT(vcpu, 4, "%s", "deliver: sigp ext call");
+ vcpu->stat.deliver_external_call++;
+ trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id,
+ KVM_S390_INT_EXTERNAL_CALL,
+ extcall.code, 0);
+
+ rc = put_guest_lc(vcpu, EXT_IRQ_EXTERNAL_CALL,
+ (u16 *)__LC_EXT_INT_CODE);
+ rc |= put_guest_lc(vcpu, extcall.code, (u16 *)__LC_EXT_CPU_ADDR);
+ rc |= write_guest_lc(vcpu, __LC_EXT_OLD_PSW,
+ &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
+ rc |= read_guest_lc(vcpu, __LC_EXT_NEW_PSW, &vcpu->arch.sie_block->gpsw,
+ sizeof(psw_t));
+ return rc ? -EFAULT : 0;
+}
+
+static int __must_check __deliver_prog(struct kvm_vcpu *vcpu)
+{
+ struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
+ struct kvm_s390_pgm_info pgm_info;
+ int rc = 0, nullifying = false;
+ u16 ilen;
+
+ spin_lock(&li->lock);
+ pgm_info = li->irq.pgm;
+ clear_bit(IRQ_PEND_PROG, &li->pending_irqs);
+ memset(&li->irq.pgm, 0, sizeof(pgm_info));
+ spin_unlock(&li->lock);
+
+ ilen = pgm_info.flags & KVM_S390_PGM_FLAGS_ILC_MASK;
+ VCPU_EVENT(vcpu, 3, "deliver: program irq code 0x%x, ilen:%d",
+ pgm_info.code, ilen);
+ vcpu->stat.deliver_program++;
+ trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, KVM_S390_PROGRAM_INT,
+ pgm_info.code, 0);
+
+ switch (pgm_info.code & ~PGM_PER) {
+ case PGM_AFX_TRANSLATION:
+ case PGM_ASX_TRANSLATION:
+ case PGM_EX_TRANSLATION:
+ case PGM_LFX_TRANSLATION:
+ case PGM_LSTE_SEQUENCE:
+ case PGM_LSX_TRANSLATION:
+ case PGM_LX_TRANSLATION:
+ case PGM_PRIMARY_AUTHORITY:
+ case PGM_SECONDARY_AUTHORITY:
+ nullifying = true;
+ /* fall through */
+ case PGM_SPACE_SWITCH:
+ rc = put_guest_lc(vcpu, pgm_info.trans_exc_code,
+ (u64 *)__LC_TRANS_EXC_CODE);
+ break;
+ case PGM_ALEN_TRANSLATION:
+ case PGM_ALE_SEQUENCE:
+ case PGM_ASTE_INSTANCE:
+ case PGM_ASTE_SEQUENCE:
+ case PGM_ASTE_VALIDITY:
+ case PGM_EXTENDED_AUTHORITY:
+ rc = put_guest_lc(vcpu, pgm_info.exc_access_id,
+ (u8 *)__LC_EXC_ACCESS_ID);
+ nullifying = true;
+ break;
+ case PGM_ASCE_TYPE:
+ case PGM_PAGE_TRANSLATION:
+ case PGM_REGION_FIRST_TRANS:
+ case PGM_REGION_SECOND_TRANS:
+ case PGM_REGION_THIRD_TRANS:
+ case PGM_SEGMENT_TRANSLATION:
+ rc = put_guest_lc(vcpu, pgm_info.trans_exc_code,
+ (u64 *)__LC_TRANS_EXC_CODE);
+ rc |= put_guest_lc(vcpu, pgm_info.exc_access_id,
+ (u8 *)__LC_EXC_ACCESS_ID);
+ rc |= put_guest_lc(vcpu, pgm_info.op_access_id,
+ (u8 *)__LC_OP_ACCESS_ID);
+ nullifying = true;
+ break;
+ case PGM_MONITOR:
+ rc = put_guest_lc(vcpu, pgm_info.mon_class_nr,
+ (u16 *)__LC_MON_CLASS_NR);
+ rc |= put_guest_lc(vcpu, pgm_info.mon_code,
+ (u64 *)__LC_MON_CODE);
+ break;
+ case PGM_VECTOR_PROCESSING:
+ case PGM_DATA:
+ rc = put_guest_lc(vcpu, pgm_info.data_exc_code,
+ (u32 *)__LC_DATA_EXC_CODE);
+ break;
+ case PGM_PROTECTION:
+ rc = put_guest_lc(vcpu, pgm_info.trans_exc_code,
+ (u64 *)__LC_TRANS_EXC_CODE);
+ rc |= put_guest_lc(vcpu, pgm_info.exc_access_id,
+ (u8 *)__LC_EXC_ACCESS_ID);
+ break;
+ case PGM_STACK_FULL:
+ case PGM_STACK_EMPTY:
+ case PGM_STACK_SPECIFICATION:
+ case PGM_STACK_TYPE:
+ case PGM_STACK_OPERATION:
+ case PGM_TRACE_TABEL:
+ case PGM_CRYPTO_OPERATION:
+ nullifying = true;
+ break;
+ }
+
+ if (pgm_info.code & PGM_PER) {
+ rc |= put_guest_lc(vcpu, pgm_info.per_code,
+ (u8 *) __LC_PER_CODE);
+ rc |= put_guest_lc(vcpu, pgm_info.per_atmid,
+ (u8 *)__LC_PER_ATMID);
+ rc |= put_guest_lc(vcpu, pgm_info.per_address,
+ (u64 *) __LC_PER_ADDRESS);
+ rc |= put_guest_lc(vcpu, pgm_info.per_access_id,
+ (u8 *) __LC_PER_ACCESS_ID);
+ }
+
+ if (nullifying && !(pgm_info.flags & KVM_S390_PGM_FLAGS_NO_REWIND))
+ kvm_s390_rewind_psw(vcpu, ilen);
+
+ /* bit 1+2 of the target are the ilc, so we can directly use ilen */
+ rc |= put_guest_lc(vcpu, ilen, (u16 *) __LC_PGM_ILC);
+ rc |= put_guest_lc(vcpu, vcpu->arch.sie_block->gbea,
+ (u64 *) __LC_LAST_BREAK);
+ rc |= put_guest_lc(vcpu, pgm_info.code,
+ (u16 *)__LC_PGM_INT_CODE);
+ rc |= write_guest_lc(vcpu, __LC_PGM_OLD_PSW,
+ &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
+ rc |= read_guest_lc(vcpu, __LC_PGM_NEW_PSW,
+ &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
+ return rc ? -EFAULT : 0;
+}
+
+static int __must_check __deliver_service(struct kvm_vcpu *vcpu)
+{
+ struct kvm_s390_float_interrupt *fi = &vcpu->kvm->arch.float_int;
+ struct kvm_s390_ext_info ext;
+ int rc = 0;
+
+ spin_lock(&fi->lock);
+ if (!(test_bit(IRQ_PEND_EXT_SERVICE, &fi->pending_irqs))) {
+ spin_unlock(&fi->lock);
+ return 0;
+ }
+ ext = fi->srv_signal;
+ memset(&fi->srv_signal, 0, sizeof(ext));
+ clear_bit(IRQ_PEND_EXT_SERVICE, &fi->pending_irqs);
+ spin_unlock(&fi->lock);
+
+ VCPU_EVENT(vcpu, 4, "deliver: sclp parameter 0x%x",
+ ext.ext_params);
+ vcpu->stat.deliver_service_signal++;
+ trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, KVM_S390_INT_SERVICE,
+ ext.ext_params, 0);
+
+ rc = put_guest_lc(vcpu, EXT_IRQ_SERVICE_SIG, (u16 *)__LC_EXT_INT_CODE);
+ rc |= put_guest_lc(vcpu, 0, (u16 *)__LC_EXT_CPU_ADDR);
+ rc |= write_guest_lc(vcpu, __LC_EXT_OLD_PSW,
+ &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
+ rc |= read_guest_lc(vcpu, __LC_EXT_NEW_PSW,
+ &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
+ rc |= put_guest_lc(vcpu, ext.ext_params,
+ (u32 *)__LC_EXT_PARAMS);
+
+ return rc ? -EFAULT : 0;
+}
+
+static int __must_check __deliver_pfault_done(struct kvm_vcpu *vcpu)
+{
+ struct kvm_s390_float_interrupt *fi = &vcpu->kvm->arch.float_int;
+ struct kvm_s390_interrupt_info *inti;
+ int rc = 0;
+
+ spin_lock(&fi->lock);
+ inti = list_first_entry_or_null(&fi->lists[FIRQ_LIST_PFAULT],
+ struct kvm_s390_interrupt_info,
+ list);
+ if (inti) {
+ list_del(&inti->list);
+ fi->counters[FIRQ_CNTR_PFAULT] -= 1;
+ }
+ if (list_empty(&fi->lists[FIRQ_LIST_PFAULT]))
+ clear_bit(IRQ_PEND_PFAULT_DONE, &fi->pending_irqs);
+ spin_unlock(&fi->lock);
+
+ if (inti) {
+ trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id,
+ KVM_S390_INT_PFAULT_DONE, 0,
+ inti->ext.ext_params2);
+ VCPU_EVENT(vcpu, 4, "deliver: pfault done token 0x%llx",
+ inti->ext.ext_params2);
+
+ rc = put_guest_lc(vcpu, EXT_IRQ_CP_SERVICE,
+ (u16 *)__LC_EXT_INT_CODE);
+ rc |= put_guest_lc(vcpu, PFAULT_DONE,
+ (u16 *)__LC_EXT_CPU_ADDR);
+ rc |= write_guest_lc(vcpu, __LC_EXT_OLD_PSW,
+ &vcpu->arch.sie_block->gpsw,
+ sizeof(psw_t));
+ rc |= read_guest_lc(vcpu, __LC_EXT_NEW_PSW,
+ &vcpu->arch.sie_block->gpsw,
+ sizeof(psw_t));
+ rc |= put_guest_lc(vcpu, inti->ext.ext_params2,
+ (u64 *)__LC_EXT_PARAMS2);
+ kfree(inti);
+ }
+ return rc ? -EFAULT : 0;
+}
+
+static int __must_check __deliver_virtio(struct kvm_vcpu *vcpu)
+{
+ struct kvm_s390_float_interrupt *fi = &vcpu->kvm->arch.float_int;
+ struct kvm_s390_interrupt_info *inti;
+ int rc = 0;
+
+ spin_lock(&fi->lock);
+ inti = list_first_entry_or_null(&fi->lists[FIRQ_LIST_VIRTIO],
+ struct kvm_s390_interrupt_info,
+ list);
+ if (inti) {
+ VCPU_EVENT(vcpu, 4,
+ "deliver: virtio parm: 0x%x,parm64: 0x%llx",
+ inti->ext.ext_params, inti->ext.ext_params2);
+ vcpu->stat.deliver_virtio++;
+ trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id,
+ inti->type,
+ inti->ext.ext_params,
+ inti->ext.ext_params2);
+ list_del(&inti->list);
+ fi->counters[FIRQ_CNTR_VIRTIO] -= 1;
+ }
+ if (list_empty(&fi->lists[FIRQ_LIST_VIRTIO]))
+ clear_bit(IRQ_PEND_VIRTIO, &fi->pending_irqs);
+ spin_unlock(&fi->lock);
+
+ if (inti) {
+ rc = put_guest_lc(vcpu, EXT_IRQ_CP_SERVICE,
+ (u16 *)__LC_EXT_INT_CODE);
+ rc |= put_guest_lc(vcpu, VIRTIO_PARAM,
+ (u16 *)__LC_EXT_CPU_ADDR);
+ rc |= write_guest_lc(vcpu, __LC_EXT_OLD_PSW,
+ &vcpu->arch.sie_block->gpsw,
+ sizeof(psw_t));
+ rc |= read_guest_lc(vcpu, __LC_EXT_NEW_PSW,
+ &vcpu->arch.sie_block->gpsw,
+ sizeof(psw_t));
+ rc |= put_guest_lc(vcpu, inti->ext.ext_params,
+ (u32 *)__LC_EXT_PARAMS);
+ rc |= put_guest_lc(vcpu, inti->ext.ext_params2,
+ (u64 *)__LC_EXT_PARAMS2);
+ kfree(inti);
+ }
+ return rc ? -EFAULT : 0;
+}
+
+static int __do_deliver_io(struct kvm_vcpu *vcpu, struct kvm_s390_io_info *io)
+{
+ int rc;
+
+ rc = put_guest_lc(vcpu, io->subchannel_id, (u16 *)__LC_SUBCHANNEL_ID);
+ rc |= put_guest_lc(vcpu, io->subchannel_nr, (u16 *)__LC_SUBCHANNEL_NR);
+ rc |= put_guest_lc(vcpu, io->io_int_parm, (u32 *)__LC_IO_INT_PARM);
+ rc |= put_guest_lc(vcpu, io->io_int_word, (u32 *)__LC_IO_INT_WORD);
+ rc |= write_guest_lc(vcpu, __LC_IO_OLD_PSW,
+ &vcpu->arch.sie_block->gpsw,
+ sizeof(psw_t));
+ rc |= read_guest_lc(vcpu, __LC_IO_NEW_PSW,
+ &vcpu->arch.sie_block->gpsw,
+ sizeof(psw_t));
+ return rc ? -EFAULT : 0;
+}
+
+static int __must_check __deliver_io(struct kvm_vcpu *vcpu,
+ unsigned long irq_type)
+{
+ struct list_head *isc_list;
+ struct kvm_s390_float_interrupt *fi;
+ struct kvm_s390_interrupt_info *inti = NULL;
+ struct kvm_s390_io_info io;
+ u32 isc;
+ int rc = 0;
+
+ fi = &vcpu->kvm->arch.float_int;
+
+ spin_lock(&fi->lock);
+ isc = irq_type_to_isc(irq_type);
+ isc_list = &fi->lists[isc];
+ inti = list_first_entry_or_null(isc_list,
+ struct kvm_s390_interrupt_info,
+ list);
+ if (inti) {
+ if (inti->type & KVM_S390_INT_IO_AI_MASK)
+ VCPU_EVENT(vcpu, 4, "%s", "deliver: I/O (AI)");
+ else
+ VCPU_EVENT(vcpu, 4, "deliver: I/O %x ss %x schid %04x",
+ inti->io.subchannel_id >> 8,
+ inti->io.subchannel_id >> 1 & 0x3,
+ inti->io.subchannel_nr);
+
+ vcpu->stat.deliver_io++;
+ trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id,
+ inti->type,
+ ((__u32)inti->io.subchannel_id << 16) |
+ inti->io.subchannel_nr,
+ ((__u64)inti->io.io_int_parm << 32) |
+ inti->io.io_int_word);
+ list_del(&inti->list);
+ fi->counters[FIRQ_CNTR_IO] -= 1;
+ }
+ if (list_empty(isc_list))
+ clear_bit(irq_type, &fi->pending_irqs);
+ spin_unlock(&fi->lock);
+
+ if (inti) {
+ rc = __do_deliver_io(vcpu, &(inti->io));
+ kfree(inti);
+ goto out;
+ }
+
+ if (vcpu->kvm->arch.gisa &&
+ kvm_s390_gisa_tac_ipm_gisc(vcpu->kvm->arch.gisa, isc)) {
+ /*
+		 * If an adapter interrupt was not delivered while in SIE
+		 * context, KVM handles the delivery here.
+ */
+ VCPU_EVENT(vcpu, 4, "%s isc %u", "deliver: I/O (AI/gisa)", isc);
+ memset(&io, 0, sizeof(io));
+ io.io_int_word = isc_to_int_word(isc);
+ vcpu->stat.deliver_io++;
+ trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id,
+ KVM_S390_INT_IO(1, 0, 0, 0),
+ ((__u32)io.subchannel_id << 16) |
+ io.subchannel_nr,
+ ((__u64)io.io_int_parm << 32) |
+ io.io_int_word);
+ rc = __do_deliver_io(vcpu, &io);
+ }
+out:
+ return rc;
+}
+
+/* Check whether an external call is pending (deliverable or not) */
+int kvm_s390_ext_call_pending(struct kvm_vcpu *vcpu)
+{
+ struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
+
+ if (!sclp.has_sigpif)
+ return test_bit(IRQ_PEND_EXT_EXTERNAL, &li->pending_irqs);
+
+ return sca_ext_call_pending(vcpu, NULL);
+}
+
+int kvm_s390_vcpu_has_irq(struct kvm_vcpu *vcpu, int exclude_stop)
+{
+ if (deliverable_irqs(vcpu))
+ return 1;
+
+ if (kvm_cpu_has_pending_timer(vcpu))
+ return 1;
+
+ /* external call pending and deliverable */
+ if (kvm_s390_ext_call_pending(vcpu) &&
+ !psw_extint_disabled(vcpu) &&
+ (vcpu->arch.sie_block->gcr[0] & CR0_EXTERNAL_CALL_SUBMASK))
+ return 1;
+
+ if (!exclude_stop && kvm_s390_is_stop_irq_pending(vcpu))
+ return 1;
+ return 0;
+}
+
+int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu)
+{
+ return ckc_irq_pending(vcpu) || cpu_timer_irq_pending(vcpu);
+}
+
+static u64 __calculate_sltime(struct kvm_vcpu *vcpu)
+{
+ const u64 now = kvm_s390_get_tod_clock_fast(vcpu->kvm);
+ const u64 ckc = vcpu->arch.sie_block->ckc;
+ u64 cputm, sltime = 0;
+
+ if (ckc_interrupts_enabled(vcpu)) {
+ if (vcpu->arch.sie_block->gcr[0] & CR0_CLOCK_COMPARATOR_SIGN) {
+ if ((s64)now < (s64)ckc)
+ sltime = tod_to_ns((s64)ckc - (s64)now);
+ } else if (now < ckc) {
+ sltime = tod_to_ns(ckc - now);
+ }
+ /* already expired */
+ if (!sltime)
+ return 0;
+ if (cpu_timer_interrupts_enabled(vcpu)) {
+ cputm = kvm_s390_get_cpu_timer(vcpu);
+ /* already expired? */
+ if (cputm >> 63)
+ return 0;
+ return min(sltime, tod_to_ns(cputm));
+ }
+ } else if (cpu_timer_interrupts_enabled(vcpu)) {
+ sltime = kvm_s390_get_cpu_timer(vcpu);
+ /* already expired? */
+ if (sltime >> 63)
+ return 0;
+ }
+ return sltime;
+}
+
+int kvm_s390_handle_wait(struct kvm_vcpu *vcpu)
+{
+ u64 sltime;
+
+ vcpu->stat.exit_wait_state++;
+
+ /* fast path */
+ if (kvm_arch_vcpu_runnable(vcpu))
+ return 0;
+
+ if (psw_interrupts_disabled(vcpu)) {
+ VCPU_EVENT(vcpu, 3, "%s", "disabled wait");
+ return -EOPNOTSUPP; /* disabled wait */
+ }
+
+ if (!ckc_interrupts_enabled(vcpu) &&
+ !cpu_timer_interrupts_enabled(vcpu)) {
+ VCPU_EVENT(vcpu, 3, "%s", "enabled wait w/o timer");
+ __set_cpu_idle(vcpu);
+ goto no_timer;
+ }
+
+ sltime = __calculate_sltime(vcpu);
+ if (!sltime)
+ return 0;
+
+ __set_cpu_idle(vcpu);
+ hrtimer_start(&vcpu->arch.ckc_timer, sltime, HRTIMER_MODE_REL);
+ VCPU_EVENT(vcpu, 4, "enabled wait: %llu ns", sltime);
+no_timer:
+ srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
+ kvm_vcpu_block(vcpu);
+ __unset_cpu_idle(vcpu);
+ vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
+
+ hrtimer_cancel(&vcpu->arch.ckc_timer);
+ return 0;
+}
+
+void kvm_s390_vcpu_wakeup(struct kvm_vcpu *vcpu)
+{
+ /*
+ * We cannot move this into the if, as the CPU might be already
+ * in kvm_vcpu_block without having the waitqueue set (polling)
+ */
+ vcpu->valid_wakeup = true;
+ /*
+	 * This is mostly to document that the read in swait_active could
+	 * be moved before other stores, leading to subtle races.
+	 * All current users do not store or use an atomic-like update
+ */
+ smp_mb__after_atomic();
+ if (swait_active(&vcpu->wq)) {
+ /*
+ * The vcpu gave up the cpu voluntarily, mark it as a good
+ * yield-candidate.
+ */
+ vcpu->preempted = true;
+ swake_up_one(&vcpu->wq);
+ vcpu->stat.halt_wakeup++;
+ }
+ /*
+ * The VCPU might not be sleeping but is executing the VSIE. Let's
+ * kick it, so it leaves the SIE to process the request.
+ */
+ kvm_s390_vsie_kick(vcpu);
+}
+
+enum hrtimer_restart kvm_s390_idle_wakeup(struct hrtimer *timer)
+{
+ struct kvm_vcpu *vcpu;
+ u64 sltime;
+
+ vcpu = container_of(timer, struct kvm_vcpu, arch.ckc_timer);
+ sltime = __calculate_sltime(vcpu);
+
+ /*
+ * If the monotonic clock runs faster than the tod clock we might be
+ * woken up too early and have to go back to sleep to avoid deadlocks.
+ */
+ if (sltime && hrtimer_forward_now(timer, ns_to_ktime(sltime)))
+ return HRTIMER_RESTART;
+ kvm_s390_vcpu_wakeup(vcpu);
+ return HRTIMER_NORESTART;
+}
+
+void kvm_s390_clear_local_irqs(struct kvm_vcpu *vcpu)
+{
+ struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
+
+ spin_lock(&li->lock);
+ li->pending_irqs = 0;
+ bitmap_zero(li->sigp_emerg_pending, KVM_MAX_VCPUS);
+ memset(&li->irq, 0, sizeof(li->irq));
+ spin_unlock(&li->lock);
+
+ sca_clear_ext_call(vcpu);
+}
+
+int __must_check kvm_s390_deliver_pending_interrupts(struct kvm_vcpu *vcpu)
+{
+ struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
+ int rc = 0;
+ unsigned long irq_type;
+ unsigned long irqs;
+
+ __reset_intercept_indicators(vcpu);
+
+ /* pending ckc conditions might have been invalidated */
+ clear_bit(IRQ_PEND_EXT_CLOCK_COMP, &li->pending_irqs);
+ if (ckc_irq_pending(vcpu))
+ set_bit(IRQ_PEND_EXT_CLOCK_COMP, &li->pending_irqs);
+
+ /* pending cpu timer conditions might have been invalidated */
+ clear_bit(IRQ_PEND_EXT_CPU_TIMER, &li->pending_irqs);
+ if (cpu_timer_irq_pending(vcpu))
+ set_bit(IRQ_PEND_EXT_CPU_TIMER, &li->pending_irqs);
+
+ while ((irqs = deliverable_irqs(vcpu)) && !rc) {
+ /* bits are in the reverse order of interrupt priority */
+ irq_type = find_last_bit(&irqs, IRQ_PEND_COUNT);
+ switch (irq_type) {
+ case IRQ_PEND_IO_ISC_0:
+ case IRQ_PEND_IO_ISC_1:
+ case IRQ_PEND_IO_ISC_2:
+ case IRQ_PEND_IO_ISC_3:
+ case IRQ_PEND_IO_ISC_4:
+ case IRQ_PEND_IO_ISC_5:
+ case IRQ_PEND_IO_ISC_6:
+ case IRQ_PEND_IO_ISC_7:
+ rc = __deliver_io(vcpu, irq_type);
+ break;
+ case IRQ_PEND_MCHK_EX:
+ case IRQ_PEND_MCHK_REP:
+ rc = __deliver_machine_check(vcpu);
+ break;
+ case IRQ_PEND_PROG:
+ rc = __deliver_prog(vcpu);
+ break;
+ case IRQ_PEND_EXT_EMERGENCY:
+ rc = __deliver_emergency_signal(vcpu);
+ break;
+ case IRQ_PEND_EXT_EXTERNAL:
+ rc = __deliver_external_call(vcpu);
+ break;
+ case IRQ_PEND_EXT_CLOCK_COMP:
+ rc = __deliver_ckc(vcpu);
+ break;
+ case IRQ_PEND_EXT_CPU_TIMER:
+ rc = __deliver_cpu_timer(vcpu);
+ break;
+ case IRQ_PEND_RESTART:
+ rc = __deliver_restart(vcpu);
+ break;
+ case IRQ_PEND_SET_PREFIX:
+ rc = __deliver_set_prefix(vcpu);
+ break;
+ case IRQ_PEND_PFAULT_INIT:
+ rc = __deliver_pfault_init(vcpu);
+ break;
+ case IRQ_PEND_EXT_SERVICE:
+ rc = __deliver_service(vcpu);
+ break;
+ case IRQ_PEND_PFAULT_DONE:
+ rc = __deliver_pfault_done(vcpu);
+ break;
+ case IRQ_PEND_VIRTIO:
+ rc = __deliver_virtio(vcpu);
+ break;
+ default:
+ WARN_ONCE(1, "Unknown pending irq type %ld", irq_type);
+ clear_bit(irq_type, &li->pending_irqs);
+ }
+ }
+
+ set_intercept_indicators(vcpu);
+
+ return rc;
+}
+
+static int __inject_prog(struct kvm_vcpu *vcpu, struct kvm_s390_irq *irq)
+{
+ struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
+
+ vcpu->stat.inject_program++;
+ VCPU_EVENT(vcpu, 3, "inject: program irq code 0x%x", irq->u.pgm.code);
+ trace_kvm_s390_inject_vcpu(vcpu->vcpu_id, KVM_S390_PROGRAM_INT,
+ irq->u.pgm.code, 0);
+
+ if (!(irq->u.pgm.flags & KVM_S390_PGM_FLAGS_ILC_VALID)) {
+ /* auto detection if no valid ILC was given */
+ irq->u.pgm.flags &= ~KVM_S390_PGM_FLAGS_ILC_MASK;
+ irq->u.pgm.flags |= kvm_s390_get_ilen(vcpu);
+ irq->u.pgm.flags |= KVM_S390_PGM_FLAGS_ILC_VALID;
+ }
+
+ if (irq->u.pgm.code == PGM_PER) {
+ li->irq.pgm.code |= PGM_PER;
+ li->irq.pgm.flags = irq->u.pgm.flags;
+ /* only modify PER related information */
+ li->irq.pgm.per_address = irq->u.pgm.per_address;
+ li->irq.pgm.per_code = irq->u.pgm.per_code;
+ li->irq.pgm.per_atmid = irq->u.pgm.per_atmid;
+ li->irq.pgm.per_access_id = irq->u.pgm.per_access_id;
+ } else if (!(irq->u.pgm.code & PGM_PER)) {
+ li->irq.pgm.code = (li->irq.pgm.code & PGM_PER) |
+ irq->u.pgm.code;
+ li->irq.pgm.flags = irq->u.pgm.flags;
+ /* only modify non-PER information */
+ li->irq.pgm.trans_exc_code = irq->u.pgm.trans_exc_code;
+ li->irq.pgm.mon_code = irq->u.pgm.mon_code;
+ li->irq.pgm.data_exc_code = irq->u.pgm.data_exc_code;
+ li->irq.pgm.mon_class_nr = irq->u.pgm.mon_class_nr;
+ li->irq.pgm.exc_access_id = irq->u.pgm.exc_access_id;
+ li->irq.pgm.op_access_id = irq->u.pgm.op_access_id;
+ } else {
+ li->irq.pgm = irq->u.pgm;
+ }
+ set_bit(IRQ_PEND_PROG, &li->pending_irqs);
+ return 0;
+}
+
+static int __inject_pfault_init(struct kvm_vcpu *vcpu, struct kvm_s390_irq *irq)
+{
+ struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
+
+ vcpu->stat.inject_pfault_init++;
+ VCPU_EVENT(vcpu, 4, "inject: pfault init parameter block at 0x%llx",
+ irq->u.ext.ext_params2);
+ trace_kvm_s390_inject_vcpu(vcpu->vcpu_id, KVM_S390_INT_PFAULT_INIT,
+ irq->u.ext.ext_params,
+ irq->u.ext.ext_params2);
+
+ li->irq.ext = irq->u.ext;
+ set_bit(IRQ_PEND_PFAULT_INIT, &li->pending_irqs);
+ kvm_s390_set_cpuflags(vcpu, CPUSTAT_EXT_INT);
+ return 0;
+}
+
+static int __inject_extcall(struct kvm_vcpu *vcpu, struct kvm_s390_irq *irq)
+{
+ struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
+ struct kvm_s390_extcall_info *extcall = &li->irq.extcall;
+ uint16_t src_id = irq->u.extcall.code;
+
+ vcpu->stat.inject_external_call++;
+ VCPU_EVENT(vcpu, 4, "inject: external call source-cpu:%u",
+ src_id);
+ trace_kvm_s390_inject_vcpu(vcpu->vcpu_id, KVM_S390_INT_EXTERNAL_CALL,
+ src_id, 0);
+
+ /* sending vcpu invalid */
+ if (kvm_get_vcpu_by_id(vcpu->kvm, src_id) == NULL)
+ return -EINVAL;
+
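+	/* Record the call in the SCA so the SIGP interpretation facility can deliver it. */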
+ if (sclp.has_sigpif)
+ return sca_inject_ext_call(vcpu, src_id);
+
+ if (test_and_set_bit(IRQ_PEND_EXT_EXTERNAL, &li->pending_irqs))
+ return -EBUSY;
+ *extcall = irq->u.extcall;
+ kvm_s390_set_cpuflags(vcpu, CPUSTAT_EXT_INT);
+ return 0;
+}
+
+static int __inject_set_prefix(struct kvm_vcpu *vcpu, struct kvm_s390_irq *irq)
+{
+ struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
+ struct kvm_s390_prefix_info *prefix = &li->irq.prefix;
+
+ vcpu->stat.inject_set_prefix++;
+ VCPU_EVENT(vcpu, 3, "inject: set prefix to %x",
+ irq->u.prefix.address);
+ trace_kvm_s390_inject_vcpu(vcpu->vcpu_id, KVM_S390_SIGP_SET_PREFIX,
+ irq->u.prefix.address, 0);
+
+ if (!is_vcpu_stopped(vcpu))
+ return -EBUSY;
+
+ *prefix = irq->u.prefix;
+ set_bit(IRQ_PEND_SET_PREFIX, &li->pending_irqs);
+ return 0;
+}
+
+#define KVM_S390_STOP_SUPP_FLAGS (KVM_S390_STOP_FLAG_STORE_STATUS)
+static int __inject_sigp_stop(struct kvm_vcpu *vcpu, struct kvm_s390_irq *irq)
+{
+ struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
+ struct kvm_s390_stop_info *stop = &li->irq.stop;
+ int rc = 0;
+
+ vcpu->stat.inject_stop_signal++;
+ trace_kvm_s390_inject_vcpu(vcpu->vcpu_id, KVM_S390_SIGP_STOP, 0, 0);
+
+ if (irq->u.stop.flags & ~KVM_S390_STOP_SUPP_FLAGS)
+ return -EINVAL;
+
+ if (is_vcpu_stopped(vcpu)) {
+ if (irq->u.stop.flags & KVM_S390_STOP_FLAG_STORE_STATUS)
+ rc = kvm_s390_store_status_unloaded(vcpu,
+ KVM_S390_STORE_STATUS_NOADDR);
+ return rc;
+ }
+
+ if (test_and_set_bit(IRQ_PEND_SIGP_STOP, &li->pending_irqs))
+ return -EBUSY;
+ stop->flags = irq->u.stop.flags;
+ kvm_s390_set_cpuflags(vcpu, CPUSTAT_STOP_INT);
+ return 0;
+}
+
+static int __inject_sigp_restart(struct kvm_vcpu *vcpu,
+ struct kvm_s390_irq *irq)
+{
+ struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
+
+ vcpu->stat.inject_restart++;
+ VCPU_EVENT(vcpu, 3, "%s", "inject: restart int");
+ trace_kvm_s390_inject_vcpu(vcpu->vcpu_id, KVM_S390_RESTART, 0, 0);
+
+ set_bit(IRQ_PEND_RESTART, &li->pending_irqs);
+ return 0;
+}
+
+static int __inject_sigp_emergency(struct kvm_vcpu *vcpu,
+ struct kvm_s390_irq *irq)
+{
+ struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
+
+ vcpu->stat.inject_emergency_signal++;
+ VCPU_EVENT(vcpu, 4, "inject: emergency from cpu %u",
+ irq->u.emerg.code);
+ trace_kvm_s390_inject_vcpu(vcpu->vcpu_id, KVM_S390_INT_EMERGENCY,
+ irq->u.emerg.code, 0);
+
+ /* sending vcpu invalid */
+ if (kvm_get_vcpu_by_id(vcpu->kvm, irq->u.emerg.code) == NULL)
+ return -EINVAL;
+
+ set_bit(irq->u.emerg.code, li->sigp_emerg_pending);
+ set_bit(IRQ_PEND_EXT_EMERGENCY, &li->pending_irqs);
+ kvm_s390_set_cpuflags(vcpu, CPUSTAT_EXT_INT);
+ return 0;
+}
+
+static int __inject_mchk(struct kvm_vcpu *vcpu, struct kvm_s390_irq *irq)
+{
+ struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
+ struct kvm_s390_mchk_info *mchk = &li->irq.mchk;
+
+ vcpu->stat.inject_mchk++;
+ VCPU_EVENT(vcpu, 3, "inject: machine check mcic 0x%llx",
+ irq->u.mchk.mcic);
+ trace_kvm_s390_inject_vcpu(vcpu->vcpu_id, KVM_S390_MCHK, 0,
+ irq->u.mchk.mcic);
+
+ /*
+ * Because repressible machine checks can be indicated along with
+ * exigent machine checks (PoP, Chapter 11, Interruption action)
+ * we need to combine cr14, mcic and external damage code.
+ * Failing storage address and the logout area should not be or'ed
+ * together, we just indicate the last occurrence of the corresponding
+	 * together; we just indicate the last occurrence of the corresponding
+ */
+ mchk->cr14 |= irq->u.mchk.cr14;
+ mchk->mcic |= irq->u.mchk.mcic;
+ mchk->ext_damage_code |= irq->u.mchk.ext_damage_code;
+ mchk->failing_storage_address = irq->u.mchk.failing_storage_address;
+ memcpy(&mchk->fixed_logout, &irq->u.mchk.fixed_logout,
+ sizeof(mchk->fixed_logout));
+ if (mchk->mcic & MCHK_EX_MASK)
+ set_bit(IRQ_PEND_MCHK_EX, &li->pending_irqs);
+ else if (mchk->mcic & MCHK_REP_MASK)
+ set_bit(IRQ_PEND_MCHK_REP, &li->pending_irqs);
+ return 0;
+}
+
+static int __inject_ckc(struct kvm_vcpu *vcpu)
+{
+ struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
+
+ vcpu->stat.inject_ckc++;
+ VCPU_EVENT(vcpu, 3, "%s", "inject: clock comparator external");
+ trace_kvm_s390_inject_vcpu(vcpu->vcpu_id, KVM_S390_INT_CLOCK_COMP,
+ 0, 0);
+
+ set_bit(IRQ_PEND_EXT_CLOCK_COMP, &li->pending_irqs);
+ kvm_s390_set_cpuflags(vcpu, CPUSTAT_EXT_INT);
+ return 0;
+}
+
+static int __inject_cpu_timer(struct kvm_vcpu *vcpu)
+{
+ struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
+
+ vcpu->stat.inject_cputm++;
+ VCPU_EVENT(vcpu, 3, "%s", "inject: cpu timer external");
+ trace_kvm_s390_inject_vcpu(vcpu->vcpu_id, KVM_S390_INT_CPU_TIMER,
+ 0, 0);
+
+ set_bit(IRQ_PEND_EXT_CPU_TIMER, &li->pending_irqs);
+ kvm_s390_set_cpuflags(vcpu, CPUSTAT_EXT_INT);
+ return 0;
+}
+
+static struct kvm_s390_interrupt_info *get_io_int(struct kvm *kvm,
+ int isc, u32 schid)
+{
+ struct kvm_s390_float_interrupt *fi = &kvm->arch.float_int;
+ struct list_head *isc_list = &fi->lists[FIRQ_LIST_IO_ISC_0 + isc];
+ struct kvm_s390_interrupt_info *iter;
+ u16 id = (schid & 0xffff0000U) >> 16;
+ u16 nr = schid & 0x0000ffffU;
+
+ spin_lock(&fi->lock);
+ list_for_each_entry(iter, isc_list, list) {
+ if (schid && (id != iter->io.subchannel_id ||
+ nr != iter->io.subchannel_nr))
+ continue;
+ /* found an appropriate entry */
+ list_del_init(&iter->list);
+ fi->counters[FIRQ_CNTR_IO] -= 1;
+ if (list_empty(isc_list))
+ clear_bit(isc_to_irq_type(isc), &fi->pending_irqs);
+ spin_unlock(&fi->lock);
+ return iter;
+ }
+ spin_unlock(&fi->lock);
+ return NULL;
+}
+
+static struct kvm_s390_interrupt_info *get_top_io_int(struct kvm *kvm,
+ u64 isc_mask, u32 schid)
+{
+ struct kvm_s390_interrupt_info *inti = NULL;
+ int isc;
+
+ for (isc = 0; isc <= MAX_ISC && !inti; isc++) {
+ if (isc_mask & isc_to_isc_bits(isc))
+ inti = get_io_int(kvm, isc, schid);
+ }
+ return inti;
+}
+
+static int get_top_gisa_isc(struct kvm *kvm, u64 isc_mask, u32 schid)
+{
+ unsigned long active_mask;
+ int isc;
+
+ if (schid)
+ goto out;
+ if (!kvm->arch.gisa)
+ goto out;
+
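+	/*
+	 * Shift the IPM into the cr6-style ISC layout and on into the top
+	 * byte, so that inverted bit numbering yields the highest-priority
+	 * pending ISC first.
+	 */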
+ active_mask = (isc_mask & kvm_s390_gisa_get_ipm(kvm->arch.gisa) << 24) << 32;
+ while (active_mask) {
+ isc = __fls(active_mask) ^ (BITS_PER_LONG - 1);
+ if (kvm_s390_gisa_tac_ipm_gisc(kvm->arch.gisa, isc))
+ return isc;
+ clear_bit_inv(isc, &active_mask);
+ }
+out:
+ return -EINVAL;
+}
+
+/*
+ * Dequeue and return an I/O interrupt matching any of the interruption
+ * subclasses as designated by the isc mask in cr6 and the schid (if != 0).
+ * Take into account the interrupts pending in the interrupt list and in GISA.
+ *
+ * Note that for a guest that does not enable I/O interrupts
+ * but relies on TPI, a flood of classic interrupts may starve
+ * out adapter interrupts on the same isc. Linux does not do
+ * that, and it is possible to work around the issue by configuring
+ * different iscs for classic and adapter interrupts in the guest,
+ * but we may want to revisit this in the future.
+ */
+struct kvm_s390_interrupt_info *kvm_s390_get_io_int(struct kvm *kvm,
+ u64 isc_mask, u32 schid)
+{
+ struct kvm_s390_interrupt_info *inti, *tmp_inti;
+ int isc;
+
+ inti = get_top_io_int(kvm, isc_mask, schid);
+
+ isc = get_top_gisa_isc(kvm, isc_mask, schid);
+ if (isc < 0)
+ /* no AI in GISA */
+ goto out;
+
+ if (!inti)
+ /* AI in GISA but no classical IO int */
+ goto gisa_out;
+
+ /* both types of interrupts present */
+ if (int_word_to_isc(inti->io.io_int_word) <= isc) {
+ /* classical IO int with higher priority */
+ kvm_s390_gisa_set_ipm_gisc(kvm->arch.gisa, isc);
+ goto out;
+ }
+gisa_out:
+ tmp_inti = kzalloc(sizeof(*inti), GFP_KERNEL);
+ if (tmp_inti) {
+ tmp_inti->type = KVM_S390_INT_IO(1, 0, 0, 0);
+ tmp_inti->io.io_int_word = isc_to_int_word(isc);
+ if (inti)
+ kvm_s390_reinject_io_int(kvm, inti);
+ inti = tmp_inti;
+ } else
+ kvm_s390_gisa_set_ipm_gisc(kvm->arch.gisa, isc);
+out:
+ return inti;
+}
+
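+/*
+ * The service-signal parameter carries the doubleword-aligned SCCB address
+ * in its upper bits; the low bits hold the event-pending indication.
+ */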
+#define SCCB_MASK 0xFFFFFFF8
+#define SCCB_EVENT_PENDING 0x3
+
+static int __inject_service(struct kvm *kvm,
+ struct kvm_s390_interrupt_info *inti)
+{
+ struct kvm_s390_float_interrupt *fi = &kvm->arch.float_int;
+
+ kvm->stat.inject_service_signal++;
+ spin_lock(&fi->lock);
+ fi->srv_signal.ext_params |= inti->ext.ext_params & SCCB_EVENT_PENDING;
+ /*
+ * Early versions of the QEMU s390 bios will inject several
+	 * service interrupts one after another without handling a
+ * condition code indicating busy.
+ * We will silently ignore those superfluous sccb values.
+ * A future version of QEMU will take care of serialization
+ * of servc requests
+ */
+ if (fi->srv_signal.ext_params & SCCB_MASK)
+ goto out;
+ fi->srv_signal.ext_params |= inti->ext.ext_params & SCCB_MASK;
+ set_bit(IRQ_PEND_EXT_SERVICE, &fi->pending_irqs);
+out:
+ spin_unlock(&fi->lock);
+ kfree(inti);
+ return 0;
+}
+
+static int __inject_virtio(struct kvm *kvm,
+ struct kvm_s390_interrupt_info *inti)
+{
+ struct kvm_s390_float_interrupt *fi = &kvm->arch.float_int;
+
+ kvm->stat.inject_virtio++;
+ spin_lock(&fi->lock);
+ if (fi->counters[FIRQ_CNTR_VIRTIO] >= KVM_S390_MAX_VIRTIO_IRQS) {
+ spin_unlock(&fi->lock);
+ return -EBUSY;
+ }
+ fi->counters[FIRQ_CNTR_VIRTIO] += 1;
+ list_add_tail(&inti->list, &fi->lists[FIRQ_LIST_VIRTIO]);
+ set_bit(IRQ_PEND_VIRTIO, &fi->pending_irqs);
+ spin_unlock(&fi->lock);
+ return 0;
+}
+
+static int __inject_pfault_done(struct kvm *kvm,
+ struct kvm_s390_interrupt_info *inti)
+{
+ struct kvm_s390_float_interrupt *fi = &kvm->arch.float_int;
+
+ kvm->stat.inject_pfault_done++;
+ spin_lock(&fi->lock);
+ if (fi->counters[FIRQ_CNTR_PFAULT] >=
+ (ASYNC_PF_PER_VCPU * KVM_MAX_VCPUS)) {
+ spin_unlock(&fi->lock);
+ return -EBUSY;
+ }
+ fi->counters[FIRQ_CNTR_PFAULT] += 1;
+ list_add_tail(&inti->list, &fi->lists[FIRQ_LIST_PFAULT]);
+ set_bit(IRQ_PEND_PFAULT_DONE, &fi->pending_irqs);
+ spin_unlock(&fi->lock);
+ return 0;
+}
+
+#define CR_PENDING_SUBCLASS 28
+static int __inject_float_mchk(struct kvm *kvm,
+ struct kvm_s390_interrupt_info *inti)
+{
+ struct kvm_s390_float_interrupt *fi = &kvm->arch.float_int;
+
+ kvm->stat.inject_float_mchk++;
+ spin_lock(&fi->lock);
+ fi->mchk.cr14 |= inti->mchk.cr14 & (1UL << CR_PENDING_SUBCLASS);
+ fi->mchk.mcic |= inti->mchk.mcic;
+ set_bit(IRQ_PEND_MCHK_REP, &fi->pending_irqs);
+ spin_unlock(&fi->lock);
+ kfree(inti);
+ return 0;
+}
+
+static int __inject_io(struct kvm *kvm, struct kvm_s390_interrupt_info *inti)
+{
+ struct kvm_s390_float_interrupt *fi;
+ struct list_head *list;
+ int isc;
+
+ kvm->stat.inject_io++;
+ isc = int_word_to_isc(inti->io.io_int_word);
+
+ if (kvm->arch.gisa && inti->type & KVM_S390_INT_IO_AI_MASK) {
+ VM_EVENT(kvm, 4, "%s isc %1u", "inject: I/O (AI/gisa)", isc);
+ kvm_s390_gisa_set_ipm_gisc(kvm->arch.gisa, isc);
+ kfree(inti);
+ return 0;
+ }
+
+ fi = &kvm->arch.float_int;
+ spin_lock(&fi->lock);
+ if (fi->counters[FIRQ_CNTR_IO] >= KVM_S390_MAX_FLOAT_IRQS) {
+ spin_unlock(&fi->lock);
+ return -EBUSY;
+ }
+ fi->counters[FIRQ_CNTR_IO] += 1;
+
+ if (inti->type & KVM_S390_INT_IO_AI_MASK)
+ VM_EVENT(kvm, 4, "%s", "inject: I/O (AI)");
+ else
+ VM_EVENT(kvm, 4, "inject: I/O %x ss %x schid %04x",
+ inti->io.subchannel_id >> 8,
+ inti->io.subchannel_id >> 1 & 0x3,
+ inti->io.subchannel_nr);
+ list = &fi->lists[FIRQ_LIST_IO_ISC_0 + isc];
+ list_add_tail(&inti->list, list);
+ set_bit(isc_to_irq_type(isc), &fi->pending_irqs);
+ spin_unlock(&fi->lock);
+ return 0;
+}
+
+/*
+ * Find a destination VCPU for a floating irq and kick it.
+ */
+static void __floating_irq_kick(struct kvm *kvm, u64 type)
+{
+ struct kvm_s390_float_interrupt *fi = &kvm->arch.float_int;
+ struct kvm_vcpu *dst_vcpu;
+ int sigcpu, online_vcpus, nr_tries = 0;
+
+ online_vcpus = atomic_read(&kvm->online_vcpus);
+ if (!online_vcpus)
+ return;
+
+ /* find idle VCPUs first, then round robin */
+ sigcpu = find_first_bit(fi->idle_mask, online_vcpus);
+ if (sigcpu == online_vcpus) {
+ do {
+ sigcpu = fi->next_rr_cpu;
+ fi->next_rr_cpu = (fi->next_rr_cpu + 1) % online_vcpus;
+ /* avoid endless loops if all vcpus are stopped */
+ if (nr_tries++ >= online_vcpus)
+ return;
+ } while (is_vcpu_stopped(kvm_get_vcpu(kvm, sigcpu)));
+ }
+ dst_vcpu = kvm_get_vcpu(kvm, sigcpu);
+
+ /* make the VCPU drop out of the SIE, or wake it up if sleeping */
+ switch (type) {
+ case KVM_S390_MCHK:
+ kvm_s390_set_cpuflags(dst_vcpu, CPUSTAT_STOP_INT);
+ break;
+ case KVM_S390_INT_IO_MIN...KVM_S390_INT_IO_MAX:
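+		/* Adapter interrupts handled via the GISA need no I/O intercept request. */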
+ if (!(type & KVM_S390_INT_IO_AI_MASK && kvm->arch.gisa))
+ kvm_s390_set_cpuflags(dst_vcpu, CPUSTAT_IO_INT);
+ break;
+ default:
+ kvm_s390_set_cpuflags(dst_vcpu, CPUSTAT_EXT_INT);
+ break;
+ }
+ kvm_s390_vcpu_wakeup(dst_vcpu);
+}
+
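+/*
+ * Dispatch a floating interrupt to its injection routine and kick a
+ * target VCPU so it picks up the new interrupt.
+ */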
+static int __inject_vm(struct kvm *kvm, struct kvm_s390_interrupt_info *inti)
+{
+ u64 type = READ_ONCE(inti->type);
+ int rc;
+
+ switch (type) {
+ case KVM_S390_MCHK:
+ rc = __inject_float_mchk(kvm, inti);
+ break;
+ case KVM_S390_INT_VIRTIO:
+ rc = __inject_virtio(kvm, inti);
+ break;
+ case KVM_S390_INT_SERVICE:
+ rc = __inject_service(kvm, inti);
+ break;
+ case KVM_S390_INT_PFAULT_DONE:
+ rc = __inject_pfault_done(kvm, inti);
+ break;
+ case KVM_S390_INT_IO_MIN...KVM_S390_INT_IO_MAX:
+ rc = __inject_io(kvm, inti);
+ break;
+ default:
+ rc = -EINVAL;
+ }
+ if (rc)
+ return rc;
+
+ __floating_irq_kick(kvm, type);
+ return 0;
+}
+
+int kvm_s390_inject_vm(struct kvm *kvm,
+ struct kvm_s390_interrupt *s390int)
+{
+ struct kvm_s390_interrupt_info *inti;
+ int rc;
+
+ inti = kzalloc(sizeof(*inti), GFP_KERNEL);
+ if (!inti)
+ return -ENOMEM;
+
+ inti->type = s390int->type;
+ switch (inti->type) {
+ case KVM_S390_INT_VIRTIO:
+ VM_EVENT(kvm, 5, "inject: virtio parm:%x,parm64:%llx",
+ s390int->parm, s390int->parm64);
+ inti->ext.ext_params = s390int->parm;
+ inti->ext.ext_params2 = s390int->parm64;
+ break;
+ case KVM_S390_INT_SERVICE:
+ VM_EVENT(kvm, 4, "inject: sclp parm:%x", s390int->parm);
+ inti->ext.ext_params = s390int->parm;
+ break;
+ case KVM_S390_INT_PFAULT_DONE:
+ inti->ext.ext_params2 = s390int->parm64;
+ break;
+ case KVM_S390_MCHK:
+ VM_EVENT(kvm, 3, "inject: machine check mcic 0x%llx",
+ s390int->parm64);
+ inti->mchk.cr14 = s390int->parm; /* upper bits are not used */
+ inti->mchk.mcic = s390int->parm64;
+ break;
+ case KVM_S390_INT_IO_MIN...KVM_S390_INT_IO_MAX:
+ inti->io.subchannel_id = s390int->parm >> 16;
+ inti->io.subchannel_nr = s390int->parm & 0x0000ffffu;
+ inti->io.io_int_parm = s390int->parm64 >> 32;
+ inti->io.io_int_word = s390int->parm64 & 0x00000000ffffffffull;
+ break;
+ default:
+ kfree(inti);
+ return -EINVAL;
+ }
+ trace_kvm_s390_inject_vm(s390int->type, s390int->parm, s390int->parm64,
+ 2);
+
+ rc = __inject_vm(kvm, inti);
+ if (rc)
+ kfree(inti);
+ return rc;
+}
+
+int kvm_s390_reinject_io_int(struct kvm *kvm,
+ struct kvm_s390_interrupt_info *inti)
+{
+ return __inject_vm(kvm, inti);
+}
+
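+/*
+ * Translate the legacy struct kvm_s390_interrupt into a struct kvm_s390_irq,
+ * rejecting interrupt types and parameters that cannot be expressed.
+ */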
+int s390int_to_s390irq(struct kvm_s390_interrupt *s390int,
+ struct kvm_s390_irq *irq)
+{
+ irq->type = s390int->type;
+ switch (irq->type) {
+ case KVM_S390_PROGRAM_INT:
+ if (s390int->parm & 0xffff0000)
+ return -EINVAL;
+ irq->u.pgm.code = s390int->parm;
+ break;
+ case KVM_S390_SIGP_SET_PREFIX:
+ irq->u.prefix.address = s390int->parm;
+ break;
+ case KVM_S390_SIGP_STOP:
+ irq->u.stop.flags = s390int->parm;
+ break;
+ case KVM_S390_INT_EXTERNAL_CALL:
+ if (s390int->parm & 0xffff0000)
+ return -EINVAL;
+ irq->u.extcall.code = s390int->parm;
+ break;
+ case KVM_S390_INT_EMERGENCY:
+ if (s390int->parm & 0xffff0000)
+ return -EINVAL;
+ irq->u.emerg.code = s390int->parm;
+ break;
+ case KVM_S390_MCHK:
+ irq->u.mchk.mcic = s390int->parm64;
+ break;
+ case KVM_S390_INT_PFAULT_INIT:
+ irq->u.ext.ext_params = s390int->parm;
+ irq->u.ext.ext_params2 = s390int->parm64;
+ break;
+ case KVM_S390_RESTART:
+ case KVM_S390_INT_CLOCK_COMP:
+ case KVM_S390_INT_CPU_TIMER:
+ break;
+ default:
+ return -EINVAL;
+ }
+ return 0;
+}
+
+int kvm_s390_is_stop_irq_pending(struct kvm_vcpu *vcpu)
+{
+ struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
+
+ return test_bit(IRQ_PEND_SIGP_STOP, &li->pending_irqs);
+}
+
+int kvm_s390_is_restart_irq_pending(struct kvm_vcpu *vcpu)
+{
+ struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
+
+ return test_bit(IRQ_PEND_RESTART, &li->pending_irqs);
+}
+
+void kvm_s390_clear_stop_irq(struct kvm_vcpu *vcpu)
+{
+ struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
+
+ spin_lock(&li->lock);
+ li->irq.stop.flags = 0;
+ clear_bit(IRQ_PEND_SIGP_STOP, &li->pending_irqs);
+ spin_unlock(&li->lock);
+}
+
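+/* Inject a local interrupt; the caller must hold the local interrupt lock. */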
+static int do_inject_vcpu(struct kvm_vcpu *vcpu, struct kvm_s390_irq *irq)
+{
+ int rc;
+
+ switch (irq->type) {
+ case KVM_S390_PROGRAM_INT:
+ rc = __inject_prog(vcpu, irq);
+ break;
+ case KVM_S390_SIGP_SET_PREFIX:
+ rc = __inject_set_prefix(vcpu, irq);
+ break;
+ case KVM_S390_SIGP_STOP:
+ rc = __inject_sigp_stop(vcpu, irq);
+ break;
+ case KVM_S390_RESTART:
+ rc = __inject_sigp_restart(vcpu, irq);
+ break;
+ case KVM_S390_INT_CLOCK_COMP:
+ rc = __inject_ckc(vcpu);
+ break;
+ case KVM_S390_INT_CPU_TIMER:
+ rc = __inject_cpu_timer(vcpu);
+ break;
+ case KVM_S390_INT_EXTERNAL_CALL:
+ rc = __inject_extcall(vcpu, irq);
+ break;
+ case KVM_S390_INT_EMERGENCY:
+ rc = __inject_sigp_emergency(vcpu, irq);
+ break;
+ case KVM_S390_MCHK:
+ rc = __inject_mchk(vcpu, irq);
+ break;
+ case KVM_S390_INT_PFAULT_INIT:
+ rc = __inject_pfault_init(vcpu, irq);
+ break;
+ case KVM_S390_INT_VIRTIO:
+ case KVM_S390_INT_SERVICE:
+ case KVM_S390_INT_IO_MIN...KVM_S390_INT_IO_MAX:
+ default:
+ rc = -EINVAL;
+ }
+
+ return rc;
+}
+
+int kvm_s390_inject_vcpu(struct kvm_vcpu *vcpu, struct kvm_s390_irq *irq)
+{
+ struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
+ int rc;
+
+ spin_lock(&li->lock);
+ rc = do_inject_vcpu(vcpu, irq);
+ spin_unlock(&li->lock);
+ if (!rc)
+ kvm_s390_vcpu_wakeup(vcpu);
+ return rc;
+}
+
+static inline void clear_irq_list(struct list_head *_list)
+{
+ struct kvm_s390_interrupt_info *inti, *n;
+
+ list_for_each_entry_safe(inti, n, _list, list) {
+ list_del(&inti->list);
+ kfree(inti);
+ }
+}
+
+static void inti_to_irq(struct kvm_s390_interrupt_info *inti,
+ struct kvm_s390_irq *irq)
+{
+ irq->type = inti->type;
+ switch (inti->type) {
+ case KVM_S390_INT_PFAULT_INIT:
+ case KVM_S390_INT_PFAULT_DONE:
+ case KVM_S390_INT_VIRTIO:
+ irq->u.ext = inti->ext;
+ break;
+ case KVM_S390_INT_IO_MIN...KVM_S390_INT_IO_MAX:
+ irq->u.io = inti->io;
+ break;
+ }
+}
+
+void kvm_s390_clear_float_irqs(struct kvm *kvm)
+{
+ struct kvm_s390_float_interrupt *fi = &kvm->arch.float_int;
+ int i;
+
+ spin_lock(&fi->lock);
+ fi->pending_irqs = 0;
+ memset(&fi->srv_signal, 0, sizeof(fi->srv_signal));
+ memset(&fi->mchk, 0, sizeof(fi->mchk));
+ for (i = 0; i < FIRQ_LIST_COUNT; i++)
+ clear_irq_list(&fi->lists[i]);
+ for (i = 0; i < FIRQ_MAX_COUNT; i++)
+ fi->counters[i] = 0;
+ spin_unlock(&fi->lock);
+ kvm_s390_gisa_clear(kvm);
+};
+
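+/*
+ * Copy all currently pending floating interrupts to the userspace buffer.
+ * Returns the number of interrupts copied or a negative error code.
+ */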
+static int get_all_floating_irqs(struct kvm *kvm, u8 __user *usrbuf, u64 len)
+{
+ struct kvm_s390_interrupt_info *inti;
+ struct kvm_s390_float_interrupt *fi;
+ struct kvm_s390_irq *buf;
+ struct kvm_s390_irq *irq;
+ int max_irqs;
+ int ret = 0;
+ int n = 0;
+ int i;
+
+ if (len > KVM_S390_FLIC_MAX_BUFFER || len == 0)
+ return -EINVAL;
+
+	/*
+	 * We are already using -ENOMEM to signal userspace that it may
+	 * retry with a bigger buffer, so we need a different error code
+	 * for this case.
+	 */
+ buf = vzalloc(len);
+ if (!buf)
+ return -ENOBUFS;
+
+ max_irqs = len / sizeof(struct kvm_s390_irq);
+
+ if (kvm->arch.gisa &&
+ kvm_s390_gisa_get_ipm(kvm->arch.gisa)) {
+ for (i = 0; i <= MAX_ISC; i++) {
+ if (n == max_irqs) {
+ /* signal userspace to try again */
+ ret = -ENOMEM;
+ goto out_nolock;
+ }
+ if (kvm_s390_gisa_tac_ipm_gisc(kvm->arch.gisa, i)) {
+ irq = (struct kvm_s390_irq *) &buf[n];
+ irq->type = KVM_S390_INT_IO(1, 0, 0, 0);
+ irq->u.io.io_int_word = isc_to_int_word(i);
+ n++;
+ }
+ }
+ }
+ fi = &kvm->arch.float_int;
+ spin_lock(&fi->lock);
+ for (i = 0; i < FIRQ_LIST_COUNT; i++) {
+ list_for_each_entry(inti, &fi->lists[i], list) {
+ if (n == max_irqs) {
+ /* signal userspace to try again */
+ ret = -ENOMEM;
+ goto out;
+ }
+ inti_to_irq(inti, &buf[n]);
+ n++;
+ }
+ }
+ if (test_bit(IRQ_PEND_EXT_SERVICE, &fi->pending_irqs)) {
+ if (n == max_irqs) {
+ /* signal userspace to try again */
+ ret = -ENOMEM;
+ goto out;
+ }
+ irq = (struct kvm_s390_irq *) &buf[n];
+ irq->type = KVM_S390_INT_SERVICE;
+ irq->u.ext = fi->srv_signal;
+ n++;
+ }
+ if (test_bit(IRQ_PEND_MCHK_REP, &fi->pending_irqs)) {
+ if (n == max_irqs) {
+ /* signal userspace to try again */
+ ret = -ENOMEM;
+ goto out;
+ }
+ irq = (struct kvm_s390_irq *) &buf[n];
+ irq->type = KVM_S390_MCHK;
+ irq->u.mchk = fi->mchk;
+ n++;
+	}
+
+out:
+ spin_unlock(&fi->lock);
+out_nolock:
+ if (!ret && n > 0) {
+ if (copy_to_user(usrbuf, buf, sizeof(struct kvm_s390_irq) * n))
+ ret = -EFAULT;
+ }
+ vfree(buf);
+
+ return ret < 0 ? ret : n;
+}
+
+static int flic_ais_mode_get_all(struct kvm *kvm, struct kvm_device_attr *attr)
+{
+ struct kvm_s390_float_interrupt *fi = &kvm->arch.float_int;
+ struct kvm_s390_ais_all ais;
+
+ if (attr->attr < sizeof(ais))
+ return -EINVAL;
+
+ if (!test_kvm_facility(kvm, 72))
+ return -EOPNOTSUPP;
+
+ mutex_lock(&fi->ais_lock);
+ ais.simm = fi->simm;
+ ais.nimm = fi->nimm;
+ mutex_unlock(&fi->ais_lock);
+
+ if (copy_to_user((void __user *)attr->addr, &ais, sizeof(ais)))
+ return -EFAULT;
+
+ return 0;
+}
+
+static int flic_get_attr(struct kvm_device *dev, struct kvm_device_attr *attr)
+{
+ int r;
+
+ switch (attr->group) {
+ case KVM_DEV_FLIC_GET_ALL_IRQS:
+ r = get_all_floating_irqs(dev->kvm, (u8 __user *) attr->addr,
+ attr->attr);
+ break;
+ case KVM_DEV_FLIC_AISM_ALL:
+ r = flic_ais_mode_get_all(dev->kvm, attr);
+ break;
+ default:
+ r = -EINVAL;
+ }
+
+ return r;
+}
+
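+/*
+ * Read one struct kvm_s390_irq from userspace and fill in the matching
+ * fields of the interrupt info structure.
+ */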
+static inline int copy_irq_from_user(struct kvm_s390_interrupt_info *inti,
+ u64 addr)
+{
+ struct kvm_s390_irq __user *uptr = (struct kvm_s390_irq __user *) addr;
+ void *target = NULL;
+ void __user *source;
+ u64 size;
+
+ if (get_user(inti->type, (u64 __user *)addr))
+ return -EFAULT;
+
+ switch (inti->type) {
+ case KVM_S390_INT_PFAULT_INIT:
+ case KVM_S390_INT_PFAULT_DONE:
+ case KVM_S390_INT_VIRTIO:
+ case KVM_S390_INT_SERVICE:
+ target = (void *) &inti->ext;
+ source = &uptr->u.ext;
+ size = sizeof(inti->ext);
+ break;
+ case KVM_S390_INT_IO_MIN...KVM_S390_INT_IO_MAX:
+ target = (void *) &inti->io;
+ source = &uptr->u.io;
+ size = sizeof(inti->io);
+ break;
+ case KVM_S390_MCHK:
+ target = (void *) &inti->mchk;
+ source = &uptr->u.mchk;
+ size = sizeof(inti->mchk);
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ if (copy_from_user(target, source, size))
+ return -EFAULT;
+
+ return 0;
+}
+
+static int enqueue_floating_irq(struct kvm_device *dev,
+ struct kvm_device_attr *attr)
+{
+ struct kvm_s390_interrupt_info *inti = NULL;
+ int r = 0;
+ int len = attr->attr;
+
+ if (len % sizeof(struct kvm_s390_irq) != 0)
+ return -EINVAL;
+ else if (len > KVM_S390_FLIC_MAX_BUFFER)
+ return -EINVAL;
+
+ while (len >= sizeof(struct kvm_s390_irq)) {
+ inti = kzalloc(sizeof(*inti), GFP_KERNEL);
+ if (!inti)
+ return -ENOMEM;
+
+ r = copy_irq_from_user(inti, attr->addr);
+ if (r) {
+ kfree(inti);
+ return r;
+ }
+ r = __inject_vm(dev->kvm, inti);
+ if (r) {
+ kfree(inti);
+ return r;
+ }
+ len -= sizeof(struct kvm_s390_irq);
+ attr->addr += sizeof(struct kvm_s390_irq);
+ }
+
+ return r;
+}
+
+static struct s390_io_adapter *get_io_adapter(struct kvm *kvm, unsigned int id)
+{
+ if (id >= MAX_S390_IO_ADAPTERS)
+ return NULL;
+ return kvm->arch.adapters[id];
+}
+
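+/* Allocate and register a new I/O adapter described by userspace. */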
+static int register_io_adapter(struct kvm_device *dev,
+ struct kvm_device_attr *attr)
+{
+ struct s390_io_adapter *adapter;
+ struct kvm_s390_io_adapter adapter_info;
+
+ if (copy_from_user(&adapter_info,
+ (void __user *)attr->addr, sizeof(adapter_info)))
+ return -EFAULT;
+
+ if ((adapter_info.id >= MAX_S390_IO_ADAPTERS) ||
+ (dev->kvm->arch.adapters[adapter_info.id] != NULL))
+ return -EINVAL;
+
+ adapter = kzalloc(sizeof(*adapter), GFP_KERNEL);
+ if (!adapter)
+ return -ENOMEM;
+
+ INIT_LIST_HEAD(&adapter->maps);
+ init_rwsem(&adapter->maps_lock);
+ atomic_set(&adapter->nr_maps, 0);
+ adapter->id = adapter_info.id;
+ adapter->isc = adapter_info.isc;
+ adapter->maskable = adapter_info.maskable;
+ adapter->masked = false;
+ adapter->swap = adapter_info.swap;
+ adapter->suppressible = (adapter_info.flags) &
+ KVM_S390_ADAPTER_SUPPRESSIBLE;
+ dev->kvm->arch.adapters[adapter->id] = adapter;
+
+ return 0;
+}
+
+int kvm_s390_mask_adapter(struct kvm *kvm, unsigned int id, bool masked)
+{
+ int ret;
+ struct s390_io_adapter *adapter = get_io_adapter(kvm, id);
+
+ if (!adapter || !adapter->maskable)
+ return -EINVAL;
+ ret = adapter->masked;
+ adapter->masked = masked;
+ return ret;
+}
+
+static int kvm_s390_adapter_map(struct kvm *kvm, unsigned int id, __u64 addr)
+{
+ struct s390_io_adapter *adapter = get_io_adapter(kvm, id);
+ struct s390_map_info *map;
+ int ret;
+
+ if (!adapter || !addr)
+ return -EINVAL;
+
+ map = kzalloc(sizeof(*map), GFP_KERNEL);
+ if (!map) {
+ ret = -ENOMEM;
+ goto out;
+ }
+ INIT_LIST_HEAD(&map->list);
+ map->guest_addr = addr;
+ map->addr = gmap_translate(kvm->arch.gmap, addr);
+ if (map->addr == -EFAULT) {
+ ret = -EFAULT;
+ goto out;
+ }
+ ret = get_user_pages_fast(map->addr, 1, 1, &map->page);
+ if (ret < 0)
+ goto out;
+ BUG_ON(ret != 1);
+ down_write(&adapter->maps_lock);
+ if (atomic_inc_return(&adapter->nr_maps) < MAX_S390_ADAPTER_MAPS) {
+ list_add_tail(&map->list, &adapter->maps);
+ ret = 0;
+ } else {
+ put_page(map->page);
+ ret = -EINVAL;
+ }
+ up_write(&adapter->maps_lock);
+out:
+ if (ret)
+ kfree(map);
+ return ret;
+}
+
+static int kvm_s390_adapter_unmap(struct kvm *kvm, unsigned int id, __u64 addr)
+{
+ struct s390_io_adapter *adapter = get_io_adapter(kvm, id);
+ struct s390_map_info *map, *tmp;
+ int found = 0;
+
+ if (!adapter || !addr)
+ return -EINVAL;
+
+ down_write(&adapter->maps_lock);
+ list_for_each_entry_safe(map, tmp, &adapter->maps, list) {
+ if (map->guest_addr == addr) {
+ found = 1;
+ atomic_dec(&adapter->nr_maps);
+ list_del(&map->list);
+ put_page(map->page);
+ kfree(map);
+ break;
+ }
+ }
+ up_write(&adapter->maps_lock);
+
+ return found ? 0 : -EINVAL;
+}
+
+void kvm_s390_destroy_adapters(struct kvm *kvm)
+{
+ int i;
+ struct s390_map_info *map, *tmp;
+
+ for (i = 0; i < MAX_S390_IO_ADAPTERS; i++) {
+ if (!kvm->arch.adapters[i])
+ continue;
+ list_for_each_entry_safe(map, tmp,
+ &kvm->arch.adapters[i]->maps, list) {
+ list_del(&map->list);
+ put_page(map->page);
+ kfree(map);
+ }
+ kfree(kvm->arch.adapters[i]);
+ }
+}
+
+static int modify_io_adapter(struct kvm_device *dev,
+ struct kvm_device_attr *attr)
+{
+ struct kvm_s390_io_adapter_req req;
+ struct s390_io_adapter *adapter;
+ int ret;
+
+ if (copy_from_user(&req, (void __user *)attr->addr, sizeof(req)))
+ return -EFAULT;
+
+ adapter = get_io_adapter(dev->kvm, req.id);
+ if (!adapter)
+ return -EINVAL;
+ switch (req.type) {
+ case KVM_S390_IO_ADAPTER_MASK:
+ ret = kvm_s390_mask_adapter(dev->kvm, req.id, req.mask);
+ if (ret > 0)
+ ret = 0;
+ break;
+ case KVM_S390_IO_ADAPTER_MAP:
+ ret = kvm_s390_adapter_map(dev->kvm, req.id, req.addr);
+ break;
+ case KVM_S390_IO_ADAPTER_UNMAP:
+ ret = kvm_s390_adapter_unmap(dev->kvm, req.id, req.addr);
+ break;
+ default:
+ ret = -EINVAL;
+ }
+
+ return ret;
+}
+
+static int clear_io_irq(struct kvm *kvm, struct kvm_device_attr *attr)
+{
+ const u64 isc_mask = 0xffUL << 24; /* all iscs set */
+ u32 schid;
+
+ if (attr->flags)
+ return -EINVAL;
+ if (attr->attr != sizeof(schid))
+ return -EINVAL;
+ if (copy_from_user(&schid, (void __user *) attr->addr, sizeof(schid)))
+ return -EFAULT;
+ if (!schid)
+ return -EINVAL;
+ kfree(kvm_s390_get_io_int(kvm, isc_mask, schid));
+ /*
+ * If userspace is conforming to the architecture, we can have at most
+ * one pending I/O interrupt per subchannel, so this is effectively a
+ * clear all.
+ */
+ return 0;
+}
+
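+/* Switch the adapter-interruption-suppression mode (all/single) for one ISC. */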
+static int modify_ais_mode(struct kvm *kvm, struct kvm_device_attr *attr)
+{
+ struct kvm_s390_float_interrupt *fi = &kvm->arch.float_int;
+ struct kvm_s390_ais_req req;
+ int ret = 0;
+
+ if (!test_kvm_facility(kvm, 72))
+ return -EOPNOTSUPP;
+
+ if (copy_from_user(&req, (void __user *)attr->addr, sizeof(req)))
+ return -EFAULT;
+
+ if (req.isc > MAX_ISC)
+ return -EINVAL;
+
+ trace_kvm_s390_modify_ais_mode(req.isc,
+ (fi->simm & AIS_MODE_MASK(req.isc)) ?
+ (fi->nimm & AIS_MODE_MASK(req.isc)) ?
+ 2 : KVM_S390_AIS_MODE_SINGLE :
+ KVM_S390_AIS_MODE_ALL, req.mode);
+
+ mutex_lock(&fi->ais_lock);
+ switch (req.mode) {
+ case KVM_S390_AIS_MODE_ALL:
+ fi->simm &= ~AIS_MODE_MASK(req.isc);
+ fi->nimm &= ~AIS_MODE_MASK(req.isc);
+ break;
+ case KVM_S390_AIS_MODE_SINGLE:
+ fi->simm |= AIS_MODE_MASK(req.isc);
+ fi->nimm &= ~AIS_MODE_MASK(req.isc);
+ break;
+ default:
+ ret = -EINVAL;
+ }
+ mutex_unlock(&fi->ais_lock);
+
+ return ret;
+}
+
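+/*
+ * Inject an adapter interrupt. With the AIS facility and a suppressible
+ * adapter, injection is skipped while the ISC is masked in the NIMM; in
+ * single-interruption mode the ISC is masked after a successful injection.
+ */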
+static int kvm_s390_inject_airq(struct kvm *kvm,
+ struct s390_io_adapter *adapter)
+{
+ struct kvm_s390_float_interrupt *fi = &kvm->arch.float_int;
+ struct kvm_s390_interrupt s390int = {
+ .type = KVM_S390_INT_IO(1, 0, 0, 0),
+ .parm = 0,
+ .parm64 = isc_to_int_word(adapter->isc),
+ };
+ int ret = 0;
+
+ if (!test_kvm_facility(kvm, 72) || !adapter->suppressible)
+ return kvm_s390_inject_vm(kvm, &s390int);
+
+ mutex_lock(&fi->ais_lock);
+ if (fi->nimm & AIS_MODE_MASK(adapter->isc)) {
+ trace_kvm_s390_airq_suppressed(adapter->id, adapter->isc);
+ goto out;
+ }
+
+ ret = kvm_s390_inject_vm(kvm, &s390int);
+ if (!ret && (fi->simm & AIS_MODE_MASK(adapter->isc))) {
+ fi->nimm |= AIS_MODE_MASK(adapter->isc);
+ trace_kvm_s390_modify_ais_mode(adapter->isc,
+ KVM_S390_AIS_MODE_SINGLE, 2);
+ }
+out:
+ mutex_unlock(&fi->ais_lock);
+ return ret;
+}
+
+static int flic_inject_airq(struct kvm *kvm, struct kvm_device_attr *attr)
+{
+ unsigned int id = attr->attr;
+ struct s390_io_adapter *adapter = get_io_adapter(kvm, id);
+
+ if (!adapter)
+ return -EINVAL;
+
+ return kvm_s390_inject_airq(kvm, adapter);
+}
+
+static int flic_ais_mode_set_all(struct kvm *kvm, struct kvm_device_attr *attr)
+{
+ struct kvm_s390_float_interrupt *fi = &kvm->arch.float_int;
+ struct kvm_s390_ais_all ais;
+
+ if (!test_kvm_facility(kvm, 72))
+ return -EOPNOTSUPP;
+
+ if (copy_from_user(&ais, (void __user *)attr->addr, sizeof(ais)))
+ return -EFAULT;
+
+ mutex_lock(&fi->ais_lock);
+ fi->simm = ais.simm;
+ fi->nimm = ais.nimm;
+ mutex_unlock(&fi->ais_lock);
+
+ return 0;
+}
+
+static int flic_set_attr(struct kvm_device *dev, struct kvm_device_attr *attr)
+{
+ int r = 0;
+ unsigned int i;
+ struct kvm_vcpu *vcpu;
+
+ switch (attr->group) {
+ case KVM_DEV_FLIC_ENQUEUE:
+ r = enqueue_floating_irq(dev, attr);
+ break;
+ case KVM_DEV_FLIC_CLEAR_IRQS:
+ kvm_s390_clear_float_irqs(dev->kvm);
+ break;
+ case KVM_DEV_FLIC_APF_ENABLE:
+ dev->kvm->arch.gmap->pfault_enabled = 1;
+ break;
+ case KVM_DEV_FLIC_APF_DISABLE_WAIT:
+ dev->kvm->arch.gmap->pfault_enabled = 0;
+		/*
+		 * Make sure no async faults are in transition when
+		 * clearing the queues, so we don't need to worry
+		 * about late-arriving workers.
+		 */
+ synchronize_srcu(&dev->kvm->srcu);
+ kvm_for_each_vcpu(i, vcpu, dev->kvm)
+ kvm_clear_async_pf_completion_queue(vcpu);
+ break;
+ case KVM_DEV_FLIC_ADAPTER_REGISTER:
+ r = register_io_adapter(dev, attr);
+ break;
+ case KVM_DEV_FLIC_ADAPTER_MODIFY:
+ r = modify_io_adapter(dev, attr);
+ break;
+ case KVM_DEV_FLIC_CLEAR_IO_IRQ:
+ r = clear_io_irq(dev->kvm, attr);
+ break;
+ case KVM_DEV_FLIC_AISM:
+ r = modify_ais_mode(dev->kvm, attr);
+ break;
+ case KVM_DEV_FLIC_AIRQ_INJECT:
+ r = flic_inject_airq(dev->kvm, attr);
+ break;
+ case KVM_DEV_FLIC_AISM_ALL:
+ r = flic_ais_mode_set_all(dev->kvm, attr);
+ break;
+ default:
+ r = -EINVAL;
+ }
+
+ return r;
+}
+
+static int flic_has_attr(struct kvm_device *dev,
+ struct kvm_device_attr *attr)
+{
+ switch (attr->group) {
+ case KVM_DEV_FLIC_GET_ALL_IRQS:
+ case KVM_DEV_FLIC_ENQUEUE:
+ case KVM_DEV_FLIC_CLEAR_IRQS:
+ case KVM_DEV_FLIC_APF_ENABLE:
+ case KVM_DEV_FLIC_APF_DISABLE_WAIT:
+ case KVM_DEV_FLIC_ADAPTER_REGISTER:
+ case KVM_DEV_FLIC_ADAPTER_MODIFY:
+ case KVM_DEV_FLIC_CLEAR_IO_IRQ:
+ case KVM_DEV_FLIC_AISM:
+ case KVM_DEV_FLIC_AIRQ_INJECT:
+ case KVM_DEV_FLIC_AISM_ALL:
+ return 0;
+ }
+ return -ENXIO;
+}
+
+static int flic_create(struct kvm_device *dev, u32 type)
+{
+ if (!dev)
+ return -EINVAL;
+ if (dev->kvm->arch.flic)
+ return -EINVAL;
+ dev->kvm->arch.flic = dev;
+ return 0;
+}
+
+static void flic_destroy(struct kvm_device *dev)
+{
+ dev->kvm->arch.flic = NULL;
+ kfree(dev);
+}
+
+/* s390 floating irq controller (flic) */
+struct kvm_device_ops kvm_flic_ops = {
+ .name = "kvm-flic",
+ .get_attr = flic_get_attr,
+ .set_attr = flic_set_attr,
+ .has_attr = flic_has_attr,
+ .create = flic_create,
+ .destroy = flic_destroy,
+};
+
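+/*
+ * Calculate the indicator bit number within the mapped page, applying the
+ * adapter's bit-order swap if requested.
+ */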
+static unsigned long get_ind_bit(__u64 addr, unsigned long bit_nr, bool swap)
+{
+ unsigned long bit;
+
+ bit = bit_nr + (addr % PAGE_SIZE) * 8;
+
+ return swap ? (bit ^ (BITS_PER_LONG - 1)) : bit;
+}
+
+static struct s390_map_info *get_map_info(struct s390_io_adapter *adapter,
+ u64 addr)
+{
+ struct s390_map_info *map;
+
+ if (!adapter)
+ return NULL;
+
+ list_for_each_entry(map, &adapter->maps, list) {
+ if (map->guest_addr == addr)
+ return map;
+ }
+ return NULL;
+}
+
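+/*
+ * Set the adapter-local indicator bit and the summary indicator bit in the
+ * mapped guest pages. Returns 1 if the summary bit was newly set, 0 if it
+ * was already set (the interrupt can be coalesced), -1 if a mapping is
+ * missing.
+ */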
+static int adapter_indicators_set(struct kvm *kvm,
+ struct s390_io_adapter *adapter,
+ struct kvm_s390_adapter_int *adapter_int)
+{
+ unsigned long bit;
+ int summary_set, idx;
+ struct s390_map_info *info;
+ void *map;
+
+ info = get_map_info(adapter, adapter_int->ind_addr);
+ if (!info)
+ return -1;
+ map = page_address(info->page);
+ bit = get_ind_bit(info->addr, adapter_int->ind_offset, adapter->swap);
+ set_bit(bit, map);
+ idx = srcu_read_lock(&kvm->srcu);
+ mark_page_dirty(kvm, info->guest_addr >> PAGE_SHIFT);
+ set_page_dirty_lock(info->page);
+ info = get_map_info(adapter, adapter_int->summary_addr);
+ if (!info) {
+ srcu_read_unlock(&kvm->srcu, idx);
+ return -1;
+ }
+ map = page_address(info->page);
+ bit = get_ind_bit(info->addr, adapter_int->summary_offset,
+ adapter->swap);
+ summary_set = test_and_set_bit(bit, map);
+ mark_page_dirty(kvm, info->guest_addr >> PAGE_SHIFT);
+ set_page_dirty_lock(info->page);
+ srcu_read_unlock(&kvm->srcu, idx);
+ return summary_set ? 0 : 1;
+}
+
+/*
+ * < 0 - not injected due to error
+ * = 0 - coalesced, summary indicator already active
+ * > 0 - injected interrupt
+ */
+static int set_adapter_int(struct kvm_kernel_irq_routing_entry *e,
+ struct kvm *kvm, int irq_source_id, int level,
+ bool line_status)
+{
+ int ret;
+ struct s390_io_adapter *adapter;
+
+ /* We're only interested in the 0->1 transition. */
+ if (!level)
+ return 0;
+ adapter = get_io_adapter(kvm, e->adapter.adapter_id);
+ if (!adapter)
+ return -1;
+ down_read(&adapter->maps_lock);
+ ret = adapter_indicators_set(kvm, adapter, &e->adapter);
+ up_read(&adapter->maps_lock);
+ if ((ret > 0) && !adapter->masked) {
+ ret = kvm_s390_inject_airq(kvm, adapter);
+ if (ret == 0)
+ ret = 1;
+ }
+ return ret;
+}
+
+/*
+ * Inject the machine check to the guest.
+ */
+void kvm_s390_reinject_machine_check(struct kvm_vcpu *vcpu,
+ struct mcck_volatile_info *mcck_info)
+{
+ struct kvm_s390_interrupt_info inti;
+ struct kvm_s390_irq irq;
+ struct kvm_s390_mchk_info *mchk;
+ union mci mci;
+ __u64 cr14 = 0; /* upper bits are not used */
+ int rc;
+
+ mci.val = mcck_info->mcic;
+ if (mci.sr)
+ cr14 |= CR14_RECOVERY_SUBMASK;
+ if (mci.dg)
+ cr14 |= CR14_DEGRADATION_SUBMASK;
+ if (mci.w)
+ cr14 |= CR14_WARNING_SUBMASK;
+
+ mchk = mci.ck ? &inti.mchk : &irq.u.mchk;
+ mchk->cr14 = cr14;
+ mchk->mcic = mcck_info->mcic;
+ mchk->ext_damage_code = mcck_info->ext_damage_code;
+ mchk->failing_storage_address = mcck_info->failing_storage_address;
+ if (mci.ck) {
+ /* Inject the floating machine check */
+ inti.type = KVM_S390_MCHK;
+ rc = __inject_vm(vcpu->kvm, &inti);
+ } else {
+ /* Inject the machine check to specified vcpu */
+ irq.type = KVM_S390_MCHK;
+ rc = kvm_s390_inject_vcpu(vcpu, &irq);
+ }
+ WARN_ON_ONCE(rc);
+}
+
+int kvm_set_routing_entry(struct kvm *kvm,
+ struct kvm_kernel_irq_routing_entry *e,
+ const struct kvm_irq_routing_entry *ue)
+{
+ int ret;
+
+ switch (ue->type) {
+ case KVM_IRQ_ROUTING_S390_ADAPTER:
+ e->set = set_adapter_int;
+ e->adapter.summary_addr = ue->u.adapter.summary_addr;
+ e->adapter.ind_addr = ue->u.adapter.ind_addr;
+ e->adapter.summary_offset = ue->u.adapter.summary_offset;
+ e->adapter.ind_offset = ue->u.adapter.ind_offset;
+ e->adapter.adapter_id = ue->u.adapter.adapter_id;
+ ret = 0;
+ break;
+ default:
+ ret = -EINVAL;
+ }
+
+ return ret;
+}
+
+int kvm_set_msi(struct kvm_kernel_irq_routing_entry *e, struct kvm *kvm,
+ int irq_source_id, int level, bool line_status)
+{
+ return -EINVAL;
+}
+
+int kvm_s390_set_irq_state(struct kvm_vcpu *vcpu, void __user *irqstate, int len)
+{
+ struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
+ struct kvm_s390_irq *buf;
+ int r = 0;
+ int n;
+
+ buf = vmalloc(len);
+ if (!buf)
+ return -ENOMEM;
+
+ if (copy_from_user((void *) buf, irqstate, len)) {
+ r = -EFAULT;
+ goto out_free;
+ }
+
+ /*
+ * Don't allow setting the interrupt state
+ * when there are already interrupts pending
+ */
+ spin_lock(&li->lock);
+ if (li->pending_irqs) {
+ r = -EBUSY;
+ goto out_unlock;
+ }
+
+ for (n = 0; n < len / sizeof(*buf); n++) {
+ r = do_inject_vcpu(vcpu, &buf[n]);
+ if (r)
+ break;
+ }
+
+out_unlock:
+ spin_unlock(&li->lock);
+out_free:
+ vfree(buf);
+
+ return r;
+}
+
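+/* Translate one pending local interrupt into its struct kvm_s390_irq form. */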
+static void store_local_irq(struct kvm_s390_local_interrupt *li,
+ struct kvm_s390_irq *irq,
+ unsigned long irq_type)
+{
+ switch (irq_type) {
+ case IRQ_PEND_MCHK_EX:
+ case IRQ_PEND_MCHK_REP:
+ irq->type = KVM_S390_MCHK;
+ irq->u.mchk = li->irq.mchk;
+ break;
+ case IRQ_PEND_PROG:
+ irq->type = KVM_S390_PROGRAM_INT;
+ irq->u.pgm = li->irq.pgm;
+ break;
+ case IRQ_PEND_PFAULT_INIT:
+ irq->type = KVM_S390_INT_PFAULT_INIT;
+ irq->u.ext = li->irq.ext;
+ break;
+ case IRQ_PEND_EXT_EXTERNAL:
+ irq->type = KVM_S390_INT_EXTERNAL_CALL;
+ irq->u.extcall = li->irq.extcall;
+ break;
+ case IRQ_PEND_EXT_CLOCK_COMP:
+ irq->type = KVM_S390_INT_CLOCK_COMP;
+ break;
+ case IRQ_PEND_EXT_CPU_TIMER:
+ irq->type = KVM_S390_INT_CPU_TIMER;
+ break;
+ case IRQ_PEND_SIGP_STOP:
+ irq->type = KVM_S390_SIGP_STOP;
+ irq->u.stop = li->irq.stop;
+ break;
+ case IRQ_PEND_RESTART:
+ irq->type = KVM_S390_RESTART;
+ break;
+ case IRQ_PEND_SET_PREFIX:
+ irq->type = KVM_S390_SIGP_SET_PREFIX;
+ irq->u.prefix = li->irq.prefix;
+ break;
+ }
+}
+
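+/*
+ * Store the VCPU's pending local interrupts into the userspace buffer.
+ * Returns the number of bytes written or a negative error code.
+ */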
+int kvm_s390_get_irq_state(struct kvm_vcpu *vcpu, __u8 __user *buf, int len)
+{
+ int scn;
+ unsigned long sigp_emerg_pending[BITS_TO_LONGS(KVM_MAX_VCPUS)];
+ struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
+ unsigned long pending_irqs;
+ struct kvm_s390_irq irq;
+ unsigned long irq_type;
+ int cpuaddr;
+ int n = 0;
+
+ spin_lock(&li->lock);
+ pending_irqs = li->pending_irqs;
+ memcpy(&sigp_emerg_pending, &li->sigp_emerg_pending,
+ sizeof(sigp_emerg_pending));
+ spin_unlock(&li->lock);
+
+ for_each_set_bit(irq_type, &pending_irqs, IRQ_PEND_COUNT) {
+ memset(&irq, 0, sizeof(irq));
+ if (irq_type == IRQ_PEND_EXT_EMERGENCY)
+ continue;
+ if (n + sizeof(irq) > len)
+ return -ENOBUFS;
+ store_local_irq(&vcpu->arch.local_int, &irq, irq_type);
+ if (copy_to_user(&buf[n], &irq, sizeof(irq)))
+ return -EFAULT;
+ n += sizeof(irq);
+ }
+
+ if (test_bit(IRQ_PEND_EXT_EMERGENCY, &pending_irqs)) {
+ for_each_set_bit(cpuaddr, sigp_emerg_pending, KVM_MAX_VCPUS) {
+ memset(&irq, 0, sizeof(irq));
+ if (n + sizeof(irq) > len)
+ return -ENOBUFS;
+ irq.type = KVM_S390_INT_EMERGENCY;
+ irq.u.emerg.code = cpuaddr;
+ if (copy_to_user(&buf[n], &irq, sizeof(irq)))
+ return -EFAULT;
+ n += sizeof(irq);
+ }
+ }
+
+ if (sca_ext_call_pending(vcpu, &scn)) {
+ if (n + sizeof(irq) > len)
+ return -ENOBUFS;
+ memset(&irq, 0, sizeof(irq));
+ irq.type = KVM_S390_INT_EXTERNAL_CALL;
+ irq.u.extcall.code = scn;
+ if (copy_to_user(&buf[n], &irq, sizeof(irq)))
+ return -EFAULT;
+ n += sizeof(irq);
+ }
+
+ return n;
+}
+
+void kvm_s390_gisa_clear(struct kvm *kvm)
+{
+ if (kvm->arch.gisa) {
+ memset(kvm->arch.gisa, 0, sizeof(struct kvm_s390_gisa));
+ kvm->arch.gisa->next_alert = (u32)(u64)kvm->arch.gisa;
+ VM_EVENT(kvm, 3, "gisa 0x%pK cleared", kvm->arch.gisa);
+ }
+}
+
+void kvm_s390_gisa_init(struct kvm *kvm)
+{
+ if (css_general_characteristics.aiv) {
+ kvm->arch.gisa = &kvm->arch.sie_page2->gisa;
+ VM_EVENT(kvm, 3, "gisa 0x%pK initialized", kvm->arch.gisa);
+ kvm_s390_gisa_clear(kvm);
+ }
+}
+
+void kvm_s390_gisa_destroy(struct kvm *kvm)
+{
+ if (!kvm->arch.gisa)
+ return;
+ kvm->arch.gisa = NULL;
+}
diff --git a/arch/s390/kvm/irq.h b/arch/s390/kvm/irq.h
new file mode 100644
index 000000000..484608c71
--- /dev/null
+++ b/arch/s390/kvm/irq.h
@@ -0,0 +1,19 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * s390 irqchip routines
+ *
+ * Copyright IBM Corp. 2014
+ *
+ * Author(s): Cornelia Huck <cornelia.huck@de.ibm.com>
+ */
+#ifndef __KVM_IRQ_H
+#define __KVM_IRQ_H
+
+#include <linux/kvm_host.h>
+
+static inline int irqchip_in_kernel(struct kvm *kvm)
+{
+ return 1;
+}
+
+#endif
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
new file mode 100644
index 000000000..3aade928c
--- /dev/null
+++ b/arch/s390/kvm/kvm-s390.c
@@ -0,0 +1,4253 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * hosting IBM Z kernel virtual machines (s390x)
+ *
+ * Copyright IBM Corp. 2008, 2018
+ *
+ * Author(s): Carsten Otte <cotte@de.ibm.com>
+ * Christian Borntraeger <borntraeger@de.ibm.com>
+ * Heiko Carstens <heiko.carstens@de.ibm.com>
+ * Christian Ehrhardt <ehrhardt@de.ibm.com>
+ * Jason J. Herne <jjherne@us.ibm.com>
+ */
+
+#include <linux/compiler.h>
+#include <linux/err.h>
+#include <linux/fs.h>
+#include <linux/hrtimer.h>
+#include <linux/init.h>
+#include <linux/kvm.h>
+#include <linux/kvm_host.h>
+#include <linux/mman.h>
+#include <linux/module.h>
+#include <linux/moduleparam.h>
+#include <linux/random.h>
+#include <linux/slab.h>
+#include <linux/timer.h>
+#include <linux/vmalloc.h>
+#include <linux/bitmap.h>
+#include <linux/sched/signal.h>
+#include <linux/string.h>
+
+#include <asm/asm-offsets.h>
+#include <asm/lowcore.h>
+#include <asm/stp.h>
+#include <asm/pgtable.h>
+#include <asm/gmap.h>
+#include <asm/nmi.h>
+#include <asm/switch_to.h>
+#include <asm/isc.h>
+#include <asm/sclp.h>
+#include <asm/cpacf.h>
+#include <asm/timex.h>
+#include "kvm-s390.h"
+#include "gaccess.h"
+
+#define KMSG_COMPONENT "kvm-s390"
+#undef pr_fmt
+#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
+
+#define CREATE_TRACE_POINTS
+#include "trace.h"
+#include "trace-s390.h"
+
+#define MEM_OP_MAX_SIZE 65536 /* Maximum transfer size for KVM_S390_MEM_OP */
+#define LOCAL_IRQS 32
+#define VCPU_IRQS_MAX_BUF (sizeof(struct kvm_s390_irq) * \
+ (KVM_MAX_VCPUS + LOCAL_IRQS))
+
+#define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU
+#define VM_STAT(x) offsetof(struct kvm, stat.x), KVM_STAT_VM
+
+struct kvm_stats_debugfs_item debugfs_entries[] = {
+ { "userspace_handled", VCPU_STAT(exit_userspace) },
+ { "exit_null", VCPU_STAT(exit_null) },
+ { "exit_validity", VCPU_STAT(exit_validity) },
+ { "exit_stop_request", VCPU_STAT(exit_stop_request) },
+ { "exit_external_request", VCPU_STAT(exit_external_request) },
+ { "exit_io_request", VCPU_STAT(exit_io_request) },
+ { "exit_external_interrupt", VCPU_STAT(exit_external_interrupt) },
+ { "exit_instruction", VCPU_STAT(exit_instruction) },
+ { "exit_pei", VCPU_STAT(exit_pei) },
+ { "exit_program_interruption", VCPU_STAT(exit_program_interruption) },
+ { "exit_instr_and_program_int", VCPU_STAT(exit_instr_and_program) },
+ { "exit_operation_exception", VCPU_STAT(exit_operation_exception) },
+ { "halt_successful_poll", VCPU_STAT(halt_successful_poll) },
+ { "halt_attempted_poll", VCPU_STAT(halt_attempted_poll) },
+ { "halt_poll_invalid", VCPU_STAT(halt_poll_invalid) },
+ { "halt_wakeup", VCPU_STAT(halt_wakeup) },
+ { "instruction_lctlg", VCPU_STAT(instruction_lctlg) },
+ { "instruction_lctl", VCPU_STAT(instruction_lctl) },
+ { "instruction_stctl", VCPU_STAT(instruction_stctl) },
+ { "instruction_stctg", VCPU_STAT(instruction_stctg) },
+ { "deliver_ckc", VCPU_STAT(deliver_ckc) },
+ { "deliver_cputm", VCPU_STAT(deliver_cputm) },
+ { "deliver_emergency_signal", VCPU_STAT(deliver_emergency_signal) },
+ { "deliver_external_call", VCPU_STAT(deliver_external_call) },
+ { "deliver_service_signal", VCPU_STAT(deliver_service_signal) },
+ { "deliver_virtio", VCPU_STAT(deliver_virtio) },
+ { "deliver_stop_signal", VCPU_STAT(deliver_stop_signal) },
+ { "deliver_prefix_signal", VCPU_STAT(deliver_prefix_signal) },
+ { "deliver_restart_signal", VCPU_STAT(deliver_restart_signal) },
+ { "deliver_program", VCPU_STAT(deliver_program) },
+ { "deliver_io", VCPU_STAT(deliver_io) },
+ { "deliver_machine_check", VCPU_STAT(deliver_machine_check) },
+ { "exit_wait_state", VCPU_STAT(exit_wait_state) },
+ { "inject_ckc", VCPU_STAT(inject_ckc) },
+ { "inject_cputm", VCPU_STAT(inject_cputm) },
+ { "inject_external_call", VCPU_STAT(inject_external_call) },
+ { "inject_float_mchk", VM_STAT(inject_float_mchk) },
+ { "inject_emergency_signal", VCPU_STAT(inject_emergency_signal) },
+ { "inject_io", VM_STAT(inject_io) },
+ { "inject_mchk", VCPU_STAT(inject_mchk) },
+ { "inject_pfault_done", VM_STAT(inject_pfault_done) },
+ { "inject_program", VCPU_STAT(inject_program) },
+ { "inject_restart", VCPU_STAT(inject_restart) },
+ { "inject_service_signal", VM_STAT(inject_service_signal) },
+ { "inject_set_prefix", VCPU_STAT(inject_set_prefix) },
+ { "inject_stop_signal", VCPU_STAT(inject_stop_signal) },
+ { "inject_pfault_init", VCPU_STAT(inject_pfault_init) },
+ { "inject_virtio", VM_STAT(inject_virtio) },
+ { "instruction_epsw", VCPU_STAT(instruction_epsw) },
+ { "instruction_gs", VCPU_STAT(instruction_gs) },
+ { "instruction_io_other", VCPU_STAT(instruction_io_other) },
+ { "instruction_lpsw", VCPU_STAT(instruction_lpsw) },
+ { "instruction_lpswe", VCPU_STAT(instruction_lpswe) },
+ { "instruction_pfmf", VCPU_STAT(instruction_pfmf) },
+ { "instruction_ptff", VCPU_STAT(instruction_ptff) },
+ { "instruction_stidp", VCPU_STAT(instruction_stidp) },
+ { "instruction_sck", VCPU_STAT(instruction_sck) },
+ { "instruction_sckpf", VCPU_STAT(instruction_sckpf) },
+ { "instruction_spx", VCPU_STAT(instruction_spx) },
+ { "instruction_stpx", VCPU_STAT(instruction_stpx) },
+ { "instruction_stap", VCPU_STAT(instruction_stap) },
+ { "instruction_iske", VCPU_STAT(instruction_iske) },
+ { "instruction_ri", VCPU_STAT(instruction_ri) },
+ { "instruction_rrbe", VCPU_STAT(instruction_rrbe) },
+ { "instruction_sske", VCPU_STAT(instruction_sske) },
+ { "instruction_ipte_interlock", VCPU_STAT(instruction_ipte_interlock) },
+ { "instruction_essa", VCPU_STAT(instruction_essa) },
+ { "instruction_stsi", VCPU_STAT(instruction_stsi) },
+ { "instruction_stfl", VCPU_STAT(instruction_stfl) },
+ { "instruction_tb", VCPU_STAT(instruction_tb) },
+ { "instruction_tpi", VCPU_STAT(instruction_tpi) },
+ { "instruction_tprot", VCPU_STAT(instruction_tprot) },
+ { "instruction_tsch", VCPU_STAT(instruction_tsch) },
+ { "instruction_sthyi", VCPU_STAT(instruction_sthyi) },
+ { "instruction_sie", VCPU_STAT(instruction_sie) },
+ { "instruction_sigp_sense", VCPU_STAT(instruction_sigp_sense) },
+ { "instruction_sigp_sense_running", VCPU_STAT(instruction_sigp_sense_running) },
+ { "instruction_sigp_external_call", VCPU_STAT(instruction_sigp_external_call) },
+ { "instruction_sigp_emergency", VCPU_STAT(instruction_sigp_emergency) },
+ { "instruction_sigp_cond_emergency", VCPU_STAT(instruction_sigp_cond_emergency) },
+ { "instruction_sigp_start", VCPU_STAT(instruction_sigp_start) },
+ { "instruction_sigp_stop", VCPU_STAT(instruction_sigp_stop) },
+ { "instruction_sigp_stop_store_status", VCPU_STAT(instruction_sigp_stop_store_status) },
+ { "instruction_sigp_store_status", VCPU_STAT(instruction_sigp_store_status) },
+ { "instruction_sigp_store_adtl_status", VCPU_STAT(instruction_sigp_store_adtl_status) },
+ { "instruction_sigp_set_arch", VCPU_STAT(instruction_sigp_arch) },
+ { "instruction_sigp_set_prefix", VCPU_STAT(instruction_sigp_prefix) },
+ { "instruction_sigp_restart", VCPU_STAT(instruction_sigp_restart) },
+ { "instruction_sigp_cpu_reset", VCPU_STAT(instruction_sigp_cpu_reset) },
+ { "instruction_sigp_init_cpu_reset", VCPU_STAT(instruction_sigp_init_cpu_reset) },
+ { "instruction_sigp_unknown", VCPU_STAT(instruction_sigp_unknown) },
+ { "instruction_diag_10", VCPU_STAT(diagnose_10) },
+ { "instruction_diag_44", VCPU_STAT(diagnose_44) },
+ { "instruction_diag_9c", VCPU_STAT(diagnose_9c) },
+ { "instruction_diag_258", VCPU_STAT(diagnose_258) },
+ { "instruction_diag_308", VCPU_STAT(diagnose_308) },
+ { "instruction_diag_500", VCPU_STAT(diagnose_500) },
+ { "instruction_diag_other", VCPU_STAT(diagnose_other) },
+ { NULL }
+};
+
+struct kvm_s390_tod_clock_ext {
+ __u8 epoch_idx;
+ __u64 tod;
+ __u8 reserved[7];
+} __packed;
+
+/* allow nested virtualization in KVM (if enabled by user space) */
+static int nested;
+module_param(nested, int, S_IRUGO);
+MODULE_PARM_DESC(nested, "Nested virtualization support");
+
+/* allow 1m huge page guest backing, if !nested */
+static int hpage;
+module_param(hpage, int, 0444);
+MODULE_PARM_DESC(hpage, "1m huge page backing support");
+
+/*
+ * For now we handle at most 16 double words as this is what the s390 base
+ * kernel handles and stores in the prefix page. If we ever need to go beyond
+ * this, it requires code changes, but the external uapi can stay.
+ */
+#define SIZE_INTERNAL 16
+
+/*
+ * Base feature mask that defines the default mask for facilities. Consists
+ * of the defines in FACILITIES_KVM and the non-hypervisor managed bits.
+ */
+static unsigned long kvm_s390_fac_base[SIZE_INTERNAL] = { FACILITIES_KVM };
+/*
+ * Extended feature mask. Consists of the defines in FACILITIES_KVM_CPUMODEL
+ * and defines the facilities that can be enabled via a cpu model.
+ */
+static unsigned long kvm_s390_fac_ext[SIZE_INTERNAL] = { FACILITIES_KVM_CPUMODEL };
+
+static unsigned long kvm_s390_fac_size(void)
+{
+ BUILD_BUG_ON(SIZE_INTERNAL > S390_ARCH_FAC_MASK_SIZE_U64);
+ BUILD_BUG_ON(SIZE_INTERNAL > S390_ARCH_FAC_LIST_SIZE_U64);
+ BUILD_BUG_ON(SIZE_INTERNAL * sizeof(unsigned long) >
+ sizeof(S390_lowcore.stfle_fac_list));
+
+ return SIZE_INTERNAL;
+}
+
+/* available cpu features supported by kvm */
+static DECLARE_BITMAP(kvm_s390_available_cpu_feat, KVM_S390_VM_CPU_FEAT_NR_BITS);
+/* available subfunctions indicated via query / "test bit" */
+static struct kvm_s390_vm_cpu_subfunc kvm_s390_available_subfunc;
+
+static struct gmap_notifier gmap_notifier;
+static struct gmap_notifier vsie_gmap_notifier;
+debug_info_t *kvm_s390_dbf;
+
+/* Section: not file related */
+int kvm_arch_hardware_enable(void)
+{
+ /* every s390 is virtualization enabled ;-) */
+ return 0;
+}
+
+static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
+ unsigned long end);
+
+static void kvm_clock_sync_scb(struct kvm_s390_sie_block *scb, u64 delta)
+{
+ u8 delta_idx = 0;
+
+	/*
+	 * The TOD jumps by delta; we have to compensate by adding
+	 * -delta to the epoch.
+	 */
+ delta = -delta;
+
+ /* sign-extension - we're adding to signed values below */
+ if ((s64)delta < 0)
+ delta_idx = -1;
+
+ scb->epoch += delta;
+ if (scb->ecd & ECD_MEF) {
+ scb->epdx += delta_idx;
+ if (scb->epoch < delta)
+ scb->epdx += 1;
+ }
+}
+
+/*
+ * This callback is executed during stop_machine(). All CPUs are therefore
+ * temporarily stopped. In order not to change guest behavior, we have to
+ * disable preemption whenever we touch the epoch of kvm and the VCPUs,
+ * so a CPU won't be stopped while calculating with the epoch.
+ */
+static int kvm_clock_sync(struct notifier_block *notifier, unsigned long val,
+ void *v)
+{
+ struct kvm *kvm;
+ struct kvm_vcpu *vcpu;
+ int i;
+ unsigned long long *delta = v;
+
+ list_for_each_entry(kvm, &vm_list, vm_list) {
+ kvm_for_each_vcpu(i, vcpu, kvm) {
+ kvm_clock_sync_scb(vcpu->arch.sie_block, *delta);
+ if (i == 0) {
+ kvm->arch.epoch = vcpu->arch.sie_block->epoch;
+ kvm->arch.epdx = vcpu->arch.sie_block->epdx;
+ }
+ if (vcpu->arch.cputm_enabled)
+ vcpu->arch.cputm_start += *delta;
+ if (vcpu->arch.vsie_block)
+ kvm_clock_sync_scb(vcpu->arch.vsie_block,
+ *delta);
+ }
+ }
+ return NOTIFY_OK;
+}
+
+static struct notifier_block kvm_clock_notifier = {
+ .notifier_call = kvm_clock_sync,
+};
+
+int kvm_arch_hardware_setup(void)
+{
+ gmap_notifier.notifier_call = kvm_gmap_notifier;
+ gmap_register_pte_notifier(&gmap_notifier);
+ vsie_gmap_notifier.notifier_call = kvm_s390_vsie_gmap_notifier;
+ gmap_register_pte_notifier(&vsie_gmap_notifier);
+ atomic_notifier_chain_register(&s390_epoch_delta_notifier,
+ &kvm_clock_notifier);
+ return 0;
+}
+
+void kvm_arch_hardware_unsetup(void)
+{
+ gmap_unregister_pte_notifier(&gmap_notifier);
+ gmap_unregister_pte_notifier(&vsie_gmap_notifier);
+ atomic_notifier_chain_unregister(&s390_epoch_delta_notifier,
+ &kvm_clock_notifier);
+}
+
+static void allow_cpu_feat(unsigned long nr)
+{
+ set_bit_inv(nr, kvm_s390_available_cpu_feat);
+}
+
+static inline int plo_test_bit(unsigned char nr)
+{
+ register unsigned long r0 asm("0") = (unsigned long) nr | 0x100;
+ int cc;
+
+ asm volatile(
+ /* Parameter registers are ignored for "test bit" */
+ " plo 0,0,0,0(0)\n"
+ " ipm %0\n"
+ " srl %0,28\n"
+ : "=d" (cc)
+ : "d" (r0)
+ : "cc");
+ return cc == 0;
+}
+
+static void kvm_s390_cpu_feat_init(void)
+{
+ int i;
+
+ for (i = 0; i < 256; ++i) {
+ if (plo_test_bit(i))
+ kvm_s390_available_subfunc.plo[i >> 3] |= 0x80 >> (i & 7);
+ }
+
+ if (test_facility(28)) /* TOD-clock steering */
+ ptff(kvm_s390_available_subfunc.ptff,
+ sizeof(kvm_s390_available_subfunc.ptff),
+ PTFF_QAF);
+
+ if (test_facility(17)) { /* MSA */
+ __cpacf_query(CPACF_KMAC, (cpacf_mask_t *)
+ kvm_s390_available_subfunc.kmac);
+ __cpacf_query(CPACF_KMC, (cpacf_mask_t *)
+ kvm_s390_available_subfunc.kmc);
+ __cpacf_query(CPACF_KM, (cpacf_mask_t *)
+ kvm_s390_available_subfunc.km);
+ __cpacf_query(CPACF_KIMD, (cpacf_mask_t *)
+ kvm_s390_available_subfunc.kimd);
+ __cpacf_query(CPACF_KLMD, (cpacf_mask_t *)
+ kvm_s390_available_subfunc.klmd);
+ }
+ if (test_facility(76)) /* MSA3 */
+ __cpacf_query(CPACF_PCKMO, (cpacf_mask_t *)
+ kvm_s390_available_subfunc.pckmo);
+ if (test_facility(77)) { /* MSA4 */
+ __cpacf_query(CPACF_KMCTR, (cpacf_mask_t *)
+ kvm_s390_available_subfunc.kmctr);
+ __cpacf_query(CPACF_KMF, (cpacf_mask_t *)
+ kvm_s390_available_subfunc.kmf);
+ __cpacf_query(CPACF_KMO, (cpacf_mask_t *)
+ kvm_s390_available_subfunc.kmo);
+ __cpacf_query(CPACF_PCC, (cpacf_mask_t *)
+ kvm_s390_available_subfunc.pcc);
+ }
+ if (test_facility(57)) /* MSA5 */
+ __cpacf_query(CPACF_PRNO, (cpacf_mask_t *)
+ kvm_s390_available_subfunc.ppno);
+
+ if (test_facility(146)) /* MSA8 */
+ __cpacf_query(CPACF_KMA, (cpacf_mask_t *)
+ kvm_s390_available_subfunc.kma);
+
+ if (MACHINE_HAS_ESOP)
+ allow_cpu_feat(KVM_S390_VM_CPU_FEAT_ESOP);
+ /*
+ * We need SIE support, ESOP (PROT_READ protection for gmap_shadow),
+ * 64bit SCAO (SCA passthrough) and IDTE (for gmap_shadow unshadowing).
+ */
+ if (!sclp.has_sief2 || !MACHINE_HAS_ESOP || !sclp.has_64bscao ||
+ !test_facility(3) || !nested)
+ return;
+ allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIEF2);
+ if (sclp.has_64bscao)
+ allow_cpu_feat(KVM_S390_VM_CPU_FEAT_64BSCAO);
+ if (sclp.has_siif)
+ allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIIF);
+ if (sclp.has_gpere)
+ allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GPERE);
+ if (sclp.has_gsls)
+ allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GSLS);
+ if (sclp.has_ib)
+ allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IB);
+ if (sclp.has_cei)
+ allow_cpu_feat(KVM_S390_VM_CPU_FEAT_CEI);
+ if (sclp.has_ibs)
+ allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IBS);
+ if (sclp.has_kss)
+ allow_cpu_feat(KVM_S390_VM_CPU_FEAT_KSS);
+ /*
+ * KVM_S390_VM_CPU_FEAT_SKEY: Wrong shadow of PTE.I bits will make
+ * all skey handling functions read/set the skey from the PGSTE
+ * instead of the real storage key.
+ *
+ * KVM_S390_VM_CPU_FEAT_CMMA: Wrong shadow of PTE.I bits will make
+ * pages being detected as preserved although they are resident.
+ *
+ * KVM_S390_VM_CPU_FEAT_PFMFI: Wrong shadow of PTE.I bits will
+ * have the same effect as for KVM_S390_VM_CPU_FEAT_SKEY.
+ *
+ * For KVM_S390_VM_CPU_FEAT_SKEY, KVM_S390_VM_CPU_FEAT_CMMA and
+ * KVM_S390_VM_CPU_FEAT_PFMFI, all PTE.I and PGSTE bits have to be
+ * correctly shadowed. We can do that for the PGSTE but not for PTE.I.
+ *
+ * KVM_S390_VM_CPU_FEAT_SIGPIF: Wrong SCB addresses in the SCA. We
+ * cannot easily shadow the SCA because of the ipte lock.
+ */
+}
+
+int kvm_arch_init(void *opaque)
+{
+ int rc;
+
+ kvm_s390_dbf = debug_register("kvm-trace", 32, 1, 7 * sizeof(long));
+ if (!kvm_s390_dbf)
+ return -ENOMEM;
+
+ if (debug_register_view(kvm_s390_dbf, &debug_sprintf_view)) {
+ rc = -ENOMEM;
+ goto out_debug_unreg;
+ }
+
+ kvm_s390_cpu_feat_init();
+
+ /* Register floating interrupt controller interface. */
+ rc = kvm_register_device_ops(&kvm_flic_ops, KVM_DEV_TYPE_FLIC);
+ if (rc) {
+ pr_err("Failed to register FLIC rc=%d\n", rc);
+ goto out_debug_unreg;
+ }
+ return 0;
+
+out_debug_unreg:
+ debug_unregister(kvm_s390_dbf);
+ return rc;
+}
+
+void kvm_arch_exit(void)
+{
+ debug_unregister(kvm_s390_dbf);
+}
+
+/* Section: device related */
+long kvm_arch_dev_ioctl(struct file *filp,
+ unsigned int ioctl, unsigned long arg)
+{
+ if (ioctl == KVM_S390_ENABLE_SIE)
+ return s390_enable_sie();
+ return -EINVAL;
+}
+
+int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
+{
+ int r;
+
+ switch (ext) {
+ case KVM_CAP_S390_PSW:
+ case KVM_CAP_S390_GMAP:
+ case KVM_CAP_SYNC_MMU:
+#ifdef CONFIG_KVM_S390_UCONTROL
+ case KVM_CAP_S390_UCONTROL:
+#endif
+ case KVM_CAP_ASYNC_PF:
+ case KVM_CAP_SYNC_REGS:
+ case KVM_CAP_ONE_REG:
+ case KVM_CAP_ENABLE_CAP:
+ case KVM_CAP_S390_CSS_SUPPORT:
+ case KVM_CAP_IOEVENTFD:
+ case KVM_CAP_DEVICE_CTRL:
+ case KVM_CAP_ENABLE_CAP_VM:
+ case KVM_CAP_S390_IRQCHIP:
+ case KVM_CAP_VM_ATTRIBUTES:
+ case KVM_CAP_MP_STATE:
+ case KVM_CAP_IMMEDIATE_EXIT:
+ case KVM_CAP_S390_INJECT_IRQ:
+ case KVM_CAP_S390_USER_SIGP:
+ case KVM_CAP_S390_USER_STSI:
+ case KVM_CAP_S390_SKEYS:
+ case KVM_CAP_S390_IRQ_STATE:
+ case KVM_CAP_S390_USER_INSTR0:
+ case KVM_CAP_S390_CMMA_MIGRATION:
+ case KVM_CAP_S390_AIS:
+ case KVM_CAP_S390_AIS_MIGRATION:
+ r = 1;
+ break;
+ case KVM_CAP_S390_HPAGE_1M:
+ r = 0;
+ if (hpage && !kvm_is_ucontrol(kvm))
+ r = 1;
+ break;
+ case KVM_CAP_S390_MEM_OP:
+ r = MEM_OP_MAX_SIZE;
+ break;
+ case KVM_CAP_NR_VCPUS:
+ case KVM_CAP_MAX_VCPUS:
+ case KVM_CAP_MAX_VCPU_ID:
+ r = KVM_S390_BSCA_CPU_SLOTS;
+ if (!kvm_s390_use_sca_entries())
+ r = KVM_MAX_VCPUS;
+ else if (sclp.has_esca && sclp.has_64bscao)
+ r = KVM_S390_ESCA_CPU_SLOTS;
+ break;
+ case KVM_CAP_NR_MEMSLOTS:
+ r = KVM_USER_MEM_SLOTS;
+ break;
+ case KVM_CAP_S390_COW:
+ r = MACHINE_HAS_ESOP;
+ break;
+ case KVM_CAP_S390_VECTOR_REGISTERS:
+ r = MACHINE_HAS_VX;
+ break;
+ case KVM_CAP_S390_RI:
+ r = test_facility(64);
+ break;
+ case KVM_CAP_S390_GS:
+ r = test_facility(133);
+ break;
+ case KVM_CAP_S390_BPB:
+ r = test_facility(82);
+ break;
+ default:
+ r = 0;
+ }
+ return r;
+}
+
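+/*
+ * Transfer the dirty bits from the gmap to KVM's dirty bitmap, walking the
+ * memslot segment by segment.
+ */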
+static void kvm_s390_sync_dirty_log(struct kvm *kvm,
+ struct kvm_memory_slot *memslot)
+{
+ int i;
+ gfn_t cur_gfn, last_gfn;
+ unsigned long gaddr, vmaddr;
+ struct gmap *gmap = kvm->arch.gmap;
+ DECLARE_BITMAP(bitmap, _PAGE_ENTRIES);
+
+ /* Loop over all guest segments */
+ cur_gfn = memslot->base_gfn;
+ last_gfn = memslot->base_gfn + memslot->npages;
+ for (; cur_gfn <= last_gfn; cur_gfn += _PAGE_ENTRIES) {
+ gaddr = gfn_to_gpa(cur_gfn);
+ vmaddr = gfn_to_hva_memslot(memslot, cur_gfn);
+ if (kvm_is_error_hva(vmaddr))
+ continue;
+
+ bitmap_zero(bitmap, _PAGE_ENTRIES);
+ gmap_sync_dirty_log_pmd(gmap, bitmap, gaddr, vmaddr);
+ for (i = 0; i < _PAGE_ENTRIES; i++) {
+ if (test_bit(i, bitmap))
+ mark_page_dirty(kvm, cur_gfn + i);
+ }
+
+ if (fatal_signal_pending(current))
+ return;
+ cond_resched();
+ }
+}
+
+/* Section: vm related */
+static void sca_del_vcpu(struct kvm_vcpu *vcpu);
+
+/*
+ * Get (and clear) the dirty memory log for a memory slot.
+ */
+int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
+ struct kvm_dirty_log *log)
+{
+ int r;
+ unsigned long n;
+ struct kvm_memslots *slots;
+ struct kvm_memory_slot *memslot;
+ int is_dirty = 0;
+
+ if (kvm_is_ucontrol(kvm))
+ return -EINVAL;
+
+ mutex_lock(&kvm->slots_lock);
+
+ r = -EINVAL;
+ if (log->slot >= KVM_USER_MEM_SLOTS)
+ goto out;
+
+ slots = kvm_memslots(kvm);
+ memslot = id_to_memslot(slots, log->slot);
+ r = -ENOENT;
+ if (!memslot->dirty_bitmap)
+ goto out;
+
+ kvm_s390_sync_dirty_log(kvm, memslot);
+ r = kvm_get_dirty_log(kvm, log, &is_dirty);
+ if (r)
+ goto out;
+
+ /* Clear the dirty log */
+ if (is_dirty) {
+ n = kvm_dirty_bitmap_bytes(memslot);
+ memset(memslot->dirty_bitmap, 0, n);
+ }
+ r = 0;
+out:
+ mutex_unlock(&kvm->slots_lock);
+ return r;
+}
+
+static void icpt_operexc_on_all_vcpus(struct kvm *kvm)
+{
+ unsigned int i;
+ struct kvm_vcpu *vcpu;
+
+ kvm_for_each_vcpu(i, vcpu, kvm) {
+ kvm_s390_sync_request(KVM_REQ_ICPT_OPEREXC, vcpu);
+ }
+}
+
+static int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap)
+{
+ int r;
+
+ if (cap->flags)
+ return -EINVAL;
+
+ switch (cap->cap) {
+ case KVM_CAP_S390_IRQCHIP:
+ VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_IRQCHIP");
+ kvm->arch.use_irqchip = 1;
+ r = 0;
+ break;
+ case KVM_CAP_S390_USER_SIGP:
+ VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_SIGP");
+ kvm->arch.user_sigp = 1;
+ r = 0;
+ break;
+ case KVM_CAP_S390_VECTOR_REGISTERS:
+ mutex_lock(&kvm->lock);
+ if (kvm->created_vcpus) {
+ r = -EBUSY;
+ } else if (MACHINE_HAS_VX) {
+ set_kvm_facility(kvm->arch.model.fac_mask, 129);
+ set_kvm_facility(kvm->arch.model.fac_list, 129);
+ if (test_facility(134)) {
+ set_kvm_facility(kvm->arch.model.fac_mask, 134);
+ set_kvm_facility(kvm->arch.model.fac_list, 134);
+ }
+ if (test_facility(135)) {
+ set_kvm_facility(kvm->arch.model.fac_mask, 135);
+ set_kvm_facility(kvm->arch.model.fac_list, 135);
+ }
+ r = 0;
+ } else
+ r = -EINVAL;
+ mutex_unlock(&kvm->lock);
+ VM_EVENT(kvm, 3, "ENABLE: CAP_S390_VECTOR_REGISTERS %s",
+ r ? "(not available)" : "(success)");
+ break;
+ case KVM_CAP_S390_RI:
+ r = -EINVAL;
+ mutex_lock(&kvm->lock);
+ if (kvm->created_vcpus) {
+ r = -EBUSY;
+ } else if (test_facility(64)) {
+ set_kvm_facility(kvm->arch.model.fac_mask, 64);
+ set_kvm_facility(kvm->arch.model.fac_list, 64);
+ r = 0;
+ }
+ mutex_unlock(&kvm->lock);
+ VM_EVENT(kvm, 3, "ENABLE: CAP_S390_RI %s",
+ r ? "(not available)" : "(success)");
+ break;
+ case KVM_CAP_S390_AIS:
+ mutex_lock(&kvm->lock);
+ if (kvm->created_vcpus) {
+ r = -EBUSY;
+ } else {
+ set_kvm_facility(kvm->arch.model.fac_mask, 72);
+ set_kvm_facility(kvm->arch.model.fac_list, 72);
+ r = 0;
+ }
+ mutex_unlock(&kvm->lock);
+ VM_EVENT(kvm, 3, "ENABLE: AIS %s",
+ r ? "(not available)" : "(success)");
+ break;
+ case KVM_CAP_S390_GS:
+ r = -EINVAL;
+ mutex_lock(&kvm->lock);
+ if (kvm->created_vcpus) {
+ r = -EBUSY;
+ } else if (test_facility(133)) {
+ set_kvm_facility(kvm->arch.model.fac_mask, 133);
+ set_kvm_facility(kvm->arch.model.fac_list, 133);
+ r = 0;
+ }
+ mutex_unlock(&kvm->lock);
+ VM_EVENT(kvm, 3, "ENABLE: CAP_S390_GS %s",
+ r ? "(not available)" : "(success)");
+ break;
+ case KVM_CAP_S390_HPAGE_1M:
+ mutex_lock(&kvm->lock);
+ if (kvm->created_vcpus)
+ r = -EBUSY;
+ else if (!hpage || kvm->arch.use_cmma || kvm_is_ucontrol(kvm))
+ r = -EINVAL;
+ else {
+ r = 0;
+ down_write(&kvm->mm->mmap_sem);
+ kvm->mm->context.allow_gmap_hpage_1m = 1;
+ up_write(&kvm->mm->mmap_sem);
+				/*
+				 * We might have to create fake 4k page
+				 * tables. To avoid the hardware working on
+				 * stale PGSTEs, we emulate these
+				 * instructions.
+				 */
+ kvm->arch.use_skf = 0;
+ kvm->arch.use_pfmfi = 0;
+ }
+ mutex_unlock(&kvm->lock);
+ VM_EVENT(kvm, 3, "ENABLE: CAP_S390_HPAGE %s",
+ r ? "(not available)" : "(success)");
+ break;
+ case KVM_CAP_S390_USER_STSI:
+ VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_STSI");
+ kvm->arch.user_stsi = 1;
+ r = 0;
+ break;
+ case KVM_CAP_S390_USER_INSTR0:
+ VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_INSTR0");
+ kvm->arch.user_instr0 = 1;
+ icpt_operexc_on_all_vcpus(kvm);
+ r = 0;
+ break;
+ default:
+ r = -EINVAL;
+ break;
+ }
+ return r;
+}
+
+static int kvm_s390_get_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
+{
+ int ret;
+
+ switch (attr->attr) {
+ case KVM_S390_VM_MEM_LIMIT_SIZE:
+ ret = 0;
+ VM_EVENT(kvm, 3, "QUERY: max guest memory: %lu bytes",
+ kvm->arch.mem_limit);
+ if (put_user(kvm->arch.mem_limit, (u64 __user *)attr->addr))
+ ret = -EFAULT;
+ break;
+ default:
+ ret = -ENXIO;
+ break;
+ }
+ return ret;
+}
+
+static int kvm_s390_set_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
+{
+ int ret;
+ unsigned int idx;
+ switch (attr->attr) {
+ case KVM_S390_VM_MEM_ENABLE_CMMA:
+ ret = -ENXIO;
+ if (!sclp.has_cmma)
+ break;
+
+ VM_EVENT(kvm, 3, "%s", "ENABLE: CMMA support");
+ mutex_lock(&kvm->lock);
+ if (kvm->created_vcpus)
+ ret = -EBUSY;
+ else if (kvm->mm->context.allow_gmap_hpage_1m)
+ ret = -EINVAL;
+ else {
+ kvm->arch.use_cmma = 1;
+ /* Not compatible with cmma. */
+ kvm->arch.use_pfmfi = 0;
+ ret = 0;
+ }
+ mutex_unlock(&kvm->lock);
+ break;
+ case KVM_S390_VM_MEM_CLR_CMMA:
+ ret = -ENXIO;
+ if (!sclp.has_cmma)
+ break;
+ ret = -EINVAL;
+ if (!kvm->arch.use_cmma)
+ break;
+
+ VM_EVENT(kvm, 3, "%s", "RESET: CMMA states");
+ mutex_lock(&kvm->lock);
+ idx = srcu_read_lock(&kvm->srcu);
+ s390_reset_cmma(kvm->arch.gmap->mm);
+ srcu_read_unlock(&kvm->srcu, idx);
+ mutex_unlock(&kvm->lock);
+ ret = 0;
+ break;
+ case KVM_S390_VM_MEM_LIMIT_SIZE: {
+ unsigned long new_limit;
+
+ if (kvm_is_ucontrol(kvm))
+ return -EINVAL;
+
+ if (get_user(new_limit, (u64 __user *)attr->addr))
+ return -EFAULT;
+
+ if (kvm->arch.mem_limit != KVM_S390_NO_MEM_LIMIT &&
+ new_limit > kvm->arch.mem_limit)
+ return -E2BIG;
+
+ if (!new_limit)
+ return -EINVAL;
+
+ /* gmap_create takes last usable address */
+ if (new_limit != KVM_S390_NO_MEM_LIMIT)
+ new_limit -= 1;
+
+ ret = -EBUSY;
+ mutex_lock(&kvm->lock);
+ if (!kvm->created_vcpus) {
+ /* gmap_create will round the limit up */
+ struct gmap *new = gmap_create(current->mm, new_limit);
+
+ if (!new) {
+ ret = -ENOMEM;
+ } else {
+ gmap_remove(kvm->arch.gmap);
+ new->private = kvm;
+ kvm->arch.gmap = new;
+ ret = 0;
+ }
+ }
+ mutex_unlock(&kvm->lock);
+ VM_EVENT(kvm, 3, "SET: max guest address: %lu", new_limit);
+ VM_EVENT(kvm, 3, "New guest asce: 0x%pK",
+ (void *) kvm->arch.gmap->asce);
+ break;
+ }
+ default:
+ ret = -ENXIO;
+ break;
+ }
+ return ret;
+}
+
+static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu);
+
+void kvm_s390_vcpu_crypto_reset_all(struct kvm *kvm)
+{
+ struct kvm_vcpu *vcpu;
+ int i;
+
+ kvm_s390_vcpu_block_all(kvm);
+
+ kvm_for_each_vcpu(i, vcpu, kvm)
+ kvm_s390_vcpu_crypto_setup(vcpu);
+
+ kvm_s390_vcpu_unblock_all(kvm);
+}
+
+static int kvm_s390_vm_set_crypto(struct kvm *kvm, struct kvm_device_attr *attr)
+{
+ if (!test_kvm_facility(kvm, 76))
+ return -EINVAL;
+
+ mutex_lock(&kvm->lock);
+ switch (attr->attr) {
+ case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
+ get_random_bytes(
+ kvm->arch.crypto.crycb->aes_wrapping_key_mask,
+ sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
+ kvm->arch.crypto.aes_kw = 1;
+ VM_EVENT(kvm, 3, "%s", "ENABLE: AES keywrapping support");
+ break;
+ case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
+ get_random_bytes(
+ kvm->arch.crypto.crycb->dea_wrapping_key_mask,
+ sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
+ kvm->arch.crypto.dea_kw = 1;
+ VM_EVENT(kvm, 3, "%s", "ENABLE: DEA keywrapping support");
+ break;
+ case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
+ kvm->arch.crypto.aes_kw = 0;
+ memset(kvm->arch.crypto.crycb->aes_wrapping_key_mask, 0,
+ sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
+ VM_EVENT(kvm, 3, "%s", "DISABLE: AES keywrapping support");
+ break;
+ case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
+ kvm->arch.crypto.dea_kw = 0;
+ memset(kvm->arch.crypto.crycb->dea_wrapping_key_mask, 0,
+ sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
+ VM_EVENT(kvm, 3, "%s", "DISABLE: DEA keywrapping support");
+ break;
+ default:
+ mutex_unlock(&kvm->lock);
+ return -ENXIO;
+ }
+
+ kvm_s390_vcpu_crypto_reset_all(kvm);
+ mutex_unlock(&kvm->lock);
+ return 0;
+}
+
+static void kvm_s390_sync_request_broadcast(struct kvm *kvm, int req)
+{
+ int cx;
+ struct kvm_vcpu *vcpu;
+
+ kvm_for_each_vcpu(cx, vcpu, kvm)
+ kvm_s390_sync_request(req, vcpu);
+}
+
+/*
+ * Must be called with kvm->srcu held to avoid races on memslots, and with
+ * kvm->slots_lock to avoid races with ourselves and kvm_s390_vm_stop_migration.
+ */
+static int kvm_s390_vm_start_migration(struct kvm *kvm)
+{
+ struct kvm_memory_slot *ms;
+ struct kvm_memslots *slots;
+ unsigned long ram_pages = 0;
+ int slotnr;
+
+ /* migration mode already enabled */
+ if (kvm->arch.migration_mode)
+ return 0;
+ slots = kvm_memslots(kvm);
+ if (!slots || !slots->used_slots)
+ return -EINVAL;
+
+ if (!kvm->arch.use_cmma) {
+ kvm->arch.migration_mode = 1;
+ return 0;
+ }
+ /* mark all the pages in active slots as dirty */
+ for (slotnr = 0; slotnr < slots->used_slots; slotnr++) {
+ ms = slots->memslots + slotnr;
+ if (!ms->dirty_bitmap)
+ return -EINVAL;
+ /*
+ * The second half of the bitmap is only used on x86,
+ * and would be wasted otherwise, so we put it to good
+ * use here to keep track of the state of the storage
+ * attributes.
+ */
+ memset(kvm_second_dirty_bitmap(ms), 0xff, kvm_dirty_bitmap_bytes(ms));
+ ram_pages += ms->npages;
+ }
+ atomic64_set(&kvm->arch.cmma_dirty_pages, ram_pages);
+ kvm->arch.migration_mode = 1;
+ kvm_s390_sync_request_broadcast(kvm, KVM_REQ_START_MIGRATION);
+ return 0;
+}
+
+/*
+ * Must be called with kvm->slots_lock to avoid races with ourselves and
+ * kvm_s390_vm_start_migration.
+ */
+static int kvm_s390_vm_stop_migration(struct kvm *kvm)
+{
+ /* migration mode already disabled */
+ if (!kvm->arch.migration_mode)
+ return 0;
+ kvm->arch.migration_mode = 0;
+ if (kvm->arch.use_cmma)
+ kvm_s390_sync_request_broadcast(kvm, KVM_REQ_STOP_MIGRATION);
+ return 0;
+}
+
+static int kvm_s390_vm_set_migration(struct kvm *kvm,
+ struct kvm_device_attr *attr)
+{
+ int res = -ENXIO;
+
+ mutex_lock(&kvm->slots_lock);
+ switch (attr->attr) {
+ case KVM_S390_VM_MIGRATION_START:
+ res = kvm_s390_vm_start_migration(kvm);
+ break;
+ case KVM_S390_VM_MIGRATION_STOP:
+ res = kvm_s390_vm_stop_migration(kvm);
+ break;
+ default:
+ break;
+ }
+ mutex_unlock(&kvm->slots_lock);
+
+ return res;
+}
+
+static int kvm_s390_vm_get_migration(struct kvm *kvm,
+ struct kvm_device_attr *attr)
+{
+ u64 mig = kvm->arch.migration_mode;
+
+ if (attr->attr != KVM_S390_VM_MIGRATION_STATUS)
+ return -ENXIO;
+
+ if (copy_to_user((void __user *)attr->addr, &mig, sizeof(mig)))
+ return -EFAULT;
+ return 0;
+}
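+
+/*
+ * Example (sketch): the migration state can be read back through
+ * KVM_GET_DEVICE_ATTR; the result is a single __u64 (0 = off, 1 = on):
+ *
+ *   __u64 state;
+ *   struct kvm_device_attr attr = {
+ *       .group = KVM_S390_VM_MIGRATION,
+ *       .attr  = KVM_S390_VM_MIGRATION_STATUS,
+ *       .addr  = (__u64)(unsigned long)&state,
+ *   };
+ *   ioctl(vm_fd, KVM_GET_DEVICE_ATTR, &attr);
+ */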
+
+static int kvm_s390_set_tod_ext(struct kvm *kvm, struct kvm_device_attr *attr)
+{
+ struct kvm_s390_vm_tod_clock gtod;
+
+ if (copy_from_user(&gtod, (void __user *)attr->addr, sizeof(gtod)))
+ return -EFAULT;
+
+ if (!test_kvm_facility(kvm, 139) && gtod.epoch_idx)
+ return -EINVAL;
+ kvm_s390_set_tod_clock(kvm, &gtod);
+
+ VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x, TOD base: 0x%llx",
+ gtod.epoch_idx, gtod.tod);
+
+ return 0;
+}
+
+static int kvm_s390_set_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
+{
+ u8 gtod_high;
+
+ if (copy_from_user(&gtod_high, (void __user *)attr->addr,
+ sizeof(gtod_high)))
+ return -EFAULT;
+
+ if (gtod_high != 0)
+ return -EINVAL;
+ VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x", gtod_high);
+
+ return 0;
+}
+
+static int kvm_s390_set_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
+{
+ struct kvm_s390_vm_tod_clock gtod = { 0 };
+
+ if (copy_from_user(&gtod.tod, (void __user *)attr->addr,
+ sizeof(gtod.tod)))
+ return -EFAULT;
+
+ kvm_s390_set_tod_clock(kvm, &gtod);
+ VM_EVENT(kvm, 3, "SET: TOD base: 0x%llx", gtod.tod);
+ return 0;
+}
+
+static int kvm_s390_set_tod(struct kvm *kvm, struct kvm_device_attr *attr)
+{
+ int ret;
+
+ if (attr->flags)
+ return -EINVAL;
+
+ switch (attr->attr) {
+ case KVM_S390_VM_TOD_EXT:
+ ret = kvm_s390_set_tod_ext(kvm, attr);
+ break;
+ case KVM_S390_VM_TOD_HIGH:
+ ret = kvm_s390_set_tod_high(kvm, attr);
+ break;
+ case KVM_S390_VM_TOD_LOW:
+ ret = kvm_s390_set_tod_low(kvm, attr);
+ break;
+ default:
+ ret = -ENXIO;
+ break;
+ }
+ return ret;
+}
+
+static void kvm_s390_get_tod_clock(struct kvm *kvm,
+ struct kvm_s390_vm_tod_clock *gtod)
+{
+ struct kvm_s390_tod_clock_ext htod;
+
+ preempt_disable();
+
+ get_tod_clock_ext((char *)&htod);
+
+ gtod->tod = htod.tod + kvm->arch.epoch;
+ gtod->epoch_idx = 0;
+ if (test_kvm_facility(kvm, 139)) {
+ gtod->epoch_idx = htod.epoch_idx + kvm->arch.epdx;
+ if (gtod->tod < htod.tod)
+ gtod->epoch_idx += 1;
+ }
+
+ preempt_enable();
+}
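+
+/*
+ * Illustration: (epoch_idx, tod) act as one 128-bit value, so the code above
+ * is a 128-bit addition carried out in two 64-bit halves:
+ *
+ *   low  = htod.tod + epoch            (may wrap around 2^64)
+ *   high = htod.epoch_idx + epdx + carry
+ *
+ * where the "gtod->tod < htod.tod" test is exactly that carry.
+ */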
+
+static int kvm_s390_get_tod_ext(struct kvm *kvm, struct kvm_device_attr *attr)
+{
+ struct kvm_s390_vm_tod_clock gtod;
+
+ memset(&gtod, 0, sizeof(gtod));
+ kvm_s390_get_tod_clock(kvm, &gtod);
+ if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
+ return -EFAULT;
+
+ VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x, TOD base: 0x%llx",
+ gtod.epoch_idx, gtod.tod);
+ return 0;
+}
+
+static int kvm_s390_get_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
+{
+ u8 gtod_high = 0;
+
+ if (copy_to_user((void __user *)attr->addr, &gtod_high,
+ sizeof(gtod_high)))
+ return -EFAULT;
+ VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x", gtod_high);
+
+ return 0;
+}
+
+static int kvm_s390_get_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
+{
+ u64 gtod;
+
+ gtod = kvm_s390_get_tod_clock_fast(kvm);
+ if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
+ return -EFAULT;
+ VM_EVENT(kvm, 3, "QUERY: TOD base: 0x%llx", gtod);
+
+ return 0;
+}
+
+static int kvm_s390_get_tod(struct kvm *kvm, struct kvm_device_attr *attr)
+{
+ int ret;
+
+ if (attr->flags)
+ return -EINVAL;
+
+ switch (attr->attr) {
+ case KVM_S390_VM_TOD_EXT:
+ ret = kvm_s390_get_tod_ext(kvm, attr);
+ break;
+ case KVM_S390_VM_TOD_HIGH:
+ ret = kvm_s390_get_tod_high(kvm, attr);
+ break;
+ case KVM_S390_VM_TOD_LOW:
+ ret = kvm_s390_get_tod_low(kvm, attr);
+ break;
+ default:
+ ret = -ENXIO;
+ break;
+ }
+ return ret;
+}
+
+static int kvm_s390_set_processor(struct kvm *kvm, struct kvm_device_attr *attr)
+{
+ struct kvm_s390_vm_cpu_processor *proc;
+ u16 lowest_ibc, unblocked_ibc;
+ int ret = 0;
+
+ mutex_lock(&kvm->lock);
+ if (kvm->created_vcpus) {
+ ret = -EBUSY;
+ goto out;
+ }
+ proc = kzalloc(sizeof(*proc), GFP_KERNEL);
+ if (!proc) {
+ ret = -ENOMEM;
+ goto out;
+ }
+ if (!copy_from_user(proc, (void __user *)attr->addr,
+ sizeof(*proc))) {
+ kvm->arch.model.cpuid = proc->cpuid;
+ lowest_ibc = sclp.ibc >> 16 & 0xfff;
+ unblocked_ibc = sclp.ibc & 0xfff;
+ if (lowest_ibc && proc->ibc) {
+ if (proc->ibc > unblocked_ibc)
+ kvm->arch.model.ibc = unblocked_ibc;
+ else if (proc->ibc < lowest_ibc)
+ kvm->arch.model.ibc = lowest_ibc;
+ else
+ kvm->arch.model.ibc = proc->ibc;
+ }
+ memcpy(kvm->arch.model.fac_list, proc->fac_list,
+ S390_ARCH_FAC_LIST_SIZE_BYTE);
+ VM_EVENT(kvm, 3, "SET: guest ibc: 0x%4.4x, guest cpuid: 0x%16.16llx",
+ kvm->arch.model.ibc,
+ kvm->arch.model.cpuid);
+ VM_EVENT(kvm, 3, "SET: guest faclist: 0x%16.16llx.%16.16llx.%16.16llx",
+ kvm->arch.model.fac_list[0],
+ kvm->arch.model.fac_list[1],
+ kvm->arch.model.fac_list[2]);
+ } else
+ ret = -EFAULT;
+ kfree(proc);
+out:
+ mutex_unlock(&kvm->lock);
+ return ret;
+}
+
+static int kvm_s390_set_processor_feat(struct kvm *kvm,
+ struct kvm_device_attr *attr)
+{
+ struct kvm_s390_vm_cpu_feat data;
+
+ if (copy_from_user(&data, (void __user *)attr->addr, sizeof(data)))
+ return -EFAULT;
+ if (!bitmap_subset((unsigned long *) data.feat,
+ kvm_s390_available_cpu_feat,
+ KVM_S390_VM_CPU_FEAT_NR_BITS))
+ return -EINVAL;
+
+ mutex_lock(&kvm->lock);
+ if (kvm->created_vcpus) {
+ mutex_unlock(&kvm->lock);
+ return -EBUSY;
+ }
+ bitmap_copy(kvm->arch.cpu_feat, (unsigned long *) data.feat,
+ KVM_S390_VM_CPU_FEAT_NR_BITS);
+ mutex_unlock(&kvm->lock);
+ VM_EVENT(kvm, 3, "SET: guest feat: 0x%16.16llx.0x%16.16llx.0x%16.16llx",
+ data.feat[0],
+ data.feat[1],
+ data.feat[2]);
+ return 0;
+}
+
+static int kvm_s390_set_processor_subfunc(struct kvm *kvm,
+ struct kvm_device_attr *attr)
+{
+ /*
+ * Once supported by kernel + hw, we have to store the subfunctions
+ * in kvm->arch and remember that user space configured them.
+ */
+ return -ENXIO;
+}
+
+static int kvm_s390_set_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
+{
+ int ret = -ENXIO;
+
+ switch (attr->attr) {
+ case KVM_S390_VM_CPU_PROCESSOR:
+ ret = kvm_s390_set_processor(kvm, attr);
+ break;
+ case KVM_S390_VM_CPU_PROCESSOR_FEAT:
+ ret = kvm_s390_set_processor_feat(kvm, attr);
+ break;
+ case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
+ ret = kvm_s390_set_processor_subfunc(kvm, attr);
+ break;
+ }
+ return ret;
+}
+
+static int kvm_s390_get_processor(struct kvm *kvm, struct kvm_device_attr *attr)
+{
+ struct kvm_s390_vm_cpu_processor *proc;
+ int ret = 0;
+
+ proc = kzalloc(sizeof(*proc), GFP_KERNEL);
+ if (!proc) {
+ ret = -ENOMEM;
+ goto out;
+ }
+ proc->cpuid = kvm->arch.model.cpuid;
+ proc->ibc = kvm->arch.model.ibc;
+ memcpy(&proc->fac_list, kvm->arch.model.fac_list,
+ S390_ARCH_FAC_LIST_SIZE_BYTE);
+ VM_EVENT(kvm, 3, "GET: guest ibc: 0x%4.4x, guest cpuid: 0x%16.16llx",
+ kvm->arch.model.ibc,
+ kvm->arch.model.cpuid);
+ VM_EVENT(kvm, 3, "GET: guest faclist: 0x%16.16llx.%16.16llx.%16.16llx",
+ kvm->arch.model.fac_list[0],
+ kvm->arch.model.fac_list[1],
+ kvm->arch.model.fac_list[2]);
+ if (copy_to_user((void __user *)attr->addr, proc, sizeof(*proc)))
+ ret = -EFAULT;
+ kfree(proc);
+out:
+ return ret;
+}
+
+static int kvm_s390_get_machine(struct kvm *kvm, struct kvm_device_attr *attr)
+{
+ struct kvm_s390_vm_cpu_machine *mach;
+ int ret = 0;
+
+ mach = kzalloc(sizeof(*mach), GFP_KERNEL);
+ if (!mach) {
+ ret = -ENOMEM;
+ goto out;
+ }
+ get_cpu_id((struct cpuid *) &mach->cpuid);
+ mach->ibc = sclp.ibc;
+ memcpy(&mach->fac_mask, kvm->arch.model.fac_mask,
+ S390_ARCH_FAC_LIST_SIZE_BYTE);
+ memcpy((unsigned long *)&mach->fac_list, S390_lowcore.stfle_fac_list,
+ sizeof(S390_lowcore.stfle_fac_list));
+ VM_EVENT(kvm, 3, "GET: host ibc: 0x%4.4x, host cpuid: 0x%16.16llx",
+ kvm->arch.model.ibc,
+ kvm->arch.model.cpuid);
+ VM_EVENT(kvm, 3, "GET: host facmask: 0x%16.16llx.%16.16llx.%16.16llx",
+ mach->fac_mask[0],
+ mach->fac_mask[1],
+ mach->fac_mask[2]);
+ VM_EVENT(kvm, 3, "GET: host faclist: 0x%16.16llx.%16.16llx.%16.16llx",
+ mach->fac_list[0],
+ mach->fac_list[1],
+ mach->fac_list[2]);
+ if (copy_to_user((void __user *)attr->addr, mach, sizeof(*mach)))
+ ret = -EFAULT;
+ kfree(mach);
+out:
+ return ret;
+}
+
+static int kvm_s390_get_processor_feat(struct kvm *kvm,
+ struct kvm_device_attr *attr)
+{
+ struct kvm_s390_vm_cpu_feat data;
+
+ bitmap_copy((unsigned long *) data.feat, kvm->arch.cpu_feat,
+ KVM_S390_VM_CPU_FEAT_NR_BITS);
+ if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
+ return -EFAULT;
+ VM_EVENT(kvm, 3, "GET: guest feat: 0x%16.16llx.0x%16.16llx.0x%16.16llx",
+ data.feat[0],
+ data.feat[1],
+ data.feat[2]);
+ return 0;
+}
+
+static int kvm_s390_get_machine_feat(struct kvm *kvm,
+ struct kvm_device_attr *attr)
+{
+ struct kvm_s390_vm_cpu_feat data;
+
+ bitmap_copy((unsigned long *) data.feat,
+ kvm_s390_available_cpu_feat,
+ KVM_S390_VM_CPU_FEAT_NR_BITS);
+ if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
+ return -EFAULT;
+ VM_EVENT(kvm, 3, "GET: host feat: 0x%16.16llx.0x%16.16llx.0x%16.16llx",
+ data.feat[0],
+ data.feat[1],
+ data.feat[2]);
+ return 0;
+}
+
+static int kvm_s390_get_processor_subfunc(struct kvm *kvm,
+ struct kvm_device_attr *attr)
+{
+ /*
+ * Once we can actually configure subfunctions (kernel + hw support),
+ * we have to check if they were already set by user space, if so copy
+ * them from kvm->arch.
+ */
+ return -ENXIO;
+}
+
+static int kvm_s390_get_machine_subfunc(struct kvm *kvm,
+ struct kvm_device_attr *attr)
+{
+ if (copy_to_user((void __user *)attr->addr, &kvm_s390_available_subfunc,
+ sizeof(struct kvm_s390_vm_cpu_subfunc)))
+ return -EFAULT;
+ return 0;
+}
+
+static int kvm_s390_get_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
+{
+ int ret = -ENXIO;
+
+ switch (attr->attr) {
+ case KVM_S390_VM_CPU_PROCESSOR:
+ ret = kvm_s390_get_processor(kvm, attr);
+ break;
+ case KVM_S390_VM_CPU_MACHINE:
+ ret = kvm_s390_get_machine(kvm, attr);
+ break;
+ case KVM_S390_VM_CPU_PROCESSOR_FEAT:
+ ret = kvm_s390_get_processor_feat(kvm, attr);
+ break;
+ case KVM_S390_VM_CPU_MACHINE_FEAT:
+ ret = kvm_s390_get_machine_feat(kvm, attr);
+ break;
+ case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
+ ret = kvm_s390_get_processor_subfunc(kvm, attr);
+ break;
+ case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
+ ret = kvm_s390_get_machine_subfunc(kvm, attr);
+ break;
+ }
+ return ret;
+}
+
+static int kvm_s390_vm_set_attr(struct kvm *kvm, struct kvm_device_attr *attr)
+{
+ int ret;
+
+ switch (attr->group) {
+ case KVM_S390_VM_MEM_CTRL:
+ ret = kvm_s390_set_mem_control(kvm, attr);
+ break;
+ case KVM_S390_VM_TOD:
+ ret = kvm_s390_set_tod(kvm, attr);
+ break;
+ case KVM_S390_VM_CPU_MODEL:
+ ret = kvm_s390_set_cpu_model(kvm, attr);
+ break;
+ case KVM_S390_VM_CRYPTO:
+ ret = kvm_s390_vm_set_crypto(kvm, attr);
+ break;
+ case KVM_S390_VM_MIGRATION:
+ ret = kvm_s390_vm_set_migration(kvm, attr);
+ break;
+ default:
+ ret = -ENXIO;
+ break;
+ }
+
+ return ret;
+}
+
+static int kvm_s390_vm_get_attr(struct kvm *kvm, struct kvm_device_attr *attr)
+{
+ int ret;
+
+ switch (attr->group) {
+ case KVM_S390_VM_MEM_CTRL:
+ ret = kvm_s390_get_mem_control(kvm, attr);
+ break;
+ case KVM_S390_VM_TOD:
+ ret = kvm_s390_get_tod(kvm, attr);
+ break;
+ case KVM_S390_VM_CPU_MODEL:
+ ret = kvm_s390_get_cpu_model(kvm, attr);
+ break;
+ case KVM_S390_VM_MIGRATION:
+ ret = kvm_s390_vm_get_migration(kvm, attr);
+ break;
+ default:
+ ret = -ENXIO;
+ break;
+ }
+
+ return ret;
+}
+
+static int kvm_s390_vm_has_attr(struct kvm *kvm, struct kvm_device_attr *attr)
+{
+ int ret;
+
+ switch (attr->group) {
+ case KVM_S390_VM_MEM_CTRL:
+ switch (attr->attr) {
+ case KVM_S390_VM_MEM_ENABLE_CMMA:
+ case KVM_S390_VM_MEM_CLR_CMMA:
+ ret = sclp.has_cmma ? 0 : -ENXIO;
+ break;
+ case KVM_S390_VM_MEM_LIMIT_SIZE:
+ ret = 0;
+ break;
+ default:
+ ret = -ENXIO;
+ break;
+ }
+ break;
+ case KVM_S390_VM_TOD:
+ switch (attr->attr) {
+ case KVM_S390_VM_TOD_LOW:
+ case KVM_S390_VM_TOD_HIGH:
+ ret = 0;
+ break;
+ default:
+ ret = -ENXIO;
+ break;
+ }
+ break;
+ case KVM_S390_VM_CPU_MODEL:
+ switch (attr->attr) {
+ case KVM_S390_VM_CPU_PROCESSOR:
+ case KVM_S390_VM_CPU_MACHINE:
+ case KVM_S390_VM_CPU_PROCESSOR_FEAT:
+ case KVM_S390_VM_CPU_MACHINE_FEAT:
+ case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
+ ret = 0;
+ break;
+ /* configuring subfunctions is not supported yet */
+ case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
+ default:
+ ret = -ENXIO;
+ break;
+ }
+ break;
+ case KVM_S390_VM_CRYPTO:
+ switch (attr->attr) {
+ case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
+ case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
+ case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
+ case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
+ ret = 0;
+ break;
+ default:
+ ret = -ENXIO;
+ break;
+ }
+ break;
+ case KVM_S390_VM_MIGRATION:
+ ret = 0;
+ break;
+ default:
+ ret = -ENXIO;
+ break;
+ }
+
+ return ret;
+}
+
+static long kvm_s390_get_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
+{
+ uint8_t *keys;
+ uint64_t hva;
+ int srcu_idx, i, r = 0;
+
+ if (args->flags != 0)
+ return -EINVAL;
+
+ /* Is this guest using storage keys? */
+ if (!mm_uses_skeys(current->mm))
+ return KVM_S390_GET_SKEYS_NONE;
+
+ /* Enforce sane limit on memory allocation */
+ if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
+ return -EINVAL;
+
+ keys = kvmalloc_array(args->count, sizeof(uint8_t), GFP_KERNEL);
+ if (!keys)
+ return -ENOMEM;
+
+ down_read(&current->mm->mmap_sem);
+ srcu_idx = srcu_read_lock(&kvm->srcu);
+ for (i = 0; i < args->count; i++) {
+ hva = gfn_to_hva(kvm, args->start_gfn + i);
+ if (kvm_is_error_hva(hva)) {
+ r = -EFAULT;
+ break;
+ }
+
+ r = get_guest_storage_key(current->mm, hva, &keys[i]);
+ if (r)
+ break;
+ }
+ srcu_read_unlock(&kvm->srcu, srcu_idx);
+ up_read(&current->mm->mmap_sem);
+
+ if (!r) {
+ r = copy_to_user((uint8_t __user *)args->skeydata_addr, keys,
+ sizeof(uint8_t) * args->count);
+ if (r)
+ r = -EFAULT;
+ }
+
+ kvfree(keys);
+ return r;
+}
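+
+/*
+ * Example (sketch, struct layout as in <linux/kvm.h>): reading the storage
+ * keys of the first 256 guest frames from user space:
+ *
+ *   __u8 keys[256];
+ *   struct kvm_s390_skeys args = {
+ *       .start_gfn = 0,
+ *       .count = 256,
+ *       .skeydata_addr = (__u64)(unsigned long)keys,
+ *   };
+ *   ioctl(vm_fd, KVM_S390_GET_SKEYS, &args);
+ *
+ * The call returns KVM_S390_GET_SKEYS_NONE while the guest has never used
+ * storage keys, so user space can skip the key section during migration.
+ */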
+
+static long kvm_s390_set_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
+{
+ uint8_t *keys;
+ uint64_t hva;
+ int srcu_idx, i, r = 0;
+ bool unlocked;
+
+ if (args->flags != 0)
+ return -EINVAL;
+
+ /* Enforce sane limit on memory allocation */
+ if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
+ return -EINVAL;
+
+ keys = kvmalloc_array(args->count, sizeof(uint8_t), GFP_KERNEL);
+ if (!keys)
+ return -ENOMEM;
+
+ r = copy_from_user(keys, (uint8_t __user *)args->skeydata_addr,
+ sizeof(uint8_t) * args->count);
+ if (r) {
+ r = -EFAULT;
+ goto out;
+ }
+
+ /* Enable storage key handling for the guest */
+ r = s390_enable_skey();
+ if (r)
+ goto out;
+
+ i = 0;
+ down_read(&current->mm->mmap_sem);
+ srcu_idx = srcu_read_lock(&kvm->srcu);
+ while (i < args->count) {
+ unlocked = false;
+ hva = gfn_to_hva(kvm, args->start_gfn + i);
+ if (kvm_is_error_hva(hva)) {
+ r = -EFAULT;
+ break;
+ }
+
+ /* Lowest order bit is reserved */
+ if (keys[i] & 0x01) {
+ r = -EINVAL;
+ break;
+ }
+
+ r = set_guest_storage_key(current->mm, hva, keys[i], 0);
+ if (r) {
+ r = fixup_user_fault(current, current->mm, hva,
+ FAULT_FLAG_WRITE, &unlocked);
+ if (r)
+ break;
+ }
+ if (!r)
+ i++;
+ }
+ srcu_read_unlock(&kvm->srcu, srcu_idx);
+ up_read(&current->mm->mmap_sem);
+out:
+ kvfree(keys);
+ return r;
+}
+
+/*
+ * Base address and length must be sent at the start of each block, therefore
+ * it's cheaper to send some clean data, as long as it's less than the size of
+ * two longs.
+ */
+#define KVM_S390_MAX_BIT_DISTANCE (2 * sizeof(void *))
+/* for consistency */
+#define KVM_S390_CMMA_SIZE_MAX ((u32)KVM_S390_SKEYS_MAX)
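+
+/*
+ * Worked example: each CMMA value is one byte per page and two longs are 16
+ * bytes, so a gap of up to 16 clean pages is streamed as clean values rather
+ * than closing the block and paying for a new base address and length; only
+ * when the next dirty page is more than 16 pages away does the walk below
+ * stop and let a new block start.
+ */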
+
+/*
+ * Similar to gfn_to_memslot, but returns the index of a memslot also when the
+ * address falls in a hole. In that case the index of one of the memslots
+ * bordering the hole is returned.
+ */
+static int gfn_to_memslot_approx(struct kvm_memslots *slots, gfn_t gfn)
+{
+ int start = 0, end = slots->used_slots;
+ int slot = atomic_read(&slots->lru_slot);
+ struct kvm_memory_slot *memslots = slots->memslots;
+
+ if (gfn >= memslots[slot].base_gfn &&
+ gfn < memslots[slot].base_gfn + memslots[slot].npages)
+ return slot;
+
+ while (start < end) {
+ slot = start + (end - start) / 2;
+
+ if (gfn >= memslots[slot].base_gfn)
+ end = slot;
+ else
+ start = slot + 1;
+ }
+
+ if (start >= slots->used_slots)
+ return slots->used_slots - 1;
+
+ if (gfn >= memslots[start].base_gfn &&
+ gfn < memslots[start].base_gfn + memslots[start].npages) {
+ atomic_set(&slots->lru_slot, start);
+ }
+
+ return start;
+}
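+
+/*
+ * Note: as in search_memslots(), this assumes the memslot array is kept
+ * sorted by descending base_gfn; the binary search then ends on the slot
+ * with the highest base_gfn that is still <= gfn, and if that slot does not
+ * contain gfn the caller gets a neighbouring slot instead of a NULL result,
+ * which is what the CMMA walk below relies on.
+ */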
+
+static int kvm_s390_peek_cmma(struct kvm *kvm, struct kvm_s390_cmma_log *args,
+ u8 *res, unsigned long bufsize)
+{
+ unsigned long pgstev, hva, cur_gfn = args->start_gfn;
+
+ args->count = 0;
+ while (args->count < bufsize) {
+ hva = gfn_to_hva(kvm, cur_gfn);
+ /*
+ * We return an error if the first value was invalid, but we
+ * return successfully if at least one value was copied.
+ */
+ if (kvm_is_error_hva(hva))
+ return args->count ? 0 : -EFAULT;
+ if (get_pgste(kvm->mm, hva, &pgstev) < 0)
+ pgstev = 0;
+ res[args->count++] = (pgstev >> 24) & 0x43;
+ cur_gfn++;
+ }
+
+ return 0;
+}
+
+static unsigned long kvm_s390_next_dirty_cmma(struct kvm_memslots *slots,
+ unsigned long cur_gfn)
+{
+ int slotidx = gfn_to_memslot_approx(slots, cur_gfn);
+ struct kvm_memory_slot *ms = slots->memslots + slotidx;
+ unsigned long ofs = cur_gfn - ms->base_gfn;
+
+ if (ms->base_gfn + ms->npages <= cur_gfn) {
+ slotidx--;
+ /* If we are above the highest slot, wrap around */
+ if (slotidx < 0)
+ slotidx = slots->used_slots - 1;
+
+ ms = slots->memslots + slotidx;
+ ofs = 0;
+ }
+ ofs = find_next_bit(kvm_second_dirty_bitmap(ms), ms->npages, ofs);
+ while ((slotidx > 0) && (ofs >= ms->npages)) {
+ slotidx--;
+ ms = slots->memslots + slotidx;
+ ofs = find_next_bit(kvm_second_dirty_bitmap(ms), ms->npages, 0);
+ }
+ return ms->base_gfn + ofs;
+}
+
+static int kvm_s390_get_cmma(struct kvm *kvm, struct kvm_s390_cmma_log *args,
+ u8 *res, unsigned long bufsize)
+{
+ unsigned long mem_end, cur_gfn, next_gfn, hva, pgstev;
+ struct kvm_memslots *slots = kvm_memslots(kvm);
+ struct kvm_memory_slot *ms;
+
+ cur_gfn = kvm_s390_next_dirty_cmma(slots, args->start_gfn);
+ ms = gfn_to_memslot(kvm, cur_gfn);
+ args->count = 0;
+ args->start_gfn = cur_gfn;
+ if (!ms)
+ return 0;
+ next_gfn = kvm_s390_next_dirty_cmma(slots, cur_gfn + 1);
+ mem_end = slots->memslots[0].base_gfn + slots->memslots[0].npages;
+
+ while (args->count < bufsize) {
+ hva = gfn_to_hva(kvm, cur_gfn);
+ if (kvm_is_error_hva(hva))
+ return 0;
+ /* Decrement only if we actually flipped the bit to 0 */
+ if (test_and_clear_bit(cur_gfn - ms->base_gfn, kvm_second_dirty_bitmap(ms)))
+ atomic64_dec(&kvm->arch.cmma_dirty_pages);
+ if (get_pgste(kvm->mm, hva, &pgstev) < 0)
+ pgstev = 0;
+ /* Save the value */
+ res[args->count++] = (pgstev >> 24) & 0x43;
+ /* If the next bit is too far away, stop. */
+ if (next_gfn > cur_gfn + KVM_S390_MAX_BIT_DISTANCE)
+ return 0;
+ /* If we reached the previous "next", find the next one */
+ if (cur_gfn == next_gfn)
+ next_gfn = kvm_s390_next_dirty_cmma(slots, cur_gfn + 1);
+ /* Reached the end of memory or of the buffer, stop */
+ if ((next_gfn >= mem_end) ||
+ (next_gfn - args->start_gfn >= bufsize))
+ return 0;
+ cur_gfn++;
+ /* Reached the end of the current memslot, take the next one. */
+ if (cur_gfn - ms->base_gfn >= ms->npages) {
+ ms = gfn_to_memslot(kvm, cur_gfn);
+ if (!ms)
+ return 0;
+ }
+ }
+ return 0;
+}
+
+/*
+ * This function searches for the next page with dirty CMMA attributes, and
+ * saves the attributes in the buffer up to either the end of the buffer or
+ * until a block of at least KVM_S390_MAX_BIT_DISTANCE clean bits is found;
+ * no trailing clean bytes are saved.
+ * In case no dirty bits were found, or if CMMA was not enabled or used, the
+ * output buffer will indicate 0 as length.
+ */
+static int kvm_s390_get_cmma_bits(struct kvm *kvm,
+ struct kvm_s390_cmma_log *args)
+{
+ unsigned long bufsize;
+ int srcu_idx, peek, ret;
+ u8 *values;
+
+ if (!kvm->arch.use_cmma)
+ return -ENXIO;
+ /* Invalid/unsupported flags were specified */
+ if (args->flags & ~KVM_S390_CMMA_PEEK)
+ return -EINVAL;
+ /* Migration mode query, and we are not doing a migration */
+ peek = !!(args->flags & KVM_S390_CMMA_PEEK);
+ if (!peek && !kvm->arch.migration_mode)
+ return -EINVAL;
+ /* CMMA is disabled or was not used, or the buffer has length zero */
+ bufsize = min(args->count, KVM_S390_CMMA_SIZE_MAX);
+ if (!bufsize || !kvm->mm->context.uses_cmm) {
+ memset(args, 0, sizeof(*args));
+ return 0;
+ }
+ /* We are not peeking, and there are no dirty pages */
+ if (!peek && !atomic64_read(&kvm->arch.cmma_dirty_pages)) {
+ memset(args, 0, sizeof(*args));
+ return 0;
+ }
+
+ values = vmalloc(bufsize);
+ if (!values)
+ return -ENOMEM;
+
+ down_read(&kvm->mm->mmap_sem);
+ srcu_idx = srcu_read_lock(&kvm->srcu);
+ if (peek)
+ ret = kvm_s390_peek_cmma(kvm, args, values, bufsize);
+ else
+ ret = kvm_s390_get_cmma(kvm, args, values, bufsize);
+ srcu_read_unlock(&kvm->srcu, srcu_idx);
+ up_read(&kvm->mm->mmap_sem);
+
+ if (kvm->arch.migration_mode)
+ args->remaining = atomic64_read(&kvm->arch.cmma_dirty_pages);
+ else
+ args->remaining = 0;
+
+ if (copy_to_user((void __user *)args->values, values, args->count))
+ ret = -EFAULT;
+
+ vfree(values);
+ return ret;
+}
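+
+/*
+ * Example (rough sketch): during migration user space pulls the values in a
+ * loop until nothing dirty remains, e.g.:
+ *
+ *   struct kvm_s390_cmma_log log = {
+ *       .start_gfn = 0,
+ *       .values = (__u64)(unsigned long)buf,
+ *   };
+ *   do {
+ *       log.count = buf_size;
+ *       ioctl(vm_fd, KVM_S390_GET_CMMA_BITS, &log);
+ *       process(log.start_gfn, buf, log.count);
+ *       log.start_gfn += log.count;
+ *   } while (log.remaining);
+ */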
+
+/*
+ * This function sets the CMMA attributes for the given pages. If the input
+ * buffer has zero length, no action is taken, otherwise the attributes are
+ * set and the mm->context.uses_cmm flag is set.
+ */
+static int kvm_s390_set_cmma_bits(struct kvm *kvm,
+ const struct kvm_s390_cmma_log *args)
+{
+ unsigned long hva, mask, pgstev, i;
+ uint8_t *bits;
+ int srcu_idx, r = 0;
+
+ mask = args->mask;
+
+ if (!kvm->arch.use_cmma)
+ return -ENXIO;
+ /* invalid/unsupported flags */
+ if (args->flags != 0)
+ return -EINVAL;
+ /* Enforce sane limit on memory allocation */
+ if (args->count > KVM_S390_CMMA_SIZE_MAX)
+ return -EINVAL;
+ /* Nothing to do */
+ if (args->count == 0)
+ return 0;
+
+ bits = vmalloc(array_size(sizeof(*bits), args->count));
+ if (!bits)
+ return -ENOMEM;
+
+ r = copy_from_user(bits, (void __user *)args->values, args->count);
+ if (r) {
+ r = -EFAULT;
+ goto out;
+ }
+
+ down_read(&kvm->mm->mmap_sem);
+ srcu_idx = srcu_read_lock(&kvm->srcu);
+ for (i = 0; i < args->count; i++) {
+ hva = gfn_to_hva(kvm, args->start_gfn + i);
+ if (kvm_is_error_hva(hva)) {
+ r = -EFAULT;
+ break;
+ }
+
+ pgstev = bits[i];
+ pgstev = pgstev << 24;
+ mask &= _PGSTE_GPS_USAGE_MASK | _PGSTE_GPS_NODAT;
+ set_pgste_bits(kvm->mm, hva, mask, pgstev);
+ }
+ srcu_read_unlock(&kvm->srcu, srcu_idx);
+ up_read(&kvm->mm->mmap_sem);
+
+ if (!kvm->mm->context.uses_cmm) {
+ down_write(&kvm->mm->mmap_sem);
+ kvm->mm->context.uses_cmm = 1;
+ up_write(&kvm->mm->mmap_sem);
+ }
+out:
+ vfree(bits);
+ return r;
+}
+
+long kvm_arch_vm_ioctl(struct file *filp,
+ unsigned int ioctl, unsigned long arg)
+{
+ struct kvm *kvm = filp->private_data;
+ void __user *argp = (void __user *)arg;
+ struct kvm_device_attr attr;
+ int r;
+
+ switch (ioctl) {
+ case KVM_S390_INTERRUPT: {
+ struct kvm_s390_interrupt s390int;
+
+ r = -EFAULT;
+ if (copy_from_user(&s390int, argp, sizeof(s390int)))
+ break;
+ r = kvm_s390_inject_vm(kvm, &s390int);
+ break;
+ }
+ case KVM_ENABLE_CAP: {
+ struct kvm_enable_cap cap;
+
+ r = -EFAULT;
+ if (copy_from_user(&cap, argp, sizeof(cap)))
+ break;
+ r = kvm_vm_ioctl_enable_cap(kvm, &cap);
+ break;
+ }
+ case KVM_CREATE_IRQCHIP: {
+ struct kvm_irq_routing_entry routing;
+
+ r = -EINVAL;
+ if (kvm->arch.use_irqchip) {
+ /* Set up dummy routing. */
+ memset(&routing, 0, sizeof(routing));
+ r = kvm_set_irq_routing(kvm, &routing, 0, 0);
+ }
+ break;
+ }
+ case KVM_SET_DEVICE_ATTR: {
+ r = -EFAULT;
+ if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
+ break;
+ r = kvm_s390_vm_set_attr(kvm, &attr);
+ break;
+ }
+ case KVM_GET_DEVICE_ATTR: {
+ r = -EFAULT;
+ if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
+ break;
+ r = kvm_s390_vm_get_attr(kvm, &attr);
+ break;
+ }
+ case KVM_HAS_DEVICE_ATTR: {
+ r = -EFAULT;
+ if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
+ break;
+ r = kvm_s390_vm_has_attr(kvm, &attr);
+ break;
+ }
+ case KVM_S390_GET_SKEYS: {
+ struct kvm_s390_skeys args;
+
+ r = -EFAULT;
+ if (copy_from_user(&args, argp,
+ sizeof(struct kvm_s390_skeys)))
+ break;
+ r = kvm_s390_get_skeys(kvm, &args);
+ break;
+ }
+ case KVM_S390_SET_SKEYS: {
+ struct kvm_s390_skeys args;
+
+ r = -EFAULT;
+ if (copy_from_user(&args, argp,
+ sizeof(struct kvm_s390_skeys)))
+ break;
+ r = kvm_s390_set_skeys(kvm, &args);
+ break;
+ }
+ case KVM_S390_GET_CMMA_BITS: {
+ struct kvm_s390_cmma_log args;
+
+ r = -EFAULT;
+ if (copy_from_user(&args, argp, sizeof(args)))
+ break;
+ mutex_lock(&kvm->slots_lock);
+ r = kvm_s390_get_cmma_bits(kvm, &args);
+ mutex_unlock(&kvm->slots_lock);
+ if (!r) {
+ r = copy_to_user(argp, &args, sizeof(args));
+ if (r)
+ r = -EFAULT;
+ }
+ break;
+ }
+ case KVM_S390_SET_CMMA_BITS: {
+ struct kvm_s390_cmma_log args;
+
+ r = -EFAULT;
+ if (copy_from_user(&args, argp, sizeof(args)))
+ break;
+ mutex_lock(&kvm->slots_lock);
+ r = kvm_s390_set_cmma_bits(kvm, &args);
+ mutex_unlock(&kvm->slots_lock);
+ break;
+ }
+ default:
+ r = -ENOTTY;
+ }
+
+ return r;
+}
+
+static int kvm_s390_query_ap_config(u8 *config)
+{
+ u32 fcn_code = 0x04000000UL;
+ u32 cc = 0;
+
+ memset(config, 0, 128);
+ asm volatile(
+ "lgr 0,%1\n"
+ "lgr 2,%2\n"
+ ".long 0xb2af0000\n" /* PQAP(QCI) */
+ "0: ipm %0\n"
+ "srl %0,28\n"
+ "1:\n"
+ EX_TABLE(0b, 1b)
+ : "+r" (cc)
+ : "r" (fcn_code), "r" (config)
+ : "cc", "0", "2", "memory"
+ );
+
+ return cc;
+}
+
+static int kvm_s390_apxa_installed(void)
+{
+ u8 config[128];
+ int cc;
+
+ if (test_facility(12)) {
+ cc = kvm_s390_query_ap_config(config);
+
+ if (cc)
+ pr_err("PQAP(QCI) failed with cc=%d", cc);
+ else
+ return config[0] & 0x40;
+ }
+
+ return 0;
+}
+
+static void kvm_s390_set_crycb_format(struct kvm *kvm)
+{
+ kvm->arch.crypto.crycbd = (__u32)(unsigned long) kvm->arch.crypto.crycb;
+
+ if (kvm_s390_apxa_installed())
+ kvm->arch.crypto.crycbd |= CRYCB_FORMAT2;
+ else
+ kvm->arch.crypto.crycbd |= CRYCB_FORMAT1;
+}
+
+static u64 kvm_s390_get_initial_cpuid(void)
+{
+ struct cpuid cpuid;
+
+ get_cpu_id(&cpuid);
+ cpuid.version = 0xff;
+ return *((u64 *) &cpuid);
+}
+
+static void kvm_s390_crypto_init(struct kvm *kvm)
+{
+ if (!test_kvm_facility(kvm, 76))
+ return;
+
+ kvm->arch.crypto.crycb = &kvm->arch.sie_page2->crycb;
+ kvm_s390_set_crycb_format(kvm);
+
+ /* Enable AES/DEA protected key functions by default */
+ kvm->arch.crypto.aes_kw = 1;
+ kvm->arch.crypto.dea_kw = 1;
+ get_random_bytes(kvm->arch.crypto.crycb->aes_wrapping_key_mask,
+ sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
+ get_random_bytes(kvm->arch.crypto.crycb->dea_wrapping_key_mask,
+ sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
+}
+
+static void sca_dispose(struct kvm *kvm)
+{
+ if (kvm->arch.use_esca)
+ free_pages_exact(kvm->arch.sca, sizeof(struct esca_block));
+ else
+ free_page((unsigned long)(kvm->arch.sca));
+ kvm->arch.sca = NULL;
+}
+
+int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
+{
+ gfp_t alloc_flags = GFP_KERNEL;
+ int i, rc;
+ char debug_name[16];
+ static unsigned long sca_offset;
+
+ rc = -EINVAL;
+#ifdef CONFIG_KVM_S390_UCONTROL
+ if (type & ~KVM_VM_S390_UCONTROL)
+ goto out_err;
+ if ((type & KVM_VM_S390_UCONTROL) && (!capable(CAP_SYS_ADMIN)))
+ goto out_err;
+#else
+ if (type)
+ goto out_err;
+#endif
+
+ rc = s390_enable_sie();
+ if (rc)
+ goto out_err;
+
+ rc = -ENOMEM;
+
+ if (!sclp.has_64bscao)
+ alloc_flags |= GFP_DMA;
+ rwlock_init(&kvm->arch.sca_lock);
+ /* start with basic SCA */
+ kvm->arch.sca = (struct bsca_block *) get_zeroed_page(alloc_flags);
+ if (!kvm->arch.sca)
+ goto out_err;
+ mutex_lock(&kvm_lock);
+ sca_offset += 16;
+ if (sca_offset + sizeof(struct bsca_block) > PAGE_SIZE)
+ sca_offset = 0;
+ kvm->arch.sca = (struct bsca_block *)
+ ((char *) kvm->arch.sca + sca_offset);
+ mutex_unlock(&kvm_lock);
+
+ sprintf(debug_name, "kvm-%u", current->pid);
+
+ kvm->arch.dbf = debug_register(debug_name, 32, 1, 7 * sizeof(long));
+ if (!kvm->arch.dbf)
+ goto out_err;
+
+ BUILD_BUG_ON(sizeof(struct sie_page2) != 4096);
+ kvm->arch.sie_page2 =
+ (struct sie_page2 *) get_zeroed_page(GFP_KERNEL | GFP_DMA);
+ if (!kvm->arch.sie_page2)
+ goto out_err;
+
+ kvm->arch.model.fac_list = kvm->arch.sie_page2->fac_list;
+
+ for (i = 0; i < kvm_s390_fac_size(); i++) {
+ kvm->arch.model.fac_mask[i] = S390_lowcore.stfle_fac_list[i] &
+ (kvm_s390_fac_base[i] |
+ kvm_s390_fac_ext[i]);
+ kvm->arch.model.fac_list[i] = S390_lowcore.stfle_fac_list[i] &
+ kvm_s390_fac_base[i];
+ }
+
+ /* we are always in czam mode - even on pre z14 machines */
+ set_kvm_facility(kvm->arch.model.fac_mask, 138);
+ set_kvm_facility(kvm->arch.model.fac_list, 138);
+ /* we emulate STHYI in kvm */
+ set_kvm_facility(kvm->arch.model.fac_mask, 74);
+ set_kvm_facility(kvm->arch.model.fac_list, 74);
+ if (MACHINE_HAS_TLB_GUEST) {
+ set_kvm_facility(kvm->arch.model.fac_mask, 147);
+ set_kvm_facility(kvm->arch.model.fac_list, 147);
+ }
+
+ kvm->arch.model.cpuid = kvm_s390_get_initial_cpuid();
+ kvm->arch.model.ibc = sclp.ibc & 0x0fff;
+
+ kvm_s390_crypto_init(kvm);
+
+ mutex_init(&kvm->arch.float_int.ais_lock);
+ spin_lock_init(&kvm->arch.float_int.lock);
+ for (i = 0; i < FIRQ_LIST_COUNT; i++)
+ INIT_LIST_HEAD(&kvm->arch.float_int.lists[i]);
+ init_waitqueue_head(&kvm->arch.ipte_wq);
+ mutex_init(&kvm->arch.ipte_mutex);
+
+ debug_register_view(kvm->arch.dbf, &debug_sprintf_view);
+ VM_EVENT(kvm, 3, "vm created with type %lu", type);
+
+ if (type & KVM_VM_S390_UCONTROL) {
+ kvm->arch.gmap = NULL;
+ kvm->arch.mem_limit = KVM_S390_NO_MEM_LIMIT;
+ } else {
+ if (sclp.hamax == U64_MAX)
+ kvm->arch.mem_limit = TASK_SIZE_MAX;
+ else
+ kvm->arch.mem_limit = min_t(unsigned long, TASK_SIZE_MAX,
+ sclp.hamax + 1);
+ kvm->arch.gmap = gmap_create(current->mm, kvm->arch.mem_limit - 1);
+ if (!kvm->arch.gmap)
+ goto out_err;
+ kvm->arch.gmap->private = kvm;
+ kvm->arch.gmap->pfault_enabled = 0;
+ }
+
+ kvm->arch.use_pfmfi = sclp.has_pfmfi;
+ kvm->arch.use_skf = sclp.has_skey;
+ spin_lock_init(&kvm->arch.start_stop_lock);
+ kvm_s390_vsie_init(kvm);
+ kvm_s390_gisa_init(kvm);
+ KVM_EVENT(3, "vm 0x%pK created by pid %u", kvm, current->pid);
+
+ return 0;
+out_err:
+ free_page((unsigned long)kvm->arch.sie_page2);
+ debug_unregister(kvm->arch.dbf);
+ sca_dispose(kvm);
+ KVM_EVENT(3, "creation of vm failed: %d", rc);
+ return rc;
+}
+
+bool kvm_arch_has_vcpu_debugfs(void)
+{
+ return false;
+}
+
+int kvm_arch_create_vcpu_debugfs(struct kvm_vcpu *vcpu)
+{
+ return 0;
+}
+
+void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
+{
+ VCPU_EVENT(vcpu, 3, "%s", "free cpu");
+ trace_kvm_s390_destroy_vcpu(vcpu->vcpu_id);
+ kvm_s390_clear_local_irqs(vcpu);
+ kvm_clear_async_pf_completion_queue(vcpu);
+ if (!kvm_is_ucontrol(vcpu->kvm))
+ sca_del_vcpu(vcpu);
+
+ if (kvm_is_ucontrol(vcpu->kvm))
+ gmap_remove(vcpu->arch.gmap);
+
+ if (vcpu->kvm->arch.use_cmma)
+ kvm_s390_vcpu_unsetup_cmma(vcpu);
+ free_page((unsigned long)(vcpu->arch.sie_block));
+
+ kvm_vcpu_uninit(vcpu);
+ kmem_cache_free(kvm_vcpu_cache, vcpu);
+}
+
+static void kvm_free_vcpus(struct kvm *kvm)
+{
+ unsigned int i;
+ struct kvm_vcpu *vcpu;
+
+ kvm_for_each_vcpu(i, vcpu, kvm)
+ kvm_arch_vcpu_destroy(vcpu);
+
+ mutex_lock(&kvm->lock);
+ for (i = 0; i < atomic_read(&kvm->online_vcpus); i++)
+ kvm->vcpus[i] = NULL;
+
+ atomic_set(&kvm->online_vcpus, 0);
+ mutex_unlock(&kvm->lock);
+}
+
+void kvm_arch_destroy_vm(struct kvm *kvm)
+{
+ kvm_free_vcpus(kvm);
+ sca_dispose(kvm);
+ debug_unregister(kvm->arch.dbf);
+ kvm_s390_gisa_destroy(kvm);
+ free_page((unsigned long)kvm->arch.sie_page2);
+ if (!kvm_is_ucontrol(kvm))
+ gmap_remove(kvm->arch.gmap);
+ kvm_s390_destroy_adapters(kvm);
+ kvm_s390_clear_float_irqs(kvm);
+ kvm_s390_vsie_destroy(kvm);
+ KVM_EVENT(3, "vm 0x%pK destroyed", kvm);
+}
+
+/* Section: vcpu related */
+static int __kvm_ucontrol_vcpu_init(struct kvm_vcpu *vcpu)
+{
+ vcpu->arch.gmap = gmap_create(current->mm, -1UL);
+ if (!vcpu->arch.gmap)
+ return -ENOMEM;
+ vcpu->arch.gmap->private = vcpu->kvm;
+
+ return 0;
+}
+
+static void sca_del_vcpu(struct kvm_vcpu *vcpu)
+{
+ if (!kvm_s390_use_sca_entries())
+ return;
+ read_lock(&vcpu->kvm->arch.sca_lock);
+ if (vcpu->kvm->arch.use_esca) {
+ struct esca_block *sca = vcpu->kvm->arch.sca;
+
+ clear_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
+ sca->cpu[vcpu->vcpu_id].sda = 0;
+ } else {
+ struct bsca_block *sca = vcpu->kvm->arch.sca;
+
+ clear_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
+ sca->cpu[vcpu->vcpu_id].sda = 0;
+ }
+ read_unlock(&vcpu->kvm->arch.sca_lock);
+}
+
+static void sca_add_vcpu(struct kvm_vcpu *vcpu)
+{
+ if (!kvm_s390_use_sca_entries()) {
+ struct bsca_block *sca = vcpu->kvm->arch.sca;
+
+ /* we still need the basic sca for the ipte control */
+ vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
+ vcpu->arch.sie_block->scaol = (__u32)(__u64)sca;
+ return;
+ }
+ read_lock(&vcpu->kvm->arch.sca_lock);
+ if (vcpu->kvm->arch.use_esca) {
+ struct esca_block *sca = vcpu->kvm->arch.sca;
+
+ sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
+ vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
+ vcpu->arch.sie_block->scaol = (__u32)(__u64)sca & ~0x3fU;
+ vcpu->arch.sie_block->ecb2 |= ECB2_ESCA;
+ set_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
+ } else {
+ struct bsca_block *sca = vcpu->kvm->arch.sca;
+
+ sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
+ vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
+ vcpu->arch.sie_block->scaol = (__u32)(__u64)sca;
+ set_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
+ }
+ read_unlock(&vcpu->kvm->arch.sca_lock);
+}
+
+/* Basic SCA to Extended SCA data copy routines */
+static inline void sca_copy_entry(struct esca_entry *d, struct bsca_entry *s)
+{
+ d->sda = s->sda;
+ d->sigp_ctrl.c = s->sigp_ctrl.c;
+ d->sigp_ctrl.scn = s->sigp_ctrl.scn;
+}
+
+static void sca_copy_b_to_e(struct esca_block *d, struct bsca_block *s)
+{
+ int i;
+
+ d->ipte_control = s->ipte_control;
+ d->mcn[0] = s->mcn;
+ for (i = 0; i < KVM_S390_BSCA_CPU_SLOTS; i++)
+ sca_copy_entry(&d->cpu[i], &s->cpu[i]);
+}
+
+static int sca_switch_to_extended(struct kvm *kvm)
+{
+ struct bsca_block *old_sca = kvm->arch.sca;
+ struct esca_block *new_sca;
+ struct kvm_vcpu *vcpu;
+ unsigned int vcpu_idx;
+ u32 scaol, scaoh;
+
+ new_sca = alloc_pages_exact(sizeof(*new_sca), GFP_KERNEL|__GFP_ZERO);
+ if (!new_sca)
+ return -ENOMEM;
+
+ scaoh = (u32)((u64)(new_sca) >> 32);
+ scaol = (u32)(u64)(new_sca) & ~0x3fU;
+
+ kvm_s390_vcpu_block_all(kvm);
+ write_lock(&kvm->arch.sca_lock);
+
+ sca_copy_b_to_e(new_sca, old_sca);
+
+ kvm_for_each_vcpu(vcpu_idx, vcpu, kvm) {
+ vcpu->arch.sie_block->scaoh = scaoh;
+ vcpu->arch.sie_block->scaol = scaol;
+ vcpu->arch.sie_block->ecb2 |= ECB2_ESCA;
+ }
+ kvm->arch.sca = new_sca;
+ kvm->arch.use_esca = 1;
+
+ write_unlock(&kvm->arch.sca_lock);
+ kvm_s390_vcpu_unblock_all(kvm);
+
+ free_page((unsigned long)old_sca);
+
+ VM_EVENT(kvm, 2, "Switched to ESCA (0x%pK -> 0x%pK)",
+ old_sca, kvm->arch.sca);
+ return 0;
+}
+
+static int sca_can_add_vcpu(struct kvm *kvm, unsigned int id)
+{
+ int rc;
+
+ if (!kvm_s390_use_sca_entries()) {
+ if (id < KVM_MAX_VCPUS)
+ return true;
+ return false;
+ }
+ if (id < KVM_S390_BSCA_CPU_SLOTS)
+ return true;
+ if (!sclp.has_esca || !sclp.has_64bscao)
+ return false;
+
+ mutex_lock(&kvm->lock);
+ rc = kvm->arch.use_esca ? 0 : sca_switch_to_extended(kvm);
+ mutex_unlock(&kvm->lock);
+
+ return rc == 0 && id < KVM_S390_ESCA_CPU_SLOTS;
+}
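+
+/*
+ * Note: the basic SCA has KVM_S390_BSCA_CPU_SLOTS (64) entries, the extended
+ * SCA KVM_S390_ESCA_CPU_SLOTS (248); the first VCPU id beyond the basic range
+ * therefore triggers the one-way switch above, provided the machine offers
+ * both the ESCA and the 64-bit SCAO facilities.
+ */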
+
+int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
+{
+ vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
+ kvm_clear_async_pf_completion_queue(vcpu);
+ vcpu->run->kvm_valid_regs = KVM_SYNC_PREFIX |
+ KVM_SYNC_GPRS |
+ KVM_SYNC_ACRS |
+ KVM_SYNC_CRS |
+ KVM_SYNC_ARCH0 |
+ KVM_SYNC_PFAULT;
+ kvm_s390_set_prefix(vcpu, 0);
+ if (test_kvm_facility(vcpu->kvm, 64))
+ vcpu->run->kvm_valid_regs |= KVM_SYNC_RICCB;
+ if (test_kvm_facility(vcpu->kvm, 82))
+ vcpu->run->kvm_valid_regs |= KVM_SYNC_BPBC;
+ if (test_kvm_facility(vcpu->kvm, 133))
+ vcpu->run->kvm_valid_regs |= KVM_SYNC_GSCB;
+ if (test_kvm_facility(vcpu->kvm, 156))
+ vcpu->run->kvm_valid_regs |= KVM_SYNC_ETOKEN;
+ /*
+ * fprs can be synchronized via vrs, even if the guest has no vx. With
+ * MACHINE_HAS_VX, (load|store)_fpu_regs() will work with vrs format.
+ */
+ if (MACHINE_HAS_VX)
+ vcpu->run->kvm_valid_regs |= KVM_SYNC_VRS;
+ else
+ vcpu->run->kvm_valid_regs |= KVM_SYNC_FPRS;
+
+ if (kvm_is_ucontrol(vcpu->kvm))
+ return __kvm_ucontrol_vcpu_init(vcpu);
+
+ return 0;
+}
+
+/* needs disabled preemption to protect from TOD sync and vcpu_load/put */
+static void __start_cpu_timer_accounting(struct kvm_vcpu *vcpu)
+{
+ WARN_ON_ONCE(vcpu->arch.cputm_start != 0);
+ raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
+ vcpu->arch.cputm_start = get_tod_clock_fast();
+ raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
+}
+
+/* needs disabled preemption to protect from TOD sync and vcpu_load/put */
+static void __stop_cpu_timer_accounting(struct kvm_vcpu *vcpu)
+{
+ WARN_ON_ONCE(vcpu->arch.cputm_start == 0);
+ raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
+ vcpu->arch.sie_block->cputm -= get_tod_clock_fast() - vcpu->arch.cputm_start;
+ vcpu->arch.cputm_start = 0;
+ raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
+}
+
+/* needs disabled preemption to protect from TOD sync and vcpu_load/put */
+static void __enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
+{
+ WARN_ON_ONCE(vcpu->arch.cputm_enabled);
+ vcpu->arch.cputm_enabled = true;
+ __start_cpu_timer_accounting(vcpu);
+}
+
+/* needs disabled preemption to protect from TOD sync and vcpu_load/put */
+static void __disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
+{
+ WARN_ON_ONCE(!vcpu->arch.cputm_enabled);
+ __stop_cpu_timer_accounting(vcpu);
+ vcpu->arch.cputm_enabled = false;
+}
+
+static void enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
+{
+ preempt_disable(); /* protect from TOD sync and vcpu_load/put */
+ __enable_cpu_timer_accounting(vcpu);
+ preempt_enable();
+}
+
+static void disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
+{
+ preempt_disable(); /* protect from TOD sync and vcpu_load/put */
+ __disable_cpu_timer_accounting(vcpu);
+ preempt_enable();
+}
+
+/* set the cpu timer - may only be called from the VCPU thread itself */
+void kvm_s390_set_cpu_timer(struct kvm_vcpu *vcpu, __u64 cputm)
+{
+ preempt_disable(); /* protect from TOD sync and vcpu_load/put */
+ raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
+ if (vcpu->arch.cputm_enabled)
+ vcpu->arch.cputm_start = get_tod_clock_fast();
+ vcpu->arch.sie_block->cputm = cputm;
+ raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
+ preempt_enable();
+}
+
+/* update and get the cpu timer - can also be called from other VCPU threads */
+__u64 kvm_s390_get_cpu_timer(struct kvm_vcpu *vcpu)
+{
+ unsigned int seq;
+ __u64 value;
+
+ if (unlikely(!vcpu->arch.cputm_enabled))
+ return vcpu->arch.sie_block->cputm;
+
+ preempt_disable(); /* protect from TOD sync and vcpu_load/put */
+ do {
+ seq = raw_read_seqcount(&vcpu->arch.cputm_seqcount);
+ /*
+ * If the writer would ever execute a read in the critical
+ * section, e.g. in irq context, we have a deadlock.
+ */
+ WARN_ON_ONCE((seq & 1) && smp_processor_id() == vcpu->cpu);
+ value = vcpu->arch.sie_block->cputm;
+ /* if cputm_start is 0, accounting is being started/stopped */
+ if (likely(vcpu->arch.cputm_start))
+ value -= get_tod_clock_fast() - vcpu->arch.cputm_start;
+ } while (read_seqcount_retry(&vcpu->arch.cputm_seqcount, seq & ~1));
+ preempt_enable();
+ return value;
+}
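+
+/*
+ * Illustration: writers (kvm_s390_set_cpu_timer and the start/stop helpers
+ * above) leave cputm_seqcount odd while they update cputm/cputm_start.
+ * Masking the snapshot with ~1 makes read_seqcount_retry() report a retry
+ * for such in-flight sequence numbers as well, so a racing reader simply
+ * loops and picks up a consistent (cputm, cputm_start) pair next time.
+ */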
+
+void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
+{
+ gmap_enable(vcpu->arch.enabled_gmap);
+ kvm_s390_set_cpuflags(vcpu, CPUSTAT_RUNNING);
+ if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
+ __start_cpu_timer_accounting(vcpu);
+ vcpu->cpu = cpu;
+}
+
+void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
+{
+ vcpu->cpu = -1;
+ if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
+ __stop_cpu_timer_accounting(vcpu);
+ kvm_s390_clear_cpuflags(vcpu, CPUSTAT_RUNNING);
+ vcpu->arch.enabled_gmap = gmap_get_enabled();
+ gmap_disable(vcpu->arch.enabled_gmap);
+}
+
+static void kvm_s390_vcpu_initial_reset(struct kvm_vcpu *vcpu)
+{
+ /* this equals initial cpu reset in pop, but we don't switch to ESA */
+ vcpu->arch.sie_block->gpsw.mask = 0UL;
+ vcpu->arch.sie_block->gpsw.addr = 0UL;
+ kvm_s390_set_prefix(vcpu, 0);
+ kvm_s390_set_cpu_timer(vcpu, 0);
+ vcpu->arch.sie_block->ckc = 0UL;
+ vcpu->arch.sie_block->todpr = 0;
+ memset(vcpu->arch.sie_block->gcr, 0, 16 * sizeof(__u64));
+ vcpu->arch.sie_block->gcr[0] = CR0_UNUSED_56 |
+ CR0_INTERRUPT_KEY_SUBMASK |
+ CR0_MEASUREMENT_ALERT_SUBMASK;
+ vcpu->arch.sie_block->gcr[14] = CR14_UNUSED_32 |
+ CR14_UNUSED_33 |
+ CR14_EXTERNAL_DAMAGE_SUBMASK;
+ vcpu->run->s.regs.fpc = 0;
+ vcpu->arch.sie_block->gbea = 1;
+ vcpu->arch.sie_block->pp = 0;
+ vcpu->arch.sie_block->fpf &= ~FPF_BPBC;
+ vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
+ kvm_clear_async_pf_completion_queue(vcpu);
+ if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm))
+ kvm_s390_vcpu_stop(vcpu);
+ kvm_s390_clear_local_irqs(vcpu);
+}
+
+void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
+{
+ mutex_lock(&vcpu->kvm->lock);
+ preempt_disable();
+ vcpu->arch.sie_block->epoch = vcpu->kvm->arch.epoch;
+ vcpu->arch.sie_block->epdx = vcpu->kvm->arch.epdx;
+ preempt_enable();
+ mutex_unlock(&vcpu->kvm->lock);
+ if (!kvm_is_ucontrol(vcpu->kvm)) {
+ vcpu->arch.gmap = vcpu->kvm->arch.gmap;
+ sca_add_vcpu(vcpu);
+ }
+ if (test_kvm_facility(vcpu->kvm, 74) || vcpu->kvm->arch.user_instr0)
+ vcpu->arch.sie_block->ictl |= ICTL_OPEREXC;
+ /* make vcpu_load load the right gmap on the first trigger */
+ vcpu->arch.enabled_gmap = vcpu->arch.gmap;
+}
+
+static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu)
+{
+ if (!test_kvm_facility(vcpu->kvm, 76))
+ return;
+
+ vcpu->arch.sie_block->ecb3 &= ~(ECB3_AES | ECB3_DEA);
+
+ if (vcpu->kvm->arch.crypto.aes_kw)
+ vcpu->arch.sie_block->ecb3 |= ECB3_AES;
+ if (vcpu->kvm->arch.crypto.dea_kw)
+ vcpu->arch.sie_block->ecb3 |= ECB3_DEA;
+
+ vcpu->arch.sie_block->crycbd = vcpu->kvm->arch.crypto.crycbd;
+}
+
+void kvm_s390_vcpu_unsetup_cmma(struct kvm_vcpu *vcpu)
+{
+ free_page(vcpu->arch.sie_block->cbrlo);
+ vcpu->arch.sie_block->cbrlo = 0;
+}
+
+int kvm_s390_vcpu_setup_cmma(struct kvm_vcpu *vcpu)
+{
+ vcpu->arch.sie_block->cbrlo = get_zeroed_page(GFP_KERNEL);
+ if (!vcpu->arch.sie_block->cbrlo)
+ return -ENOMEM;
+ return 0;
+}
+
+static void kvm_s390_vcpu_setup_model(struct kvm_vcpu *vcpu)
+{
+ struct kvm_s390_cpu_model *model = &vcpu->kvm->arch.model;
+
+ vcpu->arch.sie_block->ibc = model->ibc;
+ if (test_kvm_facility(vcpu->kvm, 7))
+ vcpu->arch.sie_block->fac = (u32)(u64) model->fac_list;
+}
+
+int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
+{
+ int rc = 0;
+
+ atomic_set(&vcpu->arch.sie_block->cpuflags, CPUSTAT_ZARCH |
+ CPUSTAT_SM |
+ CPUSTAT_STOPPED);
+
+ if (test_kvm_facility(vcpu->kvm, 78))
+ kvm_s390_set_cpuflags(vcpu, CPUSTAT_GED2);
+ else if (test_kvm_facility(vcpu->kvm, 8))
+ kvm_s390_set_cpuflags(vcpu, CPUSTAT_GED);
+
+ kvm_s390_vcpu_setup_model(vcpu);
+
+ /* pgste_set_pte has special handling for !MACHINE_HAS_ESOP */
+ if (MACHINE_HAS_ESOP)
+ vcpu->arch.sie_block->ecb |= ECB_HOSTPROTINT;
+ if (test_kvm_facility(vcpu->kvm, 9))
+ vcpu->arch.sie_block->ecb |= ECB_SRSI;
+ if (test_kvm_facility(vcpu->kvm, 73))
+ vcpu->arch.sie_block->ecb |= ECB_TE;
+
+ if (test_kvm_facility(vcpu->kvm, 8) && vcpu->kvm->arch.use_pfmfi)
+ vcpu->arch.sie_block->ecb2 |= ECB2_PFMFI;
+ if (test_kvm_facility(vcpu->kvm, 130))
+ vcpu->arch.sie_block->ecb2 |= ECB2_IEP;
+ vcpu->arch.sie_block->eca = ECA_MVPGI | ECA_PROTEXCI;
+ if (sclp.has_cei)
+ vcpu->arch.sie_block->eca |= ECA_CEI;
+ if (sclp.has_ib)
+ vcpu->arch.sie_block->eca |= ECA_IB;
+ if (sclp.has_siif)
+ vcpu->arch.sie_block->eca |= ECA_SII;
+ if (sclp.has_sigpif)
+ vcpu->arch.sie_block->eca |= ECA_SIGPI;
+ if (test_kvm_facility(vcpu->kvm, 129)) {
+ vcpu->arch.sie_block->eca |= ECA_VX;
+ vcpu->arch.sie_block->ecd |= ECD_HOSTREGMGMT;
+ }
+ if (test_kvm_facility(vcpu->kvm, 139))
+ vcpu->arch.sie_block->ecd |= ECD_MEF;
+ if (test_kvm_facility(vcpu->kvm, 156))
+ vcpu->arch.sie_block->ecd |= ECD_ETOKENF;
+ if (vcpu->arch.sie_block->gd) {
+ vcpu->arch.sie_block->eca |= ECA_AIV;
+ VCPU_EVENT(vcpu, 3, "AIV gisa format-%u enabled for cpu %03u",
+ vcpu->arch.sie_block->gd & 0x3, vcpu->vcpu_id);
+ }
+ vcpu->arch.sie_block->sdnxo = ((unsigned long) &vcpu->run->s.regs.sdnx)
+ | SDNXC;
+ vcpu->arch.sie_block->riccbd = (unsigned long) &vcpu->run->s.regs.riccb;
+
+ if (sclp.has_kss)
+ kvm_s390_set_cpuflags(vcpu, CPUSTAT_KSS);
+ else
+ vcpu->arch.sie_block->ictl |= ICTL_ISKE | ICTL_SSKE | ICTL_RRBE;
+
+ if (vcpu->kvm->arch.use_cmma) {
+ rc = kvm_s390_vcpu_setup_cmma(vcpu);
+ if (rc)
+ return rc;
+ }
+ hrtimer_init(&vcpu->arch.ckc_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
+ vcpu->arch.ckc_timer.function = kvm_s390_idle_wakeup;
+
+ kvm_s390_vcpu_crypto_setup(vcpu);
+
+ return rc;
+}
+
+struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
+ unsigned int id)
+{
+ struct kvm_vcpu *vcpu;
+ struct sie_page *sie_page;
+ int rc = -EINVAL;
+
+ if (!kvm_is_ucontrol(kvm) && !sca_can_add_vcpu(kvm, id))
+ goto out;
+
+ rc = -ENOMEM;
+
+ vcpu = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL);
+ if (!vcpu)
+ goto out;
+
+ BUILD_BUG_ON(sizeof(struct sie_page) != 4096);
+ sie_page = (struct sie_page *) get_zeroed_page(GFP_KERNEL);
+ if (!sie_page)
+ goto out_free_cpu;
+
+ vcpu->arch.sie_block = &sie_page->sie_block;
+ vcpu->arch.sie_block->itdba = (unsigned long) &sie_page->itdb;
+
+ /* the real guest size will always be smaller than msl */
+ vcpu->arch.sie_block->mso = 0;
+ vcpu->arch.sie_block->msl = sclp.hamax;
+
+ vcpu->arch.sie_block->icpua = id;
+ spin_lock_init(&vcpu->arch.local_int.lock);
+ vcpu->arch.sie_block->gd = (u32)(u64)kvm->arch.gisa;
+ if (vcpu->arch.sie_block->gd && sclp.has_gisaf)
+ vcpu->arch.sie_block->gd |= GISA_FORMAT1;
+ seqcount_init(&vcpu->arch.cputm_seqcount);
+
+ rc = kvm_vcpu_init(vcpu, kvm, id);
+ if (rc)
+ goto out_free_sie_block;
+ VM_EVENT(kvm, 3, "create cpu %d at 0x%pK, sie block at 0x%pK", id, vcpu,
+ vcpu->arch.sie_block);
+ trace_kvm_s390_create_vcpu(id, vcpu, vcpu->arch.sie_block);
+
+ return vcpu;
+out_free_sie_block:
+ free_page((unsigned long)(vcpu->arch.sie_block));
+out_free_cpu:
+ kmem_cache_free(kvm_vcpu_cache, vcpu);
+out:
+ return ERR_PTR(rc);
+}
+
+int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
+{
+ return kvm_s390_vcpu_has_irq(vcpu, 0);
+}
+
+bool kvm_arch_vcpu_in_kernel(struct kvm_vcpu *vcpu)
+{
+ return !(vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE);
+}
+
+void kvm_s390_vcpu_block(struct kvm_vcpu *vcpu)
+{
+ atomic_or(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
+ exit_sie(vcpu);
+}
+
+void kvm_s390_vcpu_unblock(struct kvm_vcpu *vcpu)
+{
+ atomic_andnot(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
+}
+
+static void kvm_s390_vcpu_request(struct kvm_vcpu *vcpu)
+{
+ atomic_or(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
+ exit_sie(vcpu);
+}
+
+static void kvm_s390_vcpu_request_handled(struct kvm_vcpu *vcpu)
+{
+ atomic_andnot(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
+}
+
+/*
+ * Kick a guest cpu out of SIE and wait until SIE is not running.
+ * If the CPU is not running (e.g. waiting as idle) the function will
+ * return immediately.
+ */
+void exit_sie(struct kvm_vcpu *vcpu)
+{
+ kvm_s390_set_cpuflags(vcpu, CPUSTAT_STOP_INT);
+ while (vcpu->arch.sie_block->prog0c & PROG_IN_SIE)
+ cpu_relax();
+}
+
+/* Kick a guest cpu out of SIE to process a request synchronously */
+void kvm_s390_sync_request(int req, struct kvm_vcpu *vcpu)
+{
+ kvm_make_request(req, vcpu);
+ kvm_s390_vcpu_request(vcpu);
+}
+
+static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
+ unsigned long end)
+{
+ struct kvm *kvm = gmap->private;
+ struct kvm_vcpu *vcpu;
+ unsigned long prefix;
+ int i;
+
+ if (gmap_is_shadow(gmap))
+ return;
+ if (start >= 1UL << 31)
+ /* We are only interested in prefix pages */
+ return;
+ kvm_for_each_vcpu(i, vcpu, kvm) {
+ /* match against both prefix pages */
+ prefix = kvm_s390_get_prefix(vcpu);
+ if (prefix <= end && start <= prefix + 2*PAGE_SIZE - 1) {
+ VCPU_EVENT(vcpu, 2, "gmap notifier for %lx-%lx",
+ start, end);
+ kvm_s390_sync_request(KVM_REQ_MMU_RELOAD, vcpu);
+ }
+ }
+}
+
+int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
+{
+ /* kvm common code refers to this, but never calls it */
+ BUG();
+ return 0;
+}
+
+static int kvm_arch_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu,
+ struct kvm_one_reg *reg)
+{
+ int r = -EINVAL;
+
+ switch (reg->id) {
+ case KVM_REG_S390_TODPR:
+ r = put_user(vcpu->arch.sie_block->todpr,
+ (u32 __user *)reg->addr);
+ break;
+ case KVM_REG_S390_EPOCHDIFF:
+ r = put_user(vcpu->arch.sie_block->epoch,
+ (u64 __user *)reg->addr);
+ break;
+ case KVM_REG_S390_CPU_TIMER:
+ r = put_user(kvm_s390_get_cpu_timer(vcpu),
+ (u64 __user *)reg->addr);
+ break;
+ case KVM_REG_S390_CLOCK_COMP:
+ r = put_user(vcpu->arch.sie_block->ckc,
+ (u64 __user *)reg->addr);
+ break;
+ case KVM_REG_S390_PFTOKEN:
+ r = put_user(vcpu->arch.pfault_token,
+ (u64 __user *)reg->addr);
+ break;
+ case KVM_REG_S390_PFCOMPARE:
+ r = put_user(vcpu->arch.pfault_compare,
+ (u64 __user *)reg->addr);
+ break;
+ case KVM_REG_S390_PFSELECT:
+ r = put_user(vcpu->arch.pfault_select,
+ (u64 __user *)reg->addr);
+ break;
+ case KVM_REG_S390_PP:
+ r = put_user(vcpu->arch.sie_block->pp,
+ (u64 __user *)reg->addr);
+ break;
+ case KVM_REG_S390_GBEA:
+ r = put_user(vcpu->arch.sie_block->gbea,
+ (u64 __user *)reg->addr);
+ break;
+ default:
+ break;
+ }
+
+ return r;
+}
+
+static int kvm_arch_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu,
+ struct kvm_one_reg *reg)
+{
+ int r = -EINVAL;
+ __u64 val;
+
+ switch (reg->id) {
+ case KVM_REG_S390_TODPR:
+ r = get_user(vcpu->arch.sie_block->todpr,
+ (u32 __user *)reg->addr);
+ break;
+ case KVM_REG_S390_EPOCHDIFF:
+ r = get_user(vcpu->arch.sie_block->epoch,
+ (u64 __user *)reg->addr);
+ break;
+ case KVM_REG_S390_CPU_TIMER:
+ r = get_user(val, (u64 __user *)reg->addr);
+ if (!r)
+ kvm_s390_set_cpu_timer(vcpu, val);
+ break;
+ case KVM_REG_S390_CLOCK_COMP:
+ r = get_user(vcpu->arch.sie_block->ckc,
+ (u64 __user *)reg->addr);
+ break;
+ case KVM_REG_S390_PFTOKEN:
+ r = get_user(vcpu->arch.pfault_token,
+ (u64 __user *)reg->addr);
+ if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
+ kvm_clear_async_pf_completion_queue(vcpu);
+ break;
+ case KVM_REG_S390_PFCOMPARE:
+ r = get_user(vcpu->arch.pfault_compare,
+ (u64 __user *)reg->addr);
+ break;
+ case KVM_REG_S390_PFSELECT:
+ r = get_user(vcpu->arch.pfault_select,
+ (u64 __user *)reg->addr);
+ break;
+ case KVM_REG_S390_PP:
+ r = get_user(vcpu->arch.sie_block->pp,
+ (u64 __user *)reg->addr);
+ break;
+ case KVM_REG_S390_GBEA:
+ r = get_user(vcpu->arch.sie_block->gbea,
+ (u64 __user *)reg->addr);
+ break;
+ default:
+ break;
+ }
+
+ return r;
+}
+
+static int kvm_arch_vcpu_ioctl_initial_reset(struct kvm_vcpu *vcpu)
+{
+ kvm_s390_vcpu_initial_reset(vcpu);
+ return 0;
+}
+
+int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
+{
+ vcpu_load(vcpu);
+ memcpy(&vcpu->run->s.regs.gprs, &regs->gprs, sizeof(regs->gprs));
+ vcpu_put(vcpu);
+ return 0;
+}
+
+int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
+{
+ vcpu_load(vcpu);
+ memcpy(&regs->gprs, &vcpu->run->s.regs.gprs, sizeof(regs->gprs));
+ vcpu_put(vcpu);
+ return 0;
+}
+
+int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
+ struct kvm_sregs *sregs)
+{
+ vcpu_load(vcpu);
+
+ memcpy(&vcpu->run->s.regs.acrs, &sregs->acrs, sizeof(sregs->acrs));
+ memcpy(&vcpu->arch.sie_block->gcr, &sregs->crs, sizeof(sregs->crs));
+
+ vcpu_put(vcpu);
+ return 0;
+}
+
+int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
+ struct kvm_sregs *sregs)
+{
+ vcpu_load(vcpu);
+
+ memcpy(&sregs->acrs, &vcpu->run->s.regs.acrs, sizeof(sregs->acrs));
+ memcpy(&sregs->crs, &vcpu->arch.sie_block->gcr, sizeof(sregs->crs));
+
+ vcpu_put(vcpu);
+ return 0;
+}
+
+int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
+{
+ int ret = 0;
+
+ vcpu_load(vcpu);
+
+ if (test_fp_ctl(fpu->fpc)) {
+ ret = -EINVAL;
+ goto out;
+ }
+ vcpu->run->s.regs.fpc = fpu->fpc;
+ if (MACHINE_HAS_VX)
+ convert_fp_to_vx((__vector128 *) vcpu->run->s.regs.vrs,
+ (freg_t *) fpu->fprs);
+ else
+ memcpy(vcpu->run->s.regs.fprs, &fpu->fprs, sizeof(fpu->fprs));
+
+out:
+ vcpu_put(vcpu);
+ return ret;
+}
+
+int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
+{
+ vcpu_load(vcpu);
+
+ /* make sure we have the latest values */
+ save_fpu_regs();
+ if (MACHINE_HAS_VX)
+ convert_vx_to_fp((freg_t *) fpu->fprs,
+ (__vector128 *) vcpu->run->s.regs.vrs);
+ else
+ memcpy(fpu->fprs, vcpu->run->s.regs.fprs, sizeof(fpu->fprs));
+ fpu->fpc = vcpu->run->s.regs.fpc;
+
+ vcpu_put(vcpu);
+ return 0;
+}
+
+static int kvm_arch_vcpu_ioctl_set_initial_psw(struct kvm_vcpu *vcpu, psw_t psw)
+{
+ int rc = 0;
+
+ if (!is_vcpu_stopped(vcpu))
+ rc = -EBUSY;
+ else {
+ vcpu->run->psw_mask = psw.mask;
+ vcpu->run->psw_addr = psw.addr;
+ }
+ return rc;
+}
+
+int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
+ struct kvm_translation *tr)
+{
+ return -EINVAL; /* not implemented yet */
+}
+
+#define VALID_GUESTDBG_FLAGS (KVM_GUESTDBG_SINGLESTEP | \
+ KVM_GUESTDBG_USE_HW_BP | \
+ KVM_GUESTDBG_ENABLE)
+
+int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
+ struct kvm_guest_debug *dbg)
+{
+ int rc = 0;
+
+ vcpu_load(vcpu);
+
+ vcpu->guest_debug = 0;
+ kvm_s390_clear_bp_data(vcpu);
+
+ if (dbg->control & ~VALID_GUESTDBG_FLAGS) {
+ rc = -EINVAL;
+ goto out;
+ }
+ if (!sclp.has_gpere) {
+ rc = -EINVAL;
+ goto out;
+ }
+
+ if (dbg->control & KVM_GUESTDBG_ENABLE) {
+ vcpu->guest_debug = dbg->control;
+ /* enforce guest PER */
+ kvm_s390_set_cpuflags(vcpu, CPUSTAT_P);
+
+ if (dbg->control & KVM_GUESTDBG_USE_HW_BP)
+ rc = kvm_s390_import_bp_data(vcpu, dbg);
+ } else {
+ kvm_s390_clear_cpuflags(vcpu, CPUSTAT_P);
+ vcpu->arch.guestdbg.last_bp = 0;
+ }
+
+ if (rc) {
+ vcpu->guest_debug = 0;
+ kvm_s390_clear_bp_data(vcpu);
+ kvm_s390_clear_cpuflags(vcpu, CPUSTAT_P);
+ }
+
+out:
+ vcpu_put(vcpu);
+ return rc;
+}
+
+int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
+ struct kvm_mp_state *mp_state)
+{
+ int ret;
+
+ vcpu_load(vcpu);
+
+ /* CHECK_STOP and LOAD are not supported yet */
+ ret = is_vcpu_stopped(vcpu) ? KVM_MP_STATE_STOPPED :
+ KVM_MP_STATE_OPERATING;
+
+ vcpu_put(vcpu);
+ return ret;
+}
+
+int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
+ struct kvm_mp_state *mp_state)
+{
+ int rc = 0;
+
+ vcpu_load(vcpu);
+
+ /* user space knows about this interface - let it control the state */
+ vcpu->kvm->arch.user_cpu_state_ctrl = 1;
+
+ switch (mp_state->mp_state) {
+ case KVM_MP_STATE_STOPPED:
+ kvm_s390_vcpu_stop(vcpu);
+ break;
+ case KVM_MP_STATE_OPERATING:
+ kvm_s390_vcpu_start(vcpu);
+ break;
+ case KVM_MP_STATE_LOAD:
+ case KVM_MP_STATE_CHECK_STOP:
+ /* fall through - CHECK_STOP and LOAD are not supported yet */
+ default:
+ rc = -ENXIO;
+ }
+
+ vcpu_put(vcpu);
+ return rc;
+}
+
+static bool ibs_enabled(struct kvm_vcpu *vcpu)
+{
+ return kvm_s390_test_cpuflags(vcpu, CPUSTAT_IBS);
+}
+
+static int kvm_s390_handle_requests(struct kvm_vcpu *vcpu)
+{
+retry:
+ kvm_s390_vcpu_request_handled(vcpu);
+ if (!kvm_request_pending(vcpu))
+ return 0;
+ /*
+ * We use MMU_RELOAD just to re-arm the ipte notifier for the
+ * guest prefix page. gmap_mprotect_notify will wait on the ptl lock.
+ * This ensures that the ipte instruction for this request has
+ * already finished. We might race against a second unmapper that
+ * wants to set the blocking bit. Let's just retry the request loop.
+ */
+ if (kvm_check_request(KVM_REQ_MMU_RELOAD, vcpu)) {
+ int rc;
+ rc = gmap_mprotect_notify(vcpu->arch.gmap,
+ kvm_s390_get_prefix(vcpu),
+ PAGE_SIZE * 2, PROT_WRITE);
+ if (rc) {
+ kvm_make_request(KVM_REQ_MMU_RELOAD, vcpu);
+ return rc;
+ }
+ goto retry;
+ }
+
+ if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu)) {
+ vcpu->arch.sie_block->ihcpu = 0xffff;
+ goto retry;
+ }
+
+ if (kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu)) {
+ if (!ibs_enabled(vcpu)) {
+ trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 1);
+ kvm_s390_set_cpuflags(vcpu, CPUSTAT_IBS);
+ }
+ goto retry;
+ }
+
+ if (kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu)) {
+ if (ibs_enabled(vcpu)) {
+ trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 0);
+ kvm_s390_clear_cpuflags(vcpu, CPUSTAT_IBS);
+ }
+ goto retry;
+ }
+
+ if (kvm_check_request(KVM_REQ_ICPT_OPEREXC, vcpu)) {
+ vcpu->arch.sie_block->ictl |= ICTL_OPEREXC;
+ goto retry;
+ }
+
+ if (kvm_check_request(KVM_REQ_START_MIGRATION, vcpu)) {
+ /*
+ * Disable CMM virtualization; we will emulate the ESSA
+ * instruction manually, in order to provide additional
+ * functionalities needed for live migration.
+ */
+ vcpu->arch.sie_block->ecb2 &= ~ECB2_CMMA;
+ goto retry;
+ }
+
+ if (kvm_check_request(KVM_REQ_STOP_MIGRATION, vcpu)) {
+ /*
+ * Re-enable CMM virtualization if CMMA is available and
+ * CMM has been used.
+ */
+ if ((vcpu->kvm->arch.use_cmma) &&
+ (vcpu->kvm->mm->context.uses_cmm))
+ vcpu->arch.sie_block->ecb2 |= ECB2_CMMA;
+ goto retry;
+ }
+
+ /* nothing to do, just clear the request */
+ kvm_clear_request(KVM_REQ_UNHALT, vcpu);
+
+ return 0;
+}
+
+void kvm_s390_set_tod_clock(struct kvm *kvm,
+ const struct kvm_s390_vm_tod_clock *gtod)
+{
+ struct kvm_vcpu *vcpu;
+ struct kvm_s390_tod_clock_ext htod;
+ int i;
+
+ mutex_lock(&kvm->lock);
+ preempt_disable();
+
+ get_tod_clock_ext((char *)&htod);
+
+ kvm->arch.epoch = gtod->tod - htod.tod;
+ kvm->arch.epdx = 0;
+ if (test_kvm_facility(kvm, 139)) {
+ kvm->arch.epdx = gtod->epoch_idx - htod.epoch_idx;
+ if (kvm->arch.epoch > gtod->tod)
+ kvm->arch.epdx -= 1;
+ }
+
+ kvm_s390_vcpu_block_all(kvm);
+ kvm_for_each_vcpu(i, vcpu, kvm) {
+ vcpu->arch.sie_block->epoch = kvm->arch.epoch;
+ vcpu->arch.sie_block->epdx = kvm->arch.epdx;
+ }
+
+ kvm_s390_vcpu_unblock_all(kvm);
+ preempt_enable();
+ mutex_unlock(&kvm->lock);
+}
+
+/**
+ * kvm_arch_fault_in_page - fault-in guest page if necessary
+ * @vcpu: The corresponding virtual cpu
+ * @gpa: Guest physical address
+ * @writable: Whether the page should be writable or not
+ *
+ * Make sure that a guest page has been faulted-in on the host.
+ *
+ * Return: Zero on success, negative error code otherwise.
+ */
+long kvm_arch_fault_in_page(struct kvm_vcpu *vcpu, gpa_t gpa, int writable)
+{
+ return gmap_fault(vcpu->arch.gmap, gpa,
+ writable ? FAULT_FLAG_WRITE : 0);
+}
+
+static void __kvm_inject_pfault_token(struct kvm_vcpu *vcpu, bool start_token,
+ unsigned long token)
+{
+ struct kvm_s390_interrupt inti;
+ struct kvm_s390_irq irq;
+
+ if (start_token) {
+ irq.u.ext.ext_params2 = token;
+ irq.type = KVM_S390_INT_PFAULT_INIT;
+ WARN_ON_ONCE(kvm_s390_inject_vcpu(vcpu, &irq));
+ } else {
+ inti.type = KVM_S390_INT_PFAULT_DONE;
+ inti.parm64 = token;
+ WARN_ON_ONCE(kvm_s390_inject_vm(vcpu->kvm, &inti));
+ }
+}
+
+void kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu,
+ struct kvm_async_pf *work)
+{
+ trace_kvm_s390_pfault_init(vcpu, work->arch.pfault_token);
+ __kvm_inject_pfault_token(vcpu, true, work->arch.pfault_token);
+}
+
+void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
+ struct kvm_async_pf *work)
+{
+ trace_kvm_s390_pfault_done(vcpu, work->arch.pfault_token);
+ __kvm_inject_pfault_token(vcpu, false, work->arch.pfault_token);
+}
+
+void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu,
+ struct kvm_async_pf *work)
+{
+ /* s390 will always inject the page directly */
+}
+
+bool kvm_arch_can_inject_async_page_present(struct kvm_vcpu *vcpu)
+{
+ /*
+ * s390 will always inject the page directly,
+	 * but we still want check_async_completion to clean up
+ */
+ return true;
+}
+
+static int kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu)
+{
+ hva_t hva;
+ struct kvm_arch_async_pf arch;
+ int rc;
+
+ if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
+ return 0;
+ if ((vcpu->arch.sie_block->gpsw.mask & vcpu->arch.pfault_select) !=
+ vcpu->arch.pfault_compare)
+ return 0;
+ if (psw_extint_disabled(vcpu))
+ return 0;
+ if (kvm_s390_vcpu_has_irq(vcpu, 0))
+ return 0;
+ if (!(vcpu->arch.sie_block->gcr[0] & CR0_SERVICE_SIGNAL_SUBMASK))
+ return 0;
+ if (!vcpu->arch.gmap->pfault_enabled)
+ return 0;
+
+ hva = gfn_to_hva(vcpu->kvm, gpa_to_gfn(current->thread.gmap_addr));
+ hva += current->thread.gmap_addr & ~PAGE_MASK;
+ if (read_guest_real(vcpu, vcpu->arch.pfault_token, &arch.pfault_token, 8))
+ return 0;
+
+ rc = kvm_setup_async_pf(vcpu, current->thread.gmap_addr, hva, &arch);
+ return rc;
+}
+
+static int vcpu_pre_run(struct kvm_vcpu *vcpu)
+{
+ int rc, cpuflags;
+
+ /*
+ * On s390 notifications for arriving pages will be delivered directly
+	 * to the guest, but the housekeeping for completed pfaults is
+ * handled outside the worker.
+ */
+ kvm_check_async_pf_completion(vcpu);
+
+ vcpu->arch.sie_block->gg14 = vcpu->run->s.regs.gprs[14];
+ vcpu->arch.sie_block->gg15 = vcpu->run->s.regs.gprs[15];
+
+ if (need_resched())
+ schedule();
+
+ if (test_cpu_flag(CIF_MCCK_PENDING))
+ s390_handle_mcck();
+
+ if (!kvm_is_ucontrol(vcpu->kvm)) {
+ rc = kvm_s390_deliver_pending_interrupts(vcpu);
+ if (rc)
+ return rc;
+ }
+
+ rc = kvm_s390_handle_requests(vcpu);
+ if (rc)
+ return rc;
+
+ if (guestdbg_enabled(vcpu)) {
+ kvm_s390_backup_guest_per_regs(vcpu);
+ kvm_s390_patch_guest_per_regs(vcpu);
+ }
+
+ vcpu->arch.sie_block->icptcode = 0;
+ cpuflags = atomic_read(&vcpu->arch.sie_block->cpuflags);
+ VCPU_EVENT(vcpu, 6, "entering sie flags %x", cpuflags);
+ trace_kvm_s390_sie_enter(vcpu, cpuflags);
+
+ return 0;
+}
+
+static int vcpu_post_run_fault_in_sie(struct kvm_vcpu *vcpu)
+{
+ struct kvm_s390_pgm_info pgm_info = {
+ .code = PGM_ADDRESSING,
+ };
+ u8 opcode, ilen;
+ int rc;
+
+ VCPU_EVENT(vcpu, 3, "%s", "fault in sie instruction");
+ trace_kvm_s390_sie_fault(vcpu);
+
+ /*
+ * We want to inject an addressing exception, which is defined as a
+ * suppressing or terminating exception. However, since we came here
+ * by a DAT access exception, the PSW still points to the faulting
+ * instruction since DAT exceptions are nullifying. So we've got
+ * to look up the current opcode to get the length of the instruction
+ * to be able to forward the PSW.
+ */
+ rc = read_guest_instr(vcpu, vcpu->arch.sie_block->gpsw.addr, &opcode, 1);
+ ilen = insn_length(opcode);
+ if (rc < 0) {
+ return rc;
+ } else if (rc) {
+ /* Instruction-Fetching Exceptions - we can't detect the ilen.
+ * Forward by arbitrary ilc, injection will take care of
+ * nullification if necessary.
+ */
+ pgm_info = vcpu->arch.pgm;
+ ilen = 4;
+ }
+ pgm_info.flags = ilen | KVM_S390_PGM_FLAGS_ILC_VALID;
+ kvm_s390_forward_psw(vcpu, ilen);
+ return kvm_s390_inject_prog_irq(vcpu, &pgm_info);
+}
+
+static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason)
+{
+ struct mcck_volatile_info *mcck_info;
+ struct sie_page *sie_page;
+
+ VCPU_EVENT(vcpu, 6, "exit sie icptcode %d",
+ vcpu->arch.sie_block->icptcode);
+ trace_kvm_s390_sie_exit(vcpu, vcpu->arch.sie_block->icptcode);
+
+ if (guestdbg_enabled(vcpu))
+ kvm_s390_restore_guest_per_regs(vcpu);
+
+ vcpu->run->s.regs.gprs[14] = vcpu->arch.sie_block->gg14;
+ vcpu->run->s.regs.gprs[15] = vcpu->arch.sie_block->gg15;
+
+ if (exit_reason == -EINTR) {
+ VCPU_EVENT(vcpu, 3, "%s", "machine check");
+ sie_page = container_of(vcpu->arch.sie_block,
+ struct sie_page, sie_block);
+ mcck_info = &sie_page->mcck_info;
+ kvm_s390_reinject_machine_check(vcpu, mcck_info);
+ return 0;
+ }
+
+ if (vcpu->arch.sie_block->icptcode > 0) {
+ int rc = kvm_handle_sie_intercept(vcpu);
+
+ if (rc != -EOPNOTSUPP)
+ return rc;
+ vcpu->run->exit_reason = KVM_EXIT_S390_SIEIC;
+ vcpu->run->s390_sieic.icptcode = vcpu->arch.sie_block->icptcode;
+ vcpu->run->s390_sieic.ipa = vcpu->arch.sie_block->ipa;
+ vcpu->run->s390_sieic.ipb = vcpu->arch.sie_block->ipb;
+ return -EREMOTE;
+ } else if (exit_reason != -EFAULT) {
+ vcpu->stat.exit_null++;
+ return 0;
+ } else if (kvm_is_ucontrol(vcpu->kvm)) {
+ vcpu->run->exit_reason = KVM_EXIT_S390_UCONTROL;
+ vcpu->run->s390_ucontrol.trans_exc_code =
+ current->thread.gmap_addr;
+ vcpu->run->s390_ucontrol.pgm_code = 0x10;
+ return -EREMOTE;
+ } else if (current->thread.gmap_pfault) {
+ trace_kvm_s390_major_guest_pfault(vcpu);
+ current->thread.gmap_pfault = 0;
+ if (kvm_arch_setup_async_pf(vcpu))
+ return 0;
+ return kvm_arch_fault_in_page(vcpu, current->thread.gmap_addr, 1);
+ }
+ return vcpu_post_run_fault_in_sie(vcpu);
+}
+
+static int __vcpu_run(struct kvm_vcpu *vcpu)
+{
+ int rc, exit_reason;
+
+ /*
+ * We try to hold kvm->srcu during most of vcpu_run (except when run-
+ * ning the guest), so that memslots (and other stuff) are protected
+ */
+ vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
+
+ do {
+ rc = vcpu_pre_run(vcpu);
+ if (rc)
+ break;
+
+ srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
+ /*
+		 * As PF_VCPU will be used in the fault handler, there must be
+		 * no uaccess between guest_enter and guest_exit.
+ */
+ local_irq_disable();
+ guest_enter_irqoff();
+ __disable_cpu_timer_accounting(vcpu);
+ local_irq_enable();
+ exit_reason = sie64a(vcpu->arch.sie_block,
+ vcpu->run->s.regs.gprs);
+ local_irq_disable();
+ __enable_cpu_timer_accounting(vcpu);
+ guest_exit_irqoff();
+ local_irq_enable();
+ vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
+
+ rc = vcpu_post_run(vcpu, exit_reason);
+ } while (!signal_pending(current) && !guestdbg_exit_pending(vcpu) && !rc);
+
+ srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
+ return rc;
+}
+
+static void sync_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
+{
+ struct runtime_instr_cb *riccb;
+ struct gs_cb *gscb;
+
+ riccb = (struct runtime_instr_cb *) &kvm_run->s.regs.riccb;
+ gscb = (struct gs_cb *) &kvm_run->s.regs.gscb;
+ vcpu->arch.sie_block->gpsw.mask = kvm_run->psw_mask;
+ vcpu->arch.sie_block->gpsw.addr = kvm_run->psw_addr;
+ if (kvm_run->kvm_dirty_regs & KVM_SYNC_PREFIX)
+ kvm_s390_set_prefix(vcpu, kvm_run->s.regs.prefix);
+ if (kvm_run->kvm_dirty_regs & KVM_SYNC_CRS) {
+ memcpy(&vcpu->arch.sie_block->gcr, &kvm_run->s.regs.crs, 128);
+ /* some control register changes require a tlb flush */
+ kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
+ }
+ if (kvm_run->kvm_dirty_regs & KVM_SYNC_ARCH0) {
+ kvm_s390_set_cpu_timer(vcpu, kvm_run->s.regs.cputm);
+ vcpu->arch.sie_block->ckc = kvm_run->s.regs.ckc;
+ vcpu->arch.sie_block->todpr = kvm_run->s.regs.todpr;
+ vcpu->arch.sie_block->pp = kvm_run->s.regs.pp;
+ vcpu->arch.sie_block->gbea = kvm_run->s.regs.gbea;
+ }
+ if (kvm_run->kvm_dirty_regs & KVM_SYNC_PFAULT) {
+ vcpu->arch.pfault_token = kvm_run->s.regs.pft;
+ vcpu->arch.pfault_select = kvm_run->s.regs.pfs;
+ vcpu->arch.pfault_compare = kvm_run->s.regs.pfc;
+ if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
+ kvm_clear_async_pf_completion_queue(vcpu);
+ }
+ /*
+ * If userspace sets the riccb (e.g. after migration) to a valid state,
+ * we should enable RI here instead of doing the lazy enablement.
+ */
+ if ((kvm_run->kvm_dirty_regs & KVM_SYNC_RICCB) &&
+ test_kvm_facility(vcpu->kvm, 64) &&
+ riccb->v &&
+ !(vcpu->arch.sie_block->ecb3 & ECB3_RI)) {
+ VCPU_EVENT(vcpu, 3, "%s", "ENABLE: RI (sync_regs)");
+ vcpu->arch.sie_block->ecb3 |= ECB3_RI;
+ }
+ /*
+ * If userspace sets the gscb (e.g. after migration) to non-zero,
+ * we should enable GS here instead of doing the lazy enablement.
+ */
+ if ((kvm_run->kvm_dirty_regs & KVM_SYNC_GSCB) &&
+ test_kvm_facility(vcpu->kvm, 133) &&
+ gscb->gssm &&
+ !vcpu->arch.gs_enabled) {
+ VCPU_EVENT(vcpu, 3, "%s", "ENABLE: GS (sync_regs)");
+ vcpu->arch.sie_block->ecb |= ECB_GS;
+ vcpu->arch.sie_block->ecd |= ECD_HOSTREGMGMT;
+ vcpu->arch.gs_enabled = 1;
+ }
+ if ((kvm_run->kvm_dirty_regs & KVM_SYNC_BPBC) &&
+ test_kvm_facility(vcpu->kvm, 82)) {
+ vcpu->arch.sie_block->fpf &= ~FPF_BPBC;
+ vcpu->arch.sie_block->fpf |= kvm_run->s.regs.bpbc ? FPF_BPBC : 0;
+ }
+ save_access_regs(vcpu->arch.host_acrs);
+ restore_access_regs(vcpu->run->s.regs.acrs);
+ /* save host (userspace) fprs/vrs */
+ save_fpu_regs();
+ vcpu->arch.host_fpregs.fpc = current->thread.fpu.fpc;
+ vcpu->arch.host_fpregs.regs = current->thread.fpu.regs;
+ if (MACHINE_HAS_VX)
+ current->thread.fpu.regs = vcpu->run->s.regs.vrs;
+ else
+ current->thread.fpu.regs = vcpu->run->s.regs.fprs;
+ current->thread.fpu.fpc = vcpu->run->s.regs.fpc;
+ if (test_fp_ctl(current->thread.fpu.fpc))
+ /* User space provided an invalid FPC, let's clear it */
+ current->thread.fpu.fpc = 0;
+ if (MACHINE_HAS_GS) {
+ preempt_disable();
+ __ctl_set_bit(2, 4);
+ if (current->thread.gs_cb) {
+ vcpu->arch.host_gscb = current->thread.gs_cb;
+ save_gs_cb(vcpu->arch.host_gscb);
+ }
+ if (vcpu->arch.gs_enabled) {
+ current->thread.gs_cb = (struct gs_cb *)
+ &vcpu->run->s.regs.gscb;
+ restore_gs_cb(current->thread.gs_cb);
+ }
+ preempt_enable();
+ }
+ /* SIE will load etoken directly from SDNX and therefore kvm_run */
+
+ kvm_run->kvm_dirty_regs = 0;
+}
+
+static void store_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
+{
+ kvm_run->psw_mask = vcpu->arch.sie_block->gpsw.mask;
+ kvm_run->psw_addr = vcpu->arch.sie_block->gpsw.addr;
+ kvm_run->s.regs.prefix = kvm_s390_get_prefix(vcpu);
+ memcpy(&kvm_run->s.regs.crs, &vcpu->arch.sie_block->gcr, 128);
+ kvm_run->s.regs.cputm = kvm_s390_get_cpu_timer(vcpu);
+ kvm_run->s.regs.ckc = vcpu->arch.sie_block->ckc;
+ kvm_run->s.regs.todpr = vcpu->arch.sie_block->todpr;
+ kvm_run->s.regs.pp = vcpu->arch.sie_block->pp;
+ kvm_run->s.regs.gbea = vcpu->arch.sie_block->gbea;
+ kvm_run->s.regs.pft = vcpu->arch.pfault_token;
+ kvm_run->s.regs.pfs = vcpu->arch.pfault_select;
+ kvm_run->s.regs.pfc = vcpu->arch.pfault_compare;
+ kvm_run->s.regs.bpbc = (vcpu->arch.sie_block->fpf & FPF_BPBC) == FPF_BPBC;
+ save_access_regs(vcpu->run->s.regs.acrs);
+ restore_access_regs(vcpu->arch.host_acrs);
+ /* Save guest register state */
+ save_fpu_regs();
+ vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
+ /* Restore will be done lazily at return */
+ current->thread.fpu.fpc = vcpu->arch.host_fpregs.fpc;
+ current->thread.fpu.regs = vcpu->arch.host_fpregs.regs;
+ if (MACHINE_HAS_GS) {
+ preempt_disable();
+ __ctl_set_bit(2, 4);
+ if (vcpu->arch.gs_enabled)
+ save_gs_cb(current->thread.gs_cb);
+ current->thread.gs_cb = vcpu->arch.host_gscb;
+ restore_gs_cb(vcpu->arch.host_gscb);
+ if (!vcpu->arch.host_gscb)
+ __ctl_clear_bit(2, 4);
+ vcpu->arch.host_gscb = NULL;
+ preempt_enable();
+ }
+ /* SIE will save etoken directly into SDNX and therefore kvm_run */
+}
+
+int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
+{
+ int rc;
+
+ if (kvm_run->immediate_exit)
+ return -EINTR;
+
+ vcpu_load(vcpu);
+
+ if (guestdbg_exit_pending(vcpu)) {
+ kvm_s390_prepare_debug_exit(vcpu);
+ rc = 0;
+ goto out;
+ }
+
+ kvm_sigset_activate(vcpu);
+
+ if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm)) {
+ kvm_s390_vcpu_start(vcpu);
+ } else if (is_vcpu_stopped(vcpu)) {
+ pr_err_ratelimited("can't run stopped vcpu %d\n",
+ vcpu->vcpu_id);
+ rc = -EINVAL;
+ goto out;
+ }
+
+ sync_regs(vcpu, kvm_run);
+ enable_cpu_timer_accounting(vcpu);
+
+ might_fault();
+ rc = __vcpu_run(vcpu);
+
+ if (signal_pending(current) && !rc) {
+ kvm_run->exit_reason = KVM_EXIT_INTR;
+ rc = -EINTR;
+ }
+
+ if (guestdbg_exit_pending(vcpu) && !rc) {
+ kvm_s390_prepare_debug_exit(vcpu);
+ rc = 0;
+ }
+
+ if (rc == -EREMOTE) {
+ /* userspace support is needed, kvm_run has been prepared */
+ rc = 0;
+ }
+
+ disable_cpu_timer_accounting(vcpu);
+ store_regs(vcpu, kvm_run);
+
+ kvm_sigset_deactivate(vcpu);
+
+ vcpu->stat.exit_userspace++;
+out:
+ vcpu_put(vcpu);
+ return rc;
+}
+
+/*
+ * store status at address
+ * we have two special cases:
+ * KVM_S390_STORE_STATUS_NOADDR: -> 0x1200 on 64 bit
+ * KVM_S390_STORE_STATUS_PREFIXED: -> prefix
+ */
+int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long gpa)
+{
+ unsigned char archmode = 1;
+ freg_t fprs[NUM_FPRS];
+ unsigned int px;
+ u64 clkcomp, cputm;
+ int rc;
+
+ px = kvm_s390_get_prefix(vcpu);
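+	/* byte 163 of the save area holds the architecture mode id (1 = z/Architecture) */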
+ if (gpa == KVM_S390_STORE_STATUS_NOADDR) {
+ if (write_guest_abs(vcpu, 163, &archmode, 1))
+ return -EFAULT;
+ gpa = 0;
+ } else if (gpa == KVM_S390_STORE_STATUS_PREFIXED) {
+ if (write_guest_real(vcpu, 163, &archmode, 1))
+ return -EFAULT;
+ gpa = px;
+ } else
+ gpa -= __LC_FPREGS_SAVE_AREA;
+
+ /* manually convert vector registers if necessary */
+ if (MACHINE_HAS_VX) {
+ convert_vx_to_fp(fprs, (__vector128 *) vcpu->run->s.regs.vrs);
+ rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
+ fprs, 128);
+ } else {
+ rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
+ vcpu->run->s.regs.fprs, 128);
+ }
+ rc |= write_guest_abs(vcpu, gpa + __LC_GPREGS_SAVE_AREA,
+ vcpu->run->s.regs.gprs, 128);
+ rc |= write_guest_abs(vcpu, gpa + __LC_PSW_SAVE_AREA,
+ &vcpu->arch.sie_block->gpsw, 16);
+ rc |= write_guest_abs(vcpu, gpa + __LC_PREFIX_SAVE_AREA,
+ &px, 4);
+ rc |= write_guest_abs(vcpu, gpa + __LC_FP_CREG_SAVE_AREA,
+ &vcpu->run->s.regs.fpc, 4);
+ rc |= write_guest_abs(vcpu, gpa + __LC_TOD_PROGREG_SAVE_AREA,
+ &vcpu->arch.sie_block->todpr, 4);
+ cputm = kvm_s390_get_cpu_timer(vcpu);
+ rc |= write_guest_abs(vcpu, gpa + __LC_CPU_TIMER_SAVE_AREA,
+ &cputm, 8);
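+	/* only bits 0-55 of the clock comparator are saved, shifted into bits 8-63 */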
+ clkcomp = vcpu->arch.sie_block->ckc >> 8;
+ rc |= write_guest_abs(vcpu, gpa + __LC_CLOCK_COMP_SAVE_AREA,
+ &clkcomp, 8);
+ rc |= write_guest_abs(vcpu, gpa + __LC_AREGS_SAVE_AREA,
+ &vcpu->run->s.regs.acrs, 64);
+ rc |= write_guest_abs(vcpu, gpa + __LC_CREGS_SAVE_AREA,
+ &vcpu->arch.sie_block->gcr, 128);
+ return rc ? -EFAULT : 0;
+}
+
+int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr)
+{
+ /*
+ * The guest FPRS and ACRS are in the host FPRS/ACRS due to the lazy
+ * switch in the run ioctl. Let's update our copies before we save
+	 * them into the save area
+ */
+ save_fpu_regs();
+ vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
+ save_access_regs(vcpu->run->s.regs.acrs);
+
+ return kvm_s390_store_status_unloaded(vcpu, addr);
+}
+
+static void __disable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
+{
+ kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu);
+ kvm_s390_sync_request(KVM_REQ_DISABLE_IBS, vcpu);
+}
+
+static void __disable_ibs_on_all_vcpus(struct kvm *kvm)
+{
+ unsigned int i;
+ struct kvm_vcpu *vcpu;
+
+ kvm_for_each_vcpu(i, vcpu, kvm) {
+ __disable_ibs_on_vcpu(vcpu);
+ }
+}
+
+static void __enable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
+{
+ if (!sclp.has_ibs)
+ return;
+ kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu);
+ kvm_s390_sync_request(KVM_REQ_ENABLE_IBS, vcpu);
+}
+
+void kvm_s390_vcpu_start(struct kvm_vcpu *vcpu)
+{
+ int i, online_vcpus, started_vcpus = 0;
+
+ if (!is_vcpu_stopped(vcpu))
+ return;
+
+ trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 1);
+ /* Only one cpu at a time may enter/leave the STOPPED state. */
+ spin_lock(&vcpu->kvm->arch.start_stop_lock);
+ online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
+
+ for (i = 0; i < online_vcpus; i++) {
+ if (!is_vcpu_stopped(vcpu->kvm->vcpus[i]))
+ started_vcpus++;
+ }
+
+ if (started_vcpus == 0) {
+ /* we're the only active VCPU -> speed it up */
+ __enable_ibs_on_vcpu(vcpu);
+ } else if (started_vcpus == 1) {
+ /*
+ * As we are starting a second VCPU, we have to disable
+ * the IBS facility on all VCPUs to remove potentially
+		 * outstanding ENABLE requests.
+ */
+ __disable_ibs_on_all_vcpus(vcpu->kvm);
+ }
+
+ kvm_s390_clear_cpuflags(vcpu, CPUSTAT_STOPPED);
+ /*
+ * Another VCPU might have used IBS while we were offline.
+ * Let's play safe and flush the VCPU at startup.
+ */
+ kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
+ spin_unlock(&vcpu->kvm->arch.start_stop_lock);
+ return;
+}
+
+void kvm_s390_vcpu_stop(struct kvm_vcpu *vcpu)
+{
+ int i, online_vcpus, started_vcpus = 0;
+ struct kvm_vcpu *started_vcpu = NULL;
+
+ if (is_vcpu_stopped(vcpu))
+ return;
+
+ trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 0);
+ /* Only one cpu at a time may enter/leave the STOPPED state. */
+ spin_lock(&vcpu->kvm->arch.start_stop_lock);
+ online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
+
+ /*
+ * Set the VCPU to STOPPED and THEN clear the interrupt flag,
+ * now that the SIGP STOP and SIGP STOP AND STORE STATUS orders
+ * have been fully processed. This will ensure that the VCPU
+ * is kept BUSY if another VCPU is inquiring with SIGP SENSE.
+ */
+ kvm_s390_set_cpuflags(vcpu, CPUSTAT_STOPPED);
+ kvm_s390_clear_stop_irq(vcpu);
+
+ __disable_ibs_on_vcpu(vcpu);
+
+ for (i = 0; i < online_vcpus; i++) {
+ if (!is_vcpu_stopped(vcpu->kvm->vcpus[i])) {
+ started_vcpus++;
+ started_vcpu = vcpu->kvm->vcpus[i];
+ }
+ }
+
+ if (started_vcpus == 1) {
+ /*
+ * As we only have one VCPU left, we want to enable the
+ * IBS facility for that VCPU to speed it up.
+ */
+ __enable_ibs_on_vcpu(started_vcpu);
+ }
+
+ spin_unlock(&vcpu->kvm->arch.start_stop_lock);
+ return;
+}
+
+static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
+ struct kvm_enable_cap *cap)
+{
+ int r;
+
+ if (cap->flags)
+ return -EINVAL;
+
+ switch (cap->cap) {
+ case KVM_CAP_S390_CSS_SUPPORT:
+ if (!vcpu->kvm->arch.css_support) {
+ vcpu->kvm->arch.css_support = 1;
+ VM_EVENT(vcpu->kvm, 3, "%s", "ENABLE: CSS support");
+ trace_kvm_s390_enable_css(vcpu->kvm);
+ }
+ r = 0;
+ break;
+ default:
+ r = -EINVAL;
+ break;
+ }
+ return r;
+}
+
+static long kvm_s390_guest_mem_op(struct kvm_vcpu *vcpu,
+ struct kvm_s390_mem_op *mop)
+{
+ void __user *uaddr = (void __user *)mop->buf;
+ void *tmpbuf = NULL;
+ int r, srcu_idx;
+ const u64 supported_flags = KVM_S390_MEMOP_F_INJECT_EXCEPTION
+ | KVM_S390_MEMOP_F_CHECK_ONLY;
+
+ if (mop->flags & ~supported_flags || mop->ar >= NUM_ACRS || !mop->size)
+ return -EINVAL;
+
+ if (mop->size > MEM_OP_MAX_SIZE)
+ return -E2BIG;
+
+ if (!(mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY)) {
+ tmpbuf = vmalloc(mop->size);
+ if (!tmpbuf)
+ return -ENOMEM;
+ }
+
+ srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
+
+ switch (mop->op) {
+ case KVM_S390_MEMOP_LOGICAL_READ:
+ if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
+ r = check_gva_range(vcpu, mop->gaddr, mop->ar,
+ mop->size, GACC_FETCH);
+ break;
+ }
+ r = read_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
+ if (r == 0) {
+ if (copy_to_user(uaddr, tmpbuf, mop->size))
+ r = -EFAULT;
+ }
+ break;
+ case KVM_S390_MEMOP_LOGICAL_WRITE:
+ if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
+ r = check_gva_range(vcpu, mop->gaddr, mop->ar,
+ mop->size, GACC_STORE);
+ break;
+ }
+ if (copy_from_user(tmpbuf, uaddr, mop->size)) {
+ r = -EFAULT;
+ break;
+ }
+ r = write_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
+ break;
+ default:
+ r = -EINVAL;
+ }
+
+ srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx);
+
+ if (r > 0 && (mop->flags & KVM_S390_MEMOP_F_INJECT_EXCEPTION) != 0)
+ kvm_s390_inject_prog_irq(vcpu, &vcpu->arch.pgm);
+
+ vfree(tmpbuf);
+ return r;
+}
+
+long kvm_arch_vcpu_async_ioctl(struct file *filp,
+ unsigned int ioctl, unsigned long arg)
+{
+ struct kvm_vcpu *vcpu = filp->private_data;
+ void __user *argp = (void __user *)arg;
+
+ switch (ioctl) {
+ case KVM_S390_IRQ: {
+ struct kvm_s390_irq s390irq;
+
+ if (copy_from_user(&s390irq, argp, sizeof(s390irq)))
+ return -EFAULT;
+ return kvm_s390_inject_vcpu(vcpu, &s390irq);
+ }
+ case KVM_S390_INTERRUPT: {
+ struct kvm_s390_interrupt s390int;
+ struct kvm_s390_irq s390irq = {};
+
+ if (copy_from_user(&s390int, argp, sizeof(s390int)))
+ return -EFAULT;
+ if (s390int_to_s390irq(&s390int, &s390irq))
+ return -EINVAL;
+ return kvm_s390_inject_vcpu(vcpu, &s390irq);
+ }
+ }
+ return -ENOIOCTLCMD;
+}
+
+long kvm_arch_vcpu_ioctl(struct file *filp,
+ unsigned int ioctl, unsigned long arg)
+{
+ struct kvm_vcpu *vcpu = filp->private_data;
+ void __user *argp = (void __user *)arg;
+ int idx;
+ long r;
+
+ vcpu_load(vcpu);
+
+ switch (ioctl) {
+ case KVM_S390_STORE_STATUS:
+ idx = srcu_read_lock(&vcpu->kvm->srcu);
+ r = kvm_s390_store_status_unloaded(vcpu, arg);
+ srcu_read_unlock(&vcpu->kvm->srcu, idx);
+ break;
+ case KVM_S390_SET_INITIAL_PSW: {
+ psw_t psw;
+
+ r = -EFAULT;
+ if (copy_from_user(&psw, argp, sizeof(psw)))
+ break;
+ r = kvm_arch_vcpu_ioctl_set_initial_psw(vcpu, psw);
+ break;
+ }
+ case KVM_S390_INITIAL_RESET:
+ r = kvm_arch_vcpu_ioctl_initial_reset(vcpu);
+ break;
+ case KVM_SET_ONE_REG:
+ case KVM_GET_ONE_REG: {
+ struct kvm_one_reg reg;
+ r = -EFAULT;
+ if (copy_from_user(&reg, argp, sizeof(reg)))
+ break;
+ if (ioctl == KVM_SET_ONE_REG)
+ r = kvm_arch_vcpu_ioctl_set_one_reg(vcpu, &reg);
+ else
+ r = kvm_arch_vcpu_ioctl_get_one_reg(vcpu, &reg);
+ break;
+ }
+#ifdef CONFIG_KVM_S390_UCONTROL
+ case KVM_S390_UCAS_MAP: {
+ struct kvm_s390_ucas_mapping ucasmap;
+
+ if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
+ r = -EFAULT;
+ break;
+ }
+
+ if (!kvm_is_ucontrol(vcpu->kvm)) {
+ r = -EINVAL;
+ break;
+ }
+
+ r = gmap_map_segment(vcpu->arch.gmap, ucasmap.user_addr,
+ ucasmap.vcpu_addr, ucasmap.length);
+ break;
+ }
+ case KVM_S390_UCAS_UNMAP: {
+ struct kvm_s390_ucas_mapping ucasmap;
+
+ if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
+ r = -EFAULT;
+ break;
+ }
+
+ if (!kvm_is_ucontrol(vcpu->kvm)) {
+ r = -EINVAL;
+ break;
+ }
+
+ r = gmap_unmap_segment(vcpu->arch.gmap, ucasmap.vcpu_addr,
+ ucasmap.length);
+ break;
+ }
+#endif
+ case KVM_S390_VCPU_FAULT: {
+ r = gmap_fault(vcpu->arch.gmap, arg, 0);
+ break;
+ }
+ case KVM_ENABLE_CAP:
+ {
+ struct kvm_enable_cap cap;
+ r = -EFAULT;
+ if (copy_from_user(&cap, argp, sizeof(cap)))
+ break;
+ r = kvm_vcpu_ioctl_enable_cap(vcpu, &cap);
+ break;
+ }
+ case KVM_S390_MEM_OP: {
+ struct kvm_s390_mem_op mem_op;
+
+ if (copy_from_user(&mem_op, argp, sizeof(mem_op)) == 0)
+ r = kvm_s390_guest_mem_op(vcpu, &mem_op);
+ else
+ r = -EFAULT;
+ break;
+ }
+ case KVM_S390_SET_IRQ_STATE: {
+ struct kvm_s390_irq_state irq_state;
+
+ r = -EFAULT;
+ if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
+ break;
+ if (irq_state.len > VCPU_IRQS_MAX_BUF ||
+ irq_state.len == 0 ||
+ irq_state.len % sizeof(struct kvm_s390_irq) > 0) {
+ r = -EINVAL;
+ break;
+ }
+ /* do not use irq_state.flags, it will break old QEMUs */
+ r = kvm_s390_set_irq_state(vcpu,
+ (void __user *) irq_state.buf,
+ irq_state.len);
+ break;
+ }
+ case KVM_S390_GET_IRQ_STATE: {
+ struct kvm_s390_irq_state irq_state;
+
+ r = -EFAULT;
+ if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
+ break;
+ if (irq_state.len == 0) {
+ r = -EINVAL;
+ break;
+ }
+ /* do not use irq_state.flags, it will break old QEMUs */
+ r = kvm_s390_get_irq_state(vcpu,
+ (__u8 __user *) irq_state.buf,
+ irq_state.len);
+ break;
+ }
+ default:
+ r = -ENOTTY;
+ }
+
+ vcpu_put(vcpu);
+ return r;
+}
+
+vm_fault_t kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
+{
+#ifdef CONFIG_KVM_S390_UCONTROL
+ if ((vmf->pgoff == KVM_S390_SIE_PAGE_OFFSET)
+ && (kvm_is_ucontrol(vcpu->kvm))) {
+ vmf->page = virt_to_page(vcpu->arch.sie_block);
+ get_page(vmf->page);
+ return 0;
+ }
+#endif
+ return VM_FAULT_SIGBUS;
+}
+
+int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot,
+ unsigned long npages)
+{
+ return 0;
+}
+
+/* Section: memory related */
+int kvm_arch_prepare_memory_region(struct kvm *kvm,
+ struct kvm_memory_slot *memslot,
+ const struct kvm_userspace_memory_region *mem,
+ enum kvm_mr_change change)
+{
+	/* A few sanity checks. We can have memory slots which have to start/end
+	   at a segment boundary (1MB). The memory in userland may be fragmented
+	   into various different vmas. It is okay to mmap() and munmap() stuff
+	   in this slot at any time after doing this call */
+
+ if (mem->userspace_addr & 0xffffful)
+ return -EINVAL;
+
+ if (mem->memory_size & 0xffffful)
+ return -EINVAL;
+
+ if (mem->guest_phys_addr + mem->memory_size > kvm->arch.mem_limit)
+ return -EINVAL;
+
+ return 0;
+}
+
+void kvm_arch_commit_memory_region(struct kvm *kvm,
+ const struct kvm_userspace_memory_region *mem,
+ const struct kvm_memory_slot *old,
+ const struct kvm_memory_slot *new,
+ enum kvm_mr_change change)
+{
+ int rc = 0;
+
+ switch (change) {
+ case KVM_MR_DELETE:
+ rc = gmap_unmap_segment(kvm->arch.gmap, old->base_gfn * PAGE_SIZE,
+ old->npages * PAGE_SIZE);
+ break;
+ case KVM_MR_MOVE:
+ rc = gmap_unmap_segment(kvm->arch.gmap, old->base_gfn * PAGE_SIZE,
+ old->npages * PAGE_SIZE);
+ if (rc)
+ break;
+ /* FALLTHROUGH */
+ case KVM_MR_CREATE:
+ rc = gmap_map_segment(kvm->arch.gmap, mem->userspace_addr,
+ mem->guest_phys_addr, mem->memory_size);
+ break;
+ case KVM_MR_FLAGS_ONLY:
+ break;
+ default:
+ WARN(1, "Unknown KVM MR CHANGE: %d\n", change);
+ }
+ if (rc)
+ pr_warn("failed to commit memory region\n");
+ return;
+}
+
+static inline unsigned long nonhyp_mask(int i)
+{
+ unsigned int nonhyp_fai = (sclp.hmfai << i * 2) >> 30;
+
+ return 0x0000ffffffffffffUL >> (nonhyp_fai << 4);
+}
+
+void kvm_arch_vcpu_block_finish(struct kvm_vcpu *vcpu)
+{
+ vcpu->valid_wakeup = false;
+}
+
+static int __init kvm_s390_init(void)
+{
+ int i;
+
+ if (!sclp.has_sief2) {
+ pr_info("SIE not available\n");
+ return -ENODEV;
+ }
+
+ if (nested && hpage) {
+ pr_info("nested (vSIE) and hpage (huge page backing) can currently not be activated concurrently");
+ return -EINVAL;
+ }
+
+ for (i = 0; i < 16; i++)
+ kvm_s390_fac_base[i] |=
+ S390_lowcore.stfle_fac_list[i] & nonhyp_mask(i);
+
+ return kvm_init(NULL, sizeof(struct kvm_vcpu), 0, THIS_MODULE);
+}
+
+static void __exit kvm_s390_exit(void)
+{
+ kvm_exit();
+}
+
+module_init(kvm_s390_init);
+module_exit(kvm_s390_exit);
+
+/*
+ * Enable autoloading of the kvm module.
+ * Note that we add the module alias here instead of virt/kvm/kvm_main.c
+ * since x86 takes a different approach.
+ */
+#include <linux/miscdevice.h>
+MODULE_ALIAS_MISCDEV(KVM_MINOR);
+MODULE_ALIAS("devname:kvm");
diff --git a/arch/s390/kvm/kvm-s390.h b/arch/s390/kvm/kvm-s390.h
new file mode 100644
index 000000000..a7264c113
--- /dev/null
+++ b/arch/s390/kvm/kvm-s390.h
@@ -0,0 +1,427 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * definition for kvm on s390
+ *
+ * Copyright IBM Corp. 2008, 2009
+ *
+ * Author(s): Carsten Otte <cotte@de.ibm.com>
+ * Christian Borntraeger <borntraeger@de.ibm.com>
+ * Christian Ehrhardt <ehrhardt@de.ibm.com>
+ */
+
+#ifndef ARCH_S390_KVM_S390_H
+#define ARCH_S390_KVM_S390_H
+
+#include <linux/hrtimer.h>
+#include <linux/kvm.h>
+#include <linux/kvm_host.h>
+#include <asm/facility.h>
+#include <asm/processor.h>
+#include <asm/sclp.h>
+
+/* Transactional Memory Execution related macros */
+#define IS_TE_ENABLED(vcpu) ((vcpu->arch.sie_block->ecb & ECB_TE))
+#define TDB_FORMAT1 1
+#define IS_ITDB_VALID(vcpu) ((*(char *)vcpu->arch.sie_block->itdba == TDB_FORMAT1))
+
+extern debug_info_t *kvm_s390_dbf;
+#define KVM_EVENT(d_loglevel, d_string, d_args...)\
+do { \
+ debug_sprintf_event(kvm_s390_dbf, d_loglevel, d_string "\n", \
+ d_args); \
+} while (0)
+
+#define VM_EVENT(d_kvm, d_loglevel, d_string, d_args...)\
+do { \
+ debug_sprintf_event(d_kvm->arch.dbf, d_loglevel, d_string "\n", \
+ d_args); \
+} while (0)
+
+#define VCPU_EVENT(d_vcpu, d_loglevel, d_string, d_args...)\
+do { \
+ debug_sprintf_event(d_vcpu->kvm->arch.dbf, d_loglevel, \
+ "%02d[%016lx-%016lx]: " d_string "\n", d_vcpu->vcpu_id, \
+ d_vcpu->arch.sie_block->gpsw.mask, d_vcpu->arch.sie_block->gpsw.addr,\
+ d_args); \
+} while (0)
+
+static inline void kvm_s390_set_cpuflags(struct kvm_vcpu *vcpu, u32 flags)
+{
+ atomic_or(flags, &vcpu->arch.sie_block->cpuflags);
+}
+
+static inline void kvm_s390_clear_cpuflags(struct kvm_vcpu *vcpu, u32 flags)
+{
+ atomic_andnot(flags, &vcpu->arch.sie_block->cpuflags);
+}
+
+static inline bool kvm_s390_test_cpuflags(struct kvm_vcpu *vcpu, u32 flags)
+{
+ return (atomic_read(&vcpu->arch.sie_block->cpuflags) & flags) == flags;
+}
+
+static inline int is_vcpu_stopped(struct kvm_vcpu *vcpu)
+{
+ return kvm_s390_test_cpuflags(vcpu, CPUSTAT_STOPPED);
+}
+
+static inline int is_vcpu_idle(struct kvm_vcpu *vcpu)
+{
+ return test_bit(kvm_vcpu_get_idx(vcpu), vcpu->kvm->arch.float_int.idle_mask);
+}
+
+static inline int kvm_is_ucontrol(struct kvm *kvm)
+{
+#ifdef CONFIG_KVM_S390_UCONTROL
+ if (kvm->arch.gmap)
+ return 0;
+ return 1;
+#else
+ return 0;
+#endif
+}
+
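+/* the SIE block keeps the guest prefix shifted right by 13 bits, i.e. in 8k units */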
+#define GUEST_PREFIX_SHIFT 13
+static inline u32 kvm_s390_get_prefix(struct kvm_vcpu *vcpu)
+{
+ return vcpu->arch.sie_block->prefix << GUEST_PREFIX_SHIFT;
+}
+
+static inline void kvm_s390_set_prefix(struct kvm_vcpu *vcpu, u32 prefix)
+{
+ VCPU_EVENT(vcpu, 3, "set prefix of cpu %03u to 0x%x", vcpu->vcpu_id,
+ prefix);
+ vcpu->arch.sie_block->prefix = prefix >> GUEST_PREFIX_SHIFT;
+ kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
+ kvm_make_request(KVM_REQ_MMU_RELOAD, vcpu);
+}
+
+static inline u64 kvm_s390_get_base_disp_s(struct kvm_vcpu *vcpu, u8 *ar)
+{
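+	/* S format: base register in the top 4 bits of the ipb, 12-bit displacement below it */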
+ u32 base2 = vcpu->arch.sie_block->ipb >> 28;
+ u32 disp2 = ((vcpu->arch.sie_block->ipb & 0x0fff0000) >> 16);
+
+ if (ar)
+ *ar = base2;
+
+ return (base2 ? vcpu->run->s.regs.gprs[base2] : 0) + disp2;
+}
+
+static inline void kvm_s390_get_base_disp_sse(struct kvm_vcpu *vcpu,
+ u64 *address1, u64 *address2,
+ u8 *ar_b1, u8 *ar_b2)
+{
+ u32 base1 = (vcpu->arch.sie_block->ipb & 0xf0000000) >> 28;
+ u32 disp1 = (vcpu->arch.sie_block->ipb & 0x0fff0000) >> 16;
+ u32 base2 = (vcpu->arch.sie_block->ipb & 0xf000) >> 12;
+ u32 disp2 = vcpu->arch.sie_block->ipb & 0x0fff;
+
+ *address1 = (base1 ? vcpu->run->s.regs.gprs[base1] : 0) + disp1;
+ *address2 = (base2 ? vcpu->run->s.regs.gprs[base2] : 0) + disp2;
+
+ if (ar_b1)
+ *ar_b1 = base1;
+ if (ar_b2)
+ *ar_b2 = base2;
+}
+
+static inline void kvm_s390_get_regs_rre(struct kvm_vcpu *vcpu, int *r1, int *r2)
+{
+ if (r1)
+ *r1 = (vcpu->arch.sie_block->ipb & 0x00f00000) >> 20;
+ if (r2)
+ *r2 = (vcpu->arch.sie_block->ipb & 0x000f0000) >> 16;
+}
+
+static inline u64 kvm_s390_get_base_disp_rsy(struct kvm_vcpu *vcpu, u8 *ar)
+{
+ u32 base2 = vcpu->arch.sie_block->ipb >> 28;
+ u32 disp2 = ((vcpu->arch.sie_block->ipb & 0x0fff0000) >> 16) +
+ ((vcpu->arch.sie_block->ipb & 0xff00) << 4);
+	/* The displacement is a 20-bit _SIGNED_ value */
+	if (disp2 & 0x80000)
+		disp2 += 0xfff00000;
+
+ if (ar)
+ *ar = base2;
+
+ return (base2 ? vcpu->run->s.regs.gprs[base2] : 0) + (long)(int)disp2;
+}
+
+static inline u64 kvm_s390_get_base_disp_rs(struct kvm_vcpu *vcpu, u8 *ar)
+{
+ u32 base2 = vcpu->arch.sie_block->ipb >> 28;
+ u32 disp2 = ((vcpu->arch.sie_block->ipb & 0x0fff0000) >> 16);
+
+ if (ar)
+ *ar = base2;
+
+ return (base2 ? vcpu->run->s.regs.gprs[base2] : 0) + disp2;
+}
+
+/* Set the condition code in the guest program status word */
+static inline void kvm_s390_set_psw_cc(struct kvm_vcpu *vcpu, unsigned long cc)
+{
+ vcpu->arch.sie_block->gpsw.mask &= ~(3UL << 44);
+ vcpu->arch.sie_block->gpsw.mask |= cc << 44;
+}
+
+/* test availability of facility in a kvm instance */
+static inline int test_kvm_facility(struct kvm *kvm, unsigned long nr)
+{
+ return __test_facility(nr, kvm->arch.model.fac_mask) &&
+ __test_facility(nr, kvm->arch.model.fac_list);
+}
+
+static inline int set_kvm_facility(u64 *fac_list, unsigned long nr)
+{
+ unsigned char *ptr;
+
+ if (nr >= MAX_FACILITY_BIT)
+ return -EINVAL;
+ ptr = (unsigned char *) fac_list + (nr >> 3);
+ *ptr |= (0x80UL >> (nr & 7));
+ return 0;
+}
+
+static inline int test_kvm_cpu_feat(struct kvm *kvm, unsigned long nr)
+{
+ WARN_ON_ONCE(nr >= KVM_S390_VM_CPU_FEAT_NR_BITS);
+ return test_bit_inv(nr, kvm->arch.cpu_feat);
+}
+
+/* are cpu states controlled by user space */
+static inline int kvm_s390_user_cpu_state_ctrl(struct kvm *kvm)
+{
+ return kvm->arch.user_cpu_state_ctrl != 0;
+}
+
+/* implemented in interrupt.c */
+int kvm_s390_handle_wait(struct kvm_vcpu *vcpu);
+void kvm_s390_vcpu_wakeup(struct kvm_vcpu *vcpu);
+enum hrtimer_restart kvm_s390_idle_wakeup(struct hrtimer *timer);
+int __must_check kvm_s390_deliver_pending_interrupts(struct kvm_vcpu *vcpu);
+void kvm_s390_clear_local_irqs(struct kvm_vcpu *vcpu);
+void kvm_s390_clear_float_irqs(struct kvm *kvm);
+int __must_check kvm_s390_inject_vm(struct kvm *kvm,
+ struct kvm_s390_interrupt *s390int);
+int __must_check kvm_s390_inject_vcpu(struct kvm_vcpu *vcpu,
+ struct kvm_s390_irq *irq);
+static inline int kvm_s390_inject_prog_irq(struct kvm_vcpu *vcpu,
+ struct kvm_s390_pgm_info *pgm_info)
+{
+ struct kvm_s390_irq irq = {
+ .type = KVM_S390_PROGRAM_INT,
+ .u.pgm = *pgm_info,
+ };
+
+ return kvm_s390_inject_vcpu(vcpu, &irq);
+}
+static inline int kvm_s390_inject_program_int(struct kvm_vcpu *vcpu, u16 code)
+{
+ struct kvm_s390_irq irq = {
+ .type = KVM_S390_PROGRAM_INT,
+ .u.pgm.code = code,
+ };
+
+ return kvm_s390_inject_vcpu(vcpu, &irq);
+}
+struct kvm_s390_interrupt_info *kvm_s390_get_io_int(struct kvm *kvm,
+ u64 isc_mask, u32 schid);
+int kvm_s390_reinject_io_int(struct kvm *kvm,
+ struct kvm_s390_interrupt_info *inti);
+int kvm_s390_mask_adapter(struct kvm *kvm, unsigned int id, bool masked);
+
+/* implemented in intercept.c */
+u8 kvm_s390_get_ilen(struct kvm_vcpu *vcpu);
+int kvm_handle_sie_intercept(struct kvm_vcpu *vcpu);
+static inline void kvm_s390_rewind_psw(struct kvm_vcpu *vcpu, int ilen)
+{
+ struct kvm_s390_sie_block *sie_block = vcpu->arch.sie_block;
+
+ sie_block->gpsw.addr = __rewind_psw(sie_block->gpsw, ilen);
+}
+static inline void kvm_s390_forward_psw(struct kvm_vcpu *vcpu, int ilen)
+{
+ kvm_s390_rewind_psw(vcpu, -ilen);
+}
+static inline void kvm_s390_retry_instr(struct kvm_vcpu *vcpu)
+{
+ /* don't inject PER events if we re-execute the instruction */
+ vcpu->arch.sie_block->icptstatus &= ~0x02;
+ kvm_s390_rewind_psw(vcpu, kvm_s390_get_ilen(vcpu));
+}
+
+int handle_sthyi(struct kvm_vcpu *vcpu);
+
+/* implemented in priv.c */
+int is_valid_psw(psw_t *psw);
+int kvm_s390_handle_aa(struct kvm_vcpu *vcpu);
+int kvm_s390_handle_b2(struct kvm_vcpu *vcpu);
+int kvm_s390_handle_e3(struct kvm_vcpu *vcpu);
+int kvm_s390_handle_e5(struct kvm_vcpu *vcpu);
+int kvm_s390_handle_01(struct kvm_vcpu *vcpu);
+int kvm_s390_handle_b9(struct kvm_vcpu *vcpu);
+int kvm_s390_handle_lpsw(struct kvm_vcpu *vcpu);
+int kvm_s390_handle_stctl(struct kvm_vcpu *vcpu);
+int kvm_s390_handle_lctl(struct kvm_vcpu *vcpu);
+int kvm_s390_handle_eb(struct kvm_vcpu *vcpu);
+int kvm_s390_skey_check_enable(struct kvm_vcpu *vcpu);
+
+/* implemented in vsie.c */
+int kvm_s390_handle_vsie(struct kvm_vcpu *vcpu);
+void kvm_s390_vsie_kick(struct kvm_vcpu *vcpu);
+void kvm_s390_vsie_gmap_notifier(struct gmap *gmap, unsigned long start,
+ unsigned long end);
+void kvm_s390_vsie_init(struct kvm *kvm);
+void kvm_s390_vsie_destroy(struct kvm *kvm);
+
+/* implemented in sigp.c */
+int kvm_s390_handle_sigp(struct kvm_vcpu *vcpu);
+int kvm_s390_handle_sigp_pei(struct kvm_vcpu *vcpu);
+
+/* implemented in kvm-s390.c */
+void kvm_s390_set_tod_clock(struct kvm *kvm,
+ const struct kvm_s390_vm_tod_clock *gtod);
+long kvm_arch_fault_in_page(struct kvm_vcpu *vcpu, gpa_t gpa, int writable);
+int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long addr);
+int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr);
+void kvm_s390_vcpu_start(struct kvm_vcpu *vcpu);
+void kvm_s390_vcpu_stop(struct kvm_vcpu *vcpu);
+void kvm_s390_vcpu_block(struct kvm_vcpu *vcpu);
+void kvm_s390_vcpu_unblock(struct kvm_vcpu *vcpu);
+void exit_sie(struct kvm_vcpu *vcpu);
+void kvm_s390_sync_request(int req, struct kvm_vcpu *vcpu);
+int kvm_s390_vcpu_setup_cmma(struct kvm_vcpu *vcpu);
+void kvm_s390_vcpu_unsetup_cmma(struct kvm_vcpu *vcpu);
+void kvm_s390_set_cpu_timer(struct kvm_vcpu *vcpu, __u64 cputm);
+__u64 kvm_s390_get_cpu_timer(struct kvm_vcpu *vcpu);
+
+/* implemented in diag.c */
+int kvm_s390_handle_diag(struct kvm_vcpu *vcpu);
+
+static inline void kvm_s390_vcpu_block_all(struct kvm *kvm)
+{
+ int i;
+ struct kvm_vcpu *vcpu;
+
+ WARN_ON(!mutex_is_locked(&kvm->lock));
+ kvm_for_each_vcpu(i, vcpu, kvm)
+ kvm_s390_vcpu_block(vcpu);
+}
+
+static inline void kvm_s390_vcpu_unblock_all(struct kvm *kvm)
+{
+ int i;
+ struct kvm_vcpu *vcpu;
+
+ kvm_for_each_vcpu(i, vcpu, kvm)
+ kvm_s390_vcpu_unblock(vcpu);
+}
+
+static inline u64 kvm_s390_get_tod_clock_fast(struct kvm *kvm)
+{
+ u64 rc;
+
+ preempt_disable();
+ rc = get_tod_clock_fast() + kvm->arch.epoch;
+ preempt_enable();
+ return rc;
+}
+
+/**
+ * kvm_s390_inject_prog_cond - conditionally inject a program check
+ * @vcpu: virtual cpu
+ * @rc: original return/error code
+ *
+ * This function is supposed to be used after regular guest access functions
+ * failed, to conditionally inject a program check to a vcpu. The typical
+ * pattern would look like
+ *
+ * rc = write_guest(vcpu, addr, data, len);
+ * if (rc)
+ * return kvm_s390_inject_prog_cond(vcpu, rc);
+ *
+ * A negative return code from guest access functions implies an internal error
+ * like e.g. out of memory. In these cases no program check should be injected
+ * to the guest.
+ * A positive value implies that an exception happened while accessing a guest's
+ * memory. In this case all data belonging to the corresponding program check
+ * has been stored in vcpu->arch.pgm and can be injected with
+ * kvm_s390_inject_prog_irq().
+ *
+ * Returns: - the original @rc value if @rc was negative (internal error)
+ * - zero if @rc was already zero
+ * - zero or error code from injecting if @rc was positive
+ * (program check injected to @vcpu)
+ */
+static inline int kvm_s390_inject_prog_cond(struct kvm_vcpu *vcpu, int rc)
+{
+ if (rc <= 0)
+ return rc;
+ return kvm_s390_inject_prog_irq(vcpu, &vcpu->arch.pgm);
+}
+
+int s390int_to_s390irq(struct kvm_s390_interrupt *s390int,
+ struct kvm_s390_irq *s390irq);
+
+/* implemented in interrupt.c */
+int kvm_s390_vcpu_has_irq(struct kvm_vcpu *vcpu, int exclude_stop);
+int psw_extint_disabled(struct kvm_vcpu *vcpu);
+void kvm_s390_destroy_adapters(struct kvm *kvm);
+int kvm_s390_ext_call_pending(struct kvm_vcpu *vcpu);
+extern struct kvm_device_ops kvm_flic_ops;
+int kvm_s390_is_stop_irq_pending(struct kvm_vcpu *vcpu);
+int kvm_s390_is_restart_irq_pending(struct kvm_vcpu *vcpu);
+void kvm_s390_clear_stop_irq(struct kvm_vcpu *vcpu);
+int kvm_s390_set_irq_state(struct kvm_vcpu *vcpu,
+ void __user *buf, int len);
+int kvm_s390_get_irq_state(struct kvm_vcpu *vcpu,
+ __u8 __user *buf, int len);
+void kvm_s390_gisa_init(struct kvm *kvm);
+void kvm_s390_gisa_clear(struct kvm *kvm);
+void kvm_s390_gisa_destroy(struct kvm *kvm);
+
+/* implemented in guestdbg.c */
+void kvm_s390_backup_guest_per_regs(struct kvm_vcpu *vcpu);
+void kvm_s390_restore_guest_per_regs(struct kvm_vcpu *vcpu);
+void kvm_s390_patch_guest_per_regs(struct kvm_vcpu *vcpu);
+int kvm_s390_import_bp_data(struct kvm_vcpu *vcpu,
+ struct kvm_guest_debug *dbg);
+void kvm_s390_clear_bp_data(struct kvm_vcpu *vcpu);
+void kvm_s390_prepare_debug_exit(struct kvm_vcpu *vcpu);
+int kvm_s390_handle_per_ifetch_icpt(struct kvm_vcpu *vcpu);
+int kvm_s390_handle_per_event(struct kvm_vcpu *vcpu);
+
+/* support for Basic/Extended SCA handling */
+static inline union ipte_control *kvm_s390_get_ipte_control(struct kvm *kvm)
+{
+ struct bsca_block *sca = kvm->arch.sca; /* SCA version doesn't matter */
+
+ return &sca->ipte_control;
+}
+static inline int kvm_s390_use_sca_entries(void)
+{
+ /*
+ * Without SIGP interpretation, only SRS interpretation (if available)
+ * might use the entries. By not setting the entries and keeping them
+ * invalid, hardware will not access them but intercept.
+ */
+ return sclp.has_sigpif;
+}
+void kvm_s390_reinject_machine_check(struct kvm_vcpu *vcpu,
+ struct mcck_volatile_info *mcck_info);
+
+/**
+ * kvm_s390_vcpu_crypto_reset_all
+ *
+ * Reset the crypto attributes for each vcpu. This can be done while the vcpus
+ * are running as each vcpu will be removed from SIE before resetting the crypto
+ * attributes and restored to SIE afterward.
+ *
+ * Note: The kvm->lock must be held while calling this function
+ *
+ * @kvm: the KVM guest
+ */
+void kvm_s390_vcpu_crypto_reset_all(struct kvm *kvm);
+#endif
diff --git a/arch/s390/kvm/priv.c b/arch/s390/kvm/priv.c
new file mode 100644
index 000000000..9abdfb8b1
--- /dev/null
+++ b/arch/s390/kvm/priv.c
@@ -0,0 +1,1452 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * handling privileged instructions
+ *
+ * Copyright IBM Corp. 2008, 2018
+ *
+ * Author(s): Carsten Otte <cotte@de.ibm.com>
+ * Christian Borntraeger <borntraeger@de.ibm.com>
+ */
+
+#include <linux/kvm.h>
+#include <linux/gfp.h>
+#include <linux/errno.h>
+#include <linux/compat.h>
+#include <linux/mm_types.h>
+
+#include <asm/asm-offsets.h>
+#include <asm/facility.h>
+#include <asm/current.h>
+#include <asm/debug.h>
+#include <asm/ebcdic.h>
+#include <asm/sysinfo.h>
+#include <asm/pgtable.h>
+#include <asm/page-states.h>
+#include <asm/pgalloc.h>
+#include <asm/gmap.h>
+#include <asm/io.h>
+#include <asm/ptrace.h>
+#include <asm/sclp.h>
+#include "gaccess.h"
+#include "kvm-s390.h"
+#include "trace.h"
+
+static int handle_ri(struct kvm_vcpu *vcpu)
+{
+ vcpu->stat.instruction_ri++;
+
+ if (test_kvm_facility(vcpu->kvm, 64)) {
+ VCPU_EVENT(vcpu, 3, "%s", "ENABLE: RI (lazy)");
+ vcpu->arch.sie_block->ecb3 |= ECB3_RI;
+ kvm_s390_retry_instr(vcpu);
+ return 0;
+ } else
+ return kvm_s390_inject_program_int(vcpu, PGM_OPERATION);
+}
+
+int kvm_s390_handle_aa(struct kvm_vcpu *vcpu)
+{
+ if ((vcpu->arch.sie_block->ipa & 0xf) <= 4)
+ return handle_ri(vcpu);
+ else
+ return -EOPNOTSUPP;
+}
+
+static int handle_gs(struct kvm_vcpu *vcpu)
+{
+ vcpu->stat.instruction_gs++;
+
+ if (test_kvm_facility(vcpu->kvm, 133)) {
+ VCPU_EVENT(vcpu, 3, "%s", "ENABLE: GS (lazy)");
+ preempt_disable();
+ __ctl_set_bit(2, 4);
+ current->thread.gs_cb = (struct gs_cb *)&vcpu->run->s.regs.gscb;
+ restore_gs_cb(current->thread.gs_cb);
+ preempt_enable();
+ vcpu->arch.sie_block->ecb |= ECB_GS;
+ vcpu->arch.sie_block->ecd |= ECD_HOSTREGMGMT;
+ vcpu->arch.gs_enabled = 1;
+ kvm_s390_retry_instr(vcpu);
+ return 0;
+ } else
+ return kvm_s390_inject_program_int(vcpu, PGM_OPERATION);
+}
+
+int kvm_s390_handle_e3(struct kvm_vcpu *vcpu)
+{
+ int code = vcpu->arch.sie_block->ipb & 0xff;
+
+ if (code == 0x49 || code == 0x4d)
+ return handle_gs(vcpu);
+ else
+ return -EOPNOTSUPP;
+}
+/* Handle SCK (SET CLOCK) interception */
+static int handle_set_clock(struct kvm_vcpu *vcpu)
+{
+ struct kvm_s390_vm_tod_clock gtod = { 0 };
+ int rc;
+ u8 ar;
+ u64 op2;
+
+ vcpu->stat.instruction_sck++;
+
+ if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
+ return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
+
+ op2 = kvm_s390_get_base_disp_s(vcpu, &ar);
+ if (op2 & 7) /* Operand must be on a doubleword boundary */
+ return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
+ rc = read_guest(vcpu, op2, ar, &gtod.tod, sizeof(gtod.tod));
+ if (rc)
+ return kvm_s390_inject_prog_cond(vcpu, rc);
+
+ VCPU_EVENT(vcpu, 3, "SCK: setting guest TOD to 0x%llx", gtod.tod);
+ kvm_s390_set_tod_clock(vcpu->kvm, &gtod);
+
+ kvm_s390_set_psw_cc(vcpu, 0);
+ return 0;
+}
+
+static int handle_set_prefix(struct kvm_vcpu *vcpu)
+{
+ u64 operand2;
+ u32 address;
+ int rc;
+ u8 ar;
+
+ vcpu->stat.instruction_spx++;
+
+ if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
+ return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
+
+ operand2 = kvm_s390_get_base_disp_s(vcpu, &ar);
+
+ /* must be word boundary */
+ if (operand2 & 3)
+ return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
+
+ /* get the value */
+ rc = read_guest(vcpu, operand2, ar, &address, sizeof(address));
+ if (rc)
+ return kvm_s390_inject_prog_cond(vcpu, rc);
+
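+	/* only the 8k-aligned prefix portion (below 2G) of the operand is used */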
+ address &= 0x7fffe000u;
+
+ /*
+ * Make sure the new value is valid memory. We only need to check the
+ * first page, since address is 8k aligned and memory pieces are always
+ * at least 1MB aligned and have at least a size of 1MB.
+ */
+ if (kvm_is_error_gpa(vcpu->kvm, address))
+ return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
+
+ kvm_s390_set_prefix(vcpu, address);
+ trace_kvm_s390_handle_prefix(vcpu, 1, address);
+ return 0;
+}
+
+static int handle_store_prefix(struct kvm_vcpu *vcpu)
+{
+ u64 operand2;
+ u32 address;
+ int rc;
+ u8 ar;
+
+ vcpu->stat.instruction_stpx++;
+
+ if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
+ return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
+
+ operand2 = kvm_s390_get_base_disp_s(vcpu, &ar);
+
+ /* must be word boundary */
+ if (operand2 & 3)
+ return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
+
+ address = kvm_s390_get_prefix(vcpu);
+
+ /* get the value */
+ rc = write_guest(vcpu, operand2, ar, &address, sizeof(address));
+ if (rc)
+ return kvm_s390_inject_prog_cond(vcpu, rc);
+
+ VCPU_EVENT(vcpu, 3, "STPX: storing prefix 0x%x into 0x%llx", address, operand2);
+ trace_kvm_s390_handle_prefix(vcpu, 0, address);
+ return 0;
+}
+
+static int handle_store_cpu_address(struct kvm_vcpu *vcpu)
+{
+ u16 vcpu_id = vcpu->vcpu_id;
+ u64 ga;
+ int rc;
+ u8 ar;
+
+ vcpu->stat.instruction_stap++;
+
+ if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
+ return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
+
+ ga = kvm_s390_get_base_disp_s(vcpu, &ar);
+
+ if (ga & 1)
+ return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
+
+ rc = write_guest(vcpu, ga, ar, &vcpu_id, sizeof(vcpu_id));
+ if (rc)
+ return kvm_s390_inject_prog_cond(vcpu, rc);
+
+ VCPU_EVENT(vcpu, 3, "STAP: storing cpu address (%u) to 0x%llx", vcpu_id, ga);
+ trace_kvm_s390_handle_stap(vcpu, ga);
+ return 0;
+}
+
+int kvm_s390_skey_check_enable(struct kvm_vcpu *vcpu)
+{
+ int rc;
+
+ trace_kvm_s390_skey_related_inst(vcpu);
+ /* Already enabled? */
+ if (vcpu->arch.skey_enabled)
+ return 0;
+
+ rc = s390_enable_skey();
+ VCPU_EVENT(vcpu, 3, "enabling storage keys for guest: %d", rc);
+ if (rc)
+ return rc;
+
+ if (kvm_s390_test_cpuflags(vcpu, CPUSTAT_KSS))
+ kvm_s390_clear_cpuflags(vcpu, CPUSTAT_KSS);
+ if (!vcpu->kvm->arch.use_skf)
+ vcpu->arch.sie_block->ictl |= ICTL_ISKE | ICTL_SSKE | ICTL_RRBE;
+ else
+ vcpu->arch.sie_block->ictl &= ~(ICTL_ISKE | ICTL_SSKE | ICTL_RRBE);
+ vcpu->arch.skey_enabled = true;
+ return 0;
+}
+
+static int try_handle_skey(struct kvm_vcpu *vcpu)
+{
+ int rc;
+
+ rc = kvm_s390_skey_check_enable(vcpu);
+ if (rc)
+ return rc;
+ if (vcpu->kvm->arch.use_skf) {
+ /* with storage-key facility, SIE interprets it for us */
+ kvm_s390_retry_instr(vcpu);
+ VCPU_EVENT(vcpu, 4, "%s", "retrying storage key operation");
+ return -EAGAIN;
+ }
+ return 0;
+}
+
+static int handle_iske(struct kvm_vcpu *vcpu)
+{
+ unsigned long gaddr, vmaddr;
+ unsigned char key;
+ int reg1, reg2;
+ bool unlocked;
+ int rc;
+
+ vcpu->stat.instruction_iske++;
+
+ if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
+ return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
+
+ rc = try_handle_skey(vcpu);
+ if (rc)
+ return rc != -EAGAIN ? rc : 0;
+
+ kvm_s390_get_regs_rre(vcpu, &reg1, &reg2);
+
+ gaddr = vcpu->run->s.regs.gprs[reg2] & PAGE_MASK;
+ gaddr = kvm_s390_logical_to_effective(vcpu, gaddr);
+ gaddr = kvm_s390_real_to_abs(vcpu, gaddr);
+ vmaddr = gfn_to_hva(vcpu->kvm, gpa_to_gfn(gaddr));
+ if (kvm_is_error_hva(vmaddr))
+ return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
+retry:
+ unlocked = false;
+ down_read(&current->mm->mmap_sem);
+ rc = get_guest_storage_key(current->mm, vmaddr, &key);
+
+ if (rc) {
+ rc = fixup_user_fault(current, current->mm, vmaddr,
+ FAULT_FLAG_WRITE, &unlocked);
+ if (!rc) {
+ up_read(&current->mm->mmap_sem);
+ goto retry;
+ }
+ }
+ up_read(&current->mm->mmap_sem);
+ if (rc == -EFAULT)
+ return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
+ if (rc < 0)
+ return rc;
+ vcpu->run->s.regs.gprs[reg1] &= ~0xff;
+ vcpu->run->s.regs.gprs[reg1] |= key;
+ return 0;
+}
+
+static int handle_rrbe(struct kvm_vcpu *vcpu)
+{
+ unsigned long vmaddr, gaddr;
+ int reg1, reg2;
+ bool unlocked;
+ int rc;
+
+ vcpu->stat.instruction_rrbe++;
+
+ if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
+ return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
+
+ rc = try_handle_skey(vcpu);
+ if (rc)
+ return rc != -EAGAIN ? rc : 0;
+
+ kvm_s390_get_regs_rre(vcpu, &reg1, &reg2);
+
+ gaddr = vcpu->run->s.regs.gprs[reg2] & PAGE_MASK;
+ gaddr = kvm_s390_logical_to_effective(vcpu, gaddr);
+ gaddr = kvm_s390_real_to_abs(vcpu, gaddr);
+ vmaddr = gfn_to_hva(vcpu->kvm, gpa_to_gfn(gaddr));
+ if (kvm_is_error_hva(vmaddr))
+ return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
+retry:
+ unlocked = false;
+ down_read(&current->mm->mmap_sem);
+ rc = reset_guest_reference_bit(current->mm, vmaddr);
+ if (rc < 0) {
+ rc = fixup_user_fault(current, current->mm, vmaddr,
+ FAULT_FLAG_WRITE, &unlocked);
+ if (!rc) {
+ up_read(&current->mm->mmap_sem);
+ goto retry;
+ }
+ }
+ up_read(&current->mm->mmap_sem);
+ if (rc == -EFAULT)
+ return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
+ if (rc < 0)
+ return rc;
+ kvm_s390_set_psw_cc(vcpu, rc);
+ return 0;
+}
+
+#define SSKE_NQ 0x8
+#define SSKE_MR 0x4
+#define SSKE_MC 0x2
+#define SSKE_MB 0x1
+static int handle_sske(struct kvm_vcpu *vcpu)
+{
+ unsigned char m3 = vcpu->arch.sie_block->ipb >> 28;
+ unsigned long start, end;
+ unsigned char key, oldkey;
+ int reg1, reg2;
+ bool unlocked;
+ int rc;
+
+ vcpu->stat.instruction_sske++;
+
+ if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
+ return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
+
+ rc = try_handle_skey(vcpu);
+ if (rc)
+ return rc != -EAGAIN ? rc : 0;
+
+ if (!test_kvm_facility(vcpu->kvm, 8))
+ m3 &= ~SSKE_MB;
+ if (!test_kvm_facility(vcpu->kvm, 10))
+ m3 &= ~(SSKE_MC | SSKE_MR);
+ if (!test_kvm_facility(vcpu->kvm, 14))
+ m3 &= ~SSKE_NQ;
+
+ kvm_s390_get_regs_rre(vcpu, &reg1, &reg2);
+
+ key = vcpu->run->s.regs.gprs[reg1] & 0xfe;
+ start = vcpu->run->s.regs.gprs[reg2] & PAGE_MASK;
+ start = kvm_s390_logical_to_effective(vcpu, start);
+ if (m3 & SSKE_MB) {
+ /* start already designates an absolute address */
+ end = (start + _SEGMENT_SIZE) & ~(_SEGMENT_SIZE - 1);
+ } else {
+ start = kvm_s390_real_to_abs(vcpu, start);
+ end = start + PAGE_SIZE;
+ }
+
+ while (start != end) {
+ unsigned long vmaddr = gfn_to_hva(vcpu->kvm, gpa_to_gfn(start));
+ unlocked = false;
+
+ if (kvm_is_error_hva(vmaddr))
+ return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
+
+ down_read(&current->mm->mmap_sem);
+ rc = cond_set_guest_storage_key(current->mm, vmaddr, key, &oldkey,
+ m3 & SSKE_NQ, m3 & SSKE_MR,
+ m3 & SSKE_MC);
+
+ if (rc < 0) {
+ rc = fixup_user_fault(current, current->mm, vmaddr,
+ FAULT_FLAG_WRITE, &unlocked);
+ rc = !rc ? -EAGAIN : rc;
+ }
+ up_read(&current->mm->mmap_sem);
+ if (rc == -EFAULT)
+ return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
+ if (rc == -EAGAIN)
+ continue;
+ if (rc < 0)
+ return rc;
+ start += PAGE_SIZE;
+ }
+
+ if (m3 & (SSKE_MC | SSKE_MR)) {
+ if (m3 & SSKE_MB) {
+ /* skey in reg1 is unpredictable */
+ kvm_s390_set_psw_cc(vcpu, 3);
+ } else {
+ kvm_s390_set_psw_cc(vcpu, rc);
+ vcpu->run->s.regs.gprs[reg1] &= ~0xff00UL;
+ vcpu->run->s.regs.gprs[reg1] |= (u64) oldkey << 8;
+ }
+ }
+ if (m3 & SSKE_MB) {
+ if (psw_bits(vcpu->arch.sie_block->gpsw).eaba == PSW_BITS_AMODE_64BIT)
+ vcpu->run->s.regs.gprs[reg2] &= ~PAGE_MASK;
+ else
+ vcpu->run->s.regs.gprs[reg2] &= ~0xfffff000UL;
+ end = kvm_s390_logical_to_effective(vcpu, end);
+ vcpu->run->s.regs.gprs[reg2] |= end;
+ }
+ return 0;
+}
+
+static int handle_ipte_interlock(struct kvm_vcpu *vcpu)
+{
+ vcpu->stat.instruction_ipte_interlock++;
+ if (psw_bits(vcpu->arch.sie_block->gpsw).pstate)
+ return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
+ wait_event(vcpu->kvm->arch.ipte_wq, !ipte_lock_held(vcpu));
+ kvm_s390_retry_instr(vcpu);
+ VCPU_EVENT(vcpu, 4, "%s", "retrying ipte interlock operation");
+ return 0;
+}
+
+static int handle_test_block(struct kvm_vcpu *vcpu)
+{
+ gpa_t addr;
+ int reg2;
+
+ vcpu->stat.instruction_tb++;
+
+ if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
+ return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
+
+ kvm_s390_get_regs_rre(vcpu, NULL, &reg2);
+ addr = vcpu->run->s.regs.gprs[reg2] & PAGE_MASK;
+ addr = kvm_s390_logical_to_effective(vcpu, addr);
+ if (kvm_s390_check_low_addr_prot_real(vcpu, addr))
+ return kvm_s390_inject_prog_irq(vcpu, &vcpu->arch.pgm);
+ addr = kvm_s390_real_to_abs(vcpu, addr);
+
+ if (kvm_is_error_gpa(vcpu->kvm, addr))
+ return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
+ /*
+ * We don't expect errors on modern systems, and do not care
+ * about storage keys (yet), so let's just clear the page.
+ */
+ if (kvm_clear_guest(vcpu->kvm, addr, PAGE_SIZE))
+ return -EFAULT;
+ kvm_s390_set_psw_cc(vcpu, 0);
+ vcpu->run->s.regs.gprs[0] = 0;
+ return 0;
+}
+
+static int handle_tpi(struct kvm_vcpu *vcpu)
+{
+ struct kvm_s390_interrupt_info *inti;
+ unsigned long len;
+ u32 tpi_data[3];
+ int rc;
+ u64 addr;
+ u8 ar;
+
+ vcpu->stat.instruction_tpi++;
+
+ addr = kvm_s390_get_base_disp_s(vcpu, &ar);
+ if (addr & 3)
+ return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
+
+ inti = kvm_s390_get_io_int(vcpu->kvm, vcpu->arch.sie_block->gcr[6], 0);
+ if (!inti) {
+ kvm_s390_set_psw_cc(vcpu, 0);
+ return 0;
+ }
+
+ tpi_data[0] = inti->io.subchannel_id << 16 | inti->io.subchannel_nr;
+ tpi_data[1] = inti->io.io_int_parm;
+ tpi_data[2] = inti->io.io_int_word;
+ if (addr) {
+ /*
+ * Store the two-word I/O interruption code into the
+ * provided area.
+ */
+ len = sizeof(tpi_data) - 4;
+ rc = write_guest(vcpu, addr, ar, &tpi_data, len);
+ if (rc) {
+ rc = kvm_s390_inject_prog_cond(vcpu, rc);
+ goto reinject_interrupt;
+ }
+ } else {
+ /*
+ * Store the three-word I/O interruption code into
+ * the appropriate lowcore area.
+ */
+ len = sizeof(tpi_data);
+ if (write_guest_lc(vcpu, __LC_SUBCHANNEL_ID, &tpi_data, len)) {
+ /* failed writes to the low core are not recoverable */
+ rc = -EFAULT;
+ goto reinject_interrupt;
+ }
+ }
+
+ /* irq was successfully handed to the guest */
+ kfree(inti);
+ kvm_s390_set_psw_cc(vcpu, 1);
+ return 0;
+reinject_interrupt:
+ /*
+ * If we encounter a problem storing the interruption code, the
+ * instruction is suppressed from the guest's view: reinject the
+ * interrupt.
+ */
+ if (kvm_s390_reinject_io_int(vcpu->kvm, inti)) {
+ kfree(inti);
+ rc = -EFAULT;
+ }
+ /* don't set the cc, a pgm irq was injected or we drop to user space */
+ return rc ? -EFAULT : 0;
+}
+
+static int handle_tsch(struct kvm_vcpu *vcpu)
+{
+ struct kvm_s390_interrupt_info *inti = NULL;
+ const u64 isc_mask = 0xffUL << 24; /* all iscs set */
+
+ vcpu->stat.instruction_tsch++;
+
+ /* a valid schid has at least one bit set */
+ if (vcpu->run->s.regs.gprs[1])
+ inti = kvm_s390_get_io_int(vcpu->kvm, isc_mask,
+ vcpu->run->s.regs.gprs[1]);
+
+ /*
+ * Prepare exit to userspace.
+ * We indicate whether we dequeued a pending I/O interrupt
+ * so that userspace can re-inject it if the instruction gets
+ * a program check. While this may re-order the pending I/O
+ * interrupts, this is no problem since the priority is kept
+ * intact.
+ */
+ vcpu->run->exit_reason = KVM_EXIT_S390_TSCH;
+ vcpu->run->s390_tsch.dequeued = !!inti;
+ if (inti) {
+ vcpu->run->s390_tsch.subchannel_id = inti->io.subchannel_id;
+ vcpu->run->s390_tsch.subchannel_nr = inti->io.subchannel_nr;
+ vcpu->run->s390_tsch.io_int_parm = inti->io.io_int_parm;
+ vcpu->run->s390_tsch.io_int_word = inti->io.io_int_word;
+ }
+ vcpu->run->s390_tsch.ipb = vcpu->arch.sie_block->ipb;
+ kfree(inti);
+ return -EREMOTE;
+}
+
+static int handle_io_inst(struct kvm_vcpu *vcpu)
+{
+ VCPU_EVENT(vcpu, 4, "%s", "I/O instruction");
+
+ if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
+ return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
+
+ if (vcpu->kvm->arch.css_support) {
+ /*
+ * Most I/O instructions will be handled by userspace.
+ * Exceptions are tpi and the interrupt portion of tsch.
+ */
+ if (vcpu->arch.sie_block->ipa == 0xb236)
+ return handle_tpi(vcpu);
+ if (vcpu->arch.sie_block->ipa == 0xb235)
+ return handle_tsch(vcpu);
+ /* Handle in userspace. */
+ vcpu->stat.instruction_io_other++;
+ return -EOPNOTSUPP;
+ } else {
+ /*
+ * Set condition code 3 to stop the guest from issuing channel
+ * I/O instructions.
+ */
+ kvm_s390_set_psw_cc(vcpu, 3);
+ return 0;
+ }
+}
+
+static int handle_stfl(struct kvm_vcpu *vcpu)
+{
+ int rc;
+ unsigned int fac;
+
+ vcpu->stat.instruction_stfl++;
+
+ if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
+ return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
+
+ /*
+ * We need to shift the lower 32 facility bits (bits 0-31) from a u64
+ * into a u32 memory representation. They will remain bits 0-31.
+ */
+ fac = *vcpu->kvm->arch.model.fac_list >> 32;
+ rc = write_guest_lc(vcpu, offsetof(struct lowcore, stfl_fac_list),
+ &fac, sizeof(fac));
+ if (rc)
+ return rc;
+ VCPU_EVENT(vcpu, 3, "STFL: store facility list 0x%x", fac);
+ trace_kvm_s390_handle_stfl(vcpu, fac);
+ return 0;
+}
+
+#define PSW_MASK_ADDR_MODE (PSW_MASK_EA | PSW_MASK_BA)
+#define PSW_MASK_UNASSIGNED 0xb80800fe7fffffffUL
+#define PSW_ADDR_24 0x0000000000ffffffUL
+#define PSW_ADDR_31 0x000000007fffffffUL
+
+int is_valid_psw(psw_t *psw)
+{
+ if (psw->mask & PSW_MASK_UNASSIGNED)
+ return 0;
+ if ((psw->mask & PSW_MASK_ADDR_MODE) == PSW_MASK_BA) {
+ if (psw->addr & ~PSW_ADDR_31)
+ return 0;
+ }
+ if (!(psw->mask & PSW_MASK_ADDR_MODE) && (psw->addr & ~PSW_ADDR_24))
+ return 0;
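+ /* the EA bit without the BA bit is an invalid addressing mode */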
+ if ((psw->mask & PSW_MASK_ADDR_MODE) == PSW_MASK_EA)
+ return 0;
+ if (psw->addr & 1)
+ return 0;
+ return 1;
+}
+
+int kvm_s390_handle_lpsw(struct kvm_vcpu *vcpu)
+{
+ psw_t *gpsw = &vcpu->arch.sie_block->gpsw;
+ psw_compat_t new_psw;
+ u64 addr;
+ int rc;
+ u8 ar;
+
+ vcpu->stat.instruction_lpsw++;
+
+ if (gpsw->mask & PSW_MASK_PSTATE)
+ return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
+
+ addr = kvm_s390_get_base_disp_s(vcpu, &ar);
+ if (addr & 7)
+ return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
+
+ rc = read_guest(vcpu, addr, ar, &new_psw, sizeof(new_psw));
+ if (rc)
+ return kvm_s390_inject_prog_cond(vcpu, rc);
+ if (!(new_psw.mask & PSW32_MASK_BASE))
+ return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
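+ /* expand the 8-byte short PSW into the 16-byte z/Architecture PSW */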
+ gpsw->mask = (new_psw.mask & ~PSW32_MASK_BASE) << 32;
+ gpsw->mask |= new_psw.addr & PSW32_ADDR_AMODE;
+ gpsw->addr = new_psw.addr & ~PSW32_ADDR_AMODE;
+ if (!is_valid_psw(gpsw))
+ return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
+ return 0;
+}
+
+static int handle_lpswe(struct kvm_vcpu *vcpu)
+{
+ psw_t new_psw;
+ u64 addr;
+ int rc;
+ u8 ar;
+
+ vcpu->stat.instruction_lpswe++;
+
+ if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
+ return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
+
+ addr = kvm_s390_get_base_disp_s(vcpu, &ar);
+ if (addr & 7)
+ return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
+ rc = read_guest(vcpu, addr, ar, &new_psw, sizeof(new_psw));
+ if (rc)
+ return kvm_s390_inject_prog_cond(vcpu, rc);
+ vcpu->arch.sie_block->gpsw = new_psw;
+ if (!is_valid_psw(&vcpu->arch.sie_block->gpsw))
+ return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
+ return 0;
+}
+
+static int handle_stidp(struct kvm_vcpu *vcpu)
+{
+ u64 stidp_data = vcpu->kvm->arch.model.cpuid;
+ u64 operand2;
+ int rc;
+ u8 ar;
+
+ vcpu->stat.instruction_stidp++;
+
+ if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
+ return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
+
+ operand2 = kvm_s390_get_base_disp_s(vcpu, &ar);
+
+ if (operand2 & 7)
+ return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
+
+ rc = write_guest(vcpu, operand2, ar, &stidp_data, sizeof(stidp_data));
+ if (rc)
+ return kvm_s390_inject_prog_cond(vcpu, rc);
+
+ VCPU_EVENT(vcpu, 3, "STIDP: store cpu id 0x%llx", stidp_data);
+ return 0;
+}
+
+static void handle_stsi_3_2_2(struct kvm_vcpu *vcpu, struct sysinfo_3_2_2 *mem)
+{
+ int cpus = 0;
+ int n;
+
+ cpus = atomic_read(&vcpu->kvm->online_vcpus);
+
+ /* deal with other level 3 hypervisors */
+ if (stsi(mem, 3, 2, 2))
+ mem->count = 0;
+ if (mem->count < 8)
+ mem->count++;
+ for (n = mem->count - 1; n > 0 ; n--)
+ memcpy(&mem->vm[n], &mem->vm[n - 1], sizeof(mem->vm[0]));
+
+ memset(&mem->vm[0], 0, sizeof(mem->vm[0]));
+ mem->vm[0].cpus_total = cpus;
+ mem->vm[0].cpus_configured = cpus;
+ mem->vm[0].cpus_standby = 0;
+ mem->vm[0].cpus_reserved = 0;
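+ /* capability adjustment factor: 1000 represents 100% */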
+ mem->vm[0].caf = 1000;
+ memcpy(mem->vm[0].name, "KVMguest", 8);
+ ASCEBC(mem->vm[0].name, 8);
+ memcpy(mem->vm[0].cpi, "KVM/Linux ", 16);
+ ASCEBC(mem->vm[0].cpi, 16);
+}
+
+static void insert_stsi_usr_data(struct kvm_vcpu *vcpu, u64 addr, u8 ar,
+ u8 fc, u8 sel1, u16 sel2)
+{
+ vcpu->run->exit_reason = KVM_EXIT_S390_STSI;
+ vcpu->run->s390_stsi.addr = addr;
+ vcpu->run->s390_stsi.ar = ar;
+ vcpu->run->s390_stsi.fc = fc;
+ vcpu->run->s390_stsi.sel1 = sel1;
+ vcpu->run->s390_stsi.sel2 = sel2;
+}
+
+static int handle_stsi(struct kvm_vcpu *vcpu)
+{
+ int fc = (vcpu->run->s.regs.gprs[0] & 0xf0000000) >> 28;
+ int sel1 = vcpu->run->s.regs.gprs[0] & 0xff;
+ int sel2 = vcpu->run->s.regs.gprs[1] & 0xffff;
+ unsigned long mem = 0;
+ u64 operand2;
+ int rc = 0;
+ u8 ar;
+
+ vcpu->stat.instruction_stsi++;
+ VCPU_EVENT(vcpu, 3, "STSI: fc: %u sel1: %u sel2: %u", fc, sel1, sel2);
+
+ if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
+ return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
+
+ if (fc > 3) {
+ kvm_s390_set_psw_cc(vcpu, 3);
+ return 0;
+ }
+
+ if (vcpu->run->s.regs.gprs[0] & 0x0fffff00
+ || vcpu->run->s.regs.gprs[1] & 0xffff0000)
+ return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
+
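+ /* fc 0: report the current configuration level (3) in reg 0 */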
+ if (fc == 0) {
+ vcpu->run->s.regs.gprs[0] = 3 << 28;
+ kvm_s390_set_psw_cc(vcpu, 0);
+ return 0;
+ }
+
+ operand2 = kvm_s390_get_base_disp_s(vcpu, &ar);
+
+ if (operand2 & 0xfff)
+ return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
+
+ switch (fc) {
+ case 1: /* same handling for 1 and 2 */
+ case 2:
+ mem = get_zeroed_page(GFP_KERNEL);
+ if (!mem)
+ goto out_no_data;
+ if (stsi((void *) mem, fc, sel1, sel2))
+ goto out_no_data;
+ break;
+ case 3:
+ if (sel1 != 2 || sel2 != 2)
+ goto out_no_data;
+ mem = get_zeroed_page(GFP_KERNEL);
+ if (!mem)
+ goto out_no_data;
+ handle_stsi_3_2_2(vcpu, (void *) mem);
+ break;
+ }
+
+ rc = write_guest(vcpu, operand2, ar, (void *)mem, PAGE_SIZE);
+ if (rc) {
+ rc = kvm_s390_inject_prog_cond(vcpu, rc);
+ goto out;
+ }
+ if (vcpu->kvm->arch.user_stsi) {
+ insert_stsi_usr_data(vcpu, operand2, ar, fc, sel1, sel2);
+ rc = -EREMOTE;
+ }
+ trace_kvm_s390_handle_stsi(vcpu, fc, sel1, sel2, operand2);
+ free_page(mem);
+ kvm_s390_set_psw_cc(vcpu, 0);
+ vcpu->run->s.regs.gprs[0] = 0;
+ return rc;
+out_no_data:
+ kvm_s390_set_psw_cc(vcpu, 3);
+out:
+ free_page(mem);
+ return rc;
+}
+
+int kvm_s390_handle_b2(struct kvm_vcpu *vcpu)
+{
+ switch (vcpu->arch.sie_block->ipa & 0x00ff) {
+ case 0x02:
+ return handle_stidp(vcpu);
+ case 0x04:
+ return handle_set_clock(vcpu);
+ case 0x10:
+ return handle_set_prefix(vcpu);
+ case 0x11:
+ return handle_store_prefix(vcpu);
+ case 0x12:
+ return handle_store_cpu_address(vcpu);
+ case 0x14:
+ return kvm_s390_handle_vsie(vcpu);
+ case 0x21:
+ case 0x50:
+ return handle_ipte_interlock(vcpu);
+ case 0x29:
+ return handle_iske(vcpu);
+ case 0x2a:
+ return handle_rrbe(vcpu);
+ case 0x2b:
+ return handle_sske(vcpu);
+ case 0x2c:
+ return handle_test_block(vcpu);
+ case 0x30:
+ case 0x31:
+ case 0x32:
+ case 0x33:
+ case 0x34:
+ case 0x35:
+ case 0x36:
+ case 0x37:
+ case 0x38:
+ case 0x39:
+ case 0x3a:
+ case 0x3b:
+ case 0x3c:
+ case 0x5f:
+ case 0x74:
+ case 0x76:
+ return handle_io_inst(vcpu);
+ case 0x56:
+ return handle_sthyi(vcpu);
+ case 0x7d:
+ return handle_stsi(vcpu);
+ case 0xb1:
+ return handle_stfl(vcpu);
+ case 0xb2:
+ return handle_lpswe(vcpu);
+ default:
+ return -EOPNOTSUPP;
+ }
+}
+
+static int handle_epsw(struct kvm_vcpu *vcpu)
+{
+ int reg1, reg2;
+
+ vcpu->stat.instruction_epsw++;
+
+ kvm_s390_get_regs_rre(vcpu, &reg1, &reg2);
+
+ /* This basically extracts the mask half of the psw. */
+ vcpu->run->s.regs.gprs[reg1] &= 0xffffffff00000000UL;
+ vcpu->run->s.regs.gprs[reg1] |= vcpu->arch.sie_block->gpsw.mask >> 32;
+ if (reg2) {
+ vcpu->run->s.regs.gprs[reg2] &= 0xffffffff00000000UL;
+ vcpu->run->s.regs.gprs[reg2] |=
+ vcpu->arch.sie_block->gpsw.mask & 0x00000000ffffffffUL;
+ }
+ return 0;
+}
+
+#define PFMF_RESERVED 0xfffc0101UL
+#define PFMF_SK 0x00020000UL
+#define PFMF_CF 0x00010000UL
+#define PFMF_UI 0x00008000UL
+#define PFMF_FSC 0x00007000UL
+#define PFMF_NQ 0x00000800UL
+#define PFMF_MR 0x00000400UL
+#define PFMF_MC 0x00000200UL
+#define PFMF_KEY 0x000000feUL
+
+static int handle_pfmf(struct kvm_vcpu *vcpu)
+{
+ bool mr = false, mc = false, nq;
+ int reg1, reg2;
+ unsigned long start, end;
+ unsigned char key;
+
+ vcpu->stat.instruction_pfmf++;
+
+ kvm_s390_get_regs_rre(vcpu, &reg1, &reg2);
+
+ if (!test_kvm_facility(vcpu->kvm, 8))
+ return kvm_s390_inject_program_int(vcpu, PGM_OPERATION);
+
+ if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
+ return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
+
+ if (vcpu->run->s.regs.gprs[reg1] & PFMF_RESERVED)
+ return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
+
+ /* Only provide non-quiescing support if enabled for the guest */
+ if (vcpu->run->s.regs.gprs[reg1] & PFMF_NQ &&
+ !test_kvm_facility(vcpu->kvm, 14))
+ return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
+
+ /* Only provide conditional-SSKE support if enabled for the guest */
+ if (vcpu->run->s.regs.gprs[reg1] & PFMF_SK &&
+ test_kvm_facility(vcpu->kvm, 10)) {
+ mr = vcpu->run->s.regs.gprs[reg1] & PFMF_MR;
+ mc = vcpu->run->s.regs.gprs[reg1] & PFMF_MC;
+ }
+
+ nq = vcpu->run->s.regs.gprs[reg1] & PFMF_NQ;
+ key = vcpu->run->s.regs.gprs[reg1] & PFMF_KEY;
+ start = vcpu->run->s.regs.gprs[reg2] & PAGE_MASK;
+ start = kvm_s390_logical_to_effective(vcpu, start);
+
+ if (vcpu->run->s.regs.gprs[reg1] & PFMF_CF) {
+ if (kvm_s390_check_low_addr_prot_real(vcpu, start))
+ return kvm_s390_inject_prog_irq(vcpu, &vcpu->arch.pgm);
+ }
+
+ switch (vcpu->run->s.regs.gprs[reg1] & PFMF_FSC) {
+ case 0x00000000:
+ /* only 4k frames specify a real address */
+ start = kvm_s390_real_to_abs(vcpu, start);
+ end = (start + PAGE_SIZE) & ~(PAGE_SIZE - 1);
+ break;
+ case 0x00001000:
+ end = (start + _SEGMENT_SIZE) & ~(_SEGMENT_SIZE - 1);
+ break;
+ case 0x00002000:
+ /*
+  * only support 2G frame size if EDAT2 is available and we are
+  * not in 24-bit addressing mode
+  */
+ if (!test_kvm_facility(vcpu->kvm, 78) ||
+ psw_bits(vcpu->arch.sie_block->gpsw).eaba == PSW_BITS_AMODE_24BIT)
+ return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
+ end = (start + _REGION3_SIZE) & ~(_REGION3_SIZE - 1);
+ break;
+ default:
+ return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
+ }
+
+ while (start != end) {
+ unsigned long vmaddr;
+ bool unlocked = false;
+
+ /* Translate guest address to host address */
+ vmaddr = gfn_to_hva(vcpu->kvm, gpa_to_gfn(start));
+ if (kvm_is_error_hva(vmaddr))
+ return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
+
+ if (vcpu->run->s.regs.gprs[reg1] & PFMF_CF) {
+ if (kvm_clear_guest(vcpu->kvm, start, PAGE_SIZE))
+ return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
+ }
+
+ if (vcpu->run->s.regs.gprs[reg1] & PFMF_SK) {
+ int rc = kvm_s390_skey_check_enable(vcpu);
+
+ if (rc)
+ return rc;
+ down_read(&current->mm->mmap_sem);
+ rc = cond_set_guest_storage_key(current->mm, vmaddr,
+ key, NULL, nq, mr, mc);
+ if (rc < 0) {
+ rc = fixup_user_fault(current, current->mm, vmaddr,
+ FAULT_FLAG_WRITE, &unlocked);
+ rc = !rc ? -EAGAIN : rc;
+ }
+ up_read(&current->mm->mmap_sem);
+ if (rc == -EFAULT)
+ return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
+ if (rc == -EAGAIN)
+ continue;
+ if (rc < 0)
+ return rc;
+ }
+ start += PAGE_SIZE;
+ }
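+ /* for 1M and 2G frames the address of the next frame is written back to reg2 */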
+ if (vcpu->run->s.regs.gprs[reg1] & PFMF_FSC) {
+ if (psw_bits(vcpu->arch.sie_block->gpsw).eaba == PSW_BITS_AMODE_64BIT) {
+ vcpu->run->s.regs.gprs[reg2] = end;
+ } else {
+ vcpu->run->s.regs.gprs[reg2] &= ~0xffffffffUL;
+ end = kvm_s390_logical_to_effective(vcpu, end);
+ vcpu->run->s.regs.gprs[reg2] |= end;
+ }
+ }
+ return 0;
+}
+
+/*
+ * Must be called with relevant read locks held (kvm->mm->mmap_sem, kvm->srcu)
+ */
+static inline int __do_essa(struct kvm_vcpu *vcpu, const int orc)
+{
+ int r1, r2, nappended, entries;
+ unsigned long gfn, hva, res, pgstev, ptev;
+ unsigned long *cbrlo;
+
+ /*
+ * We don't need to set SD.FPF.SK to 1 here, because if we have a
+ * machine check here we either handle it or crash
+ */
+
+ kvm_s390_get_regs_rre(vcpu, &r1, &r2);
+ gfn = vcpu->run->s.regs.gprs[r2] >> PAGE_SHIFT;
+ hva = gfn_to_hva(vcpu->kvm, gfn);
+ entries = (vcpu->arch.sie_block->cbrlo & ~PAGE_MASK) >> 3;
+
+ if (kvm_is_error_hva(hva))
+ return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
+
+ nappended = pgste_perform_essa(vcpu->kvm->mm, hva, orc, &ptev, &pgstev);
+ if (nappended < 0) {
+ res = orc ? 0x10 : 0;
+ vcpu->run->s.regs.gprs[r1] = res; /* Exception Indication */
+ return 0;
+ }
+ res = (pgstev & _PGSTE_GPS_USAGE_MASK) >> 22;
+ /*
+ * Set the block-content state part of the result. 0 means resident, so
+ * nothing to do if the page is valid. 2 is for preserved pages
+ * (non-present and non-zero), and 3 for zero pages (non-present and
+ * zero).
+ */
+ if (ptev & _PAGE_INVALID) {
+ res |= 2;
+ if (pgstev & _PGSTE_GPS_ZERO)
+ res |= 1;
+ }
+ if (pgstev & _PGSTE_GPS_NODAT)
+ res |= 0x20;
+ vcpu->run->s.regs.gprs[r1] = res;
+ /*
+ * It is possible that all the normal 511 slots were full, in which case
+ * we will now write in the 512th slot, which is reserved for host use.
+ * In both cases we let the normal essa handling code process all the
+ * slots, including the reserved one, if needed.
+ */
+ if (nappended > 0) {
+ cbrlo = phys_to_virt(vcpu->arch.sie_block->cbrlo & PAGE_MASK);
+ cbrlo[entries] = gfn << PAGE_SHIFT;
+ }
+
+ if (orc) {
+ struct kvm_memory_slot *ms = gfn_to_memslot(vcpu->kvm, gfn);
+
+ /* Increment only if we are really flipping the bit */
+ if (ms && !test_and_set_bit(gfn - ms->base_gfn, kvm_second_dirty_bitmap(ms)))
+ atomic64_inc(&vcpu->kvm->arch.cmma_dirty_pages);
+ }
+
+ return nappended;
+}
+
+static int handle_essa(struct kvm_vcpu *vcpu)
+{
+ /* entries expected to be 1FF */
+ int entries = (vcpu->arch.sie_block->cbrlo & ~PAGE_MASK) >> 3;
+ unsigned long *cbrlo;
+ struct gmap *gmap;
+ int i, orc;
+
+ VCPU_EVENT(vcpu, 4, "ESSA: release %d pages", entries);
+ gmap = vcpu->arch.gmap;
+ vcpu->stat.instruction_essa++;
+ if (!vcpu->kvm->arch.use_cmma)
+ return kvm_s390_inject_program_int(vcpu, PGM_OPERATION);
+
+ if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
+ return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
+ /* Check for invalid operation request code */
+ orc = (vcpu->arch.sie_block->ipb & 0xf0000000) >> 28;
+ /* ORCs 0-6 are always valid */
+ if (orc > (test_kvm_facility(vcpu->kvm, 147) ? ESSA_SET_STABLE_NODAT
+ : ESSA_SET_STABLE_IF_RESIDENT))
+ return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
+
+ if (!vcpu->kvm->arch.migration_mode) {
+ /*
+ * CMMA is enabled in the KVM settings, but is disabled in
+ * the SIE block and in the mm_context, and we are not doing
+ * a migration. Enable CMMA in the mm_context.
+ * Since we need to take a write lock to write to the context
+ * to avoid races with storage keys handling, we check if the
+ * value really needs to be changed; if the value is
+ * already correct, we do nothing and avoid the lock.
+ */
+ if (vcpu->kvm->mm->context.uses_cmm == 0) {
+ down_write(&vcpu->kvm->mm->mmap_sem);
+ vcpu->kvm->mm->context.uses_cmm = 1;
+ up_write(&vcpu->kvm->mm->mmap_sem);
+ }
+ /*
+ * If we are here, we are supposed to have CMMA enabled in
+ * the SIE block. Enabling CMMA works on a per-CPU basis,
+ * while the context use_cmma flag is per process.
+ * It's possible that the context flag is enabled and the
+ * SIE flag is not, so we set the flag always; if it was
+ * already set, nothing changes, otherwise we enable it
+ * on this CPU too.
+ */
+ vcpu->arch.sie_block->ecb2 |= ECB2_CMMA;
+ /* Retry the ESSA instruction */
+ kvm_s390_retry_instr(vcpu);
+ } else {
+ int srcu_idx;
+
+ down_read(&vcpu->kvm->mm->mmap_sem);
+ srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
+ i = __do_essa(vcpu, orc);
+ srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx);
+ up_read(&vcpu->kvm->mm->mmap_sem);
+ if (i < 0)
+ return i;
+ /* Account for the possible extra cbrl entry */
+ entries += i;
+ }
+ vcpu->arch.sie_block->cbrlo &= PAGE_MASK; /* reset nceo */
+ cbrlo = phys_to_virt(vcpu->arch.sie_block->cbrlo);
+ down_read(&gmap->mm->mmap_sem);
+ for (i = 0; i < entries; ++i)
+ __gmap_zap(gmap, cbrlo[i]);
+ up_read(&gmap->mm->mmap_sem);
+ return 0;
+}
+
+int kvm_s390_handle_b9(struct kvm_vcpu *vcpu)
+{
+ switch (vcpu->arch.sie_block->ipa & 0x00ff) {
+ case 0x8a:
+ case 0x8e:
+ case 0x8f:
+ return handle_ipte_interlock(vcpu);
+ case 0x8d:
+ return handle_epsw(vcpu);
+ case 0xab:
+ return handle_essa(vcpu);
+ case 0xaf:
+ return handle_pfmf(vcpu);
+ default:
+ return -EOPNOTSUPP;
+ }
+}
+
+int kvm_s390_handle_lctl(struct kvm_vcpu *vcpu)
+{
+ int reg1 = (vcpu->arch.sie_block->ipa & 0x00f0) >> 4;
+ int reg3 = vcpu->arch.sie_block->ipa & 0x000f;
+ int reg, rc, nr_regs;
+ u32 ctl_array[16];
+ u64 ga;
+ u8 ar;
+
+ vcpu->stat.instruction_lctl++;
+
+ if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
+ return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
+
+ ga = kvm_s390_get_base_disp_rs(vcpu, &ar);
+
+ if (ga & 3)
+ return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
+
+ VCPU_EVENT(vcpu, 4, "LCTL: r1:%d, r3:%d, addr: 0x%llx", reg1, reg3, ga);
+ trace_kvm_s390_handle_lctl(vcpu, 0, reg1, reg3, ga);
+
+ nr_regs = ((reg3 - reg1) & 0xf) + 1;
+ rc = read_guest(vcpu, ga, ar, ctl_array, nr_regs * sizeof(u32));
+ if (rc)
+ return kvm_s390_inject_prog_cond(vcpu, rc);
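+ /* load the lower 32 bits of control registers reg1 through reg3, wrapping at 15 */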
+ reg = reg1;
+ nr_regs = 0;
+ do {
+ vcpu->arch.sie_block->gcr[reg] &= 0xffffffff00000000ul;
+ vcpu->arch.sie_block->gcr[reg] |= ctl_array[nr_regs++];
+ if (reg == reg3)
+ break;
+ reg = (reg + 1) % 16;
+ } while (1);
+ kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
+ return 0;
+}
+
+int kvm_s390_handle_stctl(struct kvm_vcpu *vcpu)
+{
+ int reg1 = (vcpu->arch.sie_block->ipa & 0x00f0) >> 4;
+ int reg3 = vcpu->arch.sie_block->ipa & 0x000f;
+ int reg, rc, nr_regs;
+ u32 ctl_array[16];
+ u64 ga;
+ u8 ar;
+
+ vcpu->stat.instruction_stctl++;
+
+ if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
+ return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
+
+ ga = kvm_s390_get_base_disp_rs(vcpu, &ar);
+
+ if (ga & 3)
+ return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
+
+ VCPU_EVENT(vcpu, 4, "STCTL r1:%d, r3:%d, addr: 0x%llx", reg1, reg3, ga);
+ trace_kvm_s390_handle_stctl(vcpu, 0, reg1, reg3, ga);
+
+ reg = reg1;
+ nr_regs = 0;
+ do {
+ ctl_array[nr_regs++] = vcpu->arch.sie_block->gcr[reg];
+ if (reg == reg3)
+ break;
+ reg = (reg + 1) % 16;
+ } while (1);
+ rc = write_guest(vcpu, ga, ar, ctl_array, nr_regs * sizeof(u32));
+ return rc ? kvm_s390_inject_prog_cond(vcpu, rc) : 0;
+}
+
+static int handle_lctlg(struct kvm_vcpu *vcpu)
+{
+ int reg1 = (vcpu->arch.sie_block->ipa & 0x00f0) >> 4;
+ int reg3 = vcpu->arch.sie_block->ipa & 0x000f;
+ int reg, rc, nr_regs;
+ u64 ctl_array[16];
+ u64 ga;
+ u8 ar;
+
+ vcpu->stat.instruction_lctlg++;
+
+ if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
+ return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
+
+ ga = kvm_s390_get_base_disp_rsy(vcpu, &ar);
+
+ if (ga & 7)
+ return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
+
+ VCPU_EVENT(vcpu, 4, "LCTLG: r1:%d, r3:%d, addr: 0x%llx", reg1, reg3, ga);
+ trace_kvm_s390_handle_lctl(vcpu, 1, reg1, reg3, ga);
+
+ nr_regs = ((reg3 - reg1) & 0xf) + 1;
+ rc = read_guest(vcpu, ga, ar, ctl_array, nr_regs * sizeof(u64));
+ if (rc)
+ return kvm_s390_inject_prog_cond(vcpu, rc);
+ reg = reg1;
+ nr_regs = 0;
+ do {
+ vcpu->arch.sie_block->gcr[reg] = ctl_array[nr_regs++];
+ if (reg == reg3)
+ break;
+ reg = (reg + 1) % 16;
+ } while (1);
+ kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
+ return 0;
+}
+
+static int handle_stctg(struct kvm_vcpu *vcpu)
+{
+ int reg1 = (vcpu->arch.sie_block->ipa & 0x00f0) >> 4;
+ int reg3 = vcpu->arch.sie_block->ipa & 0x000f;
+ int reg, rc, nr_regs;
+ u64 ctl_array[16];
+ u64 ga;
+ u8 ar;
+
+ vcpu->stat.instruction_stctg++;
+
+ if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
+ return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
+
+ ga = kvm_s390_get_base_disp_rsy(vcpu, &ar);
+
+ if (ga & 7)
+ return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
+
+ VCPU_EVENT(vcpu, 4, "STCTG r1:%d, r3:%d, addr: 0x%llx", reg1, reg3, ga);
+ trace_kvm_s390_handle_stctl(vcpu, 1, reg1, reg3, ga);
+
+ reg = reg1;
+ nr_regs = 0;
+ do {
+ ctl_array[nr_regs++] = vcpu->arch.sie_block->gcr[reg];
+ if (reg == reg3)
+ break;
+ reg = (reg + 1) % 16;
+ } while (1);
+ rc = write_guest(vcpu, ga, ar, ctl_array, nr_regs * sizeof(u64));
+ return rc ? kvm_s390_inject_prog_cond(vcpu, rc) : 0;
+}
+
+int kvm_s390_handle_eb(struct kvm_vcpu *vcpu)
+{
+ switch (vcpu->arch.sie_block->ipb & 0x000000ff) {
+ case 0x25:
+ return handle_stctg(vcpu);
+ case 0x2f:
+ return handle_lctlg(vcpu);
+ case 0x60:
+ case 0x61:
+ case 0x62:
+ return handle_ri(vcpu);
+ default:
+ return -EOPNOTSUPP;
+ }
+}
+
+static int handle_tprot(struct kvm_vcpu *vcpu)
+{
+ u64 address1, address2;
+ unsigned long hva, gpa;
+ int ret = 0, cc = 0;
+ bool writable;
+ u8 ar;
+
+ vcpu->stat.instruction_tprot++;
+
+ if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
+ return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
+
+ kvm_s390_get_base_disp_sse(vcpu, &address1, &address2, &ar, NULL);
+
+ /*
+  * we only handle the Linux memory detection case:
+  * access key == 0
+  * everything else goes to userspace.
+  */
+ if (address2 & 0xf0)
+ return -EOPNOTSUPP;
+ if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_DAT)
+ ipte_lock(vcpu);
+ ret = guest_translate_address(vcpu, address1, ar, &gpa, GACC_STORE);
+ if (ret == PGM_PROTECTION) {
+ /* Write protected? Try again with read-only... */
+ cc = 1;
+ ret = guest_translate_address(vcpu, address1, ar, &gpa,
+ GACC_FETCH);
+ }
+ if (ret) {
+ if (ret == PGM_ADDRESSING || ret == PGM_TRANSLATION_SPEC) {
+ ret = kvm_s390_inject_program_int(vcpu, ret);
+ } else if (ret > 0) {
+ /* Translation not available */
+ kvm_s390_set_psw_cc(vcpu, 3);
+ ret = 0;
+ }
+ goto out_unlock;
+ }
+
+ hva = gfn_to_hva_prot(vcpu->kvm, gpa_to_gfn(gpa), &writable);
+ if (kvm_is_error_hva(hva)) {
+ ret = kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
+ } else {
+ if (!writable)
+ cc = 1; /* Write not permitted ==> read-only */
+ kvm_s390_set_psw_cc(vcpu, cc);
+ /* Note: CC2 only occurs for storage keys (not supported yet) */
+ }
+out_unlock:
+ if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_DAT)
+ ipte_unlock(vcpu);
+ return ret;
+}
+
+int kvm_s390_handle_e5(struct kvm_vcpu *vcpu)
+{
+ switch (vcpu->arch.sie_block->ipa & 0x00ff) {
+ case 0x01:
+ return handle_tprot(vcpu);
+ default:
+ return -EOPNOTSUPP;
+ }
+}
+
+static int handle_sckpf(struct kvm_vcpu *vcpu)
+{
+ u32 value;
+
+ vcpu->stat.instruction_sckpf++;
+
+ if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
+ return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
+
+ if (vcpu->run->s.regs.gprs[0] & 0x00000000ffff0000)
+ return kvm_s390_inject_program_int(vcpu,
+ PGM_SPECIFICATION);
+
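+ /* bits 48-63 of reg 0 hold the new TOD programmable field value */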
+ value = vcpu->run->s.regs.gprs[0] & 0x000000000000ffff;
+ vcpu->arch.sie_block->todpr = value;
+
+ return 0;
+}
+
+static int handle_ptff(struct kvm_vcpu *vcpu)
+{
+ vcpu->stat.instruction_ptff++;
+
+ /* we don't emulate any control instructions yet */
+ kvm_s390_set_psw_cc(vcpu, 3);
+ return 0;
+}
+
+int kvm_s390_handle_01(struct kvm_vcpu *vcpu)
+{
+ switch (vcpu->arch.sie_block->ipa & 0x00ff) {
+ case 0x04:
+ return handle_ptff(vcpu);
+ case 0x07:
+ return handle_sckpf(vcpu);
+ default:
+ return -EOPNOTSUPP;
+ }
+}
diff --git a/arch/s390/kvm/sigp.c b/arch/s390/kvm/sigp.c
new file mode 100644
index 000000000..3dc921e85
--- /dev/null
+++ b/arch/s390/kvm/sigp.c
@@ -0,0 +1,507 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * handling interprocessor communication
+ *
+ * Copyright IBM Corp. 2008, 2013
+ *
+ * Author(s): Carsten Otte <cotte@de.ibm.com>
+ * Christian Borntraeger <borntraeger@de.ibm.com>
+ * Christian Ehrhardt <ehrhardt@de.ibm.com>
+ */
+
+#include <linux/kvm.h>
+#include <linux/kvm_host.h>
+#include <linux/slab.h>
+#include <asm/sigp.h>
+#include "gaccess.h"
+#include "kvm-s390.h"
+#include "trace.h"
+
+static int __sigp_sense(struct kvm_vcpu *vcpu, struct kvm_vcpu *dst_vcpu,
+ u64 *reg)
+{
+ const bool stopped = kvm_s390_test_cpuflags(dst_vcpu, CPUSTAT_STOPPED);
+ int rc;
+ int ext_call_pending;
+
+ ext_call_pending = kvm_s390_ext_call_pending(dst_vcpu);
+ if (!stopped && !ext_call_pending)
+ rc = SIGP_CC_ORDER_CODE_ACCEPTED;
+ else {
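+ /* status is stored in the low 32 bits of the first operand register */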
+ *reg &= 0xffffffff00000000UL;
+ if (ext_call_pending)
+ *reg |= SIGP_STATUS_EXT_CALL_PENDING;
+ if (stopped)
+ *reg |= SIGP_STATUS_STOPPED;
+ rc = SIGP_CC_STATUS_STORED;
+ }
+
+ VCPU_EVENT(vcpu, 4, "sensed status of cpu %x rc %x", dst_vcpu->vcpu_id,
+ rc);
+ return rc;
+}
+
+static int __inject_sigp_emergency(struct kvm_vcpu *vcpu,
+ struct kvm_vcpu *dst_vcpu)
+{
+ struct kvm_s390_irq irq = {
+ .type = KVM_S390_INT_EMERGENCY,
+ .u.emerg.code = vcpu->vcpu_id,
+ };
+ int rc = 0;
+
+ rc = kvm_s390_inject_vcpu(dst_vcpu, &irq);
+ if (!rc)
+ VCPU_EVENT(vcpu, 4, "sent sigp emerg to cpu %x",
+ dst_vcpu->vcpu_id);
+
+ return rc ? rc : SIGP_CC_ORDER_CODE_ACCEPTED;
+}
+
+static int __sigp_emergency(struct kvm_vcpu *vcpu, struct kvm_vcpu *dst_vcpu)
+{
+ return __inject_sigp_emergency(vcpu, dst_vcpu);
+}
+
+static int __sigp_conditional_emergency(struct kvm_vcpu *vcpu,
+ struct kvm_vcpu *dst_vcpu,
+ u16 asn, u64 *reg)
+{
+ const u64 psw_int_mask = PSW_MASK_IO | PSW_MASK_EXT;
+ u16 p_asn, s_asn;
+ psw_t *psw;
+ bool idle;
+
+ idle = is_vcpu_idle(vcpu);
+ psw = &dst_vcpu->arch.sie_block->gpsw;
+ p_asn = dst_vcpu->arch.sie_block->gcr[4] & 0xffff; /* Primary ASN */
+ s_asn = dst_vcpu->arch.sie_block->gcr[3] & 0xffff; /* Secondary ASN */
+
+ /* Inject the emergency signal? */
+ if (!is_vcpu_stopped(vcpu)
+ || (psw->mask & psw_int_mask) != psw_int_mask
+ || (idle && psw->addr != 0)
+ || (!idle && (asn == p_asn || asn == s_asn))) {
+ return __inject_sigp_emergency(vcpu, dst_vcpu);
+ } else {
+ *reg &= 0xffffffff00000000UL;
+ *reg |= SIGP_STATUS_INCORRECT_STATE;
+ return SIGP_CC_STATUS_STORED;
+ }
+}
+
+static int __sigp_external_call(struct kvm_vcpu *vcpu,
+ struct kvm_vcpu *dst_vcpu, u64 *reg)
+{
+ struct kvm_s390_irq irq = {
+ .type = KVM_S390_INT_EXTERNAL_CALL,
+ .u.extcall.code = vcpu->vcpu_id,
+ };
+ int rc;
+
+ rc = kvm_s390_inject_vcpu(dst_vcpu, &irq);
+ if (rc == -EBUSY) {
+ *reg &= 0xffffffff00000000UL;
+ *reg |= SIGP_STATUS_EXT_CALL_PENDING;
+ return SIGP_CC_STATUS_STORED;
+ } else if (rc == 0) {
+ VCPU_EVENT(vcpu, 4, "sent sigp ext call to cpu %x",
+ dst_vcpu->vcpu_id);
+ }
+
+ return rc ? rc : SIGP_CC_ORDER_CODE_ACCEPTED;
+}
+
+static int __sigp_stop(struct kvm_vcpu *vcpu, struct kvm_vcpu *dst_vcpu)
+{
+ struct kvm_s390_irq irq = {
+ .type = KVM_S390_SIGP_STOP,
+ };
+ int rc;
+
+ rc = kvm_s390_inject_vcpu(dst_vcpu, &irq);
+ if (rc == -EBUSY)
+ rc = SIGP_CC_BUSY;
+ else if (rc == 0)
+ VCPU_EVENT(vcpu, 4, "sent sigp stop to cpu %x",
+ dst_vcpu->vcpu_id);
+
+ return rc;
+}
+
+static int __sigp_stop_and_store_status(struct kvm_vcpu *vcpu,
+ struct kvm_vcpu *dst_vcpu, u64 *reg)
+{
+ struct kvm_s390_irq irq = {
+ .type = KVM_S390_SIGP_STOP,
+ .u.stop.flags = KVM_S390_STOP_FLAG_STORE_STATUS,
+ };
+ int rc;
+
+ rc = kvm_s390_inject_vcpu(dst_vcpu, &irq);
+ if (rc == -EBUSY)
+ rc = SIGP_CC_BUSY;
+ else if (rc == 0)
+ VCPU_EVENT(vcpu, 4, "sent sigp stop and store status to cpu %x",
+ dst_vcpu->vcpu_id);
+
+ return rc;
+}
+
+static int __sigp_set_arch(struct kvm_vcpu *vcpu, u32 parameter,
+ u64 *status_reg)
+{
+ unsigned int i;
+ struct kvm_vcpu *v;
+ bool all_stopped = true;
+
+ kvm_for_each_vcpu(i, v, vcpu->kvm) {
+ if (v == vcpu)
+ continue;
+ if (!is_vcpu_stopped(v))
+ all_stopped = false;
+ }
+
+ *status_reg &= 0xffffffff00000000UL;
+
+ /* Reject set arch order, with czam we're always in z/Arch mode. */
+ *status_reg |= (all_stopped ? SIGP_STATUS_INVALID_PARAMETER :
+ SIGP_STATUS_INCORRECT_STATE);
+ return SIGP_CC_STATUS_STORED;
+}
+
+static int __sigp_set_prefix(struct kvm_vcpu *vcpu, struct kvm_vcpu *dst_vcpu,
+ u32 address, u64 *reg)
+{
+ struct kvm_s390_irq irq = {
+ .type = KVM_S390_SIGP_SET_PREFIX,
+ .u.prefix.address = address & 0x7fffe000u,
+ };
+ int rc;
+
+ /*
+ * Make sure the new value is valid memory. We only need to check the
+ * first page, since address is 8k aligned and memory pieces are always
+ * at least 1MB aligned and have at least a size of 1MB.
+ */
+ if (kvm_is_error_gpa(vcpu->kvm, irq.u.prefix.address)) {
+ *reg &= 0xffffffff00000000UL;
+ *reg |= SIGP_STATUS_INVALID_PARAMETER;
+ return SIGP_CC_STATUS_STORED;
+ }
+
+ rc = kvm_s390_inject_vcpu(dst_vcpu, &irq);
+ if (rc == -EBUSY) {
+ *reg &= 0xffffffff00000000UL;
+ *reg |= SIGP_STATUS_INCORRECT_STATE;
+ return SIGP_CC_STATUS_STORED;
+ }
+
+ return rc;
+}
+
+static int __sigp_store_status_at_addr(struct kvm_vcpu *vcpu,
+ struct kvm_vcpu *dst_vcpu,
+ u32 addr, u64 *reg)
+{
+ int rc;
+
+ if (!kvm_s390_test_cpuflags(dst_vcpu, CPUSTAT_STOPPED)) {
+ *reg &= 0xffffffff00000000UL;
+ *reg |= SIGP_STATUS_INCORRECT_STATE;
+ return SIGP_CC_STATUS_STORED;
+ }
+
+ addr &= 0x7ffffe00;
+ rc = kvm_s390_store_status_unloaded(dst_vcpu, addr);
+ if (rc == -EFAULT) {
+ *reg &= 0xffffffff00000000UL;
+ *reg |= SIGP_STATUS_INVALID_PARAMETER;
+ rc = SIGP_CC_STATUS_STORED;
+ }
+ return rc;
+}
+
+static int __sigp_sense_running(struct kvm_vcpu *vcpu,
+ struct kvm_vcpu *dst_vcpu, u64 *reg)
+{
+ int rc;
+
+ if (!test_kvm_facility(vcpu->kvm, 9)) {
+ *reg &= 0xffffffff00000000UL;
+ *reg |= SIGP_STATUS_INVALID_ORDER;
+ return SIGP_CC_STATUS_STORED;
+ }
+
+ if (kvm_s390_test_cpuflags(dst_vcpu, CPUSTAT_RUNNING)) {
+ /* running */
+ rc = SIGP_CC_ORDER_CODE_ACCEPTED;
+ } else {
+ /* not running */
+ *reg &= 0xffffffff00000000UL;
+ *reg |= SIGP_STATUS_NOT_RUNNING;
+ rc = SIGP_CC_STATUS_STORED;
+ }
+
+ VCPU_EVENT(vcpu, 4, "sensed running status of cpu %x rc %x",
+ dst_vcpu->vcpu_id, rc);
+
+ return rc;
+}
+
+static int __prepare_sigp_re_start(struct kvm_vcpu *vcpu,
+ struct kvm_vcpu *dst_vcpu, u8 order_code)
+{
+ struct kvm_s390_local_interrupt *li = &dst_vcpu->arch.local_int;
+ /* handle (RE)START in user space */
+ int rc = -EOPNOTSUPP;
+
+ /* make sure we don't race with STOP irq injection */
+ spin_lock(&li->lock);
+ if (kvm_s390_is_stop_irq_pending(dst_vcpu))
+ rc = SIGP_CC_BUSY;
+ spin_unlock(&li->lock);
+
+ return rc;
+}
+
+static int __prepare_sigp_cpu_reset(struct kvm_vcpu *vcpu,
+ struct kvm_vcpu *dst_vcpu, u8 order_code)
+{
+ /* handle (INITIAL) CPU RESET in user space */
+ return -EOPNOTSUPP;
+}
+
+static int __prepare_sigp_unknown(struct kvm_vcpu *vcpu,
+ struct kvm_vcpu *dst_vcpu)
+{
+ /* handle unknown orders in user space */
+ return -EOPNOTSUPP;
+}
+
+static int handle_sigp_dst(struct kvm_vcpu *vcpu, u8 order_code,
+ u16 cpu_addr, u32 parameter, u64 *status_reg)
+{
+ int rc;
+ struct kvm_vcpu *dst_vcpu = kvm_get_vcpu_by_id(vcpu->kvm, cpu_addr);
+
+ if (!dst_vcpu)
+ return SIGP_CC_NOT_OPERATIONAL;
+
+ /*
+ * SIGP RESTART, SIGP STOP, and SIGP STOP AND STORE STATUS orders
+ * are processed asynchronously. Until the affected VCPU finishes
+ * its work and calls back into KVM to clear the (RESTART or STOP)
+ * interrupt, we need to return any new non-reset orders "busy".
+ *
+ * This is important because a single VCPU could issue:
+ * 1) SIGP STOP $DESTINATION
+ * 2) SIGP SENSE $DESTINATION
+ *
+ * If the SIGP SENSE would not be rejected as "busy", it could
+ * return an incorrect answer as to whether the VCPU is STOPPED
+ * or OPERATING.
+ */
+ if (order_code != SIGP_INITIAL_CPU_RESET &&
+ order_code != SIGP_CPU_RESET) {
+ /*
+ * Lockless check. Both SIGP STOP and SIGP (RE)START
+ * properly synchronize everything while processing
+ * their orders, while the guest cannot observe a
+ * difference when issuing other orders from two
+ * different VCPUs.
+ */
+ if (kvm_s390_is_stop_irq_pending(dst_vcpu) ||
+ kvm_s390_is_restart_irq_pending(dst_vcpu))
+ return SIGP_CC_BUSY;
+ }
+
+ switch (order_code) {
+ case SIGP_SENSE:
+ vcpu->stat.instruction_sigp_sense++;
+ rc = __sigp_sense(vcpu, dst_vcpu, status_reg);
+ break;
+ case SIGP_EXTERNAL_CALL:
+ vcpu->stat.instruction_sigp_external_call++;
+ rc = __sigp_external_call(vcpu, dst_vcpu, status_reg);
+ break;
+ case SIGP_EMERGENCY_SIGNAL:
+ vcpu->stat.instruction_sigp_emergency++;
+ rc = __sigp_emergency(vcpu, dst_vcpu);
+ break;
+ case SIGP_STOP:
+ vcpu->stat.instruction_sigp_stop++;
+ rc = __sigp_stop(vcpu, dst_vcpu);
+ break;
+ case SIGP_STOP_AND_STORE_STATUS:
+ vcpu->stat.instruction_sigp_stop_store_status++;
+ rc = __sigp_stop_and_store_status(vcpu, dst_vcpu, status_reg);
+ break;
+ case SIGP_STORE_STATUS_AT_ADDRESS:
+ vcpu->stat.instruction_sigp_store_status++;
+ rc = __sigp_store_status_at_addr(vcpu, dst_vcpu, parameter,
+ status_reg);
+ break;
+ case SIGP_SET_PREFIX:
+ vcpu->stat.instruction_sigp_prefix++;
+ rc = __sigp_set_prefix(vcpu, dst_vcpu, parameter, status_reg);
+ break;
+ case SIGP_COND_EMERGENCY_SIGNAL:
+ vcpu->stat.instruction_sigp_cond_emergency++;
+ rc = __sigp_conditional_emergency(vcpu, dst_vcpu, parameter,
+ status_reg);
+ break;
+ case SIGP_SENSE_RUNNING:
+ vcpu->stat.instruction_sigp_sense_running++;
+ rc = __sigp_sense_running(vcpu, dst_vcpu, status_reg);
+ break;
+ case SIGP_START:
+ vcpu->stat.instruction_sigp_start++;
+ rc = __prepare_sigp_re_start(vcpu, dst_vcpu, order_code);
+ break;
+ case SIGP_RESTART:
+ vcpu->stat.instruction_sigp_restart++;
+ rc = __prepare_sigp_re_start(vcpu, dst_vcpu, order_code);
+ break;
+ case SIGP_INITIAL_CPU_RESET:
+ vcpu->stat.instruction_sigp_init_cpu_reset++;
+ rc = __prepare_sigp_cpu_reset(vcpu, dst_vcpu, order_code);
+ break;
+ case SIGP_CPU_RESET:
+ vcpu->stat.instruction_sigp_cpu_reset++;
+ rc = __prepare_sigp_cpu_reset(vcpu, dst_vcpu, order_code);
+ break;
+ default:
+ vcpu->stat.instruction_sigp_unknown++;
+ rc = __prepare_sigp_unknown(vcpu, dst_vcpu);
+ }
+
+ if (rc == -EOPNOTSUPP)
+ VCPU_EVENT(vcpu, 4,
+ "sigp order %u -> cpu %x: handled in user space",
+ order_code, dst_vcpu->vcpu_id);
+
+ return rc;
+}
+
+static int handle_sigp_order_in_user_space(struct kvm_vcpu *vcpu, u8 order_code,
+ u16 cpu_addr)
+{
+ if (!vcpu->kvm->arch.user_sigp)
+ return 0;
+
+ switch (order_code) {
+ case SIGP_SENSE:
+ case SIGP_EXTERNAL_CALL:
+ case SIGP_EMERGENCY_SIGNAL:
+ case SIGP_COND_EMERGENCY_SIGNAL:
+ case SIGP_SENSE_RUNNING:
+ return 0;
+ /* update counters as we're directly dropping to user space */
+ case SIGP_STOP:
+ vcpu->stat.instruction_sigp_stop++;
+ break;
+ case SIGP_STOP_AND_STORE_STATUS:
+ vcpu->stat.instruction_sigp_stop_store_status++;
+ break;
+ case SIGP_STORE_STATUS_AT_ADDRESS:
+ vcpu->stat.instruction_sigp_store_status++;
+ break;
+ case SIGP_STORE_ADDITIONAL_STATUS:
+ vcpu->stat.instruction_sigp_store_adtl_status++;
+ break;
+ case SIGP_SET_PREFIX:
+ vcpu->stat.instruction_sigp_prefix++;
+ break;
+ case SIGP_START:
+ vcpu->stat.instruction_sigp_start++;
+ break;
+ case SIGP_RESTART:
+ vcpu->stat.instruction_sigp_restart++;
+ break;
+ case SIGP_INITIAL_CPU_RESET:
+ vcpu->stat.instruction_sigp_init_cpu_reset++;
+ break;
+ case SIGP_CPU_RESET:
+ vcpu->stat.instruction_sigp_cpu_reset++;
+ break;
+ default:
+ vcpu->stat.instruction_sigp_unknown++;
+ }
+ VCPU_EVENT(vcpu, 3, "SIGP: order %u for CPU %d handled in userspace",
+ order_code, cpu_addr);
+
+ return 1;
+}
+
+int kvm_s390_handle_sigp(struct kvm_vcpu *vcpu)
+{
+ int r1 = (vcpu->arch.sie_block->ipa & 0x00f0) >> 4;
+ int r3 = vcpu->arch.sie_block->ipa & 0x000f;
+ u32 parameter;
+ u16 cpu_addr = vcpu->run->s.regs.gprs[r3];
+ u8 order_code;
+ int rc;
+
+ /* sigp in userspace can exit */
+ if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
+ return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
+
+ order_code = kvm_s390_get_base_disp_rs(vcpu, NULL);
+ if (handle_sigp_order_in_user_space(vcpu, order_code, cpu_addr))
+ return -EOPNOTSUPP;
+
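+ /* the 32-bit parameter is taken from the odd register of the r1 pair */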
+ if (r1 % 2)
+ parameter = vcpu->run->s.regs.gprs[r1];
+ else
+ parameter = vcpu->run->s.regs.gprs[r1 + 1];
+
+ trace_kvm_s390_handle_sigp(vcpu, order_code, cpu_addr, parameter);
+ switch (order_code) {
+ case SIGP_SET_ARCHITECTURE:
+ vcpu->stat.instruction_sigp_arch++;
+ rc = __sigp_set_arch(vcpu, parameter,
+ &vcpu->run->s.regs.gprs[r1]);
+ break;
+ default:
+ rc = handle_sigp_dst(vcpu, order_code, cpu_addr,
+ parameter,
+ &vcpu->run->s.regs.gprs[r1]);
+ }
+
+ if (rc < 0)
+ return rc;
+
+ kvm_s390_set_psw_cc(vcpu, rc);
+ return 0;
+}
+
+/*
+ * Handle SIGP partial execution interception.
+ *
+ * This interception will occur at the source cpu when a source cpu sends an
+ * external call to a target cpu and the target cpu has the WAIT bit set in
+ * its cpuflags. Interception will occur after the interrupt indicator bits at
+ * the target cpu have been set. All error cases will lead to instruction
+ * interception; therefore nothing is to be checked or prepared.
+ */
+int kvm_s390_handle_sigp_pei(struct kvm_vcpu *vcpu)
+{
+ int r3 = vcpu->arch.sie_block->ipa & 0x000f;
+ u16 cpu_addr = vcpu->run->s.regs.gprs[r3];
+ struct kvm_vcpu *dest_vcpu;
+ u8 order_code = kvm_s390_get_base_disp_rs(vcpu, NULL);
+
+ trace_kvm_s390_handle_sigp_pei(vcpu, order_code, cpu_addr);
+
+ if (order_code == SIGP_EXTERNAL_CALL) {
+ dest_vcpu = kvm_get_vcpu_by_id(vcpu->kvm, cpu_addr);
+ BUG_ON(dest_vcpu == NULL);
+
+ kvm_s390_vcpu_wakeup(dest_vcpu);
+ kvm_s390_set_psw_cc(vcpu, SIGP_CC_ORDER_CODE_ACCEPTED);
+ return 0;
+ }
+
+ return -EOPNOTSUPP;
+}
diff --git a/arch/s390/kvm/trace-s390.h b/arch/s390/kvm/trace-s390.h
new file mode 100644
index 000000000..6f0209d45
--- /dev/null
+++ b/arch/s390/kvm/trace-s390.h
@@ -0,0 +1,340 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#if !defined(_TRACE_KVMS390_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_KVMS390_H
+
+#include <linux/tracepoint.h>
+
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM kvm-s390
+#define TRACE_INCLUDE_PATH .
+#undef TRACE_INCLUDE_FILE
+#define TRACE_INCLUDE_FILE trace-s390
+
+/*
+ * The TRACE_SYSTEM_VAR defaults to TRACE_SYSTEM, but must be a
+ * legitimate C variable. It is not exported to user space.
+ */
+#undef TRACE_SYSTEM_VAR
+#define TRACE_SYSTEM_VAR kvm_s390
+
+/*
+ * Trace point for the creation of the kvm instance.
+ */
+TRACE_EVENT(kvm_s390_create_vm,
+ TP_PROTO(unsigned long type),
+ TP_ARGS(type),
+
+ TP_STRUCT__entry(
+ __field(unsigned long, type)
+ ),
+
+ TP_fast_assign(
+ __entry->type = type;
+ ),
+
+ TP_printk("create vm%s",
+ __entry->type & KVM_VM_S390_UCONTROL ? " (UCONTROL)" : "")
+ );
+
+/*
+ * Trace points for creation and destruction of vcpus.
+ */
+TRACE_EVENT(kvm_s390_create_vcpu,
+ TP_PROTO(unsigned int id, struct kvm_vcpu *vcpu,
+ struct kvm_s390_sie_block *sie_block),
+ TP_ARGS(id, vcpu, sie_block),
+
+ TP_STRUCT__entry(
+ __field(unsigned int, id)
+ __field(struct kvm_vcpu *, vcpu)
+ __field(struct kvm_s390_sie_block *, sie_block)
+ ),
+
+ TP_fast_assign(
+ __entry->id = id;
+ __entry->vcpu = vcpu;
+ __entry->sie_block = sie_block;
+ ),
+
+ TP_printk("create cpu %d at 0x%pK, sie block at 0x%pK",
+ __entry->id, __entry->vcpu, __entry->sie_block)
+ );
+
+TRACE_EVENT(kvm_s390_destroy_vcpu,
+ TP_PROTO(unsigned int id),
+ TP_ARGS(id),
+
+ TP_STRUCT__entry(
+ __field(unsigned int, id)
+ ),
+
+ TP_fast_assign(
+ __entry->id = id;
+ ),
+
+ TP_printk("destroy cpu %d", __entry->id)
+ );
+
+/*
+ * Trace point for start and stop of vcpus.
+ */
+TRACE_EVENT(kvm_s390_vcpu_start_stop,
+ TP_PROTO(unsigned int id, int state),
+ TP_ARGS(id, state),
+
+ TP_STRUCT__entry(
+ __field(unsigned int, id)
+ __field(int, state)
+ ),
+
+ TP_fast_assign(
+ __entry->id = id;
+ __entry->state = state;
+ ),
+
+ TP_printk("%s cpu %d", __entry->state ? "starting" : "stopping",
+ __entry->id)
+ );
+
+/*
+ * Trace points for injection of interrupts, either per machine or
+ * per vcpu.
+ */
+
+#define kvm_s390_int_type \
+ {KVM_S390_SIGP_STOP, "sigp stop"}, \
+ {KVM_S390_PROGRAM_INT, "program interrupt"}, \
+ {KVM_S390_SIGP_SET_PREFIX, "sigp set prefix"}, \
+ {KVM_S390_RESTART, "sigp restart"}, \
+ {KVM_S390_INT_PFAULT_INIT, "pfault init"}, \
+ {KVM_S390_INT_PFAULT_DONE, "pfault done"}, \
+ {KVM_S390_MCHK, "machine check"}, \
+ {KVM_S390_INT_CLOCK_COMP, "clock comparator"}, \
+ {KVM_S390_INT_CPU_TIMER, "cpu timer"}, \
+ {KVM_S390_INT_VIRTIO, "virtio interrupt"}, \
+ {KVM_S390_INT_SERVICE, "sclp interrupt"}, \
+ {KVM_S390_INT_EMERGENCY, "sigp emergency"}, \
+ {KVM_S390_INT_EXTERNAL_CALL, "sigp ext call"}
+
+#define get_irq_name(__type) \
+ (__type > KVM_S390_INT_IO_MAX ? \
+ __print_symbolic(__type, kvm_s390_int_type) : \
+ (__type & KVM_S390_INT_IO_AI_MASK ? \
+ "adapter I/O interrupt" : "subchannel I/O interrupt"))
+
+TRACE_EVENT(kvm_s390_inject_vm,
+ TP_PROTO(__u64 type, __u32 parm, __u64 parm64, int who),
+ TP_ARGS(type, parm, parm64, who),
+
+ TP_STRUCT__entry(
+ __field(__u32, inttype)
+ __field(__u32, parm)
+ __field(__u64, parm64)
+ __field(int, who)
+ ),
+
+ TP_fast_assign(
+ __entry->inttype = type & 0x00000000ffffffff;
+ __entry->parm = parm;
+ __entry->parm64 = parm64;
+ __entry->who = who;
+ ),
+
+ TP_printk("inject%s: type:%x (%s) parm:%x parm64:%llx",
+ (__entry->who == 1) ? " (from kernel)" :
+ (__entry->who == 2) ? " (from user)" : "",
+ __entry->inttype, get_irq_name(__entry->inttype),
+ __entry->parm, __entry->parm64)
+ );
+
+TRACE_EVENT(kvm_s390_inject_vcpu,
+ TP_PROTO(unsigned int id, __u64 type, __u32 parm, __u64 parm64),
+ TP_ARGS(id, type, parm, parm64),
+
+ TP_STRUCT__entry(
+ __field(int, id)
+ __field(__u32, inttype)
+ __field(__u32, parm)
+ __field(__u64, parm64)
+ ),
+
+ TP_fast_assign(
+ __entry->id = id;
+ __entry->inttype = type & 0x00000000ffffffff;
+ __entry->parm = parm;
+ __entry->parm64 = parm64;
+ ),
+
+ TP_printk("inject (vcpu %d): type:%x (%s) parm:%x parm64:%llx",
+ __entry->id, __entry->inttype,
+ get_irq_name(__entry->inttype), __entry->parm,
+ __entry->parm64)
+ );
+
+/*
+ * Trace point for the actual delivery of interrupts.
+ */
+TRACE_EVENT(kvm_s390_deliver_interrupt,
+ TP_PROTO(unsigned int id, __u64 type, __u64 data0, __u64 data1),
+ TP_ARGS(id, type, data0, data1),
+
+ TP_STRUCT__entry(
+ __field(int, id)
+ __field(__u32, inttype)
+ __field(__u64, data0)
+ __field(__u64, data1)
+ ),
+
+ TP_fast_assign(
+ __entry->id = id;
+ __entry->inttype = type & 0x00000000ffffffff;
+ __entry->data0 = data0;
+ __entry->data1 = data1;
+ ),
+
+ TP_printk("deliver interrupt (vcpu %d): type:%x (%s) " \
+ "data:%08llx %016llx",
+ __entry->id, __entry->inttype,
+ get_irq_name(__entry->inttype), __entry->data0,
+ __entry->data1)
+ );
+
+/*
+ * Trace point for resets that may be requested from userspace.
+ */
+TRACE_EVENT(kvm_s390_request_resets,
+ TP_PROTO(__u64 resets),
+ TP_ARGS(resets),
+
+ TP_STRUCT__entry(
+ __field(__u64, resets)
+ ),
+
+ TP_fast_assign(
+ __entry->resets = resets;
+ ),
+
+ TP_printk("requesting userspace resets %llx",
+ __entry->resets)
+ );
+
+/*
+ * Trace point for a vcpu's stop requests.
+ */
+TRACE_EVENT(kvm_s390_stop_request,
+ TP_PROTO(unsigned char stop_irq, unsigned char flags),
+ TP_ARGS(stop_irq, flags),
+
+ TP_STRUCT__entry(
+ __field(unsigned char, stop_irq)
+ __field(unsigned char, flags)
+ ),
+
+ TP_fast_assign(
+ __entry->stop_irq = stop_irq;
+ __entry->flags = flags;
+ ),
+
+ TP_printk("stop request, stop irq = %u, flags = %08x",
+ __entry->stop_irq, __entry->flags)
+ );
+
+
+/*
+ * Trace point for enabling channel I/O instruction support.
+ */
+TRACE_EVENT(kvm_s390_enable_css,
+ TP_PROTO(void *kvm),
+ TP_ARGS(kvm),
+
+ TP_STRUCT__entry(
+ __field(void *, kvm)
+ ),
+
+ TP_fast_assign(
+ __entry->kvm = kvm;
+ ),
+
+ TP_printk("enabling channel I/O support (kvm @ %pK)\n",
+ __entry->kvm)
+ );
+
+/*
+ * Trace point for enabling and disabling interlocking-and-broadcasting
+ * suppression.
+ */
+TRACE_EVENT(kvm_s390_enable_disable_ibs,
+ TP_PROTO(unsigned int id, int state),
+ TP_ARGS(id, state),
+
+ TP_STRUCT__entry(
+ __field(unsigned int, id)
+ __field(int, state)
+ ),
+
+ TP_fast_assign(
+ __entry->id = id;
+ __entry->state = state;
+ ),
+
+ TP_printk("%s ibs on cpu %d",
+ __entry->state ? "enabling" : "disabling", __entry->id)
+ );
+
+/*
+ * Trace point for modifying ais mode for a given isc.
+ */
+TRACE_EVENT(kvm_s390_modify_ais_mode,
+ TP_PROTO(__u8 isc, __u16 from, __u16 to),
+ TP_ARGS(isc, from, to),
+
+ TP_STRUCT__entry(
+ __field(__u8, isc)
+ __field(__u16, from)
+ __field(__u16, to)
+ ),
+
+ TP_fast_assign(
+ __entry->isc = isc;
+ __entry->from = from;
+ __entry->to = to;
+ ),
+
+ TP_printk("for isc %x, modifying interruption mode from %s to %s",
+ __entry->isc,
+ (__entry->from == KVM_S390_AIS_MODE_ALL) ?
+ "ALL-Interruptions Mode" :
+ (__entry->from == KVM_S390_AIS_MODE_SINGLE) ?
+ "Single-Interruption Mode" : "No-Interruptions Mode",
+ (__entry->to == KVM_S390_AIS_MODE_ALL) ?
+ "ALL-Interruptions Mode" :
+ (__entry->to == KVM_S390_AIS_MODE_SINGLE) ?
+ "Single-Interruption Mode" : "No-Interruptions Mode")
+ );
+
+/*
+ * Trace point for suppressed adapter I/O interrupt.
+ */
+TRACE_EVENT(kvm_s390_airq_suppressed,
+ TP_PROTO(__u32 id, __u8 isc),
+ TP_ARGS(id, isc),
+
+ TP_STRUCT__entry(
+ __field(__u32, id)
+ __field(__u8, isc)
+ ),
+
+ TP_fast_assign(
+ __entry->id = id;
+ __entry->isc = isc;
+ ),
+
+ TP_printk("adapter I/O interrupt suppressed (id:%x isc:%x)",
+ __entry->id, __entry->isc)
+ );
+
+
+#endif /* _TRACE_KVMS390_H */
+
+/* This part must be outside protection */
+#include <trace/define_trace.h>
diff --git a/arch/s390/kvm/trace.h b/arch/s390/kvm/trace.h
new file mode 100644
index 000000000..aa419eb6a
--- /dev/null
+++ b/arch/s390/kvm/trace.h
@@ -0,0 +1,462 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#if !defined(_TRACE_KVM_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_KVM_H
+
+#include <linux/tracepoint.h>
+#include <asm/sie.h>
+#include <asm/debug.h>
+#include <asm/dis.h>
+
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM kvm
+#define TRACE_INCLUDE_PATH .
+#undef TRACE_INCLUDE_FILE
+#define TRACE_INCLUDE_FILE trace
+
+/*
+ * Helpers for vcpu-specific tracepoints containing the same information
+ * as s390dbf VCPU_EVENTs.
+ */
+#define VCPU_PROTO_COMMON struct kvm_vcpu *vcpu
+#define VCPU_ARGS_COMMON vcpu
+#define VCPU_FIELD_COMMON __field(int, id) \
+ __field(unsigned long, pswmask) \
+ __field(unsigned long, pswaddr)
+#define VCPU_ASSIGN_COMMON do { \
+ __entry->id = vcpu->vcpu_id; \
+ __entry->pswmask = vcpu->arch.sie_block->gpsw.mask; \
+ __entry->pswaddr = vcpu->arch.sie_block->gpsw.addr; \
+ } while (0);
+#define VCPU_TP_PRINTK(p_str, p_args...) \
+ TP_printk("%02d[%016lx-%016lx]: " p_str, __entry->id, \
+ __entry->pswmask, __entry->pswaddr, p_args)
+
+TRACE_EVENT(kvm_s390_skey_related_inst,
+ TP_PROTO(VCPU_PROTO_COMMON),
+ TP_ARGS(VCPU_ARGS_COMMON),
+
+ TP_STRUCT__entry(
+ VCPU_FIELD_COMMON
+ ),
+
+ TP_fast_assign(
+ VCPU_ASSIGN_COMMON
+ ),
+ VCPU_TP_PRINTK("%s", "storage key related instruction")
+ );
+
+TRACE_EVENT(kvm_s390_major_guest_pfault,
+ TP_PROTO(VCPU_PROTO_COMMON),
+ TP_ARGS(VCPU_ARGS_COMMON),
+
+ TP_STRUCT__entry(
+ VCPU_FIELD_COMMON
+ ),
+
+ TP_fast_assign(
+ VCPU_ASSIGN_COMMON
+ ),
+ VCPU_TP_PRINTK("%s", "major fault, maybe applicable for pfault")
+ );
+
+TRACE_EVENT(kvm_s390_pfault_init,
+ TP_PROTO(VCPU_PROTO_COMMON, long pfault_token),
+ TP_ARGS(VCPU_ARGS_COMMON, pfault_token),
+
+ TP_STRUCT__entry(
+ VCPU_FIELD_COMMON
+ __field(long, pfault_token)
+ ),
+
+ TP_fast_assign(
+ VCPU_ASSIGN_COMMON
+ __entry->pfault_token = pfault_token;
+ ),
+ VCPU_TP_PRINTK("init pfault token %ld", __entry->pfault_token)
+ );
+
+TRACE_EVENT(kvm_s390_pfault_done,
+ TP_PROTO(VCPU_PROTO_COMMON, long pfault_token),
+ TP_ARGS(VCPU_ARGS_COMMON, pfault_token),
+
+ TP_STRUCT__entry(
+ VCPU_FIELD_COMMON
+ __field(long, pfault_token)
+ ),
+
+ TP_fast_assign(
+ VCPU_ASSIGN_COMMON
+ __entry->pfault_token = pfault_token;
+ ),
+ VCPU_TP_PRINTK("done pfault token %ld", __entry->pfault_token)
+ );
+
+/*
+ * Tracepoints for SIE entry and exit.
+ */
+TRACE_EVENT(kvm_s390_sie_enter,
+ TP_PROTO(VCPU_PROTO_COMMON, int cpuflags),
+ TP_ARGS(VCPU_ARGS_COMMON, cpuflags),
+
+ TP_STRUCT__entry(
+ VCPU_FIELD_COMMON
+ __field(int, cpuflags)
+ ),
+
+ TP_fast_assign(
+ VCPU_ASSIGN_COMMON
+ __entry->cpuflags = cpuflags;
+ ),
+
+ VCPU_TP_PRINTK("entering sie flags %x", __entry->cpuflags)
+ );
+
+TRACE_EVENT(kvm_s390_sie_fault,
+ TP_PROTO(VCPU_PROTO_COMMON),
+ TP_ARGS(VCPU_ARGS_COMMON),
+
+ TP_STRUCT__entry(
+ VCPU_FIELD_COMMON
+ ),
+
+ TP_fast_assign(
+ VCPU_ASSIGN_COMMON
+ ),
+
+ VCPU_TP_PRINTK("%s", "fault in sie instruction")
+ );
+
+TRACE_EVENT(kvm_s390_sie_exit,
+ TP_PROTO(VCPU_PROTO_COMMON, u8 icptcode),
+ TP_ARGS(VCPU_ARGS_COMMON, icptcode),
+
+ TP_STRUCT__entry(
+ VCPU_FIELD_COMMON
+ __field(u8, icptcode)
+ ),
+
+ TP_fast_assign(
+ VCPU_ASSIGN_COMMON
+ __entry->icptcode = icptcode;
+ ),
+
+ VCPU_TP_PRINTK("exit sie icptcode %d (%s)", __entry->icptcode,
+ __print_symbolic(__entry->icptcode,
+ sie_intercept_code))
+ );
+
+/*
+ * Trace point for intercepted instructions.
+ */
+TRACE_EVENT(kvm_s390_intercept_instruction,
+ TP_PROTO(VCPU_PROTO_COMMON, __u16 ipa, __u32 ipb),
+ TP_ARGS(VCPU_ARGS_COMMON, ipa, ipb),
+
+ TP_STRUCT__entry(
+ VCPU_FIELD_COMMON
+ __field(__u64, instruction)
+ ),
+
+ TP_fast_assign(
+ VCPU_ASSIGN_COMMON
+ __entry->instruction = ((__u64)ipa << 48) |
+ ((__u64)ipb << 16);
+ ),
+
+ VCPU_TP_PRINTK("intercepted instruction %016llx (%s)",
+ __entry->instruction,
+ __print_symbolic(icpt_insn_decoder(__entry->instruction),
+ icpt_insn_codes))
+ );
+
+/*
+ * Trace point for intercepted program interruptions.
+ */
+TRACE_EVENT(kvm_s390_intercept_prog,
+ TP_PROTO(VCPU_PROTO_COMMON, __u16 code),
+ TP_ARGS(VCPU_ARGS_COMMON, code),
+
+ TP_STRUCT__entry(
+ VCPU_FIELD_COMMON
+ __field(__u16, code)
+ ),
+
+ TP_fast_assign(
+ VCPU_ASSIGN_COMMON
+ __entry->code = code;
+ ),
+
+ VCPU_TP_PRINTK("intercepted program interruption %04x (%s)",
+ __entry->code,
+ __print_symbolic(__entry->code,
+ icpt_prog_codes))
+ );
+
+/*
+ * Trace point for validity intercepts.
+ */
+TRACE_EVENT(kvm_s390_intercept_validity,
+ TP_PROTO(VCPU_PROTO_COMMON, __u16 viwhy),
+ TP_ARGS(VCPU_ARGS_COMMON, viwhy),
+
+ TP_STRUCT__entry(
+ VCPU_FIELD_COMMON
+ __field(__u16, viwhy)
+ ),
+
+ TP_fast_assign(
+ VCPU_ASSIGN_COMMON
+ __entry->viwhy = viwhy;
+ ),
+
+ VCPU_TP_PRINTK("got validity intercept %04x", __entry->viwhy)
+ );
+
+/*
+ * Trace points for instructions that are of special interest.
+ */
+
+TRACE_EVENT(kvm_s390_handle_sigp,
+ TP_PROTO(VCPU_PROTO_COMMON, __u8 order_code, __u16 cpu_addr, \
+ __u32 parameter),
+ TP_ARGS(VCPU_ARGS_COMMON, order_code, cpu_addr, parameter),
+
+ TP_STRUCT__entry(
+ VCPU_FIELD_COMMON
+ __field(__u8, order_code)
+ __field(__u16, cpu_addr)
+ __field(__u32, parameter)
+ ),
+
+ TP_fast_assign(
+ VCPU_ASSIGN_COMMON
+ __entry->order_code = order_code;
+ __entry->cpu_addr = cpu_addr;
+ __entry->parameter = parameter;
+ ),
+
+ VCPU_TP_PRINTK("handle sigp order %02x (%s), cpu address %04x, " \
+ "parameter %08x", __entry->order_code,
+ __print_symbolic(__entry->order_code,
+ sigp_order_codes),
+ __entry->cpu_addr, __entry->parameter)
+ );
+
+TRACE_EVENT(kvm_s390_handle_sigp_pei,
+ TP_PROTO(VCPU_PROTO_COMMON, __u8 order_code, __u16 cpu_addr),
+ TP_ARGS(VCPU_ARGS_COMMON, order_code, cpu_addr),
+
+ TP_STRUCT__entry(
+ VCPU_FIELD_COMMON
+ __field(__u8, order_code)
+ __field(__u16, cpu_addr)
+ ),
+
+ TP_fast_assign(
+ VCPU_ASSIGN_COMMON
+ __entry->order_code = order_code;
+ __entry->cpu_addr = cpu_addr;
+ ),
+
+ VCPU_TP_PRINTK("handle sigp pei order %02x (%s), cpu address %04x",
+ __entry->order_code,
+ __print_symbolic(__entry->order_code,
+ sigp_order_codes),
+ __entry->cpu_addr)
+ );
+
+TRACE_EVENT(kvm_s390_handle_diag,
+ TP_PROTO(VCPU_PROTO_COMMON, __u16 code),
+ TP_ARGS(VCPU_ARGS_COMMON, code),
+
+ TP_STRUCT__entry(
+ VCPU_FIELD_COMMON
+ __field(__u16, code)
+ ),
+
+ TP_fast_assign(
+ VCPU_ASSIGN_COMMON
+ __entry->code = code;
+ ),
+
+ VCPU_TP_PRINTK("handle diagnose call %04x (%s)", __entry->code,
+ __print_symbolic(__entry->code, diagnose_codes))
+ );
+
+TRACE_EVENT(kvm_s390_handle_lctl,
+ TP_PROTO(VCPU_PROTO_COMMON, int g, int reg1, int reg3, u64 addr),
+ TP_ARGS(VCPU_ARGS_COMMON, g, reg1, reg3, addr),
+
+ TP_STRUCT__entry(
+ VCPU_FIELD_COMMON
+ __field(int, g)
+ __field(int, reg1)
+ __field(int, reg3)
+ __field(u64, addr)
+ ),
+
+ TP_fast_assign(
+ VCPU_ASSIGN_COMMON
+ __entry->g = g;
+ __entry->reg1 = reg1;
+ __entry->reg3 = reg3;
+ __entry->addr = addr;
+ ),
+
+ VCPU_TP_PRINTK("%s: loading cr %x-%x from %016llx",
+ __entry->g ? "lctlg" : "lctl",
+ __entry->reg1, __entry->reg3, __entry->addr)
+ );
+
+TRACE_EVENT(kvm_s390_handle_stctl,
+ TP_PROTO(VCPU_PROTO_COMMON, int g, int reg1, int reg3, u64 addr),
+ TP_ARGS(VCPU_ARGS_COMMON, g, reg1, reg3, addr),
+
+ TP_STRUCT__entry(
+ VCPU_FIELD_COMMON
+ __field(int, g)
+ __field(int, reg1)
+ __field(int, reg3)
+ __field(u64, addr)
+ ),
+
+ TP_fast_assign(
+ VCPU_ASSIGN_COMMON
+ __entry->g = g;
+ __entry->reg1 = reg1;
+ __entry->reg3 = reg3;
+ __entry->addr = addr;
+ ),
+
+ VCPU_TP_PRINTK("%s: storing cr %x-%x to %016llx",
+ __entry->g ? "stctg" : "stctl",
+ __entry->reg1, __entry->reg3, __entry->addr)
+ );
+
+TRACE_EVENT(kvm_s390_handle_prefix,
+ TP_PROTO(VCPU_PROTO_COMMON, int set, u32 address),
+ TP_ARGS(VCPU_ARGS_COMMON, set, address),
+
+ TP_STRUCT__entry(
+ VCPU_FIELD_COMMON
+ __field(int, set)
+ __field(u32, address)
+ ),
+
+ TP_fast_assign(
+ VCPU_ASSIGN_COMMON
+ __entry->set = set;
+ __entry->address = address;
+ ),
+
+ VCPU_TP_PRINTK("%s prefix to %08x",
+ __entry->set ? "setting" : "storing",
+ __entry->address)
+ );
+
+TRACE_EVENT(kvm_s390_handle_stap,
+ TP_PROTO(VCPU_PROTO_COMMON, u64 address),
+ TP_ARGS(VCPU_ARGS_COMMON, address),
+
+ TP_STRUCT__entry(
+ VCPU_FIELD_COMMON
+ __field(u64, address)
+ ),
+
+ TP_fast_assign(
+ VCPU_ASSIGN_COMMON
+ __entry->address = address;
+ ),
+
+ VCPU_TP_PRINTK("storing cpu address to %016llx",
+ __entry->address)
+ );
+
+TRACE_EVENT(kvm_s390_handle_stfl,
+ TP_PROTO(VCPU_PROTO_COMMON, unsigned int facility_list),
+ TP_ARGS(VCPU_ARGS_COMMON, facility_list),
+
+ TP_STRUCT__entry(
+ VCPU_FIELD_COMMON
+ __field(unsigned int, facility_list)
+ ),
+
+ TP_fast_assign(
+ VCPU_ASSIGN_COMMON
+ __entry->facility_list = facility_list;
+ ),
+
+ VCPU_TP_PRINTK("store facility list value %08x",
+ __entry->facility_list)
+ );
+
+TRACE_EVENT(kvm_s390_handle_stsi,
+ TP_PROTO(VCPU_PROTO_COMMON, int fc, int sel1, int sel2, u64 addr),
+ TP_ARGS(VCPU_ARGS_COMMON, fc, sel1, sel2, addr),
+
+ TP_STRUCT__entry(
+ VCPU_FIELD_COMMON
+ __field(int, fc)
+ __field(int, sel1)
+ __field(int, sel2)
+ __field(u64, addr)
+ ),
+
+ TP_fast_assign(
+ VCPU_ASSIGN_COMMON
+ __entry->fc = fc;
+ __entry->sel1 = sel1;
+ __entry->sel2 = sel2;
+ __entry->addr = addr;
+ ),
+
+ VCPU_TP_PRINTK("STSI %d.%d.%d information stored to %016llx",
+ __entry->fc, __entry->sel1, __entry->sel2,
+ __entry->addr)
+ );
+
+TRACE_EVENT(kvm_s390_handle_operexc,
+ TP_PROTO(VCPU_PROTO_COMMON, __u16 ipa, __u32 ipb),
+ TP_ARGS(VCPU_ARGS_COMMON, ipa, ipb),
+
+ TP_STRUCT__entry(
+ VCPU_FIELD_COMMON
+ __field(__u64, instruction)
+ ),
+
+ TP_fast_assign(
+ VCPU_ASSIGN_COMMON
+ __entry->instruction = ((__u64)ipa << 48) |
+ ((__u64)ipb << 16);
+ ),
+
+ VCPU_TP_PRINTK("operation exception on instruction %016llx (%s)",
+ __entry->instruction,
+ __print_symbolic(icpt_insn_decoder(__entry->instruction),
+ icpt_insn_codes))
+ );
+
+TRACE_EVENT(kvm_s390_handle_sthyi,
+ TP_PROTO(VCPU_PROTO_COMMON, u64 code, u64 addr),
+ TP_ARGS(VCPU_ARGS_COMMON, code, addr),
+
+ TP_STRUCT__entry(
+ VCPU_FIELD_COMMON
+ __field(u64, code)
+ __field(u64, addr)
+ ),
+
+ TP_fast_assign(
+ VCPU_ASSIGN_COMMON
+ __entry->code = code;
+ __entry->addr = addr;
+ ),
+
+ VCPU_TP_PRINTK("STHYI fc: %llu addr: %016llx",
+ __entry->code, __entry->addr)
+ );
+
+#endif /* _TRACE_KVM_H */
+
+/* This part must be outside protection */
+#include <trace/define_trace.h>
diff --git a/arch/s390/kvm/vsie.c b/arch/s390/kvm/vsie.c
new file mode 100644
index 000000000..17d73b71d
--- /dev/null
+++ b/arch/s390/kvm/vsie.c
@@ -0,0 +1,1200 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * kvm nested virtualization support for s390x
+ *
+ * Copyright IBM Corp. 2016, 2018
+ *
+ * Author(s): David Hildenbrand <dahi@linux.vnet.ibm.com>
+ */
+#include <linux/vmalloc.h>
+#include <linux/kvm_host.h>
+#include <linux/bug.h>
+#include <linux/list.h>
+#include <linux/bitmap.h>
+#include <linux/sched/signal.h>
+
+#include <asm/gmap.h>
+#include <asm/mmu_context.h>
+#include <asm/sclp.h>
+#include <asm/nmi.h>
+#include <asm/dis.h>
+#include "kvm-s390.h"
+#include "gaccess.h"
+
+struct vsie_page {
+ struct kvm_s390_sie_block scb_s; /* 0x0000 */
+ /*
+ * The backup info for machine check. Ensure it is at
+ * the same offset as in struct sie_page!
+ */
+ struct mcck_volatile_info mcck_info; /* 0x0200 */
+ /*
+ * The pinned original scb. Be aware that other VCPUs can modify
+ * it while we read from it. Values that are used for conditions or
+ * are reused conditionally should be accessed via READ_ONCE.
+ */
+ struct kvm_s390_sie_block *scb_o; /* 0x0218 */
+ /* the shadow gmap in use by the vsie_page */
+ struct gmap *gmap; /* 0x0220 */
+ /* address of the last reported fault to guest2 */
+ unsigned long fault_addr; /* 0x0228 */
+ /* calculated guest addresses of satellite control blocks */
+ gpa_t sca_gpa; /* 0x0230 */
+ gpa_t itdba_gpa; /* 0x0238 */
+ gpa_t gvrd_gpa; /* 0x0240 */
+ gpa_t riccbd_gpa; /* 0x0248 */
+ gpa_t sdnx_gpa; /* 0x0250 */
+ __u8 reserved[0x0700 - 0x0258]; /* 0x0258 */
+ struct kvm_s390_crypto_cb crycb; /* 0x0700 */
+ __u8 fac[S390_ARCH_FAC_LIST_SIZE_BYTE]; /* 0x0800 */
+};
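+
+/*
+ * The layout above is expected to fill exactly one 4k page: the facility
+ * list at offset 0x800 extends to the end of the page, which
+ * kvm_s390_handle_vsie() asserts via BUILD_BUG_ON(sizeof(struct vsie_page)
+ * != PAGE_SIZE).
+ */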
+
+/* trigger a validity icpt for the given scb */
+static int set_validity_icpt(struct kvm_s390_sie_block *scb,
+ __u16 reason_code)
+{
+ scb->ipa = 0x1000;
+ scb->ipb = ((__u32) reason_code) << 16;
+ scb->icptcode = ICPT_VALIDITY;
+ return 1;
+}
+
+/* mark the prefix as unmapped; this will block the VSIE */
+static void prefix_unmapped(struct vsie_page *vsie_page)
+{
+ atomic_or(PROG_REQUEST, &vsie_page->scb_s.prog20);
+}
+
+/* mark the prefix as unmapped and wait until the VSIE has been left */
+static void prefix_unmapped_sync(struct vsie_page *vsie_page)
+{
+ prefix_unmapped(vsie_page);
+ if (vsie_page->scb_s.prog0c & PROG_IN_SIE)
+ atomic_or(CPUSTAT_STOP_INT, &vsie_page->scb_s.cpuflags);
+ while (vsie_page->scb_s.prog0c & PROG_IN_SIE)
+ cpu_relax();
+}
+
+/* mark the prefix as mapped; this will allow the VSIE to run */
+static void prefix_mapped(struct vsie_page *vsie_page)
+{
+ atomic_andnot(PROG_REQUEST, &vsie_page->scb_s.prog20);
+}
+
+/* test if the prefix is mapped into the gmap shadow */
+static int prefix_is_mapped(struct vsie_page *vsie_page)
+{
+ return !(atomic_read(&vsie_page->scb_s.prog20) & PROG_REQUEST);
+}
+
+/* copy the updated intervention request bits into the shadow scb */
+static void update_intervention_requests(struct vsie_page *vsie_page)
+{
+ const int bits = CPUSTAT_STOP_INT | CPUSTAT_IO_INT | CPUSTAT_EXT_INT;
+ int cpuflags;
+
+ cpuflags = atomic_read(&vsie_page->scb_o->cpuflags);
+ atomic_andnot(bits, &vsie_page->scb_s.cpuflags);
+ atomic_or(cpuflags & bits, &vsie_page->scb_s.cpuflags);
+}
+
+/* shadow (filter and validate) the cpuflags */
+static int prepare_cpuflags(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
+{
+ struct kvm_s390_sie_block *scb_s = &vsie_page->scb_s;
+ struct kvm_s390_sie_block *scb_o = vsie_page->scb_o;
+ int newflags, cpuflags = atomic_read(&scb_o->cpuflags);
+
+ /* we don't allow ESA/390 guests */
+ if (!(cpuflags & CPUSTAT_ZARCH))
+ return set_validity_icpt(scb_s, 0x0001U);
+
+ if (cpuflags & (CPUSTAT_RRF | CPUSTAT_MCDS))
+ return set_validity_icpt(scb_s, 0x0001U);
+ else if (cpuflags & (CPUSTAT_SLSV | CPUSTAT_SLSR))
+ return set_validity_icpt(scb_s, 0x0007U);
+
+ /* intervention requests will be set later */
+ newflags = CPUSTAT_ZARCH;
+ if (cpuflags & CPUSTAT_GED && test_kvm_facility(vcpu->kvm, 8))
+ newflags |= CPUSTAT_GED;
+ if (cpuflags & CPUSTAT_GED2 && test_kvm_facility(vcpu->kvm, 78)) {
+ if (cpuflags & CPUSTAT_GED)
+ return set_validity_icpt(scb_s, 0x0001U);
+ newflags |= CPUSTAT_GED2;
+ }
+ if (test_kvm_cpu_feat(vcpu->kvm, KVM_S390_VM_CPU_FEAT_GPERE))
+ newflags |= cpuflags & CPUSTAT_P;
+ if (test_kvm_cpu_feat(vcpu->kvm, KVM_S390_VM_CPU_FEAT_GSLS))
+ newflags |= cpuflags & CPUSTAT_SM;
+ if (test_kvm_cpu_feat(vcpu->kvm, KVM_S390_VM_CPU_FEAT_IBS))
+ newflags |= cpuflags & CPUSTAT_IBS;
+ if (test_kvm_cpu_feat(vcpu->kvm, KVM_S390_VM_CPU_FEAT_KSS))
+ newflags |= cpuflags & CPUSTAT_KSS;
+
+ atomic_set(&scb_s->cpuflags, newflags);
+ return 0;
+}
+
+/*
+ * Create a shadow copy of the crycb block and setup key wrapping, if
+ * requested for guest 3 and enabled for guest 2.
+ *
+ * We only accept format-1 (no AP in g2), but convert it into format-2.
+ * There is nothing to do for format-0.
+ *
+ * Returns: - 0 if shadowed or nothing to do
+ * - > 0 if control has to be given to guest 2
+ */
+static int shadow_crycb(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
+{
+ struct kvm_s390_sie_block *scb_s = &vsie_page->scb_s;
+ struct kvm_s390_sie_block *scb_o = vsie_page->scb_o;
+ const uint32_t crycbd_o = READ_ONCE(scb_o->crycbd);
+ const u32 crycb_addr = crycbd_o & 0x7ffffff8U;
+ unsigned long *b1, *b2;
+ u8 ecb3_flags;
+
+ scb_s->crycbd = 0;
+ if (!(crycbd_o & vcpu->arch.sie_block->crycbd & CRYCB_FORMAT1))
+ return 0;
+ /* format-1 is supported with message-security-assist extension 3 */
+ if (!test_kvm_facility(vcpu->kvm, 76))
+ return 0;
+ /* we may only allow it if enabled for guest 2 */
+ ecb3_flags = scb_o->ecb3 & vcpu->arch.sie_block->ecb3 &
+ (ECB3_AES | ECB3_DEA);
+ if (!ecb3_flags)
+ return 0;
+
+ if ((crycb_addr & PAGE_MASK) != ((crycb_addr + 128) & PAGE_MASK))
+ return set_validity_icpt(scb_s, 0x003CU);
+ else if (!crycb_addr)
+ return set_validity_icpt(scb_s, 0x0039U);
+
+ /* copy only the wrapping keys */
+ if (read_guest_real(vcpu, crycb_addr + 72,
+ vsie_page->crycb.dea_wrapping_key_mask, 56))
+ return set_validity_icpt(scb_s, 0x0035U);
+
+ scb_s->ecb3 |= ecb3_flags;
+ scb_s->crycbd = ((__u32)(__u64) &vsie_page->crycb) | CRYCB_FORMAT1 |
+ CRYCB_FORMAT2;
+
+ /* xor both blocks in one run */
+ b1 = (unsigned long *) vsie_page->crycb.dea_wrapping_key_mask;
+ b2 = (unsigned long *)
+ vcpu->kvm->arch.crypto.crycb->dea_wrapping_key_mask;
+ /* as 56%8 == 0, bitmap_xor won't overwrite any data */
+ bitmap_xor(b1, b1, b2, BITS_PER_BYTE * 56);
+ return 0;
+}
+
+/* shadow (round up/down) the ibc to avoid validity icpt */
+static void prepare_ibc(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
+{
+ struct kvm_s390_sie_block *scb_s = &vsie_page->scb_s;
+ struct kvm_s390_sie_block *scb_o = vsie_page->scb_o;
+ /* READ_ONCE does not work on bitfields - use a temporary variable */
+ const uint32_t __new_ibc = scb_o->ibc;
+ const uint32_t new_ibc = READ_ONCE(__new_ibc) & 0x0fffU;
+ __u64 min_ibc = (sclp.ibc >> 16) & 0x0fffU;
+
+ scb_s->ibc = 0;
+ /* ibc installed in g2 and requested for g3 */
+ if (vcpu->kvm->arch.model.ibc && new_ibc) {
+ scb_s->ibc = new_ibc;
+ /* take care of the minimum ibc level of the machine */
+ if (scb_s->ibc < min_ibc)
+ scb_s->ibc = min_ibc;
+ /* take care of the maximum ibc level set for the guest */
+ if (scb_s->ibc > vcpu->kvm->arch.model.ibc)
+ scb_s->ibc = vcpu->kvm->arch.model.ibc;
+ }
+}
+
+/* unshadow the scb, copying parameters back to the real scb */
+static void unshadow_scb(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
+{
+ struct kvm_s390_sie_block *scb_s = &vsie_page->scb_s;
+ struct kvm_s390_sie_block *scb_o = vsie_page->scb_o;
+
+ /* interception */
+ scb_o->icptcode = scb_s->icptcode;
+ scb_o->icptstatus = scb_s->icptstatus;
+ scb_o->ipa = scb_s->ipa;
+ scb_o->ipb = scb_s->ipb;
+ scb_o->gbea = scb_s->gbea;
+
+ /* timer */
+ scb_o->cputm = scb_s->cputm;
+ scb_o->ckc = scb_s->ckc;
+ scb_o->todpr = scb_s->todpr;
+
+ /* guest state */
+ scb_o->gpsw = scb_s->gpsw;
+ scb_o->gg14 = scb_s->gg14;
+ scb_o->gg15 = scb_s->gg15;
+ memcpy(scb_o->gcr, scb_s->gcr, 128);
+ scb_o->pp = scb_s->pp;
+
+ /* branch prediction */
+ if (test_kvm_facility(vcpu->kvm, 82)) {
+ scb_o->fpf &= ~FPF_BPBC;
+ scb_o->fpf |= scb_s->fpf & FPF_BPBC;
+ }
+
+ /* interrupt intercept */
+ switch (scb_s->icptcode) {
+ case ICPT_PROGI:
+ case ICPT_INSTPROGI:
+ case ICPT_EXTINT:
+ memcpy((void *)((u64)scb_o + 0xc0),
+ (void *)((u64)scb_s + 0xc0), 0xf0 - 0xc0);
+ break;
+ case ICPT_PARTEXEC:
+ /* MVPG only */
+ memcpy((void *)((u64)scb_o + 0xc0),
+ (void *)((u64)scb_s + 0xc0), 0xd0 - 0xc0);
+ break;
+ }
+
+ if (scb_s->ihcpu != 0xffffU)
+ scb_o->ihcpu = scb_s->ihcpu;
+}
+
+/*
+ * Setup the shadow scb by copying and checking the relevant parts of the g2
+ * provided scb.
+ *
+ * Returns: - 0 if the scb has been shadowed
+ * - > 0 if control has to be given to guest 2
+ */
+static int shadow_scb(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
+{
+ struct kvm_s390_sie_block *scb_o = vsie_page->scb_o;
+ struct kvm_s390_sie_block *scb_s = &vsie_page->scb_s;
+ /* READ_ONCE does not work on bitfields - use a temporary variable */
+ const uint32_t __new_prefix = scb_o->prefix;
+ const uint32_t new_prefix = READ_ONCE(__new_prefix);
+ const bool wants_tx = READ_ONCE(scb_o->ecb) & ECB_TE;
+ bool had_tx = scb_s->ecb & ECB_TE;
+ unsigned long new_mso = 0;
+ int rc;
+
+ /* make sure we don't have any leftovers when reusing the scb */
+ scb_s->icptcode = 0;
+ scb_s->eca = 0;
+ scb_s->ecb = 0;
+ scb_s->ecb2 = 0;
+ scb_s->ecb3 = 0;
+ scb_s->ecd = 0;
+ scb_s->fac = 0;
+ scb_s->fpf = 0;
+
+ rc = prepare_cpuflags(vcpu, vsie_page);
+ if (rc)
+ goto out;
+
+ /* timer */
+ scb_s->cputm = scb_o->cputm;
+ scb_s->ckc = scb_o->ckc;
+ scb_s->todpr = scb_o->todpr;
+ scb_s->epoch = scb_o->epoch;
+
+ /* guest state */
+ scb_s->gpsw = scb_o->gpsw;
+ scb_s->gg14 = scb_o->gg14;
+ scb_s->gg15 = scb_o->gg15;
+ memcpy(scb_s->gcr, scb_o->gcr, 128);
+ scb_s->pp = scb_o->pp;
+
+ /* interception / execution handling */
+ scb_s->gbea = scb_o->gbea;
+ scb_s->lctl = scb_o->lctl;
+ scb_s->svcc = scb_o->svcc;
+ scb_s->ictl = scb_o->ictl;
+ /*
+ * SKEY handling functions can't deal with false setting of PTE invalid
+ * bits. Therefore we cannot provide interpretation and would later
+ * have to provide our own emulation handlers.
+ */
+ if (!(atomic_read(&scb_s->cpuflags) & CPUSTAT_KSS))
+ scb_s->ictl |= ICTL_ISKE | ICTL_SSKE | ICTL_RRBE;
+
+ scb_s->icpua = scb_o->icpua;
+
+ if (!(atomic_read(&scb_s->cpuflags) & CPUSTAT_SM))
+ new_mso = READ_ONCE(scb_o->mso) & 0xfffffffffff00000UL;
+ /* if the hva of the prefix changes, we have to remap the prefix */
+ if (scb_s->mso != new_mso || scb_s->prefix != new_prefix)
+ prefix_unmapped(vsie_page);
+ /* SIE will do mso/msl validity and exception checks for us */
+ scb_s->msl = scb_o->msl & 0xfffffffffff00000UL;
+ scb_s->mso = new_mso;
+ scb_s->prefix = new_prefix;
+
+ /* We definitely have to flush the tlb if this scb never ran */
+ if (scb_s->ihcpu != 0xffffU)
+ scb_s->ihcpu = scb_o->ihcpu;
+
+ /* MVPG and Protection Exception Interpretation are always available */
+ scb_s->eca |= scb_o->eca & (ECA_MVPGI | ECA_PROTEXCI);
+ /* Host-protection-interruption introduced with ESOP */
+ if (test_kvm_cpu_feat(vcpu->kvm, KVM_S390_VM_CPU_FEAT_ESOP))
+ scb_s->ecb |= scb_o->ecb & ECB_HOSTPROTINT;
+ /* transactional execution */
+ if (test_kvm_facility(vcpu->kvm, 73) && wants_tx) {
+ /* remap the prefix if tx is toggled on */
+ if (!had_tx)
+ prefix_unmapped(vsie_page);
+ scb_s->ecb |= ECB_TE;
+ }
+ /* branch prediction */
+ if (test_kvm_facility(vcpu->kvm, 82))
+ scb_s->fpf |= scb_o->fpf & FPF_BPBC;
+ /* SIMD */
+ if (test_kvm_facility(vcpu->kvm, 129)) {
+ scb_s->eca |= scb_o->eca & ECA_VX;
+ scb_s->ecd |= scb_o->ecd & ECD_HOSTREGMGMT;
+ }
+ /* Run-time-Instrumentation */
+ if (test_kvm_facility(vcpu->kvm, 64))
+ scb_s->ecb3 |= scb_o->ecb3 & ECB3_RI;
+ /* Instruction Execution Prevention */
+ if (test_kvm_facility(vcpu->kvm, 130))
+ scb_s->ecb2 |= scb_o->ecb2 & ECB2_IEP;
+ /* Guarded Storage */
+ if (test_kvm_facility(vcpu->kvm, 133)) {
+ scb_s->ecb |= scb_o->ecb & ECB_GS;
+ scb_s->ecd |= scb_o->ecd & ECD_HOSTREGMGMT;
+ }
+ if (test_kvm_cpu_feat(vcpu->kvm, KVM_S390_VM_CPU_FEAT_SIIF))
+ scb_s->eca |= scb_o->eca & ECA_SII;
+ if (test_kvm_cpu_feat(vcpu->kvm, KVM_S390_VM_CPU_FEAT_IB))
+ scb_s->eca |= scb_o->eca & ECA_IB;
+ if (test_kvm_cpu_feat(vcpu->kvm, KVM_S390_VM_CPU_FEAT_CEI))
+ scb_s->eca |= scb_o->eca & ECA_CEI;
+ /* Epoch Extension */
+ if (test_kvm_facility(vcpu->kvm, 139))
+ scb_s->ecd |= scb_o->ecd & ECD_MEF;
+
+ /* etoken */
+ if (test_kvm_facility(vcpu->kvm, 156))
+ scb_s->ecd |= scb_o->ecd & ECD_ETOKENF;
+
+ prepare_ibc(vcpu, vsie_page);
+ rc = shadow_crycb(vcpu, vsie_page);
+out:
+ if (rc)
+ unshadow_scb(vcpu, vsie_page);
+ return rc;
+}
+
+void kvm_s390_vsie_gmap_notifier(struct gmap *gmap, unsigned long start,
+ unsigned long end)
+{
+ struct kvm *kvm = gmap->private;
+ struct vsie_page *cur;
+ unsigned long prefix;
+ struct page *page;
+ int i;
+
+ if (!gmap_is_shadow(gmap))
+ return;
+ if (start >= 1UL << 31)
+ /* We are only interested in prefix pages */
+ return;
+
+ /*
+ * Only new shadow blocks are added to the list during runtime,
+ * therefore we can safely reference them all the time.
+ */
+ for (i = 0; i < kvm->arch.vsie.page_count; i++) {
+ page = READ_ONCE(kvm->arch.vsie.pages[i]);
+ if (!page)
+ continue;
+ cur = page_to_virt(page);
+ if (READ_ONCE(cur->gmap) != gmap)
+ continue;
+ prefix = cur->scb_s.prefix << GUEST_PREFIX_SHIFT;
+ /* with mso/msl, the prefix lies at an offset */
+ prefix += cur->scb_s.mso;
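+ /*
+ * The range check below covers two pages, because map_prefix() also
+ * maps the page following the prefix when transactional execution
+ * (ECB_TE) is shadowed.
+ */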
+ if (prefix <= end && start <= prefix + 2 * PAGE_SIZE - 1)
+ prefix_unmapped_sync(cur);
+ }
+}
+
+/*
+ * Map the first prefix page and if tx is enabled also the second prefix page.
+ *
+ * The prefix will be protected, a gmap notifier will inform about unmaps.
+ * The shadow scb must not be executed until the prefix is remapped, this is
+ * guaranteed by properly handling PROG_REQUEST.
+ *
+ * Returns: - 0 if successfully mapped or already mapped
+ * - > 0 if control has to be given to guest 2
+ * - -EAGAIN if the caller can retry immediately
+ * - -ENOMEM if out of memory
+ */
+static int map_prefix(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
+{
+ struct kvm_s390_sie_block *scb_s = &vsie_page->scb_s;
+ u64 prefix = scb_s->prefix << GUEST_PREFIX_SHIFT;
+ int rc;
+
+ if (prefix_is_mapped(vsie_page))
+ return 0;
+
+ /* mark it as mapped so we can catch any concurrent unmappers */
+ prefix_mapped(vsie_page);
+
+ /* with mso/msl, the prefix lies at offset *mso* */
+ prefix += scb_s->mso;
+
+ rc = kvm_s390_shadow_fault(vcpu, vsie_page->gmap, prefix);
+ if (!rc && (scb_s->ecb & ECB_TE))
+ rc = kvm_s390_shadow_fault(vcpu, vsie_page->gmap,
+ prefix + PAGE_SIZE);
+ /*
+ * We don't have to mprotect; we will be called for all unshadows.
+ * SIE will detect if protection applies and trigger a validity.
+ */
+ if (rc)
+ prefix_unmapped(vsie_page);
+ if (rc > 0 || rc == -EFAULT)
+ rc = set_validity_icpt(scb_s, 0x0037U);
+ return rc;
+}
+
+/*
+ * Pin the guest page given by gpa and set hpa to the pinned host address.
+ * Will always be pinned writable.
+ *
+ * Returns: - 0 on success
+ * - -EINVAL if the gpa is not valid guest storage
+ */
+static int pin_guest_page(struct kvm *kvm, gpa_t gpa, hpa_t *hpa)
+{
+ struct page *page;
+
+ page = gfn_to_page(kvm, gpa_to_gfn(gpa));
+ if (is_error_page(page))
+ return -EINVAL;
+ *hpa = (hpa_t) page_to_virt(page) + (gpa & ~PAGE_MASK);
+ return 0;
+}
+
+/* Unpins a page previously pinned via pin_guest_page, marking it as dirty. */
+static void unpin_guest_page(struct kvm *kvm, gpa_t gpa, hpa_t hpa)
+{
+ kvm_release_pfn_dirty(hpa >> PAGE_SHIFT);
+ /* mark the page always as dirty for migration */
+ mark_page_dirty(kvm, gpa_to_gfn(gpa));
+}
+
+/* unpin all blocks previously pinned by pin_blocks(), marking them dirty */
+static void unpin_blocks(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
+{
+ struct kvm_s390_sie_block *scb_s = &vsie_page->scb_s;
+ hpa_t hpa;
+
+ hpa = (u64) scb_s->scaoh << 32 | scb_s->scaol;
+ if (hpa) {
+ unpin_guest_page(vcpu->kvm, vsie_page->sca_gpa, hpa);
+ vsie_page->sca_gpa = 0;
+ scb_s->scaol = 0;
+ scb_s->scaoh = 0;
+ }
+
+ hpa = scb_s->itdba;
+ if (hpa) {
+ unpin_guest_page(vcpu->kvm, vsie_page->itdba_gpa, hpa);
+ vsie_page->itdba_gpa = 0;
+ scb_s->itdba = 0;
+ }
+
+ hpa = scb_s->gvrd;
+ if (hpa) {
+ unpin_guest_page(vcpu->kvm, vsie_page->gvrd_gpa, hpa);
+ vsie_page->gvrd_gpa = 0;
+ scb_s->gvrd = 0;
+ }
+
+ hpa = scb_s->riccbd;
+ if (hpa) {
+ unpin_guest_page(vcpu->kvm, vsie_page->riccbd_gpa, hpa);
+ vsie_page->riccbd_gpa = 0;
+ scb_s->riccbd = 0;
+ }
+
+ hpa = scb_s->sdnxo;
+ if (hpa) {
+ unpin_guest_page(vcpu->kvm, vsie_page->sdnx_gpa, hpa);
+ vsie_page->sdnx_gpa = 0;
+ scb_s->sdnxo = 0;
+ }
+}
+
+/*
+ * Instead of shadowing some blocks, we can simply forward them because the
+ * addresses in the scb are 64 bits long.
+ *
+ * This works as long as the data lies in one page. If blocks ever exceed one
+ * page, we have to fall back to shadowing.
+ *
+ * As we reuse the sca, the vcpu pointers contained in it are invalid. We must
+ * therefore not enable any facilities that access these pointers (e.g. SIGPIF).
+ *
+ * Returns: - 0 if all blocks were pinned.
+ * - > 0 if control has to be given to guest 2
+ * - -ENOMEM if out of memory
+ */
+static int pin_blocks(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
+{
+ struct kvm_s390_sie_block *scb_o = vsie_page->scb_o;
+ struct kvm_s390_sie_block *scb_s = &vsie_page->scb_s;
+ hpa_t hpa;
+ gpa_t gpa;
+ int rc = 0;
+
+ gpa = READ_ONCE(scb_o->scaol) & ~0xfUL;
+ if (test_kvm_cpu_feat(vcpu->kvm, KVM_S390_VM_CPU_FEAT_64BSCAO))
+ gpa |= (u64) READ_ONCE(scb_o->scaoh) << 32;
+ if (gpa) {
+ if (gpa < 2 * PAGE_SIZE)
+ rc = set_validity_icpt(scb_s, 0x0038U);
+ else if ((gpa & ~0x1fffUL) == kvm_s390_get_prefix(vcpu))
+ rc = set_validity_icpt(scb_s, 0x0011U);
+ else if ((gpa & PAGE_MASK) !=
+ ((gpa + sizeof(struct bsca_block) - 1) & PAGE_MASK))
+ rc = set_validity_icpt(scb_s, 0x003bU);
+ if (!rc) {
+ rc = pin_guest_page(vcpu->kvm, gpa, &hpa);
+ if (rc)
+ rc = set_validity_icpt(scb_s, 0x0034U);
+ }
+ if (rc)
+ goto unpin;
+ vsie_page->sca_gpa = gpa;
+ scb_s->scaoh = (u32)((u64)hpa >> 32);
+ scb_s->scaol = (u32)(u64)hpa;
+ }
+
+ gpa = READ_ONCE(scb_o->itdba) & ~0xffUL;
+ if (gpa && (scb_s->ecb & ECB_TE)) {
+ if (gpa < 2 * PAGE_SIZE) {
+ rc = set_validity_icpt(scb_s, 0x0080U);
+ goto unpin;
+ }
+ /* 256 bytes cannot cross page boundaries */
+ rc = pin_guest_page(vcpu->kvm, gpa, &hpa);
+ if (rc) {
+ rc = set_validity_icpt(scb_s, 0x0080U);
+ goto unpin;
+ }
+ vsie_page->itdba_gpa = gpa;
+ scb_s->itdba = hpa;
+ }
+
+ gpa = READ_ONCE(scb_o->gvrd) & ~0x1ffUL;
+ if (gpa && (scb_s->eca & ECA_VX) && !(scb_s->ecd & ECD_HOSTREGMGMT)) {
+ if (gpa < 2 * PAGE_SIZE) {
+ rc = set_validity_icpt(scb_s, 0x1310U);
+ goto unpin;
+ }
+ /*
+ * 512 bytes of vector registers cannot cross page boundaries.
+ * If this block gets bigger, we have to shadow it.
+ */
+ rc = pin_guest_page(vcpu->kvm, gpa, &hpa);
+ if (rc) {
+ rc = set_validity_icpt(scb_s, 0x1310U);
+ goto unpin;
+ }
+ vsie_page->gvrd_gpa = gpa;
+ scb_s->gvrd = hpa;
+ }
+
+ gpa = READ_ONCE(scb_o->riccbd) & ~0x3fUL;
+ if (gpa && (scb_s->ecb3 & ECB3_RI)) {
+ if (gpa < 2 * PAGE_SIZE) {
+ rc = set_validity_icpt(scb_s, 0x0043U);
+ goto unpin;
+ }
+ /* 64 bytes cannot cross page boundaries */
+ rc = pin_guest_page(vcpu->kvm, gpa, &hpa);
+ if (rc) {
+ rc = set_validity_icpt(scb_s, 0x0043U);
+ goto unpin;
+ }
+ /* Validity 0x0044 will be checked by SIE */
+ vsie_page->riccbd_gpa = gpa;
+ scb_s->riccbd = hpa;
+ }
+ if (((scb_s->ecb & ECB_GS) && !(scb_s->ecd & ECD_HOSTREGMGMT)) ||
+ (scb_s->ecd & ECD_ETOKENF)) {
+ unsigned long sdnxc;
+
+ gpa = READ_ONCE(scb_o->sdnxo) & ~0xfUL;
+ sdnxc = READ_ONCE(scb_o->sdnxo) & 0xfUL;
+ if (!gpa || gpa < 2 * PAGE_SIZE) {
+ rc = set_validity_icpt(scb_s, 0x10b0U);
+ goto unpin;
+ }
+ if (sdnxc < 6 || sdnxc > 12) {
+ rc = set_validity_icpt(scb_s, 0x10b1U);
+ goto unpin;
+ }
+ if (gpa & ((1 << sdnxc) - 1)) {
+ rc = set_validity_icpt(scb_s, 0x10b2U);
+ goto unpin;
+ }
+ /* Due to alignment rules (checked above) this cannot
+ * cross page boundaries
+ */
+ rc = pin_guest_page(vcpu->kvm, gpa, &hpa);
+ if (rc) {
+ rc = set_validity_icpt(scb_s, 0x10b0U);
+ goto unpin;
+ }
+ vsie_page->sdnx_gpa = gpa;
+ scb_s->sdnxo = hpa | sdnxc;
+ }
+ return 0;
+unpin:
+ unpin_blocks(vcpu, vsie_page);
+ return rc;
+}
+
+/* unpin the scb provided by guest 2, marking it as dirty */
+static void unpin_scb(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page,
+ gpa_t gpa)
+{
+ hpa_t hpa = (hpa_t) vsie_page->scb_o;
+
+ if (hpa)
+ unpin_guest_page(vcpu->kvm, gpa, hpa);
+ vsie_page->scb_o = NULL;
+}
+
+/*
+ * Pin the scb at gpa provided by guest 2 at vsie_page->scb_o.
+ *
+ * Returns: - 0 if the scb was pinned.
+ * - > 0 if control has to be given to guest 2
+ */
+static int pin_scb(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page,
+ gpa_t gpa)
+{
+ hpa_t hpa;
+ int rc;
+
+ rc = pin_guest_page(vcpu->kvm, gpa, &hpa);
+ if (rc) {
+ rc = kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
+ WARN_ON_ONCE(rc);
+ return 1;
+ }
+ vsie_page->scb_o = (struct kvm_s390_sie_block *) hpa;
+ return 0;
+}
+
+/*
+ * Inject a fault into guest 2.
+ *
+ * Returns: - > 0 if control has to be given to guest 2
+ * < 0 if an error occurred during injection.
+ */
+static int inject_fault(struct kvm_vcpu *vcpu, __u16 code, __u64 vaddr,
+ bool write_flag)
+{
+ struct kvm_s390_pgm_info pgm = {
+ .code = code,
+ .trans_exc_code =
+ /* 0-51: virtual address */
+ (vaddr & 0xfffffffffffff000UL) |
+ /* 52-53: store / fetch */
+ (((unsigned int) !write_flag) + 1) << 10,
+ /* 62-63: asce id (always primary == 0) */
+ .exc_access_id = 0, /* always primary */
+ .op_access_id = 0, /* not MVPG */
+ };
+ int rc;
+
+ if (code == PGM_PROTECTION)
+ pgm.trans_exc_code |= 0x4UL;
+
+ rc = kvm_s390_inject_prog_irq(vcpu, &pgm);
+ return rc ? rc : 1;
+}
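+
+/*
+ * Worked example of the encoding above: a write fault at guest address
+ * 0x12345678 yields trans_exc_code 0x12345400 (bits 52-53 == 01), a fetch
+ * fault yields 0x12345800 (bits 52-53 == 10), and PGM_PROTECTION
+ * additionally sets the 0x4 bit.
+ */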
+
+/*
+ * Handle a fault during vsie execution on a gmap shadow.
+ *
+ * Returns: - 0 if the fault was resolved
+ * - > 0 if control has to be given to guest 2
+ * - < 0 if an error occurred
+ */
+static int handle_fault(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
+{
+ int rc;
+
+ if (current->thread.gmap_int_code == PGM_PROTECTION)
+ /* we can directly forward all protection exceptions */
+ return inject_fault(vcpu, PGM_PROTECTION,
+ current->thread.gmap_addr, 1);
+
+ rc = kvm_s390_shadow_fault(vcpu, vsie_page->gmap,
+ current->thread.gmap_addr);
+ if (rc > 0) {
+ rc = inject_fault(vcpu, rc,
+ current->thread.gmap_addr,
+ current->thread.gmap_write_flag);
+ if (rc >= 0)
+ vsie_page->fault_addr = current->thread.gmap_addr;
+ }
+ return rc;
+}
+
+/*
+ * Retry the previous fault that required guest 2 intervention. This avoids
+ * one superfluous SIE re-entry and direct exit.
+ *
+ * Will ignore any errors. The next SIE fault will do proper fault handling.
+ */
+static void handle_last_fault(struct kvm_vcpu *vcpu,
+ struct vsie_page *vsie_page)
+{
+ if (vsie_page->fault_addr)
+ kvm_s390_shadow_fault(vcpu, vsie_page->gmap,
+ vsie_page->fault_addr);
+ vsie_page->fault_addr = 0;
+}
+
+static inline void clear_vsie_icpt(struct vsie_page *vsie_page)
+{
+ vsie_page->scb_s.icptcode = 0;
+}
+
+/* rewind the psw and clear the vsie icpt, so we can retry execution */
+static void retry_vsie_icpt(struct vsie_page *vsie_page)
+{
+ struct kvm_s390_sie_block *scb_s = &vsie_page->scb_s;
+ int ilen = insn_length(scb_s->ipa >> 8);
+
+ /* take care of EXECUTE instructions */
+ if (scb_s->icptstatus & 1) {
+ ilen = (scb_s->icptstatus >> 4) & 0x6;
+ if (!ilen)
+ ilen = 4;
+ }
+ scb_s->gpsw.addr = __rewind_psw(scb_s->gpsw, ilen);
+ clear_vsie_icpt(vsie_page);
+}
+
+/*
+ * Try to shadow + enable the guest 2 provided facility list.
+ * Retry instruction execution if enabled for and provided by guest 2.
+ *
+ * Returns: - 0 if handled (retry or guest 2 icpt)
+ * - > 0 if control has to be given to guest 2
+ */
+static int handle_stfle(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
+{
+ struct kvm_s390_sie_block *scb_s = &vsie_page->scb_s;
+ __u32 fac = READ_ONCE(vsie_page->scb_o->fac) & 0x7ffffff8U;
+
+ if (fac && test_kvm_facility(vcpu->kvm, 7)) {
+ retry_vsie_icpt(vsie_page);
+ if (read_guest_real(vcpu, fac, &vsie_page->fac,
+ sizeof(vsie_page->fac)))
+ return set_validity_icpt(scb_s, 0x1090U);
+ scb_s->fac = (__u32)(__u64) &vsie_page->fac;
+ }
+ return 0;
+}
+
+/*
+ * Run the vsie on a shadow scb and a shadow gmap, without any further
+ * sanity checks, handling SIE faults.
+ *
+ * Returns: - 0 everything went fine
+ * - > 0 if control has to be given to guest 2
+ * - < 0 if an error occurred
+ */
+static int do_vsie_run(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
+ __releases(vcpu->kvm->srcu)
+ __acquires(vcpu->kvm->srcu)
+{
+ struct kvm_s390_sie_block *scb_s = &vsie_page->scb_s;
+ struct kvm_s390_sie_block *scb_o = vsie_page->scb_o;
+ int guest_bp_isolation;
+ int rc;
+
+ handle_last_fault(vcpu, vsie_page);
+
+ if (need_resched())
+ schedule();
+ if (test_cpu_flag(CIF_MCCK_PENDING))
+ s390_handle_mcck();
+
+ srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
+
+ /* save current guest state of bp isolation override */
+ guest_bp_isolation = test_thread_flag(TIF_ISOLATE_BP_GUEST);
+
+ /*
+ * The guest is running with BPBC, so we have to force it on for our
+ * nested guest. This is done by enabling BPBC globally, so the BPBC
+ * control in the SCB (which the nested guest can modify) is simply
+ * ignored.
+ */
+ if (test_kvm_facility(vcpu->kvm, 82) &&
+ vcpu->arch.sie_block->fpf & FPF_BPBC)
+ set_thread_flag(TIF_ISOLATE_BP_GUEST);
+
+ local_irq_disable();
+ guest_enter_irqoff();
+ local_irq_enable();
+
+ rc = sie64a(scb_s, vcpu->run->s.regs.gprs);
+
+ local_irq_disable();
+ guest_exit_irqoff();
+ local_irq_enable();
+
+ /* restore guest state for bp isolation override */
+ if (!guest_bp_isolation)
+ clear_thread_flag(TIF_ISOLATE_BP_GUEST);
+
+ vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
+
+ if (rc == -EINTR) {
+ VCPU_EVENT(vcpu, 3, "%s", "machine check");
+ kvm_s390_reinject_machine_check(vcpu, &vsie_page->mcck_info);
+ return 0;
+ }
+
+ if (rc > 0)
+ rc = 0; /* we could still have an icpt */
+ else if (rc == -EFAULT)
+ return handle_fault(vcpu, vsie_page);
+
+ switch (scb_s->icptcode) {
+ case ICPT_INST:
+ if (scb_s->ipa == 0xb2b0)
+ rc = handle_stfle(vcpu, vsie_page);
+ break;
+ case ICPT_STOP:
+ /* stop not requested by g2 - must have been a kick */
+ if (!(atomic_read(&scb_o->cpuflags) & CPUSTAT_STOP_INT))
+ clear_vsie_icpt(vsie_page);
+ break;
+ case ICPT_VALIDITY:
+ if ((scb_s->ipa & 0xf000) != 0xf000)
+ scb_s->ipa += 0x1000;
+ break;
+ }
+ return rc;
+}
+
+static void release_gmap_shadow(struct vsie_page *vsie_page)
+{
+ if (vsie_page->gmap)
+ gmap_put(vsie_page->gmap);
+ WRITE_ONCE(vsie_page->gmap, NULL);
+ prefix_unmapped(vsie_page);
+}
+
+static int acquire_gmap_shadow(struct kvm_vcpu *vcpu,
+ struct vsie_page *vsie_page)
+{
+ unsigned long asce;
+ union ctlreg0 cr0;
+ struct gmap *gmap;
+ int edat;
+
+ asce = vcpu->arch.sie_block->gcr[1];
+ cr0.val = vcpu->arch.sie_block->gcr[0];
+ edat = cr0.edat && test_kvm_facility(vcpu->kvm, 8);
+ edat += edat && test_kvm_facility(vcpu->kvm, 78);
+
+ /*
+ * ASCE or EDAT could have changed since last icpt, or the gmap
+ * we're holding has been unshadowed. If the gmap is still valid,
+ * we can safely reuse it.
+ */
+ if (vsie_page->gmap && gmap_shadow_valid(vsie_page->gmap, asce, edat))
+ return 0;
+
+ /* release the old shadow - if any, and mark the prefix as unmapped */
+ release_gmap_shadow(vsie_page);
+ gmap = gmap_shadow(vcpu->arch.gmap, asce, edat);
+ if (IS_ERR(gmap))
+ return PTR_ERR(gmap);
+ gmap->private = vcpu->kvm;
+ WRITE_ONCE(vsie_page->gmap, gmap);
+ return 0;
+}
+
+/*
+ * Register the shadow scb at the VCPU, e.g. for kicking out of vsie.
+ */
+static void register_shadow_scb(struct kvm_vcpu *vcpu,
+ struct vsie_page *vsie_page)
+{
+ struct kvm_s390_sie_block *scb_s = &vsie_page->scb_s;
+
+ WRITE_ONCE(vcpu->arch.vsie_block, &vsie_page->scb_s);
+ /*
+ * External calls have to lead to a kick of the vcpu and
+ * therefore out of the vsie -> simulate the Wait state.
+ */
+ kvm_s390_set_cpuflags(vcpu, CPUSTAT_WAIT);
+ /*
+ * We have to adjust the g3 epoch by the g2 epoch. The epoch will
+ * automatically be adjusted on tod clock changes via kvm_sync_clock.
+ */
+ preempt_disable();
+ scb_s->epoch += vcpu->kvm->arch.epoch;
+
+ if (scb_s->ecd & ECD_MEF) {
+ scb_s->epdx += vcpu->kvm->arch.epdx;
+ if (scb_s->epoch < vcpu->kvm->arch.epoch)
+ scb_s->epdx += 1;
+ }
+
+ preempt_enable();
+}
+
+/*
+ * Unregister a shadow scb from a VCPU.
+ */
+static void unregister_shadow_scb(struct kvm_vcpu *vcpu)
+{
+ kvm_s390_clear_cpuflags(vcpu, CPUSTAT_WAIT);
+ WRITE_ONCE(vcpu->arch.vsie_block, NULL);
+}
+
+/*
+ * Run the vsie on a shadowed scb, managing the gmap shadow, handling
+ * prefix pages and faults.
+ *
+ * Returns: - 0 if no errors occurred
+ * - > 0 if control has to be given to guest 2
+ * - -ENOMEM if out of memory
+ */
+static int vsie_run(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
+{
+ struct kvm_s390_sie_block *scb_s = &vsie_page->scb_s;
+ int rc = 0;
+
+ while (1) {
+ rc = acquire_gmap_shadow(vcpu, vsie_page);
+ if (!rc)
+ rc = map_prefix(vcpu, vsie_page);
+ if (!rc) {
+ gmap_enable(vsie_page->gmap);
+ update_intervention_requests(vsie_page);
+ rc = do_vsie_run(vcpu, vsie_page);
+ gmap_enable(vcpu->arch.gmap);
+ }
+ atomic_andnot(PROG_BLOCK_SIE, &scb_s->prog20);
+
+ if (rc == -EAGAIN)
+ rc = 0;
+ if (rc || scb_s->icptcode || signal_pending(current) ||
+ kvm_s390_vcpu_has_irq(vcpu, 0))
+ break;
+ }
+
+ if (rc == -EFAULT) {
+ /*
+ * Addressing exceptions are always presented as intercepts.
+ * As addressing exceptions are suppressing and our guest 3 PSW
+ * points at the responsible instruction, we have to
+ * forward the PSW and set the ilc. If we can't read the guest 3
+ * instruction, we can use an arbitrary ilc. Let's always use
+ * ilen = 4 for now, so we can avoid reading in guest 3 virtual
+ * memory. (we could also fake the shadow so the hardware
+ * handles it).
+ */
+ scb_s->icptcode = ICPT_PROGI;
+ scb_s->iprcc = PGM_ADDRESSING;
+ scb_s->pgmilc = 4;
+ scb_s->gpsw.addr = __rewind_psw(scb_s->gpsw, 4);
+ rc = 1;
+ }
+ return rc;
+}
+
+/*
+ * Get or create a vsie page for a scb address.
+ *
+ * Returns: - address of a vsie page (cached or new one)
+ * - NULL if the same scb address is already used by another VCPU
+ * - ERR_PTR(-ENOMEM) if out of memory
+ */
+static struct vsie_page *get_vsie_page(struct kvm *kvm, unsigned long addr)
+{
+ struct vsie_page *vsie_page;
+ struct page *page;
+ int nr_vcpus;
+
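+ /* the scb is 512-byte aligned, so addr >> 9 yields a compact radix-tree index */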
+ rcu_read_lock();
+ page = radix_tree_lookup(&kvm->arch.vsie.addr_to_page, addr >> 9);
+ rcu_read_unlock();
+ if (page) {
+ if (page_ref_inc_return(page) == 2)
+ return page_to_virt(page);
+ page_ref_dec(page);
+ }
+
+ /*
+ * We want at least #online_vcpus shadows, so every VCPU can execute
+ * the VSIE in parallel.
+ */
+ nr_vcpus = atomic_read(&kvm->online_vcpus);
+
+ mutex_lock(&kvm->arch.vsie.mutex);
+ if (kvm->arch.vsie.page_count < nr_vcpus) {
+ page = alloc_page(GFP_KERNEL | __GFP_ZERO | GFP_DMA);
+ if (!page) {
+ mutex_unlock(&kvm->arch.vsie.mutex);
+ return ERR_PTR(-ENOMEM);
+ }
+ page_ref_inc(page);
+ kvm->arch.vsie.pages[kvm->arch.vsie.page_count] = page;
+ kvm->arch.vsie.page_count++;
+ } else {
+ /* reuse an existing entry that belongs to nobody */
+ while (true) {
+ page = kvm->arch.vsie.pages[kvm->arch.vsie.next];
+ if (page_ref_inc_return(page) == 2)
+ break;
+ page_ref_dec(page);
+ kvm->arch.vsie.next++;
+ kvm->arch.vsie.next %= nr_vcpus;
+ }
+ radix_tree_delete(&kvm->arch.vsie.addr_to_page, page->index >> 9);
+ }
+ page->index = addr;
+ /* double use of the same address */
+ if (radix_tree_insert(&kvm->arch.vsie.addr_to_page, addr >> 9, page)) {
+ page_ref_dec(page);
+ mutex_unlock(&kvm->arch.vsie.mutex);
+ return NULL;
+ }
+ mutex_unlock(&kvm->arch.vsie.mutex);
+
+ vsie_page = page_to_virt(page);
+ memset(&vsie_page->scb_s, 0, sizeof(struct kvm_s390_sie_block));
+ release_gmap_shadow(vsie_page);
+ vsie_page->fault_addr = 0;
+ vsie_page->scb_s.ihcpu = 0xffffU;
+ return vsie_page;
+}
+
+/* put a vsie page acquired via get_vsie_page */
+static void put_vsie_page(struct kvm *kvm, struct vsie_page *vsie_page)
+{
+ struct page *page = pfn_to_page(__pa(vsie_page) >> PAGE_SHIFT);
+
+ page_ref_dec(page);
+}
+
+int kvm_s390_handle_vsie(struct kvm_vcpu *vcpu)
+{
+ struct vsie_page *vsie_page;
+ unsigned long scb_addr;
+ int rc;
+
+ vcpu->stat.instruction_sie++;
+ if (!test_kvm_cpu_feat(vcpu->kvm, KVM_S390_VM_CPU_FEAT_SIEF2))
+ return -EOPNOTSUPP;
+ if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
+ return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
+
+ BUILD_BUG_ON(sizeof(struct vsie_page) != PAGE_SIZE);
+ scb_addr = kvm_s390_get_base_disp_s(vcpu, NULL);
+
+ /* 512 byte alignment */
+ if (unlikely(scb_addr & 0x1ffUL))
+ return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
+
+ if (signal_pending(current) || kvm_s390_vcpu_has_irq(vcpu, 0))
+ return 0;
+
+ vsie_page = get_vsie_page(vcpu->kvm, scb_addr);
+ if (IS_ERR(vsie_page))
+ return PTR_ERR(vsie_page);
+ else if (!vsie_page)
+ /* double use of sie control block - simply do nothing */
+ return 0;
+
+ rc = pin_scb(vcpu, vsie_page, scb_addr);
+ if (rc)
+ goto out_put;
+ rc = shadow_scb(vcpu, vsie_page);
+ if (rc)
+ goto out_unpin_scb;
+ rc = pin_blocks(vcpu, vsie_page);
+ if (rc)
+ goto out_unshadow;
+ register_shadow_scb(vcpu, vsie_page);
+ rc = vsie_run(vcpu, vsie_page);
+ unregister_shadow_scb(vcpu);
+ unpin_blocks(vcpu, vsie_page);
+out_unshadow:
+ unshadow_scb(vcpu, vsie_page);
+out_unpin_scb:
+ unpin_scb(vcpu, vsie_page, scb_addr);
+out_put:
+ put_vsie_page(vcpu->kvm, vsie_page);
+
+ return rc < 0 ? rc : 0;
+}
+
+/* Init the vsie data structures. To be called when a vm is initialized. */
+void kvm_s390_vsie_init(struct kvm *kvm)
+{
+ mutex_init(&kvm->arch.vsie.mutex);
+ INIT_RADIX_TREE(&kvm->arch.vsie.addr_to_page, GFP_KERNEL);
+}
+
+/* Destroy the vsie data structures. To be called when a vm is destroyed. */
+void kvm_s390_vsie_destroy(struct kvm *kvm)
+{
+ struct vsie_page *vsie_page;
+ struct page *page;
+ int i;
+
+ mutex_lock(&kvm->arch.vsie.mutex);
+ for (i = 0; i < kvm->arch.vsie.page_count; i++) {
+ page = kvm->arch.vsie.pages[i];
+ kvm->arch.vsie.pages[i] = NULL;
+ vsie_page = page_to_virt(page);
+ release_gmap_shadow(vsie_page);
+ /* free the radix tree entry */
+ radix_tree_delete(&kvm->arch.vsie.addr_to_page, page->index >> 9);
+ __free_page(page);
+ }
+ kvm->arch.vsie.page_count = 0;
+ mutex_unlock(&kvm->arch.vsie.mutex);
+}
+
+void kvm_s390_vsie_kick(struct kvm_vcpu *vcpu)
+{
+ struct kvm_s390_sie_block *scb = READ_ONCE(vcpu->arch.vsie_block);
+
+ /*
+ * Even if the VCPU lets go of the shadow sie block reference, it is
+ * still valid in the cache. So we can safely kick it.
+ */
+ if (scb) {
+ atomic_or(PROG_BLOCK_SIE, &scb->prog20);
+ if (scb->prog0c & PROG_IN_SIE)
+ atomic_or(CPUSTAT_STOP_INT, &scb->cpuflags);
+ }
+}
diff --git a/arch/s390/lib/Makefile b/arch/s390/lib/Makefile
new file mode 100644
index 000000000..5418d10dc
--- /dev/null
+++ b/arch/s390/lib/Makefile
@@ -0,0 +1,17 @@
+# SPDX-License-Identifier: GPL-2.0
+#
+# Makefile for s390-specific library files.
+#
+
+lib-y += delay.o string.o uaccess.o find.o
+obj-y += mem.o xor.o
+lib-$(CONFIG_SMP) += spinlock.o
+lib-$(CONFIG_KPROBES) += probes.o
+lib-$(CONFIG_UPROBES) += probes.o
+
+# Instrumenting memory accesses to __user data (in a different address space)
+# produces false positives.
+KASAN_SANITIZE_uaccess.o := n
+
+chkbss := mem.o
+include $(srctree)/arch/s390/scripts/Makefile.chkbss
diff --git a/arch/s390/lib/delay.c b/arch/s390/lib/delay.c
new file mode 100644
index 000000000..d4aa10795
--- /dev/null
+++ b/arch/s390/lib/delay.c
@@ -0,0 +1,129 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Precise Delay Loops for S390
+ *
+ * Copyright IBM Corp. 1999, 2008
+ * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>,
+ * Heiko Carstens <heiko.carstens@de.ibm.com>,
+ */
+
+#include <linux/sched.h>
+#include <linux/delay.h>
+#include <linux/timex.h>
+#include <linux/export.h>
+#include <linux/irqflags.h>
+#include <linux/interrupt.h>
+#include <linux/irq.h>
+#include <asm/vtimer.h>
+#include <asm/div64.h>
+#include <asm/idle.h>
+
+void __delay(unsigned long loops)
+{
+ /*
+ * To end the bloody stupid and useless discussion about the
+ * BogoMips number I took the liberty to define the __delay
+ * function in a way that the resulting BogoMips number will
+ * yield the megahertz number of the cpu. The important function
+ * is udelay and that is done using the tod clock. -- martin.
+ */
+ asm volatile("0: brct %0,0b" : : "d" ((loops/2) + 1));
+}
+EXPORT_SYMBOL(__delay);
+
+static void __udelay_disabled(unsigned long long usecs)
+{
+ unsigned long cr0, cr0_new, psw_mask;
+ struct s390_idle_data idle;
+ u64 end;
+
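+ /* bit 51 of the TOD clock corresponds to one microsecond, hence usecs << 12 */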
+ end = get_tod_clock() + (usecs << 12);
+ __ctl_store(cr0, 0, 0);
+ cr0_new = cr0 & ~CR0_IRQ_SUBCLASS_MASK;
+ cr0_new |= (1UL << (63 - 52)); /* enable clock comparator irq */
+ __ctl_load(cr0_new, 0, 0);
+ psw_mask = __extract_psw() | PSW_MASK_EXT | PSW_MASK_WAIT;
+ set_clock_comparator(end);
+ set_cpu_flag(CIF_IGNORE_IRQ);
+ psw_idle(&idle, psw_mask);
+ clear_cpu_flag(CIF_IGNORE_IRQ);
+ set_clock_comparator(S390_lowcore.clock_comparator);
+ __ctl_load(cr0, 0, 0);
+}
+
+static void __udelay_enabled(unsigned long long usecs)
+{
+ u64 clock_saved, end;
+
+ end = get_tod_clock_fast() + (usecs << 12);
+ do {
+ clock_saved = 0;
+ if (tod_after(S390_lowcore.clock_comparator, end)) {
+ clock_saved = local_tick_disable();
+ set_clock_comparator(end);
+ }
+ enabled_wait();
+ if (clock_saved)
+ local_tick_enable(clock_saved);
+ } while (get_tod_clock_fast() < end);
+}
+
+/*
+ * Waits for 'usecs' microseconds using the TOD clock comparator.
+ */
+void __udelay(unsigned long long usecs)
+{
+ unsigned long flags;
+
+ preempt_disable();
+ local_irq_save(flags);
+ if (in_irq()) {
+ __udelay_disabled(usecs);
+ goto out;
+ }
+ if (in_softirq()) {
+ if (raw_irqs_disabled_flags(flags))
+ __udelay_disabled(usecs);
+ else
+ __udelay_enabled(usecs);
+ goto out;
+ }
+ if (raw_irqs_disabled_flags(flags)) {
+ local_bh_disable();
+ __udelay_disabled(usecs);
+ _local_bh_enable();
+ goto out;
+ }
+ __udelay_enabled(usecs);
+out:
+ local_irq_restore(flags);
+ preempt_enable();
+}
+EXPORT_SYMBOL(__udelay);
+
+/*
+ * Simple udelay variant. To be used on startup and reboot
+ * when the interrupt handler isn't working.
+ */
+void udelay_simple(unsigned long long usecs)
+{
+ u64 end;
+
+ end = get_tod_clock_fast() + (usecs << 12);
+ while (get_tod_clock_fast() < end)
+ cpu_relax();
+}
+
+void __ndelay(unsigned long long nsecs)
+{
+ u64 end;
+
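+ /* nanoseconds to TOD clock units: ns * 512 / 125 == ns * 4096 / 1000 */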
+ nsecs <<= 9;
+ do_div(nsecs, 125);
+ end = get_tod_clock_fast() + nsecs;
+ if (nsecs & ~0xfffUL)
+ __udelay(nsecs >> 12);
+ while (get_tod_clock_fast() < end)
+ barrier();
+}
+EXPORT_SYMBOL(__ndelay);
diff --git a/arch/s390/lib/find.c b/arch/s390/lib/find.c
new file mode 100644
index 000000000..96a8a2e2d
--- /dev/null
+++ b/arch/s390/lib/find.c
@@ -0,0 +1,76 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * MSB0 numbered special bitops handling.
+ *
+ * The bits are numbered:
+ * |0..............63|64............127|128...........191|192...........255|
+ *
+ * The reason for this bit numbering is the fact that the hardware sets bits
+ * in a bitmap starting at bit 0 (MSB) and we don't want to scan the bitmap
+ * from the 'wrong end'.
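+ *
+ * For example, with bitmap[0] == 0x8000000000000000UL (only the leftmost bit
+ * set), find_first_bit_inv(bitmap, 64) returns 0, whereas the generic
+ * find_first_bit() would return 63.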
+ */
+
+#include <linux/compiler.h>
+#include <linux/bitops.h>
+#include <linux/export.h>
+
+unsigned long find_first_bit_inv(const unsigned long *addr, unsigned long size)
+{
+ const unsigned long *p = addr;
+ unsigned long result = 0;
+ unsigned long tmp;
+
+ while (size & ~(BITS_PER_LONG - 1)) {
+ if ((tmp = *(p++)))
+ goto found;
+ result += BITS_PER_LONG;
+ size -= BITS_PER_LONG;
+ }
+ if (!size)
+ return result;
+ tmp = (*p) & (~0UL << (BITS_PER_LONG - size));
+ if (!tmp) /* Are any bits set? */
+ return result + size; /* Nope. */
+found:
+ return result + (__fls(tmp) ^ (BITS_PER_LONG - 1));
+}
+EXPORT_SYMBOL(find_first_bit_inv);
+
+unsigned long find_next_bit_inv(const unsigned long *addr, unsigned long size,
+ unsigned long offset)
+{
+ const unsigned long *p = addr + (offset / BITS_PER_LONG);
+ unsigned long result = offset & ~(BITS_PER_LONG - 1);
+ unsigned long tmp;
+
+ if (offset >= size)
+ return size;
+ size -= result;
+ offset %= BITS_PER_LONG;
+ if (offset) {
+ tmp = *(p++);
+ tmp &= (~0UL >> offset);
+ if (size < BITS_PER_LONG)
+ goto found_first;
+ if (tmp)
+ goto found_middle;
+ size -= BITS_PER_LONG;
+ result += BITS_PER_LONG;
+ }
+ while (size & ~(BITS_PER_LONG-1)) {
+ if ((tmp = *(p++)))
+ goto found_middle;
+ result += BITS_PER_LONG;
+ size -= BITS_PER_LONG;
+ }
+ if (!size)
+ return result;
+ tmp = *p;
+found_first:
+ tmp &= (~0UL << (BITS_PER_LONG - size));
+ if (!tmp) /* Are any bits set? */
+ return result + size; /* Nope. */
+found_middle:
+ return result + (__fls(tmp) ^ (BITS_PER_LONG - 1));
+}
+EXPORT_SYMBOL(find_next_bit_inv);
diff --git a/arch/s390/lib/mem.S b/arch/s390/lib/mem.S
new file mode 100644
index 000000000..40c4d59c9
--- /dev/null
+++ b/arch/s390/lib/mem.S
@@ -0,0 +1,184 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Memory handling functions.
+ *
+ * Copyright IBM Corp. 2012
+ */
+
+#include <linux/linkage.h>
+#include <asm/export.h>
+#include <asm/nospec-insn.h>
+
+ GEN_BR_THUNK %r14
+
+/*
+ * void *memmove(void *dest, const void *src, size_t n)
+ */
+ENTRY(memmove)
+ ltgr %r4,%r4
+ lgr %r1,%r2
+ jz .Lmemmove_exit
+ aghi %r4,-1
+ clgr %r2,%r3
+ jnh .Lmemmove_forward
+ la %r5,1(%r4,%r3)
+ clgr %r2,%r5
+ jl .Lmemmove_reverse
+.Lmemmove_forward:
+ srlg %r0,%r4,8
+ ltgr %r0,%r0
+ jz .Lmemmove_forward_remainder
+.Lmemmove_forward_loop:
+ mvc 0(256,%r1),0(%r3)
+ la %r1,256(%r1)
+ la %r3,256(%r3)
+ brctg %r0,.Lmemmove_forward_loop
+.Lmemmove_forward_remainder:
+ larl %r5,.Lmemmove_mvc
+ ex %r4,0(%r5)
+.Lmemmove_exit:
+ BR_EX %r14
+.Lmemmove_reverse:
+ ic %r0,0(%r4,%r3)
+ stc %r0,0(%r4,%r1)
+ brctg %r4,.Lmemmove_reverse
+ ic %r0,0(%r4,%r3)
+ stc %r0,0(%r4,%r1)
+ BR_EX %r14
+.Lmemmove_mvc:
+ mvc 0(1,%r1),0(%r3)
+EXPORT_SYMBOL(memmove)
+
+/*
+ * memset implementation
+ *
+ * This code corresponds to the C construct below. We do distinguish
+ * between clearing (c == 0) and setting a memory array (c != 0) simply
+ * because nearly all memset invocations in the kernel clear memory and
+ * the xc instruction is preferred in such cases.
+ *
+ * void *memset(void *s, int c, size_t n)
+ * {
+ * if (likely(c == 0))
+ * return __builtin_memset(s, 0, n);
+ * return __builtin_memset(s, c, n);
+ * }
+ */
+ENTRY(memset)
+ ltgr %r4,%r4
+ jz .Lmemset_exit
+ ltgr %r3,%r3
+ jnz .Lmemset_fill
+ aghi %r4,-1
+ srlg %r3,%r4,8
+ ltgr %r3,%r3
+ lgr %r1,%r2
+ jz .Lmemset_clear_remainder
+.Lmemset_clear_loop:
+ xc 0(256,%r1),0(%r1)
+ la %r1,256(%r1)
+ brctg %r3,.Lmemset_clear_loop
+.Lmemset_clear_remainder:
+ larl %r3,.Lmemset_xc
+ ex %r4,0(%r3)
+.Lmemset_exit:
+ BR_EX %r14
+.Lmemset_fill:
+ cghi %r4,1
+ lgr %r1,%r2
+ je .Lmemset_fill_exit
+ aghi %r4,-2
+ srlg %r5,%r4,8
+ ltgr %r5,%r5
+ jz .Lmemset_fill_remainder
+.Lmemset_fill_loop:
+ stc %r3,0(%r1)
+ mvc 1(255,%r1),0(%r1)
+ la %r1,256(%r1)
+ brctg %r5,.Lmemset_fill_loop
+.Lmemset_fill_remainder:
+ stc %r3,0(%r1)
+ larl %r5,.Lmemset_mvc
+ ex %r4,0(%r5)
+ BR_EX %r14
+.Lmemset_fill_exit:
+ stc %r3,0(%r1)
+ BR_EX %r14
+.Lmemset_xc:
+ xc 0(1,%r1),0(%r1)
+.Lmemset_mvc:
+ mvc 1(1,%r1),0(%r1)
+EXPORT_SYMBOL(memset)
+
+/*
+ * memcpy implementation
+ *
+ * void *memcpy(void *dest, const void *src, size_t n)
+ */
+ENTRY(memcpy)
+ ltgr %r4,%r4
+ jz .Lmemcpy_exit
+ aghi %r4,-1
+ srlg %r5,%r4,8
+ ltgr %r5,%r5
+ lgr %r1,%r2
+ jnz .Lmemcpy_loop
+.Lmemcpy_remainder:
+ larl %r5,.Lmemcpy_mvc
+ ex %r4,0(%r5)
+.Lmemcpy_exit:
+ BR_EX %r14
+.Lmemcpy_loop:
+ mvc 0(256,%r1),0(%r3)
+ la %r1,256(%r1)
+ la %r3,256(%r3)
+ brctg %r5,.Lmemcpy_loop
+ j .Lmemcpy_remainder
+.Lmemcpy_mvc:
+ mvc 0(1,%r1),0(%r3)
+EXPORT_SYMBOL(memcpy)
+
+/*
+ * __memset16/32/64
+ *
+ * void *__memset16(uint16_t *s, uint16_t v, size_t count)
+ * void *__memset32(uint32_t *s, uint32_t v, size_t count)
+ * void *__memset64(uint64_t *s, uint64_t v, size_t count)
+ */
+.macro __MEMSET bits,bytes,insn
+ENTRY(__memset\bits)
+ ltgr %r4,%r4
+ jz .L__memset_exit\bits
+ cghi %r4,\bytes
+ je .L__memset_store\bits
+ aghi %r4,-(\bytes+1)
+ srlg %r5,%r4,8
+ ltgr %r5,%r5
+ lgr %r1,%r2
+ jz .L__memset_remainder\bits
+.L__memset_loop\bits:
+ \insn %r3,0(%r1)
+ mvc \bytes(256-\bytes,%r1),0(%r1)
+ la %r1,256(%r1)
+ brctg %r5,.L__memset_loop\bits
+.L__memset_remainder\bits:
+ \insn %r3,0(%r1)
+ larl %r5,.L__memset_mvc\bits
+ ex %r4,0(%r5)
+ BR_EX %r14
+.L__memset_store\bits:
+ \insn %r3,0(%r2)
+.L__memset_exit\bits:
+ BR_EX %r14
+.L__memset_mvc\bits:
+ mvc \bytes(1,%r1),0(%r1)
+.endm
+
+__MEMSET 16,2,sth
+EXPORT_SYMBOL(__memset16)
+
+__MEMSET 32,4,st
+EXPORT_SYMBOL(__memset32)
+
+__MEMSET 64,8,stg
+EXPORT_SYMBOL(__memset64)
diff --git a/arch/s390/lib/probes.c b/arch/s390/lib/probes.c
new file mode 100644
index 000000000..1e184a034
--- /dev/null
+++ b/arch/s390/lib/probes.c
@@ -0,0 +1,161 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Common helper functions for kprobes and uprobes
+ *
+ * Copyright IBM Corp. 2014
+ */
+
+#include <linux/errno.h>
+#include <asm/kprobes.h>
+#include <asm/dis.h>
+
+int probe_is_prohibited_opcode(u16 *insn)
+{
+ if (!is_known_insn((unsigned char *)insn))
+ return -EINVAL;
+ switch (insn[0] >> 8) {
+ case 0x0c: /* bassm */
+ case 0x0b: /* bsm */
+ case 0x83: /* diag */
+ case 0x44: /* ex */
+ case 0xac: /* stnsm */
+ case 0xad: /* stosm */
+ return -EINVAL;
+ case 0xc6:
+ switch (insn[0] & 0x0f) {
+ case 0x00: /* exrl */
+ return -EINVAL;
+ }
+ }
+ switch (insn[0]) {
+ case 0x0101: /* pr */
+ case 0xb25a: /* bsa */
+ case 0xb240: /* bakr */
+ case 0xb258: /* bsg */
+ case 0xb218: /* pc */
+ case 0xb228: /* pt */
+ case 0xb98d: /* epsw */
+ case 0xe560: /* tbegin */
+ case 0xe561: /* tbeginc */
+ case 0xb2f8: /* tend */
+ return -EINVAL;
+ }
+ return 0;
+}
+
+int probe_get_fixup_type(u16 *insn)
+{
+ /* default fixup method */
+ int fixup = FIXUP_PSW_NORMAL;
+
+ switch (insn[0] >> 8) {
+ case 0x05: /* balr */
+ case 0x0d: /* basr */
+ fixup = FIXUP_RETURN_REGISTER;
+ /* if r2 = 0, no branch will be taken */
+ if ((insn[0] & 0x0f) == 0)
+ fixup |= FIXUP_BRANCH_NOT_TAKEN;
+ break;
+ case 0x06: /* bctr */
+ case 0x07: /* bcr */
+ fixup = FIXUP_BRANCH_NOT_TAKEN;
+ break;
+ case 0x45: /* bal */
+ case 0x4d: /* bas */
+ fixup = FIXUP_RETURN_REGISTER;
+ break;
+ case 0x47: /* bc */
+ case 0x46: /* bct */
+ case 0x86: /* bxh */
+ case 0x87: /* bxle */
+ fixup = FIXUP_BRANCH_NOT_TAKEN;
+ break;
+ case 0x82: /* lpsw */
+ fixup = FIXUP_NOT_REQUIRED;
+ break;
+ case 0xb2: /* lpswe */
+ if ((insn[0] & 0xff) == 0xb2)
+ fixup = FIXUP_NOT_REQUIRED;
+ break;
+ case 0xa7: /* bras */
+ if ((insn[0] & 0x0f) == 0x05)
+ fixup |= FIXUP_RETURN_REGISTER;
+ break;
+ case 0xc0:
+ if ((insn[0] & 0x0f) == 0x05) /* brasl */
+ fixup |= FIXUP_RETURN_REGISTER;
+ break;
+ case 0xeb:
+ switch (insn[2] & 0xff) {
+ case 0x44: /* bxhg */
+ case 0x45: /* bxleg */
+ fixup = FIXUP_BRANCH_NOT_TAKEN;
+ break;
+ }
+ break;
+ case 0xe3: /* bctg */
+ if ((insn[2] & 0xff) == 0x46)
+ fixup = FIXUP_BRANCH_NOT_TAKEN;
+ break;
+ case 0xec:
+ switch (insn[2] & 0xff) {
+ case 0xe5: /* clgrb */
+ case 0xe6: /* cgrb */
+ case 0xf6: /* crb */
+ case 0xf7: /* clrb */
+ case 0xfc: /* cgib */
+ case 0xfd: /* cglib */
+ case 0xfe: /* cib */
+ case 0xff: /* clib */
+ fixup = FIXUP_BRANCH_NOT_TAKEN;
+ break;
+ }
+ break;
+ }
+ return fixup;
+}
+
+int probe_is_insn_relative_long(u16 *insn)
+{
+ /* Check if we have a RIL-b or RIL-c format instruction which
+ * we need to modify in order to avoid instruction emulation. */
+ switch (insn[0] >> 8) {
+ case 0xc0:
+ if ((insn[0] & 0x0f) == 0x00) /* larl */
+ return true;
+ break;
+ case 0xc4:
+ switch (insn[0] & 0x0f) {
+ case 0x02: /* llhrl */
+ case 0x04: /* lghrl */
+ case 0x05: /* lhrl */
+ case 0x06: /* llghrl */
+ case 0x07: /* sthrl */
+ case 0x08: /* lgrl */
+ case 0x0b: /* stgrl */
+ case 0x0c: /* lgfrl */
+ case 0x0d: /* lrl */
+ case 0x0e: /* llgfrl */
+ case 0x0f: /* strl */
+ return true;
+ }
+ break;
+ case 0xc6:
+ switch (insn[0] & 0x0f) {
+ case 0x02: /* pfdrl */
+ case 0x04: /* cghrl */
+ case 0x05: /* chrl */
+ case 0x06: /* clghrl */
+ case 0x07: /* clhrl */
+ case 0x08: /* cgrl */
+ case 0x0a: /* clgrl */
+ case 0x0c: /* cgfrl */
+ case 0x0d: /* crl */
+ case 0x0e: /* clgfrl */
+ case 0x0f: /* clrl */
+ return true;
+ }
+ break;
+ }
+ return false;
+}
diff --git a/arch/s390/lib/spinlock.c b/arch/s390/lib/spinlock.c
new file mode 100644
index 000000000..30a7c8c29
--- /dev/null
+++ b/arch/s390/lib/spinlock.c
@@ -0,0 +1,325 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Out of line spinlock code.
+ *
+ * Copyright IBM Corp. 2004, 2006
+ * Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com)
+ */
+
+#include <linux/types.h>
+#include <linux/export.h>
+#include <linux/spinlock.h>
+#include <linux/jiffies.h>
+#include <linux/init.h>
+#include <linux/smp.h>
+#include <linux/percpu.h>
+#include <asm/alternative.h>
+#include <asm/io.h>
+
+int spin_retry = -1;
+
+static int __init spin_retry_init(void)
+{
+ if (spin_retry < 0)
+ spin_retry = 1000;
+ return 0;
+}
+early_initcall(spin_retry_init);
+
+/*
+ * Parse the "spin_retry=" kernel command line parameter.
+ */
+static int __init spin_retry_setup(char *str)
+{
+ spin_retry = simple_strtoul(str, &str, 0);
+ return 1;
+}
+__setup("spin_retry=", spin_retry_setup);
+
+struct spin_wait {
+ struct spin_wait *next, *prev;
+ int node_id;
+} __aligned(32);
+
+static DEFINE_PER_CPU_ALIGNED(struct spin_wait, spin_wait[4]);
+
+#define _Q_LOCK_CPU_OFFSET 0
+#define _Q_LOCK_STEAL_OFFSET 16
+#define _Q_TAIL_IDX_OFFSET 18
+#define _Q_TAIL_CPU_OFFSET 20
+
+#define _Q_LOCK_CPU_MASK 0x0000ffff
+#define _Q_LOCK_STEAL_ADD 0x00010000
+#define _Q_LOCK_STEAL_MASK 0x00030000
+#define _Q_TAIL_IDX_MASK 0x000c0000
+#define _Q_TAIL_CPU_MASK 0xfff00000
+
+#define _Q_LOCK_MASK (_Q_LOCK_CPU_MASK | _Q_LOCK_STEAL_MASK)
+#define _Q_TAIL_MASK (_Q_TAIL_IDX_MASK | _Q_TAIL_CPU_MASK)
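+
+/*
+ * Resulting lock word layout (LSB 0 numbering):
+ * bits  0-15: owning CPU + 1 (0 == unlocked)
+ * bits 16-17: lock-steal counter
+ * bits 18-19: per-CPU wait node index of the queue tail
+ * bits 20-31: CPU + 1 of the queue tail
+ */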
+
+void arch_spin_lock_setup(int cpu)
+{
+ struct spin_wait *node;
+ int ix;
+
+ node = per_cpu_ptr(&spin_wait[0], cpu);
+ for (ix = 0; ix < 4; ix++, node++) {
+ memset(node, 0, sizeof(*node));
+ node->node_id = ((cpu + 1) << _Q_TAIL_CPU_OFFSET) +
+ (ix << _Q_TAIL_IDX_OFFSET);
+ }
+}
+
+static inline int arch_load_niai4(int *lock)
+{
+ int owner;
+
+ asm volatile(
+ ALTERNATIVE("", ".long 0xb2fa0040", 49) /* NIAI 4 */
+ " l %0,%1\n"
+ : "=d" (owner) : "Q" (*lock) : "memory");
+ return owner;
+}
+
+static inline int arch_cmpxchg_niai8(int *lock, int old, int new)
+{
+ int expected = old;
+
+ asm volatile(
+ ALTERNATIVE("", ".long 0xb2fa0080", 49) /* NIAI 8 */
+ " cs %0,%3,%1\n"
+ : "=d" (old), "=Q" (*lock)
+ : "0" (old), "d" (new), "Q" (*lock)
+ : "cc", "memory");
+ return expected == old;
+}
+
+static inline struct spin_wait *arch_spin_decode_tail(int lock)
+{
+ int ix, cpu;
+
+ ix = (lock & _Q_TAIL_IDX_MASK) >> _Q_TAIL_IDX_OFFSET;
+ cpu = (lock & _Q_TAIL_CPU_MASK) >> _Q_TAIL_CPU_OFFSET;
+ return per_cpu_ptr(&spin_wait[ix], cpu - 1);
+}
+
+static inline int arch_spin_yield_target(int lock, struct spin_wait *node)
+{
+ if (lock & _Q_LOCK_CPU_MASK)
+ return lock & _Q_LOCK_CPU_MASK;
+ if (node == NULL || node->prev == NULL)
+ return 0; /* 0 -> no target cpu */
+ while (node->prev)
+ node = node->prev;
+ return node->node_id >> _Q_TAIL_CPU_OFFSET;
+}
+
+static inline void arch_spin_lock_queued(arch_spinlock_t *lp)
+{
+ struct spin_wait *node, *next;
+ int lockval, ix, node_id, tail_id, old, new, owner, count;
+
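+ /*
+ * Pick one of the four per-CPU wait nodes; the lowcore index allows
+ * spinlock waits to nest and is decremented again at the out: label.
+ */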
+ ix = S390_lowcore.spinlock_index++;
+ barrier();
+ lockval = SPINLOCK_LOCKVAL; /* cpu + 1 */
+ node = this_cpu_ptr(&spin_wait[ix]);
+ node->prev = node->next = NULL;
+ node_id = node->node_id;
+
+ /* Enqueue the node for this CPU in the spinlock wait queue */
+ while (1) {
+ old = READ_ONCE(lp->lock);
+ if ((old & _Q_LOCK_CPU_MASK) == 0 &&
+ (old & _Q_LOCK_STEAL_MASK) != _Q_LOCK_STEAL_MASK) {
+ /*
+ * The lock is free but there may be waiters.
+ * With no waiters, simply take the lock; if there
+ * are waiters, try to steal the lock. The lock may
+ * be stolen three times before the next queued
+ * waiter gets the lock.
+ */
+ new = (old ? (old + _Q_LOCK_STEAL_ADD) : 0) | lockval;
+ if (__atomic_cmpxchg_bool(&lp->lock, old, new))
+ /* Got the lock */
+ goto out;
+ /* lock passing in progress */
+ continue;
+ }
+ /* Make the node of this CPU the new tail. */
+ new = node_id | (old & _Q_LOCK_MASK);
+ if (__atomic_cmpxchg_bool(&lp->lock, old, new))
+ break;
+ }
+ /* Set the 'next' pointer of the tail node in the queue */
+ tail_id = old & _Q_TAIL_MASK;
+ if (tail_id != 0) {
+ node->prev = arch_spin_decode_tail(tail_id);
+ WRITE_ONCE(node->prev->next, node);
+ }
+
+ /* Pass the virtual CPU to the lock holder if it is not running */
+ owner = arch_spin_yield_target(old, node);
+ if (owner && arch_vcpu_is_preempted(owner - 1))
+ smp_yield_cpu(owner - 1);
+
+ /* Spin on the CPU local node->prev pointer */
+ if (tail_id != 0) {
+ count = spin_retry;
+ while (READ_ONCE(node->prev) != NULL) {
+ if (count-- >= 0)
+ continue;
+ count = spin_retry;
+ /* Query running state of lock holder again. */
+ owner = arch_spin_yield_target(old, node);
+ if (owner && arch_vcpu_is_preempted(owner - 1))
+ smp_yield_cpu(owner - 1);
+ }
+ }
+
+ /* Spin on the lock value in the spinlock_t */
+ count = spin_retry;
+ while (1) {
+ old = READ_ONCE(lp->lock);
+ owner = old & _Q_LOCK_CPU_MASK;
+ if (!owner) {
+ tail_id = old & _Q_TAIL_MASK;
+ new = ((tail_id != node_id) ? tail_id : 0) | lockval;
+ if (__atomic_cmpxchg_bool(&lp->lock, old, new))
+ /* Got the lock */
+ break;
+ continue;
+ }
+ if (count-- >= 0)
+ continue;
+ count = spin_retry;
+ if (!MACHINE_IS_LPAR || arch_vcpu_is_preempted(owner - 1))
+ smp_yield_cpu(owner - 1);
+ }
+
+ /* Pass lock_spin job to next CPU in the queue */
+ if (node_id && tail_id != node_id) {
+ /* Wait until the next CPU has set up the 'next' pointer */
+ while ((next = READ_ONCE(node->next)) == NULL)
+ ;
+ next->prev = NULL;
+ }
+
+ out:
+ S390_lowcore.spinlock_index--;
+}
+
+static inline void arch_spin_lock_classic(arch_spinlock_t *lp)
+{
+ int lockval, old, new, owner, count;
+
+ lockval = SPINLOCK_LOCKVAL; /* cpu + 1 */
+
+ /* Pass the virtual CPU to the lock holder if it is not running */
+ owner = arch_spin_yield_target(READ_ONCE(lp->lock), NULL);
+ if (owner && arch_vcpu_is_preempted(owner - 1))
+ smp_yield_cpu(owner - 1);
+
+ count = spin_retry;
+ while (1) {
+ old = arch_load_niai4(&lp->lock);
+ owner = old & _Q_LOCK_CPU_MASK;
+ /* Try to get the lock if it is free. */
+ if (!owner) {
+ new = (old & _Q_TAIL_MASK) | lockval;
+ if (arch_cmpxchg_niai8(&lp->lock, old, new)) {
+ /* Got the lock */
+ return;
+ }
+ continue;
+ }
+ if (count-- >= 0)
+ continue;
+ count = spin_retry;
+ if (!MACHINE_IS_LPAR || arch_vcpu_is_preempted(owner - 1))
+ smp_yield_cpu(owner - 1);
+ }
+}
+
+void arch_spin_lock_wait(arch_spinlock_t *lp)
+{
+ /* Use queued spinlocks on dedicated CPUs; fall back to classic spinlocks + niai if the steal time is >= 10% */
+ if (test_cpu_flag(CIF_DEDICATED_CPU))
+ arch_spin_lock_queued(lp);
+ else
+ arch_spin_lock_classic(lp);
+}
+EXPORT_SYMBOL(arch_spin_lock_wait);
+
+int arch_spin_trylock_retry(arch_spinlock_t *lp)
+{
+ int cpu = SPINLOCK_LOCKVAL;
+ int owner, count;
+
+ for (count = spin_retry; count > 0; count--) {
+ owner = READ_ONCE(lp->lock);
+ /* Try to get the lock if it is free. */
+ if (!owner) {
+ if (__atomic_cmpxchg_bool(&lp->lock, 0, cpu))
+ return 1;
+ }
+ }
+ return 0;
+}
+EXPORT_SYMBOL(arch_spin_trylock_retry);
+
+void arch_read_lock_wait(arch_rwlock_t *rw)
+{
+ if (unlikely(in_interrupt())) {
+ while (READ_ONCE(rw->cnts) & 0x10000)
+ barrier();
+ return;
+ }
+
+ /* Remove this reader again to allow recursive read locking */
+ __atomic_add_const(-1, &rw->cnts);
+ /* Put the reader into the wait queue */
+ arch_spin_lock(&rw->wait);
+ /* Now add this reader to the count value again */
+ __atomic_add_const(1, &rw->cnts);
+ /* Loop until the writer is done */
+ while (READ_ONCE(rw->cnts) & 0x10000)
+ barrier();
+ arch_spin_unlock(&rw->wait);
+}
+EXPORT_SYMBOL(arch_read_lock_wait);
+
+void arch_write_lock_wait(arch_rwlock_t *rw)
+{
+ int old;
+
+ /* Add this CPU to the write waiters */
+ __atomic_add(0x20000, &rw->cnts);
+
+ /* Put the writer into the wait queue */
+ arch_spin_lock(&rw->wait);
+
+ while (1) {
+ old = READ_ONCE(rw->cnts);
+ if ((old & 0x1ffff) == 0 &&
+ __atomic_cmpxchg_bool(&rw->cnts, old, old | 0x10000))
+ /* Got the lock */
+ break;
+ barrier();
+ }
+
+ arch_spin_unlock(&rw->wait);
+}
+EXPORT_SYMBOL(arch_write_lock_wait);
+
+void arch_spin_relax(arch_spinlock_t *lp)
+{
+ int cpu;
+
+ cpu = READ_ONCE(lp->lock) & _Q_LOCK_CPU_MASK;
+ if (!cpu)
+ return;
+ if (MACHINE_IS_LPAR && !arch_vcpu_is_preempted(cpu - 1))
+ return;
+ smp_yield_cpu(cpu - 1);
+}
+EXPORT_SYMBOL(arch_spin_relax);
diff --git a/arch/s390/lib/string.c b/arch/s390/lib/string.c
new file mode 100644
index 000000000..c1832eba5
--- /dev/null
+++ b/arch/s390/lib/string.c
@@ -0,0 +1,343 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Optimized string functions
+ *
+ * S390 version
+ * Copyright IBM Corp. 2004
+ * Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com)
+ */
+
+#define IN_ARCH_STRING_C 1
+
+#include <linux/types.h>
+#include <linux/string.h>
+#include <linux/export.h>
+
+/*
+ * Helper functions to find the end of a string
+ */
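+/*
+ * Both helpers are built around the SRST (search string) instruction:
+ * general register 0 holds the byte to search for (0 here, i.e. the
+ * string terminator) and the "jo" loop resumes the instruction
+ * whenever it stops before finding the byte or reaching the end
+ * address.
+ */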
+static inline char *__strend(const char *s)
+{
+ register unsigned long r0 asm("0") = 0;
+
+ asm volatile ("0: srst %0,%1\n"
+ " jo 0b"
+ : "+d" (r0), "+a" (s) : : "cc", "memory");
+ return (char *) r0;
+}
+
+static inline char *__strnend(const char *s, size_t n)
+{
+ register unsigned long r0 asm("0") = 0;
+ const char *p = s + n;
+
+ asm volatile ("0: srst %0,%1\n"
+ " jo 0b"
+ : "+d" (p), "+a" (s) : "d" (r0) : "cc", "memory");
+ return (char *) p;
+}
+
+/**
+ * strlen - Find the length of a string
+ * @s: The string to be sized
+ *
+ * returns the length of @s
+ */
+size_t strlen(const char *s)
+{
+ return __strend(s) - s;
+}
+EXPORT_SYMBOL(strlen);
+
+/**
+ * strnlen - Find the length of a length-limited string
+ * @s: The string to be sized
+ * @n: The maximum number of bytes to search
+ *
+ * returns the minimum of the length of @s and @n
+ */
+size_t strnlen(const char *s, size_t n)
+{
+ return __strnend(s, n) - s;
+}
+EXPORT_SYMBOL(strnlen);
+
+/**
+ * strcpy - Copy a %NUL terminated string
+ * @dest: Where to copy the string to
+ * @src: Where to copy the string from
+ *
+ * returns a pointer to @dest
+ */
+char *strcpy(char *dest, const char *src)
+{
+ register int r0 asm("0") = 0;
+ char *ret = dest;
+
+ asm volatile ("0: mvst %0,%1\n"
+ " jo 0b"
+ : "+&a" (dest), "+&a" (src) : "d" (r0)
+ : "cc", "memory" );
+ return ret;
+}
+EXPORT_SYMBOL(strcpy);
+
+/**
+ * strlcpy - Copy a %NUL terminated string into a sized buffer
+ * @dest: Where to copy the string to
+ * @src: Where to copy the string from
+ * @size: size of destination buffer
+ *
+ * Compatible with *BSD: the result is always a valid
+ * NUL-terminated string that fits in the buffer (unless,
+ * of course, the buffer size is zero). It does not pad
+ * out the result like strncpy() does.
+ */
+size_t strlcpy(char *dest, const char *src, size_t size)
+{
+ size_t ret = __strend(src) - src;
+
+ if (size) {
+ size_t len = (ret >= size) ? size-1 : ret;
+ dest[len] = '\0';
+ memcpy(dest, src, len);
+ }
+ return ret;
+}
+EXPORT_SYMBOL(strlcpy);
+
+/**
+ * strncpy - Copy a length-limited, %NUL-terminated string
+ * @dest: Where to copy the string to
+ * @src: Where to copy the string from
+ * @n: The maximum number of bytes to copy
+ *
+ * The result is not %NUL-terminated if the source exceeds
+ * @n bytes.
+ */
+char *strncpy(char *dest, const char *src, size_t n)
+{
+ size_t len = __strnend(src, n) - src;
+ memset(dest + len, 0, n - len);
+ memcpy(dest, src, len);
+ return dest;
+}
+EXPORT_SYMBOL(strncpy);
+
+/**
+ * strcat - Append one %NUL-terminated string to another
+ * @dest: The string to be appended to
+ * @src: The string to append to it
+ *
+ * returns a pointer to @dest
+ */
+char *strcat(char *dest, const char *src)
+{
+ register int r0 asm("0") = 0;
+ unsigned long dummy;
+ char *ret = dest;
+
+ asm volatile ("0: srst %0,%1\n"
+ " jo 0b\n"
+ "1: mvst %0,%2\n"
+ " jo 1b"
+ : "=&a" (dummy), "+a" (dest), "+a" (src)
+ : "d" (r0), "0" (0UL) : "cc", "memory" );
+ return ret;
+}
+EXPORT_SYMBOL(strcat);
+
+/**
+ * strlcat - Append a length-limited, %NUL-terminated string to another
+ * @dest: The string to be appended to
+ * @src: The string to append to it
+ * @n: The size of the destination buffer.
+ */
+size_t strlcat(char *dest, const char *src, size_t n)
+{
+ size_t dsize = __strend(dest) - dest;
+ size_t len = __strend(src) - src;
+ size_t res = dsize + len;
+
+ if (dsize < n) {
+ dest += dsize;
+ n -= dsize;
+ if (len >= n)
+ len = n - 1;
+ dest[len] = '\0';
+ memcpy(dest, src, len);
+ }
+ return res;
+}
+EXPORT_SYMBOL(strlcat);
+
+/**
+ * strncat - Append a length-limited, %NUL-terminated string to another
+ * @dest: The string to be appended to
+ * @src: The string to append to it
+ * @n: The maximum numbers of bytes to copy
+ *
+ * returns a pointer to @dest
+ *
+ * Note that in contrast to strncpy, strncat ensures the result is
+ * terminated.
+ */
+char *strncat(char *dest, const char *src, size_t n)
+{
+ size_t len = __strnend(src, n) - src;
+ char *p = __strend(dest);
+
+ p[len] = '\0';
+ memcpy(p, src, len);
+ return dest;
+}
+EXPORT_SYMBOL(strncat);
+
+/**
+ * strcmp - Compare two strings
+ * @s1: One string
+ * @s2: Another string
+ *
+ * returns 0 if @s1 and @s2 are equal,
+ * < 0 if @s1 is less than @s2
+ * > 0 if @s1 is greater than @s2
+ */
+int strcmp(const char *s1, const char *s2)
+{
+ register int r0 asm("0") = 0;
+ int ret = 0;
+
+ asm volatile ("0: clst %2,%3\n"
+ " jo 0b\n"
+ " je 1f\n"
+ " ic %0,0(%2)\n"
+ " ic %1,0(%3)\n"
+ " sr %0,%1\n"
+ "1:"
+ : "+d" (ret), "+d" (r0), "+a" (s1), "+a" (s2)
+ : : "cc", "memory");
+ return ret;
+}
+EXPORT_SYMBOL(strcmp);
+
+/**
+ * strrchr - Find the last occurrence of a character in a string
+ * @s: The string to be searched
+ * @c: The character to search for
+ */
+char *strrchr(const char *s, int c)
+{
+ ssize_t len = __strend(s) - s;
+
+ do {
+ if (s[len] == (char)c)
+ return (char *)s + len;
+ } while (--len >= 0);
+ return NULL;
+}
+EXPORT_SYMBOL(strrchr);
+
+static inline int clcle(const char *s1, unsigned long l1,
+ const char *s2, unsigned long l2)
+{
+ register unsigned long r2 asm("2") = (unsigned long) s1;
+ register unsigned long r3 asm("3") = (unsigned long) l1;
+ register unsigned long r4 asm("4") = (unsigned long) s2;
+ register unsigned long r5 asm("5") = (unsigned long) l2;
+ int cc;
+
+ asm volatile ("0: clcle %1,%3,0\n"
+ " jo 0b\n"
+ " ipm %0\n"
+ " srl %0,28"
+ : "=&d" (cc), "+a" (r2), "+a" (r3),
+ "+a" (r4), "+a" (r5) : : "cc", "memory");
+ return cc;
+}
+
+/**
+ * strstr - Find the first substring in a %NUL terminated string
+ * @s1: The string to be searched
+ * @s2: The string to search for
+ */
+char *strstr(const char *s1, const char *s2)
+{
+ int l1, l2;
+
+ l2 = __strend(s2) - s2;
+ if (!l2)
+ return (char *) s1;
+ l1 = __strend(s1) - s1;
+ while (l1-- >= l2) {
+ int cc;
+
+ cc = clcle(s1, l2, s2, l2);
+ if (!cc)
+ return (char *) s1;
+ s1++;
+ }
+ return NULL;
+}
+EXPORT_SYMBOL(strstr);
+
+/**
+ * memchr - Find a character in an area of memory.
+ * @s: The memory area
+ * @c: The byte to search for
+ * @n: The size of the area.
+ *
+ * returns the address of the first occurrence of @c, or %NULL
+ * if @c is not found
+ */
+void *memchr(const void *s, int c, size_t n)
+{
+ register int r0 asm("0") = (char) c;
+ const void *ret = s + n;
+
+ asm volatile ("0: srst %0,%1\n"
+ " jo 0b\n"
+ " jl 1f\n"
+ " la %0,0\n"
+ "1:"
+ : "+a" (ret), "+&a" (s) : "d" (r0) : "cc", "memory");
+ return (void *) ret;
+}
+EXPORT_SYMBOL(memchr);
+
+/**
+ * memcmp - Compare two areas of memory
+ * @s1: One area of memory
+ * @s2: Another area of memory
+ * @n: The size of the area.
+ */
+int memcmp(const void *s1, const void *s2, size_t n)
+{
+ int ret;
+
+ ret = clcle(s1, n, s2, n);
+ if (ret)
+ ret = ret == 1 ? -1 : 1;
+ return ret;
+}
+EXPORT_SYMBOL(memcmp);
+
+/**
+ * memscan - Find a character in an area of memory.
+ * @s: The memory area
+ * @c: The byte to search for
+ * @n: The size of the area.
+ *
+ * returns the address of the first occurrence of @c, or 1 byte past
+ * the area if @c is not found
+ */
+void *memscan(void *s, int c, size_t n)
+{
+ register int r0 asm("0") = (char) c;
+ const void *ret = s + n;
+
+ asm volatile ("0: srst %0,%1\n"
+ " jo 0b\n"
+ : "+a" (ret), "+&a" (s) : "d" (r0) : "cc", "memory");
+ return (void *) ret;
+}
+EXPORT_SYMBOL(memscan);
diff --git a/arch/s390/lib/uaccess.c b/arch/s390/lib/uaccess.c
new file mode 100644
index 000000000..0267405ab
--- /dev/null
+++ b/arch/s390/lib/uaccess.c
@@ -0,0 +1,448 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Standard user space access functions based on mvcp/mvcs and doing
+ * interesting things in the secondary space mode.
+ *
+ * Copyright IBM Corp. 2006,2014
+ * Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com),
+ * Gerald Schaefer (gerald.schaefer@de.ibm.com)
+ */
+
+#include <linux/jump_label.h>
+#include <linux/uaccess.h>
+#include <linux/export.h>
+#include <linux/errno.h>
+#include <linux/mm.h>
+#include <asm/mmu_context.h>
+#include <asm/facility.h>
+
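+/*
+ * Facility 27 is the move-with-optional-specifications (MVCOS)
+ * facility. When it is available, the mvcos based copy routines can
+ * address user and kernel space directly in a single instruction;
+ * otherwise the mvcp/mvcs/mvc/xc fallbacks below have to switch to
+ * secondary address space mode via sacf.
+ */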
+#ifndef CONFIG_HAVE_MARCH_Z10_FEATURES
+static DEFINE_STATIC_KEY_FALSE(have_mvcos);
+
+static int __init uaccess_init(void)
+{
+ if (test_facility(27))
+ static_branch_enable(&have_mvcos);
+ return 0;
+}
+early_initcall(uaccess_init);
+
+static inline int copy_with_mvcos(void)
+{
+ if (static_branch_likely(&have_mvcos))
+ return 1;
+ return 0;
+}
+#else
+static inline int copy_with_mvcos(void)
+{
+ return 1;
+}
+#endif
+
+void set_fs(mm_segment_t fs)
+{
+ current->thread.mm_segment = fs;
+ if (fs == USER_DS) {
+ __ctl_load(S390_lowcore.user_asce, 1, 1);
+ clear_cpu_flag(CIF_ASCE_PRIMARY);
+ } else {
+ __ctl_load(S390_lowcore.kernel_asce, 1, 1);
+ set_cpu_flag(CIF_ASCE_PRIMARY);
+ }
+ if (fs & 1) {
+ if (fs == USER_DS_SACF)
+ __ctl_load(S390_lowcore.user_asce, 7, 7);
+ else
+ __ctl_load(S390_lowcore.kernel_asce, 7, 7);
+ set_cpu_flag(CIF_ASCE_SECONDARY);
+ }
+}
+EXPORT_SYMBOL(set_fs);
+
+mm_segment_t enable_sacf_uaccess(void)
+{
+ mm_segment_t old_fs;
+ unsigned long asce, cr;
+ unsigned long flags;
+
+ old_fs = current->thread.mm_segment;
+ if (old_fs & 1)
+ return old_fs;
+ /* protect against a concurrent page table upgrade */
+ local_irq_save(flags);
+ current->thread.mm_segment |= 1;
+ asce = S390_lowcore.kernel_asce;
+ if (likely(old_fs == USER_DS)) {
+ __ctl_store(cr, 1, 1);
+ if (cr != S390_lowcore.kernel_asce) {
+ __ctl_load(S390_lowcore.kernel_asce, 1, 1);
+ set_cpu_flag(CIF_ASCE_PRIMARY);
+ }
+ asce = S390_lowcore.user_asce;
+ }
+ __ctl_store(cr, 7, 7);
+ if (cr != asce) {
+ __ctl_load(asce, 7, 7);
+ set_cpu_flag(CIF_ASCE_SECONDARY);
+ }
+ local_irq_restore(flags);
+ return old_fs;
+}
+EXPORT_SYMBOL(enable_sacf_uaccess);
+
+void disable_sacf_uaccess(mm_segment_t old_fs)
+{
+ current->thread.mm_segment = old_fs;
+ if (old_fs == USER_DS && test_facility(27)) {
+ __ctl_load(S390_lowcore.user_asce, 1, 1);
+ clear_cpu_flag(CIF_ASCE_PRIMARY);
+ }
+}
+EXPORT_SYMBOL(disable_sacf_uaccess);
+
+static inline unsigned long copy_from_user_mvcos(void *x, const void __user *ptr,
+ unsigned long size)
+{
+ register unsigned long reg0 asm("0") = 0x01UL;
+ unsigned long tmp1, tmp2;
+
+ tmp1 = -4096UL;
+ asm volatile(
+ "0: .insn ss,0xc80000000000,0(%0,%2),0(%1),0\n"
+ "6: jz 4f\n"
+ "1: algr %0,%3\n"
+ " slgr %1,%3\n"
+ " slgr %2,%3\n"
+ " j 0b\n"
+ "2: la %4,4095(%1)\n"/* %4 = ptr + 4095 */
+ " nr %4,%3\n" /* %4 = (ptr + 4095) & -4096 */
+ " slgr %4,%1\n"
+ " clgr %0,%4\n" /* copy crosses next page boundary? */
+ " jnh 5f\n"
+ "3: .insn ss,0xc80000000000,0(%4,%2),0(%1),0\n"
+ "7: slgr %0,%4\n"
+ " j 5f\n"
+ "4: slgr %0,%0\n"
+ "5:\n"
+ EX_TABLE(0b,2b) EX_TABLE(3b,5b) EX_TABLE(6b,2b) EX_TABLE(7b,5b)
+ : "+a" (size), "+a" (ptr), "+a" (x), "+a" (tmp1), "=a" (tmp2)
+ : "d" (reg0) : "cc", "memory");
+ return size;
+}
+
+static inline unsigned long copy_from_user_mvcp(void *x, const void __user *ptr,
+ unsigned long size)
+{
+ unsigned long tmp1, tmp2;
+ mm_segment_t old_fs;
+
+ old_fs = enable_sacf_uaccess();
+ tmp1 = -256UL;
+ asm volatile(
+ " sacf 0\n"
+ "0: mvcp 0(%0,%2),0(%1),%3\n"
+ "7: jz 5f\n"
+ "1: algr %0,%3\n"
+ " la %1,256(%1)\n"
+ " la %2,256(%2)\n"
+ "2: mvcp 0(%0,%2),0(%1),%3\n"
+ "8: jnz 1b\n"
+ " j 5f\n"
+ "3: la %4,255(%1)\n" /* %4 = ptr + 255 */
+ " lghi %3,-4096\n"
+ " nr %4,%3\n" /* %4 = (ptr + 255) & -4096 */
+ " slgr %4,%1\n"
+ " clgr %0,%4\n" /* copy crosses next page boundary? */
+ " jnh 6f\n"
+ "4: mvcp 0(%4,%2),0(%1),%3\n"
+ "9: slgr %0,%4\n"
+ " j 6f\n"
+ "5: slgr %0,%0\n"
+ "6: sacf 768\n"
+ EX_TABLE(0b,3b) EX_TABLE(2b,3b) EX_TABLE(4b,6b)
+ EX_TABLE(7b,3b) EX_TABLE(8b,3b) EX_TABLE(9b,6b)
+ : "+a" (size), "+a" (ptr), "+a" (x), "+a" (tmp1), "=a" (tmp2)
+ : : "cc", "memory");
+ disable_sacf_uaccess(old_fs);
+ return size;
+}
+
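+/*
+ * Returns the number of bytes that could not be copied; 0 means the
+ * whole range was copied. Callers normally go through copy_from_user(),
+ * which performs the access_ok() check before ending up here.
+ */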
+unsigned long raw_copy_from_user(void *to, const void __user *from, unsigned long n)
+{
+ if (copy_with_mvcos())
+ return copy_from_user_mvcos(to, from, n);
+ return copy_from_user_mvcp(to, from, n);
+}
+EXPORT_SYMBOL(raw_copy_from_user);
+
+static inline unsigned long copy_to_user_mvcos(void __user *ptr, const void *x,
+ unsigned long size)
+{
+ register unsigned long reg0 asm("0") = 0x010000UL;
+ unsigned long tmp1, tmp2;
+
+ tmp1 = -4096UL;
+ asm volatile(
+ "0: .insn ss,0xc80000000000,0(%0,%1),0(%2),0\n"
+ "6: jz 4f\n"
+ "1: algr %0,%3\n"
+ " slgr %1,%3\n"
+ " slgr %2,%3\n"
+ " j 0b\n"
+ "2: la %4,4095(%1)\n"/* %4 = ptr + 4095 */
+ " nr %4,%3\n" /* %4 = (ptr + 4095) & -4096 */
+ " slgr %4,%1\n"
+ " clgr %0,%4\n" /* copy crosses next page boundary? */
+ " jnh 5f\n"
+ "3: .insn ss,0xc80000000000,0(%4,%1),0(%2),0\n"
+ "7: slgr %0,%4\n"
+ " j 5f\n"
+ "4: slgr %0,%0\n"
+ "5:\n"
+ EX_TABLE(0b,2b) EX_TABLE(3b,5b) EX_TABLE(6b,2b) EX_TABLE(7b,5b)
+ : "+a" (size), "+a" (ptr), "+a" (x), "+a" (tmp1), "=a" (tmp2)
+ : "d" (reg0) : "cc", "memory");
+ return size;
+}
+
+static inline unsigned long copy_to_user_mvcs(void __user *ptr, const void *x,
+ unsigned long size)
+{
+ unsigned long tmp1, tmp2;
+ mm_segment_t old_fs;
+
+ old_fs = enable_sacf_uaccess();
+ tmp1 = -256UL;
+ asm volatile(
+ " sacf 0\n"
+ "0: mvcs 0(%0,%1),0(%2),%3\n"
+ "7: jz 5f\n"
+ "1: algr %0,%3\n"
+ " la %1,256(%1)\n"
+ " la %2,256(%2)\n"
+ "2: mvcs 0(%0,%1),0(%2),%3\n"
+ "8: jnz 1b\n"
+ " j 5f\n"
+ "3: la %4,255(%1)\n" /* %4 = ptr + 255 */
+ " lghi %3,-4096\n"
+ " nr %4,%3\n" /* %4 = (ptr + 255) & -4096 */
+ " slgr %4,%1\n"
+ " clgr %0,%4\n" /* copy crosses next page boundary? */
+ " jnh 6f\n"
+ "4: mvcs 0(%4,%1),0(%2),%3\n"
+ "9: slgr %0,%4\n"
+ " j 6f\n"
+ "5: slgr %0,%0\n"
+ "6: sacf 768\n"
+ EX_TABLE(0b,3b) EX_TABLE(2b,3b) EX_TABLE(4b,6b)
+ EX_TABLE(7b,3b) EX_TABLE(8b,3b) EX_TABLE(9b,6b)
+ : "+a" (size), "+a" (ptr), "+a" (x), "+a" (tmp1), "=a" (tmp2)
+ : : "cc", "memory");
+ disable_sacf_uaccess(old_fs);
+ return size;
+}
+
+unsigned long raw_copy_to_user(void __user *to, const void *from, unsigned long n)
+{
+ if (copy_with_mvcos())
+ return copy_to_user_mvcos(to, from, n);
+ return copy_to_user_mvcs(to, from, n);
+}
+EXPORT_SYMBOL(raw_copy_to_user);
+
+static inline unsigned long copy_in_user_mvcos(void __user *to, const void __user *from,
+ unsigned long size)
+{
+ register unsigned long reg0 asm("0") = 0x010001UL;
+ unsigned long tmp1, tmp2;
+
+ tmp1 = -4096UL;
+ /* FIXME: copy with reduced length. */
+ asm volatile(
+ "0: .insn ss,0xc80000000000,0(%0,%1),0(%2),0\n"
+ " jz 2f\n"
+ "1: algr %0,%3\n"
+ " slgr %1,%3\n"
+ " slgr %2,%3\n"
+ " j 0b\n"
+ "2:slgr %0,%0\n"
+ "3: \n"
+ EX_TABLE(0b,3b)
+ : "+a" (size), "+a" (to), "+a" (from), "+a" (tmp1), "=a" (tmp2)
+ : "d" (reg0) : "cc", "memory");
+ return size;
+}
+
+static inline unsigned long copy_in_user_mvc(void __user *to, const void __user *from,
+ unsigned long size)
+{
+ mm_segment_t old_fs;
+ unsigned long tmp1;
+
+ old_fs = enable_sacf_uaccess();
+ asm volatile(
+ " sacf 256\n"
+ " aghi %0,-1\n"
+ " jo 5f\n"
+ " bras %3,3f\n"
+ "0: aghi %0,257\n"
+ "1: mvc 0(1,%1),0(%2)\n"
+ " la %1,1(%1)\n"
+ " la %2,1(%2)\n"
+ " aghi %0,-1\n"
+ " jnz 1b\n"
+ " j 5f\n"
+ "2: mvc 0(256,%1),0(%2)\n"
+ " la %1,256(%1)\n"
+ " la %2,256(%2)\n"
+ "3: aghi %0,-256\n"
+ " jnm 2b\n"
+ "4: ex %0,1b-0b(%3)\n"
+ "5: slgr %0,%0\n"
+ "6: sacf 768\n"
+ EX_TABLE(1b,6b) EX_TABLE(2b,0b) EX_TABLE(4b,0b)
+ : "+a" (size), "+a" (to), "+a" (from), "=a" (tmp1)
+ : : "cc", "memory");
+ disable_sacf_uaccess(old_fs);
+ return size;
+}
+
+unsigned long raw_copy_in_user(void __user *to, const void __user *from, unsigned long n)
+{
+ if (copy_with_mvcos())
+ return copy_in_user_mvcos(to, from, n);
+ return copy_in_user_mvc(to, from, n);
+}
+EXPORT_SYMBOL(raw_copy_in_user);
+
+static inline unsigned long clear_user_mvcos(void __user *to, unsigned long size)
+{
+ register unsigned long reg0 asm("0") = 0x010000UL;
+ unsigned long tmp1, tmp2;
+
+ tmp1 = -4096UL;
+ asm volatile(
+ "0: .insn ss,0xc80000000000,0(%0,%1),0(%4),0\n"
+ " jz 4f\n"
+ "1: algr %0,%2\n"
+ " slgr %1,%2\n"
+ " j 0b\n"
+ "2: la %3,4095(%1)\n"/* %4 = to + 4095 */
+ " nr %3,%2\n" /* %4 = (to + 4095) & -4096 */
+ " slgr %3,%1\n"
+ " clgr %0,%3\n" /* copy crosses next page boundary? */
+ " jnh 5f\n"
+ "3: .insn ss,0xc80000000000,0(%3,%1),0(%4),0\n"
+ " slgr %0,%3\n"
+ " j 5f\n"
+ "4: slgr %0,%0\n"
+ "5:\n"
+ EX_TABLE(0b,2b) EX_TABLE(3b,5b)
+ : "+a" (size), "+a" (to), "+a" (tmp1), "=a" (tmp2)
+ : "a" (empty_zero_page), "d" (reg0) : "cc", "memory");
+ return size;
+}
+
+static inline unsigned long clear_user_xc(void __user *to, unsigned long size)
+{
+ mm_segment_t old_fs;
+ unsigned long tmp1, tmp2;
+
+ old_fs = enable_sacf_uaccess();
+ asm volatile(
+ " sacf 256\n"
+ " aghi %0,-1\n"
+ " jo 5f\n"
+ " bras %3,3f\n"
+ " xc 0(1,%1),0(%1)\n"
+ "0: aghi %0,257\n"
+ " la %2,255(%1)\n" /* %2 = ptr + 255 */
+ " srl %2,12\n"
+ " sll %2,12\n" /* %2 = (ptr + 255) & -4096 */
+ " slgr %2,%1\n"
+ " clgr %0,%2\n" /* clear crosses next page boundary? */
+ " jnh 5f\n"
+ " aghi %2,-1\n"
+ "1: ex %2,0(%3)\n"
+ " aghi %2,1\n"
+ " slgr %0,%2\n"
+ " j 5f\n"
+ "2: xc 0(256,%1),0(%1)\n"
+ " la %1,256(%1)\n"
+ "3: aghi %0,-256\n"
+ " jnm 2b\n"
+ "4: ex %0,0(%3)\n"
+ "5: slgr %0,%0\n"
+ "6: sacf 768\n"
+ EX_TABLE(1b,6b) EX_TABLE(2b,0b) EX_TABLE(4b,0b)
+ : "+a" (size), "+a" (to), "=a" (tmp1), "=a" (tmp2)
+ : : "cc", "memory");
+ disable_sacf_uaccess(old_fs);
+ return size;
+}
+
+unsigned long __clear_user(void __user *to, unsigned long size)
+{
+ if (copy_with_mvcos())
+ return clear_user_mvcos(to, size);
+ return clear_user_xc(to, size);
+}
+EXPORT_SYMBOL(__clear_user);
+
+static inline unsigned long strnlen_user_srst(const char __user *src,
+ unsigned long size)
+{
+ register unsigned long reg0 asm("0") = 0;
+ unsigned long tmp1, tmp2;
+
+ asm volatile(
+ " la %2,0(%1)\n"
+ " la %3,0(%0,%1)\n"
+ " slgr %0,%0\n"
+ " sacf 256\n"
+ "0: srst %3,%2\n"
+ " jo 0b\n"
+ " la %0,1(%3)\n" /* strnlen_user results includes \0 */
+ " slgr %0,%1\n"
+ "1: sacf 768\n"
+ EX_TABLE(0b,1b)
+ : "+a" (size), "+a" (src), "=a" (tmp1), "=a" (tmp2)
+ : "d" (reg0) : "cc", "memory");
+ return size;
+}
+
+unsigned long __strnlen_user(const char __user *src, unsigned long size)
+{
+ mm_segment_t old_fs;
+ unsigned long len;
+
+ if (unlikely(!size))
+ return 0;
+ old_fs = enable_sacf_uaccess();
+ len = strnlen_user_srst(src, size);
+ disable_sacf_uaccess(old_fs);
+ return len;
+}
+EXPORT_SYMBOL(__strnlen_user);
+
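+/*
+ * Copy at most L1_CACHE_BYTES at a time, aligned to the user source
+ * pointer, and run strnlen() on each kernel-side chunk to detect the
+ * terminating NUL. Returns the length of the copied string (without
+ * the terminator, at most size), or -EFAULT if the user buffer was
+ * not accessible.
+ */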
+long __strncpy_from_user(char *dst, const char __user *src, long size)
+{
+ size_t done, len, offset, len_str;
+
+ if (unlikely(size <= 0))
+ return 0;
+ done = 0;
+ do {
+ offset = (size_t)src & (L1_CACHE_BYTES - 1);
+ len = min(size - done, L1_CACHE_BYTES - offset);
+ if (copy_from_user(dst, src, len))
+ return -EFAULT;
+ len_str = strnlen(dst, len);
+ done += len_str;
+ src += len_str;
+ dst += len_str;
+ } while ((len_str == len) && (done < size));
+ return done;
+}
+EXPORT_SYMBOL(__strncpy_from_user);
diff --git a/arch/s390/lib/xor.c b/arch/s390/lib/xor.c
new file mode 100644
index 000000000..96580590c
--- /dev/null
+++ b/arch/s390/lib/xor.c
@@ -0,0 +1,135 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Optimized xor_block operation for RAID4/5
+ *
+ * Copyright IBM Corp. 2016
+ * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>
+ */
+
+#include <linux/types.h>
+#include <linux/export.h>
+#include <linux/raid/xor.h>
+
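+/*
+ * All variants follow the same pattern: XOR the other buffers into the
+ * first one in 256-byte chunks with XC, then use EX to execute one
+ * final XC that covers the remaining tail bytes.
+ */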
+static void xor_xc_2(unsigned long bytes, unsigned long *p1, unsigned long *p2)
+{
+ asm volatile(
+ " larl 1,2f\n"
+ " aghi %0,-1\n"
+ " jm 3f\n"
+ " srlg 0,%0,8\n"
+ " ltgr 0,0\n"
+ " jz 1f\n"
+ "0: xc 0(256,%1),0(%2)\n"
+ " la %1,256(%1)\n"
+ " la %2,256(%2)\n"
+ " brctg 0,0b\n"
+ "1: ex %0,0(1)\n"
+ " j 3f\n"
+ "2: xc 0(1,%1),0(%2)\n"
+ "3:\n"
+ : : "d" (bytes), "a" (p1), "a" (p2)
+ : "0", "1", "cc", "memory");
+}
+
+static void xor_xc_3(unsigned long bytes, unsigned long *p1, unsigned long *p2,
+ unsigned long *p3)
+{
+ asm volatile(
+ " larl 1,2f\n"
+ " aghi %0,-1\n"
+ " jm 3f\n"
+ " srlg 0,%0,8\n"
+ " ltgr 0,0\n"
+ " jz 1f\n"
+ "0: xc 0(256,%1),0(%2)\n"
+ " xc 0(256,%1),0(%3)\n"
+ " la %1,256(%1)\n"
+ " la %2,256(%2)\n"
+ " la %3,256(%3)\n"
+ " brctg 0,0b\n"
+ "1: ex %0,0(1)\n"
+ " ex %0,6(1)\n"
+ " j 3f\n"
+ "2: xc 0(1,%1),0(%2)\n"
+ " xc 0(1,%1),0(%3)\n"
+ "3:\n"
+ : "+d" (bytes), "+a" (p1), "+a" (p2), "+a" (p3)
+ : : "0", "1", "cc", "memory");
+}
+
+static void xor_xc_4(unsigned long bytes, unsigned long *p1, unsigned long *p2,
+ unsigned long *p3, unsigned long *p4)
+{
+ asm volatile(
+ " larl 1,2f\n"
+ " aghi %0,-1\n"
+ " jm 3f\n"
+ " srlg 0,%0,8\n"
+ " ltgr 0,0\n"
+ " jz 1f\n"
+ "0: xc 0(256,%1),0(%2)\n"
+ " xc 0(256,%1),0(%3)\n"
+ " xc 0(256,%1),0(%4)\n"
+ " la %1,256(%1)\n"
+ " la %2,256(%2)\n"
+ " la %3,256(%3)\n"
+ " la %4,256(%4)\n"
+ " brctg 0,0b\n"
+ "1: ex %0,0(1)\n"
+ " ex %0,6(1)\n"
+ " ex %0,12(1)\n"
+ " j 3f\n"
+ "2: xc 0(1,%1),0(%2)\n"
+ " xc 0(1,%1),0(%3)\n"
+ " xc 0(1,%1),0(%4)\n"
+ "3:\n"
+ : "+d" (bytes), "+a" (p1), "+a" (p2), "+a" (p3), "+a" (p4)
+ : : "0", "1", "cc", "memory");
+}
+
+static void xor_xc_5(unsigned long bytes, unsigned long *p1, unsigned long *p2,
+ unsigned long *p3, unsigned long *p4, unsigned long *p5)
+{
+ /* Get around a gcc oddity */
+ register unsigned long *reg7 asm ("7") = p5;
+
+ asm volatile(
+ " larl 1,2f\n"
+ " aghi %0,-1\n"
+ " jm 3f\n"
+ " srlg 0,%0,8\n"
+ " ltgr 0,0\n"
+ " jz 1f\n"
+ "0: xc 0(256,%1),0(%2)\n"
+ " xc 0(256,%1),0(%3)\n"
+ " xc 0(256,%1),0(%4)\n"
+ " xc 0(256,%1),0(%5)\n"
+ " la %1,256(%1)\n"
+ " la %2,256(%2)\n"
+ " la %3,256(%3)\n"
+ " la %4,256(%4)\n"
+ " la %5,256(%5)\n"
+ " brctg 0,0b\n"
+ "1: ex %0,0(1)\n"
+ " ex %0,6(1)\n"
+ " ex %0,12(1)\n"
+ " ex %0,18(1)\n"
+ " j 3f\n"
+ "2: xc 0(1,%1),0(%2)\n"
+ " xc 0(1,%1),0(%3)\n"
+ " xc 0(1,%1),0(%4)\n"
+ " xc 0(1,%1),0(%5)\n"
+ "3:\n"
+ : "+d" (bytes), "+a" (p1), "+a" (p2), "+a" (p3), "+a" (p4),
+ "+a" (reg7)
+ : : "0", "1", "cc", "memory");
+}
+
+struct xor_block_template xor_block_xc = {
+ .name = "xc",
+ .do_2 = xor_xc_2,
+ .do_3 = xor_xc_3,
+ .do_4 = xor_xc_4,
+ .do_5 = xor_xc_5,
+};
+EXPORT_SYMBOL(xor_block_xc);
diff --git a/arch/s390/mm/Makefile b/arch/s390/mm/Makefile
new file mode 100644
index 000000000..33fe41850
--- /dev/null
+++ b/arch/s390/mm/Makefile
@@ -0,0 +1,13 @@
+# SPDX-License-Identifier: GPL-2.0
+#
+# Makefile for the linux s390-specific parts of the memory manager.
+#
+
+obj-y := init.o fault.o extmem.o mmap.o vmem.o maccess.o
+obj-y += page-states.o gup.o pageattr.o mem_detect.o
+obj-y += pgtable.o pgalloc.o
+
+obj-$(CONFIG_CMM) += cmm.o
+obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o
+obj-$(CONFIG_S390_PTDUMP) += dump_pagetables.o
+obj-$(CONFIG_PGSTE) += gmap.o
diff --git a/arch/s390/mm/cmm.c b/arch/s390/mm/cmm.c
new file mode 100644
index 000000000..a51c892f1
--- /dev/null
+++ b/arch/s390/mm/cmm.c
@@ -0,0 +1,487 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Collaborative memory management interface.
+ *
+ * Copyright IBM Corp 2003, 2010
+ * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>,
+ *
+ */
+
+#include <linux/errno.h>
+#include <linux/fs.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/moduleparam.h>
+#include <linux/gfp.h>
+#include <linux/sched.h>
+#include <linux/sysctl.h>
+#include <linux/ctype.h>
+#include <linux/swap.h>
+#include <linux/kthread.h>
+#include <linux/oom.h>
+#include <linux/suspend.h>
+#include <linux/uaccess.h>
+
+#include <asm/pgalloc.h>
+#include <asm/diag.h>
+
+#ifdef CONFIG_CMM_IUCV
+static char *cmm_default_sender = "VMRMSVM";
+#endif
+static char *sender;
+module_param(sender, charp, 0400);
+MODULE_PARM_DESC(sender,
+ "Guest name that may send SMSG messages (default VMRMSVM)");
+
+#include "../../../drivers/s390/net/smsgiucv.h"
+
+#define CMM_NR_PAGES ((PAGE_SIZE / sizeof(unsigned long)) - 2)
+
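+/*
+ * Pages handed back to the hypervisor (via diagnose 0x10 in
+ * cmm_alloc_pages()) are tracked in a linked list of page-sized
+ * arrays; each array holds up to CMM_NR_PAGES page addresses plus the
+ * next pointer and the fill index.
+ */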
+struct cmm_page_array {
+ struct cmm_page_array *next;
+ unsigned long index;
+ unsigned long pages[CMM_NR_PAGES];
+};
+
+static long cmm_pages;
+static long cmm_timed_pages;
+static volatile long cmm_pages_target;
+static volatile long cmm_timed_pages_target;
+static long cmm_timeout_pages;
+static long cmm_timeout_seconds;
+static int cmm_suspended;
+
+static struct cmm_page_array *cmm_page_list;
+static struct cmm_page_array *cmm_timed_page_list;
+static DEFINE_SPINLOCK(cmm_lock);
+
+static struct task_struct *cmm_thread_ptr;
+static DECLARE_WAIT_QUEUE_HEAD(cmm_thread_wait);
+
+static void cmm_timer_fn(struct timer_list *);
+static void cmm_set_timer(void);
+static DEFINE_TIMER(cmm_timer, cmm_timer_fn);
+
+static long cmm_alloc_pages(long nr, long *counter,
+ struct cmm_page_array **list)
+{
+ struct cmm_page_array *pa, *npa;
+ unsigned long addr;
+
+ while (nr) {
+ addr = __get_free_page(GFP_NOIO);
+ if (!addr)
+ break;
+ spin_lock(&cmm_lock);
+ pa = *list;
+ if (!pa || pa->index >= CMM_NR_PAGES) {
+ /* Need a new page for the page list. */
+ spin_unlock(&cmm_lock);
+ npa = (struct cmm_page_array *)
+ __get_free_page(GFP_NOIO);
+ if (!npa) {
+ free_page(addr);
+ break;
+ }
+ spin_lock(&cmm_lock);
+ pa = *list;
+ if (!pa || pa->index >= CMM_NR_PAGES) {
+ npa->next = pa;
+ npa->index = 0;
+ pa = npa;
+ *list = pa;
+ } else
+ free_page((unsigned long) npa);
+ }
+ diag10_range(addr >> PAGE_SHIFT, 1);
+ pa->pages[pa->index++] = addr;
+ (*counter)++;
+ spin_unlock(&cmm_lock);
+ nr--;
+ }
+ return nr;
+}
+
+static long cmm_free_pages(long nr, long *counter, struct cmm_page_array **list)
+{
+ struct cmm_page_array *pa;
+ unsigned long addr;
+
+ spin_lock(&cmm_lock);
+ pa = *list;
+ while (nr) {
+ if (!pa || pa->index <= 0)
+ break;
+ addr = pa->pages[--pa->index];
+ if (pa->index == 0) {
+ pa = pa->next;
+ free_page((unsigned long) *list);
+ *list = pa;
+ }
+ free_page(addr);
+ (*counter)--;
+ nr--;
+ }
+ spin_unlock(&cmm_lock);
+ return nr;
+}
+
+static int cmm_oom_notify(struct notifier_block *self,
+ unsigned long dummy, void *parm)
+{
+ unsigned long *freed = parm;
+ long nr = 256;
+
+ nr = cmm_free_pages(nr, &cmm_timed_pages, &cmm_timed_page_list);
+ if (nr > 0)
+ nr = cmm_free_pages(nr, &cmm_pages, &cmm_page_list);
+ cmm_pages_target = cmm_pages;
+ cmm_timed_pages_target = cmm_timed_pages;
+ *freed += 256 - nr;
+ return NOTIFY_OK;
+}
+
+static struct notifier_block cmm_oom_nb = {
+ .notifier_call = cmm_oom_notify,
+};
+
+static int cmm_thread(void *dummy)
+{
+ int rc;
+
+ while (1) {
+ rc = wait_event_interruptible(cmm_thread_wait,
+ (!cmm_suspended && (cmm_pages != cmm_pages_target ||
+ cmm_timed_pages != cmm_timed_pages_target)) ||
+ kthread_should_stop());
+ if (kthread_should_stop() || rc == -ERESTARTSYS) {
+ cmm_pages_target = cmm_pages;
+ cmm_timed_pages_target = cmm_timed_pages;
+ break;
+ }
+ if (cmm_pages_target > cmm_pages) {
+ if (cmm_alloc_pages(1, &cmm_pages, &cmm_page_list))
+ cmm_pages_target = cmm_pages;
+ } else if (cmm_pages_target < cmm_pages) {
+ cmm_free_pages(1, &cmm_pages, &cmm_page_list);
+ }
+ if (cmm_timed_pages_target > cmm_timed_pages) {
+ if (cmm_alloc_pages(1, &cmm_timed_pages,
+ &cmm_timed_page_list))
+ cmm_timed_pages_target = cmm_timed_pages;
+ } else if (cmm_timed_pages_target < cmm_timed_pages) {
+ cmm_free_pages(1, &cmm_timed_pages,
+ &cmm_timed_page_list);
+ }
+ if (cmm_timed_pages > 0 && !timer_pending(&cmm_timer))
+ cmm_set_timer();
+ }
+ return 0;
+}
+
+static void cmm_kick_thread(void)
+{
+ wake_up(&cmm_thread_wait);
+}
+
+static void cmm_set_timer(void)
+{
+ if (cmm_timed_pages_target <= 0 || cmm_timeout_seconds <= 0) {
+ if (timer_pending(&cmm_timer))
+ del_timer(&cmm_timer);
+ return;
+ }
+ mod_timer(&cmm_timer, jiffies + cmm_timeout_seconds * HZ);
+}
+
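+/*
+ * The timer lowers the timed pages target by cmm_timeout_pages every
+ * cmm_timeout_seconds, so timed pages are given back to Linux
+ * gradually instead of all at once.
+ */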
+static void cmm_timer_fn(struct timer_list *unused)
+{
+ long nr;
+
+ nr = cmm_timed_pages_target - cmm_timeout_pages;
+ if (nr < 0)
+ cmm_timed_pages_target = 0;
+ else
+ cmm_timed_pages_target = nr;
+ cmm_kick_thread();
+ cmm_set_timer();
+}
+
+static void cmm_set_pages(long nr)
+{
+ cmm_pages_target = nr;
+ cmm_kick_thread();
+}
+
+static long cmm_get_pages(void)
+{
+ return cmm_pages;
+}
+
+static void cmm_add_timed_pages(long nr)
+{
+ cmm_timed_pages_target += nr;
+ cmm_kick_thread();
+}
+
+static long cmm_get_timed_pages(void)
+{
+ return cmm_timed_pages;
+}
+
+static void cmm_set_timeout(long nr, long seconds)
+{
+ cmm_timeout_pages = nr;
+ cmm_timeout_seconds = seconds;
+ cmm_set_timer();
+}
+
+static int cmm_skip_blanks(char *cp, char **endp)
+{
+ char *str;
+
+ for (str = cp; *str == ' ' || *str == '\t'; str++)
+ ;
+ *endp = str;
+ return str != cp;
+}
+
+static int cmm_pages_handler(struct ctl_table *ctl, int write,
+ void __user *buffer, size_t *lenp, loff_t *ppos)
+{
+ long nr = cmm_get_pages();
+ struct ctl_table ctl_entry = {
+ .procname = ctl->procname,
+ .data = &nr,
+ .maxlen = sizeof(long),
+ };
+ int rc;
+
+ rc = proc_doulongvec_minmax(&ctl_entry, write, buffer, lenp, ppos);
+ if (rc < 0 || !write)
+ return rc;
+
+ cmm_set_pages(nr);
+ return 0;
+}
+
+static int cmm_timed_pages_handler(struct ctl_table *ctl, int write,
+ void __user *buffer, size_t *lenp,
+ loff_t *ppos)
+{
+ long nr = cmm_get_timed_pages();
+ struct ctl_table ctl_entry = {
+ .procname = ctl->procname,
+ .data = &nr,
+ .maxlen = sizeof(long),
+ };
+ int rc;
+
+ rc = proc_doulongvec_minmax(&ctl_entry, write, buffer, lenp, ppos);
+ if (rc < 0 || !write)
+ return rc;
+
+ cmm_add_timed_pages(nr);
+ return 0;
+}
+
+static int cmm_timeout_handler(struct ctl_table *ctl, int write,
+ void __user *buffer, size_t *lenp, loff_t *ppos)
+{
+ char buf[64], *p;
+ long nr, seconds;
+ unsigned int len;
+
+ if (!*lenp || (*ppos && !write)) {
+ *lenp = 0;
+ return 0;
+ }
+
+ if (write) {
+ len = min(*lenp, sizeof(buf));
+ if (copy_from_user(buf, buffer, len))
+ return -EFAULT;
+ buf[len - 1] = '\0';
+ cmm_skip_blanks(buf, &p);
+ nr = simple_strtoul(p, &p, 0);
+ cmm_skip_blanks(p, &p);
+ seconds = simple_strtoul(p, &p, 0);
+ cmm_set_timeout(nr, seconds);
+ *ppos += *lenp;
+ } else {
+ len = sprintf(buf, "%ld %ld\n",
+ cmm_timeout_pages, cmm_timeout_seconds);
+ if (len > *lenp)
+ len = *lenp;
+ if (copy_to_user(buffer, buf, len))
+ return -EFAULT;
+ *lenp = len;
+ *ppos += len;
+ }
+ return 0;
+}
+
+static struct ctl_table cmm_table[] = {
+ {
+ .procname = "cmm_pages",
+ .mode = 0644,
+ .proc_handler = cmm_pages_handler,
+ },
+ {
+ .procname = "cmm_timed_pages",
+ .mode = 0644,
+ .proc_handler = cmm_timed_pages_handler,
+ },
+ {
+ .procname = "cmm_timeout",
+ .mode = 0644,
+ .proc_handler = cmm_timeout_handler,
+ },
+ { }
+};
+
+static struct ctl_table cmm_dir_table[] = {
+ {
+ .procname = "vm",
+ .maxlen = 0,
+ .mode = 0555,
+ .child = cmm_table,
+ },
+ { }
+};
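+
+/*
+ * The tables above show up under /proc/sys/vm. A usage sketch
+ * (assuming the module is loaded):
+ *
+ *   echo 1024 > /proc/sys/vm/cmm_pages      # take 1024 pages from Linux
+ *   echo 512 60 > /proc/sys/vm/cmm_timeout  # release 512 timed pages every 60s
+ */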
+
+#ifdef CONFIG_CMM_IUCV
+#define SMSG_PREFIX "CMM"
+static void cmm_smsg_target(const char *from, char *msg)
+{
+ long nr, seconds;
+
+ if (strlen(sender) > 0 && strcmp(from, sender) != 0)
+ return;
+ if (!cmm_skip_blanks(msg + strlen(SMSG_PREFIX), &msg))
+ return;
+ if (strncmp(msg, "SHRINK", 6) == 0) {
+ if (!cmm_skip_blanks(msg + 6, &msg))
+ return;
+ nr = simple_strtoul(msg, &msg, 0);
+ cmm_skip_blanks(msg, &msg);
+ if (*msg == '\0')
+ cmm_set_pages(nr);
+ } else if (strncmp(msg, "RELEASE", 7) == 0) {
+ if (!cmm_skip_blanks(msg + 7, &msg))
+ return;
+ nr = simple_strtoul(msg, &msg, 0);
+ cmm_skip_blanks(msg, &msg);
+ if (*msg == '\0')
+ cmm_add_timed_pages(nr);
+ } else if (strncmp(msg, "REUSE", 5) == 0) {
+ if (!cmm_skip_blanks(msg + 5, &msg))
+ return;
+ nr = simple_strtoul(msg, &msg, 0);
+ if (!cmm_skip_blanks(msg, &msg))
+ return;
+ seconds = simple_strtoul(msg, &msg, 0);
+ cmm_skip_blanks(msg, &msg);
+ if (*msg == '\0')
+ cmm_set_timeout(nr, seconds);
+ }
+}
+#endif
+
+static struct ctl_table_header *cmm_sysctl_header;
+
+static int cmm_suspend(void)
+{
+ cmm_suspended = 1;
+ cmm_free_pages(cmm_pages, &cmm_pages, &cmm_page_list);
+ cmm_free_pages(cmm_timed_pages, &cmm_timed_pages, &cmm_timed_page_list);
+ return 0;
+}
+
+static int cmm_resume(void)
+{
+ cmm_suspended = 0;
+ cmm_kick_thread();
+ return 0;
+}
+
+static int cmm_power_event(struct notifier_block *this,
+ unsigned long event, void *ptr)
+{
+ switch (event) {
+ case PM_POST_HIBERNATION:
+ return cmm_resume();
+ case PM_HIBERNATION_PREPARE:
+ return cmm_suspend();
+ default:
+ return NOTIFY_DONE;
+ }
+}
+
+static struct notifier_block cmm_power_notifier = {
+ .notifier_call = cmm_power_event,
+};
+
+static int __init cmm_init(void)
+{
+ int rc = -ENOMEM;
+
+ cmm_sysctl_header = register_sysctl_table(cmm_dir_table);
+ if (!cmm_sysctl_header)
+ goto out_sysctl;
+#ifdef CONFIG_CMM_IUCV
+ /* convert sender to uppercase characters */
+ if (sender) {
+ int len = strlen(sender);
+ while (len--)
+ sender[len] = toupper(sender[len]);
+ } else {
+ sender = cmm_default_sender;
+ }
+
+ rc = smsg_register_callback(SMSG_PREFIX, cmm_smsg_target);
+ if (rc < 0)
+ goto out_smsg;
+#endif
+ rc = register_oom_notifier(&cmm_oom_nb);
+ if (rc < 0)
+ goto out_oom_notify;
+ rc = register_pm_notifier(&cmm_power_notifier);
+ if (rc)
+ goto out_pm;
+ cmm_thread_ptr = kthread_run(cmm_thread, NULL, "cmmthread");
+ if (!IS_ERR(cmm_thread_ptr))
+ return 0;
+
+ rc = PTR_ERR(cmm_thread_ptr);
+ unregister_pm_notifier(&cmm_power_notifier);
+out_pm:
+ unregister_oom_notifier(&cmm_oom_nb);
+out_oom_notify:
+#ifdef CONFIG_CMM_IUCV
+ smsg_unregister_callback(SMSG_PREFIX, cmm_smsg_target);
+out_smsg:
+#endif
+ unregister_sysctl_table(cmm_sysctl_header);
+out_sysctl:
+ del_timer_sync(&cmm_timer);
+ return rc;
+}
+module_init(cmm_init);
+
+static void __exit cmm_exit(void)
+{
+ unregister_sysctl_table(cmm_sysctl_header);
+#ifdef CONFIG_CMM_IUCV
+ smsg_unregister_callback(SMSG_PREFIX, cmm_smsg_target);
+#endif
+ unregister_pm_notifier(&cmm_power_notifier);
+ unregister_oom_notifier(&cmm_oom_nb);
+ kthread_stop(cmm_thread_ptr);
+ del_timer_sync(&cmm_timer);
+ cmm_free_pages(cmm_pages, &cmm_pages, &cmm_page_list);
+ cmm_free_pages(cmm_timed_pages, &cmm_timed_pages, &cmm_timed_page_list);
+}
+module_exit(cmm_exit);
+
+MODULE_LICENSE("GPL");
diff --git a/arch/s390/mm/dump_pagetables.c b/arch/s390/mm/dump_pagetables.c
new file mode 100644
index 000000000..7cdea2ec5
--- /dev/null
+++ b/arch/s390/mm/dump_pagetables.c
@@ -0,0 +1,249 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/seq_file.h>
+#include <linux/debugfs.h>
+#include <linux/sched.h>
+#include <linux/mm.h>
+#include <asm/sections.h>
+#include <asm/pgtable.h>
+
+static unsigned long max_addr;
+
+struct addr_marker {
+ unsigned long start_address;
+ const char *name;
+};
+
+enum address_markers_idx {
+ IDENTITY_NR = 0,
+ KERNEL_START_NR,
+ KERNEL_END_NR,
+ VMEMMAP_NR,
+ VMALLOC_NR,
+ MODULES_NR,
+};
+
+static struct addr_marker address_markers[] = {
+ [IDENTITY_NR] = {0, "Identity Mapping"},
+ [KERNEL_START_NR] = {(unsigned long)_stext, "Kernel Image Start"},
+ [KERNEL_END_NR] = {(unsigned long)_end, "Kernel Image End"},
+ [VMEMMAP_NR] = {0, "vmemmap Area"},
+ [VMALLOC_NR] = {0, "vmalloc Area"},
+ [MODULES_NR] = {0, "Modules Area"},
+ { -1, NULL }
+};
+
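+/*
+ * The walker state: note_page() emits one output line per contiguous
+ * range that shares the same protection bits and mapping level, using
+ * the address markers above to delimit the kernel address space areas.
+ */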
+struct pg_state {
+ int level;
+ unsigned int current_prot;
+ unsigned long start_address;
+ unsigned long current_address;
+ const struct addr_marker *marker;
+};
+
+static void print_prot(struct seq_file *m, unsigned int pr, int level)
+{
+ static const char * const level_name[] =
+ { "ASCE", "PGD", "PUD", "PMD", "PTE" };
+
+ seq_printf(m, "%s ", level_name[level]);
+ if (pr & _PAGE_INVALID) {
+ seq_printf(m, "I\n");
+ return;
+ }
+ seq_puts(m, (pr & _PAGE_PROTECT) ? "RO " : "RW ");
+ seq_puts(m, (pr & _PAGE_NOEXEC) ? "NX\n" : "X\n");
+}
+
+static void note_page(struct seq_file *m, struct pg_state *st,
+ unsigned int new_prot, int level)
+{
+ static const char units[] = "KMGTPE";
+ int width = sizeof(unsigned long) * 2;
+ const char *unit = units;
+ unsigned int prot, cur;
+ unsigned long delta;
+
+ /*
+ * If we have a "break" in the series, we need to flush the state
+ * that we have now. "break" is either changing perms, levels or
+ * address space marker.
+ */
+ prot = new_prot;
+ cur = st->current_prot;
+
+ if (!st->level) {
+ /* First entry */
+ st->current_prot = new_prot;
+ st->level = level;
+ st->marker = address_markers;
+ seq_printf(m, "---[ %s ]---\n", st->marker->name);
+ } else if (prot != cur || level != st->level ||
+ st->current_address >= st->marker[1].start_address) {
+ /* Print the actual finished series */
+ seq_printf(m, "0x%0*lx-0x%0*lx",
+ width, st->start_address,
+ width, st->current_address);
+ delta = (st->current_address - st->start_address) >> 10;
+ while (!(delta & 0x3ff) && unit[1]) {
+ delta >>= 10;
+ unit++;
+ }
+ seq_printf(m, "%9lu%c ", delta, *unit);
+ print_prot(m, st->current_prot, st->level);
+ if (st->current_address >= st->marker[1].start_address) {
+ st->marker++;
+ seq_printf(m, "---[ %s ]---\n", st->marker->name);
+ }
+ st->start_address = st->current_address;
+ st->current_prot = new_prot;
+ st->level = level;
+ }
+}
+
+/*
+ * The actual page table walker functions. In order to keep the
+ * implementation of print_prot() short, we only check and pass
+ * _PAGE_INVALID and _PAGE_PROTECT flags to note_page() if a region,
+ * segment or page table entry is invalid or read-only.
+ * After all it's just a hint that the current level being walked
+ * contains an invalid or read-only entry.
+ */
+static void walk_pte_level(struct seq_file *m, struct pg_state *st,
+ pmd_t *pmd, unsigned long addr)
+{
+ unsigned int prot;
+ pte_t *pte;
+ int i;
+
+ for (i = 0; i < PTRS_PER_PTE && addr < max_addr; i++) {
+ st->current_address = addr;
+ pte = pte_offset_kernel(pmd, addr);
+ prot = pte_val(*pte) &
+ (_PAGE_PROTECT | _PAGE_INVALID | _PAGE_NOEXEC);
+ note_page(m, st, prot, 4);
+ addr += PAGE_SIZE;
+ }
+}
+
+static void walk_pmd_level(struct seq_file *m, struct pg_state *st,
+ pud_t *pud, unsigned long addr)
+{
+ unsigned int prot;
+ pmd_t *pmd;
+ int i;
+
+ for (i = 0; i < PTRS_PER_PMD && addr < max_addr; i++) {
+ st->current_address = addr;
+ pmd = pmd_offset(pud, addr);
+ if (!pmd_none(*pmd)) {
+ if (pmd_large(*pmd)) {
+ prot = pmd_val(*pmd) &
+ (_SEGMENT_ENTRY_PROTECT |
+ _SEGMENT_ENTRY_NOEXEC);
+ note_page(m, st, prot, 3);
+ } else
+ walk_pte_level(m, st, pmd, addr);
+ } else
+ note_page(m, st, _PAGE_INVALID, 3);
+ addr += PMD_SIZE;
+ }
+}
+
+static void walk_pud_level(struct seq_file *m, struct pg_state *st,
+ p4d_t *p4d, unsigned long addr)
+{
+ unsigned int prot;
+ pud_t *pud;
+ int i;
+
+ for (i = 0; i < PTRS_PER_PUD && addr < max_addr; i++) {
+ st->current_address = addr;
+ pud = pud_offset(p4d, addr);
+ if (!pud_none(*pud))
+ if (pud_large(*pud)) {
+ prot = pud_val(*pud) &
+ (_REGION_ENTRY_PROTECT |
+ _REGION_ENTRY_NOEXEC);
+ note_page(m, st, prot, 2);
+ } else
+ walk_pmd_level(m, st, pud, addr);
+ else
+ note_page(m, st, _PAGE_INVALID, 2);
+ addr += PUD_SIZE;
+ }
+}
+
+static void walk_p4d_level(struct seq_file *m, struct pg_state *st,
+ pgd_t *pgd, unsigned long addr)
+{
+ p4d_t *p4d;
+ int i;
+
+ for (i = 0; i < PTRS_PER_P4D && addr < max_addr; i++) {
+ st->current_address = addr;
+ p4d = p4d_offset(pgd, addr);
+ if (!p4d_none(*p4d))
+ walk_pud_level(m, st, p4d, addr);
+ else
+ note_page(m, st, _PAGE_INVALID, 2);
+ addr += P4D_SIZE;
+ }
+}
+
+static void walk_pgd_level(struct seq_file *m)
+{
+ unsigned long addr = 0;
+ struct pg_state st;
+ pgd_t *pgd;
+ int i;
+
+ memset(&st, 0, sizeof(st));
+ for (i = 0; i < PTRS_PER_PGD && addr < max_addr; i++) {
+ st.current_address = addr;
+ pgd = pgd_offset_k(addr);
+ if (!pgd_none(*pgd))
+ walk_p4d_level(m, &st, pgd, addr);
+ else
+ note_page(m, &st, _PAGE_INVALID, 1);
+ addr += PGDIR_SIZE;
+ cond_resched();
+ }
+ /* Flush out the last page */
+ st.current_address = max_addr;
+ note_page(m, &st, 0, 0);
+}
+
+static int ptdump_show(struct seq_file *m, void *v)
+{
+ walk_pgd_level(m);
+ return 0;
+}
+
+static int ptdump_open(struct inode *inode, struct file *filp)
+{
+ return single_open(filp, ptdump_show, NULL);
+}
+
+static const struct file_operations ptdump_fops = {
+ .open = ptdump_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = single_release,
+};
+
+static int pt_dump_init(void)
+{
+ /*
+ * Figure out the maximum virtual address being accessible with the
+ * kernel ASCE. We need this to keep the page table walker functions
+ * from accessing non-existent entries.
+ */
+ max_addr = (S390_lowcore.kernel_asce & _REGION_ENTRY_TYPE_MASK) >> 2;
+ max_addr = 1UL << (max_addr * 11 + 31);
+ address_markers[MODULES_NR].start_address = MODULES_VADDR;
+ address_markers[VMEMMAP_NR].start_address = (unsigned long) vmemmap;
+ address_markers[VMALLOC_NR].start_address = VMALLOC_START;
+ debugfs_create_file("kernel_page_tables", 0400, NULL, NULL, &ptdump_fops);
+ return 0;
+}
+device_initcall(pt_dump_init);
diff --git a/arch/s390/mm/extmem.c b/arch/s390/mm/extmem.c
new file mode 100644
index 000000000..84111a43e
--- /dev/null
+++ b/arch/s390/mm/extmem.c
@@ -0,0 +1,763 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Author(s)......: Carsten Otte <cotte@de.ibm.com>
+ * Rob M van der Heij <rvdheij@nl.ibm.com>
+ * Steven Shultz <shultzss@us.ibm.com>
+ * Bugreports.to..: <Linux390@de.ibm.com>
+ * Copyright IBM Corp. 2002, 2004
+ */
+
+#define KMSG_COMPONENT "extmem"
+#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
+
+#include <linux/kernel.h>
+#include <linux/string.h>
+#include <linux/spinlock.h>
+#include <linux/list.h>
+#include <linux/slab.h>
+#include <linux/export.h>
+#include <linux/bootmem.h>
+#include <linux/ctype.h>
+#include <linux/ioport.h>
+#include <asm/diag.h>
+#include <asm/page.h>
+#include <asm/pgtable.h>
+#include <asm/ebcdic.h>
+#include <asm/errno.h>
+#include <asm/extmem.h>
+#include <asm/cpcmd.h>
+#include <asm/setup.h>
+
+#define DCSS_LOADSHR 0x00
+#define DCSS_LOADNSR 0x04
+#define DCSS_PURGESEG 0x08
+#define DCSS_FINDSEG 0x0c
+#define DCSS_LOADNOLY 0x10
+#define DCSS_SEGEXT 0x18
+#define DCSS_LOADSHRX 0x20
+#define DCSS_LOADNSRX 0x24
+#define DCSS_FINDSEGX 0x2c
+#define DCSS_SEGEXTX 0x38
+#define DCSS_FINDSEGA 0x0c
+
+struct qrange {
+ unsigned long start; /* last byte type */
+ unsigned long end; /* last byte reserved */
+};
+
+struct qout64 {
+ unsigned long segstart;
+ unsigned long segend;
+ int segcnt;
+ int segrcnt;
+ struct qrange range[6];
+};
+
+struct qrange_old {
+ unsigned int start; /* last byte type */
+ unsigned int end; /* last byte reserved */
+};
+
+/* output area format for the Diag x'64' old subcode x'18' */
+struct qout64_old {
+ int segstart;
+ int segend;
+ int segcnt;
+ int segrcnt;
+ struct qrange_old range[6];
+};
+
+struct qin64 {
+ char qopcode;
+ char rsrv1[3];
+ char qrcode;
+ char rsrv2[3];
+ char qname[8];
+ unsigned int qoutptr;
+ short int qoutlen;
+};
+
+struct dcss_segment {
+ struct list_head list;
+ char dcss_name[8];
+ char res_name[16];
+ unsigned long start_addr;
+ unsigned long end;
+ atomic_t ref_count;
+ int do_nonshared;
+ unsigned int vm_segtype;
+ struct qrange range[6];
+ int segcnt;
+ struct resource *res;
+};
+
+static DEFINE_MUTEX(dcss_lock);
+static LIST_HEAD(dcss_list);
+static char *segtype_string[] = { "SW", "EW", "SR", "ER", "SN", "EN", "SC",
+ "EW/EN-MIXED" };
+static int loadshr_scode, loadnsr_scode;
+static int segext_scode, purgeseg_scode;
+static int scode_set;
+
+/* set correct Diag x'64' subcodes. */
+static int
+dcss_set_subcodes(void)
+{
+ char *name = kmalloc(8, GFP_KERNEL | GFP_DMA);
+ unsigned long rx, ry;
+ int rc;
+
+ if (name == NULL)
+ return -ENOMEM;
+
+ rx = (unsigned long) name;
+ ry = DCSS_FINDSEGX;
+
+ strcpy(name, "dummy");
+ diag_stat_inc(DIAG_STAT_X064);
+ asm volatile(
+ " diag %0,%1,0x64\n"
+ "0: ipm %2\n"
+ " srl %2,28\n"
+ " j 2f\n"
+ "1: la %2,3\n"
+ "2:\n"
+ EX_TABLE(0b, 1b)
+ : "+d" (rx), "+d" (ry), "=d" (rc) : : "cc", "memory");
+
+ kfree(name);
+ /* Diag x'64' new subcodes are supported, set to new subcodes */
+ if (rc != 3) {
+ loadshr_scode = DCSS_LOADSHRX;
+ loadnsr_scode = DCSS_LOADNSRX;
+ purgeseg_scode = DCSS_PURGESEG;
+ segext_scode = DCSS_SEGEXTX;
+ return 0;
+ }
+ /* Diag x'64' new subcodes are not supported, set to old subcodes */
+ loadshr_scode = DCSS_LOADNOLY;
+ loadnsr_scode = DCSS_LOADNSR;
+ purgeseg_scode = DCSS_PURGESEG;
+ segext_scode = DCSS_SEGEXT;
+ return 0;
+}
+
+/*
+ * Create the 8-byte, EBCDIC VM segment name from
+ * an ASCII name.
+ */
+static void
+dcss_mkname(char *name, char *dcss_name)
+{
+ int i;
+
+ for (i = 0; i < 8; i++) {
+ if (name[i] == '\0')
+ break;
+ dcss_name[i] = toupper(name[i]);
+ }
+ for (; i < 8; i++)
+ dcss_name[i] = ' ';
+ ASCEBC(dcss_name, 8);
+}
+
+
+/*
+ * Search all segments in dcss_list and return the one
+ * named *name. If not found, return NULL.
+ */
+static struct dcss_segment *
+segment_by_name (char *name)
+{
+ char dcss_name[9];
+ struct list_head *l;
+ struct dcss_segment *tmp, *retval = NULL;
+
+ BUG_ON(!mutex_is_locked(&dcss_lock));
+ dcss_mkname (name, dcss_name);
+ list_for_each (l, &dcss_list) {
+ tmp = list_entry (l, struct dcss_segment, list);
+ if (memcmp(tmp->dcss_name, dcss_name, 8) == 0) {
+ retval = tmp;
+ break;
+ }
+ }
+ return retval;
+}
+
+
+/*
+ * Perform a function on a dcss segment.
+ */
+static inline int
+dcss_diag(int *func, void *parameter,
+ unsigned long *ret1, unsigned long *ret2)
+{
+ unsigned long rx, ry;
+ int rc;
+
+ if (scode_set == 0) {
+ rc = dcss_set_subcodes();
+ if (rc < 0)
+ return rc;
+ scode_set = 1;
+ }
+ rx = (unsigned long) parameter;
+ ry = (unsigned long) *func;
+
+ /* 64-bit Diag x'64' new subcode, keep in 64-bit addressing mode */
+ diag_stat_inc(DIAG_STAT_X064);
+ if (*func > DCSS_SEGEXT)
+ asm volatile(
+ " diag %0,%1,0x64\n"
+ " ipm %2\n"
+ " srl %2,28\n"
+ : "+d" (rx), "+d" (ry), "=d" (rc) : : "cc");
+ /* 31-bit Diag x'64' old subcode, switch to 31-bit addressing mode */
+ else
+ asm volatile(
+ " sam31\n"
+ " diag %0,%1,0x64\n"
+ " sam64\n"
+ " ipm %2\n"
+ " srl %2,28\n"
+ : "+d" (rx), "+d" (ry), "=d" (rc) : : "cc");
+ *ret1 = rx;
+ *ret2 = ry;
+ return rc;
+}
+
+static inline int
+dcss_diag_translate_rc (int vm_rc) {
+ if (vm_rc == 44)
+ return -ENOENT;
+ return -EIO;
+}
+
+
+/* Do a diag to get info about a segment.
+ * Fills the start_addr, end and vm_segtype fields.
+ */
+static int
+query_segment_type (struct dcss_segment *seg)
+{
+ unsigned long dummy, vmrc;
+ int diag_cc, rc, i;
+ struct qout64 *qout;
+ struct qin64 *qin;
+
+ qin = kmalloc(sizeof(*qin), GFP_KERNEL | GFP_DMA);
+ qout = kmalloc(sizeof(*qout), GFP_KERNEL | GFP_DMA);
+ if ((qin == NULL) || (qout == NULL)) {
+ rc = -ENOMEM;
+ goto out_free;
+ }
+
+ /* initialize diag input parameters */
+ qin->qopcode = DCSS_FINDSEGA;
+ qin->qoutptr = (unsigned long) qout;
+ qin->qoutlen = sizeof(struct qout64);
+ memcpy (qin->qname, seg->dcss_name, 8);
+
+ diag_cc = dcss_diag(&segext_scode, qin, &dummy, &vmrc);
+
+ if (diag_cc < 0) {
+ rc = diag_cc;
+ goto out_free;
+ }
+ if (diag_cc > 1) {
+ pr_warn("Querying a DCSS type failed with rc=%ld\n", vmrc);
+ rc = dcss_diag_translate_rc (vmrc);
+ goto out_free;
+ }
+
+ /* If only the old format of the Diagnose x'64' output area is
+ supported, copy the data into the new format. */
+ if (segext_scode == DCSS_SEGEXT) {
+ struct qout64_old *qout_old;
+ qout_old = kzalloc(sizeof(*qout_old), GFP_KERNEL | GFP_DMA);
+ if (qout_old == NULL) {
+ rc = -ENOMEM;
+ goto out_free;
+ }
+ memcpy(qout_old, qout, sizeof(struct qout64_old));
+ qout->segstart = (unsigned long) qout_old->segstart;
+ qout->segend = (unsigned long) qout_old->segend;
+ qout->segcnt = qout_old->segcnt;
+ qout->segrcnt = qout_old->segrcnt;
+
+ if (qout->segcnt > 6)
+ qout->segrcnt = 6;
+ for (i = 0; i < qout->segrcnt; i++) {
+ qout->range[i].start =
+ (unsigned long) qout_old->range[i].start;
+ qout->range[i].end =
+ (unsigned long) qout_old->range[i].end;
+ }
+ kfree(qout_old);
+ }
+ if (qout->segcnt > 6) {
+ rc = -EOPNOTSUPP;
+ goto out_free;
+ }
+
+ if (qout->segcnt == 1) {
+ seg->vm_segtype = qout->range[0].start & 0xff;
+ } else {
+ /* multi-part segment. only one type supported here:
+ - all parts are contiguous
+ - all parts are either EW or EN type
+ - maximum 6 parts allowed */
+ unsigned long start = qout->segstart >> PAGE_SHIFT;
+ for (i=0; i<qout->segcnt; i++) {
+ if (((qout->range[i].start & 0xff) != SEG_TYPE_EW) &&
+ ((qout->range[i].start & 0xff) != SEG_TYPE_EN)) {
+ rc = -EOPNOTSUPP;
+ goto out_free;
+ }
+ if (start != qout->range[i].start >> PAGE_SHIFT) {
+ rc = -EOPNOTSUPP;
+ goto out_free;
+ }
+ start = (qout->range[i].end >> PAGE_SHIFT) + 1;
+ }
+ seg->vm_segtype = SEG_TYPE_EWEN;
+ }
+
+ /* analyze diag output and update seg */
+ seg->start_addr = qout->segstart;
+ seg->end = qout->segend;
+
+ memcpy (seg->range, qout->range, 6*sizeof(struct qrange));
+ seg->segcnt = qout->segcnt;
+
+ rc = 0;
+
+ out_free:
+ kfree(qin);
+ kfree(qout);
+ return rc;
+}
+
+/*
+ * get info about a segment
+ * possible return values:
+ * -ENOSYS : we are not running on VM
+ * -EIO : could not perform query diagnose
+ * -ENOENT : no such segment
+ * -EOPNOTSUPP: multi-part segment cannot be used with linux
+ * -ENOMEM : out of memory
+ * 0 .. 6 : type of segment as defined in include/asm-s390/extmem.h
+ */
+int
+segment_type (char* name)
+{
+ int rc;
+ struct dcss_segment seg;
+
+ if (!MACHINE_IS_VM)
+ return -ENOSYS;
+
+ dcss_mkname(name, seg.dcss_name);
+ rc = query_segment_type (&seg);
+ if (rc < 0)
+ return rc;
+ return seg.vm_segtype;
+}
+
+/*
+ * check if segment collides with other segments that are currently loaded
+ * returns 1 if this is the case, 0 if no collision was found
+ */
+static int
+segment_overlaps_others (struct dcss_segment *seg)
+{
+ struct list_head *l;
+ struct dcss_segment *tmp;
+
+ BUG_ON(!mutex_is_locked(&dcss_lock));
+ list_for_each(l, &dcss_list) {
+ tmp = list_entry(l, struct dcss_segment, list);
+ if ((tmp->start_addr >> 20) > (seg->end >> 20))
+ continue;
+ if ((tmp->end >> 20) < (seg->start_addr >> 20))
+ continue;
+ if (seg == tmp)
+ continue;
+ return 1;
+ }
+ return 0;
+}
+
+/*
+ * real segment loading function, called from segment_load
+ */
+static int
+__segment_load (char *name, int do_nonshared, unsigned long *addr, unsigned long *end)
+{
+ unsigned long start_addr, end_addr, dummy;
+ struct dcss_segment *seg;
+ int rc, diag_cc;
+
+ start_addr = end_addr = 0;
+ seg = kmalloc(sizeof(*seg), GFP_KERNEL | GFP_DMA);
+ if (seg == NULL) {
+ rc = -ENOMEM;
+ goto out;
+ }
+ dcss_mkname (name, seg->dcss_name);
+ rc = query_segment_type (seg);
+ if (rc < 0)
+ goto out_free;
+
+ if (loadshr_scode == DCSS_LOADSHRX) {
+ if (segment_overlaps_others(seg)) {
+ rc = -EBUSY;
+ goto out_free;
+ }
+ }
+
+ rc = vmem_add_mapping(seg->start_addr, seg->end - seg->start_addr + 1);
+
+ if (rc)
+ goto out_free;
+
+ seg->res = kzalloc(sizeof(struct resource), GFP_KERNEL);
+ if (seg->res == NULL) {
+ rc = -ENOMEM;
+ goto out_shared;
+ }
+ seg->res->flags = IORESOURCE_BUSY | IORESOURCE_MEM;
+ seg->res->start = seg->start_addr;
+ seg->res->end = seg->end;
+ memcpy(&seg->res_name, seg->dcss_name, 8);
+ EBCASC(seg->res_name, 8);
+ seg->res_name[8] = '\0';
+ strlcat(seg->res_name, " (DCSS)", sizeof(seg->res_name));
+ seg->res->name = seg->res_name;
+ rc = seg->vm_segtype;
+ if (rc == SEG_TYPE_SC ||
+ ((rc == SEG_TYPE_SR || rc == SEG_TYPE_ER) && !do_nonshared))
+ seg->res->flags |= IORESOURCE_READONLY;
+ if (request_resource(&iomem_resource, seg->res)) {
+ rc = -EBUSY;
+ kfree(seg->res);
+ goto out_shared;
+ }
+
+ if (do_nonshared)
+ diag_cc = dcss_diag(&loadnsr_scode, seg->dcss_name,
+ &start_addr, &end_addr);
+ else
+ diag_cc = dcss_diag(&loadshr_scode, seg->dcss_name,
+ &start_addr, &end_addr);
+ if (diag_cc < 0) {
+ dcss_diag(&purgeseg_scode, seg->dcss_name,
+ &dummy, &dummy);
+ rc = diag_cc;
+ goto out_resource;
+ }
+ if (diag_cc > 1) {
+ pr_warn("Loading DCSS %s failed with rc=%ld\n", name, end_addr);
+ rc = dcss_diag_translate_rc(end_addr);
+ dcss_diag(&purgeseg_scode, seg->dcss_name,
+ &dummy, &dummy);
+ goto out_resource;
+ }
+ seg->start_addr = start_addr;
+ seg->end = end_addr;
+ seg->do_nonshared = do_nonshared;
+ atomic_set(&seg->ref_count, 1);
+ list_add(&seg->list, &dcss_list);
+ *addr = seg->start_addr;
+ *end = seg->end;
+ if (do_nonshared)
+ pr_info("DCSS %s of range %p to %p and type %s loaded as "
+ "exclusive-writable\n", name, (void*) seg->start_addr,
+ (void*) seg->end, segtype_string[seg->vm_segtype]);
+ else {
+ pr_info("DCSS %s of range %p to %p and type %s loaded in "
+ "shared access mode\n", name, (void*) seg->start_addr,
+ (void*) seg->end, segtype_string[seg->vm_segtype]);
+ }
+ goto out;
+ out_resource:
+ release_resource(seg->res);
+ kfree(seg->res);
+ out_shared:
+ vmem_remove_mapping(seg->start_addr, seg->end - seg->start_addr + 1);
+ out_free:
+ kfree(seg);
+ out:
+ return rc;
+}
+
+/*
+ * this function loads a DCSS segment
+ * name : name of the DCSS
+ * do_nonshared : 0 indicates that the dcss should be shared with other linux images
+ * 1 indicates that the dcss should be exclusive for this linux image
+ * addr : will be filled with start address of the segment
+ * end : will be filled with end address of the segment
+ * return values:
+ * -ENOSYS : we are not running on VM
+ * -EIO : could not perform query or load diagnose
+ * -ENOENT : no such segment
+ * -EOPNOTSUPP: multi-part segment cannot be used with linux
+ * -ENOSPC : segment cannot be used (overlaps with storage)
+ * -EBUSY : segment can temporarily not be used (overlaps with dcss)
+ * -ERANGE : segment cannot be used (exceeds kernel mapping range)
+ * -EPERM : segment is currently loaded with incompatible permissions
+ * -ENOMEM : out of memory
+ * 0 .. 6 : type of segment as defined in include/asm-s390/extmem.h
+ */
+int
+segment_load (char *name, int do_nonshared, unsigned long *addr,
+ unsigned long *end)
+{
+ struct dcss_segment *seg;
+ int rc;
+
+ if (!MACHINE_IS_VM)
+ return -ENOSYS;
+
+ mutex_lock(&dcss_lock);
+ seg = segment_by_name (name);
+ if (seg == NULL)
+ rc = __segment_load (name, do_nonshared, addr, end);
+ else {
+ if (do_nonshared == seg->do_nonshared) {
+ atomic_inc(&seg->ref_count);
+ *addr = seg->start_addr;
+ *end = seg->end;
+ rc = seg->vm_segtype;
+ } else {
+ *addr = *end = 0;
+ rc = -EPERM;
+ }
+ }
+ mutex_unlock(&dcss_lock);
+ return rc;
+}
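+
+/*
+ * Typical usage (sketch): a caller such as the DCSS block device driver
+ * loads a segment with segment_load(), accesses it through the returned
+ * address range and drops it again with segment_unload(); segment_save()
+ * below makes the current contents permanent via the DEFSEG and SAVESEG
+ * CP commands.
+ */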
+
+/*
+ * This function modifies the shared state of a DCSS segment.
+ * name : name of the DCSS
+ * do_nonshared : 0 indicates that the dcss should be shared with other linux images
+ * 1 indicates that the dcss should be exclusive for this linux image
+ * return values:
+ * -EIO : could not perform load diagnose (segment gone!)
+ * -ENOENT : no such segment (segment gone!)
+ * -EAGAIN : segment is in use by other exploiters, try later
+ * -EINVAL : no segment with the given name is currently loaded - name invalid
+ * -EBUSY : segment can temporarily not be used (overlaps with dcss)
+ * 0 : operation succeeded
+ */
+int
+segment_modify_shared (char *name, int do_nonshared)
+{
+ struct dcss_segment *seg;
+ unsigned long start_addr, end_addr, dummy;
+ int rc, diag_cc;
+
+ start_addr = end_addr = 0;
+ mutex_lock(&dcss_lock);
+ seg = segment_by_name (name);
+ if (seg == NULL) {
+ rc = -EINVAL;
+ goto out_unlock;
+ }
+ if (do_nonshared == seg->do_nonshared) {
+ pr_info("DCSS %s is already in the requested access "
+ "mode\n", name);
+ rc = 0;
+ goto out_unlock;
+ }
+ if (atomic_read (&seg->ref_count) != 1) {
+ pr_warn("DCSS %s is in use and cannot be reloaded\n", name);
+ rc = -EAGAIN;
+ goto out_unlock;
+ }
+ release_resource(seg->res);
+ if (do_nonshared)
+ seg->res->flags &= ~IORESOURCE_READONLY;
+ else
+ if (seg->vm_segtype == SEG_TYPE_SR ||
+ seg->vm_segtype == SEG_TYPE_ER)
+ seg->res->flags |= IORESOURCE_READONLY;
+
+ if (request_resource(&iomem_resource, seg->res)) {
+ pr_warn("DCSS %s overlaps with used memory resources and cannot be reloaded\n",
+ name);
+ rc = -EBUSY;
+ kfree(seg->res);
+ goto out_del_mem;
+ }
+
+ dcss_diag(&purgeseg_scode, seg->dcss_name, &dummy, &dummy);
+ if (do_nonshared)
+ diag_cc = dcss_diag(&loadnsr_scode, seg->dcss_name,
+ &start_addr, &end_addr);
+ else
+ diag_cc = dcss_diag(&loadshr_scode, seg->dcss_name,
+ &start_addr, &end_addr);
+ if (diag_cc < 0) {
+ rc = diag_cc;
+ goto out_del_res;
+ }
+ if (diag_cc > 1) {
+ pr_warn("Reloading DCSS %s failed with rc=%ld\n",
+ name, end_addr);
+ rc = dcss_diag_translate_rc(end_addr);
+ goto out_del_res;
+ }
+ seg->start_addr = start_addr;
+ seg->end = end_addr;
+ seg->do_nonshared = do_nonshared;
+ rc = 0;
+ goto out_unlock;
+ out_del_res:
+ release_resource(seg->res);
+ kfree(seg->res);
+ out_del_mem:
+ vmem_remove_mapping(seg->start_addr, seg->end - seg->start_addr + 1);
+ list_del(&seg->list);
+ dcss_diag(&purgeseg_scode, seg->dcss_name, &dummy, &dummy);
+ kfree(seg);
+ out_unlock:
+ mutex_unlock(&dcss_lock);
+ return rc;
+}
+
+/*
+ * Decrease the use count of a DCSS segment and remove
+ * it from the address space if nobody is using it
+ * any longer.
+ */
+void
+segment_unload(char *name)
+{
+ unsigned long dummy;
+ struct dcss_segment *seg;
+
+ if (!MACHINE_IS_VM)
+ return;
+
+ mutex_lock(&dcss_lock);
+ seg = segment_by_name (name);
+ if (seg == NULL) {
+ pr_err("Unloading unknown DCSS %s failed\n", name);
+ goto out_unlock;
+ }
+ if (atomic_dec_return(&seg->ref_count) != 0)
+ goto out_unlock;
+ release_resource(seg->res);
+ kfree(seg->res);
+ vmem_remove_mapping(seg->start_addr, seg->end - seg->start_addr + 1);
+ list_del(&seg->list);
+ dcss_diag(&purgeseg_scode, seg->dcss_name, &dummy, &dummy);
+ kfree(seg);
+out_unlock:
+ mutex_unlock(&dcss_lock);
+}
+
+/*
+ * save segment content permanently
+ */
+void
+segment_save(char *name)
+{
+ struct dcss_segment *seg;
+ char cmd1[160];
+ char cmd2[80];
+ int i, response;
+
+ if (!MACHINE_IS_VM)
+ return;
+
+ mutex_lock(&dcss_lock);
+ seg = segment_by_name (name);
+
+ if (seg == NULL) {
+ pr_err("Saving unknown DCSS %s failed\n", name);
+ goto out;
+ }
+
+ sprintf(cmd1, "DEFSEG %s", name);
+ for (i=0; i<seg->segcnt; i++) {
+ sprintf(cmd1+strlen(cmd1), " %lX-%lX %s",
+ seg->range[i].start >> PAGE_SHIFT,
+ seg->range[i].end >> PAGE_SHIFT,
+ segtype_string[seg->range[i].start & 0xff]);
+ }
+ sprintf(cmd2, "SAVESEG %s", name);
+ response = 0;
+ cpcmd(cmd1, NULL, 0, &response);
+ if (response) {
+ pr_err("Saving a DCSS failed with DEFSEG response code "
+ "%i\n", response);
+ goto out;
+ }
+ cpcmd(cmd2, NULL, 0, &response);
+ if (response) {
+ pr_err("Saving a DCSS failed with SAVESEG response code "
+ "%i\n", response);
+ goto out;
+ }
+out:
+ mutex_unlock(&dcss_lock);
+}
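+
+/*
+ * For illustration: for a segment with a single page range 0x500-0x5ff of
+ * type SR, the code above would issue CP commands of roughly the form
+ * (segment name hypothetical):
+ *
+ *	DEFSEG MYDCSS 500-5FF SR
+ *	SAVESEG MYDCSS
+ */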
+
+/*
+ * print appropriate error message for segment_load()/segment_type()
+ * return code
+ */
+void segment_warning(int rc, char *seg_name)
+{
+ switch (rc) {
+ case -ENOENT:
+ pr_err("DCSS %s cannot be loaded or queried\n", seg_name);
+ break;
+ case -ENOSYS:
+ pr_err("DCSS %s cannot be loaded or queried without "
+ "z/VM\n", seg_name);
+ break;
+ case -EIO:
+ pr_err("Loading or querying DCSS %s resulted in a "
+ "hardware error\n", seg_name);
+ break;
+ case -EOPNOTSUPP:
+ pr_err("DCSS %s has multiple page ranges and cannot be "
+ "loaded or queried\n", seg_name);
+ break;
+ case -ENOSPC:
+ pr_err("DCSS %s overlaps with used storage and cannot "
+ "be loaded\n", seg_name);
+ break;
+ case -EBUSY:
+ pr_err("%s needs used memory resources and cannot be "
+ "loaded or queried\n", seg_name);
+ break;
+ case -EPERM:
+ pr_err("DCSS %s is already loaded in a different access "
+ "mode\n", seg_name);
+ break;
+ case -ENOMEM:
+ pr_err("There is not enough memory to load or query "
+ "DCSS %s\n", seg_name);
+ break;
+ case -ERANGE:
+ pr_err("DCSS %s exceeds the kernel mapping range (%lu) "
+ "and cannot be loaded\n", seg_name, VMEM_MAX_PHYS);
+ break;
+ default:
+ break;
+ }
+}
+
+EXPORT_SYMBOL(segment_load);
+EXPORT_SYMBOL(segment_unload);
+EXPORT_SYMBOL(segment_save);
+EXPORT_SYMBOL(segment_type);
+EXPORT_SYMBOL(segment_modify_shared);
+EXPORT_SYMBOL(segment_warning);
diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c
new file mode 100644
index 000000000..a6e3c7022
--- /dev/null
+++ b/arch/s390/mm/fault.c
@@ -0,0 +1,837 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * S390 version
+ * Copyright IBM Corp. 1999
+ * Author(s): Hartmut Penner (hp@de.ibm.com)
+ * Ulrich Weigand (uweigand@de.ibm.com)
+ *
+ * Derived from "arch/i386/mm/fault.c"
+ * Copyright (C) 1995 Linus Torvalds
+ */
+
+#include <linux/kernel_stat.h>
+#include <linux/perf_event.h>
+#include <linux/signal.h>
+#include <linux/sched.h>
+#include <linux/sched/debug.h>
+#include <linux/kernel.h>
+#include <linux/errno.h>
+#include <linux/string.h>
+#include <linux/types.h>
+#include <linux/ptrace.h>
+#include <linux/mman.h>
+#include <linux/mm.h>
+#include <linux/compat.h>
+#include <linux/smp.h>
+#include <linux/kdebug.h>
+#include <linux/init.h>
+#include <linux/console.h>
+#include <linux/extable.h>
+#include <linux/hardirq.h>
+#include <linux/kprobes.h>
+#include <linux/uaccess.h>
+#include <linux/hugetlb.h>
+#include <asm/asm-offsets.h>
+#include <asm/diag.h>
+#include <asm/pgtable.h>
+#include <asm/gmap.h>
+#include <asm/irq.h>
+#include <asm/mmu_context.h>
+#include <asm/facility.h>
+#include "../kernel/entry.h"
+
+#define __FAIL_ADDR_MASK -4096L
+#define __SUBCODE_MASK 0x0600
+#define __PF_RES_FIELD 0x8000000000000000ULL
+
+#define VM_FAULT_BADCONTEXT 0x010000 /* fault outside a valid user context */
+#define VM_FAULT_BADMAP 0x020000 /* no vma covers the faulting address */
+#define VM_FAULT_BADACCESS 0x040000 /* vma found, but access type not allowed */
+#define VM_FAULT_SIGNAL 0x080000 /* fault handling interrupted by a fatal signal */
+#define VM_FAULT_PFAULT 0x100000 /* gmap fault, deferred to the pfault handshake */
+
+enum fault_type {
+ KERNEL_FAULT,
+ USER_FAULT,
+ VDSO_FAULT,
+ GMAP_FAULT,
+};
+
+static unsigned long store_indication __read_mostly;
+
+static int __init fault_init(void)
+{
+ if (test_facility(75))
+ store_indication = 0xc00;
+ return 0;
+}
+early_initcall(fault_init);
+
+static inline int notify_page_fault(struct pt_regs *regs)
+{
+ int ret = 0;
+
+ /* kprobe_running() needs smp_processor_id() */
+ if (kprobes_built_in() && !user_mode(regs)) {
+ preempt_disable();
+ if (kprobe_running() && kprobe_fault_handler(regs, 14))
+ ret = 1;
+ preempt_enable();
+ }
+ return ret;
+}
+
+
+/*
+ * Unlock any spinlocks which will prevent us from getting the
+ * message out.
+ */
+void bust_spinlocks(int yes)
+{
+ if (yes) {
+ oops_in_progress = 1;
+ } else {
+ int loglevel_save = console_loglevel;
+ console_unblank();
+ oops_in_progress = 0;
+ /*
+ * OK, the message is on the console. Now we call printk()
+ * without oops_in_progress set so that printk will give klogd
+ * a poke. Hold onto your hats...
+ */
+ console_loglevel = 15;
+ printk(" ");
+ console_loglevel = loglevel_save;
+ }
+}
+
+/*
+ * Find out which address space caused the exception.
+ */
+static inline enum fault_type get_fault_type(struct pt_regs *regs)
+{
+ unsigned long trans_exc_code;
+
+ trans_exc_code = regs->int_parm_long & 3;
+ if (likely(trans_exc_code == 0)) {
+ /* primary space exception */
+ if (IS_ENABLED(CONFIG_PGSTE) &&
+ test_pt_regs_flag(regs, PIF_GUEST_FAULT))
+ return GMAP_FAULT;
+ if (current->thread.mm_segment == USER_DS)
+ return USER_FAULT;
+ return KERNEL_FAULT;
+ }
+ if (trans_exc_code == 2) {
+ /* secondary space exception */
+ if (current->thread.mm_segment & 1) {
+ if (current->thread.mm_segment == USER_DS_SACF)
+ return USER_FAULT;
+ return KERNEL_FAULT;
+ }
+ return VDSO_FAULT;
+ }
+ if (trans_exc_code == 1) {
+ /* access register mode, not used in the kernel */
+ return USER_FAULT;
+ }
+ /* home space exception -> access via kernel ASCE */
+ return KERNEL_FAULT;
+}
+
+static int bad_address(void *p)
+{
+ unsigned long dummy;
+
+ return probe_kernel_address((unsigned long *)p, dummy);
+}
+
+static void dump_pagetable(unsigned long asce, unsigned long address)
+{
+ unsigned long *table = __va(asce & _ASCE_ORIGIN);
+
+ pr_alert("AS:%016lx ", asce);
+ switch (asce & _ASCE_TYPE_MASK) {
+ case _ASCE_TYPE_REGION1:
+ table += (address & _REGION1_INDEX) >> _REGION1_SHIFT;
+ if (bad_address(table))
+ goto bad;
+ pr_cont("R1:%016lx ", *table);
+ if (*table & _REGION_ENTRY_INVALID)
+ goto out;
+ table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
+ /* fallthrough */
+ case _ASCE_TYPE_REGION2:
+ table += (address & _REGION2_INDEX) >> _REGION2_SHIFT;
+ if (bad_address(table))
+ goto bad;
+ pr_cont("R2:%016lx ", *table);
+ if (*table & _REGION_ENTRY_INVALID)
+ goto out;
+ table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
+ /* fallthrough */
+ case _ASCE_TYPE_REGION3:
+ table += (address & _REGION3_INDEX) >> _REGION3_SHIFT;
+ if (bad_address(table))
+ goto bad;
+ pr_cont("R3:%016lx ", *table);
+ if (*table & (_REGION_ENTRY_INVALID | _REGION3_ENTRY_LARGE))
+ goto out;
+ table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
+ /* fallthrough */
+ case _ASCE_TYPE_SEGMENT:
+ table += (address & _SEGMENT_INDEX) >> _SEGMENT_SHIFT;
+ if (bad_address(table))
+ goto bad;
+ pr_cont("S:%016lx ", *table);
+ if (*table & (_SEGMENT_ENTRY_INVALID | _SEGMENT_ENTRY_LARGE))
+ goto out;
+ table = (unsigned long *)(*table & _SEGMENT_ENTRY_ORIGIN);
+ }
+ table += (address & _PAGE_INDEX) >> _PAGE_SHIFT;
+ if (bad_address(table))
+ goto bad;
+ pr_cont("P:%016lx ", *table);
+out:
+ pr_cont("\n");
+ return;
+bad:
+ pr_cont("BAD\n");
+}
+
+static void dump_fault_info(struct pt_regs *regs)
+{
+ unsigned long asce;
+
+ pr_alert("Failing address: %016lx TEID: %016lx\n",
+ regs->int_parm_long & __FAIL_ADDR_MASK, regs->int_parm_long);
+ pr_alert("Fault in ");
+ switch (regs->int_parm_long & 3) {
+ case 3:
+ pr_cont("home space ");
+ break;
+ case 2:
+ pr_cont("secondary space ");
+ break;
+ case 1:
+ pr_cont("access register ");
+ break;
+ case 0:
+ pr_cont("primary space ");
+ break;
+ }
+ pr_cont("mode while using ");
+ switch (get_fault_type(regs)) {
+ case USER_FAULT:
+ asce = S390_lowcore.user_asce;
+ pr_cont("user ");
+ break;
+ case VDSO_FAULT:
+ asce = S390_lowcore.vdso_asce;
+ pr_cont("vdso ");
+ break;
+ case GMAP_FAULT:
+ asce = ((struct gmap *) S390_lowcore.gmap)->asce;
+ pr_cont("gmap ");
+ break;
+ case KERNEL_FAULT:
+ asce = S390_lowcore.kernel_asce;
+ pr_cont("kernel ");
+ break;
+ }
+ pr_cont("ASCE.\n");
+ dump_pagetable(asce, regs->int_parm_long & __FAIL_ADDR_MASK);
+}
+
+int show_unhandled_signals = 1;
+
+void report_user_fault(struct pt_regs *regs, long signr, int is_mm_fault)
+{
+ if ((task_pid_nr(current) > 1) && !show_unhandled_signals)
+ return;
+ if (!unhandled_signal(current, signr))
+ return;
+ if (!printk_ratelimit())
+ return;
+ printk(KERN_ALERT "User process fault: interruption code %04x ilc:%d ",
+ regs->int_code & 0xffff, regs->int_code >> 17);
+ print_vma_addr(KERN_CONT "in ", regs->psw.addr);
+ printk(KERN_CONT "\n");
+ if (is_mm_fault)
+ dump_fault_info(regs);
+ show_regs(regs);
+}
+
+/*
+ * Send SIGSEGV to task. This is an external routine
+ * to keep the stack usage of do_page_fault small.
+ */
+static noinline void do_sigsegv(struct pt_regs *regs, int si_code)
+{
+ report_user_fault(regs, SIGSEGV, 1);
+ force_sig_fault(SIGSEGV, si_code,
+ (void __user *)(regs->int_parm_long & __FAIL_ADDR_MASK),
+ current);
+}
+
+static noinline void do_no_context(struct pt_regs *regs)
+{
+ const struct exception_table_entry *fixup;
+
+ /* Are we prepared to handle this kernel fault? */
+ fixup = search_exception_tables(regs->psw.addr);
+ if (fixup) {
+ regs->psw.addr = extable_fixup(fixup);
+ return;
+ }
+
+ /*
+ * Oops. The kernel tried to access some bad page. We'll have to
+ * terminate things with extreme prejudice.
+ */
+ if (get_fault_type(regs) == KERNEL_FAULT)
+ printk(KERN_ALERT "Unable to handle kernel pointer dereference"
+ " in virtual kernel address space\n");
+ else
+ printk(KERN_ALERT "Unable to handle kernel paging request"
+ " in virtual user address space\n");
+ dump_fault_info(regs);
+ die(regs, "Oops");
+ do_exit(SIGKILL);
+}
+
+static noinline void do_low_address(struct pt_regs *regs)
+{
+ /*
+  * Low-address protection hit in kernel mode means
+  * NULL pointer write access in kernel mode.
+  */
+ if (regs->psw.mask & PSW_MASK_PSTATE) {
+ /* Low-address protection hit in user mode 'cannot happen'. */
+ die (regs, "Low-address protection");
+ do_exit(SIGKILL);
+ }
+
+ do_no_context(regs);
+}
+
+static noinline void do_sigbus(struct pt_regs *regs)
+{
+ /*
+ * Send a sigbus, regardless of whether we were in kernel
+ * or user mode.
+ */
+ force_sig_fault(SIGBUS, BUS_ADRERR,
+ (void __user *)(regs->int_parm_long & __FAIL_ADDR_MASK),
+ current);
+}
+
+static noinline int signal_return(struct pt_regs *regs)
+{
+ u16 instruction;
+ int rc;
+
+ rc = __get_user(instruction, (u16 __user *) regs->psw.addr);
+ if (rc)
+ return rc;
+ if (instruction == 0x0a77) {
+ set_pt_regs_flag(regs, PIF_SYSCALL);
+ regs->int_code = 0x00040077;
+ return 0;
+ } else if (instruction == 0x0aad) {
+ set_pt_regs_flag(regs, PIF_SYSCALL);
+ regs->int_code = 0x000400ad;
+ return 0;
+ }
+ return -EACCES;
+}
+
+static noinline void do_fault_error(struct pt_regs *regs, int access,
+ vm_fault_t fault)
+{
+ int si_code;
+
+ switch (fault) {
+ case VM_FAULT_BADACCESS:
+ if (access == VM_EXEC && signal_return(regs) == 0)
+ break;
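+ /* fallthrough */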
+ case VM_FAULT_BADMAP:
+ /* Bad memory access. Check if it is kernel or user space. */
+ if (user_mode(regs)) {
+ /* User mode accesses just cause a SIGSEGV */
+ si_code = (fault == VM_FAULT_BADMAP) ?
+ SEGV_MAPERR : SEGV_ACCERR;
+ do_sigsegv(regs, si_code);
+ break;
+ }
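+ /* fallthrough */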
+ case VM_FAULT_BADCONTEXT:
+ case VM_FAULT_PFAULT:
+ do_no_context(regs);
+ break;
+ case VM_FAULT_SIGNAL:
+ if (!user_mode(regs))
+ do_no_context(regs);
+ break;
+ default: /* fault & VM_FAULT_ERROR */
+ if (fault & VM_FAULT_OOM) {
+ if (!user_mode(regs))
+ do_no_context(regs);
+ else
+ pagefault_out_of_memory();
+ } else if (fault & VM_FAULT_SIGSEGV) {
+ /* Kernel mode? Handle exceptions or die */
+ if (!user_mode(regs))
+ do_no_context(regs);
+ else
+ do_sigsegv(regs, SEGV_MAPERR);
+ } else if (fault & VM_FAULT_SIGBUS) {
+ /* Kernel mode? Handle exceptions or die */
+ if (!user_mode(regs))
+ do_no_context(regs);
+ else
+ do_sigbus(regs);
+ } else
+ BUG();
+ break;
+ }
+}
+
+/*
+ * This routine handles page faults. It determines the address,
+ * and the problem, and then passes it off to one of the appropriate
+ * routines.
+ *
+ * interruption code (int_code):
+ * 04 Protection -> Write-Protection (suppression)
+ * 10 Segment translation -> Not present (nullification)
+ * 11 Page translation -> Not present (nullification)
+ * 3b Region third trans. -> Not present (nullification)
+ */
+static inline vm_fault_t do_exception(struct pt_regs *regs, int access)
+{
+ struct gmap *gmap;
+ struct task_struct *tsk;
+ struct mm_struct *mm;
+ struct vm_area_struct *vma;
+ enum fault_type type;
+ unsigned long trans_exc_code;
+ unsigned long address;
+ unsigned int flags;
+ vm_fault_t fault;
+
+ tsk = current;
+ /*
+ * The instruction that caused the program check has
+ * been nullified. Don't signal single step via SIGTRAP.
+ */
+ clear_pt_regs_flag(regs, PIF_PER_TRAP);
+
+ if (notify_page_fault(regs))
+ return 0;
+
+ mm = tsk->mm;
+ trans_exc_code = regs->int_parm_long;
+
+ /*
+ * Verify that the fault happened in user space, that
+ * we are not in an interrupt and that there is a
+ * user context.
+ */
+ fault = VM_FAULT_BADCONTEXT;
+ type = get_fault_type(regs);
+ switch (type) {
+ case KERNEL_FAULT:
+ goto out;
+ case VDSO_FAULT:
+ fault = VM_FAULT_BADMAP;
+ goto out;
+ case USER_FAULT:
+ case GMAP_FAULT:
+ if (faulthandler_disabled() || !mm)
+ goto out;
+ break;
+ }
+
+ address = trans_exc_code & __FAIL_ADDR_MASK;
+ perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address);
+ flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE;
+ if (user_mode(regs))
+ flags |= FAULT_FLAG_USER;
+ if (access == VM_WRITE || (trans_exc_code & store_indication) == 0x400)
+ flags |= FAULT_FLAG_WRITE;
+ down_read(&mm->mmap_sem);
+
+ gmap = NULL;
+ if (IS_ENABLED(CONFIG_PGSTE) && type == GMAP_FAULT) {
+ gmap = (struct gmap *) S390_lowcore.gmap;
+ current->thread.gmap_addr = address;
+ current->thread.gmap_write_flag = !!(flags & FAULT_FLAG_WRITE);
+ current->thread.gmap_int_code = regs->int_code & 0xffff;
+ address = __gmap_translate(gmap, address);
+ if (address == -EFAULT) {
+ fault = VM_FAULT_BADMAP;
+ goto out_up;
+ }
+ if (gmap->pfault_enabled)
+ flags |= FAULT_FLAG_RETRY_NOWAIT;
+ }
+
+retry:
+ fault = VM_FAULT_BADMAP;
+ vma = find_vma(mm, address);
+ if (!vma)
+ goto out_up;
+
+ if (unlikely(vma->vm_start > address)) {
+ if (!(vma->vm_flags & VM_GROWSDOWN))
+ goto out_up;
+ if (expand_stack(vma, address))
+ goto out_up;
+ }
+
+ /*
+ * Ok, we have a good vm_area for this memory access, so
+ * we can handle it..
+ */
+ fault = VM_FAULT_BADACCESS;
+ if (unlikely(!(vma->vm_flags & access)))
+ goto out_up;
+
+ if (is_vm_hugetlb_page(vma))
+ address &= HPAGE_MASK;
+ /*
+ * If for any reason at all we couldn't handle the fault,
+ * make sure we exit gracefully rather than endlessly redo
+ * the fault.
+ */
+ fault = handle_mm_fault(vma, address, flags);
+ /* No reason to continue if interrupted by SIGKILL. */
+ if ((fault & VM_FAULT_RETRY) && fatal_signal_pending(current)) {
+ fault = VM_FAULT_SIGNAL;
+ if (flags & FAULT_FLAG_RETRY_NOWAIT)
+ goto out_up;
+ goto out;
+ }
+ if (unlikely(fault & VM_FAULT_ERROR))
+ goto out_up;
+
+ /*
+ * Major/minor page fault accounting is only done on the
+ * initial attempt. If we go through a retry, it is extremely
+ * likely that the page will be found in page cache at that point.
+ */
+ if (flags & FAULT_FLAG_ALLOW_RETRY) {
+ if (fault & VM_FAULT_MAJOR) {
+ tsk->maj_flt++;
+ perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1,
+ regs, address);
+ } else {
+ tsk->min_flt++;
+ perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1,
+ regs, address);
+ }
+ if (fault & VM_FAULT_RETRY) {
+ if (IS_ENABLED(CONFIG_PGSTE) && gmap &&
+ (flags & FAULT_FLAG_RETRY_NOWAIT)) {
+ /* FAULT_FLAG_RETRY_NOWAIT has been set,
+ * mmap_sem has not been released */
+ current->thread.gmap_pfault = 1;
+ fault = VM_FAULT_PFAULT;
+ goto out_up;
+ }
+ /* Clear FAULT_FLAG_ALLOW_RETRY to avoid any risk
+ * of starvation. */
+ flags &= ~(FAULT_FLAG_ALLOW_RETRY |
+ FAULT_FLAG_RETRY_NOWAIT);
+ flags |= FAULT_FLAG_TRIED;
+ down_read(&mm->mmap_sem);
+ goto retry;
+ }
+ }
+ if (IS_ENABLED(CONFIG_PGSTE) && gmap) {
+ address = __gmap_link(gmap, current->thread.gmap_addr,
+ address);
+ if (address == -EFAULT) {
+ fault = VM_FAULT_BADMAP;
+ goto out_up;
+ }
+ if (address == -ENOMEM) {
+ fault = VM_FAULT_OOM;
+ goto out_up;
+ }
+ }
+ fault = 0;
+out_up:
+ up_read(&mm->mmap_sem);
+out:
+ return fault;
+}
+
+void do_protection_exception(struct pt_regs *regs)
+{
+ unsigned long trans_exc_code;
+ int access;
+ vm_fault_t fault;
+
+ trans_exc_code = regs->int_parm_long;
+ /*
+ * Protection exceptions are suppressing, decrement psw address.
+ * The exception to this rule are aborted transactions, for these
+ * the PSW already points to the correct location.
+ */
+ if (!(regs->int_code & 0x200))
+ regs->psw.addr = __rewind_psw(regs->psw, regs->int_code >> 16);
+ /*
+ * Check for low-address protection. This needs to be treated
+ * as a special case because the translation exception code
+ * field is not guaranteed to contain valid data in this case.
+ */
+ if (unlikely(!(trans_exc_code & 4))) {
+ do_low_address(regs);
+ return;
+ }
+ if (unlikely(MACHINE_HAS_NX && (trans_exc_code & 0x80))) {
+ regs->int_parm_long = (trans_exc_code & ~PAGE_MASK) |
+ (regs->psw.addr & PAGE_MASK);
+ access = VM_EXEC;
+ fault = VM_FAULT_BADACCESS;
+ } else {
+ access = VM_WRITE;
+ fault = do_exception(regs, access);
+ }
+ if (unlikely(fault))
+ do_fault_error(regs, access, fault);
+}
+NOKPROBE_SYMBOL(do_protection_exception);
+
+void do_dat_exception(struct pt_regs *regs)
+{
+ int access;
+ vm_fault_t fault;
+
+ access = VM_READ | VM_EXEC | VM_WRITE;
+ fault = do_exception(regs, access);
+ if (unlikely(fault))
+ do_fault_error(regs, access, fault);
+}
+NOKPROBE_SYMBOL(do_dat_exception);
+
+#ifdef CONFIG_PFAULT
+/*
+ * 'pfault' pseudo page faults routines.
+ */
+static int pfault_disable;
+
+static int __init nopfault(char *str)
+{
+ pfault_disable = 1;
+ return 1;
+}
+
+__setup("nopfault", nopfault);
+
+/* Parameter block for the pfault diagnose 0x258 */
+struct pfault_refbk {
+ u16 refdiagc; /* diagnose code, always 0x258 */
+ u16 reffcode; /* function code: 0 = set token, 1 = cancel */
+ u16 refdwlen; /* block length in doublewords */
+ u16 refversn; /* interface version */
+ u64 refgaddr; /* address of the pfault token (here: __LC_LPP) */
+ u64 refselmk; /* select mask */
+ u64 refcmpmk; /* compare mask */
+ u64 reserved;
+} __attribute__ ((packed, aligned(8)));
+
+int pfault_init(void)
+{
+ struct pfault_refbk refbk = {
+ .refdiagc = 0x258,
+ .reffcode = 0,
+ .refdwlen = 5,
+ .refversn = 2,
+ .refgaddr = __LC_LPP,
+ .refselmk = 1ULL << 48,
+ .refcmpmk = 1ULL << 48,
+ .reserved = __PF_RES_FIELD };
+ int rc;
+
+ if (pfault_disable)
+ return -1;
+ diag_stat_inc(DIAG_STAT_X258);
+ asm volatile(
+ " diag %1,%0,0x258\n"
+ "0: j 2f\n"
+ "1: la %0,8\n"
+ "2:\n"
+ EX_TABLE(0b,1b)
+ : "=d" (rc) : "a" (&refbk), "m" (refbk) : "cc");
+ return rc;
+}
+
+void pfault_fini(void)
+{
+ struct pfault_refbk refbk = {
+ .refdiagc = 0x258,
+ .reffcode = 1,
+ .refdwlen = 5,
+ .refversn = 2,
+ };
+
+ if (pfault_disable)
+ return;
+ diag_stat_inc(DIAG_STAT_X258);
+ asm volatile(
+ " diag %0,0,0x258\n"
+ "0: nopr %%r7\n"
+ EX_TABLE(0b,0b)
+ : : "a" (&refbk), "m" (refbk) : "cc");
+}
+
+static DEFINE_SPINLOCK(pfault_lock);
+static LIST_HEAD(pfault_list);
+
+#define PF_COMPLETE 0x0080
+
+/*
+ * The mechanism of our pfault code: if Linux is running as a guest, runs a
+ * user space process, and the user space process accesses a page that the
+ * host has paged out, we get a pfault interrupt.
+ *
+ * This allows us, within the guest, to schedule a different process. Without
+ * this mechanism the host would have to suspend the whole virtual cpu until
+ * the page has been paged in.
+ *
+ * So when we get such an interrupt then we set the state of the current task
+ * to uninterruptible and also set the need_resched flag. Both happen within
+ * interrupt context(!). If we later on want to return to user space we
+ * recognize the need_resched flag and then call schedule(). It's not very
+ * obvious how this works...
+ *
+ * Of course we have a lot of additional fun with the completion interrupt (->
+ * host signals that a page of a process has been paged in and the process can
+ * continue to run). This interrupt can arrive on any cpu and, since we have
+ * virtual cpus, actually appear before the interrupt that signals that a page
+ * is missing.
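+ *
+ * As a summary, the pfault_wait values used below mean (as implemented in
+ * this function):
+ *	 0: no pseudo page fault pending for the task
+ *	 1: initial interrupt seen, the task is or will be put to sleep
+ *	-1: completion interrupt arrived before the initial interrupt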
+ */
+static void pfault_interrupt(struct ext_code ext_code,
+ unsigned int param32, unsigned long param64)
+{
+ struct task_struct *tsk;
+ __u16 subcode;
+ pid_t pid;
+
+ /*
+ * Get the external interruption subcode & pfault initial/completion
+ * signal bit. VM stores this in the 'cpu address' field associated
+ * with the external interrupt.
+ */
+ subcode = ext_code.subcode;
+ if ((subcode & 0xff00) != __SUBCODE_MASK)
+ return;
+ inc_irq_stat(IRQEXT_PFL);
+ /* Get the token (= pid of the affected task). */
+ pid = param64 & LPP_PID_MASK;
+ rcu_read_lock();
+ tsk = find_task_by_pid_ns(pid, &init_pid_ns);
+ if (tsk)
+ get_task_struct(tsk);
+ rcu_read_unlock();
+ if (!tsk)
+ return;
+ spin_lock(&pfault_lock);
+ if (subcode & PF_COMPLETE) {
+ /* signal bit is set -> a page has been swapped in by VM */
+ if (tsk->thread.pfault_wait == 1) {
+ /* Initial interrupt was faster than the completion
+ * interrupt. pfault_wait is valid. Set pfault_wait
+ * back to zero and wake up the process. This can
+ * safely be done because the task is still sleeping
+ * and can't produce new pfaults. */
+ tsk->thread.pfault_wait = 0;
+ list_del(&tsk->thread.list);
+ wake_up_process(tsk);
+ put_task_struct(tsk);
+ } else {
+ /* Completion interrupt was faster than initial
+ * interrupt. Set pfault_wait to -1 so the initial
+ * interrupt doesn't put the task to sleep.
+ * If the task is not running, ignore the completion
+ * interrupt since it must be a leftover of a PFAULT
+ * CANCEL operation which didn't remove all pending
+ * completion interrupts. */
+ if (tsk->state == TASK_RUNNING)
+ tsk->thread.pfault_wait = -1;
+ }
+ } else {
+ /* signal bit not set -> a real page is missing. */
+ if (WARN_ON_ONCE(tsk != current))
+ goto out;
+ if (tsk->thread.pfault_wait == 1) {
+ /* Already on the list with a reference: put to sleep */
+ goto block;
+ } else if (tsk->thread.pfault_wait == -1) {
+ /* Completion interrupt was faster than the initial
+ * interrupt (pfault_wait == -1). Set pfault_wait
+ * back to zero and exit. */
+ tsk->thread.pfault_wait = 0;
+ } else {
+ /* Initial interrupt arrived before completion
+ * interrupt. Let the task sleep.
+ * An extra task reference is needed since a different
+ * cpu may set the task state to TASK_RUNNING again
+ * before the scheduler is reached. */
+ get_task_struct(tsk);
+ tsk->thread.pfault_wait = 1;
+ list_add(&tsk->thread.list, &pfault_list);
+block:
+ /* Since this must be a userspace fault, there
+ * is no kernel task state to trample. Rely on the
+ * return to userspace schedule() to block. */
+ __set_current_state(TASK_UNINTERRUPTIBLE);
+ set_tsk_need_resched(tsk);
+ set_preempt_need_resched();
+ }
+ }
+out:
+ spin_unlock(&pfault_lock);
+ put_task_struct(tsk);
+}
+
+static int pfault_cpu_dead(unsigned int cpu)
+{
+ struct thread_struct *thread, *next;
+ struct task_struct *tsk;
+
+ spin_lock_irq(&pfault_lock);
+ list_for_each_entry_safe(thread, next, &pfault_list, list) {
+ thread->pfault_wait = 0;
+ list_del(&thread->list);
+ tsk = container_of(thread, struct task_struct, thread);
+ wake_up_process(tsk);
+ put_task_struct(tsk);
+ }
+ spin_unlock_irq(&pfault_lock);
+ return 0;
+}
+
+static int __init pfault_irq_init(void)
+{
+ int rc;
+
+ rc = register_external_irq(EXT_IRQ_CP_SERVICE, pfault_interrupt);
+ if (rc)
+ goto out_extint;
+ rc = pfault_init() == 0 ? 0 : -EOPNOTSUPP;
+ if (rc)
+ goto out_pfault;
+ irq_subclass_register(IRQ_SUBCLASS_SERVICE_SIGNAL);
+ cpuhp_setup_state_nocalls(CPUHP_S390_PFAULT_DEAD, "s390/pfault:dead",
+ NULL, pfault_cpu_dead);
+ return 0;
+
+out_pfault:
+ unregister_external_irq(EXT_IRQ_CP_SERVICE, pfault_interrupt);
+out_extint:
+ pfault_disable = 1;
+ return rc;
+}
+early_initcall(pfault_irq_init);
+
+#endif /* CONFIG_PFAULT */
diff --git a/arch/s390/mm/gmap.c b/arch/s390/mm/gmap.c
new file mode 100644
index 000000000..65ccb9d79
--- /dev/null
+++ b/arch/s390/mm/gmap.c
@@ -0,0 +1,2645 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * KVM guest address space mapping code
+ *
+ * Copyright IBM Corp. 2007, 2016, 2018
+ * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>
+ * David Hildenbrand <david@redhat.com>
+ * Janosch Frank <frankja@linux.vnet.ibm.com>
+ */
+
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <linux/swap.h>
+#include <linux/smp.h>
+#include <linux/spinlock.h>
+#include <linux/slab.h>
+#include <linux/swapops.h>
+#include <linux/ksm.h>
+#include <linux/mman.h>
+
+#include <asm/pgtable.h>
+#include <asm/pgalloc.h>
+#include <asm/gmap.h>
+#include <asm/tlb.h>
+
+#define GMAP_SHADOW_FAKE_TABLE 1ULL
+
+/**
+ * gmap_alloc - allocate and initialize a guest address space
+ * @limit: maximum address of the gmap address space
+ *
+ * Returns a guest address space structure.
+ */
+static struct gmap *gmap_alloc(unsigned long limit)
+{
+ struct gmap *gmap;
+ struct page *page;
+ unsigned long *table;
+ unsigned long etype, atype;
+
+ if (limit < _REGION3_SIZE) {
+ limit = _REGION3_SIZE - 1;
+ atype = _ASCE_TYPE_SEGMENT;
+ etype = _SEGMENT_ENTRY_EMPTY;
+ } else if (limit < _REGION2_SIZE) {
+ limit = _REGION2_SIZE - 1;
+ atype = _ASCE_TYPE_REGION3;
+ etype = _REGION3_ENTRY_EMPTY;
+ } else if (limit < _REGION1_SIZE) {
+ limit = _REGION1_SIZE - 1;
+ atype = _ASCE_TYPE_REGION2;
+ etype = _REGION2_ENTRY_EMPTY;
+ } else {
+ limit = -1UL;
+ atype = _ASCE_TYPE_REGION1;
+ etype = _REGION1_ENTRY_EMPTY;
+ }
+ gmap = kzalloc(sizeof(struct gmap), GFP_KERNEL);
+ if (!gmap)
+ goto out;
+ INIT_LIST_HEAD(&gmap->crst_list);
+ INIT_LIST_HEAD(&gmap->children);
+ INIT_LIST_HEAD(&gmap->pt_list);
+ INIT_RADIX_TREE(&gmap->guest_to_host, GFP_KERNEL);
+ INIT_RADIX_TREE(&gmap->host_to_guest, GFP_ATOMIC);
+ INIT_RADIX_TREE(&gmap->host_to_rmap, GFP_ATOMIC);
+ spin_lock_init(&gmap->guest_table_lock);
+ spin_lock_init(&gmap->shadow_lock);
+ atomic_set(&gmap->ref_count, 1);
+ page = alloc_pages(GFP_KERNEL, CRST_ALLOC_ORDER);
+ if (!page)
+ goto out_free;
+ page->index = 0;
+ list_add(&page->lru, &gmap->crst_list);
+ table = (unsigned long *) page_to_phys(page);
+ crst_table_init(table, etype);
+ gmap->table = table;
+ gmap->asce = atype | _ASCE_TABLE_LENGTH |
+ _ASCE_USER_BITS | __pa(table);
+ gmap->asce_end = limit;
+ return gmap;
+
+out_free:
+ kfree(gmap);
+out:
+ return NULL;
+}
+
+/**
+ * gmap_create - create a guest address space
+ * @mm: pointer to the parent mm_struct
+ * @limit: maximum size of the gmap address space
+ *
+ * Returns a guest address space structure.
+ */
+struct gmap *gmap_create(struct mm_struct *mm, unsigned long limit)
+{
+ struct gmap *gmap;
+ unsigned long gmap_asce;
+
+ gmap = gmap_alloc(limit);
+ if (!gmap)
+ return NULL;
+ gmap->mm = mm;
+ spin_lock(&mm->context.lock);
+ list_add_rcu(&gmap->list, &mm->context.gmap_list);
+ if (list_is_singular(&mm->context.gmap_list))
+ gmap_asce = gmap->asce;
+ else
+ gmap_asce = -1UL;
+ WRITE_ONCE(mm->context.gmap_asce, gmap_asce);
+ spin_unlock(&mm->context.lock);
+ return gmap;
+}
+EXPORT_SYMBOL_GPL(gmap_create);
+
+static void gmap_flush_tlb(struct gmap *gmap)
+{
+ if (MACHINE_HAS_IDTE)
+ __tlb_flush_idte(gmap->asce);
+ else
+ __tlb_flush_global();
+}
+
+static void gmap_radix_tree_free(struct radix_tree_root *root)
+{
+ struct radix_tree_iter iter;
+ unsigned long indices[16];
+ unsigned long index;
+ void __rcu **slot;
+ int i, nr;
+
+ /* A radix tree is freed by deleting all of its entries */
+ index = 0;
+ do {
+ nr = 0;
+ radix_tree_for_each_slot(slot, root, &iter, index) {
+ indices[nr] = iter.index;
+ if (++nr == 16)
+ break;
+ }
+ for (i = 0; i < nr; i++) {
+ index = indices[i];
+ radix_tree_delete(root, index);
+ }
+ } while (nr > 0);
+}
+
+static void gmap_rmap_radix_tree_free(struct radix_tree_root *root)
+{
+ struct gmap_rmap *rmap, *rnext, *head;
+ struct radix_tree_iter iter;
+ unsigned long indices[16];
+ unsigned long index;
+ void __rcu **slot;
+ int i, nr;
+
+ /* A radix tree is freed by deleting all of its entries */
+ index = 0;
+ do {
+ nr = 0;
+ radix_tree_for_each_slot(slot, root, &iter, index) {
+ indices[nr] = iter.index;
+ if (++nr == 16)
+ break;
+ }
+ for (i = 0; i < nr; i++) {
+ index = indices[i];
+ head = radix_tree_delete(root, index);
+ gmap_for_each_rmap_safe(rmap, rnext, head)
+ kfree(rmap);
+ }
+ } while (nr > 0);
+}
+
+/**
+ * gmap_free - free a guest address space
+ * @gmap: pointer to the guest address space structure
+ *
+ * No locks required. There are no references to this gmap anymore.
+ */
+static void gmap_free(struct gmap *gmap)
+{
+ struct page *page, *next;
+
+ /* Flush tlb of all gmaps (if not already done for shadows) */
+ if (!(gmap_is_shadow(gmap) && gmap->removed))
+ gmap_flush_tlb(gmap);
+ /* Free all segment & region tables. */
+ list_for_each_entry_safe(page, next, &gmap->crst_list, lru)
+ __free_pages(page, CRST_ALLOC_ORDER);
+ gmap_radix_tree_free(&gmap->guest_to_host);
+ gmap_radix_tree_free(&gmap->host_to_guest);
+
+ /* Free additional data for a shadow gmap */
+ if (gmap_is_shadow(gmap)) {
+ /* Free all page tables. */
+ list_for_each_entry_safe(page, next, &gmap->pt_list, lru)
+ page_table_free_pgste(page);
+ gmap_rmap_radix_tree_free(&gmap->host_to_rmap);
+ /* Release reference to the parent */
+ gmap_put(gmap->parent);
+ }
+
+ kfree(gmap);
+}
+
+/**
+ * gmap_get - increase reference counter for guest address space
+ * @gmap: pointer to the guest address space structure
+ *
+ * Returns the gmap pointer
+ */
+struct gmap *gmap_get(struct gmap *gmap)
+{
+ atomic_inc(&gmap->ref_count);
+ return gmap;
+}
+EXPORT_SYMBOL_GPL(gmap_get);
+
+/**
+ * gmap_put - decrease reference counter for guest address space
+ * @gmap: pointer to the guest address space structure
+ *
+ * If the reference counter reaches zero the guest address space is freed.
+ */
+void gmap_put(struct gmap *gmap)
+{
+ if (atomic_dec_return(&gmap->ref_count) == 0)
+ gmap_free(gmap);
+}
+EXPORT_SYMBOL_GPL(gmap_put);
+
+/**
+ * gmap_remove - remove a guest address space but do not free it yet
+ * @gmap: pointer to the guest address space structure
+ */
+void gmap_remove(struct gmap *gmap)
+{
+ struct gmap *sg, *next;
+ unsigned long gmap_asce;
+
+ /* Remove all shadow gmaps linked to this gmap */
+ if (!list_empty(&gmap->children)) {
+ spin_lock(&gmap->shadow_lock);
+ list_for_each_entry_safe(sg, next, &gmap->children, list) {
+ list_del(&sg->list);
+ gmap_put(sg);
+ }
+ spin_unlock(&gmap->shadow_lock);
+ }
+ /* Remove gmap from the pre-mm list */
+ spin_lock(&gmap->mm->context.lock);
+ list_del_rcu(&gmap->list);
+ if (list_empty(&gmap->mm->context.gmap_list))
+ gmap_asce = 0;
+ else if (list_is_singular(&gmap->mm->context.gmap_list))
+ gmap_asce = list_first_entry(&gmap->mm->context.gmap_list,
+ struct gmap, list)->asce;
+ else
+ gmap_asce = -1UL;
+ WRITE_ONCE(gmap->mm->context.gmap_asce, gmap_asce);
+ spin_unlock(&gmap->mm->context.lock);
+ synchronize_rcu();
+ /* Put reference */
+ gmap_put(gmap);
+}
+EXPORT_SYMBOL_GPL(gmap_remove);
+
+/**
+ * gmap_enable - switch primary space to the guest address space
+ * @gmap: pointer to the guest address space structure
+ */
+void gmap_enable(struct gmap *gmap)
+{
+ S390_lowcore.gmap = (unsigned long) gmap;
+}
+EXPORT_SYMBOL_GPL(gmap_enable);
+
+/**
+ * gmap_disable - switch back to the standard primary address space
+ * @gmap: pointer to the guest address space structure
+ */
+void gmap_disable(struct gmap *gmap)
+{
+ S390_lowcore.gmap = 0UL;
+}
+EXPORT_SYMBOL_GPL(gmap_disable);
+
+/**
+ * gmap_get_enabled - get a pointer to the currently enabled gmap
+ *
+ * Returns a pointer to the currently enabled gmap. 0 if none is enabled.
+ */
+struct gmap *gmap_get_enabled(void)
+{
+ return (struct gmap *) S390_lowcore.gmap;
+}
+EXPORT_SYMBOL_GPL(gmap_get_enabled);
+
+/*
+ * gmap_alloc_table is assumed to be called with mmap_sem held
+ */
+static int gmap_alloc_table(struct gmap *gmap, unsigned long *table,
+ unsigned long init, unsigned long gaddr)
+{
+ struct page *page;
+ unsigned long *new;
+
+ /* since we don't free the gmap table until gmap_free we can unlock */
+ page = alloc_pages(GFP_KERNEL, CRST_ALLOC_ORDER);
+ if (!page)
+ return -ENOMEM;
+ new = (unsigned long *) page_to_phys(page);
+ crst_table_init(new, init);
+ spin_lock(&gmap->guest_table_lock);
+ if (*table & _REGION_ENTRY_INVALID) {
+ list_add(&page->lru, &gmap->crst_list);
+ *table = (unsigned long) new | _REGION_ENTRY_LENGTH |
+ (*table & _REGION_ENTRY_TYPE_MASK);
+ page->index = gaddr;
+ page = NULL;
+ }
+ spin_unlock(&gmap->guest_table_lock);
+ if (page)
+ __free_pages(page, CRST_ALLOC_ORDER);
+ return 0;
+}
+
+/**
+ * __gmap_segment_gaddr - find virtual address from segment pointer
+ * @entry: pointer to a segment table entry in the guest address space
+ *
+ * Returns the virtual address in the guest address space for the segment
+ */
+static unsigned long __gmap_segment_gaddr(unsigned long *entry)
+{
+ struct page *page;
+ unsigned long offset, mask;
+
+ offset = (unsigned long) entry / sizeof(unsigned long);
+ offset = (offset & (PTRS_PER_PMD - 1)) * PMD_SIZE;
+ mask = ~(PTRS_PER_PMD * sizeof(pmd_t) - 1);
+ page = virt_to_page((void *)((unsigned long) entry & mask));
+ return page->index + offset;
+}
+
+/**
+ * __gmap_unlink_by_vmaddr - unlink a single segment via a host address
+ * @gmap: pointer to the guest address space structure
+ * @vmaddr: address in the host process address space
+ *
+ * Returns 1 if a TLB flush is required
+ */
+static int __gmap_unlink_by_vmaddr(struct gmap *gmap, unsigned long vmaddr)
+{
+ unsigned long *entry;
+ int flush = 0;
+
+ BUG_ON(gmap_is_shadow(gmap));
+ spin_lock(&gmap->guest_table_lock);
+ entry = radix_tree_delete(&gmap->host_to_guest, vmaddr >> PMD_SHIFT);
+ if (entry) {
+ flush = (*entry != _SEGMENT_ENTRY_EMPTY);
+ *entry = _SEGMENT_ENTRY_EMPTY;
+ }
+ spin_unlock(&gmap->guest_table_lock);
+ return flush;
+}
+
+/**
+ * __gmap_unmap_by_gaddr - unmap a single segment via a guest address
+ * @gmap: pointer to the guest address space structure
+ * @gaddr: address in the guest address space
+ *
+ * Returns 1 if a TLB flush is required
+ */
+static int __gmap_unmap_by_gaddr(struct gmap *gmap, unsigned long gaddr)
+{
+ unsigned long vmaddr;
+
+ vmaddr = (unsigned long) radix_tree_delete(&gmap->guest_to_host,
+ gaddr >> PMD_SHIFT);
+ return vmaddr ? __gmap_unlink_by_vmaddr(gmap, vmaddr) : 0;
+}
+
+/**
+ * gmap_unmap_segment - unmap segment from the guest address space
+ * @gmap: pointer to the guest address space structure
+ * @to: address in the guest address space
+ * @len: length of the memory area to unmap
+ *
+ * Returns 0 if the unmap succeeded, -EINVAL if not.
+ */
+int gmap_unmap_segment(struct gmap *gmap, unsigned long to, unsigned long len)
+{
+ unsigned long off;
+ int flush;
+
+ BUG_ON(gmap_is_shadow(gmap));
+ if ((to | len) & (PMD_SIZE - 1))
+ return -EINVAL;
+ if (len == 0 || to + len < to)
+ return -EINVAL;
+
+ flush = 0;
+ down_write(&gmap->mm->mmap_sem);
+ for (off = 0; off < len; off += PMD_SIZE)
+ flush |= __gmap_unmap_by_gaddr(gmap, to + off);
+ up_write(&gmap->mm->mmap_sem);
+ if (flush)
+ gmap_flush_tlb(gmap);
+ return 0;
+}
+EXPORT_SYMBOL_GPL(gmap_unmap_segment);
+
+/**
+ * gmap_map_segment - map a segment to the guest address space
+ * @gmap: pointer to the guest address space structure
+ * @from: source address in the parent address space
+ * @to: target address in the guest address space
+ * @len: length of the memory area to map
+ *
+ * Returns 0 if the mmap succeeded, -EINVAL or -ENOMEM if not.
+ */
+int gmap_map_segment(struct gmap *gmap, unsigned long from,
+ unsigned long to, unsigned long len)
+{
+ unsigned long off;
+ int flush;
+
+ BUG_ON(gmap_is_shadow(gmap));
+ if ((from | to | len) & (PMD_SIZE - 1))
+ return -EINVAL;
+ if (len == 0 || from + len < from || to + len < to ||
+ from + len - 1 > TASK_SIZE_MAX || to + len - 1 > gmap->asce_end)
+ return -EINVAL;
+
+ flush = 0;
+ down_write(&gmap->mm->mmap_sem);
+ for (off = 0; off < len; off += PMD_SIZE) {
+ /* Remove old translation */
+ flush |= __gmap_unmap_by_gaddr(gmap, to + off);
+ /* Store new translation */
+ if (radix_tree_insert(&gmap->guest_to_host,
+ (to + off) >> PMD_SHIFT,
+ (void *) from + off))
+ break;
+ }
+ up_write(&gmap->mm->mmap_sem);
+ if (flush)
+ gmap_flush_tlb(gmap);
+ if (off >= len)
+ return 0;
+ gmap_unmap_segment(gmap, to, len);
+ return -ENOMEM;
+}
+EXPORT_SYMBOL_GPL(gmap_map_segment);
+
+/**
+ * __gmap_translate - translate a guest address to a user space address
+ * @gmap: pointer to guest mapping meta data structure
+ * @gaddr: guest address
+ *
+ * Returns user space address which corresponds to the guest address or
+ * -EFAULT if no such mapping exists.
+ * This function does not establish potentially missing page table entries.
+ * The mmap_sem of the mm that belongs to the address space must be held
+ * when this function gets called.
+ *
+ * Note: Can also be called for shadow gmaps.
+ */
+unsigned long __gmap_translate(struct gmap *gmap, unsigned long gaddr)
+{
+ unsigned long vmaddr;
+
+ vmaddr = (unsigned long)
+ radix_tree_lookup(&gmap->guest_to_host, gaddr >> PMD_SHIFT);
+ /* Note: guest_to_host is empty for a shadow gmap */
+ return vmaddr ? (vmaddr | (gaddr & ~PMD_MASK)) : -EFAULT;
+}
+EXPORT_SYMBOL_GPL(__gmap_translate);
+
+/**
+ * gmap_translate - translate a guest address to a user space address
+ * @gmap: pointer to guest mapping meta data structure
+ * @gaddr: guest address
+ *
+ * Returns user space address which corresponds to the guest address or
+ * -EFAULT if no such mapping exists.
+ * This function does not establish potentially missing page table entries.
+ */
+unsigned long gmap_translate(struct gmap *gmap, unsigned long gaddr)
+{
+ unsigned long rc;
+
+ down_read(&gmap->mm->mmap_sem);
+ rc = __gmap_translate(gmap, gaddr);
+ up_read(&gmap->mm->mmap_sem);
+ return rc;
+}
+EXPORT_SYMBOL_GPL(gmap_translate);
+
+/**
+ * gmap_unlink - disconnect a page table from the gmap shadow tables
+ * @gmap: pointer to guest mapping meta data structure
+ * @table: pointer to the host page table
+ * @vmaddr: vm address associated with the host page table
+ */
+void gmap_unlink(struct mm_struct *mm, unsigned long *table,
+ unsigned long vmaddr)
+{
+ struct gmap *gmap;
+ int flush;
+
+ rcu_read_lock();
+ list_for_each_entry_rcu(gmap, &mm->context.gmap_list, list) {
+ flush = __gmap_unlink_by_vmaddr(gmap, vmaddr);
+ if (flush)
+ gmap_flush_tlb(gmap);
+ }
+ rcu_read_unlock();
+}
+
+static void gmap_pmdp_xchg(struct gmap *gmap, pmd_t *old, pmd_t new,
+ unsigned long gaddr);
+
+/**
+ * gmap_link - set up shadow page tables to connect a host to a guest address
+ * @gmap: pointer to guest mapping meta data structure
+ * @gaddr: guest address
+ * @vmaddr: vm address
+ *
+ * Returns 0 on success, -ENOMEM for out of memory conditions, and -EFAULT
+ * if the vm address is already mapped to a different guest segment.
+ * The mmap_sem of the mm that belongs to the address space must be held
+ * when this function gets called.
+ */
+int __gmap_link(struct gmap *gmap, unsigned long gaddr, unsigned long vmaddr)
+{
+ struct mm_struct *mm;
+ unsigned long *table;
+ spinlock_t *ptl;
+ pgd_t *pgd;
+ p4d_t *p4d;
+ pud_t *pud;
+ pmd_t *pmd;
+ u64 unprot;
+ int rc;
+
+ BUG_ON(gmap_is_shadow(gmap));
+ /* Create higher level tables in the gmap page table */
+ table = gmap->table;
+ if ((gmap->asce & _ASCE_TYPE_MASK) >= _ASCE_TYPE_REGION1) {
+ table += (gaddr & _REGION1_INDEX) >> _REGION1_SHIFT;
+ if ((*table & _REGION_ENTRY_INVALID) &&
+ gmap_alloc_table(gmap, table, _REGION2_ENTRY_EMPTY,
+ gaddr & _REGION1_MASK))
+ return -ENOMEM;
+ table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
+ }
+ if ((gmap->asce & _ASCE_TYPE_MASK) >= _ASCE_TYPE_REGION2) {
+ table += (gaddr & _REGION2_INDEX) >> _REGION2_SHIFT;
+ if ((*table & _REGION_ENTRY_INVALID) &&
+ gmap_alloc_table(gmap, table, _REGION3_ENTRY_EMPTY,
+ gaddr & _REGION2_MASK))
+ return -ENOMEM;
+ table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
+ }
+ if ((gmap->asce & _ASCE_TYPE_MASK) >= _ASCE_TYPE_REGION3) {
+ table += (gaddr & _REGION3_INDEX) >> _REGION3_SHIFT;
+ if ((*table & _REGION_ENTRY_INVALID) &&
+ gmap_alloc_table(gmap, table, _SEGMENT_ENTRY_EMPTY,
+ gaddr & _REGION3_MASK))
+ return -ENOMEM;
+ table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
+ }
+ table += (gaddr & _SEGMENT_INDEX) >> _SEGMENT_SHIFT;
+ /* Walk the parent mm page table */
+ mm = gmap->mm;
+ pgd = pgd_offset(mm, vmaddr);
+ VM_BUG_ON(pgd_none(*pgd));
+ p4d = p4d_offset(pgd, vmaddr);
+ VM_BUG_ON(p4d_none(*p4d));
+ pud = pud_offset(p4d, vmaddr);
+ VM_BUG_ON(pud_none(*pud));
+ /* large puds cannot yet be handled */
+ if (pud_large(*pud))
+ return -EFAULT;
+ pmd = pmd_offset(pud, vmaddr);
+ VM_BUG_ON(pmd_none(*pmd));
+ /* Are we allowed to use huge pages? */
+ if (pmd_large(*pmd) && !gmap->mm->context.allow_gmap_hpage_1m)
+ return -EFAULT;
+ /* Link gmap segment table entry location to page table. */
+ rc = radix_tree_preload(GFP_KERNEL);
+ if (rc)
+ return rc;
+ ptl = pmd_lock(mm, pmd);
+ spin_lock(&gmap->guest_table_lock);
+ if (*table == _SEGMENT_ENTRY_EMPTY) {
+ rc = radix_tree_insert(&gmap->host_to_guest,
+ vmaddr >> PMD_SHIFT, table);
+ if (!rc) {
+ if (pmd_large(*pmd)) {
+ *table = (pmd_val(*pmd) &
+ _SEGMENT_ENTRY_HARDWARE_BITS_LARGE)
+ | _SEGMENT_ENTRY_GMAP_UC;
+ } else
+ *table = pmd_val(*pmd) &
+ _SEGMENT_ENTRY_HARDWARE_BITS;
+ }
+ } else if (*table & _SEGMENT_ENTRY_PROTECT &&
+ !(pmd_val(*pmd) & _SEGMENT_ENTRY_PROTECT)) {
+ unprot = (u64)*table;
+ unprot &= ~_SEGMENT_ENTRY_PROTECT;
+ unprot |= _SEGMENT_ENTRY_GMAP_UC;
+ gmap_pmdp_xchg(gmap, (pmd_t *)table, __pmd(unprot), gaddr);
+ }
+ spin_unlock(&gmap->guest_table_lock);
+ spin_unlock(ptl);
+ radix_tree_preload_end();
+ return rc;
+}
+
+/**
+ * gmap_fault - resolve a fault on a guest address
+ * @gmap: pointer to guest mapping meta data structure
+ * @gaddr: guest address
+ * @fault_flags: flags to pass down to handle_mm_fault()
+ *
+ * Returns 0 on success, -ENOMEM for out of memory conditions, and -EFAULT
+ * if the vm address is already mapped to a different guest segment.
+ */
+int gmap_fault(struct gmap *gmap, unsigned long gaddr,
+ unsigned int fault_flags)
+{
+ unsigned long vmaddr;
+ int rc;
+ bool unlocked;
+
+ down_read(&gmap->mm->mmap_sem);
+
+retry:
+ unlocked = false;
+ vmaddr = __gmap_translate(gmap, gaddr);
+ if (IS_ERR_VALUE(vmaddr)) {
+ rc = vmaddr;
+ goto out_up;
+ }
+ if (fixup_user_fault(current, gmap->mm, vmaddr, fault_flags,
+ &unlocked)) {
+ rc = -EFAULT;
+ goto out_up;
+ }
+ /*
+ * In the case that fixup_user_fault unlocked the mmap_sem during
+ * fault-in, redo __gmap_translate to not race with a map/unmap_segment.
+ */
+ if (unlocked)
+ goto retry;
+
+ rc = __gmap_link(gmap, gaddr, vmaddr);
+out_up:
+ up_read(&gmap->mm->mmap_sem);
+ return rc;
+}
+EXPORT_SYMBOL_GPL(gmap_fault);
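+
+/*
+ * Example (illustrative only): a typical user of the gmap interface, for
+ * instance KVM, combines the calls above roughly as follows; the addresses
+ * and the limit are hypothetical, and from/to/len must be 1 MB segment
+ * (PMD) aligned.
+ *
+ *	struct gmap *gmap;
+ *
+ *	gmap = gmap_create(current->mm, (1UL << 44) - 1);
+ *	if (!gmap)
+ *		return -ENOMEM;
+ *	rc = gmap_map_segment(gmap, vm_start, guest_addr, size);
+ *	...
+ *	gmap_enable(gmap);
+ *	rc = gmap_fault(gmap, guest_addr, FAULT_FLAG_WRITE);
+ *	...
+ *	gmap_disable(gmap);
+ *	gmap_remove(gmap);
+ */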
+
+/*
+ * this function is assumed to be called with mmap_sem held
+ */
+void __gmap_zap(struct gmap *gmap, unsigned long gaddr)
+{
+ unsigned long vmaddr;
+ spinlock_t *ptl;
+ pte_t *ptep;
+
+ /* Find the vm address for the guest address */
+ vmaddr = (unsigned long) radix_tree_lookup(&gmap->guest_to_host,
+ gaddr >> PMD_SHIFT);
+ if (vmaddr) {
+ vmaddr |= gaddr & ~PMD_MASK;
+ /* Get pointer to the page table entry */
+ ptep = get_locked_pte(gmap->mm, vmaddr, &ptl);
+ if (likely(ptep)) {
+ ptep_zap_unused(gmap->mm, vmaddr, ptep, 0);
+ pte_unmap_unlock(ptep, ptl);
+ }
+ }
+}
+EXPORT_SYMBOL_GPL(__gmap_zap);
+
+void gmap_discard(struct gmap *gmap, unsigned long from, unsigned long to)
+{
+ unsigned long gaddr, vmaddr, size;
+ struct vm_area_struct *vma;
+
+ down_read(&gmap->mm->mmap_sem);
+ for (gaddr = from; gaddr < to;
+ gaddr = (gaddr + PMD_SIZE) & PMD_MASK) {
+ /* Find the vm address for the guest address */
+ vmaddr = (unsigned long)
+ radix_tree_lookup(&gmap->guest_to_host,
+ gaddr >> PMD_SHIFT);
+ if (!vmaddr)
+ continue;
+ vmaddr |= gaddr & ~PMD_MASK;
+ /* Find vma in the parent mm */
+ vma = find_vma(gmap->mm, vmaddr);
+ if (!vma)
+ continue;
+ /*
+ * We do not discard pages that are backed by
+ * hugetlbfs, so we don't have to refault them.
+ */
+ if (is_vm_hugetlb_page(vma))
+ continue;
+ size = min(to - gaddr, PMD_SIZE - (gaddr & ~PMD_MASK));
+ zap_page_range(vma, vmaddr, size);
+ }
+ up_read(&gmap->mm->mmap_sem);
+}
+EXPORT_SYMBOL_GPL(gmap_discard);
+
+static LIST_HEAD(gmap_notifier_list);
+static DEFINE_SPINLOCK(gmap_notifier_lock);
+
+/**
+ * gmap_register_pte_notifier - register a pte invalidation callback
+ * @nb: pointer to the gmap notifier block
+ */
+void gmap_register_pte_notifier(struct gmap_notifier *nb)
+{
+ spin_lock(&gmap_notifier_lock);
+ list_add_rcu(&nb->list, &gmap_notifier_list);
+ spin_unlock(&gmap_notifier_lock);
+}
+EXPORT_SYMBOL_GPL(gmap_register_pte_notifier);
+
+/**
+ * gmap_unregister_pte_notifier - remove a pte invalidation callback
+ * @nb: pointer to the gmap notifier block
+ */
+void gmap_unregister_pte_notifier(struct gmap_notifier *nb)
+{
+ spin_lock(&gmap_notifier_lock);
+ list_del_rcu(&nb->list);
+ spin_unlock(&gmap_notifier_lock);
+ synchronize_rcu();
+}
+EXPORT_SYMBOL_GPL(gmap_unregister_pte_notifier);
+
+/**
+ * gmap_call_notifier - call all registered invalidation callbacks
+ * @gmap: pointer to guest mapping meta data structure
+ * @start: start virtual address in the guest address space
+ * @end: end virtual address in the guest address space
+ */
+static void gmap_call_notifier(struct gmap *gmap, unsigned long start,
+ unsigned long end)
+{
+ struct gmap_notifier *nb;
+
+ list_for_each_entry(nb, &gmap_notifier_list, list)
+ nb->notifier_call(gmap, start, end);
+}
+
+/**
+ * gmap_table_walk - walk the gmap page tables
+ * @gmap: pointer to guest mapping meta data structure
+ * @gaddr: virtual address in the guest address space
+ * @level: page table level to stop at
+ *
+ * Returns a table entry pointer for the given guest address and @level
+ * @level=0 : returns a pointer to a page table entry (or NULL)
+ * @level=1 : returns a pointer to a segment table entry (or NULL)
+ * @level=2 : returns a pointer to a region-3 table entry (or NULL)
+ * @level=3 : returns a pointer to a region-2 table entry (or NULL)
+ * @level=4 : returns a pointer to a region-1 table entry (or NULL)
+ *
+ * Returns NULL if the gmap page tables could not be walked to the
+ * requested level.
+ *
+ * Note: Can also be called for shadow gmaps.
+ */
+static inline unsigned long *gmap_table_walk(struct gmap *gmap,
+ unsigned long gaddr, int level)
+{
+ const int asce_type = gmap->asce & _ASCE_TYPE_MASK;
+ unsigned long *table;
+
+ if ((gmap->asce & _ASCE_TYPE_MASK) + 4 < (level * 4))
+ return NULL;
+ if (gmap_is_shadow(gmap) && gmap->removed)
+ return NULL;
+
+ if (asce_type != _ASCE_TYPE_REGION1 &&
+ gaddr & (-1UL << (31 + (asce_type >> 2) * 11)))
+ return NULL;
+
+ table = gmap->table;
+ switch (gmap->asce & _ASCE_TYPE_MASK) {
+ case _ASCE_TYPE_REGION1:
+ table += (gaddr & _REGION1_INDEX) >> _REGION1_SHIFT;
+ if (level == 4)
+ break;
+ if (*table & _REGION_ENTRY_INVALID)
+ return NULL;
+ table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
+ /* Fallthrough */
+ case _ASCE_TYPE_REGION2:
+ table += (gaddr & _REGION2_INDEX) >> _REGION2_SHIFT;
+ if (level == 3)
+ break;
+ if (*table & _REGION_ENTRY_INVALID)
+ return NULL;
+ table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
+ /* Fallthrough */
+ case _ASCE_TYPE_REGION3:
+ table += (gaddr & _REGION3_INDEX) >> _REGION3_SHIFT;
+ if (level == 2)
+ break;
+ if (*table & _REGION_ENTRY_INVALID)
+ return NULL;
+ table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
+ /* Fallthrough */
+ case _ASCE_TYPE_SEGMENT:
+ table += (gaddr & _SEGMENT_INDEX) >> _SEGMENT_SHIFT;
+ if (level == 1)
+ break;
+ if (*table & _REGION_ENTRY_INVALID)
+ return NULL;
+ table = (unsigned long *)(*table & _SEGMENT_ENTRY_ORIGIN);
+ table += (gaddr & _PAGE_INDEX) >> _PAGE_SHIFT;
+ }
+ return table;
+}
+
+/**
+ * gmap_pte_op_walk - walk the gmap page table, get the page table lock
+ * and return the pte pointer
+ * @gmap: pointer to guest mapping meta data structure
+ * @gaddr: virtual address in the guest address space
+ * @ptl: pointer to the spinlock pointer
+ *
+ * Returns a pointer to the locked pte for a guest address, or NULL
+ */
+static pte_t *gmap_pte_op_walk(struct gmap *gmap, unsigned long gaddr,
+ spinlock_t **ptl)
+{
+ unsigned long *table;
+
+ BUG_ON(gmap_is_shadow(gmap));
+ /* Walk the gmap page table, lock and get pte pointer */
+ table = gmap_table_walk(gmap, gaddr, 1); /* get segment pointer */
+ if (!table || *table & _SEGMENT_ENTRY_INVALID)
+ return NULL;
+ return pte_alloc_map_lock(gmap->mm, (pmd_t *) table, gaddr, ptl);
+}
+
+/**
+ * gmap_pte_op_fixup - force a page in and connect the gmap page table
+ * @gmap: pointer to guest mapping meta data structure
+ * @gaddr: virtual address in the guest address space
+ * @vmaddr: address in the host process address space
+ * @prot: indicates access rights: PROT_NONE, PROT_READ or PROT_WRITE
+ *
+ * Returns 0 if the caller can retry __gmap_translate (might fail again),
+ * -ENOMEM if out of memory and -EFAULT if anything goes wrong while fixing
+ * up or connecting the gmap page table.
+ */
+static int gmap_pte_op_fixup(struct gmap *gmap, unsigned long gaddr,
+ unsigned long vmaddr, int prot)
+{
+ struct mm_struct *mm = gmap->mm;
+ unsigned int fault_flags;
+ bool unlocked = false;
+
+ BUG_ON(gmap_is_shadow(gmap));
+ fault_flags = (prot == PROT_WRITE) ? FAULT_FLAG_WRITE : 0;
+ if (fixup_user_fault(current, mm, vmaddr, fault_flags, &unlocked))
+ return -EFAULT;
+ if (unlocked)
+ /* lost mmap_sem, caller has to retry __gmap_translate */
+ return 0;
+ /* Connect the page tables */
+ return __gmap_link(gmap, gaddr, vmaddr);
+}
+
+/**
+ * gmap_pte_op_end - release the page table lock
+ * @ptl: pointer to the spinlock pointer
+ */
+static void gmap_pte_op_end(spinlock_t *ptl)
+{
+ if (ptl)
+ spin_unlock(ptl);
+}
+
+/**
+ * gmap_pmd_op_walk - walk the gmap tables, get the guest table lock
+ * and return the pmd pointer
+ * @gmap: pointer to guest mapping meta data structure
+ * @gaddr: virtual address in the guest address space
+ *
+ * Returns a pointer to the pmd for a guest address, or NULL
+ */
+static inline pmd_t *gmap_pmd_op_walk(struct gmap *gmap, unsigned long gaddr)
+{
+ pmd_t *pmdp;
+
+ BUG_ON(gmap_is_shadow(gmap));
+ spin_lock(&gmap->guest_table_lock);
+ pmdp = (pmd_t *) gmap_table_walk(gmap, gaddr, 1);
+
+ if (!pmdp || pmd_none(*pmdp)) {
+ spin_unlock(&gmap->guest_table_lock);
+ return NULL;
+ }
+
+ /* 4k page table entries are locked via the pte (pte_alloc_map_lock). */
+ if (!pmd_large(*pmdp))
+ spin_unlock(&gmap->guest_table_lock);
+ return pmdp;
+}
+
+/**
+ * gmap_pmd_op_end - release the guest_table_lock if needed
+ * @gmap: pointer to the guest mapping meta data structure
+ * @pmdp: pointer to the pmd
+ */
+static inline void gmap_pmd_op_end(struct gmap *gmap, pmd_t *pmdp)
+{
+ if (pmd_large(*pmdp))
+ spin_unlock(&gmap->guest_table_lock);
+}
+
+/*
+ * gmap_protect_pmd - remove access rights to memory and set pmd notification bits
+ * @gmap: pointer to guest mapping meta data structure
+ * @gaddr: virtual address in the guest address space
+ * @pmdp: pointer to the pmd to be protected
+ * @prot: indicates access rights: PROT_NONE, PROT_READ or PROT_WRITE
+ * @bits: notification bits to set
+ *
+ * Returns:
+ * 0 if successfully protected
+ * -EAGAIN if a fixup is needed
+ * -EINVAL if unsupported notifier bits have been specified
+ *
+ * Expected to be called with gmap->mm->mmap_sem in read and
+ * guest_table_lock held.
+ */
+static int gmap_protect_pmd(struct gmap *gmap, unsigned long gaddr,
+ pmd_t *pmdp, int prot, unsigned long bits)
+{
+ int pmd_i = pmd_val(*pmdp) & _SEGMENT_ENTRY_INVALID;
+ int pmd_p = pmd_val(*pmdp) & _SEGMENT_ENTRY_PROTECT;
+ pmd_t new = *pmdp;
+
+ /* Fixup needed */
+ if ((pmd_i && (prot != PROT_NONE)) || (pmd_p && (prot == PROT_WRITE)))
+ return -EAGAIN;
+
+ if (prot == PROT_NONE && !pmd_i) {
+ pmd_val(new) |= _SEGMENT_ENTRY_INVALID;
+ gmap_pmdp_xchg(gmap, pmdp, new, gaddr);
+ }
+
+ if (prot == PROT_READ && !pmd_p) {
+ pmd_val(new) &= ~_SEGMENT_ENTRY_INVALID;
+ pmd_val(new) |= _SEGMENT_ENTRY_PROTECT;
+ gmap_pmdp_xchg(gmap, pmdp, new, gaddr);
+ }
+
+ if (bits & GMAP_NOTIFY_MPROT)
+ pmd_val(*pmdp) |= _SEGMENT_ENTRY_GMAP_IN;
+
+ /* Shadow GMAP protection needs split PMDs */
+ if (bits & GMAP_NOTIFY_SHADOW)
+ return -EINVAL;
+
+ return 0;
+}
+
+/*
+ * gmap_protect_pte - remove access rights to memory and set pgste bits
+ * @gmap: pointer to guest mapping meta data structure
+ * @gaddr: virtual address in the guest address space
+ * @pmdp: pointer to the pmd associated with the pte
+ * @prot: indicates access rights: PROT_NONE, PROT_READ or PROT_WRITE
+ * @bits: notification bits to set
+ *
+ * Returns 0 if successfully protected, -ENOMEM if out of memory and
+ * -EAGAIN if a fixup is needed.
+ *
+ * Expected to be called with gmap->mm->mmap_sem in read
+ */
+static int gmap_protect_pte(struct gmap *gmap, unsigned long gaddr,
+ pmd_t *pmdp, int prot, unsigned long bits)
+{
+ int rc;
+ pte_t *ptep;
+ spinlock_t *ptl = NULL;
+ unsigned long pbits = 0;
+
+ if (pmd_val(*pmdp) & _SEGMENT_ENTRY_INVALID)
+ return -EAGAIN;
+
+ ptep = pte_alloc_map_lock(gmap->mm, pmdp, gaddr, &ptl);
+ if (!ptep)
+ return -ENOMEM;
+
+ pbits |= (bits & GMAP_NOTIFY_MPROT) ? PGSTE_IN_BIT : 0;
+ pbits |= (bits & GMAP_NOTIFY_SHADOW) ? PGSTE_VSIE_BIT : 0;
+ /* Protect and unlock. */
+ rc = ptep_force_prot(gmap->mm, gaddr, ptep, prot, pbits);
+ gmap_pte_op_end(ptl);
+ return rc;
+}
+
+/*
+ * gmap_protect_range - remove access rights to memory and set pgste bits
+ * @gmap: pointer to guest mapping meta data structure
+ * @gaddr: virtual address in the guest address space
+ * @len: size of area
+ * @prot: indicates access rights: PROT_NONE, PROT_READ or PROT_WRITE
+ * @bits: pgste notification bits to set
+ *
+ * Returns 0 if successfully protected, -ENOMEM if out of memory and
+ * -EFAULT if gaddr is invalid (or mapping for shadows is missing).
+ *
+ * Called with gmap->mm->mmap_sem in read.
+ */
+static int gmap_protect_range(struct gmap *gmap, unsigned long gaddr,
+ unsigned long len, int prot, unsigned long bits)
+{
+ unsigned long vmaddr, dist;
+ pmd_t *pmdp;
+ int rc;
+
+ BUG_ON(gmap_is_shadow(gmap));
+ while (len) {
+ rc = -EAGAIN;
+ pmdp = gmap_pmd_op_walk(gmap, gaddr);
+ if (pmdp) {
+ if (!pmd_large(*pmdp)) {
+ rc = gmap_protect_pte(gmap, gaddr, pmdp, prot,
+ bits);
+ if (!rc) {
+ len -= PAGE_SIZE;
+ gaddr += PAGE_SIZE;
+ }
+ } else {
+ rc = gmap_protect_pmd(gmap, gaddr, pmdp, prot,
+ bits);
+ if (!rc) {
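+ /* The whole 1M segment is protected, advance to the next segment boundary */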
+ dist = HPAGE_SIZE - (gaddr & ~HPAGE_MASK);
+ len = len < dist ? 0 : len - dist;
+ gaddr = (gaddr & HPAGE_MASK) + HPAGE_SIZE;
+ }
+ }
+ gmap_pmd_op_end(gmap, pmdp);
+ }
+ if (rc) {
+ if (rc == -EINVAL)
+ return rc;
+
+ /* -EAGAIN, fixup of userspace mm and gmap */
+ vmaddr = __gmap_translate(gmap, gaddr);
+ if (IS_ERR_VALUE(vmaddr))
+ return vmaddr;
+ rc = gmap_pte_op_fixup(gmap, gaddr, vmaddr, prot);
+ if (rc)
+ return rc;
+ }
+ }
+ return 0;
+}
+
+/**
+ * gmap_mprotect_notify - change access rights for a range of ptes and
+ * call the notifier if any pte changes again
+ * @gmap: pointer to guest mapping meta data structure
+ * @gaddr: virtual address in the guest address space
+ * @len: size of area
+ * @prot: indicates access rights: PROT_NONE, PROT_READ or PROT_WRITE
+ *
+ * Returns 0 if for each page in the given range a gmap mapping exists,
+ * the new access rights could be set and the notifier could be armed.
+ * If the gmap mapping is missing for one or more pages -EFAULT is
+ * returned. If no memory could be allocated -ENOMEM is returned.
+ * This function establishes missing page table entries.
+ */
+int gmap_mprotect_notify(struct gmap *gmap, unsigned long gaddr,
+ unsigned long len, int prot)
+{
+ int rc;
+
+ if ((gaddr & ~PAGE_MASK) || (len & ~PAGE_MASK) || gmap_is_shadow(gmap))
+ return -EINVAL;
+ if (!MACHINE_HAS_ESOP && prot == PROT_READ)
+ return -EINVAL;
+ down_read(&gmap->mm->mmap_sem);
+ rc = gmap_protect_range(gmap, gaddr, len, prot, GMAP_NOTIFY_MPROT);
+ up_read(&gmap->mm->mmap_sem);
+ return rc;
+}
+EXPORT_SYMBOL_GPL(gmap_mprotect_notify);
+
+/**
+ * gmap_read_table - get an unsigned long value from a guest page table using
+ * absolute addressing, without marking the page referenced.
+ * @gmap: pointer to guest mapping meta data structure
+ * @gaddr: virtual address in the guest address space
+ * @val: pointer to the unsigned long value to return
+ *
+ * Returns 0 if the value was read, -ENOMEM if out of memory and -EFAULT
+ * if reading using the virtual address failed. -EINVAL if called on a gmap
+ * shadow.
+ *
+ * Called with gmap->mm->mmap_sem in read.
+ */
+int gmap_read_table(struct gmap *gmap, unsigned long gaddr, unsigned long *val)
+{
+ unsigned long address, vmaddr;
+ spinlock_t *ptl;
+ pte_t *ptep, pte;
+ int rc;
+
+ if (gmap_is_shadow(gmap))
+ return -EINVAL;
+
+ while (1) {
+ rc = -EAGAIN;
+ ptep = gmap_pte_op_walk(gmap, gaddr, &ptl);
+ if (ptep) {
+ pte = *ptep;
+ if (pte_present(pte) && (pte_val(pte) & _PAGE_READ)) {
+ address = pte_val(pte) & PAGE_MASK;
+ address += gaddr & ~PAGE_MASK;
+ *val = *(unsigned long *) address;
+ pte_val(*ptep) |= _PAGE_YOUNG;
+ /* Do *NOT* clear the _PAGE_INVALID bit! */
+ rc = 0;
+ }
+ gmap_pte_op_end(ptl);
+ }
+ if (!rc)
+ break;
+ vmaddr = __gmap_translate(gmap, gaddr);
+ if (IS_ERR_VALUE(vmaddr)) {
+ rc = vmaddr;
+ break;
+ }
+ rc = gmap_pte_op_fixup(gmap, gaddr, vmaddr, PROT_READ);
+ if (rc)
+ break;
+ }
+ return rc;
+}
+EXPORT_SYMBOL_GPL(gmap_read_table);
+
+/**
+ * gmap_insert_rmap - add a rmap to the host_to_rmap radix tree
+ * @sg: pointer to the shadow guest address space structure
+ * @vmaddr: vm address associated with the rmap
+ * @rmap: pointer to the rmap structure
+ *
+ * Called with the sg->guest_table_lock
+ */
+static inline void gmap_insert_rmap(struct gmap *sg, unsigned long vmaddr,
+ struct gmap_rmap *rmap)
+{
+ void __rcu **slot;
+
+ BUG_ON(!gmap_is_shadow(sg));
+ slot = radix_tree_lookup_slot(&sg->host_to_rmap, vmaddr >> PAGE_SHIFT);
+ if (slot) {
+ rmap->next = radix_tree_deref_slot_protected(slot,
+ &sg->guest_table_lock);
+ radix_tree_replace_slot(&sg->host_to_rmap, slot, rmap);
+ } else {
+ rmap->next = NULL;
+ radix_tree_insert(&sg->host_to_rmap, vmaddr >> PAGE_SHIFT,
+ rmap);
+ }
+}
+
+/**
+ * gmap_protect_rmap - restrict access rights to memory (RO) and create an rmap
+ * @sg: pointer to the shadow guest address space structure
+ * @raddr: rmap address in the shadow gmap
+ * @paddr: address in the parent guest address space
+ * @len: length of the memory area to protect
+ *
+ * Returns 0 if successfully protected and the rmap was created, -ENOMEM
+ * if out of memory and -EFAULT if paddr is invalid.
+ */
+static int gmap_protect_rmap(struct gmap *sg, unsigned long raddr,
+ unsigned long paddr, unsigned long len)
+{
+ struct gmap *parent;
+ struct gmap_rmap *rmap;
+ unsigned long vmaddr;
+ spinlock_t *ptl;
+ pte_t *ptep;
+ int rc;
+
+ BUG_ON(!gmap_is_shadow(sg));
+ parent = sg->parent;
+ while (len) {
+ vmaddr = __gmap_translate(parent, paddr);
+ if (IS_ERR_VALUE(vmaddr))
+ return vmaddr;
+ rmap = kzalloc(sizeof(*rmap), GFP_KERNEL);
+ if (!rmap)
+ return -ENOMEM;
+ rmap->raddr = raddr;
+ rc = radix_tree_preload(GFP_KERNEL);
+ if (rc) {
+ kfree(rmap);
+ return rc;
+ }
+ rc = -EAGAIN;
+ ptep = gmap_pte_op_walk(parent, paddr, &ptl);
+ if (ptep) {
+ spin_lock(&sg->guest_table_lock);
+ rc = ptep_force_prot(parent->mm, paddr, ptep, PROT_READ,
+ PGSTE_VSIE_BIT);
+ if (!rc)
+ gmap_insert_rmap(sg, vmaddr, rmap);
+ spin_unlock(&sg->guest_table_lock);
+ gmap_pte_op_end(ptl);
+ }
+ radix_tree_preload_end();
+ if (rc) {
+ kfree(rmap);
+ rc = gmap_pte_op_fixup(parent, paddr, vmaddr, PROT_READ);
+ if (rc)
+ return rc;
+ continue;
+ }
+ paddr += PAGE_SIZE;
+ len -= PAGE_SIZE;
+ }
+ return 0;
+}
+
+#define _SHADOW_RMAP_MASK 0x7
+#define _SHADOW_RMAP_REGION1 0x5
+#define _SHADOW_RMAP_REGION2 0x4
+#define _SHADOW_RMAP_REGION3 0x3
+#define _SHADOW_RMAP_SEGMENT 0x2
+#define _SHADOW_RMAP_PGTABLE 0x1
+
+/**
+ * gmap_idte_one - invalidate a single region or segment table entry
+ * @asce: region or segment table *origin* + table-type bits
+ * @vaddr: virtual address to identify the table entry to flush
+ *
+ * The invalid bit of a single region or segment table entry is set
+ * and the associated TLB entries depending on the entry are flushed.
+ * The table-type of the @asce identifies the portion of the @vaddr
+ * that is used as the invalidation index.
+ */
+static inline void gmap_idte_one(unsigned long asce, unsigned long vaddr)
+{
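+ /* opcode 0xb98e is IDTE: invalidate DAT table entry */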
+ asm volatile(
+ " .insn rrf,0xb98e0000,%0,%1,0,0"
+ : : "a" (asce), "a" (vaddr) : "cc", "memory");
+}
+
+/**
+ * gmap_unshadow_page - remove a page from a shadow page table
+ * @sg: pointer to the shadow guest address space structure
+ * @raddr: rmap address in the shadow guest address space
+ *
+ * Called with the sg->guest_table_lock
+ */
+static void gmap_unshadow_page(struct gmap *sg, unsigned long raddr)
+{
+ unsigned long *table;
+
+ BUG_ON(!gmap_is_shadow(sg));
+ table = gmap_table_walk(sg, raddr, 0); /* get page table pointer */
+ if (!table || *table & _PAGE_INVALID)
+ return;
+ gmap_call_notifier(sg, raddr, raddr + _PAGE_SIZE - 1);
+ ptep_unshadow_pte(sg->mm, raddr, (pte_t *) table);
+}
+
+/**
+ * __gmap_unshadow_pgt - remove all entries from a shadow page table
+ * @sg: pointer to the shadow guest address space structure
+ * @raddr: rmap address in the shadow guest address space
+ * @pgt: pointer to the start of a shadow page table
+ *
+ * Called with the sg->guest_table_lock
+ */
+static void __gmap_unshadow_pgt(struct gmap *sg, unsigned long raddr,
+ unsigned long *pgt)
+{
+ int i;
+
+ BUG_ON(!gmap_is_shadow(sg));
+ for (i = 0; i < _PAGE_ENTRIES; i++, raddr += _PAGE_SIZE)
+ pgt[i] = _PAGE_INVALID;
+}
+
+/**
+ * gmap_unshadow_pgt - remove a shadow page table from a segment entry
+ * @sg: pointer to the shadow guest address space structure
+ * @raddr: address in the shadow guest address space
+ *
+ * Called with the sg->guest_table_lock
+ */
+static void gmap_unshadow_pgt(struct gmap *sg, unsigned long raddr)
+{
+ unsigned long sto, *ste, *pgt;
+ struct page *page;
+
+ BUG_ON(!gmap_is_shadow(sg));
+ ste = gmap_table_walk(sg, raddr, 1); /* get segment pointer */
+ if (!ste || !(*ste & _SEGMENT_ENTRY_ORIGIN))
+ return;
+ gmap_call_notifier(sg, raddr, raddr + _SEGMENT_SIZE - 1);
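+ /* Recover the segment table origin from the entry pointer and index */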
+ sto = (unsigned long) (ste - ((raddr & _SEGMENT_INDEX) >> _SEGMENT_SHIFT));
+ gmap_idte_one(sto | _ASCE_TYPE_SEGMENT, raddr);
+ pgt = (unsigned long *)(*ste & _SEGMENT_ENTRY_ORIGIN);
+ *ste = _SEGMENT_ENTRY_EMPTY;
+ __gmap_unshadow_pgt(sg, raddr, pgt);
+ /* Free page table */
+ page = pfn_to_page(__pa(pgt) >> PAGE_SHIFT);
+ list_del(&page->lru);
+ page_table_free_pgste(page);
+}
+
+/**
+ * __gmap_unshadow_sgt - remove all entries from a shadow segment table
+ * @sg: pointer to the shadow guest address space structure
+ * @raddr: rmap address in the shadow guest address space
+ * @sgt: pointer to the start of a shadow segment table
+ *
+ * Called with the sg->guest_table_lock
+ */
+static void __gmap_unshadow_sgt(struct gmap *sg, unsigned long raddr,
+ unsigned long *sgt)
+{
+ unsigned long *pgt;
+ struct page *page;
+ int i;
+
+ BUG_ON(!gmap_is_shadow(sg));
+ for (i = 0; i < _CRST_ENTRIES; i++, raddr += _SEGMENT_SIZE) {
+ if (!(sgt[i] & _SEGMENT_ENTRY_ORIGIN))
+ continue;
+ pgt = (unsigned long *)(sgt[i] & _REGION_ENTRY_ORIGIN);
+ sgt[i] = _SEGMENT_ENTRY_EMPTY;
+ __gmap_unshadow_pgt(sg, raddr, pgt);
+ /* Free page table */
+ page = pfn_to_page(__pa(pgt) >> PAGE_SHIFT);
+ list_del(&page->lru);
+ page_table_free_pgste(page);
+ }
+}
+
+/**
+ * gmap_unshadow_sgt - remove a shadow segment table from a region-3 entry
+ * @sg: pointer to the shadow guest address space structure
+ * @raddr: rmap address in the shadow guest address space
+ *
+ * Called with the shadow->guest_table_lock
+ */
+static void gmap_unshadow_sgt(struct gmap *sg, unsigned long raddr)
+{
+ unsigned long r3o, *r3e, *sgt;
+ struct page *page;
+
+ BUG_ON(!gmap_is_shadow(sg));
+ r3e = gmap_table_walk(sg, raddr, 2); /* get region-3 pointer */
+ if (!r3e || !(*r3e & _REGION_ENTRY_ORIGIN))
+ return;
+ gmap_call_notifier(sg, raddr, raddr + _REGION3_SIZE - 1);
+ r3o = (unsigned long) (r3e - ((raddr & _REGION3_INDEX) >> _REGION3_SHIFT));
+ gmap_idte_one(r3o | _ASCE_TYPE_REGION3, raddr);
+ sgt = (unsigned long *)(*r3e & _REGION_ENTRY_ORIGIN);
+ *r3e = _REGION3_ENTRY_EMPTY;
+ __gmap_unshadow_sgt(sg, raddr, sgt);
+ /* Free segment table */
+ page = pfn_to_page(__pa(sgt) >> PAGE_SHIFT);
+ list_del(&page->lru);
+ __free_pages(page, CRST_ALLOC_ORDER);
+}
+
+/**
+ * __gmap_unshadow_r3t - remove all entries from a shadow region-3 table
+ * @sg: pointer to the shadow guest address space structure
+ * @raddr: address in the shadow guest address space
+ * @r3t: pointer to the start of a shadow region-3 table
+ *
+ * Called with the sg->guest_table_lock
+ */
+static void __gmap_unshadow_r3t(struct gmap *sg, unsigned long raddr,
+ unsigned long *r3t)
+{
+ unsigned long *sgt;
+ struct page *page;
+ int i;
+
+ BUG_ON(!gmap_is_shadow(sg));
+ for (i = 0; i < _CRST_ENTRIES; i++, raddr += _REGION3_SIZE) {
+ if (!(r3t[i] & _REGION_ENTRY_ORIGIN))
+ continue;
+ sgt = (unsigned long *)(r3t[i] & _REGION_ENTRY_ORIGIN);
+ r3t[i] = _REGION3_ENTRY_EMPTY;
+ __gmap_unshadow_sgt(sg, raddr, sgt);
+ /* Free segment table */
+ page = pfn_to_page(__pa(sgt) >> PAGE_SHIFT);
+ list_del(&page->lru);
+ __free_pages(page, CRST_ALLOC_ORDER);
+ }
+}
+
+/**
+ * gmap_unshadow_r3t - remove a shadow region-3 table from a region-2 entry
+ * @sg: pointer to the shadow guest address space structure
+ * @raddr: rmap address in the shadow guest address space
+ *
+ * Called with the sg->guest_table_lock
+ */
+static void gmap_unshadow_r3t(struct gmap *sg, unsigned long raddr)
+{
+ unsigned long r2o, *r2e, *r3t;
+ struct page *page;
+
+ BUG_ON(!gmap_is_shadow(sg));
+ r2e = gmap_table_walk(sg, raddr, 3); /* get region-2 pointer */
+ if (!r2e || !(*r2e & _REGION_ENTRY_ORIGIN))
+ return;
+ gmap_call_notifier(sg, raddr, raddr + _REGION2_SIZE - 1);
+ r2o = (unsigned long) (r2e - ((raddr & _REGION2_INDEX) >> _REGION2_SHIFT));
+ gmap_idte_one(r2o | _ASCE_TYPE_REGION2, raddr);
+ r3t = (unsigned long *)(*r2e & _REGION_ENTRY_ORIGIN);
+ *r2e = _REGION2_ENTRY_EMPTY;
+ __gmap_unshadow_r3t(sg, raddr, r3t);
+ /* Free region 3 table */
+ page = pfn_to_page(__pa(r3t) >> PAGE_SHIFT);
+ list_del(&page->lru);
+ __free_pages(page, CRST_ALLOC_ORDER);
+}
+
+/**
+ * __gmap_unshadow_r2t - remove all entries from a shadow region-2 table
+ * @sg: pointer to the shadow guest address space structure
+ * @raddr: rmap address in the shadow guest address space
+ * @r2t: pointer to the start of a shadow region-2 table
+ *
+ * Called with the sg->guest_table_lock
+ */
+static void __gmap_unshadow_r2t(struct gmap *sg, unsigned long raddr,
+ unsigned long *r2t)
+{
+ unsigned long *r3t;
+ struct page *page;
+ int i;
+
+ BUG_ON(!gmap_is_shadow(sg));
+ for (i = 0; i < _CRST_ENTRIES; i++, raddr += _REGION2_SIZE) {
+ if (!(r2t[i] & _REGION_ENTRY_ORIGIN))
+ continue;
+ r3t = (unsigned long *)(r2t[i] & _REGION_ENTRY_ORIGIN);
+ r2t[i] = _REGION2_ENTRY_EMPTY;
+ __gmap_unshadow_r3t(sg, raddr, r3t);
+ /* Free region 3 table */
+ page = pfn_to_page(__pa(r3t) >> PAGE_SHIFT);
+ list_del(&page->lru);
+ __free_pages(page, CRST_ALLOC_ORDER);
+ }
+}
+
+/**
+ * gmap_unshadow_r2t - remove a shadow region-2 table from a region-1 entry
+ * @sg: pointer to the shadow guest address space structure
+ * @raddr: rmap address in the shadow guest address space
+ *
+ * Called with the sg->guest_table_lock
+ */
+static void gmap_unshadow_r2t(struct gmap *sg, unsigned long raddr)
+{
+ unsigned long r1o, *r1e, *r2t;
+ struct page *page;
+
+ BUG_ON(!gmap_is_shadow(sg));
+ r1e = gmap_table_walk(sg, raddr, 4); /* get region-1 pointer */
+ if (!r1e || !(*r1e & _REGION_ENTRY_ORIGIN))
+ return;
+ gmap_call_notifier(sg, raddr, raddr + _REGION1_SIZE - 1);
+ r1o = (unsigned long) (r1e - ((raddr & _REGION1_INDEX) >> _REGION1_SHIFT));
+ gmap_idte_one(r1o | _ASCE_TYPE_REGION1, raddr);
+ r2t = (unsigned long *)(*r1e & _REGION_ENTRY_ORIGIN);
+ *r1e = _REGION1_ENTRY_EMPTY;
+ __gmap_unshadow_r2t(sg, raddr, r2t);
+ /* Free region 2 table */
+ page = pfn_to_page(__pa(r2t) >> PAGE_SHIFT);
+ list_del(&page->lru);
+ __free_pages(page, CRST_ALLOC_ORDER);
+}
+
+/**
+ * __gmap_unshadow_r1t - remove all entries from a shadow region-1 table
+ * @sg: pointer to the shadow guest address space structure
+ * @raddr: rmap address in the shadow guest address space
+ * @r1t: pointer to the start of a shadow region-1 table
+ *
+ * Called with the shadow->guest_table_lock
+ */
+static void __gmap_unshadow_r1t(struct gmap *sg, unsigned long raddr,
+ unsigned long *r1t)
+{
+ unsigned long asce, *r2t;
+ struct page *page;
+ int i;
+
+ BUG_ON(!gmap_is_shadow(sg));
+ asce = (unsigned long) r1t | _ASCE_TYPE_REGION1;
+ for (i = 0; i < _CRST_ENTRIES; i++, raddr += _REGION1_SIZE) {
+ if (!(r1t[i] & _REGION_ENTRY_ORIGIN))
+ continue;
+ r2t = (unsigned long *)(r1t[i] & _REGION_ENTRY_ORIGIN);
+ __gmap_unshadow_r2t(sg, raddr, r2t);
+ /* Clear entry and flush translation r1t -> r2t */
+ gmap_idte_one(asce, raddr);
+ r1t[i] = _REGION1_ENTRY_EMPTY;
+ /* Free region 2 table */
+ page = pfn_to_page(__pa(r2t) >> PAGE_SHIFT);
+ list_del(&page->lru);
+ __free_pages(page, CRST_ALLOC_ORDER);
+ }
+}
+
+/**
+ * gmap_unshadow - remove a shadow page table completely
+ * @sg: pointer to the shadow guest address space structure
+ *
+ * Called with sg->guest_table_lock
+ */
+static void gmap_unshadow(struct gmap *sg)
+{
+ unsigned long *table;
+
+ BUG_ON(!gmap_is_shadow(sg));
+ if (sg->removed)
+ return;
+ sg->removed = 1;
+ gmap_call_notifier(sg, 0, -1UL);
+ gmap_flush_tlb(sg);
+ table = (unsigned long *)(sg->asce & _ASCE_ORIGIN);
+ switch (sg->asce & _ASCE_TYPE_MASK) {
+ case _ASCE_TYPE_REGION1:
+ __gmap_unshadow_r1t(sg, 0, table);
+ break;
+ case _ASCE_TYPE_REGION2:
+ __gmap_unshadow_r2t(sg, 0, table);
+ break;
+ case _ASCE_TYPE_REGION3:
+ __gmap_unshadow_r3t(sg, 0, table);
+ break;
+ case _ASCE_TYPE_SEGMENT:
+ __gmap_unshadow_sgt(sg, 0, table);
+ break;
+ }
+}
+
+/**
+ * gmap_find_shadow - find a specific asce in the list of shadow tables
+ * @parent: pointer to the parent gmap
+ * @asce: ASCE for which the shadow table is created
+ * @edat_level: edat level to be used for the shadow translation
+ *
+ * Returns the pointer to a gmap if a shadow table with the given asce is
+ * already available, ERR_PTR(-EAGAIN) if another one is just being created,
+ * otherwise NULL
+ */
+static struct gmap *gmap_find_shadow(struct gmap *parent, unsigned long asce,
+ int edat_level)
+{
+ struct gmap *sg;
+
+ list_for_each_entry(sg, &parent->children, list) {
+ if (sg->orig_asce != asce || sg->edat_level != edat_level ||
+ sg->removed)
+ continue;
+ if (!sg->initialized)
+ return ERR_PTR(-EAGAIN);
+ atomic_inc(&sg->ref_count);
+ return sg;
+ }
+ return NULL;
+}
+
+/**
+ * gmap_shadow_valid - check if a shadow guest address space matches the
+ * given properties and is still valid
+ * @sg: pointer to the shadow guest address space structure
+ * @asce: ASCE for which the shadow table is requested
+ * @edat_level: edat level to be used for the shadow translation
+ *
+ * Returns 1 if the gmap shadow is still valid and matches the given
+ * properties, the caller can continue using it. Returns 0 otherwise, the
+ * caller has to request a new shadow gmap in this case.
+ *
+ */
+int gmap_shadow_valid(struct gmap *sg, unsigned long asce, int edat_level)
+{
+ if (sg->removed)
+ return 0;
+ return sg->orig_asce == asce && sg->edat_level == edat_level;
+}
+EXPORT_SYMBOL_GPL(gmap_shadow_valid);
+
+/**
+ * gmap_shadow - create/find a shadow guest address space
+ * @parent: pointer to the parent gmap
+ * @asce: ASCE for which the shadow table is created
+ * @edat_level: edat level to be used for the shadow translation
+ *
+ * The pages of the top level page table referred to by the asce parameter
+ * will be set to read-only and marked in the PGSTEs of the kvm process.
+ * The shadow table will be removed automatically on any change to the
+ * PTE mapping for the source table.
+ *
+ * Returns a guest address space structure, ERR_PTR(-ENOMEM) if out of memory,
+ * ERR_PTR(-EAGAIN) if the caller has to retry and ERR_PTR(-EFAULT) if the
+ * parent gmap table could not be protected.
+ */
+struct gmap *gmap_shadow(struct gmap *parent, unsigned long asce,
+ int edat_level)
+{
+ struct gmap *sg, *new;
+ unsigned long limit;
+ int rc;
+
+ BUG_ON(parent->mm->context.allow_gmap_hpage_1m);
+ BUG_ON(gmap_is_shadow(parent));
+ spin_lock(&parent->shadow_lock);
+ sg = gmap_find_shadow(parent, asce, edat_level);
+ spin_unlock(&parent->shadow_lock);
+ if (sg)
+ return sg;
+ /* Create a new shadow gmap */
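+ /* Limit by asce type: 2G (segment), 4T (region-3), 8P (region-2), 16E (region-1) */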
+ limit = -1UL >> (33 - (((asce & _ASCE_TYPE_MASK) >> 2) * 11));
+ if (asce & _ASCE_REAL_SPACE)
+ limit = -1UL;
+ new = gmap_alloc(limit);
+ if (!new)
+ return ERR_PTR(-ENOMEM);
+ new->mm = parent->mm;
+ new->parent = gmap_get(parent);
+ new->orig_asce = asce;
+ new->edat_level = edat_level;
+ new->initialized = false;
+ spin_lock(&parent->shadow_lock);
+ /* Recheck if another CPU created the same shadow */
+ sg = gmap_find_shadow(parent, asce, edat_level);
+ if (sg) {
+ spin_unlock(&parent->shadow_lock);
+ gmap_free(new);
+ return sg;
+ }
+ if (asce & _ASCE_REAL_SPACE) {
+ /* only allow one real-space gmap shadow */
+ list_for_each_entry(sg, &parent->children, list) {
+ if (sg->orig_asce & _ASCE_REAL_SPACE) {
+ spin_lock(&sg->guest_table_lock);
+ gmap_unshadow(sg);
+ spin_unlock(&sg->guest_table_lock);
+ list_del(&sg->list);
+ gmap_put(sg);
+ break;
+ }
+ }
+ }
+ atomic_set(&new->ref_count, 2);
+ list_add(&new->list, &parent->children);
+ if (asce & _ASCE_REAL_SPACE) {
+ /* nothing to protect, return right away */
+ new->initialized = true;
+ spin_unlock(&parent->shadow_lock);
+ return new;
+ }
+ spin_unlock(&parent->shadow_lock);
+ /* protect after insertion, so it will get properly invalidated */
+ down_read(&parent->mm->mmap_sem);
+ rc = gmap_protect_range(parent, asce & _ASCE_ORIGIN,
+ ((asce & _ASCE_TABLE_LENGTH) + 1) * PAGE_SIZE,
+ PROT_READ, GMAP_NOTIFY_SHADOW);
+ up_read(&parent->mm->mmap_sem);
+ spin_lock(&parent->shadow_lock);
+ new->initialized = true;
+ if (rc) {
+ list_del(&new->list);
+ gmap_free(new);
+ new = ERR_PTR(rc);
+ }
+ spin_unlock(&parent->shadow_lock);
+ return new;
+}
+EXPORT_SYMBOL_GPL(gmap_shadow);
+
+/**
+ * gmap_shadow_r2t - create an empty shadow region 2 table
+ * @sg: pointer to the shadow guest address space structure
+ * @saddr: faulting address in the shadow gmap
+ * @r2t: parent gmap address of the region 2 table to get shadowed
+ * @fake: r2t references contiguous guest memory block, not an r2t
+ *
+ * The r2t parameter specifies the address of the source table. The
+ * four pages of the source table are made read-only in the parent gmap
+ * address space. A write to the source table area @r2t will automatically
+ * remove the shadow r2 table and all of its descendants.
+ *
+ * Returns 0 if successfully shadowed or already shadowed, -EAGAIN if the
+ * shadow table structure is incomplete, -ENOMEM if out of memory and
+ * -EFAULT if an address in the parent gmap could not be resolved.
+ *
+ * Called with sg->mm->mmap_sem in read.
+ */
+int gmap_shadow_r2t(struct gmap *sg, unsigned long saddr, unsigned long r2t,
+ int fake)
+{
+ unsigned long raddr, origin, offset, len;
+ unsigned long *s_r2t, *table;
+ struct page *page;
+ int rc;
+
+ BUG_ON(!gmap_is_shadow(sg));
+ /* Allocate a shadow region second table */
+ page = alloc_pages(GFP_KERNEL, CRST_ALLOC_ORDER);
+ if (!page)
+ return -ENOMEM;
+ page->index = r2t & _REGION_ENTRY_ORIGIN;
+ if (fake)
+ page->index |= GMAP_SHADOW_FAKE_TABLE;
+ s_r2t = (unsigned long *) page_to_phys(page);
+ /* Install shadow region second table */
+ spin_lock(&sg->guest_table_lock);
+ table = gmap_table_walk(sg, saddr, 4); /* get region-1 pointer */
+ if (!table) {
+ rc = -EAGAIN; /* Race with unshadow */
+ goto out_free;
+ }
+ if (!(*table & _REGION_ENTRY_INVALID)) {
+ rc = 0; /* Already established */
+ goto out_free;
+ } else if (*table & _REGION_ENTRY_ORIGIN) {
+ rc = -EAGAIN; /* Race with shadow */
+ goto out_free;
+ }
+ crst_table_init(s_r2t, _REGION2_ENTRY_EMPTY);
+ /* mark as invalid as long as the parent table is not protected */
+ *table = (unsigned long) s_r2t | _REGION_ENTRY_LENGTH |
+ _REGION_ENTRY_TYPE_R1 | _REGION_ENTRY_INVALID;
+ if (sg->edat_level >= 1)
+ *table |= (r2t & _REGION_ENTRY_PROTECT);
+ list_add(&page->lru, &sg->crst_list);
+ if (fake) {
+ /* nothing to protect for fake tables */
+ *table &= ~_REGION_ENTRY_INVALID;
+ spin_unlock(&sg->guest_table_lock);
+ return 0;
+ }
+ spin_unlock(&sg->guest_table_lock);
+ /* Make r2t read-only in parent gmap page table */
+ raddr = (saddr & _REGION1_MASK) | _SHADOW_RMAP_REGION1;
+ origin = r2t & _REGION_ENTRY_ORIGIN;
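+ /* Only the 4K pages selected by the table offset/length fields need protection */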
+ offset = ((r2t & _REGION_ENTRY_OFFSET) >> 6) * PAGE_SIZE;
+ len = ((r2t & _REGION_ENTRY_LENGTH) + 1) * PAGE_SIZE - offset;
+ rc = gmap_protect_rmap(sg, raddr, origin + offset, len);
+ spin_lock(&sg->guest_table_lock);
+ if (!rc) {
+ table = gmap_table_walk(sg, saddr, 4);
+ if (!table || (*table & _REGION_ENTRY_ORIGIN) !=
+ (unsigned long) s_r2t)
+ rc = -EAGAIN; /* Race with unshadow */
+ else
+ *table &= ~_REGION_ENTRY_INVALID;
+ } else {
+ gmap_unshadow_r2t(sg, raddr);
+ }
+ spin_unlock(&sg->guest_table_lock);
+ return rc;
+out_free:
+ spin_unlock(&sg->guest_table_lock);
+ __free_pages(page, CRST_ALLOC_ORDER);
+ return rc;
+}
+EXPORT_SYMBOL_GPL(gmap_shadow_r2t);
+
+/**
+ * gmap_shadow_r3t - create a shadow region 3 table
+ * @sg: pointer to the shadow guest address space structure
+ * @saddr: faulting address in the shadow gmap
+ * @r3t: parent gmap address of the region 3 table to get shadowed
+ * @fake: r3t references contiguous guest memory block, not an r3t
+ *
+ * Returns 0 if successfully shadowed or already shadowed, -EAGAIN if the
+ * shadow table structure is incomplete, -ENOMEM if out of memory and
+ * -EFAULT if an address in the parent gmap could not be resolved.
+ *
+ * Called with sg->mm->mmap_sem in read.
+ */
+int gmap_shadow_r3t(struct gmap *sg, unsigned long saddr, unsigned long r3t,
+ int fake)
+{
+ unsigned long raddr, origin, offset, len;
+ unsigned long *s_r3t, *table;
+ struct page *page;
+ int rc;
+
+ BUG_ON(!gmap_is_shadow(sg));
+ /* Allocate a shadow region third table */
+ page = alloc_pages(GFP_KERNEL, CRST_ALLOC_ORDER);
+ if (!page)
+ return -ENOMEM;
+ page->index = r3t & _REGION_ENTRY_ORIGIN;
+ if (fake)
+ page->index |= GMAP_SHADOW_FAKE_TABLE;
+ s_r3t = (unsigned long *) page_to_phys(page);
+ /* Install shadow region third table */
+ spin_lock(&sg->guest_table_lock);
+ table = gmap_table_walk(sg, saddr, 3); /* get region-2 pointer */
+ if (!table) {
+ rc = -EAGAIN; /* Race with unshadow */
+ goto out_free;
+ }
+ if (!(*table & _REGION_ENTRY_INVALID)) {
+ rc = 0; /* Already established */
+ goto out_free;
+ } else if (*table & _REGION_ENTRY_ORIGIN) {
+ rc = -EAGAIN; /* Race with shadow */
+ goto out_free;
+ }
+ crst_table_init(s_r3t, _REGION3_ENTRY_EMPTY);
+ /* mark as invalid as long as the parent table is not protected */
+ *table = (unsigned long) s_r3t | _REGION_ENTRY_LENGTH |
+ _REGION_ENTRY_TYPE_R2 | _REGION_ENTRY_INVALID;
+ if (sg->edat_level >= 1)
+ *table |= (r3t & _REGION_ENTRY_PROTECT);
+ list_add(&page->lru, &sg->crst_list);
+ if (fake) {
+ /* nothing to protect for fake tables */
+ *table &= ~_REGION_ENTRY_INVALID;
+ spin_unlock(&sg->guest_table_lock);
+ return 0;
+ }
+ spin_unlock(&sg->guest_table_lock);
+ /* Make r3t read-only in parent gmap page table */
+ raddr = (saddr & _REGION2_MASK) | _SHADOW_RMAP_REGION2;
+ origin = r3t & _REGION_ENTRY_ORIGIN;
+ offset = ((r3t & _REGION_ENTRY_OFFSET) >> 6) * PAGE_SIZE;
+ len = ((r3t & _REGION_ENTRY_LENGTH) + 1) * PAGE_SIZE - offset;
+ rc = gmap_protect_rmap(sg, raddr, origin + offset, len);
+ spin_lock(&sg->guest_table_lock);
+ if (!rc) {
+ table = gmap_table_walk(sg, saddr, 3);
+ if (!table || (*table & _REGION_ENTRY_ORIGIN) !=
+ (unsigned long) s_r3t)
+ rc = -EAGAIN; /* Race with unshadow */
+ else
+ *table &= ~_REGION_ENTRY_INVALID;
+ } else {
+ gmap_unshadow_r3t(sg, raddr);
+ }
+ spin_unlock(&sg->guest_table_lock);
+ return rc;
+out_free:
+ spin_unlock(&sg->guest_table_lock);
+ __free_pages(page, CRST_ALLOC_ORDER);
+ return rc;
+}
+EXPORT_SYMBOL_GPL(gmap_shadow_r3t);
+
+/**
+ * gmap_shadow_sgt - create a shadow segment table
+ * @sg: pointer to the shadow guest address space structure
+ * @saddr: faulting address in the shadow gmap
+ * @sgt: parent gmap address of the segment table to get shadowed
+ * @fake: sgt references contiguous guest memory block, not an sgt
+ *
+ * Returns: 0 if successfully shadowed or already shadowed, -EAGAIN if the
+ * shadow table structure is incomplete, -ENOMEM if out of memory and
+ * -EFAULT if an address in the parent gmap could not be resolved.
+ *
+ * Called with sg->mm->mmap_sem in read.
+ */
+int gmap_shadow_sgt(struct gmap *sg, unsigned long saddr, unsigned long sgt,
+ int fake)
+{
+ unsigned long raddr, origin, offset, len;
+ unsigned long *s_sgt, *table;
+ struct page *page;
+ int rc;
+
+ BUG_ON(!gmap_is_shadow(sg) || (sgt & _REGION3_ENTRY_LARGE));
+ /* Allocate a shadow segment table */
+ page = alloc_pages(GFP_KERNEL, CRST_ALLOC_ORDER);
+ if (!page)
+ return -ENOMEM;
+ page->index = sgt & _REGION_ENTRY_ORIGIN;
+ if (fake)
+ page->index |= GMAP_SHADOW_FAKE_TABLE;
+ s_sgt = (unsigned long *) page_to_phys(page);
+ /* Install shadow segment table */
+ spin_lock(&sg->guest_table_lock);
+ table = gmap_table_walk(sg, saddr, 2); /* get region-3 pointer */
+ if (!table) {
+ rc = -EAGAIN; /* Race with unshadow */
+ goto out_free;
+ }
+ if (!(*table & _REGION_ENTRY_INVALID)) {
+ rc = 0; /* Already established */
+ goto out_free;
+ } else if (*table & _REGION_ENTRY_ORIGIN) {
+ rc = -EAGAIN; /* Race with shadow */
+ goto out_free;
+ }
+ crst_table_init(s_sgt, _SEGMENT_ENTRY_EMPTY);
+ /* mark as invalid as long as the parent table is not protected */
+ *table = (unsigned long) s_sgt | _REGION_ENTRY_LENGTH |
+ _REGION_ENTRY_TYPE_R3 | _REGION_ENTRY_INVALID;
+ if (sg->edat_level >= 1)
+ *table |= sgt & _REGION_ENTRY_PROTECT;
+ list_add(&page->lru, &sg->crst_list);
+ if (fake) {
+ /* nothing to protect for fake tables */
+ *table &= ~_REGION_ENTRY_INVALID;
+ spin_unlock(&sg->guest_table_lock);
+ return 0;
+ }
+ spin_unlock(&sg->guest_table_lock);
+ /* Make sgt read-only in parent gmap page table */
+ raddr = (saddr & _REGION3_MASK) | _SHADOW_RMAP_REGION3;
+ origin = sgt & _REGION_ENTRY_ORIGIN;
+ offset = ((sgt & _REGION_ENTRY_OFFSET) >> 6) * PAGE_SIZE;
+ len = ((sgt & _REGION_ENTRY_LENGTH) + 1) * PAGE_SIZE - offset;
+ rc = gmap_protect_rmap(sg, raddr, origin + offset, len);
+ spin_lock(&sg->guest_table_lock);
+ if (!rc) {
+ table = gmap_table_walk(sg, saddr, 2);
+ if (!table || (*table & _REGION_ENTRY_ORIGIN) !=
+ (unsigned long) s_sgt)
+ rc = -EAGAIN; /* Race with unshadow */
+ else
+ *table &= ~_REGION_ENTRY_INVALID;
+ } else {
+ gmap_unshadow_sgt(sg, raddr);
+ }
+ spin_unlock(&sg->guest_table_lock);
+ return rc;
+out_free:
+ spin_unlock(&sg->guest_table_lock);
+ __free_pages(page, CRST_ALLOC_ORDER);
+ return rc;
+}
+EXPORT_SYMBOL_GPL(gmap_shadow_sgt);
+
+/**
+ * gmap_shadow_pgt_lookup - find a shadow page table
+ * @sg: pointer to the shadow guest address space structure
+ * @saddr: the address in the shadow guest address space
+ * @pgt: parent gmap address of the page table to get shadowed
+ * @dat_protection: if the pgtable is marked as protected by dat
+ * @fake: pgt references contiguous guest memory block, not a pgtable
+ *
+ * Returns 0 if the shadow page table was found and -EAGAIN if the page
+ * table was not found.
+ *
+ * Called with sg->mm->mmap_sem in read.
+ */
+int gmap_shadow_pgt_lookup(struct gmap *sg, unsigned long saddr,
+ unsigned long *pgt, int *dat_protection,
+ int *fake)
+{
+ unsigned long *table;
+ struct page *page;
+ int rc;
+
+ BUG_ON(!gmap_is_shadow(sg));
+ spin_lock(&sg->guest_table_lock);
+ table = gmap_table_walk(sg, saddr, 1); /* get segment pointer */
+ if (table && !(*table & _SEGMENT_ENTRY_INVALID)) {
+ /* Shadow page tables are full pages (pte+pgste) */
+ page = pfn_to_page(*table >> PAGE_SHIFT);
+ *pgt = page->index & ~GMAP_SHADOW_FAKE_TABLE;
+ *dat_protection = !!(*table & _SEGMENT_ENTRY_PROTECT);
+ *fake = !!(page->index & GMAP_SHADOW_FAKE_TABLE);
+ rc = 0;
+ } else {
+ rc = -EAGAIN;
+ }
+ spin_unlock(&sg->guest_table_lock);
+ return rc;
+
+}
+EXPORT_SYMBOL_GPL(gmap_shadow_pgt_lookup);
+
+/**
+ * gmap_shadow_pgt - instantiate a shadow page table
+ * @sg: pointer to the shadow guest address space structure
+ * @saddr: faulting address in the shadow gmap
+ * @pgt: parent gmap address of the page table to get shadowed
+ * @fake: pgt references contiguous guest memory block, not a pgtable
+ *
+ * Returns 0 if successfully shadowed or already shadowed, -EAGAIN if the
+ * shadow table structure is incomplete, -ENOMEM if out of memory and
+ * -EFAULT if an address in the parent gmap could not be resolved.
+ *
+ * Called with sg->mm->mmap_sem in read.
+ */
+int gmap_shadow_pgt(struct gmap *sg, unsigned long saddr, unsigned long pgt,
+ int fake)
+{
+ unsigned long raddr, origin;
+ unsigned long *s_pgt, *table;
+ struct page *page;
+ int rc;
+
+ BUG_ON(!gmap_is_shadow(sg) || (pgt & _SEGMENT_ENTRY_LARGE));
+ /* Allocate a shadow page table */
+ page = page_table_alloc_pgste(sg->mm);
+ if (!page)
+ return -ENOMEM;
+ page->index = pgt & _SEGMENT_ENTRY_ORIGIN;
+ if (fake)
+ page->index |= GMAP_SHADOW_FAKE_TABLE;
+ s_pgt = (unsigned long *) page_to_phys(page);
+ /* Install shadow page table */
+ spin_lock(&sg->guest_table_lock);
+ table = gmap_table_walk(sg, saddr, 1); /* get segment pointer */
+ if (!table) {
+ rc = -EAGAIN; /* Race with unshadow */
+ goto out_free;
+ }
+ if (!(*table & _SEGMENT_ENTRY_INVALID)) {
+ rc = 0; /* Already established */
+ goto out_free;
+ } else if (*table & _SEGMENT_ENTRY_ORIGIN) {
+ rc = -EAGAIN; /* Race with shadow */
+ goto out_free;
+ }
+ /* mark as invalid as long as the parent table is not protected */
+ *table = (unsigned long) s_pgt | _SEGMENT_ENTRY |
+ (pgt & _SEGMENT_ENTRY_PROTECT) | _SEGMENT_ENTRY_INVALID;
+ list_add(&page->lru, &sg->pt_list);
+ if (fake) {
+ /* nothing to protect for fake tables */
+ *table &= ~_SEGMENT_ENTRY_INVALID;
+ spin_unlock(&sg->guest_table_lock);
+ return 0;
+ }
+ spin_unlock(&sg->guest_table_lock);
+ /* Make pgt read-only in parent gmap page table (not the pgste) */
+ raddr = (saddr & _SEGMENT_MASK) | _SHADOW_RMAP_SEGMENT;
+ origin = pgt & _SEGMENT_ENTRY_ORIGIN & PAGE_MASK;
+ rc = gmap_protect_rmap(sg, raddr, origin, PAGE_SIZE);
+ spin_lock(&sg->guest_table_lock);
+ if (!rc) {
+ table = gmap_table_walk(sg, saddr, 1);
+ if (!table || (*table & _SEGMENT_ENTRY_ORIGIN) !=
+ (unsigned long) s_pgt)
+ rc = -EAGAIN; /* Race with unshadow */
+ else
+ *table &= ~_SEGMENT_ENTRY_INVALID;
+ } else {
+ gmap_unshadow_pgt(sg, raddr);
+ }
+ spin_unlock(&sg->guest_table_lock);
+ return rc;
+out_free:
+ spin_unlock(&sg->guest_table_lock);
+ page_table_free_pgste(page);
+ return rc;
+
+}
+EXPORT_SYMBOL_GPL(gmap_shadow_pgt);
+
+/**
+ * gmap_shadow_page - create a shadow page mapping
+ * @sg: pointer to the shadow guest address space structure
+ * @saddr: faulting address in the shadow gmap
+ * @pte: pte in parent gmap address space to get shadowed
+ *
+ * Returns 0 if successfully shadowed or already shadowed, -EAGAIN if the
+ * shadow table structure is incomplete, -ENOMEM if out of memory and
+ * -EFAULT if an address in the parent gmap could not be resolved.
+ *
+ * Called with sg->mm->mmap_sem in read.
+ */
+int gmap_shadow_page(struct gmap *sg, unsigned long saddr, pte_t pte)
+{
+ struct gmap *parent;
+ struct gmap_rmap *rmap;
+ unsigned long vmaddr, paddr;
+ spinlock_t *ptl;
+ pte_t *sptep, *tptep;
+ int prot;
+ int rc;
+
+ BUG_ON(!gmap_is_shadow(sg));
+ parent = sg->parent;
+ prot = (pte_val(pte) & _PAGE_PROTECT) ? PROT_READ : PROT_WRITE;
+
+ rmap = kzalloc(sizeof(*rmap), GFP_KERNEL);
+ if (!rmap)
+ return -ENOMEM;
+ rmap->raddr = (saddr & PAGE_MASK) | _SHADOW_RMAP_PGTABLE;
+
+ while (1) {
+ paddr = pte_val(pte) & PAGE_MASK;
+ vmaddr = __gmap_translate(parent, paddr);
+ if (IS_ERR_VALUE(vmaddr)) {
+ rc = vmaddr;
+ break;
+ }
+ rc = radix_tree_preload(GFP_KERNEL);
+ if (rc)
+ break;
+ rc = -EAGAIN;
+ sptep = gmap_pte_op_walk(parent, paddr, &ptl);
+ if (sptep) {
+ spin_lock(&sg->guest_table_lock);
+ /* Get page table pointer */
+ tptep = (pte_t *) gmap_table_walk(sg, saddr, 0);
+ if (!tptep) {
+ spin_unlock(&sg->guest_table_lock);
+ gmap_pte_op_end(ptl);
+ radix_tree_preload_end();
+ break;
+ }
+ rc = ptep_shadow_pte(sg->mm, saddr, sptep, tptep, pte);
+ if (rc > 0) {
+ /* Success and a new mapping */
+ gmap_insert_rmap(sg, vmaddr, rmap);
+ rmap = NULL;
+ rc = 0;
+ }
+ gmap_pte_op_end(ptl);
+ spin_unlock(&sg->guest_table_lock);
+ }
+ radix_tree_preload_end();
+ if (!rc)
+ break;
+ rc = gmap_pte_op_fixup(parent, paddr, vmaddr, prot);
+ if (rc)
+ break;
+ }
+ kfree(rmap);
+ return rc;
+}
+EXPORT_SYMBOL_GPL(gmap_shadow_page);
+
+/**
+ * gmap_shadow_notify - handle notifications for shadow gmap
+ * @sg: pointer to the shadow guest address space structure
+ * @vmaddr: affected host virtual address
+ * @gaddr: affected guest address in the parent gmap
+ *
+ * Called with sg->parent->shadow_lock.
+ */
+static void gmap_shadow_notify(struct gmap *sg, unsigned long vmaddr,
+ unsigned long gaddr)
+{
+ struct gmap_rmap *rmap, *rnext, *head;
+ unsigned long start, end, bits, raddr;
+
+ BUG_ON(!gmap_is_shadow(sg));
+
+ spin_lock(&sg->guest_table_lock);
+ if (sg->removed) {
+ spin_unlock(&sg->guest_table_lock);
+ return;
+ }
+ /* Check for top level table */
+ start = sg->orig_asce & _ASCE_ORIGIN;
+ end = start + ((sg->orig_asce & _ASCE_TABLE_LENGTH) + 1) * PAGE_SIZE;
+ if (!(sg->orig_asce & _ASCE_REAL_SPACE) && gaddr >= start &&
+ gaddr < end) {
+ /* The complete shadow table has to go */
+ gmap_unshadow(sg);
+ spin_unlock(&sg->guest_table_lock);
+ list_del(&sg->list);
+ gmap_put(sg);
+ return;
+ }
+ /* Remove the page table tree starting from one specific entry */
+ head = radix_tree_delete(&sg->host_to_rmap, vmaddr >> PAGE_SHIFT);
+ gmap_for_each_rmap_safe(rmap, rnext, head) {
+ bits = rmap->raddr & _SHADOW_RMAP_MASK;
+ raddr = rmap->raddr ^ bits;
+ switch (bits) {
+ case _SHADOW_RMAP_REGION1:
+ gmap_unshadow_r2t(sg, raddr);
+ break;
+ case _SHADOW_RMAP_REGION2:
+ gmap_unshadow_r3t(sg, raddr);
+ break;
+ case _SHADOW_RMAP_REGION3:
+ gmap_unshadow_sgt(sg, raddr);
+ break;
+ case _SHADOW_RMAP_SEGMENT:
+ gmap_unshadow_pgt(sg, raddr);
+ break;
+ case _SHADOW_RMAP_PGTABLE:
+ gmap_unshadow_page(sg, raddr);
+ break;
+ }
+ kfree(rmap);
+ }
+ spin_unlock(&sg->guest_table_lock);
+}
+
+/**
+ * ptep_notify - call all invalidation callbacks for a specific pte.
+ * @mm: pointer to the process mm_struct
+ * @vmaddr: virtual address in the process address space
+ * @pte: pointer to the page table entry
+ * @bits: bits from the pgste that caused the notify call
+ *
+ * This function is assumed to be called with the page table lock held
+ * for the pte to notify.
+ */
+void ptep_notify(struct mm_struct *mm, unsigned long vmaddr,
+ pte_t *pte, unsigned long bits)
+{
+ unsigned long offset, gaddr = 0;
+ unsigned long *table;
+ struct gmap *gmap, *sg, *next;
+
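+ /*
+ * Translate the pte's position within its 2K page table into the
+ * byte offset of the corresponding 4K page inside the 1M segment.
+ */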
+ offset = ((unsigned long) pte) & (255 * sizeof(pte_t));
+ offset = offset * (PAGE_SIZE / sizeof(pte_t));
+ rcu_read_lock();
+ list_for_each_entry_rcu(gmap, &mm->context.gmap_list, list) {
+ spin_lock(&gmap->guest_table_lock);
+ table = radix_tree_lookup(&gmap->host_to_guest,
+ vmaddr >> PMD_SHIFT);
+ if (table)
+ gaddr = __gmap_segment_gaddr(table) + offset;
+ spin_unlock(&gmap->guest_table_lock);
+ if (!table)
+ continue;
+
+ if (!list_empty(&gmap->children) && (bits & PGSTE_VSIE_BIT)) {
+ spin_lock(&gmap->shadow_lock);
+ list_for_each_entry_safe(sg, next,
+ &gmap->children, list)
+ gmap_shadow_notify(sg, vmaddr, gaddr);
+ spin_unlock(&gmap->shadow_lock);
+ }
+ if (bits & PGSTE_IN_BIT)
+ gmap_call_notifier(gmap, gaddr, gaddr + PAGE_SIZE - 1);
+ }
+ rcu_read_unlock();
+}
+EXPORT_SYMBOL_GPL(ptep_notify);
+
+static void pmdp_notify_gmap(struct gmap *gmap, pmd_t *pmdp,
+ unsigned long gaddr)
+{
+ pmd_val(*pmdp) &= ~_SEGMENT_ENTRY_GMAP_IN;
+ gmap_call_notifier(gmap, gaddr, gaddr + HPAGE_SIZE - 1);
+}
+
+/**
+ * gmap_pmdp_xchg - exchange a gmap pmd with another
+ * @gmap: pointer to the guest address space structure
+ * @pmdp: pointer to the pmd entry
+ * @new: replacement entry
+ * @gaddr: the affected guest address
+ *
+ * This function is assumed to be called with the guest_table_lock
+ * held.
+ */
+static void gmap_pmdp_xchg(struct gmap *gmap, pmd_t *pmdp, pmd_t new,
+ unsigned long gaddr)
+{
+ gaddr &= HPAGE_MASK;
+ pmdp_notify_gmap(gmap, pmdp, gaddr);
+ pmd_val(new) &= ~_SEGMENT_ENTRY_GMAP_IN;
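+ /* Flush the old entry: prefer guest-ASCE IDTE, then plain IDTE, else CSP */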
+ if (MACHINE_HAS_TLB_GUEST)
+ __pmdp_idte(gaddr, (pmd_t *)pmdp, IDTE_GUEST_ASCE, gmap->asce,
+ IDTE_GLOBAL);
+ else if (MACHINE_HAS_IDTE)
+ __pmdp_idte(gaddr, (pmd_t *)pmdp, 0, 0, IDTE_GLOBAL);
+ else
+ __pmdp_csp(pmdp);
+ *pmdp = new;
+}
+
+static void gmap_pmdp_clear(struct mm_struct *mm, unsigned long vmaddr,
+ int purge)
+{
+ pmd_t *pmdp;
+ struct gmap *gmap;
+ unsigned long gaddr;
+
+ rcu_read_lock();
+ list_for_each_entry_rcu(gmap, &mm->context.gmap_list, list) {
+ spin_lock(&gmap->guest_table_lock);
+ pmdp = (pmd_t *)radix_tree_delete(&gmap->host_to_guest,
+ vmaddr >> PMD_SHIFT);
+ if (pmdp) {
+ gaddr = __gmap_segment_gaddr((unsigned long *)pmdp);
+ pmdp_notify_gmap(gmap, pmdp, gaddr);
+ WARN_ON(pmd_val(*pmdp) & ~(_SEGMENT_ENTRY_HARDWARE_BITS_LARGE |
+ _SEGMENT_ENTRY_GMAP_UC));
+ if (purge)
+ __pmdp_csp(pmdp);
+ pmd_val(*pmdp) = _SEGMENT_ENTRY_EMPTY;
+ }
+ spin_unlock(&gmap->guest_table_lock);
+ }
+ rcu_read_unlock();
+}
+
+/**
+ * gmap_pmdp_invalidate - invalidate all affected guest pmd entries without
+ * flushing
+ * @mm: pointer to the process mm_struct
+ * @vmaddr: virtual address in the process address space
+ */
+void gmap_pmdp_invalidate(struct mm_struct *mm, unsigned long vmaddr)
+{
+ gmap_pmdp_clear(mm, vmaddr, 0);
+}
+EXPORT_SYMBOL_GPL(gmap_pmdp_invalidate);
+
+/**
+ * gmap_pmdp_csp - csp all affected guest pmd entries
+ * @mm: pointer to the process mm_struct
+ * @vmaddr: virtual address in the process address space
+ */
+void gmap_pmdp_csp(struct mm_struct *mm, unsigned long vmaddr)
+{
+ gmap_pmdp_clear(mm, vmaddr, 1);
+}
+EXPORT_SYMBOL_GPL(gmap_pmdp_csp);
+
+/**
+ * gmap_pmdp_idte_local - invalidate and clear a guest pmd entry (local flush)
+ * @mm: pointer to the process mm_struct
+ * @vmaddr: virtual address in the process address space
+ */
+void gmap_pmdp_idte_local(struct mm_struct *mm, unsigned long vmaddr)
+{
+ unsigned long *entry, gaddr;
+ struct gmap *gmap;
+ pmd_t *pmdp;
+
+ rcu_read_lock();
+ list_for_each_entry_rcu(gmap, &mm->context.gmap_list, list) {
+ spin_lock(&gmap->guest_table_lock);
+ entry = radix_tree_delete(&gmap->host_to_guest,
+ vmaddr >> PMD_SHIFT);
+ if (entry) {
+ pmdp = (pmd_t *)entry;
+ gaddr = __gmap_segment_gaddr(entry);
+ pmdp_notify_gmap(gmap, pmdp, gaddr);
+ WARN_ON(*entry & ~(_SEGMENT_ENTRY_HARDWARE_BITS_LARGE |
+ _SEGMENT_ENTRY_GMAP_UC));
+ if (MACHINE_HAS_TLB_GUEST)
+ __pmdp_idte(gaddr, pmdp, IDTE_GUEST_ASCE,
+ gmap->asce, IDTE_LOCAL);
+ else if (MACHINE_HAS_IDTE)
+ __pmdp_idte(gaddr, pmdp, 0, 0, IDTE_LOCAL);
+ *entry = _SEGMENT_ENTRY_EMPTY;
+ }
+ spin_unlock(&gmap->guest_table_lock);
+ }
+ rcu_read_unlock();
+}
+EXPORT_SYMBOL_GPL(gmap_pmdp_idte_local);
+
+/**
+ * gmap_pmdp_idte_global - invalidate and clear a guest pmd entry (global flush)
+ * @mm: pointer to the process mm_struct
+ * @vmaddr: virtual address in the process address space
+ */
+void gmap_pmdp_idte_global(struct mm_struct *mm, unsigned long vmaddr)
+{
+ unsigned long *entry, gaddr;
+ struct gmap *gmap;
+ pmd_t *pmdp;
+
+ rcu_read_lock();
+ list_for_each_entry_rcu(gmap, &mm->context.gmap_list, list) {
+ spin_lock(&gmap->guest_table_lock);
+ entry = radix_tree_delete(&gmap->host_to_guest,
+ vmaddr >> PMD_SHIFT);
+ if (entry) {
+ pmdp = (pmd_t *)entry;
+ gaddr = __gmap_segment_gaddr(entry);
+ pmdp_notify_gmap(gmap, pmdp, gaddr);
+ WARN_ON(*entry & ~(_SEGMENT_ENTRY_HARDWARE_BITS_LARGE |
+ _SEGMENT_ENTRY_GMAP_UC));
+ if (MACHINE_HAS_TLB_GUEST)
+ __pmdp_idte(gaddr, pmdp, IDTE_GUEST_ASCE,
+ gmap->asce, IDTE_GLOBAL);
+ else if (MACHINE_HAS_IDTE)
+ __pmdp_idte(gaddr, pmdp, 0, 0, IDTE_GLOBAL);
+ else
+ __pmdp_csp(pmdp);
+ *entry = _SEGMENT_ENTRY_EMPTY;
+ }
+ spin_unlock(&gmap->guest_table_lock);
+ }
+ rcu_read_unlock();
+}
+EXPORT_SYMBOL_GPL(gmap_pmdp_idte_global);
+
+/**
+ * gmap_test_and_clear_dirty_pmd - test and reset segment dirty status
+ * @gmap: pointer to guest address space
+ * @pmdp: pointer to the pmd to be tested
+ * @gaddr: virtual address in the guest address space
+ *
+ * This function is assumed to be called with the guest_table_lock
+ * held.
+ */
+bool gmap_test_and_clear_dirty_pmd(struct gmap *gmap, pmd_t *pmdp,
+ unsigned long gaddr)
+{
+ if (pmd_val(*pmdp) & _SEGMENT_ENTRY_INVALID)
+ return false;
+
+ /* Memory that is already protected and did not change is clean */
+ if (pmd_val(*pmdp) & _SEGMENT_ENTRY_PROTECT &&
+ !(pmd_val(*pmdp) & _SEGMENT_ENTRY_GMAP_UC))
+ return false;
+
+ /* Clear UC indication and reset protection */
+ pmd_val(*pmdp) &= ~_SEGMENT_ENTRY_GMAP_UC;
+ gmap_protect_pmd(gmap, gaddr, pmdp, PROT_READ, 0);
+ return true;
+}
+
+/**
+ * gmap_sync_dirty_log_pmd - set bitmap based on dirty status of segment
+ * @gmap: pointer to guest address space
+ * @bitmap: dirty bitmap for this pmd
+ * @gaddr: virtual address in the guest address space
+ * @vmaddr: virtual address in the host address space
+ *
+ * This function is assumed to be called with the guest_table_lock
+ * held.
+ */
+void gmap_sync_dirty_log_pmd(struct gmap *gmap, unsigned long bitmap[4],
+ unsigned long gaddr, unsigned long vmaddr)
+{
+ int i;
+ pmd_t *pmdp;
+ pte_t *ptep;
+ spinlock_t *ptl;
+
+ pmdp = gmap_pmd_op_walk(gmap, gaddr);
+ if (!pmdp)
+ return;
+
+ if (pmd_large(*pmdp)) {
+ if (gmap_test_and_clear_dirty_pmd(gmap, pmdp, gaddr))
+ bitmap_fill(bitmap, _PAGE_ENTRIES);
+ } else {
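+ /* Check each of the 256 4K pages backing this segment individually */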
+ for (i = 0; i < _PAGE_ENTRIES; i++, vmaddr += PAGE_SIZE) {
+ ptep = pte_alloc_map_lock(gmap->mm, pmdp, vmaddr, &ptl);
+ if (!ptep)
+ continue;
+ if (ptep_test_and_clear_uc(gmap->mm, vmaddr, ptep))
+ set_bit(i, bitmap);
+ spin_unlock(ptl);
+ }
+ }
+ gmap_pmd_op_end(gmap, pmdp);
+}
+EXPORT_SYMBOL_GPL(gmap_sync_dirty_log_pmd);
+
+static inline void thp_split_mm(struct mm_struct *mm)
+{
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+ struct vm_area_struct *vma;
+ unsigned long addr;
+
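+ /* Touch every page with FOLL_SPLIT to split existing THP mappings */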
+ for (vma = mm->mmap; vma != NULL; vma = vma->vm_next) {
+ for (addr = vma->vm_start;
+ addr < vma->vm_end;
+ addr += PAGE_SIZE)
+ follow_page(vma, addr, FOLL_SPLIT);
+ vma->vm_flags &= ~VM_HUGEPAGE;
+ vma->vm_flags |= VM_NOHUGEPAGE;
+ }
+ mm->def_flags |= VM_NOHUGEPAGE;
+#endif
+}
+
+/*
+ * Remove all empty zero pages from the mapping for lazy refaulting
+ * - This must be called after mm->context.has_pgste is set, to avoid
+ * future creation of zero pages
+ * - This must be called after THP mappings have been split (see thp_split_mm())
+ */
+static int __zap_zero_pages(pmd_t *pmd, unsigned long start,
+ unsigned long end, struct mm_walk *walk)
+{
+ unsigned long addr;
+
+ for (addr = start; addr != end; addr += PAGE_SIZE) {
+ pte_t *ptep;
+ spinlock_t *ptl;
+
+ ptep = pte_offset_map_lock(walk->mm, pmd, addr, &ptl);
+ if (is_zero_pfn(pte_pfn(*ptep)))
+ ptep_xchg_direct(walk->mm, addr, ptep, __pte(_PAGE_INVALID));
+ pte_unmap_unlock(ptep, ptl);
+ }
+ return 0;
+}
+
+static inline void zap_zero_pages(struct mm_struct *mm)
+{
+ struct mm_walk walk = { .pmd_entry = __zap_zero_pages };
+
+ walk.mm = mm;
+ walk_page_range(0, TASK_SIZE, &walk);
+}
+
+/*
+ * switch on pgstes for its userspace process (for kvm)
+ */
+int s390_enable_sie(void)
+{
+ struct mm_struct *mm = current->mm;
+
+ /* Do we have pgstes? if yes, we are done */
+ if (mm_has_pgste(mm))
+ return 0;
+ /* Fail if the page tables are 2K */
+ if (!mm_alloc_pgste(mm))
+ return -EINVAL;
+ down_write(&mm->mmap_sem);
+ mm->context.has_pgste = 1;
+ /* split thp mappings and disable thp for future mappings */
+ thp_split_mm(mm);
+ zap_zero_pages(mm);
+ up_write(&mm->mmap_sem);
+ return 0;
+}
+EXPORT_SYMBOL_GPL(s390_enable_sie);
+
+/*
+ * Enable storage key handling from now on and initialize the storage
+ * keys with the default key.
+ */
+static int __s390_enable_skey_pte(pte_t *pte, unsigned long addr,
+ unsigned long next, struct mm_walk *walk)
+{
+ /* Clear storage key */
+ ptep_zap_key(walk->mm, addr, pte);
+ return 0;
+}
+
+static int __s390_enable_skey_hugetlb(pte_t *pte, unsigned long addr,
+ unsigned long hmask, unsigned long next,
+ struct mm_walk *walk)
+{
+ pmd_t *pmd = (pmd_t *)pte;
+ unsigned long start, end;
+ struct page *page = pmd_page(*pmd);
+
+ /*
+ * The write check makes sure we do not set a key on shared
+ * memory. This is needed as the walker does not differentiate
+ * between actual guest memory and the process executable or
+ * shared libraries.
+ */
+ if (pmd_val(*pmd) & _SEGMENT_ENTRY_INVALID ||
+ !(pmd_val(*pmd) & _SEGMENT_ENTRY_WRITE))
+ return 0;
+
+ start = pmd_val(*pmd) & HPAGE_MASK;
+ end = start + HPAGE_SIZE - 1;
+ __storage_key_init_range(start, end);
+ set_bit(PG_arch_1, &page->flags);
+ return 0;
+}
+
+int s390_enable_skey(void)
+{
+ struct mm_walk walk = {
+ .hugetlb_entry = __s390_enable_skey_hugetlb,
+ .pte_entry = __s390_enable_skey_pte,
+ };
+ struct mm_struct *mm = current->mm;
+ struct vm_area_struct *vma;
+ int rc = 0;
+
+ down_write(&mm->mmap_sem);
+ if (mm_uses_skeys(mm))
+ goto out_up;
+
+ mm->context.uses_skeys = 1;
+ for (vma = mm->mmap; vma; vma = vma->vm_next) {
+ if (ksm_madvise(vma, vma->vm_start, vma->vm_end,
+ MADV_UNMERGEABLE, &vma->vm_flags)) {
+ mm->context.uses_skeys = 0;
+ rc = -ENOMEM;
+ goto out_up;
+ }
+ }
+ mm->def_flags &= ~VM_MERGEABLE;
+
+ walk.mm = mm;
+ walk_page_range(0, TASK_SIZE, &walk);
+
+out_up:
+ up_write(&mm->mmap_sem);
+ return rc;
+}
+EXPORT_SYMBOL_GPL(s390_enable_skey);
+
+/*
+ * Reset CMMA state, make all pages stable again.
+ */
+static int __s390_reset_cmma(pte_t *pte, unsigned long addr,
+ unsigned long next, struct mm_walk *walk)
+{
+ ptep_zap_unused(walk->mm, addr, pte, 1);
+ return 0;
+}
+
+void s390_reset_cmma(struct mm_struct *mm)
+{
+ struct mm_walk walk = { .pte_entry = __s390_reset_cmma };
+
+ down_write(&mm->mmap_sem);
+ walk.mm = mm;
+ walk_page_range(0, TASK_SIZE, &walk);
+ up_write(&mm->mmap_sem);
+}
+EXPORT_SYMBOL_GPL(s390_reset_cmma);
diff --git a/arch/s390/mm/gup.c b/arch/s390/mm/gup.c
new file mode 100644
index 000000000..5389bf5bc
--- /dev/null
+++ b/arch/s390/mm/gup.c
@@ -0,0 +1,310 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Lockless get_user_pages_fast for s390
+ *
+ * Copyright IBM Corp. 2010
+ * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>
+ */
+#include <linux/sched.h>
+#include <linux/mm.h>
+#include <linux/hugetlb.h>
+#include <linux/vmstat.h>
+#include <linux/pagemap.h>
+#include <linux/rwsem.h>
+#include <asm/pgtable.h>
+
+/*
+ * The performance critical leaf functions are made noinline otherwise gcc
+ * inlines everything into a single function which results in too much
+ * register pressure.
+ */
+static inline int gup_pte_range(pmd_t *pmdp, pmd_t pmd, unsigned long addr,
+ unsigned long end, int write, struct page **pages, int *nr)
+{
+ struct page *head, *page;
+ unsigned long mask;
+ pte_t *ptep, pte;
+
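+ /* Reject invalid and special ptes, and protected ptes on write access */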
+ mask = (write ? _PAGE_PROTECT : 0) | _PAGE_INVALID | _PAGE_SPECIAL;
+
+ ptep = ((pte_t *) pmd_deref(pmd)) + pte_index(addr);
+ do {
+ pte = *ptep;
+ barrier();
+ /* Similar to the PMD case, NUMA hinting must take slow path */
+ if (pte_protnone(pte))
+ return 0;
+ if ((pte_val(pte) & mask) != 0)
+ return 0;
+ VM_BUG_ON(!pfn_valid(pte_pfn(pte)));
+ page = pte_page(pte);
+ head = compound_head(page);
+ if (unlikely(WARN_ON_ONCE(page_ref_count(head) < 0)
+ || !page_cache_get_speculative(head)))
+ return 0;
+ if (unlikely(pte_val(pte) != pte_val(*ptep))) {
+ put_page(head);
+ return 0;
+ }
+ VM_BUG_ON_PAGE(compound_head(page) != head, page);
+ pages[*nr] = page;
+ (*nr)++;
+
+ } while (ptep++, addr += PAGE_SIZE, addr != end);
+
+ return 1;
+}
+
+static inline int gup_huge_pmd(pmd_t *pmdp, pmd_t pmd, unsigned long addr,
+ unsigned long end, int write, struct page **pages, int *nr)
+{
+ struct page *head, *page;
+ unsigned long mask;
+ int refs;
+
+ mask = (write ? _SEGMENT_ENTRY_PROTECT : 0) | _SEGMENT_ENTRY_INVALID;
+ if ((pmd_val(pmd) & mask) != 0)
+ return 0;
+ VM_BUG_ON(!pfn_valid(pmd_val(pmd) >> PAGE_SHIFT));
+
+ refs = 0;
+ head = pmd_page(pmd);
+ page = head + ((addr & ~PMD_MASK) >> PAGE_SHIFT);
+ do {
+ VM_BUG_ON(compound_head(page) != head);
+ pages[*nr] = page;
+ (*nr)++;
+ page++;
+ refs++;
+ } while (addr += PAGE_SIZE, addr != end);
+
+ if (unlikely(WARN_ON_ONCE(page_ref_count(head) < 0)
+ || !page_cache_add_speculative(head, refs))) {
+ *nr -= refs;
+ return 0;
+ }
+
+ if (unlikely(pmd_val(pmd) != pmd_val(*pmdp))) {
+ *nr -= refs;
+ while (refs--)
+ put_page(head);
+ return 0;
+ }
+
+ return 1;
+}
+
+
+static inline int gup_pmd_range(pud_t *pudp, pud_t pud, unsigned long addr,
+ unsigned long end, int write, struct page **pages, int *nr)
+{
+ unsigned long next;
+ pmd_t *pmdp, pmd;
+
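+ /* With folded page tables the pud entry already is a segment table
+ * entry, only a real region-3 entry needs to be dereferenced */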
+ pmdp = (pmd_t *) pudp;
+ if ((pud_val(pud) & _REGION_ENTRY_TYPE_MASK) == _REGION_ENTRY_TYPE_R3)
+ pmdp = (pmd_t *) pud_deref(pud);
+ pmdp += pmd_index(addr);
+ do {
+ pmd = *pmdp;
+ barrier();
+ next = pmd_addr_end(addr, end);
+ if (pmd_none(pmd))
+ return 0;
+ if (unlikely(pmd_large(pmd))) {
+ /*
+ * NUMA hinting faults need to be handled in the GUP
+ * slowpath for accounting purposes and so that they
+ * can be serialised against THP migration.
+ */
+ if (pmd_protnone(pmd))
+ return 0;
+ if (!gup_huge_pmd(pmdp, pmd, addr, next,
+ write, pages, nr))
+ return 0;
+ } else if (!gup_pte_range(pmdp, pmd, addr, next,
+ write, pages, nr))
+ return 0;
+ } while (pmdp++, addr = next, addr != end);
+
+ return 1;
+}
+
+static int gup_huge_pud(pud_t *pudp, pud_t pud, unsigned long addr,
+ unsigned long end, int write, struct page **pages, int *nr)
+{
+ struct page *head, *page;
+ unsigned long mask;
+ int refs;
+
+ mask = (write ? _REGION_ENTRY_PROTECT : 0) | _REGION_ENTRY_INVALID;
+ if ((pud_val(pud) & mask) != 0)
+ return 0;
+ VM_BUG_ON(!pfn_valid(pud_pfn(pud)));
+
+ refs = 0;
+ head = pud_page(pud);
+ page = head + ((addr & ~PUD_MASK) >> PAGE_SHIFT);
+ do {
+ VM_BUG_ON_PAGE(compound_head(page) != head, page);
+ pages[*nr] = page;
+ (*nr)++;
+ page++;
+ refs++;
+ } while (addr += PAGE_SIZE, addr != end);
+
+ if (unlikely(WARN_ON_ONCE(page_ref_count(head) < 0)
+ || !page_cache_add_speculative(head, refs))) {
+ *nr -= refs;
+ return 0;
+ }
+
+ if (unlikely(pud_val(pud) != pud_val(*pudp))) {
+ *nr -= refs;
+ while (refs--)
+ put_page(head);
+ return 0;
+ }
+
+ return 1;
+}
+
+static inline int gup_pud_range(p4d_t *p4dp, p4d_t p4d, unsigned long addr,
+ unsigned long end, int write, struct page **pages, int *nr)
+{
+ unsigned long next;
+ pud_t *pudp, pud;
+
+ pudp = (pud_t *) p4dp;
+ if ((p4d_val(p4d) & _REGION_ENTRY_TYPE_MASK) == _REGION_ENTRY_TYPE_R2)
+ pudp = (pud_t *) p4d_deref(p4d);
+ pudp += pud_index(addr);
+ do {
+ pud = *pudp;
+ barrier();
+ next = pud_addr_end(addr, end);
+ if (pud_none(pud))
+ return 0;
+ if (unlikely(pud_large(pud))) {
+ if (!gup_huge_pud(pudp, pud, addr, next, write, pages,
+ nr))
+ return 0;
+ } else if (!gup_pmd_range(pudp, pud, addr, next, write, pages,
+ nr))
+ return 0;
+ } while (pudp++, addr = next, addr != end);
+
+ return 1;
+}
+
+static inline int gup_p4d_range(pgd_t *pgdp, pgd_t pgd, unsigned long addr,
+ unsigned long end, int write, struct page **pages, int *nr)
+{
+ unsigned long next;
+ p4d_t *p4dp, p4d;
+
+ p4dp = (p4d_t *) pgdp;
+ if ((pgd_val(pgd) & _REGION_ENTRY_TYPE_MASK) == _REGION_ENTRY_TYPE_R1)
+ p4dp = (p4d_t *) pgd_deref(pgd);
+ p4dp += p4d_index(addr);
+ do {
+ p4d = *p4dp;
+ barrier();
+ next = p4d_addr_end(addr, end);
+ if (p4d_none(p4d))
+ return 0;
+ if (!gup_pud_range(p4dp, p4d, addr, next, write, pages, nr))
+ return 0;
+ } while (p4dp++, addr = next, addr != end);
+
+ return 1;
+}
+
+/*
+ * Like get_user_pages_fast() except it's IRQ-safe in that it won't fall
+ * back to the regular GUP.
+ * Note a difference with get_user_pages_fast: this always returns the
+ * number of pages pinned, 0 if no pages were pinned.
+ */
+int __get_user_pages_fast(unsigned long start, int nr_pages, int write,
+ struct page **pages)
+{
+ struct mm_struct *mm = current->mm;
+ unsigned long addr, len, end;
+ unsigned long next, flags;
+ pgd_t *pgdp, pgd;
+ int nr = 0;
+
+ start &= PAGE_MASK;
+ addr = start;
+ len = (unsigned long) nr_pages << PAGE_SHIFT;
+ end = start + len;
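+ /* Reject empty or wrapping ranges and addresses above the asce limit */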
+ if ((end <= start) || (end > mm->context.asce_limit))
+ return 0;
+ /*
+ * local_irq_save() doesn't prevent pagetable teardown, but does
+ * prevent the pagetables from being freed on s390.
+ *
+ * So long as we atomically load page table pointers versus teardown,
+ * we can follow the address down to the page and take a ref on it.
+ */
+ local_irq_save(flags);
+ pgdp = pgd_offset(mm, addr);
+ do {
+ pgd = *pgdp;
+ barrier();
+ next = pgd_addr_end(addr, end);
+ if (pgd_none(pgd))
+ break;
+ if (!gup_p4d_range(pgdp, pgd, addr, next, write, pages, &nr))
+ break;
+ } while (pgdp++, addr = next, addr != end);
+ local_irq_restore(flags);
+
+ return nr;
+}
+
+/**
+ * get_user_pages_fast() - pin user pages in memory
+ * @start: starting user address
+ * @nr_pages: number of pages from start to pin
+ * @write: whether pages will be written to
+ * @pages: array that receives pointers to the pages pinned.
+ * Should be at least nr_pages long.
+ *
+ * Attempt to pin user pages in memory without taking mm->mmap_sem.
+ * If not successful, it will fall back to taking the lock and
+ * calling get_user_pages_unlocked().
+ *
+ * Returns number of pages pinned. This may be fewer than the number
+ * requested. If nr_pages is 0 or negative, returns 0. If no pages
+ * were pinned, returns -errno.
+ */
+int get_user_pages_fast(unsigned long start, int nr_pages, int write,
+ struct page **pages)
+{
+ int nr, ret;
+
+ might_sleep();
+ start &= PAGE_MASK;
+ /*
+ * The FAST_GUP case requires FOLL_WRITE even for pure reads,
+ * because get_user_pages() may need to cause an early COW in
+ * order to avoid confusing the normal COW routines. So only
+ * targets that are already writable are safe to do by just
+ * looking at the page tables.
+ */
+ nr = __get_user_pages_fast(start, nr_pages, 1, pages);
+ if (nr == nr_pages)
+ return nr;
+
+ /* Try to get the remaining pages with get_user_pages */
+ start += nr << PAGE_SHIFT;
+ pages += nr;
+ ret = get_user_pages_unlocked(start, nr_pages - nr, pages,
+ write ? FOLL_WRITE : 0);
+ /* Have to be a bit careful with return values */
+ if (nr > 0)
+ ret = (ret < 0) ? nr : ret + nr;
+ return ret;
+}
diff --git a/arch/s390/mm/hugetlbpage.c b/arch/s390/mm/hugetlbpage.c
new file mode 100644
index 000000000..ff8234bca
--- /dev/null
+++ b/arch/s390/mm/hugetlbpage.c
@@ -0,0 +1,370 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * IBM System z Huge TLB Page Support for Kernel.
+ *
+ * Copyright IBM Corp. 2007,2020
+ * Author(s): Gerald Schaefer <gerald.schaefer@de.ibm.com>
+ */
+
+#define KMSG_COMPONENT "hugetlb"
+#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
+
+#include <linux/mm.h>
+#include <linux/hugetlb.h>
+#include <linux/mman.h>
+#include <linux/sched/mm.h>
+#include <linux/security.h>
+
+/*
+ * If the bit selected by single-bit bitmask "a" is set within "x", move
+ * it to the position indicated by single-bit bitmask "b".
+ */
+#define move_set_bit(x, a, b) (((x) & (a)) >> ilog2(a) << ilog2(b))
+
+static inline unsigned long __pte_to_rste(pte_t pte)
+{
+ unsigned long rste;
+
+ /*
+ * Convert encoding pte bits pmd / pud bits
+ * lIR.uswrdy.p dy..R...I...wr
+ * empty 010.000000.0 -> 00..0...1...00
+ * prot-none, clean, old 111.000000.1 -> 00..1...1...00
+ * prot-none, clean, young 111.000001.1 -> 01..1...1...00
+ * prot-none, dirty, old 111.000010.1 -> 10..1...1...00
+ * prot-none, dirty, young 111.000011.1 -> 11..1...1...00
+ * read-only, clean, old 111.000100.1 -> 00..1...1...01
+ * read-only, clean, young 101.000101.1 -> 01..1...0...01
+ * read-only, dirty, old 111.000110.1 -> 10..1...1...01
+ * read-only, dirty, young 101.000111.1 -> 11..1...0...01
+ * read-write, clean, old 111.001100.1 -> 00..1...1...11
+ * read-write, clean, young 101.001101.1 -> 01..1...0...11
+ * read-write, dirty, old 110.001110.1 -> 10..0...1...11
+ * read-write, dirty, young 100.001111.1 -> 11..0...0...11
+ * HW-bits: R read-only, I invalid
+ * SW-bits: p present, y young, d dirty, r read, w write, s special,
+ * u unused, l large
+ */
+ if (pte_present(pte)) {
+ rste = pte_val(pte) & PAGE_MASK;
+ rste |= move_set_bit(pte_val(pte), _PAGE_READ,
+ _SEGMENT_ENTRY_READ);
+ rste |= move_set_bit(pte_val(pte), _PAGE_WRITE,
+ _SEGMENT_ENTRY_WRITE);
+ rste |= move_set_bit(pte_val(pte), _PAGE_INVALID,
+ _SEGMENT_ENTRY_INVALID);
+ rste |= move_set_bit(pte_val(pte), _PAGE_PROTECT,
+ _SEGMENT_ENTRY_PROTECT);
+ rste |= move_set_bit(pte_val(pte), _PAGE_DIRTY,
+ _SEGMENT_ENTRY_DIRTY);
+ rste |= move_set_bit(pte_val(pte), _PAGE_YOUNG,
+ _SEGMENT_ENTRY_YOUNG);
+#ifdef CONFIG_MEM_SOFT_DIRTY
+ rste |= move_set_bit(pte_val(pte), _PAGE_SOFT_DIRTY,
+ _SEGMENT_ENTRY_SOFT_DIRTY);
+#endif
+ rste |= move_set_bit(pte_val(pte), _PAGE_NOEXEC,
+ _SEGMENT_ENTRY_NOEXEC);
+ } else
+ rste = _SEGMENT_ENTRY_EMPTY;
+ return rste;
+}
+
+static inline pte_t __rste_to_pte(unsigned long rste)
+{
+ int present;
+ pte_t pte;
+
+ if ((rste & _REGION_ENTRY_TYPE_MASK) == _REGION_ENTRY_TYPE_R3)
+ present = pud_present(__pud(rste));
+ else
+ present = pmd_present(__pmd(rste));
+
+ /*
+ * Convert encoding pmd / pud bits pte bits
+ * dy..R...I...wr lIR.uswrdy.p
+ * empty 00..0...1...00 -> 010.000000.0
+ * prot-none, clean, old 00..1...1...00 -> 111.000000.1
+ * prot-none, clean, young 01..1...1...00 -> 111.000001.1
+ * prot-none, dirty, old 10..1...1...00 -> 111.000010.1
+ * prot-none, dirty, young 11..1...1...00 -> 111.000011.1
+ * read-only, clean, old 00..1...1...01 -> 111.000100.1
+ * read-only, clean, young 01..1...0...01 -> 101.000101.1
+ * read-only, dirty, old 10..1...1...01 -> 111.000110.1
+ * read-only, dirty, young 11..1...0...01 -> 101.000111.1
+ * read-write, clean, old 00..1...1...11 -> 111.001100.1
+ * read-write, clean, young 01..1...0...11 -> 101.001101.1
+ * read-write, dirty, old 10..0...1...11 -> 110.001110.1
+ * read-write, dirty, young 11..0...0...11 -> 100.001111.1
+ * HW-bits: R read-only, I invalid
+ * SW-bits: p present, y young, d dirty, r read, w write, s special,
+ * u unused, l large
+ */
+ if (present) {
+ pte_val(pte) = rste & _SEGMENT_ENTRY_ORIGIN_LARGE;
+ pte_val(pte) |= _PAGE_LARGE | _PAGE_PRESENT;
+ pte_val(pte) |= move_set_bit(rste, _SEGMENT_ENTRY_READ,
+ _PAGE_READ);
+ pte_val(pte) |= move_set_bit(rste, _SEGMENT_ENTRY_WRITE,
+ _PAGE_WRITE);
+ pte_val(pte) |= move_set_bit(rste, _SEGMENT_ENTRY_INVALID,
+ _PAGE_INVALID);
+ pte_val(pte) |= move_set_bit(rste, _SEGMENT_ENTRY_PROTECT,
+ _PAGE_PROTECT);
+ pte_val(pte) |= move_set_bit(rste, _SEGMENT_ENTRY_DIRTY,
+ _PAGE_DIRTY);
+ pte_val(pte) |= move_set_bit(rste, _SEGMENT_ENTRY_YOUNG,
+ _PAGE_YOUNG);
+#ifdef CONFIG_MEM_SOFT_DIRTY
+ pte_val(pte) |= move_set_bit(rste, _SEGMENT_ENTRY_SOFT_DIRTY,
+ _PAGE_SOFT_DIRTY);
+#endif
+ pte_val(pte) |= move_set_bit(rste, _SEGMENT_ENTRY_NOEXEC,
+ _PAGE_NOEXEC);
+ } else
+ pte_val(pte) = _PAGE_INVALID;
+ return pte;
+}
+
+static void clear_huge_pte_skeys(struct mm_struct *mm, unsigned long rste)
+{
+ struct page *page;
+ unsigned long size, paddr;
+
+ if (!mm_uses_skeys(mm) ||
+ rste & _SEGMENT_ENTRY_INVALID)
+ return;
+
+ if ((rste & _REGION_ENTRY_TYPE_MASK) == _REGION_ENTRY_TYPE_R3) {
+ page = pud_page(__pud(rste));
+ size = PUD_SIZE;
+ paddr = rste & PUD_MASK;
+ } else {
+ page = pmd_page(__pmd(rste));
+ size = PMD_SIZE;
+ paddr = rste & PMD_MASK;
+ }
+
+ if (!test_and_set_bit(PG_arch_1, &page->flags))
+ __storage_key_init_range(paddr, paddr + size - 1);
+}
+
+void set_huge_pte_at(struct mm_struct *mm, unsigned long addr,
+ pte_t *ptep, pte_t pte)
+{
+ unsigned long rste;
+
+ rste = __pte_to_rste(pte);
+ if (!MACHINE_HAS_NX)
+ rste &= ~_SEGMENT_ENTRY_NOEXEC;
+
+ /* Set correct table type for 2G hugepages */
+ if ((pte_val(*ptep) & _REGION_ENTRY_TYPE_MASK) == _REGION_ENTRY_TYPE_R3) {
+ if (likely(pte_present(pte)))
+ rste |= _REGION3_ENTRY_LARGE;
+ rste |= _REGION_ENTRY_TYPE_R3;
+ } else if (likely(pte_present(pte)))
+ rste |= _SEGMENT_ENTRY_LARGE;
+
+ clear_huge_pte_skeys(mm, rste);
+ pte_val(*ptep) = rste;
+}
+
+pte_t huge_ptep_get(pte_t *ptep)
+{
+ return __rste_to_pte(pte_val(*ptep));
+}
+
+pte_t huge_ptep_get_and_clear(struct mm_struct *mm,
+ unsigned long addr, pte_t *ptep)
+{
+ pte_t pte = huge_ptep_get(ptep);
+ pmd_t *pmdp = (pmd_t *) ptep;
+ pud_t *pudp = (pud_t *) ptep;
+
+ if ((pte_val(*ptep) & _REGION_ENTRY_TYPE_MASK) == _REGION_ENTRY_TYPE_R3)
+ pudp_xchg_direct(mm, addr, pudp, __pud(_REGION3_ENTRY_EMPTY));
+ else
+ pmdp_xchg_direct(mm, addr, pmdp, __pmd(_SEGMENT_ENTRY_EMPTY));
+ return pte;
+}
+
+pte_t *huge_pte_alloc(struct mm_struct *mm,
+ unsigned long addr, unsigned long sz)
+{
+ pgd_t *pgdp;
+ p4d_t *p4dp;
+ pud_t *pudp;
+ pmd_t *pmdp = NULL;
+
+ pgdp = pgd_offset(mm, addr);
+ p4dp = p4d_alloc(mm, pgdp, addr);
+ if (p4dp) {
+ pudp = pud_alloc(mm, p4dp, addr);
+ if (pudp) {
+ if (sz == PUD_SIZE)
+ return (pte_t *) pudp;
+ else if (sz == PMD_SIZE)
+ pmdp = pmd_alloc(mm, pudp, addr);
+ }
+ }
+ return (pte_t *) pmdp;
+}
+
+pte_t *huge_pte_offset(struct mm_struct *mm,
+ unsigned long addr, unsigned long sz)
+{
+ pgd_t *pgdp;
+ p4d_t *p4dp;
+ pud_t *pudp;
+ pmd_t *pmdp = NULL;
+
+ pgdp = pgd_offset(mm, addr);
+ if (pgd_present(*pgdp)) {
+ p4dp = p4d_offset(pgdp, addr);
+ if (p4d_present(*p4dp)) {
+ pudp = pud_offset(p4dp, addr);
+ if (pud_present(*pudp)) {
+ if (pud_large(*pudp))
+ return (pte_t *) pudp;
+ pmdp = pmd_offset(pudp, addr);
+ }
+ }
+ }
+ return (pte_t *) pmdp;
+}
+
+int pmd_huge(pmd_t pmd)
+{
+ return pmd_large(pmd);
+}
+
+int pud_huge(pud_t pud)
+{
+ return pud_large(pud);
+}
+
+struct page *
+follow_huge_pud(struct mm_struct *mm, unsigned long address,
+ pud_t *pud, int flags)
+{
+ if (flags & FOLL_GET)
+ return NULL;
+
+ return pud_page(*pud) + ((address & ~PUD_MASK) >> PAGE_SHIFT);
+}
+
+static __init int setup_hugepagesz(char *opt)
+{
+ unsigned long size;
+ char *string = opt;
+
+ size = memparse(opt, &opt);
+ if (MACHINE_HAS_EDAT1 && size == PMD_SIZE) {
+ hugetlb_add_hstate(PMD_SHIFT - PAGE_SHIFT);
+ } else if (MACHINE_HAS_EDAT2 && size == PUD_SIZE) {
+ hugetlb_add_hstate(PUD_SHIFT - PAGE_SHIFT);
+ } else {
+ hugetlb_bad_size();
+ pr_err("hugepagesz= specifies an unsupported page size %s\n",
+ string);
+ return 0;
+ }
+ return 1;
+}
+__setup("hugepagesz=", setup_hugepagesz);
+
+static unsigned long hugetlb_get_unmapped_area_bottomup(struct file *file,
+ unsigned long addr, unsigned long len,
+ unsigned long pgoff, unsigned long flags)
+{
+ struct hstate *h = hstate_file(file);
+ struct vm_unmapped_area_info info;
+
+ info.flags = 0;
+ info.length = len;
+ info.low_limit = current->mm->mmap_base;
+ info.high_limit = TASK_SIZE;
+ info.align_mask = PAGE_MASK & ~huge_page_mask(h);
+ info.align_offset = 0;
+ return vm_unmapped_area(&info);
+}
+
+static unsigned long hugetlb_get_unmapped_area_topdown(struct file *file,
+ unsigned long addr0, unsigned long len,
+ unsigned long pgoff, unsigned long flags)
+{
+ struct hstate *h = hstate_file(file);
+ struct vm_unmapped_area_info info;
+ unsigned long addr;
+
+ info.flags = VM_UNMAPPED_AREA_TOPDOWN;
+ info.length = len;
+ info.low_limit = max(PAGE_SIZE, mmap_min_addr);
+ info.high_limit = current->mm->mmap_base;
+ info.align_mask = PAGE_MASK & ~huge_page_mask(h);
+ info.align_offset = 0;
+ addr = vm_unmapped_area(&info);
+
+ /*
+ * A failed mmap() very likely causes application failure,
+ * so fall back to the bottom-up function here. This scenario
+ * can happen with large stack limits and large mmap()
+ * allocations.
+ */
+ if (addr & ~PAGE_MASK) {
+ VM_BUG_ON(addr != -ENOMEM);
+ info.flags = 0;
+ info.low_limit = TASK_UNMAPPED_BASE;
+ info.high_limit = TASK_SIZE;
+ addr = vm_unmapped_area(&info);
+ }
+
+ return addr;
+}
+
+unsigned long hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
+ unsigned long len, unsigned long pgoff, unsigned long flags)
+{
+ struct hstate *h = hstate_file(file);
+ struct mm_struct *mm = current->mm;
+ struct vm_area_struct *vma;
+ int rc;
+
+ if (len & ~huge_page_mask(h))
+ return -EINVAL;
+ if (len > TASK_SIZE - mmap_min_addr)
+ return -ENOMEM;
+
+ if (flags & MAP_FIXED) {
+ if (prepare_hugepage_range(file, addr, len))
+ return -EINVAL;
+ goto check_asce_limit;
+ }
+
+ if (addr) {
+ addr = ALIGN(addr, huge_page_size(h));
+ vma = find_vma(mm, addr);
+ if (TASK_SIZE - len >= addr && addr >= mmap_min_addr &&
+ (!vma || addr + len <= vm_start_gap(vma)))
+ goto check_asce_limit;
+ }
+
+ if (mm->get_unmapped_area == arch_get_unmapped_area)
+ addr = hugetlb_get_unmapped_area_bottomup(file, addr, len,
+ pgoff, flags);
+ else
+ addr = hugetlb_get_unmapped_area_topdown(file, addr, len,
+ pgoff, flags);
+ if (addr & ~PAGE_MASK)
+ return addr;
+
+check_asce_limit:
+ if (addr + len > current->mm->context.asce_limit &&
+ addr + len <= TASK_SIZE) {
+ rc = crst_table_upgrade(mm, addr + len);
+ if (rc)
+ return (unsigned long) rc;
+ }
+ return addr;
+}
diff --git a/arch/s390/mm/init.c b/arch/s390/mm/init.c
new file mode 100644
index 000000000..379a925d9
--- /dev/null
+++ b/arch/s390/mm/init.c
@@ -0,0 +1,251 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * S390 version
+ * Copyright IBM Corp. 1999
+ * Author(s): Hartmut Penner (hp@de.ibm.com)
+ *
+ * Derived from "arch/i386/mm/init.c"
+ * Copyright (C) 1995 Linus Torvalds
+ */
+
+#include <linux/signal.h>
+#include <linux/sched.h>
+#include <linux/kernel.h>
+#include <linux/errno.h>
+#include <linux/string.h>
+#include <linux/types.h>
+#include <linux/ptrace.h>
+#include <linux/mman.h>
+#include <linux/mm.h>
+#include <linux/swap.h>
+#include <linux/smp.h>
+#include <linux/init.h>
+#include <linux/pagemap.h>
+#include <linux/bootmem.h>
+#include <linux/memory.h>
+#include <linux/pfn.h>
+#include <linux/poison.h>
+#include <linux/initrd.h>
+#include <linux/export.h>
+#include <linux/cma.h>
+#include <linux/gfp.h>
+#include <linux/memblock.h>
+#include <asm/processor.h>
+#include <linux/uaccess.h>
+#include <asm/pgtable.h>
+#include <asm/pgalloc.h>
+#include <asm/dma.h>
+#include <asm/lowcore.h>
+#include <asm/tlb.h>
+#include <asm/tlbflush.h>
+#include <asm/sections.h>
+#include <asm/ctl_reg.h>
+#include <asm/sclp.h>
+#include <asm/set_memory.h>
+
+pgd_t swapper_pg_dir[PTRS_PER_PGD] __section(.bss..swapper_pg_dir);
+
+unsigned long empty_zero_page, zero_page_mask;
+EXPORT_SYMBOL(empty_zero_page);
+EXPORT_SYMBOL(zero_page_mask);
+
+static void __init setup_zero_pages(void)
+{
+ unsigned int order;
+ struct page *page;
+ int i;
+
+ /* Latest machines require a mapping granularity of 512KB */
+ order = 7;
+
+ /* Limit number of empty zero pages for small memory sizes */
+ while (order > 2 && (totalram_pages >> 10) < (1UL << order))
+ order--;
+
+ empty_zero_page = __get_free_pages(GFP_KERNEL | __GFP_ZERO, order);
+ if (!empty_zero_page)
+ panic("Out of memory in setup_zero_pages");
+
+ page = virt_to_page((void *) empty_zero_page);
+ split_page(page, order);
+ for (i = 1 << order; i > 0; i--) {
+ mark_page_reserved(page);
+ page++;
+ }
+
+ zero_page_mask = ((PAGE_SIZE << order) - 1) & PAGE_MASK;
+}
+
+/*
+ * paging_init() sets up the page tables
+ */
+void __init paging_init(void)
+{
+ unsigned long max_zone_pfns[MAX_NR_ZONES];
+ unsigned long pgd_type, asce_bits;
+ psw_t psw;
+
+ init_mm.pgd = swapper_pg_dir;
+ if (VMALLOC_END > _REGION2_SIZE) {
+ asce_bits = _ASCE_TYPE_REGION2 | _ASCE_TABLE_LENGTH;
+ pgd_type = _REGION2_ENTRY_EMPTY;
+ } else {
+ asce_bits = _ASCE_TYPE_REGION3 | _ASCE_TABLE_LENGTH;
+ pgd_type = _REGION3_ENTRY_EMPTY;
+ }
+ init_mm.context.asce = (__pa(init_mm.pgd) & PAGE_MASK) | asce_bits;
+ S390_lowcore.kernel_asce = init_mm.context.asce;
+ S390_lowcore.user_asce = S390_lowcore.kernel_asce;
+ crst_table_init((unsigned long *) init_mm.pgd, pgd_type);
+ vmem_map_init();
+
+ /* enable virtual mapping in kernel mode */
+ __ctl_load(S390_lowcore.kernel_asce, 1, 1);
+ __ctl_load(S390_lowcore.kernel_asce, 7, 7);
+ __ctl_load(S390_lowcore.kernel_asce, 13, 13);
+ psw.mask = __extract_psw();
+ psw_bits(psw).dat = 1;
+ psw_bits(psw).as = PSW_BITS_AS_HOME;
+ __load_psw_mask(psw.mask);
+
+ sparse_memory_present_with_active_regions(MAX_NUMNODES);
+ sparse_init();
+ memset(max_zone_pfns, 0, sizeof(max_zone_pfns));
+ max_zone_pfns[ZONE_DMA] = PFN_DOWN(MAX_DMA_ADDRESS);
+ max_zone_pfns[ZONE_NORMAL] = max_low_pfn;
+ free_area_init_nodes(max_zone_pfns);
+}
+
+void mark_rodata_ro(void)
+{
+ unsigned long size = __end_ro_after_init - __start_ro_after_init;
+
+ set_memory_ro((unsigned long)__start_ro_after_init, size >> PAGE_SHIFT);
+ pr_info("Write protected read-only-after-init data: %luk\n", size >> 10);
+}
+
+void __init mem_init(void)
+{
+ cpumask_set_cpu(0, &init_mm.context.cpu_attach_mask);
+ cpumask_set_cpu(0, mm_cpumask(&init_mm));
+
+ set_max_mapnr(max_low_pfn);
+ high_memory = (void *) __va(max_low_pfn * PAGE_SIZE);
+
+ /* Setup guest page hinting */
+ cmma_init();
+
+ /* this will put all low memory onto the freelists */
+ free_all_bootmem();
+ setup_zero_pages(); /* Setup zeroed pages. */
+
+ cmma_init_nodat();
+
+ mem_init_print_info(NULL);
+}
+
+void free_initmem(void)
+{
+ __set_memory((unsigned long)_sinittext,
+ (unsigned long)(_einittext - _sinittext) >> PAGE_SHIFT,
+ SET_MEMORY_RW | SET_MEMORY_NX);
+ free_initmem_default(POISON_FREE_INITMEM);
+}
+
+#ifdef CONFIG_BLK_DEV_INITRD
+void __init free_initrd_mem(unsigned long start, unsigned long end)
+{
+ free_reserved_area((void *)start, (void *)end, POISON_FREE_INITMEM,
+ "initrd");
+}
+#endif
+
+unsigned long memory_block_size_bytes(void)
+{
+ /*
+ * Make sure the memory block size is always greater than or
+ * equal to the memory increment size.
+ */
+ return max_t(unsigned long, MIN_MEMORY_BLOCK_SIZE, sclp.rzm);
+}
+
+#ifdef CONFIG_MEMORY_HOTPLUG
+
+#ifdef CONFIG_CMA
+
+/* Prevent memory blocks which contain cma regions from going offline */
+
+struct s390_cma_mem_data {
+ unsigned long start;
+ unsigned long end;
+};
+
+static int s390_cma_check_range(struct cma *cma, void *data)
+{
+ struct s390_cma_mem_data *mem_data;
+ unsigned long start, end;
+
+ mem_data = data;
+ start = cma_get_base(cma);
+ end = start + cma_get_size(cma);
+ if (end < mem_data->start)
+ return 0;
+ if (start >= mem_data->end)
+ return 0;
+ return -EBUSY;
+}
+
+static int s390_cma_mem_notifier(struct notifier_block *nb,
+ unsigned long action, void *data)
+{
+ struct s390_cma_mem_data mem_data;
+ struct memory_notify *arg;
+ int rc = 0;
+
+ arg = data;
+ mem_data.start = arg->start_pfn << PAGE_SHIFT;
+ mem_data.end = mem_data.start + (arg->nr_pages << PAGE_SHIFT);
+ if (action == MEM_GOING_OFFLINE)
+ rc = cma_for_each_area(s390_cma_check_range, &mem_data);
+ return notifier_from_errno(rc);
+}
+
+static struct notifier_block s390_cma_mem_nb = {
+ .notifier_call = s390_cma_mem_notifier,
+};
+
+static int __init s390_cma_mem_init(void)
+{
+ return register_memory_notifier(&s390_cma_mem_nb);
+}
+device_initcall(s390_cma_mem_init);
+
+#endif /* CONFIG_CMA */
+
+int arch_add_memory(int nid, u64 start, u64 size, struct vmem_altmap *altmap,
+ bool want_memblock)
+{
+ unsigned long start_pfn = PFN_DOWN(start);
+ unsigned long size_pages = PFN_DOWN(size);
+ int rc;
+
+ rc = vmem_add_mapping(start, size);
+ if (rc)
+ return rc;
+
+ rc = __add_pages(nid, start_pfn, size_pages, altmap, want_memblock);
+ if (rc)
+ vmem_remove_mapping(start, size);
+ return rc;
+}
+
+void arch_remove_memory(int nid, u64 start, u64 size,
+ struct vmem_altmap *altmap)
+{
+ unsigned long start_pfn = start >> PAGE_SHIFT;
+ unsigned long nr_pages = size >> PAGE_SHIFT;
+
+ __remove_pages(start_pfn, nr_pages, altmap);
+ vmem_remove_mapping(start, size);
+}
+#endif /* CONFIG_MEMORY_HOTPLUG */
diff --git a/arch/s390/mm/maccess.c b/arch/s390/mm/maccess.c
new file mode 100644
index 000000000..7be064758
--- /dev/null
+++ b/arch/s390/mm/maccess.c
@@ -0,0 +1,213 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Access kernel memory without faulting -- s390 specific implementation.
+ *
+ * Copyright IBM Corp. 2009, 2015
+ *
+ * Author(s): Heiko Carstens <heiko.carstens@de.ibm.com>,
+ *
+ */
+
+#include <linux/uaccess.h>
+#include <linux/kernel.h>
+#include <linux/types.h>
+#include <linux/errno.h>
+#include <linux/gfp.h>
+#include <linux/cpu.h>
+#include <asm/ctl_reg.h>
+#include <asm/io.h>
+
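+/*
+ * Write up to eight bytes within one doubleword-aligned block of the
+ * destination: read the old contents, patch in the requested bytes and
+ * store the result back with sturg.
+ */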
+static notrace long s390_kernel_write_odd(void *dst, const void *src, size_t size)
+{
+ unsigned long aligned, offset, count;
+ char tmp[8];
+
+ aligned = (unsigned long) dst & ~7UL;
+ offset = (unsigned long) dst & 7UL;
+ size = min(8UL - offset, size);
+ count = size - 1;
+ asm volatile(
+ " bras 1,0f\n"
+ " mvc 0(1,%4),0(%5)\n"
+ "0: mvc 0(8,%3),0(%0)\n"
+ " ex %1,0(1)\n"
+ " lg %1,0(%3)\n"
+ " lra %0,0(%0)\n"
+ " sturg %1,%0\n"
+ : "+&a" (aligned), "+&a" (count), "=m" (tmp)
+ : "a" (&tmp), "a" (&tmp[offset]), "a" (src)
+ : "cc", "memory", "1");
+ return size;
+}
+
+/*
+ * s390_kernel_write - write to kernel memory bypassing DAT
+ * @dst: destination address
+ * @src: source address
+ * @size: number of bytes to copy
+ *
+ * This function writes to kernel memory bypassing DAT and possible page table
+ * write protection. It writes to the destination using the sturg instruction.
+ * Therefore we have a read-modify-write sequence: the function reads eight
+ * bytes from the destination at an eight-byte boundary, modifies the bytes
+ * requested and writes the result back in a loop.
+ *
+ * Note: this means that this function must not be called concurrently on
+ * several CPUs with overlapping words, since this may potentially
+ * cause data corruption.
+ */
+void notrace s390_kernel_write(void *dst, const void *src, size_t size)
+{
+ long copied;
+
+ while (size) {
+ copied = s390_kernel_write_odd(dst, src, size);
+ dst += copied;
+ src += copied;
+ size -= copied;
+ }
+}
+
+static int __memcpy_real(void *dest, void *src, size_t count)
+{
+ register unsigned long _dest asm("2") = (unsigned long) dest;
+ register unsigned long _len1 asm("3") = (unsigned long) count;
+ register unsigned long _src asm("4") = (unsigned long) src;
+ register unsigned long _len2 asm("5") = (unsigned long) count;
+ int rc = -EFAULT;
+
+ asm volatile (
+ "0: mvcle %1,%2,0x0\n"
+ "1: jo 0b\n"
+ " lhi %0,0x0\n"
+ "2:\n"
+ EX_TABLE(1b,2b)
+ : "+d" (rc), "+d" (_dest), "+d" (_src), "+d" (_len1),
+ "+d" (_len2), "=m" (*((long *) dest))
+ : "m" (*((long *) src))
+ : "cc", "memory");
+ return rc;
+}
+
+/*
+ * Copy memory in real mode (kernel to kernel)
+ */
+int memcpy_real(void *dest, void *src, size_t count)
+{
+ int irqs_disabled, rc;
+ unsigned long flags;
+
+ if (!count)
+ return 0;
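+ /* stnsm 0xf8: clear the DAT, I/O and external interrupt mask bits */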
+ flags = __arch_local_irq_stnsm(0xf8UL);
+ irqs_disabled = arch_irqs_disabled_flags(flags);
+ if (!irqs_disabled)
+ trace_hardirqs_off();
+ rc = __memcpy_real(dest, src, count);
+ if (!irqs_disabled)
+ trace_hardirqs_on();
+ __arch_local_irq_ssm(flags);
+ return rc;
+}
+
+/*
+ * Copy memory in absolute mode (kernel to kernel)
+ */
+void memcpy_absolute(void *dest, void *src, size_t count)
+{
+ unsigned long cr0, flags, prefix;
+
+ flags = arch_local_irq_save();
+ __ctl_store(cr0, 0, 0);
+ __ctl_clear_bit(0, 28); /* disable lowcore protection */
+ prefix = store_prefix();
+ if (prefix) {
+ local_mcck_disable();
+ set_prefix(0);
+ memcpy(dest, src, count);
+ set_prefix(prefix);
+ local_mcck_enable();
+ } else {
+ memcpy(dest, src, count);
+ }
+ __ctl_load(cr0, 0, 0);
+ arch_local_irq_restore(flags);
+}
+
+/*
+ * Copy memory from kernel (real) to user (virtual)
+ */
+int copy_to_user_real(void __user *dest, void *src, unsigned long count)
+{
+ int offs = 0, size, rc;
+ char *buf;
+
+ buf = (char *) __get_free_page(GFP_KERNEL);
+ if (!buf)
+ return -ENOMEM;
+ rc = -EFAULT;
+ while (offs < count) {
+ size = min(PAGE_SIZE, count - offs);
+ if (memcpy_real(buf, src + offs, size))
+ goto out;
+ if (copy_to_user(dest + offs, buf, size))
+ goto out;
+ offs += size;
+ }
+ rc = 0;
+out:
+ free_page((unsigned long) buf);
+ return rc;
+}
+
+/*
+ * Check if physical address is within prefix or zero page
+ */
+static int is_swapped(unsigned long addr)
+{
+ unsigned long lc;
+ int cpu;
+
+ if (addr < sizeof(struct lowcore))
+ return 1;
+ for_each_online_cpu(cpu) {
+ lc = (unsigned long) lowcore_ptr[cpu];
+ if (addr > lc + sizeof(struct lowcore) - 1 || addr < lc)
+ continue;
+ return 1;
+ }
+ return 0;
+}
+
+/*
+ * Convert a physical pointer for /dev/mem access
+ *
+ * For swapped prefix pages a new buffer is returned that contains a copy of
+ * the absolute memory. The buffer size is at most one page.
+ */
+void *xlate_dev_mem_ptr(phys_addr_t addr)
+{
+ void *bounce = (void *) addr;
+ unsigned long size;
+
+ get_online_cpus();
+ preempt_disable();
+ if (is_swapped(addr)) {
+ size = PAGE_SIZE - (addr & ~PAGE_MASK);
+ bounce = (void *) __get_free_page(GFP_ATOMIC);
+ if (bounce)
+ memcpy_absolute(bounce, (void *) addr, size);
+ }
+ preempt_enable();
+ put_online_cpus();
+ return bounce;
+}
+
+/*
+ * Free converted buffer for /dev/mem access (if necessary)
+ */
+void unxlate_dev_mem_ptr(phys_addr_t addr, void *buf)
+{
+ if ((void *) addr != buf)
+ free_page((unsigned long) buf);
+}
diff --git a/arch/s390/mm/mem_detect.c b/arch/s390/mm/mem_detect.c
new file mode 100644
index 000000000..21f6c82c8
--- /dev/null
+++ b/arch/s390/mm/mem_detect.c
@@ -0,0 +1,62 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright IBM Corp. 2008, 2009
+ *
+ * Author(s): Heiko Carstens <heiko.carstens@de.ibm.com>
+ */
+
+#include <linux/kernel.h>
+#include <linux/memblock.h>
+#include <linux/init.h>
+#include <linux/debugfs.h>
+#include <linux/seq_file.h>
+#include <asm/ipl.h>
+#include <asm/sclp.h>
+#include <asm/setup.h>
+
+#define CHUNK_READ_WRITE 0
+#define CHUNK_READ_ONLY 1
+
+static inline void memblock_physmem_add(phys_addr_t start, phys_addr_t size)
+{
+ memblock_dbg("memblock_physmem_add: [%#016llx-%#016llx]\n",
+ start, start + size - 1);
+ memblock_add_range(&memblock.memory, start, size, 0, 0);
+ memblock_add_range(&memblock.physmem, start, size, 0, 0);
+}
+
+void __init detect_memory_memblock(void)
+{
+ unsigned long memsize, rnmax, rzm, addr, size;
+ int type;
+
+ rzm = sclp.rzm;
+ rnmax = sclp.rnmax;
+ memsize = rzm * rnmax;
+ if (!rzm)
+ rzm = 1UL << 17;
+ max_physmem_end = memsize;
+ addr = 0;
+ /* keep memblock lists close to the kernel */
+ memblock_set_bottom_up(true);
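+ /* walk memory in rzm-sized steps and merge chunks with the same tprot type */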
+ do {
+ size = 0;
+ /* assume lowcore is writable */
+ type = addr ? tprot(addr) : CHUNK_READ_WRITE;
+ do {
+ size += rzm;
+ if (max_physmem_end && addr + size >= max_physmem_end)
+ break;
+ } while (type == tprot(addr + size));
+ if (type == CHUNK_READ_WRITE || type == CHUNK_READ_ONLY) {
+ if (max_physmem_end && (addr + size > max_physmem_end))
+ size = max_physmem_end - addr;
+ memblock_physmem_add(addr, size);
+ }
+ addr += size;
+ } while (addr < max_physmem_end);
+ memblock_set_bottom_up(false);
+ if (!max_physmem_end)
+ max_physmem_end = memblock_end_of_DRAM();
+ memblock_dump_all();
+}
diff --git a/arch/s390/mm/mmap.c b/arch/s390/mm/mmap.c
new file mode 100644
index 000000000..0a7627cdb
--- /dev/null
+++ b/arch/s390/mm/mmap.c
@@ -0,0 +1,206 @@
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * flexible mmap layout support
+ *
+ * Copyright 2003-2004 Red Hat Inc., Durham, North Carolina.
+ * All Rights Reserved.
+ *
+ * Started by Ingo Molnar <mingo@elte.hu>
+ */
+
+#include <linux/elf-randomize.h>
+#include <linux/personality.h>
+#include <linux/mm.h>
+#include <linux/mman.h>
+#include <linux/sched/signal.h>
+#include <linux/sched/mm.h>
+#include <linux/random.h>
+#include <linux/compat.h>
+#include <linux/security.h>
+#include <asm/pgalloc.h>
+#include <asm/elf.h>
+
+static unsigned long stack_maxrandom_size(void)
+{
+ if (!(current->flags & PF_RANDOMIZE))
+ return 0;
+ if (current->personality & ADDR_NO_RANDOMIZE)
+ return 0;
+ return STACK_RND_MASK << PAGE_SHIFT;
+}
+
+/*
+ * Top of mmap area (just below the process stack).
+ *
+ * Leave at least a ~32 MB hole.
+ */
+#define MIN_GAP (32*1024*1024)
+#define MAX_GAP (STACK_TOP/6*5)
+
+static inline int mmap_is_legacy(struct rlimit *rlim_stack)
+{
+ if (current->personality & ADDR_COMPAT_LAYOUT)
+ return 1;
+ if (rlim_stack->rlim_cur == RLIM_INFINITY)
+ return 1;
+ return sysctl_legacy_va_layout;
+}
+
+unsigned long arch_mmap_rnd(void)
+{
+ return (get_random_int() & MMAP_RND_MASK) << PAGE_SHIFT;
+}
+
+static unsigned long mmap_base_legacy(unsigned long rnd)
+{
+ return TASK_UNMAPPED_BASE + rnd;
+}
+
+static inline unsigned long mmap_base(unsigned long rnd,
+ struct rlimit *rlim_stack)
+{
+ unsigned long gap = rlim_stack->rlim_cur;
+
+ if (gap < MIN_GAP)
+ gap = MIN_GAP;
+ else if (gap > MAX_GAP)
+ gap = MAX_GAP;
+ gap &= PAGE_MASK;
+ return STACK_TOP - stack_maxrandom_size() - rnd - gap;
+}
+
+unsigned long
+arch_get_unmapped_area(struct file *filp, unsigned long addr,
+ unsigned long len, unsigned long pgoff, unsigned long flags)
+{
+ struct mm_struct *mm = current->mm;
+ struct vm_area_struct *vma;
+ struct vm_unmapped_area_info info;
+ int rc;
+
+ if (len > TASK_SIZE - mmap_min_addr)
+ return -ENOMEM;
+
+ if (flags & MAP_FIXED)
+ goto check_asce_limit;
+
+ if (addr) {
+ addr = PAGE_ALIGN(addr);
+ vma = find_vma(mm, addr);
+ if (TASK_SIZE - len >= addr && addr >= mmap_min_addr &&
+ (!vma || addr + len <= vm_start_gap(vma)))
+ goto check_asce_limit;
+ }
+
+ info.flags = 0;
+ info.length = len;
+ info.low_limit = mm->mmap_base;
+ info.high_limit = TASK_SIZE;
+ if (filp || (flags & MAP_SHARED))
+ info.align_mask = MMAP_ALIGN_MASK << PAGE_SHIFT;
+ else
+ info.align_mask = 0;
+ info.align_offset = pgoff << PAGE_SHIFT;
+ addr = vm_unmapped_area(&info);
+ if (addr & ~PAGE_MASK)
+ return addr;
+
+check_asce_limit:
+ if (addr + len > current->mm->context.asce_limit &&
+ addr + len <= TASK_SIZE) {
+ rc = crst_table_upgrade(mm, addr + len);
+ if (rc)
+ return (unsigned long) rc;
+ }
+
+ return addr;
+}
+
+unsigned long
+arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0,
+ const unsigned long len, const unsigned long pgoff,
+ const unsigned long flags)
+{
+ struct vm_area_struct *vma;
+ struct mm_struct *mm = current->mm;
+ unsigned long addr = addr0;
+ struct vm_unmapped_area_info info;
+ int rc;
+
+ /* requested length too big for entire address space */
+ if (len > TASK_SIZE - mmap_min_addr)
+ return -ENOMEM;
+
+ if (flags & MAP_FIXED)
+ goto check_asce_limit;
+
+ /* requesting a specific address */
+ if (addr) {
+ addr = PAGE_ALIGN(addr);
+ vma = find_vma(mm, addr);
+ if (TASK_SIZE - len >= addr && addr >= mmap_min_addr &&
+ (!vma || addr + len <= vm_start_gap(vma)))
+ goto check_asce_limit;
+ }
+
+ info.flags = VM_UNMAPPED_AREA_TOPDOWN;
+ info.length = len;
+ info.low_limit = max(PAGE_SIZE, mmap_min_addr);
+ info.high_limit = mm->mmap_base;
+ if (filp || (flags & MAP_SHARED))
+ info.align_mask = MMAP_ALIGN_MASK << PAGE_SHIFT;
+ else
+ info.align_mask = 0;
+ info.align_offset = pgoff << PAGE_SHIFT;
+ addr = vm_unmapped_area(&info);
+
+ /*
+ * A failed mmap() very likely causes application failure,
+ * so fall back to the bottom-up function here. This scenario
+ * can happen with large stack limits and large mmap()
+ * allocations.
+ */
+ if (addr & ~PAGE_MASK) {
+ VM_BUG_ON(addr != -ENOMEM);
+ info.flags = 0;
+ info.low_limit = TASK_UNMAPPED_BASE;
+ info.high_limit = TASK_SIZE;
+ addr = vm_unmapped_area(&info);
+ if (addr & ~PAGE_MASK)
+ return addr;
+ }
+
+check_asce_limit:
+ if (addr + len > current->mm->context.asce_limit &&
+ addr + len <= TASK_SIZE) {
+ rc = crst_table_upgrade(mm, addr + len);
+ if (rc)
+ return (unsigned long) rc;
+ }
+
+ return addr;
+}
+
+/*
+ * This function, called very early during the creation of a new
+ * process VM image, sets up which VM layout function to use:
+ */
+void arch_pick_mmap_layout(struct mm_struct *mm, struct rlimit *rlim_stack)
+{
+ unsigned long random_factor = 0UL;
+
+ if (current->flags & PF_RANDOMIZE)
+ random_factor = arch_mmap_rnd();
+
+ /*
+ * Fall back to the standard layout if the personality
+ * bit is set, or if the expected stack growth is unlimited:
+ */
+ if (mmap_is_legacy(rlim_stack)) {
+ mm->mmap_base = mmap_base_legacy(random_factor);
+ mm->get_unmapped_area = arch_get_unmapped_area;
+ } else {
+ mm->mmap_base = mmap_base(random_factor, rlim_stack);
+ mm->get_unmapped_area = arch_get_unmapped_area_topdown;
+ }
+}
diff --git a/arch/s390/mm/page-states.c b/arch/s390/mm/page-states.c
new file mode 100644
index 000000000..dc3cede7f
--- /dev/null
+++ b/arch/s390/mm/page-states.c
@@ -0,0 +1,281 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright IBM Corp. 2008
+ *
+ * Guest page hinting for unused pages.
+ *
+ * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>
+ */
+
+#include <linux/kernel.h>
+#include <linux/errno.h>
+#include <linux/types.h>
+#include <linux/mm.h>
+#include <linux/memblock.h>
+#include <linux/gfp.h>
+#include <linux/init.h>
+#include <asm/facility.h>
+#include <asm/page-states.h>
+
+static int cmma_flag = 1;
+
+static int __init cmma(char *str)
+{
+ char *parm;
+
+ parm = strstrip(str);
+ if (strcmp(parm, "yes") == 0 || strcmp(parm, "on") == 0) {
+ cmma_flag = 1;
+ return 1;
+ }
+ cmma_flag = 0;
+ if (strcmp(parm, "no") == 0 || strcmp(parm, "off") == 0)
+ return 1;
+ return 0;
+}
+__setup("cmma=", cmma);
+
+static inline int cmma_test_essa(void)
+{
+ register unsigned long tmp asm("0") = 0;
+ register int rc asm("1");
+
+ /* test ESSA_GET_STATE */
+ asm volatile(
+ " .insn rrf,0xb9ab0000,%1,%1,%2,0\n"
+ "0: la %0,0\n"
+ "1:\n"
+ EX_TABLE(0b,1b)
+ : "=&d" (rc), "+&d" (tmp)
+ : "i" (ESSA_GET_STATE), "0" (-EOPNOTSUPP));
+ return rc;
+}
+
+void __init cmma_init(void)
+{
+ if (!cmma_flag)
+ return;
+ if (cmma_test_essa()) {
+ cmma_flag = 0;
+ return;
+ }
+ if (test_facility(147))
+ cmma_flag = 2;
+}
+
+static inline unsigned char get_page_state(struct page *page)
+{
+ unsigned char state;
+
+ asm volatile(" .insn rrf,0xb9ab0000,%0,%1,%2,0"
+ : "=&d" (state)
+ : "a" (page_to_phys(page)),
+ "i" (ESSA_GET_STATE));
+ return state & 0x3f;
+}
+
+static inline void set_page_unused(struct page *page, int order)
+{
+ int i, rc;
+
+ for (i = 0; i < (1 << order); i++)
+ asm volatile(".insn rrf,0xb9ab0000,%0,%1,%2,0"
+ : "=&d" (rc)
+ : "a" (page_to_phys(page + i)),
+ "i" (ESSA_SET_UNUSED));
+}
+
+static inline void set_page_stable_dat(struct page *page, int order)
+{
+ int i, rc;
+
+ for (i = 0; i < (1 << order); i++)
+ asm volatile(".insn rrf,0xb9ab0000,%0,%1,%2,0"
+ : "=&d" (rc)
+ : "a" (page_to_phys(page + i)),
+ "i" (ESSA_SET_STABLE));
+}
+
+static inline void set_page_stable_nodat(struct page *page, int order)
+{
+ int i, rc;
+
+ for (i = 0; i < (1 << order); i++)
+ asm volatile(".insn rrf,0xb9ab0000,%0,%1,%2,0"
+ : "=&d" (rc)
+ : "a" (page_to_phys(page + i)),
+ "i" (ESSA_SET_STABLE_NODAT));
+}
+
+static void mark_kernel_pmd(pud_t *pud, unsigned long addr, unsigned long end)
+{
+ unsigned long next;
+ struct page *page;
+ pmd_t *pmd;
+
+ pmd = pmd_offset(pud, addr);
+ do {
+ next = pmd_addr_end(addr, end);
+ if (pmd_none(*pmd) || pmd_large(*pmd))
+ continue;
+ page = virt_to_page(pmd_val(*pmd));
+ set_bit(PG_arch_1, &page->flags);
+ } while (pmd++, addr = next, addr != end);
+}
+
+static void mark_kernel_pud(p4d_t *p4d, unsigned long addr, unsigned long end)
+{
+ unsigned long next;
+ struct page *page;
+ pud_t *pud;
+ int i;
+
+ pud = pud_offset(p4d, addr);
+ do {
+ next = pud_addr_end(addr, end);
+ if (pud_none(*pud) || pud_large(*pud))
+ continue;
+ if (!pud_folded(*pud)) {
+ page = virt_to_page(pud_val(*pud));
+ for (i = 0; i < 3; i++)
+ set_bit(PG_arch_1, &page[i].flags);
+ }
+ mark_kernel_pmd(pud, addr, next);
+ } while (pud++, addr = next, addr != end);
+}
+
+static void mark_kernel_p4d(pgd_t *pgd, unsigned long addr, unsigned long end)
+{
+ unsigned long next;
+ struct page *page;
+ p4d_t *p4d;
+ int i;
+
+ p4d = p4d_offset(pgd, addr);
+ do {
+ next = p4d_addr_end(addr, end);
+ if (p4d_none(*p4d))
+ continue;
+ if (!p4d_folded(*p4d)) {
+ page = virt_to_page(p4d_val(*p4d));
+ for (i = 0; i < 3; i++)
+ set_bit(PG_arch_1, &page[i].flags);
+ }
+ mark_kernel_pud(p4d, addr, next);
+ } while (p4d++, addr = next, addr != end);
+}
+
+static void mark_kernel_pgd(void)
+{
+ unsigned long addr, next;
+ struct page *page;
+ pgd_t *pgd;
+ int i;
+
+ addr = 0;
+ pgd = pgd_offset_k(addr);
+ do {
+ next = pgd_addr_end(addr, MODULES_END);
+ if (pgd_none(*pgd))
+ continue;
+ if (!pgd_folded(*pgd)) {
+ page = virt_to_page(pgd_val(*pgd));
+ for (i = 0; i < 3; i++)
+ set_bit(PG_arch_1, &page[i].flags);
+ }
+ mark_kernel_p4d(pgd, addr, next);
+ } while (pgd++, addr = next, addr != MODULES_END);
+}
+
+void __init cmma_init_nodat(void)
+{
+ struct memblock_region *reg;
+ struct page *page;
+ unsigned long start, end, ix;
+
+ if (cmma_flag < 2)
+ return;
+ /* Mark pages used in kernel page tables */
+ mark_kernel_pgd();
+
+ /* Set all kernel pages not used for page tables to stable/no-dat */
+ for_each_memblock(memory, reg) {
+ start = memblock_region_memory_base_pfn(reg);
+ end = memblock_region_memory_end_pfn(reg);
+ page = pfn_to_page(start);
+ for (ix = start; ix < end; ix++, page++) {
+ if (__test_and_clear_bit(PG_arch_1, &page->flags))
+ continue; /* skip page table pages */
+ if (!list_empty(&page->lru))
+ continue; /* skip free pages */
+ set_page_stable_nodat(page, 0);
+ }
+ }
+}
+
+void arch_free_page(struct page *page, int order)
+{
+ if (!cmma_flag)
+ return;
+ set_page_unused(page, order);
+}
+
+void arch_alloc_page(struct page *page, int order)
+{
+ if (!cmma_flag)
+ return;
+ if (cmma_flag < 2)
+ set_page_stable_dat(page, order);
+ else
+ set_page_stable_nodat(page, order);
+}
+
+void arch_set_page_dat(struct page *page, int order)
+{
+ if (!cmma_flag)
+ return;
+ set_page_stable_dat(page, order);
+}
+
+void arch_set_page_nodat(struct page *page, int order)
+{
+ if (cmma_flag < 2)
+ return;
+ set_page_stable_nodat(page, order);
+}
+
+int arch_test_page_nodat(struct page *page)
+{
+ unsigned char state;
+
+ if (cmma_flag < 2)
+ return 0;
+ state = get_page_state(page);
+ return !!(state & 0x20);
+}
+
+void arch_set_page_states(int make_stable)
+{
+ unsigned long flags, order, t;
+ struct list_head *l;
+ struct page *page;
+ struct zone *zone;
+
+ if (!cmma_flag)
+ return;
+ if (make_stable)
+ drain_local_pages(NULL);
+ for_each_populated_zone(zone) {
+ spin_lock_irqsave(&zone->lock, flags);
+ for_each_migratetype_order(order, t) {
+ list_for_each(l, &zone->free_area[order].free_list[t]) {
+ page = list_entry(l, struct page, lru);
+ if (make_stable)
+ set_page_stable_dat(page, order);
+ else
+ set_page_unused(page, order);
+ }
+ }
+ spin_unlock_irqrestore(&zone->lock, flags);
+ }
+}
diff --git a/arch/s390/mm/pageattr.c b/arch/s390/mm/pageattr.c
new file mode 100644
index 000000000..f8c6faab4
--- /dev/null
+++ b/arch/s390/mm/pageattr.c
@@ -0,0 +1,386 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright IBM Corp. 2011
+ * Author(s): Jan Glauber <jang@linux.vnet.ibm.com>
+ */
+#include <linux/hugetlb.h>
+#include <linux/mm.h>
+#include <asm/cacheflush.h>
+#include <asm/facility.h>
+#include <asm/pgtable.h>
+#include <asm/pgalloc.h>
+#include <asm/page.h>
+#include <asm/set_memory.h>
+
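+/*
+ * Set storage keys for the frame containing addr with SSKE and return the
+ * next address to be processed (see __storage_key_init_range()).
+ */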
+static inline unsigned long sske_frame(unsigned long addr, unsigned char skey)
+{
+ asm volatile(".insn rrf,0xb22b0000,%[skey],%[addr],1,0"
+ : [addr] "+a" (addr) : [skey] "d" (skey));
+ return addr;
+}
+
+void __storage_key_init_range(unsigned long start, unsigned long end)
+{
+ unsigned long boundary, size;
+
+ while (start < end) {
+ if (MACHINE_HAS_EDAT1) {
+ /* set storage keys for a 1MB frame */
+ size = 1UL << 20;
+ boundary = (start + size) & ~(size - 1);
+ if (boundary <= end) {
+ do {
+ start = sske_frame(start, PAGE_DEFAULT_KEY);
+ } while (start < boundary);
+ continue;
+ }
+ }
+ page_set_storage_key(start, PAGE_DEFAULT_KEY, 1);
+ start += PAGE_SIZE;
+ }
+}
+
+#ifdef CONFIG_PROC_FS
+atomic_long_t direct_pages_count[PG_DIRECT_MAP_MAX];
+
+void arch_report_meminfo(struct seq_file *m)
+{
+ seq_printf(m, "DirectMap4k: %8lu kB\n",
+ atomic_long_read(&direct_pages_count[PG_DIRECT_MAP_4K]) << 2);
+ seq_printf(m, "DirectMap1M: %8lu kB\n",
+ atomic_long_read(&direct_pages_count[PG_DIRECT_MAP_1M]) << 10);
+ seq_printf(m, "DirectMap2G: %8lu kB\n",
+ atomic_long_read(&direct_pages_count[PG_DIRECT_MAP_2G]) << 21);
+}
+#endif /* CONFIG_PROC_FS */
+
+static void pgt_set(unsigned long *old, unsigned long new, unsigned long addr,
+ unsigned long dtt)
+{
+ unsigned long table, mask;
+
+ mask = 0;
+ if (MACHINE_HAS_EDAT2) {
+ switch (dtt) {
+ case CRDTE_DTT_REGION3:
+ mask = ~(PTRS_PER_PUD * sizeof(pud_t) - 1);
+ break;
+ case CRDTE_DTT_SEGMENT:
+ mask = ~(PTRS_PER_PMD * sizeof(pmd_t) - 1);
+ break;
+ case CRDTE_DTT_PAGE:
+ mask = ~(PTRS_PER_PTE * sizeof(pte_t) - 1);
+ break;
+ }
+ table = (unsigned long)old & mask;
+ crdte(*old, new, table, dtt, addr, S390_lowcore.kernel_asce);
+ } else if (MACHINE_HAS_IDTE) {
+ cspg(old, *old, new);
+ } else {
+ csp((unsigned int *)old + 1, *old, new);
+ }
+}
+
+static int walk_pte_level(pmd_t *pmdp, unsigned long addr, unsigned long end,
+ unsigned long flags)
+{
+ pte_t *ptep, new;
+
+ ptep = pte_offset(pmdp, addr);
+ do {
+ new = *ptep;
+ if (pte_none(new))
+ return -EINVAL;
+ if (flags & SET_MEMORY_RO)
+ new = pte_wrprotect(new);
+ else if (flags & SET_MEMORY_RW)
+ new = pte_mkwrite(pte_mkdirty(new));
+ if (flags & SET_MEMORY_NX)
+ pte_val(new) |= _PAGE_NOEXEC;
+ else if (flags & SET_MEMORY_X)
+ pte_val(new) &= ~_PAGE_NOEXEC;
+ pgt_set((unsigned long *)ptep, pte_val(new), addr, CRDTE_DTT_PAGE);
+ ptep++;
+ addr += PAGE_SIZE;
+ cond_resched();
+ } while (addr < end);
+ return 0;
+}
+
+static int split_pmd_page(pmd_t *pmdp, unsigned long addr)
+{
+ unsigned long pte_addr, prot;
+ pte_t *pt_dir, *ptep;
+ pmd_t new;
+ int i, ro, nx;
+
+ pt_dir = vmem_pte_alloc();
+ if (!pt_dir)
+ return -ENOMEM;
+ pte_addr = pmd_pfn(*pmdp) << PAGE_SHIFT;
+ ro = !!(pmd_val(*pmdp) & _SEGMENT_ENTRY_PROTECT);
+ nx = !!(pmd_val(*pmdp) & _SEGMENT_ENTRY_NOEXEC);
+ prot = pgprot_val(ro ? PAGE_KERNEL_RO : PAGE_KERNEL);
+ if (!nx)
+ prot &= ~_PAGE_NOEXEC;
+ ptep = pt_dir;
+ for (i = 0; i < PTRS_PER_PTE; i++) {
+ pte_val(*ptep) = pte_addr | prot;
+ pte_addr += PAGE_SIZE;
+ ptep++;
+ }
+ pmd_val(new) = __pa(pt_dir) | _SEGMENT_ENTRY;
+ pgt_set((unsigned long *)pmdp, pmd_val(new), addr, CRDTE_DTT_SEGMENT);
+ update_page_count(PG_DIRECT_MAP_4K, PTRS_PER_PTE);
+ update_page_count(PG_DIRECT_MAP_1M, -1);
+ return 0;
+}
+
+static void modify_pmd_page(pmd_t *pmdp, unsigned long addr,
+ unsigned long flags)
+{
+ pmd_t new = *pmdp;
+
+ if (flags & SET_MEMORY_RO)
+ new = pmd_wrprotect(new);
+ else if (flags & SET_MEMORY_RW)
+ new = pmd_mkwrite(pmd_mkdirty(new));
+ if (flags & SET_MEMORY_NX)
+ pmd_val(new) |= _SEGMENT_ENTRY_NOEXEC;
+ else if (flags & SET_MEMORY_X)
+ pmd_val(new) &= ~_SEGMENT_ENTRY_NOEXEC;
+ pgt_set((unsigned long *)pmdp, pmd_val(new), addr, CRDTE_DTT_SEGMENT);
+}
+
+static int walk_pmd_level(pud_t *pudp, unsigned long addr, unsigned long end,
+ unsigned long flags)
+{
+ unsigned long next;
+ pmd_t *pmdp;
+ int rc = 0;
+
+ pmdp = pmd_offset(pudp, addr);
+ do {
+ if (pmd_none(*pmdp))
+ return -EINVAL;
+ next = pmd_addr_end(addr, end);
+ if (pmd_large(*pmdp)) {
+ if (addr & ~PMD_MASK || addr + PMD_SIZE > next) {
+ rc = split_pmd_page(pmdp, addr);
+ if (rc)
+ return rc;
+ continue;
+ }
+ modify_pmd_page(pmdp, addr, flags);
+ } else {
+ rc = walk_pte_level(pmdp, addr, next, flags);
+ if (rc)
+ return rc;
+ }
+ pmdp++;
+ addr = next;
+ cond_resched();
+ } while (addr < end);
+ return rc;
+}
+
+static int split_pud_page(pud_t *pudp, unsigned long addr)
+{
+ unsigned long pmd_addr, prot;
+ pmd_t *pm_dir, *pmdp;
+ pud_t new;
+ int i, ro, nx;
+
+ pm_dir = vmem_crst_alloc(_SEGMENT_ENTRY_EMPTY);
+ if (!pm_dir)
+ return -ENOMEM;
+ pmd_addr = pud_pfn(*pudp) << PAGE_SHIFT;
+ ro = !!(pud_val(*pudp) & _REGION_ENTRY_PROTECT);
+ nx = !!(pud_val(*pudp) & _REGION_ENTRY_NOEXEC);
+ prot = pgprot_val(ro ? SEGMENT_KERNEL_RO : SEGMENT_KERNEL);
+ if (!nx)
+ prot &= ~_SEGMENT_ENTRY_NOEXEC;
+ pmdp = pm_dir;
+ for (i = 0; i < PTRS_PER_PMD; i++) {
+ pmd_val(*pmdp) = pmd_addr | prot;
+ pmd_addr += PMD_SIZE;
+ pmdp++;
+ }
+ pud_val(new) = __pa(pm_dir) | _REGION3_ENTRY;
+ pgt_set((unsigned long *)pudp, pud_val(new), addr, CRDTE_DTT_REGION3);
+ update_page_count(PG_DIRECT_MAP_1M, PTRS_PER_PMD);
+ update_page_count(PG_DIRECT_MAP_2G, -1);
+ return 0;
+}
+
+static void modify_pud_page(pud_t *pudp, unsigned long addr,
+ unsigned long flags)
+{
+ pud_t new = *pudp;
+
+ if (flags & SET_MEMORY_RO)
+ new = pud_wrprotect(new);
+ else if (flags & SET_MEMORY_RW)
+ new = pud_mkwrite(pud_mkdirty(new));
+ if (flags & SET_MEMORY_NX)
+ pud_val(new) |= _REGION_ENTRY_NOEXEC;
+ else if (flags & SET_MEMORY_X)
+ pud_val(new) &= ~_REGION_ENTRY_NOEXEC;
+ pgt_set((unsigned long *)pudp, pud_val(new), addr, CRDTE_DTT_REGION3);
+}
+
+static int walk_pud_level(p4d_t *p4d, unsigned long addr, unsigned long end,
+ unsigned long flags)
+{
+ unsigned long next;
+ pud_t *pudp;
+ int rc = 0;
+
+ pudp = pud_offset(p4d, addr);
+ do {
+ if (pud_none(*pudp))
+ return -EINVAL;
+ next = pud_addr_end(addr, end);
+ if (pud_large(*pudp)) {
+ if (addr & ~PUD_MASK || addr + PUD_SIZE > next) {
+ rc = split_pud_page(pudp, addr);
+ if (rc)
+ break;
+ continue;
+ }
+ modify_pud_page(pudp, addr, flags);
+ } else {
+ rc = walk_pmd_level(pudp, addr, next, flags);
+ }
+ pudp++;
+ addr = next;
+ cond_resched();
+ } while (addr < end && !rc);
+ return rc;
+}
+
+static int walk_p4d_level(pgd_t *pgd, unsigned long addr, unsigned long end,
+ unsigned long flags)
+{
+ unsigned long next;
+ p4d_t *p4dp;
+ int rc = 0;
+
+ p4dp = p4d_offset(pgd, addr);
+ do {
+ if (p4d_none(*p4dp))
+ return -EINVAL;
+ next = p4d_addr_end(addr, end);
+ rc = walk_pud_level(p4dp, addr, next, flags);
+ p4dp++;
+ addr = next;
+ cond_resched();
+ } while (addr < end && !rc);
+ return rc;
+}
+
+static DEFINE_MUTEX(cpa_mutex);
+
+static int change_page_attr(unsigned long addr, unsigned long end,
+ unsigned long flags)
+{
+ unsigned long next;
+ int rc = -EINVAL;
+ pgd_t *pgdp;
+
+ if (addr == end)
+ return 0;
+ if (end >= MODULES_END)
+ return -EINVAL;
+ mutex_lock(&cpa_mutex);
+ pgdp = pgd_offset_k(addr);
+ do {
+ if (pgd_none(*pgdp))
+ break;
+ next = pgd_addr_end(addr, end);
+ rc = walk_p4d_level(pgdp, addr, next, flags);
+ if (rc)
+ break;
+ cond_resched();
+ } while (pgdp++, addr = next, addr < end && !rc);
+ mutex_unlock(&cpa_mutex);
+ return rc;
+}
+
+int __set_memory(unsigned long addr, int numpages, unsigned long flags)
+{
+ if (!MACHINE_HAS_NX)
+ flags &= ~(SET_MEMORY_NX | SET_MEMORY_X);
+ if (!flags)
+ return 0;
+ addr &= PAGE_MASK;
+ return change_page_attr(addr, addr + numpages * PAGE_SIZE, flags);
+}
+
+#ifdef CONFIG_DEBUG_PAGEALLOC
+
+static void ipte_range(pte_t *pte, unsigned long address, int nr)
+{
+ int i;
+
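+ /* the IPTE-range form invalidates all ptes of the range in one go */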
+ if (test_facility(13)) {
+ __ptep_ipte_range(address, nr - 1, pte, IPTE_GLOBAL);
+ return;
+ }
+ for (i = 0; i < nr; i++) {
+ __ptep_ipte(address, pte, 0, 0, IPTE_GLOBAL);
+ address += PAGE_SIZE;
+ pte++;
+ }
+}
+
+void __kernel_map_pages(struct page *page, int numpages, int enable)
+{
+ unsigned long address;
+ int nr, i, j;
+ pgd_t *pgd;
+ p4d_t *p4d;
+ pud_t *pud;
+ pmd_t *pmd;
+ pte_t *pte;
+
+ for (i = 0; i < numpages;) {
+ address = page_to_phys(page + i);
+ pgd = pgd_offset_k(address);
+ p4d = p4d_offset(pgd, address);
+ pud = pud_offset(p4d, address);
+ pmd = pmd_offset(pud, address);
+ pte = pte_offset_kernel(pmd, address);
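+ /* number of ptes from this entry up to the end of the page table */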
+ nr = (unsigned long)pte >> ilog2(sizeof(long));
+ nr = PTRS_PER_PTE - (nr & (PTRS_PER_PTE - 1));
+ nr = min(numpages - i, nr);
+ if (enable) {
+ for (j = 0; j < nr; j++) {
+ pte_val(*pte) &= ~_PAGE_INVALID;
+ address += PAGE_SIZE;
+ pte++;
+ }
+ } else {
+ ipte_range(pte, address, nr);
+ }
+ i += nr;
+ }
+}
+
+#ifdef CONFIG_HIBERNATION
+bool kernel_page_present(struct page *page)
+{
+ unsigned long addr;
+ int cc;
+
+ addr = page_to_phys(page);
+ asm volatile(
+ " lra %1,0(%1)\n"
+ " ipm %0\n"
+ " srl %0,28"
+ : "=d" (cc), "+a" (addr) : : "cc");
+ return cc == 0;
+}
+#endif /* CONFIG_HIBERNATION */
+
+#endif /* CONFIG_DEBUG_PAGEALLOC */
diff --git a/arch/s390/mm/pgalloc.c b/arch/s390/mm/pgalloc.c
new file mode 100644
index 000000000..3f3c13a4d
--- /dev/null
+++ b/arch/s390/mm/pgalloc.c
@@ -0,0 +1,681 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Page table allocation functions
+ *
+ * Copyright IBM Corp. 2016
+ * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>
+ */
+
+#include <linux/sysctl.h>
+#include <linux/slab.h>
+#include <linux/mm.h>
+#include <asm/mmu_context.h>
+#include <asm/pgalloc.h>
+#include <asm/gmap.h>
+#include <asm/tlb.h>
+#include <asm/tlbflush.h>
+
+#ifdef CONFIG_PGSTE
+
+static int page_table_allocate_pgste_min = 0;
+static int page_table_allocate_pgste_max = 1;
+int page_table_allocate_pgste = 0;
+EXPORT_SYMBOL(page_table_allocate_pgste);
+
+static struct ctl_table page_table_sysctl[] = {
+ {
+ .procname = "allocate_pgste",
+ .data = &page_table_allocate_pgste,
+ .maxlen = sizeof(int),
+ .mode = S_IRUGO | S_IWUSR,
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = &page_table_allocate_pgste_min,
+ .extra2 = &page_table_allocate_pgste_max,
+ },
+ { }
+};
+
+static struct ctl_table page_table_sysctl_dir[] = {
+ {
+ .procname = "vm",
+ .maxlen = 0,
+ .mode = 0555,
+ .child = page_table_sysctl,
+ },
+ { }
+};
+
+static int __init page_table_register_sysctl(void)
+{
+ return register_sysctl_table(page_table_sysctl_dir) ? 0 : -ENOMEM;
+}
+__initcall(page_table_register_sysctl);
+
+#endif /* CONFIG_PGSTE */
+
+unsigned long *crst_table_alloc(struct mm_struct *mm)
+{
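+ /* a crst table has 2048 entries of 8 bytes and thus spans four pages */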
+ struct page *page = alloc_pages(GFP_KERNEL, 2);
+
+ if (!page)
+ return NULL;
+ arch_set_page_dat(page, 2);
+ return (unsigned long *) page_to_phys(page);
+}
+
+void crst_table_free(struct mm_struct *mm, unsigned long *table)
+{
+ free_pages((unsigned long) table, 2);
+}
+
+static void __crst_table_upgrade(void *arg)
+{
+ struct mm_struct *mm = arg;
+
+ /* we must change all active ASCEs to avoid the creation of new TLB entries */
+ if (current->active_mm == mm) {
+ S390_lowcore.user_asce = mm->context.asce;
+ if (current->thread.mm_segment == USER_DS) {
+ __ctl_load(S390_lowcore.user_asce, 1, 1);
+ /* Mark user-ASCE present in CR1 */
+ clear_cpu_flag(CIF_ASCE_PRIMARY);
+ }
+ if (current->thread.mm_segment == USER_DS_SACF) {
+ __ctl_load(S390_lowcore.user_asce, 7, 7);
+ /* enable_sacf_uaccess does all or nothing */
+ WARN_ON(!test_cpu_flag(CIF_ASCE_SECONDARY));
+ }
+ }
+ __tlb_flush_local();
+}
+
+int crst_table_upgrade(struct mm_struct *mm, unsigned long end)
+{
+ unsigned long *table, *pgd;
+ int rc, notify;
+
+ /* upgrade should only happen from 3 to 4, 3 to 5, or 4 to 5 levels */
+ VM_BUG_ON(mm->context.asce_limit < _REGION2_SIZE);
+ rc = 0;
+ notify = 0;
+ while (mm->context.asce_limit < end) {
+ table = crst_table_alloc(mm);
+ if (!table) {
+ rc = -ENOMEM;
+ break;
+ }
+ spin_lock_bh(&mm->page_table_lock);
+ pgd = (unsigned long *) mm->pgd;
+ if (mm->context.asce_limit == _REGION2_SIZE) {
+ crst_table_init(table, _REGION2_ENTRY_EMPTY);
+ p4d_populate(mm, (p4d_t *) table, (pud_t *) pgd);
+ mm->pgd = (pgd_t *) table;
+ mm->context.asce_limit = _REGION1_SIZE;
+ mm->context.asce = __pa(mm->pgd) | _ASCE_TABLE_LENGTH |
+ _ASCE_USER_BITS | _ASCE_TYPE_REGION2;
+ mm_inc_nr_puds(mm);
+ } else {
+ crst_table_init(table, _REGION1_ENTRY_EMPTY);
+ pgd_populate(mm, (pgd_t *) table, (p4d_t *) pgd);
+ mm->pgd = (pgd_t *) table;
+ mm->context.asce_limit = -PAGE_SIZE;
+ mm->context.asce = __pa(mm->pgd) | _ASCE_TABLE_LENGTH |
+ _ASCE_USER_BITS | _ASCE_TYPE_REGION1;
+ }
+ notify = 1;
+ spin_unlock_bh(&mm->page_table_lock);
+ }
+ if (notify)
+ on_each_cpu(__crst_table_upgrade, mm, 0);
+ return rc;
+}
+
+void crst_table_downgrade(struct mm_struct *mm)
+{
+ pgd_t *pgd;
+
+ /* downgrade should only happen from 3 to 2 levels (compat only) */
+ VM_BUG_ON(mm->context.asce_limit != _REGION2_SIZE);
+
+ if (current->active_mm == mm) {
+ clear_user_asce();
+ __tlb_flush_mm(mm);
+ }
+
+ pgd = mm->pgd;
+ mm->pgd = (pgd_t *) (pgd_val(*pgd) & _REGION_ENTRY_ORIGIN);
+ mm->context.asce_limit = _REGION3_SIZE;
+ mm->context.asce = __pa(mm->pgd) | _ASCE_TABLE_LENGTH |
+ _ASCE_USER_BITS | _ASCE_TYPE_SEGMENT;
+ crst_table_free(mm, (unsigned long *) pgd);
+
+ if (current->active_mm == mm)
+ set_user_asce(mm);
+}
+
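+/* Atomically toggle the given bits in *v and return the new value. */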
+static inline unsigned int atomic_xor_bits(atomic_t *v, unsigned int bits)
+{
+ unsigned int old, new;
+
+ do {
+ old = atomic_read(v);
+ new = old ^ bits;
+ } while (atomic_cmpxchg(v, old, new) != old);
+ return new;
+}
+
+#ifdef CONFIG_PGSTE
+
+struct page *page_table_alloc_pgste(struct mm_struct *mm)
+{
+ struct page *page;
+ u64 *table;
+
+ page = alloc_page(GFP_KERNEL);
+ if (page) {
+ table = (u64 *)page_to_phys(page);
+ memset64(table, _PAGE_INVALID, PTRS_PER_PTE);
+ memset64(table + PTRS_PER_PTE, 0, PTRS_PER_PTE);
+ }
+ return page;
+}
+
+void page_table_free_pgste(struct page *page)
+{
+ __free_page(page);
+}
+
+#endif /* CONFIG_PGSTE */
+
+/*
+ * page table entry allocation/free routines.
+ */
+unsigned long *page_table_alloc(struct mm_struct *mm)
+{
+ unsigned long *table;
+ struct page *page;
+ unsigned int mask, bit;
+
+ /* Try to get a fragment of a 4K page as a 2K page table */
+ if (!mm_alloc_pgste(mm)) {
+ table = NULL;
+ spin_lock_bh(&mm->context.lock);
+ if (!list_empty(&mm->context.pgtable_list)) {
+ page = list_first_entry(&mm->context.pgtable_list,
+ struct page, lru);
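+ /*
+ * The upper byte of page->_refcount tracks the 2K fragments:
+ * bits 0-1 mark halves in use, bits 4-5 mark halves whose
+ * release is still pending; a half may be handed out only
+ * if both of its bits are clear.
+ */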
+ mask = atomic_read(&page->_refcount) >> 24;
+ mask = (mask | (mask >> 4)) & 3;
+ if (mask != 3) {
+ table = (unsigned long *) page_to_phys(page);
+ bit = mask & 1; /* =1 -> second 2K */
+ if (bit)
+ table += PTRS_PER_PTE;
+ atomic_xor_bits(&page->_refcount,
+ 1U << (bit + 24));
+ list_del(&page->lru);
+ }
+ }
+ spin_unlock_bh(&mm->context.lock);
+ if (table)
+ return table;
+ }
+ /* Allocate a fresh page */
+ page = alloc_page(GFP_KERNEL);
+ if (!page)
+ return NULL;
+ if (!pgtable_page_ctor(page)) {
+ __free_page(page);
+ return NULL;
+ }
+ arch_set_page_dat(page, 0);
+ /* Initialize page table */
+ table = (unsigned long *) page_to_phys(page);
+ if (mm_alloc_pgste(mm)) {
+ /* Return 4K page table with PGSTEs */
+ atomic_xor_bits(&page->_refcount, 3 << 24);
+ memset64((u64 *)table, _PAGE_INVALID, PTRS_PER_PTE);
+ memset64((u64 *)table + PTRS_PER_PTE, 0, PTRS_PER_PTE);
+ } else {
+ /* Return the first 2K fragment of the page */
+ atomic_xor_bits(&page->_refcount, 1 << 24);
+ memset64((u64 *)table, _PAGE_INVALID, 2 * PTRS_PER_PTE);
+ spin_lock_bh(&mm->context.lock);
+ list_add(&page->lru, &mm->context.pgtable_list);
+ spin_unlock_bh(&mm->context.lock);
+ }
+ return table;
+}
+
+void page_table_free(struct mm_struct *mm, unsigned long *table)
+{
+ struct page *page;
+ unsigned int bit, mask;
+
+ page = pfn_to_page(__pa(table) >> PAGE_SHIFT);
+ if (!mm_alloc_pgste(mm)) {
+ /* Free 2K page table fragment of a 4K page */
+ bit = (__pa(table) & ~PAGE_MASK)/(PTRS_PER_PTE*sizeof(pte_t));
+ spin_lock_bh(&mm->context.lock);
+ mask = atomic_xor_bits(&page->_refcount, 0x11U << (bit + 24));
+ mask >>= 24;
+ if (mask & 3)
+ list_add(&page->lru, &mm->context.pgtable_list);
+ else
+ list_del(&page->lru);
+ spin_unlock_bh(&mm->context.lock);
+ mask = atomic_xor_bits(&page->_refcount, 0x10U << (bit + 24));
+ mask >>= 24;
+ if (mask != 0)
+ return;
+ } else {
+ atomic_xor_bits(&page->_refcount, 3U << 24);
+ }
+
+ pgtable_page_dtor(page);
+ __free_page(page);
+}
+
+void page_table_free_rcu(struct mmu_gather *tlb, unsigned long *table,
+ unsigned long vmaddr)
+{
+ struct mm_struct *mm;
+ struct page *page;
+ unsigned int bit, mask;
+
+ mm = tlb->mm;
+ page = pfn_to_page(__pa(table) >> PAGE_SHIFT);
+ if (mm_alloc_pgste(mm)) {
+ gmap_unlink(mm, table, vmaddr);
+ table = (unsigned long *) (__pa(table) | 3);
+ tlb_remove_table(tlb, table);
+ return;
+ }
+ bit = (__pa(table) & ~PAGE_MASK) / (PTRS_PER_PTE*sizeof(pte_t));
+ spin_lock_bh(&mm->context.lock);
+ mask = atomic_xor_bits(&page->_refcount, 0x11U << (bit + 24));
+ mask >>= 24;
+ if (mask & 3)
+ list_add_tail(&page->lru, &mm->context.pgtable_list);
+ else
+ list_del(&page->lru);
+ spin_unlock_bh(&mm->context.lock);
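+ /* tag the pointer with the 2K half being freed; decoded in __tlb_remove_table() */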
+ table = (unsigned long *) (__pa(table) | (1U << bit));
+ tlb_remove_table(tlb, table);
+}
+
+static void __tlb_remove_table(void *_table)
+{
+ unsigned int mask = (unsigned long) _table & 3;
+ void *table = (void *)((unsigned long) _table ^ mask);
+ struct page *page = pfn_to_page(__pa(table) >> PAGE_SHIFT);
+
+ switch (mask) {
+ case 0: /* pmd, pud, or p4d */
+ free_pages((unsigned long) table, 2);
+ break;
+ case 1: /* lower 2K of a 4K page table */
+ case 2: /* higher 2K of a 4K page table */
+ mask = atomic_xor_bits(&page->_refcount, mask << (4 + 24));
+ mask >>= 24;
+ if (mask != 0)
+ break;
+ /* fallthrough */
+ case 3: /* 4K page table with pgstes */
+ if (mask & 3)
+ atomic_xor_bits(&page->_refcount, 3 << 24);
+ pgtable_page_dtor(page);
+ __free_page(page);
+ break;
+ }
+}
+
+static void tlb_remove_table_smp_sync(void *arg)
+{
+ /* Simply deliver the interrupt */
+}
+
+static void tlb_remove_table_one(void *table)
+{
+ /*
+ * This isn't an RCU grace period and hence the page-tables cannot be
+ * assumed to be actually RCU-freed.
+ *
+ * It is however sufficient for software page-table walkers that rely
+ * on IRQ disabling. See the comment near struct mmu_table_batch.
+ */
+ smp_call_function(tlb_remove_table_smp_sync, NULL, 1);
+ __tlb_remove_table(table);
+}
+
+static void tlb_remove_table_rcu(struct rcu_head *head)
+{
+ struct mmu_table_batch *batch;
+ int i;
+
+ batch = container_of(head, struct mmu_table_batch, rcu);
+
+ for (i = 0; i < batch->nr; i++)
+ __tlb_remove_table(batch->tables[i]);
+
+ free_page((unsigned long)batch);
+}
+
+void tlb_table_flush(struct mmu_gather *tlb)
+{
+ struct mmu_table_batch **batch = &tlb->batch;
+
+ if (*batch) {
+ call_rcu_sched(&(*batch)->rcu, tlb_remove_table_rcu);
+ *batch = NULL;
+ }
+}
+
+void tlb_remove_table(struct mmu_gather *tlb, void *table)
+{
+ struct mmu_table_batch **batch = &tlb->batch;
+
+ tlb->mm->context.flush_mm = 1;
+ if (*batch == NULL) {
+ *batch = (struct mmu_table_batch *)
+ __get_free_page(GFP_NOWAIT | __GFP_NOWARN);
+ if (*batch == NULL) {
+ __tlb_flush_mm_lazy(tlb->mm);
+ tlb_remove_table_one(table);
+ return;
+ }
+ (*batch)->nr = 0;
+ }
+ (*batch)->tables[(*batch)->nr++] = table;
+ if ((*batch)->nr == MAX_TABLE_BATCH)
+ tlb_flush_mmu(tlb);
+}
+
+/*
+ * Base infrastructure required to generate basic ASCEs, region, segment,
+ * and page tables that do not make use of enhanced features like EDAT1.
+ */
+
+static struct kmem_cache *base_pgt_cache;
+
+static unsigned long base_pgt_alloc(void)
+{
+ u64 *table;
+
+ table = kmem_cache_alloc(base_pgt_cache, GFP_KERNEL);
+ if (table)
+ memset64(table, _PAGE_INVALID, PTRS_PER_PTE);
+ return (unsigned long) table;
+}
+
+static void base_pgt_free(unsigned long table)
+{
+ kmem_cache_free(base_pgt_cache, (void *) table);
+}
+
+static unsigned long base_crst_alloc(unsigned long val)
+{
+ unsigned long table;
+
+ table = __get_free_pages(GFP_KERNEL, CRST_ALLOC_ORDER);
+ if (table)
+ crst_table_init((unsigned long *)table, val);
+ return table;
+}
+
+static void base_crst_free(unsigned long table)
+{
+ free_pages(table, CRST_ALLOC_ORDER);
+}
+
+#define BASE_ADDR_END_FUNC(NAME, SIZE) \
+static inline unsigned long base_##NAME##_addr_end(unsigned long addr, \
+ unsigned long end) \
+{ \
+ unsigned long next = (addr + (SIZE)) & ~((SIZE) - 1); \
+ \
+ return (next - 1) < (end - 1) ? next : end; \
+}
+
+BASE_ADDR_END_FUNC(page, _PAGE_SIZE)
+BASE_ADDR_END_FUNC(segment, _SEGMENT_SIZE)
+BASE_ADDR_END_FUNC(region3, _REGION3_SIZE)
+BASE_ADDR_END_FUNC(region2, _REGION2_SIZE)
+BASE_ADDR_END_FUNC(region1, _REGION1_SIZE)
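Editorial aside: a quick worked example of the macro above, assuming the usual s390 value _SEGMENT_SIZE = 1 MB (0x100000):

/*
 * base_segment_addr_end(0x00180000, 0x00500000) == 0x00200000
 *	(next 1 MB boundary above addr, still below end)
 * base_segment_addr_end(0x00180000, 0x001c0000) == 0x001c0000
 *	(the boundary would overshoot, so the capped end is returned)
 */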
+
+static inline unsigned long base_lra(unsigned long address)
+{
+ unsigned long real;
+
+ asm volatile(
+ " lra %0,0(%1)\n"
+ : "=d" (real) : "a" (address) : "cc");
+ return real;
+}
+
+static int base_page_walk(unsigned long origin, unsigned long addr,
+ unsigned long end, int alloc)
+{
+ unsigned long *pte, next;
+
+ if (!alloc)
+ return 0;
+ pte = (unsigned long *) origin;
+ pte += (addr & _PAGE_INDEX) >> _PAGE_SHIFT;
+ do {
+ next = base_page_addr_end(addr, end);
+ *pte = base_lra(addr);
+ } while (pte++, addr = next, addr < end);
+ return 0;
+}
+
+static int base_segment_walk(unsigned long origin, unsigned long addr,
+ unsigned long end, int alloc)
+{
+ unsigned long *ste, next, table;
+ int rc;
+
+ ste = (unsigned long *) origin;
+ ste += (addr & _SEGMENT_INDEX) >> _SEGMENT_SHIFT;
+ do {
+ next = base_segment_addr_end(addr, end);
+ if (*ste & _SEGMENT_ENTRY_INVALID) {
+ if (!alloc)
+ continue;
+ table = base_pgt_alloc();
+ if (!table)
+ return -ENOMEM;
+ *ste = table | _SEGMENT_ENTRY;
+ }
+ table = *ste & _SEGMENT_ENTRY_ORIGIN;
+ rc = base_page_walk(table, addr, next, alloc);
+ if (rc)
+ return rc;
+ if (!alloc)
+ base_pgt_free(table);
+ cond_resched();
+ } while (ste++, addr = next, addr < end);
+ return 0;
+}
+
+static int base_region3_walk(unsigned long origin, unsigned long addr,
+ unsigned long end, int alloc)
+{
+ unsigned long *rtte, next, table;
+ int rc;
+
+ rtte = (unsigned long *) origin;
+ rtte += (addr & _REGION3_INDEX) >> _REGION3_SHIFT;
+ do {
+ next = base_region3_addr_end(addr, end);
+ if (*rtte & _REGION_ENTRY_INVALID) {
+ if (!alloc)
+ continue;
+ table = base_crst_alloc(_SEGMENT_ENTRY_EMPTY);
+ if (!table)
+ return -ENOMEM;
+ *rtte = table | _REGION3_ENTRY;
+ }
+ table = *rtte & _REGION_ENTRY_ORIGIN;
+ rc = base_segment_walk(table, addr, next, alloc);
+ if (rc)
+ return rc;
+ if (!alloc)
+ base_crst_free(table);
+ } while (rtte++, addr = next, addr < end);
+ return 0;
+}
+
+static int base_region2_walk(unsigned long origin, unsigned long addr,
+ unsigned long end, int alloc)
+{
+ unsigned long *rste, next, table;
+ int rc;
+
+ rste = (unsigned long *) origin;
+ rste += (addr & _REGION2_INDEX) >> _REGION2_SHIFT;
+ do {
+ next = base_region2_addr_end(addr, end);
+ if (*rste & _REGION_ENTRY_INVALID) {
+ if (!alloc)
+ continue;
+ table = base_crst_alloc(_REGION3_ENTRY_EMPTY);
+ if (!table)
+ return -ENOMEM;
+ *rste = table | _REGION2_ENTRY;
+ }
+ table = *rste & _REGION_ENTRY_ORIGIN;
+ rc = base_region3_walk(table, addr, next, alloc);
+ if (rc)
+ return rc;
+ if (!alloc)
+ base_crst_free(table);
+ } while (rste++, addr = next, addr < end);
+ return 0;
+}
+
+static int base_region1_walk(unsigned long origin, unsigned long addr,
+ unsigned long end, int alloc)
+{
+ unsigned long *rfte, next, table;
+ int rc;
+
+ rfte = (unsigned long *) origin;
+ rfte += (addr & _REGION1_INDEX) >> _REGION1_SHIFT;
+ do {
+ next = base_region1_addr_end(addr, end);
+ if (*rfte & _REGION_ENTRY_INVALID) {
+ if (!alloc)
+ continue;
+ table = base_crst_alloc(_REGION2_ENTRY_EMPTY);
+ if (!table)
+ return -ENOMEM;
+ *rfte = table | _REGION1_ENTRY;
+ }
+ table = *rfte & _REGION_ENTRY_ORIGIN;
+ rc = base_region2_walk(table, addr, next, alloc);
+ if (rc)
+ return rc;
+ if (!alloc)
+ base_crst_free(table);
+ } while (rfte++, addr = next, addr < end);
+ return 0;
+}
+
+/**
+ * base_asce_free - free asce and tables returned from base_asce_alloc()
+ * @asce: asce to be freed
+ *
+ * Frees all region, segment, and page tables that were allocated with a
+ * corresponding base_asce_alloc() call.
+ */
+void base_asce_free(unsigned long asce)
+{
+ unsigned long table = asce & _ASCE_ORIGIN;
+
+ if (!asce)
+ return;
+ switch (asce & _ASCE_TYPE_MASK) {
+ case _ASCE_TYPE_SEGMENT:
+ base_segment_walk(table, 0, _REGION3_SIZE, 0);
+ break;
+ case _ASCE_TYPE_REGION3:
+ base_region3_walk(table, 0, _REGION2_SIZE, 0);
+ break;
+ case _ASCE_TYPE_REGION2:
+ base_region2_walk(table, 0, _REGION1_SIZE, 0);
+ break;
+ case _ASCE_TYPE_REGION1:
+ base_region1_walk(table, 0, -_PAGE_SIZE, 0);
+ break;
+ }
+ base_crst_free(table);
+}
+
+static int base_pgt_cache_init(void)
+{
+ static DEFINE_MUTEX(base_pgt_cache_mutex);
+ unsigned long sz = _PAGE_TABLE_SIZE;
+
+ if (base_pgt_cache)
+ return 0;
+ mutex_lock(&base_pgt_cache_mutex);
+ if (!base_pgt_cache)
+ base_pgt_cache = kmem_cache_create("base_pgt", sz, sz, 0, NULL);
+ mutex_unlock(&base_pgt_cache_mutex);
+ return base_pgt_cache ? 0 : -ENOMEM;
+}
+
+/**
+ * base_asce_alloc - create kernel mapping without enhanced DAT features
+ * @addr: virtual start address of kernel mapping
+ * @num_pages: number of consecutive pages
+ *
+ * Generate an asce, including all required region, segment and page tables,
+ * that can be used to access the virtual kernel mapping. Unlike a regular
+ * kernel asce, the returned asce does not make use of any enhanced DAT
+ * features such as large pages. This is required for some I/O functions
+ * that pass an asce, e.g. some service call requests.
+ *
+ * Note: the returned asce may NEVER be attached to any cpu. It may only be
+ * used for I/O requests. TLB entries that might result because the
+ * asce was attached to a cpu won't be cleared.
+ */
+unsigned long base_asce_alloc(unsigned long addr, unsigned long num_pages)
+{
+ unsigned long asce, table, end;
+ int rc;
+
+ if (base_pgt_cache_init())
+ return 0;
+ end = addr + num_pages * PAGE_SIZE;
+ if (end <= _REGION3_SIZE) {
+ table = base_crst_alloc(_SEGMENT_ENTRY_EMPTY);
+ if (!table)
+ return 0;
+ rc = base_segment_walk(table, addr, end, 1);
+ asce = table | _ASCE_TYPE_SEGMENT | _ASCE_TABLE_LENGTH;
+ } else if (end <= _REGION2_SIZE) {
+ table = base_crst_alloc(_REGION3_ENTRY_EMPTY);
+ if (!table)
+ return 0;
+ rc = base_region3_walk(table, addr, end, 1);
+ asce = table | _ASCE_TYPE_REGION3 | _ASCE_TABLE_LENGTH;
+ } else if (end <= _REGION1_SIZE) {
+ table = base_crst_alloc(_REGION2_ENTRY_EMPTY);
+ if (!table)
+ return 0;
+ rc = base_region2_walk(table, addr, end, 1);
+ asce = table | _ASCE_TYPE_REGION2 | _ASCE_TABLE_LENGTH;
+ } else {
+ table = base_crst_alloc(_REGION1_ENTRY_EMPTY);
+ if (!table)
+ return 0;
+ rc = base_region1_walk(table, addr, end, 1);
+ asce = table | _ASCE_TYPE_REGION1 | _ASCE_TABLE_LENGTH;
+ }
+ if (rc) {
+ base_asce_free(asce);
+ asce = 0;
+ }
+ return asce;
+}
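Editorial aside, not part of the patch: a hedged usage sketch of the pair above for an I/O request covering a kernel buffer; do_io_with_asce() is a hypothetical placeholder for whatever actually consumes the asce.

static int example_io(void *buf, unsigned long num_pages)
{
	unsigned long asce;
	int rc;

	asce = base_asce_alloc((unsigned long)buf, num_pages);
	if (!asce)
		return -ENOMEM;
	rc = do_io_with_asce(asce, buf);	/* hypothetical consumer of the asce */
	base_asce_free(asce);
	return rc;
}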
diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c
new file mode 100644
index 000000000..9f3903089
--- /dev/null
+++ b/arch/s390/mm/pgtable.c
@@ -0,0 +1,1126 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright IBM Corp. 2007, 2011
+ * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>
+ */
+
+#include <linux/sched.h>
+#include <linux/kernel.h>
+#include <linux/errno.h>
+#include <linux/gfp.h>
+#include <linux/mm.h>
+#include <linux/swap.h>
+#include <linux/smp.h>
+#include <linux/spinlock.h>
+#include <linux/rcupdate.h>
+#include <linux/slab.h>
+#include <linux/swapops.h>
+#include <linux/sysctl.h>
+#include <linux/ksm.h>
+#include <linux/mman.h>
+
+#include <asm/pgtable.h>
+#include <asm/pgalloc.h>
+#include <asm/tlb.h>
+#include <asm/tlbflush.h>
+#include <asm/mmu_context.h>
+#include <asm/page-states.h>
+
+static inline void ptep_ipte_local(struct mm_struct *mm, unsigned long addr,
+ pte_t *ptep, int nodat)
+{
+ unsigned long opt, asce;
+
+ if (MACHINE_HAS_TLB_GUEST) {
+ opt = 0;
+ asce = READ_ONCE(mm->context.gmap_asce);
+ if (asce == 0UL || nodat)
+ opt |= IPTE_NODAT;
+ if (asce != -1UL) {
+ asce = asce ? : mm->context.asce;
+ opt |= IPTE_GUEST_ASCE;
+ }
+ __ptep_ipte(addr, ptep, opt, asce, IPTE_LOCAL);
+ } else {
+ __ptep_ipte(addr, ptep, 0, 0, IPTE_LOCAL);
+ }
+}
+
+static inline void ptep_ipte_global(struct mm_struct *mm, unsigned long addr,
+ pte_t *ptep, int nodat)
+{
+ unsigned long opt, asce;
+
+ if (MACHINE_HAS_TLB_GUEST) {
+ opt = 0;
+ asce = READ_ONCE(mm->context.gmap_asce);
+ if (asce == 0UL || nodat)
+ opt |= IPTE_NODAT;
+ if (asce != -1UL) {
+ asce = asce ? : mm->context.asce;
+ opt |= IPTE_GUEST_ASCE;
+ }
+ __ptep_ipte(addr, ptep, opt, asce, IPTE_GLOBAL);
+ } else {
+ __ptep_ipte(addr, ptep, 0, 0, IPTE_GLOBAL);
+ }
+}
+
+static inline pte_t ptep_flush_direct(struct mm_struct *mm,
+ unsigned long addr, pte_t *ptep,
+ int nodat)
+{
+ pte_t old;
+
+ old = *ptep;
+ if (unlikely(pte_val(old) & _PAGE_INVALID))
+ return old;
+ atomic_inc(&mm->context.flush_count);
+ if (MACHINE_HAS_TLB_LC &&
+ cpumask_equal(mm_cpumask(mm), cpumask_of(smp_processor_id())))
+ ptep_ipte_local(mm, addr, ptep, nodat);
+ else
+ ptep_ipte_global(mm, addr, ptep, nodat);
+ atomic_dec(&mm->context.flush_count);
+ return old;
+}
+
+static inline pte_t ptep_flush_lazy(struct mm_struct *mm,
+ unsigned long addr, pte_t *ptep,
+ int nodat)
+{
+ pte_t old;
+
+ old = *ptep;
+ if (unlikely(pte_val(old) & _PAGE_INVALID))
+ return old;
+ atomic_inc(&mm->context.flush_count);
+ if (cpumask_equal(&mm->context.cpu_attach_mask,
+ cpumask_of(smp_processor_id()))) {
+ pte_val(*ptep) |= _PAGE_INVALID;
+ mm->context.flush_mm = 1;
+ } else
+ ptep_ipte_global(mm, addr, ptep, nodat);
+ atomic_dec(&mm->context.flush_count);
+ return old;
+}
+
+static inline pgste_t pgste_get_lock(pte_t *ptep)
+{
+ unsigned long new = 0;
+#ifdef CONFIG_PGSTE
+ unsigned long old;
+
+ asm(
+ " lg %0,%2\n"
+ "0: lgr %1,%0\n"
+ " nihh %0,0xff7f\n" /* clear PCL bit in old */
+ " oihh %1,0x0080\n" /* set PCL bit in new */
+ " csg %0,%1,%2\n"
+ " jl 0b\n"
+ : "=&d" (old), "=&d" (new), "=Q" (ptep[PTRS_PER_PTE])
+ : "Q" (ptep[PTRS_PER_PTE]) : "cc", "memory");
+#endif
+ return __pgste(new);
+}
+
+static inline void pgste_set_unlock(pte_t *ptep, pgste_t pgste)
+{
+#ifdef CONFIG_PGSTE
+ asm(
+ " nihh %1,0xff7f\n" /* clear PCL bit */
+ " stg %1,%0\n"
+ : "=Q" (ptep[PTRS_PER_PTE])
+ : "d" (pgste_val(pgste)), "Q" (ptep[PTRS_PER_PTE])
+ : "cc", "memory");
+#endif
+}
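Editorial aside: conceptually, the CSG loop in pgste_get_lock() is a spinning cmpxchg on the 64-bit PGSTE that sets the PCL (lock) bit, and pgste_set_unlock() stores the value back with PCL cleared. A pseudo-C rendering of the acquire, assuming the usual PGSTE_PCL_BIT definition (illustrative only; the real code has to stay in asm):

static inline unsigned long pgste_lock_sketch(unsigned long *pgstep)
{
	unsigned long old, new;

	do {
		old = READ_ONCE(*pgstep) & ~PGSTE_PCL_BIT;	/* expect the unlocked value */
		new = old | PGSTE_PCL_BIT;			/* locked value to install */
	} while (cmpxchg(pgstep, old, new) != old);		/* retry while the lock is held */
	return new;
}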
+
+static inline pgste_t pgste_get(pte_t *ptep)
+{
+ unsigned long pgste = 0;
+#ifdef CONFIG_PGSTE
+ pgste = *(unsigned long *)(ptep + PTRS_PER_PTE);
+#endif
+ return __pgste(pgste);
+}
+
+static inline void pgste_set(pte_t *ptep, pgste_t pgste)
+{
+#ifdef CONFIG_PGSTE
+ *(pgste_t *)(ptep + PTRS_PER_PTE) = pgste;
+#endif
+}
+
+static inline pgste_t pgste_update_all(pte_t pte, pgste_t pgste,
+ struct mm_struct *mm)
+{
+#ifdef CONFIG_PGSTE
+ unsigned long address, bits, skey;
+
+ if (!mm_uses_skeys(mm) || pte_val(pte) & _PAGE_INVALID)
+ return pgste;
+ address = pte_val(pte) & PAGE_MASK;
+ skey = (unsigned long) page_get_storage_key(address);
+ bits = skey & (_PAGE_CHANGED | _PAGE_REFERENCED);
+ /* Transfer page changed & referenced bit to guest bits in pgste */
+ pgste_val(pgste) |= bits << 48; /* GR bit & GC bit */
+ /* Copy page access key and fetch protection bit to pgste */
+ pgste_val(pgste) &= ~(PGSTE_ACC_BITS | PGSTE_FP_BIT);
+ pgste_val(pgste) |= (skey & (_PAGE_ACC_BITS | _PAGE_FP_BIT)) << 56;
+#endif
+ return pgste;
+
+}
+
+static inline void pgste_set_key(pte_t *ptep, pgste_t pgste, pte_t entry,
+ struct mm_struct *mm)
+{
+#ifdef CONFIG_PGSTE
+ unsigned long address;
+ unsigned long nkey;
+
+ if (!mm_uses_skeys(mm) || pte_val(entry) & _PAGE_INVALID)
+ return;
+ VM_BUG_ON(!(pte_val(*ptep) & _PAGE_INVALID));
+ address = pte_val(entry) & PAGE_MASK;
+ /*
+ * Set page access key and fetch protection bit from pgste.
+ * The guest C/R information is still in the PGSTE, set real
+ * key C/R to 0.
+ */
+ nkey = (pgste_val(pgste) & (PGSTE_ACC_BITS | PGSTE_FP_BIT)) >> 56;
+ nkey |= (pgste_val(pgste) & (PGSTE_GR_BIT | PGSTE_GC_BIT)) >> 48;
+ page_set_storage_key(address, nkey, 0);
+#endif
+}
+
+static inline pgste_t pgste_set_pte(pte_t *ptep, pgste_t pgste, pte_t entry)
+{
+#ifdef CONFIG_PGSTE
+ if ((pte_val(entry) & _PAGE_PRESENT) &&
+ (pte_val(entry) & _PAGE_WRITE) &&
+ !(pte_val(entry) & _PAGE_INVALID)) {
+ if (!MACHINE_HAS_ESOP) {
+ /*
+ * Without enhanced suppression-on-protection force
+ * the dirty bit on for all writable ptes.
+ */
+ pte_val(entry) |= _PAGE_DIRTY;
+ pte_val(entry) &= ~_PAGE_PROTECT;
+ }
+ if (!(pte_val(entry) & _PAGE_PROTECT))
+ /* This pte allows write access, set user-dirty */
+ pgste_val(pgste) |= PGSTE_UC_BIT;
+ }
+#endif
+ *ptep = entry;
+ return pgste;
+}
+
+static inline pgste_t pgste_pte_notify(struct mm_struct *mm,
+ unsigned long addr,
+ pte_t *ptep, pgste_t pgste)
+{
+#ifdef CONFIG_PGSTE
+ unsigned long bits;
+
+ bits = pgste_val(pgste) & (PGSTE_IN_BIT | PGSTE_VSIE_BIT);
+ if (bits) {
+ pgste_val(pgste) ^= bits;
+ ptep_notify(mm, addr, ptep, bits);
+ }
+#endif
+ return pgste;
+}
+
+static inline pgste_t ptep_xchg_start(struct mm_struct *mm,
+ unsigned long addr, pte_t *ptep)
+{
+ pgste_t pgste = __pgste(0);
+
+ if (mm_has_pgste(mm)) {
+ pgste = pgste_get_lock(ptep);
+ pgste = pgste_pte_notify(mm, addr, ptep, pgste);
+ }
+ return pgste;
+}
+
+static inline pte_t ptep_xchg_commit(struct mm_struct *mm,
+ unsigned long addr, pte_t *ptep,
+ pgste_t pgste, pte_t old, pte_t new)
+{
+ if (mm_has_pgste(mm)) {
+ if (pte_val(old) & _PAGE_INVALID)
+ pgste_set_key(ptep, pgste, new, mm);
+ if (pte_val(new) & _PAGE_INVALID) {
+ pgste = pgste_update_all(old, pgste, mm);
+ if ((pgste_val(pgste) & _PGSTE_GPS_USAGE_MASK) ==
+ _PGSTE_GPS_USAGE_UNUSED)
+ pte_val(old) |= _PAGE_UNUSED;
+ }
+ pgste = pgste_set_pte(ptep, pgste, new);
+ pgste_set_unlock(ptep, pgste);
+ } else {
+ *ptep = new;
+ }
+ return old;
+}
+
+pte_t ptep_xchg_direct(struct mm_struct *mm, unsigned long addr,
+ pte_t *ptep, pte_t new)
+{
+ pgste_t pgste;
+ pte_t old;
+ int nodat;
+
+ preempt_disable();
+ pgste = ptep_xchg_start(mm, addr, ptep);
+ nodat = !!(pgste_val(pgste) & _PGSTE_GPS_NODAT);
+ old = ptep_flush_direct(mm, addr, ptep, nodat);
+ old = ptep_xchg_commit(mm, addr, ptep, pgste, old, new);
+ preempt_enable();
+ return old;
+}
+EXPORT_SYMBOL(ptep_xchg_direct);
+
+pte_t ptep_xchg_lazy(struct mm_struct *mm, unsigned long addr,
+ pte_t *ptep, pte_t new)
+{
+ pgste_t pgste;
+ pte_t old;
+ int nodat;
+
+ preempt_disable();
+ pgste = ptep_xchg_start(mm, addr, ptep);
+ nodat = !!(pgste_val(pgste) & _PGSTE_GPS_NODAT);
+ old = ptep_flush_lazy(mm, addr, ptep, nodat);
+ old = ptep_xchg_commit(mm, addr, ptep, pgste, old, new);
+ preempt_enable();
+ return old;
+}
+EXPORT_SYMBOL(ptep_xchg_lazy);
+
+pte_t ptep_modify_prot_start(struct mm_struct *mm, unsigned long addr,
+ pte_t *ptep)
+{
+ pgste_t pgste;
+ pte_t old;
+ int nodat;
+
+ preempt_disable();
+ pgste = ptep_xchg_start(mm, addr, ptep);
+ nodat = !!(pgste_val(pgste) & _PGSTE_GPS_NODAT);
+ old = ptep_flush_lazy(mm, addr, ptep, nodat);
+ if (mm_has_pgste(mm)) {
+ pgste = pgste_update_all(old, pgste, mm);
+ pgste_set(ptep, pgste);
+ }
+ return old;
+}
+EXPORT_SYMBOL(ptep_modify_prot_start);
+
+void ptep_modify_prot_commit(struct mm_struct *mm, unsigned long addr,
+ pte_t *ptep, pte_t pte)
+{
+ pgste_t pgste;
+
+ if (!MACHINE_HAS_NX)
+ pte_val(pte) &= ~_PAGE_NOEXEC;
+ if (mm_has_pgste(mm)) {
+ pgste = pgste_get(ptep);
+ pgste_set_key(ptep, pgste, pte, mm);
+ pgste = pgste_set_pte(ptep, pgste, pte);
+ pgste_set_unlock(ptep, pgste);
+ } else {
+ *ptep = pte;
+ }
+ preempt_enable();
+}
+EXPORT_SYMBOL(ptep_modify_prot_commit);
+
+static inline void pmdp_idte_local(struct mm_struct *mm,
+ unsigned long addr, pmd_t *pmdp)
+{
+ if (MACHINE_HAS_TLB_GUEST)
+ __pmdp_idte(addr, pmdp, IDTE_NODAT | IDTE_GUEST_ASCE,
+ mm->context.asce, IDTE_LOCAL);
+ else
+ __pmdp_idte(addr, pmdp, 0, 0, IDTE_LOCAL);
+ if (mm_has_pgste(mm) && mm->context.allow_gmap_hpage_1m)
+ gmap_pmdp_idte_local(mm, addr);
+}
+
+static inline void pmdp_idte_global(struct mm_struct *mm,
+ unsigned long addr, pmd_t *pmdp)
+{
+ if (MACHINE_HAS_TLB_GUEST) {
+ __pmdp_idte(addr, pmdp, IDTE_NODAT | IDTE_GUEST_ASCE,
+ mm->context.asce, IDTE_GLOBAL);
+ if (mm_has_pgste(mm) && mm->context.allow_gmap_hpage_1m)
+ gmap_pmdp_idte_global(mm, addr);
+ } else if (MACHINE_HAS_IDTE) {
+ __pmdp_idte(addr, pmdp, 0, 0, IDTE_GLOBAL);
+ if (mm_has_pgste(mm) && mm->context.allow_gmap_hpage_1m)
+ gmap_pmdp_idte_global(mm, addr);
+ } else {
+ __pmdp_csp(pmdp);
+ if (mm_has_pgste(mm) && mm->context.allow_gmap_hpage_1m)
+ gmap_pmdp_csp(mm, addr);
+ }
+}
+
+static inline pmd_t pmdp_flush_direct(struct mm_struct *mm,
+ unsigned long addr, pmd_t *pmdp)
+{
+ pmd_t old;
+
+ old = *pmdp;
+ if (pmd_val(old) & _SEGMENT_ENTRY_INVALID)
+ return old;
+ atomic_inc(&mm->context.flush_count);
+ if (MACHINE_HAS_TLB_LC &&
+ cpumask_equal(mm_cpumask(mm), cpumask_of(smp_processor_id())))
+ pmdp_idte_local(mm, addr, pmdp);
+ else
+ pmdp_idte_global(mm, addr, pmdp);
+ atomic_dec(&mm->context.flush_count);
+ return old;
+}
+
+static inline pmd_t pmdp_flush_lazy(struct mm_struct *mm,
+ unsigned long addr, pmd_t *pmdp)
+{
+ pmd_t old;
+
+ old = *pmdp;
+ if (pmd_val(old) & _SEGMENT_ENTRY_INVALID)
+ return old;
+ atomic_inc(&mm->context.flush_count);
+ if (cpumask_equal(&mm->context.cpu_attach_mask,
+ cpumask_of(smp_processor_id()))) {
+ pmd_val(*pmdp) |= _SEGMENT_ENTRY_INVALID;
+ mm->context.flush_mm = 1;
+ if (mm_has_pgste(mm))
+ gmap_pmdp_invalidate(mm, addr);
+ } else {
+ pmdp_idte_global(mm, addr, pmdp);
+ }
+ atomic_dec(&mm->context.flush_count);
+ return old;
+}
+
+#ifdef CONFIG_PGSTE
+static pmd_t *pmd_alloc_map(struct mm_struct *mm, unsigned long addr)
+{
+ pgd_t *pgd;
+ p4d_t *p4d;
+ pud_t *pud;
+ pmd_t *pmd;
+
+ pgd = pgd_offset(mm, addr);
+ p4d = p4d_alloc(mm, pgd, addr);
+ if (!p4d)
+ return NULL;
+ pud = pud_alloc(mm, p4d, addr);
+ if (!pud)
+ return NULL;
+ pmd = pmd_alloc(mm, pud, addr);
+ return pmd;
+}
+#endif
+
+pmd_t pmdp_xchg_direct(struct mm_struct *mm, unsigned long addr,
+ pmd_t *pmdp, pmd_t new)
+{
+ pmd_t old;
+
+ preempt_disable();
+ old = pmdp_flush_direct(mm, addr, pmdp);
+ *pmdp = new;
+ preempt_enable();
+ return old;
+}
+EXPORT_SYMBOL(pmdp_xchg_direct);
+
+pmd_t pmdp_xchg_lazy(struct mm_struct *mm, unsigned long addr,
+ pmd_t *pmdp, pmd_t new)
+{
+ pmd_t old;
+
+ preempt_disable();
+ old = pmdp_flush_lazy(mm, addr, pmdp);
+ *pmdp = new;
+ preempt_enable();
+ return old;
+}
+EXPORT_SYMBOL(pmdp_xchg_lazy);
+
+static inline void pudp_idte_local(struct mm_struct *mm,
+ unsigned long addr, pud_t *pudp)
+{
+ if (MACHINE_HAS_TLB_GUEST)
+ __pudp_idte(addr, pudp, IDTE_NODAT | IDTE_GUEST_ASCE,
+ mm->context.asce, IDTE_LOCAL);
+ else
+ __pudp_idte(addr, pudp, 0, 0, IDTE_LOCAL);
+}
+
+static inline void pudp_idte_global(struct mm_struct *mm,
+ unsigned long addr, pud_t *pudp)
+{
+ if (MACHINE_HAS_TLB_GUEST)
+ __pudp_idte(addr, pudp, IDTE_NODAT | IDTE_GUEST_ASCE,
+ mm->context.asce, IDTE_GLOBAL);
+ else if (MACHINE_HAS_IDTE)
+ __pudp_idte(addr, pudp, 0, 0, IDTE_GLOBAL);
+ else
+ /*
+ * Invalid bit position is the same for pmd and pud, so we can
+ * re-use __pmdp_csp() here
+ */
+ __pmdp_csp((pmd_t *) pudp);
+}
+
+static inline pud_t pudp_flush_direct(struct mm_struct *mm,
+ unsigned long addr, pud_t *pudp)
+{
+ pud_t old;
+
+ old = *pudp;
+ if (pud_val(old) & _REGION_ENTRY_INVALID)
+ return old;
+ atomic_inc(&mm->context.flush_count);
+ if (MACHINE_HAS_TLB_LC &&
+ cpumask_equal(mm_cpumask(mm), cpumask_of(smp_processor_id())))
+ pudp_idte_local(mm, addr, pudp);
+ else
+ pudp_idte_global(mm, addr, pudp);
+ atomic_dec(&mm->context.flush_count);
+ return old;
+}
+
+pud_t pudp_xchg_direct(struct mm_struct *mm, unsigned long addr,
+ pud_t *pudp, pud_t new)
+{
+ pud_t old;
+
+ preempt_disable();
+ old = pudp_flush_direct(mm, addr, pudp);
+ *pudp = new;
+ preempt_enable();
+ return old;
+}
+EXPORT_SYMBOL(pudp_xchg_direct);
+
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+void pgtable_trans_huge_deposit(struct mm_struct *mm, pmd_t *pmdp,
+ pgtable_t pgtable)
+{
+ struct list_head *lh = (struct list_head *) pgtable;
+
+ assert_spin_locked(pmd_lockptr(mm, pmdp));
+
+ /* FIFO */
+ if (!pmd_huge_pte(mm, pmdp))
+ INIT_LIST_HEAD(lh);
+ else
+ list_add(lh, (struct list_head *) pmd_huge_pte(mm, pmdp));
+ pmd_huge_pte(mm, pmdp) = pgtable;
+}
+
+pgtable_t pgtable_trans_huge_withdraw(struct mm_struct *mm, pmd_t *pmdp)
+{
+ struct list_head *lh;
+ pgtable_t pgtable;
+ pte_t *ptep;
+
+ assert_spin_locked(pmd_lockptr(mm, pmdp));
+
+ /* FIFO */
+ pgtable = pmd_huge_pte(mm, pmdp);
+ lh = (struct list_head *) pgtable;
+ if (list_empty(lh))
+ pmd_huge_pte(mm, pmdp) = NULL;
+ else {
+ pmd_huge_pte(mm, pmdp) = (pgtable_t) lh->next;
+ list_del(lh);
+ }
+ ptep = (pte_t *) pgtable;
+ pte_val(*ptep) = _PAGE_INVALID;
+ ptep++;
+ pte_val(*ptep) = _PAGE_INVALID;
+ return pgtable;
+}
+#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
+
+#ifdef CONFIG_PGSTE
+void ptep_set_pte_at(struct mm_struct *mm, unsigned long addr,
+ pte_t *ptep, pte_t entry)
+{
+ pgste_t pgste;
+
+ /* the mm_has_pgste() check is done in set_pte_at() */
+ preempt_disable();
+ pgste = pgste_get_lock(ptep);
+ pgste_val(pgste) &= ~_PGSTE_GPS_ZERO;
+ pgste_set_key(ptep, pgste, entry, mm);
+ pgste = pgste_set_pte(ptep, pgste, entry);
+ pgste_set_unlock(ptep, pgste);
+ preempt_enable();
+}
+
+void ptep_set_notify(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
+{
+ pgste_t pgste;
+
+ preempt_disable();
+ pgste = pgste_get_lock(ptep);
+ pgste_val(pgste) |= PGSTE_IN_BIT;
+ pgste_set_unlock(ptep, pgste);
+ preempt_enable();
+}
+
+/**
+ * ptep_force_prot - change access rights of a locked pte
+ * @mm: pointer to the process mm_struct
+ * @addr: virtual address in the guest address space
+ * @ptep: pointer to the page table entry
+ * @prot: indicates guest access rights: PROT_NONE, PROT_READ or PROT_WRITE
+ * @bit: pgste bit to set (e.g. for notification)
+ *
+ * Returns 0 if the access rights were changed and -EAGAIN if the current
+ * and requested access rights are incompatible.
+ */
+int ptep_force_prot(struct mm_struct *mm, unsigned long addr,
+ pte_t *ptep, int prot, unsigned long bit)
+{
+ pte_t entry;
+ pgste_t pgste;
+ int pte_i, pte_p, nodat;
+
+ pgste = pgste_get_lock(ptep);
+ entry = *ptep;
+ /* Check pte entry after all locks have been acquired */
+ pte_i = pte_val(entry) & _PAGE_INVALID;
+ pte_p = pte_val(entry) & _PAGE_PROTECT;
+ if ((pte_i && (prot != PROT_NONE)) ||
+ (pte_p && (prot & PROT_WRITE))) {
+ pgste_set_unlock(ptep, pgste);
+ return -EAGAIN;
+ }
+ /* Change access rights and set pgste bit */
+ nodat = !!(pgste_val(pgste) & _PGSTE_GPS_NODAT);
+ if (prot == PROT_NONE && !pte_i) {
+ ptep_flush_direct(mm, addr, ptep, nodat);
+ pgste = pgste_update_all(entry, pgste, mm);
+ pte_val(entry) |= _PAGE_INVALID;
+ }
+ if (prot == PROT_READ && !pte_p) {
+ ptep_flush_direct(mm, addr, ptep, nodat);
+ pte_val(entry) &= ~_PAGE_INVALID;
+ pte_val(entry) |= _PAGE_PROTECT;
+ }
+ pgste_val(pgste) |= bit;
+ pgste = pgste_set_pte(ptep, pgste, entry);
+ pgste_set_unlock(ptep, pgste);
+ return 0;
+}
+
+int ptep_shadow_pte(struct mm_struct *mm, unsigned long saddr,
+ pte_t *sptep, pte_t *tptep, pte_t pte)
+{
+ pgste_t spgste, tpgste;
+ pte_t spte, tpte;
+ int rc = -EAGAIN;
+
+ if (!(pte_val(*tptep) & _PAGE_INVALID))
+ return 0; /* already shadowed */
+ spgste = pgste_get_lock(sptep);
+ spte = *sptep;
+ if (!(pte_val(spte) & _PAGE_INVALID) &&
+ !((pte_val(spte) & _PAGE_PROTECT) &&
+ !(pte_val(pte) & _PAGE_PROTECT))) {
+ pgste_val(spgste) |= PGSTE_VSIE_BIT;
+ tpgste = pgste_get_lock(tptep);
+ pte_val(tpte) = (pte_val(spte) & PAGE_MASK) |
+ (pte_val(pte) & _PAGE_PROTECT);
+ /* don't touch the storage key - it belongs to parent pgste */
+ tpgste = pgste_set_pte(tptep, tpgste, tpte);
+ pgste_set_unlock(tptep, tpgste);
+ rc = 1;
+ }
+ pgste_set_unlock(sptep, spgste);
+ return rc;
+}
+
+void ptep_unshadow_pte(struct mm_struct *mm, unsigned long saddr, pte_t *ptep)
+{
+ pgste_t pgste;
+ int nodat;
+
+ pgste = pgste_get_lock(ptep);
+ /* notifier is called by the caller */
+ nodat = !!(pgste_val(pgste) & _PGSTE_GPS_NODAT);
+ ptep_flush_direct(mm, saddr, ptep, nodat);
+ /* don't touch the storage key - it belongs to parent pgste */
+ pgste = pgste_set_pte(ptep, pgste, __pte(_PAGE_INVALID));
+ pgste_set_unlock(ptep, pgste);
+}
+
+static void ptep_zap_swap_entry(struct mm_struct *mm, swp_entry_t entry)
+{
+ if (!non_swap_entry(entry))
+ dec_mm_counter(mm, MM_SWAPENTS);
+ else if (is_migration_entry(entry)) {
+ struct page *page = migration_entry_to_page(entry);
+
+ dec_mm_counter(mm, mm_counter(page));
+ }
+ free_swap_and_cache(entry);
+}
+
+void ptep_zap_unused(struct mm_struct *mm, unsigned long addr,
+ pte_t *ptep, int reset)
+{
+ unsigned long pgstev;
+ pgste_t pgste;
+ pte_t pte;
+
+ /* Zap unused and logically-zero pages */
+ preempt_disable();
+ pgste = pgste_get_lock(ptep);
+ pgstev = pgste_val(pgste);
+ pte = *ptep;
+ if (!reset && pte_swap(pte) &&
+ ((pgstev & _PGSTE_GPS_USAGE_MASK) == _PGSTE_GPS_USAGE_UNUSED ||
+ (pgstev & _PGSTE_GPS_ZERO))) {
+ ptep_zap_swap_entry(mm, pte_to_swp_entry(pte));
+ pte_clear(mm, addr, ptep);
+ }
+ if (reset)
+ pgste_val(pgste) &= ~_PGSTE_GPS_USAGE_MASK;
+ pgste_set_unlock(ptep, pgste);
+ preempt_enable();
+}
+
+void ptep_zap_key(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
+{
+ unsigned long ptev;
+ pgste_t pgste;
+
+ /* Clear storage key ACC and F, but set R/C */
+ preempt_disable();
+ pgste = pgste_get_lock(ptep);
+ pgste_val(pgste) &= ~(PGSTE_ACC_BITS | PGSTE_FP_BIT);
+ pgste_val(pgste) |= PGSTE_GR_BIT | PGSTE_GC_BIT;
+ ptev = pte_val(*ptep);
+ if (!(ptev & _PAGE_INVALID) && (ptev & _PAGE_WRITE))
+ page_set_storage_key(ptev & PAGE_MASK, PAGE_DEFAULT_KEY, 0);
+ pgste_set_unlock(ptep, pgste);
+ preempt_enable();
+}
+
+/*
+ * Test and reset if a guest page is dirty
+ */
+bool ptep_test_and_clear_uc(struct mm_struct *mm, unsigned long addr,
+ pte_t *ptep)
+{
+ pgste_t pgste;
+ pte_t pte;
+ bool dirty;
+ int nodat;
+
+ pgste = pgste_get_lock(ptep);
+ dirty = !!(pgste_val(pgste) & PGSTE_UC_BIT);
+ pgste_val(pgste) &= ~PGSTE_UC_BIT;
+ pte = *ptep;
+ if (dirty && (pte_val(pte) & _PAGE_PRESENT)) {
+ pgste = pgste_pte_notify(mm, addr, ptep, pgste);
+ nodat = !!(pgste_val(pgste) & _PGSTE_GPS_NODAT);
+ ptep_ipte_global(mm, addr, ptep, nodat);
+ if (MACHINE_HAS_ESOP || !(pte_val(pte) & _PAGE_WRITE))
+ pte_val(pte) |= _PAGE_PROTECT;
+ else
+ pte_val(pte) |= _PAGE_INVALID;
+ *ptep = pte;
+ }
+ pgste_set_unlock(ptep, pgste);
+ return dirty;
+}
+EXPORT_SYMBOL_GPL(ptep_test_and_clear_uc);
+
+int set_guest_storage_key(struct mm_struct *mm, unsigned long addr,
+ unsigned char key, bool nq)
+{
+ unsigned long keyul, paddr;
+ spinlock_t *ptl;
+ pgste_t old, new;
+ pmd_t *pmdp;
+ pte_t *ptep;
+
+ pmdp = pmd_alloc_map(mm, addr);
+ if (unlikely(!pmdp))
+ return -EFAULT;
+
+ ptl = pmd_lock(mm, pmdp);
+ if (!pmd_present(*pmdp)) {
+ spin_unlock(ptl);
+ return -EFAULT;
+ }
+
+ if (pmd_large(*pmdp)) {
+ paddr = pmd_val(*pmdp) & HPAGE_MASK;
+ paddr |= addr & ~HPAGE_MASK;
+ /*
+ * Huge pmds need quiescing operations; they are
+ * always mapped.
+ */
+ page_set_storage_key(paddr, key, 1);
+ spin_unlock(ptl);
+ return 0;
+ }
+ spin_unlock(ptl);
+
+ ptep = pte_alloc_map_lock(mm, pmdp, addr, &ptl);
+ if (unlikely(!ptep))
+ return -EFAULT;
+
+ new = old = pgste_get_lock(ptep);
+ pgste_val(new) &= ~(PGSTE_GR_BIT | PGSTE_GC_BIT |
+ PGSTE_ACC_BITS | PGSTE_FP_BIT);
+ keyul = (unsigned long) key;
+ pgste_val(new) |= (keyul & (_PAGE_CHANGED | _PAGE_REFERENCED)) << 48;
+ pgste_val(new) |= (keyul & (_PAGE_ACC_BITS | _PAGE_FP_BIT)) << 56;
+ if (!(pte_val(*ptep) & _PAGE_INVALID)) {
+ unsigned long bits, skey;
+
+ paddr = pte_val(*ptep) & PAGE_MASK;
+ skey = (unsigned long) page_get_storage_key(paddr);
+ bits = skey & (_PAGE_CHANGED | _PAGE_REFERENCED);
+ skey = key & (_PAGE_ACC_BITS | _PAGE_FP_BIT);
+ /* Set storage key ACC and FP */
+ page_set_storage_key(paddr, skey, !nq);
+ /* Merge host changed & referenced into pgste */
+ pgste_val(new) |= bits << 52;
+ }
+ /* Changing the guest storage key is considered a change of the page */
+ if ((pgste_val(new) ^ pgste_val(old)) &
+ (PGSTE_ACC_BITS | PGSTE_FP_BIT | PGSTE_GR_BIT | PGSTE_GC_BIT))
+ pgste_val(new) |= PGSTE_UC_BIT;
+
+ pgste_set_unlock(ptep, new);
+ pte_unmap_unlock(ptep, ptl);
+ return 0;
+}
+EXPORT_SYMBOL(set_guest_storage_key);
+
+/**
+ * Conditionally set a guest storage key (handling csske).
+ * oldkey will be updated when either mr or mc is set and a pointer is given.
+ *
+ * Returns 0 if a guest's storage key update wasn't necessary, 1 if the guest
+ * storage key was updated and -EFAULT on access errors.
+ */
+int cond_set_guest_storage_key(struct mm_struct *mm, unsigned long addr,
+ unsigned char key, unsigned char *oldkey,
+ bool nq, bool mr, bool mc)
+{
+ unsigned char tmp, mask = _PAGE_ACC_BITS | _PAGE_FP_BIT;
+ int rc;
+
+ /* we can drop the pgste lock between getting and setting the key */
+ if (mr | mc) {
+ rc = get_guest_storage_key(current->mm, addr, &tmp);
+ if (rc)
+ return rc;
+ if (oldkey)
+ *oldkey = tmp;
+ if (!mr)
+ mask |= _PAGE_REFERENCED;
+ if (!mc)
+ mask |= _PAGE_CHANGED;
+ if (!((tmp ^ key) & mask))
+ return 0;
+ }
+ rc = set_guest_storage_key(current->mm, addr, key, nq);
+ return rc < 0 ? rc : 1;
+}
+EXPORT_SYMBOL(cond_set_guest_storage_key);
+
+/**
+ * Reset a guest reference bit (rrbe), returning the reference and changed bit.
+ *
+ * Returns < 0 in case of error, otherwise the cc to be reported to the guest.
+ */
+int reset_guest_reference_bit(struct mm_struct *mm, unsigned long addr)
+{
+ spinlock_t *ptl;
+ unsigned long paddr;
+ pgste_t old, new;
+ pmd_t *pmdp;
+ pte_t *ptep;
+ int cc = 0;
+
+ pmdp = pmd_alloc_map(mm, addr);
+ if (unlikely(!pmdp))
+ return -EFAULT;
+
+ ptl = pmd_lock(mm, pmdp);
+ if (!pmd_present(*pmdp)) {
+ spin_unlock(ptl);
+ return -EFAULT;
+ }
+
+ if (pmd_large(*pmdp)) {
+ paddr = pmd_val(*pmdp) & HPAGE_MASK;
+ paddr |= addr & ~HPAGE_MASK;
+ cc = page_reset_referenced(paddr);
+ spin_unlock(ptl);
+ return cc;
+ }
+ spin_unlock(ptl);
+
+ ptep = pte_alloc_map_lock(mm, pmdp, addr, &ptl);
+ if (unlikely(!ptep))
+ return -EFAULT;
+
+ new = old = pgste_get_lock(ptep);
+ /* Reset guest reference bit only */
+ pgste_val(new) &= ~PGSTE_GR_BIT;
+
+ if (!(pte_val(*ptep) & _PAGE_INVALID)) {
+ paddr = pte_val(*ptep) & PAGE_MASK;
+ cc = page_reset_referenced(paddr);
+ /* Merge real referenced bit into host-set */
+ pgste_val(new) |= ((unsigned long) cc << 53) & PGSTE_HR_BIT;
+ }
+ /* Reflect guest's logical view, not physical */
+ cc |= (pgste_val(old) & (PGSTE_GR_BIT | PGSTE_GC_BIT)) >> 49;
+ /* Changing the guest storage key is considered a change of the page */
+ if ((pgste_val(new) ^ pgste_val(old)) & PGSTE_GR_BIT)
+ pgste_val(new) |= PGSTE_UC_BIT;
+
+ pgste_set_unlock(ptep, new);
+ pte_unmap_unlock(ptep, ptl);
+ return cc;
+}
+EXPORT_SYMBOL(reset_guest_reference_bit);
+
+int get_guest_storage_key(struct mm_struct *mm, unsigned long addr,
+ unsigned char *key)
+{
+ unsigned long paddr;
+ spinlock_t *ptl;
+ pgste_t pgste;
+ pmd_t *pmdp;
+ pte_t *ptep;
+
+ pmdp = pmd_alloc_map(mm, addr);
+ if (unlikely(!pmdp))
+ return -EFAULT;
+
+ ptl = pmd_lock(mm, pmdp);
+ if (!pmd_present(*pmdp)) {
+ /* Not yet mapped memory has a zero key */
+ spin_unlock(ptl);
+ *key = 0;
+ return 0;
+ }
+
+ if (pmd_large(*pmdp)) {
+ paddr = pmd_val(*pmdp) & HPAGE_MASK;
+ paddr |= addr & ~HPAGE_MASK;
+ *key = page_get_storage_key(paddr);
+ spin_unlock(ptl);
+ return 0;
+ }
+ spin_unlock(ptl);
+
+ ptep = pte_alloc_map_lock(mm, pmdp, addr, &ptl);
+ if (unlikely(!ptep))
+ return -EFAULT;
+
+ pgste = pgste_get_lock(ptep);
+ *key = (pgste_val(pgste) & (PGSTE_ACC_BITS | PGSTE_FP_BIT)) >> 56;
+ paddr = pte_val(*ptep) & PAGE_MASK;
+ if (!(pte_val(*ptep) & _PAGE_INVALID))
+ *key = page_get_storage_key(paddr);
+ /* Reflect guest's logical view, not physical */
+ *key |= (pgste_val(pgste) & (PGSTE_GR_BIT | PGSTE_GC_BIT)) >> 48;
+ pgste_set_unlock(ptep, pgste);
+ pte_unmap_unlock(ptep, ptl);
+ return 0;
+}
+EXPORT_SYMBOL(get_guest_storage_key);
+
+/**
+ * pgste_perform_essa - perform ESSA actions on the PGSTE.
+ * @mm: the memory context. It must have PGSTEs, no check is performed here!
+ * @hva: the host virtual address of the page whose PGSTE is to be processed
+ * @orc: the specific action to perform, see the ESSA_SET_* macros.
+ * @oldpte: the PTE will be saved there if the pointer is not NULL.
+ * @oldpgste: the old PGSTE will be saved there if the pointer is not NULL.
+ *
+ * Return: 1 if the page is to be added to the CBRL, otherwise 0,
+ * or < 0 in case of error. -EINVAL is returned for invalid values
+ * of orc, -EFAULT for invalid addresses.
+ */
+int pgste_perform_essa(struct mm_struct *mm, unsigned long hva, int orc,
+ unsigned long *oldpte, unsigned long *oldpgste)
+{
+ struct vm_area_struct *vma;
+ unsigned long pgstev;
+ spinlock_t *ptl;
+ pgste_t pgste;
+ pte_t *ptep;
+ int res = 0;
+
+ WARN_ON_ONCE(orc > ESSA_MAX);
+ if (unlikely(orc > ESSA_MAX))
+ return -EINVAL;
+
+ vma = find_vma(mm, hva);
+ if (!vma || hva < vma->vm_start || is_vm_hugetlb_page(vma))
+ return -EFAULT;
+ ptep = get_locked_pte(mm, hva, &ptl);
+ if (unlikely(!ptep))
+ return -EFAULT;
+ pgste = pgste_get_lock(ptep);
+ pgstev = pgste_val(pgste);
+ if (oldpte)
+ *oldpte = pte_val(*ptep);
+ if (oldpgste)
+ *oldpgste = pgstev;
+
+ switch (orc) {
+ case ESSA_GET_STATE:
+ break;
+ case ESSA_SET_STABLE:
+ pgstev &= ~(_PGSTE_GPS_USAGE_MASK | _PGSTE_GPS_NODAT);
+ pgstev |= _PGSTE_GPS_USAGE_STABLE;
+ break;
+ case ESSA_SET_UNUSED:
+ pgstev &= ~_PGSTE_GPS_USAGE_MASK;
+ pgstev |= _PGSTE_GPS_USAGE_UNUSED;
+ if (pte_val(*ptep) & _PAGE_INVALID)
+ res = 1;
+ break;
+ case ESSA_SET_VOLATILE:
+ pgstev &= ~_PGSTE_GPS_USAGE_MASK;
+ pgstev |= _PGSTE_GPS_USAGE_VOLATILE;
+ if (pte_val(*ptep) & _PAGE_INVALID)
+ res = 1;
+ break;
+ case ESSA_SET_POT_VOLATILE:
+ pgstev &= ~_PGSTE_GPS_USAGE_MASK;
+ if (!(pte_val(*ptep) & _PAGE_INVALID)) {
+ pgstev |= _PGSTE_GPS_USAGE_POT_VOLATILE;
+ break;
+ }
+ if (pgstev & _PGSTE_GPS_ZERO) {
+ pgstev |= _PGSTE_GPS_USAGE_VOLATILE;
+ break;
+ }
+ if (!(pgstev & PGSTE_GC_BIT)) {
+ pgstev |= _PGSTE_GPS_USAGE_VOLATILE;
+ res = 1;
+ break;
+ }
+ break;
+ case ESSA_SET_STABLE_RESIDENT:
+ pgstev &= ~_PGSTE_GPS_USAGE_MASK;
+ pgstev |= _PGSTE_GPS_USAGE_STABLE;
+ /*
+ * Since the resident state can go away any time after this
+ * call, we will not make this page resident. We can revisit
+ * this decision if a guest will ever start using this.
+ */
+ break;
+ case ESSA_SET_STABLE_IF_RESIDENT:
+ if (!(pte_val(*ptep) & _PAGE_INVALID)) {
+ pgstev &= ~_PGSTE_GPS_USAGE_MASK;
+ pgstev |= _PGSTE_GPS_USAGE_STABLE;
+ }
+ break;
+ case ESSA_SET_STABLE_NODAT:
+ pgstev &= ~_PGSTE_GPS_USAGE_MASK;
+ pgstev |= _PGSTE_GPS_USAGE_STABLE | _PGSTE_GPS_NODAT;
+ break;
+ default:
+ /* we should never get here! */
+ break;
+ }
+ /* If we are discarding a page, set it to logical zero */
+ if (res)
+ pgstev |= _PGSTE_GPS_ZERO;
+
+ pgste_val(pgste) = pgstev;
+ pgste_set_unlock(ptep, pgste);
+ pte_unmap_unlock(ptep, ptl);
+ return res;
+}
+EXPORT_SYMBOL(pgste_perform_essa);
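Editorial aside: a hedged sketch of how a caller handling an ESSA intercept might drive pgste_perform_essa(); example_report_cbrl() stands in for the caller's CBRL bookkeeping and is purely illustrative.

static int example_handle_essa(struct mm_struct *mm, unsigned long hva, int orc)
{
	int res;

	res = pgste_perform_essa(mm, hva, orc, NULL, NULL);
	if (res < 0)
		return res;			/* -EINVAL or -EFAULT */
	if (res == 1)
		example_report_cbrl(hva);	/* hypothetical: page is to be added to the CBRL */
	return 0;
}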
+
+/**
+ * set_pgste_bits - set specific PGSTE bits.
+ * @mm: the memory context. It must have PGSTEs, no check is performed here!
+ * @hva: the host virtual address of the page whose PGSTE is to be processed
+ * @bits: a bitmask representing the bits that will be touched
+ * @value: the values of the bits to be written. Only the bits in the mask
+ * will be written.
+ *
+ * Return: 0 on success, < 0 in case of error.
+ */
+int set_pgste_bits(struct mm_struct *mm, unsigned long hva,
+ unsigned long bits, unsigned long value)
+{
+ struct vm_area_struct *vma;
+ spinlock_t *ptl;
+ pgste_t new;
+ pte_t *ptep;
+
+ vma = find_vma(mm, hva);
+ if (!vma || hva < vma->vm_start || is_vm_hugetlb_page(vma))
+ return -EFAULT;
+ ptep = get_locked_pte(mm, hva, &ptl);
+ if (unlikely(!ptep))
+ return -EFAULT;
+ new = pgste_get_lock(ptep);
+
+ pgste_val(new) &= ~bits;
+ pgste_val(new) |= value & bits;
+
+ pgste_set_unlock(ptep, new);
+ pte_unmap_unlock(ptep, ptl);
+ return 0;
+}
+EXPORT_SYMBOL(set_pgste_bits);
+
+/**
+ * get_pgste - get the current PGSTE for the given address.
+ * @mm: the memory context. It must have PGSTEs, no check is performed here!
+ * @hva: the host virtual address of the page whose PGSTE is to be processed
+ * @pgstep: will be written with the current PGSTE for the given address.
+ *
+ * Return: 0 on success, < 0 in case of error.
+ */
+int get_pgste(struct mm_struct *mm, unsigned long hva, unsigned long *pgstep)
+{
+ struct vm_area_struct *vma;
+ spinlock_t *ptl;
+ pte_t *ptep;
+
+ vma = find_vma(mm, hva);
+ if (!vma || hva < vma->vm_start || is_vm_hugetlb_page(vma))
+ return -EFAULT;
+ ptep = get_locked_pte(mm, hva, &ptl);
+ if (unlikely(!ptep))
+ return -EFAULT;
+ *pgstep = pgste_val(pgste_get(ptep));
+ pte_unmap_unlock(ptep, ptl);
+ return 0;
+}
+EXPORT_SYMBOL(get_pgste);
+#endif
diff --git a/arch/s390/mm/vmem.c b/arch/s390/mm/vmem.c
new file mode 100644
index 000000000..db55561c5
--- /dev/null
+++ b/arch/s390/mm/vmem.c
@@ -0,0 +1,443 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright IBM Corp. 2006
+ * Author(s): Heiko Carstens <heiko.carstens@de.ibm.com>
+ */
+
+#include <linux/bootmem.h>
+#include <linux/pfn.h>
+#include <linux/mm.h>
+#include <linux/init.h>
+#include <linux/list.h>
+#include <linux/hugetlb.h>
+#include <linux/slab.h>
+#include <linux/memblock.h>
+#include <asm/cacheflush.h>
+#include <asm/pgalloc.h>
+#include <asm/pgtable.h>
+#include <asm/setup.h>
+#include <asm/tlbflush.h>
+#include <asm/sections.h>
+#include <asm/set_memory.h>
+
+static DEFINE_MUTEX(vmem_mutex);
+
+struct memory_segment {
+ struct list_head list;
+ unsigned long start;
+ unsigned long size;
+};
+
+static LIST_HEAD(mem_segs);
+
+static void __ref *vmem_alloc_pages(unsigned int order)
+{
+ unsigned long size = PAGE_SIZE << order;
+
+ if (slab_is_available())
+ return (void *)__get_free_pages(GFP_KERNEL, order);
+ return (void *) memblock_alloc(size, size);
+}
+
+void *vmem_crst_alloc(unsigned long val)
+{
+ unsigned long *table;
+
+ table = vmem_alloc_pages(CRST_ALLOC_ORDER);
+ if (table)
+ crst_table_init(table, val);
+ return table;
+}
+
+pte_t __ref *vmem_pte_alloc(void)
+{
+ unsigned long size = PTRS_PER_PTE * sizeof(pte_t);
+ pte_t *pte;
+
+ if (slab_is_available())
+ pte = (pte_t *) page_table_alloc(&init_mm);
+ else
+ pte = (pte_t *) memblock_alloc(size, size);
+ if (!pte)
+ return NULL;
+ memset64((u64 *)pte, _PAGE_INVALID, PTRS_PER_PTE);
+ return pte;
+}
+
+/*
+ * Add a physical memory range to the 1:1 mapping.
+ */
+static int vmem_add_mem(unsigned long start, unsigned long size)
+{
+ unsigned long pgt_prot, sgt_prot, r3_prot;
+ unsigned long pages4k, pages1m, pages2g;
+ unsigned long end = start + size;
+ unsigned long address = start;
+ pgd_t *pg_dir;
+ p4d_t *p4_dir;
+ pud_t *pu_dir;
+ pmd_t *pm_dir;
+ pte_t *pt_dir;
+ int ret = -ENOMEM;
+
+ pgt_prot = pgprot_val(PAGE_KERNEL);
+ sgt_prot = pgprot_val(SEGMENT_KERNEL);
+ r3_prot = pgprot_val(REGION3_KERNEL);
+ if (!MACHINE_HAS_NX) {
+ pgt_prot &= ~_PAGE_NOEXEC;
+ sgt_prot &= ~_SEGMENT_ENTRY_NOEXEC;
+ r3_prot &= ~_REGION_ENTRY_NOEXEC;
+ }
+ pages4k = pages1m = pages2g = 0;
+ while (address < end) {
+ pg_dir = pgd_offset_k(address);
+ if (pgd_none(*pg_dir)) {
+ p4_dir = vmem_crst_alloc(_REGION2_ENTRY_EMPTY);
+ if (!p4_dir)
+ goto out;
+ pgd_populate(&init_mm, pg_dir, p4_dir);
+ }
+ p4_dir = p4d_offset(pg_dir, address);
+ if (p4d_none(*p4_dir)) {
+ pu_dir = vmem_crst_alloc(_REGION3_ENTRY_EMPTY);
+ if (!pu_dir)
+ goto out;
+ p4d_populate(&init_mm, p4_dir, pu_dir);
+ }
+ pu_dir = pud_offset(p4_dir, address);
+ if (MACHINE_HAS_EDAT2 && pud_none(*pu_dir) && address &&
+ !(address & ~PUD_MASK) && (address + PUD_SIZE <= end) &&
+ !debug_pagealloc_enabled()) {
+ pud_val(*pu_dir) = address | r3_prot;
+ address += PUD_SIZE;
+ pages2g++;
+ continue;
+ }
+ if (pud_none(*pu_dir)) {
+ pm_dir = vmem_crst_alloc(_SEGMENT_ENTRY_EMPTY);
+ if (!pm_dir)
+ goto out;
+ pud_populate(&init_mm, pu_dir, pm_dir);
+ }
+ pm_dir = pmd_offset(pu_dir, address);
+ if (MACHINE_HAS_EDAT1 && pmd_none(*pm_dir) && address &&
+ !(address & ~PMD_MASK) && (address + PMD_SIZE <= end) &&
+ !debug_pagealloc_enabled()) {
+ pmd_val(*pm_dir) = address | sgt_prot;
+ address += PMD_SIZE;
+ pages1m++;
+ continue;
+ }
+ if (pmd_none(*pm_dir)) {
+ pt_dir = vmem_pte_alloc();
+ if (!pt_dir)
+ goto out;
+ pmd_populate(&init_mm, pm_dir, pt_dir);
+ }
+
+ pt_dir = pte_offset_kernel(pm_dir, address);
+ pte_val(*pt_dir) = address | pgt_prot;
+ address += PAGE_SIZE;
+ pages4k++;
+ }
+ ret = 0;
+out:
+ update_page_count(PG_DIRECT_MAP_4K, pages4k);
+ update_page_count(PG_DIRECT_MAP_1M, pages1m);
+ update_page_count(PG_DIRECT_MAP_2G, pages2g);
+ return ret;
+}
+
+/*
+ * Remove a physical memory range from the 1:1 mapping.
+ * Currently only invalidates page table entries.
+ */
+static void vmem_remove_range(unsigned long start, unsigned long size)
+{
+ unsigned long pages4k, pages1m, pages2g;
+ unsigned long end = start + size;
+ unsigned long address = start;
+ pgd_t *pg_dir;
+ p4d_t *p4_dir;
+ pud_t *pu_dir;
+ pmd_t *pm_dir;
+ pte_t *pt_dir;
+
+ pages4k = pages1m = pages2g = 0;
+ while (address < end) {
+ pg_dir = pgd_offset_k(address);
+ if (pgd_none(*pg_dir)) {
+ address += PGDIR_SIZE;
+ continue;
+ }
+ p4_dir = p4d_offset(pg_dir, address);
+ if (p4d_none(*p4_dir)) {
+ address += P4D_SIZE;
+ continue;
+ }
+ pu_dir = pud_offset(p4_dir, address);
+ if (pud_none(*pu_dir)) {
+ address += PUD_SIZE;
+ continue;
+ }
+ if (pud_large(*pu_dir)) {
+ pud_clear(pu_dir);
+ address += PUD_SIZE;
+ pages2g++;
+ continue;
+ }
+ pm_dir = pmd_offset(pu_dir, address);
+ if (pmd_none(*pm_dir)) {
+ address += PMD_SIZE;
+ continue;
+ }
+ if (pmd_large(*pm_dir)) {
+ pmd_clear(pm_dir);
+ address += PMD_SIZE;
+ pages1m++;
+ continue;
+ }
+ pt_dir = pte_offset_kernel(pm_dir, address);
+ pte_clear(&init_mm, address, pt_dir);
+ address += PAGE_SIZE;
+ pages4k++;
+ }
+ flush_tlb_kernel_range(start, end);
+ update_page_count(PG_DIRECT_MAP_4K, -pages4k);
+ update_page_count(PG_DIRECT_MAP_1M, -pages1m);
+ update_page_count(PG_DIRECT_MAP_2G, -pages2g);
+}
+
+/*
+ * Add a backed mem_map array to the virtual mem_map array.
+ */
+int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node,
+ struct vmem_altmap *altmap)
+{
+ unsigned long pgt_prot, sgt_prot;
+ unsigned long address = start;
+ pgd_t *pg_dir;
+ p4d_t *p4_dir;
+ pud_t *pu_dir;
+ pmd_t *pm_dir;
+ pte_t *pt_dir;
+ int ret = -ENOMEM;
+
+ pgt_prot = pgprot_val(PAGE_KERNEL);
+ sgt_prot = pgprot_val(SEGMENT_KERNEL);
+ if (!MACHINE_HAS_NX) {
+ pgt_prot &= ~_PAGE_NOEXEC;
+ sgt_prot &= ~_SEGMENT_ENTRY_NOEXEC;
+ }
+ for (address = start; address < end;) {
+ pg_dir = pgd_offset_k(address);
+ if (pgd_none(*pg_dir)) {
+ p4_dir = vmem_crst_alloc(_REGION2_ENTRY_EMPTY);
+ if (!p4_dir)
+ goto out;
+ pgd_populate(&init_mm, pg_dir, p4_dir);
+ }
+
+ p4_dir = p4d_offset(pg_dir, address);
+ if (p4d_none(*p4_dir)) {
+ pu_dir = vmem_crst_alloc(_REGION3_ENTRY_EMPTY);
+ if (!pu_dir)
+ goto out;
+ p4d_populate(&init_mm, p4_dir, pu_dir);
+ }
+
+ pu_dir = pud_offset(p4_dir, address);
+ if (pud_none(*pu_dir)) {
+ pm_dir = vmem_crst_alloc(_SEGMENT_ENTRY_EMPTY);
+ if (!pm_dir)
+ goto out;
+ pud_populate(&init_mm, pu_dir, pm_dir);
+ }
+
+ pm_dir = pmd_offset(pu_dir, address);
+ if (pmd_none(*pm_dir)) {
+ /* Use 1MB frames for vmemmap if available. We always
+ * use large frames even if they are only partially
+ * used.
+ * Otherwise we would also have page tables, since
+ * vmemmap_populate gets called for each section
+ * separately. */
+ if (MACHINE_HAS_EDAT1) {
+ void *new_page;
+
+ new_page = vmemmap_alloc_block(PMD_SIZE, node);
+ if (!new_page)
+ goto out;
+ pmd_val(*pm_dir) = __pa(new_page) | sgt_prot;
+ address = (address + PMD_SIZE) & PMD_MASK;
+ continue;
+ }
+ pt_dir = vmem_pte_alloc();
+ if (!pt_dir)
+ goto out;
+ pmd_populate(&init_mm, pm_dir, pt_dir);
+ } else if (pmd_large(*pm_dir)) {
+ address = (address + PMD_SIZE) & PMD_MASK;
+ continue;
+ }
+
+ pt_dir = pte_offset_kernel(pm_dir, address);
+ if (pte_none(*pt_dir)) {
+ void *new_page;
+
+ new_page = vmemmap_alloc_block(PAGE_SIZE, node);
+ if (!new_page)
+ goto out;
+ pte_val(*pt_dir) = __pa(new_page) | pgt_prot;
+ }
+ address += PAGE_SIZE;
+ }
+ ret = 0;
+out:
+ return ret;
+}
+
+void vmemmap_free(unsigned long start, unsigned long end,
+ struct vmem_altmap *altmap)
+{
+}
+
+/*
+ * Add memory segment to the segment list if it doesn't overlap with
+ * an already present segment.
+ */
+static int insert_memory_segment(struct memory_segment *seg)
+{
+ struct memory_segment *tmp;
+
+ if (seg->start + seg->size > VMEM_MAX_PHYS ||
+ seg->start + seg->size < seg->start)
+ return -ERANGE;
+
+ list_for_each_entry(tmp, &mem_segs, list) {
+ if (seg->start >= tmp->start + tmp->size)
+ continue;
+ if (seg->start + seg->size <= tmp->start)
+ continue;
+ return -ENOSPC;
+ }
+ list_add(&seg->list, &mem_segs);
+ return 0;
+}
+
+/*
+ * Remove memory segment from the segment list.
+ */
+static void remove_memory_segment(struct memory_segment *seg)
+{
+ list_del(&seg->list);
+}
+
+static void __remove_shared_memory(struct memory_segment *seg)
+{
+ remove_memory_segment(seg);
+ vmem_remove_range(seg->start, seg->size);
+}
+
+int vmem_remove_mapping(unsigned long start, unsigned long size)
+{
+ struct memory_segment *seg;
+ int ret;
+
+ mutex_lock(&vmem_mutex);
+
+ ret = -ENOENT;
+ list_for_each_entry(seg, &mem_segs, list) {
+ if (seg->start == start && seg->size == size)
+ break;
+ }
+
+ if (seg->start != start || seg->size != size)
+ goto out;
+
+ ret = 0;
+ __remove_shared_memory(seg);
+ kfree(seg);
+out:
+ mutex_unlock(&vmem_mutex);
+ return ret;
+}
+
+int vmem_add_mapping(unsigned long start, unsigned long size)
+{
+ struct memory_segment *seg;
+ int ret;
+
+ mutex_lock(&vmem_mutex);
+ ret = -ENOMEM;
+ seg = kzalloc(sizeof(*seg), GFP_KERNEL);
+ if (!seg)
+ goto out;
+ seg->start = start;
+ seg->size = size;
+
+ ret = insert_memory_segment(seg);
+ if (ret)
+ goto out_free;
+
+ ret = vmem_add_mem(start, size);
+ if (ret)
+ goto out_remove;
+ goto out;
+
+out_remove:
+ __remove_shared_memory(seg);
+out_free:
+ kfree(seg);
+out:
+ mutex_unlock(&vmem_mutex);
+ return ret;
+}
+
+/*
+ * Map the whole physical memory to virtual memory (identity mapping).
+ * We reserve enough space in the vmalloc area for vmemmap to hotplug
+ * additional memory segments.
+ */
+void __init vmem_map_init(void)
+{
+ struct memblock_region *reg;
+
+ for_each_memblock(memory, reg)
+ vmem_add_mem(reg->base, reg->size);
+ __set_memory((unsigned long)_stext,
+ (unsigned long)(_etext - _stext) >> PAGE_SHIFT,
+ SET_MEMORY_RO | SET_MEMORY_X);
+ __set_memory((unsigned long)_etext,
+ (unsigned long)(__end_rodata - _etext) >> PAGE_SHIFT,
+ SET_MEMORY_RO);
+ __set_memory((unsigned long)_sinittext,
+ (unsigned long)(_einittext - _sinittext) >> PAGE_SHIFT,
+ SET_MEMORY_RO | SET_MEMORY_X);
+ pr_info("Write protected kernel read-only data: %luk\n",
+ (unsigned long)(__end_rodata - _stext) >> 10);
+}
+
+/*
+ * Convert memblock.memory to a memory segment list so there is a single
+ * list that contains all memory segments.
+ */
+static int __init vmem_convert_memory_chunk(void)
+{
+ struct memblock_region *reg;
+ struct memory_segment *seg;
+
+ mutex_lock(&vmem_mutex);
+ for_each_memblock(memory, reg) {
+ seg = kzalloc(sizeof(*seg), GFP_KERNEL);
+ if (!seg)
+ panic("Out of memory...\n");
+ seg->start = reg->base;
+ seg->size = reg->size;
+ insert_memory_segment(seg);
+ }
+ mutex_unlock(&vmem_mutex);
+ return 0;
+}
+
+core_initcall(vmem_convert_memory_chunk);
diff --git a/arch/s390/net/Makefile b/arch/s390/net/Makefile
new file mode 100644
index 000000000..8cab6deb0
--- /dev/null
+++ b/arch/s390/net/Makefile
@@ -0,0 +1,6 @@
+# SPDX-License-Identifier: GPL-2.0
+#
+# Arch-specific network modules
+#
+obj-$(CONFIG_BPF_JIT) += bpf_jit_comp.o
+obj-$(CONFIG_HAVE_PNETID) += pnet.o
diff --git a/arch/s390/net/bpf_jit.h b/arch/s390/net/bpf_jit.h
new file mode 100644
index 000000000..7822ea92e
--- /dev/null
+++ b/arch/s390/net/bpf_jit.h
@@ -0,0 +1,55 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * BPF Jit compiler defines
+ *
+ * Copyright IBM Corp. 2012,2015
+ *
+ * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>
+ * Michael Holzheu <holzheu@linux.vnet.ibm.com>
+ */
+
+#ifndef __ARCH_S390_NET_BPF_JIT_H
+#define __ARCH_S390_NET_BPF_JIT_H
+
+#ifndef __ASSEMBLY__
+
+#include <linux/filter.h>
+#include <linux/types.h>
+
+#endif /* __ASSEMBLY__ */
+
+/*
+ * Stackframe layout (packed stack):
+ *
+ * ^ high
+ * +---------------+ |
+ * | old backchain | |
+ * +---------------+ |
+ * | r15 - r6 | |
+ * +---------------+ |
+ * | 4 byte align | |
+ * | tail_call_cnt | |
+ * BFP -> +===============+ |
+ * | | |
+ * | BPF stack | |
+ * | | |
+ * R15+160 -> +---------------+ |
+ * | new backchain | |
+ * R15+152 -> +---------------+ |
+ * | + 152 byte SA | |
+ * R15 -> +---------------+ + low
+ *
+ * We get 160 bytes of stack space from the calling function, but only use
+ * 12 * 8 bytes for the old backchain, r15..r6, and tail_call_cnt.
+ *
+ * The stack size used by the BPF program ("BPF stack" above) is passed
+ * via "aux->stack_depth".
+ */
+#define STK_SPACE_ADD (160)
+#define STK_160_UNUSED (160 - 12 * 8)
+#define STK_OFF (STK_SPACE_ADD - STK_160_UNUSED)
+
+#define STK_OFF_R6 (160 - 11 * 8) /* Offset of r6 on stack */
+#define STK_OFF_TCCNT (160 - 12 * 8) /* Offset of tail_call_cnt on stack */
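Editorial aside: evaluating the constants above for reference:

/*
 * STK_SPACE_ADD  = 160
 * STK_160_UNUSED = 160 - 12 * 8 = 64
 * STK_OFF        = 160 - 64     = 96
 * STK_OFF_R6     = 160 - 11 * 8 = 72
 * STK_OFF_TCCNT  = 160 - 12 * 8 = 64
 */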
+
+#endif /* __ARCH_S390_NET_BPF_JIT_H */
diff --git a/arch/s390/net/bpf_jit_comp.c b/arch/s390/net/bpf_jit_comp.c
new file mode 100644
index 000000000..bdc33d0e3
--- /dev/null
+++ b/arch/s390/net/bpf_jit_comp.c
@@ -0,0 +1,1320 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * BPF Jit compiler for s390.
+ *
+ * Minimum build requirements:
+ *
+ * - HAVE_MARCH_Z196_FEATURES: laal, laalg
+ * - HAVE_MARCH_Z10_FEATURES: msfi, cgrj, clgrj
+ * - HAVE_MARCH_Z9_109_FEATURES: alfi, llilf, clfi, oilf, nilf
+ * - PACK_STACK
+ * - 64BIT
+ *
+ * Copyright IBM Corp. 2012,2015
+ *
+ * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>
+ * Michael Holzheu <holzheu@linux.vnet.ibm.com>
+ */
+
+#define KMSG_COMPONENT "bpf_jit"
+#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
+
+#include <linux/netdevice.h>
+#include <linux/filter.h>
+#include <linux/init.h>
+#include <linux/bpf.h>
+#include <asm/cacheflush.h>
+#include <asm/dis.h>
+#include <asm/facility.h>
+#include <asm/nospec-branch.h>
+#include <asm/set_memory.h>
+#include "bpf_jit.h"
+
+struct bpf_jit {
+ u32 seen; /* Flags to remember seen eBPF instructions */
+ u32 seen_reg[16]; /* Array to remember which registers are used */
+ u32 *addrs; /* Array with relative instruction addresses */
+ u8 *prg_buf; /* Start of program */
+ int size; /* Size of program and literal pool */
+ int size_prg; /* Size of program */
+ int prg; /* Current position in program */
+ int lit_start; /* Start of literal pool */
+ int lit; /* Current position in literal pool */
+ int base_ip; /* Base address for literal pool */
+ int ret0_ip; /* Address of return 0 */
+ int exit_ip; /* Address of exit */
+ int r1_thunk_ip; /* Address of expoline thunk for 'br %r1' */
+ int r14_thunk_ip; /* Address of expoline thunk for 'br %r14' */
+ int tail_call_start; /* Tail call start offset */
+ int labels[1]; /* Labels for local jumps */
+};
+
+#define BPF_SIZE_MAX 0xffff /* Max size for program (16 bit branches) */
+
+#define SEEN_MEM (1 << 0) /* use mem[] for temporary storage */
+#define SEEN_RET0 (1 << 1) /* ret0_ip points to a valid return 0 */
+#define SEEN_LITERAL (1 << 2) /* code uses literals */
+#define SEEN_FUNC (1 << 3) /* calls C functions */
+#define SEEN_TAIL_CALL (1 << 4) /* code uses tail calls */
+#define SEEN_REG_AX (1 << 5) /* code uses constant blinding */
+#define SEEN_STACK (SEEN_FUNC | SEEN_MEM)
+
+/*
+ * s390 registers
+ */
+#define REG_W0 (MAX_BPF_JIT_REG + 0) /* Work register 1 (even) */
+#define REG_W1 (MAX_BPF_JIT_REG + 1) /* Work register 2 (odd) */
+#define REG_L (MAX_BPF_JIT_REG + 2) /* Literal pool register */
+#define REG_15 (MAX_BPF_JIT_REG + 3) /* Register 15 */
+#define REG_0 REG_W0 /* Register 0 */
+#define REG_1 REG_W1 /* Register 1 */
+#define REG_2 BPF_REG_1 /* Register 2 */
+#define REG_14 BPF_REG_0 /* Register 14 */
+
+/*
+ * Mapping of BPF registers to s390 registers
+ */
+static const int reg2hex[] = {
+ /* Return code */
+ [BPF_REG_0] = 14,
+ /* Function parameters */
+ [BPF_REG_1] = 2,
+ [BPF_REG_2] = 3,
+ [BPF_REG_3] = 4,
+ [BPF_REG_4] = 5,
+ [BPF_REG_5] = 6,
+ /* Call saved registers */
+ [BPF_REG_6] = 7,
+ [BPF_REG_7] = 8,
+ [BPF_REG_8] = 9,
+ [BPF_REG_9] = 10,
+ /* BPF stack pointer */
+ [BPF_REG_FP] = 13,
+ /* Register for blinding */
+ [BPF_REG_AX] = 12,
+ /* Work registers for s390x backend */
+ [REG_W0] = 0,
+ [REG_W1] = 1,
+ [REG_L] = 11,
+ [REG_15] = 15,
+};
+
+static inline u32 reg(u32 dst_reg, u32 src_reg)
+{
+ return reg2hex[dst_reg] << 4 | reg2hex[src_reg];
+}
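Editorial aside: a worked example of the packing above. reg2hex[BPF_REG_1] is 2 and reg2hex[REG_W1] is 1, so:

/*
 * reg(BPF_REG_1, REG_W1) == (2 << 4) | 1 == 0x21
 * The EMIT2/EMIT4 macros below OR this byte into the low byte of the
 * opcode, i.e. into its R1/R2 register fields.
 */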
+
+static inline u32 reg_high(u32 reg)
+{
+ return reg2hex[reg] << 4;
+}
+
+static inline void reg_set_seen(struct bpf_jit *jit, u32 b1)
+{
+ u32 r1 = reg2hex[b1];
+
+ if (r1 >= 6 && r1 <= 15 && !jit->seen_reg[r1])
+ jit->seen_reg[r1] = 1;
+}
+
+#define REG_SET_SEEN(b1) \
+({ \
+ reg_set_seen(jit, b1); \
+})
+
+#define REG_SEEN(b1) jit->seen_reg[reg2hex[(b1)]]
+
+/*
+ * EMIT macros for code generation
+ */
+
+#define _EMIT2(op) \
+({ \
+ if (jit->prg_buf) \
+ *(u16 *) (jit->prg_buf + jit->prg) = op; \
+ jit->prg += 2; \
+})
+
+#define EMIT2(op, b1, b2) \
+({ \
+ _EMIT2(op | reg(b1, b2)); \
+ REG_SET_SEEN(b1); \
+ REG_SET_SEEN(b2); \
+})
+
+#define _EMIT4(op) \
+({ \
+ if (jit->prg_buf) \
+ *(u32 *) (jit->prg_buf + jit->prg) = op; \
+ jit->prg += 4; \
+})
+
+#define EMIT4(op, b1, b2) \
+({ \
+ _EMIT4(op | reg(b1, b2)); \
+ REG_SET_SEEN(b1); \
+ REG_SET_SEEN(b2); \
+})
+
+#define EMIT4_RRF(op, b1, b2, b3) \
+({ \
+ _EMIT4(op | reg_high(b3) << 8 | reg(b1, b2)); \
+ REG_SET_SEEN(b1); \
+ REG_SET_SEEN(b2); \
+ REG_SET_SEEN(b3); \
+})
+
+#define _EMIT4_DISP(op, disp) \
+({ \
+ unsigned int __disp = (disp) & 0xfff; \
+ _EMIT4(op | __disp); \
+})
+
+#define EMIT4_DISP(op, b1, b2, disp) \
+({ \
+ _EMIT4_DISP(op | reg_high(b1) << 16 | \
+ reg_high(b2) << 8, disp); \
+ REG_SET_SEEN(b1); \
+ REG_SET_SEEN(b2); \
+})
+
+#define EMIT4_IMM(op, b1, imm) \
+({ \
+ unsigned int __imm = (imm) & 0xffff; \
+ _EMIT4(op | reg_high(b1) << 16 | __imm); \
+ REG_SET_SEEN(b1); \
+})
+
+#define EMIT4_PCREL(op, pcrel) \
+({ \
+ long __pcrel = ((pcrel) >> 1) & 0xffff; \
+ _EMIT4(op | __pcrel); \
+})
+
+#define _EMIT6(op1, op2) \
+({ \
+ if (jit->prg_buf) { \
+ *(u32 *) (jit->prg_buf + jit->prg) = op1; \
+ *(u16 *) (jit->prg_buf + jit->prg + 4) = op2; \
+ } \
+ jit->prg += 6; \
+})
+
+#define _EMIT6_DISP(op1, op2, disp) \
+({ \
+ unsigned int __disp = (disp) & 0xfff; \
+ _EMIT6(op1 | __disp, op2); \
+})
+
+#define _EMIT6_DISP_LH(op1, op2, disp) \
+({ \
+ u32 _disp = (u32) disp; \
+ unsigned int __disp_h = _disp & 0xff000; \
+ unsigned int __disp_l = _disp & 0x00fff; \
+ _EMIT6(op1 | __disp_l, op2 | __disp_h >> 4); \
+})
+
+#define EMIT6_DISP_LH(op1, op2, b1, b2, b3, disp) \
+({ \
+ _EMIT6_DISP_LH(op1 | reg(b1, b2) << 16 | \
+ reg_high(b3) << 8, op2, disp); \
+ REG_SET_SEEN(b1); \
+ REG_SET_SEEN(b2); \
+ REG_SET_SEEN(b3); \
+})
+
+#define EMIT6_PCREL_LABEL(op1, op2, b1, b2, label, mask) \
+({ \
+ int rel = (jit->labels[label] - jit->prg) >> 1; \
+ _EMIT6(op1 | reg(b1, b2) << 16 | (rel & 0xffff), \
+ op2 | mask << 12); \
+ REG_SET_SEEN(b1); \
+ REG_SET_SEEN(b2); \
+})
+
+#define EMIT6_PCREL_IMM_LABEL(op1, op2, b1, imm, label, mask) \
+({ \
+ int rel = (jit->labels[label] - jit->prg) >> 1; \
+ _EMIT6(op1 | (reg_high(b1) | mask) << 16 | \
+ (rel & 0xffff), op2 | (imm & 0xff) << 8); \
+ REG_SET_SEEN(b1); \
+ BUILD_BUG_ON(((unsigned long) imm) > 0xff); \
+})
+
+#define EMIT6_PCREL(op1, op2, b1, b2, i, off, mask) \
+({ \
+ /* Branch instruction needs 6 bytes */ \
+ int rel = (addrs[i + off + 1] - (addrs[i + 1] - 6)) / 2;\
+ _EMIT6(op1 | reg(b1, b2) << 16 | (rel & 0xffff), op2 | mask); \
+ REG_SET_SEEN(b1); \
+ REG_SET_SEEN(b2); \
+})
+
+#define EMIT6_PCREL_RILB(op, b, target) \
+({ \
+ int rel = (target - jit->prg) / 2; \
+ _EMIT6(op | reg_high(b) << 16 | rel >> 16, rel & 0xffff); \
+ REG_SET_SEEN(b); \
+})
+
+#define EMIT6_PCREL_RIL(op, target) \
+({ \
+ int rel = (target - jit->prg) / 2; \
+ _EMIT6(op | rel >> 16, rel & 0xffff); \
+})
+
+#define _EMIT6_IMM(op, imm) \
+({ \
+ unsigned int __imm = (imm); \
+ _EMIT6(op | (__imm >> 16), __imm & 0xffff); \
+})
+
+#define EMIT6_IMM(op, b1, imm) \
+({ \
+ _EMIT6_IMM(op | reg_high(b1) << 16, imm); \
+ REG_SET_SEEN(b1); \
+})
+
+#define EMIT_CONST_U32(val) \
+({ \
+ unsigned int ret; \
+ ret = jit->lit - jit->base_ip; \
+ jit->seen |= SEEN_LITERAL; \
+ if (jit->prg_buf) \
+ *(u32 *) (jit->prg_buf + jit->lit) = (u32) val; \
+ jit->lit += 4; \
+ ret; \
+})
+
+#define EMIT_CONST_U64(val) \
+({ \
+ unsigned int ret; \
+ ret = jit->lit - jit->base_ip; \
+ jit->seen |= SEEN_LITERAL; \
+ if (jit->prg_buf) \
+ *(u64 *) (jit->prg_buf + jit->lit) = (u64) val; \
+ jit->lit += 8; \
+ ret; \
+})
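+
+/*
+ * Literal pool sketch (illustrative only): EMIT_CONST_U32/U64 append the
+ * constant at jit->lit and return its displacement relative to jit->base_ip.
+ * A 64-bit immediate is then typically loaded with
+ *
+ *	lg	%dst,<disp>(%r13)
+ *
+ * where %r13 (REG_L) has been set up by "basr %r13,0" in the prologue and
+ * <disp> is the value returned by EMIT_CONST_U64().
+ */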
+
+#define EMIT_ZERO(b1) \
+({ \
+ /* llgfr %dst,%dst (zero extend to 64 bit) */ \
+ EMIT4(0xb9160000, b1, b1); \
+ REG_SET_SEEN(b1); \
+})
+
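+/*
+ * Sizing-pass sketch (illustrative only): because the _EMIT* macros skip the
+ * store when jit->prg_buf is NULL but still advance jit->prg, the same
+ * emitter code serves both the initial sizing passes and the final code
+ * generation pass. For example
+ *
+ *	EMIT4(0xb9040000, dst_reg, src_reg);	lgr %dst,%src
+ *	EMIT_ZERO(dst_reg);			llgfr %dst,%dst
+ *
+ * advances jit->prg by 8 bytes in every pass and only writes the opcodes
+ * once a program buffer has been allocated.
+ */
+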
+/*
+ * Fill whole space with illegal instructions: the all-zero opcode 0x0000
+ * is an illegal instruction on s390, so clearing the area is sufficient.
+ */
+static void jit_fill_hole(void *area, unsigned int size)
+{
+ memset(area, 0, size);
+}
+
+/*
+ * Save registers from "rs" (register start) to "re" (register end) on stack
+ */
+static void save_regs(struct bpf_jit *jit, u32 rs, u32 re)
+{
+ u32 off = STK_OFF_R6 + (rs - 6) * 8;
+
+ if (rs == re)
+ /* stg %rs,off(%r15) */
+ _EMIT6(0xe300f000 | rs << 20 | off, 0x0024);
+ else
+ /* stmg %rs,%re,off(%r15) */
+ _EMIT6_DISP(0xeb00f000 | rs << 20 | re << 16, 0x0024, off);
+}
+
+/*
+ * Restore registers from "rs" (register start) to "re" (register end) on stack
+ */
+static void restore_regs(struct bpf_jit *jit, u32 rs, u32 re, u32 stack_depth)
+{
+ u32 off = STK_OFF_R6 + (rs - 6) * 8;
+
+ if (jit->seen & SEEN_STACK)
+ off += STK_OFF + stack_depth;
+
+ if (rs == re)
+ /* lg %rs,off(%r15) */
+ _EMIT6(0xe300f000 | rs << 20 | off, 0x0004);
+ else
+ /* lmg %rs,%re,off(%r15) */
+ _EMIT6_DISP(0xeb00f000 | rs << 20 | re << 16, 0x0004, off);
+}
+
+/*
+ * Return first seen register (from start)
+ */
+static int get_start(struct bpf_jit *jit, int start)
+{
+ int i;
+
+ for (i = start; i <= 15; i++) {
+ if (jit->seen_reg[i])
+ return i;
+ }
+ return 0;
+}
+
+/*
+ * Return last seen register (from start) before a gap of at least two
+ * unused registers
+ */
+static int get_end(struct bpf_jit *jit, int start)
+{
+ int i;
+
+ for (i = start; i < 15; i++) {
+ if (!jit->seen_reg[i] && !jit->seen_reg[i + 1])
+ return i - 1;
+ }
+ return jit->seen_reg[15] ? 15 : 14;
+}
+
+#define REGS_SAVE 1
+#define REGS_RESTORE 0
+/*
+ * Save and restore clobbered registers (6-15) on stack.
+ * We save/restore registers in chunks with gap >= 2 registers.
+ */
+static void save_restore_regs(struct bpf_jit *jit, int op, u32 stack_depth)
+{
+
+ int re = 6, rs;
+
+ do {
+ rs = get_start(jit, re);
+ if (!rs)
+ break;
+ re = get_end(jit, rs + 1);
+ if (op == REGS_SAVE)
+ save_regs(jit, rs, re);
+ else
+ restore_regs(jit, rs, re, stack_depth);
+ re++;
+ } while (re <= 15);
+}
+
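+/*
+ * Example (sketch): if only %r6, %r7, %r11 and %r12 were marked as seen,
+ * save_restore_regs(jit, REGS_SAVE, 0) emits two chunks, because the gap
+ * between %r7 and %r11 spans at least two unused registers:
+ *
+ *	stmg	%r6,%r7,STK_OFF_R6(%r15)
+ *	stmg	%r11,%r12,STK_OFF_R6+40(%r15)
+ */
+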
+/*
+ * Emit function prologue
+ *
+ * Save registers and create stack frame if necessary.
+ * See stack frame layout description in "bpf_jit.h"!
+ */
+static void bpf_jit_prologue(struct bpf_jit *jit, u32 stack_depth)
+{
+ if (jit->seen & SEEN_TAIL_CALL) {
+ /* xc STK_OFF_TCCNT(4,%r15),STK_OFF_TCCNT(%r15) */
+ _EMIT6(0xd703f000 | STK_OFF_TCCNT, 0xf000 | STK_OFF_TCCNT);
+ } else {
+ /* j tail_call_start: NOP if no tail calls are used */
+ EMIT4_PCREL(0xa7f40000, 6);
+ _EMIT2(0);
+ }
+ /* Tail calls have to skip above initialization */
+ jit->tail_call_start = jit->prg;
+ /* Save registers */
+ save_restore_regs(jit, REGS_SAVE, stack_depth);
+ /* Setup literal pool */
+ if (jit->seen & SEEN_LITERAL) {
+ /* basr %r13,0 */
+ EMIT2(0x0d00, REG_L, REG_0);
+ jit->base_ip = jit->prg;
+ }
+ /* Setup stack and backchain */
+ if (jit->seen & SEEN_STACK) {
+ if (jit->seen & SEEN_FUNC)
+ /* lgr %w1,%r15 (backchain) */
+ EMIT4(0xb9040000, REG_W1, REG_15);
+ /* la %bfp,STK_160_UNUSED(%r15) (BPF frame pointer) */
+ EMIT4_DISP(0x41000000, BPF_REG_FP, REG_15, STK_160_UNUSED);
+ /* aghi %r15,-STK_OFF */
+ EMIT4_IMM(0xa70b0000, REG_15, -(STK_OFF + stack_depth));
+ if (jit->seen & SEEN_FUNC)
+ /* stg %w1,152(%r15) (backchain) */
+ EMIT6_DISP_LH(0xe3000000, 0x0024, REG_W1, REG_0,
+ REG_15, 152);
+ }
+}
+
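+/*
+ * Prologue sketch (illustrative only; registers and offsets depend on the
+ * program): for a program that uses helpers, the stack and the literal
+ * pool, the emitted prologue roughly looks like
+ *
+ *	j	tail_call_start		(or xc of the tail call counter)
+ *	stmg	%r6,...,...(%r15)	save clobbered registers
+ *	basr	%r13,0			set up literal pool base
+ *	lgr	%w1,%r15		remember backchain
+ *	la	%bfp,STK_160_UNUSED(%r15)
+ *	aghi	%r15,-(STK_OFF+stack_depth)
+ *	stg	%w1,152(%r15)		store backchain
+ */
+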
+/*
+ * Function epilogue
+ */
+static void bpf_jit_epilogue(struct bpf_jit *jit, u32 stack_depth)
+{
+ /* Return 0 */
+ if (jit->seen & SEEN_RET0) {
+ jit->ret0_ip = jit->prg;
+ /* lghi %b0,0 */
+ EMIT4_IMM(0xa7090000, BPF_REG_0, 0);
+ }
+ jit->exit_ip = jit->prg;
+ /* Load exit code: lgr %r2,%b0 */
+ EMIT4(0xb9040000, REG_2, BPF_REG_0);
+ /* Restore registers */
+ save_restore_regs(jit, REGS_RESTORE, stack_depth);
+ if (IS_ENABLED(CC_USING_EXPOLINE) && !nospec_disable) {
+ jit->r14_thunk_ip = jit->prg;
+ /* Generate __s390_indirect_jump_r14 thunk */
+ if (test_facility(35)) {
+ /* exrl %r0,.+10 */
+ EMIT6_PCREL_RIL(0xc6000000, jit->prg + 10);
+ } else {
+ /* larl %r1,.+14 */
+ EMIT6_PCREL_RILB(0xc0000000, REG_1, jit->prg + 14);
+ /* ex 0,0(%r1) */
+ EMIT4_DISP(0x44000000, REG_0, REG_1, 0);
+ }
+ /* j . */
+ EMIT4_PCREL(0xa7f40000, 0);
+ }
+ /* br %r14 */
+ _EMIT2(0x07fe);
+
+ if (IS_ENABLED(CC_USING_EXPOLINE) && !nospec_disable &&
+ (jit->seen & SEEN_FUNC)) {
+ jit->r1_thunk_ip = jit->prg;
+ /* Generate __s390_indirect_jump_r1 thunk */
+ if (test_facility(35)) {
+ /* exrl %r0,.+10 */
+ EMIT6_PCREL_RIL(0xc6000000, jit->prg + 10);
+ /* j . */
+ EMIT4_PCREL(0xa7f40000, 0);
+ /* br %r1 */
+ _EMIT2(0x07f1);
+ } else {
+ /* ex 0,S390_lowcore.br_r1_trampoline */
+ EMIT4_DISP(0x44000000, REG_0, REG_0,
+ offsetof(struct lowcore, br_r1_trampoline));
+ /* j . */
+ EMIT4_PCREL(0xa7f40000, 0);
+ }
+ }
+}
+
+/*
+ * Compile one eBPF instruction into s390x code
+ *
+ * NOTE: Use noinline because for gcov (-fprofile-arcs) gcc allocates a lot of
+ * stack space for the large switch statement.
+ */
+static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp, int i)
+{
+ struct bpf_insn *insn = &fp->insnsi[i];
+ int jmp_off, last, insn_count = 1;
+ u32 dst_reg = insn->dst_reg;
+ u32 src_reg = insn->src_reg;
+ u32 *addrs = jit->addrs;
+ s32 imm = insn->imm;
+ s16 off = insn->off;
+ unsigned int mask;
+
+ if (dst_reg == BPF_REG_AX || src_reg == BPF_REG_AX)
+ jit->seen |= SEEN_REG_AX;
+ switch (insn->code) {
+ /*
+ * BPF_MOV
+ */
+ case BPF_ALU | BPF_MOV | BPF_X: /* dst = (u32) src */
+ /* llgfr %dst,%src */
+ EMIT4(0xb9160000, dst_reg, src_reg);
+ break;
+ case BPF_ALU64 | BPF_MOV | BPF_X: /* dst = src */
+ /* lgr %dst,%src */
+ EMIT4(0xb9040000, dst_reg, src_reg);
+ break;
+ case BPF_ALU | BPF_MOV | BPF_K: /* dst = (u32) imm */
+ /* llilf %dst,imm */
+ EMIT6_IMM(0xc00f0000, dst_reg, imm);
+ break;
+ case BPF_ALU64 | BPF_MOV | BPF_K: /* dst = imm */
+ /* lgfi %dst,imm */
+ EMIT6_IMM(0xc0010000, dst_reg, imm);
+ break;
+ /*
+ * BPF_LD 64
+ */
+ case BPF_LD | BPF_IMM | BPF_DW: /* dst = (u64) imm */
+ {
+ /* 16 byte instruction that uses two 'struct bpf_insn' */
+ u64 imm64;
+
+ imm64 = (u64)(u32) insn[0].imm | ((u64)(u32) insn[1].imm) << 32;
+ /* lg %dst,<d(imm)>(%l) */
+ EMIT6_DISP_LH(0xe3000000, 0x0004, dst_reg, REG_0, REG_L,
+ EMIT_CONST_U64(imm64));
+ insn_count = 2;
+ break;
+ }
+ /*
+ * BPF_ADD
+ */
+ case BPF_ALU | BPF_ADD | BPF_X: /* dst = (u32) dst + (u32) src */
+ /* ar %dst,%src */
+ EMIT2(0x1a00, dst_reg, src_reg);
+ EMIT_ZERO(dst_reg);
+ break;
+ case BPF_ALU64 | BPF_ADD | BPF_X: /* dst = dst + src */
+ /* agr %dst,%src */
+ EMIT4(0xb9080000, dst_reg, src_reg);
+ break;
+ case BPF_ALU | BPF_ADD | BPF_K: /* dst = (u32) dst + (u32) imm */
+ if (imm != 0) {
+ /* alfi %dst,imm */
+ EMIT6_IMM(0xc20b0000, dst_reg, imm);
+ }
+ EMIT_ZERO(dst_reg);
+ break;
+ case BPF_ALU64 | BPF_ADD | BPF_K: /* dst = dst + imm */
+ if (!imm)
+ break;
+ /* agfi %dst,imm */
+ EMIT6_IMM(0xc2080000, dst_reg, imm);
+ break;
+ /*
+ * BPF_SUB
+ */
+ case BPF_ALU | BPF_SUB | BPF_X: /* dst = (u32) dst - (u32) src */
+ /* sr %dst,%src */
+ EMIT2(0x1b00, dst_reg, src_reg);
+ EMIT_ZERO(dst_reg);
+ break;
+ case BPF_ALU64 | BPF_SUB | BPF_X: /* dst = dst - src */
+ /* sgr %dst,%src */
+ EMIT4(0xb9090000, dst_reg, src_reg);
+ break;
+ case BPF_ALU | BPF_SUB | BPF_K: /* dst = (u32) dst - (u32) imm */
+ if (imm != 0) {
+ /* alfi %dst,-imm */
+ EMIT6_IMM(0xc20b0000, dst_reg, -imm);
+ }
+ EMIT_ZERO(dst_reg);
+ break;
+ case BPF_ALU64 | BPF_SUB | BPF_K: /* dst = dst - imm */
+ if (!imm)
+ break;
+ if (imm == -0x80000000) {
+ /* algfi %dst,0x80000000 */
+ EMIT6_IMM(0xc20a0000, dst_reg, 0x80000000);
+ } else {
+ /* agfi %dst,-imm */
+ EMIT6_IMM(0xc2080000, dst_reg, -imm);
+ }
+ break;
+ /*
+ * BPF_MUL
+ */
+ case BPF_ALU | BPF_MUL | BPF_X: /* dst = (u32) dst * (u32) src */
+ /* msr %dst,%src */
+ EMIT4(0xb2520000, dst_reg, src_reg);
+ EMIT_ZERO(dst_reg);
+ break;
+ case BPF_ALU64 | BPF_MUL | BPF_X: /* dst = dst * src */
+ /* msgr %dst,%src */
+ EMIT4(0xb90c0000, dst_reg, src_reg);
+ break;
+ case BPF_ALU | BPF_MUL | BPF_K: /* dst = (u32) dst * (u32) imm */
+ if (imm != 1) {
+ /* msfi %dst,imm */
+ EMIT6_IMM(0xc2010000, dst_reg, imm);
+ }
+ EMIT_ZERO(dst_reg);
+ break;
+ case BPF_ALU64 | BPF_MUL | BPF_K: /* dst = dst * imm */
+ if (imm == 1)
+ break;
+ /* msgfi %dst,imm */
+ EMIT6_IMM(0xc2000000, dst_reg, imm);
+ break;
+ /*
+ * BPF_DIV / BPF_MOD
+ */
+ case BPF_ALU | BPF_DIV | BPF_X: /* dst = (u32) dst / (u32) src */
+ case BPF_ALU | BPF_MOD | BPF_X: /* dst = (u32) dst % (u32) src */
+ {
+ int rc_reg = BPF_OP(insn->code) == BPF_DIV ? REG_W1 : REG_W0;
+
+ /* lhi %w0,0 */
+ EMIT4_IMM(0xa7080000, REG_W0, 0);
+ /* lr %w1,%dst */
+ EMIT2(0x1800, REG_W1, dst_reg);
+ /* dlr %w0,%src */
+ EMIT4(0xb9970000, REG_W0, src_reg);
+ /* llgfr %dst,%rc */
+ EMIT4(0xb9160000, dst_reg, rc_reg);
+ break;
+ }
+ case BPF_ALU64 | BPF_DIV | BPF_X: /* dst = dst / src */
+ case BPF_ALU64 | BPF_MOD | BPF_X: /* dst = dst % src */
+ {
+ int rc_reg = BPF_OP(insn->code) == BPF_DIV ? REG_W1 : REG_W0;
+
+ /* lghi %w0,0 */
+ EMIT4_IMM(0xa7090000, REG_W0, 0);
+ /* lgr %w1,%dst */
+ EMIT4(0xb9040000, REG_W1, dst_reg);
+ /* dlgr %w0,%dst */
+ EMIT4(0xb9870000, REG_W0, src_reg);
+ /* lgr %dst,%rc */
+ EMIT4(0xb9040000, dst_reg, rc_reg);
+ break;
+ }
+ case BPF_ALU | BPF_DIV | BPF_K: /* dst = (u32) dst / (u32) imm */
+ case BPF_ALU | BPF_MOD | BPF_K: /* dst = (u32) dst % (u32) imm */
+ {
+ int rc_reg = BPF_OP(insn->code) == BPF_DIV ? REG_W1 : REG_W0;
+
+ if (imm == 1) {
+ if (BPF_OP(insn->code) == BPF_MOD)
+ /* lghi %dst,0 */
+ EMIT4_IMM(0xa7090000, dst_reg, 0);
+ else
+ EMIT_ZERO(dst_reg);
+ break;
+ }
+ /* lhi %w0,0 */
+ EMIT4_IMM(0xa7080000, REG_W0, 0);
+ /* lr %w1,%dst */
+ EMIT2(0x1800, REG_W1, dst_reg);
+ /* dl %w0,<d(imm)>(%l) */
+ EMIT6_DISP_LH(0xe3000000, 0x0097, REG_W0, REG_0, REG_L,
+ EMIT_CONST_U32(imm));
+ /* llgfr %dst,%rc */
+ EMIT4(0xb9160000, dst_reg, rc_reg);
+ break;
+ }
+ case BPF_ALU64 | BPF_DIV | BPF_K: /* dst = dst / imm */
+ case BPF_ALU64 | BPF_MOD | BPF_K: /* dst = dst % imm */
+ {
+ int rc_reg = BPF_OP(insn->code) == BPF_DIV ? REG_W1 : REG_W0;
+
+ if (imm == 1) {
+ if (BPF_OP(insn->code) == BPF_MOD)
+ /* lghi %dst,0 */
+ EMIT4_IMM(0xa7090000, dst_reg, 0);
+ break;
+ }
+ /* lghi %w0,0 */
+ EMIT4_IMM(0xa7090000, REG_W0, 0);
+ /* lgr %w1,%dst */
+ EMIT4(0xb9040000, REG_W1, dst_reg);
+ /* dlg %w0,<d(imm)>(%l) */
+ EMIT6_DISP_LH(0xe3000000, 0x0087, REG_W0, REG_0, REG_L,
+ EMIT_CONST_U64(imm));
+ /* lgr %dst,%rc */
+ EMIT4(0xb9040000, dst_reg, rc_reg);
+ break;
+ }
+ /*
+ * BPF_AND
+ */
+ case BPF_ALU | BPF_AND | BPF_X: /* dst = (u32) dst & (u32) src */
+ /* nr %dst,%src */
+ EMIT2(0x1400, dst_reg, src_reg);
+ EMIT_ZERO(dst_reg);
+ break;
+ case BPF_ALU64 | BPF_AND | BPF_X: /* dst = dst & src */
+ /* ngr %dst,%src */
+ EMIT4(0xb9800000, dst_reg, src_reg);
+ break;
+ case BPF_ALU | BPF_AND | BPF_K: /* dst = (u32) dst & (u32) imm */
+ /* nilf %dst,imm */
+ EMIT6_IMM(0xc00b0000, dst_reg, imm);
+ EMIT_ZERO(dst_reg);
+ break;
+ case BPF_ALU64 | BPF_AND | BPF_K: /* dst = dst & imm */
+ /* ng %dst,<d(imm)>(%l) */
+ EMIT6_DISP_LH(0xe3000000, 0x0080, dst_reg, REG_0, REG_L,
+ EMIT_CONST_U64(imm));
+ break;
+ /*
+ * BPF_OR
+ */
+ case BPF_ALU | BPF_OR | BPF_X: /* dst = (u32) dst | (u32) src */
+ /* or %dst,%src */
+ EMIT2(0x1600, dst_reg, src_reg);
+ EMIT_ZERO(dst_reg);
+ break;
+ case BPF_ALU64 | BPF_OR | BPF_X: /* dst = dst | src */
+ /* ogr %dst,%src */
+ EMIT4(0xb9810000, dst_reg, src_reg);
+ break;
+ case BPF_ALU | BPF_OR | BPF_K: /* dst = (u32) dst | (u32) imm */
+ /* oilf %dst,imm */
+ EMIT6_IMM(0xc00d0000, dst_reg, imm);
+ EMIT_ZERO(dst_reg);
+ break;
+ case BPF_ALU64 | BPF_OR | BPF_K: /* dst = dst | imm */
+ /* og %dst,<d(imm)>(%l) */
+ EMIT6_DISP_LH(0xe3000000, 0x0081, dst_reg, REG_0, REG_L,
+ EMIT_CONST_U64(imm));
+ break;
+ /*
+ * BPF_XOR
+ */
+ case BPF_ALU | BPF_XOR | BPF_X: /* dst = (u32) dst ^ (u32) src */
+ /* xr %dst,%src */
+ EMIT2(0x1700, dst_reg, src_reg);
+ EMIT_ZERO(dst_reg);
+ break;
+ case BPF_ALU64 | BPF_XOR | BPF_X: /* dst = dst ^ src */
+ /* xgr %dst,%src */
+ EMIT4(0xb9820000, dst_reg, src_reg);
+ break;
+ case BPF_ALU | BPF_XOR | BPF_K: /* dst = (u32) dst ^ (u32) imm */
+ if (imm != 0) {
+ /* xilf %dst,imm */
+ EMIT6_IMM(0xc0070000, dst_reg, imm);
+ }
+ EMIT_ZERO(dst_reg);
+ break;
+ case BPF_ALU64 | BPF_XOR | BPF_K: /* dst = dst ^ imm */
+ /* xg %dst,<d(imm)>(%l) */
+ EMIT6_DISP_LH(0xe3000000, 0x0082, dst_reg, REG_0, REG_L,
+ EMIT_CONST_U64(imm));
+ break;
+ /*
+ * BPF_LSH
+ */
+ case BPF_ALU | BPF_LSH | BPF_X: /* dst = (u32) dst << (u32) src */
+ /* sll %dst,0(%src) */
+ EMIT4_DISP(0x89000000, dst_reg, src_reg, 0);
+ EMIT_ZERO(dst_reg);
+ break;
+ case BPF_ALU64 | BPF_LSH | BPF_X: /* dst = dst << src */
+ /* sllg %dst,%dst,0(%src) */
+ EMIT6_DISP_LH(0xeb000000, 0x000d, dst_reg, dst_reg, src_reg, 0);
+ break;
+ case BPF_ALU | BPF_LSH | BPF_K: /* dst = (u32) dst << (u32) imm */
+ if (imm != 0) {
+ /* sll %dst,imm(%r0) */
+ EMIT4_DISP(0x89000000, dst_reg, REG_0, imm);
+ }
+ EMIT_ZERO(dst_reg);
+ break;
+ case BPF_ALU64 | BPF_LSH | BPF_K: /* dst = dst << imm */
+ if (imm == 0)
+ break;
+ /* sllg %dst,%dst,imm(%r0) */
+ EMIT6_DISP_LH(0xeb000000, 0x000d, dst_reg, dst_reg, REG_0, imm);
+ break;
+ /*
+ * BPF_RSH
+ */
+ case BPF_ALU | BPF_RSH | BPF_X: /* dst = (u32) dst >> (u32) src */
+ /* srl %dst,0(%src) */
+ EMIT4_DISP(0x88000000, dst_reg, src_reg, 0);
+ EMIT_ZERO(dst_reg);
+ break;
+ case BPF_ALU64 | BPF_RSH | BPF_X: /* dst = dst >> src */
+ /* srlg %dst,%dst,0(%src) */
+ EMIT6_DISP_LH(0xeb000000, 0x000c, dst_reg, dst_reg, src_reg, 0);
+ break;
+ case BPF_ALU | BPF_RSH | BPF_K: /* dst = (u32) dst >> (u32) imm */
+ if (imm != 0) {
+ /* srl %dst,imm(%r0) */
+ EMIT4_DISP(0x88000000, dst_reg, REG_0, imm);
+ }
+ EMIT_ZERO(dst_reg);
+ break;
+ case BPF_ALU64 | BPF_RSH | BPF_K: /* dst = dst >> imm */
+ if (imm == 0)
+ break;
+ /* srlg %dst,%dst,imm(%r0) */
+ EMIT6_DISP_LH(0xeb000000, 0x000c, dst_reg, dst_reg, REG_0, imm);
+ break;
+ /*
+ * BPF_ARSH
+ */
+ case BPF_ALU64 | BPF_ARSH | BPF_X: /* ((s64) dst) >>= src */
+ /* srag %dst,%dst,0(%src) */
+ EMIT6_DISP_LH(0xeb000000, 0x000a, dst_reg, dst_reg, src_reg, 0);
+ break;
+ case BPF_ALU64 | BPF_ARSH | BPF_K: /* ((s64) dst) >>= imm */
+ if (imm == 0)
+ break;
+ /* srag %dst,%dst,imm(%r0) */
+ EMIT6_DISP_LH(0xeb000000, 0x000a, dst_reg, dst_reg, REG_0, imm);
+ break;
+ /*
+ * BPF_NEG
+ */
+ case BPF_ALU | BPF_NEG: /* dst = (u32) -dst */
+ /* lcr %dst,%dst */
+ EMIT2(0x1300, dst_reg, dst_reg);
+ EMIT_ZERO(dst_reg);
+ break;
+ case BPF_ALU64 | BPF_NEG: /* dst = -dst */
+ /* lcgr %dst,%dst */
+ EMIT4(0xb9030000, dst_reg, dst_reg);
+ break;
+ /*
+ * BPF_FROM_BE/LE
+ */
+ case BPF_ALU | BPF_END | BPF_FROM_BE:
+ /* s390 is big endian, therefore only clear high order bytes */
+ switch (imm) {
+ case 16: /* dst = (u16) cpu_to_be16(dst) */
+ /* llghr %dst,%dst */
+ EMIT4(0xb9850000, dst_reg, dst_reg);
+ break;
+ case 32: /* dst = (u32) cpu_to_be32(dst) */
+ /* llgfr %dst,%dst */
+ EMIT4(0xb9160000, dst_reg, dst_reg);
+ break;
+ case 64: /* dst = (u64) cpu_to_be64(dst) */
+ break;
+ }
+ break;
+ case BPF_ALU | BPF_END | BPF_FROM_LE:
+ switch (imm) {
+ case 16: /* dst = (u16) cpu_to_le16(dst) */
+ /* lrvr %dst,%dst */
+ EMIT4(0xb91f0000, dst_reg, dst_reg);
+ /* srl %dst,16(%r0) */
+ EMIT4_DISP(0x88000000, dst_reg, REG_0, 16);
+ /* llghr %dst,%dst */
+ EMIT4(0xb9850000, dst_reg, dst_reg);
+ break;
+ case 32: /* dst = (u32) cpu_to_le32(dst) */
+ /* lrvr %dst,%dst */
+ EMIT4(0xb91f0000, dst_reg, dst_reg);
+ /* llgfr %dst,%dst */
+ EMIT4(0xb9160000, dst_reg, dst_reg);
+ break;
+ case 64: /* dst = (u64) cpu_to_le64(dst) */
+ /* lrvgr %dst,%dst */
+ EMIT4(0xb90f0000, dst_reg, dst_reg);
+ break;
+ }
+ break;
+ /*
+ * BPF_NOSPEC (speculation barrier)
+ */
+ case BPF_ST | BPF_NOSPEC:
+ break;
+ /*
+ * BPF_ST(X)
+ */
+ case BPF_STX | BPF_MEM | BPF_B: /* *(u8 *)(dst + off) = src_reg */
+ /* stcy %src,off(%dst) */
+ EMIT6_DISP_LH(0xe3000000, 0x0072, src_reg, dst_reg, REG_0, off);
+ jit->seen |= SEEN_MEM;
+ break;
+ case BPF_STX | BPF_MEM | BPF_H: /* *(u16 *)(dst + off) = src */
+ /* sthy %src,off(%dst) */
+ EMIT6_DISP_LH(0xe3000000, 0x0070, src_reg, dst_reg, REG_0, off);
+ jit->seen |= SEEN_MEM;
+ break;
+ case BPF_STX | BPF_MEM | BPF_W: /* *(u32 *)(dst + off) = src */
+ /* sty %src,off(%dst) */
+ EMIT6_DISP_LH(0xe3000000, 0x0050, src_reg, dst_reg, REG_0, off);
+ jit->seen |= SEEN_MEM;
+ break;
+ case BPF_STX | BPF_MEM | BPF_DW: /* *(u64 *)(dst + off) = src */
+ /* stg %src,off(%dst) */
+ EMIT6_DISP_LH(0xe3000000, 0x0024, src_reg, dst_reg, REG_0, off);
+ jit->seen |= SEEN_MEM;
+ break;
+ case BPF_ST | BPF_MEM | BPF_B: /* *(u8 *)(dst + off) = imm */
+ /* lhi %w0,imm */
+ EMIT4_IMM(0xa7080000, REG_W0, (u8) imm);
+ /* stcy %w0,off(dst) */
+ EMIT6_DISP_LH(0xe3000000, 0x0072, REG_W0, dst_reg, REG_0, off);
+ jit->seen |= SEEN_MEM;
+ break;
+ case BPF_ST | BPF_MEM | BPF_H: /* *(u16 *)(dst + off) = imm */
+ /* lhi %w0,imm */
+ EMIT4_IMM(0xa7080000, REG_W0, (u16) imm);
+ /* sthy %w0,off(dst) */
+ EMIT6_DISP_LH(0xe3000000, 0x0070, REG_W0, dst_reg, REG_0, off);
+ jit->seen |= SEEN_MEM;
+ break;
+ case BPF_ST | BPF_MEM | BPF_W: /* *(u32 *)(dst + off) = imm */
+ /* llilf %w0,imm */
+ EMIT6_IMM(0xc00f0000, REG_W0, (u32) imm);
+ /* sty %w0,off(%dst) */
+ EMIT6_DISP_LH(0xe3000000, 0x0050, REG_W0, dst_reg, REG_0, off);
+ jit->seen |= SEEN_MEM;
+ break;
+ case BPF_ST | BPF_MEM | BPF_DW: /* *(u64 *)(dst + off) = imm */
+ /* lgfi %w0,imm */
+ EMIT6_IMM(0xc0010000, REG_W0, imm);
+ /* stg %w0,off(%dst) */
+ EMIT6_DISP_LH(0xe3000000, 0x0024, REG_W0, dst_reg, REG_0, off);
+ jit->seen |= SEEN_MEM;
+ break;
+ /*
+ * BPF_STX XADD (atomic_add)
+ */
+ case BPF_STX | BPF_XADD | BPF_W: /* *(u32 *)(dst + off) += src */
+ /* laal %w0,%src,off(%dst) */
+ EMIT6_DISP_LH(0xeb000000, 0x00fa, REG_W0, src_reg,
+ dst_reg, off);
+ jit->seen |= SEEN_MEM;
+ break;
+ case BPF_STX | BPF_XADD | BPF_DW: /* *(u64 *)(dst + off) += src */
+ /* laalg %w0,%src,off(%dst) */
+ EMIT6_DISP_LH(0xeb000000, 0x00ea, REG_W0, src_reg,
+ dst_reg, off);
+ jit->seen |= SEEN_MEM;
+ break;
+ /*
+ * BPF_LDX
+ */
+ case BPF_LDX | BPF_MEM | BPF_B: /* dst = *(u8 *)(ul) (src + off) */
+ /* llgc %dst,0(off,%src) */
+ EMIT6_DISP_LH(0xe3000000, 0x0090, dst_reg, src_reg, REG_0, off);
+ jit->seen |= SEEN_MEM;
+ break;
+ case BPF_LDX | BPF_MEM | BPF_H: /* dst = *(u16 *)(ul) (src + off) */
+ /* llgh %dst,0(off,%src) */
+ EMIT6_DISP_LH(0xe3000000, 0x0091, dst_reg, src_reg, REG_0, off);
+ jit->seen |= SEEN_MEM;
+ break;
+ case BPF_LDX | BPF_MEM | BPF_W: /* dst = *(u32 *)(ul) (src + off) */
+ /* llgf %dst,off(%src) */
+ jit->seen |= SEEN_MEM;
+ EMIT6_DISP_LH(0xe3000000, 0x0016, dst_reg, src_reg, REG_0, off);
+ break;
+ case BPF_LDX | BPF_MEM | BPF_DW: /* dst = *(u64 *)(ul) (src + off) */
+ /* lg %dst,0(off,%src) */
+ jit->seen |= SEEN_MEM;
+ EMIT6_DISP_LH(0xe3000000, 0x0004, dst_reg, src_reg, REG_0, off);
+ break;
+ /*
+ * BPF_JMP / CALL
+ */
+ case BPF_JMP | BPF_CALL:
+ {
+ /*
+ * b0 = (__bpf_call_base + imm)(b1, b2, b3, b4, b5)
+ */
+ const u64 func = (u64)__bpf_call_base + imm;
+
+ REG_SET_SEEN(BPF_REG_5);
+ jit->seen |= SEEN_FUNC;
+ /* lg %w1,<d(imm)>(%l) */
+ EMIT6_DISP_LH(0xe3000000, 0x0004, REG_W1, REG_0, REG_L,
+ EMIT_CONST_U64(func));
+ if (IS_ENABLED(CC_USING_EXPOLINE) && !nospec_disable) {
+ /* brasl %r14,__s390_indirect_jump_r1 */
+ EMIT6_PCREL_RILB(0xc0050000, REG_14, jit->r1_thunk_ip);
+ } else {
+ /* basr %r14,%w1 */
+ EMIT2(0x0d00, REG_14, REG_W1);
+ }
+ /* lgr %b0,%r2: load return value into %b0 */
+ EMIT4(0xb9040000, BPF_REG_0, REG_2);
+ break;
+ }
+ case BPF_JMP | BPF_TAIL_CALL:
+ /*
+ * Implicit input:
+ * B1: pointer to ctx
+ * B2: pointer to bpf_array
+ * B3: index in bpf_array
+ */
+ jit->seen |= SEEN_TAIL_CALL;
+
+ /*
+ * if (index >= array->map.max_entries)
+ * goto out;
+ */
+
+ /* llgf %w1,map.max_entries(%b2) */
+ EMIT6_DISP_LH(0xe3000000, 0x0016, REG_W1, REG_0, BPF_REG_2,
+ offsetof(struct bpf_array, map.max_entries));
+ /* clrj %b3,%w1,0xa,label0: if (u32)%b3 >= (u32)%w1 goto out */
+ EMIT6_PCREL_LABEL(0xec000000, 0x0077, BPF_REG_3,
+ REG_W1, 0, 0xa);
+
+ /*
+ * if (tail_call_cnt++ > MAX_TAIL_CALL_CNT)
+ * goto out;
+ */
+
+ if (jit->seen & SEEN_STACK)
+ off = STK_OFF_TCCNT + STK_OFF + fp->aux->stack_depth;
+ else
+ off = STK_OFF_TCCNT;
+ /* lhi %w0,1 */
+ EMIT4_IMM(0xa7080000, REG_W0, 1);
+ /* laal %w1,%w0,off(%r15) */
+ EMIT6_DISP_LH(0xeb000000, 0x00fa, REG_W1, REG_W0, REG_15, off);
+ /* clij %w1,MAX_TAIL_CALL_CNT,0x2,label0 */
+ EMIT6_PCREL_IMM_LABEL(0xec000000, 0x007f, REG_W1,
+ MAX_TAIL_CALL_CNT, 0, 0x2);
+
+ /*
+ * prog = array->ptrs[index];
+ * if (prog == NULL)
+ * goto out;
+ */
+
+ /* llgfr %r1,%b3: %r1 = (u32) index */
+ EMIT4(0xb9160000, REG_1, BPF_REG_3);
+ /* sllg %r1,%r1,3: %r1 *= 8 */
+ EMIT6_DISP_LH(0xeb000000, 0x000d, REG_1, REG_1, REG_0, 3);
+ /* lg %r1,prog(%b2,%r1) */
+ EMIT6_DISP_LH(0xe3000000, 0x0004, REG_1, BPF_REG_2,
+ REG_1, offsetof(struct bpf_array, ptrs));
+ /* clgij %r1,0,0x8,label0 */
+ EMIT6_PCREL_IMM_LABEL(0xec000000, 0x007d, REG_1, 0, 0, 0x8);
+
+ /*
+ * Restore registers before calling function
+ */
+ save_restore_regs(jit, REGS_RESTORE, fp->aux->stack_depth);
+
+ /*
+ * goto *(prog->bpf_func + tail_call_start);
+ */
+
+ /* lg %r1,bpf_func(%r1) */
+ EMIT6_DISP_LH(0xe3000000, 0x0004, REG_1, REG_1, REG_0,
+ offsetof(struct bpf_prog, bpf_func));
+ /* bc 0xf,tail_call_start(%r1) */
+ _EMIT4(0x47f01000 + jit->tail_call_start);
+ /* out: */
+ jit->labels[0] = jit->prg;
+ break;
+ case BPF_JMP | BPF_EXIT: /* return b0 */
+ last = (i == fp->len - 1) ? 1 : 0;
+ if (last && !(jit->seen & SEEN_RET0))
+ break;
+ /* j <exit> */
+ EMIT4_PCREL(0xa7f40000, jit->exit_ip - jit->prg);
+ break;
+ /*
+ * Branch relative (number of skipped instructions) to offset on
+ * condition.
+ *
+ * Condition code to mask mapping:
+ *
+ * CC | Description | Mask
+ * ------------------------------
+ * 0 | Operands equal | 8
+ * 1 | First operand low | 4
+ * 2 | First operand high | 2
+ * 3 | Unused | 1
+ *
+ * For s390x relative branches: ip = ip + off_bytes
+ * For BPF relative branches: insn = insn + off_insns + 1
+ *
+ * For example for s390x with offset 0 we jump to the branch
+ * instruction itself (loop) and for BPF with offset 0 we
+ * branch to the instruction behind the branch.
+ */
+ case BPF_JMP | BPF_JA: /* if (true) */
+ mask = 0xf000; /* j */
+ goto branch_oc;
+ case BPF_JMP | BPF_JSGT | BPF_K: /* ((s64) dst > (s64) imm) */
+ mask = 0x2000; /* jh */
+ goto branch_ks;
+ case BPF_JMP | BPF_JSLT | BPF_K: /* ((s64) dst < (s64) imm) */
+ mask = 0x4000; /* jl */
+ goto branch_ks;
+ case BPF_JMP | BPF_JSGE | BPF_K: /* ((s64) dst >= (s64) imm) */
+ mask = 0xa000; /* jhe */
+ goto branch_ks;
+ case BPF_JMP | BPF_JSLE | BPF_K: /* ((s64) dst <= (s64) imm) */
+ mask = 0xc000; /* jle */
+ goto branch_ks;
+ case BPF_JMP | BPF_JGT | BPF_K: /* (dst_reg > imm) */
+ mask = 0x2000; /* jh */
+ goto branch_ku;
+ case BPF_JMP | BPF_JLT | BPF_K: /* (dst_reg < imm) */
+ mask = 0x4000; /* jl */
+ goto branch_ku;
+ case BPF_JMP | BPF_JGE | BPF_K: /* (dst_reg >= imm) */
+ mask = 0xa000; /* jhe */
+ goto branch_ku;
+ case BPF_JMP | BPF_JLE | BPF_K: /* (dst_reg <= imm) */
+ mask = 0xc000; /* jle */
+ goto branch_ku;
+ case BPF_JMP | BPF_JNE | BPF_K: /* (dst_reg != imm) */
+ mask = 0x7000; /* jne */
+ goto branch_ku;
+ case BPF_JMP | BPF_JEQ | BPF_K: /* (dst_reg == imm) */
+ mask = 0x8000; /* je */
+ goto branch_ku;
+ case BPF_JMP | BPF_JSET | BPF_K: /* (dst_reg & imm) */
+ mask = 0x7000; /* jnz */
+ /* lgfi %w1,imm (load sign extend imm) */
+ EMIT6_IMM(0xc0010000, REG_W1, imm);
+ /* ngr %w1,%dst */
+ EMIT4(0xb9800000, REG_W1, dst_reg);
+ goto branch_oc;
+
+ case BPF_JMP | BPF_JSGT | BPF_X: /* ((s64) dst > (s64) src) */
+ mask = 0x2000; /* jh */
+ goto branch_xs;
+ case BPF_JMP | BPF_JSLT | BPF_X: /* ((s64) dst < (s64) src) */
+ mask = 0x4000; /* jl */
+ goto branch_xs;
+ case BPF_JMP | BPF_JSGE | BPF_X: /* ((s64) dst >= (s64) src) */
+ mask = 0xa000; /* jhe */
+ goto branch_xs;
+ case BPF_JMP | BPF_JSLE | BPF_X: /* ((s64) dst <= (s64) src) */
+ mask = 0xc000; /* jle */
+ goto branch_xs;
+ case BPF_JMP | BPF_JGT | BPF_X: /* (dst > src) */
+ mask = 0x2000; /* jh */
+ goto branch_xu;
+ case BPF_JMP | BPF_JLT | BPF_X: /* (dst < src) */
+ mask = 0x4000; /* jl */
+ goto branch_xu;
+ case BPF_JMP | BPF_JGE | BPF_X: /* (dst >= src) */
+ mask = 0xa000; /* jhe */
+ goto branch_xu;
+ case BPF_JMP | BPF_JLE | BPF_X: /* (dst <= src) */
+ mask = 0xc000; /* jle */
+ goto branch_xu;
+ case BPF_JMP | BPF_JNE | BPF_X: /* (dst != src) */
+ mask = 0x7000; /* jne */
+ goto branch_xu;
+ case BPF_JMP | BPF_JEQ | BPF_X: /* (dst == src) */
+ mask = 0x8000; /* je */
+ goto branch_xu;
+ case BPF_JMP | BPF_JSET | BPF_X: /* (dst & src) */
+ mask = 0x7000; /* jnz */
+ /* ngrk %w1,%dst,%src */
+ EMIT4_RRF(0xb9e40000, REG_W1, dst_reg, src_reg);
+ goto branch_oc;
+branch_ks:
+ /* lgfi %w1,imm (load sign extend imm) */
+ EMIT6_IMM(0xc0010000, REG_W1, imm);
+ /* cgrj %dst,%w1,mask,off */
+ EMIT6_PCREL(0xec000000, 0x0064, dst_reg, REG_W1, i, off, mask);
+ break;
+branch_ku:
+ /* lgfi %w1,imm (load sign extend imm) */
+ EMIT6_IMM(0xc0010000, REG_W1, imm);
+ /* clgrj %dst,%w1,mask,off */
+ EMIT6_PCREL(0xec000000, 0x0065, dst_reg, REG_W1, i, off, mask);
+ break;
+branch_xs:
+ /* cgrj %dst,%src,mask,off */
+ EMIT6_PCREL(0xec000000, 0x0064, dst_reg, src_reg, i, off, mask);
+ break;
+branch_xu:
+ /* clgrj %dst,%src,mask,off */
+ EMIT6_PCREL(0xec000000, 0x0065, dst_reg, src_reg, i, off, mask);
+ break;
+branch_oc:
+ /* brc mask,jmp_off (branch instruction needs 4 bytes) */
+ jmp_off = addrs[i + off + 1] - (addrs[i + 1] - 4);
+ EMIT4_PCREL(0xa7040000 | mask << 8, jmp_off);
+ break;
+ default: /* too complex, give up */
+ pr_err("Unknown opcode %02x\n", insn->code);
+ return -1;
+ }
+ return insn_count;
+}
+
+/*
+ * Compile eBPF program into s390x code
+ */
+static int bpf_jit_prog(struct bpf_jit *jit, struct bpf_prog *fp)
+{
+ int i, insn_count;
+
+ jit->lit = jit->lit_start;
+ jit->prg = 0;
+
+ bpf_jit_prologue(jit, fp->aux->stack_depth);
+ for (i = 0; i < fp->len; i += insn_count) {
+ insn_count = bpf_jit_insn(jit, fp, i);
+ if (insn_count < 0)
+ return -1;
+ /* Next instruction address */
+ jit->addrs[i + insn_count] = jit->prg;
+ }
+ bpf_jit_epilogue(jit, fp->aux->stack_depth);
+
+ jit->lit_start = jit->prg;
+ jit->size = jit->lit;
+ jit->size_prg = jit->prg;
+ return 0;
+}
+
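+/*
+ * Note (sketch): jit->addrs[i + 1] records the offset of the code generated
+ * for BPF instruction i + 1. EMIT6_PCREL() and the branch_oc path in
+ * bpf_jit_insn() use these offsets to translate a BPF jump offset (counted
+ * in instructions) into an s390 relative branch distance. The offsets only
+ * stabilize after the initial passes, which is one reason the program is
+ * compiled several times in bpf_int_jit_compile().
+ */
+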
+/*
+ * Compile eBPF program "fp"
+ */
+struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp)
+{
+ struct bpf_prog *tmp, *orig_fp = fp;
+ struct bpf_binary_header *header;
+ bool tmp_blinded = false;
+ struct bpf_jit jit;
+ int pass;
+
+ if (!fp->jit_requested)
+ return orig_fp;
+
+ tmp = bpf_jit_blind_constants(fp);
+ /*
+ * If blinding was requested and we failed during blinding,
+ * we must fall back to the interpreter.
+ */
+ if (IS_ERR(tmp))
+ return orig_fp;
+ if (tmp != fp) {
+ tmp_blinded = true;
+ fp = tmp;
+ }
+
+ memset(&jit, 0, sizeof(jit));
+ jit.addrs = kcalloc(fp->len + 1, sizeof(*jit.addrs), GFP_KERNEL);
+ if (jit.addrs == NULL) {
+ fp = orig_fp;
+ goto out;
+ }
+ /*
+ * Three initial passes:
+ * - 1/2: Determine clobbered registers
+ * - 3: Calculate program size and addrs array
+ */
+ for (pass = 1; pass <= 3; pass++) {
+ if (bpf_jit_prog(&jit, fp)) {
+ fp = orig_fp;
+ goto free_addrs;
+ }
+ }
+ /*
+ * Final pass: Allocate and generate program
+ */
+ if (jit.size >= BPF_SIZE_MAX) {
+ fp = orig_fp;
+ goto free_addrs;
+ }
+ header = bpf_jit_binary_alloc(jit.size, &jit.prg_buf, 2, jit_fill_hole);
+ if (!header) {
+ fp = orig_fp;
+ goto free_addrs;
+ }
+ if (bpf_jit_prog(&jit, fp)) {
+ bpf_jit_binary_free(header);
+ fp = orig_fp;
+ goto free_addrs;
+ }
+ if (bpf_jit_enable > 1) {
+ bpf_jit_dump(fp->len, jit.size, pass, jit.prg_buf);
+ print_fn_code(jit.prg_buf, jit.size_prg);
+ }
+ bpf_jit_binary_lock_ro(header);
+ fp->bpf_func = (void *) jit.prg_buf;
+ fp->jited = 1;
+ fp->jited_len = jit.size;
+free_addrs:
+ kfree(jit.addrs);
+out:
+ if (tmp_blinded)
+ bpf_jit_prog_release_other(fp, fp == orig_fp ?
+ tmp : orig_fp);
+ return fp;
+}
diff --git a/arch/s390/net/pnet.c b/arch/s390/net/pnet.c
new file mode 100644
index 000000000..e22f1b10a
--- /dev/null
+++ b/arch/s390/net/pnet.c
@@ -0,0 +1,80 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * IBM System z PNET ID Support
+ *
+ * Copyright IBM Corp. 2018
+ */
+
+#include <linux/device.h>
+#include <linux/module.h>
+#include <linux/pci.h>
+#include <linux/types.h>
+#include <asm/ccwgroup.h>
+#include <asm/ccwdev.h>
+#include <asm/pnet.h>
+
+/*
+ * Get the PNETIDs from a device.
+ * s390 hardware supports the definition of a so-called Physical Network
+ * Identifier (short PNETID) per network device port. These PNETIDs can be
+ * used to identify network devices that are attached to the same physical
+ * network (broadcast domain).
+ *
+ * The device can be
+ * - a ccwgroup device with all bundled subchannels having the same PNETID
+ * - a PCI attached network device
+ *
+ * Returns:
+ * 0: PNETIDs extracted from device.
+ * -ENOMEM: No memory to extract utility string.
+ * -EOPNOTSUPP: Device type without utility string support.
+ */
+static int pnet_ids_by_device(struct device *dev, u8 *pnetids)
+{
+ memset(pnetids, 0, PNETIDS_LEN);
+ if (dev_is_ccwgroup(dev)) {
+ struct ccwgroup_device *gdev = to_ccwgroupdev(dev);
+ u8 *util_str;
+
+ util_str = ccw_device_get_util_str(gdev->cdev[0], 0);
+ if (!util_str)
+ return -ENOMEM;
+ memcpy(pnetids, util_str, PNETIDS_LEN);
+ kfree(util_str);
+ return 0;
+ }
+ if (dev_is_pci(dev)) {
+ struct zpci_dev *zdev = to_zpci(to_pci_dev(dev));
+
+ memcpy(pnetids, zdev->util_str, sizeof(zdev->util_str));
+ return 0;
+ }
+ return -EOPNOTSUPP;
+}
+
+/*
+ * Extract the pnetid for a device port.
+ *
+ * Return 0 if a pnetid is found and -ENOENT otherwise.
+ */
+int pnet_id_by_dev_port(struct device *dev, unsigned short port, u8 *pnetid)
+{
+ u8 pnetids[MAX_PNETID_PORTS][MAX_PNETID_LEN];
+ static const u8 zero[MAX_PNETID_LEN] = { 0 };
+ int rc = 0;
+
+ if (!dev || port >= MAX_PNETID_PORTS)
+ return -ENOENT;
+
+ if (!pnet_ids_by_device(dev, (u8 *)pnetids) &&
+ memcmp(pnetids[port], zero, MAX_PNETID_LEN))
+ memcpy(pnetid, pnetids[port], MAX_PNETID_LEN);
+ else
+ rc = -ENOENT;
+
+ return rc;
+}
+EXPORT_SYMBOL_GPL(pnet_id_by_dev_port);
+
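+/*
+ * Usage sketch (hypothetical caller, not part of this file): a caller that
+ * wants to know whether two interfaces share a physical network could
+ * compare the PNETIDs of their parent devices, e.g.:
+ *
+ *	u8 pnetid[MAX_PNETID_LEN];
+ *
+ *	if (!pnet_id_by_dev_port(ndev->dev.parent, ndev->dev_port, pnetid))
+ *		pr_debug("PNETID %*phN\n", MAX_PNETID_LEN, pnetid);
+ */
+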
+MODULE_DESCRIPTION("pnetid determination from utility strings");
+MODULE_LICENSE("GPL");
diff --git a/arch/s390/numa/Makefile b/arch/s390/numa/Makefile
new file mode 100644
index 000000000..66c2dff74
--- /dev/null
+++ b/arch/s390/numa/Makefile
@@ -0,0 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0
+obj-y += numa.o
+obj-y += toptree.o
+obj-$(CONFIG_NUMA_EMU) += mode_emu.o
diff --git a/arch/s390/numa/mode_emu.c b/arch/s390/numa/mode_emu.c
new file mode 100644
index 000000000..83b222c57
--- /dev/null
+++ b/arch/s390/numa/mode_emu.c
@@ -0,0 +1,576 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * NUMA support for s390
+ *
+ * NUMA emulation (aka fake NUMA) distributes the available memory to nodes
+ * without using real topology information about the physical memory of the
+ * machine.
+ *
+ * It distributes the available CPUs to nodes while respecting the original
+ * machine topology information. This is done by trying to avoid separating
+ * CPUs that reside on the same book or even on the same MC.
+ *
+ * Because the current Linux scheduler code requires a stable cpu to node
+ * mapping, cores are pinned to nodes when the first CPU thread is set online.
+ *
+ * Copyright IBM Corp. 2015
+ */
+
+#define KMSG_COMPONENT "numa_emu"
+#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
+
+#include <linux/kernel.h>
+#include <linux/cpumask.h>
+#include <linux/memblock.h>
+#include <linux/bootmem.h>
+#include <linux/node.h>
+#include <linux/memory.h>
+#include <linux/slab.h>
+#include <asm/smp.h>
+#include <asm/topology.h>
+#include "numa_mode.h"
+#include "toptree.h"
+
+/* Distances between the different system components */
+#define DIST_EMPTY 0
+#define DIST_CORE 1
+#define DIST_MC 2
+#define DIST_BOOK 3
+#define DIST_DRAWER 4
+#define DIST_MAX 5
+
+/* Node distance reported to common code */
+#define EMU_NODE_DIST 10
+
+/* Node ID for free (not yet pinned) cores */
+#define NODE_ID_FREE -1
+
+/* Different levels of toptree */
+enum toptree_level {CORE, MC, BOOK, DRAWER, NODE, TOPOLOGY};
+
+/* The two toptree IDs */
+enum {TOPTREE_ID_PHYS, TOPTREE_ID_NUMA};
+
+/* Number of NUMA nodes */
+static int emu_nodes = 1;
+/* NUMA stripe size */
+static unsigned long emu_size;
+
+/*
+ * Node to core pinning information updates are protected by
+ * "sched_domains_mutex".
+ */
+static struct {
+ s32 to_node_id[CONFIG_NR_CPUS]; /* Pinned core to node mapping */
+ int total; /* Total number of pinned cores */
+ int per_node_target; /* Cores per node without extra cores */
+ int per_node[MAX_NUMNODES]; /* Number of cores pinned to node */
+} *emu_cores;
+
+/*
+ * Pin a core to a node
+ */
+static void pin_core_to_node(int core_id, int node_id)
+{
+ if (emu_cores->to_node_id[core_id] == NODE_ID_FREE) {
+ emu_cores->per_node[node_id]++;
+ emu_cores->to_node_id[core_id] = node_id;
+ emu_cores->total++;
+ } else {
+ WARN_ON(emu_cores->to_node_id[core_id] != node_id);
+ }
+}
+
+/*
+ * Number of pinned cores of a node
+ */
+static int cores_pinned(struct toptree *node)
+{
+ return emu_cores->per_node[node->id];
+}
+
+/*
+ * ID of the node where the core is pinned (or NODE_ID_FREE)
+ */
+static int core_pinned_to_node_id(struct toptree *core)
+{
+ return emu_cores->to_node_id[core->id];
+}
+
+/*
+ * Number of cores in the tree that are not yet pinned
+ */
+static int cores_free(struct toptree *tree)
+{
+ struct toptree *core;
+ int count = 0;
+
+ toptree_for_each(core, tree, CORE) {
+ if (core_pinned_to_node_id(core) == NODE_ID_FREE)
+ count++;
+ }
+ return count;
+}
+
+/*
+ * Return node of core
+ */
+static struct toptree *core_node(struct toptree *core)
+{
+ return core->parent->parent->parent->parent;
+}
+
+/*
+ * Return drawer of core
+ */
+static struct toptree *core_drawer(struct toptree *core)
+{
+ return core->parent->parent->parent;
+}
+
+/*
+ * Return book of core
+ */
+static struct toptree *core_book(struct toptree *core)
+{
+ return core->parent->parent;
+}
+
+/*
+ * Return mc of core
+ */
+static struct toptree *core_mc(struct toptree *core)
+{
+ return core->parent;
+}
+
+/*
+ * Distance between two cores
+ */
+static int dist_core_to_core(struct toptree *core1, struct toptree *core2)
+{
+ if (core_drawer(core1)->id != core_drawer(core2)->id)
+ return DIST_DRAWER;
+ if (core_book(core1)->id != core_book(core2)->id)
+ return DIST_BOOK;
+ if (core_mc(core1)->id != core_mc(core2)->id)
+ return DIST_MC;
+ /* Same core or sibling on same MC */
+ return DIST_CORE;
+}
+
+/*
+ * Distance of a node to a core
+ */
+static int dist_node_to_core(struct toptree *node, struct toptree *core)
+{
+ struct toptree *core_node;
+ int dist_min = DIST_MAX;
+
+ toptree_for_each(core_node, node, CORE)
+ dist_min = min(dist_min, dist_core_to_core(core_node, core));
+ return dist_min == DIST_MAX ? DIST_EMPTY : dist_min;
+}
+
+/*
+ * Unify will delete empty nodes, therefore recreate nodes.
+ */
+static void toptree_unify_tree(struct toptree *tree)
+{
+ int nid;
+
+ toptree_unify(tree);
+ for (nid = 0; nid < emu_nodes; nid++)
+ toptree_get_child(tree, nid);
+}
+
+/*
+ * Find the best/nearest node for a given core and ensure that no node
+ * gets more than "emu_cores->per_node_target + extra" cores.
+ */
+static struct toptree *node_for_core(struct toptree *numa, struct toptree *core,
+ int extra)
+{
+ struct toptree *node, *node_best = NULL;
+ int dist_cur, dist_best, cores_target;
+
+ cores_target = emu_cores->per_node_target + extra;
+ dist_best = DIST_MAX;
+ node_best = NULL;
+ toptree_for_each(node, numa, NODE) {
+ /* Already pinned cores must use their nodes */
+ if (core_pinned_to_node_id(core) == node->id) {
+ node_best = node;
+ break;
+ }
+ /* Skip nodes that already have enough cores */
+ if (cores_pinned(node) >= cores_target)
+ continue;
+ dist_cur = dist_node_to_core(node, core);
+ if (dist_cur < dist_best) {
+ dist_best = dist_cur;
+ node_best = node;
+ }
+ }
+ return node_best;
+}
+
+/*
+ * Find the best node for each core with respect to "extra" core count
+ */
+static void toptree_to_numa_single(struct toptree *numa, struct toptree *phys,
+ int extra)
+{
+ struct toptree *node, *core, *tmp;
+
+ toptree_for_each_safe(core, tmp, phys, CORE) {
+ node = node_for_core(numa, core, extra);
+ if (!node)
+ return;
+ toptree_move(core, node);
+ pin_core_to_node(core->id, node->id);
+ }
+}
+
+/*
+ * Move structures of given level to specified NUMA node
+ */
+static void move_level_to_numa_node(struct toptree *node, struct toptree *phys,
+ enum toptree_level level, bool perfect)
+{
+ int cores_free, cores_target = emu_cores->per_node_target;
+ struct toptree *cur, *tmp;
+
+ toptree_for_each_safe(cur, tmp, phys, level) {
+ cores_free = cores_target - toptree_count(node, CORE);
+ if (perfect) {
+ if (cores_free == toptree_count(cur, CORE))
+ toptree_move(cur, node);
+ } else {
+ if (cores_free >= toptree_count(cur, CORE))
+ toptree_move(cur, node);
+ }
+ }
+}
+
+/*
+ * Move structures of a given level to NUMA nodes. If "perfect" is specified
+ * move only perfectly fitting structures. Otherwise move also smaller
+ * than needed structures.
+ */
+static void move_level_to_numa(struct toptree *numa, struct toptree *phys,
+ enum toptree_level level, bool perfect)
+{
+ struct toptree *node;
+
+ toptree_for_each(node, numa, NODE)
+ move_level_to_numa_node(node, phys, level, perfect);
+}
+
+/*
+ * For the first run try to move the big structures
+ */
+static void toptree_to_numa_first(struct toptree *numa, struct toptree *phys)
+{
+ struct toptree *core;
+
+ /* Always try to move perfectly fitting structures first */
+ move_level_to_numa(numa, phys, DRAWER, true);
+ move_level_to_numa(numa, phys, DRAWER, false);
+ move_level_to_numa(numa, phys, BOOK, true);
+ move_level_to_numa(numa, phys, BOOK, false);
+ move_level_to_numa(numa, phys, MC, true);
+ move_level_to_numa(numa, phys, MC, false);
+ /* Now pin all the moved cores */
+ toptree_for_each(core, numa, CORE)
+ pin_core_to_node(core->id, core_node(core)->id);
+}
+
+/*
+ * Allocate new topology and create required nodes
+ */
+static struct toptree *toptree_new(int id, int nodes)
+{
+ struct toptree *tree;
+ int nid;
+
+ tree = toptree_alloc(TOPOLOGY, id);
+ if (!tree)
+ goto fail;
+ for (nid = 0; nid < nodes; nid++) {
+ if (!toptree_get_child(tree, nid))
+ goto fail;
+ }
+ return tree;
+fail:
+ panic("NUMA emulation could not allocate topology");
+}
+
+/*
+ * Allocate and initialize core to node mapping
+ */
+static void __ref create_core_to_node_map(void)
+{
+ int i;
+
+ emu_cores = memblock_virt_alloc(sizeof(*emu_cores), 8);
+ for (i = 0; i < ARRAY_SIZE(emu_cores->to_node_id); i++)
+ emu_cores->to_node_id[i] = NODE_ID_FREE;
+}
+
+/*
+ * Move cores from physical topology into NUMA target topology
+ * and try to keep as much of the physical topology as possible.
+ */
+static struct toptree *toptree_to_numa(struct toptree *phys)
+{
+ static int first = 1;
+ struct toptree *numa;
+ int cores_total;
+
+ cores_total = emu_cores->total + cores_free(phys);
+ emu_cores->per_node_target = cores_total / emu_nodes;
+ numa = toptree_new(TOPTREE_ID_NUMA, emu_nodes);
+ if (first) {
+ toptree_to_numa_first(numa, phys);
+ first = 0;
+ }
+ toptree_to_numa_single(numa, phys, 0);
+ toptree_to_numa_single(numa, phys, 1);
+ toptree_unify_tree(numa);
+
+ WARN_ON(cpumask_weight(&phys->mask));
+ return numa;
+}
+
+/*
+ * Create a toptree out of the physical topology that we got from the hypervisor
+ */
+static struct toptree *toptree_from_topology(void)
+{
+ struct toptree *phys, *node, *drawer, *book, *mc, *core;
+ struct cpu_topology_s390 *top;
+ int cpu;
+
+ phys = toptree_new(TOPTREE_ID_PHYS, 1);
+
+ for_each_cpu(cpu, &cpus_with_topology) {
+ top = &cpu_topology[cpu];
+ node = toptree_get_child(phys, 0);
+ drawer = toptree_get_child(node, top->drawer_id);
+ book = toptree_get_child(drawer, top->book_id);
+ mc = toptree_get_child(book, top->socket_id);
+ core = toptree_get_child(mc, smp_get_base_cpu(cpu));
+ if (!drawer || !book || !mc || !core)
+ panic("NUMA emulation could not allocate memory");
+ cpumask_set_cpu(cpu, &core->mask);
+ toptree_update_mask(mc);
+ }
+ return phys;
+}
+
+/*
+ * Add toptree core to topology and create correct CPU masks
+ */
+static void topology_add_core(struct toptree *core)
+{
+ struct cpu_topology_s390 *top;
+ int cpu;
+
+ for_each_cpu(cpu, &core->mask) {
+ top = &cpu_topology[cpu];
+ cpumask_copy(&top->thread_mask, &core->mask);
+ cpumask_copy(&top->core_mask, &core_mc(core)->mask);
+ cpumask_copy(&top->book_mask, &core_book(core)->mask);
+ cpumask_copy(&top->drawer_mask, &core_drawer(core)->mask);
+ cpumask_set_cpu(cpu, &node_to_cpumask_map[core_node(core)->id]);
+ top->node_id = core_node(core)->id;
+ }
+}
+
+/*
+ * Apply toptree to topology and create CPU masks
+ */
+static void toptree_to_topology(struct toptree *numa)
+{
+ struct toptree *core;
+ int i;
+
+ /* Clear all node masks */
+ for (i = 0; i < MAX_NUMNODES; i++)
+ cpumask_clear(&node_to_cpumask_map[i]);
+
+ /* Rebuild all masks */
+ toptree_for_each(core, numa, CORE)
+ topology_add_core(core);
+}
+
+/*
+ * Show the node to core mapping
+ */
+static void print_node_to_core_map(void)
+{
+ int nid, cid;
+
+ if (!numa_debug_enabled)
+ return;
+ printk(KERN_DEBUG "NUMA node to core mapping\n");
+ for (nid = 0; nid < emu_nodes; nid++) {
+ printk(KERN_DEBUG " node %3d: ", nid);
+ for (cid = 0; cid < ARRAY_SIZE(emu_cores->to_node_id); cid++) {
+ if (emu_cores->to_node_id[cid] == nid)
+ printk(KERN_CONT "%d ", cid);
+ }
+ printk(KERN_CONT "\n");
+ }
+}
+
+static void pin_all_possible_cpus(void)
+{
+ int core_id, node_id, cpu;
+ static int initialized;
+
+ if (initialized)
+ return;
+ print_node_to_core_map();
+ node_id = 0;
+ for_each_possible_cpu(cpu) {
+ core_id = smp_get_base_cpu(cpu);
+ if (emu_cores->to_node_id[core_id] != NODE_ID_FREE)
+ continue;
+ pin_core_to_node(core_id, node_id);
+ cpu_topology[cpu].node_id = node_id;
+ node_id = (node_id + 1) % emu_nodes;
+ }
+ print_node_to_core_map();
+ initialized = 1;
+}
+
+/*
+ * Transfer physical topology into a NUMA topology and modify CPU masks
+ * according to the NUMA topology.
+ *
+ * Must be called with "sched_domains_mutex" lock held.
+ */
+static void emu_update_cpu_topology(void)
+{
+ struct toptree *phys, *numa;
+
+ if (emu_cores == NULL)
+ create_core_to_node_map();
+ phys = toptree_from_topology();
+ numa = toptree_to_numa(phys);
+ toptree_free(phys);
+ toptree_to_topology(numa);
+ toptree_free(numa);
+ pin_all_possible_cpus();
+}
+
+/*
+ * If emu_size is not set, use CONFIG_EMU_SIZE. Then round to minimum
+ * alignment (needed for memory hotplug).
+ */
+static unsigned long emu_setup_size_adjust(unsigned long size)
+{
+ unsigned long size_new;
+
+ size = size ? : CONFIG_EMU_SIZE;
+ size_new = roundup(size, memory_block_size_bytes());
+ if (size_new == size)
+ return size;
+ pr_warn("Increasing memory stripe size from %ld MB to %ld MB\n",
+ size >> 20, size_new >> 20);
+ return size_new;
+}
+
+/*
+ * If there is not enough memory for the specified nodes, reduce the node count.
+ */
+static int emu_setup_nodes_adjust(int nodes)
+{
+ int nodes_max;
+
+ nodes_max = memblock.memory.total_size / emu_size;
+ nodes_max = max(nodes_max, 1);
+ if (nodes_max >= nodes)
+ return nodes;
+ pr_warn("Not enough memory for %d nodes, reducing node count\n", nodes);
+ return nodes_max;
+}
+
+/*
+ * Early emu setup
+ */
+static void emu_setup(void)
+{
+ int nid;
+
+ emu_size = emu_setup_size_adjust(emu_size);
+ emu_nodes = emu_setup_nodes_adjust(emu_nodes);
+ for (nid = 0; nid < emu_nodes; nid++)
+ node_set(nid, node_possible_map);
+ pr_info("Creating %d nodes with memory stripe size %ld MB\n",
+ emu_nodes, emu_size >> 20);
+}
+
+/*
+ * Return node id for given page number
+ */
+static int emu_pfn_to_nid(unsigned long pfn)
+{
+ return (pfn / (emu_size >> PAGE_SHIFT)) % emu_nodes;
+}
+
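+/*
+ * Example (sketch): with a memory stripe size of 1 GiB and emu_nodes = 4,
+ * emu_pfn_to_nid() interleaves memory round-robin in 1 GiB stripes: pages
+ * of the first gigabyte map to node 0, the second to node 1, the third to
+ * node 2, the fourth to node 3, and the fifth to node 0 again.
+ */
+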
+/*
+ * Return stripe size
+ */
+static unsigned long emu_align(void)
+{
+ return emu_size;
+}
+
+/*
+ * Return distance between two nodes
+ */
+static int emu_distance(int node1, int node2)
+{
+ return (node1 != node2) * EMU_NODE_DIST;
+}
+
+/*
+ * Define callbacks for generic s390 NUMA infrastructure
+ */
+const struct numa_mode numa_mode_emu = {
+ .name = "emu",
+ .setup = emu_setup,
+ .update_cpu_topology = emu_update_cpu_topology,
+ .__pfn_to_nid = emu_pfn_to_nid,
+ .align = emu_align,
+ .distance = emu_distance,
+};
+
+/*
+ * Kernel parameter: emu_nodes=<n>
+ */
+static int __init early_parse_emu_nodes(char *p)
+{
+ int count;
+
+ if (kstrtoint(p, 0, &count) != 0 || count <= 0)
+ return 0;
+ if (count <= 0)
+ return 0;
+ emu_nodes = min(count, MAX_NUMNODES);
+ return 0;
+}
+early_param("emu_nodes", early_parse_emu_nodes);
+
+/*
+ * Kernel parameter: emu_size=[<n>[k|M|G|T]]
+ */
+static int __init early_parse_emu_size(char *p)
+{
+ emu_size = memparse(p, NULL);
+ return 0;
+}
+early_param("emu_size", early_parse_emu_size);
diff --git a/arch/s390/numa/numa.c b/arch/s390/numa/numa.c
new file mode 100644
index 000000000..6c151b42e
--- /dev/null
+++ b/arch/s390/numa/numa.c
@@ -0,0 +1,178 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * NUMA support for s390
+ *
+ * Implement NUMA core code.
+ *
+ * Copyright IBM Corp. 2015
+ */
+
+#define KMSG_COMPONENT "numa"
+#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
+
+#include <linux/kernel.h>
+#include <linux/mmzone.h>
+#include <linux/cpumask.h>
+#include <linux/bootmem.h>
+#include <linux/memblock.h>
+#include <linux/slab.h>
+#include <linux/node.h>
+
+#include <asm/numa.h>
+#include "numa_mode.h"
+
+pg_data_t *node_data[MAX_NUMNODES];
+EXPORT_SYMBOL(node_data);
+
+cpumask_t node_to_cpumask_map[MAX_NUMNODES];
+EXPORT_SYMBOL(node_to_cpumask_map);
+
+static void plain_setup(void)
+{
+ node_set(0, node_possible_map);
+}
+
+const struct numa_mode numa_mode_plain = {
+ .name = "plain",
+ .setup = plain_setup,
+};
+
+static const struct numa_mode *mode = &numa_mode_plain;
+
+int numa_pfn_to_nid(unsigned long pfn)
+{
+ return mode->__pfn_to_nid ? mode->__pfn_to_nid(pfn) : 0;
+}
+
+void numa_update_cpu_topology(void)
+{
+ if (mode->update_cpu_topology)
+ mode->update_cpu_topology();
+}
+
+int __node_distance(int a, int b)
+{
+ return mode->distance ? mode->distance(a, b) : 0;
+}
+EXPORT_SYMBOL(__node_distance);
+
+int numa_debug_enabled;
+
+/*
+ * alloc_node_data() - Allocate node data
+ */
+static __init pg_data_t *alloc_node_data(void)
+{
+ pg_data_t *res;
+
+ res = (pg_data_t *) memblock_alloc(sizeof(pg_data_t), 8);
+ memset(res, 0, sizeof(pg_data_t));
+ return res;
+}
+
+/*
+ * numa_setup_memory() - Assign bootmem to nodes
+ *
+ * The memory is first added to memblock without any respect to nodes.
+ * This is fixed before remaining memblock memory is handed over to the
+ * buddy allocator.
+ * An important side effect is that large bootmem allocations might easily
+ * cross node boundaries, which can be needed for large allocations with
+ * smaller memory stripes in each node (i.e. when using NUMA emulation).
+ *
+ * Memory defines nodes:
+ * Therefore this routine also sets online those nodes that contain memory.
+ */
+static void __init numa_setup_memory(void)
+{
+ unsigned long cur_base, align, end_of_dram;
+ int nid = 0;
+
+ end_of_dram = memblock_end_of_DRAM();
+ align = mode->align ? mode->align() : ULONG_MAX;
+
+ /*
+ * Step through all available memory and assign it to the nodes
+ * indicated by the mode implementation.
+ * All nodes which are seen here will be set online.
+ */
+ cur_base = 0;
+ do {
+ nid = numa_pfn_to_nid(PFN_DOWN(cur_base));
+ node_set_online(nid);
+ memblock_set_node(cur_base, align, &memblock.memory, nid);
+ cur_base += align;
+ } while (cur_base < end_of_dram);
+
+ /* Allocate and fill out node_data */
+ for (nid = 0; nid < MAX_NUMNODES; nid++)
+ NODE_DATA(nid) = alloc_node_data();
+
+ for_each_online_node(nid) {
+ unsigned long start_pfn, end_pfn;
+ unsigned long t_start, t_end;
+ int i;
+
+ start_pfn = ULONG_MAX;
+ end_pfn = 0;
+ for_each_mem_pfn_range(i, nid, &t_start, &t_end, NULL) {
+ if (t_start < start_pfn)
+ start_pfn = t_start;
+ if (t_end > end_pfn)
+ end_pfn = t_end;
+ }
+ NODE_DATA(nid)->node_spanned_pages = end_pfn - start_pfn;
+ NODE_DATA(nid)->node_id = nid;
+ }
+}
+
+/*
+ * numa_setup() - Earliest initialization
+ *
+ * Assign the mode and call the mode's setup routine.
+ */
+void __init numa_setup(void)
+{
+ pr_info("NUMA mode: %s\n", mode->name);
+ nodes_clear(node_possible_map);
+ /* Initially attach all possible CPUs to node 0. */
+ cpumask_copy(&node_to_cpumask_map[0], cpu_possible_mask);
+ if (mode->setup)
+ mode->setup();
+ numa_setup_memory();
+ memblock_dump_all();
+}
+
+/*
+ * numa_init_late() - Initialization initcall
+ *
+ * Register NUMA nodes.
+ */
+static int __init numa_init_late(void)
+{
+ int nid;
+
+ for_each_online_node(nid)
+ register_one_node(nid);
+ return 0;
+}
+arch_initcall(numa_init_late);
+
+static int __init parse_debug(char *parm)
+{
+ numa_debug_enabled = 1;
+ return 0;
+}
+early_param("numa_debug", parse_debug);
+
+static int __init parse_numa(char *parm)
+{
+ if (strcmp(parm, numa_mode_plain.name) == 0)
+ mode = &numa_mode_plain;
+#ifdef CONFIG_NUMA_EMU
+ if (strcmp(parm, numa_mode_emu.name) == 0)
+ mode = &numa_mode_emu;
+#endif
+ return 0;
+}
+early_param("numa", parse_numa);
diff --git a/arch/s390/numa/numa_mode.h b/arch/s390/numa/numa_mode.h
new file mode 100644
index 000000000..dfd3e2784
--- /dev/null
+++ b/arch/s390/numa/numa_mode.h
@@ -0,0 +1,25 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * NUMA support for s390
+ *
+ * Define declarations used for communication between NUMA mode
+ * implementations and NUMA core functionality.
+ *
+ * Copyright IBM Corp. 2015
+ */
+#ifndef __S390_NUMA_MODE_H
+#define __S390_NUMA_MODE_H
+
+struct numa_mode {
+ char *name; /* Name of mode */
+ void (*setup)(void); /* Initialize mode */
+ void (*update_cpu_topology)(void); /* Called by topology code */
+ int (*__pfn_to_nid)(unsigned long pfn); /* PFN to node ID */
+ unsigned long (*align)(void); /* Minimum node alignment */
+ int (*distance)(int a, int b); /* Distance between two nodes */
+};
+
+extern const struct numa_mode numa_mode_plain;
+extern const struct numa_mode numa_mode_emu;
+
+#endif /* __S390_NUMA_MODE_H */
diff --git a/arch/s390/numa/toptree.c b/arch/s390/numa/toptree.c
new file mode 100644
index 000000000..21d1e8a15
--- /dev/null
+++ b/arch/s390/numa/toptree.c
@@ -0,0 +1,351 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * NUMA support for s390
+ *
+ * A tree structure used for machine topology mangling
+ *
+ * Copyright IBM Corp. 2015
+ */
+
+#include <linux/kernel.h>
+#include <linux/bootmem.h>
+#include <linux/cpumask.h>
+#include <linux/list.h>
+#include <linux/list_sort.h>
+#include <linux/slab.h>
+#include <asm/numa.h>
+
+#include "toptree.h"
+
+/**
+ * toptree_alloc - Allocate and initialize a new tree node.
+ * @level: The node's vertical level; level 0 contains the leaves.
+ * @id: ID number, explicitly not unique beyond scope of node's siblings
+ *
+ * Allocate a new tree node and initialize it.
+ *
+ * RETURNS:
+ * Pointer to the new tree node or NULL on error
+ */
+struct toptree __ref *toptree_alloc(int level, int id)
+{
+ struct toptree *res;
+
+ if (slab_is_available())
+ res = kzalloc(sizeof(*res), GFP_KERNEL);
+ else
+ res = memblock_virt_alloc(sizeof(*res), 8);
+ if (!res)
+ return res;
+
+ INIT_LIST_HEAD(&res->children);
+ INIT_LIST_HEAD(&res->sibling);
+ cpumask_clear(&res->mask);
+ res->level = level;
+ res->id = id;
+ return res;
+}
+
+/**
+ * toptree_remove - Remove a tree node from a tree
+ * @cand: Pointer to the node to remove
+ *
+ * The node is detached from its parent node. The parent node's
+ * masks will be updated to reflect the loss of the child.
+ */
+static void toptree_remove(struct toptree *cand)
+{
+ struct toptree *oldparent;
+
+ list_del_init(&cand->sibling);
+ oldparent = cand->parent;
+ cand->parent = NULL;
+ toptree_update_mask(oldparent);
+}
+
+/**
+ * toptree_free - discard a tree node
+ * @cand: Pointer to the tree node to discard
+ *
+ * Checks if @cand is attached to a parent node. Detaches it
+ * cleanly using toptree_remove. Possible children are freed
+ * recursively. In the end @cand itself is freed.
+ */
+void __ref toptree_free(struct toptree *cand)
+{
+ struct toptree *child, *tmp;
+
+ if (cand->parent)
+ toptree_remove(cand);
+ toptree_for_each_child_safe(child, tmp, cand)
+ toptree_free(child);
+ if (slab_is_available())
+ kfree(cand);
+ else
+ memblock_free_early((unsigned long)cand, sizeof(*cand));
+}
+
+/**
+ * toptree_update_mask - Update node bitmasks
+ * @cand: Pointer to a tree node
+ *
+ * The node's cpumask will be updated by combining all children's
+ * masks. Then toptree_update_mask is called recursively for the
+ * parent if applicable.
+ *
+ * NOTE:
+ * This must not be called on leaves. If called on a leaf, its
+ * CPU mask is cleared and lost.
+ */
+void toptree_update_mask(struct toptree *cand)
+{
+ struct toptree *child;
+
+ cpumask_clear(&cand->mask);
+ list_for_each_entry(child, &cand->children, sibling)
+ cpumask_or(&cand->mask, &cand->mask, &child->mask);
+ if (cand->parent)
+ toptree_update_mask(cand->parent);
+}
+
+/**
+ * toptree_insert - Insert a tree node into tree
+ * @cand: Pointer to the node to insert
+ * @target: Pointer to the node to which @cand will added as a child
+ *
+ * Insert a tree node into a tree. Masks will be updated automatically.
+ *
+ * RETURNS:
+ * 0 on success, -1 if NULL is passed as argument or the node levels
+ * don't fit.
+ */
+static int toptree_insert(struct toptree *cand, struct toptree *target)
+{
+ if (!cand || !target)
+ return -1;
+ if (target->level != (cand->level + 1))
+ return -1;
+ list_add_tail(&cand->sibling, &target->children);
+ cand->parent = target;
+ toptree_update_mask(target);
+ return 0;
+}
+
+/**
+ * toptree_move_children - Move all child nodes of a node to a new place
+ * @cand: Pointer to the node whose children are to be moved
+ * @target: Pointer to the node to which @cand's children will be attached
+ *
+ * Take all child nodes of @cand and move them using toptree_move.
+ */
+static void toptree_move_children(struct toptree *cand, struct toptree *target)
+{
+ struct toptree *child, *tmp;
+
+ toptree_for_each_child_safe(child, tmp, cand)
+ toptree_move(child, target);
+}
+
+/**
+ * toptree_unify - Merge children with same ID
+ * @cand: Pointer to node whose direct children should be made unique
+ *
+ * When mangling the tree it is possible that a node has two or more children
+ * which have the same ID. This routine merges these children into one and
+ * moves all children of the merged nodes into the unified node.
+ */
+void toptree_unify(struct toptree *cand)
+{
+ struct toptree *child, *tmp, *cand_copy;
+
+ /* Threads cannot be split, cores are not split */
+ if (cand->level < 2)
+ return;
+
+ cand_copy = toptree_alloc(cand->level, 0);
+ toptree_for_each_child_safe(child, tmp, cand) {
+ struct toptree *tmpchild;
+
+ if (!cpumask_empty(&child->mask)) {
+ tmpchild = toptree_get_child(cand_copy, child->id);
+ toptree_move_children(child, tmpchild);
+ }
+ toptree_free(child);
+ }
+ toptree_move_children(cand_copy, cand);
+ toptree_free(cand_copy);
+
+ toptree_for_each_child(child, cand)
+ toptree_unify(child);
+}
+
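+/*
+ * Example (sketch): if tree mangling left a BOOK node with two MC children
+ * that both carry id 3, toptree_unify() folds them into a single MC child
+ * with id 3 and re-parents the cores of both originals under it.
+ */
+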
+/**
+ * toptree_move - Move a node to another context
+ * @cand: Pointer to the node to move
+ * @target: Pointer to the node where @cand should go
+ *
+ * In the easiest case @cand is exactly on the level below @target
+ * and will be immediately moved to the target.
+ *
+ * If @target's level is not the direct parent level of @cand,
+ * nodes for the missing levels are created and put between
+ * @cand and @target. The "stacking" nodes' IDs are taken from
+ * @cand's parents.
+ *
+ * After this it is likely to have redundant nodes in the tree
+ * which are addressed by means of toptree_unify.
+ */
+void toptree_move(struct toptree *cand, struct toptree *target)
+{
+ struct toptree *stack_target, *real_insert_point, *ptr, *tmp;
+
+ if (cand->level + 1 == target->level) {
+ toptree_remove(cand);
+ toptree_insert(cand, target);
+ return;
+ }
+
+ real_insert_point = NULL;
+ ptr = cand;
+ stack_target = NULL;
+
+ do {
+ tmp = stack_target;
+ stack_target = toptree_alloc(ptr->level + 1,
+ ptr->parent->id);
+ toptree_insert(tmp, stack_target);
+ if (!real_insert_point)
+ real_insert_point = stack_target;
+ ptr = ptr->parent;
+ } while (stack_target->level < (target->level - 1));
+
+ toptree_remove(cand);
+ toptree_insert(cand, real_insert_point);
+ toptree_insert(stack_target, target);
+}
+
+/**
+ * toptree_get_child - Access a tree node's child by its ID
+ * @cand: Pointer to the tree node whose child is to be accessed
+ * @id: The desired child's ID
+ *
+ * @cand's children are searched for a child with matching ID.
+ * If no match can be found, a new child with the desired ID
+ * is created and returned.
+ */
+struct toptree *toptree_get_child(struct toptree *cand, int id)
+{
+ struct toptree *child;
+
+ toptree_for_each_child(child, cand)
+ if (child->id == id)
+ return child;
+ child = toptree_alloc(cand->level-1, id);
+ toptree_insert(child, cand);
+ return child;
+}
+
+/**
+ * toptree_first - Find the first descendant on specified level
+ * @context: Pointer to tree node whose descendants are to be used
+ * @level: The level of interest
+ *
+ * RETURNS:
+ * @context's first descendant on the specified level, or NULL
+ * if there is no matching descendant
+ */
+struct toptree *toptree_first(struct toptree *context, int level)
+{
+ struct toptree *child, *tmp;
+
+ if (context->level == level)
+ return context;
+
+ if (!list_empty(&context->children)) {
+ list_for_each_entry(child, &context->children, sibling) {
+ tmp = toptree_first(child, level);
+ if (tmp)
+ return tmp;
+ }
+ }
+ return NULL;
+}
+
+/**
+ * toptree_next_sibling - Return next sibling
+ * @cur: Pointer to a tree node
+ *
+ * RETURNS:
+ * If @cur has a parent and is not the last entry in the parent's
+ * children list, the next sibling is returned; otherwise NULL.
+ */
+static struct toptree *toptree_next_sibling(struct toptree *cur)
+{
+ if (cur->parent == NULL)
+ return NULL;
+
+ if (cur == list_last_entry(&cur->parent->children,
+ struct toptree, sibling))
+ return NULL;
+ return (struct toptree *) list_next_entry(cur, sibling);
+}
+
+/**
+ * toptree_next - Tree traversal function
+ * @cur: Pointer to current element
+ * @context: Pointer to the root node of the tree or subtree to
+ * be traversed.
+ * @level: The level of interest.
+ *
+ * RETURNS:
+ * Pointer to the next node on level @level
+ * or NULL when there is no next node.
+ */
+struct toptree *toptree_next(struct toptree *cur, struct toptree *context,
+ int level)
+{
+ struct toptree *cur_context, *tmp;
+
+ if (!cur)
+ return NULL;
+
+ if (context->level == level)
+ return NULL;
+
+ tmp = toptree_next_sibling(cur);
+ if (tmp != NULL)
+ return tmp;
+
+ cur_context = cur;
+ while (cur_context->level < context->level - 1) {
+ /* Step up */
+ cur_context = cur_context->parent;
+ /* Step aside */
+ tmp = toptree_next_sibling(cur_context);
+ if (tmp != NULL) {
+ /* Step down */
+ tmp = toptree_first(tmp, level);
+ if (tmp != NULL)
+ return tmp;
+ }
+ }
+ return NULL;
+}
+
+/**
+ * toptree_count - Count descendants on specified level
+ * @context: Pointer to node whose descendants are to be considered
+ * @level: Only descendants on the specified level will be counted
+ *
+ * RETURNS:
+ * Number of descendants on the specified level
+ */
+int toptree_count(struct toptree *context, int level)
+{
+ struct toptree *cur;
+ int cnt = 0;
+
+ toptree_for_each(cur, context, level)
+ cnt++;
+ return cnt;
+}
diff --git a/arch/s390/numa/toptree.h b/arch/s390/numa/toptree.h
new file mode 100644
index 000000000..5246371ec
--- /dev/null
+++ b/arch/s390/numa/toptree.h
@@ -0,0 +1,61 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * NUMA support for s390
+ *
+ * A tree structure used for machine topology mangling
+ *
+ * Copyright IBM Corp. 2015
+ */
+#ifndef S390_TOPTREE_H
+#define S390_TOPTREE_H
+
+#include <linux/cpumask.h>
+#include <linux/list.h>
+
+struct toptree {
+ int level;
+ int id;
+ cpumask_t mask;
+ struct toptree *parent;
+ struct list_head sibling;
+ struct list_head children;
+};
+
+struct toptree *toptree_alloc(int level, int id);
+void toptree_free(struct toptree *cand);
+void toptree_update_mask(struct toptree *cand);
+void toptree_unify(struct toptree *cand);
+struct toptree *toptree_get_child(struct toptree *cand, int id);
+void toptree_move(struct toptree *cand, struct toptree *target);
+int toptree_count(struct toptree *context, int level);
+
+struct toptree *toptree_first(struct toptree *context, int level);
+struct toptree *toptree_next(struct toptree *cur, struct toptree *context,
+ int level);
+
+#define toptree_for_each_child(child, ptree) \
+ list_for_each_entry(child, &ptree->children, sibling)
+
+#define toptree_for_each_child_safe(child, ptmp, ptree) \
+ list_for_each_entry_safe(child, ptmp, &ptree->children, sibling)
+
+#define toptree_is_last(ptree) \
+ ((ptree->parent == NULL) || \
+ (ptree->parent->children.prev == &ptree->sibling))
+
+#define toptree_for_each(ptree, cont, ttype) \
+ for (ptree = toptree_first(cont, ttype); \
+ ptree != NULL; \
+ ptree = toptree_next(ptree, cont, ttype))
+
+#define toptree_for_each_safe(ptree, tmp, cont, ttype) \
+ for (ptree = toptree_first(cont, ttype), \
+ tmp = toptree_next(ptree, cont, ttype); \
+ ptree != NULL; \
+ ptree = tmp, \
+ tmp = toptree_next(ptree, cont, ttype))
+
+#define toptree_for_each_sibling(ptree, start) \
+ toptree_for_each(ptree, start->parent, start->level)
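+
+/*
+ * Typical traversal (illustrative sketch; leaf_handler() is only a
+ * placeholder, not part of this interface):
+ *
+ *	struct toptree *leaf;
+ *
+ *	toptree_for_each(leaf, root, 0)
+ *		leaf_handler(leaf->id, &leaf->mask);
+ */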
+
+#endif /* S390_TOPTREE_H */
diff --git a/arch/s390/oprofile/Makefile b/arch/s390/oprofile/Makefile
new file mode 100644
index 000000000..36261f9d3
--- /dev/null
+++ b/arch/s390/oprofile/Makefile
@@ -0,0 +1,10 @@
+# SPDX-License-Identifier: GPL-2.0
+obj-$(CONFIG_OPROFILE) += oprofile.o
+
+DRIVER_OBJS = $(addprefix ../../../drivers/oprofile/, \
+ oprof.o cpu_buffer.o buffer_sync.o \
+ event_buffer.o oprofile_files.o \
+ oprofilefs.o oprofile_stats.o \
+ timer_int.o )
+
+oprofile-y := $(DRIVER_OBJS) init.o
diff --git a/arch/s390/oprofile/init.c b/arch/s390/oprofile/init.c
new file mode 100644
index 000000000..43d9525c3
--- /dev/null
+++ b/arch/s390/oprofile/init.c
@@ -0,0 +1,43 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * S390 Version
+ * Copyright IBM Corp. 2002, 2011
+ * Author(s): Thomas Spatzier (tspat@de.ibm.com)
+ * Author(s): Mahesh Salgaonkar (mahesh@linux.vnet.ibm.com)
+ * Author(s): Heinz Graalfs (graalfs@linux.vnet.ibm.com)
+ * Author(s): Andreas Krebbel (krebbel@linux.vnet.ibm.com)
+ *
+ * @remark Copyright 2002-2011 OProfile authors
+ */
+
+#include <linux/oprofile.h>
+#include <linux/init.h>
+#include <asm/processor.h>
+
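+/*
+ * dump_trace() callback: record backtrace addresses via
+ * oprofile_add_trace() until the requested depth is reached.
+ */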
+static int __s390_backtrace(void *data, unsigned long address, int reliable)
+{
+ unsigned int *depth = data;
+
+ if (*depth == 0)
+ return 1;
+ (*depth)--;
+ oprofile_add_trace(address);
+ return 0;
+}
+
+static void s390_backtrace(struct pt_regs *regs, unsigned int depth)
+{
+ if (user_mode(regs))
+ return;
+ dump_trace(__s390_backtrace, &depth, NULL, regs->gprs[15]);
+}
+
+int __init oprofile_arch_init(struct oprofile_operations *ops)
+{
+ ops->backtrace = s390_backtrace;
+ return 0;
+}
+
+void oprofile_arch_exit(void)
+{
+}
diff --git a/arch/s390/pci/Makefile b/arch/s390/pci/Makefile
new file mode 100644
index 000000000..22d087129
--- /dev/null
+++ b/arch/s390/pci/Makefile
@@ -0,0 +1,7 @@
+# SPDX-License-Identifier: GPL-2.0
+#
+# Makefile for the s390 PCI subsystem.
+#
+
+obj-$(CONFIG_PCI) += pci.o pci_dma.o pci_clp.o pci_sysfs.o \
+ pci_event.o pci_debug.o pci_insn.o pci_mmio.o
diff --git a/arch/s390/pci/pci.c b/arch/s390/pci/pci.c
new file mode 100644
index 000000000..9f6f392a4
--- /dev/null
+++ b/arch/s390/pci/pci.c
@@ -0,0 +1,1005 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright IBM Corp. 2012
+ *
+ * Author(s):
+ * Jan Glauber <jang@linux.vnet.ibm.com>
+ *
+ * The System z PCI code is a rewrite from a prototype by
+ * the following people (Kudoz!):
+ * Alexander Schmidt
+ * Christoph Raisch
+ * Hannes Hering
+ * Hoang-Nam Nguyen
+ * Jan-Bernd Themann
+ * Stefan Roscher
+ * Thomas Klein
+ */
+
+#define KMSG_COMPONENT "zpci"
+#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
+
+#include <linux/kernel.h>
+#include <linux/slab.h>
+#include <linux/err.h>
+#include <linux/export.h>
+#include <linux/delay.h>
+#include <linux/irq.h>
+#include <linux/kernel_stat.h>
+#include <linux/seq_file.h>
+#include <linux/pci.h>
+#include <linux/msi.h>
+
+#include <asm/isc.h>
+#include <asm/airq.h>
+#include <asm/facility.h>
+#include <asm/pci_insn.h>
+#include <asm/pci_clp.h>
+#include <asm/pci_dma.h>
+
+#define DEBUG /* enable pr_debug */
+
+#define SIC_IRQ_MODE_ALL 0
+#define SIC_IRQ_MODE_SINGLE 1
+
+#define ZPCI_NR_DMA_SPACES 1
+#define ZPCI_NR_DEVICES CONFIG_PCI_NR_FUNCTIONS
+
+/* list of all detected zpci devices */
+static LIST_HEAD(zpci_list);
+static DEFINE_SPINLOCK(zpci_list_lock);
+
+static struct irq_chip zpci_irq_chip = {
+ .name = "zPCI",
+ .irq_unmask = pci_msi_unmask_irq,
+ .irq_mask = pci_msi_mask_irq,
+};
+
+static DECLARE_BITMAP(zpci_domain, ZPCI_NR_DEVICES);
+static DEFINE_SPINLOCK(zpci_domain_lock);
+
+static struct airq_iv *zpci_aisb_iv;
+static struct airq_iv *zpci_aibv[ZPCI_NR_DEVICES];
+
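+/*
+ * One iomap entry is needed for each mapped BAR; size the table for half
+ * the BARs of all possible functions, capped at ZPCI_IOMAP_MAX_ENTRIES.
+ */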
+#define ZPCI_IOMAP_ENTRIES \
+ min(((unsigned long) ZPCI_NR_DEVICES * PCI_BAR_COUNT / 2), \
+ ZPCI_IOMAP_MAX_ENTRIES)
+
+static DEFINE_SPINLOCK(zpci_iomap_lock);
+static unsigned long *zpci_iomap_bitmap;
+struct zpci_iomap_entry *zpci_iomap_start;
+EXPORT_SYMBOL_GPL(zpci_iomap_start);
+
+static struct kmem_cache *zdev_fmb_cache;
+
+struct zpci_dev *get_zdev_by_fid(u32 fid)
+{
+ struct zpci_dev *tmp, *zdev = NULL;
+
+ spin_lock(&zpci_list_lock);
+ list_for_each_entry(tmp, &zpci_list, entry) {
+ if (tmp->fid == fid) {
+ zdev = tmp;
+ break;
+ }
+ }
+ spin_unlock(&zpci_list_lock);
+ return zdev;
+}
+
+void zpci_remove_reserved_devices(void)
+{
+ struct zpci_dev *tmp, *zdev;
+ enum zpci_state state;
+ LIST_HEAD(remove);
+
+ spin_lock(&zpci_list_lock);
+ list_for_each_entry_safe(zdev, tmp, &zpci_list, entry) {
+ if (zdev->state == ZPCI_FN_STATE_STANDBY &&
+ !clp_get_state(zdev->fid, &state) &&
+ state == ZPCI_FN_STATE_RESERVED)
+ list_move_tail(&zdev->entry, &remove);
+ }
+ spin_unlock(&zpci_list_lock);
+
+ list_for_each_entry_safe(zdev, tmp, &remove, entry)
+ zpci_remove_device(zdev);
+}
+
+static struct zpci_dev *get_zdev_by_bus(struct pci_bus *bus)
+{
+ return (bus && bus->sysdata) ? (struct zpci_dev *) bus->sysdata : NULL;
+}
+
+int pci_domain_nr(struct pci_bus *bus)
+{
+ return ((struct zpci_dev *) bus->sysdata)->domain;
+}
+EXPORT_SYMBOL_GPL(pci_domain_nr);
+
+int pci_proc_domain(struct pci_bus *bus)
+{
+ return pci_domain_nr(bus);
+}
+EXPORT_SYMBOL_GPL(pci_proc_domain);
+
+/* Modify PCI: Register adapter interruptions */
+static int zpci_set_airq(struct zpci_dev *zdev)
+{
+ u64 req = ZPCI_CREATE_REQ(zdev->fh, 0, ZPCI_MOD_FC_REG_INT);
+ struct zpci_fib fib = {0};
+ u8 status;
+
+ fib.isc = PCI_ISC;
+ fib.sum = 1; /* enable summary notifications */
+ fib.noi = airq_iv_end(zdev->aibv);
+ fib.aibv = (unsigned long) zdev->aibv->vector;
+ fib.aibvo = 0; /* each zdev has its own interrupt vector */
+ fib.aisb = (unsigned long) zpci_aisb_iv->vector + (zdev->aisb/64)*8;
+ fib.aisbo = zdev->aisb & 63;
+
+ return zpci_mod_fc(req, &fib, &status) ? -EIO : 0;
+}
+
+/* Modify PCI: Unregister adapter interruptions */
+static int zpci_clear_airq(struct zpci_dev *zdev)
+{
+ u64 req = ZPCI_CREATE_REQ(zdev->fh, 0, ZPCI_MOD_FC_DEREG_INT);
+ struct zpci_fib fib = {0};
+ u8 cc, status;
+
+ cc = zpci_mod_fc(req, &fib, &status);
+ if (cc == 3 || (cc == 1 && status == 24))
+ /* Function already gone or IRQs already deregistered. */
+ cc = 0;
+
+ return cc ? -EIO : 0;
+}
+
+/* Modify PCI: Register I/O address translation parameters */
+int zpci_register_ioat(struct zpci_dev *zdev, u8 dmaas,
+ u64 base, u64 limit, u64 iota)
+{
+ u64 req = ZPCI_CREATE_REQ(zdev->fh, dmaas, ZPCI_MOD_FC_REG_IOAT);
+ struct zpci_fib fib = {0};
+ u8 status;
+
+ WARN_ON_ONCE(iota & 0x3fff);
+ fib.pba = base;
+ fib.pal = limit;
+ fib.iota = iota | ZPCI_IOTA_RTTO_FLAG;
+ return zpci_mod_fc(req, &fib, &status) ? -EIO : 0;
+}
+
+/* Modify PCI: Unregister I/O address translation parameters */
+int zpci_unregister_ioat(struct zpci_dev *zdev, u8 dmaas)
+{
+ u64 req = ZPCI_CREATE_REQ(zdev->fh, dmaas, ZPCI_MOD_FC_DEREG_IOAT);
+ struct zpci_fib fib = {0};
+ u8 cc, status;
+
+ cc = zpci_mod_fc(req, &fib, &status);
+ if (cc == 3) /* Function already gone. */
+ cc = 0;
+ return cc ? -EIO : 0;
+}
+
+/* Modify PCI: Set PCI function measurement parameters */
+int zpci_fmb_enable_device(struct zpci_dev *zdev)
+{
+ u64 req = ZPCI_CREATE_REQ(zdev->fh, 0, ZPCI_MOD_FC_SET_MEASURE);
+ struct zpci_fib fib = {0};
+ u8 cc, status;
+
+ if (zdev->fmb || sizeof(*zdev->fmb) < zdev->fmb_length)
+ return -EINVAL;
+
+ zdev->fmb = kmem_cache_zalloc(zdev_fmb_cache, GFP_KERNEL);
+ if (!zdev->fmb)
+ return -ENOMEM;
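+ /* The FMB must be 16-byte aligned (guaranteed by the cache alignment). */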
+ WARN_ON((u64) zdev->fmb & 0xf);
+
+ /* reset software counters */
+ atomic64_set(&zdev->allocated_pages, 0);
+ atomic64_set(&zdev->mapped_pages, 0);
+ atomic64_set(&zdev->unmapped_pages, 0);
+
+ fib.fmb_addr = virt_to_phys(zdev->fmb);
+ cc = zpci_mod_fc(req, &fib, &status);
+ if (cc) {
+ kmem_cache_free(zdev_fmb_cache, zdev->fmb);
+ zdev->fmb = NULL;
+ }
+ return cc ? -EIO : 0;
+}
+
+/* Modify PCI: Disable PCI function measurement */
+int zpci_fmb_disable_device(struct zpci_dev *zdev)
+{
+ u64 req = ZPCI_CREATE_REQ(zdev->fh, 0, ZPCI_MOD_FC_SET_MEASURE);
+ struct zpci_fib fib = {0};
+ u8 cc, status;
+
+ if (!zdev->fmb)
+ return -EINVAL;
+
+ /* Function measurement is disabled if fmb address is zero */
+ cc = zpci_mod_fc(req, &fib, &status);
+ if (cc == 3) /* Function already gone. */
+ cc = 0;
+
+ if (!cc) {
+ kmem_cache_free(zdev_fmb_cache, zdev->fmb);
+ zdev->fmb = NULL;
+ }
+ return cc ? -EIO : 0;
+}
+
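+/* Read len bytes from the function's PCI configuration space at the given offset. */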
+static int zpci_cfg_load(struct zpci_dev *zdev, int offset, u32 *val, u8 len)
+{
+ u64 req = ZPCI_CREATE_REQ(zdev->fh, ZPCI_PCIAS_CFGSPC, len);
+ u64 data;
+ int rc;
+
+ rc = zpci_load(&data, req, offset);
+ if (!rc) {
+ data = le64_to_cpu((__force __le64) data);
+ data >>= (8 - len) * 8;
+ *val = (u32) data;
+ } else
+ *val = 0xffffffff;
+ return rc;
+}
+
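+/* Write len bytes to the function's PCI configuration space at the given offset. */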
+static int zpci_cfg_store(struct zpci_dev *zdev, int offset, u32 val, u8 len)
+{
+ u64 req = ZPCI_CREATE_REQ(zdev->fh, ZPCI_PCIAS_CFGSPC, len);
+ u64 data = val;
+ int rc;
+
+ data <<= (8 - len) * 8;
+ data = (__force u64) cpu_to_le64(data);
+ rc = zpci_store(data, req, offset);
+ return rc;
+}
+
+resource_size_t pcibios_align_resource(void *data, const struct resource *res,
+ resource_size_t size,
+ resource_size_t align)
+{
+ return 0;
+}
+
+/* combine single writes by using store-block insn */
+void __iowrite64_copy(void __iomem *to, const void *from, size_t count)
+{
+ zpci_memcpy_toio(to, from, count);
+}
+
+/* Create a virtual mapping cookie for a PCI BAR */
+void __iomem *pci_iomap_range(struct pci_dev *pdev,
+ int bar,
+ unsigned long offset,
+ unsigned long max)
+{
+ struct zpci_dev *zdev = to_zpci(pdev);
+ int idx;
+
+ if (!pci_resource_len(pdev, bar))
+ return NULL;
+
+ idx = zdev->bars[bar].map_idx;
+ spin_lock(&zpci_iomap_lock);
+ /* Detect overrun */
+ WARN_ON(!++zpci_iomap_start[idx].count);
+ zpci_iomap_start[idx].fh = zdev->fh;
+ zpci_iomap_start[idx].bar = bar;
+ spin_unlock(&zpci_iomap_lock);
+
+ return (void __iomem *) ZPCI_ADDR(idx) + offset;
+}
+EXPORT_SYMBOL(pci_iomap_range);
+
+void __iomem *pci_iomap(struct pci_dev *dev, int bar, unsigned long maxlen)
+{
+ return pci_iomap_range(dev, bar, 0, maxlen);
+}
+EXPORT_SYMBOL(pci_iomap);
+
+void pci_iounmap(struct pci_dev *pdev, void __iomem *addr)
+{
+ unsigned int idx = ZPCI_IDX(addr);
+
+ spin_lock(&zpci_iomap_lock);
+ /* Detect underrun */
+ WARN_ON(!zpci_iomap_start[idx].count);
+ if (!--zpci_iomap_start[idx].count) {
+ zpci_iomap_start[idx].fh = 0;
+ zpci_iomap_start[idx].bar = 0;
+ }
+ spin_unlock(&zpci_iomap_lock);
+}
+EXPORT_SYMBOL(pci_iounmap);
+
+static int pci_read(struct pci_bus *bus, unsigned int devfn, int where,
+ int size, u32 *val)
+{
+ struct zpci_dev *zdev = get_zdev_by_bus(bus);
+ int ret;
+
+ if (!zdev || devfn != ZPCI_DEVFN)
+ ret = -ENODEV;
+ else
+ ret = zpci_cfg_load(zdev, where, val, size);
+
+ return ret;
+}
+
+static int pci_write(struct pci_bus *bus, unsigned int devfn, int where,
+ int size, u32 val)
+{
+ struct zpci_dev *zdev = get_zdev_by_bus(bus);
+ int ret;
+
+ if (!zdev || devfn != ZPCI_DEVFN)
+ ret = -ENODEV;
+ else
+ ret = zpci_cfg_store(zdev, where, val, size);
+
+ return ret;
+}
+
+static struct pci_ops pci_root_ops = {
+ .read = pci_read,
+ .write = pci_write,
+};
+
+static void zpci_irq_handler(struct airq_struct *airq)
+{
+ unsigned long si, ai;
+ struct airq_iv *aibv;
+ int irqs_on = 0;
+
+ inc_irq_stat(IRQIO_PCI);
+ for (si = 0;;) {
+ /* Scan adapter summary indicator bit vector */
+ si = airq_iv_scan(zpci_aisb_iv, si, airq_iv_end(zpci_aisb_iv));
+ if (si == -1UL) {
+ if (irqs_on++)
+ /* End of second scan with interrupts on. */
+ break;
+ /* First scan complete, reenable interrupts. */
+ if (zpci_set_irq_ctrl(SIC_IRQ_MODE_SINGLE, NULL, PCI_ISC))
+ break;
+ si = 0;
+ continue;
+ }
+
+ /* Scan the adapter interrupt vector for this device. */
+ aibv = zpci_aibv[si];
+ for (ai = 0;;) {
+ ai = airq_iv_scan(aibv, ai, airq_iv_end(aibv));
+ if (ai == -1UL)
+ break;
+ inc_irq_stat(IRQIO_MSI);
+ airq_iv_lock(aibv, ai);
+ generic_handle_irq(airq_iv_get_data(aibv, ai));
+ airq_iv_unlock(aibv, ai);
+ }
+ }
+}
+
+int arch_setup_msi_irqs(struct pci_dev *pdev, int nvec, int type)
+{
+ struct zpci_dev *zdev = to_zpci(pdev);
+ unsigned int hwirq, msi_vecs;
+ unsigned long aisb;
+ struct msi_desc *msi;
+ struct msi_msg msg;
+ int rc, irq;
+
+ zdev->aisb = -1UL;
+ if (type == PCI_CAP_ID_MSI && nvec > 1)
+ return 1;
+ msi_vecs = min_t(unsigned int, nvec, zdev->max_msi);
+
+ /* Allocate adapter summary indicator bit */
+ aisb = airq_iv_alloc_bit(zpci_aisb_iv);
+ if (aisb == -1UL)
+ return -EIO;
+ zdev->aisb = aisb;
+
+ /* Create adapter interrupt vector */
+ zdev->aibv = airq_iv_create(msi_vecs, AIRQ_IV_DATA | AIRQ_IV_BITLOCK);
+ if (!zdev->aibv)
+ return -ENOMEM;
+
+ /* Wire up shortcut pointer */
+ zpci_aibv[aisb] = zdev->aibv;
+
+ /* Request MSI interrupts */
+ hwirq = 0;
+ for_each_pci_msi_entry(msi, pdev) {
+ if (hwirq >= msi_vecs)
+ break;
+ irq = irq_alloc_desc(0); /* Alloc irq on node 0 */
+ if (irq < 0)
+ return -ENOMEM;
+ rc = irq_set_msi_desc(irq, msi);
+ if (rc)
+ return rc;
+ irq_set_chip_and_handler(irq, &zpci_irq_chip,
+ handle_simple_irq);
+ msg.data = hwirq;
+ msg.address_lo = zdev->msi_addr & 0xffffffff;
+ msg.address_hi = zdev->msi_addr >> 32;
+ pci_write_msi_msg(irq, &msg);
+ airq_iv_set_data(zdev->aibv, hwirq, irq);
+ hwirq++;
+ }
+
+ /* Enable adapter interrupts */
+ rc = zpci_set_airq(zdev);
+ if (rc)
+ return rc;
+
+ return (msi_vecs == nvec) ? 0 : msi_vecs;
+}
+
+void arch_teardown_msi_irqs(struct pci_dev *pdev)
+{
+ struct zpci_dev *zdev = to_zpci(pdev);
+ struct msi_desc *msi;
+ int rc;
+
+ /* Disable adapter interrupts */
+ rc = zpci_clear_airq(zdev);
+ if (rc)
+ return;
+
+ /* Release MSI interrupts */
+ for_each_pci_msi_entry(msi, pdev) {
+ if (!msi->irq)
+ continue;
+ if (msi->msi_attrib.is_msix)
+ __pci_msix_desc_mask_irq(msi, 1);
+ else
+ __pci_msi_desc_mask_irq(msi, 1, 1);
+ irq_set_msi_desc(msi->irq, NULL);
+ irq_free_desc(msi->irq);
+ msi->msg.address_lo = 0;
+ msi->msg.address_hi = 0;
+ msi->msg.data = 0;
+ msi->irq = 0;
+ }
+
+ if (zdev->aisb != -1UL) {
+ zpci_aibv[zdev->aisb] = NULL;
+ airq_iv_free_bit(zpci_aisb_iv, zdev->aisb);
+ zdev->aisb = -1UL;
+ }
+ if (zdev->aibv) {
+ airq_iv_release(zdev->aibv);
+ zdev->aibv = NULL;
+ }
+}
+
+static void zpci_map_resources(struct pci_dev *pdev)
+{
+ resource_size_t len;
+ int i;
+
+ for (i = 0; i < PCI_BAR_COUNT; i++) {
+ len = pci_resource_len(pdev, i);
+ if (!len)
+ continue;
+ pdev->resource[i].start =
+ (resource_size_t __force) pci_iomap(pdev, i, 0);
+ pdev->resource[i].end = pdev->resource[i].start + len - 1;
+ }
+}
+
+static void zpci_unmap_resources(struct pci_dev *pdev)
+{
+ resource_size_t len;
+ int i;
+
+ for (i = 0; i < PCI_BAR_COUNT; i++) {
+ len = pci_resource_len(pdev, i);
+ if (!len)
+ continue;
+ pci_iounmap(pdev, (void __iomem __force *)
+ pdev->resource[i].start);
+ }
+}
+
+static struct airq_struct zpci_airq = {
+ .handler = zpci_irq_handler,
+ .isc = PCI_ISC,
+};
+
+static int __init zpci_irq_init(void)
+{
+ int rc;
+
+ rc = register_adapter_interrupt(&zpci_airq);
+ if (rc)
+ goto out;
+ /* Set summary to 1 to be called every time for the ISC. */
+ *zpci_airq.lsi_ptr = 1;
+
+ rc = -ENOMEM;
+ zpci_aisb_iv = airq_iv_create(ZPCI_NR_DEVICES, AIRQ_IV_ALLOC);
+ if (!zpci_aisb_iv)
+ goto out_airq;
+
+ zpci_set_irq_ctrl(SIC_IRQ_MODE_SINGLE, NULL, PCI_ISC);
+ return 0;
+
+out_airq:
+ unregister_adapter_interrupt(&zpci_airq);
+out:
+ return rc;
+}
+
+static void zpci_irq_exit(void)
+{
+ airq_iv_release(zpci_aisb_iv);
+ unregister_adapter_interrupt(&zpci_airq);
+}
+
+static int zpci_alloc_iomap(struct zpci_dev *zdev)
+{
+ unsigned long entry;
+
+ spin_lock(&zpci_iomap_lock);
+ entry = find_first_zero_bit(zpci_iomap_bitmap, ZPCI_IOMAP_ENTRIES);
+ if (entry == ZPCI_IOMAP_ENTRIES) {
+ spin_unlock(&zpci_iomap_lock);
+ return -ENOSPC;
+ }
+ set_bit(entry, zpci_iomap_bitmap);
+ spin_unlock(&zpci_iomap_lock);
+ return entry;
+}
+
+static void zpci_free_iomap(struct zpci_dev *zdev, int entry)
+{
+ spin_lock(&zpci_iomap_lock);
+ memset(&zpci_iomap_start[entry], 0, sizeof(struct zpci_iomap_entry));
+ clear_bit(entry, zpci_iomap_bitmap);
+ spin_unlock(&zpci_iomap_lock);
+}
+
+static struct resource *__alloc_res(struct zpci_dev *zdev, unsigned long start,
+ unsigned long size, unsigned long flags)
+{
+ struct resource *r;
+
+ r = kzalloc(sizeof(*r), GFP_KERNEL);
+ if (!r)
+ return NULL;
+
+ r->start = start;
+ r->end = r->start + size - 1;
+ r->flags = flags;
+ r->name = zdev->res_name;
+
+ if (request_resource(&iomem_resource, r)) {
+ kfree(r);
+ return NULL;
+ }
+ return r;
+}
+
+static int zpci_setup_bus_resources(struct zpci_dev *zdev,
+ struct list_head *resources)
+{
+ unsigned long addr, size, flags;
+ struct resource *res;
+ int i, entry;
+
+ snprintf(zdev->res_name, sizeof(zdev->res_name),
+ "PCI Bus %04x:%02x", zdev->domain, ZPCI_BUS_NR);
+
+ for (i = 0; i < PCI_BAR_COUNT; i++) {
+ if (!zdev->bars[i].size)
+ continue;
+ entry = zpci_alloc_iomap(zdev);
+ if (entry < 0)
+ return entry;
+ zdev->bars[i].map_idx = entry;
+
+ /* only MMIO is supported */
+ flags = IORESOURCE_MEM;
+ if (zdev->bars[i].val & 8)
+ flags |= IORESOURCE_PREFETCH;
+ if (zdev->bars[i].val & 4)
+ flags |= IORESOURCE_MEM_64;
+
+ addr = ZPCI_ADDR(entry);
+ size = 1UL << zdev->bars[i].size;
+
+ res = __alloc_res(zdev, addr, size, flags);
+ if (!res) {
+ zpci_free_iomap(zdev, entry);
+ return -ENOMEM;
+ }
+ zdev->bars[i].res = res;
+ pci_add_resource(resources, res);
+ }
+
+ return 0;
+}
+
+static void zpci_cleanup_bus_resources(struct zpci_dev *zdev)
+{
+ int i;
+
+ for (i = 0; i < PCI_BAR_COUNT; i++) {
+ if (!zdev->bars[i].size || !zdev->bars[i].res)
+ continue;
+
+ zpci_free_iomap(zdev, zdev->bars[i].map_idx);
+ release_resource(zdev->bars[i].res);
+ kfree(zdev->bars[i].res);
+ }
+}
+
+int pcibios_add_device(struct pci_dev *pdev)
+{
+ struct resource *res;
+ int i;
+
+ pdev->dev.groups = zpci_attr_groups;
+ pdev->dev.dma_ops = &s390_pci_dma_ops;
+ zpci_map_resources(pdev);
+
+ for (i = 0; i < PCI_BAR_COUNT; i++) {
+ res = &pdev->resource[i];
+ if (res->parent || !res->flags)
+ continue;
+ pci_claim_resource(pdev, i);
+ }
+
+ return 0;
+}
+
+void pcibios_release_device(struct pci_dev *pdev)
+{
+ zpci_unmap_resources(pdev);
+}
+
+int pcibios_enable_device(struct pci_dev *pdev, int mask)
+{
+ struct zpci_dev *zdev = to_zpci(pdev);
+
+ zpci_debug_init_device(zdev, dev_name(&pdev->dev));
+ zpci_fmb_enable_device(zdev);
+
+ return pci_enable_resources(pdev, mask);
+}
+
+void pcibios_disable_device(struct pci_dev *pdev)
+{
+ struct zpci_dev *zdev = to_zpci(pdev);
+
+ zpci_fmb_disable_device(zdev);
+ zpci_debug_exit_device(zdev);
+}
+
+#ifdef CONFIG_HIBERNATE_CALLBACKS
+static int zpci_restore(struct device *dev)
+{
+ struct pci_dev *pdev = to_pci_dev(dev);
+ struct zpci_dev *zdev = to_zpci(pdev);
+ int ret = 0;
+
+ if (zdev->state != ZPCI_FN_STATE_ONLINE)
+ goto out;
+
+ ret = clp_enable_fh(zdev, ZPCI_NR_DMA_SPACES);
+ if (ret)
+ goto out;
+
+ zpci_map_resources(pdev);
+ zpci_register_ioat(zdev, 0, zdev->start_dma, zdev->end_dma,
+ (u64) zdev->dma_table);
+
+out:
+ return ret;
+}
+
+static int zpci_freeze(struct device *dev)
+{
+ struct pci_dev *pdev = to_pci_dev(dev);
+ struct zpci_dev *zdev = to_zpci(pdev);
+
+ if (zdev->state != ZPCI_FN_STATE_ONLINE)
+ return 0;
+
+ zpci_unregister_ioat(zdev, 0);
+ zpci_unmap_resources(pdev);
+ return clp_disable_fh(zdev);
+}
+
+struct dev_pm_ops pcibios_pm_ops = {
+ .thaw_noirq = zpci_restore,
+ .freeze_noirq = zpci_freeze,
+ .restore_noirq = zpci_restore,
+ .poweroff_noirq = zpci_freeze,
+};
+#endif /* CONFIG_HIBERNATE_CALLBACKS */
+
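+/*
+ * Allocate a PCI domain number for the function: reuse the firmware
+ * provided UID when UID checking is active, otherwise take the first
+ * free bit from the domain bitmap.
+ */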
+static int zpci_alloc_domain(struct zpci_dev *zdev)
+{
+ if (zpci_unique_uid) {
+ zdev->domain = (u16) zdev->uid;
+ if (zdev->domain >= ZPCI_NR_DEVICES)
+ return 0;
+
+ spin_lock(&zpci_domain_lock);
+ if (test_bit(zdev->domain, zpci_domain)) {
+ spin_unlock(&zpci_domain_lock);
+ return -EEXIST;
+ }
+ set_bit(zdev->domain, zpci_domain);
+ spin_unlock(&zpci_domain_lock);
+ return 0;
+ }
+
+ spin_lock(&zpci_domain_lock);
+ zdev->domain = find_first_zero_bit(zpci_domain, ZPCI_NR_DEVICES);
+ if (zdev->domain == ZPCI_NR_DEVICES) {
+ spin_unlock(&zpci_domain_lock);
+ return -ENOSPC;
+ }
+ set_bit(zdev->domain, zpci_domain);
+ spin_unlock(&zpci_domain_lock);
+ return 0;
+}
+
+static void zpci_free_domain(struct zpci_dev *zdev)
+{
+ if (zdev->domain >= ZPCI_NR_DEVICES)
+ return;
+
+ spin_lock(&zpci_domain_lock);
+ clear_bit(zdev->domain, zpci_domain);
+ spin_unlock(&zpci_domain_lock);
+}
+
+void pcibios_remove_bus(struct pci_bus *bus)
+{
+ struct zpci_dev *zdev = get_zdev_by_bus(bus);
+
+ zpci_exit_slot(zdev);
+ zpci_cleanup_bus_resources(zdev);
+ zpci_destroy_iommu(zdev);
+ zpci_free_domain(zdev);
+
+ spin_lock(&zpci_list_lock);
+ list_del(&zdev->entry);
+ spin_unlock(&zpci_list_lock);
+
+ zpci_dbg(3, "rem fid:%x\n", zdev->fid);
+ kfree(zdev);
+}
+
+static int zpci_scan_bus(struct zpci_dev *zdev)
+{
+ LIST_HEAD(resources);
+ int ret;
+
+ ret = zpci_setup_bus_resources(zdev, &resources);
+ if (ret)
+ goto error;
+
+ zdev->bus = pci_scan_root_bus(NULL, ZPCI_BUS_NR, &pci_root_ops,
+ zdev, &resources);
+ if (!zdev->bus) {
+ ret = -EIO;
+ goto error;
+ }
+ zdev->bus->max_bus_speed = zdev->max_bus_speed;
+ pci_bus_add_devices(zdev->bus);
+ return 0;
+
+error:
+ zpci_cleanup_bus_resources(zdev);
+ pci_free_resource_list(&resources);
+ return ret;
+}
+
+int zpci_enable_device(struct zpci_dev *zdev)
+{
+ int rc;
+
+ rc = clp_enable_fh(zdev, ZPCI_NR_DMA_SPACES);
+ if (rc)
+ goto out;
+
+ rc = zpci_dma_init_device(zdev);
+ if (rc)
+ goto out_dma;
+
+ zdev->state = ZPCI_FN_STATE_ONLINE;
+ return 0;
+
+out_dma:
+ clp_disable_fh(zdev);
+out:
+ return rc;
+}
+EXPORT_SYMBOL_GPL(zpci_enable_device);
+
+int zpci_disable_device(struct zpci_dev *zdev)
+{
+ zpci_dma_exit_device(zdev);
+ return clp_disable_fh(zdev);
+}
+EXPORT_SYMBOL_GPL(zpci_disable_device);
+
+int zpci_create_device(struct zpci_dev *zdev)
+{
+ int rc;
+
+ rc = zpci_alloc_domain(zdev);
+ if (rc)
+ goto out;
+
+ rc = zpci_init_iommu(zdev);
+ if (rc)
+ goto out_free;
+
+ mutex_init(&zdev->lock);
+ if (zdev->state == ZPCI_FN_STATE_CONFIGURED) {
+ rc = zpci_enable_device(zdev);
+ if (rc)
+ goto out_destroy_iommu;
+ }
+ rc = zpci_scan_bus(zdev);
+ if (rc)
+ goto out_disable;
+
+ spin_lock(&zpci_list_lock);
+ list_add_tail(&zdev->entry, &zpci_list);
+ spin_unlock(&zpci_list_lock);
+
+ zpci_init_slot(zdev);
+
+ return 0;
+
+out_disable:
+ if (zdev->state == ZPCI_FN_STATE_ONLINE)
+ zpci_disable_device(zdev);
+out_destroy_iommu:
+ zpci_destroy_iommu(zdev);
+out_free:
+ zpci_free_domain(zdev);
+out:
+ return rc;
+}
+
+void zpci_remove_device(struct zpci_dev *zdev)
+{
+ if (!zdev->bus)
+ return;
+
+ pci_stop_root_bus(zdev->bus);
+ pci_remove_root_bus(zdev->bus);
+}
+
+int zpci_report_error(struct pci_dev *pdev,
+ struct zpci_report_error_header *report)
+{
+ struct zpci_dev *zdev = to_zpci(pdev);
+
+ return sclp_pci_report(report, zdev->fh, zdev->fid);
+}
+EXPORT_SYMBOL(zpci_report_error);
+
+static int zpci_mem_init(void)
+{
+ BUILD_BUG_ON(!is_power_of_2(__alignof__(struct zpci_fmb)) ||
+ __alignof__(struct zpci_fmb) < sizeof(struct zpci_fmb));
+
+ zdev_fmb_cache = kmem_cache_create("PCI_FMB_cache", sizeof(struct zpci_fmb),
+ __alignof__(struct zpci_fmb), 0, NULL);
+ if (!zdev_fmb_cache)
+ goto error_fmb;
+
+ zpci_iomap_start = kcalloc(ZPCI_IOMAP_ENTRIES,
+ sizeof(*zpci_iomap_start), GFP_KERNEL);
+ if (!zpci_iomap_start)
+ goto error_iomap;
+
+ zpci_iomap_bitmap = kcalloc(BITS_TO_LONGS(ZPCI_IOMAP_ENTRIES),
+ sizeof(*zpci_iomap_bitmap), GFP_KERNEL);
+ if (!zpci_iomap_bitmap)
+ goto error_iomap_bitmap;
+
+ return 0;
+error_iomap_bitmap:
+ kfree(zpci_iomap_start);
+error_iomap:
+ kmem_cache_destroy(zdev_fmb_cache);
+error_fmb:
+ return -ENOMEM;
+}
+
+static void zpci_mem_exit(void)
+{
+ kfree(zpci_iomap_bitmap);
+ kfree(zpci_iomap_start);
+ kmem_cache_destroy(zdev_fmb_cache);
+}
+
+static unsigned int s390_pci_probe = 1;
+static unsigned int s390_pci_initialized;
+
+char * __init pcibios_setup(char *str)
+{
+ if (!strcmp(str, "off")) {
+ s390_pci_probe = 0;
+ return NULL;
+ }
+ return str;
+}
+
+bool zpci_is_enabled(void)
+{
+ return s390_pci_initialized;
+}
+
+static int __init pci_base_init(void)
+{
+ int rc;
+
+ if (!s390_pci_probe)
+ return 0;
+
+ if (!test_facility(69) || !test_facility(71))
+ return 0;
+
+ rc = zpci_debug_init();
+ if (rc)
+ goto out;
+
+ rc = zpci_mem_init();
+ if (rc)
+ goto out_mem;
+
+ rc = zpci_irq_init();
+ if (rc)
+ goto out_irq;
+
+ rc = zpci_dma_init();
+ if (rc)
+ goto out_dma;
+
+ rc = clp_scan_pci_devices();
+ if (rc)
+ goto out_find;
+
+ s390_pci_initialized = 1;
+ return 0;
+
+out_find:
+ zpci_dma_exit();
+out_dma:
+ zpci_irq_exit();
+out_irq:
+ zpci_mem_exit();
+out_mem:
+ zpci_debug_exit();
+out:
+ return rc;
+}
+subsys_initcall_sync(pci_base_init);
+
+void zpci_rescan(void)
+{
+ if (zpci_is_enabled())
+ clp_rescan_pci_devices_simple();
+}
diff --git a/arch/s390/pci/pci_clp.c b/arch/s390/pci/pci_clp.c
new file mode 100644
index 000000000..eeb7450db
--- /dev/null
+++ b/arch/s390/pci/pci_clp.c
@@ -0,0 +1,644 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright IBM Corp. 2012
+ *
+ * Author(s):
+ * Jan Glauber <jang@linux.vnet.ibm.com>
+ */
+
+#define KMSG_COMPONENT "zpci"
+#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
+
+#include <linux/compat.h>
+#include <linux/kernel.h>
+#include <linux/miscdevice.h>
+#include <linux/slab.h>
+#include <linux/err.h>
+#include <linux/delay.h>
+#include <linux/pci.h>
+#include <linux/uaccess.h>
+#include <asm/pci_debug.h>
+#include <asm/pci_clp.h>
+#include <asm/clp.h>
+#include <uapi/asm/clp.h>
+
+bool zpci_unique_uid;
+
+static void update_uid_checking(bool new)
+{
+ if (zpci_unique_uid != new)
+ zpci_dbg(1, "uid checking:%d\n", new);
+
+ zpci_unique_uid = new;
+}
+
+static inline void zpci_err_clp(unsigned int rsp, int rc)
+{
+ struct {
+ unsigned int rsp;
+ int rc;
+ } __packed data = {rsp, rc};
+
+ zpci_err_hex(&data, sizeof(data));
+}
+
+/*
+ * Call Logical Processor with c=1, lps=0 and command 1
+ * to get the bit mask of installed logical processors
+ */
+static inline int clp_get_ilp(unsigned long *ilp)
+{
+ unsigned long mask;
+ int cc = 3;
+
+ asm volatile (
+ " .insn rrf,0xb9a00000,%[mask],%[cmd],8,0\n"
+ "0: ipm %[cc]\n"
+ " srl %[cc],28\n"
+ "1:\n"
+ EX_TABLE(0b, 1b)
+ : [cc] "+d" (cc), [mask] "=d" (mask) : [cmd] "a" (1)
+ : "cc");
+ *ilp = mask;
+ return cc;
+}
+
+/*
+ * Call Logical Processor with c=0, the given constant lps and an lpcb request.
+ */
+static inline int clp_req(void *data, unsigned int lps)
+{
+ struct { u8 _[CLP_BLK_SIZE]; } *req = data;
+ u64 ignored;
+ int cc = 3;
+
+ asm volatile (
+ " .insn rrf,0xb9a00000,%[ign],%[req],0,%[lps]\n"
+ "0: ipm %[cc]\n"
+ " srl %[cc],28\n"
+ "1:\n"
+ EX_TABLE(0b, 1b)
+ : [cc] "+d" (cc), [ign] "=d" (ignored), "+m" (*req)
+ : [req] "a" (req), [lps] "i" (lps)
+ : "cc");
+ return cc;
+}
+
+static void *clp_alloc_block(gfp_t gfp_mask)
+{
+ return (void *) __get_free_pages(gfp_mask, get_order(CLP_BLK_SIZE));
+}
+
+static void clp_free_block(void *ptr)
+{
+ free_pages((unsigned long) ptr, get_order(CLP_BLK_SIZE));
+}
+
+static void clp_store_query_pci_fngrp(struct zpci_dev *zdev,
+ struct clp_rsp_query_pci_grp *response)
+{
+ zdev->tlb_refresh = response->refresh;
+ zdev->dma_mask = response->dasm;
+ zdev->msi_addr = response->msia;
+ zdev->max_msi = response->noi;
+ zdev->fmb_update = response->mui;
+
+ switch (response->version) {
+ case 1:
+ zdev->max_bus_speed = PCIE_SPEED_5_0GT;
+ break;
+ default:
+ zdev->max_bus_speed = PCI_SPEED_UNKNOWN;
+ break;
+ }
+}
+
+static int clp_query_pci_fngrp(struct zpci_dev *zdev, u8 pfgid)
+{
+ struct clp_req_rsp_query_pci_grp *rrb;
+ int rc;
+
+ rrb = clp_alloc_block(GFP_KERNEL);
+ if (!rrb)
+ return -ENOMEM;
+
+ memset(rrb, 0, sizeof(*rrb));
+ rrb->request.hdr.len = sizeof(rrb->request);
+ rrb->request.hdr.cmd = CLP_QUERY_PCI_FNGRP;
+ rrb->response.hdr.len = sizeof(rrb->response);
+ rrb->request.pfgid = pfgid;
+
+ rc = clp_req(rrb, CLP_LPS_PCI);
+ if (!rc && rrb->response.hdr.rsp == CLP_RC_OK)
+ clp_store_query_pci_fngrp(zdev, &rrb->response);
+ else {
+ zpci_err("Q PCI FGRP:\n");
+ zpci_err_clp(rrb->response.hdr.rsp, rc);
+ rc = -EIO;
+ }
+ clp_free_block(rrb);
+ return rc;
+}
+
+static int clp_store_query_pci_fn(struct zpci_dev *zdev,
+ struct clp_rsp_query_pci *response)
+{
+ int i;
+
+ for (i = 0; i < PCI_BAR_COUNT; i++) {
+ zdev->bars[i].val = le32_to_cpu(response->bar[i]);
+ zdev->bars[i].size = response->bar_size[i];
+ }
+ zdev->start_dma = response->sdma;
+ zdev->end_dma = response->edma;
+ zdev->pchid = response->pchid;
+ zdev->pfgid = response->pfgid;
+ zdev->pft = response->pft;
+ zdev->vfn = response->vfn;
+ zdev->uid = response->uid;
+ zdev->fmb_length = sizeof(u32) * response->fmb_len;
+
+ memcpy(zdev->pfip, response->pfip, sizeof(zdev->pfip));
+ if (response->util_str_avail) {
+ memcpy(zdev->util_str, response->util_str,
+ sizeof(zdev->util_str));
+ }
+
+ return 0;
+}
+
+static int clp_query_pci_fn(struct zpci_dev *zdev, u32 fh)
+{
+ struct clp_req_rsp_query_pci *rrb;
+ int rc;
+
+ rrb = clp_alloc_block(GFP_KERNEL);
+ if (!rrb)
+ return -ENOMEM;
+
+ memset(rrb, 0, sizeof(*rrb));
+ rrb->request.hdr.len = sizeof(rrb->request);
+ rrb->request.hdr.cmd = CLP_QUERY_PCI_FN;
+ rrb->response.hdr.len = sizeof(rrb->response);
+ rrb->request.fh = fh;
+
+ rc = clp_req(rrb, CLP_LPS_PCI);
+ if (!rc && rrb->response.hdr.rsp == CLP_RC_OK) {
+ rc = clp_store_query_pci_fn(zdev, &rrb->response);
+ if (rc)
+ goto out;
+ rc = clp_query_pci_fngrp(zdev, rrb->response.pfgid);
+ } else {
+ zpci_err("Q PCI FN:\n");
+ zpci_err_clp(rrb->response.hdr.rsp, rc);
+ rc = -EIO;
+ }
+out:
+ clp_free_block(rrb);
+ return rc;
+}
+
+int clp_add_pci_device(u32 fid, u32 fh, int configured)
+{
+ struct zpci_dev *zdev;
+ int rc = -ENOMEM;
+
+ zpci_dbg(3, "add fid:%x, fh:%x, c:%d\n", fid, fh, configured);
+ zdev = kzalloc(sizeof(*zdev), GFP_KERNEL);
+ if (!zdev)
+ goto error;
+
+ zdev->fh = fh;
+ zdev->fid = fid;
+
+ /* Query function properties and update zdev */
+ rc = clp_query_pci_fn(zdev, fh);
+ if (rc)
+ goto error;
+
+ if (configured)
+ zdev->state = ZPCI_FN_STATE_CONFIGURED;
+ else
+ zdev->state = ZPCI_FN_STATE_STANDBY;
+
+ rc = zpci_create_device(zdev);
+ if (rc)
+ goto error;
+ return 0;
+
+error:
+ zpci_dbg(0, "add fid:%x, rc:%d\n", fid, rc);
+ kfree(zdev);
+ return rc;
+}
+
+/*
+ * Enable/Disable a given PCI function defined by its function handle.
+ */
+static int clp_set_pci_fn(u32 *fh, u8 nr_dma_as, u8 command)
+{
+ struct clp_req_rsp_set_pci *rrb;
+ int rc, retries = 100;
+
+ rrb = clp_alloc_block(GFP_KERNEL);
+ if (!rrb)
+ return -ENOMEM;
+
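+ /* Retry while the function is reported busy, up to 100 times 20 msec apart. */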
+ do {
+ memset(rrb, 0, sizeof(*rrb));
+ rrb->request.hdr.len = sizeof(rrb->request);
+ rrb->request.hdr.cmd = CLP_SET_PCI_FN;
+ rrb->response.hdr.len = sizeof(rrb->response);
+ rrb->request.fh = *fh;
+ rrb->request.oc = command;
+ rrb->request.ndas = nr_dma_as;
+
+ rc = clp_req(rrb, CLP_LPS_PCI);
+ if (rrb->response.hdr.rsp == CLP_RC_SETPCIFN_BUSY) {
+ retries--;
+ if (retries < 0)
+ break;
+ msleep(20);
+ }
+ } while (rrb->response.hdr.rsp == CLP_RC_SETPCIFN_BUSY);
+
+ if (!rc && rrb->response.hdr.rsp == CLP_RC_OK)
+ *fh = rrb->response.fh;
+ else {
+ zpci_err("Set PCI FN:\n");
+ zpci_err_clp(rrb->response.hdr.rsp, rc);
+ rc = -EIO;
+ }
+ clp_free_block(rrb);
+ return rc;
+}
+
+int clp_enable_fh(struct zpci_dev *zdev, u8 nr_dma_as)
+{
+ u32 fh = zdev->fh;
+ int rc;
+
+ rc = clp_set_pci_fn(&fh, nr_dma_as, CLP_SET_ENABLE_PCI_FN);
+ if (!rc)
+ /* Success -> store enabled handle in zdev */
+ zdev->fh = fh;
+
+ zpci_dbg(3, "ena fid:%x, fh:%x, rc:%d\n", zdev->fid, zdev->fh, rc);
+ return rc;
+}
+
+int clp_disable_fh(struct zpci_dev *zdev)
+{
+ u32 fh = zdev->fh;
+ int rc;
+
+ if (!zdev_enabled(zdev))
+ return 0;
+
+ rc = clp_set_pci_fn(&fh, 0, CLP_SET_DISABLE_PCI_FN);
+ if (!rc)
+ /* Success -> store disabled handle in zdev */
+ zdev->fh = fh;
+
+ zpci_dbg(3, "dis fid:%x, fh:%x, rc:%d\n", zdev->fid, zdev->fh, rc);
+ return rc;
+}
+
+static int clp_list_pci(struct clp_req_rsp_list_pci *rrb, void *data,
+ void (*cb)(struct clp_fh_list_entry *, void *))
+{
+ u64 resume_token = 0;
+ int entries, i, rc;
+
+ do {
+ memset(rrb, 0, sizeof(*rrb));
+ rrb->request.hdr.len = sizeof(rrb->request);
+ rrb->request.hdr.cmd = CLP_LIST_PCI;
+ /* store as many entries as possible */
+ rrb->response.hdr.len = CLP_BLK_SIZE - LIST_PCI_HDR_LEN;
+ rrb->request.resume_token = resume_token;
+
+ /* Get PCI function handle list */
+ rc = clp_req(rrb, CLP_LPS_PCI);
+ if (rc || rrb->response.hdr.rsp != CLP_RC_OK) {
+ zpci_err("List PCI FN:\n");
+ zpci_err_clp(rrb->response.hdr.rsp, rc);
+ rc = -EIO;
+ goto out;
+ }
+
+ update_uid_checking(rrb->response.uid_checking);
+ WARN_ON_ONCE(rrb->response.entry_size !=
+ sizeof(struct clp_fh_list_entry));
+
+ entries = (rrb->response.hdr.len - LIST_PCI_HDR_LEN) /
+ rrb->response.entry_size;
+
+ resume_token = rrb->response.resume_token;
+ for (i = 0; i < entries; i++)
+ cb(&rrb->response.fh_list[i], data);
+ } while (resume_token);
+out:
+ return rc;
+}
+
+static void __clp_add(struct clp_fh_list_entry *entry, void *data)
+{
+ struct zpci_dev *zdev;
+
+ if (!entry->vendor_id)
+ return;
+
+ zdev = get_zdev_by_fid(entry->fid);
+ if (!zdev)
+ clp_add_pci_device(entry->fid, entry->fh, entry->config_state);
+}
+
+static void __clp_update(struct clp_fh_list_entry *entry, void *data)
+{
+ struct zpci_dev *zdev;
+
+ if (!entry->vendor_id)
+ return;
+
+ zdev = get_zdev_by_fid(entry->fid);
+ if (!zdev)
+ return;
+
+ zdev->fh = entry->fh;
+}
+
+int clp_scan_pci_devices(void)
+{
+ struct clp_req_rsp_list_pci *rrb;
+ int rc;
+
+ rrb = clp_alloc_block(GFP_KERNEL);
+ if (!rrb)
+ return -ENOMEM;
+
+ rc = clp_list_pci(rrb, NULL, __clp_add);
+
+ clp_free_block(rrb);
+ return rc;
+}
+
+int clp_rescan_pci_devices(void)
+{
+ struct clp_req_rsp_list_pci *rrb;
+ int rc;
+
+ zpci_remove_reserved_devices();
+
+ rrb = clp_alloc_block(GFP_KERNEL);
+ if (!rrb)
+ return -ENOMEM;
+
+ rc = clp_list_pci(rrb, NULL, __clp_add);
+
+ clp_free_block(rrb);
+ return rc;
+}
+
+int clp_rescan_pci_devices_simple(void)
+{
+ struct clp_req_rsp_list_pci *rrb;
+ int rc;
+
+ rrb = clp_alloc_block(GFP_NOWAIT);
+ if (!rrb)
+ return -ENOMEM;
+
+ rc = clp_list_pci(rrb, NULL, __clp_update);
+
+ clp_free_block(rrb);
+ return rc;
+}
+
+struct clp_state_data {
+ u32 fid;
+ enum zpci_state state;
+};
+
+static void __clp_get_state(struct clp_fh_list_entry *entry, void *data)
+{
+ struct clp_state_data *sd = data;
+
+ if (entry->fid != sd->fid)
+ return;
+
+ sd->state = entry->config_state;
+}
+
+int clp_get_state(u32 fid, enum zpci_state *state)
+{
+ struct clp_req_rsp_list_pci *rrb;
+ struct clp_state_data sd = {fid, ZPCI_FN_STATE_RESERVED};
+ int rc;
+
+ rrb = clp_alloc_block(GFP_ATOMIC);
+ if (!rrb)
+ return -ENOMEM;
+
+ rc = clp_list_pci(rrb, &sd, __clp_get_state);
+ if (!rc)
+ *state = sd.state;
+
+ clp_free_block(rrb);
+ return rc;
+}
+
+static int clp_base_slpc(struct clp_req *req, struct clp_req_rsp_slpc *lpcb)
+{
+ unsigned long limit = PAGE_SIZE - sizeof(lpcb->request);
+
+ if (lpcb->request.hdr.len != sizeof(lpcb->request) ||
+ lpcb->response.hdr.len > limit)
+ return -EINVAL;
+ return clp_req(lpcb, CLP_LPS_BASE) ? -EOPNOTSUPP : 0;
+}
+
+static int clp_base_command(struct clp_req *req, struct clp_req_hdr *lpcb)
+{
+ switch (lpcb->cmd) {
+ case 0x0001: /* store logical-processor characteristics */
+ return clp_base_slpc(req, (void *) lpcb);
+ default:
+ return -EINVAL;
+ }
+}
+
+static int clp_pci_slpc(struct clp_req *req, struct clp_req_rsp_slpc *lpcb)
+{
+ unsigned long limit = PAGE_SIZE - sizeof(lpcb->request);
+
+ if (lpcb->request.hdr.len != sizeof(lpcb->request) ||
+ lpcb->response.hdr.len > limit)
+ return -EINVAL;
+ return clp_req(lpcb, CLP_LPS_PCI) ? -EOPNOTSUPP : 0;
+}
+
+static int clp_pci_list(struct clp_req *req, struct clp_req_rsp_list_pci *lpcb)
+{
+ unsigned long limit = PAGE_SIZE - sizeof(lpcb->request);
+
+ if (lpcb->request.hdr.len != sizeof(lpcb->request) ||
+ lpcb->response.hdr.len > limit)
+ return -EINVAL;
+ if (lpcb->request.reserved2 != 0)
+ return -EINVAL;
+ return clp_req(lpcb, CLP_LPS_PCI) ? -EOPNOTSUPP : 0;
+}
+
+static int clp_pci_query(struct clp_req *req,
+ struct clp_req_rsp_query_pci *lpcb)
+{
+ unsigned long limit = PAGE_SIZE - sizeof(lpcb->request);
+
+ if (lpcb->request.hdr.len != sizeof(lpcb->request) ||
+ lpcb->response.hdr.len > limit)
+ return -EINVAL;
+ if (lpcb->request.reserved2 != 0 || lpcb->request.reserved3 != 0)
+ return -EINVAL;
+ return clp_req(lpcb, CLP_LPS_PCI) ? -EOPNOTSUPP : 0;
+}
+
+static int clp_pci_query_grp(struct clp_req *req,
+ struct clp_req_rsp_query_pci_grp *lpcb)
+{
+ unsigned long limit = PAGE_SIZE - sizeof(lpcb->request);
+
+ if (lpcb->request.hdr.len != sizeof(lpcb->request) ||
+ lpcb->response.hdr.len > limit)
+ return -EINVAL;
+ if (lpcb->request.reserved2 != 0 || lpcb->request.reserved3 != 0 ||
+ lpcb->request.reserved4 != 0)
+ return -EINVAL;
+ return clp_req(lpcb, CLP_LPS_PCI) ? -EOPNOTSUPP : 0;
+}
+
+static int clp_pci_command(struct clp_req *req, struct clp_req_hdr *lpcb)
+{
+ switch (lpcb->cmd) {
+ case 0x0001: /* store logical-processor characteristics */
+ return clp_pci_slpc(req, (void *) lpcb);
+ case 0x0002: /* list PCI functions */
+ return clp_pci_list(req, (void *) lpcb);
+ case 0x0003: /* query PCI function */
+ return clp_pci_query(req, (void *) lpcb);
+ case 0x0004: /* query PCI function group */
+ return clp_pci_query_grp(req, (void *) lpcb);
+ default:
+ return -EINVAL;
+ }
+}
+
+static int clp_normal_command(struct clp_req *req)
+{
+ struct clp_req_hdr *lpcb;
+ void __user *uptr;
+ int rc;
+
+ rc = -EINVAL;
+ if (req->lps != 0 && req->lps != 2)
+ goto out;
+
+ rc = -ENOMEM;
+ lpcb = clp_alloc_block(GFP_KERNEL);
+ if (!lpcb)
+ goto out;
+
+ rc = -EFAULT;
+ uptr = (void __force __user *)(unsigned long) req->data_p;
+ if (copy_from_user(lpcb, uptr, PAGE_SIZE) != 0)
+ goto out_free;
+
+ rc = -EINVAL;
+ if (lpcb->fmt != 0 || lpcb->reserved1 != 0 || lpcb->reserved2 != 0)
+ goto out_free;
+
+ switch (req->lps) {
+ case 0:
+ rc = clp_base_command(req, lpcb);
+ break;
+ case 2:
+ rc = clp_pci_command(req, lpcb);
+ break;
+ }
+ if (rc)
+ goto out_free;
+
+ rc = -EFAULT;
+ if (copy_to_user(uptr, lpcb, PAGE_SIZE) != 0)
+ goto out_free;
+
+ rc = 0;
+
+out_free:
+ clp_free_block(lpcb);
+out:
+ return rc;
+}
+
+static int clp_immediate_command(struct clp_req *req)
+{
+ void __user *uptr;
+ unsigned long ilp;
+ int exists;
+
+ if (req->cmd > 1 || clp_get_ilp(&ilp) != 0)
+ return -EINVAL;
+
+ uptr = (void __force __user *)(unsigned long) req->data_p;
+ if (req->cmd == 0) {
+ /* Command code 0: test for a specific processor */
+ exists = test_bit_inv(req->lps, &ilp);
+ return put_user(exists, (int __user *) uptr);
+ }
+ /* Command code 1: return bit mask of installed processors */
+ return put_user(ilp, (unsigned long __user *) uptr);
+}
+
+static long clp_misc_ioctl(struct file *filp, unsigned int cmd,
+ unsigned long arg)
+{
+ struct clp_req req;
+ void __user *argp;
+
+ if (cmd != CLP_SYNC)
+ return -EINVAL;
+
+ argp = is_compat_task() ? compat_ptr(arg) : (void __user *) arg;
+ if (copy_from_user(&req, argp, sizeof(req)))
+ return -EFAULT;
+ if (req.r != 0)
+ return -EINVAL;
+ return req.c ? clp_immediate_command(&req) : clp_normal_command(&req);
+}
+
+static int clp_misc_release(struct inode *inode, struct file *filp)
+{
+ return 0;
+}
+
+static const struct file_operations clp_misc_fops = {
+ .owner = THIS_MODULE,
+ .open = nonseekable_open,
+ .release = clp_misc_release,
+ .unlocked_ioctl = clp_misc_ioctl,
+ .compat_ioctl = clp_misc_ioctl,
+ .llseek = no_llseek,
+};
+
+static struct miscdevice clp_misc_device = {
+ .minor = MISC_DYNAMIC_MINOR,
+ .name = "clp",
+ .fops = &clp_misc_fops,
+};
+
+static int __init clp_misc_init(void)
+{
+ return misc_register(&clp_misc_device);
+}
+
+device_initcall(clp_misc_init);
diff --git a/arch/s390/pci/pci_debug.c b/arch/s390/pci/pci_debug.c
new file mode 100644
index 000000000..04388a254
--- /dev/null
+++ b/arch/s390/pci/pci_debug.c
@@ -0,0 +1,217 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright IBM Corp. 2012,2015
+ *
+ * Author(s):
+ * Jan Glauber <jang@linux.vnet.ibm.com>
+ */
+
+#define KMSG_COMPONENT "zpci"
+#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
+
+#include <linux/kernel.h>
+#include <linux/seq_file.h>
+#include <linux/debugfs.h>
+#include <linux/export.h>
+#include <linux/pci.h>
+#include <asm/debug.h>
+
+#include <asm/pci_dma.h>
+
+static struct dentry *debugfs_root;
+debug_info_t *pci_debug_msg_id;
+EXPORT_SYMBOL_GPL(pci_debug_msg_id);
+debug_info_t *pci_debug_err_id;
+EXPORT_SYMBOL_GPL(pci_debug_err_id);
+
+static char *pci_common_names[] = {
+ "Load operations",
+ "Store operations",
+ "Store block operations",
+ "Refresh operations",
+};
+
+static char *pci_fmt0_names[] = {
+ "DMA read bytes",
+ "DMA write bytes",
+};
+
+static char *pci_fmt1_names[] = {
+ "Received bytes",
+ "Received packets",
+ "Transmitted bytes",
+ "Transmitted packets",
+};
+
+static char *pci_fmt2_names[] = {
+ "Consumed work units",
+ "Maximum work units",
+};
+
+static char *pci_fmt3_names[] = {
+ "Transmitted bytes",
+};
+
+static char *pci_sw_names[] = {
+ "Allocated pages",
+ "Mapped pages",
+ "Unmapped pages",
+};
+
+static void pci_fmb_show(struct seq_file *m, char *name[], int length,
+ u64 *data)
+{
+ int i;
+
+ for (i = 0; i < length; i++, data++)
+ seq_printf(m, "%26s:\t%llu\n", name[i], *data);
+}
+
+static void pci_sw_counter_show(struct seq_file *m)
+{
+ struct zpci_dev *zdev = m->private;
+ atomic64_t *counter = &zdev->allocated_pages;
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(pci_sw_names); i++, counter++)
+ seq_printf(m, "%26s:\t%lu\n", pci_sw_names[i],
+ atomic64_read(counter));
+}
+
+static int pci_perf_show(struct seq_file *m, void *v)
+{
+ struct zpci_dev *zdev = m->private;
+
+ if (!zdev)
+ return 0;
+
+ mutex_lock(&zdev->lock);
+ if (!zdev->fmb) {
+ mutex_unlock(&zdev->lock);
+ seq_puts(m, "FMB statistics disabled\n");
+ return 0;
+ }
+
+ /* header */
+ seq_printf(m, "Update interval: %u ms\n", zdev->fmb_update);
+ seq_printf(m, "Samples: %u\n", zdev->fmb->samples);
+ seq_printf(m, "Last update TOD: %Lx\n", zdev->fmb->last_update);
+
+ pci_fmb_show(m, pci_common_names, ARRAY_SIZE(pci_common_names),
+ &zdev->fmb->ld_ops);
+
+ switch (zdev->fmb->format) {
+ case 0:
+ if (!(zdev->fmb->fmt_ind & ZPCI_FMB_DMA_COUNTER_VALID))
+ break;
+ pci_fmb_show(m, pci_fmt0_names, ARRAY_SIZE(pci_fmt0_names),
+ &zdev->fmb->fmt0.dma_rbytes);
+ break;
+ case 1:
+ pci_fmb_show(m, pci_fmt1_names, ARRAY_SIZE(pci_fmt1_names),
+ &zdev->fmb->fmt1.rx_bytes);
+ break;
+ case 2:
+ pci_fmb_show(m, pci_fmt2_names, ARRAY_SIZE(pci_fmt2_names),
+ &zdev->fmb->fmt2.consumed_work_units);
+ break;
+ case 3:
+ pci_fmb_show(m, pci_fmt3_names, ARRAY_SIZE(pci_fmt3_names),
+ &zdev->fmb->fmt3.tx_bytes);
+ break;
+ default:
+ seq_puts(m, "Unknown format\n");
+ }
+
+ pci_sw_counter_show(m);
+ mutex_unlock(&zdev->lock);
+ return 0;
+}
+
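+/* Writing 0 or 1 to the "statistics" attribute disables or enables function measurement. */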
+static ssize_t pci_perf_seq_write(struct file *file, const char __user *ubuf,
+ size_t count, loff_t *off)
+{
+ struct zpci_dev *zdev = ((struct seq_file *) file->private_data)->private;
+ unsigned long val;
+ int rc;
+
+ if (!zdev)
+ return 0;
+
+ rc = kstrtoul_from_user(ubuf, count, 10, &val);
+ if (rc)
+ return rc;
+
+ mutex_lock(&zdev->lock);
+ switch (val) {
+ case 0:
+ rc = zpci_fmb_disable_device(zdev);
+ break;
+ case 1:
+ rc = zpci_fmb_enable_device(zdev);
+ break;
+ }
+ mutex_unlock(&zdev->lock);
+ return rc ? rc : count;
+}
+
+static int pci_perf_seq_open(struct inode *inode, struct file *filp)
+{
+ return single_open(filp, pci_perf_show,
+ file_inode(filp)->i_private);
+}
+
+static const struct file_operations debugfs_pci_perf_fops = {
+ .open = pci_perf_seq_open,
+ .read = seq_read,
+ .write = pci_perf_seq_write,
+ .llseek = seq_lseek,
+ .release = single_release,
+};
+
+void zpci_debug_init_device(struct zpci_dev *zdev, const char *name)
+{
+ zdev->debugfs_dev = debugfs_create_dir(name, debugfs_root);
+ if (IS_ERR(zdev->debugfs_dev))
+ zdev->debugfs_dev = NULL;
+
+ zdev->debugfs_perf = debugfs_create_file("statistics",
+ S_IFREG | S_IRUGO | S_IWUSR,
+ zdev->debugfs_dev, zdev,
+ &debugfs_pci_perf_fops);
+ if (IS_ERR(zdev->debugfs_perf))
+ zdev->debugfs_perf = NULL;
+}
+
+void zpci_debug_exit_device(struct zpci_dev *zdev)
+{
+ debugfs_remove(zdev->debugfs_perf);
+ debugfs_remove(zdev->debugfs_dev);
+}
+
+int __init zpci_debug_init(void)
+{
+ /* event trace buffer */
+ pci_debug_msg_id = debug_register("pci_msg", 8, 1, 8 * sizeof(long));
+ if (!pci_debug_msg_id)
+ return -EINVAL;
+ debug_register_view(pci_debug_msg_id, &debug_sprintf_view);
+ debug_set_level(pci_debug_msg_id, 3);
+
+ /* error log */
+ pci_debug_err_id = debug_register("pci_error", 2, 1, 16);
+ if (!pci_debug_err_id)
+ return -EINVAL;
+ debug_register_view(pci_debug_err_id, &debug_hex_ascii_view);
+ debug_set_level(pci_debug_err_id, 6);
+
+ debugfs_root = debugfs_create_dir("pci", NULL);
+ return 0;
+}
+
+void zpci_debug_exit(void)
+{
+ debug_unregister(pci_debug_msg_id);
+ debug_unregister(pci_debug_err_id);
+ debugfs_remove(debugfs_root);
+}
diff --git a/arch/s390/pci/pci_dma.c b/arch/s390/pci/pci_dma.c
new file mode 100644
index 000000000..d387a0fbd
--- /dev/null
+++ b/arch/s390/pci/pci_dma.c
@@ -0,0 +1,690 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright IBM Corp. 2012
+ *
+ * Author(s):
+ * Jan Glauber <jang@linux.vnet.ibm.com>
+ */
+
+#include <linux/kernel.h>
+#include <linux/slab.h>
+#include <linux/export.h>
+#include <linux/iommu-helper.h>
+#include <linux/dma-mapping.h>
+#include <linux/vmalloc.h>
+#include <linux/pci.h>
+#include <asm/pci_dma.h>
+
+#define S390_MAPPING_ERROR (~(dma_addr_t) 0x0)
+
+static struct kmem_cache *dma_region_table_cache;
+static struct kmem_cache *dma_page_table_cache;
+static int s390_iommu_strict;
+
+static int zpci_refresh_global(struct zpci_dev *zdev)
+{
+ return zpci_refresh_trans((u64) zdev->fh << 32, zdev->start_dma,
+ zdev->iommu_pages * PAGE_SIZE);
+}
+
+unsigned long *dma_alloc_cpu_table(void)
+{
+ unsigned long *table, *entry;
+
+ table = kmem_cache_alloc(dma_region_table_cache, GFP_ATOMIC);
+ if (!table)
+ return NULL;
+
+ for (entry = table; entry < table + ZPCI_TABLE_ENTRIES; entry++)
+ *entry = ZPCI_TABLE_INVALID;
+ return table;
+}
+
+static void dma_free_cpu_table(void *table)
+{
+ kmem_cache_free(dma_region_table_cache, table);
+}
+
+static unsigned long *dma_alloc_page_table(void)
+{
+ unsigned long *table, *entry;
+
+ table = kmem_cache_alloc(dma_page_table_cache, GFP_ATOMIC);
+ if (!table)
+ return NULL;
+
+ for (entry = table; entry < table + ZPCI_PT_ENTRIES; entry++)
+ *entry = ZPCI_PTE_INVALID;
+ return table;
+}
+
+static void dma_free_page_table(void *table)
+{
+ kmem_cache_free(dma_page_table_cache, table);
+}
+
+static unsigned long *dma_get_seg_table_origin(unsigned long *entry)
+{
+ unsigned long *sto;
+
+ if (reg_entry_isvalid(*entry))
+ sto = get_rt_sto(*entry);
+ else {
+ sto = dma_alloc_cpu_table();
+ if (!sto)
+ return NULL;
+
+ set_rt_sto(entry, sto);
+ validate_rt_entry(entry);
+ entry_clr_protected(entry);
+ }
+ return sto;
+}
+
+static unsigned long *dma_get_page_table_origin(unsigned long *entry)
+{
+ unsigned long *pto;
+
+ if (reg_entry_isvalid(*entry))
+ pto = get_st_pto(*entry);
+ else {
+ pto = dma_alloc_page_table();
+ if (!pto)
+ return NULL;
+ set_st_pto(entry, pto);
+ validate_st_entry(entry);
+ entry_clr_protected(entry);
+ }
+ return pto;
+}
+
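+/*
+ * Walk the three-level I/O translation table (region table -> segment
+ * table -> page table), allocating missing intermediate tables on the
+ * way, and return a pointer to the page table entry for dma_addr.
+ */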
+unsigned long *dma_walk_cpu_trans(unsigned long *rto, dma_addr_t dma_addr)
+{
+ unsigned long *sto, *pto;
+ unsigned int rtx, sx, px;
+
+ rtx = calc_rtx(dma_addr);
+ sto = dma_get_seg_table_origin(&rto[rtx]);
+ if (!sto)
+ return NULL;
+
+ sx = calc_sx(dma_addr);
+ pto = dma_get_page_table_origin(&sto[sx]);
+ if (!pto)
+ return NULL;
+
+ px = calc_px(dma_addr);
+ return &pto[px];
+}
+
+void dma_update_cpu_trans(unsigned long *entry, void *page_addr, int flags)
+{
+ if (flags & ZPCI_PTE_INVALID) {
+ invalidate_pt_entry(entry);
+ } else {
+ set_pt_pfaa(entry, page_addr);
+ validate_pt_entry(entry);
+ }
+
+ if (flags & ZPCI_TABLE_PROTECTED)
+ entry_set_protected(entry);
+ else
+ entry_clr_protected(entry);
+}
+
+static int __dma_update_trans(struct zpci_dev *zdev, unsigned long pa,
+ dma_addr_t dma_addr, size_t size, int flags)
+{
+ unsigned int nr_pages = PAGE_ALIGN(size) >> PAGE_SHIFT;
+ u8 *page_addr = (u8 *) (pa & PAGE_MASK);
+ unsigned long irq_flags;
+ unsigned long *entry;
+ int i, rc = 0;
+
+ if (!nr_pages)
+ return -EINVAL;
+
+ spin_lock_irqsave(&zdev->dma_table_lock, irq_flags);
+ if (!zdev->dma_table) {
+ rc = -EINVAL;
+ goto out_unlock;
+ }
+
+ for (i = 0; i < nr_pages; i++) {
+ entry = dma_walk_cpu_trans(zdev->dma_table, dma_addr);
+ if (!entry) {
+ rc = -ENOMEM;
+ goto undo_cpu_trans;
+ }
+ dma_update_cpu_trans(entry, page_addr, flags);
+ page_addr += PAGE_SIZE;
+ dma_addr += PAGE_SIZE;
+ }
+
+undo_cpu_trans:
+ if (rc && ((flags & ZPCI_PTE_VALID_MASK) == ZPCI_PTE_VALID)) {
+ flags = ZPCI_PTE_INVALID;
+ while (i-- > 0) {
+ page_addr -= PAGE_SIZE;
+ dma_addr -= PAGE_SIZE;
+ entry = dma_walk_cpu_trans(zdev->dma_table, dma_addr);
+ if (!entry)
+ break;
+ dma_update_cpu_trans(entry, page_addr, flags);
+ }
+ }
+out_unlock:
+ spin_unlock_irqrestore(&zdev->dma_table_lock, irq_flags);
+ return rc;
+}
+
+static int __dma_purge_tlb(struct zpci_dev *zdev, dma_addr_t dma_addr,
+ size_t size, int flags)
+{
+ unsigned long irqflags;
+ int ret;
+
+ /*
+ * With zdev->tlb_refresh == 0, rpcit is not required to establish new
+ * translations when previously invalid translation-table entries are
+ * validated. With lazy unmap, rpcit is skipped for previously valid
+ * entries, but a global rpcit is then required before any address can
+ * be re-used, i.e. after each iommu bitmap wrap-around.
+ */
+ if ((flags & ZPCI_PTE_VALID_MASK) == ZPCI_PTE_VALID) {
+ if (!zdev->tlb_refresh)
+ return 0;
+ } else {
+ if (!s390_iommu_strict)
+ return 0;
+ }
+
+ ret = zpci_refresh_trans((u64) zdev->fh << 32, dma_addr,
+ PAGE_ALIGN(size));
+ if (ret == -ENOMEM && !s390_iommu_strict) {
+ /* enable the hypervisor to free some resources */
+ if (zpci_refresh_global(zdev))
+ goto out;
+
+ spin_lock_irqsave(&zdev->iommu_bitmap_lock, irqflags);
+ bitmap_andnot(zdev->iommu_bitmap, zdev->iommu_bitmap,
+ zdev->lazy_bitmap, zdev->iommu_pages);
+ bitmap_zero(zdev->lazy_bitmap, zdev->iommu_pages);
+ spin_unlock_irqrestore(&zdev->iommu_bitmap_lock, irqflags);
+ ret = 0;
+ }
+out:
+ return ret;
+}
+
+static int dma_update_trans(struct zpci_dev *zdev, unsigned long pa,
+ dma_addr_t dma_addr, size_t size, int flags)
+{
+ int rc;
+
+ rc = __dma_update_trans(zdev, pa, dma_addr, size, flags);
+ if (rc)
+ return rc;
+
+ rc = __dma_purge_tlb(zdev, dma_addr, size, flags);
+ if (rc && ((flags & ZPCI_PTE_VALID_MASK) == ZPCI_PTE_VALID))
+ __dma_update_trans(zdev, pa, dma_addr, size, ZPCI_PTE_INVALID);
+
+ return rc;
+}
+
+void dma_free_seg_table(unsigned long entry)
+{
+ unsigned long *sto = get_rt_sto(entry);
+ int sx;
+
+ for (sx = 0; sx < ZPCI_TABLE_ENTRIES; sx++)
+ if (reg_entry_isvalid(sto[sx]))
+ dma_free_page_table(get_st_pto(sto[sx]));
+
+ dma_free_cpu_table(sto);
+}
+
+void dma_cleanup_tables(unsigned long *table)
+{
+ int rtx;
+
+ if (!table)
+ return;
+
+ for (rtx = 0; rtx < ZPCI_TABLE_ENTRIES; rtx++)
+ if (reg_entry_isvalid(table[rtx]))
+ dma_free_seg_table(table[rtx]);
+
+ dma_free_cpu_table(table);
+}
+
+static unsigned long __dma_alloc_iommu(struct device *dev,
+ unsigned long start, int size)
+{
+ struct zpci_dev *zdev = to_zpci(to_pci_dev(dev));
+ unsigned long boundary_size;
+
+ boundary_size = ALIGN(dma_get_seg_boundary(dev) + 1,
+ PAGE_SIZE) >> PAGE_SHIFT;
+ return iommu_area_alloc(zdev->iommu_bitmap, zdev->iommu_pages,
+ start, size, zdev->start_dma >> PAGE_SHIFT,
+ boundary_size, 0);
+}
+
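+/*
+ * Allocate a contiguous range of iommu pages, starting the search at the
+ * previous allocation. On exhaustion with lazy unmapping, flush the device
+ * TLB once, return the lazily freed pages to the bitmap and retry from
+ * offset 0.
+ */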
+static dma_addr_t dma_alloc_address(struct device *dev, int size)
+{
+ struct zpci_dev *zdev = to_zpci(to_pci_dev(dev));
+ unsigned long offset, flags;
+
+ spin_lock_irqsave(&zdev->iommu_bitmap_lock, flags);
+ offset = __dma_alloc_iommu(dev, zdev->next_bit, size);
+ if (offset == -1) {
+ if (!s390_iommu_strict) {
+ /* global flush before DMA addresses are reused */
+ if (zpci_refresh_global(zdev))
+ goto out_error;
+
+ bitmap_andnot(zdev->iommu_bitmap, zdev->iommu_bitmap,
+ zdev->lazy_bitmap, zdev->iommu_pages);
+ bitmap_zero(zdev->lazy_bitmap, zdev->iommu_pages);
+ }
+ /* wrap-around */
+ offset = __dma_alloc_iommu(dev, 0, size);
+ if (offset == -1)
+ goto out_error;
+ }
+ zdev->next_bit = offset + size;
+ spin_unlock_irqrestore(&zdev->iommu_bitmap_lock, flags);
+
+ return zdev->start_dma + offset * PAGE_SIZE;
+
+out_error:
+ spin_unlock_irqrestore(&zdev->iommu_bitmap_lock, flags);
+ return S390_MAPPING_ERROR;
+}
+
+static void dma_free_address(struct device *dev, dma_addr_t dma_addr, int size)
+{
+ struct zpci_dev *zdev = to_zpci(to_pci_dev(dev));
+ unsigned long flags, offset;
+
+ offset = (dma_addr - zdev->start_dma) >> PAGE_SHIFT;
+
+ spin_lock_irqsave(&zdev->iommu_bitmap_lock, flags);
+ if (!zdev->iommu_bitmap)
+ goto out;
+
+ if (s390_iommu_strict)
+ bitmap_clear(zdev->iommu_bitmap, offset, size);
+ else
+ bitmap_set(zdev->lazy_bitmap, offset, size);
+
+out:
+ spin_unlock_irqrestore(&zdev->iommu_bitmap_lock, flags);
+}
+
+static inline void zpci_err_dma(unsigned long rc, unsigned long addr)
+{
+ struct {
+ unsigned long rc;
+ unsigned long addr;
+ } __packed data = {rc, addr};
+
+ zpci_err_hex(&data, sizeof(data));
+}
+
+static dma_addr_t s390_dma_map_pages(struct device *dev, struct page *page,
+ unsigned long offset, size_t size,
+ enum dma_data_direction direction,
+ unsigned long attrs)
+{
+ struct zpci_dev *zdev = to_zpci(to_pci_dev(dev));
+ unsigned long pa = page_to_phys(page) + offset;
+ int flags = ZPCI_PTE_VALID;
+ unsigned long nr_pages;
+ dma_addr_t dma_addr;
+ int ret;
+
+ /* This rounds up the number of pages based on size and offset */
+ nr_pages = iommu_num_pages(pa, size, PAGE_SIZE);
+ dma_addr = dma_alloc_address(dev, nr_pages);
+ if (dma_addr == S390_MAPPING_ERROR) {
+ ret = -ENOSPC;
+ goto out_err;
+ }
+
+ /* Use rounded up size */
+ size = nr_pages * PAGE_SIZE;
+
+ if (direction == DMA_NONE || direction == DMA_TO_DEVICE)
+ flags |= ZPCI_TABLE_PROTECTED;
+
+ ret = dma_update_trans(zdev, pa, dma_addr, size, flags);
+ if (ret)
+ goto out_free;
+
+ atomic64_add(nr_pages, &zdev->mapped_pages);
+ return dma_addr + (offset & ~PAGE_MASK);
+
+out_free:
+ dma_free_address(dev, dma_addr, nr_pages);
+out_err:
+ zpci_err("map error:\n");
+ zpci_err_dma(ret, pa);
+ return S390_MAPPING_ERROR;
+}
+
+static void s390_dma_unmap_pages(struct device *dev, dma_addr_t dma_addr,
+ size_t size, enum dma_data_direction direction,
+ unsigned long attrs)
+{
+ struct zpci_dev *zdev = to_zpci(to_pci_dev(dev));
+ int npages, ret;
+
+ npages = iommu_num_pages(dma_addr, size, PAGE_SIZE);
+ dma_addr = dma_addr & PAGE_MASK;
+ ret = dma_update_trans(zdev, 0, dma_addr, npages * PAGE_SIZE,
+ ZPCI_PTE_INVALID);
+ if (ret) {
+ zpci_err("unmap error:\n");
+ zpci_err_dma(ret, dma_addr);
+ return;
+ }
+
+ atomic64_add(npages, &zdev->unmapped_pages);
+ dma_free_address(dev, dma_addr, npages);
+}
+
+static void *s390_dma_alloc(struct device *dev, size_t size,
+ dma_addr_t *dma_handle, gfp_t flag,
+ unsigned long attrs)
+{
+ struct zpci_dev *zdev = to_zpci(to_pci_dev(dev));
+ struct page *page;
+ unsigned long pa;
+ dma_addr_t map;
+
+ size = PAGE_ALIGN(size);
+ page = alloc_pages(flag, get_order(size));
+ if (!page)
+ return NULL;
+
+ pa = page_to_phys(page);
+ map = s390_dma_map_pages(dev, page, 0, size, DMA_BIDIRECTIONAL, 0);
+ if (dma_mapping_error(dev, map)) {
+ free_pages(pa, get_order(size));
+ return NULL;
+ }
+
+ atomic64_add(size / PAGE_SIZE, &zdev->allocated_pages);
+ if (dma_handle)
+ *dma_handle = map;
+ return (void *) pa;
+}
+
+static void s390_dma_free(struct device *dev, size_t size,
+ void *pa, dma_addr_t dma_handle,
+ unsigned long attrs)
+{
+ struct zpci_dev *zdev = to_zpci(to_pci_dev(dev));
+
+ size = PAGE_ALIGN(size);
+ atomic64_sub(size / PAGE_SIZE, &zdev->allocated_pages);
+ s390_dma_unmap_pages(dev, dma_handle, size, DMA_BIDIRECTIONAL, 0);
+ free_pages((unsigned long) pa, get_order(size));
+}
+
+/* Map a segment into a contiguous dma address area */
+static int __s390_dma_map_sg(struct device *dev, struct scatterlist *sg,
+ size_t size, dma_addr_t *handle,
+ enum dma_data_direction dir)
+{
+ unsigned long nr_pages = PAGE_ALIGN(size) >> PAGE_SHIFT;
+ struct zpci_dev *zdev = to_zpci(to_pci_dev(dev));
+ dma_addr_t dma_addr_base, dma_addr;
+ int flags = ZPCI_PTE_VALID;
+ struct scatterlist *s;
+ unsigned long pa = 0;
+ int ret;
+
+ dma_addr_base = dma_alloc_address(dev, nr_pages);
+ if (dma_addr_base == S390_MAPPING_ERROR)
+ return -ENOMEM;
+
+ dma_addr = dma_addr_base;
+ if (dir == DMA_NONE || dir == DMA_TO_DEVICE)
+ flags |= ZPCI_TABLE_PROTECTED;
+
+ for (s = sg; dma_addr < dma_addr_base + size; s = sg_next(s)) {
+ pa = page_to_phys(sg_page(s));
+ ret = __dma_update_trans(zdev, pa, dma_addr,
+ s->offset + s->length, flags);
+ if (ret)
+ goto unmap;
+
+ dma_addr += s->offset + s->length;
+ }
+ ret = __dma_purge_tlb(zdev, dma_addr_base, size, flags);
+ if (ret)
+ goto unmap;
+
+ *handle = dma_addr_base;
+ atomic64_add(nr_pages, &zdev->mapped_pages);
+
+ return ret;
+
+unmap:
+ dma_update_trans(zdev, 0, dma_addr_base, dma_addr - dma_addr_base,
+ ZPCI_PTE_INVALID);
+ dma_free_address(dev, dma_addr_base, nr_pages);
+ zpci_err("map error:\n");
+ zpci_err_dma(ret, pa);
+ return ret;
+}
+
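+/*
+ * Map a scatterlist by merging as many consecutive elements as possible into
+ * one contiguous DMA address range; a new range is started whenever an
+ * element does not start page aligned, the accumulated size is not a page
+ * multiple, or the maximum segment size would be exceeded.
+ */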
+static int s390_dma_map_sg(struct device *dev, struct scatterlist *sg,
+ int nr_elements, enum dma_data_direction dir,
+ unsigned long attrs)
+{
+ struct scatterlist *s = sg, *start = sg, *dma = sg;
+ unsigned int max = dma_get_max_seg_size(dev);
+ unsigned int size = s->offset + s->length;
+ unsigned int offset = s->offset;
+ int count = 0, i;
+
+ for (i = 1; i < nr_elements; i++) {
+ s = sg_next(s);
+
+ s->dma_address = S390_MAPPING_ERROR;
+ s->dma_length = 0;
+
+ if (s->offset || (size & ~PAGE_MASK) ||
+ size + s->length > max) {
+ if (__s390_dma_map_sg(dev, start, size,
+ &dma->dma_address, dir))
+ goto unmap;
+
+ dma->dma_address += offset;
+ dma->dma_length = size - offset;
+
+ size = offset = s->offset;
+ start = s;
+ dma = sg_next(dma);
+ count++;
+ }
+ size += s->length;
+ }
+ if (__s390_dma_map_sg(dev, start, size, &dma->dma_address, dir))
+ goto unmap;
+
+ dma->dma_address += offset;
+ dma->dma_length = size - offset;
+
+ return count + 1;
+unmap:
+ for_each_sg(sg, s, count, i)
+ s390_dma_unmap_pages(dev, sg_dma_address(s), sg_dma_len(s),
+ dir, attrs);
+
+ return 0;
+}
+
+static void s390_dma_unmap_sg(struct device *dev, struct scatterlist *sg,
+ int nr_elements, enum dma_data_direction dir,
+ unsigned long attrs)
+{
+ struct scatterlist *s;
+ int i;
+
+ for_each_sg(sg, s, nr_elements, i) {
+ if (s->dma_length)
+ s390_dma_unmap_pages(dev, s->dma_address, s->dma_length,
+ dir, attrs);
+ s->dma_address = 0;
+ s->dma_length = 0;
+ }
+}
+
+static int s390_mapping_error(struct device *dev, dma_addr_t dma_addr)
+{
+ return dma_addr == S390_MAPPING_ERROR;
+}
+
+int zpci_dma_init_device(struct zpci_dev *zdev)
+{
+ int rc;
+
+ /*
+ * At this point, if the device is part of an IOMMU domain, this would
+ * be a strong hint towards a bug in the IOMMU API (common) code and/or
+ * simultaneous access via IOMMU and DMA API. So let's issue a warning.
+ */
+ WARN_ON(zdev->s390_domain);
+
+ spin_lock_init(&zdev->iommu_bitmap_lock);
+ spin_lock_init(&zdev->dma_table_lock);
+
+ zdev->dma_table = dma_alloc_cpu_table();
+ if (!zdev->dma_table) {
+ rc = -ENOMEM;
+ goto out;
+ }
+
+ /*
+ * Restrict the iommu bitmap size to the minimum of the following:
+ * - main memory size
+ * - 3-level pagetable address limit minus start_dma offset
+ * - DMA address range allowed by the hardware (clp query pci fn)
+ *
+ * Also set zdev->end_dma to the actual end address of the usable
+ * range, instead of the theoretical maximum as reported by hardware.
+ */
+ zdev->start_dma = PAGE_ALIGN(zdev->start_dma);
+ zdev->iommu_size = min3((u64) high_memory,
+ ZPCI_TABLE_SIZE_RT - zdev->start_dma,
+ zdev->end_dma - zdev->start_dma + 1);
+ zdev->end_dma = zdev->start_dma + zdev->iommu_size - 1;
+ zdev->iommu_pages = zdev->iommu_size >> PAGE_SHIFT;
+ zdev->iommu_bitmap = vzalloc(zdev->iommu_pages / 8);
+ if (!zdev->iommu_bitmap) {
+ rc = -ENOMEM;
+ goto free_dma_table;
+ }
+ if (!s390_iommu_strict) {
+ zdev->lazy_bitmap = vzalloc(zdev->iommu_pages / 8);
+ if (!zdev->lazy_bitmap) {
+ rc = -ENOMEM;
+ goto free_bitmap;
+ }
+
+ }
+ rc = zpci_register_ioat(zdev, 0, zdev->start_dma, zdev->end_dma,
+ (u64) zdev->dma_table);
+ if (rc)
+ goto free_bitmap;
+
+ return 0;
+free_bitmap:
+ vfree(zdev->iommu_bitmap);
+ zdev->iommu_bitmap = NULL;
+ vfree(zdev->lazy_bitmap);
+ zdev->lazy_bitmap = NULL;
+free_dma_table:
+ dma_free_cpu_table(zdev->dma_table);
+ zdev->dma_table = NULL;
+out:
+ return rc;
+}
+
+void zpci_dma_exit_device(struct zpci_dev *zdev)
+{
+ /*
+ * At this point, if the device is part of an IOMMU domain, this would
+ * be a strong hint towards a bug in the IOMMU API (common) code and/or
+ * simultaneous access via IOMMU and DMA API. So let's issue a warning.
+ */
+ WARN_ON(zdev->s390_domain);
+
+ if (zpci_unregister_ioat(zdev, 0))
+ return;
+
+ dma_cleanup_tables(zdev->dma_table);
+ zdev->dma_table = NULL;
+ vfree(zdev->iommu_bitmap);
+ zdev->iommu_bitmap = NULL;
+ vfree(zdev->lazy_bitmap);
+ zdev->lazy_bitmap = NULL;
+
+ zdev->next_bit = 0;
+}
+
+static int __init dma_alloc_cpu_table_caches(void)
+{
+ dma_region_table_cache = kmem_cache_create("PCI_DMA_region_tables",
+ ZPCI_TABLE_SIZE, ZPCI_TABLE_ALIGN,
+ 0, NULL);
+ if (!dma_region_table_cache)
+ return -ENOMEM;
+
+ dma_page_table_cache = kmem_cache_create("PCI_DMA_page_tables",
+ ZPCI_PT_SIZE, ZPCI_PT_ALIGN,
+ 0, NULL);
+ if (!dma_page_table_cache) {
+ kmem_cache_destroy(dma_region_table_cache);
+ return -ENOMEM;
+ }
+ return 0;
+}
+
+int __init zpci_dma_init(void)
+{
+ return dma_alloc_cpu_table_caches();
+}
+
+void zpci_dma_exit(void)
+{
+ kmem_cache_destroy(dma_page_table_cache);
+ kmem_cache_destroy(dma_region_table_cache);
+}
+
+const struct dma_map_ops s390_pci_dma_ops = {
+ .alloc = s390_dma_alloc,
+ .free = s390_dma_free,
+ .map_sg = s390_dma_map_sg,
+ .unmap_sg = s390_dma_unmap_sg,
+ .map_page = s390_dma_map_pages,
+ .unmap_page = s390_dma_unmap_pages,
+ .mapping_error = s390_mapping_error,
+ /* dma_supported is unconditionally true without a callback */
+};
+EXPORT_SYMBOL_GPL(s390_pci_dma_ops);
+
+static int __init s390_iommu_setup(char *str)
+{
+ if (!strncmp(str, "strict", 6))
+ s390_iommu_strict = 1;
+ return 0;
+}
+
+__setup("s390_iommu=", s390_iommu_setup);
diff --git a/arch/s390/pci/pci_event.c b/arch/s390/pci/pci_event.c
new file mode 100644
index 000000000..8d6ee4af4
--- /dev/null
+++ b/arch/s390/pci/pci_event.c
@@ -0,0 +1,164 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright IBM Corp. 2012
+ *
+ * Author(s):
+ * Jan Glauber <jang@linux.vnet.ibm.com>
+ */
+
+#define KMSG_COMPONENT "zpci"
+#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
+
+#include <linux/kernel.h>
+#include <linux/pci.h>
+#include <asm/pci_debug.h>
+#include <asm/sclp.h>
+
+/* Content Code Description for PCI Function Error */
+struct zpci_ccdf_err {
+ u32 reserved1;
+ u32 fh; /* function handle */
+ u32 fid; /* function id */
+ u32 ett : 4; /* expected table type */
+ u32 mvn : 12; /* MSI vector number */
+ u32 dmaas : 8; /* DMA address space */
+ u32 : 6;
+ u32 q : 1; /* event qualifier */
+ u32 rw : 1; /* read/write */
+ u64 faddr; /* failing address */
+ u32 reserved3;
+ u16 reserved4;
+ u16 pec; /* PCI event code */
+} __packed;
+
+/* Content Code Description for PCI Function Availability */
+struct zpci_ccdf_avail {
+ u32 reserved1;
+ u32 fh; /* function handle */
+ u32 fid; /* function id */
+ u32 reserved2;
+ u32 reserved3;
+ u32 reserved4;
+ u32 reserved5;
+ u16 reserved6;
+ u16 pec; /* PCI event code */
+} __packed;
+
+static void __zpci_event_error(struct zpci_ccdf_err *ccdf)
+{
+ struct zpci_dev *zdev = get_zdev_by_fid(ccdf->fid);
+ struct pci_dev *pdev = NULL;
+
+ zpci_err("error CCDF:\n");
+ zpci_err_hex(ccdf, sizeof(*ccdf));
+
+ if (zdev)
+ pdev = pci_get_slot(zdev->bus, ZPCI_DEVFN);
+
+ pr_err("%s: Event 0x%x reports an error for PCI function 0x%x\n",
+ pdev ? pci_name(pdev) : "n/a", ccdf->pec, ccdf->fid);
+
+ if (!pdev)
+ return;
+
+ pdev->error_state = pci_channel_io_perm_failure;
+ pci_dev_put(pdev);
+}
+
+void zpci_event_error(void *data)
+{
+ if (zpci_is_enabled())
+ __zpci_event_error(data);
+}
+
+static void __zpci_event_availability(struct zpci_ccdf_avail *ccdf)
+{
+ struct zpci_dev *zdev = get_zdev_by_fid(ccdf->fid);
+ struct pci_dev *pdev = NULL;
+ enum zpci_state state;
+ int ret;
+
+ if (zdev)
+ pdev = pci_get_slot(zdev->bus, ZPCI_DEVFN);
+
+ pr_info("%s: Event 0x%x reconfigured PCI function 0x%x\n",
+ pdev ? pci_name(pdev) : "n/a", ccdf->pec, ccdf->fid);
+ zpci_err("avail CCDF:\n");
+ zpci_err_hex(ccdf, sizeof(*ccdf));
+
+ switch (ccdf->pec) {
+ case 0x0301: /* Reserved|Standby -> Configured */
+ if (!zdev) {
+ ret = clp_add_pci_device(ccdf->fid, ccdf->fh, 0);
+ if (ret)
+ break;
+ zdev = get_zdev_by_fid(ccdf->fid);
+ }
+ if (!zdev || zdev->state != ZPCI_FN_STATE_STANDBY)
+ break;
+ zdev->state = ZPCI_FN_STATE_CONFIGURED;
+ zdev->fh = ccdf->fh;
+ ret = zpci_enable_device(zdev);
+ if (ret)
+ break;
+ pci_lock_rescan_remove();
+ pci_rescan_bus(zdev->bus);
+ pci_unlock_rescan_remove();
+ break;
+ case 0x0302: /* Reserved -> Standby */
+ if (!zdev)
+ clp_add_pci_device(ccdf->fid, ccdf->fh, 0);
+ break;
+ case 0x0303: /* Deconfiguration requested */
+ if (!zdev)
+ break;
+ if (pdev)
+ pci_stop_and_remove_bus_device_locked(pdev);
+
+ ret = zpci_disable_device(zdev);
+ if (ret)
+ break;
+
+ ret = sclp_pci_deconfigure(zdev->fid);
+ zpci_dbg(3, "deconf fid:%x, rc:%d\n", zdev->fid, ret);
+ if (!ret)
+ zdev->state = ZPCI_FN_STATE_STANDBY;
+
+ break;
+ case 0x0304: /* Configured -> Standby|Reserved */
+ if (!zdev)
+ break;
+ if (pdev) {
+ /* Give the driver a hint that the function is
+ * already unusable. */
+ pdev->error_state = pci_channel_io_perm_failure;
+ pci_stop_and_remove_bus_device_locked(pdev);
+ }
+
+ zdev->fh = ccdf->fh;
+ zpci_disable_device(zdev);
+ zdev->state = ZPCI_FN_STATE_STANDBY;
+ if (!clp_get_state(ccdf->fid, &state) &&
+ state == ZPCI_FN_STATE_RESERVED) {
+ zpci_remove_device(zdev);
+ }
+ break;
+ case 0x0306: /* 0x308 or 0x302 for multiple devices */
+ clp_rescan_pci_devices();
+ break;
+ case 0x0308: /* Standby -> Reserved */
+ if (!zdev)
+ break;
+ zpci_remove_device(zdev);
+ break;
+ default:
+ break;
+ }
+ pci_dev_put(pdev);
+}
+
+void zpci_event_availability(void *data)
+{
+ if (zpci_is_enabled())
+ __zpci_event_availability(data);
+}
diff --git a/arch/s390/pci/pci_insn.c b/arch/s390/pci/pci_insn.c
new file mode 100644
index 000000000..f069929e8
--- /dev/null
+++ b/arch/s390/pci/pci_insn.c
@@ -0,0 +1,233 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * s390 specific pci instructions
+ *
+ * Copyright IBM Corp. 2013
+ */
+
+#include <linux/export.h>
+#include <linux/errno.h>
+#include <linux/delay.h>
+#include <asm/facility.h>
+#include <asm/pci_insn.h>
+#include <asm/pci_debug.h>
+#include <asm/processor.h>
+
+#define ZPCI_INSN_BUSY_DELAY 1 /* 1 microsecond */
+
+static inline void zpci_err_insn(u8 cc, u8 status, u64 req, u64 offset)
+{
+ struct {
+ u64 req;
+ u64 offset;
+ u8 cc;
+ u8 status;
+ } __packed data = {req, offset, cc, status};
+
+ zpci_err_hex(&data, sizeof(data));
+}
+
+/* Modify PCI Function Controls */
+static inline u8 __mpcifc(u64 req, struct zpci_fib *fib, u8 *status)
+{
+ u8 cc;
+
+ asm volatile (
+ " .insn rxy,0xe300000000d0,%[req],%[fib]\n"
+ " ipm %[cc]\n"
+ " srl %[cc],28\n"
+ : [cc] "=d" (cc), [req] "+d" (req), [fib] "+Q" (*fib)
+ : : "cc");
+ *status = req >> 24 & 0xff;
+ return cc;
+}
+
+u8 zpci_mod_fc(u64 req, struct zpci_fib *fib, u8 *status)
+{
+ u8 cc;
+
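+	/* condition code 2 means busy, so retry after a short delay */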
+ do {
+ cc = __mpcifc(req, fib, status);
+ if (cc == 2)
+ msleep(ZPCI_INSN_BUSY_DELAY);
+ } while (cc == 2);
+
+ if (cc)
+ zpci_err_insn(cc, *status, req, 0);
+
+ return cc;
+}
+
+/* Refresh PCI Translations */
+static inline u8 __rpcit(u64 fn, u64 addr, u64 range, u8 *status)
+{
+ register u64 __addr asm("2") = addr;
+ register u64 __range asm("3") = range;
+ u8 cc;
+
+ asm volatile (
+ " .insn rre,0xb9d30000,%[fn],%[addr]\n"
+ " ipm %[cc]\n"
+ " srl %[cc],28\n"
+ : [cc] "=d" (cc), [fn] "+d" (fn)
+ : [addr] "d" (__addr), "d" (__range)
+ : "cc");
+ *status = fn >> 24 & 0xff;
+ return cc;
+}
+
+int zpci_refresh_trans(u64 fn, u64 addr, u64 range)
+{
+ u8 cc, status;
+
+ do {
+ cc = __rpcit(fn, addr, range, &status);
+ if (cc == 2)
+ udelay(ZPCI_INSN_BUSY_DELAY);
+ } while (cc == 2);
+
+ if (cc)
+ zpci_err_insn(cc, status, addr, range);
+
+ if (cc == 1 && (status == 4 || status == 16))
+ return -ENOMEM;
+
+ return (cc) ? -EIO : 0;
+}
+
+/* Set Interruption Controls */
+int zpci_set_irq_ctrl(u16 ctl, char *unused, u8 isc)
+{
+ if (!test_facility(72))
+ return -EIO;
+ asm volatile (
+ " .insn rsy,0xeb00000000d1,%[ctl],%[isc],%[u]\n"
+ : : [ctl] "d" (ctl), [isc] "d" (isc << 27), [u] "Q" (*unused));
+ return 0;
+}
+
+/* PCI Load */
+static inline int ____pcilg(u64 *data, u64 req, u64 offset, u8 *status)
+{
+ register u64 __req asm("2") = req;
+ register u64 __offset asm("3") = offset;
+ int cc = -ENXIO;
+ u64 __data;
+
+ asm volatile (
+ " .insn rre,0xb9d20000,%[data],%[req]\n"
+ "0: ipm %[cc]\n"
+ " srl %[cc],28\n"
+ "1:\n"
+ EX_TABLE(0b, 1b)
+ : [cc] "+d" (cc), [data] "=d" (__data), [req] "+d" (__req)
+ : "d" (__offset)
+ : "cc");
+ *status = __req >> 24 & 0xff;
+ *data = __data;
+ return cc;
+}
+
+static inline int __pcilg(u64 *data, u64 req, u64 offset, u8 *status)
+{
+ u64 __data;
+ int cc;
+
+ cc = ____pcilg(&__data, req, offset, status);
+ if (!cc)
+ *data = __data;
+
+ return cc;
+}
+
+int zpci_load(u64 *data, u64 req, u64 offset)
+{
+ u8 status;
+ int cc;
+
+ do {
+ cc = __pcilg(data, req, offset, &status);
+ if (cc == 2)
+ udelay(ZPCI_INSN_BUSY_DELAY);
+ } while (cc == 2);
+
+ if (cc)
+ zpci_err_insn(cc, status, req, offset);
+
+ return (cc > 0) ? -EIO : cc;
+}
+EXPORT_SYMBOL_GPL(zpci_load);
+
+/* PCI Store */
+static inline int __pcistg(u64 data, u64 req, u64 offset, u8 *status)
+{
+ register u64 __req asm("2") = req;
+ register u64 __offset asm("3") = offset;
+ int cc = -ENXIO;
+
+ asm volatile (
+ " .insn rre,0xb9d00000,%[data],%[req]\n"
+ "0: ipm %[cc]\n"
+ " srl %[cc],28\n"
+ "1:\n"
+ EX_TABLE(0b, 1b)
+ : [cc] "+d" (cc), [req] "+d" (__req)
+ : "d" (__offset), [data] "d" (data)
+ : "cc");
+ *status = __req >> 24 & 0xff;
+ return cc;
+}
+
+int zpci_store(u64 data, u64 req, u64 offset)
+{
+ u8 status;
+ int cc;
+
+ do {
+ cc = __pcistg(data, req, offset, &status);
+ if (cc == 2)
+ udelay(ZPCI_INSN_BUSY_DELAY);
+ } while (cc == 2);
+
+ if (cc)
+ zpci_err_insn(cc, status, req, offset);
+
+ return (cc > 0) ? -EIO : cc;
+}
+EXPORT_SYMBOL_GPL(zpci_store);
+
+/* PCI Store Block */
+static inline int __pcistb(const u64 *data, u64 req, u64 offset, u8 *status)
+{
+ int cc = -ENXIO;
+
+ asm volatile (
+ " .insn rsy,0xeb00000000d0,%[req],%[offset],%[data]\n"
+ "0: ipm %[cc]\n"
+ " srl %[cc],28\n"
+ "1:\n"
+ EX_TABLE(0b, 1b)
+ : [cc] "+d" (cc), [req] "+d" (req)
+ : [offset] "d" (offset), [data] "Q" (*data)
+ : "cc");
+ *status = req >> 24 & 0xff;
+ return cc;
+}
+
+int zpci_store_block(const u64 *data, u64 req, u64 offset)
+{
+ u8 status;
+ int cc;
+
+ do {
+ cc = __pcistb(data, req, offset, &status);
+ if (cc == 2)
+ udelay(ZPCI_INSN_BUSY_DELAY);
+ } while (cc == 2);
+
+ if (cc)
+ zpci_err_insn(cc, status, req, offset);
+
+ return (cc > 0) ? -EIO : cc;
+}
+EXPORT_SYMBOL_GPL(zpci_store_block);
diff --git a/arch/s390/pci/pci_mmio.c b/arch/s390/pci/pci_mmio.c
new file mode 100644
index 000000000..7d42a8794
--- /dev/null
+++ b/arch/s390/pci/pci_mmio.c
@@ -0,0 +1,115 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Access to PCI I/O memory from user space programs.
+ *
+ * Copyright IBM Corp. 2014
+ * Author(s): Alexey Ishchuk <aishchuk@linux.vnet.ibm.com>
+ */
+#include <linux/kernel.h>
+#include <linux/syscalls.h>
+#include <linux/init.h>
+#include <linux/mm.h>
+#include <linux/errno.h>
+#include <linux/pci.h>
+
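+/*
+ * Resolve the page frame number backing a user space address and verify that
+ * the VMA grants the requested access (VM_READ or VM_WRITE).
+ */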
+static long get_pfn(unsigned long user_addr, unsigned long access,
+ unsigned long *pfn)
+{
+ struct vm_area_struct *vma;
+ long ret;
+
+ down_read(&current->mm->mmap_sem);
+ ret = -EINVAL;
+ vma = find_vma(current->mm, user_addr);
+ if (!vma)
+ goto out;
+ ret = -EACCES;
+ if (!(vma->vm_flags & access))
+ goto out;
+ ret = follow_pfn(vma, user_addr, pfn);
+out:
+ up_read(&current->mm->mmap_sem);
+ return ret;
+}
+
+SYSCALL_DEFINE3(s390_pci_mmio_write, unsigned long, mmio_addr,
+ const void __user *, user_buffer, size_t, length)
+{
+ u8 local_buf[64];
+ void __iomem *io_addr;
+ void *buf;
+ unsigned long pfn;
+ long ret;
+
+ if (!zpci_is_enabled())
+ return -ENODEV;
+
+ if (length <= 0 || PAGE_SIZE - (mmio_addr & ~PAGE_MASK) < length)
+ return -EINVAL;
+ if (length > 64) {
+ buf = kmalloc(length, GFP_KERNEL);
+ if (!buf)
+ return -ENOMEM;
+ } else
+ buf = local_buf;
+
+ ret = get_pfn(mmio_addr, VM_WRITE, &pfn);
+ if (ret)
+ goto out;
+ io_addr = (void __iomem *)((pfn << PAGE_SHIFT) | (mmio_addr & ~PAGE_MASK));
+
+ ret = -EFAULT;
+ if ((unsigned long) io_addr < ZPCI_IOMAP_ADDR_BASE)
+ goto out;
+
+ if (copy_from_user(buf, user_buffer, length))
+ goto out;
+
+ ret = zpci_memcpy_toio(io_addr, buf, length);
+out:
+ if (buf != local_buf)
+ kfree(buf);
+ return ret;
+}
+
+SYSCALL_DEFINE3(s390_pci_mmio_read, unsigned long, mmio_addr,
+ void __user *, user_buffer, size_t, length)
+{
+ u8 local_buf[64];
+ void __iomem *io_addr;
+ void *buf;
+ unsigned long pfn;
+ long ret;
+
+ if (!zpci_is_enabled())
+ return -ENODEV;
+
+ if (length <= 0 || PAGE_SIZE - (mmio_addr & ~PAGE_MASK) < length)
+ return -EINVAL;
+ if (length > 64) {
+ buf = kmalloc(length, GFP_KERNEL);
+ if (!buf)
+ return -ENOMEM;
+ } else
+ buf = local_buf;
+
+ ret = get_pfn(mmio_addr, VM_READ, &pfn);
+ if (ret)
+ goto out;
+ io_addr = (void __iomem *)((pfn << PAGE_SHIFT) | (mmio_addr & ~PAGE_MASK));
+
+ if ((unsigned long) io_addr < ZPCI_IOMAP_ADDR_BASE) {
+ ret = -EFAULT;
+ goto out;
+ }
+ ret = zpci_memcpy_fromio(buf, io_addr, length);
+ if (ret)
+ goto out;
+ if (copy_to_user(user_buffer, buf, length))
+ ret = -EFAULT;
+
+out:
+ if (buf != local_buf)
+ kfree(buf);
+ return ret;
+}
diff --git a/arch/s390/pci/pci_sysfs.c b/arch/s390/pci/pci_sysfs.c
new file mode 100644
index 000000000..0e11fc023
--- /dev/null
+++ b/arch/s390/pci/pci_sysfs.c
@@ -0,0 +1,162 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright IBM Corp. 2012
+ *
+ * Author(s):
+ * Jan Glauber <jang@linux.vnet.ibm.com>
+ */
+
+#define KMSG_COMPONENT "zpci"
+#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
+
+#include <linux/kernel.h>
+#include <linux/stat.h>
+#include <linux/pci.h>
+
+#include "../../../drivers/pci/pci.h"
+
+#include <asm/sclp.h>
+
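+/* Define a read-only sysfs attribute that formats a single zpci_dev member. */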
+#define zpci_attr(name, fmt, member) \
+static ssize_t name##_show(struct device *dev, \
+ struct device_attribute *attr, char *buf) \
+{ \
+ struct zpci_dev *zdev = to_zpci(to_pci_dev(dev)); \
+ \
+ return sprintf(buf, fmt, zdev->member); \
+} \
+static DEVICE_ATTR_RO(name)
+
+zpci_attr(function_id, "0x%08x\n", fid);
+zpci_attr(function_handle, "0x%08x\n", fh);
+zpci_attr(pchid, "0x%04x\n", pchid);
+zpci_attr(pfgid, "0x%02x\n", pfgid);
+zpci_attr(vfn, "0x%04x\n", vfn);
+zpci_attr(pft, "0x%02x\n", pft);
+zpci_attr(uid, "0x%x\n", uid);
+zpci_attr(segment0, "0x%02x\n", pfip[0]);
+zpci_attr(segment1, "0x%02x\n", pfip[1]);
+zpci_attr(segment2, "0x%02x\n", pfip[2]);
+zpci_attr(segment3, "0x%02x\n", pfip[3]);
+
+static ssize_t recover_store(struct device *dev, struct device_attribute *attr,
+ const char *buf, size_t count)
+{
+ struct kernfs_node *kn;
+ struct pci_dev *pdev = to_pci_dev(dev);
+ struct zpci_dev *zdev = to_zpci(pdev);
+ int ret = 0;
+
+ /* Can't use device_remove_self() here as that would lead us to lock
+	 * the pci_rescan_remove_lock while holding the device's kernfs lock.
+	 * This would create a possible deadlock with disable_slot() which is
+	 * not directly protected by the device's kernfs lock but takes it
+ * during the device removal which happens under
+ * pci_rescan_remove_lock.
+ *
+ * This is analogous to sdev_store_delete() in
+ * drivers/scsi/scsi_sysfs.c
+ */
+ kn = sysfs_break_active_protection(&dev->kobj, &attr->attr);
+ WARN_ON_ONCE(!kn);
+ /* device_remove_file() serializes concurrent calls ignoring all but
+ * the first
+ */
+ device_remove_file(dev, attr);
+
+ /* A concurrent call to recover_store() may slip between
+ * sysfs_break_active_protection() and the sysfs file removal.
+ * Once it unblocks from pci_lock_rescan_remove() the original pdev
+ * will already be removed.
+ */
+ pci_lock_rescan_remove();
+ if (pci_dev_is_added(pdev)) {
+ pci_stop_and_remove_bus_device(pdev);
+ ret = zpci_disable_device(zdev);
+ if (ret)
+ goto out;
+
+ ret = zpci_enable_device(zdev);
+ if (ret)
+ goto out;
+ pci_rescan_bus(zdev->bus);
+ }
+out:
+ pci_unlock_rescan_remove();
+ if (kn)
+ sysfs_unbreak_active_protection(kn);
+ return ret ? ret : count;
+}
+static DEVICE_ATTR_WO(recover);
+
+static ssize_t util_string_read(struct file *filp, struct kobject *kobj,
+ struct bin_attribute *attr, char *buf,
+ loff_t off, size_t count)
+{
+ struct device *dev = kobj_to_dev(kobj);
+ struct pci_dev *pdev = to_pci_dev(dev);
+ struct zpci_dev *zdev = to_zpci(pdev);
+
+ return memory_read_from_buffer(buf, count, &off, zdev->util_str,
+ sizeof(zdev->util_str));
+}
+static BIN_ATTR_RO(util_string, CLP_UTIL_STR_LEN);
+
+static ssize_t report_error_write(struct file *filp, struct kobject *kobj,
+ struct bin_attribute *attr, char *buf,
+ loff_t off, size_t count)
+{
+ struct zpci_report_error_header *report = (void *) buf;
+ struct device *dev = kobj_to_dev(kobj);
+ struct pci_dev *pdev = to_pci_dev(dev);
+ struct zpci_dev *zdev = to_zpci(pdev);
+ int ret;
+
+ if (off || (count < sizeof(*report)))
+ return -EINVAL;
+
+ ret = sclp_pci_report(report, zdev->fh, zdev->fid);
+
+ return ret ? ret : count;
+}
+static BIN_ATTR(report_error, S_IWUSR, NULL, report_error_write, PAGE_SIZE);
+
+static struct bin_attribute *zpci_bin_attrs[] = {
+ &bin_attr_util_string,
+ &bin_attr_report_error,
+ NULL,
+};
+
+static struct attribute *zpci_dev_attrs[] = {
+ &dev_attr_function_id.attr,
+ &dev_attr_function_handle.attr,
+ &dev_attr_pchid.attr,
+ &dev_attr_pfgid.attr,
+ &dev_attr_pft.attr,
+ &dev_attr_vfn.attr,
+ &dev_attr_uid.attr,
+ &dev_attr_recover.attr,
+ NULL,
+};
+static struct attribute_group zpci_attr_group = {
+ .attrs = zpci_dev_attrs,
+ .bin_attrs = zpci_bin_attrs,
+};
+
+static struct attribute *pfip_attrs[] = {
+ &dev_attr_segment0.attr,
+ &dev_attr_segment1.attr,
+ &dev_attr_segment2.attr,
+ &dev_attr_segment3.attr,
+ NULL,
+};
+static struct attribute_group pfip_attr_group = {
+ .name = "pfip",
+ .attrs = pfip_attrs,
+};
+
+const struct attribute_group *zpci_attr_groups[] = {
+ &zpci_attr_group,
+ &pfip_attr_group,
+ NULL,
+};
diff --git a/arch/s390/purgatory/.gitignore b/arch/s390/purgatory/.gitignore
new file mode 100644
index 000000000..e9e66f178
--- /dev/null
+++ b/arch/s390/purgatory/.gitignore
@@ -0,0 +1,2 @@
+kexec-purgatory.c
+purgatory.ro
diff --git a/arch/s390/purgatory/Makefile b/arch/s390/purgatory/Makefile
new file mode 100644
index 000000000..fdccb7689
--- /dev/null
+++ b/arch/s390/purgatory/Makefile
@@ -0,0 +1,39 @@
+# SPDX-License-Identifier: GPL-2.0
+
+OBJECT_FILES_NON_STANDARD := y
+
+purgatory-y := head.o purgatory.o string.o sha256.o mem.o
+
+targets += $(purgatory-y) purgatory.ro kexec-purgatory.c
+PURGATORY_OBJS = $(addprefix $(obj)/,$(purgatory-y))
+
+$(obj)/sha256.o: $(srctree)/lib/sha256.c FORCE
+ $(call if_changed_rule,cc_o_c)
+
+$(obj)/mem.o: $(srctree)/arch/s390/lib/mem.S FORCE
+ $(call if_changed_rule,as_o_S)
+
+KCOV_INSTRUMENT := n
+GCOV_PROFILE := n
+UBSAN_SANITIZE := n
+KASAN_SANITIZE := n
+
+LDFLAGS_purgatory.ro := -e purgatory_start -r --no-undefined -nostdlib
+LDFLAGS_purgatory.ro += -z nodefaultlib
+KBUILD_CFLAGS := -fno-strict-aliasing -Wall -Wstrict-prototypes
+KBUILD_CFLAGS += -Wno-pointer-sign -Wno-sign-compare
+KBUILD_CFLAGS += -fno-zero-initialized-in-bss -fno-builtin -ffreestanding
+KBUILD_CFLAGS += -c -MD -Os -m64 -msoft-float -fno-common
+KBUILD_CFLAGS += $(call cc-option,-fno-PIE)
+KBUILD_AFLAGS := $(filter-out -DCC_USING_EXPOLINE,$(KBUILD_AFLAGS))
+
+$(obj)/purgatory.ro: $(PURGATORY_OBJS) FORCE
+ $(call if_changed,ld)
+
+quiet_cmd_bin2c = BIN2C $@
+ cmd_bin2c = $(objtree)/scripts/bin2c kexec_purgatory < $< > $@
+
+$(obj)/kexec-purgatory.c: $(obj)/purgatory.ro FORCE
+ $(call if_changed,bin2c)
+
+obj-$(CONFIG_ARCH_HAS_KEXEC_PURGATORY) += kexec-purgatory.o
diff --git a/arch/s390/purgatory/head.S b/arch/s390/purgatory/head.S
new file mode 100644
index 000000000..9b2d7a71f
--- /dev/null
+++ b/arch/s390/purgatory/head.S
@@ -0,0 +1,273 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Purgatory setup code
+ *
+ * Copyright IBM Corp. 2018
+ *
+ * Author(s): Philipp Rudo <prudo@linux.vnet.ibm.com>
+ */
+
+#include <linux/linkage.h>
+#include <asm/asm-offsets.h>
+#include <asm/page.h>
+#include <asm/sigp.h>
+
+/* The purgatory is the code running between two kernels. Its main purpose
+ * is to verify that the next kernel was not corrupted after load and to
+ * start it.
+ *
+ * If the next kernel is a crash kernel there are some peculiarities to
+ * consider:
+ *
+ * First, the purgatory is called twice: once only to verify the
+ * sha digest, so that if the crash kernel got corrupted the old kernel can
+ * try to trigger a stand-alone dumper, and once to actually load the crash
+ * kernel.
+ *
+ * Second, the purgatory also has to swap the crash memory region with its
+ * destination at address 0. As the purgatory is part of crash memory this
+ * requires some finesse. The tactic here is that the purgatory first copies
+ * itself to the end of the destination and then swaps the rest of the
+ * memory running from there.
+ */
+
+#define bufsz purgatory_end-stack
+
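+/* Copy \len bytes from \src to \dst with mvcle; clobbers %r0-%r3. */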
+.macro MEMCPY dst,src,len
+ lgr %r0,\dst
+ lgr %r1,\len
+ lgr %r2,\src
+ lgr %r3,\len
+
+20: mvcle %r0,%r2,0
+ jo 20b
+.endm
+
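+/* Swap \len bytes between \dst and \src, using \buf as a bounce buffer of
+ * at most bufsz bytes per iteration.
+ */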
+.macro MEMSWAP dst,src,buf,len
+10: cghi \len,bufsz
+ jh 11f
+ lgr %r4,\len
+ j 12f
+11: lghi %r4,bufsz
+
+12: MEMCPY \buf,\dst,%r4
+ MEMCPY \dst,\src,%r4
+ MEMCPY \src,\buf,%r4
+
+ agr \dst,%r4
+ agr \src,%r4
+ sgr \len,%r4
+
+ cghi \len,0
+ jh 10b
+.endm
+
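+/* Combine load_psw_mask with the kernel entry point into a load PSW, store
+ * it at absolute address 0 and start the next kernel via diag 0x308 with the
+ * given \subcode.
+ */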
+.macro START_NEXT_KERNEL base subcode
+ lg %r4,kernel_entry-\base(%r13)
+ lg %r5,load_psw_mask-\base(%r13)
+ ogr %r4,%r5
+ stg %r4,0(%r0)
+
+ xgr %r0,%r0
+ lghi %r1,\subcode
+ diag %r0,%r1,0x308
+.endm
+
+.text
+.align PAGE_SIZE
+ENTRY(purgatory_start)
+	/* The purgatory might be called after a diag308, so set the
+	 * architecture and addressing mode explicitly.
+ */
+ lhi %r1,1
+ sigp %r1,%r0,SIGP_SET_ARCHITECTURE
+ sam64
+
+ larl %r5,gprregs
+ stmg %r6,%r15,0(%r5)
+
+ basr %r13,0
+.base_crash:
+
+ /* Setup stack */
+ larl %r15,purgatory_end
+ aghi %r15,-160
+
+ /* If the next kernel is KEXEC_TYPE_CRASH the purgatory is called
+ * directly with a flag passed in %r2 whether the purgatory shall do
+ * checksum verification only (%r2 = 0 -> verification only).
+ *
+	 * Check now and preserve it across the C function call by storing it
+	 * in %r10 with
+ * 1 -> checksum verification only
+ * 0 -> load new kernel
+ */
+ lghi %r10,0
+ lg %r11,kernel_type-.base_crash(%r13)
+ cghi %r11,1 /* KEXEC_TYPE_CRASH */
+ jne .do_checksum_verification
+ cghi %r2,0 /* checksum verification only */
+ jne .do_checksum_verification
+ lghi %r10,1
+
+.do_checksum_verification:
+ brasl %r14,verify_sha256_digest
+
+ cghi %r10,1 /* checksum verification only */
+ je .return_old_kernel
+ cghi %r2,0 /* checksum match */
+ jne .disabled_wait
+
+ /* If the next kernel is a crash kernel the purgatory has to swap
+ * the mem regions first.
+ */
+ cghi %r11,1 /* KEXEC_TYPE_CRASH */
+ je .start_crash_kernel
+
+ /* start normal kernel */
+ START_NEXT_KERNEL .base_crash 0
+
+.return_old_kernel:
+ lmg %r6,%r15,gprregs-.base_crash(%r13)
+ br %r14
+
+.disabled_wait:
+ lpswe disabled_wait_psw-.base_crash(%r13)
+
+.start_crash_kernel:
+ /* Location of purgatory_start in crash memory */
+ lgr %r8,%r13
+ aghi %r8,-(.base_crash-purgatory_start)
+
+ /* Destination for this code i.e. end of memory to be swapped. */
+ lg %r9,crash_size-.base_crash(%r13)
+ aghi %r9,-(purgatory_end-purgatory_start)
+
+ /* Destination in crash memory, i.e. same as r9 but in crash memory. */
+ lg %r10,crash_start-.base_crash(%r13)
+ agr %r10,%r9
+
+ /* Buffer location (in crash memory) and size. As the purgatory is
+ * behind the point of no return it can re-use the stack as buffer.
+ */
+ lghi %r11,bufsz
+ larl %r12,stack
+
+ MEMCPY %r12,%r9,%r11 /* dst -> (crash) buf */
+ MEMCPY %r9,%r8,%r11 /* self -> dst */
+
+ /* Jump to new location. */
+ lgr %r7,%r9
+ aghi %r7,.jump_to_dst-purgatory_start
+ br %r7
+
+.jump_to_dst:
+ basr %r13,0
+.base_dst:
+
+ /* clear buffer */
+ MEMCPY %r12,%r10,%r11 /* (crash) buf -> (crash) dst */
+
+ /* Load new buffer location after jump */
+ larl %r7,stack
+ aghi %r10,stack-purgatory_start
+ MEMCPY %r10,%r7,%r11 /* (new) buf -> (crash) buf */
+
+ /* Now the code is set up to run from its designated location. Start
+ * swapping the rest of crash memory now.
+ *
+	 * The registers will be used as follows:
+ *
+ * %r0-%r4 reserved for macros defined above
+ * %r5-%r6 tmp registers
+ * %r7 pointer to current struct sha region
+ * %r8 index to iterate over all sha regions
+ * %r9 pointer in crash memory
+ * %r10 pointer in old kernel
+ * %r11 total size (still) to be moved
+ * %r12 pointer to buffer
+ */
+ lgr %r12,%r7
+ lgr %r11,%r9
+ lghi %r10,0
+ lg %r9,crash_start-.base_dst(%r13)
+ lghi %r8,16 /* KEXEC_SEGMENTS_MAX */
+ larl %r7,purgatory_sha_regions
+
+ j .loop_first
+
+ /* Loop over all purgatory_sha_regions. */
+.loop_next:
+ aghi %r8,-1
+ cghi %r8,0
+ je .loop_out
+
+ aghi %r7,__KEXEC_SHA_REGION_SIZE
+
+.loop_first:
+ lg %r5,__KEXEC_SHA_REGION_START(%r7)
+ cghi %r5,0
+ je .loop_next
+
+ /* Copy [end last sha region, start current sha region) */
+ /* Note: kexec_sha_region->start points in crash memory */
+ sgr %r5,%r9
+ MEMCPY %r9,%r10,%r5
+
+ agr %r9,%r5
+ agr %r10,%r5
+ sgr %r11,%r5
+
+ /* Swap sha region */
+ lg %r6,__KEXEC_SHA_REGION_LEN(%r7)
+ MEMSWAP %r9,%r10,%r12,%r6
+ sg %r11,__KEXEC_SHA_REGION_LEN(%r7)
+ j .loop_next
+
+.loop_out:
+ /* Copy rest of crash memory */
+ MEMCPY %r9,%r10,%r11
+
+ /* start crash kernel */
+ START_NEXT_KERNEL .base_dst 1
+
+
+load_psw_mask:
+ .long 0x00080000,0x80000000
+
+ .align 8
+disabled_wait_psw:
+ .quad 0x0002000180000000
+ .quad 0x0000000000000000 + .do_checksum_verification
+
+gprregs:
+ .rept 10
+ .quad 0
+ .endr
+
+/* Macro to define a global variable with name and size (in bytes) to be
+ * shared with C code.
+ *
+ * Add the .size and .type attribute to satisfy checks on the Elf_Sym during
+ * purgatory load.
+ */
+.macro GLOBAL_VARIABLE name,size
+\name:
+ .global \name
+ .size \name,\size
+ .type \name,object
+ .skip \size,0
+.endm
+
+GLOBAL_VARIABLE purgatory_sha256_digest,32
+GLOBAL_VARIABLE purgatory_sha_regions,16*__KEXEC_SHA_REGION_SIZE
+GLOBAL_VARIABLE kernel_entry,8
+GLOBAL_VARIABLE kernel_type,8
+GLOBAL_VARIABLE crash_start,8
+GLOBAL_VARIABLE crash_size,8
+
+ .align PAGE_SIZE
+stack:
+ /* The buffer to move this code must be as big as the code. */
+ .skip stack-purgatory_start
+ .align PAGE_SIZE
+purgatory_end:
diff --git a/arch/s390/purgatory/purgatory.c b/arch/s390/purgatory/purgatory.c
new file mode 100644
index 000000000..3528e6da4
--- /dev/null
+++ b/arch/s390/purgatory/purgatory.c
@@ -0,0 +1,33 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Purgatory code running between two kernels.
+ *
+ * Copyright IBM Corp. 2018
+ *
+ * Author(s): Philipp Rudo <prudo@linux.vnet.ibm.com>
+ */
+
+#include <linux/kexec.h>
+#include <linux/sha256.h>
+#include <linux/string.h>
+#include <asm/purgatory.h>
+
+int verify_sha256_digest(void)
+{
+ struct kexec_sha_region *ptr, *end;
+ u8 digest[SHA256_DIGEST_SIZE];
+ struct sha256_state sctx;
+
+ sha256_init(&sctx);
+ end = purgatory_sha_regions + ARRAY_SIZE(purgatory_sha_regions);
+
+ for (ptr = purgatory_sha_regions; ptr < end; ptr++)
+ sha256_update(&sctx, (uint8_t *)(ptr->start), ptr->len);
+
+ sha256_final(&sctx, digest);
+
+ if (memcmp(digest, purgatory_sha256_digest, sizeof(digest)))
+ return 1;
+
+ return 0;
+}
diff --git a/arch/s390/purgatory/string.c b/arch/s390/purgatory/string.c
new file mode 100644
index 000000000..c98c22a72
--- /dev/null
+++ b/arch/s390/purgatory/string.c
@@ -0,0 +1,3 @@
+// SPDX-License-Identifier: GPL-2.0
+#define __HAVE_ARCH_MEMCMP /* arch function */
+#include "../lib/string.c"
diff --git a/arch/s390/scripts/Makefile.chkbss b/arch/s390/scripts/Makefile.chkbss
new file mode 100644
index 000000000..9bba2c14e
--- /dev/null
+++ b/arch/s390/scripts/Makefile.chkbss
@@ -0,0 +1,23 @@
+# SPDX-License-Identifier: GPL-2.0
+
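+# Fail the build with an error if an object file's .bss section is not empty.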
+quiet_cmd_chkbss = CHKBSS $<
+define cmd_chkbss
+ rm -f $@; \
+ if ! $(OBJDUMP) -j .bss -w -h $< | awk 'END { if ($$3) exit 1 }'; then \
+ echo "error: $< .bss section is not empty" >&2; exit 1; \
+ fi; \
+ touch $@;
+endef
+
+chkbss-target ?= $(obj)/built-in.a
+ifneq (,$(findstring /,$(chkbss)))
+chkbss-files := $(patsubst %, %.chkbss, $(chkbss))
+else
+chkbss-files := $(patsubst %, $(obj)/%.chkbss, $(chkbss))
+endif
+
+$(chkbss-target): $(chkbss-files)
+targets += $(notdir $(chkbss-files))
+
+%.o.chkbss: %.o
+ $(call cmd,chkbss)
diff --git a/arch/s390/tools/.gitignore b/arch/s390/tools/.gitignore
new file mode 100644
index 000000000..71bd6f8ee
--- /dev/null
+++ b/arch/s390/tools/.gitignore
@@ -0,0 +1,2 @@
+gen_facilities
+gen_opcode_table
diff --git a/arch/s390/tools/Makefile b/arch/s390/tools/Makefile
new file mode 100644
index 000000000..48cdac114
--- /dev/null
+++ b/arch/s390/tools/Makefile
@@ -0,0 +1,35 @@
+# SPDX-License-Identifier: GPL-2.0
+#
+# Makefile for s390 specific build tools
+#
+
+kapi := arch/$(ARCH)/include/generated/asm
+kapi-hdrs-y := $(kapi)/facility-defs.h $(kapi)/dis-defs.h
+
+targets += $(addprefix ../../../,$(kapi-hdrs-y))
+PHONY += kapi
+
+kapi: $(kapi-hdrs-y)
+
+hostprogs-y += gen_facilities
+hostprogs-y += gen_opcode_table
+
+HOSTCFLAGS_gen_facilities.o += -Wall $(LINUXINCLUDE)
+HOSTCFLAGS_gen_opcode_table.o += -Wall $(LINUXINCLUDE)
+
+# Ensure output directory exists
+_dummy := $(shell [ -d '$(kapi)' ] || mkdir -p '$(kapi)')
+
+define filechk_facility-defs.h
+ $(obj)/gen_facilities
+endef
+
+define filechk_dis-defs.h
+ ( $(obj)/gen_opcode_table < $(srctree)/arch/$(ARCH)/tools/opcodes.txt )
+endef
+
+$(kapi)/facility-defs.h: $(obj)/gen_facilities FORCE
+ $(call filechk,facility-defs.h)
+
+$(kapi)/dis-defs.h: $(obj)/gen_opcode_table FORCE
+ $(call filechk,dis-defs.h)
diff --git a/arch/s390/tools/gen_facilities.c b/arch/s390/tools/gen_facilities.c
new file mode 100644
index 000000000..0c85aedcf
--- /dev/null
+++ b/arch/s390/tools/gen_facilities.c
@@ -0,0 +1,163 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Simple program to generate defines out of facility lists that use the bit
+ * numbering scheme from the Principles of Operation: most significant bit
+ * has bit number 0.
+ *
+ * Copyright IBM Corp. 2015, 2018
+ *
+ */
+
+#include <strings.h>
+#include <string.h>
+#include <stdlib.h>
+#include <stdio.h>
+
+struct facility_def {
+ char *name;
+ int *bits;
+};
+
+static struct facility_def facility_defs[] = {
+ {
+ /*
+ * FACILITIES_ALS contains the list of facilities that are
+ * required to run a kernel that is compiled e.g. with
+ * -march=<machine>.
+ */
+ .name = "FACILITIES_ALS",
+ .bits = (int[]){
+#ifdef CONFIG_HAVE_MARCH_Z900_FEATURES
+ 0, /* N3 instructions */
+ 1, /* z/Arch mode installed */
+#endif
+#ifdef CONFIG_HAVE_MARCH_Z990_FEATURES
+ 18, /* long displacement facility */
+#endif
+#ifdef CONFIG_HAVE_MARCH_Z9_109_FEATURES
+ 21, /* extended-immediate facility */
+ 25, /* store clock fast */
+#endif
+#ifdef CONFIG_HAVE_MARCH_Z10_FEATURES
+ 27, /* mvcos */
+ 32, /* compare and swap and store */
+ 33, /* compare and swap and store 2 */
+ 34, /* general instructions extension */
+ 35, /* execute extensions */
+#endif
+#ifdef CONFIG_HAVE_MARCH_Z196_FEATURES
+ 45, /* fast-BCR, etc. */
+#endif
+#ifdef CONFIG_HAVE_MARCH_ZEC12_FEATURES
+ 49, /* misc-instruction-extensions */
+ 52, /* interlocked facility 2 */
+#endif
+#ifdef CONFIG_HAVE_MARCH_Z13_FEATURES
+ 53, /* load-and-zero-rightmost-byte, etc. */
+#endif
+#ifdef CONFIG_HAVE_MARCH_Z14_FEATURES
+ 58, /* miscellaneous-instruction-extension 2 */
+#endif
+ -1 /* END */
+ }
+ },
+ {
+ /*
+ * FACILITIES_KVM contains the list of facilities that are part
+ * of the default facility mask and list that are passed to the
+ * initial CPU model. If no CPU model is used, this, together
+ * with the non-hypervisor managed bits, is the maximum list of
+ * guest facilities supported by KVM.
+ */
+ .name = "FACILITIES_KVM",
+ .bits = (int[]){
+ 0, /* N3 instructions */
+ 1, /* z/Arch mode installed */
+ 2, /* z/Arch mode active */
+ 3, /* DAT-enhancement */
+ 4, /* idte segment table */
+ 5, /* idte region table */
+ 6, /* ASN-and-LX reuse */
+ 7, /* stfle */
+ 8, /* enhanced-DAT 1 */
+ 9, /* sense-running-status */
+ 10, /* conditional sske */
+ 13, /* ipte-range */
+ 14, /* nonquiescing key-setting */
+ 73, /* transactional execution */
+ 75, /* access-exception-fetch/store indication */
+ 76, /* msa extension 3 */
+ 77, /* msa extension 4 */
+ 78, /* enhanced-DAT 2 */
+ 130, /* instruction-execution-protection */
+ 131, /* enhanced-SOP 2 and side-effect */
+ 139, /* multiple epoch facility */
+ 146, /* msa extension 8 */
+ -1 /* END */
+ }
+ },
+ {
+ /*
+ * FACILITIES_KVM_CPUMODEL contains the list of facilities
+ * that can be enabled by CPU model code if the host supports
+ * it. These facilities are not passed to the guest without
+ * CPU model support.
+ */
+
+ .name = "FACILITIES_KVM_CPUMODEL",
+ .bits = (int[]){
+ 156, /* etoken facility */
+ -1 /* END */
+ }
+ },
+};
+
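+/*
+ * Convert a facility bit list (bit 0 = most significant bit, as defined in
+ * the Principles of Operation) into 64-bit constants and print them as a
+ * single #define.
+ */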
+static void print_facility_list(struct facility_def *def)
+{
+ unsigned int high, bit, dword, i;
+ unsigned long long *array;
+
+ array = calloc(1, 8);
+ if (!array)
+ exit(EXIT_FAILURE);
+ high = 0;
+ for (i = 0; def->bits[i] != -1; i++) {
+ bit = 63 - (def->bits[i] & 63);
+ dword = def->bits[i] / 64;
+ if (dword > high) {
+ array = realloc(array, (dword + 1) * 8);
+ if (!array)
+ exit(EXIT_FAILURE);
+ memset(array + high + 1, 0, (dword - high) * 8);
+ high = dword;
+ }
+ array[dword] |= 1ULL << bit;
+ }
+ printf("#define %s ", def->name);
+ for (i = 0; i <= high; i++)
+ printf("_AC(0x%016llx,UL)%c", array[i], i < high ? ',' : '\n');
+ free(array);
+}
+
+static void print_facility_lists(void)
+{
+ unsigned int i;
+
+ for (i = 0; i < sizeof(facility_defs) / sizeof(facility_defs[0]); i++)
+ print_facility_list(&facility_defs[i]);
+}
+
+int main(int argc, char **argv)
+{
+ printf("#ifndef __ASM_S390_FACILITY_DEFS__\n");
+ printf("#define __ASM_S390_FACILITY_DEFS__\n");
+ printf("/*\n");
+ printf(" * DO NOT MODIFY.\n");
+ printf(" *\n");
+ printf(" * This file was generated by %s\n", __FILE__);
+ printf(" */\n\n");
+ printf("#include <linux/const.h>\n\n");
+ print_facility_lists();
+ printf("\n#endif\n");
+ return 0;
+}
diff --git a/arch/s390/tools/gen_opcode_table.c b/arch/s390/tools/gen_opcode_table.c
new file mode 100644
index 000000000..a1bc02b29
--- /dev/null
+++ b/arch/s390/tools/gen_opcode_table.c
@@ -0,0 +1,337 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Generate opcode table initializers for the in-kernel disassembler.
+ *
+ * Copyright IBM Corp. 2017
+ *
+ */
+
+#include <stdlib.h>
+#include <string.h>
+#include <ctype.h>
+#include <stdio.h>
+
+#define STRING_SIZE_MAX 20
+
+struct insn_type {
+ unsigned char byte;
+ unsigned char mask;
+ char **format;
+};
+
+struct insn {
+ struct insn_type *type;
+ char opcode[STRING_SIZE_MAX];
+ char name[STRING_SIZE_MAX];
+ char upper[STRING_SIZE_MAX];
+ char format[STRING_SIZE_MAX];
+ unsigned int name_len;
+};
+
+struct insn_group {
+ struct insn_type *type;
+ int offset;
+ int count;
+ char opcode[2];
+};
+
+struct insn_format {
+ char *format;
+ int type;
+};
+
+struct gen_opcode {
+ struct insn *insn;
+ int nr;
+ struct insn_group *group;
+ int nr_groups;
+};
+
+/*
+ * Table of instruction format types. Each opcode is defined with at
+ * least one byte (two nibbles), three nibbles, or two bytes (four
+ * nibbles).
+ * The byte member of each instruction format type entry defines
+ * within which byte of an instruction the third (and fourth) nibble
+ * of an opcode can be found. The mask member is the and-mask that
+ * needs to be applied on this byte in order to get the third (and
+ * fourth) nibble of the opcode.
+ * The format array defines all instruction formats (as defined in the
+ * Principles of Operation) which have the same position of the opcode
+ * nibbles.
+ * A special case are instruction formats with 1-byte opcodes. In this
+ * case the byte member always is zero, so that the mask is applied on
+ * the (only) byte that contains the opcode.
+ */
+static struct insn_type insn_type_table[] = {
+ {
+ .byte = 0,
+ .mask = 0xff,
+ .format = (char *[]) {
+ "MII",
+ "RR",
+ "RS",
+ "RSI",
+ "RX",
+ "SI",
+ "SMI",
+ "SS",
+ NULL,
+ },
+ },
+ {
+ .byte = 1,
+ .mask = 0x0f,
+ .format = (char *[]) {
+ "RI",
+ "RIL",
+ "SSF",
+ NULL,
+ },
+ },
+ {
+ .byte = 1,
+ .mask = 0xff,
+ .format = (char *[]) {
+ "E",
+ "IE",
+ "RRE",
+ "RRF",
+ "RRR",
+ "S",
+ "SIL",
+ "SSE",
+ NULL,
+ },
+ },
+ {
+ .byte = 5,
+ .mask = 0xff,
+ .format = (char *[]) {
+ "RIE",
+ "RIS",
+ "RRS",
+ "RSE",
+ "RSL",
+ "RSY",
+ "RXE",
+ "RXF",
+ "RXY",
+ "SIY",
+ "VRI",
+ "VRR",
+ "VRS",
+ "VRV",
+ "VRX",
+ "VSI",
+ NULL,
+ },
+ },
+};
+
+static struct insn_type *insn_format_to_type(char *format)
+{
+ char tmp[STRING_SIZE_MAX];
+ char *base_format, **ptr;
+ int i;
+
+ strcpy(tmp, format);
+ base_format = tmp;
+ base_format = strsep(&base_format, "_");
+ for (i = 0; i < sizeof(insn_type_table) / sizeof(insn_type_table[0]); i++) {
+ ptr = insn_type_table[i].format;
+ while (*ptr) {
+ if (!strcmp(base_format, *ptr))
+ return &insn_type_table[i];
+ ptr++;
+ }
+ }
+ exit(EXIT_FAILURE);
+}
+
+static void read_instructions(struct gen_opcode *desc)
+{
+ struct insn insn;
+ int rc, i;
+
+ while (1) {
+ rc = scanf("%s %s %s", insn.opcode, insn.name, insn.format);
+ if (rc == EOF)
+ break;
+ if (rc != 3)
+ exit(EXIT_FAILURE);
+ insn.type = insn_format_to_type(insn.format);
+ insn.name_len = strlen(insn.name);
+ for (i = 0; i <= insn.name_len; i++)
+ insn.upper[i] = toupper((unsigned char)insn.name[i]);
+ desc->nr++;
+ desc->insn = realloc(desc->insn, desc->nr * sizeof(*desc->insn));
+ if (!desc->insn)
+ exit(EXIT_FAILURE);
+ desc->insn[desc->nr - 1] = insn;
+ }
+}
+
+static int cmpformat(const void *a, const void *b)
+{
+ return strcmp(((struct insn *)a)->format, ((struct insn *)b)->format);
+}
+
+static void print_formats(struct gen_opcode *desc)
+{
+ char *format;
+ int i, count;
+
+ qsort(desc->insn, desc->nr, sizeof(*desc->insn), cmpformat);
+ format = "";
+ count = 0;
+ printf("enum {\n");
+ for (i = 0; i < desc->nr; i++) {
+ if (!strcmp(format, desc->insn[i].format))
+ continue;
+ count++;
+ format = desc->insn[i].format;
+ printf("\tINSTR_%s,\n", format);
+ }
+ printf("}; /* %d */\n\n", count);
+}
+
+static int cmp_long_insn(const void *a, const void *b)
+{
+ return strcmp(((struct insn *)a)->name, ((struct insn *)b)->name);
+}
+
+static void print_long_insn(struct gen_opcode *desc)
+{
+ struct insn *insn;
+ int i, count;
+
+ qsort(desc->insn, desc->nr, sizeof(*desc->insn), cmp_long_insn);
+ count = 0;
+ printf("enum {\n");
+ for (i = 0; i < desc->nr; i++) {
+ insn = &desc->insn[i];
+ if (insn->name_len < 6)
+ continue;
+ printf("\tLONG_INSN_%s,\n", insn->upper);
+ count++;
+ }
+ printf("}; /* %d */\n\n", count);
+
+ printf("#define LONG_INSN_INITIALIZER { \\\n");
+ for (i = 0; i < desc->nr; i++) {
+ insn = &desc->insn[i];
+ if (insn->name_len < 6)
+ continue;
+ printf("\t[LONG_INSN_%s] = \"%s\", \\\n", insn->upper, insn->name);
+ }
+ printf("}\n\n");
+}
+
+static void print_opcode(struct insn *insn, int nr)
+{
+ char *opcode;
+
+ opcode = insn->opcode;
+ if (insn->type->byte != 0)
+ opcode += 2;
+ printf("\t[%4d] = { .opfrag = 0x%s, .format = INSTR_%s, ", nr, opcode, insn->format);
+ if (insn->name_len < 6)
+ printf(".name = \"%s\" ", insn->name);
+ else
+ printf(".offset = LONG_INSN_%s ", insn->upper);
+ printf("}, \\\n");
+}
+
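+/*
+ * Collect consecutive opcode table entries that share the same two-character
+ * opcode prefix (all single-byte opcodes end up in one group), so that the
+ * offset table records one {opcode, offset, count} entry per group.
+ */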
+static void add_to_group(struct gen_opcode *desc, struct insn *insn, int offset)
+{
+ struct insn_group *group;
+
+ group = desc->group ? &desc->group[desc->nr_groups - 1] : NULL;
+ if (group && (!strncmp(group->opcode, insn->opcode, 2) || group->type->byte == 0)) {
+ group->count++;
+ return;
+ }
+ desc->nr_groups++;
+ desc->group = realloc(desc->group, desc->nr_groups * sizeof(*desc->group));
+ if (!desc->group)
+ exit(EXIT_FAILURE);
+ group = &desc->group[desc->nr_groups - 1];
+ memcpy(group->opcode, insn->opcode, 2);
+ group->type = insn->type;
+ group->offset = offset;
+ group->count = 1;
+}
+
+static int cmpopcode(const void *a, const void *b)
+{
+ return strcmp(((struct insn *)a)->opcode, ((struct insn *)b)->opcode);
+}
+
+static void print_opcode_table(struct gen_opcode *desc)
+{
+ char opcode[2] = "";
+ struct insn *insn;
+ int i, offset;
+
+ qsort(desc->insn, desc->nr, sizeof(*desc->insn), cmpopcode);
+ printf("#define OPCODE_TABLE_INITIALIZER { \\\n");
+ offset = 0;
+ for (i = 0; i < desc->nr; i++) {
+ insn = &desc->insn[i];
+ if (insn->type->byte == 0)
+ continue;
+ add_to_group(desc, insn, offset);
+ if (strncmp(opcode, insn->opcode, 2)) {
+ memcpy(opcode, insn->opcode, 2);
+ printf("\t/* %.2s */ \\\n", opcode);
+ }
+ print_opcode(insn, offset);
+ offset++;
+ }
+ printf("\t/* 1-byte opcode instructions */ \\\n");
+ for (i = 0; i < desc->nr; i++) {
+ insn = &desc->insn[i];
+ if (insn->type->byte != 0)
+ continue;
+ add_to_group(desc, insn, offset);
+ print_opcode(insn, offset);
+ offset++;
+ }
+ printf("}\n\n");
+}
+
+static void print_opcode_table_offsets(struct gen_opcode *desc)
+{
+ struct insn_group *group;
+ int i;
+
+ printf("#define OPCODE_OFFSET_INITIALIZER { \\\n");
+ for (i = 0; i < desc->nr_groups; i++) {
+ group = &desc->group[i];
+ printf("\t{ .opcode = 0x%.2s, .mask = 0x%02x, .byte = %d, .offset = %d, .count = %d }, \\\n",
+ group->opcode, group->type->mask, group->type->byte, group->offset, group->count);
+ }
+ printf("}\n\n");
+}
+
+int main(int argc, char **argv)
+{
+ struct gen_opcode _desc = { 0 };
+ struct gen_opcode *desc = &_desc;
+
+ read_instructions(desc);
+ printf("#ifndef __S390_GENERATED_DIS_DEFS_H__\n");
+ printf("#define __S390_GENERATED_DIS_DEFS_H__\n");
+ printf("/*\n");
+ printf(" * DO NOT MODIFY.\n");
+ printf(" *\n");
+ printf(" * This file was generated by %s\n", __FILE__);
+ printf(" */\n\n");
+ print_formats(desc);
+ print_long_insn(desc);
+ print_opcode_table(desc);
+ print_opcode_table_offsets(desc);
+ printf("#endif\n");
+ exit(EXIT_SUCCESS);
+}
diff --git a/arch/s390/tools/opcodes.txt b/arch/s390/tools/opcodes.txt
new file mode 100644
index 000000000..1cbed82cd
--- /dev/null
+++ b/arch/s390/tools/opcodes.txt
@@ -0,0 +1,1183 @@
+0101 pr E
+0102 upt E
+0104 ptff E
+0107 sckpf E
+010a pfpo E
+010b tam E
+010c sam24 E
+010d sam31 E
+010e sam64 E
+01ff trap2 E
+04 spm RR_R0
+05 balr RR_RR
+06 bctr RR_RR
+07 bcr RR_UR
+0a svc RR_U0
+0b bsm RR_RR
+0c bassm RR_RR
+0d basr RR_RR
+0e mvcl RR_RR
+0f clcl RR_RR
+10 lpr RR_RR
+11 lnr RR_RR
+12 ltr RR_RR
+13 lcr RR_RR
+14 nr RR_RR
+15 clr RR_RR
+16 or RR_RR
+17 xr RR_RR
+18 lr RR_RR
+19 cr RR_RR
+1a ar RR_RR
+1b sr RR_RR
+1c mr RR_RR
+1d dr RR_RR
+1e alr RR_RR
+1f slr RR_RR
+20 lpdr RR_FF
+21 lndr RR_FF
+22 ltdr RR_FF
+23 lcdr RR_FF
+24 hdr RR_FF
+25 ldxr RR_FF
+26 mxr RR_FF
+27 mxdr RR_FF
+28 ldr RR_FF
+29 cdr RR_FF
+2a adr RR_FF
+2b sdr RR_FF
+2c mdr RR_FF
+2d ddr RR_FF
+2e awr RR_FF
+2f swr RR_FF
+30 lper RR_FF
+31 lner RR_FF
+32 lter RR_FF
+33 lcer RR_FF
+34 her RR_FF
+35 ledr RR_FF
+36 axr RR_FF
+37 sxr RR_FF
+38 ler RR_FF
+39 cer RR_FF
+3a aer RR_FF
+3b ser RR_FF
+3c mder RR_FF
+3d der RR_FF
+3e aur RR_FF
+3f sur RR_FF
+40 sth RX_RRRD
+41 la RX_RRRD
+42 stc RX_RRRD
+43 ic RX_RRRD
+44 ex RX_RRRD
+45 bal RX_RRRD
+46 bct RX_RRRD
+47 bc RX_URRD
+48 lh RX_RRRD
+49 ch RX_RRRD
+4a ah RX_RRRD
+4b sh RX_RRRD
+4c mh RX_RRRD
+4d bas RX_RRRD
+4e cvd RX_RRRD
+4f cvb RX_RRRD
+50 st RX_RRRD
+51 lae RX_RRRD
+54 n RX_RRRD
+55 cl RX_RRRD
+56 o RX_RRRD
+57 x RX_RRRD
+58 l RX_RRRD
+59 c RX_RRRD
+5a a RX_RRRD
+5b s RX_RRRD
+5c m RX_RRRD
+5d d RX_RRRD
+5e al RX_RRRD
+5f sl RX_RRRD
+60 std RX_FRRD
+67 mxd RX_FRRD
+68 ld RX_FRRD
+69 cd RX_FRRD
+6a ad RX_FRRD
+6b sd RX_FRRD
+6c md RX_FRRD
+6d dd RX_FRRD
+6e aw RX_FRRD
+6f sw RX_FRRD
+70 ste RX_FRRD
+71 ms RX_RRRD
+78 le RX_FRRD
+79 ce RX_FRRD
+7a ae RX_FRRD
+7b se RX_FRRD
+7c mde RX_FRRD
+7d de RX_FRRD
+7e au RX_FRRD
+7f su RX_FRRD
+80 ssm SI_RD
+82 lpsw SI_RD
+83 diag RS_RRRD
+84 brxh RSI_RRP
+85 brxle RSI_RRP
+86 bxh RS_RRRD
+87 bxle RS_RRRD
+88 srl RS_R0RD
+89 sll RS_R0RD
+8a sra RS_R0RD
+8b sla RS_R0RD
+8c srdl RS_R0RD
+8d sldl RS_R0RD
+8e srda RS_R0RD
+8f slda RS_R0RD
+90 stm RS_RRRD
+91 tm SI_URD
+92 mvi SI_URD
+93 ts SI_RD
+94 ni SI_URD
+95 cli SI_URD
+96 oi SI_URD
+97 xi SI_URD
+98 lm RS_RRRD
+99 trace RS_RRRD
+9a lam RS_AARD
+9b stam RS_AARD
+a50 iihh RI_RU
+a51 iihl RI_RU
+a52 iilh RI_RU
+a53 iill RI_RU
+a54 nihh RI_RU
+a55 nihl RI_RU
+a56 nilh RI_RU
+a57 nill RI_RU
+a58 oihh RI_RU
+a59 oihl RI_RU
+a5a oilh RI_RU
+a5b oill RI_RU
+a5c llihh RI_RU
+a5d llihl RI_RU
+a5e llilh RI_RU
+a5f llill RI_RU
+a70 tmlh RI_RU
+a71 tmll RI_RU
+a72 tmhh RI_RU
+a73 tmhl RI_RU
+a74 brc RI_UP
+a75 bras RI_RP
+a76 brct RI_RP
+a77 brctg RI_RP
+a78 lhi RI_RI
+a79 lghi RI_RI
+a7a ahi RI_RI
+a7b aghi RI_RI
+a7c mhi RI_RI
+a7d mghi RI_RI
+a7e chi RI_RI
+a7f cghi RI_RI
+a8 mvcle RS_RRRD
+a9 clcle RS_RRRD
+aa0 rinext RI_RI
+aa1 rion RI_RI
+aa2 tric RI_RI
+aa3 rioff RI_RI
+aa4 riemit RI_RI
+ac stnsm SI_URD
+ad stosm SI_URD
+ae sigp RS_RRRD
+af mc SI_URD
+b1 lra RX_RRRD
+b202 stidp S_RD
+b204 sck S_RD
+b205 stck S_RD
+b206 sckc S_RD
+b207 stckc S_RD
+b208 spt S_RD
+b209 stpt S_RD
+b20a spka S_RD
+b20b ipk S_00
+b20d ptlb S_00
+b210 spx S_RD
+b211 stpx S_RD
+b212 stap S_RD
+b214 sie S_RD
+b218 pc S_RD
+b219 sac S_RD
+b21a cfc S_RD
+b220 servc RRE_RR
+b221 ipte RRF_RURR
+b222 ipm RRE_R0
+b223 ivsk RRE_RR
+b224 iac RRE_R0
+b225 ssar RRE_R0
+b226 epar RRE_R0
+b227 esar RRE_R0
+b228 pt RRE_RR
+b229 iske RRE_RR
+b22a rrbe RRE_RR
+b22b sske RRF_U0RR
+b22c tb RRE_RR
+b22d dxr RRE_FF
+b22e pgin RRE_RR
+b22f pgout RRE_RR
+b230 csch S_00
+b231 hsch S_00
+b232 msch S_RD
+b233 ssch S_RD
+b234 stsch S_RD
+b235 tsch S_RD
+b236 tpi S_RD
+b237 sal S_00
+b238 rsch S_00
+b239 stcrw S_RD
+b23a stcps S_RD
+b23b rchp S_00
+b23c schm S_00
+b240 bakr RRE_RR
+b241 cksm RRE_RR
+b244 sqdr RRE_FF
+b245 sqer RRE_FF
+b246 stura RRE_RR
+b247 msta RRE_R0
+b248 palb RRE_00
+b249 ereg RRE_RR
+b24a esta RRE_RR
+b24b lura RRE_RR
+b24c tar RRE_AR
+b24d cpya RRE_AA
+b24e sar RRE_AR
+b24f ear RRE_RA
+b250 csp RRE_RR
+b252 msr RRE_RR
+b254 mvpg RRE_RR
+b255 mvst RRE_RR
+b256 sthyi RRE_RR
+b257 cuse RRE_RR
+b258 bsg RRE_RR
+b25a bsa RRE_RR
+b25d clst RRE_RR
+b25e srst RRE_RR
+b263 cmpsc RRE_RR
+b274 siga S_RD
+b276 xsch S_00
+b277 rp S_RD
+b278 stcke S_RD
+b279 sacf S_RD
+b27c stckf S_RD
+b27d stsi S_RD
+b280 lpp S_RD
+b284 lcctl S_RD
+b285 lpctl S_RD
+b286 qsi S_RD
+b287 lsctl S_RD
+b28e qctri S_RD
+b299 srnm S_RD
+b29c stfpc S_RD
+b29d lfpc S_RD
+b2a5 tre RRE_RR
+b2a6 cu21 RRF_U0RR
+b2a7 cu12 RRF_U0RR
+b2b0 stfle S_RD
+b2b1 stfl S_RD
+b2b2 lpswe S_RD
+b2b8 srnmb S_RD
+b2b9 srnmt S_RD
+b2bd lfas S_RD
+b2e0 scctr RRE_RR
+b2e1 spctr RRE_RR
+b2e4 ecctr RRE_RR
+b2e5 epctr RRE_RR
+b2e8 ppa RRF_U0RR
+b2ec etnd RRE_R0
+b2ed ecpga RRE_RR
+b2f8 tend S_00
+b2fa niai IE_UU
+b2fc tabort S_RD
+b2ff trap4 S_RD
+b300 lpebr RRE_FF
+b301 lnebr RRE_FF
+b302 ltebr RRE_FF
+b303 lcebr RRE_FF
+b304 ldebr RRE_FF
+b305 lxdbr RRE_FF
+b306 lxebr RRE_FF
+b307 mxdbr RRE_FF
+b308 kebr RRE_FF
+b309 cebr RRE_FF
+b30a aebr RRE_FF
+b30b sebr RRE_FF
+b30c mdebr RRE_FF
+b30d debr RRE_FF
+b30e maebr RRF_F0FF
+b30f msebr RRF_F0FF
+b310 lpdbr RRE_FF
+b311 lndbr RRE_FF
+b312 ltdbr RRE_FF
+b313 lcdbr RRE_FF
+b314 sqebr RRE_FF
+b315 sqdbr RRE_FF
+b316 sqxbr RRE_FF
+b317 meebr RRE_FF
+b318 kdbr RRE_FF
+b319 cdbr RRE_FF
+b31a adbr RRE_FF
+b31b sdbr RRE_FF
+b31c mdbr RRE_FF
+b31d ddbr RRE_FF
+b31e madbr RRF_F0FF
+b31f msdbr RRF_F0FF
+b324 lder RRE_FF
+b325 lxdr RRE_FF
+b326 lxer RRE_FF
+b32e maer RRF_F0FF
+b32f mser RRF_F0FF
+b336 sqxr RRE_FF
+b337 meer RRE_FF
+b338 maylr RRF_F0FF
+b339 mylr RRF_F0FF
+b33a mayr RRF_F0FF
+b33b myr RRF_F0FF
+b33c mayhr RRF_F0FF
+b33d myhr RRF_F0FF
+b33e madr RRF_F0FF
+b33f msdr RRF_F0FF
+b340 lpxbr RRE_FF
+b341 lnxbr RRE_FF
+b342 ltxbr RRE_FF
+b343 lcxbr RRE_FF
+b344 ledbra RRF_UUFF
+b345 ldxbra RRF_UUFF
+b346 lexbra RRF_UUFF
+b347 fixbra RRF_UUFF
+b348 kxbr RRE_FF
+b349 cxbr RRE_FF
+b34a axbr RRE_FF
+b34b sxbr RRE_FF
+b34c mxbr RRE_FF
+b34d dxbr RRE_FF
+b350 tbedr RRF_U0FF
+b351 tbdr RRF_U0FF
+b353 diebr RRF_FUFF
+b357 fiebra RRF_UUFF
+b358 thder RRE_FF
+b359 thdr RRE_FF
+b35b didbr RRF_FUFF
+b35f fidbra RRF_UUFF
+b360 lpxr RRE_FF
+b361 lnxr RRE_FF
+b362 ltxr RRE_FF
+b363 lcxr RRE_FF
+b365 lxr RRE_FF
+b366 lexr RRE_FF
+b367 fixr RRE_FF
+b369 cxr RRE_FF
+b370 lpdfr RRE_FF
+b371 lndfr RRE_FF
+b372 cpsdr RRF_F0FF2
+b373 lcdfr RRE_FF
+b374 lzer RRE_F0
+b375 lzdr RRE_F0
+b376 lzxr RRE_F0
+b377 fier RRE_FF
+b37f fidr RRE_FF
+b384 sfpc RRE_RR
+b385 sfasr RRE_R0
+b38c efpc RRE_RR
+b390 celfbr RRF_UUFR
+b391 cdlfbr RRF_UUFR
+b392 cxlfbr RRF_UUFR
+b394 cefbra RRF_UUFR
+b395 cdfbra RRF_UUFR
+b396 cxfbra RRF_UUFR
+b398 cfebra RRF_UURF
+b399 cfdbra RRF_UURF
+b39a cfxbra RRF_UURF
+b39c clfebr RRF_UURF
+b39d clfdbr RRF_UURF
+b39e clfxbr RRF_UURF
+b3a0 celgbr RRF_UUFR
+b3a1 cdlgbr RRF_UUFR
+b3a2 cxlgbr RRF_UUFR
+b3a4 cegbra RRF_UUFR
+b3a5 cdgbra RRF_UUFR
+b3a6 cxgbra RRF_UUFR
+b3a8 cgebra RRF_UURF
+b3a9 cgdbra RRF_UURF
+b3aa cgxbra RRF_UURF
+b3ac clgebr RRF_UURF
+b3ad clgdbr RRF_UURF
+b3ae clgxbr RRF_UURF
+b3b4 cefr RRE_FR
+b3b5 cdfr RRE_FR
+b3b6 cxfr RRE_FR
+b3b8 cfer RRF_U0RF
+b3b9 cfdr RRF_U0RF
+b3ba cfxr RRF_U0RF
+b3c1 ldgr RRE_FR
+b3c4 cegr RRE_FR
+b3c5 cdgr RRE_FR
+b3c6 cxgr RRE_FR
+b3c8 cger RRF_U0RF
+b3c9 cgdr RRF_U0RF
+b3ca cgxr RRF_U0RF
+b3cd lgdr RRE_RF
+b3d0 mdtra RRF_FUFF2
+b3d1 ddtra RRF_FUFF2
+b3d2 adtra RRF_FUFF2
+b3d3 sdtra RRF_FUFF2
+b3d4 ldetr RRF_0UFF
+b3d5 ledtr RRF_UUFF
+b3d6 ltdtr RRE_FF
+b3d7 fidtr RRF_UUFF
+b3d8 mxtra RRF_FUFF2
+b3d9 dxtra RRF_FUFF2
+b3da axtra RRF_FUFF2
+b3db sxtra RRF_FUFF2
+b3dc lxdtr RRF_0UFF
+b3dd ldxtr RRF_UUFF
+b3de ltxtr RRE_FF
+b3df fixtr RRF_UUFF
+b3e0 kdtr RRE_FF
+b3e1 cgdtra RRF_UURF
+b3e2 cudtr RRE_RF
+b3e3 csdtr RRF_0URF
+b3e4 cdtr RRE_FF
+b3e5 eedtr RRE_RF
+b3e7 esdtr RRE_RF
+b3e8 kxtr RRE_FF
+b3e9 cgxtra RRF_UURF
+b3ea cuxtr RRE_RF
+b3eb csxtr RRF_0URF
+b3ec cxtr RRE_FF
+b3ed eextr RRE_RF
+b3ef esxtr RRE_RF
+b3f1 cdgtra RRF_UUFR
+b3f2 cdutr RRE_FR
+b3f3 cdstr RRE_FR
+b3f4 cedtr RRE_FF
+b3f5 qadtr RRF_FUFF
+b3f6 iedtr RRF_F0FR
+b3f7 rrdtr RRF_FFRU
+b3f9 cxgtra RRF_UUFR
+b3fa cxutr RRE_FR
+b3fb cxstr RRE_FR
+b3fc cextr RRE_FF
+b3fd qaxtr RRF_FUFF
+b3fe iextr RRF_F0FR
+b3ff rrxtr RRF_FFRU
+b6 stctl RS_CCRD
+b7 lctl RS_CCRD
+b900 lpgr RRE_RR
+b901 lngr RRE_RR
+b902 ltgr RRE_RR
+b903 lcgr RRE_RR
+b904 lgr RRE_RR
+b905 lurag RRE_RR
+b906 lgbr RRE_RR
+b907 lghr RRE_RR
+b908 agr RRE_RR
+b909 sgr RRE_RR
+b90a algr RRE_RR
+b90b slgr RRE_RR
+b90c msgr RRE_RR
+b90d dsgr RRE_RR
+b90e eregg RRE_RR
+b90f lrvgr RRE_RR
+b910 lpgfr RRE_RR
+b911 lngfr RRE_RR
+b912 ltgfr RRE_RR
+b913 lcgfr RRE_RR
+b914 lgfr RRE_RR
+b916 llgfr RRE_RR
+b917 llgtr RRE_RR
+b918 agfr RRE_RR
+b919 sgfr RRE_RR
+b91a algfr RRE_RR
+b91b slgfr RRE_RR
+b91c msgfr RRE_RR
+b91d dsgfr RRE_RR
+b91e kmac RRE_RR
+b91f lrvr RRE_RR
+b920 cgr RRE_RR
+b921 clgr RRE_RR
+b925 sturg RRE_RR
+b926 lbr RRE_RR
+b927 lhr RRE_RR
+b928 pckmo RRE_00
+b929 kma RRF_R0RR
+b92a kmf RRE_RR
+b92b kmo RRE_RR
+b92c pcc RRE_00
+b92d kmctr RRF_R0RR
+b92e km RRE_RR
+b92f kmc RRE_RR
+b930 cgfr RRE_RR
+b931 clgfr RRE_RR
+b93c ppno RRE_RR
+b93e kimd RRE_RR
+b93f klmd RRE_RR
+b941 cfdtr RRF_UURF
+b942 clgdtr RRF_UURF
+b943 clfdtr RRF_UURF
+b946 bctgr RRE_RR
+b949 cfxtr RRF_UURF
+b94a clgxtr RRF_UURF
+b94b clfxtr RRF_UURF
+b951 cdftr RRF_UUFR
+b952 cdlgtr RRF_UUFR
+b953 cdlftr RRF_UUFR
+b959 cxftr RRF_UUFR
+b95a cxlgtr RRF_UUFR
+b95b cxlftr RRF_UUFR
+b960 cgrt RRF_U0RR
+b961 clgrt RRF_U0RR
+b972 crt RRF_U0RR
+b973 clrt RRF_U0RR
+b980 ngr RRE_RR
+b981 ogr RRE_RR
+b982 xgr RRE_RR
+b983 flogr RRE_RR
+b984 llgcr RRE_RR
+b985 llghr RRE_RR
+b986 mlgr RRE_RR
+b987 dlgr RRE_RR
+b988 alcgr RRE_RR
+b989 slbgr RRE_RR
+b98a cspg RRE_RR
+b98d epsw RRE_RR
+b98e idte RRF_RURR2
+b98f crdte RRF_RURR2
+b990 trtt RRF_U0RR
+b991 trto RRF_U0RR
+b992 trot RRF_U0RR
+b993 troo RRF_U0RR
+b994 llcr RRE_RR
+b995 llhr RRE_RR
+b996 mlr RRE_RR
+b997 dlr RRE_RR
+b998 alcr RRE_RR
+b999 slbr RRE_RR
+b99a epair RRE_R0
+b99b esair RRE_R0
+b99d esea RRE_R0
+b99e pti RRE_RR
+b99f ssair RRE_R0
+b9a1 tpei RRE_RR
+b9a2 ptf RRE_R0
+b9aa lptea RRF_RURR2
+b9ac irbm RRE_RR
+b9ae rrbm RRE_RR
+b9af pfmf RRE_RR
+b9b0 cu14 RRF_U0RR
+b9b1 cu24 RRF_U0RR
+b9b2 cu41 RRE_RR
+b9b3 cu42 RRE_RR
+b9bd trtre RRF_U0RR
+b9be srstu RRE_RR
+b9bf trte RRF_U0RR
+b9c8 ahhhr RRF_R0RR2
+b9c9 shhhr RRF_R0RR2
+b9ca alhhhr RRF_R0RR2
+b9cb slhhhr RRF_R0RR2
+b9cd chhr RRE_RR
+b9cf clhhr RRE_RR
+b9d0 pcistg RRE_RR
+b9d2 pcilg RRE_RR
+b9d3 rpcit RRE_RR
+b9d8 ahhlr RRF_R0RR2
+b9d9 shhlr RRF_R0RR2
+b9da alhhlr RRF_R0RR2
+b9db slhhlr RRF_R0RR2
+b9dd chlr RRE_RR
+b9df clhlr RRE_RR
+b9e0 locfhr RRF_U0RR
+b9e1 popcnt RRE_RR
+b9e2 locgr RRF_U0RR
+b9e4 ngrk RRF_R0RR2
+b9e6 ogrk RRF_R0RR2
+b9e7 xgrk RRF_R0RR2
+b9e8 agrk RRF_R0RR2
+b9e9 sgrk RRF_R0RR2
+b9ea algrk RRF_R0RR2
+b9eb slgrk RRF_R0RR2
+b9ec mgrk RRF_R0RR2
+b9ed msgrkc RRF_R0RR2
+b9f2 locr RRF_U0RR
+b9f4 nrk RRF_R0RR2
+b9f6 ork RRF_R0RR2
+b9f7 xrk RRF_R0RR2
+b9f8 ark RRF_R0RR2
+b9f9 srk RRF_R0RR2
+b9fa alrk RRF_R0RR2
+b9fb slrk RRF_R0RR2
+b9fd msrkc RRF_R0RR2
+ba cs RS_RRRD
+bb cds RS_RRRD
+bd clm RS_RURD
+be stcm RS_RURD
+bf icm RS_RURD
+c00 larl RIL_RP
+c01 lgfi RIL_RI
+c04 brcl RIL_UP
+c05 brasl RIL_RP
+c06 xihf RIL_RU
+c07 xilf RIL_RU
+c08 iihf RIL_RU
+c09 iilf RIL_RU
+c0a nihf RIL_RU
+c0b nilf RIL_RU
+c0c oihf RIL_RU
+c0d oilf RIL_RU
+c0e llihf RIL_RU
+c0f llilf RIL_RU
+c20 msgfi RIL_RI
+c21 msfi RIL_RI
+c24 slgfi RIL_RU
+c25 slfi RIL_RU
+c28 agfi RIL_RI
+c29 afi RIL_RI
+c2a algfi RIL_RU
+c2b alfi RIL_RU
+c2c cgfi RIL_RI
+c2d cfi RIL_RI
+c2e clgfi RIL_RU
+c2f clfi RIL_RU
+c42 llhrl RIL_RP
+c44 lghrl RIL_RP
+c45 lhrl RIL_RP
+c46 llghrl RIL_RP
+c47 sthrl RIL_RP
+c48 lgrl RIL_RP
+c4b stgrl RIL_RP
+c4c lgfrl RIL_RP
+c4d lrl RIL_RP
+c4e llgfrl RIL_RP
+c4f strl RIL_RP
+c5 bprp MII_UPP
+c60 exrl RIL_RP
+c62 pfdrl RIL_UP
+c64 cghrl RIL_RP
+c65 chrl RIL_RP
+c66 clghrl RIL_RP
+c67 clhrl RIL_RP
+c68 cgrl RIL_RP
+c6a clgrl RIL_RP
+c6c cgfrl RIL_RP
+c6d crl RIL_RP
+c6e clgfrl RIL_RP
+c6f clrl RIL_RP
+c7 bpp SMI_U0RDP
+c80 mvcos SSF_RRDRD
+c81 ectg SSF_RRDRD
+c82 csst SSF_RRDRD
+c84 lpd SSF_RRDRD2
+c85 lpdg SSF_RRDRD2
+cc6 brcth RIL_RP
+cc8 aih RIL_RI
+cca alsih RIL_RI
+ccb alsihn RIL_RI
+ccd cih RIL_RI
+ccf clih RIL_RU
+d0 trtr SS_L0RDRD
+d1 mvn SS_L0RDRD
+d2 mvc SS_L0RDRD
+d3 mvz SS_L0RDRD
+d4 nc SS_L0RDRD
+d5 clc SS_L0RDRD
+d6 oc SS_L0RDRD
+d7 xc SS_L0RDRD
+d9 mvck SS_RRRDRD
+da mvcp SS_RRRDRD
+db mvcs SS_RRRDRD
+dc tr SS_L0RDRD
+dd trt SS_L0RDRD
+de ed SS_L0RDRD
+df edmk SS_L0RDRD
+e1 pku SS_L2RDRD
+e2 unpku SS_L0RDRD
+e302 ltg RXY_RRRD
+e303 lrag RXY_RRRD
+e304 lg RXY_RRRD
+e306 cvby RXY_RRRD
+e308 ag RXY_RRRD
+e309 sg RXY_RRRD
+e30a alg RXY_RRRD
+e30b slg RXY_RRRD
+e30c msg RXY_RRRD
+e30d dsg RXY_RRRD
+e30e cvbg RXY_RRRD
+e30f lrvg RXY_RRRD
+e312 lt RXY_RRRD
+e313 lray RXY_RRRD
+e314 lgf RXY_RRRD
+e315 lgh RXY_RRRD
+e316 llgf RXY_RRRD
+e317 llgt RXY_RRRD
+e318 agf RXY_RRRD
+e319 sgf RXY_RRRD
+e31a algf RXY_RRRD
+e31b slgf RXY_RRRD
+e31c msgf RXY_RRRD
+e31d dsgf RXY_RRRD
+e31e lrv RXY_RRRD
+e31f lrvh RXY_RRRD
+e320 cg RXY_RRRD
+e321 clg RXY_RRRD
+e324 stg RXY_RRRD
+e325 ntstg RXY_RRRD
+e326 cvdy RXY_RRRD
+e32a lzrg RXY_RRRD
+e32e cvdg RXY_RRRD
+e32f strvg RXY_RRRD
+e330 cgf RXY_RRRD
+e331 clgf RXY_RRRD
+e332 ltgf RXY_RRRD
+e334 cgh RXY_RRRD
+e336 pfd RXY_URRD
+e338 agh RXY_RRRD
+e339 sgh RXY_RRRD
+e33a llzrgf RXY_RRRD
+e33b lzrf RXY_RRRD
+e33c mgh RXY_RRRD
+e33e strv RXY_RRRD
+e33f strvh RXY_RRRD
+e346 bctg RXY_RRRD
+e347 bic RXY_URRD
+e348 llgfsg RXY_RRRD
+e349 stgsc RXY_RRRD
+e34c lgg RXY_RRRD
+e34d lgsc RXY_RRRD
+e350 sty RXY_RRRD
+e351 msy RXY_RRRD
+e353 msc RXY_RRRD
+e354 ny RXY_RRRD
+e355 cly RXY_RRRD
+e356 oy RXY_RRRD
+e357 xy RXY_RRRD
+e358 ly RXY_RRRD
+e359 cy RXY_RRRD
+e35a ay RXY_RRRD
+e35b sy RXY_RRRD
+e35c mfy RXY_RRRD
+e35e aly RXY_RRRD
+e35f sly RXY_RRRD
+e370 sthy RXY_RRRD
+e371 lay RXY_RRRD
+e372 stcy RXY_RRRD
+e373 icy RXY_RRRD
+e375 laey RXY_RRRD
+e376 lb RXY_RRRD
+e377 lgb RXY_RRRD
+e378 lhy RXY_RRRD
+e379 chy RXY_RRRD
+e37a ahy RXY_RRRD
+e37b shy RXY_RRRD
+e37c mhy RXY_RRRD
+e380 ng RXY_RRRD
+e381 og RXY_RRRD
+e382 xg RXY_RRRD
+e383 msgc RXY_RRRD
+e384 mg RXY_RRRD
+e385 lgat RXY_RRRD
+e386 mlg RXY_RRRD
+e387 dlg RXY_RRRD
+e388 alcg RXY_RRRD
+e389 slbg RXY_RRRD
+e38e stpq RXY_RRRD
+e38f lpq RXY_RRRD
+e390 llgc RXY_RRRD
+e391 llgh RXY_RRRD
+e394 llc RXY_RRRD
+e395 llh RXY_RRRD
+e396 ml RXY_RRRD
+e397 dl RXY_RRRD
+e398 alc RXY_RRRD
+e399 slb RXY_RRRD
+e39c llgtat RXY_RRRD
+e39d llgfat RXY_RRRD
+e39f lat RXY_RRRD
+e3c0 lbh RXY_RRRD
+e3c2 llch RXY_RRRD
+e3c3 stch RXY_RRRD
+e3c4 lhh RXY_RRRD
+e3c6 llhh RXY_RRRD
+e3c7 sthh RXY_RRRD
+e3c8 lfhat RXY_RRRD
+e3ca lfh RXY_RRRD
+e3cb stfh RXY_RRRD
+e3cd chf RXY_RRRD
+e3cf clhf RXY_RRRD
+e3d0 mpcifc RXY_RRRD
+e3d4 stpcifc RXY_RRRD
+e500 lasp SSE_RDRD
+e501 tprot SSE_RDRD
+e502 strag SSE_RDRD
+e50e mvcsk SSE_RDRD
+e50f mvcdk SSE_RDRD
+e544 mvhhi SIL_RDI
+e548 mvghi SIL_RDI
+e54c mvhi SIL_RDI
+e554 chhsi SIL_RDI
+e555 clhhsi SIL_RDU
+e558 cghsi SIL_RDI
+e559 clghsi SIL_RDU
+e55c chsi SIL_RDI
+e55d clfhsi SIL_RDU
+e560 tbegin SIL_RDU
+e561 tbeginc SIL_RDU
+e634 vpkz VSI_URDV
+e635 vlrl VSI_URDV
+e637 vlrlr VRS_RRDV
+e63c vupkz VSI_URDV
+e63d vstrl VSI_URDV
+e63f vstrlr VRS_RRDV
+e649 vlip VRI_V0UU2
+e650 vcvb VRR_RV0U
+e652 vcvbg VRR_RV0U
+e658 vcvd VRI_VR0UU
+e659 vsrp VRI_VVUUU2
+e65a vcvdg VRI_VR0UU
+e65b vpsop VRI_VVUUU2
+e65f vtp VRR_0V
+e671 vap VRI_VVV0UU2
+e673 vsp VRI_VVV0UU2
+e677 vcp VRR_0VV0U
+e678 vmp VRI_VVV0UU2
+e679 vmsp VRI_VVV0UU2
+e67a vdp VRI_VVV0UU2
+e67b vrp VRI_VVV0UU2
+e67e vsdp VRI_VVV0UU2
+e700 vleb VRX_VRRDU
+e701 vleh VRX_VRRDU
+e702 vleg VRX_VRRDU
+e703 vlef VRX_VRRDU
+e704 vllez VRX_VRRDU
+e705 vlrep VRX_VRRDU
+e706 vl VRX_VRRD
+e707 vlbb VRX_VRRDU
+e708 vsteb VRX_VRRDU
+e709 vsteh VRX_VRRDU
+e70a vsteg VRX_VRRDU
+e70b vstef VRX_VRRDU
+e70e vst VRX_VRRD
+e712 vgeg VRV_VVXRDU
+e713 vgef VRV_VVXRDU
+e71a vsceg VRV_VVXRDU
+e71b vscef VRV_VVXRDU
+e721 vlgv VRS_RVRDU
+e722 vlvg VRS_VRRDU
+e727 lcbb RXE_RRRDU
+e730 vesl VRS_VVRDU
+e733 verll VRS_VVRDU
+e736 vlm VRS_VVRD
+e737 vll VRS_VRRD
+e738 vesrl VRS_VVRDU
+e73a vesra VRS_VVRDU
+e73e vstm VRS_VVRD
+e73f vstl VRS_VRRD
+e740 vleib VRI_V0IU
+e741 vleih VRI_V0IU
+e742 vleig VRI_V0IU
+e743 vleif VRI_V0IU
+e744 vgbm VRI_V0U
+e745 vrepi VRI_V0IU
+e746 vgm VRI_V0UUU
+e74a vftci VRI_VVUUU
+e74d vrep VRI_VVUU
+e750 vpopct VRR_VV0U
+e752 vctz VRR_VV0U
+e753 vclz VRR_VV0U
+e756 vlr VRX_VV
+e75c vistr VRR_VV0U0U
+e75f vseg VRR_VV0U
+e760 vmrl VRR_VVV0U
+e761 vmrh VRR_VVV0U
+e762 vlvgp VRR_VRR
+e764 vsum VRR_VVV0U
+e765 vsumg VRR_VVV0U
+e766 vcksm VRR_VVV
+e767 vsumq VRR_VVV0U
+e768 vn VRR_VVV
+e769 vnc VRR_VVV
+e76a vo VRR_VVV
+e76b vno VRR_VVV
+e76c vnx VRR_VVV
+e76d vx VRR_VVV
+e76e vnn VRR_VVV
+e76f voc VRR_VVV
+e770 veslv VRR_VVV0U
+e772 verim VRI_VVV0UU
+e773 verllv VRR_VVV0U
+e774 vsl VRR_VVV
+e775 vslb VRR_VVV
+e777 vsldb VRI_VVV0U
+e778 vesrlv VRR_VVV0U
+e77a vesrav VRR_VVV0U
+e77c vsrl VRR_VVV
+e77d vsrlb VRR_VVV
+e77e vsra VRR_VVV
+e77f vsrab VRR_VVV
+e780 vfee VRR_VVV0U0U
+e781 vfene VRR_VVV0U0U
+e782 vfae VRR_VVV0U0U
+e784 vpdi VRR_VVV0U
+e785 vbperm VRR_VVV
+e78a vstrc VRR_VVVUU0V
+e78c vperm VRR_VVV0V
+e78d vsel VRR_VVV0V
+e78e vfms VRR_VVVU0UV
+e78f vfma VRR_VVVU0UV
+e794 vpk VRR_VVV0U
+e795 vpkls VRR_VVV0U0U
+e797 vpks VRR_VVV0U0U
+e79e vfnms VRR_VVVU0UV
+e79f vfnma VRR_VVVU0UV
+e7a1 vmlh VRR_VVV0U
+e7a2 vml VRR_VVV0U
+e7a3 vmh VRR_VVV0U
+e7a4 vmle VRR_VVV0U
+e7a5 vmlo VRR_VVV0U
+e7a6 vme VRR_VVV0U
+e7a7 vmo VRR_VVV0U
+e7a9 vmalh VRR_VVVU0V
+e7aa vmal VRR_VVVU0V
+e7ab vmah VRR_VVVU0V
+e7ac vmale VRR_VVVU0V
+e7ad vmalo VRR_VVVU0V
+e7ae vmae VRR_VVVU0V
+e7af vmao VRR_VVVU0V
+e7b4 vgfm VRR_VVV0U
+e7b8 vmsl VRR_VVVUU0V
+e7b9 vaccc VRR_VVVU0V
+e7bb vac VRR_VVVU0V
+e7bc vgfma VRR_VVVU0V
+e7bd vsbcbi VRR_VVVU0V
+e7bf vsbi VRR_VVVU0V
+e7c0 vclgd VRR_VV0UUU
+e7c1 vcdlg VRR_VV0UUU
+e7c2 vcgd VRR_VV0UUU
+e7c3 vcdg VRR_VV0UUU
+e7c4 vlde VRR_VV0UU2
+e7c5 vled VRR_VV0UUU
+e7c7 vfi VRR_VV0UUU
+e7ca wfk VRR_VV0UU2
+e7cb wfc VRR_VV0UU2
+e7cc vfpso VRR_VV0UUU
+e7ce vfsq VRR_VV0UU2
+e7d4 vupll VRR_VV0U
+e7d5 vuplh VRR_VV0U
+e7d6 vupl VRR_VV0U
+e7d7 vuph VRR_VV0U
+e7d8 vtm VRR_VV
+e7d9 vecl VRR_VV0U
+e7db vec VRR_VV0U
+e7de vlc VRR_VV0U
+e7df vlp VRR_VV0U
+e7e2 vfs VRR_VVV0UU
+e7e3 vfa VRR_VVV0UU
+e7e5 vfd VRR_VVV0UU
+e7e7 vfm VRR_VVV0UU
+e7e8 vfce VRR_VVV0UUU
+e7ea vfche VRR_VVV0UUU
+e7eb vfch VRR_VVV0UUU
+e7ee vfmin VRR_VVV0UUU
+e7ef vfmax VRR_VVV0UUU
+e7f0 vavgl VRR_VVV0U
+e7f1 vacc VRR_VVV0U
+e7f2 vavg VRR_VVV0U
+e7f3 va VRR_VVV0U
+e7f5 vscbi VRR_VVV0U
+e7f7 vs VRR_VVV0U
+e7f8 vceq VRR_VVV0U0U
+e7f9 vchl VRR_VVV0U0U
+e7fb vch VRR_VVV0U0U
+e7fc vmnl VRR_VVV0U
+e7fd vmxl VRR_VVV0U
+e7fe vmn VRR_VVV0U
+e7ff vmx VRR_VVV0U
+e8 mvcin SS_L0RDRD
+e9 pka SS_L2RDRD
+ea unpka SS_L0RDRD
+eb04 lmg RSY_RRRD
+eb0a srag RSY_RRRD
+eb0b slag RSY_RRRD
+eb0c srlg RSY_RRRD
+eb0d sllg RSY_RRRD
+eb0f tracg RSY_RRRD
+eb14 csy RSY_RRRD
+eb17 stcctm RSY_RURD
+eb1c rllg RSY_RRRD
+eb1d rll RSY_RRRD
+eb20 clmh RSY_RURD
+eb21 clmy RSY_RURD
+eb23 clt RSY_RURD
+eb24 stmg RSY_RRRD
+eb25 stctg RSY_CCRD
+eb26 stmh RSY_RRRD
+eb2b clgt RSY_RURD
+eb2c stcmh RSY_RURD
+eb2d stcmy RSY_RURD
+eb2f lctlg RSY_CCRD
+eb30 csg RSY_RRRD
+eb31 cdsy RSY_RRRD
+eb3e cdsg RSY_RRRD
+eb44 bxhg RSY_RRRD
+eb45 bxleg RSY_RRRD
+eb4c ecag RSY_RRRD
+eb51 tmy SIY_URD
+eb52 mviy SIY_URD
+eb54 niy SIY_URD
+eb55 cliy SIY_URD
+eb56 oiy SIY_URD
+eb57 xiy SIY_URD
+eb60 lric RSY_RDRU
+eb61 stric RSY_RDRU
+eb62 mric RSY_RDRU
+eb6a asi SIY_IRD
+eb6e alsi SIY_IRD
+eb7a agsi SIY_IRD
+eb7e algsi SIY_IRD
+eb80 icmh RSY_RURD
+eb81 icmy RSY_RURD
+eb8e mvclu RSY_RRRD
+eb8f clclu RSY_RRRD
+eb90 stmy RSY_RRRD
+eb96 lmh RSY_RRRD
+eb98 lmy RSY_RRRD
+eb9a lamy RSY_AARD
+eb9b stamy RSY_AARD
+ebc0 tp RSL_R0RD
+ebd0 pcistb RSY_RRRD
+ebd1 sic RSY_RRRD
+ebdc srak RSY_RRRD
+ebdd slak RSY_RRRD
+ebde srlk RSY_RRRD
+ebdf sllk RSY_RRRD
+ebe0 locfh RSY_RURD2
+ebe1 stocfh RSY_RURD2
+ebe2 locg RSY_RURD2
+ebe3 stocg RSY_RURD2
+ebe4 lang RSY_RRRD
+ebe6 laog RSY_RRRD
+ebe7 laxg RSY_RRRD
+ebe8 laag RSY_RRRD
+ebea laalg RSY_RRRD
+ebf2 loc RSY_RURD2
+ebf3 stoc RSY_RURD2
+ebf4 lan RSY_RRRD
+ebf6 lao RSY_RRRD
+ebf7 lax RSY_RRRD
+ebf8 laa RSY_RRRD
+ebfa laal RSY_RRRD
+ec42 lochi RIE_RUI0
+ec44 brxhg RIE_RRP
+ec45 brxlg RIE_RRP
+ec46 locghi RIE_RUI0
+ec4e lochhi RIE_RUI0
+ec51 risblg RIE_RRUUU
+ec54 rnsbg RIE_RRUUU
+ec55 risbg RIE_RRUUU
+ec56 rosbg RIE_RRUUU
+ec57 rxsbg RIE_RRUUU
+ec59 risbgn RIE_RRUUU
+ec5d risbhg RIE_RRUUU
+ec64 cgrj RIE_RRPU
+ec65 clgrj RIE_RRPU
+ec70 cgit RIE_R0IU
+ec71 clgit RIE_R0UU
+ec72 cit RIE_R0IU
+ec73 clfit RIE_R0UU
+ec76 crj RIE_RRPU
+ec77 clrj RIE_RRPU
+ec7c cgij RIE_RUPI
+ec7d clgij RIE_RUPU
+ec7e cij RIE_RUPI
+ec7f clij RIE_RUPU
+ecd8 ahik RIE_RRI0
+ecd9 aghik RIE_RRI0
+ecda alhsik RIE_RRI0
+ecdb alghsik RIE_RRI0
+ece4 cgrb RRS_RRRDU
+ece5 clgrb RRS_RRRDU
+ecf6 crb RRS_RRRDU
+ecf7 clrb RRS_RRRDU
+ecfc cgib RIS_RURDI
+ecfd clgib RIS_RURDU
+ecfe cib RIS_RURDI
+ecff clib RIS_RURDU
+ed04 ldeb RXE_FRRD
+ed05 lxdb RXE_FRRD
+ed06 lxeb RXE_FRRD
+ed07 mxdb RXE_FRRD
+ed08 keb RXE_FRRD
+ed09 ceb RXE_FRRD
+ed0a aeb RXE_FRRD
+ed0b seb RXE_FRRD
+ed0c mdeb RXE_FRRD
+ed0d deb RXE_FRRD
+ed0e maeb RXF_FRRDF
+ed0f mseb RXF_FRRDF
+ed10 tceb RXE_FRRD
+ed11 tcdb RXE_FRRD
+ed12 tcxb RXE_FRRD
+ed14 sqeb RXE_FRRD
+ed15 sqdb RXE_FRRD
+ed17 meeb RXE_FRRD
+ed18 kdb RXE_FRRD
+ed19 cdb RXE_FRRD
+ed1a adb RXE_FRRD
+ed1b sdb RXE_FRRD
+ed1c mdb RXE_FRRD
+ed1d ddb RXE_FRRD
+ed1e madb RXF_FRRDF
+ed1f msdb RXF_FRRDF
+ed24 lde RXE_FRRD
+ed25 lxd RXE_FRRD
+ed26 lxe RXE_FRRD
+ed2e mae RXF_FRRDF
+ed2f mse RXF_FRRDF
+ed34 sqe RXE_FRRD
+ed35 sqd RXE_FRRD
+ed37 mee RXE_FRRD
+ed38 mayl RXF_FRRDF
+ed39 myl RXF_FRRDF
+ed3a may RXF_FRRDF
+ed3b my RXF_FRRDF
+ed3c mayh RXF_FRRDF
+ed3d myh RXF_FRRDF
+ed3e mad RXF_FRRDF
+ed3f msd RXF_FRRDF
+ed40 sldt RXF_FRRDF
+ed41 srdt RXF_FRRDF
+ed48 slxt RXF_FRRDF
+ed49 srxt RXF_FRRDF
+ed50 tdcet RXE_FRRD
+ed51 tdget RXE_FRRD
+ed54 tdcdt RXE_FRRD
+ed55 tdgdt RXE_FRRD
+ed58 tdcxt RXE_FRRD
+ed59 tdgxt RXE_FRRD
+ed64 ley RXY_FRRD
+ed65 ldy RXY_FRRD
+ed66 stey RXY_FRRD
+ed67 stdy RXY_FRRD
+eda8 czdt RSL_LRDFU
+eda9 czxt RSL_LRDFU
+edaa cdzt RSL_LRDFU
+edab cxzt RSL_LRDFU
+edac cpdt RSL_LRDFU
+edad cpxt RSL_LRDFU
+edae cdpt RSL_LRDFU
+edaf cxpt RSL_LRDFU
+ee plo SS_RRRDRD2
+ef lmd SS_RRRDRD3
+f0 srp SS_LIRDRD
+f1 mvo SS_LLRDRD
+f2 pack SS_LLRDRD
+f3 unpk SS_LLRDRD
+f8 zap SS_LLRDRD
+f9 cp SS_LLRDRD
+fa ap SS_LLRDRD
+fb sp SS_LLRDRD
+fc mp SS_LLRDRD
+fd dp SS_LLRDRD