summaryrefslogtreecommitdiffstats
path: root/arch/s390
diff options
context:
space:
mode:
Diffstat (limited to 'arch/s390')
-rw-r--r--arch/s390/Kconfig12
-rw-r--r--arch/s390/Makefile6
-rw-r--r--arch/s390/appldata/appldata_base.c4
-rw-r--r--arch/s390/boot/ipl_parm.c10
-rw-r--r--arch/s390/boot/startup.c49
-rw-r--r--arch/s390/boot/vmem.c36
-rw-r--r--arch/s390/configs/compat.config3
-rw-r--r--arch/s390/configs/debug_defconfig18
-rw-r--r--arch/s390/configs/defconfig19
-rw-r--r--arch/s390/configs/zfcpdump_defconfig6
-rw-r--r--arch/s390/crypto/chacha-glue.c2
-rw-r--r--arch/s390/hypfs/inode.c4
-rw-r--r--arch/s390/include/asm/ap.h21
-rw-r--r--arch/s390/include/asm/asm-extable.h27
-rw-r--r--arch/s390/include/asm/bitops.h10
-rw-r--r--arch/s390/include/asm/ctl_reg.h146
-rw-r--r--arch/s390/include/asm/ctlreg.h255
-rw-r--r--arch/s390/include/asm/dwarf.h1
-rw-r--r--arch/s390/include/asm/facility.h6
-rw-r--r--arch/s390/include/asm/fault.h28
-rw-r--r--arch/s390/include/asm/fpu/api.h35
-rw-r--r--arch/s390/include/asm/fpu/internal.h11
-rw-r--r--arch/s390/include/asm/gmap.h2
-rw-r--r--arch/s390/include/asm/irq.h23
-rw-r--r--arch/s390/include/asm/irq_work.h2
-rw-r--r--arch/s390/include/asm/jump_label.h4
-rw-r--r--arch/s390/include/asm/kprobes.h5
-rw-r--r--arch/s390/include/asm/kvm_host.h9
-rw-r--r--arch/s390/include/asm/lowcore.h15
-rw-r--r--arch/s390/include/asm/mmu.h7
-rw-r--r--arch/s390/include/asm/mmu_context.h12
-rw-r--r--arch/s390/include/asm/page-states.h59
-rw-r--r--arch/s390/include/asm/page.h1
-rw-r--r--arch/s390/include/asm/pci.h11
-rw-r--r--arch/s390/include/asm/pci_clp.h3
-rw-r--r--arch/s390/include/asm/pci_dma.h121
-rw-r--r--arch/s390/include/asm/pgalloc.h1
-rw-r--r--arch/s390/include/asm/pgtable.h24
-rw-r--r--arch/s390/include/asm/processor.h35
-rw-r--r--arch/s390/include/asm/setup.h5
-rw-r--r--arch/s390/include/asm/smp.h1
-rw-r--r--arch/s390/include/asm/stacktrace.h7
-rw-r--r--arch/s390/include/asm/sysinfo.h4
-rw-r--r--arch/s390/include/asm/tlb.h13
-rw-r--r--arch/s390/include/asm/uaccess.h1
-rw-r--r--arch/s390/include/asm/word-at-a-time.h64
-rw-r--r--arch/s390/kernel/Makefile4
-rw-r--r--arch/s390/kernel/cache.c1
-rw-r--r--arch/s390/kernel/compat_signal.c16
-rw-r--r--arch/s390/kernel/crash_dump.c11
-rw-r--r--arch/s390/kernel/ctlreg.c121
-rw-r--r--arch/s390/kernel/debug.c1
-rw-r--r--arch/s390/kernel/diag.c3
-rw-r--r--arch/s390/kernel/early.c33
-rw-r--r--arch/s390/kernel/entry.S1
-rw-r--r--arch/s390/kernel/facility.c21
-rw-r--r--arch/s390/kernel/fpu.c13
-rw-r--r--arch/s390/kernel/guarded_storage.c6
-rw-r--r--arch/s390/kernel/ipl.c2
-rw-r--r--arch/s390/kernel/irq.c4
-rw-r--r--arch/s390/kernel/kprobes.c21
-rw-r--r--arch/s390/kernel/machine_kexec.c8
-rw-r--r--arch/s390/kernel/nmi.c34
-rw-r--r--arch/s390/kernel/perf_cpum_cf.c2
-rw-r--r--arch/s390/kernel/perf_event.c41
-rw-r--r--arch/s390/kernel/perf_pai_crypto.c233
-rw-r--r--arch/s390/kernel/perf_pai_ext.c98
-rw-r--r--arch/s390/kernel/perf_regs.c6
-rw-r--r--arch/s390/kernel/process.c2
-rw-r--r--arch/s390/kernel/processor.c7
-rw-r--r--arch/s390/kernel/ptrace.c76
-rw-r--r--arch/s390/kernel/setup.c52
-rw-r--r--arch/s390/kernel/signal.c13
-rw-r--r--arch/s390/kernel/smp.c140
-rw-r--r--arch/s390/kernel/stacktrace.c43
-rw-r--r--arch/s390/kernel/syscalls/syscall.tbl11
-rw-r--r--arch/s390/kernel/sysinfo.c10
-rw-r--r--arch/s390/kernel/time.c4
-rw-r--r--arch/s390/kernel/topology.c3
-rw-r--r--arch/s390/kernel/traps.c15
-rw-r--r--arch/s390/kernel/vdso32/Makefile12
-rw-r--r--arch/s390/kernel/vdso64/Makefile12
-rw-r--r--arch/s390/kernel/vdso64/vdso_user_wrapper.S2
-rw-r--r--arch/s390/kernel/vmlinux.lds.S1
-rw-r--r--arch/s390/kernel/vtime.c4
-rw-r--r--arch/s390/kvm/Kconfig5
-rw-r--r--arch/s390/kvm/gaccess.c52
-rw-r--r--arch/s390/kvm/guestdbg.c4
-rw-r--r--arch/s390/kvm/interrupt.c2
-rw-r--r--arch/s390/kvm/kvm-s390.c48
-rw-r--r--arch/s390/kvm/priv.c10
-rw-r--r--arch/s390/kvm/trace-s390.h23
-rw-r--r--arch/s390/kvm/vsie.c25
-rw-r--r--arch/s390/lib/test_unwind.c6
-rw-r--r--arch/s390/lib/uaccess.c15
-rw-r--r--arch/s390/mm/cmm.c1
-rw-r--r--arch/s390/mm/dump_pagetables.c2
-rw-r--r--arch/s390/mm/extable.c18
-rw-r--r--arch/s390/mm/fault.c465
-rw-r--r--arch/s390/mm/gmap.c174
-rw-r--r--arch/s390/mm/hugetlbpage.c6
-rw-r--r--arch/s390/mm/init.c8
-rw-r--r--arch/s390/mm/maccess.c2
-rw-r--r--arch/s390/mm/page-states.c206
-rw-r--r--arch/s390/mm/pageattr.c4
-rw-r--r--arch/s390/mm/pgalloc.c304
-rw-r--r--arch/s390/mm/pgtable.c31
-rw-r--r--arch/s390/mm/vmem.c17
-rw-r--r--arch/s390/net/bpf_jit_comp.c380
-rw-r--r--arch/s390/pci/Makefile2
-rw-r--r--arch/s390/pci/pci.c39
-rw-r--r--arch/s390/pci/pci_bus.c5
-rw-r--r--arch/s390/pci/pci_debug.c12
-rw-r--r--arch/s390/pci/pci_dma.c746
-rw-r--r--arch/s390/pci/pci_event.c17
-rw-r--r--arch/s390/pci/pci_sysfs.c19
-rw-r--r--arch/s390/tools/gen_facilities.c1
117 files changed, 2157 insertions, 2717 deletions
diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig
index bd4782f23..fe565f3a3 100644
--- a/arch/s390/Kconfig
+++ b/arch/s390/Kconfig
@@ -123,7 +123,6 @@ config S390
select ARCH_USE_BUILTIN_BSWAP
select ARCH_USE_CMPXCHG_LOCKREF
select ARCH_USE_SYM_ANNOTATIONS
- select ARCH_WANTS_DYNAMIC_TASK_STRUCT
select ARCH_WANTS_NO_INSTR
select ARCH_WANT_DEFAULT_BPF_JIT
select ARCH_WANT_IPC_PARSE_VERSION
@@ -131,6 +130,7 @@ config S390
select ARCH_WANT_OPTIMIZE_HUGETLB_VMEMMAP
select BUILDTIME_TABLE_SORT
select CLONE_BACKWARDS2
+ select DCACHE_WORD_ACCESS if !KMSAN
select DMA_OPS if PCI
select DYNAMIC_FTRACE if FUNCTION_TRACER
select FUNCTION_ALIGNMENT_8B if CC_IS_GCC
@@ -145,7 +145,7 @@ config S390
select GENERIC_TIME_VSYSCALL
select GENERIC_VDSO_TIME_NS
select GENERIC_IOREMAP if PCI
- select HAVE_ALIGNED_STRUCT_PAGE if SLUB
+ select HAVE_ALIGNED_STRUCT_PAGE
select HAVE_ARCH_AUDITSYSCALL
select HAVE_ARCH_JUMP_LABEL
select HAVE_ARCH_JUMP_LABEL_RELATIVE
@@ -216,7 +216,6 @@ config S390
select HAVE_VIRT_CPU_ACCOUNTING_IDLE
select IOMMU_HELPER if PCI
select IOMMU_SUPPORT if PCI
- select KEXEC
select MMU_GATHER_MERGE_VMAS
select MMU_GATHER_NO_GATHER
select MMU_GATHER_RCU_TABLE_FREE
@@ -235,6 +234,7 @@ config S390
select THREAD_INFO_IN_TASK
select TRACE_IRQFLAGS_SUPPORT
select TTY
+ select USER_STACKTRACE_SUPPORT
select VIRT_CPU_ACCOUNTING
select ZONE_DMA
# Note: keep the above list sorted alphabetically
@@ -442,7 +442,7 @@ config COMMAND_LINE_SIZE
line.
config COMPAT
- def_bool y
+ def_bool n
prompt "Kernel support for 31 bit emulation"
select ARCH_WANT_OLD_COMPAT_IPC
select COMPAT_OLD_SIGACTION
@@ -453,7 +453,9 @@ config COMPAT
Select this option if you want to enable your system kernel to
handle system-calls from ELF binaries for 31 bit ESA. This option
(and some other stuff like libraries and such) is needed for
- executing 31 bit applications. It is safe to say "Y".
+ executing 31 bit applications.
+
+ If unsure say N.
config SMP
def_bool y
diff --git a/arch/s390/Makefile b/arch/s390/Makefile
index a53a36ee0..73873e451 100644
--- a/arch/s390/Makefile
+++ b/arch/s390/Makefile
@@ -138,9 +138,6 @@ bzImage: vmlinux
zfcpdump:
$(Q)$(MAKE) $(build)=$(boot) $(boot)/$@
-vdso_install:
- $(Q)$(MAKE) $(build)=arch/$(ARCH)/kernel/vdso64 $@
-
archheaders:
$(Q)$(MAKE) $(build)=$(syscalls) uapi
@@ -160,6 +157,9 @@ vdso_prepare: prepare0
$(if $(CONFIG_COMPAT),$(Q)$(MAKE) \
$(build)=arch/s390/kernel/vdso32 include/generated/vdso32-offsets.h)
+vdso-install-y += arch/s390/kernel/vdso64/vdso64.so.dbg
+vdso-install-$(CONFIG_COMPAT) += arch/s390/kernel/vdso32/vdso32.so.dbg
+
ifdef CONFIG_EXPOLINE_EXTERN
modules_prepare: expoline_prepare
expoline_prepare: scripts
diff --git a/arch/s390/appldata/appldata_base.c b/arch/s390/appldata/appldata_base.c
index 3b0994625..c2978cb03 100644
--- a/arch/s390/appldata/appldata_base.c
+++ b/arch/s390/appldata/appldata_base.c
@@ -63,7 +63,6 @@ static struct ctl_table appldata_table[] = {
.mode = S_IRUGO | S_IWUSR,
.proc_handler = appldata_interval_handler,
},
- { },
};
/*
@@ -351,8 +350,7 @@ int appldata_register_ops(struct appldata_ops *ops)
if (ops->size > APPLDATA_MAX_REC_SIZE)
return -EINVAL;
- /* The last entry must be an empty one */
- ops->ctl_table = kcalloc(2, sizeof(struct ctl_table), GFP_KERNEL);
+ ops->ctl_table = kcalloc(1, sizeof(struct ctl_table), GFP_KERNEL);
if (!ops->ctl_table)
return -ENOMEM;
diff --git a/arch/s390/boot/ipl_parm.c b/arch/s390/boot/ipl_parm.c
index 7b7521762..b24de9aab 100644
--- a/arch/s390/boot/ipl_parm.c
+++ b/arch/s390/boot/ipl_parm.c
@@ -3,6 +3,7 @@
#include <linux/init.h>
#include <linux/ctype.h>
#include <linux/pgtable.h>
+#include <asm/page-states.h>
#include <asm/ebcdic.h>
#include <asm/sclp.h>
#include <asm/sections.h>
@@ -24,6 +25,7 @@ unsigned int __bootdata_preserved(zlib_dfltcc_support) = ZLIB_DFLTCC_FULL;
struct ipl_parameter_block __bootdata_preserved(ipl_block);
int __bootdata_preserved(ipl_block_valid);
int __bootdata_preserved(__kaslr_enabled);
+int __bootdata_preserved(cmma_flag) = 1;
unsigned long vmalloc_size = VMALLOC_DEFAULT_SIZE;
unsigned long memory_limit;
@@ -272,7 +274,7 @@ void parse_boot_command_line(void)
memory_limit = round_down(memparse(val, NULL), PAGE_SIZE);
if (!strcmp(param, "vmalloc") && val) {
- vmalloc_size = round_up(memparse(val, NULL), PAGE_SIZE);
+ vmalloc_size = round_up(memparse(val, NULL), _SEGMENT_SIZE);
vmalloc_size_set = 1;
}
@@ -295,6 +297,12 @@ void parse_boot_command_line(void)
if (!strcmp(param, "nokaslr"))
__kaslr_enabled = 0;
+ if (!strcmp(param, "cmma")) {
+ rc = kstrtobool(val, &enabled);
+ if (!rc && !enabled)
+ cmma_flag = 0;
+ }
+
#if IS_ENABLED(CONFIG_KVM)
if (!strcmp(param, "prot_virt")) {
rc = kstrtobool(val, &enabled);
diff --git a/arch/s390/boot/startup.c b/arch/s390/boot/startup.c
index d3e48bd9c..9cc76e631 100644
--- a/arch/s390/boot/startup.c
+++ b/arch/s390/boot/startup.c
@@ -1,6 +1,7 @@
// SPDX-License-Identifier: GPL-2.0
#include <linux/string.h>
#include <linux/elf.h>
+#include <asm/page-states.h>
#include <asm/boot_data.h>
#include <asm/sections.h>
#include <asm/maccess.h>
@@ -49,7 +50,7 @@ static void detect_facilities(void)
{
if (test_facility(8)) {
machine.has_edat1 = 1;
- __ctl_set_bit(0, 23);
+ local_ctl_set_bit(0, CR0_EDAT_BIT);
}
if (test_facility(78))
machine.has_edat2 = 1;
@@ -57,6 +58,48 @@ static void detect_facilities(void)
machine.has_nx = 1;
}
+static int cmma_test_essa(void)
+{
+ unsigned long reg1, reg2, tmp = 0;
+ int rc = 1;
+ psw_t old;
+
+ /* Test ESSA_GET_STATE */
+ asm volatile(
+ " mvc 0(16,%[psw_old]),0(%[psw_pgm])\n"
+ " epsw %[reg1],%[reg2]\n"
+ " st %[reg1],0(%[psw_pgm])\n"
+ " st %[reg2],4(%[psw_pgm])\n"
+ " larl %[reg1],1f\n"
+ " stg %[reg1],8(%[psw_pgm])\n"
+ " .insn rrf,0xb9ab0000,%[tmp],%[tmp],%[cmd],0\n"
+ " la %[rc],0\n"
+ "1: mvc 0(16,%[psw_pgm]),0(%[psw_old])\n"
+ : [reg1] "=&d" (reg1),
+ [reg2] "=&a" (reg2),
+ [rc] "+&d" (rc),
+ [tmp] "=&d" (tmp),
+ "+Q" (S390_lowcore.program_new_psw),
+ "=Q" (old)
+ : [psw_old] "a" (&old),
+ [psw_pgm] "a" (&S390_lowcore.program_new_psw),
+ [cmd] "i" (ESSA_GET_STATE)
+ : "cc", "memory");
+ return rc;
+}
+
+static void cmma_init(void)
+{
+ if (!cmma_flag)
+ return;
+ if (cmma_test_essa()) {
+ cmma_flag = 0;
+ return;
+ }
+ if (test_facility(147))
+ cmma_flag = 2;
+}
+
static void setup_lpp(void)
{
S390_lowcore.current_pid = 0;
@@ -212,7 +255,8 @@ static unsigned long setup_kernel_memory_layout(void)
VMALLOC_END = MODULES_VADDR;
/* allow vmalloc area to occupy up to about 1/2 of the rest virtual space left */
- vmalloc_size = min(vmalloc_size, round_down(VMALLOC_END / 2, _REGION3_SIZE));
+ vsize = round_down(VMALLOC_END / 2, _SEGMENT_SIZE);
+ vmalloc_size = min(vmalloc_size, vsize);
VMALLOC_START = VMALLOC_END - vmalloc_size;
/* split remaining virtual space between 1:1 mapping & vmemmap array */
@@ -306,6 +350,7 @@ void startup_kernel(void)
setup_boot_command_line();
parse_boot_command_line();
detect_facilities();
+ cmma_init();
sanitize_prot_virt_host();
max_physmem_end = detect_max_physmem_end();
setup_ident_map_size(max_physmem_end);
diff --git a/arch/s390/boot/vmem.c b/arch/s390/boot/vmem.c
index 442a74f11..47e1f54c5 100644
--- a/arch/s390/boot/vmem.c
+++ b/arch/s390/boot/vmem.c
@@ -2,16 +2,18 @@
#include <linux/sched/task.h>
#include <linux/pgtable.h>
#include <linux/kasan.h>
+#include <asm/page-states.h>
#include <asm/pgalloc.h>
#include <asm/facility.h>
#include <asm/sections.h>
+#include <asm/ctlreg.h>
#include <asm/physmem_info.h>
#include <asm/maccess.h>
#include <asm/abs_lowcore.h>
#include "decompressor.h"
#include "boot.h"
-unsigned long __bootdata_preserved(s390_invalid_asce);
+struct ctlreg __bootdata_preserved(s390_invalid_asce);
#ifdef CONFIG_PROC_FS
atomic_long_t __bootdata_preserved(direct_pages_count[PG_DIRECT_MAP_MAX]);
@@ -69,6 +71,10 @@ static void kasan_populate_shadow(void)
crst_table_init((unsigned long *)kasan_early_shadow_pud, pud_val(pud_z));
crst_table_init((unsigned long *)kasan_early_shadow_pmd, pmd_val(pmd_z));
memset64((u64 *)kasan_early_shadow_pte, pte_val(pte_z), PTRS_PER_PTE);
+ __arch_set_page_dat(kasan_early_shadow_p4d, 1UL << CRST_ALLOC_ORDER);
+ __arch_set_page_dat(kasan_early_shadow_pud, 1UL << CRST_ALLOC_ORDER);
+ __arch_set_page_dat(kasan_early_shadow_pmd, 1UL << CRST_ALLOC_ORDER);
+ __arch_set_page_dat(kasan_early_shadow_pte, 1);
/*
* Current memory layout:
@@ -166,8 +172,6 @@ static bool kasan_pmd_populate_zero_shadow(pmd_t *pmd, unsigned long addr,
static bool kasan_pte_populate_zero_shadow(pte_t *pte, enum populate_mode mode)
{
- pte_t entry;
-
if (mode == POPULATE_KASAN_ZERO_SHADOW) {
set_pte(pte, pte_z);
return true;
@@ -224,6 +228,7 @@ static void *boot_crst_alloc(unsigned long val)
table = (unsigned long *)physmem_alloc_top_down(RR_VMEM, size, size);
crst_table_init(table, val);
+ __arch_set_page_dat(table, 1UL << CRST_ALLOC_ORDER);
return table;
}
@@ -239,6 +244,7 @@ static pte_t *boot_pte_alloc(void)
if (!pte_leftover) {
pte_leftover = (void *)physmem_alloc_top_down(RR_VMEM, PAGE_SIZE, PAGE_SIZE);
pte = pte_leftover + _PAGE_TABLE_SIZE;
+ __arch_set_page_dat(pte, 1);
} else {
pte = pte_leftover;
pte_leftover = NULL;
@@ -360,7 +366,7 @@ static void pgtable_pud_populate(p4d_t *p4d, unsigned long addr, unsigned long e
}
pmd = boot_crst_alloc(_SEGMENT_ENTRY_EMPTY);
pud_populate(&init_mm, pud, pmd);
- } else if (pud_large(*pud)) {
+ } else if (pud_leaf(*pud)) {
continue;
}
pgtable_pmd_populate(pud, addr, next, mode);
@@ -419,6 +425,14 @@ void setup_vmem(unsigned long asce_limit)
unsigned long asce_bits;
int i;
+ /*
+ * Mark whole memory as no-dat. This must be done before any
+ * page tables are allocated, or kernel image builtin pages
+ * are marked as dat tables.
+ */
+ for_each_physmem_online_range(i, &start, &end)
+ __arch_set_page_nodat((void *)start, (end - start) >> PAGE_SHIFT);
+
if (asce_limit == _REGION1_SIZE) {
asce_type = _REGION2_ENTRY_EMPTY;
asce_bits = _ASCE_TYPE_REGION2 | _ASCE_TABLE_LENGTH;
@@ -426,10 +440,12 @@ void setup_vmem(unsigned long asce_limit)
asce_type = _REGION3_ENTRY_EMPTY;
asce_bits = _ASCE_TYPE_REGION3 | _ASCE_TABLE_LENGTH;
}
- s390_invalid_asce = invalid_pg_dir | _ASCE_TYPE_REGION3 | _ASCE_TABLE_LENGTH;
+ s390_invalid_asce.val = invalid_pg_dir | _ASCE_TYPE_REGION3 | _ASCE_TABLE_LENGTH;
crst_table_init((unsigned long *)swapper_pg_dir, asce_type);
crst_table_init((unsigned long *)invalid_pg_dir, _REGION3_ENTRY_EMPTY);
+ __arch_set_page_dat((void *)swapper_pg_dir, 1UL << CRST_ALLOC_ORDER);
+ __arch_set_page_dat((void *)invalid_pg_dir, 1UL << CRST_ALLOC_ORDER);
/*
* To allow prefixing the lowcore must be mapped with 4KB pages.
@@ -447,12 +463,12 @@ void setup_vmem(unsigned long asce_limit)
kasan_populate_shadow();
- S390_lowcore.kernel_asce = swapper_pg_dir | asce_bits;
+ S390_lowcore.kernel_asce.val = swapper_pg_dir | asce_bits;
S390_lowcore.user_asce = s390_invalid_asce;
- __ctl_load(S390_lowcore.kernel_asce, 1, 1);
- __ctl_load(S390_lowcore.user_asce, 7, 7);
- __ctl_load(S390_lowcore.kernel_asce, 13, 13);
+ local_ctl_load(1, &S390_lowcore.kernel_asce);
+ local_ctl_load(7, &S390_lowcore.user_asce);
+ local_ctl_load(13, &S390_lowcore.kernel_asce);
- init_mm.context.asce = S390_lowcore.kernel_asce;
+ init_mm.context.asce = S390_lowcore.kernel_asce.val;
}
diff --git a/arch/s390/configs/compat.config b/arch/s390/configs/compat.config
new file mode 100644
index 000000000..6fd051453
--- /dev/null
+++ b/arch/s390/configs/compat.config
@@ -0,0 +1,3 @@
+# Help: Enable compat support
+CONFIG_COMPAT=y
+CONFIG_COMPAT_32BIT_TIME=y
diff --git a/arch/s390/configs/debug_defconfig b/arch/s390/configs/debug_defconfig
index dd0608629..c924be0d7 100644
--- a/arch/s390/configs/debug_defconfig
+++ b/arch/s390/configs/debug_defconfig
@@ -40,12 +40,12 @@ CONFIG_SCHED_AUTOGROUP=y
CONFIG_EXPERT=y
# CONFIG_SYSFS_SYSCALL is not set
CONFIG_PROFILING=y
+CONFIG_KEXEC=y
CONFIG_KEXEC_FILE=y
CONFIG_KEXEC_SIG=y
CONFIG_CRASH_DUMP=y
CONFIG_LIVEPATCH=y
-CONFIG_MARCH_ZEC12=y
-CONFIG_TUNE_ZEC12=y
+CONFIG_MARCH_Z13=y
CONFIG_NR_CPUS=512
CONFIG_NUMA=y
CONFIG_HZ_100=y
@@ -76,7 +76,6 @@ CONFIG_MODULE_FORCE_UNLOAD=y
CONFIG_MODULE_UNLOAD_TAINT_TRACKING=y
CONFIG_MODVERSIONS=y
CONFIG_MODULE_SRCVERSION_ALL=y
-CONFIG_MODULE_SIG_SHA256=y
CONFIG_BLK_DEV_THROTTLING=y
CONFIG_BLK_WBT=y
CONFIG_BLK_CGROUP_IOLATENCY=y
@@ -93,6 +92,7 @@ CONFIG_UNIXWARE_DISKLABEL=y
CONFIG_IOSCHED_BFQ=y
CONFIG_BINFMT_MISC=m
CONFIG_ZSWAP=y
+CONFIG_ZSWAP_ZPOOL_DEFAULT_ZBUD=y
CONFIG_ZSMALLOC_STAT=y
CONFIG_SLUB_STATS=y
# CONFIG_COMPAT_BRK is not set
@@ -118,7 +118,6 @@ CONFIG_UNIX=y
CONFIG_UNIX_DIAG=m
CONFIG_XFRM_USER=m
CONFIG_NET_KEY=m
-CONFIG_SMC=m
CONFIG_SMC_DIAG=m
CONFIG_INET=y
CONFIG_IP_MULTICAST=y
@@ -437,9 +436,6 @@ CONFIG_SCSI_DH_ALUA=m
CONFIG_MD=y
CONFIG_BLK_DEV_MD=y
# CONFIG_MD_BITMAP_FILE is not set
-CONFIG_MD_LINEAR=m
-CONFIG_MD_MULTIPATH=m
-CONFIG_MD_FAULTY=m
CONFIG_MD_CLUSTER=m
CONFIG_BCACHE=m
CONFIG_BLK_DEV_DM=y
@@ -619,6 +615,9 @@ CONFIG_BTRFS_FS_POSIX_ACL=y
CONFIG_BTRFS_DEBUG=y
CONFIG_BTRFS_ASSERT=y
CONFIG_NILFS2_FS=m
+CONFIG_BCACHEFS_FS=y
+CONFIG_BCACHEFS_QUOTA=y
+CONFIG_BCACHEFS_POSIX_ACL=y
CONFIG_FS_DAX=y
CONFIG_EXPORTFS_BLOCK_OPS=y
CONFIG_FS_ENCRYPTION=y
@@ -636,7 +635,7 @@ CONFIG_CUSE=m
CONFIG_VIRTIO_FS=m
CONFIG_OVERLAY_FS=m
CONFIG_NETFS_STATS=y
-CONFIG_FSCACHE=m
+CONFIG_FSCACHE=y
CONFIG_CACHEFILES=m
CONFIG_ISO9660_FS=y
CONFIG_JOLIET=y
@@ -691,7 +690,6 @@ CONFIG_PERSISTENT_KEYRINGS=y
CONFIG_ENCRYPTED_KEYS=m
CONFIG_KEY_NOTIFICATIONS=y
CONFIG_SECURITY=y
-CONFIG_SECURITY_NETWORK=y
CONFIG_HARDENED_USERCOPY=y
CONFIG_FORTIFY_SOURCE=y
CONFIG_SECURITY_SELINUX=y
@@ -707,7 +705,6 @@ CONFIG_IMA_DEFAULT_HASH_SHA256=y
CONFIG_IMA_WRITE_POLICY=y
CONFIG_IMA_APPRAISE=y
CONFIG_LSM="yama,loadpin,safesetid,integrity,selinux,smack,tomoyo,apparmor"
-CONFIG_INIT_STACK_NONE=y
CONFIG_BUG_ON_DATA_CORRUPTION=y
CONFIG_CRYPTO_USER=m
# CONFIG_CRYPTO_MANAGER_DISABLE_TESTS is not set
@@ -737,7 +734,6 @@ CONFIG_CRYPTO_TEA=m
CONFIG_CRYPTO_TWOFISH=m
CONFIG_CRYPTO_ADIANTUM=m
CONFIG_CRYPTO_ARC4=m
-CONFIG_CRYPTO_CFB=m
CONFIG_CRYPTO_HCTR2=m
CONFIG_CRYPTO_KEYWRAP=m
CONFIG_CRYPTO_LRW=m
diff --git a/arch/s390/configs/defconfig b/arch/s390/configs/defconfig
index 1b8150e50..c8f0c9fe4 100644
--- a/arch/s390/configs/defconfig
+++ b/arch/s390/configs/defconfig
@@ -38,12 +38,12 @@ CONFIG_SCHED_AUTOGROUP=y
CONFIG_EXPERT=y
# CONFIG_SYSFS_SYSCALL is not set
CONFIG_PROFILING=y
+CONFIG_KEXEC=y
CONFIG_KEXEC_FILE=y
CONFIG_KEXEC_SIG=y
CONFIG_CRASH_DUMP=y
CONFIG_LIVEPATCH=y
-CONFIG_MARCH_ZEC12=y
-CONFIG_TUNE_ZEC12=y
+CONFIG_MARCH_Z13=y
CONFIG_NR_CPUS=512
CONFIG_NUMA=y
CONFIG_HZ_100=y
@@ -71,7 +71,6 @@ CONFIG_MODULE_FORCE_UNLOAD=y
CONFIG_MODULE_UNLOAD_TAINT_TRACKING=y
CONFIG_MODVERSIONS=y
CONFIG_MODULE_SRCVERSION_ALL=y
-CONFIG_MODULE_SIG_SHA256=y
CONFIG_BLK_DEV_THROTTLING=y
CONFIG_BLK_WBT=y
CONFIG_BLK_CGROUP_IOLATENCY=y
@@ -88,6 +87,7 @@ CONFIG_UNIXWARE_DISKLABEL=y
CONFIG_IOSCHED_BFQ=y
CONFIG_BINFMT_MISC=m
CONFIG_ZSWAP=y
+CONFIG_ZSWAP_ZPOOL_DEFAULT_ZBUD=y
CONFIG_ZSMALLOC_STAT=y
# CONFIG_COMPAT_BRK is not set
CONFIG_MEMORY_HOTPLUG=y
@@ -109,7 +109,6 @@ CONFIG_UNIX=y
CONFIG_UNIX_DIAG=m
CONFIG_XFRM_USER=m
CONFIG_NET_KEY=m
-CONFIG_SMC=m
CONFIG_SMC_DIAG=m
CONFIG_INET=y
CONFIG_IP_MULTICAST=y
@@ -427,9 +426,6 @@ CONFIG_SCSI_DH_ALUA=m
CONFIG_MD=y
CONFIG_BLK_DEV_MD=y
# CONFIG_MD_BITMAP_FILE is not set
-CONFIG_MD_LINEAR=m
-CONFIG_MD_MULTIPATH=m
-CONFIG_MD_FAULTY=m
CONFIG_MD_CLUSTER=m
CONFIG_BCACHE=m
CONFIG_BLK_DEV_DM=y
@@ -605,6 +601,9 @@ CONFIG_OCFS2_FS=m
CONFIG_BTRFS_FS=y
CONFIG_BTRFS_FS_POSIX_ACL=y
CONFIG_NILFS2_FS=m
+CONFIG_BCACHEFS_FS=m
+CONFIG_BCACHEFS_QUOTA=y
+CONFIG_BCACHEFS_POSIX_ACL=y
CONFIG_FS_DAX=y
CONFIG_EXPORTFS_BLOCK_OPS=y
CONFIG_FS_ENCRYPTION=y
@@ -621,7 +620,7 @@ CONFIG_CUSE=m
CONFIG_VIRTIO_FS=m
CONFIG_OVERLAY_FS=m
CONFIG_NETFS_STATS=y
-CONFIG_FSCACHE=m
+CONFIG_FSCACHE=y
CONFIG_CACHEFILES=m
CONFIG_ISO9660_FS=y
CONFIG_JOLIET=y
@@ -677,7 +676,6 @@ CONFIG_PERSISTENT_KEYRINGS=y
CONFIG_ENCRYPTED_KEYS=m
CONFIG_KEY_NOTIFICATIONS=y
CONFIG_SECURITY=y
-CONFIG_SECURITY_NETWORK=y
CONFIG_SECURITY_SELINUX=y
CONFIG_SECURITY_SELINUX_BOOTPARAM=y
CONFIG_SECURITY_LOCKDOWN_LSM=y
@@ -691,7 +689,6 @@ CONFIG_IMA_DEFAULT_HASH_SHA256=y
CONFIG_IMA_WRITE_POLICY=y
CONFIG_IMA_APPRAISE=y
CONFIG_LSM="yama,loadpin,safesetid,integrity,selinux,smack,tomoyo,apparmor"
-CONFIG_INIT_STACK_NONE=y
CONFIG_BUG_ON_DATA_CORRUPTION=y
CONFIG_CRYPTO_FIPS=y
CONFIG_CRYPTO_USER=m
@@ -722,11 +719,9 @@ CONFIG_CRYPTO_TEA=m
CONFIG_CRYPTO_TWOFISH=m
CONFIG_CRYPTO_ADIANTUM=m
CONFIG_CRYPTO_ARC4=m
-CONFIG_CRYPTO_CFB=m
CONFIG_CRYPTO_HCTR2=m
CONFIG_CRYPTO_KEYWRAP=m
CONFIG_CRYPTO_LRW=m
-CONFIG_CRYPTO_OFB=m
CONFIG_CRYPTO_PCBC=m
CONFIG_CRYPTO_AEGIS128=m
CONFIG_CRYPTO_CHACHA20POLY1305=m
diff --git a/arch/s390/configs/zfcpdump_defconfig b/arch/s390/configs/zfcpdump_defconfig
index b831083b4..c51f3ec4e 100644
--- a/arch/s390/configs/zfcpdump_defconfig
+++ b/arch/s390/configs/zfcpdump_defconfig
@@ -8,10 +8,9 @@ CONFIG_BPF_SYSCALL=y
# CONFIG_NET_NS is not set
CONFIG_BLK_DEV_INITRD=y
CONFIG_CC_OPTIMIZE_FOR_SIZE=y
+CONFIG_KEXEC=y
CONFIG_CRASH_DUMP=y
-CONFIG_MARCH_ZEC12=y
-CONFIG_TUNE_ZEC12=y
-# CONFIG_COMPAT is not set
+CONFIG_MARCH_Z13=y
CONFIG_NR_CPUS=2
CONFIG_HZ_100=y
# CONFIG_CHSC_SCH is not set
@@ -66,7 +65,6 @@ CONFIG_ZFCP=y
# CONFIG_MISC_FILESYSTEMS is not set
# CONFIG_NETWORK_FILESYSTEMS is not set
CONFIG_LSM="yama,loadpin,safesetid,integrity"
-CONFIG_INIT_STACK_NONE=y
# CONFIG_ZLIB_DFLTCC is not set
CONFIG_XZ_DEC_MICROLZMA=y
CONFIG_PRINTK_TIME=y
diff --git a/arch/s390/crypto/chacha-glue.c b/arch/s390/crypto/chacha-glue.c
index 5fae187f9..ed9959e6f 100644
--- a/arch/s390/crypto/chacha-glue.c
+++ b/arch/s390/crypto/chacha-glue.c
@@ -82,7 +82,7 @@ void chacha_crypt_arch(u32 *state, u8 *dst, const u8 *src,
* it cannot handle a block of data or less, but otherwise
* it can handle data of arbitrary size
*/
- if (bytes <= CHACHA_BLOCK_SIZE || nrounds != 20 || !MACHINE_HAS_VX)
+ if (bytes <= CHACHA_BLOCK_SIZE || nrounds != 20 || !cpu_has_vx())
chacha_crypt_generic(state, dst, src, bytes, nrounds);
else
chacha20_crypt_s390(state, dst, src, bytes,
diff --git a/arch/s390/hypfs/inode.c b/arch/s390/hypfs/inode.c
index ada831499..858beaf4a 100644
--- a/arch/s390/hypfs/inode.c
+++ b/arch/s390/hypfs/inode.c
@@ -53,7 +53,7 @@ static void hypfs_update_update(struct super_block *sb)
struct inode *inode = d_inode(sb_info->update_file);
sb_info->last_update = ktime_get_seconds();
- inode->i_atime = inode->i_mtime = inode_set_ctime_current(inode);
+ simple_inode_init_ts(inode);
}
/* directory tree removal functions */
@@ -101,7 +101,7 @@ static struct inode *hypfs_make_inode(struct super_block *sb, umode_t mode)
ret->i_mode = mode;
ret->i_uid = hypfs_info->uid;
ret->i_gid = hypfs_info->gid;
- ret->i_atime = ret->i_mtime = inode_set_ctime_current(ret);
+ simple_inode_init_ts(ret);
if (S_ISDIR(mode))
set_nlink(ret, 2);
}
diff --git a/arch/s390/include/asm/ap.h b/arch/s390/include/asm/ap.h
index 40c2b82f0..43ac4a64f 100644
--- a/arch/s390/include/asm/ap.h
+++ b/arch/s390/include/asm/ap.h
@@ -88,7 +88,7 @@ static inline bool ap_instructions_available(void)
}
/* TAPQ register GR2 response struct */
-struct ap_tapq_gr2 {
+struct ap_tapq_hwinfo {
union {
unsigned long value;
struct {
@@ -96,11 +96,13 @@ struct ap_tapq_gr2 {
unsigned int apinfo : 32; /* ap type, ... */
};
struct {
- unsigned int s : 1; /* APSC */
- unsigned int m : 1; /* AP4KM */
- unsigned int c : 1; /* AP4KC */
- unsigned int mode : 3;
- unsigned int n : 1; /* APXA */
+ unsigned int apsc : 1; /* APSC */
+ unsigned int mex4k : 1; /* AP4KM */
+ unsigned int crt4k : 1; /* AP4KC */
+ unsigned int cca : 1; /* D */
+ unsigned int accel : 1; /* A */
+ unsigned int ep11 : 1; /* X */
+ unsigned int apxa : 1; /* APXA */
unsigned int : 1;
unsigned int class : 8;
unsigned int bs : 2; /* SE bind/assoc */
@@ -126,11 +128,12 @@ struct ap_tapq_gr2 {
/**
* ap_tapq(): Test adjunct processor queue.
* @qid: The AP queue number
- * @info: Pointer to queue descriptor
+ * @info: Pointer to tapq hwinfo struct
*
* Returns AP queue status structure.
*/
-static inline struct ap_queue_status ap_tapq(ap_qid_t qid, struct ap_tapq_gr2 *info)
+static inline struct ap_queue_status ap_tapq(ap_qid_t qid,
+ struct ap_tapq_hwinfo *info)
{
union ap_queue_status_reg reg1;
unsigned long reg2;
@@ -158,7 +161,7 @@ static inline struct ap_queue_status ap_tapq(ap_qid_t qid, struct ap_tapq_gr2 *i
* Returns AP queue status structure.
*/
static inline struct ap_queue_status ap_test_queue(ap_qid_t qid, int tbit,
- struct ap_tapq_gr2 *info)
+ struct ap_tapq_hwinfo *info)
{
if (tbit)
qid |= 1UL << 23; /* set T bit*/
diff --git a/arch/s390/include/asm/asm-extable.h b/arch/s390/include/asm/asm-extable.h
index e6532477f..4a6b0a8b6 100644
--- a/arch/s390/include/asm/asm-extable.h
+++ b/arch/s390/include/asm/asm-extable.h
@@ -13,6 +13,7 @@
#define EX_TYPE_UA_LOAD_MEM 4
#define EX_TYPE_UA_LOAD_REG 5
#define EX_TYPE_UA_LOAD_REGPAIR 6
+#define EX_TYPE_ZEROPAD 7
#define EX_DATA_REG_ERR_SHIFT 0
#define EX_DATA_REG_ERR GENMASK(3, 0)
@@ -23,16 +24,7 @@
#define EX_DATA_LEN_SHIFT 8
#define EX_DATA_LEN GENMASK(11, 8)
-#define __EX_TABLE(_section, _fault, _target, _type) \
- stringify_in_c(.section _section,"a";) \
- stringify_in_c(.balign 4;) \
- stringify_in_c(.long (_fault) - .;) \
- stringify_in_c(.long (_target) - .;) \
- stringify_in_c(.short (_type);) \
- stringify_in_c(.short 0;) \
- stringify_in_c(.previous)
-
-#define __EX_TABLE_UA(_section, _fault, _target, _type, _regerr, _regaddr, _len)\
+#define __EX_TABLE(_section, _fault, _target, _type, _regerr, _regaddr, _len) \
stringify_in_c(.section _section,"a";) \
stringify_in_c(.balign 4;) \
stringify_in_c(.long (_fault) - .;) \
@@ -72,21 +64,24 @@
stringify_in_c(.previous)
#define EX_TABLE(_fault, _target) \
- __EX_TABLE(__ex_table, _fault, _target, EX_TYPE_FIXUP)
+ __EX_TABLE(__ex_table, _fault, _target, EX_TYPE_FIXUP, __stringify(%%r0), __stringify(%%r0), 0)
#define EX_TABLE_AMODE31(_fault, _target) \
- __EX_TABLE(.amode31.ex_table, _fault, _target, EX_TYPE_FIXUP)
+ __EX_TABLE(.amode31.ex_table, _fault, _target, EX_TYPE_FIXUP, __stringify(%%r0), __stringify(%%r0), 0)
#define EX_TABLE_UA_STORE(_fault, _target, _regerr) \
- __EX_TABLE_UA(__ex_table, _fault, _target, EX_TYPE_UA_STORE, _regerr, _regerr, 0)
+ __EX_TABLE(__ex_table, _fault, _target, EX_TYPE_UA_STORE, _regerr, _regerr, 0)
#define EX_TABLE_UA_LOAD_MEM(_fault, _target, _regerr, _regmem, _len) \
- __EX_TABLE_UA(__ex_table, _fault, _target, EX_TYPE_UA_LOAD_MEM, _regerr, _regmem, _len)
+ __EX_TABLE(__ex_table, _fault, _target, EX_TYPE_UA_LOAD_MEM, _regerr, _regmem, _len)
#define EX_TABLE_UA_LOAD_REG(_fault, _target, _regerr, _regzero) \
- __EX_TABLE_UA(__ex_table, _fault, _target, EX_TYPE_UA_LOAD_REG, _regerr, _regzero, 0)
+ __EX_TABLE(__ex_table, _fault, _target, EX_TYPE_UA_LOAD_REG, _regerr, _regzero, 0)
#define EX_TABLE_UA_LOAD_REGPAIR(_fault, _target, _regerr, _regzero) \
- __EX_TABLE_UA(__ex_table, _fault, _target, EX_TYPE_UA_LOAD_REGPAIR, _regerr, _regzero, 0)
+ __EX_TABLE(__ex_table, _fault, _target, EX_TYPE_UA_LOAD_REGPAIR, _regerr, _regzero, 0)
+
+#define EX_TABLE_ZEROPAD(_fault, _target, _regdata, _regaddr) \
+ __EX_TABLE(__ex_table, _fault, _target, EX_TYPE_ZEROPAD, _regdata, _regaddr, 0)
#endif /* __ASM_EXTABLE_H */
diff --git a/arch/s390/include/asm/bitops.h b/arch/s390/include/asm/bitops.h
index 2de74fcd0..c467dffa8 100644
--- a/arch/s390/include/asm/bitops.h
+++ b/arch/s390/include/asm/bitops.h
@@ -201,6 +201,16 @@ static inline void arch___clear_bit_unlock(unsigned long nr,
arch___clear_bit(nr, ptr);
}
+static inline bool arch_xor_unlock_is_negative_byte(unsigned long mask,
+ volatile unsigned long *ptr)
+{
+ unsigned long old;
+
+ old = __atomic64_xor_barrier(mask, (long *)ptr);
+ return old & BIT(7);
+}
+#define arch_xor_unlock_is_negative_byte arch_xor_unlock_is_negative_byte
+
#include <asm-generic/bitops/instrumented-atomic.h>
#include <asm-generic/bitops/instrumented-non-atomic.h>
#include <asm-generic/bitops/instrumented-lock.h>
diff --git a/arch/s390/include/asm/ctl_reg.h b/arch/s390/include/asm/ctl_reg.h
deleted file mode 100644
index adf7d8cda..000000000
--- a/arch/s390/include/asm/ctl_reg.h
+++ /dev/null
@@ -1,146 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * Copyright IBM Corp. 1999, 2009
- *
- * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>
- */
-
-#ifndef __ASM_CTL_REG_H
-#define __ASM_CTL_REG_H
-
-#include <linux/bits.h>
-
-#define CR0_CLOCK_COMPARATOR_SIGN BIT(63 - 10)
-#define CR0_LOW_ADDRESS_PROTECTION BIT(63 - 35)
-#define CR0_FETCH_PROTECTION_OVERRIDE BIT(63 - 38)
-#define CR0_STORAGE_PROTECTION_OVERRIDE BIT(63 - 39)
-#define CR0_EMERGENCY_SIGNAL_SUBMASK BIT(63 - 49)
-#define CR0_EXTERNAL_CALL_SUBMASK BIT(63 - 50)
-#define CR0_CLOCK_COMPARATOR_SUBMASK BIT(63 - 52)
-#define CR0_CPU_TIMER_SUBMASK BIT(63 - 53)
-#define CR0_SERVICE_SIGNAL_SUBMASK BIT(63 - 54)
-#define CR0_UNUSED_56 BIT(63 - 56)
-#define CR0_INTERRUPT_KEY_SUBMASK BIT(63 - 57)
-#define CR0_MEASUREMENT_ALERT_SUBMASK BIT(63 - 58)
-
-#define CR14_UNUSED_32 BIT(63 - 32)
-#define CR14_UNUSED_33 BIT(63 - 33)
-#define CR14_CHANNEL_REPORT_SUBMASK BIT(63 - 35)
-#define CR14_RECOVERY_SUBMASK BIT(63 - 36)
-#define CR14_DEGRADATION_SUBMASK BIT(63 - 37)
-#define CR14_EXTERNAL_DAMAGE_SUBMASK BIT(63 - 38)
-#define CR14_WARNING_SUBMASK BIT(63 - 39)
-
-#ifndef __ASSEMBLY__
-
-#include <linux/bug.h>
-
-#define __ctl_load(array, low, high) do { \
- typedef struct { char _[sizeof(array)]; } addrtype; \
- \
- BUILD_BUG_ON(sizeof(addrtype) != (high - low + 1) * sizeof(long));\
- asm volatile( \
- " lctlg %1,%2,%0\n" \
- : \
- : "Q" (*(addrtype *)(&array)), "i" (low), "i" (high) \
- : "memory"); \
-} while (0)
-
-#define __ctl_store(array, low, high) do { \
- typedef struct { char _[sizeof(array)]; } addrtype; \
- \
- BUILD_BUG_ON(sizeof(addrtype) != (high - low + 1) * sizeof(long));\
- asm volatile( \
- " stctg %1,%2,%0\n" \
- : "=Q" (*(addrtype *)(&array)) \
- : "i" (low), "i" (high)); \
-} while (0)
-
-static __always_inline void __ctl_set_bit(unsigned int cr, unsigned int bit)
-{
- unsigned long reg;
-
- __ctl_store(reg, cr, cr);
- reg |= 1UL << bit;
- __ctl_load(reg, cr, cr);
-}
-
-static __always_inline void __ctl_clear_bit(unsigned int cr, unsigned int bit)
-{
- unsigned long reg;
-
- __ctl_store(reg, cr, cr);
- reg &= ~(1UL << bit);
- __ctl_load(reg, cr, cr);
-}
-
-void smp_ctl_set_clear_bit(int cr, int bit, bool set);
-
-static inline void ctl_set_bit(int cr, int bit)
-{
- smp_ctl_set_clear_bit(cr, bit, true);
-}
-
-static inline void ctl_clear_bit(int cr, int bit)
-{
- smp_ctl_set_clear_bit(cr, bit, false);
-}
-
-union ctlreg0 {
- unsigned long val;
- struct {
- unsigned long : 8;
- unsigned long tcx : 1; /* Transactional-Execution control */
- unsigned long pifo : 1; /* Transactional-Execution Program-
- Interruption-Filtering Override */
- unsigned long : 3;
- unsigned long ccc : 1; /* Cryptography counter control */
- unsigned long pec : 1; /* PAI extension control */
- unsigned long : 17;
- unsigned long : 3;
- unsigned long lap : 1; /* Low-address-protection control */
- unsigned long : 4;
- unsigned long edat : 1; /* Enhanced-DAT-enablement control */
- unsigned long : 2;
- unsigned long iep : 1; /* Instruction-Execution-Protection */
- unsigned long : 1;
- unsigned long afp : 1; /* AFP-register control */
- unsigned long vx : 1; /* Vector enablement control */
- unsigned long : 7;
- unsigned long sssm : 1; /* Service signal subclass mask */
- unsigned long : 9;
- };
-};
-
-union ctlreg2 {
- unsigned long val;
- struct {
- unsigned long : 33;
- unsigned long ducto : 25;
- unsigned long : 1;
- unsigned long gse : 1;
- unsigned long : 1;
- unsigned long tds : 1;
- unsigned long tdc : 2;
- };
-};
-
-union ctlreg5 {
- unsigned long val;
- struct {
- unsigned long : 33;
- unsigned long pasteo: 25;
- unsigned long : 6;
- };
-};
-
-union ctlreg15 {
- unsigned long val;
- struct {
- unsigned long lsea : 61;
- unsigned long : 3;
- };
-};
-
-#endif /* __ASSEMBLY__ */
-#endif /* __ASM_CTL_REG_H */
diff --git a/arch/s390/include/asm/ctlreg.h b/arch/s390/include/asm/ctlreg.h
new file mode 100644
index 000000000..72a9556d0
--- /dev/null
+++ b/arch/s390/include/asm/ctlreg.h
@@ -0,0 +1,255 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright IBM Corp. 1999, 2009
+ *
+ * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>
+ */
+
+#ifndef __ASM_S390_CTLREG_H
+#define __ASM_S390_CTLREG_H
+
+#include <linux/bits.h>
+
+#define CR0_TRANSACTIONAL_EXECUTION_BIT (63 - 8)
+#define CR0_CLOCK_COMPARATOR_SIGN_BIT (63 - 10)
+#define CR0_CRYPTOGRAPHY_COUNTER_BIT (63 - 13)
+#define CR0_PAI_EXTENSION_BIT (63 - 14)
+#define CR0_CPUMF_EXTRACTION_AUTH_BIT (63 - 15)
+#define CR0_WARNING_TRACK_BIT (63 - 30)
+#define CR0_LOW_ADDRESS_PROTECTION_BIT (63 - 35)
+#define CR0_FETCH_PROTECTION_OVERRIDE_BIT (63 - 38)
+#define CR0_STORAGE_PROTECTION_OVERRIDE_BIT (63 - 39)
+#define CR0_EDAT_BIT (63 - 40)
+#define CR0_INSTRUCTION_EXEC_PROTECTION_BIT (63 - 43)
+#define CR0_VECTOR_BIT (63 - 46)
+#define CR0_MALFUNCTION_ALERT_SUBMASK_BIT (63 - 48)
+#define CR0_EMERGENCY_SIGNAL_SUBMASK_BIT (63 - 49)
+#define CR0_EXTERNAL_CALL_SUBMASK_BIT (63 - 50)
+#define CR0_CLOCK_COMPARATOR_SUBMASK_BIT (63 - 52)
+#define CR0_CPU_TIMER_SUBMASK_BIT (63 - 53)
+#define CR0_SERVICE_SIGNAL_SUBMASK_BIT (63 - 54)
+#define CR0_UNUSED_56_BIT (63 - 56)
+#define CR0_INTERRUPT_KEY_SUBMASK_BIT (63 - 57)
+#define CR0_MEASUREMENT_ALERT_SUBMASK_BIT (63 - 58)
+#define CR0_ETR_SUBMASK_BIT (63 - 59)
+#define CR0_IUCV_BIT (63 - 62)
+
+#define CR0_TRANSACTIONAL_EXECUTION BIT(CR0_TRANSACTIONAL_EXECUTION_BIT)
+#define CR0_CLOCK_COMPARATOR_SIGN BIT(CR0_CLOCK_COMPARATOR_SIGN_BIT)
+#define CR0_CRYPTOGRAPHY_COUNTER BIT(CR0_CRYPTOGRAPHY_COUNTER_BIT)
+#define CR0_PAI_EXTENSION BIT(CR0_PAI_EXTENSION_BIT)
+#define CR0_CPUMF_EXTRACTION_AUTH BIT(CR0_CPUMF_EXTRACTION_AUTH_BIT)
+#define CR0_WARNING_TRACK BIT(CR0_WARNING_TRACK_BIT)
+#define CR0_LOW_ADDRESS_PROTECTION BIT(CR0_LOW_ADDRESS_PROTECTION_BIT)
+#define CR0_FETCH_PROTECTION_OVERRIDE BIT(CR0_FETCH_PROTECTION_OVERRIDE_BIT)
+#define CR0_STORAGE_PROTECTION_OVERRIDE BIT(CR0_STORAGE_PROTECTION_OVERRIDE_BIT)
+#define CR0_EDAT BIT(CR0_EDAT_BIT)
+#define CR0_INSTRUCTION_EXEC_PROTECTION BIT(CR0_INSTRUCTION_EXEC_PROTECTION_BIT)
+#define CR0_VECTOR BIT(CR0_VECTOR_BIT)
+#define CR0_MALFUNCTION_ALERT_SUBMASK BIT(CR0_MALFUNCTION_ALERT_SUBMASK_BIT)
+#define CR0_EMERGENCY_SIGNAL_SUBMASK BIT(CR0_EMERGENCY_SIGNAL_SUBMASK_BIT)
+#define CR0_EXTERNAL_CALL_SUBMASK BIT(CR0_EXTERNAL_CALL_SUBMASK_BIT)
+#define CR0_CLOCK_COMPARATOR_SUBMASK BIT(CR0_CLOCK_COMPARATOR_SUBMASK_BIT)
+#define CR0_CPU_TIMER_SUBMASK BIT(CR0_CPU_TIMER_SUBMASK_BIT)
+#define CR0_SERVICE_SIGNAL_SUBMASK BIT(CR0_SERVICE_SIGNAL_SUBMASK_BIT)
+#define CR0_UNUSED_56 BIT(CR0_UNUSED_56_BIT)
+#define CR0_INTERRUPT_KEY_SUBMASK BIT(CR0_INTERRUPT_KEY_SUBMASK_BIT)
+#define CR0_MEASUREMENT_ALERT_SUBMASK BIT(CR0_MEASUREMENT_ALERT_SUBMASK_BIT)
+#define CR0_ETR_SUBMASK BIT(CR0_ETR_SUBMASK_BIT)
+#define CR0_IUCV BIT(CR0_IUCV_BIT)
+
+#define CR2_MIO_ADDRESSING_BIT (63 - 58)
+#define CR2_GUARDED_STORAGE_BIT (63 - 59)
+
+#define CR2_MIO_ADDRESSING BIT(CR2_MIO_ADDRESSING_BIT)
+#define CR2_GUARDED_STORAGE BIT(CR2_GUARDED_STORAGE_BIT)
+
+#define CR14_UNUSED_32_BIT (63 - 32)
+#define CR14_UNUSED_33_BIT (63 - 33)
+#define CR14_CHANNEL_REPORT_SUBMASK_BIT (63 - 35)
+#define CR14_RECOVERY_SUBMASK_BIT (63 - 36)
+#define CR14_DEGRADATION_SUBMASK_BIT (63 - 37)
+#define CR14_EXTERNAL_DAMAGE_SUBMASK_BIT (63 - 38)
+#define CR14_WARNING_SUBMASK_BIT (63 - 39)
+
+#define CR14_UNUSED_32 BIT(CR14_UNUSED_32_BIT)
+#define CR14_UNUSED_33 BIT(CR14_UNUSED_33_BIT)
+#define CR14_CHANNEL_REPORT_SUBMASK BIT(CR14_CHANNEL_REPORT_SUBMASK_BIT)
+#define CR14_RECOVERY_SUBMASK BIT(CR14_RECOVERY_SUBMASK_BIT)
+#define CR14_DEGRADATION_SUBMASK BIT(CR14_DEGRADATION_SUBMASK_BIT)
+#define CR14_EXTERNAL_DAMAGE_SUBMASK BIT(CR14_EXTERNAL_DAMAGE_SUBMASK_BIT)
+#define CR14_WARNING_SUBMASK BIT(CR14_WARNING_SUBMASK_BIT)
+
+#ifndef __ASSEMBLY__
+
+#include <linux/bug.h>
+
+struct ctlreg {
+ unsigned long val;
+};
+
+#define __local_ctl_load(low, high, array) do { \
+ struct addrtype { \
+ char _[sizeof(array)]; \
+ }; \
+ int _high = high; \
+ int _low = low; \
+ int _esize; \
+ \
+ _esize = (_high - _low + 1) * sizeof(struct ctlreg); \
+ BUILD_BUG_ON(sizeof(struct addrtype) != _esize); \
+ typecheck(struct ctlreg, array[0]); \
+ asm volatile( \
+ " lctlg %[_low],%[_high],%[_arr]\n" \
+ : \
+ : [_arr] "Q" (*(struct addrtype *)(&array)), \
+ [_low] "i" (low), [_high] "i" (high) \
+ : "memory"); \
+} while (0)
+
+#define __local_ctl_store(low, high, array) do { \
+ struct addrtype { \
+ char _[sizeof(array)]; \
+ }; \
+ int _high = high; \
+ int _low = low; \
+ int _esize; \
+ \
+ _esize = (_high - _low + 1) * sizeof(struct ctlreg); \
+ BUILD_BUG_ON(sizeof(struct addrtype) != _esize); \
+ typecheck(struct ctlreg, array[0]); \
+ asm volatile( \
+ " stctg %[_low],%[_high],%[_arr]\n" \
+ : [_arr] "=Q" (*(struct addrtype *)(&array)) \
+ : [_low] "i" (low), [_high] "i" (high)); \
+} while (0)
+
+static __always_inline void local_ctl_load(unsigned int cr, struct ctlreg *reg)
+{
+ asm volatile(
+ " lctlg %[cr],%[cr],%[reg]\n"
+ :
+ : [reg] "Q" (*reg), [cr] "i" (cr)
+ : "memory");
+}
+
+static __always_inline void local_ctl_store(unsigned int cr, struct ctlreg *reg)
+{
+ asm volatile(
+ " stctg %[cr],%[cr],%[reg]\n"
+ : [reg] "=Q" (*reg)
+ : [cr] "i" (cr));
+}
+
+static __always_inline struct ctlreg local_ctl_set_bit(unsigned int cr, unsigned int bit)
+{
+ struct ctlreg new, old;
+
+ local_ctl_store(cr, &old);
+ new = old;
+ new.val |= 1UL << bit;
+ local_ctl_load(cr, &new);
+ return old;
+}
+
+static __always_inline struct ctlreg local_ctl_clear_bit(unsigned int cr, unsigned int bit)
+{
+ struct ctlreg new, old;
+
+ local_ctl_store(cr, &old);
+ new = old;
+ new.val &= ~(1UL << bit);
+ local_ctl_load(cr, &new);
+ return old;
+}
+
+struct lowcore;
+
+void system_ctlreg_lock(void);
+void system_ctlreg_unlock(void);
+void system_ctlreg_init_save_area(struct lowcore *lc);
+void system_ctlreg_modify(unsigned int cr, unsigned long data, int request);
+
+enum {
+ CTLREG_SET_BIT,
+ CTLREG_CLEAR_BIT,
+ CTLREG_LOAD,
+};
+
+static inline void system_ctl_set_bit(unsigned int cr, unsigned int bit)
+{
+ system_ctlreg_modify(cr, bit, CTLREG_SET_BIT);
+}
+
+static inline void system_ctl_clear_bit(unsigned int cr, unsigned int bit)
+{
+ system_ctlreg_modify(cr, bit, CTLREG_CLEAR_BIT);
+}
+
+static inline void system_ctl_load(unsigned int cr, struct ctlreg *reg)
+{
+ system_ctlreg_modify(cr, reg->val, CTLREG_LOAD);
+}
+
+union ctlreg0 {
+ unsigned long val;
+ struct ctlreg reg;
+ struct {
+ unsigned long : 8;
+ unsigned long tcx : 1; /* Transactional-Execution control */
+ unsigned long pifo : 1; /* Transactional-Execution Program-
+ Interruption-Filtering Override */
+ unsigned long : 3;
+ unsigned long ccc : 1; /* Cryptography counter control */
+ unsigned long pec : 1; /* PAI extension control */
+ unsigned long : 17;
+ unsigned long : 3;
+ unsigned long lap : 1; /* Low-address-protection control */
+ unsigned long : 4;
+ unsigned long edat : 1; /* Enhanced-DAT-enablement control */
+ unsigned long : 2;
+ unsigned long iep : 1; /* Instruction-Execution-Protection */
+ unsigned long : 1;
+ unsigned long afp : 1; /* AFP-register control */
+ unsigned long vx : 1; /* Vector enablement control */
+ unsigned long : 7;
+ unsigned long sssm : 1; /* Service signal subclass mask */
+ unsigned long : 9;
+ };
+};
+
+union ctlreg2 {
+ unsigned long val;
+ struct ctlreg reg;
+ struct {
+ unsigned long : 33;
+ unsigned long ducto : 25;
+ unsigned long : 1;
+ unsigned long gse : 1;
+ unsigned long : 1;
+ unsigned long tds : 1;
+ unsigned long tdc : 2;
+ };
+};
+
+union ctlreg5 {
+ unsigned long val;
+ struct ctlreg reg;
+ struct {
+ unsigned long : 33;
+ unsigned long pasteo: 25;
+ unsigned long : 6;
+ };
+};
+
+union ctlreg15 {
+ unsigned long val;
+ struct ctlreg reg;
+ struct {
+ unsigned long lsea : 61;
+ unsigned long : 3;
+ };
+};
+
+#endif /* __ASSEMBLY__ */
+#endif /* __ASM_S390_CTLREG_H */
diff --git a/arch/s390/include/asm/dwarf.h b/arch/s390/include/asm/dwarf.h
index 4f21ae561..390906b8e 100644
--- a/arch/s390/include/asm/dwarf.h
+++ b/arch/s390/include/asm/dwarf.h
@@ -9,6 +9,7 @@
#define CFI_DEF_CFA_OFFSET .cfi_def_cfa_offset
#define CFI_ADJUST_CFA_OFFSET .cfi_adjust_cfa_offset
#define CFI_RESTORE .cfi_restore
+#define CFI_REL_OFFSET .cfi_rel_offset
#ifdef CONFIG_AS_CFI_VAL_OFFSET
#define CFI_VAL_OFFSET .cfi_val_offset
diff --git a/arch/s390/include/asm/facility.h b/arch/s390/include/asm/facility.h
index 94b691902..796007125 100644
--- a/arch/s390/include/asm/facility.h
+++ b/arch/s390/include/asm/facility.h
@@ -111,4 +111,10 @@ static inline void stfle(u64 *stfle_fac_list, int size)
preempt_enable();
}
+/**
+ * stfle_size - Actual size of the facility list as specified by stfle
+ * (number of double words)
+ */
+unsigned int stfle_size(void);
+
#endif /* __ASM_FACILITY_H */
diff --git a/arch/s390/include/asm/fault.h b/arch/s390/include/asm/fault.h
new file mode 100644
index 000000000..d326f5660
--- /dev/null
+++ b/arch/s390/include/asm/fault.h
@@ -0,0 +1,28 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright IBM Corp. 1999, 2023
+ */
+#ifndef _ASM_S390_FAULT_H
+#define _ASM_S390_FAULT_H
+
+union teid {
+ unsigned long val;
+ struct {
+ unsigned long addr : 52; /* Translation-exception Address */
+ unsigned long fsi : 2; /* Access Exception Fetch/Store Indication */
+ unsigned long : 2;
+ unsigned long b56 : 1;
+ unsigned long : 3;
+ unsigned long b60 : 1;
+ unsigned long b61 : 1;
+ unsigned long as : 2; /* ASCE Identifier */
+ };
+};
+
+enum {
+ TEID_FSI_UNKNOWN = 0, /* Unknown whether fetch or store */
+ TEID_FSI_STORE = 1, /* Exception was due to store operation */
+ TEID_FSI_FETCH = 2 /* Exception was due to fetch operation */
+};
+
+#endif /* _ASM_S390_FAULT_H */
diff --git a/arch/s390/include/asm/fpu/api.h b/arch/s390/include/asm/fpu/api.h
index 9acf48e53..d6ca8bc6c 100644
--- a/arch/s390/include/asm/fpu/api.h
+++ b/arch/s390/include/asm/fpu/api.h
@@ -46,26 +46,33 @@
#include <linux/preempt.h>
#include <asm/asm-extable.h>
+#include <asm/fpu/internal.h>
void save_fpu_regs(void);
void load_fpu_regs(void);
void __load_fpu_regs(void);
-static inline int test_fp_ctl(u32 fpc)
+/**
+ * sfpc_safe - Set floating point control register safely.
+ * @fpc: new value for floating point control register
+ *
+ * Set floating point control register. This may lead to an exception,
+ * since a saved value may have been modified by user space (ptrace,
+ * signal return, kvm registers) to an invalid value. In such a case
+ * set the floating point control register to zero.
+ */
+static inline void sfpc_safe(u32 fpc)
{
- u32 orig_fpc;
- int rc;
-
- asm volatile(
- " efpc %1\n"
- " sfpc %2\n"
- "0: sfpc %1\n"
- " la %0,0\n"
- "1:\n"
- EX_TABLE(0b,1b)
- : "=d" (rc), "=&d" (orig_fpc)
- : "d" (fpc), "0" (-EINVAL));
- return rc;
+ asm volatile("\n"
+ "0: sfpc %[fpc]\n"
+ "1: nopr %%r7\n"
+ ".pushsection .fixup, \"ax\"\n"
+ "2: lghi %[fpc],0\n"
+ " jg 0b\n"
+ ".popsection\n"
+ EX_TABLE(1b, 2b)
+ : [fpc] "+d" (fpc)
+ : : "memory");
}
#define KERNEL_FPC 1
diff --git a/arch/s390/include/asm/fpu/internal.h b/arch/s390/include/asm/fpu/internal.h
index bbdadb1c9..d511c4cf5 100644
--- a/arch/s390/include/asm/fpu/internal.h
+++ b/arch/s390/include/asm/fpu/internal.h
@@ -10,9 +10,14 @@
#define _ASM_S390_FPU_INTERNAL_H
#include <linux/string.h>
-#include <asm/ctl_reg.h>
+#include <asm/facility.h>
#include <asm/fpu/types.h>
+static inline bool cpu_has_vx(void)
+{
+ return likely(test_facility(129));
+}
+
static inline void save_vx_regs(__vector128 *vxrs)
{
asm volatile(
@@ -42,7 +47,7 @@ static inline void fpregs_store(_s390_fp_regs *fpregs, struct fpu *fpu)
{
fpregs->pad = 0;
fpregs->fpc = fpu->fpc;
- if (MACHINE_HAS_VX)
+ if (cpu_has_vx())
convert_vx_to_fp((freg_t *)&fpregs->fprs, fpu->vxrs);
else
memcpy((freg_t *)&fpregs->fprs, fpu->fprs,
@@ -52,7 +57,7 @@ static inline void fpregs_store(_s390_fp_regs *fpregs, struct fpu *fpu)
static inline void fpregs_load(_s390_fp_regs *fpregs, struct fpu *fpu)
{
fpu->fpc = fpregs->fpc;
- if (MACHINE_HAS_VX)
+ if (cpu_has_vx())
convert_fp_to_vx(fpu->vxrs, (freg_t *)&fpregs->fprs);
else
memcpy(fpu->fprs, (freg_t *)&fpregs->fprs,
diff --git a/arch/s390/include/asm/gmap.h b/arch/s390/include/asm/gmap.h
index 5cc46e0dd..9725586f4 100644
--- a/arch/s390/include/asm/gmap.h
+++ b/arch/s390/include/asm/gmap.h
@@ -146,7 +146,7 @@ int gmap_mprotect_notify(struct gmap *, unsigned long start,
void gmap_sync_dirty_log_pmd(struct gmap *gmap, unsigned long dirty_bitmap[4],
unsigned long gaddr, unsigned long vmaddr);
-int gmap_mark_unmergeable(void);
+int s390_disable_cow_sharing(void);
void s390_unlist_old_asce(struct gmap *gmap);
int s390_replace_asce(struct gmap *gmap);
void s390_uv_destroy_pfns(unsigned long count, unsigned long *pfns);
diff --git a/arch/s390/include/asm/irq.h b/arch/s390/include/asm/irq.h
index 89902f754..54b42817f 100644
--- a/arch/s390/include/asm/irq.h
+++ b/arch/s390/include/asm/irq.h
@@ -31,6 +31,7 @@
#include <linux/percpu.h>
#include <linux/cache.h>
#include <linux/types.h>
+#include <asm/ctlreg.h>
enum interruption_class {
IRQEXT_CLK,
@@ -101,17 +102,17 @@ enum irq_subclass {
};
#define CR0_IRQ_SUBCLASS_MASK \
- ((1UL << (63 - 30)) /* Warning Track */ | \
- (1UL << (63 - 48)) /* Malfunction Alert */ | \
- (1UL << (63 - 49)) /* Emergency Signal */ | \
- (1UL << (63 - 50)) /* External Call */ | \
- (1UL << (63 - 52)) /* Clock Comparator */ | \
- (1UL << (63 - 53)) /* CPU Timer */ | \
- (1UL << (63 - 54)) /* Service Signal */ | \
- (1UL << (63 - 57)) /* Interrupt Key */ | \
- (1UL << (63 - 58)) /* Measurement Alert */ | \
- (1UL << (63 - 59)) /* Timing Alert */ | \
- (1UL << (63 - 62))) /* IUCV */
+ (CR0_WARNING_TRACK | \
+ CR0_MALFUNCTION_ALERT_SUBMASK | \
+ CR0_EMERGENCY_SIGNAL_SUBMASK | \
+ CR0_EXTERNAL_CALL_SUBMASK | \
+ CR0_CLOCK_COMPARATOR_SUBMASK | \
+ CR0_CPU_TIMER_SUBMASK | \
+ CR0_SERVICE_SIGNAL_SUBMASK | \
+ CR0_INTERRUPT_KEY_SUBMASK | \
+ CR0_MEASUREMENT_ALERT_SUBMASK | \
+ CR0_ETR_SUBMASK | \
+ CR0_IUCV)
void irq_subclass_register(enum irq_subclass subclass);
void irq_subclass_unregister(enum irq_subclass subclass);
diff --git a/arch/s390/include/asm/irq_work.h b/arch/s390/include/asm/irq_work.h
index 603783766..f00c9f610 100644
--- a/arch/s390/include/asm/irq_work.h
+++ b/arch/s390/include/asm/irq_work.h
@@ -7,6 +7,4 @@ static inline bool arch_irq_work_has_interrupt(void)
return true;
}
-void arch_irq_work_raise(void);
-
#endif /* _ASM_S390_IRQ_WORK_H */
diff --git a/arch/s390/include/asm/jump_label.h b/arch/s390/include/asm/jump_label.h
index 895f774bb..bf78cf381 100644
--- a/arch/s390/include/asm/jump_label.h
+++ b/arch/s390/include/asm/jump_label.h
@@ -25,7 +25,7 @@
*/
static __always_inline bool arch_static_branch(struct static_key *key, bool branch)
{
- asm_volatile_goto("0: brcl 0,%l[label]\n"
+ asm goto("0: brcl 0,%l[label]\n"
".pushsection __jump_table,\"aw\"\n"
".balign 8\n"
".long 0b-.,%l[label]-.\n"
@@ -39,7 +39,7 @@ label:
static __always_inline bool arch_static_branch_jump(struct static_key *key, bool branch)
{
- asm_volatile_goto("0: brcl 15,%l[label]\n"
+ asm goto("0: brcl 15,%l[label]\n"
".pushsection __jump_table,\"aw\"\n"
".balign 8\n"
".long 0b-.,%l[label]-.\n"
diff --git a/arch/s390/include/asm/kprobes.h b/arch/s390/include/asm/kprobes.h
index 83f732ca3..01f1682a7 100644
--- a/arch/s390/include/asm/kprobes.h
+++ b/arch/s390/include/asm/kprobes.h
@@ -15,6 +15,7 @@
* <grundym@us.ibm.com>
*/
#include <linux/types.h>
+#include <asm/ctlreg.h>
#include <asm-generic/kprobes.h>
#define BREAKPOINT_INSTRUCTION 0x0002
@@ -65,15 +66,13 @@ struct prev_kprobe {
struct kprobe_ctlblk {
unsigned long kprobe_status;
unsigned long kprobe_saved_imask;
- unsigned long kprobe_saved_ctl[3];
+ struct ctlreg kprobe_saved_ctl[3];
struct prev_kprobe prev_kprobe;
};
void arch_remove_kprobe(struct kprobe *p);
int kprobe_fault_handler(struct pt_regs *regs, int trapnr);
-int kprobe_exceptions_notify(struct notifier_block *self,
- unsigned long val, void *data);
#define flush_insn_slot(p) do { } while (0)
diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h
index 427f9528a..52664105a 100644
--- a/arch/s390/include/asm/kvm_host.h
+++ b/arch/s390/include/asm/kvm_host.h
@@ -777,6 +777,13 @@ struct kvm_vm_stat {
u64 inject_service_signal;
u64 inject_virtio;
u64 aen_forward;
+ u64 gmap_shadow_create;
+ u64 gmap_shadow_reuse;
+ u64 gmap_shadow_r1_entry;
+ u64 gmap_shadow_r2_entry;
+ u64 gmap_shadow_r3_entry;
+ u64 gmap_shadow_sg_entry;
+ u64 gmap_shadow_pg_entry;
};
struct kvm_arch_memory_slot {
@@ -811,7 +818,7 @@ struct s390_io_adapter {
struct kvm_s390_cpu_model {
/* facility mask supported by kvm & hosting machine */
- __u64 fac_mask[S390_ARCH_FAC_LIST_SIZE_U64];
+ __u64 fac_mask[S390_ARCH_FAC_MASK_SIZE_U64];
struct kvm_s390_vm_cpu_subfunc subfuncs;
/* facility list requested by guest (in dma page) */
__u64 *fac_list;
diff --git a/arch/s390/include/asm/lowcore.h b/arch/s390/include/asm/lowcore.h
index 69ccc464a..5dc1b6345 100644
--- a/arch/s390/include/asm/lowcore.h
+++ b/arch/s390/include/asm/lowcore.h
@@ -11,6 +11,7 @@
#include <linux/types.h>
#include <asm/ptrace.h>
+#include <asm/ctlreg.h>
#include <asm/cpu.h>
#include <asm/types.h>
@@ -139,8 +140,8 @@ struct lowcore {
__u32 restart_flags; /* 0x0384 */
/* Address space pointer. */
- __u64 kernel_asce; /* 0x0388 */
- __u64 user_asce; /* 0x0390 */
+ struct ctlreg kernel_asce; /* 0x0388 */
+ struct ctlreg user_asce; /* 0x0390 */
/*
* The lpp and current_pid fields form a
@@ -199,7 +200,7 @@ struct lowcore {
__u32 clock_comp_save_area[2]; /* 0x1330 */
__u64 last_break_save_area; /* 0x1338 */
__u32 access_regs_save_area[16]; /* 0x1340 */
- __u64 cregs_save_area[16]; /* 0x1380 */
+ struct ctlreg cregs_save_area[16]; /* 0x1380 */
__u8 pad_0x1400[0x1500-0x1400]; /* 0x1400 */
/* Cryptography-counter designation */
__u64 ccd; /* 0x1500 */
@@ -221,12 +222,4 @@ static inline void set_prefix(__u32 address)
asm volatile("spx %0" : : "Q" (address) : "memory");
}
-static inline __u32 store_prefix(void)
-{
- __u32 address;
-
- asm volatile("stpx %0" : "=Q" (address));
- return address;
-}
-
#endif /* _ASM_S390_LOWCORE_H */
diff --git a/arch/s390/include/asm/mmu.h b/arch/s390/include/asm/mmu.h
index 829d68e2c..4c2dc7abc 100644
--- a/arch/s390/include/asm/mmu.h
+++ b/arch/s390/include/asm/mmu.h
@@ -11,7 +11,6 @@ typedef struct {
cpumask_t cpu_attach_mask;
atomic_t flush_count;
unsigned int flush_mm;
- struct list_head pgtable_list;
struct list_head gmap_list;
unsigned long gmap_asce;
unsigned long asce;
@@ -33,13 +32,17 @@ typedef struct {
unsigned int uses_skeys:1;
/* The mmu context uses CMM. */
unsigned int uses_cmm:1;
+ /*
+ * The mmu context allows COW-sharing of memory pages (KSM, zeropage).
+ * Note that COW-sharing during fork() is currently always allowed.
+ */
+ unsigned int allow_cow_sharing:1;
/* The gmaps associated with this context are allowed to use huge pages. */
unsigned int allow_gmap_hpage_1m:1;
} mm_context_t;
#define INIT_MM_CONTEXT(name) \
.context.lock = __SPIN_LOCK_UNLOCKED(name.context.lock), \
- .context.pgtable_list = LIST_HEAD_INIT(name.context.pgtable_list), \
.context.gmap_list = LIST_HEAD_INIT(name.context.gmap_list),
#endif
diff --git a/arch/s390/include/asm/mmu_context.h b/arch/s390/include/asm/mmu_context.h
index 2a38af5a0..a7789a9f6 100644
--- a/arch/s390/include/asm/mmu_context.h
+++ b/arch/s390/include/asm/mmu_context.h
@@ -12,7 +12,7 @@
#include <linux/uaccess.h>
#include <linux/mm_types.h>
#include <asm/tlbflush.h>
-#include <asm/ctl_reg.h>
+#include <asm/ctlreg.h>
#include <asm-generic/mm_hooks.h>
#define init_new_context init_new_context
@@ -22,7 +22,6 @@ static inline int init_new_context(struct task_struct *tsk,
unsigned long asce_type, init_entry;
spin_lock_init(&mm->context.lock);
- INIT_LIST_HEAD(&mm->context.pgtable_list);
INIT_LIST_HEAD(&mm->context.gmap_list);
cpumask_clear(&mm->context.cpu_attach_mask);
atomic_set(&mm->context.flush_count, 0);
@@ -36,6 +35,7 @@ static inline int init_new_context(struct task_struct *tsk,
mm->context.has_pgste = 0;
mm->context.uses_skeys = 0;
mm->context.uses_cmm = 0;
+ mm->context.allow_cow_sharing = 1;
mm->context.allow_gmap_hpage_1m = 0;
#endif
switch (mm->context.asce_limit) {
@@ -78,10 +78,10 @@ static inline void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *
if (next == &init_mm)
S390_lowcore.user_asce = s390_invalid_asce;
else
- S390_lowcore.user_asce = next->context.asce;
+ S390_lowcore.user_asce.val = next->context.asce;
cpumask_set_cpu(cpu, &next->context.cpu_attach_mask);
/* Clear previous user-ASCE from CR7 */
- __ctl_load(s390_invalid_asce, 7, 7);
+ local_ctl_load(7, &s390_invalid_asce);
if (prev != next)
cpumask_clear_cpu(cpu, &prev->context.cpu_attach_mask);
}
@@ -111,7 +111,7 @@ static inline void finish_arch_post_lock_switch(void)
__tlb_flush_mm_lazy(mm);
preempt_enable();
}
- __ctl_load(S390_lowcore.user_asce, 7, 7);
+ local_ctl_load(7, &S390_lowcore.user_asce);
}
#define activate_mm activate_mm
@@ -120,7 +120,7 @@ static inline void activate_mm(struct mm_struct *prev,
{
switch_mm(prev, next, current);
cpumask_set_cpu(smp_processor_id(), mm_cpumask(next));
- __ctl_load(S390_lowcore.user_asce, 7, 7);
+ local_ctl_load(7, &S390_lowcore.user_asce);
}
#include <asm-generic/mmu_context.h>
diff --git a/arch/s390/include/asm/page-states.h b/arch/s390/include/asm/page-states.h
index c33c4deb5..08fcbd628 100644
--- a/arch/s390/include/asm/page-states.h
+++ b/arch/s390/include/asm/page-states.h
@@ -7,6 +7,9 @@
#ifndef PAGE_STATES_H
#define PAGE_STATES_H
+#include <asm/sections.h>
+#include <asm/page.h>
+
#define ESSA_GET_STATE 0
#define ESSA_SET_STABLE 1
#define ESSA_SET_UNUSED 2
@@ -18,4 +21,60 @@
#define ESSA_MAX ESSA_SET_STABLE_NODAT
+extern int __bootdata_preserved(cmma_flag);
+
+static __always_inline unsigned long essa(unsigned long paddr, unsigned char cmd)
+{
+ unsigned long rc;
+
+ asm volatile(
+ " .insn rrf,0xb9ab0000,%[rc],%[paddr],%[cmd],0"
+ : [rc] "=d" (rc)
+ : [paddr] "d" (paddr),
+ [cmd] "i" (cmd));
+ return rc;
+}
+
+static __always_inline void __set_page_state(void *addr, unsigned long num_pages, unsigned char cmd)
+{
+ unsigned long paddr = __pa(addr) & PAGE_MASK;
+
+ while (num_pages--) {
+ essa(paddr, cmd);
+ paddr += PAGE_SIZE;
+ }
+}
+
+static inline void __set_page_unused(void *addr, unsigned long num_pages)
+{
+ __set_page_state(addr, num_pages, ESSA_SET_UNUSED);
+}
+
+static inline void __set_page_stable_dat(void *addr, unsigned long num_pages)
+{
+ __set_page_state(addr, num_pages, ESSA_SET_STABLE);
+}
+
+static inline void __set_page_stable_nodat(void *addr, unsigned long num_pages)
+{
+ __set_page_state(addr, num_pages, ESSA_SET_STABLE_NODAT);
+}
+
+static inline void __arch_set_page_nodat(void *addr, unsigned long num_pages)
+{
+ if (!cmma_flag)
+ return;
+ if (cmma_flag < 2)
+ __set_page_stable_dat(addr, num_pages);
+ else
+ __set_page_stable_nodat(addr, num_pages);
+}
+
+static inline void __arch_set_page_dat(void *addr, unsigned long num_pages)
+{
+ if (!cmma_flag)
+ return;
+ __set_page_stable_dat(addr, num_pages);
+}
+
#endif
diff --git a/arch/s390/include/asm/page.h b/arch/s390/include/asm/page.h
index cfec07433..73b9c3bf3 100644
--- a/arch/s390/include/asm/page.h
+++ b/arch/s390/include/asm/page.h
@@ -164,7 +164,6 @@ static inline int page_reset_referenced(unsigned long addr)
struct page;
void arch_free_page(struct page *page, int order);
void arch_alloc_page(struct page *page, int order);
-void arch_set_page_dat(struct page *page, int order);
static inline int devmem_is_allowed(unsigned long pfn)
{
diff --git a/arch/s390/include/asm/pci.h b/arch/s390/include/asm/pci.h
index b248694e0..e91cd6bbc 100644
--- a/arch/s390/include/asm/pci.h
+++ b/arch/s390/include/asm/pci.h
@@ -159,13 +159,6 @@ struct zpci_dev {
unsigned long *dma_table;
int tlb_refresh;
- spinlock_t iommu_bitmap_lock;
- unsigned long *iommu_bitmap;
- unsigned long *lazy_bitmap;
- unsigned long iommu_size;
- unsigned long iommu_pages;
- unsigned int next_bit;
-
struct iommu_device iommu_dev; /* IOMMU core handle */
char res_name[16];
@@ -180,10 +173,6 @@ struct zpci_dev {
struct zpci_fmb *fmb;
u16 fmb_update; /* update interval */
u16 fmb_length;
- /* software counters */
- atomic64_t allocated_pages;
- atomic64_t mapped_pages;
- atomic64_t unmapped_pages;
u8 version;
enum pci_bus_speed max_bus_speed;
diff --git a/arch/s390/include/asm/pci_clp.h b/arch/s390/include/asm/pci_clp.h
index d6189ed14..f0c677ddd 100644
--- a/arch/s390/include/asm/pci_clp.h
+++ b/arch/s390/include/asm/pci_clp.h
@@ -50,6 +50,9 @@ struct clp_fh_list_entry {
#define CLP_UTIL_STR_LEN 64
#define CLP_PFIP_NR_SEGMENTS 4
+/* PCI function type numbers */
+#define PCI_FUNC_TYPE_ISM 0x5 /* ISM device */
+
extern bool zpci_unique_uid;
struct clp_rsp_slpc_pci {
diff --git a/arch/s390/include/asm/pci_dma.h b/arch/s390/include/asm/pci_dma.h
index 7119c04c5..42d7cc426 100644
--- a/arch/s390/include/asm/pci_dma.h
+++ b/arch/s390/include/asm/pci_dma.h
@@ -82,117 +82,16 @@ enum zpci_ioat_dtype {
#define ZPCI_TABLE_VALID_MASK 0x20
#define ZPCI_TABLE_PROT_MASK 0x200
-static inline unsigned int calc_rtx(dma_addr_t ptr)
-{
- return ((unsigned long) ptr >> ZPCI_RT_SHIFT) & ZPCI_INDEX_MASK;
-}
-
-static inline unsigned int calc_sx(dma_addr_t ptr)
-{
- return ((unsigned long) ptr >> ZPCI_ST_SHIFT) & ZPCI_INDEX_MASK;
-}
-
-static inline unsigned int calc_px(dma_addr_t ptr)
-{
- return ((unsigned long) ptr >> PAGE_SHIFT) & ZPCI_PT_MASK;
-}
-
-static inline void set_pt_pfaa(unsigned long *entry, phys_addr_t pfaa)
-{
- *entry &= ZPCI_PTE_FLAG_MASK;
- *entry |= (pfaa & ZPCI_PTE_ADDR_MASK);
-}
-
-static inline void set_rt_sto(unsigned long *entry, phys_addr_t sto)
-{
- *entry &= ZPCI_RTE_FLAG_MASK;
- *entry |= (sto & ZPCI_RTE_ADDR_MASK);
- *entry |= ZPCI_TABLE_TYPE_RTX;
-}
-
-static inline void set_st_pto(unsigned long *entry, phys_addr_t pto)
-{
- *entry &= ZPCI_STE_FLAG_MASK;
- *entry |= (pto & ZPCI_STE_ADDR_MASK);
- *entry |= ZPCI_TABLE_TYPE_SX;
-}
-
-static inline void validate_rt_entry(unsigned long *entry)
-{
- *entry &= ~ZPCI_TABLE_VALID_MASK;
- *entry &= ~ZPCI_TABLE_OFFSET_MASK;
- *entry |= ZPCI_TABLE_VALID;
- *entry |= ZPCI_TABLE_LEN_RTX;
-}
-
-static inline void validate_st_entry(unsigned long *entry)
-{
- *entry &= ~ZPCI_TABLE_VALID_MASK;
- *entry |= ZPCI_TABLE_VALID;
-}
-
-static inline void invalidate_pt_entry(unsigned long *entry)
-{
- WARN_ON_ONCE((*entry & ZPCI_PTE_VALID_MASK) == ZPCI_PTE_INVALID);
- *entry &= ~ZPCI_PTE_VALID_MASK;
- *entry |= ZPCI_PTE_INVALID;
-}
-
-static inline void validate_pt_entry(unsigned long *entry)
-{
- WARN_ON_ONCE((*entry & ZPCI_PTE_VALID_MASK) == ZPCI_PTE_VALID);
- *entry &= ~ZPCI_PTE_VALID_MASK;
- *entry |= ZPCI_PTE_VALID;
-}
-
-static inline void entry_set_protected(unsigned long *entry)
-{
- *entry &= ~ZPCI_TABLE_PROT_MASK;
- *entry |= ZPCI_TABLE_PROTECTED;
-}
-
-static inline void entry_clr_protected(unsigned long *entry)
-{
- *entry &= ~ZPCI_TABLE_PROT_MASK;
- *entry |= ZPCI_TABLE_UNPROTECTED;
-}
-
-static inline int reg_entry_isvalid(unsigned long entry)
-{
- return (entry & ZPCI_TABLE_VALID_MASK) == ZPCI_TABLE_VALID;
-}
-
-static inline int pt_entry_isvalid(unsigned long entry)
-{
- return (entry & ZPCI_PTE_VALID_MASK) == ZPCI_PTE_VALID;
-}
-
-static inline unsigned long *get_rt_sto(unsigned long entry)
-{
- if ((entry & ZPCI_TABLE_TYPE_MASK) == ZPCI_TABLE_TYPE_RTX)
- return phys_to_virt(entry & ZPCI_RTE_ADDR_MASK);
- else
- return NULL;
-
-}
-
-static inline unsigned long *get_st_pto(unsigned long entry)
-{
- if ((entry & ZPCI_TABLE_TYPE_MASK) == ZPCI_TABLE_TYPE_SX)
- return phys_to_virt(entry & ZPCI_STE_ADDR_MASK);
- else
- return NULL;
-}
-
-/* Prototypes */
-void dma_free_seg_table(unsigned long);
-unsigned long *dma_alloc_cpu_table(gfp_t gfp);
-void dma_cleanup_tables(unsigned long *);
-unsigned long *dma_walk_cpu_trans(unsigned long *rto, dma_addr_t dma_addr,
- gfp_t gfp);
-void dma_update_cpu_trans(unsigned long *entry, phys_addr_t page_addr, int flags);
-
-extern const struct dma_map_ops s390_pci_dma_ops;
+struct zpci_iommu_ctrs {
+ atomic64_t mapped_pages;
+ atomic64_t unmapped_pages;
+ atomic64_t global_rpcits;
+ atomic64_t sync_map_rpcits;
+ atomic64_t sync_rpcits;
+};
+
+struct zpci_dev;
+struct zpci_iommu_ctrs *zpci_get_iommu_ctrs(struct zpci_dev *zdev);
#endif
diff --git a/arch/s390/include/asm/pgalloc.h b/arch/s390/include/asm/pgalloc.h
index 376b4b23b..502d655fe 100644
--- a/arch/s390/include/asm/pgalloc.h
+++ b/arch/s390/include/asm/pgalloc.h
@@ -25,7 +25,6 @@ void crst_table_free(struct mm_struct *, unsigned long *);
unsigned long *page_table_alloc(struct mm_struct *);
struct page *page_table_alloc_pgste(struct mm_struct *mm);
void page_table_free(struct mm_struct *, unsigned long *);
-void page_table_free_rcu(struct mmu_gather *, unsigned long *, unsigned long);
void page_table_free_pgste(struct page *page);
extern int page_table_allocate_pgste;
diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h
index fb3ee7758..0a7055518 100644
--- a/arch/s390/include/asm/pgtable.h
+++ b/arch/s390/include/asm/pgtable.h
@@ -18,6 +18,7 @@
#include <linux/radix-tree.h>
#include <linux/atomic.h>
#include <asm/sections.h>
+#include <asm/ctlreg.h>
#include <asm/bug.h>
#include <asm/page.h>
#include <asm/uv.h>
@@ -25,7 +26,7 @@
extern pgd_t swapper_pg_dir[];
extern pgd_t invalid_pg_dir[];
extern void paging_init(void);
-extern unsigned long s390_invalid_asce;
+extern struct ctlreg s390_invalid_asce;
enum {
PG_DIRECT_MAP_4K = 0,
@@ -565,10 +566,20 @@ static inline pud_t set_pud_bit(pud_t pud, pgprot_t prot)
}
/*
- * In the case that a guest uses storage keys
- * faults should no longer be backed by zero pages
+ * As soon as the guest uses storage keys or enables PV, we deduplicate all
+ * mapped shared zeropages and prevent new shared zeropages from getting
+ * mapped.
*/
-#define mm_forbids_zeropage mm_has_pgste
+#define mm_forbids_zeropage mm_forbids_zeropage
+static inline int mm_forbids_zeropage(struct mm_struct *mm)
+{
+#ifdef CONFIG_PGSTE
+ if (!mm->context.allow_cow_sharing)
+ return 1;
+#endif
+ return 0;
+}
+
static inline int mm_uses_skeys(struct mm_struct *mm)
{
#ifdef CONFIG_PGSTE
@@ -729,7 +740,7 @@ static inline int pud_bad(pud_t pud)
{
unsigned long type = pud_val(pud) & _REGION_ENTRY_TYPE_MASK;
- if (type > _REGION_ENTRY_TYPE_R3 || pud_large(pud))
+ if (type > _REGION_ENTRY_TYPE_R3 || pud_leaf(pud))
return 1;
if (type < _REGION_ENTRY_TYPE_R3)
return 0;
@@ -769,6 +780,7 @@ static inline int pud_write(pud_t pud)
return (pud_val(pud) & _REGION3_ENTRY_WRITE) != 0;
}
+#define pmd_dirty pmd_dirty
static inline int pmd_dirty(pmd_t pmd)
{
return (pmd_val(pmd) & _SEGMENT_ENTRY_DIRTY) != 0;
@@ -1396,7 +1408,7 @@ static inline unsigned long pud_deref(pud_t pud)
unsigned long origin_mask;
origin_mask = _REGION_ENTRY_ORIGIN;
- if (pud_large(pud))
+ if (pud_leaf(pud))
origin_mask = _REGION3_ENTRY_ORIGIN_LARGE;
return (unsigned long)__va(pud_val(pud) & origin_mask);
}
diff --git a/arch/s390/include/asm/processor.h b/arch/s390/include/asm/processor.h
index dc17896a0..c0b6e74d8 100644
--- a/arch/s390/include/asm/processor.h
+++ b/arch/s390/include/asm/processor.h
@@ -184,11 +184,7 @@ struct thread_struct {
struct gs_cb *gs_cb; /* Current guarded storage cb */
struct gs_cb *gs_bc_cb; /* Broadcast guarded storage cb */
struct pgm_tdb trap_tdb; /* Transaction abort diagnose block */
- /*
- * Warning: 'fpu' is dynamically-sized. It *MUST* be at
- * the end.
- */
- struct fpu fpu; /* FP and VX register save area */
+ struct fpu fpu; /* FP and VX register save area */
};
/* Flag to disable transactions. */
@@ -228,7 +224,6 @@ typedef struct thread_struct thread_struct;
execve_tail(); \
} while (0)
-/* Forward declaration, a strange C thing */
struct task_struct;
struct mm_struct;
struct seq_file;
@@ -332,14 +327,36 @@ static inline unsigned long __extract_psw(void)
return (((unsigned long) reg1) << 32) | ((unsigned long) reg2);
}
-static inline void local_mcck_enable(void)
+static inline unsigned long __local_mcck_save(void)
{
- __load_psw_mask(__extract_psw() | PSW_MASK_MCHECK);
+ unsigned long mask = __extract_psw();
+
+ __load_psw_mask(mask & ~PSW_MASK_MCHECK);
+ return mask & PSW_MASK_MCHECK;
+}
+
+#define local_mcck_save(mflags) \
+do { \
+ typecheck(unsigned long, mflags); \
+ mflags = __local_mcck_save(); \
+} while (0)
+
+static inline void local_mcck_restore(unsigned long mflags)
+{
+ unsigned long mask = __extract_psw();
+
+ mask &= ~PSW_MASK_MCHECK;
+ __load_psw_mask(mask | mflags);
}
static inline void local_mcck_disable(void)
{
- __load_psw_mask(__extract_psw() & ~PSW_MASK_MCHECK);
+ __local_mcck_save();
+}
+
+static inline void local_mcck_enable(void)
+{
+ __load_psw_mask(__extract_psw() | PSW_MASK_MCHECK);
}
/*
diff --git a/arch/s390/include/asm/setup.h b/arch/s390/include/asm/setup.h
index 25cadc2b9..03bcaa8ef 100644
--- a/arch/s390/include/asm/setup.h
+++ b/arch/s390/include/asm/setup.h
@@ -28,7 +28,6 @@
#define MACHINE_FLAG_TOPOLOGY BIT(10)
#define MACHINE_FLAG_TE BIT(11)
#define MACHINE_FLAG_TLB_LC BIT(12)
-#define MACHINE_FLAG_VX BIT(13)
#define MACHINE_FLAG_TLB_GUEST BIT(14)
#define MACHINE_FLAG_NX BIT(15)
#define MACHINE_FLAG_GS BIT(16)
@@ -90,7 +89,6 @@ extern unsigned long mio_wb_bit_mask;
#define MACHINE_HAS_TOPOLOGY (S390_lowcore.machine_flags & MACHINE_FLAG_TOPOLOGY)
#define MACHINE_HAS_TE (S390_lowcore.machine_flags & MACHINE_FLAG_TE)
#define MACHINE_HAS_TLB_LC (S390_lowcore.machine_flags & MACHINE_FLAG_TLB_LC)
-#define MACHINE_HAS_VX (S390_lowcore.machine_flags & MACHINE_FLAG_VX)
#define MACHINE_HAS_TLB_GUEST (S390_lowcore.machine_flags & MACHINE_FLAG_TLB_GUEST)
#define MACHINE_HAS_NX (S390_lowcore.machine_flags & MACHINE_FLAG_NX)
#define MACHINE_HAS_GS (S390_lowcore.machine_flags & MACHINE_FLAG_GS)
@@ -125,9 +123,6 @@ static inline void vmcp_cma_reserve(void) { }
void report_user_fault(struct pt_regs *regs, long signr, int is_mm_fault);
-void cmma_init(void);
-void cmma_init_nodat(void);
-
extern void (*_machine_restart)(char *command);
extern void (*_machine_halt)(void);
extern void (*_machine_power_off)(void);
diff --git a/arch/s390/include/asm/smp.h b/arch/s390/include/asm/smp.h
index 73ed27810..6e5b1b4b1 100644
--- a/arch/s390/include/asm/smp.h
+++ b/arch/s390/include/asm/smp.h
@@ -59,7 +59,6 @@ static inline void smp_cpus_done(unsigned int max_cpus)
{
}
-extern int smp_reinit_ipl_cpu(void);
extern int smp_rescan_cpus(void);
extern void __noreturn cpu_die(void);
extern void __cpu_die(unsigned int cpu);
diff --git a/arch/s390/include/asm/stacktrace.h b/arch/s390/include/asm/stacktrace.h
index 78f7b729b..31ec4f545 100644
--- a/arch/s390/include/asm/stacktrace.h
+++ b/arch/s390/include/asm/stacktrace.h
@@ -6,6 +6,13 @@
#include <linux/ptrace.h>
#include <asm/switch_to.h>
+struct stack_frame_user {
+ unsigned long back_chain;
+ unsigned long empty1[5];
+ unsigned long gprs[10];
+ unsigned long empty2[4];
+};
+
enum stack_type {
STACK_TYPE_UNKNOWN,
STACK_TYPE_TASK,
diff --git a/arch/s390/include/asm/sysinfo.h b/arch/s390/include/asm/sysinfo.h
index ab1c63160..edca5a751 100644
--- a/arch/s390/include/asm/sysinfo.h
+++ b/arch/s390/include/asm/sysinfo.h
@@ -40,6 +40,10 @@ struct sysinfo_1_1_1 {
unsigned int ncr;
unsigned int npr;
unsigned int ntr;
+ char reserved_3[4];
+ char model_var_cap[16];
+ unsigned int model_var_cap_rating;
+ unsigned int nvr;
};
struct sysinfo_1_2_1 {
diff --git a/arch/s390/include/asm/tlb.h b/arch/s390/include/asm/tlb.h
index 383b1f914..d1455a601 100644
--- a/arch/s390/include/asm/tlb.h
+++ b/arch/s390/include/asm/tlb.h
@@ -69,12 +69,9 @@ static inline void pte_free_tlb(struct mmu_gather *tlb, pgtable_t pte,
tlb->mm->context.flush_mm = 1;
tlb->freed_tables = 1;
tlb->cleared_pmds = 1;
- /*
- * page_table_free_rcu takes care of the allocation bit masks
- * of the 2K table fragments in the 4K page table page,
- * then calls tlb_remove_table.
- */
- page_table_free_rcu(tlb, (unsigned long *) pte, address);
+ if (mm_alloc_pgste(tlb->mm))
+ gmap_unlink(tlb->mm, (unsigned long *)pte, address);
+ tlb_remove_ptdesc(tlb, pte);
}
/*
@@ -112,7 +109,7 @@ static inline void p4d_free_tlb(struct mmu_gather *tlb, p4d_t *p4d,
__tlb_adjust_range(tlb, address, PAGE_SIZE);
tlb->mm->context.flush_mm = 1;
tlb->freed_tables = 1;
- tlb_remove_table(tlb, p4d);
+ tlb_remove_ptdesc(tlb, p4d);
}
/*
@@ -130,7 +127,7 @@ static inline void pud_free_tlb(struct mmu_gather *tlb, pud_t *pud,
tlb->mm->context.flush_mm = 1;
tlb->freed_tables = 1;
tlb->cleared_p4ds = 1;
- tlb_remove_table(tlb, pud);
+ tlb_remove_ptdesc(tlb, pud);
}
diff --git a/arch/s390/include/asm/uaccess.h b/arch/s390/include/asm/uaccess.h
index 8a8c64a67..81ae8a98e 100644
--- a/arch/s390/include/asm/uaccess.h
+++ b/arch/s390/include/asm/uaccess.h
@@ -15,7 +15,6 @@
*/
#include <asm/asm-extable.h>
#include <asm/processor.h>
-#include <asm/ctl_reg.h>
#include <asm/extable.h>
#include <asm/facility.h>
#include <asm-generic/access_ok.h>
diff --git a/arch/s390/include/asm/word-at-a-time.h b/arch/s390/include/asm/word-at-a-time.h
new file mode 100644
index 000000000..2579f1694
--- /dev/null
+++ b/arch/s390/include/asm/word-at-a-time.h
@@ -0,0 +1,64 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_WORD_AT_A_TIME_H
+#define _ASM_WORD_AT_A_TIME_H
+
+#include <linux/kernel.h>
+#include <asm/asm-extable.h>
+#include <asm/bitsperlong.h>
+
+struct word_at_a_time {
+ const unsigned long bits;
+};
+
+#define WORD_AT_A_TIME_CONSTANTS { REPEAT_BYTE(0x7f) }
+
+static inline unsigned long prep_zero_mask(unsigned long val, unsigned long data, const struct word_at_a_time *c)
+{
+ return data;
+}
+
+static inline unsigned long create_zero_mask(unsigned long data)
+{
+ return __fls(data);
+}
+
+static inline unsigned long find_zero(unsigned long data)
+{
+ return (data ^ (BITS_PER_LONG - 1)) >> 3;
+}
+
+static inline unsigned long has_zero(unsigned long val, unsigned long *data, const struct word_at_a_time *c)
+{
+ unsigned long mask = (val & c->bits) + c->bits;
+
+ *data = ~(mask | val | c->bits);
+ return *data;
+}
+
+static inline unsigned long zero_bytemask(unsigned long data)
+{
+ return ~1UL << data;
+}
+
+/*
+ * Load an unaligned word from kernel space.
+ *
+ * In the (very unlikely) case of the word being a page-crosser
+ * and the next page not being mapped, take the exception and
+ * return zeroes in the non-existing part.
+ */
+static inline unsigned long load_unaligned_zeropad(const void *addr)
+{
+ unsigned long data;
+
+ asm volatile(
+ "0: lg %[data],0(%[addr])\n"
+ "1: nopr %%r7\n"
+ EX_TABLE_ZEROPAD(0b, 1b, %[data], %[addr])
+ EX_TABLE_ZEROPAD(1b, 1b, %[data], %[addr])
+ : [data] "=d" (data)
+ : [addr] "a" (addr), "m" (*(unsigned long *)addr));
+ return data;
+}
+
+#endif /* _ASM_WORD_AT_A_TIME_H */
diff --git a/arch/s390/kernel/Makefile b/arch/s390/kernel/Makefile
index 0df2b88cc..7a562b419 100644
--- a/arch/s390/kernel/Makefile
+++ b/arch/s390/kernel/Makefile
@@ -37,11 +37,11 @@ CFLAGS_unwind_bc.o += -fno-optimize-sibling-calls
obj-y := head64.o traps.o time.o process.o earlypgm.o early.o setup.o idle.o vtime.o
obj-y += processor.o syscall.o ptrace.o signal.o cpcmd.o ebcdic.o nmi.o
obj-y += debug.o irq.o ipl.o dis.o diag.o vdso.o cpufeature.o
-obj-y += sysinfo.o lgr.o os_info.o
+obj-y += sysinfo.o lgr.o os_info.o ctlreg.o
obj-y += runtime_instr.o cache.o fpu.o dumpstack.o guarded_storage.o sthyi.o
obj-y += entry.o reipl.o kdebugfs.o alternative.o
obj-y += nospec-branch.o ipl_vmparm.o machine_kexec_reloc.o unwind_bc.o
-obj-y += smp.o text_amode31.o stacktrace.o abs_lowcore.o
+obj-y += smp.o text_amode31.o stacktrace.o abs_lowcore.o facility.o
extra-y += vmlinux.lds
diff --git a/arch/s390/kernel/cache.c b/arch/s390/kernel/cache.c
index 56254fa06..4f2669030 100644
--- a/arch/s390/kernel/cache.c
+++ b/arch/s390/kernel/cache.c
@@ -166,5 +166,6 @@ int populate_cache_leaves(unsigned int cpu)
ci_leaf_init(this_leaf++, pvt, ctype, level, cpu);
}
}
+ this_cpu_ci->cpu_map_populated = true;
return 0;
}
diff --git a/arch/s390/kernel/compat_signal.c b/arch/s390/kernel/compat_signal.c
index cecedd01d..f8fc6c25d 100644
--- a/arch/s390/kernel/compat_signal.c
+++ b/arch/s390/kernel/compat_signal.c
@@ -29,6 +29,7 @@
#include <asm/lowcore.h>
#include <asm/switch_to.h>
#include <asm/vdso.h>
+#include <asm/fpu/api.h>
#include "compat_linux.h"
#include "compat_ptrace.h"
#include "entry.h"
@@ -98,10 +99,6 @@ static int restore_sigregs32(struct pt_regs *regs,_sigregs32 __user *sregs)
if (!is_ri_task(current) && (user_sregs.regs.psw.mask & PSW32_MASK_RI))
return -EINVAL;
- /* Test the floating-point-control word. */
- if (test_fp_ctl(user_sregs.fpregs.fpc))
- return -EINVAL;
-
/* Use regs->psw.mask instead of PSW_USER_BITS to preserve PER bit. */
regs->psw.mask = (regs->psw.mask & ~(PSW_MASK_USER | PSW_MASK_RI)) |
(__u64)(user_sregs.regs.psw.mask & PSW32_MASK_USER) << 32 |
@@ -137,7 +134,7 @@ static int save_sigregs_ext32(struct pt_regs *regs,
return -EFAULT;
/* Save vector registers to signal stack */
- if (MACHINE_HAS_VX) {
+ if (cpu_has_vx()) {
for (i = 0; i < __NUM_VXRS_LOW; i++)
vxrs[i] = current->thread.fpu.vxrs[i].low;
if (__copy_to_user(&sregs_ext->vxrs_low, vxrs,
@@ -165,7 +162,7 @@ static int restore_sigregs_ext32(struct pt_regs *regs,
*(__u32 *)&regs->gprs[i] = gprs_high[i];
/* Restore vector registers from signal stack */
- if (MACHINE_HAS_VX) {
+ if (cpu_has_vx()) {
if (__copy_from_user(vxrs, &sregs_ext->vxrs_low,
sizeof(sregs_ext->vxrs_low)) ||
__copy_from_user(current->thread.fpu.vxrs + __NUM_VXRS_LOW,
@@ -265,7 +262,7 @@ static int setup_frame32(struct ksignal *ksig, sigset_t *set,
* the machine supports it
*/
frame_size = sizeof(*frame) - sizeof(frame->sregs_ext.__reserved);
- if (!MACHINE_HAS_VX)
+ if (!cpu_has_vx())
frame_size -= sizeof(frame->sregs_ext.vxrs_low) +
sizeof(frame->sregs_ext.vxrs_high);
frame = get_sigframe(&ksig->ka, regs, frame_size);
@@ -348,11 +345,12 @@ static int setup_rt_frame32(struct ksignal *ksig, sigset_t *set,
* the machine supports it
*/
uc_flags = UC_GPRS_HIGH;
- if (MACHINE_HAS_VX) {
+ if (cpu_has_vx()) {
uc_flags |= UC_VXRS;
- } else
+ } else {
frame_size -= sizeof(frame->uc.uc_mcontext_ext.vxrs_low) +
sizeof(frame->uc.uc_mcontext_ext.vxrs_high);
+ }
frame = get_sigframe(&ksig->ka, regs, frame_size);
if (frame == (void __user *) -1UL)
return -EFAULT;
diff --git a/arch/s390/kernel/crash_dump.c b/arch/s390/kernel/crash_dump.c
index 7af69948b..5c46c2659 100644
--- a/arch/s390/kernel/crash_dump.c
+++ b/arch/s390/kernel/crash_dump.c
@@ -22,6 +22,7 @@
#include <asm/ipl.h>
#include <asm/sclp.h>
#include <asm/maccess.h>
+#include <asm/fpu/api.h>
#define PTR_ADD(x, y) (((char *) (x)) + ((unsigned long) (y)))
#define PTR_SUB(x, y) (((char *) (x)) - ((unsigned long) (y)))
@@ -319,7 +320,7 @@ static void *fill_cpu_elf_notes(void *ptr, int cpu, struct save_area *sa)
ptr = nt_init(ptr, NT_S390_TODPREG, &sa->todpreg, sizeof(sa->todpreg));
ptr = nt_init(ptr, NT_S390_CTRS, &sa->ctrs, sizeof(sa->ctrs));
ptr = nt_init(ptr, NT_S390_PREFIX, &sa->prefix, sizeof(sa->prefix));
- if (MACHINE_HAS_VX) {
+ if (cpu_has_vx()) {
ptr = nt_init(ptr, NT_S390_VXRS_HIGH,
&sa->vxrs_high, sizeof(sa->vxrs_high));
ptr = nt_init(ptr, NT_S390_VXRS_LOW,
@@ -343,7 +344,7 @@ static size_t get_cpu_elf_notes_size(void)
size += nt_size(NT_S390_TODPREG, sizeof(sa->todpreg));
size += nt_size(NT_S390_CTRS, sizeof(sa->ctrs));
size += nt_size(NT_S390_PREFIX, sizeof(sa->prefix));
- if (MACHINE_HAS_VX) {
+ if (cpu_has_vx()) {
size += nt_size(NT_S390_VXRS_HIGH, sizeof(sa->vxrs_high));
size += nt_size(NT_S390_VXRS_LOW, sizeof(sa->vxrs_low));
}
@@ -498,7 +499,7 @@ static int get_mem_chunk_cnt(void)
/*
* Initialize ELF loads (new kernel)
*/
-static void loads_init(Elf64_Phdr *phdr, u64 loads_offset)
+static void loads_init(Elf64_Phdr *phdr)
{
phys_addr_t start, end;
u64 idx;
@@ -507,7 +508,7 @@ static void loads_init(Elf64_Phdr *phdr, u64 loads_offset)
phdr->p_filesz = end - start;
phdr->p_type = PT_LOAD;
phdr->p_offset = start;
- phdr->p_vaddr = start;
+ phdr->p_vaddr = (unsigned long)__va(start);
phdr->p_paddr = start;
phdr->p_memsz = end - start;
phdr->p_flags = PF_R | PF_W | PF_X;
@@ -612,7 +613,7 @@ int elfcorehdr_alloc(unsigned long long *addr, unsigned long long *size)
ptr = notes_init(phdr_notes, ptr, ((unsigned long) hdr) + hdr_off);
/* Init loads */
hdr_off = PTR_DIFF(ptr, hdr);
- loads_init(phdr_loads, hdr_off);
+ loads_init(phdr_loads);
*addr = (unsigned long long) hdr;
*size = (unsigned long long) hdr_off;
BUG_ON(elfcorehdr_size > alloc_size);
diff --git a/arch/s390/kernel/ctlreg.c b/arch/s390/kernel/ctlreg.c
new file mode 100644
index 000000000..8cc26cf2c
--- /dev/null
+++ b/arch/s390/kernel/ctlreg.c
@@ -0,0 +1,121 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright IBM Corp. 1999, 2023
+ */
+
+#include <linux/irqflags.h>
+#include <linux/spinlock.h>
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/smp.h>
+#include <linux/cache.h>
+#include <asm/abs_lowcore.h>
+#include <asm/ctlreg.h>
+
+/*
+ * ctl_lock guards access to global control register contents which
+ * are kept in the control register save area within absolute lowcore
+ * at physical address zero.
+ */
+static DEFINE_SPINLOCK(system_ctl_lock);
+
+void system_ctlreg_lock(void)
+ __acquires(&system_ctl_lock)
+{
+ spin_lock(&system_ctl_lock);
+}
+
+void system_ctlreg_unlock(void)
+ __releases(&system_ctl_lock)
+{
+ spin_unlock(&system_ctl_lock);
+}
+
+static bool system_ctlreg_area_init __ro_after_init;
+
+void __init system_ctlreg_init_save_area(struct lowcore *lc)
+{
+ struct lowcore *abs_lc;
+
+ abs_lc = get_abs_lowcore();
+ __local_ctl_store(0, 15, lc->cregs_save_area);
+ __local_ctl_store(0, 15, abs_lc->cregs_save_area);
+ put_abs_lowcore(abs_lc);
+ system_ctlreg_area_init = true;
+}
+
+struct ctlreg_parms {
+ unsigned long andval;
+ unsigned long orval;
+ unsigned long val;
+ int request;
+ int cr;
+};
+
+static void ctlreg_callback(void *info)
+{
+ struct ctlreg_parms *pp = info;
+ struct ctlreg regs[16];
+
+ __local_ctl_store(0, 15, regs);
+ if (pp->request == CTLREG_LOAD) {
+ regs[pp->cr].val = pp->val;
+ } else {
+ regs[pp->cr].val &= pp->andval;
+ regs[pp->cr].val |= pp->orval;
+ }
+ __local_ctl_load(0, 15, regs);
+}
+
+static void system_ctlreg_update(void *info)
+{
+ unsigned long flags;
+
+ if (system_state == SYSTEM_BOOTING) {
+ /*
+ * For very early calls do not call on_each_cpu()
+ * since not everything might be setup.
+ */
+ local_irq_save(flags);
+ ctlreg_callback(info);
+ local_irq_restore(flags);
+ } else {
+ on_each_cpu(ctlreg_callback, info, 1);
+ }
+}
+
+void system_ctlreg_modify(unsigned int cr, unsigned long data, int request)
+{
+ struct ctlreg_parms pp = { .cr = cr, .request = request, };
+ struct lowcore *abs_lc;
+
+ switch (request) {
+ case CTLREG_SET_BIT:
+ pp.orval = 1UL << data;
+ pp.andval = -1UL;
+ break;
+ case CTLREG_CLEAR_BIT:
+ pp.orval = 0;
+ pp.andval = ~(1UL << data);
+ break;
+ case CTLREG_LOAD:
+ pp.val = data;
+ break;
+ }
+ if (system_ctlreg_area_init) {
+ system_ctlreg_lock();
+ abs_lc = get_abs_lowcore();
+ if (request == CTLREG_LOAD) {
+ abs_lc->cregs_save_area[cr].val = pp.val;
+ } else {
+ abs_lc->cregs_save_area[cr].val &= pp.andval;
+ abs_lc->cregs_save_area[cr].val |= pp.orval;
+ }
+ put_abs_lowcore(abs_lc);
+ system_ctlreg_update(&pp);
+ system_ctlreg_unlock();
+ } else {
+ system_ctlreg_update(&pp);
+ }
+}
+EXPORT_SYMBOL(system_ctlreg_modify);
diff --git a/arch/s390/kernel/debug.c b/arch/s390/kernel/debug.c
index a85e0c3e7..85328a0ef 100644
--- a/arch/s390/kernel/debug.c
+++ b/arch/s390/kernel/debug.c
@@ -978,7 +978,6 @@ static struct ctl_table s390dbf_table[] = {
.mode = S_IRUGO | S_IWUSR,
.proc_handler = s390dbf_procactive,
},
- { }
};
static struct ctl_table_header *s390dbf_sysctl_header;
diff --git a/arch/s390/kernel/diag.c b/arch/s390/kernel/diag.c
index f9f06cd8f..92fdc35f0 100644
--- a/arch/s390/kernel/diag.c
+++ b/arch/s390/kernel/diag.c
@@ -245,6 +245,7 @@ EXPORT_SYMBOL(diag8c);
int diag224(void *ptr)
{
+ unsigned long addr = __pa(ptr);
int rc = -EOPNOTSUPP;
diag_stat_inc(DIAG_STAT_X224);
@@ -253,7 +254,7 @@ int diag224(void *ptr)
"0: lhi %0,0x0\n"
"1:\n"
EX_TABLE(0b,1b)
- : "+d" (rc) :"d" (0), "d" (ptr) : "memory");
+ : "+d" (rc) :"d" (0), "d" (addr) : "memory");
return rc;
}
EXPORT_SYMBOL(diag224);
diff --git a/arch/s390/kernel/early.c b/arch/s390/kernel/early.c
index 442ce0489..2345ea332 100644
--- a/arch/s390/kernel/early.c
+++ b/arch/s390/kernel/early.c
@@ -46,6 +46,7 @@ decompressor_handled_param(vmalloc);
decompressor_handled_param(dfltcc);
decompressor_handled_param(facilities);
decompressor_handled_param(nokaslr);
+decompressor_handled_param(cmma);
#if IS_ENABLED(CONFIG_KVM)
decompressor_handled_param(prot_virt);
#endif
@@ -216,7 +217,7 @@ static __init void detect_machine_facilities(void)
{
if (test_facility(8)) {
S390_lowcore.machine_flags |= MACHINE_FLAG_EDAT1;
- __ctl_set_bit(0, 23);
+ system_ctl_set_bit(0, CR0_EDAT_BIT);
}
if (test_facility(78))
S390_lowcore.machine_flags |= MACHINE_FLAG_EDAT2;
@@ -224,14 +225,12 @@ static __init void detect_machine_facilities(void)
S390_lowcore.machine_flags |= MACHINE_FLAG_IDTE;
if (test_facility(50) && test_facility(73)) {
S390_lowcore.machine_flags |= MACHINE_FLAG_TE;
- __ctl_set_bit(0, 55);
+ system_ctl_set_bit(0, CR0_TRANSACTIONAL_EXECUTION_BIT);
}
if (test_facility(51))
S390_lowcore.machine_flags |= MACHINE_FLAG_TLB_LC;
- if (test_facility(129)) {
- S390_lowcore.machine_flags |= MACHINE_FLAG_VX;
- __ctl_set_bit(0, 17);
- }
+ if (test_facility(129))
+ system_ctl_set_bit(0, CR0_VECTOR_BIT);
if (test_facility(130))
S390_lowcore.machine_flags |= MACHINE_FLAG_NX;
if (test_facility(133))
@@ -240,7 +239,7 @@ static __init void detect_machine_facilities(void)
/* Enabled signed clock comparator comparisons */
S390_lowcore.machine_flags |= MACHINE_FLAG_SCC;
clock_comparator_max = -1ULL >> 1;
- __ctl_set_bit(0, 53);
+ system_ctl_set_bit(0, CR0_CLOCK_COMPARATOR_SIGN_BIT);
}
if (IS_ENABLED(CONFIG_PCI) && test_facility(153)) {
S390_lowcore.machine_flags |= MACHINE_FLAG_PCI_MIO;
@@ -258,15 +257,9 @@ static inline void save_vector_registers(void)
#endif
}
-static inline void setup_control_registers(void)
+static inline void setup_low_address_protection(void)
{
- unsigned long reg;
-
- __ctl_store(reg, 0, 0);
- reg |= CR0_LOW_ADDRESS_PROTECTION;
- reg |= CR0_EMERGENCY_SIGNAL_SUBMASK;
- reg |= CR0_EXTERNAL_CALL_SUBMASK;
- __ctl_load(reg, 0, 0);
+ system_ctl_set_bit(0, CR0_LOW_ADDRESS_PROTECTION_BIT);
}
static inline void setup_access_registers(void)
@@ -276,14 +269,6 @@ static inline void setup_access_registers(void)
restore_access_regs(acrs);
}
-static int __init disable_vector_extension(char *str)
-{
- S390_lowcore.machine_flags &= ~MACHINE_FLAG_VX;
- __ctl_clear_bit(0, 17);
- return 0;
-}
-early_param("novx", disable_vector_extension);
-
char __bootdata(early_command_line)[COMMAND_LINE_SIZE];
static void __init setup_boot_command_line(void)
{
@@ -314,7 +299,7 @@ void __init startup_init(void)
save_vector_registers();
setup_topology();
sclp_early_detect();
- setup_control_registers();
+ setup_low_address_protection();
setup_access_registers();
lockdep_on();
}
diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S
index 49a11f6dd..26c08ee87 100644
--- a/arch/s390/kernel/entry.S
+++ b/arch/s390/kernel/entry.S
@@ -653,6 +653,7 @@ SYM_DATA_START_LOCAL(daton_psw)
SYM_DATA_END(daton_psw)
.section .rodata, "a"
+ .balign 8
#define SYSCALL(esame,emu) .quad __s390x_ ## esame
SYM_DATA_START(sys_call_table)
#include "asm/syscall_table.h"
diff --git a/arch/s390/kernel/facility.c b/arch/s390/kernel/facility.c
new file mode 100644
index 000000000..f02127219
--- /dev/null
+++ b/arch/s390/kernel/facility.c
@@ -0,0 +1,21 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright IBM Corp. 2023
+ */
+
+#include <asm/facility.h>
+
+unsigned int stfle_size(void)
+{
+ static unsigned int size;
+ unsigned int r;
+ u64 dummy;
+
+ r = READ_ONCE(size);
+ if (!r) {
+ r = __stfle_asm(&dummy, 1) + 1;
+ WRITE_ONCE(size, r);
+ }
+ return r;
+}
+EXPORT_SYMBOL(stfle_size);
diff --git a/arch/s390/kernel/fpu.c b/arch/s390/kernel/fpu.c
index 4666b29ac..a4f3449cc 100644
--- a/arch/s390/kernel/fpu.c
+++ b/arch/s390/kernel/fpu.c
@@ -24,7 +24,7 @@ void __kernel_fpu_begin(struct kernel_fpu *state, u32 flags)
/* Save floating point control */
asm volatile("stfpc %0" : "=Q" (state->fpc));
- if (!MACHINE_HAS_VX) {
+ if (!cpu_has_vx()) {
if (flags & KERNEL_VXR_V0V7) {
/* Save floating-point registers */
asm volatile("std 0,%0" : "=Q" (state->fprs[0]));
@@ -106,7 +106,7 @@ void __kernel_fpu_end(struct kernel_fpu *state, u32 flags)
/* Restore floating-point controls */
asm volatile("lfpc %0" : : "Q" (state->fpc));
- if (!MACHINE_HAS_VX) {
+ if (!cpu_has_vx()) {
if (flags & KERNEL_VXR_V0V7) {
/* Restore floating-point registers */
asm volatile("ld 0,%0" : : "Q" (state->fprs[0]));
@@ -177,11 +177,11 @@ EXPORT_SYMBOL(__kernel_fpu_end);
void __load_fpu_regs(void)
{
- struct fpu *state = &current->thread.fpu;
unsigned long *regs = current->thread.fpu.regs;
+ struct fpu *state = &current->thread.fpu;
- asm volatile("lfpc %0" : : "Q" (state->fpc));
- if (likely(MACHINE_HAS_VX)) {
+ sfpc_safe(state->fpc);
+ if (likely(cpu_has_vx())) {
asm volatile("lgr 1,%0\n"
"VLM 0,15,0,1\n"
"VLM 16,31,256,1\n"
@@ -208,7 +208,6 @@ void __load_fpu_regs(void)
}
clear_cpu_flag(CIF_FPU);
}
-EXPORT_SYMBOL(__load_fpu_regs);
void load_fpu_regs(void)
{
@@ -232,7 +231,7 @@ void save_fpu_regs(void)
regs = current->thread.fpu.regs;
asm volatile("stfpc %0" : "=Q" (state->fpc));
- if (likely(MACHINE_HAS_VX)) {
+ if (likely(cpu_has_vx())) {
asm volatile("lgr 1,%0\n"
"VSTM 0,15,0,1\n"
"VSTM 16,31,256,1\n"
diff --git a/arch/s390/kernel/guarded_storage.c b/arch/s390/kernel/guarded_storage.c
index d14dd1c2e..0b68168d9 100644
--- a/arch/s390/kernel/guarded_storage.c
+++ b/arch/s390/kernel/guarded_storage.c
@@ -28,7 +28,7 @@ static int gs_enable(void)
return -ENOMEM;
gs_cb->gsd = 25;
preempt_disable();
- __ctl_set_bit(2, 4);
+ local_ctl_set_bit(2, CR2_GUARDED_STORAGE_BIT);
load_gs_cb(gs_cb);
current->thread.gs_cb = gs_cb;
preempt_enable();
@@ -42,7 +42,7 @@ static int gs_disable(void)
preempt_disable();
kfree(current->thread.gs_cb);
current->thread.gs_cb = NULL;
- __ctl_clear_bit(2, 4);
+ local_ctl_clear_bit(2, CR2_GUARDED_STORAGE_BIT);
preempt_enable();
}
return 0;
@@ -84,7 +84,7 @@ void gs_load_bc_cb(struct pt_regs *regs)
if (gs_cb) {
kfree(current->thread.gs_cb);
current->thread.gs_bc_cb = NULL;
- __ctl_set_bit(2, 4);
+ local_ctl_set_bit(2, CR2_GUARDED_STORAGE_BIT);
load_gs_cb(gs_cb);
current->thread.gs_cb = gs_cb;
}
diff --git a/arch/s390/kernel/ipl.c b/arch/s390/kernel/ipl.c
index 8d0b95c17..ba75f6bee 100644
--- a/arch/s390/kernel/ipl.c
+++ b/arch/s390/kernel/ipl.c
@@ -2382,7 +2382,7 @@ void s390_reset_system(void)
set_prefix(0);
/* Disable lowcore protection */
- __ctl_clear_bit(0, 28);
+ local_ctl_clear_bit(0, CR0_LOW_ADDRESS_PROTECTION_BIT);
diag_amode31_ops.diag308_reset();
}
diff --git a/arch/s390/kernel/irq.c b/arch/s390/kernel/irq.c
index b020ff17d..6f71b0ce1 100644
--- a/arch/s390/kernel/irq.c
+++ b/arch/s390/kernel/irq.c
@@ -385,7 +385,7 @@ void irq_subclass_register(enum irq_subclass subclass)
{
spin_lock(&irq_subclass_lock);
if (!irq_subclass_refcount[subclass])
- ctl_set_bit(0, subclass);
+ system_ctl_set_bit(0, subclass);
irq_subclass_refcount[subclass]++;
spin_unlock(&irq_subclass_lock);
}
@@ -396,7 +396,7 @@ void irq_subclass_unregister(enum irq_subclass subclass)
spin_lock(&irq_subclass_lock);
irq_subclass_refcount[subclass]--;
if (!irq_subclass_refcount[subclass])
- ctl_clear_bit(0, subclass);
+ system_ctl_clear_bit(0, subclass);
spin_unlock(&irq_subclass_lock);
}
EXPORT_SYMBOL(irq_subclass_unregister);
diff --git a/arch/s390/kernel/kprobes.c b/arch/s390/kernel/kprobes.c
index d4b863ed0..f0cf20d4b 100644
--- a/arch/s390/kernel/kprobes.c
+++ b/arch/s390/kernel/kprobes.c
@@ -224,20 +224,27 @@ static void enable_singlestep(struct kprobe_ctlblk *kcb,
struct pt_regs *regs,
unsigned long ip)
{
- struct per_regs per_kprobe;
+ union {
+ struct ctlreg regs[3];
+ struct {
+ struct ctlreg control;
+ struct ctlreg start;
+ struct ctlreg end;
+ };
+ } per_kprobe;
/* Set up the PER control registers %cr9-%cr11 */
- per_kprobe.control = PER_EVENT_IFETCH;
- per_kprobe.start = ip;
- per_kprobe.end = ip;
+ per_kprobe.control.val = PER_EVENT_IFETCH;
+ per_kprobe.start.val = ip;
+ per_kprobe.end.val = ip;
/* Save control regs and psw mask */
- __ctl_store(kcb->kprobe_saved_ctl, 9, 11);
+ __local_ctl_store(9, 11, kcb->kprobe_saved_ctl);
kcb->kprobe_saved_imask = regs->psw.mask &
(PSW_MASK_PER | PSW_MASK_IO | PSW_MASK_EXT);
/* Set PER control regs, turns on single step for the given address */
- __ctl_load(per_kprobe, 9, 11);
+ __local_ctl_load(9, 11, per_kprobe.regs);
regs->psw.mask |= PSW_MASK_PER;
regs->psw.mask &= ~(PSW_MASK_IO | PSW_MASK_EXT);
regs->psw.addr = ip;
@@ -249,7 +256,7 @@ static void disable_singlestep(struct kprobe_ctlblk *kcb,
unsigned long ip)
{
/* Restore control regs and psw mask, set new psw address */
- __ctl_load(kcb->kprobe_saved_ctl, 9, 11);
+ __local_ctl_load(9, 11, kcb->kprobe_saved_ctl);
regs->psw.mask &= ~PSW_MASK_PER;
regs->psw.mask |= kcb->kprobe_saved_imask;
regs->psw.addr = ip;
diff --git a/arch/s390/kernel/machine_kexec.c b/arch/s390/kernel/machine_kexec.c
index ce65fc016..aa22ffc16 100644
--- a/arch/s390/kernel/machine_kexec.c
+++ b/arch/s390/kernel/machine_kexec.c
@@ -91,15 +91,15 @@ static noinline void __machine_kdump(void *image)
}
/* Store status of the boot CPU */
mcesa = __va(S390_lowcore.mcesad & MCESA_ORIGIN_MASK);
- if (MACHINE_HAS_VX)
+ if (cpu_has_vx())
save_vx_regs((__vector128 *) mcesa->vector_save_area);
if (MACHINE_HAS_GS) {
- __ctl_store(cr2_old.val, 2, 2);
+ local_ctl_store(2, &cr2_old.reg);
cr2_new = cr2_old;
cr2_new.gse = 1;
- __ctl_load(cr2_new.val, 2, 2);
+ local_ctl_load(2, &cr2_new.reg);
save_gs_cb((struct gs_cb *) mcesa->guarded_storage_save_area);
- __ctl_load(cr2_old.val, 2, 2);
+ local_ctl_load(2, &cr2_old.reg);
}
/*
* To create a good backchain for this CPU in the dump store_status
diff --git a/arch/s390/kernel/nmi.c b/arch/s390/kernel/nmi.c
index 38ec04875..9ad44c26d 100644
--- a/arch/s390/kernel/nmi.c
+++ b/arch/s390/kernel/nmi.c
@@ -22,16 +22,17 @@
#include <linux/kvm_host.h>
#include <linux/export.h>
#include <asm/lowcore.h>
+#include <asm/ctlreg.h>
#include <asm/smp.h>
#include <asm/stp.h>
#include <asm/cputime.h>
#include <asm/nmi.h>
#include <asm/crw.h>
#include <asm/switch_to.h>
-#include <asm/ctl_reg.h>
#include <asm/asm-offsets.h>
#include <asm/pai.h>
#include <asm/vx-insn.h>
+#include <asm/fpu/api.h>
struct mcck_struct {
unsigned int kill_task : 1;
@@ -45,7 +46,7 @@ static DEFINE_PER_CPU(struct mcck_struct, cpu_mcck);
static inline int nmi_needs_mcesa(void)
{
- return MACHINE_HAS_VX || MACHINE_HAS_GS;
+ return cpu_has_vx() || MACHINE_HAS_GS;
}
/*
@@ -131,10 +132,10 @@ static notrace void s390_handle_damage(void)
* Disable low address protection and make machine check new PSW a
* disabled wait PSW. Any additional machine check cannot be handled.
*/
- __ctl_store(cr0.val, 0, 0);
+ local_ctl_store(0, &cr0.reg);
cr0_new = cr0;
cr0_new.lap = 0;
- __ctl_load(cr0_new.val, 0, 0);
+ local_ctl_load(0, &cr0_new.reg);
psw_save = S390_lowcore.mcck_new_psw;
psw_bits(S390_lowcore.mcck_new_psw).io = 0;
psw_bits(S390_lowcore.mcck_new_psw).ext = 0;
@@ -146,7 +147,7 @@ static notrace void s390_handle_damage(void)
* values. This makes possible system dump analysis easier.
*/
S390_lowcore.mcck_new_psw = psw_save;
- __ctl_load(cr0.val, 0, 0);
+ local_ctl_load(0, &cr0.reg);
disabled_wait();
while (1);
}
@@ -159,16 +160,17 @@ NOKPROBE_SYMBOL(s390_handle_damage);
void s390_handle_mcck(void)
{
struct mcck_struct mcck;
+ unsigned long mflags;
/*
* Disable machine checks and get the current state of accumulated
* machine checks. Afterwards delete the old state and enable machine
* checks again.
*/
- local_mcck_disable();
+ local_mcck_save(mflags);
mcck = *this_cpu_ptr(&cpu_mcck);
memset(this_cpu_ptr(&cpu_mcck), 0, sizeof(mcck));
- local_mcck_enable();
+ local_mcck_restore(mflags);
if (mcck.channel_report)
crw_handle_channel_report();
@@ -185,7 +187,7 @@ void s390_handle_mcck(void)
static int mchchk_wng_posted = 0;
/* Use single cpu clear, as we cannot handle smp here. */
- __ctl_clear_bit(14, 24); /* Disable WARNING MCH */
+ local_ctl_clear_bit(14, CR14_WARNING_SUBMASK_BIT);
if (xchg(&mchchk_wng_posted, 1) == 0)
kill_cad_pid(SIGPWR, 1);
}
@@ -234,7 +236,7 @@ static int notrace s390_validate_registers(union mci mci)
}
mcesa = __va(S390_lowcore.mcesad & MCESA_ORIGIN_MASK);
- if (!MACHINE_HAS_VX) {
+ if (!cpu_has_vx()) {
/* Validate floating point registers */
asm volatile(
" ld 0,0(%0)\n"
@@ -269,9 +271,9 @@ static int notrace s390_validate_registers(union mci mci)
*/
if (!mci.vr && !test_cpu_flag(CIF_MCCK_GUEST))
kill_task = 1;
- cr0.val = S390_lowcore.cregs_save_area[0];
+ cr0.reg = S390_lowcore.cregs_save_area[0];
cr0.afp = cr0.vx = 1;
- __ctl_load(cr0.val, 0, 0);
+ local_ctl_load(0, &cr0.reg);
asm volatile(
" la 1,%0\n"
" VLM 0,15,0,1\n"
@@ -279,7 +281,7 @@ static int notrace s390_validate_registers(union mci mci)
:
: "Q" (*(struct vx_array *)mcesa->vector_save_area)
: "1");
- __ctl_load(S390_lowcore.cregs_save_area[0], 0, 0);
+ local_ctl_load(0, &S390_lowcore.cregs_save_area[0]);
}
/* Validate access registers */
asm volatile(
@@ -290,7 +292,7 @@ static int notrace s390_validate_registers(union mci mci)
if (!mci.ar)
kill_task = 1;
/* Validate guarded storage registers */
- cr2.val = S390_lowcore.cregs_save_area[2];
+ cr2.reg = S390_lowcore.cregs_save_area[2];
if (cr2.gse) {
if (!mci.gs) {
/*
@@ -505,9 +507,9 @@ NOKPROBE_SYMBOL(s390_do_machine_check);
static int __init machine_check_init(void)
{
- ctl_set_bit(14, 25); /* enable external damage MCH */
- ctl_set_bit(14, 27); /* enable system recovery MCH */
- ctl_set_bit(14, 24); /* enable warning MCH */
+ system_ctl_set_bit(14, CR14_EXTERNAL_DAMAGE_SUBMASK_BIT);
+ system_ctl_set_bit(14, CR14_RECOVERY_SUBMASK_BIT);
+ system_ctl_set_bit(14, CR14_WARNING_SUBMASK_BIT);
return 0;
}
early_initcall(machine_check_init);
diff --git a/arch/s390/kernel/perf_cpum_cf.c b/arch/s390/kernel/perf_cpum_cf.c
index 850c11ea6..41ed6e0f0 100644
--- a/arch/s390/kernel/perf_cpum_cf.c
+++ b/arch/s390/kernel/perf_cpum_cf.c
@@ -1193,7 +1193,7 @@ static int __init cpumf_pmu_init(void)
* Clear bit 15 of cr0 to unauthorize problem-state to
* extract measurement counters
*/
- ctl_clear_bit(0, 48);
+ system_ctl_clear_bit(0, CR0_CPUMF_EXTRACTION_AUTH_BIT);
/* register handler for measurement-alert interruptions */
rc = register_external_irq(EXT_IRQ_MEASURE_ALERT,
diff --git a/arch/s390/kernel/perf_event.c b/arch/s390/kernel/perf_event.c
index c27321cb0..dfa77da2f 100644
--- a/arch/s390/kernel/perf_event.c
+++ b/arch/s390/kernel/perf_event.c
@@ -15,7 +15,10 @@
#include <linux/export.h>
#include <linux/seq_file.h>
#include <linux/spinlock.h>
+#include <linux/uaccess.h>
+#include <linux/compat.h>
#include <linux/sysfs.h>
+#include <asm/stacktrace.h>
#include <asm/irq.h>
#include <asm/cpu_mf.h>
#include <asm/lowcore.h>
@@ -212,6 +215,44 @@ void perf_callchain_kernel(struct perf_callchain_entry_ctx *entry,
}
}
+void perf_callchain_user(struct perf_callchain_entry_ctx *entry,
+ struct pt_regs *regs)
+{
+ struct stack_frame_user __user *sf;
+ unsigned long ip, sp;
+ bool first = true;
+
+ if (is_compat_task())
+ return;
+ perf_callchain_store(entry, instruction_pointer(regs));
+ sf = (void __user *)user_stack_pointer(regs);
+ pagefault_disable();
+ while (entry->nr < entry->max_stack) {
+ if (__get_user(sp, &sf->back_chain))
+ break;
+ if (__get_user(ip, &sf->gprs[8]))
+ break;
+ if (ip & 0x1) {
+ /*
+ * If the instruction address is invalid, and this
+ * is the first stack frame, assume r14 has not
+ * been written to the stack yet. Otherwise exit.
+ */
+ if (first && !(regs->gprs[14] & 0x1))
+ ip = regs->gprs[14];
+ else
+ break;
+ }
+ perf_callchain_store(entry, ip);
+ /* Sanity check: ABI requires SP to be aligned 8 bytes. */
+ if (!sp || sp & 0x7)
+ break;
+ sf = (void __user *)sp;
+ first = false;
+ }
+ pagefault_enable();
+}
+
/* Perf definitions for PMU event attributes in sysfs */
ssize_t cpumf_events_sysfs_show(struct device *dev,
struct device_attribute *attr, char *page)
diff --git a/arch/s390/kernel/perf_pai_crypto.c b/arch/s390/kernel/perf_pai_crypto.c
index fe7d1774d..335e3f5d7 100644
--- a/arch/s390/kernel/perf_pai_crypto.c
+++ b/arch/s390/kernel/perf_pai_crypto.c
@@ -16,8 +16,7 @@
#include <linux/export.h>
#include <linux/io.h>
#include <linux/perf_event.h>
-
-#include <asm/ctl_reg.h>
+#include <asm/ctlreg.h>
#include <asm/pai.h>
#include <asm/debug.h>
@@ -41,7 +40,43 @@ struct paicrypt_map {
struct perf_event *event; /* Perf event for sampling */
};
-static DEFINE_PER_CPU(struct paicrypt_map, paicrypt_map);
+struct paicrypt_mapptr {
+ struct paicrypt_map *mapptr;
+};
+
+static struct paicrypt_root { /* Anchor to per CPU data */
+ refcount_t refcnt; /* Overall active events */
+ struct paicrypt_mapptr __percpu *mapptr;
+} paicrypt_root;
+
+/* Free per CPU data when the last event is removed. */
+static void paicrypt_root_free(void)
+{
+ if (refcount_dec_and_test(&paicrypt_root.refcnt)) {
+ free_percpu(paicrypt_root.mapptr);
+ paicrypt_root.mapptr = NULL;
+ }
+ debug_sprintf_event(cfm_dbg, 5, "%s root.refcount %d\n", __func__,
+ refcount_read(&paicrypt_root.refcnt));
+}
+
+/*
+ * On initialization of first event also allocate per CPU data dynamically.
+ * Start with an array of pointers, the array size is the maximum number of
+ * CPUs possible, which might be larger than the number of CPUs currently
+ * online.
+ */
+static int paicrypt_root_alloc(void)
+{
+ if (!refcount_inc_not_zero(&paicrypt_root.refcnt)) {
+ /* The memory is already zeroed. */
+ paicrypt_root.mapptr = alloc_percpu(struct paicrypt_mapptr);
+ if (!paicrypt_root.mapptr)
+ return -ENOMEM;
+ refcount_set(&paicrypt_root.refcnt, 1);
+ }
+ return 0;
+}
/* Release the PMU if event is the last perf event */
static DEFINE_MUTEX(pai_reserve_mutex);
@@ -51,9 +86,10 @@ static DEFINE_MUTEX(pai_reserve_mutex);
*/
static void paicrypt_event_destroy(struct perf_event *event)
{
- struct paicrypt_map *cpump = per_cpu_ptr(&paicrypt_map, event->cpu);
+ struct paicrypt_mapptr *mp = per_cpu_ptr(paicrypt_root.mapptr,
+ event->cpu);
+ struct paicrypt_map *cpump = mp->mapptr;
- cpump->event = NULL;
static_branch_dec(&pai_key);
mutex_lock(&pai_reserve_mutex);
debug_sprintf_event(cfm_dbg, 5, "%s event %#llx cpu %d users %d"
@@ -66,19 +102,19 @@ static void paicrypt_event_destroy(struct perf_event *event)
__func__, (unsigned long)cpump->page,
cpump->save);
free_page((unsigned long)cpump->page);
- cpump->page = NULL;
kvfree(cpump->save);
- cpump->save = NULL;
- cpump->mode = PAI_MODE_NONE;
+ kfree(cpump);
+ mp->mapptr = NULL;
}
+ paicrypt_root_free();
mutex_unlock(&pai_reserve_mutex);
}
-static u64 paicrypt_getctr(struct paicrypt_map *cpump, int nr, bool kernel)
+static u64 paicrypt_getctr(unsigned long *page, int nr, bool kernel)
{
if (kernel)
nr += PAI_CRYPTO_MAXCTR;
- return cpump->page[nr];
+ return page[nr];
}
/* Read the counter values. Return value from location in CMP. For event
@@ -86,18 +122,19 @@ static u64 paicrypt_getctr(struct paicrypt_map *cpump, int nr, bool kernel)
*/
static u64 paicrypt_getdata(struct perf_event *event, bool kernel)
{
- struct paicrypt_map *cpump = this_cpu_ptr(&paicrypt_map);
+ struct paicrypt_mapptr *mp = this_cpu_ptr(paicrypt_root.mapptr);
+ struct paicrypt_map *cpump = mp->mapptr;
u64 sum = 0;
int i;
if (event->attr.config != PAI_CRYPTO_BASE) {
- return paicrypt_getctr(cpump,
+ return paicrypt_getctr(cpump->page,
event->attr.config - PAI_CRYPTO_BASE,
kernel);
}
for (i = 1; i <= paicrypt_cnt; i++) {
- u64 val = paicrypt_getctr(cpump, i, kernel);
+ u64 val = paicrypt_getctr(cpump->page, i, kernel);
if (!val)
continue;
@@ -132,11 +169,31 @@ static u64 paicrypt_getall(struct perf_event *event)
*
* Allocate the memory for the event.
*/
-static int paicrypt_busy(struct perf_event_attr *a, struct paicrypt_map *cpump)
+static struct paicrypt_map *paicrypt_busy(struct perf_event *event)
{
- int rc = 0;
+ struct perf_event_attr *a = &event->attr;
+ struct paicrypt_map *cpump = NULL;
+ struct paicrypt_mapptr *mp;
+ int rc;
mutex_lock(&pai_reserve_mutex);
+
+ /* Allocate root node */
+ rc = paicrypt_root_alloc();
+ if (rc)
+ goto unlock;
+
+ /* Allocate node for this event */
+ mp = per_cpu_ptr(paicrypt_root.mapptr, event->cpu);
+ cpump = mp->mapptr;
+ if (!cpump) { /* Paicrypt_map allocated? */
+ cpump = kzalloc(sizeof(*cpump), GFP_KERNEL);
+ if (!cpump) {
+ rc = -ENOMEM;
+ goto free_root;
+ }
+ }
+
if (a->sample_period) { /* Sampling requested */
if (cpump->mode != PAI_MODE_NONE)
rc = -EBUSY; /* ... sampling/counting active */
@@ -144,8 +201,15 @@ static int paicrypt_busy(struct perf_event_attr *a, struct paicrypt_map *cpump)
if (cpump->mode == PAI_MODE_SAMPLING)
rc = -EBUSY; /* ... and sampling active */
}
+ /*
+ * This error case triggers when there is a conflict:
+ * Either sampling requested and counting already active, or visa
+ * versa. Therefore the struct paicrypto_map for this CPU is
+ * needed or the error could not have occurred. Only adjust root
+ * node refcount.
+ */
if (rc)
- goto unlock;
+ goto free_root;
/* Allocate memory for counter page and counter extraction.
* Only the first counting event has to allocate a page.
@@ -158,30 +222,36 @@ static int paicrypt_busy(struct perf_event_attr *a, struct paicrypt_map *cpump)
rc = -ENOMEM;
cpump->page = (unsigned long *)get_zeroed_page(GFP_KERNEL);
if (!cpump->page)
- goto unlock;
+ goto free_paicrypt_map;
cpump->save = kvmalloc_array(paicrypt_cnt + 1,
sizeof(struct pai_userdata), GFP_KERNEL);
if (!cpump->save) {
free_page((unsigned long)cpump->page);
cpump->page = NULL;
- goto unlock;
+ goto free_paicrypt_map;
}
+
+ /* Set mode and reference count */
rc = 0;
refcount_set(&cpump->refcnt, 1);
-
-unlock:
- /* If rc is non-zero, do not set mode and reference count */
- if (!rc) {
- cpump->mode = a->sample_period ? PAI_MODE_SAMPLING
- : PAI_MODE_COUNTING;
- }
+ cpump->mode = a->sample_period ? PAI_MODE_SAMPLING : PAI_MODE_COUNTING;
+ mp->mapptr = cpump;
debug_sprintf_event(cfm_dbg, 5, "%s sample_period %#llx users %d"
" mode %d refcnt %u page %#lx save %p rc %d\n",
__func__, a->sample_period, cpump->active_events,
cpump->mode, refcount_read(&cpump->refcnt),
(unsigned long)cpump->page, cpump->save, rc);
+ goto unlock;
+
+free_paicrypt_map:
+ kfree(cpump);
+ mp->mapptr = NULL;
+free_root:
+ paicrypt_root_free();
+
+unlock:
mutex_unlock(&pai_reserve_mutex);
- return rc;
+ return rc ? ERR_PTR(rc) : cpump;
}
/* Might be called on different CPU than the one the event is intended for. */
@@ -189,7 +259,6 @@ static int paicrypt_event_init(struct perf_event *event)
{
struct perf_event_attr *a = &event->attr;
struct paicrypt_map *cpump;
- int rc;
/* PAI crypto PMU registered as PERF_TYPE_RAW, check event type */
if (a->type != PERF_TYPE_RAW && event->pmu->type != a->type)
@@ -199,24 +268,16 @@ static int paicrypt_event_init(struct perf_event *event)
a->config > PAI_CRYPTO_BASE + paicrypt_cnt)
return -EINVAL;
/* Allow only CPU wide operation, no process context for now. */
- if (event->hw.target || event->cpu == -1)
+ if ((event->attach_state & PERF_ATTACH_TASK) || event->cpu == -1)
return -ENOENT;
/* Allow only CRYPTO_ALL for sampling. */
if (a->sample_period && a->config != PAI_CRYPTO_BASE)
return -EINVAL;
- cpump = per_cpu_ptr(&paicrypt_map, event->cpu);
- rc = paicrypt_busy(a, cpump);
- if (rc)
- return rc;
+ cpump = paicrypt_busy(event);
+ if (IS_ERR(cpump))
+ return PTR_ERR(cpump);
- /* Event initialization sets last_tag to 0. When later on the events
- * are deleted and re-added, do not reset the event count value to zero.
- * Events are added, deleted and re-added when 2 or more events
- * are active at the same time.
- */
- event->hw.last_tag = 0;
- cpump->event = event;
event->destroy = paicrypt_event_destroy;
if (a->sample_period) {
@@ -250,52 +311,62 @@ static void paicrypt_start(struct perf_event *event, int flags)
{
u64 sum;
- if (!event->hw.last_tag) {
- event->hw.last_tag = 1;
- sum = paicrypt_getall(event); /* Get current value */
- local64_set(&event->count, 0);
- local64_set(&event->hw.prev_count, sum);
+ /* Event initialization sets last_tag to 0. When later on the events
+ * are deleted and re-added, do not reset the event count value to zero.
+ * Events are added, deleted and re-added when 2 or more events
+ * are active at the same time.
+ */
+ if (!event->attr.sample_period) { /* Counting */
+ if (!event->hw.last_tag) {
+ event->hw.last_tag = 1;
+ sum = paicrypt_getall(event); /* Get current value */
+ local64_set(&event->hw.prev_count, sum);
+ }
+ } else { /* Sampling */
+ perf_sched_cb_inc(event->pmu);
}
}
static int paicrypt_add(struct perf_event *event, int flags)
{
- struct paicrypt_map *cpump = this_cpu_ptr(&paicrypt_map);
+ struct paicrypt_mapptr *mp = this_cpu_ptr(paicrypt_root.mapptr);
+ struct paicrypt_map *cpump = mp->mapptr;
unsigned long ccd;
if (++cpump->active_events == 1) {
ccd = virt_to_phys(cpump->page) | PAI_CRYPTO_KERNEL_OFFSET;
WRITE_ONCE(S390_lowcore.ccd, ccd);
- __ctl_set_bit(0, 50);
+ local_ctl_set_bit(0, CR0_CRYPTOGRAPHY_COUNTER_BIT);
}
cpump->event = event;
- if (flags & PERF_EF_START && !event->attr.sample_period) {
- /* Only counting needs initial counter value */
+ if (flags & PERF_EF_START)
paicrypt_start(event, PERF_EF_RELOAD);
- }
event->hw.state = 0;
- if (event->attr.sample_period)
- perf_sched_cb_inc(event->pmu);
return 0;
}
static void paicrypt_stop(struct perf_event *event, int flags)
{
- paicrypt_read(event);
+ struct paicrypt_mapptr *mp = this_cpu_ptr(paicrypt_root.mapptr);
+ struct paicrypt_map *cpump = mp->mapptr;
+
+ if (!event->attr.sample_period) { /* Counting */
+ paicrypt_read(event);
+ } else { /* Sampling */
+ perf_sched_cb_dec(event->pmu);
+ cpump->event = NULL;
+ }
event->hw.state = PERF_HES_STOPPED;
}
static void paicrypt_del(struct perf_event *event, int flags)
{
- struct paicrypt_map *cpump = this_cpu_ptr(&paicrypt_map);
+ struct paicrypt_mapptr *mp = this_cpu_ptr(paicrypt_root.mapptr);
+ struct paicrypt_map *cpump = mp->mapptr;
- if (event->attr.sample_period)
- perf_sched_cb_dec(event->pmu);
- if (!event->attr.sample_period)
- /* Only counting needs to read counter */
- paicrypt_stop(event, PERF_EF_UPDATE);
+ paicrypt_stop(event, PERF_EF_UPDATE);
if (--cpump->active_events == 0) {
- __ctl_clear_bit(0, 50);
+ local_ctl_clear_bit(0, CR0_CRYPTOGRAPHY_COUNTER_BIT);
WRITE_ONCE(S390_lowcore.ccd, 0);
}
}
@@ -305,8 +376,7 @@ static void paicrypt_del(struct perf_event *event, int flags)
* 2 bytes: Number of counter
* 8 bytes: Value of counter
*/
-static size_t paicrypt_copy(struct pai_userdata *userdata,
- struct paicrypt_map *cpump,
+static size_t paicrypt_copy(struct pai_userdata *userdata, unsigned long *page,
bool exclude_user, bool exclude_kernel)
{
int i, outidx = 0;
@@ -315,9 +385,9 @@ static size_t paicrypt_copy(struct pai_userdata *userdata,
u64 val = 0;
if (!exclude_kernel)
- val += paicrypt_getctr(cpump, i, true);
+ val += paicrypt_getctr(page, i, true);
if (!exclude_user)
- val += paicrypt_getctr(cpump, i, false);
+ val += paicrypt_getctr(page, i, false);
if (val) {
userdata[outidx].num = i;
userdata[outidx].value = val;
@@ -327,24 +397,14 @@ static size_t paicrypt_copy(struct pai_userdata *userdata,
return outidx * sizeof(struct pai_userdata);
}
-static int paicrypt_push_sample(void)
+static int paicrypt_push_sample(size_t rawsize, struct paicrypt_map *cpump,
+ struct perf_event *event)
{
- struct paicrypt_map *cpump = this_cpu_ptr(&paicrypt_map);
- struct perf_event *event = cpump->event;
struct perf_sample_data data;
struct perf_raw_record raw;
struct pt_regs regs;
- size_t rawsize;
int overflow;
- if (!cpump->event) /* No event active */
- return 0;
- rawsize = paicrypt_copy(cpump->save, cpump,
- cpump->event->attr.exclude_user,
- cpump->event->attr.exclude_kernel);
- if (!rawsize) /* No incremented counters */
- return 0;
-
/* Setup perf sample */
memset(&regs, 0, sizeof(regs));
memset(&raw, 0, sizeof(raw));
@@ -375,6 +435,25 @@ static int paicrypt_push_sample(void)
return overflow;
}
+/* Check if there is data to be saved on schedule out of a task. */
+static int paicrypt_have_sample(void)
+{
+ struct paicrypt_mapptr *mp = this_cpu_ptr(paicrypt_root.mapptr);
+ struct paicrypt_map *cpump = mp->mapptr;
+ struct perf_event *event = cpump->event;
+ size_t rawsize;
+ int rc = 0;
+
+ if (!event) /* No event active */
+ return 0;
+ rawsize = paicrypt_copy(cpump->save, cpump->page,
+ cpump->event->attr.exclude_user,
+ cpump->event->attr.exclude_kernel);
+ if (rawsize) /* No incremented counters */
+ rc = paicrypt_push_sample(rawsize, cpump, event);
+ return rc;
+}
+
/* Called on schedule-in and schedule-out. No access to event structure,
* but for sampling only event CRYPTO_ALL is allowed.
*/
@@ -384,7 +463,7 @@ static void paicrypt_sched_task(struct perf_event_pmu_context *pmu_ctx, bool sch
* results on schedule_out and if page was dirty, clear values.
*/
if (!sched_in)
- paicrypt_push_sample();
+ paicrypt_have_sample();
}
/* Attribute definitions for paicrypt interface. As with other CPU
@@ -646,7 +725,7 @@ static int __init attr_event_init(void)
for (i = 0; i < ARRAY_SIZE(paicrypt_ctrnames); i++) {
ret = attr_event_init_one(attrs, i);
if (ret) {
- attr_event_free(attrs, i - 1);
+ attr_event_free(attrs, i);
return ret;
}
}
diff --git a/arch/s390/kernel/perf_pai_ext.c b/arch/s390/kernel/perf_pai_ext.c
index c57c1a203..db37c38dd 100644
--- a/arch/s390/kernel/perf_pai_ext.c
+++ b/arch/s390/kernel/perf_pai_ext.c
@@ -17,8 +17,7 @@
#include <linux/export.h>
#include <linux/io.h>
#include <linux/perf_event.h>
-
-#include <asm/ctl_reg.h>
+#include <asm/ctlreg.h>
#include <asm/pai.h>
#include <asm/debug.h>
@@ -122,7 +121,6 @@ static void paiext_event_destroy(struct perf_event *event)
struct paiext_map *cpump = mp->mapptr;
mutex_lock(&paiext_reserve_mutex);
- cpump->event = NULL;
if (refcount_dec_and_test(&cpump->refcnt)) /* Last reference gone */
paiext_free(mp);
paiext_root_free();
@@ -249,7 +247,7 @@ static int paiext_event_init(struct perf_event *event)
if (rc)
return rc;
/* Allow only CPU wide operation, no process context for now. */
- if (event->hw.target || event->cpu == -1)
+ if ((event->attach_state & PERF_ATTACH_TASK) || event->cpu == -1)
return -ENOENT;
/* Allow only event NNPA_ALL for sampling. */
if (a->sample_period && a->config != PAI_NNPA_BASE)
@@ -261,7 +259,6 @@ static int paiext_event_init(struct perf_event *event)
rc = paiext_alloc(a, event);
if (rc)
return rc;
- event->hw.last_tag = 0;
event->destroy = paiext_event_destroy;
if (a->sample_period) {
@@ -278,9 +275,9 @@ static int paiext_event_init(struct perf_event *event)
return 0;
}
-static u64 paiext_getctr(struct paiext_map *cpump, int nr)
+static u64 paiext_getctr(unsigned long *area, int nr)
{
- return cpump->area[nr];
+ return area[nr];
}
/* Read the counter values. Return value from location in buffer. For event
@@ -294,10 +291,11 @@ static u64 paiext_getdata(struct perf_event *event)
int i;
if (event->attr.config != PAI_NNPA_BASE)
- return paiext_getctr(cpump, event->attr.config - PAI_NNPA_BASE);
+ return paiext_getctr(cpump->area,
+ event->attr.config - PAI_NNPA_BASE);
for (i = 1; i <= paiext_cnt; i++)
- sum += paiext_getctr(cpump, i);
+ sum += paiext_getctr(cpump->area, i);
return sum;
}
@@ -322,12 +320,15 @@ static void paiext_start(struct perf_event *event, int flags)
{
u64 sum;
- if (event->hw.last_tag)
- return;
- event->hw.last_tag = 1;
- sum = paiext_getall(event); /* Get current value */
- local64_set(&event->hw.prev_count, sum);
- local64_set(&event->count, 0);
+ if (!event->attr.sample_period) { /* Counting */
+ if (!event->hw.last_tag) {
+ event->hw.last_tag = 1;
+ sum = paiext_getall(event); /* Get current value */
+ local64_set(&event->hw.prev_count, sum);
+ }
+ } else { /* Sampling */
+ perf_sched_cb_inc(event->pmu);
+ }
}
static int paiext_add(struct perf_event *event, int flags)
@@ -340,25 +341,28 @@ static int paiext_add(struct perf_event *event, int flags)
S390_lowcore.aicd = virt_to_phys(cpump->paiext_cb);
pcb->acc = virt_to_phys(cpump->area) | 0x1;
/* Enable CPU instruction lookup for PAIE1 control block */
- __ctl_set_bit(0, 49);
+ local_ctl_set_bit(0, CR0_PAI_EXTENSION_BIT);
debug_sprintf_event(paiext_dbg, 4, "%s 1508 %llx acc %llx\n",
__func__, S390_lowcore.aicd, pcb->acc);
}
- if (flags & PERF_EF_START && !event->attr.sample_period) {
- /* Only counting needs initial counter value */
+ cpump->event = event;
+ if (flags & PERF_EF_START)
paiext_start(event, PERF_EF_RELOAD);
- }
event->hw.state = 0;
- if (event->attr.sample_period) {
- cpump->event = event;
- perf_sched_cb_inc(event->pmu);
- }
return 0;
}
static void paiext_stop(struct perf_event *event, int flags)
{
- paiext_read(event);
+ struct paiext_mapptr *mp = this_cpu_ptr(paiext_root.mapptr);
+ struct paiext_map *cpump = mp->mapptr;
+
+ if (!event->attr.sample_period) { /* Counting */
+ paiext_read(event);
+ } else { /* Sampling */
+ perf_sched_cb_dec(event->pmu);
+ cpump->event = NULL;
+ }
event->hw.state = PERF_HES_STOPPED;
}
@@ -368,15 +372,10 @@ static void paiext_del(struct perf_event *event, int flags)
struct paiext_map *cpump = mp->mapptr;
struct paiext_cb *pcb = cpump->paiext_cb;
- if (event->attr.sample_period)
- perf_sched_cb_dec(event->pmu);
- if (!event->attr.sample_period) {
- /* Only counting needs to read counter */
- paiext_stop(event, PERF_EF_UPDATE);
- }
+ paiext_stop(event, PERF_EF_UPDATE);
if (--cpump->active_events == 0) {
/* Disable CPU instruction lookup for PAIE1 control block */
- __ctl_clear_bit(0, 49);
+ local_ctl_clear_bit(0, CR0_PAI_EXTENSION_BIT);
pcb->acc = 0;
S390_lowcore.aicd = 0;
debug_sprintf_event(paiext_dbg, 4, "%s 1508 %llx acc %llx\n",
@@ -389,13 +388,12 @@ static void paiext_del(struct perf_event *event, int flags)
* 2 bytes: Number of counter
* 8 bytes: Value of counter
*/
-static size_t paiext_copy(struct paiext_map *cpump)
+static size_t paiext_copy(struct pai_userdata *userdata, unsigned long *area)
{
- struct pai_userdata *userdata = cpump->save;
int i, outidx = 0;
for (i = 1; i <= paiext_cnt; i++) {
- u64 val = paiext_getctr(cpump, i);
+ u64 val = paiext_getctr(area, i);
if (val) {
userdata[outidx].num = i;
@@ -421,21 +419,14 @@ static size_t paiext_copy(struct paiext_map *cpump)
* sched_task() callback. That callback is not active after paiext_del()
* returns and has deleted the event on that CPU.
*/
-static int paiext_push_sample(void)
+static int paiext_push_sample(size_t rawsize, struct paiext_map *cpump,
+ struct perf_event *event)
{
- struct paiext_mapptr *mp = this_cpu_ptr(paiext_root.mapptr);
- struct paiext_map *cpump = mp->mapptr;
- struct perf_event *event = cpump->event;
struct perf_sample_data data;
struct perf_raw_record raw;
struct pt_regs regs;
- size_t rawsize;
int overflow;
- rawsize = paiext_copy(cpump);
- if (!rawsize) /* No incremented counters */
- return 0;
-
/* Setup perf sample */
memset(&regs, 0, sizeof(regs));
memset(&raw, 0, sizeof(raw));
@@ -464,6 +455,23 @@ static int paiext_push_sample(void)
return overflow;
}
+/* Check if there is data to be saved on schedule out of a task. */
+static int paiext_have_sample(void)
+{
+ struct paiext_mapptr *mp = this_cpu_ptr(paiext_root.mapptr);
+ struct paiext_map *cpump = mp->mapptr;
+ struct perf_event *event = cpump->event;
+ size_t rawsize;
+ int rc = 0;
+
+ if (!event)
+ return 0;
+ rawsize = paiext_copy(cpump->save, cpump->area);
+ if (rawsize) /* Incremented counters */
+ rc = paiext_push_sample(rawsize, cpump, event);
+ return rc;
+}
+
/* Called on schedule-in and schedule-out. No access to event structure,
* but for sampling only event NNPA_ALL is allowed.
*/
@@ -473,7 +481,7 @@ static void paiext_sched_task(struct perf_event_pmu_context *pmu_ctx, bool sched
* results on schedule_out and if page was dirty, clear values.
*/
if (!sched_in)
- paiext_push_sample();
+ paiext_have_sample();
}
/* Attribute definitions for pai extension1 interface. As with other CPU
@@ -607,7 +615,7 @@ static int __init attr_event_init(void)
for (i = 0; i < ARRAY_SIZE(paiext_ctrnames); i++) {
ret = attr_event_init_one(attrs, i);
if (ret) {
- attr_event_free(attrs, i - 1);
+ attr_event_free(attrs, i);
return ret;
}
}
diff --git a/arch/s390/kernel/perf_regs.c b/arch/s390/kernel/perf_regs.c
index 6e9e5d5e9..3d93656bd 100644
--- a/arch/s390/kernel/perf_regs.c
+++ b/arch/s390/kernel/perf_regs.c
@@ -20,8 +20,10 @@ u64 perf_reg_value(struct pt_regs *regs, int idx)
return 0;
idx -= PERF_REG_S390_FP0;
- fp = MACHINE_HAS_VX ? *(freg_t *)(current->thread.fpu.vxrs + idx)
- : current->thread.fpu.fprs[idx];
+ if (cpu_has_vx())
+ fp = *(freg_t *)(current->thread.fpu.vxrs + idx);
+ else
+ fp = current->thread.fpu.fprs[idx];
return fp.ui;
}
diff --git a/arch/s390/kernel/process.c b/arch/s390/kernel/process.c
index 258000417..4e3b36658 100644
--- a/arch/s390/kernel/process.c
+++ b/arch/s390/kernel/process.c
@@ -89,7 +89,7 @@ int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src)
*/
save_fpu_regs();
- memcpy(dst, src, arch_task_struct_size);
+ *dst = *src;
dst->thread.fpu.regs = dst->thread.fpu.fprs;
/*
diff --git a/arch/s390/kernel/processor.c b/arch/s390/kernel/processor.c
index 0a999c822..65c1464ee 100644
--- a/arch/s390/kernel/processor.c
+++ b/arch/s390/kernel/processor.c
@@ -201,11 +201,8 @@ static int __init setup_hwcaps(void)
if (MACHINE_HAS_TE)
elf_hwcap |= HWCAP_TE;
- /*
- * Vector extension can be disabled with the "novx" parameter.
- * Use MACHINE_HAS_VX instead of facility bit 129.
- */
- if (MACHINE_HAS_VX) {
+ /* vector */
+ if (test_facility(129)) {
elf_hwcap |= HWCAP_VXRS;
if (test_facility(134))
elf_hwcap |= HWCAP_VXRS_BCD;
diff --git a/arch/s390/kernel/ptrace.c b/arch/s390/kernel/ptrace.c
index ea244a73e..f1897a8bb 100644
--- a/arch/s390/kernel/ptrace.c
+++ b/arch/s390/kernel/ptrace.c
@@ -30,6 +30,7 @@
#include <asm/switch_to.h>
#include <asm/runtime_instr.h>
#include <asm/facility.h>
+#include <asm/fpu/api.h>
#include "entry.h"
@@ -41,13 +42,20 @@ void update_cr_regs(struct task_struct *task)
{
struct pt_regs *regs = task_pt_regs(task);
struct thread_struct *thread = &task->thread;
- struct per_regs old, new;
union ctlreg0 cr0_old, cr0_new;
union ctlreg2 cr2_old, cr2_new;
int cr0_changed, cr2_changed;
-
- __ctl_store(cr0_old.val, 0, 0);
- __ctl_store(cr2_old.val, 2, 2);
+ union {
+ struct ctlreg regs[3];
+ struct {
+ struct ctlreg control;
+ struct ctlreg start;
+ struct ctlreg end;
+ };
+ } old, new;
+
+ local_ctl_store(0, &cr0_old.reg);
+ local_ctl_store(2, &cr2_old.reg);
cr0_new = cr0_old;
cr2_new = cr2_old;
/* Take care of the enable/disable of transactional execution. */
@@ -75,38 +83,38 @@ void update_cr_regs(struct task_struct *task)
cr0_changed = cr0_new.val != cr0_old.val;
cr2_changed = cr2_new.val != cr2_old.val;
if (cr0_changed)
- __ctl_load(cr0_new.val, 0, 0);
+ local_ctl_load(0, &cr0_new.reg);
if (cr2_changed)
- __ctl_load(cr2_new.val, 2, 2);
+ local_ctl_load(2, &cr2_new.reg);
/* Copy user specified PER registers */
- new.control = thread->per_user.control;
- new.start = thread->per_user.start;
- new.end = thread->per_user.end;
+ new.control.val = thread->per_user.control;
+ new.start.val = thread->per_user.start;
+ new.end.val = thread->per_user.end;
/* merge TIF_SINGLE_STEP into user specified PER registers. */
if (test_tsk_thread_flag(task, TIF_SINGLE_STEP) ||
test_tsk_thread_flag(task, TIF_UPROBE_SINGLESTEP)) {
if (test_tsk_thread_flag(task, TIF_BLOCK_STEP))
- new.control |= PER_EVENT_BRANCH;
+ new.control.val |= PER_EVENT_BRANCH;
else
- new.control |= PER_EVENT_IFETCH;
- new.control |= PER_CONTROL_SUSPENSION;
- new.control |= PER_EVENT_TRANSACTION_END;
+ new.control.val |= PER_EVENT_IFETCH;
+ new.control.val |= PER_CONTROL_SUSPENSION;
+ new.control.val |= PER_EVENT_TRANSACTION_END;
if (test_tsk_thread_flag(task, TIF_UPROBE_SINGLESTEP))
- new.control |= PER_EVENT_IFETCH;
- new.start = 0;
- new.end = -1UL;
+ new.control.val |= PER_EVENT_IFETCH;
+ new.start.val = 0;
+ new.end.val = -1UL;
}
/* Take care of the PER enablement bit in the PSW. */
- if (!(new.control & PER_EVENT_MASK)) {
+ if (!(new.control.val & PER_EVENT_MASK)) {
regs->psw.mask &= ~PSW_MASK_PER;
return;
}
regs->psw.mask |= PSW_MASK_PER;
- __ctl_store(old, 9, 11);
+ __local_ctl_store(9, 11, old.regs);
if (memcmp(&new, &old, sizeof(struct per_regs)) != 0)
- __ctl_load(new, 9, 11);
+ __local_ctl_load(9, 11, new.regs);
}
void user_enable_single_step(struct task_struct *task)
@@ -247,7 +255,7 @@ static unsigned long __peek_user(struct task_struct *child, addr_t addr)
* or the child->thread.fpu.vxrs array
*/
offset = addr - offsetof(struct user, regs.fp_regs.fprs);
- if (MACHINE_HAS_VX)
+ if (cpu_has_vx())
tmp = *(addr_t *)
((addr_t) child->thread.fpu.vxrs + 2*offset);
else
@@ -385,8 +393,7 @@ static int __poke_user(struct task_struct *child, addr_t addr, addr_t data)
/*
* floating point control reg. is in the thread structure
*/
- if ((unsigned int) data != 0 ||
- test_fp_ctl(data >> (BITS_PER_LONG - 32)))
+ if ((unsigned int)data != 0)
return -EINVAL;
child->thread.fpu.fpc = data >> (BITS_PER_LONG - 32);
@@ -396,7 +403,7 @@ static int __poke_user(struct task_struct *child, addr_t addr, addr_t data)
* or the child->thread.fpu.vxrs array
*/
offset = addr - offsetof(struct user, regs.fp_regs.fprs);
- if (MACHINE_HAS_VX)
+ if (cpu_has_vx())
*(addr_t *)((addr_t)
child->thread.fpu.vxrs + 2*offset) = data;
else
@@ -623,7 +630,7 @@ static u32 __peek_user_compat(struct task_struct *child, addr_t addr)
* or the child->thread.fpu.vxrs array
*/
offset = addr - offsetof(struct compat_user, regs.fp_regs.fprs);
- if (MACHINE_HAS_VX)
+ if (cpu_has_vx())
tmp = *(__u32 *)
((addr_t) child->thread.fpu.vxrs + 2*offset);
else
@@ -741,8 +748,6 @@ static int __poke_user_compat(struct task_struct *child,
/*
* floating point control reg. is in the thread structure
*/
- if (test_fp_ctl(tmp))
- return -EINVAL;
child->thread.fpu.fpc = data;
} else if (addr < offsetof(struct compat_user, regs.fp_regs) + sizeof(s390_fp_regs)) {
@@ -751,7 +756,7 @@ static int __poke_user_compat(struct task_struct *child,
* or the child->thread.fpu.vxrs array
*/
offset = addr - offsetof(struct compat_user, regs.fp_regs.fprs);
- if (MACHINE_HAS_VX)
+ if (cpu_has_vx())
*(__u32 *)((addr_t)
child->thread.fpu.vxrs + 2*offset) = tmp;
else
@@ -907,19 +912,18 @@ static int s390_fpregs_set(struct task_struct *target,
if (target == current)
save_fpu_regs();
- if (MACHINE_HAS_VX)
+ if (cpu_has_vx())
convert_vx_to_fp(fprs, target->thread.fpu.vxrs);
else
memcpy(&fprs, target->thread.fpu.fprs, sizeof(fprs));
- /* If setting FPC, must validate it first. */
if (count > 0 && pos < offsetof(s390_fp_regs, fprs)) {
u32 ufpc[2] = { target->thread.fpu.fpc, 0 };
rc = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &ufpc,
0, offsetof(s390_fp_regs, fprs));
if (rc)
return rc;
- if (ufpc[1] != 0 || test_fp_ctl(ufpc[0]))
+ if (ufpc[1] != 0)
return -EINVAL;
target->thread.fpu.fpc = ufpc[0];
}
@@ -930,7 +934,7 @@ static int s390_fpregs_set(struct task_struct *target,
if (rc)
return rc;
- if (MACHINE_HAS_VX)
+ if (cpu_has_vx())
convert_fp_to_vx(target->thread.fpu.vxrs, fprs);
else
memcpy(target->thread.fpu.fprs, &fprs, sizeof(fprs));
@@ -981,7 +985,7 @@ static int s390_vxrs_low_get(struct task_struct *target,
__u64 vxrs[__NUM_VXRS_LOW];
int i;
- if (!MACHINE_HAS_VX)
+ if (!cpu_has_vx())
return -ENODEV;
if (target == current)
save_fpu_regs();
@@ -998,7 +1002,7 @@ static int s390_vxrs_low_set(struct task_struct *target,
__u64 vxrs[__NUM_VXRS_LOW];
int i, rc;
- if (!MACHINE_HAS_VX)
+ if (!cpu_has_vx())
return -ENODEV;
if (target == current)
save_fpu_regs();
@@ -1018,7 +1022,7 @@ static int s390_vxrs_high_get(struct task_struct *target,
const struct user_regset *regset,
struct membuf to)
{
- if (!MACHINE_HAS_VX)
+ if (!cpu_has_vx())
return -ENODEV;
if (target == current)
save_fpu_regs();
@@ -1033,7 +1037,7 @@ static int s390_vxrs_high_set(struct task_struct *target,
{
int rc;
- if (!MACHINE_HAS_VX)
+ if (!cpu_has_vx())
return -ENODEV;
if (target == current)
save_fpu_regs();
@@ -1107,7 +1111,7 @@ static int s390_gs_cb_set(struct task_struct *target,
target->thread.gs_cb = data;
*target->thread.gs_cb = gs_cb;
if (target == current) {
- __ctl_set_bit(2, 4);
+ local_ctl_set_bit(2, CR2_GUARDED_STORAGE_BIT);
restore_gs_cb(target->thread.gs_cb);
}
preempt_enable();
diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c
index de6ad0fb2..d1f3b56e7 100644
--- a/arch/s390/kernel/setup.c
+++ b/arch/s390/kernel/setup.c
@@ -305,7 +305,7 @@ static void __init setup_zfcpdump(void)
return;
if (oldmem_data.start)
return;
- strcat(boot_command_line, " cio_ignore=all,!ipldev,!condev");
+ strlcat(boot_command_line, " cio_ignore=all,!ipldev,!condev", COMMAND_LINE_SIZE);
console_loglevel = 2;
}
#else
@@ -381,12 +381,6 @@ void stack_free(unsigned long stack)
#endif
}
-void __init __noreturn arch_call_rest_init(void)
-{
- smp_reinit_ipl_cpu();
- rest_init();
-}
-
static unsigned long __init stack_alloc_early(void)
{
unsigned long stack;
@@ -414,15 +408,15 @@ static void __init setup_lowcore(void)
lc->restart_psw.mask = PSW_KERNEL_BITS & ~PSW_MASK_DAT;
lc->restart_psw.addr = __pa(restart_int_handler);
- lc->external_new_psw.mask = PSW_KERNEL_BITS | PSW_MASK_MCHECK;
+ lc->external_new_psw.mask = PSW_KERNEL_BITS;
lc->external_new_psw.addr = (unsigned long) ext_int_handler;
- lc->svc_new_psw.mask = PSW_KERNEL_BITS | PSW_MASK_MCHECK;
+ lc->svc_new_psw.mask = PSW_KERNEL_BITS;
lc->svc_new_psw.addr = (unsigned long) system_call;
- lc->program_new_psw.mask = PSW_KERNEL_BITS | PSW_MASK_MCHECK;
+ lc->program_new_psw.mask = PSW_KERNEL_BITS;
lc->program_new_psw.addr = (unsigned long) pgm_check_handler;
lc->mcck_new_psw.mask = PSW_KERNEL_BITS;
lc->mcck_new_psw.addr = (unsigned long) mcck_int_handler;
- lc->io_new_psw.mask = PSW_KERNEL_BITS | PSW_MASK_MCHECK;
+ lc->io_new_psw.mask = PSW_KERNEL_BITS;
lc->io_new_psw.addr = (unsigned long) io_int_handler;
lc->clock_comparator = clock_comparator_max;
lc->current_task = (unsigned long)&init_task;
@@ -455,7 +449,6 @@ static void __init setup_lowcore(void)
lc->restart_fn = (unsigned long) do_restart;
lc->restart_data = 0;
lc->restart_source = -1U;
- __ctl_store(lc->cregs_save_area, 0, 15);
lc->spinlock_lockval = arch_spin_lockval(0);
lc->spinlock_index = 0;
arch_spin_lock_setup(0);
@@ -465,6 +458,7 @@ static void __init setup_lowcore(void)
lc->kernel_asce = S390_lowcore.kernel_asce;
lc->user_asce = S390_lowcore.user_asce;
+ system_ctlreg_init_save_area(lc);
abs_lc = get_abs_lowcore();
abs_lc->restart_stack = lc->restart_stack;
abs_lc->restart_fn = lc->restart_fn;
@@ -472,7 +466,6 @@ static void __init setup_lowcore(void)
abs_lc->restart_source = lc->restart_source;
abs_lc->restart_psw = lc->restart_psw;
abs_lc->restart_flags = RESTART_FLAG_CTLREGS;
- memcpy(abs_lc->cregs_save_area, lc->cregs_save_area, sizeof(abs_lc->cregs_save_area));
abs_lc->program_new_psw = lc->program_new_psw;
abs_lc->mcesad = lc->mcesad;
put_abs_lowcore(abs_lc);
@@ -625,8 +618,8 @@ static void __init reserve_crashkernel(void)
phys_addr_t low, high;
int rc;
- rc = parse_crashkernel(boot_command_line, ident_map_size, &crash_size,
- &crash_base);
+ rc = parse_crashkernel(boot_command_line, ident_map_size,
+ &crash_size, &crash_base, NULL, NULL);
crash_base = ALIGN(crash_base, KEXEC_CRASH_MEM_ALIGN);
crash_size = ALIGN(crash_size, KEXEC_CRASH_MEM_ALIGN);
@@ -797,15 +790,15 @@ static void __init setup_cr(void)
__ctl_duct[4] = (unsigned long)__ctl_duald;
/* Update control registers CR2, CR5 and CR15 */
- __ctl_store(cr2.val, 2, 2);
- __ctl_store(cr5.val, 5, 5);
- __ctl_store(cr15.val, 15, 15);
+ local_ctl_store(2, &cr2.reg);
+ local_ctl_store(5, &cr5.reg);
+ local_ctl_store(15, &cr15.reg);
cr2.ducto = (unsigned long)__ctl_duct >> 6;
cr5.pasteo = (unsigned long)__ctl_duct >> 6;
cr15.lsea = (unsigned long)__ctl_linkage_stack >> 3;
- __ctl_load(cr2.val, 2, 2);
- __ctl_load(cr5.val, 5, 5);
- __ctl_load(cr15.val, 15, 15);
+ system_ctl_load(2, &cr2.reg);
+ system_ctl_load(5, &cr5.reg);
+ system_ctl_load(15, &cr15.reg);
}
/*
@@ -827,22 +820,6 @@ static void __init setup_randomness(void)
}
/*
- * Find the correct size for the task_struct. This depends on
- * the size of the struct fpu at the end of the thread_struct
- * which is embedded in the task_struct.
- */
-static void __init setup_task_size(void)
-{
- int task_size = sizeof(struct task_struct);
-
- if (!MACHINE_HAS_VX) {
- task_size -= sizeof(__vector128) * __NUM_VXRS;
- task_size += sizeof(freg_t) * __NUM_FPRS;
- }
- arch_task_struct_size = task_size;
-}
-
-/*
* Issue diagnose 318 to set the control program name and
* version codes.
*/
@@ -934,7 +911,6 @@ void __init setup_arch(char **cmdline_p)
os_info_init();
setup_ipl();
- setup_task_size();
setup_control_program_code();
/* Do some memory reservations *before* memory is added to memblock */
diff --git a/arch/s390/kernel/signal.c b/arch/s390/kernel/signal.c
index d63557d38..43e9661cd 100644
--- a/arch/s390/kernel/signal.c
+++ b/arch/s390/kernel/signal.c
@@ -12,6 +12,7 @@
#include <linux/sched.h>
#include <linux/sched/task_stack.h>
+#include <linux/rseq.h>
#include <linux/mm.h>
#include <linux/smp.h>
#include <linux/kernel.h>
@@ -149,10 +150,6 @@ static int restore_sigregs(struct pt_regs *regs, _sigregs __user *sregs)
if (!is_ri_task(current) && (user_sregs.regs.psw.mask & PSW_MASK_RI))
return -EINVAL;
- /* Test the floating-point-control word. */
- if (test_fp_ctl(user_sregs.fpregs.fpc))
- return -EINVAL;
-
/* Use regs->psw.mask instead of PSW_USER_BITS to preserve PER bit. */
regs->psw.mask = (regs->psw.mask & ~(PSW_MASK_USER | PSW_MASK_RI)) |
(user_sregs.regs.psw.mask & (PSW_MASK_USER | PSW_MASK_RI));
@@ -182,7 +179,7 @@ static int save_sigregs_ext(struct pt_regs *regs,
int i;
/* Save vector registers to signal stack */
- if (MACHINE_HAS_VX) {
+ if (cpu_has_vx()) {
for (i = 0; i < __NUM_VXRS_LOW; i++)
vxrs[i] = current->thread.fpu.vxrs[i].low;
if (__copy_to_user(&sregs_ext->vxrs_low, vxrs,
@@ -202,7 +199,7 @@ static int restore_sigregs_ext(struct pt_regs *regs,
int i;
/* Restore vector registers from signal stack */
- if (MACHINE_HAS_VX) {
+ if (cpu_has_vx()) {
if (__copy_from_user(vxrs, &sregs_ext->vxrs_low,
sizeof(sregs_ext->vxrs_low)) ||
__copy_from_user(current->thread.fpu.vxrs + __NUM_VXRS_LOW,
@@ -300,7 +297,7 @@ static int setup_frame(int sig, struct k_sigaction *ka,
* included in the signal frame on a 31-bit system.
*/
frame_size = sizeof(*frame) - sizeof(frame->sregs_ext);
- if (MACHINE_HAS_VX)
+ if (cpu_has_vx())
frame_size += sizeof(frame->sregs_ext);
frame = get_sigframe(ka, regs, frame_size);
if (frame == (void __user *) -1UL)
@@ -377,7 +374,7 @@ static int setup_rt_frame(struct ksignal *ksig, sigset_t *set,
* included in the signal frame on a 31-bit system.
*/
uc_flags = 0;
- if (MACHINE_HAS_VX) {
+ if (cpu_has_vx()) {
frame_size += sizeof(_sigregs_ext);
uc_flags |= UC_VXRS;
}
diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c
index a4edb7ea6..c39d9f0d4 100644
--- a/arch/s390/kernel/smp.c
+++ b/arch/s390/kernel/smp.c
@@ -37,6 +37,7 @@
#include <linux/crash_dump.h>
#include <linux/kprobes.h>
#include <asm/asm-offsets.h>
+#include <asm/ctlreg.h>
#include <asm/pfault.h>
#include <asm/diag.h>
#include <asm/switch_to.h>
@@ -567,54 +568,6 @@ void arch_irq_work_raise(void)
}
#endif
-/*
- * parameter area for the set/clear control bit callbacks
- */
-struct ec_creg_mask_parms {
- unsigned long orval;
- unsigned long andval;
- int cr;
-};
-
-/*
- * callback for setting/clearing control bits
- */
-static void smp_ctl_bit_callback(void *info)
-{
- struct ec_creg_mask_parms *pp = info;
- unsigned long cregs[16];
-
- __ctl_store(cregs, 0, 15);
- cregs[pp->cr] = (cregs[pp->cr] & pp->andval) | pp->orval;
- __ctl_load(cregs, 0, 15);
-}
-
-static DEFINE_SPINLOCK(ctl_lock);
-
-void smp_ctl_set_clear_bit(int cr, int bit, bool set)
-{
- struct ec_creg_mask_parms parms = { .cr = cr, };
- struct lowcore *abs_lc;
- u64 ctlreg;
-
- if (set) {
- parms.orval = 1UL << bit;
- parms.andval = -1UL;
- } else {
- parms.orval = 0;
- parms.andval = ~(1UL << bit);
- }
- spin_lock(&ctl_lock);
- abs_lc = get_abs_lowcore();
- ctlreg = abs_lc->cregs_save_area[cr];
- ctlreg = (ctlreg & parms.andval) | parms.orval;
- abs_lc->cregs_save_area[cr] = ctlreg;
- put_abs_lowcore(abs_lc);
- on_each_cpu(smp_ctl_bit_callback, &parms, 1);
- spin_unlock(&ctl_lock);
-}
-EXPORT_SYMBOL(smp_ctl_set_clear_bit);
-
#ifdef CONFIG_CRASH_DUMP
int smp_store_status(int cpu)
@@ -629,7 +582,7 @@ int smp_store_status(int cpu)
if (__pcpu_sigp_relax(pcpu->address, SIGP_STORE_STATUS_AT_ADDRESS,
pa) != SIGP_CC_ORDER_CODE_ACCEPTED)
return -EIO;
- if (!MACHINE_HAS_VX && !MACHINE_HAS_GS)
+ if (!cpu_has_vx() && !MACHINE_HAS_GS)
return 0;
pa = lc->mcesad & MCESA_ORIGIN_MASK;
if (MACHINE_HAS_GS)
@@ -685,7 +638,7 @@ void __init smp_save_dump_ipl_cpu(void)
copy_oldmem_kernel(regs, __LC_FPREGS_SAVE_AREA, 512);
save_area_add_regs(sa, regs);
memblock_free(regs, 512);
- if (MACHINE_HAS_VX)
+ if (cpu_has_vx())
save_area_add_vxrs(sa, boot_cpu_vector_save_area);
}
@@ -718,7 +671,7 @@ void __init smp_save_dump_secondary_cpus(void)
panic("could not allocate memory for save area\n");
__pcpu_sigp_relax(addr, SIGP_STORE_STATUS_AT_ADDRESS, __pa(page));
save_area_add_regs(sa, page);
- if (MACHINE_HAS_VX) {
+ if (cpu_has_vx()) {
__pcpu_sigp_relax(addr, SIGP_STORE_ADDITIONAL_STATUS, __pa(page));
save_area_add_vxrs(sa, page);
}
@@ -898,7 +851,7 @@ static void smp_start_secondary(void *cpuvoid)
S390_lowcore.restart_flags = 0;
restore_access_regs(S390_lowcore.access_regs_save_area);
cpu_init();
- rcu_cpu_starting(cpu);
+ rcutree_report_cpu_starting(cpu);
init_cpu_timer();
vtime_init();
vdso_getcpu_init();
@@ -935,14 +888,14 @@ int __cpu_up(unsigned int cpu, struct task_struct *tidle)
* Make sure global control register contents do not change
* until new CPU has initialized control registers.
*/
- spin_lock(&ctl_lock);
+ system_ctlreg_lock();
pcpu_prepare_secondary(pcpu, cpu);
pcpu_attach_task(pcpu, tidle);
pcpu_start_fn(pcpu, smp_start_secondary, NULL);
/* Wait until cpu puts itself in the online & active maps */
while (!cpu_online(cpu))
cpu_relax();
- spin_unlock(&ctl_lock);
+ system_ctlreg_unlock();
return 0;
}
@@ -957,7 +910,7 @@ early_param("possible_cpus", _setup_possible_cpus);
int __cpu_disable(void)
{
- unsigned long cregs[16];
+ struct ctlreg cregs[16];
int cpu;
/* Handle possible pending IPIs */
@@ -969,11 +922,11 @@ int __cpu_disable(void)
/* Disable pseudo page faults on this cpu. */
pfault_fini();
/* Disable interrupt sources via control register. */
- __ctl_store(cregs, 0, 15);
- cregs[0] &= ~0x0000ee70UL; /* disable all external interrupts */
- cregs[6] &= ~0xff000000UL; /* disable all I/O interrupts */
- cregs[14] &= ~0x1f000000UL; /* disable most machine checks */
- __ctl_load(cregs, 0, 15);
+ __local_ctl_store(0, 15, cregs);
+ cregs[0].val &= ~0x0000ee70UL; /* disable all external interrupts */
+ cregs[6].val &= ~0xff000000UL; /* disable all I/O interrupts */
+ cregs[14].val &= ~0x1f000000UL; /* disable most machine checks */
+ __local_ctl_load(0, 15, cregs);
clear_cpu_flag(CIF_NOHZ_DELAY);
return 0;
}
@@ -1013,12 +966,12 @@ void __init smp_fill_possible_mask(void)
void __init smp_prepare_cpus(unsigned int max_cpus)
{
- /* request the 0x1201 emergency signal external interrupt */
if (register_external_irq(EXT_IRQ_EMERGENCY_SIG, do_ext_call_interrupt))
panic("Couldn't request external interrupt 0x1201");
- /* request the 0x1202 external call external interrupt */
+ system_ctl_set_bit(0, 14);
if (register_external_irq(EXT_IRQ_EXTERNAL_CALL, do_ext_call_interrupt))
panic("Couldn't request external interrupt 0x1202");
+ system_ctl_set_bit(0, 13);
}
void __init smp_prepare_boot_cpu(void)
@@ -1076,11 +1029,9 @@ static ssize_t cpu_configure_store(struct device *dev,
cpus_read_lock();
mutex_lock(&smp_cpu_state_mutex);
rc = -EBUSY;
- /* disallow configuration changes of online cpus and cpu 0 */
+ /* disallow configuration changes of online cpus */
cpu = dev->id;
cpu = smp_get_base_cpu(cpu);
- if (cpu == 0)
- goto out;
for (i = 0; i <= smp_cpu_mtid; i++)
if (cpu_online(cpu + i))
goto out;
@@ -1180,7 +1131,7 @@ static int smp_add_present_cpu(int cpu)
return -ENOMEM;
per_cpu(cpu_device, cpu) = c;
s = &c->dev;
- c->hotpluggable = 1;
+ c->hotpluggable = !!cpu;
rc = register_cpu(c, cpu);
if (rc)
goto out;
@@ -1258,60 +1209,3 @@ out:
return rc;
}
subsys_initcall(s390_smp_init);
-
-static __always_inline void set_new_lowcore(struct lowcore *lc)
-{
- union register_pair dst, src;
- u32 pfx;
-
- src.even = (unsigned long) &S390_lowcore;
- src.odd = sizeof(S390_lowcore);
- dst.even = (unsigned long) lc;
- dst.odd = sizeof(*lc);
- pfx = __pa(lc);
-
- asm volatile(
- " mvcl %[dst],%[src]\n"
- " spx %[pfx]\n"
- : [dst] "+&d" (dst.pair), [src] "+&d" (src.pair)
- : [pfx] "Q" (pfx)
- : "memory", "cc");
-}
-
-int __init smp_reinit_ipl_cpu(void)
-{
- unsigned long async_stack, nodat_stack, mcck_stack;
- struct lowcore *lc, *lc_ipl;
- unsigned long flags, cr0;
- u64 mcesad;
-
- lc_ipl = lowcore_ptr[0];
- lc = (struct lowcore *) __get_free_pages(GFP_KERNEL | GFP_DMA, LC_ORDER);
- nodat_stack = __get_free_pages(GFP_KERNEL, THREAD_SIZE_ORDER);
- async_stack = stack_alloc();
- mcck_stack = stack_alloc();
- if (!lc || !nodat_stack || !async_stack || !mcck_stack || nmi_alloc_mcesa(&mcesad))
- panic("Couldn't allocate memory");
-
- local_irq_save(flags);
- local_mcck_disable();
- set_new_lowcore(lc);
- S390_lowcore.nodat_stack = nodat_stack + STACK_INIT_OFFSET;
- S390_lowcore.async_stack = async_stack + STACK_INIT_OFFSET;
- S390_lowcore.mcck_stack = mcck_stack + STACK_INIT_OFFSET;
- __ctl_store(cr0, 0, 0);
- __ctl_clear_bit(0, 28); /* disable lowcore protection */
- S390_lowcore.mcesad = mcesad;
- __ctl_load(cr0, 0, 0);
- if (abs_lowcore_map(0, lc, false))
- panic("Couldn't remap absolute lowcore");
- lowcore_ptr[0] = lc;
- local_mcck_enable();
- local_irq_restore(flags);
-
- memblock_free_late(__pa(lc_ipl->mcck_stack - STACK_INIT_OFFSET), THREAD_SIZE);
- memblock_free_late(__pa(lc_ipl->async_stack - STACK_INIT_OFFSET), THREAD_SIZE);
- memblock_free_late(__pa(lc_ipl->nodat_stack - STACK_INIT_OFFSET), THREAD_SIZE);
- memblock_free_late(__pa(lc_ipl), sizeof(*lc_ipl));
- return 0;
-}
diff --git a/arch/s390/kernel/stacktrace.c b/arch/s390/kernel/stacktrace.c
index 078701013..94f440e38 100644
--- a/arch/s390/kernel/stacktrace.c
+++ b/arch/s390/kernel/stacktrace.c
@@ -6,9 +6,12 @@
*/
#include <linux/stacktrace.h>
+#include <linux/uaccess.h>
+#include <linux/compat.h>
#include <asm/stacktrace.h>
#include <asm/unwind.h>
#include <asm/kprobes.h>
+#include <asm/ptrace.h>
void arch_stack_walk(stack_trace_consume_fn consume_entry, void *cookie,
struct task_struct *task, struct pt_regs *regs)
@@ -58,3 +61,43 @@ int arch_stack_walk_reliable(stack_trace_consume_fn consume_entry,
return -EINVAL;
return 0;
}
+
+void arch_stack_walk_user(stack_trace_consume_fn consume_entry, void *cookie,
+ const struct pt_regs *regs)
+{
+ struct stack_frame_user __user *sf;
+ unsigned long ip, sp;
+ bool first = true;
+
+ if (is_compat_task())
+ return;
+ if (!consume_entry(cookie, instruction_pointer(regs)))
+ return;
+ sf = (void __user *)user_stack_pointer(regs);
+ pagefault_disable();
+ while (1) {
+ if (__get_user(sp, &sf->back_chain))
+ break;
+ if (__get_user(ip, &sf->gprs[8]))
+ break;
+ if (ip & 0x1) {
+ /*
+ * If the instruction address is invalid, and this
+ * is the first stack frame, assume r14 has not
+ * been written to the stack yet. Otherwise exit.
+ */
+ if (first && !(regs->gprs[14] & 0x1))
+ ip = regs->gprs[14];
+ else
+ break;
+ }
+ if (!consume_entry(cookie, ip))
+ break;
+ /* Sanity check: ABI requires SP to be aligned 8 bytes. */
+ if (!sp || sp & 0x7)
+ break;
+ sf = (void __user *)sp;
+ first = false;
+ }
+ pagefault_enable();
+}
diff --git a/arch/s390/kernel/syscalls/syscall.tbl b/arch/s390/kernel/syscalls/syscall.tbl
index 0122cc156..095bb8633 100644
--- a/arch/s390/kernel/syscalls/syscall.tbl
+++ b/arch/s390/kernel/syscalls/syscall.tbl
@@ -100,7 +100,7 @@
106 common stat sys_newstat compat_sys_newstat
107 common lstat sys_newlstat compat_sys_newlstat
108 common fstat sys_newfstat compat_sys_newfstat
-110 common lookup_dcookie sys_lookup_dcookie compat_sys_lookup_dcookie
+110 common lookup_dcookie - -
111 common vhangup sys_vhangup sys_vhangup
112 common idle - -
114 common wait4 sys_wait4 compat_sys_wait4
@@ -455,3 +455,12 @@
450 common set_mempolicy_home_node sys_set_mempolicy_home_node sys_set_mempolicy_home_node
451 common cachestat sys_cachestat sys_cachestat
452 common fchmodat2 sys_fchmodat2 sys_fchmodat2
+453 common map_shadow_stack sys_map_shadow_stack sys_map_shadow_stack
+454 common futex_wake sys_futex_wake sys_futex_wake
+455 common futex_wait sys_futex_wait sys_futex_wait
+456 common futex_requeue sys_futex_requeue sys_futex_requeue
+457 common statmount sys_statmount sys_statmount
+458 common listmount sys_listmount sys_listmount
+459 common lsm_get_self_attr sys_lsm_get_self_attr sys_lsm_get_self_attr
+460 common lsm_set_self_attr sys_lsm_set_self_attr sys_lsm_set_self_attr
+461 common lsm_list_modules sys_lsm_list_modules sys_lsm_list_modules
diff --git a/arch/s390/kernel/sysinfo.c b/arch/s390/kernel/sysinfo.c
index b5e364358..f6f8f498c 100644
--- a/arch/s390/kernel/sysinfo.c
+++ b/arch/s390/kernel/sysinfo.c
@@ -81,10 +81,12 @@ static bool convert_ext_name(unsigned char encoding, char *name, size_t len)
static void stsi_1_1_1(struct seq_file *m, struct sysinfo_1_1_1 *info)
{
+ bool has_var_cap;
int i;
if (stsi(info, 1, 1, 1))
return;
+ has_var_cap = !!info->model_var_cap[0];
EBCASC(info->manufacturer, sizeof(info->manufacturer));
EBCASC(info->type, sizeof(info->type));
EBCASC(info->model, sizeof(info->model));
@@ -93,6 +95,8 @@ static void stsi_1_1_1(struct seq_file *m, struct sysinfo_1_1_1 *info)
EBCASC(info->model_capacity, sizeof(info->model_capacity));
EBCASC(info->model_perm_cap, sizeof(info->model_perm_cap));
EBCASC(info->model_temp_cap, sizeof(info->model_temp_cap));
+ if (has_var_cap)
+ EBCASC(info->model_var_cap, sizeof(info->model_var_cap));
seq_printf(m, "Manufacturer: %-16.16s\n", info->manufacturer);
seq_printf(m, "Type: %-4.4s\n", info->type);
if (info->lic)
@@ -120,12 +124,18 @@ static void stsi_1_1_1(struct seq_file *m, struct sysinfo_1_1_1 *info)
seq_printf(m, "Model Temp. Capacity: %-16.16s %08u\n",
info->model_temp_cap,
info->model_temp_cap_rating);
+ if (has_var_cap && info->model_var_cap_rating)
+ seq_printf(m, "Model Var. Capacity: %-16.16s %08u\n",
+ info->model_var_cap,
+ info->model_var_cap_rating);
if (info->ncr)
seq_printf(m, "Nominal Cap. Rating: %08u\n", info->ncr);
if (info->npr)
seq_printf(m, "Nominal Perm. Rating: %08u\n", info->npr);
if (info->ntr)
seq_printf(m, "Nominal Temp. Rating: %08u\n", info->ntr);
+ if (has_var_cap && info->nvr)
+ seq_printf(m, "Nominal Var. Rating: %08u\n", info->nvr);
if (info->cai) {
seq_printf(m, "Capacity Adj. Ind.: %d\n", info->cai);
seq_printf(m, "Capacity Ch. Reason: %d\n", info->ccr);
diff --git a/arch/s390/kernel/time.c b/arch/s390/kernel/time.c
index d34d3548c..14abad953 100644
--- a/arch/s390/kernel/time.c
+++ b/arch/s390/kernel/time.c
@@ -173,10 +173,10 @@ void init_cpu_timer(void)
clockevents_register_device(cd);
/* Enable clock comparator timer interrupt. */
- __ctl_set_bit(0,11);
+ local_ctl_set_bit(0, CR0_CLOCK_COMPARATOR_SUBMASK_BIT);
/* Always allow the timing alert external interrupt. */
- __ctl_set_bit(0, 4);
+ local_ctl_set_bit(0, CR0_ETR_SUBMASK_BIT);
}
static void clock_comparator_interrupt(struct ext_code ext_code,
diff --git a/arch/s390/kernel/topology.c b/arch/s390/kernel/topology.c
index 68adf1de8..89e91b8ce 100644
--- a/arch/s390/kernel/topology.c
+++ b/arch/s390/kernel/topology.c
@@ -522,7 +522,7 @@ static struct sched_domain_topology_level s390_topology[] = {
{ cpu_coregroup_mask, cpu_core_flags, SD_INIT_NAME(MC) },
{ cpu_book_mask, SD_INIT_NAME(BOOK) },
{ cpu_drawer_mask, SD_INIT_NAME(DRAWER) },
- { cpu_cpu_mask, SD_INIT_NAME(DIE) },
+ { cpu_cpu_mask, SD_INIT_NAME(PKG) },
{ NULL, },
};
@@ -636,7 +636,6 @@ static struct ctl_table topology_ctl_table[] = {
.mode = 0644,
.proc_handler = topology_ctl_handler,
},
- { },
};
static int __init topology_init(void)
diff --git a/arch/s390/kernel/traps.c b/arch/s390/kernel/traps.c
index 1d2aa448d..46dac4540 100644
--- a/arch/s390/kernel/traps.c
+++ b/arch/s390/kernel/traps.c
@@ -43,10 +43,12 @@ static inline void __user *get_trap_ip(struct pt_regs *regs)
return (void __user *) (address - (regs->int_code >> 16));
}
+#ifdef CONFIG_GENERIC_BUG
int is_valid_bugaddr(unsigned long addr)
{
return 1;
}
+#endif
void do_report_trap(struct pt_regs *regs, int si_signo, int si_code, char *str)
{
@@ -193,7 +195,7 @@ static void vector_exception(struct pt_regs *regs)
{
int si_code, vic;
- if (!MACHINE_HAS_VX) {
+ if (!cpu_has_vx()) {
do_trap(regs, SIGILL, ILL_ILLOPN, "illegal operation");
return;
}
@@ -286,6 +288,17 @@ static void __init test_monitor_call(void)
void __init trap_init(void)
{
+ unsigned long flags;
+ struct ctlreg cr0;
+
+ local_irq_save(flags);
+ cr0 = local_ctl_clear_bit(0, CR0_LOW_ADDRESS_PROTECTION_BIT);
+ psw_bits(S390_lowcore.external_new_psw).mcheck = 1;
+ psw_bits(S390_lowcore.program_new_psw).mcheck = 1;
+ psw_bits(S390_lowcore.svc_new_psw).mcheck = 1;
+ psw_bits(S390_lowcore.io_new_psw).mcheck = 1;
+ local_ctl_load(0, &cr0);
+ local_irq_restore(flags);
local_mcck_enable();
test_monitor_call();
}
diff --git a/arch/s390/kernel/vdso32/Makefile b/arch/s390/kernel/vdso32/Makefile
index 23e868b79..b12a274cb 100644
--- a/arch/s390/kernel/vdso32/Makefile
+++ b/arch/s390/kernel/vdso32/Makefile
@@ -22,7 +22,7 @@ KBUILD_CFLAGS_32 := $(filter-out -m64,$(KBUILD_CFLAGS))
KBUILD_CFLAGS_32 := $(filter-out -mno-pic-data-is-text-relative,$(KBUILD_CFLAGS_32))
KBUILD_CFLAGS_32 += -m31 -fPIC -shared -fno-common -fno-builtin
-LDFLAGS_vdso32.so.dbg += -fPIC -shared -soname=linux-vdso32.so.1 \
+LDFLAGS_vdso32.so.dbg += -shared -soname=linux-vdso32.so.1 \
--hash-style=both --build-id=sha1 -melf_s390 -T
$(targets:%=$(obj)/%.dbg): KBUILD_CFLAGS = $(KBUILD_CFLAGS_32)
@@ -61,16 +61,6 @@ quiet_cmd_vdso32as = VDSO32A $@
quiet_cmd_vdso32cc = VDSO32C $@
cmd_vdso32cc = $(CC) $(c_flags) -c -o $@ $<
-# install commands for the unstripped file
-quiet_cmd_vdso_install = INSTALL $@
- cmd_vdso_install = cp $(obj)/$@.dbg $(MODLIB)/vdso/$@
-
-vdso32.so: $(obj)/vdso32.so.dbg
- @mkdir -p $(MODLIB)/vdso
- $(call cmd,vdso_install)
-
-vdso_install: vdso32.so
-
# Generate VDSO offsets using helper script
gen-vdsosym := $(srctree)/$(src)/gen_vdso_offsets.sh
quiet_cmd_vdsosym = VDSOSYM $@
diff --git a/arch/s390/kernel/vdso64/Makefile b/arch/s390/kernel/vdso64/Makefile
index fc1c6ff81..caa4ebff8 100644
--- a/arch/s390/kernel/vdso64/Makefile
+++ b/arch/s390/kernel/vdso64/Makefile
@@ -26,7 +26,7 @@ KBUILD_AFLAGS_64 += -m64
KBUILD_CFLAGS_64 := $(filter-out -m64,$(KBUILD_CFLAGS))
KBUILD_CFLAGS_64 := $(filter-out -mno-pic-data-is-text-relative,$(KBUILD_CFLAGS_64))
KBUILD_CFLAGS_64 += -m64 -fPIC -fno-common -fno-builtin
-ldflags-y := -fPIC -shared -soname=linux-vdso64.so.1 \
+ldflags-y := -shared -soname=linux-vdso64.so.1 \
--hash-style=both --build-id=sha1 -T
$(targets:%=$(obj)/%.dbg): KBUILD_CFLAGS = $(KBUILD_CFLAGS_64)
@@ -70,16 +70,6 @@ quiet_cmd_vdso64as = VDSO64A $@
quiet_cmd_vdso64cc = VDSO64C $@
cmd_vdso64cc = $(CC) $(c_flags) -c -o $@ $<
-# install commands for the unstripped file
-quiet_cmd_vdso_install = INSTALL $@
- cmd_vdso_install = cp $(obj)/$@.dbg $(MODLIB)/vdso/$@
-
-vdso64.so: $(obj)/vdso64.so.dbg
- @mkdir -p $(MODLIB)/vdso
- $(call cmd,vdso_install)
-
-vdso_install: vdso64.so
-
# Generate VDSO offsets using helper script
gen-vdsosym := $(srctree)/$(src)/gen_vdso_offsets.sh
quiet_cmd_vdsosym = VDSOSYM $@
diff --git a/arch/s390/kernel/vdso64/vdso_user_wrapper.S b/arch/s390/kernel/vdso64/vdso_user_wrapper.S
index 57f62596e..85247ef5a 100644
--- a/arch/s390/kernel/vdso64/vdso_user_wrapper.S
+++ b/arch/s390/kernel/vdso64/vdso_user_wrapper.S
@@ -24,8 +24,10 @@ __kernel_\func:
CFI_DEF_CFA_OFFSET (STACK_FRAME_OVERHEAD + WRAPPER_FRAME_SIZE)
CFI_VAL_OFFSET 15, -STACK_FRAME_OVERHEAD
stg %r14,STACK_FRAME_OVERHEAD(%r15)
+ CFI_REL_OFFSET 14, STACK_FRAME_OVERHEAD
brasl %r14,__s390_vdso_\func
lg %r14,STACK_FRAME_OVERHEAD(%r15)
+ CFI_RESTORE 14
aghi %r15,WRAPPER_FRAME_SIZE
CFI_DEF_CFA_OFFSET STACK_FRAME_OVERHEAD
CFI_RESTORE 15
diff --git a/arch/s390/kernel/vmlinux.lds.S b/arch/s390/kernel/vmlinux.lds.S
index 2ae201ebf..e32ef446f 100644
--- a/arch/s390/kernel/vmlinux.lds.S
+++ b/arch/s390/kernel/vmlinux.lds.S
@@ -52,6 +52,7 @@ SECTIONS
SOFTIRQENTRY_TEXT
FTRACE_HOTPATCH_TRAMPOLINES_TEXT
*(.text.*_indirect_*)
+ *(.fixup)
*(.gnu.warning)
. = ALIGN(PAGE_SIZE);
_etext = .; /* End of text section */
diff --git a/arch/s390/kernel/vtime.c b/arch/s390/kernel/vtime.c
index e0a88dcaf..24a18e5ef 100644
--- a/arch/s390/kernel/vtime.c
+++ b/arch/s390/kernel/vtime.c
@@ -210,13 +210,13 @@ void vtime_flush(struct task_struct *tsk)
virt_timer_expire();
steal = S390_lowcore.steal_timer;
- avg_steal = S390_lowcore.avg_steal_timer / 2;
+ avg_steal = S390_lowcore.avg_steal_timer;
if ((s64) steal > 0) {
S390_lowcore.steal_timer = 0;
account_steal_time(cputime_to_nsecs(steal));
avg_steal += steal;
}
- S390_lowcore.avg_steal_timer = avg_steal;
+ S390_lowcore.avg_steal_timer = avg_steal / 2;
}
static u64 vtime_delta(void)
diff --git a/arch/s390/kvm/Kconfig b/arch/s390/kvm/Kconfig
index 45fdf2a9b..72e9b7dcd 100644
--- a/arch/s390/kvm/Kconfig
+++ b/arch/s390/kvm/Kconfig
@@ -20,19 +20,16 @@ config KVM
def_tristate y
prompt "Kernel-based Virtual Machine (KVM) support"
depends on HAVE_KVM
- select PREEMPT_NOTIFIERS
select HAVE_KVM_CPU_RELAX_INTERCEPT
select HAVE_KVM_VCPU_ASYNC_IOCTL
- select HAVE_KVM_EVENTFD
select KVM_ASYNC_PF
select KVM_ASYNC_PF_SYNC
+ select KVM_COMMON
select HAVE_KVM_IRQCHIP
- select HAVE_KVM_IRQFD
select HAVE_KVM_IRQ_ROUTING
select HAVE_KVM_INVALID_WAKEUPS
select HAVE_KVM_NO_POLL
select KVM_VFIO
- select INTERVAL_TREE
select MMU_NOTIFIER
help
Support hosting paravirtualized guest machines using the SIE
diff --git a/arch/s390/kvm/gaccess.c b/arch/s390/kvm/gaccess.c
index 6d6bc19b3..5bfcc50c1 100644
--- a/arch/s390/kvm/gaccess.c
+++ b/arch/s390/kvm/gaccess.c
@@ -11,7 +11,7 @@
#include <linux/err.h>
#include <linux/pgtable.h>
#include <linux/bitfield.h>
-
+#include <asm/fault.h>
#include <asm/gmap.h>
#include "kvm-s390.h"
#include "gaccess.h"
@@ -466,23 +466,6 @@ static int ar_translation(struct kvm_vcpu *vcpu, union asce *asce, u8 ar,
return 0;
}
-struct trans_exc_code_bits {
- unsigned long addr : 52; /* Translation-exception Address */
- unsigned long fsi : 2; /* Access Exception Fetch/Store Indication */
- unsigned long : 2;
- unsigned long b56 : 1;
- unsigned long : 3;
- unsigned long b60 : 1;
- unsigned long b61 : 1;
- unsigned long as : 2; /* ASCE Identifier */
-};
-
-enum {
- FSI_UNKNOWN = 0, /* Unknown whether fetch or store */
- FSI_STORE = 1, /* Exception was due to store operation */
- FSI_FETCH = 2 /* Exception was due to fetch operation */
-};
-
enum prot_type {
PROT_TYPE_LA = 0,
PROT_TYPE_KEYC = 1,
@@ -497,11 +480,11 @@ static int trans_exc_ending(struct kvm_vcpu *vcpu, int code, unsigned long gva,
enum gacc_mode mode, enum prot_type prot, bool terminate)
{
struct kvm_s390_pgm_info *pgm = &vcpu->arch.pgm;
- struct trans_exc_code_bits *tec;
+ union teid *teid;
memset(pgm, 0, sizeof(*pgm));
pgm->code = code;
- tec = (struct trans_exc_code_bits *)&pgm->trans_exc_code;
+ teid = (union teid *)&pgm->trans_exc_code;
switch (code) {
case PGM_PROTECTION:
@@ -511,25 +494,25 @@ static int trans_exc_ending(struct kvm_vcpu *vcpu, int code, unsigned long gva,
WARN_ON_ONCE(1);
break;
case PROT_TYPE_IEP:
- tec->b61 = 1;
+ teid->b61 = 1;
fallthrough;
case PROT_TYPE_LA:
- tec->b56 = 1;
+ teid->b56 = 1;
break;
case PROT_TYPE_KEYC:
- tec->b60 = 1;
+ teid->b60 = 1;
break;
case PROT_TYPE_ALC:
- tec->b60 = 1;
+ teid->b60 = 1;
fallthrough;
case PROT_TYPE_DAT:
- tec->b61 = 1;
+ teid->b61 = 1;
break;
}
if (terminate) {
- tec->b56 = 0;
- tec->b60 = 0;
- tec->b61 = 0;
+ teid->b56 = 0;
+ teid->b60 = 0;
+ teid->b61 = 0;
}
fallthrough;
case PGM_ASCE_TYPE:
@@ -543,9 +526,9 @@ static int trans_exc_ending(struct kvm_vcpu *vcpu, int code, unsigned long gva,
* exc_access_id has to be set to 0 for some instructions. Both
* cases have to be handled by the caller.
*/
- tec->addr = gva >> PAGE_SHIFT;
- tec->fsi = mode == GACC_STORE ? FSI_STORE : FSI_FETCH;
- tec->as = psw_bits(vcpu->arch.sie_block->gpsw).as;
+ teid->addr = gva >> PAGE_SHIFT;
+ teid->fsi = mode == GACC_STORE ? TEID_FSI_STORE : TEID_FSI_FETCH;
+ teid->as = psw_bits(vcpu->arch.sie_block->gpsw).as;
fallthrough;
case PGM_ALEN_TRANSLATION:
case PGM_ALE_SEQUENCE:
@@ -1382,6 +1365,7 @@ static int kvm_s390_shadow_tables(struct gmap *sg, unsigned long saddr,
unsigned long *pgt, int *dat_protection,
int *fake)
{
+ struct kvm *kvm;
struct gmap *parent;
union asce asce;
union vaddress vaddr;
@@ -1390,6 +1374,7 @@ static int kvm_s390_shadow_tables(struct gmap *sg, unsigned long saddr,
*fake = 0;
*dat_protection = 0;
+ kvm = sg->private;
parent = sg->parent;
vaddr.addr = saddr;
asce.val = sg->orig_asce;
@@ -1450,6 +1435,7 @@ shadow_r2t:
rc = gmap_shadow_r2t(sg, saddr, rfte.val, *fake);
if (rc)
return rc;
+ kvm->stat.gmap_shadow_r1_entry++;
}
fallthrough;
case ASCE_TYPE_REGION2: {
@@ -1478,6 +1464,7 @@ shadow_r3t:
rc = gmap_shadow_r3t(sg, saddr, rste.val, *fake);
if (rc)
return rc;
+ kvm->stat.gmap_shadow_r2_entry++;
}
fallthrough;
case ASCE_TYPE_REGION3: {
@@ -1515,6 +1502,7 @@ shadow_sgt:
rc = gmap_shadow_sgt(sg, saddr, rtte.val, *fake);
if (rc)
return rc;
+ kvm->stat.gmap_shadow_r3_entry++;
}
fallthrough;
case ASCE_TYPE_SEGMENT: {
@@ -1548,6 +1536,7 @@ shadow_pgt:
rc = gmap_shadow_pgt(sg, saddr, ste.val, *fake);
if (rc)
return rc;
+ kvm->stat.gmap_shadow_sg_entry++;
}
}
/* Return the parent address of the page table */
@@ -1618,6 +1607,7 @@ shadow_page:
pte.p |= dat_protection;
if (!rc)
rc = gmap_shadow_page(sg, saddr, __pte(pte.val));
+ vcpu->kvm->stat.gmap_shadow_pg_entry++;
ipte_unlock(vcpu->kvm);
mmap_read_unlock(sg->mm);
return rc;
diff --git a/arch/s390/kvm/guestdbg.c b/arch/s390/kvm/guestdbg.c
index 3765c4223..80879fc73 100644
--- a/arch/s390/kvm/guestdbg.c
+++ b/arch/s390/kvm/guestdbg.c
@@ -213,8 +213,8 @@ int kvm_s390_import_bp_data(struct kvm_vcpu *vcpu,
else if (dbg->arch.nr_hw_bp > MAX_BP_COUNT)
return -EINVAL;
- bp_data = memdup_user(dbg->arch.hw_bp,
- sizeof(*bp_data) * dbg->arch.nr_hw_bp);
+ bp_data = memdup_array_user(dbg->arch.hw_bp, dbg->arch.nr_hw_bp,
+ sizeof(*bp_data));
if (IS_ERR(bp_data))
return PTR_ERR(bp_data);
diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c
index efaebba5e..fc4007cc0 100644
--- a/arch/s390/kvm/interrupt.c
+++ b/arch/s390/kvm/interrupt.c
@@ -639,7 +639,7 @@ static int __write_machine_check(struct kvm_vcpu *vcpu,
rc |= put_guest_lc(vcpu, mci.val, (u64 __user *) __LC_MCCK_CODE);
/* Register-save areas */
- if (MACHINE_HAS_VX) {
+ if (cpu_has_vx()) {
convert_vx_to_fp(fprs, (__vector128 *) vcpu->run->s.regs.vrs);
rc |= write_guest_lc(vcpu, __LC_FPREGS_SAVE_AREA, fprs, 128);
} else {
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index b3f17e014..7c17be6ad 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -66,7 +66,14 @@ const struct _kvm_stats_desc kvm_vm_stats_desc[] = {
STATS_DESC_COUNTER(VM, inject_pfault_done),
STATS_DESC_COUNTER(VM, inject_service_signal),
STATS_DESC_COUNTER(VM, inject_virtio),
- STATS_DESC_COUNTER(VM, aen_forward)
+ STATS_DESC_COUNTER(VM, aen_forward),
+ STATS_DESC_COUNTER(VM, gmap_shadow_reuse),
+ STATS_DESC_COUNTER(VM, gmap_shadow_create),
+ STATS_DESC_COUNTER(VM, gmap_shadow_r1_entry),
+ STATS_DESC_COUNTER(VM, gmap_shadow_r2_entry),
+ STATS_DESC_COUNTER(VM, gmap_shadow_r3_entry),
+ STATS_DESC_COUNTER(VM, gmap_shadow_sg_entry),
+ STATS_DESC_COUNTER(VM, gmap_shadow_pg_entry),
};
const struct kvm_stats_header kvm_vm_stats_header = {
@@ -556,7 +563,6 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
case KVM_CAP_ENABLE_CAP:
case KVM_CAP_S390_CSS_SUPPORT:
case KVM_CAP_IOEVENTFD:
- case KVM_CAP_DEVICE_CTRL:
case KVM_CAP_S390_IRQCHIP:
case KVM_CAP_VM_ATTRIBUTES:
case KVM_CAP_MP_STATE:
@@ -611,7 +617,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
r = MACHINE_HAS_ESOP;
break;
case KVM_CAP_S390_VECTOR_REGISTERS:
- r = MACHINE_HAS_VX;
+ r = test_facility(129);
break;
case KVM_CAP_S390_RI:
r = test_facility(64);
@@ -760,7 +766,7 @@ int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap)
mutex_lock(&kvm->lock);
if (kvm->created_vcpus) {
r = -EBUSY;
- } else if (MACHINE_HAS_VX) {
+ } else if (cpu_has_vx()) {
set_kvm_facility(kvm->arch.model.fac_mask, 129);
set_kvm_facility(kvm->arch.model.fac_list, 129);
if (test_facility(134)) {
@@ -2625,9 +2631,7 @@ static int kvm_s390_handle_pv(struct kvm *kvm, struct kvm_pv_cmd *cmd)
if (r)
break;
- mmap_write_lock(current->mm);
- r = gmap_mark_unmergeable();
- mmap_write_unlock(current->mm);
+ r = s390_disable_cow_sharing();
if (r)
break;
@@ -3955,9 +3959,9 @@ int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu)
if (test_kvm_facility(vcpu->kvm, 156))
vcpu->run->kvm_valid_regs |= KVM_SYNC_ETOKEN;
/* fprs can be synchronized via vrs, even if the guest has no vx. With
- * MACHINE_HAS_VX, (load|store)_fpu_regs() will work with vrs format.
+ * cpu_has_vx(), (load|store)_fpu_regs() will work with vrs format.
*/
- if (MACHINE_HAS_VX)
+ if (cpu_has_vx())
vcpu->run->kvm_valid_regs |= KVM_SYNC_VRS;
else
vcpu->run->kvm_valid_regs |= KVM_SYNC_FPRS;
@@ -4053,6 +4057,8 @@ static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
unsigned long prefix;
unsigned long i;
+ trace_kvm_s390_gmap_notifier(start, end, gmap_is_shadow(gmap));
+
if (gmap_is_shadow(gmap))
return;
if (start >= 1UL << 31)
@@ -4307,18 +4313,13 @@ int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
vcpu_load(vcpu);
- if (test_fp_ctl(fpu->fpc)) {
- ret = -EINVAL;
- goto out;
- }
vcpu->run->s.regs.fpc = fpu->fpc;
- if (MACHINE_HAS_VX)
+ if (cpu_has_vx())
convert_fp_to_vx((__vector128 *) vcpu->run->s.regs.vrs,
(freg_t *) fpu->fprs);
else
memcpy(vcpu->run->s.regs.fprs, &fpu->fprs, sizeof(fpu->fprs));
-out:
vcpu_put(vcpu);
return ret;
}
@@ -4327,9 +4328,7 @@ int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
{
vcpu_load(vcpu);
- /* make sure we have the latest values */
- save_fpu_regs();
- if (MACHINE_HAS_VX)
+ if (cpu_has_vx())
convert_vx_to_fp((freg_t *) fpu->fprs,
(__vector128 *) vcpu->run->s.regs.vrs);
else
@@ -4918,7 +4917,7 @@ static void sync_regs_fmt2(struct kvm_vcpu *vcpu)
}
if (MACHINE_HAS_GS) {
preempt_disable();
- __ctl_set_bit(2, 4);
+ local_ctl_set_bit(2, CR2_GUARDED_STORAGE_BIT);
if (current->thread.gs_cb) {
vcpu->arch.host_gscb = current->thread.gs_cb;
save_gs_cb(vcpu->arch.host_gscb);
@@ -4954,14 +4953,11 @@ static void sync_regs(struct kvm_vcpu *vcpu)
save_fpu_regs();
vcpu->arch.host_fpregs.fpc = current->thread.fpu.fpc;
vcpu->arch.host_fpregs.regs = current->thread.fpu.regs;
- if (MACHINE_HAS_VX)
+ if (cpu_has_vx())
current->thread.fpu.regs = vcpu->run->s.regs.vrs;
else
current->thread.fpu.regs = vcpu->run->s.regs.fprs;
current->thread.fpu.fpc = vcpu->run->s.regs.fpc;
- if (test_fp_ctl(current->thread.fpu.fpc))
- /* User space provided an invalid FPC, let's clear it */
- current->thread.fpu.fpc = 0;
/* Sync fmt2 only data */
if (likely(!kvm_s390_pv_cpu_is_protected(vcpu))) {
@@ -4995,13 +4991,13 @@ static void store_regs_fmt2(struct kvm_vcpu *vcpu)
kvm_run->s.regs.diag318 = vcpu->arch.diag318_info.val;
if (MACHINE_HAS_GS) {
preempt_disable();
- __ctl_set_bit(2, 4);
+ local_ctl_set_bit(2, CR2_GUARDED_STORAGE_BIT);
if (vcpu->arch.gs_enabled)
save_gs_cb(current->thread.gs_cb);
current->thread.gs_cb = vcpu->arch.host_gscb;
restore_gs_cb(vcpu->arch.host_gscb);
if (!vcpu->arch.host_gscb)
- __ctl_clear_bit(2, 4);
+ local_ctl_clear_bit(2, CR2_GUARDED_STORAGE_BIT);
vcpu->arch.host_gscb = NULL;
preempt_enable();
}
@@ -5136,7 +5132,7 @@ int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long gpa)
gpa -= __LC_FPREGS_SAVE_AREA;
/* manually convert vector registers if necessary */
- if (MACHINE_HAS_VX) {
+ if (cpu_has_vx()) {
convert_vx_to_fp(fprs, (__vector128 *) vcpu->run->s.regs.vrs);
rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
fprs, 128);
diff --git a/arch/s390/kvm/priv.c b/arch/s390/kvm/priv.c
index dc4cfa879..f875a404a 100644
--- a/arch/s390/kvm/priv.c
+++ b/arch/s390/kvm/priv.c
@@ -57,7 +57,7 @@ static int handle_gs(struct kvm_vcpu *vcpu)
if (test_kvm_facility(vcpu->kvm, 133)) {
VCPU_EVENT(vcpu, 3, "%s", "ENABLE: GS (lazy)");
preempt_disable();
- __ctl_set_bit(2, 4);
+ local_ctl_set_bit(2, CR2_GUARDED_STORAGE_BIT);
current->thread.gs_cb = (struct gs_cb *)&vcpu->run->s.regs.gscb;
restore_gs_cb(current->thread.gs_cb);
preempt_enable();
@@ -676,8 +676,12 @@ static int handle_pqap(struct kvm_vcpu *vcpu)
if (vcpu->kvm->arch.crypto.pqap_hook) {
pqap_hook = *vcpu->kvm->arch.crypto.pqap_hook;
ret = pqap_hook(vcpu);
- if (!ret && vcpu->run->s.regs.gprs[1] & 0x00ff0000)
- kvm_s390_set_psw_cc(vcpu, 3);
+ if (!ret) {
+ if (vcpu->run->s.regs.gprs[1] & 0x00ff0000)
+ kvm_s390_set_psw_cc(vcpu, 3);
+ else
+ kvm_s390_set_psw_cc(vcpu, 0);
+ }
up_read(&vcpu->kvm->arch.crypto.pqap_hook_rwsem);
return ret;
}
diff --git a/arch/s390/kvm/trace-s390.h b/arch/s390/kvm/trace-s390.h
index 6f0209d45..9ac92dbf6 100644
--- a/arch/s390/kvm/trace-s390.h
+++ b/arch/s390/kvm/trace-s390.h
@@ -333,6 +333,29 @@ TRACE_EVENT(kvm_s390_airq_suppressed,
__entry->id, __entry->isc)
);
+/*
+ * Trace point for gmap notifier calls.
+ */
+TRACE_EVENT(kvm_s390_gmap_notifier,
+ TP_PROTO(unsigned long start, unsigned long end, unsigned int shadow),
+ TP_ARGS(start, end, shadow),
+
+ TP_STRUCT__entry(
+ __field(unsigned long, start)
+ __field(unsigned long, end)
+ __field(unsigned int, shadow)
+ ),
+
+ TP_fast_assign(
+ __entry->start = start;
+ __entry->end = end;
+ __entry->shadow = shadow;
+ ),
+
+ TP_printk("gmap notified (start:0x%lx end:0x%lx shadow:%d)",
+ __entry->start, __entry->end, __entry->shadow)
+ );
+
#endif /* _TRACE_KVMS390_H */
diff --git a/arch/s390/kvm/vsie.c b/arch/s390/kvm/vsie.c
index e55f489e1..3af3bd20a 100644
--- a/arch/s390/kvm/vsie.c
+++ b/arch/s390/kvm/vsie.c
@@ -19,6 +19,7 @@
#include <asm/nmi.h>
#include <asm/dis.h>
#include <asm/fpu/api.h>
+#include <asm/facility.h>
#include "kvm-s390.h"
#include "gaccess.h"
@@ -984,12 +985,26 @@ static void retry_vsie_icpt(struct vsie_page *vsie_page)
static int handle_stfle(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
{
struct kvm_s390_sie_block *scb_s = &vsie_page->scb_s;
- __u32 fac = READ_ONCE(vsie_page->scb_o->fac) & 0x7ffffff8U;
+ __u32 fac = READ_ONCE(vsie_page->scb_o->fac);
+ /*
+ * Alternate-STFLE-Interpretive-Execution facilities are not supported
+ * -> format-0 flcb
+ */
if (fac && test_kvm_facility(vcpu->kvm, 7)) {
retry_vsie_icpt(vsie_page);
+ /*
+ * The facility list origin (FLO) is in bits 1 - 28 of the FLD
+ * so we need to mask here before reading.
+ */
+ fac = fac & 0x7ffffff8U;
+ /*
+ * format-0 -> size of nested guest's facility list == guest's size
+ * guest's size == host's size, since STFLE is interpretatively executed
+ * using a format-0 for the guest, too.
+ */
if (read_guest_real(vcpu, fac, &vsie_page->fac,
- sizeof(vsie_page->fac)))
+ stfle_size() * sizeof(u64)))
return set_validity_icpt(scb_s, 0x1090U);
scb_s->fac = (__u32)(__u64) &vsie_page->fac;
}
@@ -1210,15 +1225,17 @@ static int acquire_gmap_shadow(struct kvm_vcpu *vcpu,
* we're holding has been unshadowed. If the gmap is still valid,
* we can safely reuse it.
*/
- if (vsie_page->gmap && gmap_shadow_valid(vsie_page->gmap, asce, edat))
+ if (vsie_page->gmap && gmap_shadow_valid(vsie_page->gmap, asce, edat)) {
+ vcpu->kvm->stat.gmap_shadow_reuse++;
return 0;
+ }
/* release the old shadow - if any, and mark the prefix as unmapped */
release_gmap_shadow(vsie_page);
gmap = gmap_shadow(vcpu->arch.gmap, asce, edat);
if (IS_ERR(gmap))
return PTR_ERR(gmap);
- gmap->private = vcpu->kvm;
+ vcpu->kvm->stat.gmap_shadow_create++;
WRITE_ONCE(vsie_page->gmap, gmap);
return 0;
}
diff --git a/arch/s390/lib/test_unwind.c b/arch/s390/lib/test_unwind.c
index 7231bf97b..2848e3fb2 100644
--- a/arch/s390/lib/test_unwind.c
+++ b/arch/s390/lib/test_unwind.c
@@ -350,15 +350,15 @@ static noinline int unwindme_func3(struct unwindme *u)
/* This function must appear in the backtrace. */
static noinline int unwindme_func2(struct unwindme *u)
{
- unsigned long flags;
+ unsigned long flags, mflags;
int rc;
if (u->flags & UWM_SWITCH_STACK) {
local_irq_save(flags);
- local_mcck_disable();
+ local_mcck_save(mflags);
rc = call_on_stack(1, S390_lowcore.nodat_stack,
int, unwindme_func3, struct unwindme *, u);
- local_mcck_enable();
+ local_mcck_restore(mflags);
local_irq_restore(flags);
return rc;
} else {
diff --git a/arch/s390/lib/uaccess.c b/arch/s390/lib/uaccess.c
index e4a13d7ca..61d8dcd95 100644
--- a/arch/s390/lib/uaccess.c
+++ b/arch/s390/lib/uaccess.c
@@ -12,21 +12,22 @@
#include <linux/export.h>
#include <linux/mm.h>
#include <asm/asm-extable.h>
+#include <asm/ctlreg.h>
#ifdef CONFIG_DEBUG_ENTRY
void debug_user_asce(int exit)
{
- unsigned long cr1, cr7;
+ struct ctlreg cr1, cr7;
- __ctl_store(cr1, 1, 1);
- __ctl_store(cr7, 7, 7);
- if (cr1 == S390_lowcore.kernel_asce && cr7 == S390_lowcore.user_asce)
+ local_ctl_store(1, &cr1);
+ local_ctl_store(7, &cr7);
+ if (cr1.val == S390_lowcore.kernel_asce.val && cr7.val == S390_lowcore.user_asce.val)
return;
panic("incorrect ASCE on kernel %s\n"
"cr1: %016lx cr7: %016lx\n"
- "kernel: %016llx user: %016llx\n",
- exit ? "exit" : "entry", cr1, cr7,
- S390_lowcore.kernel_asce, S390_lowcore.user_asce);
+ "kernel: %016lx user: %016lx\n",
+ exit ? "exit" : "entry", cr1.val, cr7.val,
+ S390_lowcore.kernel_asce.val, S390_lowcore.user_asce.val);
}
#endif /*CONFIG_DEBUG_ENTRY */
diff --git a/arch/s390/mm/cmm.c b/arch/s390/mm/cmm.c
index f47515313..f8b13f247 100644
--- a/arch/s390/mm/cmm.c
+++ b/arch/s390/mm/cmm.c
@@ -332,7 +332,6 @@ static struct ctl_table cmm_table[] = {
.mode = 0644,
.proc_handler = cmm_timeout_handler,
},
- { }
};
#ifdef CONFIG_CMM_IUCV
diff --git a/arch/s390/mm/dump_pagetables.c b/arch/s390/mm/dump_pagetables.c
index b51666967..d37a8f607 100644
--- a/arch/s390/mm/dump_pagetables.c
+++ b/arch/s390/mm/dump_pagetables.c
@@ -287,7 +287,7 @@ static int pt_dump_init(void)
* kernel ASCE. We need this to keep the page table walker functions
* from accessing non-existent entries.
*/
- max_addr = (S390_lowcore.kernel_asce & _REGION_ENTRY_TYPE_MASK) >> 2;
+ max_addr = (S390_lowcore.kernel_asce.val & _REGION_ENTRY_TYPE_MASK) >> 2;
max_addr = 1UL << (max_addr * 11 + 31);
address_markers[IDENTITY_AFTER_END_NR].start_address = ident_map_size;
address_markers[AMODE31_START_NR].start_address = (unsigned long)__samode31;
diff --git a/arch/s390/mm/extable.c b/arch/s390/mm/extable.c
index fe87291df..0a0738a47 100644
--- a/arch/s390/mm/extable.c
+++ b/arch/s390/mm/extable.c
@@ -61,6 +61,22 @@ static bool ex_handler_ua_load_reg(const struct exception_table_entry *ex,
return true;
}
+static bool ex_handler_zeropad(const struct exception_table_entry *ex, struct pt_regs *regs)
+{
+ unsigned int reg_addr = FIELD_GET(EX_DATA_REG_ADDR, ex->data);
+ unsigned int reg_data = FIELD_GET(EX_DATA_REG_ERR, ex->data);
+ unsigned long data, addr, offset;
+
+ addr = regs->gprs[reg_addr];
+ offset = addr & (sizeof(unsigned long) - 1);
+ addr &= ~(sizeof(unsigned long) - 1);
+ data = *(unsigned long *)addr;
+ data <<= BITS_PER_BYTE * offset;
+ regs->gprs[reg_data] = data;
+ regs->psw.addr = extable_fixup(ex);
+ return true;
+}
+
bool fixup_exception(struct pt_regs *regs)
{
const struct exception_table_entry *ex;
@@ -81,6 +97,8 @@ bool fixup_exception(struct pt_regs *regs)
return ex_handler_ua_load_reg(ex, false, regs);
case EX_TYPE_UA_LOAD_REGPAIR:
return ex_handler_ua_load_reg(ex, true, regs);
+ case EX_TYPE_ZEROPAD:
+ return ex_handler_zeropad(ex, regs);
}
panic("invalid exception table entry");
}
diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c
index b67829593..ac4c78546 100644
--- a/arch/s390/mm/fault.c
+++ b/arch/s390/mm/fault.c
@@ -3,17 +3,19 @@
* S390 version
* Copyright IBM Corp. 1999
* Author(s): Hartmut Penner (hp@de.ibm.com)
- * Ulrich Weigand (uweigand@de.ibm.com)
+ * Ulrich Weigand (uweigand@de.ibm.com)
*
* Derived from "arch/i386/mm/fault.c"
* Copyright (C) 1995 Linus Torvalds
*/
#include <linux/kernel_stat.h>
+#include <linux/mmu_context.h>
#include <linux/perf_event.h>
#include <linux/signal.h>
#include <linux/sched.h>
#include <linux/sched/debug.h>
+#include <linux/jump_label.h>
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/string.h>
@@ -34,38 +36,27 @@
#include <linux/kfence.h>
#include <asm/asm-extable.h>
#include <asm/asm-offsets.h>
+#include <asm/ptrace.h>
+#include <asm/fault.h>
#include <asm/diag.h>
#include <asm/gmap.h>
#include <asm/irq.h>
-#include <asm/mmu_context.h>
#include <asm/facility.h>
#include <asm/uv.h>
#include "../kernel/entry.h"
-#define __FAIL_ADDR_MASK -4096L
-
-/*
- * Allocate private vm_fault_reason from top. Please make sure it won't
- * collide with vm_fault_reason.
- */
-#define VM_FAULT_BADCONTEXT ((__force vm_fault_t)0x80000000)
-#define VM_FAULT_BADMAP ((__force vm_fault_t)0x40000000)
-#define VM_FAULT_BADACCESS ((__force vm_fault_t)0x20000000)
-#define VM_FAULT_SIGNAL ((__force vm_fault_t)0x10000000)
-#define VM_FAULT_PFAULT ((__force vm_fault_t)0x8000000)
-
enum fault_type {
KERNEL_FAULT,
USER_FAULT,
GMAP_FAULT,
};
-static unsigned long store_indication __read_mostly;
+static DEFINE_STATIC_KEY_FALSE(have_store_indication);
static int __init fault_init(void)
{
if (test_facility(75))
- store_indication = 0xc00;
+ static_branch_enable(&have_store_indication);
return 0;
}
early_initcall(fault_init);
@@ -75,11 +66,9 @@ early_initcall(fault_init);
*/
static enum fault_type get_fault_type(struct pt_regs *regs)
{
- unsigned long trans_exc_code;
+ union teid teid = { .val = regs->int_parm_long };
- trans_exc_code = regs->int_parm_long & 3;
- if (likely(trans_exc_code == 0)) {
- /* primary space exception */
+ if (likely(teid.as == PSW_BITS_AS_PRIMARY)) {
if (user_mode(regs))
return USER_FAULT;
if (!IS_ENABLED(CONFIG_PGSTE))
@@ -88,83 +77,77 @@ static enum fault_type get_fault_type(struct pt_regs *regs)
return GMAP_FAULT;
return KERNEL_FAULT;
}
- if (trans_exc_code == 2)
+ if (teid.as == PSW_BITS_AS_SECONDARY)
return USER_FAULT;
- if (trans_exc_code == 1) {
- /* access register mode, not used in the kernel */
+ /* Access register mode, not used in the kernel */
+ if (teid.as == PSW_BITS_AS_ACCREG)
return USER_FAULT;
- }
- /* home space exception -> access via kernel ASCE */
+ /* Home space -> access via kernel ASCE */
return KERNEL_FAULT;
}
static unsigned long get_fault_address(struct pt_regs *regs)
{
- unsigned long trans_exc_code = regs->int_parm_long;
-
- return trans_exc_code & __FAIL_ADDR_MASK;
-}
-
-static bool fault_is_write(struct pt_regs *regs)
-{
- unsigned long trans_exc_code = regs->int_parm_long;
+ union teid teid = { .val = regs->int_parm_long };
- return (trans_exc_code & store_indication) == 0x400;
+ return teid.addr * PAGE_SIZE;
}
-static int bad_address(void *p)
+static __always_inline bool fault_is_write(struct pt_regs *regs)
{
- unsigned long dummy;
+ union teid teid = { .val = regs->int_parm_long };
- return get_kernel_nofault(dummy, (unsigned long *)p);
+ if (static_branch_likely(&have_store_indication))
+ return teid.fsi == TEID_FSI_STORE;
+ return false;
}
static void dump_pagetable(unsigned long asce, unsigned long address)
{
- unsigned long *table = __va(asce & _ASCE_ORIGIN);
+ unsigned long entry, *table = __va(asce & _ASCE_ORIGIN);
pr_alert("AS:%016lx ", asce);
switch (asce & _ASCE_TYPE_MASK) {
case _ASCE_TYPE_REGION1:
table += (address & _REGION1_INDEX) >> _REGION1_SHIFT;
- if (bad_address(table))
+ if (get_kernel_nofault(entry, table))
goto bad;
- pr_cont("R1:%016lx ", *table);
- if (*table & _REGION_ENTRY_INVALID)
+ pr_cont("R1:%016lx ", entry);
+ if (entry & _REGION_ENTRY_INVALID)
goto out;
- table = __va(*table & _REGION_ENTRY_ORIGIN);
+ table = __va(entry & _REGION_ENTRY_ORIGIN);
fallthrough;
case _ASCE_TYPE_REGION2:
table += (address & _REGION2_INDEX) >> _REGION2_SHIFT;
- if (bad_address(table))
+ if (get_kernel_nofault(entry, table))
goto bad;
- pr_cont("R2:%016lx ", *table);
- if (*table & _REGION_ENTRY_INVALID)
+ pr_cont("R2:%016lx ", entry);
+ if (entry & _REGION_ENTRY_INVALID)
goto out;
- table = __va(*table & _REGION_ENTRY_ORIGIN);
+ table = __va(entry & _REGION_ENTRY_ORIGIN);
fallthrough;
case _ASCE_TYPE_REGION3:
table += (address & _REGION3_INDEX) >> _REGION3_SHIFT;
- if (bad_address(table))
+ if (get_kernel_nofault(entry, table))
goto bad;
- pr_cont("R3:%016lx ", *table);
- if (*table & (_REGION_ENTRY_INVALID | _REGION3_ENTRY_LARGE))
+ pr_cont("R3:%016lx ", entry);
+ if (entry & (_REGION_ENTRY_INVALID | _REGION3_ENTRY_LARGE))
goto out;
- table = __va(*table & _REGION_ENTRY_ORIGIN);
+ table = __va(entry & _REGION_ENTRY_ORIGIN);
fallthrough;
case _ASCE_TYPE_SEGMENT:
table += (address & _SEGMENT_INDEX) >> _SEGMENT_SHIFT;
- if (bad_address(table))
+ if (get_kernel_nofault(entry, table))
goto bad;
- pr_cont("S:%016lx ", *table);
- if (*table & (_SEGMENT_ENTRY_INVALID | _SEGMENT_ENTRY_LARGE))
+ pr_cont("S:%016lx ", entry);
+ if (entry & (_SEGMENT_ENTRY_INVALID | _SEGMENT_ENTRY_LARGE))
goto out;
- table = __va(*table & _SEGMENT_ENTRY_ORIGIN);
+ table = __va(entry & _SEGMENT_ENTRY_ORIGIN);
}
table += (address & _PAGE_INDEX) >> _PAGE_SHIFT;
- if (bad_address(table))
+ if (get_kernel_nofault(entry, table))
goto bad;
- pr_cont("P:%016lx ", *table);
+ pr_cont("P:%016lx ", entry);
out:
pr_cont("\n");
return;
@@ -174,173 +157,113 @@ bad:
static void dump_fault_info(struct pt_regs *regs)
{
+ union teid teid = { .val = regs->int_parm_long };
unsigned long asce;
pr_alert("Failing address: %016lx TEID: %016lx\n",
- regs->int_parm_long & __FAIL_ADDR_MASK, regs->int_parm_long);
+ get_fault_address(regs), teid.val);
pr_alert("Fault in ");
- switch (regs->int_parm_long & 3) {
- case 3:
+ switch (teid.as) {
+ case PSW_BITS_AS_HOME:
pr_cont("home space ");
break;
- case 2:
+ case PSW_BITS_AS_SECONDARY:
pr_cont("secondary space ");
break;
- case 1:
+ case PSW_BITS_AS_ACCREG:
pr_cont("access register ");
break;
- case 0:
+ case PSW_BITS_AS_PRIMARY:
pr_cont("primary space ");
break;
}
pr_cont("mode while using ");
switch (get_fault_type(regs)) {
case USER_FAULT:
- asce = S390_lowcore.user_asce;
+ asce = S390_lowcore.user_asce.val;
pr_cont("user ");
break;
case GMAP_FAULT:
- asce = ((struct gmap *) S390_lowcore.gmap)->asce;
+ asce = ((struct gmap *)S390_lowcore.gmap)->asce;
pr_cont("gmap ");
break;
case KERNEL_FAULT:
- asce = S390_lowcore.kernel_asce;
+ asce = S390_lowcore.kernel_asce.val;
pr_cont("kernel ");
break;
default:
unreachable();
}
pr_cont("ASCE.\n");
- dump_pagetable(asce, regs->int_parm_long & __FAIL_ADDR_MASK);
+ dump_pagetable(asce, get_fault_address(regs));
}
int show_unhandled_signals = 1;
void report_user_fault(struct pt_regs *regs, long signr, int is_mm_fault)
{
+ static DEFINE_RATELIMIT_STATE(rs, DEFAULT_RATELIMIT_INTERVAL, DEFAULT_RATELIMIT_BURST);
+
if ((task_pid_nr(current) > 1) && !show_unhandled_signals)
return;
if (!unhandled_signal(current, signr))
return;
- if (!printk_ratelimit())
+ if (!__ratelimit(&rs))
return;
- printk(KERN_ALERT "User process fault: interruption code %04x ilc:%d ",
- regs->int_code & 0xffff, regs->int_code >> 17);
+ pr_alert("User process fault: interruption code %04x ilc:%d ",
+ regs->int_code & 0xffff, regs->int_code >> 17);
print_vma_addr(KERN_CONT "in ", regs->psw.addr);
- printk(KERN_CONT "\n");
+ pr_cont("\n");
if (is_mm_fault)
dump_fault_info(regs);
show_regs(regs);
}
-/*
- * Send SIGSEGV to task. This is an external routine
- * to keep the stack usage of do_page_fault small.
- */
-static noinline void do_sigsegv(struct pt_regs *regs, int si_code)
+static void do_sigsegv(struct pt_regs *regs, int si_code)
{
report_user_fault(regs, SIGSEGV, 1);
- force_sig_fault(SIGSEGV, si_code,
- (void __user *)(regs->int_parm_long & __FAIL_ADDR_MASK));
+ force_sig_fault(SIGSEGV, si_code, (void __user *)get_fault_address(regs));
}
-static noinline void do_no_context(struct pt_regs *regs, vm_fault_t fault)
+static void handle_fault_error_nolock(struct pt_regs *regs, int si_code)
{
enum fault_type fault_type;
unsigned long address;
bool is_write;
+ if (user_mode(regs)) {
+ if (WARN_ON_ONCE(!si_code))
+ si_code = SEGV_MAPERR;
+ return do_sigsegv(regs, si_code);
+ }
if (fixup_exception(regs))
return;
fault_type = get_fault_type(regs);
- if ((fault_type == KERNEL_FAULT) && (fault == VM_FAULT_BADCONTEXT)) {
+ if (fault_type == KERNEL_FAULT) {
address = get_fault_address(regs);
is_write = fault_is_write(regs);
if (kfence_handle_page_fault(address, is_write, regs))
return;
}
- /*
- * Oops. The kernel tried to access some bad page. We'll have to
- * terminate things with extreme prejudice.
- */
if (fault_type == KERNEL_FAULT)
- printk(KERN_ALERT "Unable to handle kernel pointer dereference"
- " in virtual kernel address space\n");
+ pr_alert("Unable to handle kernel pointer dereference in virtual kernel address space\n");
else
- printk(KERN_ALERT "Unable to handle kernel paging request"
- " in virtual user address space\n");
+ pr_alert("Unable to handle kernel paging request in virtual user address space\n");
dump_fault_info(regs);
die(regs, "Oops");
}
-static noinline void do_low_address(struct pt_regs *regs)
+static void handle_fault_error(struct pt_regs *regs, int si_code)
{
- /* Low-address protection hit in kernel mode means
- NULL pointer write access in kernel mode. */
- if (regs->psw.mask & PSW_MASK_PSTATE) {
- /* Low-address protection hit in user mode 'cannot happen'. */
- die (regs, "Low-address protection");
- }
+ struct mm_struct *mm = current->mm;
- do_no_context(regs, VM_FAULT_BADACCESS);
+ mmap_read_unlock(mm);
+ handle_fault_error_nolock(regs, si_code);
}
-static noinline void do_sigbus(struct pt_regs *regs)
+static void do_sigbus(struct pt_regs *regs)
{
- /*
- * Send a sigbus, regardless of whether we were in kernel
- * or user mode.
- */
- force_sig_fault(SIGBUS, BUS_ADRERR,
- (void __user *)(regs->int_parm_long & __FAIL_ADDR_MASK));
-}
-
-static noinline void do_fault_error(struct pt_regs *regs, vm_fault_t fault)
-{
- int si_code;
-
- switch (fault) {
- case VM_FAULT_BADACCESS:
- case VM_FAULT_BADMAP:
- /* Bad memory access. Check if it is kernel or user space. */
- if (user_mode(regs)) {
- /* User mode accesses just cause a SIGSEGV */
- si_code = (fault == VM_FAULT_BADMAP) ?
- SEGV_MAPERR : SEGV_ACCERR;
- do_sigsegv(regs, si_code);
- break;
- }
- fallthrough;
- case VM_FAULT_BADCONTEXT:
- case VM_FAULT_PFAULT:
- do_no_context(regs, fault);
- break;
- case VM_FAULT_SIGNAL:
- if (!user_mode(regs))
- do_no_context(regs, fault);
- break;
- default: /* fault & VM_FAULT_ERROR */
- if (fault & VM_FAULT_OOM) {
- if (!user_mode(regs))
- do_no_context(regs, fault);
- else
- pagefault_out_of_memory();
- } else if (fault & VM_FAULT_SIGSEGV) {
- /* Kernel mode? Handle exceptions or die */
- if (!user_mode(regs))
- do_no_context(regs, fault);
- else
- do_sigsegv(regs, SEGV_MAPERR);
- } else if (fault & VM_FAULT_SIGBUS) {
- /* Kernel mode? Handle exceptions or die */
- if (!user_mode(regs))
- do_no_context(regs, fault);
- else
- do_sigbus(regs);
- } else
- BUG();
- break;
- }
+ force_sig_fault(SIGBUS, BUS_ADRERR, (void __user *)get_fault_address(regs));
}
/*
@@ -349,54 +272,42 @@ static noinline void do_fault_error(struct pt_regs *regs, vm_fault_t fault)
* routines.
*
* interruption code (int_code):
- * 04 Protection -> Write-Protection (suppression)
- * 10 Segment translation -> Not present (nullification)
- * 11 Page translation -> Not present (nullification)
- * 3b Region third trans. -> Not present (nullification)
+ * 04 Protection -> Write-Protection (suppression)
+ * 10 Segment translation -> Not present (nullification)
+ * 11 Page translation -> Not present (nullification)
+ * 3b Region third trans. -> Not present (nullification)
*/
-static inline vm_fault_t do_exception(struct pt_regs *regs, int access)
+static void do_exception(struct pt_regs *regs, int access)
{
- struct gmap *gmap;
- struct task_struct *tsk;
- struct mm_struct *mm;
struct vm_area_struct *vma;
- enum fault_type type;
unsigned long address;
+ struct mm_struct *mm;
+ enum fault_type type;
unsigned int flags;
+ struct gmap *gmap;
vm_fault_t fault;
bool is_write;
- tsk = current;
/*
* The instruction that caused the program check has
* been nullified. Don't signal single step via SIGTRAP.
*/
clear_thread_flag(TIF_PER_TRAP);
-
if (kprobe_page_fault(regs, 14))
- return 0;
-
- mm = tsk->mm;
+ return;
+ mm = current->mm;
address = get_fault_address(regs);
is_write = fault_is_write(regs);
-
- /*
- * Verify that the fault happened in user space, that
- * we are not in an interrupt and that there is a
- * user context.
- */
- fault = VM_FAULT_BADCONTEXT;
type = get_fault_type(regs);
switch (type) {
case KERNEL_FAULT:
- goto out;
+ return handle_fault_error_nolock(regs, 0);
case USER_FAULT:
case GMAP_FAULT:
if (faulthandler_disabled() || !mm)
- goto out;
+ return handle_fault_error_nolock(regs, 0);
break;
}
-
perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address);
flags = FAULT_FLAG_DEFAULT;
if (user_mode(regs))
@@ -419,125 +330,120 @@ static inline vm_fault_t do_exception(struct pt_regs *regs, int access)
vma_end_read(vma);
if (!(fault & VM_FAULT_RETRY)) {
count_vm_vma_lock_event(VMA_LOCK_SUCCESS);
- if (likely(!(fault & VM_FAULT_ERROR)))
- fault = 0;
- goto out;
+ if (unlikely(fault & VM_FAULT_ERROR))
+ goto error;
+ return;
}
count_vm_vma_lock_event(VMA_LOCK_RETRY);
+ if (fault & VM_FAULT_MAJOR)
+ flags |= FAULT_FLAG_TRIED;
+
/* Quick path to respond to signals */
if (fault_signal_pending(fault, regs)) {
- fault = VM_FAULT_SIGNAL;
- goto out;
+ if (!user_mode(regs))
+ handle_fault_error_nolock(regs, 0);
+ return;
}
lock_mmap:
mmap_read_lock(mm);
-
gmap = NULL;
if (IS_ENABLED(CONFIG_PGSTE) && type == GMAP_FAULT) {
- gmap = (struct gmap *) S390_lowcore.gmap;
+ gmap = (struct gmap *)S390_lowcore.gmap;
current->thread.gmap_addr = address;
current->thread.gmap_write_flag = !!(flags & FAULT_FLAG_WRITE);
current->thread.gmap_int_code = regs->int_code & 0xffff;
address = __gmap_translate(gmap, address);
- if (address == -EFAULT) {
- fault = VM_FAULT_BADMAP;
- goto out_up;
- }
+ if (address == -EFAULT)
+ return handle_fault_error(regs, SEGV_MAPERR);
if (gmap->pfault_enabled)
flags |= FAULT_FLAG_RETRY_NOWAIT;
}
-
retry:
- fault = VM_FAULT_BADMAP;
vma = find_vma(mm, address);
if (!vma)
- goto out_up;
-
+ return handle_fault_error(regs, SEGV_MAPERR);
if (unlikely(vma->vm_start > address)) {
if (!(vma->vm_flags & VM_GROWSDOWN))
- goto out_up;
+ return handle_fault_error(regs, SEGV_MAPERR);
vma = expand_stack(mm, address);
if (!vma)
- goto out;
+ return handle_fault_error_nolock(regs, SEGV_MAPERR);
}
-
- /*
- * Ok, we have a good vm_area for this memory access, so
- * we can handle it..
- */
- fault = VM_FAULT_BADACCESS;
if (unlikely(!(vma->vm_flags & access)))
- goto out_up;
-
- /*
- * If for any reason at all we couldn't handle the fault,
- * make sure we exit gracefully rather than endlessly redo
- * the fault.
- */
+ return handle_fault_error(regs, SEGV_ACCERR);
fault = handle_mm_fault(vma, address, flags, regs);
if (fault_signal_pending(fault, regs)) {
- fault = VM_FAULT_SIGNAL;
if (flags & FAULT_FLAG_RETRY_NOWAIT)
- goto out_up;
- goto out;
+ mmap_read_unlock(mm);
+ if (!user_mode(regs))
+ handle_fault_error_nolock(regs, 0);
+ return;
}
-
/* The fault is fully completed (including releasing mmap lock) */
if (fault & VM_FAULT_COMPLETED) {
if (gmap) {
mmap_read_lock(mm);
- goto out_gmap;
+ goto gmap;
}
- fault = 0;
- goto out;
+ return;
+ }
+ if (unlikely(fault & VM_FAULT_ERROR)) {
+ mmap_read_unlock(mm);
+ goto error;
}
-
- if (unlikely(fault & VM_FAULT_ERROR))
- goto out_up;
-
if (fault & VM_FAULT_RETRY) {
- if (IS_ENABLED(CONFIG_PGSTE) && gmap &&
- (flags & FAULT_FLAG_RETRY_NOWAIT)) {
+ if (IS_ENABLED(CONFIG_PGSTE) && gmap && (flags & FAULT_FLAG_RETRY_NOWAIT)) {
/*
- * FAULT_FLAG_RETRY_NOWAIT has been set, mmap_lock has
- * not been released
+ * FAULT_FLAG_RETRY_NOWAIT has been set,
+ * mmap_lock has not been released
*/
current->thread.gmap_pfault = 1;
- fault = VM_FAULT_PFAULT;
- goto out_up;
+ return handle_fault_error(regs, 0);
}
flags &= ~FAULT_FLAG_RETRY_NOWAIT;
flags |= FAULT_FLAG_TRIED;
mmap_read_lock(mm);
goto retry;
}
-out_gmap:
+gmap:
if (IS_ENABLED(CONFIG_PGSTE) && gmap) {
address = __gmap_link(gmap, current->thread.gmap_addr,
address);
- if (address == -EFAULT) {
- fault = VM_FAULT_BADMAP;
- goto out_up;
- }
+ if (address == -EFAULT)
+ return handle_fault_error(regs, SEGV_MAPERR);
if (address == -ENOMEM) {
fault = VM_FAULT_OOM;
- goto out_up;
+ mmap_read_unlock(mm);
+ goto error;
}
}
- fault = 0;
-out_up:
mmap_read_unlock(mm);
-out:
- return fault;
+ return;
+error:
+ if (fault & VM_FAULT_OOM) {
+ if (!user_mode(regs))
+ handle_fault_error_nolock(regs, 0);
+ else
+ pagefault_out_of_memory();
+ } else if (fault & VM_FAULT_SIGSEGV) {
+ if (!user_mode(regs))
+ handle_fault_error_nolock(regs, 0);
+ else
+ do_sigsegv(regs, SEGV_MAPERR);
+ } else if (fault & VM_FAULT_SIGBUS) {
+ if (!user_mode(regs))
+ handle_fault_error_nolock(regs, 0);
+ else
+ do_sigbus(regs);
+ } else {
+ BUG();
+ }
}
void do_protection_exception(struct pt_regs *regs)
{
- unsigned long trans_exc_code;
- int access;
- vm_fault_t fault;
+ union teid teid = { .val = regs->int_parm_long };
- trans_exc_code = regs->int_parm_long;
/*
* Protection exceptions are suppressing, decrement psw address.
* The exception to this rule are aborted transactions, for these
@@ -550,33 +456,28 @@ void do_protection_exception(struct pt_regs *regs)
* as a special case because the translation exception code
* field is not guaranteed to contain valid data in this case.
*/
- if (unlikely(!(trans_exc_code & 4))) {
- do_low_address(regs);
- return;
+ if (unlikely(!teid.b61)) {
+ if (user_mode(regs)) {
+ /* Low-address protection in user mode: cannot happen */
+ die(regs, "Low-address protection");
+ }
+ /*
+ * Low-address protection in kernel mode means
+ * NULL pointer write access in kernel mode.
+ */
+ return handle_fault_error_nolock(regs, 0);
}
- if (unlikely(MACHINE_HAS_NX && (trans_exc_code & 0x80))) {
- regs->int_parm_long = (trans_exc_code & ~PAGE_MASK) |
- (regs->psw.addr & PAGE_MASK);
- access = VM_EXEC;
- fault = VM_FAULT_BADACCESS;
- } else {
- access = VM_WRITE;
- fault = do_exception(regs, access);
+ if (unlikely(MACHINE_HAS_NX && teid.b56)) {
+ regs->int_parm_long = (teid.addr * PAGE_SIZE) | (regs->psw.addr & PAGE_MASK);
+ return handle_fault_error_nolock(regs, SEGV_ACCERR);
}
- if (unlikely(fault))
- do_fault_error(regs, fault);
+ do_exception(regs, VM_WRITE);
}
NOKPROBE_SYMBOL(do_protection_exception);
void do_dat_exception(struct pt_regs *regs)
{
- int access;
- vm_fault_t fault;
-
- access = VM_ACCESS_FLAGS;
- fault = do_exception(regs, access);
- if (unlikely(fault))
- do_fault_error(regs, fault);
+ do_exception(regs, VM_ACCESS_FLAGS);
}
NOKPROBE_SYMBOL(do_dat_exception);
@@ -584,7 +485,8 @@ NOKPROBE_SYMBOL(do_dat_exception);
void do_secure_storage_access(struct pt_regs *regs)
{
- unsigned long addr = regs->int_parm_long & __FAIL_ADDR_MASK;
+ union teid teid = { .val = regs->int_parm_long };
+ unsigned long addr = get_fault_address(regs);
struct vm_area_struct *vma;
struct mm_struct *mm;
struct page *page;
@@ -592,14 +494,12 @@ void do_secure_storage_access(struct pt_regs *regs)
int rc;
/*
- * bit 61 tells us if the address is valid, if it's not we
- * have a major problem and should stop the kernel or send a
- * SIGSEGV to the process. Unfortunately bit 61 is not
- * reliable without the misc UV feature so we need to check
- * for that as well.
+ * Bit 61 indicates if the address is valid, if it is not the
+ * kernel should be stopped or SIGSEGV should be sent to the
+ * process. Bit 61 is not reliable without the misc UV feature,
+ * therefore this needs to be checked too.
*/
- if (uv_has_feature(BIT_UV_FEAT_MISC) &&
- !test_bit_inv(61, &regs->int_parm_long)) {
+ if (uv_has_feature(BIT_UV_FEAT_MISC) && !teid.b61) {
/*
* When this happens, userspace did something that it
* was not supposed to do, e.g. branching into secure
@@ -609,14 +509,12 @@ void do_secure_storage_access(struct pt_regs *regs)
send_sig(SIGSEGV, current, 0);
return;
}
-
/*
- * The kernel should never run into this case and we
- * have no way out of this situation.
+ * The kernel should never run into this case and
+ * there is no way out of this situation.
*/
panic("Unexpected PGM 0x3d with TEID bit 61=0");
}
-
switch (get_fault_type(regs)) {
case GMAP_FAULT:
mm = current->mm;
@@ -624,20 +522,15 @@ void do_secure_storage_access(struct pt_regs *regs)
mmap_read_lock(mm);
addr = __gmap_translate(gmap, addr);
mmap_read_unlock(mm);
- if (IS_ERR_VALUE(addr)) {
- do_fault_error(regs, VM_FAULT_BADMAP);
- break;
- }
+ if (IS_ERR_VALUE(addr))
+ return handle_fault_error_nolock(regs, SEGV_MAPERR);
fallthrough;
case USER_FAULT:
mm = current->mm;
mmap_read_lock(mm);
vma = find_vma(mm, addr);
- if (!vma) {
- mmap_read_unlock(mm);
- do_fault_error(regs, VM_FAULT_BADMAP);
- break;
- }
+ if (!vma)
+ return handle_fault_error(regs, SEGV_MAPERR);
page = follow_page(vma, addr, FOLL_WRITE | FOLL_GET);
if (IS_ERR_OR_NULL(page)) {
mmap_read_unlock(mm);
@@ -658,23 +551,18 @@ void do_secure_storage_access(struct pt_regs *regs)
BUG();
break;
default:
- do_fault_error(regs, VM_FAULT_BADMAP);
- WARN_ON_ONCE(1);
+ unreachable();
}
}
NOKPROBE_SYMBOL(do_secure_storage_access);
void do_non_secure_storage_access(struct pt_regs *regs)
{
- unsigned long gaddr = regs->int_parm_long & __FAIL_ADDR_MASK;
struct gmap *gmap = (struct gmap *)S390_lowcore.gmap;
+ unsigned long gaddr = get_fault_address(regs);
- if (get_fault_type(regs) != GMAP_FAULT) {
- do_fault_error(regs, VM_FAULT_BADMAP);
- WARN_ON_ONCE(1);
- return;
- }
-
+ if (WARN_ON_ONCE(get_fault_type(regs) != GMAP_FAULT))
+ return handle_fault_error_nolock(regs, SEGV_MAPERR);
if (gmap_convert_to_secure(gmap, gaddr) == -EINVAL)
send_sig(SIGSEGV, current, 0);
}
@@ -682,8 +570,8 @@ NOKPROBE_SYMBOL(do_non_secure_storage_access);
void do_secure_storage_violation(struct pt_regs *regs)
{
- unsigned long gaddr = regs->int_parm_long & __FAIL_ADDR_MASK;
struct gmap *gmap = (struct gmap *)S390_lowcore.gmap;
+ unsigned long gaddr = get_fault_address(regs);
/*
* If the VM has been rebooted, its address space might still contain
@@ -699,9 +587,8 @@ void do_secure_storage_violation(struct pt_regs *regs)
* This exception is only triggered when a guest 2 is running
* and can therefore never occur in kernel context.
*/
- printk_ratelimited(KERN_WARNING
- "Secure storage violation in task: %s, pid %d\n",
- current->comm, current->pid);
+ pr_warn_ratelimited("Secure storage violation in task: %s, pid %d\n",
+ current->comm, current->pid);
send_sig(SIGSEGV, current, 0);
}
diff --git a/arch/s390/mm/gmap.c b/arch/s390/mm/gmap.c
index 20786f688..3b57837f3 100644
--- a/arch/s390/mm/gmap.c
+++ b/arch/s390/mm/gmap.c
@@ -18,7 +18,7 @@
#include <linux/ksm.h>
#include <linux/mman.h>
#include <linux/pgtable.h>
-
+#include <asm/page-states.h>
#include <asm/pgalloc.h>
#include <asm/gmap.h>
#include <asm/page.h>
@@ -33,7 +33,7 @@ static struct page *gmap_alloc_crst(void)
page = alloc_pages(GFP_KERNEL_ACCOUNT, CRST_ALLOC_ORDER);
if (!page)
return NULL;
- arch_set_page_dat(page, CRST_ALLOC_ORDER);
+ __arch_set_page_dat(page_to_virt(page), 1UL << CRST_ALLOC_ORDER);
return page;
}
@@ -596,7 +596,7 @@ int __gmap_link(struct gmap *gmap, unsigned long gaddr, unsigned long vmaddr)
pud = pud_offset(p4d, vmaddr);
VM_BUG_ON(pud_none(*pud));
/* large puds cannot yet be handled */
- if (pud_large(*pud))
+ if (pud_leaf(*pud))
return -EFAULT;
pmd = pmd_offset(pud, vmaddr);
VM_BUG_ON(pmd_none(*pmd));
@@ -1691,6 +1691,7 @@ struct gmap *gmap_shadow(struct gmap *parent, unsigned long asce,
return ERR_PTR(-ENOMEM);
new->mm = parent->mm;
new->parent = gmap_get(parent);
+ new->private = parent->private;
new->orig_asce = asce;
new->edat_level = edat_level;
new->initialized = false;
@@ -2547,41 +2548,6 @@ static inline void thp_split_mm(struct mm_struct *mm)
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
/*
- * Remove all empty zero pages from the mapping for lazy refaulting
- * - This must be called after mm->context.has_pgste is set, to avoid
- * future creation of zero pages
- * - This must be called after THP was disabled.
- *
- * mm contracts with s390, that even if mm were to remove a page table,
- * racing with the loop below and so causing pte_offset_map_lock() to fail,
- * it will never insert a page table containing empty zero pages once
- * mm_forbids_zeropage(mm) i.e. mm->context.has_pgste is set.
- */
-static int __zap_zero_pages(pmd_t *pmd, unsigned long start,
- unsigned long end, struct mm_walk *walk)
-{
- unsigned long addr;
-
- for (addr = start; addr != end; addr += PAGE_SIZE) {
- pte_t *ptep;
- spinlock_t *ptl;
-
- ptep = pte_offset_map_lock(walk->mm, pmd, addr, &ptl);
- if (!ptep)
- break;
- if (is_zero_pfn(pte_pfn(*ptep)))
- ptep_xchg_direct(walk->mm, addr, ptep, __pte(_PAGE_INVALID));
- pte_unmap_unlock(ptep, ptl);
- }
- return 0;
-}
-
-static const struct mm_walk_ops zap_zero_walk_ops = {
- .pmd_entry = __zap_zero_pages,
- .walk_lock = PGWALK_WRLOCK,
-};
-
-/*
* switch on pgstes for its userspace process (for kvm)
*/
int s390_enable_sie(void)
@@ -2598,22 +2564,142 @@ int s390_enable_sie(void)
mm->context.has_pgste = 1;
/* split thp mappings and disable thp for future mappings */
thp_split_mm(mm);
- walk_page_range(mm, 0, TASK_SIZE, &zap_zero_walk_ops, NULL);
mmap_write_unlock(mm);
return 0;
}
EXPORT_SYMBOL_GPL(s390_enable_sie);
-int gmap_mark_unmergeable(void)
+static int find_zeropage_pte_entry(pte_t *pte, unsigned long addr,
+ unsigned long end, struct mm_walk *walk)
+{
+ unsigned long *found_addr = walk->private;
+
+ /* Return 1 of the page is a zeropage. */
+ if (is_zero_pfn(pte_pfn(*pte))) {
+ /*
+ * Shared zeropage in e.g., a FS DAX mapping? We cannot do the
+ * right thing and likely don't care: FAULT_FLAG_UNSHARE
+ * currently only works in COW mappings, which is also where
+ * mm_forbids_zeropage() is checked.
+ */
+ if (!is_cow_mapping(walk->vma->vm_flags))
+ return -EFAULT;
+
+ *found_addr = addr;
+ return 1;
+ }
+ return 0;
+}
+
+static const struct mm_walk_ops find_zeropage_ops = {
+ .pte_entry = find_zeropage_pte_entry,
+ .walk_lock = PGWALK_WRLOCK,
+};
+
+/*
+ * Unshare all shared zeropages, replacing them by anonymous pages. Note that
+ * we cannot simply zap all shared zeropages, because this could later
+ * trigger unexpected userfaultfd missing events.
+ *
+ * This must be called after mm->context.allow_cow_sharing was
+ * set to 0, to avoid future mappings of shared zeropages.
+ *
+ * mm contracts with s390, that even if mm were to remove a page table,
+ * and racing with walk_page_range_vma() calling pte_offset_map_lock()
+ * would fail, it will never insert a page table containing empty zero
+ * pages once mm_forbids_zeropage(mm) i.e.
+ * mm->context.allow_cow_sharing is set to 0.
+ */
+static int __s390_unshare_zeropages(struct mm_struct *mm)
{
+ struct vm_area_struct *vma;
+ VMA_ITERATOR(vmi, mm, 0);
+ unsigned long addr;
+ vm_fault_t fault;
+ int rc;
+
+ for_each_vma(vmi, vma) {
+ /*
+ * We could only look at COW mappings, but it's more future
+ * proof to catch unexpected zeropages in other mappings and
+ * fail.
+ */
+ if ((vma->vm_flags & VM_PFNMAP) || is_vm_hugetlb_page(vma))
+ continue;
+ addr = vma->vm_start;
+
+retry:
+ rc = walk_page_range_vma(vma, addr, vma->vm_end,
+ &find_zeropage_ops, &addr);
+ if (rc < 0)
+ return rc;
+ else if (!rc)
+ continue;
+
+ /* addr was updated by find_zeropage_pte_entry() */
+ fault = handle_mm_fault(vma, addr,
+ FAULT_FLAG_UNSHARE | FAULT_FLAG_REMOTE,
+ NULL);
+ if (fault & VM_FAULT_OOM)
+ return -ENOMEM;
+ /*
+ * See break_ksm(): even after handle_mm_fault() returned 0, we
+ * must start the lookup from the current address, because
+ * handle_mm_fault() may back out if there's any difficulty.
+ *
+ * VM_FAULT_SIGBUS and VM_FAULT_SIGSEGV are unexpected but
+ * maybe they could trigger in the future on concurrent
+ * truncation. In that case, the shared zeropage would be gone
+ * and we can simply retry and make progress.
+ */
+ cond_resched();
+ goto retry;
+ }
+
+ return 0;
+}
+
+static int __s390_disable_cow_sharing(struct mm_struct *mm)
+{
+ int rc;
+
+ if (!mm->context.allow_cow_sharing)
+ return 0;
+
+ mm->context.allow_cow_sharing = 0;
+
+ /* Replace all shared zeropages by anonymous pages. */
+ rc = __s390_unshare_zeropages(mm);
/*
* Make sure to disable KSM (if enabled for the whole process or
* individual VMAs). Note that nothing currently hinders user space
* from re-enabling it.
*/
- return ksm_disable(current->mm);
+ if (!rc)
+ rc = ksm_disable(mm);
+ if (rc)
+ mm->context.allow_cow_sharing = 1;
+ return rc;
+}
+
+/*
+ * Disable most COW-sharing of memory pages for the whole process:
+ * (1) Disable KSM and unmerge/unshare any KSM pages.
+ * (2) Disallow shared zeropages and unshare any zerpages that are mapped.
+ *
+ * Not that we currently don't bother with COW-shared pages that are shared
+ * with parent/child processes due to fork().
+ */
+int s390_disable_cow_sharing(void)
+{
+ int rc;
+
+ mmap_write_lock(current->mm);
+ rc = __s390_disable_cow_sharing(current->mm);
+ mmap_write_unlock(current->mm);
+ return rc;
}
-EXPORT_SYMBOL_GPL(gmap_mark_unmergeable);
+EXPORT_SYMBOL_GPL(s390_disable_cow_sharing);
/*
* Enable storage key handling from now on and initialize the storage
@@ -2658,7 +2744,7 @@ static int __s390_enable_skey_hugetlb(pte_t *pte, unsigned long addr,
return 0;
start = pmd_val(*pmd) & HPAGE_MASK;
- end = start + HPAGE_SIZE - 1;
+ end = start + HPAGE_SIZE;
__storage_key_init_range(start, end);
set_bit(PG_arch_1, &page->flags);
cond_resched();
@@ -2682,7 +2768,7 @@ int s390_enable_skey(void)
goto out_up;
mm->context.uses_skeys = 1;
- rc = gmap_mark_unmergeable();
+ rc = __s390_disable_cow_sharing(mm);
if (rc) {
mm->context.uses_skeys = 0;
goto out_up;
diff --git a/arch/s390/mm/hugetlbpage.c b/arch/s390/mm/hugetlbpage.c
index 297a6d897..763469e51 100644
--- a/arch/s390/mm/hugetlbpage.c
+++ b/arch/s390/mm/hugetlbpage.c
@@ -139,7 +139,7 @@ static void clear_huge_pte_skeys(struct mm_struct *mm, unsigned long rste)
}
if (!test_and_set_bit(PG_arch_1, &page->flags))
- __storage_key_init_range(paddr, paddr + size - 1);
+ __storage_key_init_range(paddr, paddr + size);
}
void __set_huge_pte_at(struct mm_struct *mm, unsigned long addr,
@@ -224,7 +224,7 @@ pte_t *huge_pte_offset(struct mm_struct *mm,
if (p4d_present(*p4dp)) {
pudp = pud_offset(p4dp, addr);
if (pud_present(*pudp)) {
- if (pud_large(*pudp))
+ if (pud_leaf(*pudp))
return (pte_t *) pudp;
pmdp = pmd_offset(pudp, addr);
}
@@ -240,7 +240,7 @@ int pmd_huge(pmd_t pmd)
int pud_huge(pud_t pud)
{
- return pud_large(pud);
+ return pud_leaf(pud);
}
bool __init arch_hugetlb_valid_size(unsigned long size)
diff --git a/arch/s390/mm/init.c b/arch/s390/mm/init.c
index 8b94d2212..43e612bc2 100644
--- a/arch/s390/mm/init.c
+++ b/arch/s390/mm/init.c
@@ -35,6 +35,7 @@
#include <asm/processor.h>
#include <linux/uaccess.h>
#include <asm/pgalloc.h>
+#include <asm/ctlreg.h>
#include <asm/kfence.h>
#include <asm/ptdump.h>
#include <asm/dma.h>
@@ -42,7 +43,6 @@
#include <asm/tlb.h>
#include <asm/tlbflush.h>
#include <asm/sections.h>
-#include <asm/ctl_reg.h>
#include <asm/sclp.h>
#include <asm/set_memory.h>
#include <asm/kasan.h>
@@ -54,7 +54,7 @@
pgd_t swapper_pg_dir[PTRS_PER_PGD] __section(".bss..swapper_pg_dir");
pgd_t invalid_pg_dir[PTRS_PER_PGD] __section(".bss..invalid_pg_dir");
-unsigned long __bootdata_preserved(s390_invalid_asce);
+struct ctlreg __bootdata_preserved(s390_invalid_asce);
unsigned long empty_zero_page, zero_page_mask;
EXPORT_SYMBOL(empty_zero_page);
@@ -164,14 +164,10 @@ void __init mem_init(void)
pv_init();
kfence_split_mapping();
- /* Setup guest page hinting */
- cmma_init();
/* this will put all low memory onto the freelists */
memblock_free_all();
setup_zero_pages(); /* Setup zeroed pages. */
-
- cmma_init_nodat();
}
void free_initmem(void)
diff --git a/arch/s390/mm/maccess.c b/arch/s390/mm/maccess.c
index c805b3e25..632c3a55f 100644
--- a/arch/s390/mm/maccess.c
+++ b/arch/s390/mm/maccess.c
@@ -15,10 +15,10 @@
#include <linux/uio.h>
#include <linux/io.h>
#include <asm/asm-extable.h>
-#include <asm/ctl_reg.h>
#include <asm/abs_lowcore.h>
#include <asm/stacktrace.h>
#include <asm/maccess.h>
+#include <asm/ctlreg.h>
unsigned long __bootdata_preserved(__memcpy_real_area);
pte_t *__bootdata_preserved(memcpy_real_ptep);
diff --git a/arch/s390/mm/page-states.c b/arch/s390/mm/page-states.c
index 79a037f49..01f9b39e6 100644
--- a/arch/s390/mm/page-states.c
+++ b/arch/s390/mm/page-states.c
@@ -7,205 +7,18 @@
* Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>
*/
-#include <linux/kernel.h>
-#include <linux/errno.h>
-#include <linux/types.h>
#include <linux/mm.h>
-#include <linux/memblock.h>
-#include <linux/gfp.h>
-#include <linux/init.h>
-#include <asm/asm-extable.h>
-#include <asm/facility.h>
#include <asm/page-states.h>
+#include <asm/sections.h>
+#include <asm/page.h>
-static int cmma_flag = 1;
-
-static int __init cmma(char *str)
-{
- bool enabled;
-
- if (!kstrtobool(str, &enabled))
- cmma_flag = enabled;
- return 1;
-}
-__setup("cmma=", cmma);
-
-static inline int cmma_test_essa(void)
-{
- unsigned long tmp = 0;
- int rc = -EOPNOTSUPP;
-
- /* test ESSA_GET_STATE */
- asm volatile(
- " .insn rrf,0xb9ab0000,%[tmp],%[tmp],%[cmd],0\n"
- "0: la %[rc],0\n"
- "1:\n"
- EX_TABLE(0b,1b)
- : [rc] "+&d" (rc), [tmp] "+&d" (tmp)
- : [cmd] "i" (ESSA_GET_STATE));
- return rc;
-}
-
-void __init cmma_init(void)
-{
- if (!cmma_flag)
- return;
- if (cmma_test_essa()) {
- cmma_flag = 0;
- return;
- }
- if (test_facility(147))
- cmma_flag = 2;
-}
-
-static inline void set_page_unused(struct page *page, int order)
-{
- int i, rc;
-
- for (i = 0; i < (1 << order); i++)
- asm volatile(".insn rrf,0xb9ab0000,%0,%1,%2,0"
- : "=&d" (rc)
- : "a" (page_to_phys(page + i)),
- "i" (ESSA_SET_UNUSED));
-}
-
-static inline void set_page_stable_dat(struct page *page, int order)
-{
- int i, rc;
-
- for (i = 0; i < (1 << order); i++)
- asm volatile(".insn rrf,0xb9ab0000,%0,%1,%2,0"
- : "=&d" (rc)
- : "a" (page_to_phys(page + i)),
- "i" (ESSA_SET_STABLE));
-}
-
-static inline void set_page_stable_nodat(struct page *page, int order)
-{
- int i, rc;
-
- for (i = 0; i < (1 << order); i++)
- asm volatile(".insn rrf,0xb9ab0000,%0,%1,%2,0"
- : "=&d" (rc)
- : "a" (page_to_phys(page + i)),
- "i" (ESSA_SET_STABLE_NODAT));
-}
-
-static void mark_kernel_pmd(pud_t *pud, unsigned long addr, unsigned long end)
-{
- unsigned long next;
- struct page *page;
- pmd_t *pmd;
-
- pmd = pmd_offset(pud, addr);
- do {
- next = pmd_addr_end(addr, end);
- if (pmd_none(*pmd) || pmd_large(*pmd))
- continue;
- page = phys_to_page(pmd_val(*pmd));
- set_bit(PG_arch_1, &page->flags);
- } while (pmd++, addr = next, addr != end);
-}
-
-static void mark_kernel_pud(p4d_t *p4d, unsigned long addr, unsigned long end)
-{
- unsigned long next;
- struct page *page;
- pud_t *pud;
- int i;
-
- pud = pud_offset(p4d, addr);
- do {
- next = pud_addr_end(addr, end);
- if (pud_none(*pud) || pud_large(*pud))
- continue;
- if (!pud_folded(*pud)) {
- page = phys_to_page(pud_val(*pud));
- for (i = 0; i < 4; i++)
- set_bit(PG_arch_1, &page[i].flags);
- }
- mark_kernel_pmd(pud, addr, next);
- } while (pud++, addr = next, addr != end);
-}
-
-static void mark_kernel_p4d(pgd_t *pgd, unsigned long addr, unsigned long end)
-{
- unsigned long next;
- struct page *page;
- p4d_t *p4d;
- int i;
-
- p4d = p4d_offset(pgd, addr);
- do {
- next = p4d_addr_end(addr, end);
- if (p4d_none(*p4d))
- continue;
- if (!p4d_folded(*p4d)) {
- page = phys_to_page(p4d_val(*p4d));
- for (i = 0; i < 4; i++)
- set_bit(PG_arch_1, &page[i].flags);
- }
- mark_kernel_pud(p4d, addr, next);
- } while (p4d++, addr = next, addr != end);
-}
-
-static void mark_kernel_pgd(void)
-{
- unsigned long addr, next;
- struct page *page;
- pgd_t *pgd;
- int i;
-
- addr = 0;
- pgd = pgd_offset_k(addr);
- do {
- next = pgd_addr_end(addr, MODULES_END);
- if (pgd_none(*pgd))
- continue;
- if (!pgd_folded(*pgd)) {
- page = phys_to_page(pgd_val(*pgd));
- for (i = 0; i < 4; i++)
- set_bit(PG_arch_1, &page[i].flags);
- }
- mark_kernel_p4d(pgd, addr, next);
- } while (pgd++, addr = next, addr != MODULES_END);
-}
-
-void __init cmma_init_nodat(void)
-{
- struct page *page;
- unsigned long start, end, ix;
- int i;
-
- if (cmma_flag < 2)
- return;
- /* Mark pages used in kernel page tables */
- mark_kernel_pgd();
- page = virt_to_page(&swapper_pg_dir);
- for (i = 0; i < 4; i++)
- set_bit(PG_arch_1, &page[i].flags);
- page = virt_to_page(&invalid_pg_dir);
- for (i = 0; i < 4; i++)
- set_bit(PG_arch_1, &page[i].flags);
-
- /* Set all kernel pages not used for page tables to stable/no-dat */
- for_each_mem_pfn_range(i, MAX_NUMNODES, &start, &end, NULL) {
- page = pfn_to_page(start);
- for (ix = start; ix < end; ix++, page++) {
- if (__test_and_clear_bit(PG_arch_1, &page->flags))
- continue; /* skip page table pages */
- if (!list_empty(&page->lru))
- continue; /* skip free pages */
- set_page_stable_nodat(page, 0);
- }
- }
-}
+int __bootdata_preserved(cmma_flag);
void arch_free_page(struct page *page, int order)
{
if (!cmma_flag)
return;
- set_page_unused(page, order);
+ __set_page_unused(page_to_virt(page), 1UL << order);
}
void arch_alloc_page(struct page *page, int order)
@@ -213,14 +26,7 @@ void arch_alloc_page(struct page *page, int order)
if (!cmma_flag)
return;
if (cmma_flag < 2)
- set_page_stable_dat(page, order);
+ __set_page_stable_dat(page_to_virt(page), 1UL << order);
else
- set_page_stable_nodat(page, order);
-}
-
-void arch_set_page_dat(struct page *page, int order)
-{
- if (!cmma_flag)
- return;
- set_page_stable_dat(page, order);
+ __set_page_stable_nodat(page_to_virt(page), 1UL << order);
}
diff --git a/arch/s390/mm/pageattr.c b/arch/s390/mm/pageattr.c
index b87e96c64..739185fc3 100644
--- a/arch/s390/mm/pageattr.c
+++ b/arch/s390/mm/pageattr.c
@@ -75,7 +75,7 @@ static void pgt_set(unsigned long *old, unsigned long new, unsigned long addr,
break;
}
table = (unsigned long *)((unsigned long)old & mask);
- crdte(*old, new, table, dtt, addr, S390_lowcore.kernel_asce);
+ crdte(*old, new, table, dtt, addr, S390_lowcore.kernel_asce.val);
} else if (MACHINE_HAS_IDTE) {
cspg(old, *old, new);
} else {
@@ -274,7 +274,7 @@ static int walk_pud_level(p4d_t *p4d, unsigned long addr, unsigned long end,
if (pud_none(*pudp))
return -EINVAL;
next = pud_addr_end(addr, end);
- if (pud_large(*pudp)) {
+ if (pud_leaf(*pudp)) {
need_split = !!(flags & SET_MEMORY_4K);
need_split |= !!(addr & ~PUD_MASK);
need_split |= !!(addr + PUD_SIZE > next);
diff --git a/arch/s390/mm/pgalloc.c b/arch/s390/mm/pgalloc.c
index 6396d6b06..008e487c9 100644
--- a/arch/s390/mm/pgalloc.c
+++ b/arch/s390/mm/pgalloc.c
@@ -10,6 +10,7 @@
#include <linux/slab.h>
#include <linux/mm.h>
#include <asm/mmu_context.h>
+#include <asm/page-states.h>
#include <asm/pgalloc.h>
#include <asm/gmap.h>
#include <asm/tlb.h>
@@ -30,7 +31,6 @@ static struct ctl_table page_table_sysctl[] = {
.extra1 = SYSCTL_ZERO,
.extra2 = SYSCTL_ONE,
},
- { }
};
static int __init page_table_register_sysctl(void)
@@ -44,11 +44,13 @@ __initcall(page_table_register_sysctl);
unsigned long *crst_table_alloc(struct mm_struct *mm)
{
struct ptdesc *ptdesc = pagetable_alloc(GFP_KERNEL, CRST_ALLOC_ORDER);
+ unsigned long *table;
if (!ptdesc)
return NULL;
- arch_set_page_dat(ptdesc_page(ptdesc), CRST_ALLOC_ORDER);
- return (unsigned long *) ptdesc_to_virt(ptdesc);
+ table = ptdesc_to_virt(ptdesc);
+ __arch_set_page_dat(table, 1UL << CRST_ALLOC_ORDER);
+ return table;
}
void crst_table_free(struct mm_struct *mm, unsigned long *table)
@@ -62,8 +64,8 @@ static void __crst_table_upgrade(void *arg)
/* change all active ASCEs to avoid the creation of new TLBs */
if (current->active_mm == mm) {
- S390_lowcore.user_asce = mm->context.asce;
- __ctl_load(S390_lowcore.user_asce, 7, 7);
+ S390_lowcore.user_asce.val = mm->context.asce;
+ local_ctl_load(7, &S390_lowcore.user_asce);
}
__tlb_flush_local();
}
@@ -131,11 +133,6 @@ err_p4d:
return -ENOMEM;
}
-static inline unsigned int atomic_xor_bits(atomic_t *v, unsigned int bits)
-{
- return atomic_fetch_xor(bits, v) ^ bits;
-}
-
#ifdef CONFIG_PGSTE
struct page *page_table_alloc_pgste(struct mm_struct *mm)
@@ -146,7 +143,7 @@ struct page *page_table_alloc_pgste(struct mm_struct *mm)
ptdesc = pagetable_alloc(GFP_KERNEL, 0);
if (ptdesc) {
table = (u64 *)ptdesc_to_virt(ptdesc);
- arch_set_page_dat(virt_to_page(table), 0);
+ __arch_set_page_dat(table, 1);
memset64(table, _PAGE_INVALID, PTRS_PER_PTE);
memset64(table + PTRS_PER_PTE, 0, PTRS_PER_PTE);
}
@@ -160,125 +157,11 @@ void page_table_free_pgste(struct page *page)
#endif /* CONFIG_PGSTE */
-/*
- * A 2KB-pgtable is either upper or lower half of a normal page.
- * The second half of the page may be unused or used as another
- * 2KB-pgtable.
- *
- * Whenever possible the parent page for a new 2KB-pgtable is picked
- * from the list of partially allocated pages mm_context_t::pgtable_list.
- * In case the list is empty a new parent page is allocated and added to
- * the list.
- *
- * When a parent page gets fully allocated it contains 2KB-pgtables in both
- * upper and lower halves and is removed from mm_context_t::pgtable_list.
- *
- * When 2KB-pgtable is freed from to fully allocated parent page that
- * page turns partially allocated and added to mm_context_t::pgtable_list.
- *
- * If 2KB-pgtable is freed from the partially allocated parent page that
- * page turns unused and gets removed from mm_context_t::pgtable_list.
- * Furthermore, the unused parent page is released.
- *
- * As follows from the above, no unallocated or fully allocated parent
- * pages are contained in mm_context_t::pgtable_list.
- *
- * The upper byte (bits 24-31) of the parent page _refcount is used
- * for tracking contained 2KB-pgtables and has the following format:
- *
- * PP AA
- * 01234567 upper byte (bits 24-31) of struct page::_refcount
- * || ||
- * || |+--- upper 2KB-pgtable is allocated
- * || +---- lower 2KB-pgtable is allocated
- * |+------- upper 2KB-pgtable is pending for removal
- * +-------- lower 2KB-pgtable is pending for removal
- *
- * (See commit 620b4e903179 ("s390: use _refcount for pgtables") on why
- * using _refcount is possible).
- *
- * When 2KB-pgtable is allocated the corresponding AA bit is set to 1.
- * The parent page is either:
- * - added to mm_context_t::pgtable_list in case the second half of the
- * parent page is still unallocated;
- * - removed from mm_context_t::pgtable_list in case both hales of the
- * parent page are allocated;
- * These operations are protected with mm_context_t::lock.
- *
- * When 2KB-pgtable is deallocated the corresponding AA bit is set to 0
- * and the corresponding PP bit is set to 1 in a single atomic operation.
- * Thus, PP and AA bits corresponding to the same 2KB-pgtable are mutually
- * exclusive and may never be both set to 1!
- * The parent page is either:
- * - added to mm_context_t::pgtable_list in case the second half of the
- * parent page is still allocated;
- * - removed from mm_context_t::pgtable_list in case the second half of
- * the parent page is unallocated;
- * These operations are protected with mm_context_t::lock.
- *
- * It is important to understand that mm_context_t::lock only protects
- * mm_context_t::pgtable_list and AA bits, but not the parent page itself
- * and PP bits.
- *
- * Releasing the parent page happens whenever the PP bit turns from 1 to 0,
- * while both AA bits and the second PP bit are already unset. Then the
- * parent page does not contain any 2KB-pgtable fragment anymore, and it has
- * also been removed from mm_context_t::pgtable_list. It is safe to release
- * the page therefore.
- *
- * PGSTE memory spaces use full 4KB-pgtables and do not need most of the
- * logic described above. Both AA bits are set to 1 to denote a 4KB-pgtable
- * while the PP bits are never used, nor such a page is added to or removed
- * from mm_context_t::pgtable_list.
- *
- * pte_free_defer() overrides those rules: it takes the page off pgtable_list,
- * and prevents both 2K fragments from being reused. pte_free_defer() has to
- * guarantee that its pgtable cannot be reused before the RCU grace period
- * has elapsed (which page_table_free_rcu() does not actually guarantee).
- * But for simplicity, because page->rcu_head overlays page->lru, and because
- * the RCU callback might not be called before the mm_context_t has been freed,
- * pte_free_defer() in this implementation prevents both fragments from being
- * reused, and delays making the call to RCU until both fragments are freed.
- */
unsigned long *page_table_alloc(struct mm_struct *mm)
{
- unsigned long *table;
struct ptdesc *ptdesc;
- unsigned int mask, bit;
-
- /* Try to get a fragment of a 4K page as a 2K page table */
- if (!mm_alloc_pgste(mm)) {
- table = NULL;
- spin_lock_bh(&mm->context.lock);
- if (!list_empty(&mm->context.pgtable_list)) {
- ptdesc = list_first_entry(&mm->context.pgtable_list,
- struct ptdesc, pt_list);
- mask = atomic_read(&ptdesc->_refcount) >> 24;
- /*
- * The pending removal bits must also be checked.
- * Failure to do so might lead to an impossible
- * value of (i.e 0x13 or 0x23) written to _refcount.
- * Such values violate the assumption that pending and
- * allocation bits are mutually exclusive, and the rest
- * of the code unrails as result. That could lead to
- * a whole bunch of races and corruptions.
- */
- mask = (mask | (mask >> 4)) & 0x03U;
- if (mask != 0x03U) {
- table = (unsigned long *) ptdesc_to_virt(ptdesc);
- bit = mask & 1; /* =1 -> second 2K */
- if (bit)
- table += PTRS_PER_PTE;
- atomic_xor_bits(&ptdesc->_refcount,
- 0x01U << (bit + 24));
- list_del_init(&ptdesc->pt_list);
- }
- }
- spin_unlock_bh(&mm->context.lock);
- if (table)
- return table;
- }
- /* Allocate a fresh page */
+ unsigned long *table;
+
ptdesc = pagetable_alloc(GFP_KERNEL, 0);
if (!ptdesc)
return NULL;
@@ -286,177 +169,57 @@ unsigned long *page_table_alloc(struct mm_struct *mm)
pagetable_free(ptdesc);
return NULL;
}
- arch_set_page_dat(ptdesc_page(ptdesc), 0);
- /* Initialize page table */
- table = (unsigned long *) ptdesc_to_virt(ptdesc);
- if (mm_alloc_pgste(mm)) {
- /* Return 4K page table with PGSTEs */
- INIT_LIST_HEAD(&ptdesc->pt_list);
- atomic_xor_bits(&ptdesc->_refcount, 0x03U << 24);
- memset64((u64 *)table, _PAGE_INVALID, PTRS_PER_PTE);
- memset64((u64 *)table + PTRS_PER_PTE, 0, PTRS_PER_PTE);
- } else {
- /* Return the first 2K fragment of the page */
- atomic_xor_bits(&ptdesc->_refcount, 0x01U << 24);
- memset64((u64 *)table, _PAGE_INVALID, 2 * PTRS_PER_PTE);
- spin_lock_bh(&mm->context.lock);
- list_add(&ptdesc->pt_list, &mm->context.pgtable_list);
- spin_unlock_bh(&mm->context.lock);
- }
+ table = ptdesc_to_virt(ptdesc);
+ __arch_set_page_dat(table, 1);
+ /* pt_list is used by gmap only */
+ INIT_LIST_HEAD(&ptdesc->pt_list);
+ memset64((u64 *)table, _PAGE_INVALID, PTRS_PER_PTE);
+ memset64((u64 *)table + PTRS_PER_PTE, 0, PTRS_PER_PTE);
return table;
}
-static void page_table_release_check(struct page *page, void *table,
- unsigned int half, unsigned int mask)
+static void pagetable_pte_dtor_free(struct ptdesc *ptdesc)
{
- char msg[128];
-
- if (!IS_ENABLED(CONFIG_DEBUG_VM))
- return;
- if (!mask && list_empty(&page->lru))
- return;
- snprintf(msg, sizeof(msg),
- "Invalid pgtable %p release half 0x%02x mask 0x%02x",
- table, half, mask);
- dump_page(page, msg);
-}
-
-static void pte_free_now(struct rcu_head *head)
-{
- struct ptdesc *ptdesc;
-
- ptdesc = container_of(head, struct ptdesc, pt_rcu_head);
pagetable_pte_dtor(ptdesc);
pagetable_free(ptdesc);
}
void page_table_free(struct mm_struct *mm, unsigned long *table)
{
- unsigned int mask, bit, half;
struct ptdesc *ptdesc = virt_to_ptdesc(table);
- if (!mm_alloc_pgste(mm)) {
- /* Free 2K page table fragment of a 4K page */
- bit = ((unsigned long) table & ~PAGE_MASK)/(PTRS_PER_PTE*sizeof(pte_t));
- spin_lock_bh(&mm->context.lock);
- /*
- * Mark the page for delayed release. The actual release
- * will happen outside of the critical section from this
- * function or from __tlb_remove_table()
- */
- mask = atomic_xor_bits(&ptdesc->_refcount, 0x11U << (bit + 24));
- mask >>= 24;
- if ((mask & 0x03U) && !folio_test_active(ptdesc_folio(ptdesc))) {
- /*
- * Other half is allocated, and neither half has had
- * its free deferred: add page to head of list, to make
- * this freed half available for immediate reuse.
- */
- list_add(&ptdesc->pt_list, &mm->context.pgtable_list);
- } else {
- /* If page is on list, now remove it. */
- list_del_init(&ptdesc->pt_list);
- }
- spin_unlock_bh(&mm->context.lock);
- mask = atomic_xor_bits(&ptdesc->_refcount, 0x10U << (bit + 24));
- mask >>= 24;
- if (mask != 0x00U)
- return;
- half = 0x01U << bit;
- } else {
- half = 0x03U;
- mask = atomic_xor_bits(&ptdesc->_refcount, 0x03U << 24);
- mask >>= 24;
- }
-
- page_table_release_check(ptdesc_page(ptdesc), table, half, mask);
- if (folio_test_clear_active(ptdesc_folio(ptdesc)))
- call_rcu(&ptdesc->pt_rcu_head, pte_free_now);
- else
- pte_free_now(&ptdesc->pt_rcu_head);
+ pagetable_pte_dtor_free(ptdesc);
}
-void page_table_free_rcu(struct mmu_gather *tlb, unsigned long *table,
- unsigned long vmaddr)
+void __tlb_remove_table(void *table)
{
- struct mm_struct *mm;
- unsigned int bit, mask;
struct ptdesc *ptdesc = virt_to_ptdesc(table);
+ struct page *page = ptdesc_page(ptdesc);
- mm = tlb->mm;
- if (mm_alloc_pgste(mm)) {
- gmap_unlink(mm, table, vmaddr);
- table = (unsigned long *) ((unsigned long)table | 0x03U);
- tlb_remove_ptdesc(tlb, table);
+ if (compound_order(page) == CRST_ALLOC_ORDER) {
+ /* pmd, pud, or p4d */
+ pagetable_free(ptdesc);
return;
}
- bit = ((unsigned long) table & ~PAGE_MASK) / (PTRS_PER_PTE*sizeof(pte_t));
- spin_lock_bh(&mm->context.lock);
- /*
- * Mark the page for delayed release. The actual release will happen
- * outside of the critical section from __tlb_remove_table() or from
- * page_table_free()
- */
- mask = atomic_xor_bits(&ptdesc->_refcount, 0x11U << (bit + 24));
- mask >>= 24;
- if ((mask & 0x03U) && !folio_test_active(ptdesc_folio(ptdesc))) {
- /*
- * Other half is allocated, and neither half has had
- * its free deferred: add page to end of list, to make
- * this freed half available for reuse once its pending
- * bit has been cleared by __tlb_remove_table().
- */
- list_add_tail(&ptdesc->pt_list, &mm->context.pgtable_list);
- } else {
- /* If page is on list, now remove it. */
- list_del_init(&ptdesc->pt_list);
- }
- spin_unlock_bh(&mm->context.lock);
- table = (unsigned long *) ((unsigned long) table | (0x01U << bit));
- tlb_remove_table(tlb, table);
+ pagetable_pte_dtor_free(ptdesc);
}
-void __tlb_remove_table(void *_table)
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+static void pte_free_now(struct rcu_head *head)
{
- unsigned int mask = (unsigned long) _table & 0x03U, half = mask;
- void *table = (void *)((unsigned long) _table ^ mask);
- struct ptdesc *ptdesc = virt_to_ptdesc(table);
-
- switch (half) {
- case 0x00U: /* pmd, pud, or p4d */
- pagetable_free(ptdesc);
- return;
- case 0x01U: /* lower 2K of a 4K page table */
- case 0x02U: /* higher 2K of a 4K page table */
- mask = atomic_xor_bits(&ptdesc->_refcount, mask << (4 + 24));
- mask >>= 24;
- if (mask != 0x00U)
- return;
- break;
- case 0x03U: /* 4K page table with pgstes */
- mask = atomic_xor_bits(&ptdesc->_refcount, 0x03U << 24);
- mask >>= 24;
- break;
- }
+ struct ptdesc *ptdesc = container_of(head, struct ptdesc, pt_rcu_head);
- page_table_release_check(ptdesc_page(ptdesc), table, half, mask);
- if (folio_test_clear_active(ptdesc_folio(ptdesc)))
- call_rcu(&ptdesc->pt_rcu_head, pte_free_now);
- else
- pte_free_now(&ptdesc->pt_rcu_head);
+ pagetable_pte_dtor_free(ptdesc);
}
-#ifdef CONFIG_TRANSPARENT_HUGEPAGE
void pte_free_defer(struct mm_struct *mm, pgtable_t pgtable)
{
- struct page *page;
+ struct ptdesc *ptdesc = virt_to_ptdesc(pgtable);
- page = virt_to_page(pgtable);
- SetPageActive(page);
- page_table_free(mm, (unsigned long *)pgtable);
+ call_rcu(&ptdesc->pt_rcu_head, pte_free_now);
/*
- * page_table_free() does not do the pgste gmap_unlink() which
- * page_table_free_rcu() does: warn us if pgste ever reaches here.
+ * THPs are not allowed for KVM guests. Warn if pgste ever reaches here.
+ * Turn to the generic pte_free_defer() version once gmap is removed.
*/
WARN_ON_ONCE(mm_has_pgste(mm));
}
@@ -489,11 +252,10 @@ static unsigned long *base_crst_alloc(unsigned long val)
unsigned long *table;
struct ptdesc *ptdesc;
- ptdesc = pagetable_alloc(GFP_KERNEL & ~__GFP_HIGHMEM, CRST_ALLOC_ORDER);
+ ptdesc = pagetable_alloc(GFP_KERNEL, CRST_ALLOC_ORDER);
if (!ptdesc)
return NULL;
table = ptdesc_address(ptdesc);
-
crst_table_init(table, val);
return table;
}
diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c
index 5cb929415..3ff07b6bc 100644
--- a/arch/s390/mm/pgtable.c
+++ b/arch/s390/mm/pgtable.c
@@ -125,32 +125,23 @@ static inline pte_t ptep_flush_lazy(struct mm_struct *mm,
static inline pgste_t pgste_get_lock(pte_t *ptep)
{
- unsigned long new = 0;
+ unsigned long value = 0;
#ifdef CONFIG_PGSTE
- unsigned long old;
-
- asm(
- " lg %0,%2\n"
- "0: lgr %1,%0\n"
- " nihh %0,0xff7f\n" /* clear PCL bit in old */
- " oihh %1,0x0080\n" /* set PCL bit in new */
- " csg %0,%1,%2\n"
- " jl 0b\n"
- : "=&d" (old), "=&d" (new), "=Q" (ptep[PTRS_PER_PTE])
- : "Q" (ptep[PTRS_PER_PTE]) : "cc", "memory");
+ unsigned long *ptr = (unsigned long *)(ptep + PTRS_PER_PTE);
+
+ do {
+ value = __atomic64_or_barrier(PGSTE_PCL_BIT, ptr);
+ } while (value & PGSTE_PCL_BIT);
+ value |= PGSTE_PCL_BIT;
#endif
- return __pgste(new);
+ return __pgste(value);
}
static inline void pgste_set_unlock(pte_t *ptep, pgste_t pgste)
{
#ifdef CONFIG_PGSTE
- asm(
- " nihh %1,0xff7f\n" /* clear PCL bit */
- " stg %1,%0\n"
- : "=Q" (ptep[PTRS_PER_PTE])
- : "d" (pgste_val(pgste)), "Q" (ptep[PTRS_PER_PTE])
- : "cc", "memory");
+ barrier();
+ WRITE_ONCE(*(unsigned long *)(ptep + PTRS_PER_PTE), pgste_val(pgste) & ~PGSTE_PCL_BIT);
#endif
}
@@ -479,7 +470,7 @@ static int pmd_lookup(struct mm_struct *mm, unsigned long addr, pmd_t **pmdp)
return -ENOENT;
/* Large PUDs are not supported yet. */
- if (pud_large(*pud))
+ if (pud_leaf(*pud))
return -EFAULT;
*pmdp = pmd_offset(pud, addr);
diff --git a/arch/s390/mm/vmem.c b/arch/s390/mm/vmem.c
index 6d276103c..84e173c02 100644
--- a/arch/s390/mm/vmem.c
+++ b/arch/s390/mm/vmem.c
@@ -15,6 +15,7 @@
#include <asm/page-states.h>
#include <asm/cacheflush.h>
#include <asm/nospec-branch.h>
+#include <asm/ctlreg.h>
#include <asm/pgalloc.h>
#include <asm/setup.h>
#include <asm/tlbflush.h>
@@ -49,8 +50,7 @@ void *vmem_crst_alloc(unsigned long val)
if (!table)
return NULL;
crst_table_init(table, val);
- if (slab_is_available())
- arch_set_page_dat(virt_to_page(table), CRST_ALLOC_ORDER);
+ __arch_set_page_dat(table, 1UL << CRST_ALLOC_ORDER);
return table;
}
@@ -66,6 +66,7 @@ pte_t __ref *vmem_pte_alloc(void)
if (!pte)
return NULL;
memset64((u64 *)pte, _PAGE_INVALID, PTRS_PER_PTE);
+ __arch_set_page_dat(pte, 1);
return pte;
}
@@ -322,7 +323,7 @@ static int modify_pud_table(p4d_t *p4d, unsigned long addr, unsigned long end,
if (!add) {
if (pud_none(*pud))
continue;
- if (pud_large(*pud)) {
+ if (pud_leaf(*pud)) {
if (IS_ALIGNED(addr, PUD_SIZE) &&
IS_ALIGNED(next, PUD_SIZE)) {
pud_clear(pud);
@@ -343,7 +344,7 @@ static int modify_pud_table(p4d_t *p4d, unsigned long addr, unsigned long end,
if (!pmd)
goto out;
pud_populate(&init_mm, pud, pmd);
- } else if (pud_large(*pud)) {
+ } else if (pud_leaf(*pud)) {
continue;
}
ret = modify_pmd_table(pud, addr, next, add, direct);
@@ -502,6 +503,8 @@ int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node,
return ret;
}
+#ifdef CONFIG_MEMORY_HOTPLUG
+
void vmemmap_free(unsigned long start, unsigned long end,
struct vmem_altmap *altmap)
{
@@ -510,6 +513,8 @@ void vmemmap_free(unsigned long start, unsigned long end,
mutex_unlock(&vmem_mutex);
}
+#endif
+
void vmem_remove_mapping(unsigned long start, unsigned long size)
{
mutex_lock(&vmem_mutex);
@@ -586,7 +591,7 @@ pte_t *vmem_get_alloc_pte(unsigned long addr, bool alloc)
if (!pmd)
goto out;
pud_populate(&init_mm, pud, pmd);
- } else if (WARN_ON_ONCE(pud_large(*pud))) {
+ } else if (WARN_ON_ONCE(pud_leaf(*pud))) {
goto out;
}
pmd = pmd_offset(pud, addr);
@@ -663,7 +668,7 @@ void __init vmem_map_init(void)
__set_memory_4k(__va(0), RELOC_HIDE(__va(0), ident_map_size));
}
if (MACHINE_HAS_NX)
- ctl_set_bit(0, 20);
+ system_ctl_set_bit(0, CR0_INSTRUCTION_EXEC_PROTECTION_BIT);
pr_info("Write protected kernel read-only data: %luk\n",
(unsigned long)(__end_rodata - _stext) >> 10);
}
diff --git a/arch/s390/net/bpf_jit_comp.c b/arch/s390/net/bpf_jit_comp.c
index e507692e5..1d168a98a 100644
--- a/arch/s390/net/bpf_jit_comp.c
+++ b/arch/s390/net/bpf_jit_comp.c
@@ -516,11 +516,12 @@ static void bpf_skip(struct bpf_jit *jit, int size)
* PLT for hotpatchable calls. The calling convention is the same as for the
* ftrace hotpatch trampolines: %r0 is return address, %r1 is clobbered.
*/
-extern const char bpf_plt[];
-extern const char bpf_plt_ret[];
-extern const char bpf_plt_target[];
-extern const char bpf_plt_end[];
-#define BPF_PLT_SIZE 32
+struct bpf_plt {
+ char code[16];
+ void *ret;
+ void *target;
+} __packed;
+extern const struct bpf_plt bpf_plt;
asm(
".pushsection .rodata\n"
" .balign 8\n"
@@ -531,15 +532,14 @@ asm(
" .balign 8\n"
"bpf_plt_ret: .quad 0\n"
"bpf_plt_target: .quad 0\n"
- "bpf_plt_end:\n"
" .popsection\n"
);
-static void bpf_jit_plt(void *plt, void *ret, void *target)
+static void bpf_jit_plt(struct bpf_plt *plt, void *ret, void *target)
{
- memcpy(plt, bpf_plt, BPF_PLT_SIZE);
- *(void **)((char *)plt + (bpf_plt_ret - bpf_plt)) = ret;
- *(void **)((char *)plt + (bpf_plt_target - bpf_plt)) = target ?: ret;
+ memcpy(plt, &bpf_plt, sizeof(*plt));
+ plt->ret = ret;
+ plt->target = target;
}
/*
@@ -556,7 +556,7 @@ static void bpf_jit_prologue(struct bpf_jit *jit, struct bpf_prog *fp,
EMIT6_PCREL_RILC(0xc0040000, 0, jit->prologue_plt);
jit->prologue_plt_ret = jit->prg;
- if (fp->aux->func_idx == 0) {
+ if (!bpf_is_subprog(fp)) {
/* Initialize the tail call counter in the main program. */
/* xc STK_OFF_TCCNT(4,%r15),STK_OFF_TCCNT(%r15) */
_EMIT6(0xd703f000 | STK_OFF_TCCNT, 0xf000 | STK_OFF_TCCNT);
@@ -662,23 +662,26 @@ static void bpf_jit_epilogue(struct bpf_jit *jit, u32 stack_depth)
jit->prg = ALIGN(jit->prg, 8);
jit->prologue_plt = jit->prg;
if (jit->prg_buf)
- bpf_jit_plt(jit->prg_buf + jit->prg,
+ bpf_jit_plt((struct bpf_plt *)(jit->prg_buf + jit->prg),
jit->prg_buf + jit->prologue_plt_ret, NULL);
- jit->prg += BPF_PLT_SIZE;
+ jit->prg += sizeof(struct bpf_plt);
}
static int get_probe_mem_regno(const u8 *insn)
{
/*
- * insn must point to llgc, llgh, llgf or lg, which have destination
- * register at the same position.
+ * insn must point to llgc, llgh, llgf, lg, lgb, lgh or lgf, which have
+ * destination register at the same position.
*/
- if (insn[0] != 0xe3) /* common llgc, llgh, llgf and lg prefix */
+ if (insn[0] != 0xe3) /* common prefix */
return -1;
if (insn[5] != 0x90 && /* llgc */
insn[5] != 0x91 && /* llgh */
insn[5] != 0x16 && /* llgf */
- insn[5] != 0x04) /* lg */
+ insn[5] != 0x04 && /* lg */
+ insn[5] != 0x77 && /* lgb */
+ insn[5] != 0x15 && /* lgh */
+ insn[5] != 0x14) /* lgf */
return -1;
return insn[1] >> 4;
}
@@ -776,6 +779,7 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp,
int i, bool extra_pass, u32 stack_depth)
{
struct bpf_insn *insn = &fp->insnsi[i];
+ s32 branch_oc_off = insn->off;
u32 dst_reg = insn->dst_reg;
u32 src_reg = insn->src_reg;
int last, insn_count = 1;
@@ -788,22 +792,55 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp,
int err;
if (BPF_CLASS(insn->code) == BPF_LDX &&
- BPF_MODE(insn->code) == BPF_PROBE_MEM)
+ (BPF_MODE(insn->code) == BPF_PROBE_MEM ||
+ BPF_MODE(insn->code) == BPF_PROBE_MEMSX))
probe_prg = jit->prg;
switch (insn->code) {
/*
* BPF_MOV
*/
- case BPF_ALU | BPF_MOV | BPF_X: /* dst = (u32) src */
- /* llgfr %dst,%src */
- EMIT4(0xb9160000, dst_reg, src_reg);
- if (insn_is_zext(&insn[1]))
- insn_count = 2;
+ case BPF_ALU | BPF_MOV | BPF_X:
+ switch (insn->off) {
+ case 0: /* DST = (u32) SRC */
+ /* llgfr %dst,%src */
+ EMIT4(0xb9160000, dst_reg, src_reg);
+ if (insn_is_zext(&insn[1]))
+ insn_count = 2;
+ break;
+ case 8: /* DST = (u32)(s8) SRC */
+ /* lbr %dst,%src */
+ EMIT4(0xb9260000, dst_reg, src_reg);
+ /* llgfr %dst,%dst */
+ EMIT4(0xb9160000, dst_reg, dst_reg);
+ break;
+ case 16: /* DST = (u32)(s16) SRC */
+ /* lhr %dst,%src */
+ EMIT4(0xb9270000, dst_reg, src_reg);
+ /* llgfr %dst,%dst */
+ EMIT4(0xb9160000, dst_reg, dst_reg);
+ break;
+ }
break;
- case BPF_ALU64 | BPF_MOV | BPF_X: /* dst = src */
- /* lgr %dst,%src */
- EMIT4(0xb9040000, dst_reg, src_reg);
+ case BPF_ALU64 | BPF_MOV | BPF_X:
+ switch (insn->off) {
+ case 0: /* DST = SRC */
+ /* lgr %dst,%src */
+ EMIT4(0xb9040000, dst_reg, src_reg);
+ break;
+ case 8: /* DST = (s8) SRC */
+ /* lgbr %dst,%src */
+ EMIT4(0xb9060000, dst_reg, src_reg);
+ break;
+ case 16: /* DST = (s16) SRC */
+ /* lghr %dst,%src */
+ EMIT4(0xb9070000, dst_reg, src_reg);
+ break;
+ case 32: /* DST = (s32) SRC */
+ /* lgfr %dst,%src */
+ EMIT4(0xb9140000, dst_reg, src_reg);
+ break;
+ }
break;
case BPF_ALU | BPF_MOV | BPF_K: /* dst = (u32) imm */
/* llilf %dst,imm */
@@ -912,66 +949,115 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp,
/*
* BPF_DIV / BPF_MOD
*/
- case BPF_ALU | BPF_DIV | BPF_X: /* dst = (u32) dst / (u32) src */
- case BPF_ALU | BPF_MOD | BPF_X: /* dst = (u32) dst % (u32) src */
+ case BPF_ALU | BPF_DIV | BPF_X:
+ case BPF_ALU | BPF_MOD | BPF_X:
{
int rc_reg = BPF_OP(insn->code) == BPF_DIV ? REG_W1 : REG_W0;
- /* lhi %w0,0 */
- EMIT4_IMM(0xa7080000, REG_W0, 0);
- /* lr %w1,%dst */
- EMIT2(0x1800, REG_W1, dst_reg);
- /* dlr %w0,%src */
- EMIT4(0xb9970000, REG_W0, src_reg);
+ switch (off) {
+ case 0: /* dst = (u32) dst {/,%} (u32) src */
+ /* xr %w0,%w0 */
+ EMIT2(0x1700, REG_W0, REG_W0);
+ /* lr %w1,%dst */
+ EMIT2(0x1800, REG_W1, dst_reg);
+ /* dlr %w0,%src */
+ EMIT4(0xb9970000, REG_W0, src_reg);
+ break;
+ case 1: /* dst = (u32) ((s32) dst {/,%} (s32) src) */
+ /* lgfr %r1,%dst */
+ EMIT4(0xb9140000, REG_W1, dst_reg);
+ /* dsgfr %r0,%src */
+ EMIT4(0xb91d0000, REG_W0, src_reg);
+ break;
+ }
/* llgfr %dst,%rc */
EMIT4(0xb9160000, dst_reg, rc_reg);
if (insn_is_zext(&insn[1]))
insn_count = 2;
break;
}
- case BPF_ALU64 | BPF_DIV | BPF_X: /* dst = dst / src */
- case BPF_ALU64 | BPF_MOD | BPF_X: /* dst = dst % src */
+ case BPF_ALU64 | BPF_DIV | BPF_X:
+ case BPF_ALU64 | BPF_MOD | BPF_X:
{
int rc_reg = BPF_OP(insn->code) == BPF_DIV ? REG_W1 : REG_W0;
- /* lghi %w0,0 */
- EMIT4_IMM(0xa7090000, REG_W0, 0);
- /* lgr %w1,%dst */
- EMIT4(0xb9040000, REG_W1, dst_reg);
- /* dlgr %w0,%dst */
- EMIT4(0xb9870000, REG_W0, src_reg);
+ switch (off) {
+ case 0: /* dst = dst {/,%} src */
+ /* lghi %w0,0 */
+ EMIT4_IMM(0xa7090000, REG_W0, 0);
+ /* lgr %w1,%dst */
+ EMIT4(0xb9040000, REG_W1, dst_reg);
+ /* dlgr %w0,%src */
+ EMIT4(0xb9870000, REG_W0, src_reg);
+ break;
+ case 1: /* dst = (s64) dst {/,%} (s64) src */
+ /* lgr %w1,%dst */
+ EMIT4(0xb9040000, REG_W1, dst_reg);
+ /* dsgr %w0,%src */
+ EMIT4(0xb90d0000, REG_W0, src_reg);
+ break;
+ }
/* lgr %dst,%rc */
EMIT4(0xb9040000, dst_reg, rc_reg);
break;
}
- case BPF_ALU | BPF_DIV | BPF_K: /* dst = (u32) dst / (u32) imm */
- case BPF_ALU | BPF_MOD | BPF_K: /* dst = (u32) dst % (u32) imm */
+ case BPF_ALU | BPF_DIV | BPF_K:
+ case BPF_ALU | BPF_MOD | BPF_K:
{
int rc_reg = BPF_OP(insn->code) == BPF_DIV ? REG_W1 : REG_W0;
if (imm == 1) {
if (BPF_OP(insn->code) == BPF_MOD)
- /* lhgi %dst,0 */
+ /* lghi %dst,0 */
EMIT4_IMM(0xa7090000, dst_reg, 0);
else
EMIT_ZERO(dst_reg);
break;
}
- /* lhi %w0,0 */
- EMIT4_IMM(0xa7080000, REG_W0, 0);
- /* lr %w1,%dst */
- EMIT2(0x1800, REG_W1, dst_reg);
if (!is_first_pass(jit) && can_use_ldisp_for_lit32(jit)) {
- /* dl %w0,<d(imm)>(%l) */
- EMIT6_DISP_LH(0xe3000000, 0x0097, REG_W0, REG_0, REG_L,
- EMIT_CONST_U32(imm));
+ switch (off) {
+ case 0: /* dst = (u32) dst {/,%} (u32) imm */
+ /* xr %w0,%w0 */
+ EMIT2(0x1700, REG_W0, REG_W0);
+ /* lr %w1,%dst */
+ EMIT2(0x1800, REG_W1, dst_reg);
+ /* dl %w0,<d(imm)>(%l) */
+ EMIT6_DISP_LH(0xe3000000, 0x0097, REG_W0, REG_0,
+ REG_L, EMIT_CONST_U32(imm));
+ break;
+ case 1: /* dst = (s32) dst {/,%} (s32) imm */
+ /* lgfr %r1,%dst */
+ EMIT4(0xb9140000, REG_W1, dst_reg);
+ /* dsgf %r0,<d(imm)>(%l) */
+ EMIT6_DISP_LH(0xe3000000, 0x001d, REG_W0, REG_0,
+ REG_L, EMIT_CONST_U32(imm));
+ break;
+ }
} else {
- /* lgfrl %dst,imm */
- EMIT6_PCREL_RILB(0xc40c0000, dst_reg,
- _EMIT_CONST_U32(imm));
- jit->seen |= SEEN_LITERAL;
- /* dlr %w0,%dst */
- EMIT4(0xb9970000, REG_W0, dst_reg);
+ switch (off) {
+ case 0: /* dst = (u32) dst {/,%} (u32) imm */
+ /* xr %w0,%w0 */
+ EMIT2(0x1700, REG_W0, REG_W0);
+ /* lr %w1,%dst */
+ EMIT2(0x1800, REG_W1, dst_reg);
+ /* lrl %dst,imm */
+ EMIT6_PCREL_RILB(0xc40d0000, dst_reg,
+ _EMIT_CONST_U32(imm));
+ jit->seen |= SEEN_LITERAL;
+ /* dlr %w0,%dst */
+ EMIT4(0xb9970000, REG_W0, dst_reg);
+ break;
+ case 1: /* dst = (s32) dst {/,%} (s32) imm */
+ /* lgfr %w1,%dst */
+ EMIT4(0xb9140000, REG_W1, dst_reg);
+ /* lgfrl %dst,imm */
+ EMIT6_PCREL_RILB(0xc40c0000, dst_reg,
+ _EMIT_CONST_U32(imm));
+ jit->seen |= SEEN_LITERAL;
+ /* dsgr %w0,%dst */
+ EMIT4(0xb90d0000, REG_W0, dst_reg);
+ break;
+ }
}
/* llgfr %dst,%rc */
EMIT4(0xb9160000, dst_reg, rc_reg);
@@ -979,8 +1065,8 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp,
insn_count = 2;
break;
}
- case BPF_ALU64 | BPF_DIV | BPF_K: /* dst = dst / imm */
- case BPF_ALU64 | BPF_MOD | BPF_K: /* dst = dst % imm */
+ case BPF_ALU64 | BPF_DIV | BPF_K:
+ case BPF_ALU64 | BPF_MOD | BPF_K:
{
int rc_reg = BPF_OP(insn->code) == BPF_DIV ? REG_W1 : REG_W0;
@@ -990,21 +1076,50 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp,
EMIT4_IMM(0xa7090000, dst_reg, 0);
break;
}
- /* lghi %w0,0 */
- EMIT4_IMM(0xa7090000, REG_W0, 0);
- /* lgr %w1,%dst */
- EMIT4(0xb9040000, REG_W1, dst_reg);
if (!is_first_pass(jit) && can_use_ldisp_for_lit64(jit)) {
- /* dlg %w0,<d(imm)>(%l) */
- EMIT6_DISP_LH(0xe3000000, 0x0087, REG_W0, REG_0, REG_L,
- EMIT_CONST_U64(imm));
+ switch (off) {
+ case 0: /* dst = dst {/,%} imm */
+ /* lghi %w0,0 */
+ EMIT4_IMM(0xa7090000, REG_W0, 0);
+ /* lgr %w1,%dst */
+ EMIT4(0xb9040000, REG_W1, dst_reg);
+ /* dlg %w0,<d(imm)>(%l) */
+ EMIT6_DISP_LH(0xe3000000, 0x0087, REG_W0, REG_0,
+ REG_L, EMIT_CONST_U64(imm));
+ break;
+ case 1: /* dst = (s64) dst {/,%} (s64) imm */
+ /* lgr %w1,%dst */
+ EMIT4(0xb9040000, REG_W1, dst_reg);
+ /* dsg %w0,<d(imm)>(%l) */
+ EMIT6_DISP_LH(0xe3000000, 0x000d, REG_W0, REG_0,
+ REG_L, EMIT_CONST_U64(imm));
+ break;
+ }
} else {
- /* lgrl %dst,imm */
- EMIT6_PCREL_RILB(0xc4080000, dst_reg,
- _EMIT_CONST_U64(imm));
- jit->seen |= SEEN_LITERAL;
- /* dlgr %w0,%dst */
- EMIT4(0xb9870000, REG_W0, dst_reg);
+ switch (off) {
+ case 0: /* dst = dst {/,%} imm */
+ /* lghi %w0,0 */
+ EMIT4_IMM(0xa7090000, REG_W0, 0);
+ /* lgr %w1,%dst */
+ EMIT4(0xb9040000, REG_W1, dst_reg);
+ /* lgrl %dst,imm */
+ EMIT6_PCREL_RILB(0xc4080000, dst_reg,
+ _EMIT_CONST_U64(imm));
+ jit->seen |= SEEN_LITERAL;
+ /* dlgr %w0,%dst */
+ EMIT4(0xb9870000, REG_W0, dst_reg);
+ break;
+ case 1: /* dst = (s64) dst {/,%} (s64) imm */
+ /* lgr %w1,%dst */
+ EMIT4(0xb9040000, REG_W1, dst_reg);
+ /* lgrl %dst,imm */
+ EMIT6_PCREL_RILB(0xc4080000, dst_reg,
+ _EMIT_CONST_U64(imm));
+ jit->seen |= SEEN_LITERAL;
+ /* dsgr %w0,%dst */
+ EMIT4(0xb90d0000, REG_W0, dst_reg);
+ break;
+ }
}
/* lgr %dst,%rc */
EMIT4(0xb9040000, dst_reg, rc_reg);
@@ -1217,6 +1332,7 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp,
}
break;
case BPF_ALU | BPF_END | BPF_FROM_LE:
+ case BPF_ALU64 | BPF_END | BPF_FROM_LE:
switch (imm) {
case 16: /* dst = (u16) cpu_to_le16(dst) */
/* lrvr %dst,%dst */
@@ -1311,8 +1427,12 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp,
EMIT6_DISP_LH(0xeb000000, is32 ? (op32) : (op64), \
(insn->imm & BPF_FETCH) ? src_reg : REG_W0, \
src_reg, dst_reg, off); \
- if (is32 && (insn->imm & BPF_FETCH)) \
- EMIT_ZERO(src_reg); \
+ if (insn->imm & BPF_FETCH) { \
+ /* bcr 14,0 - see atomic_fetch_{add,and,or,xor}() */ \
+ _EMIT2(0x07e0); \
+ if (is32) \
+ EMIT_ZERO(src_reg); \
+ } \
} while (0)
case BPF_ADD:
case BPF_ADD | BPF_FETCH:
@@ -1374,6 +1494,12 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp,
if (insn_is_zext(&insn[1]))
insn_count = 2;
break;
+ case BPF_LDX | BPF_MEMSX | BPF_B: /* dst = *(s8 *)(ul) (src + off) */
+ case BPF_LDX | BPF_PROBE_MEMSX | BPF_B:
+ /* lgb %dst,0(off,%src) */
+ EMIT6_DISP_LH(0xe3000000, 0x0077, dst_reg, src_reg, REG_0, off);
+ jit->seen |= SEEN_MEM;
+ break;
case BPF_LDX | BPF_MEM | BPF_H: /* dst = *(u16 *)(ul) (src + off) */
case BPF_LDX | BPF_PROBE_MEM | BPF_H:
/* llgh %dst,0(off,%src) */
@@ -1382,6 +1508,12 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp,
if (insn_is_zext(&insn[1]))
insn_count = 2;
break;
+ case BPF_LDX | BPF_MEMSX | BPF_H: /* dst = *(s16 *)(ul) (src + off) */
+ case BPF_LDX | BPF_PROBE_MEMSX | BPF_H:
+ /* lgh %dst,0(off,%src) */
+ EMIT6_DISP_LH(0xe3000000, 0x0015, dst_reg, src_reg, REG_0, off);
+ jit->seen |= SEEN_MEM;
+ break;
case BPF_LDX | BPF_MEM | BPF_W: /* dst = *(u32 *)(ul) (src + off) */
case BPF_LDX | BPF_PROBE_MEM | BPF_W:
/* llgf %dst,off(%src) */
@@ -1390,6 +1522,12 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp,
if (insn_is_zext(&insn[1]))
insn_count = 2;
break;
+ case BPF_LDX | BPF_MEMSX | BPF_W: /* dst = *(s32 *)(ul) (src + off) */
+ case BPF_LDX | BPF_PROBE_MEMSX | BPF_W:
+ /* lgf %dst,off(%src) */
+ jit->seen |= SEEN_MEM;
+ EMIT6_DISP_LH(0xe3000000, 0x0014, dst_reg, src_reg, REG_0, off);
+ break;
case BPF_LDX | BPF_MEM | BPF_DW: /* dst = *(u64 *)(ul) (src + off) */
case BPF_LDX | BPF_PROBE_MEM | BPF_DW:
/* lg %dst,0(off,%src) */
@@ -1570,6 +1708,9 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp,
* instruction itself (loop) and for BPF with offset 0 we
* branch to the instruction behind the branch.
*/
+ case BPF_JMP32 | BPF_JA: /* if (true) */
+ branch_oc_off = imm;
+ fallthrough;
case BPF_JMP | BPF_JA: /* if (true) */
mask = 0xf000; /* j */
goto branch_oc;
@@ -1738,14 +1879,16 @@ branch_xu:
break;
branch_oc:
if (!is_first_pass(jit) &&
- can_use_rel(jit, addrs[i + off + 1])) {
+ can_use_rel(jit, addrs[i + branch_oc_off + 1])) {
/* brc mask,off */
EMIT4_PCREL_RIC(0xa7040000,
- mask >> 12, addrs[i + off + 1]);
+ mask >> 12,
+ addrs[i + branch_oc_off + 1]);
} else {
/* brcl mask,off */
EMIT6_PCREL_RILC(0xc0040000,
- mask >> 12, addrs[i + off + 1]);
+ mask >> 12,
+ addrs[i + branch_oc_off + 1]);
}
break;
}
@@ -1901,9 +2044,6 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp)
struct bpf_jit jit;
int pass;
- if (WARN_ON_ONCE(bpf_plt_end - bpf_plt != BPF_PLT_SIZE))
- return orig_fp;
-
if (!fp->jit_requested)
return orig_fp;
@@ -2009,14 +2149,11 @@ bool bpf_jit_supports_far_kfunc_call(void)
int bpf_arch_text_poke(void *ip, enum bpf_text_poke_type t,
void *old_addr, void *new_addr)
{
+ struct bpf_plt expected_plt, current_plt, new_plt, *plt;
struct {
u16 opc;
s32 disp;
} __packed insn;
- char expected_plt[BPF_PLT_SIZE];
- char current_plt[BPF_PLT_SIZE];
- char new_plt[BPF_PLT_SIZE];
- char *plt;
char *ret;
int err;
@@ -2035,18 +2172,18 @@ int bpf_arch_text_poke(void *ip, enum bpf_text_poke_type t,
*/
} else {
/* Verify the PLT. */
- plt = (char *)ip + (insn.disp << 1);
- err = copy_from_kernel_nofault(current_plt, plt, BPF_PLT_SIZE);
+ plt = ip + (insn.disp << 1);
+ err = copy_from_kernel_nofault(&current_plt, plt,
+ sizeof(current_plt));
if (err < 0)
return err;
ret = (char *)ip + 6;
- bpf_jit_plt(expected_plt, ret, old_addr);
- if (memcmp(current_plt, expected_plt, BPF_PLT_SIZE))
+ bpf_jit_plt(&expected_plt, ret, old_addr);
+ if (memcmp(&current_plt, &expected_plt, sizeof(current_plt)))
return -EINVAL;
/* Adjust the call address. */
- bpf_jit_plt(new_plt, ret, new_addr);
- s390_kernel_write(plt + (bpf_plt_target - bpf_plt),
- new_plt + (bpf_plt_target - bpf_plt),
+ bpf_jit_plt(&new_plt, ret, new_addr);
+ s390_kernel_write(&plt->target, &new_plt.target,
sizeof(void *));
}
@@ -2223,7 +2360,8 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im,
return -ENOTSUPP;
/* Return to %r14, since func_addr and %r0 are not available. */
- if (!func_addr && !(flags & BPF_TRAMP_F_ORIG_STACK))
+ if ((!func_addr && !(flags & BPF_TRAMP_F_ORIG_STACK)) ||
+ (flags & BPF_TRAMP_F_INDIRECT))
flags |= BPF_TRAMP_F_SKIP_FRAME;
/*
@@ -2498,6 +2636,21 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im,
return 0;
}
+int arch_bpf_trampoline_size(const struct btf_func_model *m, u32 flags,
+ struct bpf_tramp_links *tlinks, void *orig_call)
+{
+ struct bpf_tramp_image im;
+ struct bpf_tramp_jit tjit;
+ int ret;
+
+ memset(&tjit, 0, sizeof(tjit));
+
+ ret = __arch_prepare_bpf_trampoline(&im, &tjit, m, flags,
+ tlinks, orig_call);
+
+ return ret < 0 ? ret : tjit.common.prg;
+}
+
int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image,
void *image_end, const struct btf_func_model *m,
u32 flags, struct bpf_tramp_links *tlinks,
@@ -2505,30 +2658,27 @@ int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image,
{
struct bpf_tramp_jit tjit;
int ret;
- int i;
- for (i = 0; i < 2; i++) {
- if (i == 0) {
- /* Compute offsets, check whether the code fits. */
- memset(&tjit, 0, sizeof(tjit));
- } else {
- /* Generate the code. */
- tjit.common.prg = 0;
- tjit.common.prg_buf = image;
- }
- ret = __arch_prepare_bpf_trampoline(im, &tjit, m, flags,
- tlinks, func_addr);
- if (ret < 0)
- return ret;
- if (tjit.common.prg > (char *)image_end - (char *)image)
- /*
- * Use the same error code as for exceeding
- * BPF_MAX_TRAMP_LINKS.
- */
- return -E2BIG;
- }
+ /* Compute offsets, check whether the code fits. */
+ memset(&tjit, 0, sizeof(tjit));
+ ret = __arch_prepare_bpf_trampoline(im, &tjit, m, flags,
+ tlinks, func_addr);
+
+ if (ret < 0)
+ return ret;
+ if (tjit.common.prg > (char *)image_end - (char *)image)
+ /*
+ * Use the same error code as for exceeding
+ * BPF_MAX_TRAMP_LINKS.
+ */
+ return -E2BIG;
+
+ tjit.common.prg = 0;
+ tjit.common.prg_buf = image;
+ ret = __arch_prepare_bpf_trampoline(im, &tjit, m, flags,
+ tlinks, func_addr);
- return tjit.common.prg;
+ return ret < 0 ? ret : tjit.common.prg;
}
bool bpf_jit_supports_subprog_tailcalls(void)
diff --git a/arch/s390/pci/Makefile b/arch/s390/pci/Makefile
index 5ae31ca9d..0547a1040 100644
--- a/arch/s390/pci/Makefile
+++ b/arch/s390/pci/Makefile
@@ -3,7 +3,7 @@
# Makefile for the s390 PCI subsystem.
#
-obj-$(CONFIG_PCI) += pci.o pci_irq.o pci_dma.o pci_clp.o pci_sysfs.o \
+obj-$(CONFIG_PCI) += pci.o pci_irq.o pci_clp.o pci_sysfs.o \
pci_event.o pci_debug.o pci_insn.o pci_mmio.o \
pci_bus.o pci_kvm_hook.o
obj-$(CONFIG_PCI_IOV) += pci_iov.o
diff --git a/arch/s390/pci/pci.c b/arch/s390/pci/pci.c
index d34d5813d..52a44e353 100644
--- a/arch/s390/pci/pci.c
+++ b/arch/s390/pci/pci.c
@@ -124,7 +124,11 @@ int zpci_register_ioat(struct zpci_dev *zdev, u8 dmaas,
WARN_ON_ONCE(iota & 0x3fff);
fib.pba = base;
- fib.pal = limit;
+ /* Work around off by one in ISM virt device */
+ if (zdev->pft == PCI_FUNC_TYPE_ISM && limit > base)
+ fib.pal = limit + (1 << 12);
+ else
+ fib.pal = limit;
fib.iota = iota | ZPCI_IOTA_RTTO_FLAG;
fib.gd = zdev->gisa;
cc = zpci_mod_fc(req, &fib, status);
@@ -153,6 +157,7 @@ int zpci_unregister_ioat(struct zpci_dev *zdev, u8 dmaas)
int zpci_fmb_enable_device(struct zpci_dev *zdev)
{
u64 req = ZPCI_CREATE_REQ(zdev->fh, 0, ZPCI_MOD_FC_SET_MEASURE);
+ struct zpci_iommu_ctrs *ctrs;
struct zpci_fib fib = {0};
u8 cc, status;
@@ -165,9 +170,15 @@ int zpci_fmb_enable_device(struct zpci_dev *zdev)
WARN_ON((u64) zdev->fmb & 0xf);
/* reset software counters */
- atomic64_set(&zdev->allocated_pages, 0);
- atomic64_set(&zdev->mapped_pages, 0);
- atomic64_set(&zdev->unmapped_pages, 0);
+ ctrs = zpci_get_iommu_ctrs(zdev);
+ if (ctrs) {
+ atomic64_set(&ctrs->mapped_pages, 0);
+ atomic64_set(&ctrs->unmapped_pages, 0);
+ atomic64_set(&ctrs->global_rpcits, 0);
+ atomic64_set(&ctrs->sync_map_rpcits, 0);
+ atomic64_set(&ctrs->sync_rpcits, 0);
+ }
+
fib.fmb_addr = virt_to_phys(zdev->fmb);
fib.gd = zdev->gisa;
@@ -241,7 +252,7 @@ resource_size_t pcibios_align_resource(void *data, const struct resource *res,
/* combine single writes by using store-block insn */
void __iowrite64_copy(void __iomem *to, const void *from, size_t count)
{
- zpci_memcpy_toio(to, from, count);
+ zpci_memcpy_toio(to, from, count * 8);
}
void __iomem *ioremap_prot(phys_addr_t phys_addr, size_t size,
@@ -582,7 +593,6 @@ int pcibios_device_add(struct pci_dev *pdev)
pdev->no_vf_scan = 1;
pdev->dev.groups = zpci_attr_groups;
- pdev->dev.dma_ops = &s390_pci_dma_ops;
zpci_map_resources(pdev);
for (i = 0; i < PCI_STD_NUM_BARS; i++) {
@@ -756,8 +766,6 @@ int zpci_hot_reset_device(struct zpci_dev *zdev)
if (zdev->dma_table)
rc = zpci_register_ioat(zdev, 0, zdev->start_dma, zdev->end_dma,
virt_to_phys(zdev->dma_table), &status);
- else
- rc = zpci_dma_init_device(zdev);
if (rc) {
zpci_disable_device(zdev);
return rc;
@@ -865,11 +873,6 @@ int zpci_deconfigure_device(struct zpci_dev *zdev)
if (zdev->zbus->bus)
zpci_bus_remove_device(zdev, false);
- if (zdev->dma_table) {
- rc = zpci_dma_exit_device(zdev);
- if (rc)
- return rc;
- }
if (zdev_enabled(zdev)) {
rc = zpci_disable_device(zdev);
if (rc)
@@ -918,8 +921,6 @@ void zpci_release_device(struct kref *kref)
if (zdev->zbus->bus)
zpci_bus_remove_device(zdev, false);
- if (zdev->dma_table)
- zpci_dma_exit_device(zdev);
if (zdev_enabled(zdev))
zpci_disable_device(zdev);
@@ -1094,7 +1095,7 @@ static int __init pci_base_init(void)
if (MACHINE_HAS_PCI_MIO) {
static_branch_enable(&have_mio);
- ctl_set_bit(2, 5);
+ system_ctl_set_bit(2, CR2_MIO_ADDRESSING_BIT);
}
rc = zpci_debug_init();
@@ -1109,10 +1110,6 @@ static int __init pci_base_init(void)
if (rc)
goto out_irq;
- rc = zpci_dma_init();
- if (rc)
- goto out_dma;
-
rc = clp_scan_pci_devices();
if (rc)
goto out_find;
@@ -1122,8 +1119,6 @@ static int __init pci_base_init(void)
return 0;
out_find:
- zpci_dma_exit();
-out_dma:
zpci_irq_exit();
out_irq:
zpci_mem_exit();
diff --git a/arch/s390/pci/pci_bus.c b/arch/s390/pci/pci_bus.c
index 32245b970..daa5d7450 100644
--- a/arch/s390/pci/pci_bus.c
+++ b/arch/s390/pci/pci_bus.c
@@ -47,11 +47,6 @@ static int zpci_bus_prepare_device(struct zpci_dev *zdev)
rc = zpci_enable_device(zdev);
if (rc)
return rc;
- rc = zpci_dma_init_device(zdev);
- if (rc) {
- zpci_disable_device(zdev);
- return rc;
- }
}
if (!zdev->has_resources) {
diff --git a/arch/s390/pci/pci_debug.c b/arch/s390/pci/pci_debug.c
index ca6bd98ee..6dde2263c 100644
--- a/arch/s390/pci/pci_debug.c
+++ b/arch/s390/pci/pci_debug.c
@@ -53,9 +53,11 @@ static char *pci_fmt3_names[] = {
};
static char *pci_sw_names[] = {
- "Allocated pages",
"Mapped pages",
"Unmapped pages",
+ "Global RPCITs",
+ "Sync Map RPCITs",
+ "Sync RPCITs",
};
static void pci_fmb_show(struct seq_file *m, char *name[], int length,
@@ -69,10 +71,14 @@ static void pci_fmb_show(struct seq_file *m, char *name[], int length,
static void pci_sw_counter_show(struct seq_file *m)
{
- struct zpci_dev *zdev = m->private;
- atomic64_t *counter = &zdev->allocated_pages;
+ struct zpci_iommu_ctrs *ctrs = zpci_get_iommu_ctrs(m->private);
+ atomic64_t *counter;
int i;
+ if (!ctrs)
+ return;
+
+ counter = &ctrs->mapped_pages;
for (i = 0; i < ARRAY_SIZE(pci_sw_names); i++, counter++)
seq_printf(m, "%26s:\t%llu\n", pci_sw_names[i],
atomic64_read(counter));
diff --git a/arch/s390/pci/pci_dma.c b/arch/s390/pci/pci_dma.c
deleted file mode 100644
index 99209085c..000000000
--- a/arch/s390/pci/pci_dma.c
+++ /dev/null
@@ -1,746 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * Copyright IBM Corp. 2012
- *
- * Author(s):
- * Jan Glauber <jang@linux.vnet.ibm.com>
- */
-
-#include <linux/kernel.h>
-#include <linux/slab.h>
-#include <linux/export.h>
-#include <linux/iommu-helper.h>
-#include <linux/dma-map-ops.h>
-#include <linux/vmalloc.h>
-#include <linux/pci.h>
-#include <asm/pci_dma.h>
-
-static struct kmem_cache *dma_region_table_cache;
-static struct kmem_cache *dma_page_table_cache;
-static int s390_iommu_strict;
-static u64 s390_iommu_aperture;
-static u32 s390_iommu_aperture_factor = 1;
-
-static int zpci_refresh_global(struct zpci_dev *zdev)
-{
- return zpci_refresh_trans((u64) zdev->fh << 32, zdev->start_dma,
- zdev->iommu_pages * PAGE_SIZE);
-}
-
-unsigned long *dma_alloc_cpu_table(gfp_t gfp)
-{
- unsigned long *table, *entry;
-
- table = kmem_cache_alloc(dma_region_table_cache, gfp);
- if (!table)
- return NULL;
-
- for (entry = table; entry < table + ZPCI_TABLE_ENTRIES; entry++)
- *entry = ZPCI_TABLE_INVALID;
- return table;
-}
-
-static void dma_free_cpu_table(void *table)
-{
- kmem_cache_free(dma_region_table_cache, table);
-}
-
-static unsigned long *dma_alloc_page_table(gfp_t gfp)
-{
- unsigned long *table, *entry;
-
- table = kmem_cache_alloc(dma_page_table_cache, gfp);
- if (!table)
- return NULL;
-
- for (entry = table; entry < table + ZPCI_PT_ENTRIES; entry++)
- *entry = ZPCI_PTE_INVALID;
- return table;
-}
-
-static void dma_free_page_table(void *table)
-{
- kmem_cache_free(dma_page_table_cache, table);
-}
-
-static unsigned long *dma_get_seg_table_origin(unsigned long *rtep, gfp_t gfp)
-{
- unsigned long old_rte, rte;
- unsigned long *sto;
-
- rte = READ_ONCE(*rtep);
- if (reg_entry_isvalid(rte)) {
- sto = get_rt_sto(rte);
- } else {
- sto = dma_alloc_cpu_table(gfp);
- if (!sto)
- return NULL;
-
- set_rt_sto(&rte, virt_to_phys(sto));
- validate_rt_entry(&rte);
- entry_clr_protected(&rte);
-
- old_rte = cmpxchg(rtep, ZPCI_TABLE_INVALID, rte);
- if (old_rte != ZPCI_TABLE_INVALID) {
- /* Somone else was faster, use theirs */
- dma_free_cpu_table(sto);
- sto = get_rt_sto(old_rte);
- }
- }
- return sto;
-}
-
-static unsigned long *dma_get_page_table_origin(unsigned long *step, gfp_t gfp)
-{
- unsigned long old_ste, ste;
- unsigned long *pto;
-
- ste = READ_ONCE(*step);
- if (reg_entry_isvalid(ste)) {
- pto = get_st_pto(ste);
- } else {
- pto = dma_alloc_page_table(gfp);
- if (!pto)
- return NULL;
- set_st_pto(&ste, virt_to_phys(pto));
- validate_st_entry(&ste);
- entry_clr_protected(&ste);
-
- old_ste = cmpxchg(step, ZPCI_TABLE_INVALID, ste);
- if (old_ste != ZPCI_TABLE_INVALID) {
- /* Somone else was faster, use theirs */
- dma_free_page_table(pto);
- pto = get_st_pto(old_ste);
- }
- }
- return pto;
-}
-
-unsigned long *dma_walk_cpu_trans(unsigned long *rto, dma_addr_t dma_addr,
- gfp_t gfp)
-{
- unsigned long *sto, *pto;
- unsigned int rtx, sx, px;
-
- rtx = calc_rtx(dma_addr);
- sto = dma_get_seg_table_origin(&rto[rtx], gfp);
- if (!sto)
- return NULL;
-
- sx = calc_sx(dma_addr);
- pto = dma_get_page_table_origin(&sto[sx], gfp);
- if (!pto)
- return NULL;
-
- px = calc_px(dma_addr);
- return &pto[px];
-}
-
-void dma_update_cpu_trans(unsigned long *ptep, phys_addr_t page_addr, int flags)
-{
- unsigned long pte;
-
- pte = READ_ONCE(*ptep);
- if (flags & ZPCI_PTE_INVALID) {
- invalidate_pt_entry(&pte);
- } else {
- set_pt_pfaa(&pte, page_addr);
- validate_pt_entry(&pte);
- }
-
- if (flags & ZPCI_TABLE_PROTECTED)
- entry_set_protected(&pte);
- else
- entry_clr_protected(&pte);
-
- xchg(ptep, pte);
-}
-
-static int __dma_update_trans(struct zpci_dev *zdev, phys_addr_t pa,
- dma_addr_t dma_addr, size_t size, int flags)
-{
- unsigned int nr_pages = PAGE_ALIGN(size) >> PAGE_SHIFT;
- phys_addr_t page_addr = (pa & PAGE_MASK);
- unsigned long *entry;
- int i, rc = 0;
-
- if (!nr_pages)
- return -EINVAL;
-
- if (!zdev->dma_table)
- return -EINVAL;
-
- for (i = 0; i < nr_pages; i++) {
- entry = dma_walk_cpu_trans(zdev->dma_table, dma_addr,
- GFP_ATOMIC);
- if (!entry) {
- rc = -ENOMEM;
- goto undo_cpu_trans;
- }
- dma_update_cpu_trans(entry, page_addr, flags);
- page_addr += PAGE_SIZE;
- dma_addr += PAGE_SIZE;
- }
-
-undo_cpu_trans:
- if (rc && ((flags & ZPCI_PTE_VALID_MASK) == ZPCI_PTE_VALID)) {
- flags = ZPCI_PTE_INVALID;
- while (i-- > 0) {
- page_addr -= PAGE_SIZE;
- dma_addr -= PAGE_SIZE;
- entry = dma_walk_cpu_trans(zdev->dma_table, dma_addr,
- GFP_ATOMIC);
- if (!entry)
- break;
- dma_update_cpu_trans(entry, page_addr, flags);
- }
- }
- return rc;
-}
-
-static int __dma_purge_tlb(struct zpci_dev *zdev, dma_addr_t dma_addr,
- size_t size, int flags)
-{
- unsigned long irqflags;
- int ret;
-
- /*
- * With zdev->tlb_refresh == 0, rpcit is not required to establish new
- * translations when previously invalid translation-table entries are
- * validated. With lazy unmap, rpcit is skipped for previously valid
- * entries, but a global rpcit is then required before any address can
- * be re-used, i.e. after each iommu bitmap wrap-around.
- */
- if ((flags & ZPCI_PTE_VALID_MASK) == ZPCI_PTE_VALID) {
- if (!zdev->tlb_refresh)
- return 0;
- } else {
- if (!s390_iommu_strict)
- return 0;
- }
-
- ret = zpci_refresh_trans((u64) zdev->fh << 32, dma_addr,
- PAGE_ALIGN(size));
- if (ret == -ENOMEM && !s390_iommu_strict) {
- /* enable the hypervisor to free some resources */
- if (zpci_refresh_global(zdev))
- goto out;
-
- spin_lock_irqsave(&zdev->iommu_bitmap_lock, irqflags);
- bitmap_andnot(zdev->iommu_bitmap, zdev->iommu_bitmap,
- zdev->lazy_bitmap, zdev->iommu_pages);
- bitmap_zero(zdev->lazy_bitmap, zdev->iommu_pages);
- spin_unlock_irqrestore(&zdev->iommu_bitmap_lock, irqflags);
- ret = 0;
- }
-out:
- return ret;
-}
-
-static int dma_update_trans(struct zpci_dev *zdev, phys_addr_t pa,
- dma_addr_t dma_addr, size_t size, int flags)
-{
- int rc;
-
- rc = __dma_update_trans(zdev, pa, dma_addr, size, flags);
- if (rc)
- return rc;
-
- rc = __dma_purge_tlb(zdev, dma_addr, size, flags);
- if (rc && ((flags & ZPCI_PTE_VALID_MASK) == ZPCI_PTE_VALID))
- __dma_update_trans(zdev, pa, dma_addr, size, ZPCI_PTE_INVALID);
-
- return rc;
-}
-
-void dma_free_seg_table(unsigned long entry)
-{
- unsigned long *sto = get_rt_sto(entry);
- int sx;
-
- for (sx = 0; sx < ZPCI_TABLE_ENTRIES; sx++)
- if (reg_entry_isvalid(sto[sx]))
- dma_free_page_table(get_st_pto(sto[sx]));
-
- dma_free_cpu_table(sto);
-}
-
-void dma_cleanup_tables(unsigned long *table)
-{
- int rtx;
-
- if (!table)
- return;
-
- for (rtx = 0; rtx < ZPCI_TABLE_ENTRIES; rtx++)
- if (reg_entry_isvalid(table[rtx]))
- dma_free_seg_table(table[rtx]);
-
- dma_free_cpu_table(table);
-}
-
-static unsigned long __dma_alloc_iommu(struct device *dev,
- unsigned long start, int size)
-{
- struct zpci_dev *zdev = to_zpci(to_pci_dev(dev));
-
- return iommu_area_alloc(zdev->iommu_bitmap, zdev->iommu_pages,
- start, size, zdev->start_dma >> PAGE_SHIFT,
- dma_get_seg_boundary_nr_pages(dev, PAGE_SHIFT),
- 0);
-}
-
-static dma_addr_t dma_alloc_address(struct device *dev, int size)
-{
- struct zpci_dev *zdev = to_zpci(to_pci_dev(dev));
- unsigned long offset, flags;
-
- spin_lock_irqsave(&zdev->iommu_bitmap_lock, flags);
- offset = __dma_alloc_iommu(dev, zdev->next_bit, size);
- if (offset == -1) {
- if (!s390_iommu_strict) {
- /* global flush before DMA addresses are reused */
- if (zpci_refresh_global(zdev))
- goto out_error;
-
- bitmap_andnot(zdev->iommu_bitmap, zdev->iommu_bitmap,
- zdev->lazy_bitmap, zdev->iommu_pages);
- bitmap_zero(zdev->lazy_bitmap, zdev->iommu_pages);
- }
- /* wrap-around */
- offset = __dma_alloc_iommu(dev, 0, size);
- if (offset == -1)
- goto out_error;
- }
- zdev->next_bit = offset + size;
- spin_unlock_irqrestore(&zdev->iommu_bitmap_lock, flags);
-
- return zdev->start_dma + offset * PAGE_SIZE;
-
-out_error:
- spin_unlock_irqrestore(&zdev->iommu_bitmap_lock, flags);
- return DMA_MAPPING_ERROR;
-}
-
-static void dma_free_address(struct device *dev, dma_addr_t dma_addr, int size)
-{
- struct zpci_dev *zdev = to_zpci(to_pci_dev(dev));
- unsigned long flags, offset;
-
- offset = (dma_addr - zdev->start_dma) >> PAGE_SHIFT;
-
- spin_lock_irqsave(&zdev->iommu_bitmap_lock, flags);
- if (!zdev->iommu_bitmap)
- goto out;
-
- if (s390_iommu_strict)
- bitmap_clear(zdev->iommu_bitmap, offset, size);
- else
- bitmap_set(zdev->lazy_bitmap, offset, size);
-
-out:
- spin_unlock_irqrestore(&zdev->iommu_bitmap_lock, flags);
-}
-
-static inline void zpci_err_dma(unsigned long rc, unsigned long addr)
-{
- struct {
- unsigned long rc;
- unsigned long addr;
- } __packed data = {rc, addr};
-
- zpci_err_hex(&data, sizeof(data));
-}
-
-static dma_addr_t s390_dma_map_pages(struct device *dev, struct page *page,
- unsigned long offset, size_t size,
- enum dma_data_direction direction,
- unsigned long attrs)
-{
- struct zpci_dev *zdev = to_zpci(to_pci_dev(dev));
- unsigned long pa = page_to_phys(page) + offset;
- int flags = ZPCI_PTE_VALID;
- unsigned long nr_pages;
- dma_addr_t dma_addr;
- int ret;
-
- /* This rounds up number of pages based on size and offset */
- nr_pages = iommu_num_pages(pa, size, PAGE_SIZE);
- dma_addr = dma_alloc_address(dev, nr_pages);
- if (dma_addr == DMA_MAPPING_ERROR) {
- ret = -ENOSPC;
- goto out_err;
- }
-
- /* Use rounded up size */
- size = nr_pages * PAGE_SIZE;
-
- if (direction == DMA_NONE || direction == DMA_TO_DEVICE)
- flags |= ZPCI_TABLE_PROTECTED;
-
- ret = dma_update_trans(zdev, pa, dma_addr, size, flags);
- if (ret)
- goto out_free;
-
- atomic64_add(nr_pages, &zdev->mapped_pages);
- return dma_addr + (offset & ~PAGE_MASK);
-
-out_free:
- dma_free_address(dev, dma_addr, nr_pages);
-out_err:
- zpci_err("map error:\n");
- zpci_err_dma(ret, pa);
- return DMA_MAPPING_ERROR;
-}
-
-static void s390_dma_unmap_pages(struct device *dev, dma_addr_t dma_addr,
- size_t size, enum dma_data_direction direction,
- unsigned long attrs)
-{
- struct zpci_dev *zdev = to_zpci(to_pci_dev(dev));
- int npages, ret;
-
- npages = iommu_num_pages(dma_addr, size, PAGE_SIZE);
- dma_addr = dma_addr & PAGE_MASK;
- ret = dma_update_trans(zdev, 0, dma_addr, npages * PAGE_SIZE,
- ZPCI_PTE_INVALID);
- if (ret) {
- zpci_err("unmap error:\n");
- zpci_err_dma(ret, dma_addr);
- return;
- }
-
- atomic64_add(npages, &zdev->unmapped_pages);
- dma_free_address(dev, dma_addr, npages);
-}
-
-static void *s390_dma_alloc(struct device *dev, size_t size,
- dma_addr_t *dma_handle, gfp_t flag,
- unsigned long attrs)
-{
- struct zpci_dev *zdev = to_zpci(to_pci_dev(dev));
- struct page *page;
- phys_addr_t pa;
- dma_addr_t map;
-
- size = PAGE_ALIGN(size);
- page = alloc_pages(flag | __GFP_ZERO, get_order(size));
- if (!page)
- return NULL;
-
- pa = page_to_phys(page);
- map = s390_dma_map_pages(dev, page, 0, size, DMA_BIDIRECTIONAL, 0);
- if (dma_mapping_error(dev, map)) {
- __free_pages(page, get_order(size));
- return NULL;
- }
-
- atomic64_add(size / PAGE_SIZE, &zdev->allocated_pages);
- if (dma_handle)
- *dma_handle = map;
- return phys_to_virt(pa);
-}
-
-static void s390_dma_free(struct device *dev, size_t size,
- void *vaddr, dma_addr_t dma_handle,
- unsigned long attrs)
-{
- struct zpci_dev *zdev = to_zpci(to_pci_dev(dev));
-
- size = PAGE_ALIGN(size);
- atomic64_sub(size / PAGE_SIZE, &zdev->allocated_pages);
- s390_dma_unmap_pages(dev, dma_handle, size, DMA_BIDIRECTIONAL, 0);
- free_pages((unsigned long)vaddr, get_order(size));
-}
-
-/* Map a segment into a contiguous dma address area */
-static int __s390_dma_map_sg(struct device *dev, struct scatterlist *sg,
- size_t size, dma_addr_t *handle,
- enum dma_data_direction dir)
-{
- unsigned long nr_pages = PAGE_ALIGN(size) >> PAGE_SHIFT;
- struct zpci_dev *zdev = to_zpci(to_pci_dev(dev));
- dma_addr_t dma_addr_base, dma_addr;
- int flags = ZPCI_PTE_VALID;
- struct scatterlist *s;
- phys_addr_t pa = 0;
- int ret;
-
- dma_addr_base = dma_alloc_address(dev, nr_pages);
- if (dma_addr_base == DMA_MAPPING_ERROR)
- return -ENOMEM;
-
- dma_addr = dma_addr_base;
- if (dir == DMA_NONE || dir == DMA_TO_DEVICE)
- flags |= ZPCI_TABLE_PROTECTED;
-
- for (s = sg; dma_addr < dma_addr_base + size; s = sg_next(s)) {
- pa = page_to_phys(sg_page(s));
- ret = __dma_update_trans(zdev, pa, dma_addr,
- s->offset + s->length, flags);
- if (ret)
- goto unmap;
-
- dma_addr += s->offset + s->length;
- }
- ret = __dma_purge_tlb(zdev, dma_addr_base, size, flags);
- if (ret)
- goto unmap;
-
- *handle = dma_addr_base;
- atomic64_add(nr_pages, &zdev->mapped_pages);
-
- return ret;
-
-unmap:
- dma_update_trans(zdev, 0, dma_addr_base, dma_addr - dma_addr_base,
- ZPCI_PTE_INVALID);
- dma_free_address(dev, dma_addr_base, nr_pages);
- zpci_err("map error:\n");
- zpci_err_dma(ret, pa);
- return ret;
-}
-
-static int s390_dma_map_sg(struct device *dev, struct scatterlist *sg,
- int nr_elements, enum dma_data_direction dir,
- unsigned long attrs)
-{
- struct scatterlist *s = sg, *start = sg, *dma = sg;
- unsigned int max = dma_get_max_seg_size(dev);
- unsigned int size = s->offset + s->length;
- unsigned int offset = s->offset;
- int count = 0, i, ret;
-
- for (i = 1; i < nr_elements; i++) {
- s = sg_next(s);
-
- s->dma_length = 0;
-
- if (s->offset || (size & ~PAGE_MASK) ||
- size + s->length > max) {
- ret = __s390_dma_map_sg(dev, start, size,
- &dma->dma_address, dir);
- if (ret)
- goto unmap;
-
- dma->dma_address += offset;
- dma->dma_length = size - offset;
-
- size = offset = s->offset;
- start = s;
- dma = sg_next(dma);
- count++;
- }
- size += s->length;
- }
- ret = __s390_dma_map_sg(dev, start, size, &dma->dma_address, dir);
- if (ret)
- goto unmap;
-
- dma->dma_address += offset;
- dma->dma_length = size - offset;
-
- return count + 1;
-unmap:
- for_each_sg(sg, s, count, i)
- s390_dma_unmap_pages(dev, sg_dma_address(s), sg_dma_len(s),
- dir, attrs);
-
- return ret;
-}
-
-static void s390_dma_unmap_sg(struct device *dev, struct scatterlist *sg,
- int nr_elements, enum dma_data_direction dir,
- unsigned long attrs)
-{
- struct scatterlist *s;
- int i;
-
- for_each_sg(sg, s, nr_elements, i) {
- if (s->dma_length)
- s390_dma_unmap_pages(dev, s->dma_address, s->dma_length,
- dir, attrs);
- s->dma_address = 0;
- s->dma_length = 0;
- }
-}
-
-static unsigned long *bitmap_vzalloc(size_t bits, gfp_t flags)
-{
- size_t n = BITS_TO_LONGS(bits);
- size_t bytes;
-
- if (unlikely(check_mul_overflow(n, sizeof(unsigned long), &bytes)))
- return NULL;
-
- return vzalloc(bytes);
-}
-
-int zpci_dma_init_device(struct zpci_dev *zdev)
-{
- u8 status;
- int rc;
-
- /*
- * At this point, if the device is part of an IOMMU domain, this would
- * be a strong hint towards a bug in the IOMMU API (common) code and/or
- * simultaneous access via IOMMU and DMA API. So let's issue a warning.
- */
- WARN_ON(zdev->s390_domain);
-
- spin_lock_init(&zdev->iommu_bitmap_lock);
-
- zdev->dma_table = dma_alloc_cpu_table(GFP_KERNEL);
- if (!zdev->dma_table) {
- rc = -ENOMEM;
- goto out;
- }
-
- /*
- * Restrict the iommu bitmap size to the minimum of the following:
- * - s390_iommu_aperture which defaults to high_memory
- * - 3-level pagetable address limit minus start_dma offset
- * - DMA address range allowed by the hardware (clp query pci fn)
- *
- * Also set zdev->end_dma to the actual end address of the usable
- * range, instead of the theoretical maximum as reported by hardware.
- *
- * This limits the number of concurrently usable DMA mappings since
- * for each DMA mapped memory address we need a DMA address including
- * extra DMA addresses for multiple mappings of the same memory address.
- */
- zdev->start_dma = PAGE_ALIGN(zdev->start_dma);
- zdev->iommu_size = min3(s390_iommu_aperture,
- ZPCI_TABLE_SIZE_RT - zdev->start_dma,
- zdev->end_dma - zdev->start_dma + 1);
- zdev->end_dma = zdev->start_dma + zdev->iommu_size - 1;
- zdev->iommu_pages = zdev->iommu_size >> PAGE_SHIFT;
- zdev->iommu_bitmap = bitmap_vzalloc(zdev->iommu_pages, GFP_KERNEL);
- if (!zdev->iommu_bitmap) {
- rc = -ENOMEM;
- goto free_dma_table;
- }
- if (!s390_iommu_strict) {
- zdev->lazy_bitmap = bitmap_vzalloc(zdev->iommu_pages, GFP_KERNEL);
- if (!zdev->lazy_bitmap) {
- rc = -ENOMEM;
- goto free_bitmap;
- }
-
- }
- if (zpci_register_ioat(zdev, 0, zdev->start_dma, zdev->end_dma,
- virt_to_phys(zdev->dma_table), &status)) {
- rc = -EIO;
- goto free_bitmap;
- }
-
- return 0;
-free_bitmap:
- vfree(zdev->iommu_bitmap);
- zdev->iommu_bitmap = NULL;
- vfree(zdev->lazy_bitmap);
- zdev->lazy_bitmap = NULL;
-free_dma_table:
- dma_free_cpu_table(zdev->dma_table);
- zdev->dma_table = NULL;
-out:
- return rc;
-}
-
-int zpci_dma_exit_device(struct zpci_dev *zdev)
-{
- int cc = 0;
-
- /*
- * At this point, if the device is part of an IOMMU domain, this would
- * be a strong hint towards a bug in the IOMMU API (common) code and/or
- * simultaneous access via IOMMU and DMA API. So let's issue a warning.
- */
- WARN_ON(zdev->s390_domain);
- if (zdev_enabled(zdev))
- cc = zpci_unregister_ioat(zdev, 0);
- /*
- * cc == 3 indicates the function is gone already. This can happen
- * if the function was deconfigured/disabled suddenly and we have not
- * received a new handle yet.
- */
- if (cc && cc != 3)
- return -EIO;
-
- dma_cleanup_tables(zdev->dma_table);
- zdev->dma_table = NULL;
- vfree(zdev->iommu_bitmap);
- zdev->iommu_bitmap = NULL;
- vfree(zdev->lazy_bitmap);
- zdev->lazy_bitmap = NULL;
- zdev->next_bit = 0;
- return 0;
-}
-
-static int __init dma_alloc_cpu_table_caches(void)
-{
- dma_region_table_cache = kmem_cache_create("PCI_DMA_region_tables",
- ZPCI_TABLE_SIZE, ZPCI_TABLE_ALIGN,
- 0, NULL);
- if (!dma_region_table_cache)
- return -ENOMEM;
-
- dma_page_table_cache = kmem_cache_create("PCI_DMA_page_tables",
- ZPCI_PT_SIZE, ZPCI_PT_ALIGN,
- 0, NULL);
- if (!dma_page_table_cache) {
- kmem_cache_destroy(dma_region_table_cache);
- return -ENOMEM;
- }
- return 0;
-}
-
-int __init zpci_dma_init(void)
-{
- s390_iommu_aperture = (u64)virt_to_phys(high_memory);
- if (!s390_iommu_aperture_factor)
- s390_iommu_aperture = ULONG_MAX;
- else
- s390_iommu_aperture *= s390_iommu_aperture_factor;
-
- return dma_alloc_cpu_table_caches();
-}
-
-void zpci_dma_exit(void)
-{
- kmem_cache_destroy(dma_page_table_cache);
- kmem_cache_destroy(dma_region_table_cache);
-}
-
-const struct dma_map_ops s390_pci_dma_ops = {
- .alloc = s390_dma_alloc,
- .free = s390_dma_free,
- .map_sg = s390_dma_map_sg,
- .unmap_sg = s390_dma_unmap_sg,
- .map_page = s390_dma_map_pages,
- .unmap_page = s390_dma_unmap_pages,
- .mmap = dma_common_mmap,
- .get_sgtable = dma_common_get_sgtable,
- .alloc_pages = dma_common_alloc_pages,
- .free_pages = dma_common_free_pages,
- /* dma_supported is unconditionally true without a callback */
-};
-EXPORT_SYMBOL_GPL(s390_pci_dma_ops);
-
-static int __init s390_iommu_setup(char *str)
-{
- if (!strcmp(str, "strict"))
- s390_iommu_strict = 1;
- return 1;
-}
-
-__setup("s390_iommu=", s390_iommu_setup);
-
-static int __init s390_iommu_aperture_setup(char *str)
-{
- if (kstrtou32(str, 10, &s390_iommu_aperture_factor))
- s390_iommu_aperture_factor = 1;
- return 1;
-}
-
-__setup("s390_iommu_aperture=", s390_iommu_aperture_setup);
diff --git a/arch/s390/pci/pci_event.c b/arch/s390/pci/pci_event.c
index b9324ca2e..4d9773ef9 100644
--- a/arch/s390/pci/pci_event.c
+++ b/arch/s390/pci/pci_event.c
@@ -59,9 +59,16 @@ static inline bool ers_result_indicates_abort(pci_ers_result_t ers_res)
}
}
-static bool is_passed_through(struct zpci_dev *zdev)
+static bool is_passed_through(struct pci_dev *pdev)
{
- return zdev->s390_domain;
+ struct zpci_dev *zdev = to_zpci(pdev);
+ bool ret;
+
+ mutex_lock(&zdev->kzdev_lock);
+ ret = !!zdev->kzdev;
+ mutex_unlock(&zdev->kzdev_lock);
+
+ return ret;
}
static bool is_driver_supported(struct pci_driver *driver)
@@ -176,7 +183,7 @@ static pci_ers_result_t zpci_event_attempt_error_recovery(struct pci_dev *pdev)
}
pdev->error_state = pci_channel_io_frozen;
- if (is_passed_through(to_zpci(pdev))) {
+ if (is_passed_through(pdev)) {
pr_info("%s: Cannot be recovered in the host because it is a pass-through device\n",
pci_name(pdev));
goto out_unlock;
@@ -239,7 +246,7 @@ static void zpci_event_io_failure(struct pci_dev *pdev, pci_channel_state_t es)
* we will inject the error event and let the guest recover the device
* itself.
*/
- if (is_passed_through(to_zpci(pdev)))
+ if (is_passed_through(pdev))
goto out;
driver = to_pci_driver(pdev->dev.driver);
if (driver && driver->err_handler && driver->err_handler->error_detected)
@@ -306,8 +313,6 @@ static void zpci_event_hard_deconfigured(struct zpci_dev *zdev, u32 fh)
/* Even though the device is already gone we still
* need to free zPCI resources as part of the disable.
*/
- if (zdev->dma_table)
- zpci_dma_exit_device(zdev);
if (zdev_enabled(zdev))
zpci_disable_device(zdev);
zdev->state = ZPCI_FN_STATE_STANDBY;
diff --git a/arch/s390/pci/pci_sysfs.c b/arch/s390/pci/pci_sysfs.c
index cae280e5c..8a7abac51 100644
--- a/arch/s390/pci/pci_sysfs.c
+++ b/arch/s390/pci/pci_sysfs.c
@@ -56,6 +56,7 @@ static ssize_t recover_store(struct device *dev, struct device_attribute *attr,
struct pci_dev *pdev = to_pci_dev(dev);
struct zpci_dev *zdev = to_zpci(pdev);
int ret = 0;
+ u8 status;
/* Can't use device_remove_self() here as that would lead us to lock
* the pci_rescan_remove_lock while holding the device' kernfs lock.
@@ -82,12 +83,6 @@ static ssize_t recover_store(struct device *dev, struct device_attribute *attr,
pci_lock_rescan_remove();
if (pci_dev_is_added(pdev)) {
pci_stop_and_remove_bus_device(pdev);
- if (zdev->dma_table) {
- ret = zpci_dma_exit_device(zdev);
- if (ret)
- goto out;
- }
-
if (zdev_enabled(zdev)) {
ret = zpci_disable_device(zdev);
/*
@@ -105,14 +100,16 @@ static ssize_t recover_store(struct device *dev, struct device_attribute *attr,
ret = zpci_enable_device(zdev);
if (ret)
goto out;
- ret = zpci_dma_init_device(zdev);
- if (ret) {
- zpci_disable_device(zdev);
- goto out;
+
+ if (zdev->dma_table) {
+ ret = zpci_register_ioat(zdev, 0, zdev->start_dma, zdev->end_dma,
+ virt_to_phys(zdev->dma_table), &status);
+ if (ret)
+ zpci_disable_device(zdev);
}
- pci_rescan_bus(zdev->zbus->bus);
}
out:
+ pci_rescan_bus(zdev->zbus->bus);
pci_unlock_rescan_remove();
if (kn)
sysfs_unbreak_active_protection(kn);
diff --git a/arch/s390/tools/gen_facilities.c b/arch/s390/tools/gen_facilities.c
index cb0aff5c0..68580cbea 100644
--- a/arch/s390/tools/gen_facilities.c
+++ b/arch/s390/tools/gen_facilities.c
@@ -46,6 +46,7 @@ static struct facility_def facility_defs[] = {
#endif
#ifdef CONFIG_HAVE_MARCH_Z13_FEATURES
53, /* load-and-zero-rightmost-byte, etc. */
+ 129, /* vector */
#endif
#ifdef CONFIG_HAVE_MARCH_Z14_FEATURES
58, /* miscellaneous-instruction-extension 2 */