summaryrefslogtreecommitdiffstats
path: root/arch/s390
diff options
context:
space:
mode:
Diffstat (limited to 'arch/s390')
-rw-r--r--arch/s390/boot/startup.c1
-rw-r--r--arch/s390/include/asm/cpacf.h109
-rw-r--r--arch/s390/include/asm/ftrace.h8
-rw-r--r--arch/s390/include/asm/gmap.h2
-rw-r--r--arch/s390/include/asm/mmu.h5
-rw-r--r--arch/s390/include/asm/mmu_context.h1
-rw-r--r--arch/s390/include/asm/pgtable.h20
-rw-r--r--arch/s390/include/asm/processor.h1
-rw-r--r--arch/s390/include/asm/stacktrace.h12
-rw-r--r--arch/s390/kernel/Makefile2
-rw-r--r--arch/s390/kernel/asm-offsets.c5
-rw-r--r--arch/s390/kernel/ftrace.c3
-rw-r--r--arch/s390/kernel/ipl.c10
-rw-r--r--arch/s390/kernel/perf_event.c34
-rw-r--r--arch/s390/kernel/setup.c2
-rw-r--r--arch/s390/kernel/stacktrace.c108
-rw-r--r--arch/s390/kernel/vdso.c13
-rw-r--r--arch/s390/kernel/vdso32/Makefile4
-rw-r--r--arch/s390/kernel/vdso64/Makefile4
-rw-r--r--arch/s390/kernel/vdso64/vdso_user_wrapper.S19
-rw-r--r--arch/s390/kernel/vmlinux.lds.S2
-rw-r--r--arch/s390/kvm/kvm-s390.c4
-rw-r--r--arch/s390/mm/gmap.c165
-rw-r--r--arch/s390/net/bpf_jit_comp.c8
24 files changed, 395 insertions, 147 deletions
diff --git a/arch/s390/boot/startup.c b/arch/s390/boot/startup.c
index 6cf893142..9e2c9027e 100644
--- a/arch/s390/boot/startup.c
+++ b/arch/s390/boot/startup.c
@@ -32,7 +32,6 @@ unsigned long __bootdata_preserved(max_mappable);
unsigned long __bootdata(ident_map_size);
u64 __bootdata_preserved(stfle_fac_list[16]);
-u64 __bootdata_preserved(alt_stfle_fac_list[16]);
struct oldmem_data __bootdata_preserved(oldmem_data);
struct machine_info machine;
diff --git a/arch/s390/include/asm/cpacf.h b/arch/s390/include/asm/cpacf.h
index b378e2b57..c786538e3 100644
--- a/arch/s390/include/asm/cpacf.h
+++ b/arch/s390/include/asm/cpacf.h
@@ -166,28 +166,86 @@
typedef struct { unsigned char bytes[16]; } cpacf_mask_t;
-/**
- * cpacf_query() - check if a specific CPACF function is available
- * @opcode: the opcode of the crypto instruction
- * @func: the function code to test for
- *
- * Executes the query function for the given crypto instruction @opcode
- * and checks if @func is available
- *
- * Returns 1 if @func is available for @opcode, 0 otherwise
+/*
+ * Prototype for a not existing function to produce a link
+ * error if __cpacf_query() or __cpacf_check_opcode() is used
+ * with an invalid compile time const opcode.
*/
-static __always_inline void __cpacf_query(unsigned int opcode, cpacf_mask_t *mask)
+void __cpacf_bad_opcode(void);
+
+static __always_inline void __cpacf_query_rre(u32 opc, u8 r1, u8 r2,
+ cpacf_mask_t *mask)
{
asm volatile(
- " lghi 0,0\n" /* query function */
- " lgr 1,%[mask]\n"
- " spm 0\n" /* pckmo doesn't change the cc */
- /* Parameter regs are ignored, but must be nonzero and unique */
- "0: .insn rrf,%[opc] << 16,2,4,6,0\n"
- " brc 1,0b\n" /* handle partial completion */
- : "=m" (*mask)
- : [mask] "d" ((unsigned long)mask), [opc] "i" (opcode)
- : "cc", "0", "1");
+ " la %%r1,%[mask]\n"
+ " xgr %%r0,%%r0\n"
+ " .insn rre,%[opc] << 16,%[r1],%[r2]\n"
+ : [mask] "=R" (*mask)
+ : [opc] "i" (opc),
+ [r1] "i" (r1), [r2] "i" (r2)
+ : "cc", "r0", "r1");
+}
+
+static __always_inline void __cpacf_query_rrf(u32 opc,
+ u8 r1, u8 r2, u8 r3, u8 m4,
+ cpacf_mask_t *mask)
+{
+ asm volatile(
+ " la %%r1,%[mask]\n"
+ " xgr %%r0,%%r0\n"
+ " .insn rrf,%[opc] << 16,%[r1],%[r2],%[r3],%[m4]\n"
+ : [mask] "=R" (*mask)
+ : [opc] "i" (opc), [r1] "i" (r1), [r2] "i" (r2),
+ [r3] "i" (r3), [m4] "i" (m4)
+ : "cc", "r0", "r1");
+}
+
+static __always_inline void __cpacf_query(unsigned int opcode,
+ cpacf_mask_t *mask)
+{
+ switch (opcode) {
+ case CPACF_KDSA:
+ __cpacf_query_rre(CPACF_KDSA, 0, 2, mask);
+ break;
+ case CPACF_KIMD:
+ __cpacf_query_rre(CPACF_KIMD, 0, 2, mask);
+ break;
+ case CPACF_KLMD:
+ __cpacf_query_rre(CPACF_KLMD, 0, 2, mask);
+ break;
+ case CPACF_KM:
+ __cpacf_query_rre(CPACF_KM, 2, 4, mask);
+ break;
+ case CPACF_KMA:
+ __cpacf_query_rrf(CPACF_KMA, 2, 4, 6, 0, mask);
+ break;
+ case CPACF_KMAC:
+ __cpacf_query_rre(CPACF_KMAC, 0, 2, mask);
+ break;
+ case CPACF_KMC:
+ __cpacf_query_rre(CPACF_KMC, 2, 4, mask);
+ break;
+ case CPACF_KMCTR:
+ __cpacf_query_rrf(CPACF_KMCTR, 2, 4, 6, 0, mask);
+ break;
+ case CPACF_KMF:
+ __cpacf_query_rre(CPACF_KMF, 2, 4, mask);
+ break;
+ case CPACF_KMO:
+ __cpacf_query_rre(CPACF_KMO, 2, 4, mask);
+ break;
+ case CPACF_PCC:
+ __cpacf_query_rre(CPACF_PCC, 0, 0, mask);
+ break;
+ case CPACF_PCKMO:
+ __cpacf_query_rre(CPACF_PCKMO, 0, 0, mask);
+ break;
+ case CPACF_PRNO:
+ __cpacf_query_rre(CPACF_PRNO, 2, 4, mask);
+ break;
+ default:
+ __cpacf_bad_opcode();
+ }
}
static __always_inline int __cpacf_check_opcode(unsigned int opcode)
@@ -211,10 +269,21 @@ static __always_inline int __cpacf_check_opcode(unsigned int opcode)
case CPACF_KMA:
return test_facility(146); /* check for MSA8 */
default:
- BUG();
+ __cpacf_bad_opcode();
+ return 0;
}
}
+/**
+ * cpacf_query() - check if a specific CPACF function is available
+ * @opcode: the opcode of the crypto instruction
+ * @func: the function code to test for
+ *
+ * Executes the query function for the given crypto instruction @opcode
+ * and checks if @func is available
+ *
+ * Returns 1 if @func is available for @opcode, 0 otherwise
+ */
static __always_inline int cpacf_query(unsigned int opcode, cpacf_mask_t *mask)
{
if (__cpacf_check_opcode(opcode)) {
diff --git a/arch/s390/include/asm/ftrace.h b/arch/s390/include/asm/ftrace.h
index 621f23d5a..77e479d44 100644
--- a/arch/s390/include/asm/ftrace.h
+++ b/arch/s390/include/asm/ftrace.h
@@ -8,12 +8,8 @@
#ifndef __ASSEMBLY__
-#ifdef CONFIG_CC_IS_CLANG
-/* https://llvm.org/pr41424 */
-#define ftrace_return_address(n) 0UL
-#else
-#define ftrace_return_address(n) __builtin_return_address(n)
-#endif
+unsigned long return_address(unsigned int n);
+#define ftrace_return_address(n) return_address(n)
void ftrace_caller(void);
diff --git a/arch/s390/include/asm/gmap.h b/arch/s390/include/asm/gmap.h
index 5cc46e0dd..9725586f4 100644
--- a/arch/s390/include/asm/gmap.h
+++ b/arch/s390/include/asm/gmap.h
@@ -146,7 +146,7 @@ int gmap_mprotect_notify(struct gmap *, unsigned long start,
void gmap_sync_dirty_log_pmd(struct gmap *gmap, unsigned long dirty_bitmap[4],
unsigned long gaddr, unsigned long vmaddr);
-int gmap_mark_unmergeable(void);
+int s390_disable_cow_sharing(void);
void s390_unlist_old_asce(struct gmap *gmap);
int s390_replace_asce(struct gmap *gmap);
void s390_uv_destroy_pfns(unsigned long count, unsigned long *pfns);
diff --git a/arch/s390/include/asm/mmu.h b/arch/s390/include/asm/mmu.h
index bb1b4bef1..4c2dc7abc 100644
--- a/arch/s390/include/asm/mmu.h
+++ b/arch/s390/include/asm/mmu.h
@@ -32,6 +32,11 @@ typedef struct {
unsigned int uses_skeys:1;
/* The mmu context uses CMM. */
unsigned int uses_cmm:1;
+ /*
+ * The mmu context allows COW-sharing of memory pages (KSM, zeropage).
+ * Note that COW-sharing during fork() is currently always allowed.
+ */
+ unsigned int allow_cow_sharing:1;
/* The gmaps associated with this context are allowed to use huge pages. */
unsigned int allow_gmap_hpage_1m:1;
} mm_context_t;
diff --git a/arch/s390/include/asm/mmu_context.h b/arch/s390/include/asm/mmu_context.h
index 929af18b0..a7789a9f6 100644
--- a/arch/s390/include/asm/mmu_context.h
+++ b/arch/s390/include/asm/mmu_context.h
@@ -35,6 +35,7 @@ static inline int init_new_context(struct task_struct *tsk,
mm->context.has_pgste = 0;
mm->context.uses_skeys = 0;
mm->context.uses_cmm = 0;
+ mm->context.allow_cow_sharing = 1;
mm->context.allow_gmap_hpage_1m = 0;
#endif
switch (mm->context.asce_limit) {
diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h
index 60950e7a2..1077c1dae 100644
--- a/arch/s390/include/asm/pgtable.h
+++ b/arch/s390/include/asm/pgtable.h
@@ -566,10 +566,20 @@ static inline pud_t set_pud_bit(pud_t pud, pgprot_t prot)
}
/*
- * In the case that a guest uses storage keys
- * faults should no longer be backed by zero pages
+ * As soon as the guest uses storage keys or enables PV, we deduplicate all
+ * mapped shared zeropages and prevent new shared zeropages from getting
+ * mapped.
*/
-#define mm_forbids_zeropage mm_has_pgste
+#define mm_forbids_zeropage mm_forbids_zeropage
+static inline int mm_forbids_zeropage(struct mm_struct *mm)
+{
+#ifdef CONFIG_PGSTE
+ if (!mm->context.allow_cow_sharing)
+ return 1;
+#endif
+ return 0;
+}
+
static inline int mm_uses_skeys(struct mm_struct *mm)
{
#ifdef CONFIG_PGSTE
@@ -1768,8 +1778,10 @@ static inline pmd_t pmdp_huge_clear_flush(struct vm_area_struct *vma,
static inline pmd_t pmdp_invalidate(struct vm_area_struct *vma,
unsigned long addr, pmd_t *pmdp)
{
- pmd_t pmd = __pmd(pmd_val(*pmdp) | _SEGMENT_ENTRY_INVALID);
+ pmd_t pmd;
+ VM_WARN_ON_ONCE(!pmd_present(*pmdp));
+ pmd = __pmd(pmd_val(*pmdp) | _SEGMENT_ENTRY_INVALID);
return pmdp_xchg_direct(vma->vm_mm, addr, pmdp, pmd);
}
diff --git a/arch/s390/include/asm/processor.h b/arch/s390/include/asm/processor.h
index db9982f0e..bbbdc5abe 100644
--- a/arch/s390/include/asm/processor.h
+++ b/arch/s390/include/asm/processor.h
@@ -98,6 +98,7 @@ void cpu_detect_mhz_feature(void);
extern const struct seq_operations cpuinfo_op;
extern void execve_tail(void);
+unsigned long vdso_text_size(void);
unsigned long vdso_size(void);
/*
diff --git a/arch/s390/include/asm/stacktrace.h b/arch/s390/include/asm/stacktrace.h
index 433fde85b..85b6738b8 100644
--- a/arch/s390/include/asm/stacktrace.h
+++ b/arch/s390/include/asm/stacktrace.h
@@ -2,6 +2,7 @@
#ifndef _ASM_S390_STACKTRACE_H
#define _ASM_S390_STACKTRACE_H
+#include <linux/stacktrace.h>
#include <linux/uaccess.h>
#include <linux/ptrace.h>
@@ -12,6 +13,17 @@ struct stack_frame_user {
unsigned long empty2[4];
};
+struct stack_frame_vdso_wrapper {
+ struct stack_frame_user sf;
+ unsigned long return_address;
+};
+
+struct perf_callchain_entry_ctx;
+
+void arch_stack_walk_user_common(stack_trace_consume_fn consume_entry, void *cookie,
+ struct perf_callchain_entry_ctx *entry,
+ const struct pt_regs *regs, bool perf);
+
enum stack_type {
STACK_TYPE_UNKNOWN,
STACK_TYPE_TASK,
diff --git a/arch/s390/kernel/Makefile b/arch/s390/kernel/Makefile
index fa029d0dc..db2d9ba5a 100644
--- a/arch/s390/kernel/Makefile
+++ b/arch/s390/kernel/Makefile
@@ -11,6 +11,8 @@ CFLAGS_REMOVE_ftrace.o = $(CC_FLAGS_FTRACE)
# Do not trace early setup code
CFLAGS_REMOVE_early.o = $(CC_FLAGS_FTRACE)
CFLAGS_REMOVE_rethook.o = $(CC_FLAGS_FTRACE)
+CFLAGS_REMOVE_stacktrace.o = $(CC_FLAGS_FTRACE)
+CFLAGS_REMOVE_unwind_bc.o = $(CC_FLAGS_FTRACE)
endif
diff --git a/arch/s390/kernel/asm-offsets.c b/arch/s390/kernel/asm-offsets.c
index fa5f6885c..2f65bca2f 100644
--- a/arch/s390/kernel/asm-offsets.c
+++ b/arch/s390/kernel/asm-offsets.c
@@ -66,6 +66,11 @@ int main(void)
OFFSET(__SF_SIE_CONTROL_PHYS, stack_frame, sie_control_block_phys);
DEFINE(STACK_FRAME_OVERHEAD, sizeof(struct stack_frame));
BLANK();
+ OFFSET(__SFUSER_BACKCHAIN, stack_frame_user, back_chain);
+ DEFINE(STACK_FRAME_USER_OVERHEAD, sizeof(struct stack_frame_user));
+ OFFSET(__SFVDSO_RETURN_ADDRESS, stack_frame_vdso_wrapper, return_address);
+ DEFINE(STACK_FRAME_VDSO_OVERHEAD, sizeof(struct stack_frame_vdso_wrapper));
+ BLANK();
/* idle data offsets */
OFFSET(__CLOCK_IDLE_ENTER, s390_idle_data, clock_idle_enter);
OFFSET(__TIMER_IDLE_ENTER, s390_idle_data, timer_idle_enter);
diff --git a/arch/s390/kernel/ftrace.c b/arch/s390/kernel/ftrace.c
index c46381ea0..7f6f8c438 100644
--- a/arch/s390/kernel/ftrace.c
+++ b/arch/s390/kernel/ftrace.c
@@ -296,6 +296,9 @@ void kprobe_ftrace_handler(unsigned long ip, unsigned long parent_ip,
struct kprobe *p;
int bit;
+ if (unlikely(kprobe_ftrace_disabled))
+ return;
+
bit = ftrace_test_recursion_trylock(ip, parent_ip);
if (bit < 0)
return;
diff --git a/arch/s390/kernel/ipl.c b/arch/s390/kernel/ipl.c
index 1486350a4..469e8d3fb 100644
--- a/arch/s390/kernel/ipl.c
+++ b/arch/s390/kernel/ipl.c
@@ -962,8 +962,8 @@ static ssize_t reipl_nvme_scpdata_write(struct file *filp, struct kobject *kobj,
scpdata_len += padding;
}
- reipl_block_nvme->hdr.len = IPL_BP_FCP_LEN + scpdata_len;
- reipl_block_nvme->nvme.len = IPL_BP0_FCP_LEN + scpdata_len;
+ reipl_block_nvme->hdr.len = IPL_BP_NVME_LEN + scpdata_len;
+ reipl_block_nvme->nvme.len = IPL_BP0_NVME_LEN + scpdata_len;
reipl_block_nvme->nvme.scp_data_len = scpdata_len;
return count;
@@ -1858,9 +1858,9 @@ static int __init dump_nvme_init(void)
}
dump_block_nvme->hdr.len = IPL_BP_NVME_LEN;
dump_block_nvme->hdr.version = IPL_PARM_BLOCK_VERSION;
- dump_block_nvme->fcp.len = IPL_BP0_NVME_LEN;
- dump_block_nvme->fcp.pbt = IPL_PBT_NVME;
- dump_block_nvme->fcp.opt = IPL_PB0_NVME_OPT_DUMP;
+ dump_block_nvme->nvme.len = IPL_BP0_NVME_LEN;
+ dump_block_nvme->nvme.pbt = IPL_PBT_NVME;
+ dump_block_nvme->nvme.opt = IPL_PB0_NVME_OPT_DUMP;
dump_capabilities |= DUMP_TYPE_NVME;
return 0;
}
diff --git a/arch/s390/kernel/perf_event.c b/arch/s390/kernel/perf_event.c
index dfa77da2f..5fff629b1 100644
--- a/arch/s390/kernel/perf_event.c
+++ b/arch/s390/kernel/perf_event.c
@@ -218,39 +218,7 @@ void perf_callchain_kernel(struct perf_callchain_entry_ctx *entry,
void perf_callchain_user(struct perf_callchain_entry_ctx *entry,
struct pt_regs *regs)
{
- struct stack_frame_user __user *sf;
- unsigned long ip, sp;
- bool first = true;
-
- if (is_compat_task())
- return;
- perf_callchain_store(entry, instruction_pointer(regs));
- sf = (void __user *)user_stack_pointer(regs);
- pagefault_disable();
- while (entry->nr < entry->max_stack) {
- if (__get_user(sp, &sf->back_chain))
- break;
- if (__get_user(ip, &sf->gprs[8]))
- break;
- if (ip & 0x1) {
- /*
- * If the instruction address is invalid, and this
- * is the first stack frame, assume r14 has not
- * been written to the stack yet. Otherwise exit.
- */
- if (first && !(regs->gprs[14] & 0x1))
- ip = regs->gprs[14];
- else
- break;
- }
- perf_callchain_store(entry, ip);
- /* Sanity check: ABI requires SP to be aligned 8 bytes. */
- if (!sp || sp & 0x7)
- break;
- sf = (void __user *)sp;
- first = false;
- }
- pagefault_enable();
+ arch_stack_walk_user_common(NULL, NULL, entry, regs, true);
}
/* Perf definitions for PMU event attributes in sysfs */
diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c
index 24ed33f04..7ecd27c62 100644
--- a/arch/s390/kernel/setup.c
+++ b/arch/s390/kernel/setup.c
@@ -155,7 +155,7 @@ unsigned int __bootdata_preserved(zlib_dfltcc_support);
EXPORT_SYMBOL(zlib_dfltcc_support);
u64 __bootdata_preserved(stfle_fac_list[16]);
EXPORT_SYMBOL(stfle_fac_list);
-u64 __bootdata_preserved(alt_stfle_fac_list[16]);
+u64 alt_stfle_fac_list[16];
struct oldmem_data __bootdata_preserved(oldmem_data);
unsigned long VMALLOC_START;
diff --git a/arch/s390/kernel/stacktrace.c b/arch/s390/kernel/stacktrace.c
index 94f440e38..640363b2a 100644
--- a/arch/s390/kernel/stacktrace.c
+++ b/arch/s390/kernel/stacktrace.c
@@ -5,6 +5,7 @@
* Copyright IBM Corp. 2006
*/
+#include <linux/perf_event.h>
#include <linux/stacktrace.h>
#include <linux/uaccess.h>
#include <linux/compat.h>
@@ -62,42 +63,121 @@ int arch_stack_walk_reliable(stack_trace_consume_fn consume_entry,
return 0;
}
-void arch_stack_walk_user(stack_trace_consume_fn consume_entry, void *cookie,
- const struct pt_regs *regs)
+static inline bool store_ip(stack_trace_consume_fn consume_entry, void *cookie,
+ struct perf_callchain_entry_ctx *entry, bool perf,
+ unsigned long ip)
+{
+#ifdef CONFIG_PERF_EVENTS
+ if (perf) {
+ if (perf_callchain_store(entry, ip))
+ return false;
+ return true;
+ }
+#endif
+ return consume_entry(cookie, ip);
+}
+
+static inline bool ip_invalid(unsigned long ip)
+{
+ /*
+ * Perform some basic checks if an instruction address taken
+ * from unreliable source is invalid.
+ */
+ if (ip & 1)
+ return true;
+ if (ip < mmap_min_addr)
+ return true;
+ if (ip >= current->mm->context.asce_limit)
+ return true;
+ return false;
+}
+
+static inline bool ip_within_vdso(unsigned long ip)
{
+ return in_range(ip, current->mm->context.vdso_base, vdso_text_size());
+}
+
+void arch_stack_walk_user_common(stack_trace_consume_fn consume_entry, void *cookie,
+ struct perf_callchain_entry_ctx *entry,
+ const struct pt_regs *regs, bool perf)
+{
+ struct stack_frame_vdso_wrapper __user *sf_vdso;
struct stack_frame_user __user *sf;
unsigned long ip, sp;
bool first = true;
if (is_compat_task())
return;
- if (!consume_entry(cookie, instruction_pointer(regs)))
+ if (!current->mm)
+ return;
+ ip = instruction_pointer(regs);
+ if (!store_ip(consume_entry, cookie, entry, perf, ip))
return;
sf = (void __user *)user_stack_pointer(regs);
pagefault_disable();
while (1) {
if (__get_user(sp, &sf->back_chain))
break;
- if (__get_user(ip, &sf->gprs[8]))
+ /*
+ * VDSO entry code has a non-standard stack frame layout.
+ * See VDSO user wrapper code for details.
+ */
+ if (!sp && ip_within_vdso(ip)) {
+ sf_vdso = (void __user *)sf;
+ if (__get_user(ip, &sf_vdso->return_address))
+ break;
+ sp = (unsigned long)sf + STACK_FRAME_VDSO_OVERHEAD;
+ sf = (void __user *)sp;
+ if (__get_user(sp, &sf->back_chain))
+ break;
+ } else {
+ sf = (void __user *)sp;
+ if (__get_user(ip, &sf->gprs[8]))
+ break;
+ }
+ /* Sanity check: ABI requires SP to be 8 byte aligned. */
+ if (sp & 0x7)
break;
- if (ip & 0x1) {
+ if (ip_invalid(ip)) {
/*
* If the instruction address is invalid, and this
* is the first stack frame, assume r14 has not
* been written to the stack yet. Otherwise exit.
*/
- if (first && !(regs->gprs[14] & 0x1))
- ip = regs->gprs[14];
- else
+ if (!first)
+ break;
+ ip = regs->gprs[14];
+ if (ip_invalid(ip))
break;
}
- if (!consume_entry(cookie, ip))
- break;
- /* Sanity check: ABI requires SP to be aligned 8 bytes. */
- if (!sp || sp & 0x7)
- break;
- sf = (void __user *)sp;
+ if (!store_ip(consume_entry, cookie, entry, perf, ip))
+ return;
first = false;
}
pagefault_enable();
}
+
+void arch_stack_walk_user(stack_trace_consume_fn consume_entry, void *cookie,
+ const struct pt_regs *regs)
+{
+ arch_stack_walk_user_common(consume_entry, cookie, NULL, regs, false);
+}
+
+unsigned long return_address(unsigned int n)
+{
+ struct unwind_state state;
+ unsigned long addr;
+
+ /* Increment to skip current stack entry */
+ n++;
+
+ unwind_for_each_frame(&state, NULL, NULL, 0) {
+ addr = unwind_get_return_address(&state);
+ if (!addr)
+ break;
+ if (!n--)
+ return addr;
+ }
+ return 0;
+}
+EXPORT_SYMBOL_GPL(return_address);
diff --git a/arch/s390/kernel/vdso.c b/arch/s390/kernel/vdso.c
index a45b3a4c9..2f967ac2b 100644
--- a/arch/s390/kernel/vdso.c
+++ b/arch/s390/kernel/vdso.c
@@ -210,17 +210,22 @@ static unsigned long vdso_addr(unsigned long start, unsigned long len)
return addr;
}
-unsigned long vdso_size(void)
+unsigned long vdso_text_size(void)
{
- unsigned long size = VVAR_NR_PAGES * PAGE_SIZE;
+ unsigned long size;
if (is_compat_task())
- size += vdso32_end - vdso32_start;
+ size = vdso32_end - vdso32_start;
else
- size += vdso64_end - vdso64_start;
+ size = vdso64_end - vdso64_start;
return PAGE_ALIGN(size);
}
+unsigned long vdso_size(void)
+{
+ return vdso_text_size() + VVAR_NR_PAGES * PAGE_SIZE;
+}
+
int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp)
{
unsigned long addr = VDSO_BASE;
diff --git a/arch/s390/kernel/vdso32/Makefile b/arch/s390/kernel/vdso32/Makefile
index b12a274cb..4800d80de 100644
--- a/arch/s390/kernel/vdso32/Makefile
+++ b/arch/s390/kernel/vdso32/Makefile
@@ -19,8 +19,10 @@ KBUILD_AFLAGS_32 := $(filter-out -m64,$(KBUILD_AFLAGS))
KBUILD_AFLAGS_32 += -m31 -s
KBUILD_CFLAGS_32 := $(filter-out -m64,$(KBUILD_CFLAGS))
+KBUILD_CFLAGS_32 := $(filter-out -mpacked-stack,$(KBUILD_CFLAGS))
KBUILD_CFLAGS_32 := $(filter-out -mno-pic-data-is-text-relative,$(KBUILD_CFLAGS_32))
-KBUILD_CFLAGS_32 += -m31 -fPIC -shared -fno-common -fno-builtin
+KBUILD_CFLAGS_32 := $(filter-out -fno-asynchronous-unwind-tables,$(KBUILD_CFLAGS_32))
+KBUILD_CFLAGS_32 += -m31 -fPIC -shared -fno-common -fno-builtin -fasynchronous-unwind-tables
LDFLAGS_vdso32.so.dbg += -shared -soname=linux-vdso32.so.1 \
--hash-style=both --build-id=sha1 -melf_s390 -T
diff --git a/arch/s390/kernel/vdso64/Makefile b/arch/s390/kernel/vdso64/Makefile
index ef9832726..2f2e4e997 100644
--- a/arch/s390/kernel/vdso64/Makefile
+++ b/arch/s390/kernel/vdso64/Makefile
@@ -24,9 +24,11 @@ KBUILD_AFLAGS_64 := $(filter-out -m64,$(KBUILD_AFLAGS))
KBUILD_AFLAGS_64 += -m64
KBUILD_CFLAGS_64 := $(filter-out -m64,$(KBUILD_CFLAGS))
+KBUILD_CFLAGS_64 := $(filter-out -mpacked-stack,$(KBUILD_CFLAGS_64))
KBUILD_CFLAGS_64 := $(filter-out -mno-pic-data-is-text-relative,$(KBUILD_CFLAGS_64))
KBUILD_CFLAGS_64 := $(filter-out -munaligned-symbols,$(KBUILD_CFLAGS_64))
-KBUILD_CFLAGS_64 += -m64 -fPIC -fno-common -fno-builtin
+KBUILD_CFLAGS_64 := $(filter-out -fno-asynchronous-unwind-tables,$(KBUILD_CFLAGS_64))
+KBUILD_CFLAGS_64 += -m64 -fPIC -fno-common -fno-builtin -fasynchronous-unwind-tables
ldflags-y := -shared -soname=linux-vdso64.so.1 \
--hash-style=both --build-id=sha1 -T
diff --git a/arch/s390/kernel/vdso64/vdso_user_wrapper.S b/arch/s390/kernel/vdso64/vdso_user_wrapper.S
index 85247ef5a..e26e68675 100644
--- a/arch/s390/kernel/vdso64/vdso_user_wrapper.S
+++ b/arch/s390/kernel/vdso64/vdso_user_wrapper.S
@@ -6,8 +6,6 @@
#include <asm/dwarf.h>
#include <asm/ptrace.h>
-#define WRAPPER_FRAME_SIZE (STACK_FRAME_OVERHEAD+8)
-
/*
* Older glibc version called vdso without allocating a stackframe. This wrapper
* is just used to allocate a stackframe. See
@@ -20,16 +18,17 @@
__ALIGN
__kernel_\func:
CFI_STARTPROC
- aghi %r15,-WRAPPER_FRAME_SIZE
- CFI_DEF_CFA_OFFSET (STACK_FRAME_OVERHEAD + WRAPPER_FRAME_SIZE)
- CFI_VAL_OFFSET 15, -STACK_FRAME_OVERHEAD
- stg %r14,STACK_FRAME_OVERHEAD(%r15)
- CFI_REL_OFFSET 14, STACK_FRAME_OVERHEAD
+ aghi %r15,-STACK_FRAME_VDSO_OVERHEAD
+ CFI_DEF_CFA_OFFSET (STACK_FRAME_USER_OVERHEAD + STACK_FRAME_VDSO_OVERHEAD)
+ CFI_VAL_OFFSET 15,-STACK_FRAME_USER_OVERHEAD
+ stg %r14,__SFVDSO_RETURN_ADDRESS(%r15)
+ CFI_REL_OFFSET 14,__SFVDSO_RETURN_ADDRESS
+ xc __SFUSER_BACKCHAIN(8,%r15),__SFUSER_BACKCHAIN(%r15)
brasl %r14,__s390_vdso_\func
- lg %r14,STACK_FRAME_OVERHEAD(%r15)
+ lg %r14,__SFVDSO_RETURN_ADDRESS(%r15)
CFI_RESTORE 14
- aghi %r15,WRAPPER_FRAME_SIZE
- CFI_DEF_CFA_OFFSET STACK_FRAME_OVERHEAD
+ aghi %r15,STACK_FRAME_VDSO_OVERHEAD
+ CFI_DEF_CFA_OFFSET STACK_FRAME_USER_OVERHEAD
CFI_RESTORE 15
br %r14
CFI_ENDPROC
diff --git a/arch/s390/kernel/vmlinux.lds.S b/arch/s390/kernel/vmlinux.lds.S
index 48de296e8..fb9b32f93 100644
--- a/arch/s390/kernel/vmlinux.lds.S
+++ b/arch/s390/kernel/vmlinux.lds.S
@@ -209,13 +209,13 @@ SECTIONS
.dynstr ALIGN(8) : {
*(.dynstr)
}
-#endif
.hash ALIGN(8) : {
*(.hash)
}
.gnu.hash ALIGN(8) : {
*(.gnu.hash)
}
+#endif
. = ALIGN(PAGE_SIZE);
__init_end = .; /* freed after init ends here */
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index 7721eb522..82e9631cd 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -2631,9 +2631,7 @@ static int kvm_s390_handle_pv(struct kvm *kvm, struct kvm_pv_cmd *cmd)
if (r)
break;
- mmap_write_lock(current->mm);
- r = gmap_mark_unmergeable();
- mmap_write_unlock(current->mm);
+ r = s390_disable_cow_sharing();
if (r)
break;
diff --git a/arch/s390/mm/gmap.c b/arch/s390/mm/gmap.c
index 12d22a7fa..474a25ca5 100644
--- a/arch/s390/mm/gmap.c
+++ b/arch/s390/mm/gmap.c
@@ -2550,41 +2550,6 @@ static inline void thp_split_mm(struct mm_struct *mm)
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
/*
- * Remove all empty zero pages from the mapping for lazy refaulting
- * - This must be called after mm->context.has_pgste is set, to avoid
- * future creation of zero pages
- * - This must be called after THP was disabled.
- *
- * mm contracts with s390, that even if mm were to remove a page table,
- * racing with the loop below and so causing pte_offset_map_lock() to fail,
- * it will never insert a page table containing empty zero pages once
- * mm_forbids_zeropage(mm) i.e. mm->context.has_pgste is set.
- */
-static int __zap_zero_pages(pmd_t *pmd, unsigned long start,
- unsigned long end, struct mm_walk *walk)
-{
- unsigned long addr;
-
- for (addr = start; addr != end; addr += PAGE_SIZE) {
- pte_t *ptep;
- spinlock_t *ptl;
-
- ptep = pte_offset_map_lock(walk->mm, pmd, addr, &ptl);
- if (!ptep)
- break;
- if (is_zero_pfn(pte_pfn(*ptep)))
- ptep_xchg_direct(walk->mm, addr, ptep, __pte(_PAGE_INVALID));
- pte_unmap_unlock(ptep, ptl);
- }
- return 0;
-}
-
-static const struct mm_walk_ops zap_zero_walk_ops = {
- .pmd_entry = __zap_zero_pages,
- .walk_lock = PGWALK_WRLOCK,
-};
-
-/*
* switch on pgstes for its userspace process (for kvm)
*/
int s390_enable_sie(void)
@@ -2601,22 +2566,142 @@ int s390_enable_sie(void)
mm->context.has_pgste = 1;
/* split thp mappings and disable thp for future mappings */
thp_split_mm(mm);
- walk_page_range(mm, 0, TASK_SIZE, &zap_zero_walk_ops, NULL);
mmap_write_unlock(mm);
return 0;
}
EXPORT_SYMBOL_GPL(s390_enable_sie);
-int gmap_mark_unmergeable(void)
+static int find_zeropage_pte_entry(pte_t *pte, unsigned long addr,
+ unsigned long end, struct mm_walk *walk)
+{
+ unsigned long *found_addr = walk->private;
+
+ /* Return 1 of the page is a zeropage. */
+ if (is_zero_pfn(pte_pfn(*pte))) {
+ /*
+ * Shared zeropage in e.g., a FS DAX mapping? We cannot do the
+ * right thing and likely don't care: FAULT_FLAG_UNSHARE
+ * currently only works in COW mappings, which is also where
+ * mm_forbids_zeropage() is checked.
+ */
+ if (!is_cow_mapping(walk->vma->vm_flags))
+ return -EFAULT;
+
+ *found_addr = addr;
+ return 1;
+ }
+ return 0;
+}
+
+static const struct mm_walk_ops find_zeropage_ops = {
+ .pte_entry = find_zeropage_pte_entry,
+ .walk_lock = PGWALK_WRLOCK,
+};
+
+/*
+ * Unshare all shared zeropages, replacing them by anonymous pages. Note that
+ * we cannot simply zap all shared zeropages, because this could later
+ * trigger unexpected userfaultfd missing events.
+ *
+ * This must be called after mm->context.allow_cow_sharing was
+ * set to 0, to avoid future mappings of shared zeropages.
+ *
+ * mm contracts with s390, that even if mm were to remove a page table,
+ * and racing with walk_page_range_vma() calling pte_offset_map_lock()
+ * would fail, it will never insert a page table containing empty zero
+ * pages once mm_forbids_zeropage(mm) i.e.
+ * mm->context.allow_cow_sharing is set to 0.
+ */
+static int __s390_unshare_zeropages(struct mm_struct *mm)
+{
+ struct vm_area_struct *vma;
+ VMA_ITERATOR(vmi, mm, 0);
+ unsigned long addr;
+ vm_fault_t fault;
+ int rc;
+
+ for_each_vma(vmi, vma) {
+ /*
+ * We could only look at COW mappings, but it's more future
+ * proof to catch unexpected zeropages in other mappings and
+ * fail.
+ */
+ if ((vma->vm_flags & VM_PFNMAP) || is_vm_hugetlb_page(vma))
+ continue;
+ addr = vma->vm_start;
+
+retry:
+ rc = walk_page_range_vma(vma, addr, vma->vm_end,
+ &find_zeropage_ops, &addr);
+ if (rc < 0)
+ return rc;
+ else if (!rc)
+ continue;
+
+ /* addr was updated by find_zeropage_pte_entry() */
+ fault = handle_mm_fault(vma, addr,
+ FAULT_FLAG_UNSHARE | FAULT_FLAG_REMOTE,
+ NULL);
+ if (fault & VM_FAULT_OOM)
+ return -ENOMEM;
+ /*
+ * See break_ksm(): even after handle_mm_fault() returned 0, we
+ * must start the lookup from the current address, because
+ * handle_mm_fault() may back out if there's any difficulty.
+ *
+ * VM_FAULT_SIGBUS and VM_FAULT_SIGSEGV are unexpected but
+ * maybe they could trigger in the future on concurrent
+ * truncation. In that case, the shared zeropage would be gone
+ * and we can simply retry and make progress.
+ */
+ cond_resched();
+ goto retry;
+ }
+
+ return 0;
+}
+
+static int __s390_disable_cow_sharing(struct mm_struct *mm)
{
+ int rc;
+
+ if (!mm->context.allow_cow_sharing)
+ return 0;
+
+ mm->context.allow_cow_sharing = 0;
+
+ /* Replace all shared zeropages by anonymous pages. */
+ rc = __s390_unshare_zeropages(mm);
/*
* Make sure to disable KSM (if enabled for the whole process or
* individual VMAs). Note that nothing currently hinders user space
* from re-enabling it.
*/
- return ksm_disable(current->mm);
+ if (!rc)
+ rc = ksm_disable(mm);
+ if (rc)
+ mm->context.allow_cow_sharing = 1;
+ return rc;
+}
+
+/*
+ * Disable most COW-sharing of memory pages for the whole process:
+ * (1) Disable KSM and unmerge/unshare any KSM pages.
+ * (2) Disallow shared zeropages and unshare any zerpages that are mapped.
+ *
+ * Not that we currently don't bother with COW-shared pages that are shared
+ * with parent/child processes due to fork().
+ */
+int s390_disable_cow_sharing(void)
+{
+ int rc;
+
+ mmap_write_lock(current->mm);
+ rc = __s390_disable_cow_sharing(current->mm);
+ mmap_write_unlock(current->mm);
+ return rc;
}
-EXPORT_SYMBOL_GPL(gmap_mark_unmergeable);
+EXPORT_SYMBOL_GPL(s390_disable_cow_sharing);
/*
* Enable storage key handling from now on and initialize the storage
@@ -2685,7 +2770,7 @@ int s390_enable_skey(void)
goto out_up;
mm->context.uses_skeys = 1;
- rc = gmap_mark_unmergeable();
+ rc = __s390_disable_cow_sharing(mm);
if (rc) {
mm->context.uses_skeys = 0;
goto out_up;
diff --git a/arch/s390/net/bpf_jit_comp.c b/arch/s390/net/bpf_jit_comp.c
index 5af0402e9..1d168a98a 100644
--- a/arch/s390/net/bpf_jit_comp.c
+++ b/arch/s390/net/bpf_jit_comp.c
@@ -1427,8 +1427,12 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp,
EMIT6_DISP_LH(0xeb000000, is32 ? (op32) : (op64), \
(insn->imm & BPF_FETCH) ? src_reg : REG_W0, \
src_reg, dst_reg, off); \
- if (is32 && (insn->imm & BPF_FETCH)) \
- EMIT_ZERO(src_reg); \
+ if (insn->imm & BPF_FETCH) { \
+ /* bcr 14,0 - see atomic_fetch_{add,and,or,xor}() */ \
+ _EMIT2(0x07e0); \
+ if (is32) \
+ EMIT_ZERO(src_reg); \
+ } \
} while (0)
case BPF_ADD:
case BPF_ADD | BPF_FETCH: