From 6d03a247468059b0e59c821ef39e6762d4d6fc30 Mon Sep 17 00:00:00 2001 From: Daniel Baumann Date: Wed, 19 Jun 2024 23:00:51 +0200 Subject: Merging upstream version 6.9.2. Signed-off-by: Daniel Baumann --- arch/s390/include/asm/access-regs.h | 38 ++ arch/s390/include/asm/appldata.h | 4 +- arch/s390/include/asm/asm-prototypes.h | 2 +- arch/s390/include/asm/atomic.h | 44 +- arch/s390/include/asm/atomic_ops.h | 22 +- arch/s390/include/asm/bug.h | 4 +- arch/s390/include/asm/ccwdev.h | 3 +- arch/s390/include/asm/checksum.h | 29 +- arch/s390/include/asm/cio.h | 9 +- arch/s390/include/asm/diag.h | 15 +- arch/s390/include/asm/dma-types.h | 103 +++++ arch/s390/include/asm/eadm.h | 5 +- arch/s390/include/asm/entry-common.h | 5 +- arch/s390/include/asm/fcx.h | 13 +- arch/s390/include/asm/fpu-insn-asm.h | 732 +++++++++++++++++++++++++++++++++ arch/s390/include/asm/fpu-insn.h | 486 ++++++++++++++++++++++ arch/s390/include/asm/fpu-types.h | 51 +++ arch/s390/include/asm/fpu.h | 295 +++++++++++++ arch/s390/include/asm/fpu/api.h | 126 ------ arch/s390/include/asm/fpu/internal.h | 67 --- arch/s390/include/asm/fpu/types.h | 38 -- arch/s390/include/asm/ftrace.h | 2 +- arch/s390/include/asm/gmap.h | 2 +- arch/s390/include/asm/idals.h | 176 ++++---- arch/s390/include/asm/kvm_host.h | 5 +- arch/s390/include/asm/lowcore.h | 2 +- arch/s390/include/asm/mmu.h | 5 - arch/s390/include/asm/mmu_context.h | 1 - arch/s390/include/asm/page.h | 32 +- arch/s390/include/asm/pai.h | 3 +- arch/s390/include/asm/pci.h | 3 +- arch/s390/include/asm/pgalloc.h | 4 +- arch/s390/include/asm/pgtable.h | 34 +- arch/s390/include/asm/physmem_info.h | 1 + arch/s390/include/asm/preempt.h | 36 +- arch/s390/include/asm/processor.h | 13 +- arch/s390/include/asm/ptdump.h | 14 - arch/s390/include/asm/ptrace.h | 6 +- arch/s390/include/asm/qdio.h | 17 +- arch/s390/include/asm/scsw.h | 7 +- arch/s390/include/asm/stacktrace.h | 1 - arch/s390/include/asm/switch_to.h | 49 --- arch/s390/include/asm/tlb.h | 30 +- arch/s390/include/asm/vdso/data.h | 1 - arch/s390/include/asm/vx-insn-asm.h | 681 ------------------------------ arch/s390/include/asm/vx-insn.h | 19 - arch/s390/include/asm/word-at-a-time.h | 3 +- 47 files changed, 2003 insertions(+), 1235 deletions(-) create mode 100644 arch/s390/include/asm/access-regs.h create mode 100644 arch/s390/include/asm/dma-types.h create mode 100644 arch/s390/include/asm/fpu-insn-asm.h create mode 100644 arch/s390/include/asm/fpu-insn.h create mode 100644 arch/s390/include/asm/fpu-types.h create mode 100644 arch/s390/include/asm/fpu.h delete mode 100644 arch/s390/include/asm/fpu/api.h delete mode 100644 arch/s390/include/asm/fpu/internal.h delete mode 100644 arch/s390/include/asm/fpu/types.h delete mode 100644 arch/s390/include/asm/ptdump.h delete mode 100644 arch/s390/include/asm/switch_to.h delete mode 100644 arch/s390/include/asm/vx-insn-asm.h delete mode 100644 arch/s390/include/asm/vx-insn.h (limited to 'arch/s390/include/asm') diff --git a/arch/s390/include/asm/access-regs.h b/arch/s390/include/asm/access-regs.h new file mode 100644 index 000000000..1a6412d9f --- /dev/null +++ b/arch/s390/include/asm/access-regs.h @@ -0,0 +1,38 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright IBM Corp. 1999, 2024 + */ + +#ifndef __ASM_S390_ACCESS_REGS_H +#define __ASM_S390_ACCESS_REGS_H + +#include +#include + +struct access_regs { + unsigned int regs[NUM_ACRS]; +}; + +static inline void save_access_regs(unsigned int *acrs) +{ + struct access_regs *regs = (struct access_regs *)acrs; + + instrument_write(regs, sizeof(*regs)); + asm volatile("stamy 0,15,%[regs]" + : [regs] "=QS" (*regs) + : + : "memory"); +} + +static inline void restore_access_regs(unsigned int *acrs) +{ + struct access_regs *regs = (struct access_regs *)acrs; + + instrument_read(regs, sizeof(*regs)); + asm volatile("lamy 0,15,%[regs]" + : + : [regs] "QS" (*regs) + : "memory"); +} + +#endif /* __ASM_S390_ACCESS_REGS_H */ diff --git a/arch/s390/include/asm/appldata.h b/arch/s390/include/asm/appldata.h index f2240392c..a92ebbc7a 100644 --- a/arch/s390/include/asm/appldata.h +++ b/arch/s390/include/asm/appldata.h @@ -54,13 +54,13 @@ static inline int appldata_asm(struct appldata_parameter_list *parm_list, parm_list->function = fn; parm_list->parlist_length = sizeof(*parm_list); parm_list->buffer_length = length; - parm_list->product_id_addr = (unsigned long) id; + parm_list->product_id_addr = virt_to_phys(id); parm_list->buffer_addr = virt_to_phys(buffer); diag_stat_inc(DIAG_STAT_X0DC); asm volatile( " diag %1,%0,0xdc" : "=d" (ry) - : "d" (parm_list), "m" (*parm_list), "m" (*id) + : "d" (virt_to_phys(parm_list)), "m" (*parm_list), "m" (*id) : "cc"); return ry; } diff --git a/arch/s390/include/asm/asm-prototypes.h b/arch/s390/include/asm/asm-prototypes.h index a873e873e..56096ae26 100644 --- a/arch/s390/include/asm/asm-prototypes.h +++ b/arch/s390/include/asm/asm-prototypes.h @@ -3,7 +3,7 @@ #include #include -#include +#include #include __int128_t __ashlti3(__int128_t a, int b); diff --git a/arch/s390/include/asm/atomic.h b/arch/s390/include/asm/atomic.h index 7138d189c..0c4cad7d5 100644 --- a/arch/s390/include/asm/atomic.h +++ b/arch/s390/include/asm/atomic.h @@ -15,31 +15,31 @@ #include #include -static inline int arch_atomic_read(const atomic_t *v) +static __always_inline int arch_atomic_read(const atomic_t *v) { return __atomic_read(v); } #define arch_atomic_read arch_atomic_read -static inline void arch_atomic_set(atomic_t *v, int i) +static __always_inline void arch_atomic_set(atomic_t *v, int i) { __atomic_set(v, i); } #define arch_atomic_set arch_atomic_set -static inline int arch_atomic_add_return(int i, atomic_t *v) +static __always_inline int arch_atomic_add_return(int i, atomic_t *v) { return __atomic_add_barrier(i, &v->counter) + i; } #define arch_atomic_add_return arch_atomic_add_return -static inline int arch_atomic_fetch_add(int i, atomic_t *v) +static __always_inline int arch_atomic_fetch_add(int i, atomic_t *v) { return __atomic_add_barrier(i, &v->counter); } #define arch_atomic_fetch_add arch_atomic_fetch_add -static inline void arch_atomic_add(int i, atomic_t *v) +static __always_inline void arch_atomic_add(int i, atomic_t *v) { __atomic_add(i, &v->counter); } @@ -50,11 +50,11 @@ static inline void arch_atomic_add(int i, atomic_t *v) #define arch_atomic_fetch_sub(_i, _v) arch_atomic_fetch_add(-(int)(_i), _v) #define ATOMIC_OPS(op) \ -static inline void arch_atomic_##op(int i, atomic_t *v) \ +static __always_inline void arch_atomic_##op(int i, atomic_t *v) \ { \ __atomic_##op(i, &v->counter); \ } \ -static inline int arch_atomic_fetch_##op(int i, atomic_t *v) \ +static __always_inline int arch_atomic_fetch_##op(int i, atomic_t *v) \ { \ return __atomic_##op##_barrier(i, &v->counter); \ } @@ -74,7 +74,7 @@ ATOMIC_OPS(xor) #define arch_atomic_xchg(v, new) (arch_xchg(&((v)->counter), new)) -static inline int arch_atomic_cmpxchg(atomic_t *v, int old, int new) +static __always_inline int arch_atomic_cmpxchg(atomic_t *v, int old, int new) { return __atomic_cmpxchg(&v->counter, old, new); } @@ -82,31 +82,31 @@ static inline int arch_atomic_cmpxchg(atomic_t *v, int old, int new) #define ATOMIC64_INIT(i) { (i) } -static inline s64 arch_atomic64_read(const atomic64_t *v) +static __always_inline s64 arch_atomic64_read(const atomic64_t *v) { return __atomic64_read(v); } #define arch_atomic64_read arch_atomic64_read -static inline void arch_atomic64_set(atomic64_t *v, s64 i) +static __always_inline void arch_atomic64_set(atomic64_t *v, s64 i) { __atomic64_set(v, i); } #define arch_atomic64_set arch_atomic64_set -static inline s64 arch_atomic64_add_return(s64 i, atomic64_t *v) +static __always_inline s64 arch_atomic64_add_return(s64 i, atomic64_t *v) { return __atomic64_add_barrier(i, (long *)&v->counter) + i; } #define arch_atomic64_add_return arch_atomic64_add_return -static inline s64 arch_atomic64_fetch_add(s64 i, atomic64_t *v) +static __always_inline s64 arch_atomic64_fetch_add(s64 i, atomic64_t *v) { return __atomic64_add_barrier(i, (long *)&v->counter); } #define arch_atomic64_fetch_add arch_atomic64_fetch_add -static inline void arch_atomic64_add(s64 i, atomic64_t *v) +static __always_inline void arch_atomic64_add(s64 i, atomic64_t *v) { __atomic64_add(i, (long *)&v->counter); } @@ -114,20 +114,20 @@ static inline void arch_atomic64_add(s64 i, atomic64_t *v) #define arch_atomic64_xchg(v, new) (arch_xchg(&((v)->counter), new)) -static inline s64 arch_atomic64_cmpxchg(atomic64_t *v, s64 old, s64 new) +static __always_inline s64 arch_atomic64_cmpxchg(atomic64_t *v, s64 old, s64 new) { return __atomic64_cmpxchg((long *)&v->counter, old, new); } #define arch_atomic64_cmpxchg arch_atomic64_cmpxchg -#define ATOMIC64_OPS(op) \ -static inline void arch_atomic64_##op(s64 i, atomic64_t *v) \ -{ \ - __atomic64_##op(i, (long *)&v->counter); \ -} \ -static inline long arch_atomic64_fetch_##op(s64 i, atomic64_t *v) \ -{ \ - return __atomic64_##op##_barrier(i, (long *)&v->counter); \ +#define ATOMIC64_OPS(op) \ +static __always_inline void arch_atomic64_##op(s64 i, atomic64_t *v) \ +{ \ + __atomic64_##op(i, (long *)&v->counter); \ +} \ +static __always_inline long arch_atomic64_fetch_##op(s64 i, atomic64_t *v) \ +{ \ + return __atomic64_##op##_barrier(i, (long *)&v->counter); \ } ATOMIC64_OPS(and) diff --git a/arch/s390/include/asm/atomic_ops.h b/arch/s390/include/asm/atomic_ops.h index 50510e08b..7fa5f96a5 100644 --- a/arch/s390/include/asm/atomic_ops.h +++ b/arch/s390/include/asm/atomic_ops.h @@ -8,7 +8,7 @@ #ifndef __ARCH_S390_ATOMIC_OPS__ #define __ARCH_S390_ATOMIC_OPS__ -static inline int __atomic_read(const atomic_t *v) +static __always_inline int __atomic_read(const atomic_t *v) { int c; @@ -18,14 +18,14 @@ static inline int __atomic_read(const atomic_t *v) return c; } -static inline void __atomic_set(atomic_t *v, int i) +static __always_inline void __atomic_set(atomic_t *v, int i) { asm volatile( " st %1,%0\n" : "=R" (v->counter) : "d" (i)); } -static inline s64 __atomic64_read(const atomic64_t *v) +static __always_inline s64 __atomic64_read(const atomic64_t *v) { s64 c; @@ -35,7 +35,7 @@ static inline s64 __atomic64_read(const atomic64_t *v) return c; } -static inline void __atomic64_set(atomic64_t *v, s64 i) +static __always_inline void __atomic64_set(atomic64_t *v, s64 i) { asm volatile( " stg %1,%0\n" @@ -45,7 +45,7 @@ static inline void __atomic64_set(atomic64_t *v, s64 i) #ifdef CONFIG_HAVE_MARCH_Z196_FEATURES #define __ATOMIC_OP(op_name, op_type, op_string, op_barrier) \ -static inline op_type op_name(op_type val, op_type *ptr) \ +static __always_inline op_type op_name(op_type val, op_type *ptr) \ { \ op_type old; \ \ @@ -96,7 +96,7 @@ __ATOMIC_CONST_OPS(__atomic64_add_const, long, "agsi") #else /* CONFIG_HAVE_MARCH_Z196_FEATURES */ #define __ATOMIC_OP(op_name, op_string) \ -static inline int op_name(int val, int *ptr) \ +static __always_inline int op_name(int val, int *ptr) \ { \ int old, new; \ \ @@ -122,7 +122,7 @@ __ATOMIC_OPS(__atomic_xor, "xr") #undef __ATOMIC_OPS #define __ATOMIC64_OP(op_name, op_string) \ -static inline long op_name(long val, long *ptr) \ +static __always_inline long op_name(long val, long *ptr) \ { \ long old, new; \ \ @@ -154,7 +154,7 @@ __ATOMIC64_OPS(__atomic64_xor, "xgr") #endif /* CONFIG_HAVE_MARCH_Z196_FEATURES */ -static inline int __atomic_cmpxchg(int *ptr, int old, int new) +static __always_inline int __atomic_cmpxchg(int *ptr, int old, int new) { asm volatile( " cs %[old],%[new],%[ptr]" @@ -164,7 +164,7 @@ static inline int __atomic_cmpxchg(int *ptr, int old, int new) return old; } -static inline bool __atomic_cmpxchg_bool(int *ptr, int old, int new) +static __always_inline bool __atomic_cmpxchg_bool(int *ptr, int old, int new) { int old_expected = old; @@ -176,7 +176,7 @@ static inline bool __atomic_cmpxchg_bool(int *ptr, int old, int new) return old == old_expected; } -static inline long __atomic64_cmpxchg(long *ptr, long old, long new) +static __always_inline long __atomic64_cmpxchg(long *ptr, long old, long new) { asm volatile( " csg %[old],%[new],%[ptr]" @@ -186,7 +186,7 @@ static inline long __atomic64_cmpxchg(long *ptr, long old, long new) return old; } -static inline bool __atomic64_cmpxchg_bool(long *ptr, long old, long new) +static __always_inline bool __atomic64_cmpxchg_bool(long *ptr, long old, long new) { long old_expected = old; diff --git a/arch/s390/include/asm/bug.h b/arch/s390/include/asm/bug.h index aebe1e22c..c500d45fb 100644 --- a/arch/s390/include/asm/bug.h +++ b/arch/s390/include/asm/bug.h @@ -14,7 +14,7 @@ ".section .rodata.str,\"aMS\",@progbits,1\n" \ "1: .asciz \""__FILE__"\"\n" \ ".previous\n" \ - ".section __bug_table,\"awM\",@progbits,%2\n" \ + ".section __bug_table,\"aw\"\n" \ "2: .long 0b-.\n" \ " .long 1b-.\n" \ " .short %0,%1\n" \ @@ -30,7 +30,7 @@ #define __EMIT_BUG(x) do { \ asm_inline volatile( \ "0: mc 0,0\n" \ - ".section __bug_table,\"awM\",@progbits,%1\n" \ + ".section __bug_table,\"aw\"\n" \ "1: .long 0b-.\n" \ " .short %0\n" \ " .org 1b+%1\n" \ diff --git a/arch/s390/include/asm/ccwdev.h b/arch/s390/include/asm/ccwdev.h index 91d261751..436365ff6 100644 --- a/arch/s390/include/asm/ccwdev.h +++ b/arch/s390/include/asm/ccwdev.h @@ -217,7 +217,8 @@ extern void ccw_device_destroy_console(struct ccw_device *); extern int ccw_device_enable_console(struct ccw_device *); extern void ccw_device_wait_idle(struct ccw_device *); -extern void *ccw_device_dma_zalloc(struct ccw_device *cdev, size_t size); +extern void *ccw_device_dma_zalloc(struct ccw_device *cdev, size_t size, + dma32_t *dma_handle); extern void ccw_device_dma_free(struct ccw_device *cdev, void *cpu_addr, size_t size); diff --git a/arch/s390/include/asm/checksum.h b/arch/s390/include/asm/checksum.h index 69837eec2..b89159591 100644 --- a/arch/s390/include/asm/checksum.h +++ b/arch/s390/include/asm/checksum.h @@ -12,36 +12,29 @@ #ifndef _S390_CHECKSUM_H #define _S390_CHECKSUM_H -#include +#include #include -/* - * Computes the checksum of a memory block at buff, length len, - * and adds in "sum" (32-bit). - * - * Returns a 32-bit number suitable for feeding into itself - * or csum_tcpudp_magic. - * - * This function must be called with even lengths, except - * for the last fragment, which may be odd. - * - * It's best to have buff aligned on a 32-bit boundary. - */ -static inline __wsum csum_partial(const void *buff, int len, __wsum sum) +static inline __wsum cksm(const void *buff, int len, __wsum sum) { union register_pair rp = { - .even = (unsigned long) buff, - .odd = (unsigned long) len, + .even = (unsigned long)buff, + .odd = (unsigned long)len, }; - kasan_check_read(buff, len); - asm volatile( + instrument_read(buff, len); + asm volatile("\n" "0: cksm %[sum],%[rp]\n" " jo 0b\n" : [sum] "+&d" (sum), [rp] "+&d" (rp.pair) : : "cc", "memory"); return sum; } +__wsum csum_partial(const void *buff, int len, __wsum sum); + +#define _HAVE_ARCH_CSUM_AND_COPY +__wsum csum_partial_copy_nocheck(const void *src, void *dst, int len); + /* * Fold a partial checksum without adding pseudo headers. */ diff --git a/arch/s390/include/asm/cio.h b/arch/s390/include/asm/cio.h index 1c4f585dd..b6b619f34 100644 --- a/arch/s390/include/asm/cio.h +++ b/arch/s390/include/asm/cio.h @@ -7,6 +7,7 @@ #include #include +#include #include #include @@ -32,7 +33,7 @@ struct ccw1 { __u8 cmd_code; __u8 flags; __u16 count; - __u32 cda; + dma32_t cda; } __attribute__ ((packed,aligned(8))); /** @@ -152,8 +153,8 @@ struct sublog { struct esw0 { struct sublog sublog; struct erw erw; - __u32 faddr[2]; - __u32 saddr; + dma32_t faddr[2]; + dma32_t saddr; } __attribute__ ((packed)); /** @@ -364,6 +365,8 @@ extern struct device *cio_get_dma_css_dev(void); void *cio_gp_dma_zalloc(struct gen_pool *gp_dma, struct device *dma_dev, size_t size); +void *__cio_gp_dma_zalloc(struct gen_pool *gp_dma, struct device *dma_dev, + size_t size, dma32_t *dma_handle); void cio_gp_dma_free(struct gen_pool *gp_dma, void *cpu_addr, size_t size); void cio_gp_dma_destroy(struct gen_pool *gp_dma, struct device *dma_dev); struct gen_pool *cio_gp_dma_create(struct device *dma_dev, int nr_pages); diff --git a/arch/s390/include/asm/diag.h b/arch/s390/include/asm/diag.h index bed804137..20b942201 100644 --- a/arch/s390/include/asm/diag.h +++ b/arch/s390/include/asm/diag.h @@ -44,6 +44,13 @@ enum diag_stat_enum { void diag_stat_inc(enum diag_stat_enum nr); void diag_stat_inc_norecursion(enum diag_stat_enum nr); +struct hypfs_diag0c_entry; + +/* + * Diagnose 0c: Pseudo Timer + */ +void diag0c(struct hypfs_diag0c_entry *data); + /* * Diagnose 10: Release page range */ @@ -331,10 +338,10 @@ struct hypfs_diag0c_entry; */ struct diag_ops { int (*diag210)(struct diag210 *addr); - int (*diag26c)(void *req, void *resp, enum diag26c_sc subcode); + int (*diag26c)(unsigned long rx, unsigned long rx1, enum diag26c_sc subcode); int (*diag14)(unsigned long rx, unsigned long ry1, unsigned long subcode); int (*diag8c)(struct diag8c *addr, struct ccw_dev_id *devno, size_t len); - void (*diag0c)(struct hypfs_diag0c_entry *entry); + void (*diag0c)(unsigned long rx); void (*diag308_reset)(void); }; @@ -342,9 +349,9 @@ extern struct diag_ops diag_amode31_ops; extern struct diag210 *__diag210_tmp_amode31; int _diag210_amode31(struct diag210 *addr); -int _diag26c_amode31(void *req, void *resp, enum diag26c_sc subcode); +int _diag26c_amode31(unsigned long rx, unsigned long rx1, enum diag26c_sc subcode); int _diag14_amode31(unsigned long rx, unsigned long ry1, unsigned long subcode); -void _diag0c_amode31(struct hypfs_diag0c_entry *entry); +void _diag0c_amode31(unsigned long rx); void _diag308_reset_amode31(void); int _diag8c_amode31(struct diag8c *addr, struct ccw_dev_id *devno, size_t len); diff --git a/arch/s390/include/asm/dma-types.h b/arch/s390/include/asm/dma-types.h new file mode 100644 index 000000000..5c5734e69 --- /dev/null +++ b/arch/s390/include/asm/dma-types.h @@ -0,0 +1,103 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +#ifndef _ASM_S390_DMA_TYPES_H_ +#define _ASM_S390_DMA_TYPES_H_ + +#include +#include + +/* + * typedef dma32_t + * Contains a 31 bit absolute address to a DMA capable piece of storage. + * + * For CIO, DMA addresses are always absolute addresses. These addresses tend + * to be used in architectured memory blocks (like ORB, IDAW, MIDAW). Under + * certain circumstances 31 bit wide addresses must be used because the + * address must fit in 31 bits. + * + * This type is to be used when such fields can be modelled as 32 bit wide. + */ +typedef u32 __bitwise dma32_t; + +/* + * typedef dma64_t + * Contains a 64 bit absolute address to a DMA capable piece of storage. + * + * For CIO, DMA addresses are always absolute addresses. These addresses tend + * to be used in architectured memory blocks (like ORB, IDAW, MIDAW). + * + * This type is to be used to model such 64 bit wide fields. + */ +typedef u64 __bitwise dma64_t; + +/* + * Although DMA addresses should be obtained using the DMA API, in cases when + * it is known that the first argument holds a virtual address that points to + * DMA-able 31 bit addressable storage, then this function can be safely used. + */ +static inline dma32_t virt_to_dma32(void *ptr) +{ + return (__force dma32_t)__pa32(ptr); +} + +static inline void *dma32_to_virt(dma32_t addr) +{ + return __va((__force unsigned long)addr); +} + +static inline dma32_t u32_to_dma32(u32 addr) +{ + return (__force dma32_t)addr; +} + +static inline u32 dma32_to_u32(dma32_t addr) +{ + return (__force u32)addr; +} + +static inline dma32_t dma32_add(dma32_t a, u32 b) +{ + return (__force dma32_t)((__force u32)a + b); +} + +static inline dma32_t dma32_and(dma32_t a, u32 b) +{ + return (__force dma32_t)((__force u32)a & b); +} + +/* + * Although DMA addresses should be obtained using the DMA API, in cases when + * it is known that the first argument holds a virtual address that points to + * DMA-able storage, then this function can be safely used. + */ +static inline dma64_t virt_to_dma64(void *ptr) +{ + return (__force dma64_t)__pa(ptr); +} + +static inline void *dma64_to_virt(dma64_t addr) +{ + return __va((__force unsigned long)addr); +} + +static inline dma64_t u64_to_dma64(u64 addr) +{ + return (__force dma64_t)addr; +} + +static inline u64 dma64_to_u64(dma64_t addr) +{ + return (__force u64)addr; +} + +static inline dma64_t dma64_add(dma64_t a, u64 b) +{ + return (__force dma64_t)((__force u64)a + b); +} + +static inline dma64_t dma64_and(dma64_t a, u64 b) +{ + return (__force dma64_t)((__force u64)a & b); +} + +#endif /* _ASM_S390_DMA_TYPES_H_ */ diff --git a/arch/s390/include/asm/eadm.h b/arch/s390/include/asm/eadm.h index 06f795855..c4589ec45 100644 --- a/arch/s390/include/asm/eadm.h +++ b/arch/s390/include/asm/eadm.h @@ -5,6 +5,7 @@ #include #include #include +#include struct arqb { u64 data; @@ -45,7 +46,7 @@ struct msb { u16:12; u16 bs:4; u32 blk_count; - u64 data_addr; + dma64_t data_addr; u64 scm_addr; u64:64; } __packed; @@ -54,7 +55,7 @@ struct aidaw { u8 flags; u32 :24; u32 :32; - u64 data_addr; + dma64_t data_addr; } __packed; #define MSB_OC_CLEAR 0 diff --git a/arch/s390/include/asm/entry-common.h b/arch/s390/include/asm/entry-common.h index fdd319a62..7f5004065 100644 --- a/arch/s390/include/asm/entry-common.h +++ b/arch/s390/include/asm/entry-common.h @@ -8,7 +8,7 @@ #include #include #include -#include +#include #include #define ARCH_EXIT_TO_USER_MODE_WORK (_TIF_GUARDED_STORAGE | _TIF_PER_TRAP) @@ -41,8 +41,7 @@ static __always_inline void arch_exit_to_user_mode_work(struct pt_regs *regs, static __always_inline void arch_exit_to_user_mode(void) { - if (test_cpu_flag(CIF_FPU)) - __load_fpu_regs(); + load_user_fpu_regs(); if (IS_ENABLED(CONFIG_DEBUG_ENTRY)) debug_user_asce(1); diff --git a/arch/s390/include/asm/fcx.h b/arch/s390/include/asm/fcx.h index 29784b4b4..80f82a739 100644 --- a/arch/s390/include/asm/fcx.h +++ b/arch/s390/include/asm/fcx.h @@ -10,6 +10,7 @@ #define _ASM_S390_FCX_H #include +#include #define TCW_FORMAT_DEFAULT 0 #define TCW_TIDAW_FORMAT_DEFAULT 0 @@ -43,16 +44,16 @@ struct tcw { u32 r:1; u32 w:1; u32 :16; - u64 output; - u64 input; - u64 tsb; - u64 tccb; + dma64_t output; + dma64_t input; + dma64_t tsb; + dma64_t tccb; u32 output_count; u32 input_count; u32 :32; u32 :32; u32 :32; - u32 intrg; + dma32_t intrg; } __attribute__ ((packed, aligned(64))); #define TIDAW_FLAGS_LAST (1 << (7 - 0)) @@ -73,7 +74,7 @@ struct tidaw { u32 flags:8; u32 :24; u32 count; - u64 addr; + dma64_t addr; } __attribute__ ((packed, aligned(16))); /** diff --git a/arch/s390/include/asm/fpu-insn-asm.h b/arch/s390/include/asm/fpu-insn-asm.h new file mode 100644 index 000000000..02ccfe460 --- /dev/null +++ b/arch/s390/include/asm/fpu-insn-asm.h @@ -0,0 +1,732 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Support for Vector Instructions + * + * Assembler macros to generate .byte/.word code for particular + * vector instructions that are supported by recent binutils (>= 2.26) only. + * + * Copyright IBM Corp. 2015 + * Author(s): Hendrik Brueckner + */ + +#ifndef __ASM_S390_FPU_INSN_ASM_H +#define __ASM_S390_FPU_INSN_ASM_H + +#ifndef __ASM_S390_FPU_INSN_H +#error only can be included directly +#endif + +#ifdef __ASSEMBLY__ + +/* Macros to generate vector instruction byte code */ + +/* GR_NUM - Retrieve general-purpose register number + * + * @opd: Operand to store register number + * @r64: String designation register in the format "%rN" + */ +.macro GR_NUM opd gr + \opd = 255 + .ifc \gr,%r0 + \opd = 0 + .endif + .ifc \gr,%r1 + \opd = 1 + .endif + .ifc \gr,%r2 + \opd = 2 + .endif + .ifc \gr,%r3 + \opd = 3 + .endif + .ifc \gr,%r4 + \opd = 4 + .endif + .ifc \gr,%r5 + \opd = 5 + .endif + .ifc \gr,%r6 + \opd = 6 + .endif + .ifc \gr,%r7 + \opd = 7 + .endif + .ifc \gr,%r8 + \opd = 8 + .endif + .ifc \gr,%r9 + \opd = 9 + .endif + .ifc \gr,%r10 + \opd = 10 + .endif + .ifc \gr,%r11 + \opd = 11 + .endif + .ifc \gr,%r12 + \opd = 12 + .endif + .ifc \gr,%r13 + \opd = 13 + .endif + .ifc \gr,%r14 + \opd = 14 + .endif + .ifc \gr,%r15 + \opd = 15 + .endif + .if \opd == 255 + \opd = \gr + .endif +.endm + +/* VX_NUM - Retrieve vector register number + * + * @opd: Operand to store register number + * @vxr: String designation register in the format "%vN" + * + * The vector register number is used for as input number to the + * instruction and, as well as, to compute the RXB field of the + * instruction. + */ +.macro VX_NUM opd vxr + \opd = 255 + .ifc \vxr,%v0 + \opd = 0 + .endif + .ifc \vxr,%v1 + \opd = 1 + .endif + .ifc \vxr,%v2 + \opd = 2 + .endif + .ifc \vxr,%v3 + \opd = 3 + .endif + .ifc \vxr,%v4 + \opd = 4 + .endif + .ifc \vxr,%v5 + \opd = 5 + .endif + .ifc \vxr,%v6 + \opd = 6 + .endif + .ifc \vxr,%v7 + \opd = 7 + .endif + .ifc \vxr,%v8 + \opd = 8 + .endif + .ifc \vxr,%v9 + \opd = 9 + .endif + .ifc \vxr,%v10 + \opd = 10 + .endif + .ifc \vxr,%v11 + \opd = 11 + .endif + .ifc \vxr,%v12 + \opd = 12 + .endif + .ifc \vxr,%v13 + \opd = 13 + .endif + .ifc \vxr,%v14 + \opd = 14 + .endif + .ifc \vxr,%v15 + \opd = 15 + .endif + .ifc \vxr,%v16 + \opd = 16 + .endif + .ifc \vxr,%v17 + \opd = 17 + .endif + .ifc \vxr,%v18 + \opd = 18 + .endif + .ifc \vxr,%v19 + \opd = 19 + .endif + .ifc \vxr,%v20 + \opd = 20 + .endif + .ifc \vxr,%v21 + \opd = 21 + .endif + .ifc \vxr,%v22 + \opd = 22 + .endif + .ifc \vxr,%v23 + \opd = 23 + .endif + .ifc \vxr,%v24 + \opd = 24 + .endif + .ifc \vxr,%v25 + \opd = 25 + .endif + .ifc \vxr,%v26 + \opd = 26 + .endif + .ifc \vxr,%v27 + \opd = 27 + .endif + .ifc \vxr,%v28 + \opd = 28 + .endif + .ifc \vxr,%v29 + \opd = 29 + .endif + .ifc \vxr,%v30 + \opd = 30 + .endif + .ifc \vxr,%v31 + \opd = 31 + .endif + .if \opd == 255 + \opd = \vxr + .endif +.endm + +/* RXB - Compute most significant bit used vector registers + * + * @rxb: Operand to store computed RXB value + * @v1: Vector register designated operand whose MSB is stored in + * RXB bit 0 (instruction bit 36) and whose remaining bits + * are stored in instruction bits 8-11. + * @v2: Vector register designated operand whose MSB is stored in + * RXB bit 1 (instruction bit 37) and whose remaining bits + * are stored in instruction bits 12-15. + * @v3: Vector register designated operand whose MSB is stored in + * RXB bit 2 (instruction bit 38) and whose remaining bits + * are stored in instruction bits 16-19. + * @v4: Vector register designated operand whose MSB is stored in + * RXB bit 3 (instruction bit 39) and whose remaining bits + * are stored in instruction bits 32-35. + * + * Note: In most vector instruction formats [1] V1, V2, V3, and V4 directly + * correspond to @v1, @v2, @v3, and @v4. But there are exceptions, such as but + * not limited to the vector instruction formats VRR-g, VRR-h, VRS-a, VRS-d, + * and VSI. + * + * [1] IBM z/Architecture Principles of Operation, chapter "Program + * Execution, section "Instructions", subsection "Instruction Formats". + */ +.macro RXB rxb v1 v2=0 v3=0 v4=0 + \rxb = 0 + .if \v1 & 0x10 + \rxb = \rxb | 0x08 + .endif + .if \v2 & 0x10 + \rxb = \rxb | 0x04 + .endif + .if \v3 & 0x10 + \rxb = \rxb | 0x02 + .endif + .if \v4 & 0x10 + \rxb = \rxb | 0x01 + .endif +.endm + +/* MRXB - Generate Element Size Control and RXB value + * + * @m: Element size control + * @v1: First vector register designated operand (for RXB) + * @v2: Second vector register designated operand (for RXB) + * @v3: Third vector register designated operand (for RXB) + * @v4: Fourth vector register designated operand (for RXB) + * + * Note: For @v1, @v2, @v3, and @v4 also refer to the RXB macro + * description for further details. + */ +.macro MRXB m v1 v2=0 v3=0 v4=0 + rxb = 0 + RXB rxb, \v1, \v2, \v3, \v4 + .byte (\m << 4) | rxb +.endm + +/* MRXBOPC - Generate Element Size Control, RXB, and final Opcode fields + * + * @m: Element size control + * @opc: Opcode + * @v1: First vector register designated operand (for RXB) + * @v2: Second vector register designated operand (for RXB) + * @v3: Third vector register designated operand (for RXB) + * @v4: Fourth vector register designated operand (for RXB) + * + * Note: For @v1, @v2, @v3, and @v4 also refer to the RXB macro + * description for further details. + */ +.macro MRXBOPC m opc v1 v2=0 v3=0 v4=0 + MRXB \m, \v1, \v2, \v3, \v4 + .byte \opc +.endm + +/* Vector support instructions */ + +/* VECTOR GENERATE BYTE MASK */ +.macro VGBM vr imm2 + VX_NUM v1, \vr + .word (0xE700 | ((v1&15) << 4)) + .word \imm2 + MRXBOPC 0, 0x44, v1 +.endm +.macro VZERO vxr + VGBM \vxr, 0 +.endm +.macro VONE vxr + VGBM \vxr, 0xFFFF +.endm + +/* VECTOR LOAD VR ELEMENT FROM GR */ +.macro VLVG v, gr, disp, m + VX_NUM v1, \v + GR_NUM b2, "%r0" + GR_NUM r3, \gr + .word 0xE700 | ((v1&15) << 4) | r3 + .word (b2 << 12) | (\disp) + MRXBOPC \m, 0x22, v1 +.endm +.macro VLVGB v, gr, index, base + VLVG \v, \gr, \index, \base, 0 +.endm +.macro VLVGH v, gr, index + VLVG \v, \gr, \index, 1 +.endm +.macro VLVGF v, gr, index + VLVG \v, \gr, \index, 2 +.endm +.macro VLVGG v, gr, index + VLVG \v, \gr, \index, 3 +.endm + +/* VECTOR LOAD REGISTER */ +.macro VLR v1, v2 + VX_NUM v1, \v1 + VX_NUM v2, \v2 + .word 0xE700 | ((v1&15) << 4) | (v2&15) + .word 0 + MRXBOPC 0, 0x56, v1, v2 +.endm + +/* VECTOR LOAD */ +.macro VL v, disp, index="%r0", base + VX_NUM v1, \v + GR_NUM x2, \index + GR_NUM b2, \base + .word 0xE700 | ((v1&15) << 4) | x2 + .word (b2 << 12) | (\disp) + MRXBOPC 0, 0x06, v1 +.endm + +/* VECTOR LOAD ELEMENT */ +.macro VLEx vr1, disp, index="%r0", base, m3, opc + VX_NUM v1, \vr1 + GR_NUM x2, \index + GR_NUM b2, \base + .word 0xE700 | ((v1&15) << 4) | x2 + .word (b2 << 12) | (\disp) + MRXBOPC \m3, \opc, v1 +.endm +.macro VLEB vr1, disp, index="%r0", base, m3 + VLEx \vr1, \disp, \index, \base, \m3, 0x00 +.endm +.macro VLEH vr1, disp, index="%r0", base, m3 + VLEx \vr1, \disp, \index, \base, \m3, 0x01 +.endm +.macro VLEF vr1, disp, index="%r0", base, m3 + VLEx \vr1, \disp, \index, \base, \m3, 0x03 +.endm +.macro VLEG vr1, disp, index="%r0", base, m3 + VLEx \vr1, \disp, \index, \base, \m3, 0x02 +.endm + +/* VECTOR LOAD ELEMENT IMMEDIATE */ +.macro VLEIx vr1, imm2, m3, opc + VX_NUM v1, \vr1 + .word 0xE700 | ((v1&15) << 4) + .word \imm2 + MRXBOPC \m3, \opc, v1 +.endm +.macro VLEIB vr1, imm2, index + VLEIx \vr1, \imm2, \index, 0x40 +.endm +.macro VLEIH vr1, imm2, index + VLEIx \vr1, \imm2, \index, 0x41 +.endm +.macro VLEIF vr1, imm2, index + VLEIx \vr1, \imm2, \index, 0x43 +.endm +.macro VLEIG vr1, imm2, index + VLEIx \vr1, \imm2, \index, 0x42 +.endm + +/* VECTOR LOAD GR FROM VR ELEMENT */ +.macro VLGV gr, vr, disp, base="%r0", m + GR_NUM r1, \gr + GR_NUM b2, \base + VX_NUM v3, \vr + .word 0xE700 | (r1 << 4) | (v3&15) + .word (b2 << 12) | (\disp) + MRXBOPC \m, 0x21, 0, v3 +.endm +.macro VLGVB gr, vr, disp, base="%r0" + VLGV \gr, \vr, \disp, \base, 0 +.endm +.macro VLGVH gr, vr, disp, base="%r0" + VLGV \gr, \vr, \disp, \base, 1 +.endm +.macro VLGVF gr, vr, disp, base="%r0" + VLGV \gr, \vr, \disp, \base, 2 +.endm +.macro VLGVG gr, vr, disp, base="%r0" + VLGV \gr, \vr, \disp, \base, 3 +.endm + +/* VECTOR LOAD MULTIPLE */ +.macro VLM vfrom, vto, disp, base, hint=3 + VX_NUM v1, \vfrom + VX_NUM v3, \vto + GR_NUM b2, \base + .word 0xE700 | ((v1&15) << 4) | (v3&15) + .word (b2 << 12) | (\disp) + MRXBOPC \hint, 0x36, v1, v3 +.endm + +/* VECTOR STORE */ +.macro VST vr1, disp, index="%r0", base + VX_NUM v1, \vr1 + GR_NUM x2, \index + GR_NUM b2, \base + .word 0xE700 | ((v1&15) << 4) | (x2&15) + .word (b2 << 12) | (\disp) + MRXBOPC 0, 0x0E, v1 +.endm + +/* VECTOR STORE MULTIPLE */ +.macro VSTM vfrom, vto, disp, base, hint=3 + VX_NUM v1, \vfrom + VX_NUM v3, \vto + GR_NUM b2, \base + .word 0xE700 | ((v1&15) << 4) | (v3&15) + .word (b2 << 12) | (\disp) + MRXBOPC \hint, 0x3E, v1, v3 +.endm + +/* VECTOR PERMUTE */ +.macro VPERM vr1, vr2, vr3, vr4 + VX_NUM v1, \vr1 + VX_NUM v2, \vr2 + VX_NUM v3, \vr3 + VX_NUM v4, \vr4 + .word 0xE700 | ((v1&15) << 4) | (v2&15) + .word ((v3&15) << 12) + MRXBOPC (v4&15), 0x8C, v1, v2, v3, v4 +.endm + +/* VECTOR UNPACK LOGICAL LOW */ +.macro VUPLL vr1, vr2, m3 + VX_NUM v1, \vr1 + VX_NUM v2, \vr2 + .word 0xE700 | ((v1&15) << 4) | (v2&15) + .word 0x0000 + MRXBOPC \m3, 0xD4, v1, v2 +.endm +.macro VUPLLB vr1, vr2 + VUPLL \vr1, \vr2, 0 +.endm +.macro VUPLLH vr1, vr2 + VUPLL \vr1, \vr2, 1 +.endm +.macro VUPLLF vr1, vr2 + VUPLL \vr1, \vr2, 2 +.endm + +/* VECTOR PERMUTE DOUBLEWORD IMMEDIATE */ +.macro VPDI vr1, vr2, vr3, m4 + VX_NUM v1, \vr1 + VX_NUM v2, \vr2 + VX_NUM v3, \vr3 + .word 0xE700 | ((v1&15) << 4) | (v2&15) + .word ((v3&15) << 12) + MRXBOPC \m4, 0x84, v1, v2, v3 +.endm + +/* VECTOR REPLICATE */ +.macro VREP vr1, vr3, imm2, m4 + VX_NUM v1, \vr1 + VX_NUM v3, \vr3 + .word 0xE700 | ((v1&15) << 4) | (v3&15) + .word \imm2 + MRXBOPC \m4, 0x4D, v1, v3 +.endm +.macro VREPB vr1, vr3, imm2 + VREP \vr1, \vr3, \imm2, 0 +.endm +.macro VREPH vr1, vr3, imm2 + VREP \vr1, \vr3, \imm2, 1 +.endm +.macro VREPF vr1, vr3, imm2 + VREP \vr1, \vr3, \imm2, 2 +.endm +.macro VREPG vr1, vr3, imm2 + VREP \vr1, \vr3, \imm2, 3 +.endm + +/* VECTOR MERGE HIGH */ +.macro VMRH vr1, vr2, vr3, m4 + VX_NUM v1, \vr1 + VX_NUM v2, \vr2 + VX_NUM v3, \vr3 + .word 0xE700 | ((v1&15) << 4) | (v2&15) + .word ((v3&15) << 12) + MRXBOPC \m4, 0x61, v1, v2, v3 +.endm +.macro VMRHB vr1, vr2, vr3 + VMRH \vr1, \vr2, \vr3, 0 +.endm +.macro VMRHH vr1, vr2, vr3 + VMRH \vr1, \vr2, \vr3, 1 +.endm +.macro VMRHF vr1, vr2, vr3 + VMRH \vr1, \vr2, \vr3, 2 +.endm +.macro VMRHG vr1, vr2, vr3 + VMRH \vr1, \vr2, \vr3, 3 +.endm + +/* VECTOR MERGE LOW */ +.macro VMRL vr1, vr2, vr3, m4 + VX_NUM v1, \vr1 + VX_NUM v2, \vr2 + VX_NUM v3, \vr3 + .word 0xE700 | ((v1&15) << 4) | (v2&15) + .word ((v3&15) << 12) + MRXBOPC \m4, 0x60, v1, v2, v3 +.endm +.macro VMRLB vr1, vr2, vr3 + VMRL \vr1, \vr2, \vr3, 0 +.endm +.macro VMRLH vr1, vr2, vr3 + VMRL \vr1, \vr2, \vr3, 1 +.endm +.macro VMRLF vr1, vr2, vr3 + VMRL \vr1, \vr2, \vr3, 2 +.endm +.macro VMRLG vr1, vr2, vr3 + VMRL \vr1, \vr2, \vr3, 3 +.endm + +/* VECTOR LOAD WITH LENGTH */ +.macro VLL v, gr, disp, base + VX_NUM v1, \v + GR_NUM b2, \base + GR_NUM r3, \gr + .word 0xE700 | ((v1&15) << 4) | r3 + .word (b2 << 12) | (\disp) + MRXBOPC 0, 0x37, v1 +.endm + +/* VECTOR STORE WITH LENGTH */ +.macro VSTL v, gr, disp, base + VX_NUM v1, \v + GR_NUM b2, \base + GR_NUM r3, \gr + .word 0xE700 | ((v1&15) << 4) | r3 + .word (b2 << 12) | (\disp) + MRXBOPC 0, 0x3f, v1 +.endm + +/* Vector integer instructions */ + +/* VECTOR AND */ +.macro VN vr1, vr2, vr3 + VX_NUM v1, \vr1 + VX_NUM v2, \vr2 + VX_NUM v3, \vr3 + .word 0xE700 | ((v1&15) << 4) | (v2&15) + .word ((v3&15) << 12) + MRXBOPC 0, 0x68, v1, v2, v3 +.endm + +/* VECTOR CHECKSUM */ +.macro VCKSM vr1, vr2, vr3 + VX_NUM v1, \vr1 + VX_NUM v2, \vr2 + VX_NUM v3, \vr3 + .word 0xE700 | ((v1&15) << 4) | (v2&15) + .word ((v3&15) << 12) + MRXBOPC 0, 0x66, v1, v2, v3 +.endm + +/* VECTOR EXCLUSIVE OR */ +.macro VX vr1, vr2, vr3 + VX_NUM v1, \vr1 + VX_NUM v2, \vr2 + VX_NUM v3, \vr3 + .word 0xE700 | ((v1&15) << 4) | (v2&15) + .word ((v3&15) << 12) + MRXBOPC 0, 0x6D, v1, v2, v3 +.endm + +/* VECTOR GALOIS FIELD MULTIPLY SUM */ +.macro VGFM vr1, vr2, vr3, m4 + VX_NUM v1, \vr1 + VX_NUM v2, \vr2 + VX_NUM v3, \vr3 + .word 0xE700 | ((v1&15) << 4) | (v2&15) + .word ((v3&15) << 12) + MRXBOPC \m4, 0xB4, v1, v2, v3 +.endm +.macro VGFMB vr1, vr2, vr3 + VGFM \vr1, \vr2, \vr3, 0 +.endm +.macro VGFMH vr1, vr2, vr3 + VGFM \vr1, \vr2, \vr3, 1 +.endm +.macro VGFMF vr1, vr2, vr3 + VGFM \vr1, \vr2, \vr3, 2 +.endm +.macro VGFMG vr1, vr2, vr3 + VGFM \vr1, \vr2, \vr3, 3 +.endm + +/* VECTOR GALOIS FIELD MULTIPLY SUM AND ACCUMULATE */ +.macro VGFMA vr1, vr2, vr3, vr4, m5 + VX_NUM v1, \vr1 + VX_NUM v2, \vr2 + VX_NUM v3, \vr3 + VX_NUM v4, \vr4 + .word 0xE700 | ((v1&15) << 4) | (v2&15) + .word ((v3&15) << 12) | (\m5 << 8) + MRXBOPC (v4&15), 0xBC, v1, v2, v3, v4 +.endm +.macro VGFMAB vr1, vr2, vr3, vr4 + VGFMA \vr1, \vr2, \vr3, \vr4, 0 +.endm +.macro VGFMAH vr1, vr2, vr3, vr4 + VGFMA \vr1, \vr2, \vr3, \vr4, 1 +.endm +.macro VGFMAF vr1, vr2, vr3, vr4 + VGFMA \vr1, \vr2, \vr3, \vr4, 2 +.endm +.macro VGFMAG vr1, vr2, vr3, vr4 + VGFMA \vr1, \vr2, \vr3, \vr4, 3 +.endm + +/* VECTOR SHIFT RIGHT LOGICAL BY BYTE */ +.macro VSRLB vr1, vr2, vr3 + VX_NUM v1, \vr1 + VX_NUM v2, \vr2 + VX_NUM v3, \vr3 + .word 0xE700 | ((v1&15) << 4) | (v2&15) + .word ((v3&15) << 12) + MRXBOPC 0, 0x7D, v1, v2, v3 +.endm + +/* VECTOR REPLICATE IMMEDIATE */ +.macro VREPI vr1, imm2, m3 + VX_NUM v1, \vr1 + .word 0xE700 | ((v1&15) << 4) + .word \imm2 + MRXBOPC \m3, 0x45, v1 +.endm +.macro VREPIB vr1, imm2 + VREPI \vr1, \imm2, 0 +.endm +.macro VREPIH vr1, imm2 + VREPI \vr1, \imm2, 1 +.endm +.macro VREPIF vr1, imm2 + VREPI \vr1, \imm2, 2 +.endm +.macro VREPIG vr1, imm2 + VREP \vr1, \imm2, 3 +.endm + +/* VECTOR ADD */ +.macro VA vr1, vr2, vr3, m4 + VX_NUM v1, \vr1 + VX_NUM v2, \vr2 + VX_NUM v3, \vr3 + .word 0xE700 | ((v1&15) << 4) | (v2&15) + .word ((v3&15) << 12) + MRXBOPC \m4, 0xF3, v1, v2, v3 +.endm +.macro VAB vr1, vr2, vr3 + VA \vr1, \vr2, \vr3, 0 +.endm +.macro VAH vr1, vr2, vr3 + VA \vr1, \vr2, \vr3, 1 +.endm +.macro VAF vr1, vr2, vr3 + VA \vr1, \vr2, \vr3, 2 +.endm +.macro VAG vr1, vr2, vr3 + VA \vr1, \vr2, \vr3, 3 +.endm +.macro VAQ vr1, vr2, vr3 + VA \vr1, \vr2, \vr3, 4 +.endm + +/* VECTOR ELEMENT SHIFT RIGHT ARITHMETIC */ +.macro VESRAV vr1, vr2, vr3, m4 + VX_NUM v1, \vr1 + VX_NUM v2, \vr2 + VX_NUM v3, \vr3 + .word 0xE700 | ((v1&15) << 4) | (v2&15) + .word ((v3&15) << 12) + MRXBOPC \m4, 0x7A, v1, v2, v3 +.endm + +.macro VESRAVB vr1, vr2, vr3 + VESRAV \vr1, \vr2, \vr3, 0 +.endm +.macro VESRAVH vr1, vr2, vr3 + VESRAV \vr1, \vr2, \vr3, 1 +.endm +.macro VESRAVF vr1, vr2, vr3 + VESRAV \vr1, \vr2, \vr3, 2 +.endm +.macro VESRAVG vr1, vr2, vr3 + VESRAV \vr1, \vr2, \vr3, 3 +.endm + +/* VECTOR ELEMENT ROTATE LEFT LOGICAL */ +.macro VERLL vr1, vr3, disp, base="%r0", m4 + VX_NUM v1, \vr1 + VX_NUM v3, \vr3 + GR_NUM b2, \base + .word 0xE700 | ((v1&15) << 4) | (v3&15) + .word (b2 << 12) | (\disp) + MRXBOPC \m4, 0x33, v1, v3 +.endm +.macro VERLLB vr1, vr3, disp, base="%r0" + VERLL \vr1, \vr3, \disp, \base, 0 +.endm +.macro VERLLH vr1, vr3, disp, base="%r0" + VERLL \vr1, \vr3, \disp, \base, 1 +.endm +.macro VERLLF vr1, vr3, disp, base="%r0" + VERLL \vr1, \vr3, \disp, \base, 2 +.endm +.macro VERLLG vr1, vr3, disp, base="%r0" + VERLL \vr1, \vr3, \disp, \base, 3 +.endm + +/* VECTOR SHIFT LEFT DOUBLE BY BYTE */ +.macro VSLDB vr1, vr2, vr3, imm4 + VX_NUM v1, \vr1 + VX_NUM v2, \vr2 + VX_NUM v3, \vr3 + .word 0xE700 | ((v1&15) << 4) | (v2&15) + .word ((v3&15) << 12) | (\imm4) + MRXBOPC 0, 0x77, v1, v2, v3 +.endm + +#endif /* __ASSEMBLY__ */ +#endif /* __ASM_S390_FPU_INSN_ASM_H */ diff --git a/arch/s390/include/asm/fpu-insn.h b/arch/s390/include/asm/fpu-insn.h new file mode 100644 index 000000000..c1e2e521d --- /dev/null +++ b/arch/s390/include/asm/fpu-insn.h @@ -0,0 +1,486 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Support for Floating Point and Vector Instructions + * + */ + +#ifndef __ASM_S390_FPU_INSN_H +#define __ASM_S390_FPU_INSN_H + +#include + +#ifndef __ASSEMBLY__ + +#include +#include + +asm(".include \"asm/fpu-insn-asm.h\"\n"); + +/* + * Various small helper functions, which can and should be used within + * kernel fpu code sections. Each function represents only one floating + * point or vector instruction (except for helper functions which require + * exception handling). + * + * This allows to use floating point and vector instructions like C + * functions, which has the advantage that all supporting code, like + * e.g. loops, can be written in easy to read C code. + * + * Each of the helper functions provides support for code instrumentation, + * like e.g. KASAN. Therefore instrumentation is also covered automatically + * when using these functions. + * + * In order to ensure that code generated with the helper functions stays + * within kernel fpu sections, which are guarded with kernel_fpu_begin() + * and kernel_fpu_end() calls, each function has a mandatory "memory" + * barrier. + */ + +static __always_inline void fpu_cefbr(u8 f1, s32 val) +{ + asm volatile("cefbr %[f1],%[val]\n" + : + : [f1] "I" (f1), [val] "d" (val) + : "memory"); +} + +static __always_inline unsigned long fpu_cgebr(u8 f2, u8 mode) +{ + unsigned long val; + + asm volatile("cgebr %[val],%[mode],%[f2]\n" + : [val] "=d" (val) + : [f2] "I" (f2), [mode] "I" (mode) + : "memory"); + return val; +} + +static __always_inline void fpu_debr(u8 f1, u8 f2) +{ + asm volatile("debr %[f1],%[f2]\n" + : + : [f1] "I" (f1), [f2] "I" (f2) + : "memory"); +} + +static __always_inline void fpu_ld(unsigned short fpr, freg_t *reg) +{ + instrument_read(reg, sizeof(*reg)); + asm volatile("ld %[fpr],%[reg]\n" + : + : [fpr] "I" (fpr), [reg] "Q" (reg->ui) + : "memory"); +} + +static __always_inline void fpu_ldgr(u8 f1, u32 val) +{ + asm volatile("ldgr %[f1],%[val]\n" + : + : [f1] "I" (f1), [val] "d" (val) + : "memory"); +} + +static __always_inline void fpu_lfpc(unsigned int *fpc) +{ + instrument_read(fpc, sizeof(*fpc)); + asm volatile("lfpc %[fpc]" + : + : [fpc] "Q" (*fpc) + : "memory"); +} + +/** + * fpu_lfpc_safe - Load floating point control register safely. + * @fpc: new value for floating point control register + * + * Load floating point control register. This may lead to an exception, + * since a saved value may have been modified by user space (ptrace, + * signal return, kvm registers) to an invalid value. In such a case + * set the floating point control register to zero. + */ +static inline void fpu_lfpc_safe(unsigned int *fpc) +{ + u32 tmp; + + instrument_read(fpc, sizeof(*fpc)); + asm volatile("\n" + "0: lfpc %[fpc]\n" + "1: nopr %%r7\n" + ".pushsection .fixup, \"ax\"\n" + "2: lghi %[tmp],0\n" + " sfpc %[tmp]\n" + " jg 1b\n" + ".popsection\n" + EX_TABLE(1b, 2b) + : [tmp] "=d" (tmp) + : [fpc] "Q" (*fpc) + : "memory"); +} + +static __always_inline void fpu_std(unsigned short fpr, freg_t *reg) +{ + instrument_write(reg, sizeof(*reg)); + asm volatile("std %[fpr],%[reg]\n" + : [reg] "=Q" (reg->ui) + : [fpr] "I" (fpr) + : "memory"); +} + +static __always_inline void fpu_sfpc(unsigned int fpc) +{ + asm volatile("sfpc %[fpc]" + : + : [fpc] "d" (fpc) + : "memory"); +} + +static __always_inline void fpu_stfpc(unsigned int *fpc) +{ + instrument_write(fpc, sizeof(*fpc)); + asm volatile("stfpc %[fpc]" + : [fpc] "=Q" (*fpc) + : + : "memory"); +} + +static __always_inline void fpu_vab(u8 v1, u8 v2, u8 v3) +{ + asm volatile("VAB %[v1],%[v2],%[v3]" + : + : [v1] "I" (v1), [v2] "I" (v2), [v3] "I" (v3) + : "memory"); +} + +static __always_inline void fpu_vcksm(u8 v1, u8 v2, u8 v3) +{ + asm volatile("VCKSM %[v1],%[v2],%[v3]" + : + : [v1] "I" (v1), [v2] "I" (v2), [v3] "I" (v3) + : "memory"); +} + +static __always_inline void fpu_vesravb(u8 v1, u8 v2, u8 v3) +{ + asm volatile("VESRAVB %[v1],%[v2],%[v3]" + : + : [v1] "I" (v1), [v2] "I" (v2), [v3] "I" (v3) + : "memory"); +} + +static __always_inline void fpu_vgfmag(u8 v1, u8 v2, u8 v3, u8 v4) +{ + asm volatile("VGFMAG %[v1],%[v2],%[v3],%[v4]" + : + : [v1] "I" (v1), [v2] "I" (v2), [v3] "I" (v3), [v4] "I" (v4) + : "memory"); +} + +static __always_inline void fpu_vgfmg(u8 v1, u8 v2, u8 v3) +{ + asm volatile("VGFMG %[v1],%[v2],%[v3]" + : + : [v1] "I" (v1), [v2] "I" (v2), [v3] "I" (v3) + : "memory"); +} + +#ifdef CONFIG_CC_IS_CLANG + +static __always_inline void fpu_vl(u8 v1, const void *vxr) +{ + instrument_read(vxr, sizeof(__vector128)); + asm volatile("\n" + " la 1,%[vxr]\n" + " VL %[v1],0,,1\n" + : + : [vxr] "R" (*(__vector128 *)vxr), + [v1] "I" (v1) + : "memory", "1"); +} + +#else /* CONFIG_CC_IS_CLANG */ + +static __always_inline void fpu_vl(u8 v1, const void *vxr) +{ + instrument_read(vxr, sizeof(__vector128)); + asm volatile("VL %[v1],%O[vxr],,%R[vxr]\n" + : + : [vxr] "Q" (*(__vector128 *)vxr), + [v1] "I" (v1) + : "memory"); +} + +#endif /* CONFIG_CC_IS_CLANG */ + +static __always_inline void fpu_vleib(u8 v, s16 val, u8 index) +{ + asm volatile("VLEIB %[v],%[val],%[index]" + : + : [v] "I" (v), [val] "K" (val), [index] "I" (index) + : "memory"); +} + +static __always_inline void fpu_vleig(u8 v, s16 val, u8 index) +{ + asm volatile("VLEIG %[v],%[val],%[index]" + : + : [v] "I" (v), [val] "K" (val), [index] "I" (index) + : "memory"); +} + +static __always_inline u64 fpu_vlgvf(u8 v, u16 index) +{ + u64 val; + + asm volatile("VLGVF %[val],%[v],%[index]" + : [val] "=d" (val) + : [v] "I" (v), [index] "L" (index) + : "memory"); + return val; +} + +#ifdef CONFIG_CC_IS_CLANG + +static __always_inline void fpu_vll(u8 v1, u32 index, const void *vxr) +{ + unsigned int size; + + size = min(index + 1, sizeof(__vector128)); + instrument_read(vxr, size); + asm volatile("\n" + " la 1,%[vxr]\n" + " VLL %[v1],%[index],0,1\n" + : + : [vxr] "R" (*(u8 *)vxr), + [index] "d" (index), + [v1] "I" (v1) + : "memory", "1"); +} + +#else /* CONFIG_CC_IS_CLANG */ + +static __always_inline void fpu_vll(u8 v1, u32 index, const void *vxr) +{ + unsigned int size; + + size = min(index + 1, sizeof(__vector128)); + instrument_read(vxr, size); + asm volatile("VLL %[v1],%[index],%O[vxr],%R[vxr]\n" + : + : [vxr] "Q" (*(u8 *)vxr), + [index] "d" (index), + [v1] "I" (v1) + : "memory"); +} + +#endif /* CONFIG_CC_IS_CLANG */ + +#ifdef CONFIG_CC_IS_CLANG + +#define fpu_vlm(_v1, _v3, _vxrs) \ +({ \ + unsigned int size = ((_v3) - (_v1) + 1) * sizeof(__vector128); \ + struct { \ + __vector128 _v[(_v3) - (_v1) + 1]; \ + } *_v = (void *)(_vxrs); \ + \ + instrument_read(_v, size); \ + asm volatile("\n" \ + " la 1,%[vxrs]\n" \ + " VLM %[v1],%[v3],0,1\n" \ + : \ + : [vxrs] "R" (*_v), \ + [v1] "I" (_v1), [v3] "I" (_v3) \ + : "memory", "1"); \ + (_v3) - (_v1) + 1; \ +}) + +#else /* CONFIG_CC_IS_CLANG */ + +#define fpu_vlm(_v1, _v3, _vxrs) \ +({ \ + unsigned int size = ((_v3) - (_v1) + 1) * sizeof(__vector128); \ + struct { \ + __vector128 _v[(_v3) - (_v1) + 1]; \ + } *_v = (void *)(_vxrs); \ + \ + instrument_read(_v, size); \ + asm volatile("VLM %[v1],%[v3],%O[vxrs],%R[vxrs]\n" \ + : \ + : [vxrs] "Q" (*_v), \ + [v1] "I" (_v1), [v3] "I" (_v3) \ + : "memory"); \ + (_v3) - (_v1) + 1; \ +}) + +#endif /* CONFIG_CC_IS_CLANG */ + +static __always_inline void fpu_vlr(u8 v1, u8 v2) +{ + asm volatile("VLR %[v1],%[v2]" + : + : [v1] "I" (v1), [v2] "I" (v2) + : "memory"); +} + +static __always_inline void fpu_vlvgf(u8 v, u32 val, u16 index) +{ + asm volatile("VLVGF %[v],%[val],%[index]" + : + : [v] "I" (v), [val] "d" (val), [index] "L" (index) + : "memory"); +} + +static __always_inline void fpu_vn(u8 v1, u8 v2, u8 v3) +{ + asm volatile("VN %[v1],%[v2],%[v3]" + : + : [v1] "I" (v1), [v2] "I" (v2), [v3] "I" (v3) + : "memory"); +} + +static __always_inline void fpu_vperm(u8 v1, u8 v2, u8 v3, u8 v4) +{ + asm volatile("VPERM %[v1],%[v2],%[v3],%[v4]" + : + : [v1] "I" (v1), [v2] "I" (v2), [v3] "I" (v3), [v4] "I" (v4) + : "memory"); +} + +static __always_inline void fpu_vrepib(u8 v1, s16 i2) +{ + asm volatile("VREPIB %[v1],%[i2]" + : + : [v1] "I" (v1), [i2] "K" (i2) + : "memory"); +} + +static __always_inline void fpu_vsrlb(u8 v1, u8 v2, u8 v3) +{ + asm volatile("VSRLB %[v1],%[v2],%[v3]" + : + : [v1] "I" (v1), [v2] "I" (v2), [v3] "I" (v3) + : "memory"); +} + +#ifdef CONFIG_CC_IS_CLANG + +static __always_inline void fpu_vst(u8 v1, const void *vxr) +{ + instrument_write(vxr, sizeof(__vector128)); + asm volatile("\n" + " la 1,%[vxr]\n" + " VST %[v1],0,,1\n" + : [vxr] "=R" (*(__vector128 *)vxr) + : [v1] "I" (v1) + : "memory", "1"); +} + +#else /* CONFIG_CC_IS_CLANG */ + +static __always_inline void fpu_vst(u8 v1, const void *vxr) +{ + instrument_write(vxr, sizeof(__vector128)); + asm volatile("VST %[v1],%O[vxr],,%R[vxr]\n" + : [vxr] "=Q" (*(__vector128 *)vxr) + : [v1] "I" (v1) + : "memory"); +} + +#endif /* CONFIG_CC_IS_CLANG */ + +#ifdef CONFIG_CC_IS_CLANG + +static __always_inline void fpu_vstl(u8 v1, u32 index, const void *vxr) +{ + unsigned int size; + + size = min(index + 1, sizeof(__vector128)); + instrument_write(vxr, size); + asm volatile("\n" + " la 1,%[vxr]\n" + " VSTL %[v1],%[index],0,1\n" + : [vxr] "=R" (*(u8 *)vxr) + : [index] "d" (index), [v1] "I" (v1) + : "memory", "1"); +} + +#else /* CONFIG_CC_IS_CLANG */ + +static __always_inline void fpu_vstl(u8 v1, u32 index, const void *vxr) +{ + unsigned int size; + + size = min(index + 1, sizeof(__vector128)); + instrument_write(vxr, size); + asm volatile("VSTL %[v1],%[index],%O[vxr],%R[vxr]\n" + : [vxr] "=Q" (*(u8 *)vxr) + : [index] "d" (index), [v1] "I" (v1) + : "memory"); +} + +#endif /* CONFIG_CC_IS_CLANG */ + +#ifdef CONFIG_CC_IS_CLANG + +#define fpu_vstm(_v1, _v3, _vxrs) \ +({ \ + unsigned int size = ((_v3) - (_v1) + 1) * sizeof(__vector128); \ + struct { \ + __vector128 _v[(_v3) - (_v1) + 1]; \ + } *_v = (void *)(_vxrs); \ + \ + instrument_write(_v, size); \ + asm volatile("\n" \ + " la 1,%[vxrs]\n" \ + " VSTM %[v1],%[v3],0,1\n" \ + : [vxrs] "=R" (*_v) \ + : [v1] "I" (_v1), [v3] "I" (_v3) \ + : "memory", "1"); \ + (_v3) - (_v1) + 1; \ +}) + +#else /* CONFIG_CC_IS_CLANG */ + +#define fpu_vstm(_v1, _v3, _vxrs) \ +({ \ + unsigned int size = ((_v3) - (_v1) + 1) * sizeof(__vector128); \ + struct { \ + __vector128 _v[(_v3) - (_v1) + 1]; \ + } *_v = (void *)(_vxrs); \ + \ + instrument_write(_v, size); \ + asm volatile("VSTM %[v1],%[v3],%O[vxrs],%R[vxrs]\n" \ + : [vxrs] "=Q" (*_v) \ + : [v1] "I" (_v1), [v3] "I" (_v3) \ + : "memory"); \ + (_v3) - (_v1) + 1; \ +}) + +#endif /* CONFIG_CC_IS_CLANG */ + +static __always_inline void fpu_vupllf(u8 v1, u8 v2) +{ + asm volatile("VUPLLF %[v1],%[v2]" + : + : [v1] "I" (v1), [v2] "I" (v2) + : "memory"); +} + +static __always_inline void fpu_vx(u8 v1, u8 v2, u8 v3) +{ + asm volatile("VX %[v1],%[v2],%[v3]" + : + : [v1] "I" (v1), [v2] "I" (v2), [v3] "I" (v3) + : "memory"); +} + +static __always_inline void fpu_vzero(u8 v) +{ + asm volatile("VZERO %[v]" + : + : [v] "I" (v) + : "memory"); +} + +#endif /* __ASSEMBLY__ */ +#endif /* __ASM_S390_FPU_INSN_H */ diff --git a/arch/s390/include/asm/fpu-types.h b/arch/s390/include/asm/fpu-types.h new file mode 100644 index 000000000..8d58d5a95 --- /dev/null +++ b/arch/s390/include/asm/fpu-types.h @@ -0,0 +1,51 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * FPU data structures + * + * Copyright IBM Corp. 2015 + * Author(s): Hendrik Brueckner + */ + +#ifndef _ASM_S390_FPU_TYPES_H +#define _ASM_S390_FPU_TYPES_H + +#include + +struct fpu { + u32 fpc; + __vector128 vxrs[__NUM_VXRS] __aligned(8); +}; + +struct kernel_fpu_hdr { + int mask; + u32 fpc; +}; + +struct kernel_fpu { + struct kernel_fpu_hdr hdr; + __vector128 vxrs[] __aligned(8); +}; + +#define KERNEL_FPU_STRUCT(vxr_size) \ +struct kernel_fpu_##vxr_size { \ + struct kernel_fpu_hdr hdr; \ + __vector128 vxrs[vxr_size] __aligned(8); \ +} + +KERNEL_FPU_STRUCT(8); +KERNEL_FPU_STRUCT(16); +KERNEL_FPU_STRUCT(32); + +#define DECLARE_KERNEL_FPU_ONSTACK(vxr_size, name) \ + struct kernel_fpu_##vxr_size name __uninitialized + +#define DECLARE_KERNEL_FPU_ONSTACK8(name) \ + DECLARE_KERNEL_FPU_ONSTACK(8, name) + +#define DECLARE_KERNEL_FPU_ONSTACK16(name) \ + DECLARE_KERNEL_FPU_ONSTACK(16, name) + +#define DECLARE_KERNEL_FPU_ONSTACK32(name) \ + DECLARE_KERNEL_FPU_ONSTACK(32, name) + +#endif /* _ASM_S390_FPU_TYPES_H */ diff --git a/arch/s390/include/asm/fpu.h b/arch/s390/include/asm/fpu.h new file mode 100644 index 000000000..c84cb3391 --- /dev/null +++ b/arch/s390/include/asm/fpu.h @@ -0,0 +1,295 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * In-kernel FPU support functions + * + * + * Consider these guidelines before using in-kernel FPU functions: + * + * 1. Use kernel_fpu_begin() and kernel_fpu_end() to enclose all in-kernel + * use of floating-point or vector registers and instructions. + * + * 2. For kernel_fpu_begin(), specify the vector register range you want to + * use with the KERNEL_VXR_* constants. Consider these usage guidelines: + * + * a) If your function typically runs in process-context, use the lower + * half of the vector registers, for example, specify KERNEL_VXR_LOW. + * b) If your function typically runs in soft-irq or hard-irq context, + * prefer using the upper half of the vector registers, for example, + * specify KERNEL_VXR_HIGH. + * + * If you adhere to these guidelines, an interrupted process context + * does not require to save and restore vector registers because of + * disjoint register ranges. + * + * Also note that the __kernel_fpu_begin()/__kernel_fpu_end() functions + * includes logic to save and restore up to 16 vector registers at once. + * + * 3. You can nest kernel_fpu_begin()/kernel_fpu_end() by using different + * struct kernel_fpu states. Vector registers that are in use by outer + * levels are saved and restored. You can minimize the save and restore + * effort by choosing disjoint vector register ranges. + * + * 5. To use vector floating-point instructions, specify the KERNEL_FPC + * flag to save and restore floating-point controls in addition to any + * vector register range. + * + * 6. To use floating-point registers and instructions only, specify the + * KERNEL_FPR flag. This flag triggers a save and restore of vector + * registers V0 to V15 and floating-point controls. + * + * Copyright IBM Corp. 2015 + * Author(s): Hendrik Brueckner + */ + +#ifndef _ASM_S390_FPU_H +#define _ASM_S390_FPU_H + +#include +#include +#include +#include +#include +#include +#include +#include + +static inline bool cpu_has_vx(void) +{ + return likely(test_facility(129)); +} + +enum { + KERNEL_FPC_BIT = 0, + KERNEL_VXR_V0V7_BIT, + KERNEL_VXR_V8V15_BIT, + KERNEL_VXR_V16V23_BIT, + KERNEL_VXR_V24V31_BIT, +}; + +#define KERNEL_FPC BIT(KERNEL_FPC_BIT) +#define KERNEL_VXR_V0V7 BIT(KERNEL_VXR_V0V7_BIT) +#define KERNEL_VXR_V8V15 BIT(KERNEL_VXR_V8V15_BIT) +#define KERNEL_VXR_V16V23 BIT(KERNEL_VXR_V16V23_BIT) +#define KERNEL_VXR_V24V31 BIT(KERNEL_VXR_V24V31_BIT) + +#define KERNEL_VXR_LOW (KERNEL_VXR_V0V7 | KERNEL_VXR_V8V15) +#define KERNEL_VXR_MID (KERNEL_VXR_V8V15 | KERNEL_VXR_V16V23) +#define KERNEL_VXR_HIGH (KERNEL_VXR_V16V23 | KERNEL_VXR_V24V31) + +#define KERNEL_VXR (KERNEL_VXR_LOW | KERNEL_VXR_HIGH) +#define KERNEL_FPR (KERNEL_FPC | KERNEL_VXR_LOW) + +void load_fpu_state(struct fpu *state, int flags); +void save_fpu_state(struct fpu *state, int flags); +void __kernel_fpu_begin(struct kernel_fpu *state, int flags); +void __kernel_fpu_end(struct kernel_fpu *state, int flags); + +static __always_inline void save_vx_regs(__vector128 *vxrs) +{ + fpu_vstm(0, 15, &vxrs[0]); + fpu_vstm(16, 31, &vxrs[16]); +} + +static __always_inline void load_vx_regs(__vector128 *vxrs) +{ + fpu_vlm(0, 15, &vxrs[0]); + fpu_vlm(16, 31, &vxrs[16]); +} + +static __always_inline void __save_fp_regs(freg_t *fprs, unsigned int offset) +{ + fpu_std(0, &fprs[0 * offset]); + fpu_std(1, &fprs[1 * offset]); + fpu_std(2, &fprs[2 * offset]); + fpu_std(3, &fprs[3 * offset]); + fpu_std(4, &fprs[4 * offset]); + fpu_std(5, &fprs[5 * offset]); + fpu_std(6, &fprs[6 * offset]); + fpu_std(7, &fprs[7 * offset]); + fpu_std(8, &fprs[8 * offset]); + fpu_std(9, &fprs[9 * offset]); + fpu_std(10, &fprs[10 * offset]); + fpu_std(11, &fprs[11 * offset]); + fpu_std(12, &fprs[12 * offset]); + fpu_std(13, &fprs[13 * offset]); + fpu_std(14, &fprs[14 * offset]); + fpu_std(15, &fprs[15 * offset]); +} + +static __always_inline void __load_fp_regs(freg_t *fprs, unsigned int offset) +{ + fpu_ld(0, &fprs[0 * offset]); + fpu_ld(1, &fprs[1 * offset]); + fpu_ld(2, &fprs[2 * offset]); + fpu_ld(3, &fprs[3 * offset]); + fpu_ld(4, &fprs[4 * offset]); + fpu_ld(5, &fprs[5 * offset]); + fpu_ld(6, &fprs[6 * offset]); + fpu_ld(7, &fprs[7 * offset]); + fpu_ld(8, &fprs[8 * offset]); + fpu_ld(9, &fprs[9 * offset]); + fpu_ld(10, &fprs[10 * offset]); + fpu_ld(11, &fprs[11 * offset]); + fpu_ld(12, &fprs[12 * offset]); + fpu_ld(13, &fprs[13 * offset]); + fpu_ld(14, &fprs[14 * offset]); + fpu_ld(15, &fprs[15 * offset]); +} + +static __always_inline void save_fp_regs(freg_t *fprs) +{ + __save_fp_regs(fprs, sizeof(freg_t) / sizeof(freg_t)); +} + +static __always_inline void load_fp_regs(freg_t *fprs) +{ + __load_fp_regs(fprs, sizeof(freg_t) / sizeof(freg_t)); +} + +static __always_inline void save_fp_regs_vx(__vector128 *vxrs) +{ + freg_t *fprs = (freg_t *)&vxrs[0].high; + + __save_fp_regs(fprs, sizeof(__vector128) / sizeof(freg_t)); +} + +static __always_inline void load_fp_regs_vx(__vector128 *vxrs) +{ + freg_t *fprs = (freg_t *)&vxrs[0].high; + + __load_fp_regs(fprs, sizeof(__vector128) / sizeof(freg_t)); +} + +static inline void load_user_fpu_regs(void) +{ + struct thread_struct *thread = ¤t->thread; + + if (!thread->ufpu_flags) + return; + load_fpu_state(&thread->ufpu, thread->ufpu_flags); + thread->ufpu_flags = 0; +} + +static __always_inline void __save_user_fpu_regs(struct thread_struct *thread, int flags) +{ + save_fpu_state(&thread->ufpu, flags); + __atomic_or(flags, &thread->ufpu_flags); +} + +static inline void save_user_fpu_regs(void) +{ + struct thread_struct *thread = ¤t->thread; + int mask, flags; + + mask = __atomic_or(KERNEL_FPC | KERNEL_VXR, &thread->kfpu_flags); + flags = ~READ_ONCE(thread->ufpu_flags) & (KERNEL_FPC | KERNEL_VXR); + if (flags) + __save_user_fpu_regs(thread, flags); + barrier(); + WRITE_ONCE(thread->kfpu_flags, mask); +} + +static __always_inline void _kernel_fpu_begin(struct kernel_fpu *state, int flags) +{ + struct thread_struct *thread = ¤t->thread; + int mask, uflags; + + mask = __atomic_or(flags, &thread->kfpu_flags); + state->hdr.mask = mask; + uflags = READ_ONCE(thread->ufpu_flags); + if ((uflags & flags) != flags) + __save_user_fpu_regs(thread, ~uflags & flags); + if (mask & flags) + __kernel_fpu_begin(state, flags); +} + +static __always_inline void _kernel_fpu_end(struct kernel_fpu *state, int flags) +{ + int mask = state->hdr.mask; + + if (mask & flags) + __kernel_fpu_end(state, flags); + barrier(); + WRITE_ONCE(current->thread.kfpu_flags, mask); +} + +void __kernel_fpu_invalid_size(void); + +static __always_inline void kernel_fpu_check_size(int flags, unsigned int size) +{ + unsigned int cnt = 0; + + if (flags & KERNEL_VXR_V0V7) + cnt += 8; + if (flags & KERNEL_VXR_V8V15) + cnt += 8; + if (flags & KERNEL_VXR_V16V23) + cnt += 8; + if (flags & KERNEL_VXR_V24V31) + cnt += 8; + if (cnt != size) + __kernel_fpu_invalid_size(); +} + +#define kernel_fpu_begin(state, flags) \ +{ \ + typeof(state) s = (state); \ + int _flags = (flags); \ + \ + kernel_fpu_check_size(_flags, ARRAY_SIZE(s->vxrs)); \ + _kernel_fpu_begin((struct kernel_fpu *)s, _flags); \ +} + +#define kernel_fpu_end(state, flags) \ +{ \ + typeof(state) s = (state); \ + int _flags = (flags); \ + \ + kernel_fpu_check_size(_flags, ARRAY_SIZE(s->vxrs)); \ + _kernel_fpu_end((struct kernel_fpu *)s, _flags); \ +} + +static inline void save_kernel_fpu_regs(struct thread_struct *thread) +{ + if (!thread->kfpu_flags) + return; + save_fpu_state(&thread->kfpu, thread->kfpu_flags); +} + +static inline void restore_kernel_fpu_regs(struct thread_struct *thread) +{ + if (!thread->kfpu_flags) + return; + load_fpu_state(&thread->kfpu, thread->kfpu_flags); +} + +static inline void convert_vx_to_fp(freg_t *fprs, __vector128 *vxrs) +{ + int i; + + for (i = 0; i < __NUM_FPRS; i++) + fprs[i].ui = vxrs[i].high; +} + +static inline void convert_fp_to_vx(__vector128 *vxrs, freg_t *fprs) +{ + int i; + + for (i = 0; i < __NUM_FPRS; i++) + vxrs[i].high = fprs[i].ui; +} + +static inline void fpregs_store(_s390_fp_regs *fpregs, struct fpu *fpu) +{ + fpregs->pad = 0; + fpregs->fpc = fpu->fpc; + convert_vx_to_fp((freg_t *)&fpregs->fprs, fpu->vxrs); +} + +static inline void fpregs_load(_s390_fp_regs *fpregs, struct fpu *fpu) +{ + fpu->fpc = fpregs->fpc; + convert_fp_to_vx(fpu->vxrs, (freg_t *)&fpregs->fprs); +} + +#endif /* _ASM_S390_FPU_H */ diff --git a/arch/s390/include/asm/fpu/api.h b/arch/s390/include/asm/fpu/api.h deleted file mode 100644 index d6ca8bc6c..000000000 --- a/arch/s390/include/asm/fpu/api.h +++ /dev/null @@ -1,126 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * In-kernel FPU support functions - * - * - * Consider these guidelines before using in-kernel FPU functions: - * - * 1. Use kernel_fpu_begin() and kernel_fpu_end() to enclose all in-kernel - * use of floating-point or vector registers and instructions. - * - * 2. For kernel_fpu_begin(), specify the vector register range you want to - * use with the KERNEL_VXR_* constants. Consider these usage guidelines: - * - * a) If your function typically runs in process-context, use the lower - * half of the vector registers, for example, specify KERNEL_VXR_LOW. - * b) If your function typically runs in soft-irq or hard-irq context, - * prefer using the upper half of the vector registers, for example, - * specify KERNEL_VXR_HIGH. - * - * If you adhere to these guidelines, an interrupted process context - * does not require to save and restore vector registers because of - * disjoint register ranges. - * - * Also note that the __kernel_fpu_begin()/__kernel_fpu_end() functions - * includes logic to save and restore up to 16 vector registers at once. - * - * 3. You can nest kernel_fpu_begin()/kernel_fpu_end() by using different - * struct kernel_fpu states. Vector registers that are in use by outer - * levels are saved and restored. You can minimize the save and restore - * effort by choosing disjoint vector register ranges. - * - * 5. To use vector floating-point instructions, specify the KERNEL_FPC - * flag to save and restore floating-point controls in addition to any - * vector register range. - * - * 6. To use floating-point registers and instructions only, specify the - * KERNEL_FPR flag. This flag triggers a save and restore of vector - * registers V0 to V15 and floating-point controls. - * - * Copyright IBM Corp. 2015 - * Author(s): Hendrik Brueckner - */ - -#ifndef _ASM_S390_FPU_API_H -#define _ASM_S390_FPU_API_H - -#include -#include -#include - -void save_fpu_regs(void); -void load_fpu_regs(void); -void __load_fpu_regs(void); - -/** - * sfpc_safe - Set floating point control register safely. - * @fpc: new value for floating point control register - * - * Set floating point control register. This may lead to an exception, - * since a saved value may have been modified by user space (ptrace, - * signal return, kvm registers) to an invalid value. In such a case - * set the floating point control register to zero. - */ -static inline void sfpc_safe(u32 fpc) -{ - asm volatile("\n" - "0: sfpc %[fpc]\n" - "1: nopr %%r7\n" - ".pushsection .fixup, \"ax\"\n" - "2: lghi %[fpc],0\n" - " jg 0b\n" - ".popsection\n" - EX_TABLE(1b, 2b) - : [fpc] "+d" (fpc) - : : "memory"); -} - -#define KERNEL_FPC 1 -#define KERNEL_VXR_V0V7 2 -#define KERNEL_VXR_V8V15 4 -#define KERNEL_VXR_V16V23 8 -#define KERNEL_VXR_V24V31 16 - -#define KERNEL_VXR_LOW (KERNEL_VXR_V0V7|KERNEL_VXR_V8V15) -#define KERNEL_VXR_MID (KERNEL_VXR_V8V15|KERNEL_VXR_V16V23) -#define KERNEL_VXR_HIGH (KERNEL_VXR_V16V23|KERNEL_VXR_V24V31) - -#define KERNEL_VXR (KERNEL_VXR_LOW|KERNEL_VXR_HIGH) -#define KERNEL_FPR (KERNEL_FPC|KERNEL_VXR_LOW) - -struct kernel_fpu; - -/* - * Note the functions below must be called with preemption disabled. - * Do not enable preemption before calling __kernel_fpu_end() to prevent - * an corruption of an existing kernel FPU state. - * - * Prefer using the kernel_fpu_begin()/kernel_fpu_end() pair of functions. - */ -void __kernel_fpu_begin(struct kernel_fpu *state, u32 flags); -void __kernel_fpu_end(struct kernel_fpu *state, u32 flags); - - -static inline void kernel_fpu_begin(struct kernel_fpu *state, u32 flags) -{ - preempt_disable(); - state->mask = S390_lowcore.fpu_flags; - if (!test_cpu_flag(CIF_FPU)) - /* Save user space FPU state and register contents */ - save_fpu_regs(); - else if (state->mask & flags) - /* Save FPU/vector register in-use by the kernel */ - __kernel_fpu_begin(state, flags); - S390_lowcore.fpu_flags |= flags; -} - -static inline void kernel_fpu_end(struct kernel_fpu *state, u32 flags) -{ - S390_lowcore.fpu_flags = state->mask; - if (state->mask & flags) - /* Restore FPU/vector register in-use by the kernel */ - __kernel_fpu_end(state, flags); - preempt_enable(); -} - -#endif /* _ASM_S390_FPU_API_H */ diff --git a/arch/s390/include/asm/fpu/internal.h b/arch/s390/include/asm/fpu/internal.h deleted file mode 100644 index d511c4cf5..000000000 --- a/arch/s390/include/asm/fpu/internal.h +++ /dev/null @@ -1,67 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * FPU state and register content conversion primitives - * - * Copyright IBM Corp. 2015 - * Author(s): Hendrik Brueckner - */ - -#ifndef _ASM_S390_FPU_INTERNAL_H -#define _ASM_S390_FPU_INTERNAL_H - -#include -#include -#include - -static inline bool cpu_has_vx(void) -{ - return likely(test_facility(129)); -} - -static inline void save_vx_regs(__vector128 *vxrs) -{ - asm volatile( - " la 1,%0\n" - " .word 0xe70f,0x1000,0x003e\n" /* vstm 0,15,0(1) */ - " .word 0xe70f,0x1100,0x0c3e\n" /* vstm 16,31,256(1) */ - : "=Q" (*(struct vx_array *) vxrs) : : "1"); -} - -static inline void convert_vx_to_fp(freg_t *fprs, __vector128 *vxrs) -{ - int i; - - for (i = 0; i < __NUM_FPRS; i++) - fprs[i].ui = vxrs[i].high; -} - -static inline void convert_fp_to_vx(__vector128 *vxrs, freg_t *fprs) -{ - int i; - - for (i = 0; i < __NUM_FPRS; i++) - vxrs[i].high = fprs[i].ui; -} - -static inline void fpregs_store(_s390_fp_regs *fpregs, struct fpu *fpu) -{ - fpregs->pad = 0; - fpregs->fpc = fpu->fpc; - if (cpu_has_vx()) - convert_vx_to_fp((freg_t *)&fpregs->fprs, fpu->vxrs); - else - memcpy((freg_t *)&fpregs->fprs, fpu->fprs, - sizeof(fpregs->fprs)); -} - -static inline void fpregs_load(_s390_fp_regs *fpregs, struct fpu *fpu) -{ - fpu->fpc = fpregs->fpc; - if (cpu_has_vx()) - convert_fp_to_vx(fpu->vxrs, (freg_t *)&fpregs->fprs); - else - memcpy(fpu->fprs, (freg_t *)&fpregs->fprs, - sizeof(fpregs->fprs)); -} - -#endif /* _ASM_S390_FPU_INTERNAL_H */ diff --git a/arch/s390/include/asm/fpu/types.h b/arch/s390/include/asm/fpu/types.h deleted file mode 100644 index d889e9436..000000000 --- a/arch/s390/include/asm/fpu/types.h +++ /dev/null @@ -1,38 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * FPU data structures - * - * Copyright IBM Corp. 2015 - * Author(s): Hendrik Brueckner - */ - -#ifndef _ASM_S390_FPU_TYPES_H -#define _ASM_S390_FPU_TYPES_H - -#include - -struct fpu { - __u32 fpc; /* Floating-point control */ - void *regs; /* Pointer to the current save area */ - union { - /* Floating-point register save area */ - freg_t fprs[__NUM_FPRS]; - /* Vector register save area */ - __vector128 vxrs[__NUM_VXRS]; - }; -}; - -/* VX array structure for address operand constraints in inline assemblies */ -struct vx_array { __vector128 _[__NUM_VXRS]; }; - -/* In-kernel FPU state structure */ -struct kernel_fpu { - u32 mask; - u32 fpc; - union { - freg_t fprs[__NUM_FPRS]; - __vector128 vxrs[__NUM_VXRS]; - }; -}; - -#endif /* _ASM_S390_FPU_TYPES_H */ diff --git a/arch/s390/include/asm/ftrace.h b/arch/s390/include/asm/ftrace.h index 5a82b08f0..621f23d5a 100644 --- a/arch/s390/include/asm/ftrace.h +++ b/arch/s390/include/asm/ftrace.h @@ -9,7 +9,7 @@ #ifndef __ASSEMBLY__ #ifdef CONFIG_CC_IS_CLANG -/* https://bugs.llvm.org/show_bug.cgi?id=41424 */ +/* https://llvm.org/pr41424 */ #define ftrace_return_address(n) 0UL #else #define ftrace_return_address(n) __builtin_return_address(n) diff --git a/arch/s390/include/asm/gmap.h b/arch/s390/include/asm/gmap.h index 9725586f4..5cc46e0dd 100644 --- a/arch/s390/include/asm/gmap.h +++ b/arch/s390/include/asm/gmap.h @@ -146,7 +146,7 @@ int gmap_mprotect_notify(struct gmap *, unsigned long start, void gmap_sync_dirty_log_pmd(struct gmap *gmap, unsigned long dirty_bitmap[4], unsigned long gaddr, unsigned long vmaddr); -int s390_disable_cow_sharing(void); +int gmap_mark_unmergeable(void); void s390_unlist_old_asce(struct gmap *gmap); int s390_replace_asce(struct gmap *gmap); void s390_uv_destroy_pfns(unsigned long count, unsigned long *pfns); diff --git a/arch/s390/include/asm/idals.h b/arch/s390/include/asm/idals.h index 59fcc3c72..ac68c657b 100644 --- a/arch/s390/include/asm/idals.h +++ b/arch/s390/include/asm/idals.h @@ -1,5 +1,5 @@ /* SPDX-License-Identifier: GPL-2.0 */ -/* +/* * Author(s)......: Holger Smolinski * Martin Schwidefsky * Bugreports.to..: @@ -17,32 +17,37 @@ #include #include #include -#include #include +#include +#include -#define IDA_SIZE_LOG 12 /* 11 for 2k , 12 for 4k */ -#define IDA_BLOCK_SIZE (1L<> 31) != 0; -} + dma64_t paddr = virt_to_dma64(vaddr); + return (((__force unsigned long)(paddr) + length - 1) >> 31) != 0; +} /* * Return the number of idal words needed for an address/length pair. */ static inline unsigned int idal_nr_words(void *vaddr, unsigned int length) { - return ((__pa(vaddr) & (IDA_BLOCK_SIZE-1)) + length + - (IDA_BLOCK_SIZE-1)) >> IDA_SIZE_LOG; + unsigned int cidaw; + + cidaw = (unsigned long)vaddr & (IDA_BLOCK_SIZE - 1); + cidaw += length + IDA_BLOCK_SIZE - 1; + cidaw >>= IDA_SIZE_SHIFT; + return cidaw; } /* @@ -50,26 +55,27 @@ static inline unsigned int idal_nr_words(void *vaddr, unsigned int length) */ static inline unsigned int idal_2k_nr_words(void *vaddr, unsigned int length) { - return ((__pa(vaddr) & (IDA_2K_BLOCK_SIZE - 1)) + length + - (IDA_2K_BLOCK_SIZE - 1)) >> IDA_2K_SIZE_LOG; + unsigned int cidaw; + + cidaw = (unsigned long)vaddr & (IDA_2K_BLOCK_SIZE - 1); + cidaw += length + IDA_2K_BLOCK_SIZE - 1; + cidaw >>= IDA_2K_SIZE_SHIFT; + return cidaw; } /* * Create the list of idal words for an address/length pair. */ -static inline unsigned long *idal_create_words(unsigned long *idaws, - void *vaddr, unsigned int length) +static inline dma64_t *idal_create_words(dma64_t *idaws, void *vaddr, unsigned int length) { - unsigned long paddr; + dma64_t paddr = virt_to_dma64(vaddr); unsigned int cidaw; - paddr = __pa(vaddr); - cidaw = ((paddr & (IDA_BLOCK_SIZE-1)) + length + - (IDA_BLOCK_SIZE-1)) >> IDA_SIZE_LOG; *idaws++ = paddr; - paddr &= -IDA_BLOCK_SIZE; + cidaw = idal_nr_words(vaddr, length); + paddr = dma64_and(paddr, -IDA_BLOCK_SIZE); while (--cidaw > 0) { - paddr += IDA_BLOCK_SIZE; + paddr = dma64_add(paddr, IDA_BLOCK_SIZE); *idaws++ = paddr; } return idaws; @@ -79,36 +85,33 @@ static inline unsigned long *idal_create_words(unsigned long *idaws, * Sets the address of the data in CCW. * If necessary it allocates an IDAL and sets the appropriate flags. */ -static inline int -set_normalized_cda(struct ccw1 * ccw, void *vaddr) +static inline int set_normalized_cda(struct ccw1 *ccw, void *vaddr) { unsigned int nridaws; - unsigned long *idal; + dma64_t *idal; if (ccw->flags & CCW_FLAG_IDA) return -EINVAL; nridaws = idal_nr_words(vaddr, ccw->count); if (nridaws > 0) { - idal = kmalloc(nridaws * sizeof(unsigned long), - GFP_ATOMIC | GFP_DMA ); - if (idal == NULL) + idal = kcalloc(nridaws, sizeof(*idal), GFP_ATOMIC | GFP_DMA); + if (!idal) return -ENOMEM; idal_create_words(idal, vaddr, ccw->count); ccw->flags |= CCW_FLAG_IDA; vaddr = idal; } - ccw->cda = (__u32)(unsigned long) vaddr; + ccw->cda = virt_to_dma32(vaddr); return 0; } /* * Releases any allocated IDAL related to the CCW. */ -static inline void -clear_normalized_cda(struct ccw1 * ccw) +static inline void clear_normalized_cda(struct ccw1 *ccw) { if (ccw->flags & CCW_FLAG_IDA) { - kfree((void *)(unsigned long) ccw->cda); + kfree(dma32_to_virt(ccw->cda)); ccw->flags &= ~CCW_FLAG_IDA; } ccw->cda = 0; @@ -120,125 +123,138 @@ clear_normalized_cda(struct ccw1 * ccw) struct idal_buffer { size_t size; size_t page_order; - void *data[]; + dma64_t data[]; }; /* * Allocate an idal buffer */ -static inline struct idal_buffer * -idal_buffer_alloc(size_t size, int page_order) +static inline struct idal_buffer *idal_buffer_alloc(size_t size, int page_order) { - struct idal_buffer *ib; int nr_chunks, nr_ptrs, i; + struct idal_buffer *ib; + void *vaddr; - nr_ptrs = (size + IDA_BLOCK_SIZE - 1) >> IDA_SIZE_LOG; - nr_chunks = (4096 << page_order) >> IDA_SIZE_LOG; + nr_ptrs = (size + IDA_BLOCK_SIZE - 1) >> IDA_SIZE_SHIFT; + nr_chunks = (PAGE_SIZE << page_order) >> IDA_SIZE_SHIFT; ib = kmalloc(struct_size(ib, data, nr_ptrs), GFP_DMA | GFP_KERNEL); - if (ib == NULL) + if (!ib) return ERR_PTR(-ENOMEM); ib->size = size; ib->page_order = page_order; for (i = 0; i < nr_ptrs; i++) { - if ((i & (nr_chunks - 1)) != 0) { - ib->data[i] = ib->data[i-1] + IDA_BLOCK_SIZE; - continue; - } - ib->data[i] = (void *) - __get_free_pages(GFP_KERNEL, page_order); - if (ib->data[i] != NULL) + if (i & (nr_chunks - 1)) { + ib->data[i] = dma64_add(ib->data[i - 1], IDA_BLOCK_SIZE); continue; - // Not enough memory - while (i >= nr_chunks) { - i -= nr_chunks; - free_pages((unsigned long) ib->data[i], - ib->page_order); } - kfree(ib); - return ERR_PTR(-ENOMEM); + vaddr = (void *)__get_free_pages(GFP_KERNEL, page_order); + if (!vaddr) + goto error; + ib->data[i] = virt_to_dma64(vaddr); } return ib; +error: + while (i >= nr_chunks) { + i -= nr_chunks; + vaddr = dma64_to_virt(ib->data[i]); + free_pages((unsigned long)vaddr, ib->page_order); + } + kfree(ib); + return ERR_PTR(-ENOMEM); } /* * Free an idal buffer. */ -static inline void -idal_buffer_free(struct idal_buffer *ib) +static inline void idal_buffer_free(struct idal_buffer *ib) { int nr_chunks, nr_ptrs, i; + void *vaddr; - nr_ptrs = (ib->size + IDA_BLOCK_SIZE - 1) >> IDA_SIZE_LOG; - nr_chunks = (4096 << ib->page_order) >> IDA_SIZE_LOG; - for (i = 0; i < nr_ptrs; i += nr_chunks) - free_pages((unsigned long) ib->data[i], ib->page_order); + nr_ptrs = (ib->size + IDA_BLOCK_SIZE - 1) >> IDA_SIZE_SHIFT; + nr_chunks = (PAGE_SIZE << ib->page_order) >> IDA_SIZE_SHIFT; + for (i = 0; i < nr_ptrs; i += nr_chunks) { + vaddr = dma64_to_virt(ib->data[i]); + free_pages((unsigned long)vaddr, ib->page_order); + } kfree(ib); } /* * Test if a idal list is really needed. */ -static inline int -__idal_buffer_is_needed(struct idal_buffer *ib) +static inline bool __idal_buffer_is_needed(struct idal_buffer *ib) { - return ib->size > (4096ul << ib->page_order) || - idal_is_needed(ib->data[0], ib->size); + if (ib->size > (PAGE_SIZE << ib->page_order)) + return true; + return idal_is_needed(dma64_to_virt(ib->data[0]), ib->size); } /* * Set channel data address to idal buffer. */ -static inline void -idal_buffer_set_cda(struct idal_buffer *ib, struct ccw1 *ccw) +static inline void idal_buffer_set_cda(struct idal_buffer *ib, struct ccw1 *ccw) { + void *vaddr; + if (__idal_buffer_is_needed(ib)) { - // setup idals; - ccw->cda = (u32)(addr_t) ib->data; + /* Setup idals */ + ccw->cda = virt_to_dma32(ib->data); ccw->flags |= CCW_FLAG_IDA; - } else - // we do not need idals - use direct addressing - ccw->cda = (u32)(addr_t) ib->data[0]; + } else { + /* + * No idals needed - use direct addressing. Convert from + * dma64_t to virt and then to dma32_t only because of type + * checking. The physical address is known to be below 2GB. + */ + vaddr = dma64_to_virt(ib->data[0]); + ccw->cda = virt_to_dma32(vaddr); + } ccw->count = ib->size; } /* * Copy count bytes from an idal buffer to user memory */ -static inline size_t -idal_buffer_to_user(struct idal_buffer *ib, void __user *to, size_t count) +static inline size_t idal_buffer_to_user(struct idal_buffer *ib, void __user *to, size_t count) { size_t left; + void *vaddr; int i; BUG_ON(count > ib->size); for (i = 0; count > IDA_BLOCK_SIZE; i++) { - left = copy_to_user(to, ib->data[i], IDA_BLOCK_SIZE); + vaddr = dma64_to_virt(ib->data[i]); + left = copy_to_user(to, vaddr, IDA_BLOCK_SIZE); if (left) return left + count - IDA_BLOCK_SIZE; - to = (void __user *) to + IDA_BLOCK_SIZE; + to = (void __user *)to + IDA_BLOCK_SIZE; count -= IDA_BLOCK_SIZE; } - return copy_to_user(to, ib->data[i], count); + vaddr = dma64_to_virt(ib->data[i]); + return copy_to_user(to, vaddr, count); } /* * Copy count bytes from user memory to an idal buffer */ -static inline size_t -idal_buffer_from_user(struct idal_buffer *ib, const void __user *from, size_t count) +static inline size_t idal_buffer_from_user(struct idal_buffer *ib, const void __user *from, size_t count) { size_t left; + void *vaddr; int i; BUG_ON(count > ib->size); for (i = 0; count > IDA_BLOCK_SIZE; i++) { - left = copy_from_user(ib->data[i], from, IDA_BLOCK_SIZE); + vaddr = dma64_to_virt(ib->data[i]); + left = copy_from_user(vaddr, from, IDA_BLOCK_SIZE); if (left) return left + count - IDA_BLOCK_SIZE; - from = (void __user *) from + IDA_BLOCK_SIZE; + from = (void __user *)from + IDA_BLOCK_SIZE; count -= IDA_BLOCK_SIZE; } - return copy_from_user(ib->data[i], from, count); + vaddr = dma64_to_virt(ib->data[i]); + return copy_from_user(vaddr, from, count); } #endif diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h index 52664105a..959904618 100644 --- a/arch/s390/include/asm/kvm_host.h +++ b/arch/s390/include/asm/kvm_host.h @@ -23,7 +23,7 @@ #include #include #include -#include +#include #include #include @@ -743,7 +743,6 @@ struct kvm_vcpu_arch { struct kvm_s390_sie_block *vsie_block; unsigned int host_acrs[NUM_ACRS]; struct gs_cb *host_gscb; - struct fpu host_fpregs; struct kvm_s390_local_interrupt local_int; struct hrtimer ckc_timer; struct kvm_s390_pgm_info pgm; @@ -765,6 +764,8 @@ struct kvm_vcpu_arch { __u64 cputm_start; bool gs_enabled; bool skey_enabled; + /* Indicator if the access registers have been loaded from guest */ + bool acrs_loaded; struct kvm_s390_pv_vcpu pv; union diag318_info diag318_info; }; diff --git a/arch/s390/include/asm/lowcore.h b/arch/s390/include/asm/lowcore.h index 5dc1b6345..8c5f16857 100644 --- a/arch/s390/include/asm/lowcore.h +++ b/arch/s390/include/asm/lowcore.h @@ -157,7 +157,7 @@ struct lowcore { __s32 preempt_count; /* 0x03a8 */ __u32 spinlock_lockval; /* 0x03ac */ __u32 spinlock_index; /* 0x03b0 */ - __u32 fpu_flags; /* 0x03b4 */ + __u8 pad_0x03b4[0x03b8-0x03b4]; /* 0x03b4 */ __u64 percpu_offset; /* 0x03b8 */ __u8 pad_0x03c0[0x03c8-0x03c0]; /* 0x03c0 */ __u64 machine_flags; /* 0x03c8 */ diff --git a/arch/s390/include/asm/mmu.h b/arch/s390/include/asm/mmu.h index 4c2dc7abc..bb1b4bef1 100644 --- a/arch/s390/include/asm/mmu.h +++ b/arch/s390/include/asm/mmu.h @@ -32,11 +32,6 @@ typedef struct { unsigned int uses_skeys:1; /* The mmu context uses CMM. */ unsigned int uses_cmm:1; - /* - * The mmu context allows COW-sharing of memory pages (KSM, zeropage). - * Note that COW-sharing during fork() is currently always allowed. - */ - unsigned int allow_cow_sharing:1; /* The gmaps associated with this context are allowed to use huge pages. */ unsigned int allow_gmap_hpage_1m:1; } mm_context_t; diff --git a/arch/s390/include/asm/mmu_context.h b/arch/s390/include/asm/mmu_context.h index a7789a9f6..929af18b0 100644 --- a/arch/s390/include/asm/mmu_context.h +++ b/arch/s390/include/asm/mmu_context.h @@ -35,7 +35,6 @@ static inline int init_new_context(struct task_struct *tsk, mm->context.has_pgste = 0; mm->context.uses_skeys = 0; mm->context.uses_cmm = 0; - mm->context.allow_cow_sharing = 1; mm->context.allow_gmap_hpage_1m = 0; #endif switch (mm->context.asce_limit) { diff --git a/arch/s390/include/asm/page.h b/arch/s390/include/asm/page.h index 73b9c3bf3..9381879f7 100644 --- a/arch/s390/include/asm/page.h +++ b/arch/s390/include/asm/page.h @@ -11,7 +11,7 @@ #include #include -#define _PAGE_SHIFT 12 +#define _PAGE_SHIFT CONFIG_PAGE_SHIFT #define _PAGE_SIZE (_AC(1, UL) << _PAGE_SHIFT) #define _PAGE_MASK (~(_PAGE_SIZE - 1)) @@ -181,9 +181,35 @@ int arch_make_page_accessible(struct page *page); #define __PAGE_OFFSET 0x0UL #define PAGE_OFFSET 0x0UL -#define __pa(x) ((unsigned long)(x)) +#define __pa_nodebug(x) ((unsigned long)(x)) + +#ifdef __DECOMPRESSOR + +#define __pa(x) __pa_nodebug(x) +#define __pa32(x) __pa(x) #define __va(x) ((void *)(unsigned long)(x)) +#else /* __DECOMPRESSOR */ + +#ifdef CONFIG_DEBUG_VIRTUAL + +unsigned long __phys_addr(unsigned long x, bool is_31bit); + +#else /* CONFIG_DEBUG_VIRTUAL */ + +static inline unsigned long __phys_addr(unsigned long x, bool is_31bit) +{ + return __pa_nodebug(x); +} + +#endif /* CONFIG_DEBUG_VIRTUAL */ + +#define __pa(x) __phys_addr((unsigned long)(x), false) +#define __pa32(x) __phys_addr((unsigned long)(x), true) +#define __va(x) ((void *)(unsigned long)(x)) + +#endif /* __DECOMPRESSOR */ + #define phys_to_pfn(phys) ((phys) >> PAGE_SHIFT) #define pfn_to_phys(pfn) ((pfn) << PAGE_SHIFT) @@ -205,7 +231,7 @@ static inline unsigned long virt_to_pfn(const void *kaddr) #define virt_to_page(kaddr) pfn_to_page(virt_to_pfn(kaddr)) #define page_to_virt(page) pfn_to_virt(page_to_pfn(page)) -#define virt_addr_valid(kaddr) pfn_valid(virt_to_pfn(kaddr)) +#define virt_addr_valid(kaddr) pfn_valid(phys_to_pfn(__pa_nodebug(kaddr))) #define VM_DATA_DEFAULT_FLAGS VM_DATA_FLAGS_NON_EXEC diff --git a/arch/s390/include/asm/pai.h b/arch/s390/include/asm/pai.h index 7d1888e3d..3f6095657 100644 --- a/arch/s390/include/asm/pai.h +++ b/arch/s390/include/asm/pai.h @@ -16,7 +16,7 @@ struct qpaci_info_block { u64 header; struct { u64 : 8; - u64 num_cc : 8; /* # of supported crypto counters */ + u64 num_cc : 8; /* # of supported crypto counters */ u64 : 9; u64 num_nnpa : 7; /* # of supported NNPA counters */ u64 : 32; @@ -81,4 +81,5 @@ enum paievt_mode { PAI_MODE_COUNTING, }; +#define PAI_SAVE_AREA(x) ((x)->hw.event_base) #endif diff --git a/arch/s390/include/asm/pci.h b/arch/s390/include/asm/pci.h index e91cd6bbc..30820a649 100644 --- a/arch/s390/include/asm/pci.h +++ b/arch/s390/include/asm/pci.h @@ -122,6 +122,7 @@ struct zpci_dev { struct rcu_head rcu; struct hotplug_slot hotplug_slot; + struct mutex state_lock; /* protect state changes */ enum zpci_state state; u32 fid; /* function ID, used by sclp */ u32 fh; /* function handle, used by insn's */ @@ -142,7 +143,6 @@ struct zpci_dev { u8 reserved : 2; unsigned int devfn; /* DEVFN part of the RID*/ - struct mutex lock; u8 pfip[CLP_PFIP_NR_SEGMENTS]; /* pci function internal path */ u32 uid; /* user defined id */ u8 util_str[CLP_UTIL_STR_LEN]; /* utility string */ @@ -170,6 +170,7 @@ struct zpci_dev { u64 dma_mask; /* DMA address space mask */ /* Function measurement block */ + struct mutex fmb_lock; struct zpci_fmb *fmb; u16 fmb_update; /* update interval */ u16 fmb_length; diff --git a/arch/s390/include/asm/pgalloc.h b/arch/s390/include/asm/pgalloc.h index 502d655fe..7b84ef6dc 100644 --- a/arch/s390/include/asm/pgalloc.h +++ b/arch/s390/include/asm/pgalloc.h @@ -23,9 +23,9 @@ unsigned long *crst_table_alloc(struct mm_struct *); void crst_table_free(struct mm_struct *, unsigned long *); unsigned long *page_table_alloc(struct mm_struct *); -struct page *page_table_alloc_pgste(struct mm_struct *mm); +struct ptdesc *page_table_alloc_pgste(struct mm_struct *mm); void page_table_free(struct mm_struct *, unsigned long *); -void page_table_free_pgste(struct page *page); +void page_table_free_pgste(struct ptdesc *ptdesc); extern int page_table_allocate_pgste; static inline void crst_table_init(unsigned long *crst, unsigned long entry) diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h index 0a7055518..60950e7a2 100644 --- a/arch/s390/include/asm/pgtable.h +++ b/arch/s390/include/asm/pgtable.h @@ -566,20 +566,10 @@ static inline pud_t set_pud_bit(pud_t pud, pgprot_t prot) } /* - * As soon as the guest uses storage keys or enables PV, we deduplicate all - * mapped shared zeropages and prevent new shared zeropages from getting - * mapped. + * In the case that a guest uses storage keys + * faults should no longer be backed by zero pages */ -#define mm_forbids_zeropage mm_forbids_zeropage -static inline int mm_forbids_zeropage(struct mm_struct *mm) -{ -#ifdef CONFIG_PGSTE - if (!mm->context.allow_cow_sharing) - return 1; -#endif - return 0; -} - +#define mm_forbids_zeropage mm_has_pgste static inline int mm_uses_skeys(struct mm_struct *mm) { #ifdef CONFIG_PGSTE @@ -715,23 +705,23 @@ static inline int pud_none(pud_t pud) return pud_val(pud) == _REGION3_ENTRY_EMPTY; } -#define pud_leaf pud_large -static inline int pud_large(pud_t pud) +#define pud_leaf pud_leaf +static inline bool pud_leaf(pud_t pud) { if ((pud_val(pud) & _REGION_ENTRY_TYPE_MASK) != _REGION_ENTRY_TYPE_R3) return 0; return !!(pud_val(pud) & _REGION3_ENTRY_LARGE); } -#define pmd_leaf pmd_large -static inline int pmd_large(pmd_t pmd) +#define pmd_leaf pmd_leaf +static inline bool pmd_leaf(pmd_t pmd) { return (pmd_val(pmd) & _SEGMENT_ENTRY_LARGE) != 0; } static inline int pmd_bad(pmd_t pmd) { - if ((pmd_val(pmd) & _SEGMENT_ENTRY_TYPE_MASK) > 0 || pmd_large(pmd)) + if ((pmd_val(pmd) & _SEGMENT_ENTRY_TYPE_MASK) > 0 || pmd_leaf(pmd)) return 1; return (pmd_val(pmd) & ~_SEGMENT_ENTRY_BITS) != 0; } @@ -830,8 +820,8 @@ static inline int pte_protnone(pte_t pte) static inline int pmd_protnone(pmd_t pmd) { - /* pmd_large(pmd) implies pmd_present(pmd) */ - return pmd_large(pmd) && !(pmd_val(pmd) & _SEGMENT_ENTRY_READ); + /* pmd_leaf(pmd) implies pmd_present(pmd) */ + return pmd_leaf(pmd) && !(pmd_val(pmd) & _SEGMENT_ENTRY_READ); } #endif @@ -1326,6 +1316,8 @@ pgprot_t pgprot_writecombine(pgprot_t prot); #define pgprot_writethrough pgprot_writethrough pgprot_t pgprot_writethrough(pgprot_t prot); +#define PFN_PTE_SHIFT PAGE_SHIFT + /* * Set multiple PTEs to consecutive pages with a single call. All PTEs * are within the same folio, PMD and VMA. @@ -1393,7 +1385,7 @@ static inline unsigned long pmd_deref(pmd_t pmd) unsigned long origin_mask; origin_mask = _SEGMENT_ENTRY_ORIGIN; - if (pmd_large(pmd)) + if (pmd_leaf(pmd)) origin_mask = _SEGMENT_ENTRY_ORIGIN_LARGE; return (unsigned long)__va(pmd_val(pmd) & origin_mask); } diff --git a/arch/s390/include/asm/physmem_info.h b/arch/s390/include/asm/physmem_info.h index 9e41a74fc..e747b067f 100644 --- a/arch/s390/include/asm/physmem_info.h +++ b/arch/s390/include/asm/physmem_info.h @@ -22,6 +22,7 @@ enum reserved_range_type { RR_DECOMPRESSOR, RR_INITRD, RR_VMLINUX, + RR_RELOC, RR_AMODE31, RR_IPLREPORT, RR_CERT_COMP_LIST, diff --git a/arch/s390/include/asm/preempt.h b/arch/s390/include/asm/preempt.h index bf15da0fe..0e3da500e 100644 --- a/arch/s390/include/asm/preempt.h +++ b/arch/s390/include/asm/preempt.h @@ -12,12 +12,12 @@ #define PREEMPT_NEED_RESCHED 0x80000000 #define PREEMPT_ENABLED (0 + PREEMPT_NEED_RESCHED) -static inline int preempt_count(void) +static __always_inline int preempt_count(void) { return READ_ONCE(S390_lowcore.preempt_count) & ~PREEMPT_NEED_RESCHED; } -static inline void preempt_count_set(int pc) +static __always_inline void preempt_count_set(int pc) { int old, new; @@ -29,22 +29,22 @@ static inline void preempt_count_set(int pc) old, new) != old); } -static inline void set_preempt_need_resched(void) +static __always_inline void set_preempt_need_resched(void) { __atomic_and(~PREEMPT_NEED_RESCHED, &S390_lowcore.preempt_count); } -static inline void clear_preempt_need_resched(void) +static __always_inline void clear_preempt_need_resched(void) { __atomic_or(PREEMPT_NEED_RESCHED, &S390_lowcore.preempt_count); } -static inline bool test_preempt_need_resched(void) +static __always_inline bool test_preempt_need_resched(void) { return !(READ_ONCE(S390_lowcore.preempt_count) & PREEMPT_NEED_RESCHED); } -static inline void __preempt_count_add(int val) +static __always_inline void __preempt_count_add(int val) { /* * With some obscure config options and CONFIG_PROFILE_ALL_BRANCHES @@ -59,17 +59,17 @@ static inline void __preempt_count_add(int val) __atomic_add(val, &S390_lowcore.preempt_count); } -static inline void __preempt_count_sub(int val) +static __always_inline void __preempt_count_sub(int val) { __preempt_count_add(-val); } -static inline bool __preempt_count_dec_and_test(void) +static __always_inline bool __preempt_count_dec_and_test(void) { return __atomic_add(-1, &S390_lowcore.preempt_count) == 1; } -static inline bool should_resched(int preempt_offset) +static __always_inline bool should_resched(int preempt_offset) { return unlikely(READ_ONCE(S390_lowcore.preempt_count) == preempt_offset); @@ -79,45 +79,45 @@ static inline bool should_resched(int preempt_offset) #define PREEMPT_ENABLED (0) -static inline int preempt_count(void) +static __always_inline int preempt_count(void) { return READ_ONCE(S390_lowcore.preempt_count); } -static inline void preempt_count_set(int pc) +static __always_inline void preempt_count_set(int pc) { S390_lowcore.preempt_count = pc; } -static inline void set_preempt_need_resched(void) +static __always_inline void set_preempt_need_resched(void) { } -static inline void clear_preempt_need_resched(void) +static __always_inline void clear_preempt_need_resched(void) { } -static inline bool test_preempt_need_resched(void) +static __always_inline bool test_preempt_need_resched(void) { return false; } -static inline void __preempt_count_add(int val) +static __always_inline void __preempt_count_add(int val) { S390_lowcore.preempt_count += val; } -static inline void __preempt_count_sub(int val) +static __always_inline void __preempt_count_sub(int val) { S390_lowcore.preempt_count -= val; } -static inline bool __preempt_count_dec_and_test(void) +static __always_inline bool __preempt_count_dec_and_test(void) { return !--S390_lowcore.preempt_count && tif_need_resched(); } -static inline bool should_resched(int preempt_offset) +static __always_inline bool should_resched(int preempt_offset) { return unlikely(preempt_count() == preempt_offset && tif_need_resched()); diff --git a/arch/s390/include/asm/processor.h b/arch/s390/include/asm/processor.h index c0b6e74d8..db9982f0e 100644 --- a/arch/s390/include/asm/processor.h +++ b/arch/s390/include/asm/processor.h @@ -14,14 +14,14 @@ #include +#define CIF_SIE 0 /* CPU needs SIE exit cleanup */ #define CIF_NOHZ_DELAY 2 /* delay HZ disable for a tick */ -#define CIF_FPU 3 /* restore FPU registers */ #define CIF_ENABLED_WAIT 5 /* in enabled wait state */ #define CIF_MCCK_GUEST 6 /* machine check happening in guest */ #define CIF_DEDICATED_CPU 7 /* this CPU is dedicated */ +#define _CIF_SIE BIT(CIF_SIE) #define _CIF_NOHZ_DELAY BIT(CIF_NOHZ_DELAY) -#define _CIF_FPU BIT(CIF_FPU) #define _CIF_ENABLED_WAIT BIT(CIF_ENABLED_WAIT) #define _CIF_MCCK_GUEST BIT(CIF_MCCK_GUEST) #define _CIF_DEDICATED_CPU BIT(CIF_DEDICATED_CPU) @@ -33,13 +33,12 @@ #include #include #include +#include #include #include #include #include #include -#include -#include #include typedef long (*sys_call_ptr_t)(struct pt_regs *regs); @@ -169,6 +168,8 @@ struct thread_struct { unsigned int gmap_write_flag; /* gmap fault write indication */ unsigned int gmap_int_code; /* int code of last gmap fault */ unsigned int gmap_pfault; /* signal of a pending guest pfault */ + int ufpu_flags; /* user fpu flags */ + int kfpu_flags; /* kernel fpu flags */ /* Per-thread information related to debugging */ struct per_regs per_user; /* User specified PER registers */ @@ -184,7 +185,8 @@ struct thread_struct { struct gs_cb *gs_cb; /* Current guarded storage cb */ struct gs_cb *gs_bc_cb; /* Broadcast guarded storage cb */ struct pgm_tdb trap_tdb; /* Transaction abort diagnose block */ - struct fpu fpu; /* FP and VX register save area */ + struct fpu ufpu; /* User FP and VX register save area */ + struct fpu kfpu; /* Kernel FP and VX register save area */ }; /* Flag to disable transactions. */ @@ -203,7 +205,6 @@ typedef struct thread_struct thread_struct; #define INIT_THREAD { \ .ksp = sizeof(init_stack) + (unsigned long) &init_stack, \ - .fpu.regs = (void *) init_task.thread.fpu.fprs, \ .last_break = 1, \ } diff --git a/arch/s390/include/asm/ptdump.h b/arch/s390/include/asm/ptdump.h deleted file mode 100644 index f960b2896..000000000 --- a/arch/s390/include/asm/ptdump.h +++ /dev/null @@ -1,14 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ - -#ifndef _ASM_S390_PTDUMP_H -#define _ASM_S390_PTDUMP_H - -void ptdump_check_wx(void); - -static inline void debug_checkwx(void) -{ - if (IS_ENABLED(CONFIG_DEBUG_WX)) - ptdump_check_wx(); -} - -#endif /* _ASM_S390_PTDUMP_H */ diff --git a/arch/s390/include/asm/ptrace.h b/arch/s390/include/asm/ptrace.h index d28bf8fb2..2ad9324f6 100644 --- a/arch/s390/include/asm/ptrace.h +++ b/arch/s390/include/asm/ptrace.h @@ -14,13 +14,11 @@ #define PIF_SYSCALL 0 /* inside a system call */ #define PIF_EXECVE_PGSTE_RESTART 1 /* restart execve for PGSTE binaries */ #define PIF_SYSCALL_RET_SET 2 /* return value was set via ptrace */ -#define PIF_GUEST_FAULT 3 /* indicates program check in sie64a */ #define PIF_FTRACE_FULL_REGS 4 /* all register contents valid (ftrace) */ #define _PIF_SYSCALL BIT(PIF_SYSCALL) #define _PIF_EXECVE_PGSTE_RESTART BIT(PIF_EXECVE_PGSTE_RESTART) #define _PIF_SYSCALL_RET_SET BIT(PIF_SYSCALL_RET_SET) -#define _PIF_GUEST_FAULT BIT(PIF_GUEST_FAULT) #define _PIF_FTRACE_FULL_REGS BIT(PIF_FTRACE_FULL_REGS) #define PSW32_MASK_PER _AC(0x40000000, UL) @@ -203,6 +201,10 @@ static inline int test_and_clear_pt_regs_flag(struct pt_regs *regs, int flag) return ret; } +struct task_struct; + +void update_cr_regs(struct task_struct *task); + /* * These are defined as per linux/ptrace.h, which see. */ diff --git a/arch/s390/include/asm/qdio.h b/arch/s390/include/asm/qdio.h index 2f983e0b9..69c4ead0c 100644 --- a/arch/s390/include/asm/qdio.h +++ b/arch/s390/include/asm/qdio.h @@ -9,8 +9,9 @@ #define __QDIO_H__ #include -#include +#include #include +#include /* only use 4 queues to save some cachelines */ #define QDIO_MAX_QUEUES_PER_IRQ 4 @@ -34,9 +35,9 @@ * @dkey: access key for SLSB */ struct qdesfmt0 { - u64 sliba; - u64 sla; - u64 slsba; + dma64_t sliba; + dma64_t sla; + dma64_t slsba; u32 : 32; u32 akey : 4; u32 bkey : 4; @@ -74,7 +75,7 @@ struct qdr { /* private: */ u32 res[9]; /* public: */ - u64 qiba; + dma64_t qiba; u32 : 32; u32 qkey : 4; u32 : 28; @@ -146,7 +147,7 @@ struct qaob { u8 flags; u16 cbtbs; u8 sb_count; - u64 sba[QDIO_MAX_ELEMENTS_PER_BUFFER]; + dma64_t sba[QDIO_MAX_ELEMENTS_PER_BUFFER]; u16 dcount[QDIO_MAX_ELEMENTS_PER_BUFFER]; u64 user0; u64 res4[2]; @@ -208,7 +209,7 @@ struct qdio_buffer_element { u8 scount; u8 sflags; u32 length; - u64 addr; + dma64_t addr; } __attribute__ ((packed, aligned(16))); /** @@ -224,7 +225,7 @@ struct qdio_buffer { * @sbal: absolute SBAL address */ struct sl_element { - u64 sbal; + dma64_t sbal; } __attribute__ ((packed)); /** diff --git a/arch/s390/include/asm/scsw.h b/arch/s390/include/asm/scsw.h index 322bdcd4b..56003e26c 100644 --- a/arch/s390/include/asm/scsw.h +++ b/arch/s390/include/asm/scsw.h @@ -11,6 +11,7 @@ #include #include +#include #include /** @@ -53,7 +54,7 @@ struct cmd_scsw { __u32 fctl : 3; __u32 actl : 7; __u32 stctl : 5; - __u32 cpa; + dma32_t cpa; __u32 dstat : 8; __u32 cstat : 8; __u32 count : 16; @@ -93,7 +94,7 @@ struct tm_scsw { u32 fctl:3; u32 actl:7; u32 stctl:5; - u32 tcw; + dma32_t tcw; u32 dstat:8; u32 cstat:8; u32 fcxs:8; @@ -125,7 +126,7 @@ struct eadm_scsw { u32 fctl:3; u32 actl:7; u32 stctl:5; - u32 aob; + dma32_t aob; u32 dstat:8; u32 cstat:8; u32:16; diff --git a/arch/s390/include/asm/stacktrace.h b/arch/s390/include/asm/stacktrace.h index 31ec4f545..433fde85b 100644 --- a/arch/s390/include/asm/stacktrace.h +++ b/arch/s390/include/asm/stacktrace.h @@ -4,7 +4,6 @@ #include #include -#include struct stack_frame_user { unsigned long back_chain; diff --git a/arch/s390/include/asm/switch_to.h b/arch/s390/include/asm/switch_to.h deleted file mode 100644 index c61b2cc1a..000000000 --- a/arch/s390/include/asm/switch_to.h +++ /dev/null @@ -1,49 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * Copyright IBM Corp. 1999, 2009 - * - * Author(s): Martin Schwidefsky - */ - -#ifndef __ASM_SWITCH_TO_H -#define __ASM_SWITCH_TO_H - -#include -#include -#include -#include - -extern struct task_struct *__switch_to(void *, void *); -extern void update_cr_regs(struct task_struct *task); - -static inline void save_access_regs(unsigned int *acrs) -{ - typedef struct { int _[NUM_ACRS]; } acrstype; - - asm volatile("stam 0,15,%0" : "=Q" (*(acrstype *)acrs)); -} - -static inline void restore_access_regs(unsigned int *acrs) -{ - typedef struct { int _[NUM_ACRS]; } acrstype; - - asm volatile("lam 0,15,%0" : : "Q" (*(acrstype *)acrs)); -} - -#define switch_to(prev, next, last) do { \ - /* save_fpu_regs() sets the CIF_FPU flag, which enforces \ - * a restore of the floating point / vector registers as \ - * soon as the next task returns to user space \ - */ \ - save_fpu_regs(); \ - save_access_regs(&prev->thread.acrs[0]); \ - save_ri_cb(prev->thread.ri_cb); \ - save_gs_cb(prev->thread.gs_cb); \ - update_cr_regs(next); \ - restore_access_regs(&next->thread.acrs[0]); \ - restore_ri_cb(next->thread.ri_cb, prev->thread.ri_cb); \ - restore_gs_cb(next->thread.gs_cb); \ - prev = __switch_to(prev, next); \ -} while (0) - -#endif /* __ASM_SWITCH_TO_H */ diff --git a/arch/s390/include/asm/tlb.h b/arch/s390/include/asm/tlb.h index d1455a601..e95b2c808 100644 --- a/arch/s390/include/asm/tlb.h +++ b/arch/s390/include/asm/tlb.h @@ -25,8 +25,9 @@ void __tlb_remove_table(void *_table); static inline void tlb_flush(struct mmu_gather *tlb); static inline bool __tlb_remove_page_size(struct mmu_gather *tlb, - struct encoded_page *page, - int page_size); + struct page *page, bool delay_rmap, int page_size); +static inline bool __tlb_remove_folio_pages(struct mmu_gather *tlb, + struct page *page, unsigned int nr_pages, bool delay_rmap); #define tlb_flush tlb_flush #define pte_free_tlb pte_free_tlb @@ -42,14 +43,29 @@ static inline bool __tlb_remove_page_size(struct mmu_gather *tlb, * tlb_ptep_clear_flush. In both flush modes the tlb for a page cache page * has already been freed, so just do free_page_and_swap_cache. * - * s390 doesn't delay rmap removal, so there is nothing encoded in - * the page pointer. + * s390 doesn't delay rmap removal. */ static inline bool __tlb_remove_page_size(struct mmu_gather *tlb, - struct encoded_page *page, - int page_size) + struct page *page, bool delay_rmap, int page_size) { - free_page_and_swap_cache(encoded_page_ptr(page)); + VM_WARN_ON_ONCE(delay_rmap); + + free_page_and_swap_cache(page); + return false; +} + +static inline bool __tlb_remove_folio_pages(struct mmu_gather *tlb, + struct page *page, unsigned int nr_pages, bool delay_rmap) +{ + struct encoded_page *encoded_pages[] = { + encode_page(page, ENCODED_PAGE_BIT_NR_PAGES_NEXT), + encode_nr_pages(nr_pages), + }; + + VM_WARN_ON_ONCE(delay_rmap); + VM_WARN_ON_ONCE(page_folio(page) != page_folio(page + nr_pages - 1)); + + free_pages_and_swap_cache(encoded_pages, ARRAY_SIZE(encoded_pages)); return false; } diff --git a/arch/s390/include/asm/vdso/data.h b/arch/s390/include/asm/vdso/data.h index 73ee89142..0e2b40ef6 100644 --- a/arch/s390/include/asm/vdso/data.h +++ b/arch/s390/include/asm/vdso/data.h @@ -3,7 +3,6 @@ #define __S390_ASM_VDSO_DATA_H #include -#include struct arch_vdso_data { __s64 tod_steering_delta; diff --git a/arch/s390/include/asm/vx-insn-asm.h b/arch/s390/include/asm/vx-insn-asm.h deleted file mode 100644 index 360f8b36d..000000000 --- a/arch/s390/include/asm/vx-insn-asm.h +++ /dev/null @@ -1,681 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * Support for Vector Instructions - * - * Assembler macros to generate .byte/.word code for particular - * vector instructions that are supported by recent binutils (>= 2.26) only. - * - * Copyright IBM Corp. 2015 - * Author(s): Hendrik Brueckner - */ - -#ifndef __ASM_S390_VX_INSN_INTERNAL_H -#define __ASM_S390_VX_INSN_INTERNAL_H - -#ifndef __ASM_S390_VX_INSN_H -#error only can be included directly -#endif - -#ifdef __ASSEMBLY__ - -/* Macros to generate vector instruction byte code */ - -/* GR_NUM - Retrieve general-purpose register number - * - * @opd: Operand to store register number - * @r64: String designation register in the format "%rN" - */ -.macro GR_NUM opd gr - \opd = 255 - .ifc \gr,%r0 - \opd = 0 - .endif - .ifc \gr,%r1 - \opd = 1 - .endif - .ifc \gr,%r2 - \opd = 2 - .endif - .ifc \gr,%r3 - \opd = 3 - .endif - .ifc \gr,%r4 - \opd = 4 - .endif - .ifc \gr,%r5 - \opd = 5 - .endif - .ifc \gr,%r6 - \opd = 6 - .endif - .ifc \gr,%r7 - \opd = 7 - .endif - .ifc \gr,%r8 - \opd = 8 - .endif - .ifc \gr,%r9 - \opd = 9 - .endif - .ifc \gr,%r10 - \opd = 10 - .endif - .ifc \gr,%r11 - \opd = 11 - .endif - .ifc \gr,%r12 - \opd = 12 - .endif - .ifc \gr,%r13 - \opd = 13 - .endif - .ifc \gr,%r14 - \opd = 14 - .endif - .ifc \gr,%r15 - \opd = 15 - .endif - .if \opd == 255 - \opd = \gr - .endif -.endm - -/* VX_NUM - Retrieve vector register number - * - * @opd: Operand to store register number - * @vxr: String designation register in the format "%vN" - * - * The vector register number is used for as input number to the - * instruction and, as well as, to compute the RXB field of the - * instruction. - */ -.macro VX_NUM opd vxr - \opd = 255 - .ifc \vxr,%v0 - \opd = 0 - .endif - .ifc \vxr,%v1 - \opd = 1 - .endif - .ifc \vxr,%v2 - \opd = 2 - .endif - .ifc \vxr,%v3 - \opd = 3 - .endif - .ifc \vxr,%v4 - \opd = 4 - .endif - .ifc \vxr,%v5 - \opd = 5 - .endif - .ifc \vxr,%v6 - \opd = 6 - .endif - .ifc \vxr,%v7 - \opd = 7 - .endif - .ifc \vxr,%v8 - \opd = 8 - .endif - .ifc \vxr,%v9 - \opd = 9 - .endif - .ifc \vxr,%v10 - \opd = 10 - .endif - .ifc \vxr,%v11 - \opd = 11 - .endif - .ifc \vxr,%v12 - \opd = 12 - .endif - .ifc \vxr,%v13 - \opd = 13 - .endif - .ifc \vxr,%v14 - \opd = 14 - .endif - .ifc \vxr,%v15 - \opd = 15 - .endif - .ifc \vxr,%v16 - \opd = 16 - .endif - .ifc \vxr,%v17 - \opd = 17 - .endif - .ifc \vxr,%v18 - \opd = 18 - .endif - .ifc \vxr,%v19 - \opd = 19 - .endif - .ifc \vxr,%v20 - \opd = 20 - .endif - .ifc \vxr,%v21 - \opd = 21 - .endif - .ifc \vxr,%v22 - \opd = 22 - .endif - .ifc \vxr,%v23 - \opd = 23 - .endif - .ifc \vxr,%v24 - \opd = 24 - .endif - .ifc \vxr,%v25 - \opd = 25 - .endif - .ifc \vxr,%v26 - \opd = 26 - .endif - .ifc \vxr,%v27 - \opd = 27 - .endif - .ifc \vxr,%v28 - \opd = 28 - .endif - .ifc \vxr,%v29 - \opd = 29 - .endif - .ifc \vxr,%v30 - \opd = 30 - .endif - .ifc \vxr,%v31 - \opd = 31 - .endif - .if \opd == 255 - \opd = \vxr - .endif -.endm - -/* RXB - Compute most significant bit used vector registers - * - * @rxb: Operand to store computed RXB value - * @v1: First vector register designated operand - * @v2: Second vector register designated operand - * @v3: Third vector register designated operand - * @v4: Fourth vector register designated operand - */ -.macro RXB rxb v1 v2=0 v3=0 v4=0 - \rxb = 0 - .if \v1 & 0x10 - \rxb = \rxb | 0x08 - .endif - .if \v2 & 0x10 - \rxb = \rxb | 0x04 - .endif - .if \v3 & 0x10 - \rxb = \rxb | 0x02 - .endif - .if \v4 & 0x10 - \rxb = \rxb | 0x01 - .endif -.endm - -/* MRXB - Generate Element Size Control and RXB value - * - * @m: Element size control - * @v1: First vector register designated operand (for RXB) - * @v2: Second vector register designated operand (for RXB) - * @v3: Third vector register designated operand (for RXB) - * @v4: Fourth vector register designated operand (for RXB) - */ -.macro MRXB m v1 v2=0 v3=0 v4=0 - rxb = 0 - RXB rxb, \v1, \v2, \v3, \v4 - .byte (\m << 4) | rxb -.endm - -/* MRXBOPC - Generate Element Size Control, RXB, and final Opcode fields - * - * @m: Element size control - * @opc: Opcode - * @v1: First vector register designated operand (for RXB) - * @v2: Second vector register designated operand (for RXB) - * @v3: Third vector register designated operand (for RXB) - * @v4: Fourth vector register designated operand (for RXB) - */ -.macro MRXBOPC m opc v1 v2=0 v3=0 v4=0 - MRXB \m, \v1, \v2, \v3, \v4 - .byte \opc -.endm - -/* Vector support instructions */ - -/* VECTOR GENERATE BYTE MASK */ -.macro VGBM vr imm2 - VX_NUM v1, \vr - .word (0xE700 | ((v1&15) << 4)) - .word \imm2 - MRXBOPC 0, 0x44, v1 -.endm -.macro VZERO vxr - VGBM \vxr, 0 -.endm -.macro VONE vxr - VGBM \vxr, 0xFFFF -.endm - -/* VECTOR LOAD VR ELEMENT FROM GR */ -.macro VLVG v, gr, disp, m - VX_NUM v1, \v - GR_NUM b2, "%r0" - GR_NUM r3, \gr - .word 0xE700 | ((v1&15) << 4) | r3 - .word (b2 << 12) | (\disp) - MRXBOPC \m, 0x22, v1 -.endm -.macro VLVGB v, gr, index, base - VLVG \v, \gr, \index, \base, 0 -.endm -.macro VLVGH v, gr, index - VLVG \v, \gr, \index, 1 -.endm -.macro VLVGF v, gr, index - VLVG \v, \gr, \index, 2 -.endm -.macro VLVGG v, gr, index - VLVG \v, \gr, \index, 3 -.endm - -/* VECTOR LOAD REGISTER */ -.macro VLR v1, v2 - VX_NUM v1, \v1 - VX_NUM v2, \v2 - .word 0xE700 | ((v1&15) << 4) | (v2&15) - .word 0 - MRXBOPC 0, 0x56, v1, v2 -.endm - -/* VECTOR LOAD */ -.macro VL v, disp, index="%r0", base - VX_NUM v1, \v - GR_NUM x2, \index - GR_NUM b2, \base - .word 0xE700 | ((v1&15) << 4) | x2 - .word (b2 << 12) | (\disp) - MRXBOPC 0, 0x06, v1 -.endm - -/* VECTOR LOAD ELEMENT */ -.macro VLEx vr1, disp, index="%r0", base, m3, opc - VX_NUM v1, \vr1 - GR_NUM x2, \index - GR_NUM b2, \base - .word 0xE700 | ((v1&15) << 4) | x2 - .word (b2 << 12) | (\disp) - MRXBOPC \m3, \opc, v1 -.endm -.macro VLEB vr1, disp, index="%r0", base, m3 - VLEx \vr1, \disp, \index, \base, \m3, 0x00 -.endm -.macro VLEH vr1, disp, index="%r0", base, m3 - VLEx \vr1, \disp, \index, \base, \m3, 0x01 -.endm -.macro VLEF vr1, disp, index="%r0", base, m3 - VLEx \vr1, \disp, \index, \base, \m3, 0x03 -.endm -.macro VLEG vr1, disp, index="%r0", base, m3 - VLEx \vr1, \disp, \index, \base, \m3, 0x02 -.endm - -/* VECTOR LOAD ELEMENT IMMEDIATE */ -.macro VLEIx vr1, imm2, m3, opc - VX_NUM v1, \vr1 - .word 0xE700 | ((v1&15) << 4) - .word \imm2 - MRXBOPC \m3, \opc, v1 -.endm -.macro VLEIB vr1, imm2, index - VLEIx \vr1, \imm2, \index, 0x40 -.endm -.macro VLEIH vr1, imm2, index - VLEIx \vr1, \imm2, \index, 0x41 -.endm -.macro VLEIF vr1, imm2, index - VLEIx \vr1, \imm2, \index, 0x43 -.endm -.macro VLEIG vr1, imm2, index - VLEIx \vr1, \imm2, \index, 0x42 -.endm - -/* VECTOR LOAD GR FROM VR ELEMENT */ -.macro VLGV gr, vr, disp, base="%r0", m - GR_NUM r1, \gr - GR_NUM b2, \base - VX_NUM v3, \vr - .word 0xE700 | (r1 << 4) | (v3&15) - .word (b2 << 12) | (\disp) - MRXBOPC \m, 0x21, v3 -.endm -.macro VLGVB gr, vr, disp, base="%r0" - VLGV \gr, \vr, \disp, \base, 0 -.endm -.macro VLGVH gr, vr, disp, base="%r0" - VLGV \gr, \vr, \disp, \base, 1 -.endm -.macro VLGVF gr, vr, disp, base="%r0" - VLGV \gr, \vr, \disp, \base, 2 -.endm -.macro VLGVG gr, vr, disp, base="%r0" - VLGV \gr, \vr, \disp, \base, 3 -.endm - -/* VECTOR LOAD MULTIPLE */ -.macro VLM vfrom, vto, disp, base, hint=3 - VX_NUM v1, \vfrom - VX_NUM v3, \vto - GR_NUM b2, \base - .word 0xE700 | ((v1&15) << 4) | (v3&15) - .word (b2 << 12) | (\disp) - MRXBOPC \hint, 0x36, v1, v3 -.endm - -/* VECTOR STORE */ -.macro VST vr1, disp, index="%r0", base - VX_NUM v1, \vr1 - GR_NUM x2, \index - GR_NUM b2, \base - .word 0xE700 | ((v1&15) << 4) | (x2&15) - .word (b2 << 12) | (\disp) - MRXBOPC 0, 0x0E, v1 -.endm - -/* VECTOR STORE MULTIPLE */ -.macro VSTM vfrom, vto, disp, base, hint=3 - VX_NUM v1, \vfrom - VX_NUM v3, \vto - GR_NUM b2, \base - .word 0xE700 | ((v1&15) << 4) | (v3&15) - .word (b2 << 12) | (\disp) - MRXBOPC \hint, 0x3E, v1, v3 -.endm - -/* VECTOR PERMUTE */ -.macro VPERM vr1, vr2, vr3, vr4 - VX_NUM v1, \vr1 - VX_NUM v2, \vr2 - VX_NUM v3, \vr3 - VX_NUM v4, \vr4 - .word 0xE700 | ((v1&15) << 4) | (v2&15) - .word ((v3&15) << 12) - MRXBOPC (v4&15), 0x8C, v1, v2, v3, v4 -.endm - -/* VECTOR UNPACK LOGICAL LOW */ -.macro VUPLL vr1, vr2, m3 - VX_NUM v1, \vr1 - VX_NUM v2, \vr2 - .word 0xE700 | ((v1&15) << 4) | (v2&15) - .word 0x0000 - MRXBOPC \m3, 0xD4, v1, v2 -.endm -.macro VUPLLB vr1, vr2 - VUPLL \vr1, \vr2, 0 -.endm -.macro VUPLLH vr1, vr2 - VUPLL \vr1, \vr2, 1 -.endm -.macro VUPLLF vr1, vr2 - VUPLL \vr1, \vr2, 2 -.endm - -/* VECTOR PERMUTE DOUBLEWORD IMMEDIATE */ -.macro VPDI vr1, vr2, vr3, m4 - VX_NUM v1, \vr1 - VX_NUM v2, \vr2 - VX_NUM v3, \vr3 - .word 0xE700 | ((v1&15) << 4) | (v2&15) - .word ((v3&15) << 12) - MRXBOPC \m4, 0x84, v1, v2, v3 -.endm - -/* VECTOR REPLICATE */ -.macro VREP vr1, vr3, imm2, m4 - VX_NUM v1, \vr1 - VX_NUM v3, \vr3 - .word 0xE700 | ((v1&15) << 4) | (v3&15) - .word \imm2 - MRXBOPC \m4, 0x4D, v1, v3 -.endm -.macro VREPB vr1, vr3, imm2 - VREP \vr1, \vr3, \imm2, 0 -.endm -.macro VREPH vr1, vr3, imm2 - VREP \vr1, \vr3, \imm2, 1 -.endm -.macro VREPF vr1, vr3, imm2 - VREP \vr1, \vr3, \imm2, 2 -.endm -.macro VREPG vr1, vr3, imm2 - VREP \vr1, \vr3, \imm2, 3 -.endm - -/* VECTOR MERGE HIGH */ -.macro VMRH vr1, vr2, vr3, m4 - VX_NUM v1, \vr1 - VX_NUM v2, \vr2 - VX_NUM v3, \vr3 - .word 0xE700 | ((v1&15) << 4) | (v2&15) - .word ((v3&15) << 12) - MRXBOPC \m4, 0x61, v1, v2, v3 -.endm -.macro VMRHB vr1, vr2, vr3 - VMRH \vr1, \vr2, \vr3, 0 -.endm -.macro VMRHH vr1, vr2, vr3 - VMRH \vr1, \vr2, \vr3, 1 -.endm -.macro VMRHF vr1, vr2, vr3 - VMRH \vr1, \vr2, \vr3, 2 -.endm -.macro VMRHG vr1, vr2, vr3 - VMRH \vr1, \vr2, \vr3, 3 -.endm - -/* VECTOR MERGE LOW */ -.macro VMRL vr1, vr2, vr3, m4 - VX_NUM v1, \vr1 - VX_NUM v2, \vr2 - VX_NUM v3, \vr3 - .word 0xE700 | ((v1&15) << 4) | (v2&15) - .word ((v3&15) << 12) - MRXBOPC \m4, 0x60, v1, v2, v3 -.endm -.macro VMRLB vr1, vr2, vr3 - VMRL \vr1, \vr2, \vr3, 0 -.endm -.macro VMRLH vr1, vr2, vr3 - VMRL \vr1, \vr2, \vr3, 1 -.endm -.macro VMRLF vr1, vr2, vr3 - VMRL \vr1, \vr2, \vr3, 2 -.endm -.macro VMRLG vr1, vr2, vr3 - VMRL \vr1, \vr2, \vr3, 3 -.endm - - -/* Vector integer instructions */ - -/* VECTOR AND */ -.macro VN vr1, vr2, vr3 - VX_NUM v1, \vr1 - VX_NUM v2, \vr2 - VX_NUM v3, \vr3 - .word 0xE700 | ((v1&15) << 4) | (v2&15) - .word ((v3&15) << 12) - MRXBOPC 0, 0x68, v1, v2, v3 -.endm - -/* VECTOR EXCLUSIVE OR */ -.macro VX vr1, vr2, vr3 - VX_NUM v1, \vr1 - VX_NUM v2, \vr2 - VX_NUM v3, \vr3 - .word 0xE700 | ((v1&15) << 4) | (v2&15) - .word ((v3&15) << 12) - MRXBOPC 0, 0x6D, v1, v2, v3 -.endm - -/* VECTOR GALOIS FIELD MULTIPLY SUM */ -.macro VGFM vr1, vr2, vr3, m4 - VX_NUM v1, \vr1 - VX_NUM v2, \vr2 - VX_NUM v3, \vr3 - .word 0xE700 | ((v1&15) << 4) | (v2&15) - .word ((v3&15) << 12) - MRXBOPC \m4, 0xB4, v1, v2, v3 -.endm -.macro VGFMB vr1, vr2, vr3 - VGFM \vr1, \vr2, \vr3, 0 -.endm -.macro VGFMH vr1, vr2, vr3 - VGFM \vr1, \vr2, \vr3, 1 -.endm -.macro VGFMF vr1, vr2, vr3 - VGFM \vr1, \vr2, \vr3, 2 -.endm -.macro VGFMG vr1, vr2, vr3 - VGFM \vr1, \vr2, \vr3, 3 -.endm - -/* VECTOR GALOIS FIELD MULTIPLY SUM AND ACCUMULATE */ -.macro VGFMA vr1, vr2, vr3, vr4, m5 - VX_NUM v1, \vr1 - VX_NUM v2, \vr2 - VX_NUM v3, \vr3 - VX_NUM v4, \vr4 - .word 0xE700 | ((v1&15) << 4) | (v2&15) - .word ((v3&15) << 12) | (\m5 << 8) - MRXBOPC (v4&15), 0xBC, v1, v2, v3, v4 -.endm -.macro VGFMAB vr1, vr2, vr3, vr4 - VGFMA \vr1, \vr2, \vr3, \vr4, 0 -.endm -.macro VGFMAH vr1, vr2, vr3, vr4 - VGFMA \vr1, \vr2, \vr3, \vr4, 1 -.endm -.macro VGFMAF vr1, vr2, vr3, vr4 - VGFMA \vr1, \vr2, \vr3, \vr4, 2 -.endm -.macro VGFMAG vr1, vr2, vr3, vr4 - VGFMA \vr1, \vr2, \vr3, \vr4, 3 -.endm - -/* VECTOR SHIFT RIGHT LOGICAL BY BYTE */ -.macro VSRLB vr1, vr2, vr3 - VX_NUM v1, \vr1 - VX_NUM v2, \vr2 - VX_NUM v3, \vr3 - .word 0xE700 | ((v1&15) << 4) | (v2&15) - .word ((v3&15) << 12) - MRXBOPC 0, 0x7D, v1, v2, v3 -.endm - -/* VECTOR REPLICATE IMMEDIATE */ -.macro VREPI vr1, imm2, m3 - VX_NUM v1, \vr1 - .word 0xE700 | ((v1&15) << 4) - .word \imm2 - MRXBOPC \m3, 0x45, v1 -.endm -.macro VREPIB vr1, imm2 - VREPI \vr1, \imm2, 0 -.endm -.macro VREPIH vr1, imm2 - VREPI \vr1, \imm2, 1 -.endm -.macro VREPIF vr1, imm2 - VREPI \vr1, \imm2, 2 -.endm -.macro VREPIG vr1, imm2 - VREP \vr1, \imm2, 3 -.endm - -/* VECTOR ADD */ -.macro VA vr1, vr2, vr3, m4 - VX_NUM v1, \vr1 - VX_NUM v2, \vr2 - VX_NUM v3, \vr3 - .word 0xE700 | ((v1&15) << 4) | (v2&15) - .word ((v3&15) << 12) - MRXBOPC \m4, 0xF3, v1, v2, v3 -.endm -.macro VAB vr1, vr2, vr3 - VA \vr1, \vr2, \vr3, 0 -.endm -.macro VAH vr1, vr2, vr3 - VA \vr1, \vr2, \vr3, 1 -.endm -.macro VAF vr1, vr2, vr3 - VA \vr1, \vr2, \vr3, 2 -.endm -.macro VAG vr1, vr2, vr3 - VA \vr1, \vr2, \vr3, 3 -.endm -.macro VAQ vr1, vr2, vr3 - VA \vr1, \vr2, \vr3, 4 -.endm - -/* VECTOR ELEMENT SHIFT RIGHT ARITHMETIC */ -.macro VESRAV vr1, vr2, vr3, m4 - VX_NUM v1, \vr1 - VX_NUM v2, \vr2 - VX_NUM v3, \vr3 - .word 0xE700 | ((v1&15) << 4) | (v2&15) - .word ((v3&15) << 12) - MRXBOPC \m4, 0x7A, v1, v2, v3 -.endm - -.macro VESRAVB vr1, vr2, vr3 - VESRAV \vr1, \vr2, \vr3, 0 -.endm -.macro VESRAVH vr1, vr2, vr3 - VESRAV \vr1, \vr2, \vr3, 1 -.endm -.macro VESRAVF vr1, vr2, vr3 - VESRAV \vr1, \vr2, \vr3, 2 -.endm -.macro VESRAVG vr1, vr2, vr3 - VESRAV \vr1, \vr2, \vr3, 3 -.endm - -/* VECTOR ELEMENT ROTATE LEFT LOGICAL */ -.macro VERLL vr1, vr3, disp, base="%r0", m4 - VX_NUM v1, \vr1 - VX_NUM v3, \vr3 - GR_NUM b2, \base - .word 0xE700 | ((v1&15) << 4) | (v3&15) - .word (b2 << 12) | (\disp) - MRXBOPC \m4, 0x33, v1, v3 -.endm -.macro VERLLB vr1, vr3, disp, base="%r0" - VERLL \vr1, \vr3, \disp, \base, 0 -.endm -.macro VERLLH vr1, vr3, disp, base="%r0" - VERLL \vr1, \vr3, \disp, \base, 1 -.endm -.macro VERLLF vr1, vr3, disp, base="%r0" - VERLL \vr1, \vr3, \disp, \base, 2 -.endm -.macro VERLLG vr1, vr3, disp, base="%r0" - VERLL \vr1, \vr3, \disp, \base, 3 -.endm - -/* VECTOR SHIFT LEFT DOUBLE BY BYTE */ -.macro VSLDB vr1, vr2, vr3, imm4 - VX_NUM v1, \vr1 - VX_NUM v2, \vr2 - VX_NUM v3, \vr3 - .word 0xE700 | ((v1&15) << 4) | (v2&15) - .word ((v3&15) << 12) | (\imm4) - MRXBOPC 0, 0x77, v1, v2, v3 -.endm - -#endif /* __ASSEMBLY__ */ -#endif /* __ASM_S390_VX_INSN_INTERNAL_H */ diff --git a/arch/s390/include/asm/vx-insn.h b/arch/s390/include/asm/vx-insn.h deleted file mode 100644 index 8c188f1c6..000000000 --- a/arch/s390/include/asm/vx-insn.h +++ /dev/null @@ -1,19 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * Support for Vector Instructions - * - * This wrapper header file allows to use the vector instruction macros in - * both assembler files as well as in inline assemblies in C files. - */ - -#ifndef __ASM_S390_VX_INSN_H -#define __ASM_S390_VX_INSN_H - -#include - -#ifndef __ASSEMBLY__ - -asm(".include \"asm/vx-insn-asm.h\"\n"); - -#endif /* __ASSEMBLY__ */ -#endif /* __ASM_S390_VX_INSN_H */ diff --git a/arch/s390/include/asm/word-at-a-time.h b/arch/s390/include/asm/word-at-a-time.h index 2579f1694..203acd6e4 100644 --- a/arch/s390/include/asm/word-at-a-time.h +++ b/arch/s390/include/asm/word-at-a-time.h @@ -2,7 +2,8 @@ #ifndef _ASM_WORD_AT_A_TIME_H #define _ASM_WORD_AT_A_TIME_H -#include +#include +#include #include #include -- cgit v1.2.3