diff options
Diffstat (limited to 'arch/s390/boot')
30 files changed, 3413 insertions, 0 deletions
diff --git a/arch/s390/boot/.gitignore b/arch/s390/boot/.gitignore new file mode 100644 index 0000000000..f56591bc08 --- /dev/null +++ b/arch/s390/boot/.gitignore @@ -0,0 +1,7 @@ +# SPDX-License-Identifier: GPL-2.0-only +image +bzImage +section_cmp.* +vmlinux +vmlinux.lds +vmlinux.syms diff --git a/arch/s390/boot/Makefile b/arch/s390/boot/Makefile new file mode 100644 index 0000000000..c7c81e5f92 --- /dev/null +++ b/arch/s390/boot/Makefile @@ -0,0 +1,133 @@ +# SPDX-License-Identifier: GPL-2.0 +# +# Makefile for the linux s390-specific parts of the memory manager. +# + +KCOV_INSTRUMENT := n +GCOV_PROFILE := n +UBSAN_SANITIZE := n +KASAN_SANITIZE := n +KCSAN_SANITIZE := n + +KBUILD_AFLAGS := $(KBUILD_AFLAGS_DECOMPRESSOR) +KBUILD_CFLAGS := $(KBUILD_CFLAGS_DECOMPRESSOR) + +# +# Use minimum architecture for als.c to be able to print an error +# message if the kernel is started on a machine which is too old +# +ifndef CONFIG_CC_IS_CLANG +CC_FLAGS_MARCH_MINIMUM := -march=z900 +else +CC_FLAGS_MARCH_MINIMUM := -march=z10 +endif + +ifneq ($(CC_FLAGS_MARCH),$(CC_FLAGS_MARCH_MINIMUM)) +AFLAGS_REMOVE_head.o += $(CC_FLAGS_MARCH) +AFLAGS_head.o += $(CC_FLAGS_MARCH_MINIMUM) +AFLAGS_REMOVE_mem.o += $(CC_FLAGS_MARCH) +AFLAGS_mem.o += $(CC_FLAGS_MARCH_MINIMUM) +CFLAGS_REMOVE_als.o += $(CC_FLAGS_MARCH) +CFLAGS_als.o += $(CC_FLAGS_MARCH_MINIMUM) +CFLAGS_REMOVE_sclp_early_core.o += $(CC_FLAGS_MARCH) +CFLAGS_sclp_early_core.o += $(CC_FLAGS_MARCH_MINIMUM) +endif + +CFLAGS_sclp_early_core.o += -I$(srctree)/drivers/s390/char + +obj-y := head.o als.o startup.o physmem_info.o ipl_parm.o ipl_report.o vmem.o +obj-y += string.o ebcdic.o sclp_early_core.o mem.o ipl_vmparm.o cmdline.o +obj-y += version.o pgm_check_info.o ctype.o ipl_data.o machine_kexec_reloc.o +obj-$(findstring y, $(CONFIG_PROTECTED_VIRTUALIZATION_GUEST) $(CONFIG_PGSTE)) += uv.o +obj-$(CONFIG_RANDOMIZE_BASE) += kaslr.o +obj-y += $(if $(CONFIG_KERNEL_UNCOMPRESSED),,decompressor.o) info.o +obj-$(CONFIG_KERNEL_ZSTD) += clz_ctz.o +obj-all := $(obj-y) piggy.o syms.o + +targets := bzImage section_cmp.boot.data section_cmp.boot.preserved.data $(obj-y) +targets += vmlinux.lds vmlinux vmlinux.bin vmlinux.bin.gz vmlinux.bin.bz2 +targets += vmlinux.bin.xz vmlinux.bin.lzma vmlinux.bin.lzo vmlinux.bin.lz4 +targets += vmlinux.bin.zst info.bin syms.bin vmlinux.syms $(obj-all) + +OBJECTS := $(addprefix $(obj)/,$(obj-y)) +OBJECTS_ALL := $(addprefix $(obj)/,$(obj-all)) + +clean-files += vmlinux.map + +quiet_cmd_section_cmp = SECTCMP $* +define cmd_section_cmp + s1=`$(OBJDUMP) -t -j "$*" "$<" | sort | \ + sed -n "/0000000000000000/! s/.*\s$*\s\+//p" | sha256sum`; \ + s2=`$(OBJDUMP) -t -j "$*" "$(word 2,$^)" | sort | \ + sed -n "/0000000000000000/! s/.*\s$*\s\+//p" | sha256sum`; \ + if [ "$$s1" != "$$s2" ]; then \ + echo "error: section $* differs between $< and $(word 2,$^)" >&2; \ + exit 1; \ + fi; \ + touch $@ +endef + +$(obj)/bzImage: $(obj)/vmlinux $(obj)/section_cmp.boot.data $(obj)/section_cmp.boot.preserved.data FORCE + $(call if_changed,objcopy) + +$(obj)/section_cmp%: vmlinux $(obj)/vmlinux FORCE + $(call if_changed,section_cmp) + +LDFLAGS_vmlinux := --oformat $(LD_BFD) -e startup $(if $(CONFIG_VMLINUX_MAP),-Map=$(obj)/vmlinux.map) --build-id=sha1 -T +$(obj)/vmlinux: $(obj)/vmlinux.lds $(OBJECTS_ALL) FORCE + $(call if_changed,ld) + +LDFLAGS_vmlinux.syms := --oformat $(LD_BFD) -e startup -T +$(obj)/vmlinux.syms: $(obj)/vmlinux.lds $(OBJECTS) FORCE + $(call if_changed,ld) + +quiet_cmd_dumpsyms = DUMPSYMS $< +define cmd_dumpsyms + $(NM) -n -S --format=bsd "$<" | sed -nE 's/^0*([0-9a-fA-F]+) 0*([0-9a-fA-F]+) [tT] ([^ ]*)$$/\1 \2 \3/p' | tr '\n' '\0' > "$@" +endef + +$(obj)/syms.bin: $(obj)/vmlinux.syms FORCE + $(call if_changed,dumpsyms) + +OBJCOPYFLAGS_syms.o := -I binary -O elf64-s390 -B s390:64-bit --rename-section .data=.decompressor.syms +$(obj)/syms.o: $(obj)/syms.bin FORCE + $(call if_changed,objcopy) + +OBJCOPYFLAGS_info.bin := -O binary --only-section=.vmlinux.info --set-section-flags .vmlinux.info=load +$(obj)/info.bin: vmlinux FORCE + $(call if_changed,objcopy) + +OBJCOPYFLAGS_info.o := -I binary -O elf64-s390 -B s390:64-bit --rename-section .data=.vmlinux.info +$(obj)/info.o: $(obj)/info.bin FORCE + $(call if_changed,objcopy) + +OBJCOPYFLAGS_vmlinux.bin := -O binary --remove-section=.comment --remove-section=.vmlinux.info -S +$(obj)/vmlinux.bin: vmlinux FORCE + $(call if_changed,objcopy) + +suffix-$(CONFIG_KERNEL_GZIP) := .gz +suffix-$(CONFIG_KERNEL_BZIP2) := .bz2 +suffix-$(CONFIG_KERNEL_LZ4) := .lz4 +suffix-$(CONFIG_KERNEL_LZMA) := .lzma +suffix-$(CONFIG_KERNEL_LZO) := .lzo +suffix-$(CONFIG_KERNEL_XZ) := .xz +suffix-$(CONFIG_KERNEL_ZSTD) := .zst + +$(obj)/vmlinux.bin.gz: $(obj)/vmlinux.bin FORCE + $(call if_changed,gzip) +$(obj)/vmlinux.bin.bz2: $(obj)/vmlinux.bin FORCE + $(call if_changed,bzip2_with_size) +$(obj)/vmlinux.bin.lz4: $(obj)/vmlinux.bin FORCE + $(call if_changed,lz4_with_size) +$(obj)/vmlinux.bin.lzma: $(obj)/vmlinux.bin FORCE + $(call if_changed,lzma_with_size) +$(obj)/vmlinux.bin.lzo: $(obj)/vmlinux.bin FORCE + $(call if_changed,lzo_with_size) +$(obj)/vmlinux.bin.xz: $(obj)/vmlinux.bin FORCE + $(call if_changed,xzkern_with_size) +$(obj)/vmlinux.bin.zst: $(obj)/vmlinux.bin FORCE + $(call if_changed,zstd22_with_size) + +OBJCOPYFLAGS_piggy.o := -I binary -O elf64-s390 -B s390:64-bit --rename-section .data=.vmlinux.bin.compressed +$(obj)/piggy.o: $(obj)/vmlinux.bin$(suffix-y) FORCE + $(call if_changed,objcopy) diff --git a/arch/s390/boot/als.c b/arch/s390/boot/als.c new file mode 100644 index 0000000000..47c48fbfb5 --- /dev/null +++ b/arch/s390/boot/als.c @@ -0,0 +1,114 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright IBM Corp. 2016 + */ +#include <linux/kernel.h> +#include <asm/processor.h> +#include <asm/facility.h> +#include <asm/lowcore.h> +#include <asm/sclp.h> +#include "boot.h" + +/* + * The code within this file will be called very early. It may _not_ + * access anything within the bss section, since that is not cleared + * yet and may contain data (e.g. initrd) that must be saved by other + * code. + * For temporary objects the stack (16k) should be used. + */ + +static unsigned long als[] = { FACILITIES_ALS }; + +static void u16_to_hex(char *str, u16 val) +{ + int i, num; + + for (i = 1; i <= 4; i++) { + num = (val >> (16 - 4 * i)) & 0xf; + if (num >= 10) + num += 7; + *str++ = '0' + num; + } + *str = '\0'; +} + +static void print_machine_type(void) +{ + static char mach_str[80] = "Detected machine-type number: "; + char type_str[5]; + struct cpuid id; + + get_cpu_id(&id); + u16_to_hex(type_str, id.machine); + strcat(mach_str, type_str); + strcat(mach_str, "\n"); + sclp_early_printk(mach_str); +} + +static void u16_to_decimal(char *str, u16 val) +{ + int div = 1; + + while (div * 10 <= val) + div *= 10; + while (div) { + *str++ = '0' + val / div; + val %= div; + div /= 10; + } + *str = '\0'; +} + +void print_missing_facilities(void) +{ + static char als_str[80] = "Missing facilities: "; + unsigned long val; + char val_str[6]; + int i, j, first; + + first = 1; + for (i = 0; i < ARRAY_SIZE(als); i++) { + val = ~stfle_fac_list[i] & als[i]; + for (j = 0; j < BITS_PER_LONG; j++) { + if (!(val & (1UL << (BITS_PER_LONG - 1 - j)))) + continue; + if (!first) + strcat(als_str, ","); + /* + * Make sure we stay within one line. Consider that + * each facility bit adds up to five characters and + * z/VM adds a four character prefix. + */ + if (strlen(als_str) > 70) { + strcat(als_str, "\n"); + sclp_early_printk(als_str); + *als_str = '\0'; + } + u16_to_decimal(val_str, i * BITS_PER_LONG + j); + strcat(als_str, val_str); + first = 0; + } + } + strcat(als_str, "\n"); + sclp_early_printk(als_str); +} + +static void facility_mismatch(void) +{ + sclp_early_printk("The Linux kernel requires more recent processor hardware\n"); + print_machine_type(); + print_missing_facilities(); + sclp_early_printk("See Principles of Operations for facility bits\n"); + disabled_wait(); +} + +void verify_facilities(void) +{ + int i; + + __stfle(stfle_fac_list, ARRAY_SIZE(stfle_fac_list)); + for (i = 0; i < ARRAY_SIZE(als); i++) { + if ((stfle_fac_list[i] & als[i]) != als[i]) + facility_mismatch(); + } +} diff --git a/arch/s390/boot/boot.h b/arch/s390/boot/boot.h new file mode 100644 index 0000000000..222c6886ac --- /dev/null +++ b/arch/s390/boot/boot.h @@ -0,0 +1,102 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef BOOT_BOOT_H +#define BOOT_BOOT_H + +#include <linux/types.h> + +#define IPL_START 0x200 + +#ifndef __ASSEMBLY__ + +#include <asm/physmem_info.h> + +struct machine_info { + unsigned char has_edat1 : 1; + unsigned char has_edat2 : 1; + unsigned char has_nx : 1; +}; + +struct vmlinux_info { + unsigned long default_lma; + unsigned long entry; + unsigned long image_size; /* does not include .bss */ + unsigned long bss_size; /* uncompressed image .bss size */ + unsigned long bootdata_off; + unsigned long bootdata_size; + unsigned long bootdata_preserved_off; + unsigned long bootdata_preserved_size; + unsigned long dynsym_start; + unsigned long rela_dyn_start; + unsigned long rela_dyn_end; + unsigned long amode31_size; + unsigned long init_mm_off; + unsigned long swapper_pg_dir_off; + unsigned long invalid_pg_dir_off; +#ifdef CONFIG_KASAN + unsigned long kasan_early_shadow_page_off; + unsigned long kasan_early_shadow_pte_off; + unsigned long kasan_early_shadow_pmd_off; + unsigned long kasan_early_shadow_pud_off; + unsigned long kasan_early_shadow_p4d_off; +#endif +}; + +void startup_kernel(void); +unsigned long detect_max_physmem_end(void); +void detect_physmem_online_ranges(unsigned long max_physmem_end); +void physmem_set_usable_limit(unsigned long limit); +void physmem_reserve(enum reserved_range_type type, unsigned long addr, unsigned long size); +void physmem_free(enum reserved_range_type type); +/* for continuous/multiple allocations per type */ +unsigned long physmem_alloc_top_down(enum reserved_range_type type, unsigned long size, + unsigned long align); +/* for single allocations, 1 per type */ +unsigned long physmem_alloc_range(enum reserved_range_type type, unsigned long size, + unsigned long align, unsigned long min, unsigned long max, + bool die_on_oom); +unsigned long get_physmem_alloc_pos(void); +bool ipl_report_certs_intersects(unsigned long addr, unsigned long size, + unsigned long *intersection_start); +bool is_ipl_block_dump(void); +void store_ipl_parmblock(void); +int read_ipl_report(void); +void save_ipl_cert_comp_list(void); +void setup_boot_command_line(void); +void parse_boot_command_line(void); +void verify_facilities(void); +void print_missing_facilities(void); +void sclp_early_setup_buffer(void); +void print_pgm_check_info(void); +unsigned long randomize_within_range(unsigned long size, unsigned long align, + unsigned long min, unsigned long max); +void setup_vmem(unsigned long asce_limit); +void __printf(1, 2) decompressor_printk(const char *fmt, ...); +void print_stacktrace(unsigned long sp); +void error(char *m); + +extern struct machine_info machine; + +/* Symbols defined by linker scripts */ +extern const char kernel_version[]; +extern unsigned long memory_limit; +extern unsigned long vmalloc_size; +extern int vmalloc_size_set; +extern char __boot_data_start[], __boot_data_end[]; +extern char __boot_data_preserved_start[], __boot_data_preserved_end[]; +extern char _decompressor_syms_start[], _decompressor_syms_end[]; +extern char _stack_start[], _stack_end[]; +extern char _end[], _decompressor_end[]; +extern unsigned char _compressed_start[]; +extern unsigned char _compressed_end[]; +extern struct vmlinux_info _vmlinux_info; +#define vmlinux _vmlinux_info + +#define __abs_lowcore_pa(x) (((unsigned long)(x) - __abs_lowcore) % sizeof(struct lowcore)) + +static inline bool intersects(unsigned long addr0, unsigned long size0, + unsigned long addr1, unsigned long size1) +{ + return addr0 + size0 > addr1 && addr1 + size1 > addr0; +} +#endif /* __ASSEMBLY__ */ +#endif /* BOOT_BOOT_H */ diff --git a/arch/s390/boot/clz_ctz.c b/arch/s390/boot/clz_ctz.c new file mode 100644 index 0000000000..c3ebf24859 --- /dev/null +++ b/arch/s390/boot/clz_ctz.c @@ -0,0 +1,2 @@ +// SPDX-License-Identifier: GPL-2.0 +#include "../../../../lib/clz_ctz.c" diff --git a/arch/s390/boot/cmdline.c b/arch/s390/boot/cmdline.c new file mode 100644 index 0000000000..73d826cdbd --- /dev/null +++ b/arch/s390/boot/cmdline.c @@ -0,0 +1,2 @@ +// SPDX-License-Identifier: GPL-2.0 +#include "../../../lib/cmdline.c" diff --git a/arch/s390/boot/ctype.c b/arch/s390/boot/ctype.c new file mode 100644 index 0000000000..2495810b47 --- /dev/null +++ b/arch/s390/boot/ctype.c @@ -0,0 +1,2 @@ +// SPDX-License-Identifier: GPL-2.0 +#include "../../../lib/ctype.c" diff --git a/arch/s390/boot/decompressor.c b/arch/s390/boot/decompressor.c new file mode 100644 index 0000000000..d762733a07 --- /dev/null +++ b/arch/s390/boot/decompressor.c @@ -0,0 +1,86 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Definitions and wrapper functions for kernel decompressor + * + * Copyright IBM Corp. 2010 + * + * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com> + */ + +#include <linux/kernel.h> +#include <linux/string.h> +#include <asm/page.h> +#include "decompressor.h" +#include "boot.h" + +/* + * gzip declarations + */ +#define STATIC static + +#undef memset +#undef memcpy +#undef memmove +#define memmove memmove +#define memzero(s, n) memset((s), 0, (n)) + +#if defined(CONFIG_KERNEL_BZIP2) +#define BOOT_HEAP_SIZE 0x400000 +#elif defined(CONFIG_KERNEL_ZSTD) +#define BOOT_HEAP_SIZE 0x30000 +#else +#define BOOT_HEAP_SIZE 0x10000 +#endif + +static unsigned long free_mem_ptr = (unsigned long) _end; +static unsigned long free_mem_end_ptr = (unsigned long) _end + BOOT_HEAP_SIZE; + +#ifdef CONFIG_KERNEL_GZIP +#include "../../../../lib/decompress_inflate.c" +#endif + +#ifdef CONFIG_KERNEL_BZIP2 +#include "../../../../lib/decompress_bunzip2.c" +#endif + +#ifdef CONFIG_KERNEL_LZ4 +#include "../../../../lib/decompress_unlz4.c" +#endif + +#ifdef CONFIG_KERNEL_LZMA +#include "../../../../lib/decompress_unlzma.c" +#endif + +#ifdef CONFIG_KERNEL_LZO +#include "../../../../lib/decompress_unlzo.c" +#endif + +#ifdef CONFIG_KERNEL_XZ +#include "../../../../lib/decompress_unxz.c" +#endif + +#ifdef CONFIG_KERNEL_ZSTD +#include "../../../../lib/decompress_unzstd.c" +#endif + +#define decompress_offset ALIGN((unsigned long)_end + BOOT_HEAP_SIZE, PAGE_SIZE) + +unsigned long mem_safe_offset(void) +{ + /* + * due to 4MB HEAD_SIZE for bzip2 + * 'decompress_offset + vmlinux.image_size' could be larger than + * kernel at final position + its .bss, so take the larger of two + */ + return max(decompress_offset + vmlinux.image_size, + vmlinux.default_lma + vmlinux.image_size + vmlinux.bss_size); +} + +void *decompress_kernel(void) +{ + void *output = (void *)decompress_offset; + + __decompress(_compressed_start, _compressed_end - _compressed_start, + NULL, NULL, output, vmlinux.image_size, NULL, error); + return output; +} diff --git a/arch/s390/boot/decompressor.h b/arch/s390/boot/decompressor.h new file mode 100644 index 0000000000..92b81d2ea3 --- /dev/null +++ b/arch/s390/boot/decompressor.h @@ -0,0 +1,12 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef BOOT_COMPRESSED_DECOMPRESSOR_H +#define BOOT_COMPRESSED_DECOMPRESSOR_H + +#ifdef CONFIG_KERNEL_UNCOMPRESSED +static inline void *decompress_kernel(void) { return NULL; } +#else +void *decompress_kernel(void); +#endif +unsigned long mem_safe_offset(void); + +#endif /* BOOT_COMPRESSED_DECOMPRESSOR_H */ diff --git a/arch/s390/boot/ebcdic.c b/arch/s390/boot/ebcdic.c new file mode 100644 index 0000000000..7391e7d360 --- /dev/null +++ b/arch/s390/boot/ebcdic.c @@ -0,0 +1,2 @@ +// SPDX-License-Identifier: GPL-2.0 +#include "../kernel/ebcdic.c" diff --git a/arch/s390/boot/head.S b/arch/s390/boot/head.S new file mode 100644 index 0000000000..637c29c3f6 --- /dev/null +++ b/arch/s390/boot/head.S @@ -0,0 +1,320 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright IBM Corp. 1999, 2010 + * + * Author(s): Hartmut Penner <hp@de.ibm.com> + * Martin Schwidefsky <schwidefsky@de.ibm.com> + * Rob van der Heij <rvdhei@iae.nl> + * + * There are 5 different IPL methods + * 1) load the image directly into ram at address 0 and do an PSW restart + * 2) linload will load the image from address 0x10000 to memory 0x10000 + * and start the code thru LPSW 0x0008000080010000 (VM only, deprecated) + * 3) generate the tape ipl header, store the generated image on a tape + * and ipl from it + * In case of SL tape you need to IPL 5 times to get past VOL1 etc + * 4) generate the vm reader ipl header, move the generated image to the + * VM reader (use option NOH!) and do a ipl from reader (VM only) + * 5) direct call of start by the SALIPL loader + * We use the cpuid to distinguish between VM and native ipl + * params for kernel are pushed to 0x10400 (see setup.h) + * + */ + +#include <linux/init.h> +#include <linux/linkage.h> +#include <asm/asm-offsets.h> +#include <asm/page.h> +#include <asm/ptrace.h> +#include <asm/sclp.h> +#include "boot.h" + +#define EP_OFFSET 0x10008 +#define EP_STRING "S390EP" +#define IPL_BS 0x730 + +__HEAD +ipl_start: + mvi __LC_AR_MODE_ID,1 # set esame flag + slr %r0,%r0 # set cpuid to zero + lhi %r1,2 # mode 2 = esame (dump) + sigp %r1,%r0,0x12 # switch to esame mode + sam64 # switch to 64 bit addressing mode + lgh %r1,__LC_SUBCHANNEL_ID # test if subchannel number + brctg %r1,.Lnoload # is valid + llgf %r1,__LC_SUBCHANNEL_ID # load ipl subchannel number + lghi %r2,IPL_BS # load start address + bras %r14,.Lloader # load rest of ipl image + larl %r12,parmarea # pointer to parameter area + stg %r1,IPL_DEVICE-PARMAREA(%r12) # save ipl device number +# +# load parameter file from ipl device +# +.Lagain1: + larl %r2,_end # ramdisk loc. is temp + bras %r14,.Lloader # load parameter file + ltgr %r2,%r2 # got anything ? + jz .Lnopf + lg %r3,MAX_COMMAND_LINE_SIZE-PARMAREA(%r12) + aghi %r3,-1 + clgr %r2,%r3 + jl .Lnotrunc + lgr %r2,%r3 +.Lnotrunc: + larl %r4,_end + larl %r13,.L_hdr + clc 0(3,%r4),0(%r13) # if it is HDRx + jz .Lagain1 # skip dataset header + larl %r13,.L_eof + clc 0(3,%r4),0(%r13) # if it is EOFx + jz .Lagain1 # skip data set trailer + lgr %r5,%r2 + la %r6,COMMAND_LINE-PARMAREA(%r12) + lgr %r7,%r2 + aghi %r7,1 + mvcl %r6,%r4 +.Lnopf: +# +# load ramdisk from ipl device +# +.Lagain2: + larl %r2,_end # addr of ramdisk + stg %r2,INITRD_START-PARMAREA(%r12) + bras %r14,.Lloader # load ramdisk + stg %r2,INITRD_SIZE-PARMAREA(%r12) # store size of rd + ltgr %r2,%r2 + jnz .Lrdcont + stg %r2,INITRD_START-PARMAREA(%r12) # no ramdisk found +.Lrdcont: + larl %r2,_end + larl %r13,.L_hdr # skip HDRx and EOFx + clc 0(3,%r2),0(%r13) + jz .Lagain2 + larl %r13,.L_eof + clc 0(3,%r2),0(%r13) + jz .Lagain2 +# +# reset files in VM reader +# + larl %r13,.Lcpuid + stidp 0(%r13) # store cpuid + tm 0(%r13),0xff # running VM ? + jno .Lnoreset + larl %r2,.Lreset + lghi %r3,26 + diag %r2,%r3,8 + larl %r5,.Lirb + stsch 0(%r5) # check if irq is pending + tm 30(%r5),0x0f # by verifying if any of the + jnz .Lwaitforirq # activity or status control + tm 31(%r5),0xff # bits is set in the schib + jz .Lnoreset +.Lwaitforirq: + bras %r14,.Lirqwait # wait for IO interrupt + c %r1,__LC_SUBCHANNEL_ID # compare subchannel number + jne .Lwaitforirq + larl %r5,.Lirb + tsch 0(%r5) +.Lnoreset: + j .Lnoload +# +# everything loaded, go for it +# +.Lnoload: + jg startup +# +# subroutine to wait for end I/O +# +.Lirqwait: + larl %r13,.Lnewpswmask # set up IO interrupt psw + mvc __LC_IO_NEW_PSW(8),0(%r13) + stg %r14,__LC_IO_NEW_PSW+8 + larl %r13,.Lwaitpsw + lpswe 0(%r13) +.Lioint: +# +# subroutine for loading cards from the reader +# +.Lloader: + lgr %r4,%r14 + larl %r3,.Lorb # r2 = address of orb into r2 + larl %r5,.Lirb # r4 = address of irb + larl %r6,.Lccws + lghi %r7,20 +.Linit: + st %r2,4(%r6) # initialize CCW data addresses + la %r2,0x50(%r2) + la %r6,8(%r6) + brctg %r7,.Linit + larl %r13,.Lcr6 + lctlg %c6,%c6,0(%r13) + xgr %r2,%r2 +.Lldlp: + ssch 0(%r3) # load chunk of 1600 bytes + jnz .Llderr +.Lwait4irq: + bras %r14,.Lirqwait + c %r1,__LC_SUBCHANNEL_ID # compare subchannel number + jne .Lwait4irq + tsch 0(%r5) + xgr %r0,%r0 + ic %r0,8(%r5) # get device status + cghi %r0,8 # channel end ? + je .Lcont + cghi %r0,12 # channel end + device end ? + je .Lcont + llgf %r0,4(%r5) + sgf %r0,8(%r3) # r0/8 = number of ccws executed + mghi %r0,10 # *10 = number of bytes in ccws + llgh %r3,10(%r5) # get residual count + sgr %r0,%r3 # #ccws*80-residual=#bytes read + agr %r2,%r0 + br %r4 # r2 contains the total size +.Lcont: + aghi %r2,0x640 # add 0x640 to total size + larl %r6,.Lccws + lghi %r7,20 +.Lincr: + l %r0,4(%r6) # update CCW data addresses + aghi %r0,0x640 + st %r0,4(%r6) + aghi %r6,8 + brctg %r7,.Lincr + j .Lldlp +.Llderr: + larl %r13,.Lcrash + lpsw 0(%r13) + + .balign 8 +.Lwaitpsw: + .quad 0x0202000180000000,.Lioint +.Lnewpswmask: + .quad 0x0000000180000000 + .balign 8 +.Lorb: .long 0x00000000,0x0080ff00,.Lccws +.Lirb: .long 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 + .balign 8 +.Lcr6: .quad 0x00000000ff000000 + .balign 8 +.Lcrash:.long 0x000a0000,0x00000000 + .balign 8 +.Lccws: .rept 19 + .long 0x02600050,0x00000000 + .endr + .long 0x02200050,0x00000000 +.Lreset:.byte 0xc3,0xc8,0xc1,0xd5,0xc7,0xc5,0x40,0xd9,0xc4,0xd9,0x40 + .byte 0xc1,0xd3,0xd3,0x40,0xd2,0xc5,0xc5,0xd7,0x40,0xd5,0xd6 + .byte 0xc8,0xd6,0xd3,0xc4 # "change rdr all keep nohold" +.L_eof: .long 0xc5d6c600 /* C'EOF' */ +.L_hdr: .long 0xc8c4d900 /* C'HDR' */ + .balign 8 +.Lcpuid:.fill 8,1,0 + +# +# normal startup-code, running in absolute addressing mode +# this is called either by the ipl loader or directly by PSW restart +# or linload or SALIPL +# + .org STARTUP_NORMAL_OFFSET - IPL_START +SYM_CODE_START(startup) + j startup_normal + .org EP_OFFSET - IPL_START +# +# This is a list of s390 kernel entry points. At address 0x1000f the number of +# valid entry points is stored. +# +# IMPORTANT: Do not change this table, it is s390 kernel ABI! +# + .ascii EP_STRING + .byte 0x00,0x01 +# +# kdump startup-code, running in 64 bit absolute addressing mode +# + .org STARTUP_KDUMP_OFFSET - IPL_START + j startup_kdump +SYM_CODE_END(startup) +SYM_CODE_START_LOCAL(startup_normal) + mvi __LC_AR_MODE_ID,1 # set esame flag + slr %r0,%r0 # set cpuid to zero + lhi %r1,2 # mode 2 = esame (dump) + sigp %r1,%r0,0x12 # switch to esame mode + bras %r13,0f + .fill 16,4,0x0 +0: lmh %r0,%r15,0(%r13) # clear high-order half of gprs + sam64 # switch to 64 bit addressing mode + larl %r13,.Lext_new_psw + mvc __LC_EXT_NEW_PSW(16),0(%r13) + larl %r13,.Lpgm_new_psw + mvc __LC_PGM_NEW_PSW(16),0(%r13) + larl %r13,.Lio_new_psw + mvc __LC_IO_NEW_PSW(16),0(%r13) + xc 0x200(256),0x200 # partially clear lowcore + xc 0x300(256),0x300 + xc 0xe00(256),0xe00 + xc 0xf00(256),0xf00 + larl %r13,.Lctl + lctlg %c0,%c15,0(%r13) # load control registers + stcke __LC_BOOT_CLOCK + mvc __LC_LAST_UPDATE_CLOCK(8),__LC_BOOT_CLOCK+1 + larl %r13,6f + spt 0(%r13) + mvc __LC_LAST_UPDATE_TIMER(8),0(%r13) + larl %r15,_stack_end-STACK_FRAME_OVERHEAD + brasl %r14,sclp_early_setup_buffer + brasl %r14,verify_facilities + brasl %r14,startup_kernel +SYM_CODE_END(startup_normal) + + .balign 8 +6: .long 0x7fffffff,0xffffffff +.Lext_new_psw: + .quad 0x0002000180000000,0x1b0 # disabled wait +.Lpgm_new_psw: + .quad 0x0000000180000000,startup_pgm_check_handler +.Lio_new_psw: + .quad 0x0002000180000000,0x1f0 # disabled wait +.Lctl: .quad 0x04040000 # cr0: AFP registers & secondary space + .quad 0 # cr1: primary space segment table + .quad 0 # cr2: dispatchable unit control table + .quad 0 # cr3: instruction authorization + .quad 0xffff # cr4: instruction authorization + .quad 0 # cr5: primary-aste origin + .quad 0 # cr6: I/O interrupts + .quad 0 # cr7: secondary space segment table + .quad 0x0000000000008000 # cr8: access registers translation + .quad 0 # cr9: tracing off + .quad 0 # cr10: tracing off + .quad 0 # cr11: tracing off + .quad 0 # cr12: tracing off + .quad 0 # cr13: home space segment table + .quad 0xc0000000 # cr14: machine check handling off + .quad 0 # cr15: linkage stack operations + +#include "head_kdump.S" + +# +# This program check is active immediately after kernel start +# and until early_pgm_check_handler is set in kernel/early.c +# It simply saves general/control registers and psw in +# the save area and does disabled wait with a faulty address. +# +SYM_CODE_START_LOCAL(startup_pgm_check_handler) + stmg %r8,%r15,__LC_SAVE_AREA_SYNC + la %r8,4095 + stctg %c0,%c15,__LC_CREGS_SAVE_AREA-4095(%r8) + stmg %r0,%r7,__LC_GPREGS_SAVE_AREA-4095(%r8) + mvc __LC_GPREGS_SAVE_AREA-4095+64(64,%r8),__LC_SAVE_AREA_SYNC + mvc __LC_PSW_SAVE_AREA-4095(16,%r8),__LC_PGM_OLD_PSW + mvc __LC_RETURN_PSW(16),__LC_PGM_OLD_PSW + ni __LC_RETURN_PSW,0xfc # remove IO and EX bits + ni __LC_RETURN_PSW+1,0xfb # remove MCHK bit + oi __LC_RETURN_PSW+1,0x2 # set wait state bit + larl %r9,.Lold_psw_disabled_wait + stg %r9,__LC_PGM_NEW_PSW+8 + larl %r15,_dump_info_stack_end-STACK_FRAME_OVERHEAD + brasl %r14,print_pgm_check_info +.Lold_psw_disabled_wait: + la %r8,4095 + lmg %r0,%r15,__LC_GPREGS_SAVE_AREA-4095(%r8) + lpswe __LC_RETURN_PSW # disabled wait +SYM_CODE_END(startup_pgm_check_handler) diff --git a/arch/s390/boot/head_kdump.S b/arch/s390/boot/head_kdump.S new file mode 100644 index 0000000000..f7107c7625 --- /dev/null +++ b/arch/s390/boot/head_kdump.S @@ -0,0 +1,101 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * S390 kdump lowlevel functions (new kernel) + * + * Copyright IBM Corp. 2011 + * Author(s): Michael Holzheu <holzheu@linux.vnet.ibm.com> + */ + +#include <asm/sigp.h> + +#define DATAMOVER_ADDR 0x4000 +#define COPY_PAGE_ADDR 0x6000 + +#ifdef CONFIG_CRASH_DUMP + +# +# kdump entry (new kernel - not yet relocated) +# +# Note: This code has to be position independent +# + +SYM_CODE_START_LOCAL(startup_kdump) + lhi %r1,2 # mode 2 = esame (dump) + sigp %r1,%r0,SIGP_SET_ARCHITECTURE # Switch to esame mode + sam64 # Switch to 64 bit addressing + basr %r13,0 +.Lbase: + larl %r2,.Lbase_addr # Check, if we have been + lg %r2,0(%r2) # already relocated: + clgr %r2,%r13 # + jne .Lrelocate # No : Start data mover + lghi %r2,0 # Yes: Start kdump kernel + brasl %r14,startup_kdump_relocated + +.Lrelocate: + larl %r4,startup + lg %r2,0x418(%r4) # Get kdump base + lg %r3,0x420(%r4) # Get kdump size + + larl %r10,.Lcopy_start # Source of data mover + lghi %r8,DATAMOVER_ADDR # Target of data mover + mvc 0(256,%r8),0(%r10) # Copy data mover code + + agr %r8,%r2 # Copy data mover to + mvc 0(256,%r8),0(%r10) # reserved mem + + lghi %r14,DATAMOVER_ADDR # Jump to copied data mover + basr %r14,%r14 +.Lbase_addr: + .quad .Lbase + +# +# kdump data mover code (runs at address DATAMOVER_ADDR) +# +# r2: kdump base address +# r3: kdump size +# +.Lcopy_start: + basr %r13,0 # Base +0: + lgr %r11,%r2 # Save kdump base address + lgr %r12,%r2 + agr %r12,%r3 # Compute kdump end address + + lghi %r5,0 + lghi %r10,COPY_PAGE_ADDR # Load copy page address +1: + mvc 0(256,%r10),0(%r5) # Copy old kernel to tmp + mvc 0(256,%r5),0(%r11) # Copy new kernel to old + mvc 0(256,%r11),0(%r10) # Copy tmp to new + aghi %r11,256 + aghi %r5,256 + clgr %r11,%r12 + jl 1b + + lg %r14,.Lstartup_kdump-0b(%r13) + basr %r14,%r14 # Start relocated kernel +.Lstartup_kdump: + .long 0x00000000,0x00000000 + startup_kdump_relocated +.Lcopy_end: + +# +# Startup of kdump (relocated new kernel) +# + .balign 2 +startup_kdump_relocated: + basr %r13,0 +0: lpswe .Lrestart_psw-0b(%r13) # Start new kernel... +SYM_CODE_END(startup_kdump) + .balign 8 +.Lrestart_psw: + .quad 0x0000000080000000,0x0000000000000000 + startup +#else +SYM_CODE_START_LOCAL(startup_kdump) + larl %r13,startup_kdump_crash + lpswe 0(%r13) +SYM_CODE_END(startup_kdump) + .balign 8 +startup_kdump_crash: + .quad 0x0002000080000000,0x0000000000000000 + startup_kdump_crash +#endif /* CONFIG_CRASH_DUMP */ diff --git a/arch/s390/boot/install.sh b/arch/s390/boot/install.sh new file mode 100755 index 0000000000..a13dd2f2aa --- /dev/null +++ b/arch/s390/boot/install.sh @@ -0,0 +1,24 @@ +#!/bin/sh +# SPDX-License-Identifier: GPL-2.0 +# +# arch/s390x/boot/install.sh +# +# Copyright (C) 1995 by Linus Torvalds +# +# Adapted from code in arch/i386/boot/Makefile by H. Peter Anvin +# +# "make install" script for s390 architecture +# +# Arguments: +# $1 - kernel version +# $2 - kernel image file +# $3 - kernel map file +# $4 - default install path (blank if root directory) + +echo "Warning: '${INSTALLKERNEL}' command not available - additional " \ + "bootloader config required" >&2 +if [ -f "$4/vmlinuz-$1" ]; then mv -- "$4/vmlinuz-$1" "$4/vmlinuz-$1.old"; fi +if [ -f "$4/System.map-$1" ]; then mv -- "$4/System.map-$1" "$4/System.map-$1.old"; fi + +cat -- "$2" > "$4/vmlinuz-$1" +cp -- "$3" "$4/System.map-$1" diff --git a/arch/s390/boot/ipl_data.c b/arch/s390/boot/ipl_data.c new file mode 100644 index 0000000000..0846e2b249 --- /dev/null +++ b/arch/s390/boot/ipl_data.c @@ -0,0 +1,84 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include <linux/compat.h> +#include <linux/ptrace.h> +#include <asm/cio.h> +#include <asm/asm-offsets.h> +#include "boot.h" + +#define CCW0(cmd, addr, cnt, flg) \ + { .cmd_code = cmd, .cda = addr, .count = cnt, .flags = flg, } + +#define PSW_MASK_DISABLED (PSW_MASK_WAIT | PSW_MASK_EA | PSW_MASK_BA) + +struct ipl_lowcore { + psw_t32 ipl_psw; /* 0x0000 */ + struct ccw0 ccwpgm[2]; /* 0x0008 */ + u8 fill[56]; /* 0x0018 */ + struct ccw0 ccwpgmcc[20]; /* 0x0050 */ + u8 pad_0xf0[0x01a0-0x00f0]; /* 0x00f0 */ + psw_t restart_psw; /* 0x01a0 */ + psw_t external_new_psw; /* 0x01b0 */ + psw_t svc_new_psw; /* 0x01c0 */ + psw_t program_new_psw; /* 0x01d0 */ + psw_t mcck_new_psw; /* 0x01e0 */ + psw_t io_new_psw; /* 0x01f0 */ +}; + +/* + * Initial lowcore for IPL: the first 24 bytes are loaded by IPL to + * addresses 0-23 (a PSW and two CCWs). Bytes 24-79 are discarded. + * The next 160 bytes are loaded to addresses 0x18-0xb7. They form + * the continuation of the CCW program started by IPL and load the + * range 0x0f0-0x730 from the image to the range 0x0f0-0x730 in + * memory. At the end of the channel program the PSW at location 0 is + * loaded. + * Initial processing starts at 0x200 = iplstart. + * + * The restart psw points to iplstart which allows to load a kernel + * image into memory and starting it by a psw restart on any cpu. All + * other default psw new locations contain a disabled wait psw where + * the address indicates which psw was loaded. + * + * Note that the 'file' utility can detect s390 kernel images. For + * that to succeed the two initial CCWs, and the 0x40 fill bytes must + * be present. + */ +static struct ipl_lowcore ipl_lowcore __used __section(".ipldata") = { + .ipl_psw = { .mask = PSW32_MASK_BASE, .addr = PSW32_ADDR_AMODE | IPL_START }, + .ccwpgm = { + [ 0] = CCW0(CCW_CMD_READ_IPL, 0x018, 0x50, CCW_FLAG_SLI | CCW_FLAG_CC), + [ 1] = CCW0(CCW_CMD_READ_IPL, 0x068, 0x50, CCW_FLAG_SLI | CCW_FLAG_CC), + }, + .fill = { + [ 0 ... 55] = 0x40, + }, + .ccwpgmcc = { + [ 0] = CCW0(CCW_CMD_READ_IPL, 0x0f0, 0x50, CCW_FLAG_SLI | CCW_FLAG_CC), + [ 1] = CCW0(CCW_CMD_READ_IPL, 0x140, 0x50, CCW_FLAG_SLI | CCW_FLAG_CC), + [ 2] = CCW0(CCW_CMD_READ_IPL, 0x190, 0x50, CCW_FLAG_SLI | CCW_FLAG_CC), + [ 3] = CCW0(CCW_CMD_READ_IPL, 0x1e0, 0x50, CCW_FLAG_SLI | CCW_FLAG_CC), + [ 4] = CCW0(CCW_CMD_READ_IPL, 0x230, 0x50, CCW_FLAG_SLI | CCW_FLAG_CC), + [ 5] = CCW0(CCW_CMD_READ_IPL, 0x280, 0x50, CCW_FLAG_SLI | CCW_FLAG_CC), + [ 6] = CCW0(CCW_CMD_READ_IPL, 0x2d0, 0x50, CCW_FLAG_SLI | CCW_FLAG_CC), + [ 7] = CCW0(CCW_CMD_READ_IPL, 0x320, 0x50, CCW_FLAG_SLI | CCW_FLAG_CC), + [ 8] = CCW0(CCW_CMD_READ_IPL, 0x370, 0x50, CCW_FLAG_SLI | CCW_FLAG_CC), + [ 9] = CCW0(CCW_CMD_READ_IPL, 0x3c0, 0x50, CCW_FLAG_SLI | CCW_FLAG_CC), + [10] = CCW0(CCW_CMD_READ_IPL, 0x410, 0x50, CCW_FLAG_SLI | CCW_FLAG_CC), + [11] = CCW0(CCW_CMD_READ_IPL, 0x460, 0x50, CCW_FLAG_SLI | CCW_FLAG_CC), + [12] = CCW0(CCW_CMD_READ_IPL, 0x4b0, 0x50, CCW_FLAG_SLI | CCW_FLAG_CC), + [13] = CCW0(CCW_CMD_READ_IPL, 0x500, 0x50, CCW_FLAG_SLI | CCW_FLAG_CC), + [14] = CCW0(CCW_CMD_READ_IPL, 0x550, 0x50, CCW_FLAG_SLI | CCW_FLAG_CC), + [15] = CCW0(CCW_CMD_READ_IPL, 0x5a0, 0x50, CCW_FLAG_SLI | CCW_FLAG_CC), + [16] = CCW0(CCW_CMD_READ_IPL, 0x5f0, 0x50, CCW_FLAG_SLI | CCW_FLAG_CC), + [17] = CCW0(CCW_CMD_READ_IPL, 0x640, 0x50, CCW_FLAG_SLI | CCW_FLAG_CC), + [18] = CCW0(CCW_CMD_READ_IPL, 0x690, 0x50, CCW_FLAG_SLI | CCW_FLAG_CC), + [19] = CCW0(CCW_CMD_READ_IPL, 0x6e0, 0x50, CCW_FLAG_SLI), + }, + .restart_psw = { .mask = 0, .addr = IPL_START, }, + .external_new_psw = { .mask = PSW_MASK_DISABLED, .addr = __LC_EXT_NEW_PSW, }, + .svc_new_psw = { .mask = PSW_MASK_DISABLED, .addr = __LC_SVC_NEW_PSW, }, + .program_new_psw = { .mask = PSW_MASK_DISABLED, .addr = __LC_PGM_NEW_PSW, }, + .mcck_new_psw = { .mask = PSW_MASK_DISABLED, .addr = __LC_MCK_NEW_PSW, }, + .io_new_psw = { .mask = PSW_MASK_DISABLED, .addr = __LC_IO_NEW_PSW, }, +}; diff --git a/arch/s390/boot/ipl_parm.c b/arch/s390/boot/ipl_parm.c new file mode 100644 index 0000000000..7b75217626 --- /dev/null +++ b/arch/s390/boot/ipl_parm.c @@ -0,0 +1,306 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <linux/kernel.h> +#include <linux/init.h> +#include <linux/ctype.h> +#include <linux/pgtable.h> +#include <asm/ebcdic.h> +#include <asm/sclp.h> +#include <asm/sections.h> +#include <asm/boot_data.h> +#include <asm/facility.h> +#include <asm/setup.h> +#include <asm/uv.h> +#include "boot.h" + +struct parmarea parmarea __section(".parmarea") = { + .kernel_version = (unsigned long)kernel_version, + .max_command_line_size = COMMAND_LINE_SIZE, + .command_line = "root=/dev/ram0 ro", +}; + +char __bootdata(early_command_line)[COMMAND_LINE_SIZE]; + +unsigned int __bootdata_preserved(zlib_dfltcc_support) = ZLIB_DFLTCC_FULL; +struct ipl_parameter_block __bootdata_preserved(ipl_block); +int __bootdata_preserved(ipl_block_valid); +int __bootdata_preserved(__kaslr_enabled); + +unsigned long vmalloc_size = VMALLOC_DEFAULT_SIZE; +unsigned long memory_limit; +int vmalloc_size_set; + +static inline int __diag308(unsigned long subcode, void *addr) +{ + unsigned long reg1, reg2; + union register_pair r1; + psw_t old; + + r1.even = (unsigned long) addr; + r1.odd = 0; + asm volatile( + " mvc 0(16,%[psw_old]),0(%[psw_pgm])\n" + " epsw %[reg1],%[reg2]\n" + " st %[reg1],0(%[psw_pgm])\n" + " st %[reg2],4(%[psw_pgm])\n" + " larl %[reg1],1f\n" + " stg %[reg1],8(%[psw_pgm])\n" + " diag %[r1],%[subcode],0x308\n" + "1: mvc 0(16,%[psw_pgm]),0(%[psw_old])\n" + : [r1] "+&d" (r1.pair), + [reg1] "=&d" (reg1), + [reg2] "=&a" (reg2), + "+Q" (S390_lowcore.program_new_psw), + "=Q" (old) + : [subcode] "d" (subcode), + [psw_old] "a" (&old), + [psw_pgm] "a" (&S390_lowcore.program_new_psw) + : "cc", "memory"); + return r1.odd; +} + +void store_ipl_parmblock(void) +{ + int rc; + + rc = __diag308(DIAG308_STORE, &ipl_block); + if (rc == DIAG308_RC_OK && + ipl_block.hdr.version <= IPL_MAX_SUPPORTED_VERSION) + ipl_block_valid = 1; +} + +bool is_ipl_block_dump(void) +{ + if (ipl_block.pb0_hdr.pbt == IPL_PBT_FCP && + ipl_block.fcp.opt == IPL_PB0_FCP_OPT_DUMP) + return true; + if (ipl_block.pb0_hdr.pbt == IPL_PBT_NVME && + ipl_block.nvme.opt == IPL_PB0_NVME_OPT_DUMP) + return true; + if (ipl_block.pb0_hdr.pbt == IPL_PBT_ECKD && + ipl_block.eckd.opt == IPL_PB0_ECKD_OPT_DUMP) + return true; + return false; +} + +static size_t scpdata_length(const u8 *buf, size_t count) +{ + while (count) { + if (buf[count - 1] != '\0' && buf[count - 1] != ' ') + break; + count--; + } + return count; +} + +static size_t ipl_block_get_ascii_scpdata(char *dest, size_t size, + const struct ipl_parameter_block *ipb) +{ + const __u8 *scp_data; + __u32 scp_data_len; + int has_lowercase; + size_t count = 0; + size_t i; + + switch (ipb->pb0_hdr.pbt) { + case IPL_PBT_FCP: + scp_data_len = ipb->fcp.scp_data_len; + scp_data = ipb->fcp.scp_data; + break; + case IPL_PBT_NVME: + scp_data_len = ipb->nvme.scp_data_len; + scp_data = ipb->nvme.scp_data; + break; + case IPL_PBT_ECKD: + scp_data_len = ipb->eckd.scp_data_len; + scp_data = ipb->eckd.scp_data; + break; + + default: + goto out; + } + + count = min(size - 1, scpdata_length(scp_data, scp_data_len)); + if (!count) + goto out; + + has_lowercase = 0; + for (i = 0; i < count; i++) { + if (!isascii(scp_data[i])) { + count = 0; + goto out; + } + if (!has_lowercase && islower(scp_data[i])) + has_lowercase = 1; + } + + if (has_lowercase) + memcpy(dest, scp_data, count); + else + for (i = 0; i < count; i++) + dest[i] = tolower(scp_data[i]); +out: + dest[count] = '\0'; + return count; +} + +static void append_ipl_block_parm(void) +{ + char *parm, *delim; + size_t len, rc = 0; + + len = strlen(early_command_line); + + delim = early_command_line + len; /* '\0' character position */ + parm = early_command_line + len + 1; /* append right after '\0' */ + + switch (ipl_block.pb0_hdr.pbt) { + case IPL_PBT_CCW: + rc = ipl_block_get_ascii_vmparm( + parm, COMMAND_LINE_SIZE - len - 1, &ipl_block); + break; + case IPL_PBT_FCP: + case IPL_PBT_NVME: + case IPL_PBT_ECKD: + rc = ipl_block_get_ascii_scpdata( + parm, COMMAND_LINE_SIZE - len - 1, &ipl_block); + break; + } + if (rc) { + if (*parm == '=') + memmove(early_command_line, parm + 1, rc); + else + *delim = ' '; /* replace '\0' with space */ + } +} + +static inline int has_ebcdic_char(const char *str) +{ + int i; + + for (i = 0; str[i]; i++) + if (str[i] & 0x80) + return 1; + return 0; +} + +void setup_boot_command_line(void) +{ + parmarea.command_line[COMMAND_LINE_SIZE - 1] = 0; + /* convert arch command line to ascii if necessary */ + if (has_ebcdic_char(parmarea.command_line)) + EBCASC(parmarea.command_line, COMMAND_LINE_SIZE); + /* copy arch command line */ + strcpy(early_command_line, strim(parmarea.command_line)); + + /* append IPL PARM data to the boot command line */ + if (!is_prot_virt_guest() && ipl_block_valid) + append_ipl_block_parm(); +} + +static void modify_facility(unsigned long nr, bool clear) +{ + if (clear) + __clear_facility(nr, stfle_fac_list); + else + __set_facility(nr, stfle_fac_list); +} + +static void check_cleared_facilities(void) +{ + unsigned long als[] = { FACILITIES_ALS }; + int i; + + for (i = 0; i < ARRAY_SIZE(als); i++) { + if ((stfle_fac_list[i] & als[i]) != als[i]) { + sclp_early_printk("Warning: The Linux kernel requires facilities cleared via command line option\n"); + print_missing_facilities(); + break; + } + } +} + +static void modify_fac_list(char *str) +{ + unsigned long val, endval; + char *endp; + bool clear; + + while (*str) { + clear = false; + if (*str == '!') { + clear = true; + str++; + } + val = simple_strtoull(str, &endp, 0); + if (str == endp) + break; + str = endp; + if (*str == '-') { + str++; + endval = simple_strtoull(str, &endp, 0); + if (str == endp) + break; + str = endp; + while (val <= endval) { + modify_facility(val, clear); + val++; + } + } else { + modify_facility(val, clear); + } + if (*str != ',') + break; + str++; + } + check_cleared_facilities(); +} + +static char command_line_buf[COMMAND_LINE_SIZE]; +void parse_boot_command_line(void) +{ + char *param, *val; + bool enabled; + char *args; + int rc; + + __kaslr_enabled = IS_ENABLED(CONFIG_RANDOMIZE_BASE); + args = strcpy(command_line_buf, early_command_line); + while (*args) { + args = next_arg(args, ¶m, &val); + + if (!strcmp(param, "mem") && val) + memory_limit = round_down(memparse(val, NULL), PAGE_SIZE); + + if (!strcmp(param, "vmalloc") && val) { + vmalloc_size = round_up(memparse(val, NULL), PAGE_SIZE); + vmalloc_size_set = 1; + } + + if (!strcmp(param, "dfltcc") && val) { + if (!strcmp(val, "off")) + zlib_dfltcc_support = ZLIB_DFLTCC_DISABLED; + else if (!strcmp(val, "on")) + zlib_dfltcc_support = ZLIB_DFLTCC_FULL; + else if (!strcmp(val, "def_only")) + zlib_dfltcc_support = ZLIB_DFLTCC_DEFLATE_ONLY; + else if (!strcmp(val, "inf_only")) + zlib_dfltcc_support = ZLIB_DFLTCC_INFLATE_ONLY; + else if (!strcmp(val, "always")) + zlib_dfltcc_support = ZLIB_DFLTCC_FULL_DEBUG; + } + + if (!strcmp(param, "facilities") && val) + modify_fac_list(val); + + if (!strcmp(param, "nokaslr")) + __kaslr_enabled = 0; + +#if IS_ENABLED(CONFIG_KVM) + if (!strcmp(param, "prot_virt")) { + rc = kstrtobool(val, &enabled); + if (!rc && enabled) + prot_virt_host = 1; + } +#endif + } +} diff --git a/arch/s390/boot/ipl_report.c b/arch/s390/boot/ipl_report.c new file mode 100644 index 0000000000..1803035e68 --- /dev/null +++ b/arch/s390/boot/ipl_report.c @@ -0,0 +1,165 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <linux/init.h> +#include <linux/ctype.h> +#include <asm/ebcdic.h> +#include <asm/sclp.h> +#include <asm/sections.h> +#include <asm/boot_data.h> +#include <asm/physmem_info.h> +#include <uapi/asm/ipl.h> +#include "boot.h" + +int __bootdata_preserved(ipl_secure_flag); + +unsigned long __bootdata_preserved(ipl_cert_list_addr); +unsigned long __bootdata_preserved(ipl_cert_list_size); + +unsigned long __bootdata(early_ipl_comp_list_addr); +unsigned long __bootdata(early_ipl_comp_list_size); + +static struct ipl_rb_certificates *certs; +static struct ipl_rb_components *comps; +static bool ipl_report_needs_saving; + +#define for_each_rb_entry(entry, rb) \ + for (entry = rb->entries; \ + (void *) entry + sizeof(*entry) <= (void *) rb + rb->len; \ + entry++) + +static unsigned long get_cert_comp_list_size(void) +{ + struct ipl_rb_certificate_entry *cert; + struct ipl_rb_component_entry *comp; + size_t size; + + /* + * Find the length for the IPL report boot data + */ + early_ipl_comp_list_size = 0; + for_each_rb_entry(comp, comps) + early_ipl_comp_list_size += sizeof(*comp); + ipl_cert_list_size = 0; + for_each_rb_entry(cert, certs) + ipl_cert_list_size += sizeof(unsigned int) + cert->len; + return ipl_cert_list_size + early_ipl_comp_list_size; +} + +bool ipl_report_certs_intersects(unsigned long addr, unsigned long size, + unsigned long *intersection_start) +{ + struct ipl_rb_certificate_entry *cert; + + if (!ipl_report_needs_saving) + return false; + + for_each_rb_entry(cert, certs) { + if (intersects(addr, size, cert->addr, cert->len)) { + *intersection_start = cert->addr; + return true; + } + } + return false; +} + +static void copy_components_bootdata(void) +{ + struct ipl_rb_component_entry *comp, *ptr; + + ptr = (struct ipl_rb_component_entry *) early_ipl_comp_list_addr; + for_each_rb_entry(comp, comps) + memcpy(ptr++, comp, sizeof(*ptr)); +} + +static void copy_certificates_bootdata(void) +{ + struct ipl_rb_certificate_entry *cert; + void *ptr; + + ptr = (void *) ipl_cert_list_addr; + for_each_rb_entry(cert, certs) { + *(unsigned int *) ptr = cert->len; + ptr += sizeof(unsigned int); + memcpy(ptr, (void *) cert->addr, cert->len); + ptr += cert->len; + } +} + +int read_ipl_report(void) +{ + struct ipl_pl_hdr *pl_hdr; + struct ipl_rl_hdr *rl_hdr; + struct ipl_rb_hdr *rb_hdr; + unsigned long tmp; + void *rl_end; + + /* + * Check if there is a IPL report by looking at the copy + * of the IPL parameter information block. + */ + if (!ipl_block_valid || + !(ipl_block.hdr.flags & IPL_PL_FLAG_IPLSR)) + return -1; + ipl_secure_flag = !!(ipl_block.hdr.flags & IPL_PL_FLAG_SIPL); + /* + * There is an IPL report, to find it load the pointer to the + * IPL parameter information block from lowcore and skip past + * the IPL parameter list, then align the address to a double + * word boundary. + */ + tmp = (unsigned long) S390_lowcore.ipl_parmblock_ptr; + pl_hdr = (struct ipl_pl_hdr *) tmp; + tmp = (tmp + pl_hdr->len + 7) & -8UL; + rl_hdr = (struct ipl_rl_hdr *) tmp; + /* Walk through the IPL report blocks in the IPL Report list */ + certs = NULL; + comps = NULL; + rl_end = (void *) rl_hdr + rl_hdr->len; + rb_hdr = (void *) rl_hdr + sizeof(*rl_hdr); + while ((void *) rb_hdr + sizeof(*rb_hdr) < rl_end && + (void *) rb_hdr + rb_hdr->len <= rl_end) { + + switch (rb_hdr->rbt) { + case IPL_RBT_CERTIFICATES: + certs = (struct ipl_rb_certificates *) rb_hdr; + break; + case IPL_RBT_COMPONENTS: + comps = (struct ipl_rb_components *) rb_hdr; + break; + default: + break; + } + + rb_hdr = (void *) rb_hdr + rb_hdr->len; + } + + /* + * With either the component list or the certificate list + * missing the kernel will stay ignorant of secure IPL. + */ + if (!comps || !certs) { + certs = NULL; + return -1; + } + + ipl_report_needs_saving = true; + physmem_reserve(RR_IPLREPORT, (unsigned long)pl_hdr, + (unsigned long)rl_end - (unsigned long)pl_hdr); + return 0; +} + +void save_ipl_cert_comp_list(void) +{ + unsigned long size; + + if (!ipl_report_needs_saving) + return; + + size = get_cert_comp_list_size(); + early_ipl_comp_list_addr = physmem_alloc_top_down(RR_CERT_COMP_LIST, size, sizeof(int)); + ipl_cert_list_addr = early_ipl_comp_list_addr + early_ipl_comp_list_size; + + copy_components_bootdata(); + copy_certificates_bootdata(); + physmem_free(RR_IPLREPORT); + ipl_report_needs_saving = false; +} diff --git a/arch/s390/boot/ipl_vmparm.c b/arch/s390/boot/ipl_vmparm.c new file mode 100644 index 0000000000..8dacd5fadf --- /dev/null +++ b/arch/s390/boot/ipl_vmparm.c @@ -0,0 +1,2 @@ +// SPDX-License-Identifier: GPL-2.0 +#include "../kernel/ipl_vmparm.c" diff --git a/arch/s390/boot/kaslr.c b/arch/s390/boot/kaslr.c new file mode 100644 index 0000000000..90602101e2 --- /dev/null +++ b/arch/s390/boot/kaslr.c @@ -0,0 +1,198 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright IBM Corp. 2019 + */ +#include <linux/pgtable.h> +#include <asm/physmem_info.h> +#include <asm/cpacf.h> +#include <asm/timex.h> +#include <asm/sclp.h> +#include <asm/kasan.h> +#include "decompressor.h" +#include "boot.h" + +#define PRNG_MODE_TDES 1 +#define PRNG_MODE_SHA512 2 +#define PRNG_MODE_TRNG 3 + +struct prno_parm { + u32 res; + u32 reseed_counter; + u64 stream_bytes; + u8 V[112]; + u8 C[112]; +}; + +struct prng_parm { + u8 parm_block[32]; + u32 reseed_counter; + u64 byte_counter; +}; + +static int check_prng(void) +{ + if (!cpacf_query_func(CPACF_KMC, CPACF_KMC_PRNG)) { + sclp_early_printk("KASLR disabled: CPU has no PRNG\n"); + return 0; + } + if (cpacf_query_func(CPACF_PRNO, CPACF_PRNO_TRNG)) + return PRNG_MODE_TRNG; + if (cpacf_query_func(CPACF_PRNO, CPACF_PRNO_SHA512_DRNG_GEN)) + return PRNG_MODE_SHA512; + else + return PRNG_MODE_TDES; +} + +static int get_random(unsigned long limit, unsigned long *value) +{ + struct prng_parm prng = { + /* initial parameter block for tdes mode, copied from libica */ + .parm_block = { + 0x0F, 0x2B, 0x8E, 0x63, 0x8C, 0x8E, 0xD2, 0x52, + 0x64, 0xB7, 0xA0, 0x7B, 0x75, 0x28, 0xB8, 0xF4, + 0x75, 0x5F, 0xD2, 0xA6, 0x8D, 0x97, 0x11, 0xFF, + 0x49, 0xD8, 0x23, 0xF3, 0x7E, 0x21, 0xEC, 0xA0 + }, + }; + unsigned long seed, random; + struct prno_parm prno; + __u64 entropy[4]; + int mode, i; + + mode = check_prng(); + seed = get_tod_clock_fast(); + switch (mode) { + case PRNG_MODE_TRNG: + cpacf_trng(NULL, 0, (u8 *) &random, sizeof(random)); + break; + case PRNG_MODE_SHA512: + cpacf_prno(CPACF_PRNO_SHA512_DRNG_SEED, &prno, NULL, 0, + (u8 *) &seed, sizeof(seed)); + cpacf_prno(CPACF_PRNO_SHA512_DRNG_GEN, &prno, (u8 *) &random, + sizeof(random), NULL, 0); + break; + case PRNG_MODE_TDES: + /* add entropy */ + *(unsigned long *) prng.parm_block ^= seed; + for (i = 0; i < 16; i++) { + cpacf_kmc(CPACF_KMC_PRNG, prng.parm_block, + (u8 *) entropy, (u8 *) entropy, + sizeof(entropy)); + memcpy(prng.parm_block, entropy, sizeof(entropy)); + } + random = seed; + cpacf_kmc(CPACF_KMC_PRNG, prng.parm_block, (u8 *) &random, + (u8 *) &random, sizeof(random)); + break; + default: + return -1; + } + *value = random % limit; + return 0; +} + +static void sort_reserved_ranges(struct reserved_range *res, unsigned long size) +{ + struct reserved_range tmp; + int i, j; + + for (i = 1; i < size; i++) { + tmp = res[i]; + for (j = i - 1; j >= 0 && res[j].start > tmp.start; j--) + res[j + 1] = res[j]; + res[j + 1] = tmp; + } +} + +static unsigned long iterate_valid_positions(unsigned long size, unsigned long align, + unsigned long _min, unsigned long _max, + struct reserved_range *res, size_t res_count, + bool pos_count, unsigned long find_pos) +{ + unsigned long start, end, tmp_end, range_pos, pos = 0; + struct reserved_range *res_end = res + res_count; + struct reserved_range *skip_res; + int i; + + align = max(align, 8UL); + _min = round_up(_min, align); + for_each_physmem_usable_range(i, &start, &end) { + if (_min >= end) + continue; + start = round_up(start, align); + if (start >= _max) + break; + start = max(_min, start); + end = min(_max, end); + + while (start + size <= end) { + /* skip reserved ranges below the start */ + while (res && res->end <= start) { + res++; + if (res >= res_end) + res = NULL; + } + skip_res = NULL; + tmp_end = end; + /* has intersecting reserved range */ + if (res && res->start < end) { + skip_res = res; + tmp_end = res->start; + } + if (start + size <= tmp_end) { + range_pos = (tmp_end - start - size) / align + 1; + if (pos_count) { + pos += range_pos; + } else { + if (range_pos >= find_pos) + return start + (find_pos - 1) * align; + find_pos -= range_pos; + } + } + if (!skip_res) + break; + start = round_up(skip_res->end, align); + } + } + + return pos_count ? pos : 0; +} + +/* + * Two types of decompressor memory allocations/reserves are considered + * differently. + * + * "Static" or "single" allocations are done via physmem_alloc_range() and + * physmem_reserve(), and they are listed in physmem_info.reserved[]. Each + * type of "static" allocation can only have one allocation per type and + * cannot have chains. + * + * On the other hand, "dynamic" or "repetitive" allocations are done via + * physmem_alloc_top_down(). These allocations are tightly packed together + * top down from the end of online memory. physmem_alloc_pos represents + * current position where those allocations start. + * + * Functions randomize_within_range() and iterate_valid_positions() + * only consider "dynamic" allocations by never looking above + * physmem_alloc_pos. "Static" allocations, however, are explicitly + * considered by checking the "res" (reserves) array. The first + * reserved_range of a "dynamic" allocation may also be checked along the + * way, but it will always be above the maximum value anyway. + */ +unsigned long randomize_within_range(unsigned long size, unsigned long align, + unsigned long min, unsigned long max) +{ + struct reserved_range res[RR_MAX]; + unsigned long max_pos, pos; + + memcpy(res, physmem_info.reserved, sizeof(res)); + sort_reserved_ranges(res, ARRAY_SIZE(res)); + max = min(max, get_physmem_alloc_pos()); + + max_pos = iterate_valid_positions(size, align, min, max, res, ARRAY_SIZE(res), true, 0); + if (!max_pos) + return 0; + if (get_random(max_pos, &pos)) + return 0; + return iterate_valid_positions(size, align, min, max, res, ARRAY_SIZE(res), false, pos + 1); +} diff --git a/arch/s390/boot/machine_kexec_reloc.c b/arch/s390/boot/machine_kexec_reloc.c new file mode 100644 index 0000000000..b7a5d0f720 --- /dev/null +++ b/arch/s390/boot/machine_kexec_reloc.c @@ -0,0 +1,2 @@ +// SPDX-License-Identifier: GPL-2.0 +#include "../kernel/machine_kexec_reloc.c" diff --git a/arch/s390/boot/mem.S b/arch/s390/boot/mem.S new file mode 100644 index 0000000000..b33463633f --- /dev/null +++ b/arch/s390/boot/mem.S @@ -0,0 +1,2 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#include "../lib/mem.S" diff --git a/arch/s390/boot/pgm_check_info.c b/arch/s390/boot/pgm_check_info.c new file mode 100644 index 0000000000..97244cd7a2 --- /dev/null +++ b/arch/s390/boot/pgm_check_info.c @@ -0,0 +1,179 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <linux/kernel.h> +#include <linux/stdarg.h> +#include <linux/string.h> +#include <linux/ctype.h> +#include <asm/stacktrace.h> +#include <asm/boot_data.h> +#include <asm/lowcore.h> +#include <asm/setup.h> +#include <asm/sclp.h> +#include <asm/uv.h> +#include "boot.h" + +const char hex_asc[] = "0123456789abcdef"; + +static char *as_hex(char *dst, unsigned long val, int pad) +{ + char *p, *end = p = dst + max(pad, (int)__fls(val | 1) / 4 + 1); + + for (*p-- = 0; p >= dst; val >>= 4) + *p-- = hex_asc[val & 0x0f]; + return end; +} + +static char *symstart(char *p) +{ + while (*p) + p--; + return p + 1; +} + +static noinline char *findsym(unsigned long ip, unsigned short *off, unsigned short *len) +{ + /* symbol entries are in a form "10000 c4 startup\0" */ + char *a = _decompressor_syms_start; + char *b = _decompressor_syms_end; + unsigned long start; + unsigned long size; + char *pivot; + char *endp; + + while (a < b) { + pivot = symstart(a + (b - a) / 2); + start = simple_strtoull(pivot, &endp, 16); + size = simple_strtoull(endp + 1, &endp, 16); + if (ip < start) { + b = pivot; + continue; + } + if (ip > start + size) { + a = pivot + strlen(pivot) + 1; + continue; + } + *off = ip - start; + *len = size; + return endp + 1; + } + return NULL; +} + +static noinline char *strsym(void *ip) +{ + static char buf[64]; + unsigned short off; + unsigned short len; + char *p; + + p = findsym((unsigned long)ip, &off, &len); + if (p) { + strncpy(buf, p, sizeof(buf)); + /* reserve 15 bytes for offset/len in symbol+0x1234/0x1234 */ + p = buf + strnlen(buf, sizeof(buf) - 15); + strcpy(p, "+0x"); + p = as_hex(p + 3, off, 0); + strcpy(p, "/0x"); + as_hex(p + 3, len, 0); + } else { + as_hex(buf, (unsigned long)ip, 16); + } + return buf; +} + +void decompressor_printk(const char *fmt, ...) +{ + char buf[1024] = { 0 }; + char *end = buf + sizeof(buf) - 1; /* make sure buf is 0 terminated */ + unsigned long pad; + char *p = buf; + va_list args; + + va_start(args, fmt); + for (; p < end && *fmt; fmt++) { + if (*fmt != '%') { + *p++ = *fmt; + continue; + } + pad = isdigit(*++fmt) ? simple_strtol(fmt, (char **)&fmt, 10) : 0; + switch (*fmt) { + case 's': + p = buf + strlcat(buf, va_arg(args, char *), sizeof(buf)); + break; + case 'p': + if (*++fmt != 'S') + goto out; + p = buf + strlcat(buf, strsym(va_arg(args, void *)), sizeof(buf)); + break; + case 'l': + if (*++fmt != 'x' || end - p <= max(sizeof(long) * 2, pad)) + goto out; + p = as_hex(p, va_arg(args, unsigned long), pad); + break; + case 'x': + if (end - p <= max(sizeof(int) * 2, pad)) + goto out; + p = as_hex(p, va_arg(args, unsigned int), pad); + break; + default: + goto out; + } + } +out: + va_end(args); + sclp_early_printk(buf); +} + +void print_stacktrace(unsigned long sp) +{ + struct stack_info boot_stack = { STACK_TYPE_TASK, (unsigned long)_stack_start, + (unsigned long)_stack_end }; + bool first = true; + + decompressor_printk("Call Trace:\n"); + while (!(sp & 0x7) && on_stack(&boot_stack, sp, sizeof(struct stack_frame))) { + struct stack_frame *sf = (struct stack_frame *)sp; + + decompressor_printk(first ? "(sp:%016lx [<%016lx>] %pS)\n" : + " sp:%016lx [<%016lx>] %pS\n", + sp, sf->gprs[8], (void *)sf->gprs[8]); + if (sf->back_chain <= sp) + break; + sp = sf->back_chain; + first = false; + } +} + +void print_pgm_check_info(void) +{ + unsigned long *gpregs = (unsigned long *)S390_lowcore.gpregs_save_area; + struct psw_bits *psw = &psw_bits(S390_lowcore.psw_save_area); + + decompressor_printk("Linux version %s\n", kernel_version); + if (!is_prot_virt_guest() && early_command_line[0]) + decompressor_printk("Kernel command line: %s\n", early_command_line); + decompressor_printk("Kernel fault: interruption code %04x ilc:%x\n", + S390_lowcore.pgm_code, S390_lowcore.pgm_ilc >> 1); + if (kaslr_enabled()) + decompressor_printk("Kernel random base: %lx\n", __kaslr_offset); + decompressor_printk("PSW : %016lx %016lx (%pS)\n", + S390_lowcore.psw_save_area.mask, + S390_lowcore.psw_save_area.addr, + (void *)S390_lowcore.psw_save_area.addr); + decompressor_printk( + " R:%x T:%x IO:%x EX:%x Key:%x M:%x W:%x P:%x AS:%x CC:%x PM:%x RI:%x EA:%x\n", + psw->per, psw->dat, psw->io, psw->ext, psw->key, psw->mcheck, + psw->wait, psw->pstate, psw->as, psw->cc, psw->pm, psw->ri, + psw->eaba); + decompressor_printk("GPRS: %016lx %016lx %016lx %016lx\n", + gpregs[0], gpregs[1], gpregs[2], gpregs[3]); + decompressor_printk(" %016lx %016lx %016lx %016lx\n", + gpregs[4], gpregs[5], gpregs[6], gpregs[7]); + decompressor_printk(" %016lx %016lx %016lx %016lx\n", + gpregs[8], gpregs[9], gpregs[10], gpregs[11]); + decompressor_printk(" %016lx %016lx %016lx %016lx\n", + gpregs[12], gpregs[13], gpregs[14], gpregs[15]); + print_stacktrace(S390_lowcore.gpregs_save_area[15]); + decompressor_printk("Last Breaking-Event-Address:\n"); + decompressor_printk(" [<%016lx>] %pS\n", (unsigned long)S390_lowcore.pgm_last_break, + (void *)S390_lowcore.pgm_last_break); +} diff --git a/arch/s390/boot/physmem_info.c b/arch/s390/boot/physmem_info.c new file mode 100644 index 0000000000..0cf79826ee --- /dev/null +++ b/arch/s390/boot/physmem_info.c @@ -0,0 +1,328 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <linux/processor.h> +#include <linux/errno.h> +#include <linux/init.h> +#include <asm/physmem_info.h> +#include <asm/stacktrace.h> +#include <asm/boot_data.h> +#include <asm/sparsemem.h> +#include <asm/sections.h> +#include <asm/setup.h> +#include <asm/sclp.h> +#include <asm/uv.h> +#include "decompressor.h" +#include "boot.h" + +struct physmem_info __bootdata(physmem_info); +static unsigned int physmem_alloc_ranges; +static unsigned long physmem_alloc_pos; + +/* up to 256 storage elements, 1020 subincrements each */ +#define ENTRIES_EXTENDED_MAX \ + (256 * (1020 / 2) * sizeof(struct physmem_range)) + +static struct physmem_range *__get_physmem_range_ptr(u32 n) +{ + if (n < MEM_INLINED_ENTRIES) + return &physmem_info.online[n]; + if (unlikely(!physmem_info.online_extended)) { + physmem_info.online_extended = (struct physmem_range *)physmem_alloc_range( + RR_MEM_DETECT_EXTENDED, ENTRIES_EXTENDED_MAX, sizeof(long), 0, + physmem_alloc_pos, true); + } + return &physmem_info.online_extended[n - MEM_INLINED_ENTRIES]; +} + +/* + * sequential calls to add_physmem_online_range with adjacent memory ranges + * are merged together into single memory range. + */ +void add_physmem_online_range(u64 start, u64 end) +{ + struct physmem_range *range; + + if (physmem_info.range_count) { + range = __get_physmem_range_ptr(physmem_info.range_count - 1); + if (range->end == start) { + range->end = end; + return; + } + } + + range = __get_physmem_range_ptr(physmem_info.range_count); + range->start = start; + range->end = end; + physmem_info.range_count++; +} + +static int __diag260(unsigned long rx1, unsigned long rx2) +{ + unsigned long reg1, reg2, ry; + union register_pair rx; + psw_t old; + int rc; + + rx.even = rx1; + rx.odd = rx2; + ry = 0x10; /* storage configuration */ + rc = -1; /* fail */ + asm volatile( + " mvc 0(16,%[psw_old]),0(%[psw_pgm])\n" + " epsw %[reg1],%[reg2]\n" + " st %[reg1],0(%[psw_pgm])\n" + " st %[reg2],4(%[psw_pgm])\n" + " larl %[reg1],1f\n" + " stg %[reg1],8(%[psw_pgm])\n" + " diag %[rx],%[ry],0x260\n" + " ipm %[rc]\n" + " srl %[rc],28\n" + "1: mvc 0(16,%[psw_pgm]),0(%[psw_old])\n" + : [reg1] "=&d" (reg1), + [reg2] "=&a" (reg2), + [rc] "+&d" (rc), + [ry] "+&d" (ry), + "+Q" (S390_lowcore.program_new_psw), + "=Q" (old) + : [rx] "d" (rx.pair), + [psw_old] "a" (&old), + [psw_pgm] "a" (&S390_lowcore.program_new_psw) + : "cc", "memory"); + return rc == 0 ? ry : -1; +} + +static int diag260(void) +{ + int rc, i; + + struct { + unsigned long start; + unsigned long end; + } storage_extents[8] __aligned(16); /* VM supports up to 8 extends */ + + memset(storage_extents, 0, sizeof(storage_extents)); + rc = __diag260((unsigned long)storage_extents, sizeof(storage_extents)); + if (rc == -1) + return -1; + + for (i = 0; i < min_t(int, rc, ARRAY_SIZE(storage_extents)); i++) + add_physmem_online_range(storage_extents[i].start, storage_extents[i].end + 1); + return 0; +} + +static int tprot(unsigned long addr) +{ + unsigned long reg1, reg2; + int rc = -EFAULT; + psw_t old; + + asm volatile( + " mvc 0(16,%[psw_old]),0(%[psw_pgm])\n" + " epsw %[reg1],%[reg2]\n" + " st %[reg1],0(%[psw_pgm])\n" + " st %[reg2],4(%[psw_pgm])\n" + " larl %[reg1],1f\n" + " stg %[reg1],8(%[psw_pgm])\n" + " tprot 0(%[addr]),0\n" + " ipm %[rc]\n" + " srl %[rc],28\n" + "1: mvc 0(16,%[psw_pgm]),0(%[psw_old])\n" + : [reg1] "=&d" (reg1), + [reg2] "=&a" (reg2), + [rc] "+&d" (rc), + "=Q" (S390_lowcore.program_new_psw.addr), + "=Q" (old) + : [psw_old] "a" (&old), + [psw_pgm] "a" (&S390_lowcore.program_new_psw), + [addr] "a" (addr) + : "cc", "memory"); + return rc; +} + +static unsigned long search_mem_end(void) +{ + unsigned long range = 1 << (MAX_PHYSMEM_BITS - 20); /* in 1MB blocks */ + unsigned long offset = 0; + unsigned long pivot; + + while (range > 1) { + range >>= 1; + pivot = offset + range; + if (!tprot(pivot << 20)) + offset = pivot; + } + return (offset + 1) << 20; +} + +unsigned long detect_max_physmem_end(void) +{ + unsigned long max_physmem_end = 0; + + if (!sclp_early_get_memsize(&max_physmem_end)) { + physmem_info.info_source = MEM_DETECT_SCLP_READ_INFO; + } else { + max_physmem_end = search_mem_end(); + physmem_info.info_source = MEM_DETECT_BIN_SEARCH; + } + return max_physmem_end; +} + +void detect_physmem_online_ranges(unsigned long max_physmem_end) +{ + if (!sclp_early_read_storage_info()) { + physmem_info.info_source = MEM_DETECT_SCLP_STOR_INFO; + } else if (!diag260()) { + physmem_info.info_source = MEM_DETECT_DIAG260; + } else if (max_physmem_end) { + add_physmem_online_range(0, max_physmem_end); + } +} + +void physmem_set_usable_limit(unsigned long limit) +{ + physmem_info.usable = limit; + physmem_alloc_pos = limit; +} + +static void die_oom(unsigned long size, unsigned long align, unsigned long min, unsigned long max) +{ + unsigned long start, end, total_mem = 0, total_reserved_mem = 0; + struct reserved_range *range; + enum reserved_range_type t; + int i; + + decompressor_printk("Linux version %s\n", kernel_version); + if (!is_prot_virt_guest() && early_command_line[0]) + decompressor_printk("Kernel command line: %s\n", early_command_line); + decompressor_printk("Out of memory allocating %lx bytes %lx aligned in range %lx:%lx\n", + size, align, min, max); + decompressor_printk("Reserved memory ranges:\n"); + for_each_physmem_reserved_range(t, range, &start, &end) { + decompressor_printk("%016lx %016lx %s\n", start, end, get_rr_type_name(t)); + total_reserved_mem += end - start; + } + decompressor_printk("Usable online memory ranges (info source: %s [%x]):\n", + get_physmem_info_source(), physmem_info.info_source); + for_each_physmem_usable_range(i, &start, &end) { + decompressor_printk("%016lx %016lx\n", start, end); + total_mem += end - start; + } + decompressor_printk("Usable online memory total: %lx Reserved: %lx Free: %lx\n", + total_mem, total_reserved_mem, + total_mem > total_reserved_mem ? total_mem - total_reserved_mem : 0); + print_stacktrace(current_frame_address()); + sclp_early_printk("\n\n -- System halted\n"); + disabled_wait(); +} + +void physmem_reserve(enum reserved_range_type type, unsigned long addr, unsigned long size) +{ + physmem_info.reserved[type].start = addr; + physmem_info.reserved[type].end = addr + size; +} + +void physmem_free(enum reserved_range_type type) +{ + physmem_info.reserved[type].start = 0; + physmem_info.reserved[type].end = 0; +} + +static bool __physmem_alloc_intersects(unsigned long addr, unsigned long size, + unsigned long *intersection_start) +{ + unsigned long res_addr, res_size; + int t; + + for (t = 0; t < RR_MAX; t++) { + if (!get_physmem_reserved(t, &res_addr, &res_size)) + continue; + if (intersects(addr, size, res_addr, res_size)) { + *intersection_start = res_addr; + return true; + } + } + return ipl_report_certs_intersects(addr, size, intersection_start); +} + +static unsigned long __physmem_alloc_range(unsigned long size, unsigned long align, + unsigned long min, unsigned long max, + unsigned int from_ranges, unsigned int *ranges_left, + bool die_on_oom) +{ + unsigned int nranges = from_ranges ?: physmem_info.range_count; + unsigned long range_start, range_end; + unsigned long intersection_start; + unsigned long addr, pos = max; + + align = max(align, 8UL); + while (nranges) { + __get_physmem_range(nranges - 1, &range_start, &range_end, false); + pos = min(range_end, pos); + + if (round_up(min, align) + size > pos) + break; + addr = round_down(pos - size, align); + if (range_start > addr) { + nranges--; + continue; + } + if (__physmem_alloc_intersects(addr, size, &intersection_start)) { + pos = intersection_start; + continue; + } + + if (ranges_left) + *ranges_left = nranges; + return addr; + } + if (die_on_oom) + die_oom(size, align, min, max); + return 0; +} + +unsigned long physmem_alloc_range(enum reserved_range_type type, unsigned long size, + unsigned long align, unsigned long min, unsigned long max, + bool die_on_oom) +{ + unsigned long addr; + + max = min(max, physmem_alloc_pos); + addr = __physmem_alloc_range(size, align, min, max, 0, NULL, die_on_oom); + if (addr) + physmem_reserve(type, addr, size); + return addr; +} + +unsigned long physmem_alloc_top_down(enum reserved_range_type type, unsigned long size, + unsigned long align) +{ + struct reserved_range *range = &physmem_info.reserved[type]; + struct reserved_range *new_range; + unsigned int ranges_left; + unsigned long addr; + + addr = __physmem_alloc_range(size, align, 0, physmem_alloc_pos, physmem_alloc_ranges, + &ranges_left, true); + /* if not a consecutive allocation of the same type or first allocation */ + if (range->start != addr + size) { + if (range->end) { + physmem_alloc_pos = __physmem_alloc_range( + sizeof(struct reserved_range), 0, 0, physmem_alloc_pos, + physmem_alloc_ranges, &ranges_left, true); + new_range = (struct reserved_range *)physmem_alloc_pos; + *new_range = *range; + range->chain = new_range; + addr = __physmem_alloc_range(size, align, 0, physmem_alloc_pos, + ranges_left, &ranges_left, true); + } + range->end = addr + size; + } + range->start = addr; + physmem_alloc_pos = addr; + physmem_alloc_ranges = ranges_left; + return addr; +} + +unsigned long get_physmem_alloc_pos(void) +{ + return physmem_alloc_pos; +} diff --git a/arch/s390/boot/sclp_early_core.c b/arch/s390/boot/sclp_early_core.c new file mode 100644 index 0000000000..6f30646afb --- /dev/null +++ b/arch/s390/boot/sclp_early_core.c @@ -0,0 +1,11 @@ +// SPDX-License-Identifier: GPL-2.0 +#include "boot.h" +#include "../../../drivers/s390/char/sclp_early_core.c" + +/* SCLP early buffer must stay page-aligned and below 2GB */ +static char __sclp_early_sccb[EXT_SCCB_READ_SCP] __aligned(PAGE_SIZE); + +void sclp_early_setup_buffer(void) +{ + sclp_early_set_buffer(&__sclp_early_sccb); +} diff --git a/arch/s390/boot/startup.c b/arch/s390/boot/startup.c new file mode 100644 index 0000000000..d3e48bd9c3 --- /dev/null +++ b/arch/s390/boot/startup.c @@ -0,0 +1,378 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <linux/string.h> +#include <linux/elf.h> +#include <asm/boot_data.h> +#include <asm/sections.h> +#include <asm/maccess.h> +#include <asm/cpu_mf.h> +#include <asm/setup.h> +#include <asm/kasan.h> +#include <asm/kexec.h> +#include <asm/sclp.h> +#include <asm/diag.h> +#include <asm/uv.h> +#include <asm/abs_lowcore.h> +#include <asm/physmem_info.h> +#include "decompressor.h" +#include "boot.h" +#include "uv.h" + +unsigned long __bootdata_preserved(__kaslr_offset); +unsigned long __bootdata_preserved(__abs_lowcore); +unsigned long __bootdata_preserved(__memcpy_real_area); +pte_t *__bootdata_preserved(memcpy_real_ptep); +unsigned long __bootdata_preserved(VMALLOC_START); +unsigned long __bootdata_preserved(VMALLOC_END); +struct page *__bootdata_preserved(vmemmap); +unsigned long __bootdata_preserved(vmemmap_size); +unsigned long __bootdata_preserved(MODULES_VADDR); +unsigned long __bootdata_preserved(MODULES_END); +unsigned long __bootdata_preserved(max_mappable); +unsigned long __bootdata(ident_map_size); + +u64 __bootdata_preserved(stfle_fac_list[16]); +u64 __bootdata_preserved(alt_stfle_fac_list[16]); +struct oldmem_data __bootdata_preserved(oldmem_data); + +struct machine_info machine; + +void error(char *x) +{ + sclp_early_printk("\n\n"); + sclp_early_printk(x); + sclp_early_printk("\n\n -- System halted"); + + disabled_wait(); +} + +static void detect_facilities(void) +{ + if (test_facility(8)) { + machine.has_edat1 = 1; + __ctl_set_bit(0, 23); + } + if (test_facility(78)) + machine.has_edat2 = 1; + if (test_facility(130)) + machine.has_nx = 1; +} + +static void setup_lpp(void) +{ + S390_lowcore.current_pid = 0; + S390_lowcore.lpp = LPP_MAGIC; + if (test_facility(40)) + lpp(&S390_lowcore.lpp); +} + +#ifdef CONFIG_KERNEL_UNCOMPRESSED +unsigned long mem_safe_offset(void) +{ + return vmlinux.default_lma + vmlinux.image_size + vmlinux.bss_size; +} +#endif + +static void rescue_initrd(unsigned long min, unsigned long max) +{ + unsigned long old_addr, addr, size; + + if (!IS_ENABLED(CONFIG_BLK_DEV_INITRD)) + return; + if (!get_physmem_reserved(RR_INITRD, &addr, &size)) + return; + if (addr >= min && addr + size <= max) + return; + old_addr = addr; + physmem_free(RR_INITRD); + addr = physmem_alloc_top_down(RR_INITRD, size, 0); + memmove((void *)addr, (void *)old_addr, size); +} + +static void copy_bootdata(void) +{ + if (__boot_data_end - __boot_data_start != vmlinux.bootdata_size) + error(".boot.data section size mismatch"); + memcpy((void *)vmlinux.bootdata_off, __boot_data_start, vmlinux.bootdata_size); + if (__boot_data_preserved_end - __boot_data_preserved_start != vmlinux.bootdata_preserved_size) + error(".boot.preserved.data section size mismatch"); + memcpy((void *)vmlinux.bootdata_preserved_off, __boot_data_preserved_start, vmlinux.bootdata_preserved_size); +} + +static void handle_relocs(unsigned long offset) +{ + Elf64_Rela *rela_start, *rela_end, *rela; + int r_type, r_sym, rc; + Elf64_Addr loc, val; + Elf64_Sym *dynsym; + + rela_start = (Elf64_Rela *) vmlinux.rela_dyn_start; + rela_end = (Elf64_Rela *) vmlinux.rela_dyn_end; + dynsym = (Elf64_Sym *) vmlinux.dynsym_start; + for (rela = rela_start; rela < rela_end; rela++) { + loc = rela->r_offset + offset; + val = rela->r_addend; + r_sym = ELF64_R_SYM(rela->r_info); + if (r_sym) { + if (dynsym[r_sym].st_shndx != SHN_UNDEF) + val += dynsym[r_sym].st_value + offset; + } else { + /* + * 0 == undefined symbol table index (STN_UNDEF), + * used for R_390_RELATIVE, only add KASLR offset + */ + val += offset; + } + r_type = ELF64_R_TYPE(rela->r_info); + rc = arch_kexec_do_relocs(r_type, (void *) loc, val, 0); + if (rc) + error("Unknown relocation type"); + } +} + +/* + * Merge information from several sources into a single ident_map_size value. + * "ident_map_size" represents the upper limit of physical memory we may ever + * reach. It might not be all online memory, but also include standby (offline) + * memory. "ident_map_size" could be lower then actual standby or even online + * memory present, due to limiting factors. We should never go above this limit. + * It is the size of our identity mapping. + * + * Consider the following factors: + * 1. max_physmem_end - end of physical memory online or standby. + * Always >= end of the last online memory range (get_physmem_online_end()). + * 2. CONFIG_MAX_PHYSMEM_BITS - the maximum size of physical memory the + * kernel is able to support. + * 3. "mem=" kernel command line option which limits physical memory usage. + * 4. OLDMEM_BASE which is a kdump memory limit when the kernel is executed as + * crash kernel. + * 5. "hsa" size which is a memory limit when the kernel is executed during + * zfcp/nvme dump. + */ +static void setup_ident_map_size(unsigned long max_physmem_end) +{ + unsigned long hsa_size; + + ident_map_size = max_physmem_end; + if (memory_limit) + ident_map_size = min(ident_map_size, memory_limit); + ident_map_size = min(ident_map_size, 1UL << MAX_PHYSMEM_BITS); + +#ifdef CONFIG_CRASH_DUMP + if (oldmem_data.start) { + __kaslr_enabled = 0; + ident_map_size = min(ident_map_size, oldmem_data.size); + } else if (ipl_block_valid && is_ipl_block_dump()) { + __kaslr_enabled = 0; + if (!sclp_early_get_hsa_size(&hsa_size) && hsa_size) + ident_map_size = min(ident_map_size, hsa_size); + } +#endif +} + +static unsigned long setup_kernel_memory_layout(void) +{ + unsigned long vmemmap_start; + unsigned long asce_limit; + unsigned long rte_size; + unsigned long pages; + unsigned long vsize; + unsigned long vmax; + + pages = ident_map_size / PAGE_SIZE; + /* vmemmap contains a multiple of PAGES_PER_SECTION struct pages */ + vmemmap_size = SECTION_ALIGN_UP(pages) * sizeof(struct page); + + /* choose kernel address space layout: 4 or 3 levels. */ + vsize = round_up(ident_map_size, _REGION3_SIZE) + vmemmap_size + + MODULES_LEN + MEMCPY_REAL_SIZE + ABS_LOWCORE_MAP_SIZE; + vsize = size_add(vsize, vmalloc_size); + if (IS_ENABLED(CONFIG_KASAN) || (vsize > _REGION2_SIZE)) { + asce_limit = _REGION1_SIZE; + rte_size = _REGION2_SIZE; + } else { + asce_limit = _REGION2_SIZE; + rte_size = _REGION3_SIZE; + } + + /* + * Forcing modules and vmalloc area under the ultravisor + * secure storage limit, so that any vmalloc allocation + * we do could be used to back secure guest storage. + */ + vmax = adjust_to_uv_max(asce_limit); +#ifdef CONFIG_KASAN + /* force vmalloc and modules below kasan shadow */ + vmax = min(vmax, KASAN_SHADOW_START); +#endif + __memcpy_real_area = round_down(vmax - MEMCPY_REAL_SIZE, PAGE_SIZE); + __abs_lowcore = round_down(__memcpy_real_area - ABS_LOWCORE_MAP_SIZE, + sizeof(struct lowcore)); + MODULES_END = round_down(__abs_lowcore, _SEGMENT_SIZE); + MODULES_VADDR = MODULES_END - MODULES_LEN; + VMALLOC_END = MODULES_VADDR; + + /* allow vmalloc area to occupy up to about 1/2 of the rest virtual space left */ + vmalloc_size = min(vmalloc_size, round_down(VMALLOC_END / 2, _REGION3_SIZE)); + VMALLOC_START = VMALLOC_END - vmalloc_size; + + /* split remaining virtual space between 1:1 mapping & vmemmap array */ + pages = VMALLOC_START / (PAGE_SIZE + sizeof(struct page)); + pages = SECTION_ALIGN_UP(pages); + /* keep vmemmap_start aligned to a top level region table entry */ + vmemmap_start = round_down(VMALLOC_START - pages * sizeof(struct page), rte_size); + vmemmap_start = min(vmemmap_start, 1UL << MAX_PHYSMEM_BITS); + /* maximum mappable address as seen by arch_get_mappable_range() */ + max_mappable = vmemmap_start; + /* make sure identity map doesn't overlay with vmemmap */ + ident_map_size = min(ident_map_size, vmemmap_start); + vmemmap_size = SECTION_ALIGN_UP(ident_map_size / PAGE_SIZE) * sizeof(struct page); + /* make sure vmemmap doesn't overlay with vmalloc area */ + VMALLOC_START = max(vmemmap_start + vmemmap_size, VMALLOC_START); + vmemmap = (struct page *)vmemmap_start; + + return asce_limit; +} + +/* + * This function clears the BSS section of the decompressed Linux kernel and NOT the decompressor's. + */ +static void clear_bss_section(unsigned long vmlinux_lma) +{ + memset((void *)vmlinux_lma + vmlinux.image_size, 0, vmlinux.bss_size); +} + +/* + * Set vmalloc area size to an 8th of (potential) physical memory + * size, unless size has been set by kernel command line parameter. + */ +static void setup_vmalloc_size(void) +{ + unsigned long size; + + if (vmalloc_size_set) + return; + size = round_up(ident_map_size / 8, _SEGMENT_SIZE); + vmalloc_size = max(size, vmalloc_size); +} + +static void offset_vmlinux_info(unsigned long offset) +{ + *(unsigned long *)(&vmlinux.entry) += offset; + vmlinux.bootdata_off += offset; + vmlinux.bootdata_preserved_off += offset; + vmlinux.rela_dyn_start += offset; + vmlinux.rela_dyn_end += offset; + vmlinux.dynsym_start += offset; + vmlinux.init_mm_off += offset; + vmlinux.swapper_pg_dir_off += offset; + vmlinux.invalid_pg_dir_off += offset; +#ifdef CONFIG_KASAN + vmlinux.kasan_early_shadow_page_off += offset; + vmlinux.kasan_early_shadow_pte_off += offset; + vmlinux.kasan_early_shadow_pmd_off += offset; + vmlinux.kasan_early_shadow_pud_off += offset; + vmlinux.kasan_early_shadow_p4d_off += offset; +#endif +} + +void startup_kernel(void) +{ + unsigned long max_physmem_end; + unsigned long vmlinux_lma = 0; + unsigned long amode31_lma = 0; + unsigned long asce_limit; + unsigned long safe_addr; + void *img; + psw_t psw; + + setup_lpp(); + safe_addr = mem_safe_offset(); + + /* + * Reserve decompressor memory together with decompression heap, buffer and + * memory which might be occupied by uncompressed kernel at default 1Mb + * position (if KASLR is off or failed). + */ + physmem_reserve(RR_DECOMPRESSOR, 0, safe_addr); + if (IS_ENABLED(CONFIG_BLK_DEV_INITRD) && parmarea.initrd_size) + physmem_reserve(RR_INITRD, parmarea.initrd_start, parmarea.initrd_size); + oldmem_data.start = parmarea.oldmem_base; + oldmem_data.size = parmarea.oldmem_size; + + store_ipl_parmblock(); + read_ipl_report(); + uv_query_info(); + sclp_early_read_info(); + setup_boot_command_line(); + parse_boot_command_line(); + detect_facilities(); + sanitize_prot_virt_host(); + max_physmem_end = detect_max_physmem_end(); + setup_ident_map_size(max_physmem_end); + setup_vmalloc_size(); + asce_limit = setup_kernel_memory_layout(); + /* got final ident_map_size, physmem allocations could be performed now */ + physmem_set_usable_limit(ident_map_size); + detect_physmem_online_ranges(max_physmem_end); + save_ipl_cert_comp_list(); + rescue_initrd(safe_addr, ident_map_size); + + if (kaslr_enabled()) { + vmlinux_lma = randomize_within_range(vmlinux.image_size + vmlinux.bss_size, + THREAD_SIZE, vmlinux.default_lma, + ident_map_size); + if (vmlinux_lma) { + __kaslr_offset = vmlinux_lma - vmlinux.default_lma; + offset_vmlinux_info(__kaslr_offset); + } + } + vmlinux_lma = vmlinux_lma ?: vmlinux.default_lma; + physmem_reserve(RR_VMLINUX, vmlinux_lma, vmlinux.image_size + vmlinux.bss_size); + + if (!IS_ENABLED(CONFIG_KERNEL_UNCOMPRESSED)) { + img = decompress_kernel(); + memmove((void *)vmlinux_lma, img, vmlinux.image_size); + } else if (__kaslr_offset) { + img = (void *)vmlinux.default_lma; + memmove((void *)vmlinux_lma, img, vmlinux.image_size); + memset(img, 0, vmlinux.image_size); + } + + /* vmlinux decompression is done, shrink reserved low memory */ + physmem_reserve(RR_DECOMPRESSOR, 0, (unsigned long)_decompressor_end); + if (kaslr_enabled()) + amode31_lma = randomize_within_range(vmlinux.amode31_size, PAGE_SIZE, 0, SZ_2G); + amode31_lma = amode31_lma ?: vmlinux.default_lma - vmlinux.amode31_size; + physmem_reserve(RR_AMODE31, amode31_lma, vmlinux.amode31_size); + + /* + * The order of the following operations is important: + * + * - handle_relocs() must follow clear_bss_section() to establish static + * memory references to data in .bss to be used by setup_vmem() + * (i.e init_mm.pgd) + * + * - setup_vmem() must follow handle_relocs() to be able using + * static memory references to data in .bss (i.e init_mm.pgd) + * + * - copy_bootdata() must follow setup_vmem() to propagate changes to + * bootdata made by setup_vmem() + */ + clear_bss_section(vmlinux_lma); + handle_relocs(__kaslr_offset); + setup_vmem(asce_limit); + copy_bootdata(); + + /* + * Save KASLR offset for early dumps, before vmcore_info is set. + * Mark as uneven to distinguish from real vmcore_info pointer. + */ + S390_lowcore.vmcore_info = __kaslr_offset ? __kaslr_offset | 0x1UL : 0; + + /* + * Jump to the decompressed kernel entry point and switch DAT mode on. + */ + psw.addr = vmlinux.entry; + psw.mask = PSW_KERNEL_BITS; + __load_psw(psw); +} diff --git a/arch/s390/boot/string.c b/arch/s390/boot/string.c new file mode 100644 index 0000000000..faccb33b46 --- /dev/null +++ b/arch/s390/boot/string.c @@ -0,0 +1,140 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <linux/ctype.h> +#include <linux/kernel.h> +#include <linux/errno.h> +#undef CONFIG_KASAN +#undef CONFIG_KASAN_GENERIC +#include "../lib/string.c" + +int strncmp(const char *cs, const char *ct, size_t count) +{ + unsigned char c1, c2; + + while (count) { + c1 = *cs++; + c2 = *ct++; + if (c1 != c2) + return c1 < c2 ? -1 : 1; + if (!c1) + break; + count--; + } + return 0; +} + +char *skip_spaces(const char *str) +{ + while (isspace(*str)) + ++str; + return (char *)str; +} + +char *strim(char *s) +{ + size_t size; + char *end; + + size = strlen(s); + if (!size) + return s; + + end = s + size - 1; + while (end >= s && isspace(*end)) + end--; + *(end + 1) = '\0'; + + return skip_spaces(s); +} + +/* Works only for digits and letters, but small and fast */ +#define TOLOWER(x) ((x) | 0x20) + +static unsigned int simple_guess_base(const char *cp) +{ + if (cp[0] == '0') { + if (TOLOWER(cp[1]) == 'x' && isxdigit(cp[2])) + return 16; + else + return 8; + } else { + return 10; + } +} + +/** + * simple_strtoull - convert a string to an unsigned long long + * @cp: The start of the string + * @endp: A pointer to the end of the parsed string will be placed here + * @base: The number base to use + */ + +unsigned long long simple_strtoull(const char *cp, char **endp, + unsigned int base) +{ + unsigned long long result = 0; + + if (!base) + base = simple_guess_base(cp); + + if (base == 16 && cp[0] == '0' && TOLOWER(cp[1]) == 'x') + cp += 2; + + while (isxdigit(*cp)) { + unsigned int value; + + value = isdigit(*cp) ? *cp - '0' : TOLOWER(*cp) - 'a' + 10; + if (value >= base) + break; + result = result * base + value; + cp++; + } + if (endp) + *endp = (char *)cp; + + return result; +} + +long simple_strtol(const char *cp, char **endp, unsigned int base) +{ + if (*cp == '-') + return -simple_strtoull(cp + 1, endp, base); + + return simple_strtoull(cp, endp, base); +} + +int kstrtobool(const char *s, bool *res) +{ + if (!s) + return -EINVAL; + + switch (s[0]) { + case 'y': + case 'Y': + case '1': + *res = true; + return 0; + case 'n': + case 'N': + case '0': + *res = false; + return 0; + case 'o': + case 'O': + switch (s[1]) { + case 'n': + case 'N': + *res = true; + return 0; + case 'f': + case 'F': + *res = false; + return 0; + default: + break; + } + default: + break; + } + + return -EINVAL; +} diff --git a/arch/s390/boot/uv.c b/arch/s390/boot/uv.c new file mode 100644 index 0000000000..1e66d2cbb0 --- /dev/null +++ b/arch/s390/boot/uv.c @@ -0,0 +1,95 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <asm/uv.h> +#include <asm/boot_data.h> +#include <asm/facility.h> +#include <asm/sections.h> + +#include "boot.h" +#include "uv.h" + +/* will be used in arch/s390/kernel/uv.c */ +#ifdef CONFIG_PROTECTED_VIRTUALIZATION_GUEST +int __bootdata_preserved(prot_virt_guest); +#endif +#if IS_ENABLED(CONFIG_KVM) +int __bootdata_preserved(prot_virt_host); +#endif +struct uv_info __bootdata_preserved(uv_info); + +void uv_query_info(void) +{ + struct uv_cb_qui uvcb = { + .header.cmd = UVC_CMD_QUI, + .header.len = sizeof(uvcb) + }; + + if (!test_facility(158)) + return; + + /* rc==0x100 means that there is additional data we do not process */ + if (uv_call(0, (uint64_t)&uvcb) && uvcb.header.rc != 0x100) + return; + + if (IS_ENABLED(CONFIG_KVM)) { + memcpy(uv_info.inst_calls_list, uvcb.inst_calls_list, sizeof(uv_info.inst_calls_list)); + uv_info.uv_base_stor_len = uvcb.uv_base_stor_len; + uv_info.guest_base_stor_len = uvcb.conf_base_phys_stor_len; + uv_info.guest_virt_base_stor_len = uvcb.conf_base_virt_stor_len; + uv_info.guest_virt_var_stor_len = uvcb.conf_virt_var_stor_len; + uv_info.guest_cpu_stor_len = uvcb.cpu_stor_len; + uv_info.max_sec_stor_addr = ALIGN(uvcb.max_guest_stor_addr, PAGE_SIZE); + uv_info.max_num_sec_conf = uvcb.max_num_sec_conf; + uv_info.max_guest_cpu_id = uvcb.max_guest_cpu_id; + uv_info.uv_feature_indications = uvcb.uv_feature_indications; + uv_info.supp_se_hdr_ver = uvcb.supp_se_hdr_versions; + uv_info.supp_se_hdr_pcf = uvcb.supp_se_hdr_pcf; + uv_info.conf_dump_storage_state_len = uvcb.conf_dump_storage_state_len; + uv_info.conf_dump_finalize_len = uvcb.conf_dump_finalize_len; + uv_info.supp_att_req_hdr_ver = uvcb.supp_att_req_hdr_ver; + uv_info.supp_att_pflags = uvcb.supp_att_pflags; + uv_info.supp_add_secret_req_ver = uvcb.supp_add_secret_req_ver; + uv_info.supp_add_secret_pcf = uvcb.supp_add_secret_pcf; + uv_info.supp_secret_types = uvcb.supp_secret_types; + uv_info.max_secrets = uvcb.max_secrets; + } + +#ifdef CONFIG_PROTECTED_VIRTUALIZATION_GUEST + if (test_bit_inv(BIT_UVC_CMD_SET_SHARED_ACCESS, (unsigned long *)uvcb.inst_calls_list) && + test_bit_inv(BIT_UVC_CMD_REMOVE_SHARED_ACCESS, (unsigned long *)uvcb.inst_calls_list)) + prot_virt_guest = 1; +#endif +} + +#if IS_ENABLED(CONFIG_KVM) +unsigned long adjust_to_uv_max(unsigned long limit) +{ + if (is_prot_virt_host() && uv_info.max_sec_stor_addr) + limit = min_t(unsigned long, limit, uv_info.max_sec_stor_addr); + return limit; +} + +static int is_prot_virt_host_capable(void) +{ + /* disable if no prot_virt=1 given on command-line */ + if (!is_prot_virt_host()) + return 0; + /* disable if protected guest virtualization is enabled */ + if (is_prot_virt_guest()) + return 0; + /* disable if no hardware support */ + if (!test_facility(158)) + return 0; + /* disable if kdump */ + if (oldmem_data.start) + return 0; + /* disable if stand-alone dump */ + if (ipl_block_valid && is_ipl_block_dump()) + return 0; + return 1; +} + +void sanitize_prot_virt_host(void) +{ + prot_virt_host = is_prot_virt_host_capable(); +} +#endif diff --git a/arch/s390/boot/uv.h b/arch/s390/boot/uv.h new file mode 100644 index 0000000000..0f3070856f --- /dev/null +++ b/arch/s390/boot/uv.h @@ -0,0 +1,22 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef BOOT_UV_H +#define BOOT_UV_H + +#if IS_ENABLED(CONFIG_KVM) +unsigned long adjust_to_uv_max(unsigned long limit); +void sanitize_prot_virt_host(void); +#else +static inline unsigned long adjust_to_uv_max(unsigned long limit) +{ + return limit; +} +static inline void sanitize_prot_virt_host(void) {} +#endif + +#if defined(CONFIG_PROTECTED_VIRTUALIZATION_GUEST) || IS_ENABLED(CONFIG_KVM) +void uv_query_info(void); +#else +static inline void uv_query_info(void) {} +#endif + +#endif /* BOOT_UV_H */ diff --git a/arch/s390/boot/version.c b/arch/s390/boot/version.c new file mode 100644 index 0000000000..fd32f03877 --- /dev/null +++ b/arch/s390/boot/version.c @@ -0,0 +1,8 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <generated/utsversion.h> +#include <generated/utsrelease.h> +#include <generated/compile.h> +#include "boot.h" + +const char kernel_version[] = UTS_RELEASE + " (" LINUX_COMPILE_BY "@" LINUX_COMPILE_HOST ") " UTS_VERSION; diff --git a/arch/s390/boot/vmem.c b/arch/s390/boot/vmem.c new file mode 100644 index 0000000000..442a74f113 --- /dev/null +++ b/arch/s390/boot/vmem.c @@ -0,0 +1,458 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <linux/sched/task.h> +#include <linux/pgtable.h> +#include <linux/kasan.h> +#include <asm/pgalloc.h> +#include <asm/facility.h> +#include <asm/sections.h> +#include <asm/physmem_info.h> +#include <asm/maccess.h> +#include <asm/abs_lowcore.h> +#include "decompressor.h" +#include "boot.h" + +unsigned long __bootdata_preserved(s390_invalid_asce); + +#ifdef CONFIG_PROC_FS +atomic_long_t __bootdata_preserved(direct_pages_count[PG_DIRECT_MAP_MAX]); +#endif + +#define init_mm (*(struct mm_struct *)vmlinux.init_mm_off) +#define swapper_pg_dir vmlinux.swapper_pg_dir_off +#define invalid_pg_dir vmlinux.invalid_pg_dir_off + +enum populate_mode { + POPULATE_NONE, + POPULATE_DIRECT, + POPULATE_ABS_LOWCORE, +#ifdef CONFIG_KASAN + POPULATE_KASAN_MAP_SHADOW, + POPULATE_KASAN_ZERO_SHADOW, + POPULATE_KASAN_SHALLOW +#endif +}; + +static void pgtable_populate(unsigned long addr, unsigned long end, enum populate_mode mode); + +#ifdef CONFIG_KASAN + +#define kasan_early_shadow_page vmlinux.kasan_early_shadow_page_off +#define kasan_early_shadow_pte ((pte_t *)vmlinux.kasan_early_shadow_pte_off) +#define kasan_early_shadow_pmd ((pmd_t *)vmlinux.kasan_early_shadow_pmd_off) +#define kasan_early_shadow_pud ((pud_t *)vmlinux.kasan_early_shadow_pud_off) +#define kasan_early_shadow_p4d ((p4d_t *)vmlinux.kasan_early_shadow_p4d_off) +#define __sha(x) ((unsigned long)kasan_mem_to_shadow((void *)x)) + +static pte_t pte_z; + +static inline void kasan_populate(unsigned long start, unsigned long end, enum populate_mode mode) +{ + start = PAGE_ALIGN_DOWN(__sha(start)); + end = PAGE_ALIGN(__sha(end)); + pgtable_populate(start, end, mode); +} + +static void kasan_populate_shadow(void) +{ + pmd_t pmd_z = __pmd(__pa(kasan_early_shadow_pte) | _SEGMENT_ENTRY); + pud_t pud_z = __pud(__pa(kasan_early_shadow_pmd) | _REGION3_ENTRY); + p4d_t p4d_z = __p4d(__pa(kasan_early_shadow_pud) | _REGION2_ENTRY); + unsigned long memgap_start = 0; + unsigned long untracked_end; + unsigned long start, end; + int i; + + pte_z = __pte(__pa(kasan_early_shadow_page) | pgprot_val(PAGE_KERNEL_RO)); + if (!machine.has_nx) + pte_z = clear_pte_bit(pte_z, __pgprot(_PAGE_NOEXEC)); + crst_table_init((unsigned long *)kasan_early_shadow_p4d, p4d_val(p4d_z)); + crst_table_init((unsigned long *)kasan_early_shadow_pud, pud_val(pud_z)); + crst_table_init((unsigned long *)kasan_early_shadow_pmd, pmd_val(pmd_z)); + memset64((u64 *)kasan_early_shadow_pte, pte_val(pte_z), PTRS_PER_PTE); + + /* + * Current memory layout: + * +- 0 -------------+ +- shadow start -+ + * |1:1 ident mapping| /|1/8 of ident map| + * | | / | | + * +-end of ident map+ / +----------------+ + * | ... gap ... | / | kasan | + * | | / | zero page | + * +- vmalloc area -+ / | mapping | + * | vmalloc_size | / | (untracked) | + * +- modules vaddr -+ / +----------------+ + * | 2Gb |/ | unmapped | allocated per module + * +- shadow start -+ +----------------+ + * | 1/8 addr space | | zero pg mapping| (untracked) + * +- shadow end ----+---------+- shadow end ---+ + * + * Current memory layout (KASAN_VMALLOC): + * +- 0 -------------+ +- shadow start -+ + * |1:1 ident mapping| /|1/8 of ident map| + * | | / | | + * +-end of ident map+ / +----------------+ + * | ... gap ... | / | kasan zero page| (untracked) + * | | / | mapping | + * +- vmalloc area -+ / +----------------+ + * | vmalloc_size | / |shallow populate| + * +- modules vaddr -+ / +----------------+ + * | 2Gb |/ |shallow populate| + * +- shadow start -+ +----------------+ + * | 1/8 addr space | | zero pg mapping| (untracked) + * +- shadow end ----+---------+- shadow end ---+ + */ + + for_each_physmem_usable_range(i, &start, &end) { + kasan_populate(start, end, POPULATE_KASAN_MAP_SHADOW); + if (memgap_start && physmem_info.info_source == MEM_DETECT_DIAG260) + kasan_populate(memgap_start, start, POPULATE_KASAN_ZERO_SHADOW); + memgap_start = end; + } + if (IS_ENABLED(CONFIG_KASAN_VMALLOC)) { + untracked_end = VMALLOC_START; + /* shallowly populate kasan shadow for vmalloc and modules */ + kasan_populate(VMALLOC_START, MODULES_END, POPULATE_KASAN_SHALLOW); + } else { + untracked_end = MODULES_VADDR; + } + /* populate kasan shadow for untracked memory */ + kasan_populate(ident_map_size, untracked_end, POPULATE_KASAN_ZERO_SHADOW); + kasan_populate(MODULES_END, _REGION1_SIZE, POPULATE_KASAN_ZERO_SHADOW); +} + +static bool kasan_pgd_populate_zero_shadow(pgd_t *pgd, unsigned long addr, + unsigned long end, enum populate_mode mode) +{ + if (mode == POPULATE_KASAN_ZERO_SHADOW && + IS_ALIGNED(addr, PGDIR_SIZE) && end - addr >= PGDIR_SIZE) { + pgd_populate(&init_mm, pgd, kasan_early_shadow_p4d); + return true; + } + return false; +} + +static bool kasan_p4d_populate_zero_shadow(p4d_t *p4d, unsigned long addr, + unsigned long end, enum populate_mode mode) +{ + if (mode == POPULATE_KASAN_ZERO_SHADOW && + IS_ALIGNED(addr, P4D_SIZE) && end - addr >= P4D_SIZE) { + p4d_populate(&init_mm, p4d, kasan_early_shadow_pud); + return true; + } + return false; +} + +static bool kasan_pud_populate_zero_shadow(pud_t *pud, unsigned long addr, + unsigned long end, enum populate_mode mode) +{ + if (mode == POPULATE_KASAN_ZERO_SHADOW && + IS_ALIGNED(addr, PUD_SIZE) && end - addr >= PUD_SIZE) { + pud_populate(&init_mm, pud, kasan_early_shadow_pmd); + return true; + } + return false; +} + +static bool kasan_pmd_populate_zero_shadow(pmd_t *pmd, unsigned long addr, + unsigned long end, enum populate_mode mode) +{ + if (mode == POPULATE_KASAN_ZERO_SHADOW && + IS_ALIGNED(addr, PMD_SIZE) && end - addr >= PMD_SIZE) { + pmd_populate(&init_mm, pmd, kasan_early_shadow_pte); + return true; + } + return false; +} + +static bool kasan_pte_populate_zero_shadow(pte_t *pte, enum populate_mode mode) +{ + pte_t entry; + + if (mode == POPULATE_KASAN_ZERO_SHADOW) { + set_pte(pte, pte_z); + return true; + } + return false; +} +#else + +static inline void kasan_populate_shadow(void) {} + +static inline bool kasan_pgd_populate_zero_shadow(pgd_t *pgd, unsigned long addr, + unsigned long end, enum populate_mode mode) +{ + return false; +} + +static inline bool kasan_p4d_populate_zero_shadow(p4d_t *p4d, unsigned long addr, + unsigned long end, enum populate_mode mode) +{ + return false; +} + +static inline bool kasan_pud_populate_zero_shadow(pud_t *pud, unsigned long addr, + unsigned long end, enum populate_mode mode) +{ + return false; +} + +static inline bool kasan_pmd_populate_zero_shadow(pmd_t *pmd, unsigned long addr, + unsigned long end, enum populate_mode mode) +{ + return false; +} + +static bool kasan_pte_populate_zero_shadow(pte_t *pte, enum populate_mode mode) +{ + return false; +} + +#endif + +/* + * Mimic virt_to_kpte() in lack of init_mm symbol. Skip pmd NULL check though. + */ +static inline pte_t *__virt_to_kpte(unsigned long va) +{ + return pte_offset_kernel(pmd_offset(pud_offset(p4d_offset(pgd_offset_k(va), va), va), va), va); +} + +static void *boot_crst_alloc(unsigned long val) +{ + unsigned long size = PAGE_SIZE << CRST_ALLOC_ORDER; + unsigned long *table; + + table = (unsigned long *)physmem_alloc_top_down(RR_VMEM, size, size); + crst_table_init(table, val); + return table; +} + +static pte_t *boot_pte_alloc(void) +{ + static void *pte_leftover; + pte_t *pte; + + /* + * handling pte_leftovers this way helps to avoid memory fragmentation + * during POPULATE_KASAN_MAP_SHADOW when EDAT is off + */ + if (!pte_leftover) { + pte_leftover = (void *)physmem_alloc_top_down(RR_VMEM, PAGE_SIZE, PAGE_SIZE); + pte = pte_leftover + _PAGE_TABLE_SIZE; + } else { + pte = pte_leftover; + pte_leftover = NULL; + } + + memset64((u64 *)pte, _PAGE_INVALID, PTRS_PER_PTE); + return pte; +} + +static unsigned long _pa(unsigned long addr, unsigned long size, enum populate_mode mode) +{ + switch (mode) { + case POPULATE_NONE: + return -1; + case POPULATE_DIRECT: + return addr; + case POPULATE_ABS_LOWCORE: + return __abs_lowcore_pa(addr); +#ifdef CONFIG_KASAN + case POPULATE_KASAN_MAP_SHADOW: + addr = physmem_alloc_top_down(RR_VMEM, size, size); + memset((void *)addr, 0, size); + return addr; +#endif + default: + return -1; + } +} + +static bool can_large_pud(pud_t *pu_dir, unsigned long addr, unsigned long end) +{ + return machine.has_edat2 && + IS_ALIGNED(addr, PUD_SIZE) && (end - addr) >= PUD_SIZE; +} + +static bool can_large_pmd(pmd_t *pm_dir, unsigned long addr, unsigned long end) +{ + return machine.has_edat1 && + IS_ALIGNED(addr, PMD_SIZE) && (end - addr) >= PMD_SIZE; +} + +static void pgtable_pte_populate(pmd_t *pmd, unsigned long addr, unsigned long end, + enum populate_mode mode) +{ + unsigned long pages = 0; + pte_t *pte, entry; + + pte = pte_offset_kernel(pmd, addr); + for (; addr < end; addr += PAGE_SIZE, pte++) { + if (pte_none(*pte)) { + if (kasan_pte_populate_zero_shadow(pte, mode)) + continue; + entry = __pte(_pa(addr, PAGE_SIZE, mode)); + entry = set_pte_bit(entry, PAGE_KERNEL); + if (!machine.has_nx) + entry = clear_pte_bit(entry, __pgprot(_PAGE_NOEXEC)); + set_pte(pte, entry); + pages++; + } + } + if (mode == POPULATE_DIRECT) + update_page_count(PG_DIRECT_MAP_4K, pages); +} + +static void pgtable_pmd_populate(pud_t *pud, unsigned long addr, unsigned long end, + enum populate_mode mode) +{ + unsigned long next, pages = 0; + pmd_t *pmd, entry; + pte_t *pte; + + pmd = pmd_offset(pud, addr); + for (; addr < end; addr = next, pmd++) { + next = pmd_addr_end(addr, end); + if (pmd_none(*pmd)) { + if (kasan_pmd_populate_zero_shadow(pmd, addr, next, mode)) + continue; + if (can_large_pmd(pmd, addr, next)) { + entry = __pmd(_pa(addr, _SEGMENT_SIZE, mode)); + entry = set_pmd_bit(entry, SEGMENT_KERNEL); + if (!machine.has_nx) + entry = clear_pmd_bit(entry, __pgprot(_SEGMENT_ENTRY_NOEXEC)); + set_pmd(pmd, entry); + pages++; + continue; + } + pte = boot_pte_alloc(); + pmd_populate(&init_mm, pmd, pte); + } else if (pmd_large(*pmd)) { + continue; + } + pgtable_pte_populate(pmd, addr, next, mode); + } + if (mode == POPULATE_DIRECT) + update_page_count(PG_DIRECT_MAP_1M, pages); +} + +static void pgtable_pud_populate(p4d_t *p4d, unsigned long addr, unsigned long end, + enum populate_mode mode) +{ + unsigned long next, pages = 0; + pud_t *pud, entry; + pmd_t *pmd; + + pud = pud_offset(p4d, addr); + for (; addr < end; addr = next, pud++) { + next = pud_addr_end(addr, end); + if (pud_none(*pud)) { + if (kasan_pud_populate_zero_shadow(pud, addr, next, mode)) + continue; + if (can_large_pud(pud, addr, next)) { + entry = __pud(_pa(addr, _REGION3_SIZE, mode)); + entry = set_pud_bit(entry, REGION3_KERNEL); + if (!machine.has_nx) + entry = clear_pud_bit(entry, __pgprot(_REGION_ENTRY_NOEXEC)); + set_pud(pud, entry); + pages++; + continue; + } + pmd = boot_crst_alloc(_SEGMENT_ENTRY_EMPTY); + pud_populate(&init_mm, pud, pmd); + } else if (pud_large(*pud)) { + continue; + } + pgtable_pmd_populate(pud, addr, next, mode); + } + if (mode == POPULATE_DIRECT) + update_page_count(PG_DIRECT_MAP_2G, pages); +} + +static void pgtable_p4d_populate(pgd_t *pgd, unsigned long addr, unsigned long end, + enum populate_mode mode) +{ + unsigned long next; + p4d_t *p4d; + pud_t *pud; + + p4d = p4d_offset(pgd, addr); + for (; addr < end; addr = next, p4d++) { + next = p4d_addr_end(addr, end); + if (p4d_none(*p4d)) { + if (kasan_p4d_populate_zero_shadow(p4d, addr, next, mode)) + continue; + pud = boot_crst_alloc(_REGION3_ENTRY_EMPTY); + p4d_populate(&init_mm, p4d, pud); + } + pgtable_pud_populate(p4d, addr, next, mode); + } +} + +static void pgtable_populate(unsigned long addr, unsigned long end, enum populate_mode mode) +{ + unsigned long next; + pgd_t *pgd; + p4d_t *p4d; + + pgd = pgd_offset(&init_mm, addr); + for (; addr < end; addr = next, pgd++) { + next = pgd_addr_end(addr, end); + if (pgd_none(*pgd)) { + if (kasan_pgd_populate_zero_shadow(pgd, addr, next, mode)) + continue; + p4d = boot_crst_alloc(_REGION2_ENTRY_EMPTY); + pgd_populate(&init_mm, pgd, p4d); + } +#ifdef CONFIG_KASAN + if (mode == POPULATE_KASAN_SHALLOW) + continue; +#endif + pgtable_p4d_populate(pgd, addr, next, mode); + } +} + +void setup_vmem(unsigned long asce_limit) +{ + unsigned long start, end; + unsigned long asce_type; + unsigned long asce_bits; + int i; + + if (asce_limit == _REGION1_SIZE) { + asce_type = _REGION2_ENTRY_EMPTY; + asce_bits = _ASCE_TYPE_REGION2 | _ASCE_TABLE_LENGTH; + } else { + asce_type = _REGION3_ENTRY_EMPTY; + asce_bits = _ASCE_TYPE_REGION3 | _ASCE_TABLE_LENGTH; + } + s390_invalid_asce = invalid_pg_dir | _ASCE_TYPE_REGION3 | _ASCE_TABLE_LENGTH; + + crst_table_init((unsigned long *)swapper_pg_dir, asce_type); + crst_table_init((unsigned long *)invalid_pg_dir, _REGION3_ENTRY_EMPTY); + + /* + * To allow prefixing the lowcore must be mapped with 4KB pages. + * To prevent creation of a large page at address 0 first map + * the lowcore and create the identity mapping only afterwards. + */ + pgtable_populate(0, sizeof(struct lowcore), POPULATE_DIRECT); + for_each_physmem_usable_range(i, &start, &end) + pgtable_populate(start, end, POPULATE_DIRECT); + pgtable_populate(__abs_lowcore, __abs_lowcore + sizeof(struct lowcore), + POPULATE_ABS_LOWCORE); + pgtable_populate(__memcpy_real_area, __memcpy_real_area + PAGE_SIZE, + POPULATE_NONE); + memcpy_real_ptep = __virt_to_kpte(__memcpy_real_area); + + kasan_populate_shadow(); + + S390_lowcore.kernel_asce = swapper_pg_dir | asce_bits; + S390_lowcore.user_asce = s390_invalid_asce; + + __ctl_load(S390_lowcore.kernel_asce, 1, 1); + __ctl_load(S390_lowcore.user_asce, 7, 7); + __ctl_load(S390_lowcore.kernel_asce, 13, 13); + + init_mm.context.asce = S390_lowcore.kernel_asce; +} diff --git a/arch/s390/boot/vmlinux.lds.S b/arch/s390/boot/vmlinux.lds.S new file mode 100644 index 0000000000..389df0e0d9 --- /dev/null +++ b/arch/s390/boot/vmlinux.lds.S @@ -0,0 +1,128 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#include <asm-generic/vmlinux.lds.h> +#include <asm/vmlinux.lds.h> +#include <asm/thread_info.h> +#include <asm/page.h> +#include <asm/sclp.h> +#include "boot.h" + +OUTPUT_FORMAT("elf64-s390", "elf64-s390", "elf64-s390") +OUTPUT_ARCH(s390:64-bit) + +ENTRY(startup) + +SECTIONS +{ + . = 0; + .ipldata : { + *(.ipldata) + } + . = IPL_START; + .head.text : { + _head = . ; + HEAD_TEXT + _ehead = . ; + } + . = PARMAREA; + .parmarea : { + *(.parmarea) + } + .text : { + _text = .; /* Text */ + *(.text) + *(.text.*) + _etext = . ; + } + .rodata : { + _rodata = . ; + *(.rodata) /* read-only data */ + *(.rodata.*) + _erodata = . ; + } + NOTES + .data : { + _data = . ; + *(.data) + *(.data.*) + _edata = . ; + } + + BOOT_DATA + BOOT_DATA_PRESERVED + + /* + * This is the BSS section of the decompressor and not of the decompressed Linux kernel. + * It will consume place in the decompressor's image. + */ + . = ALIGN(8); + .bss : { + _bss = . ; + *(.bss) + *(.bss.*) + *(COMMON) + /* + * Stacks for the decompressor + */ + . = ALIGN(PAGE_SIZE); + _dump_info_stack_start = .; + . += PAGE_SIZE; + _dump_info_stack_end = .; + . = ALIGN(PAGE_SIZE); + _stack_start = .; + . += BOOT_STACK_SIZE; + _stack_end = .; + _ebss = .; + } + + /* + * uncompressed image info used by the decompressor it should match + * struct vmlinux_info. It comes from .vmlinux.info section of + * uncompressed vmlinux in a form of info.o + */ + . = ALIGN(8); + .vmlinux.info : { + _vmlinux_info = .; + *(.vmlinux.info) + } + + .decompressor.syms : { + . += 1; /* make sure we have \0 before the first entry */ + . = ALIGN(2); + _decompressor_syms_start = .; + *(.decompressor.syms) + _decompressor_syms_end = .; + } + + _decompressor_end = .; + +#ifdef CONFIG_KERNEL_UNCOMPRESSED + . = 0x100000; +#else + . = ALIGN(8); +#endif + .rodata.compressed : { + _compressed_start = .; + *(.vmlinux.bin.compressed) + _compressed_end = .; + } + +#define SB_TRAILER_SIZE 32 + /* Trailer needed for Secure Boot */ + . += SB_TRAILER_SIZE; /* make sure .sb.trailer does not overwrite the previous section */ + . = ALIGN(4096) - SB_TRAILER_SIZE; + .sb.trailer : { + QUAD(0) + QUAD(0) + QUAD(0) + QUAD(0x000000207a49504c) + } + _end = .; + + /* Sections to be discarded */ + /DISCARD/ : { + *(.eh_frame) + *(__ex_table) + *(*__ksymtab*) + *(___kcrctab*) + } +} |