diff options
Diffstat (limited to 'arch/powerpc')
178 files changed, 2221 insertions, 1471 deletions
diff --git a/arch/powerpc/Kbuild b/arch/powerpc/Kbuild index 22cd0d55a8..571f260b08 100644 --- a/arch/powerpc/Kbuild +++ b/arch/powerpc/Kbuild @@ -1,5 +1,6 @@ # SPDX-License-Identifier: GPL-2.0 -subdir-ccflags-$(CONFIG_PPC_WERROR) := -Werror +subdir-ccflags-$(CONFIG_PPC_WERROR) := -Werror -Wa,--fatal-warnings +subdir-asflags-$(CONFIG_PPC_WERROR) := -Wa,--fatal-warnings obj-y += kernel/ obj-y += mm/ diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index 1c4be33736..c88c6d46a5 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -137,6 +137,7 @@ config PPC select ARCH_HAS_GCOV_PROFILE_ALL select ARCH_HAS_HUGEPD if HUGETLB_PAGE select ARCH_HAS_KCOV + select ARCH_HAS_KERNEL_FPU_SUPPORT if PPC64 && PPC_FPU select ARCH_HAS_MEMBARRIER_CALLBACKS select ARCH_HAS_MEMBARRIER_SYNC_CORE select ARCH_HAS_MEMREMAP_COMPAT_ALIGN if PPC_64S_HASH_MMU @@ -156,6 +157,7 @@ config PPC select ARCH_HAS_UACCESS_FLUSHCACHE select ARCH_HAS_UBSAN select ARCH_HAVE_NMI_SAFE_CMPXCHG + select ARCH_HAVE_EXTRA_ELF_NOTES if SPU_BASE select ARCH_KEEP_MEMBLOCK select ARCH_MHP_MEMMAP_ON_MEMORY_ENABLE if PPC_RADIX_MMU select ARCH_MIGHT_HAVE_PC_PARPORT @@ -236,7 +238,7 @@ config PPC select HAVE_DYNAMIC_FTRACE_WITH_REGS if ARCH_USING_PATCHABLE_FUNCTION_ENTRY || MPROFILE_KERNEL || PPC32 select HAVE_EBPF_JIT select HAVE_EFFICIENT_UNALIGNED_ACCESS - select HAVE_FAST_GUP + select HAVE_GUP_FAST select HAVE_FTRACE_MCOUNT_RECORD select HAVE_FUNCTION_ARG_ACCESS_API select HAVE_FUNCTION_DESCRIPTORS if PPC64_ELF_ABI_V1 @@ -285,7 +287,7 @@ config PPC select IOMMU_HELPER if PPC64 select IRQ_DOMAIN select IRQ_FORCED_THREADING - select KASAN_VMALLOC if KASAN && MODULES + select KASAN_VMALLOC if KASAN && EXECMEM select LOCK_MM_AND_FIND_VMA select MMU_GATHER_PAGE_SIZE select MMU_GATHER_RCU_TABLE_FREE @@ -686,6 +688,10 @@ config ARCH_SELECTS_CRASH_DUMP depends on CRASH_DUMP select RELOCATABLE if PPC64 || 44x || PPC_85xx +config ARCH_SUPPORTS_CRASH_HOTPLUG + def_bool y + depends on PPC64 + config FA_DUMP 
bool "Firmware-assisted dump" depends on CRASH_DUMP && PPC64 && (PPC_RTAS || PPC_POWERNV) diff --git a/arch/powerpc/Makefile b/arch/powerpc/Makefile index 65261cbe5b..a8479c881c 100644 --- a/arch/powerpc/Makefile +++ b/arch/powerpc/Makefile @@ -114,7 +114,6 @@ LDFLAGS_vmlinux := $(LDFLAGS_vmlinux-y) ifdef CONFIG_PPC64 ifndef CONFIG_PPC_KERNEL_PCREL -ifeq ($(call cc-option-yn,-mcmodel=medium),y) # -mcmodel=medium breaks modules because it uses 32bit offsets from # the TOC pointer to create pointers where possible. Pointers into the # percpu data area are created by this method. @@ -124,9 +123,6 @@ ifeq ($(call cc-option-yn,-mcmodel=medium),y) # kernel percpu data space (starting with 0xc...). We need a full # 64bit relocation for this to work, hence -mcmodel=large. KBUILD_CFLAGS_MODULE += -mcmodel=large -else - export NO_MINIMAL_TOC := -mno-minimal-toc -endif endif endif @@ -139,7 +135,7 @@ CFLAGS-$(CONFIG_PPC64) += $(call cc-option,-mabi=elfv1) CFLAGS-$(CONFIG_PPC64) += $(call cc-option,-mcall-aixdesc) endif endif -CFLAGS-$(CONFIG_PPC64) += $(call cc-option,-mcmodel=medium,$(call cc-option,-mminimal-toc)) +CFLAGS-$(CONFIG_PPC64) += -mcmodel=medium CFLAGS-$(CONFIG_PPC64) += $(call cc-option,-mno-pointers-to-nested-functions) CFLAGS-$(CONFIG_PPC64) += $(call cc-option,-mlong-double-128) @@ -153,6 +149,9 @@ CFLAGS-$(CONFIG_PPC32) += $(call cc-option, $(MULTIPLEWORD)) CFLAGS-$(CONFIG_PPC32) += $(call cc-option,-mno-readonly-in-sdata) +CC_FLAGS_FPU := $(call cc-option,-mhard-float) +CC_FLAGS_NO_FPU := $(call cc-option,-msoft-float) + ifdef CONFIG_FUNCTION_TRACER ifdef CONFIG_ARCH_USING_PATCHABLE_FUNCTION_ENTRY KBUILD_CPPFLAGS += -DCC_USING_PATCHABLE_FUNCTION_ENTRY @@ -174,7 +173,7 @@ asinstr := $(call as-instr,lis 9$(comma)foo@high,-DHAVE_AS_ATHIGH=1) KBUILD_CPPFLAGS += -I $(srctree)/arch/powerpc $(asinstr) KBUILD_AFLAGS += $(AFLAGS-y) -KBUILD_CFLAGS += $(call cc-option,-msoft-float) +KBUILD_CFLAGS += $(CC_FLAGS_NO_FPU) KBUILD_CFLAGS += $(CFLAGS-y) CPP = $(CC) -E 
$(KBUILD_CFLAGS) diff --git a/arch/powerpc/boot/Makefile b/arch/powerpc/boot/Makefile index 968aee2025..35f6b15e4c 100644 --- a/arch/powerpc/boot/Makefile +++ b/arch/powerpc/boot/Makefile @@ -108,8 +108,8 @@ DTC_FLAGS ?= -p 1024 # these files into the build dir, fix up any includes and ensure that dependent # files are copied in the right order. -# these need to be seperate variables because they are copied out of different -# directories in the kernel tree. Sure you COULd merge them, but it's a +# these need to be separate variables because they are copied out of different +# directories in the kernel tree. Sure you COULD merge them, but it's a # cure-is-worse-than-disease situation. zlib-decomp-$(CONFIG_KERNEL_GZIP) := decompress_inflate.c zlib-$(CONFIG_KERNEL_GZIP) := inffast.c inflate.c inftrees.c @@ -218,7 +218,7 @@ $(addprefix $(obj)/,$(libfdt) $(libfdtheader)): $(obj)/%: $(srctree)/scripts/dtc $(obj)/empty.c: $(Q)touch $@ -$(obj)/zImage.coff.lds $(obj)/zImage.ps3.lds : $(obj)/%: $(srctree)/$(src)/%.S +$(obj)/zImage.coff.lds $(obj)/zImage.ps3.lds : $(obj)/%: $(src)/%.S $(Q)cp $< $@ clean-files := $(zlib-) $(zlibheader-) $(zliblinuxheader-) \ @@ -252,9 +252,9 @@ targets += $(patsubst $(obj)/%,%,$(obj-boot) wrapper.a) zImage.lds extra-y := $(obj)/wrapper.a $(obj-plat) $(obj)/empty.o \ $(obj)/zImage.lds $(obj)/zImage.coff.lds $(obj)/zImage.ps3.lds -dtstree := $(srctree)/$(src)/dts +dtstree := $(src)/dts -wrapper :=$(srctree)/$(src)/wrapper +wrapper := $(src)/wrapper wrapperbits := $(extra-y) $(addprefix $(obj)/,addnote hack-coff mktree) \ $(wrapper) FORCE diff --git a/arch/powerpc/boot/decompress.c b/arch/powerpc/boot/decompress.c index 977eb15a6d..6835cb53f0 100644 --- a/arch/powerpc/boot/decompress.c +++ b/arch/powerpc/boot/decompress.c @@ -101,7 +101,7 @@ static void print_err(char *s) * @input_size: length of the input buffer * @outbuf: output buffer * @output_size: length of the output buffer - * @skip number of output bytes to ignore + * @_skip: number of 
output bytes to ignore * * This function takes compressed data from inbuf, decompresses and write it to * outbuf. Once output_size bytes are written to the output buffer, or the diff --git a/arch/powerpc/boot/dts/Makefile b/arch/powerpc/boot/dts/Makefile index fb335d05aa..0cd0d8558b 100644 --- a/arch/powerpc/boot/dts/Makefile +++ b/arch/powerpc/boot/dts/Makefile @@ -2,5 +2,4 @@ subdir-y += fsl -dtstree := $(srctree)/$(src) -dtb-$(CONFIG_OF_ALL_DTBS) := $(patsubst $(dtstree)/%.dts,%.dtb, $(wildcard $(dtstree)/*.dts)) +dtb-$(CONFIG_OF_ALL_DTBS) := $(patsubst $(src)/%.dts,%.dtb, $(wildcard $(src)/*.dts)) diff --git a/arch/powerpc/boot/dts/acadia.dts b/arch/powerpc/boot/dts/acadia.dts index deb52e41ab..5fedda8113 100644 --- a/arch/powerpc/boot/dts/acadia.dts +++ b/arch/powerpc/boot/dts/acadia.dts @@ -172,7 +172,7 @@ reg = <0xef602800 0x60>; interrupt-parent = <&UIC0>; interrupts = <0x4 0x4>; - /* This thing is a bit weird. It has it's own UIC + /* This thing is a bit weird. It has its own UIC * that it uses to generate snapshot triggers. We * don't really support this device yet, and it needs * work to figure this out. 
diff --git a/arch/powerpc/boot/dts/fsl/Makefile b/arch/powerpc/boot/dts/fsl/Makefile index 3bae982641..d3ecdf14bc 100644 --- a/arch/powerpc/boot/dts/fsl/Makefile +++ b/arch/powerpc/boot/dts/fsl/Makefile @@ -1,4 +1,3 @@ # SPDX-License-Identifier: GPL-2.0 -dtstree := $(srctree)/$(src) -dtb-$(CONFIG_OF_ALL_DTBS) := $(patsubst $(dtstree)/%.dts,%.dtb, $(wildcard $(dtstree)/*.dts)) +dtb-$(CONFIG_OF_ALL_DTBS) := $(patsubst $(src)/%.dts,%.dtb, $(wildcard $(src)/*.dts)) diff --git a/arch/powerpc/boot/dts/fsl/b4si-post.dtsi b/arch/powerpc/boot/dts/fsl/b4si-post.dtsi index 4f044b41a7..fb3200b006 100644 --- a/arch/powerpc/boot/dts/fsl/b4si-post.dtsi +++ b/arch/powerpc/boot/dts/fsl/b4si-post.dtsi @@ -50,7 +50,7 @@ &ifc { #address-cells = <2>; #size-cells = <1>; - compatible = "fsl,ifc", "simple-bus"; + compatible = "fsl,ifc"; interrupts = <25 2 0 0>; }; diff --git a/arch/powerpc/boot/dts/fsl/bsc9131rdb.dts b/arch/powerpc/boot/dts/fsl/bsc9131rdb.dts index 8da984251a..0ba86a6dce 100644 --- a/arch/powerpc/boot/dts/fsl/bsc9131rdb.dts +++ b/arch/powerpc/boot/dts/fsl/bsc9131rdb.dts @@ -15,7 +15,7 @@ device_type = "memory"; }; - board_ifc: ifc: ifc@ff71e000 { + board_ifc: ifc: memory-controller@ff71e000 { /* NAND Flash on board */ ranges = <0x0 0x0 0x0 0xff800000 0x00004000>; reg = <0x0 0xff71e000 0x0 0x2000>; diff --git a/arch/powerpc/boot/dts/fsl/bsc9131si-post.dtsi b/arch/powerpc/boot/dts/fsl/bsc9131si-post.dtsi index 2a677fd323..5c53cee875 100644 --- a/arch/powerpc/boot/dts/fsl/bsc9131si-post.dtsi +++ b/arch/powerpc/boot/dts/fsl/bsc9131si-post.dtsi @@ -35,7 +35,7 @@ &ifc { #address-cells = <2>; #size-cells = <1>; - compatible = "fsl,ifc", "simple-bus"; + compatible = "fsl,ifc"; interrupts = <16 2 0 0 20 2 0 0>; }; diff --git a/arch/powerpc/boot/dts/fsl/bsc9132qds.dts b/arch/powerpc/boot/dts/fsl/bsc9132qds.dts index 7cb2158dfe..ce642e879a 100644 --- a/arch/powerpc/boot/dts/fsl/bsc9132qds.dts +++ b/arch/powerpc/boot/dts/fsl/bsc9132qds.dts @@ -15,7 +15,7 @@ device_type = "memory"; }; 
- ifc: ifc@ff71e000 { + ifc: memory-controller@ff71e000 { /* NOR, NAND Flash on board */ ranges = <0x0 0x0 0x0 0x88000000 0x08000000 0x1 0x0 0x0 0xff800000 0x00010000>; diff --git a/arch/powerpc/boot/dts/fsl/bsc9132si-post.dtsi b/arch/powerpc/boot/dts/fsl/bsc9132si-post.dtsi index b8e0edd1ac..4da451e000 100644 --- a/arch/powerpc/boot/dts/fsl/bsc9132si-post.dtsi +++ b/arch/powerpc/boot/dts/fsl/bsc9132si-post.dtsi @@ -35,7 +35,7 @@ &ifc { #address-cells = <2>; #size-cells = <1>; - compatible = "fsl,ifc", "simple-bus"; + compatible = "fsl,ifc"; /* FIXME: Test whether interrupts are split */ interrupts = <16 2 0 0 20 2 0 0>; }; diff --git a/arch/powerpc/boot/dts/fsl/c293pcie.dts b/arch/powerpc/boot/dts/fsl/c293pcie.dts index 5e905e0857..e2fdac2ed4 100644 --- a/arch/powerpc/boot/dts/fsl/c293pcie.dts +++ b/arch/powerpc/boot/dts/fsl/c293pcie.dts @@ -42,7 +42,7 @@ device_type = "memory"; }; - ifc: ifc@fffe1e000 { + ifc: memory-controller@fffe1e000 { reg = <0xf 0xffe1e000 0 0x2000>; ranges = <0x0 0x0 0xf 0xec000000 0x04000000 0x1 0x0 0xf 0xff800000 0x00010000 diff --git a/arch/powerpc/boot/dts/fsl/c293si-post.dtsi b/arch/powerpc/boot/dts/fsl/c293si-post.dtsi index f208fb8f64..2d443d5192 100644 --- a/arch/powerpc/boot/dts/fsl/c293si-post.dtsi +++ b/arch/powerpc/boot/dts/fsl/c293si-post.dtsi @@ -35,7 +35,7 @@ &ifc { #address-cells = <2>; #size-cells = <1>; - compatible = "fsl,ifc", "simple-bus"; + compatible = "fsl,ifc"; interrupts = <19 2 0 0>; }; diff --git a/arch/powerpc/boot/dts/fsl/mpc8536si-post.dtsi b/arch/powerpc/boot/dts/fsl/mpc8536si-post.dtsi index 41935709eb..fba40a1bcc 100644 --- a/arch/powerpc/boot/dts/fsl/mpc8536si-post.dtsi +++ b/arch/powerpc/boot/dts/fsl/mpc8536si-post.dtsi @@ -199,6 +199,10 @@ /include/ "pq3-dma-0.dtsi" /include/ "pq3-etsec1-0.dtsi" + enet0: ethernet@24000 { + fsl,wake-on-filer; + fsl,pmc-handle = <&etsec1_clk>; + }; /include/ "pq3-etsec1-timer-0.dtsi" usb@22000 { @@ -222,9 +226,10 @@ }; /include/ "pq3-etsec1-2.dtsi" - - ethernet@26000 { + 
enet2: ethernet@26000 { cell-index = <1>; + fsl,wake-on-filer; + fsl,pmc-handle = <&etsec3_clk>; }; usb@2b000 { @@ -249,4 +254,9 @@ reg = <0xe0000 0x1000>; fsl,has-rstcr; }; + +/include/ "pq3-power.dtsi" + power@e0070 { + compatible = "fsl,mpc8536-pmc", "fsl,mpc8548-pmc"; + }; }; diff --git a/arch/powerpc/boot/dts/fsl/mpc8544si-post.dtsi b/arch/powerpc/boot/dts/fsl/mpc8544si-post.dtsi index b68eb119fa..ea7416af7e 100644 --- a/arch/powerpc/boot/dts/fsl/mpc8544si-post.dtsi +++ b/arch/powerpc/boot/dts/fsl/mpc8544si-post.dtsi @@ -188,4 +188,6 @@ reg = <0xe0000 0x1000>; fsl,has-rstcr; }; + +/include/ "pq3-power.dtsi" }; diff --git a/arch/powerpc/boot/dts/fsl/mpc8548si-post.dtsi b/arch/powerpc/boot/dts/fsl/mpc8548si-post.dtsi index 579d76cb8e..dddb737450 100644 --- a/arch/powerpc/boot/dts/fsl/mpc8548si-post.dtsi +++ b/arch/powerpc/boot/dts/fsl/mpc8548si-post.dtsi @@ -156,4 +156,6 @@ reg = <0xe0000 0x1000>; fsl,has-rstcr; }; + +/include/ "pq3-power.dtsi" }; diff --git a/arch/powerpc/boot/dts/fsl/mpc8572si-post.dtsi b/arch/powerpc/boot/dts/fsl/mpc8572si-post.dtsi index 49294cf36b..40a6cff770 100644 --- a/arch/powerpc/boot/dts/fsl/mpc8572si-post.dtsi +++ b/arch/powerpc/boot/dts/fsl/mpc8572si-post.dtsi @@ -193,4 +193,6 @@ reg = <0xe0000 0x1000>; fsl,has-rstcr; }; + +/include/ "pq3-power.dtsi" }; diff --git a/arch/powerpc/boot/dts/fsl/p1010rdb-pb.dts b/arch/powerpc/boot/dts/fsl/p1010rdb-pb.dts index 3a94acbb3c..ce3346d778 100644 --- a/arch/powerpc/boot/dts/fsl/p1010rdb-pb.dts +++ b/arch/powerpc/boot/dts/fsl/p1010rdb-pb.dts @@ -29,3 +29,19 @@ }; /include/ "p1010si-post.dtsi" + +&pci0 { + pcie@0 { + interrupt-map = < + /* IDSEL 0x0 */ + /* + *irq[4:5] are active-high + *irq[6:7] are active-low + */ + 0000 0x0 0x0 0x1 &mpic 0x4 0x2 0x0 0x0 + 0000 0x0 0x0 0x2 &mpic 0x5 0x2 0x0 0x0 + 0000 0x0 0x0 0x3 &mpic 0x6 0x1 0x0 0x0 + 0000 0x0 0x0 0x4 &mpic 0x7 0x1 0x0 0x0 + >; + }; +}; diff --git a/arch/powerpc/boot/dts/fsl/p1010rdb-pb_36b.dts b/arch/powerpc/boot/dts/fsl/p1010rdb-pb_36b.dts 
index 4cf255fedc..83590354f9 100644 --- a/arch/powerpc/boot/dts/fsl/p1010rdb-pb_36b.dts +++ b/arch/powerpc/boot/dts/fsl/p1010rdb-pb_36b.dts @@ -56,3 +56,19 @@ }; /include/ "p1010si-post.dtsi" + +&pci0 { + pcie@0 { + interrupt-map = < + /* IDSEL 0x0 */ + /* + *irq[4:5] are active-high + *irq[6:7] are active-low + */ + 0000 0x0 0x0 0x1 &mpic 0x4 0x2 0x0 0x0 + 0000 0x0 0x0 0x2 &mpic 0x5 0x2 0x0 0x0 + 0000 0x0 0x0 0x3 &mpic 0x6 0x1 0x0 0x0 + 0000 0x0 0x0 0x4 &mpic 0x7 0x1 0x0 0x0 + >; + }; +}; diff --git a/arch/powerpc/boot/dts/fsl/p1010rdb.dtsi b/arch/powerpc/boot/dts/fsl/p1010rdb.dtsi index 2ca9cee2dd..ef49a7d6c6 100644 --- a/arch/powerpc/boot/dts/fsl/p1010rdb.dtsi +++ b/arch/powerpc/boot/dts/fsl/p1010rdb.dtsi @@ -215,19 +215,3 @@ phy-connection-type = "sgmii"; }; }; - -&pci0 { - pcie@0 { - interrupt-map = < - /* IDSEL 0x0 */ - /* - *irq[4:5] are active-high - *irq[6:7] are active-low - */ - 0000 0x0 0x0 0x1 &mpic 0x4 0x2 0x0 0x0 - 0000 0x0 0x0 0x2 &mpic 0x5 0x2 0x0 0x0 - 0000 0x0 0x0 0x3 &mpic 0x6 0x1 0x0 0x0 - 0000 0x0 0x0 0x4 &mpic 0x7 0x1 0x0 0x0 - >; - }; -}; diff --git a/arch/powerpc/boot/dts/fsl/p1010rdb_32b.dtsi b/arch/powerpc/boot/dts/fsl/p1010rdb_32b.dtsi index fdc19aab2f..583a6cd050 100644 --- a/arch/powerpc/boot/dts/fsl/p1010rdb_32b.dtsi +++ b/arch/powerpc/boot/dts/fsl/p1010rdb_32b.dtsi @@ -36,7 +36,7 @@ memory { device_type = "memory"; }; -board_ifc: ifc: ifc@ffe1e000 { +board_ifc: ifc: memory-controller@ffe1e000 { /* NOR, NAND Flashes and CPLD on board */ ranges = <0x0 0x0 0x0 0xee000000 0x02000000 0x1 0x0 0x0 0xff800000 0x00010000 diff --git a/arch/powerpc/boot/dts/fsl/p1010rdb_36b.dtsi b/arch/powerpc/boot/dts/fsl/p1010rdb_36b.dtsi index de2fceed4f..4d41efe003 100644 --- a/arch/powerpc/boot/dts/fsl/p1010rdb_36b.dtsi +++ b/arch/powerpc/boot/dts/fsl/p1010rdb_36b.dtsi @@ -36,7 +36,7 @@ memory { device_type = "memory"; }; -board_ifc: ifc: ifc@fffe1e000 { +board_ifc: ifc: memory-controller@fffe1e000 { /* NOR, NAND Flashes and CPLD on board */ ranges = <0x0 
0x0 0xf 0xee000000 0x02000000 0x1 0x0 0xf 0xff800000 0x00010000 diff --git a/arch/powerpc/boot/dts/fsl/p1010si-post.dtsi b/arch/powerpc/boot/dts/fsl/p1010si-post.dtsi index ccda0a91ab..2d2550729d 100644 --- a/arch/powerpc/boot/dts/fsl/p1010si-post.dtsi +++ b/arch/powerpc/boot/dts/fsl/p1010si-post.dtsi @@ -35,7 +35,7 @@ &ifc { #address-cells = <2>; #size-cells = <1>; - compatible = "fsl,ifc", "simple-bus"; + compatible = "fsl,ifc"; interrupts = <16 2 0 0 19 2 0 0>; }; @@ -183,9 +183,23 @@ /include/ "pq3-etsec2-1.dtsi" /include/ "pq3-etsec2-2.dtsi" + enet0: ethernet@b0000 { + fsl,pmc-handle = <&etsec1_clk>; + }; + + enet1: ethernet@b1000 { + fsl,pmc-handle = <&etsec2_clk>; + }; + + enet2: ethernet@b2000 { + fsl,pmc-handle = <&etsec3_clk>; + }; + global-utilities@e0000 { compatible = "fsl,p1010-guts"; reg = <0xe0000 0x1000>; fsl,has-rstcr; }; + +/include/ "pq3-power.dtsi" }; diff --git a/arch/powerpc/boot/dts/fsl/p1020si-post.dtsi b/arch/powerpc/boot/dts/fsl/p1020si-post.dtsi index 642dc3a83d..cc4c746100 100644 --- a/arch/powerpc/boot/dts/fsl/p1020si-post.dtsi +++ b/arch/powerpc/boot/dts/fsl/p1020si-post.dtsi @@ -163,14 +163,17 @@ /include/ "pq3-etsec2-0.dtsi" enet0: enet0_grp2: ethernet@b0000 { + fsl,pmc-handle = <&etsec1_clk>; }; /include/ "pq3-etsec2-1.dtsi" enet1: enet1_grp2: ethernet@b1000 { + fsl,pmc-handle = <&etsec2_clk>; }; /include/ "pq3-etsec2-2.dtsi" enet2: enet2_grp2: ethernet@b2000 { + fsl,pmc-handle = <&etsec3_clk>; }; global-utilities@e0000 { @@ -178,6 +181,8 @@ reg = <0xe0000 0x1000>; fsl,has-rstcr; }; + +/include/ "pq3-power.dtsi" }; /include/ "pq3-etsec2-grp2-0.dtsi" diff --git a/arch/powerpc/boot/dts/fsl/p1021si-post.dtsi b/arch/powerpc/boot/dts/fsl/p1021si-post.dtsi index 407cb5fd0f..378195db9f 100644 --- a/arch/powerpc/boot/dts/fsl/p1021si-post.dtsi +++ b/arch/powerpc/boot/dts/fsl/p1021si-post.dtsi @@ -159,14 +159,17 @@ /include/ "pq3-etsec2-0.dtsi" enet0: enet0_grp2: ethernet@b0000 { + fsl,pmc-handle = <&etsec1_clk>; }; /include/ 
"pq3-etsec2-1.dtsi" enet1: enet1_grp2: ethernet@b1000 { + fsl,pmc-handle = <&etsec2_clk>; }; /include/ "pq3-etsec2-2.dtsi" enet2: enet2_grp2: ethernet@b2000 { + fsl,pmc-handle = <&etsec3_clk>; }; global-utilities@e0000 { @@ -174,6 +177,8 @@ reg = <0xe0000 0x1000>; fsl,has-rstcr; }; + +/include/ "pq3-power.dtsi" }; &qe { diff --git a/arch/powerpc/boot/dts/fsl/p1022si-post.dtsi b/arch/powerpc/boot/dts/fsl/p1022si-post.dtsi index 093e4e3ed3..6ac21e8134 100644 --- a/arch/powerpc/boot/dts/fsl/p1022si-post.dtsi +++ b/arch/powerpc/boot/dts/fsl/p1022si-post.dtsi @@ -225,11 +225,13 @@ /include/ "pq3-etsec2-0.dtsi" enet0: enet0_grp2: ethernet@b0000 { fsl,wake-on-filer; + fsl,pmc-handle = <&etsec1_clk>; }; /include/ "pq3-etsec2-1.dtsi" enet1: enet1_grp2: ethernet@b1000 { fsl,wake-on-filer; + fsl,pmc-handle = <&etsec2_clk>; }; global-utilities@e0000 { @@ -238,9 +240,10 @@ fsl,has-rstcr; }; +/include/ "pq3-power.dtsi" power@e0070 { - compatible = "fsl,mpc8536-pmc", "fsl,mpc8548-pmc"; - reg = <0xe0070 0x20>; + compatible = "fsl,p1022-pmc", "fsl,mpc8536-pmc", + "fsl,mpc8548-pmc"; }; }; diff --git a/arch/powerpc/boot/dts/fsl/p2020si-post.dtsi b/arch/powerpc/boot/dts/fsl/p2020si-post.dtsi index 81b9ab2119..d410082d21 100644 --- a/arch/powerpc/boot/dts/fsl/p2020si-post.dtsi +++ b/arch/powerpc/boot/dts/fsl/p2020si-post.dtsi @@ -178,6 +178,10 @@ compatible = "fsl-usb2-dr-v1.6", "fsl-usb2-dr"; }; /include/ "pq3-etsec1-0.dtsi" + enet0: ethernet@24000 { + fsl,pmc-handle = <&etsec1_clk>; + + }; /include/ "pq3-etsec1-timer-0.dtsi" ptp_clock@24e00 { @@ -186,7 +190,15 @@ /include/ "pq3-etsec1-1.dtsi" + enet1: ethernet@25000 { + fsl,pmc-handle = <&etsec2_clk>; + }; + /include/ "pq3-etsec1-2.dtsi" + enet2: ethernet@26000 { + fsl,pmc-handle = <&etsec3_clk>; + }; + /include/ "pq3-esdhc-0.dtsi" sdhc@2e000 { compatible = "fsl,p2020-esdhc", "fsl,esdhc"; @@ -202,8 +214,5 @@ fsl,has-rstcr; }; - pmc: power@e0070 { - compatible = "fsl,mpc8548-pmc"; - reg = <0xe0070 0x20>; - }; +/include/ 
"pq3-power.dtsi" }; diff --git a/arch/powerpc/boot/dts/fsl/pq3-power.dtsi b/arch/powerpc/boot/dts/fsl/pq3-power.dtsi new file mode 100644 index 0000000000..6af1240100 --- /dev/null +++ b/arch/powerpc/boot/dts/fsl/pq3-power.dtsi @@ -0,0 +1,19 @@ +// SPDX-License-Identifier: (GPL-2.0+) +/* + * Copyright 2024 NXP + */ + +power@e0070 { + compatible = "fsl,mpc8548-pmc"; + reg = <0xe0070 0x20>; + + etsec1_clk: soc-clk@24 { + fsl,pmcdr-mask = <0x00000080>; + }; + etsec2_clk: soc-clk@25 { + fsl,pmcdr-mask = <0x00000040>; + }; + etsec3_clk: soc-clk@26 { + fsl,pmcdr-mask = <0x00000020>; + }; +}; diff --git a/arch/powerpc/boot/dts/fsl/t1023si-post.dtsi b/arch/powerpc/boot/dts/fsl/t1023si-post.dtsi index aa5152ca81..8ef0c02020 100644 --- a/arch/powerpc/boot/dts/fsl/t1023si-post.dtsi +++ b/arch/powerpc/boot/dts/fsl/t1023si-post.dtsi @@ -52,7 +52,7 @@ &ifc { #address-cells = <2>; #size-cells = <1>; - compatible = "fsl,ifc", "simple-bus"; + compatible = "fsl,ifc"; interrupts = <25 2 0 0>; }; diff --git a/arch/powerpc/boot/dts/fsl/t1024rdb.dts b/arch/powerpc/boot/dts/fsl/t1024rdb.dts index 270aaf631f..7d003e07a9 100644 --- a/arch/powerpc/boot/dts/fsl/t1024rdb.dts +++ b/arch/powerpc/boot/dts/fsl/t1024rdb.dts @@ -91,7 +91,7 @@ board-control@2,0 { #address-cells = <1>; #size-cells = <1>; - compatible = "fsl,t1024-cpld"; + compatible = "fsl,t1024-cpld", "fsl,deepsleep-cpld"; reg = <3 0 0x300>; ranges = <0 3 0 0x300>; bank-width = <1>; diff --git a/arch/powerpc/boot/dts/fsl/t1040rdb.dts b/arch/powerpc/boot/dts/fsl/t1040rdb.dts index dd3aab81e9..4347924e9a 100644 --- a/arch/powerpc/boot/dts/fsl/t1040rdb.dts +++ b/arch/powerpc/boot/dts/fsl/t1040rdb.dts @@ -104,7 +104,7 @@ ifc: localbus@ffe124000 { cpld@3,0 { - compatible = "fsl,t1040rdb-cpld"; + compatible = "fsl,t104xrdb-cpld", "fsl,deepsleep-cpld"; }; }; }; diff --git a/arch/powerpc/boot/dts/fsl/t1040si-post.dtsi b/arch/powerpc/boot/dts/fsl/t1040si-post.dtsi index 7767886232..c9542b73bd 100644 --- 
a/arch/powerpc/boot/dts/fsl/t1040si-post.dtsi +++ b/arch/powerpc/boot/dts/fsl/t1040si-post.dtsi @@ -52,7 +52,7 @@ &ifc { #address-cells = <2>; #size-cells = <1>; - compatible = "fsl,ifc", "simple-bus"; + compatible = "fsl,ifc"; interrupts = <25 2 0 0>; }; diff --git a/arch/powerpc/boot/dts/fsl/t1042rdb.dts b/arch/powerpc/boot/dts/fsl/t1042rdb.dts index 3ebb712224..099764322b 100644 --- a/arch/powerpc/boot/dts/fsl/t1042rdb.dts +++ b/arch/powerpc/boot/dts/fsl/t1042rdb.dts @@ -68,7 +68,7 @@ ifc: localbus@ffe124000 { cpld@3,0 { - compatible = "fsl,t1042rdb-cpld"; + compatible = "fsl,t104xrdb-cpld", "fsl,deepsleep-cpld"; }; }; }; diff --git a/arch/powerpc/boot/dts/fsl/t1042rdb_pi.dts b/arch/powerpc/boot/dts/fsl/t1042rdb_pi.dts index 8ec3ff45e6..b10cab1a34 100644 --- a/arch/powerpc/boot/dts/fsl/t1042rdb_pi.dts +++ b/arch/powerpc/boot/dts/fsl/t1042rdb_pi.dts @@ -41,7 +41,7 @@ ifc: localbus@ffe124000 { cpld@3,0 { - compatible = "fsl,t1042rdb_pi-cpld"; + compatible = "fsl,t104xrdb-cpld", "fsl,deepsleep-cpld"; }; }; diff --git a/arch/powerpc/boot/dts/fsl/t2081si-post.dtsi b/arch/powerpc/boot/dts/fsl/t2081si-post.dtsi index 27714dc2f0..6bb95878d3 100644 --- a/arch/powerpc/boot/dts/fsl/t2081si-post.dtsi +++ b/arch/powerpc/boot/dts/fsl/t2081si-post.dtsi @@ -50,7 +50,7 @@ &ifc { #address-cells = <2>; #size-cells = <1>; - compatible = "fsl,ifc", "simple-bus"; + compatible = "fsl,ifc"; interrupts = <25 2 0 0>; }; diff --git a/arch/powerpc/boot/dts/fsl/t4240si-post.dtsi b/arch/powerpc/boot/dts/fsl/t4240si-post.dtsi index fcac73486d..65f3e17c0d 100644 --- a/arch/powerpc/boot/dts/fsl/t4240si-post.dtsi +++ b/arch/powerpc/boot/dts/fsl/t4240si-post.dtsi @@ -50,7 +50,7 @@ &ifc { #address-cells = <2>; #size-cells = <1>; - compatible = "fsl,ifc", "simple-bus"; + compatible = "fsl,ifc"; interrupts = <25 2 0 0>; }; diff --git a/arch/powerpc/boot/main.c b/arch/powerpc/boot/main.c index cae31a6e8f..2c0e2a1cab 100644 --- a/arch/powerpc/boot/main.c +++ b/arch/powerpc/boot/main.c @@ -188,7 +188,7 
@@ static inline void prep_esm_blob(struct addr_range vmlinux, void *chosen) { } /* A buffer that may be edited by tools operating on a zImage binary so as to * edit the command line passed to vmlinux (by setting /chosen/bootargs). - * The buffer is put in it's own section so that tools may locate it easier. + * The buffer is put in its own section so that tools may locate it easier. */ static char cmdline[BOOT_COMMAND_LINE_SIZE] __attribute__((__section__("__builtin_cmdline"))); diff --git a/arch/powerpc/boot/ps3.c b/arch/powerpc/boot/ps3.c index f157717ae8..89ff46b8b2 100644 --- a/arch/powerpc/boot/ps3.c +++ b/arch/powerpc/boot/ps3.c @@ -25,7 +25,7 @@ BSS_STACK(4096); /* A buffer that may be edited by tools operating on a zImage binary so as to * edit the command line passed to vmlinux (by setting /chosen/bootargs). - * The buffer is put in it's own section so that tools may locate it easier. + * The buffer is put in its own section so that tools may locate it easier. */ static char cmdline[BOOT_COMMAND_LINE_SIZE] diff --git a/arch/powerpc/configs/85xx-hw.config b/arch/powerpc/configs/85xx-hw.config index 524db76f47..8aff832173 100644 --- a/arch/powerpc/configs/85xx-hw.config +++ b/arch/powerpc/configs/85xx-hw.config @@ -24,6 +24,7 @@ CONFIG_FS_ENET=y CONFIG_FSL_CORENET_CF=y CONFIG_FSL_DMA=y CONFIG_FSL_HV_MANAGER=y +CONFIG_FSL_IFC=y CONFIG_FSL_PQ_MDIO=y CONFIG_FSL_RIO=y CONFIG_FSL_XGMAC_MDIO=y @@ -58,6 +59,7 @@ CONFIG_INPUT_FF_MEMLESS=m CONFIG_MARVELL_PHY=y CONFIG_MDIO_BUS_MUX_GPIO=y CONFIG_MDIO_BUS_MUX_MMIOREG=y +CONFIG_MEMORY=y CONFIG_MMC_SDHCI_OF_ESDHC=y CONFIG_MMC_SDHCI_PLTFM=y CONFIG_MMC_SDHCI=y diff --git a/arch/powerpc/configs/adder875_defconfig b/arch/powerpc/configs/adder875_defconfig index 7f35d5bc12..97f4d48517 100644 --- a/arch/powerpc/configs/adder875_defconfig +++ b/arch/powerpc/configs/adder875_defconfig @@ -4,7 +4,7 @@ CONFIG_SYSVIPC=y CONFIG_LOG_BUF_SHIFT=14 CONFIG_EXPERT=y # CONFIG_ELF_CORE is not set -# CONFIG_BASE_FULL is not set 
+CONFIG_BASE_SMALL=y # CONFIG_FUTEX is not set # CONFIG_VM_EVENT_COUNTERS is not set # CONFIG_BLK_DEV_BSG is not set diff --git a/arch/powerpc/configs/ep88xc_defconfig b/arch/powerpc/configs/ep88xc_defconfig index a98ef6a4ab..50cc59eb36 100644 --- a/arch/powerpc/configs/ep88xc_defconfig +++ b/arch/powerpc/configs/ep88xc_defconfig @@ -6,7 +6,7 @@ CONFIG_HIGH_RES_TIMERS=y CONFIG_LOG_BUF_SHIFT=14 CONFIG_EXPERT=y # CONFIG_ELF_CORE is not set -# CONFIG_BASE_FULL is not set +CONFIG_BASE_SMALL=y # CONFIG_FUTEX is not set # CONFIG_VM_EVENT_COUNTERS is not set # CONFIG_BLK_DEV_BSG is not set diff --git a/arch/powerpc/configs/mpc866_ads_defconfig b/arch/powerpc/configs/mpc866_ads_defconfig index 5c56d36cdf..6f449411ab 100644 --- a/arch/powerpc/configs/mpc866_ads_defconfig +++ b/arch/powerpc/configs/mpc866_ads_defconfig @@ -6,7 +6,7 @@ CONFIG_HIGH_RES_TIMERS=y CONFIG_LOG_BUF_SHIFT=14 CONFIG_EXPERT=y # CONFIG_BUG is not set -# CONFIG_BASE_FULL is not set +CONFIG_BASE_SMALL=y # CONFIG_EPOLL is not set # CONFIG_VM_EVENT_COUNTERS is not set # CONFIG_BLK_DEV_BSG is not set diff --git a/arch/powerpc/configs/mpc885_ads_defconfig b/arch/powerpc/configs/mpc885_ads_defconfig index 56b876e418..77306be62e 100644 --- a/arch/powerpc/configs/mpc885_ads_defconfig +++ b/arch/powerpc/configs/mpc885_ads_defconfig @@ -7,7 +7,7 @@ CONFIG_VIRT_CPU_ACCOUNTING_NATIVE=y CONFIG_LOG_BUF_SHIFT=14 CONFIG_EXPERT=y # CONFIG_ELF_CORE is not set -# CONFIG_BASE_FULL is not set +CONFIG_BASE_SMALL=y # CONFIG_FUTEX is not set CONFIG_PERF_EVENTS=y # CONFIG_VM_EVENT_COUNTERS is not set diff --git a/arch/powerpc/configs/tqm8xx_defconfig b/arch/powerpc/configs/tqm8xx_defconfig index 083c2e5752..383c0966e9 100644 --- a/arch/powerpc/configs/tqm8xx_defconfig +++ b/arch/powerpc/configs/tqm8xx_defconfig @@ -6,7 +6,7 @@ CONFIG_HIGH_RES_TIMERS=y CONFIG_LOG_BUF_SHIFT=14 CONFIG_EXPERT=y # CONFIG_ELF_CORE is not set -# CONFIG_BASE_FULL is not set +CONFIG_BASE_SMALL=y # CONFIG_FUTEX is not set # CONFIG_VM_EVENT_COUNTERS is not 
set CONFIG_MODULES=y diff --git a/arch/powerpc/include/asm/Kbuild b/arch/powerpc/include/asm/Kbuild index 61a8d5555c..e5fdc336c9 100644 --- a/arch/powerpc/include/asm/Kbuild +++ b/arch/powerpc/include/asm/Kbuild @@ -6,5 +6,4 @@ generic-y += agp.h generic-y += kvm_types.h generic-y += mcs_spinlock.h generic-y += qrwlock.h -generic-y += vtime.h generic-y += early_ioremap.h diff --git a/arch/powerpc/include/asm/book3s/64/pgtable-4k.h b/arch/powerpc/include/asm/book3s/64/pgtable-4k.h index 48f21820af..baf934578c 100644 --- a/arch/powerpc/include/asm/book3s/64/pgtable-4k.h +++ b/arch/powerpc/include/asm/book3s/64/pgtable-4k.h @@ -6,26 +6,6 @@ */ #ifndef __ASSEMBLY__ #ifdef CONFIG_HUGETLB_PAGE -static inline int pmd_huge(pmd_t pmd) -{ - /* - * leaf pte for huge page - */ - if (radix_enabled()) - return !!(pmd_raw(pmd) & cpu_to_be64(_PAGE_PTE)); - return 0; -} - -static inline int pud_huge(pud_t pud) -{ - /* - * leaf pte for huge page - */ - if (radix_enabled()) - return !!(pud_raw(pud) & cpu_to_be64(_PAGE_PTE)); - return 0; -} - /* * With radix , we have hugepage ptes in the pud and pmd entries. We don't * need to setup hugepage directory for them. Our pte and page directory format diff --git a/arch/powerpc/include/asm/book3s/64/pgtable-64k.h b/arch/powerpc/include/asm/book3s/64/pgtable-64k.h index ced7ee8b42..6ac73da7b8 100644 --- a/arch/powerpc/include/asm/book3s/64/pgtable-64k.h +++ b/arch/powerpc/include/asm/book3s/64/pgtable-64k.h @@ -4,31 +4,6 @@ #ifndef __ASSEMBLY__ #ifdef CONFIG_HUGETLB_PAGE -/* - * We have PGD_INDEX_SIZ = 12 and PTE_INDEX_SIZE = 8, so that we can have - * 16GB hugepage pte in PGD and 16MB hugepage pte at PMD; - * - * Defined in such a way that we can optimize away code block at build time - * if CONFIG_HUGETLB_PAGE=n. 
- * - * returns true for pmd migration entries, THP, devmap, hugetlb - * But compile time dependent on CONFIG_HUGETLB_PAGE - */ -static inline int pmd_huge(pmd_t pmd) -{ - /* - * leaf pte for huge page - */ - return !!(pmd_raw(pmd) & cpu_to_be64(_PAGE_PTE)); -} - -static inline int pud_huge(pud_t pud) -{ - /* - * leaf pte for huge page - */ - return !!(pud_raw(pud) & cpu_to_be64(_PAGE_PTE)); -} /* * With 64k page size, we have hugepage ptes in the pgd and pmd entries. We don't diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h b/arch/powerpc/include/asm/book3s/64/pgtable.h index fac5615e6b..8f9432e385 100644 --- a/arch/powerpc/include/asm/book3s/64/pgtable.h +++ b/arch/powerpc/include/asm/book3s/64/pgtable.h @@ -262,6 +262,18 @@ extern unsigned long __kernel_io_end; extern struct page *vmemmap; extern unsigned long pci_io_base; + +#define pmd_leaf pmd_leaf +static inline bool pmd_leaf(pmd_t pmd) +{ + return !!(pmd_raw(pmd) & cpu_to_be64(_PAGE_PTE)); +} + +#define pud_leaf pud_leaf +static inline bool pud_leaf(pud_t pud) +{ + return !!(pud_raw(pud) & cpu_to_be64(_PAGE_PTE)); +} #endif /* __ASSEMBLY__ */ #include <asm/book3s/64/hash.h> @@ -1426,20 +1438,5 @@ static inline bool is_pte_rw_upgrade(unsigned long old_val, unsigned long new_va return false; } -/* - * Like pmd_huge(), but works regardless of config options - */ -#define pmd_leaf pmd_leaf -static inline bool pmd_leaf(pmd_t pmd) -{ - return !!(pmd_raw(pmd) & cpu_to_be64(_PAGE_PTE)); -} - -#define pud_leaf pud_leaf -static inline bool pud_leaf(pud_t pud) -{ - return !!(pud_raw(pud) & cpu_to_be64(_PAGE_PTE)); -} - #endif /* __ASSEMBLY__ */ #endif /* _ASM_POWERPC_BOOK3S_64_PGTABLE_H_ */ diff --git a/arch/powerpc/include/asm/cpu_has_feature.h b/arch/powerpc/include/asm/cpu_has_feature.h index 727d4b3219..0efabccd82 100644 --- a/arch/powerpc/include/asm/cpu_has_feature.h +++ b/arch/powerpc/include/asm/cpu_has_feature.h @@ -29,7 +29,7 @@ static __always_inline bool cpu_has_feature(unsigned long feature) 
#endif #ifdef CONFIG_JUMP_LABEL_FEATURE_CHECK_DEBUG - if (!static_key_initialized) { + if (!static_key_feature_checks_initialized) { printk("Warning! cpu_has_feature() used prior to jump label init!\n"); dump_stack(); return early_cpu_has_feature(feature); diff --git a/arch/powerpc/include/asm/cputime.h b/arch/powerpc/include/asm/cputime.h index 4961fb38e4..aff858ca99 100644 --- a/arch/powerpc/include/asm/cputime.h +++ b/arch/powerpc/include/asm/cputime.h @@ -32,23 +32,10 @@ #ifdef CONFIG_PPC64 #define get_accounting(tsk) (&get_paca()->accounting) #define raw_get_accounting(tsk) (&local_paca->accounting) -static inline void arch_vtime_task_switch(struct task_struct *tsk) { } #else #define get_accounting(tsk) (&task_thread_info(tsk)->accounting) #define raw_get_accounting(tsk) get_accounting(tsk) -/* - * Called from the context switch with interrupts disabled, to charge all - * accumulated times to the current process, and to prepare accounting on - * the next process. - */ -static inline void arch_vtime_task_switch(struct task_struct *prev) -{ - struct cpu_accounting_data *acct = get_accounting(current); - struct cpu_accounting_data *acct0 = get_accounting(prev); - - acct->starttime = acct0->starttime; -} #endif /* diff --git a/arch/powerpc/include/asm/eeh.h b/arch/powerpc/include/asm/eeh.h index 514dd056c2..91a9fd5325 100644 --- a/arch/powerpc/include/asm/eeh.h +++ b/arch/powerpc/include/asm/eeh.h @@ -82,7 +82,7 @@ struct eeh_pe { int false_positives; /* Times of reported #ff's */ atomic_t pass_dev_cnt; /* Count of passed through devs */ struct eeh_pe *parent; /* Parent PE */ - void *data; /* PE auxillary data */ + void *data; /* PE auxiliary data */ struct list_head child_list; /* List of PEs below this PE */ struct list_head child; /* Memb. 
child_list/eeh_phb_pe */ struct list_head edevs; /* List of eeh_dev in this PE */ diff --git a/arch/powerpc/include/asm/elf.h b/arch/powerpc/include/asm/elf.h index 79f1c480b5..bb4b94444d 100644 --- a/arch/powerpc/include/asm/elf.h +++ b/arch/powerpc/include/asm/elf.h @@ -127,8 +127,6 @@ extern int arch_setup_additional_pages(struct linux_binprm *bprm, /* Notes used in ET_CORE. Note name is "SPU/<fd>/<filename>". */ #define NT_SPU 1 -#define ARCH_HAVE_EXTRA_ELF_NOTES - #endif /* CONFIG_SPU_BASE */ #ifdef CONFIG_PPC64 diff --git a/arch/powerpc/include/asm/fadump-internal.h b/arch/powerpc/include/asm/fadump-internal.h index 27f9e11eda..e83869a4eb 100644 --- a/arch/powerpc/include/asm/fadump-internal.h +++ b/arch/powerpc/include/asm/fadump-internal.h @@ -42,13 +42,38 @@ static inline u64 fadump_str_to_u64(const char *str) #define FADUMP_CPU_UNKNOWN (~((u32)0)) -#define FADUMP_CRASH_INFO_MAGIC fadump_str_to_u64("FADMPINF") +/* + * The introduction of new fields in the fadump crash info header has + * led to a change in the magic key from `FADMPINF` to `FADMPSIG` for + * identifying a kernel crash from an old kernel. + * + * To prevent the need for further changes to the magic number in the + * event of future modifications to the fadump crash info header, a + * version field has been introduced to track the fadump crash info + * header version. + * + * Consider a few points before adding new members to the fadump crash info + * header structure: + * + * - Append new members; avoid adding them in between. + * - Non-primitive members should have a size member as well. + * - For every change in the fadump header, increment the + * fadump header version. This helps the updated kernel decide how to + * handle kernel dumps from older kernels. 
+ */ +#define FADUMP_CRASH_INFO_MAGIC_OLD fadump_str_to_u64("FADMPINF") +#define FADUMP_CRASH_INFO_MAGIC fadump_str_to_u64("FADMPSIG") +#define FADUMP_HEADER_VERSION 1 /* fadump crash info structure */ struct fadump_crash_info_header { u64 magic_number; - u64 elfcorehdr_addr; + u32 version; u32 crashing_cpu; + u64 vmcoreinfo_raddr; + u64 vmcoreinfo_size; + u32 pt_regs_sz; + u32 cpu_mask_sz; struct pt_regs regs; struct cpumask cpu_mask; }; @@ -94,9 +119,13 @@ struct fw_dump { u64 boot_mem_regs_cnt; unsigned long fadumphdr_addr; + u64 elfcorehdr_addr; + u64 elfcorehdr_size; unsigned long cpu_notes_buf_vaddr; unsigned long cpu_notes_buf_size; + unsigned long param_area; + /* * Maximum size supported by firmware to copy from source to * destination address per entry. @@ -111,6 +140,7 @@ struct fw_dump { unsigned long dump_active:1; unsigned long dump_registered:1; unsigned long nocma:1; + unsigned long param_area_supported:1; struct fadump_ops *ops; }; @@ -129,6 +159,7 @@ struct fadump_ops { struct seq_file *m); void (*fadump_trigger)(struct fadump_crash_info_header *fdh, const char *msg); + int (*fadump_max_boot_mem_rgns)(void); }; /* Helper functions */ @@ -136,7 +167,6 @@ s32 __init fadump_setup_cpu_notes_buf(u32 num_cpus); void fadump_free_cpu_notes_buf(void); u32 *__init fadump_regs_to_elf_notes(u32 *buf, struct pt_regs *regs); void __init fadump_update_elfcore_header(char *bufp); -bool is_fadump_boot_mem_contiguous(void); bool is_fadump_reserved_mem_contiguous(void); #else /* !CONFIG_PRESERVE_FA_DUMP */ diff --git a/arch/powerpc/include/asm/fadump.h b/arch/powerpc/include/asm/fadump.h index 526a6a6473..ef40c9b697 100644 --- a/arch/powerpc/include/asm/fadump.h +++ b/arch/powerpc/include/asm/fadump.h @@ -19,12 +19,14 @@ extern int is_fadump_active(void); extern int should_fadump_crash(void); extern void crash_fadump(struct pt_regs *, const char *); extern void fadump_cleanup(void); +extern void fadump_append_bootargs(void); #else /* CONFIG_FA_DUMP */ static inline 
int is_fadump_active(void) { return 0; } static inline int should_fadump_crash(void) { return 0; } static inline void crash_fadump(struct pt_regs *regs, const char *str) { } static inline void fadump_cleanup(void) { } +static inline void fadump_append_bootargs(void) { } #endif /* !CONFIG_FA_DUMP */ #if defined(CONFIG_FA_DUMP) || defined(CONFIG_PRESERVE_FA_DUMP) diff --git a/arch/powerpc/include/asm/feature-fixups.h b/arch/powerpc/include/asm/feature-fixups.h index 77824bd289..17d168dd8b 100644 --- a/arch/powerpc/include/asm/feature-fixups.h +++ b/arch/powerpc/include/asm/feature-fixups.h @@ -291,6 +291,8 @@ extern long __start___rfi_flush_fixup, __stop___rfi_flush_fixup; extern long __start___barrier_nospec_fixup, __stop___barrier_nospec_fixup; extern long __start__btb_flush_fixup, __stop__btb_flush_fixup; +extern bool static_key_feature_checks_initialized; + void apply_feature_fixups(void); void update_mmu_feature_fixups(unsigned long mask); void setup_feature_keys(void); diff --git a/arch/powerpc/include/asm/fpu.h b/arch/powerpc/include/asm/fpu.h new file mode 100644 index 0000000000..ca584e4bc4 --- /dev/null +++ b/arch/powerpc/include/asm/fpu.h @@ -0,0 +1,28 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (C) 2023 SiFive + */ + +#ifndef _ASM_POWERPC_FPU_H +#define _ASM_POWERPC_FPU_H + +#include <linux/preempt.h> + +#include <asm/cpu_has_feature.h> +#include <asm/switch_to.h> + +#define kernel_fpu_available() (!cpu_has_feature(CPU_FTR_FPU_UNAVAILABLE)) + +static inline void kernel_fpu_begin(void) +{ + preempt_disable(); + enable_kernel_fp(); +} + +static inline void kernel_fpu_end(void) +{ + disable_kernel_fp(); + preempt_enable(); +} + +#endif /* ! 
_ASM_POWERPC_FPU_H */ diff --git a/arch/powerpc/include/asm/guest-state-buffer.h b/arch/powerpc/include/asm/guest-state-buffer.h index 808149f315..d107abe146 100644 --- a/arch/powerpc/include/asm/guest-state-buffer.h +++ b/arch/powerpc/include/asm/guest-state-buffer.h @@ -81,6 +81,7 @@ #define KVMPPC_GSID_HASHKEYR 0x1050 #define KVMPPC_GSID_HASHPKEYR 0x1051 #define KVMPPC_GSID_CTRL 0x1052 +#define KVMPPC_GSID_DPDES 0x1053 #define KVMPPC_GSID_CR 0x2000 #define KVMPPC_GSID_PIDR 0x2001 @@ -110,7 +111,7 @@ #define KVMPPC_GSE_META_COUNT (KVMPPC_GSE_META_END - KVMPPC_GSE_META_START + 1) #define KVMPPC_GSE_DW_REGS_START KVMPPC_GSID_GPR(0) -#define KVMPPC_GSE_DW_REGS_END KVMPPC_GSID_CTRL +#define KVMPPC_GSE_DW_REGS_END KVMPPC_GSID_DPDES #define KVMPPC_GSE_DW_REGS_COUNT \ (KVMPPC_GSE_DW_REGS_END - KVMPPC_GSE_DW_REGS_START + 1) diff --git a/arch/powerpc/include/asm/io.h b/arch/powerpc/include/asm/io.h index 048e3705af..52e1b1d15f 100644 --- a/arch/powerpc/include/asm/io.h +++ b/arch/powerpc/include/asm/io.h @@ -982,7 +982,7 @@ static inline phys_addr_t page_to_phys(struct page *page) } /* - * 32 bits still uses virt_to_bus() for it's implementation of DMA + * 32 bits still uses virt_to_bus() for its implementation of DMA * mappings se we have to keep it defined here. We also have some old * drivers (shame shame shame) that use bus_to_virt() and haven't been * fixed yet so I need to define it here. 
diff --git a/arch/powerpc/include/asm/kasan.h b/arch/powerpc/include/asm/kasan.h index 365d272009..b5bbb94c51 100644 --- a/arch/powerpc/include/asm/kasan.h +++ b/arch/powerpc/include/asm/kasan.h @@ -19,7 +19,7 @@ #define KASAN_SHADOW_SCALE_SHIFT 3 -#if defined(CONFIG_MODULES) && defined(CONFIG_PPC32) +#if defined(CONFIG_EXECMEM) && defined(CONFIG_PPC32) #define KASAN_KERN_START ALIGN_DOWN(PAGE_OFFSET - SZ_256M, SZ_256M) #else #define KASAN_KERN_START PAGE_OFFSET diff --git a/arch/powerpc/include/asm/kexec.h b/arch/powerpc/include/asm/kexec.h index fdb90e24dc..95a98b390d 100644 --- a/arch/powerpc/include/asm/kexec.h +++ b/arch/powerpc/include/asm/kexec.h @@ -135,6 +135,17 @@ static inline void crash_setup_regs(struct pt_regs *newregs, ppc_save_regs(newregs); } +#ifdef CONFIG_CRASH_HOTPLUG +void arch_crash_handle_hotplug_event(struct kimage *image, void *arg); +#define arch_crash_handle_hotplug_event arch_crash_handle_hotplug_event + +int arch_crash_hotplug_support(struct kimage *image, unsigned long kexec_flags); +#define arch_crash_hotplug_support arch_crash_hotplug_support + +unsigned int arch_crash_get_elfcorehdr_size(void); +#define crash_get_elfcorehdr_size arch_crash_get_elfcorehdr_size +#endif /* CONFIG_CRASH_HOTPLUG */ + extern int crashing_cpu; extern void crash_send_ipi(void (*crash_ipi_callback)(struct pt_regs *)); extern void crash_ipi_callback(struct pt_regs *regs); @@ -185,6 +196,10 @@ static inline void crash_send_ipi(void (*crash_ipi_callback)(struct pt_regs *)) #endif /* CONFIG_CRASH_DUMP */ +#if defined(CONFIG_KEXEC_FILE) || defined(CONFIG_CRASH_DUMP) +int update_cpus_node(void *fdt); +#endif + #ifdef CONFIG_PPC_BOOK3S_64 #include <asm/book3s/64/kexec.h> #endif diff --git a/arch/powerpc/include/asm/kexec_ranges.h b/arch/powerpc/include/asm/kexec_ranges.h index f83866a19e..14055896cb 100644 --- a/arch/powerpc/include/asm/kexec_ranges.h +++ b/arch/powerpc/include/asm/kexec_ranges.h @@ -7,19 +7,9 @@ void sort_memory_ranges(struct crash_mem *mrngs, 
bool merge); struct crash_mem *realloc_mem_ranges(struct crash_mem **mem_ranges); int add_mem_range(struct crash_mem **mem_ranges, u64 base, u64 size); -int add_tce_mem_ranges(struct crash_mem **mem_ranges); -int add_initrd_mem_range(struct crash_mem **mem_ranges); -#ifdef CONFIG_PPC_64S_HASH_MMU -int add_htab_mem_range(struct crash_mem **mem_ranges); -#else -static inline int add_htab_mem_range(struct crash_mem **mem_ranges) -{ - return 0; -} -#endif -int add_kernel_mem_range(struct crash_mem **mem_ranges); -int add_rtas_mem_range(struct crash_mem **mem_ranges); -int add_opal_mem_range(struct crash_mem **mem_ranges); -int add_reserved_mem_ranges(struct crash_mem **mem_ranges); - +int remove_mem_range(struct crash_mem **mem_ranges, u64 base, u64 size); +int get_exclude_memory_ranges(struct crash_mem **mem_ranges); +int get_reserved_memory_ranges(struct crash_mem **mem_ranges); +int get_crash_memory_ranges(struct crash_mem **mem_ranges); +int get_usable_memory_ranges(struct crash_mem **mem_ranges); #endif /* _ASM_POWERPC_KEXEC_RANGES_H */ diff --git a/arch/powerpc/include/asm/kvm_book3s.h b/arch/powerpc/include/asm/kvm_book3s.h index 3e1e2a698c..10618622d7 100644 --- a/arch/powerpc/include/asm/kvm_book3s.h +++ b/arch/powerpc/include/asm/kvm_book3s.h @@ -594,6 +594,7 @@ static inline u##size kvmppc_get_##reg(struct kvm_vcpu *vcpu) \ KVMPPC_BOOK3S_VCORE_ACCESSOR(vtb, 64, KVMPPC_GSID_VTB) +KVMPPC_BOOK3S_VCORE_ACCESSOR(dpdes, 64, KVMPPC_GSID_DPDES) KVMPPC_BOOK3S_VCORE_ACCESSOR_GET(arch_compat, 32, KVMPPC_GSID_LOGICAL_PVR) KVMPPC_BOOK3S_VCORE_ACCESSOR_GET(lpcr, 64, KVMPPC_GSID_LPCR) KVMPPC_BOOK3S_VCORE_ACCESSOR_SET(tb_offset, 64, KVMPPC_GSID_TB_OFFSET) diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h index 3281215097..ca3829d47a 100644 --- a/arch/powerpc/include/asm/kvm_ppc.h +++ b/arch/powerpc/include/asm/kvm_ppc.h @@ -287,7 +287,6 @@ struct kvmppc_ops { bool (*unmap_gfn_range)(struct kvm *kvm, struct kvm_gfn_range *range); bool 
(*age_gfn)(struct kvm *kvm, struct kvm_gfn_range *range); bool (*test_age_gfn)(struct kvm *kvm, struct kvm_gfn_range *range); - bool (*set_spte_gfn)(struct kvm *kvm, struct kvm_gfn_range *range); void (*free_memslot)(struct kvm_memory_slot *slot); int (*init_vm)(struct kvm *kvm); void (*destroy_vm)(struct kvm *kvm); diff --git a/arch/powerpc/include/asm/mmu.h b/arch/powerpc/include/asm/mmu.h index 3b72c7ed24..8a27b046c6 100644 --- a/arch/powerpc/include/asm/mmu.h +++ b/arch/powerpc/include/asm/mmu.h @@ -251,7 +251,7 @@ static __always_inline bool mmu_has_feature(unsigned long feature) #endif #ifdef CONFIG_JUMP_LABEL_FEATURE_CHECK_DEBUG - if (!static_key_initialized) { + if (!static_key_feature_checks_initialized) { printk("Warning! mmu_has_feature() used prior to jump label init!\n"); dump_stack(); return early_mmu_has_feature(feature); @@ -406,9 +406,5 @@ extern void *abatron_pteptrs[2]; #include <asm/nohash/mmu.h> #endif -#if defined(CONFIG_FA_DUMP) || defined(CONFIG_PRESERVE_FA_DUMP) -#define __HAVE_ARCH_RESERVED_KERNEL_PAGES -#endif - #endif /* __KERNEL__ */ #endif /* _ASM_POWERPC_MMU_H_ */ diff --git a/arch/powerpc/include/asm/module.h b/arch/powerpc/include/asm/module.h index a8e2e8339f..300c777cc3 100644 --- a/arch/powerpc/include/asm/module.h +++ b/arch/powerpc/include/asm/module.h @@ -48,11 +48,6 @@ struct mod_arch_specific { unsigned long tramp; unsigned long tramp_regs; #endif - - /* List of BUG addresses, source line numbers and filenames */ - struct list_head bug_list; - struct bug_entry *bug_table; - unsigned int num_bugs; }; /* diff --git a/arch/powerpc/include/asm/nohash/pgtable.h b/arch/powerpc/include/asm/nohash/pgtable.h index 427db14292..f5f39d4f03 100644 --- a/arch/powerpc/include/asm/nohash/pgtable.h +++ b/arch/powerpc/include/asm/nohash/pgtable.h @@ -351,16 +351,6 @@ static inline int hugepd_ok(hugepd_t hpd) #endif } -static inline int pmd_huge(pmd_t pmd) -{ - return 0; -} - -static inline int pud_huge(pud_t pud) -{ - return 0; -} - #define 
is_hugepd(hpd) (hugepd_ok(hpd)) #endif diff --git a/arch/powerpc/include/asm/opal-api.h b/arch/powerpc/include/asm/opal-api.h index a2bc4b95e7..8c9d4b26bf 100644 --- a/arch/powerpc/include/asm/opal-api.h +++ b/arch/powerpc/include/asm/opal-api.h @@ -1027,10 +1027,10 @@ struct opal_i2c_request { * The host will pass on OPAL, a buffer of length OPAL_SYSEPOW_MAX * with individual elements being 16 bits wide to fetch the system * wide EPOW status. Each element in the buffer will contain the - * EPOW status in it's bit representation for a particular EPOW sub + * EPOW status in its bit representation for a particular EPOW sub * class as defined here. So multiple detailed EPOW status bits * specific for any sub class can be represented in a single buffer - * element as it's bit representation. + * element as its bit representation. */ /* System EPOW type */ diff --git a/arch/powerpc/include/asm/pmac_feature.h b/arch/powerpc/include/asm/pmac_feature.h index 2495866f2e..420e2878ae 100644 --- a/arch/powerpc/include/asm/pmac_feature.h +++ b/arch/powerpc/include/asm/pmac_feature.h @@ -192,7 +192,7 @@ static inline long pmac_call_feature(int selector, struct device_node* node, /* PMAC_FTR_BMAC_ENABLE (struct device_node* node, 0, int value) * enable/disable the bmac (ethernet) cell of a mac-io ASIC, also drive - * it's reset line + * its reset line */ #define PMAC_FTR_BMAC_ENABLE PMAC_FTR_DEF(6) diff --git a/arch/powerpc/include/asm/processor.h b/arch/powerpc/include/asm/processor.h index b2c51d337e..e44cac0da3 100644 --- a/arch/powerpc/include/asm/processor.h +++ b/arch/powerpc/include/asm/processor.h @@ -260,7 +260,8 @@ struct thread_struct { unsigned long sier2; unsigned long sier3; unsigned long hashkeyr; - + unsigned long dexcr; + unsigned long dexcr_onexec; /* Reset value to load on exec */ #endif }; @@ -333,6 +334,16 @@ extern int set_endian(struct task_struct *tsk, unsigned int val); extern int get_unalign_ctl(struct task_struct *tsk, unsigned long adr); extern int 
set_unalign_ctl(struct task_struct *tsk, unsigned int val); +#ifdef CONFIG_PPC_BOOK3S_64 + +#define PPC_GET_DEXCR_ASPECT(tsk, asp) get_dexcr_prctl((tsk), (asp)) +#define PPC_SET_DEXCR_ASPECT(tsk, asp, val) set_dexcr_prctl((tsk), (asp), (val)) + +int get_dexcr_prctl(struct task_struct *tsk, unsigned long asp); +int set_dexcr_prctl(struct task_struct *tsk, unsigned long asp, unsigned long val); + +#endif + extern void load_fp_state(struct thread_fp_state *fp); extern void store_fp_state(struct thread_fp_state *fp); extern void load_vr_state(struct thread_vr_state *vr); diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h index d3d1aea009..eed33cb916 100644 --- a/arch/powerpc/include/asm/reg.h +++ b/arch/powerpc/include/asm/reg.h @@ -615,7 +615,7 @@ #define HID1_ABE (1<<10) /* 7450 Address Broadcast Enable */ #define HID1_PS (1<<16) /* 750FX PLL selection */ #endif -#define SPRN_HID2 0x3F8 /* Hardware Implementation Register 2 */ +#define SPRN_HID2_750FX 0x3F8 /* IBM 750FX HID2 Register */ #define SPRN_HID2_GEKKO 0x398 /* Gekko HID2 Register */ #define SPRN_HID2_G2_LE 0x3F3 /* G2_LE HID2 Register */ #define HID2_G2_LE_HBE (1<<18) /* High BAT Enable (G2_LE) */ diff --git a/arch/powerpc/include/asm/trace.h b/arch/powerpc/include/asm/trace.h index d9ac3a4f46..a7b69b2529 100644 --- a/arch/powerpc/include/asm/trace.h +++ b/arch/powerpc/include/asm/trace.h @@ -137,7 +137,7 @@ TRACE_EVENT(rtas_input, TP_fast_assign( __entry->nargs = be32_to_cpu(rtas_args->nargs); - __assign_str(name, name); + __assign_str(name); be32_to_cpu_array(__get_dynamic_array(inputs), rtas_args->args, __entry->nargs); ), @@ -162,7 +162,7 @@ TRACE_EVENT(rtas_output, TP_fast_assign( __entry->nr_other = be32_to_cpu(rtas_args->nret) - 1; __entry->status = be32_to_cpu(rtas_args->rets[0]); - __assign_str(name, name); + __assign_str(name); be32_to_cpu_array(__get_dynamic_array(other_outputs), &rtas_args->rets[1], __entry->nr_other); ), diff --git a/arch/powerpc/include/asm/uaccess.h 
b/arch/powerpc/include/asm/uaccess.h index 4cba724c88..fd594bf6c6 100644 --- a/arch/powerpc/include/asm/uaccess.h +++ b/arch/powerpc/include/asm/uaccess.h @@ -181,8 +181,19 @@ do { \ #endif #ifdef __powerpc64__ +#ifdef CONFIG_PPC_KERNEL_PREFIXED #define __get_user_asm2_goto(x, addr, label) \ __get_user_asm_goto(x, addr, label, "ld") +#else +#define __get_user_asm2_goto(x, addr, label) \ + asm_goto_output( \ + "1: ld%U1%X1 %0, %1 # get_user\n" \ + EX_TABLE(1b, %l2) \ + : "=r" (x) \ + : DS_FORM_CONSTRAINT (*addr) \ + : \ + : label) +#endif // CONFIG_PPC_KERNEL_PREFIXED #else /* __powerpc64__ */ #define __get_user_asm2_goto(x, addr, label) \ asm_goto_output( \ diff --git a/arch/powerpc/include/asm/uninorth.h b/arch/powerpc/include/asm/uninorth.h index e278299b9b..6949b5daa3 100644 --- a/arch/powerpc/include/asm/uninorth.h +++ b/arch/powerpc/include/asm/uninorth.h @@ -144,7 +144,7 @@ #define UNI_N_HWINIT_STATE_SLEEPING 0x01 #define UNI_N_HWINIT_STATE_RUNNING 0x02 /* This last bit appear to be used by the bootROM to know the second - * CPU has started and will enter it's sleep loop with IP=0 + * CPU has started and will enter its sleep loop with IP=0 */ #define UNI_N_HWINIT_STATE_CPU1_FLAG 0x10000000 diff --git a/arch/powerpc/include/asm/vdso/gettimeofday.h b/arch/powerpc/include/asm/vdso/gettimeofday.h index 78302f6c25..c6390890a6 100644 --- a/arch/powerpc/include/asm/vdso/gettimeofday.h +++ b/arch/powerpc/include/asm/vdso/gettimeofday.h @@ -13,6 +13,17 @@ #define VDSO_HAS_TIME 1 +/* + * powerpc specific delta calculation. + * + * This variant removes the masking of the subtraction because the + * clocksource mask of all VDSO capable clocksources on powerpc is U64_MAX + * which would result in a pointless operation. The compiler cannot + * optimize it away as the mask comes from the vdso data and is not compile + * time constant. 
+ */ +#define VDSO_DELTA_NOMASK 1 + static __always_inline int do_syscall_2(const unsigned long _r0, const unsigned long _r3, const unsigned long _r4) { @@ -104,21 +115,6 @@ static inline bool vdso_clocksource_ok(const struct vdso_data *vd) } #define vdso_clocksource_ok vdso_clocksource_ok -/* - * powerpc specific delta calculation. - * - * This variant removes the masking of the subtraction because the - * clocksource mask of all VDSO capable clocksources on powerpc is U64_MAX - * which would result in a pointless operation. The compiler cannot - * optimize it away as the mask comes from the vdso data and is not compile - * time constant. - */ -static __always_inline u64 vdso_calc_delta(u64 cycles, u64 last, u64 mask, u32 mult) -{ - return (cycles - last) * mult; -} -#define vdso_calc_delta vdso_calc_delta - #ifndef __powerpc64__ static __always_inline u64 vdso_shift_ns(u64 ns, unsigned long shift) { diff --git a/arch/powerpc/include/asm/fb.h b/arch/powerpc/include/asm/video.h index c0c5d1df7a..e1770114ff 100644 --- a/arch/powerpc/include/asm/fb.h +++ b/arch/powerpc/include/asm/video.h @@ -1,6 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _ASM_FB_H_ -#define _ASM_FB_H_ +#ifndef _ASM_VIDEO_H_ +#define _ASM_VIDEO_H_ #include <asm/page.h> @@ -12,6 +12,6 @@ static inline pgprot_t pgprot_framebuffer(pgprot_t prot, } #define pgprot_framebuffer pgprot_framebuffer -#include <asm-generic/fb.h> +#include <asm-generic/video.h> -#endif /* _ASM_FB_H_ */ +#endif /* _ASM_VIDEO_H_ */ diff --git a/arch/powerpc/include/uapi/asm/bootx.h b/arch/powerpc/include/uapi/asm/bootx.h index 6728c7e24e..1b8c121071 100644 --- a/arch/powerpc/include/uapi/asm/bootx.h +++ b/arch/powerpc/include/uapi/asm/bootx.h @@ -108,7 +108,7 @@ typedef struct boot_infos /* ALL BELOW NEW (vers. 4) */ /* This defines the physical memory. Valid with BOOT_ARCH_NUBUS flag - (non-PCI) only. On PCI, memory is contiguous and it's size is in the + (non-PCI) only. 
On PCI, memory is contiguous and its size is in the device-tree. */ boot_info_map_entry_t physMemoryMap[MAX_MEM_MAP_SIZE]; /* Where the phys memory is */ diff --git a/arch/powerpc/include/uapi/asm/papr_pdsm.h b/arch/powerpc/include/uapi/asm/papr_pdsm.h deleted file mode 100644 index 1743992504..0000000000 --- a/arch/powerpc/include/uapi/asm/papr_pdsm.h +++ /dev/null @@ -1,165 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ -/* - * PAPR nvDimm Specific Methods (PDSM) and structs for libndctl - * - * (C) Copyright IBM 2020 - * - * Author: Vaibhav Jain <vaibhav at linux.ibm.com> - */ - -#ifndef _UAPI_ASM_POWERPC_PAPR_PDSM_H_ -#define _UAPI_ASM_POWERPC_PAPR_PDSM_H_ - -#include <linux/types.h> -#include <linux/ndctl.h> - -/* - * PDSM Envelope: - * - * The ioctl ND_CMD_CALL exchange data between user-space and kernel via - * envelope which consists of 2 headers sections and payload sections as - * illustrated below: - * +-----------------+---------------+---------------------------+ - * | 64-Bytes | 8-Bytes | Max 184-Bytes | - * +-----------------+---------------+---------------------------+ - * | ND-HEADER | PDSM-HEADER | PDSM-PAYLOAD | - * +-----------------+---------------+---------------------------+ - * | nd_family | | | - * | nd_size_out | cmd_status | | - * | nd_size_in | reserved | nd_pdsm_payload | - * | nd_command | payload --> | | - * | nd_fw_size | | | - * | nd_payload ---> | | | - * +---------------+-----------------+---------------------------+ - * - * ND Header: - * This is the generic libnvdimm header described as 'struct nd_cmd_pkg' - * which is interpreted by libnvdimm before passed on to papr_scm. 
Important - * member fields used are: - * 'nd_family' : (In) NVDIMM_FAMILY_PAPR_SCM - * 'nd_size_in' : (In) PDSM-HEADER + PDSM-IN-PAYLOAD (usually 0) - * 'nd_size_out' : (In) PDSM-HEADER + PDSM-RETURN-PAYLOAD - * 'nd_command' : (In) One of PAPR_PDSM_XXX - * 'nd_fw_size' : (Out) PDSM-HEADER + size of actual payload returned - * - * PDSM Header: - * This is papr-scm specific header that precedes the payload. This is defined - * as nd_cmd_pdsm_pkg. Following fields aare available in this header: - * - * 'cmd_status' : (Out) Errors if any encountered while servicing PDSM. - * 'reserved' : Not used, reserved for future and should be set to 0. - * 'payload' : A union of all the possible payload structs - * - * PDSM Payload: - * - * The layout of the PDSM Payload is defined by various structs shared between - * papr_scm and libndctl so that contents of payload can be interpreted. As such - * its defined as a union of all possible payload structs as - * 'union nd_pdsm_payload'. Based on the value of 'nd_cmd_pkg.nd_command' - * appropriate member of the union is accessed. - */ - -/* Max payload size that we can handle */ -#define ND_PDSM_PAYLOAD_MAX_SIZE 184 - -/* Max payload size that we can handle */ -#define ND_PDSM_HDR_SIZE \ - (sizeof(struct nd_pkg_pdsm) - ND_PDSM_PAYLOAD_MAX_SIZE) - -/* Various nvdimm health indicators */ -#define PAPR_PDSM_DIMM_HEALTHY 0 -#define PAPR_PDSM_DIMM_UNHEALTHY 1 -#define PAPR_PDSM_DIMM_CRITICAL 2 -#define PAPR_PDSM_DIMM_FATAL 3 - -/* struct nd_papr_pdsm_health.extension_flags field flags */ - -/* Indicate that the 'dimm_fuel_gauge' field is valid */ -#define PDSM_DIMM_HEALTH_RUN_GAUGE_VALID 1 - -/* Indicate that the 'dimm_dsc' field is valid */ -#define PDSM_DIMM_DSC_VALID 2 - -/* - * Struct exchanged between kernel & ndctl in for PAPR_PDSM_HEALTH - * Various flags indicate the health status of the dimm. - * - * extension_flags : Any extension fields present in the struct. - * dimm_unarmed : Dimm not armed. So contents wont persist. 
- * dimm_bad_shutdown : Previous shutdown did not persist contents. - * dimm_bad_restore : Contents from previous shutdown werent restored. - * dimm_scrubbed : Contents of the dimm have been scrubbed. - * dimm_locked : Contents of the dimm cant be modified until CEC reboot - * dimm_encrypted : Contents of dimm are encrypted. - * dimm_health : Dimm health indicator. One of PAPR_PDSM_DIMM_XXXX - * dimm_fuel_gauge : Life remaining of DIMM as a percentage from 0-100 - */ -struct nd_papr_pdsm_health { - union { - struct { - __u32 extension_flags; - __u8 dimm_unarmed; - __u8 dimm_bad_shutdown; - __u8 dimm_bad_restore; - __u8 dimm_scrubbed; - __u8 dimm_locked; - __u8 dimm_encrypted; - __u16 dimm_health; - - /* Extension flag PDSM_DIMM_HEALTH_RUN_GAUGE_VALID */ - __u16 dimm_fuel_gauge; - - /* Extension flag PDSM_DIMM_DSC_VALID */ - __u64 dimm_dsc; - }; - __u8 buf[ND_PDSM_PAYLOAD_MAX_SIZE]; - }; -}; - -/* Flags for injecting specific smart errors */ -#define PDSM_SMART_INJECT_HEALTH_FATAL (1 << 0) -#define PDSM_SMART_INJECT_BAD_SHUTDOWN (1 << 1) - -struct nd_papr_pdsm_smart_inject { - union { - struct { - /* One or more of PDSM_SMART_INJECT_ */ - __u32 flags; - __u8 fatal_enable; - __u8 unsafe_shutdown_enable; - }; - __u8 buf[ND_PDSM_PAYLOAD_MAX_SIZE]; - }; -}; - -/* - * Methods to be embedded in ND_CMD_CALL request. 
These are sent to the kernel - * via 'nd_cmd_pkg.nd_command' member of the ioctl struct - */ -enum papr_pdsm { - PAPR_PDSM_MIN = 0x0, - PAPR_PDSM_HEALTH, - PAPR_PDSM_SMART_INJECT, - PAPR_PDSM_MAX, -}; - -/* Maximal union that can hold all possible payload types */ -union nd_pdsm_payload { - struct nd_papr_pdsm_health health; - struct nd_papr_pdsm_smart_inject smart_inject; - __u8 buf[ND_PDSM_PAYLOAD_MAX_SIZE]; -} __packed; - -/* - * PDSM-header + payload expected with ND_CMD_CALL ioctl from libnvdimm - * Valid member of union 'payload' is identified via 'nd_cmd_pkg.nd_command' - * that should always precede this struct when sent to papr_scm via CMD_CALL - * interface. - */ -struct nd_pkg_pdsm { - __s32 cmd_status; /* Out: Sub-cmd status returned back */ - __u16 reserved[2]; /* Ignored and to be set as '0' */ - union nd_pdsm_payload payload; -} __packed; - -#endif /* _UAPI_ASM_POWERPC_PAPR_PDSM_H_ */ diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile index d3282fbea4..8585d03c02 100644 --- a/arch/powerpc/kernel/Makefile +++ b/arch/powerpc/kernel/Makefile @@ -3,9 +3,6 @@ # Makefile for the linux kernel. 
# -ifdef CONFIG_PPC64 -CFLAGS_prom_init.o += $(NO_MINIMAL_TOC) -endif ifdef CONFIG_PPC32 CFLAGS_prom_init.o += -fPIC CFLAGS_btext.o += -fPIC @@ -87,6 +84,7 @@ obj-$(CONFIG_HAVE_HW_BREAKPOINT) += hw_breakpoint.o obj-$(CONFIG_PPC_DAWR) += dawr.o obj-$(CONFIG_PPC_BOOK3S_64) += cpu_setup_ppc970.o cpu_setup_pa6t.o obj-$(CONFIG_PPC_BOOK3S_64) += cpu_setup_power.o +obj-$(CONFIG_PPC_BOOK3S_64) += dexcr.o obj-$(CONFIG_PPC_BOOK3S_64) += mce.o mce_power.o obj-$(CONFIG_PPC_BOOK3E_64) += exceptions-64e.o idle_64e.o obj-$(CONFIG_PPC_BARRIER_NOSPEC) += security.o @@ -190,9 +188,6 @@ GCOV_PROFILE_kprobes-ftrace.o := n KCOV_INSTRUMENT_kprobes-ftrace.o := n KCSAN_SANITIZE_kprobes-ftrace.o := n UBSAN_SANITIZE_kprobes-ftrace.o := n -GCOV_PROFILE_syscall_64.o := n -KCOV_INSTRUMENT_syscall_64.o := n -UBSAN_SANITIZE_syscall_64.o := n UBSAN_SANITIZE_vdso.o := n # Necessary for booting with kcov enabled on book3e machines diff --git a/arch/powerpc/kernel/cpu_setup_6xx.S b/arch/powerpc/kernel/cpu_setup_6xx.S index bfd3f442e5..ab3ca74e67 100644 --- a/arch/powerpc/kernel/cpu_setup_6xx.S +++ b/arch/powerpc/kernel/cpu_setup_6xx.S @@ -401,7 +401,7 @@ _GLOBAL(__save_cpu_setup) andi. 
r3,r3,0xff00 cmpwi cr0,r3,0x0200 bne 1f - mfspr r4,SPRN_HID2 + mfspr r4,SPRN_HID2_750FX stw r4,CS_HID2(r5) 1: mtcr r7 @@ -496,7 +496,7 @@ _GLOBAL(__restore_cpu_setup) bne 4f lwz r4,CS_HID2(r5) rlwinm r4,r4,0,19,17 - mtspr SPRN_HID2,r4 + mtspr SPRN_HID2_750FX,r4 sync 4: lwz r4,CS_HID1(r5) diff --git a/arch/powerpc/kernel/dexcr.c b/arch/powerpc/kernel/dexcr.c new file mode 100644 index 0000000000..3a0358e91c --- /dev/null +++ b/arch/powerpc/kernel/dexcr.c @@ -0,0 +1,124 @@ +// SPDX-License-Identifier: GPL-2.0-or-later + +#include <linux/capability.h> +#include <linux/cpu.h> +#include <linux/init.h> +#include <linux/prctl.h> +#include <linux/sched.h> + +#include <asm/cpu_has_feature.h> +#include <asm/cputable.h> +#include <asm/processor.h> +#include <asm/reg.h> + +static int __init init_task_dexcr(void) +{ + if (!early_cpu_has_feature(CPU_FTR_ARCH_31)) + return 0; + + current->thread.dexcr_onexec = mfspr(SPRN_DEXCR); + + return 0; +} +early_initcall(init_task_dexcr) + +/* Allow thread local configuration of these by default */ +#define DEXCR_PRCTL_EDITABLE ( \ + DEXCR_PR_IBRTPD | \ + DEXCR_PR_SRAPD | \ + DEXCR_PR_NPHIE) + +static int prctl_to_aspect(unsigned long which, unsigned int *aspect) +{ + switch (which) { + case PR_PPC_DEXCR_SBHE: + *aspect = DEXCR_PR_SBHE; + break; + case PR_PPC_DEXCR_IBRTPD: + *aspect = DEXCR_PR_IBRTPD; + break; + case PR_PPC_DEXCR_SRAPD: + *aspect = DEXCR_PR_SRAPD; + break; + case PR_PPC_DEXCR_NPHIE: + *aspect = DEXCR_PR_NPHIE; + break; + default: + return -ENODEV; + } + + return 0; +} + +int get_dexcr_prctl(struct task_struct *task, unsigned long which) +{ + unsigned int aspect; + int ret; + + ret = prctl_to_aspect(which, &aspect); + if (ret) + return ret; + + if (aspect & DEXCR_PRCTL_EDITABLE) + ret |= PR_PPC_DEXCR_CTRL_EDITABLE; + + if (aspect & mfspr(SPRN_DEXCR)) + ret |= PR_PPC_DEXCR_CTRL_SET; + else + ret |= PR_PPC_DEXCR_CTRL_CLEAR; + + if (aspect & task->thread.dexcr_onexec) + ret |= PR_PPC_DEXCR_CTRL_SET_ONEXEC; + else + ret |= 
PR_PPC_DEXCR_CTRL_CLEAR_ONEXEC; + + return ret; +} + +int set_dexcr_prctl(struct task_struct *task, unsigned long which, unsigned long ctrl) +{ + unsigned long dexcr; + unsigned int aspect; + int err = 0; + + err = prctl_to_aspect(which, &aspect); + if (err) + return err; + + if (!(aspect & DEXCR_PRCTL_EDITABLE)) + return -EPERM; + + if (ctrl & ~PR_PPC_DEXCR_CTRL_MASK) + return -EINVAL; + + if (ctrl & PR_PPC_DEXCR_CTRL_SET && ctrl & PR_PPC_DEXCR_CTRL_CLEAR) + return -EINVAL; + + if (ctrl & PR_PPC_DEXCR_CTRL_SET_ONEXEC && ctrl & PR_PPC_DEXCR_CTRL_CLEAR_ONEXEC) + return -EINVAL; + + /* + * We do not want an unprivileged process being able to disable + * a setuid process's hash check instructions + */ + if (aspect == DEXCR_PR_NPHIE && + ctrl & PR_PPC_DEXCR_CTRL_CLEAR_ONEXEC && + !capable(CAP_SYS_ADMIN)) + return -EPERM; + + dexcr = mfspr(SPRN_DEXCR); + + if (ctrl & PR_PPC_DEXCR_CTRL_SET) + dexcr |= aspect; + else if (ctrl & PR_PPC_DEXCR_CTRL_CLEAR) + dexcr &= ~aspect; + + if (ctrl & PR_PPC_DEXCR_CTRL_SET_ONEXEC) + task->thread.dexcr_onexec |= aspect; + else if (ctrl & PR_PPC_DEXCR_CTRL_CLEAR_ONEXEC) + task->thread.dexcr_onexec &= ~aspect; + + mtspr(SPRN_DEXCR, dexcr); + + return 0; +} diff --git a/arch/powerpc/kernel/dma-iommu.c b/arch/powerpc/kernel/dma-iommu.c index 8920862ffd..f0ae39e77e 100644 --- a/arch/powerpc/kernel/dma-iommu.c +++ b/arch/powerpc/kernel/dma-iommu.c @@ -216,6 +216,6 @@ const struct dma_map_ops dma_iommu_ops = { .get_required_mask = dma_iommu_get_required_mask, .mmap = dma_common_mmap, .get_sgtable = dma_common_get_sgtable, - .alloc_pages = dma_common_alloc_pages, + .alloc_pages_op = dma_common_alloc_pages, .free_pages = dma_common_free_pages, }; diff --git a/arch/powerpc/kernel/eeh.c b/arch/powerpc/kernel/eeh.c index ab316e155e..6670063a7a 100644 --- a/arch/powerpc/kernel/eeh.c +++ b/arch/powerpc/kernel/eeh.c @@ -506,9 +506,18 @@ int eeh_dev_check_failure(struct eeh_dev *edev) * We will punt with the following conditions: Failure to get * PE's 
state, EEH not support and Permanently unavailable * state, PE is in good state. + * + * On the pSeries, after reaching the threshold, get_state might + * return EEH_STATE_NOT_SUPPORT. However, it's possible that the + * device state remains uncleared if the device is not marked + * pci_channel_io_perm_failure. Therefore, consider logging the + * event to let device removal happen. + * */ if ((ret < 0) || - (ret == EEH_STATE_NOT_SUPPORT) || eeh_state_active(ret)) { + (ret == EEH_STATE_NOT_SUPPORT && + dev->error_state == pci_channel_io_perm_failure) || + eeh_state_active(ret)) { eeh_stats.false_positives++; pe->false_positives++; rc = 0; diff --git a/arch/powerpc/kernel/eeh_driver.c b/arch/powerpc/kernel/eeh_driver.c index 48773d2d9b..7efe04c68f 100644 --- a/arch/powerpc/kernel/eeh_driver.c +++ b/arch/powerpc/kernel/eeh_driver.c @@ -865,9 +865,18 @@ void eeh_handle_normal_event(struct eeh_pe *pe) devices++; if (!devices) { - pr_debug("EEH: Frozen PHB#%x-PE#%x is empty!\n", + pr_warn("EEH: Frozen PHB#%x-PE#%x is empty!\n", pe->phb->global_number, pe->addr); - goto out; /* nothing to recover */ + /* + * The device is removed, tear down its state, on powernv + * hotplug driver would take care of it but not on pseries, + * permanently disable the card as it is hot removed. + * + * In the case of powernv, note that the removal of device + * is covered by pci rescan lock, so no problem even if hotplug + * driver attempts to remove the device. 
+ */ + goto recover_failed; } /* Log the event */ diff --git a/arch/powerpc/kernel/eeh_pe.c b/arch/powerpc/kernel/eeh_pe.c index 7d1b50599d..d283d281d2 100644 --- a/arch/powerpc/kernel/eeh_pe.c +++ b/arch/powerpc/kernel/eeh_pe.c @@ -24,10 +24,10 @@ static int eeh_pe_aux_size = 0; static LIST_HEAD(eeh_phb_pe); /** - * eeh_set_pe_aux_size - Set PE auxillary data size - * @size: PE auxillary data size + * eeh_set_pe_aux_size - Set PE auxiliary data size + * @size: PE auxiliary data size in bytes * - * Set PE auxillary data size + * Set PE auxiliary data size. */ void eeh_set_pe_aux_size(int size) { @@ -527,7 +527,7 @@ EXPORT_SYMBOL_GPL(eeh_pe_state_mark); * eeh_pe_mark_isolated * @pe: EEH PE * - * Record that a PE has been isolated by marking the PE and it's children as + * Record that a PE has been isolated by marking the PE and its children as * EEH_PE_ISOLATED (and EEH_PE_CFG_BLOCKED, if required) and their PCI devices * as pci_channel_io_frozen. */ diff --git a/arch/powerpc/kernel/fadump.c b/arch/powerpc/kernel/fadump.c index d14eda1e85..a612e7513a 100644 --- a/arch/powerpc/kernel/fadump.c +++ b/arch/powerpc/kernel/fadump.c @@ -53,8 +53,6 @@ static struct kobject *fadump_kobj; static atomic_t cpus_in_fadump; static DEFINE_MUTEX(fadump_mutex); -static struct fadump_mrange_info crash_mrange_info = { "crash", NULL, 0, 0, 0, false }; - #define RESERVED_RNGS_SZ 16384 /* 16K - 128 entries */ #define RESERVED_RNGS_CNT (RESERVED_RNGS_SZ / \ sizeof(struct fadump_memory_range)) @@ -133,6 +131,41 @@ static int __init fadump_cma_init(void) static int __init fadump_cma_init(void) { return 1; } #endif /* CONFIG_CMA */ +/* + * Additional parameters meant for capture kernel are placed in a dedicated area. + * If this is capture kernel boot, append these parameters to bootargs. 
+ */ +void __init fadump_append_bootargs(void) +{ + char *append_args; + size_t len; + + if (!fw_dump.dump_active || !fw_dump.param_area_supported || !fw_dump.param_area) + return; + + if (fw_dump.param_area >= fw_dump.boot_mem_top) { + if (memblock_reserve(fw_dump.param_area, COMMAND_LINE_SIZE)) { + pr_warn("WARNING: Can't use additional parameters area!\n"); + fw_dump.param_area = 0; + return; + } + } + + append_args = (char *)fw_dump.param_area; + len = strlen(boot_command_line); + + /* + * Too late to fail even if cmdline size exceeds. Truncate additional parameters + * to cmdline size and proceed anyway. + */ + if (len + strlen(append_args) >= COMMAND_LINE_SIZE - 1) + pr_warn("WARNING: Appending parameters exceeds cmdline size. Truncating!\n"); + + pr_debug("Cmdline: %s\n", boot_command_line); + snprintf(boot_command_line + len, COMMAND_LINE_SIZE - len, " %s", append_args); + pr_info("Updated cmdline: %s\n", boot_command_line); +} + /* Scan the Firmware Assisted dump configuration details. */ int __init early_init_dt_scan_fw_dump(unsigned long node, const char *uname, int depth, void *data) @@ -223,28 +256,6 @@ static bool is_fadump_mem_area_contiguous(u64 d_start, u64 d_end) } /* - * Returns true, if there are no holes in boot memory area, - * false otherwise. - */ -bool is_fadump_boot_mem_contiguous(void) -{ - unsigned long d_start, d_end; - bool ret = false; - int i; - - for (i = 0; i < fw_dump.boot_mem_regs_cnt; i++) { - d_start = fw_dump.boot_mem_addr[i]; - d_end = d_start + fw_dump.boot_mem_sz[i]; - - ret = is_fadump_mem_area_contiguous(d_start, d_end); - if (!ret) - break; - } - - return ret; -} - -/* * Returns true, if there are no holes in reserved memory area, * false otherwise. 
*/ @@ -373,12 +384,6 @@ static unsigned long __init get_fadump_area_size(void) size = PAGE_ALIGN(size); size += fw_dump.boot_memory_size; size += sizeof(struct fadump_crash_info_header); - size += sizeof(struct elfhdr); /* ELF core header.*/ - size += sizeof(struct elf_phdr); /* place holder for cpu notes */ - /* Program headers for crash memory regions. */ - size += sizeof(struct elf_phdr) * (memblock_num_regions(memory) + 2); - - size = PAGE_ALIGN(size); /* This is to hold kernel metadata on platforms that support it */ size += (fw_dump.ops->fadump_get_metadata_size ? @@ -389,10 +394,11 @@ static unsigned long __init get_fadump_area_size(void) static int __init add_boot_mem_region(unsigned long rstart, unsigned long rsize) { + int max_boot_mem_rgns = fw_dump.ops->fadump_max_boot_mem_rgns(); int i = fw_dump.boot_mem_regs_cnt++; - if (fw_dump.boot_mem_regs_cnt > FADUMP_MAX_MEM_REGS) { - fw_dump.boot_mem_regs_cnt = FADUMP_MAX_MEM_REGS; + if (fw_dump.boot_mem_regs_cnt > max_boot_mem_rgns) { + fw_dump.boot_mem_regs_cnt = max_boot_mem_rgns; return 0; } @@ -573,22 +579,6 @@ int __init fadump_reserve_mem(void) } } - /* - * Calculate the memory boundary. - * If memory_limit is less than actual memory boundary then reserve - * the memory for fadump beyond the memory_limit and adjust the - * memory_limit accordingly, so that the running kernel can run with - * specified memory_limit. - */ - if (memory_limit && memory_limit < memblock_end_of_DRAM()) { - size = get_fadump_area_size(); - if ((memory_limit + size) < memblock_end_of_DRAM()) - memory_limit += size; - else - memory_limit = memblock_end_of_DRAM(); - printk(KERN_INFO "Adjusted memory_limit for firmware-assisted" - " dump, now %#016llx\n", memory_limit); - } if (memory_limit) mem_boundary = memory_limit; else @@ -705,7 +695,7 @@ void crash_fadump(struct pt_regs *regs, const char *str) * old_cpu == -1 means this is the first CPU which has come here, * go ahead and trigger fadump. 
* - * old_cpu != -1 means some other CPU has already on it's way + * old_cpu != -1 means some other CPU has already on its way * to trigger fadump, just keep looping here. */ this_cpu = smp_processor_id(); @@ -931,36 +921,6 @@ static inline int fadump_add_mem_range(struct fadump_mrange_info *mrange_info, return 0; } -static int fadump_exclude_reserved_area(u64 start, u64 end) -{ - u64 ra_start, ra_end; - int ret = 0; - - ra_start = fw_dump.reserve_dump_area_start; - ra_end = ra_start + fw_dump.reserve_dump_area_size; - - if ((ra_start < end) && (ra_end > start)) { - if ((start < ra_start) && (end > ra_end)) { - ret = fadump_add_mem_range(&crash_mrange_info, - start, ra_start); - if (ret) - return ret; - - ret = fadump_add_mem_range(&crash_mrange_info, - ra_end, end); - } else if (start < ra_start) { - ret = fadump_add_mem_range(&crash_mrange_info, - start, ra_start); - } else if (ra_end < end) { - ret = fadump_add_mem_range(&crash_mrange_info, - ra_end, end); - } - } else - ret = fadump_add_mem_range(&crash_mrange_info, start, end); - - return ret; -} - static int fadump_init_elfcore_header(char *bufp) { struct elfhdr *elf; @@ -998,52 +958,6 @@ static int fadump_init_elfcore_header(char *bufp) } /* - * Traverse through memblock structure and setup crash memory ranges. These - * ranges will be used create PT_LOAD program headers in elfcore header. - */ -static int fadump_setup_crash_memory_ranges(void) -{ - u64 i, start, end; - int ret; - - pr_debug("Setup crash memory ranges.\n"); - crash_mrange_info.mem_range_cnt = 0; - - /* - * Boot memory region(s) registered with firmware are moved to - * different location at the time of crash. Create separate program - * header(s) for this memory chunk(s) with the correct offset. 
- */ - for (i = 0; i < fw_dump.boot_mem_regs_cnt; i++) { - start = fw_dump.boot_mem_addr[i]; - end = start + fw_dump.boot_mem_sz[i]; - ret = fadump_add_mem_range(&crash_mrange_info, start, end); - if (ret) - return ret; - } - - for_each_mem_range(i, &start, &end) { - /* - * skip the memory chunk that is already added - * (0 through boot_memory_top). - */ - if (start < fw_dump.boot_mem_top) { - if (end > fw_dump.boot_mem_top) - start = fw_dump.boot_mem_top; - else - continue; - } - - /* add this range excluding the reserved dump area. */ - ret = fadump_exclude_reserved_area(start, end); - if (ret) - return ret; - } - - return 0; -} - -/* * If the given physical address falls within the boot memory region then * return the relocated address that points to the dump region reserved * for saving initial boot memory contents. @@ -1073,36 +987,50 @@ static inline unsigned long fadump_relocate(unsigned long paddr) return raddr; } -static int fadump_create_elfcore_headers(char *bufp) +static void __init populate_elf_pt_load(struct elf_phdr *phdr, u64 start, + u64 size, unsigned long long offset) { - unsigned long long raddr, offset; - struct elf_phdr *phdr; + phdr->p_align = 0; + phdr->p_memsz = size; + phdr->p_filesz = size; + phdr->p_paddr = start; + phdr->p_offset = offset; + phdr->p_type = PT_LOAD; + phdr->p_flags = PF_R|PF_W|PF_X; + phdr->p_vaddr = (unsigned long)__va(start); +} + +static void __init fadump_populate_elfcorehdr(struct fadump_crash_info_header *fdh) +{ + char *bufp; struct elfhdr *elf; - int i, j; + struct elf_phdr *phdr; + u64 boot_mem_dest_offset; + unsigned long long i, ra_start, ra_end, ra_size, mstart, mend; + bufp = (char *) fw_dump.elfcorehdr_addr; fadump_init_elfcore_header(bufp); elf = (struct elfhdr *)bufp; bufp += sizeof(struct elfhdr); /* - * setup ELF PT_NOTE, place holder for cpu notes info. The notes info - * will be populated during second kernel boot after crash. Hence - * this PT_NOTE will always be the first elf note. 
+ * Set up ELF PT_NOTE, a placeholder for CPU notes information. + * The notes info will be populated later by platform-specific code. + * Hence, this PT_NOTE will always be the first ELF note. * * NOTE: Any new ELF note addition should be placed after this note. */ phdr = (struct elf_phdr *)bufp; bufp += sizeof(struct elf_phdr); phdr->p_type = PT_NOTE; - phdr->p_flags = 0; - phdr->p_vaddr = 0; - phdr->p_align = 0; - - phdr->p_offset = 0; - phdr->p_paddr = 0; - phdr->p_filesz = 0; - phdr->p_memsz = 0; - + phdr->p_flags = 0; + phdr->p_vaddr = 0; + phdr->p_align = 0; + phdr->p_offset = 0; + phdr->p_paddr = 0; + phdr->p_filesz = 0; + phdr->p_memsz = 0; + /* Increment number of program headers. */ (elf->e_phnum)++; /* setup ELF PT_NOTE for vmcoreinfo */ @@ -1112,55 +1040,66 @@ static int fadump_create_elfcore_headers(char *bufp) phdr->p_flags = 0; phdr->p_vaddr = 0; phdr->p_align = 0; - - phdr->p_paddr = fadump_relocate(paddr_vmcoreinfo_note()); - phdr->p_offset = phdr->p_paddr; - phdr->p_memsz = phdr->p_filesz = VMCOREINFO_NOTE_SIZE; - + phdr->p_paddr = phdr->p_offset = fdh->vmcoreinfo_raddr; + phdr->p_memsz = phdr->p_filesz = fdh->vmcoreinfo_size; /* Increment number of program headers. */ (elf->e_phnum)++; - /* setup PT_LOAD sections. */ - j = 0; - offset = 0; - raddr = fw_dump.boot_mem_addr[0]; - for (i = 0; i < crash_mrange_info.mem_range_cnt; i++) { - u64 mbase, msize; - - mbase = crash_mrange_info.mem_ranges[i].base; - msize = crash_mrange_info.mem_ranges[i].size; - if (!msize) - continue; - + /* + * Setup PT_LOAD sections. first include boot memory regions + * and then add rest of the memory regions. 
+ */ + boot_mem_dest_offset = fw_dump.boot_mem_dest_addr; + for (i = 0; i < fw_dump.boot_mem_regs_cnt; i++) { phdr = (struct elf_phdr *)bufp; bufp += sizeof(struct elf_phdr); - phdr->p_type = PT_LOAD; - phdr->p_flags = PF_R|PF_W|PF_X; - phdr->p_offset = mbase; - - if (mbase == raddr) { - /* - * The entire real memory region will be moved by - * firmware to the specified destination_address. - * Hence set the correct offset. - */ - phdr->p_offset = fw_dump.boot_mem_dest_addr + offset; - if (j < (fw_dump.boot_mem_regs_cnt - 1)) { - offset += fw_dump.boot_mem_sz[j]; - raddr = fw_dump.boot_mem_addr[++j]; - } + populate_elf_pt_load(phdr, fw_dump.boot_mem_addr[i], + fw_dump.boot_mem_sz[i], + boot_mem_dest_offset); + /* Increment number of program headers. */ + (elf->e_phnum)++; + boot_mem_dest_offset += fw_dump.boot_mem_sz[i]; + } + + /* Memory reserved for fadump in first kernel */ + ra_start = fw_dump.reserve_dump_area_start; + ra_size = get_fadump_area_size(); + ra_end = ra_start + ra_size; + + phdr = (struct elf_phdr *)bufp; + for_each_mem_range(i, &mstart, &mend) { + /* Boot memory regions already added, skip them now */ + if (mstart < fw_dump.boot_mem_top) { + if (mend > fw_dump.boot_mem_top) + mstart = fw_dump.boot_mem_top; + else + continue; } - phdr->p_paddr = mbase; - phdr->p_vaddr = (unsigned long)__va(mbase); - phdr->p_filesz = msize; - phdr->p_memsz = msize; - phdr->p_align = 0; + /* Handle memblock regions overlaps with fadump reserved area */ + if ((ra_start < mend) && (ra_end > mstart)) { + if ((mstart < ra_start) && (mend > ra_end)) { + populate_elf_pt_load(phdr, mstart, ra_start - mstart, mstart); + /* Increment number of program headers. 
*/ + (elf->e_phnum)++; + bufp += sizeof(struct elf_phdr); + phdr = (struct elf_phdr *)bufp; + populate_elf_pt_load(phdr, ra_end, mend - ra_end, ra_end); + } else if (mstart < ra_start) { + populate_elf_pt_load(phdr, mstart, ra_start - mstart, mstart); + } else if (ra_end < mend) { + populate_elf_pt_load(phdr, ra_end, mend - ra_end, ra_end); + } + } else { + /* No overlap with fadump reserved memory region */ + populate_elf_pt_load(phdr, mstart, mend - mstart, mstart); + } /* Increment number of program headers. */ (elf->e_phnum)++; + bufp += sizeof(struct elf_phdr); + phdr = (struct elf_phdr *) bufp; } - return 0; } static unsigned long init_fadump_header(unsigned long addr) @@ -1175,14 +1114,25 @@ static unsigned long init_fadump_header(unsigned long addr) memset(fdh, 0, sizeof(struct fadump_crash_info_header)); fdh->magic_number = FADUMP_CRASH_INFO_MAGIC; - fdh->elfcorehdr_addr = addr; + fdh->version = FADUMP_HEADER_VERSION; /* We will set the crashing cpu id in crash_fadump() during crash. */ fdh->crashing_cpu = FADUMP_CPU_UNKNOWN; + + /* + * The physical address and size of vmcoreinfo are required in the + * second kernel to prepare elfcorehdr. + */ + fdh->vmcoreinfo_raddr = fadump_relocate(paddr_vmcoreinfo_note()); + fdh->vmcoreinfo_size = VMCOREINFO_NOTE_SIZE; + + + fdh->pt_regs_sz = sizeof(struct pt_regs); /* * When LPAR is terminated by PYHP, ensure all possible CPUs' * register data is processed while exporting the vmcore. 
*/ fdh->cpu_mask = *cpu_possible_mask; + fdh->cpu_mask_sz = sizeof(struct cpumask); return addr; } @@ -1190,8 +1140,6 @@ static unsigned long init_fadump_header(unsigned long addr) static int register_fadump(void) { unsigned long addr; - void *vaddr; - int ret; /* * If no memory is reserved then we can not register for firmware- @@ -1200,18 +1148,10 @@ static int register_fadump(void) if (!fw_dump.reserve_dump_area_size) return -ENODEV; - ret = fadump_setup_crash_memory_ranges(); - if (ret) - return ret; - addr = fw_dump.fadumphdr_addr; /* Initialize fadump crash info header. */ addr = init_fadump_header(addr); - vaddr = __va(addr); - - pr_debug("Creating ELF core headers at %#016lx\n", addr); - fadump_create_elfcore_headers(vaddr); /* register the future kernel dump with firmware. */ pr_debug("Registering for firmware-assisted kernel dump...\n"); @@ -1230,7 +1170,6 @@ void fadump_cleanup(void) } else if (fw_dump.dump_registered) { /* Un-register Firmware-assisted dump if it was registered. */ fw_dump.ops->fadump_unregister(&fw_dump); - fadump_free_mem_ranges(&crash_mrange_info); } if (fw_dump.ops->fadump_cleanup) @@ -1416,6 +1355,22 @@ static void fadump_release_memory(u64 begin, u64 end) fadump_release_reserved_area(tstart, end); } +static void fadump_free_elfcorehdr_buf(void) +{ + if (fw_dump.elfcorehdr_addr == 0 || fw_dump.elfcorehdr_size == 0) + return; + + /* + * Before freeing the memory of `elfcorehdr`, reset the global + * `elfcorehdr_addr` to prevent modules like `vmcore` from accessing + * invalid memory. 
+ */ + elfcorehdr_addr = ELFCORE_ADDR_ERR; + fadump_free_buffer(fw_dump.elfcorehdr_addr, fw_dump.elfcorehdr_size); + fw_dump.elfcorehdr_addr = 0; + fw_dump.elfcorehdr_size = 0; +} + static void fadump_invalidate_release_mem(void) { mutex_lock(&fadump_mutex); @@ -1427,6 +1382,7 @@ static void fadump_invalidate_release_mem(void) fadump_cleanup(); mutex_unlock(&fadump_mutex); + fadump_free_elfcorehdr_buf(); fadump_release_memory(fw_dump.boot_mem_top, memblock_end_of_DRAM()); fadump_free_cpu_notes_buf(); @@ -1484,6 +1440,18 @@ static ssize_t enabled_show(struct kobject *kobj, return sprintf(buf, "%d\n", fw_dump.fadump_enabled); } +/* + * /sys/kernel/fadump/hotplug_ready sysfs node returns 1, which indicates + * to userspace that fadump re-registration is not required on memory + * hotplug events. + */ +static ssize_t hotplug_ready_show(struct kobject *kobj, + struct kobj_attribute *attr, + char *buf) +{ + return sprintf(buf, "%d\n", 1); +} + static ssize_t mem_reserved_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf) @@ -1498,6 +1466,43 @@ static ssize_t registered_show(struct kobject *kobj, return sprintf(buf, "%d\n", fw_dump.dump_registered); } +static ssize_t bootargs_append_show(struct kobject *kobj, + struct kobj_attribute *attr, + char *buf) +{ + return sprintf(buf, "%s\n", (char *)__va(fw_dump.param_area)); +} + +static ssize_t bootargs_append_store(struct kobject *kobj, + struct kobj_attribute *attr, + const char *buf, size_t count) +{ + char *params; + + if (!fw_dump.fadump_enabled || fw_dump.dump_active) + return -EPERM; + + if (count >= COMMAND_LINE_SIZE) + return -EINVAL; + + /* + * Fail here instead of handling this scenario with + * some silly workaround in capture kernel.
+ */ + if (saved_command_line_len + count >= COMMAND_LINE_SIZE) { + pr_err("Appending parameters exceeds cmdline size!\n"); + return -ENOSPC; + } + + params = __va(fw_dump.param_area); + strscpy_pad(params, buf, COMMAND_LINE_SIZE); + /* Remove newline character at the end. */ + if (params[count-1] == '\n') + params[count-1] = '\0'; + + return count; +} + static ssize_t registered_store(struct kobject *kobj, struct kobj_attribute *attr, const char *buf, size_t count) @@ -1556,11 +1561,14 @@ static struct kobj_attribute release_attr = __ATTR_WO(release_mem); static struct kobj_attribute enable_attr = __ATTR_RO(enabled); static struct kobj_attribute register_attr = __ATTR_RW(registered); static struct kobj_attribute mem_reserved_attr = __ATTR_RO(mem_reserved); +static struct kobj_attribute hotplug_ready_attr = __ATTR_RO(hotplug_ready); +static struct kobj_attribute bootargs_append_attr = __ATTR_RW(bootargs_append); static struct attribute *fadump_attrs[] = { &enable_attr.attr, &register_attr.attr, &mem_reserved_attr.attr, + &hotplug_ready_attr.attr, NULL, }; @@ -1632,6 +1640,150 @@ static void __init fadump_init_files(void) return; } +static int __init fadump_setup_elfcorehdr_buf(void) +{ + int elf_phdr_cnt; + unsigned long elfcorehdr_size; + + /* + * Program header for CPU notes comes first, followed by one for + * vmcoreinfo, and the remaining program headers correspond to + * memory regions. + */ + elf_phdr_cnt = 2 + fw_dump.boot_mem_regs_cnt + memblock_num_regions(memory); + elfcorehdr_size = sizeof(struct elfhdr) + (elf_phdr_cnt * sizeof(struct elf_phdr)); + elfcorehdr_size = PAGE_ALIGN(elfcorehdr_size); + + fw_dump.elfcorehdr_addr = (u64)fadump_alloc_buffer(elfcorehdr_size); + if (!fw_dump.elfcorehdr_addr) { + pr_err("Failed to allocate %lu bytes for elfcorehdr\n", + elfcorehdr_size); + return -ENOMEM; + } + fw_dump.elfcorehdr_size = elfcorehdr_size; + return 0; +} + +/* + * Check if the fadump header of crashed kernel is compatible with fadump kernel.
+ * + * It checks the magic number, endianness, and size of non-primitive type + * members of fadump header to ensure safe dump collection. + */ +static bool __init is_fadump_header_compatible(struct fadump_crash_info_header *fdh) +{ + if (fdh->magic_number == FADUMP_CRASH_INFO_MAGIC_OLD) { + pr_err("Old magic number, can't process the dump.\n"); + return false; + } + + if (fdh->magic_number != FADUMP_CRASH_INFO_MAGIC) { + if (fdh->magic_number == swab64(FADUMP_CRASH_INFO_MAGIC)) + pr_err("Endianness mismatch between the crashed and fadump kernels.\n"); + else + pr_err("Fadump header is corrupted.\n"); + + return false; + } + + /* + * Dump collection is not safe if the size of non-primitive type members + * of the fadump header do not match between crashed and fadump kernel. + */ + if (fdh->pt_regs_sz != sizeof(struct pt_regs) || + fdh->cpu_mask_sz != sizeof(struct cpumask)) { + pr_err("Fadump header size mismatch.\n"); + return false; + } + + return true; +} + +static void __init fadump_process(void) +{ + struct fadump_crash_info_header *fdh; + + fdh = (struct fadump_crash_info_header *) __va(fw_dump.fadumphdr_addr); + if (!fdh) { + pr_err("Crash info header is empty.\n"); + goto err_out; + } + + /* Avoid processing the dump if fadump header isn't compatible */ + if (!is_fadump_header_compatible(fdh)) + goto err_out; + + /* Allocate buffer for elfcorehdr */ + if (fadump_setup_elfcorehdr_buf()) + goto err_out; + + fadump_populate_elfcorehdr(fdh); + + /* Let platform update the CPU notes in elfcorehdr */ + if (fw_dump.ops->fadump_process(&fw_dump) < 0) + goto err_out; + + /* + * elfcorehdr is now ready to be exported. + * + * set elfcorehdr_addr so that vmcore module will export the + * elfcorehdr through '/proc/vmcore'. + */ + elfcorehdr_addr = virt_to_phys((void *)fw_dump.elfcorehdr_addr); + return; + +err_out: + fadump_invalidate_release_mem(); +} + +/* + * Reserve memory to store additional parameters to be passed + * for fadump/capture kernel. 
+ */ +static void __init fadump_setup_param_area(void) +{ + phys_addr_t range_start, range_end; + + if (!fw_dump.param_area_supported || fw_dump.dump_active) + return; + + /* This memory can't be used by PFW or bootloader as it is shared across kernels */ + if (radix_enabled()) { + /* + * Anywhere in the upper half should be good enough as all memory + * is accessible in real mode. + */ + range_start = memblock_end_of_DRAM() / 2; + range_end = memblock_end_of_DRAM(); + } else { + /* + * Passing additional parameters is supported for hash MMU only + * if the first memory block size is 768MB or higher. + */ + if (ppc64_rma_size < 0x30000000) + return; + + /* + * 640 MB to 768 MB is not used by PFW/bootloader. So, try reserving + * memory for passing additional parameters in this range to avoid + * being stomped on by PFW/bootloader. + */ + range_start = 0x2A000000; + range_end = range_start + 0x4000000; + } + + fw_dump.param_area = memblock_phys_alloc_range(COMMAND_LINE_SIZE, + COMMAND_LINE_SIZE, + range_start, + range_end); + if (!fw_dump.param_area || sysfs_create_file(fadump_kobj, &bootargs_append_attr.attr)) { + pr_warn("WARNING: Could not setup area to pass additional parameters!\n"); + return; + } + + memset(phys_to_virt(fw_dump.param_area), 0, COMMAND_LINE_SIZE); +} + /* * Prepare for firmware-assisted dump. */ @@ -1651,15 +1803,11 @@ int __init setup_fadump(void) * saving it to the disk. */ if (fw_dump.dump_active) { - /* - * if dump process fails then invalidate the registration - * and release memory before proceeding for re-registration. 
- */ - if (fw_dump.ops->fadump_process(&fw_dump) < 0) - fadump_invalidate_release_mem(); + fadump_process(); } /* Initialize the kernel dump memory structure and register with f/w */ else if (fw_dump.reserve_dump_area_size) { + fadump_setup_param_area(); fw_dump.ops->fadump_init_mem_struct(&fw_dump); register_fadump(); } @@ -1735,8 +1883,3 @@ static void __init fadump_reserve_crash_area(u64 base) memblock_reserve(mstart, msize); } } - -unsigned long __init arch_reserved_kernel_pages(void) -{ - return memblock_reserved_size() / PAGE_SIZE; -} diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S index 647b0b445e..edc479a7c2 100644 --- a/arch/powerpc/kernel/head_8xx.S +++ b/arch/powerpc/kernel/head_8xx.S @@ -199,12 +199,12 @@ instruction_counter: mfspr r10, SPRN_SRR0 /* Get effective address of fault */ INVALIDATE_ADJACENT_PAGES_CPU15(r10, r11) mtspr SPRN_MD_EPN, r10 -#ifdef CONFIG_MODULES +#ifdef CONFIG_EXECMEM mfcr r11 compare_to_kernel_boundary r10, r10 #endif mfspr r10, SPRN_M_TWB /* Get level 1 table */ -#ifdef CONFIG_MODULES +#ifdef CONFIG_EXECMEM blt+ 3f rlwinm r10, r10, 0, 20, 31 oris r10, r10, (swapper_pg_dir - PAGE_OFFSET)@ha diff --git a/arch/powerpc/kernel/head_book3s_32.S b/arch/powerpc/kernel/head_book3s_32.S index c1d89764dd..57196883a0 100644 --- a/arch/powerpc/kernel/head_book3s_32.S +++ b/arch/powerpc/kernel/head_book3s_32.S @@ -419,14 +419,14 @@ InstructionTLBMiss: */ /* Get PTE (linux-style) and check access */ mfspr r3,SPRN_IMISS -#ifdef CONFIG_MODULES +#ifdef CONFIG_EXECMEM lis r1, TASK_SIZE@h /* check if kernel address */ cmplw 0,r1,r3 #endif mfspr r2, SPRN_SDR1 li r1,_PAGE_PRESENT | _PAGE_ACCESSED | _PAGE_EXEC rlwinm r2, r2, 28, 0xfffff000 -#ifdef CONFIG_MODULES +#ifdef CONFIG_EXECMEM li r0, 3 bgt- 112f lis r2, (swapper_pg_dir - PAGE_OFFSET)@ha /* if kernel address, use */ @@ -442,7 +442,7 @@ InstructionTLBMiss: andc. 
r1,r1,r2 /* check access & ~permission */ bne- InstructionAddressInvalid /* return if access not permitted */ /* Convert linux-style PTE to low word of PPC-style PTE */ -#ifdef CONFIG_MODULES +#ifdef CONFIG_EXECMEM rlwimi r2, r0, 0, 31, 31 /* userspace ? -> PP lsb */ #endif ori r1, r1, 0xe06 /* clear out reserved bits */ diff --git a/arch/powerpc/kernel/iommu.c b/arch/powerpc/kernel/iommu.c index 29a8c8e185..b70b4f9356 100644 --- a/arch/powerpc/kernel/iommu.c +++ b/arch/powerpc/kernel/iommu.c @@ -26,6 +26,7 @@ #include <linux/iommu.h> #include <linux/sched.h> #include <linux/debugfs.h> +#include <linux/vmalloc.h> #include <asm/io.h> #include <asm/iommu.h> #include <asm/pci-bridge.h> diff --git a/arch/powerpc/kernel/kprobes.c b/arch/powerpc/kernel/kprobes.c index bbca90a5e2..14c5ddec30 100644 --- a/arch/powerpc/kernel/kprobes.c +++ b/arch/powerpc/kernel/kprobes.c @@ -19,8 +19,8 @@ #include <linux/extable.h> #include <linux/kdebug.h> #include <linux/slab.h> -#include <linux/moduleloader.h> #include <linux/set_memory.h> +#include <linux/execmem.h> #include <asm/code-patching.h> #include <asm/cacheflush.h> #include <asm/sstep.h> @@ -126,26 +126,6 @@ kprobe_opcode_t *arch_adjust_kprobe_addr(unsigned long addr, unsigned long offse return (kprobe_opcode_t *)(addr + offset); } -void *alloc_insn_page(void) -{ - void *page; - - page = module_alloc(PAGE_SIZE); - if (!page) - return NULL; - - if (strict_module_rwx_enabled()) { - int err = set_memory_rox((unsigned long)page, 1); - - if (err) - goto error; - } - return page; -error: - module_memfree(page); - return NULL; -} - int arch_prepare_kprobe(struct kprobe *p) { int ret = 0; diff --git a/arch/powerpc/kernel/misc_64.S b/arch/powerpc/kernel/misc_64.S index 1a8cdafd68..91123e102d 100644 --- a/arch/powerpc/kernel/misc_64.S +++ b/arch/powerpc/kernel/misc_64.S @@ -192,7 +192,7 @@ _GLOBAL(scom970_read) xori r0,r0,MSR_EE mtmsrd r0,1 - /* rotate 24 bits SCOM address 8 bits left and mask out it's low 8 bits + /* rotate 24 bits SCOM 
address 8 bits left and mask out its low 8 bits * (including parity). On current CPUs they must be 0'd, * and finally or in RW bit */ @@ -226,7 +226,7 @@ _GLOBAL(scom970_write) xori r0,r0,MSR_EE mtmsrd r0,1 - /* rotate 24 bits SCOM address 8 bits left and mask out it's low 8 bits + /* rotate 24 bits SCOM address 8 bits left and mask out its low 8 bits * (including parity). On current CPUs they must be 0'd. */ diff --git a/arch/powerpc/kernel/module.c b/arch/powerpc/kernel/module.c index f6d6ae0a16..baeb24c102 100644 --- a/arch/powerpc/kernel/module.c +++ b/arch/powerpc/kernel/module.c @@ -7,7 +7,6 @@ #include <linux/elf.h> #include <linux/moduleloader.h> #include <linux/err.h> -#include <linux/vmalloc.h> #include <linux/mm.h> #include <linux/bug.h> #include <asm/module.h> @@ -17,8 +16,6 @@ #include <asm/setup.h> #include <asm/sections.h> -static LIST_HEAD(module_bug_list); - static const Elf_Shdr *find_section(const Elf_Ehdr *hdr, const Elf_Shdr *sechdrs, const char *name) @@ -88,40 +85,3 @@ int module_finalize(const Elf_Ehdr *hdr, return 0; } - -static __always_inline void * -__module_alloc(unsigned long size, unsigned long start, unsigned long end, bool nowarn) -{ - pgprot_t prot = strict_module_rwx_enabled() ? PAGE_KERNEL : PAGE_KERNEL_EXEC; - gfp_t gfp = GFP_KERNEL | (nowarn ? __GFP_NOWARN : 0); - - /* - * Don't do huge page allocations for modules yet until more testing - * is done. STRICT_MODULE_RWX may require extra work to support this - * too. 
- */ - return __vmalloc_node_range(size, 1, start, end, gfp, prot, - VM_FLUSH_RESET_PERMS, - NUMA_NO_NODE, __builtin_return_address(0)); -} - -void *module_alloc(unsigned long size) -{ -#ifdef MODULES_VADDR - unsigned long limit = (unsigned long)_etext - SZ_32M; - void *ptr = NULL; - - BUILD_BUG_ON(TASK_SIZE > MODULES_VADDR); - - /* First try within 32M limit from _etext to avoid branch trampolines */ - if (MODULES_VADDR < PAGE_OFFSET && MODULES_END > limit) - ptr = __module_alloc(size, limit, MODULES_END, true); - - if (!ptr) - ptr = __module_alloc(size, MODULES_VADDR, MODULES_END, false); - - return ptr; -#else - return __module_alloc(size, VMALLOC_START, VMALLOC_END, false); -#endif -} diff --git a/arch/powerpc/kernel/pci-common.c b/arch/powerpc/kernel/pci-common.c index d95a48eff4..eac84d687b 100644 --- a/arch/powerpc/kernel/pci-common.c +++ b/arch/powerpc/kernel/pci-common.c @@ -517,7 +517,7 @@ int pci_iobar_pfn(struct pci_dev *pdev, int bar, struct vm_area_struct *vma) } /* - * This one is used by /dev/mem and fbdev who have no clue about the + * This one is used by /dev/mem and video who have no clue about the * PCI device, it tries to find the PCI device first and calls the * above routine */ diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index 9452a54d35..a767178676 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -1185,6 +1185,9 @@ static inline void save_sprs(struct thread_struct *t) if (cpu_has_feature(CPU_FTR_DEXCR_NPHIE)) t->hashkeyr = mfspr(SPRN_HASHKEYR); + + if (cpu_has_feature(CPU_FTR_ARCH_31)) + t->dexcr = mfspr(SPRN_DEXCR); #endif } @@ -1267,6 +1270,10 @@ static inline void restore_sprs(struct thread_struct *old_thread, if (cpu_has_feature(CPU_FTR_DEXCR_NPHIE) && old_thread->hashkeyr != new_thread->hashkeyr) mtspr(SPRN_HASHKEYR, new_thread->hashkeyr); + + if (cpu_has_feature(CPU_FTR_ARCH_31) && + old_thread->dexcr != new_thread->dexcr) + mtspr(SPRN_DEXCR, new_thread->dexcr); #endif } 
@@ -1634,6 +1641,13 @@ void arch_setup_new_exec(void) current->thread.regs->amr = default_amr; current->thread.regs->iamr = default_iamr; #endif + +#ifdef CONFIG_PPC_BOOK3S_64 + if (cpu_has_feature(CPU_FTR_ARCH_31)) { + current->thread.dexcr = current->thread.dexcr_onexec; + mtspr(SPRN_DEXCR, current->thread.dexcr); + } +#endif /* CONFIG_PPC_BOOK3S_64 */ } #ifdef CONFIG_PPC64 @@ -1647,7 +1661,7 @@ void arch_setup_new_exec(void) * cases will happen: * * 1. The correct thread is running, the wrong thread is not - * In this situation, the correct thread is woken and proceeds to pass it's + * In this situation, the correct thread is woken and proceeds to pass its * condition check. * * 2. Neither threads are running @@ -1657,15 +1671,15 @@ void arch_setup_new_exec(void) * for the wrong thread, or they will execute the condition check immediately. * * 3. The wrong thread is running, the correct thread is not - * The wrong thread will be woken, but will fail it's condition check and + * The wrong thread will be woken, but will fail its condition check and * re-execute wait. The correct thread, when scheduled, will execute either - * it's condition check (which will pass), or wait, which returns immediately - * when called the first time after the thread is scheduled, followed by it's + * its condition check (which will pass), or wait, which returns immediately + * when called the first time after the thread is scheduled, followed by its * condition check (which will pass). * * 4. Both threads are running - * Both threads will be woken. The wrong thread will fail it's condition check - * and execute another wait, while the correct thread will pass it's condition + * Both threads will be woken. The wrong thread will fail its condition check + * and execute another wait, while the correct thread will pass its condition * check. 
* * @t: the task to set the thread ID for @@ -1878,6 +1892,9 @@ int copy_thread(struct task_struct *p, const struct kernel_clone_args *args) #ifdef CONFIG_PPC_BOOK3S_64 if (cpu_has_feature(CPU_FTR_DEXCR_NPHIE)) p->thread.hashkeyr = current->thread.hashkeyr; + + if (cpu_has_feature(CPU_FTR_ARCH_31)) + p->thread.dexcr = mfspr(SPRN_DEXCR); #endif return 0; } diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c index cd8d8883de..0be07ed407 100644 --- a/arch/powerpc/kernel/prom.c +++ b/arch/powerpc/kernel/prom.c @@ -331,6 +331,7 @@ static int __init early_init_dt_scan_cpus(unsigned long node, void *data) { const char *type = of_get_flat_dt_prop(node, "device_type", NULL); + const __be32 *cpu_version = NULL; const __be32 *prop; const __be32 *intserv; int i, nthreads; @@ -420,7 +421,7 @@ static int __init early_init_dt_scan_cpus(unsigned long node, prop = of_get_flat_dt_prop(node, "cpu-version", NULL); if (prop && (be32_to_cpup(prop) & 0xff000000) == 0x0f000000) { identify_cpu(0, be32_to_cpup(prop)); - seq_buf_printf(&ppc_hw_desc, "0x%04x ", be32_to_cpup(prop)); + cpu_version = prop; } check_cpu_feature_properties(node); @@ -431,6 +432,12 @@ static int __init early_init_dt_scan_cpus(unsigned long node, } identical_pvr_fixup(node); + + // We can now add the CPU name & PVR to the hardware description + seq_buf_printf(&ppc_hw_desc, "%s 0x%04lx ", cur_cpu_spec->cpu_name, mfspr(SPRN_PVR)); + if (cpu_version) + seq_buf_printf(&ppc_hw_desc, "0x%04x ", be32_to_cpup(cpu_version)); + init_mmu_slb_size(node); #ifdef CONFIG_PPC64 @@ -779,7 +786,7 @@ static inline void save_fscr_to_task(void) {} void __init early_init_devtree(void *params) { - phys_addr_t limit; + phys_addr_t int_vector_size; DBG(" -> early_init_devtree(%px)\n", params); @@ -813,6 +820,9 @@ void __init early_init_devtree(void *params) */ of_scan_flat_dt(early_init_dt_scan_chosen_ppc, boot_command_line); + /* Append additional parameters passed for fadump capture kernel */ + fadump_append_bootargs(); + 
/* Scan memory nodes and rebuild MEMBLOCKs */ early_init_dt_scan_root(); early_init_dt_scan_memory_ppc(); @@ -832,9 +842,16 @@ void __init early_init_devtree(void *params) setup_initial_memory_limit(memstart_addr, first_memblock_size); /* Reserve MEMBLOCK regions used by kernel, initrd, dt, etc... */ memblock_reserve(PHYSICAL_START, __pa(_end) - PHYSICAL_START); +#ifdef CONFIG_PPC64 + /* If relocatable, reserve at least 32k for interrupt vectors etc. */ + int_vector_size = __end_interrupts - _stext; + int_vector_size = max_t(phys_addr_t, SZ_32K, int_vector_size); +#else /* If relocatable, reserve first 32k for interrupt vectors etc. */ + int_vector_size = SZ_32K; +#endif if (PHYSICAL_START > MEMORY_START) - memblock_reserve(MEMORY_START, 0x8000); + memblock_reserve(MEMORY_START, int_vector_size); reserve_kdump_trampoline(); #if defined(CONFIG_FA_DUMP) || defined(CONFIG_PRESERVE_FA_DUMP) /* @@ -846,9 +863,12 @@ void __init early_init_devtree(void *params) reserve_crashkernel(); early_reserve_mem(); - /* Ensure that total memory size is page-aligned. */ - limit = ALIGN(memory_limit ?: memblock_phys_mem_size(), PAGE_SIZE); - memblock_enforce_memory_limit(limit); + if (memory_limit > memblock_phys_mem_size()) + memory_limit = 0; + + /* Align down to 16 MB which is large page size with hash page translation */ + memory_limit = ALIGN_DOWN(memory_limit ?: memblock_phys_mem_size(), SZ_16M); + memblock_enforce_memory_limit(memory_limit); #if defined(CONFIG_PPC_BOOK3S_64) && defined(CONFIG_PPC_4K_PAGES) if (!early_radix_enabled()) @@ -868,9 +888,6 @@ void __init early_init_devtree(void *params) dt_cpu_ftrs_scan(); - // We can now add the CPU name & PVR to the hardware description - seq_buf_printf(&ppc_hw_desc, "%s 0x%04lx ", cur_cpu_spec->cpu_name, mfspr(SPRN_PVR)); - /* Retrieve CPU related informations from the flat tree * (altivec support, boot CPU ID, ...) 
*/ diff --git a/arch/powerpc/kernel/prom_init.c b/arch/powerpc/kernel/prom_init.c index 0ef3582853..fbb68fc28e 100644 --- a/arch/powerpc/kernel/prom_init.c +++ b/arch/powerpc/kernel/prom_init.c @@ -817,8 +817,8 @@ static void __init early_cmdline_parse(void) opt += 4; prom_memory_limit = prom_memparse(opt, (const char **)&opt); #ifdef CONFIG_PPC64 - /* Align to 16 MB == size of ppc64 large page */ - prom_memory_limit = ALIGN(prom_memory_limit, 0x1000000); + /* Align down to 16 MB which is large page size with hash page translation */ + prom_memory_limit = ALIGN_DOWN(prom_memory_limit, SZ_16M); #endif } diff --git a/arch/powerpc/kernel/ptrace/ptrace-tm.c b/arch/powerpc/kernel/ptrace/ptrace-tm.c index 210ea834e6..447bff87fd 100644 --- a/arch/powerpc/kernel/ptrace/ptrace-tm.c +++ b/arch/powerpc/kernel/ptrace/ptrace-tm.c @@ -12,7 +12,7 @@ void flush_tmregs_to_thread(struct task_struct *tsk) { /* * If task is not current, it will have been flushed already to - * it's thread_struct during __switch_to(). + * its thread_struct during __switch_to(). * * A reclaim flushes ALL the state or if not in TM save TM SPRs * in the appropriate thread structures from live. diff --git a/arch/powerpc/kernel/ptrace/ptrace-view.c b/arch/powerpc/kernel/ptrace/ptrace-view.c index 584cf5c3df..c1819e0a66 100644 --- a/arch/powerpc/kernel/ptrace/ptrace-view.c +++ b/arch/powerpc/kernel/ptrace/ptrace-view.c @@ -469,12 +469,7 @@ static int dexcr_get(struct task_struct *target, const struct user_regset *regse if (!cpu_has_feature(CPU_FTR_ARCH_31)) return -ENODEV; - /* - * The DEXCR is currently static across all CPUs, so we don't - * store the target's value anywhere, but the static value - * will also be correct. 
- */ - membuf_store(&to, (u64)lower_32_bits(DEXCR_INIT)); + membuf_store(&to, (u64)lower_32_bits(target->thread.dexcr)); /* * Technically the HDEXCR is per-cpu, but a hypervisor can't reasonably diff --git a/arch/powerpc/kernel/setup-common.c b/arch/powerpc/kernel/setup-common.c index 01ed1263e1..4bd2f87616 100644 --- a/arch/powerpc/kernel/setup-common.c +++ b/arch/powerpc/kernel/setup-common.c @@ -405,7 +405,7 @@ static void __init cpu_init_thread_core_maps(int tpc) cpumask_set_cpu(i, &threads_core_mask); printk(KERN_INFO "CPU maps initialized for %d thread%s per core\n", - tpc, tpc > 1 ? "s" : ""); + tpc, str_plural(tpc)); printk(KERN_DEBUG " (thread shift is %d)\n", threads_shift); } diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c index 12e53b3d79..46e6d2cd7a 100644 --- a/arch/powerpc/kernel/smp.c +++ b/arch/powerpc/kernel/smp.c @@ -1567,7 +1567,7 @@ static void add_cpu_to_masks(int cpu) /* * This CPU will not be in the online mask yet so we need to manually - * add it to it's own thread sibling mask. + * add it to its own thread sibling mask. */ map_cpu_to_node(cpu, cpu_to_node(cpu)); cpumask_set_cpu(cpu, cpu_sibling_mask(cpu)); diff --git a/arch/powerpc/kernel/syscalls/syscall.tbl b/arch/powerpc/kernel/syscalls/syscall.tbl index 18c49fce49..ebae8415df 100644 --- a/arch/powerpc/kernel/syscalls/syscall.tbl +++ b/arch/powerpc/kernel/syscalls/syscall.tbl @@ -552,3 +552,4 @@ 459 common lsm_get_self_attr sys_lsm_get_self_attr 460 common lsm_set_self_attr sys_lsm_set_self_attr 461 common lsm_list_modules sys_lsm_list_modules +462 common mseal sys_mseal diff --git a/arch/powerpc/kernel/sysfs.c b/arch/powerpc/kernel/sysfs.c index 0f39a6b841..b842c83ab4 100644 --- a/arch/powerpc/kernel/sysfs.c +++ b/arch/powerpc/kernel/sysfs.c @@ -139,7 +139,7 @@ static unsigned long dscr_default; * @val: Returned cpu specific DSCR default value * * This function returns the per cpu DSCR default value - * for any cpu which is contained in it's PACA structure. 
+ * for any cpu which is contained in its PACA structure. */ static void read_dscr(void *val) { @@ -152,7 +152,7 @@ static void read_dscr(void *val) * @val: New cpu specific DSCR default value to update * * This function updates the per cpu DSCR default value - * for any cpu which is contained in it's PACA structure. + * for any cpu which is contained in its PACA structure. */ static void write_dscr(void *val) { diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c index df20cf201f..c0fdc6d94f 100644 --- a/arch/powerpc/kernel/time.c +++ b/arch/powerpc/kernel/time.c @@ -354,6 +354,28 @@ void vtime_flush(struct task_struct *tsk) acct->hardirq_time = 0; acct->softirq_time = 0; } + +/* + * Called from the context switch with interrupts disabled, to charge all + * accumulated times to the current process, and to prepare accounting on + * the next process. + */ +void vtime_task_switch(struct task_struct *prev) +{ + if (is_idle_task(prev)) + vtime_account_idle(prev); + else + vtime_account_kernel(prev); + + vtime_flush(prev); + + if (!IS_ENABLED(CONFIG_PPC64)) { + struct cpu_accounting_data *acct = get_accounting(current); + struct cpu_accounting_data *acct0 = get_accounting(prev); + + acct->starttime = acct0->starttime; + } +} #endif /* CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */ void __no_kcsan __delay(unsigned long loops) diff --git a/arch/powerpc/kernel/vdso/Makefile b/arch/powerpc/kernel/vdso/Makefile index 1b93655c28..1425b6edc6 100644 --- a/arch/powerpc/kernel/vdso/Makefile +++ b/arch/powerpc/kernel/vdso/Makefile @@ -47,12 +47,6 @@ obj-vdso32 := $(addprefix $(obj)/, $(obj-vdso32)) targets += $(obj-vdso64) vdso64.so.dbg vgettimeofday-64.o obj-vdso64 := $(addprefix $(obj)/, $(obj-vdso64)) -GCOV_PROFILE := n -KCOV_INSTRUMENT := n -UBSAN_SANITIZE := n -KASAN_SANITIZE := n -KCSAN_SANITIZE := n - ccflags-y := -fno-common -fno-builtin ldflags-y := -Wl,--hash-style=both -nostdlib -shared -z noexecstack $(CLANG_FLAGS) ldflags-$(CONFIG_LD_IS_LLD) += $(call 
cc-option,--ld-path=$(LD),-fuse-ld=lld) @@ -74,9 +68,9 @@ targets += vdso64.lds CPPFLAGS_vdso64.lds += -P -C # link rule for the .so file, .lds has to be first -$(obj)/vdso32.so.dbg: $(src)/vdso32.lds $(obj-vdso32) $(obj)/vgettimeofday-32.o FORCE +$(obj)/vdso32.so.dbg: $(obj)/vdso32.lds $(obj-vdso32) $(obj)/vgettimeofday-32.o FORCE $(call if_changed,vdso32ld_and_check) -$(obj)/vdso64.so.dbg: $(src)/vdso64.lds $(obj-vdso64) $(obj)/vgettimeofday-64.o FORCE +$(obj)/vdso64.so.dbg: $(obj)/vdso64.lds $(obj-vdso64) $(obj)/vgettimeofday-64.o FORCE $(call if_changed,vdso64ld_and_check) # assembly rules for the .S files @@ -90,10 +84,10 @@ $(obj)/vgettimeofday-64.o: %-64.o: %.c FORCE $(call if_changed_dep,cc_o_c) # Generate VDSO offsets using helper script -gen-vdso32sym := $(srctree)/$(src)/gen_vdso32_offsets.sh +gen-vdso32sym := $(src)/gen_vdso32_offsets.sh quiet_cmd_vdso32sym = VDSO32SYM $@ cmd_vdso32sym = $(NM) $< | $(gen-vdso32sym) | LC_ALL=C sort > $@ -gen-vdso64sym := $(srctree)/$(src)/gen_vdso64_offsets.sh +gen-vdso64sym := $(src)/gen_vdso64_offsets.sh quiet_cmd_vdso64sym = VDSO64SYM $@ cmd_vdso64sym = $(NM) $< | $(gen-vdso64sym) | LC_ALL=C sort > $@ @@ -114,5 +108,3 @@ quiet_cmd_vdso64ld_and_check = VDSO64L $@ cmd_vdso64ld_and_check = $(VDSOCC) $(ldflags-y) $(LD64FLAGS) -o $@ -Wl,-T$(filter %.lds,$^) $(filter %.o,$^); $(cmd_vdso_check) quiet_cmd_vdso64as = VDSO64A $@ cmd_vdso64as = $(VDSOCC) $(a_flags) $(AS64FLAGS) -c -o $@ $< - -OBJECT_FILES_NON_STANDARD := y diff --git a/arch/powerpc/kexec/Makefile b/arch/powerpc/kexec/Makefile index 8e469c4da3..470eb0453e 100644 --- a/arch/powerpc/kexec/Makefile +++ b/arch/powerpc/kexec/Makefile @@ -3,11 +3,11 @@ # Makefile for the linux kernel. 
# -obj-y += core.o core_$(BITS).o +obj-y += core.o core_$(BITS).o ranges.o obj-$(CONFIG_PPC32) += relocate_32.o -obj-$(CONFIG_KEXEC_FILE) += file_load.o ranges.o file_load_$(BITS).o elf_$(BITS).o +obj-$(CONFIG_KEXEC_FILE) += file_load.o file_load_$(BITS).o elf_$(BITS).o obj-$(CONFIG_VMCORE_INFO) += vmcore_info.o obj-$(CONFIG_CRASH_DUMP) += crash.o diff --git a/arch/powerpc/kexec/core_64.c b/arch/powerpc/kexec/core_64.c index 27254624f6..222aa326da 100644 --- a/arch/powerpc/kexec/core_64.c +++ b/arch/powerpc/kexec/core_64.c @@ -17,6 +17,7 @@ #include <linux/cpu.h> #include <linux/hardirq.h> #include <linux/of.h> +#include <linux/libfdt.h> #include <asm/page.h> #include <asm/current.h> @@ -31,6 +32,7 @@ #include <asm/hw_breakpoint.h> #include <asm/svm.h> #include <asm/ultravisor.h> +#include <asm/crashdump-ppc64.h> int machine_kexec_prepare(struct kimage *image) { @@ -430,3 +432,113 @@ static int __init export_htab_values(void) } late_initcall(export_htab_values); #endif /* CONFIG_PPC_64S_HASH_MMU */ + +#if defined(CONFIG_KEXEC_FILE) || defined(CONFIG_CRASH_DUMP) +/** + * add_node_props - Reads node properties from device node structure and add + * them to fdt. + * @fdt: Flattened device tree of the kernel + * @node_offset: offset of the node to add a property at + * @dn: device node pointer + * + * Returns 0 on success, negative errno on error. + */ +static int add_node_props(void *fdt, int node_offset, const struct device_node *dn) +{ + int ret = 0; + struct property *pp; + + if (!dn) + return -EINVAL; + + for_each_property_of_node(dn, pp) { + ret = fdt_setprop(fdt, node_offset, pp->name, pp->value, pp->length); + if (ret < 0) { + pr_err("Unable to add %s property: %s\n", pp->name, fdt_strerror(ret)); + return ret; + } + } + return ret; +} + +/** + * update_cpus_node - Update cpus node of flattened device tree using of_root + * device node. + * @fdt: Flattened device tree of the kernel. + * + * Returns 0 on success, negative errno on error. 
+ * + * Note: expecting no subnodes under /cpus/<node> with device_type == "cpu". + * If this changes, update this function to include them. + */ +int update_cpus_node(void *fdt) +{ + int prev_node_offset; + const char *device_type; + const struct fdt_property *prop; + struct device_node *cpus_node, *dn; + int cpus_offset, cpus_subnode_offset, ret = 0; + + cpus_offset = fdt_path_offset(fdt, "/cpus"); + if (cpus_offset < 0 && cpus_offset != -FDT_ERR_NOTFOUND) { + pr_err("Malformed device tree: error reading /cpus node: %s\n", + fdt_strerror(cpus_offset)); + return cpus_offset; + } + + prev_node_offset = cpus_offset; + /* Delete sub-nodes of /cpus node with device_type == "cpu" */ + for (cpus_subnode_offset = fdt_first_subnode(fdt, cpus_offset); cpus_subnode_offset >= 0;) { + /* Ignore nodes that do not have a device_type property or device_type != "cpu" */ + prop = fdt_get_property(fdt, cpus_subnode_offset, "device_type", NULL); + if (!prop || strcmp(prop->data, "cpu")) { + prev_node_offset = cpus_subnode_offset; + goto next_node; + } + + ret = fdt_del_node(fdt, cpus_subnode_offset); + if (ret < 0) { + pr_err("Failed to delete a cpus sub-node: %s\n", fdt_strerror(ret)); + return ret; + } +next_node: + if (prev_node_offset == cpus_offset) + cpus_subnode_offset = fdt_first_subnode(fdt, cpus_offset); + else + cpus_subnode_offset = fdt_next_subnode(fdt, prev_node_offset); + } + + cpus_node = of_find_node_by_path("/cpus"); + /* Fail here to avoid kexec/kdump kernel boot hung */ + if (!cpus_node) { + pr_err("No /cpus node found\n"); + return -EINVAL; + } + + /* Add all /cpus sub-nodes of device_type == "cpu" to FDT */ + for_each_child_of_node(cpus_node, dn) { + /* Ignore device nodes that do not have a device_type property + * or device_type != "cpu". 
+ */ + device_type = of_get_property(dn, "device_type", NULL); + if (!device_type || strcmp(device_type, "cpu")) + continue; + + cpus_subnode_offset = fdt_add_subnode(fdt, cpus_offset, dn->full_name); + if (cpus_subnode_offset < 0) { + pr_err("Unable to add %s subnode: %s\n", dn->full_name, + fdt_strerror(cpus_subnode_offset)); + ret = cpus_subnode_offset; + goto out; + } + + ret = add_node_props(fdt, cpus_subnode_offset, dn); + if (ret < 0) + goto out; + } +out: + of_node_put(cpus_node); + of_node_put(dn); + return ret; +} +#endif /* CONFIG_KEXEC_FILE || CONFIG_CRASH_DUMP */ diff --git a/arch/powerpc/kexec/crash.c b/arch/powerpc/kexec/crash.c index ef5c2d25ec..9ac3266e49 100644 --- a/arch/powerpc/kexec/crash.c +++ b/arch/powerpc/kexec/crash.c @@ -16,6 +16,8 @@ #include <linux/delay.h> #include <linux/irq.h> #include <linux/types.h> +#include <linux/libfdt.h> +#include <linux/memory.h> #include <asm/processor.h> #include <asm/machdep.h> @@ -24,6 +26,7 @@ #include <asm/setjmp.h> #include <asm/debug.h> #include <asm/interrupt.h> +#include <asm/kexec_ranges.h> /* * The primary CPU waits a while for all secondary CPUs to enter. This is to @@ -392,3 +395,195 @@ void default_machine_crash_shutdown(struct pt_regs *regs) if (ppc_md.kexec_cpu_down) ppc_md.kexec_cpu_down(1, 0); } + +#ifdef CONFIG_CRASH_HOTPLUG +#undef pr_fmt +#define pr_fmt(fmt) "crash hp: " fmt + +/* + * Advertise preferred elfcorehdr size to userspace via + * /sys/kernel/crash_elfcorehdr_size sysfs interface. + */ +unsigned int arch_crash_get_elfcorehdr_size(void) +{ + unsigned long phdr_cnt; + + /* A program header for possible CPUs + vmcoreinfo */ + phdr_cnt = num_possible_cpus() + 1; + if (IS_ENABLED(CONFIG_MEMORY_HOTPLUG)) + phdr_cnt += CONFIG_CRASH_MAX_MEMORY_RANGES; + + return sizeof(struct elfhdr) + (phdr_cnt * sizeof(Elf64_Phdr)); +} + +/** + * update_crash_elfcorehdr() - Recreate the elfcorehdr and replace it with old + * elfcorehdr in the kexec segment array. 
+ * @image: the active struct kimage + * @mn: struct memory_notify data handler + */ +static void update_crash_elfcorehdr(struct kimage *image, struct memory_notify *mn) +{ + int ret; + struct crash_mem *cmem = NULL; + struct kexec_segment *ksegment; + void *ptr, *mem, *elfbuf = NULL; + unsigned long elfsz, memsz, base_addr, size; + + ksegment = &image->segment[image->elfcorehdr_index]; + mem = (void *) ksegment->mem; + memsz = ksegment->memsz; + + ret = get_crash_memory_ranges(&cmem); + if (ret) { + pr_err("Failed to get crash mem range\n"); + return; + } + + /* + * The hot unplugged memory is part of crash memory ranges, + * remove it here. + */ + if (image->hp_action == KEXEC_CRASH_HP_REMOVE_MEMORY) { + base_addr = PFN_PHYS(mn->start_pfn); + size = mn->nr_pages * PAGE_SIZE; + ret = remove_mem_range(&cmem, base_addr, size); + if (ret) { + pr_err("Failed to remove hot-unplugged memory from crash memory ranges\n"); + goto out; + } + } + + ret = crash_prepare_elf64_headers(cmem, false, &elfbuf, &elfsz); + if (ret) { + pr_err("Failed to prepare elf header\n"); + goto out; + } + + /* + * It is unlikely that kernel hit this because elfcorehdr kexec + * segment (memsz) is built with addition space to accommodate growing + * number of crash memory ranges while loading the kdump kernel. It is + * Just to avoid any unforeseen case. + */ + if (elfsz > memsz) { + pr_err("Updated crash elfcorehdr elfsz %lu > memsz %lu", elfsz, memsz); + goto out; + } + + ptr = __va(mem); + if (ptr) { + /* Temporarily invalidate the crash image while it is replaced */ + xchg(&kexec_crash_image, NULL); + + /* Replace the old elfcorehdr with newly prepared elfcorehdr */ + memcpy((void *)ptr, elfbuf, elfsz); + + /* The crash image is now valid once again */ + xchg(&kexec_crash_image, image); + } +out: + kvfree(cmem); + kvfree(elfbuf); +} + +/** + * get_fdt_index - Loop through the kexec segment array and find + * the index of the FDT segment. 
+ * @image: a pointer to kexec_crash_image + * + * Returns the index of FDT segment in the kexec segment array + * if found; otherwise -1. + */ +static int get_fdt_index(struct kimage *image) +{ + void *ptr; + unsigned long mem; + int i, fdt_index = -1; + + /* Find the FDT segment index in kexec segment array. */ + for (i = 0; i < image->nr_segments; i++) { + mem = image->segment[i].mem; + ptr = __va(mem); + + if (ptr && fdt_magic(ptr) == FDT_MAGIC) { + fdt_index = i; + break; + } + } + + return fdt_index; +} + +/** + * update_crash_fdt - updates the cpus node of the crash FDT. + * + * @image: a pointer to kexec_crash_image + */ +static void update_crash_fdt(struct kimage *image) +{ + void *fdt; + int fdt_index; + + fdt_index = get_fdt_index(image); + if (fdt_index < 0) { + pr_err("Unable to locate FDT segment.\n"); + return; + } + + fdt = __va((void *)image->segment[fdt_index].mem); + + /* Temporarily invalidate the crash image while it is replaced */ + xchg(&kexec_crash_image, NULL); + + /* update FDT to reflect changes in CPU resources */ + if (update_cpus_node(fdt)) + pr_err("Failed to update crash FDT"); + + /* The crash image is now valid once again */ + xchg(&kexec_crash_image, image); +} + +int arch_crash_hotplug_support(struct kimage *image, unsigned long kexec_flags) +{ +#ifdef CONFIG_KEXEC_FILE + if (image->file_mode) + return 1; +#endif + return kexec_flags & KEXEC_CRASH_HOTPLUG_SUPPORT; +} + +/** + * arch_crash_handle_hotplug_event - Handle crash CPU/Memory hotplug events to update the + * necessary kexec segments based on the hotplug event. + * @image: a pointer to kexec_crash_image + * @arg: struct memory_notify handler for memory hotplug case and NULL for CPU hotplug case. + * + * Update the kdump image based on the type of hotplug event, represented by image->hp_action. + * CPU add: Update the FDT segment to include the newly added CPU. + * CPU remove: No action is needed, with the assumption that it's okay to have offline CPUs + * part of the FDT. 
+ * Memory add/remove: No action is taken as this is not yet supported. + */ +void arch_crash_handle_hotplug_event(struct kimage *image, void *arg) +{ + struct memory_notify *mn; + + switch (image->hp_action) { + case KEXEC_CRASH_HP_REMOVE_CPU: + return; + + case KEXEC_CRASH_HP_ADD_CPU: + update_crash_fdt(image); + break; + + case KEXEC_CRASH_HP_REMOVE_MEMORY: + case KEXEC_CRASH_HP_ADD_MEMORY: + mn = (struct memory_notify *)arg; + update_crash_elfcorehdr(image, mn); + return; + default: + pr_warn_once("Unknown hotplug action\n"); + } +} +#endif /* CONFIG_CRASH_HOTPLUG */ diff --git a/arch/powerpc/kexec/elf_64.c b/arch/powerpc/kexec/elf_64.c index 6d8951e8e9..214c071c58 100644 --- a/arch/powerpc/kexec/elf_64.c +++ b/arch/powerpc/kexec/elf_64.c @@ -116,7 +116,8 @@ static void *elf64_load(struct kimage *image, char *kernel_buf, if (ret) goto out_free_fdt; - fdt_pack(fdt); + if (!IS_ENABLED(CONFIG_CRASH_HOTPLUG) || image->type != KEXEC_TYPE_CRASH) + fdt_pack(fdt); kbuf.buffer = fdt; kbuf.bufsz = kbuf.memsz = fdt_totalsize(fdt); diff --git a/arch/powerpc/kexec/file_load_64.c b/arch/powerpc/kexec/file_load_64.c index 1bc65de617..925a69ad24 100644 --- a/arch/powerpc/kexec/file_load_64.c +++ b/arch/powerpc/kexec/file_load_64.c @@ -30,6 +30,7 @@ #include <asm/iommu.h> #include <asm/prom.h> #include <asm/plpks.h> +#include <asm/cputhreads.h> struct umem_info { __be64 *buf; /* data buffer for usable-memory property */ @@ -48,83 +49,6 @@ const struct kexec_file_ops * const kexec_file_loaders[] = { }; /** - * get_exclude_memory_ranges - Get exclude memory ranges. This list includes - * regions like opal/rtas, tce-table, initrd, - * kernel, htab which should be avoided while - * setting up kexec load segments. - * @mem_ranges: Range list to add the memory ranges to. - * - * Returns 0 on success, negative errno on error. 
- */ -static int get_exclude_memory_ranges(struct crash_mem **mem_ranges) -{ - int ret; - - ret = add_tce_mem_ranges(mem_ranges); - if (ret) - goto out; - - ret = add_initrd_mem_range(mem_ranges); - if (ret) - goto out; - - ret = add_htab_mem_range(mem_ranges); - if (ret) - goto out; - - ret = add_kernel_mem_range(mem_ranges); - if (ret) - goto out; - - ret = add_rtas_mem_range(mem_ranges); - if (ret) - goto out; - - ret = add_opal_mem_range(mem_ranges); - if (ret) - goto out; - - ret = add_reserved_mem_ranges(mem_ranges); - if (ret) - goto out; - - /* exclude memory ranges should be sorted for easy lookup */ - sort_memory_ranges(*mem_ranges, true); -out: - if (ret) - pr_err("Failed to setup exclude memory ranges\n"); - return ret; -} - -/** - * get_reserved_memory_ranges - Get reserve memory ranges. This list includes - * memory regions that should be added to the - * memory reserve map to ensure the region is - * protected from any mischief. - * @mem_ranges: Range list to add the memory ranges to. - * - * Returns 0 on success, negative errno on error. - */ -static int get_reserved_memory_ranges(struct crash_mem **mem_ranges) -{ - int ret; - - ret = add_rtas_mem_range(mem_ranges); - if (ret) - goto out; - - ret = add_tce_mem_ranges(mem_ranges); - if (ret) - goto out; - - ret = add_reserved_mem_ranges(mem_ranges); -out: - if (ret) - pr_err("Failed to setup reserved memory ranges\n"); - return ret; -} - -/** * __locate_mem_hole_top_down - Looks top down for a large enough memory hole * in the memory regions between buf_min & buf_max * for the buffer. If found, sets kbuf->mem. @@ -323,119 +247,6 @@ static int locate_mem_hole_bottom_up_ppc64(struct kexec_buf *kbuf, #ifdef CONFIG_CRASH_DUMP /** - * get_usable_memory_ranges - Get usable memory ranges. This list includes - * regions like crashkernel, opal/rtas & tce-table, - * that kdump kernel could use. - * @mem_ranges: Range list to add the memory ranges to. - * - * Returns 0 on success, negative errno on error. 
- */ -static int get_usable_memory_ranges(struct crash_mem **mem_ranges) -{ - int ret; - - /* - * Early boot failure observed on guests when low memory (first memory - * block?) is not added to usable memory. So, add [0, crashk_res.end] - * instead of [crashk_res.start, crashk_res.end] to workaround it. - * Also, crashed kernel's memory must be added to reserve map to - * avoid kdump kernel from using it. - */ - ret = add_mem_range(mem_ranges, 0, crashk_res.end + 1); - if (ret) - goto out; - - ret = add_rtas_mem_range(mem_ranges); - if (ret) - goto out; - - ret = add_opal_mem_range(mem_ranges); - if (ret) - goto out; - - ret = add_tce_mem_ranges(mem_ranges); -out: - if (ret) - pr_err("Failed to setup usable memory ranges\n"); - return ret; -} - -/** - * get_crash_memory_ranges - Get crash memory ranges. This list includes - * first/crashing kernel's memory regions that - * would be exported via an elfcore. - * @mem_ranges: Range list to add the memory ranges to. - * - * Returns 0 on success, negative errno on error. 
- */ -static int get_crash_memory_ranges(struct crash_mem **mem_ranges) -{ - phys_addr_t base, end; - struct crash_mem *tmem; - u64 i; - int ret; - - for_each_mem_range(i, &base, &end) { - u64 size = end - base; - - /* Skip backup memory region, which needs a separate entry */ - if (base == BACKUP_SRC_START) { - if (size > BACKUP_SRC_SIZE) { - base = BACKUP_SRC_END + 1; - size -= BACKUP_SRC_SIZE; - } else - continue; - } - - ret = add_mem_range(mem_ranges, base, size); - if (ret) - goto out; - - /* Try merging adjacent ranges before reallocation attempt */ - if ((*mem_ranges)->nr_ranges == (*mem_ranges)->max_nr_ranges) - sort_memory_ranges(*mem_ranges, true); - } - - /* Reallocate memory ranges if there is no space to split ranges */ - tmem = *mem_ranges; - if (tmem && (tmem->nr_ranges == tmem->max_nr_ranges)) { - tmem = realloc_mem_ranges(mem_ranges); - if (!tmem) - goto out; - } - - /* Exclude crashkernel region */ - ret = crash_exclude_mem_range(tmem, crashk_res.start, crashk_res.end); - if (ret) - goto out; - - /* - * FIXME: For now, stay in parity with kexec-tools but if RTAS/OPAL - * regions are exported to save their context at the time of - * crash, they should actually be backed up just like the - * first 64K bytes of memory. - */ - ret = add_rtas_mem_range(mem_ranges); - if (ret) - goto out; - - ret = add_opal_mem_range(mem_ranges); - if (ret) - goto out; - - /* create a separate program header for the backup region */ - ret = add_mem_range(mem_ranges, BACKUP_SRC_START, BACKUP_SRC_SIZE); - if (ret) - goto out; - - sort_memory_ranges(*mem_ranges, false); -out: - if (ret) - pr_err("Failed to setup crash memory ranges\n"); - return ret; -} - -/** * check_realloc_usable_mem - Reallocate buffer if it can't accommodate entries * @um_info: Usable memory buffer and ranges info. * @cnt: No. of entries to accommodate. 
@@ -784,6 +595,23 @@ static void update_backup_region_phdr(struct kimage *image, Elf64_Ehdr *ehdr) } } +static unsigned int kdump_extra_elfcorehdr_size(struct crash_mem *cmem) +{ +#if defined(CONFIG_CRASH_HOTPLUG) && defined(CONFIG_MEMORY_HOTPLUG) + unsigned int extra_sz = 0; + + if (CONFIG_CRASH_MAX_MEMORY_RANGES > (unsigned int)PN_XNUM) + pr_warn("Number of Phdrs %u exceeds max\n", CONFIG_CRASH_MAX_MEMORY_RANGES); + else if (cmem->nr_ranges >= CONFIG_CRASH_MAX_MEMORY_RANGES) + pr_warn("Configured crash mem ranges may not be enough\n"); + else + extra_sz = (CONFIG_CRASH_MAX_MEMORY_RANGES - cmem->nr_ranges) * sizeof(Elf64_Phdr); + + return extra_sz; +#endif + return 0; +} + /** * load_elfcorehdr_segment - Setup crash memory ranges and initialize elfcorehdr * segment needed to load kdump kernel. @@ -815,7 +643,8 @@ static int load_elfcorehdr_segment(struct kimage *image, struct kexec_buf *kbuf) kbuf->buffer = headers; kbuf->mem = KEXEC_BUF_MEM_UNKNOWN; - kbuf->bufsz = kbuf->memsz = headers_sz; + kbuf->bufsz = headers_sz; + kbuf->memsz = headers_sz + kdump_extra_elfcorehdr_size(cmem); kbuf->top_down = false; ret = kexec_add_buffer(kbuf); @@ -979,6 +808,9 @@ static unsigned int kdump_extra_fdt_size_ppc64(struct kimage *image) unsigned int cpu_nodes, extra_size = 0; struct device_node *dn; u64 usm_entries; +#ifdef CONFIG_CRASH_HOTPLUG + unsigned int possible_cpu_nodes; +#endif if (!IS_ENABLED(CONFIG_CRASH_DUMP) || image->type != KEXEC_TYPE_CRASH) return 0; @@ -1006,6 +838,19 @@ static unsigned int kdump_extra_fdt_size_ppc64(struct kimage *image) if (cpu_nodes > boot_cpu_node_count) extra_size += (cpu_nodes - boot_cpu_node_count) * cpu_node_size(); +#ifdef CONFIG_CRASH_HOTPLUG + /* + * Make sure enough space is reserved to accommodate possible CPU nodes + * in the crash FDT. This allows packing possible CPU nodes which are + * not yet present in the system without regenerating the entire FDT. 
+ */ + if (image->type == KEXEC_TYPE_CRASH) { + possible_cpu_nodes = num_possible_cpus() / threads_per_core; + if (possible_cpu_nodes > cpu_nodes) + extra_size += (possible_cpu_nodes - cpu_nodes) * cpu_node_size(); + } +#endif + return extra_size; } @@ -1028,93 +873,6 @@ unsigned int kexec_extra_fdt_size_ppc64(struct kimage *image) return extra_size + kdump_extra_fdt_size_ppc64(image); } -/** - * add_node_props - Reads node properties from device node structure and add - * them to fdt. - * @fdt: Flattened device tree of the kernel - * @node_offset: offset of the node to add a property at - * @dn: device node pointer - * - * Returns 0 on success, negative errno on error. - */ -static int add_node_props(void *fdt, int node_offset, const struct device_node *dn) -{ - int ret = 0; - struct property *pp; - - if (!dn) - return -EINVAL; - - for_each_property_of_node(dn, pp) { - ret = fdt_setprop(fdt, node_offset, pp->name, pp->value, pp->length); - if (ret < 0) { - pr_err("Unable to add %s property: %s\n", pp->name, fdt_strerror(ret)); - return ret; - } - } - return ret; -} - -/** - * update_cpus_node - Update cpus node of flattened device tree using of_root - * device node. - * @fdt: Flattened device tree of the kernel. - * - * Returns 0 on success, negative errno on error. 
- */ -static int update_cpus_node(void *fdt) -{ - struct device_node *cpus_node, *dn; - int cpus_offset, cpus_subnode_offset, ret = 0; - - cpus_offset = fdt_path_offset(fdt, "/cpus"); - if (cpus_offset < 0 && cpus_offset != -FDT_ERR_NOTFOUND) { - pr_err("Malformed device tree: error reading /cpus node: %s\n", - fdt_strerror(cpus_offset)); - return cpus_offset; - } - - if (cpus_offset > 0) { - ret = fdt_del_node(fdt, cpus_offset); - if (ret < 0) { - pr_err("Error deleting /cpus node: %s\n", fdt_strerror(ret)); - return -EINVAL; - } - } - - /* Add cpus node to fdt */ - cpus_offset = fdt_add_subnode(fdt, fdt_path_offset(fdt, "/"), "cpus"); - if (cpus_offset < 0) { - pr_err("Error creating /cpus node: %s\n", fdt_strerror(cpus_offset)); - return -EINVAL; - } - - /* Add cpus node properties */ - cpus_node = of_find_node_by_path("/cpus"); - ret = add_node_props(fdt, cpus_offset, cpus_node); - of_node_put(cpus_node); - if (ret < 0) - return ret; - - /* Loop through all subnodes of cpus and add them to fdt */ - for_each_node_by_type(dn, "cpu") { - cpus_subnode_offset = fdt_add_subnode(fdt, cpus_offset, dn->full_name); - if (cpus_subnode_offset < 0) { - pr_err("Unable to add %s subnode: %s\n", dn->full_name, - fdt_strerror(cpus_subnode_offset)); - ret = cpus_subnode_offset; - goto out; - } - - ret = add_node_props(fdt, cpus_subnode_offset, dn); - if (ret < 0) - goto out; - } -out: - of_node_put(dn); - return ret; -} - static int copy_property(void *fdt, int node_offset, const struct device_node *dn, const char *propname) { diff --git a/arch/powerpc/kexec/ranges.c b/arch/powerpc/kexec/ranges.c index 33b780049a..3702b0bdab 100644 --- a/arch/powerpc/kexec/ranges.c +++ b/arch/powerpc/kexec/ranges.c @@ -20,9 +20,13 @@ #include <linux/kexec.h> #include <linux/of.h> #include <linux/slab.h> +#include <linux/memblock.h> +#include <linux/crash_core.h> #include <asm/sections.h> #include <asm/kexec_ranges.h> +#include <asm/crashdump-ppc64.h> +#if defined(CONFIG_KEXEC_FILE) || 
defined(CONFIG_CRASH_DUMP) /** * get_max_nr_ranges - Get the max no. of ranges crash_mem structure * could hold, given the size allocated for it. @@ -234,13 +238,16 @@ int add_mem_range(struct crash_mem **mem_ranges, u64 base, u64 size) return __add_mem_range(mem_ranges, base, size); } +#endif /* CONFIG_KEXEC_FILE || CONFIG_CRASH_DUMP */ + +#ifdef CONFIG_KEXEC_FILE /** * add_tce_mem_ranges - Adds tce-table range to the given memory ranges list. * @mem_ranges: Range list to add the memory range(s) to. * * Returns 0 on success, negative errno on error. */ -int add_tce_mem_ranges(struct crash_mem **mem_ranges) +static int add_tce_mem_ranges(struct crash_mem **mem_ranges) { struct device_node *dn = NULL; int ret = 0; @@ -279,7 +286,7 @@ int add_tce_mem_ranges(struct crash_mem **mem_ranges) * * Returns 0 on success, negative errno on error. */ -int add_initrd_mem_range(struct crash_mem **mem_ranges) +static int add_initrd_mem_range(struct crash_mem **mem_ranges) { u64 base, end; int ret; @@ -296,7 +303,6 @@ int add_initrd_mem_range(struct crash_mem **mem_ranges) return ret; } -#ifdef CONFIG_PPC_64S_HASH_MMU /** * add_htab_mem_range - Adds htab range to the given memory ranges list, * if it exists @@ -304,14 +310,18 @@ int add_initrd_mem_range(struct crash_mem **mem_ranges) * * Returns 0 on success, negative errno on error. */ -int add_htab_mem_range(struct crash_mem **mem_ranges) +static int add_htab_mem_range(struct crash_mem **mem_ranges) { + +#ifdef CONFIG_PPC_64S_HASH_MMU if (!htab_address) return 0; return add_mem_range(mem_ranges, __pa(htab_address), htab_size_bytes); -} +#else + return 0; #endif +} /** * add_kernel_mem_range - Adds kernel text region to the given @@ -320,18 +330,20 @@ int add_htab_mem_range(struct crash_mem **mem_ranges) * * Returns 0 on success, negative errno on error. 
*/ -int add_kernel_mem_range(struct crash_mem **mem_ranges) +static int add_kernel_mem_range(struct crash_mem **mem_ranges) { return add_mem_range(mem_ranges, 0, __pa(_end)); } +#endif /* CONFIG_KEXEC_FILE */ +#if defined(CONFIG_KEXEC_FILE) || defined(CONFIG_CRASH_DUMP) /** * add_rtas_mem_range - Adds RTAS region to the given memory ranges list. * @mem_ranges: Range list to add the memory range to. * * Returns 0 on success, negative errno on error. */ -int add_rtas_mem_range(struct crash_mem **mem_ranges) +static int add_rtas_mem_range(struct crash_mem **mem_ranges) { struct device_node *dn; u32 base, size; @@ -356,7 +368,7 @@ int add_rtas_mem_range(struct crash_mem **mem_ranges) * * Returns 0 on success, negative errno on error. */ -int add_opal_mem_range(struct crash_mem **mem_ranges) +static int add_opal_mem_range(struct crash_mem **mem_ranges) { struct device_node *dn; u64 base, size; @@ -374,7 +386,9 @@ int add_opal_mem_range(struct crash_mem **mem_ranges) of_node_put(dn); return ret; } +#endif /* CONFIG_KEXEC_FILE || CONFIG_CRASH_DUMP */ +#ifdef CONFIG_KEXEC_FILE /** * add_reserved_mem_ranges - Adds "/reserved-ranges" regions exported by f/w * to the given memory ranges list. @@ -382,7 +396,7 @@ int add_opal_mem_range(struct crash_mem **mem_ranges) * * Returns 0 on success, negative errno on error. */ -int add_reserved_mem_ranges(struct crash_mem **mem_ranges) +static int add_reserved_mem_ranges(struct crash_mem **mem_ranges) { int n_mem_addr_cells, n_mem_size_cells, i, len, cells, ret = 0; struct device_node *root = of_find_node_by_path("/"); @@ -412,3 +426,283 @@ int add_reserved_mem_ranges(struct crash_mem **mem_ranges) return ret; } + +/** + * get_reserved_memory_ranges - Get reserve memory ranges. This list includes + * memory regions that should be added to the + * memory reserve map to ensure the region is + * protected from any mischief. + * @mem_ranges: Range list to add the memory ranges to. + * + * Returns 0 on success, negative errno on error. 
+ */ +int get_reserved_memory_ranges(struct crash_mem **mem_ranges) +{ + int ret; + + ret = add_rtas_mem_range(mem_ranges); + if (ret) + goto out; + + ret = add_tce_mem_ranges(mem_ranges); + if (ret) + goto out; + + ret = add_reserved_mem_ranges(mem_ranges); +out: + if (ret) + pr_err("Failed to setup reserved memory ranges\n"); + return ret; +} + +/** + * get_exclude_memory_ranges - Get exclude memory ranges. This list includes + * regions like opal/rtas, tce-table, initrd, + * kernel, htab which should be avoided while + * setting up kexec load segments. + * @mem_ranges: Range list to add the memory ranges to. + * + * Returns 0 on success, negative errno on error. + */ +int get_exclude_memory_ranges(struct crash_mem **mem_ranges) +{ + int ret; + + ret = add_tce_mem_ranges(mem_ranges); + if (ret) + goto out; + + ret = add_initrd_mem_range(mem_ranges); + if (ret) + goto out; + + ret = add_htab_mem_range(mem_ranges); + if (ret) + goto out; + + ret = add_kernel_mem_range(mem_ranges); + if (ret) + goto out; + + ret = add_rtas_mem_range(mem_ranges); + if (ret) + goto out; + + ret = add_opal_mem_range(mem_ranges); + if (ret) + goto out; + + ret = add_reserved_mem_ranges(mem_ranges); + if (ret) + goto out; + + /* exclude memory ranges should be sorted for easy lookup */ + sort_memory_ranges(*mem_ranges, true); +out: + if (ret) + pr_err("Failed to setup exclude memory ranges\n"); + return ret; +} + +#ifdef CONFIG_CRASH_DUMP +/** + * get_usable_memory_ranges - Get usable memory ranges. This list includes + * regions like crashkernel, opal/rtas & tce-table, + * that kdump kernel could use. + * @mem_ranges: Range list to add the memory ranges to. + * + * Returns 0 on success, negative errno on error. + */ +int get_usable_memory_ranges(struct crash_mem **mem_ranges) +{ + int ret; + + /* + * Early boot failure observed on guests when low memory (first memory + * block?) is not added to usable memory. 
So, add [0, crashk_res.end] + * instead of [crashk_res.start, crashk_res.end] to workaround it. + * Also, crashed kernel's memory must be added to reserve map to + * avoid kdump kernel from using it. + */ + ret = add_mem_range(mem_ranges, 0, crashk_res.end + 1); + if (ret) + goto out; + + ret = add_rtas_mem_range(mem_ranges); + if (ret) + goto out; + + ret = add_opal_mem_range(mem_ranges); + if (ret) + goto out; + + ret = add_tce_mem_ranges(mem_ranges); +out: + if (ret) + pr_err("Failed to setup usable memory ranges\n"); + return ret; +} +#endif /* CONFIG_CRASH_DUMP */ +#endif /* CONFIG_KEXEC_FILE */ + +#ifdef CONFIG_CRASH_DUMP +/** + * get_crash_memory_ranges - Get crash memory ranges. This list includes + * first/crashing kernel's memory regions that + * would be exported via an elfcore. + * @mem_ranges: Range list to add the memory ranges to. + * + * Returns 0 on success, negative errno on error. + */ +int get_crash_memory_ranges(struct crash_mem **mem_ranges) +{ + phys_addr_t base, end; + struct crash_mem *tmem; + u64 i; + int ret; + + for_each_mem_range(i, &base, &end) { + u64 size = end - base; + + /* Skip backup memory region, which needs a separate entry */ + if (base == BACKUP_SRC_START) { + if (size > BACKUP_SRC_SIZE) { + base = BACKUP_SRC_END + 1; + size -= BACKUP_SRC_SIZE; + } else + continue; + } + + ret = add_mem_range(mem_ranges, base, size); + if (ret) + goto out; + + /* Try merging adjacent ranges before reallocation attempt */ + if ((*mem_ranges)->nr_ranges == (*mem_ranges)->max_nr_ranges) + sort_memory_ranges(*mem_ranges, true); + } + + /* Reallocate memory ranges if there is no space to split ranges */ + tmem = *mem_ranges; + if (tmem && (tmem->nr_ranges == tmem->max_nr_ranges)) { + tmem = realloc_mem_ranges(mem_ranges); + if (!tmem) + goto out; + } + + /* Exclude crashkernel region */ + ret = crash_exclude_mem_range(tmem, crashk_res.start, crashk_res.end); + if (ret) + goto out; + + /* + * FIXME: For now, stay in parity with kexec-tools but if 
RTAS/OPAL + * regions are exported to save their context at the time of + * crash, they should actually be backed up just like the + * first 64K bytes of memory. + */ + ret = add_rtas_mem_range(mem_ranges); + if (ret) + goto out; + + ret = add_opal_mem_range(mem_ranges); + if (ret) + goto out; + + /* create a separate program header for the backup region */ + ret = add_mem_range(mem_ranges, BACKUP_SRC_START, BACKUP_SRC_SIZE); + if (ret) + goto out; + + sort_memory_ranges(*mem_ranges, false); +out: + if (ret) + pr_err("Failed to setup crash memory ranges\n"); + return ret; +} + +/** + * remove_mem_range - Removes the given memory range from the range list. + * @mem_ranges: Range list to remove the memory range to. + * @base: Base address of the range to remove. + * @size: Size of the memory range to remove. + * + * (Re)allocates memory, if needed. + * + * Returns 0 on success, negative errno on error. + */ +int remove_mem_range(struct crash_mem **mem_ranges, u64 base, u64 size) +{ + u64 end; + int ret = 0; + unsigned int i; + u64 mstart, mend; + struct crash_mem *mem_rngs = *mem_ranges; + + if (!size) + return 0; + + /* + * Memory range are stored as start and end address, use + * the same format to do remove operation. + */ + end = base + size - 1; + + for (i = 0; i < mem_rngs->nr_ranges; i++) { + mstart = mem_rngs->ranges[i].start; + mend = mem_rngs->ranges[i].end; + + /* + * Memory range to remove is not part of this range entry + * in the memory range list + */ + if (!(base >= mstart && end <= mend)) + continue; + + /* + * Memory range to remove is equivalent to this entry in the + * memory range list. Remove the range entry from the list. 
+ */ + if (base == mstart && end == mend) { + for (; i < mem_rngs->nr_ranges - 1; i++) { + mem_rngs->ranges[i].start = mem_rngs->ranges[i+1].start; + mem_rngs->ranges[i].end = mem_rngs->ranges[i+1].end; + } + mem_rngs->nr_ranges--; + goto out; + } + /* + * Start address of the memory range to remove and the + * current memory range entry in the list is same. Just + * move the start address of the current memory range + * entry in the list to end + 1. + */ + else if (base == mstart) { + mem_rngs->ranges[i].start = end + 1; + goto out; + } + /* + * End address of the memory range to remove and the + * current memory range entry in the list is same. + * Just move the end address of the current memory + * range entry in the list to base - 1. + */ + else if (end == mend) { + mem_rngs->ranges[i].end = base - 1; + goto out; + } + /* + * Memory range to remove is not at the edge of current + * memory range entry. Split the current memory entry into + * two half. + */ + else { + mem_rngs->ranges[i].end = base - 1; + size = mem_rngs->ranges[i].end - end; + ret = add_mem_range(mem_ranges, end + 1, size); + } + } +out: + return ret; +} +#endif /* CONFIG_CRASH_DUMP */ diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c index 8acec14412..ff6c383739 100644 --- a/arch/powerpc/kvm/book3s.c +++ b/arch/powerpc/kvm/book3s.c @@ -360,10 +360,6 @@ static int kvmppc_book3s_irqprio_deliver(struct kvm_vcpu *vcpu, break; } -#if 0 - printk(KERN_INFO "Deliver interrupt 0x%x? 
%x\n", vec, deliver); -#endif - if (deliver) kvmppc_inject_interrupt(vcpu, vec, 0); @@ -899,11 +895,6 @@ bool kvm_test_age_gfn(struct kvm *kvm, struct kvm_gfn_range *range) return kvm->arch.kvm_ops->test_age_gfn(kvm, range); } -bool kvm_set_spte_gfn(struct kvm *kvm, struct kvm_gfn_range *range) -{ - return kvm->arch.kvm_ops->set_spte_gfn(kvm, range); -} - int kvmppc_core_init_vm(struct kvm *kvm) { diff --git a/arch/powerpc/kvm/book3s.h b/arch/powerpc/kvm/book3s.h index 58391b4b32..4aa2ab89af 100644 --- a/arch/powerpc/kvm/book3s.h +++ b/arch/powerpc/kvm/book3s.h @@ -12,7 +12,6 @@ extern void kvmppc_core_flush_memslot_hv(struct kvm *kvm, extern bool kvm_unmap_gfn_range_hv(struct kvm *kvm, struct kvm_gfn_range *range); extern bool kvm_age_gfn_hv(struct kvm *kvm, struct kvm_gfn_range *range); extern bool kvm_test_age_gfn_hv(struct kvm *kvm, struct kvm_gfn_range *range); -extern bool kvm_set_spte_gfn_hv(struct kvm *kvm, struct kvm_gfn_range *range); extern int kvmppc_mmu_init_pr(struct kvm_vcpu *vcpu); extern void kvmppc_mmu_destroy_pr(struct kvm_vcpu *vcpu); diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c index 2b1f0cdd8c..1b51b1c471 100644 --- a/arch/powerpc/kvm/book3s_64_mmu_hv.c +++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c @@ -1010,18 +1010,6 @@ bool kvm_test_age_gfn_hv(struct kvm *kvm, struct kvm_gfn_range *range) return kvm_test_age_rmapp(kvm, range->slot, range->start); } -bool kvm_set_spte_gfn_hv(struct kvm *kvm, struct kvm_gfn_range *range) -{ - WARN_ON(range->start + 1 != range->end); - - if (kvm_is_radix(kvm)) - kvm_unmap_radix(kvm, range->slot, range->start); - else - kvm_unmap_rmapp(kvm, range->slot, range->start); - - return false; -} - static int vcpus_running(struct kvm *kvm) { return atomic_read(&kvm->arch.vcpus_running) != 0; diff --git a/arch/powerpc/kvm/book3s_emulate.c b/arch/powerpc/kvm/book3s_emulate.c index 5bbfb2eed1..de126d1533 100644 --- a/arch/powerpc/kvm/book3s_emulate.c +++ 
b/arch/powerpc/kvm/book3s_emulate.c @@ -714,7 +714,7 @@ int kvmppc_core_emulate_mtspr_pr(struct kvm_vcpu *vcpu, int sprn, ulong spr_val) case SPRN_HID1: to_book3s(vcpu)->hid[1] = spr_val; break; - case SPRN_HID2: + case SPRN_HID2_750FX: to_book3s(vcpu)->hid[2] = spr_val; break; case SPRN_HID2_GEKKO: @@ -900,7 +900,7 @@ int kvmppc_core_emulate_mfspr_pr(struct kvm_vcpu *vcpu, int sprn, ulong *spr_val case SPRN_HID1: *spr_val = to_book3s(vcpu)->hid[1]; break; - case SPRN_HID2: + case SPRN_HID2_750FX: case SPRN_HID2_GEKKO: *spr_val = to_book3s(vcpu)->hid[2]; break; diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index 692a7c6f5f..d8352e4d9c 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -2305,7 +2305,7 @@ static int kvmppc_get_one_reg_hv(struct kvm_vcpu *vcpu, u64 id, *val = get_reg_val(id, kvmppc_get_siar_hv(vcpu)); break; case KVM_REG_PPC_SDAR: - *val = get_reg_val(id, kvmppc_get_siar_hv(vcpu)); + *val = get_reg_val(id, kvmppc_get_sdar_hv(vcpu)); break; case KVM_REG_PPC_SIER: *val = get_reg_val(id, kvmppc_get_sier_hv(vcpu, 0)); @@ -2540,7 +2540,7 @@ static int kvmppc_set_one_reg_hv(struct kvm_vcpu *vcpu, u64 id, vcpu->arch.mmcrs = set_reg_val(id, *val); break; case KVM_REG_PPC_MMCR3: - *val = get_reg_val(id, vcpu->arch.mmcr[3]); + kvmppc_set_mmcr_hv(vcpu, 3, set_reg_val(id, *val)); break; case KVM_REG_PPC_PMC1 ... 
KVM_REG_PPC_PMC8: i = id - KVM_REG_PPC_PMC1; @@ -4116,6 +4116,11 @@ static int kvmhv_vcpu_entry_nestedv2(struct kvm_vcpu *vcpu, u64 time_limit, int trap; long rc; + if (vcpu->arch.doorbell_request) { + vcpu->arch.doorbell_request = 0; + kvmppc_set_dpdes(vcpu, 1); + } + io = &vcpu->arch.nestedv2_io; msr = mfmsr(); @@ -6364,7 +6369,6 @@ static struct kvmppc_ops kvm_ops_hv = { .unmap_gfn_range = kvm_unmap_gfn_range_hv, .age_gfn = kvm_age_gfn_hv, .test_age_gfn = kvm_test_age_gfn_hv, - .set_spte_gfn = kvm_set_spte_gfn_hv, .free_memslot = kvmppc_core_free_memslot_hv, .init_vm = kvmppc_core_init_vm_hv, .destroy_vm = kvmppc_core_destroy_vm_hv, diff --git a/arch/powerpc/kvm/book3s_hv_nestedv2.c b/arch/powerpc/kvm/book3s_hv_nestedv2.c index 1091f7a83b..342f583147 100644 --- a/arch/powerpc/kvm/book3s_hv_nestedv2.c +++ b/arch/powerpc/kvm/book3s_hv_nestedv2.c @@ -311,6 +311,10 @@ static int gs_msg_ops_vcpu_fill_info(struct kvmppc_gs_buff *gsb, rc = kvmppc_gse_put_u64(gsb, iden, vcpu->arch.vcore->vtb); break; + case KVMPPC_GSID_DPDES: + rc = kvmppc_gse_put_u64(gsb, iden, + vcpu->arch.vcore->dpdes); + break; case KVMPPC_GSID_LPCR: rc = kvmppc_gse_put_u64(gsb, iden, vcpu->arch.vcore->lpcr); @@ -543,6 +547,9 @@ static int gs_msg_ops_vcpu_refresh_info(struct kvmppc_gs_msg *gsm, case KVMPPC_GSID_VTB: vcpu->arch.vcore->vtb = kvmppc_gse_get_u64(gse); break; + case KVMPPC_GSID_DPDES: + vcpu->arch.vcore->dpdes = kvmppc_gse_get_u64(gse); + break; case KVMPPC_GSID_LPCR: vcpu->arch.vcore->lpcr = kvmppc_gse_get_u64(gse); break; diff --git a/arch/powerpc/kvm/book3s_hv_rm_xics.c b/arch/powerpc/kvm/book3s_hv_rm_xics.c index e429848785..f2636414d8 100644 --- a/arch/powerpc/kvm/book3s_hv_rm_xics.c +++ b/arch/powerpc/kvm/book3s_hv_rm_xics.c @@ -837,7 +837,7 @@ static inline void this_cpu_inc_rm(unsigned int __percpu *addr) */ static void kvmppc_rm_handle_irq_desc(struct irq_desc *desc) { - this_cpu_inc_rm(desc->kstat_irqs); + this_cpu_inc_rm(&desc->kstat_irqs->cnt); __this_cpu_inc(kstat.irqs_sum); 
} diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c index 5b92619a05..a7d7137ea0 100644 --- a/arch/powerpc/kvm/book3s_pr.c +++ b/arch/powerpc/kvm/book3s_pr.c @@ -461,12 +461,6 @@ static bool kvm_test_age_gfn_pr(struct kvm *kvm, struct kvm_gfn_range *range) return false; } -static bool kvm_set_spte_gfn_pr(struct kvm *kvm, struct kvm_gfn_range *range) -{ - /* The page will get remapped properly on its next fault */ - return do_kvm_unmap_gfn(kvm, range); -} - /*****************************************/ static void kvmppc_set_msr_pr(struct kvm_vcpu *vcpu, u64 msr) @@ -2071,7 +2065,6 @@ static struct kvmppc_ops kvm_ops_pr = { .unmap_gfn_range = kvm_unmap_gfn_range_pr, .age_gfn = kvm_age_gfn_pr, .test_age_gfn = kvm_test_age_gfn_pr, - .set_spte_gfn = kvm_set_spte_gfn_pr, .free_memslot = kvmppc_core_free_memslot_pr, .init_vm = kvmppc_core_init_vm_pr, .destroy_vm = kvmppc_core_destroy_vm_pr, diff --git a/arch/powerpc/kvm/book3s_xive.c b/arch/powerpc/kvm/book3s_xive.c index 29a3822497..1362c67238 100644 --- a/arch/powerpc/kvm/book3s_xive.c +++ b/arch/powerpc/kvm/book3s_xive.c @@ -531,7 +531,7 @@ static int xive_vm_h_eoi(struct kvm_vcpu *vcpu, unsigned long xirr) xc->cppr = xive_prio_from_guest(new_cppr); /* - * IPIs are synthetized from MFRR and thus don't need + * IPIs are synthesized from MFRR and thus don't need * any special EOI handling. The underlying interrupt * used to signal MFRR changes is EOId when fetched from * the queue. 
diff --git a/arch/powerpc/kvm/e500_mmu_host.c b/arch/powerpc/kvm/e500_mmu_host.c index ccb8f16ffe..c664fdec75 100644 --- a/arch/powerpc/kvm/e500_mmu_host.c +++ b/arch/powerpc/kvm/e500_mmu_host.c @@ -747,12 +747,6 @@ bool kvm_test_age_gfn(struct kvm *kvm, struct kvm_gfn_range *range) return false; } -bool kvm_set_spte_gfn(struct kvm *kvm, struct kvm_gfn_range *range) -{ - /* The page will get remapped properly on its next fault */ - return kvm_e500_mmu_unmap_gfn(kvm, range); -} - /*****************************************/ int e500_mmu_host_init(struct kvmppc_vcpu_e500 *vcpu_e500) diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index d32abe7fe6..d11767208b 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -1984,8 +1984,10 @@ static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu, break; r = -ENXIO; - if (!xive_enabled()) + if (!xive_enabled()) { + fdput(f); break; + } r = -EPERM; dev = kvm_device_from_filp(f.file); diff --git a/arch/powerpc/kvm/test-guest-state-buffer.c b/arch/powerpc/kvm/test-guest-state-buffer.c index 4720b8dc88..2571ccc618 100644 --- a/arch/powerpc/kvm/test-guest-state-buffer.c +++ b/arch/powerpc/kvm/test-guest-state-buffer.c @@ -151,7 +151,7 @@ static void test_gs_bitmap(struct kunit *test) i++; } - for (u16 iden = KVMPPC_GSID_GPR(0); iden <= KVMPPC_GSID_CTRL; iden++) { + for (u16 iden = KVMPPC_GSID_GPR(0); iden <= KVMPPC_GSE_DW_REGS_END; iden++) { kvmppc_gsbm_set(&gsbm, iden); kvmppc_gsbm_set(&gsbm1, iden); KUNIT_EXPECT_TRUE(test, kvmppc_gsbm_test(&gsbm, iden)); diff --git a/arch/powerpc/lib/Makefile b/arch/powerpc/lib/Makefile index 0ab65eeb93..f14ecab674 100644 --- a/arch/powerpc/lib/Makefile +++ b/arch/powerpc/lib/Makefile @@ -3,8 +3,6 @@ # Makefile for ppc-specific library files.. 
# -ccflags-$(CONFIG_PPC64) := $(NO_MINIMAL_TOC) - CFLAGS_code-patching.o += -fno-stack-protector CFLAGS_feature-fixups.o += -fno-stack-protector diff --git a/arch/powerpc/lib/code-patching.c b/arch/powerpc/lib/code-patching.c index c6ab46156c..0d1f3ee911 100644 --- a/arch/powerpc/lib/code-patching.c +++ b/arch/powerpc/lib/code-patching.c @@ -225,7 +225,7 @@ void __init poking_init(void) static unsigned long get_patch_pfn(void *addr) { - if (IS_ENABLED(CONFIG_MODULES) && is_vmalloc_or_module_addr(addr)) + if (IS_ENABLED(CONFIG_EXECMEM) && is_vmalloc_or_module_addr(addr)) return vmalloc_to_pfn(addr); else return __pa_symbol(addr) >> PAGE_SHIFT; @@ -372,9 +372,32 @@ int patch_instruction(u32 *addr, ppc_inst_t instr) } NOKPROBE_SYMBOL(patch_instruction); +static int patch_memset64(u64 *addr, u64 val, size_t count) +{ + for (u64 *end = addr + count; addr < end; addr++) + __put_kernel_nofault(addr, &val, u64, failed); + + return 0; + +failed: + return -EPERM; +} + +static int patch_memset32(u32 *addr, u32 val, size_t count) +{ + for (u32 *end = addr + count; addr < end; addr++) + __put_kernel_nofault(addr, &val, u32, failed); + + return 0; + +failed: + return -EPERM; +} + static int __patch_instructions(u32 *patch_addr, u32 *code, size_t len, bool repeat_instr) { unsigned long start = (unsigned long)patch_addr; + int err; /* Repeat instruction */ if (repeat_instr) { @@ -383,19 +406,19 @@ static int __patch_instructions(u32 *patch_addr, u32 *code, size_t len, bool rep if (ppc_inst_prefixed(instr)) { u64 val = ppc_inst_as_ulong(instr); - memset64((u64 *)patch_addr, val, len / 8); + err = patch_memset64((u64 *)patch_addr, val, len / 8); } else { u32 val = ppc_inst_val(instr); - memset32(patch_addr, val, len / 4); + err = patch_memset32(patch_addr, val, len / 4); } } else { - memcpy(patch_addr, code, len); + err = copy_to_kernel_nofault(patch_addr, code, len); } smp_wmb(); /* smp write barrier */ flush_icache_range(start, start + len); - return 0; + return err; } /* diff 
--git a/arch/powerpc/lib/feature-fixups.c b/arch/powerpc/lib/feature-fixups.c index 4f82581ca2..b7201ba50b 100644 --- a/arch/powerpc/lib/feature-fixups.c +++ b/arch/powerpc/lib/feature-fixups.c @@ -25,6 +25,13 @@ #include <asm/firmware.h> #include <asm/inst.h> +/* + * Used to generate warnings if mmu or cpu feature check functions that + * use static keys before they are initialized. + */ +bool static_key_feature_checks_initialized __read_mostly; +EXPORT_SYMBOL_GPL(static_key_feature_checks_initialized); + struct fixup_entry { unsigned long mask; unsigned long value; @@ -679,6 +686,7 @@ void __init setup_feature_keys(void) jump_label_init(); cpu_feature_keys_init(); mmu_feature_keys_init(); + static_key_feature_checks_initialized = true; } static int __init check_features(void) diff --git a/arch/powerpc/lib/test-code-patching.c b/arch/powerpc/lib/test-code-patching.c index c44823292f..f76030087f 100644 --- a/arch/powerpc/lib/test-code-patching.c +++ b/arch/powerpc/lib/test-code-patching.c @@ -347,6 +347,97 @@ static void __init test_prefixed_patching(void) check(!memcmp(iptr, expected, sizeof(expected))); } +static void __init test_multi_instruction_patching(void) +{ + u32 code[32]; + void *buf; + u32 *addr32; + u64 *addr64; + ppc_inst_t inst64 = ppc_inst_prefix(OP_PREFIX << 26 | 3UL << 24, PPC_RAW_TRAP()); + u32 inst32 = PPC_RAW_NOP(); + + buf = vzalloc(PAGE_SIZE * 8); + check(buf); + if (!buf) + return; + + /* Test single page 32-bit repeated instruction */ + addr32 = buf + PAGE_SIZE; + check(!patch_instructions(addr32 + 1, &inst32, 12, true)); + + check(addr32[0] == 0); + check(addr32[1] == inst32); + check(addr32[2] == inst32); + check(addr32[3] == inst32); + check(addr32[4] == 0); + + /* Test single page 64-bit repeated instruction */ + if (IS_ENABLED(CONFIG_PPC64)) { + check(ppc_inst_prefixed(inst64)); + + addr64 = buf + PAGE_SIZE * 2; + ppc_inst_write(code, inst64); + check(!patch_instructions((u32 *)(addr64 + 1), code, 24, true)); + + check(addr64[0] == 0); 
+ check(ppc_inst_equal(ppc_inst_read((u32 *)&addr64[1]), inst64)); + check(ppc_inst_equal(ppc_inst_read((u32 *)&addr64[2]), inst64)); + check(ppc_inst_equal(ppc_inst_read((u32 *)&addr64[3]), inst64)); + check(addr64[4] == 0); + } + + /* Test single page memcpy */ + addr32 = buf + PAGE_SIZE * 3; + + for (int i = 0; i < ARRAY_SIZE(code); i++) + code[i] = i + 1; + + check(!patch_instructions(addr32 + 1, code, sizeof(code), false)); + + check(addr32[0] == 0); + check(!memcmp(&addr32[1], code, sizeof(code))); + check(addr32[ARRAY_SIZE(code) + 1] == 0); + + /* Test multipage 32-bit repeated instruction */ + addr32 = buf + PAGE_SIZE * 4 - 8; + check(!patch_instructions(addr32 + 1, &inst32, 12, true)); + + check(addr32[0] == 0); + check(addr32[1] == inst32); + check(addr32[2] == inst32); + check(addr32[3] == inst32); + check(addr32[4] == 0); + + /* Test multipage 64-bit repeated instruction */ + if (IS_ENABLED(CONFIG_PPC64)) { + check(ppc_inst_prefixed(inst64)); + + addr64 = buf + PAGE_SIZE * 5 - 8; + ppc_inst_write(code, inst64); + check(!patch_instructions((u32 *)(addr64 + 1), code, 24, true)); + + check(addr64[0] == 0); + check(ppc_inst_equal(ppc_inst_read((u32 *)&addr64[1]), inst64)); + check(ppc_inst_equal(ppc_inst_read((u32 *)&addr64[2]), inst64)); + check(ppc_inst_equal(ppc_inst_read((u32 *)&addr64[3]), inst64)); + check(addr64[4] == 0); + } + + /* Test multipage memcpy */ + addr32 = buf + PAGE_SIZE * 6 - 12; + + for (int i = 0; i < ARRAY_SIZE(code); i++) + code[i] = i + 1; + + check(!patch_instructions(addr32 + 1, code, sizeof(code), false)); + + check(addr32[0] == 0); + check(!memcmp(&addr32[1], code, sizeof(code))); + check(addr32[ARRAY_SIZE(code) + 1] == 0); + + vfree(buf); +} + static int __init test_code_patching(void) { pr_info("Running code patching self-tests ...\n"); @@ -356,6 +447,7 @@ static int __init test_code_patching(void) test_create_function_call(); test_translate_branch(); test_prefixed_patching(); + test_multi_instruction_patching(); return 0; } 
diff --git a/arch/powerpc/mm/Makefile b/arch/powerpc/mm/Makefile index 503a6e2499..0fe2f085c0 100644 --- a/arch/powerpc/mm/Makefile +++ b/arch/powerpc/mm/Makefile @@ -3,8 +3,6 @@ # Makefile for the linux ppc-specific parts of the memory manager. # -ccflags-$(CONFIG_PPC64) := $(NO_MINIMAL_TOC) - obj-y := fault.o mem.o pgtable.o maccess.o pageattr.o \ init_$(BITS).o pgtable_$(BITS).o \ pgtable-frag.o ioremap.o ioremap_$(BITS).o \ diff --git a/arch/powerpc/mm/book3s32/mmu.c b/arch/powerpc/mm/book3s32/mmu.c index 100f999871..625fe7d08e 100644 --- a/arch/powerpc/mm/book3s32/mmu.c +++ b/arch/powerpc/mm/book3s32/mmu.c @@ -184,7 +184,7 @@ unsigned long __init mmu_mapin_ram(unsigned long base, unsigned long top) static bool is_module_segment(unsigned long addr) { - if (!IS_ENABLED(CONFIG_MODULES)) + if (!IS_ENABLED(CONFIG_EXECMEM)) return false; if (addr < ALIGN_DOWN(MODULES_VADDR, SZ_256M)) return false; diff --git a/arch/powerpc/mm/book3s64/Makefile b/arch/powerpc/mm/book3s64/Makefile index cad2abc173..33af579585 100644 --- a/arch/powerpc/mm/book3s64/Makefile +++ b/arch/powerpc/mm/book3s64/Makefile @@ -1,7 +1,5 @@ # SPDX-License-Identifier: GPL-2.0 -ccflags-y := $(NO_MINIMAL_TOC) - obj-y += mmu_context.o pgtable.o trace.o ifdef CONFIG_PPC_64S_HASH_MMU CFLAGS_REMOVE_slb.o = $(CC_FLAGS_FTRACE) diff --git a/arch/powerpc/mm/book3s64/slice.c b/arch/powerpc/mm/book3s64/slice.c index c0b58afb9a..ef3ce37f1b 100644 --- a/arch/powerpc/mm/book3s64/slice.c +++ b/arch/powerpc/mm/book3s64/slice.c @@ -282,12 +282,10 @@ static unsigned long slice_find_area_bottomup(struct mm_struct *mm, { int pshift = max_t(int, mmu_psize_defs[psize].shift, PAGE_SHIFT); unsigned long found, next_end; - struct vm_unmapped_area_info info; - - info.flags = 0; - info.length = len; - info.align_mask = PAGE_MASK & ((1ul << pshift) - 1); - info.align_offset = 0; + struct vm_unmapped_area_info info = { + .length = len, + .align_mask = PAGE_MASK & ((1ul << pshift) - 1), + }; /* * Check till the allow max value 
for this mmap request */ @@ -326,13 +324,13 @@ static unsigned long slice_find_area_topdown(struct mm_struct *mm, { int pshift = max_t(int, mmu_psize_defs[psize].shift, PAGE_SHIFT); unsigned long found, prev; - struct vm_unmapped_area_info info; + struct vm_unmapped_area_info info = { + .flags = VM_UNMAPPED_AREA_TOPDOWN, + .length = len, + .align_mask = PAGE_MASK & ((1ul << pshift) - 1), + }; unsigned long min_addr = max(PAGE_SIZE, mmap_min_addr); - info.flags = VM_UNMAPPED_AREA_TOPDOWN; - info.length = len; - info.align_mask = PAGE_MASK & ((1ul << pshift) - 1); - info.align_offset = 0; /* * If we are trying to allocate above DEFAULT_MAP_WINDOW * Add the different to the mmap_base. diff --git a/arch/powerpc/mm/cacheflush.c b/arch/powerpc/mm/cacheflush.c index 15189592da..7186516eca 100644 --- a/arch/powerpc/mm/cacheflush.c +++ b/arch/powerpc/mm/cacheflush.c @@ -78,7 +78,7 @@ EXPORT_SYMBOL(flush_icache_range); #ifdef CONFIG_HIGHMEM /** - * flush_dcache_icache_phys() - Flush a page by it's physical address + * flush_dcache_icache_phys() - Flush a page by its physical address * @physaddr: the physical address of the page */ static void flush_dcache_icache_phys(unsigned long physaddr) diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c index 53335ae21a..2156904524 100644 --- a/arch/powerpc/mm/fault.c +++ b/arch/powerpc/mm/fault.c @@ -71,23 +71,26 @@ static noinline int bad_area_nosemaphore(struct pt_regs *regs, unsigned long add return __bad_area_nosemaphore(regs, address, SEGV_MAPERR); } -static int __bad_area(struct pt_regs *regs, unsigned long address, int si_code) +static int __bad_area(struct pt_regs *regs, unsigned long address, int si_code, + struct mm_struct *mm, struct vm_area_struct *vma) { - struct mm_struct *mm = current->mm; /* * Something tried to access memory that isn't in our memory map.. * Fix it, but check if it's kernel or user first.. 
*/ - mmap_read_unlock(mm); + if (mm) + mmap_read_unlock(mm); + else + vma_end_read(vma); return __bad_area_nosemaphore(regs, address, si_code); } static noinline int bad_access_pkey(struct pt_regs *regs, unsigned long address, + struct mm_struct *mm, struct vm_area_struct *vma) { - struct mm_struct *mm = current->mm; int pkey; /* @@ -109,7 +112,10 @@ static noinline int bad_access_pkey(struct pt_regs *regs, unsigned long address, */ pkey = vma_pkey(vma); - mmap_read_unlock(mm); + if (mm) + mmap_read_unlock(mm); + else + vma_end_read(vma); /* * If we are in kernel mode, bail out with a SEGV, this will @@ -124,9 +130,10 @@ static noinline int bad_access_pkey(struct pt_regs *regs, unsigned long address, return 0; } -static noinline int bad_access(struct pt_regs *regs, unsigned long address) +static noinline int bad_access(struct pt_regs *regs, unsigned long address, + struct mm_struct *mm, struct vm_area_struct *vma) { - return __bad_area(regs, address, SEGV_ACCERR); + return __bad_area(regs, address, SEGV_ACCERR, mm, vma); } static int do_sigbus(struct pt_regs *regs, unsigned long address, @@ -479,13 +486,13 @@ static int ___do_page_fault(struct pt_regs *regs, unsigned long address, if (unlikely(access_pkey_error(is_write, is_exec, (error_code & DSISR_KEYFAULT), vma))) { - vma_end_read(vma); - goto lock_mmap; + count_vm_vma_lock_event(VMA_LOCK_SUCCESS); + return bad_access_pkey(regs, address, NULL, vma); } if (unlikely(access_error(is_write, is_exec, vma))) { - vma_end_read(vma); - goto lock_mmap; + count_vm_vma_lock_event(VMA_LOCK_SUCCESS); + return bad_access(regs, address, NULL, vma); } fault = handle_mm_fault(vma, address, flags | FAULT_FLAG_VMA_LOCK, regs); @@ -521,10 +528,10 @@ retry: if (unlikely(access_pkey_error(is_write, is_exec, (error_code & DSISR_KEYFAULT), vma))) - return bad_access_pkey(regs, address, vma); + return bad_access_pkey(regs, address, mm, vma); if (unlikely(access_error(is_write, is_exec, vma))) - return bad_access(regs, address); + return 
bad_access(regs, address, mm, vma); /* * If for any reason at all we couldn't handle the fault, diff --git a/arch/powerpc/mm/kasan/init_book3e_64.c b/arch/powerpc/mm/kasan/init_book3e_64.c index 11519e88dc..43c03b84ff 100644 --- a/arch/powerpc/mm/kasan/init_book3e_64.c +++ b/arch/powerpc/mm/kasan/init_book3e_64.c @@ -112,7 +112,7 @@ void __init kasan_init(void) pte_t zero_pte = pfn_pte(virt_to_pfn(kasan_early_shadow_page), PAGE_KERNEL_RO); for_each_mem_range(i, &start, &end) - kasan_init_phys_region((void *)start, (void *)end); + kasan_init_phys_region(phys_to_virt(start), phys_to_virt(end)); if (IS_ENABLED(CONFIG_KASAN_VMALLOC)) kasan_remove_zero_shadow((void *)VMALLOC_START, VMALLOC_SIZE); diff --git a/arch/powerpc/mm/kasan/init_book3s_64.c b/arch/powerpc/mm/kasan/init_book3s_64.c index 9300d641cf..3fb5ce4f48 100644 --- a/arch/powerpc/mm/kasan/init_book3s_64.c +++ b/arch/powerpc/mm/kasan/init_book3s_64.c @@ -62,7 +62,7 @@ void __init kasan_init(void) } for_each_mem_range(i, &start, &end) - kasan_init_phys_region((void *)start, (void *)end); + kasan_init_phys_region(phys_to_virt(start), phys_to_virt(end)); for (i = 0; i < PTRS_PER_PTE; i++) __set_pte_at(&init_mm, (unsigned long)kasan_early_shadow_page, diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c index 3a440004b9..d325217ab2 100644 --- a/arch/powerpc/mm/mem.c +++ b/arch/powerpc/mm/mem.c @@ -16,6 +16,8 @@ #include <linux/highmem.h> #include <linux/suspend.h> #include <linux/dma-direct.h> +#include <linux/execmem.h> +#include <linux/vmalloc.h> #include <asm/swiotlb.h> #include <asm/machdep.h> @@ -30,7 +32,7 @@ #include <mm/mmu_decl.h> -unsigned long long memory_limit; +unsigned long long memory_limit __initdata; unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)] __page_aligned_bss; EXPORT_SYMBOL(empty_zero_page); @@ -406,3 +408,66 @@ int devmem_is_allowed(unsigned long pfn) * the EHEA driver. Drop this when drivers/net/ethernet/ibm/ehea is removed. 
*/ EXPORT_SYMBOL_GPL(walk_system_ram_range); + +#ifdef CONFIG_EXECMEM +static struct execmem_info execmem_info __ro_after_init; + +struct execmem_info __init *execmem_arch_setup(void) +{ + pgprot_t kprobes_prot = strict_module_rwx_enabled() ? PAGE_KERNEL_ROX : PAGE_KERNEL_EXEC; + pgprot_t prot = strict_module_rwx_enabled() ? PAGE_KERNEL : PAGE_KERNEL_EXEC; + unsigned long fallback_start = 0, fallback_end = 0; + unsigned long start, end; + + /* + * BOOK3S_32 and 8xx define MODULES_VADDR for text allocations and + * allow allocating data in the entire vmalloc space + */ +#ifdef MODULES_VADDR + unsigned long limit = (unsigned long)_etext - SZ_32M; + + BUILD_BUG_ON(TASK_SIZE > MODULES_VADDR); + + /* First try within 32M limit from _etext to avoid branch trampolines */ + if (MODULES_VADDR < PAGE_OFFSET && MODULES_END > limit) { + start = limit; + fallback_start = MODULES_VADDR; + fallback_end = MODULES_END; + } else { + start = MODULES_VADDR; + } + + end = MODULES_END; +#else + start = VMALLOC_START; + end = VMALLOC_END; +#endif + + execmem_info = (struct execmem_info){ + .ranges = { + [EXECMEM_DEFAULT] = { + .start = start, + .end = end, + .pgprot = prot, + .alignment = 1, + .fallback_start = fallback_start, + .fallback_end = fallback_end, + }, + [EXECMEM_KPROBES] = { + .start = VMALLOC_START, + .end = VMALLOC_END, + .pgprot = kprobes_prot, + .alignment = 1, + }, + [EXECMEM_MODULE_DATA] = { + .start = VMALLOC_START, + .end = VMALLOC_END, + .pgprot = PAGE_KERNEL, + .alignment = 1, + }, + }, + }; + + return &execmem_info; +} +#endif /* CONFIG_EXECMEM */ diff --git a/arch/powerpc/mm/nohash/8xx.c b/arch/powerpc/mm/nohash/8xx.c index 43d4842bb1..d93433e26d 100644 --- a/arch/powerpc/mm/nohash/8xx.c +++ b/arch/powerpc/mm/nohash/8xx.c @@ -94,7 +94,8 @@ static int __ref __early_map_kernel_hugepage(unsigned long va, phys_addr_t pa, return -EINVAL; set_huge_pte_at(&init_mm, va, ptep, - pte_mkhuge(pfn_pte(pa >> PAGE_SHIFT, prot)), psize); + pte_mkhuge(pfn_pte(pa >> PAGE_SHIFT, 
prot)), + 1UL << mmu_psize_to_shift(psize)); return 0; } diff --git a/arch/powerpc/mm/nohash/Makefile b/arch/powerpc/mm/nohash/Makefile index f3894e79d5..b3f0498dd4 100644 --- a/arch/powerpc/mm/nohash/Makefile +++ b/arch/powerpc/mm/nohash/Makefile @@ -1,7 +1,5 @@ # SPDX-License-Identifier: GPL-2.0 -ccflags-$(CONFIG_PPC64) := $(NO_MINIMAL_TOC) - obj-y += mmu_context.o tlb.o tlb_low.o kup.o obj-$(CONFIG_PPC_BOOK3E_64) += tlb_low_64e.o book3e_pgtable.o obj-$(CONFIG_40x) += 40x.o diff --git a/arch/powerpc/mm/nohash/kaslr_booke.c b/arch/powerpc/mm/nohash/kaslr_booke.c index cdff129abb..5c8d1bb98b 100644 --- a/arch/powerpc/mm/nohash/kaslr_booke.c +++ b/arch/powerpc/mm/nohash/kaslr_booke.c @@ -376,7 +376,7 @@ notrace void __init kaslr_early_init(void *dt_ptr, phys_addr_t size) create_kaslr_tlb_entry(1, tlb_virt, tlb_phys); } - /* Copy the kernel to it's new location and run */ + /* Copy the kernel to its new location and run */ memcpy((void *)kernstart_virt_addr, (void *)_stext, kernel_sz); flush_icache_range(kernstart_virt_addr, kernstart_virt_addr + kernel_sz); diff --git a/arch/powerpc/mm/pgtable_64.c b/arch/powerpc/mm/pgtable_64.c index 9b99113cb5..6621cfc3ba 100644 --- a/arch/powerpc/mm/pgtable_64.c +++ b/arch/powerpc/mm/pgtable_64.c @@ -102,7 +102,7 @@ struct page *p4d_page(p4d_t p4d) { if (p4d_leaf(p4d)) { if (!IS_ENABLED(CONFIG_HAVE_ARCH_HUGE_VMAP)) - VM_WARN_ON(!p4d_huge(p4d)); + VM_WARN_ON(!p4d_leaf(p4d)); return pte_page(p4d_pte(p4d)); } return virt_to_page(p4d_pgtable(p4d)); @@ -113,7 +113,7 @@ struct page *pud_page(pud_t pud) { if (pud_leaf(pud)) { if (!IS_ENABLED(CONFIG_HAVE_ARCH_HUGE_VMAP)) - VM_WARN_ON(!pud_huge(pud)); + VM_WARN_ON(!pud_leaf(pud)); return pte_page(pud_pte(pud)); } return virt_to_page(pud_pgtable(pud)); @@ -132,7 +132,7 @@ struct page *pmd_page(pmd_t pmd) * enabled so these checks can't be used. 
*/ if (!IS_ENABLED(CONFIG_HAVE_ARCH_HUGE_VMAP)) - VM_WARN_ON(!(pmd_leaf(pmd) || pmd_huge(pmd))); + VM_WARN_ON(!pmd_leaf(pmd)); return pte_page(pmd_pte(pmd)); } return virt_to_page(pmd_page_vaddr(pmd)); diff --git a/arch/powerpc/mm/ptdump/hashpagetable.c b/arch/powerpc/mm/ptdump/hashpagetable.c index 9a60158783..a6baa6166d 100644 --- a/arch/powerpc/mm/ptdump/hashpagetable.c +++ b/arch/powerpc/mm/ptdump/hashpagetable.c @@ -491,7 +491,7 @@ static void walk_vmemmap(struct pg_state *st) * Traverse the vmemmaped memory and dump pages that are in the hash * pagetable. */ - while (ptr->list) { + while (ptr) { hpte_find(st, ptr->virt_addr, mmu_vmemmap_psize); ptr = ptr->list; } diff --git a/arch/powerpc/net/bpf_jit_comp.c b/arch/powerpc/net/bpf_jit_comp.c index 0f9a217833..984655419d 100644 --- a/arch/powerpc/net/bpf_jit_comp.c +++ b/arch/powerpc/net/bpf_jit_comp.c @@ -359,3 +359,13 @@ void bpf_jit_free(struct bpf_prog *fp) bpf_prog_unlock_free(fp); } + +bool bpf_jit_supports_kfunc_call(void) +{ + return true; +} + +bool bpf_jit_supports_far_kfunc_call(void) +{ + return IS_ENABLED(CONFIG_PPC64); +} diff --git a/arch/powerpc/net/bpf_jit_comp64.c b/arch/powerpc/net/bpf_jit_comp64.c index 58522de615..7703dcf48b 100644 --- a/arch/powerpc/net/bpf_jit_comp64.c +++ b/arch/powerpc/net/bpf_jit_comp64.c @@ -208,17 +208,13 @@ bpf_jit_emit_func_call_hlp(u32 *image, u32 *fimage, struct codegen_context *ctx, unsigned long func_addr = func ? 
ppc_function_entry((void *)func) : 0; long reladdr; - if (WARN_ON_ONCE(!core_kernel_text(func_addr))) + if (WARN_ON_ONCE(!kernel_text_address(func_addr))) return -EINVAL; - if (IS_ENABLED(CONFIG_PPC_KERNEL_PCREL)) { - reladdr = func_addr - local_paca->kernelbase; +#ifdef CONFIG_PPC_KERNEL_PCREL + reladdr = func_addr - local_paca->kernelbase; - if (reladdr >= (long)SZ_8G || reladdr < -(long)SZ_8G) { - pr_err("eBPF: address of %ps out of range of 34-bit relative address.\n", - (void *)func); - return -ERANGE; - } + if (reladdr < (long)SZ_8G && reladdr >= -(long)SZ_8G) { EMIT(PPC_RAW_LD(_R12, _R13, offsetof(struct paca_struct, kernelbase))); /* Align for subsequent prefix instruction */ if (!IS_ALIGNED((unsigned long)fimage + CTX_NIA(ctx), 8)) @@ -227,6 +223,26 @@ bpf_jit_emit_func_call_hlp(u32 *image, u32 *fimage, struct codegen_context *ctx, EMIT(PPC_PREFIX_MLS | __PPC_PRFX_R(0) | IMM_H18(reladdr)); EMIT(PPC_INST_PADDI | ___PPC_RT(_R12) | ___PPC_RA(_R12) | IMM_L(reladdr)); } else { + unsigned long pc = (unsigned long)fimage + CTX_NIA(ctx); + bool alignment_needed = !IS_ALIGNED(pc, 8); + + reladdr = func_addr - (alignment_needed ? 
pc + 4 : pc); + + if (reladdr < (long)SZ_8G && reladdr >= -(long)SZ_8G) { + if (alignment_needed) + EMIT(PPC_RAW_NOP()); + /* pla r12,addr */ + EMIT(PPC_PREFIX_MLS | __PPC_PRFX_R(1) | IMM_H18(reladdr)); + EMIT(PPC_INST_PADDI | ___PPC_RT(_R12) | IMM_L(reladdr)); + } else { + /* We can clobber r12 */ + PPC_LI64(_R12, func); + } + } + EMIT(PPC_RAW_MTCTR(_R12)); + EMIT(PPC_RAW_BCTRL()); +#else + if (core_kernel_text(func_addr)) { reladdr = func_addr - kernel_toc_addr(); if (reladdr > 0x7FFFFFFF || reladdr < -(0x80000000L)) { pr_err("eBPF: address of %ps out of range of kernel_toc.\n", (void *)func); @@ -235,9 +251,34 @@ bpf_jit_emit_func_call_hlp(u32 *image, u32 *fimage, struct codegen_context *ctx, EMIT(PPC_RAW_ADDIS(_R12, _R2, PPC_HA(reladdr))); EMIT(PPC_RAW_ADDI(_R12, _R12, PPC_LO(reladdr))); + EMIT(PPC_RAW_MTCTR(_R12)); + EMIT(PPC_RAW_BCTRL()); + } else { + if (IS_ENABLED(CONFIG_PPC64_ELF_ABI_V1)) { + /* func points to the function descriptor */ + PPC_LI64(bpf_to_ppc(TMP_REG_2), func); + /* Load actual entry point from function descriptor */ + EMIT(PPC_RAW_LD(bpf_to_ppc(TMP_REG_1), bpf_to_ppc(TMP_REG_2), 0)); + /* ... and move it to CTR */ + EMIT(PPC_RAW_MTCTR(bpf_to_ppc(TMP_REG_1))); + /* + * Load TOC from function descriptor at offset 8. + * We can clobber r2 since we get called through a + * function pointer (so caller will save/restore r2). + */ + EMIT(PPC_RAW_LD(_R2, bpf_to_ppc(TMP_REG_2), 8)); + } else { + PPC_LI64(_R12, func); + EMIT(PPC_RAW_MTCTR(_R12)); + } + EMIT(PPC_RAW_BCTRL()); + /* + * Load r2 with kernel TOC as kernel TOC is used if function address falls + * within core kernel text. 
+ */ + EMIT(PPC_RAW_LD(_R2, _R13, offsetof(struct paca_struct, kernel_toc))); } - EMIT(PPC_RAW_MTCTR(_R12)); - EMIT(PPC_RAW_BCTRL()); +#endif return 0; } diff --git a/arch/powerpc/perf/hv-24x7.c b/arch/powerpc/perf/hv-24x7.c index 057ec2e345..d400fa391c 100644 --- a/arch/powerpc/perf/hv-24x7.c +++ b/arch/powerpc/perf/hv-24x7.c @@ -425,16 +425,6 @@ static char *memdup_to_str(char *maybe_str, int max_len, gfp_t gfp) return kasprintf(gfp, "%.*s", max_len, maybe_str); } -static ssize_t device_show_string(struct device *dev, - struct device_attribute *attr, char *buf) -{ - struct dev_ext_attribute *d; - - d = container_of(attr, struct dev_ext_attribute, attr); - - return sprintf(buf, "%s\n", (char *)d->var); -} - static ssize_t cpumask_show(struct device *dev, struct device_attribute *attr, char *buf) { diff --git a/arch/powerpc/platforms/512x/mpc512x_shared.c b/arch/powerpc/platforms/512x/mpc512x_shared.c index 8f75e9574c..8c1f3b629f 100644 --- a/arch/powerpc/platforms/512x/mpc512x_shared.c +++ b/arch/powerpc/platforms/512x/mpc512x_shared.c @@ -279,7 +279,7 @@ static void __init mpc512x_setup_diu(void) * and so negatively affect boot time. Instead we reserve the * already configured frame buffer area so that it won't be * destroyed. The starting address of the area to reserve and - * also it's length is passed to memblock_reserve(). It will be + * also its length is passed to memblock_reserve(). It will be * freed later on first open of fbdev, when splash image is not * needed any more. */ diff --git a/arch/powerpc/platforms/52xx/lite5200_sleep.S b/arch/powerpc/platforms/52xx/lite5200_sleep.S index 0b12647e7b..0ec2522ee4 100644 --- a/arch/powerpc/platforms/52xx/lite5200_sleep.S +++ b/arch/powerpc/platforms/52xx/lite5200_sleep.S @@ -203,7 +203,8 @@ lite5200_wakeup: /* HIDs, MSR */ LOAD_SPRN(HID1, 0x19) - LOAD_SPRN(HID2, 0x1a) + /* FIXME: Should this use HID2_G2_LE? 
*/ + LOAD_SPRN(HID2_750FX, 0x1a) /* address translation is tricky (see turn_on_mmu) */ @@ -283,7 +284,8 @@ SYM_FUNC_START_LOCAL(save_regs) SAVE_SPRN(HID0, 0x18) SAVE_SPRN(HID1, 0x19) - SAVE_SPRN(HID2, 0x1a) + /* FIXME: Should this use HID2_G2_LE? */ + SAVE_SPRN(HID2_750FX, 0x1a) mfmsr r10 stw r10, (4*0x1b)(r4) /*SAVE_SPRN(LR, 0x1c) have to save it before the call */ diff --git a/arch/powerpc/platforms/52xx/mpc52xx_common.c b/arch/powerpc/platforms/52xx/mpc52xx_common.c index b4938e344f..253421ffb4 100644 --- a/arch/powerpc/platforms/52xx/mpc52xx_common.c +++ b/arch/powerpc/platforms/52xx/mpc52xx_common.c @@ -12,12 +12,10 @@ #undef DEBUG -#include <linux/gpio.h> #include <linux/kernel.h> #include <linux/spinlock.h> #include <linux/of_address.h> #include <linux/of_platform.h> -#include <linux/of_gpio.h> #include <linux/export.h> #include <asm/io.h> #include <asm/mpc52xx.h> diff --git a/arch/powerpc/platforms/52xx/mpc52xx_gpt.c b/arch/powerpc/platforms/52xx/mpc52xx_gpt.c index 581059527c..2bd6abcdc1 100644 --- a/arch/powerpc/platforms/52xx/mpc52xx_gpt.c +++ b/arch/powerpc/platforms/52xx/mpc52xx_gpt.c @@ -48,6 +48,7 @@ * the output mode. This driver does not change the output mode setting. */ +#include <linux/gpio/driver.h> #include <linux/irq.h> #include <linux/interrupt.h> #include <linux/io.h> @@ -56,7 +57,6 @@ #include <linux/of.h> #include <linux/of_address.h> #include <linux/of_irq.h> -#include <linux/of_gpio.h> #include <linux/platform_device.h> #include <linux/kernel.h> #include <linux/property.h> diff --git a/arch/powerpc/platforms/83xx/suspend-asm.S b/arch/powerpc/platforms/83xx/suspend-asm.S index bc6bd4d0ae..6a62ed6082 100644 --- a/arch/powerpc/platforms/83xx/suspend-asm.S +++ b/arch/powerpc/platforms/83xx/suspend-asm.S @@ -68,7 +68,8 @@ _GLOBAL(mpc83xx_enter_deep_sleep) mfspr r5, SPRN_HID0 mfspr r6, SPRN_HID1 - mfspr r7, SPRN_HID2 + /* FIXME: Should this use SPRN_HID2_G2_LE? 
*/ + mfspr r7, SPRN_HID2_750FX stw r5, SS_HID+0(r3) stw r6, SS_HID+4(r3) @@ -396,7 +397,8 @@ mpc83xx_deep_resume: mtspr SPRN_HID0, r5 mtspr SPRN_HID1, r6 - mtspr SPRN_HID2, r7 + /* FIXME: Should this use SPRN_HID2_G2_LE? */ + mtspr SPRN_HID2_750FX, r7 lwz r4, SS_IABR+0(r3) lwz r5, SS_IABR+4(r3) diff --git a/arch/powerpc/platforms/cell/iommu.c b/arch/powerpc/platforms/cell/iommu.c index 1202a69b0a..4cd9c0de22 100644 --- a/arch/powerpc/platforms/cell/iommu.c +++ b/arch/powerpc/platforms/cell/iommu.c @@ -424,23 +424,6 @@ static void __init cell_iommu_setup_hardware(struct cbe_iommu *iommu, cell_iommu_enable_hardware(iommu); } -#if 0/* Unused for now */ -static struct iommu_window *find_window(struct cbe_iommu *iommu, - unsigned long offset, unsigned long size) -{ - struct iommu_window *window; - - /* todo: check for overlapping (but not equal) windows) */ - - list_for_each_entry(window, &(iommu->windows), list) { - if (window->offset == offset && window->size == size) - return window; - } - - return NULL; -} -#endif - static inline u32 cell_iommu_get_ioid(struct device_node *np) { const u32 *ioid; diff --git a/arch/powerpc/platforms/cell/smp.c b/arch/powerpc/platforms/cell/smp.c index 30394c6f88..fee638fd89 100644 --- a/arch/powerpc/platforms/cell/smp.c +++ b/arch/powerpc/platforms/cell/smp.c @@ -54,6 +54,7 @@ static cpumask_t of_spin_map; /** * smp_startup_cpu() - start the given cpu + * @lcpu: Logical CPU ID of the CPU to be started. * * At boot time, there is nothing to do for primary threads which were * started from Open Firmware. 
For anything else, call RTAS with the diff --git a/arch/powerpc/platforms/cell/spufs/file.c b/arch/powerpc/platforms/cell/spufs/file.c index 02a8158c46..7f4e0db8eb 100644 --- a/arch/powerpc/platforms/cell/spufs/file.c +++ b/arch/powerpc/platforms/cell/spufs/file.c @@ -1704,23 +1704,11 @@ static int spufs_mfc_flush(struct file *file, fl_owner_t id) ret = spu_acquire(ctx); if (ret) - goto out; -#if 0 -/* this currently hangs */ - ret = spufs_wait(ctx->mfc_wq, - ctx->ops->set_mfc_query(ctx, ctx->tagwait, 2)); - if (ret) - goto out; - ret = spufs_wait(ctx->mfc_wq, - ctx->ops->read_mfc_tagstatus(ctx) == ctx->tagwait); - if (ret) - goto out; -#else - ret = 0; -#endif + return ret; + spu_release(ctx); -out: - return ret; + + return 0; } static int spufs_mfc_fsync(struct file *file, loff_t start, loff_t end, int datasync) diff --git a/arch/powerpc/platforms/cell/spufs/sched.c b/arch/powerpc/platforms/cell/spufs/sched.c index 99bd027a7f..610ca85706 100644 --- a/arch/powerpc/platforms/cell/spufs/sched.c +++ b/arch/powerpc/platforms/cell/spufs/sched.c @@ -868,7 +868,7 @@ static int __spu_deactivate(struct spu_context *ctx, int force, int max_prio) } /** - * spu_deactivate - unbind a context from it's physical spu + * spu_deactivate - unbind a context from its physical spu * @ctx: spu context to unbind * * Unbind @ctx from the physical spu it is running on and schedule diff --git a/arch/powerpc/platforms/maple/pci.c b/arch/powerpc/platforms/maple/pci.c index b911b31717..b9ff37c7f6 100644 --- a/arch/powerpc/platforms/maple/pci.c +++ b/arch/powerpc/platforms/maple/pci.c @@ -595,7 +595,7 @@ void __init maple_pci_init(void) /* Probe root PCI hosts, that is on U3 the AGP host and the * HyperTransport host. 
That one is actually "kept" around - * and actually added last as it's resource management relies + * and actually added last as its resource management relies * on the AGP resources to have been setup first */ root = of_find_node_by_path("/"); diff --git a/arch/powerpc/platforms/powermac/pic.c b/arch/powerpc/platforms/powermac/pic.c index 7135ea1d7d..2202bf77c7 100644 --- a/arch/powerpc/platforms/powermac/pic.c +++ b/arch/powerpc/platforms/powermac/pic.c @@ -2,7 +2,7 @@ /* * Support for the interrupt controllers found on Power Macintosh, * currently Apple's "Grand Central" interrupt controller in all - * it's incarnations. OpenPIC support used on newer machines is + * its incarnations. OpenPIC support used on newer machines is * in a separate file * * Copyright (C) 1997 Paul Mackerras (paulus@samba.org) diff --git a/arch/powerpc/platforms/powermac/sleep.S b/arch/powerpc/platforms/powermac/sleep.S index d497a60003..822ed70cdc 100644 --- a/arch/powerpc/platforms/powermac/sleep.S +++ b/arch/powerpc/platforms/powermac/sleep.S @@ -176,7 +176,7 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_USE_HIGH_BATS) * memory location containing the PC to resume from * at address 0. * - On Core99, we must store the wakeup vector at - * address 0x80 and eventually it's parameters + * address 0x80 and eventually its parameters * at address 0x84. I've have some trouble with those * parameters however and I no longer use them. 
*/ diff --git a/arch/powerpc/platforms/powernv/opal-fadump.c b/arch/powerpc/platforms/powernv/opal-fadump.c index 964f464b1b..c9c1dfb354 100644 --- a/arch/powerpc/platforms/powernv/opal-fadump.c +++ b/arch/powerpc/platforms/powernv/opal-fadump.c @@ -513,8 +513,8 @@ out: final_note(note_buf); pr_debug("Updating elfcore header (%llx) with cpu notes\n", - fdh->elfcorehdr_addr); - fadump_update_elfcore_header(__va(fdh->elfcorehdr_addr)); + fadump_conf->elfcorehdr_addr); + fadump_update_elfcore_header((char *)fadump_conf->elfcorehdr_addr); return 0; } @@ -526,12 +526,7 @@ static int __init opal_fadump_process(struct fw_dump *fadump_conf) if (!opal_fdm_active || !fadump_conf->fadumphdr_addr) return rc; - /* Validate the fadump crash info header */ fdh = __va(fadump_conf->fadumphdr_addr); - if (fdh->magic_number != FADUMP_CRASH_INFO_MAGIC) { - pr_err("Crash info header is not valid.\n"); - return rc; - } #ifdef CONFIG_OPAL_CORE /* @@ -545,18 +540,7 @@ static int __init opal_fadump_process(struct fw_dump *fadump_conf) kernel_initiated = true; #endif - rc = opal_fadump_build_cpu_notes(fadump_conf, fdh); - if (rc) - return rc; - - /* - * We are done validating dump info and elfcore header is now ready - * to be exported. set elfcorehdr_addr so that vmcore module will - * export the elfcore header through '/proc/vmcore'. 
- */ - elfcorehdr_addr = fdh->elfcorehdr_addr; - - return rc; + return opal_fadump_build_cpu_notes(fadump_conf, fdh); } static void opal_fadump_region_show(struct fw_dump *fadump_conf, @@ -615,6 +599,12 @@ static void opal_fadump_trigger(struct fadump_crash_info_header *fdh, pr_emerg("No backend support for MPIPL!\n"); } +/* FADUMP_MAX_MEM_REGS or lower */ +static int opal_fadump_max_boot_mem_rgns(void) +{ + return FADUMP_MAX_MEM_REGS; +} + static struct fadump_ops opal_fadump_ops = { .fadump_init_mem_struct = opal_fadump_init_mem_struct, .fadump_get_metadata_size = opal_fadump_get_metadata_size, @@ -627,6 +617,7 @@ static struct fadump_ops opal_fadump_ops = { .fadump_process = opal_fadump_process, .fadump_region_show = opal_fadump_region_show, .fadump_trigger = opal_fadump_trigger, + .fadump_max_boot_mem_rgns = opal_fadump_max_boot_mem_rgns, }; void __init opal_fadump_dt_scan(struct fw_dump *fadump_conf, u64 node) @@ -674,8 +665,10 @@ void __init opal_fadump_dt_scan(struct fw_dump *fadump_conf, u64 node) } } - fadump_conf->ops = &opal_fadump_ops; - fadump_conf->fadump_supported = 1; + fadump_conf->ops = &opal_fadump_ops; + fadump_conf->fadump_supported = 1; + /* TODO: Add support to pass additional parameters */ + fadump_conf->param_area_supported = 0; /* * Firmware supports 32-bit field for size. 
Align it to PAGE_SIZE diff --git a/arch/powerpc/platforms/powernv/opal.c b/arch/powerpc/platforms/powernv/opal.c index 45dd77e3cc..5d0f35bb91 100644 --- a/arch/powerpc/platforms/powernv/opal.c +++ b/arch/powerpc/platforms/powernv/opal.c @@ -792,14 +792,6 @@ static int __init opal_sysfs_init(void) return 0; } -static ssize_t export_attr_read(struct file *fp, struct kobject *kobj, - struct bin_attribute *bin_attr, char *buf, - loff_t off, size_t count) -{ - return memory_read_from_buffer(buf, count, &off, bin_attr->private, - bin_attr->size); -} - static int opal_add_one_export(struct kobject *parent, const char *export_name, struct device_node *np, const char *prop_name) { @@ -826,7 +818,7 @@ static int opal_add_one_export(struct kobject *parent, const char *export_name, sysfs_bin_attr_init(attr); attr->attr.name = name; attr->attr.mode = 0400; - attr->read = export_attr_read; + attr->read = sysfs_bin_attr_simple_read; attr->private = __va(vals[0]); attr->size = vals[1]; diff --git a/arch/powerpc/platforms/powernv/pci-sriov.c b/arch/powerpc/platforms/powernv/pci-sriov.c index 59882da3e7..cc7b1dd54a 100644 --- a/arch/powerpc/platforms/powernv/pci-sriov.c +++ b/arch/powerpc/platforms/powernv/pci-sriov.c @@ -238,7 +238,7 @@ void pnv_pci_ioda_fixup_iov(struct pci_dev *pdev) } else if (pdev->is_physfn) { /* * For PFs adjust their allocated IOV resources to match what - * the PHB can support using it's M64 BAR table. + * the PHB can support using its M64 BAR table. 
*/ pnv_pci_ioda_fixup_iov_resources(pdev); } @@ -658,7 +658,7 @@ static void pnv_ioda_setup_vf_PE(struct pci_dev *pdev, u16 num_vfs) list_add_tail(&pe->list, &phb->ioda.pe_list); mutex_unlock(&phb->ioda.pe_list_mutex); - /* associate this pe to it's pdn */ + /* associate this pe to its pdn */ list_for_each_entry(vf_pdn, &pdn->parent->child_list, list) { if (vf_pdn->busno == vf_bus && vf_pdn->devfn == vf_devfn) { diff --git a/arch/powerpc/platforms/powernv/vas-window.c b/arch/powerpc/platforms/powernv/vas-window.c index b664838008..5147df3a18 100644 --- a/arch/powerpc/platforms/powernv/vas-window.c +++ b/arch/powerpc/platforms/powernv/vas-window.c @@ -1059,7 +1059,7 @@ struct vas_window *vas_tx_win_open(int vasid, enum vas_cop_type cop, } } else { /* - * Interrupt hanlder or fault window setup failed. Means + * Interrupt handler or fault window setup failed. Means * NX can not generate fault for page fault. So not * opening for user space tx window. */ diff --git a/arch/powerpc/platforms/ps3/device-init.c b/arch/powerpc/platforms/ps3/device-init.c index 878bc16024..b18e1c92e5 100644 --- a/arch/powerpc/platforms/ps3/device-init.c +++ b/arch/powerpc/platforms/ps3/device-init.c @@ -770,49 +770,51 @@ static struct task_struct *probe_task; static int ps3_probe_thread(void *data) { - struct ps3_notification_device dev; + struct { + struct ps3_notification_device dev; + u8 buf[512]; + } *local; + struct ps3_notify_cmd *notify_cmd; + struct ps3_notify_event *notify_event; int res; unsigned int irq; u64 lpar; - void *buf; - struct ps3_notify_cmd *notify_cmd; - struct ps3_notify_event *notify_event; pr_debug(" -> %s:%u: kthread started\n", __func__, __LINE__); - buf = kzalloc(512, GFP_KERNEL); - if (!buf) + local = kzalloc(sizeof(*local), GFP_KERNEL); + if (!local) return -ENOMEM; - lpar = ps3_mm_phys_to_lpar(__pa(buf)); - notify_cmd = buf; - notify_event = buf; + lpar = ps3_mm_phys_to_lpar(__pa(&local->buf)); + notify_cmd = (struct ps3_notify_cmd *)&local->buf; + 
notify_event = (struct ps3_notify_event *)&local->buf; /* dummy system bus device */ - dev.sbd.bus_id = (u64)data; - dev.sbd.dev_id = PS3_NOTIFICATION_DEV_ID; - dev.sbd.interrupt_id = PS3_NOTIFICATION_INTERRUPT_ID; + local->dev.sbd.bus_id = (u64)data; + local->dev.sbd.dev_id = PS3_NOTIFICATION_DEV_ID; + local->dev.sbd.interrupt_id = PS3_NOTIFICATION_INTERRUPT_ID; - res = lv1_open_device(dev.sbd.bus_id, dev.sbd.dev_id, 0); + res = lv1_open_device(local->dev.sbd.bus_id, local->dev.sbd.dev_id, 0); if (res) { pr_err("%s:%u: lv1_open_device failed %s\n", __func__, __LINE__, ps3_result(res)); goto fail_free; } - res = ps3_sb_event_receive_port_setup(&dev.sbd, PS3_BINDING_CPU_ANY, - &irq); + res = ps3_sb_event_receive_port_setup(&local->dev.sbd, + PS3_BINDING_CPU_ANY, &irq); if (res) { pr_err("%s:%u: ps3_sb_event_receive_port_setup failed %d\n", __func__, __LINE__, res); goto fail_close_device; } - spin_lock_init(&dev.lock); - rcuwait_init(&dev.wait); + spin_lock_init(&local->dev.lock); + rcuwait_init(&local->dev.wait); res = request_irq(irq, ps3_notification_interrupt, 0, - "ps3_notification", &dev); + "ps3_notification", &local->dev); if (res) { pr_err("%s:%u: request_irq failed %d\n", __func__, __LINE__, res); @@ -823,7 +825,7 @@ static int ps3_probe_thread(void *data) notify_cmd->operation_code = 0; /* must be zero */ notify_cmd->event_mask = 1UL << notify_region_probe; - res = ps3_notification_read_write(&dev, lpar, 1); + res = ps3_notification_read_write(&local->dev, lpar, 1); if (res) goto fail_free_irq; @@ -834,36 +836,37 @@ static int ps3_probe_thread(void *data) memset(notify_event, 0, sizeof(*notify_event)); - res = ps3_notification_read_write(&dev, lpar, 0); + res = ps3_notification_read_write(&local->dev, lpar, 0); if (res) break; pr_debug("%s:%u: notify event type 0x%llx bus id %llu dev id %llu" " type %llu port %llu\n", __func__, __LINE__, - notify_event->event_type, notify_event->bus_id, - notify_event->dev_id, notify_event->dev_type, - 
notify_event->dev_port); + notify_event->event_type, notify_event->bus_id, + notify_event->dev_id, notify_event->dev_type, + notify_event->dev_port); if (notify_event->event_type != notify_region_probe || - notify_event->bus_id != dev.sbd.bus_id) { + notify_event->bus_id != local->dev.sbd.bus_id) { pr_warn("%s:%u: bad notify_event: event %llu, dev_id %llu, dev_type %llu\n", __func__, __LINE__, notify_event->event_type, notify_event->dev_id, notify_event->dev_type); continue; } - ps3_find_and_add_device(dev.sbd.bus_id, notify_event->dev_id); + ps3_find_and_add_device(local->dev.sbd.bus_id, + notify_event->dev_id); } while (!kthread_should_stop()); fail_free_irq: - free_irq(irq, &dev); + free_irq(irq, &local->dev); fail_sb_event_receive_port_destroy: - ps3_sb_event_receive_port_destroy(&dev.sbd, irq); + ps3_sb_event_receive_port_destroy(&local->dev.sbd, irq); fail_close_device: - lv1_close_device(dev.sbd.bus_id, dev.sbd.dev_id); + lv1_close_device(local->dev.sbd.bus_id, local->dev.sbd.dev_id); fail_free: - kfree(buf); + kfree(local); probe_task = NULL; diff --git a/arch/powerpc/platforms/ps3/system-bus.c b/arch/powerpc/platforms/ps3/system-bus.c index d6b5f5ecd5..56dc6b29a3 100644 --- a/arch/powerpc/platforms/ps3/system-bus.c +++ b/arch/powerpc/platforms/ps3/system-bus.c @@ -695,7 +695,7 @@ static const struct dma_map_ops ps3_sb_dma_ops = { .unmap_page = ps3_unmap_page, .mmap = dma_common_mmap, .get_sgtable = dma_common_get_sgtable, - .alloc_pages = dma_common_alloc_pages, + .alloc_pages_op = dma_common_alloc_pages, .free_pages = dma_common_free_pages, }; @@ -709,7 +709,7 @@ static const struct dma_map_ops ps3_ioc0_dma_ops = { .unmap_page = ps3_unmap_page, .mmap = dma_common_mmap, .get_sgtable = dma_common_get_sgtable, - .alloc_pages = dma_common_alloc_pages, + .alloc_pages_op = dma_common_alloc_pages, .free_pages = dma_common_free_pages, }; diff --git a/arch/powerpc/platforms/pseries/Makefile b/arch/powerpc/platforms/pseries/Makefile index f936962a29..7bf506f6b8 
100644 --- a/arch/powerpc/platforms/pseries/Makefile +++ b/arch/powerpc/platforms/pseries/Makefile @@ -1,5 +1,4 @@ # SPDX-License-Identifier: GPL-2.0 -ccflags-$(CONFIG_PPC64) := $(NO_MINIMAL_TOC) ccflags-$(CONFIG_PPC_PSERIES_DEBUG) += -DDEBUG obj-y := lpar.o hvCall.o nvram.o reconfig.o \ diff --git a/arch/powerpc/platforms/pseries/lparcfg.c b/arch/powerpc/platforms/pseries/lparcfg.c index 0ed56e5627..62da20f970 100644 --- a/arch/powerpc/platforms/pseries/lparcfg.c +++ b/arch/powerpc/platforms/pseries/lparcfg.c @@ -170,20 +170,24 @@ out: kfree(buf); } -static unsigned h_pic(unsigned long *pool_idle_time, - unsigned long *num_procs) +static long h_pic(unsigned long *pool_idle_time, + unsigned long *num_procs) { - unsigned long rc; - unsigned long retbuf[PLPAR_HCALL_BUFSIZE]; + long rc; + unsigned long retbuf[PLPAR_HCALL_BUFSIZE] = {0}; rc = plpar_hcall(H_PIC, retbuf); - *pool_idle_time = retbuf[0]; - *num_procs = retbuf[1]; + if (pool_idle_time) + *pool_idle_time = retbuf[0]; + if (num_procs) + *num_procs = retbuf[1]; return rc; } +unsigned long boot_pool_idle_time; + /* * parse_ppp_data * Parse out the data returned from h_get_ppp and h_pic @@ -215,9 +219,15 @@ static void parse_ppp_data(struct seq_file *m) seq_printf(m, "pool_capacity=%d\n", ppp_data.active_procs_in_pool * 100); - h_pic(&pool_idle_time, &pool_procs); - seq_printf(m, "pool_idle_time=%ld\n", pool_idle_time); - seq_printf(m, "pool_num_procs=%ld\n", pool_procs); + /* In case h_pic call is not successful, this would result in + * APP values being wrong in tools like lparstat. 
+ */ + + if (h_pic(&pool_idle_time, &pool_procs) == H_SUCCESS) { + seq_printf(m, "pool_idle_time=%ld\n", pool_idle_time); + seq_printf(m, "pool_num_procs=%ld\n", pool_procs); + seq_printf(m, "boot_pool_idle_time=%ld\n", boot_pool_idle_time); + } } seq_printf(m, "unallocated_capacity_weight=%d\n", @@ -792,6 +802,7 @@ static const struct proc_ops lparcfg_proc_ops = { static int __init lparcfg_init(void) { umode_t mode = 0444; + long retval; /* Allow writing if we have FW_FEATURE_SPLPAR */ if (firmware_has_feature(FW_FEATURE_SPLPAR)) @@ -801,6 +812,16 @@ static int __init lparcfg_init(void) printk(KERN_ERR "Failed to create powerpc/lparcfg\n"); return -EIO; } + + /* If this call fails, it would result in APP values + * being wrong for since boot reports of lparstat + */ + retval = h_pic(&boot_pool_idle_time, NULL); + + if (retval != H_SUCCESS) + pr_debug("H_PIC failed during lparcfg init retval: %ld\n", + retval); + return 0; } machine_device_initcall(pseries, lparcfg_init); diff --git a/arch/powerpc/platforms/pseries/papr_scm.c b/arch/powerpc/platforms/pseries/papr_scm.c index c233f9db03..9b6420eb35 100644 --- a/arch/powerpc/platforms/pseries/papr_scm.c +++ b/arch/powerpc/platforms/pseries/papr_scm.c @@ -16,7 +16,8 @@ #include <linux/nd.h> #include <asm/plpar_wrappers.h> -#include <asm/papr_pdsm.h> +#include <uapi/linux/papr_pdsm.h> +#include <linux/papr_scm.h> #include <asm/mce.h> #include <asm/unaligned.h> #include <linux/perf_event.h> @@ -29,46 +30,6 @@ (1ul << ND_CMD_SET_CONFIG_DATA) | \ (1ul << ND_CMD_CALL)) -/* DIMM health bitmap indicators */ -/* SCM device is unable to persist memory contents */ -#define PAPR_PMEM_UNARMED (1ULL << (63 - 0)) -/* SCM device failed to persist memory contents */ -#define PAPR_PMEM_SHUTDOWN_DIRTY (1ULL << (63 - 1)) -/* SCM device contents are persisted from previous IPL */ -#define PAPR_PMEM_SHUTDOWN_CLEAN (1ULL << (63 - 2)) -/* SCM device contents are not persisted from previous IPL */ -#define PAPR_PMEM_EMPTY (1ULL << (63 - 3)) 
-/* SCM device memory life remaining is critically low */ -#define PAPR_PMEM_HEALTH_CRITICAL (1ULL << (63 - 4)) -/* SCM device will be garded off next IPL due to failure */ -#define PAPR_PMEM_HEALTH_FATAL (1ULL << (63 - 5)) -/* SCM contents cannot persist due to current platform health status */ -#define PAPR_PMEM_HEALTH_UNHEALTHY (1ULL << (63 - 6)) -/* SCM device is unable to persist memory contents in certain conditions */ -#define PAPR_PMEM_HEALTH_NON_CRITICAL (1ULL << (63 - 7)) -/* SCM device is encrypted */ -#define PAPR_PMEM_ENCRYPTED (1ULL << (63 - 8)) -/* SCM device has been scrubbed and locked */ -#define PAPR_PMEM_SCRUBBED_AND_LOCKED (1ULL << (63 - 9)) - -/* Bits status indicators for health bitmap indicating unarmed dimm */ -#define PAPR_PMEM_UNARMED_MASK (PAPR_PMEM_UNARMED | \ - PAPR_PMEM_HEALTH_UNHEALTHY) - -/* Bits status indicators for health bitmap indicating unflushed dimm */ -#define PAPR_PMEM_BAD_SHUTDOWN_MASK (PAPR_PMEM_SHUTDOWN_DIRTY) - -/* Bits status indicators for health bitmap indicating unrestored dimm */ -#define PAPR_PMEM_BAD_RESTORE_MASK (PAPR_PMEM_EMPTY) - -/* Bit status indicators for smart event notification */ -#define PAPR_PMEM_SMART_EVENT_MASK (PAPR_PMEM_HEALTH_CRITICAL | \ - PAPR_PMEM_HEALTH_FATAL | \ - PAPR_PMEM_HEALTH_UNHEALTHY) - -#define PAPR_SCM_PERF_STATS_EYECATCHER __stringify(SCMSTATS) -#define PAPR_SCM_PERF_STATS_VERSION 0x1 - /* Struct holding a single performance metric */ struct papr_scm_perf_stat { u8 stat_id[8]; diff --git a/arch/powerpc/platforms/pseries/pci.c b/arch/powerpc/platforms/pseries/pci.c index 1772ae3d19..6dbc73eb2c 100644 --- a/arch/powerpc/platforms/pseries/pci.c +++ b/arch/powerpc/platforms/pseries/pci.c @@ -18,33 +18,6 @@ #include <asm/pci.h> #include "pseries.h" -#if 0 -void pcibios_name_device(struct pci_dev *dev) -{ - struct device_node *dn; - - /* - * Add IBM loc code (slot) as a prefix to the device names for service - */ - dn = pci_device_to_OF_node(dev); - if (dn) { - const char *loc_code = 
of_get_property(dn, "ibm,loc-code", - NULL); - if (loc_code) { - int loc_len = strlen(loc_code); - if (loc_len < sizeof(dev->dev.name)) { - memmove(dev->dev.name+loc_len+1, dev->dev.name, - sizeof(dev->dev.name)-loc_len-1); - memcpy(dev->dev.name, loc_code, loc_len); - dev->dev.name[loc_len] = ' '; - dev->dev.name[sizeof(dev->dev.name)-1] = '\0'; - } - } - } -} -DECLARE_PCI_FIXUP_HEADER(PCI_ANY_ID, PCI_ANY_ID, pcibios_name_device); -#endif - #ifdef CONFIG_PCI_IOV #define MAX_VFS_FOR_MAP_PE 256 struct pe_map_bar_entry { diff --git a/arch/powerpc/platforms/pseries/rtas-fadump.c b/arch/powerpc/platforms/pseries/rtas-fadump.c index b5853e9fcc..eceb328938 100644 --- a/arch/powerpc/platforms/pseries/rtas-fadump.c +++ b/arch/powerpc/platforms/pseries/rtas-fadump.c @@ -18,6 +18,7 @@ #include <asm/page.h> #include <asm/rtas.h> +#include <asm/setup.h> #include <asm/fadump.h> #include <asm/fadump-internal.h> @@ -29,9 +30,6 @@ static const struct rtas_fadump_mem_struct *fdm_active; static void rtas_fadump_update_config(struct fw_dump *fadump_conf, const struct rtas_fadump_mem_struct *fdm) { - fadump_conf->boot_mem_dest_addr = - be64_to_cpu(fdm->rmr_region.destination_address); - fadump_conf->fadumphdr_addr = (fadump_conf->boot_mem_dest_addr + fadump_conf->boot_memory_size); } @@ -43,20 +41,56 @@ static void rtas_fadump_update_config(struct fw_dump *fadump_conf, static void __init rtas_fadump_get_config(struct fw_dump *fadump_conf, const struct rtas_fadump_mem_struct *fdm) { - fadump_conf->boot_mem_addr[0] = - be64_to_cpu(fdm->rmr_region.source_address); - fadump_conf->boot_mem_sz[0] = be64_to_cpu(fdm->rmr_region.source_len); - fadump_conf->boot_memory_size = fadump_conf->boot_mem_sz[0]; + unsigned long base, size, last_end, hole_size; - fadump_conf->boot_mem_top = fadump_conf->boot_memory_size; - fadump_conf->boot_mem_regs_cnt = 1; + last_end = 0; + hole_size = 0; + fadump_conf->boot_memory_size = 0; + fadump_conf->boot_mem_regs_cnt = 0; + pr_debug("Boot memory regions:\n"); + 
for (int i = 0; i < be16_to_cpu(fdm->header.dump_num_sections); i++) { + int type = be16_to_cpu(fdm->rgn[i].source_data_type); + u64 addr; - /* - * Start address of reserve dump area (permanent reservation) for - * re-registering FADump after dump capture. - */ - fadump_conf->reserve_dump_area_start = - be64_to_cpu(fdm->cpu_state_data.destination_address); + switch (type) { + case RTAS_FADUMP_CPU_STATE_DATA: + addr = be64_to_cpu(fdm->rgn[i].destination_address); + + fadump_conf->cpu_state_dest_vaddr = (u64)__va(addr); + /* + * Start address of reserve dump area (permanent reservation) for + * re-registering FADump after dump capture. + */ + fadump_conf->reserve_dump_area_start = addr; + break; + case RTAS_FADUMP_HPTE_REGION: + /* Not processed currently. */ + break; + case RTAS_FADUMP_REAL_MODE_REGION: + base = be64_to_cpu(fdm->rgn[i].source_address); + size = be64_to_cpu(fdm->rgn[i].source_len); + pr_debug("\t[%03d] base: 0x%lx, size: 0x%lx\n", i, base, size); + if (!base) { + fadump_conf->boot_mem_dest_addr = + be64_to_cpu(fdm->rgn[i].destination_address); + } + + fadump_conf->boot_mem_addr[fadump_conf->boot_mem_regs_cnt] = base; + fadump_conf->boot_mem_sz[fadump_conf->boot_mem_regs_cnt] = size; + fadump_conf->boot_memory_size += size; + hole_size += (base - last_end); + last_end = base + size; + fadump_conf->boot_mem_regs_cnt++; + break; + case RTAS_FADUMP_PARAM_AREA: + fadump_conf->param_area = be64_to_cpu(fdm->rgn[i].destination_address); + break; + default: + pr_warn("Section type %d unsupported on this kernel. 
Ignoring!\n", type); + break; + } + } + fadump_conf->boot_mem_top = fadump_conf->boot_memory_size + hole_size; rtas_fadump_update_config(fadump_conf, fdm); } @@ -64,16 +98,15 @@ static void __init rtas_fadump_get_config(struct fw_dump *fadump_conf, static u64 rtas_fadump_init_mem_struct(struct fw_dump *fadump_conf) { u64 addr = fadump_conf->reserve_dump_area_start; + u16 sec_cnt = 0; memset(&fdm, 0, sizeof(struct rtas_fadump_mem_struct)); addr = addr & PAGE_MASK; fdm.header.dump_format_version = cpu_to_be32(0x00000001); - fdm.header.dump_num_sections = cpu_to_be16(3); fdm.header.dump_status_flag = 0; fdm.header.offset_first_dump_section = - cpu_to_be32((u32)offsetof(struct rtas_fadump_mem_struct, - cpu_state_data)); + cpu_to_be32((u32)offsetof(struct rtas_fadump_mem_struct, rgn)); /* * Fields for disk dump option. @@ -89,25 +122,22 @@ static u64 rtas_fadump_init_mem_struct(struct fw_dump *fadump_conf) /* Kernel dump sections */ /* cpu state data section. */ - fdm.cpu_state_data.request_flag = - cpu_to_be32(RTAS_FADUMP_REQUEST_FLAG); - fdm.cpu_state_data.source_data_type = - cpu_to_be16(RTAS_FADUMP_CPU_STATE_DATA); - fdm.cpu_state_data.source_address = 0; - fdm.cpu_state_data.source_len = - cpu_to_be64(fadump_conf->cpu_state_data_size); - fdm.cpu_state_data.destination_address = cpu_to_be64(addr); + fdm.rgn[sec_cnt].request_flag = cpu_to_be32(RTAS_FADUMP_REQUEST_FLAG); + fdm.rgn[sec_cnt].source_data_type = cpu_to_be16(RTAS_FADUMP_CPU_STATE_DATA); + fdm.rgn[sec_cnt].source_address = 0; + fdm.rgn[sec_cnt].source_len = cpu_to_be64(fadump_conf->cpu_state_data_size); + fdm.rgn[sec_cnt].destination_address = cpu_to_be64(addr); addr += fadump_conf->cpu_state_data_size; + sec_cnt++; /* hpte region section */ - fdm.hpte_region.request_flag = cpu_to_be32(RTAS_FADUMP_REQUEST_FLAG); - fdm.hpte_region.source_data_type = - cpu_to_be16(RTAS_FADUMP_HPTE_REGION); - fdm.hpte_region.source_address = 0; - fdm.hpte_region.source_len = - cpu_to_be64(fadump_conf->hpte_region_size); - 
fdm.hpte_region.destination_address = cpu_to_be64(addr); + fdm.rgn[sec_cnt].request_flag = cpu_to_be32(RTAS_FADUMP_REQUEST_FLAG); + fdm.rgn[sec_cnt].source_data_type = cpu_to_be16(RTAS_FADUMP_HPTE_REGION); + fdm.rgn[sec_cnt].source_address = 0; + fdm.rgn[sec_cnt].source_len = cpu_to_be64(fadump_conf->hpte_region_size); + fdm.rgn[sec_cnt].destination_address = cpu_to_be64(addr); addr += fadump_conf->hpte_region_size; + sec_cnt++; /* * Align boot memory area destination address to page boundary to @@ -115,14 +145,29 @@ static u64 rtas_fadump_init_mem_struct(struct fw_dump *fadump_conf) */ addr = PAGE_ALIGN(addr); - /* RMA region section */ - fdm.rmr_region.request_flag = cpu_to_be32(RTAS_FADUMP_REQUEST_FLAG); - fdm.rmr_region.source_data_type = - cpu_to_be16(RTAS_FADUMP_REAL_MODE_REGION); - fdm.rmr_region.source_address = cpu_to_be64(0); - fdm.rmr_region.source_len = cpu_to_be64(fadump_conf->boot_memory_size); - fdm.rmr_region.destination_address = cpu_to_be64(addr); - addr += fadump_conf->boot_memory_size; + /* First boot memory region destination address */ + fadump_conf->boot_mem_dest_addr = addr; + for (int i = 0; i < fadump_conf->boot_mem_regs_cnt; i++) { + /* Boot memory regions */ + fdm.rgn[sec_cnt].request_flag = cpu_to_be32(RTAS_FADUMP_REQUEST_FLAG); + fdm.rgn[sec_cnt].source_data_type = cpu_to_be16(RTAS_FADUMP_REAL_MODE_REGION); + fdm.rgn[sec_cnt].source_address = cpu_to_be64(fadump_conf->boot_mem_addr[i]); + fdm.rgn[sec_cnt].source_len = cpu_to_be64(fadump_conf->boot_mem_sz[i]); + fdm.rgn[sec_cnt].destination_address = cpu_to_be64(addr); + addr += fadump_conf->boot_mem_sz[i]; + sec_cnt++; + } + + /* Parameters area */ + if (fadump_conf->param_area) { + fdm.rgn[sec_cnt].request_flag = cpu_to_be32(RTAS_FADUMP_REQUEST_FLAG); + fdm.rgn[sec_cnt].source_data_type = cpu_to_be16(RTAS_FADUMP_PARAM_AREA); + fdm.rgn[sec_cnt].source_address = cpu_to_be64(fadump_conf->param_area); + fdm.rgn[sec_cnt].source_len = cpu_to_be64(COMMAND_LINE_SIZE); + 
fdm.rgn[sec_cnt].destination_address = cpu_to_be64(fadump_conf->param_area); + sec_cnt++; + } + fdm.header.dump_num_sections = cpu_to_be16(sec_cnt); rtas_fadump_update_config(fadump_conf, &fdm); @@ -136,14 +181,21 @@ static u64 rtas_fadump_get_bootmem_min(void) static int rtas_fadump_register(struct fw_dump *fadump_conf) { - unsigned int wait_time; + unsigned int wait_time, fdm_size; int rc, err = -EIO; + /* + * Platform requires the exact size of the Dump Memory Structure. + * Avoid including any unused rgns in the calculation, as this + * could result in a parameter error (-3) from the platform. + */ + fdm_size = sizeof(struct rtas_fadump_section_header); + fdm_size += be16_to_cpu(fdm.header.dump_num_sections) * sizeof(struct rtas_fadump_section); + /* TODO: Add upper time limit for the delay */ do { rc = rtas_call(fadump_conf->ibm_configure_kernel_dump, 3, 1, - NULL, FADUMP_REGISTER, &fdm, - sizeof(struct rtas_fadump_mem_struct)); + NULL, FADUMP_REGISTER, &fdm, fdm_size); wait_time = rtas_busy_delay_time(rc); if (wait_time) @@ -161,9 +213,7 @@ static int rtas_fadump_register(struct fw_dump *fadump_conf) pr_err("Failed to register. Hardware Error(%d).\n", rc); break; case -3: - if (!is_fadump_boot_mem_contiguous()) - pr_err("Can't have holes in boot memory area.\n"); - else if (!is_fadump_reserved_mem_contiguous()) + if (!is_fadump_reserved_mem_contiguous()) pr_err("Can't have holes in reserved memory area.\n"); pr_err("Failed to register. 
Parameter Error(%d).\n", rc); @@ -316,11 +366,9 @@ static int __init rtas_fadump_build_cpu_notes(struct fw_dump *fadump_conf) u32 num_cpus, *note_buf; int i, rc = 0, cpu = 0; struct pt_regs regs; - unsigned long addr; void *vaddr; - addr = be64_to_cpu(fdm_active->cpu_state_data.destination_address); - vaddr = __va(addr); + vaddr = (void *)fadump_conf->cpu_state_dest_vaddr; reg_header = vaddr; if (be64_to_cpu(reg_header->magic_number) != @@ -375,11 +423,8 @@ static int __init rtas_fadump_build_cpu_notes(struct fw_dump *fadump_conf) } final_note(note_buf); - if (fdh) { - pr_debug("Updating elfcore header (%llx) with cpu notes\n", - fdh->elfcorehdr_addr); - fadump_update_elfcore_header(__va(fdh->elfcorehdr_addr)); - } + pr_debug("Updating elfcore header (%llx) with cpu notes\n", fadump_conf->elfcorehdr_addr); + fadump_update_elfcore_header((char *)fadump_conf->elfcorehdr_addr); return 0; error_out: @@ -389,57 +434,66 @@ error_out: } /* - * Validate and process the dump data stored by firmware before exporting - * it through '/proc/vmcore'. + * Validate and process the dump data stored by the firmware, and update + * the CPU notes of elfcorehdr. */ static int __init rtas_fadump_process(struct fw_dump *fadump_conf) { - struct fadump_crash_info_header *fdh; - int rc = 0; - if (!fdm_active || !fadump_conf->fadumphdr_addr) return -EINVAL; /* Check if the dump data is valid. 
*/ - if ((be16_to_cpu(fdm_active->header.dump_status_flag) == - RTAS_FADUMP_ERROR_FLAG) || - (fdm_active->cpu_state_data.error_flags != 0) || - (fdm_active->rmr_region.error_flags != 0)) { - pr_err("Dump taken by platform is not valid\n"); - return -EINVAL; - } - if ((fdm_active->rmr_region.bytes_dumped != - fdm_active->rmr_region.source_len) || - !fdm_active->cpu_state_data.bytes_dumped) { - pr_err("Dump taken by platform is incomplete\n"); - return -EINVAL; - } + for (int i = 0; i < be16_to_cpu(fdm_active->header.dump_num_sections); i++) { + int type = be16_to_cpu(fdm_active->rgn[i].source_data_type); + int rc = 0; - /* Validate the fadump crash info header */ - fdh = __va(fadump_conf->fadumphdr_addr); - if (fdh->magic_number != FADUMP_CRASH_INFO_MAGIC) { - pr_err("Crash info header is not valid.\n"); - return -EINVAL; + switch (type) { + case RTAS_FADUMP_CPU_STATE_DATA: + case RTAS_FADUMP_HPTE_REGION: + case RTAS_FADUMP_REAL_MODE_REGION: + if (fdm_active->rgn[i].error_flags != 0) { + pr_err("Dump taken by platform is not valid (%d)\n", i); + rc = -EINVAL; + } + if (fdm_active->rgn[i].bytes_dumped != fdm_active->rgn[i].source_len) { + pr_err("Dump taken by platform is incomplete (%d)\n", i); + rc = -EINVAL; + } + if (rc) { + pr_warn("Region type: %u src addr: 0x%llx dest addr: 0x%llx\n", + be16_to_cpu(fdm_active->rgn[i].source_data_type), + be64_to_cpu(fdm_active->rgn[i].source_address), + be64_to_cpu(fdm_active->rgn[i].destination_address)); + return rc; + } + break; + case RTAS_FADUMP_PARAM_AREA: + if (fdm_active->rgn[i].bytes_dumped != fdm_active->rgn[i].source_len || + fdm_active->rgn[i].error_flags != 0) { + pr_warn("Failed to process additional parameters! Proceeding anyway..\n"); + fadump_conf->param_area = 0; + } + break; + default: + /* + * If the first/crashed kernel added a new region type that the + * second/fadump kernel doesn't recognize, skip it and process + * assuming backward compatibility. 
+ */ + pr_warn("Unknown region found: type: %u src addr: 0x%llx dest addr: 0x%llx\n", + be16_to_cpu(fdm_active->rgn[i].source_data_type), + be64_to_cpu(fdm_active->rgn[i].source_address), + be64_to_cpu(fdm_active->rgn[i].destination_address)); + break; + } } - rc = rtas_fadump_build_cpu_notes(fadump_conf); - if (rc) - return rc; - - /* - * We are done validating dump info and elfcore header is now ready - * to be exported. set elfcorehdr_addr so that vmcore module will - * export the elfcore header through '/proc/vmcore'. - */ - elfcorehdr_addr = fdh->elfcorehdr_addr; - - return 0; + return rtas_fadump_build_cpu_notes(fadump_conf); } static void rtas_fadump_region_show(struct fw_dump *fadump_conf, struct seq_file *m) { - const struct rtas_fadump_section *cpu_data_section; const struct rtas_fadump_mem_struct *fdm_ptr; if (fdm_active) @@ -447,27 +501,49 @@ static void rtas_fadump_region_show(struct fw_dump *fadump_conf, else fdm_ptr = &fdm; - cpu_data_section = &(fdm_ptr->cpu_state_data); - seq_printf(m, "CPU :[%#016llx-%#016llx] %#llx bytes, Dumped: %#llx\n", - be64_to_cpu(cpu_data_section->destination_address), - be64_to_cpu(cpu_data_section->destination_address) + - be64_to_cpu(cpu_data_section->source_len) - 1, - be64_to_cpu(cpu_data_section->source_len), - be64_to_cpu(cpu_data_section->bytes_dumped)); - - seq_printf(m, "HPTE:[%#016llx-%#016llx] %#llx bytes, Dumped: %#llx\n", - be64_to_cpu(fdm_ptr->hpte_region.destination_address), - be64_to_cpu(fdm_ptr->hpte_region.destination_address) + - be64_to_cpu(fdm_ptr->hpte_region.source_len) - 1, - be64_to_cpu(fdm_ptr->hpte_region.source_len), - be64_to_cpu(fdm_ptr->hpte_region.bytes_dumped)); - - seq_printf(m, "DUMP: Src: %#016llx, Dest: %#016llx, ", - be64_to_cpu(fdm_ptr->rmr_region.source_address), - be64_to_cpu(fdm_ptr->rmr_region.destination_address)); - seq_printf(m, "Size: %#llx, Dumped: %#llx bytes\n", - be64_to_cpu(fdm_ptr->rmr_region.source_len), - be64_to_cpu(fdm_ptr->rmr_region.bytes_dumped)); + + for (int i 
= 0; i < be16_to_cpu(fdm_ptr->header.dump_num_sections); i++) { + int type = be16_to_cpu(fdm_ptr->rgn[i].source_data_type); + + switch (type) { + case RTAS_FADUMP_CPU_STATE_DATA: + seq_printf(m, "CPU :[%#016llx-%#016llx] %#llx bytes, Dumped: %#llx\n", + be64_to_cpu(fdm_ptr->rgn[i].destination_address), + be64_to_cpu(fdm_ptr->rgn[i].destination_address) + + be64_to_cpu(fdm_ptr->rgn[i].source_len) - 1, + be64_to_cpu(fdm_ptr->rgn[i].source_len), + be64_to_cpu(fdm_ptr->rgn[i].bytes_dumped)); + break; + case RTAS_FADUMP_HPTE_REGION: + seq_printf(m, "HPTE:[%#016llx-%#016llx] %#llx bytes, Dumped: %#llx\n", + be64_to_cpu(fdm_ptr->rgn[i].destination_address), + be64_to_cpu(fdm_ptr->rgn[i].destination_address) + + be64_to_cpu(fdm_ptr->rgn[i].source_len) - 1, + be64_to_cpu(fdm_ptr->rgn[i].source_len), + be64_to_cpu(fdm_ptr->rgn[i].bytes_dumped)); + break; + case RTAS_FADUMP_REAL_MODE_REGION: + seq_printf(m, "DUMP: Src: %#016llx, Dest: %#016llx, ", + be64_to_cpu(fdm_ptr->rgn[i].source_address), + be64_to_cpu(fdm_ptr->rgn[i].destination_address)); + seq_printf(m, "Size: %#llx, Dumped: %#llx bytes\n", + be64_to_cpu(fdm_ptr->rgn[i].source_len), + be64_to_cpu(fdm_ptr->rgn[i].bytes_dumped)); + break; + case RTAS_FADUMP_PARAM_AREA: + seq_printf(m, "\n[%#016llx-%#016llx]: cmdline append: '%s'\n", + be64_to_cpu(fdm_ptr->rgn[i].destination_address), + be64_to_cpu(fdm_ptr->rgn[i].destination_address) + + be64_to_cpu(fdm_ptr->rgn[i].source_len) - 1, + (char *)__va(be64_to_cpu(fdm_ptr->rgn[i].destination_address))); + break; + default: + seq_printf(m, "Unknown region type %d : Src: %#016llx, Dest: %#016llx, ", + type, be64_to_cpu(fdm_ptr->rgn[i].source_address), + be64_to_cpu(fdm_ptr->rgn[i].destination_address)); + break; + } + } /* Dump is active. Show preserved area start address. 
*/ if (fdm_active) { @@ -483,6 +559,20 @@ static void rtas_fadump_trigger(struct fadump_crash_info_header *fdh, rtas_os_term((char *)msg); } +/* FADUMP_MAX_MEM_REGS or lower */ +static int rtas_fadump_max_boot_mem_rgns(void) +{ + /* + * Version 1 of Kernel Assisted Dump Memory Structure (PAPR) supports 10 sections. + * With one each section taken for CPU state data & HPTE respectively, 8 sections + * can be used for boot memory regions. + * + * If new region(s) is(are) defined, maximum boot memory regions will decrease + * proportionally. + */ + return RTAS_FADUMP_MAX_BOOT_MEM_REGS; +} + static struct fadump_ops rtas_fadump_ops = { .fadump_init_mem_struct = rtas_fadump_init_mem_struct, .fadump_get_bootmem_min = rtas_fadump_get_bootmem_min, @@ -492,6 +582,7 @@ static struct fadump_ops rtas_fadump_ops = { .fadump_process = rtas_fadump_process, .fadump_region_show = rtas_fadump_region_show, .fadump_trigger = rtas_fadump_trigger, + .fadump_max_boot_mem_rgns = rtas_fadump_max_boot_mem_rgns, }; void __init rtas_fadump_dt_scan(struct fw_dump *fadump_conf, u64 node) @@ -508,9 +599,10 @@ void __init rtas_fadump_dt_scan(struct fw_dump *fadump_conf, u64 node) if (!token) return; - fadump_conf->ibm_configure_kernel_dump = be32_to_cpu(*token); - fadump_conf->ops = &rtas_fadump_ops; - fadump_conf->fadump_supported = 1; + fadump_conf->ibm_configure_kernel_dump = be32_to_cpu(*token); + fadump_conf->ops = &rtas_fadump_ops; + fadump_conf->fadump_supported = 1; + fadump_conf->param_area_supported = 1; /* Firmware supports 64-bit value for size, align it to pagesize. 
*/ fadump_conf->max_copy_size = ALIGN_DOWN(U64_MAX, PAGE_SIZE); diff --git a/arch/powerpc/platforms/pseries/rtas-fadump.h b/arch/powerpc/platforms/pseries/rtas-fadump.h index fd59bd7ca9..c109abf6be 100644 --- a/arch/powerpc/platforms/pseries/rtas-fadump.h +++ b/arch/powerpc/platforms/pseries/rtas-fadump.h @@ -23,12 +23,24 @@ #define RTAS_FADUMP_HPTE_REGION 0x0002 #define RTAS_FADUMP_REAL_MODE_REGION 0x0011 +/* OS defined sections */ +#define RTAS_FADUMP_PARAM_AREA 0x0100 + /* Dump request flag */ #define RTAS_FADUMP_REQUEST_FLAG 0x00000001 /* Dump status flag */ #define RTAS_FADUMP_ERROR_FLAG 0x2000 +/* + * The Firmware Assisted Dump Memory structure supports a maximum of 10 sections + * in the dump memory structure. Presently, three sections are used for + * CPU state data, HPTE & Parameters area, while the remaining seven sections + * can be used for boot memory regions. + */ +#define MAX_SECTIONS 10 +#define RTAS_FADUMP_MAX_BOOT_MEM_REGS 7 + /* Kernel Dump section info */ struct rtas_fadump_section { __be32 request_flag; @@ -61,20 +73,15 @@ struct rtas_fadump_section_header { * Firmware Assisted dump memory structure. This structure is required for * registering future kernel dump with power firmware through rtas call. * - * No disk dump option. Hence disk dump path string section is not included. + * In version 1, the platform permits one section header, dump-disk path + * and ten sections. + * + * Note: No disk dump option. Hence disk dump path string section is not + * included. */ struct rtas_fadump_mem_struct { struct rtas_fadump_section_header header; - - /* Kernel dump sections */ - struct rtas_fadump_section cpu_state_data; - struct rtas_fadump_section hpte_region; - - /* - * TODO: Extend multiple boot memory regions support in the kernel - * for this platform. 
- */ - struct rtas_fadump_section rmr_region; + struct rtas_fadump_section rgn[MAX_SECTIONS]; }; /* diff --git a/arch/powerpc/platforms/pseries/vas.c b/arch/powerpc/platforms/pseries/vas.c index 71d52a670d..ba3fb7a7f2 100644 --- a/arch/powerpc/platforms/pseries/vas.c +++ b/arch/powerpc/platforms/pseries/vas.c @@ -228,7 +228,7 @@ static irqreturn_t pseries_vas_irq_handler(int irq, void *data) struct pseries_vas_window *txwin = data; /* - * The thread hanlder will process this interrupt if it is + * The thread handler will process this interrupt if it is * already running. */ atomic_inc(&txwin->pending_faults); diff --git a/arch/powerpc/platforms/pseries/vio.c b/arch/powerpc/platforms/pseries/vio.c index 90ff85c879..36d1c7d415 100644 --- a/arch/powerpc/platforms/pseries/vio.c +++ b/arch/powerpc/platforms/pseries/vio.c @@ -611,7 +611,7 @@ static const struct dma_map_ops vio_dma_mapping_ops = { .get_required_mask = dma_iommu_get_required_mask, .mmap = dma_common_mmap, .get_sgtable = dma_common_get_sgtable, - .alloc_pages = dma_common_alloc_pages, + .alloc_pages_op = dma_common_alloc_pages, .free_pages = dma_common_free_pages, }; @@ -1592,13 +1592,9 @@ static int vio_hotplug(const struct device *dev, struct kobj_uevent_env *env) const char *cp; dn = dev->of_node; - if (!dn) - return -ENODEV; - cp = of_get_property(dn, "compatible", NULL); - if (!cp) - return -ENODEV; + if (dn && (cp = of_get_property(dn, "compatible", NULL))) + add_uevent_var(env, "MODALIAS=vio:T%sS%s", vio_dev->type, cp); - add_uevent_var(env, "MODALIAS=vio:T%sS%s", vio_dev->type, cp); return 0; } diff --git a/arch/powerpc/purgatory/Makefile b/arch/powerpc/purgatory/Makefile index 78473d69cd..e989008595 100644 --- a/arch/powerpc/purgatory/Makefile +++ b/arch/powerpc/purgatory/Makefile @@ -1,8 +1,5 @@ # SPDX-License-Identifier: GPL-2.0 -KASAN_SANITIZE := n -KCSAN_SANITIZE := n - targets += trampoline_$(BITS).o purgatory.ro # When profile-guided optimization is enabled, llvm emits two different diff 
--git a/arch/powerpc/sysdev/Makefile b/arch/powerpc/sysdev/Makefile index 9cb1d02951..24a177d164 100644 --- a/arch/powerpc/sysdev/Makefile +++ b/arch/powerpc/sysdev/Makefile @@ -1,7 +1,5 @@ # SPDX-License-Identifier: GPL-2.0 -ccflags-$(CONFIG_PPC64) := $(NO_MINIMAL_TOC) - mpic-msi-obj-$(CONFIG_PCI_MSI) += mpic_msi.o mpic_u3msi.o obj-$(CONFIG_MPIC) += mpic.o $(mpic-msi-obj-y) obj-$(CONFIG_MPIC_TIMER) += mpic_timer.o diff --git a/arch/powerpc/sysdev/dart_iommu.c b/arch/powerpc/sysdev/dart_iommu.c index 98096bbfd6..c0d10c1496 100644 --- a/arch/powerpc/sysdev/dart_iommu.c +++ b/arch/powerpc/sysdev/dart_iommu.c @@ -24,7 +24,6 @@ #include <linux/suspend.h> #include <linux/memblock.h> #include <linux/gfp.h> -#include <linux/kmemleak.h> #include <linux/of_address.h> #include <asm/io.h> #include <asm/iommu.h> @@ -243,9 +242,6 @@ static void __init allocate_dart(void) if (!dart_tablebase) panic("Failed to allocate 16MB below 2GB for DART table\n"); - /* There is no point scanning the DART space for leaks*/ - kmemleak_no_scan((void *)dart_tablebase); - /* Allocate a spare page to map all invalid DART pages. We need to do * that to work around what looks like a problem with the HT bridge * prefetching into invalid pages and corrupting data diff --git a/arch/powerpc/sysdev/fsl_gtm.c b/arch/powerpc/sysdev/fsl_gtm.c index 39186ad6b3..3dabc96218 100644 --- a/arch/powerpc/sysdev/fsl_gtm.c +++ b/arch/powerpc/sysdev/fsl_gtm.c @@ -77,7 +77,7 @@ struct gtm { static LIST_HEAD(gtms); /** - * gtm_get_timer - request GTM timer to use it with the rest of GTM API + * gtm_get_timer16 - request GTM timer to use it with the rest of GTM API * Context: non-IRQ * * This function reserves GTM timer for later use. 
It returns gtm_timer @@ -110,7 +110,7 @@ struct gtm_timer *gtm_get_timer16(void) EXPORT_SYMBOL(gtm_get_timer16); /** - * gtm_get_specific_timer - request specific GTM timer + * gtm_get_specific_timer16 - request specific GTM timer * @gtm: specific GTM, pass here GTM's device_node->data * @timer: specific timer number, Timer1 is 0. * Context: non-IRQ @@ -260,7 +260,7 @@ int gtm_set_timer16(struct gtm_timer *tmr, unsigned long usec, bool reload) EXPORT_SYMBOL(gtm_set_timer16); /** - * gtm_set_exact_utimer16 - (re)set 16 bits timer + * gtm_set_exact_timer16 - (re)set 16 bits timer * @tmr: pointer to the gtm_timer structure obtained from gtm_get_timer * @usec: timer interval in microseconds * @reload: if set, the timer will reset upon expiry rather than diff --git a/arch/powerpc/sysdev/xive/common.c b/arch/powerpc/sysdev/xive/common.c index a289cb97c1..fa01818c19 100644 --- a/arch/powerpc/sysdev/xive/common.c +++ b/arch/powerpc/sysdev/xive/common.c @@ -383,7 +383,7 @@ static unsigned int xive_get_irq(void) * CPU. * * If we find that there is indeed more in there, we call - * force_external_irq_replay() to make Linux synthetize an + * force_external_irq_replay() to make Linux synthesize an * external interrupt on the next call to local_irq_restore(). */ static void xive_do_queue_eoi(struct xive_cpu *xc) @@ -874,7 +874,7 @@ static int xive_irq_set_vcpu_affinity(struct irq_data *d, void *state) * * This also tells us that it's in flight to a host queue * or has already been fetched but hasn't been EOIed yet - * by the host. This it's potentially using up a host + * by the host. Thus it's potentially using up a host * queue slot. This is important to know because as long * as this is the case, we must not hard-unmask it when * "returning" that interrupt to the host. 
diff --git a/arch/powerpc/sysdev/xive/native.c b/arch/powerpc/sysdev/xive/native.c index f1c0fa6ece..517b963e3e 100644 --- a/arch/powerpc/sysdev/xive/native.c +++ b/arch/powerpc/sysdev/xive/native.c @@ -415,7 +415,7 @@ static void xive_native_setup_cpu(unsigned int cpu, struct xive_cpu *xc) return; } - /* Grab it's CAM value */ + /* Grab its CAM value */ rc = opal_xive_get_vp_info(vp, NULL, &vp_cam_be, NULL, NULL); if (rc) { pr_err("Failed to get pool VP info CPU %d\n", cpu); diff --git a/arch/powerpc/xmon/Makefile b/arch/powerpc/xmon/Makefile index 682c7c0a6f..d778011060 100644 --- a/arch/powerpc/xmon/Makefile +++ b/arch/powerpc/xmon/Makefile @@ -10,8 +10,6 @@ KCSAN_SANITIZE := n # Disable ftrace for the entire directory ccflags-remove-$(CONFIG_FUNCTION_TRACER) += $(CC_FLAGS_FTRACE) -ccflags-$(CONFIG_PPC64) := $(NO_MINIMAL_TOC) - # Clang stores addresses on the stack causing the frame size to blow # out. See https://github.com/ClangBuiltLinux/linux/issues/252 ccflags-$(CONFIG_CC_IS_CLANG) += -Wframe-larger-than=4096 diff --git a/arch/powerpc/xmon/ppc-dis.c b/arch/powerpc/xmon/ppc-dis.c index 75fa98221d..af105e1bc3 100644 --- a/arch/powerpc/xmon/ppc-dis.c +++ b/arch/powerpc/xmon/ppc-dis.c @@ -122,32 +122,21 @@ int print_insn_powerpc (unsigned long insn, unsigned long memaddr) bool insn_is_short; ppc_cpu_t dialect; - dialect = PPC_OPCODE_PPC | PPC_OPCODE_COMMON - | PPC_OPCODE_64 | PPC_OPCODE_POWER4 | PPC_OPCODE_ALTIVEC; + dialect = PPC_OPCODE_PPC | PPC_OPCODE_COMMON; - if (cpu_has_feature(CPU_FTRS_POWER5)) - dialect |= PPC_OPCODE_POWER5; + if (IS_ENABLED(CONFIG_PPC64)) + dialect |= PPC_OPCODE_64 | PPC_OPCODE_POWER4 | PPC_OPCODE_CELL | + PPC_OPCODE_POWER5 | PPC_OPCODE_POWER6 | PPC_OPCODE_POWER7 | PPC_OPCODE_POWER8 | + PPC_OPCODE_POWER9; - if (cpu_has_feature(CPU_FTRS_CELL)) - dialect |= (PPC_OPCODE_CELL | PPC_OPCODE_ALTIVEC); + if (cpu_has_feature(CPU_FTR_TM)) + dialect |= PPC_OPCODE_HTM; - if (cpu_has_feature(CPU_FTRS_POWER6)) - dialect |= (PPC_OPCODE_POWER5 | 
PPC_OPCODE_POWER6 | PPC_OPCODE_ALTIVEC); + if (cpu_has_feature(CPU_FTR_ALTIVEC)) + dialect |= PPC_OPCODE_ALTIVEC | PPC_OPCODE_ALTIVEC2; - if (cpu_has_feature(CPU_FTRS_POWER7)) - dialect |= (PPC_OPCODE_POWER5 | PPC_OPCODE_POWER6 | PPC_OPCODE_POWER7 - | PPC_OPCODE_ALTIVEC | PPC_OPCODE_VSX); - - if (cpu_has_feature(CPU_FTRS_POWER8)) - dialect |= (PPC_OPCODE_POWER5 | PPC_OPCODE_POWER6 | PPC_OPCODE_POWER7 - | PPC_OPCODE_POWER8 | PPC_OPCODE_HTM - | PPC_OPCODE_ALTIVEC | PPC_OPCODE_ALTIVEC2 | PPC_OPCODE_VSX); - - if (cpu_has_feature(CPU_FTRS_POWER9)) - dialect |= (PPC_OPCODE_POWER5 | PPC_OPCODE_POWER6 | PPC_OPCODE_POWER7 - | PPC_OPCODE_POWER8 | PPC_OPCODE_POWER9 | PPC_OPCODE_HTM - | PPC_OPCODE_ALTIVEC | PPC_OPCODE_ALTIVEC2 - | PPC_OPCODE_VSX | PPC_OPCODE_VSX3); + if (cpu_has_feature(CPU_FTR_VSX)) + dialect |= PPC_OPCODE_VSX | PPC_OPCODE_VSX3; /* Get the major opcode of the insn. */ opcode = NULL; |